1 /* 2 * Licensed to the Apache Software Foundation (ASF) under one or more 3 * contributor license agreements. See the NOTICE file distributed with 4 * this work for additional information regarding copyright ownership. 5 * The ASF licenses this file to You under the Apache License, Version 2.0 6 * (the "License"); you may not use this file except in compliance with 7 * the License. You may obtain a copy of the License at 8 * 9 * https://www.apache.org/licenses/LICENSE-2.0 10 * 11 * Unless required by applicable law or agreed to in writing, software 12 * distributed under the License is distributed on an "AS IS" BASIS, 13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 * See the License for the specific language governing permissions and 15 * limitations under the License. 16 */ 17 18 package org.apache.commons.codec.digest; 19 20 import org.apache.commons.codec.binary.StringUtils; 21 22 /** 23 * Implements the MurmurHash2 32-bit and 64-bit hash functions. 24 * 25 * <p>MurmurHash is a non-cryptographic hash function suitable for general 26 * hash-based lookup. The name comes from two basic operations, multiply (MU) 27 * and rotate (R), used in its inner loop. Unlike cryptographic hash functions, 28 * it is not specifically designed to be difficult to reverse by an adversary, 29 * making it unsuitable for cryptographic purposes.</p> 30 * 31 * <p>This contains a Java port of the 32-bit hash function {@code MurmurHash2} 32 * and the 64-bit hash function {@code MurmurHash64A} from Austin Appleby's 33 * original {@code c++} code in SMHasher.</p> 34 * 35 * <p>This is a re-implementation of the original C code plus some additional 36 * features.</p> 37 * 38 * <p>This is public domain code with no copyrights. From home page of 39 * <a href="https://github.com/aappleby/smhasher">SMHasher</a>:</p> 40 * 41 * <blockquote> 42 * "All MurmurHash versions are public domain software, and the author 43 * disclaims all copyright to their code." 44 * </blockquote> 45 * 46 * @see <a href="https://en.wikipedia.org/wiki/MurmurHash">MurmurHash</a> 47 * @see <a href="https://github.com/aappleby/smhasher/blob/master/src/MurmurHash2.cpp"> 48 * Original MurmurHash2 c++ code</a> 49 * @since 1.13 50 */ 51 public final class MurmurHash2 { 52 53 // Constants for 32-bit variant 54 private static final int M32 = 0x5bd1e995; 55 private static final int R32 = 24; 56 57 // Constants for 64-bit variant 58 private static final long M64 = 0xc6a4a7935bd1e995L; 59 private static final int R64 = 47; 60 61 /** 62 * Generates a 32-bit hash from byte array with the given length and a default seed value. 63 * This is a helper method that will produce the same result as: 64 * 65 * <pre> 66 * int seed = 0x9747b28c; 67 * int hash = MurmurHash2.hash32(data, length, seed); 68 * </pre> 69 * 70 * @param data The input byte array 71 * @param length The length of the array 72 * @return The 32-bit hash 73 * @see #hash32(byte[], int, int) 74 */ 75 public static int hash32(final byte[] data, final int length) { 76 return hash32(data, length, 0x9747b28c); 77 } 78 79 /** 80 * Generates a 32-bit hash from byte array with the given length and seed. 81 * 82 * @param data The input byte array 83 * @param length The length of the array 84 * @param seed The initial seed value 85 * @return The 32-bit hash 86 */ 87 public static int hash32(final byte[] data, final int length, final int seed) { 88 // Initialize the hash to a random value 89 int h = seed ^ length; 90 // Mix 4 bytes at a time into the hash 91 final int nblocks = length >> 2; 92 // body 93 for (int i = 0; i < nblocks; i++) { 94 final int index = i << 2; 95 int k = MurmurHash.getLittleEndianInt(data, index); 96 k *= M32; 97 k ^= k >>> R32; 98 k *= M32; 99 h *= M32; 100 h ^= k; 101 } 102 // Handle the last few bytes of the input array 103 final int index = nblocks << 2; 104 switch (length - index) { 105 case 3: 106 h ^= (data[index + 2] & 0xff) << 16; 107 // falls-through 108 case 2: 109 h ^= (data[index + 1] & 0xff) << 8; 110 // falls-through 111 case 1: 112 h ^= data[index] & 0xff; 113 h *= M32; 114 } 115 // Do a few final mixes of the hash to ensure the last few 116 // bytes are well-incorporated. 117 h ^= h >>> 13; 118 h *= M32; 119 h ^= h >>> 15; 120 return h; 121 } 122 123 /** 124 * Generates a 32-bit hash from a string with a default seed. 125 * <p> 126 * Before 1.14 the string was converted using default encoding. 127 * Since 1.14 the string is converted to bytes using UTF-8 encoding. 128 * </p> 129 * This is a helper method that will produce the same result as: 130 * 131 * <pre> 132 * int seed = 0x9747b28c; 133 * byte[] bytes = data.getBytes(StandardCharsets.UTF_8); 134 * int hash = MurmurHash2.hash32(bytes, bytes.length, seed); 135 * </pre> 136 * 137 * @param text The input string 138 * @return The 32-bit hash 139 * @see #hash32(byte[], int, int) 140 */ 141 public static int hash32(final String text) { 142 final byte[] bytes = StringUtils.getBytesUtf8(text); 143 return hash32(bytes, bytes.length); 144 } 145 146 /** 147 * Generates a 32-bit hash from a substring with a default seed value. 148 * The string is converted to bytes using the default encoding. 149 * This is a helper method that will produce the same result as: 150 * 151 * <pre> 152 * int seed = 0x9747b28c; 153 * byte[] bytes = text.substring(from, from + length).getBytes(StandardCharsets.UTF_8); 154 * int hash = MurmurHash2.hash32(bytes, bytes.length, seed); 155 * </pre> 156 * 157 * @param text The input string 158 * @param from The starting index 159 * @param length The length of the substring 160 * @return The 32-bit hash 161 * @see #hash32(byte[], int, int) 162 */ 163 public static int hash32(final String text, final int from, final int length) { 164 return hash32(text.substring(from, from + length)); 165 } 166 167 /** 168 * Generates a 64-bit hash from byte array with given length and a default seed value. 169 * This is a helper method that will produce the same result as: 170 * 171 * <pre> 172 * int seed = 0xe17a1465; 173 * int hash = MurmurHash2.hash64(data, length, seed); 174 * </pre> 175 * 176 * @param data The input byte array 177 * @param length The length of the array 178 * @return The 64-bit hash 179 * @see #hash64(byte[], int, int) 180 */ 181 public static long hash64(final byte[] data, final int length) { 182 return hash64(data, length, 0xe17a1465); 183 } 184 185 /** 186 * Generates a 64-bit hash from byte array of the given length and seed. 187 * 188 * @param data The input byte array 189 * @param length The length of the array 190 * @param seed The initial seed value 191 * @return The 64-bit hash of the given array 192 */ 193 public static long hash64(final byte[] data, final int length, final int seed) { 194 long h = seed & 0xffffffffL ^ length * M64; 195 final int nblocks = length >> 3; 196 // body 197 for (int i = 0; i < nblocks; i++) { 198 final int index = i << 3; 199 long k = MurmurHash.getLittleEndianLong(data, index); 200 201 k *= M64; 202 k ^= k >>> R64; 203 k *= M64; 204 205 h ^= k; 206 h *= M64; 207 } 208 final int index = nblocks << 3; 209 switch (length - index) { 210 case 7: 211 h ^= ((long) data[index + 6] & 0xff) << 48; 212 // falls-through 213 case 6: 214 h ^= ((long) data[index + 5] & 0xff) << 40; 215 // falls-through 216 case 5: 217 h ^= ((long) data[index + 4] & 0xff) << 32; 218 // falls-through 219 case 4: 220 h ^= ((long) data[index + 3] & 0xff) << 24; 221 // falls-through 222 case 3: 223 h ^= ((long) data[index + 2] & 0xff) << 16; 224 // falls-through 225 case 2: 226 h ^= ((long) data[index + 1] & 0xff) << 8; 227 // falls-through 228 case 1: 229 h ^= (long) data[index] & 0xff; 230 h *= M64; 231 } 232 h ^= h >>> R64; 233 h *= M64; 234 h ^= h >>> R64; 235 return h; 236 } 237 238 /** 239 * Generates a 64-bit hash from a string with a default seed. 240 * <p> 241 * Before 1.14 the string was converted using default encoding. 242 * Since 1.14 the string is converted to bytes using UTF-8 encoding. 243 * </p> 244 * <p> 245 * This is a helper method that will produce the same result as: 246 * </p> 247 * 248 * <pre> 249 * int seed = 0xe17a1465; 250 * byte[] bytes = data.getBytes(StandardCharsets.UTF_8); 251 * int hash = MurmurHash2.hash64(bytes, bytes.length, seed); 252 * </pre> 253 * 254 * @param text The input string 255 * @return The 64-bit hash 256 * @see #hash64(byte[], int, int) 257 */ 258 public static long hash64(final String text) { 259 final byte[] bytes = StringUtils.getBytesUtf8(text); 260 return hash64(bytes, bytes.length); 261 } 262 263 /** 264 * Generates a 64-bit hash from a substring with a default seed value. 265 * The string is converted to bytes using the default encoding. 266 * This is a helper method that will produce the same result as: 267 * 268 * <pre> 269 * int seed = 0xe17a1465; 270 * byte[] bytes = text.substring(from, from + length).getBytes(StandardCharsets.UTF_8); 271 * int hash = MurmurHash2.hash64(bytes, bytes.length, seed); 272 * </pre> 273 * 274 * @param text The input string 275 * @param from The starting index 276 * @param length The length of the substring 277 * @return The 64-bit hash 278 * @see #hash64(byte[], int, int) 279 */ 280 public static long hash64(final String text, final int from, final int length) { 281 return hash64(text.substring(from, from + length)); 282 } 283 284 /** No instance methods. */ 285 private MurmurHash2() { 286 } 287 }