1 /*
2 * Licensed to the Apache Software Foundation (ASF) under one or more
3 * contributor license agreements. See the NOTICE file distributed with
4 * this work for additional information regarding copyright ownership.
5 * The ASF licenses this file to You under the Apache License, Version 2.0
6 * (the "License"); you may not use this file except in compliance with
7 * the License. You may obtain a copy of the License at
8 *
9 * https://www.apache.org/licenses/LICENSE-2.0
10 *
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
16 */
17
18 package org.apache.commons.codec.digest;
19
20 import org.apache.commons.codec.binary.StringUtils;
21
22 /**
23 * Implements the MurmurHash2 32-bit and 64-bit hash functions.
24 *
25 * <p>MurmurHash is a non-cryptographic hash function suitable for general
26 * hash-based lookup. The name comes from two basic operations, multiply (MU)
27 * and rotate (R), used in its inner loop. Unlike cryptographic hash functions,
28 * it is not specifically designed to be difficult to reverse by an adversary,
29 * making it unsuitable for cryptographic purposes.</p>
30 *
31 * <p>This contains a Java port of the 32-bit hash function {@code MurmurHash2}
32 * and the 64-bit hash function {@code MurmurHash64A} from Austin Appleby's
33 * original {@code c++} code in SMHasher.</p>
34 *
35 * <p>This is a re-implementation of the original C code plus some additional
36 * features.</p>
37 *
38 * <p>This is public domain code with no copyrights. From home page of
39 * <a href="https://github.com/aappleby/smhasher">SMHasher</a>:</p>
40 *
41 * <blockquote>
42 * "All MurmurHash versions are public domain software, and the author
43 * disclaims all copyright to their code."
44 * </blockquote>
45 *
46 * @see <a href="https://en.wikipedia.org/wiki/MurmurHash">MurmurHash</a>
47 * @see <a href="https://github.com/aappleby/smhasher/blob/master/src/MurmurHash2.cpp">
48 * Original MurmurHash2 c++ code</a>
49 * @since 1.13
50 */
51 public final class MurmurHash2 {
52
53 // Constants for 32-bit variant
54 private static final int M32 = 0x5bd1e995;
55 private static final int R32 = 24;
56
57 // Constants for 64-bit variant
58 private static final long M64 = 0xc6a4a7935bd1e995L;
59 private static final int R64 = 47;
60
61 /**
62 * Generates a 32-bit hash from byte array with the given length and a default seed value.
63 * This is a helper method that will produce the same result as:
64 *
65 * <pre>
66 * int seed = 0x9747b28c;
67 * int hash = MurmurHash2.hash32(data, length, seed);
68 * </pre>
69 *
70 * @param data The input byte array
71 * @param length The length of the array
72 * @return The 32-bit hash
73 * @see #hash32(byte[], int, int)
74 */
75 public static int hash32(final byte[] data, final int length) {
76 return hash32(data, length, 0x9747b28c);
77 }
78
79 /**
80 * Generates a 32-bit hash from byte array with the given length and seed.
81 *
82 * @param data The input byte array
83 * @param length The length of the array
84 * @param seed The initial seed value
85 * @return The 32-bit hash
86 */
87 public static int hash32(final byte[] data, final int length, final int seed) {
88 // Initialize the hash to a random value
89 int h = seed ^ length;
90 // Mix 4 bytes at a time into the hash
91 final int nblocks = length >> 2;
92 // body
93 for (int i = 0; i < nblocks; i++) {
94 final int index = i << 2;
95 int k = MurmurHash.getLittleEndianInt(data, index);
96 k *= M32;
97 k ^= k >>> R32;
98 k *= M32;
99 h *= M32;
100 h ^= k;
101 }
102 // Handle the last few bytes of the input array
103 final int index = nblocks << 2;
104 switch (length - index) {
105 case 3:
106 h ^= (data[index + 2] & 0xff) << 16;
107 // falls-through
108 case 2:
109 h ^= (data[index + 1] & 0xff) << 8;
110 // falls-through
111 case 1:
112 h ^= data[index] & 0xff;
113 h *= M32;
114 }
115 // Do a few final mixes of the hash to ensure the last few
116 // bytes are well-incorporated.
117 h ^= h >>> 13;
118 h *= M32;
119 h ^= h >>> 15;
120 return h;
121 }
122
123 /**
124 * Generates a 32-bit hash from a string with a default seed.
125 * <p>
126 * Before 1.14 the string was converted using default encoding.
127 * Since 1.14 the string is converted to bytes using UTF-8 encoding.
128 * </p>
129 * This is a helper method that will produce the same result as:
130 *
131 * <pre>
132 * int seed = 0x9747b28c;
133 * byte[] bytes = data.getBytes(StandardCharsets.UTF_8);
134 * int hash = MurmurHash2.hash32(bytes, bytes.length, seed);
135 * </pre>
136 *
137 * @param text The input string
138 * @return The 32-bit hash
139 * @see #hash32(byte[], int, int)
140 */
141 public static int hash32(final String text) {
142 final byte[] bytes = StringUtils.getBytesUtf8(text);
143 return hash32(bytes, bytes.length);
144 }
145
146 /**
147 * Generates a 32-bit hash from a substring with a default seed value.
148 * The string is converted to bytes using the default encoding.
149 * This is a helper method that will produce the same result as:
150 *
151 * <pre>
152 * int seed = 0x9747b28c;
153 * byte[] bytes = text.substring(from, from + length).getBytes(StandardCharsets.UTF_8);
154 * int hash = MurmurHash2.hash32(bytes, bytes.length, seed);
155 * </pre>
156 *
157 * @param text The input string
158 * @param from The starting index
159 * @param length The length of the substring
160 * @return The 32-bit hash
161 * @see #hash32(byte[], int, int)
162 */
163 public static int hash32(final String text, final int from, final int length) {
164 return hash32(text.substring(from, from + length));
165 }
166
167 /**
168 * Generates a 64-bit hash from byte array with given length and a default seed value.
169 * This is a helper method that will produce the same result as:
170 *
171 * <pre>
172 * int seed = 0xe17a1465;
173 * int hash = MurmurHash2.hash64(data, length, seed);
174 * </pre>
175 *
176 * @param data The input byte array
177 * @param length The length of the array
178 * @return The 64-bit hash
179 * @see #hash64(byte[], int, int)
180 */
181 public static long hash64(final byte[] data, final int length) {
182 return hash64(data, length, 0xe17a1465);
183 }
184
185 /**
186 * Generates a 64-bit hash from byte array of the given length and seed.
187 *
188 * @param data The input byte array
189 * @param length The length of the array
190 * @param seed The initial seed value
191 * @return The 64-bit hash of the given array
192 */
193 public static long hash64(final byte[] data, final int length, final int seed) {
194 long h = seed & 0xffffffffL ^ length * M64;
195 final int nblocks = length >> 3;
196 // body
197 for (int i = 0; i < nblocks; i++) {
198 final int index = i << 3;
199 long k = MurmurHash.getLittleEndianLong(data, index);
200
201 k *= M64;
202 k ^= k >>> R64;
203 k *= M64;
204
205 h ^= k;
206 h *= M64;
207 }
208 final int index = nblocks << 3;
209 switch (length - index) {
210 case 7:
211 h ^= ((long) data[index + 6] & 0xff) << 48;
212 // falls-through
213 case 6:
214 h ^= ((long) data[index + 5] & 0xff) << 40;
215 // falls-through
216 case 5:
217 h ^= ((long) data[index + 4] & 0xff) << 32;
218 // falls-through
219 case 4:
220 h ^= ((long) data[index + 3] & 0xff) << 24;
221 // falls-through
222 case 3:
223 h ^= ((long) data[index + 2] & 0xff) << 16;
224 // falls-through
225 case 2:
226 h ^= ((long) data[index + 1] & 0xff) << 8;
227 // falls-through
228 case 1:
229 h ^= (long) data[index] & 0xff;
230 h *= M64;
231 }
232 h ^= h >>> R64;
233 h *= M64;
234 h ^= h >>> R64;
235 return h;
236 }
237
238 /**
239 * Generates a 64-bit hash from a string with a default seed.
240 * <p>
241 * Before 1.14 the string was converted using default encoding.
242 * Since 1.14 the string is converted to bytes using UTF-8 encoding.
243 * </p>
244 * <p>
245 * This is a helper method that will produce the same result as:
246 * </p>
247 *
248 * <pre>
249 * int seed = 0xe17a1465;
250 * byte[] bytes = data.getBytes(StandardCharsets.UTF_8);
251 * int hash = MurmurHash2.hash64(bytes, bytes.length, seed);
252 * </pre>
253 *
254 * @param text The input string
255 * @return The 64-bit hash
256 * @see #hash64(byte[], int, int)
257 */
258 public static long hash64(final String text) {
259 final byte[] bytes = StringUtils.getBytesUtf8(text);
260 return hash64(bytes, bytes.length);
261 }
262
263 /**
264 * Generates a 64-bit hash from a substring with a default seed value.
265 * The string is converted to bytes using the default encoding.
266 * This is a helper method that will produce the same result as:
267 *
268 * <pre>
269 * int seed = 0xe17a1465;
270 * byte[] bytes = text.substring(from, from + length).getBytes(StandardCharsets.UTF_8);
271 * int hash = MurmurHash2.hash64(bytes, bytes.length, seed);
272 * </pre>
273 *
274 * @param text The input string
275 * @param from The starting index
276 * @param length The length of the substring
277 * @return The 64-bit hash
278 * @see #hash64(byte[], int, int)
279 */
280 public static long hash64(final String text, final int from, final int length) {
281 return hash64(text.substring(from, from + length));
282 }
283
284 /** No instance methods. */
285 private MurmurHash2() {
286 }
287 }