1 /*
2 * Licensed to the Apache Software Foundation (ASF) under one or more
3 * contributor license agreements. See the NOTICE file distributed with
4 * this work for additional information regarding copyright ownership.
5 * The ASF licenses this file to You under the Apache License, Version 2.0
6 * (the "License"); you may not use this file except in compliance with
7 * the License. You may obtain a copy of the License at
8 *
9 * https://www.apache.org/licenses/LICENSE-2.0
10 *
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
16 */
17
18 package org.apache.commons.codec.digest;
19
20 import org.apache.commons.codec.binary.StringUtils;
21
22 /**
23 * Implements the MurmurHash3 32-bit and 128-bit hash functions.
24 *
25 * <p>
26 * MurmurHash is a non-cryptographic hash function suitable for general hash-based lookup. The name comes from two basic
27 * operations, multiply (MU) and rotate (R), used in its inner loop. Unlike cryptographic hash functions, it is not
28 * specifically designed to be difficult to reverse by an adversary, making it unsuitable for cryptographic purposes.
29 * </p>
30 *
31 * <p>
32 * This contains a Java port of the 32-bit hash function {@code MurmurHash3_x86_32} and the 128-bit hash function
33 * {@code MurmurHash3_x64_128} from Austin Appleby's original {@code c++} code in SMHasher.
34 * </p>
35 *
36 * <p>
37 * This is public domain code with no copyrights. From home page of
38 * <a href="https://github.com/aappleby/smhasher">SMHasher</a>:
39 * </p>
40 *
41 * <blockquote>
42 * "All MurmurHash versions are public domain software, and the author disclaims all copyright to their
43 * code."
44 * </blockquote>
45 *
46 * <p>
47 * Original adaption from <a href="https://hive.apache.org/">Apache Hive</a>.
48 * That adaption contains a {@code hash64} method that is not part of the original
49 * MurmurHash3 code. It is not recommended to use these methods. They will be removed in a future release. To obtain a
50 * 64-bit hash use half of the bits from the {@code hash128x64} methods using the input data converted to bytes.
51 * </p>
52 *
53 * @see <a href="https://en.wikipedia.org/wiki/MurmurHash">MurmurHash</a>
54 * @see <a href="https://github.com/aappleby/smhasher/blob/master/src/MurmurHash3.cpp"> Original MurmurHash3 C++
55 * code</a>
56 * @see <a href=
57 * "https://github.com/apache/hive/blob/master/storage-api/src/java/org/apache/hive/common/util/Murmur3.java">
58 * Apache Hive Murmer3</a>
59 * @since 1.13
60 */
61 public final class MurmurHash3 {
62
63 /**
64 * Generates 32-bit hash from input bytes. Bytes can be added incrementally and the new
65 * hash computed.
66 *
67 * <p>
68 * This is an implementation of the 32-bit hash function {@code MurmurHash3_x86_32}
69 * from Austin Appleby's original MurmurHash3 {@code c++} code in SMHasher.
70 * </p>
71 *
72 * <p>
73 * This implementation contains a sign-extension bug in the finalization step of
74 * any bytes left over from dividing the length by 4. This manifests if any of these
75 * bytes are negative.
76 * </p>
77 *
78 * @deprecated Use IncrementalHash32x86. This corrects the processing of trailing bytes.
79 */
80 @Deprecated
81 public static class IncrementalHash32 extends IncrementalHash32x86 {
82
83 /**
84 * Constructs a new instance.
85 */
86 public IncrementalHash32() {
87 // empty
88 }
89
90 /**
91 * {@inheritDoc}
92 *
93 * <p>
94 * This implementation contains a sign-extension bug in the finalization step of
95 * any bytes left over from dividing the length by 4. This manifests if any of these
96 * bytes are negative.
97 * <p>
98 *
99 * @deprecated Use IncrementalHash32x86. This corrects the processing of trailing bytes.
100 */
101 @Override
102 @Deprecated
103 int finalise(final int hash, final int unprocessedLength, final byte[] unprocessed, final int totalLen) {
104 int result = hash;
105 // Note: This fails to apply masking using 0xff to the 3 remaining bytes.
106 int k1 = 0;
107 switch (unprocessedLength) {
108 case 3:
109 k1 ^= unprocessed[2] << 16;
110 // falls-through
111 case 2:
112 k1 ^= unprocessed[1] << 8;
113 // falls-through
114 case 1:
115 k1 ^= unprocessed[0];
116 // mix functions
117 k1 *= C1_32;
118 k1 = Integer.rotateLeft(k1, R1_32);
119 k1 *= C2_32;
120 result ^= k1;
121 }
122 // finalization
123 result ^= totalLen;
124 return fmix32(result);
125 }
126 }
127
128 /**
129 * Generates 32-bit hash from input bytes. Bytes can be added incrementally and the new
130 * hash computed.
131 *
132 * <p>
133 * This is an implementation of the 32-bit hash function {@code MurmurHash3_x86_32}
134 * from Austin Appleby's original MurmurHash3 {@code c++} code in SMHasher.
135 * </p>
136 *
137 * @since 1.14
138 */
139 public static class IncrementalHash32x86 {
140
141 /** The size of byte blocks that are processed together. */
142 private static final int BLOCK_SIZE = 4;
143
144 /**
145 * Combines the bytes using an Or operation ({@code | } in a little-endian representation
146 * of a 32-bit integer; byte 1 will be the least significant byte, byte 4 the most
147 * significant.
148 *
149 * @param b1 The first byte
150 * @param b2 The second byte
151 * @param b3 The third byte
152 * @param b4 The fourth byte
153 * @return The 32-bit integer
154 */
155 private static int orBytes(final byte b1, final byte b2, final byte b3, final byte b4) {
156 return b1 & 0xff | (b2 & 0xff) << 8 | (b3 & 0xff) << 16 | (b4 & 0xff) << 24;
157 }
158
159 /** Up to 3 unprocessed bytes from input data. */
160 private final byte[] unprocessed = new byte[3];
161
162 /** The number of unprocessed bytes in the tail data. */
163 private int unprocessedLength;
164
165 /** The total number of input bytes added since the start. */
166 private int totalLen;
167
168 /**
169 * The current running hash.
170 * This must be finalized to generate the 32-bit hash value.
171 */
172 private int hash;
173
174 /**
175 * Constructs a new instance.
176 */
177 public IncrementalHash32x86() {
178 // empty
179 }
180
181 /**
182 * Adds the byte array to the current incremental hash.
183 *
184 * @param data The input byte array
185 * @param offset The offset of data
186 * @param length The length of array
187 */
188 public final void add(final byte[] data, final int offset, final int length) {
189 if (length <= 0) {
190 // Nothing to add
191 return;
192 }
193 totalLen += length;
194 // Process the bytes in blocks of 4.
195 // New bytes must be added to any current unprocessed bytes,
196 // then processed in blocks of 4 and the remaining bytes saved:
197 //
198 // |--|---------------------------|--|
199 // unprocessed
200 // main block
201 // remaining
202
203 // Check if the unprocessed bytes and new bytes can fill a block of 4.
204 // Make this overflow safe in the event that length is Integer.MAX_VALUE.
205 // Equivalent to: (unprocessedLength + length < BLOCK_SIZE)
206 if (unprocessedLength + length - BLOCK_SIZE < 0) {
207 // Not enough so add to the unprocessed bytes
208 System.arraycopy(data, offset, unprocessed, unprocessedLength, length);
209 unprocessedLength += length;
210 return;
211 }
212 // Combine unprocessed bytes with new bytes.
213 final int newOffset;
214 final int newLength;
215 if (unprocessedLength > 0) {
216 int k = -1;
217 switch (unprocessedLength) {
218 case 1:
219 k = orBytes(unprocessed[0], data[offset], data[offset + 1], data[offset + 2]);
220 break;
221 case 2:
222 k = orBytes(unprocessed[0], unprocessed[1], data[offset], data[offset + 1]);
223 break;
224 case 3:
225 k = orBytes(unprocessed[0], unprocessed[1], unprocessed[2], data[offset]);
226 break;
227 default:
228 throw new IllegalStateException("Unprocessed length should be 1, 2, or 3: " + unprocessedLength);
229 }
230 hash = mix32(k, hash);
231 // Update the offset and length
232 final int consumed = BLOCK_SIZE - unprocessedLength;
233 newOffset = offset + consumed;
234 newLength = length - consumed;
235 } else {
236 newOffset = offset;
237 newLength = length;
238 }
239 // Main processing of blocks of 4 bytes
240 final int nblocks = newLength >> 2;
241
242 for (int i = 0; i < nblocks; i++) {
243 final int index = newOffset + (i << 2);
244 final int k = MurmurHash.getLittleEndianInt(data, index);
245 hash = mix32(k, hash);
246 }
247 // Save left-over unprocessed bytes
248 final int consumed = nblocks << 2;
249 unprocessedLength = newLength - consumed;
250 if (unprocessedLength != 0) {
251 System.arraycopy(data, newOffset + consumed, unprocessed, 0, unprocessedLength);
252 }
253 }
254
255 /**
256 * Generates the 32-bit hash value. Repeat calls to this method with no additional data
257 * will generate the same hash value.
258 *
259 * @return The 32-bit hash
260 */
261 public final int end() {
262 // Allow calling end() again after adding no data to return the same result.
263 return finalise(hash, unprocessedLength, unprocessed, totalLen);
264 }
265
266 /**
267 * Finalizes the running hash to the output 32-bit hash by processing remaining bytes
268 * and performing final mixing.
269 *
270 * @param hash The running hash
271 * @param unprocessedLength The number of unprocessed bytes in the tail data.
272 * @param unprocessed Up to 3 unprocessed bytes from input data.
273 * @param totalLen The total number of input bytes added since the start.
274 * @return The 32-bit hash
275 */
276 int finalise(final int hash, final int unprocessedLength, final byte[] unprocessed, final int totalLen) {
277 int result = hash;
278 int k1 = 0;
279 switch (unprocessedLength) {
280 case 3:
281 k1 ^= (unprocessed[2] & 0xff) << 16;
282 // falls-through
283 case 2:
284 k1 ^= (unprocessed[1] & 0xff) << 8;
285 // falls-through
286 case 1:
287 k1 ^= unprocessed[0] & 0xff;
288 // mix functions
289 k1 *= C1_32;
290 k1 = Integer.rotateLeft(k1, R1_32);
291 k1 *= C2_32;
292 result ^= k1;
293 }
294 // finalization
295 result ^= totalLen;
296 return fmix32(result);
297 }
298
299 /**
300 * Starts a new incremental hash.
301 *
302 * @param seed The initial seed value
303 */
304 public final void start(final int seed) {
305 // Reset
306 unprocessedLength = totalLen = 0;
307 hash = seed;
308 }
309 }
310
311 /**
312 * A random number to use for a hash code.
313 *
314 * @deprecated This is not used internally and will be removed in a future release.
315 */
316 @Deprecated
317 public static final long NULL_HASHCODE = 2862933555777941757L;
318 /**
319 * A default seed to use for the murmur hash algorithm.
320 * Has the value {@code 104729}.
321 */
322 public static final int DEFAULT_SEED = 104729;
323 // Constants for 32-bit variant
324 private static final int C1_32 = 0xcc9e2d51;
325 private static final int C2_32 = 0x1b873593;
326 private static final int R1_32 = 15;
327 private static final int R2_32 = 13;
328
329 private static final int M_32 = 5;
330 private static final int N_32 = 0xe6546b64;
331 // Constants for 128-bit variant
332 private static final long C1 = 0x87c37b91114253d5L;
333 private static final long C2 = 0x4cf5ad432745937fL;
334 private static final int R1 = 31;
335 private static final int R2 = 27;
336 private static final int R3 = 33;
337 private static final int M = 5;
338
339 private static final int N1 = 0x52dce729;
340
341 private static final int N2 = 0x38495ab5;
342
343 /**
344 * Performs the final avalanche mix step of the 32-bit hash function {@code MurmurHash3_x86_32}.
345 *
346 * @param hash The current hash
347 * @return The final hash
348 */
349 private static int fmix32(int hash) {
350 hash ^= hash >>> 16;
351 hash *= 0x85ebca6b;
352 hash ^= hash >>> 13;
353 hash *= 0xc2b2ae35;
354 hash ^= hash >>> 16;
355 return hash;
356 }
357
358 /**
359 * Performs the final avalanche mix step of the 64-bit hash function {@code MurmurHash3_x64_128}.
360 *
361 * @param hash The current hash
362 * @return The final hash
363 */
364 private static long fmix64(long hash) {
365 hash ^= hash >>> 33;
366 hash *= 0xff51afd7ed558ccdL;
367 hash ^= hash >>> 33;
368 hash *= 0xc4ceb9fe1a85ec53L;
369 hash ^= hash >>> 33;
370 return hash;
371 }
372
373 /**
374 * Generates 128-bit hash from the byte array with a default seed.
375 * This is a helper method that will produce the same result as:
376 *
377 * <pre>
378 * int offset = 0;
379 * int seed = 104729;
380 * int hash = MurmurHash3.hash128(data, offset, data.length, seed);
381 * </pre>
382 *
383 * <p>
384 * Note: The sign extension bug in {@link #hash128(byte[], int, int, int)} does not effect
385 * this result as the default seed is positive.
386 * </p>
387 *
388 * @param data The input byte array
389 * @return The 128-bit hash (2 longs)
390 * @see #hash128(byte[], int, int, int)
391 */
392 public static long[] hash128(final byte[] data) {
393 return hash128(data, 0, data.length, DEFAULT_SEED);
394 }
395
396 /**
397 * Generates 128-bit hash from the byte array with the given offset, length and seed.
398 *
399 * <p>
400 * This is an implementation of the 128-bit hash function {@code MurmurHash3_x64_128}
401 * from Austin Appleby's original MurmurHash3 {@code c++} code in SMHasher.
402 * </p>
403 *
404 * <p>
405 * This implementation contains a sign-extension bug in the seed initialization.
406 * This manifests if the seed is negative.
407 * </p>
408 *
409 * @param data The input byte array
410 * @param offset The first element of array
411 * @param length The length of array
412 * @param seed The initial seed value
413 * @return The 128-bit hash (2 longs)
414 * @deprecated Use {@link #hash128x64(byte[], int, int, int)}. This corrects the seed initialization.
415 */
416 @Deprecated
417 public static long[] hash128(final byte[] data, final int offset, final int length, final int seed) {
418 // Note: This deliberately fails to apply masking using 0xffffffffL to the seed
419 // to maintain behavioral compatibility with the original version.
420 // The implicit conversion to a long will extend a negative sign
421 // bit through the upper 32-bits of the long seed. These should be zero.
422 return hash128x64Internal(data, offset, length, seed);
423 }
424
425 /**
426 * Generates 128-bit hash from a string with a default seed.
427 * <p>
428 * Before 1.14 the string was converted using default encoding.
429 * Since 1.14 the string is converted to bytes using UTF-8 encoding.
430 * </p>
431 * <p>
432 * This is a helper method that will produce the same result as:
433 * </p>
434 *
435 * <pre>
436 * int offset = 0;
437 * int seed = 104729;
438 * byte[] bytes = data.getBytes(StandardCharsets.UTF_8);
439 * int hash = MurmurHash3.hash128(bytes, offset, bytes.length, seed);
440 * </pre>
441 *
442 * <p>
443 * Note: The sign extension bug in {@link #hash128(byte[], int, int, int)} does not effect
444 * this result as the default seed is positive.
445 * </p>
446 *
447 * @param data The input String
448 * @return The 128-bit hash (2 longs)
449 * @see #hash128(byte[], int, int, int)
450 * @deprecated Use {@link #hash128x64(byte[])} using the bytes returned from
451 * {@link String#getBytes(java.nio.charset.Charset)}.
452 */
453 @Deprecated
454 public static long[] hash128(final String data) {
455 final byte[] bytes = StringUtils.getBytesUtf8(data);
456 return hash128(bytes, 0, bytes.length, DEFAULT_SEED);
457 }
458
459 /**
460 * Generates 128-bit hash from the byte array with a seed of zero.
461 * This is a helper method that will produce the same result as:
462 *
463 * <pre>
464 * int offset = 0;
465 * int seed = 0;
466 * int hash = MurmurHash3.hash128x64(data, offset, data.length, seed);
467 * </pre>
468 *
469 * @param data The input byte array
470 * @return The 128-bit hash (2 longs)
471 * @see #hash128x64(byte[], int, int, int)
472 * @since 1.14
473 */
474 public static long[] hash128x64(final byte[] data) {
475 return hash128x64(data, 0, data.length, 0);
476 }
477
478 /**
479 * Generates 128-bit hash from the byte array with the given offset, length and seed.
480 *
481 * <p>
482 * This is an implementation of the 128-bit hash function {@code MurmurHash3_x64_128}
483 * from Austin Appleby's original MurmurHash3 {@code c++} code in SMHasher.
484 * </p>
485 *
486 * @param data The input byte array
487 * @param offset The first element of array
488 * @param length The length of array
489 * @param seed The initial seed value
490 * @return The 128-bit hash (2 longs)
491 * @since 1.14
492 */
493 public static long[] hash128x64(final byte[] data, final int offset, final int length, final int seed) {
494 // Use an unsigned 32-bit integer as the seed
495 return hash128x64Internal(data, offset, length, seed & 0xffffffffL);
496 }
497
498 /**
499 * Generates 128-bit hash from the byte array with the given offset, length and seed.
500 *
501 * <p>
502 * This is an implementation of the 128-bit hash function {@code MurmurHash3_x64_128}
503 * from Austin Appleby's original MurmurHash3 {@code c++} code in SMHasher.
504 * </p>
505 *
506 * @param data The input byte array
507 * @param offset The first element of array
508 * @param length The length of array
509 * @param seed The initial seed value
510 * @return The 128-bit hash (2 longs)
511 */
512 private static long[] hash128x64Internal(final byte[] data, final int offset, final int length, final long seed) {
513 long h1 = seed;
514 long h2 = seed;
515 final int nblocks = length >> 4;
516 // body
517 for (int i = 0; i < nblocks; i++) {
518 final int index = offset + (i << 4);
519 long k1 = MurmurHash.getLittleEndianLong(data, index);
520 long k2 = MurmurHash.getLittleEndianLong(data, index + 8);
521
522 // mix functions for k1
523 k1 *= C1;
524 k1 = Long.rotateLeft(k1, R1);
525 k1 *= C2;
526 h1 ^= k1;
527 h1 = Long.rotateLeft(h1, R2);
528 h1 += h2;
529 h1 = h1 * M + N1;
530
531 // mix functions for k2
532 k2 *= C2;
533 k2 = Long.rotateLeft(k2, R3);
534 k2 *= C1;
535 h2 ^= k2;
536 h2 = Long.rotateLeft(h2, R1);
537 h2 += h1;
538 h2 = h2 * M + N2;
539 }
540 // tail
541 long k1 = 0;
542 long k2 = 0;
543 final int index = offset + (nblocks << 4);
544 switch (offset + length - index) {
545 case 15:
546 k2 ^= ((long) data[index + 14] & 0xff) << 48;
547 // falls-through
548 case 14:
549 k2 ^= ((long) data[index + 13] & 0xff) << 40;
550 // falls-through
551 case 13:
552 k2 ^= ((long) data[index + 12] & 0xff) << 32;
553 // falls-through
554 case 12:
555 k2 ^= ((long) data[index + 11] & 0xff) << 24;
556 // falls-through
557 case 11:
558 k2 ^= ((long) data[index + 10] & 0xff) << 16;
559 // falls-through
560 case 10:
561 k2 ^= ((long) data[index + 9] & 0xff) << 8;
562 // falls-through
563 case 9:
564 k2 ^= data[index + 8] & 0xff;
565 k2 *= C2;
566 k2 = Long.rotateLeft(k2, R3);
567 k2 *= C1;
568 h2 ^= k2;
569 // falls-through
570 case 8:
571 k1 ^= ((long) data[index + 7] & 0xff) << 56;
572 // falls-through
573 case 7:
574 k1 ^= ((long) data[index + 6] & 0xff) << 48;
575 // falls-through
576 case 6:
577 k1 ^= ((long) data[index + 5] & 0xff) << 40;
578 // falls-through
579 case 5:
580 k1 ^= ((long) data[index + 4] & 0xff) << 32;
581 // falls-through
582 case 4:
583 k1 ^= ((long) data[index + 3] & 0xff) << 24;
584 // falls-through
585 case 3:
586 k1 ^= ((long) data[index + 2] & 0xff) << 16;
587 // falls-through
588 case 2:
589 k1 ^= ((long) data[index + 1] & 0xff) << 8;
590 // falls-through
591 case 1:
592 k1 ^= data[index] & 0xff;
593 k1 *= C1;
594 k1 = Long.rotateLeft(k1, R1);
595 k1 *= C2;
596 h1 ^= k1;
597 }
598 // finalization
599 h1 ^= length;
600 h2 ^= length;
601
602 h1 += h2;
603 h2 += h1;
604
605 h1 = fmix64(h1);
606 h2 = fmix64(h2);
607
608 h1 += h2;
609 h2 += h1;
610 return new long[] { h1, h2 };
611 }
612
613 /**
614 * Generates 32-bit hash from the byte array with a default seed.
615 * This is a helper method that will produce the same result as:
616 *
617 * <pre>
618 * int offset = 0;
619 * int seed = 104729;
620 * int hash = MurmurHash3.hash32(data, offset, data.length, seed);
621 * </pre>
622 *
623 * <p>
624 * This implementation contains a sign-extension bug in the finalization step of
625 * any bytes left over from dividing the length by 4. This manifests if any of these
626 * bytes are negative.
627 * </p>
628 *
629 * @param data The input byte array
630 * @return The 32-bit hash
631 * @see #hash32(byte[], int, int, int)
632 * @deprecated Use {@link #hash32x86(byte[], int, int, int)}. This corrects the processing of trailing bytes.
633 */
634 @Deprecated
635 public static int hash32(final byte[] data) {
636 return hash32(data, 0, data.length, DEFAULT_SEED);
637 }
638
639 /**
640 * Generates 32-bit hash from the byte array with the given length and a default seed.
641 * This is a helper method that will produce the same result as:
642 *
643 * <pre>
644 * int offset = 0;
645 * int seed = 104729;
646 * int hash = MurmurHash3.hash32(data, offset, length, seed);
647 * </pre>
648 *
649 * <p>
650 * This implementation contains a sign-extension bug in the finalization step of
651 * any bytes left over from dividing the length by 4. This manifests if any of these
652 * bytes are negative.
653 * </p>
654 *
655 * @param data The input byte array
656 * @param length The length of array
657 * @return The 32-bit hash
658 * @see #hash32(byte[], int, int, int)
659 * @deprecated Use {@link #hash32x86(byte[], int, int, int)}. This corrects the processing of trailing bytes.
660 */
661 @Deprecated
662 public static int hash32(final byte[] data, final int length) {
663 return hash32(data, length, DEFAULT_SEED);
664 }
665
666 /**
667 * Generates 32-bit hash from the byte array with the given length and seed. This is a
668 * helper method that will produce the same result as:
669 *
670 * <pre>
671 * int offset = 0;
672 * int hash = MurmurHash3.hash32(data, offset, length, seed);
673 * </pre>
674 *
675 * <p>
676 * This implementation contains a sign-extension bug in the finalization step of
677 * any bytes left over from dividing the length by 4. This manifests if any of these
678 * bytes are negative.
679 * </p>
680 *
681 * @param data The input byte array
682 * @param length The length of array
683 * @param seed The initial seed value
684 * @return The 32-bit hash
685 * @see #hash32(byte[], int, int, int)
686 * @deprecated Use {@link #hash32x86(byte[], int, int, int)}. This corrects the processing of trailing bytes.
687 */
688 @Deprecated
689 public static int hash32(final byte[] data, final int length, final int seed) {
690 return hash32(data, 0, length, seed);
691 }
692
693 /**
694 * Generates 32-bit hash from the byte array with the given offset, length and seed.
695 *
696 * <p>
697 * This is an implementation of the 32-bit hash function {@code MurmurHash3_x86_32}
698 * from Austin Appleby's original MurmurHash3 {@code c++} code in SMHasher.
699 * </p>
700 *
701 * <p>
702 * This implementation contains a sign-extension bug in the finalization step of
703 * any bytes left over from dividing the length by 4. This manifests if any of these
704 * bytes are negative.
705 * </p>
706 *
707 * @param data The input byte array
708 * @param offset The offset of data
709 * @param length The length of array
710 * @param seed The initial seed value
711 * @return The 32-bit hash
712 * @deprecated Use {@link #hash32x86(byte[], int, int, int)}. This corrects the processing of trailing bytes.
713 */
714 @Deprecated
715 public static int hash32(final byte[] data, final int offset, final int length, final int seed) {
716 int hash = seed;
717 final int nblocks = length >> 2;
718 // body
719 for (int i = 0; i < nblocks; i++) {
720 final int index = offset + (i << 2);
721 final int k = MurmurHash.getLittleEndianInt(data, index);
722 hash = mix32(k, hash);
723 }
724 // tail
725 // Note: This fails to apply masking using 0xff to the 3 remaining bytes.
726 final int index = offset + (nblocks << 2);
727 int k1 = 0;
728 switch (offset + length - index) {
729 case 3:
730 k1 ^= data[index + 2] << 16;
731 // falls-through
732 case 2:
733 k1 ^= data[index + 1] << 8;
734 // falls-through
735 case 1:
736 k1 ^= data[index];
737 // mix functions
738 k1 *= C1_32;
739 k1 = Integer.rotateLeft(k1, R1_32);
740 k1 *= C2_32;
741 hash ^= k1;
742 }
743 hash ^= length;
744 return fmix32(hash);
745 }
746
747 /**
748 * Generates 32-bit hash from a long with a default seed value.
749 * This is a helper method that will produce the same result as:
750 *
751 * <pre>
752 * int offset = 0;
753 * int seed = 104729;
754 * int hash = MurmurHash3.hash32x86(ByteBuffer.allocate(8)
755 * .putLong(data)
756 * .array(), offset, 8, seed);
757 * </pre>
758 *
759 * @param data The long to hash
760 * @return The 32-bit hash
761 * @see #hash32x86(byte[], int, int, int)
762 */
763 public static int hash32(final long data) {
764 return hash32(data, DEFAULT_SEED);
765 }
766
767 /**
768 * Generates 32-bit hash from a long with the given seed.
769 * This is a helper method that will produce the same result as:
770 *
771 * <pre>
772 * int offset = 0;
773 * int hash = MurmurHash3.hash32x86(ByteBuffer.allocate(8)
774 * .putLong(data)
775 * .array(), offset, 8, seed);
776 * </pre>
777 *
778 * @param data The long to hash
779 * @param seed The initial seed value
780 * @return The 32-bit hash
781 * @see #hash32x86(byte[], int, int, int)
782 */
783 public static int hash32(final long data, final int seed) {
784 int hash = seed;
785 final long r0 = Long.reverseBytes(data);
786
787 hash = mix32((int) r0, hash);
788 hash = mix32((int) (r0 >>> 32), hash);
789
790 hash ^= Long.BYTES;
791 return fmix32(hash);
792 }
793
794 /**
795 * Generates 32-bit hash from two longs with a default seed value.
796 * This is a helper method that will produce the same result as:
797 *
798 * <pre>
799 * int offset = 0;
800 * int seed = 104729;
801 * int hash = MurmurHash3.hash32x86(ByteBuffer.allocate(16)
802 * .putLong(data1)
803 * .putLong(data2)
804 * .array(), offset, 16, seed);
805 * </pre>
806 *
807 * @param data1 The first long to hash
808 * @param data2 The second long to hash
809 * @return The 32-bit hash
810 * @see #hash32x86(byte[], int, int, int)
811 */
812 public static int hash32(final long data1, final long data2) {
813 return hash32(data1, data2, DEFAULT_SEED);
814 }
815
816 /**
817 * Generates 32-bit hash from two longs with the given seed.
818 * This is a helper method that will produce the same result as:
819 *
820 * <pre>
821 * int offset = 0;
822 * int hash = MurmurHash3.hash32x86(ByteBuffer.allocate(16)
823 * .putLong(data1)
824 * .putLong(data2)
825 * .array(), offset, 16, seed);
826 * </pre>
827 *
828 * @param data1 The first long to hash
829 * @param data2 The second long to hash
830 * @param seed The initial seed value
831 * @return The 32-bit hash
832 * @see #hash32x86(byte[], int, int, int)
833 */
834 public static int hash32(final long data1, final long data2, final int seed) {
835 int hash = seed;
836 final long r0 = Long.reverseBytes(data1);
837 final long r1 = Long.reverseBytes(data2);
838
839 hash = mix32((int) r0, hash);
840 hash = mix32((int) (r0 >>> 32), hash);
841 hash = mix32((int) r1, hash);
842 hash = mix32((int) (r1 >>> 32), hash);
843
844 hash ^= Long.BYTES * 2;
845 return fmix32(hash);
846 }
847
848 /**
849 * Generates 32-bit hash from a string with a default seed.
850 * <p>
851 * Before 1.14 the string was converted using default encoding.
852 * Since 1.14 the string is converted to bytes using UTF-8 encoding.
853 * </p>
854 * This is a helper method that will produce the same result as:
855 *
856 * <pre>
857 * int offset = 0;
858 * int seed = 104729;
859 * byte[] bytes = data.getBytes(StandardCharsets.UTF_8);
860 * int hash = MurmurHash3.hash32(bytes, offset, bytes.length, seed);
861 * </pre>
862 *
863 * <p>
864 * This implementation contains a sign-extension bug in the finalization step of
865 * any bytes left over from dividing the length by 4. This manifests if any of these
866 * bytes are negative.
867 * </p>
868 *
869 * @param data The input string
870 * @return The 32-bit hash
871 * @see #hash32(byte[], int, int, int)
872 * @deprecated Use {@link #hash32x86(byte[], int, int, int)} with the bytes returned from
873 * {@link String#getBytes(java.nio.charset.Charset)}. This corrects the processing of trailing bytes.
874 */
875 @Deprecated
876 public static int hash32(final String data) {
877 final byte[] bytes = StringUtils.getBytesUtf8(data);
878 return hash32(bytes, 0, bytes.length, DEFAULT_SEED);
879 }
880
881 /**
882 * Generates 32-bit hash from the byte array with a seed of zero.
883 * This is a helper method that will produce the same result as:
884 *
885 * <pre>
886 * int offset = 0;
887 * int seed = 0;
888 * int hash = MurmurHash3.hash32x86(data, offset, data.length, seed);
889 * </pre>
890 *
891 * @param data The input byte array
892 * @return The 32-bit hash
893 * @see #hash32x86(byte[], int, int, int)
894 * @since 1.14
895 */
896 public static int hash32x86(final byte[] data) {
897 return hash32x86(data, 0, data.length, 0);
898 }
899
900 /**
901 * Generates 32-bit hash from the byte array with the given offset, length and seed.
902 *
903 * <p>
904 * This is an implementation of the 32-bit hash function {@code MurmurHash3_x86_32}
905 * from Austin Appleby's original MurmurHash3 {@code c++} code in SMHasher.
906 * </p>
907 *
908 * @param data The input byte array
909 * @param offset The offset of data
910 * @param length The length of array
911 * @param seed The initial seed value
912 * @return The 32-bit hash
913 * @since 1.14
914 */
915 public static int hash32x86(final byte[] data, final int offset, final int length, final int seed) {
916 int hash = seed;
917 final int nblocks = length >> 2;
918 // body
919 for (int i = 0; i < nblocks; i++) {
920 final int index = offset + (i << 2);
921 final int k = MurmurHash.getLittleEndianInt(data, index);
922 hash = mix32(k, hash);
923 }
924 // tail
925 final int index = offset + (nblocks << 2);
926 int k1 = 0;
927 switch (offset + length - index) {
928 case 3:
929 k1 ^= (data[index + 2] & 0xff) << 16;
930 // falls-through
931 case 2:
932 // falls-through
933 k1 ^= (data[index + 1] & 0xff) << 8;
934 // falls-through
935 case 1:
936 k1 ^= data[index] & 0xff;
937 // mix functions
938 k1 *= C1_32;
939 k1 = Integer.rotateLeft(k1, R1_32);
940 k1 *= C2_32;
941 hash ^= k1;
942 }
943 hash ^= length;
944 return fmix32(hash);
945 }
946
947 /**
948 * Generates 64-bit hash from a byte array with a default seed.
949 *
950 * <p><strong>This is not part of the original MurmurHash3 {@code c++} implementation.</strong></p>
951 *
952 * <p>
953 * This is a Murmur3-like 64-bit variant.
954 * The method does not produce the same result as either half of the hash bytes from
955 * {@linkplain #hash128x64(byte[])} with the same byte data.
956 * This method will be removed in a future release.
957 * </p>
958 *
959 * <p>
960 * Note: The sign extension bug in {@link #hash64(byte[], int, int, int)} does not effect
961 * this result as the default seed is positive.
962 * </p>
963 *
964 * <p>
965 * This is a helper method that will produce the same result as:
966 * </p>
967 *
968 * <pre>
969 * int offset = 0;
970 * int seed = 104729;
971 * long hash = MurmurHash3.hash64(data, offset, data.length, seed);
972 * </pre>
973 *
974 * @param data The input byte array
975 * @return The 64-bit hash
976 * @see #hash64(byte[], int, int, int)
977 * @deprecated Not part of the MurmurHash3 implementation.
978 * Use half of the hash bytes from {@link #hash128x64(byte[])}.
979 */
980 @Deprecated
981 public static long hash64(final byte[] data) {
982 return hash64(data, 0, data.length, DEFAULT_SEED);
983 }
984
985 /**
986 * Generates 64-bit hash from a byte array with the given offset and length and a default seed.
987 *
988 * <p><strong>
989 * This is not part of the original MurmurHash3 {@code c++} implementation.
990 * </strong></p>
991 *
992 * <p>
993 * This is a Murmur3-like 64-bit variant.
994 * The method does not produce the same result as either half of the hash bytes from
995 * {@linkplain #hash128x64(byte[])} with the same byte data.
996 * This method will be removed in a future release.
997 * </p>
998 *
999 * <p>
1000 * Note: The sign extension bug in {@link #hash64(byte[], int, int, int)} does not effect
1001 * this result as the default seed is positive.
1002 * </p>
1003 *
1004 * <p>
1005 * This is a helper method that will produce the same result as:
1006 * </p>
1007 *
1008 * <pre>
1009 * int seed = 104729;
1010 * long hash = MurmurHash3.hash64(data, offset, length, seed);
1011 * </pre>
1012 *
1013 * @param data The input byte array
1014 * @param offset The offset of data
1015 * @param length The length of array
1016 * @return The 64-bit hash
1017 * @see #hash64(byte[], int, int, int)
1018 * @deprecated Not part of the MurmurHash3 implementation.
1019 * Use half of the hash bytes from {@link #hash128x64(byte[], int, int, int)}.
1020 */
1021 @Deprecated
1022 public static long hash64(final byte[] data, final int offset, final int length) {
1023 return hash64(data, offset, length, DEFAULT_SEED);
1024 }
1025
1026 /**
1027 * Generates 64-bit hash from a byte array with the given offset, length and seed.
1028 *
1029 * <p><strong>This is not part of the original MurmurHash3 {@code c++} implementation.</strong></p>
1030 *
1031 * <p>
1032 * This is a Murmur3-like 64-bit variant.
1033 * This method will be removed in a future release.
1034 * </p>
1035 *
1036 * <p>
1037 * This implementation contains a sign-extension bug in the seed initialization.
1038 * This manifests if the seed is negative.
1039 * </p>
1040 *
1041 * <p>
1042 * This algorithm processes 8 bytes chunks of data in a manner similar to the 16 byte chunks
1043 * of data processed in the MurmurHash3 {@code MurmurHash3_x64_128} method. However the hash
1044 * is not mixed with a hash chunk from the next 8 bytes of data. The method will not return
1045 * the same value as the first or second 64-bits of the function
1046 * {@link #hash128(byte[], int, int, int)}.
1047 * </p>
1048 *
1049 * <p>
1050 * Use of this method is not advised. Use the first long returned from
1051 * {@link #hash128x64(byte[], int, int, int)}.
1052 * </p>
1053 *
1054 * @param data The input byte array
1055 * @param offset The offset of data
1056 * @param length The length of array
1057 * @param seed The initial seed value
1058 * @return The 64-bit hash
1059 * @deprecated Not part of the MurmurHash3 implementation.
1060 * Use half of the hash bytes from {@link #hash128x64(byte[], int, int, int)}.
1061 */
1062 @Deprecated
1063 public static long hash64(final byte[] data, final int offset, final int length, final int seed) {
1064 // Note: This fails to apply masking using 0xffffffffL to the seed.
1065 long hash = seed;
1066 final int nblocks = length >> 3;
1067 // body
1068 for (int i = 0; i < nblocks; i++) {
1069 final int index = offset + (i << 3);
1070 long k = MurmurHash.getLittleEndianLong(data, index);
1071 // mix functions
1072 k *= C1;
1073 k = Long.rotateLeft(k, R1);
1074 k *= C2;
1075 hash ^= k;
1076 hash = Long.rotateLeft(hash, R2) * M + N1;
1077 }
1078 // tail
1079 long k1 = 0;
1080 final int index = offset + (nblocks << 3);
1081 switch (offset + length - index) {
1082 case 7:
1083 k1 ^= ((long) data[index + 6] & 0xff) << 48;
1084 // falls-through
1085 case 6:
1086 k1 ^= ((long) data[index + 5] & 0xff) << 40;
1087 // falls-through
1088 case 5:
1089 k1 ^= ((long) data[index + 4] & 0xff) << 32;
1090 // falls-through
1091 case 4:
1092 k1 ^= ((long) data[index + 3] & 0xff) << 24;
1093 // falls-through
1094 case 3:
1095 k1 ^= ((long) data[index + 2] & 0xff) << 16;
1096 // falls-through
1097 case 2:
1098 k1 ^= ((long) data[index + 1] & 0xff) << 8;
1099 // falls-through
1100 case 1:
1101 k1 ^= (long) data[index] & 0xff;
1102 k1 *= C1;
1103 k1 = Long.rotateLeft(k1, R1);
1104 k1 *= C2;
1105 hash ^= k1;
1106 }
1107 // finalization
1108 hash ^= length;
1109 return fmix64(hash);
1110 }
1111
1112 /**
1113 * Generates 64-bit hash from an int with a default seed.
1114 *
1115 * <p><strong>
1116 * This is not part of the original MurmurHash3 {@code c++} implementation.
1117 * </strong></p>
1118 *
1119 * <p>
1120 * This is a Murmur3-like 64-bit variant.
1121 * The method does not produce the same result as either half of the hash bytes from
1122 * {@linkplain #hash128x64(byte[])} with the same byte data from the {@code int}.
1123 * This method will be removed in a future release.
1124 * </p>
1125 *
1126 * <p>
1127 * Note: The sign extension bug in {@link #hash64(byte[], int, int, int)} does not effect
1128 * this result as the default seed is positive.
1129 * </p>
1130 *
1131 * <p>
1132 * This is a helper method that will produce the same result as:
1133 * </p>
1134 *
1135 * <pre>
1136 * int offset = 0;
1137 * int seed = 104729;
1138 * long hash = MurmurHash3.hash64(ByteBuffer.allocate(4)
1139 * .putInt(data)
1140 * .array(), offset, 4, seed);
1141 * </pre>
1142 *
1143 * @param data The int to hash
1144 * @return The 64-bit hash
1145 * @see #hash64(byte[], int, int, int)
1146 * @deprecated Not part of the MurmurHash3 implementation.
1147 * Use half of the hash bytes from {@link #hash128x64(byte[])} with the bytes from the {@code int}.
1148 */
1149 @Deprecated
1150 public static long hash64(final int data) {
1151 long k1 = Integer.reverseBytes(data) & -1L >>> 32;
1152 long hash = DEFAULT_SEED;
1153 k1 *= C1;
1154 k1 = Long.rotateLeft(k1, R1);
1155 k1 *= C2;
1156 hash ^= k1;
1157 // finalization
1158 hash ^= Integer.BYTES;
1159 return fmix64(hash);
1160 }
1161
1162 /**
1163 * Generates 64-bit hash from a long with a default seed.
1164 *
1165 * <p><strong>
1166 * This is not part of the original MurmurHash3 {@code c++} implementation.
1167 * </strong></p>
1168 *
1169 * <p>
1170 * This is a Murmur3-like 64-bit variant.
1171 * The method does not produce the same result as either half of the hash bytes from
1172 * {@linkplain #hash128x64(byte[])} with the same byte data from the {@code long}.
1173 * This method will be removed in a future release.
1174 * </p>
1175 *
1176 * <p>
1177 * Note: The sign extension bug in {@link #hash64(byte[], int, int, int)} does not effect
1178 * this result as the default seed is positive.
1179 * </p>
1180 *
1181 * <p>
1182 * This is a helper method that will produce the same result as:
1183 * </p>
1184 *
1185 * <pre>
1186 * int offset = 0;
1187 * int seed = 104729;
1188 * long hash = MurmurHash3.hash64(ByteBuffer.allocate(8)
1189 * .putLong(data)
1190 * .array(), offset, 8, seed);
1191 * </pre>
1192 *
1193 * @param data The long to hash
1194 * @return The 64-bit hash
1195 * @see #hash64(byte[], int, int, int)
1196 * @deprecated Not part of the MurmurHash3 implementation.
1197 * Use half of the hash bytes from {@link #hash128x64(byte[])} with the bytes from the {@code long}.
1198 */
1199 @Deprecated
1200 public static long hash64(final long data) {
1201 long hash = DEFAULT_SEED;
1202 long k = Long.reverseBytes(data);
1203 // mix functions
1204 k *= C1;
1205 k = Long.rotateLeft(k, R1);
1206 k *= C2;
1207 hash ^= k;
1208 hash = Long.rotateLeft(hash, R2) * M + N1;
1209 // finalization
1210 hash ^= Long.BYTES;
1211 return fmix64(hash);
1212 }
1213
1214 /**
1215 * Generates 64-bit hash from a short with a default seed.
1216 *
1217 * <p><strong>This is not part of the original MurmurHash3 {@code c++} implementation.</strong></p>
1218 *
1219 * <p>
1220 * This is a Murmur3-like 64-bit variant.
1221 * The method does not produce the same result as either half of the hash bytes from
1222 * {@linkplain #hash128x64(byte[])} with the same byte data from the {@code short}.
1223 * This method will be removed in a future release.
1224 * </p>
1225 *
1226 * <p>
1227 * Note: The sign extension bug in {@link #hash64(byte[], int, int, int)} does not effect
1228 * this result as the default seed is positive.
1229 * </p>
1230 *
1231 * <p>
1232 * This is a helper method that will produce the same result as:
1233 * </p>
1234 *
1235 * <pre>
1236 * int offset = 0;
1237 * int seed = 104729;
1238 * long hash = MurmurHash3.hash64(ByteBuffer.allocate(2)
1239 * .putShort(data)
1240 * .array(), offset, 2, seed);
1241 * </pre>
1242 *
1243 * @param data The short to hash
1244 * @return The 64-bit hash
1245 * @see #hash64(byte[], int, int, int)
1246 * @deprecated Not part of the MurmurHash3 implementation.
1247 * Use half of the hash bytes from {@link #hash128x64(byte[])} with the bytes from the {@code short}.
1248 */
1249 @Deprecated
1250 public static long hash64(final short data) {
1251 long hash = DEFAULT_SEED;
1252 long k1 = 0;
1253 k1 ^= ((long) data & 0xff) << 8;
1254 k1 ^= (long) ((data & 0xFF00) >> 8) & 0xff;
1255 k1 *= C1;
1256 k1 = Long.rotateLeft(k1, R1);
1257 k1 *= C2;
1258 hash ^= k1;
1259
1260 // finalization
1261 hash ^= Short.BYTES;
1262 return fmix64(hash);
1263 }
1264
1265 /**
1266 * Performs the intermediate mix step of the 32-bit hash function {@code MurmurHash3_x86_32}.
1267 *
1268 * @param k The data to add to the hash
1269 * @param hash The current hash
1270 * @return The new hash
1271 */
1272 private static int mix32(int k, int hash) {
1273 k *= C1_32;
1274 k = Integer.rotateLeft(k, R1_32);
1275 k *= C2_32;
1276 hash ^= k;
1277 return Integer.rotateLeft(hash, R2_32) * M_32 + N_32;
1278 }
1279
1280 /** No instance methods. */
1281 private MurmurHash3() {
1282 }
1283 }