1 /*
2 * Licensed to the Apache Software Foundation (ASF) under one or more
3 * contributor license agreements. See the NOTICE file distributed with
4 * this work for additional information regarding copyright ownership.
5 * The ASF licenses this file to You under the Apache License, Version 2.0
6 * (the "License"); you may not use this file except in compliance with
7 * the License. You may obtain a copy of the License at
8 *
9 * https://www.apache.org/licenses/LICENSE-2.0
10 *
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
16 */
17
18 package org.apache.commons.codec.digest;
19
20 import org.apache.commons.codec.binary.StringUtils;
21
22 /**
23 * Implements the MurmurHash3 32-bit and 128-bit hash functions.
24 *
25 * <p>
26 * MurmurHash is a non-cryptographic hash function suitable for general hash-based lookup. The name comes from two basic
27 * operations, multiply (MU) and rotate (R), used in its inner loop. Unlike cryptographic hash functions, it is not
28 * specifically designed to be difficult to reverse by an adversary, making it unsuitable for cryptographic purposes.
29 * </p>
30 *
31 * <p>
32 * This contains a Java port of the 32-bit hash function {@code MurmurHash3_x86_32} and the 128-bit hash function
33 * {@code MurmurHash3_x64_128} from Austin Appleby's original {@code c++} code in SMHasher.
34 * </p>
35 *
36 * <p>
37 * This is public domain code with no copyrights. From home page of
38 * <a href="https://github.com/aappleby/smhasher">SMHasher</a>:
39 * </p>
40 *
41 * <blockquote>
42 * "All MurmurHash versions are public domain software, and the author disclaims all copyright to their
43 * code."
44 * </blockquote>
45 *
46 * <p>
47 * Original adaption from <a href="https://hive.apache.org/">Apache Hive</a>.
48 * That adaption contains a {@code hash64} method that is not part of the original
49 * MurmurHash3 code. It is not recommended to use these methods. They will be removed in a future release. To obtain a
50 * 64-bit hash use half of the bits from the {@code hash128x64} methods using the input data converted to bytes.
51 * </p>
52 *
53 * @see <a href="https://en.wikipedia.org/wiki/MurmurHash">MurmurHash</a>
54 * @see <a href="https://github.com/aappleby/smhasher/blob/master/src/MurmurHash3.cpp"> Original MurmurHash3 C++
55 * code</a>
56 * @see <a href=
57 * "https://github.com/apache/hive/blob/master/storage-api/src/java/org/apache/hive/common/util/Murmur3.java">
58 * Apache Hive Murmer3</a>
59 * @since 1.13
60 */
61 public final class MurmurHash3 {
62
63 /**
64 * Generates 32-bit hash from input bytes. Bytes can be added incrementally and the new
65 * hash computed.
66 *
67 * <p>
68 * This is an implementation of the 32-bit hash function {@code MurmurHash3_x86_32}
69 * from Austin Appleby's original MurmurHash3 {@code c++} code in SMHasher.
70 * </p>
71 *
72 * <p>
73 * This implementation contains a sign-extension bug in the finalization step of
74 * any bytes left over from dividing the length by 4. This manifests if any of these
75 * bytes are negative.
76 * </p>
77 *
78 * @deprecated Use IncrementalHash32x86. This corrects the processing of trailing bytes.
79 */
80 @Deprecated
81 public static class IncrementalHash32 extends IncrementalHash32x86 {
82
83 /**
84 * Constructs a new instance.
85 */
86 public IncrementalHash32() {
87 // empty
88 }
89
90 /**
91 * {@inheritDoc}
92 *
93 * <p>
94 * This implementation contains a sign-extension bug in the finalization step of
95 * any bytes left over from dividing the length by 4. This manifests if any of these
96 * bytes are negative.
97 * <p>
98 *
99 * @deprecated Use IncrementalHash32x86. This corrects the processing of trailing bytes.
100 */
101 @Override
102 @Deprecated
103 int finalise(final int hash, final int unprocessedLength, final byte[] unprocessed, final int totalLen) {
104 int result = hash;
105 // Note: This fails to apply masking using 0xff to the 3 remaining bytes.
106 int k1 = 0;
107 switch (unprocessedLength) {
108 case 3:
109 k1 ^= unprocessed[2] << 16;
110 // falls-through
111 case 2:
112 k1 ^= unprocessed[1] << 8;
113 // falls-through
114 case 1:
115 k1 ^= unprocessed[0];
116 // mix functions
117 k1 *= C1_32;
118 k1 = Integer.rotateLeft(k1, R1_32);
119 k1 *= C2_32;
120 result ^= k1;
121 }
122 // finalization
123 result ^= totalLen;
124 return fmix32(result);
125 }
126 }
127
128 /**
129 * Generates 32-bit hash from input bytes. Bytes can be added incrementally and the new
130 * hash computed.
131 *
132 * <p>
133 * This is an implementation of the 32-bit hash function {@code MurmurHash3_x86_32}
134 * from Austin Appleby's original MurmurHash3 {@code c++} code in SMHasher.
135 * </p>
136 *
137 * @since 1.14
138 */
139 public static class IncrementalHash32x86 {
140
141 /** The size of byte blocks that are processed together. */
142 private static final int BLOCK_SIZE = 4;
143
144 /**
145 * Combines the bytes using an Or operation ({@code | } in a little-endian representation
146 * of a 32-bit integer; byte 1 will be the least significant byte, byte 4 the most
147 * significant.
148 *
149 * @param b1 The first byte.
150 * @param b2 The second byte.
151 * @param b3 The third byte.
152 * @param b4 The fourth byte.
153 * @return The 32-bit integer.
154 */
155 private static int orBytes(final byte b1, final byte b2, final byte b3, final byte b4) {
156 return b1 & 0xff | (b2 & 0xff) << 8 | (b3 & 0xff) << 16 | (b4 & 0xff) << 24;
157 }
158
159 /** Up to 3 unprocessed bytes from input data. */
160 private final byte[] unprocessed = new byte[3];
161
162 /** The number of unprocessed bytes in the tail data. */
163 private int unprocessedLength;
164
165 /** The total number of input bytes added since the start. */
166 private int totalLen;
167
168 /**
169 * The current running hash.
170 * This must be finalized to generate the 32-bit hash value.
171 */
172 private int hash;
173
174 /**
175 * Constructs a new instance.
176 */
177 public IncrementalHash32x86() {
178 // empty
179 }
180
181 /**
182 * Adds the byte array to the current incremental hash.
183 *
184 * @param data The input byte array.
185 * @param offset The offset of data.
186 * @param length The length of array.
187 */
188 public final void add(final byte[] data, final int offset, final int length) {
189 if (length <= 0) {
190 // Nothing to add
191 return;
192 }
193 totalLen += length;
194 // Process the bytes in blocks of 4.
195 // New bytes must be added to any current unprocessed bytes,
196 // then processed in blocks of 4 and the remaining bytes saved:
197 //
198 // |--|---------------------------|--|
199 // unprocessed
200 // main block
201 // remaining
202
203 // Check if the unprocessed bytes and new bytes can fill a block of 4.
204 // Make this overflow safe in the event that length is Integer.MAX_VALUE.
205 // Equivalent to: (unprocessedLength + length < BLOCK_SIZE)
206 if (unprocessedLength + length - BLOCK_SIZE < 0) {
207 // Not enough so add to the unprocessed bytes
208 System.arraycopy(data, offset, unprocessed, unprocessedLength, length);
209 unprocessedLength += length;
210 return;
211 }
212 // Combine unprocessed bytes with new bytes.
213 final int newOffset;
214 final int newLength;
215 if (unprocessedLength > 0) {
216 int k = -1;
217 switch (unprocessedLength) {
218 case 1:
219 k = orBytes(unprocessed[0], data[offset], data[offset + 1], data[offset + 2]);
220 break;
221 case 2:
222 k = orBytes(unprocessed[0], unprocessed[1], data[offset], data[offset + 1]);
223 break;
224 case 3:
225 k = orBytes(unprocessed[0], unprocessed[1], unprocessed[2], data[offset]);
226 break;
227 default:
228 throw new IllegalStateException("Unprocessed length should be 1, 2, or 3: " + unprocessedLength);
229 }
230 hash = mix32(k, hash);
231 // Update the offset and length
232 final int consumed = BLOCK_SIZE - unprocessedLength;
233 newOffset = offset + consumed;
234 newLength = length - consumed;
235 } else {
236 newOffset = offset;
237 newLength = length;
238 }
239 // Main processing of blocks of 4 bytes
240 final int nblocks = newLength >> 2;
241
242 for (int i = 0; i < nblocks; i++) {
243 final int index = newOffset + (i << 2);
244 final int k = MurmurHash.getLittleEndianInt(data, index);
245 hash = mix32(k, hash);
246 }
247 // Save left-over unprocessed bytes
248 final int consumed = nblocks << 2;
249 unprocessedLength = newLength - consumed;
250 if (unprocessedLength != 0) {
251 System.arraycopy(data, newOffset + consumed, unprocessed, 0, unprocessedLength);
252 }
253 }
254
255 /**
256 * Generates the 32-bit hash value. Repeat calls to this method with no additional data
257 * will generate the same hash value.
258 *
259 * @return The 32-bit hash
260 */
261 public final int end() {
262 // Allow calling end() again after adding no data to return the same result.
263 return finalise(hash, unprocessedLength, unprocessed, totalLen);
264 }
265
266 /**
267 * Finalizes the running hash to the output 32-bit hash by processing remaining bytes
268 * and performing final mixing.
269 *
270 * @param hash The running hash.
271 * @param unprocessedLength The number of unprocessed bytes in the tail data.
272 * @param unprocessed Up to 3 unprocessed bytes from input data.
273 * @param totalLen The total number of input bytes added since the start.
274 * @return The 32-bit hash.
275 */
276 int finalise(final int hash, final int unprocessedLength, final byte[] unprocessed, final int totalLen) {
277 int result = hash;
278 int k1 = 0;
279 switch (unprocessedLength) {
280 case 3:
281 k1 ^= (unprocessed[2] & 0xff) << 16;
282 // falls-through
283 case 2:
284 k1 ^= (unprocessed[1] & 0xff) << 8;
285 // falls-through
286 case 1:
287 k1 ^= unprocessed[0] & 0xff;
288 // mix functions
289 k1 *= C1_32;
290 k1 = Integer.rotateLeft(k1, R1_32);
291 k1 *= C2_32;
292 result ^= k1;
293 }
294 // finalization
295 result ^= totalLen;
296 return fmix32(result);
297 }
298
299 /**
300 * Starts a new incremental hash.
301 *
302 * @param seed The initial seed value.
303 */
304 public final void start(final int seed) {
305 // Reset
306 unprocessedLength = totalLen = 0;
307 hash = seed;
308 }
309 }
310
311 /**
312 * A random number to use for a hash code.
313 *
314 * @deprecated This is not used internally and will be removed in a future release.
315 */
316 @Deprecated
317 public static final long NULL_HASHCODE = 2862933555777941757L;
318 /**
319 * A default seed to use for the murmur hash algorithm.
320 * Has the value {@code 104729}.
321 */
322 public static final int DEFAULT_SEED = 104729;
323 // Constants for 32-bit variant
324 private static final int C1_32 = 0xcc9e2d51;
325 private static final int C2_32 = 0x1b873593;
326 private static final int R1_32 = 15;
327 private static final int R2_32 = 13;
328
329 private static final int M_32 = 5;
330 private static final int N_32 = 0xe6546b64;
331 // Constants for 128-bit variant
332 private static final long C1 = 0x87c37b91114253d5L;
333 private static final long C2 = 0x4cf5ad432745937fL;
334 private static final int R1 = 31;
335 private static final int R2 = 27;
336 private static final int R3 = 33;
337 private static final int M = 5;
338
339 private static final int N1 = 0x52dce729;
340
341 private static final int N2 = 0x38495ab5;
342
343 /**
344 * Performs the final avalanche mix step of the 32-bit hash function {@code MurmurHash3_x86_32}.
345 *
346 * @param hash The current hash.
347 * @return The final hash.
348 */
349 private static int fmix32(int hash) {
350 hash ^= hash >>> 16;
351 hash *= 0x85ebca6b;
352 hash ^= hash >>> 13;
353 hash *= 0xc2b2ae35;
354 hash ^= hash >>> 16;
355 return hash;
356 }
357
358 /**
359 * Performs the final avalanche mix step of the 64-bit hash function {@code MurmurHash3_x64_128}.
360 *
361 * @param hash The current hash.
362 * @return The final hash.
363 */
364 private static long fmix64(long hash) {
365 hash ^= hash >>> 33;
366 hash *= 0xff51afd7ed558ccdL;
367 hash ^= hash >>> 33;
368 hash *= 0xc4ceb9fe1a85ec53L;
369 hash ^= hash >>> 33;
370 return hash;
371 }
372
373 /**
374 * Generates 128-bit hash from the byte array with a default seed.
375 * This is a helper method that will produce the same result as:
376 *
377 * <pre>
378 * int offset = 0;
379 * int seed = 104729;
380 * int hash = MurmurHash3.hash128(data, offset, data.length, seed);
381 * </pre>
382 *
383 * <p>
384 * Note: The sign extension bug in {@link #hash128(byte[], int, int, int)} does not effect
385 * this result as the default seed is positive.
386 * </p>
387 *
388 * @param data The input byte array.
389 * @return The 128-bit hash (2 longs).
390 * @see #hash128(byte[], int, int, int)
391 */
392 public static long[] hash128(final byte[] data) {
393 return hash128(data, 0, data.length, DEFAULT_SEED);
394 }
395
396 /**
397 * Generates 128-bit hash from the byte array with the given offset, length and seed.
398 *
399 * <p>
400 * This is an implementation of the 128-bit hash function {@code MurmurHash3_x64_128}
401 * from Austin Appleby's original MurmurHash3 {@code c++} code in SMHasher.
402 * </p>
403 *
404 * <p>
405 * This implementation contains a sign-extension bug in the seed initialization.
406 * This manifests if the seed is negative.
407 * </p>
408 *
409 * @param data The input byte array.
410 * @param offset The first element of array.
411 * @param length The length of array.
412 * @param seed The initial seed value.
413 * @return The 128-bit hash (2 longs).
414 * @deprecated Use {@link #hash128x64(byte[], int, int, int)}. This corrects the seed initialization.
415 */
416 @Deprecated
417 public static long[] hash128(final byte[] data, final int offset, final int length, final int seed) {
418 // Note: This deliberately fails to apply masking using 0xffffffffL to the seed
419 // to maintain behavioral compatibility with the original version.
420 // The implicit conversion to a long will extend a negative sign
421 // bit through the upper 32-bits of the long seed. These should be zero.
422 return hash128x64Internal(data, offset, length, seed);
423 }
424
425 /**
426 * Generates 128-bit hash from a string with a default seed.
427 *
428 * <p>
429 * Before 1.14 the string was converted using default encoding. Since 1.14 the string is converted to bytes using UTF-8 encoding.
430 * </p>
431 * <p>
432 * This is a helper method that will produce the same result as:
433 * </p>
434 *
435 * <pre>
436 * int offset = 0;
437 * int seed = 104729;
438 * byte[] bytes = data.getBytes(StandardCharsets.UTF_8);
439 * int hash = MurmurHash3.hash128(bytes, offset, bytes.length, seed);
440 * </pre>
441 *
442 * <p>
443 * Note: The sign extension bug in {@link #hash128(byte[], int, int, int)} does not effect this result as the default seed is positive.
444 * </p>
445 *
446 * @param data The input String.
447 * @return The 128-bit hash (2 longs).
448 * @see #hash128(byte[], int, int, int)
449 * @deprecated Use {@link #hash128x64(byte[])} using the bytes returned from {@link String#getBytes(java.nio.charset.Charset)}.
450 */
451 @Deprecated
452 public static long[] hash128(final String data) {
453 final byte[] bytes = StringUtils.getBytesUtf8(data);
454 return hash128(bytes, 0, bytes.length, DEFAULT_SEED);
455 }
456
457 /**
458 * Generates 128-bit hash from the byte array with a seed of zero.
459 * This is a helper method that will produce the same result as:
460 *
461 * <pre>
462 * int offset = 0;
463 * int seed = 0;
464 * int hash = MurmurHash3.hash128x64(data, offset, data.length, seed);
465 * </pre>
466 *
467 * @param data The input byte array.
468 * @return The 128-bit hash (2 longs).
469 * @see #hash128x64(byte[], int, int, int)
470 * @since 1.14
471 */
472 public static long[] hash128x64(final byte[] data) {
473 return hash128x64(data, 0, data.length, 0);
474 }
475
476 /**
477 * Generates 128-bit hash from the byte array with the given offset, length and seed.
478 *
479 * <p>
480 * This is an implementation of the 128-bit hash function {@code MurmurHash3_x64_128}
481 * from Austin Appleby's original MurmurHash3 {@code c++} code in SMHasher.
482 * </p>
483 *
484 * @param data The input byte array.
485 * @param offset The first element of array.
486 * @param length The length of array.
487 * @param seed The initial seed value.
488 * @return The 128-bit hash (2 longs).
489 * @since 1.14
490 */
491 public static long[] hash128x64(final byte[] data, final int offset, final int length, final int seed) {
492 // Use an unsigned 32-bit integer as the seed
493 return hash128x64Internal(data, offset, length, seed & 0xffffffffL);
494 }
495
496 /**
497 * Generates 128-bit hash from the byte array with the given offset, length and seed.
498 *
499 * <p>
500 * This is an implementation of the 128-bit hash function {@code MurmurHash3_x64_128}
501 * from Austin Appleby's original MurmurHash3 {@code c++} code in SMHasher.
502 * </p>
503 *
504 * @param data The input byte array.
505 * @param offset The first element of array.
506 * @param length The length of array.
507 * @param seed The initial seed value.
508 * @return The 128-bit hash (2 longs).
509 */
510 private static long[] hash128x64Internal(final byte[] data, final int offset, final int length, final long seed) {
511 long h1 = seed;
512 long h2 = seed;
513 final int nblocks = length >> 4;
514 // body
515 for (int i = 0; i < nblocks; i++) {
516 final int index = offset + (i << 4);
517 long k1 = MurmurHash.getLittleEndianLong(data, index);
518 long k2 = MurmurHash.getLittleEndianLong(data, index + 8);
519 // mix functions for k1
520 k1 *= C1;
521 k1 = Long.rotateLeft(k1, R1);
522 k1 *= C2;
523 h1 ^= k1;
524 h1 = Long.rotateLeft(h1, R2);
525 h1 += h2;
526 h1 = h1 * M + N1;
527 // mix functions for k2
528 k2 *= C2;
529 k2 = Long.rotateLeft(k2, R3);
530 k2 *= C1;
531 h2 ^= k2;
532 h2 = Long.rotateLeft(h2, R1);
533 h2 += h1;
534 h2 = h2 * M + N2;
535 }
536 // tail
537 long k1 = 0;
538 long k2 = 0;
539 final int index = offset + (nblocks << 4);
540 switch (offset + length - index) {
541 case 15:
542 k2 ^= ((long) data[index + 14] & 0xff) << 48;
543 // falls-through
544 case 14:
545 k2 ^= ((long) data[index + 13] & 0xff) << 40;
546 // falls-through
547 case 13:
548 k2 ^= ((long) data[index + 12] & 0xff) << 32;
549 // falls-through
550 case 12:
551 k2 ^= ((long) data[index + 11] & 0xff) << 24;
552 // falls-through
553 case 11:
554 k2 ^= ((long) data[index + 10] & 0xff) << 16;
555 // falls-through
556 case 10:
557 k2 ^= ((long) data[index + 9] & 0xff) << 8;
558 // falls-through
559 case 9:
560 k2 ^= data[index + 8] & 0xff;
561 k2 *= C2;
562 k2 = Long.rotateLeft(k2, R3);
563 k2 *= C1;
564 h2 ^= k2;
565 // falls-through
566 case 8:
567 k1 ^= ((long) data[index + 7] & 0xff) << 56;
568 // falls-through
569 case 7:
570 k1 ^= ((long) data[index + 6] & 0xff) << 48;
571 // falls-through
572 case 6:
573 k1 ^= ((long) data[index + 5] & 0xff) << 40;
574 // falls-through
575 case 5:
576 k1 ^= ((long) data[index + 4] & 0xff) << 32;
577 // falls-through
578 case 4:
579 k1 ^= ((long) data[index + 3] & 0xff) << 24;
580 // falls-through
581 case 3:
582 k1 ^= ((long) data[index + 2] & 0xff) << 16;
583 // falls-through
584 case 2:
585 k1 ^= ((long) data[index + 1] & 0xff) << 8;
586 // falls-through
587 case 1:
588 k1 ^= data[index] & 0xff;
589 k1 *= C1;
590 k1 = Long.rotateLeft(k1, R1);
591 k1 *= C2;
592 h1 ^= k1;
593 }
594 // finalization
595 h1 ^= length;
596 h2 ^= length;
597
598 h1 += h2;
599 h2 += h1;
600
601 h1 = fmix64(h1);
602 h2 = fmix64(h2);
603
604 h1 += h2;
605 h2 += h1;
606 return new long[] { h1, h2 };
607 }
608
609 /**
610 * Generates 32-bit hash from the byte array with a default seed.
611 * This is a helper method that will produce the same result as:
612 *
613 * <pre>
614 * int offset = 0;
615 * int seed = 104729;
616 * int hash = MurmurHash3.hash32(data, offset, data.length, seed);
617 * </pre>
618 *
619 * <p>
620 * This implementation contains a sign-extension bug in the finalization step of
621 * any bytes left over from dividing the length by 4. This manifests if any of these
622 * bytes are negative.
623 * </p>
624 *
625 * @param data The input byte array.
626 * @return The 32-bit hash.
627 * @see #hash32(byte[], int, int, int)
628 * @deprecated Use {@link #hash32x86(byte[], int, int, int)}. This corrects the processing of trailing bytes.
629 */
630 @Deprecated
631 public static int hash32(final byte[] data) {
632 return hash32(data, 0, data.length, DEFAULT_SEED);
633 }
634
635 /**
636 * Generates 32-bit hash from the byte array with the given length and a default seed.
637 * This is a helper method that will produce the same result as:
638 *
639 * <pre>
640 * int offset = 0;
641 * int seed = 104729;
642 * int hash = MurmurHash3.hash32(data, offset, length, seed);
643 * </pre>
644 *
645 * <p>
646 * This implementation contains a sign-extension bug in the finalization step of
647 * any bytes left over from dividing the length by 4. This manifests if any of these
648 * bytes are negative.
649 * </p>
650 *
651 * @param data The input byte array.
652 * @param length The length of array.
653 * @return The 32-bit hash.
654 * @see #hash32(byte[], int, int, int)
655 * @deprecated Use {@link #hash32x86(byte[], int, int, int)}. This corrects the processing of trailing bytes.
656 */
657 @Deprecated
658 public static int hash32(final byte[] data, final int length) {
659 return hash32(data, length, DEFAULT_SEED);
660 }
661
662 /**
663 * Generates 32-bit hash from the byte array with the given length and seed. This is a
664 * helper method that will produce the same result as:
665 *
666 * <pre>
667 * int offset = 0;
668 * int hash = MurmurHash3.hash32(data, offset, length, seed);
669 * </pre>
670 *
671 * <p>
672 * This implementation contains a sign-extension bug in the finalization step of
673 * any bytes left over from dividing the length by 4. This manifests if any of these
674 * bytes are negative.
675 * </p>
676 *
677 * @param data The input byte array.
678 * @param length The length of array.
679 * @param seed The initial seed value.
680 * @return The 32-bit hash.
681 * @see #hash32(byte[], int, int, int)
682 * @deprecated Use {@link #hash32x86(byte[], int, int, int)}. This corrects the processing of trailing bytes.
683 */
684 @Deprecated
685 public static int hash32(final byte[] data, final int length, final int seed) {
686 return hash32(data, 0, length, seed);
687 }
688
689 /**
690 * Generates 32-bit hash from the byte array with the given offset, length and seed.
691 *
692 * <p>
693 * This is an implementation of the 32-bit hash function {@code MurmurHash3_x86_32}
694 * from Austin Appleby's original MurmurHash3 {@code c++} code in SMHasher.
695 * </p>
696 *
697 * <p>
698 * This implementation contains a sign-extension bug in the finalization step of
699 * any bytes left over from dividing the length by 4. This manifests if any of these
700 * bytes are negative.
701 * </p>
702 *
703 * @param data The input byte array.
704 * @param offset The offset of data.
705 * @param length The length of array.
706 * @param seed The initial seed value.
707 * @return The 32-bit hash
708 * @deprecated Use {@link #hash32x86(byte[], int, int, int)}. This corrects the processing of trailing bytes.
709 */
710 @Deprecated
711 public static int hash32(final byte[] data, final int offset, final int length, final int seed) {
712 int hash = seed;
713 final int nblocks = length >> 2;
714 // body
715 for (int i = 0; i < nblocks; i++) {
716 final int index = offset + (i << 2);
717 final int k = MurmurHash.getLittleEndianInt(data, index);
718 hash = mix32(k, hash);
719 }
720 // tail
721 // Note: This fails to apply masking using 0xff to the 3 remaining bytes.
722 final int index = offset + (nblocks << 2);
723 int k1 = 0;
724 switch (offset + length - index) {
725 case 3:
726 k1 ^= data[index + 2] << 16;
727 // falls-through
728 case 2:
729 k1 ^= data[index + 1] << 8;
730 // falls-through
731 case 1:
732 k1 ^= data[index];
733 // mix functions
734 k1 *= C1_32;
735 k1 = Integer.rotateLeft(k1, R1_32);
736 k1 *= C2_32;
737 hash ^= k1;
738 }
739 hash ^= length;
740 return fmix32(hash);
741 }
742
743 /**
744 * Generates 32-bit hash from a long with a default seed value.
745 * This is a helper method that will produce the same result as:
746 *
747 * <pre>
748 * int offset = 0;
749 * int seed = 104729;
750 * int hash = MurmurHash3.hash32x86(ByteBuffer.allocate(8)
751 * .putLong(data)
752 * .array(), offset, 8, seed);
753 * </pre>
754 *
755 * @param data The long to hash.
756 * @return The 32-bit hash.
757 * @see #hash32x86(byte[], int, int, int)
758 */
759 public static int hash32(final long data) {
760 return hash32(data, DEFAULT_SEED);
761 }
762
763 /**
764 * Generates 32-bit hash from a long with the given seed.
765 * This is a helper method that will produce the same result as:
766 *
767 * <pre>
768 * int offset = 0;
769 * int hash = MurmurHash3.hash32x86(ByteBuffer.allocate(8)
770 * .putLong(data)
771 * .array(), offset, 8, seed);
772 * </pre>
773 *
774 * @param data The long to hash.
775 * @param seed The initial seed value.
776 * @return The 32-bit hash.
777 * @see #hash32x86(byte[], int, int, int)
778 */
779 public static int hash32(final long data, final int seed) {
780 int hash = seed;
781 final long r0 = Long.reverseBytes(data);
782
783 hash = mix32((int) r0, hash);
784 hash = mix32((int) (r0 >>> 32), hash);
785
786 hash ^= Long.BYTES;
787 return fmix32(hash);
788 }
789
790 /**
791 * Generates 32-bit hash from two longs with a default seed value.
792 * This is a helper method that will produce the same result as:
793 *
794 * <pre>
795 * int offset = 0;
796 * int seed = 104729;
797 * int hash = MurmurHash3.hash32x86(ByteBuffer.allocate(16)
798 * .putLong(data1)
799 * .putLong(data2)
800 * .array(), offset, 16, seed);
801 * </pre>
802 *
803 * @param data1 The first long to hash.
804 * @param data2 The second long to hash.
805 * @return The 32-bit hash.
806 * @see #hash32x86(byte[], int, int, int)
807 */
808 public static int hash32(final long data1, final long data2) {
809 return hash32(data1, data2, DEFAULT_SEED);
810 }
811
812 /**
813 * Generates 32-bit hash from two longs with the given seed.
814 * This is a helper method that will produce the same result as:
815 *
816 * <pre>
817 * int offset = 0;
818 * int hash = MurmurHash3.hash32x86(ByteBuffer.allocate(16)
819 * .putLong(data1)
820 * .putLong(data2)
821 * .array(), offset, 16, seed);
822 * </pre>
823 *
824 * @param data1 The first long to hash.
825 * @param data2 The second long to hash.
826 * @param seed The initial seed value.
827 * @return The 32-bit hash.
828 * @see #hash32x86(byte[], int, int, int)
829 */
830 public static int hash32(final long data1, final long data2, final int seed) {
831 int hash = seed;
832 final long r0 = Long.reverseBytes(data1);
833 final long r1 = Long.reverseBytes(data2);
834
835 hash = mix32((int) r0, hash);
836 hash = mix32((int) (r0 >>> 32), hash);
837 hash = mix32((int) r1, hash);
838 hash = mix32((int) (r1 >>> 32), hash);
839
840 hash ^= Long.BYTES * 2;
841 return fmix32(hash);
842 }
843
844 /**
845 * Generates 32-bit hash from a string with a default seed.
846 * <p>
847 * Before 1.14 the string was converted using default encoding.
848 * Since 1.14 the string is converted to bytes using UTF-8 encoding.
849 * </p>
850 * This is a helper method that will produce the same result as:
851 *
852 * <pre>
853 * int offset = 0;
854 * int seed = 104729;
855 * byte[] bytes = data.getBytes(StandardCharsets.UTF_8);
856 * int hash = MurmurHash3.hash32(bytes, offset, bytes.length, seed);
857 * </pre>
858 *
859 * <p>
860 * This implementation contains a sign-extension bug in the finalization step of
861 * any bytes left over from dividing the length by 4. This manifests if any of these
862 * bytes are negative.
863 * </p>
864 *
865 * @param data The input string
866 * @return The 32-bit hash
867 * @see #hash32(byte[], int, int, int)
868 * @deprecated Use {@link #hash32x86(byte[], int, int, int)} with the bytes returned from
869 * {@link String#getBytes(java.nio.charset.Charset)}. This corrects the processing of trailing bytes.
870 */
871 @Deprecated
872 public static int hash32(final String data) {
873 final byte[] bytes = StringUtils.getBytesUtf8(data);
874 return hash32(bytes, 0, bytes.length, DEFAULT_SEED);
875 }
876
877 /**
878 * Generates 32-bit hash from the byte array with a seed of zero.
879 * This is a helper method that will produce the same result as:
880 *
881 * <pre>
882 * int offset = 0;
883 * int seed = 0;
884 * int hash = MurmurHash3.hash32x86(data, offset, data.length, seed);
885 * </pre>
886 *
887 * @param data The input byte array.
888 * @return The 32-bit hash.
889 * @see #hash32x86(byte[], int, int, int)
890 * @since 1.14
891 */
892 public static int hash32x86(final byte[] data) {
893 return hash32x86(data, 0, data.length, 0);
894 }
895
896 /**
897 * Generates 32-bit hash from the byte array with the given offset, length and seed.
898 *
899 * <p>
900 * This is an implementation of the 32-bit hash function {@code MurmurHash3_x86_32}
901 * from Austin Appleby's original MurmurHash3 {@code c++} code in SMHasher.
902 * </p>
903 *
904 * @param data The input byte array.
905 * @param offset The offset of data.
906 * @param length The length of array.
907 * @param seed The initial seed value.
908 * @return The 32-bit hash.
909 * @since 1.14
910 */
911 public static int hash32x86(final byte[] data, final int offset, final int length, final int seed) {
912 int hash = seed;
913 final int nblocks = length >> 2;
914 // body
915 for (int i = 0; i < nblocks; i++) {
916 final int index = offset + (i << 2);
917 final int k = MurmurHash.getLittleEndianInt(data, index);
918 hash = mix32(k, hash);
919 }
920 // tail
921 final int index = offset + (nblocks << 2);
922 int k1 = 0;
923 switch (offset + length - index) {
924 case 3:
925 k1 ^= (data[index + 2] & 0xff) << 16;
926 // falls-through
927 case 2:
928 // falls-through
929 k1 ^= (data[index + 1] & 0xff) << 8;
930 // falls-through
931 case 1:
932 k1 ^= data[index] & 0xff;
933 // mix functions
934 k1 *= C1_32;
935 k1 = Integer.rotateLeft(k1, R1_32);
936 k1 *= C2_32;
937 hash ^= k1;
938 }
939 hash ^= length;
940 return fmix32(hash);
941 }
942
943 /**
944 * Generates 64-bit hash from a byte array with a default seed.
945 *
946 * <p>
947 * <strong>This is not part of the original MurmurHash3 {@code c++} implementation.</strong>
948 * </p>
949 *
950 * <p>
951 * This is a Murmur3-like 64-bit variant. The method does not produce the same result as either half of the hash bytes from {@linkplain #hash128x64(byte[])}
952 * with the same byte data. This method will be removed in a future release.
953 * </p>
954 *
955 * <p>
956 * Note: The sign extension bug in {@link #hash64(byte[], int, int, int)} does not effect this result as the default seed is positive.
957 * </p>
958 *
959 * <p>
960 * This is a helper method that will produce the same result as:
961 * </p>
962 *
963 * <pre>
964 *
965 * int offset = 0;
966 * int seed = 104729;
967 * long hash = MurmurHash3.hash64(data, offset, data.length, seed);
968 * </pre>
969 *
970 * @param data The input byte array.
971 * @return The 64-bit hash.
972 * @see #hash64(byte[], int, int, int)
973 * @deprecated Not part of the MurmurHash3 implementation. Use half of the hash bytes from {@link #hash128x64(byte[])}.
974 */
975 @Deprecated
976 public static long hash64(final byte[] data) {
977 return hash64(data, 0, data.length, DEFAULT_SEED);
978 }
979
980 /**
981 * Generates 64-bit hash from a byte array with the given offset and length and a default seed.
982 *
983 * <p>
984 * <strong> This is not part of the original MurmurHash3 {@code c++} implementation. </strong>
985 * </p>
986 *
987 * <p>
988 * This is a Murmur3-like 64-bit variant. The method does not produce the same result as either half of the hash bytes from {@linkplain #hash128x64(byte[])}
989 * with the same byte data. This method will be removed in a future release.
990 * </p>
991 *
992 * <p>
993 * Note: The sign extension bug in {@link #hash64(byte[], int, int, int)} does not effect this result as the default seed is positive.
994 * </p>
995 *
996 * <p>
997 * This is a helper method that will produce the same result as:
998 * </p>
999 *
1000 * <pre>
1001 *
1002 * int seed = 104729;
1003 * long hash = MurmurHash3.hash64(data, offset, length, seed);
1004 * </pre>
1005 *
1006 * @param data The input byte array.
1007 * @param offset The offset of data.
1008 * @param length The length of array.
1009 * @return The 64-bit hash.
1010 * @see #hash64(byte[], int, int, int)
1011 * @deprecated Not part of the MurmurHash3 implementation. Use half of the hash bytes from {@link #hash128x64(byte[], int, int, int)}.
1012 */
1013 @Deprecated
1014 public static long hash64(final byte[] data, final int offset, final int length) {
1015 return hash64(data, offset, length, DEFAULT_SEED);
1016 }
1017
1018 /**
1019 * Generates 64-bit hash from a byte array with the given offset, length and seed.
1020 *
1021 * <p>
1022 * <strong>This is not part of the original MurmurHash3 {@code c++} implementation.</strong>
1023 * </p>
1024 *
1025 * <p>
1026 * This is a Murmur3-like 64-bit variant. This method will be removed in a future release.
1027 * </p>
1028 *
1029 * <p>
1030 * This implementation contains a sign-extension bug in the seed initialization. This manifests if the seed is negative.
1031 * </p>
1032 *
1033 * <p>
1034 * This algorithm processes 8 bytes chunks of data in a manner similar to the 16 byte chunks of data processed in the MurmurHash3
1035 * {@code MurmurHash3_x64_128} method. However the hash is not mixed with a hash chunk from the next 8 bytes of data. The method will not return the same
1036 * value as the first or second 64-bits of the function {@link #hash128(byte[], int, int, int)}.
1037 * </p>
1038 *
1039 * <p>
1040 * Use of this method is not advised. Use the first long returned from {@link #hash128x64(byte[], int, int, int)}.
1041 * </p>
1042 *
1043 * @param data The input byte array.
1044 * @param offset The offset of data.
1045 * @param length The length of array.
1046 * @param seed The initial seed value.
1047 * @return The 64-bit hash.
1048 * @deprecated Not part of the MurmurHash3 implementation. Use half of the hash bytes from {@link #hash128x64(byte[], int, int, int)}.
1049 */
1050 @Deprecated
1051 public static long hash64(final byte[] data, final int offset, final int length, final int seed) {
1052 // Note: This fails to apply masking using 0xffffffffL to the seed.
1053 long hash = seed;
1054 final int nblocks = length >> 3;
1055 // body
1056 for (int i = 0; i < nblocks; i++) {
1057 final int index = offset + (i << 3);
1058 long k = MurmurHash.getLittleEndianLong(data, index);
1059 // mix functions
1060 k *= C1;
1061 k = Long.rotateLeft(k, R1);
1062 k *= C2;
1063 hash ^= k;
1064 hash = Long.rotateLeft(hash, R2) * M + N1;
1065 }
1066 // tail
1067 long k1 = 0;
1068 final int index = offset + (nblocks << 3);
1069 switch (offset + length - index) {
1070 case 7:
1071 k1 ^= ((long) data[index + 6] & 0xff) << 48;
1072 // falls-through
1073 case 6:
1074 k1 ^= ((long) data[index + 5] & 0xff) << 40;
1075 // falls-through
1076 case 5:
1077 k1 ^= ((long) data[index + 4] & 0xff) << 32;
1078 // falls-through
1079 case 4:
1080 k1 ^= ((long) data[index + 3] & 0xff) << 24;
1081 // falls-through
1082 case 3:
1083 k1 ^= ((long) data[index + 2] & 0xff) << 16;
1084 // falls-through
1085 case 2:
1086 k1 ^= ((long) data[index + 1] & 0xff) << 8;
1087 // falls-through
1088 case 1:
1089 k1 ^= (long) data[index] & 0xff;
1090 k1 *= C1;
1091 k1 = Long.rotateLeft(k1, R1);
1092 k1 *= C2;
1093 hash ^= k1;
1094 }
1095 // finalization
1096 hash ^= length;
1097 return fmix64(hash);
1098 }
1099
1100 /**
1101 * Generates 64-bit hash from an int with a default seed.
1102 *
1103 * <p>
1104 * <strong> This is not part of the original MurmurHash3 {@code c++} implementation. </strong>
1105 * </p>
1106 *
1107 * <p>
1108 * This is a Murmur3-like 64-bit variant. The method does not produce the same result as either half of the hash bytes from {@linkplain #hash128x64(byte[])}
1109 * with the same byte data from the {@code int}. This method will be removed in a future release.
1110 * </p>
1111 *
1112 * <p>
1113 * Note: The sign extension bug in {@link #hash64(byte[], int, int, int)} does not effect this result as the default seed is positive.
1114 * </p>
1115 *
1116 * <p>
1117 * This is a helper method that will produce the same result as:
1118 * </p>
1119 *
1120 * <pre>
1121 *
1122 * int offset = 0;
1123 * int seed = 104729;
1124 * long hash = MurmurHash3.hash64(ByteBuffer.allocate(4).putInt(data).array(), offset, 4, seed);
1125 * </pre>
1126 *
1127 * @param data The int to hash.
1128 * @return The 64-bit hash.
1129 * @see #hash64(byte[], int, int, int)
1130 * @deprecated Not part of the MurmurHash3 implementation. Use half of the hash bytes from {@link #hash128x64(byte[])} with the bytes from the {@code int}.
1131 */
1132 @Deprecated
1133 public static long hash64(final int data) {
1134 long k1 = Integer.reverseBytes(data) & -1L >>> 32;
1135 long hash = DEFAULT_SEED;
1136 k1 *= C1;
1137 k1 = Long.rotateLeft(k1, R1);
1138 k1 *= C2;
1139 hash ^= k1;
1140 // finalization
1141 hash ^= Integer.BYTES;
1142 return fmix64(hash);
1143 }
1144
1145 /**
1146 * Generates 64-bit hash from a long with a default seed.
1147 *
1148 * <p>
1149 * <strong> This is not part of the original MurmurHash3 {@code c++} implementation. </strong>
1150 * </p>
1151 *
1152 * <p>
1153 * This is a Murmur3-like 64-bit variant. The method does not produce the same result as either half of the hash bytes from {@linkplain #hash128x64(byte[])}
1154 * with the same byte data from the {@code long}. This method will be removed in a future release.
1155 * </p>
1156 *
1157 * <p>
1158 * Note: The sign extension bug in {@link #hash64(byte[], int, int, int)} does not effect this result as the default seed is positive.
1159 * </p>
1160 *
1161 * <p>
1162 * This is a helper method that will produce the same result as:
1163 * </p>
1164 *
1165 * <pre>
1166 *
1167 * int offset = 0;
1168 * int seed = 104729;
1169 * long hash = MurmurHash3.hash64(ByteBuffer.allocate(8).putLong(data).array(), offset, 8, seed);
1170 * </pre>
1171 *
1172 * @param data The long to hash.
1173 * @return The 64-bit hash.
1174 * @see #hash64(byte[], int, int, int)
1175 * @deprecated Not part of the MurmurHash3 implementation. Use half of the hash bytes from {@link #hash128x64(byte[])} with the bytes from the {@code long}.
1176 */
1177 @Deprecated
1178 public static long hash64(final long data) {
1179 long hash = DEFAULT_SEED;
1180 long k = Long.reverseBytes(data);
1181 // mix functions
1182 k *= C1;
1183 k = Long.rotateLeft(k, R1);
1184 k *= C2;
1185 hash ^= k;
1186 hash = Long.rotateLeft(hash, R2) * M + N1;
1187 // finalization
1188 hash ^= Long.BYTES;
1189 return fmix64(hash);
1190 }
1191
1192 /**
1193 * Generates 64-bit hash from a short with a default seed.
1194 *
1195 * <p>
1196 * <strong>This is not part of the original MurmurHash3 {@code c++} implementation.</strong>
1197 * </p>
1198 *
1199 * <p>
1200 * This is a Murmur3-like 64-bit variant. The method does not produce the same result as either half of the hash bytes from {@linkplain #hash128x64(byte[])}
1201 * with the same byte data from the {@code short}. This method will be removed in a future release.
1202 * </p>
1203 *
1204 * <p>
1205 * Note: The sign extension bug in {@link #hash64(byte[], int, int, int)} does not effect this result as the default seed is positive.
1206 * </p>
1207 *
1208 * <p>
1209 * This is a helper method that will produce the same result as:
1210 * </p>
1211 *
1212 * <pre>
1213 *
1214 * int offset = 0;
1215 * int seed = 104729;
1216 * long hash = MurmurHash3.hash64(ByteBuffer.allocate(2).putShort(data).array(), offset, 2, seed);
1217 * </pre>
1218 *
1219 * @param data The short to hash.
1220 * @return The 64-bit hash.
1221 * @see #hash64(byte[], int, int, int)
1222 * @deprecated Not part of the MurmurHash3 implementation. Use half of the hash bytes from {@link #hash128x64(byte[])} with the bytes from the
1223 * {@code short}.
1224 */
1225 @Deprecated
1226 public static long hash64(final short data) {
1227 long hash = DEFAULT_SEED;
1228 long k1 = 0;
1229 k1 ^= ((long) data & 0xff) << 8;
1230 k1 ^= (long) ((data & 0xFF00) >> 8) & 0xff;
1231 k1 *= C1;
1232 k1 = Long.rotateLeft(k1, R1);
1233 k1 *= C2;
1234 hash ^= k1;
1235 // finalization
1236 hash ^= Short.BYTES;
1237 return fmix64(hash);
1238 }
1239
1240 /**
1241 * Performs the intermediate mix step of the 32-bit hash function {@code MurmurHash3_x86_32}.
1242 *
1243 * @param k The data to add to the hash.
1244 * @param hash The current hash.
1245 * @return The new hash.
1246 */
1247 private static int mix32(int k, int hash) {
1248 k *= C1_32;
1249 k = Integer.rotateLeft(k, R1_32);
1250 k *= C2_32;
1251 hash ^= k;
1252 return Integer.rotateLeft(hash, R2_32) * M_32 + N_32;
1253 }
1254
1255 /** No instance methods. */
1256 private MurmurHash3() {
1257 }
1258 }