View Javadoc
1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one or more
3    * contributor license agreements.  See the NOTICE file distributed with
4    * this work for additional information regarding copyright ownership.
5    * The ASF licenses this file to You under the Apache License, Version 2.0
6    * (the "License"); you may not use this file except in compliance with
7    * the License.  You may obtain a copy of the License at
8    *
9    *      https://www.apache.org/licenses/LICENSE-2.0
10   *
11   * Unless required by applicable law or agreed to in writing, software
12   * distributed under the License is distributed on an "AS IS" BASIS,
13   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14   * See the License for the specific language governing permissions and
15   * limitations under the License.
16   */
17  
18  package org.apache.commons.codec.digest;
19  
20  import org.apache.commons.codec.binary.StringUtils;
21  
22  /**
23   * Implements the MurmurHash3 32-bit and 128-bit hash functions.
24   *
25   * <p>
26   * MurmurHash is a non-cryptographic hash function suitable for general hash-based lookup. The name comes from two basic
27   * operations, multiply (MU) and rotate (R), used in its inner loop. Unlike cryptographic hash functions, it is not
28   * specifically designed to be difficult to reverse by an adversary, making it unsuitable for cryptographic purposes.
29   * </p>
30   *
31   * <p>
32   * This contains a Java port of the 32-bit hash function {@code MurmurHash3_x86_32} and the 128-bit hash function
33   * {@code MurmurHash3_x64_128} from Austin Appleby's original {@code c++} code in SMHasher.
34   * </p>
35   *
36   * <p>
37   * This is public domain code with no copyrights. From home page of
38   * <a href="https://github.com/aappleby/smhasher">SMHasher</a>:
39   * </p>
40   *
41   * <blockquote>
42   * "All MurmurHash versions are public domain software, and the author disclaims all copyright to their
43   * code."
44   * </blockquote>
45   *
46   * <p>
47   * Original adaption from <a href="https://hive.apache.org/">Apache Hive</a>.
48   * That adaption contains a {@code hash64} method that is not part of the original
49   * MurmurHash3 code. It is not recommended to use these methods. They will be removed in a future release. To obtain a
50   * 64-bit hash use half of the bits from the {@code hash128x64} methods using the input data converted to bytes.
51   * </p>
52   *
53   * @see <a href="https://en.wikipedia.org/wiki/MurmurHash">MurmurHash</a>
54   * @see <a href="https://github.com/aappleby/smhasher/blob/master/src/MurmurHash3.cpp"> Original MurmurHash3 C++
55   *      code</a>
56   * @see <a href=
57   *      "https://github.com/apache/hive/blob/master/storage-api/src/java/org/apache/hive/common/util/Murmur3.java">
58   *      Apache Hive Murmer3</a>
59   * @since 1.13
60   */
61  public final class MurmurHash3 {
62  
63      /**
64       * Generates 32-bit hash from input bytes. Bytes can be added incrementally and the new
65       * hash computed.
66       *
67       * <p>
68       * This is an implementation of the 32-bit hash function {@code MurmurHash3_x86_32}
69       * from Austin Appleby's original MurmurHash3 {@code c++} code in SMHasher.
70       * </p>
71       *
72       * <p>
73       * This implementation contains a sign-extension bug in the finalization step of
74       * any bytes left over from dividing the length by 4. This manifests if any of these
75       * bytes are negative.
76       * </p>
77       *
78       * @deprecated Use IncrementalHash32x86. This corrects the processing of trailing bytes.
79       */
80      @Deprecated
81      public static class IncrementalHash32 extends IncrementalHash32x86 {
82  
83          /**
84           * Constructs a new instance.
85           */
86          public IncrementalHash32() {
87              // empty
88          }
89  
90          /**
91           * {@inheritDoc}
92           *
93           * <p>
94           * This implementation contains a sign-extension bug in the finalization step of
95           * any bytes left over from dividing the length by 4. This manifests if any of these
96           * bytes are negative.
97           * <p>
98           *
99           * @deprecated Use IncrementalHash32x86. This corrects the processing of trailing bytes.
100          */
101         @Override
102         @Deprecated
103         int finalise(final int hash, final int unprocessedLength, final byte[] unprocessed, final int totalLen) {
104             int result = hash;
105             // Note: This fails to apply masking using 0xff to the 3 remaining bytes.
106             int k1 = 0;
107             switch (unprocessedLength) {
108             case 3:
109                 k1 ^= unprocessed[2] << 16;
110                 // falls-through
111             case 2:
112                 k1 ^= unprocessed[1] << 8;
113                 // falls-through
114             case 1:
115                 k1 ^= unprocessed[0];
116                 // mix functions
117                 k1 *= C1_32;
118                 k1 = Integer.rotateLeft(k1, R1_32);
119                 k1 *= C2_32;
120                 result ^= k1;
121             }
122             // finalization
123             result ^= totalLen;
124             return fmix32(result);
125         }
126     }
127 
128     /**
129      * Generates 32-bit hash from input bytes. Bytes can be added incrementally and the new
130      * hash computed.
131      *
132      * <p>
133      * This is an implementation of the 32-bit hash function {@code MurmurHash3_x86_32}
134      * from Austin Appleby's original MurmurHash3 {@code c++} code in SMHasher.
135      * </p>
136      *
137      * @since 1.14
138      */
139     public static class IncrementalHash32x86 {
140 
141         /** The size of byte blocks that are processed together. */
142         private static final int BLOCK_SIZE = 4;
143 
144         /**
145          * Combines the bytes using an Or operation ({@code | } in a little-endian representation
146          * of a 32-bit integer; byte 1 will be the least significant byte, byte 4 the most
147          * significant.
148          *
149          * @param b1 The first byte
150          * @param b2 The second byte
151          * @param b3 The third byte
152          * @param b4 The fourth byte
153          * @return The 32-bit integer
154          */
155         private static int orBytes(final byte b1, final byte b2, final byte b3, final byte b4) {
156             return b1 & 0xff | (b2 & 0xff) << 8 | (b3 & 0xff) << 16 | (b4 & 0xff) << 24;
157         }
158 
159         /** Up to 3 unprocessed bytes from input data. */
160         private final byte[] unprocessed = new byte[3];
161 
162         /** The number of unprocessed bytes in the tail data. */
163         private int unprocessedLength;
164 
165         /** The total number of input bytes added since the start. */
166         private int totalLen;
167 
168         /**
169          * The current running hash.
170          * This must be finalized to generate the 32-bit hash value.
171          */
172         private int hash;
173 
174         /**
175          * Constructs a new instance.
176          */
177         public IncrementalHash32x86() {
178             // empty
179         }
180 
181         /**
182          * Adds the byte array to the current incremental hash.
183          *
184          * @param data The input byte array
185          * @param offset The offset of data
186          * @param length The length of array
187          */
188         public final void add(final byte[] data, final int offset, final int length) {
189             if (length <= 0) {
190                 // Nothing to add
191                 return;
192             }
193             totalLen += length;
194             // Process the bytes in blocks of 4.
195             // New bytes must be added to any current unprocessed bytes,
196             // then processed in blocks of 4 and the remaining bytes saved:
197             //
198             //    |--|---------------------------|--|
199             // unprocessed
200             //                main block
201             //                                remaining
202 
203             // Check if the unprocessed bytes and new bytes can fill a block of 4.
204             // Make this overflow safe in the event that length is Integer.MAX_VALUE.
205             // Equivalent to: (unprocessedLength + length < BLOCK_SIZE)
206             if (unprocessedLength + length - BLOCK_SIZE < 0) {
207                 // Not enough so add to the unprocessed bytes
208                 System.arraycopy(data, offset, unprocessed, unprocessedLength, length);
209                 unprocessedLength += length;
210                 return;
211             }
212             // Combine unprocessed bytes with new bytes.
213             final int newOffset;
214             final int newLength;
215             if (unprocessedLength > 0) {
216                 int k = -1;
217                 switch (unprocessedLength) {
218                 case 1:
219                     k = orBytes(unprocessed[0], data[offset], data[offset + 1], data[offset + 2]);
220                     break;
221                 case 2:
222                     k = orBytes(unprocessed[0], unprocessed[1], data[offset], data[offset + 1]);
223                     break;
224                 case 3:
225                     k = orBytes(unprocessed[0], unprocessed[1], unprocessed[2], data[offset]);
226                     break;
227                 default:
228                     throw new IllegalStateException("Unprocessed length should be 1, 2, or 3: " + unprocessedLength);
229                 }
230                 hash = mix32(k, hash);
231                 // Update the offset and length
232                 final int consumed = BLOCK_SIZE - unprocessedLength;
233                 newOffset = offset + consumed;
234                 newLength = length - consumed;
235             } else {
236                 newOffset = offset;
237                 newLength = length;
238             }
239             // Main processing of blocks of 4 bytes
240             final int nblocks = newLength >> 2;
241 
242             for (int i = 0; i < nblocks; i++) {
243                 final int index = newOffset + (i << 2);
244                 final int k = MurmurHash.getLittleEndianInt(data, index);
245                 hash = mix32(k, hash);
246             }
247             // Save left-over unprocessed bytes
248             final int consumed = nblocks << 2;
249             unprocessedLength = newLength - consumed;
250             if (unprocessedLength != 0) {
251                 System.arraycopy(data, newOffset + consumed, unprocessed, 0, unprocessedLength);
252             }
253         }
254 
255         /**
256          * Generates the 32-bit hash value. Repeat calls to this method with no additional data
257          * will generate the same hash value.
258          *
259          * @return The 32-bit hash
260          */
261         public final int end() {
262             // Allow calling end() again after adding no data to return the same result.
263             return finalise(hash, unprocessedLength, unprocessed, totalLen);
264         }
265 
266         /**
267          * Finalizes the running hash to the output 32-bit hash by processing remaining bytes
268          * and performing final mixing.
269          *
270          * @param hash The running hash
271          * @param unprocessedLength The number of unprocessed bytes in the tail data.
272          * @param unprocessed Up to 3 unprocessed bytes from input data.
273          * @param totalLen The total number of input bytes added since the start.
274          * @return The 32-bit hash
275          */
276         int finalise(final int hash, final int unprocessedLength, final byte[] unprocessed, final int totalLen) {
277             int result = hash;
278             int k1 = 0;
279             switch (unprocessedLength) {
280             case 3:
281                 k1 ^= (unprocessed[2] & 0xff) << 16;
282                 // falls-through
283             case 2:
284                 k1 ^= (unprocessed[1] & 0xff) << 8;
285                 // falls-through
286             case 1:
287                 k1 ^= unprocessed[0] & 0xff;
288                 // mix functions
289                 k1 *= C1_32;
290                 k1 = Integer.rotateLeft(k1, R1_32);
291                 k1 *= C2_32;
292                 result ^= k1;
293             }
294             // finalization
295             result ^= totalLen;
296             return fmix32(result);
297         }
298 
299         /**
300          * Starts a new incremental hash.
301          *
302          * @param seed The initial seed value
303          */
304         public final void start(final int seed) {
305             // Reset
306             unprocessedLength = totalLen = 0;
307             hash = seed;
308         }
309     }
310 
311     /**
312      * A random number to use for a hash code.
313      *
314      * @deprecated This is not used internally and will be removed in a future release.
315      */
316     @Deprecated
317     public static final long NULL_HASHCODE = 2862933555777941757L;
318     /**
319      * A default seed to use for the murmur hash algorithm.
320      * Has the value {@code 104729}.
321      */
322     public static final int DEFAULT_SEED = 104729;
323     // Constants for 32-bit variant
324     private static final int C1_32 = 0xcc9e2d51;
325     private static final int C2_32 = 0x1b873593;
326     private static final int R1_32 = 15;
327     private static final int R2_32 = 13;
328 
329     private static final int M_32 = 5;
330     private static final int N_32 = 0xe6546b64;
331     // Constants for 128-bit variant
332     private static final long C1 = 0x87c37b91114253d5L;
333     private static final long C2 = 0x4cf5ad432745937fL;
334     private static final int R1 = 31;
335     private static final int R2 = 27;
336     private static final int R3 = 33;
337     private static final int M = 5;
338 
339     private static final int N1 = 0x52dce729;
340 
341     private static final int N2 = 0x38495ab5;
342 
343     /**
344      * Performs the final avalanche mix step of the 32-bit hash function {@code MurmurHash3_x86_32}.
345      *
346      * @param hash The current hash
347      * @return The final hash
348      */
349     private static int fmix32(int hash) {
350         hash ^= hash >>> 16;
351         hash *= 0x85ebca6b;
352         hash ^= hash >>> 13;
353         hash *= 0xc2b2ae35;
354         hash ^= hash >>> 16;
355         return hash;
356     }
357 
358     /**
359      * Performs the final avalanche mix step of the 64-bit hash function {@code MurmurHash3_x64_128}.
360      *
361      * @param hash The current hash
362      * @return The final hash
363      */
364     private static long fmix64(long hash) {
365         hash ^= hash >>> 33;
366         hash *= 0xff51afd7ed558ccdL;
367         hash ^= hash >>> 33;
368         hash *= 0xc4ceb9fe1a85ec53L;
369         hash ^= hash >>> 33;
370         return hash;
371     }
372 
373     /**
374      * Generates 128-bit hash from the byte array with a default seed.
375      * This is a helper method that will produce the same result as:
376      *
377      * <pre>
378      * int offset = 0;
379      * int seed = 104729;
380      * int hash = MurmurHash3.hash128(data, offset, data.length, seed);
381      * </pre>
382      *
383      * <p>
384      * Note: The sign extension bug in {@link #hash128(byte[], int, int, int)} does not effect
385      * this result as the default seed is positive.
386      * </p>
387      *
388      * @param data The input byte array
389      * @return The 128-bit hash (2 longs)
390      * @see #hash128(byte[], int, int, int)
391      */
392     public static long[] hash128(final byte[] data) {
393         return hash128(data, 0, data.length, DEFAULT_SEED);
394     }
395 
396     /**
397      * Generates 128-bit hash from the byte array with the given offset, length and seed.
398      *
399      * <p>
400      * This is an implementation of the 128-bit hash function {@code MurmurHash3_x64_128}
401      * from Austin Appleby's original MurmurHash3 {@code c++} code in SMHasher.
402      * </p>
403      *
404      * <p>
405      * This implementation contains a sign-extension bug in the seed initialization.
406      * This manifests if the seed is negative.
407      * </p>
408      *
409      * @param data The input byte array
410      * @param offset The first element of array
411      * @param length The length of array
412      * @param seed The initial seed value
413      * @return The 128-bit hash (2 longs)
414      * @deprecated Use {@link #hash128x64(byte[], int, int, int)}. This corrects the seed initialization.
415      */
416     @Deprecated
417     public static long[] hash128(final byte[] data, final int offset, final int length, final int seed) {
418         // Note: This deliberately fails to apply masking using 0xffffffffL to the seed
419         // to maintain behavioral compatibility with the original version.
420         // The implicit conversion to a long will extend a negative sign
421         // bit through the upper 32-bits of the long seed. These should be zero.
422         return hash128x64Internal(data, offset, length, seed);
423     }
424 
425     /**
426      * Generates 128-bit hash from a string with a default seed.
427      * <p>
428      * Before 1.14 the string was converted using default encoding.
429      * Since 1.14 the string is converted to bytes using UTF-8 encoding.
430      * </p>
431      * <p>
432      * This is a helper method that will produce the same result as:
433      * </p>
434      *
435      * <pre>
436      * int offset = 0;
437      * int seed = 104729;
438      * byte[] bytes = data.getBytes(StandardCharsets.UTF_8);
439      * int hash = MurmurHash3.hash128(bytes, offset, bytes.length, seed);
440      * </pre>
441      *
442      * <p>
443      * Note: The sign extension bug in {@link #hash128(byte[], int, int, int)} does not effect
444      * this result as the default seed is positive.
445      * </p>
446      *
447      * @param data The input String
448      * @return The 128-bit hash (2 longs)
449      * @see #hash128(byte[], int, int, int)
450      * @deprecated Use {@link #hash128x64(byte[])} using the bytes returned from
451      * {@link String#getBytes(java.nio.charset.Charset)}.
452      */
453     @Deprecated
454     public static long[] hash128(final String data) {
455         final byte[] bytes = StringUtils.getBytesUtf8(data);
456         return hash128(bytes, 0, bytes.length, DEFAULT_SEED);
457     }
458 
459     /**
460      * Generates 128-bit hash from the byte array with a seed of zero.
461      * This is a helper method that will produce the same result as:
462      *
463      * <pre>
464      * int offset = 0;
465      * int seed = 0;
466      * int hash = MurmurHash3.hash128x64(data, offset, data.length, seed);
467      * </pre>
468      *
469      * @param data The input byte array
470      * @return The 128-bit hash (2 longs)
471      * @see #hash128x64(byte[], int, int, int)
472      * @since 1.14
473      */
474     public static long[] hash128x64(final byte[] data) {
475         return hash128x64(data, 0, data.length, 0);
476     }
477 
478     /**
479      * Generates 128-bit hash from the byte array with the given offset, length and seed.
480      *
481      * <p>
482      * This is an implementation of the 128-bit hash function {@code MurmurHash3_x64_128}
483      * from Austin Appleby's original MurmurHash3 {@code c++} code in SMHasher.
484      * </p>
485      *
486      * @param data The input byte array
487      * @param offset The first element of array
488      * @param length The length of array
489      * @param seed The initial seed value
490      * @return The 128-bit hash (2 longs)
491      * @since 1.14
492      */
493     public static long[] hash128x64(final byte[] data, final int offset, final int length, final int seed) {
494         // Use an unsigned 32-bit integer as the seed
495         return hash128x64Internal(data, offset, length, seed & 0xffffffffL);
496     }
497 
498     /**
499      * Generates 128-bit hash from the byte array with the given offset, length and seed.
500      *
501      * <p>
502      * This is an implementation of the 128-bit hash function {@code MurmurHash3_x64_128}
503      * from Austin Appleby's original MurmurHash3 {@code c++} code in SMHasher.
504      * </p>
505      *
506      * @param data The input byte array
507      * @param offset The first element of array
508      * @param length The length of array
509      * @param seed The initial seed value
510      * @return The 128-bit hash (2 longs)
511      */
512     private static long[] hash128x64Internal(final byte[] data, final int offset, final int length, final long seed) {
513         long h1 = seed;
514         long h2 = seed;
515         final int nblocks = length >> 4;
516         // body
517         for (int i = 0; i < nblocks; i++) {
518             final int index = offset + (i << 4);
519             long k1 = MurmurHash.getLittleEndianLong(data, index);
520             long k2 = MurmurHash.getLittleEndianLong(data, index + 8);
521 
522             // mix functions for k1
523             k1 *= C1;
524             k1 = Long.rotateLeft(k1, R1);
525             k1 *= C2;
526             h1 ^= k1;
527             h1 = Long.rotateLeft(h1, R2);
528             h1 += h2;
529             h1 = h1 * M + N1;
530 
531             // mix functions for k2
532             k2 *= C2;
533             k2 = Long.rotateLeft(k2, R3);
534             k2 *= C1;
535             h2 ^= k2;
536             h2 = Long.rotateLeft(h2, R1);
537             h2 += h1;
538             h2 = h2 * M + N2;
539         }
540         // tail
541         long k1 = 0;
542         long k2 = 0;
543         final int index = offset + (nblocks << 4);
544         switch (offset + length - index) {
545         case 15:
546             k2 ^= ((long) data[index + 14] & 0xff) << 48;
547             // falls-through
548         case 14:
549             k2 ^= ((long) data[index + 13] & 0xff) << 40;
550             // falls-through
551         case 13:
552             k2 ^= ((long) data[index + 12] & 0xff) << 32;
553             // falls-through
554         case 12:
555             k2 ^= ((long) data[index + 11] & 0xff) << 24;
556             // falls-through
557         case 11:
558             k2 ^= ((long) data[index + 10] & 0xff) << 16;
559             // falls-through
560         case 10:
561             k2 ^= ((long) data[index + 9] & 0xff) << 8;
562             // falls-through
563         case 9:
564             k2 ^= data[index + 8] & 0xff;
565             k2 *= C2;
566             k2 = Long.rotateLeft(k2, R3);
567             k2 *= C1;
568             h2 ^= k2;
569             // falls-through
570         case 8:
571             k1 ^= ((long) data[index + 7] & 0xff) << 56;
572             // falls-through
573         case 7:
574             k1 ^= ((long) data[index + 6] & 0xff) << 48;
575             // falls-through
576         case 6:
577             k1 ^= ((long) data[index + 5] & 0xff) << 40;
578             // falls-through
579         case 5:
580             k1 ^= ((long) data[index + 4] & 0xff) << 32;
581             // falls-through
582         case 4:
583             k1 ^= ((long) data[index + 3] & 0xff) << 24;
584             // falls-through
585         case 3:
586             k1 ^= ((long) data[index + 2] & 0xff) << 16;
587             // falls-through
588         case 2:
589             k1 ^= ((long) data[index + 1] & 0xff) << 8;
590             // falls-through
591         case 1:
592             k1 ^= data[index] & 0xff;
593             k1 *= C1;
594             k1 = Long.rotateLeft(k1, R1);
595             k1 *= C2;
596             h1 ^= k1;
597         }
598         // finalization
599         h1 ^= length;
600         h2 ^= length;
601 
602         h1 += h2;
603         h2 += h1;
604 
605         h1 = fmix64(h1);
606         h2 = fmix64(h2);
607 
608         h1 += h2;
609         h2 += h1;
610         return new long[] { h1, h2 };
611     }
612 
613     /**
614      * Generates 32-bit hash from the byte array with a default seed.
615      * This is a helper method that will produce the same result as:
616      *
617      * <pre>
618      * int offset = 0;
619      * int seed = 104729;
620      * int hash = MurmurHash3.hash32(data, offset, data.length, seed);
621      * </pre>
622      *
623      * <p>
624      * This implementation contains a sign-extension bug in the finalization step of
625      * any bytes left over from dividing the length by 4. This manifests if any of these
626      * bytes are negative.
627      * </p>
628      *
629      * @param data The input byte array
630      * @return The 32-bit hash
631      * @see #hash32(byte[], int, int, int)
632      * @deprecated Use {@link #hash32x86(byte[], int, int, int)}. This corrects the processing of trailing bytes.
633      */
634     @Deprecated
635     public static int hash32(final byte[] data) {
636         return hash32(data, 0, data.length, DEFAULT_SEED);
637     }
638 
639     /**
640      * Generates 32-bit hash from the byte array with the given length and a default seed.
641      * This is a helper method that will produce the same result as:
642      *
643      * <pre>
644      * int offset = 0;
645      * int seed = 104729;
646      * int hash = MurmurHash3.hash32(data, offset, length, seed);
647      * </pre>
648      *
649      * <p>
650      * This implementation contains a sign-extension bug in the finalization step of
651      * any bytes left over from dividing the length by 4. This manifests if any of these
652      * bytes are negative.
653      * </p>
654      *
655      * @param data The input byte array
656      * @param length The length of array
657      * @return The 32-bit hash
658      * @see #hash32(byte[], int, int, int)
659      * @deprecated Use {@link #hash32x86(byte[], int, int, int)}. This corrects the processing of trailing bytes.
660      */
661     @Deprecated
662     public static int hash32(final byte[] data, final int length) {
663         return hash32(data, length, DEFAULT_SEED);
664     }
665 
666     /**
667      * Generates 32-bit hash from the byte array with the given length and seed. This is a
668      * helper method that will produce the same result as:
669      *
670      * <pre>
671      * int offset = 0;
672      * int hash = MurmurHash3.hash32(data, offset, length, seed);
673      * </pre>
674      *
675      * <p>
676      * This implementation contains a sign-extension bug in the finalization step of
677      * any bytes left over from dividing the length by 4. This manifests if any of these
678      * bytes are negative.
679      * </p>
680      *
681      * @param data The input byte array
682      * @param length The length of array
683      * @param seed The initial seed value
684      * @return The 32-bit hash
685      * @see #hash32(byte[], int, int, int)
686      * @deprecated Use {@link #hash32x86(byte[], int, int, int)}. This corrects the processing of trailing bytes.
687      */
688     @Deprecated
689     public static int hash32(final byte[] data, final int length, final int seed) {
690         return hash32(data, 0, length, seed);
691     }
692 
693     /**
694      * Generates 32-bit hash from the byte array with the given offset, length and seed.
695      *
696      * <p>
697      * This is an implementation of the 32-bit hash function {@code MurmurHash3_x86_32}
698      * from Austin Appleby's original MurmurHash3 {@code c++} code in SMHasher.
699      * </p>
700      *
701      * <p>
702      * This implementation contains a sign-extension bug in the finalization step of
703      * any bytes left over from dividing the length by 4. This manifests if any of these
704      * bytes are negative.
705      * </p>
706      *
707      * @param data The input byte array
708      * @param offset The offset of data
709      * @param length The length of array
710      * @param seed The initial seed value
711      * @return The 32-bit hash
712      * @deprecated Use {@link #hash32x86(byte[], int, int, int)}. This corrects the processing of trailing bytes.
713      */
714     @Deprecated
715     public static int hash32(final byte[] data, final int offset, final int length, final int seed) {
716         int hash = seed;
717         final int nblocks = length >> 2;
718         // body
719         for (int i = 0; i < nblocks; i++) {
720             final int index = offset + (i << 2);
721             final int k = MurmurHash.getLittleEndianInt(data, index);
722             hash = mix32(k, hash);
723         }
724         // tail
725         // Note: This fails to apply masking using 0xff to the 3 remaining bytes.
726         final int index = offset + (nblocks << 2);
727         int k1 = 0;
728         switch (offset + length - index) {
729         case 3:
730             k1 ^= data[index + 2] << 16;
731             // falls-through
732         case 2:
733             k1 ^= data[index + 1] << 8;
734             // falls-through
735         case 1:
736             k1 ^= data[index];
737             // mix functions
738             k1 *= C1_32;
739             k1 = Integer.rotateLeft(k1, R1_32);
740             k1 *= C2_32;
741             hash ^= k1;
742         }
743         hash ^= length;
744         return fmix32(hash);
745     }
746 
747     /**
748      * Generates 32-bit hash from a long with a default seed value.
749      * This is a helper method that will produce the same result as:
750      *
751      * <pre>
752      * int offset = 0;
753      * int seed = 104729;
754      * int hash = MurmurHash3.hash32x86(ByteBuffer.allocate(8)
755      *                                            .putLong(data)
756      *                                            .array(), offset, 8, seed);
757      * </pre>
758      *
759      * @param data The long to hash
760      * @return The 32-bit hash
761      * @see #hash32x86(byte[], int, int, int)
762      */
763     public static int hash32(final long data) {
764         return hash32(data, DEFAULT_SEED);
765     }
766 
767     /**
768      * Generates 32-bit hash from a long with the given seed.
769      * This is a helper method that will produce the same result as:
770      *
771      * <pre>
772      * int offset = 0;
773      * int hash = MurmurHash3.hash32x86(ByteBuffer.allocate(8)
774      *                                            .putLong(data)
775      *                                            .array(), offset, 8, seed);
776      * </pre>
777      *
778      * @param data The long to hash
779      * @param seed The initial seed value
780      * @return The 32-bit hash
781      * @see #hash32x86(byte[], int, int, int)
782      */
783     public static int hash32(final long data, final int seed) {
784         int hash = seed;
785         final long r0 = Long.reverseBytes(data);
786 
787         hash = mix32((int) r0, hash);
788         hash = mix32((int) (r0 >>> 32), hash);
789 
790         hash ^= Long.BYTES;
791         return fmix32(hash);
792     }
793 
794     /**
795      * Generates 32-bit hash from two longs with a default seed value.
796      * This is a helper method that will produce the same result as:
797      *
798      * <pre>
799      * int offset = 0;
800      * int seed = 104729;
801      * int hash = MurmurHash3.hash32x86(ByteBuffer.allocate(16)
802      *                                            .putLong(data1)
803      *                                            .putLong(data2)
804      *                                            .array(), offset, 16, seed);
805      * </pre>
806      *
807      * @param data1 The first long to hash
808      * @param data2 The second long to hash
809      * @return The 32-bit hash
810      * @see #hash32x86(byte[], int, int, int)
811      */
812     public static int hash32(final long data1, final long data2) {
813         return hash32(data1, data2, DEFAULT_SEED);
814     }
815 
816     /**
817      * Generates 32-bit hash from two longs with the given seed.
818      * This is a helper method that will produce the same result as:
819      *
820      * <pre>
821      * int offset = 0;
822      * int hash = MurmurHash3.hash32x86(ByteBuffer.allocate(16)
823      *                                            .putLong(data1)
824      *                                            .putLong(data2)
825      *                                            .array(), offset, 16, seed);
826      * </pre>
827      *
828      * @param data1 The first long to hash
829      * @param data2 The second long to hash
830      * @param seed The initial seed value
831      * @return The 32-bit hash
832      * @see #hash32x86(byte[], int, int, int)
833      */
834     public static int hash32(final long data1, final long data2, final int seed) {
835         int hash = seed;
836         final long r0 = Long.reverseBytes(data1);
837         final long r1 = Long.reverseBytes(data2);
838 
839         hash = mix32((int) r0, hash);
840         hash = mix32((int) (r0 >>> 32), hash);
841         hash = mix32((int) r1, hash);
842         hash = mix32((int) (r1 >>> 32), hash);
843 
844         hash ^= Long.BYTES * 2;
845         return fmix32(hash);
846     }
847 
848     /**
849      * Generates 32-bit hash from a string with a default seed.
850      * <p>
851      * Before 1.14 the string was converted using default encoding.
852      * Since 1.14 the string is converted to bytes using UTF-8 encoding.
853      * </p>
854      * This is a helper method that will produce the same result as:
855      *
856      * <pre>
857      * int offset = 0;
858      * int seed = 104729;
859      * byte[] bytes = data.getBytes(StandardCharsets.UTF_8);
860      * int hash = MurmurHash3.hash32(bytes, offset, bytes.length, seed);
861      * </pre>
862      *
863      * <p>
864      * This implementation contains a sign-extension bug in the finalization step of
865      * any bytes left over from dividing the length by 4. This manifests if any of these
866      * bytes are negative.
867      * </p>
868      *
869      * @param data The input string
870      * @return The 32-bit hash
871      * @see #hash32(byte[], int, int, int)
872      * @deprecated Use {@link #hash32x86(byte[], int, int, int)} with the bytes returned from
873      * {@link String#getBytes(java.nio.charset.Charset)}. This corrects the processing of trailing bytes.
874      */
875     @Deprecated
876     public static int hash32(final String data) {
877         final byte[] bytes = StringUtils.getBytesUtf8(data);
878         return hash32(bytes, 0, bytes.length, DEFAULT_SEED);
879     }
880 
881     /**
882      * Generates 32-bit hash from the byte array with a seed of zero.
883      * This is a helper method that will produce the same result as:
884      *
885      * <pre>
886      * int offset = 0;
887      * int seed = 0;
888      * int hash = MurmurHash3.hash32x86(data, offset, data.length, seed);
889      * </pre>
890      *
891      * @param data The input byte array
892      * @return The 32-bit hash
893      * @see #hash32x86(byte[], int, int, int)
894      * @since 1.14
895      */
896     public static int hash32x86(final byte[] data) {
897         return hash32x86(data, 0, data.length, 0);
898     }
899 
900     /**
901      * Generates 32-bit hash from the byte array with the given offset, length and seed.
902      *
903      * <p>
904      * This is an implementation of the 32-bit hash function {@code MurmurHash3_x86_32}
905      * from Austin Appleby's original MurmurHash3 {@code c++} code in SMHasher.
906      * </p>
907      *
908      * @param data The input byte array
909      * @param offset The offset of data
910      * @param length The length of array
911      * @param seed The initial seed value
912      * @return The 32-bit hash
913      * @since 1.14
914      */
915     public static int hash32x86(final byte[] data, final int offset, final int length, final int seed) {
916         int hash = seed;
917         final int nblocks = length >> 2;
918         // body
919         for (int i = 0; i < nblocks; i++) {
920             final int index = offset + (i << 2);
921             final int k = MurmurHash.getLittleEndianInt(data, index);
922             hash = mix32(k, hash);
923         }
924         // tail
925         final int index = offset + (nblocks << 2);
926         int k1 = 0;
927         switch (offset + length - index) {
928         case 3:
929             k1 ^= (data[index + 2] & 0xff) << 16;
930             // falls-through
931         case 2:
932             // falls-through
933             k1 ^= (data[index + 1] & 0xff) << 8;
934             // falls-through
935         case 1:
936             k1 ^= data[index] & 0xff;
937             // mix functions
938             k1 *= C1_32;
939             k1 = Integer.rotateLeft(k1, R1_32);
940             k1 *= C2_32;
941             hash ^= k1;
942         }
943         hash ^= length;
944         return fmix32(hash);
945     }
946 
947     /**
948      * Generates 64-bit hash from a byte array with a default seed.
949      *
950      * <p><strong>This is not part of the original MurmurHash3 {@code c++} implementation.</strong></p>
951      *
952      * <p>
953      * This is a Murmur3-like 64-bit variant.
954      * The method does not produce the same result as either half of the hash bytes from
955      * {@linkplain #hash128x64(byte[])} with the same byte data.
956      * This method will be removed in a future release.
957      * </p>
958      *
959      * <p>
960      * Note: The sign extension bug in {@link #hash64(byte[], int, int, int)} does not effect
961      * this result as the default seed is positive.
962      * </p>
963      *
964      * <p>
965      * This is a helper method that will produce the same result as:
966      * </p>
967      *
968      * <pre>
969      * int offset = 0;
970      * int seed = 104729;
971      * long hash = MurmurHash3.hash64(data, offset, data.length, seed);
972      * </pre>
973      *
974      * @param data The input byte array
975      * @return The 64-bit hash
976      * @see #hash64(byte[], int, int, int)
977      * @deprecated Not part of the MurmurHash3 implementation.
978      * Use half of the hash bytes from {@link #hash128x64(byte[])}.
979      */
980     @Deprecated
981     public static long hash64(final byte[] data) {
982         return hash64(data, 0, data.length, DEFAULT_SEED);
983     }
984 
985     /**
986      * Generates 64-bit hash from a byte array with the given offset and length and a default seed.
987      *
988      * <p><strong>
989      * This is not part of the original MurmurHash3 {@code c++} implementation.
990      * </strong></p>
991      *
992      * <p>
993      * This is a Murmur3-like 64-bit variant.
994      * The method does not produce the same result as either half of the hash bytes from
995      * {@linkplain #hash128x64(byte[])} with the same byte data.
996      * This method will be removed in a future release.
997      * </p>
998      *
999      * <p>
1000      * Note: The sign extension bug in {@link #hash64(byte[], int, int, int)} does not effect
1001      * this result as the default seed is positive.
1002      * </p>
1003      *
1004      * <p>
1005      * This is a helper method that will produce the same result as:
1006      * </p>
1007      *
1008      * <pre>
1009      * int seed = 104729;
1010      * long hash = MurmurHash3.hash64(data, offset, length, seed);
1011      * </pre>
1012      *
1013      * @param data The input byte array
1014      * @param offset The offset of data
1015      * @param length The length of array
1016      * @return The 64-bit hash
1017      * @see #hash64(byte[], int, int, int)
1018      * @deprecated Not part of the MurmurHash3 implementation.
1019      * Use half of the hash bytes from {@link #hash128x64(byte[], int, int, int)}.
1020      */
1021     @Deprecated
1022     public static long hash64(final byte[] data, final int offset, final int length) {
1023         return hash64(data, offset, length, DEFAULT_SEED);
1024     }
1025 
1026     /**
1027      * Generates 64-bit hash from a byte array with the given offset, length and seed.
1028      *
1029      * <p><strong>This is not part of the original MurmurHash3 {@code c++} implementation.</strong></p>
1030      *
1031      * <p>
1032      * This is a Murmur3-like 64-bit variant.
1033      * This method will be removed in a future release.
1034      * </p>
1035      *
1036      * <p>
1037      * This implementation contains a sign-extension bug in the seed initialization.
1038      * This manifests if the seed is negative.
1039      * </p>
1040      *
1041      * <p>
1042      * This algorithm processes 8 bytes chunks of data in a manner similar to the 16 byte chunks
1043      * of data processed in the MurmurHash3 {@code MurmurHash3_x64_128} method. However the hash
1044      * is not mixed with a hash chunk from the next 8 bytes of data. The method will not return
1045      * the same value as the first or second 64-bits of the function
1046      * {@link #hash128(byte[], int, int, int)}.
1047      * </p>
1048      *
1049      * <p>
1050      * Use of this method is not advised. Use the first long returned from
1051      * {@link #hash128x64(byte[], int, int, int)}.
1052      * </p>
1053      *
1054      * @param data The input byte array
1055      * @param offset The offset of data
1056      * @param length The length of array
1057      * @param seed The initial seed value
1058      * @return The 64-bit hash
1059      * @deprecated Not part of the MurmurHash3 implementation.
1060      * Use half of the hash bytes from {@link #hash128x64(byte[], int, int, int)}.
1061      */
1062     @Deprecated
1063     public static long hash64(final byte[] data, final int offset, final int length, final int seed) {
1064         // Note: This fails to apply masking using 0xffffffffL to the seed.
1065         long hash = seed;
1066         final int nblocks = length >> 3;
1067         // body
1068         for (int i = 0; i < nblocks; i++) {
1069             final int index = offset + (i << 3);
1070             long k = MurmurHash.getLittleEndianLong(data, index);
1071             // mix functions
1072             k *= C1;
1073             k = Long.rotateLeft(k, R1);
1074             k *= C2;
1075             hash ^= k;
1076             hash = Long.rotateLeft(hash, R2) * M + N1;
1077         }
1078         // tail
1079         long k1 = 0;
1080         final int index = offset + (nblocks << 3);
1081         switch (offset + length - index) {
1082         case 7:
1083             k1 ^= ((long) data[index + 6] & 0xff) << 48;
1084             // falls-through
1085         case 6:
1086             k1 ^= ((long) data[index + 5] & 0xff) << 40;
1087             // falls-through
1088         case 5:
1089             k1 ^= ((long) data[index + 4] & 0xff) << 32;
1090             // falls-through
1091         case 4:
1092             k1 ^= ((long) data[index + 3] & 0xff) << 24;
1093             // falls-through
1094         case 3:
1095             k1 ^= ((long) data[index + 2] & 0xff) << 16;
1096             // falls-through
1097         case 2:
1098             k1 ^= ((long) data[index + 1] & 0xff) << 8;
1099             // falls-through
1100         case 1:
1101             k1 ^= (long) data[index] & 0xff;
1102             k1 *= C1;
1103             k1 = Long.rotateLeft(k1, R1);
1104             k1 *= C2;
1105             hash ^= k1;
1106         }
1107         // finalization
1108         hash ^= length;
1109         return fmix64(hash);
1110     }
1111 
1112     /**
1113      * Generates 64-bit hash from an int with a default seed.
1114      *
1115      * <p><strong>
1116      * This is not part of the original MurmurHash3 {@code c++} implementation.
1117      * </strong></p>
1118      *
1119      * <p>
1120      * This is a Murmur3-like 64-bit variant.
1121      * The method does not produce the same result as either half of the hash bytes from
1122      * {@linkplain #hash128x64(byte[])} with the same byte data from the {@code int}.
1123      * This method will be removed in a future release.
1124      * </p>
1125      *
1126      * <p>
1127      * Note: The sign extension bug in {@link #hash64(byte[], int, int, int)} does not effect
1128      * this result as the default seed is positive.
1129      * </p>
1130      *
1131      * <p>
1132      * This is a helper method that will produce the same result as:
1133      * </p>
1134      *
1135      * <pre>
1136      * int offset = 0;
1137      * int seed = 104729;
1138      * long hash = MurmurHash3.hash64(ByteBuffer.allocate(4)
1139      *                                          .putInt(data)
1140      *                                          .array(), offset, 4, seed);
1141      * </pre>
1142      *
1143      * @param data The int to hash
1144      * @return The 64-bit hash
1145      * @see #hash64(byte[], int, int, int)
1146      * @deprecated Not part of the MurmurHash3 implementation.
1147      * Use half of the hash bytes from {@link #hash128x64(byte[])} with the bytes from the {@code int}.
1148      */
1149     @Deprecated
1150     public static long hash64(final int data) {
1151         long k1 = Integer.reverseBytes(data) & -1L >>> 32;
1152         long hash = DEFAULT_SEED;
1153         k1 *= C1;
1154         k1 = Long.rotateLeft(k1, R1);
1155         k1 *= C2;
1156         hash ^= k1;
1157         // finalization
1158         hash ^= Integer.BYTES;
1159         return fmix64(hash);
1160     }
1161 
1162     /**
1163      * Generates 64-bit hash from a long with a default seed.
1164      *
1165      * <p><strong>
1166      * This is not part of the original MurmurHash3 {@code c++} implementation.
1167      * </strong></p>
1168      *
1169      * <p>
1170      * This is a Murmur3-like 64-bit variant.
1171      * The method does not produce the same result as either half of the hash bytes from
1172      * {@linkplain #hash128x64(byte[])} with the same byte data from the {@code long}.
1173      * This method will be removed in a future release.
1174      * </p>
1175      *
1176      * <p>
1177      * Note: The sign extension bug in {@link #hash64(byte[], int, int, int)} does not effect
1178      * this result as the default seed is positive.
1179      * </p>
1180      *
1181      * <p>
1182      * This is a helper method that will produce the same result as:
1183      * </p>
1184      *
1185      * <pre>
1186      * int offset = 0;
1187      * int seed = 104729;
1188      * long hash = MurmurHash3.hash64(ByteBuffer.allocate(8)
1189      *                                          .putLong(data)
1190      *                                          .array(), offset, 8, seed);
1191      * </pre>
1192      *
1193      * @param data The long to hash
1194      * @return The 64-bit hash
1195      * @see #hash64(byte[], int, int, int)
1196      * @deprecated Not part of the MurmurHash3 implementation.
1197      * Use half of the hash bytes from {@link #hash128x64(byte[])} with the bytes from the {@code long}.
1198      */
1199     @Deprecated
1200     public static long hash64(final long data) {
1201         long hash = DEFAULT_SEED;
1202         long k = Long.reverseBytes(data);
1203         // mix functions
1204         k *= C1;
1205         k = Long.rotateLeft(k, R1);
1206         k *= C2;
1207         hash ^= k;
1208         hash = Long.rotateLeft(hash, R2) * M + N1;
1209         // finalization
1210         hash ^= Long.BYTES;
1211         return fmix64(hash);
1212     }
1213 
1214     /**
1215      * Generates 64-bit hash from a short with a default seed.
1216      *
1217      * <p><strong>This is not part of the original MurmurHash3 {@code c++} implementation.</strong></p>
1218      *
1219      * <p>
1220      * This is a Murmur3-like 64-bit variant.
1221      * The method does not produce the same result as either half of the hash bytes from
1222      * {@linkplain #hash128x64(byte[])} with the same byte data from the {@code short}.
1223      * This method will be removed in a future release.
1224      * </p>
1225      *
1226      * <p>
1227      * Note: The sign extension bug in {@link #hash64(byte[], int, int, int)} does not effect
1228      * this result as the default seed is positive.
1229      * </p>
1230      *
1231      * <p>
1232      * This is a helper method that will produce the same result as:
1233      * </p>
1234      *
1235      * <pre>
1236      * int offset = 0;
1237      * int seed = 104729;
1238      * long hash = MurmurHash3.hash64(ByteBuffer.allocate(2)
1239      *                                          .putShort(data)
1240      *                                          .array(), offset, 2, seed);
1241      * </pre>
1242      *
1243      * @param data The short to hash
1244      * @return The 64-bit hash
1245      * @see #hash64(byte[], int, int, int)
1246      * @deprecated Not part of the MurmurHash3 implementation.
1247      * Use half of the hash bytes from {@link #hash128x64(byte[])} with the bytes from the {@code short}.
1248      */
1249     @Deprecated
1250     public static long hash64(final short data) {
1251         long hash = DEFAULT_SEED;
1252         long k1 = 0;
1253         k1 ^= ((long) data & 0xff) << 8;
1254         k1 ^= (long) ((data & 0xFF00) >> 8) & 0xff;
1255         k1 *= C1;
1256         k1 = Long.rotateLeft(k1, R1);
1257         k1 *= C2;
1258         hash ^= k1;
1259 
1260         // finalization
1261         hash ^= Short.BYTES;
1262         return fmix64(hash);
1263     }
1264 
1265     /**
1266      * Performs the intermediate mix step of the 32-bit hash function {@code MurmurHash3_x86_32}.
1267      *
1268      * @param k The data to add to the hash
1269      * @param hash The current hash
1270      * @return The new hash
1271      */
1272     private static int mix32(int k, int hash) {
1273         k *= C1_32;
1274         k = Integer.rotateLeft(k, R1_32);
1275         k *= C2_32;
1276         hash ^= k;
1277         return Integer.rotateLeft(hash, R2_32) * M_32 + N_32;
1278     }
1279 
1280     /** No instance methods. */
1281     private MurmurHash3() {
1282     }
1283 }