View Javadoc
1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one or more
3    * contributor license agreements.  See the NOTICE file distributed with
4    * this work for additional information regarding copyright ownership.
5    * The ASF licenses this file to You under the Apache License, Version 2.0
6    * (the "License"); you may not use this file except in compliance with
7    * the License.  You may obtain a copy of the License at
8    *
9    *      https://www.apache.org/licenses/LICENSE-2.0
10   *
11   * Unless required by applicable law or agreed to in writing, software
12   * distributed under the License is distributed on an "AS IS" BASIS,
13   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14   * See the License for the specific language governing permissions and
15   * limitations under the License.
16   */
17  
18  package org.apache.commons.codec.digest;
19  
20  import org.apache.commons.codec.binary.StringUtils;
21  
22  /**
23   * Implements the MurmurHash3 32-bit and 128-bit hash functions.
24   *
25   * <p>
26   * MurmurHash is a non-cryptographic hash function suitable for general hash-based lookup. The name comes from two basic
27   * operations, multiply (MU) and rotate (R), used in its inner loop. Unlike cryptographic hash functions, it is not
28   * specifically designed to be difficult to reverse by an adversary, making it unsuitable for cryptographic purposes.
29   * </p>
30   *
31   * <p>
32   * This contains a Java port of the 32-bit hash function {@code MurmurHash3_x86_32} and the 128-bit hash function
33   * {@code MurmurHash3_x64_128} from Austin Appleby's original {@code c++} code in SMHasher.
34   * </p>
35   *
36   * <p>
37   * This is public domain code with no copyrights. From home page of
38   * <a href="https://github.com/aappleby/smhasher">SMHasher</a>:
39   * </p>
40   *
41   * <blockquote>
42   * "All MurmurHash versions are public domain software, and the author disclaims all copyright to their
43   * code."
44   * </blockquote>
45   *
46   * <p>
47   * Original adaption from <a href="https://hive.apache.org/">Apache Hive</a>.
48   * That adaption contains a {@code hash64} method that is not part of the original
49   * MurmurHash3 code. It is not recommended to use these methods. They will be removed in a future release. To obtain a
50   * 64-bit hash use half of the bits from the {@code hash128x64} methods using the input data converted to bytes.
51   * </p>
52   *
53   * @see <a href="https://en.wikipedia.org/wiki/MurmurHash">MurmurHash</a>
54   * @see <a href="https://github.com/aappleby/smhasher/blob/master/src/MurmurHash3.cpp"> Original MurmurHash3 C++
55   *      code</a>
56   * @see <a href=
57   *      "https://github.com/apache/hive/blob/master/storage-api/src/java/org/apache/hive/common/util/Murmur3.java">
58   *      Apache Hive Murmer3</a>
59   * @since 1.13
60   */
61  public final class MurmurHash3 {
62  
63      /**
64       * Generates 32-bit hash from input bytes. Bytes can be added incrementally and the new
65       * hash computed.
66       *
67       * <p>
68       * This is an implementation of the 32-bit hash function {@code MurmurHash3_x86_32}
69       * from Austin Appleby's original MurmurHash3 {@code c++} code in SMHasher.
70       * </p>
71       *
72       * <p>
73       * This implementation contains a sign-extension bug in the finalization step of
74       * any bytes left over from dividing the length by 4. This manifests if any of these
75       * bytes are negative.
76       * </p>
77       *
78       * @deprecated Use IncrementalHash32x86. This corrects the processing of trailing bytes.
79       */
80      @Deprecated
81      public static class IncrementalHash32 extends IncrementalHash32x86 {
82  
83          /**
84           * Constructs a new instance.
85           */
86          public IncrementalHash32() {
87              // empty
88          }
89  
90          /**
91           * {@inheritDoc}
92           *
93           * <p>
94           * This implementation contains a sign-extension bug in the finalization step of
95           * any bytes left over from dividing the length by 4. This manifests if any of these
96           * bytes are negative.
97           * <p>
98           *
99           * @deprecated Use IncrementalHash32x86. This corrects the processing of trailing bytes.
100          */
101         @Override
102         @Deprecated
103         int finalise(final int hash, final int unprocessedLength, final byte[] unprocessed, final int totalLen) {
104             int result = hash;
105             // Note: This fails to apply masking using 0xff to the 3 remaining bytes.
106             int k1 = 0;
107             switch (unprocessedLength) {
108             case 3:
109                 k1 ^= unprocessed[2] << 16;
110                 // falls-through
111             case 2:
112                 k1 ^= unprocessed[1] << 8;
113                 // falls-through
114             case 1:
115                 k1 ^= unprocessed[0];
116                 // mix functions
117                 k1 *= C1_32;
118                 k1 = Integer.rotateLeft(k1, R1_32);
119                 k1 *= C2_32;
120                 result ^= k1;
121             }
122             // finalization
123             result ^= totalLen;
124             return fmix32(result);
125         }
126     }
127 
128     /**
129      * Generates 32-bit hash from input bytes. Bytes can be added incrementally and the new
130      * hash computed.
131      *
132      * <p>
133      * This is an implementation of the 32-bit hash function {@code MurmurHash3_x86_32}
134      * from Austin Appleby's original MurmurHash3 {@code c++} code in SMHasher.
135      * </p>
136      *
137      * @since 1.14
138      */
139     public static class IncrementalHash32x86 {
140 
141         /** The size of byte blocks that are processed together. */
142         private static final int BLOCK_SIZE = 4;
143 
144         /**
145          * Combines the bytes using an Or operation ({@code | } in a little-endian representation
146          * of a 32-bit integer; byte 1 will be the least significant byte, byte 4 the most
147          * significant.
148          *
149          * @param b1 The first byte.
150          * @param b2 The second byte.
151          * @param b3 The third byte.
152          * @param b4 The fourth byte.
153          * @return The 32-bit integer.
154          */
155         private static int orBytes(final byte b1, final byte b2, final byte b3, final byte b4) {
156             return b1 & 0xff | (b2 & 0xff) << 8 | (b3 & 0xff) << 16 | (b4 & 0xff) << 24;
157         }
158 
159         /** Up to 3 unprocessed bytes from input data. */
160         private final byte[] unprocessed = new byte[3];
161 
162         /** The number of unprocessed bytes in the tail data. */
163         private int unprocessedLength;
164 
165         /** The total number of input bytes added since the start. */
166         private int totalLen;
167 
168         /**
169          * The current running hash.
170          * This must be finalized to generate the 32-bit hash value.
171          */
172         private int hash;
173 
174         /**
175          * Constructs a new instance.
176          */
177         public IncrementalHash32x86() {
178             // empty
179         }
180 
181         /**
182          * Adds the byte array to the current incremental hash.
183          *
184          * @param data The input byte array.
185          * @param offset The offset of data.
186          * @param length The length of array.
187          */
188         public final void add(final byte[] data, final int offset, final int length) {
189             if (length <= 0) {
190                 // Nothing to add
191                 return;
192             }
193             totalLen += length;
194             // Process the bytes in blocks of 4.
195             // New bytes must be added to any current unprocessed bytes,
196             // then processed in blocks of 4 and the remaining bytes saved:
197             //
198             //    |--|---------------------------|--|
199             // unprocessed
200             //                main block
201             //                                remaining
202 
203             // Check if the unprocessed bytes and new bytes can fill a block of 4.
204             // Make this overflow safe in the event that length is Integer.MAX_VALUE.
205             // Equivalent to: (unprocessedLength + length < BLOCK_SIZE)
206             if (unprocessedLength + length - BLOCK_SIZE < 0) {
207                 // Not enough so add to the unprocessed bytes
208                 System.arraycopy(data, offset, unprocessed, unprocessedLength, length);
209                 unprocessedLength += length;
210                 return;
211             }
212             // Combine unprocessed bytes with new bytes.
213             final int newOffset;
214             final int newLength;
215             if (unprocessedLength > 0) {
216                 int k = -1;
217                 switch (unprocessedLength) {
218                 case 1:
219                     k = orBytes(unprocessed[0], data[offset], data[offset + 1], data[offset + 2]);
220                     break;
221                 case 2:
222                     k = orBytes(unprocessed[0], unprocessed[1], data[offset], data[offset + 1]);
223                     break;
224                 case 3:
225                     k = orBytes(unprocessed[0], unprocessed[1], unprocessed[2], data[offset]);
226                     break;
227                 default:
228                     throw new IllegalStateException("Unprocessed length should be 1, 2, or 3: " + unprocessedLength);
229                 }
230                 hash = mix32(k, hash);
231                 // Update the offset and length
232                 final int consumed = BLOCK_SIZE - unprocessedLength;
233                 newOffset = offset + consumed;
234                 newLength = length - consumed;
235             } else {
236                 newOffset = offset;
237                 newLength = length;
238             }
239             // Main processing of blocks of 4 bytes
240             final int nblocks = newLength >> 2;
241 
242             for (int i = 0; i < nblocks; i++) {
243                 final int index = newOffset + (i << 2);
244                 final int k = MurmurHash.getLittleEndianInt(data, index);
245                 hash = mix32(k, hash);
246             }
247             // Save left-over unprocessed bytes
248             final int consumed = nblocks << 2;
249             unprocessedLength = newLength - consumed;
250             if (unprocessedLength != 0) {
251                 System.arraycopy(data, newOffset + consumed, unprocessed, 0, unprocessedLength);
252             }
253         }
254 
255         /**
256          * Generates the 32-bit hash value. Repeat calls to this method with no additional data
257          * will generate the same hash value.
258          *
259          * @return The 32-bit hash
260          */
261         public final int end() {
262             // Allow calling end() again after adding no data to return the same result.
263             return finalise(hash, unprocessedLength, unprocessed, totalLen);
264         }
265 
266         /**
267          * Finalizes the running hash to the output 32-bit hash by processing remaining bytes
268          * and performing final mixing.
269          *
270          * @param hash The running hash.
271          * @param unprocessedLength The number of unprocessed bytes in the tail data.
272          * @param unprocessed Up to 3 unprocessed bytes from input data.
273          * @param totalLen The total number of input bytes added since the start.
274          * @return The 32-bit hash.
275          */
276         int finalise(final int hash, final int unprocessedLength, final byte[] unprocessed, final int totalLen) {
277             int result = hash;
278             int k1 = 0;
279             switch (unprocessedLength) {
280             case 3:
281                 k1 ^= (unprocessed[2] & 0xff) << 16;
282                 // falls-through
283             case 2:
284                 k1 ^= (unprocessed[1] & 0xff) << 8;
285                 // falls-through
286             case 1:
287                 k1 ^= unprocessed[0] & 0xff;
288                 // mix functions
289                 k1 *= C1_32;
290                 k1 = Integer.rotateLeft(k1, R1_32);
291                 k1 *= C2_32;
292                 result ^= k1;
293             }
294             // finalization
295             result ^= totalLen;
296             return fmix32(result);
297         }
298 
299         /**
300          * Starts a new incremental hash.
301          *
302          * @param seed The initial seed value.
303          */
304         public final void start(final int seed) {
305             // Reset
306             unprocessedLength = totalLen = 0;
307             hash = seed;
308         }
309     }
310 
311     /**
312      * A random number to use for a hash code.
313      *
314      * @deprecated This is not used internally and will be removed in a future release.
315      */
316     @Deprecated
317     public static final long NULL_HASHCODE = 2862933555777941757L;
318     /**
319      * A default seed to use for the murmur hash algorithm.
320      * Has the value {@code 104729}.
321      */
322     public static final int DEFAULT_SEED = 104729;
323     // Constants for 32-bit variant
324     private static final int C1_32 = 0xcc9e2d51;
325     private static final int C2_32 = 0x1b873593;
326     private static final int R1_32 = 15;
327     private static final int R2_32 = 13;
328 
329     private static final int M_32 = 5;
330     private static final int N_32 = 0xe6546b64;
331     // Constants for 128-bit variant
332     private static final long C1 = 0x87c37b91114253d5L;
333     private static final long C2 = 0x4cf5ad432745937fL;
334     private static final int R1 = 31;
335     private static final int R2 = 27;
336     private static final int R3 = 33;
337     private static final int M = 5;
338 
339     private static final int N1 = 0x52dce729;
340 
341     private static final int N2 = 0x38495ab5;
342 
343     /**
344      * Performs the final avalanche mix step of the 32-bit hash function {@code MurmurHash3_x86_32}.
345      *
346      * @param hash The current hash.
347      * @return The final hash.
348      */
349     private static int fmix32(int hash) {
350         hash ^= hash >>> 16;
351         hash *= 0x85ebca6b;
352         hash ^= hash >>> 13;
353         hash *= 0xc2b2ae35;
354         hash ^= hash >>> 16;
355         return hash;
356     }
357 
358     /**
359      * Performs the final avalanche mix step of the 64-bit hash function {@code MurmurHash3_x64_128}.
360      *
361      * @param hash The current hash.
362      * @return The final hash.
363      */
364     private static long fmix64(long hash) {
365         hash ^= hash >>> 33;
366         hash *= 0xff51afd7ed558ccdL;
367         hash ^= hash >>> 33;
368         hash *= 0xc4ceb9fe1a85ec53L;
369         hash ^= hash >>> 33;
370         return hash;
371     }
372 
373     /**
374      * Generates 128-bit hash from the byte array with a default seed.
375      * This is a helper method that will produce the same result as:
376      *
377      * <pre>
378      * int offset = 0;
379      * int seed = 104729;
380      * int hash = MurmurHash3.hash128(data, offset, data.length, seed);
381      * </pre>
382      *
383      * <p>
384      * Note: The sign extension bug in {@link #hash128(byte[], int, int, int)} does not effect
385      * this result as the default seed is positive.
386      * </p>
387      *
388      * @param data The input byte array.
389      * @return The 128-bit hash (2 longs).
390      * @see #hash128(byte[], int, int, int)
391      */
392     public static long[] hash128(final byte[] data) {
393         return hash128(data, 0, data.length, DEFAULT_SEED);
394     }
395 
396     /**
397      * Generates 128-bit hash from the byte array with the given offset, length and seed.
398      *
399      * <p>
400      * This is an implementation of the 128-bit hash function {@code MurmurHash3_x64_128}
401      * from Austin Appleby's original MurmurHash3 {@code c++} code in SMHasher.
402      * </p>
403      *
404      * <p>
405      * This implementation contains a sign-extension bug in the seed initialization.
406      * This manifests if the seed is negative.
407      * </p>
408      *
409      * @param data The input byte array.
410      * @param offset The first element of array.
411      * @param length The length of array.
412      * @param seed The initial seed value.
413      * @return The 128-bit hash (2 longs).
414      * @deprecated Use {@link #hash128x64(byte[], int, int, int)}. This corrects the seed initialization.
415      */
416     @Deprecated
417     public static long[] hash128(final byte[] data, final int offset, final int length, final int seed) {
418         // Note: This deliberately fails to apply masking using 0xffffffffL to the seed
419         // to maintain behavioral compatibility with the original version.
420         // The implicit conversion to a long will extend a negative sign
421         // bit through the upper 32-bits of the long seed. These should be zero.
422         return hash128x64Internal(data, offset, length, seed);
423     }
424 
425     /**
426      * Generates 128-bit hash from a string with a default seed.
427      *
428      * <p>
429      * Before 1.14 the string was converted using default encoding. Since 1.14 the string is converted to bytes using UTF-8 encoding.
430      * </p>
431      * <p>
432      * This is a helper method that will produce the same result as:
433      * </p>
434      *
435      * <pre>
436      * int offset = 0;
437      * int seed = 104729;
438      * byte[] bytes = data.getBytes(StandardCharsets.UTF_8);
439      * int hash = MurmurHash3.hash128(bytes, offset, bytes.length, seed);
440      * </pre>
441      *
442      * <p>
443      * Note: The sign extension bug in {@link #hash128(byte[], int, int, int)} does not effect this result as the default seed is positive.
444      * </p>
445      *
446      * @param data The input String.
447      * @return The 128-bit hash (2 longs).
448      * @see #hash128(byte[], int, int, int)
449      * @deprecated Use {@link #hash128x64(byte[])} using the bytes returned from {@link String#getBytes(java.nio.charset.Charset)}.
450      */
451     @Deprecated
452     public static long[] hash128(final String data) {
453         final byte[] bytes = StringUtils.getBytesUtf8(data);
454         return hash128(bytes, 0, bytes.length, DEFAULT_SEED);
455     }
456 
457     /**
458      * Generates 128-bit hash from the byte array with a seed of zero.
459      * This is a helper method that will produce the same result as:
460      *
461      * <pre>
462      * int offset = 0;
463      * int seed = 0;
464      * int hash = MurmurHash3.hash128x64(data, offset, data.length, seed);
465      * </pre>
466      *
467      * @param data The input byte array.
468      * @return The 128-bit hash (2 longs).
469      * @see #hash128x64(byte[], int, int, int)
470      * @since 1.14
471      */
472     public static long[] hash128x64(final byte[] data) {
473         return hash128x64(data, 0, data.length, 0);
474     }
475 
476     /**
477      * Generates 128-bit hash from the byte array with the given offset, length and seed.
478      *
479      * <p>
480      * This is an implementation of the 128-bit hash function {@code MurmurHash3_x64_128}
481      * from Austin Appleby's original MurmurHash3 {@code c++} code in SMHasher.
482      * </p>
483      *
484      * @param data The input byte array.
485      * @param offset The first element of array.
486      * @param length The length of array.
487      * @param seed The initial seed value.
488      * @return The 128-bit hash (2 longs).
489      * @since 1.14
490      */
491     public static long[] hash128x64(final byte[] data, final int offset, final int length, final int seed) {
492         // Use an unsigned 32-bit integer as the seed
493         return hash128x64Internal(data, offset, length, seed & 0xffffffffL);
494     }
495 
496     /**
497      * Generates 128-bit hash from the byte array with the given offset, length and seed.
498      *
499      * <p>
500      * This is an implementation of the 128-bit hash function {@code MurmurHash3_x64_128}
501      * from Austin Appleby's original MurmurHash3 {@code c++} code in SMHasher.
502      * </p>
503      *
504      * @param data The input byte array.
505      * @param offset The first element of array.
506      * @param length The length of array.
507      * @param seed The initial seed value.
508      * @return The 128-bit hash (2 longs).
509      */
510     private static long[] hash128x64Internal(final byte[] data, final int offset, final int length, final long seed) {
511         long h1 = seed;
512         long h2 = seed;
513         final int nblocks = length >> 4;
514         // body
515         for (int i = 0; i < nblocks; i++) {
516             final int index = offset + (i << 4);
517             long k1 = MurmurHash.getLittleEndianLong(data, index);
518             long k2 = MurmurHash.getLittleEndianLong(data, index + 8);
519             // mix functions for k1
520             k1 *= C1;
521             k1 = Long.rotateLeft(k1, R1);
522             k1 *= C2;
523             h1 ^= k1;
524             h1 = Long.rotateLeft(h1, R2);
525             h1 += h2;
526             h1 = h1 * M + N1;
527             // mix functions for k2
528             k2 *= C2;
529             k2 = Long.rotateLeft(k2, R3);
530             k2 *= C1;
531             h2 ^= k2;
532             h2 = Long.rotateLeft(h2, R1);
533             h2 += h1;
534             h2 = h2 * M + N2;
535         }
536         // tail
537         long k1 = 0;
538         long k2 = 0;
539         final int index = offset + (nblocks << 4);
540         switch (offset + length - index) {
541         case 15:
542             k2 ^= ((long) data[index + 14] & 0xff) << 48;
543             // falls-through
544         case 14:
545             k2 ^= ((long) data[index + 13] & 0xff) << 40;
546             // falls-through
547         case 13:
548             k2 ^= ((long) data[index + 12] & 0xff) << 32;
549             // falls-through
550         case 12:
551             k2 ^= ((long) data[index + 11] & 0xff) << 24;
552             // falls-through
553         case 11:
554             k2 ^= ((long) data[index + 10] & 0xff) << 16;
555             // falls-through
556         case 10:
557             k2 ^= ((long) data[index + 9] & 0xff) << 8;
558             // falls-through
559         case 9:
560             k2 ^= data[index + 8] & 0xff;
561             k2 *= C2;
562             k2 = Long.rotateLeft(k2, R3);
563             k2 *= C1;
564             h2 ^= k2;
565             // falls-through
566         case 8:
567             k1 ^= ((long) data[index + 7] & 0xff) << 56;
568             // falls-through
569         case 7:
570             k1 ^= ((long) data[index + 6] & 0xff) << 48;
571             // falls-through
572         case 6:
573             k1 ^= ((long) data[index + 5] & 0xff) << 40;
574             // falls-through
575         case 5:
576             k1 ^= ((long) data[index + 4] & 0xff) << 32;
577             // falls-through
578         case 4:
579             k1 ^= ((long) data[index + 3] & 0xff) << 24;
580             // falls-through
581         case 3:
582             k1 ^= ((long) data[index + 2] & 0xff) << 16;
583             // falls-through
584         case 2:
585             k1 ^= ((long) data[index + 1] & 0xff) << 8;
586             // falls-through
587         case 1:
588             k1 ^= data[index] & 0xff;
589             k1 *= C1;
590             k1 = Long.rotateLeft(k1, R1);
591             k1 *= C2;
592             h1 ^= k1;
593         }
594         // finalization
595         h1 ^= length;
596         h2 ^= length;
597 
598         h1 += h2;
599         h2 += h1;
600 
601         h1 = fmix64(h1);
602         h2 = fmix64(h2);
603 
604         h1 += h2;
605         h2 += h1;
606         return new long[] { h1, h2 };
607     }
608 
609     /**
610      * Generates 32-bit hash from the byte array with a default seed.
611      * This is a helper method that will produce the same result as:
612      *
613      * <pre>
614      * int offset = 0;
615      * int seed = 104729;
616      * int hash = MurmurHash3.hash32(data, offset, data.length, seed);
617      * </pre>
618      *
619      * <p>
620      * This implementation contains a sign-extension bug in the finalization step of
621      * any bytes left over from dividing the length by 4. This manifests if any of these
622      * bytes are negative.
623      * </p>
624      *
625      * @param data The input byte array.
626      * @return The 32-bit hash.
627      * @see #hash32(byte[], int, int, int)
628      * @deprecated Use {@link #hash32x86(byte[], int, int, int)}. This corrects the processing of trailing bytes.
629      */
630     @Deprecated
631     public static int hash32(final byte[] data) {
632         return hash32(data, 0, data.length, DEFAULT_SEED);
633     }
634 
635     /**
636      * Generates 32-bit hash from the byte array with the given length and a default seed.
637      * This is a helper method that will produce the same result as:
638      *
639      * <pre>
640      * int offset = 0;
641      * int seed = 104729;
642      * int hash = MurmurHash3.hash32(data, offset, length, seed);
643      * </pre>
644      *
645      * <p>
646      * This implementation contains a sign-extension bug in the finalization step of
647      * any bytes left over from dividing the length by 4. This manifests if any of these
648      * bytes are negative.
649      * </p>
650      *
651      * @param data The input byte array.
652      * @param length The length of array.
653      * @return The 32-bit hash.
654      * @see #hash32(byte[], int, int, int)
655      * @deprecated Use {@link #hash32x86(byte[], int, int, int)}. This corrects the processing of trailing bytes.
656      */
657     @Deprecated
658     public static int hash32(final byte[] data, final int length) {
659         return hash32(data, length, DEFAULT_SEED);
660     }
661 
662     /**
663      * Generates 32-bit hash from the byte array with the given length and seed. This is a
664      * helper method that will produce the same result as:
665      *
666      * <pre>
667      * int offset = 0;
668      * int hash = MurmurHash3.hash32(data, offset, length, seed);
669      * </pre>
670      *
671      * <p>
672      * This implementation contains a sign-extension bug in the finalization step of
673      * any bytes left over from dividing the length by 4. This manifests if any of these
674      * bytes are negative.
675      * </p>
676      *
677      * @param data The input byte array.
678      * @param length The length of array.
679      * @param seed The initial seed value.
680      * @return The 32-bit hash.
681      * @see #hash32(byte[], int, int, int)
682      * @deprecated Use {@link #hash32x86(byte[], int, int, int)}. This corrects the processing of trailing bytes.
683      */
684     @Deprecated
685     public static int hash32(final byte[] data, final int length, final int seed) {
686         return hash32(data, 0, length, seed);
687     }
688 
689     /**
690      * Generates 32-bit hash from the byte array with the given offset, length and seed.
691      *
692      * <p>
693      * This is an implementation of the 32-bit hash function {@code MurmurHash3_x86_32}
694      * from Austin Appleby's original MurmurHash3 {@code c++} code in SMHasher.
695      * </p>
696      *
697      * <p>
698      * This implementation contains a sign-extension bug in the finalization step of
699      * any bytes left over from dividing the length by 4. This manifests if any of these
700      * bytes are negative.
701      * </p>
702      *
703      * @param data The input byte array.
704      * @param offset The offset of data.
705      * @param length The length of array.
706      * @param seed The initial seed value.
707      * @return The 32-bit hash
708      * @deprecated Use {@link #hash32x86(byte[], int, int, int)}. This corrects the processing of trailing bytes.
709      */
710     @Deprecated
711     public static int hash32(final byte[] data, final int offset, final int length, final int seed) {
712         int hash = seed;
713         final int nblocks = length >> 2;
714         // body
715         for (int i = 0; i < nblocks; i++) {
716             final int index = offset + (i << 2);
717             final int k = MurmurHash.getLittleEndianInt(data, index);
718             hash = mix32(k, hash);
719         }
720         // tail
721         // Note: This fails to apply masking using 0xff to the 3 remaining bytes.
722         final int index = offset + (nblocks << 2);
723         int k1 = 0;
724         switch (offset + length - index) {
725         case 3:
726             k1 ^= data[index + 2] << 16;
727             // falls-through
728         case 2:
729             k1 ^= data[index + 1] << 8;
730             // falls-through
731         case 1:
732             k1 ^= data[index];
733             // mix functions
734             k1 *= C1_32;
735             k1 = Integer.rotateLeft(k1, R1_32);
736             k1 *= C2_32;
737             hash ^= k1;
738         }
739         hash ^= length;
740         return fmix32(hash);
741     }
742 
743     /**
744      * Generates 32-bit hash from a long with a default seed value.
745      * This is a helper method that will produce the same result as:
746      *
747      * <pre>
748      * int offset = 0;
749      * int seed = 104729;
750      * int hash = MurmurHash3.hash32x86(ByteBuffer.allocate(8)
751      *                                            .putLong(data)
752      *                                            .array(), offset, 8, seed);
753      * </pre>
754      *
755      * @param data The long to hash.
756      * @return The 32-bit hash.
757      * @see #hash32x86(byte[], int, int, int)
758      */
759     public static int hash32(final long data) {
760         return hash32(data, DEFAULT_SEED);
761     }
762 
763     /**
764      * Generates 32-bit hash from a long with the given seed.
765      * This is a helper method that will produce the same result as:
766      *
767      * <pre>
768      * int offset = 0;
769      * int hash = MurmurHash3.hash32x86(ByteBuffer.allocate(8)
770      *                                            .putLong(data)
771      *                                            .array(), offset, 8, seed);
772      * </pre>
773      *
774      * @param data The long to hash.
775      * @param seed The initial seed value.
776      * @return The 32-bit hash.
777      * @see #hash32x86(byte[], int, int, int)
778      */
779     public static int hash32(final long data, final int seed) {
780         int hash = seed;
781         final long r0 = Long.reverseBytes(data);
782 
783         hash = mix32((int) r0, hash);
784         hash = mix32((int) (r0 >>> 32), hash);
785 
786         hash ^= Long.BYTES;
787         return fmix32(hash);
788     }
789 
790     /**
791      * Generates 32-bit hash from two longs with a default seed value.
792      * This is a helper method that will produce the same result as:
793      *
794      * <pre>
795      * int offset = 0;
796      * int seed = 104729;
797      * int hash = MurmurHash3.hash32x86(ByteBuffer.allocate(16)
798      *                                            .putLong(data1)
799      *                                            .putLong(data2)
800      *                                            .array(), offset, 16, seed);
801      * </pre>
802      *
803      * @param data1 The first long to hash.
804      * @param data2 The second long to hash.
805      * @return The 32-bit hash.
806      * @see #hash32x86(byte[], int, int, int)
807      */
808     public static int hash32(final long data1, final long data2) {
809         return hash32(data1, data2, DEFAULT_SEED);
810     }
811 
812     /**
813      * Generates 32-bit hash from two longs with the given seed.
814      * This is a helper method that will produce the same result as:
815      *
816      * <pre>
817      * int offset = 0;
818      * int hash = MurmurHash3.hash32x86(ByteBuffer.allocate(16)
819      *                                            .putLong(data1)
820      *                                            .putLong(data2)
821      *                                            .array(), offset, 16, seed);
822      * </pre>
823      *
824      * @param data1 The first long to hash.
825      * @param data2 The second long to hash.
826      * @param seed The initial seed value.
827      * @return The 32-bit hash.
828      * @see #hash32x86(byte[], int, int, int)
829      */
830     public static int hash32(final long data1, final long data2, final int seed) {
831         int hash = seed;
832         final long r0 = Long.reverseBytes(data1);
833         final long r1 = Long.reverseBytes(data2);
834 
835         hash = mix32((int) r0, hash);
836         hash = mix32((int) (r0 >>> 32), hash);
837         hash = mix32((int) r1, hash);
838         hash = mix32((int) (r1 >>> 32), hash);
839 
840         hash ^= Long.BYTES * 2;
841         return fmix32(hash);
842     }
843 
844     /**
845      * Generates 32-bit hash from a string with a default seed.
846      * <p>
847      * Before 1.14 the string was converted using default encoding.
848      * Since 1.14 the string is converted to bytes using UTF-8 encoding.
849      * </p>
850      * This is a helper method that will produce the same result as:
851      *
852      * <pre>
853      * int offset = 0;
854      * int seed = 104729;
855      * byte[] bytes = data.getBytes(StandardCharsets.UTF_8);
856      * int hash = MurmurHash3.hash32(bytes, offset, bytes.length, seed);
857      * </pre>
858      *
859      * <p>
860      * This implementation contains a sign-extension bug in the finalization step of
861      * any bytes left over from dividing the length by 4. This manifests if any of these
862      * bytes are negative.
863      * </p>
864      *
865      * @param data The input string
866      * @return The 32-bit hash
867      * @see #hash32(byte[], int, int, int)
868      * @deprecated Use {@link #hash32x86(byte[], int, int, int)} with the bytes returned from
869      * {@link String#getBytes(java.nio.charset.Charset)}. This corrects the processing of trailing bytes.
870      */
871     @Deprecated
872     public static int hash32(final String data) {
873         final byte[] bytes = StringUtils.getBytesUtf8(data);
874         return hash32(bytes, 0, bytes.length, DEFAULT_SEED);
875     }
876 
877     /**
878      * Generates 32-bit hash from the byte array with a seed of zero.
879      * This is a helper method that will produce the same result as:
880      *
881      * <pre>
882      * int offset = 0;
883      * int seed = 0;
884      * int hash = MurmurHash3.hash32x86(data, offset, data.length, seed);
885      * </pre>
886      *
887      * @param data The input byte array.
888      * @return The 32-bit hash.
889      * @see #hash32x86(byte[], int, int, int)
890      * @since 1.14
891      */
892     public static int hash32x86(final byte[] data) {
893         return hash32x86(data, 0, data.length, 0);
894     }
895 
896     /**
897      * Generates 32-bit hash from the byte array with the given offset, length and seed.
898      *
899      * <p>
900      * This is an implementation of the 32-bit hash function {@code MurmurHash3_x86_32}
901      * from Austin Appleby's original MurmurHash3 {@code c++} code in SMHasher.
902      * </p>
903      *
904      * @param data The input byte array.
905      * @param offset The offset of data.
906      * @param length The length of array.
907      * @param seed The initial seed value.
908      * @return The 32-bit hash.
909      * @since 1.14
910      */
911     public static int hash32x86(final byte[] data, final int offset, final int length, final int seed) {
912         int hash = seed;
913         final int nblocks = length >> 2;
914         // body
915         for (int i = 0; i < nblocks; i++) {
916             final int index = offset + (i << 2);
917             final int k = MurmurHash.getLittleEndianInt(data, index);
918             hash = mix32(k, hash);
919         }
920         // tail
921         final int index = offset + (nblocks << 2);
922         int k1 = 0;
923         switch (offset + length - index) {
924         case 3:
925             k1 ^= (data[index + 2] & 0xff) << 16;
926             // falls-through
927         case 2:
928             // falls-through
929             k1 ^= (data[index + 1] & 0xff) << 8;
930             // falls-through
931         case 1:
932             k1 ^= data[index] & 0xff;
933             // mix functions
934             k1 *= C1_32;
935             k1 = Integer.rotateLeft(k1, R1_32);
936             k1 *= C2_32;
937             hash ^= k1;
938         }
939         hash ^= length;
940         return fmix32(hash);
941     }
942 
943     /**
944      * Generates 64-bit hash from a byte array with a default seed.
945      *
946      * <p>
947      * <strong>This is not part of the original MurmurHash3 {@code c++} implementation.</strong>
948      * </p>
949      *
950      * <p>
951      * This is a Murmur3-like 64-bit variant. The method does not produce the same result as either half of the hash bytes from {@linkplain #hash128x64(byte[])}
952      * with the same byte data. This method will be removed in a future release.
953      * </p>
954      *
955      * <p>
956      * Note: The sign extension bug in {@link #hash64(byte[], int, int, int)} does not effect this result as the default seed is positive.
957      * </p>
958      *
959      * <p>
960      * This is a helper method that will produce the same result as:
961      * </p>
962      *
963      * <pre>
964      *
965      * int offset = 0;
966      * int seed = 104729;
967      * long hash = MurmurHash3.hash64(data, offset, data.length, seed);
968      * </pre>
969      *
970      * @param data The input byte array.
971      * @return The 64-bit hash.
972      * @see #hash64(byte[], int, int, int)
973      * @deprecated Not part of the MurmurHash3 implementation. Use half of the hash bytes from {@link #hash128x64(byte[])}.
974      */
975     @Deprecated
976     public static long hash64(final byte[] data) {
977         return hash64(data, 0, data.length, DEFAULT_SEED);
978     }
979 
980     /**
981      * Generates 64-bit hash from a byte array with the given offset and length and a default seed.
982      *
983      * <p>
984      * <strong> This is not part of the original MurmurHash3 {@code c++} implementation. </strong>
985      * </p>
986      *
987      * <p>
988      * This is a Murmur3-like 64-bit variant. The method does not produce the same result as either half of the hash bytes from {@linkplain #hash128x64(byte[])}
989      * with the same byte data. This method will be removed in a future release.
990      * </p>
991      *
992      * <p>
993      * Note: The sign extension bug in {@link #hash64(byte[], int, int, int)} does not effect this result as the default seed is positive.
994      * </p>
995      *
996      * <p>
997      * This is a helper method that will produce the same result as:
998      * </p>
999      *
1000      * <pre>
1001      *
1002      * int seed = 104729;
1003      * long hash = MurmurHash3.hash64(data, offset, length, seed);
1004      * </pre>
1005      *
1006      * @param data   The input byte array.
1007      * @param offset The offset of data.
1008      * @param length The length of array.
1009      * @return The 64-bit hash.
1010      * @see #hash64(byte[], int, int, int)
1011      * @deprecated Not part of the MurmurHash3 implementation. Use half of the hash bytes from {@link #hash128x64(byte[], int, int, int)}.
1012      */
1013     @Deprecated
1014     public static long hash64(final byte[] data, final int offset, final int length) {
1015         return hash64(data, offset, length, DEFAULT_SEED);
1016     }
1017 
1018     /**
1019      * Generates 64-bit hash from a byte array with the given offset, length and seed.
1020      *
1021      * <p>
1022      * <strong>This is not part of the original MurmurHash3 {@code c++} implementation.</strong>
1023      * </p>
1024      *
1025      * <p>
1026      * This is a Murmur3-like 64-bit variant. This method will be removed in a future release.
1027      * </p>
1028      *
1029      * <p>
1030      * This implementation contains a sign-extension bug in the seed initialization. This manifests if the seed is negative.
1031      * </p>
1032      *
1033      * <p>
1034      * This algorithm processes 8 bytes chunks of data in a manner similar to the 16 byte chunks of data processed in the MurmurHash3
1035      * {@code MurmurHash3_x64_128} method. However the hash is not mixed with a hash chunk from the next 8 bytes of data. The method will not return the same
1036      * value as the first or second 64-bits of the function {@link #hash128(byte[], int, int, int)}.
1037      * </p>
1038      *
1039      * <p>
1040      * Use of this method is not advised. Use the first long returned from {@link #hash128x64(byte[], int, int, int)}.
1041      * </p>
1042      *
1043      * @param data   The input byte array.
1044      * @param offset The offset of data.
1045      * @param length The length of array.
1046      * @param seed   The initial seed value.
1047      * @return The 64-bit hash.
1048      * @deprecated Not part of the MurmurHash3 implementation. Use half of the hash bytes from {@link #hash128x64(byte[], int, int, int)}.
1049      */
1050     @Deprecated
1051     public static long hash64(final byte[] data, final int offset, final int length, final int seed) {
1052         // Note: This fails to apply masking using 0xffffffffL to the seed.
1053         long hash = seed;
1054         final int nblocks = length >> 3;
1055         // body
1056         for (int i = 0; i < nblocks; i++) {
1057             final int index = offset + (i << 3);
1058             long k = MurmurHash.getLittleEndianLong(data, index);
1059             // mix functions
1060             k *= C1;
1061             k = Long.rotateLeft(k, R1);
1062             k *= C2;
1063             hash ^= k;
1064             hash = Long.rotateLeft(hash, R2) * M + N1;
1065         }
1066         // tail
1067         long k1 = 0;
1068         final int index = offset + (nblocks << 3);
1069         switch (offset + length - index) {
1070         case 7:
1071             k1 ^= ((long) data[index + 6] & 0xff) << 48;
1072             // falls-through
1073         case 6:
1074             k1 ^= ((long) data[index + 5] & 0xff) << 40;
1075             // falls-through
1076         case 5:
1077             k1 ^= ((long) data[index + 4] & 0xff) << 32;
1078             // falls-through
1079         case 4:
1080             k1 ^= ((long) data[index + 3] & 0xff) << 24;
1081             // falls-through
1082         case 3:
1083             k1 ^= ((long) data[index + 2] & 0xff) << 16;
1084             // falls-through
1085         case 2:
1086             k1 ^= ((long) data[index + 1] & 0xff) << 8;
1087             // falls-through
1088         case 1:
1089             k1 ^= (long) data[index] & 0xff;
1090             k1 *= C1;
1091             k1 = Long.rotateLeft(k1, R1);
1092             k1 *= C2;
1093             hash ^= k1;
1094         }
1095         // finalization
1096         hash ^= length;
1097         return fmix64(hash);
1098     }
1099 
1100     /**
1101      * Generates 64-bit hash from an int with a default seed.
1102      *
1103      * <p>
1104      * <strong> This is not part of the original MurmurHash3 {@code c++} implementation. </strong>
1105      * </p>
1106      *
1107      * <p>
1108      * This is a Murmur3-like 64-bit variant. The method does not produce the same result as either half of the hash bytes from {@linkplain #hash128x64(byte[])}
1109      * with the same byte data from the {@code int}. This method will be removed in a future release.
1110      * </p>
1111      *
1112      * <p>
1113      * Note: The sign extension bug in {@link #hash64(byte[], int, int, int)} does not effect this result as the default seed is positive.
1114      * </p>
1115      *
1116      * <p>
1117      * This is a helper method that will produce the same result as:
1118      * </p>
1119      *
1120      * <pre>
1121      *
1122      * int offset = 0;
1123      * int seed = 104729;
1124      * long hash = MurmurHash3.hash64(ByteBuffer.allocate(4).putInt(data).array(), offset, 4, seed);
1125      * </pre>
1126      *
1127      * @param data The int to hash.
1128      * @return The 64-bit hash.
1129      * @see #hash64(byte[], int, int, int)
1130      * @deprecated Not part of the MurmurHash3 implementation. Use half of the hash bytes from {@link #hash128x64(byte[])} with the bytes from the {@code int}.
1131      */
1132     @Deprecated
1133     public static long hash64(final int data) {
1134         long k1 = Integer.reverseBytes(data) & -1L >>> 32;
1135         long hash = DEFAULT_SEED;
1136         k1 *= C1;
1137         k1 = Long.rotateLeft(k1, R1);
1138         k1 *= C2;
1139         hash ^= k1;
1140         // finalization
1141         hash ^= Integer.BYTES;
1142         return fmix64(hash);
1143     }
1144 
1145     /**
1146      * Generates 64-bit hash from a long with a default seed.
1147      *
1148      * <p>
1149      * <strong> This is not part of the original MurmurHash3 {@code c++} implementation. </strong>
1150      * </p>
1151      *
1152      * <p>
1153      * This is a Murmur3-like 64-bit variant. The method does not produce the same result as either half of the hash bytes from {@linkplain #hash128x64(byte[])}
1154      * with the same byte data from the {@code long}. This method will be removed in a future release.
1155      * </p>
1156      *
1157      * <p>
1158      * Note: The sign extension bug in {@link #hash64(byte[], int, int, int)} does not effect this result as the default seed is positive.
1159      * </p>
1160      *
1161      * <p>
1162      * This is a helper method that will produce the same result as:
1163      * </p>
1164      *
1165      * <pre>
1166      *
1167      * int offset = 0;
1168      * int seed = 104729;
1169      * long hash = MurmurHash3.hash64(ByteBuffer.allocate(8).putLong(data).array(), offset, 8, seed);
1170      * </pre>
1171      *
1172      * @param data The long to hash.
1173      * @return The 64-bit hash.
1174      * @see #hash64(byte[], int, int, int)
1175      * @deprecated Not part of the MurmurHash3 implementation. Use half of the hash bytes from {@link #hash128x64(byte[])} with the bytes from the {@code long}.
1176      */
1177     @Deprecated
1178     public static long hash64(final long data) {
1179         long hash = DEFAULT_SEED;
1180         long k = Long.reverseBytes(data);
1181         // mix functions
1182         k *= C1;
1183         k = Long.rotateLeft(k, R1);
1184         k *= C2;
1185         hash ^= k;
1186         hash = Long.rotateLeft(hash, R2) * M + N1;
1187         // finalization
1188         hash ^= Long.BYTES;
1189         return fmix64(hash);
1190     }
1191 
1192     /**
1193      * Generates 64-bit hash from a short with a default seed.
1194      *
1195      * <p>
1196      * <strong>This is not part of the original MurmurHash3 {@code c++} implementation.</strong>
1197      * </p>
1198      *
1199      * <p>
1200      * This is a Murmur3-like 64-bit variant. The method does not produce the same result as either half of the hash bytes from {@linkplain #hash128x64(byte[])}
1201      * with the same byte data from the {@code short}. This method will be removed in a future release.
1202      * </p>
1203      *
1204      * <p>
1205      * Note: The sign extension bug in {@link #hash64(byte[], int, int, int)} does not effect this result as the default seed is positive.
1206      * </p>
1207      *
1208      * <p>
1209      * This is a helper method that will produce the same result as:
1210      * </p>
1211      *
1212      * <pre>
1213      *
1214      * int offset = 0;
1215      * int seed = 104729;
1216      * long hash = MurmurHash3.hash64(ByteBuffer.allocate(2).putShort(data).array(), offset, 2, seed);
1217      * </pre>
1218      *
1219      * @param data The short to hash.
1220      * @return The 64-bit hash.
1221      * @see #hash64(byte[], int, int, int)
1222      * @deprecated Not part of the MurmurHash3 implementation. Use half of the hash bytes from {@link #hash128x64(byte[])} with the bytes from the
1223      *             {@code short}.
1224      */
1225     @Deprecated
1226     public static long hash64(final short data) {
1227         long hash = DEFAULT_SEED;
1228         long k1 = 0;
1229         k1 ^= ((long) data & 0xff) << 8;
1230         k1 ^= (long) ((data & 0xFF00) >> 8) & 0xff;
1231         k1 *= C1;
1232         k1 = Long.rotateLeft(k1, R1);
1233         k1 *= C2;
1234         hash ^= k1;
1235         // finalization
1236         hash ^= Short.BYTES;
1237         return fmix64(hash);
1238     }
1239 
1240     /**
1241      * Performs the intermediate mix step of the 32-bit hash function {@code MurmurHash3_x86_32}.
1242      *
1243      * @param k The data to add to the hash.
1244      * @param hash The current hash.
1245      * @return The new hash.
1246      */
1247     private static int mix32(int k, int hash) {
1248         k *= C1_32;
1249         k = Integer.rotateLeft(k, R1_32);
1250         k *= C2_32;
1251         hash ^= k;
1252         return Integer.rotateLeft(hash, R2_32) * M_32 + N_32;
1253     }
1254 
1255     /** No instance methods. */
1256     private MurmurHash3() {
1257     }
1258 }