001/*
002 * Licensed to the Apache Software Foundation (ASF) under one or more
003 * contributor license agreements.  See the NOTICE file distributed with
004 * this work for additional information regarding copyright ownership.
005 * The ASF licenses this file to You under the Apache License, Version 2.0
006 * (the "License"); you may not use this file except in compliance with
007 * the License.  You may obtain a copy of the License at
008 *
009 *      https://www.apache.org/licenses/LICENSE-2.0
010 *
011 * Unless required by applicable law or agreed to in writing, software
012 * distributed under the License is distributed on an "AS IS" BASIS,
013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 * See the License for the specific language governing permissions and
015 * limitations under the License.
016 */
017
018package org.apache.commons.codec.digest;
019
020import org.apache.commons.codec.binary.StringUtils;
021
022/**
023 * Implements the MurmurHash3 32-bit and 128-bit hash functions.
024 *
025 * <p>
026 * MurmurHash is a non-cryptographic hash function suitable for general hash-based lookup. The name comes from two basic
027 * operations, multiply (MU) and rotate (R), used in its inner loop. Unlike cryptographic hash functions, it is not
028 * specifically designed to be difficult to reverse by an adversary, making it unsuitable for cryptographic purposes.
029 * </p>
030 *
031 * <p>
032 * This contains a Java port of the 32-bit hash function {@code MurmurHash3_x86_32} and the 128-bit hash function
033 * {@code MurmurHash3_x64_128} from Austin Appleby's original {@code c++} code in SMHasher.
034 * </p>
035 *
036 * <p>
037 * This is public domain code with no copyrights. From home page of
038 * <a href="https://github.com/aappleby/smhasher">SMHasher</a>:
039 * </p>
040 *
041 * <blockquote>
042 * "All MurmurHash versions are public domain software, and the author disclaims all copyright to their
043 * code."
044 * </blockquote>
045 *
046 * <p>
047 * Original adaption from <a href="https://hive.apache.org/">Apache Hive</a>.
048 * That adaption contains a {@code hash64} method that is not part of the original
049 * MurmurHash3 code. It is not recommended to use these methods. They will be removed in a future release. To obtain a
050 * 64-bit hash use half of the bits from the {@code hash128x64} methods using the input data converted to bytes.
051 * </p>
052 *
053 * @see <a href="https://en.wikipedia.org/wiki/MurmurHash">MurmurHash</a>
054 * @see <a href="https://github.com/aappleby/smhasher/blob/master/src/MurmurHash3.cpp"> Original MurmurHash3 C++
055 *      code</a>
056 * @see <a href=
057 *      "https://github.com/apache/hive/blob/master/storage-api/src/java/org/apache/hive/common/util/Murmur3.java">
058 *      Apache Hive Murmer3</a>
059 * @since 1.13
060 */
061public final class MurmurHash3 {
062
063    /**
064     * Generates 32-bit hash from input bytes. Bytes can be added incrementally and the new
065     * hash computed.
066     *
067     * <p>
068     * This is an implementation of the 32-bit hash function {@code MurmurHash3_x86_32}
069     * from Austin Appleby's original MurmurHash3 {@code c++} code in SMHasher.
070     * </p>
071     *
072     * <p>
073     * This implementation contains a sign-extension bug in the finalization step of
074     * any bytes left over from dividing the length by 4. This manifests if any of these
075     * bytes are negative.
076     * </p>
077     *
078     * @deprecated Use IncrementalHash32x86. This corrects the processing of trailing bytes.
079     */
080    @Deprecated
081    public static class IncrementalHash32 extends IncrementalHash32x86 {
082
083        /**
084         * Constructs a new instance.
085         */
086        public IncrementalHash32() {
087            // empty
088        }
089
090        /**
091         * {@inheritDoc}
092         *
093         * <p>
094         * This implementation contains a sign-extension bug in the finalization step of
095         * any bytes left over from dividing the length by 4. This manifests if any of these
096         * bytes are negative.
097         * <p>
098         *
099         * @deprecated Use IncrementalHash32x86. This corrects the processing of trailing bytes.
100         */
101        @Override
102        @Deprecated
103        int finalise(final int hash, final int unprocessedLength, final byte[] unprocessed, final int totalLen) {
104            int result = hash;
105            // Note: This fails to apply masking using 0xff to the 3 remaining bytes.
106            int k1 = 0;
107            switch (unprocessedLength) {
108            case 3:
109                k1 ^= unprocessed[2] << 16;
110                // falls-through
111            case 2:
112                k1 ^= unprocessed[1] << 8;
113                // falls-through
114            case 1:
115                k1 ^= unprocessed[0];
116                // mix functions
117                k1 *= C1_32;
118                k1 = Integer.rotateLeft(k1, R1_32);
119                k1 *= C2_32;
120                result ^= k1;
121            }
122            // finalization
123            result ^= totalLen;
124            return fmix32(result);
125        }
126    }
127
128    /**
129     * Generates 32-bit hash from input bytes. Bytes can be added incrementally and the new
130     * hash computed.
131     *
132     * <p>
133     * This is an implementation of the 32-bit hash function {@code MurmurHash3_x86_32}
134     * from Austin Appleby's original MurmurHash3 {@code c++} code in SMHasher.
135     * </p>
136     *
137     * @since 1.14
138     */
139    public static class IncrementalHash32x86 {
140
141        /** The size of byte blocks that are processed together. */
142        private static final int BLOCK_SIZE = 4;
143
144        /**
145         * Combines the bytes using an Or operation ({@code | } in a little-endian representation
146         * of a 32-bit integer; byte 1 will be the least significant byte, byte 4 the most
147         * significant.
148         *
149         * @param b1 The first byte
150         * @param b2 The second byte
151         * @param b3 The third byte
152         * @param b4 The fourth byte
153         * @return The 32-bit integer
154         */
155        private static int orBytes(final byte b1, final byte b2, final byte b3, final byte b4) {
156            return b1 & 0xff | (b2 & 0xff) << 8 | (b3 & 0xff) << 16 | (b4 & 0xff) << 24;
157        }
158
159        /** Up to 3 unprocessed bytes from input data. */
160        private final byte[] unprocessed = new byte[3];
161
162        /** The number of unprocessed bytes in the tail data. */
163        private int unprocessedLength;
164
165        /** The total number of input bytes added since the start. */
166        private int totalLen;
167
168        /**
169         * The current running hash.
170         * This must be finalized to generate the 32-bit hash value.
171         */
172        private int hash;
173
174        /**
175         * Constructs a new instance.
176         */
177        public IncrementalHash32x86() {
178            // empty
179        }
180
181        /**
182         * Adds the byte array to the current incremental hash.
183         *
184         * @param data The input byte array
185         * @param offset The offset of data
186         * @param length The length of array
187         */
188        public final void add(final byte[] data, final int offset, final int length) {
189            if (length <= 0) {
190                // Nothing to add
191                return;
192            }
193            totalLen += length;
194            // Process the bytes in blocks of 4.
195            // New bytes must be added to any current unprocessed bytes,
196            // then processed in blocks of 4 and the remaining bytes saved:
197            //
198            //    |--|---------------------------|--|
199            // unprocessed
200            //                main block
201            //                                remaining
202
203            // Check if the unprocessed bytes and new bytes can fill a block of 4.
204            // Make this overflow safe in the event that length is Integer.MAX_VALUE.
205            // Equivalent to: (unprocessedLength + length < BLOCK_SIZE)
206            if (unprocessedLength + length - BLOCK_SIZE < 0) {
207                // Not enough so add to the unprocessed bytes
208                System.arraycopy(data, offset, unprocessed, unprocessedLength, length);
209                unprocessedLength += length;
210                return;
211            }
212            // Combine unprocessed bytes with new bytes.
213            final int newOffset;
214            final int newLength;
215            if (unprocessedLength > 0) {
216                int k = -1;
217                switch (unprocessedLength) {
218                case 1:
219                    k = orBytes(unprocessed[0], data[offset], data[offset + 1], data[offset + 2]);
220                    break;
221                case 2:
222                    k = orBytes(unprocessed[0], unprocessed[1], data[offset], data[offset + 1]);
223                    break;
224                case 3:
225                    k = orBytes(unprocessed[0], unprocessed[1], unprocessed[2], data[offset]);
226                    break;
227                default:
228                    throw new IllegalStateException("Unprocessed length should be 1, 2, or 3: " + unprocessedLength);
229                }
230                hash = mix32(k, hash);
231                // Update the offset and length
232                final int consumed = BLOCK_SIZE - unprocessedLength;
233                newOffset = offset + consumed;
234                newLength = length - consumed;
235            } else {
236                newOffset = offset;
237                newLength = length;
238            }
239            // Main processing of blocks of 4 bytes
240            final int nblocks = newLength >> 2;
241
242            for (int i = 0; i < nblocks; i++) {
243                final int index = newOffset + (i << 2);
244                final int k = MurmurHash.getLittleEndianInt(data, index);
245                hash = mix32(k, hash);
246            }
247            // Save left-over unprocessed bytes
248            final int consumed = nblocks << 2;
249            unprocessedLength = newLength - consumed;
250            if (unprocessedLength != 0) {
251                System.arraycopy(data, newOffset + consumed, unprocessed, 0, unprocessedLength);
252            }
253        }
254
255        /**
256         * Generates the 32-bit hash value. Repeat calls to this method with no additional data
257         * will generate the same hash value.
258         *
259         * @return The 32-bit hash
260         */
261        public final int end() {
262            // Allow calling end() again after adding no data to return the same result.
263            return finalise(hash, unprocessedLength, unprocessed, totalLen);
264        }
265
266        /**
267         * Finalizes the running hash to the output 32-bit hash by processing remaining bytes
268         * and performing final mixing.
269         *
270         * @param hash The running hash
271         * @param unprocessedLength The number of unprocessed bytes in the tail data.
272         * @param unprocessed Up to 3 unprocessed bytes from input data.
273         * @param totalLen The total number of input bytes added since the start.
274         * @return The 32-bit hash
275         */
276        int finalise(final int hash, final int unprocessedLength, final byte[] unprocessed, final int totalLen) {
277            int result = hash;
278            int k1 = 0;
279            switch (unprocessedLength) {
280            case 3:
281                k1 ^= (unprocessed[2] & 0xff) << 16;
282                // falls-through
283            case 2:
284                k1 ^= (unprocessed[1] & 0xff) << 8;
285                // falls-through
286            case 1:
287                k1 ^= unprocessed[0] & 0xff;
288                // mix functions
289                k1 *= C1_32;
290                k1 = Integer.rotateLeft(k1, R1_32);
291                k1 *= C2_32;
292                result ^= k1;
293            }
294            // finalization
295            result ^= totalLen;
296            return fmix32(result);
297        }
298
299        /**
300         * Starts a new incremental hash.
301         *
302         * @param seed The initial seed value
303         */
304        public final void start(final int seed) {
305            // Reset
306            unprocessedLength = totalLen = 0;
307            hash = seed;
308        }
309    }
310
311    /**
312     * A random number to use for a hash code.
313     *
314     * @deprecated This is not used internally and will be removed in a future release.
315     */
316    @Deprecated
317    public static final long NULL_HASHCODE = 2862933555777941757L;
318    /**
319     * A default seed to use for the murmur hash algorithm.
320     * Has the value {@code 104729}.
321     */
322    public static final int DEFAULT_SEED = 104729;
323    // Constants for 32-bit variant
324    private static final int C1_32 = 0xcc9e2d51;
325    private static final int C2_32 = 0x1b873593;
326    private static final int R1_32 = 15;
327    private static final int R2_32 = 13;
328
329    private static final int M_32 = 5;
330    private static final int N_32 = 0xe6546b64;
331    // Constants for 128-bit variant
332    private static final long C1 = 0x87c37b91114253d5L;
333    private static final long C2 = 0x4cf5ad432745937fL;
334    private static final int R1 = 31;
335    private static final int R2 = 27;
336    private static final int R3 = 33;
337    private static final int M = 5;
338
339    private static final int N1 = 0x52dce729;
340
341    private static final int N2 = 0x38495ab5;
342
343    /**
344     * Performs the final avalanche mix step of the 32-bit hash function {@code MurmurHash3_x86_32}.
345     *
346     * @param hash The current hash
347     * @return The final hash
348     */
349    private static int fmix32(int hash) {
350        hash ^= hash >>> 16;
351        hash *= 0x85ebca6b;
352        hash ^= hash >>> 13;
353        hash *= 0xc2b2ae35;
354        hash ^= hash >>> 16;
355        return hash;
356    }
357
358    /**
359     * Performs the final avalanche mix step of the 64-bit hash function {@code MurmurHash3_x64_128}.
360     *
361     * @param hash The current hash
362     * @return The final hash
363     */
364    private static long fmix64(long hash) {
365        hash ^= hash >>> 33;
366        hash *= 0xff51afd7ed558ccdL;
367        hash ^= hash >>> 33;
368        hash *= 0xc4ceb9fe1a85ec53L;
369        hash ^= hash >>> 33;
370        return hash;
371    }
372
373    /**
374     * Generates 128-bit hash from the byte array with a default seed.
375     * This is a helper method that will produce the same result as:
376     *
377     * <pre>
378     * int offset = 0;
379     * int seed = 104729;
380     * int hash = MurmurHash3.hash128(data, offset, data.length, seed);
381     * </pre>
382     *
383     * <p>
384     * Note: The sign extension bug in {@link #hash128(byte[], int, int, int)} does not effect
385     * this result as the default seed is positive.
386     * </p>
387     *
388     * @param data The input byte array
389     * @return The 128-bit hash (2 longs)
390     * @see #hash128(byte[], int, int, int)
391     */
392    public static long[] hash128(final byte[] data) {
393        return hash128(data, 0, data.length, DEFAULT_SEED);
394    }
395
396    /**
397     * Generates 128-bit hash from the byte array with the given offset, length and seed.
398     *
399     * <p>
400     * This is an implementation of the 128-bit hash function {@code MurmurHash3_x64_128}
401     * from Austin Appleby's original MurmurHash3 {@code c++} code in SMHasher.
402     * </p>
403     *
404     * <p>
405     * This implementation contains a sign-extension bug in the seed initialization.
406     * This manifests if the seed is negative.
407     * </p>
408     *
409     * @param data The input byte array
410     * @param offset The first element of array
411     * @param length The length of array
412     * @param seed The initial seed value
413     * @return The 128-bit hash (2 longs)
414     * @deprecated Use {@link #hash128x64(byte[], int, int, int)}. This corrects the seed initialization.
415     */
416    @Deprecated
417    public static long[] hash128(final byte[] data, final int offset, final int length, final int seed) {
418        // Note: This deliberately fails to apply masking using 0xffffffffL to the seed
419        // to maintain behavioral compatibility with the original version.
420        // The implicit conversion to a long will extend a negative sign
421        // bit through the upper 32-bits of the long seed. These should be zero.
422        return hash128x64Internal(data, offset, length, seed);
423    }
424
425    /**
426     * Generates 128-bit hash from a string with a default seed.
427     * <p>
428     * Before 1.14 the string was converted using default encoding.
429     * Since 1.14 the string is converted to bytes using UTF-8 encoding.
430     * </p>
431     * <p>
432     * This is a helper method that will produce the same result as:
433     * </p>
434     *
435     * <pre>
436     * int offset = 0;
437     * int seed = 104729;
438     * byte[] bytes = data.getBytes(StandardCharsets.UTF_8);
439     * int hash = MurmurHash3.hash128(bytes, offset, bytes.length, seed);
440     * </pre>
441     *
442     * <p>
443     * Note: The sign extension bug in {@link #hash128(byte[], int, int, int)} does not effect
444     * this result as the default seed is positive.
445     * </p>
446     *
447     * @param data The input String
448     * @return The 128-bit hash (2 longs)
449     * @see #hash128(byte[], int, int, int)
450     * @deprecated Use {@link #hash128x64(byte[])} using the bytes returned from
451     * {@link String#getBytes(java.nio.charset.Charset)}.
452     */
453    @Deprecated
454    public static long[] hash128(final String data) {
455        final byte[] bytes = StringUtils.getBytesUtf8(data);
456        return hash128(bytes, 0, bytes.length, DEFAULT_SEED);
457    }
458
459    /**
460     * Generates 128-bit hash from the byte array with a seed of zero.
461     * This is a helper method that will produce the same result as:
462     *
463     * <pre>
464     * int offset = 0;
465     * int seed = 0;
466     * int hash = MurmurHash3.hash128x64(data, offset, data.length, seed);
467     * </pre>
468     *
469     * @param data The input byte array
470     * @return The 128-bit hash (2 longs)
471     * @see #hash128x64(byte[], int, int, int)
472     * @since 1.14
473     */
474    public static long[] hash128x64(final byte[] data) {
475        return hash128x64(data, 0, data.length, 0);
476    }
477
478    /**
479     * Generates 128-bit hash from the byte array with the given offset, length and seed.
480     *
481     * <p>
482     * This is an implementation of the 128-bit hash function {@code MurmurHash3_x64_128}
483     * from Austin Appleby's original MurmurHash3 {@code c++} code in SMHasher.
484     * </p>
485     *
486     * @param data The input byte array
487     * @param offset The first element of array
488     * @param length The length of array
489     * @param seed The initial seed value
490     * @return The 128-bit hash (2 longs)
491     * @since 1.14
492     */
493    public static long[] hash128x64(final byte[] data, final int offset, final int length, final int seed) {
494        // Use an unsigned 32-bit integer as the seed
495        return hash128x64Internal(data, offset, length, seed & 0xffffffffL);
496    }
497
498    /**
499     * Generates 128-bit hash from the byte array with the given offset, length and seed.
500     *
501     * <p>
502     * This is an implementation of the 128-bit hash function {@code MurmurHash3_x64_128}
503     * from Austin Appleby's original MurmurHash3 {@code c++} code in SMHasher.
504     * </p>
505     *
506     * @param data The input byte array
507     * @param offset The first element of array
508     * @param length The length of array
509     * @param seed The initial seed value
510     * @return The 128-bit hash (2 longs)
511     */
512    private static long[] hash128x64Internal(final byte[] data, final int offset, final int length, final long seed) {
513        long h1 = seed;
514        long h2 = seed;
515        final int nblocks = length >> 4;
516        // body
517        for (int i = 0; i < nblocks; i++) {
518            final int index = offset + (i << 4);
519            long k1 = MurmurHash.getLittleEndianLong(data, index);
520            long k2 = MurmurHash.getLittleEndianLong(data, index + 8);
521
522            // mix functions for k1
523            k1 *= C1;
524            k1 = Long.rotateLeft(k1, R1);
525            k1 *= C2;
526            h1 ^= k1;
527            h1 = Long.rotateLeft(h1, R2);
528            h1 += h2;
529            h1 = h1 * M + N1;
530
531            // mix functions for k2
532            k2 *= C2;
533            k2 = Long.rotateLeft(k2, R3);
534            k2 *= C1;
535            h2 ^= k2;
536            h2 = Long.rotateLeft(h2, R1);
537            h2 += h1;
538            h2 = h2 * M + N2;
539        }
540        // tail
541        long k1 = 0;
542        long k2 = 0;
543        final int index = offset + (nblocks << 4);
544        switch (offset + length - index) {
545        case 15:
546            k2 ^= ((long) data[index + 14] & 0xff) << 48;
547            // falls-through
548        case 14:
549            k2 ^= ((long) data[index + 13] & 0xff) << 40;
550            // falls-through
551        case 13:
552            k2 ^= ((long) data[index + 12] & 0xff) << 32;
553            // falls-through
554        case 12:
555            k2 ^= ((long) data[index + 11] & 0xff) << 24;
556            // falls-through
557        case 11:
558            k2 ^= ((long) data[index + 10] & 0xff) << 16;
559            // falls-through
560        case 10:
561            k2 ^= ((long) data[index + 9] & 0xff) << 8;
562            // falls-through
563        case 9:
564            k2 ^= data[index + 8] & 0xff;
565            k2 *= C2;
566            k2 = Long.rotateLeft(k2, R3);
567            k2 *= C1;
568            h2 ^= k2;
569            // falls-through
570        case 8:
571            k1 ^= ((long) data[index + 7] & 0xff) << 56;
572            // falls-through
573        case 7:
574            k1 ^= ((long) data[index + 6] & 0xff) << 48;
575            // falls-through
576        case 6:
577            k1 ^= ((long) data[index + 5] & 0xff) << 40;
578            // falls-through
579        case 5:
580            k1 ^= ((long) data[index + 4] & 0xff) << 32;
581            // falls-through
582        case 4:
583            k1 ^= ((long) data[index + 3] & 0xff) << 24;
584            // falls-through
585        case 3:
586            k1 ^= ((long) data[index + 2] & 0xff) << 16;
587            // falls-through
588        case 2:
589            k1 ^= ((long) data[index + 1] & 0xff) << 8;
590            // falls-through
591        case 1:
592            k1 ^= data[index] & 0xff;
593            k1 *= C1;
594            k1 = Long.rotateLeft(k1, R1);
595            k1 *= C2;
596            h1 ^= k1;
597        }
598        // finalization
599        h1 ^= length;
600        h2 ^= length;
601
602        h1 += h2;
603        h2 += h1;
604
605        h1 = fmix64(h1);
606        h2 = fmix64(h2);
607
608        h1 += h2;
609        h2 += h1;
610        return new long[] { h1, h2 };
611    }
612
613    /**
614     * Generates 32-bit hash from the byte array with a default seed.
615     * This is a helper method that will produce the same result as:
616     *
617     * <pre>
618     * int offset = 0;
619     * int seed = 104729;
620     * int hash = MurmurHash3.hash32(data, offset, data.length, seed);
621     * </pre>
622     *
623     * <p>
624     * This implementation contains a sign-extension bug in the finalization step of
625     * any bytes left over from dividing the length by 4. This manifests if any of these
626     * bytes are negative.
627     * </p>
628     *
629     * @param data The input byte array
630     * @return The 32-bit hash
631     * @see #hash32(byte[], int, int, int)
632     * @deprecated Use {@link #hash32x86(byte[], int, int, int)}. This corrects the processing of trailing bytes.
633     */
634    @Deprecated
635    public static int hash32(final byte[] data) {
636        return hash32(data, 0, data.length, DEFAULT_SEED);
637    }
638
639    /**
640     * Generates 32-bit hash from the byte array with the given length and a default seed.
641     * This is a helper method that will produce the same result as:
642     *
643     * <pre>
644     * int offset = 0;
645     * int seed = 104729;
646     * int hash = MurmurHash3.hash32(data, offset, length, seed);
647     * </pre>
648     *
649     * <p>
650     * This implementation contains a sign-extension bug in the finalization step of
651     * any bytes left over from dividing the length by 4. This manifests if any of these
652     * bytes are negative.
653     * </p>
654     *
655     * @param data The input byte array
656     * @param length The length of array
657     * @return The 32-bit hash
658     * @see #hash32(byte[], int, int, int)
659     * @deprecated Use {@link #hash32x86(byte[], int, int, int)}. This corrects the processing of trailing bytes.
660     */
661    @Deprecated
662    public static int hash32(final byte[] data, final int length) {
663        return hash32(data, length, DEFAULT_SEED);
664    }
665
666    /**
667     * Generates 32-bit hash from the byte array with the given length and seed. This is a
668     * helper method that will produce the same result as:
669     *
670     * <pre>
671     * int offset = 0;
672     * int hash = MurmurHash3.hash32(data, offset, length, seed);
673     * </pre>
674     *
675     * <p>
676     * This implementation contains a sign-extension bug in the finalization step of
677     * any bytes left over from dividing the length by 4. This manifests if any of these
678     * bytes are negative.
679     * </p>
680     *
681     * @param data The input byte array
682     * @param length The length of array
683     * @param seed The initial seed value
684     * @return The 32-bit hash
685     * @see #hash32(byte[], int, int, int)
686     * @deprecated Use {@link #hash32x86(byte[], int, int, int)}. This corrects the processing of trailing bytes.
687     */
688    @Deprecated
689    public static int hash32(final byte[] data, final int length, final int seed) {
690        return hash32(data, 0, length, seed);
691    }
692
693    /**
694     * Generates 32-bit hash from the byte array with the given offset, length and seed.
695     *
696     * <p>
697     * This is an implementation of the 32-bit hash function {@code MurmurHash3_x86_32}
698     * from Austin Appleby's original MurmurHash3 {@code c++} code in SMHasher.
699     * </p>
700     *
701     * <p>
702     * This implementation contains a sign-extension bug in the finalization step of
703     * any bytes left over from dividing the length by 4. This manifests if any of these
704     * bytes are negative.
705     * </p>
706     *
707     * @param data The input byte array
708     * @param offset The offset of data
709     * @param length The length of array
710     * @param seed The initial seed value
711     * @return The 32-bit hash
712     * @deprecated Use {@link #hash32x86(byte[], int, int, int)}. This corrects the processing of trailing bytes.
713     */
714    @Deprecated
715    public static int hash32(final byte[] data, final int offset, final int length, final int seed) {
716        int hash = seed;
717        final int nblocks = length >> 2;
718        // body
719        for (int i = 0; i < nblocks; i++) {
720            final int index = offset + (i << 2);
721            final int k = MurmurHash.getLittleEndianInt(data, index);
722            hash = mix32(k, hash);
723        }
724        // tail
725        // Note: This fails to apply masking using 0xff to the 3 remaining bytes.
726        final int index = offset + (nblocks << 2);
727        int k1 = 0;
728        switch (offset + length - index) {
729        case 3:
730            k1 ^= data[index + 2] << 16;
731            // falls-through
732        case 2:
733            k1 ^= data[index + 1] << 8;
734            // falls-through
735        case 1:
736            k1 ^= data[index];
737            // mix functions
738            k1 *= C1_32;
739            k1 = Integer.rotateLeft(k1, R1_32);
740            k1 *= C2_32;
741            hash ^= k1;
742        }
743        hash ^= length;
744        return fmix32(hash);
745    }
746
747    /**
748     * Generates 32-bit hash from a long with a default seed value.
749     * This is a helper method that will produce the same result as:
750     *
751     * <pre>
752     * int offset = 0;
753     * int seed = 104729;
754     * int hash = MurmurHash3.hash32x86(ByteBuffer.allocate(8)
755     *                                            .putLong(data)
756     *                                            .array(), offset, 8, seed);
757     * </pre>
758     *
759     * @param data The long to hash
760     * @return The 32-bit hash
761     * @see #hash32x86(byte[], int, int, int)
762     */
763    public static int hash32(final long data) {
764        return hash32(data, DEFAULT_SEED);
765    }
766
767    /**
768     * Generates 32-bit hash from a long with the given seed.
769     * This is a helper method that will produce the same result as:
770     *
771     * <pre>
772     * int offset = 0;
773     * int hash = MurmurHash3.hash32x86(ByteBuffer.allocate(8)
774     *                                            .putLong(data)
775     *                                            .array(), offset, 8, seed);
776     * </pre>
777     *
778     * @param data The long to hash
779     * @param seed The initial seed value
780     * @return The 32-bit hash
781     * @see #hash32x86(byte[], int, int, int)
782     */
783    public static int hash32(final long data, final int seed) {
784        int hash = seed;
785        final long r0 = Long.reverseBytes(data);
786
787        hash = mix32((int) r0, hash);
788        hash = mix32((int) (r0 >>> 32), hash);
789
790        hash ^= Long.BYTES;
791        return fmix32(hash);
792    }
793
794    /**
795     * Generates 32-bit hash from two longs with a default seed value.
796     * This is a helper method that will produce the same result as:
797     *
798     * <pre>
799     * int offset = 0;
800     * int seed = 104729;
801     * int hash = MurmurHash3.hash32x86(ByteBuffer.allocate(16)
802     *                                            .putLong(data1)
803     *                                            .putLong(data2)
804     *                                            .array(), offset, 16, seed);
805     * </pre>
806     *
807     * @param data1 The first long to hash
808     * @param data2 The second long to hash
809     * @return The 32-bit hash
810     * @see #hash32x86(byte[], int, int, int)
811     */
812    public static int hash32(final long data1, final long data2) {
813        return hash32(data1, data2, DEFAULT_SEED);
814    }
815
816    /**
817     * Generates 32-bit hash from two longs with the given seed.
818     * This is a helper method that will produce the same result as:
819     *
820     * <pre>
821     * int offset = 0;
822     * int hash = MurmurHash3.hash32x86(ByteBuffer.allocate(16)
823     *                                            .putLong(data1)
824     *                                            .putLong(data2)
825     *                                            .array(), offset, 16, seed);
826     * </pre>
827     *
828     * @param data1 The first long to hash
829     * @param data2 The second long to hash
830     * @param seed The initial seed value
831     * @return The 32-bit hash
832     * @see #hash32x86(byte[], int, int, int)
833     */
834    public static int hash32(final long data1, final long data2, final int seed) {
835        int hash = seed;
836        final long r0 = Long.reverseBytes(data1);
837        final long r1 = Long.reverseBytes(data2);
838
839        hash = mix32((int) r0, hash);
840        hash = mix32((int) (r0 >>> 32), hash);
841        hash = mix32((int) r1, hash);
842        hash = mix32((int) (r1 >>> 32), hash);
843
844        hash ^= Long.BYTES * 2;
845        return fmix32(hash);
846    }
847
848    /**
849     * Generates 32-bit hash from a string with a default seed.
850     * <p>
851     * Before 1.14 the string was converted using default encoding.
852     * Since 1.14 the string is converted to bytes using UTF-8 encoding.
853     * </p>
854     * This is a helper method that will produce the same result as:
855     *
856     * <pre>
857     * int offset = 0;
858     * int seed = 104729;
859     * byte[] bytes = data.getBytes(StandardCharsets.UTF_8);
860     * int hash = MurmurHash3.hash32(bytes, offset, bytes.length, seed);
861     * </pre>
862     *
863     * <p>
864     * This implementation contains a sign-extension bug in the finalization step of
865     * any bytes left over from dividing the length by 4. This manifests if any of these
866     * bytes are negative.
867     * </p>
868     *
869     * @param data The input string
870     * @return The 32-bit hash
871     * @see #hash32(byte[], int, int, int)
872     * @deprecated Use {@link #hash32x86(byte[], int, int, int)} with the bytes returned from
873     * {@link String#getBytes(java.nio.charset.Charset)}. This corrects the processing of trailing bytes.
874     */
875    @Deprecated
876    public static int hash32(final String data) {
877        final byte[] bytes = StringUtils.getBytesUtf8(data);
878        return hash32(bytes, 0, bytes.length, DEFAULT_SEED);
879    }
880
881    /**
882     * Generates 32-bit hash from the byte array with a seed of zero.
883     * This is a helper method that will produce the same result as:
884     *
885     * <pre>
886     * int offset = 0;
887     * int seed = 0;
888     * int hash = MurmurHash3.hash32x86(data, offset, data.length, seed);
889     * </pre>
890     *
891     * @param data The input byte array
892     * @return The 32-bit hash
893     * @see #hash32x86(byte[], int, int, int)
894     * @since 1.14
895     */
896    public static int hash32x86(final byte[] data) {
897        return hash32x86(data, 0, data.length, 0);
898    }
899
900    /**
901     * Generates 32-bit hash from the byte array with the given offset, length and seed.
902     *
903     * <p>
904     * This is an implementation of the 32-bit hash function {@code MurmurHash3_x86_32}
905     * from Austin Appleby's original MurmurHash3 {@code c++} code in SMHasher.
906     * </p>
907     *
908     * @param data The input byte array
909     * @param offset The offset of data
910     * @param length The length of array
911     * @param seed The initial seed value
912     * @return The 32-bit hash
913     * @since 1.14
914     */
915    public static int hash32x86(final byte[] data, final int offset, final int length, final int seed) {
916        int hash = seed;
917        final int nblocks = length >> 2;
918        // body
919        for (int i = 0; i < nblocks; i++) {
920            final int index = offset + (i << 2);
921            final int k = MurmurHash.getLittleEndianInt(data, index);
922            hash = mix32(k, hash);
923        }
924        // tail
925        final int index = offset + (nblocks << 2);
926        int k1 = 0;
927        switch (offset + length - index) {
928        case 3:
929            k1 ^= (data[index + 2] & 0xff) << 16;
930            // falls-through
931        case 2:
932            // falls-through
933            k1 ^= (data[index + 1] & 0xff) << 8;
934            // falls-through
935        case 1:
936            k1 ^= data[index] & 0xff;
937            // mix functions
938            k1 *= C1_32;
939            k1 = Integer.rotateLeft(k1, R1_32);
940            k1 *= C2_32;
941            hash ^= k1;
942        }
943        hash ^= length;
944        return fmix32(hash);
945    }
946
947    /**
948     * Generates 64-bit hash from a byte array with a default seed.
949     *
950     * <p><strong>This is not part of the original MurmurHash3 {@code c++} implementation.</strong></p>
951     *
952     * <p>
953     * This is a Murmur3-like 64-bit variant.
954     * The method does not produce the same result as either half of the hash bytes from
955     * {@linkplain #hash128x64(byte[])} with the same byte data.
956     * This method will be removed in a future release.
957     * </p>
958     *
959     * <p>
960     * Note: The sign extension bug in {@link #hash64(byte[], int, int, int)} does not effect
961     * this result as the default seed is positive.
962     * </p>
963     *
964     * <p>
965     * This is a helper method that will produce the same result as:
966     * </p>
967     *
968     * <pre>
969     * int offset = 0;
970     * int seed = 104729;
971     * long hash = MurmurHash3.hash64(data, offset, data.length, seed);
972     * </pre>
973     *
974     * @param data The input byte array
975     * @return The 64-bit hash
976     * @see #hash64(byte[], int, int, int)
977     * @deprecated Not part of the MurmurHash3 implementation.
978     * Use half of the hash bytes from {@link #hash128x64(byte[])}.
979     */
980    @Deprecated
981    public static long hash64(final byte[] data) {
982        return hash64(data, 0, data.length, DEFAULT_SEED);
983    }
984
985    /**
986     * Generates 64-bit hash from a byte array with the given offset and length and a default seed.
987     *
988     * <p><strong>
989     * This is not part of the original MurmurHash3 {@code c++} implementation.
990     * </strong></p>
991     *
992     * <p>
993     * This is a Murmur3-like 64-bit variant.
994     * The method does not produce the same result as either half of the hash bytes from
995     * {@linkplain #hash128x64(byte[])} with the same byte data.
996     * This method will be removed in a future release.
997     * </p>
998     *
999     * <p>
1000     * Note: The sign extension bug in {@link #hash64(byte[], int, int, int)} does not effect
1001     * this result as the default seed is positive.
1002     * </p>
1003     *
1004     * <p>
1005     * This is a helper method that will produce the same result as:
1006     * </p>
1007     *
1008     * <pre>
1009     * int seed = 104729;
1010     * long hash = MurmurHash3.hash64(data, offset, length, seed);
1011     * </pre>
1012     *
1013     * @param data The input byte array
1014     * @param offset The offset of data
1015     * @param length The length of array
1016     * @return The 64-bit hash
1017     * @see #hash64(byte[], int, int, int)
1018     * @deprecated Not part of the MurmurHash3 implementation.
1019     * Use half of the hash bytes from {@link #hash128x64(byte[], int, int, int)}.
1020     */
1021    @Deprecated
1022    public static long hash64(final byte[] data, final int offset, final int length) {
1023        return hash64(data, offset, length, DEFAULT_SEED);
1024    }
1025
1026    /**
1027     * Generates 64-bit hash from a byte array with the given offset, length and seed.
1028     *
1029     * <p><strong>This is not part of the original MurmurHash3 {@code c++} implementation.</strong></p>
1030     *
1031     * <p>
1032     * This is a Murmur3-like 64-bit variant.
1033     * This method will be removed in a future release.
1034     * </p>
1035     *
1036     * <p>
1037     * This implementation contains a sign-extension bug in the seed initialization.
1038     * This manifests if the seed is negative.
1039     * </p>
1040     *
1041     * <p>
1042     * This algorithm processes 8 bytes chunks of data in a manner similar to the 16 byte chunks
1043     * of data processed in the MurmurHash3 {@code MurmurHash3_x64_128} method. However the hash
1044     * is not mixed with a hash chunk from the next 8 bytes of data. The method will not return
1045     * the same value as the first or second 64-bits of the function
1046     * {@link #hash128(byte[], int, int, int)}.
1047     * </p>
1048     *
1049     * <p>
1050     * Use of this method is not advised. Use the first long returned from
1051     * {@link #hash128x64(byte[], int, int, int)}.
1052     * </p>
1053     *
1054     * @param data The input byte array
1055     * @param offset The offset of data
1056     * @param length The length of array
1057     * @param seed The initial seed value
1058     * @return The 64-bit hash
1059     * @deprecated Not part of the MurmurHash3 implementation.
1060     * Use half of the hash bytes from {@link #hash128x64(byte[], int, int, int)}.
1061     */
1062    @Deprecated
1063    public static long hash64(final byte[] data, final int offset, final int length, final int seed) {
1064        // Note: This fails to apply masking using 0xffffffffL to the seed.
1065        long hash = seed;
1066        final int nblocks = length >> 3;
1067        // body
1068        for (int i = 0; i < nblocks; i++) {
1069            final int index = offset + (i << 3);
1070            long k = MurmurHash.getLittleEndianLong(data, index);
1071            // mix functions
1072            k *= C1;
1073            k = Long.rotateLeft(k, R1);
1074            k *= C2;
1075            hash ^= k;
1076            hash = Long.rotateLeft(hash, R2) * M + N1;
1077        }
1078        // tail
1079        long k1 = 0;
1080        final int index = offset + (nblocks << 3);
1081        switch (offset + length - index) {
1082        case 7:
1083            k1 ^= ((long) data[index + 6] & 0xff) << 48;
1084            // falls-through
1085        case 6:
1086            k1 ^= ((long) data[index + 5] & 0xff) << 40;
1087            // falls-through
1088        case 5:
1089            k1 ^= ((long) data[index + 4] & 0xff) << 32;
1090            // falls-through
1091        case 4:
1092            k1 ^= ((long) data[index + 3] & 0xff) << 24;
1093            // falls-through
1094        case 3:
1095            k1 ^= ((long) data[index + 2] & 0xff) << 16;
1096            // falls-through
1097        case 2:
1098            k1 ^= ((long) data[index + 1] & 0xff) << 8;
1099            // falls-through
1100        case 1:
1101            k1 ^= (long) data[index] & 0xff;
1102            k1 *= C1;
1103            k1 = Long.rotateLeft(k1, R1);
1104            k1 *= C2;
1105            hash ^= k1;
1106        }
1107        // finalization
1108        hash ^= length;
1109        return fmix64(hash);
1110    }
1111
1112    /**
1113     * Generates 64-bit hash from an int with a default seed.
1114     *
1115     * <p><strong>
1116     * This is not part of the original MurmurHash3 {@code c++} implementation.
1117     * </strong></p>
1118     *
1119     * <p>
1120     * This is a Murmur3-like 64-bit variant.
1121     * The method does not produce the same result as either half of the hash bytes from
1122     * {@linkplain #hash128x64(byte[])} with the same byte data from the {@code int}.
1123     * This method will be removed in a future release.
1124     * </p>
1125     *
1126     * <p>
1127     * Note: The sign extension bug in {@link #hash64(byte[], int, int, int)} does not effect
1128     * this result as the default seed is positive.
1129     * </p>
1130     *
1131     * <p>
1132     * This is a helper method that will produce the same result as:
1133     * </p>
1134     *
1135     * <pre>
1136     * int offset = 0;
1137     * int seed = 104729;
1138     * long hash = MurmurHash3.hash64(ByteBuffer.allocate(4)
1139     *                                          .putInt(data)
1140     *                                          .array(), offset, 4, seed);
1141     * </pre>
1142     *
1143     * @param data The int to hash
1144     * @return The 64-bit hash
1145     * @see #hash64(byte[], int, int, int)
1146     * @deprecated Not part of the MurmurHash3 implementation.
1147     * Use half of the hash bytes from {@link #hash128x64(byte[])} with the bytes from the {@code int}.
1148     */
1149    @Deprecated
1150    public static long hash64(final int data) {
1151        long k1 = Integer.reverseBytes(data) & -1L >>> 32;
1152        long hash = DEFAULT_SEED;
1153        k1 *= C1;
1154        k1 = Long.rotateLeft(k1, R1);
1155        k1 *= C2;
1156        hash ^= k1;
1157        // finalization
1158        hash ^= Integer.BYTES;
1159        return fmix64(hash);
1160    }
1161
1162    /**
1163     * Generates 64-bit hash from a long with a default seed.
1164     *
1165     * <p><strong>
1166     * This is not part of the original MurmurHash3 {@code c++} implementation.
1167     * </strong></p>
1168     *
1169     * <p>
1170     * This is a Murmur3-like 64-bit variant.
1171     * The method does not produce the same result as either half of the hash bytes from
1172     * {@linkplain #hash128x64(byte[])} with the same byte data from the {@code long}.
1173     * This method will be removed in a future release.
1174     * </p>
1175     *
1176     * <p>
1177     * Note: The sign extension bug in {@link #hash64(byte[], int, int, int)} does not effect
1178     * this result as the default seed is positive.
1179     * </p>
1180     *
1181     * <p>
1182     * This is a helper method that will produce the same result as:
1183     * </p>
1184     *
1185     * <pre>
1186     * int offset = 0;
1187     * int seed = 104729;
1188     * long hash = MurmurHash3.hash64(ByteBuffer.allocate(8)
1189     *                                          .putLong(data)
1190     *                                          .array(), offset, 8, seed);
1191     * </pre>
1192     *
1193     * @param data The long to hash
1194     * @return The 64-bit hash
1195     * @see #hash64(byte[], int, int, int)
1196     * @deprecated Not part of the MurmurHash3 implementation.
1197     * Use half of the hash bytes from {@link #hash128x64(byte[])} with the bytes from the {@code long}.
1198     */
1199    @Deprecated
1200    public static long hash64(final long data) {
1201        long hash = DEFAULT_SEED;
1202        long k = Long.reverseBytes(data);
1203        // mix functions
1204        k *= C1;
1205        k = Long.rotateLeft(k, R1);
1206        k *= C2;
1207        hash ^= k;
1208        hash = Long.rotateLeft(hash, R2) * M + N1;
1209        // finalization
1210        hash ^= Long.BYTES;
1211        return fmix64(hash);
1212    }
1213
1214    /**
1215     * Generates 64-bit hash from a short with a default seed.
1216     *
1217     * <p><strong>This is not part of the original MurmurHash3 {@code c++} implementation.</strong></p>
1218     *
1219     * <p>
1220     * This is a Murmur3-like 64-bit variant.
1221     * The method does not produce the same result as either half of the hash bytes from
1222     * {@linkplain #hash128x64(byte[])} with the same byte data from the {@code short}.
1223     * This method will be removed in a future release.
1224     * </p>
1225     *
1226     * <p>
1227     * Note: The sign extension bug in {@link #hash64(byte[], int, int, int)} does not effect
1228     * this result as the default seed is positive.
1229     * </p>
1230     *
1231     * <p>
1232     * This is a helper method that will produce the same result as:
1233     * </p>
1234     *
1235     * <pre>
1236     * int offset = 0;
1237     * int seed = 104729;
1238     * long hash = MurmurHash3.hash64(ByteBuffer.allocate(2)
1239     *                                          .putShort(data)
1240     *                                          .array(), offset, 2, seed);
1241     * </pre>
1242     *
1243     * @param data The short to hash
1244     * @return The 64-bit hash
1245     * @see #hash64(byte[], int, int, int)
1246     * @deprecated Not part of the MurmurHash3 implementation.
1247     * Use half of the hash bytes from {@link #hash128x64(byte[])} with the bytes from the {@code short}.
1248     */
1249    @Deprecated
1250    public static long hash64(final short data) {
1251        long hash = DEFAULT_SEED;
1252        long k1 = 0;
1253        k1 ^= ((long) data & 0xff) << 8;
1254        k1 ^= (long) ((data & 0xFF00) >> 8) & 0xff;
1255        k1 *= C1;
1256        k1 = Long.rotateLeft(k1, R1);
1257        k1 *= C2;
1258        hash ^= k1;
1259
1260        // finalization
1261        hash ^= Short.BYTES;
1262        return fmix64(hash);
1263    }
1264
1265    /**
1266     * Performs the intermediate mix step of the 32-bit hash function {@code MurmurHash3_x86_32}.
1267     *
1268     * @param k The data to add to the hash
1269     * @param hash The current hash
1270     * @return The new hash
1271     */
1272    private static int mix32(int k, int hash) {
1273        k *= C1_32;
1274        k = Integer.rotateLeft(k, R1_32);
1275        k *= C2_32;
1276        hash ^= k;
1277        return Integer.rotateLeft(hash, R2_32) * M_32 + N_32;
1278    }
1279
1280    /** No instance methods. */
1281    private MurmurHash3() {
1282    }
1283}