001/*
002 * Licensed to the Apache Software Foundation (ASF) under one or more
003 * contributor license agreements.  See the NOTICE file distributed with
004 * this work for additional information regarding copyright ownership.
005 * The ASF licenses this file to You under the Apache License, Version 2.0
006 * (the "License"); you may not use this file except in compliance with
007 * the License.  You may obtain a copy of the License at
008 *
009 *      http://www.apache.org/licenses/LICENSE-2.0
010 *
011 * Unless required by applicable law or agreed to in writing, software
012 * distributed under the License is distributed on an "AS IS" BASIS,
013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 * See the License for the specific language governing permissions and
015 * limitations under the License.
016 */
017
018package org.apache.commons.codec.digest;
019
020/**
021 * MurmurHash3 yields a 32-bit or 128-bit value.
022 *
023 * MurmurHash is a non-cryptographic hash function suitable for general
024 * hash-based lookup. The name comes from two basic operations, multiply (MU)
025 * and rotate (R), used in its inner loop. Unlike cryptographic hash functions,
026 * it is not specifically designed to be difficult to reverse by an adversary,
027 * making it unsuitable for cryptographic purposes.
028 *
029 * 32-bit Java port of
030 * https://code.google.com/p/smhasher/source/browse/trunk/MurmurHash3.cpp#94
031 * 128-bit Java port of
032 * https://code.google.com/p/smhasher/source/browse/trunk/MurmurHash3.cpp#255
033 *
034 * This is a public domain code with no copyrights. From homepage of MurmurHash
035 * (https://code.google.com/p/smhasher/), "All MurmurHash versions are public
036 * domain software, and the author disclaims all copyright to their code."
037 *
038 * Copied from Apache Hive:
039 * https://github.com/apache/hive/blob/master/storage-api/src/java/org/apache/hive/common/util/Murmur3.java
040 *
041 * @see <a href="https://en.wikipedia.org/wiki/MurmurHash">MurmurHash</a>
042 * @since 1.13
043 */
044public final class MurmurHash3 {
045
046  /** TODO Replace on Java 8 with Long.BYTES. */
047  static final int LONG_BYTES = Long.SIZE / Byte.SIZE;
048
049  /** TODO Replace on Java 8 with Integer.BYTES. */
050  static final int INTEGER_BYTES = Integer.SIZE / Byte.SIZE;
051
052  /** TODO Replace on Java 8 with Short.BYTES. */
053  static final int SHORT_BYTES = Short.SIZE / Byte.SIZE;
054
055        // from 64-bit linear congruential generator
056        public static final long NULL_HASHCODE = 2862933555777941757L;
057
058        // Constants for 32 bit variant
059        private static final int C1_32 = 0xcc9e2d51;
060        private static final int C2_32 = 0x1b873593;
061        private static final int R1_32 = 15;
062        private static final int R2_32 = 13;
063        private static final int M_32 = 5;
064        private static final int N_32 = 0xe6546b64;
065
066        // Constants for 128 bit variant
067        private static final long C1 = 0x87c37b91114253d5L;
068        private static final long C2 = 0x4cf5ad432745937fL;
069        private static final int R1 = 31;
070        private static final int R2 = 27;
071        private static final int R3 = 33;
072        private static final int M = 5;
073        private static final int N1 = 0x52dce729;
074        private static final int N2 = 0x38495ab5;
075
076        public static final int DEFAULT_SEED = 104729;
077
078        // all methods static; private constructor.
079        private MurmurHash3() {
080        }
081
082        /**
083         * Generates 32 bit hash from two longs with default seed value.
084         *
085         * @param l0 long to hash
086         * @param l1 long to hash
087         * @return 32 bit hash
088         */
089        public static int hash32(final long l0, final long l1) {
090                return hash32(l0, l1, DEFAULT_SEED);
091        }
092
093        /**
094         * Generates 32 bit hash from a long with default seed value.
095         *
096         * @param l0 long to hash
097         * @return 32 bit hash
098         */
099        public static int hash32(final long l0) {
100                return hash32(l0, DEFAULT_SEED);
101        }
102
103        /**
104         * Generates 32 bit hash from a long with the given seed.
105         *
106         * @param l0   long to hash
107         * @param seed initial seed value
108         * @return 32 bit hash
109         */
110        public static int hash32(final long l0, final int seed) {
111                int hash = seed;
112                final long r0 = Long.reverseBytes(l0);
113
114                hash = mix32((int) r0, hash);
115                hash = mix32((int) (r0 >>> 32), hash);
116
117                return fmix32(LONG_BYTES, hash);
118        }
119
120        /**
121         * Generates 32 bit hash from two longs with the given seed.
122         *
123         * @param l0   long to hash
124         * @param l1   long to hash
125         * @param seed initial seed value
126         * @return 32 bit hash
127         */
128        public static int hash32(final long l0, final long l1, final int seed) {
129                int hash = seed;
130                final long r0 = Long.reverseBytes(l0);
131                final long r1 = Long.reverseBytes(l1);
132
133                hash = mix32((int) r0, hash);
134                hash = mix32((int) (r0 >>> 32), hash);
135                hash = mix32((int) (r1), hash);
136                hash = mix32((int) (r1 >>> 32), hash);
137
138                return fmix32(LONG_BYTES * 2, hash);
139        }
140
141        /**
142         * Generates 32 bit hash from byte array with the default seed.
143         *
144         * @param data - input byte array
145         * @return 32 bit hash
146         */
147        public static int hash32(final byte[] data) {
148                return hash32(data, 0, data.length, DEFAULT_SEED);
149        }
150
151        /**
152         * Generates 32 bit hash from a string with the default seed.
153         *
154         * @param data - input string
155         * @return 32 bit hash
156         */
157        public static int hash32(final String data) {
158                final byte[] origin = data.getBytes();
159                return hash32(origin, 0, origin.length, DEFAULT_SEED);
160        }
161
162        /**
163         * Generates 32 bit hash from byte array with the default seed.
164         *
165         * @param data   - input byte array
166         * @param length - length of array
167         * @return 32 bit hash
168         */
169        public static int hash32(final byte[] data, final int length) {
170                return hash32(data, length, DEFAULT_SEED);
171        }
172
173        /**
174         * Generates 32 bit hash from byte array with the given length and seed.
175         *
176         * @param data   - input byte array
177         * @param length - length of array
178         * @param seed   - seed. (default 0)
179         * @return 32 bit hash
180         */
181        public static int hash32(final byte[] data, final int length, final int seed) {
182                return hash32(data, 0, length, seed);
183        }
184
185        /**
186         * Generates 32 bit hash from byte array with the given length, offset and seed.
187         *
188         * @param data   - input byte array
189         * @param offset - offset of data
190         * @param length - length of array
191         * @param seed   - seed. (default 0)
192         * @return 32 bit hash
193         */
194        public static int hash32(final byte[] data, final int offset, final int length, final int seed) {
195                int hash = seed;
196                final int nblocks = length >> 2;
197
198                // body
199                for (int i = 0; i < nblocks; i++) {
200                        final int i_4 = i << 2;
201                        final int k = (data[offset + i_4] & 0xff) | ((data[offset + i_4 + 1] & 0xff) << 8)
202                                        | ((data[offset + i_4 + 2] & 0xff) << 16) | ((data[offset + i_4 + 3] & 0xff) << 24);
203
204                        hash = mix32(k, hash);
205                }
206
207                // tail
208                final int idx = nblocks << 2;
209                int k1 = 0;
210                switch (length - idx) {
211                case 3:
212                        k1 ^= data[offset + idx + 2] << 16;
213                case 2:
214                        k1 ^= data[offset + idx + 1] << 8;
215                case 1:
216                        k1 ^= data[offset + idx];
217
218                        // mix functions
219                        k1 *= C1_32;
220                        k1 = Integer.rotateLeft(k1, R1_32);
221                        k1 *= C2_32;
222                        hash ^= k1;
223                }
224
225                return fmix32(length, hash);
226        }
227
228        /**
229         * Murmur3 64-bit variant. This is essentially MSB 8 bytes of Murmur3 128-bit
230         * variant.
231         *
232         * @param data - input byte array
233         * @return 64 bit hash
234         */
235        public static long hash64(final byte[] data) {
236                return hash64(data, 0, data.length, DEFAULT_SEED);
237        }
238
239        /**
240         * Murmur3 64-bit variant. This is essentially MSB 8 bytes of Murmur3 128-bit
241         * variant.
242         *
243         * @param data - input long
244         * @return 64 bit hash
245         */
246        public static long hash64(final long data) {
247                long hash = DEFAULT_SEED;
248                long k = Long.reverseBytes(data);
249                final int length = LONG_BYTES;
250                // mix functions
251                k *= C1;
252                k = Long.rotateLeft(k, R1);
253                k *= C2;
254                hash ^= k;
255                hash = Long.rotateLeft(hash, R2) * M + N1;
256                // finalization
257                hash ^= length;
258                hash = fmix64(hash);
259                return hash;
260        }
261
262        /**
263         * Murmur3 64-bit variant. This is essentially MSB 8 bytes of Murmur3 128-bit
264         * variant.
265         *
266         * @param data - input int
267         * @return 64 bit hash
268         */
269        public static long hash64(final int data) {
270                long k1 = Integer.reverseBytes(data) & (-1L >>> 32);
271                final int length = INTEGER_BYTES;
272                long hash = DEFAULT_SEED;
273                k1 *= C1;
274                k1 = Long.rotateLeft(k1, R1);
275                k1 *= C2;
276                hash ^= k1;
277                // finalization
278                hash ^= length;
279                hash = fmix64(hash);
280                return hash;
281        }
282
283        /**
284         * Murmur3 64-bit variant. This is essentially MSB 8 bytes of Murmur3 128-bit
285         * variant.
286         *
287         * @param data - input short
288         * @return 64 bit hash
289         */
290        public static long hash64(final short data) {
291                long hash = DEFAULT_SEED;
292                long k1 = 0;
293                k1 ^= ((long) data & 0xff) << 8;
294                k1 ^= ((long) ((data & 0xFF00) >> 8) & 0xff);
295                k1 *= C1;
296                k1 = Long.rotateLeft(k1, R1);
297                k1 *= C2;
298                hash ^= k1;
299
300                // finalization
301                hash ^= SHORT_BYTES;
302                hash = fmix64(hash);
303                return hash;
304        }
305
306        /**
307         * Generates 64 bit hash from byte array with the given length, offset and
308         * default seed.
309         *
310         * @param data   - input byte array
311         * @param offset - offset of data
312         * @param length - length of array
313         * @return 64 bit hash
314         */
315        public static long hash64(final byte[] data, final int offset, final int length) {
316                return hash64(data, offset, length, DEFAULT_SEED);
317        }
318
319        /**
320         * Generates 64 bit hash from byte array with the given length, offset and seed.
321         *
322         * @param data   - input byte array
323         * @param offset - offset of data
324         * @param length - length of array
325         * @param seed   - seed. (default 0)
326         * @return 64 bit hash
327         */
328        public static long hash64(final byte[] data, final int offset, final int length, final int seed) {
329                long hash = seed;
330                final int nblocks = length >> 3;
331
332                // body
333                for (int i = 0; i < nblocks; i++) {
334                        final int i8 = i << 3;
335                        long k = ((long) data[offset + i8] & 0xff) | (((long) data[offset + i8 + 1] & 0xff) << 8)
336                                        | (((long) data[offset + i8 + 2] & 0xff) << 16) | (((long) data[offset + i8 + 3] & 0xff) << 24)
337                                        | (((long) data[offset + i8 + 4] & 0xff) << 32) | (((long) data[offset + i8 + 5] & 0xff) << 40)
338                                        | (((long) data[offset + i8 + 6] & 0xff) << 48) | (((long) data[offset + i8 + 7] & 0xff) << 56);
339
340                        // mix functions
341                        k *= C1;
342                        k = Long.rotateLeft(k, R1);
343                        k *= C2;
344                        hash ^= k;
345                        hash = Long.rotateLeft(hash, R2) * M + N1;
346                }
347
348                // tail
349                long k1 = 0;
350                final int tailStart = nblocks << 3;
351                switch (length - tailStart) {
352                case 7:
353                        k1 ^= ((long) data[offset + tailStart + 6] & 0xff) << 48;
354                case 6:
355                        k1 ^= ((long) data[offset + tailStart + 5] & 0xff) << 40;
356                case 5:
357                        k1 ^= ((long) data[offset + tailStart + 4] & 0xff) << 32;
358                case 4:
359                        k1 ^= ((long) data[offset + tailStart + 3] & 0xff) << 24;
360                case 3:
361                        k1 ^= ((long) data[offset + tailStart + 2] & 0xff) << 16;
362                case 2:
363                        k1 ^= ((long) data[offset + tailStart + 1] & 0xff) << 8;
364                case 1:
365                        k1 ^= ((long) data[offset + tailStart] & 0xff);
366                        k1 *= C1;
367                        k1 = Long.rotateLeft(k1, R1);
368                        k1 *= C2;
369                        hash ^= k1;
370                }
371
372                // finalization
373                hash ^= length;
374                hash = fmix64(hash);
375
376                return hash;
377        }
378
379        /**
380         * Murmur3 128-bit variant.
381         *
382         * @param data - input byte array
383         * @return - 128 bit hash (2 longs)
384         */
385        public static long[] hash128(final byte[] data) {
386                return hash128(data, 0, data.length, DEFAULT_SEED);
387        }
388
389        /**
390         * Murmur3 128-bit variant.
391         *
392         * @param data - input String
393         * @return - 128 bit hash (2 longs)
394         */
395        public static long[] hash128(final String data) {
396                final byte[] origin = data.getBytes();
397                return hash128(origin, 0, origin.length, DEFAULT_SEED);
398        }
399
400        /**
401         * Murmur3 128-bit variant.
402         *
403         * @param data   - input byte array
404         * @param offset - the first element of array
405         * @param length - length of array
406         * @param seed   - seed. (default is 0)
407         * @return - 128 bit hash (2 longs)
408         */
409        public static long[] hash128(final byte[] data, final int offset, final int length, final int seed) {
410                long h1 = seed;
411                long h2 = seed;
412                final int nblocks = length >> 4;
413
414                // body
415                for (int i = 0; i < nblocks; i++) {
416                        final int i16 = i << 4;
417                        long k1 = ((long) data[offset + i16] & 0xff) | (((long) data[offset + i16 + 1] & 0xff) << 8)
418                                        | (((long) data[offset + i16 + 2] & 0xff) << 16) | (((long) data[offset + i16 + 3] & 0xff) << 24)
419                                        | (((long) data[offset + i16 + 4] & 0xff) << 32) | (((long) data[offset + i16 + 5] & 0xff) << 40)
420                                        | (((long) data[offset + i16 + 6] & 0xff) << 48) | (((long) data[offset + i16 + 7] & 0xff) << 56);
421
422                        long k2 = ((long) data[offset + i16 + 8] & 0xff) | (((long) data[offset + i16 + 9] & 0xff) << 8)
423                                        | (((long) data[offset + i16 + 10] & 0xff) << 16) | (((long) data[offset + i16 + 11] & 0xff) << 24)
424                                        | (((long) data[offset + i16 + 12] & 0xff) << 32) | (((long) data[offset + i16 + 13] & 0xff) << 40)
425                                        | (((long) data[offset + i16 + 14] & 0xff) << 48) | (((long) data[offset + i16 + 15] & 0xff) << 56);
426
427                        // mix functions for k1
428                        k1 *= C1;
429                        k1 = Long.rotateLeft(k1, R1);
430                        k1 *= C2;
431                        h1 ^= k1;
432                        h1 = Long.rotateLeft(h1, R2);
433                        h1 += h2;
434                        h1 = h1 * M + N1;
435
436                        // mix functions for k2
437                        k2 *= C2;
438                        k2 = Long.rotateLeft(k2, R3);
439                        k2 *= C1;
440                        h2 ^= k2;
441                        h2 = Long.rotateLeft(h2, R1);
442                        h2 += h1;
443                        h2 = h2 * M + N2;
444                }
445
446                // tail
447                long k1 = 0;
448                long k2 = 0;
449                final int tailStart = nblocks << 4;
450                switch (length - tailStart) {
451                case 15:
452                        k2 ^= (long) (data[offset + tailStart + 14] & 0xff) << 48;
453                case 14:
454                        k2 ^= (long) (data[offset + tailStart + 13] & 0xff) << 40;
455                case 13:
456                        k2 ^= (long) (data[offset + tailStart + 12] & 0xff) << 32;
457                case 12:
458                        k2 ^= (long) (data[offset + tailStart + 11] & 0xff) << 24;
459                case 11:
460                        k2 ^= (long) (data[offset + tailStart + 10] & 0xff) << 16;
461                case 10:
462                        k2 ^= (long) (data[offset + tailStart + 9] & 0xff) << 8;
463                case 9:
464                        k2 ^= data[offset + tailStart + 8] & 0xff;
465                        k2 *= C2;
466                        k2 = Long.rotateLeft(k2, R3);
467                        k2 *= C1;
468                        h2 ^= k2;
469
470                case 8:
471                        k1 ^= (long) (data[offset + tailStart + 7] & 0xff) << 56;
472                case 7:
473                        k1 ^= (long) (data[offset + tailStart + 6] & 0xff) << 48;
474                case 6:
475                        k1 ^= (long) (data[offset + tailStart + 5] & 0xff) << 40;
476                case 5:
477                        k1 ^= (long) (data[offset + tailStart + 4] & 0xff) << 32;
478                case 4:
479                        k1 ^= (long) (data[offset + tailStart + 3] & 0xff) << 24;
480                case 3:
481                        k1 ^= (long) (data[offset + tailStart + 2] & 0xff) << 16;
482                case 2:
483                        k1 ^= (long) (data[offset + tailStart + 1] & 0xff) << 8;
484                case 1:
485                        k1 ^= data[offset + tailStart] & 0xff;
486                        k1 *= C1;
487                        k1 = Long.rotateLeft(k1, R1);
488                        k1 *= C2;
489                        h1 ^= k1;
490                }
491
492                // finalization
493                h1 ^= length;
494                h2 ^= length;
495
496                h1 += h2;
497                h2 += h1;
498
499                h1 = fmix64(h1);
500                h2 = fmix64(h2);
501
502                h1 += h2;
503                h2 += h1;
504
505                return new long[] { h1, h2 };
506        }
507
508        private static int mix32(int k, int hash) {
509                k *= C1_32;
510                k = Integer.rotateLeft(k, R1_32);
511                k *= C2_32;
512                hash ^= k;
513                return Integer.rotateLeft(hash, R2_32) * M_32 + N_32;
514        }
515
516        private static int fmix32(final int length, int hash) {
517                hash ^= length;
518                hash ^= (hash >>> 16);
519                hash *= 0x85ebca6b;
520                hash ^= (hash >>> 13);
521                hash *= 0xc2b2ae35;
522                hash ^= (hash >>> 16);
523
524                return hash;
525        }
526
527        private static long fmix64(long h) {
528                h ^= (h >>> 33);
529                h *= 0xff51afd7ed558ccdL;
530                h ^= (h >>> 33);
531                h *= 0xc4ceb9fe1a85ec53L;
532                h ^= (h >>> 33);
533                return h;
534        }
535
536        public static class IncrementalHash32 {
537                byte[] tail = new byte[3];
538                int tailLen;
539                int totalLen;
540                int hash;
541
542                public final void start(final int hash) {
543                        tailLen = totalLen = 0;
544                        this.hash = hash;
545                }
546
547                public final void add(final byte[] data, int offset, final int length) {
548                        if (length == 0) {
549                return;
550            }
551                        totalLen += length;
552                        if (tailLen + length < 4) {
553                                System.arraycopy(data, offset, tail, tailLen, length);
554                                tailLen += length;
555                                return;
556                        }
557                        int offset2 = 0;
558                        if (tailLen > 0) {
559                                offset2 = (4 - tailLen);
560                                int k = -1;
561                                switch (tailLen) {
562                                case 1:
563                                        k = orBytes(tail[0], data[offset], data[offset + 1], data[offset + 2]);
564                                        break;
565                                case 2:
566                                        k = orBytes(tail[0], tail[1], data[offset], data[offset + 1]);
567                                        break;
568                                case 3:
569                                        k = orBytes(tail[0], tail[1], tail[2], data[offset]);
570                                        break;
571                                default:
572                                        throw new AssertionError(tailLen);
573                                }
574                                // mix functions
575                                k *= C1_32;
576                                k = Integer.rotateLeft(k, R1_32);
577                                k *= C2_32;
578                                hash ^= k;
579                                hash = Integer.rotateLeft(hash, R2_32) * M_32 + N_32;
580                        }
581                        final int length2 = length - offset2;
582                        offset += offset2;
583                        final int nblocks = length2 >> 2;
584
585                        for (int i = 0; i < nblocks; i++) {
586                                final int i_4 = (i << 2) + offset;
587                                int k = orBytes(data[i_4], data[i_4 + 1], data[i_4 + 2], data[i_4 + 3]);
588
589                                // mix functions
590                                k *= C1_32;
591                                k = Integer.rotateLeft(k, R1_32);
592                                k *= C2_32;
593                                hash ^= k;
594                                hash = Integer.rotateLeft(hash, R2_32) * M_32 + N_32;
595                        }
596
597                        final int consumed = (nblocks << 2);
598                        tailLen = length2 - consumed;
599                        if (consumed == length2) {
600                return;
601            }
602                        System.arraycopy(data, offset + consumed, tail, 0, tailLen);
603                }
604
605                public final int end() {
606                        int k1 = 0;
607                        switch (tailLen) {
608                        case 3:
609                                k1 ^= tail[2] << 16;
610                        case 2:
611                                k1 ^= tail[1] << 8;
612                        case 1:
613                                k1 ^= tail[0];
614
615                                // mix functions
616                                k1 *= C1_32;
617                                k1 = Integer.rotateLeft(k1, R1_32);
618                                k1 *= C2_32;
619                                hash ^= k1;
620                        }
621
622                        // finalization
623                        hash ^= totalLen;
624                        hash ^= (hash >>> 16);
625                        hash *= 0x85ebca6b;
626                        hash ^= (hash >>> 13);
627                        hash *= 0xc2b2ae35;
628                        hash ^= (hash >>> 16);
629                        return hash;
630                }
631        }
632
633        private static int orBytes(final byte b1, final byte b2, final byte b3, final byte b4) {
634                return (b1 & 0xff) | ((b2 & 0xff) << 8) | ((b3 & 0xff) << 16) | ((b4 & 0xff) << 24);
635        }
636}