001/* 002 * Licensed to the Apache Software Foundation (ASF) under one or more 003 * contributor license agreements. See the NOTICE file distributed with 004 * this work for additional information regarding copyright ownership. 005 * The ASF licenses this file to You under the Apache License, Version 2.0 006 * (the "License"); you may not use this file except in compliance with 007 * the License. You may obtain a copy of the License at 008 * 009 * http://www.apache.org/licenses/LICENSE-2.0 010 * 011 * Unless required by applicable law or agreed to in writing, software 012 * distributed under the License is distributed on an "AS IS" BASIS, 013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 014 * See the License for the specific language governing permissions and 015 * limitations under the License. 016 */ 017 018package org.apache.commons.codec.digest; 019 020/** 021 * MurmurHash3 yields a 32-bit or 128-bit value. 022 * 023 * MurmurHash is a non-cryptographic hash function suitable for general 024 * hash-based lookup. The name comes from two basic operations, multiply (MU) 025 * and rotate (R), used in its inner loop. Unlike cryptographic hash functions, 026 * it is not specifically designed to be difficult to reverse by an adversary, 027 * making it unsuitable for cryptographic purposes. 028 * 029 * 32-bit Java port of 030 * https://code.google.com/p/smhasher/source/browse/trunk/MurmurHash3.cpp#94 031 * 128-bit Java port of 032 * https://code.google.com/p/smhasher/source/browse/trunk/MurmurHash3.cpp#255 033 * 034 * This is a public domain code with no copyrights. From homepage of MurmurHash 035 * (https://code.google.com/p/smhasher/), "All MurmurHash versions are public 036 * domain software, and the author disclaims all copyright to their code." 037 * 038 * Copied from Apache Hive: 039 * https://github.com/apache/hive/blob/master/storage-api/src/java/org/apache/hive/common/util/Murmur3.java 040 * 041 * @see <a href="https://en.wikipedia.org/wiki/MurmurHash">MurmurHash</a> 042 * @since 1.13 043 */ 044public final class MurmurHash3 { 045 046 /** TODO Replace on Java 8 with Long.BYTES. */ 047 static final int LONG_BYTES = Long.SIZE / Byte.SIZE; 048 049 /** TODO Replace on Java 8 with Integer.BYTES. */ 050 static final int INTEGER_BYTES = Integer.SIZE / Byte.SIZE; 051 052 /** TODO Replace on Java 8 with Short.BYTES. */ 053 static final int SHORT_BYTES = Short.SIZE / Byte.SIZE; 054 055 // from 64-bit linear congruential generator 056 public static final long NULL_HASHCODE = 2862933555777941757L; 057 058 // Constants for 32 bit variant 059 private static final int C1_32 = 0xcc9e2d51; 060 private static final int C2_32 = 0x1b873593; 061 private static final int R1_32 = 15; 062 private static final int R2_32 = 13; 063 private static final int M_32 = 5; 064 private static final int N_32 = 0xe6546b64; 065 066 // Constants for 128 bit variant 067 private static final long C1 = 0x87c37b91114253d5L; 068 private static final long C2 = 0x4cf5ad432745937fL; 069 private static final int R1 = 31; 070 private static final int R2 = 27; 071 private static final int R3 = 33; 072 private static final int M = 5; 073 private static final int N1 = 0x52dce729; 074 private static final int N2 = 0x38495ab5; 075 076 public static final int DEFAULT_SEED = 104729; 077 078 // all methods static; private constructor. 079 private MurmurHash3() { 080 } 081 082 /** 083 * Generates 32 bit hash from two longs with default seed value. 084 * 085 * @param l0 long to hash 086 * @param l1 long to hash 087 * @return 32 bit hash 088 */ 089 public static int hash32(final long l0, final long l1) { 090 return hash32(l0, l1, DEFAULT_SEED); 091 } 092 093 /** 094 * Generates 32 bit hash from a long with default seed value. 095 * 096 * @param l0 long to hash 097 * @return 32 bit hash 098 */ 099 public static int hash32(final long l0) { 100 return hash32(l0, DEFAULT_SEED); 101 } 102 103 /** 104 * Generates 32 bit hash from a long with the given seed. 105 * 106 * @param l0 long to hash 107 * @param seed initial seed value 108 * @return 32 bit hash 109 */ 110 public static int hash32(final long l0, final int seed) { 111 int hash = seed; 112 final long r0 = Long.reverseBytes(l0); 113 114 hash = mix32((int) r0, hash); 115 hash = mix32((int) (r0 >>> 32), hash); 116 117 return fmix32(LONG_BYTES, hash); 118 } 119 120 /** 121 * Generates 32 bit hash from two longs with the given seed. 122 * 123 * @param l0 long to hash 124 * @param l1 long to hash 125 * @param seed initial seed value 126 * @return 32 bit hash 127 */ 128 public static int hash32(final long l0, final long l1, final int seed) { 129 int hash = seed; 130 final long r0 = Long.reverseBytes(l0); 131 final long r1 = Long.reverseBytes(l1); 132 133 hash = mix32((int) r0, hash); 134 hash = mix32((int) (r0 >>> 32), hash); 135 hash = mix32((int) (r1), hash); 136 hash = mix32((int) (r1 >>> 32), hash); 137 138 return fmix32(LONG_BYTES * 2, hash); 139 } 140 141 /** 142 * Generates 32 bit hash from byte array with the default seed. 143 * 144 * @param data - input byte array 145 * @return 32 bit hash 146 */ 147 public static int hash32(final byte[] data) { 148 return hash32(data, 0, data.length, DEFAULT_SEED); 149 } 150 151 /** 152 * Generates 32 bit hash from a string with the default seed. 153 * 154 * @param data - input string 155 * @return 32 bit hash 156 */ 157 public static int hash32(final String data) { 158 final byte[] origin = data.getBytes(); 159 return hash32(origin, 0, origin.length, DEFAULT_SEED); 160 } 161 162 /** 163 * Generates 32 bit hash from byte array with the default seed. 164 * 165 * @param data - input byte array 166 * @param length - length of array 167 * @return 32 bit hash 168 */ 169 public static int hash32(final byte[] data, final int length) { 170 return hash32(data, length, DEFAULT_SEED); 171 } 172 173 /** 174 * Generates 32 bit hash from byte array with the given length and seed. 175 * 176 * @param data - input byte array 177 * @param length - length of array 178 * @param seed - seed. (default 0) 179 * @return 32 bit hash 180 */ 181 public static int hash32(final byte[] data, final int length, final int seed) { 182 return hash32(data, 0, length, seed); 183 } 184 185 /** 186 * Generates 32 bit hash from byte array with the given length, offset and seed. 187 * 188 * @param data - input byte array 189 * @param offset - offset of data 190 * @param length - length of array 191 * @param seed - seed. (default 0) 192 * @return 32 bit hash 193 */ 194 public static int hash32(final byte[] data, final int offset, final int length, final int seed) { 195 int hash = seed; 196 final int nblocks = length >> 2; 197 198 // body 199 for (int i = 0; i < nblocks; i++) { 200 final int i_4 = i << 2; 201 final int k = (data[offset + i_4] & 0xff) | ((data[offset + i_4 + 1] & 0xff) << 8) 202 | ((data[offset + i_4 + 2] & 0xff) << 16) | ((data[offset + i_4 + 3] & 0xff) << 24); 203 204 hash = mix32(k, hash); 205 } 206 207 // tail 208 final int idx = nblocks << 2; 209 int k1 = 0; 210 switch (length - idx) { 211 case 3: 212 k1 ^= data[offset + idx + 2] << 16; 213 case 2: 214 k1 ^= data[offset + idx + 1] << 8; 215 case 1: 216 k1 ^= data[offset + idx]; 217 218 // mix functions 219 k1 *= C1_32; 220 k1 = Integer.rotateLeft(k1, R1_32); 221 k1 *= C2_32; 222 hash ^= k1; 223 } 224 225 return fmix32(length, hash); 226 } 227 228 /** 229 * Murmur3 64-bit variant. This is essentially MSB 8 bytes of Murmur3 128-bit 230 * variant. 231 * 232 * @param data - input byte array 233 * @return 64 bit hash 234 */ 235 public static long hash64(final byte[] data) { 236 return hash64(data, 0, data.length, DEFAULT_SEED); 237 } 238 239 /** 240 * Murmur3 64-bit variant. This is essentially MSB 8 bytes of Murmur3 128-bit 241 * variant. 242 * 243 * @param data - input long 244 * @return 64 bit hash 245 */ 246 public static long hash64(final long data) { 247 long hash = DEFAULT_SEED; 248 long k = Long.reverseBytes(data); 249 final int length = LONG_BYTES; 250 // mix functions 251 k *= C1; 252 k = Long.rotateLeft(k, R1); 253 k *= C2; 254 hash ^= k; 255 hash = Long.rotateLeft(hash, R2) * M + N1; 256 // finalization 257 hash ^= length; 258 hash = fmix64(hash); 259 return hash; 260 } 261 262 /** 263 * Murmur3 64-bit variant. This is essentially MSB 8 bytes of Murmur3 128-bit 264 * variant. 265 * 266 * @param data - input int 267 * @return 64 bit hash 268 */ 269 public static long hash64(final int data) { 270 long k1 = Integer.reverseBytes(data) & (-1L >>> 32); 271 final int length = INTEGER_BYTES; 272 long hash = DEFAULT_SEED; 273 k1 *= C1; 274 k1 = Long.rotateLeft(k1, R1); 275 k1 *= C2; 276 hash ^= k1; 277 // finalization 278 hash ^= length; 279 hash = fmix64(hash); 280 return hash; 281 } 282 283 /** 284 * Murmur3 64-bit variant. This is essentially MSB 8 bytes of Murmur3 128-bit 285 * variant. 286 * 287 * @param data - input short 288 * @return 64 bit hash 289 */ 290 public static long hash64(final short data) { 291 long hash = DEFAULT_SEED; 292 long k1 = 0; 293 k1 ^= ((long) data & 0xff) << 8; 294 k1 ^= ((long) ((data & 0xFF00) >> 8) & 0xff); 295 k1 *= C1; 296 k1 = Long.rotateLeft(k1, R1); 297 k1 *= C2; 298 hash ^= k1; 299 300 // finalization 301 hash ^= SHORT_BYTES; 302 hash = fmix64(hash); 303 return hash; 304 } 305 306 /** 307 * Generates 64 bit hash from byte array with the given length, offset and 308 * default seed. 309 * 310 * @param data - input byte array 311 * @param offset - offset of data 312 * @param length - length of array 313 * @return 64 bit hash 314 */ 315 public static long hash64(final byte[] data, final int offset, final int length) { 316 return hash64(data, offset, length, DEFAULT_SEED); 317 } 318 319 /** 320 * Generates 64 bit hash from byte array with the given length, offset and seed. 321 * 322 * @param data - input byte array 323 * @param offset - offset of data 324 * @param length - length of array 325 * @param seed - seed. (default 0) 326 * @return 64 bit hash 327 */ 328 public static long hash64(final byte[] data, final int offset, final int length, final int seed) { 329 long hash = seed; 330 final int nblocks = length >> 3; 331 332 // body 333 for (int i = 0; i < nblocks; i++) { 334 final int i8 = i << 3; 335 long k = ((long) data[offset + i8] & 0xff) | (((long) data[offset + i8 + 1] & 0xff) << 8) 336 | (((long) data[offset + i8 + 2] & 0xff) << 16) | (((long) data[offset + i8 + 3] & 0xff) << 24) 337 | (((long) data[offset + i8 + 4] & 0xff) << 32) | (((long) data[offset + i8 + 5] & 0xff) << 40) 338 | (((long) data[offset + i8 + 6] & 0xff) << 48) | (((long) data[offset + i8 + 7] & 0xff) << 56); 339 340 // mix functions 341 k *= C1; 342 k = Long.rotateLeft(k, R1); 343 k *= C2; 344 hash ^= k; 345 hash = Long.rotateLeft(hash, R2) * M + N1; 346 } 347 348 // tail 349 long k1 = 0; 350 final int tailStart = nblocks << 3; 351 switch (length - tailStart) { 352 case 7: 353 k1 ^= ((long) data[offset + tailStart + 6] & 0xff) << 48; 354 case 6: 355 k1 ^= ((long) data[offset + tailStart + 5] & 0xff) << 40; 356 case 5: 357 k1 ^= ((long) data[offset + tailStart + 4] & 0xff) << 32; 358 case 4: 359 k1 ^= ((long) data[offset + tailStart + 3] & 0xff) << 24; 360 case 3: 361 k1 ^= ((long) data[offset + tailStart + 2] & 0xff) << 16; 362 case 2: 363 k1 ^= ((long) data[offset + tailStart + 1] & 0xff) << 8; 364 case 1: 365 k1 ^= ((long) data[offset + tailStart] & 0xff); 366 k1 *= C1; 367 k1 = Long.rotateLeft(k1, R1); 368 k1 *= C2; 369 hash ^= k1; 370 } 371 372 // finalization 373 hash ^= length; 374 hash = fmix64(hash); 375 376 return hash; 377 } 378 379 /** 380 * Murmur3 128-bit variant. 381 * 382 * @param data - input byte array 383 * @return - 128 bit hash (2 longs) 384 */ 385 public static long[] hash128(final byte[] data) { 386 return hash128(data, 0, data.length, DEFAULT_SEED); 387 } 388 389 /** 390 * Murmur3 128-bit variant. 391 * 392 * @param data - input String 393 * @return - 128 bit hash (2 longs) 394 */ 395 public static long[] hash128(final String data) { 396 final byte[] origin = data.getBytes(); 397 return hash128(origin, 0, origin.length, DEFAULT_SEED); 398 } 399 400 /** 401 * Murmur3 128-bit variant. 402 * 403 * @param data - input byte array 404 * @param offset - the first element of array 405 * @param length - length of array 406 * @param seed - seed. (default is 0) 407 * @return - 128 bit hash (2 longs) 408 */ 409 public static long[] hash128(final byte[] data, final int offset, final int length, final int seed) { 410 long h1 = seed; 411 long h2 = seed; 412 final int nblocks = length >> 4; 413 414 // body 415 for (int i = 0; i < nblocks; i++) { 416 final int i16 = i << 4; 417 long k1 = ((long) data[offset + i16] & 0xff) | (((long) data[offset + i16 + 1] & 0xff) << 8) 418 | (((long) data[offset + i16 + 2] & 0xff) << 16) | (((long) data[offset + i16 + 3] & 0xff) << 24) 419 | (((long) data[offset + i16 + 4] & 0xff) << 32) | (((long) data[offset + i16 + 5] & 0xff) << 40) 420 | (((long) data[offset + i16 + 6] & 0xff) << 48) | (((long) data[offset + i16 + 7] & 0xff) << 56); 421 422 long k2 = ((long) data[offset + i16 + 8] & 0xff) | (((long) data[offset + i16 + 9] & 0xff) << 8) 423 | (((long) data[offset + i16 + 10] & 0xff) << 16) | (((long) data[offset + i16 + 11] & 0xff) << 24) 424 | (((long) data[offset + i16 + 12] & 0xff) << 32) | (((long) data[offset + i16 + 13] & 0xff) << 40) 425 | (((long) data[offset + i16 + 14] & 0xff) << 48) | (((long) data[offset + i16 + 15] & 0xff) << 56); 426 427 // mix functions for k1 428 k1 *= C1; 429 k1 = Long.rotateLeft(k1, R1); 430 k1 *= C2; 431 h1 ^= k1; 432 h1 = Long.rotateLeft(h1, R2); 433 h1 += h2; 434 h1 = h1 * M + N1; 435 436 // mix functions for k2 437 k2 *= C2; 438 k2 = Long.rotateLeft(k2, R3); 439 k2 *= C1; 440 h2 ^= k2; 441 h2 = Long.rotateLeft(h2, R1); 442 h2 += h1; 443 h2 = h2 * M + N2; 444 } 445 446 // tail 447 long k1 = 0; 448 long k2 = 0; 449 final int tailStart = nblocks << 4; 450 switch (length - tailStart) { 451 case 15: 452 k2 ^= (long) (data[offset + tailStart + 14] & 0xff) << 48; 453 case 14: 454 k2 ^= (long) (data[offset + tailStart + 13] & 0xff) << 40; 455 case 13: 456 k2 ^= (long) (data[offset + tailStart + 12] & 0xff) << 32; 457 case 12: 458 k2 ^= (long) (data[offset + tailStart + 11] & 0xff) << 24; 459 case 11: 460 k2 ^= (long) (data[offset + tailStart + 10] & 0xff) << 16; 461 case 10: 462 k2 ^= (long) (data[offset + tailStart + 9] & 0xff) << 8; 463 case 9: 464 k2 ^= data[offset + tailStart + 8] & 0xff; 465 k2 *= C2; 466 k2 = Long.rotateLeft(k2, R3); 467 k2 *= C1; 468 h2 ^= k2; 469 470 case 8: 471 k1 ^= (long) (data[offset + tailStart + 7] & 0xff) << 56; 472 case 7: 473 k1 ^= (long) (data[offset + tailStart + 6] & 0xff) << 48; 474 case 6: 475 k1 ^= (long) (data[offset + tailStart + 5] & 0xff) << 40; 476 case 5: 477 k1 ^= (long) (data[offset + tailStart + 4] & 0xff) << 32; 478 case 4: 479 k1 ^= (long) (data[offset + tailStart + 3] & 0xff) << 24; 480 case 3: 481 k1 ^= (long) (data[offset + tailStart + 2] & 0xff) << 16; 482 case 2: 483 k1 ^= (long) (data[offset + tailStart + 1] & 0xff) << 8; 484 case 1: 485 k1 ^= data[offset + tailStart] & 0xff; 486 k1 *= C1; 487 k1 = Long.rotateLeft(k1, R1); 488 k1 *= C2; 489 h1 ^= k1; 490 } 491 492 // finalization 493 h1 ^= length; 494 h2 ^= length; 495 496 h1 += h2; 497 h2 += h1; 498 499 h1 = fmix64(h1); 500 h2 = fmix64(h2); 501 502 h1 += h2; 503 h2 += h1; 504 505 return new long[] { h1, h2 }; 506 } 507 508 private static int mix32(int k, int hash) { 509 k *= C1_32; 510 k = Integer.rotateLeft(k, R1_32); 511 k *= C2_32; 512 hash ^= k; 513 return Integer.rotateLeft(hash, R2_32) * M_32 + N_32; 514 } 515 516 private static int fmix32(final int length, int hash) { 517 hash ^= length; 518 hash ^= (hash >>> 16); 519 hash *= 0x85ebca6b; 520 hash ^= (hash >>> 13); 521 hash *= 0xc2b2ae35; 522 hash ^= (hash >>> 16); 523 524 return hash; 525 } 526 527 private static long fmix64(long h) { 528 h ^= (h >>> 33); 529 h *= 0xff51afd7ed558ccdL; 530 h ^= (h >>> 33); 531 h *= 0xc4ceb9fe1a85ec53L; 532 h ^= (h >>> 33); 533 return h; 534 } 535 536 public static class IncrementalHash32 { 537 byte[] tail = new byte[3]; 538 int tailLen; 539 int totalLen; 540 int hash; 541 542 public final void start(final int hash) { 543 tailLen = totalLen = 0; 544 this.hash = hash; 545 } 546 547 public final void add(final byte[] data, int offset, final int length) { 548 if (length == 0) { 549 return; 550 } 551 totalLen += length; 552 if (tailLen + length < 4) { 553 System.arraycopy(data, offset, tail, tailLen, length); 554 tailLen += length; 555 return; 556 } 557 int offset2 = 0; 558 if (tailLen > 0) { 559 offset2 = (4 - tailLen); 560 int k = -1; 561 switch (tailLen) { 562 case 1: 563 k = orBytes(tail[0], data[offset], data[offset + 1], data[offset + 2]); 564 break; 565 case 2: 566 k = orBytes(tail[0], tail[1], data[offset], data[offset + 1]); 567 break; 568 case 3: 569 k = orBytes(tail[0], tail[1], tail[2], data[offset]); 570 break; 571 default: 572 throw new AssertionError(tailLen); 573 } 574 // mix functions 575 k *= C1_32; 576 k = Integer.rotateLeft(k, R1_32); 577 k *= C2_32; 578 hash ^= k; 579 hash = Integer.rotateLeft(hash, R2_32) * M_32 + N_32; 580 } 581 final int length2 = length - offset2; 582 offset += offset2; 583 final int nblocks = length2 >> 2; 584 585 for (int i = 0; i < nblocks; i++) { 586 final int i_4 = (i << 2) + offset; 587 int k = orBytes(data[i_4], data[i_4 + 1], data[i_4 + 2], data[i_4 + 3]); 588 589 // mix functions 590 k *= C1_32; 591 k = Integer.rotateLeft(k, R1_32); 592 k *= C2_32; 593 hash ^= k; 594 hash = Integer.rotateLeft(hash, R2_32) * M_32 + N_32; 595 } 596 597 final int consumed = (nblocks << 2); 598 tailLen = length2 - consumed; 599 if (consumed == length2) { 600 return; 601 } 602 System.arraycopy(data, offset + consumed, tail, 0, tailLen); 603 } 604 605 public final int end() { 606 int k1 = 0; 607 switch (tailLen) { 608 case 3: 609 k1 ^= tail[2] << 16; 610 case 2: 611 k1 ^= tail[1] << 8; 612 case 1: 613 k1 ^= tail[0]; 614 615 // mix functions 616 k1 *= C1_32; 617 k1 = Integer.rotateLeft(k1, R1_32); 618 k1 *= C2_32; 619 hash ^= k1; 620 } 621 622 // finalization 623 hash ^= totalLen; 624 hash ^= (hash >>> 16); 625 hash *= 0x85ebca6b; 626 hash ^= (hash >>> 13); 627 hash *= 0xc2b2ae35; 628 hash ^= (hash >>> 16); 629 return hash; 630 } 631 } 632 633 private static int orBytes(final byte b1, final byte b2, final byte b3, final byte b4) { 634 return (b1 & 0xff) | ((b2 & 0xff) << 8) | ((b3 & 0xff) << 16) | ((b4 & 0xff) << 24); 635 } 636}