001/* 002 * Licensed to the Apache Software Foundation (ASF) under one or more 003 * contributor license agreements. See the NOTICE file distributed with 004 * this work for additional information regarding copyright ownership. 005 * The ASF licenses this file to You under the Apache License, Version 2.0 006 * (the "License"); you may not use this file except in compliance with 007 * the License. You may obtain a copy of the License at 008 * 009 * https://www.apache.org/licenses/LICENSE-2.0 010 * 011 * Unless required by applicable law or agreed to in writing, software 012 * distributed under the License is distributed on an "AS IS" BASIS, 013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 014 * See the License for the specific language governing permissions and 015 * limitations under the License. 016 */ 017 018package org.apache.commons.codec.digest; 019 020import org.apache.commons.codec.binary.StringUtils; 021 022/** 023 * Implements the MurmurHash3 32-bit and 128-bit hash functions. 024 * 025 * <p> 026 * MurmurHash is a non-cryptographic hash function suitable for general hash-based lookup. The name comes from two basic 027 * operations, multiply (MU) and rotate (R), used in its inner loop. Unlike cryptographic hash functions, it is not 028 * specifically designed to be difficult to reverse by an adversary, making it unsuitable for cryptographic purposes. 029 * </p> 030 * 031 * <p> 032 * This contains a Java port of the 32-bit hash function {@code MurmurHash3_x86_32} and the 128-bit hash function 033 * {@code MurmurHash3_x64_128} from Austin Appleby's original {@code c++} code in SMHasher. 034 * </p> 035 * 036 * <p> 037 * This is public domain code with no copyrights. From home page of 038 * <a href="https://github.com/aappleby/smhasher">SMHasher</a>: 039 * </p> 040 * 041 * <blockquote> 042 * "All MurmurHash versions are public domain software, and the author disclaims all copyright to their 043 * code." 044 * </blockquote> 045 * 046 * <p> 047 * Original adaption from <a href="https://hive.apache.org/">Apache Hive</a>. 048 * That adaption contains a {@code hash64} method that is not part of the original 049 * MurmurHash3 code. It is not recommended to use these methods. They will be removed in a future release. To obtain a 050 * 64-bit hash use half of the bits from the {@code hash128x64} methods using the input data converted to bytes. 051 * </p> 052 * 053 * @see <a href="https://en.wikipedia.org/wiki/MurmurHash">MurmurHash</a> 054 * @see <a href="https://github.com/aappleby/smhasher/blob/master/src/MurmurHash3.cpp"> Original MurmurHash3 C++ 055 * code</a> 056 * @see <a href= 057 * "https://github.com/apache/hive/blob/master/storage-api/src/java/org/apache/hive/common/util/Murmur3.java"> 058 * Apache Hive Murmer3</a> 059 * @since 1.13 060 */ 061public final class MurmurHash3 { 062 063 /** 064 * Generates 32-bit hash from input bytes. Bytes can be added incrementally and the new 065 * hash computed. 066 * 067 * <p> 068 * This is an implementation of the 32-bit hash function {@code MurmurHash3_x86_32} 069 * from Austin Appleby's original MurmurHash3 {@code c++} code in SMHasher. 070 * </p> 071 * 072 * <p> 073 * This implementation contains a sign-extension bug in the finalization step of 074 * any bytes left over from dividing the length by 4. This manifests if any of these 075 * bytes are negative. 076 * </p> 077 * 078 * @deprecated Use IncrementalHash32x86. This corrects the processing of trailing bytes. 079 */ 080 @Deprecated 081 public static class IncrementalHash32 extends IncrementalHash32x86 { 082 083 /** 084 * Constructs a new instance. 085 */ 086 public IncrementalHash32() { 087 // empty 088 } 089 090 /** 091 * {@inheritDoc} 092 * 093 * <p> 094 * This implementation contains a sign-extension bug in the finalization step of 095 * any bytes left over from dividing the length by 4. This manifests if any of these 096 * bytes are negative. 097 * <p> 098 * 099 * @deprecated Use IncrementalHash32x86. This corrects the processing of trailing bytes. 100 */ 101 @Override 102 @Deprecated 103 int finalise(final int hash, final int unprocessedLength, final byte[] unprocessed, final int totalLen) { 104 int result = hash; 105 // Note: This fails to apply masking using 0xff to the 3 remaining bytes. 106 int k1 = 0; 107 switch (unprocessedLength) { 108 case 3: 109 k1 ^= unprocessed[2] << 16; 110 // falls-through 111 case 2: 112 k1 ^= unprocessed[1] << 8; 113 // falls-through 114 case 1: 115 k1 ^= unprocessed[0]; 116 // mix functions 117 k1 *= C1_32; 118 k1 = Integer.rotateLeft(k1, R1_32); 119 k1 *= C2_32; 120 result ^= k1; 121 } 122 // finalization 123 result ^= totalLen; 124 return fmix32(result); 125 } 126 } 127 128 /** 129 * Generates 32-bit hash from input bytes. Bytes can be added incrementally and the new 130 * hash computed. 131 * 132 * <p> 133 * This is an implementation of the 32-bit hash function {@code MurmurHash3_x86_32} 134 * from Austin Appleby's original MurmurHash3 {@code c++} code in SMHasher. 135 * </p> 136 * 137 * @since 1.14 138 */ 139 public static class IncrementalHash32x86 { 140 141 /** The size of byte blocks that are processed together. */ 142 private static final int BLOCK_SIZE = 4; 143 144 /** 145 * Combines the bytes using an Or operation ({@code | } in a little-endian representation 146 * of a 32-bit integer; byte 1 will be the least significant byte, byte 4 the most 147 * significant. 148 * 149 * @param b1 The first byte 150 * @param b2 The second byte 151 * @param b3 The third byte 152 * @param b4 The fourth byte 153 * @return The 32-bit integer 154 */ 155 private static int orBytes(final byte b1, final byte b2, final byte b3, final byte b4) { 156 return b1 & 0xff | (b2 & 0xff) << 8 | (b3 & 0xff) << 16 | (b4 & 0xff) << 24; 157 } 158 159 /** Up to 3 unprocessed bytes from input data. */ 160 private final byte[] unprocessed = new byte[3]; 161 162 /** The number of unprocessed bytes in the tail data. */ 163 private int unprocessedLength; 164 165 /** The total number of input bytes added since the start. */ 166 private int totalLen; 167 168 /** 169 * The current running hash. 170 * This must be finalized to generate the 32-bit hash value. 171 */ 172 private int hash; 173 174 /** 175 * Constructs a new instance. 176 */ 177 public IncrementalHash32x86() { 178 // empty 179 } 180 181 /** 182 * Adds the byte array to the current incremental hash. 183 * 184 * @param data The input byte array 185 * @param offset The offset of data 186 * @param length The length of array 187 */ 188 public final void add(final byte[] data, final int offset, final int length) { 189 if (length <= 0) { 190 // Nothing to add 191 return; 192 } 193 totalLen += length; 194 // Process the bytes in blocks of 4. 195 // New bytes must be added to any current unprocessed bytes, 196 // then processed in blocks of 4 and the remaining bytes saved: 197 // 198 // |--|---------------------------|--| 199 // unprocessed 200 // main block 201 // remaining 202 203 // Check if the unprocessed bytes and new bytes can fill a block of 4. 204 // Make this overflow safe in the event that length is Integer.MAX_VALUE. 205 // Equivalent to: (unprocessedLength + length < BLOCK_SIZE) 206 if (unprocessedLength + length - BLOCK_SIZE < 0) { 207 // Not enough so add to the unprocessed bytes 208 System.arraycopy(data, offset, unprocessed, unprocessedLength, length); 209 unprocessedLength += length; 210 return; 211 } 212 // Combine unprocessed bytes with new bytes. 213 final int newOffset; 214 final int newLength; 215 if (unprocessedLength > 0) { 216 int k = -1; 217 switch (unprocessedLength) { 218 case 1: 219 k = orBytes(unprocessed[0], data[offset], data[offset + 1], data[offset + 2]); 220 break; 221 case 2: 222 k = orBytes(unprocessed[0], unprocessed[1], data[offset], data[offset + 1]); 223 break; 224 case 3: 225 k = orBytes(unprocessed[0], unprocessed[1], unprocessed[2], data[offset]); 226 break; 227 default: 228 throw new IllegalStateException("Unprocessed length should be 1, 2, or 3: " + unprocessedLength); 229 } 230 hash = mix32(k, hash); 231 // Update the offset and length 232 final int consumed = BLOCK_SIZE - unprocessedLength; 233 newOffset = offset + consumed; 234 newLength = length - consumed; 235 } else { 236 newOffset = offset; 237 newLength = length; 238 } 239 // Main processing of blocks of 4 bytes 240 final int nblocks = newLength >> 2; 241 242 for (int i = 0; i < nblocks; i++) { 243 final int index = newOffset + (i << 2); 244 final int k = MurmurHash.getLittleEndianInt(data, index); 245 hash = mix32(k, hash); 246 } 247 // Save left-over unprocessed bytes 248 final int consumed = nblocks << 2; 249 unprocessedLength = newLength - consumed; 250 if (unprocessedLength != 0) { 251 System.arraycopy(data, newOffset + consumed, unprocessed, 0, unprocessedLength); 252 } 253 } 254 255 /** 256 * Generates the 32-bit hash value. Repeat calls to this method with no additional data 257 * will generate the same hash value. 258 * 259 * @return The 32-bit hash 260 */ 261 public final int end() { 262 // Allow calling end() again after adding no data to return the same result. 263 return finalise(hash, unprocessedLength, unprocessed, totalLen); 264 } 265 266 /** 267 * Finalizes the running hash to the output 32-bit hash by processing remaining bytes 268 * and performing final mixing. 269 * 270 * @param hash The running hash 271 * @param unprocessedLength The number of unprocessed bytes in the tail data. 272 * @param unprocessed Up to 3 unprocessed bytes from input data. 273 * @param totalLen The total number of input bytes added since the start. 274 * @return The 32-bit hash 275 */ 276 int finalise(final int hash, final int unprocessedLength, final byte[] unprocessed, final int totalLen) { 277 int result = hash; 278 int k1 = 0; 279 switch (unprocessedLength) { 280 case 3: 281 k1 ^= (unprocessed[2] & 0xff) << 16; 282 // falls-through 283 case 2: 284 k1 ^= (unprocessed[1] & 0xff) << 8; 285 // falls-through 286 case 1: 287 k1 ^= unprocessed[0] & 0xff; 288 // mix functions 289 k1 *= C1_32; 290 k1 = Integer.rotateLeft(k1, R1_32); 291 k1 *= C2_32; 292 result ^= k1; 293 } 294 // finalization 295 result ^= totalLen; 296 return fmix32(result); 297 } 298 299 /** 300 * Starts a new incremental hash. 301 * 302 * @param seed The initial seed value 303 */ 304 public final void start(final int seed) { 305 // Reset 306 unprocessedLength = totalLen = 0; 307 hash = seed; 308 } 309 } 310 311 /** 312 * A random number to use for a hash code. 313 * 314 * @deprecated This is not used internally and will be removed in a future release. 315 */ 316 @Deprecated 317 public static final long NULL_HASHCODE = 2862933555777941757L; 318 /** 319 * A default seed to use for the murmur hash algorithm. 320 * Has the value {@code 104729}. 321 */ 322 public static final int DEFAULT_SEED = 104729; 323 // Constants for 32-bit variant 324 private static final int C1_32 = 0xcc9e2d51; 325 private static final int C2_32 = 0x1b873593; 326 private static final int R1_32 = 15; 327 private static final int R2_32 = 13; 328 329 private static final int M_32 = 5; 330 private static final int N_32 = 0xe6546b64; 331 // Constants for 128-bit variant 332 private static final long C1 = 0x87c37b91114253d5L; 333 private static final long C2 = 0x4cf5ad432745937fL; 334 private static final int R1 = 31; 335 private static final int R2 = 27; 336 private static final int R3 = 33; 337 private static final int M = 5; 338 339 private static final int N1 = 0x52dce729; 340 341 private static final int N2 = 0x38495ab5; 342 343 /** 344 * Performs the final avalanche mix step of the 32-bit hash function {@code MurmurHash3_x86_32}. 345 * 346 * @param hash The current hash 347 * @return The final hash 348 */ 349 private static int fmix32(int hash) { 350 hash ^= hash >>> 16; 351 hash *= 0x85ebca6b; 352 hash ^= hash >>> 13; 353 hash *= 0xc2b2ae35; 354 hash ^= hash >>> 16; 355 return hash; 356 } 357 358 /** 359 * Performs the final avalanche mix step of the 64-bit hash function {@code MurmurHash3_x64_128}. 360 * 361 * @param hash The current hash 362 * @return The final hash 363 */ 364 private static long fmix64(long hash) { 365 hash ^= hash >>> 33; 366 hash *= 0xff51afd7ed558ccdL; 367 hash ^= hash >>> 33; 368 hash *= 0xc4ceb9fe1a85ec53L; 369 hash ^= hash >>> 33; 370 return hash; 371 } 372 373 /** 374 * Generates 128-bit hash from the byte array with a default seed. 375 * This is a helper method that will produce the same result as: 376 * 377 * <pre> 378 * int offset = 0; 379 * int seed = 104729; 380 * int hash = MurmurHash3.hash128(data, offset, data.length, seed); 381 * </pre> 382 * 383 * <p> 384 * Note: The sign extension bug in {@link #hash128(byte[], int, int, int)} does not effect 385 * this result as the default seed is positive. 386 * </p> 387 * 388 * @param data The input byte array 389 * @return The 128-bit hash (2 longs) 390 * @see #hash128(byte[], int, int, int) 391 */ 392 public static long[] hash128(final byte[] data) { 393 return hash128(data, 0, data.length, DEFAULT_SEED); 394 } 395 396 /** 397 * Generates 128-bit hash from the byte array with the given offset, length and seed. 398 * 399 * <p> 400 * This is an implementation of the 128-bit hash function {@code MurmurHash3_x64_128} 401 * from Austin Appleby's original MurmurHash3 {@code c++} code in SMHasher. 402 * </p> 403 * 404 * <p> 405 * This implementation contains a sign-extension bug in the seed initialization. 406 * This manifests if the seed is negative. 407 * </p> 408 * 409 * @param data The input byte array 410 * @param offset The first element of array 411 * @param length The length of array 412 * @param seed The initial seed value 413 * @return The 128-bit hash (2 longs) 414 * @deprecated Use {@link #hash128x64(byte[], int, int, int)}. This corrects the seed initialization. 415 */ 416 @Deprecated 417 public static long[] hash128(final byte[] data, final int offset, final int length, final int seed) { 418 // Note: This deliberately fails to apply masking using 0xffffffffL to the seed 419 // to maintain behavioral compatibility with the original version. 420 // The implicit conversion to a long will extend a negative sign 421 // bit through the upper 32-bits of the long seed. These should be zero. 422 return hash128x64Internal(data, offset, length, seed); 423 } 424 425 /** 426 * Generates 128-bit hash from a string with a default seed. 427 * <p> 428 * Before 1.14 the string was converted using default encoding. 429 * Since 1.14 the string is converted to bytes using UTF-8 encoding. 430 * </p> 431 * <p> 432 * This is a helper method that will produce the same result as: 433 * </p> 434 * 435 * <pre> 436 * int offset = 0; 437 * int seed = 104729; 438 * byte[] bytes = data.getBytes(StandardCharsets.UTF_8); 439 * int hash = MurmurHash3.hash128(bytes, offset, bytes.length, seed); 440 * </pre> 441 * 442 * <p> 443 * Note: The sign extension bug in {@link #hash128(byte[], int, int, int)} does not effect 444 * this result as the default seed is positive. 445 * </p> 446 * 447 * @param data The input String 448 * @return The 128-bit hash (2 longs) 449 * @see #hash128(byte[], int, int, int) 450 * @deprecated Use {@link #hash128x64(byte[])} using the bytes returned from 451 * {@link String#getBytes(java.nio.charset.Charset)}. 452 */ 453 @Deprecated 454 public static long[] hash128(final String data) { 455 final byte[] bytes = StringUtils.getBytesUtf8(data); 456 return hash128(bytes, 0, bytes.length, DEFAULT_SEED); 457 } 458 459 /** 460 * Generates 128-bit hash from the byte array with a seed of zero. 461 * This is a helper method that will produce the same result as: 462 * 463 * <pre> 464 * int offset = 0; 465 * int seed = 0; 466 * int hash = MurmurHash3.hash128x64(data, offset, data.length, seed); 467 * </pre> 468 * 469 * @param data The input byte array 470 * @return The 128-bit hash (2 longs) 471 * @see #hash128x64(byte[], int, int, int) 472 * @since 1.14 473 */ 474 public static long[] hash128x64(final byte[] data) { 475 return hash128x64(data, 0, data.length, 0); 476 } 477 478 /** 479 * Generates 128-bit hash from the byte array with the given offset, length and seed. 480 * 481 * <p> 482 * This is an implementation of the 128-bit hash function {@code MurmurHash3_x64_128} 483 * from Austin Appleby's original MurmurHash3 {@code c++} code in SMHasher. 484 * </p> 485 * 486 * @param data The input byte array 487 * @param offset The first element of array 488 * @param length The length of array 489 * @param seed The initial seed value 490 * @return The 128-bit hash (2 longs) 491 * @since 1.14 492 */ 493 public static long[] hash128x64(final byte[] data, final int offset, final int length, final int seed) { 494 // Use an unsigned 32-bit integer as the seed 495 return hash128x64Internal(data, offset, length, seed & 0xffffffffL); 496 } 497 498 /** 499 * Generates 128-bit hash from the byte array with the given offset, length and seed. 500 * 501 * <p> 502 * This is an implementation of the 128-bit hash function {@code MurmurHash3_x64_128} 503 * from Austin Appleby's original MurmurHash3 {@code c++} code in SMHasher. 504 * </p> 505 * 506 * @param data The input byte array 507 * @param offset The first element of array 508 * @param length The length of array 509 * @param seed The initial seed value 510 * @return The 128-bit hash (2 longs) 511 */ 512 private static long[] hash128x64Internal(final byte[] data, final int offset, final int length, final long seed) { 513 long h1 = seed; 514 long h2 = seed; 515 final int nblocks = length >> 4; 516 // body 517 for (int i = 0; i < nblocks; i++) { 518 final int index = offset + (i << 4); 519 long k1 = MurmurHash.getLittleEndianLong(data, index); 520 long k2 = MurmurHash.getLittleEndianLong(data, index + 8); 521 522 // mix functions for k1 523 k1 *= C1; 524 k1 = Long.rotateLeft(k1, R1); 525 k1 *= C2; 526 h1 ^= k1; 527 h1 = Long.rotateLeft(h1, R2); 528 h1 += h2; 529 h1 = h1 * M + N1; 530 531 // mix functions for k2 532 k2 *= C2; 533 k2 = Long.rotateLeft(k2, R3); 534 k2 *= C1; 535 h2 ^= k2; 536 h2 = Long.rotateLeft(h2, R1); 537 h2 += h1; 538 h2 = h2 * M + N2; 539 } 540 // tail 541 long k1 = 0; 542 long k2 = 0; 543 final int index = offset + (nblocks << 4); 544 switch (offset + length - index) { 545 case 15: 546 k2 ^= ((long) data[index + 14] & 0xff) << 48; 547 // falls-through 548 case 14: 549 k2 ^= ((long) data[index + 13] & 0xff) << 40; 550 // falls-through 551 case 13: 552 k2 ^= ((long) data[index + 12] & 0xff) << 32; 553 // falls-through 554 case 12: 555 k2 ^= ((long) data[index + 11] & 0xff) << 24; 556 // falls-through 557 case 11: 558 k2 ^= ((long) data[index + 10] & 0xff) << 16; 559 // falls-through 560 case 10: 561 k2 ^= ((long) data[index + 9] & 0xff) << 8; 562 // falls-through 563 case 9: 564 k2 ^= data[index + 8] & 0xff; 565 k2 *= C2; 566 k2 = Long.rotateLeft(k2, R3); 567 k2 *= C1; 568 h2 ^= k2; 569 // falls-through 570 case 8: 571 k1 ^= ((long) data[index + 7] & 0xff) << 56; 572 // falls-through 573 case 7: 574 k1 ^= ((long) data[index + 6] & 0xff) << 48; 575 // falls-through 576 case 6: 577 k1 ^= ((long) data[index + 5] & 0xff) << 40; 578 // falls-through 579 case 5: 580 k1 ^= ((long) data[index + 4] & 0xff) << 32; 581 // falls-through 582 case 4: 583 k1 ^= ((long) data[index + 3] & 0xff) << 24; 584 // falls-through 585 case 3: 586 k1 ^= ((long) data[index + 2] & 0xff) << 16; 587 // falls-through 588 case 2: 589 k1 ^= ((long) data[index + 1] & 0xff) << 8; 590 // falls-through 591 case 1: 592 k1 ^= data[index] & 0xff; 593 k1 *= C1; 594 k1 = Long.rotateLeft(k1, R1); 595 k1 *= C2; 596 h1 ^= k1; 597 } 598 // finalization 599 h1 ^= length; 600 h2 ^= length; 601 602 h1 += h2; 603 h2 += h1; 604 605 h1 = fmix64(h1); 606 h2 = fmix64(h2); 607 608 h1 += h2; 609 h2 += h1; 610 return new long[] { h1, h2 }; 611 } 612 613 /** 614 * Generates 32-bit hash from the byte array with a default seed. 615 * This is a helper method that will produce the same result as: 616 * 617 * <pre> 618 * int offset = 0; 619 * int seed = 104729; 620 * int hash = MurmurHash3.hash32(data, offset, data.length, seed); 621 * </pre> 622 * 623 * <p> 624 * This implementation contains a sign-extension bug in the finalization step of 625 * any bytes left over from dividing the length by 4. This manifests if any of these 626 * bytes are negative. 627 * </p> 628 * 629 * @param data The input byte array 630 * @return The 32-bit hash 631 * @see #hash32(byte[], int, int, int) 632 * @deprecated Use {@link #hash32x86(byte[], int, int, int)}. This corrects the processing of trailing bytes. 633 */ 634 @Deprecated 635 public static int hash32(final byte[] data) { 636 return hash32(data, 0, data.length, DEFAULT_SEED); 637 } 638 639 /** 640 * Generates 32-bit hash from the byte array with the given length and a default seed. 641 * This is a helper method that will produce the same result as: 642 * 643 * <pre> 644 * int offset = 0; 645 * int seed = 104729; 646 * int hash = MurmurHash3.hash32(data, offset, length, seed); 647 * </pre> 648 * 649 * <p> 650 * This implementation contains a sign-extension bug in the finalization step of 651 * any bytes left over from dividing the length by 4. This manifests if any of these 652 * bytes are negative. 653 * </p> 654 * 655 * @param data The input byte array 656 * @param length The length of array 657 * @return The 32-bit hash 658 * @see #hash32(byte[], int, int, int) 659 * @deprecated Use {@link #hash32x86(byte[], int, int, int)}. This corrects the processing of trailing bytes. 660 */ 661 @Deprecated 662 public static int hash32(final byte[] data, final int length) { 663 return hash32(data, length, DEFAULT_SEED); 664 } 665 666 /** 667 * Generates 32-bit hash from the byte array with the given length and seed. This is a 668 * helper method that will produce the same result as: 669 * 670 * <pre> 671 * int offset = 0; 672 * int hash = MurmurHash3.hash32(data, offset, length, seed); 673 * </pre> 674 * 675 * <p> 676 * This implementation contains a sign-extension bug in the finalization step of 677 * any bytes left over from dividing the length by 4. This manifests if any of these 678 * bytes are negative. 679 * </p> 680 * 681 * @param data The input byte array 682 * @param length The length of array 683 * @param seed The initial seed value 684 * @return The 32-bit hash 685 * @see #hash32(byte[], int, int, int) 686 * @deprecated Use {@link #hash32x86(byte[], int, int, int)}. This corrects the processing of trailing bytes. 687 */ 688 @Deprecated 689 public static int hash32(final byte[] data, final int length, final int seed) { 690 return hash32(data, 0, length, seed); 691 } 692 693 /** 694 * Generates 32-bit hash from the byte array with the given offset, length and seed. 695 * 696 * <p> 697 * This is an implementation of the 32-bit hash function {@code MurmurHash3_x86_32} 698 * from Austin Appleby's original MurmurHash3 {@code c++} code in SMHasher. 699 * </p> 700 * 701 * <p> 702 * This implementation contains a sign-extension bug in the finalization step of 703 * any bytes left over from dividing the length by 4. This manifests if any of these 704 * bytes are negative. 705 * </p> 706 * 707 * @param data The input byte array 708 * @param offset The offset of data 709 * @param length The length of array 710 * @param seed The initial seed value 711 * @return The 32-bit hash 712 * @deprecated Use {@link #hash32x86(byte[], int, int, int)}. This corrects the processing of trailing bytes. 713 */ 714 @Deprecated 715 public static int hash32(final byte[] data, final int offset, final int length, final int seed) { 716 int hash = seed; 717 final int nblocks = length >> 2; 718 // body 719 for (int i = 0; i < nblocks; i++) { 720 final int index = offset + (i << 2); 721 final int k = MurmurHash.getLittleEndianInt(data, index); 722 hash = mix32(k, hash); 723 } 724 // tail 725 // Note: This fails to apply masking using 0xff to the 3 remaining bytes. 726 final int index = offset + (nblocks << 2); 727 int k1 = 0; 728 switch (offset + length - index) { 729 case 3: 730 k1 ^= data[index + 2] << 16; 731 // falls-through 732 case 2: 733 k1 ^= data[index + 1] << 8; 734 // falls-through 735 case 1: 736 k1 ^= data[index]; 737 // mix functions 738 k1 *= C1_32; 739 k1 = Integer.rotateLeft(k1, R1_32); 740 k1 *= C2_32; 741 hash ^= k1; 742 } 743 hash ^= length; 744 return fmix32(hash); 745 } 746 747 /** 748 * Generates 32-bit hash from a long with a default seed value. 749 * This is a helper method that will produce the same result as: 750 * 751 * <pre> 752 * int offset = 0; 753 * int seed = 104729; 754 * int hash = MurmurHash3.hash32x86(ByteBuffer.allocate(8) 755 * .putLong(data) 756 * .array(), offset, 8, seed); 757 * </pre> 758 * 759 * @param data The long to hash 760 * @return The 32-bit hash 761 * @see #hash32x86(byte[], int, int, int) 762 */ 763 public static int hash32(final long data) { 764 return hash32(data, DEFAULT_SEED); 765 } 766 767 /** 768 * Generates 32-bit hash from a long with the given seed. 769 * This is a helper method that will produce the same result as: 770 * 771 * <pre> 772 * int offset = 0; 773 * int hash = MurmurHash3.hash32x86(ByteBuffer.allocate(8) 774 * .putLong(data) 775 * .array(), offset, 8, seed); 776 * </pre> 777 * 778 * @param data The long to hash 779 * @param seed The initial seed value 780 * @return The 32-bit hash 781 * @see #hash32x86(byte[], int, int, int) 782 */ 783 public static int hash32(final long data, final int seed) { 784 int hash = seed; 785 final long r0 = Long.reverseBytes(data); 786 787 hash = mix32((int) r0, hash); 788 hash = mix32((int) (r0 >>> 32), hash); 789 790 hash ^= Long.BYTES; 791 return fmix32(hash); 792 } 793 794 /** 795 * Generates 32-bit hash from two longs with a default seed value. 796 * This is a helper method that will produce the same result as: 797 * 798 * <pre> 799 * int offset = 0; 800 * int seed = 104729; 801 * int hash = MurmurHash3.hash32x86(ByteBuffer.allocate(16) 802 * .putLong(data1) 803 * .putLong(data2) 804 * .array(), offset, 16, seed); 805 * </pre> 806 * 807 * @param data1 The first long to hash 808 * @param data2 The second long to hash 809 * @return The 32-bit hash 810 * @see #hash32x86(byte[], int, int, int) 811 */ 812 public static int hash32(final long data1, final long data2) { 813 return hash32(data1, data2, DEFAULT_SEED); 814 } 815 816 /** 817 * Generates 32-bit hash from two longs with the given seed. 818 * This is a helper method that will produce the same result as: 819 * 820 * <pre> 821 * int offset = 0; 822 * int hash = MurmurHash3.hash32x86(ByteBuffer.allocate(16) 823 * .putLong(data1) 824 * .putLong(data2) 825 * .array(), offset, 16, seed); 826 * </pre> 827 * 828 * @param data1 The first long to hash 829 * @param data2 The second long to hash 830 * @param seed The initial seed value 831 * @return The 32-bit hash 832 * @see #hash32x86(byte[], int, int, int) 833 */ 834 public static int hash32(final long data1, final long data2, final int seed) { 835 int hash = seed; 836 final long r0 = Long.reverseBytes(data1); 837 final long r1 = Long.reverseBytes(data2); 838 839 hash = mix32((int) r0, hash); 840 hash = mix32((int) (r0 >>> 32), hash); 841 hash = mix32((int) r1, hash); 842 hash = mix32((int) (r1 >>> 32), hash); 843 844 hash ^= Long.BYTES * 2; 845 return fmix32(hash); 846 } 847 848 /** 849 * Generates 32-bit hash from a string with a default seed. 850 * <p> 851 * Before 1.14 the string was converted using default encoding. 852 * Since 1.14 the string is converted to bytes using UTF-8 encoding. 853 * </p> 854 * This is a helper method that will produce the same result as: 855 * 856 * <pre> 857 * int offset = 0; 858 * int seed = 104729; 859 * byte[] bytes = data.getBytes(StandardCharsets.UTF_8); 860 * int hash = MurmurHash3.hash32(bytes, offset, bytes.length, seed); 861 * </pre> 862 * 863 * <p> 864 * This implementation contains a sign-extension bug in the finalization step of 865 * any bytes left over from dividing the length by 4. This manifests if any of these 866 * bytes are negative. 867 * </p> 868 * 869 * @param data The input string 870 * @return The 32-bit hash 871 * @see #hash32(byte[], int, int, int) 872 * @deprecated Use {@link #hash32x86(byte[], int, int, int)} with the bytes returned from 873 * {@link String#getBytes(java.nio.charset.Charset)}. This corrects the processing of trailing bytes. 874 */ 875 @Deprecated 876 public static int hash32(final String data) { 877 final byte[] bytes = StringUtils.getBytesUtf8(data); 878 return hash32(bytes, 0, bytes.length, DEFAULT_SEED); 879 } 880 881 /** 882 * Generates 32-bit hash from the byte array with a seed of zero. 883 * This is a helper method that will produce the same result as: 884 * 885 * <pre> 886 * int offset = 0; 887 * int seed = 0; 888 * int hash = MurmurHash3.hash32x86(data, offset, data.length, seed); 889 * </pre> 890 * 891 * @param data The input byte array 892 * @return The 32-bit hash 893 * @see #hash32x86(byte[], int, int, int) 894 * @since 1.14 895 */ 896 public static int hash32x86(final byte[] data) { 897 return hash32x86(data, 0, data.length, 0); 898 } 899 900 /** 901 * Generates 32-bit hash from the byte array with the given offset, length and seed. 902 * 903 * <p> 904 * This is an implementation of the 32-bit hash function {@code MurmurHash3_x86_32} 905 * from Austin Appleby's original MurmurHash3 {@code c++} code in SMHasher. 906 * </p> 907 * 908 * @param data The input byte array 909 * @param offset The offset of data 910 * @param length The length of array 911 * @param seed The initial seed value 912 * @return The 32-bit hash 913 * @since 1.14 914 */ 915 public static int hash32x86(final byte[] data, final int offset, final int length, final int seed) { 916 int hash = seed; 917 final int nblocks = length >> 2; 918 // body 919 for (int i = 0; i < nblocks; i++) { 920 final int index = offset + (i << 2); 921 final int k = MurmurHash.getLittleEndianInt(data, index); 922 hash = mix32(k, hash); 923 } 924 // tail 925 final int index = offset + (nblocks << 2); 926 int k1 = 0; 927 switch (offset + length - index) { 928 case 3: 929 k1 ^= (data[index + 2] & 0xff) << 16; 930 // falls-through 931 case 2: 932 // falls-through 933 k1 ^= (data[index + 1] & 0xff) << 8; 934 // falls-through 935 case 1: 936 k1 ^= data[index] & 0xff; 937 // mix functions 938 k1 *= C1_32; 939 k1 = Integer.rotateLeft(k1, R1_32); 940 k1 *= C2_32; 941 hash ^= k1; 942 } 943 hash ^= length; 944 return fmix32(hash); 945 } 946 947 /** 948 * Generates 64-bit hash from a byte array with a default seed. 949 * 950 * <p><strong>This is not part of the original MurmurHash3 {@code c++} implementation.</strong></p> 951 * 952 * <p> 953 * This is a Murmur3-like 64-bit variant. 954 * The method does not produce the same result as either half of the hash bytes from 955 * {@linkplain #hash128x64(byte[])} with the same byte data. 956 * This method will be removed in a future release. 957 * </p> 958 * 959 * <p> 960 * Note: The sign extension bug in {@link #hash64(byte[], int, int, int)} does not effect 961 * this result as the default seed is positive. 962 * </p> 963 * 964 * <p> 965 * This is a helper method that will produce the same result as: 966 * </p> 967 * 968 * <pre> 969 * int offset = 0; 970 * int seed = 104729; 971 * long hash = MurmurHash3.hash64(data, offset, data.length, seed); 972 * </pre> 973 * 974 * @param data The input byte array 975 * @return The 64-bit hash 976 * @see #hash64(byte[], int, int, int) 977 * @deprecated Not part of the MurmurHash3 implementation. 978 * Use half of the hash bytes from {@link #hash128x64(byte[])}. 979 */ 980 @Deprecated 981 public static long hash64(final byte[] data) { 982 return hash64(data, 0, data.length, DEFAULT_SEED); 983 } 984 985 /** 986 * Generates 64-bit hash from a byte array with the given offset and length and a default seed. 987 * 988 * <p><strong> 989 * This is not part of the original MurmurHash3 {@code c++} implementation. 990 * </strong></p> 991 * 992 * <p> 993 * This is a Murmur3-like 64-bit variant. 994 * The method does not produce the same result as either half of the hash bytes from 995 * {@linkplain #hash128x64(byte[])} with the same byte data. 996 * This method will be removed in a future release. 997 * </p> 998 * 999 * <p> 1000 * Note: The sign extension bug in {@link #hash64(byte[], int, int, int)} does not effect 1001 * this result as the default seed is positive. 1002 * </p> 1003 * 1004 * <p> 1005 * This is a helper method that will produce the same result as: 1006 * </p> 1007 * 1008 * <pre> 1009 * int seed = 104729; 1010 * long hash = MurmurHash3.hash64(data, offset, length, seed); 1011 * </pre> 1012 * 1013 * @param data The input byte array 1014 * @param offset The offset of data 1015 * @param length The length of array 1016 * @return The 64-bit hash 1017 * @see #hash64(byte[], int, int, int) 1018 * @deprecated Not part of the MurmurHash3 implementation. 1019 * Use half of the hash bytes from {@link #hash128x64(byte[], int, int, int)}. 1020 */ 1021 @Deprecated 1022 public static long hash64(final byte[] data, final int offset, final int length) { 1023 return hash64(data, offset, length, DEFAULT_SEED); 1024 } 1025 1026 /** 1027 * Generates 64-bit hash from a byte array with the given offset, length and seed. 1028 * 1029 * <p><strong>This is not part of the original MurmurHash3 {@code c++} implementation.</strong></p> 1030 * 1031 * <p> 1032 * This is a Murmur3-like 64-bit variant. 1033 * This method will be removed in a future release. 1034 * </p> 1035 * 1036 * <p> 1037 * This implementation contains a sign-extension bug in the seed initialization. 1038 * This manifests if the seed is negative. 1039 * </p> 1040 * 1041 * <p> 1042 * This algorithm processes 8 bytes chunks of data in a manner similar to the 16 byte chunks 1043 * of data processed in the MurmurHash3 {@code MurmurHash3_x64_128} method. However the hash 1044 * is not mixed with a hash chunk from the next 8 bytes of data. The method will not return 1045 * the same value as the first or second 64-bits of the function 1046 * {@link #hash128(byte[], int, int, int)}. 1047 * </p> 1048 * 1049 * <p> 1050 * Use of this method is not advised. Use the first long returned from 1051 * {@link #hash128x64(byte[], int, int, int)}. 1052 * </p> 1053 * 1054 * @param data The input byte array 1055 * @param offset The offset of data 1056 * @param length The length of array 1057 * @param seed The initial seed value 1058 * @return The 64-bit hash 1059 * @deprecated Not part of the MurmurHash3 implementation. 1060 * Use half of the hash bytes from {@link #hash128x64(byte[], int, int, int)}. 1061 */ 1062 @Deprecated 1063 public static long hash64(final byte[] data, final int offset, final int length, final int seed) { 1064 // Note: This fails to apply masking using 0xffffffffL to the seed. 1065 long hash = seed; 1066 final int nblocks = length >> 3; 1067 // body 1068 for (int i = 0; i < nblocks; i++) { 1069 final int index = offset + (i << 3); 1070 long k = MurmurHash.getLittleEndianLong(data, index); 1071 // mix functions 1072 k *= C1; 1073 k = Long.rotateLeft(k, R1); 1074 k *= C2; 1075 hash ^= k; 1076 hash = Long.rotateLeft(hash, R2) * M + N1; 1077 } 1078 // tail 1079 long k1 = 0; 1080 final int index = offset + (nblocks << 3); 1081 switch (offset + length - index) { 1082 case 7: 1083 k1 ^= ((long) data[index + 6] & 0xff) << 48; 1084 // falls-through 1085 case 6: 1086 k1 ^= ((long) data[index + 5] & 0xff) << 40; 1087 // falls-through 1088 case 5: 1089 k1 ^= ((long) data[index + 4] & 0xff) << 32; 1090 // falls-through 1091 case 4: 1092 k1 ^= ((long) data[index + 3] & 0xff) << 24; 1093 // falls-through 1094 case 3: 1095 k1 ^= ((long) data[index + 2] & 0xff) << 16; 1096 // falls-through 1097 case 2: 1098 k1 ^= ((long) data[index + 1] & 0xff) << 8; 1099 // falls-through 1100 case 1: 1101 k1 ^= (long) data[index] & 0xff; 1102 k1 *= C1; 1103 k1 = Long.rotateLeft(k1, R1); 1104 k1 *= C2; 1105 hash ^= k1; 1106 } 1107 // finalization 1108 hash ^= length; 1109 return fmix64(hash); 1110 } 1111 1112 /** 1113 * Generates 64-bit hash from an int with a default seed. 1114 * 1115 * <p><strong> 1116 * This is not part of the original MurmurHash3 {@code c++} implementation. 1117 * </strong></p> 1118 * 1119 * <p> 1120 * This is a Murmur3-like 64-bit variant. 1121 * The method does not produce the same result as either half of the hash bytes from 1122 * {@linkplain #hash128x64(byte[])} with the same byte data from the {@code int}. 1123 * This method will be removed in a future release. 1124 * </p> 1125 * 1126 * <p> 1127 * Note: The sign extension bug in {@link #hash64(byte[], int, int, int)} does not effect 1128 * this result as the default seed is positive. 1129 * </p> 1130 * 1131 * <p> 1132 * This is a helper method that will produce the same result as: 1133 * </p> 1134 * 1135 * <pre> 1136 * int offset = 0; 1137 * int seed = 104729; 1138 * long hash = MurmurHash3.hash64(ByteBuffer.allocate(4) 1139 * .putInt(data) 1140 * .array(), offset, 4, seed); 1141 * </pre> 1142 * 1143 * @param data The int to hash 1144 * @return The 64-bit hash 1145 * @see #hash64(byte[], int, int, int) 1146 * @deprecated Not part of the MurmurHash3 implementation. 1147 * Use half of the hash bytes from {@link #hash128x64(byte[])} with the bytes from the {@code int}. 1148 */ 1149 @Deprecated 1150 public static long hash64(final int data) { 1151 long k1 = Integer.reverseBytes(data) & -1L >>> 32; 1152 long hash = DEFAULT_SEED; 1153 k1 *= C1; 1154 k1 = Long.rotateLeft(k1, R1); 1155 k1 *= C2; 1156 hash ^= k1; 1157 // finalization 1158 hash ^= Integer.BYTES; 1159 return fmix64(hash); 1160 } 1161 1162 /** 1163 * Generates 64-bit hash from a long with a default seed. 1164 * 1165 * <p><strong> 1166 * This is not part of the original MurmurHash3 {@code c++} implementation. 1167 * </strong></p> 1168 * 1169 * <p> 1170 * This is a Murmur3-like 64-bit variant. 1171 * The method does not produce the same result as either half of the hash bytes from 1172 * {@linkplain #hash128x64(byte[])} with the same byte data from the {@code long}. 1173 * This method will be removed in a future release. 1174 * </p> 1175 * 1176 * <p> 1177 * Note: The sign extension bug in {@link #hash64(byte[], int, int, int)} does not effect 1178 * this result as the default seed is positive. 1179 * </p> 1180 * 1181 * <p> 1182 * This is a helper method that will produce the same result as: 1183 * </p> 1184 * 1185 * <pre> 1186 * int offset = 0; 1187 * int seed = 104729; 1188 * long hash = MurmurHash3.hash64(ByteBuffer.allocate(8) 1189 * .putLong(data) 1190 * .array(), offset, 8, seed); 1191 * </pre> 1192 * 1193 * @param data The long to hash 1194 * @return The 64-bit hash 1195 * @see #hash64(byte[], int, int, int) 1196 * @deprecated Not part of the MurmurHash3 implementation. 1197 * Use half of the hash bytes from {@link #hash128x64(byte[])} with the bytes from the {@code long}. 1198 */ 1199 @Deprecated 1200 public static long hash64(final long data) { 1201 long hash = DEFAULT_SEED; 1202 long k = Long.reverseBytes(data); 1203 // mix functions 1204 k *= C1; 1205 k = Long.rotateLeft(k, R1); 1206 k *= C2; 1207 hash ^= k; 1208 hash = Long.rotateLeft(hash, R2) * M + N1; 1209 // finalization 1210 hash ^= Long.BYTES; 1211 return fmix64(hash); 1212 } 1213 1214 /** 1215 * Generates 64-bit hash from a short with a default seed. 1216 * 1217 * <p><strong>This is not part of the original MurmurHash3 {@code c++} implementation.</strong></p> 1218 * 1219 * <p> 1220 * This is a Murmur3-like 64-bit variant. 1221 * The method does not produce the same result as either half of the hash bytes from 1222 * {@linkplain #hash128x64(byte[])} with the same byte data from the {@code short}. 1223 * This method will be removed in a future release. 1224 * </p> 1225 * 1226 * <p> 1227 * Note: The sign extension bug in {@link #hash64(byte[], int, int, int)} does not effect 1228 * this result as the default seed is positive. 1229 * </p> 1230 * 1231 * <p> 1232 * This is a helper method that will produce the same result as: 1233 * </p> 1234 * 1235 * <pre> 1236 * int offset = 0; 1237 * int seed = 104729; 1238 * long hash = MurmurHash3.hash64(ByteBuffer.allocate(2) 1239 * .putShort(data) 1240 * .array(), offset, 2, seed); 1241 * </pre> 1242 * 1243 * @param data The short to hash 1244 * @return The 64-bit hash 1245 * @see #hash64(byte[], int, int, int) 1246 * @deprecated Not part of the MurmurHash3 implementation. 1247 * Use half of the hash bytes from {@link #hash128x64(byte[])} with the bytes from the {@code short}. 1248 */ 1249 @Deprecated 1250 public static long hash64(final short data) { 1251 long hash = DEFAULT_SEED; 1252 long k1 = 0; 1253 k1 ^= ((long) data & 0xff) << 8; 1254 k1 ^= (long) ((data & 0xFF00) >> 8) & 0xff; 1255 k1 *= C1; 1256 k1 = Long.rotateLeft(k1, R1); 1257 k1 *= C2; 1258 hash ^= k1; 1259 1260 // finalization 1261 hash ^= Short.BYTES; 1262 return fmix64(hash); 1263 } 1264 1265 /** 1266 * Performs the intermediate mix step of the 32-bit hash function {@code MurmurHash3_x86_32}. 1267 * 1268 * @param k The data to add to the hash 1269 * @param hash The current hash 1270 * @return The new hash 1271 */ 1272 private static int mix32(int k, int hash) { 1273 k *= C1_32; 1274 k = Integer.rotateLeft(k, R1_32); 1275 k *= C2_32; 1276 hash ^= k; 1277 return Integer.rotateLeft(hash, R2_32) * M_32 + N_32; 1278 } 1279 1280 /** No instance methods. */ 1281 private MurmurHash3() { 1282 } 1283}