001/* 002 * Licensed to the Apache Software Foundation (ASF) under one or more 003 * contributor license agreements. See the NOTICE file distributed with 004 * this work for additional information regarding copyright ownership. 005 * The ASF licenses this file to You under the Apache License, Version 2.0 006 * (the "License"); you may not use this file except in compliance with 007 * the License. You may obtain a copy of the License at 008 * 009 * http://www.apache.org/licenses/LICENSE-2.0 010 * 011 * Unless required by applicable law or agreed to in writing, software 012 * distributed under the License is distributed on an "AS IS" BASIS, 013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 014 * See the License for the specific language governing permissions and 015 * limitations under the License. 016 */ 017 018package org.apache.commons.codec.digest; 019 020import org.apache.commons.codec.binary.StringUtils; 021 022/** 023 * Implements the MurmurHash3 32-bit and 128-bit hash functions. 024 * 025 * <p> 026 * MurmurHash is a non-cryptographic hash function suitable for general hash-based lookup. The name comes from two basic 027 * operations, multiply (MU) and rotate (R), used in its inner loop. Unlike cryptographic hash functions, it is not 028 * specifically designed to be difficult to reverse by an adversary, making it unsuitable for cryptographic purposes. 029 * </p> 030 * 031 * <p> 032 * This contains a Java port of the 32-bit hash function {@code MurmurHash3_x86_32} and the 128-bit hash function 033 * {@code MurmurHash3_x64_128} from Austin Appleby's original {@code c++} code in SMHasher. 034 * </p> 035 * 036 * <p> 037 * This is public domain code with no copyrights. From home page of 038 * <a href="https://github.com/aappleby/smhasher">SMHasher</a>: 039 * </p> 040 * 041 * <blockquote> "All MurmurHash versions are public domain software, and the author disclaims all copyright to their 042 * code." </blockquote> 043 * 044 * <p> 045 * Original adaption from Apache Hive. That adaption contains a {@code hash64} method that is not part of the original 046 * MurmurHash3 code. It is not recommended to use these methods. They will be removed in a future release. To obtain a 047 * 64-bit hash use half of the bits from the {@code hash128x64} methods using the input data converted to bytes. 048 * </p> 049 * 050 * @see <a href="https://en.wikipedia.org/wiki/MurmurHash">MurmurHash</a> 051 * @see <a href="https://github.com/aappleby/smhasher/blob/master/src/MurmurHash3.cpp"> Original MurmurHash3 c++ 052 * code</a> 053 * @see <a href= 054 * "https://github.com/apache/hive/blob/master/storage-api/src/java/org/apache/hive/common/util/Murmur3.java"> 055 * Apache Hive Murmer3</a> 056 * @since 1.13 057 */ 058public final class MurmurHash3 { 059 060 /** 061 * Generates 32-bit hash from input bytes. Bytes can be added incrementally and the new 062 * hash computed. 063 * 064 * <p>This is an implementation of the 32-bit hash function {@code MurmurHash3_x86_32} 065 * from Austin Appleby's original MurmurHash3 {@code c++} code in SMHasher.</p> 066 * 067 * <p>This implementation contains a sign-extension bug in the finalization step of 068 * any bytes left over from dividing the length by 4. This manifests if any of these 069 * bytes are negative.</p> 070 * 071 * @deprecated Use IncrementalHash32x86. This corrects the processing of trailing bytes. 072 */ 073 @Deprecated 074 public static class IncrementalHash32 extends IncrementalHash32x86 { 075 076 /** 077 * {@inheritDoc} 078 * 079 * <p>This implementation contains a sign-extension bug in the finalization step of 080 * any bytes left over from dividing the length by 4. This manifests if any of these 081 * bytes are negative.<p> 082 * 083 * @deprecated Use IncrementalHash32x86. This corrects the processing of trailing bytes. 084 */ 085 @Override 086 @Deprecated 087 int finalise(final int hash, final int unprocessedLength, final byte[] unprocessed, final int totalLen) { 088 int result = hash; 089 // ************ 090 // Note: This fails to apply masking using 0xff to the 3 remaining bytes. 091 // ************ 092 int k1 = 0; 093 switch (unprocessedLength) { 094 case 3: 095 k1 ^= unprocessed[2] << 16; 096 case 2: 097 k1 ^= unprocessed[1] << 8; 098 case 1: 099 k1 ^= unprocessed[0]; 100 101 // mix functions 102 k1 *= C1_32; 103 k1 = Integer.rotateLeft(k1, R1_32); 104 k1 *= C2_32; 105 result ^= k1; 106 } 107 108 // finalization 109 result ^= totalLen; 110 return fmix32(result); 111 } 112 } 113 114 /** 115 * Generates 32-bit hash from input bytes. Bytes can be added incrementally and the new 116 * hash computed. 117 * 118 * <p>This is an implementation of the 32-bit hash function {@code MurmurHash3_x86_32} 119 * from Austin Appleby's original MurmurHash3 {@code c++} code in SMHasher.</p> 120 * 121 * @since 1.14 122 */ 123 public static class IncrementalHash32x86 { 124 125 /** The size of byte blocks that are processed together. */ 126 private static final int BLOCK_SIZE = 4; 127 128 /** 129 * Combines the bytes using an Or operation ({@code | } in a little-endian representation 130 * of a 32-bit integer; byte 1 will be the least significant byte, byte 4 the most 131 * significant. 132 * 133 * @param b1 The first byte 134 * @param b2 The second byte 135 * @param b3 The third byte 136 * @param b4 The fourth byte 137 * @return The 32-bit integer 138 */ 139 private static int orBytes(final byte b1, final byte b2, final byte b3, final byte b4) { 140 return b1 & 0xff | (b2 & 0xff) << 8 | (b3 & 0xff) << 16 | (b4 & 0xff) << 24; 141 } 142 143 /** Up to 3 unprocessed bytes from input data. */ 144 private final byte[] unprocessed = new byte[3]; 145 146 /** The number of unprocessed bytes in the tail data. */ 147 private int unprocessedLength; 148 149 /** The total number of input bytes added since the start. */ 150 private int totalLen; 151 152 /** 153 * The current running hash. 154 * This must be finalised to generate the 32-bit hash value. 155 */ 156 private int hash; 157 158 /** 159 * Adds the byte array to the current incremental hash. 160 * 161 * @param data The input byte array 162 * @param offset The offset of data 163 * @param length The length of array 164 */ 165 public final void add(final byte[] data, final int offset, final int length) { 166 if (length <= 0) { 167 // Nothing to add 168 return; 169 } 170 totalLen += length; 171 172 // Process the bytes in blocks of 4. 173 // New bytes must be added to any current unprocessed bytes, 174 // then processed in blocks of 4 and the remaining bytes saved: 175 // 176 // |--|---------------------------|--| 177 // unprocessed 178 // main block 179 // remaining 180 181 // Check if the unprocessed bytes and new bytes can fill a block of 4. 182 // Make this overflow safe in the event that length is Integer.MAX_VALUE. 183 // Equivalent to: (unprocessedLength + length < BLOCK_SIZE) 184 if (unprocessedLength + length - BLOCK_SIZE < 0) { 185 // Not enough so add to the unprocessed bytes 186 System.arraycopy(data, offset, unprocessed, unprocessedLength, length); 187 unprocessedLength += length; 188 return; 189 } 190 191 // Combine unprocessed bytes with new bytes. 192 final int newOffset; 193 final int newLength; 194 if (unprocessedLength > 0) { 195 int k = -1; 196 switch (unprocessedLength) { 197 case 1: 198 k = orBytes(unprocessed[0], data[offset], data[offset + 1], data[offset + 2]); 199 break; 200 case 2: 201 k = orBytes(unprocessed[0], unprocessed[1], data[offset], data[offset + 1]); 202 break; 203 case 3: 204 k = orBytes(unprocessed[0], unprocessed[1], unprocessed[2], data[offset]); 205 break; 206 default: 207 throw new IllegalStateException("Unprocessed length should be 1, 2, or 3: " + unprocessedLength); 208 } 209 hash = mix32(k, hash); 210 // Update the offset and length 211 final int consumed = BLOCK_SIZE - unprocessedLength; 212 newOffset = offset + consumed; 213 newLength = length - consumed; 214 } else { 215 newOffset = offset; 216 newLength = length; 217 } 218 219 // Main processing of blocks of 4 bytes 220 final int nblocks = newLength >> 2; 221 222 for (int i = 0; i < nblocks; i++) { 223 final int index = newOffset + (i << 2); 224 final int k = getLittleEndianInt(data, index); 225 hash = mix32(k, hash); 226 } 227 228 // Save left-over unprocessed bytes 229 final int consumed = nblocks << 2; 230 unprocessedLength = newLength - consumed; 231 if (unprocessedLength != 0) { 232 System.arraycopy(data, newOffset + consumed, unprocessed, 0, unprocessedLength); 233 } 234 } 235 236 /** 237 * Generates the 32-bit hash value. Repeat calls to this method with no additional data 238 * will generate the same hash value. 239 * 240 * @return The 32-bit hash 241 */ 242 public final int end() { 243 // Allow calling end() again after adding no data to return the same result. 244 return finalise(hash, unprocessedLength, unprocessed, totalLen); 245 } 246 247 /** 248 * Finalizes the running hash to the output 32-bit hash by processing remaining bytes 249 * and performing final mixing. 250 * 251 * @param hash The running hash 252 * @param unprocessedLength The number of unprocessed bytes in the tail data. 253 * @param unprocessed Up to 3 unprocessed bytes from input data. 254 * @param totalLen The total number of input bytes added since the start. 255 * @return The 32-bit hash 256 */ 257 int finalise(final int hash, final int unprocessedLength, final byte[] unprocessed, final int totalLen) { 258 int result = hash; 259 int k1 = 0; 260 switch (unprocessedLength) { 261 case 3: 262 k1 ^= (unprocessed[2] & 0xff) << 16; 263 case 2: 264 k1 ^= (unprocessed[1] & 0xff) << 8; 265 case 1: 266 k1 ^= unprocessed[0] & 0xff; 267 268 // mix functions 269 k1 *= C1_32; 270 k1 = Integer.rotateLeft(k1, R1_32); 271 k1 *= C2_32; 272 result ^= k1; 273 } 274 275 // finalization 276 result ^= totalLen; 277 return fmix32(result); 278 } 279 280 /** 281 * Starts a new incremental hash. 282 * 283 * @param seed The initial seed value 284 */ 285 public final void start(final int seed) { 286 // Reset 287 unprocessedLength = totalLen = 0; 288 this.hash = seed; 289 } 290 } 291 292 /** 293 * A random number to use for a hash code. 294 * 295 * @deprecated This is not used internally and will be removed in a future release. 296 */ 297 @Deprecated 298 public static final long NULL_HASHCODE = 2862933555777941757L; 299 /** 300 * A default seed to use for the murmur hash algorithm. 301 * Has the value {@code 104729}. 302 */ 303 public static final int DEFAULT_SEED = 104729; 304 // Constants for 32-bit variant 305 private static final int C1_32 = 0xcc9e2d51; 306 private static final int C2_32 = 0x1b873593; 307 private static final int R1_32 = 15; 308 private static final int R2_32 = 13; 309 310 private static final int M_32 = 5; 311 private static final int N_32 = 0xe6546b64; 312 // Constants for 128-bit variant 313 private static final long C1 = 0x87c37b91114253d5L; 314 private static final long C2 = 0x4cf5ad432745937fL; 315 private static final int R1 = 31; 316 private static final int R2 = 27; 317 private static final int R3 = 33; 318 private static final int M = 5; 319 320 private static final int N1 = 0x52dce729; 321 322 private static final int N2 = 0x38495ab5; 323 324 /** 325 * Performs the final avalanche mix step of the 32-bit hash function {@code MurmurHash3_x86_32}. 326 * 327 * @param hash The current hash 328 * @return The final hash 329 */ 330 private static int fmix32(int hash) { 331 hash ^= hash >>> 16; 332 hash *= 0x85ebca6b; 333 hash ^= hash >>> 13; 334 hash *= 0xc2b2ae35; 335 hash ^= hash >>> 16; 336 return hash; 337 } 338 339 /** 340 * Performs the final avalanche mix step of the 64-bit hash function {@code MurmurHash3_x64_128}. 341 * 342 * @param hash The current hash 343 * @return The final hash 344 */ 345 private static long fmix64(long hash) { 346 hash ^= hash >>> 33; 347 hash *= 0xff51afd7ed558ccdL; 348 hash ^= hash >>> 33; 349 hash *= 0xc4ceb9fe1a85ec53L; 350 hash ^= hash >>> 33; 351 return hash; 352 } 353 354 /** 355 * Gets the little-endian int from 4 bytes starting at the specified index. 356 * 357 * @param data The data 358 * @param index The index 359 * @return The little-endian int 360 */ 361 private static int getLittleEndianInt(final byte[] data, final int index) { 362 return data[index ] & 0xff | 363 (data[index + 1] & 0xff) << 8 | 364 (data[index + 2] & 0xff) << 16 | 365 (data[index + 3] & 0xff) << 24; 366 } 367 368 /** 369 * Gets the little-endian long from 8 bytes starting at the specified index. 370 * 371 * @param data The data 372 * @param index The index 373 * @return The little-endian long 374 */ 375 private static long getLittleEndianLong(final byte[] data, final int index) { 376 return (long) data[index ] & 0xff | 377 ((long) data[index + 1] & 0xff) << 8 | 378 ((long) data[index + 2] & 0xff) << 16 | 379 ((long) data[index + 3] & 0xff) << 24 | 380 ((long) data[index + 4] & 0xff) << 32 | 381 ((long) data[index + 5] & 0xff) << 40 | 382 ((long) data[index + 6] & 0xff) << 48 | 383 ((long) data[index + 7] & 0xff) << 56; 384 } 385 386 /** 387 * Generates 128-bit hash from the byte array with a default seed. 388 * This is a helper method that will produce the same result as: 389 * 390 * <pre> 391 * int offset = 0; 392 * int seed = 104729; 393 * int hash = MurmurHash3.hash128(data, offset, data.length, seed); 394 * </pre> 395 * 396 * <p>Note: The sign extension bug in {@link #hash128(byte[], int, int, int)} does not effect 397 * this result as the default seed is positive.</p> 398 * 399 * @param data The input byte array 400 * @return The 128-bit hash (2 longs) 401 * @see #hash128(byte[], int, int, int) 402 */ 403 public static long[] hash128(final byte[] data) { 404 return hash128(data, 0, data.length, DEFAULT_SEED); 405 } 406 407 /** 408 * Generates 128-bit hash from the byte array with the given offset, length and seed. 409 * 410 * <p>This is an implementation of the 128-bit hash function {@code MurmurHash3_x64_128} 411 * from Austin Appleby's original MurmurHash3 {@code c++} code in SMHasher.</p> 412 * 413 * <p>This implementation contains a sign-extension bug in the seed initialization. 414 * This manifests if the seed is negative.</p> 415 * 416 * @param data The input byte array 417 * @param offset The first element of array 418 * @param length The length of array 419 * @param seed The initial seed value 420 * @return The 128-bit hash (2 longs) 421 * @deprecated Use {@link #hash128x64(byte[], int, int, int)}. This corrects the seed initialization. 422 */ 423 @Deprecated 424 public static long[] hash128(final byte[] data, final int offset, final int length, final int seed) { 425 // ************ 426 // Note: This deliberately fails to apply masking using 0xffffffffL to the seed 427 // to maintain behavioral compatibility with the original version. 428 // The implicit conversion to a long will extend a negative sign 429 // bit through the upper 32-bits of the long seed. These should be zero. 430 // ************ 431 return hash128x64Internal(data, offset, length, seed); 432 } 433 434 /** 435 * Generates 128-bit hash from a string with a default seed. 436 * <p> 437 * Before 1.14 the string was converted using default encoding. 438 * Since 1.14 the string is converted to bytes using UTF-8 encoding. 439 * </p> 440 * <p> 441 * This is a helper method that will produce the same result as: 442 * </p> 443 * 444 * <pre> 445 * int offset = 0; 446 * int seed = 104729; 447 * byte[] bytes = data.getBytes(StandardCharsets.UTF_8); 448 * int hash = MurmurHash3.hash128(bytes, offset, bytes.length, seed); 449 * </pre> 450 * 451 * <p>Note: The sign extension bug in {@link #hash128(byte[], int, int, int)} does not effect 452 * this result as the default seed is positive.</p> 453 * 454 * @param data The input String 455 * @return The 128-bit hash (2 longs) 456 * @see #hash128(byte[], int, int, int) 457 * @deprecated Use {@link #hash128x64(byte[])} using the bytes returned from 458 * {@link String#getBytes(java.nio.charset.Charset)}. 459 */ 460 @Deprecated 461 public static long[] hash128(final String data) { 462 final byte[] bytes = StringUtils.getBytesUtf8(data); 463 return hash128(bytes, 0, bytes.length, DEFAULT_SEED); 464 } 465 466 /** 467 * Generates 128-bit hash from the byte array with a seed of zero. 468 * This is a helper method that will produce the same result as: 469 * 470 * <pre> 471 * int offset = 0; 472 * int seed = 0; 473 * int hash = MurmurHash3.hash128x64(data, offset, data.length, seed); 474 * </pre> 475 * 476 * @param data The input byte array 477 * @return The 128-bit hash (2 longs) 478 * @see #hash128x64(byte[], int, int, int) 479 * @since 1.14 480 */ 481 public static long[] hash128x64(final byte[] data) { 482 return hash128x64(data, 0, data.length, 0); 483 } 484 485 /** 486 * Generates 128-bit hash from the byte array with the given offset, length and seed. 487 * 488 * <p>This is an implementation of the 128-bit hash function {@code MurmurHash3_x64_128} 489 * from Austin Appleby's original MurmurHash3 {@code c++} code in SMHasher.</p> 490 * 491 * @param data The input byte array 492 * @param offset The first element of array 493 * @param length The length of array 494 * @param seed The initial seed value 495 * @return The 128-bit hash (2 longs) 496 * @since 1.14 497 */ 498 public static long[] hash128x64(final byte[] data, final int offset, final int length, final int seed) { 499 // Use an unsigned 32-bit integer as the seed 500 return hash128x64Internal(data, offset, length, seed & 0xffffffffL); 501 } 502 503 /** 504 * Generates 128-bit hash from the byte array with the given offset, length and seed. 505 * 506 * <p>This is an implementation of the 128-bit hash function {@code MurmurHash3_x64_128} 507 * from Austin Appleby's original MurmurHash3 {@code c++} code in SMHasher.</p> 508 * 509 * @param data The input byte array 510 * @param offset The first element of array 511 * @param length The length of array 512 * @param seed The initial seed value 513 * @return The 128-bit hash (2 longs) 514 */ 515 private static long[] hash128x64Internal(final byte[] data, final int offset, final int length, final long seed) { 516 long h1 = seed; 517 long h2 = seed; 518 final int nblocks = length >> 4; 519 520 // body 521 for (int i = 0; i < nblocks; i++) { 522 final int index = offset + (i << 4); 523 long k1 = getLittleEndianLong(data, index); 524 long k2 = getLittleEndianLong(data, index + 8); 525 526 // mix functions for k1 527 k1 *= C1; 528 k1 = Long.rotateLeft(k1, R1); 529 k1 *= C2; 530 h1 ^= k1; 531 h1 = Long.rotateLeft(h1, R2); 532 h1 += h2; 533 h1 = h1 * M + N1; 534 535 // mix functions for k2 536 k2 *= C2; 537 k2 = Long.rotateLeft(k2, R3); 538 k2 *= C1; 539 h2 ^= k2; 540 h2 = Long.rotateLeft(h2, R1); 541 h2 += h1; 542 h2 = h2 * M + N2; 543 } 544 545 // tail 546 long k1 = 0; 547 long k2 = 0; 548 final int index = offset + (nblocks << 4); 549 switch (offset + length - index) { 550 case 15: 551 k2 ^= ((long) data[index + 14] & 0xff) << 48; 552 case 14: 553 k2 ^= ((long) data[index + 13] & 0xff) << 40; 554 case 13: 555 k2 ^= ((long) data[index + 12] & 0xff) << 32; 556 case 12: 557 k2 ^= ((long) data[index + 11] & 0xff) << 24; 558 case 11: 559 k2 ^= ((long) data[index + 10] & 0xff) << 16; 560 case 10: 561 k2 ^= ((long) data[index + 9] & 0xff) << 8; 562 case 9: 563 k2 ^= data[index + 8] & 0xff; 564 k2 *= C2; 565 k2 = Long.rotateLeft(k2, R3); 566 k2 *= C1; 567 h2 ^= k2; 568 569 case 8: 570 k1 ^= ((long) data[index + 7] & 0xff) << 56; 571 case 7: 572 k1 ^= ((long) data[index + 6] & 0xff) << 48; 573 case 6: 574 k1 ^= ((long) data[index + 5] & 0xff) << 40; 575 case 5: 576 k1 ^= ((long) data[index + 4] & 0xff) << 32; 577 case 4: 578 k1 ^= ((long) data[index + 3] & 0xff) << 24; 579 case 3: 580 k1 ^= ((long) data[index + 2] & 0xff) << 16; 581 case 2: 582 k1 ^= ((long) data[index + 1] & 0xff) << 8; 583 case 1: 584 k1 ^= data[index] & 0xff; 585 k1 *= C1; 586 k1 = Long.rotateLeft(k1, R1); 587 k1 *= C2; 588 h1 ^= k1; 589 } 590 591 // finalization 592 h1 ^= length; 593 h2 ^= length; 594 595 h1 += h2; 596 h2 += h1; 597 598 h1 = fmix64(h1); 599 h2 = fmix64(h2); 600 601 h1 += h2; 602 h2 += h1; 603 604 return new long[] { h1, h2 }; 605 } 606 607 /** 608 * Generates 32-bit hash from the byte array with a default seed. 609 * This is a helper method that will produce the same result as: 610 * 611 * <pre> 612 * int offset = 0; 613 * int seed = 104729; 614 * int hash = MurmurHash3.hash32(data, offset, data.length, seed); 615 * </pre> 616 * 617 * <p>This implementation contains a sign-extension bug in the finalization step of 618 * any bytes left over from dividing the length by 4. This manifests if any of these 619 * bytes are negative.</p> 620 * 621 * @param data The input byte array 622 * @return The 32-bit hash 623 * @see #hash32(byte[], int, int, int) 624 * @deprecated Use {@link #hash32x86(byte[], int, int, int)}. This corrects the processing of trailing bytes. 625 */ 626 @Deprecated 627 public static int hash32(final byte[] data) { 628 return hash32(data, 0, data.length, DEFAULT_SEED); 629 } 630 631 /** 632 * Generates 32-bit hash from the byte array with the given length and a default seed. 633 * This is a helper method that will produce the same result as: 634 * 635 * <pre> 636 * int offset = 0; 637 * int seed = 104729; 638 * int hash = MurmurHash3.hash32(data, offset, length, seed); 639 * </pre> 640 * 641 * <p>This implementation contains a sign-extension bug in the finalization step of 642 * any bytes left over from dividing the length by 4. This manifests if any of these 643 * bytes are negative.</p> 644 * 645 * @param data The input byte array 646 * @param length The length of array 647 * @return The 32-bit hash 648 * @see #hash32(byte[], int, int, int) 649 * @deprecated Use {@link #hash32x86(byte[], int, int, int)}. This corrects the processing of trailing bytes. 650 */ 651 @Deprecated 652 public static int hash32(final byte[] data, final int length) { 653 return hash32(data, length, DEFAULT_SEED); 654 } 655 656 /** 657 * Generates 32-bit hash from the byte array with the given length and seed. This is a 658 * helper method that will produce the same result as: 659 * 660 * <pre> 661 * int offset = 0; 662 * int hash = MurmurHash3.hash32(data, offset, length, seed); 663 * </pre> 664 * 665 * <p>This implementation contains a sign-extension bug in the finalization step of 666 * any bytes left over from dividing the length by 4. This manifests if any of these 667 * bytes are negative.</p> 668 * 669 * @param data The input byte array 670 * @param length The length of array 671 * @param seed The initial seed value 672 * @return The 32-bit hash 673 * @see #hash32(byte[], int, int, int) 674 * @deprecated Use {@link #hash32x86(byte[], int, int, int)}. This corrects the processing of trailing bytes. 675 */ 676 @Deprecated 677 public static int hash32(final byte[] data, final int length, final int seed) { 678 return hash32(data, 0, length, seed); 679 } 680 681 /** 682 * Generates 32-bit hash from the byte array with the given offset, length and seed. 683 * 684 * <p>This is an implementation of the 32-bit hash function {@code MurmurHash3_x86_32} 685 * from Austin Appleby's original MurmurHash3 {@code c++} code in SMHasher.</p> 686 * 687 * <p>This implementation contains a sign-extension bug in the finalization step of 688 * any bytes left over from dividing the length by 4. This manifests if any of these 689 * bytes are negative.</p> 690 * 691 * @param data The input byte array 692 * @param offset The offset of data 693 * @param length The length of array 694 * @param seed The initial seed value 695 * @return The 32-bit hash 696 * @deprecated Use {@link #hash32x86(byte[], int, int, int)}. This corrects the processing of trailing bytes. 697 */ 698 @Deprecated 699 public static int hash32(final byte[] data, final int offset, final int length, final int seed) { 700 int hash = seed; 701 final int nblocks = length >> 2; 702 703 // body 704 for (int i = 0; i < nblocks; i++) { 705 final int index = offset + (i << 2); 706 final int k = getLittleEndianInt(data, index); 707 hash = mix32(k, hash); 708 } 709 710 // tail 711 // ************ 712 // Note: This fails to apply masking using 0xff to the 3 remaining bytes. 713 // ************ 714 final int index = offset + (nblocks << 2); 715 int k1 = 0; 716 switch (offset + length - index) { 717 case 3: 718 k1 ^= data[index + 2] << 16; 719 case 2: 720 k1 ^= data[index + 1] << 8; 721 case 1: 722 k1 ^= data[index]; 723 724 // mix functions 725 k1 *= C1_32; 726 k1 = Integer.rotateLeft(k1, R1_32); 727 k1 *= C2_32; 728 hash ^= k1; 729 } 730 731 hash ^= length; 732 return fmix32(hash); 733 } 734 735 /** 736 * Generates 32-bit hash from a long with a default seed value. 737 * This is a helper method that will produce the same result as: 738 * 739 * <pre> 740 * int offset = 0; 741 * int seed = 104729; 742 * int hash = MurmurHash3.hash32x86(ByteBuffer.allocate(8) 743 * .putLong(data) 744 * .array(), offset, 8, seed); 745 * </pre> 746 * 747 * @param data The long to hash 748 * @return The 32-bit hash 749 * @see #hash32x86(byte[], int, int, int) 750 */ 751 public static int hash32(final long data) { 752 return hash32(data, DEFAULT_SEED); 753 } 754 755 /** 756 * Generates 32-bit hash from a long with the given seed. 757 * This is a helper method that will produce the same result as: 758 * 759 * <pre> 760 * int offset = 0; 761 * int hash = MurmurHash3.hash32x86(ByteBuffer.allocate(8) 762 * .putLong(data) 763 * .array(), offset, 8, seed); 764 * </pre> 765 * 766 * @param data The long to hash 767 * @param seed The initial seed value 768 * @return The 32-bit hash 769 * @see #hash32x86(byte[], int, int, int) 770 */ 771 public static int hash32(final long data, final int seed) { 772 int hash = seed; 773 final long r0 = Long.reverseBytes(data); 774 775 hash = mix32((int) r0, hash); 776 hash = mix32((int) (r0 >>> 32), hash); 777 778 hash ^= Long.BYTES; 779 return fmix32(hash); 780 } 781 782 /** 783 * Generates 32-bit hash from two longs with a default seed value. 784 * This is a helper method that will produce the same result as: 785 * 786 * <pre> 787 * int offset = 0; 788 * int seed = 104729; 789 * int hash = MurmurHash3.hash32x86(ByteBuffer.allocate(16) 790 * .putLong(data1) 791 * .putLong(data2) 792 * .array(), offset, 16, seed); 793 * </pre> 794 * 795 * @param data1 The first long to hash 796 * @param data2 The second long to hash 797 * @return The 32-bit hash 798 * @see #hash32x86(byte[], int, int, int) 799 */ 800 public static int hash32(final long data1, final long data2) { 801 return hash32(data1, data2, DEFAULT_SEED); 802 } 803 804 /** 805 * Generates 32-bit hash from two longs with the given seed. 806 * This is a helper method that will produce the same result as: 807 * 808 * <pre> 809 * int offset = 0; 810 * int hash = MurmurHash3.hash32x86(ByteBuffer.allocate(16) 811 * .putLong(data1) 812 * .putLong(data2) 813 * .array(), offset, 16, seed); 814 * </pre> 815 * 816 * @param data1 The first long to hash 817 * @param data2 The second long to hash 818 * @param seed The initial seed value 819 * @return The 32-bit hash 820 * @see #hash32x86(byte[], int, int, int) 821 */ 822 public static int hash32(final long data1, final long data2, final int seed) { 823 int hash = seed; 824 final long r0 = Long.reverseBytes(data1); 825 final long r1 = Long.reverseBytes(data2); 826 827 hash = mix32((int) r0, hash); 828 hash = mix32((int) (r0 >>> 32), hash); 829 hash = mix32((int) r1, hash); 830 hash = mix32((int) (r1 >>> 32), hash); 831 832 hash ^= Long.BYTES * 2; 833 return fmix32(hash); 834 } 835 836 /** 837 * Generates 32-bit hash from a string with a default seed. 838 * <p> 839 * Before 1.14 the string was converted using default encoding. 840 * Since 1.14 the string is converted to bytes using UTF-8 encoding. 841 * </p> 842 * This is a helper method that will produce the same result as: 843 * 844 * <pre> 845 * int offset = 0; 846 * int seed = 104729; 847 * byte[] bytes = data.getBytes(StandardCharsets.UTF_8); 848 * int hash = MurmurHash3.hash32(bytes, offset, bytes.length, seed); 849 * </pre> 850 * 851 * <p>This implementation contains a sign-extension bug in the finalization step of 852 * any bytes left over from dividing the length by 4. This manifests if any of these 853 * bytes are negative.</p> 854 * 855 * @param data The input string 856 * @return The 32-bit hash 857 * @see #hash32(byte[], int, int, int) 858 * @deprecated Use {@link #hash32x86(byte[], int, int, int)} with the bytes returned from 859 * {@link String#getBytes(java.nio.charset.Charset)}. This corrects the processing of trailing bytes. 860 */ 861 @Deprecated 862 public static int hash32(final String data) { 863 final byte[] bytes = StringUtils.getBytesUtf8(data); 864 return hash32(bytes, 0, bytes.length, DEFAULT_SEED); 865 } 866 867 /** 868 * Generates 32-bit hash from the byte array with a seed of zero. 869 * This is a helper method that will produce the same result as: 870 * 871 * <pre> 872 * int offset = 0; 873 * int seed = 0; 874 * int hash = MurmurHash3.hash32x86(data, offset, data.length, seed); 875 * </pre> 876 * 877 * @param data The input byte array 878 * @return The 32-bit hash 879 * @see #hash32x86(byte[], int, int, int) 880 * @since 1.14 881 */ 882 public static int hash32x86(final byte[] data) { 883 return hash32x86(data, 0, data.length, 0); 884 } 885 886 /** 887 * Generates 32-bit hash from the byte array with the given offset, length and seed. 888 * 889 * <p>This is an implementation of the 32-bit hash function {@code MurmurHash3_x86_32} 890 * from Austin Appleby's original MurmurHash3 {@code c++} code in SMHasher.</p> 891 * 892 * @param data The input byte array 893 * @param offset The offset of data 894 * @param length The length of array 895 * @param seed The initial seed value 896 * @return The 32-bit hash 897 * @since 1.14 898 */ 899 public static int hash32x86(final byte[] data, final int offset, final int length, final int seed) { 900 int hash = seed; 901 final int nblocks = length >> 2; 902 903 // body 904 for (int i = 0; i < nblocks; i++) { 905 final int index = offset + (i << 2); 906 final int k = getLittleEndianInt(data, index); 907 hash = mix32(k, hash); 908 } 909 910 // tail 911 final int index = offset + (nblocks << 2); 912 int k1 = 0; 913 switch (offset + length - index) { 914 case 3: 915 k1 ^= (data[index + 2] & 0xff) << 16; 916 case 2: 917 k1 ^= (data[index + 1] & 0xff) << 8; 918 case 1: 919 k1 ^= data[index] & 0xff; 920 921 // mix functions 922 k1 *= C1_32; 923 k1 = Integer.rotateLeft(k1, R1_32); 924 k1 *= C2_32; 925 hash ^= k1; 926 } 927 928 hash ^= length; 929 return fmix32(hash); 930 } 931 932 /** 933 * Generates 64-bit hash from a byte array with a default seed. 934 * 935 * <p><strong>This is not part of the original MurmurHash3 {@code c++} implementation.</strong></p> 936 * 937 * <p>This is a Murmur3-like 64-bit variant. 938 * The method does not produce the same result as either half of the hash bytes from 939 * {@linkplain #hash128x64(byte[])} with the same byte data. 940 * This method will be removed in a future release.</p> 941 * 942 * <p>Note: The sign extension bug in {@link #hash64(byte[], int, int, int)} does not effect 943 * this result as the default seed is positive.</p> 944 * 945 * <p>This is a helper method that will produce the same result as:</p> 946 * 947 * <pre> 948 * int offset = 0; 949 * int seed = 104729; 950 * long hash = MurmurHash3.hash64(data, offset, data.length, seed); 951 * </pre> 952 * 953 * @param data The input byte array 954 * @return The 64-bit hash 955 * @see #hash64(byte[], int, int, int) 956 * @deprecated Not part of the MurmurHash3 implementation. 957 * Use half of the hash bytes from {@link #hash128x64(byte[])}. 958 */ 959 @Deprecated 960 public static long hash64(final byte[] data) { 961 return hash64(data, 0, data.length, DEFAULT_SEED); 962 } 963 964 /** 965 * Generates 64-bit hash from a byte array with the given offset and length and a default seed. 966 * 967 * <p><strong>This is not part of the original MurmurHash3 {@code c++} implementation.</strong></p> 968 * 969 * <p>This is a Murmur3-like 64-bit variant. 970 * The method does not produce the same result as either half of the hash bytes from 971 * {@linkplain #hash128x64(byte[])} with the same byte data. 972 * This method will be removed in a future release.</p> 973 * 974 * <p>Note: The sign extension bug in {@link #hash64(byte[], int, int, int)} does not effect 975 * this result as the default seed is positive.</p> 976 * 977 * <p>This is a helper method that will produce the same result as:</p> 978 * 979 * <pre> 980 * int seed = 104729; 981 * long hash = MurmurHash3.hash64(data, offset, length, seed); 982 * </pre> 983 * 984 * @param data The input byte array 985 * @param offset The offset of data 986 * @param length The length of array 987 * @return The 64-bit hash 988 * @see #hash64(byte[], int, int, int) 989 * @deprecated Not part of the MurmurHash3 implementation. 990 * Use half of the hash bytes from {@link #hash128x64(byte[], int, int, int)}. 991 */ 992 @Deprecated 993 public static long hash64(final byte[] data, final int offset, final int length) { 994 return hash64(data, offset, length, DEFAULT_SEED); 995 } 996 997 /** 998 * Generates 64-bit hash from a byte array with the given offset, length and seed. 999 * 1000 * <p><strong>This is not part of the original MurmurHash3 {@code c++} implementation.</strong></p> 1001 * 1002 * <p>This is a Murmur3-like 64-bit variant. 1003 * This method will be removed in a future release.</p> 1004 * 1005 * <p>This implementation contains a sign-extension bug in the seed initialization. 1006 * This manifests if the seed is negative.</p> 1007 * 1008 * <p>This algorithm processes 8 bytes chunks of data in a manner similar to the 16 byte chunks 1009 * of data processed in the MurmurHash3 {@code MurmurHash3_x64_128} method. However the hash 1010 * is not mixed with a hash chunk from the next 8 bytes of data. The method will not return 1011 * the same value as the first or second 64-bits of the function 1012 * {@link #hash128(byte[], int, int, int)}.</p> 1013 * 1014 * <p>Use of this method is not advised. Use the first long returned from 1015 * {@link #hash128x64(byte[], int, int, int)}.</p> 1016 * 1017 * @param data The input byte array 1018 * @param offset The offset of data 1019 * @param length The length of array 1020 * @param seed The initial seed value 1021 * @return The 64-bit hash 1022 * @deprecated Not part of the MurmurHash3 implementation. 1023 * Use half of the hash bytes from {@link #hash128x64(byte[], int, int, int)}. 1024 */ 1025 @Deprecated 1026 public static long hash64(final byte[] data, final int offset, final int length, final int seed) { 1027 // 1028 // Note: This fails to apply masking using 0xffffffffL to the seed. 1029 // 1030 long hash = seed; 1031 final int nblocks = length >> 3; 1032 1033 // body 1034 for (int i = 0; i < nblocks; i++) { 1035 final int index = offset + (i << 3); 1036 long k = getLittleEndianLong(data, index); 1037 1038 // mix functions 1039 k *= C1; 1040 k = Long.rotateLeft(k, R1); 1041 k *= C2; 1042 hash ^= k; 1043 hash = Long.rotateLeft(hash, R2) * M + N1; 1044 } 1045 1046 // tail 1047 long k1 = 0; 1048 final int index = offset + (nblocks << 3); 1049 switch (offset + length - index) { 1050 case 7: 1051 k1 ^= ((long) data[index + 6] & 0xff) << 48; 1052 case 6: 1053 k1 ^= ((long) data[index + 5] & 0xff) << 40; 1054 case 5: 1055 k1 ^= ((long) data[index + 4] & 0xff) << 32; 1056 case 4: 1057 k1 ^= ((long) data[index + 3] & 0xff) << 24; 1058 case 3: 1059 k1 ^= ((long) data[index + 2] & 0xff) << 16; 1060 case 2: 1061 k1 ^= ((long) data[index + 1] & 0xff) << 8; 1062 case 1: 1063 k1 ^= (long) data[index] & 0xff; 1064 k1 *= C1; 1065 k1 = Long.rotateLeft(k1, R1); 1066 k1 *= C2; 1067 hash ^= k1; 1068 } 1069 1070 // finalization 1071 hash ^= length; 1072 return fmix64(hash); 1073 } 1074 1075 /** 1076 * Generates 64-bit hash from an int with a default seed. 1077 * 1078 * <p><strong>This is not part of the original MurmurHash3 {@code c++} implementation.</strong></p> 1079 * 1080 * <p>This is a Murmur3-like 64-bit variant. 1081 * The method does not produce the same result as either half of the hash bytes from 1082 * {@linkplain #hash128x64(byte[])} with the same byte data from the {@code int}. 1083 * This method will be removed in a future release.</p> 1084 * 1085 * <p>Note: The sign extension bug in {@link #hash64(byte[], int, int, int)} does not effect 1086 * this result as the default seed is positive.</p> 1087 * 1088 * <p>This is a helper method that will produce the same result as:</p> 1089 * 1090 * <pre> 1091 * int offset = 0; 1092 * int seed = 104729; 1093 * long hash = MurmurHash3.hash64(ByteBuffer.allocate(4) 1094 * .putInt(data) 1095 * .array(), offset, 4, seed); 1096 * </pre> 1097 * 1098 * @param data The int to hash 1099 * @return The 64-bit hash 1100 * @see #hash64(byte[], int, int, int) 1101 * @deprecated Not part of the MurmurHash3 implementation. 1102 * Use half of the hash bytes from {@link #hash128x64(byte[])} with the bytes from the {@code int}. 1103 */ 1104 @Deprecated 1105 public static long hash64(final int data) { 1106 long k1 = Integer.reverseBytes(data) & -1L >>> 32; 1107 long hash = DEFAULT_SEED; 1108 k1 *= C1; 1109 k1 = Long.rotateLeft(k1, R1); 1110 k1 *= C2; 1111 hash ^= k1; 1112 // finalization 1113 hash ^= Integer.BYTES; 1114 return fmix64(hash); 1115 } 1116 1117 /** 1118 * Generates 64-bit hash from a long with a default seed. 1119 * 1120 * <p><strong>This is not part of the original MurmurHash3 {@code c++} implementation.</strong></p> 1121 * 1122 * <p>This is a Murmur3-like 64-bit variant. 1123 * The method does not produce the same result as either half of the hash bytes from 1124 * {@linkplain #hash128x64(byte[])} with the same byte data from the {@code long}. 1125 * This method will be removed in a future release.</p> 1126 * 1127 * <p>Note: The sign extension bug in {@link #hash64(byte[], int, int, int)} does not effect 1128 * this result as the default seed is positive.</p> 1129 * 1130 * <p>This is a helper method that will produce the same result as:</p> 1131 * 1132 * <pre> 1133 * int offset = 0; 1134 * int seed = 104729; 1135 * long hash = MurmurHash3.hash64(ByteBuffer.allocate(8) 1136 * .putLong(data) 1137 * .array(), offset, 8, seed); 1138 * </pre> 1139 * 1140 * @param data The long to hash 1141 * @return The 64-bit hash 1142 * @see #hash64(byte[], int, int, int) 1143 * @deprecated Not part of the MurmurHash3 implementation. 1144 * Use half of the hash bytes from {@link #hash128x64(byte[])} with the bytes from the {@code long}. 1145 */ 1146 @Deprecated 1147 public static long hash64(final long data) { 1148 long hash = DEFAULT_SEED; 1149 long k = Long.reverseBytes(data); 1150 // mix functions 1151 k *= C1; 1152 k = Long.rotateLeft(k, R1); 1153 k *= C2; 1154 hash ^= k; 1155 hash = Long.rotateLeft(hash, R2) * M + N1; 1156 // finalization 1157 hash ^= Long.BYTES; 1158 return fmix64(hash); 1159 } 1160 1161 /** 1162 * Generates 64-bit hash from a short with a default seed. 1163 * 1164 * <p><strong>This is not part of the original MurmurHash3 {@code c++} implementation.</strong></p> 1165 * 1166 * <p>This is a Murmur3-like 64-bit variant. 1167 * The method does not produce the same result as either half of the hash bytes from 1168 * {@linkplain #hash128x64(byte[])} with the same byte data from the {@code short}. 1169 * This method will be removed in a future release.</p> 1170 * 1171 * <p>Note: The sign extension bug in {@link #hash64(byte[], int, int, int)} does not effect 1172 * this result as the default seed is positive.</p> 1173 * 1174 * <p>This is a helper method that will produce the same result as:</p> 1175 * 1176 * <pre> 1177 * int offset = 0; 1178 * int seed = 104729; 1179 * long hash = MurmurHash3.hash64(ByteBuffer.allocate(2) 1180 * .putShort(data) 1181 * .array(), offset, 2, seed); 1182 * </pre> 1183 * 1184 * @param data The short to hash 1185 * @return The 64-bit hash 1186 * @see #hash64(byte[], int, int, int) 1187 * @deprecated Not part of the MurmurHash3 implementation. 1188 * Use half of the hash bytes from {@link #hash128x64(byte[])} with the bytes from the {@code short}. 1189 */ 1190 @Deprecated 1191 public static long hash64(final short data) { 1192 long hash = DEFAULT_SEED; 1193 long k1 = 0; 1194 k1 ^= ((long) data & 0xff) << 8; 1195 k1 ^= (long) ((data & 0xFF00) >> 8) & 0xff; 1196 k1 *= C1; 1197 k1 = Long.rotateLeft(k1, R1); 1198 k1 *= C2; 1199 hash ^= k1; 1200 1201 // finalization 1202 hash ^= Short.BYTES; 1203 return fmix64(hash); 1204 } 1205 1206 /** 1207 * Performs the intermediate mix step of the 32-bit hash function {@code MurmurHash3_x86_32}. 1208 * 1209 * @param k The data to add to the hash 1210 * @param hash The current hash 1211 * @return The new hash 1212 */ 1213 private static int mix32(int k, int hash) { 1214 k *= C1_32; 1215 k = Integer.rotateLeft(k, R1_32); 1216 k *= C2_32; 1217 hash ^= k; 1218 return Integer.rotateLeft(hash, R2_32) * M_32 + N_32; 1219 } 1220 1221 /** No instance methods. */ 1222 private MurmurHash3() { 1223 } 1224}