XXHash32.java

  1. /*
  2.  * Licensed to the Apache Software Foundation (ASF) under one or more
  3.  * contributor license agreements.  See the NOTICE file distributed with
  4.  * this work for additional information regarding copyright ownership.
  5.  * The ASF licenses this file to You under the Apache License, Version 2.0
  6.  * (the "License"); you may not use this file except in compliance with
  7.  * the License.  You may obtain a copy of the License at
  8.  *
  9.  *      https://www.apache.org/licenses/LICENSE-2.0
  10.  *
  11.  * Unless required by applicable law or agreed to in writing, software
  12.  * distributed under the License is distributed on an "AS IS" BASIS,
  13.  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  14.  * See the License for the specific language governing permissions and
  15.  * limitations under the License.
  16.  */

  17. package org.apache.commons.codec.digest;

  18. import static java.lang.Integer.rotateLeft;

  19. import java.util.zip.Checksum;

  20. /**
  21.  * Implements the xxHash32 hash algorithm.
  22.  *
  23.  * <p>
  24.  * Copied from Commons Compress 1.14 <a href=
  25.  * "https://gitbox.apache.org/repos/asf?p=commons-compress.git;a=blob;f=src/main/java/org/apache/commons/compress/compressors/lz4/XXHash32.java;h=a406ffc197449be594d46f0d2712b2d4786a1e68;hb=HEAD">https://gitbox.apache.org/repos/asf?p=commons-compress.git;a=blob;f=src/main/java/org/apache/commons/compress/compressors/lz4/XXHash32.java;h=a406ffc197449be594d46f0d2712b2d4786a1e68;hb=HEAD</a>
  26.  * </p>
  27.  * <p>
  28.  * NotThreadSafe
  29.  * </p>
  30.  *
  31.  * @see <a href="https://cyan4973.github.io/xxHash/">xxHash</a>
  32.  * @since 1.11
  33.  */
  34. public class XXHash32 implements Checksum {

  35.     private static final int BUF_SIZE = 16;
  36.     private static final int ROTATE_BITS = 13;

  37.     private static final int PRIME1 = (int) 2654435761L;
  38.     private static final int PRIME2 = (int) 2246822519L;
  39.     private static final int PRIME3 = (int) 3266489917L;
  40.     private static final int PRIME4 =  668265263;
  41.     private static final int PRIME5 =  374761393;

  42.     /**
  43.      * Gets the little-endian int from 4 bytes starting at the specified index.
  44.      *
  45.      * @param buffer The data
  46.      * @param idx The index
  47.      * @return The little-endian int
  48.      */
  49.     private static int getInt(final byte[] buffer, final int idx) {
  50.         return buffer[idx    ] & 0xff |
  51.                (buffer[idx + 1] & 0xff) <<  8 |
  52.                (buffer[idx + 2] & 0xff) << 16 |
  53.                (buffer[idx + 3] & 0xff) << 24;
  54.     }
  55.     private final byte[] oneByte = new byte[1];
  56.     private final int[] state = new int[4];
  57.     // Note: The code used to use ByteBuffer but the manual method is 50% faster
  58.     // See: https://gitbox.apache.org/repos/asf/commons-compress/diff/2f56fb5c
  59.     private final byte[] buffer = new byte[BUF_SIZE];

  60.     private final int seed;
  61.     private int totalLen;

  62.     private int pos;

  63.     /** Sets to true when the state array has been updated since the last reset. */
  64.     private boolean stateUpdated;

  65.     /**
  66.      * Creates an XXHash32 instance with a seed of 0.
  67.      */
  68.     public XXHash32() {
  69.         this(0);
  70.     }

  71.     /**
  72.      * Creates an XXHash32 instance.
  73.      * @param seed the seed to use
  74.      */
  75.     public XXHash32(final int seed) {
  76.         this.seed = seed;
  77.         initializeState();
  78.     }

  79.     @Override
  80.     public long getValue() {
  81.         int hash;
  82.         if (stateUpdated) {
  83.             // Hash with the state
  84.             hash =
  85.                 rotateLeft(state[0],  1) +
  86.                 rotateLeft(state[1],  7) +
  87.                 rotateLeft(state[2], 12) +
  88.                 rotateLeft(state[3], 18);
  89.         } else {
  90.             // Hash using the original seed from position 2
  91.             hash = state[2] + PRIME5;
  92.         }
  93.         hash += totalLen;

  94.         int idx = 0;
  95.         final int limit = pos - 4;
  96.         for (; idx <= limit; idx += 4) {
  97.             hash = rotateLeft(hash + getInt(buffer, idx) * PRIME3, 17) * PRIME4;
  98.         }
  99.         while (idx < pos) {
  100.             hash = rotateLeft(hash + (buffer[idx++] & 0xff) * PRIME5, 11) * PRIME1;
  101.         }

  102.         hash ^= hash >>> 15;
  103.         hash *= PRIME2;
  104.         hash ^= hash >>> 13;
  105.         hash *= PRIME3;
  106.         hash ^= hash >>> 16;
  107.         return hash & 0xffffffffL;
  108.     }

  109.     private void initializeState() {
  110.         state[0] = seed + PRIME1 + PRIME2;
  111.         state[1] = seed + PRIME2;
  112.         state[2] = seed;
  113.         state[3] = seed - PRIME1;
  114.     }

  115.     private void process(final byte[] b, final int offset) {
  116.         // local shadows for performance
  117.         int s0 = state[0];
  118.         int s1 = state[1];
  119.         int s2 = state[2];
  120.         int s3 = state[3];

  121.         s0 = rotateLeft(s0 + getInt(b, offset) * PRIME2, ROTATE_BITS) * PRIME1;
  122.         s1 = rotateLeft(s1 + getInt(b, offset + 4) * PRIME2, ROTATE_BITS) * PRIME1;
  123.         s2 = rotateLeft(s2 + getInt(b, offset + 8) * PRIME2, ROTATE_BITS) * PRIME1;
  124.         s3 = rotateLeft(s3 + getInt(b, offset + 12) * PRIME2, ROTATE_BITS) * PRIME1;

  125.         state[0] = s0;
  126.         state[1] = s1;
  127.         state[2] = s2;
  128.         state[3] = s3;

  129.         stateUpdated = true;
  130.     }

  131.     @Override
  132.     public void reset() {
  133.         initializeState();
  134.         totalLen = 0;
  135.         pos = 0;
  136.         stateUpdated = false;
  137.     }

  138.     @Override
  139.     public void update(final byte[] b, int off, final int len) {
  140.         if (len <= 0) {
  141.             return;
  142.         }
  143.         totalLen += len;

  144.         final int end = off + len;

  145.         // Check if the unprocessed bytes and new bytes can fill a block of 16.
  146.         // Make this overflow safe in the event that len is Integer.MAX_VALUE.
  147.         // Equivalent to: (pos + len < BUF_SIZE)
  148.         if (pos + len - BUF_SIZE < 0) {
  149.             System.arraycopy(b, off, buffer, pos, len);
  150.             pos += len;
  151.             return;
  152.         }

  153.         // Process left-over bytes with new bytes
  154.         if (pos > 0) {
  155.             final int size = BUF_SIZE - pos;
  156.             System.arraycopy(b, off, buffer, pos, size);
  157.             process(buffer, 0);
  158.             off += size;
  159.         }

  160.         final int limit = end - BUF_SIZE;
  161.         while (off <= limit) {
  162.             process(b, off);
  163.             off += BUF_SIZE;
  164.         }

  165.         // Handle left-over bytes
  166.         if (off < end) {
  167.             pos = end - off;
  168.             System.arraycopy(b, off, buffer, 0, pos);
  169.         } else {
  170.             pos = 0;
  171.         }
  172.     }

  173.     @Override
  174.     public void update(final int b) {
  175.         oneByte[0] = (byte) (b & 0xff);
  176.         update(oneByte, 0, 1);
  177.     }
  178. }