XXHash32.java

  1. /*
  2.  * Licensed to the Apache Software Foundation (ASF) under one
  3.  * or more contributor license agreements.  See the NOTICE file
  4.  * distributed with this work for additional information
  5.  * regarding copyright ownership.  The ASF licenses this file
  6.  * to you under the Apache License, Version 2.0 (the
  7.  * "License"); you may not use this file except in compliance
  8.  * with the License.  You may obtain a copy of the License at
  9.  *
  10.  * http://www.apache.org/licenses/LICENSE-2.0
  11.  *
  12.  * Unless required by applicable law or agreed to in writing,
  13.  * software distributed under the License is distributed on an
  14.  * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
  15.  * KIND, either express or implied.  See the License for the
  16.  * specific language governing permissions and limitations
  17.  * under the License.
  18.  */
  19. package org.apache.commons.codec.digest;

  20. import static java.lang.Integer.rotateLeft;

  21. import java.util.zip.Checksum;

  22. /**
  23.  * Implementation of the xxhash32 hash algorithm.
  24.  *
  25.  * <p>Copied from Commons Compress 1.14
  26.  * <a href="https://git-wip-us.apache.org/repos/asf?p=commons-compress.git;a=blob;f=src/main/java/org/apache/commons/compress/compressors/lz4/XXHash32.java;h=a406ffc197449be594d46f0d2712b2d4786a1e68;hb=HEAD">https://git-wip-us.apache.org/repos/asf?p=commons-compress.git;a=blob;f=src/main/java/org/apache/commons/compress/compressors/lz4/XXHash32.java;h=a406ffc197449be594d46f0d2712b2d4786a1e68;hb=HEAD</a></p>
  27.  * <p>NotThreadSafe</p>
  28.  * @see <a href="http://cyan4973.github.io/xxHash/">xxHash</a>
  29.  * @since 1.11
  30.  */
  31. public class XXHash32 implements Checksum {

  32.     private static final int BUF_SIZE = 16;
  33.     private static final int ROTATE_BITS = 13;

  34.     private static final int PRIME1 = (int) 2654435761l;
  35.     private static final int PRIME2 = (int) 2246822519l;
  36.     private static final int PRIME3 = (int) 3266489917l;
  37.     private static final int PRIME4 =  668265263;
  38.     private static final int PRIME5 =  374761393;

  39.     private final byte[] oneByte = new byte[1];
  40.     private final int[] state = new int[4];
  41.     // Note: the code used to use ByteBuffer but the manual method is 50% faster
  42.     // See: http://git-wip-us.apache.org/repos/asf/commons-compress/diff/2f56fb5c
  43.     private final byte[] buffer = new byte[BUF_SIZE];
  44.     private final int seed;

  45.     private int totalLen;
  46.     private int pos;

  47.     /**
  48.      * Creates an XXHash32 instance with a seed of 0.
  49.      */
  50.     public XXHash32() {
  51.         this(0);
  52.     }

  53.     /**
  54.      * Creates an XXHash32 instance.
  55.      * @param seed the seed to use
  56.      */
  57.     public XXHash32(final int seed) {
  58.         this.seed = seed;
  59.         initializeState();
  60.     }

  61.     @Override
  62.     public void reset() {
  63.         initializeState();
  64.         totalLen = 0;
  65.         pos = 0;
  66.     }

  67.     @Override
  68.     public void update(final int b) {
  69.         oneByte[0] = (byte) (b & 0xff);
  70.         update(oneByte, 0, 1);
  71.     }

  72.     @Override
  73.     public void update(final byte[] b, int off, final int len) {
  74.         if (len <= 0) {
  75.             return;
  76.         }
  77.         totalLen += len;

  78.         final int end = off + len;

  79.         if (pos + len < BUF_SIZE) {
  80.             System.arraycopy(b, off, buffer, pos, len);
  81.             pos += len;
  82.             return;
  83.         }

  84.         if (pos > 0) {
  85.             final int size = BUF_SIZE - pos;
  86.             System.arraycopy(b, off, buffer, pos, size);
  87.             process(buffer, 0);
  88.             off += size;
  89.         }

  90.         final int limit = end - BUF_SIZE;
  91.         while (off <= limit) {
  92.             process(b, off);
  93.             off += BUF_SIZE;
  94.         }

  95.         if (off < end) {
  96.             pos = end - off;
  97.             System.arraycopy(b, off, buffer, 0, pos);
  98.         }
  99.     }

  100.     @Override
  101.     public long getValue() {
  102.         int hash;
  103.         if (totalLen > BUF_SIZE) {
  104.             hash =
  105.                 rotateLeft(state[0],  1) +
  106.                 rotateLeft(state[1],  7) +
  107.                 rotateLeft(state[2], 12) +
  108.                 rotateLeft(state[3], 18);
  109.         } else {
  110.             hash = state[2] + PRIME5;
  111.         }
  112.         hash += totalLen;

  113.         int idx = 0;
  114.         final int limit = pos - 4;
  115.         for (; idx <= limit; idx += 4) {
  116.             hash = rotateLeft(hash + getInt(buffer, idx) * PRIME3, 17) * PRIME4;
  117.         }
  118.         while (idx < pos) {
  119.             hash = rotateLeft(hash + (buffer[idx++] & 0xff) * PRIME5, 11) * PRIME1;
  120.         }

  121.         hash ^= hash >>> 15;
  122.         hash *= PRIME2;
  123.         hash ^= hash >>> 13;
  124.         hash *= PRIME3;
  125.         hash ^= hash >>> 16;
  126.         return hash & 0xffffffffl;
  127.     }

  128.     private static int getInt(final byte[] buffer, final int idx) {
  129.         return (int) (fromLittleEndian(buffer, idx, 4) & 0xffffffffl);
  130.     }

  131.     private void initializeState() {
  132.         state[0] = seed + PRIME1 + PRIME2;
  133.         state[1] = seed + PRIME2;
  134.         state[2] = seed;
  135.         state[3] = seed - PRIME1;
  136.     }

  137.     private void process(final byte[] b, final int offset) {
  138.         // local shadows for performance
  139.         int s0 = state[0];
  140.         int s1 = state[1];
  141.         int s2 = state[2];
  142.         int s3 = state[3];

  143.         s0 = rotateLeft(s0 + getInt(b, offset) * PRIME2, ROTATE_BITS) * PRIME1;
  144.         s1 = rotateLeft(s1 + getInt(b, offset + 4) * PRIME2, ROTATE_BITS) * PRIME1;
  145.         s2 = rotateLeft(s2 + getInt(b, offset + 8) * PRIME2, ROTATE_BITS) * PRIME1;
  146.         s3 = rotateLeft(s3 + getInt(b, offset + 12) * PRIME2, ROTATE_BITS) * PRIME1;

  147.         state[0] = s0;
  148.         state[1] = s1;
  149.         state[2] = s2;
  150.         state[3] = s3;

  151.         pos = 0;
  152.     }

  153.     /**
  154.      * Reads the given byte array as a little endian long.
  155.      * @param bytes the byte array to convert
  156.      * @param off the offset into the array that starts the value
  157.      * @param length the number of bytes representing the value
  158.      * @return the number read
  159.      * @throws IllegalArgumentException if len is bigger than eight
  160.      */
  161.     private static long fromLittleEndian(final byte[] bytes, final int off, final int length) {
  162.         if (length > 8) {
  163.             throw new IllegalArgumentException("can't read more than eight bytes into a long value");
  164.         }
  165.         long l = 0;
  166.         for (int i = 0; i < length; i++) {
  167.             l |= (bytes[off + i] & 0xffl) << (8 * i);
  168.         }
  169.         return l;
  170.     }
  171. }