View Javadoc
1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one
3    * or more contributor license agreements.  See the NOTICE file
4    * distributed with this work for additional information
5    * regarding copyright ownership.  The ASF licenses this file
6    * to you under the Apache License, Version 2.0 (the
7    * "License"); you may not use this file except in compliance
8    * with the License.  You may obtain a copy of the License at
9    *
10   * http://www.apache.org/licenses/LICENSE-2.0
11   *
12   * Unless required by applicable law or agreed to in writing,
13   * software distributed under the License is distributed on an
14   * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15   * KIND, either express or implied.  See the License for the
16   * specific language governing permissions and limitations
17   * under the License.
18   */
19  package org.apache.commons.codec.digest;
20  
21  import static java.lang.Integer.rotateLeft;
22  
23  import java.util.zip.Checksum;
24  
25  /**
26   * Implementation of the xxhash32 hash algorithm.
27   *
28   * <p>Copied from Commons Compress 1.14
29   * <a href="https://git-wip-us.apache.org/repos/asf?p=commons-compress.git;a=blob;f=src/main/java/org/apache/commons/compress/compressors/lz4/XXHash32.java;h=a406ffc197449be594d46f0d2712b2d4786a1e68;hb=HEAD">https://git-wip-us.apache.org/repos/asf?p=commons-compress.git;a=blob;f=src/main/java/org/apache/commons/compress/compressors/lz4/XXHash32.java;h=a406ffc197449be594d46f0d2712b2d4786a1e68;hb=HEAD</a></p>
30   * <p>NotThreadSafe</p>
31   * @see <a href="http://cyan4973.github.io/xxHash/">xxHash</a>
32   * @since 1.11
33   */
34  public class XXHash32 implements Checksum {
35  
36      private static final int BUF_SIZE = 16;
37      private static final int ROTATE_BITS = 13;
38  
39      private static final int PRIME1 = (int) 2654435761l;
40      private static final int PRIME2 = (int) 2246822519l;
41      private static final int PRIME3 = (int) 3266489917l;
42      private static final int PRIME4 =  668265263;
43      private static final int PRIME5 =  374761393;
44  
45      private final byte[] oneByte = new byte[1];
46      private final int[] state = new int[4];
47      // Note: the code used to use ByteBuffer but the manual method is 50% faster
48      // See: http://git-wip-us.apache.org/repos/asf/commons-compress/diff/2f56fb5c
49      private final byte[] buffer = new byte[BUF_SIZE];
50      private final int seed;
51  
52      private int totalLen;
53      private int pos;
54  
55      /**
56       * Creates an XXHash32 instance with a seed of 0.
57       */
58      public XXHash32() {
59          this(0);
60      }
61  
62      /**
63       * Creates an XXHash32 instance.
64       * @param seed the seed to use
65       */
66      public XXHash32(final int seed) {
67          this.seed = seed;
68          initializeState();
69      }
70  
71      @Override
72      public void reset() {
73          initializeState();
74          totalLen = 0;
75          pos = 0;
76      }
77  
78      @Override
79      public void update(final int b) {
80          oneByte[0] = (byte) (b & 0xff);
81          update(oneByte, 0, 1);
82      }
83  
84      @Override
85      public void update(final byte[] b, int off, final int len) {
86          if (len <= 0) {
87              return;
88          }
89          totalLen += len;
90  
91          final int end = off + len;
92  
93          if (pos + len < BUF_SIZE) {
94              System.arraycopy(b, off, buffer, pos, len);
95              pos += len;
96              return;
97          }
98  
99          if (pos > 0) {
100             final int size = BUF_SIZE - pos;
101             System.arraycopy(b, off, buffer, pos, size);
102             process(buffer, 0);
103             off += size;
104         }
105 
106         final int limit = end - BUF_SIZE;
107         while (off <= limit) {
108             process(b, off);
109             off += BUF_SIZE;
110         }
111 
112         if (off < end) {
113             pos = end - off;
114             System.arraycopy(b, off, buffer, 0, pos);
115         }
116     }
117 
118     @Override
119     public long getValue() {
120         int hash;
121         if (totalLen > BUF_SIZE) {
122             hash =
123                 rotateLeft(state[0],  1) +
124                 rotateLeft(state[1],  7) +
125                 rotateLeft(state[2], 12) +
126                 rotateLeft(state[3], 18);
127         } else {
128             hash = state[2] + PRIME5;
129         }
130         hash += totalLen;
131 
132         int idx = 0;
133         final int limit = pos - 4;
134         for (; idx <= limit; idx += 4) {
135             hash = rotateLeft(hash + getInt(buffer, idx) * PRIME3, 17) * PRIME4;
136         }
137         while (idx < pos) {
138             hash = rotateLeft(hash + (buffer[idx++] & 0xff) * PRIME5, 11) * PRIME1;
139         }
140 
141         hash ^= hash >>> 15;
142         hash *= PRIME2;
143         hash ^= hash >>> 13;
144         hash *= PRIME3;
145         hash ^= hash >>> 16;
146         return hash & 0xffffffffl;
147     }
148 
149     private static int getInt(final byte[] buffer, final int idx) {
150         return (int) (fromLittleEndian(buffer, idx, 4) & 0xffffffffl);
151     }
152 
153     private void initializeState() {
154         state[0] = seed + PRIME1 + PRIME2;
155         state[1] = seed + PRIME2;
156         state[2] = seed;
157         state[3] = seed - PRIME1;
158     }
159 
160     private void process(final byte[] b, final int offset) {
161         // local shadows for performance
162         int s0 = state[0];
163         int s1 = state[1];
164         int s2 = state[2];
165         int s3 = state[3];
166 
167         s0 = rotateLeft(s0 + getInt(b, offset) * PRIME2, ROTATE_BITS) * PRIME1;
168         s1 = rotateLeft(s1 + getInt(b, offset + 4) * PRIME2, ROTATE_BITS) * PRIME1;
169         s2 = rotateLeft(s2 + getInt(b, offset + 8) * PRIME2, ROTATE_BITS) * PRIME1;
170         s3 = rotateLeft(s3 + getInt(b, offset + 12) * PRIME2, ROTATE_BITS) * PRIME1;
171 
172         state[0] = s0;
173         state[1] = s1;
174         state[2] = s2;
175         state[3] = s3;
176 
177         pos = 0;
178     }
179 
180     /**
181      * Reads the given byte array as a little endian long.
182      * @param bytes the byte array to convert
183      * @param off the offset into the array that starts the value
184      * @param length the number of bytes representing the value
185      * @return the number read
186      * @throws IllegalArgumentException if len is bigger than eight
187      */
188     private static long fromLittleEndian(final byte[] bytes, final int off, final int length) {
189         if (length > 8) {
190             throw new IllegalArgumentException("can't read more than eight bytes into a long value");
191         }
192         long l = 0;
193         for (int i = 0; i < length; i++) {
194             l |= (bytes[off + i] & 0xffl) << (8 * i);
195         }
196         return l;
197     }
198 }