001/*
002 * Licensed to the Apache Software Foundation (ASF) under one or more
003 * contributor license agreements.  See the NOTICE file distributed with
004 * this work for additional information regarding copyright ownership.
005 * The ASF licenses this file to You under the Apache License, Version 2.0
006 * (the "License"); you may not use this file except in compliance with
007 * the License.  You may obtain a copy of the License at
008 *
009 *      http://www.apache.org/licenses/LICENSE-2.0
010 *
011 * Unless required by applicable law or agreed to in writing, software
012 * distributed under the License is distributed on an "AS IS" BASIS,
013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 * See the License for the specific language governing permissions and
015 * limitations under the License.
016 */
017
018package org.apache.commons.codec.digest;
019
020import static java.lang.Integer.rotateLeft;
021
022import java.util.zip.Checksum;
023
024/**
025 * Implementation of the xxhash32 hash algorithm.
026 *
027 * <p>Copied from Commons Compress 1.14
028 * <a href="https://git-wip-us.apache.org/repos/asf?p=commons-compress.git;a=blob;f=src/main/java/org/apache/commons/compress/compressors/lz4/XXHash32.java;h=a406ffc197449be594d46f0d2712b2d4786a1e68;hb=HEAD">https://git-wip-us.apache.org/repos/asf?p=commons-compress.git;a=blob;f=src/main/java/org/apache/commons/compress/compressors/lz4/XXHash32.java;h=a406ffc197449be594d46f0d2712b2d4786a1e68;hb=HEAD</a></p>
029 * <p>NotThreadSafe</p>
030 * @see <a href="http://cyan4973.github.io/xxHash/">xxHash</a>
031 * @since 1.11
032 */
033public class XXHash32 implements Checksum {
034
035    private static final int BUF_SIZE = 16;
036    private static final int ROTATE_BITS = 13;
037
038    private static final int PRIME1 = (int) 2654435761l;
039    private static final int PRIME2 = (int) 2246822519l;
040    private static final int PRIME3 = (int) 3266489917l;
041    private static final int PRIME4 =  668265263;
042    private static final int PRIME5 =  374761393;
043
044    private final byte[] oneByte = new byte[1];
045    private final int[] state = new int[4];
046    // Note: the code used to use ByteBuffer but the manual method is 50% faster
047    // See: http://git-wip-us.apache.org/repos/asf/commons-compress/diff/2f56fb5c
048    private final byte[] buffer = new byte[BUF_SIZE];
049    private final int seed;
050
051    private int totalLen;
052    private int pos;
053
054    /**
055     * Creates an XXHash32 instance with a seed of 0.
056     */
057    public XXHash32() {
058        this(0);
059    }
060
061    /**
062     * Creates an XXHash32 instance.
063     * @param seed the seed to use
064     */
065    public XXHash32(final int seed) {
066        this.seed = seed;
067        initializeState();
068    }
069
070    @Override
071    public void reset() {
072        initializeState();
073        totalLen = 0;
074        pos = 0;
075    }
076
077    @Override
078    public void update(final int b) {
079        oneByte[0] = (byte) (b & 0xff);
080        update(oneByte, 0, 1);
081    }
082
083    @Override
084    public void update(final byte[] b, int off, final int len) {
085        if (len <= 0) {
086            return;
087        }
088        totalLen += len;
089
090        final int end = off + len;
091
092        if (pos + len < BUF_SIZE) {
093            System.arraycopy(b, off, buffer, pos, len);
094            pos += len;
095            return;
096        }
097
098        if (pos > 0) {
099            final int size = BUF_SIZE - pos;
100            System.arraycopy(b, off, buffer, pos, size);
101            process(buffer, 0);
102            off += size;
103        }
104
105        final int limit = end - BUF_SIZE;
106        while (off <= limit) {
107            process(b, off);
108            off += BUF_SIZE;
109        }
110
111        if (off < end) {
112            pos = end - off;
113            System.arraycopy(b, off, buffer, 0, pos);
114        }
115    }
116
117    @Override
118    public long getValue() {
119        int hash;
120        if (totalLen > BUF_SIZE) {
121            hash =
122                rotateLeft(state[0],  1) +
123                rotateLeft(state[1],  7) +
124                rotateLeft(state[2], 12) +
125                rotateLeft(state[3], 18);
126        } else {
127            hash = state[2] + PRIME5;
128        }
129        hash += totalLen;
130
131        int idx = 0;
132        final int limit = pos - 4;
133        for (; idx <= limit; idx += 4) {
134            hash = rotateLeft(hash + getInt(buffer, idx) * PRIME3, 17) * PRIME4;
135        }
136        while (idx < pos) {
137            hash = rotateLeft(hash + (buffer[idx++] & 0xff) * PRIME5, 11) * PRIME1;
138        }
139
140        hash ^= hash >>> 15;
141        hash *= PRIME2;
142        hash ^= hash >>> 13;
143        hash *= PRIME3;
144        hash ^= hash >>> 16;
145        return hash & 0xffffffffl;
146    }
147
148    private static int getInt(final byte[] buffer, final int idx) {
149        return (int) (fromLittleEndian(buffer, idx, 4) & 0xffffffffl);
150    }
151
152    private void initializeState() {
153        state[0] = seed + PRIME1 + PRIME2;
154        state[1] = seed + PRIME2;
155        state[2] = seed;
156        state[3] = seed - PRIME1;
157    }
158
159    private void process(final byte[] b, final int offset) {
160        // local shadows for performance
161        int s0 = state[0];
162        int s1 = state[1];
163        int s2 = state[2];
164        int s3 = state[3];
165
166        s0 = rotateLeft(s0 + getInt(b, offset) * PRIME2, ROTATE_BITS) * PRIME1;
167        s1 = rotateLeft(s1 + getInt(b, offset + 4) * PRIME2, ROTATE_BITS) * PRIME1;
168        s2 = rotateLeft(s2 + getInt(b, offset + 8) * PRIME2, ROTATE_BITS) * PRIME1;
169        s3 = rotateLeft(s3 + getInt(b, offset + 12) * PRIME2, ROTATE_BITS) * PRIME1;
170
171        state[0] = s0;
172        state[1] = s1;
173        state[2] = s2;
174        state[3] = s3;
175
176        pos = 0;
177    }
178
179    /**
180     * Reads the given byte array as a little endian long.
181     * @param bytes the byte array to convert
182     * @param off the offset into the array that starts the value
183     * @param length the number of bytes representing the value
184     * @return the number read
185     * @throws IllegalArgumentException if len is bigger than eight
186     */
187    private static long fromLittleEndian(final byte[] bytes, final int off, final int length) {
188        if (length > 8) {
189            throw new IllegalArgumentException("can't read more than eight bytes into a long value");
190        }
191        long l = 0;
192        for (int i = 0; i < length; i++) {
193            l |= (bytes[off + i] & 0xffl) << (8 * i);
194        }
195        return l;
196    }
197}