001/*
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 * http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing,
013 * software distributed under the License is distributed on an
014 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
015 * KIND, either express or implied.  See the License for the
016 * specific language governing permissions and limitations
017 * under the License.
018 */
019package org.apache.commons.codec.digest;
020
021import static java.lang.Integer.rotateLeft;
022
023import java.util.zip.Checksum;
024
025/**
026 * Implementation of the xxhash32 hash algorithm.
027 *
028 * <p>Copied from Commons Compress 1.14
029 * <a href="https://git-wip-us.apache.org/repos/asf?p=commons-compress.git;a=blob;f=src/main/java/org/apache/commons/compress/compressors/lz4/XXHash32.java;h=a406ffc197449be594d46f0d2712b2d4786a1e68;hb=HEAD">https://git-wip-us.apache.org/repos/asf?p=commons-compress.git;a=blob;f=src/main/java/org/apache/commons/compress/compressors/lz4/XXHash32.java;h=a406ffc197449be594d46f0d2712b2d4786a1e68;hb=HEAD</a></p>
030 * <p>NotThreadSafe</p>
031 * @see <a href="http://cyan4973.github.io/xxHash/">xxHash</a>
032 * @since 1.11
033 */
034public class XXHash32 implements Checksum {
035
036    private static final int BUF_SIZE = 16;
037    private static final int ROTATE_BITS = 13;
038
039    private static final int PRIME1 = (int) 2654435761l;
040    private static final int PRIME2 = (int) 2246822519l;
041    private static final int PRIME3 = (int) 3266489917l;
042    private static final int PRIME4 =  668265263;
043    private static final int PRIME5 =  374761393;
044
045    private final byte[] oneByte = new byte[1];
046    private final int[] state = new int[4];
047    // Note: the code used to use ByteBuffer but the manual method is 50% faster
048    // See: http://git-wip-us.apache.org/repos/asf/commons-compress/diff/2f56fb5c
049    private final byte[] buffer = new byte[BUF_SIZE];
050    private final int seed;
051
052    private int totalLen;
053    private int pos;
054
055    /**
056     * Creates an XXHash32 instance with a seed of 0.
057     */
058    public XXHash32() {
059        this(0);
060    }
061
062    /**
063     * Creates an XXHash32 instance.
064     * @param seed the seed to use
065     */
066    public XXHash32(final int seed) {
067        this.seed = seed;
068        initializeState();
069    }
070
071    @Override
072    public void reset() {
073        initializeState();
074        totalLen = 0;
075        pos = 0;
076    }
077
078    @Override
079    public void update(final int b) {
080        oneByte[0] = (byte) (b & 0xff);
081        update(oneByte, 0, 1);
082    }
083
084    @Override
085    public void update(final byte[] b, int off, final int len) {
086        if (len <= 0) {
087            return;
088        }
089        totalLen += len;
090
091        final int end = off + len;
092
093        if (pos + len < BUF_SIZE) {
094            System.arraycopy(b, off, buffer, pos, len);
095            pos += len;
096            return;
097        }
098
099        if (pos > 0) {
100            final int size = BUF_SIZE - pos;
101            System.arraycopy(b, off, buffer, pos, size);
102            process(buffer, 0);
103            off += size;
104        }
105
106        final int limit = end - BUF_SIZE;
107        while (off <= limit) {
108            process(b, off);
109            off += BUF_SIZE;
110        }
111
112        if (off < end) {
113            pos = end - off;
114            System.arraycopy(b, off, buffer, 0, pos);
115        }
116    }
117
118    @Override
119    public long getValue() {
120        int hash;
121        if (totalLen > BUF_SIZE) {
122            hash =
123                rotateLeft(state[0],  1) +
124                rotateLeft(state[1],  7) +
125                rotateLeft(state[2], 12) +
126                rotateLeft(state[3], 18);
127        } else {
128            hash = state[2] + PRIME5;
129        }
130        hash += totalLen;
131
132        int idx = 0;
133        final int limit = pos - 4;
134        for (; idx <= limit; idx += 4) {
135            hash = rotateLeft(hash + getInt(buffer, idx) * PRIME3, 17) * PRIME4;
136        }
137        while (idx < pos) {
138            hash = rotateLeft(hash + (buffer[idx++] & 0xff) * PRIME5, 11) * PRIME1;
139        }
140
141        hash ^= hash >>> 15;
142        hash *= PRIME2;
143        hash ^= hash >>> 13;
144        hash *= PRIME3;
145        hash ^= hash >>> 16;
146        return hash & 0xffffffffl;
147    }
148
149    private static int getInt(final byte[] buffer, final int idx) {
150        return (int) (fromLittleEndian(buffer, idx, 4) & 0xffffffffl);
151    }
152
153    private void initializeState() {
154        state[0] = seed + PRIME1 + PRIME2;
155        state[1] = seed + PRIME2;
156        state[2] = seed;
157        state[3] = seed - PRIME1;
158    }
159
160    private void process(final byte[] b, final int offset) {
161        // local shadows for performance
162        int s0 = state[0];
163        int s1 = state[1];
164        int s2 = state[2];
165        int s3 = state[3];
166
167        s0 = rotateLeft(s0 + getInt(b, offset) * PRIME2, ROTATE_BITS) * PRIME1;
168        s1 = rotateLeft(s1 + getInt(b, offset + 4) * PRIME2, ROTATE_BITS) * PRIME1;
169        s2 = rotateLeft(s2 + getInt(b, offset + 8) * PRIME2, ROTATE_BITS) * PRIME1;
170        s3 = rotateLeft(s3 + getInt(b, offset + 12) * PRIME2, ROTATE_BITS) * PRIME1;
171
172        state[0] = s0;
173        state[1] = s1;
174        state[2] = s2;
175        state[3] = s3;
176
177        pos = 0;
178    }
179
180    /**
181     * Reads the given byte array as a little endian long.
182     * @param bytes the byte array to convert
183     * @param off the offset into the array that starts the value
184     * @param length the number of bytes representing the value
185     * @return the number read
186     * @throws IllegalArgumentException if len is bigger than eight
187     */
188    private static long fromLittleEndian(final byte[] bytes, final int off, final int length) {
189        if (length > 8) {
190            throw new IllegalArgumentException("can't read more than eight bytes into a long value");
191        }
192        long l = 0;
193        for (int i = 0; i < length; i++) {
194            l |= (bytes[off + i] & 0xffl) << (8 * i);
195        }
196        return l;
197    }
198}