001/*
002 * Licensed to the Apache Software Foundation (ASF) under one or more
003 * contributor license agreements.  See the NOTICE file distributed with
004 * this work for additional information regarding copyright ownership.
005 * The ASF licenses this file to You under the Apache License, Version 2.0
006 * (the "License"); you may not use this file except in compliance with
007 * the License.  You may obtain a copy of the License at
008 *
009 *      https://www.apache.org/licenses/LICENSE-2.0
010 *
011 * Unless required by applicable law or agreed to in writing, software
012 * distributed under the License is distributed on an "AS IS" BASIS,
013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 * See the License for the specific language governing permissions and
015 * limitations under the License.
016 */
017
018package org.apache.commons.codec.digest;
019
020import org.apache.commons.codec.binary.StringUtils;
021
022/**
023 * Implements the MurmurHash2 32-bit and 64-bit hash functions.
024 *
025 * <p>MurmurHash is a non-cryptographic hash function suitable for general
026 * hash-based lookup. The name comes from two basic operations, multiply (MU)
027 * and rotate (R), used in its inner loop. Unlike cryptographic hash functions,
028 * it is not specifically designed to be difficult to reverse by an adversary,
029 * making it unsuitable for cryptographic purposes.</p>
030 *
031 * <p>This contains a Java port of the 32-bit hash function {@code MurmurHash2}
032 * and the 64-bit hash function {@code MurmurHash64A} from Austin Appleby's
033 * original {@code c++} code in SMHasher.</p>
034 *
035 * <p>This is a re-implementation of the original C code plus some additional
036 * features.</p>
037 *
038 * <p>This is public domain code with no copyrights. From home page of
039 * <a href="https://github.com/aappleby/smhasher">SMHasher</a>:</p>
040 *
041 * <blockquote>
042 * "All MurmurHash versions are public domain software, and the author
043 * disclaims all copyright to their code."
044 * </blockquote>
045 *
046 * @see <a href="https://en.wikipedia.org/wiki/MurmurHash">MurmurHash</a>
047 * @see <a href="https://github.com/aappleby/smhasher/blob/master/src/MurmurHash2.cpp">
048 *   Original MurmurHash2 c++ code</a>
049 * @since 1.13
050 */
051public final class MurmurHash2 {
052
053    // Constants for 32-bit variant
054    private static final int M32 = 0x5bd1e995;
055    private static final int R32 = 24;
056
057    // Constants for 64-bit variant
058    private static final long M64 = 0xc6a4a7935bd1e995L;
059    private static final int R64 = 47;
060
061    /**
062     * Generates a 32-bit hash from byte array with the given length and a default seed value.
063     * This is a helper method that will produce the same result as:
064     *
065     * <pre>
066     * int seed = 0x9747b28c;
067     * int hash = MurmurHash2.hash32(data, length, seed);
068     * </pre>
069     *
070     * @param data The input byte array
071     * @param length The length of the array
072     * @return The 32-bit hash
073     * @see #hash32(byte[], int, int)
074     */
075    public static int hash32(final byte[] data, final int length) {
076        return hash32(data, length, 0x9747b28c);
077    }
078
079    /**
080     * Generates a 32-bit hash from byte array with the given length and seed.
081     *
082     * @param data The input byte array
083     * @param length The length of the array
084     * @param seed The initial seed value
085     * @return The 32-bit hash
086     */
087    public static int hash32(final byte[] data, final int length, final int seed) {
088        // Initialize the hash to a random value
089        int h = seed ^ length;
090        // Mix 4 bytes at a time into the hash
091        final int nblocks = length >> 2;
092        // body
093        for (int i = 0; i < nblocks; i++) {
094            final int index = i << 2;
095            int k = MurmurHash.getLittleEndianInt(data, index);
096            k *= M32;
097            k ^= k >>> R32;
098            k *= M32;
099            h *= M32;
100            h ^= k;
101        }
102        // Handle the last few bytes of the input array
103        final int index = nblocks << 2;
104        switch (length - index) {
105        case 3:
106            h ^= (data[index + 2] & 0xff) << 16;
107            // falls-through
108        case 2:
109            h ^= (data[index + 1] & 0xff) << 8;
110            // falls-through
111        case 1:
112            h ^= data[index] & 0xff;
113            h *= M32;
114        }
115        // Do a few final mixes of the hash to ensure the last few
116        // bytes are well-incorporated.
117        h ^= h >>> 13;
118        h *= M32;
119        h ^= h >>> 15;
120        return h;
121    }
122
123    /**
124     * Generates a 32-bit hash from a string with a default seed.
125     * <p>
126     * Before 1.14 the string was converted using default encoding.
127     * Since 1.14 the string is converted to bytes using UTF-8 encoding.
128     * </p>
129     * This is a helper method that will produce the same result as:
130     *
131     * <pre>
132     * int seed = 0x9747b28c;
133     * byte[] bytes = data.getBytes(StandardCharsets.UTF_8);
134     * int hash = MurmurHash2.hash32(bytes, bytes.length, seed);
135     * </pre>
136     *
137     * @param text The input string
138     * @return The 32-bit hash
139     * @see #hash32(byte[], int, int)
140     */
141    public static int hash32(final String text) {
142        final byte[] bytes = StringUtils.getBytesUtf8(text);
143        return hash32(bytes, bytes.length);
144    }
145
146    /**
147     * Generates a 32-bit hash from a substring with a default seed value.
148     * The string is converted to bytes using the default encoding.
149     * This is a helper method that will produce the same result as:
150     *
151     * <pre>
152     * int seed = 0x9747b28c;
153     * byte[] bytes = text.substring(from, from + length).getBytes(StandardCharsets.UTF_8);
154     * int hash = MurmurHash2.hash32(bytes, bytes.length, seed);
155     * </pre>
156     *
157     * @param text The input string
158     * @param from The starting index
159     * @param length The length of the substring
160     * @return The 32-bit hash
161     * @see #hash32(byte[], int, int)
162     */
163    public static int hash32(final String text, final int from, final int length) {
164        return hash32(text.substring(from, from + length));
165    }
166
167    /**
168     * Generates a 64-bit hash from byte array with given length and a default seed value.
169     * This is a helper method that will produce the same result as:
170     *
171     * <pre>
172     * int seed = 0xe17a1465;
173     * int hash = MurmurHash2.hash64(data, length, seed);
174     * </pre>
175     *
176     * @param data The input byte array
177     * @param length The length of the array
178     * @return The 64-bit hash
179     * @see #hash64(byte[], int, int)
180     */
181    public static long hash64(final byte[] data, final int length) {
182        return hash64(data, length, 0xe17a1465);
183    }
184
185    /**
186     * Generates a 64-bit hash from byte array of the given length and seed.
187     *
188     * @param data The input byte array
189     * @param length The length of the array
190     * @param seed The initial seed value
191     * @return The 64-bit hash of the given array
192     */
193    public static long hash64(final byte[] data, final int length, final int seed) {
194        long h = seed & 0xffffffffL ^ length * M64;
195        final int nblocks = length >> 3;
196        // body
197        for (int i = 0; i < nblocks; i++) {
198            final int index = i << 3;
199            long k = MurmurHash.getLittleEndianLong(data, index);
200
201            k *= M64;
202            k ^= k >>> R64;
203            k *= M64;
204
205            h ^= k;
206            h *= M64;
207        }
208        final int index = nblocks << 3;
209        switch (length - index) {
210        case 7:
211            h ^= ((long) data[index + 6] & 0xff) << 48;
212            // falls-through
213        case 6:
214            h ^= ((long) data[index + 5] & 0xff) << 40;
215            // falls-through
216        case 5:
217            h ^= ((long) data[index + 4] & 0xff) << 32;
218            // falls-through
219        case 4:
220            h ^= ((long) data[index + 3] & 0xff) << 24;
221            // falls-through
222        case 3:
223            h ^= ((long) data[index + 2] & 0xff) << 16;
224            // falls-through
225        case 2:
226            h ^= ((long) data[index + 1] & 0xff) << 8;
227            // falls-through
228        case 1:
229            h ^= (long) data[index] & 0xff;
230            h *= M64;
231        }
232        h ^= h >>> R64;
233        h *= M64;
234        h ^= h >>> R64;
235        return h;
236    }
237
238    /**
239     * Generates a 64-bit hash from a string with a default seed.
240     * <p>
241     * Before 1.14 the string was converted using default encoding.
242     * Since 1.14 the string is converted to bytes using UTF-8 encoding.
243     * </p>
244     * <p>
245     * This is a helper method that will produce the same result as:
246     * </p>
247     *
248     * <pre>
249     * int seed = 0xe17a1465;
250     * byte[] bytes = data.getBytes(StandardCharsets.UTF_8);
251     * int hash = MurmurHash2.hash64(bytes, bytes.length, seed);
252     * </pre>
253     *
254     * @param text The input string
255     * @return The 64-bit hash
256     * @see #hash64(byte[], int, int)
257     */
258    public static long hash64(final String text) {
259        final byte[] bytes = StringUtils.getBytesUtf8(text);
260        return hash64(bytes, bytes.length);
261    }
262
263    /**
264     * Generates a 64-bit hash from a substring with a default seed value.
265     * The string is converted to bytes using the default encoding.
266     * This is a helper method that will produce the same result as:
267     *
268     * <pre>
269     * int seed = 0xe17a1465;
270     * byte[] bytes = text.substring(from, from + length).getBytes(StandardCharsets.UTF_8);
271     * int hash = MurmurHash2.hash64(bytes, bytes.length, seed);
272     * </pre>
273     *
274     * @param text The input string
275     * @param from The starting index
276     * @param length The length of the substring
277     * @return The 64-bit hash
278     * @see #hash64(byte[], int, int)
279     */
280    public static long hash64(final String text, final int from, final int length) {
281        return hash64(text.substring(from, from + length));
282    }
283
284    /** No instance methods. */
285    private MurmurHash2() {
286    }
287}