View Javadoc
1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one or more
3    * contributor license agreements.  See the NOTICE file distributed with
4    * this work for additional information regarding copyright ownership.
5    * The ASF licenses this file to You under the Apache License, Version 2.0
6    * (the "License"); you may not use this file except in compliance with
7    * the License.  You may obtain a copy of the License at
8    *
9    *      https://www.apache.org/licenses/LICENSE-2.0
10   *
11   * Unless required by applicable law or agreed to in writing, software
12   * distributed under the License is distributed on an "AS IS" BASIS,
13   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14   * See the License for the specific language governing permissions and
15   * limitations under the License.
16   */
17  
18  package org.apache.commons.codec.digest;
19  
20  import org.apache.commons.codec.binary.StringUtils;
21  
22  /**
23   * Implements the MurmurHash2 32-bit and 64-bit hash functions.
24   *
25   * <p>MurmurHash is a non-cryptographic hash function suitable for general
26   * hash-based lookup. The name comes from two basic operations, multiply (MU)
27   * and rotate (R), used in its inner loop. Unlike cryptographic hash functions,
28   * it is not specifically designed to be difficult to reverse by an adversary,
29   * making it unsuitable for cryptographic purposes.</p>
30   *
31   * <p>This contains a Java port of the 32-bit hash function {@code MurmurHash2}
32   * and the 64-bit hash function {@code MurmurHash64A} from Austin Appleby's
33   * original {@code c++} code in SMHasher.</p>
34   *
35   * <p>This is a re-implementation of the original C code plus some additional
36   * features.</p>
37   *
38   * <p>This is public domain code with no copyrights. From home page of
39   * <a href="https://github.com/aappleby/smhasher">SMHasher</a>:</p>
40   *
41   * <blockquote>
42   * "All MurmurHash versions are public domain software, and the author
43   * disclaims all copyright to their code."
44   * </blockquote>
45   *
46   * @see <a href="https://en.wikipedia.org/wiki/MurmurHash">MurmurHash</a>
47   * @see <a href="https://github.com/aappleby/smhasher/blob/master/src/MurmurHash2.cpp">
48   *   Original MurmurHash2 c++ code</a>
49   * @since 1.13
50   */
51  public final class MurmurHash2 {
52  
53      // Constants for 32-bit variant
54      private static final int M32 = 0x5bd1e995;
55      private static final int R32 = 24;
56  
57      // Constants for 64-bit variant
58      private static final long M64 = 0xc6a4a7935bd1e995L;
59      private static final int R64 = 47;
60  
61      /**
62       * Generates a 32-bit hash from byte array with the given length and a default seed value.
63       * This is a helper method that will produce the same result as:
64       *
65       * <pre>
66       * int seed = 0x9747b28c;
67       * int hash = MurmurHash2.hash32(data, length, seed);
68       * </pre>
69       *
70       * @param data The input byte array
71       * @param length The length of the array
72       * @return The 32-bit hash
73       * @see #hash32(byte[], int, int)
74       */
75      public static int hash32(final byte[] data, final int length) {
76          return hash32(data, length, 0x9747b28c);
77      }
78  
79      /**
80       * Generates a 32-bit hash from byte array with the given length and seed.
81       *
82       * @param data The input byte array
83       * @param length The length of the array
84       * @param seed The initial seed value
85       * @return The 32-bit hash
86       */
87      public static int hash32(final byte[] data, final int length, final int seed) {
88          // Initialize the hash to a random value
89          int h = seed ^ length;
90          // Mix 4 bytes at a time into the hash
91          final int nblocks = length >> 2;
92          // body
93          for (int i = 0; i < nblocks; i++) {
94              final int index = i << 2;
95              int k = MurmurHash.getLittleEndianInt(data, index);
96              k *= M32;
97              k ^= k >>> R32;
98              k *= M32;
99              h *= M32;
100             h ^= k;
101         }
102         // Handle the last few bytes of the input array
103         final int index = nblocks << 2;
104         switch (length - index) {
105         case 3:
106             h ^= (data[index + 2] & 0xff) << 16;
107             // falls-through
108         case 2:
109             h ^= (data[index + 1] & 0xff) << 8;
110             // falls-through
111         case 1:
112             h ^= data[index] & 0xff;
113             h *= M32;
114         }
115         // Do a few final mixes of the hash to ensure the last few
116         // bytes are well-incorporated.
117         h ^= h >>> 13;
118         h *= M32;
119         h ^= h >>> 15;
120         return h;
121     }
122 
123     /**
124      * Generates a 32-bit hash from a string with a default seed.
125      * <p>
126      * Before 1.14 the string was converted using default encoding.
127      * Since 1.14 the string is converted to bytes using UTF-8 encoding.
128      * </p>
129      * This is a helper method that will produce the same result as:
130      *
131      * <pre>
132      * int seed = 0x9747b28c;
133      * byte[] bytes = data.getBytes(StandardCharsets.UTF_8);
134      * int hash = MurmurHash2.hash32(bytes, bytes.length, seed);
135      * </pre>
136      *
137      * @param text The input string
138      * @return The 32-bit hash
139      * @see #hash32(byte[], int, int)
140      */
141     public static int hash32(final String text) {
142         final byte[] bytes = StringUtils.getBytesUtf8(text);
143         return hash32(bytes, bytes.length);
144     }
145 
146     /**
147      * Generates a 32-bit hash from a substring with a default seed value.
148      * The string is converted to bytes using the default encoding.
149      * This is a helper method that will produce the same result as:
150      *
151      * <pre>
152      * int seed = 0x9747b28c;
153      * byte[] bytes = text.substring(from, from + length).getBytes(StandardCharsets.UTF_8);
154      * int hash = MurmurHash2.hash32(bytes, bytes.length, seed);
155      * </pre>
156      *
157      * @param text The input string
158      * @param from The starting index
159      * @param length The length of the substring
160      * @return The 32-bit hash
161      * @see #hash32(byte[], int, int)
162      */
163     public static int hash32(final String text, final int from, final int length) {
164         return hash32(text.substring(from, from + length));
165     }
166 
167     /**
168      * Generates a 64-bit hash from byte array with given length and a default seed value.
169      * This is a helper method that will produce the same result as:
170      *
171      * <pre>
172      * int seed = 0xe17a1465;
173      * int hash = MurmurHash2.hash64(data, length, seed);
174      * </pre>
175      *
176      * @param data The input byte array
177      * @param length The length of the array
178      * @return The 64-bit hash
179      * @see #hash64(byte[], int, int)
180      */
181     public static long hash64(final byte[] data, final int length) {
182         return hash64(data, length, 0xe17a1465);
183     }
184 
185     /**
186      * Generates a 64-bit hash from byte array of the given length and seed.
187      *
188      * @param data The input byte array
189      * @param length The length of the array
190      * @param seed The initial seed value
191      * @return The 64-bit hash of the given array
192      */
193     public static long hash64(final byte[] data, final int length, final int seed) {
194         long h = seed & 0xffffffffL ^ length * M64;
195         final int nblocks = length >> 3;
196         // body
197         for (int i = 0; i < nblocks; i++) {
198             final int index = i << 3;
199             long k = MurmurHash.getLittleEndianLong(data, index);
200 
201             k *= M64;
202             k ^= k >>> R64;
203             k *= M64;
204 
205             h ^= k;
206             h *= M64;
207         }
208         final int index = nblocks << 3;
209         switch (length - index) {
210         case 7:
211             h ^= ((long) data[index + 6] & 0xff) << 48;
212             // falls-through
213         case 6:
214             h ^= ((long) data[index + 5] & 0xff) << 40;
215             // falls-through
216         case 5:
217             h ^= ((long) data[index + 4] & 0xff) << 32;
218             // falls-through
219         case 4:
220             h ^= ((long) data[index + 3] & 0xff) << 24;
221             // falls-through
222         case 3:
223             h ^= ((long) data[index + 2] & 0xff) << 16;
224             // falls-through
225         case 2:
226             h ^= ((long) data[index + 1] & 0xff) << 8;
227             // falls-through
228         case 1:
229             h ^= (long) data[index] & 0xff;
230             h *= M64;
231         }
232         h ^= h >>> R64;
233         h *= M64;
234         h ^= h >>> R64;
235         return h;
236     }
237 
238     /**
239      * Generates a 64-bit hash from a string with a default seed.
240      * <p>
241      * Before 1.14 the string was converted using default encoding.
242      * Since 1.14 the string is converted to bytes using UTF-8 encoding.
243      * </p>
244      * <p>
245      * This is a helper method that will produce the same result as:
246      * </p>
247      *
248      * <pre>
249      * int seed = 0xe17a1465;
250      * byte[] bytes = data.getBytes(StandardCharsets.UTF_8);
251      * int hash = MurmurHash2.hash64(bytes, bytes.length, seed);
252      * </pre>
253      *
254      * @param text The input string
255      * @return The 64-bit hash
256      * @see #hash64(byte[], int, int)
257      */
258     public static long hash64(final String text) {
259         final byte[] bytes = StringUtils.getBytesUtf8(text);
260         return hash64(bytes, bytes.length);
261     }
262 
263     /**
264      * Generates a 64-bit hash from a substring with a default seed value.
265      * The string is converted to bytes using the default encoding.
266      * This is a helper method that will produce the same result as:
267      *
268      * <pre>
269      * int seed = 0xe17a1465;
270      * byte[] bytes = text.substring(from, from + length).getBytes(StandardCharsets.UTF_8);
271      * int hash = MurmurHash2.hash64(bytes, bytes.length, seed);
272      * </pre>
273      *
274      * @param text The input string
275      * @param from The starting index
276      * @param length The length of the substring
277      * @return The 64-bit hash
278      * @see #hash64(byte[], int, int)
279      */
280     public static long hash64(final String text, final int from, final int length) {
281         return hash64(text.substring(from, from + length));
282     }
283 
284     /** No instance methods. */
285     private MurmurHash2() {
286     }
287 }