001/* 002 * Licensed to the Apache Software Foundation (ASF) under one or more 003 * contributor license agreements. See the NOTICE file distributed with 004 * this work for additional information regarding copyright ownership. 005 * The ASF licenses this file to You under the Apache License, Version 2.0 006 * (the "License"); you may not use this file except in compliance with 007 * the License. You may obtain a copy of the License at 008 * 009 * https://www.apache.org/licenses/LICENSE-2.0 010 * 011 * Unless required by applicable law or agreed to in writing, software 012 * distributed under the License is distributed on an "AS IS" BASIS, 013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 014 * See the License for the specific language governing permissions and 015 * limitations under the License. 016 */ 017 018package org.apache.commons.codec.digest; 019 020import org.apache.commons.codec.binary.StringUtils; 021 022/** 023 * Implements the MurmurHash2 32-bit and 64-bit hash functions. 024 * 025 * <p>MurmurHash is a non-cryptographic hash function suitable for general 026 * hash-based lookup. The name comes from two basic operations, multiply (MU) 027 * and rotate (R), used in its inner loop. Unlike cryptographic hash functions, 028 * it is not specifically designed to be difficult to reverse by an adversary, 029 * making it unsuitable for cryptographic purposes.</p> 030 * 031 * <p>This contains a Java port of the 32-bit hash function {@code MurmurHash2} 032 * and the 64-bit hash function {@code MurmurHash64A} from Austin Appleby's 033 * original {@code c++} code in SMHasher.</p> 034 * 035 * <p>This is a re-implementation of the original C code plus some additional 036 * features.</p> 037 * 038 * <p>This is public domain code with no copyrights. From home page of 039 * <a href="https://github.com/aappleby/smhasher">SMHasher</a>:</p> 040 * 041 * <blockquote> 042 * "All MurmurHash versions are public domain software, and the author 043 * disclaims all copyright to their code." 044 * </blockquote> 045 * 046 * @see <a href="https://en.wikipedia.org/wiki/MurmurHash">MurmurHash</a> 047 * @see <a href="https://github.com/aappleby/smhasher/blob/master/src/MurmurHash2.cpp"> 048 * Original MurmurHash2 c++ code</a> 049 * @since 1.13 050 */ 051public final class MurmurHash2 { 052 053 // Constants for 32-bit variant 054 private static final int M32 = 0x5bd1e995; 055 private static final int R32 = 24; 056 057 // Constants for 64-bit variant 058 private static final long M64 = 0xc6a4a7935bd1e995L; 059 private static final int R64 = 47; 060 061 /** 062 * Generates a 32-bit hash from byte array with the given length and a default seed value. 063 * This is a helper method that will produce the same result as: 064 * 065 * <pre> 066 * int seed = 0x9747b28c; 067 * int hash = MurmurHash2.hash32(data, length, seed); 068 * </pre> 069 * 070 * @param data The input byte array 071 * @param length The length of the array 072 * @return The 32-bit hash 073 * @see #hash32(byte[], int, int) 074 */ 075 public static int hash32(final byte[] data, final int length) { 076 return hash32(data, length, 0x9747b28c); 077 } 078 079 /** 080 * Generates a 32-bit hash from byte array with the given length and seed. 081 * 082 * @param data The input byte array 083 * @param length The length of the array 084 * @param seed The initial seed value 085 * @return The 32-bit hash 086 */ 087 public static int hash32(final byte[] data, final int length, final int seed) { 088 // Initialize the hash to a random value 089 int h = seed ^ length; 090 // Mix 4 bytes at a time into the hash 091 final int nblocks = length >> 2; 092 // body 093 for (int i = 0; i < nblocks; i++) { 094 final int index = i << 2; 095 int k = MurmurHash.getLittleEndianInt(data, index); 096 k *= M32; 097 k ^= k >>> R32; 098 k *= M32; 099 h *= M32; 100 h ^= k; 101 } 102 // Handle the last few bytes of the input array 103 final int index = nblocks << 2; 104 switch (length - index) { 105 case 3: 106 h ^= (data[index + 2] & 0xff) << 16; 107 // falls-through 108 case 2: 109 h ^= (data[index + 1] & 0xff) << 8; 110 // falls-through 111 case 1: 112 h ^= data[index] & 0xff; 113 h *= M32; 114 } 115 // Do a few final mixes of the hash to ensure the last few 116 // bytes are well-incorporated. 117 h ^= h >>> 13; 118 h *= M32; 119 h ^= h >>> 15; 120 return h; 121 } 122 123 /** 124 * Generates a 32-bit hash from a string with a default seed. 125 * <p> 126 * Before 1.14 the string was converted using default encoding. 127 * Since 1.14 the string is converted to bytes using UTF-8 encoding. 128 * </p> 129 * This is a helper method that will produce the same result as: 130 * 131 * <pre> 132 * int seed = 0x9747b28c; 133 * byte[] bytes = data.getBytes(StandardCharsets.UTF_8); 134 * int hash = MurmurHash2.hash32(bytes, bytes.length, seed); 135 * </pre> 136 * 137 * @param text The input string 138 * @return The 32-bit hash 139 * @see #hash32(byte[], int, int) 140 */ 141 public static int hash32(final String text) { 142 final byte[] bytes = StringUtils.getBytesUtf8(text); 143 return hash32(bytes, bytes.length); 144 } 145 146 /** 147 * Generates a 32-bit hash from a substring with a default seed value. 148 * The string is converted to bytes using the default encoding. 149 * This is a helper method that will produce the same result as: 150 * 151 * <pre> 152 * int seed = 0x9747b28c; 153 * byte[] bytes = text.substring(from, from + length).getBytes(StandardCharsets.UTF_8); 154 * int hash = MurmurHash2.hash32(bytes, bytes.length, seed); 155 * </pre> 156 * 157 * @param text The input string 158 * @param from The starting index 159 * @param length The length of the substring 160 * @return The 32-bit hash 161 * @see #hash32(byte[], int, int) 162 */ 163 public static int hash32(final String text, final int from, final int length) { 164 return hash32(text.substring(from, from + length)); 165 } 166 167 /** 168 * Generates a 64-bit hash from byte array with given length and a default seed value. 169 * This is a helper method that will produce the same result as: 170 * 171 * <pre> 172 * int seed = 0xe17a1465; 173 * int hash = MurmurHash2.hash64(data, length, seed); 174 * </pre> 175 * 176 * @param data The input byte array 177 * @param length The length of the array 178 * @return The 64-bit hash 179 * @see #hash64(byte[], int, int) 180 */ 181 public static long hash64(final byte[] data, final int length) { 182 return hash64(data, length, 0xe17a1465); 183 } 184 185 /** 186 * Generates a 64-bit hash from byte array of the given length and seed. 187 * 188 * @param data The input byte array 189 * @param length The length of the array 190 * @param seed The initial seed value 191 * @return The 64-bit hash of the given array 192 */ 193 public static long hash64(final byte[] data, final int length, final int seed) { 194 long h = seed & 0xffffffffL ^ length * M64; 195 final int nblocks = length >> 3; 196 // body 197 for (int i = 0; i < nblocks; i++) { 198 final int index = i << 3; 199 long k = MurmurHash.getLittleEndianLong(data, index); 200 201 k *= M64; 202 k ^= k >>> R64; 203 k *= M64; 204 205 h ^= k; 206 h *= M64; 207 } 208 final int index = nblocks << 3; 209 switch (length - index) { 210 case 7: 211 h ^= ((long) data[index + 6] & 0xff) << 48; 212 // falls-through 213 case 6: 214 h ^= ((long) data[index + 5] & 0xff) << 40; 215 // falls-through 216 case 5: 217 h ^= ((long) data[index + 4] & 0xff) << 32; 218 // falls-through 219 case 4: 220 h ^= ((long) data[index + 3] & 0xff) << 24; 221 // falls-through 222 case 3: 223 h ^= ((long) data[index + 2] & 0xff) << 16; 224 // falls-through 225 case 2: 226 h ^= ((long) data[index + 1] & 0xff) << 8; 227 // falls-through 228 case 1: 229 h ^= (long) data[index] & 0xff; 230 h *= M64; 231 } 232 h ^= h >>> R64; 233 h *= M64; 234 h ^= h >>> R64; 235 return h; 236 } 237 238 /** 239 * Generates a 64-bit hash from a string with a default seed. 240 * <p> 241 * Before 1.14 the string was converted using default encoding. 242 * Since 1.14 the string is converted to bytes using UTF-8 encoding. 243 * </p> 244 * <p> 245 * This is a helper method that will produce the same result as: 246 * </p> 247 * 248 * <pre> 249 * int seed = 0xe17a1465; 250 * byte[] bytes = data.getBytes(StandardCharsets.UTF_8); 251 * int hash = MurmurHash2.hash64(bytes, bytes.length, seed); 252 * </pre> 253 * 254 * @param text The input string 255 * @return The 64-bit hash 256 * @see #hash64(byte[], int, int) 257 */ 258 public static long hash64(final String text) { 259 final byte[] bytes = StringUtils.getBytesUtf8(text); 260 return hash64(bytes, bytes.length); 261 } 262 263 /** 264 * Generates a 64-bit hash from a substring with a default seed value. 265 * The string is converted to bytes using the default encoding. 266 * This is a helper method that will produce the same result as: 267 * 268 * <pre> 269 * int seed = 0xe17a1465; 270 * byte[] bytes = text.substring(from, from + length).getBytes(StandardCharsets.UTF_8); 271 * int hash = MurmurHash2.hash64(bytes, bytes.length, seed); 272 * </pre> 273 * 274 * @param text The input string 275 * @param from The starting index 276 * @param length The length of the substring 277 * @return The 64-bit hash 278 * @see #hash64(byte[], int, int) 279 */ 280 public static long hash64(final String text, final int from, final int length) { 281 return hash64(text.substring(from, from + length)); 282 } 283 284 /** No instance methods. */ 285 private MurmurHash2() { 286 } 287}