001/* 002 * Licensed to the Apache Software Foundation (ASF) under one or more 003 * contributor license agreements. See the NOTICE file distributed with 004 * this work for additional information regarding copyright ownership. 005 * The ASF licenses this file to You under the Apache License, Version 2.0 006 * (the "License"); you may not use this file except in compliance with 007 * the License. You may obtain a copy of the License at 008 * 009 * http://www.apache.org/licenses/LICENSE-2.0 010 * 011 * Unless required by applicable law or agreed to in writing, software 012 * distributed under the License is distributed on an "AS IS" BASIS, 013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 014 * See the License for the specific language governing permissions and 015 * limitations under the License. 016 */ 017 018package org.apache.commons.codec.digest; 019 020/** 021 * MurmurHash2 yields a 32-bit or 64-bit value. 022 * 023 * MurmurHash is a non-cryptographic hash function suitable for general 024 * hash-based lookup. The name comes from two basic operations, multiply (MU) 025 * and rotate (R), used in its inner loop. Unlike cryptographic hash functions, 026 * it is not specifically designed to be difficult to reverse by an adversary, 027 * making it unsuitable for cryptographic purposes. 028 * 029 * This is a re-implementation of the original C code plus some additional 030 * features. 031 * 032 * Public domain. 033 * 034 * @see <a href="https://en.wikipedia.org/wiki/MurmurHash">MurmurHash</a> 035 * @since 1.13 036 */ 037public final class MurmurHash2 { 038 039 // all methods static; private constructor. 040 private MurmurHash2() { 041 } 042 043 /** 044 * Generates 32 bit hash from byte array of the given length and seed. 045 * 046 * @param data byte array to hash 047 * @param length length of the array to hash 048 * @param seed initial seed value 049 * @return 32 bit hash of the given array 050 */ 051 public static int hash32(final byte[] data, final int length, final int seed) { 052 // 'm' and 'r' are mixing constants generated offline. 053 // They're not really 'magic', they just happen to work well. 054 final int m = 0x5bd1e995; 055 final int r = 24; 056 057 // Initialize the hash to a random value 058 int h = seed ^ length; 059 final int length4 = length / 4; 060 061 for (int i = 0; i < length4; i++) { 062 final int i4 = i * 4; 063 int k = (data[i4 + 0] & 0xff) + ((data[i4 + 1] & 0xff) << 8) + ((data[i4 + 2] & 0xff) << 16) 064 + ((data[i4 + 3] & 0xff) << 24); 065 k *= m; 066 k ^= k >>> r; 067 k *= m; 068 h *= m; 069 h ^= k; 070 } 071 072 // Handle the last few bytes of the input array 073 switch (length % 4) { 074 case 3: 075 h ^= (data[(length & ~3) + 2] & 0xff) << 16; 076 case 2: 077 h ^= (data[(length & ~3) + 1] & 0xff) << 8; 078 case 1: 079 h ^= (data[length & ~3] & 0xff); 080 h *= m; 081 } 082 083 h ^= h >>> 13; 084 h *= m; 085 h ^= h >>> 15; 086 087 return h; 088 } 089 090 /** 091 * Generates 32 bit hash from byte array with default seed value. 092 * 093 * @param data byte array to hash 094 * @param length length of the array to hash 095 * @return 32 bit hash of the given array 096 */ 097 public static int hash32(final byte[] data, final int length) { 098 return hash32(data, length, 0x9747b28c); 099 } 100 101 /** 102 * Generates 32 bit hash from a string. 103 * 104 * @param text string to hash 105 * @return 32 bit hash of the given string 106 */ 107 public static int hash32(final String text) { 108 final byte[] bytes = text.getBytes(); 109 return hash32(bytes, bytes.length); 110 } 111 112 /** 113 * Generates 32 bit hash from a substring. 114 * 115 * @param text string to hash 116 * @param from starting index 117 * @param length length of the substring to hash 118 * @return 32 bit hash of the given string 119 */ 120 public static int hash32(final String text, final int from, final int length) { 121 return hash32(text.substring(from, from + length)); 122 } 123 124 /** 125 * Generates 64 bit hash from byte array of the given length and seed. 126 * 127 * @param data byte array to hash 128 * @param length length of the array to hash 129 * @param seed initial seed value 130 * @return 64 bit hash of the given array 131 */ 132 public static long hash64(final byte[] data, final int length, final int seed) { 133 final long m = 0xc6a4a7935bd1e995L; 134 final int r = 47; 135 136 long h = (seed & 0xffffffffl) ^ (length * m); 137 138 final int length8 = length / 8; 139 140 for (int i = 0; i < length8; i++) { 141 final int i8 = i * 8; 142 long k = ((long) data[i8 + 0] & 0xff) + (((long) data[i8 + 1] & 0xff) << 8) 143 + (((long) data[i8 + 2] & 0xff) << 16) + (((long) data[i8 + 3] & 0xff) << 24) 144 + (((long) data[i8 + 4] & 0xff) << 32) + (((long) data[i8 + 5] & 0xff) << 40) 145 + (((long) data[i8 + 6] & 0xff) << 48) + (((long) data[i8 + 7] & 0xff) << 56); 146 147 k *= m; 148 k ^= k >>> r; 149 k *= m; 150 151 h ^= k; 152 h *= m; 153 } 154 155 switch (length % 8) { 156 case 7: 157 h ^= (long) (data[(length & ~7) + 6] & 0xff) << 48; 158 case 6: 159 h ^= (long) (data[(length & ~7) + 5] & 0xff) << 40; 160 case 5: 161 h ^= (long) (data[(length & ~7) + 4] & 0xff) << 32; 162 case 4: 163 h ^= (long) (data[(length & ~7) + 3] & 0xff) << 24; 164 case 3: 165 h ^= (long) (data[(length & ~7) + 2] & 0xff) << 16; 166 case 2: 167 h ^= (long) (data[(length & ~7) + 1] & 0xff) << 8; 168 case 1: 169 h ^= data[length & ~7] & 0xff; 170 h *= m; 171 } 172 173 h ^= h >>> r; 174 h *= m; 175 h ^= h >>> r; 176 177 return h; 178 } 179 180 /** 181 * Generates 64 bit hash from byte array with default seed value. 182 * 183 * @param data byte array to hash 184 * @param length length of the array to hash 185 * @return 64 bit hash of the given string 186 */ 187 public static long hash64(final byte[] data, final int length) { 188 return hash64(data, length, 0xe17a1465); 189 } 190 191 /** 192 * Generates 64 bit hash from a string. 193 * 194 * @param text string to hash 195 * @return 64 bit hash of the given string 196 */ 197 public static long hash64(final String text) { 198 final byte[] bytes = text.getBytes(); 199 return hash64(bytes, bytes.length); 200 } 201 202 /** 203 * Generates 64 bit hash from a substring. 204 * 205 * @param text string to hash 206 * @param from starting index 207 * @param length length of the substring to hash 208 * @return 64 bit hash of the given array 209 */ 210 public static long hash64(final String text, final int from, final int length) { 211 return hash64(text.substring(from, from + length)); 212 } 213}