001/* 002 * Licensed to the Apache Software Foundation (ASF) under one or more 003 * contributor license agreements. See the NOTICE file distributed with 004 * this work for additional information regarding copyright ownership. 005 * The ASF licenses this file to You under the Apache License, Version 2.0 006 * (the "License"); you may not use this file except in compliance with 007 * the License. You may obtain a copy of the License at 008 * 009 * http://www.apache.org/licenses/LICENSE-2.0 010 * 011 * Unless required by applicable law or agreed to in writing, software 012 * distributed under the License is distributed on an "AS IS" BASIS, 013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 014 * See the License for the specific language governing permissions and 015 * limitations under the License. 016 */ 017package org.apache.commons.collections4.bloomfilter; 018 019import java.util.Objects; 020 021/** 022 * The interface that describes a Bloom filter. 023 * <p> 024 * <em>See implementation notes for BitMapExtractor and IndexExtractor.</em> 025 * </p> 026 * @see BitMapExtractor 027 * @see IndexExtractor 028 * @since 4.5 029 */ 030public interface BloomFilter extends IndexExtractor, BitMapExtractor { 031 032 /** 033 * The sparse characteristic used to determine the best method for matching. 034 * <p>For `sparse` implementations 035 * the {@code forEachIndex(IntConsumer consumer)} method is more efficient. For non `sparse` implementations 036 * the {@code forEachBitMap(LongConsumer consumer)} is more efficient. Implementers should determine if it is easier 037 * for the implementation to produce indexes of bit map blocks.</p> 038 */ 039 int SPARSE = 0x1; 040 041 /** 042 * Gets the cardinality (number of enabled bits) of this Bloom filter. 043 * 044 * <p>This is also known as the Hamming value or Hamming number.</p> 045 * 046 * @return the cardinality of this filter 047 */ 048 int cardinality(); 049 050 // Query Operations 051 052 /** 053 * Returns the characteristics of the filter. 054 * <p> 055 * Characteristics are defined as bits within the characteristics integer. 056 * @return the characteristics for this bloom filter. 057 */ 058 int characteristics(); 059 060 /** 061 * Resets the filter to its initial, unpopulated state. 062 */ 063 void clear(); 064 065 /** 066 * Returns {@code true} if this filter contains the bits specified in the bit maps produced by the 067 * bitMapExtractor. 068 * 069 * @param bitMapExtractor the {@code BitMapExtractor} to provide the bit maps. 070 * @return {@code true} if this filter is enabled for all bits specified by the bit maps 071 */ 072 default boolean contains(final BitMapExtractor bitMapExtractor) { 073 return processBitMapPairs(bitMapExtractor, (x, y) -> (x & y) == y); 074 } 075 076 /** 077 * Returns {@code true} if this filter contains the specified filter. 078 * 079 * <p>Specifically this 080 * returns {@code true} if this filter is enabled for all bits that are enabled in the 081 * {@code other} filter. Using the bit representations this is 082 * effectively {@code (this AND other) == other}.</p> 083 * 084 * @param other the other Bloom filter 085 * @return true if all enabled bits in the other filter are enabled in this filter. 086 */ 087 default boolean contains(final BloomFilter other) { 088 Objects.requireNonNull(other, "other"); 089 return (characteristics() & SPARSE) != 0 ? contains((IndexExtractor) other) : contains((BitMapExtractor) other); 090 } 091 092 /** 093 * Returns {@code true} if this filter contains the bits specified in the hasher. 094 * 095 * <p>Specifically this returns {@code true} if this filter is enabled for all bit indexes 096 * identified by the {@code hasher}. Using the bit map representations this is 097 * effectively {@code (this AND hasher) == hasher}.</p> 098 * 099 * @param hasher the hasher to provide the indexes 100 * @return true if this filter is enabled for all bits specified by the hasher 101 */ 102 default boolean contains(final Hasher hasher) { 103 Objects.requireNonNull(hasher, "Hasher"); 104 final Shape shape = getShape(); 105 return contains(hasher.indices(shape)); 106 } 107 108 /** 109 * Returns {@code true} if this filter contains the indices specified IndexExtractor. 110 * 111 * <p>Specifically this returns {@code true} if this filter is enabled for all bit indexes 112 * identified by the {@code IndexExtractor}.</p> 113 * 114 * @param indexExtractor the IndexExtractor to provide the indexes 115 * @return {@code true} if this filter is enabled for all bits specified by the IndexExtractor 116 */ 117 boolean contains(IndexExtractor indexExtractor); 118 119 /** 120 * Creates a new instance of the BloomFilter with the same properties as the current one. 121 * 122 * @param <T> Type of BloomFilter. 123 * @return a copy of this BloomFilter 124 */ 125 <T extends BloomFilter> T copy(); 126 127 // update operations 128 129 /** 130 * Estimates the number of items in the intersection of this Bloom filter with the other bloom filter. 131 * 132 * <p>This method produces estimate is roughly equivalent to the number of unique Hashers that have been merged into both 133 * of the filters by rounding the value from the calculation described in the {@link Shape} class Javadoc.</p> 134 * 135 * <p><em>{@code estimateIntersection} should only be called with Bloom filters of the same Shape. If called on Bloom 136 * filters of differing shape this method is not symmetric. If {@code other} has more bits an {@code IllegalArgumentException} 137 * may be thrown.</em></p> 138 * 139 * @param other The other Bloom filter 140 * @return an estimate of the number of items in the intersection. If the calculated estimate is larger than Integer.MAX_VALUE then MAX_VALUE is returned. 141 * @throws IllegalArgumentException if the estimated N for the union of the filters is infinite. 142 * @see #estimateN() 143 * @see Shape 144 */ 145 default int estimateIntersection(final BloomFilter other) { 146 Objects.requireNonNull(other, "other"); 147 final double eThis = getShape().estimateN(cardinality()); 148 final double eOther = getShape().estimateN(other.cardinality()); 149 if (Double.isInfinite(eThis) && Double.isInfinite(eOther)) { 150 // if both are infinite the union is infinite and we return Integer.MAX_VALUE 151 return Integer.MAX_VALUE; 152 } 153 long estimate; 154 // if one is infinite the intersection is the other. 155 if (Double.isInfinite(eThis)) { 156 estimate = Math.round(eOther); 157 } else if (Double.isInfinite(eOther)) { 158 estimate = Math.round(eThis); 159 } else { 160 final BloomFilter union = this.copy(); 161 union.merge(other); 162 final double eUnion = getShape().estimateN(union.cardinality()); 163 if (Double.isInfinite(eUnion)) { 164 throw new IllegalArgumentException("The estimated N for the union of the filters is infinite"); 165 } 166 // maximum estimate value using integer values is: 46144189292 thus 167 // eThis + eOther can not overflow the long value. 168 estimate = Math.round(eThis + eOther - eUnion); 169 estimate = estimate < 0 ? 0 : estimate; 170 } 171 return estimate > Integer.MAX_VALUE ? Integer.MAX_VALUE : (int) estimate; 172 } 173 174 /** 175 * Estimates the number of items in the Bloom filter. 176 * 177 * <p>By default this is the rounding of the {@code Shape.estimateN(cardinality)} calculation for the 178 * shape and cardinality of this filter.</p> 179 * 180 * <p>This produces an estimate roughly equivalent to the number of Hashers that have been merged into the filter 181 * by rounding the value from the calculation described in the {@link Shape} class Javadoc.</p> 182 * 183 * <p><em>Note:</em></p> 184 * <ul> 185 * <li>if cardinality == numberOfBits, then result is Integer.MAX_VALUE.</li> 186 * <li>if cardinality > numberOfBits, then an IllegalArgumentException is thrown.</li> 187 * </ul> 188 * 189 * @return an estimate of the number of items in the bloom filter. Will return Integer.MAX_VALUE if the 190 * estimate is larger than Integer.MAX_VALUE. 191 * @throws IllegalArgumentException if the cardinality is > numberOfBits as defined in Shape. 192 * @see Shape#estimateN(int) 193 * @see Shape 194 */ 195 default int estimateN() { 196 final double d = getShape().estimateN(cardinality()); 197 if (Double.isInfinite(d)) { 198 return Integer.MAX_VALUE; 199 } 200 if (Double.isNaN(d)) { 201 throw new IllegalArgumentException("Cardinality too large: " + cardinality()); 202 } 203 final long l = Math.round(d); 204 return l > Integer.MAX_VALUE ? Integer.MAX_VALUE : (int) l; 205 } 206 207 /** 208 * Estimates the number of items in the union of this Bloom filter with the other bloom filter. 209 * 210 * <p>This produces an estimate roughly equivalent to the number of unique Hashers that have been merged into either 211 * of the filters by rounding the value from the calculation described in the {@link Shape} class Javadoc.</p> 212 * 213 * <p><em>{@code estimateUnion} should only be called with Bloom filters of the same Shape. If called on Bloom 214 * filters of differing shape this method is not symmetric. If {@code other} has more bits an {@code IllegalArgumentException} 215 * may be thrown.</em></p> 216 * 217 * @param other The other Bloom filter 218 * @return an estimate of the number of items in the union. Will return Integer.MAX_VALUE if the 219 * estimate is larger than Integer.MAX_VALUE. 220 * @see #estimateN() 221 * @see Shape 222 */ 223 default int estimateUnion(final BloomFilter other) { 224 Objects.requireNonNull(other, "other"); 225 final BloomFilter cpy = this.copy(); 226 cpy.merge(other); 227 return cpy.estimateN(); 228 } 229 230 /** 231 * Gets the shape that was used when the filter was built. 232 * @return The shape the filter was built with. 233 */ 234 Shape getShape(); 235 236 // Counting Operations 237 238 /** 239 * Determines if all the bits are off. This is equivalent to 240 * {@code cardinality() == 0}. 241 * 242 * <p> 243 * <em>Note: This method is optimised for non-sparse filters.</em> Implementers 244 * are encouraged to implement faster checks if possible. 245 * </p> 246 * 247 * @return {@code true} if no bits are enabled, {@code false} otherwise. 248 */ 249 default boolean isEmpty() { 250 return processBitMaps(y -> y == 0); 251 } 252 253 /** 254 * Determines if the bloom filter is "full". 255 * 256 * <p>Full is defined as having no unset bits.</p> 257 * 258 * @return {@code true} if the filter is full, {@code false} otherwise. 259 */ 260 default boolean isFull() { 261 return cardinality() == getShape().getNumberOfBits(); 262 } 263 264 /** 265 * Merges the specified hasher into this Bloom filter. Specifically all 266 * bit indexes that are identified by the {@code bitMapExtractor} will be enabled in this filter. 267 * 268 * <p><em>Note: This method should return {@code true} even if no additional bit indexes were 269 * enabled. A {@code false} result indicates that this filter may or may not contain all the indexes 270 * enabled in the {@code bitMapExtractor}.</em> This state may occur in complex Bloom filter implementations like 271 * counting Bloom filters.</p> 272 * 273 * @param bitMapExtractor The BitMapExtractor to merge. 274 * @return true if the merge was successful 275 * @throws IllegalArgumentException if bitMapExtractor sends illegal value. 276 */ 277 boolean merge(BitMapExtractor bitMapExtractor); 278 279 /** 280 * Merges the specified Bloom filter into this Bloom filter. 281 * 282 * <p>Specifically all 283 * bit indexes that are identified by the {@code other} will be enabled in this filter.</p> 284 * 285 * <p><em>Note: This method should return {@code true} even if no additional bit indexes were 286 * enabled. A {@code false} result indicates that this filter may or may not contain 287 * the {@code other} Bloom filter.</em> This state may occur in complex Bloom filter implementations like 288 * counting Bloom filters.</p> 289 * 290 * @param other The bloom filter to merge into this one. 291 * @return true if the merge was successful 292 */ 293 default boolean merge(final BloomFilter other) { 294 return (characteristics() & SPARSE) != 0 ? merge((IndexExtractor) other) : merge((BitMapExtractor) other); 295 } 296 297 /** 298 * Merges the specified hasher into this Bloom filter. Specifically all 299 * bit indexes that are identified by the {@code hasher} will be enabled in this filter. 300 * 301 * <p><em>Note: This method should return {@code true} even if no additional bit indexes were 302 * enabled. A {@code false} result indicates that this filter may or may not contain 303 * the {@code hasher} values.</em> This state may occur in complex Bloom filter implementations like 304 * counting Bloom filters.</p> 305 * 306 * @param hasher The hasher to merge. 307 * @return true if the merge was successful 308 * @throws IllegalArgumentException if hasher produces an illegal value. 309 */ 310 default boolean merge(final Hasher hasher) { 311 Objects.requireNonNull(hasher, "hasher"); 312 return merge(hasher.indices(getShape())); 313 } 314 315 /** 316 * Merges the specified IndexExtractor into this Bloom filter. Specifically all 317 * bit indexes that are identified by the {@code indexExtractor} will be enabled in this filter. 318 * 319 * <p><em>Note: This method should return {@code true} even if no additional bit indexes were 320 * enabled. A {@code false} result indicates that this filter may or may not contain all the indexes of 321 * the {@code indexExtractor}.</em> This state may occur in complex Bloom filter implementations like 322 * counting Bloom filters.</p> 323 * 324 * @param indexExtractor The IndexExtractor to merge. 325 * @return true if the merge was successful 326 * @throws IllegalArgumentException if indexExtractor sends illegal value. 327 */ 328 boolean merge(IndexExtractor indexExtractor); 329 330 /** 331 * Most Bloom filters create unique IndexExtractors. 332 */ 333 @Override 334 default IndexExtractor uniqueIndices() { 335 return this; 336 } 337}