1 /* 2 * Licensed to the Apache Software Foundation (ASF) under one or more 3 * contributor license agreements. See the NOTICE file distributed with 4 * this work for additional information regarding copyright ownership. 5 * The ASF licenses this file to You under the Apache License, Version 2.0 6 * (the "License"); you may not use this file except in compliance with 7 * the License. You may obtain a copy of the License at 8 * 9 * http://www.apache.org/licenses/LICENSE-2.0 10 * 11 * Unless required by applicable law or agreed to in writing, software 12 * distributed under the License is distributed on an "AS IS" BASIS, 13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 * See the License for the specific language governing permissions and 15 * limitations under the License. 16 */ 17 package org.apache.commons.collections4.bloomfilter; 18 19 import java.util.Objects; 20 21 /** 22 * The interface that describes a Bloom filter. 23 * <p> 24 * <em>See implementation notes for BitMapProducer and IndexProducer.</em> 25 * </p> 26 * @see BitMapProducer 27 * @see IndexProducer 28 * @since 4.5 29 */ 30 public interface BloomFilter extends IndexProducer, BitMapProducer { 31 32 /** 33 * The sparse characteristic used to determine the best method for matching. 34 * <p>For `sparse` implementations 35 * the {@code forEachIndex(IntConsumer consumer)} method is more efficient. For non `sparse` implementations 36 * the {@code forEachBitMap(LongConsumer consumer)} is more efficient. Implementers should determine if it is easier 37 * for the implementation to produce indexes of bit map blocks.</p> 38 */ 39 int SPARSE = 0x1; 40 41 /** 42 * Gets the cardinality (number of enabled bits) of this Bloom filter. 43 * 44 * <p>This is also known as the Hamming value or Hamming number.</p> 45 * 46 * @return the cardinality of this filter 47 */ 48 int cardinality(); 49 50 // Query Operations 51 52 /** 53 * Returns the characteristics of the filter. 54 * <p> 55 * Characteristics are defined as bits within the characteristics integer. 56 * @return the characteristics for this bloom filter. 57 */ 58 int characteristics(); 59 60 /** 61 * Resets the filter to its initial, unpopulated state. 62 */ 63 void clear(); 64 65 /** 66 * Returns {@code true} if this filter contains the bits specified in the bit maps produced by the 67 * bitMapProducer. 68 * 69 * @param bitMapProducer the {@code BitMapProducer} to provide the bit maps. 70 * @return {@code true} if this filter is enabled for all bits specified by the bit maps 71 */ 72 default boolean contains(final BitMapProducer bitMapProducer) { 73 return forEachBitMapPair(bitMapProducer, (x, y) -> (x & y) == y); 74 } 75 76 /** 77 * Returns {@code true} if this filter contains the specified filter. 78 * 79 * <p>Specifically this 80 * returns {@code true} if this filter is enabled for all bits that are enabled in the 81 * {@code other} filter. Using the bit representations this is 82 * effectively {@code (this AND other) == other}.</p> 83 * 84 * @param other the other Bloom filter 85 * @return true if all enabled bits in the other filter are enabled in this filter. 86 */ 87 default boolean contains(final BloomFilter other) { 88 Objects.requireNonNull(other, "other"); 89 return (characteristics() & SPARSE) != 0 ? contains((IndexProducer) other) : contains((BitMapProducer) other); 90 } 91 92 /** 93 * Returns {@code true} if this filter contains the bits specified in the hasher. 94 * 95 * <p>Specifically this returns {@code true} if this filter is enabled for all bit indexes 96 * identified by the {@code hasher}. Using the bit map representations this is 97 * effectively {@code (this AND hasher) == hasher}.</p> 98 * 99 * @param hasher the hasher to provide the indexes 100 * @return true if this filter is enabled for all bits specified by the hasher 101 */ 102 default boolean contains(final Hasher hasher) { 103 Objects.requireNonNull(hasher, "Hasher"); 104 final Shape shape = getShape(); 105 return contains(hasher.indices(shape)); 106 } 107 108 /** 109 * Returns {@code true} if this filter contains the indices specified IndexProducer. 110 * 111 * <p>Specifically this returns {@code true} if this filter is enabled for all bit indexes 112 * identified by the {@code IndexProducer}.</p> 113 * 114 * @param indexProducer the IndexProducer to provide the indexes 115 * @return {@code true} if this filter is enabled for all bits specified by the IndexProducer 116 */ 117 boolean contains(IndexProducer indexProducer); 118 119 /** 120 * Creates a new instance of the BloomFilter with the same properties as the current one. 121 * @return a copy of this BloomFilter 122 */ 123 BloomFilter copy(); 124 125 // update operations 126 127 /** 128 * Estimates the number of items in the intersection of this Bloom filter with the other bloom filter. 129 * 130 * <p>This method produces estimate is roughly equivalent to the number of unique Hashers that have been merged into both 131 * of the filters by rounding the value from the calculation described in the {@link Shape} class Javadoc.</p> 132 * 133 * <p><em>{@code estimateIntersection} should only be called with Bloom filters of the same Shape. If called on Bloom 134 * filters of differing shape this method is not symmetric. If {@code other} has more bits an {@code IllegalArgumentException} 135 * may be thrown.</em></p> 136 * 137 * @param other The other Bloom filter 138 * @return an estimate of the number of items in the intersection. If the calculated estimate is larger than Integer.MAX_VALUE then MAX_VALUE is returned. 139 * @throws IllegalArgumentException if the estimated N for the union of the filters is infinite. 140 * @see #estimateN() 141 * @see Shape 142 */ 143 default int estimateIntersection(final BloomFilter other) { 144 Objects.requireNonNull(other, "other"); 145 final double eThis = getShape().estimateN(cardinality()); 146 final double eOther = getShape().estimateN(other.cardinality()); 147 if (Double.isInfinite(eThis) && Double.isInfinite(eOther)) { 148 // if both are infinite the union is infinite and we return Integer.MAX_VALUE 149 return Integer.MAX_VALUE; 150 } 151 long estimate; 152 // if one is infinite the intersection is the other. 153 if (Double.isInfinite(eThis)) { 154 estimate = Math.round(eOther); 155 } else if (Double.isInfinite(eOther)) { 156 estimate = Math.round(eThis); 157 } else { 158 final BloomFilter union = this.copy(); 159 union.merge(other); 160 final double eUnion = getShape().estimateN(union.cardinality()); 161 if (Double.isInfinite(eUnion)) { 162 throw new IllegalArgumentException("The estimated N for the union of the filters is infinite"); 163 } 164 // maximum estimate value using integer values is: 46144189292 thus 165 // eThis + eOther can not overflow the long value. 166 estimate = Math.round(eThis + eOther - eUnion); 167 estimate = estimate < 0 ? 0 : estimate; 168 } 169 return estimate>Integer.MAX_VALUE?Integer.MAX_VALUE:(int) estimate; 170 } 171 172 /** 173 * Estimates the number of items in the Bloom filter. 174 * 175 * <p>By default this is the rounding of the {@code Shape.estimateN(cardinality)} calculation for the 176 * shape and cardinality of this filter.</p> 177 * 178 * <p>This produces an estimate roughly equivalent to the number of Hashers that have been merged into the filter 179 * by rounding the value from the calculation described in the {@link Shape} class Javadoc.</p> 180 * 181 * <p><em>Note:</em></p> 182 * <ul> 183 * <li>if cardinality == numberOfBits, then result is Integer.MAX_VALUE.</li> 184 * <li>if cardinality > numberOfBits, then an IllegalArgumentException is thrown.</li> 185 * </ul> 186 * 187 * @return an estimate of the number of items in the bloom filter. Will return Integer.MAX_VALUE if the 188 * estimate is larger than Integer.MAX_VALUE. 189 * @throws IllegalArgumentException if the cardinality is > numberOfBits as defined in Shape. 190 * @see Shape#estimateN(int) 191 * @see Shape 192 */ 193 default int estimateN() { 194 final double d = getShape().estimateN(cardinality()); 195 if (Double.isInfinite(d)) { 196 return Integer.MAX_VALUE; 197 } 198 if (Double.isNaN(d)) { 199 throw new IllegalArgumentException("Cardinality too large: " + cardinality()); 200 } 201 final long l = Math.round(d); 202 return l > Integer.MAX_VALUE ? Integer.MAX_VALUE : (int) l; 203 } 204 205 /** 206 * Estimates the number of items in the union of this Bloom filter with the other bloom filter. 207 * 208 * <p>This produces an estimate roughly equivalent to the number of unique Hashers that have been merged into either 209 * of the filters by rounding the value from the calculation described in the {@link Shape} class Javadoc.</p> 210 * 211 * <p><em>{@code estimateUnion} should only be called with Bloom filters of the same Shape. If called on Bloom 212 * filters of differing shape this method is not symmetric. If {@code other} has more bits an {@code IllegalArgumentException} 213 * may be thrown.</em></p> 214 * 215 * @param other The other Bloom filter 216 * @return an estimate of the number of items in the union. Will return Integer.MAX_VALUE if the 217 * estimate is larger than Integer.MAX_VALUE. 218 * @see #estimateN() 219 * @see Shape 220 */ 221 default int estimateUnion(final BloomFilter other) { 222 Objects.requireNonNull(other, "other"); 223 final BloomFilter cpy = this.copy(); 224 cpy.merge(other); 225 return cpy.estimateN(); 226 } 227 228 /** 229 * Gets the shape that was used when the filter was built. 230 * @return The shape the filter was built with. 231 */ 232 Shape getShape(); 233 234 // Counting Operations 235 236 /** 237 * Determines if all the bits are off. This is equivalent to 238 * {@code cardinality() == 0}. 239 * 240 * <p> 241 * <em>Note: This method is optimised for non-sparse filters.</em> Implementers 242 * are encouraged to implement faster checks if possible. 243 * </p> 244 * 245 * @return {@code true} if no bits are enabled, {@code false} otherwise. 246 */ 247 default boolean isEmpty() { 248 return forEachBitMap(y -> y == 0); 249 } 250 251 /** 252 * Determines if the bloom filter is "full". 253 * 254 * <p>Full is defined as having no unset bits.</p> 255 * 256 * @return {@code true} if the filter is full, {@code false} otherwise. 257 */ 258 default boolean isFull() { 259 return cardinality() == getShape().getNumberOfBits(); 260 } 261 262 /** 263 * Merges the specified hasher into this Bloom filter. Specifically all 264 * bit indexes that are identified by the {@code producer} will be enabled in this filter. 265 * 266 * <p><em>Note: This method should return {@code true} even if no additional bit indexes were 267 * enabled. A {@code false} result indicates that this filter may or may not contain all the indexes 268 * enabled in the {@code producer}.</em> This state may occur in complex Bloom filter implementations like 269 * counting Bloom filters.</p> 270 * 271 * @param bitMapProducer The producer to merge. 272 * @return true if the merge was successful 273 * @throws IllegalArgumentException if producer sends illegal value. 274 */ 275 boolean merge(BitMapProducer bitMapProducer); 276 277 /** 278 * Merges the specified Bloom filter into this Bloom filter. 279 * 280 * <p>Specifically all 281 * bit indexes that are identified by the {@code other} will be enabled in this filter.</p> 282 * 283 * <p><em>Note: This method should return {@code true} even if no additional bit indexes were 284 * enabled. A {@code false} result indicates that this filter may or may not contain 285 * the {@code other} Bloom filter.</em> This state may occur in complex Bloom filter implementations like 286 * counting Bloom filters.</p> 287 * 288 * @param other The bloom filter to merge into this one. 289 * @return true if the merge was successful 290 */ 291 default boolean merge(final BloomFilter other) { 292 return (characteristics() & SPARSE) != 0 ? merge((IndexProducer) other) : merge((BitMapProducer) other); 293 } 294 295 /** 296 * Merges the specified hasher into this Bloom filter. Specifically all 297 * bit indexes that are identified by the {@code hasher} will be enabled in this filter. 298 * 299 * <p><em>Note: This method should return {@code true} even if no additional bit indexes were 300 * enabled. A {@code false} result indicates that this filter may or may not contain 301 * the {@code hasher} values.</em> This state may occur in complex Bloom filter implementations like 302 * counting Bloom filters.</p> 303 * 304 * @param hasher The hasher to merge. 305 * @return true if the merge was successful 306 * @throws IllegalArgumentException if hasher produces an illegal value. 307 */ 308 default boolean merge(final Hasher hasher) { 309 Objects.requireNonNull(hasher, "hasher"); 310 return merge(hasher.indices(getShape())); 311 } 312 313 /** 314 * Merges the specified IndexProducer into this Bloom filter. Specifically all 315 * bit indexes that are identified by the {@code producer} will be enabled in this filter. 316 * 317 * <p><em>Note: This method should return {@code true} even if no additional bit indexes were 318 * enabled. A {@code false} result indicates that this filter may or may not contain all the indexes of 319 * the {@code producer}.</em> This state may occur in complex Bloom filter implementations like 320 * counting Bloom filters.</p> 321 * 322 * @param indexProducer The IndexProducer to merge. 323 * @return true if the merge was successful 324 * @throws IllegalArgumentException if producer sends illegal value. 325 */ 326 boolean merge(IndexProducer indexProducer); 327 328 /** 329 * Most Bloom filters create unique IndexProducers. 330 */ 331 @Override 332 default IndexProducer uniqueIndices() { 333 return this; 334 } 335 }