001/*
002 * Licensed to the Apache Software Foundation (ASF) under one or more
003 * contributor license agreements.  See the NOTICE file distributed with
004 * this work for additional information regarding copyright ownership.
005 * The ASF licenses this file to You under the Apache License, Version 2.0
006 * (the "License"); you may not use this file except in compliance with
007 * the License.  You may obtain a copy of the License at
008 *
009 *      http://www.apache.org/licenses/LICENSE-2.0
010 *
011 * Unless required by applicable law or agreed to in writing, software
012 * distributed under the License is distributed on an "AS IS" BASIS,
013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 * See the License for the specific language governing permissions and
015 * limitations under the License.
016 */
017package org.apache.commons.collections4.bloomfilter;
018
019import java.util.Objects;
020
021/**
022 * The interface that describes a Bloom filter.
023 * <p>
024 * <em>See implementation notes for {@link BitMapExtractor} and {@link IndexExtractor}.</em>
025 * </p>
026 *
027 * @param <T> The BloomFilter type.
028 * @see BitMapExtractor
029 * @see IndexExtractor
030 * @since 4.5.0-M1
031 */
032public interface BloomFilter<T extends BloomFilter<T>> extends IndexExtractor, BitMapExtractor {
033
034    /**
035     * The sparse characteristic used to determine the best method for matching: {@value}.
036     * <p>
037     * For `sparse` implementations the {@code forEachIndex(IntConsumer consumer)} method is more efficient. For non `sparse` implementations the
038     * {@code forEachBitMap(LongConsumer consumer)} is more efficient. Implementers should determine if it is easier.
039     * </p>
040     */
041    int SPARSE = 0x1;
042
043    /**
044     * Gets the cardinality (number of enabled bits) of this Bloom filter.
045     *
046     * <p>This is also known as the Hamming value or Hamming number.</p>
047     *
048     * @return the cardinality of this filter
049     */
050    int cardinality();
051
052    // Query Operations
053
054    /**
055     * Gets the characteristics of the filter.
056     * <p>
057     * Characteristics are defined as bits within the characteristics integer.
058     * </p>
059     *
060     * @return the characteristics for this bloom filter.
061     */
062    int characteristics();
063
064    /**
065     * Clears the filter to by resetting it to its initial, unpopulated state.
066     */
067    void clear();
068
069    /**
070     * Returns {@code true} if this filter contains the bits specified in the bit maps produced by the
071     * bitMapExtractor.
072     *
073     * @param bitMapExtractor the {@code BitMapExtractor} to provide the bit maps.
074     * @return {@code true} if this filter is enabled for all bits specified by the bit maps
075     */
076    default boolean contains(final BitMapExtractor bitMapExtractor) {
077        return processBitMapPairs(bitMapExtractor, (x, y) -> (x & y) == y);
078    }
079
080    /**
081     * Returns {@code true} if this filter contains the specified filter.
082     *
083     * <p>Specifically this
084     * returns {@code true} if this filter is enabled for all bits that are enabled in the
085     * {@code other} filter. Using the bit representations this is
086     * effectively {@code (this AND other) == other}.</p>
087     *
088     * @param other the other Bloom filter
089     * @return true if all enabled bits in the other filter are enabled in this filter.
090     */
091    default boolean contains(final BloomFilter<?> other) {
092        Objects.requireNonNull(other, "other");
093        return (characteristics() & SPARSE) != 0 ? contains((IndexExtractor) other) : contains((BitMapExtractor) other);
094    }
095
096    /**
097     * Returns {@code true} if this filter contains the bits specified in the hasher.
098     *
099     * <p>Specifically this returns {@code true} if this filter is enabled for all bit indexes
100     * identified by the {@code hasher}. Using the bit map representations this is
101     * effectively {@code (this AND hasher) == hasher}.</p>
102     *
103     * @param hasher the hasher to provide the indexes
104     * @return true if this filter is enabled for all bits specified by the hasher
105     */
106    default boolean contains(final Hasher hasher) {
107        Objects.requireNonNull(hasher, "Hasher");
108        final Shape shape = getShape();
109        return contains(hasher.indices(shape));
110    }
111
112    /**
113     * Returns {@code true} if this filter contains the indices specified IndexExtractor.
114     *
115     * <p>Specifically this returns {@code true} if this filter is enabled for all bit indexes
116     * identified by the {@code IndexExtractor}.</p>
117     *
118     * @param indexExtractor the IndexExtractor to provide the indexes
119     * @return {@code true} if this filter is enabled for all bits specified by the IndexExtractor
120     */
121    boolean contains(IndexExtractor indexExtractor);
122
123    /**
124     * Creates a new instance of this {@link BloomFilter} with the same properties as the current one.
125     *
126     * @return a copy of this {@link BloomFilter}.
127     */
128    T copy();
129
130    // update operations
131
132    /**
133     * Estimates the number of items in the intersection of this Bloom filter with the other bloom filter.
134     *
135     * <p>This method produces estimate is roughly equivalent to the number of unique Hashers that have been merged into both
136     * of the filters by rounding the value from the calculation described in the {@link Shape} class Javadoc.</p>
137     *
138     * <p><em>{@code estimateIntersection} should only be called with Bloom filters of the same Shape.  If called on Bloom
139     * filters of differing shape this method is not symmetric. If {@code other} has more bits an {@code IllegalArgumentException}
140     * may be thrown.</em></p>
141     *
142     * @param other The other Bloom filter
143     * @return an estimate of the number of items in the intersection. If the calculated estimate is larger than Integer.MAX_VALUE then MAX_VALUE is returned.
144     * @throws IllegalArgumentException if the estimated N for the union of the filters is infinite.
145     * @see #estimateN()
146     * @see Shape
147     */
148    default int estimateIntersection(final BloomFilter<?> other) {
149        Objects.requireNonNull(other, "other");
150        final double eThis = getShape().estimateN(cardinality());
151        final double eOther = getShape().estimateN(other.cardinality());
152        if (Double.isInfinite(eThis) && Double.isInfinite(eOther)) {
153            // if both are infinite the union is infinite and we return Integer.MAX_VALUE
154            return Integer.MAX_VALUE;
155        }
156        long estimate;
157        // if one is infinite the intersection is the other.
158        if (Double.isInfinite(eThis)) {
159            estimate = Math.round(eOther);
160        } else if (Double.isInfinite(eOther)) {
161            estimate = Math.round(eThis);
162        } else {
163            final T union = this.copy();
164            union.merge(other);
165            final double eUnion = getShape().estimateN(union.cardinality());
166            if (Double.isInfinite(eUnion)) {
167                throw new IllegalArgumentException("The estimated N for the union of the filters is infinite");
168            }
169            // maximum estimate value using integer values is: 46144189292 thus
170            // eThis + eOther cannot overflow the long value.
171            estimate = Math.round(eThis + eOther - eUnion);
172            estimate = estimate < 0 ? 0 : estimate;
173        }
174        return estimate > Integer.MAX_VALUE ? Integer.MAX_VALUE : (int) estimate;
175    }
176
177    /**
178     * Estimates the number of items in the Bloom filter.
179     *
180     * <p>By default this is the rounding of the {@code Shape.estimateN(cardinality)} calculation for the
181     * shape and cardinality of this filter.</p>
182     *
183     * <p>This produces an estimate roughly equivalent to the number of Hashers that have been merged into the filter
184     * by rounding the value from the calculation described in the {@link Shape} class Javadoc.</p>
185     *
186     * <p><em>Note:</em></p>
187     * <ul>
188     * <li>if cardinality == numberOfBits, then result is Integer.MAX_VALUE.</li>
189     * <li>if cardinality &gt; numberOfBits, then an IllegalArgumentException is thrown.</li>
190     * </ul>
191     *
192     * @return an estimate of the number of items in the bloom filter.  Will return Integer.MAX_VALUE if the
193     * estimate is larger than Integer.MAX_VALUE.
194     * @throws IllegalArgumentException if the cardinality is &gt; numberOfBits as defined in Shape.
195     * @see Shape#estimateN(int)
196     * @see Shape
197     */
198    default int estimateN() {
199        final double d = getShape().estimateN(cardinality());
200        if (Double.isInfinite(d)) {
201            return Integer.MAX_VALUE;
202        }
203        if (Double.isNaN(d)) {
204            throw new IllegalArgumentException("Cardinality too large: " + cardinality());
205        }
206        final long l = Math.round(d);
207        return l > Integer.MAX_VALUE ? Integer.MAX_VALUE : (int) l;
208    }
209
210    /**
211     * Estimates the number of items in the union of this Bloom filter with the other bloom filter.
212     *
213     * <p>This produces an estimate roughly equivalent to the number of unique Hashers that have been merged into either
214     * of the filters by rounding the value from the calculation described in the {@link Shape} class Javadoc.</p>
215     *
216     * <p><em>{@code estimateUnion} should only be called with Bloom filters of the same Shape.  If called on Bloom
217     * filters of differing shape this method is not symmetric. If {@code other} has more bits an {@code IllegalArgumentException}
218     * may be thrown.</em></p>
219     *
220     * @param other The other Bloom filter
221     * @return an estimate of the number of items in the union.  Will return Integer.MAX_VALUE if the
222     * estimate is larger than Integer.MAX_VALUE.
223     * @see #estimateN()
224     * @see Shape
225     */
226    default int estimateUnion(final BloomFilter<?> other) {
227        Objects.requireNonNull(other, "other");
228        final T copy = this.copy();
229        copy.merge(other);
230        return copy.estimateN();
231    }
232
233    /**
234     * Gets the shape that was used when the filter was built.
235     * @return The shape the filter was built with.
236     */
237    Shape getShape();
238
239    // Counting Operations
240
241    /**
242     * Determines if all the bits are off. This is equivalent to
243     * {@code cardinality() == 0}.
244     *
245     * <p>
246     * <em>Note: This method is optimized for non-sparse filters.</em> Implementers
247     * are encouraged to implement faster checks if possible.
248     * </p>
249     *
250     * @return {@code true} if no bits are enabled, {@code false} otherwise.
251     */
252    default boolean isEmpty() {
253        return processBitMaps(y -> y == 0);
254    }
255
256    /**
257     * Determines if the bloom filter is "full".
258     *
259     * <p>Full is defined as having no unset bits.</p>
260     *
261     * @return {@code true} if the filter is full, {@code false} otherwise.
262     */
263    default boolean isFull() {
264        return cardinality() == getShape().getNumberOfBits();
265    }
266
267    /**
268     * Merges the specified hasher into this Bloom filter. Specifically all
269     * bit indexes that are identified by the {@code bitMapExtractor} will be enabled in this filter.
270     *
271     * <p><em>Note: This method should return {@code true} even if no additional bit indexes were
272     * enabled. A {@code false} result indicates that this filter may or may not contain all the indexes
273     * enabled in the {@code bitMapExtractor}.</em>  This state may occur in complex Bloom filter implementations like
274     * counting Bloom filters.</p>
275     *
276     * @param bitMapExtractor The BitMapExtractor to merge.
277     * @return true if the merge was successful
278     * @throws IllegalArgumentException if bitMapExtractor sends illegal value.
279     */
280    boolean merge(BitMapExtractor bitMapExtractor);
281
282    /**
283     * Merges the specified Bloom filter into this Bloom filter.
284     *
285     * <p>Specifically all
286     * bit indexes that are identified by the {@code other} will be enabled in this filter.</p>
287     *
288     * <p><em>Note: This method should return {@code true} even if no additional bit indexes were
289     * enabled. A {@code false} result indicates that this filter may or may not contain
290     * the {@code other} Bloom filter.</em>  This state may occur in complex Bloom filter implementations like
291     * counting Bloom filters.</p>
292     *
293     * @param other The bloom filter to merge into this one.
294     * @return true if the merge was successful
295     */
296    default boolean merge(final BloomFilter<?> other) {
297        return (characteristics() & SPARSE) != 0 ? merge((IndexExtractor) other) : merge((BitMapExtractor) other);
298    }
299
300    /**
301     * Merges the specified hasher into this Bloom filter. Specifically all
302     * bit indexes that are identified by the {@code hasher} will be enabled in this filter.
303     *
304     * <p><em>Note: This method should return {@code true} even if no additional bit indexes were
305     * enabled. A {@code false} result indicates that this filter may or may not contain
306     * the {@code hasher} values.</em>  This state may occur in complex Bloom filter implementations like
307     * counting Bloom filters.</p>
308     *
309     * @param hasher The hasher to merge.
310     * @return true if the merge was successful
311     * @throws IllegalArgumentException if hasher produces an illegal value.
312     */
313    default boolean merge(final Hasher hasher) {
314        Objects.requireNonNull(hasher, "hasher");
315        return merge(hasher.indices(getShape()));
316    }
317
318    /**
319     * Merges the specified IndexExtractor into this Bloom filter. Specifically all
320     * bit indexes that are identified by the {@code indexExtractor} will be enabled in this filter.
321     *
322     * <p><em>Note: This method should return {@code true} even if no additional bit indexes were
323     * enabled. A {@code false} result indicates that this filter may or may not contain all the indexes of
324     * the {@code indexExtractor}.</em>  This state may occur in complex Bloom filter implementations like
325     * counting Bloom filters.</p>
326     *
327     * @param indexExtractor The IndexExtractor to merge.
328     * @return true if the merge was successful
329     * @throws IllegalArgumentException if indexExtractor sends illegal value.
330     */
331    boolean merge(IndexExtractor indexExtractor);
332
333    /**
334     * Most Bloom filters create unique IndexExtractors.
335     */
336    @Override
337    default IndexExtractor uniqueIndices() {
338        return this;
339    }
340}