001/*
002 * Licensed to the Apache Software Foundation (ASF) under one or more
003 * contributor license agreements.  See the NOTICE file distributed with
004 * this work for additional information regarding copyright ownership.
005 * The ASF licenses this file to You under the Apache License, Version 2.0
006 * (the "License"); you may not use this file except in compliance with
007 * the License.  You may obtain a copy of the License at
008 *
009 *      http://www.apache.org/licenses/LICENSE-2.0
010 *
011 * Unless required by applicable law or agreed to in writing, software
012 * distributed under the License is distributed on an "AS IS" BASIS,
013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 * See the License for the specific language governing permissions and
015 * limitations under the License.
016 */
017package org.apache.commons.collections4.bloomfilter;
018
019import java.util.Arrays;
020import java.util.NoSuchElementException;
021import java.util.Objects;
022import java.util.function.IntPredicate;
023import java.util.function.LongPredicate;
024import java.util.function.Predicate;
025
026/**
027 * Layered Bloom filters are described in Zhiwang, Cen; Jungang, Xu; Jian, Sun (2010), "A multi-layer Bloom filter for duplicated URL detection", Proc. 3rd
028 * International Conference on Advanced Computer Theory and Engineering (ICACTE 2010), vol. 1, pp. V1-586-V1-591, doi:10.1109/ICACTE.2010.5578947, ISBN
029 * 978-1-4244-6539-2, S2CID 3108985
030 * <p>
031 * In short, Layered Bloom filter contains several bloom filters arranged in layers.
032 * </p>
033 * <ul>
034 * <li>When membership in the filter is checked each layer in turn is checked and if a match is found {@code true} is returned.</li>
035 * <li>When merging each bloom filter is merged into the newest filter in the list of layers.</li>
036 * <li>When questions of cardinality are asked the cardinality of the union of the enclosed Bloom filters is used.</li>
037 * </ul>
038 * <p>
039 * The net result is that the layered Bloom filter can be populated with more items than the Shape would indicate and yet still return a false positive rate in
040 * line with the Shape and not the over population.
041 * </p>
042 * <p>
043 * This implementation uses a LayerManager to handle the manipulation of the layers.
044 * </p>
045 * <ul>
046 * <li>Level 0 is the oldest layer and the highest level is the newest.</li>
047 * <li>There is always at least one enclosed filter.</li>
048 * <li>The newest filter is the {@code target} into which merges are performed.
049 * <li>Whenever the target is retrieved, or a {@code merge} operation is performed the code checks if any older layers should be removed, and if so removes
050 * them. It also checks it a new layer should be added, and if so adds it and sets the {@code target} before the operation.</li>
051 * </ul>
052 *
053 * @param <T> The type of Bloom Filter that is used for the layers.
054 * @since 4.5.0-M2
055 */
056public class LayeredBloomFilter<T extends BloomFilter<T>> implements BloomFilter<LayeredBloomFilter<T>>, BloomFilterExtractor {
057
058    /**
059     * A class used to locate matching filters across all the layers.
060     */
061    private class Finder implements Predicate<BloomFilter> {
062        int[] result = new int[layerManager.getDepth()];
063        int bfIdx;
064        int resultIdx;
065        BloomFilter<?> bf;
066
067        Finder(final BloomFilter<?> bf) {
068            this.bf = bf;
069        }
070
071        int[] getResult() {
072            return Arrays.copyOf(result, resultIdx);
073        }
074
075        @Override
076        public boolean test(final BloomFilter x) {
077            if (x.contains(bf)) {
078                result[resultIdx++] = bfIdx;
079            }
080            bfIdx++;
081            return true;
082        }
083    }
084
085    private final Shape shape;
086
087    private final LayerManager<T> layerManager;
088
089    /**
090     * Constructs a new instance.
091     *
092     * @param shape        the Shape of the enclosed Bloom filters
093     * @param layerManager the LayerManager to manage the layers.
094     */
095    public LayeredBloomFilter(final Shape shape, final LayerManager<T> layerManager) {
096        this.shape = shape;
097        this.layerManager = layerManager;
098    }
099
100    @Override
101    public int cardinality() {
102        return SetOperations.cardinality(this);
103    }
104
105    @Override
106    public int characteristics() {
107        return 0;
108    }
109
110    /**
111     * Forces the execution of the cleanup Consumer that was provided when the associated LayerManager was built.
112     *
113     * @see LayerManager.Builder#setCleanup(java.util.function.Consumer)
114     */
115    public void cleanup() {
116        layerManager.cleanup();
117    }
118
119    @Override
120    public final void clear() {
121        layerManager.clear();
122    }
123
124    @Override
125    public boolean contains(final BitMapExtractor bitMapExtractor) {
126        return contains(createFilter(bitMapExtractor));
127    }
128
129    /**
130     * Returns {@code true} if this any layer contained by this filter contains the specified filter.
131     * <p>
132     * If the {@code other} is a BloomFilterExtractor each filter within the {@code other} is checked to see if it exits within this filter.
133     * </p>
134     *
135     * @param other the other Bloom filter
136     * @return {@code true} if this filter contains the other filter.
137     */
138    @Override
139    public boolean contains(final BloomFilter other) {
140        return other instanceof BloomFilterExtractor ? contains((BloomFilterExtractor) other) : !processBloomFilters(x -> !x.contains(other));
141    }
142
143    /**
144     * Returns {@code true} if each filter within the {@code bloomFilterExtractor} exits within this filter.
145     *
146     * @param bloomFilterExtractor the BloomFilterExtractor that provides the filters to check for.
147     * @return {@code true} if this filter contains all of the filters contained in the {@code bloomFilterExtractor}.
148     */
149    public boolean contains(final BloomFilterExtractor bloomFilterExtractor) {
150        final boolean[] result = { true };
151        // return false when we have found a match to short circuit checks
152        return bloomFilterExtractor.processBloomFilters(x -> {
153            result[0] &= contains(x);
154            return result[0];
155        });
156    }
157
158    @Override
159    public boolean contains(final Hasher hasher) {
160        return contains(createFilter(hasher));
161    }
162
163    @Override
164    public boolean contains(final IndexExtractor indexExtractor) {
165        return contains(createFilter(indexExtractor));
166    }
167
168    /**
169     * Creates a new instance of this {@link LayeredBloomFilter} with the same properties as the current one.
170     *
171     * @return a copy of this {@link LayeredBloomFilter}.
172     */
173    @Override
174    public LayeredBloomFilter<T> copy() {
175        return new LayeredBloomFilter<>(shape, layerManager.copy());
176    }
177
178    /**
179     * Creates a Bloom filter from a BitMapExtractor.
180     *
181     * @param bitMapExtractor the BitMapExtractor to create the filter from.
182     * @return the BloomFilter.
183     */
184    private SimpleBloomFilter createFilter(final BitMapExtractor bitMapExtractor) {
185        final SimpleBloomFilter bf = new SimpleBloomFilter(shape);
186        bf.merge(bitMapExtractor);
187        return bf;
188    }
189
190    /**
191     * Creates a Bloom filter from a Hasher.
192     *
193     * @param hasher the hasher to create the filter from.
194     * @return the BloomFilter.
195     */
196    private SimpleBloomFilter createFilter(final Hasher hasher) {
197        final SimpleBloomFilter bf = new SimpleBloomFilter(shape);
198        bf.merge(hasher);
199        return bf;
200    }
201
202    /**
203     * Creates a Bloom filter from an IndexExtractor.
204     *
205     * @param indexExtractor the IndexExtractor to create the filter from.
206     * @return the BloomFilter.
207     */
208    private SimpleBloomFilter createFilter(final IndexExtractor indexExtractor) {
209        final SimpleBloomFilter bf = new SimpleBloomFilter(shape);
210        bf.merge(indexExtractor);
211        return bf;
212    }
213
214    @Override
215    public int estimateN() {
216        return flatten().estimateN();
217    }
218
219    @Override
220    public int estimateUnion(final BloomFilter other) {
221        Objects.requireNonNull(other, "other");
222        final BloomFilter cpy = this.flatten();
223        cpy.merge(other);
224        return cpy.estimateN();
225    }
226
227    /**
228     * Finds the layers in which the BitMapExtractor is found.
229     *
230     * @param bitMapExtractor the BitMapExtractor to search for.
231     * @return an array of layer indices in which the Bloom filter is found.
232     */
233    public int[] find(final BitMapExtractor bitMapExtractor) {
234        final SimpleBloomFilter bf = new SimpleBloomFilter(shape);
235        bf.merge(bitMapExtractor);
236        return find(bf);
237    }
238
239    /**
240     * Finds the layers in which the Bloom filter is found.
241     *
242     * @param bf the Bloom filter to search for.
243     * @return an array of layer indices in which the Bloom filter is found.
244     */
245    public int[] find(final BloomFilter bf) {
246        final Finder finder = new Finder(bf);
247        processBloomFilters(finder);
248        return finder.getResult();
249    }
250
251    /**
252     * Finds the layers in which the Hasher is found.
253     *
254     * @param hasher the Hasher to search for.
255     * @return an array of layer indices in which the Bloom filter is found.
256     */
257    public int[] find(final Hasher hasher) {
258        final SimpleBloomFilter bf = new SimpleBloomFilter(shape);
259        bf.merge(hasher);
260        return find(bf);
261    }
262
263    /**
264     * Finds the layers in which the IndexExtractor is found.
265     *
266     * @param indexExtractor the Index extractor to search for.
267     * @return an array of layer indices in which the Bloom filter is found.
268     */
269    public int[] find(final IndexExtractor indexExtractor) {
270        final SimpleBloomFilter bf = new SimpleBloomFilter(shape);
271        bf.merge(indexExtractor);
272        return find(bf);
273    }
274
275    /**
276     * Create a standard (non-layered) Bloom filter by merging all of the layers. If the filter is empty this method will return an empty Bloom filter.
277     *
278     * @return the merged bloom filter.
279     */
280    @Override
281    public SimpleBloomFilter flatten() {
282        final SimpleBloomFilter bf = new SimpleBloomFilter(shape);
283        processBloomFilters(bf::merge);
284        return bf;
285    }
286
287    /**
288     * Gets the Bloom filter at the specified depth
289     *
290     * @param depth the depth of the filter to return.
291     * @return the Bloom filter at the specified depth.
292     * @throws NoSuchElementException if depth is not in the range [0,getDepth())
293     */
294    public T get(final int depth) {
295        return layerManager.get(depth);
296    }
297
298    /**
299     * Gets the depth of the deepest layer. The minimum value returned by this method is 1.
300     *
301     * @return the depth of the deepest layer.
302     */
303    public final int getDepth() {
304        return layerManager.getDepth();
305    }
306
307    @Override
308    public final Shape getShape() {
309        return shape;
310    }
311
312    @Override
313    public boolean isEmpty() {
314        return processBloomFilters(BloomFilter::isEmpty);
315    }
316
317    @Override
318    public boolean merge(final BitMapExtractor bitMapExtractor) {
319        return layerManager.getTarget().merge(bitMapExtractor);
320    }
321
322    @Override
323    public boolean merge(final BloomFilter bf) {
324        return layerManager.getTarget().merge(bf);
325    }
326
327    @Override
328    public boolean merge(final IndexExtractor indexExtractor) {
329        return layerManager.getTarget().merge(indexExtractor);
330    }
331
332    /**
333     * Forces and advance to the next layer. This method will clean-up the current layers and generate a new filter layer. In most cases is it unnecessary to
334     * call this method directly.
335     *
336     * @see LayerManager.Builder#setCleanup(java.util.function.Consumer)
337     * @see LayerManager.Builder#setExtendCheck(Predicate)
338     */
339    public void next() {
340        layerManager.next();
341    }
342
343    @Override
344    public boolean processBitMaps(final LongPredicate predicate) {
345        return flatten().processBitMaps(predicate);
346    }
347
348    /**
349     * Processes the Bloom filters in depth order with the most recent filters first. Each filter is passed to the predicate in turn. The function exits on the
350     * first {@code false} returned by the predicate.
351     *
352     * @param bloomFilterPredicate the predicate to execute.
353     * @return {@code true} if all filters passed the predicate, {@code false} otherwise.
354     */
355    @Override
356    public final boolean processBloomFilters(final Predicate<BloomFilter> bloomFilterPredicate) {
357        return layerManager.processBloomFilters(bloomFilterPredicate);
358    }
359
360    @Override
361    public boolean processIndices(final IntPredicate predicate) {
362        return processBloomFilters(bf -> bf.processIndices(predicate));
363    }
364
365}