001/*
002 * Licensed to the Apache Software Foundation (ASF) under one or more
003 * contributor license agreements.  See the NOTICE file distributed with
004 * this work for additional information regarding copyright ownership.
005 * The ASF licenses this file to You under the Apache License, Version 2.0
006 * (the "License"); you may not use this file except in compliance with
007 * the License.  You may obtain a copy of the License at
008 *
009 *      http://www.apache.org/licenses/LICENSE-2.0
010 *
011 * Unless required by applicable law or agreed to in writing, software
012 * distributed under the License is distributed on an "AS IS" BASIS,
013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 * See the License for the specific language governing permissions and
015 * limitations under the License.
016 */
017package org.apache.commons.collections4.bloomfilter;
018
019import java.util.Arrays;
020import java.util.NoSuchElementException;
021import java.util.Objects;
022import java.util.function.IntPredicate;
023import java.util.function.LongPredicate;
024import java.util.function.Predicate;
025
026/**
027 * Layered Bloom filters are described in Zhiwang, Cen; Jungang, Xu; Jian, Sun
028 * (2010), "A multi-layer Bloom filter for duplicated URL detection", Proc. 3rd
029 * International Conference on Advanced Computer Theory and Engineering (ICACTE
030 * 2010), vol. 1, pp. V1-586-V1-591, doi:10.1109/ICACTE.2010.5578947, ISBN
031 * 978-1-4244-6539-2, S2CID 3108985
032 * <p>
033 * In short, Layered Bloom filter contains several bloom filters arranged in
034 * layers.
035 * </p>
036 * <ul>
037 * <li>When membership in the filter is checked each layer in turn is checked
038 * and if a match is found {@code true} is returned.</li>
039 * <li>When merging each bloom filter is merged into the newest filter in the
040 * list of layers.</li>
041 * <li>When questions of cardinality are asked the cardinality of the union of
042 * the enclosed Bloom filters is used.</li>
043 * </ul>
044 * <p>
045 * The net result is that the layered Bloom filter can be populated with more
046 * items than the Shape would indicate and yet still return a false positive
047 * rate in line with the Shape and not the over population.
048 * </p>
049 * <p>
050 * This implementation uses a LayerManager to handle the manipulation of the
051 * layers.
052 * </p>
053 * <ul>
054 * <li>Level 0 is the oldest layer and the highest level is the newest.</li>
055 * <li>There is always at least one enclosed filter.</li>
056 * <li>The newest filter is the {@code target} into which merges are performed.
057 * <li>Whenever the target is retrieved, or a {@code merge} operation is
058 * performed the code checks if any older layers should be removed, and if so
059 * removes them. It also checks it a new layer should be added, and if so adds
060 * it and sets the {@code target} before the operation.</li>
061 * </ul>
062 * @param <T> The type of Bloom Filter that is used for the layers.
063 * @since 4.5
064 */
065public class LayeredBloomFilter<T extends BloomFilter> implements BloomFilter, BloomFilterExtractor {
066    /**
067     * A class used to locate matching filters across all the layers.
068     */
069    private class Finder implements Predicate<BloomFilter> {
070        int[] result = new int[layerManager.getDepth()];
071        int bfIdx;
072        int resultIdx;
073        BloomFilter bf;
074
075        Finder(final BloomFilter bf) {
076            this.bf = bf;
077        }
078
079        int[] getResult() {
080            return Arrays.copyOf(result, resultIdx);
081        }
082
083        @Override
084        public boolean test(final BloomFilter x) {
085            if (x.contains(bf)) {
086                result[resultIdx++] = bfIdx;
087            }
088            bfIdx++;
089            return true;
090        }
091    }
092
093    private final Shape shape;
094
095    private final LayerManager<T> layerManager;
096
097    /**
098     * Constructor.
099     *
100     * @param shape        the Shape of the enclosed Bloom filters
101     * @param layerManager the LayerManager to manage the layers.
102     */
103    public LayeredBloomFilter(final Shape shape, final LayerManager<T> layerManager) {
104        this.shape = shape;
105        this.layerManager = layerManager;
106    }
107
108    @Override
109    public int cardinality() {
110        return SetOperations.cardinality(this);
111    }
112
113    @Override
114    public int characteristics() {
115        return 0;
116    }
117
118    /**
119     * Forces the execution of the cleanup Consumer that was provided when the associated LayerManager
120     * was built.
121     *
122     * @see LayerManager.Builder#setCleanup(java.util.function.Consumer)
123     */
124    public void cleanup() {
125        layerManager.cleanup();
126    }
127
128    @Override
129    public final void clear() {
130        layerManager.clear();
131    }
132
133    @Override
134    public boolean contains(final BitMapExtractor bitMapExtractor) {
135        return contains(createFilter(bitMapExtractor));
136    }
137
138    /**
139     * Returns {@code true} if this any layer contained by this filter contains the
140     * specified filter.
141     * <p>
142     * If the {@code other} is a BloomFilterExtractor each filter within the
143     * {@code other} is checked to see if it exits within this filter.
144     * </p>
145     *
146     * @param other the other Bloom filter
147     * @return {@code true} if this filter contains the other filter.
148     */
149    @Override
150    public boolean contains(final BloomFilter other) {
151        return other instanceof BloomFilterExtractor ? contains((BloomFilterExtractor) other)
152                : !processBloomFilters(x -> !x.contains(other));
153    }
154
155    /**
156     * Returns {@code true} if each filter within the {@code bloomFilterExtractor} exits within
157     * this filter.
158     *
159     * @param bloomFilterExtractor the BloomFilterExtractor that provides the filters to check
160     *                 for.
161     * @return {@code true} if this filter contains all of the filters contained in
162     *         the {@code bloomFilterExtractor}.
163     */
164    public boolean contains(final BloomFilterExtractor bloomFilterExtractor) {
165        final boolean[] result = { true };
166        // return false when we have found a match to short circuit checks
167        return bloomFilterExtractor.processBloomFilters(x -> {
168            result[0] &= contains(x);
169            return result[0];
170        });
171    }
172
173    @Override
174    public boolean contains(final Hasher hasher) {
175        return contains(createFilter(hasher));
176    }
177
178    @Override
179    public boolean contains(final IndexExtractor indexExtractor) {
180        return contains(createFilter(indexExtractor));
181    }
182
183    @Override
184    public LayeredBloomFilter<T> copy() {
185        return new LayeredBloomFilter<>(shape, layerManager.copy());
186    }
187
188    /**
189     * Creates a Bloom filter from a BitMapExtractor.
190     *
191     * @param bitMapExtractor the BitMapExtractor to create the filter from.
192     * @return the BloomFilter.
193     */
194    private BloomFilter createFilter(final BitMapExtractor bitMapExtractor) {
195        final SimpleBloomFilter bf = new SimpleBloomFilter(shape);
196        bf.merge(bitMapExtractor);
197        return bf;
198    }
199
200    /**
201     * Creates a Bloom filter from a Hasher.
202     *
203     * @param hasher the hasher to create the filter from.
204     * @return the BloomFilter.
205     */
206    private BloomFilter createFilter(final Hasher hasher) {
207        final SimpleBloomFilter bf = new SimpleBloomFilter(shape);
208        bf.merge(hasher);
209        return bf;
210    }
211
212    /**
213     * Creates a Bloom filter from an IndexExtractor.
214     *
215     * @param indexExtractor the IndexExtractor to create the filter from.
216     * @return the BloomFilter.
217     */
218    private BloomFilter createFilter(final IndexExtractor indexExtractor) {
219        final SimpleBloomFilter bf = new SimpleBloomFilter(shape);
220        bf.merge(indexExtractor);
221        return bf;
222    }
223
224    @Override
225    public int estimateN() {
226        return flatten().estimateN();
227    }
228
229    @Override
230    public int estimateUnion(final BloomFilter other) {
231        Objects.requireNonNull(other, "other");
232        final BloomFilter cpy = this.flatten();
233        cpy.merge(other);
234        return cpy.estimateN();
235    }
236
237    /**
238     * Finds the layers in which the BitMapExtractor is found.
239     *
240     * @param bitMapExtractor the BitMapExtractor to search for.
241     * @return an array of layer indices in which the Bloom filter is found.
242     */
243    public int[] find(final BitMapExtractor bitMapExtractor) {
244        final SimpleBloomFilter bf = new SimpleBloomFilter(shape);
245        bf.merge(bitMapExtractor);
246        return find(bf);
247    }
248
249    /**
250     * Finds the layers in which the Bloom filter is found.
251     *
252     * @param bf the Bloom filter to search for.
253     * @return an array of layer indices in which the Bloom filter is found.
254     */
255    public int[] find(final BloomFilter bf) {
256        final Finder finder = new Finder(bf);
257        processBloomFilters(finder);
258        return finder.getResult();
259    }
260
261    /**
262     * Finds the layers in which the Hasher is found.
263     *
264     * @param hasher the Hasher to search for.
265     * @return an array of layer indices in which the Bloom filter is found.
266     */
267    public int[] find(final Hasher hasher) {
268        final SimpleBloomFilter bf = new SimpleBloomFilter(shape);
269        bf.merge(hasher);
270        return find(bf);
271    }
272
273    /**
274     * Finds the layers in which the IndexExtractor is found.
275     *
276     * @param indexExtractor the Index extractor to search for.
277     * @return an array of layer indices in which the Bloom filter is found.
278     */
279    public int[] find(final IndexExtractor indexExtractor) {
280        final SimpleBloomFilter bf = new SimpleBloomFilter(shape);
281        bf.merge(indexExtractor);
282        return find(bf);
283    }
284
285    /**
286     * Create a standard (non-layered) Bloom filter by merging all of the layers. If
287     * the filter is empty this method will return an empty Bloom filter.
288     *
289     * @return the merged bloom filter.
290     */
291    @Override
292    public BloomFilter flatten() {
293        final BloomFilter bf = new SimpleBloomFilter(shape);
294        processBloomFilters(bf::merge);
295        return bf;
296    }
297
298    @Override
299    public boolean processBitMaps(final LongPredicate predicate) {
300        return flatten().processBitMaps(predicate);
301    }
302
303    /**
304     * Processes the Bloom filters in depth order with the most recent filters
305     * first. Each filter is passed to the predicate in turn. The function exits on
306     * the first {@code false} returned by the predicate.
307     *
308     * @param bloomFilterPredicate the predicate to execute.
309     * @return {@code true} if all filters passed the predicate, {@code false}
310     *         otherwise.
311     */
312    @Override
313    public final boolean processBloomFilters(final Predicate<BloomFilter> bloomFilterPredicate) {
314        return layerManager.processBloomFilters(bloomFilterPredicate);
315    }
316
317    @Override
318    public boolean processIndices(final IntPredicate predicate) {
319        return processBloomFilters(bf -> bf.processIndices(predicate));
320    }
321
322    /**
323     * Gets the Bloom filter at the specified depth
324     *
325     * @param depth the depth of the filter to return.
326     * @return the Bloom filter at the specified depth.
327     * @throws NoSuchElementException if depth is not in the range [0,getDepth())
328     */
329    public T get(final int depth) {
330        return layerManager.get(depth);
331    }
332
333    /**
334     * Gets the depth of the deepest layer. The minimum value returned by this
335     * method is 1.
336     *
337     * @return the depth of the deepest layer.
338     */
339    public final int getDepth() {
340        return layerManager.getDepth();
341    }
342
343    @Override
344    public final Shape getShape() {
345        return shape;
346    }
347
348    @Override
349    public boolean isEmpty() {
350        return processBloomFilters(BloomFilter::isEmpty);
351    }
352
353    @Override
354    public boolean merge(final BitMapExtractor bitMapExtractor) {
355        return layerManager.getTarget().merge(bitMapExtractor);
356    }
357
358    @Override
359    public boolean merge(final BloomFilter bf) {
360        return layerManager.getTarget().merge(bf);
361    }
362
363    @Override
364    public boolean merge(final IndexExtractor indexExtractor) {
365        return layerManager.getTarget().merge(indexExtractor);
366    }
367
368    /**
369     * Forces and advance to the next layer. This method will clean-up the current
370     * layers and generate a new filter layer. In most cases is it unnecessary to
371     * call this method directly.
372     *
373     * @see LayerManager.Builder#setCleanup(java.util.function.Consumer)
374     * @see LayerManager.Builder#setExtendCheck(Predicate)
375     */
376    public void next() {
377        layerManager.next();
378    }
379
380}