001/*
002 * Licensed to the Apache Software Foundation (ASF) under one or more
003 * contributor license agreements.  See the NOTICE file distributed with
004 * this work for additional information regarding copyright ownership.
005 * The ASF licenses this file to You under the Apache License, Version 2.0
006 * (the "License"); you may not use this file except in compliance with
007 * the License.  You may obtain a copy of the License at
008 *
009 *      http://www.apache.org/licenses/LICENSE-2.0
010 *
011 * Unless required by applicable law or agreed to in writing, software
012 * distributed under the License is distributed on an "AS IS" BASIS,
013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 * See the License for the specific language governing permissions and
015 * limitations under the License.
016 */
017package org.apache.commons.collections4.bloomfilter;
018
019import java.util.Arrays;
020import java.util.NoSuchElementException;
021import java.util.Objects;
022import java.util.function.IntPredicate;
023import java.util.function.LongPredicate;
024import java.util.function.Predicate;
025
026/**
027 * Layered Bloom filters are described in Zhiwang, Cen; Jungang, Xu; Jian, Sun
028 * (2010), "A multi-layer Bloom filter for duplicated URL detection", Proc. 3rd
029 * International Conference on Advanced Computer Theory and Engineering (ICACTE
030 * 2010), vol. 1, pp. V1-586-V1-591, doi:10.1109/ICACTE.2010.5578947, ISBN
031 * 978-1-4244-6539-2, S2CID 3108985
032 * <p>
033 * In short, Layered Bloom filter contains several bloom filters arranged in
034 * layers.
035 * </p>
036 * <ul>
037 * <li>When membership in the filter is checked each layer in turn is checked
038 * and if a match is found {@code true} is returned.</li>
039 * <li>When merging each bloom filter is merged into the newest filter in the
040 * list of layers.</li>
041 * <li>When questions of cardinality are asked the cardinality of the union of
042 * the enclosed Bloom filters is used.</li>
043 * </ul>
044 * <p>
045 * The net result is that the layered Bloom filter can be populated with more
046 * items than the Shape would indicate and yet still return a false positive
047 * rate in line with the Shape and not the over population.
048 * </p>
049 * <p>
050 * This implementation uses a LayerManager to handle the manipulation of the
051 * layers.
052 * </p>
053 * <ul>
054 * <li>Level 0 is the oldest layer and the highest level is the newest.</li>
055 * <li>There is always at least one enclosed filter.</li>
056 * <li>The newest filter is the {@code target} into which merges are performed.
057 * <li>Whenever the target is retrieved, or a {@code merge} operation is
058 * performed the code checks if any older layers should be removed, and if so
059 * removes them. It also checks it a new layer should be added, and if so adds
060 * it and sets the {@code target} before the operation.</li>
061 * </ul>
062 * @since 4.5
063 */
064public class LayeredBloomFilter implements BloomFilter, BloomFilterProducer {
065    /**
066     * A class used to locate matching filters across all the layers.
067     */
068    private class Finder implements Predicate<BloomFilter> {
069        int[] result = new int[layerManager.getDepth()];
070        int bfIdx;
071        int resultIdx;
072        BloomFilter bf;
073
074        Finder(BloomFilter bf) {
075            this.bf = bf;
076        }
077
078        int[] getResult() {
079            return Arrays.copyOf(result, resultIdx);
080        }
081
082        @Override
083        public boolean test(BloomFilter x) {
084            if (x.contains(bf)) {
085                result[resultIdx++] = bfIdx;
086            }
087            bfIdx++;
088            return true;
089        }
090    }
091    /**
092     * Creates a fixed size layered bloom filter that adds new filters to the list,
093     * but never merges them. List will never exceed maxDepth. As additional filters
094     * are added earlier filters are removed.
095     *
096     * @param shape    The shape for the enclosed Bloom filters.
097     * @param maxDepth The maximum depth of layers.
098     * @return An empty layered Bloom filter of the specified shape and depth.
099     */
100    public static LayeredBloomFilter fixed(final Shape shape, int maxDepth) {
101        LayerManager manager = LayerManager.builder().setExtendCheck(LayerManager.ExtendCheck.advanceOnPopulated())
102                .setCleanup(LayerManager.Cleanup.onMaxSize(maxDepth)).setSupplier(() -> new SimpleBloomFilter(shape)).build();
103        return new LayeredBloomFilter(shape, manager);
104    }
105
106    private final Shape shape;
107
108    private LayerManager layerManager;
109
110    /**
111     * Constructor.
112     *
113     * @param shape        the Shape of the enclosed Bloom filters
114     * @param layerManager the LayerManager to manage the layers.
115     */
116    public LayeredBloomFilter(Shape shape, LayerManager layerManager) {
117        this.shape = shape;
118        this.layerManager = layerManager;
119    }
120
121    @Override
122    public int cardinality() {
123        return SetOperations.cardinality(this);
124    }
125
126    @Override
127    public int characteristics() {
128        return 0;
129    }
130
131    @Override
132    public final void clear() {
133        layerManager.clear();
134    }
135
136    @Override
137    public boolean contains(final BitMapProducer bitMapProducer) {
138        return contains(createFilter(bitMapProducer));
139    }
140
141    /**
142     * Returns {@code true} if this any layer contained by this filter contains the
143     * specified filter.
144     * <p>
145     * If the {@code other} is a BloomFilterProducer each filter within the
146     * {@code other} is checked to see if it exits within this filter.
147     * </p>
148     *
149     * @param other the other Bloom filter
150     * @return {@code true} if this filter contains the other filter.
151     */
152    @Override
153    public boolean contains(final BloomFilter other) {
154        return other instanceof BloomFilterProducer ? contains((BloomFilterProducer) other)
155                : !forEachBloomFilter(x -> !x.contains(other));
156    }
157
158    /**
159     * Returns {@code true} if each filter within the {@code producer} exits within
160     * this filter.
161     *
162     * @param producer the BloomFilterProducer that provides the filters to check
163     *                 for.
164     * @return {@code true} if this filter contains all of the filters contained in
165     *         the {@code producer}.
166     */
167    public boolean contains(final BloomFilterProducer producer) {
168        boolean[] result = { true };
169        // return false when we have found a match to short circuit checks
170        return producer.forEachBloomFilter(x -> {
171            result[0] &= contains(x);
172            return result[0];
173        });
174    }
175
176    @Override
177    public boolean contains(final Hasher hasher) {
178        return contains(createFilter(hasher));
179    }
180
181    @Override
182    public boolean contains(IndexProducer indexProducer) {
183        return contains(createFilter(indexProducer));
184    }
185
186    @Override
187    public LayeredBloomFilter copy() {
188        return new LayeredBloomFilter(shape, layerManager.copy());
189    }
190
191    /**
192     * Creates a Bloom filter from a BitMapProducer.
193     *
194     * @param bitMapProducer the BitMapProducer to create the filter from.
195     * @return the BloomFilter.
196     */
197    private BloomFilter createFilter(final BitMapProducer bitMapProducer) {
198        SimpleBloomFilter bf = new SimpleBloomFilter(shape);
199        bf.merge(bitMapProducer);
200        return bf;
201    }
202
203    /**
204     * Creates a Bloom filter from a Hasher.
205     *
206     * @param hasher the hasher to create the filter from.
207     * @return the BloomFilter.
208     */
209    private BloomFilter createFilter(final Hasher hasher) {
210        SimpleBloomFilter bf = new SimpleBloomFilter(shape);
211        bf.merge(hasher);
212        return bf;
213    }
214
215    /**
216     * Creates a Bloom filter from an IndexProducer.
217     *
218     * @param indexProducer the IndexProducer to create the filter from.
219     * @return the BloomFilter.
220     */
221    private BloomFilter createFilter(final IndexProducer indexProducer) {
222        SimpleBloomFilter bf = new SimpleBloomFilter(shape);
223        bf.merge(indexProducer);
224        return bf;
225    }
226
227    @Override
228    public int estimateN() {
229        return flatten().estimateN();
230    }
231
232    @Override
233    public int estimateUnion(final BloomFilter other) {
234        Objects.requireNonNull(other, "other");
235        final BloomFilter cpy = this.flatten();
236        cpy.merge(other);
237        return cpy.estimateN();
238    }
239
240    /**
241     * Finds the layers in which the BitMapProducer is found.
242     *
243     * @param bitMapProducer the BitMapProducer to search for.
244     * @return an array of layer indices in which the Bloom filter is found.
245     */
246    public int[] find(final BitMapProducer bitMapProducer) {
247        SimpleBloomFilter bf = new SimpleBloomFilter(shape);
248        bf.merge(bitMapProducer);
249        return find(bf);
250    }
251
252    /**
253     * Finds the layers in which the Bloom filter is found.
254     *
255     * @param bf the Bloom filter to search for.
256     * @return an array of layer indices in which the Bloom filter is found.
257     */
258    public int[] find(BloomFilter bf) {
259        Finder finder = new Finder(bf);
260        forEachBloomFilter(finder);
261        return finder.getResult();
262    }
263
264    /**
265     * Finds the layers in which the Hasher is found.
266     *
267     * @param hasher the Hasher to search for.
268     * @return an array of layer indices in which the Bloom filter is found.
269     */
270    public int[] find(final Hasher hasher) {
271        SimpleBloomFilter bf = new SimpleBloomFilter(shape);
272        bf.merge(hasher);
273        return find(bf);
274    }
275
276    /**
277     * Finds the layers in which the IndexProducer is found.
278     *
279     * @param indexProducer the Index producer to search for.
280     * @return an array of layer indices in which the Bloom filter is found.
281     */
282    public int[] find(final IndexProducer indexProducer) {
283        SimpleBloomFilter bf = new SimpleBloomFilter(shape);
284        bf.merge(indexProducer);
285        return find(bf);
286    }
287
288    /**
289     * Create a standard (non-layered) Bloom filter by merging all of the layers. If
290     * the filter is empty this method will return an empty Bloom filter.
291     *
292     * @return the merged bloom filter.
293     */
294    @Override
295    public BloomFilter flatten() {
296        BloomFilter bf = new SimpleBloomFilter(shape);
297        forEachBloomFilter(bf::merge);
298        return bf;
299    }
300
301    @Override
302    public boolean forEachBitMap(LongPredicate predicate) {
303        return flatten().forEachBitMap(predicate);
304    }
305
306    /**
307     * Processes the Bloom filters in depth order with the most recent filters
308     * first. Each filter is passed to the predicate in turn. The function exits on
309     * the first {@code false} returned by the predicate.
310     *
311     * @param bloomFilterPredicate the predicate to execute.
312     * @return {@code true} if all filters passed the predicate, {@code false}
313     *         otherwise.
314     */
315    @Override
316    public final boolean forEachBloomFilter(Predicate<BloomFilter> bloomFilterPredicate) {
317        return layerManager.forEachBloomFilter(bloomFilterPredicate);
318    }
319
320    @Override
321    public boolean forEachIndex(IntPredicate predicate) {
322        return forEachBloomFilter(bf -> bf.forEachIndex(predicate));
323    }
324
325    /**
326     * Gets the Bloom filter at the specified depth
327     *
328     * @param depth the depth of the filter to return.
329     * @return the Bloom filter at the specified depth.
330     * @throws NoSuchElementException if depth is not in the range [0,getDepth())
331     */
332    public BloomFilter get(int depth) {
333        return layerManager.get(depth);
334    }
335
336    /**
337     * Gets the depth of the deepest layer. The minimum value returned by this
338     * method is 1.
339     *
340     * @return the depth of the deepest layer.
341     */
342    public final int getDepth() {
343        return layerManager.getDepth();
344    }
345
346    @Override
347    public final Shape getShape() {
348        return shape;
349    }
350
351    @Override
352    public boolean isEmpty() {
353        return forEachBloomFilter(BloomFilter::isEmpty);
354    }
355
356    @Override
357    public boolean merge(BitMapProducer bitMapProducer) {
358        return layerManager.getTarget().merge(bitMapProducer);
359    }
360
361    @Override
362    public boolean merge(BloomFilter bf) {
363        return layerManager.getTarget().merge(bf);
364    }
365
366    @Override
367    public boolean merge(IndexProducer indexProducer) {
368        return layerManager.getTarget().merge(indexProducer);
369    }
370
371    /**
372     * Forces and advance to the next layer. Executes the same logic as when
373     * LayerManager.extendCheck returns {@code true}
374     *
375     * @see LayerManager
376     */
377    public void next() {
378        layerManager.next();
379    }
380}