View Javadoc
1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one or more
3    * contributor license agreements.  See the NOTICE file distributed with
4    * this work for additional information regarding copyright ownership.
5    * The ASF licenses this file to You under the Apache License, Version 2.0
6    * (the "License"); you may not use this file except in compliance with
7    * the License.  You may obtain a copy of the License at
8    *
9    *      http://www.apache.org/licenses/LICENSE-2.0
10   *
11   * Unless required by applicable law or agreed to in writing, software
12   * distributed under the License is distributed on an "AS IS" BASIS,
13   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14   * See the License for the specific language governing permissions and
15   * limitations under the License.
16   */
17  package org.apache.commons.collections4.bloomfilter;
18  
19  import java.util.Arrays;
20  import java.util.NoSuchElementException;
21  import java.util.Objects;
22  import java.util.function.IntPredicate;
23  import java.util.function.LongPredicate;
24  import java.util.function.Predicate;
25  
26  /**
27   * Layered Bloom filters are described in Zhiwang, Cen; Jungang, Xu; Jian, Sun (2010), "A multi-layer Bloom filter for duplicated URL detection", Proc. 3rd
28   * International Conference on Advanced Computer Theory and Engineering (ICACTE 2010), vol. 1, pp. V1-586-V1-591, doi:10.1109/ICACTE.2010.5578947, ISBN
29   * 978-1-4244-6539-2, S2CID 3108985
30   * <p>
31   * In short, Layered Bloom filter contains several bloom filters arranged in layers.
32   * </p>
33   * <ul>
34   * <li>When membership in the filter is checked each layer in turn is checked and if a match is found {@code true} is returned.</li>
35   * <li>When merging each bloom filter is merged into the newest filter in the list of layers.</li>
36   * <li>When questions of cardinality are asked the cardinality of the union of the enclosed Bloom filters is used.</li>
37   * </ul>
38   * <p>
39   * The net result is that the layered Bloom filter can be populated with more items than the Shape would indicate and yet still return a false positive rate in
40   * line with the Shape and not the over population.
41   * </p>
42   * <p>
43   * This implementation uses a LayerManager to handle the manipulation of the layers.
44   * </p>
45   * <ul>
46   * <li>Level 0 is the oldest layer and the highest level is the newest.</li>
47   * <li>There is always at least one enclosed filter.</li>
48   * <li>The newest filter is the {@code target} into which merges are performed.
49   * <li>Whenever the target is retrieved, or a {@code merge} operation is performed the code checks if any older layers should be removed, and if so removes
50   * them. It also checks it a new layer should be added, and if so adds it and sets the {@code target} before the operation.</li>
51   * </ul>
52   *
53   * @param <T> The type of Bloom Filter that is used for the layers.
54   * @since 4.5.0-M2
55   */
56  public class LayeredBloomFilter<T extends BloomFilter<T>> implements BloomFilter<LayeredBloomFilter<T>>, BloomFilterExtractor {
57  
58      /**
59       * A class used to locate matching filters across all the layers.
60       */
61      private class Finder implements Predicate<BloomFilter> {
62          int[] result = new int[layerManager.getDepth()];
63          int bfIdx;
64          int resultIdx;
65          BloomFilter<?> bf;
66  
67          Finder(final BloomFilter<?> bf) {
68              this.bf = bf;
69          }
70  
71          int[] getResult() {
72              return Arrays.copyOf(result, resultIdx);
73          }
74  
75          @Override
76          public boolean test(final BloomFilter x) {
77              if (x.contains(bf)) {
78                  result[resultIdx++] = bfIdx;
79              }
80              bfIdx++;
81              return true;
82          }
83      }
84  
85      private final Shape shape;
86  
87      private final LayerManager<T> layerManager;
88  
89      /**
90       * Constructs a new instance.
91       *
92       * @param shape        the Shape of the enclosed Bloom filters
93       * @param layerManager the LayerManager to manage the layers.
94       */
95      public LayeredBloomFilter(final Shape shape, final LayerManager<T> layerManager) {
96          this.shape = shape;
97          this.layerManager = layerManager;
98      }
99  
100     @Override
101     public int cardinality() {
102         return SetOperations.cardinality(this);
103     }
104 
105     @Override
106     public int characteristics() {
107         return 0;
108     }
109 
110     /**
111      * Forces the execution of the cleanup Consumer that was provided when the associated LayerManager was built.
112      *
113      * @see LayerManager.Builder#setCleanup(java.util.function.Consumer)
114      */
115     public void cleanup() {
116         layerManager.cleanup();
117     }
118 
119     @Override
120     public final void clear() {
121         layerManager.clear();
122     }
123 
124     @Override
125     public boolean contains(final BitMapExtractor bitMapExtractor) {
126         return contains(createFilter(bitMapExtractor));
127     }
128 
129     /**
130      * Returns {@code true} if this any layer contained by this filter contains the specified filter.
131      * <p>
132      * If the {@code other} is a BloomFilterExtractor each filter within the {@code other} is checked to see if it exits within this filter.
133      * </p>
134      *
135      * @param other the other Bloom filter
136      * @return {@code true} if this filter contains the other filter.
137      */
138     @Override
139     public boolean contains(final BloomFilter other) {
140         return other instanceof BloomFilterExtractor ? contains((BloomFilterExtractor) other) : !processBloomFilters(x -> !x.contains(other));
141     }
142 
143     /**
144      * Returns {@code true} if each filter within the {@code bloomFilterExtractor} exits within this filter.
145      *
146      * @param bloomFilterExtractor the BloomFilterExtractor that provides the filters to check for.
147      * @return {@code true} if this filter contains all of the filters contained in the {@code bloomFilterExtractor}.
148      */
149     public boolean contains(final BloomFilterExtractor bloomFilterExtractor) {
150         final boolean[] result = { true };
151         // return false when we have found a match to short circuit checks
152         return bloomFilterExtractor.processBloomFilters(x -> {
153             result[0] &= contains(x);
154             return result[0];
155         });
156     }
157 
158     @Override
159     public boolean contains(final Hasher hasher) {
160         return contains(createFilter(hasher));
161     }
162 
163     @Override
164     public boolean contains(final IndexExtractor indexExtractor) {
165         return contains(createFilter(indexExtractor));
166     }
167 
168     /**
169      * Creates a new instance of this {@link LayeredBloomFilter} with the same properties as the current one.
170      *
171      * @return a copy of this {@link LayeredBloomFilter}.
172      */
173     @Override
174     public LayeredBloomFilter<T> copy() {
175         return new LayeredBloomFilter<>(shape, layerManager.copy());
176     }
177 
178     /**
179      * Creates a Bloom filter from a BitMapExtractor.
180      *
181      * @param bitMapExtractor the BitMapExtractor to create the filter from.
182      * @return the BloomFilter.
183      */
184     private SimpleBloomFilter createFilter(final BitMapExtractor bitMapExtractor) {
185         final SimpleBloomFilter bf = new SimpleBloomFilter(shape);
186         bf.merge(bitMapExtractor);
187         return bf;
188     }
189 
190     /**
191      * Creates a Bloom filter from a Hasher.
192      *
193      * @param hasher the hasher to create the filter from.
194      * @return the BloomFilter.
195      */
196     private SimpleBloomFilter createFilter(final Hasher hasher) {
197         final SimpleBloomFilter bf = new SimpleBloomFilter(shape);
198         bf.merge(hasher);
199         return bf;
200     }
201 
202     /**
203      * Creates a Bloom filter from an IndexExtractor.
204      *
205      * @param indexExtractor the IndexExtractor to create the filter from.
206      * @return the BloomFilter.
207      */
208     private SimpleBloomFilter createFilter(final IndexExtractor indexExtractor) {
209         final SimpleBloomFilter bf = new SimpleBloomFilter(shape);
210         bf.merge(indexExtractor);
211         return bf;
212     }
213 
214     @Override
215     public int estimateN() {
216         return flatten().estimateN();
217     }
218 
219     @Override
220     public int estimateUnion(final BloomFilter other) {
221         Objects.requireNonNull(other, "other");
222         final BloomFilter cpy = this.flatten();
223         cpy.merge(other);
224         return cpy.estimateN();
225     }
226 
227     /**
228      * Finds the layers in which the BitMapExtractor is found.
229      *
230      * @param bitMapExtractor the BitMapExtractor to search for.
231      * @return an array of layer indices in which the Bloom filter is found.
232      */
233     public int[] find(final BitMapExtractor bitMapExtractor) {
234         final SimpleBloomFilter bf = new SimpleBloomFilter(shape);
235         bf.merge(bitMapExtractor);
236         return find(bf);
237     }
238 
239     /**
240      * Finds the layers in which the Bloom filter is found.
241      *
242      * @param bf the Bloom filter to search for.
243      * @return an array of layer indices in which the Bloom filter is found.
244      */
245     public int[] find(final BloomFilter bf) {
246         final Finder finder = new Finder(bf);
247         processBloomFilters(finder);
248         return finder.getResult();
249     }
250 
251     /**
252      * Finds the layers in which the Hasher is found.
253      *
254      * @param hasher the Hasher to search for.
255      * @return an array of layer indices in which the Bloom filter is found.
256      */
257     public int[] find(final Hasher hasher) {
258         final SimpleBloomFilter bf = new SimpleBloomFilter(shape);
259         bf.merge(hasher);
260         return find(bf);
261     }
262 
263     /**
264      * Finds the layers in which the IndexExtractor is found.
265      *
266      * @param indexExtractor the Index extractor to search for.
267      * @return an array of layer indices in which the Bloom filter is found.
268      */
269     public int[] find(final IndexExtractor indexExtractor) {
270         final SimpleBloomFilter bf = new SimpleBloomFilter(shape);
271         bf.merge(indexExtractor);
272         return find(bf);
273     }
274 
275     /**
276      * Create a standard (non-layered) Bloom filter by merging all of the layers. If the filter is empty this method will return an empty Bloom filter.
277      *
278      * @return the merged bloom filter.
279      */
280     @Override
281     public SimpleBloomFilter flatten() {
282         final SimpleBloomFilter bf = new SimpleBloomFilter(shape);
283         processBloomFilters(bf::merge);
284         return bf;
285     }
286 
287     /**
288      * Gets the Bloom filter at the specified depth
289      *
290      * @param depth the depth of the filter to return.
291      * @return the Bloom filter at the specified depth.
292      * @throws NoSuchElementException if depth is not in the range [0,getDepth())
293      */
294     public T get(final int depth) {
295         return layerManager.get(depth);
296     }
297 
298     /**
299      * Gets the depth of the deepest layer. The minimum value returned by this method is 1.
300      *
301      * @return the depth of the deepest layer.
302      */
303     public final int getDepth() {
304         return layerManager.getDepth();
305     }
306 
307     @Override
308     public final Shape getShape() {
309         return shape;
310     }
311 
312     @Override
313     public boolean isEmpty() {
314         return processBloomFilters(BloomFilter::isEmpty);
315     }
316 
317     @Override
318     public boolean merge(final BitMapExtractor bitMapExtractor) {
319         return layerManager.getTarget().merge(bitMapExtractor);
320     }
321 
322     @Override
323     public boolean merge(final BloomFilter bf) {
324         return layerManager.getTarget().merge(bf);
325     }
326 
327     @Override
328     public boolean merge(final IndexExtractor indexExtractor) {
329         return layerManager.getTarget().merge(indexExtractor);
330     }
331 
332     /**
333      * Forces and advance to the next layer. This method will clean-up the current layers and generate a new filter layer. In most cases is it unnecessary to
334      * call this method directly.
335      *
336      * @see LayerManager.Builder#setCleanup(java.util.function.Consumer)
337      * @see LayerManager.Builder#setExtendCheck(Predicate)
338      */
339     public void next() {
340         layerManager.next();
341     }
342 
343     @Override
344     public boolean processBitMaps(final LongPredicate predicate) {
345         return flatten().processBitMaps(predicate);
346     }
347 
348     /**
349      * Processes the Bloom filters in depth order with the most recent filters first. Each filter is passed to the predicate in turn. The function exits on the
350      * first {@code false} returned by the predicate.
351      *
352      * @param bloomFilterPredicate the predicate to execute.
353      * @return {@code true} if all filters passed the predicate, {@code false} otherwise.
354      */
355     @Override
356     public final boolean processBloomFilters(final Predicate<BloomFilter> bloomFilterPredicate) {
357         return layerManager.processBloomFilters(bloomFilterPredicate);
358     }
359 
360     @Override
361     public boolean processIndices(final IntPredicate predicate) {
362         return processBloomFilters(bf -> bf.processIndices(predicate));
363     }
364 
365 }