View Javadoc
1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one or more
3    * contributor license agreements.  See the NOTICE file distributed with
4    * this work for additional information regarding copyright ownership.
5    * The ASF licenses this file to You under the Apache License, Version 2.0
6    * (the "License"); you may not use this file except in compliance with
7    * the License.  You may obtain a copy of the License at
8    *
9    *      http://www.apache.org/licenses/LICENSE-2.0
10   *
11   * Unless required by applicable law or agreed to in writing, software
12   * distributed under the License is distributed on an "AS IS" BASIS,
13   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14   * See the License for the specific language governing permissions and
15   * limitations under the License.
16   */
17  package org.apache.commons.collections4.bloomfilter;
18  
19  import java.util.Deque;
20  import java.util.LinkedList;
21  import java.util.NoSuchElementException;
22  import java.util.Objects;
23  import java.util.function.Consumer;
24  import java.util.function.Predicate;
25  import java.util.function.Supplier;
26  
27  /**
28   * Implementation of the methods to manage the layers in a layered Bloom filter.
29   * <p>
30   * The manager comprises a list of Bloom filters that are managed based on
31   * various rules. The last filter in the list is known as the {@code target} and
32   * is the filter into which merges are performed. The Layered manager utilizes
33   * three methods to manage the list.
34   * </p>
35   * <ul>
36   * <li>ExtendCheck - A Predicate that if true causes a new Bloom filter to be
37   * created as the new target.</li>
38   * <li>FilterSupplier - A Supplier that produces empty Bloom filters to be used
39   * as a new target.</li>
40   * <li>Cleanup - A Consumer of a {@code LinkedList} of BloomFilter that removes any
41   * expired or out dated filters from the list.</li>
42   * </ul>
43   * <p>
44   * When extendCheck returns {@code true} the following steps are taken:
45   * </p>
46   * <ol>
47   * <li>{@code Cleanup} is called</li>
48   * <li>{@code FilterSuplier} is executed and the new filter added to the list as
49   * the {@code target} filter.</li>
50   * </ol>
51   *
52   *
53   * @param <T> the {@link BloomFilter} type.
54   * @since 4.5.0-M1
55   */
56  public class LayerManager<T extends BloomFilter<T>> implements BloomFilterExtractor {
57  
58      /**
59       * Builds new instances of {@link LayerManager}.
60       *
61       * @param <T> the {@link BloomFilter} type.
62       */
63      public static class Builder<T extends BloomFilter<T>> implements Supplier<LayerManager<T>> {
64  
65          private Predicate<LayerManager<T>> extendCheck;
66          private Supplier<T> supplier;
67          private Consumer<Deque<T>> cleanup;
68  
69          private Builder() {
70              extendCheck = ExtendCheck.neverAdvance();
71              cleanup = Cleanup.noCleanup();
72          }
73  
74          /**
75           * Builds the layer manager with the specified properties.
76           *
77           * @return a new LayerManager.
78           */
79          @Override
80          public LayerManager<T> get() {
81              return new LayerManager<>(supplier, extendCheck, cleanup, true);
82          }
83  
84          /**
85           * Sets the Consumer that cleans the list of Bloom filters.
86           *
87           * @param cleanup the Consumer that will modify the list of filters removing out
88           *                dated or stale filters.
89           * @return {@code this} instance.
90           */
91          public Builder<T> setCleanup(final Consumer<Deque<T>> cleanup) {
92              this.cleanup = cleanup;
93              return this;
94          }
95  
96          /**
97           * Sets the extendCheck predicate. When the predicate returns {@code true} a new
98           * target will be created.
99           *
100          * @param extendCheck The predicate to determine if a new target should be
101          *                    created.
102          * @return {@code this} instance.
103          */
104         public Builder<T> setExtendCheck(final Predicate<LayerManager<T>> extendCheck) {
105             this.extendCheck = extendCheck;
106             return this;
107         }
108 
109         /**
110          * Sets the supplier of Bloom filters. When extendCheck creates a new target,
111          * the supplier provides the instance of the Bloom filter.
112          *
113          * @param supplier The supplier of new Bloom filter instances.
114          * @return {@code this} instance.
115          */
116         public Builder<T> setSupplier(final Supplier<T> supplier) {
117             this.supplier = supplier;
118             return this;
119         }
120     }
121 
122     /**
123      * Static methods to create a Consumer of a List of BloomFilter perform
124      * tests on whether to reduce the collection of Bloom filters.
125      */
126     public static final class Cleanup {
127 
128         /**
129          * A Cleanup that never removes anything.
130          *
131          * @param <T> Type of BloomFilter.
132          * @return A Consumer suitable for the LayerManager {@code cleanup} parameter.
133          */
134         public static <T extends BloomFilter<T>> Consumer<Deque<T>> noCleanup() {
135             return x -> {
136                 // empty
137             };
138         }
139 
140         /**
141          * Removes the earliest filters in the list when the the number of filters
142          * exceeds maxSize.
143          *
144          * @param <T> Type of BloomFilter.
145          * @param maxSize the maximum number of filters for the list. Must be greater
146          *                than 0
147          * @return A Consumer suitable for the LayerManager {@code cleanup} parameter.
148          * @throws IllegalArgumentException if {@code maxSize <= 0}.
149          */
150         public static <T extends BloomFilter<T>> Consumer<Deque<T>> onMaxSize(final int maxSize) {
151             if (maxSize <= 0) {
152                 throw new IllegalArgumentException("'maxSize' must be greater than 0");
153             }
154             return ll -> {
155                 while (ll.size() > maxSize) {
156                     ll.removeFirst();
157                 }
158             };
159         }
160 
161         /**
162          * Removes the last added target if it is empty.  Useful as the first in a chain
163          * of cleanup consumers.  (for example {@code Cleanup.removeEmptyTarget.andThen( otherConsumer )})
164          *
165          * @param <T> Type of BloomFilter.
166          * @return A Consumer suitable for the LayerManager {@code cleanup} parameter.
167          */
168         public static <T extends BloomFilter<T>> Consumer<Deque<T>> removeEmptyTarget() {
169             return x -> {
170                 if (!x.isEmpty() && x.getLast().isEmpty()) {
171                     x.removeLast();
172                 }
173             };
174         }
175 
176         /**
177          * Removes any layer identified by the predicate.
178          *
179          * @param <T> Type of BloomFilter.
180          * @param test Predicate.
181          * @return A Consumer suitable for the LayerManager {@code cleanup} parameter.
182          */
183         public static <T extends BloomFilter<T>> Consumer<Deque<T>> removeIf(final Predicate<? super T> test) {
184             return x -> x.removeIf(test);
185         }
186 
187         private Cleanup() {
188         }
189     }
190 
191     /**
192      * A collection of common ExtendCheck implementations to test whether to extend
193      * the depth of a LayerManager.
194      */
195     public static final class ExtendCheck {
196 
197         /**
198          * Creates a new target after a specific number of filters have been added to
199          * the current target.
200          *
201          * @param <T> Type of BloomFilter.
202          * @param breakAt the number of filters to merge into each filter in the list.
203          * @return A Predicate suitable for the LayerManager {@code extendCheck} parameter.
204          * @throws IllegalArgumentException if {@code breakAt <= 0}
205          */
206         public static <T extends BloomFilter<T>> Predicate<LayerManager<T>> advanceOnCount(final int breakAt) {
207             if (breakAt <= 0) {
208                 throw new IllegalArgumentException("'breakAt' must be greater than 0");
209             }
210             return new Predicate<LayerManager<T>>() {
211                 int count;
212 
213                 @Override
214                 public boolean test(final LayerManager<T> filter) {
215                     if (++count == breakAt) {
216                         count = 0;
217                         return true;
218                     }
219                     return false;
220                 }
221             };
222         }
223 
224         /**
225          * Advances the target once a merge has been performed.
226          *
227          * @param <T> Type of BloomFilter.
228          * @return A Predicate suitable for the LayerManager {@code extendCheck} parameter.
229          */
230         public static <T extends BloomFilter<T>> Predicate<LayerManager<T>> advanceOnPopulated() {
231             return lm -> !lm.last().isEmpty();
232         }
233 
234         /**
235          * Creates a new target after the current target is saturated. Saturation is
236          * defined as the {@code Bloom filter estimated N >= maxN}.
237          *
238          * <p>An example usage is advancing on a calculated saturation by calling:
239          * {@code ExtendCheck.advanceOnSaturation(shape.estimateMaxN()) }</p>
240          *
241          * @param <T> Type of BloomFilter.
242          * @param maxN the maximum number of estimated items in the filter.
243          * @return A Predicate suitable for the LayerManager {@code extendCheck} parameter.
244          * @throws IllegalArgumentException if {@code maxN <= 0}
245          */
246         public static <T extends BloomFilter<T>> Predicate<LayerManager<T>> advanceOnSaturation(final double maxN) {
247             if (maxN <= 0) {
248                 throw new IllegalArgumentException("'maxN' must be greater than 0");
249             }
250             return manager -> {
251                 final T bf = manager.last();
252                 return maxN <= bf.getShape().estimateN(bf.cardinality());
253             };
254         }
255 
256         /**
257          * Does not automatically advance the target. @{code next()} must be called directly to
258          * perform the advance.
259          *
260          * @param <T> Type of BloomFilter.
261          * @return A Predicate suitable for the LayerManager {@code extendCheck} parameter.
262          */
263         public static <T extends BloomFilter<T>> Predicate<LayerManager<T>> neverAdvance() {
264             return x -> false;
265         }
266 
267         private ExtendCheck() {
268         }
269     }
270 
271     /**
272      * Creates a new Builder with defaults of {@link ExtendCheck#neverAdvance()} and
273      * {@link Cleanup#noCleanup()}.
274      *
275      * @param <T> Type of BloomFilter.
276      * @return A builder.
277      * @see ExtendCheck#neverAdvance()
278      * @see Cleanup#noCleanup()
279      */
280     public static <T extends BloomFilter<T>> Builder<T> builder() {
281         return new Builder<>();
282     }
283 
284     private final LinkedList<T> filters = new LinkedList<>();
285 
286     private final Consumer<Deque<T>> filterCleanup;
287 
288     private final Predicate<LayerManager<T>> extendCheck;
289 
290     private final Supplier<T> filterSupplier;
291 
292     /**
293      * Constructs a new instance.
294      *
295      * @param filterSupplier the non-null supplier of new Bloom filters to add the the list
296      *                       when necessary.
297      * @param extendCheck    The non-null predicate that checks if a new filter should be
298      *                       added to the list.
299      * @param filterCleanup  the non-null consumer that removes any old filters from the
300      *                       list.
301      * @param initialize     true if the filter list should be initialized.
302      */
303     private LayerManager(final Supplier<T> filterSupplier, final Predicate<LayerManager<T>> extendCheck,
304             final Consumer<Deque<T>> filterCleanup, final boolean initialize) {
305         this.filterSupplier = Objects.requireNonNull(filterSupplier, "filterSupplier");
306         this.extendCheck = Objects.requireNonNull(extendCheck, "extendCheck");
307         this.filterCleanup = Objects.requireNonNull(filterCleanup, "filterCleanup");
308         if (initialize) {
309             addFilter();
310         }
311     }
312 
313     /**
314      * Adds a new Bloom filter to the list.
315      */
316     private void addFilter() {
317         filters.add(Objects.requireNonNull(filterSupplier.get(), "filterSupplier.get() returned null."));
318     }
319 
320     /**
321      * Forces execution the configured cleanup without creating a new filter except in cases
322      * where the cleanup removes all the layers.
323      *
324      * @see LayerManager.Builder#setCleanup(Consumer)
325      */
326     void cleanup() {
327         filterCleanup.accept(filters);
328         if (filters.isEmpty()) {
329             addFilter();
330         }
331     }
332 
333     /**
334      * Removes all the filters from the layer manager, and sets up a new one as the
335      * target.
336      */
337     public final void clear() {
338         filters.clear();
339         addFilter();
340     }
341 
342     /**
343      * Creates a deep copy of this {@link LayerManager}.
344      * <p>
345      * <em>Filters in the copy are deep copies, not references, so changes in the copy are NOT reflected in the original.</em>
346      * </p>
347      * <p>
348      * The {@code filterSupplier}, {@code extendCheck}, and the {@code filterCleanup} are shared between the copy and this instance.
349      * </p>
350      *
351      * @return a copy of this {@link LayerManager}.
352      */
353     public LayerManager<T> copy() {
354         final LayerManager<T> newMgr = new LayerManager<>(filterSupplier, extendCheck, filterCleanup, false);
355         for (final T bf : filters) {
356             newMgr.filters.add(bf.copy());
357         }
358         return newMgr;
359     }
360 
361     /**
362      * Gets the Bloom filter from the first layer.
363      * No extension check is performed during this call.
364      * @return The Bloom filter from the first layer.
365      * @see #getTarget()
366      */
367     public final T first() {
368         return filters.getFirst();
369     }
370 
371     /**
372      * Gets the Bloom filter at the specified depth. The filter at depth 0 is the
373      * oldest filter.
374      *
375      * @param depth the depth at which the desired filter is to be found.
376      * @return the filter.
377      * @throws NoSuchElementException if depth is not in the range
378      *                                [0,filters.size())
379      */
380     public final T get(final int depth) {
381         if (depth < 0 || depth >= filters.size()) {
382             throw new NoSuchElementException(String.format("Depth must be in the range [0,%s)", filters.size()));
383         }
384         return filters.get(depth);
385     }
386 
387     /**
388      * Gets the number of filters in the LayerManager.  In the default LayerManager implementation
389      * there is always at least one layer.
390      *
391      * @return the current depth.
392      */
393     public final int getDepth() {
394         return filters.size();
395     }
396 
397     /**
398      * Gets the current target filter. If a new filter should be created based on
399      * {@code extendCheck} it will be created before this method returns.
400      *
401      * @return the current target filter after any extension.
402      */
403     public final T getTarget() {
404         if (extendCheck.test(this)) {
405             next();
406         }
407         return last();
408     }
409 
410     /**
411      * Gets the Bloom filter from the last layer.
412      * No extension check is performed during this call.
413      *
414      * @return The Bloom filter from the last layer.
415      * @see #getTarget()
416      */
417     public final T last() {
418         return filters.getLast();
419     }
420 
421     /**
422      * Forces an advance to the next depth. This method will clean-up the current
423      * layers and generate a new filter layer. In most cases is it unnecessary to
424      * call this method directly.
425      * <p>
426      * Ths method is used within {@link #getTarget()} when the configured
427      * {@code ExtendCheck} returns {@code true}.
428      * </p>
429      *
430      * @see LayerManager.Builder#setExtendCheck(Predicate)
431      * @see LayerManager.Builder#setCleanup(Consumer)
432      */
433     void next() {
434         filterCleanup.accept(filters);
435         addFilter();
436     }
437 
438     /**
439      * Executes a Bloom filter Predicate on each Bloom filter in the manager in
440      * depth order. Oldest filter first.
441      *
442      * @param bloomFilterPredicate the predicate to evaluate each Bloom filter with.
443      * @return {@code false} when the a filter fails the predicate test. Returns
444      *         {@code true} if all filters pass the test.
445      */
446     @Override
447     public boolean processBloomFilters(final Predicate<BloomFilter> bloomFilterPredicate) {
448         return filters.stream().allMatch(bloomFilterPredicate);
449     }
450 }