View Javadoc
1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one or more
3    * contributor license agreements.  See the NOTICE file distributed with
4    * this work for additional information regarding copyright ownership.
5    * The ASF licenses this file to You under the Apache License, Version 2.0
6    * (the "License"); you may not use this file except in compliance with
7    * the License.  You may obtain a copy of the License at
8    *
9    *      http://www.apache.org/licenses/LICENSE-2.0
10   *
11   * Unless required by applicable law or agreed to in writing, software
12   * distributed under the License is distributed on an "AS IS" BASIS,
13   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14   * See the License for the specific language governing permissions and
15   * limitations under the License.
16   */
17  package org.apache.commons.collections4.bloomfilter;
18  
19  import java.util.LinkedList;
20  import java.util.NoSuchElementException;
21  import java.util.Objects;
22  import java.util.function.Consumer;
23  import java.util.function.Predicate;
24  import java.util.function.Supplier;
25  
26  /**
27   * Implementation of the methods to manage the layers in a layered Bloom filter.
28   * <p>
29   * The manager comprises a list of Bloom filters that are managed based on
30   * various rules. The last filter in the list is known as the {@code target} and
31   * is the filter into which merges are performed. The Layered manager utilizes
32   * three methods to manage the list.
33   * </p>
34   * <ul>
35   * <li>ExtendCheck - A Predicate that if true causes a new Bloom filter to be
36   * created as the new target.</li>
37   * <li>FilterSupplier - A Supplier that produces empty Bloom filters to be used
38   * as a new target.</li>
39   * <li>Cleanup - A Consumer of a {@code LinkedList} of BloomFilter that removes any
40   * expired or out dated filters from the list.</li>
41   * </ul>
42   * <p>
43   * When extendCheck returns {@code true} the following steps are taken:
44   * </p>
45   * <ol>
46   * <li>{@code Cleanup} is called</li>
47   * <li>{@code FilterSuplier} is executed and the new filter added to the list as
48   * the {@code target} filter.</li>
49   * </ol>
50   *
51   * @since 4.5
52   */
53  public class LayerManager implements BloomFilterProducer {
54  
55      /**
56       * Builder to create Layer Manager
57       */
58      public static class Builder {
59          private Predicate<LayerManager> extendCheck;
60          private Supplier<BloomFilter> supplier;
61          private Consumer<LinkedList<BloomFilter>> cleanup;
62  
63          private Builder() {
64              extendCheck = ExtendCheck.neverAdvance();
65              cleanup = Cleanup.noCleanup();
66          }
67  
68          /**
69           * Builds the layer manager with the specified properties.
70           *
71           * @return a new LayerManager.
72           */
73          public LayerManager build() {
74              Objects.requireNonNull(supplier, "Supplier must not be null");
75              Objects.requireNonNull(extendCheck, "ExtendCheck must not be null");
76              Objects.requireNonNull(cleanup, "Cleanup must not be null");
77              return new LayerManager(supplier, extendCheck, cleanup, true);
78          }
79  
80          /**
81           * Sets the Consumer that cleans the list of Bloom filters.
82           *
83           * @param cleanup the Consumer that will modify the list of filters removing out
84           *                dated or stale filters.
85           * @return this for chaining.
86           */
87          public Builder setCleanup(Consumer<LinkedList<BloomFilter>> cleanup) {
88              this.cleanup = cleanup;
89              return this;
90          }
91  
92          /**
93           * Sets the extendCheck predicate. When the predicate returns {@code true} a new
94           * target will be created.
95           *
96           * @param extendCheck The predicate to determine if a new target should be
97           *                    created.
98           * @return this for chaining.
99           */
100         public Builder setExtendCheck(Predicate<LayerManager> extendCheck) {
101             this.extendCheck = extendCheck;
102             return this;
103         }
104 
105         /**
106          * Sets the supplier of Bloom filters. When extendCheck creates a new target,
107          * the supplier provides the instance of the Bloom filter.
108          *
109          * @param supplier The supplier of new Bloom filter instances.
110          * @return this for chaining.
111          */
112         public Builder setSupplier(Supplier<BloomFilter> supplier) {
113             this.supplier = supplier;
114             return this;
115         }
116     }
117 
118     /**
119      * Static methods to create a Consumer of a LinkedList of BloomFilter perform
120      * tests on whether to reduce the collection of Bloom filters.
121      */
122     public static final class Cleanup {
123         /**
124          * A Cleanup that never removes anything.
125          * @return A Consumer suitable for the LayerManager {@code cleanup} parameter.
126          */
127         public static Consumer<LinkedList<BloomFilter>> noCleanup() {
128             return x -> {};
129         }
130 
131         /**
132          * Removes the earliest filters in the list when the the number of filters
133          * exceeds maxSize.
134          *
135          * @param maxSize the maximum number of filters for the list. Must be greater
136          *                than 0
137          * @return A Consumer suitable for the LayerManager {@code cleanup} parameter.
138          * @throws IllegalArgumentException if {@code maxSize <= 0}.
139          */
140         public static Consumer<LinkedList<BloomFilter>> onMaxSize(int maxSize) {
141             if (maxSize <= 0) {
142                 throw new IllegalArgumentException("'maxSize' must be greater than 0");
143             }
144             return ll -> {
145                 while (ll.size() > maxSize) {
146                     ll.removeFirst();
147                 }
148             };
149         }
150 
151         /**
152          * Removes the last added target if it is empty.  Useful as the first in a chain
153          * of cleanup consumers.  (e.g. {@code Cleanup.removeEmptyTarget.andThen( otherConsumer )})
154          *
155          * @return A Consumer suitable for the LayerManager {@code cleanup} parameter.
156          */
157         public static Consumer<LinkedList<BloomFilter>> removeEmptyTarget() {
158             return x -> {
159                 if (x.getLast().cardinality() == 0) {
160                     x.removeLast();
161                 }
162             };
163         }
164 
165         private Cleanup() {
166         }
167     }
168 
169     /**
170      * A collection of common ExtendCheck implementations to test whether to extend
171      * the depth of a LayerManager.
172      */
173     public static final class ExtendCheck {
174         /**
175          * Creates a new target after a specific number of filters have been added to
176          * the current target.
177          *
178          * @param breakAt the number of filters to merge into each filter in the list.
179          * @return A Predicate suitable for the LayerManager {@code extendCheck} parameter.
180          * @throws IllegalArgumentException if {@code breakAt <= 0}
181          */
182         public static Predicate<LayerManager> advanceOnCount(int breakAt) {
183             if (breakAt <= 0) {
184                 throw new IllegalArgumentException("'breakAt' must be greater than 0");
185             }
186             return new Predicate<LayerManager>() {
187                 int count;
188 
189                 @Override
190                 public boolean test(LayerManager filter) {
191                     return ++count % breakAt == 0;
192                 }
193             };
194         }
195 
196         /**
197          * Advances the target once a merge has been performed.
198          * @return A Predicate suitable for the LayerManager {@code extendCheck} parameter.
199          */
200         public static Predicate<LayerManager> advanceOnPopulated() {
201             return lm -> !lm.filters.peekLast().isEmpty();
202         }
203 
204         /**
205          * Creates a new target after the current target is saturated. Saturation is
206          * defined as the {@code Bloom filter estimated N >= maxN}.
207          *
208          * <p>An example usage is advancing on a calculated saturation by calling:
209          * {@code ExtendCheck.advanceOnSaturation(shape.estimateMaxN()) }</p>
210          *
211          * @param maxN the maximum number of estimated items in the filter.
212          * @return A Predicate suitable for the LayerManager {@code extendCheck} parameter.
213          * @throws IllegalArgumentException if {@code maxN <= 0}
214          */
215         public static Predicate<LayerManager> advanceOnSaturation(double maxN) {
216             if (maxN <= 0) {
217                 throw new IllegalArgumentException("'maxN' must be greater than 0");
218             }
219             return manager -> {
220                 BloomFilter bf = manager.filters.peekLast();
221                 return maxN <= bf.getShape().estimateN(bf.cardinality());
222             };
223         }
224 
225         /**
226          * Does not automatically advance the target. @{code next()} must be called directly to
227          * perform the advance.
228          * @return A Predicate suitable for the LayerManager {@code extendCheck} parameter.
229          */
230         public static Predicate<LayerManager> neverAdvance() {
231             return x -> false;
232         }
233 
234         private ExtendCheck() {
235         }
236     }
237     /**
238      * Creates a new Builder with defaults of {@code ExtendCheck.neverAdvance()} and
239      * {@code Cleanup.noCleanup()}.
240      *
241      * @return A builder.
242      * @see ExtendCheck#neverAdvance()
243      * @see Cleanup#noCleanup()
244      */
245     public static Builder builder() {
246         return new Builder();
247     }
248     private final LinkedList<BloomFilter> filters = new LinkedList<>();
249     private final Consumer<LinkedList<BloomFilter>> filterCleanup;
250 
251     private final Predicate<LayerManager> extendCheck;
252 
253     private final Supplier<BloomFilter> filterSupplier;
254 
255     /**
256      * Constructor.
257      *
258      * @param filterSupplier the supplier of new Bloom filters to add the the list
259      *                       when necessary.
260      * @param extendCheck    The predicate that checks if a new filter should be
261      *                       added to the list.
262      * @param filterCleanup  the consumer that removes any old filters from the
263      *                       list.
264      * @param initialize     true if the filter list should be initialized.
265      */
266     private LayerManager(Supplier<BloomFilter> filterSupplier, Predicate<LayerManager> extendCheck,
267             Consumer<LinkedList<BloomFilter>> filterCleanup, boolean initialize) {
268         this.filterSupplier = filterSupplier;
269         this.extendCheck = extendCheck;
270         this.filterCleanup = filterCleanup;
271         if (initialize) {
272             addFilter();
273         }
274     }
275 
276     /**
277      * Adds a new Bloom filter to the list.
278      */
279     private void addFilter() {
280         BloomFilter bf = filterSupplier.get();
281         if (bf == null) {
282             throw new NullPointerException("filterSupplier returned null.");
283         }
284         filters.add(bf);
285     }
286 
287     /**
288      * Removes all the filters from the layer manager, and sets up a new one as the
289      * target.
290      */
291     public final void clear() {
292         filters.clear();
293         addFilter();
294     }
295 
296     /**
297      * Creates a deep copy of this LayerManager.
298      * <p><em>Filters in the copy are deep copies, not references, so changes in the copy
299      * are NOT reflected in the original.</em></p>
300      * <p>The {@code filterSupplier}, {@code extendCheck}, and the {@code filterCleanup} are shared between
301      * the copy and this instance.</p>
302      *
303      * @return a copy of this layer Manager.
304      */
305     public LayerManager copy() {
306         LayerManager newMgr = new LayerManager(filterSupplier, extendCheck, filterCleanup, false);
307         for (BloomFilter bf : filters) {
308             newMgr.filters.add(bf.copy());
309         }
310         return newMgr;
311     }
312 
313     /**
314      * Executes a Bloom filter Predicate on each Bloom filter in the manager in
315      * depth order. Oldest filter first.
316      *
317      * @param bloomFilterPredicate the predicate to evaluate each Bloom filter with.
318      * @return {@code false} when the a filter fails the predicate test. Returns
319      *         {@code true} if all filters pass the test.
320      */
321     @Override
322     public boolean forEachBloomFilter(Predicate<BloomFilter> bloomFilterPredicate) {
323         for (BloomFilter bf : filters) {
324             if (!bloomFilterPredicate.test(bf)) {
325                 return false;
326             }
327         }
328         return true;
329     }
330 
331     /**
332      * Gets the Bloom filter at the specified depth. The filter at depth 0 is the
333      * oldest filter.
334      *
335      * @param depth the depth at which the desired filter is to be found.
336      * @return the filter.
337      * @throws NoSuchElementException if depth is not in the range
338      *                                [0,filters.size())
339      */
340     public final BloomFilter get(int depth) {
341         if (depth < 0 || depth >= filters.size()) {
342             throw new NoSuchElementException(String.format("Depth must be in the range [0,%s)", filters.size()));
343         }
344         return filters.get(depth);
345     }
346 
347     /**
348      * Returns the number of filters in the LayerManager.  In the default LayerManager implementation
349      * there is alwasy at least one layer.
350      *
351      * @return the current depth.
352      */
353     public final int getDepth() {
354         return filters.size();
355     }
356 
357     /**
358      * Returns the current target filter. If a new filter should be created based on
359      * {@code extendCheck} it will be created before this method returns.
360      *
361      * @return the current target filter after any extension.
362      */
363     public final BloomFilter getTarget() {
364         if (extendCheck.test(this)) {
365             next();
366         }
367         return filters.peekLast();
368     }
369 
370     /**
371      * Forces an advance to the next depth. This method will clean-up the current
372      * layers and generate a new filter layer. In most cases is it unnecessary to
373      * call this method directly.
374      * <p>
375      * Ths method is used within {@link #getTarget()} when the configured
376      * {@code ExtendCheck} returns {@code true}.
377      * </p>
378      */
379     void next() {
380         this.filterCleanup.accept(filters);
381         addFilter();
382     }
383 }