1 /*
2 * Licensed to the Apache Software Foundation (ASF) under one or more
3 * contributor license agreements. See the NOTICE file distributed with
4 * this work for additional information regarding copyright ownership.
5 * The ASF licenses this file to You under the Apache License, Version 2.0
6 * (the "License"); you may not use this file except in compliance with
7 * the License. You may obtain a copy of the License at
8 *
9 * http://www.apache.org/licenses/LICENSE-2.0
10 *
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
16 */
17 package org.apache.commons.collections4.bloomfilter;
18
19 import java.util.Deque;
20 import java.util.LinkedList;
21 import java.util.NoSuchElementException;
22 import java.util.Objects;
23 import java.util.function.Consumer;
24 import java.util.function.Predicate;
25 import java.util.function.Supplier;
26
27 /**
28 * Implementation of the methods to manage the layers in a layered Bloom filter.
29 * <p>
30 * The manager comprises a list of Bloom filters that are managed based on
31 * various rules. The last filter in the list is known as the {@code target} and
32 * is the filter into which merges are performed. The Layered manager utilizes
33 * three methods to manage the list.
34 * </p>
35 * <ul>
36 * <li>ExtendCheck - A Predicate that if true causes a new Bloom filter to be
37 * created as the new target.</li>
38 * <li>FilterSupplier - A Supplier that produces empty Bloom filters to be used
39 * as a new target.</li>
40 * <li>Cleanup - A Consumer of a {@code LinkedList} of BloomFilter that removes any
41 * expired or out dated filters from the list.</li>
42 * </ul>
43 * <p>
44 * When extendCheck returns {@code true} the following steps are taken:
45 * </p>
46 * <ol>
47 * <li>{@code Cleanup} is called</li>
48 * <li>{@code FilterSuplier} is executed and the new filter added to the list as
49 * the {@code target} filter.</li>
50 * </ol>
51 *
52 *
53 * @param <T> the {@link BloomFilter} type.
54 * @since 4.5.0-M1
55 */
56 public class LayerManager<T extends BloomFilter<T>> implements BloomFilterExtractor {
57
58 /**
59 * Builds new instances of {@link LayerManager}.
60 *
61 * @param <T> the {@link BloomFilter} type.
62 */
63 public static class Builder<T extends BloomFilter<T>> implements Supplier<LayerManager<T>> {
64
65 private Predicate<LayerManager<T>> extendCheck;
66 private Supplier<T> supplier;
67 private Consumer<Deque<T>> cleanup;
68
69 private Builder() {
70 extendCheck = ExtendCheck.neverAdvance();
71 cleanup = Cleanup.noCleanup();
72 }
73
74 /**
75 * Builds the layer manager with the specified properties.
76 *
77 * @return a new LayerManager.
78 */
79 @Override
80 public LayerManager<T> get() {
81 return new LayerManager<>(supplier, extendCheck, cleanup, true);
82 }
83
84 /**
85 * Sets the Consumer that cleans the list of Bloom filters.
86 *
87 * @param cleanup the Consumer that will modify the list of filters removing out
88 * dated or stale filters.
89 * @return {@code this} instance.
90 */
91 public Builder<T> setCleanup(final Consumer<Deque<T>> cleanup) {
92 this.cleanup = cleanup;
93 return this;
94 }
95
96 /**
97 * Sets the extendCheck predicate. When the predicate returns {@code true} a new
98 * target will be created.
99 *
100 * @param extendCheck The predicate to determine if a new target should be
101 * created.
102 * @return {@code this} instance.
103 */
104 public Builder<T> setExtendCheck(final Predicate<LayerManager<T>> extendCheck) {
105 this.extendCheck = extendCheck;
106 return this;
107 }
108
109 /**
110 * Sets the supplier of Bloom filters. When extendCheck creates a new target,
111 * the supplier provides the instance of the Bloom filter.
112 *
113 * @param supplier The supplier of new Bloom filter instances.
114 * @return {@code this} instance.
115 */
116 public Builder<T> setSupplier(final Supplier<T> supplier) {
117 this.supplier = supplier;
118 return this;
119 }
120 }
121
122 /**
123 * Static methods to create a Consumer of a List of BloomFilter perform
124 * tests on whether to reduce the collection of Bloom filters.
125 */
126 public static final class Cleanup {
127
128 /**
129 * A Cleanup that never removes anything.
130 *
131 * @param <T> Type of BloomFilter.
132 * @return A Consumer suitable for the LayerManager {@code cleanup} parameter.
133 */
134 public static <T extends BloomFilter<T>> Consumer<Deque<T>> noCleanup() {
135 return x -> {
136 // empty
137 };
138 }
139
140 /**
141 * Removes the earliest filters in the list when the the number of filters
142 * exceeds maxSize.
143 *
144 * @param <T> Type of BloomFilter.
145 * @param maxSize the maximum number of filters for the list. Must be greater
146 * than 0
147 * @return A Consumer suitable for the LayerManager {@code cleanup} parameter.
148 * @throws IllegalArgumentException if {@code maxSize <= 0}.
149 */
150 public static <T extends BloomFilter<T>> Consumer<Deque<T>> onMaxSize(final int maxSize) {
151 if (maxSize <= 0) {
152 throw new IllegalArgumentException("'maxSize' must be greater than 0");
153 }
154 return ll -> {
155 while (ll.size() > maxSize) {
156 ll.removeFirst();
157 }
158 };
159 }
160
161 /**
162 * Removes the last added target if it is empty. Useful as the first in a chain
163 * of cleanup consumers. (for example {@code Cleanup.removeEmptyTarget.andThen( otherConsumer )})
164 *
165 * @param <T> Type of BloomFilter.
166 * @return A Consumer suitable for the LayerManager {@code cleanup} parameter.
167 */
168 public static <T extends BloomFilter<T>> Consumer<Deque<T>> removeEmptyTarget() {
169 return x -> {
170 if (!x.isEmpty() && x.getLast().isEmpty()) {
171 x.removeLast();
172 }
173 };
174 }
175
176 /**
177 * Removes any layer identified by the predicate.
178 *
179 * @param <T> Type of BloomFilter.
180 * @param test Predicate.
181 * @return A Consumer suitable for the LayerManager {@code cleanup} parameter.
182 */
183 public static <T extends BloomFilter<T>> Consumer<Deque<T>> removeIf(final Predicate<? super T> test) {
184 return x -> x.removeIf(test);
185 }
186
187 private Cleanup() {
188 }
189 }
190
191 /**
192 * A collection of common ExtendCheck implementations to test whether to extend
193 * the depth of a LayerManager.
194 */
195 public static final class ExtendCheck {
196
197 /**
198 * Creates a new target after a specific number of filters have been added to
199 * the current target.
200 *
201 * @param <T> Type of BloomFilter.
202 * @param breakAt the number of filters to merge into each filter in the list.
203 * @return A Predicate suitable for the LayerManager {@code extendCheck} parameter.
204 * @throws IllegalArgumentException if {@code breakAt <= 0}
205 */
206 public static <T extends BloomFilter<T>> Predicate<LayerManager<T>> advanceOnCount(final int breakAt) {
207 if (breakAt <= 0) {
208 throw new IllegalArgumentException("'breakAt' must be greater than 0");
209 }
210 return new Predicate<LayerManager<T>>() {
211 int count;
212
213 @Override
214 public boolean test(final LayerManager<T> filter) {
215 if (++count == breakAt) {
216 count = 0;
217 return true;
218 }
219 return false;
220 }
221 };
222 }
223
224 /**
225 * Advances the target once a merge has been performed.
226 *
227 * @param <T> Type of BloomFilter.
228 * @return A Predicate suitable for the LayerManager {@code extendCheck} parameter.
229 */
230 public static <T extends BloomFilter<T>> Predicate<LayerManager<T>> advanceOnPopulated() {
231 return lm -> !lm.last().isEmpty();
232 }
233
234 /**
235 * Creates a new target after the current target is saturated. Saturation is
236 * defined as the {@code Bloom filter estimated N >= maxN}.
237 *
238 * <p>An example usage is advancing on a calculated saturation by calling:
239 * {@code ExtendCheck.advanceOnSaturation(shape.estimateMaxN()) }</p>
240 *
241 * @param <T> Type of BloomFilter.
242 * @param maxN the maximum number of estimated items in the filter.
243 * @return A Predicate suitable for the LayerManager {@code extendCheck} parameter.
244 * @throws IllegalArgumentException if {@code maxN <= 0}
245 */
246 public static <T extends BloomFilter<T>> Predicate<LayerManager<T>> advanceOnSaturation(final double maxN) {
247 if (maxN <= 0) {
248 throw new IllegalArgumentException("'maxN' must be greater than 0");
249 }
250 return manager -> {
251 final T bf = manager.last();
252 return maxN <= bf.getShape().estimateN(bf.cardinality());
253 };
254 }
255
256 /**
257 * Does not automatically advance the target. @{code next()} must be called directly to
258 * perform the advance.
259 *
260 * @param <T> Type of BloomFilter.
261 * @return A Predicate suitable for the LayerManager {@code extendCheck} parameter.
262 */
263 public static <T extends BloomFilter<T>> Predicate<LayerManager<T>> neverAdvance() {
264 return x -> false;
265 }
266
267 private ExtendCheck() {
268 }
269 }
270
271 /**
272 * Creates a new Builder with defaults of {@link ExtendCheck#neverAdvance()} and
273 * {@link Cleanup#noCleanup()}.
274 *
275 * @param <T> Type of BloomFilter.
276 * @return A builder.
277 * @see ExtendCheck#neverAdvance()
278 * @see Cleanup#noCleanup()
279 */
280 public static <T extends BloomFilter<T>> Builder<T> builder() {
281 return new Builder<>();
282 }
283
284 private final LinkedList<T> filters = new LinkedList<>();
285
286 private final Consumer<Deque<T>> filterCleanup;
287
288 private final Predicate<LayerManager<T>> extendCheck;
289
290 private final Supplier<T> filterSupplier;
291
292 /**
293 * Constructs a new instance.
294 *
295 * @param filterSupplier the non-null supplier of new Bloom filters to add the the list
296 * when necessary.
297 * @param extendCheck The non-null predicate that checks if a new filter should be
298 * added to the list.
299 * @param filterCleanup the non-null consumer that removes any old filters from the
300 * list.
301 * @param initialize true if the filter list should be initialized.
302 */
303 private LayerManager(final Supplier<T> filterSupplier, final Predicate<LayerManager<T>> extendCheck,
304 final Consumer<Deque<T>> filterCleanup, final boolean initialize) {
305 this.filterSupplier = Objects.requireNonNull(filterSupplier, "filterSupplier");
306 this.extendCheck = Objects.requireNonNull(extendCheck, "extendCheck");
307 this.filterCleanup = Objects.requireNonNull(filterCleanup, "filterCleanup");
308 if (initialize) {
309 addFilter();
310 }
311 }
312
313 /**
314 * Adds a new Bloom filter to the list.
315 */
316 private void addFilter() {
317 filters.add(Objects.requireNonNull(filterSupplier.get(), "filterSupplier.get() returned null."));
318 }
319
320 /**
321 * Forces execution the configured cleanup without creating a new filter except in cases
322 * where the cleanup removes all the layers.
323 *
324 * @see LayerManager.Builder#setCleanup(Consumer)
325 */
326 void cleanup() {
327 filterCleanup.accept(filters);
328 if (filters.isEmpty()) {
329 addFilter();
330 }
331 }
332
333 /**
334 * Removes all the filters from the layer manager, and sets up a new one as the
335 * target.
336 */
337 public final void clear() {
338 filters.clear();
339 addFilter();
340 }
341
342 /**
343 * Creates a deep copy of this {@link LayerManager}.
344 * <p>
345 * <em>Filters in the copy are deep copies, not references, so changes in the copy are NOT reflected in the original.</em>
346 * </p>
347 * <p>
348 * The {@code filterSupplier}, {@code extendCheck}, and the {@code filterCleanup} are shared between the copy and this instance.
349 * </p>
350 *
351 * @return a copy of this {@link LayerManager}.
352 */
353 public LayerManager<T> copy() {
354 final LayerManager<T> newMgr = new LayerManager<>(filterSupplier, extendCheck, filterCleanup, false);
355 for (final T bf : filters) {
356 newMgr.filters.add(bf.copy());
357 }
358 return newMgr;
359 }
360
361 /**
362 * Gets the Bloom filter from the first layer.
363 * No extension check is performed during this call.
364 * @return The Bloom filter from the first layer.
365 * @see #getTarget()
366 */
367 public final T first() {
368 return filters.getFirst();
369 }
370
371 /**
372 * Gets the Bloom filter at the specified depth. The filter at depth 0 is the
373 * oldest filter.
374 *
375 * @param depth the depth at which the desired filter is to be found.
376 * @return the filter.
377 * @throws NoSuchElementException if depth is not in the range
378 * [0,filters.size())
379 */
380 public final T get(final int depth) {
381 if (depth < 0 || depth >= filters.size()) {
382 throw new NoSuchElementException(String.format("Depth must be in the range [0,%s)", filters.size()));
383 }
384 return filters.get(depth);
385 }
386
387 /**
388 * Gets the number of filters in the LayerManager. In the default LayerManager implementation
389 * there is always at least one layer.
390 *
391 * @return the current depth.
392 */
393 public final int getDepth() {
394 return filters.size();
395 }
396
397 /**
398 * Gets the current target filter. If a new filter should be created based on
399 * {@code extendCheck} it will be created before this method returns.
400 *
401 * @return the current target filter after any extension.
402 */
403 public final T getTarget() {
404 if (extendCheck.test(this)) {
405 next();
406 }
407 return last();
408 }
409
410 /**
411 * Gets the Bloom filter from the last layer.
412 * No extension check is performed during this call.
413 *
414 * @return The Bloom filter from the last layer.
415 * @see #getTarget()
416 */
417 public final T last() {
418 return filters.getLast();
419 }
420
421 /**
422 * Forces an advance to the next depth. This method will clean-up the current
423 * layers and generate a new filter layer. In most cases is it unnecessary to
424 * call this method directly.
425 * <p>
426 * Ths method is used within {@link #getTarget()} when the configured
427 * {@code ExtendCheck} returns {@code true}.
428 * </p>
429 *
430 * @see LayerManager.Builder#setExtendCheck(Predicate)
431 * @see LayerManager.Builder#setCleanup(Consumer)
432 */
433 void next() {
434 filterCleanup.accept(filters);
435 addFilter();
436 }
437
438 /**
439 * Executes a Bloom filter Predicate on each Bloom filter in the manager in
440 * depth order. Oldest filter first.
441 *
442 * @param bloomFilterPredicate the predicate to evaluate each Bloom filter with.
443 * @return {@code false} when the a filter fails the predicate test. Returns
444 * {@code true} if all filters pass the test.
445 */
446 @Override
447 public boolean processBloomFilters(final Predicate<BloomFilter> bloomFilterPredicate) {
448 return filters.stream().allMatch(bloomFilterPredicate);
449 }
450 }