001/* 002 * Licensed to the Apache Software Foundation (ASF) under one or more 003 * contributor license agreements. See the NOTICE file distributed with 004 * this work for additional information regarding copyright ownership. 005 * The ASF licenses this file to You under the Apache License, Version 2.0 006 * (the "License"); you may not use this file except in compliance with 007 * the License. You may obtain a copy of the License at 008 * 009 * http://www.apache.org/licenses/LICENSE-2.0 010 * 011 * Unless required by applicable law or agreed to in writing, software 012 * distributed under the License is distributed on an "AS IS" BASIS, 013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 014 * See the License for the specific language governing permissions and 015 * limitations under the License. 016 */ 017package org.apache.commons.collections4.bloomfilter; 018 019import java.util.Deque; 020import java.util.LinkedList; 021import java.util.NoSuchElementException; 022import java.util.Objects; 023import java.util.function.Consumer; 024import java.util.function.Predicate; 025import java.util.function.Supplier; 026 027/** 028 * Implementation of the methods to manage the layers in a layered Bloom filter. 029 * <p> 030 * The manager comprises a list of Bloom filters that are managed based on 031 * various rules. The last filter in the list is known as the {@code target} and 032 * is the filter into which merges are performed. The Layered manager utilizes 033 * three methods to manage the list. 034 * </p> 035 * <ul> 036 * <li>ExtendCheck - A Predicate that if true causes a new Bloom filter to be 037 * created as the new target.</li> 038 * <li>FilterSupplier - A Supplier that produces empty Bloom filters to be used 039 * as a new target.</li> 040 * <li>Cleanup - A Consumer of a {@code LinkedList} of BloomFilter that removes any 041 * expired or out dated filters from the list.</li> 042 * </ul> 043 * <p> 044 * When extendCheck returns {@code true} the following steps are taken: 045 * </p> 046 * <ol> 047 * <li>{@code Cleanup} is called</li> 048 * <li>{@code FilterSuplier} is executed and the new filter added to the list as 049 * the {@code target} filter.</li> 050 * </ol> 051 * 052 * 053 * @param <T> the {@link BloomFilter} type. 054 * @since 4.5.0-M1 055 */ 056public class LayerManager<T extends BloomFilter<T>> implements BloomFilterExtractor { 057 058 /** 059 * Builds new instances of {@link LayerManager}. 060 * 061 * @param <T> the {@link BloomFilter} type. 062 */ 063 public static class Builder<T extends BloomFilter<T>> implements Supplier<LayerManager<T>> { 064 065 private Predicate<LayerManager<T>> extendCheck; 066 private Supplier<T> supplier; 067 private Consumer<Deque<T>> cleanup; 068 069 private Builder() { 070 extendCheck = ExtendCheck.neverAdvance(); 071 cleanup = Cleanup.noCleanup(); 072 } 073 074 /** 075 * Builds the layer manager with the specified properties. 076 * 077 * @return a new LayerManager. 078 */ 079 @Override 080 public LayerManager<T> get() { 081 return new LayerManager<>(supplier, extendCheck, cleanup, true); 082 } 083 084 /** 085 * Sets the Consumer that cleans the list of Bloom filters. 086 * 087 * @param cleanup the Consumer that will modify the list of filters removing out 088 * dated or stale filters. 089 * @return {@code this} instance. 090 */ 091 public Builder<T> setCleanup(final Consumer<Deque<T>> cleanup) { 092 this.cleanup = cleanup; 093 return this; 094 } 095 096 /** 097 * Sets the extendCheck predicate. When the predicate returns {@code true} a new 098 * target will be created. 099 * 100 * @param extendCheck The predicate to determine if a new target should be 101 * created. 102 * @return {@code this} instance. 103 */ 104 public Builder<T> setExtendCheck(final Predicate<LayerManager<T>> extendCheck) { 105 this.extendCheck = extendCheck; 106 return this; 107 } 108 109 /** 110 * Sets the supplier of Bloom filters. When extendCheck creates a new target, 111 * the supplier provides the instance of the Bloom filter. 112 * 113 * @param supplier The supplier of new Bloom filter instances. 114 * @return {@code this} instance. 115 */ 116 public Builder<T> setSupplier(final Supplier<T> supplier) { 117 this.supplier = supplier; 118 return this; 119 } 120 } 121 122 /** 123 * Static methods to create a Consumer of a List of BloomFilter perform 124 * tests on whether to reduce the collection of Bloom filters. 125 */ 126 public static final class Cleanup { 127 128 /** 129 * A Cleanup that never removes anything. 130 * 131 * @param <T> Type of BloomFilter. 132 * @return A Consumer suitable for the LayerManager {@code cleanup} parameter. 133 */ 134 public static <T extends BloomFilter<T>> Consumer<Deque<T>> noCleanup() { 135 return x -> { 136 // empty 137 }; 138 } 139 140 /** 141 * Removes the earliest filters in the list when the the number of filters 142 * exceeds maxSize. 143 * 144 * @param <T> Type of BloomFilter. 145 * @param maxSize the maximum number of filters for the list. Must be greater 146 * than 0 147 * @return A Consumer suitable for the LayerManager {@code cleanup} parameter. 148 * @throws IllegalArgumentException if {@code maxSize <= 0}. 149 */ 150 public static <T extends BloomFilter<T>> Consumer<Deque<T>> onMaxSize(final int maxSize) { 151 if (maxSize <= 0) { 152 throw new IllegalArgumentException("'maxSize' must be greater than 0"); 153 } 154 return ll -> { 155 while (ll.size() > maxSize) { 156 ll.removeFirst(); 157 } 158 }; 159 } 160 161 /** 162 * Removes the last added target if it is empty. Useful as the first in a chain 163 * of cleanup consumers. (e.g. {@code Cleanup.removeEmptyTarget.andThen( otherConsumer )}) 164 * 165 * @param <T> Type of BloomFilter. 166 * @return A Consumer suitable for the LayerManager {@code cleanup} parameter. 167 */ 168 public static <T extends BloomFilter<T>> Consumer<Deque<T>> removeEmptyTarget() { 169 return x -> { 170 if (!x.isEmpty() && x.getLast().isEmpty()) { 171 x.removeLast(); 172 } 173 }; 174 } 175 176 /** 177 * Removes any layer identified by the predicate. 178 * 179 * @param <T> Type of BloomFilter. 180 * @param test Predicate. 181 * @return A Consumer suitable for the LayerManager {@code cleanup} parameter. 182 */ 183 public static <T extends BloomFilter<T>> Consumer<Deque<T>> removeIf(final Predicate<? super T> test) { 184 return x -> x.removeIf(test); 185 } 186 187 private Cleanup() { 188 } 189 } 190 191 /** 192 * A collection of common ExtendCheck implementations to test whether to extend 193 * the depth of a LayerManager. 194 */ 195 public static final class ExtendCheck { 196 197 /** 198 * Creates a new target after a specific number of filters have been added to 199 * the current target. 200 * 201 * @param <T> Type of BloomFilter. 202 * @param breakAt the number of filters to merge into each filter in the list. 203 * @return A Predicate suitable for the LayerManager {@code extendCheck} parameter. 204 * @throws IllegalArgumentException if {@code breakAt <= 0} 205 */ 206 public static <T extends BloomFilter<T>> Predicate<LayerManager<T>> advanceOnCount(final int breakAt) { 207 if (breakAt <= 0) { 208 throw new IllegalArgumentException("'breakAt' must be greater than 0"); 209 } 210 return new Predicate<LayerManager<T>>() { 211 int count; 212 213 @Override 214 public boolean test(final LayerManager<T> filter) { 215 if (++count == breakAt) { 216 count = 0; 217 return true; 218 } 219 return false; 220 } 221 }; 222 } 223 224 /** 225 * Advances the target once a merge has been performed. 226 * 227 * @param <T> Type of BloomFilter. 228 * @return A Predicate suitable for the LayerManager {@code extendCheck} parameter. 229 */ 230 public static <T extends BloomFilter<T>> Predicate<LayerManager<T>> advanceOnPopulated() { 231 return lm -> !lm.last().isEmpty(); 232 } 233 234 /** 235 * Creates a new target after the current target is saturated. Saturation is 236 * defined as the {@code Bloom filter estimated N >= maxN}. 237 * 238 * <p>An example usage is advancing on a calculated saturation by calling: 239 * {@code ExtendCheck.advanceOnSaturation(shape.estimateMaxN()) }</p> 240 * 241 * @param <T> Type of BloomFilter. 242 * @param maxN the maximum number of estimated items in the filter. 243 * @return A Predicate suitable for the LayerManager {@code extendCheck} parameter. 244 * @throws IllegalArgumentException if {@code maxN <= 0} 245 */ 246 public static <T extends BloomFilter<T>> Predicate<LayerManager<T>> advanceOnSaturation(final double maxN) { 247 if (maxN <= 0) { 248 throw new IllegalArgumentException("'maxN' must be greater than 0"); 249 } 250 return manager -> { 251 final T bf = manager.last(); 252 return maxN <= bf.getShape().estimateN(bf.cardinality()); 253 }; 254 } 255 256 /** 257 * Does not automatically advance the target. @{code next()} must be called directly to 258 * perform the advance. 259 * 260 * @param <T> Type of BloomFilter. 261 * @return A Predicate suitable for the LayerManager {@code extendCheck} parameter. 262 */ 263 public static <T extends BloomFilter<T>> Predicate<LayerManager<T>> neverAdvance() { 264 return x -> false; 265 } 266 267 private ExtendCheck() { 268 } 269 } 270 271 /** 272 * Creates a new Builder with defaults of {@link ExtendCheck#neverAdvance()} and 273 * {@link Cleanup#noCleanup()}. 274 * 275 * @param <T> Type of BloomFilter. 276 * @return A builder. 277 * @see ExtendCheck#neverAdvance() 278 * @see Cleanup#noCleanup() 279 */ 280 public static <T extends BloomFilter<T>> Builder<T> builder() { 281 return new Builder<>(); 282 } 283 284 private final LinkedList<T> filters = new LinkedList<>(); 285 286 private final Consumer<Deque<T>> filterCleanup; 287 288 private final Predicate<LayerManager<T>> extendCheck; 289 290 private final Supplier<T> filterSupplier; 291 292 /** 293 * Constructs a new instance. 294 * 295 * @param filterSupplier the non-null supplier of new Bloom filters to add the the list 296 * when necessary. 297 * @param extendCheck The non-null predicate that checks if a new filter should be 298 * added to the list. 299 * @param filterCleanup the non-null consumer that removes any old filters from the 300 * list. 301 * @param initialize true if the filter list should be initialized. 302 */ 303 private LayerManager(final Supplier<T> filterSupplier, final Predicate<LayerManager<T>> extendCheck, 304 final Consumer<Deque<T>> filterCleanup, final boolean initialize) { 305 this.filterSupplier = Objects.requireNonNull(filterSupplier, "filterSupplier"); 306 this.extendCheck = Objects.requireNonNull(extendCheck, "extendCheck"); 307 this.filterCleanup = Objects.requireNonNull(filterCleanup, "filterCleanup"); 308 if (initialize) { 309 addFilter(); 310 } 311 } 312 313 /** 314 * Adds a new Bloom filter to the list. 315 */ 316 private void addFilter() { 317 filters.add(Objects.requireNonNull(filterSupplier.get(), "filterSupplier.get() returned null.")); 318 } 319 320 /** 321 * Forces execution the configured cleanup without creating a new filter except in cases 322 * where the cleanup removes all the layers. 323 * 324 * @see LayerManager.Builder#setCleanup(Consumer) 325 */ 326 void cleanup() { 327 filterCleanup.accept(filters); 328 if (filters.isEmpty()) { 329 addFilter(); 330 } 331 } 332 333 /** 334 * Removes all the filters from the layer manager, and sets up a new one as the 335 * target. 336 */ 337 public final void clear() { 338 filters.clear(); 339 addFilter(); 340 } 341 342 /** 343 * Creates a deep copy of this {@link LayerManager}. 344 * <p> 345 * <em>Filters in the copy are deep copies, not references, so changes in the copy are NOT reflected in the original.</em> 346 * </p> 347 * <p> 348 * The {@code filterSupplier}, {@code extendCheck}, and the {@code filterCleanup} are shared between the copy and this instance. 349 * </p> 350 * 351 * @return a copy of this {@link LayerManager}. 352 */ 353 public LayerManager<T> copy() { 354 final LayerManager<T> newMgr = new LayerManager<>(filterSupplier, extendCheck, filterCleanup, false); 355 for (final T bf : filters) { 356 newMgr.filters.add(bf.copy()); 357 } 358 return newMgr; 359 } 360 361 /** 362 * Gets the Bloom filter from the first layer. 363 * No extension check is performed during this call. 364 * @return The Bloom filter from the first layer. 365 * @see #getTarget() 366 */ 367 public final T first() { 368 return filters.getFirst(); 369 } 370 371 /** 372 * Gets the Bloom filter at the specified depth. The filter at depth 0 is the 373 * oldest filter. 374 * 375 * @param depth the depth at which the desired filter is to be found. 376 * @return the filter. 377 * @throws NoSuchElementException if depth is not in the range 378 * [0,filters.size()) 379 */ 380 public final T get(final int depth) { 381 if (depth < 0 || depth >= filters.size()) { 382 throw new NoSuchElementException(String.format("Depth must be in the range [0,%s)", filters.size())); 383 } 384 return filters.get(depth); 385 } 386 387 /** 388 * Returns the number of filters in the LayerManager. In the default LayerManager implementation 389 * there is always at least one layer. 390 * 391 * @return the current depth. 392 */ 393 public final int getDepth() { 394 return filters.size(); 395 } 396 397 /** 398 * Returns the current target filter. If a new filter should be created based on 399 * {@code extendCheck} it will be created before this method returns. 400 * 401 * @return the current target filter after any extension. 402 */ 403 public final T getTarget() { 404 if (extendCheck.test(this)) { 405 next(); 406 } 407 return last(); 408 } 409 410 /** 411 * Gets the Bloom filter from the last layer. 412 * No extension check is performed during this call. 413 * 414 * @return The Bloom filter from the last layer. 415 * @see #getTarget() 416 */ 417 public final T last() { 418 return filters.getLast(); 419 } 420 421 /** 422 * Forces an advance to the next depth. This method will clean-up the current 423 * layers and generate a new filter layer. In most cases is it unnecessary to 424 * call this method directly. 425 * <p> 426 * Ths method is used within {@link #getTarget()} when the configured 427 * {@code ExtendCheck} returns {@code true}. 428 * </p> 429 * 430 * @see LayerManager.Builder#setExtendCheck(Predicate) 431 * @see LayerManager.Builder#setCleanup(Consumer) 432 */ 433 void next() { 434 filterCleanup.accept(filters); 435 addFilter(); 436 } 437 438 /** 439 * Executes a Bloom filter Predicate on each Bloom filter in the manager in 440 * depth order. Oldest filter first. 441 * 442 * @param bloomFilterPredicate the predicate to evaluate each Bloom filter with. 443 * @return {@code false} when the a filter fails the predicate test. Returns 444 * {@code true} if all filters pass the test. 445 */ 446 @Override 447 public boolean processBloomFilters(final Predicate<BloomFilter> bloomFilterPredicate) { 448 for (final T bf : filters) { 449 if (!bloomFilterPredicate.test(bf)) { 450 return false; 451 } 452 } 453 return true; 454 } 455}