001/* 002 * Licensed to the Apache Software Foundation (ASF) under one or more 003 * contributor license agreements. See the NOTICE file distributed with 004 * this work for additional information regarding copyright ownership. 005 * The ASF licenses this file to You under the Apache License, Version 2.0 006 * (the "License"); you may not use this file except in compliance with 007 * the License. You may obtain a copy of the License at 008 * 009 * http://www.apache.org/licenses/LICENSE-2.0 010 * 011 * Unless required by applicable law or agreed to in writing, software 012 * distributed under the License is distributed on an "AS IS" BASIS, 013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 014 * See the License for the specific language governing permissions and 015 * limitations under the License. 016 */ 017package org.apache.commons.collections4.bloomfilter; 018 019import java.util.Deque; 020import java.util.LinkedList; 021import java.util.NoSuchElementException; 022import java.util.Objects; 023import java.util.function.Consumer; 024import java.util.function.Predicate; 025import java.util.function.Supplier; 026 027/** 028 * Implementation of the methods to manage the layers in a layered Bloom filter. 029 * <p> 030 * The manager comprises a list of Bloom filters that are managed based on 031 * various rules. The last filter in the list is known as the {@code target} and 032 * is the filter into which merges are performed. The Layered manager utilizes 033 * three methods to manage the list. 034 * </p> 035 * <ul> 036 * <li>ExtendCheck - A Predicate that if true causes a new Bloom filter to be 037 * created as the new target.</li> 038 * <li>FilterSupplier - A Supplier that produces empty Bloom filters to be used 039 * as a new target.</li> 040 * <li>Cleanup - A Consumer of a {@code LinkedList} of BloomFilter that removes any 041 * expired or out dated filters from the list.</li> 042 * </ul> 043 * <p> 044 * When extendCheck returns {@code true} the following steps are taken: 045 * </p> 046 * <ol> 047 * <li>{@code Cleanup} is called</li> 048 * <li>{@code FilterSuplier} is executed and the new filter added to the list as 049 * the {@code target} filter.</li> 050 * </ol> 051 * 052 * @since 4.5 053 */ 054public class LayerManager<T extends BloomFilter> implements BloomFilterExtractor { 055 056 /** 057 * Builder to create Layer Manager 058 */ 059 public static class Builder<T extends BloomFilter> { 060 private Predicate<LayerManager<T>> extendCheck; 061 private Supplier<T> supplier; 062 private Consumer<Deque<T>> cleanup; 063 064 private Builder() { 065 extendCheck = ExtendCheck.neverAdvance(); 066 cleanup = Cleanup.noCleanup(); 067 } 068 069 /** 070 * Builds the layer manager with the specified properties. 071 * 072 * @return a new LayerManager. 073 */ 074 public LayerManager<T> build() { 075 Objects.requireNonNull(supplier, "Supplier must not be null"); 076 Objects.requireNonNull(extendCheck, "ExtendCheck must not be null"); 077 Objects.requireNonNull(cleanup, "Cleanup must not be null"); 078 return new LayerManager<>(supplier, extendCheck, cleanup, true); 079 } 080 081 /** 082 * Sets the Consumer that cleans the list of Bloom filters. 083 * 084 * @param cleanup the Consumer that will modify the list of filters removing out 085 * dated or stale filters. 086 * @return {@code this} instance. 087 */ 088 public Builder<T> setCleanup(final Consumer<Deque<T>> cleanup) { 089 this.cleanup = cleanup; 090 return this; 091 } 092 093 /** 094 * Sets the extendCheck predicate. When the predicate returns {@code true} a new 095 * target will be created. 096 * 097 * @param extendCheck The predicate to determine if a new target should be 098 * created. 099 * @return this for chaining. 100 */ 101 public Builder<T> setExtendCheck(final Predicate<LayerManager<T>> extendCheck) { 102 this.extendCheck = extendCheck; 103 return this; 104 } 105 106 /** 107 * Sets the supplier of Bloom filters. When extendCheck creates a new target, 108 * the supplier provides the instance of the Bloom filter. 109 * 110 * @param supplier The supplier of new Bloom filter instances. 111 * @return this for chaining. 112 */ 113 public Builder<T> setSupplier(final Supplier<T> supplier) { 114 this.supplier = supplier; 115 return this; 116 } 117 } 118 119 /** 120 * Static methods to create a Consumer of a List of BloomFilter perform 121 * tests on whether to reduce the collection of Bloom filters. 122 */ 123 public static final class Cleanup { 124 125 /** 126 * A Cleanup that never removes anything. 127 * 128 * @param <T> Type of BloomFilter. 129 * @return A Consumer suitable for the LayerManager {@code cleanup} parameter. 130 */ 131 public static <T extends BloomFilter> Consumer<Deque<T>> noCleanup() { 132 return x -> { 133 // empty 134 }; 135 } 136 137 /** 138 * Removes the earliest filters in the list when the the number of filters 139 * exceeds maxSize. 140 * 141 * @param <T> Type of BloomFilter. 142 * @param maxSize the maximum number of filters for the list. Must be greater 143 * than 0 144 * @return A Consumer suitable for the LayerManager {@code cleanup} parameter. 145 * @throws IllegalArgumentException if {@code maxSize <= 0}. 146 */ 147 public static <T extends BloomFilter> Consumer<Deque<T>> onMaxSize(final int maxSize) { 148 if (maxSize <= 0) { 149 throw new IllegalArgumentException("'maxSize' must be greater than 0"); 150 } 151 return ll -> { 152 while (ll.size() > maxSize) { 153 ll.removeFirst(); 154 } 155 }; 156 } 157 158 /** 159 * Removes the last added target if it is empty. Useful as the first in a chain 160 * of cleanup consumers. (e.g. {@code Cleanup.removeEmptyTarget.andThen( otherConsumer )}) 161 * 162 * @param <T> Type of BloomFilter. 163 * @return A Consumer suitable for the LayerManager {@code cleanup} parameter. 164 */ 165 public static <T extends BloomFilter> Consumer<Deque<T>> removeEmptyTarget() { 166 return x -> { 167 if (!x.isEmpty() && x.getLast().isEmpty()) { 168 x.removeLast(); 169 } 170 }; 171 } 172 173 /** 174 * Removes any layer identified by the predicate. 175 * 176 * @param <T> Type of BloomFilter. 177 * @param test Predicate. 178 * @return A Consumer suitable for the LayerManager {@code cleanup} parameter. 179 */ 180 public static <T extends BloomFilter> Consumer<Deque<T>> removeIf(final Predicate<? super T> test) { 181 return x -> x.removeIf(test); 182 } 183 184 private Cleanup() { 185 } 186 } 187 188 /** 189 * A collection of common ExtendCheck implementations to test whether to extend 190 * the depth of a LayerManager. 191 */ 192 public static final class ExtendCheck { 193 /** 194 * Creates a new target after a specific number of filters have been added to 195 * the current target. 196 * 197 * @param <T> Type of BloomFilter. 198 * @param breakAt the number of filters to merge into each filter in the list. 199 * @return A Predicate suitable for the LayerManager {@code extendCheck} parameter. 200 * @throws IllegalArgumentException if {@code breakAt <= 0} 201 */ 202 public static <T extends BloomFilter> Predicate<LayerManager<T>> advanceOnCount(final int breakAt) { 203 if (breakAt <= 0) { 204 throw new IllegalArgumentException("'breakAt' must be greater than 0"); 205 } 206 return new Predicate<LayerManager<T>>() { 207 int count; 208 209 @Override 210 public boolean test(final LayerManager<T> filter) { 211 if (++count == breakAt) { 212 count = 0; 213 return true; 214 } 215 return false; 216 } 217 }; 218 } 219 220 /** 221 * Advances the target once a merge has been performed. 222 * 223 * @param <T> Type of BloomFilter. 224 * @return A Predicate suitable for the LayerManager {@code extendCheck} parameter. 225 */ 226 public static <T extends BloomFilter> Predicate<LayerManager<T>> advanceOnPopulated() { 227 return lm -> !lm.last().isEmpty(); 228 } 229 230 /** 231 * Creates a new target after the current target is saturated. Saturation is 232 * defined as the {@code Bloom filter estimated N >= maxN}. 233 * 234 * <p>An example usage is advancing on a calculated saturation by calling: 235 * {@code ExtendCheck.advanceOnSaturation(shape.estimateMaxN()) }</p> 236 * 237 * @param <T> Type of BloomFilter. 238 * @param maxN the maximum number of estimated items in the filter. 239 * @return A Predicate suitable for the LayerManager {@code extendCheck} parameter. 240 * @throws IllegalArgumentException if {@code maxN <= 0} 241 */ 242 public static <T extends BloomFilter> Predicate<LayerManager<T>> advanceOnSaturation(final double maxN) { 243 if (maxN <= 0) { 244 throw new IllegalArgumentException("'maxN' must be greater than 0"); 245 } 246 return manager -> { 247 final BloomFilter bf = manager.last(); 248 return maxN <= bf.getShape().estimateN(bf.cardinality()); 249 }; 250 } 251 252 /** 253 * Does not automatically advance the target. @{code next()} must be called directly to 254 * perform the advance. 255 * 256 * @param <T> Type of BloomFilter. 257 * @return A Predicate suitable for the LayerManager {@code extendCheck} parameter. 258 */ 259 public static <T extends BloomFilter> Predicate<LayerManager<T>> neverAdvance() { 260 return x -> false; 261 } 262 263 private ExtendCheck() { 264 } 265 } 266 /** 267 * Creates a new Builder with defaults of {@code ExtendCheck.neverAdvance()} and 268 * {@code Cleanup.noCleanup()}. 269 * 270 * @param <T> Type of BloomFilter. 271 * @return A builder. 272 * @see ExtendCheck#neverAdvance() 273 * @see Cleanup#noCleanup() 274 */ 275 public static <T extends BloomFilter> Builder<T> builder() { 276 return new Builder<>(); 277 } 278 279 private final LinkedList<T> filters = new LinkedList<>(); 280 281 private final Consumer<Deque<T>> filterCleanup; 282 283 private final Predicate<LayerManager<T>> extendCheck; 284 285 private final Supplier<T> filterSupplier; 286 287 /** 288 * Constructor. 289 * 290 * @param filterSupplier the supplier of new Bloom filters to add the the list 291 * when necessary. 292 * @param extendCheck The predicate that checks if a new filter should be 293 * added to the list. 294 * @param filterCleanup the consumer that removes any old filters from the 295 * list. 296 * @param initialize true if the filter list should be initialized. 297 */ 298 private LayerManager(final Supplier<T> filterSupplier, final Predicate<LayerManager<T>> extendCheck, 299 final Consumer<Deque<T>> filterCleanup, final boolean initialize) { 300 this.filterSupplier = filterSupplier; 301 this.extendCheck = extendCheck; 302 this.filterCleanup = filterCleanup; 303 if (initialize) { 304 addFilter(); 305 } 306 } 307 308 /** 309 * Adds a new Bloom filter to the list. 310 */ 311 private void addFilter() { 312 final T bf = filterSupplier.get(); 313 if (bf == null) { 314 throw new NullPointerException("filterSupplier returned null."); 315 } 316 filters.add(bf); 317 } 318 319 /** 320 * Forces execution the configured cleanup without creating a new filter except in cases 321 * where the cleanup removes all the layers. 322 * @see LayerManager.Builder#setCleanup(Consumer) 323 */ 324 void cleanup() { 325 this.filterCleanup.accept(filters); 326 if (filters.isEmpty()) { 327 addFilter(); 328 } 329 } 330 331 /** 332 * Removes all the filters from the layer manager, and sets up a new one as the 333 * target. 334 */ 335 public final void clear() { 336 filters.clear(); 337 addFilter(); 338 } 339 340 /** 341 * Creates a deep copy of this LayerManager. 342 * <p><em>Filters in the copy are deep copies, not references, so changes in the copy 343 * are NOT reflected in the original.</em></p> 344 * <p>The {@code filterSupplier}, {@code extendCheck}, and the {@code filterCleanup} are shared between 345 * the copy and this instance.</p> 346 * 347 * @return a copy of this layer Manager. 348 */ 349 public LayerManager<T> copy() { 350 final LayerManager<T> newMgr = new LayerManager<>(filterSupplier, extendCheck, filterCleanup, false); 351 for (final T bf : filters) { 352 newMgr.filters.add(bf.copy()); 353 } 354 return newMgr; 355 } 356 357 /** 358 * Gets the Bloom filter from the first layer. 359 * No extension check is performed during this call. 360 * @return The Bloom filter from the first layer. 361 * @see #getTarget() 362 */ 363 public final T first() { 364 return filters.getFirst(); 365 } 366 367 /** 368 * Executes a Bloom filter Predicate on each Bloom filter in the manager in 369 * depth order. Oldest filter first. 370 * 371 * @param bloomFilterPredicate the predicate to evaluate each Bloom filter with. 372 * @return {@code false} when the a filter fails the predicate test. Returns 373 * {@code true} if all filters pass the test. 374 */ 375 @Override 376 public boolean processBloomFilters(final Predicate<BloomFilter> bloomFilterPredicate) { 377 for (final BloomFilter bf : filters) { 378 if (!bloomFilterPredicate.test(bf)) { 379 return false; 380 } 381 } 382 return true; 383 } 384 385 /** 386 * Gets the Bloom filter at the specified depth. The filter at depth 0 is the 387 * oldest filter. 388 * 389 * @param depth the depth at which the desired filter is to be found. 390 * @return the filter. 391 * @throws NoSuchElementException if depth is not in the range 392 * [0,filters.size()) 393 */ 394 public final T get(final int depth) { 395 if (depth < 0 || depth >= filters.size()) { 396 throw new NoSuchElementException(String.format("Depth must be in the range [0,%s)", filters.size())); 397 } 398 return filters.get(depth); 399 } 400 401 /** 402 * Returns the number of filters in the LayerManager. In the default LayerManager implementation 403 * there is always at least one layer. 404 * 405 * @return the current depth. 406 */ 407 public final int getDepth() { 408 return filters.size(); 409 } 410 411 /** 412 * Returns the current target filter. If a new filter should be created based on 413 * {@code extendCheck} it will be created before this method returns. 414 * 415 * @return the current target filter after any extension. 416 */ 417 public final T getTarget() { 418 if (extendCheck.test(this)) { 419 next(); 420 } 421 return last(); 422 } 423 424 /** 425 * Gets the Bloom filter from the last layer. 426 * No extension check is performed during this call. 427 * @return The Bloom filter from the last layer. 428 * @see #getTarget() 429 */ 430 public final T last() { 431 return filters.getLast(); 432 } 433 434 /** 435 * Forces an advance to the next depth. This method will clean-up the current 436 * layers and generate a new filter layer. In most cases is it unnecessary to 437 * call this method directly. 438 * <p> 439 * Ths method is used within {@link #getTarget()} when the configured 440 * {@code ExtendCheck} returns {@code true}. 441 * </p> 442 * @see LayerManager.Builder#setExtendCheck(Predicate) 443 * @see LayerManager.Builder#setCleanup(Consumer) 444 */ 445 void next() { 446 this.filterCleanup.accept(filters); 447 addFilter(); 448 } 449}