LayerManager.java

  1. /*
  2.  * Licensed to the Apache Software Foundation (ASF) under one or more
  3.  * contributor license agreements.  See the NOTICE file distributed with
  4.  * this work for additional information regarding copyright ownership.
  5.  * The ASF licenses this file to You under the Apache License, Version 2.0
  6.  * (the "License"); you may not use this file except in compliance with
  7.  * the License.  You may obtain a copy of the License at
  8.  *
  9.  *      http://www.apache.org/licenses/LICENSE-2.0
  10.  *
  11.  * Unless required by applicable law or agreed to in writing, software
  12.  * distributed under the License is distributed on an "AS IS" BASIS,
  13.  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  14.  * See the License for the specific language governing permissions and
  15.  * limitations under the License.
  16.  */
  17. package org.apache.commons.collections4.bloomfilter;

  18. import java.util.Deque;
  19. import java.util.LinkedList;
  20. import java.util.NoSuchElementException;
  21. import java.util.Objects;
  22. import java.util.function.Consumer;
  23. import java.util.function.Predicate;
  24. import java.util.function.Supplier;

  25. /**
  26.  * Implementation of the methods to manage the layers in a layered Bloom filter.
  27.  * <p>
  28.  * The manager comprises a list of Bloom filters that are managed based on
  29.  * various rules. The last filter in the list is known as the {@code target} and
  30.  * is the filter into which merges are performed. The Layered manager utilizes
  31.  * three methods to manage the list.
  32.  * </p>
  33.  * <ul>
  34.  * <li>ExtendCheck - A Predicate that if true causes a new Bloom filter to be
  35.  * created as the new target.</li>
  36.  * <li>FilterSupplier - A Supplier that produces empty Bloom filters to be used
  37.  * as a new target.</li>
  38.  * <li>Cleanup - A Consumer of a {@code LinkedList} of BloomFilter that removes any
  39.  * expired or out dated filters from the list.</li>
  40.  * </ul>
  41.  * <p>
  42.  * When extendCheck returns {@code true} the following steps are taken:
  43.  * </p>
  44.  * <ol>
  45.  * <li>{@code Cleanup} is called</li>
  46.  * <li>{@code FilterSuplier} is executed and the new filter added to the list as
  47.  * the {@code target} filter.</li>
  48.  * </ol>
  49.  *
  50.  *
  51.  * @param <T> the {@link BloomFilter} type.
  52.  * @since 4.5.0-M1
  53.  */
  54. public class LayerManager<T extends BloomFilter<T>> implements BloomFilterExtractor {

  55.     /**
  56.      * Builds new instances of {@link LayerManager}.
  57.      *
  58.      * @param <T> the {@link BloomFilter} type.
  59.      */
  60.     public static class Builder<T extends BloomFilter<T>> implements Supplier<LayerManager<T>> {

  61.         private Predicate<LayerManager<T>> extendCheck;
  62.         private Supplier<T> supplier;
  63.         private Consumer<Deque<T>> cleanup;

  64.         private Builder() {
  65.             extendCheck = ExtendCheck.neverAdvance();
  66.             cleanup = Cleanup.noCleanup();
  67.         }

  68.         /**
  69.          * Builds the layer manager with the specified properties.
  70.          *
  71.          * @return a new LayerManager.
  72.          */
  73.         @Override
  74.         public LayerManager<T> get() {
  75.             return new LayerManager<>(supplier, extendCheck, cleanup, true);
  76.         }

  77.         /**
  78.          * Sets the Consumer that cleans the list of Bloom filters.
  79.          *
  80.          * @param cleanup the Consumer that will modify the list of filters removing out
  81.          *                dated or stale filters.
  82.          * @return {@code this} instance.
  83.          */
  84.         public Builder<T> setCleanup(final Consumer<Deque<T>> cleanup) {
  85.             this.cleanup = cleanup;
  86.             return this;
  87.         }

  88.         /**
  89.          * Sets the extendCheck predicate. When the predicate returns {@code true} a new
  90.          * target will be created.
  91.          *
  92.          * @param extendCheck The predicate to determine if a new target should be
  93.          *                    created.
  94.          * @return {@code this} instance.
  95.          */
  96.         public Builder<T> setExtendCheck(final Predicate<LayerManager<T>> extendCheck) {
  97.             this.extendCheck = extendCheck;
  98.             return this;
  99.         }

  100.         /**
  101.          * Sets the supplier of Bloom filters. When extendCheck creates a new target,
  102.          * the supplier provides the instance of the Bloom filter.
  103.          *
  104.          * @param supplier The supplier of new Bloom filter instances.
  105.          * @return {@code this} instance.
  106.          */
  107.         public Builder<T> setSupplier(final Supplier<T> supplier) {
  108.             this.supplier = supplier;
  109.             return this;
  110.         }
  111.     }

  112.     /**
  113.      * Static methods to create a Consumer of a List of BloomFilter perform
  114.      * tests on whether to reduce the collection of Bloom filters.
  115.      */
  116.     public static final class Cleanup {

  117.         /**
  118.          * A Cleanup that never removes anything.
  119.          *
  120.          * @param <T> Type of BloomFilter.
  121.          * @return A Consumer suitable for the LayerManager {@code cleanup} parameter.
  122.          */
  123.         public static <T extends BloomFilter<T>> Consumer<Deque<T>> noCleanup() {
  124.             return x -> {
  125.                 // empty
  126.             };
  127.         }

  128.         /**
  129.          * Removes the earliest filters in the list when the the number of filters
  130.          * exceeds maxSize.
  131.          *
  132.          * @param <T> Type of BloomFilter.
  133.          * @param maxSize the maximum number of filters for the list. Must be greater
  134.          *                than 0
  135.          * @return A Consumer suitable for the LayerManager {@code cleanup} parameter.
  136.          * @throws IllegalArgumentException if {@code maxSize <= 0}.
  137.          */
  138.         public static <T extends BloomFilter<T>> Consumer<Deque<T>> onMaxSize(final int maxSize) {
  139.             if (maxSize <= 0) {
  140.                 throw new IllegalArgumentException("'maxSize' must be greater than 0");
  141.             }
  142.             return ll -> {
  143.                 while (ll.size() > maxSize) {
  144.                     ll.removeFirst();
  145.                 }
  146.             };
  147.         }

  148.         /**
  149.          * Removes the last added target if it is empty.  Useful as the first in a chain
  150.          * of cleanup consumers.  (for example {@code Cleanup.removeEmptyTarget.andThen( otherConsumer )})
  151.          *
  152.          * @param <T> Type of BloomFilter.
  153.          * @return A Consumer suitable for the LayerManager {@code cleanup} parameter.
  154.          */
  155.         public static <T extends BloomFilter<T>> Consumer<Deque<T>> removeEmptyTarget() {
  156.             return x -> {
  157.                 if (!x.isEmpty() && x.getLast().isEmpty()) {
  158.                     x.removeLast();
  159.                 }
  160.             };
  161.         }

  162.         /**
  163.          * Removes any layer identified by the predicate.
  164.          *
  165.          * @param <T> Type of BloomFilter.
  166.          * @param test Predicate.
  167.          * @return A Consumer suitable for the LayerManager {@code cleanup} parameter.
  168.          */
  169.         public static <T extends BloomFilter<T>> Consumer<Deque<T>> removeIf(final Predicate<? super T> test) {
  170.             return x -> x.removeIf(test);
  171.         }

  172.         private Cleanup() {
  173.         }
  174.     }

  175.     /**
  176.      * A collection of common ExtendCheck implementations to test whether to extend
  177.      * the depth of a LayerManager.
  178.      */
  179.     public static final class ExtendCheck {

  180.         /**
  181.          * Creates a new target after a specific number of filters have been added to
  182.          * the current target.
  183.          *
  184.          * @param <T> Type of BloomFilter.
  185.          * @param breakAt the number of filters to merge into each filter in the list.
  186.          * @return A Predicate suitable for the LayerManager {@code extendCheck} parameter.
  187.          * @throws IllegalArgumentException if {@code breakAt <= 0}
  188.          */
  189.         public static <T extends BloomFilter<T>> Predicate<LayerManager<T>> advanceOnCount(final int breakAt) {
  190.             if (breakAt <= 0) {
  191.                 throw new IllegalArgumentException("'breakAt' must be greater than 0");
  192.             }
  193.             return new Predicate<LayerManager<T>>() {
  194.                 int count;

  195.                 @Override
  196.                 public boolean test(final LayerManager<T> filter) {
  197.                     if (++count == breakAt) {
  198.                         count = 0;
  199.                         return true;
  200.                     }
  201.                     return false;
  202.                 }
  203.             };
  204.         }

  205.         /**
  206.          * Advances the target once a merge has been performed.
  207.          *
  208.          * @param <T> Type of BloomFilter.
  209.          * @return A Predicate suitable for the LayerManager {@code extendCheck} parameter.
  210.          */
  211.         public static <T extends BloomFilter<T>> Predicate<LayerManager<T>> advanceOnPopulated() {
  212.             return lm -> !lm.last().isEmpty();
  213.         }

  214.         /**
  215.          * Creates a new target after the current target is saturated. Saturation is
  216.          * defined as the {@code Bloom filter estimated N >= maxN}.
  217.          *
  218.          * <p>An example usage is advancing on a calculated saturation by calling:
  219.          * {@code ExtendCheck.advanceOnSaturation(shape.estimateMaxN()) }</p>
  220.          *
  221.          * @param <T> Type of BloomFilter.
  222.          * @param maxN the maximum number of estimated items in the filter.
  223.          * @return A Predicate suitable for the LayerManager {@code extendCheck} parameter.
  224.          * @throws IllegalArgumentException if {@code maxN <= 0}
  225.          */
  226.         public static <T extends BloomFilter<T>> Predicate<LayerManager<T>> advanceOnSaturation(final double maxN) {
  227.             if (maxN <= 0) {
  228.                 throw new IllegalArgumentException("'maxN' must be greater than 0");
  229.             }
  230.             return manager -> {
  231.                 final T bf = manager.last();
  232.                 return maxN <= bf.getShape().estimateN(bf.cardinality());
  233.             };
  234.         }

  235.         /**
  236.          * Does not automatically advance the target. @{code next()} must be called directly to
  237.          * perform the advance.
  238.          *
  239.          * @param <T> Type of BloomFilter.
  240.          * @return A Predicate suitable for the LayerManager {@code extendCheck} parameter.
  241.          */
  242.         public static <T extends BloomFilter<T>> Predicate<LayerManager<T>> neverAdvance() {
  243.             return x -> false;
  244.         }

  245.         private ExtendCheck() {
  246.         }
  247.     }

  248.     /**
  249.      * Creates a new Builder with defaults of {@link ExtendCheck#neverAdvance()} and
  250.      * {@link Cleanup#noCleanup()}.
  251.      *
  252.      * @param <T> Type of BloomFilter.
  253.      * @return A builder.
  254.      * @see ExtendCheck#neverAdvance()
  255.      * @see Cleanup#noCleanup()
  256.      */
  257.     public static <T extends BloomFilter<T>> Builder<T> builder() {
  258.         return new Builder<>();
  259.     }

  260.     private final LinkedList<T> filters = new LinkedList<>();

  261.     private final Consumer<Deque<T>> filterCleanup;

  262.     private final Predicate<LayerManager<T>> extendCheck;

  263.     private final Supplier<T> filterSupplier;

  264.     /**
  265.      * Constructs a new instance.
  266.      *
  267.      * @param filterSupplier the non-null supplier of new Bloom filters to add the the list
  268.      *                       when necessary.
  269.      * @param extendCheck    The non-null predicate that checks if a new filter should be
  270.      *                       added to the list.
  271.      * @param filterCleanup  the non-null consumer that removes any old filters from the
  272.      *                       list.
  273.      * @param initialize     true if the filter list should be initialized.
  274.      */
  275.     private LayerManager(final Supplier<T> filterSupplier, final Predicate<LayerManager<T>> extendCheck,
  276.             final Consumer<Deque<T>> filterCleanup, final boolean initialize) {
  277.         this.filterSupplier = Objects.requireNonNull(filterSupplier, "filterSupplier");
  278.         this.extendCheck = Objects.requireNonNull(extendCheck, "extendCheck");
  279.         this.filterCleanup = Objects.requireNonNull(filterCleanup, "filterCleanup");
  280.         if (initialize) {
  281.             addFilter();
  282.         }
  283.     }

  284.     /**
  285.      * Adds a new Bloom filter to the list.
  286.      */
  287.     private void addFilter() {
  288.         filters.add(Objects.requireNonNull(filterSupplier.get(), "filterSupplier.get() returned null."));
  289.     }

  290.     /**
  291.      * Forces execution the configured cleanup without creating a new filter except in cases
  292.      * where the cleanup removes all the layers.
  293.      *
  294.      * @see LayerManager.Builder#setCleanup(Consumer)
  295.      */
  296.     void cleanup() {
  297.         filterCleanup.accept(filters);
  298.         if (filters.isEmpty()) {
  299.             addFilter();
  300.         }
  301.     }

  302.     /**
  303.      * Removes all the filters from the layer manager, and sets up a new one as the
  304.      * target.
  305.      */
  306.     public final void clear() {
  307.         filters.clear();
  308.         addFilter();
  309.     }

  310.     /**
  311.      * Creates a deep copy of this {@link LayerManager}.
  312.      * <p>
  313.      * <em>Filters in the copy are deep copies, not references, so changes in the copy are NOT reflected in the original.</em>
  314.      * </p>
  315.      * <p>
  316.      * The {@code filterSupplier}, {@code extendCheck}, and the {@code filterCleanup} are shared between the copy and this instance.
  317.      * </p>
  318.      *
  319.      * @return a copy of this {@link LayerManager}.
  320.      */
  321.     public LayerManager<T> copy() {
  322.         final LayerManager<T> newMgr = new LayerManager<>(filterSupplier, extendCheck, filterCleanup, false);
  323.         for (final T bf : filters) {
  324.             newMgr.filters.add(bf.copy());
  325.         }
  326.         return newMgr;
  327.     }

  328.     /**
  329.      * Gets the Bloom filter from the first layer.
  330.      * No extension check is performed during this call.
  331.      * @return The Bloom filter from the first layer.
  332.      * @see #getTarget()
  333.      */
  334.     public final T first() {
  335.         return filters.getFirst();
  336.     }

  337.     /**
  338.      * Gets the Bloom filter at the specified depth. The filter at depth 0 is the
  339.      * oldest filter.
  340.      *
  341.      * @param depth the depth at which the desired filter is to be found.
  342.      * @return the filter.
  343.      * @throws NoSuchElementException if depth is not in the range
  344.      *                                [0,filters.size())
  345.      */
  346.     public final T get(final int depth) {
  347.         if (depth < 0 || depth >= filters.size()) {
  348.             throw new NoSuchElementException(String.format("Depth must be in the range [0,%s)", filters.size()));
  349.         }
  350.         return filters.get(depth);
  351.     }

  352.     /**
  353.      * Gets the number of filters in the LayerManager.  In the default LayerManager implementation
  354.      * there is always at least one layer.
  355.      *
  356.      * @return the current depth.
  357.      */
  358.     public final int getDepth() {
  359.         return filters.size();
  360.     }

  361.     /**
  362.      * Gets the current target filter. If a new filter should be created based on
  363.      * {@code extendCheck} it will be created before this method returns.
  364.      *
  365.      * @return the current target filter after any extension.
  366.      */
  367.     public final T getTarget() {
  368.         if (extendCheck.test(this)) {
  369.             next();
  370.         }
  371.         return last();
  372.     }

  373.     /**
  374.      * Gets the Bloom filter from the last layer.
  375.      * No extension check is performed during this call.
  376.      *
  377.      * @return The Bloom filter from the last layer.
  378.      * @see #getTarget()
  379.      */
  380.     public final T last() {
  381.         return filters.getLast();
  382.     }

  383.     /**
  384.      * Forces an advance to the next depth. This method will clean-up the current
  385.      * layers and generate a new filter layer. In most cases is it unnecessary to
  386.      * call this method directly.
  387.      * <p>
  388.      * Ths method is used within {@link #getTarget()} when the configured
  389.      * {@code ExtendCheck} returns {@code true}.
  390.      * </p>
  391.      *
  392.      * @see LayerManager.Builder#setExtendCheck(Predicate)
  393.      * @see LayerManager.Builder#setCleanup(Consumer)
  394.      */
  395.     void next() {
  396.         filterCleanup.accept(filters);
  397.         addFilter();
  398.     }

  399.     /**
  400.      * Executes a Bloom filter Predicate on each Bloom filter in the manager in
  401.      * depth order. Oldest filter first.
  402.      *
  403.      * @param bloomFilterPredicate the predicate to evaluate each Bloom filter with.
  404.      * @return {@code false} when the a filter fails the predicate test. Returns
  405.      *         {@code true} if all filters pass the test.
  406.      */
  407.     @Override
  408.     public boolean processBloomFilters(final Predicate<BloomFilter> bloomFilterPredicate) {
  409.         return filters.stream().allMatch(bloomFilterPredicate);
  410.     }
  411. }