LayeredBloomFilter.java
- /*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
- package org.apache.commons.collections4.bloomfilter;
- import java.util.Arrays;
- import java.util.NoSuchElementException;
- import java.util.Objects;
- import java.util.function.IntPredicate;
- import java.util.function.LongPredicate;
- import java.util.function.Predicate;
- /**
- * Layered Bloom filters are described in Zhiwang, Cen; Jungang, Xu; Jian, Sun (2010), "A multi-layer Bloom filter for duplicated URL detection", Proc. 3rd
- * International Conference on Advanced Computer Theory and Engineering (ICACTE 2010), vol. 1, pp. V1-586-V1-591, doi:10.1109/ICACTE.2010.5578947, ISBN
- * 978-1-4244-6539-2, S2CID 3108985
- * <p>
- * In short, Layered Bloom filter contains several bloom filters arranged in layers.
- * </p>
- * <ul>
- * <li>When membership in the filter is checked each layer in turn is checked and if a match is found {@code true} is returned.</li>
- * <li>When merging each bloom filter is merged into the newest filter in the list of layers.</li>
- * <li>When questions of cardinality are asked the cardinality of the union of the enclosed Bloom filters is used.</li>
- * </ul>
- * <p>
- * The net result is that the layered Bloom filter can be populated with more items than the Shape would indicate and yet still return a false positive rate in
- * line with the Shape and not the over population.
- * </p>
- * <p>
- * This implementation uses a LayerManager to handle the manipulation of the layers.
- * </p>
- * <ul>
- * <li>Level 0 is the oldest layer and the highest level is the newest.</li>
- * <li>There is always at least one enclosed filter.</li>
- * <li>The newest filter is the {@code target} into which merges are performed.
- * <li>Whenever the target is retrieved, or a {@code merge} operation is performed the code checks if any older layers should be removed, and if so removes
- * them. It also checks it a new layer should be added, and if so adds it and sets the {@code target} before the operation.</li>
- * </ul>
- *
- * @param <T> The type of Bloom Filter that is used for the layers.
- * @since 4.5.0-M2
- */
- public class LayeredBloomFilter<T extends BloomFilter<T>> implements BloomFilter<LayeredBloomFilter<T>>, BloomFilterExtractor {
- /**
- * A class used to locate matching filters across all the layers.
- */
- private class Finder implements Predicate<BloomFilter> {
- int[] result = new int[layerManager.getDepth()];
- int bfIdx;
- int resultIdx;
- BloomFilter<?> bf;
- Finder(final BloomFilter<?> bf) {
- this.bf = bf;
- }
- int[] getResult() {
- return Arrays.copyOf(result, resultIdx);
- }
- @Override
- public boolean test(final BloomFilter x) {
- if (x.contains(bf)) {
- result[resultIdx++] = bfIdx;
- }
- bfIdx++;
- return true;
- }
- }
- private final Shape shape;
- private final LayerManager<T> layerManager;
- /**
- * Constructs a new instance.
- *
- * @param shape the Shape of the enclosed Bloom filters
- * @param layerManager the LayerManager to manage the layers.
- */
- public LayeredBloomFilter(final Shape shape, final LayerManager<T> layerManager) {
- this.shape = shape;
- this.layerManager = layerManager;
- }
- @Override
- public int cardinality() {
- return SetOperations.cardinality(this);
- }
- @Override
- public int characteristics() {
- return 0;
- }
- /**
- * Forces the execution of the cleanup Consumer that was provided when the associated LayerManager was built.
- *
- * @see LayerManager.Builder#setCleanup(java.util.function.Consumer)
- */
- public void cleanup() {
- layerManager.cleanup();
- }
- @Override
- public final void clear() {
- layerManager.clear();
- }
- @Override
- public boolean contains(final BitMapExtractor bitMapExtractor) {
- return contains(createFilter(bitMapExtractor));
- }
- /**
- * Returns {@code true} if this any layer contained by this filter contains the specified filter.
- * <p>
- * If the {@code other} is a BloomFilterExtractor each filter within the {@code other} is checked to see if it exits within this filter.
- * </p>
- *
- * @param other the other Bloom filter
- * @return {@code true} if this filter contains the other filter.
- */
- @Override
- public boolean contains(final BloomFilter other) {
- return other instanceof BloomFilterExtractor ? contains((BloomFilterExtractor) other) : !processBloomFilters(x -> !x.contains(other));
- }
- /**
- * Returns {@code true} if each filter within the {@code bloomFilterExtractor} exits within this filter.
- *
- * @param bloomFilterExtractor the BloomFilterExtractor that provides the filters to check for.
- * @return {@code true} if this filter contains all of the filters contained in the {@code bloomFilterExtractor}.
- */
- public boolean contains(final BloomFilterExtractor bloomFilterExtractor) {
- final boolean[] result = { true };
- // return false when we have found a match to short circuit checks
- return bloomFilterExtractor.processBloomFilters(x -> {
- result[0] &= contains(x);
- return result[0];
- });
- }
- @Override
- public boolean contains(final Hasher hasher) {
- return contains(createFilter(hasher));
- }
- @Override
- public boolean contains(final IndexExtractor indexExtractor) {
- return contains(createFilter(indexExtractor));
- }
- /**
- * Creates a new instance of this {@link LayeredBloomFilter} with the same properties as the current one.
- *
- * @return a copy of this {@link LayeredBloomFilter}.
- */
- @Override
- public LayeredBloomFilter<T> copy() {
- return new LayeredBloomFilter<>(shape, layerManager.copy());
- }
- /**
- * Creates a Bloom filter from a BitMapExtractor.
- *
- * @param bitMapExtractor the BitMapExtractor to create the filter from.
- * @return the BloomFilter.
- */
- private SimpleBloomFilter createFilter(final BitMapExtractor bitMapExtractor) {
- final SimpleBloomFilter bf = new SimpleBloomFilter(shape);
- bf.merge(bitMapExtractor);
- return bf;
- }
- /**
- * Creates a Bloom filter from a Hasher.
- *
- * @param hasher the hasher to create the filter from.
- * @return the BloomFilter.
- */
- private SimpleBloomFilter createFilter(final Hasher hasher) {
- final SimpleBloomFilter bf = new SimpleBloomFilter(shape);
- bf.merge(hasher);
- return bf;
- }
- /**
- * Creates a Bloom filter from an IndexExtractor.
- *
- * @param indexExtractor the IndexExtractor to create the filter from.
- * @return the BloomFilter.
- */
- private SimpleBloomFilter createFilter(final IndexExtractor indexExtractor) {
- final SimpleBloomFilter bf = new SimpleBloomFilter(shape);
- bf.merge(indexExtractor);
- return bf;
- }
- @Override
- public int estimateN() {
- return flatten().estimateN();
- }
- @Override
- public int estimateUnion(final BloomFilter other) {
- Objects.requireNonNull(other, "other");
- final BloomFilter cpy = this.flatten();
- cpy.merge(other);
- return cpy.estimateN();
- }
- /**
- * Finds the layers in which the BitMapExtractor is found.
- *
- * @param bitMapExtractor the BitMapExtractor to search for.
- * @return an array of layer indices in which the Bloom filter is found.
- */
- public int[] find(final BitMapExtractor bitMapExtractor) {
- final SimpleBloomFilter bf = new SimpleBloomFilter(shape);
- bf.merge(bitMapExtractor);
- return find(bf);
- }
- /**
- * Finds the layers in which the Bloom filter is found.
- *
- * @param bf the Bloom filter to search for.
- * @return an array of layer indices in which the Bloom filter is found.
- */
- public int[] find(final BloomFilter bf) {
- final Finder finder = new Finder(bf);
- processBloomFilters(finder);
- return finder.getResult();
- }
- /**
- * Finds the layers in which the Hasher is found.
- *
- * @param hasher the Hasher to search for.
- * @return an array of layer indices in which the Bloom filter is found.
- */
- public int[] find(final Hasher hasher) {
- final SimpleBloomFilter bf = new SimpleBloomFilter(shape);
- bf.merge(hasher);
- return find(bf);
- }
- /**
- * Finds the layers in which the IndexExtractor is found.
- *
- * @param indexExtractor the Index extractor to search for.
- * @return an array of layer indices in which the Bloom filter is found.
- */
- public int[] find(final IndexExtractor indexExtractor) {
- final SimpleBloomFilter bf = new SimpleBloomFilter(shape);
- bf.merge(indexExtractor);
- return find(bf);
- }
- /**
- * Create a standard (non-layered) Bloom filter by merging all of the layers. If the filter is empty this method will return an empty Bloom filter.
- *
- * @return the merged bloom filter.
- */
- @Override
- public SimpleBloomFilter flatten() {
- final SimpleBloomFilter bf = new SimpleBloomFilter(shape);
- processBloomFilters(bf::merge);
- return bf;
- }
- /**
- * Gets the Bloom filter at the specified depth
- *
- * @param depth the depth of the filter to return.
- * @return the Bloom filter at the specified depth.
- * @throws NoSuchElementException if depth is not in the range [0,getDepth())
- */
- public T get(final int depth) {
- return layerManager.get(depth);
- }
- /**
- * Gets the depth of the deepest layer. The minimum value returned by this method is 1.
- *
- * @return the depth of the deepest layer.
- */
- public final int getDepth() {
- return layerManager.getDepth();
- }
- @Override
- public final Shape getShape() {
- return shape;
- }
- @Override
- public boolean isEmpty() {
- return processBloomFilters(BloomFilter::isEmpty);
- }
- @Override
- public boolean merge(final BitMapExtractor bitMapExtractor) {
- return layerManager.getTarget().merge(bitMapExtractor);
- }
- @Override
- public boolean merge(final BloomFilter bf) {
- return layerManager.getTarget().merge(bf);
- }
- @Override
- public boolean merge(final IndexExtractor indexExtractor) {
- return layerManager.getTarget().merge(indexExtractor);
- }
- /**
- * Forces and advance to the next layer. This method will clean-up the current layers and generate a new filter layer. In most cases is it unnecessary to
- * call this method directly.
- *
- * @see LayerManager.Builder#setCleanup(java.util.function.Consumer)
- * @see LayerManager.Builder#setExtendCheck(Predicate)
- */
- public void next() {
- layerManager.next();
- }
- @Override
- public boolean processBitMaps(final LongPredicate predicate) {
- return flatten().processBitMaps(predicate);
- }
- /**
- * Processes the Bloom filters in depth order with the most recent filters first. Each filter is passed to the predicate in turn. The function exits on the
- * first {@code false} returned by the predicate.
- *
- * @param bloomFilterPredicate the predicate to execute.
- * @return {@code true} if all filters passed the predicate, {@code false} otherwise.
- */
- @Override
- public final boolean processBloomFilters(final Predicate<BloomFilter> bloomFilterPredicate) {
- return layerManager.processBloomFilters(bloomFilterPredicate);
- }
- @Override
- public boolean processIndices(final IntPredicate predicate) {
- return processBloomFilters(bf -> bf.processIndices(predicate));
- }
- }