SimpleBloomFilter.java

  1. /*
  2.  * Licensed to the Apache Software Foundation (ASF) under one or more
  3.  * contributor license agreements.  See the NOTICE file distributed with
  4.  * this work for additional information regarding copyright ownership.
  5.  * The ASF licenses this file to You under the Apache License, Version 2.0
  6.  * (the "License"); you may not use this file except in compliance with
  7.  * the License.  You may obtain a copy of the License at
  8.  *
  9.  *      http://www.apache.org/licenses/LICENSE-2.0
  10.  *
  11.  * Unless required by applicable law or agreed to in writing, software
  12.  * distributed under the License is distributed on an "AS IS" BASIS,
  13.  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  14.  * See the License for the specific language governing permissions and
  15.  * limitations under the License.
  16.  */
  17. package org.apache.commons.collections4.bloomfilter;

  18. import java.util.Arrays;
  19. import java.util.Objects;
  20. import java.util.function.IntPredicate;
  21. import java.util.function.LongPredicate;

  22. /**
  23.  * A bloom filter using an array of bit maps to track enabled bits. This is a standard implementation and should work well for most Bloom filters.
  24.  *
  25.  * @since 4.5.0-M1
  26.  */
  27. public final class SimpleBloomFilter implements BloomFilter<SimpleBloomFilter> {

  28.     /**
  29.      * The array of bit map longs that defines this Bloom filter. Will be null if the filter is empty.
  30.      */
  31.     private final long[] bitMap;

  32.     /**
  33.      * The Shape of this Bloom filter.
  34.      */
  35.     private final Shape shape;

  36.     /**
  37.      * The cardinality of this Bloom filter.
  38.      */
  39.     private int cardinality;

  40.     /**
  41.      * Creates an empty instance.
  42.      *
  43.      * @param shape The shape for the filter.
  44.      */
  45.     public SimpleBloomFilter(final Shape shape) {
  46.         Objects.requireNonNull(shape, "shape");
  47.         this.shape = shape;
  48.         this.bitMap = BitMaps.newBitMap(shape);
  49.         this.cardinality = 0;
  50.     }

  51.     /**
  52.      * Copy constructor for {@code copy()} use.
  53.      *
  54.      * @param source
  55.      */
  56.     private SimpleBloomFilter(final SimpleBloomFilter source) {
  57.         this.shape = source.shape;
  58.         this.bitMap = source.bitMap.clone();
  59.         this.cardinality = source.cardinality;
  60.     }

  61.     @Override
  62.     public long[] asBitMapArray() {
  63.         return Arrays.copyOf(bitMap, bitMap.length);
  64.     }

  65.     @Override
  66.     public int cardinality() {
  67.         // Lazy evaluation with caching
  68.         int c = cardinality;
  69.         if (c < 0) {
  70.             cardinality = c = SetOperations.cardinality(this);
  71.         }
  72.         return c;
  73.     }

  74.     @Override
  75.     public int characteristics() {
  76.         return 0;
  77.     }

  78.     @Override
  79.     public void clear() {
  80.         Arrays.fill(bitMap, 0L);
  81.         cardinality = 0;
  82.     }

  83.     @Override
  84.     public boolean contains(final IndexExtractor indexExtractor) {
  85.         return indexExtractor.processIndices(idx -> BitMaps.contains(bitMap, idx));
  86.     }

  87.     /**
  88.      * Creates a new instance of this {@link SimpleBloomFilter} with the same properties as the current one.
  89.      *
  90.      * @return a copy of this {@link SimpleBloomFilter}.
  91.      */
  92.     @Override
  93.     public SimpleBloomFilter copy() {
  94.         return new SimpleBloomFilter(this);
  95.     }

  96.     @Override
  97.     public Shape getShape() {
  98.         return shape;
  99.     }

  100.     @Override
  101.     public boolean isEmpty() {
  102.         return cardinality == 0 || processBitMaps(y -> y == 0);
  103.     }

  104.     @Override
  105.     public boolean merge(final BitMapExtractor bitMapExtractor) {
  106.         Objects.requireNonNull(bitMapExtractor, "bitMapExtractor");
  107.         try {
  108.             final int[] idx = new int[1];
  109.             bitMapExtractor.processBitMaps(value -> {
  110.                 bitMap[idx[0]++] |= value;
  111.                 return true;
  112.             });
  113.             // idx[0] will be limit+1 so decrement it
  114.             idx[0]--;
  115.             final int idxLimit = BitMaps.getLongIndex(shape.getNumberOfBits());
  116.             if (idxLimit == idx[0]) {
  117.                 final long excess = bitMap[idxLimit] >> shape.getNumberOfBits();
  118.                 if (excess != 0) {
  119.                     throw new IllegalArgumentException(
  120.                             String.format("BitMapExtractor set a bit higher than the limit for the shape: %s", shape.getNumberOfBits()));
  121.                 }
  122.             }
  123.             cardinality = -1;
  124.         } catch (final IndexOutOfBoundsException e) {
  125.             throw new IllegalArgumentException(String.format("BitMapExtractor should send at most %s maps", bitMap.length), e);
  126.         }
  127.         return true;
  128.     }

  129.     @Override
  130.     public boolean merge(final BloomFilter<?> other) {
  131.         Objects.requireNonNull(other, "other");
  132.         if ((other.characteristics() & SPARSE) != 0) {
  133.             merge((IndexExtractor) other);
  134.         } else {
  135.             merge((BitMapExtractor) other);
  136.         }
  137.         return true;
  138.     }

  139.     @Override
  140.     public boolean merge(final Hasher hasher) {
  141.         Objects.requireNonNull(hasher, "hasher");
  142.         return merge(hasher.indices(shape));
  143.     }

  144.     @Override
  145.     public boolean merge(final IndexExtractor indexExtractor) {
  146.         Objects.requireNonNull(indexExtractor, "indexExtractor");
  147.         indexExtractor.processIndices(idx -> {
  148.             if (idx < 0 || idx >= shape.getNumberOfBits()) {
  149.                 throw new IllegalArgumentException(String.format("IndexExtractor should only send values in the range[0,%s)", shape.getNumberOfBits()));
  150.             }
  151.             BitMaps.set(bitMap, idx);
  152.             return true;
  153.         });
  154.         cardinality = -1;
  155.         return true;
  156.     }

  157.     @Override
  158.     public boolean processBitMapPairs(final BitMapExtractor other, final LongBiPredicate func) {
  159.         final CountingLongPredicate p = new CountingLongPredicate(bitMap, func);
  160.         return other.processBitMaps(p) && p.processRemaining();
  161.     }

  162.     @Override
  163.     public boolean processBitMaps(final LongPredicate consumer) {
  164.         Objects.requireNonNull(consumer, "consumer");
  165.         for (final long l : bitMap) {
  166.             if (!consumer.test(l)) {
  167.                 return false;
  168.             }
  169.         }
  170.         return true;
  171.     }

  172.     @Override
  173.     public boolean processIndices(final IntPredicate consumer) {
  174.         Objects.requireNonNull(consumer, "consumer");
  175.         return IndexExtractor.fromBitMapExtractor(this).processIndices(consumer);
  176.     }
  177. }