SparseBloomFilter.java

  1. /*
  2.  * Licensed to the Apache Software Foundation (ASF) under one or more
  3.  * contributor license agreements.  See the NOTICE file distributed with
  4.  * this work for additional information regarding copyright ownership.
  5.  * The ASF licenses this file to You under the Apache License, Version 2.0
  6.  * (the "License"); you may not use this file except in compliance with
  7.  * the License.  You may obtain a copy of the License at
  8.  *
  9.  *      http://www.apache.org/licenses/LICENSE-2.0
  10.  *
  11.  * Unless required by applicable law or agreed to in writing, software
  12.  * distributed under the License is distributed on an "AS IS" BASIS,
  13.  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  14.  * See the License for the specific language governing permissions and
  15.  * limitations under the License.
  16.  */
  17. package org.apache.commons.collections4.bloomfilter;

  18. import java.util.Objects;
  19. import java.util.TreeSet;
  20. import java.util.function.IntPredicate;
  21. import java.util.function.LongPredicate;

  22. /**
  23.  * A bloom filter using a TreeSet of integers to track enabled bits. This is a standard
  24.  * implementation and should work well for most low cardinality Bloom filters.
  25.  *
  26.  * @since 4.5.0-M1
  27.  */
  28. public final class SparseBloomFilter implements BloomFilter<SparseBloomFilter> {

  29.     /**
  30.      * The bitSet that defines this BloomFilter.
  31.      */
  32.     private final TreeSet<Integer> indices;

  33.     /**
  34.      * The shape of this BloomFilter.
  35.      */
  36.     private final Shape shape;

  37.     /**
  38.      * Constructs an empty BitSetBloomFilter.
  39.      *
  40.      * @param shape The shape of the filter.
  41.      */
  42.     public SparseBloomFilter(final Shape shape) {
  43.         Objects.requireNonNull(shape, "shape");
  44.         this.shape = shape;
  45.         this.indices = new TreeSet<>();
  46.     }

  47.     private SparseBloomFilter(final SparseBloomFilter source) {
  48.         shape = source.shape;
  49.         indices = new TreeSet<>(source.indices);
  50.     }

  51.     /**
  52.      * Adds the index to the indices.
  53.      *
  54.      * @param idx the index to add.
  55.      * @return {@code true} always
  56.      */
  57.     private boolean add(final int idx) {
  58.         indices.add(idx);
  59.         return true;
  60.     }

  61.     @Override
  62.     public long[] asBitMapArray() {
  63.         final long[] result = BitMaps.newBitMap(shape);
  64.         for (final int i : indices) {
  65.             BitMaps.set(result, i);
  66.         }
  67.         return result;
  68.     }

  69.     @Override
  70.     public int cardinality() {
  71.         return indices.size();
  72.     }

  73.     @Override
  74.     public int characteristics() {
  75.         return SPARSE;
  76.     }

  77.     @Override
  78.     public void clear() {
  79.         indices.clear();
  80.     }

  81.     @Override
  82.     public boolean contains(final BitMapExtractor bitMapExtractor) {
  83.         return contains(IndexExtractor.fromBitMapExtractor(bitMapExtractor));
  84.     }

  85.     @Override
  86.     public boolean contains(final IndexExtractor indexExtractor) {
  87.         return indexExtractor.processIndices(indices::contains);
  88.     }

  89.     /**
  90.      * Creates a new instance of this {@link SparseBloomFilter} with the same properties as the current one.
  91.      *
  92.      * @return a copy of this {@link SparseBloomFilter}.
  93.      */
  94.     @Override
  95.     public SparseBloomFilter copy() {
  96.         return new SparseBloomFilter(this);
  97.     }

  98.     @Override
  99.     public Shape getShape() {
  100.         return shape;
  101.     }

  102.     @Override
  103.     public boolean isEmpty() {
  104.         return indices.isEmpty();
  105.     }

  106.     @Override
  107.     public boolean merge(final BitMapExtractor bitMapExtractor) {
  108.         Objects.requireNonNull(bitMapExtractor, "bitMapExtractor");
  109.         return this.merge(IndexExtractor.fromBitMapExtractor(bitMapExtractor));
  110.     }

  111.     @Override
  112.     public boolean merge(final BloomFilter<?> other) {
  113.         Objects.requireNonNull(other, "other");
  114.         final IndexExtractor indexExtractor = (other.characteristics() & SPARSE) != 0 ? (IndexExtractor) other : IndexExtractor.fromBitMapExtractor(other);
  115.         merge(indexExtractor);
  116.         return true;
  117.     }

  118.     @Override
  119.     public boolean merge(final Hasher hasher) {
  120.         Objects.requireNonNull(hasher, "hasher");
  121.         merge(hasher.indices(shape));
  122.         return true;
  123.     }

  124.     @Override
  125.     public boolean merge(final IndexExtractor indexExtractor) {
  126.         Objects.requireNonNull(indexExtractor, "indexExtractor");
  127.         indexExtractor.processIndices(this::add);
  128.         if (!indices.isEmpty()) {
  129.             if (indices.last() >= shape.getNumberOfBits()) {
  130.                 throw new IllegalArgumentException(String.format("Value in list %s is greater than maximum value (%s)",
  131.                         indices.last(), shape.getNumberOfBits() - 1));
  132.             }
  133.             if (indices.first() < 0) {
  134.                 throw new IllegalArgumentException(
  135.                         String.format("Value in list %s is less than 0", indices.first()));
  136.             }
  137.         }
  138.         return true;
  139.     }

  140.     @Override
  141.     public boolean processBitMaps(final LongPredicate consumer) {
  142.         Objects.requireNonNull(consumer, "consumer");
  143.         final int limit = BitMaps.numberOfBitMaps(shape);
  144.         //
  145.         // because our indices are always in order we can shorten the time necessary to
  146.         // create the longs for the consumer
  147.         //
  148.         // the currently constructed bitMap
  149.         long bitMap = 0;
  150.         // the bitmap we are working on
  151.         int idx = 0;
  152.         for (final int i : indices) {
  153.             while (BitMaps.getLongIndex(i) != idx) {
  154.                 if (!consumer.test(bitMap)) {
  155.                     return false;
  156.                 }
  157.                 bitMap = 0;
  158.                 idx++;
  159.             }
  160.             bitMap |= BitMaps.getLongBit(i);
  161.         }
  162.         // we fall through with data in the bitMap
  163.         if (!consumer.test(bitMap)) {
  164.             return false;
  165.         }
  166.         // account for hte bitMap in the previous block + the next one
  167.         idx++;
  168.         // while there are more blocks to generate send zero to the consumer.
  169.         while (idx < limit) {
  170.             if (!consumer.test(0L)) {
  171.                 return false;
  172.             }
  173.             idx++;
  174.         }
  175.         return true;
  176.     }

  177.     @Override
  178.     public boolean processIndices(final IntPredicate consumer) {
  179.         Objects.requireNonNull(consumer, "consumer");
  180.         return indices.stream().allMatch(consumer::test);
  181.     }
  182. }