SparseBloomFilter.java
- /*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
- package org.apache.commons.collections4.bloomfilter;
- import java.util.Objects;
- import java.util.TreeSet;
- import java.util.function.IntPredicate;
- import java.util.function.LongPredicate;
- /**
- * A bloom filter using a TreeSet of integers to track enabled bits. This is a standard
- * implementation and should work well for most low cardinality Bloom filters.
- *
- * @since 4.5.0-M1
- */
- public final class SparseBloomFilter implements BloomFilter<SparseBloomFilter> {
- /**
- * The bitSet that defines this BloomFilter.
- */
- private final TreeSet<Integer> indices;
- /**
- * The shape of this BloomFilter.
- */
- private final Shape shape;
- /**
- * Constructs an empty BitSetBloomFilter.
- *
- * @param shape The shape of the filter.
- */
- public SparseBloomFilter(final Shape shape) {
- Objects.requireNonNull(shape, "shape");
- this.shape = shape;
- this.indices = new TreeSet<>();
- }
- private SparseBloomFilter(final SparseBloomFilter source) {
- shape = source.shape;
- indices = new TreeSet<>(source.indices);
- }
- /**
- * Adds the index to the indices.
- *
- * @param idx the index to add.
- * @return {@code true} always
- */
- private boolean add(final int idx) {
- indices.add(idx);
- return true;
- }
- @Override
- public long[] asBitMapArray() {
- final long[] result = BitMaps.newBitMap(shape);
- for (final int i : indices) {
- BitMaps.set(result, i);
- }
- return result;
- }
- @Override
- public int cardinality() {
- return indices.size();
- }
- @Override
- public int characteristics() {
- return SPARSE;
- }
- @Override
- public void clear() {
- indices.clear();
- }
- @Override
- public boolean contains(final BitMapExtractor bitMapExtractor) {
- return contains(IndexExtractor.fromBitMapExtractor(bitMapExtractor));
- }
- @Override
- public boolean contains(final IndexExtractor indexExtractor) {
- return indexExtractor.processIndices(indices::contains);
- }
- /**
- * Creates a new instance of this {@link SparseBloomFilter} with the same properties as the current one.
- *
- * @return a copy of this {@link SparseBloomFilter}.
- */
- @Override
- public SparseBloomFilter copy() {
- return new SparseBloomFilter(this);
- }
- @Override
- public Shape getShape() {
- return shape;
- }
- @Override
- public boolean isEmpty() {
- return indices.isEmpty();
- }
- @Override
- public boolean merge(final BitMapExtractor bitMapExtractor) {
- Objects.requireNonNull(bitMapExtractor, "bitMapExtractor");
- return this.merge(IndexExtractor.fromBitMapExtractor(bitMapExtractor));
- }
- @Override
- public boolean merge(final BloomFilter<?> other) {
- Objects.requireNonNull(other, "other");
- final IndexExtractor indexExtractor = (other.characteristics() & SPARSE) != 0 ? (IndexExtractor) other : IndexExtractor.fromBitMapExtractor(other);
- merge(indexExtractor);
- return true;
- }
- @Override
- public boolean merge(final Hasher hasher) {
- Objects.requireNonNull(hasher, "hasher");
- merge(hasher.indices(shape));
- return true;
- }
- @Override
- public boolean merge(final IndexExtractor indexExtractor) {
- Objects.requireNonNull(indexExtractor, "indexExtractor");
- indexExtractor.processIndices(this::add);
- if (!indices.isEmpty()) {
- if (indices.last() >= shape.getNumberOfBits()) {
- throw new IllegalArgumentException(String.format("Value in list %s is greater than maximum value (%s)",
- indices.last(), shape.getNumberOfBits() - 1));
- }
- if (indices.first() < 0) {
- throw new IllegalArgumentException(
- String.format("Value in list %s is less than 0", indices.first()));
- }
- }
- return true;
- }
- @Override
- public boolean processBitMaps(final LongPredicate consumer) {
- Objects.requireNonNull(consumer, "consumer");
- final int limit = BitMaps.numberOfBitMaps(shape);
- //
- // because our indices are always in order we can shorten the time necessary to
- // create the longs for the consumer
- //
- // the currently constructed bitMap
- long bitMap = 0;
- // the bitmap we are working on
- int idx = 0;
- for (final int i : indices) {
- while (BitMaps.getLongIndex(i) != idx) {
- if (!consumer.test(bitMap)) {
- return false;
- }
- bitMap = 0;
- idx++;
- }
- bitMap |= BitMaps.getLongBit(i);
- }
- // we fall through with data in the bitMap
- if (!consumer.test(bitMap)) {
- return false;
- }
- // account for hte bitMap in the previous block + the next one
- idx++;
- // while there are more blocks to generate send zero to the consumer.
- while (idx < limit) {
- if (!consumer.test(0L)) {
- return false;
- }
- idx++;
- }
- return true;
- }
- @Override
- public boolean processIndices(final IntPredicate consumer) {
- Objects.requireNonNull(consumer, "consumer");
- return indices.stream().allMatch(consumer::test);
- }
- }