001/*
002 * Licensed to the Apache Software Foundation (ASF) under one or more
003 * contributor license agreements.  See the NOTICE file distributed with
004 * this work for additional information regarding copyright ownership.
005 * The ASF licenses this file to You under the Apache License, Version 2.0
006 * (the "License"); you may not use this file except in compliance with
007 * the License.  You may obtain a copy of the License at
008 *
009 *      http://www.apache.org/licenses/LICENSE-2.0
010 *
011 * Unless required by applicable law or agreed to in writing, software
012 * distributed under the License is distributed on an "AS IS" BASIS,
013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 * See the License for the specific language governing permissions and
015 * limitations under the License.
016 */
017package org.apache.commons.collections4.bloomfilter;
018
019import java.util.Objects;
020import java.util.TreeSet;
021import java.util.function.IntPredicate;
022import java.util.function.LongPredicate;
023
024/**
025 * A bloom filter using a TreeSet of integers to track enabled bits. This is a standard
026 * implementation and should work well for most low cardinality Bloom filters.
027 * @since 4.5
028 */
029public final class SparseBloomFilter implements BloomFilter {
030
031    /**
032     * The bitSet that defines this BloomFilter.
033     */
034    private final TreeSet<Integer> indices;
035
036    /**
037     * The shape of this BloomFilter.
038     */
039    private final Shape shape;
040
041    /**
042     * Constructs an empty BitSetBloomFilter.
043     *
044     * @param shape The shape of the filter.
045     */
046    public SparseBloomFilter(final Shape shape) {
047        Objects.requireNonNull(shape, "shape");
048        this.shape = shape;
049        this.indices = new TreeSet<>();
050    }
051
052    private SparseBloomFilter(final SparseBloomFilter source) {
053        shape = source.shape;
054        indices = new TreeSet<>(source.indices);
055    }
056
057    /**
058     * Adds the index to the indices.
059     * @param idx the index to add.
060     * @return {@code true} always
061     */
062    private boolean add(final int idx) {
063        indices.add(idx);
064        return true;
065    }
066
067    @Override
068    public long[] asBitMapArray() {
069        final long[] result = new long[BitMap.numberOfBitMaps(shape.getNumberOfBits())];
070        for (final int i : indices) {
071            BitMap.set(result, i);
072        }
073        return result;
074    }
075
076    @Override
077    public int cardinality() {
078        return indices.size();
079    }
080
081    @Override
082    public int characteristics() {
083        return SPARSE;
084    }
085
086    @Override
087    public void clear() {
088        indices.clear();
089    }
090
091    @Override
092    public boolean contains(final BitMapProducer bitMapProducer) {
093        return contains(IndexProducer.fromBitMapProducer(bitMapProducer));
094    }
095
096    @Override
097    public boolean contains(final IndexProducer indexProducer) {
098        return indexProducer.forEachIndex(indices::contains);
099    }
100
101    @Override
102    public SparseBloomFilter copy() {
103        return new SparseBloomFilter(this);
104    }
105
106    @Override
107    public boolean forEachBitMap(final LongPredicate consumer) {
108        Objects.requireNonNull(consumer, "consumer");
109        final int limit = BitMap.numberOfBitMaps(shape.getNumberOfBits());
110        /*
111         * because our indices are always in order we can shorten the time necessary to
112         * create the longs for the consumer
113         */
114        // the currently constructed bitMap
115        long bitMap = 0;
116        // the bitmap we are working on
117        int idx = 0;
118        for (final int i : indices) {
119            while (BitMap.getLongIndex(i) != idx) {
120                if (!consumer.test(bitMap)) {
121                    return false;
122                }
123                bitMap = 0;
124                idx++;
125            }
126            bitMap |= BitMap.getLongBit(i);
127        }
128        // we fall through with data in the bitMap
129        if (!consumer.test(bitMap)) {
130            return false;
131        }
132        // account for hte bitMap in the previous block + the next one
133        idx++;
134        // while there are more blocks to generate send zero to the consumer.
135        while (idx < limit) {
136            if (!consumer.test(0L)) {
137                return false;
138            }
139            idx++;
140        }
141        return true;
142    }
143
144    @Override
145    public boolean forEachIndex(final IntPredicate consumer) {
146        Objects.requireNonNull(consumer, "consumer");
147        for (final int value : indices) {
148            if (!consumer.test(value)) {
149                return false;
150            }
151        }
152        return true;
153    }
154
155    @Override
156    public Shape getShape() {
157        return shape;
158    }
159
160    @Override
161    public boolean isEmpty() {
162        return indices.isEmpty();
163    }
164
165    @Override
166    public boolean merge(final BitMapProducer bitMapProducer) {
167        Objects.requireNonNull(bitMapProducer, "bitMapProducer");
168        return this.merge(IndexProducer.fromBitMapProducer(bitMapProducer));
169    }
170
171    @Override
172    public boolean merge(final BloomFilter other) {
173        Objects.requireNonNull(other, "other");
174        final IndexProducer producer = (other.characteristics() & SPARSE) != 0 ? (IndexProducer) other : IndexProducer.fromBitMapProducer(other);
175        merge(producer);
176        return true;
177    }
178
179    @Override
180    public boolean merge(final Hasher hasher) {
181        Objects.requireNonNull(hasher, "hasher");
182        merge(hasher.indices(shape));
183        return true;
184    }
185
186    @Override
187    public boolean merge(final IndexProducer indexProducer) {
188        Objects.requireNonNull(indexProducer, "indexProducer");
189        indexProducer.forEachIndex(this::add);
190        if (!this.indices.isEmpty()) {
191            if (this.indices.last() >= shape.getNumberOfBits()) {
192                throw new IllegalArgumentException(String.format("Value in list %s is greater than maximum value (%s)",
193                        this.indices.last(), shape.getNumberOfBits() - 1));
194            }
195            if (this.indices.first() < 0) {
196                throw new IllegalArgumentException(
197                        String.format("Value in list %s is less than 0", this.indices.first()));
198            }
199        }
200        return true;
201    }
202}