001/*
002 * Licensed to the Apache Software Foundation (ASF) under one or more
003 * contributor license agreements.  See the NOTICE file distributed with
004 * this work for additional information regarding copyright ownership.
005 * The ASF licenses this file to You under the Apache License, Version 2.0
006 * (the "License"); you may not use this file except in compliance with
007 * the License.  You may obtain a copy of the License at
008 *
009 *      http://www.apache.org/licenses/LICENSE-2.0
010 *
011 * Unless required by applicable law or agreed to in writing, software
012 * distributed under the License is distributed on an "AS IS" BASIS,
013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 * See the License for the specific language governing permissions and
015 * limitations under the License.
016 */
017package org.apache.commons.collections4.bloomfilter;
018
019import java.util.Objects;
020
021/**
022 * The interface that describes a Bloom filter that associates a count with each
023 * bit index rather than a bit.  This allows reversal of merge operations with
024 * remove operations.
025 *
026 * <p>A counting Bloom filter is expected to function identically to a standard
027 * Bloom filter that is the merge of all the Bloom filters that have been added
028 * to and not later subtracted from the counting Bloom filter. The functional
029 * state of a CountingBloomFilter at the start and end of a series of merge and
030 * subsequent remove operations of the same Bloom filters, irrespective of
031 * remove order, is expected to be the same.</p>
032 *
033 * <p>Removal of a filter that has not previously been merged results in an
034 * invalid state where the cells no longer represent a sum of merged Bloom
035 * filters. It is impossible to validate merge and remove exactly without
036 * explicitly storing all filters. Consequently such an operation may go
037 * undetected. The CountingBloomFilter maintains a state flag that is used as a
038 * warning that an operation was performed that resulted in invalid cells and
039 * thus an invalid state. For example this may occur if a cell for an index was
040 * set to negative following a remove operation.</p>
041 *
042 * <p>Implementations should document the expected state of the filter after an
043 * operation that generates invalid cells, and any potential recovery options.
044 * An implementation may support a reversal of the operation to restore the
045 * state to that prior to the operation. In the event that invalid cells are
046 * adjusted to a valid range then it should be documented if there has been
047 * irreversible information loss.</p>
048 *
049 * <p>Implementations may choose to throw an exception during an operation that
050 * generates invalid cells. Implementations should document the expected state
051 * of the filter after such an operation. For example are the cells not updated,
052 * partially updated or updated entirely before the exception is raised.</p>
053 *
054 * @see CellProducer
055 * @since 4.5
056 */
057public interface CountingBloomFilter extends BloomFilter, CellProducer {
058
059    // Query Operations
060
061    /**
062     * Adds the specified CellProducer to this Bloom filter.
063     *
064     * <p>Specifically
065     * all cells for the indexes identified by the {@code other} will be incremented
066     * by their corresponding values in the {@code other}.</p>
067     *
068     * <p>This method will return {@code true} if the filter is valid after the operation.</p>
069     *
070     * @param other the CellProducer to add.
071     * @return {@code true} if the addition was successful and the state is valid
072     * @see #isValid()
073     * @see #subtract(CellProducer)
074     */
075    boolean add(CellProducer other);
076
077    /**
078     * Creates a new instance of the CountingBloomFilter with the same properties as the current one.
079     * @return a copy of this CountingBloomFilter
080     */
081    @Override
082    CountingBloomFilter copy();
083
084    /**
085     * Returns the maximum allowable value for a cell count in this Counting filter.
086     * @return the maximum allowable value for a cell count in this Counting filter.
087     */
088    int getMaxCell();
089
090    /**
091     * Determines the maximum number of times the BitMapProducer could have been merged into this
092     * counting filter.
093     * @param bitMapProducer the BitMapProducer to provide the indices.
094     * @return the maximum number of times the BitMapProducer could have been inserted.
095     */
096    default int getMaxInsert(final BitMapProducer bitMapProducer) {
097        if (!contains(bitMapProducer)) {
098            return 0;
099        }
100        final long[] bitMaps = bitMapProducer.asBitMapArray();
101        final int[] max = { Integer.MAX_VALUE };
102        forEachCell((x, y) -> {
103            if ((bitMaps[BitMap.getLongIndex(x)] & BitMap.getLongBit(x)) != 0) {
104                max[0] = max[0] <= y ? max[0] : y;
105            }
106            return true;
107        });
108        return max[0];
109    }
110
111    /**
112     * Determines the maximum number of times the Bloom filter could have been merged
113     * into this counting filter.
114     * @param bloomFilter the Bloom filter the check for.
115     * @return the maximum number of times the Bloom filter could have been inserted.
116     */
117    default int getMaxInsert(final BloomFilter bloomFilter) {
118        return getMaxInsert((BitMapProducer) bloomFilter);
119    }
120
121    /**
122     * Determines the maximum number of times the Cell Producer could have been add.
123     * @param cellProducer the producer of cells.
124     * @return the maximum number of times the CellProducer could have been inserted.
125     */
126    int getMaxInsert(CellProducer cellProducer);
127
128    /**
129     * Determines the maximum number of times the Hasher could have been merged into this
130     * counting filter.
131     * @param hasher the Hasher to provide the indices.
132     * @return the maximum number of times the hasher could have been inserted.
133     */
134    default int getMaxInsert(final Hasher hasher) {
135        return getMaxInsert(hasher.indices(getShape()));
136    }
137
138    // Modification Operations
139
140    /**
141     * Determines the maximum number of times the IndexProducer could have been merged
142     * into this counting filter.
143     * <p>To determine how many times an indxProducer could have been added create a CellProducer
144     * from the indexProducer and check that</p>
145     * @param idxProducer the producer to drive the count check.
146     * @return the maximum number of times the IndexProducer could have been inserted.
147     * @see #getMaxInsert(CellProducer)
148     */
149    default int getMaxInsert(final IndexProducer idxProducer) {
150        return getMaxInsert(CellProducer.from(idxProducer.uniqueIndices()) );
151    }
152
153    /**
154     * Returns {@code true} if the internal state is valid.
155     *
156     * <p>This flag is a warning that an addition or
157     * subtraction of cells from this filter resulted in an invalid cell for one or more
158     * indexes. For example this may occur if a cell for an index was
159     * set to negative following a subtraction operation, or overflows the value specified by {@code getMaxCell()} following an
160     * addition operation.</p>
161     *
162     * <p>A counting Bloom filter that has an invalid state is no longer ensured to function
163     * identically to a standard Bloom filter instance that is the merge of all the Bloom filters
164     * that have been added to and not later subtracted from this counting Bloom filter.</p>
165     *
166     * <p>Note: The change to an invalid state may or may not be reversible. Implementations
167     * are expected to document their policy on recovery from an addition or removal operation
168     * that generated an invalid state.</p>
169     *
170     * @return {@code true} if the state is valid
171     */
172    boolean isValid();
173
174    /**
175     * Merges the specified BitMap producer into this Bloom filter.
176     *
177     * <p>Specifically: all cells for the indexes identified by the {@code bitMapProducer} will be incremented by 1.</p>
178     *
179     * <p>This method will return {@code true} if the filter is valid after the operation.</p>
180     *
181     * @param bitMapProducer the BitMapProducer
182     * @return {@code true} if the removal was successful and the state is valid
183     * @see #isValid()
184     * @see #add(CellProducer)
185     */
186    @Override
187    default boolean merge(final BitMapProducer bitMapProducer) {
188        Objects.requireNonNull(bitMapProducer, "bitMapProducer");
189        return merge(IndexProducer.fromBitMapProducer(bitMapProducer));
190    }
191
192    /**
193     * Merges the specified Bloom filter into this Bloom filter.
194     *
195     * <p>Specifically: all cells for the indexes identified by the {@code other} filter will be incremented by 1.</p>
196     *
197     * <p>Note: If the other filter is a counting Bloom filter the other filter's cells are ignored and it is treated as an
198     * IndexProducer.</p>
199     *
200     * <p>This method will return {@code true} if the filter is valid after the operation.</p>
201     *
202     * @param other the other Bloom filter
203     * @return {@code true} if the removal was successful and the state is valid
204     * @see #isValid()
205     * @see #add(CellProducer)
206     */
207    @Override
208    default boolean merge(final BloomFilter other) {
209        Objects.requireNonNull(other, "other");
210        return merge((IndexProducer) other);
211    }
212
213    /**
214     * Merges the specified Hasher into this Bloom filter.
215     *
216     * <p>Specifically: all cells for the unique indexes identified by the {@code hasher} will be incremented by 1.</p>
217     *
218     * <p>This method will return {@code true} if the filter is valid after the operation.</p>
219     *
220     * @param hasher the hasher
221     * @return {@code true} if the removal was successful and the state is valid
222     * @see #isValid()
223     * @see #add(CellProducer)
224     */
225    @Override
226    default boolean merge(final Hasher hasher) {
227        Objects.requireNonNull(hasher, "hasher");
228        return merge(hasher.indices(getShape()));
229    }
230
231    /**
232     * Merges the specified index producer into this Bloom filter.
233     *
234     * <p>Specifically: all unique cells for the indices identified by the {@code indexProducer} will be incremented by 1.</p>
235     *
236     * <p>This method will return {@code true} if the filter is valid after the operation.</p>
237     *
238     * <p>Note: If indices that are returned multiple times should be incremented multiple times convert the IndexProducer
239     * to a CellProducer and add that.</p>
240     *
241     * @param indexProducer the IndexProducer
242     * @return {@code true} if the removal was successful and the state is valid
243     * @see #isValid()
244     * @see #add(CellProducer)
245     */
246    @Override
247    default boolean merge(final IndexProducer indexProducer) {
248        Objects.requireNonNull(indexProducer, "indexProducer");
249        try {
250            return add(CellProducer.from(indexProducer.uniqueIndices()));
251        } catch (final IndexOutOfBoundsException e) {
252            throw new IllegalArgumentException(
253                    String.format("Filter only accepts values in the [0,%d) range", getShape().getNumberOfBits()), e);
254        }
255    }
256
257    /**
258     * Removes the specified BitMapProducer from this Bloom filter.
259     *
260     * <p>Specifically all cells for the indices produced by the {@code bitMapProducer} will be
261     * decremented by 1.</p>
262     *
263     * <p>This method will return {@code true} if the filter is valid after the operation.</p>
264     *
265     * @param bitMapProducer the BitMapProducer to provide the indexes
266     * @return {@code true} if the removal was successful and the state is valid
267     * @see #isValid()
268     * @see #subtract(CellProducer)
269     */
270    default boolean remove(final BitMapProducer bitMapProducer) {
271        Objects.requireNonNull(bitMapProducer, "bitMapProducer");
272        return remove(IndexProducer.fromBitMapProducer(bitMapProducer));
273    }
274
275    /**
276     * Removes the specified Bloom filter from this Bloom filter.
277     *
278     * <p>Specifically: all cells for the indexes identified by the {@code other} filter will be decremented by 1.</p>
279     *
280     * <p>Note: If the other filter is a counting Bloom filter the other filter's cells are ignored and it is treated as an
281     * IndexProducer.</p>
282     *
283     * <p>This method will return {@code true} if the filter is valid after the operation.</p>
284     *
285     * @param other the other Bloom filter
286     * @return {@code true} if the removal was successful and the state is valid
287     * @see #isValid()
288     * @see #subtract(CellProducer)
289     */
290    default boolean remove(final BloomFilter other) {
291        Objects.requireNonNull(other, "other");
292        return remove((IndexProducer) other);
293    }
294
295    /**
296     * Removes the unique values from the specified hasher from this Bloom filter.
297     *
298     * <p>Specifically all cells for the unique indices produced by the {@code hasher} will be
299     * decremented by 1.</p>
300     *
301     * <p>This method will return {@code true} if the filter is valid after the operation.</p>
302     *
303     * @param hasher the hasher to provide the indexes
304     * @return {@code true} if the removal was successful and the state is valid
305     * @see #isValid()
306     * @see #subtract(CellProducer)
307     */
308    default boolean remove(final Hasher hasher) {
309        Objects.requireNonNull(hasher, "hasher");
310        return remove(hasher.indices(getShape()));
311    }
312
313    /**
314     * Removes the values from the specified IndexProducer from the Bloom filter from this Bloom filter.
315     *
316     * <p>Specifically all cells for the unique indices produced by the {@code hasher} will be
317     * decremented by 1.</p>
318     *
319     * <p>This method will return {@code true} if the filter is valid after the operation.</p>
320     *
321     * <p>Note: If indices that are returned multiple times should be decremented multiple times convert the IndexProducer
322     * to a CellProducer and subtract that.</p>
323     *
324     * @param indexProducer the IndexProducer to provide the indexes
325     * @return {@code true} if the removal was successful and the state is valid
326     * @see #isValid()
327     * @see #subtract(CellProducer)
328     */
329    default boolean remove(final IndexProducer indexProducer) {
330        Objects.requireNonNull(indexProducer, "indexProducer");
331        try {
332            return subtract(CellProducer.from(indexProducer.uniqueIndices()));
333        } catch (final IndexOutOfBoundsException e) {
334            throw new IllegalArgumentException(
335                    String.format("Filter only accepts values in the [0,%d) range", getShape().getNumberOfBits()));
336        }
337    }
338
339
340    /**
341     * Adds the specified CellProducer to this Bloom filter.
342     *
343     * <p>Specifically
344     * all cells for the indexes identified by the {@code other} will be decremented
345     * by their corresponding values in the {@code other}.</p>
346     *
347     * <p>This method will return true if the filter is valid after the operation.</p>
348     *
349     * @param other the CellProducer to subtract.
350     * @return {@code true} if the subtraction was successful and the state is valid
351     * @see #isValid()
352     * @see #add(CellProducer)
353     */
354    boolean subtract(CellProducer other);
355
356    @Override
357    default IndexProducer uniqueIndices() {
358        return this;
359    }
360}