1 /*
2 * Licensed to the Apache Software Foundation (ASF) under one or more
3 * contributor license agreements. See the NOTICE file distributed with
4 * this work for additional information regarding copyright ownership.
5 * The ASF licenses this file to You under the Apache License, Version 2.0
6 * (the "License"); you may not use this file except in compliance with
7 * the License. You may obtain a copy of the License at
8 *
9 * http://www.apache.org/licenses/LICENSE-2.0
10 *
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
16 */
17 package org.apache.commons.collections4.bloomfilter;
18
19 import java.util.Objects;
20
21 /**
22 * The interface that describes a Bloom filter that associates a count with each
23 * bit index rather than a bit. This allows reversal of merge operations with
24 * remove operations.
25 *
26 * <p>A counting Bloom filter is expected to function identically to a standard
27 * Bloom filter that is the merge of all the Bloom filters that have been added
28 * to and not later subtracted from the counting Bloom filter. The functional
29 * state of a CountingBloomFilter at the start and end of a series of merge and
30 * subsequent remove operations of the same Bloom filters, irrespective of
31 * remove order, is expected to be the same.</p>
32 *
33 * <p>Removal of a filter that has not previously been merged results in an
34 * invalid state where the cells no longer represent a sum of merged Bloom
35 * filters. It is impossible to validate merge and remove exactly without
36 * explicitly storing all filters. Consequently such an operation may go
37 * undetected. The CountingBloomFilter maintains a state flag that is used as a
38 * warning that an operation was performed that resulted in invalid cells and
39 * thus an invalid state. For example this may occur if a cell for an index was
40 * set to negative following a remove operation.</p>
41 *
42 * <p>Implementations should document the expected state of the filter after an
43 * operation that generates invalid cells, and any potential recovery options.
44 * An implementation may support a reversal of the operation to restore the
45 * state to that prior to the operation. In the event that invalid cells are
46 * adjusted to a valid range then it should be documented if there has been
47 * irreversible information loss.</p>
48 *
49 * <p>Implementations may choose to throw an exception during an operation that
50 * generates invalid cells. Implementations should document the expected state
51 * of the filter after such an operation. For example are the cells not updated,
52 * partially updated or updated entirely before the exception is raised.</p>
53 *
54 * @see CellExtractor
55 * @since 4.5.0-M1
56 */
57 public interface CountingBloomFilter extends BloomFilter<CountingBloomFilter>, CellExtractor {
58
59 // Query Operations
60
61 /**
62 * Adds the specified CellExtractor to this Bloom filter.
63 *
64 * <p>Specifically
65 * all cells for the indexes identified by the {@code other} will be incremented
66 * by their corresponding values in the {@code other}.</p>
67 *
68 * <p>This method will return {@code true} if the filter is valid after the operation.</p>
69 *
70 * @param other the CellExtractor to add.
71 * @return {@code true} if the addition was successful and the state is valid
72 * @see #isValid()
73 * @see #subtract(CellExtractor)
74 */
75 boolean add(CellExtractor other);
76
77 /**
78 * Gets the maximum allowable value for a cell count in this Counting filter.
79 *
80 * @return the maximum allowable value for a cell count in this Counting filter.
81 */
82 int getMaxCell();
83
84 /**
85 * Determines the maximum number of times the BitMapExtractor could have been merged into this counting filter.
86 *
87 * @param bitMapExtractor the BitMapExtractor to provide the indices.
88 * @return the maximum number of times the BitMapExtractor could have been inserted.
89 */
90 default int getMaxInsert(final BitMapExtractor bitMapExtractor) {
91 if (!contains(bitMapExtractor)) {
92 return 0;
93 }
94 final long[] bitMaps = bitMapExtractor.asBitMapArray();
95 final int[] max = { Integer.MAX_VALUE };
96 processCells((x, y) -> {
97 if ((bitMaps[BitMaps.getLongIndex(x)] & BitMaps.getLongBit(x)) != 0) {
98 max[0] = max[0] <= y ? max[0] : y;
99 }
100 return true;
101 });
102 return max[0];
103 }
104
105 /**
106 * Determines the maximum number of times the Bloom filter could have been merged into this counting filter.
107 *
108 * @param bloomFilter the Bloom filter the check for.
109 * @return the maximum number of times the Bloom filter could have been inserted.
110 */
111 default int getMaxInsert(final BloomFilter<?> bloomFilter) {
112 return getMaxInsert((BitMapExtractor) bloomFilter);
113 }
114
115 /**
116 * Determines the maximum number of times the Cell Extractor could have been added.
117 *
118 * @param cellExtractor the extractor of cells.
119 * @return the maximum number of times the CellExtractor could have been inserted.
120 */
121 int getMaxInsert(CellExtractor cellExtractor);
122
123 /**
124 * Determines the maximum number of times the Hasher could have been merged into this counting filter.
125 *
126 * @param hasher the Hasher to provide the indices.
127 * @return the maximum number of times the hasher could have been inserted.
128 */
129 default int getMaxInsert(final Hasher hasher) {
130 return getMaxInsert(hasher.indices(getShape()));
131 }
132
133 /**
134 * Determines the maximum number of times the IndexExtractor could have been merged into this counting filter.
135 * <p>
136 * To determine how many times an indexExtractor could have been added create a CellExtractor from the indexExtractor and check that
137 * </p>
138 *
139 * @param indexExtractor the extractor to drive the count check.
140 * @return the maximum number of times the IndexExtractor could have been inserted.
141 * @see #getMaxInsert(CellExtractor)
142 */
143 default int getMaxInsert(final IndexExtractor indexExtractor) {
144 return getMaxInsert(CellExtractor.from(indexExtractor.uniqueIndices()));
145 }
146
147 /**
148 * Returns {@code true} if the internal state is valid.
149 *
150 * <p>This flag is a warning that an addition or
151 * subtraction of cells from this filter resulted in an invalid cell for one or more
152 * indexes. For example this may occur if a cell for an index was
153 * set to negative following a subtraction operation, or overflows the value specified by {@code getMaxCell()} following an
154 * addition operation.</p>
155 *
156 * <p>A counting Bloom filter that has an invalid state is no longer ensured to function
157 * identically to a standard Bloom filter instance that is the merge of all the Bloom filters
158 * that have been added to and not later subtracted from this counting Bloom filter.</p>
159 *
160 * <p>Note: The change to an invalid state may or may not be reversible. Implementations
161 * are expected to document their policy on recovery from an addition or removal operation
162 * that generated an invalid state.</p>
163 *
164 * @return {@code true} if the state is valid
165 */
166 boolean isValid();
167
168 /**
169 * Merges the specified BitMap extractor into this Bloom filter.
170 *
171 * <p>Specifically: all cells for the indexes identified by the {@code bitMapExtractor} will be incremented by 1.</p>
172 *
173 * <p>This method will return {@code true} if the filter is valid after the operation.</p>
174 *
175 * @param bitMapExtractor the BitMapExtractor
176 * @return {@code true} if the removal was successful and the state is valid
177 * @see #isValid()
178 * @see #add(CellExtractor)
179 */
180 @Override
181 default boolean merge(final BitMapExtractor bitMapExtractor) {
182 return merge(IndexExtractor.fromBitMapExtractor(bitMapExtractor));
183 }
184
185 /**
186 * Merges the specified Bloom filter into this Bloom filter.
187 *
188 * <p>Specifically: all cells for the indexes identified by the {@code other} filter will be incremented by 1.</p>
189 *
190 * <p>Note: If the other filter is a counting Bloom filter the other filter's cells are ignored and it is treated as an
191 * IndexExtractor.</p>
192 *
193 * <p>This method will return {@code true} if the filter is valid after the operation.</p>
194 *
195 * @param other the other Bloom filter
196 * @return {@code true} if the removal was successful and the state is valid
197 * @see #isValid()
198 * @see #add(CellExtractor)
199 */
200 @Override
201 default boolean merge(final BloomFilter<?> other) {
202 Objects.requireNonNull(other, "other");
203 return merge((IndexExtractor) other);
204 }
205
206 /**
207 * Merges the specified Hasher into this Bloom filter.
208 *
209 * <p>Specifically: all cells for the unique indexes identified by the {@code hasher} will be incremented by 1.</p>
210 *
211 * <p>This method will return {@code true} if the filter is valid after the operation.</p>
212 *
213 * @param hasher the hasher
214 * @return {@code true} if the removal was successful and the state is valid
215 * @see #isValid()
216 * @see #add(CellExtractor)
217 */
218 @Override
219 default boolean merge(final Hasher hasher) {
220 Objects.requireNonNull(hasher, "hasher");
221 return merge(hasher.indices(getShape()));
222 }
223
224 /**
225 * Merges the specified index extractor into this Bloom filter.
226 *
227 * <p>Specifically: all unique cells for the indices identified by the {@code indexExtractor} will be incremented by 1.</p>
228 *
229 * <p>This method will return {@code true} if the filter is valid after the operation.</p>
230 *
231 * <p>Notes:</p>
232 * <ul>
233 * <li>If indices that are returned multiple times should be incremented multiple times convert the IndexExtractor
234 * to a CellExtractor and add that.</li>
235 * <li>Implementations should throw {@code IllegalArgumentException} and no other exception on bad input.</li>
236 * </ul>
237 * @param indexExtractor the IndexExtractor
238 * @return {@code true} if the removal was successful and the state is valid
239 * @see #isValid()
240 * @see #add(CellExtractor)
241 */
242 @Override
243 default boolean merge(final IndexExtractor indexExtractor) {
244 Objects.requireNonNull(indexExtractor, "indexExtractor");
245 try {
246 return add(CellExtractor.from(indexExtractor.uniqueIndices()));
247 } catch (final IndexOutOfBoundsException e) {
248 throw new IllegalArgumentException(
249 String.format("Filter only accepts values in the [0,%d) range", getShape().getNumberOfBits()), e);
250 }
251 }
252
253 /**
254 * Removes the specified BitMapExtractor from this Bloom filter.
255 *
256 * <p>Specifically all cells for the indices produced by the {@code bitMapExtractor} will be
257 * decremented by 1.</p>
258 *
259 * <p>This method will return {@code true} if the filter is valid after the operation.</p>
260 *
261 * @param bitMapExtractor the BitMapExtractor to provide the indexes
262 * @return {@code true} if the removal was successful and the state is valid
263 * @see #isValid()
264 * @see #subtract(CellExtractor)
265 */
266 default boolean remove(final BitMapExtractor bitMapExtractor) {
267 return remove(IndexExtractor.fromBitMapExtractor(bitMapExtractor));
268 }
269
270 /**
271 * Removes the specified Bloom filter from this Bloom filter.
272 *
273 * <p>Specifically: all cells for the indexes identified by the {@code other} filter will be decremented by 1.</p>
274 *
275 * <p>Note: If the other filter is a counting Bloom filter the other filter's cells are ignored and it is treated as an
276 * IndexExtractor.</p>
277 *
278 * <p>This method will return {@code true} if the filter is valid after the operation.</p>
279 *
280 * @param other the other Bloom filter
281 * @return {@code true} if the removal was successful and the state is valid
282 * @see #isValid()
283 * @see #subtract(CellExtractor)
284 */
285 default boolean remove(final BloomFilter<?> other) {
286 return remove((IndexExtractor) other);
287 }
288
289 /**
290 * Removes the unique values from the specified hasher from this Bloom filter.
291 *
292 * <p>Specifically all cells for the unique indices produced by the {@code hasher} will be
293 * decremented by 1.</p>
294 *
295 * <p>This method will return {@code true} if the filter is valid after the operation.</p>
296 *
297 * @param hasher the hasher to provide the indexes
298 * @return {@code true} if the removal was successful and the state is valid
299 * @see #isValid()
300 * @see #subtract(CellExtractor)
301 */
302 default boolean remove(final Hasher hasher) {
303 Objects.requireNonNull(hasher, "hasher");
304 return remove(hasher.indices(getShape()));
305 }
306
307 /**
308 * Removes the values from the specified IndexExtractor from the Bloom filter from this Bloom filter.
309 *
310 * <p>Specifically all cells for the unique indices produced by the {@code hasher} will be
311 * decremented by 1.</p>
312 *
313 * <p>This method will return {@code true} if the filter is valid after the operation.</p>
314 *
315 * <p>Note: If indices that are returned multiple times should be decremented multiple times convert the IndexExtractor
316 * to a CellExtractor and subtract that.</p>
317 *
318 * @param indexExtractor the IndexExtractor to provide the indexes
319 * @return {@code true} if the removal was successful and the state is valid
320 * @see #isValid()
321 * @see #subtract(CellExtractor)
322 */
323 default boolean remove(final IndexExtractor indexExtractor) {
324 Objects.requireNonNull(indexExtractor, "indexExtractor");
325 try {
326 return subtract(CellExtractor.from(indexExtractor.uniqueIndices()));
327 } catch (final IndexOutOfBoundsException e) {
328 throw new IllegalArgumentException(
329 String.format("Filter only accepts values in the [0,%d) range", getShape().getNumberOfBits()));
330 }
331 }
332
333 /**
334 * Adds the specified CellExtractor to this Bloom filter.
335 *
336 * <p>Specifically
337 * all cells for the indexes identified by the {@code other} will be decremented
338 * by their corresponding values in the {@code other}.</p>
339 *
340 * <p>This method will return true if the filter is valid after the operation.</p>
341 *
342 * @param other the CellExtractor to subtract.
343 * @return {@code true} if the subtraction was successful and the state is valid
344 * @see #isValid()
345 * @see #add(CellExtractor)
346 */
347 boolean subtract(CellExtractor other);
348
349 /**
350 * The default implementation is a no-op since the counting bloom filter returns an unique IndexExtractor by default.
351 * @return this counting Bloom filter.
352 */
353 @Override
354 default IndexExtractor uniqueIndices() {
355 return this;
356 }
357 }