1 /* 2 * Licensed to the Apache Software Foundation (ASF) under one or more 3 * contributor license agreements. See the NOTICE file distributed with 4 * this work for additional information regarding copyright ownership. 5 * The ASF licenses this file to You under the Apache License, Version 2.0 6 * (the "License"); you may not use this file except in compliance with 7 * the License. You may obtain a copy of the License at 8 * 9 * http://www.apache.org/licenses/LICENSE-2.0 10 * 11 * Unless required by applicable law or agreed to in writing, software 12 * distributed under the License is distributed on an "AS IS" BASIS, 13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 * See the License for the specific language governing permissions and 15 * limitations under the License. 16 */ 17 package org.apache.commons.collections4.bloomfilter; 18 19 import java.util.Objects; 20 21 /** 22 * The interface that describes a Bloom filter that associates a count with each 23 * bit index rather than a bit. This allows reversal of merge operations with 24 * remove operations. 25 * 26 * <p>A counting Bloom filter is expected to function identically to a standard 27 * Bloom filter that is the merge of all the Bloom filters that have been added 28 * to and not later subtracted from the counting Bloom filter. The functional 29 * state of a CountingBloomFilter at the start and end of a series of merge and 30 * subsequent remove operations of the same Bloom filters, irrespective of 31 * remove order, is expected to be the same.</p> 32 * 33 * <p>Removal of a filter that has not previously been merged results in an 34 * invalid state where the cells no longer represent a sum of merged Bloom 35 * filters. It is impossible to validate merge and remove exactly without 36 * explicitly storing all filters. Consequently such an operation may go 37 * undetected. The CountingBloomFilter maintains a state flag that is used as a 38 * warning that an operation was performed that resulted in invalid cells and 39 * thus an invalid state. For example this may occur if a cell for an index was 40 * set to negative following a remove operation.</p> 41 * 42 * <p>Implementations should document the expected state of the filter after an 43 * operation that generates invalid cells, and any potential recovery options. 44 * An implementation may support a reversal of the operation to restore the 45 * state to that prior to the operation. In the event that invalid cells are 46 * adjusted to a valid range then it should be documented if there has been 47 * irreversible information loss.</p> 48 * 49 * <p>Implementations may choose to throw an exception during an operation that 50 * generates invalid cells. Implementations should document the expected state 51 * of the filter after such an operation. For example are the cells not updated, 52 * partially updated or updated entirely before the exception is raised.</p> 53 * 54 * @see CellProducer 55 * @since 4.5 56 */ 57 public interface CountingBloomFilter extends BloomFilter, CellProducer { 58 59 // Query Operations 60 61 /** 62 * Adds the specified CellProducer to this Bloom filter. 63 * 64 * <p>Specifically 65 * all cells for the indexes identified by the {@code other} will be incremented 66 * by their corresponding values in the {@code other}.</p> 67 * 68 * <p>This method will return {@code true} if the filter is valid after the operation.</p> 69 * 70 * @param other the CellProducer to add. 71 * @return {@code true} if the addition was successful and the state is valid 72 * @see #isValid() 73 * @see #subtract(CellProducer) 74 */ 75 boolean add(CellProducer other); 76 77 /** 78 * Creates a new instance of the CountingBloomFilter with the same properties as the current one. 79 * @return a copy of this CountingBloomFilter 80 */ 81 @Override 82 CountingBloomFilter copy(); 83 84 /** 85 * Returns the maximum allowable value for a cell count in this Counting filter. 86 * @return the maximum allowable value for a cell count in this Counting filter. 87 */ 88 int getMaxCell(); 89 90 /** 91 * Determines the maximum number of times the BitMapProducer could have been merged into this 92 * counting filter. 93 * @param bitMapProducer the BitMapProducer to provide the indices. 94 * @return the maximum number of times the BitMapProducer could have been inserted. 95 */ 96 default int getMaxInsert(final BitMapProducer bitMapProducer) { 97 if (!contains(bitMapProducer)) { 98 return 0; 99 } 100 final long[] bitMaps = bitMapProducer.asBitMapArray(); 101 final int[] max = { Integer.MAX_VALUE }; 102 forEachCell((x, y) -> { 103 if ((bitMaps[BitMap.getLongIndex(x)] & BitMap.getLongBit(x)) != 0) { 104 max[0] = max[0] <= y ? max[0] : y; 105 } 106 return true; 107 }); 108 return max[0]; 109 } 110 111 /** 112 * Determines the maximum number of times the Bloom filter could have been merged 113 * into this counting filter. 114 * @param bloomFilter the Bloom filter the check for. 115 * @return the maximum number of times the Bloom filter could have been inserted. 116 */ 117 default int getMaxInsert(final BloomFilter bloomFilter) { 118 return getMaxInsert((BitMapProducer) bloomFilter); 119 } 120 121 /** 122 * Determines the maximum number of times the Cell Producer could have been add. 123 * @param cellProducer the producer of cells. 124 * @return the maximum number of times the CellProducer could have been inserted. 125 */ 126 int getMaxInsert(CellProducer cellProducer); 127 128 /** 129 * Determines the maximum number of times the Hasher could have been merged into this 130 * counting filter. 131 * @param hasher the Hasher to provide the indices. 132 * @return the maximum number of times the hasher could have been inserted. 133 */ 134 default int getMaxInsert(final Hasher hasher) { 135 return getMaxInsert(hasher.indices(getShape())); 136 } 137 138 // Modification Operations 139 140 /** 141 * Determines the maximum number of times the IndexProducer could have been merged 142 * into this counting filter. 143 * <p>To determine how many times an indxProducer could have been added create a CellProducer 144 * from the indexProducer and check that</p> 145 * @param idxProducer the producer to drive the count check. 146 * @return the maximum number of times the IndexProducer could have been inserted. 147 * @see #getMaxInsert(CellProducer) 148 */ 149 default int getMaxInsert(final IndexProducer idxProducer) { 150 return getMaxInsert(CellProducer.from(idxProducer.uniqueIndices()) ); 151 } 152 153 /** 154 * Returns {@code true} if the internal state is valid. 155 * 156 * <p>This flag is a warning that an addition or 157 * subtraction of cells from this filter resulted in an invalid cell for one or more 158 * indexes. For example this may occur if a cell for an index was 159 * set to negative following a subtraction operation, or overflows the value specified by {@code getMaxCell()} following an 160 * addition operation.</p> 161 * 162 * <p>A counting Bloom filter that has an invalid state is no longer ensured to function 163 * identically to a standard Bloom filter instance that is the merge of all the Bloom filters 164 * that have been added to and not later subtracted from this counting Bloom filter.</p> 165 * 166 * <p>Note: The change to an invalid state may or may not be reversible. Implementations 167 * are expected to document their policy on recovery from an addition or removal operation 168 * that generated an invalid state.</p> 169 * 170 * @return {@code true} if the state is valid 171 */ 172 boolean isValid(); 173 174 /** 175 * Merges the specified BitMap producer into this Bloom filter. 176 * 177 * <p>Specifically: all cells for the indexes identified by the {@code bitMapProducer} will be incremented by 1.</p> 178 * 179 * <p>This method will return {@code true} if the filter is valid after the operation.</p> 180 * 181 * @param bitMapProducer the BitMapProducer 182 * @return {@code true} if the removal was successful and the state is valid 183 * @see #isValid() 184 * @see #add(CellProducer) 185 */ 186 @Override 187 default boolean merge(final BitMapProducer bitMapProducer) { 188 Objects.requireNonNull(bitMapProducer, "bitMapProducer"); 189 return merge(IndexProducer.fromBitMapProducer(bitMapProducer)); 190 } 191 192 /** 193 * Merges the specified Bloom filter into this Bloom filter. 194 * 195 * <p>Specifically: all cells for the indexes identified by the {@code other} filter will be incremented by 1.</p> 196 * 197 * <p>Note: If the other filter is a counting Bloom filter the other filter's cells are ignored and it is treated as an 198 * IndexProducer.</p> 199 * 200 * <p>This method will return {@code true} if the filter is valid after the operation.</p> 201 * 202 * @param other the other Bloom filter 203 * @return {@code true} if the removal was successful and the state is valid 204 * @see #isValid() 205 * @see #add(CellProducer) 206 */ 207 @Override 208 default boolean merge(final BloomFilter other) { 209 Objects.requireNonNull(other, "other"); 210 return merge((IndexProducer) other); 211 } 212 213 /** 214 * Merges the specified Hasher into this Bloom filter. 215 * 216 * <p>Specifically: all cells for the unique indexes identified by the {@code hasher} will be incremented by 1.</p> 217 * 218 * <p>This method will return {@code true} if the filter is valid after the operation.</p> 219 * 220 * @param hasher the hasher 221 * @return {@code true} if the removal was successful and the state is valid 222 * @see #isValid() 223 * @see #add(CellProducer) 224 */ 225 @Override 226 default boolean merge(final Hasher hasher) { 227 Objects.requireNonNull(hasher, "hasher"); 228 return merge(hasher.indices(getShape())); 229 } 230 231 /** 232 * Merges the specified index producer into this Bloom filter. 233 * 234 * <p>Specifically: all unique cells for the indices identified by the {@code indexProducer} will be incremented by 1.</p> 235 * 236 * <p>This method will return {@code true} if the filter is valid after the operation.</p> 237 * 238 * <p>Note: If indices that are returned multiple times should be incremented multiple times convert the IndexProducer 239 * to a CellProducer and add that.</p> 240 * 241 * @param indexProducer the IndexProducer 242 * @return {@code true} if the removal was successful and the state is valid 243 * @see #isValid() 244 * @see #add(CellProducer) 245 */ 246 @Override 247 default boolean merge(final IndexProducer indexProducer) { 248 Objects.requireNonNull(indexProducer, "indexProducer"); 249 try { 250 return add(CellProducer.from(indexProducer.uniqueIndices())); 251 } catch (final IndexOutOfBoundsException e) { 252 throw new IllegalArgumentException( 253 String.format("Filter only accepts values in the [0,%d) range", getShape().getNumberOfBits()), e); 254 } 255 } 256 257 /** 258 * Removes the specified BitMapProducer from this Bloom filter. 259 * 260 * <p>Specifically all cells for the indices produced by the {@code bitMapProducer} will be 261 * decremented by 1.</p> 262 * 263 * <p>This method will return {@code true} if the filter is valid after the operation.</p> 264 * 265 * @param bitMapProducer the BitMapProducer to provide the indexes 266 * @return {@code true} if the removal was successful and the state is valid 267 * @see #isValid() 268 * @see #subtract(CellProducer) 269 */ 270 default boolean remove(final BitMapProducer bitMapProducer) { 271 Objects.requireNonNull(bitMapProducer, "bitMapProducer"); 272 return remove(IndexProducer.fromBitMapProducer(bitMapProducer)); 273 } 274 275 /** 276 * Removes the specified Bloom filter from this Bloom filter. 277 * 278 * <p>Specifically: all cells for the indexes identified by the {@code other} filter will be decremented by 1.</p> 279 * 280 * <p>Note: If the other filter is a counting Bloom filter the other filter's cells are ignored and it is treated as an 281 * IndexProducer.</p> 282 * 283 * <p>This method will return {@code true} if the filter is valid after the operation.</p> 284 * 285 * @param other the other Bloom filter 286 * @return {@code true} if the removal was successful and the state is valid 287 * @see #isValid() 288 * @see #subtract(CellProducer) 289 */ 290 default boolean remove(final BloomFilter other) { 291 Objects.requireNonNull(other, "other"); 292 return remove((IndexProducer) other); 293 } 294 295 /** 296 * Removes the unique values from the specified hasher from this Bloom filter. 297 * 298 * <p>Specifically all cells for the unique indices produced by the {@code hasher} will be 299 * decremented by 1.</p> 300 * 301 * <p>This method will return {@code true} if the filter is valid after the operation.</p> 302 * 303 * @param hasher the hasher to provide the indexes 304 * @return {@code true} if the removal was successful and the state is valid 305 * @see #isValid() 306 * @see #subtract(CellProducer) 307 */ 308 default boolean remove(final Hasher hasher) { 309 Objects.requireNonNull(hasher, "hasher"); 310 return remove(hasher.indices(getShape())); 311 } 312 313 /** 314 * Removes the values from the specified IndexProducer from the Bloom filter from this Bloom filter. 315 * 316 * <p>Specifically all cells for the unique indices produced by the {@code hasher} will be 317 * decremented by 1.</p> 318 * 319 * <p>This method will return {@code true} if the filter is valid after the operation.</p> 320 * 321 * <p>Note: If indices that are returned multiple times should be decremented multiple times convert the IndexProducer 322 * to a CellProducer and subtract that.</p> 323 * 324 * @param indexProducer the IndexProducer to provide the indexes 325 * @return {@code true} if the removal was successful and the state is valid 326 * @see #isValid() 327 * @see #subtract(CellProducer) 328 */ 329 default boolean remove(final IndexProducer indexProducer) { 330 Objects.requireNonNull(indexProducer, "indexProducer"); 331 try { 332 return subtract(CellProducer.from(indexProducer.uniqueIndices())); 333 } catch (final IndexOutOfBoundsException e) { 334 throw new IllegalArgumentException( 335 String.format("Filter only accepts values in the [0,%d) range", getShape().getNumberOfBits())); 336 } 337 } 338 339 340 /** 341 * Adds the specified CellProducer to this Bloom filter. 342 * 343 * <p>Specifically 344 * all cells for the indexes identified by the {@code other} will be decremented 345 * by their corresponding values in the {@code other}.</p> 346 * 347 * <p>This method will return true if the filter is valid after the operation.</p> 348 * 349 * @param other the CellProducer to subtract. 350 * @return {@code true} if the subtraction was successful and the state is valid 351 * @see #isValid() 352 * @see #add(CellProducer) 353 */ 354 boolean subtract(CellProducer other); 355 356 @Override 357 default IndexProducer uniqueIndices() { 358 return this; 359 } 360 }