001/* 002 * Licensed to the Apache Software Foundation (ASF) under one or more 003 * contributor license agreements. See the NOTICE file distributed with 004 * this work for additional information regarding copyright ownership. 005 * The ASF licenses this file to You under the Apache License, Version 2.0 006 * (the "License"); you may not use this file except in compliance with 007 * the License. You may obtain a copy of the License at 008 * 009 * http://www.apache.org/licenses/LICENSE-2.0 010 * 011 * Unless required by applicable law or agreed to in writing, software 012 * distributed under the License is distributed on an "AS IS" BASIS, 013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 014 * See the License for the specific language governing permissions and 015 * limitations under the License. 016 */ 017package org.apache.commons.collections4.bloomfilter; 018 019import java.util.Arrays; 020import java.util.NoSuchElementException; 021import java.util.Objects; 022import java.util.function.IntPredicate; 023import java.util.function.LongPredicate; 024import java.util.function.Predicate; 025 026/** 027 * Layered Bloom filters are described in Zhiwang, Cen; Jungang, Xu; Jian, Sun 028 * (2010), "A multi-layer Bloom filter for duplicated URL detection", Proc. 3rd 029 * International Conference on Advanced Computer Theory and Engineering (ICACTE 030 * 2010), vol. 1, pp. V1-586-V1-591, doi:10.1109/ICACTE.2010.5578947, ISBN 031 * 978-1-4244-6539-2, S2CID 3108985 032 * <p> 033 * In short, Layered Bloom filter contains several bloom filters arranged in 034 * layers. 035 * </p> 036 * <ul> 037 * <li>When membership in the filter is checked each layer in turn is checked 038 * and if a match is found {@code true} is returned.</li> 039 * <li>When merging each bloom filter is merged into the newest filter in the 040 * list of layers.</li> 041 * <li>When questions of cardinality are asked the cardinality of the union of 042 * the enclosed Bloom filters is used.</li> 043 * </ul> 044 * <p> 045 * The net result is that the layered Bloom filter can be populated with more 046 * items than the Shape would indicate and yet still return a false positive 047 * rate in line with the Shape and not the over population. 048 * </p> 049 * <p> 050 * This implementation uses a LayerManager to handle the manipulation of the 051 * layers. 052 * </p> 053 * <ul> 054 * <li>Level 0 is the oldest layer and the highest level is the newest.</li> 055 * <li>There is always at least one enclosed filter.</li> 056 * <li>The newest filter is the {@code target} into which merges are performed. 057 * <li>Whenever the target is retrieved, or a {@code merge} operation is 058 * performed the code checks if any older layers should be removed, and if so 059 * removes them. It also checks it a new layer should be added, and if so adds 060 * it and sets the {@code target} before the operation.</li> 061 * </ul> 062 * @param <T> The type of Bloom Filter that is used for the layers. 063 * @since 4.5 064 */ 065public class LayeredBloomFilter<T extends BloomFilter> implements BloomFilter, BloomFilterExtractor { 066 /** 067 * A class used to locate matching filters across all the layers. 068 */ 069 private class Finder implements Predicate<BloomFilter> { 070 int[] result = new int[layerManager.getDepth()]; 071 int bfIdx; 072 int resultIdx; 073 BloomFilter bf; 074 075 Finder(final BloomFilter bf) { 076 this.bf = bf; 077 } 078 079 int[] getResult() { 080 return Arrays.copyOf(result, resultIdx); 081 } 082 083 @Override 084 public boolean test(final BloomFilter x) { 085 if (x.contains(bf)) { 086 result[resultIdx++] = bfIdx; 087 } 088 bfIdx++; 089 return true; 090 } 091 } 092 093 private final Shape shape; 094 095 private final LayerManager<T> layerManager; 096 097 /** 098 * Constructor. 099 * 100 * @param shape the Shape of the enclosed Bloom filters 101 * @param layerManager the LayerManager to manage the layers. 102 */ 103 public LayeredBloomFilter(final Shape shape, final LayerManager<T> layerManager) { 104 this.shape = shape; 105 this.layerManager = layerManager; 106 } 107 108 @Override 109 public int cardinality() { 110 return SetOperations.cardinality(this); 111 } 112 113 @Override 114 public int characteristics() { 115 return 0; 116 } 117 118 /** 119 * Forces the execution of the cleanup Consumer that was provided when the associated LayerManager 120 * was built. 121 * 122 * @see LayerManager.Builder#setCleanup(java.util.function.Consumer) 123 */ 124 public void cleanup() { 125 layerManager.cleanup(); 126 } 127 128 @Override 129 public final void clear() { 130 layerManager.clear(); 131 } 132 133 @Override 134 public boolean contains(final BitMapExtractor bitMapExtractor) { 135 return contains(createFilter(bitMapExtractor)); 136 } 137 138 /** 139 * Returns {@code true} if this any layer contained by this filter contains the 140 * specified filter. 141 * <p> 142 * If the {@code other} is a BloomFilterExtractor each filter within the 143 * {@code other} is checked to see if it exits within this filter. 144 * </p> 145 * 146 * @param other the other Bloom filter 147 * @return {@code true} if this filter contains the other filter. 148 */ 149 @Override 150 public boolean contains(final BloomFilter other) { 151 return other instanceof BloomFilterExtractor ? contains((BloomFilterExtractor) other) 152 : !processBloomFilters(x -> !x.contains(other)); 153 } 154 155 /** 156 * Returns {@code true} if each filter within the {@code bloomFilterExtractor} exits within 157 * this filter. 158 * 159 * @param bloomFilterExtractor the BloomFilterExtractor that provides the filters to check 160 * for. 161 * @return {@code true} if this filter contains all of the filters contained in 162 * the {@code bloomFilterExtractor}. 163 */ 164 public boolean contains(final BloomFilterExtractor bloomFilterExtractor) { 165 final boolean[] result = { true }; 166 // return false when we have found a match to short circuit checks 167 return bloomFilterExtractor.processBloomFilters(x -> { 168 result[0] &= contains(x); 169 return result[0]; 170 }); 171 } 172 173 @Override 174 public boolean contains(final Hasher hasher) { 175 return contains(createFilter(hasher)); 176 } 177 178 @Override 179 public boolean contains(final IndexExtractor indexExtractor) { 180 return contains(createFilter(indexExtractor)); 181 } 182 183 @Override 184 public LayeredBloomFilter<T> copy() { 185 return new LayeredBloomFilter<>(shape, layerManager.copy()); 186 } 187 188 /** 189 * Creates a Bloom filter from a BitMapExtractor. 190 * 191 * @param bitMapExtractor the BitMapExtractor to create the filter from. 192 * @return the BloomFilter. 193 */ 194 private BloomFilter createFilter(final BitMapExtractor bitMapExtractor) { 195 final SimpleBloomFilter bf = new SimpleBloomFilter(shape); 196 bf.merge(bitMapExtractor); 197 return bf; 198 } 199 200 /** 201 * Creates a Bloom filter from a Hasher. 202 * 203 * @param hasher the hasher to create the filter from. 204 * @return the BloomFilter. 205 */ 206 private BloomFilter createFilter(final Hasher hasher) { 207 final SimpleBloomFilter bf = new SimpleBloomFilter(shape); 208 bf.merge(hasher); 209 return bf; 210 } 211 212 /** 213 * Creates a Bloom filter from an IndexExtractor. 214 * 215 * @param indexExtractor the IndexExtractor to create the filter from. 216 * @return the BloomFilter. 217 */ 218 private BloomFilter createFilter(final IndexExtractor indexExtractor) { 219 final SimpleBloomFilter bf = new SimpleBloomFilter(shape); 220 bf.merge(indexExtractor); 221 return bf; 222 } 223 224 @Override 225 public int estimateN() { 226 return flatten().estimateN(); 227 } 228 229 @Override 230 public int estimateUnion(final BloomFilter other) { 231 Objects.requireNonNull(other, "other"); 232 final BloomFilter cpy = this.flatten(); 233 cpy.merge(other); 234 return cpy.estimateN(); 235 } 236 237 /** 238 * Finds the layers in which the BitMapExtractor is found. 239 * 240 * @param bitMapExtractor the BitMapExtractor to search for. 241 * @return an array of layer indices in which the Bloom filter is found. 242 */ 243 public int[] find(final BitMapExtractor bitMapExtractor) { 244 final SimpleBloomFilter bf = new SimpleBloomFilter(shape); 245 bf.merge(bitMapExtractor); 246 return find(bf); 247 } 248 249 /** 250 * Finds the layers in which the Bloom filter is found. 251 * 252 * @param bf the Bloom filter to search for. 253 * @return an array of layer indices in which the Bloom filter is found. 254 */ 255 public int[] find(final BloomFilter bf) { 256 final Finder finder = new Finder(bf); 257 processBloomFilters(finder); 258 return finder.getResult(); 259 } 260 261 /** 262 * Finds the layers in which the Hasher is found. 263 * 264 * @param hasher the Hasher to search for. 265 * @return an array of layer indices in which the Bloom filter is found. 266 */ 267 public int[] find(final Hasher hasher) { 268 final SimpleBloomFilter bf = new SimpleBloomFilter(shape); 269 bf.merge(hasher); 270 return find(bf); 271 } 272 273 /** 274 * Finds the layers in which the IndexExtractor is found. 275 * 276 * @param indexExtractor the Index extractor to search for. 277 * @return an array of layer indices in which the Bloom filter is found. 278 */ 279 public int[] find(final IndexExtractor indexExtractor) { 280 final SimpleBloomFilter bf = new SimpleBloomFilter(shape); 281 bf.merge(indexExtractor); 282 return find(bf); 283 } 284 285 /** 286 * Create a standard (non-layered) Bloom filter by merging all of the layers. If 287 * the filter is empty this method will return an empty Bloom filter. 288 * 289 * @return the merged bloom filter. 290 */ 291 @Override 292 public BloomFilter flatten() { 293 final BloomFilter bf = new SimpleBloomFilter(shape); 294 processBloomFilters(bf::merge); 295 return bf; 296 } 297 298 @Override 299 public boolean processBitMaps(final LongPredicate predicate) { 300 return flatten().processBitMaps(predicate); 301 } 302 303 /** 304 * Processes the Bloom filters in depth order with the most recent filters 305 * first. Each filter is passed to the predicate in turn. The function exits on 306 * the first {@code false} returned by the predicate. 307 * 308 * @param bloomFilterPredicate the predicate to execute. 309 * @return {@code true} if all filters passed the predicate, {@code false} 310 * otherwise. 311 */ 312 @Override 313 public final boolean processBloomFilters(final Predicate<BloomFilter> bloomFilterPredicate) { 314 return layerManager.processBloomFilters(bloomFilterPredicate); 315 } 316 317 @Override 318 public boolean processIndices(final IntPredicate predicate) { 319 return processBloomFilters(bf -> bf.processIndices(predicate)); 320 } 321 322 /** 323 * Gets the Bloom filter at the specified depth 324 * 325 * @param depth the depth of the filter to return. 326 * @return the Bloom filter at the specified depth. 327 * @throws NoSuchElementException if depth is not in the range [0,getDepth()) 328 */ 329 public T get(final int depth) { 330 return layerManager.get(depth); 331 } 332 333 /** 334 * Gets the depth of the deepest layer. The minimum value returned by this 335 * method is 1. 336 * 337 * @return the depth of the deepest layer. 338 */ 339 public final int getDepth() { 340 return layerManager.getDepth(); 341 } 342 343 @Override 344 public final Shape getShape() { 345 return shape; 346 } 347 348 @Override 349 public boolean isEmpty() { 350 return processBloomFilters(BloomFilter::isEmpty); 351 } 352 353 @Override 354 public boolean merge(final BitMapExtractor bitMapExtractor) { 355 return layerManager.getTarget().merge(bitMapExtractor); 356 } 357 358 @Override 359 public boolean merge(final BloomFilter bf) { 360 return layerManager.getTarget().merge(bf); 361 } 362 363 @Override 364 public boolean merge(final IndexExtractor indexExtractor) { 365 return layerManager.getTarget().merge(indexExtractor); 366 } 367 368 /** 369 * Forces and advance to the next layer. This method will clean-up the current 370 * layers and generate a new filter layer. In most cases is it unnecessary to 371 * call this method directly. 372 * 373 * @see LayerManager.Builder#setCleanup(java.util.function.Consumer) 374 * @see LayerManager.Builder#setExtendCheck(Predicate) 375 */ 376 public void next() { 377 layerManager.next(); 378 } 379 380}