1 /*
2 * Licensed to the Apache Software Foundation (ASF) under one or more
3 * contributor license agreements. See the NOTICE file distributed with
4 * this work for additional information regarding copyright ownership.
5 * The ASF licenses this file to You under the Apache License, Version 2.0
6 * (the "License"); you may not use this file except in compliance with
7 * the License. You may obtain a copy of the License at
8 *
9 * http://www.apache.org/licenses/LICENSE-2.0
10 *
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
16 */
17 package org.apache.commons.collections4.bloomfilter;
18
19 import java.util.Arrays;
20 import java.util.NoSuchElementException;
21 import java.util.Objects;
22 import java.util.function.IntPredicate;
23 import java.util.function.LongPredicate;
24 import java.util.function.Predicate;
25
26 /**
27 * Layered Bloom filters are described in Zhiwang, Cen; Jungang, Xu; Jian, Sun (2010), "A multi-layer Bloom filter for duplicated URL detection", Proc. 3rd
28 * International Conference on Advanced Computer Theory and Engineering (ICACTE 2010), vol. 1, pp. V1-586-V1-591, doi:10.1109/ICACTE.2010.5578947, ISBN
29 * 978-1-4244-6539-2, S2CID 3108985
30 * <p>
31 * In short, Layered Bloom filter contains several bloom filters arranged in layers.
32 * </p>
33 * <ul>
34 * <li>When membership in the filter is checked each layer in turn is checked and if a match is found {@code true} is returned.</li>
35 * <li>When merging each bloom filter is merged into the newest filter in the list of layers.</li>
36 * <li>When questions of cardinality are asked the cardinality of the union of the enclosed Bloom filters is used.</li>
37 * </ul>
38 * <p>
39 * The net result is that the layered Bloom filter can be populated with more items than the Shape would indicate and yet still return a false positive rate in
40 * line with the Shape and not the over population.
41 * </p>
42 * <p>
43 * This implementation uses a LayerManager to handle the manipulation of the layers.
44 * </p>
45 * <ul>
46 * <li>Level 0 is the oldest layer and the highest level is the newest.</li>
47 * <li>There is always at least one enclosed filter.</li>
48 * <li>The newest filter is the {@code target} into which merges are performed.
49 * <li>Whenever the target is retrieved, or a {@code merge} operation is performed the code checks if any older layers should be removed, and if so removes
50 * them. It also checks it a new layer should be added, and if so adds it and sets the {@code target} before the operation.</li>
51 * </ul>
52 *
53 * @param <T> The type of Bloom Filter that is used for the layers.
54 * @since 4.5.0-M2
55 */
56 public class LayeredBloomFilter<T extends BloomFilter<T>> implements BloomFilter<LayeredBloomFilter<T>>, BloomFilterExtractor {
57
58 /**
59 * A class used to locate matching filters across all the layers.
60 */
61 private class Finder implements Predicate<BloomFilter> {
62 int[] result = new int[layerManager.getDepth()];
63 int bfIdx;
64 int resultIdx;
65 BloomFilter<?> bf;
66
67 Finder(final BloomFilter<?> bf) {
68 this.bf = bf;
69 }
70
71 int[] getResult() {
72 return Arrays.copyOf(result, resultIdx);
73 }
74
75 @Override
76 public boolean test(final BloomFilter x) {
77 if (x.contains(bf)) {
78 result[resultIdx++] = bfIdx;
79 }
80 bfIdx++;
81 return true;
82 }
83 }
84
85 private final Shape shape;
86
87 private final LayerManager<T> layerManager;
88
89 /**
90 * Constructs a new instance.
91 *
92 * @param shape the Shape of the enclosed Bloom filters
93 * @param layerManager the LayerManager to manage the layers.
94 */
95 public LayeredBloomFilter(final Shape shape, final LayerManager<T> layerManager) {
96 this.shape = shape;
97 this.layerManager = layerManager;
98 }
99
100 @Override
101 public int cardinality() {
102 return SetOperations.cardinality(this);
103 }
104
105 @Override
106 public int characteristics() {
107 return 0;
108 }
109
110 /**
111 * Forces the execution of the cleanup Consumer that was provided when the associated LayerManager was built.
112 *
113 * @see LayerManager.Builder#setCleanup(java.util.function.Consumer)
114 */
115 public void cleanup() {
116 layerManager.cleanup();
117 }
118
119 @Override
120 public final void clear() {
121 layerManager.clear();
122 }
123
124 @Override
125 public boolean contains(final BitMapExtractor bitMapExtractor) {
126 return contains(createFilter(bitMapExtractor));
127 }
128
129 /**
130 * Returns {@code true} if this any layer contained by this filter contains the specified filter.
131 * <p>
132 * If the {@code other} is a BloomFilterExtractor each filter within the {@code other} is checked to see if it exits within this filter.
133 * </p>
134 *
135 * @param other the other Bloom filter
136 * @return {@code true} if this filter contains the other filter.
137 */
138 @Override
139 public boolean contains(final BloomFilter other) {
140 return other instanceof BloomFilterExtractor ? contains((BloomFilterExtractor) other) : !processBloomFilters(x -> !x.contains(other));
141 }
142
143 /**
144 * Returns {@code true} if each filter within the {@code bloomFilterExtractor} exits within this filter.
145 *
146 * @param bloomFilterExtractor the BloomFilterExtractor that provides the filters to check for.
147 * @return {@code true} if this filter contains all of the filters contained in the {@code bloomFilterExtractor}.
148 */
149 public boolean contains(final BloomFilterExtractor bloomFilterExtractor) {
150 final boolean[] result = { true };
151 // return false when we have found a match to short circuit checks
152 return bloomFilterExtractor.processBloomFilters(x -> {
153 result[0] &= contains(x);
154 return result[0];
155 });
156 }
157
158 @Override
159 public boolean contains(final Hasher hasher) {
160 return contains(createFilter(hasher));
161 }
162
163 @Override
164 public boolean contains(final IndexExtractor indexExtractor) {
165 return contains(createFilter(indexExtractor));
166 }
167
168 /**
169 * Creates a new instance of this {@link LayeredBloomFilter} with the same properties as the current one.
170 *
171 * @return a copy of this {@link LayeredBloomFilter}.
172 */
173 @Override
174 public LayeredBloomFilter<T> copy() {
175 return new LayeredBloomFilter<>(shape, layerManager.copy());
176 }
177
178 /**
179 * Creates a Bloom filter from a BitMapExtractor.
180 *
181 * @param bitMapExtractor the BitMapExtractor to create the filter from.
182 * @return the BloomFilter.
183 */
184 private SimpleBloomFilter createFilter(final BitMapExtractor bitMapExtractor) {
185 final SimpleBloomFilter bf = new SimpleBloomFilter(shape);
186 bf.merge(bitMapExtractor);
187 return bf;
188 }
189
190 /**
191 * Creates a Bloom filter from a Hasher.
192 *
193 * @param hasher the hasher to create the filter from.
194 * @return the BloomFilter.
195 */
196 private SimpleBloomFilter createFilter(final Hasher hasher) {
197 final SimpleBloomFilter bf = new SimpleBloomFilter(shape);
198 bf.merge(hasher);
199 return bf;
200 }
201
202 /**
203 * Creates a Bloom filter from an IndexExtractor.
204 *
205 * @param indexExtractor the IndexExtractor to create the filter from.
206 * @return the BloomFilter.
207 */
208 private SimpleBloomFilter createFilter(final IndexExtractor indexExtractor) {
209 final SimpleBloomFilter bf = new SimpleBloomFilter(shape);
210 bf.merge(indexExtractor);
211 return bf;
212 }
213
214 @Override
215 public int estimateN() {
216 return flatten().estimateN();
217 }
218
219 @Override
220 public int estimateUnion(final BloomFilter other) {
221 Objects.requireNonNull(other, "other");
222 final BloomFilter cpy = this.flatten();
223 cpy.merge(other);
224 return cpy.estimateN();
225 }
226
227 /**
228 * Finds the layers in which the BitMapExtractor is found.
229 *
230 * @param bitMapExtractor the BitMapExtractor to search for.
231 * @return an array of layer indices in which the Bloom filter is found.
232 */
233 public int[] find(final BitMapExtractor bitMapExtractor) {
234 final SimpleBloomFilter bf = new SimpleBloomFilter(shape);
235 bf.merge(bitMapExtractor);
236 return find(bf);
237 }
238
239 /**
240 * Finds the layers in which the Bloom filter is found.
241 *
242 * @param bf the Bloom filter to search for.
243 * @return an array of layer indices in which the Bloom filter is found.
244 */
245 public int[] find(final BloomFilter bf) {
246 final Finder finder = new Finder(bf);
247 processBloomFilters(finder);
248 return finder.getResult();
249 }
250
251 /**
252 * Finds the layers in which the Hasher is found.
253 *
254 * @param hasher the Hasher to search for.
255 * @return an array of layer indices in which the Bloom filter is found.
256 */
257 public int[] find(final Hasher hasher) {
258 final SimpleBloomFilter bf = new SimpleBloomFilter(shape);
259 bf.merge(hasher);
260 return find(bf);
261 }
262
263 /**
264 * Finds the layers in which the IndexExtractor is found.
265 *
266 * @param indexExtractor the Index extractor to search for.
267 * @return an array of layer indices in which the Bloom filter is found.
268 */
269 public int[] find(final IndexExtractor indexExtractor) {
270 final SimpleBloomFilter bf = new SimpleBloomFilter(shape);
271 bf.merge(indexExtractor);
272 return find(bf);
273 }
274
275 /**
276 * Create a standard (non-layered) Bloom filter by merging all of the layers. If the filter is empty this method will return an empty Bloom filter.
277 *
278 * @return the merged bloom filter.
279 */
280 @Override
281 public SimpleBloomFilter flatten() {
282 final SimpleBloomFilter bf = new SimpleBloomFilter(shape);
283 processBloomFilters(bf::merge);
284 return bf;
285 }
286
287 /**
288 * Gets the Bloom filter at the specified depth
289 *
290 * @param depth the depth of the filter to return.
291 * @return the Bloom filter at the specified depth.
292 * @throws NoSuchElementException if depth is not in the range [0,getDepth())
293 */
294 public T get(final int depth) {
295 return layerManager.get(depth);
296 }
297
298 /**
299 * Gets the depth of the deepest layer. The minimum value returned by this method is 1.
300 *
301 * @return the depth of the deepest layer.
302 */
303 public final int getDepth() {
304 return layerManager.getDepth();
305 }
306
307 @Override
308 public final Shape getShape() {
309 return shape;
310 }
311
312 @Override
313 public boolean isEmpty() {
314 return processBloomFilters(BloomFilter::isEmpty);
315 }
316
317 @Override
318 public boolean merge(final BitMapExtractor bitMapExtractor) {
319 return layerManager.getTarget().merge(bitMapExtractor);
320 }
321
322 @Override
323 public boolean merge(final BloomFilter bf) {
324 return layerManager.getTarget().merge(bf);
325 }
326
327 @Override
328 public boolean merge(final IndexExtractor indexExtractor) {
329 return layerManager.getTarget().merge(indexExtractor);
330 }
331
332 /**
333 * Forces and advance to the next layer. This method will clean-up the current layers and generate a new filter layer. In most cases is it unnecessary to
334 * call this method directly.
335 *
336 * @see LayerManager.Builder#setCleanup(java.util.function.Consumer)
337 * @see LayerManager.Builder#setExtendCheck(Predicate)
338 */
339 public void next() {
340 layerManager.next();
341 }
342
343 @Override
344 public boolean processBitMaps(final LongPredicate predicate) {
345 return flatten().processBitMaps(predicate);
346 }
347
348 /**
349 * Processes the Bloom filters in depth order with the most recent filters first. Each filter is passed to the predicate in turn. The function exits on the
350 * first {@code false} returned by the predicate.
351 *
352 * @param bloomFilterPredicate the predicate to execute.
353 * @return {@code true} if all filters passed the predicate, {@code false} otherwise.
354 */
355 @Override
356 public final boolean processBloomFilters(final Predicate<BloomFilter> bloomFilterPredicate) {
357 return layerManager.processBloomFilters(bloomFilterPredicate);
358 }
359
360 @Override
361 public boolean processIndices(final IntPredicate predicate) {
362 return processBloomFilters(bf -> bf.processIndices(predicate));
363 }
364
365 }