View Javadoc
1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one or more
3    * contributor license agreements.  See the NOTICE file distributed with
4    * this work for additional information regarding copyright ownership.
5    * The ASF licenses this file to You under the Apache License, Version 2.0
6    * (the "License"); you may not use this file except in compliance with
7    * the License.  You may obtain a copy of the License at
8    *
9    *      http://www.apache.org/licenses/LICENSE-2.0
10   *
11   * Unless required by applicable law or agreed to in writing, software
12   * distributed under the License is distributed on an "AS IS" BASIS,
13   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14   * See the License for the specific language governing permissions and
15   * limitations under the License.
16   */
17  package org.apache.commons.collections4.bloomfilter;
18  
19  import static org.junit.jupiter.api.Assertions.assertArrayEquals;
20  import static org.junit.jupiter.api.Assertions.assertEquals;
21  import static org.junit.jupiter.api.Assertions.assertFalse;
22  import static org.junit.jupiter.api.Assertions.assertNotEquals;
23  import static org.junit.jupiter.api.Assertions.assertNotSame;
24  import static org.junit.jupiter.api.Assertions.assertThrows;
25  import static org.junit.jupiter.api.Assertions.assertTrue;
26  
27  import java.util.ArrayList;
28  import java.util.Arrays;
29  import java.util.BitSet;
30  import java.util.List;
31  
32  import org.junit.jupiter.api.Test;
33  
34  /**
35   * Tests standard methods in the {@link BloomFilter} interface.
36   */
37  public abstract class AbstractBloomFilterTest<T extends BloomFilter> {
38  
39      /**
40       * Test fixture class returns the value as the only value.
41       */
42      public static class BadHasher implements Hasher {
43  
44          IndexExtractor extractor;
45  
46          public BadHasher(final int value) {
47              this.extractor = IndexExtractor.fromIndexArray(value);
48          }
49  
50          @Override
51          public IndexExtractor indices(final Shape shape) {
52              return extractor;
53          }
54      }
55  
56      private void assertFailedIndexExtractorConstructor(final Shape shape, final int[] values) {
57          final IndexExtractor indices = IndexExtractor.fromIndexArray(values);
58          assertThrows(IllegalArgumentException.class, () -> createFilter(shape, indices));
59      }
60  
61      private void assertIndexExtractorMerge(final Shape shape, final int[] values, final int[] expected) {
62          final IndexExtractor indices = IndexExtractor.fromIndexArray(values);
63          final BloomFilter filter = createFilter(shape, indices);
64          final List<Integer> lst = new ArrayList<>();
65          filter.processIndices(x -> {
66              lst.add(x);
67              return true;
68          });
69          assertEquals(expected.length, lst.size());
70          for (final int value : expected) {
71              assertTrue(lst.contains(Integer.valueOf(value)), "Missing " + value);
72          }
73      }
74  
75      /**
76       * Creates an empty version of the BloomFilter implementation we are testing.
77       *
78       * @param shape the shape of the filter.
79       * @return a BloomFilter implementation.
80       */
81      protected abstract T createEmptyFilter(Shape shape);
82  
83      /**
84       * Creates the BloomFilter implementation we are testing.
85       *
86       * @param shape the shape of the filter.
87       * @param extractor A BitMap extractor to build the filter with.
88       * @return a BloomFilter implementation.
89       */
90      protected final T createFilter(final Shape shape, final BitMapExtractor extractor) {
91          final T bf = createEmptyFilter(shape);
92          bf.merge(extractor);
93          return bf;
94      }
95  
96      /**
97       * Creates the BloomFilter implementation we are testing.
98       *
99       * @param shape the shape of the filter.
100      * @param hasher the hasher to use to create the filter.
101      * @return a BloomFilter implementation.
102      */
103     protected final T createFilter(final Shape shape, final Hasher hasher) {
104         final T bf = createEmptyFilter(shape);
105         bf.merge(hasher);
106         return bf;
107     }
108 
109     /**
110      * Creates the BloomFilter implementation we are testing.
111      *
112      * @param shape the shape of the filter.
113      * @param extractor An Index extractor to build the filter with.
114      * @return a BloomFilter implementation.
115      */
116     protected final T createFilter(final Shape shape, final IndexExtractor extractor) {
117         final T bf = createEmptyFilter(shape);
118         bf.merge(extractor);
119         return bf;
120     }
121 
122     /**
123      * The shape of the Bloom filters for testing.
124      * <ul>
125      *  <li>Hash functions (k) = 17
126      *  <li>Number of bits (m) = 72
127      * </ul>
128      * @return the testing shape.
129      */
130     protected Shape getTestShape() {
131         return Shape.fromKM(17, 72);
132     }
133 
134     /**
135      * Tests that asBitMapArray works correctly.
136      */
137     @Test
138     public final void testAsBitMapArray() {
139 
140         // test when multiple long values are returned.
141         final IncrementingHasher hasher = new IncrementingHasher(63, 1);
142         final BloomFilter bf = createFilter(Shape.fromKM(2, 72), hasher);
143         final long[] lb = bf.asBitMapArray();
144         assertEquals(2, lb.length);
145         assertEquals(0x8000000000000000L, lb[0]);
146         assertEquals(0x1, lb[1]);
147     }
148 
149     @Test
150     public void testBitMapExtractorSize() {
151         final int[] idx = new int[1];
152         createFilter(getTestShape(), TestingHashers.FROM1).processBitMaps(i -> {
153             idx[0]++;
154             return true;
155         });
156         assertEquals(BitMaps.numberOfBitMaps(getTestShape()), idx[0]);
157 
158         idx[0] = 0;
159         createEmptyFilter(getTestShape()).processBitMaps(i -> {
160             idx[0]++;
161             return true;
162         });
163         assertEquals(BitMaps.numberOfBitMaps(getTestShape()), idx[0]);
164     }
165 
166     @Test
167     public void testCardinalityAndIsEmpty() {
168         testCardinalityAndIsEmpty(createEmptyFilter(getTestShape()));
169     }
170 
171     /**
172      * Tests cardinality and isEmpty. Bloom filter must be able to accept multiple
173      * IndexExtractor merges until all the bits are populated.
174      *
175      * @param bf The Bloom filter to test.
176      */
177     protected void testCardinalityAndIsEmpty(final BloomFilter bf) {
178         assertTrue(bf.isEmpty());
179         assertEquals(0, bf.cardinality());
180         for (int i = 0; i < getTestShape().getNumberOfBits(); i++) {
181             bf.merge(IndexExtractor.fromIndexArray(i));
182             assertFalse(bf.isEmpty(), "Wrong value at " + i);
183             assertEquals(i + 1, bf.cardinality(), "Wrong value at " + i);
184         }
185 
186         // check operations in reverse order
187         bf.clear();
188         assertEquals(0, bf.cardinality());
189         assertTrue(bf.isEmpty());
190         for (int i = 0; i < getTestShape().getNumberOfBits(); i++) {
191             bf.merge(IndexExtractor.fromIndexArray(i));
192             assertEquals(i + 1, bf.cardinality(), "Wrong value at " + i);
193             assertFalse(bf.isEmpty(), "Wrong value at " + i);
194         }
195     }
196 
197     @Test
198     public void testClear() {
199         final BloomFilter bf1 = createFilter(getTestShape(), TestingHashers.FROM1);
200         assertNotEquals(0, bf1.cardinality());
201         bf1.clear();
202         assertEquals(0, bf1.cardinality());
203     }
204 
205     @Test
206     public final void testContains() {
207         BloomFilter bf1 = createFilter(getTestShape(), TestingHashers.FROM1);
208         final BloomFilter bf2 = TestingHashers.populateFromHashersFrom1AndFrom11(createEmptyFilter(getTestShape()));
209 
210         assertTrue(bf1.contains(bf1), "BF1 Should contain itself");
211         assertTrue(bf2.contains(bf2), "BF2 Should contain itself");
212         assertFalse(bf1.contains(bf2), "BF1 should not contain BF2");
213         assertTrue(bf2.contains(bf1), "BF2 should contain BF1");
214 
215         assertTrue(bf2.contains(new IncrementingHasher(1, 1)), "BF2 Should contain this hasher");
216         assertFalse(bf2.contains(new IncrementingHasher(1, 3)), "BF2 Should not contain this hasher");
217 
218         IndexExtractor indexExtractor = new IncrementingHasher(1, 1).indices(getTestShape());
219         assertTrue(bf2.contains(indexExtractor), "BF2 Should contain this hasher");
220         indexExtractor = new IncrementingHasher(1, 3).indices(getTestShape());
221         assertFalse(bf2.contains(indexExtractor), "BF2 Should not contain this hasher");
222 
223         BitMapExtractor bitMapExtractor = BitMapExtractor.fromIndexExtractor(new IncrementingHasher(1, 1).indices(getTestShape()),
224                 getTestShape().getNumberOfBits());
225         assertTrue(bf2.contains(bitMapExtractor), "BF2 Should contain this hasher");
226         bitMapExtractor = BitMapExtractor.fromIndexExtractor(new IncrementingHasher(1, 3).indices(getTestShape()), getTestShape().getNumberOfBits());
227         assertFalse(bf2.contains(bitMapExtractor), "BF2 Should not contain this hasher");
228 
229         // Test different lengths
230         bf1 = createFilter(getTestShape(), TestingHashers.FROM1);
231         final BloomFilter bf3 = createFilter(Shape.fromKM(getTestShape().getNumberOfHashFunctions(), Long.SIZE - 1), TestingHashers.FROM1);
232         assertTrue(bf1.contains(bf3));
233         assertTrue(bf3.contains(bf1));
234 
235         final BloomFilter bf4 = TestingHashers.populateRange(createEmptyFilter(Shape.fromKM(getTestShape().getNumberOfHashFunctions(), Long.SIZE - 1)), 1,
236                 11 + getTestShape().getNumberOfHashFunctions());
237 
238         assertFalse(bf1.contains(bf4));
239         assertTrue(bf4.contains(bf1));
240     }
241 
242     @Test
243     public void testCopy() {
244         testCopy(true);
245     }
246 
247     protected void testCopy(final boolean assertClass) {
248         final BloomFilter bf1 = createFilter(getTestShape(), TestingHashers.FROM1);
249         assertNotEquals(0, bf1.cardinality());
250         final BloomFilter copy = bf1.copy();
251         assertNotSame(bf1, copy);
252         assertArrayEquals(bf1.asBitMapArray(), copy.asBitMapArray());
253         assertArrayEquals(bf1.asIndexArray(), copy.asIndexArray());
254         assertEquals(bf1.cardinality(), copy.cardinality());
255         assertEquals(bf1.characteristics(), copy.characteristics());
256         assertEquals(bf1.estimateN(), copy.estimateN());
257         if (assertClass) {
258             assertEquals(bf1.getClass(), copy.getClass());
259         }
260         assertEquals(bf1.getShape(), copy.getShape());
261         assertEquals(bf1.isEmpty(), copy.isEmpty());
262         assertEquals(bf1.isFull(), copy.isFull());
263     }
264 
265     @Test
266     public void testEmptyAfterMergeWithNothing() {
267         // test the case where is empty after merge
268         // in this case the internal cardinality == -1
269         final BloomFilter bf = createEmptyFilter(getTestShape());
270         bf.merge(IndexExtractor.fromIndexArray());
271         assertTrue(bf.isEmpty());
272     }
273 
274     /**
275      * Tests that the estimated intersection calculations are correct.
276      */
277     @Test
278     public final void testEstimateIntersection() {
279         final BloomFilter bf = createFilter(getTestShape(), TestingHashers.FROM1);
280         final BloomFilter bf2 = TestingHashers.populateFromHashersFrom1AndFrom11(createEmptyFilter(getTestShape()));
281 
282         final BloomFilter bf3 = TestingHashers.populateEntireFilter(createEmptyFilter(getTestShape()));
283 
284         assertEquals(1, bf.estimateIntersection(bf2));
285         assertEquals(1, bf2.estimateIntersection(bf));
286         assertEquals(1, bf.estimateIntersection(bf3));
287         assertEquals(1, bf2.estimateIntersection(bf));
288         assertEquals(2, bf3.estimateIntersection(bf2));
289 
290         final BloomFilter bf4 = createEmptyFilter(getTestShape());
291 
292         assertEquals(0, bf.estimateIntersection(bf4));
293         assertEquals(0, bf4.estimateIntersection(bf));
294 
295         final int midPoint = getTestShape().getNumberOfBits() / 2;
296         final BloomFilter bf5 = TestingHashers.populateRange(createEmptyFilter(getTestShape()), 0, midPoint);
297         final BloomFilter bf6 = TestingHashers.populateRange(createEmptyFilter(getTestShape()), midPoint + 1, getTestShape().getNumberOfBits() - 1);
298         assertThrows(IllegalArgumentException.class, () -> bf5.estimateIntersection(bf6));
299 
300         // infinite with infinite
301         assertEquals(Integer.MAX_VALUE, bf3.estimateIntersection(bf3));
302     }
303 
304     /**
305      * Tests that the size estimate is correctly calculated.
306      */
307     @Test
308     public final void testEstimateN() {
309         // build a filter
310         BloomFilter filter1 = createFilter(getTestShape(), TestingHashers.FROM1);
311         assertEquals(1, filter1.estimateN());
312 
313         // the data provided above do not generate an estimate that is equivalent to the
314         // actual.
315         filter1.merge(new IncrementingHasher(4, 1));
316         assertEquals(1, filter1.estimateN());
317 
318         filter1.merge(new IncrementingHasher(17, 1));
319 
320         assertEquals(3, filter1.estimateN());
321 
322         filter1 = TestingHashers.populateEntireFilter(createEmptyFilter(getTestShape()));
323         assertEquals(Integer.MAX_VALUE, filter1.estimateN());
324     }
325 
326     /**
327      * Tests that the estimated union calculations are correct.
328      */
329     @Test
330     public final void testEstimateUnion() {
331         final BloomFilter bf = createFilter(getTestShape(), TestingHashers.FROM1);
332         final BloomFilter bf2 = createFilter(getTestShape(), TestingHashers.FROM11);
333 
334         assertEquals(2, bf.estimateUnion(bf2));
335         assertEquals(2, bf2.estimateUnion(bf));
336 
337         final BloomFilter bf3 = createEmptyFilter(getTestShape());
338 
339         assertEquals(1, bf.estimateUnion(bf3));
340         assertEquals(1, bf3.estimateUnion(bf));
341     }
342 
343     @Test
344     public void testIndexExtractorMerge() {
345         final Shape shape = Shape.fromKM(5, 10);
346 
347         assertIndexExtractorMerge(shape, new int[] {0, 2, 4, 6, 8}, new int[] {0, 2, 4, 6, 8});
348         // test duplicate values
349         assertIndexExtractorMerge(shape, new int[] {0, 2, 4, 2, 8}, new int[] {0, 2, 4, 8});
350         // test negative values
351         assertFailedIndexExtractorConstructor(shape, new int[] {0, 2, 4, -2, 8});
352         // test index too large
353         assertFailedIndexExtractorConstructor(shape, new int[] {0, 2, 4, 12, 8});
354         // test no indices
355         assertIndexExtractorMerge(shape, new int[0], new int[0]);
356     }
357 
358     /**
359      * Tests that isFull() returns the proper values.
360      */
361     @Test
362     public final void testIsFull() {
363 
364         // create empty filter
365         BloomFilter filter = createEmptyFilter(getTestShape());
366         assertFalse(filter.isFull(), "Should not be full");
367 
368         filter = TestingHashers.populateEntireFilter(filter);
369         assertTrue(filter.isFull(), "Should be full");
370 
371         filter = createFilter(getTestShape(), new IncrementingHasher(1, 3));
372         assertFalse(filter.isFull(), "Should not be full");
373     }
374 
375     /**
376      * Tests that merging bloom filters works as expected with a generic BloomFilter.
377      */
378     @Test
379     public final void testMerge() {
380 
381         final BloomFilter bf1 = createFilter(getTestShape(), TestingHashers.FROM1);
382         final BloomFilter bf2 = createFilter(getTestShape(), TestingHashers.FROM11);
383         final BloomFilter bf3 = bf1.copy();
384         bf3.merge(bf2);
385 
386         // test with BloomFilter
387 
388         final long[] bf1Val = bf1.asBitMapArray();
389         final long[] bf2Val = bf2.asBitMapArray();
390         for (int i = 0; i < bf1Val.length; i++) {
391             bf1Val[i] |= bf2Val[i];
392         }
393         bf1.merge(bf2);
394 
395         final long[] bf1New = bf1.asBitMapArray();
396         for (int i = 0; i < bf1Val.length; i++) {
397             assertEquals(bf1Val[i], bf1New[i], "Bad value at " + i);
398         }
399 
400         assertTrue(bf1.contains(bf2), "Should contain bf2");
401         assertTrue(bf1.contains(bf3), "Should contain bf3");
402 
403         // test with hasher
404 
405         final BloomFilter bf4 = createFilter(getTestShape(), TestingHashers.FROM1);
406         bf4.merge(TestingHashers.FROM11);
407 
408         assertTrue(bf4.contains(bf2), "Should contain Bf2");
409         assertTrue(bf4.contains(bf3), "Should contain Bf3");
410 
411         // test with hasher returning numbers out of range
412         assertThrows(IllegalArgumentException.class,
413                 () -> bf1.merge(new BadHasher(bf1.getShape().getNumberOfBits())));
414         assertThrows(IllegalArgumentException.class, () -> bf1.merge(new BadHasher(-1)));
415 
416         // test error when bloom filter returns values out of range
417         final Shape s = Shape.fromKM(getTestShape().getNumberOfHashFunctions(), getTestShape().getNumberOfBits() * 3);
418         final Hasher h = new IncrementingHasher(getTestShape().getNumberOfBits() * 2, 1);
419         final BloomFilter bf5 = new SimpleBloomFilter(s);
420         bf5.merge(h);
421         assertThrows(IllegalArgumentException.class, () -> bf1.merge(bf5));
422 
423         final BloomFilter bf6 = new SparseBloomFilter(s);
424         bf6.merge(h);
425         assertThrows(IllegalArgumentException.class, () -> bf1.merge(bf6));
426     }
427 
428     @Test
429     public void testMergeWithBadHasher() {
430         // value too large
431         final BloomFilter f = createEmptyFilter(getTestShape());
432         assertThrows(IllegalArgumentException.class,
433                 () -> f.merge(new BadHasher(getTestShape().getNumberOfBits())));
434         // negative value
435         final BloomFilter f2 = createEmptyFilter(getTestShape());
436         assertThrows(IllegalArgumentException.class, () -> f2.merge(new BadHasher(-1)));
437     }
438 
439     @Test
440     public void testMergeWithBitMapExtractor() {
441         final int bitMapCount = BitMaps.numberOfBitMaps(getTestShape());
442         for (int i = 0; i < 5; i++) {
443             final long[] values = new long[bitMapCount];
444             for (final int idx : DefaultIndexExtractorTest.generateIntArray(getTestShape().getNumberOfHashFunctions(), getTestShape().getNumberOfBits())) {
445                 BitMaps.set(values, idx);
446             }
447             final BloomFilter f = createFilter(getTestShape(), BitMapExtractor.fromBitMapArray(values));
448             final List<Long> lst = new ArrayList<>();
449             for (final long l : values) {
450                 lst.add(l);
451             }
452             assertTrue(f.processBitMaps(l -> lst.remove(Long.valueOf(l))));
453             assertTrue(lst.isEmpty());
454         }
455         // values too large
456         final long[] values = new long[bitMapCount];
457         Arrays.fill(values, Long.MAX_VALUE);
458         final BitMapExtractor badExtractor = BitMapExtractor.fromBitMapArray(values);
459         final BloomFilter bf = createEmptyFilter(getTestShape());
460         assertThrows(IllegalArgumentException.class, () -> bf.merge(badExtractor));
461 
462         // test where merged bits exceed expected bits but both bitmaps are the same length.
463         final BitMapExtractor badExtractor2 = BitMapExtractor.fromBitMapArray(0x80_00_00_00_00_00_00_00L);
464         final BloomFilter bf2 = createEmptyFilter(Shape.fromKM(3, 32));
465         assertThrows(IllegalArgumentException.class, () -> bf2.merge(badExtractor2));
466     }
467 
468     @Test
469     public void testMergeWithHasher() {
470         for (int i = 0; i < 5; i++) {
471             final BloomFilter f = createEmptyFilter(getTestShape());
472             final int[] expected = DefaultIndexExtractorTest.generateIntArray(getTestShape().getNumberOfHashFunctions(), getTestShape().getNumberOfBits());
473             final Hasher hasher = new ArrayHasher(expected);
474             f.merge(hasher);
475             // create sorted unique array of expected values
476             assertArrayEquals(DefaultIndexExtractorTest.unique(expected), f.asIndexArray());
477         }
478     }
479 
480     @Test
481     public void testMergeWithIndexExtractor() {
482         for (int i = 0; i < 5; i++) {
483             final int[] values = DefaultIndexExtractorTest.generateIntArray(getTestShape().getNumberOfHashFunctions(), getTestShape().getNumberOfBits());
484             final BloomFilter f = createFilter(getTestShape(), IndexExtractor.fromIndexArray(values));
485             final BitSet uniqueValues = DefaultIndexExtractorTest.uniqueSet(values);
486             assertTrue(f.processIndices(idx -> {
487                 final boolean result = uniqueValues.get(idx);
488                 uniqueValues.clear(idx);
489                 return result;
490             }));
491             assertTrue(uniqueValues.isEmpty());
492         }
493         // value to large
494         final BloomFilter f1 = createEmptyFilter(getTestShape());
495         assertThrows(IllegalArgumentException.class,
496                 () -> f1.merge(IndexExtractor.fromIndexArray(getTestShape().getNumberOfBits())));
497         // negative value
498         final BloomFilter f2 = createEmptyFilter(getTestShape());
499         assertThrows(IllegalArgumentException.class,
500                 () -> f2.merge(IndexExtractor.fromIndexArray(-1)));
501     }
502 
503     @Test
504     public final void testNegativeIntersection() {
505         final IndexExtractor p1 = IndexExtractor.fromIndexArray(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 20, 26, 28, 30, 32, 34, 35, 36, 37, 39, 40, 41, 42, 43, 45, 46, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71);
506         final IndexExtractor p2 = IndexExtractor.fromIndexArray(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27);
507 
508         final BloomFilter filter1 = createEmptyFilter(Shape.fromKM(17, 72));
509         filter1.merge(p1);
510         final BloomFilter filter2 = createEmptyFilter(Shape.fromKM(17, 72));
511         filter2.merge(p2);
512         assertEquals(0, filter1.estimateIntersection(filter2));
513     }
514 }