View Javadoc
1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one or more
3    * contributor license agreements.  See the NOTICE file distributed with
4    * this work for additional information regarding copyright ownership.
5    * The ASF licenses this file to You under the Apache License, Version 2.0
6    * (the "License"); you may not use this file except in compliance with
7    * the License.  You may obtain a copy of the License at
8    *
9    *      http://www.apache.org/licenses/LICENSE-2.0
10   *
11   * Unless required by applicable law or agreed to in writing, software
12   * distributed under the License is distributed on an "AS IS" BASIS,
13   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14   * See the License for the specific language governing permissions and
15   * limitations under the License.
16   */
17  package org.apache.commons.collections4.bloomfilter;
18  
19  import static org.junit.jupiter.api.Assertions.assertArrayEquals;
20  import static org.junit.jupiter.api.Assertions.assertEquals;
21  import static org.junit.jupiter.api.Assertions.assertFalse;
22  import static org.junit.jupiter.api.Assertions.assertNotEquals;
23  import static org.junit.jupiter.api.Assertions.assertThrows;
24  import static org.junit.jupiter.api.Assertions.assertTrue;
25  
26  import java.util.ArrayList;
27  import java.util.Arrays;
28  import java.util.BitSet;
29  import java.util.List;
30  
31  import org.junit.jupiter.api.Test;
32  
33  /**
34   * Test standard methods in the {@link BloomFilter} interface.
35   */
36  public abstract class AbstractBloomFilterTest<T extends BloomFilter> {
37  
38      /**
39       * Testing class returns the value as the only value.
40       */
41      public static class BadHasher implements Hasher {
42  
43          IndexProducer producer;
44  
45          public BadHasher(final int value) {
46              this.producer = IndexProducer.fromIndexArray(value);
47          }
48  
49          @Override
50          public IndexProducer indices(final Shape shape) {
51              return producer;
52          }
53      }
54  
55      private void assertFailedIndexProducerConstructor(final Shape shape, final int[] values) {
56          final IndexProducer indices = IndexProducer.fromIndexArray(values);
57          assertThrows(IllegalArgumentException.class, () -> createFilter(shape, indices));
58      }
59  
60      private void assertIndexProducerMerge(final Shape shape, final int[] values, final int[] expected) {
61          final IndexProducer indices = IndexProducer.fromIndexArray(values);
62          final BloomFilter filter = createFilter(shape, indices);
63          final List<Integer> lst = new ArrayList<>();
64          filter.forEachIndex(x -> {
65              lst.add(x);
66              return true;
67          });
68          assertEquals(expected.length, lst.size());
69          for (final int value : expected) {
70              assertTrue(lst.contains(Integer.valueOf(value)), "Missing " + value);
71          }
72      }
73  
74      /**
75       * Create an empty version of the BloomFilter implementation we are testing.
76       *
77       * @param shape the shape of the filter.
78       * @return a BloomFilter implementation.
79       */
80      protected abstract T createEmptyFilter(Shape shape);
81  
82      /**
83       * Create the BloomFilter implementation we are testing.
84       *
85       * @param shape the shape of the filter.
86       * @param producer A BitMap producer to build the filter with.
87       * @return a BloomFilter implementation.
88       */
89      protected final T createFilter(final Shape shape, final BitMapProducer producer) {
90          final T bf = createEmptyFilter(shape);
91          bf.merge(producer);
92          return bf;
93      }
94  
95      /**
96       * Create the BloomFilter implementation we are testing.
97       *
98       * @param shape the shape of the filter.
99       * @param hasher the hasher to use to create the filter.
100      * @return a BloomFilter implementation.
101      */
102     protected final T createFilter(final Shape shape, final Hasher hasher) {
103         final T bf = createEmptyFilter(shape);
104         bf.merge(hasher);
105         return bf;
106     }
107 
108     /**
109      * Create the BloomFilter implementation we are testing.
110      *
111      * @param shape the shape of the filter.
112      * @param producer An Index producer to build the filter with.
113      * @return a BloomFilter implementation.
114      */
115     protected final T createFilter(final Shape shape, final IndexProducer producer) {
116         final T bf = createEmptyFilter(shape);
117         bf.merge(producer);
118         return bf;
119     }
120 
121     /**
122      * The shape of the Bloom filters for testing.
123      * <ul>
124      *  <li>Hash functions (k) = 17
125      *  <li>Number of bits (m) = 72
126      * </ul>
127      * @return the testing shape.
128      */
129     protected Shape getTestShape() {
130         return Shape.fromKM(17, 72);
131     }
132 
133     /**
134      * Tests that asBitMapArray works correctly.
135      */
136     @Test
137     public final void testAsBitMapArray() {
138 
139         // test when multiple long values are returned.
140         final IncrementingHasher hasher = new IncrementingHasher(63, 1);
141         final BloomFilter bf = createFilter(Shape.fromKM(2, 72), hasher);
142         final long[] lb = bf.asBitMapArray();
143         assertEquals(2, lb.length);
144         assertEquals(0x8000000000000000L, lb[0]);
145         assertEquals(0x1, lb[1]);
146     }
147 
148     @Test
149     public void testBitMapProducerSize() {
150         final int[] idx = new int[1];
151         createFilter(getTestShape(), TestingHashers.FROM1).forEachBitMap(i -> {
152             idx[0]++;
153             return true;
154         });
155         assertEquals(BitMap.numberOfBitMaps(getTestShape().getNumberOfBits()), idx[0]);
156 
157         idx[0] = 0;
158         createEmptyFilter(getTestShape()).forEachBitMap(i -> {
159             idx[0]++;
160             return true;
161         });
162         assertEquals(BitMap.numberOfBitMaps(getTestShape().getNumberOfBits()), idx[0]);
163     }
164 
165     @Test
166     public void testCardinalityAndIsEmpty() {
167         testCardinalityAndIsEmpty(createEmptyFilter(getTestShape()));
168     }
169 
170     /**
171      * Test cardinality and isEmpty. Bloom filter must be able to accept multiple
172      * IndexProducer merges until all the bits are populated.
173      *
174      * @param bf The Bloom filter to test.
175      */
176     protected void testCardinalityAndIsEmpty(BloomFilter bf) {
177         assertTrue(bf.isEmpty());
178         assertEquals(0, bf.cardinality());
179         for (int i = 0; i < getTestShape().getNumberOfBits(); i++) {
180             bf.merge(IndexProducer.fromIndexArray(i));
181             assertFalse(bf.isEmpty(), "Wrong value at " + i);
182             assertEquals(i + 1, bf.cardinality(), "Wrong value at " + i);
183         }
184 
185         // check operations in reverse order
186         bf.clear();
187         assertEquals(0, bf.cardinality());
188         assertTrue(bf.isEmpty());
189         for (int i = 0; i < getTestShape().getNumberOfBits(); i++) {
190             bf.merge(IndexProducer.fromIndexArray(i));
191             assertEquals(i + 1, bf.cardinality(), "Wrong value at " + i);
192             assertFalse(bf.isEmpty(), "Wrong value at " + i);
193         }
194     }
195     @Test
196     public void testClear() {
197         final BloomFilter bf1 = createFilter(getTestShape(), TestingHashers.FROM1);
198         assertNotEquals(0, bf1.cardinality());
199         bf1.clear();
200         assertEquals(0, bf1.cardinality());
201     }
202 
203     @Test
204     public final void testContains() {
205         BloomFilter bf1 = createFilter(getTestShape(), TestingHashers.FROM1);
206         final BloomFilter bf2 = TestingHashers.populateFromHashersFrom1AndFrom11(createEmptyFilter(getTestShape()));
207 
208         assertTrue(bf1.contains(bf1), "BF1 Should contain itself");
209         assertTrue(bf2.contains(bf2), "BF2 Should contain itself");
210         assertFalse(bf1.contains(bf2), "BF1 should not contain BF2");
211         assertTrue(bf2.contains(bf1), "BF2 should contain BF1");
212 
213         assertTrue(bf2.contains(new IncrementingHasher(1, 1)), "BF2 Should contain this hasher");
214         assertFalse(bf2.contains(new IncrementingHasher(1, 3)), "BF2 Should not contain this hasher");
215 
216         IndexProducer indexProducer = new IncrementingHasher(1, 1).indices(getTestShape());
217         assertTrue(bf2.contains(indexProducer), "BF2 Should contain this hasher");
218         indexProducer = new IncrementingHasher(1, 3).indices(getTestShape());
219         assertFalse(bf2.contains(indexProducer), "BF2 Should not contain this hasher");
220 
221         BitMapProducer bitMapProducer = BitMapProducer.fromIndexProducer(new IncrementingHasher(1, 1).indices(getTestShape()),
222                 getTestShape().getNumberOfBits());
223         assertTrue(bf2.contains(bitMapProducer), "BF2 Should contain this hasher");
224         bitMapProducer = BitMapProducer.fromIndexProducer(new IncrementingHasher(1, 3).indices(getTestShape()),
225                 getTestShape().getNumberOfBits());
226         assertFalse(bf2.contains(bitMapProducer), "BF2 Should not contain this hasher");
227 
228         // Test different lengths
229         bf1 = createFilter(getTestShape(), TestingHashers.FROM1);
230         final BloomFilter bf3 = createFilter(Shape.fromKM(getTestShape().getNumberOfHashFunctions(), Long.SIZE - 1),
231                 TestingHashers.FROM1);
232         assertTrue(bf1.contains(bf3));
233         assertTrue(bf3.contains(bf1));
234 
235         final BloomFilter bf4 = TestingHashers.populateRange(createEmptyFilter(Shape.fromKM(getTestShape().getNumberOfHashFunctions(), Long.SIZE - 1)),
236                 1, 11+getTestShape().getNumberOfHashFunctions());
237 
238         assertFalse(bf1.contains(bf4));
239         assertTrue(bf4.contains(bf1));
240     }
241 
242     @Test
243     public void testEmptyAfterMergeWithNothing() {
244         // test the case where is empty after merge
245         // in this case the internal cardinality == -1
246         BloomFilter bf = createEmptyFilter(getTestShape());
247         bf.merge(IndexProducer.fromIndexArray());
248         assertTrue(bf.isEmpty());
249     }
250 
251     /**
252      * Tests that the estimated intersection calculations are correct.
253      */
254     @Test
255     public final void testEstimateIntersection() {
256 
257         final BloomFilter bf = createFilter(getTestShape(), TestingHashers.FROM1);
258         final BloomFilter bf2 = TestingHashers.populateFromHashersFrom1AndFrom11(createEmptyFilter(getTestShape()));
259 
260         final BloomFilter bf3 = TestingHashers.populateEntireFilter(createEmptyFilter(getTestShape()));
261 
262         assertEquals(1, bf.estimateIntersection(bf2));
263         assertEquals(1, bf2.estimateIntersection(bf));
264         assertEquals(1, bf.estimateIntersection(bf3));
265         assertEquals(1, bf2.estimateIntersection(bf));
266         assertEquals(2, bf3.estimateIntersection(bf2));
267 
268         final BloomFilter bf4 = createEmptyFilter(getTestShape());
269 
270         assertEquals(0, bf.estimateIntersection(bf4));
271         assertEquals(0, bf4.estimateIntersection(bf));
272 
273         final int midPoint = getTestShape().getNumberOfBits() / 2;
274         final BloomFilter bf5 = TestingHashers.populateRange(createEmptyFilter(getTestShape()), 0, midPoint);
275         final BloomFilter bf6 = TestingHashers.populateRange(createEmptyFilter(getTestShape()), midPoint+1, getTestShape().getNumberOfBits()-1);
276         assertThrows(IllegalArgumentException.class, () -> bf5.estimateIntersection(bf6));
277 
278         // infinite with infinite
279         assertEquals(Integer.MAX_VALUE, bf3.estimateIntersection(bf3));
280     }
281 
282     /**
283      * Tests that the size estimate is correctly calculated.
284      */
285     @Test
286     public final void testEstimateN() {
287         // build a filter
288         BloomFilter filter1 = createFilter(getTestShape(), TestingHashers.FROM1);
289         assertEquals(1, filter1.estimateN());
290 
291         // the data provided above do not generate an estimate that is equivalent to the
292         // actual.
293         filter1.merge(new IncrementingHasher(4, 1));
294         assertEquals(1, filter1.estimateN());
295 
296         filter1.merge(new IncrementingHasher(17, 1));
297 
298         assertEquals(3, filter1.estimateN());
299 
300         filter1 = TestingHashers.populateEntireFilter(createEmptyFilter(getTestShape()));
301         assertEquals(Integer.MAX_VALUE, filter1.estimateN());
302     }
303 
304     /**
305      * Tests that the estimated union calculations are correct.
306      */
307     @Test
308     public final void testEstimateUnion() {
309         final BloomFilter bf = createFilter(getTestShape(), TestingHashers.FROM1);
310         final BloomFilter bf2 = createFilter(getTestShape(), TestingHashers.FROM11);
311 
312         assertEquals(2, bf.estimateUnion(bf2));
313         assertEquals(2, bf2.estimateUnion(bf));
314 
315         final BloomFilter bf3 = createEmptyFilter(getTestShape());
316 
317         assertEquals(1, bf.estimateUnion(bf3));
318         assertEquals(1, bf3.estimateUnion(bf));
319     }
320 
321     @Test
322     public void testIndexProducerMerge() {
323         final Shape shape = Shape.fromKM(5, 10);
324 
325         assertIndexProducerMerge(shape, new int[] {0, 2, 4, 6, 8}, new int[] {0, 2, 4, 6, 8});
326         // test duplicate values
327         assertIndexProducerMerge(shape, new int[] {0, 2, 4, 2, 8}, new int[] {0, 2, 4, 8});
328         // test negative values
329         assertFailedIndexProducerConstructor(shape, new int[] {0, 2, 4, -2, 8});
330         // test index too large
331         assertFailedIndexProducerConstructor(shape, new int[] {0, 2, 4, 12, 8});
332         // test no indices
333         assertIndexProducerMerge(shape, new int[0], new int[0]);
334     }
335 
336     /**
337      * Tests that isFull() returns the proper values.
338      */
339     @Test
340     public final void testIsFull() {
341 
342         // create empty filter
343         BloomFilter filter = createEmptyFilter(getTestShape());
344         assertFalse(filter.isFull(), "Should not be full");
345 
346         filter = TestingHashers.populateEntireFilter(filter);
347         assertTrue(filter.isFull(), "Should be full");
348 
349         filter = createFilter(getTestShape(), new IncrementingHasher(1, 3));
350         assertFalse(filter.isFull(), "Should not be full");
351     }
352 
353     /**
354      * Tests that merging bloom filters works as expected with a generic BloomFilter.
355      */
356     @Test
357     public final void testMerge() {
358 
359         final BloomFilter bf1 = createFilter(getTestShape(), TestingHashers.FROM1);
360         final BloomFilter bf2 = createFilter(getTestShape(), TestingHashers.FROM11);
361         final BloomFilter bf3 = bf1.copy();
362         bf3.merge(bf2);
363 
364         // test with BloomFilter
365 
366         final long[] bf1Val = bf1.asBitMapArray();
367         final long[] bf2Val = bf2.asBitMapArray();
368         for (int i = 0; i < bf1Val.length; i++) {
369             bf1Val[i] |= bf2Val[i];
370         }
371         bf1.merge(bf2);
372 
373         final long[] bf1New = bf1.asBitMapArray();
374         for (int i = 0; i < bf1Val.length; i++) {
375             assertEquals(bf1Val[i], bf1New[i], "Bad value at " + i);
376         }
377 
378         assertTrue(bf1.contains(bf2), "Should contain bf2");
379         assertTrue(bf1.contains(bf3), "Should contain bf3");
380 
381         // test with hasher
382 
383         final BloomFilter bf4 = createFilter(getTestShape(), TestingHashers.FROM1);
384         bf4.merge(TestingHashers.FROM11);
385 
386         assertTrue(bf4.contains(bf2), "Should contain Bf2");
387         assertTrue(bf4.contains(bf3), "Should contain Bf3");
388 
389         // test with hasher returning numbers out of range
390         assertThrows(IllegalArgumentException.class,
391                 () -> bf1.merge(new BadHasher(bf1.getShape().getNumberOfBits())));
392         assertThrows(IllegalArgumentException.class, () -> bf1.merge(new BadHasher(-1)));
393 
394         // test error when bloom filter returns values out of range
395         final Shape s = Shape.fromKM(getTestShape().getNumberOfHashFunctions(), getTestShape().getNumberOfBits() * 3);
396         final Hasher h = new IncrementingHasher(getTestShape().getNumberOfBits() * 2, 1);
397         final BloomFilter bf5 = new SimpleBloomFilter(s);
398         bf5.merge(h);
399         assertThrows(IllegalArgumentException.class, () -> bf1.merge(bf5));
400 
401         final BloomFilter bf6 = new SparseBloomFilter(s);
402         bf6.merge(h);
403         assertThrows(IllegalArgumentException.class, () -> bf1.merge(bf6));
404     }
405 
406     @Test
407     public void testMergeWithBadHasher() {
408         // value too large
409         final BloomFilter f = createEmptyFilter(getTestShape());
410         assertThrows(IllegalArgumentException.class,
411                 () -> f.merge(new BadHasher(getTestShape().getNumberOfBits())));
412         // negative value
413         final BloomFilter f2 = createEmptyFilter(getTestShape());
414         assertThrows(IllegalArgumentException.class, () -> f2.merge(new BadHasher(-1)));
415     }
416 
417     @Test
418     public void testMergeWithBitMapProducer() {
419         final int bitMapCount = BitMap.numberOfBitMaps(getTestShape().getNumberOfBits());
420         for (int i = 0; i < 5; i++) {
421             final long[] values = new long[bitMapCount];
422             for (final int idx : DefaultIndexProducerTest.generateIntArray(getTestShape().getNumberOfHashFunctions(), getTestShape().getNumberOfBits())) {
423                 BitMap.set(values, idx);
424             }
425             final BloomFilter f = createFilter(getTestShape(), BitMapProducer.fromBitMapArray(values));
426             final List<Long> lst = new ArrayList<>();
427             for (final long l : values) {
428                 lst.add(l);
429             }
430             assertTrue(f.forEachBitMap(l -> lst.remove(Long.valueOf(l))));
431             assertTrue(lst.isEmpty());
432         }
433         // values too large
434         final long[] values = new long[bitMapCount];
435         Arrays.fill(values, Long.MAX_VALUE);
436         final BitMapProducer badProducer = BitMapProducer.fromBitMapArray(values);
437         final BloomFilter bf = createEmptyFilter(getTestShape());
438         assertThrows(IllegalArgumentException.class, () -> bf.merge(badProducer));
439 
440         // test where merged bits exceed expected bits but both bitmaps are the same length.
441         final BitMapProducer badProducer2 = BitMapProducer.fromBitMapArray(0x80_00_00_00_00_00_00_00L);
442         final BloomFilter bf2 = createEmptyFilter(Shape.fromKM(3, 32));
443         assertThrows(IllegalArgumentException.class, () -> bf2.merge(badProducer2));
444     }
445 
446     @Test
447     public void testMergeWithHasher() {
448         for (int i = 0; i < 5; i++) {
449             final BloomFilter f = createEmptyFilter(getTestShape());
450             final int[] expected = DefaultIndexProducerTest.generateIntArray(getTestShape().getNumberOfHashFunctions(), getTestShape().getNumberOfBits());
451             final Hasher hasher = new ArrayHasher(expected);
452             f.merge(hasher);
453             // create sorted unique array of expected values
454             assertArrayEquals(DefaultIndexProducerTest.unique(expected), f.asIndexArray());
455         }
456     }
457 
458     @Test
459     public void testMergeWithIndexProducer() {
460         for (int i = 0; i < 5; i++) {
461             final int[] values = DefaultIndexProducerTest.generateIntArray(getTestShape().getNumberOfHashFunctions(), getTestShape().getNumberOfBits());
462             final BloomFilter f = createFilter(getTestShape(), IndexProducer.fromIndexArray(values));
463             final BitSet uniqueValues = DefaultIndexProducerTest.uniqueSet(values);
464             assertTrue(f.forEachIndex(idx -> {
465                 final boolean result = uniqueValues.get(idx);
466                 uniqueValues.clear(idx);
467                 return result;
468             }));
469             assertTrue(uniqueValues.isEmpty());
470         }
471         // value to large
472         final BloomFilter f1 = createEmptyFilter(getTestShape());
473         assertThrows(IllegalArgumentException.class,
474                 () -> f1.merge(IndexProducer.fromIndexArray(getTestShape().getNumberOfBits())));
475         // negative value
476         final BloomFilter f2 = createEmptyFilter(getTestShape());
477         assertThrows(IllegalArgumentException.class,
478                 () -> f2.merge(IndexProducer.fromIndexArray(-1)));
479     }
480 
481     @Test
482     public final void testNegativeIntersection() {
483         final IndexProducer p1 = IndexProducer.fromIndexArray(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 20, 26, 28, 30, 32, 34, 35, 36, 37, 39, 40, 41, 42, 43, 45, 46, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71);
484         final IndexProducer p2 = IndexProducer.fromIndexArray(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27);
485 
486         final BloomFilter filter1 = createEmptyFilter(Shape.fromKM(17, 72));
487         filter1.merge(p1);
488         final BloomFilter filter2 = createEmptyFilter(Shape.fromKM(17, 72));
489         filter2.merge(p2);
490         assertEquals(0, filter1.estimateIntersection(filter2));
491     }
492 }