View Javadoc
1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one or more
3    * contributor license agreements.  See the NOTICE file distributed with
4    * this work for additional information regarding copyright ownership.
5    * The ASF licenses this file to You under the Apache License, Version 2.0
6    * (the "License"); you may not use this file except in compliance with
7    * the License.  You may obtain a copy of the License at
8    *
9    *      http://www.apache.org/licenses/LICENSE-2.0
10   *
11   * Unless required by applicable law or agreed to in writing, software
12   * distributed under the License is distributed on an "AS IS" BASIS,
13   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14   * See the License for the specific language governing permissions and
15   * limitations under the License.
16   */
17  package org.apache.commons.collections4.bloomfilter;
18  
19  import static org.junit.jupiter.api.Assertions.assertEquals;
20  import static org.junit.jupiter.api.Assertions.assertFalse;
21  import static org.junit.jupiter.api.Assertions.assertThrows;
22  import static org.junit.jupiter.api.Assertions.assertTrue;
23  
24  import java.util.HashMap;
25  import java.util.Map;
26  
27  import org.junit.jupiter.api.Test;
28  
29  /**
30   * Tests for the {@link ArrayCountingBloomFilter}.
31   */
32  public abstract class AbstractCountingBloomFilterTest<T extends CountingBloomFilter>
33          extends AbstractBloomFilterTest<T> {
34  
35      private static final int[] from1Counts = {0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0};
36  
37      private static final int[] bigHashCounts = {0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1,
38          1, 0};
39  
40      private static final long bigHashValue = 0xffffffeL;
41  
42      /**
43       * Assert the counts match the expected values. Values are for indices starting
44       * at 0. Assert the cardinality equals the number of non-zero counts.
45       *
46       * @param bf the bloom filter
47       * @param expected the expected counts
48       */
49      private static void assertCounts(final CountingBloomFilter bf, final int[] expected) {
50          final Map<Integer, Integer> m = new HashMap<>();
51          bf.forEachCell((i, c) -> {
52              m.put(i, c);
53              return true;
54          });
55          int zeros = 0;
56          for (int i = 0; i < expected.length; i++) {
57              if (m.get(i) == null) {
58                  assertEquals(expected[i], 0, "Wrong value for " + i);
59                  zeros++;
60              } else {
61                  assertEquals(expected[i], m.get(i).intValue(), "Wrong value for " + i);
62              }
63          }
64          assertEquals(expected.length - zeros, bf.cardinality());
65      }
66  
67      private void assertCell3(final CountingBloomFilter bf, final int value) {
68          bf.forEachCell((k, v) -> {
69              if (k == 3) {
70                  assertEquals(value, v, "Mismatch at position 3");
71              } else {
72                  assertEquals(0, v, "Mismatch at position " + k);
73              }
74              return true;
75          });
76      }
77  
78      protected final CellProducer getMaximumValueProducer(final int maxValue) {
79          return consumer -> {
80              for (int i = 1; i < 18; i++) {
81                  if (!consumer.test(i, maxValue)) {
82                      return false;
83                  }
84              }
85              return true;
86          };
87      }
88  
89      @Test
90      public void mergeIncrementsAllCellsTest() {
91          final CountingBloomFilter f1 = createEmptyFilter(Shape.fromKM(1, 10));
92          final CountingBloomFilter f2 = f1.copy();
93          final CountingBloomFilter f3 = f1.copy();
94          // index producer produces 3 two times.
95          final IndexProducer ip = p -> {
96              p.test(3);
97              p.test(3);
98              return true;
99          };
100         // The merge should increment cell 3 by 1
101         f1.merge(ip);
102         assertCell3(f1, 1);
103 
104         // The add should increment cells 3 by 2
105         f2.add(CellProducer.from(ip));
106         assertCell3(f2, 2);
107     }
108 
109     @Test
110     public void removeDecrementsAllCellsTest() {
111         final CountingBloomFilter f1 = createEmptyFilter(Shape.fromKM(1, 10));
112         final CellProducer cp = p -> {
113             p.test(3, 3);
114             return true;
115         };
116         f1.add(cp);
117         final CountingBloomFilter f2 = f1.copy();
118         final CountingBloomFilter f3 = f1.copy();
119         // index producer produces 3 two times.
120         final IndexProducer ip = p -> {
121             p.test(3);
122             p.test(3);
123             return true;
124         };
125         // The merge should decrement cell 3 by 1
126         f1.remove(ip);
127         assertCell3(f1, 2);
128 
129         // The add should decrement cells 3 by 2
130         f2.subtract(CellProducer.from(ip));
131         assertCell3(f2, 1);
132 
133         // This merge will decrement by 1 as the round-trip makes the indices unique
134         f3.remove(IndexProducer.fromIndexArray(ip.asIndexArray()));
135         assertCell3(f3, 2);
136     }
137 
138     /**
139      * Tests that merge correctly updates the counts when a CountingBloomFilter is
140      * passed.
141      */
142     @Test
143     public void testAdd() {
144         final CountingBloomFilter bf1 = createFilter(getTestShape(), TestingHashers.FROM1);
145         assertTrue(bf1.add(createFilter(getTestShape(), TestingHashers.FROM11)), "Add should work");
146         assertTrue(bf1.contains(TestingHashers.FROM1), "Should contain");
147         assertTrue(bf1.contains(TestingHashers.FROM11), "Should contain");
148         assertCounts(bf1, bigHashCounts);
149 
150         // test overflow
151 
152         final CountingBloomFilter bf2 = createEmptyFilter(getTestShape());
153         assertTrue(bf2.add(getMaximumValueProducer(bf2.getMaxCell())), "Should add to empty");
154         assertTrue(bf2.isValid(), "Should be valid");
155 
156         assertFalse(bf2.add(createFilter(getTestShape(), TestingHashers.FROM1)), "Should not add");
157         assertFalse(bf2.isValid(), "Should not be valid");
158     }
159 
160     @Test
161     public final void testCountingBloomFilterSpecificContains() {
162         final BloomFilter bf = new SimpleBloomFilter(getTestShape());
163         bf.merge(TestingHashers.FROM1);
164         final CountingBloomFilter bf2 = TestingHashers.populateFromHashersFrom1AndFrom11( createEmptyFilter(getTestShape()));
165 
166         assertTrue(bf.contains(bf), "BF Should contain itself");
167         assertTrue(bf2.contains(bf2), "BF2 Should contain itself");
168         assertFalse(bf.contains(bf2), "BF should not contain BF2");
169         assertTrue(bf2.contains(bf), "BF2 should contain BF");
170         final BitMapProducer producer = bf2;
171         assertTrue(bf2.contains(producer), "BF2 should contain BF bitMapProducer");
172     }
173 
174     /**
175      * Tests that counts are correct when a hasher with duplicates is used in the
176      * constructor.
177      */
178     @Test
179     public final void testCountingSpecificConstructor() {
180         // verify hasher duplicates are counted.
181         // bit hasher has duplicates for 11, 12,13,14,15,16, and 17
182         final CountingBloomFilter bf = createFilter(getTestShape(), TestingHashers.FROM1);
183         bf.add(CellProducer.from(TestingHashers.FROM11.indices(getTestShape())));
184 
185         final long[] lb = bf.asBitMapArray();
186         assertEquals(2, lb.length);
187         assertEquals(bigHashValue, lb[0]);
188 
189         assertCounts(bf, bigHashCounts);
190     }
191 
192     /**
193      * Tests that merging bloom filters works as expected with a generic BloomFilter.
194      */
195     @Test
196     public final void testCountingSpecificMerge() {
197         final BloomFilter bf1 = createFilter(getTestShape(), TestingHashers.FROM1);
198 
199         final BloomFilter bf2 = new SimpleBloomFilter(getTestShape());
200         bf2.merge(TestingHashers.FROM11);
201 
202         final BloomFilter bf3 = bf1.copy();
203         bf3.merge(bf2);
204         assertTrue(bf3.contains(bf1), "Should contain");
205         assertTrue(bf3.contains(bf2), "Should contain");
206 
207         final BloomFilter bf4 = bf2.copy();
208         bf4.merge(bf1);
209         assertTrue(bf4.contains(bf1), "Should contain");
210         assertTrue(bf4.contains(bf2), "Should contain");
211         assertTrue(bf4.contains(bf3), "Should contain");
212         assertTrue(bf3.contains(bf4), "Should contain");
213 
214         // test overflow
215 
216         final CountingBloomFilter bf5 = createEmptyFilter(getTestShape());
217         assertTrue(bf5.add(getMaximumValueProducer(bf5.getMaxCell())), "Should add to empty");
218         assertTrue(bf5.isValid(), "Should be valid");
219 
220         final CountingBloomFilter bf6 = bf5.copy();
221         final BloomFilter bf7 = new SimpleBloomFilter(getTestShape());
222         bf7.merge(TestingHashers.FROM1);
223         bf6.merge(bf7);
224         assertFalse(bf6.isValid(), "Should not be valid");
225     }
226 
227     @Test
228     public void testExcludesDuplicates() {
229 
230         // create a hasher that produces duplicates with the specified shape.
231         // this setup produces 5, 17, 29, 41, 53, 65 two times
232         final Shape shape = Shape.fromKM(12, 72);
233         final Hasher hasher = new IncrementingHasher(5, 12);
234 
235         CountingBloomFilter bf1 = createFilter(shape, hasher);
236         assertEquals(6, bf1.cardinality());
237         bf1.forEachCell((x, y) -> {
238             assertEquals(1, y, "Hasher in constructor results in value not equal to 1");
239             return true;
240         });
241 
242         bf1 = createEmptyFilter(shape);
243         bf1.merge(hasher);
244         assertEquals(6, bf1.cardinality());
245         bf1.forEachCell((x, y) -> {
246             assertEquals(1, y, "Hasher in merge results in value not equal to 1");
247             return true;
248         });
249 
250         bf1 = createEmptyFilter(shape);
251         bf1.merge(hasher);
252         bf1.remove(hasher);
253         assertEquals(0, bf1.cardinality());
254         assertTrue(bf1.forEachCell((x, y) -> false), "Hasher in removes results in value not equal to 0");
255     }
256 
257     @Test
258     public void testGetMaxInsert() {
259         final CountingBloomFilter bf = createEmptyFilter(getTestShape());
260         verifyMaxInsert(bf, 0, 0);
261         bf.merge(TestingHashers.FROM1);
262         verifyMaxInsert(bf, 1, 0);
263         bf.merge(TestingHashers.FROM1);
264         verifyMaxInsert(bf, 2, 0);
265         bf.merge(TestingHashers.FROM11);
266         verifyMaxInsert(bf, 2, 1);
267         bf.remove(TestingHashers.FROM1);
268         verifyMaxInsert(bf, 1, 1);
269         // verify remove false positive works
270         // Incrementing hasher 5,1 spans the single count cells for both FROM1 and FROM11
271         assertEquals(1, bf.getMaxInsert(new IncrementingHasher(5, 1)));
272         bf.remove(new IncrementingHasher(5, 1));
273         verifyMaxInsert(bf, 0, 0);
274         assertEquals(0, bf.getMaxInsert(new IncrementingHasher(5, 1)));
275     }
276 
277     /**
278      * Tests that merge correctly updates the counts when a CountingBloomFilter is
279      * passed.
280      */
281     @Test
282     public final void testRemove() {
283         final BloomFilter simple = new SimpleBloomFilter(getTestShape());
284         simple.merge(TestingHashers.FROM11);
285 
286         final CountingBloomFilter bf1 = createFilter(getTestShape(), TestingHashers.FROM1);
287         bf1.add(CellProducer.from(TestingHashers.FROM11.indices(getTestShape())));
288 
289         assertTrue(bf1.remove(simple), "Remove should work");
290         assertFalse(bf1.contains(TestingHashers.FROM11), "Should not contain");
291         assertTrue(bf1.contains(TestingHashers.FROM1), "Should contain");
292 
293         assertCounts(bf1, from1Counts);
294 
295         // with hasher
296         final CountingBloomFilter bf2 = createFilter(getTestShape(), TestingHashers.FROM1);
297         bf2.add(CellProducer.from(TestingHashers.FROM11.indices(getTestShape())));
298 
299         assertTrue(bf2.remove(TestingHashers.FROM11), "Remove should work");
300         assertFalse(bf2.contains(TestingHashers.FROM11), "Should not contain");
301         assertTrue(bf2.contains(TestingHashers.FROM1), "Should contain");
302 
303         assertCounts(bf2, from1Counts);
304 
305         // test underflow
306         final CountingBloomFilter bf3 = createFilter(getTestShape(), TestingHashers.FROM1);
307         assertFalse(bf3.remove(simple), "Subtract should not work");
308         assertFalse(bf3.isValid(), "isValid should return false");
309         assertFalse(bf3.contains(TestingHashers.FROM1), "Should not contain");
310         assertFalse(bf3.contains(simple), "Should not contain");
311 
312         assertCounts(bf3, new int[] {0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1});
313 
314         // with IndexProducer
315         final IndexProducer ip = TestingHashers.FROM11.indices(getTestShape());
316 
317         final CountingBloomFilter bf4 = createFilter(getTestShape(), TestingHashers.FROM1);
318         bf4.add(CellProducer.from(TestingHashers.FROM11.indices(getTestShape())));
319 
320         assertTrue(bf4.remove(ip), "Remove should work");
321         assertFalse(bf4.contains(TestingHashers.FROM11), "Should not contain");
322         assertTrue(bf4.contains(TestingHashers.FROM1), "Should contain");
323 
324         assertCounts(bf4, from1Counts);
325 
326         // with BitMapProducer
327         final BitMapProducer bmp = BitMapProducer.fromIndexProducer(ip, getTestShape().getNumberOfBits());
328         final CountingBloomFilter bf5 = createFilter(getTestShape(), TestingHashers.FROM1);
329         bf5.add(CellProducer.from(TestingHashers.FROM11.indices(getTestShape())));
330 
331         assertTrue(bf5.remove(bmp), "Remove should work");
332         assertFalse(bf5.contains(TestingHashers.FROM11), "Should not contain");
333         assertTrue(bf5.contains(TestingHashers.FROM1), "Should contain");
334 
335         assertCounts(bf5, from1Counts);
336 
337         // test producer errors
338         final IndexProducer ip2 = IndexProducer.fromIndexArray(1, 2, getTestShape().getNumberOfBits());
339         final CountingBloomFilter bf6 = createFilter(getTestShape(), TestingHashers.FROM1);
340         assertThrows(IllegalArgumentException.class, () -> bf6.remove(ip2));
341 
342         final CountingBloomFilter bf7 = createFilter(getTestShape(), TestingHashers.FROM1);
343         final BitMapProducer bmp2 = BitMapProducer.fromIndexProducer(ip2, getTestShape().getNumberOfBits());
344         assertThrows(IllegalArgumentException.class, () -> bf7.remove(bmp2));
345         assertThrows(IllegalArgumentException.class, () -> bf7.remove( new BadHasher(-1)));
346         assertThrows(IllegalArgumentException.class, () -> bf7.remove( new BadHasher(getTestShape().getNumberOfBits())));
347     }
348 
349     /**
350      * Tests that merge correctly updates the counts when a CountingBloomFilter is
351      * passed.
352      */
353     @Test
354     public final void testSubtract() {
355         final CountingBloomFilter bf1 = createFilter(getTestShape(), TestingHashers.FROM1);
356         bf1.add(CellProducer.from(TestingHashers.FROM11.indices(getTestShape())));
357 
358         final CountingBloomFilter bf2 = createFilter(getTestShape(), TestingHashers.FROM11);
359 
360         assertTrue(bf1.subtract(bf2), "Subtract should work");
361         assertFalse(bf1.contains( TestingHashers.populateFromHashersFrom1AndFrom11(new SimpleBloomFilter(getTestShape()))), "Should not contain bitHasher");
362         assertTrue(bf1.contains(TestingHashers.FROM1), "Should contain TestingHashers.from1");
363 
364         assertCounts(bf1, from1Counts);
365 
366         // test underflow
367         final CountingBloomFilter bf3 = createFilter(getTestShape(), TestingHashers.FROM1);
368 
369         final CountingBloomFilter bf4 = createFilter(getTestShape(), TestingHashers.FROM11);
370 
371         assertFalse(bf3.subtract(bf4), "Subtract should not work");
372         assertFalse(bf3.isValid(), "isValid should return false");
373         assertFalse(bf3.contains(TestingHashers.FROM1), "Should not contain");
374         assertFalse(bf3.contains(bf4), "Should not contain");
375 
376         assertCounts(bf3, new int[] {0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0});
377 
378         assertThrows(IllegalArgumentException.class, () -> bf3.remove( new BadHasher(-1)));
379         assertThrows(IllegalArgumentException.class, () -> bf3.remove( new BadHasher(getTestShape().getNumberOfBits())));
380     }
381 
382     private void verifyMaxInsert(final CountingBloomFilter bf, final int from1, final int from11) {
383         final BloomFilter bfFrom0 = new DefaultBloomFilterTest.SparseDefaultBloomFilter(getTestShape());
384         bfFrom0.merge(new IncrementingHasher(0, 1));
385         final BloomFilter bfFrom1 = new DefaultBloomFilterTest.SparseDefaultBloomFilter(getTestShape());
386         bfFrom1.merge(TestingHashers.FROM1);
387         final BloomFilter bfFrom11 = new DefaultBloomFilterTest.SparseDefaultBloomFilter(getTestShape());
388         bfFrom11.merge(TestingHashers.FROM11);
389 
390         assertEquals(0, bf.getMaxInsert(new IncrementingHasher(0, 1)));
391         assertEquals(0, bf.getMaxInsert(bfFrom0));
392         assertEquals(0, bf.getMaxInsert((BitMapProducer) bfFrom0));
393         assertEquals(0, bf.getMaxInsert((IndexProducer) bfFrom0));
394 
395         assertEquals(from1, bf.getMaxInsert(TestingHashers.FROM1));
396         assertEquals(from1, bf.getMaxInsert(bfFrom1));
397         assertEquals(from1, bf.getMaxInsert((BitMapProducer) bfFrom1));
398         assertEquals(from1, bf.getMaxInsert((IndexProducer) bfFrom1));
399 
400         assertEquals(from11, bf.getMaxInsert(TestingHashers.FROM11));
401         assertEquals(from11, bf.getMaxInsert(bfFrom11));
402         assertEquals(from11, bf.getMaxInsert((BitMapProducer) bfFrom11));
403         assertEquals(from11, bf.getMaxInsert((IndexProducer) bfFrom11));
404     }
405 }