1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17 package org.apache.commons.collections4.bloomfilter;
18
19 import static org.junit.jupiter.api.Assertions.assertArrayEquals;
20 import static org.junit.jupiter.api.Assertions.assertEquals;
21 import static org.junit.jupiter.api.Assertions.assertFalse;
22 import static org.junit.jupiter.api.Assertions.assertNotEquals;
23 import static org.junit.jupiter.api.Assertions.assertThrows;
24 import static org.junit.jupiter.api.Assertions.assertTrue;
25
26 import java.util.ArrayList;
27 import java.util.Arrays;
28 import java.util.BitSet;
29 import java.util.List;
30
31 import org.junit.jupiter.api.Test;
32
33
34
35
36 public abstract class AbstractBloomFilterTest<T extends BloomFilter> {
37
38
39
40
41 public static class BadHasher implements Hasher {
42
43 IndexProducer producer;
44
45 public BadHasher(final int value) {
46 this.producer = IndexProducer.fromIndexArray(value);
47 }
48
49 @Override
50 public IndexProducer indices(final Shape shape) {
51 return producer;
52 }
53 }
54
55 private void assertFailedIndexProducerConstructor(final Shape shape, final int[] values) {
56 final IndexProducer indices = IndexProducer.fromIndexArray(values);
57 assertThrows(IllegalArgumentException.class, () -> createFilter(shape, indices));
58 }
59
60 private void assertIndexProducerMerge(final Shape shape, final int[] values, final int[] expected) {
61 final IndexProducer indices = IndexProducer.fromIndexArray(values);
62 final BloomFilter filter = createFilter(shape, indices);
63 final List<Integer> lst = new ArrayList<>();
64 filter.forEachIndex(x -> {
65 lst.add(x);
66 return true;
67 });
68 assertEquals(expected.length, lst.size());
69 for (final int value : expected) {
70 assertTrue(lst.contains(Integer.valueOf(value)), "Missing " + value);
71 }
72 }
73
74
75
76
77
78
79
80 protected abstract T createEmptyFilter(Shape shape);
81
82
83
84
85
86
87
88
89 protected final T createFilter(final Shape shape, final BitMapProducer producer) {
90 final T bf = createEmptyFilter(shape);
91 bf.merge(producer);
92 return bf;
93 }
94
95
96
97
98
99
100
101
102 protected final T createFilter(final Shape shape, final Hasher hasher) {
103 final T bf = createEmptyFilter(shape);
104 bf.merge(hasher);
105 return bf;
106 }
107
108
109
110
111
112
113
114
115 protected final T createFilter(final Shape shape, final IndexProducer producer) {
116 final T bf = createEmptyFilter(shape);
117 bf.merge(producer);
118 return bf;
119 }
120
121
122
123
124
125
126
127
128
129 protected Shape getTestShape() {
130 return Shape.fromKM(17, 72);
131 }
132
133
134
135
136 @Test
137 public final void testAsBitMapArray() {
138
139
140 final IncrementingHasher hasher = new IncrementingHasher(63, 1);
141 final BloomFilter bf = createFilter(Shape.fromKM(2, 72), hasher);
142 final long[] lb = bf.asBitMapArray();
143 assertEquals(2, lb.length);
144 assertEquals(0x8000000000000000L, lb[0]);
145 assertEquals(0x1, lb[1]);
146 }
147
148 @Test
149 public void testBitMapProducerSize() {
150 final int[] idx = new int[1];
151 createFilter(getTestShape(), TestingHashers.FROM1).forEachBitMap(i -> {
152 idx[0]++;
153 return true;
154 });
155 assertEquals(BitMap.numberOfBitMaps(getTestShape().getNumberOfBits()), idx[0]);
156
157 idx[0] = 0;
158 createEmptyFilter(getTestShape()).forEachBitMap(i -> {
159 idx[0]++;
160 return true;
161 });
162 assertEquals(BitMap.numberOfBitMaps(getTestShape().getNumberOfBits()), idx[0]);
163 }
164
165 @Test
166 public void testCardinalityAndIsEmpty() {
167 testCardinalityAndIsEmpty(createEmptyFilter(getTestShape()));
168 }
169
170
171
172
173
174
175
176 protected void testCardinalityAndIsEmpty(BloomFilter bf) {
177 assertTrue(bf.isEmpty());
178 assertEquals(0, bf.cardinality());
179 for (int i = 0; i < getTestShape().getNumberOfBits(); i++) {
180 bf.merge(IndexProducer.fromIndexArray(i));
181 assertFalse(bf.isEmpty(), "Wrong value at " + i);
182 assertEquals(i + 1, bf.cardinality(), "Wrong value at " + i);
183 }
184
185
186 bf.clear();
187 assertEquals(0, bf.cardinality());
188 assertTrue(bf.isEmpty());
189 for (int i = 0; i < getTestShape().getNumberOfBits(); i++) {
190 bf.merge(IndexProducer.fromIndexArray(i));
191 assertEquals(i + 1, bf.cardinality(), "Wrong value at " + i);
192 assertFalse(bf.isEmpty(), "Wrong value at " + i);
193 }
194 }
195 @Test
196 public void testClear() {
197 final BloomFilter bf1 = createFilter(getTestShape(), TestingHashers.FROM1);
198 assertNotEquals(0, bf1.cardinality());
199 bf1.clear();
200 assertEquals(0, bf1.cardinality());
201 }
202
203 @Test
204 public final void testContains() {
205 BloomFilter bf1 = createFilter(getTestShape(), TestingHashers.FROM1);
206 final BloomFilter bf2 = TestingHashers.populateFromHashersFrom1AndFrom11(createEmptyFilter(getTestShape()));
207
208 assertTrue(bf1.contains(bf1), "BF1 Should contain itself");
209 assertTrue(bf2.contains(bf2), "BF2 Should contain itself");
210 assertFalse(bf1.contains(bf2), "BF1 should not contain BF2");
211 assertTrue(bf2.contains(bf1), "BF2 should contain BF1");
212
213 assertTrue(bf2.contains(new IncrementingHasher(1, 1)), "BF2 Should contain this hasher");
214 assertFalse(bf2.contains(new IncrementingHasher(1, 3)), "BF2 Should not contain this hasher");
215
216 IndexProducer indexProducer = new IncrementingHasher(1, 1).indices(getTestShape());
217 assertTrue(bf2.contains(indexProducer), "BF2 Should contain this hasher");
218 indexProducer = new IncrementingHasher(1, 3).indices(getTestShape());
219 assertFalse(bf2.contains(indexProducer), "BF2 Should not contain this hasher");
220
221 BitMapProducer bitMapProducer = BitMapProducer.fromIndexProducer(new IncrementingHasher(1, 1).indices(getTestShape()),
222 getTestShape().getNumberOfBits());
223 assertTrue(bf2.contains(bitMapProducer), "BF2 Should contain this hasher");
224 bitMapProducer = BitMapProducer.fromIndexProducer(new IncrementingHasher(1, 3).indices(getTestShape()),
225 getTestShape().getNumberOfBits());
226 assertFalse(bf2.contains(bitMapProducer), "BF2 Should not contain this hasher");
227
228
229 bf1 = createFilter(getTestShape(), TestingHashers.FROM1);
230 final BloomFilter bf3 = createFilter(Shape.fromKM(getTestShape().getNumberOfHashFunctions(), Long.SIZE - 1),
231 TestingHashers.FROM1);
232 assertTrue(bf1.contains(bf3));
233 assertTrue(bf3.contains(bf1));
234
235 final BloomFilter bf4 = TestingHashers.populateRange(createEmptyFilter(Shape.fromKM(getTestShape().getNumberOfHashFunctions(), Long.SIZE - 1)),
236 1, 11+getTestShape().getNumberOfHashFunctions());
237
238 assertFalse(bf1.contains(bf4));
239 assertTrue(bf4.contains(bf1));
240 }
241
242 @Test
243 public void testEmptyAfterMergeWithNothing() {
244
245
246 BloomFilter bf = createEmptyFilter(getTestShape());
247 bf.merge(IndexProducer.fromIndexArray());
248 assertTrue(bf.isEmpty());
249 }
250
251
252
253
254 @Test
255 public final void testEstimateIntersection() {
256
257 final BloomFilter bf = createFilter(getTestShape(), TestingHashers.FROM1);
258 final BloomFilter bf2 = TestingHashers.populateFromHashersFrom1AndFrom11(createEmptyFilter(getTestShape()));
259
260 final BloomFilter bf3 = TestingHashers.populateEntireFilter(createEmptyFilter(getTestShape()));
261
262 assertEquals(1, bf.estimateIntersection(bf2));
263 assertEquals(1, bf2.estimateIntersection(bf));
264 assertEquals(1, bf.estimateIntersection(bf3));
265 assertEquals(1, bf2.estimateIntersection(bf));
266 assertEquals(2, bf3.estimateIntersection(bf2));
267
268 final BloomFilter bf4 = createEmptyFilter(getTestShape());
269
270 assertEquals(0, bf.estimateIntersection(bf4));
271 assertEquals(0, bf4.estimateIntersection(bf));
272
273 final int midPoint = getTestShape().getNumberOfBits() / 2;
274 final BloomFilter bf5 = TestingHashers.populateRange(createEmptyFilter(getTestShape()), 0, midPoint);
275 final BloomFilter bf6 = TestingHashers.populateRange(createEmptyFilter(getTestShape()), midPoint+1, getTestShape().getNumberOfBits()-1);
276 assertThrows(IllegalArgumentException.class, () -> bf5.estimateIntersection(bf6));
277
278
279 assertEquals(Integer.MAX_VALUE, bf3.estimateIntersection(bf3));
280 }
281
282
283
284
285 @Test
286 public final void testEstimateN() {
287
288 BloomFilter filter1 = createFilter(getTestShape(), TestingHashers.FROM1);
289 assertEquals(1, filter1.estimateN());
290
291
292
293 filter1.merge(new IncrementingHasher(4, 1));
294 assertEquals(1, filter1.estimateN());
295
296 filter1.merge(new IncrementingHasher(17, 1));
297
298 assertEquals(3, filter1.estimateN());
299
300 filter1 = TestingHashers.populateEntireFilter(createEmptyFilter(getTestShape()));
301 assertEquals(Integer.MAX_VALUE, filter1.estimateN());
302 }
303
304
305
306
307 @Test
308 public final void testEstimateUnion() {
309 final BloomFilter bf = createFilter(getTestShape(), TestingHashers.FROM1);
310 final BloomFilter bf2 = createFilter(getTestShape(), TestingHashers.FROM11);
311
312 assertEquals(2, bf.estimateUnion(bf2));
313 assertEquals(2, bf2.estimateUnion(bf));
314
315 final BloomFilter bf3 = createEmptyFilter(getTestShape());
316
317 assertEquals(1, bf.estimateUnion(bf3));
318 assertEquals(1, bf3.estimateUnion(bf));
319 }
320
321 @Test
322 public void testIndexProducerMerge() {
323 final Shape shape = Shape.fromKM(5, 10);
324
325 assertIndexProducerMerge(shape, new int[] {0, 2, 4, 6, 8}, new int[] {0, 2, 4, 6, 8});
326
327 assertIndexProducerMerge(shape, new int[] {0, 2, 4, 2, 8}, new int[] {0, 2, 4, 8});
328
329 assertFailedIndexProducerConstructor(shape, new int[] {0, 2, 4, -2, 8});
330
331 assertFailedIndexProducerConstructor(shape, new int[] {0, 2, 4, 12, 8});
332
333 assertIndexProducerMerge(shape, new int[0], new int[0]);
334 }
335
336
337
338
339 @Test
340 public final void testIsFull() {
341
342
343 BloomFilter filter = createEmptyFilter(getTestShape());
344 assertFalse(filter.isFull(), "Should not be full");
345
346 filter = TestingHashers.populateEntireFilter(filter);
347 assertTrue(filter.isFull(), "Should be full");
348
349 filter = createFilter(getTestShape(), new IncrementingHasher(1, 3));
350 assertFalse(filter.isFull(), "Should not be full");
351 }
352
353
354
355
356 @Test
357 public final void testMerge() {
358
359 final BloomFilter bf1 = createFilter(getTestShape(), TestingHashers.FROM1);
360 final BloomFilter bf2 = createFilter(getTestShape(), TestingHashers.FROM11);
361 final BloomFilter bf3 = bf1.copy();
362 bf3.merge(bf2);
363
364
365
366 final long[] bf1Val = bf1.asBitMapArray();
367 final long[] bf2Val = bf2.asBitMapArray();
368 for (int i = 0; i < bf1Val.length; i++) {
369 bf1Val[i] |= bf2Val[i];
370 }
371 bf1.merge(bf2);
372
373 final long[] bf1New = bf1.asBitMapArray();
374 for (int i = 0; i < bf1Val.length; i++) {
375 assertEquals(bf1Val[i], bf1New[i], "Bad value at " + i);
376 }
377
378 assertTrue(bf1.contains(bf2), "Should contain bf2");
379 assertTrue(bf1.contains(bf3), "Should contain bf3");
380
381
382
383 final BloomFilter bf4 = createFilter(getTestShape(), TestingHashers.FROM1);
384 bf4.merge(TestingHashers.FROM11);
385
386 assertTrue(bf4.contains(bf2), "Should contain Bf2");
387 assertTrue(bf4.contains(bf3), "Should contain Bf3");
388
389
390 assertThrows(IllegalArgumentException.class,
391 () -> bf1.merge(new BadHasher(bf1.getShape().getNumberOfBits())));
392 assertThrows(IllegalArgumentException.class, () -> bf1.merge(new BadHasher(-1)));
393
394
395 final Shape s = Shape.fromKM(getTestShape().getNumberOfHashFunctions(), getTestShape().getNumberOfBits() * 3);
396 final Hasher h = new IncrementingHasher(getTestShape().getNumberOfBits() * 2, 1);
397 final BloomFilter bf5 = new SimpleBloomFilter(s);
398 bf5.merge(h);
399 assertThrows(IllegalArgumentException.class, () -> bf1.merge(bf5));
400
401 final BloomFilter bf6 = new SparseBloomFilter(s);
402 bf6.merge(h);
403 assertThrows(IllegalArgumentException.class, () -> bf1.merge(bf6));
404 }
405
406 @Test
407 public void testMergeWithBadHasher() {
408
409 final BloomFilter f = createEmptyFilter(getTestShape());
410 assertThrows(IllegalArgumentException.class,
411 () -> f.merge(new BadHasher(getTestShape().getNumberOfBits())));
412
413 final BloomFilter f2 = createEmptyFilter(getTestShape());
414 assertThrows(IllegalArgumentException.class, () -> f2.merge(new BadHasher(-1)));
415 }
416
417 @Test
418 public void testMergeWithBitMapProducer() {
419 final int bitMapCount = BitMap.numberOfBitMaps(getTestShape().getNumberOfBits());
420 for (int i = 0; i < 5; i++) {
421 final long[] values = new long[bitMapCount];
422 for (final int idx : DefaultIndexProducerTest.generateIntArray(getTestShape().getNumberOfHashFunctions(), getTestShape().getNumberOfBits())) {
423 BitMap.set(values, idx);
424 }
425 final BloomFilter f = createFilter(getTestShape(), BitMapProducer.fromBitMapArray(values));
426 final List<Long> lst = new ArrayList<>();
427 for (final long l : values) {
428 lst.add(l);
429 }
430 assertTrue(f.forEachBitMap(l -> lst.remove(Long.valueOf(l))));
431 assertTrue(lst.isEmpty());
432 }
433
434 final long[] values = new long[bitMapCount];
435 Arrays.fill(values, Long.MAX_VALUE);
436 final BitMapProducer badProducer = BitMapProducer.fromBitMapArray(values);
437 final BloomFilter bf = createEmptyFilter(getTestShape());
438 assertThrows(IllegalArgumentException.class, () -> bf.merge(badProducer));
439
440
441 final BitMapProducer badProducer2 = BitMapProducer.fromBitMapArray(0x80_00_00_00_00_00_00_00L);
442 final BloomFilter bf2 = createEmptyFilter(Shape.fromKM(3, 32));
443 assertThrows(IllegalArgumentException.class, () -> bf2.merge(badProducer2));
444 }
445
446 @Test
447 public void testMergeWithHasher() {
448 for (int i = 0; i < 5; i++) {
449 final BloomFilter f = createEmptyFilter(getTestShape());
450 final int[] expected = DefaultIndexProducerTest.generateIntArray(getTestShape().getNumberOfHashFunctions(), getTestShape().getNumberOfBits());
451 final Hasher hasher = new ArrayHasher(expected);
452 f.merge(hasher);
453
454 assertArrayEquals(DefaultIndexProducerTest.unique(expected), f.asIndexArray());
455 }
456 }
457
458 @Test
459 public void testMergeWithIndexProducer() {
460 for (int i = 0; i < 5; i++) {
461 final int[] values = DefaultIndexProducerTest.generateIntArray(getTestShape().getNumberOfHashFunctions(), getTestShape().getNumberOfBits());
462 final BloomFilter f = createFilter(getTestShape(), IndexProducer.fromIndexArray(values));
463 final BitSet uniqueValues = DefaultIndexProducerTest.uniqueSet(values);
464 assertTrue(f.forEachIndex(idx -> {
465 final boolean result = uniqueValues.get(idx);
466 uniqueValues.clear(idx);
467 return result;
468 }));
469 assertTrue(uniqueValues.isEmpty());
470 }
471
472 final BloomFilter f1 = createEmptyFilter(getTestShape());
473 assertThrows(IllegalArgumentException.class,
474 () -> f1.merge(IndexProducer.fromIndexArray(getTestShape().getNumberOfBits())));
475
476 final BloomFilter f2 = createEmptyFilter(getTestShape());
477 assertThrows(IllegalArgumentException.class,
478 () -> f2.merge(IndexProducer.fromIndexArray(-1)));
479 }
480
481 @Test
482 public final void testNegativeIntersection() {
483 final IndexProducer p1 = IndexProducer.fromIndexArray(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 20, 26, 28, 30, 32, 34, 35, 36, 37, 39, 40, 41, 42, 43, 45, 46, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71);
484 final IndexProducer p2 = IndexProducer.fromIndexArray(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27);
485
486 final BloomFilter filter1 = createEmptyFilter(Shape.fromKM(17, 72));
487 filter1.merge(p1);
488 final BloomFilter filter2 = createEmptyFilter(Shape.fromKM(17, 72));
489 filter2.merge(p2);
490 assertEquals(0, filter1.estimateIntersection(filter2));
491 }
492 }