1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17 package org.apache.commons.collections4.bloomfilter;
18
19 import static org.junit.jupiter.api.Assertions.assertEquals;
20
21 import java.util.function.ToDoubleBiFunction;
22 import java.util.function.ToIntBiFunction;
23
24 import org.junit.jupiter.api.Test;
25
26
27
28
29 public class SetOperationsTest {
30
31 private static void assertSymmetricOperation(final double expected, final ToDoubleBiFunction<BloomFilter, BloomFilter> operation,
32 final BloomFilter filter1, final BloomFilter filter2) {
33 assertEquals(expected, operation.applyAsDouble(filter1, filter2), "op(filter1, filter2)");
34 assertEquals(expected, operation.applyAsDouble(filter2, filter1), "op(filter2, filter1)");
35 }
36
37 private static void assertSymmetricOperation(final int expected, final ToIntBiFunction<BloomFilter, BloomFilter> operation,
38 final BloomFilter filter1, final BloomFilter filter2) {
39 assertEquals(expected, operation.applyAsInt(filter1, filter2), "op(filter1, filter2)");
40 assertEquals(expected, operation.applyAsInt(filter2, filter1), "op(filter2, filter1)");
41 }
42
43 private final Shape shape = Shape.fromKM(17, 72);
44
45 private BloomFilter createFilter(final Shape shape, final Hasher hasher) {
46 final BloomFilter bf = new SimpleBloomFilter(shape);
47 bf.merge(hasher);
48 return bf;
49 }
50
51 private BloomFilter createFilter(final Shape shape, final IndexProducer producer) {
52 final BloomFilter bf = new SparseBloomFilter(shape);
53 bf.merge(producer);
54 return bf;
55 }
56
57 @Test
58 public final void testAndCardinality() {
59 final Shape shape = Shape.fromKM(3, 128);
60 BloomFilter filter1 = createFilter(shape, IndexProducer.fromIndexArray(1, 63, 64));
61 BloomFilter filter2 = createFilter(shape, IndexProducer.fromIndexArray(5, 64, 69));
62 assertSymmetricOperation(1, SetOperations::andCardinality, filter1, filter2);
63
64 filter1 = createFilter(shape, IndexProducer.fromIndexArray(1, 63));
65 filter2 = createFilter(shape, IndexProducer.fromIndexArray(5, 64, 69));
66 assertSymmetricOperation(0, SetOperations::andCardinality, filter1, filter2);
67
68 filter1 = createFilter(shape, IndexProducer.fromIndexArray(5, 63));
69 filter2 = createFilter(shape, IndexProducer.fromIndexArray(5, 64, 69));
70 assertSymmetricOperation(1, SetOperations::andCardinality, filter1, filter2);
71 }
72
73 @Test
74 public final void testAndCardinalityWithDifferentLengthFilters() {
75 final Shape shape = Shape.fromKM(3, 128);
76 final Shape shape2 = Shape.fromKM(3, 192);
77 BloomFilter filter1 = createFilter(shape, IndexProducer.fromIndexArray(1, 63, 64));
78 BloomFilter filter2 = createFilter(shape2, IndexProducer.fromIndexArray(5, 64, 169));
79 assertSymmetricOperation(1, SetOperations::andCardinality, filter1, filter2);
80
81 filter1 = createFilter(shape, IndexProducer.fromIndexArray(1, 63));
82 filter2 = createFilter(shape2, IndexProducer.fromIndexArray(5, 64, 169));
83 assertSymmetricOperation(0, SetOperations::andCardinality, filter1, filter2);
84
85 filter1 = createFilter(shape, IndexProducer.fromIndexArray(5, 63));
86 filter2 = createFilter(shape2, IndexProducer.fromIndexArray(5, 64, 169));
87 assertSymmetricOperation(1, SetOperations::andCardinality, filter1, filter2);
88 }
89
90 @Test
91 public final void testCommutativityOnMismatchedSizes() {
92 final BitMapProducer p1 = BitMapProducer.fromBitMapArray(0x3L, 0x5L);
93 final BitMapProducer p2 = BitMapProducer.fromBitMapArray(0x1L);
94
95 assertEquals(SetOperations.orCardinality(p1, p2), SetOperations.orCardinality(p2, p1));
96 assertEquals(SetOperations.xorCardinality(p1, p2), SetOperations.xorCardinality(p2, p1));
97 assertEquals(SetOperations.andCardinality(p1, p2), SetOperations.andCardinality(p2, p1));
98 assertEquals(SetOperations.hammingDistance(p1, p2), SetOperations.hammingDistance(p2, p1));
99 assertEquals(SetOperations.cosineDistance(p1, p2), SetOperations.cosineDistance(p2, p1));
100 assertEquals(SetOperations.cosineSimilarity(p1, p2), SetOperations.cosineSimilarity(p2, p1));
101 assertEquals(SetOperations.jaccardDistance(p1, p2), SetOperations.jaccardDistance(p2, p1));
102 assertEquals(SetOperations.jaccardSimilarity(p1, p2), SetOperations.jaccardSimilarity(p2, p1));
103 }
104
105
106
107
108 @Test
109 public final void testCosineDistance() {
110
111 BloomFilter filter1 = createFilter(shape, TestingHashers.FROM1);
112 BloomFilter filter2 = createFilter(shape, TestingHashers.FROM1);
113
114
115 double expected = 0;
116 assertSymmetricOperation(expected, SetOperations::cosineDistance, filter1, filter2);
117
118 final Shape shape2 = Shape.fromKM(2, 72);
119 filter1 = createFilter(shape2, TestingHashers.FROM1);
120 filter2 = createFilter(shape2, new IncrementingHasher(2, 1));
121
122 int dotProduct = 1;
123 int cardinalityA = 2;
124 int cardinalityB = 2;
125 expected = 1 - dotProduct / Math.sqrt(cardinalityA * cardinalityB);
126 assertSymmetricOperation(expected, SetOperations::cosineDistance, filter1, filter2);
127
128 filter1 = createFilter(shape, TestingHashers.FROM1);
129 filter2 = createFilter(shape, TestingHashers.FROM11);
130 dotProduct = 7;
131 cardinalityA = 17;
132 cardinalityB = 17;
133 expected = 1 - dotProduct / Math.sqrt(cardinalityA * cardinalityB);
134 assertSymmetricOperation(expected, SetOperations::cosineDistance, filter1, filter2);
135
136
137 filter1 = createFilter(shape, TestingHashers.FROM1);
138 filter2 = new SimpleBloomFilter(shape);
139
140 dotProduct = 0;
141 cardinalityA = 2;
142 cardinalityB = 0;
143 expected = 1.0;
144 assertSymmetricOperation(expected, SetOperations::cosineDistance, filter1, filter2);
145
146 dotProduct = 0;
147 cardinalityA = 0;
148 cardinalityB = 0;
149 expected = 1.0;
150 assertSymmetricOperation(expected, SetOperations::cosineDistance, filter1, filter2);
151 }
152
153
154
155
156 @Test
157 public final void testCosineSimilarity() {
158 BloomFilter filter1 = createFilter(shape, TestingHashers.FROM1);
159 BloomFilter filter2 = createFilter(shape, TestingHashers.FROM1);
160
161 int dotProduct = 17;
162 int cardinalityA = 17;
163 int cardinalityB = 17;
164 double expected = 1.0;
165 assertSymmetricOperation(expected, SetOperations::cosineSimilarity, filter1, filter2);
166
167 dotProduct = 7;
168 cardinalityA = 17;
169 cardinalityB = 17;
170 expected = dotProduct / Math.sqrt(cardinalityA * cardinalityB);
171 filter2 = createFilter(shape, TestingHashers.FROM11);
172 assertSymmetricOperation(expected, SetOperations::cosineSimilarity, filter1, filter2);
173
174
175 filter1 = new SimpleBloomFilter(shape);
176 filter2 = new SimpleBloomFilter(shape);
177
178 final BloomFilter filter3 = createFilter(shape, TestingHashers.FROM1);
179 assertSymmetricOperation(0.0, SetOperations::cosineSimilarity, filter1, filter2);
180 assertSymmetricOperation(0.0, SetOperations::cosineSimilarity, filter1, filter3);
181 }
182
183
184
185
186 @Test
187 public final void testHammingDistance() {
188 final BloomFilter filter1 = createFilter(shape, TestingHashers.FROM1);
189 BloomFilter filter2 = createFilter(shape, TestingHashers.FROM1);
190
191 int hammingDistance = 0;
192 assertSymmetricOperation(hammingDistance, SetOperations::hammingDistance, filter1, filter2);
193
194 filter2 = createFilter(shape, TestingHashers.FROM11);
195 hammingDistance = 20;
196 assertSymmetricOperation(hammingDistance, SetOperations::hammingDistance, filter1, filter2);
197 }
198
199
200
201
202 @Test
203 public final void testJaccardDistance() {
204 BloomFilter filter1 = createFilter(shape, TestingHashers.FROM1);
205 BloomFilter filter2 = createFilter(shape, TestingHashers.FROM1);
206
207
208 assertSymmetricOperation(0.0, SetOperations::jaccardDistance, filter1, filter2);
209
210 filter2 = createFilter(shape, TestingHashers.FROM11);
211 final double intersection = 7.0;
212 final int union = 27;
213 final double expected = 1 - intersection / union;
214 assertSymmetricOperation(expected, SetOperations::jaccardDistance, filter1, filter2);
215
216
217 filter1 = new SimpleBloomFilter(shape);
218 filter2 = new SimpleBloomFilter(shape);
219 final BloomFilter filter3 = createFilter(shape, TestingHashers.FROM1);
220
221
222 assertSymmetricOperation(1.0, SetOperations::jaccardDistance, filter1, filter2);
223 assertSymmetricOperation(1.0, SetOperations::jaccardDistance, filter1, filter3);
224 }
225
226
227
228
229 @Test
230 public final void testJaccardSimilarity() {
231 BloomFilter filter1 = createFilter(shape, TestingHashers.FROM1);
232 BloomFilter filter2 = createFilter(shape, TestingHashers.FROM1);
233
234 double intersection = 17.0;
235 int union = 17;
236 double expected = intersection / union;
237 assertSymmetricOperation(expected, SetOperations::jaccardSimilarity, filter1, filter2);
238
239 filter2 = createFilter(shape, TestingHashers.FROM11);
240 intersection = 7.0;
241 union = 27;
242 expected = intersection / union;
243 assertSymmetricOperation(expected, SetOperations::jaccardSimilarity, filter1, filter2);
244
245
246 filter1 = new SimpleBloomFilter(shape);
247 filter2 = new SimpleBloomFilter(shape);
248 assertSymmetricOperation(0.0, SetOperations::jaccardSimilarity, filter1, filter2);
249
250 intersection = 0.0;
251 union = 17;
252 expected = intersection / union;
253 assertSymmetricOperation(expected, SetOperations::jaccardSimilarity, filter1, filter2);
254 }
255
256 @Test
257 public final void testOrCardinality() {
258 final Shape shape = Shape.fromKM(3, 128);
259 BloomFilter filter1 = createFilter(shape, IndexProducer.fromIndexArray(1, 63, 64));
260 BloomFilter filter2 = createFilter(shape, IndexProducer.fromIndexArray(5, 64, 69));
261 assertSymmetricOperation(5, SetOperations::orCardinality, filter1, filter2);
262
263 filter1 = createFilter(shape, IndexProducer.fromIndexArray(1, 63));
264 filter2 = createFilter(shape, IndexProducer.fromIndexArray(5, 64, 69));
265 assertSymmetricOperation(5, SetOperations::orCardinality, filter1, filter2);
266
267 filter1 = createFilter(shape, IndexProducer.fromIndexArray(5, 63));
268 filter2 = createFilter(shape, IndexProducer.fromIndexArray(5, 64, 69));
269 assertSymmetricOperation(4, SetOperations::orCardinality, filter1, filter2);
270 }
271
272 @Test
273 public final void testOrCardinalityWithDifferentLengthFilters() {
274 final Shape shape = Shape.fromKM(3, 128);
275 final Shape shape2 = Shape.fromKM(3, 192);
276 BloomFilter filter1 = createFilter(shape, IndexProducer.fromIndexArray(1, 63, 64));
277 BloomFilter filter2 = createFilter(shape2, IndexProducer.fromIndexArray(5, 64, 169));
278 assertSymmetricOperation(5, SetOperations::orCardinality, filter1, filter2);
279
280 filter1 = createFilter(shape, IndexProducer.fromIndexArray(1, 63));
281 filter2 = createFilter(shape2, IndexProducer.fromIndexArray(5, 64, 169));
282 assertSymmetricOperation(5, SetOperations::orCardinality, filter1, filter2);
283
284 filter1 = createFilter(shape, IndexProducer.fromIndexArray(5, 63));
285 filter2 = createFilter(shape2, IndexProducer.fromIndexArray(5, 64, 169));
286 assertSymmetricOperation(4, SetOperations::orCardinality, filter1, filter2);
287 }
288
289 @Test
290 public final void testXorCardinality() {
291 final Shape shape = Shape.fromKM(3, 128);
292 BloomFilter filter1 = createFilter(shape, IndexProducer.fromIndexArray(1, 63, 64));
293 BloomFilter filter2 = createFilter(shape, IndexProducer.fromIndexArray(5, 64, 69));
294 assertSymmetricOperation(4, SetOperations::xorCardinality, filter1, filter2);
295
296 filter1 = createFilter(shape, IndexProducer.fromIndexArray(1, 63));
297 filter2 = createFilter(shape, IndexProducer.fromIndexArray(5, 64, 69));
298 assertSymmetricOperation(5, SetOperations::xorCardinality, filter1, filter2);
299
300 filter1 = createFilter(shape, IndexProducer.fromIndexArray(5, 63));
301 filter2 = createFilter(shape, IndexProducer.fromIndexArray(5, 64, 69));
302 assertSymmetricOperation(3, SetOperations::xorCardinality, filter1, filter2);
303
304 final Shape bigShape = Shape.fromKM(3, 192);
305 filter1 = createFilter(bigShape, IndexProducer.fromIndexArray(1, 63, 185));
306 filter2 = createFilter(shape, IndexProducer.fromIndexArray(5, 63, 69));
307 assertSymmetricOperation(4, SetOperations::xorCardinality, filter1, filter2);
308 }
309
310 @Test
311 public final void testXorCardinalityWithDifferentLengthFilters() {
312 final Shape shape = Shape.fromKM(3, 128);
313 final Shape shape2 = Shape.fromKM(3, 192);
314
315 BloomFilter filter1 = createFilter(shape, IndexProducer.fromIndexArray(1, 63, 64));
316 BloomFilter filter2 = createFilter(shape2, IndexProducer.fromIndexArray(5, 64, 169));
317 assertSymmetricOperation(4, SetOperations::xorCardinality, filter1, filter2);
318
319 filter1 = createFilter(shape, IndexProducer.fromIndexArray(1, 63));
320 filter2 = createFilter(shape2, IndexProducer.fromIndexArray(5, 64, 169));
321 assertSymmetricOperation(5, SetOperations::xorCardinality, filter1, filter2);
322
323 filter1 = createFilter(shape, IndexProducer.fromIndexArray(5, 63));
324 filter2 = createFilter(shape2, IndexProducer.fromIndexArray(5, 64, 169));
325 assertSymmetricOperation(3, SetOperations::xorCardinality, filter1, filter2);
326 }
327 }