View Javadoc
1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one or more
3    * contributor license agreements.  See the NOTICE file distributed with
4    * this work for additional information regarding copyright ownership.
5    * The ASF licenses this file to You under the Apache License, Version 2.0
6    * (the "License"); you may not use this file except in compliance with
7    * the License.  You may obtain a copy of the License at
8    *
9    *      http://www.apache.org/licenses/LICENSE-2.0
10   *
11   * Unless required by applicable law or agreed to in writing, software
12   * distributed under the License is distributed on an "AS IS" BASIS,
13   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14   * See the License for the specific language governing permissions and
15   * limitations under the License.
16   */
17  package org.apache.commons.collections4.bloomfilter;
18  
19  import static org.junit.jupiter.api.Assertions.assertEquals;
20  
21  import java.util.function.ToDoubleBiFunction;
22  import java.util.function.ToIntBiFunction;
23  
24  import org.junit.jupiter.api.Test;
25  
26  /**
27   * Test {@link SetOperations}.
28   */
29  public class SetOperationsTest {
30  
31      private static void assertSymmetricOperation(final double expected, final ToDoubleBiFunction<BloomFilter, BloomFilter> operation,
32              final BloomFilter filter1, final BloomFilter filter2) {
33          assertEquals(expected, operation.applyAsDouble(filter1, filter2), "op(filter1, filter2)");
34          assertEquals(expected, operation.applyAsDouble(filter2, filter1), "op(filter2, filter1)");
35      }
36  
37      private static void assertSymmetricOperation(final int expected, final ToIntBiFunction<BloomFilter, BloomFilter> operation,
38              final BloomFilter filter1, final BloomFilter filter2) {
39          assertEquals(expected, operation.applyAsInt(filter1, filter2), "op(filter1, filter2)");
40          assertEquals(expected, operation.applyAsInt(filter2, filter1), "op(filter2, filter1)");
41      }
42  
43      private final Shape shape = Shape.fromKM(17, 72);
44  
45      private BloomFilter createFilter(final Shape shape, final Hasher hasher) {
46          final BloomFilter bf = new SimpleBloomFilter(shape);
47          bf.merge(hasher);
48          return bf;
49      }
50  
51      private BloomFilter createFilter(final Shape shape, final IndexProducer producer) {
52          final BloomFilter bf = new SparseBloomFilter(shape);
53          bf.merge(producer);
54          return bf;
55      }
56  
57      @Test
58      public final void testAndCardinality() {
59          final Shape shape = Shape.fromKM(3, 128);
60          BloomFilter filter1 = createFilter(shape, IndexProducer.fromIndexArray(1, 63, 64));
61          BloomFilter filter2 = createFilter(shape, IndexProducer.fromIndexArray(5, 64, 69));
62          assertSymmetricOperation(1, SetOperations::andCardinality, filter1, filter2);
63  
64          filter1 = createFilter(shape, IndexProducer.fromIndexArray(1, 63));
65          filter2 = createFilter(shape, IndexProducer.fromIndexArray(5, 64, 69));
66          assertSymmetricOperation(0, SetOperations::andCardinality, filter1, filter2);
67  
68          filter1 = createFilter(shape, IndexProducer.fromIndexArray(5, 63));
69          filter2 = createFilter(shape, IndexProducer.fromIndexArray(5, 64, 69));
70          assertSymmetricOperation(1, SetOperations::andCardinality, filter1, filter2);
71      }
72  
73      @Test
74      public final void testAndCardinalityWithDifferentLengthFilters() {
75          final Shape shape = Shape.fromKM(3, 128);
76          final Shape shape2 = Shape.fromKM(3, 192);
77          BloomFilter filter1 = createFilter(shape, IndexProducer.fromIndexArray(1, 63, 64));
78          BloomFilter filter2 = createFilter(shape2, IndexProducer.fromIndexArray(5, 64, 169));
79          assertSymmetricOperation(1, SetOperations::andCardinality, filter1, filter2);
80  
81          filter1 = createFilter(shape, IndexProducer.fromIndexArray(1, 63));
82          filter2 = createFilter(shape2, IndexProducer.fromIndexArray(5, 64, 169));
83          assertSymmetricOperation(0, SetOperations::andCardinality, filter1, filter2);
84  
85          filter1 = createFilter(shape, IndexProducer.fromIndexArray(5, 63));
86          filter2 = createFilter(shape2, IndexProducer.fromIndexArray(5, 64, 169));
87          assertSymmetricOperation(1, SetOperations::andCardinality, filter1, filter2);
88      }
89  
90      @Test
91      public final void testCommutativityOnMismatchedSizes() {
92          final BitMapProducer p1 = BitMapProducer.fromBitMapArray(0x3L, 0x5L);
93          final BitMapProducer p2 = BitMapProducer.fromBitMapArray(0x1L);
94  
95          assertEquals(SetOperations.orCardinality(p1, p2), SetOperations.orCardinality(p2, p1));
96          assertEquals(SetOperations.xorCardinality(p1, p2), SetOperations.xorCardinality(p2, p1));
97          assertEquals(SetOperations.andCardinality(p1, p2), SetOperations.andCardinality(p2, p1));
98          assertEquals(SetOperations.hammingDistance(p1, p2), SetOperations.hammingDistance(p2, p1));
99          assertEquals(SetOperations.cosineDistance(p1, p2), SetOperations.cosineDistance(p2, p1));
100         assertEquals(SetOperations.cosineSimilarity(p1, p2), SetOperations.cosineSimilarity(p2, p1));
101         assertEquals(SetOperations.jaccardDistance(p1, p2), SetOperations.jaccardDistance(p2, p1));
102         assertEquals(SetOperations.jaccardSimilarity(p1, p2), SetOperations.jaccardSimilarity(p2, p1));
103     }
104 
105     /**
106      * Tests that the Cosine similarity is correctly calculated.
107      */
108     @Test
109     public final void testCosineDistance() {
110 
111         BloomFilter filter1 = createFilter(shape, TestingHashers.FROM1);
112         BloomFilter filter2 = createFilter(shape, TestingHashers.FROM1);
113 
114         // identical filters should have no distance.
115         double expected = 0;
116         assertSymmetricOperation(expected, SetOperations::cosineDistance, filter1, filter2);
117 
118         final Shape shape2 = Shape.fromKM(2, 72);
119         filter1 = createFilter(shape2, TestingHashers.FROM1);
120         filter2 = createFilter(shape2, new IncrementingHasher(2, 1));
121 
122         int dotProduct = /* [1,2] & [2,3] = [2] = */ 1;
123         int cardinalityA = 2;
124         int cardinalityB = 2;
125         expected = 1 - dotProduct / Math.sqrt(cardinalityA * cardinalityB);
126         assertSymmetricOperation(expected, SetOperations::cosineDistance, filter1, filter2);
127 
128         filter1 = createFilter(shape, TestingHashers.FROM1);
129         filter2 = createFilter(shape, TestingHashers.FROM11);
130         dotProduct = /* [1..17] & [11..27] = [] = */ 7;
131         cardinalityA = 17;
132         cardinalityB = 17;
133         expected = 1 - dotProduct / Math.sqrt(cardinalityA * cardinalityB);
134         assertSymmetricOperation(expected, SetOperations::cosineDistance, filter1, filter2);
135 
136         // test with no values
137         filter1 = createFilter(shape, TestingHashers.FROM1);
138         filter2 = new SimpleBloomFilter(shape);
139 
140         dotProduct = /* [1,2] & [] = [] = */ 0;
141         cardinalityA = 2;
142         cardinalityB = 0;
143         expected = /* 1 - (dotProduct/Math.sqrt(cardinalityA * cardinalityB)) = */ 1.0;
144         assertSymmetricOperation(expected, SetOperations::cosineDistance, filter1, filter2);
145 
146         dotProduct = /* [] & [] = [] = */ 0;
147         cardinalityA = 0;
148         cardinalityB = 0;
149         expected = /* 1 - (dotProduct/Math.sqrt(cardinalityA * cardinalityB)) = */ 1.0;
150         assertSymmetricOperation(expected, SetOperations::cosineDistance, filter1, filter2);
151     }
152 
153     /**
154      * Tests that the Cosine similarity is correctly calculated.
155      */
156     @Test
157     public final void testCosineSimilarity() {
158         BloomFilter filter1 = createFilter(shape, TestingHashers.FROM1);
159         BloomFilter filter2 = createFilter(shape, TestingHashers.FROM1);
160 
161         int dotProduct = /* [1..17] & [1..17] = [1..17] = */ 17;
162         int cardinalityA = 17;
163         int cardinalityB = 17;
164         double expected = /* dotProduct/Sqrt(cardinalityA * cardinalityB) = */ 1.0;
165         assertSymmetricOperation(expected, SetOperations::cosineSimilarity, filter1, filter2);
166 
167         dotProduct = /* [1..17] & [11..27] = [11..17] = */ 7;
168         cardinalityA = 17;
169         cardinalityB = 17;
170         expected = dotProduct / Math.sqrt(cardinalityA * cardinalityB);
171         filter2 = createFilter(shape, TestingHashers.FROM11);
172         assertSymmetricOperation(expected, SetOperations::cosineSimilarity, filter1, filter2);
173 
174         // test no values
175         filter1 = new SimpleBloomFilter(shape);
176         filter2 = new SimpleBloomFilter(shape);
177         // build a filter
178         final BloomFilter filter3 = createFilter(shape, TestingHashers.FROM1);
179         assertSymmetricOperation(0.0, SetOperations::cosineSimilarity, filter1, filter2);
180         assertSymmetricOperation(0.0, SetOperations::cosineSimilarity, filter1, filter3);
181     }
182 
183     /**
184      * Tests that the Hamming distance is correctly calculated.
185      */
186     @Test
187     public final void testHammingDistance() {
188         final BloomFilter filter1 = createFilter(shape, TestingHashers.FROM1);
189         BloomFilter filter2 = createFilter(shape, TestingHashers.FROM1);
190 
191         int hammingDistance = /* [1..17] ^ [1..17] = [] = */ 0;
192         assertSymmetricOperation(hammingDistance, SetOperations::hammingDistance, filter1, filter2);
193 
194         filter2 = createFilter(shape, TestingHashers.FROM11);
195         hammingDistance = /* [1..17] ^ [11..27] = [1..10][17-27] = */ 20;
196         assertSymmetricOperation(hammingDistance, SetOperations::hammingDistance, filter1, filter2);
197     }
198 
199     /**
200      * Tests that the Jaccard distance is correctly calculated.
201      */
202     @Test
203     public final void testJaccardDistance() {
204         BloomFilter filter1 = createFilter(shape, TestingHashers.FROM1);
205         BloomFilter filter2 = createFilter(shape, TestingHashers.FROM1);
206 
207         // 1 - jaccardSimilarity -- see jaccardSimilarityTest
208         assertSymmetricOperation(0.0, SetOperations::jaccardDistance, filter1, filter2);
209 
210         filter2 = createFilter(shape, TestingHashers.FROM11);
211         final double intersection = /* [1..17] & [11..27] = [11..17] = */ 7.0;
212         final int union = /* [1..17] | [11..27] = [1..27] = */ 27;
213         final double expected = 1 - intersection / union;
214         assertSymmetricOperation(expected, SetOperations::jaccardDistance, filter1, filter2);
215 
216         // test no values
217         filter1 = new SimpleBloomFilter(shape);
218         filter2 = new SimpleBloomFilter(shape);
219         final BloomFilter filter3 = createFilter(shape, TestingHashers.FROM1);
220 
221         // 1 - jaccardSimilarity -- see jaccardSimilarityTest
222         assertSymmetricOperation(1.0, SetOperations::jaccardDistance, filter1, filter2);
223         assertSymmetricOperation(1.0, SetOperations::jaccardDistance, filter1, filter3);
224     }
225 
226     /**
227      * Tests that the Jaccard similarity is correctly calculated.
228      */
229     @Test
230     public final void testJaccardSimilarity() {
231         BloomFilter filter1 = createFilter(shape, TestingHashers.FROM1);
232         BloomFilter filter2 = createFilter(shape, TestingHashers.FROM1);
233 
234         double intersection = /* [1..17] & [1..17] = [1..17] = */ 17.0;
235         int union = /* [1..17] | [1..17] = [1..17] = */ 17;
236         double expected = intersection / union;
237         assertSymmetricOperation(expected, SetOperations::jaccardSimilarity, filter1, filter2);
238 
239         filter2 = createFilter(shape, TestingHashers.FROM11);
240         intersection = /* [1..17] & [11..27] = [11..17] = */ 7.0;
241         union = /* [1..17] | [11..27] = [1..27] = */ 27;
242         expected = intersection / union;
243         assertSymmetricOperation(expected, SetOperations::jaccardSimilarity, filter1, filter2);
244 
245         // test no values
246         filter1 = new SimpleBloomFilter(shape);
247         filter2 = new SimpleBloomFilter(shape);
248         assertSymmetricOperation(0.0, SetOperations::jaccardSimilarity, filter1, filter2);
249 
250         intersection = /* [] & [1..17] = [] = */ 0.0;
251         union = /* [] | [1..17] = [] = */ 17;
252         expected = intersection / union;
253         assertSymmetricOperation(expected, SetOperations::jaccardSimilarity, filter1, filter2);
254     }
255 
256     @Test
257     public final void testOrCardinality() {
258         final Shape shape = Shape.fromKM(3, 128);
259         BloomFilter filter1 = createFilter(shape, IndexProducer.fromIndexArray(1, 63, 64));
260         BloomFilter filter2 = createFilter(shape, IndexProducer.fromIndexArray(5, 64, 69));
261         assertSymmetricOperation(5, SetOperations::orCardinality, filter1, filter2);
262 
263         filter1 = createFilter(shape, IndexProducer.fromIndexArray(1, 63));
264         filter2 = createFilter(shape, IndexProducer.fromIndexArray(5, 64, 69));
265         assertSymmetricOperation(5, SetOperations::orCardinality, filter1, filter2);
266 
267         filter1 = createFilter(shape, IndexProducer.fromIndexArray(5, 63));
268         filter2 = createFilter(shape, IndexProducer.fromIndexArray(5, 64, 69));
269         assertSymmetricOperation(4, SetOperations::orCardinality, filter1, filter2);
270     }
271 
272     @Test
273     public final void testOrCardinalityWithDifferentLengthFilters() {
274         final Shape shape = Shape.fromKM(3, 128);
275         final Shape shape2 = Shape.fromKM(3, 192);
276         BloomFilter filter1 = createFilter(shape, IndexProducer.fromIndexArray(1, 63, 64));
277         BloomFilter filter2 = createFilter(shape2, IndexProducer.fromIndexArray(5, 64, 169));
278         assertSymmetricOperation(5, SetOperations::orCardinality, filter1, filter2);
279 
280         filter1 = createFilter(shape, IndexProducer.fromIndexArray(1, 63));
281         filter2 = createFilter(shape2, IndexProducer.fromIndexArray(5, 64, 169));
282         assertSymmetricOperation(5, SetOperations::orCardinality, filter1, filter2);
283 
284         filter1 = createFilter(shape, IndexProducer.fromIndexArray(5, 63));
285         filter2 = createFilter(shape2, IndexProducer.fromIndexArray(5, 64, 169));
286         assertSymmetricOperation(4, SetOperations::orCardinality, filter1, filter2);
287     }
288 
289     @Test
290     public final void testXorCardinality() {
291         final Shape shape = Shape.fromKM(3, 128);
292         BloomFilter filter1 = createFilter(shape, IndexProducer.fromIndexArray(1, 63, 64));
293         BloomFilter filter2 = createFilter(shape, IndexProducer.fromIndexArray(5, 64, 69));
294         assertSymmetricOperation(4, SetOperations::xorCardinality, filter1, filter2);
295 
296         filter1 = createFilter(shape, IndexProducer.fromIndexArray(1, 63));
297         filter2 = createFilter(shape, IndexProducer.fromIndexArray(5, 64, 69));
298         assertSymmetricOperation(5, SetOperations::xorCardinality, filter1, filter2);
299 
300         filter1 = createFilter(shape, IndexProducer.fromIndexArray(5, 63));
301         filter2 = createFilter(shape, IndexProducer.fromIndexArray(5, 64, 69));
302         assertSymmetricOperation(3, SetOperations::xorCardinality, filter1, filter2);
303 
304         final Shape bigShape = Shape.fromKM(3, 192);
305         filter1 = createFilter(bigShape, IndexProducer.fromIndexArray(1, 63, 185));
306         filter2 = createFilter(shape, IndexProducer.fromIndexArray(5, 63, 69));
307         assertSymmetricOperation(4, SetOperations::xorCardinality, filter1, filter2);
308     }
309 
310     @Test
311     public final void testXorCardinalityWithDifferentLengthFilters() {
312         final Shape shape = Shape.fromKM(3, 128);
313         final Shape shape2 = Shape.fromKM(3, 192);
314 
315         BloomFilter filter1 = createFilter(shape, IndexProducer.fromIndexArray(1, 63, 64));
316         BloomFilter filter2 = createFilter(shape2, IndexProducer.fromIndexArray(5, 64, 169));
317         assertSymmetricOperation(4, SetOperations::xorCardinality, filter1, filter2);
318 
319         filter1 = createFilter(shape, IndexProducer.fromIndexArray(1, 63));
320         filter2 = createFilter(shape2, IndexProducer.fromIndexArray(5, 64, 169));
321         assertSymmetricOperation(5, SetOperations::xorCardinality, filter1, filter2);
322 
323         filter1 = createFilter(shape, IndexProducer.fromIndexArray(5, 63));
324         filter2 = createFilter(shape2, IndexProducer.fromIndexArray(5, 64, 169));
325         assertSymmetricOperation(3, SetOperations::xorCardinality, filter1, filter2);
326     }
327 }