View Javadoc
1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one or more
3    * contributor license agreements.  See the NOTICE file distributed with
4    * this work for additional information regarding copyright ownership.
5    * The ASF licenses this file to You under the Apache License, Version 2.0
6    * (the "License"); you may not use this file except in compliance with
7    * the License.  You may obtain a copy of the License at
8    *
9    *      http://www.apache.org/licenses/LICENSE-2.0
10   *
11   * Unless required by applicable law or agreed to in writing, software
12   * distributed under the License is distributed on an "AS IS" BASIS,
13   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14   * See the License for the specific language governing permissions and
15   * limitations under the License.
16   */
17  package org.apache.commons.collections4.bloomfilter;
18  
19  import static org.junit.jupiter.api.Assertions.assertEquals;
20  import static org.junit.jupiter.api.Assertions.assertThrows;
21  import static org.junit.jupiter.api.Assertions.assertTrue;
22  
23  import java.util.TreeSet;
24  import java.util.function.IntPredicate;
25  import java.util.function.LongPredicate;
26  
27  import org.junit.jupiter.api.Test;
28  
29  /**
30   * Tests for the {@link BloomFilter}.
31   */
32  public class DefaultBloomFilterTest extends AbstractBloomFilterTest<DefaultBloomFilterTest.AbstractDefaultBloomFilter> {
33      abstract static class AbstractDefaultBloomFilter implements BloomFilter {
34          private final Shape shape;
35          protected TreeSet<Integer> indices;
36  
37          AbstractDefaultBloomFilter(final Shape shape) {
38              this.shape = shape;
39              this.indices = new TreeSet<>();
40          }
41  
42          @Override
43          public int cardinality() {
44              return indices.size();
45          }
46  
47          private void checkIndicesRange() {
48              if (!indices.isEmpty()) {
49                  if (indices.last() >= shape.getNumberOfBits()) {
50                      throw new IllegalArgumentException(
51                          String.format("Value in list %s is greater than maximum value (%s)", indices.last(),
52                              shape.getNumberOfBits()));
53                  }
54                  if (indices.first() < 0) {
55                      throw new IllegalArgumentException(
56                          String.format("Value in list %s is less than 0", indices.first()));
57                  }
58              }
59          }
60  
61          @Override
62          public void clear() {
63              indices.clear();
64          }
65  
66          @Override
67          public boolean contains(final BitMapProducer bitMapProducer) {
68              return contains(IndexProducer.fromBitMapProducer(bitMapProducer));
69          }
70  
71          @Override
72          public boolean contains(final IndexProducer indexProducer) {
73              return indexProducer.forEachIndex(indices::contains);
74          }
75  
76          @Override
77          public boolean forEachBitMap(final LongPredicate consumer) {
78              return BitMapProducer.fromIndexProducer(this, shape.getNumberOfBits()).forEachBitMap(consumer);
79          }
80  
81          @Override
82          public boolean forEachIndex(final IntPredicate consumer) {
83              for (final Integer i : indices) {
84                  if (!consumer.test(i)) {
85                      return false;
86                  }
87              }
88              return true;
89          }
90  
91          @Override
92          public Shape getShape() {
93              return shape;
94          }
95  
96          @Override
97          public boolean merge(final BitMapProducer bitMapProducer) {
98              return merge(IndexProducer.fromBitMapProducer(bitMapProducer));
99          }
100 
101         @Override
102         public boolean merge(final IndexProducer indexProducer) {
103             final boolean result = indexProducer.forEachIndex(x -> {
104                 indices.add(x);
105                 return true;
106             });
107             checkIndicesRange();
108             return result;
109         }
110     }
111 
112     static class BrokenCardinality extends NonSparseDefaultBloomFilter {
113 
114         BrokenCardinality(final Shape shape) {
115             super(shape);
116         }
117 
118         @Override
119         public int cardinality() {
120             return super.cardinality() + 1;
121         }
122     }
123 
124     /**
125      * A default implementation of a non-sparse Bloom filter.
126      */
127     public static class NonSparseDefaultBloomFilter extends AbstractDefaultBloomFilter {
128 
129         public NonSparseDefaultBloomFilter(final Shape shape) {
130             super(shape);
131         }
132 
133         @Override
134         public int characteristics() {
135             return 0;
136         }
137 
138         @Override
139         public AbstractDefaultBloomFilter copy() {
140             final AbstractDefaultBloomFilter result = new NonSparseDefaultBloomFilter(getShape());
141             result.indices.addAll(indices);
142             return result;
143         }
144     }
145 
146     /**
147      * A default implementation of a Sparse bloom filter.
148      */
149     public static class SparseDefaultBloomFilter extends AbstractDefaultBloomFilter {
150 
151         public SparseDefaultBloomFilter(final Shape shape) {
152             super(shape);
153         }
154 
155         @Override
156         public int characteristics() {
157             return SPARSE;
158         }
159 
160         @Override
161         public AbstractDefaultBloomFilter copy() {
162             final AbstractDefaultBloomFilter result = new SparseDefaultBloomFilter(getShape());
163             result.indices.addAll(indices);
164             return result;
165         }
166     }
167 
168     @Override
169     protected AbstractDefaultBloomFilter createEmptyFilter(final Shape shape) {
170         return new SparseDefaultBloomFilter(shape);
171     }
172 
173     @Test
174     public void testDefaultBloomFilterSimpleSpecificMerge() {
175         final AbstractDefaultBloomFilter filter = new SparseDefaultBloomFilter(Shape.fromKM(3, 150));
176         final Hasher hasher = new IncrementingHasher(0, 1);
177         assertTrue(filter.merge(hasher));
178         assertEquals(3, filter.cardinality());
179     }
180 
181     @Test
182     public void testDefaultBloomFilterSparseSpecificMerge() {
183         final Shape shape = Shape.fromKM(3, 150);
184         final AbstractDefaultBloomFilter filter = new SparseDefaultBloomFilter(shape);
185         final AbstractDefaultBloomFilter filter2 = createFilter(shape, new IncrementingHasher(0, 1));
186         final BloomFilter newFilter = filter.copy();
187         newFilter.merge(filter2);
188         assertEquals(3, newFilter.cardinality());
189     }
190 
191     @Test
192     public void testEstimateLargeN() {
193         final Shape s = Shape.fromKM(1, Integer.MAX_VALUE);
194         // create a very large filter with Integer.MAX_VALUE-1 bits set.
195         final BloomFilter bf1 = new SimpleBloomFilter(s);
196         bf1.merge((BitMapProducer) predicate -> {
197             int limit = Integer.MAX_VALUE - 1;
198             while (limit > 64) {
199                 predicate.test(0xFFFFFFFFFFFFFFFFL);
200                 limit -= 64;
201             }
202             long last = 0L;
203             for (int i = 0; i < limit; i++) {
204                 last |= BitMap.getLongBit(i);
205             }
206             predicate.test(last);
207             return true;
208         });
209         // the actual result of the calculation is: 46144189292, so the returned value
210         // should be Integer.MAX_VALUE.
211         assertEquals(Integer.MAX_VALUE, bf1.estimateN());
212     }
213 
214     @Test
215     public void testEstimateNWithBrokenCardinality() {
216         // build a filter
217         final BloomFilter filter1 = TestingHashers.populateEntireFilter(new BrokenCardinality(getTestShape()));
218         assertThrows(IllegalArgumentException.class, () -> filter1.estimateN());
219     }
220 
221     @Test
222     public void testHasherBasedMergeWithDifferingSparseness() {
223         final Hasher hasher = new IncrementingHasher(1, 1);
224 
225         BloomFilter bf1 = new NonSparseDefaultBloomFilter(getTestShape());
226         bf1.merge(hasher);
227         assertTrue(BitMapProducer.fromIndexProducer(hasher.indices(getTestShape()), getTestShape().getNumberOfBits())
228                 .forEachBitMapPair(bf1, (x, y) -> x == y));
229 
230         bf1 = new SparseDefaultBloomFilter(getTestShape());
231         bf1.merge(hasher);
232         assertTrue(BitMapProducer.fromIndexProducer(hasher.indices(getTestShape()), getTestShape().getNumberOfBits())
233                 .forEachBitMapPair(bf1, (x, y) -> x == y));
234     }
235 
236     @Test
237     public void testIntersectionLimit() {
238         final Shape s = Shape.fromKM(1, Integer.MAX_VALUE);
239         // create a very large filter with Integer.MAX_VALUE-1 bit set.
240         final BloomFilter bf1 = new SimpleBloomFilter(s);
241         bf1.merge((BitMapProducer) predicate -> {
242             int limit = Integer.MAX_VALUE - 1;
243             while (limit > 64) {
244                 predicate.test(0xFFFFFFFFFFFFFFFFL);
245                 limit -= 64;
246             }
247             long last = 0L;
248             for (int i = 0; i < limit; i++) {
249                 last |= BitMap.getLongBit(i);
250             }
251             predicate.test(last);
252             return true;
253         });
254         // the actual result of the calculation is: 46144189292
255         assertEquals(Integer.MAX_VALUE, bf1.estimateIntersection(bf1));
256     }
257 
258     @Test
259     public void testSparseNonSparseMerging() {
260         final BloomFilter bf1 = new SparseDefaultBloomFilter(getTestShape());
261         bf1.merge(TestingHashers.FROM1);
262         final BloomFilter bf2 = new NonSparseDefaultBloomFilter(getTestShape());
263         bf2.merge(TestingHashers.FROM11);
264 
265         BloomFilter result = bf1.copy();
266         result.merge(bf2);
267         assertEquals(27, result.cardinality());
268 
269         result = bf2.copy();
270         result.merge(bf1);
271         assertEquals(27, result.cardinality());
272     }
273 }