View Javadoc
1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one or more
3    * contributor license agreements.  See the NOTICE file distributed with
4    * this work for additional information regarding copyright ownership.
5    * The ASF licenses this file to You under the Apache License, Version 2.0
6    * (the "License"); you may not use this file except in compliance with
7    * the License.  You may obtain a copy of the License at
8    *
9    *      http://www.apache.org/licenses/LICENSE-2.0
10   *
11   * Unless required by applicable law or agreed to in writing, software
12   * distributed under the License is distributed on an "AS IS" BASIS,
13   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14   * See the License for the specific language governing permissions and
15   * limitations under the License.
16   */
17  package org.apache.commons.collections4.bloomfilter;
18  
19  import java.util.Objects;
20  import java.util.TreeSet;
21  import java.util.function.IntPredicate;
22  import java.util.function.LongPredicate;
23  
24  /**
25   * A bloom filter using a TreeSet of integers to track enabled bits. This is a standard
26   * implementation and should work well for most low cardinality Bloom filters.
27   * @since 4.5
28   */
29  public final class SparseBloomFilter implements BloomFilter {
30  
31      /**
32       * The bitSet that defines this BloomFilter.
33       */
34      private final TreeSet<Integer> indices;
35  
36      /**
37       * The shape of this BloomFilter.
38       */
39      private final Shape shape;
40  
41      /**
42       * Constructs an empty BitSetBloomFilter.
43       *
44       * @param shape The shape of the filter.
45       */
46      public SparseBloomFilter(final Shape shape) {
47          Objects.requireNonNull(shape, "shape");
48          this.shape = shape;
49          this.indices = new TreeSet<>();
50      }
51  
52      private SparseBloomFilter(final SparseBloomFilter source) {
53          shape = source.shape;
54          indices = new TreeSet<>(source.indices);
55      }
56  
57      /**
58       * Adds the index to the indices.
59       * @param idx the index to add.
60       * @return {@code true} always
61       */
62      private boolean add(final int idx) {
63          indices.add(idx);
64          return true;
65      }
66  
67      @Override
68      public long[] asBitMapArray() {
69          final long[] result = new long[BitMap.numberOfBitMaps(shape.getNumberOfBits())];
70          for (final int i : indices) {
71              BitMap.set(result, i);
72          }
73          return result;
74      }
75  
76      @Override
77      public int cardinality() {
78          return indices.size();
79      }
80  
81      @Override
82      public int characteristics() {
83          return SPARSE;
84      }
85  
86      @Override
87      public void clear() {
88          indices.clear();
89      }
90  
91      @Override
92      public boolean contains(final BitMapProducer bitMapProducer) {
93          return contains(IndexProducer.fromBitMapProducer(bitMapProducer));
94      }
95  
96      @Override
97      public boolean contains(final IndexProducer indexProducer) {
98          return indexProducer.forEachIndex(indices::contains);
99      }
100 
101     @Override
102     public SparseBloomFilter copy() {
103         return new SparseBloomFilter(this);
104     }
105 
106     @Override
107     public boolean forEachBitMap(final LongPredicate consumer) {
108         Objects.requireNonNull(consumer, "consumer");
109         final int limit = BitMap.numberOfBitMaps(shape.getNumberOfBits());
110         /*
111          * because our indices are always in order we can shorten the time necessary to
112          * create the longs for the consumer
113          */
114         // the currently constructed bitMap
115         long bitMap = 0;
116         // the bitmap we are working on
117         int idx = 0;
118         for (final int i : indices) {
119             while (BitMap.getLongIndex(i) != idx) {
120                 if (!consumer.test(bitMap)) {
121                     return false;
122                 }
123                 bitMap = 0;
124                 idx++;
125             }
126             bitMap |= BitMap.getLongBit(i);
127         }
128         // we fall through with data in the bitMap
129         if (!consumer.test(bitMap)) {
130             return false;
131         }
132         // account for hte bitMap in the previous block + the next one
133         idx++;
134         // while there are more blocks to generate send zero to the consumer.
135         while (idx < limit) {
136             if (!consumer.test(0L)) {
137                 return false;
138             }
139             idx++;
140         }
141         return true;
142     }
143 
144     @Override
145     public boolean forEachIndex(final IntPredicate consumer) {
146         Objects.requireNonNull(consumer, "consumer");
147         for (final int value : indices) {
148             if (!consumer.test(value)) {
149                 return false;
150             }
151         }
152         return true;
153     }
154 
155     @Override
156     public Shape getShape() {
157         return shape;
158     }
159 
160     @Override
161     public boolean isEmpty() {
162         return indices.isEmpty();
163     }
164 
165     @Override
166     public boolean merge(final BitMapProducer bitMapProducer) {
167         Objects.requireNonNull(bitMapProducer, "bitMapProducer");
168         return this.merge(IndexProducer.fromBitMapProducer(bitMapProducer));
169     }
170 
171     @Override
172     public boolean merge(final BloomFilter other) {
173         Objects.requireNonNull(other, "other");
174         final IndexProducer producer = (other.characteristics() & SPARSE) != 0 ? (IndexProducer) other : IndexProducer.fromBitMapProducer(other);
175         merge(producer);
176         return true;
177     }
178 
179     @Override
180     public boolean merge(final Hasher hasher) {
181         Objects.requireNonNull(hasher, "hasher");
182         merge(hasher.indices(shape));
183         return true;
184     }
185 
186     @Override
187     public boolean merge(final IndexProducer indexProducer) {
188         Objects.requireNonNull(indexProducer, "indexProducer");
189         indexProducer.forEachIndex(this::add);
190         if (!this.indices.isEmpty()) {
191             if (this.indices.last() >= shape.getNumberOfBits()) {
192                 throw new IllegalArgumentException(String.format("Value in list %s is greater than maximum value (%s)",
193                         this.indices.last(), shape.getNumberOfBits() - 1));
194             }
195             if (this.indices.first() < 0) {
196                 throw new IllegalArgumentException(
197                         String.format("Value in list %s is less than 0", this.indices.first()));
198             }
199         }
200         return true;
201     }
202 }