View Javadoc
1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one or more
3    * contributor license agreements.  See the NOTICE file distributed with
4    * this work for additional information regarding copyright ownership.
5    * The ASF licenses this file to You under the Apache License, Version 2.0
6    * (the "License"); you may not use this file except in compliance with
7    * the License.  You may obtain a copy of the License at
8    *
9    *      http://www.apache.org/licenses/LICENSE-2.0
10   *
11   * Unless required by applicable law or agreed to in writing, software
12   * distributed under the License is distributed on an "AS IS" BASIS,
13   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14   * See the License for the specific language governing permissions and
15   * limitations under the License.
16   */
17  package org.apache.commons.collections4.bloomfilter;
18  
19  import java.util.Objects;
20  import java.util.TreeSet;
21  import java.util.function.IntPredicate;
22  import java.util.function.LongPredicate;
23  
24  /**
25   * A bloom filter using a TreeSet of integers to track enabled bits. This is a standard
26   * implementation and should work well for most low cardinality Bloom filters.
27   *
28   * @since 4.5.0-M1
29   */
30  public final class SparseBloomFilter implements BloomFilter<SparseBloomFilter> {
31  
32      /**
33       * The bitSet that defines this BloomFilter.
34       */
35      private final TreeSet<Integer> indices;
36  
37      /**
38       * The shape of this BloomFilter.
39       */
40      private final Shape shape;
41  
42      /**
43       * Constructs an empty BitSetBloomFilter.
44       *
45       * @param shape The shape of the filter.
46       */
47      public SparseBloomFilter(final Shape shape) {
48          Objects.requireNonNull(shape, "shape");
49          this.shape = shape;
50          this.indices = new TreeSet<>();
51      }
52  
53      private SparseBloomFilter(final SparseBloomFilter source) {
54          shape = source.shape;
55          indices = new TreeSet<>(source.indices);
56      }
57  
58      /**
59       * Adds the index to the indices.
60       *
61       * @param idx the index to add.
62       * @return {@code true} always
63       */
64      private boolean add(final int idx) {
65          indices.add(idx);
66          return true;
67      }
68  
69      @Override
70      public long[] asBitMapArray() {
71          final long[] result = BitMaps.newBitMap(shape);
72          for (final int i : indices) {
73              BitMaps.set(result, i);
74          }
75          return result;
76      }
77  
78      @Override
79      public int cardinality() {
80          return indices.size();
81      }
82  
83      @Override
84      public int characteristics() {
85          return SPARSE;
86      }
87  
88      @Override
89      public void clear() {
90          indices.clear();
91      }
92  
93      @Override
94      public boolean contains(final BitMapExtractor bitMapExtractor) {
95          return contains(IndexExtractor.fromBitMapExtractor(bitMapExtractor));
96      }
97  
98      @Override
99      public boolean contains(final IndexExtractor indexExtractor) {
100         return indexExtractor.processIndices(indices::contains);
101     }
102 
103     /**
104      * Creates a new instance of this {@link SparseBloomFilter} with the same properties as the current one.
105      *
106      * @return a copy of this {@link SparseBloomFilter}.
107      */
108     @Override
109     public SparseBloomFilter copy() {
110         return new SparseBloomFilter(this);
111     }
112 
113     @Override
114     public Shape getShape() {
115         return shape;
116     }
117 
118     @Override
119     public boolean isEmpty() {
120         return indices.isEmpty();
121     }
122 
123     @Override
124     public boolean merge(final BitMapExtractor bitMapExtractor) {
125         Objects.requireNonNull(bitMapExtractor, "bitMapExtractor");
126         return this.merge(IndexExtractor.fromBitMapExtractor(bitMapExtractor));
127     }
128 
129     @Override
130     public boolean merge(final BloomFilter<?> other) {
131         Objects.requireNonNull(other, "other");
132         final IndexExtractor indexExtractor = (other.characteristics() & SPARSE) != 0 ? (IndexExtractor) other : IndexExtractor.fromBitMapExtractor(other);
133         merge(indexExtractor);
134         return true;
135     }
136 
137     @Override
138     public boolean merge(final Hasher hasher) {
139         Objects.requireNonNull(hasher, "hasher");
140         merge(hasher.indices(shape));
141         return true;
142     }
143 
144     @Override
145     public boolean merge(final IndexExtractor indexExtractor) {
146         Objects.requireNonNull(indexExtractor, "indexExtractor");
147         indexExtractor.processIndices(this::add);
148         if (!indices.isEmpty()) {
149             if (indices.last() >= shape.getNumberOfBits()) {
150                 throw new IllegalArgumentException(String.format("Value in list %s is greater than maximum value (%s)",
151                         indices.last(), shape.getNumberOfBits() - 1));
152             }
153             if (indices.first() < 0) {
154                 throw new IllegalArgumentException(
155                         String.format("Value in list %s is less than 0", indices.first()));
156             }
157         }
158         return true;
159     }
160 
161     @Override
162     public boolean processBitMaps(final LongPredicate consumer) {
163         Objects.requireNonNull(consumer, "consumer");
164         final int limit = BitMaps.numberOfBitMaps(shape);
165         //
166         // because our indices are always in order we can shorten the time necessary to
167         // create the longs for the consumer
168         //
169         // the currently constructed bitMap
170         long bitMap = 0;
171         // the bitmap we are working on
172         int idx = 0;
173         for (final int i : indices) {
174             while (BitMaps.getLongIndex(i) != idx) {
175                 if (!consumer.test(bitMap)) {
176                     return false;
177                 }
178                 bitMap = 0;
179                 idx++;
180             }
181             bitMap |= BitMaps.getLongBit(i);
182         }
183         // we fall through with data in the bitMap
184         if (!consumer.test(bitMap)) {
185             return false;
186         }
187         // account for hte bitMap in the previous block + the next one
188         idx++;
189         // while there are more blocks to generate send zero to the consumer.
190         while (idx < limit) {
191             if (!consumer.test(0L)) {
192                 return false;
193             }
194             idx++;
195         }
196         return true;
197     }
198 
199     @Override
200     public boolean processIndices(final IntPredicate consumer) {
201         Objects.requireNonNull(consumer, "consumer");
202         return indices.stream().allMatch(consumer::test);
203     }
204 }