View Javadoc
1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one or more
3    * contributor license agreements.  See the NOTICE file distributed with
4    * this work for additional information regarding copyright ownership.
5    * The ASF licenses this file to You under the Apache License, Version 2.0
6    * (the "License"); you may not use this file except in compliance with
7    * the License.  You may obtain a copy of the License at
8    *
9    *      http://www.apache.org/licenses/LICENSE-2.0
10   *
11   * Unless required by applicable law or agreed to in writing, software
12   * distributed under the License is distributed on an "AS IS" BASIS,
13   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14   * See the License for the specific language governing permissions and
15   * limitations under the License.
16   */
17  package org.apache.commons.collections4.bloomfilter;
18  
19  import java.util.function.IntPredicate;
20  
21  /**
22   * A convenience class for Hasher implementations to filter out duplicate indices.
23   *
24   * <p><em>If the index is negative the behavior is not defined.</em></p>
25   *
26   * <p>This is conceptually a unique filter implemented as an {@link IntPredicate}.</p>
27   *
28   * @since 4.5.0-M1
29   */
30  public final class IndexFilter {
31  
32      /**
33       * An IndexTracker implementation that uses an array of integers to track whether or not a
34       * number has been seen. Suitable for Shapes that have few hash functions.
35       * @since 4.5.0
36       */
37      static class ArrayTracker implements IntPredicate {
38          private final int[] seen;
39          private int populated;
40  
41          /**
42           * Constructs the tracker based on the shape.
43           * @param shape the shape to build the tracker for.
44           */
45          ArrayTracker(final Shape shape) {
46              seen = new int[shape.getNumberOfHashFunctions()];
47          }
48  
49          @Override
50          public boolean test(final int number) {
51              if (number < 0) {
52                  throw new IndexOutOfBoundsException("number may not be less than zero. " + number);
53              }
54              for (int i = 0; i < populated; i++) {
55                  if (seen[i] == number) {
56                      return false;
57                  }
58              }
59              seen[populated++] = number;
60              return true;
61          }
62      }
63  
64      /**
65       * An IndexTracker implementation that uses an array of bit maps to track whether or not a
66       * number has been seen.
67       */
68      static class BitMapTracker implements IntPredicate {
69          private final long[] bits;
70  
71          /**
72           * Constructs a bit map based tracker for the specified shape.
73           * @param shape The shape that is being generated.
74           */
75          BitMapTracker(final Shape shape) {
76              bits = BitMaps.newBitMap(shape);
77          }
78  
79          @Override
80          public boolean test(final int number) {
81              final boolean retval = !BitMaps.contains(bits, number);
82              BitMaps.set(bits, number);
83              return retval;
84          }
85      }
86  
87      /**
88       * Creates an instance optimized for the specified shape.
89       *
90       * @param shape The shape that is being generated.
91       * @param consumer The consumer to accept the values.
92       * @return an IndexFilter optimized for the specified shape.
93       */
94      public static IntPredicate create(final Shape shape, final IntPredicate consumer) {
95          return new IndexFilter(shape, consumer)::test;
96      }
97  
98      private final IntPredicate tracker;
99  
100     private final int size;
101 
102     private final IntPredicate consumer;
103 
104     /**
105      * Creates an instance optimized for the specified shape.
106      *
107      * @param shape The shape that is being generated.
108      * @param consumer The consumer to accept the values.
109      */
110     private IndexFilter(final Shape shape, final IntPredicate consumer) {
111         this.size = shape.getNumberOfBits();
112         this.consumer = consumer;
113         if (BitMaps.numberOfBitMaps(shape) * Long.BYTES < (long) shape.getNumberOfHashFunctions() * Integer.BYTES) {
114             this.tracker = new BitMapTracker(shape);
115         } else {
116             this.tracker = new ArrayTracker(shape);
117         }
118     }
119 
120     /**
121      * Test if the number should be processed by the {@code consumer}.
122      *
123      * <p>If the number has <em>not</em> been seen before it is passed to the {@code consumer} and the result returned.
124      * If the number has been seen before the {@code consumer} is not called and {@code true} returned.</p>
125      *
126      * <p><em>If the input is not in the range [0,size) an IndexOutOfBoundsException exception is thrown.</em></p>
127      *
128      * @param number the number to check.
129      * @return {@code true} if processing should continue, {@code false} otherwise.
130      */
131     public boolean test(final int number) {
132         if (number >= size) {
133             throw new IndexOutOfBoundsException(String.format("number too large %d >= %d", number, size));
134         }
135         if (tracker.test(number)) {
136             return consumer.test(number);
137         }
138         return true;
139     }
140 }