View Javadoc
1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one or more
3    * contributor license agreements.  See the NOTICE file distributed with
4    * this work for additional information regarding copyright ownership.
5    * The ASF licenses this file to You under the Apache License, Version 2.0
6    * (the "License"); you may not use this file except in compliance with
7    * the License.  You may obtain a copy of the License at
8    *
9    *      http://www.apache.org/licenses/LICENSE-2.0
10   *
11   * Unless required by applicable law or agreed to in writing, software
12   * distributed under the License is distributed on an "AS IS" BASIS,
13   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14   * See the License for the specific language governing permissions and
15   * limitations under the License.
16   */
17  package org.apache.commons.collections4.bloomfilter;
18  
19  import java.util.function.IntPredicate;
20  
21  /**
22   * A convenience class for Hasher implementations to filter out duplicate indices.
23   *
24   * <p><em>If the index is negative the behavior is not defined.</em></p>
25   *
26   * <p>This is conceptually a unique filter implemented as an {@code IntPredicate}.</p>
27   * @since 4.5
28   */
29  public final class IndexFilter {
30      /**
31       * An IndexTracker implementation that uses an array of integers to track whether or not a
32       * number has been seen. Suitable for Shapes that have few hash functions.
33       * @since 4.5
34       */
35      static class ArrayTracker implements IntPredicate {
36          private final int[] seen;
37          private int populated;
38  
39          /**
40           * Constructs the tracker based on the shape.
41           * @param shape the shape to build the tracker for.
42           */
43          ArrayTracker(final Shape shape) {
44              seen = new int[shape.getNumberOfHashFunctions()];
45          }
46  
47          @Override
48          public boolean test(final int number) {
49              if (number < 0) {
50                  throw new IndexOutOfBoundsException("number may not be less than zero. " + number);
51              }
52              for (int i = 0; i < populated; i++) {
53                  if (seen[i] == number) {
54                      return false;
55                  }
56              }
57              seen[populated++] = number;
58              return true;
59          }
60      }
61      /**
62       * An IndexTracker implementation that uses an array of bit maps to track whether or not a
63       * number has been seen.
64       * @since 4.5
65       */
66      static class BitMapTracker implements IntPredicate {
67          private final long[] bits;
68  
69          /**
70           * Constructs a bit map based tracker for the specified shape.
71           * @param shape The shape that is being generated.
72           */
73          BitMapTracker(final Shape shape) {
74              bits = new long[BitMap.numberOfBitMaps(shape.getNumberOfBits())];
75          }
76  
77          @Override
78          public boolean test(final int number) {
79              final boolean retval = !BitMap.contains(bits, number);
80              BitMap.set(bits, number);
81              return retval;
82          }
83      }
84      /**
85       * Creates an instance optimized for the specified shape.
86       * @param shape The shape that is being generated.
87       * @param consumer The consumer to accept the values.
88       * @return an IndexFilter optimized for the specified shape.
89       */
90      public static IntPredicate create(final Shape shape, final IntPredicate consumer) {
91          return new IndexFilter(shape, consumer)::test;
92      }
93  
94      private final IntPredicate tracker;
95  
96      private final int size;
97  
98      private final IntPredicate consumer;
99  
100     /**
101      * Creates an instance optimized for the specified shape.
102      * @param shape The shape that is being generated.
103      * @param consumer The consumer to accept the values.
104      */
105     private IndexFilter(final Shape shape, final IntPredicate consumer) {
106         this.size = shape.getNumberOfBits();
107         this.consumer = consumer;
108         if (BitMap.numberOfBitMaps(shape.getNumberOfBits()) * Long.BYTES < (long) shape.getNumberOfHashFunctions()
109                 * Integer.BYTES) {
110             this.tracker = new BitMapTracker(shape);
111         } else {
112             this.tracker = new ArrayTracker(shape);
113         }
114     }
115 
116     /**
117      * Test if the number should be processed by the {@code consumer}.
118      *
119      * <p>If the number has <em>not</em> been seen before it is passed to the {@code consumer} and the result returned.
120      * If the number has been seen before the {@code consumer} is not called and {@code true} returned.</p>
121      *
122      * <p><em>If the input is not in the range [0,size) an IndexOutOfBoundsException exception is thrown.</em></p>
123      *
124      * @param number the number to check.
125      * @return {@code true} if processing should continue, {@code false} otherwise.
126      */
127     public boolean test(final int number) {
128         if (number >= size) {
129             throw new IndexOutOfBoundsException(String.format("number too large %d >= %d", number, size));
130         }
131         if (tracker.test(number)) {
132             return  consumer.test(number);
133         }
134         return true;
135     }
136 }