1 /*
2 * Licensed to the Apache Software Foundation (ASF) under one or more
3 * contributor license agreements. See the NOTICE file distributed with
4 * this work for additional information regarding copyright ownership.
5 * The ASF licenses this file to You under the Apache License, Version 2.0
6 * (the "License"); you may not use this file except in compliance with
7 * the License. You may obtain a copy of the License at
8 *
9 * http://www.apache.org/licenses/LICENSE-2.0
10 *
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
16 */
17 package org.apache.commons.collections4.bloomfilter;
18
19 import java.util.function.IntPredicate;
20
21 /**
22 * A convenience class for Hasher implementations to filter out duplicate indices.
23 *
24 * <p><em>If the index is negative the behavior is not defined.</em></p>
25 *
26 * <p>This is conceptually a unique filter implemented as an {@link IntPredicate}.</p>
27 *
28 * @since 4.5.0-M1
29 */
30 public final class IndexFilter {
31
32 /**
33 * An IndexTracker implementation that uses an array of integers to track whether or not a
34 * number has been seen. Suitable for Shapes that have few hash functions.
35 * @since 4.5.0
36 */
37 static class ArrayTracker implements IntPredicate {
38 private final int[] seen;
39 private int populated;
40
41 /**
42 * Constructs the tracker based on the shape.
43 * @param shape the shape to build the tracker for.
44 */
45 ArrayTracker(final Shape shape) {
46 seen = new int[shape.getNumberOfHashFunctions()];
47 }
48
49 @Override
50 public boolean test(final int number) {
51 if (number < 0) {
52 throw new IndexOutOfBoundsException("number may not be less than zero. " + number);
53 }
54 for (int i = 0; i < populated; i++) {
55 if (seen[i] == number) {
56 return false;
57 }
58 }
59 seen[populated++] = number;
60 return true;
61 }
62 }
63
64 /**
65 * An IndexTracker implementation that uses an array of bit maps to track whether or not a
66 * number has been seen.
67 */
68 static class BitMapTracker implements IntPredicate {
69 private final long[] bits;
70
71 /**
72 * Constructs a bit map based tracker for the specified shape.
73 * @param shape The shape that is being generated.
74 */
75 BitMapTracker(final Shape shape) {
76 bits = BitMaps.newBitMap(shape);
77 }
78
79 @Override
80 public boolean test(final int number) {
81 final boolean retval = !BitMaps.contains(bits, number);
82 BitMaps.set(bits, number);
83 return retval;
84 }
85 }
86
87 /**
88 * Creates an instance optimized for the specified shape.
89 *
90 * @param shape The shape that is being generated.
91 * @param consumer The consumer to accept the values.
92 * @return an IndexFilter optimized for the specified shape.
93 */
94 public static IntPredicate create(final Shape shape, final IntPredicate consumer) {
95 return new IndexFilter(shape, consumer)::test;
96 }
97
98 private final IntPredicate tracker;
99
100 private final int size;
101
102 private final IntPredicate consumer;
103
104 /**
105 * Creates an instance optimized for the specified shape.
106 *
107 * @param shape The shape that is being generated.
108 * @param consumer The consumer to accept the values.
109 */
110 private IndexFilter(final Shape shape, final IntPredicate consumer) {
111 this.size = shape.getNumberOfBits();
112 this.consumer = consumer;
113 if (BitMaps.numberOfBitMaps(shape) * Long.BYTES < (long) shape.getNumberOfHashFunctions() * Integer.BYTES) {
114 this.tracker = new BitMapTracker(shape);
115 } else {
116 this.tracker = new ArrayTracker(shape);
117 }
118 }
119
120 /**
121 * Test if the number should be processed by the {@code consumer}.
122 *
123 * <p>If the number has <em>not</em> been seen before it is passed to the {@code consumer} and the result returned.
124 * If the number has been seen before the {@code consumer} is not called and {@code true} returned.</p>
125 *
126 * <p><em>If the input is not in the range [0,size) an IndexOutOfBoundsException exception is thrown.</em></p>
127 *
128 * @param number the number to check.
129 * @return {@code true} if processing should continue, {@code false} otherwise.
130 */
131 public boolean test(final int number) {
132 if (number >= size) {
133 throw new IndexOutOfBoundsException(String.format("number too large %d >= %d", number, size));
134 }
135 if (tracker.test(number)) {
136 return consumer.test(number);
137 }
138 return true;
139 }
140 }