1 /* 2 * Licensed to the Apache Software Foundation (ASF) under one or more 3 * contributor license agreements. See the NOTICE file distributed with 4 * this work for additional information regarding copyright ownership. 5 * The ASF licenses this file to You under the Apache License, Version 2.0 6 * (the "License"); you may not use this file except in compliance with 7 * the License. You may obtain a copy of the License at 8 * 9 * http://www.apache.org/licenses/LICENSE-2.0 10 * 11 * Unless required by applicable law or agreed to in writing, software 12 * distributed under the License is distributed on an "AS IS" BASIS, 13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 * See the License for the specific language governing permissions and 15 * limitations under the License. 16 */ 17 package org.apache.commons.collections4.bloomfilter; 18 19 import java.util.function.IntPredicate; 20 21 /** 22 * A convenience class for Hasher implementations to filter out duplicate indices. 23 * 24 * <p><em>If the index is negative the behavior is not defined.</em></p> 25 * 26 * <p>This is conceptually a unique filter implemented as an {@code IntPredicate}.</p> 27 * @since 4.5 28 */ 29 public final class IndexFilter { 30 /** 31 * An IndexTracker implementation that uses an array of integers to track whether or not a 32 * number has been seen. Suitable for Shapes that have few hash functions. 33 * @since 4.5 34 */ 35 static class ArrayTracker implements IntPredicate { 36 private final int[] seen; 37 private int populated; 38 39 /** 40 * Constructs the tracker based on the shape. 41 * @param shape the shape to build the tracker for. 42 */ 43 ArrayTracker(final Shape shape) { 44 seen = new int[shape.getNumberOfHashFunctions()]; 45 } 46 47 @Override 48 public boolean test(final int number) { 49 if (number < 0) { 50 throw new IndexOutOfBoundsException("number may not be less than zero. " + number); 51 } 52 for (int i = 0; i < populated; i++) { 53 if (seen[i] == number) { 54 return false; 55 } 56 } 57 seen[populated++] = number; 58 return true; 59 } 60 } 61 /** 62 * An IndexTracker implementation that uses an array of bit maps to track whether or not a 63 * number has been seen. 64 * @since 4.5 65 */ 66 static class BitMapTracker implements IntPredicate { 67 private final long[] bits; 68 69 /** 70 * Constructs a bit map based tracker for the specified shape. 71 * @param shape The shape that is being generated. 72 */ 73 BitMapTracker(final Shape shape) { 74 bits = new long[BitMap.numberOfBitMaps(shape.getNumberOfBits())]; 75 } 76 77 @Override 78 public boolean test(final int number) { 79 final boolean retval = !BitMap.contains(bits, number); 80 BitMap.set(bits, number); 81 return retval; 82 } 83 } 84 /** 85 * Creates an instance optimized for the specified shape. 86 * @param shape The shape that is being generated. 87 * @param consumer The consumer to accept the values. 88 * @return an IndexFilter optimized for the specified shape. 89 */ 90 public static IntPredicate create(final Shape shape, final IntPredicate consumer) { 91 return new IndexFilter(shape, consumer)::test; 92 } 93 94 private final IntPredicate tracker; 95 96 private final int size; 97 98 private final IntPredicate consumer; 99 100 /** 101 * Creates an instance optimized for the specified shape. 102 * @param shape The shape that is being generated. 103 * @param consumer The consumer to accept the values. 104 */ 105 private IndexFilter(final Shape shape, final IntPredicate consumer) { 106 this.size = shape.getNumberOfBits(); 107 this.consumer = consumer; 108 if (BitMap.numberOfBitMaps(shape.getNumberOfBits()) * Long.BYTES < (long) shape.getNumberOfHashFunctions() 109 * Integer.BYTES) { 110 this.tracker = new BitMapTracker(shape); 111 } else { 112 this.tracker = new ArrayTracker(shape); 113 } 114 } 115 116 /** 117 * Test if the number should be processed by the {@code consumer}. 118 * 119 * <p>If the number has <em>not</em> been seen before it is passed to the {@code consumer} and the result returned. 120 * If the number has been seen before the {@code consumer} is not called and {@code true} returned.</p> 121 * 122 * <p><em>If the input is not in the range [0,size) an IndexOutOfBoundsException exception is thrown.</em></p> 123 * 124 * @param number the number to check. 125 * @return {@code true} if processing should continue, {@code false} otherwise. 126 */ 127 public boolean test(final int number) { 128 if (number >= size) { 129 throw new IndexOutOfBoundsException(String.format("number too large %d >= %d", number, size)); 130 } 131 if (tracker.test(number)) { 132 return consumer.test(number); 133 } 134 return true; 135 } 136 }