001/* 002 * Licensed to the Apache Software Foundation (ASF) under one or more 003 * contributor license agreements. See the NOTICE file distributed with 004 * this work for additional information regarding copyright ownership. 005 * The ASF licenses this file to You under the Apache License, Version 2.0 006 * (the "License"); you may not use this file except in compliance with 007 * the License. You may obtain a copy of the License at 008 * 009 * http://www.apache.org/licenses/LICENSE-2.0 010 * 011 * Unless required by applicable law or agreed to in writing, software 012 * distributed under the License is distributed on an "AS IS" BASIS, 013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 014 * See the License for the specific language governing permissions and 015 * limitations under the License. 016 */ 017package org.apache.commons.collections4.bloomfilter; 018 019import java.util.function.IntPredicate; 020 021/** 022 * A convenience class for Hasher implementations to filter out duplicate indices. 023 * 024 * <p><em>If the index is negative the behavior is not defined.</em></p> 025 * 026 * <p>This is conceptually a unique filter implemented as an {@link IntPredicate}.</p> 027 * 028 * @since 4.5.0-M1 029 */ 030public final class IndexFilter { 031 032 /** 033 * An IndexTracker implementation that uses an array of integers to track whether or not a 034 * number has been seen. Suitable for Shapes that have few hash functions. 035 * @since 4.5.0 036 */ 037 static class ArrayTracker implements IntPredicate { 038 private final int[] seen; 039 private int populated; 040 041 /** 042 * Constructs the tracker based on the shape. 043 * @param shape the shape to build the tracker for. 044 */ 045 ArrayTracker(final Shape shape) { 046 seen = new int[shape.getNumberOfHashFunctions()]; 047 } 048 049 @Override 050 public boolean test(final int number) { 051 if (number < 0) { 052 throw new IndexOutOfBoundsException("number may not be less than zero. " + number); 053 } 054 for (int i = 0; i < populated; i++) { 055 if (seen[i] == number) { 056 return false; 057 } 058 } 059 seen[populated++] = number; 060 return true; 061 } 062 } 063 064 /** 065 * An IndexTracker implementation that uses an array of bit maps to track whether or not a 066 * number has been seen. 067 */ 068 static class BitMapTracker implements IntPredicate { 069 private final long[] bits; 070 071 /** 072 * Constructs a bit map based tracker for the specified shape. 073 * @param shape The shape that is being generated. 074 */ 075 BitMapTracker(final Shape shape) { 076 bits = BitMaps.newBitMap(shape); 077 } 078 079 @Override 080 public boolean test(final int number) { 081 final boolean retval = !BitMaps.contains(bits, number); 082 BitMaps.set(bits, number); 083 return retval; 084 } 085 } 086 087 /** 088 * Creates an instance optimized for the specified shape. 089 * 090 * @param shape The shape that is being generated. 091 * @param consumer The consumer to accept the values. 092 * @return an IndexFilter optimized for the specified shape. 093 */ 094 public static IntPredicate create(final Shape shape, final IntPredicate consumer) { 095 return new IndexFilter(shape, consumer)::test; 096 } 097 098 private final IntPredicate tracker; 099 100 private final int size; 101 102 private final IntPredicate consumer; 103 104 /** 105 * Creates an instance optimized for the specified shape. 106 * 107 * @param shape The shape that is being generated. 108 * @param consumer The consumer to accept the values. 109 */ 110 private IndexFilter(final Shape shape, final IntPredicate consumer) { 111 this.size = shape.getNumberOfBits(); 112 this.consumer = consumer; 113 if (BitMaps.numberOfBitMaps(shape) * Long.BYTES < (long) shape.getNumberOfHashFunctions() * Integer.BYTES) { 114 this.tracker = new BitMapTracker(shape); 115 } else { 116 this.tracker = new ArrayTracker(shape); 117 } 118 } 119 120 /** 121 * Test if the number should be processed by the {@code consumer}. 122 * 123 * <p>If the number has <em>not</em> been seen before it is passed to the {@code consumer} and the result returned. 124 * If the number has been seen before the {@code consumer} is not called and {@code true} returned.</p> 125 * 126 * <p><em>If the input is not in the range [0,size) an IndexOutOfBoundsException exception is thrown.</em></p> 127 * 128 * @param number the number to check. 129 * @return {@code true} if processing should continue, {@code false} otherwise. 130 */ 131 public boolean test(final int number) { 132 if (number >= size) { 133 throw new IndexOutOfBoundsException(String.format("number too large %d >= %d", number, size)); 134 } 135 if (tracker.test(number)) { 136 return consumer.test(number); 137 } 138 return true; 139 } 140}