001/* 002 * Licensed to the Apache Software Foundation (ASF) under one or more 003 * contributor license agreements. See the NOTICE file distributed with 004 * this work for additional information regarding copyright ownership. 005 * The ASF licenses this file to You under the Apache License, Version 2.0 006 * (the "License"); you may not use this file except in compliance with 007 * the License. You may obtain a copy of the License at 008 * 009 * http://www.apache.org/licenses/LICENSE-2.0 010 * 011 * Unless required by applicable law or agreed to in writing, software 012 * distributed under the License is distributed on an "AS IS" BASIS, 013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 014 * See the License for the specific language governing permissions and 015 * limitations under the License. 016 */ 017package org.apache.commons.collections4.bloomfilter; 018 019import java.util.function.IntPredicate; 020 021/** 022 * A convenience class for Hasher implementations to filter out duplicate indices. 023 * 024 * <p><em>If the index is negative the behavior is not defined.</em></p> 025 * 026 * <p>This is conceptually a unique filter implemented as an {@code IntPredicate}.</p> 027 * @since 4.5 028 */ 029public final class IndexFilter { 030 /** 031 * An IndexTracker implementation that uses an array of integers to track whether or not a 032 * number has been seen. Suitable for Shapes that have few hash functions. 033 * @since 4.5 034 */ 035 static class ArrayTracker implements IntPredicate { 036 private final int[] seen; 037 private int populated; 038 039 /** 040 * Constructs the tracker based on the shape. 041 * @param shape the shape to build the tracker for. 042 */ 043 ArrayTracker(final Shape shape) { 044 seen = new int[shape.getNumberOfHashFunctions()]; 045 } 046 047 @Override 048 public boolean test(final int number) { 049 if (number < 0) { 050 throw new IndexOutOfBoundsException("number may not be less than zero. " + number); 051 } 052 for (int i = 0; i < populated; i++) { 053 if (seen[i] == number) { 054 return false; 055 } 056 } 057 seen[populated++] = number; 058 return true; 059 } 060 } 061 /** 062 * An IndexTracker implementation that uses an array of bit maps to track whether or not a 063 * number has been seen. 064 * @since 4.5 065 */ 066 static class BitMapTracker implements IntPredicate { 067 private final long[] bits; 068 069 /** 070 * Constructs a bit map based tracker for the specified shape. 071 * @param shape The shape that is being generated. 072 */ 073 BitMapTracker(final Shape shape) { 074 bits = new long[BitMap.numberOfBitMaps(shape.getNumberOfBits())]; 075 } 076 077 @Override 078 public boolean test(final int number) { 079 final boolean retval = !BitMap.contains(bits, number); 080 BitMap.set(bits, number); 081 return retval; 082 } 083 } 084 /** 085 * Creates an instance optimized for the specified shape. 086 * @param shape The shape that is being generated. 087 * @param consumer The consumer to accept the values. 088 * @return an IndexFilter optimized for the specified shape. 089 */ 090 public static IntPredicate create(final Shape shape, final IntPredicate consumer) { 091 return new IndexFilter(shape, consumer)::test; 092 } 093 094 private final IntPredicate tracker; 095 096 private final int size; 097 098 private final IntPredicate consumer; 099 100 /** 101 * Creates an instance optimized for the specified shape. 102 * @param shape The shape that is being generated. 103 * @param consumer The consumer to accept the values. 104 */ 105 private IndexFilter(final Shape shape, final IntPredicate consumer) { 106 this.size = shape.getNumberOfBits(); 107 this.consumer = consumer; 108 if (BitMap.numberOfBitMaps(shape.getNumberOfBits()) * Long.BYTES < (long) shape.getNumberOfHashFunctions() 109 * Integer.BYTES) { 110 this.tracker = new BitMapTracker(shape); 111 } else { 112 this.tracker = new ArrayTracker(shape); 113 } 114 } 115 116 /** 117 * Test if the number should be processed by the {@code consumer}. 118 * 119 * <p>If the number has <em>not</em> been seen before it is passed to the {@code consumer} and the result returned. 120 * If the number has been seen before the {@code consumer} is not called and {@code true} returned.</p> 121 * 122 * <p><em>If the input is not in the range [0,size) an IndexOutOfBoundsException exception is thrown.</em></p> 123 * 124 * @param number the number to check. 125 * @return {@code true} if processing should continue, {@code false} otherwise. 126 */ 127 public boolean test(final int number) { 128 if (number >= size) { 129 throw new IndexOutOfBoundsException(String.format("number too large %d >= %d", number, size)); 130 } 131 if (tracker.test(number)) { 132 return consumer.test(number); 133 } 134 return true; 135 } 136}