001/*
002 * Licensed to the Apache Software Foundation (ASF) under one or more
003 * contributor license agreements.  See the NOTICE file distributed with
004 * this work for additional information regarding copyright ownership.
005 * The ASF licenses this file to You under the Apache License, Version 2.0
006 * (the "License"); you may not use this file except in compliance with
007 * the License.  You may obtain a copy of the License at
008 *
009 *      http://www.apache.org/licenses/LICENSE-2.0
010 *
011 * Unless required by applicable law or agreed to in writing, software
012 * distributed under the License is distributed on an "AS IS" BASIS,
013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 * See the License for the specific language governing permissions and
015 * limitations under the License.
016 */
017package org.apache.commons.collections4.bloomfilter;
018
019import java.util.function.IntPredicate;
020
021/**
022 * A convenience class for Hasher implementations to filter out duplicate indices.
023 *
024 * <p><em>If the index is negative the behavior is not defined.</em></p>
025 *
026 * <p>This is conceptually a unique filter implemented as an {@link IntPredicate}.</p>
027 *
028 * @since 4.5.0-M1
029 */
030public final class IndexFilter {
031
032    /**
033     * An IndexTracker implementation that uses an array of integers to track whether or not a
034     * number has been seen. Suitable for Shapes that have few hash functions.
035     * @since 4.5.0
036     */
037    static class ArrayTracker implements IntPredicate {
038        private final int[] seen;
039        private int populated;
040
041        /**
042         * Constructs the tracker based on the shape.
043         * @param shape the shape to build the tracker for.
044         */
045        ArrayTracker(final Shape shape) {
046            seen = new int[shape.getNumberOfHashFunctions()];
047        }
048
049        @Override
050        public boolean test(final int number) {
051            if (number < 0) {
052                throw new IndexOutOfBoundsException("number may not be less than zero. " + number);
053            }
054            for (int i = 0; i < populated; i++) {
055                if (seen[i] == number) {
056                    return false;
057                }
058            }
059            seen[populated++] = number;
060            return true;
061        }
062    }
063
064    /**
065     * An IndexTracker implementation that uses an array of bit maps to track whether or not a
066     * number has been seen.
067     */
068    static class BitMapTracker implements IntPredicate {
069        private final long[] bits;
070
071        /**
072         * Constructs a bit map based tracker for the specified shape.
073         * @param shape The shape that is being generated.
074         */
075        BitMapTracker(final Shape shape) {
076            bits = BitMaps.newBitMap(shape);
077        }
078
079        @Override
080        public boolean test(final int number) {
081            final boolean retval = !BitMaps.contains(bits, number);
082            BitMaps.set(bits, number);
083            return retval;
084        }
085    }
086
087    /**
088     * Creates an instance optimized for the specified shape.
089     *
090     * @param shape The shape that is being generated.
091     * @param consumer The consumer to accept the values.
092     * @return an IndexFilter optimized for the specified shape.
093     */
094    public static IntPredicate create(final Shape shape, final IntPredicate consumer) {
095        return new IndexFilter(shape, consumer)::test;
096    }
097
098    private final IntPredicate tracker;
099
100    private final int size;
101
102    private final IntPredicate consumer;
103
104    /**
105     * Creates an instance optimized for the specified shape.
106     *
107     * @param shape The shape that is being generated.
108     * @param consumer The consumer to accept the values.
109     */
110    private IndexFilter(final Shape shape, final IntPredicate consumer) {
111        this.size = shape.getNumberOfBits();
112        this.consumer = consumer;
113        if (BitMaps.numberOfBitMaps(shape) * Long.BYTES < (long) shape.getNumberOfHashFunctions() * Integer.BYTES) {
114            this.tracker = new BitMapTracker(shape);
115        } else {
116            this.tracker = new ArrayTracker(shape);
117        }
118    }
119
120    /**
121     * Test if the number should be processed by the {@code consumer}.
122     *
123     * <p>If the number has <em>not</em> been seen before it is passed to the {@code consumer} and the result returned.
124     * If the number has been seen before the {@code consumer} is not called and {@code true} returned.</p>
125     *
126     * <p><em>If the input is not in the range [0,size) an IndexOutOfBoundsException exception is thrown.</em></p>
127     *
128     * @param number the number to check.
129     * @return {@code true} if processing should continue, {@code false} otherwise.
130     */
131    public boolean test(final int number) {
132        if (number >= size) {
133            throw new IndexOutOfBoundsException(String.format("number too large %d >= %d", number, size));
134        }
135        if (tracker.test(number)) {
136            return consumer.test(number);
137        }
138        return true;
139    }
140}