001/*
002 * Licensed to the Apache Software Foundation (ASF) under one or more
003 * contributor license agreements.  See the NOTICE file distributed with
004 * this work for additional information regarding copyright ownership.
005 * The ASF licenses this file to You under the Apache License, Version 2.0
006 * (the "License"); you may not use this file except in compliance with
007 * the License.  You may obtain a copy of the License at
008 *
009 *      http://www.apache.org/licenses/LICENSE-2.0
010 *
011 * Unless required by applicable law or agreed to in writing, software
012 * distributed under the License is distributed on an "AS IS" BASIS,
013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 * See the License for the specific language governing permissions and
015 * limitations under the License.
016 */
017package org.apache.commons.collections4.bloomfilter;
018
019import java.util.TreeMap;
020import java.util.function.IntPredicate;
021
022
023/**
024 * Some Bloom filter implementations use a count rather than a bit flag. The term {@code Cell} is used to
025 * refer to these counts and their associated index.  This class is the equivalent of the index producer except
026 * that it produces cells.
027 *
028 * <p>Note that a CellProducer must not return duplicate indices and must be ordered.</p>
029 *
030 * <p>Implementations must guarantee that:</p>
031 *
032 * <ul>
033 * <li>The IndexProducer implementation returns unique ordered indices.</li>
034 * <li>The cells are produced in IndexProducer order.</li>
035 * <li>For every value produced by the IndexProducer there will be only one matching
036 * cell produced by the CellProducer.</li>
037 * <li>The CellProducer will not generate cells with indices that are not output by the IndexProducer.</li>
038 * <li>The IndexProducer will not generate indices that have a zero count for the cell.</li>
039 * </ul>
040 *
041 * @since 4.5
042 */
043@FunctionalInterface
044public interface CellProducer extends IndexProducer {
045
046    /**
047     * Represents an operation that accepts an {@code <index, count>} pair.
048     * Returns {@code true} if processing should continue, {@code false} otherwise.
049     *
050     * <p>Note: This is a functional interface as a specialization of
051     * {@link java.util.function.BiPredicate} for {@code int}.</p>
052     */
053    @FunctionalInterface
054    interface CellConsumer {
055        /**
056         * Performs an operation on the given {@code <index, count>} pair.
057         *
058         * @param index the bit index.
059         * @param count the cell value at the specified bit index.
060         * @return {@code true} if processing should continue, {@code false} if processing should stop.
061         */
062        boolean test(int index, int count);
063    }
064
065    /**
066     * Creates a CellProducer from an IndexProducer.
067     *
068     * <p>Note the following properties:
069     * <ul>
070     * <li>Each index returned from the IndexProducer is assumed to have a cell value of 1.</li>
071     * <li>The CellProducer aggregates duplicate indices from the IndexProducer.</li>
072     * </ul>
073     *
074     * <p>A CellProducer that outputs the mapping [(1,2),(2,3),(3,1)] can be created from many combinations
075     * of indices including:
076     * <pre>
077     * [1, 1, 2, 2, 2, 3]
078     * [1, 3, 1, 2, 2, 2]
079     * [3, 2, 1, 2, 1, 2]
080     * ...
081     * </pre>
082     *
083     * @param producer An index producer.
084     * @return A CellProducer with the same indices as the IndexProducer.
085     */
086    static CellProducer from(final IndexProducer producer) {
087        return new CellProducer() {
088            /**
089             * Class to track cell values in the TreeMap.
090             */
091            final class CounterCell implements Comparable<CounterCell> {
092                final int idx;
093                int count;
094
095                CounterCell(final int idx, final int count) {
096                    this.idx = idx;
097                    this.count = count;
098                }
099
100                @Override
101                public int compareTo(final CounterCell other) {
102                    return Integer.compare(idx, other.idx);
103                }
104            }
105
106            TreeMap<CounterCell, CounterCell> counterCells = new TreeMap<>();
107
108            @Override
109            public int[] asIndexArray() {
110                populate();
111                return counterCells.keySet().stream().mapToInt(c -> c.idx).toArray();
112            }
113
114            @Override
115            public boolean forEachCell(final CellConsumer consumer) {
116                populate();
117                for (final CounterCell cell : counterCells.values()) {
118                    if (!consumer.test(cell.idx, cell.count)) {
119                        return false;
120                    }
121                }
122                return true;
123            }
124
125            private void populate() {
126                if (counterCells.isEmpty()) {
127                    producer.forEachIndex( idx -> {
128                        final CounterCell cell = new CounterCell(idx, 1);
129                        final CounterCell counter = counterCells.get(cell);
130                        if (counter == null) {
131                            counterCells.put(cell, cell);
132                        } else {
133                            counter.count++;
134                        }
135                        return true;
136                    });
137                }
138            }
139        };
140    }
141
142    /**
143     * Performs the given action for each {@code cell}  where the cell count is non-zero.
144     *
145     * <p>Some Bloom filter implementations use a count rather than a bit flag.  The term {@code Cell} is used to
146     * refer to these counts.</p>
147     *
148     * <p>Any exceptions thrown by the action are relayed to the caller. The consumer is applied to each
149     * cell. If the consumer returns {@code false} the execution is stopped, {@code false}
150     * is returned, and no further pairs are processed.</p>
151     *
152     * @param consumer the action to be performed for each non-zero cell.
153     * @return {@code true} if all cells return true from consumer, {@code false} otherwise.
154     * @throws NullPointerException if the specified consumer is null
155     */
156    boolean forEachCell(CellConsumer consumer);
157
158    /**
159     * The default implementation returns distinct and ordered indices for all cells with a non-zero count.
160     */
161    @Override
162    default boolean forEachIndex(final IntPredicate predicate) {
163        return forEachCell((i, v) -> predicate.test(i));
164    }
165
166    @Override
167    default IndexProducer uniqueIndices() {
168        return this;
169    }
170}
171