1 /*
2 * Licensed to the Apache Software Foundation (ASF) under one or more
3 * contributor license agreements. See the NOTICE file distributed with
4 * this work for additional information regarding copyright ownership.
5 * The ASF licenses this file to You under the Apache License, Version 2.0
6 * (the "License"); you may not use this file except in compliance with
7 * the License. You may obtain a copy of the License at
8 *
9 * http://www.apache.org/licenses/LICENSE-2.0
10 *
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
16 */
17 package org.apache.commons.collections4.bloomfilter;
18
19 import java.util.TreeMap;
20 import java.util.function.IntPredicate;
21
22 /**
23 * Some Bloom filter implementations use a count rather than a bit flag. The term {@code Cell} is used to
24 * refer to these counts and their associated index. This class is the equivalent of the index extractor except
25 * that it produces cells.
26 *
27 * <p>Note that a CellExtractor must not return duplicate indices and must be ordered.</p>
28 *
29 * <p>Implementations must guarantee that:</p>
30 *
31 * <ul>
32 * <li>The IndexExtractor implementation returns unique ordered indices.</li>
33 * <li>The cells are produced in IndexExtractor order.</li>
34 * <li>For every value produced by the IndexExtractor there will be only one matching
35 * cell produced by the CellExtractor.</li>
36 * <li>The CellExtractor will not generate cells with indices that are not output by the IndexExtractor.</li>
37 * <li>The IndexExtractor will not generate indices that have a zero count for the cell.</li>
38 * </ul>
39 *
40 * @since 4.5.0-M2
41 */
42 @FunctionalInterface
43 public interface CellExtractor extends IndexExtractor {
44
45 /**
46 * Represents an operation that accepts an {@code <index, count>} pair.
47 * Returns {@code true} if processing should continue, {@code false} otherwise.
48 *
49 * <p>Note: This is a functional interface as a specialization of
50 * {@link java.util.function.BiPredicate} for {@code int}.</p>
51 */
52 @FunctionalInterface
53 interface CellPredicate {
54 /**
55 * Performs an operation on the given {@code <index, count>} pair.
56 *
57 * @param index the bit index.
58 * @param count the cell value at the specified bit index.
59 * @return {@code true} if processing should continue, {@code false} if processing should stop.
60 */
61 boolean test(int index, int count);
62 }
63
64 /**
65 * Creates a CellExtractor from an IndexExtractor.
66 *
67 * <p>Note the following properties:</p>
68 * <ul>
69 * <li>Each index returned from the IndexExtractor is assumed to have a cell value of 1.</li>
70 * <li>The CellExtractor aggregates duplicate indices from the IndexExtractor.</li>
71 * </ul>
72 *
73 * <p>A CellExtractor that outputs the mapping [(1,2),(2,3),(3,1)] can be created from many combinations
74 * of indices including:</p>
75 * <pre>
76 * [1, 1, 2, 2, 2, 3]
77 * [1, 3, 1, 2, 2, 2]
78 * [3, 2, 1, 2, 1, 2]
79 * ...
80 * </pre>
81 *
82 * @param indexExtractor An index indexExtractor.
83 * @return A CellExtractor with the same indices as the IndexExtractor.
84 */
85 static CellExtractor from(final IndexExtractor indexExtractor) {
86 return new CellExtractor() {
87 /**
88 * Class to track cell values in the TreeMap.
89 */
90 final class CounterCell implements Comparable<CounterCell> {
91 final int idx;
92 int count;
93
94 CounterCell(final int idx, final int count) {
95 this.idx = idx;
96 this.count = count;
97 }
98
99 @Override
100 public int compareTo(final CounterCell other) {
101 return Integer.compare(idx, other.idx);
102 }
103 }
104
105 TreeMap<CounterCell, CounterCell> counterCells = new TreeMap<>();
106
107 @Override
108 public int[] asIndexArray() {
109 populate();
110 return counterCells.keySet().stream().mapToInt(c -> c.idx).toArray();
111 }
112
113 private void populate() {
114 if (counterCells.isEmpty()) {
115 indexExtractor.processIndices(idx -> {
116 final CounterCell cell = new CounterCell(idx, 1);
117 final CounterCell counter = counterCells.get(cell);
118 if (counter == null) {
119 counterCells.put(cell, cell);
120 } else {
121 counter.count++;
122 }
123 return true;
124 });
125 }
126 }
127
128 @Override
129 public boolean processCells(final CellPredicate consumer) {
130 populate();
131 for (final CounterCell cell : counterCells.values()) {
132 if (!consumer.test(cell.idx, cell.count)) {
133 return false;
134 }
135 }
136 return true;
137 }
138 };
139 }
140
141 /**
142 * Performs the given action for each {@code cell} where the cell count is non-zero.
143 *
144 * <p>Some Bloom filter implementations use a count rather than a bit flag. The term {@code Cell} is used to
145 * refer to these counts.</p>
146 *
147 * <p>Any exceptions thrown by the action are relayed to the caller. The consumer is applied to each
148 * cell. If the consumer returns {@code false} the execution is stopped, {@code false}
149 * is returned, and no further pairs are processed.</p>
150 *
151 * @param consumer the action to be performed for each non-zero cell.
152 * @return {@code true} if all cells return true from consumer, {@code false} otherwise.
153 * @throws NullPointerException if the specified consumer is null
154 */
155 boolean processCells(CellPredicate consumer);
156
157 /**
158 * The default implementation returns distinct and ordered indices for all cells with a non-zero count.
159 */
160 @Override
161 default boolean processIndices(final IntPredicate predicate) {
162 return processCells((i, v) -> predicate.test(i));
163 }
164
165 @Override
166 default IndexExtractor uniqueIndices() {
167 return this;
168 }
169 }
170