View Javadoc
1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one or more
3    * contributor license agreements.  See the NOTICE file distributed with
4    * this work for additional information regarding copyright ownership.
5    * The ASF licenses this file to You under the Apache License, Version 2.0
6    * (the "License"); you may not use this file except in compliance with
7    * the License.  You may obtain a copy of the License at
8    *
9    *      http://www.apache.org/licenses/LICENSE-2.0
10   *
11   * Unless required by applicable law or agreed to in writing, software
12   * distributed under the License is distributed on an "AS IS" BASIS,
13   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14   * See the License for the specific language governing permissions and
15   * limitations under the License.
16   */
17  package org.apache.commons.collections4.bloomfilter;
18  
19  import java.util.TreeMap;
20  import java.util.function.IntPredicate;
21  
22  /**
23   * Some Bloom filter implementations use a count rather than a bit flag. The term {@code Cell} is used to
24   * refer to these counts and their associated index.  This class is the equivalent of the index extractor except
25   * that it produces cells.
26   *
27   * <p>Note that a CellExtractor must not return duplicate indices and must be ordered.</p>
28   *
29   * <p>Implementations must guarantee that:</p>
30   *
31   * <ul>
32   * <li>The IndexExtractor implementation returns unique ordered indices.</li>
33   * <li>The cells are produced in IndexExtractor order.</li>
34   * <li>For every value produced by the IndexExtractor there will be only one matching
35   * cell produced by the CellExtractor.</li>
36   * <li>The CellExtractor will not generate cells with indices that are not output by the IndexExtractor.</li>
37   * <li>The IndexExtractor will not generate indices that have a zero count for the cell.</li>
38   * </ul>
39   *
40   * @since 4.5.0-M2
41   */
42  @FunctionalInterface
43  public interface CellExtractor extends IndexExtractor {
44  
45      /**
46       * Represents an operation that accepts an {@code <index, count>} pair.
47       * Returns {@code true} if processing should continue, {@code false} otherwise.
48       *
49       * <p>Note: This is a functional interface as a specialization of
50       * {@link java.util.function.BiPredicate} for {@code int}.</p>
51       */
52      @FunctionalInterface
53      interface CellPredicate {
54          /**
55           * Performs an operation on the given {@code <index, count>} pair.
56           *
57           * @param index the bit index.
58           * @param count the cell value at the specified bit index.
59           * @return {@code true} if processing should continue, {@code false} if processing should stop.
60           */
61          boolean test(int index, int count);
62      }
63  
64      /**
65       * Creates a CellExtractor from an IndexExtractor.
66       *
67       * <p>Note the following properties:</p>
68       * <ul>
69       * <li>Each index returned from the IndexExtractor is assumed to have a cell value of 1.</li>
70       * <li>The CellExtractor aggregates duplicate indices from the IndexExtractor.</li>
71       * </ul>
72       *
73       * <p>A CellExtractor that outputs the mapping [(1,2),(2,3),(3,1)] can be created from many combinations
74       * of indices including:</p>
75       * <pre>
76       * [1, 1, 2, 2, 2, 3]
77       * [1, 3, 1, 2, 2, 2]
78       * [3, 2, 1, 2, 1, 2]
79       * ...
80       * </pre>
81       *
82       * @param indexExtractor An index indexExtractor.
83       * @return A CellExtractor with the same indices as the IndexExtractor.
84       */
85      static CellExtractor from(final IndexExtractor indexExtractor) {
86          return new CellExtractor() {
87              /**
88               * Class to track cell values in the TreeMap.
89               */
90              final class CounterCell implements Comparable<CounterCell> {
91                  final int idx;
92                  int count;
93  
94                  CounterCell(final int idx, final int count) {
95                      this.idx = idx;
96                      this.count = count;
97                  }
98  
99                  @Override
100                 public int compareTo(final CounterCell other) {
101                     return Integer.compare(idx, other.idx);
102                 }
103             }
104 
105             TreeMap<CounterCell, CounterCell> counterCells = new TreeMap<>();
106 
107             @Override
108             public int[] asIndexArray() {
109                 populate();
110                 return counterCells.keySet().stream().mapToInt(c -> c.idx).toArray();
111             }
112 
113             private void populate() {
114                 if (counterCells.isEmpty()) {
115                     indexExtractor.processIndices(idx -> {
116                         final CounterCell cell = new CounterCell(idx, 1);
117                         final CounterCell counter = counterCells.get(cell);
118                         if (counter == null) {
119                             counterCells.put(cell, cell);
120                         } else {
121                             counter.count++;
122                         }
123                         return true;
124                     });
125                 }
126             }
127 
128             @Override
129             public boolean processCells(final CellPredicate consumer) {
130                 populate();
131                 for (final CounterCell cell : counterCells.values()) {
132                     if (!consumer.test(cell.idx, cell.count)) {
133                         return false;
134                     }
135                 }
136                 return true;
137             }
138         };
139     }
140 
141     /**
142      * Performs the given action for each {@code cell}  where the cell count is non-zero.
143      *
144      * <p>Some Bloom filter implementations use a count rather than a bit flag.  The term {@code Cell} is used to
145      * refer to these counts.</p>
146      *
147      * <p>Any exceptions thrown by the action are relayed to the caller. The consumer is applied to each
148      * cell. If the consumer returns {@code false} the execution is stopped, {@code false}
149      * is returned, and no further pairs are processed.</p>
150      *
151      * @param consumer the action to be performed for each non-zero cell.
152      * @return {@code true} if all cells return true from consumer, {@code false} otherwise.
153      * @throws NullPointerException if the specified consumer is null
154      */
155     boolean processCells(CellPredicate consumer);
156 
157     /**
158      * The default implementation returns distinct and ordered indices for all cells with a non-zero count.
159      */
160     @Override
161     default boolean processIndices(final IntPredicate predicate) {
162         return processCells((i, v) -> predicate.test(i));
163     }
164 
165     @Override
166     default IndexExtractor uniqueIndices() {
167         return this;
168     }
169 }
170