View Javadoc
1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one or more
3    * contributor license agreements.  See the NOTICE file distributed with
4    * this work for additional information regarding copyright ownership.
5    * The ASF licenses this file to You under the Apache License, Version 2.0
6    * (the "License"); you may not use this file except in compliance with
7    * the License.  You may obtain a copy of the License at
8    *
9    *      http://www.apache.org/licenses/LICENSE-2.0
10   *
11   * Unless required by applicable law or agreed to in writing, software
12   * distributed under the License is distributed on an "AS IS" BASIS,
13   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14   * See the License for the specific language governing permissions and
15   * limitations under the License.
16   */
17  package org.apache.commons.collections4.bloomfilter;
18  
19  import java.util.TreeMap;
20  import java.util.function.IntPredicate;
21  
22  
23  /**
24   * Some Bloom filter implementations use a count rather than a bit flag. The term {@code Cell} is used to
25   * refer to these counts and their associated index.  This class is the equivalent of the index producer except
26   * that it produces cells.
27   *
28   * <p>Note that a CellProducer must not return duplicate indices and must be ordered.</p>
29   *
30   * <p>Implementations must guarantee that:</p>
31   *
32   * <ul>
33   * <li>The IndexProducer implementation returns unique ordered indices.</li>
34   * <li>The cells are produced in IndexProducer order.</li>
35   * <li>For every value produced by the IndexProducer there will be only one matching
36   * cell produced by the CellProducer.</li>
37   * <li>The CellProducer will not generate cells with indices that are not output by the IndexProducer.</li>
38   * <li>The IndexProducer will not generate indices that have a zero count for the cell.</li>
39   * </ul>
40   *
41   * @since 4.5
42   */
43  @FunctionalInterface
44  public interface CellProducer extends IndexProducer {
45  
46      /**
47       * Represents an operation that accepts an {@code <index, count>} pair.
48       * Returns {@code true} if processing should continue, {@code false} otherwise.
49       *
50       * <p>Note: This is a functional interface as a specialization of
51       * {@link java.util.function.BiPredicate} for {@code int}.</p>
52       */
53      @FunctionalInterface
54      interface CellConsumer {
55          /**
56           * Performs an operation on the given {@code <index, count>} pair.
57           *
58           * @param index the bit index.
59           * @param count the cell value at the specified bit index.
60           * @return {@code true} if processing should continue, {@code false} if processing should stop.
61           */
62          boolean test(int index, int count);
63      }
64  
65      /**
66       * Creates a CellProducer from an IndexProducer.
67       *
68       * <p>Note the following properties:
69       * <ul>
70       * <li>Each index returned from the IndexProducer is assumed to have a cell value of 1.</li>
71       * <li>The CellProducer aggregates duplicate indices from the IndexProducer.</li>
72       * </ul>
73       *
74       * <p>A CellProducer that outputs the mapping [(1,2),(2,3),(3,1)] can be created from many combinations
75       * of indices including:
76       * <pre>
77       * [1, 1, 2, 2, 2, 3]
78       * [1, 3, 1, 2, 2, 2]
79       * [3, 2, 1, 2, 1, 2]
80       * ...
81       * </pre>
82       *
83       * @param producer An index producer.
84       * @return A CellProducer with the same indices as the IndexProducer.
85       */
86      static CellProducer from(final IndexProducer producer) {
87          return new CellProducer() {
88              /**
89               * Class to track cell values in the TreeMap.
90               */
91              final class CounterCell implements Comparable<CounterCell> {
92                  final int idx;
93                  int count;
94  
95                  CounterCell(final int idx, final int count) {
96                      this.idx = idx;
97                      this.count = count;
98                  }
99  
100                 @Override
101                 public int compareTo(final CounterCell other) {
102                     return Integer.compare(idx, other.idx);
103                 }
104             }
105 
106             TreeMap<CounterCell, CounterCell> counterCells = new TreeMap<>();
107 
108             @Override
109             public int[] asIndexArray() {
110                 populate();
111                 return counterCells.keySet().stream().mapToInt(c -> c.idx).toArray();
112             }
113 
114             @Override
115             public boolean forEachCell(final CellConsumer consumer) {
116                 populate();
117                 for (final CounterCell cell : counterCells.values()) {
118                     if (!consumer.test(cell.idx, cell.count)) {
119                         return false;
120                     }
121                 }
122                 return true;
123             }
124 
125             private void populate() {
126                 if (counterCells.isEmpty()) {
127                     producer.forEachIndex( idx -> {
128                         final CounterCell cell = new CounterCell(idx, 1);
129                         final CounterCell counter = counterCells.get(cell);
130                         if (counter == null) {
131                             counterCells.put(cell, cell);
132                         } else {
133                             counter.count++;
134                         }
135                         return true;
136                     });
137                 }
138             }
139         };
140     }
141 
142     /**
143      * Performs the given action for each {@code cell}  where the cell count is non-zero.
144      *
145      * <p>Some Bloom filter implementations use a count rather than a bit flag.  The term {@code Cell} is used to
146      * refer to these counts.</p>
147      *
148      * <p>Any exceptions thrown by the action are relayed to the caller. The consumer is applied to each
149      * cell. If the consumer returns {@code false} the execution is stopped, {@code false}
150      * is returned, and no further pairs are processed.</p>
151      *
152      * @param consumer the action to be performed for each non-zero cell.
153      * @return {@code true} if all cells return true from consumer, {@code false} otherwise.
154      * @throws NullPointerException if the specified consumer is null
155      */
156     boolean forEachCell(CellConsumer consumer);
157 
158     /**
159      * The default implementation returns distinct and ordered indices for all cells with a non-zero count.
160      */
161     @Override
162     default boolean forEachIndex(final IntPredicate predicate) {
163         return forEachCell((i, v) -> predicate.test(i));
164     }
165 
166     @Override
167     default IndexProducer uniqueIndices() {
168         return this;
169     }
170 }
171