CellExtractor.java

  1. /*
  2.  * Licensed to the Apache Software Foundation (ASF) under one or more
  3.  * contributor license agreements.  See the NOTICE file distributed with
  4.  * this work for additional information regarding copyright ownership.
  5.  * The ASF licenses this file to You under the Apache License, Version 2.0
  6.  * (the "License"); you may not use this file except in compliance with
  7.  * the License.  You may obtain a copy of the License at
  8.  *
  9.  *      http://www.apache.org/licenses/LICENSE-2.0
  10.  *
  11.  * Unless required by applicable law or agreed to in writing, software
  12.  * distributed under the License is distributed on an "AS IS" BASIS,
  13.  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  14.  * See the License for the specific language governing permissions and
  15.  * limitations under the License.
  16.  */
  17. package org.apache.commons.collections4.bloomfilter;

  18. import java.util.TreeMap;
  19. import java.util.function.IntPredicate;

  20. /**
  21.  * Some Bloom filter implementations use a count rather than a bit flag. The term {@code Cell} is used to
  22.  * refer to these counts and their associated index.  This class is the equivalent of the index extractor except
  23.  * that it produces cells.
  24.  *
  25.  * <p>Note that a CellExtractor must not return duplicate indices and must be ordered.</p>
  26.  *
  27.  * <p>Implementations must guarantee that:</p>
  28.  *
  29.  * <ul>
  30.  * <li>The IndexExtractor implementation returns unique ordered indices.</li>
  31.  * <li>The cells are produced in IndexExtractor order.</li>
  32.  * <li>For every value produced by the IndexExtractor there will be only one matching
  33.  * cell produced by the CellExtractor.</li>
  34.  * <li>The CellExtractor will not generate cells with indices that are not output by the IndexExtractor.</li>
  35.  * <li>The IndexExtractor will not generate indices that have a zero count for the cell.</li>
  36.  * </ul>
  37.  *
  38.  * @since 4.5.0-M2
  39.  */
  40. @FunctionalInterface
  41. public interface CellExtractor extends IndexExtractor {

  42.     /**
  43.      * Represents an operation that accepts an {@code <index, count>} pair.
  44.      * Returns {@code true} if processing should continue, {@code false} otherwise.
  45.      *
  46.      * <p>Note: This is a functional interface as a specialization of
  47.      * {@link java.util.function.BiPredicate} for {@code int}.</p>
  48.      */
  49.     @FunctionalInterface
  50.     interface CellPredicate {
  51.         /**
  52.          * Performs an operation on the given {@code <index, count>} pair.
  53.          *
  54.          * @param index the bit index.
  55.          * @param count the cell value at the specified bit index.
  56.          * @return {@code true} if processing should continue, {@code false} if processing should stop.
  57.          */
  58.         boolean test(int index, int count);
  59.     }

  60.     /**
  61.      * Creates a CellExtractor from an IndexExtractor.
  62.      *
  63.      * <p>Note the following properties:</p>
  64.      * <ul>
  65.      * <li>Each index returned from the IndexExtractor is assumed to have a cell value of 1.</li>
  66.      * <li>The CellExtractor aggregates duplicate indices from the IndexExtractor.</li>
  67.      * </ul>
  68.      *
  69.      * <p>A CellExtractor that outputs the mapping [(1,2),(2,3),(3,1)] can be created from many combinations
  70.      * of indices including:</p>
  71.      * <pre>
  72.      * [1, 1, 2, 2, 2, 3]
  73.      * [1, 3, 1, 2, 2, 2]
  74.      * [3, 2, 1, 2, 1, 2]
  75.      * ...
  76.      * </pre>
  77.      *
  78.      * @param indexExtractor An index indexExtractor.
  79.      * @return A CellExtractor with the same indices as the IndexExtractor.
  80.      */
  81.     static CellExtractor from(final IndexExtractor indexExtractor) {
  82.         return new CellExtractor() {
  83.             /**
  84.              * Class to track cell values in the TreeMap.
  85.              */
  86.             final class CounterCell implements Comparable<CounterCell> {
  87.                 final int idx;
  88.                 int count;

  89.                 CounterCell(final int idx, final int count) {
  90.                     this.idx = idx;
  91.                     this.count = count;
  92.                 }

  93.                 @Override
  94.                 public int compareTo(final CounterCell other) {
  95.                     return Integer.compare(idx, other.idx);
  96.                 }
  97.             }

  98.             TreeMap<CounterCell, CounterCell> counterCells = new TreeMap<>();

  99.             @Override
  100.             public int[] asIndexArray() {
  101.                 populate();
  102.                 return counterCells.keySet().stream().mapToInt(c -> c.idx).toArray();
  103.             }

  104.             private void populate() {
  105.                 if (counterCells.isEmpty()) {
  106.                     indexExtractor.processIndices(idx -> {
  107.                         final CounterCell cell = new CounterCell(idx, 1);
  108.                         final CounterCell counter = counterCells.get(cell);
  109.                         if (counter == null) {
  110.                             counterCells.put(cell, cell);
  111.                         } else {
  112.                             counter.count++;
  113.                         }
  114.                         return true;
  115.                     });
  116.                 }
  117.             }

  118.             @Override
  119.             public boolean processCells(final CellPredicate consumer) {
  120.                 populate();
  121.                 for (final CounterCell cell : counterCells.values()) {
  122.                     if (!consumer.test(cell.idx, cell.count)) {
  123.                         return false;
  124.                     }
  125.                 }
  126.                 return true;
  127.             }
  128.         };
  129.     }

  130.     /**
  131.      * Performs the given action for each {@code cell}  where the cell count is non-zero.
  132.      *
  133.      * <p>Some Bloom filter implementations use a count rather than a bit flag.  The term {@code Cell} is used to
  134.      * refer to these counts.</p>
  135.      *
  136.      * <p>Any exceptions thrown by the action are relayed to the caller. The consumer is applied to each
  137.      * cell. If the consumer returns {@code false} the execution is stopped, {@code false}
  138.      * is returned, and no further pairs are processed.</p>
  139.      *
  140.      * @param consumer the action to be performed for each non-zero cell.
  141.      * @return {@code true} if all cells return true from consumer, {@code false} otherwise.
  142.      * @throws NullPointerException if the specified consumer is null
  143.      */
  144.     boolean processCells(CellPredicate consumer);

  145.     /**
  146.      * The default implementation returns distinct and ordered indices for all cells with a non-zero count.
  147.      */
  148.     @Override
  149.     default boolean processIndices(final IntPredicate predicate) {
  150.         return processCells((i, v) -> predicate.test(i));
  151.     }

  152.     @Override
  153.     default IndexExtractor uniqueIndices() {
  154.         return this;
  155.     }
  156. }