001/* 002 * Licensed to the Apache Software Foundation (ASF) under one or more 003 * contributor license agreements. See the NOTICE file distributed with 004 * this work for additional information regarding copyright ownership. 005 * The ASF licenses this file to You under the Apache License, Version 2.0 006 * (the "License"); you may not use this file except in compliance with 007 * the License. You may obtain a copy of the License at 008 * 009 * http://www.apache.org/licenses/LICENSE-2.0 010 * 011 * Unless required by applicable law or agreed to in writing, software 012 * distributed under the License is distributed on an "AS IS" BASIS, 013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 014 * See the License for the specific language governing permissions and 015 * limitations under the License. 016 */ 017package org.apache.commons.collections4.bloomfilter; 018 019import java.util.TreeMap; 020import java.util.function.IntPredicate; 021 022 023/** 024 * Some Bloom filter implementations use a count rather than a bit flag. The term {@code Cell} is used to 025 * refer to these counts and their associated index. This class is the equivalent of the index producer except 026 * that it produces cells. 027 * 028 * <p>Note that a CellProducer must not return duplicate indices and must be ordered.</p> 029 * 030 * <p>Implementations must guarantee that:</p> 031 * 032 * <ul> 033 * <li>The IndexProducer implementation returns unique ordered indices.</li> 034 * <li>The cells are produced in IndexProducer order.</li> 035 * <li>For every value produced by the IndexProducer there will be only one matching 036 * cell produced by the CellProducer.</li> 037 * <li>The CellProducer will not generate cells with indices that are not output by the IndexProducer.</li> 038 * <li>The IndexProducer will not generate indices that have a zero count for the cell.</li> 039 * </ul> 040 * 041 * @since 4.5 042 */ 043@FunctionalInterface 044public interface CellProducer extends IndexProducer { 045 046 /** 047 * Represents an operation that accepts an {@code <index, count>} pair. 048 * Returns {@code true} if processing should continue, {@code false} otherwise. 049 * 050 * <p>Note: This is a functional interface as a specialization of 051 * {@link java.util.function.BiPredicate} for {@code int}.</p> 052 */ 053 @FunctionalInterface 054 interface CellConsumer { 055 /** 056 * Performs an operation on the given {@code <index, count>} pair. 057 * 058 * @param index the bit index. 059 * @param count the cell value at the specified bit index. 060 * @return {@code true} if processing should continue, {@code false} if processing should stop. 061 */ 062 boolean test(int index, int count); 063 } 064 065 /** 066 * Creates a CellProducer from an IndexProducer. 067 * 068 * <p>Note the following properties: 069 * <ul> 070 * <li>Each index returned from the IndexProducer is assumed to have a cell value of 1.</li> 071 * <li>The CellProducer aggregates duplicate indices from the IndexProducer.</li> 072 * </ul> 073 * 074 * <p>A CellProducer that outputs the mapping [(1,2),(2,3),(3,1)] can be created from many combinations 075 * of indices including: 076 * <pre> 077 * [1, 1, 2, 2, 2, 3] 078 * [1, 3, 1, 2, 2, 2] 079 * [3, 2, 1, 2, 1, 2] 080 * ... 081 * </pre> 082 * 083 * @param producer An index producer. 084 * @return A CellProducer with the same indices as the IndexProducer. 085 */ 086 static CellProducer from(final IndexProducer producer) { 087 return new CellProducer() { 088 /** 089 * Class to track cell values in the TreeMap. 090 */ 091 final class CounterCell implements Comparable<CounterCell> { 092 final int idx; 093 int count; 094 095 CounterCell(final int idx, final int count) { 096 this.idx = idx; 097 this.count = count; 098 } 099 100 @Override 101 public int compareTo(final CounterCell other) { 102 return Integer.compare(idx, other.idx); 103 } 104 } 105 106 TreeMap<CounterCell, CounterCell> counterCells = new TreeMap<>(); 107 108 @Override 109 public int[] asIndexArray() { 110 populate(); 111 return counterCells.keySet().stream().mapToInt(c -> c.idx).toArray(); 112 } 113 114 @Override 115 public boolean forEachCell(final CellConsumer consumer) { 116 populate(); 117 for (final CounterCell cell : counterCells.values()) { 118 if (!consumer.test(cell.idx, cell.count)) { 119 return false; 120 } 121 } 122 return true; 123 } 124 125 private void populate() { 126 if (counterCells.isEmpty()) { 127 producer.forEachIndex( idx -> { 128 final CounterCell cell = new CounterCell(idx, 1); 129 final CounterCell counter = counterCells.get(cell); 130 if (counter == null) { 131 counterCells.put(cell, cell); 132 } else { 133 counter.count++; 134 } 135 return true; 136 }); 137 } 138 } 139 }; 140 } 141 142 /** 143 * Performs the given action for each {@code cell} where the cell count is non-zero. 144 * 145 * <p>Some Bloom filter implementations use a count rather than a bit flag. The term {@code Cell} is used to 146 * refer to these counts.</p> 147 * 148 * <p>Any exceptions thrown by the action are relayed to the caller. The consumer is applied to each 149 * cell. If the consumer returns {@code false} the execution is stopped, {@code false} 150 * is returned, and no further pairs are processed.</p> 151 * 152 * @param consumer the action to be performed for each non-zero cell. 153 * @return {@code true} if all cells return true from consumer, {@code false} otherwise. 154 * @throws NullPointerException if the specified consumer is null 155 */ 156 boolean forEachCell(CellConsumer consumer); 157 158 /** 159 * The default implementation returns distinct and ordered indices for all cells with a non-zero count. 160 */ 161 @Override 162 default boolean forEachIndex(final IntPredicate predicate) { 163 return forEachCell((i, v) -> predicate.test(i)); 164 } 165 166 @Override 167 default IndexProducer uniqueIndices() { 168 return this; 169 } 170} 171