001/* 002 * Licensed to the Apache Software Foundation (ASF) under one or more 003 * contributor license agreements. See the NOTICE file distributed with 004 * this work for additional information regarding copyright ownership. 005 * The ASF licenses this file to You under the Apache License, Version 2.0 006 * (the "License"); you may not use this file except in compliance with 007 * the License. You may obtain a copy of the License at 008 * 009 * http://www.apache.org/licenses/LICENSE-2.0 010 * 011 * Unless required by applicable law or agreed to in writing, software 012 * distributed under the License is distributed on an "AS IS" BASIS, 013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 014 * See the License for the specific language governing permissions and 015 * limitations under the License. 016 */ 017package org.apache.commons.collections4.bloomfilter; 018 019import java.util.Arrays; 020import java.util.Objects; 021import java.util.function.IntPredicate; 022import java.util.function.LongPredicate; 023 024/** 025 * A bloom filter using an array of bit maps to track enabled bits. This is a standard implementation and should work well for most Bloom filters. 026 * 027 * @since 4.5.0-M1 028 */ 029public final class SimpleBloomFilter implements BloomFilter<SimpleBloomFilter> { 030 031 /** 032 * The array of bit map longs that defines this Bloom filter. Will be null if the filter is empty. 033 */ 034 private final long[] bitMap; 035 036 /** 037 * The Shape of this Bloom filter. 038 */ 039 private final Shape shape; 040 041 /** 042 * The cardinality of this Bloom filter. 043 */ 044 private int cardinality; 045 046 /** 047 * Creates an empty instance. 048 * 049 * @param shape The shape for the filter. 050 */ 051 public SimpleBloomFilter(final Shape shape) { 052 Objects.requireNonNull(shape, "shape"); 053 this.shape = shape; 054 this.bitMap = BitMaps.newBitMap(shape); 055 this.cardinality = 0; 056 } 057 058 /** 059 * Copy constructor for {@code copy()} use. 060 * 061 * @param source 062 */ 063 private SimpleBloomFilter(final SimpleBloomFilter source) { 064 this.shape = source.shape; 065 this.bitMap = source.bitMap.clone(); 066 this.cardinality = source.cardinality; 067 } 068 069 @Override 070 public long[] asBitMapArray() { 071 return Arrays.copyOf(bitMap, bitMap.length); 072 } 073 074 @Override 075 public int cardinality() { 076 // Lazy evaluation with caching 077 int c = cardinality; 078 if (c < 0) { 079 cardinality = c = SetOperations.cardinality(this); 080 } 081 return c; 082 } 083 084 @Override 085 public int characteristics() { 086 return 0; 087 } 088 089 @Override 090 public void clear() { 091 Arrays.fill(bitMap, 0L); 092 cardinality = 0; 093 } 094 095 @Override 096 public boolean contains(final IndexExtractor indexExtractor) { 097 return indexExtractor.processIndices(idx -> BitMaps.contains(bitMap, idx)); 098 } 099 100 /** 101 * Creates a new instance of this {@link SimpleBloomFilter} with the same properties as the current one. 102 * 103 * @return a copy of this {@link SimpleBloomFilter}. 104 */ 105 @Override 106 public SimpleBloomFilter copy() { 107 return new SimpleBloomFilter(this); 108 } 109 110 @Override 111 public Shape getShape() { 112 return shape; 113 } 114 115 @Override 116 public boolean isEmpty() { 117 return cardinality == 0 || processBitMaps(y -> y == 0); 118 } 119 120 @Override 121 public boolean merge(final BitMapExtractor bitMapExtractor) { 122 Objects.requireNonNull(bitMapExtractor, "bitMapExtractor"); 123 try { 124 final int[] idx = new int[1]; 125 bitMapExtractor.processBitMaps(value -> { 126 bitMap[idx[0]++] |= value; 127 return true; 128 }); 129 // idx[0] will be limit+1 so decrement it 130 idx[0]--; 131 final int idxLimit = BitMaps.getLongIndex(shape.getNumberOfBits()); 132 if (idxLimit == idx[0]) { 133 final long excess = bitMap[idxLimit] >> shape.getNumberOfBits(); 134 if (excess != 0) { 135 throw new IllegalArgumentException( 136 String.format("BitMapExtractor set a bit higher than the limit for the shape: %s", shape.getNumberOfBits())); 137 } 138 } 139 cardinality = -1; 140 } catch (final IndexOutOfBoundsException e) { 141 throw new IllegalArgumentException(String.format("BitMapExtractor should send at most %s maps", bitMap.length), e); 142 } 143 return true; 144 } 145 146 @Override 147 public boolean merge(final BloomFilter<?> other) { 148 Objects.requireNonNull(other, "other"); 149 if ((other.characteristics() & SPARSE) != 0) { 150 merge((IndexExtractor) other); 151 } else { 152 merge((BitMapExtractor) other); 153 } 154 return true; 155 } 156 157 @Override 158 public boolean merge(final Hasher hasher) { 159 Objects.requireNonNull(hasher, "hasher"); 160 return merge(hasher.indices(shape)); 161 } 162 163 @Override 164 public boolean merge(final IndexExtractor indexExtractor) { 165 Objects.requireNonNull(indexExtractor, "indexExtractor"); 166 indexExtractor.processIndices(idx -> { 167 if (idx < 0 || idx >= shape.getNumberOfBits()) { 168 throw new IllegalArgumentException(String.format("IndexExtractor should only send values in the range[0,%s)", shape.getNumberOfBits())); 169 } 170 BitMaps.set(bitMap, idx); 171 return true; 172 }); 173 cardinality = -1; 174 return true; 175 } 176 177 @Override 178 public boolean processBitMapPairs(final BitMapExtractor other, final LongBiPredicate func) { 179 final CountingLongPredicate p = new CountingLongPredicate(bitMap, func); 180 return other.processBitMaps(p) && p.processRemaining(); 181 } 182 183 @Override 184 public boolean processBitMaps(final LongPredicate consumer) { 185 Objects.requireNonNull(consumer, "consumer"); 186 for (final long l : bitMap) { 187 if (!consumer.test(l)) { 188 return false; 189 } 190 } 191 return true; 192 } 193 194 @Override 195 public boolean processIndices(final IntPredicate consumer) { 196 Objects.requireNonNull(consumer, "consumer"); 197 return IndexExtractor.fromBitMapExtractor(this).processIndices(consumer); 198 } 199}