001/* 002 * Licensed to the Apache Software Foundation (ASF) under one or more 003 * contributor license agreements. See the NOTICE file distributed with 004 * this work for additional information regarding copyright ownership. 005 * The ASF licenses this file to You under the Apache License, Version 2.0 006 * (the "License"); you may not use this file except in compliance with 007 * the License. You may obtain a copy of the License at 008 * 009 * http://www.apache.org/licenses/LICENSE-2.0 010 * 011 * Unless required by applicable law or agreed to in writing, software 012 * distributed under the License is distributed on an "AS IS" BASIS, 013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 014 * See the License for the specific language governing permissions and 015 * limitations under the License. 016 */ 017package org.apache.commons.collections4.bloomfilter; 018 019import java.util.Objects; 020import java.util.TreeSet; 021import java.util.function.IntPredicate; 022import java.util.function.LongPredicate; 023 024/** 025 * A bloom filter using a TreeSet of integers to track enabled bits. This is a standard 026 * implementation and should work well for most low cardinality Bloom filters. 027 * @since 4.5 028 */ 029public final class SparseBloomFilter implements BloomFilter { 030 031 /** 032 * The bitSet that defines this BloomFilter. 033 */ 034 private final TreeSet<Integer> indices; 035 036 /** 037 * The shape of this BloomFilter. 038 */ 039 private final Shape shape; 040 041 /** 042 * Constructs an empty BitSetBloomFilter. 043 * 044 * @param shape The shape of the filter. 045 */ 046 public SparseBloomFilter(final Shape shape) { 047 Objects.requireNonNull(shape, "shape"); 048 this.shape = shape; 049 this.indices = new TreeSet<>(); 050 } 051 052 private SparseBloomFilter(final SparseBloomFilter source) { 053 shape = source.shape; 054 indices = new TreeSet<>(source.indices); 055 } 056 057 /** 058 * Adds the index to the indices. 059 * @param idx the index to add. 060 * @return {@code true} always 061 */ 062 private boolean add(final int idx) { 063 indices.add(idx); 064 return true; 065 } 066 067 @Override 068 public long[] asBitMapArray() { 069 final long[] result = new long[BitMap.numberOfBitMaps(shape.getNumberOfBits())]; 070 for (final int i : indices) { 071 BitMap.set(result, i); 072 } 073 return result; 074 } 075 076 @Override 077 public int cardinality() { 078 return indices.size(); 079 } 080 081 @Override 082 public int characteristics() { 083 return SPARSE; 084 } 085 086 @Override 087 public void clear() { 088 indices.clear(); 089 } 090 091 @Override 092 public boolean contains(final BitMapProducer bitMapProducer) { 093 return contains(IndexProducer.fromBitMapProducer(bitMapProducer)); 094 } 095 096 @Override 097 public boolean contains(final IndexProducer indexProducer) { 098 return indexProducer.forEachIndex(indices::contains); 099 } 100 101 @Override 102 public SparseBloomFilter copy() { 103 return new SparseBloomFilter(this); 104 } 105 106 @Override 107 public boolean forEachBitMap(final LongPredicate consumer) { 108 Objects.requireNonNull(consumer, "consumer"); 109 final int limit = BitMap.numberOfBitMaps(shape.getNumberOfBits()); 110 /* 111 * because our indices are always in order we can shorten the time necessary to 112 * create the longs for the consumer 113 */ 114 // the currently constructed bitMap 115 long bitMap = 0; 116 // the bitmap we are working on 117 int idx = 0; 118 for (final int i : indices) { 119 while (BitMap.getLongIndex(i) != idx) { 120 if (!consumer.test(bitMap)) { 121 return false; 122 } 123 bitMap = 0; 124 idx++; 125 } 126 bitMap |= BitMap.getLongBit(i); 127 } 128 // we fall through with data in the bitMap 129 if (!consumer.test(bitMap)) { 130 return false; 131 } 132 // account for hte bitMap in the previous block + the next one 133 idx++; 134 // while there are more blocks to generate send zero to the consumer. 135 while (idx < limit) { 136 if (!consumer.test(0L)) { 137 return false; 138 } 139 idx++; 140 } 141 return true; 142 } 143 144 @Override 145 public boolean forEachIndex(final IntPredicate consumer) { 146 Objects.requireNonNull(consumer, "consumer"); 147 for (final int value : indices) { 148 if (!consumer.test(value)) { 149 return false; 150 } 151 } 152 return true; 153 } 154 155 @Override 156 public Shape getShape() { 157 return shape; 158 } 159 160 @Override 161 public boolean isEmpty() { 162 return indices.isEmpty(); 163 } 164 165 @Override 166 public boolean merge(final BitMapProducer bitMapProducer) { 167 Objects.requireNonNull(bitMapProducer, "bitMapProducer"); 168 return this.merge(IndexProducer.fromBitMapProducer(bitMapProducer)); 169 } 170 171 @Override 172 public boolean merge(final BloomFilter other) { 173 Objects.requireNonNull(other, "other"); 174 final IndexProducer producer = (other.characteristics() & SPARSE) != 0 ? (IndexProducer) other : IndexProducer.fromBitMapProducer(other); 175 merge(producer); 176 return true; 177 } 178 179 @Override 180 public boolean merge(final Hasher hasher) { 181 Objects.requireNonNull(hasher, "hasher"); 182 merge(hasher.indices(shape)); 183 return true; 184 } 185 186 @Override 187 public boolean merge(final IndexProducer indexProducer) { 188 Objects.requireNonNull(indexProducer, "indexProducer"); 189 indexProducer.forEachIndex(this::add); 190 if (!this.indices.isEmpty()) { 191 if (this.indices.last() >= shape.getNumberOfBits()) { 192 throw new IllegalArgumentException(String.format("Value in list %s is greater than maximum value (%s)", 193 this.indices.last(), shape.getNumberOfBits() - 1)); 194 } 195 if (this.indices.first() < 0) { 196 throw new IllegalArgumentException( 197 String.format("Value in list %s is less than 0", this.indices.first())); 198 } 199 } 200 return true; 201 } 202}