1 /* 2 * Licensed to the Apache Software Foundation (ASF) under one or more 3 * contributor license agreements. See the NOTICE file distributed with 4 * this work for additional information regarding copyright ownership. 5 * The ASF licenses this file to You under the Apache License, Version 2.0 6 * (the "License"); you may not use this file except in compliance with 7 * the License. You may obtain a copy of the License at 8 * 9 * http://www.apache.org/licenses/LICENSE-2.0 10 * 11 * Unless required by applicable law or agreed to in writing, software 12 * distributed under the License is distributed on an "AS IS" BASIS, 13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 * See the License for the specific language governing permissions and 15 * limitations under the License. 16 */ 17 package org.apache.commons.collections4.bloomfilter; 18 19 import java.util.function.LongBinaryOperator; 20 21 /** 22 * Implementations of set operations on BitMapProducers. 23 * 24 * @since 4.5 25 */ 26 public final class SetOperations { 27 28 /** 29 * Calculates the cardinality of the logical {@code AND} of the bit maps for the two filters. 30 * @param first the first BitMapProducer. 31 * @param second the second BitMapProducer 32 * @return the cardinality of the {@code AND} of the filters. 33 */ 34 public static int andCardinality(final BitMapProducer first, final BitMapProducer second) { 35 return cardinality(first, second, (x, y) -> x & y); 36 } 37 38 /** 39 * Calculates the cardinality of a BitMapProducer. By necessity this method will visit each bit map 40 * created by the producer. 41 * @param producer the Producer to calculate the cardinality for. 42 * @return the cardinality of the bit maps produced by the producer. 43 */ 44 public static int cardinality(final BitMapProducer producer) { 45 final int[] cardinality = new int[1]; 46 producer.forEachBitMap(l -> { 47 cardinality[0] += Long.bitCount(l); 48 return true; 49 }); 50 return cardinality[0]; 51 } 52 53 /** 54 * Calculates the cardinality of the result of a LongBinaryOperator using the 55 * {@code BitMapProducer.makePredicate} method. 56 * @param first the first BitMapProducer 57 * @param second the second BitMapProducer 58 * @param op a long binary operation on where x = {@code first} and y = {@code second} bitmap producers. 59 * @return the calculated cardinality. 60 */ 61 private static int cardinality(final BitMapProducer first, final BitMapProducer second, final LongBinaryOperator op) { 62 final int[] cardinality = new int[1]; 63 64 first.forEachBitMapPair(second, (x, y) -> { 65 cardinality[0] += Long.bitCount(op.applyAsLong(x, y)); 66 return true; 67 }); 68 return cardinality[0]; 69 } 70 71 /** 72 * Calculates the Cosine distance between two BitMapProducer. 73 * 74 * <p>Cosine distance is defined as {@code 1 - Cosine similarity}</p> 75 * 76 * @param first the first BitMapProducer. 77 * @param second the second BitMapProducer. 78 * @return the jaccard distance. 79 */ 80 public static double cosineDistance(final BitMapProducer first, final BitMapProducer second) { 81 return 1.0 - cosineSimilarity(first, second); 82 } 83 84 /** 85 * Calculates the Cosine similarity between two BitMapProducers. 86 * <p> Also known as Orchini similarity and the Tucker coefficient of congruence or 87 * Ochiai similarity.</p> 88 * 89 * <p>If either producer is empty the result is 0 (zero)</p> 90 * 91 * @param first the first BitMapProducer. 92 * @param second the second BitMapProducer. 93 * @return the Cosine similarity. 94 */ 95 public static double cosineSimilarity(final BitMapProducer first, final BitMapProducer second) { 96 final int numerator = andCardinality(first, second); 97 // Given that the cardinality is an int then the product as a double will not 98 // overflow, we can use one sqrt: 99 return numerator == 0 ? 0 : numerator / Math.sqrt(cardinality(first) * cardinality(second)); 100 } 101 102 /** 103 * Calculates the Cosine similarity between two Bloom filters. 104 * <p> Also known as Orchini similarity and the Tucker coefficient of congruence or 105 * Ochiai similarity.</p> 106 * 107 * <p>If either filter is empty (no enabled bits) the result is 0 (zero)</p> 108 * 109 * <p>This is a version of cosineSimilarity optimized for Bloom filters.</p> 110 * 111 * @param first the first Bloom filter. 112 * @param second the second Bloom filter. 113 * @return the Cosine similarity. 114 */ 115 public static double cosineSimilarity(final BloomFilter first, final BloomFilter second) { 116 final int numerator = andCardinality(first, second); 117 // Given that the cardinality is an int then the product as a double will not 118 // overflow, we can use one sqrt: 119 return numerator == 0 ? 0 : numerator / Math.sqrt(first.cardinality() * second.cardinality()); 120 } 121 122 /** 123 * Calculates the Hamming distance between two BitMapProducers. 124 * 125 * @param first the first BitMapProducer. 126 * @param second the second BitMapProducer. 127 * @return the Hamming distance. 128 */ 129 public static int hammingDistance(final BitMapProducer first, final BitMapProducer second) { 130 return xorCardinality(first, second); 131 } 132 133 /** 134 * Calculates the Jaccard distance between two BitMapProducer. 135 * 136 * <p>Jaccard distance is defined as {@code 1 - Jaccard similarity}</p> 137 * 138 * @param first the first BitMapProducer. 139 * @param second the second BitMapProducer. 140 * @return the Jaccard distance. 141 */ 142 public static double jaccardDistance(final BitMapProducer first, final BitMapProducer second) { 143 return 1.0 - jaccardSimilarity(first, second); 144 } 145 146 /** 147 * Calculates the Jaccard similarity between two BitMapProducer. 148 * 149 * <p>Also known as Jaccard index, Intersection over Union, and Jaccard similarity coefficient</p> 150 * 151 * @param first the first BitMapProducer. 152 * @param second the second BitMapProducer. 153 * @return the Jaccard similarity. 154 */ 155 public static double jaccardSimilarity(final BitMapProducer first, final BitMapProducer second) { 156 final int[] cardinality = new int[2]; 157 first.forEachBitMapPair(second, (x, y) -> { 158 cardinality[0] += Long.bitCount(x & y); 159 cardinality[1] += Long.bitCount(x | y); 160 return true; 161 }); 162 final int intersection = cardinality[0]; 163 return intersection == 0 ? 0 : intersection / (double) cardinality[1]; 164 } 165 166 /** 167 * Calculates the cardinality of the logical {@code OR} of the bit maps for the two filters. 168 * @param first the first BitMapProducer. 169 * @param second the second BitMapProducer 170 * @return the cardinality of the {@code OR} of the filters. 171 */ 172 public static int orCardinality(final BitMapProducer first, final BitMapProducer second) { 173 return cardinality(first, second, (x, y) -> x | y); 174 } 175 176 /** 177 * Calculates the cardinality of the logical {@code XOR} of the bit maps for the two filters. 178 * @param first the first BitMapProducer. 179 * @param second the second BitMapProducer 180 * @return the cardinality of the {@code XOR} of the filters. 181 */ 182 public static int xorCardinality(final BitMapProducer first, final BitMapProducer second) { 183 return cardinality(first, second, (x, y) -> x ^ y); 184 } 185 186 /** 187 * Do not instantiate. 188 */ 189 private SetOperations() { 190 } 191 }