1 /*
2 * Licensed to the Apache Software Foundation (ASF) under one or more
3 * contributor license agreements. See the NOTICE file distributed with
4 * this work for additional information regarding copyright ownership.
5 * The ASF licenses this file to You under the Apache License, Version 2.0
6 * (the "License"); you may not use this file except in compliance with
7 * the License. You may obtain a copy of the License at
8 *
9 * http://www.apache.org/licenses/LICENSE-2.0
10 *
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
16 */
17 package org.apache.commons.collections4.bloomfilter;
18
19 import java.util.function.LongBinaryOperator;
20
21 /**
22 * Implementations of set operations on BitMapExtractors.
23 *
24 * @since 4.5.0-M1
25 */
26 public final class SetOperations {
27
28 /**
29 * Calculates the cardinality of the logical {@code AND} of the bit maps for the two filters.
30 *
31 * @param first the first BitMapExtractor.
32 * @param second the second BitMapExtractor
33 * @return the cardinality of the {@code AND} of the filters.
34 */
35 public static int andCardinality(final BitMapExtractor first, final BitMapExtractor second) {
36 return cardinality(first, second, (x, y) -> x & y);
37 }
38
39 /**
40 * Calculates the cardinality of a BitMapExtractor. By necessity this method will visit each bit map created by the bitMapExtractor.
41 *
42 * @param bitMapExtractor the extractor to calculate the cardinality for.
43 * @return the cardinality of the bit maps produced by the bitMapExtractor.
44 */
45 public static int cardinality(final BitMapExtractor bitMapExtractor) {
46 final int[] cardinality = new int[1];
47 bitMapExtractor.processBitMaps(l -> {
48 cardinality[0] += Long.bitCount(l);
49 return true;
50 });
51 return cardinality[0];
52 }
53
54 /**
55 * Calculates the cardinality of the result of a LongBinaryOperator using the {@code BitMapExtractor.makePredicate} method.
56 *
57 * @param first the first BitMapExtractor
58 * @param second the second BitMapExtractor
59 * @param op a long binary operation on where x = {@code first} and y = {@code second} bitmap extractors.
60 * @return the calculated cardinality.
61 */
62 private static int cardinality(final BitMapExtractor first, final BitMapExtractor second, final LongBinaryOperator op) {
63 final int[] cardinality = new int[1];
64
65 first.processBitMapPairs(second, (x, y) -> {
66 cardinality[0] += Long.bitCount(op.applyAsLong(x, y));
67 return true;
68 });
69 return cardinality[0];
70 }
71
72 /**
73 * Calculates the Cosine distance between two BitMapExtractor.
74 * <p>
75 * Cosine distance is defined as {@code 1 - Cosine similarity}
76 * </p>
77 *
78 * @param first the first BitMapExtractor.
79 * @param second the second BitMapExtractor.
80 * @return the jaccard distance.
81 */
82 public static double cosineDistance(final BitMapExtractor first, final BitMapExtractor second) {
83 return 1.0 - cosineSimilarity(first, second);
84 }
85
86 /**
87 * Calculates the Cosine similarity between two BitMapExtractors.
88 * <p>
89 * Also known as Orchini similarity and the Tucker coefficient of congruence or Ochiai similarity.
90 * </p>
91 * <p>
92 * If either extractor is empty the result is 0 (zero)
93 * </p>
94 *
95 * @param first the first BitMapExtractor.
96 * @param second the second BitMapExtractor.
97 * @return the Cosine similarity.
98 */
99 public static double cosineSimilarity(final BitMapExtractor first, final BitMapExtractor second) {
100 final int numerator = andCardinality(first, second);
101 // Given that the cardinality is an int then the product as a double will not
102 // overflow, we can use one sqrt:
103 return numerator == 0 ? 0 : numerator / Math.sqrt(cardinality(first) * cardinality(second));
104 }
105
106 /**
107 * Calculates the Cosine similarity between two Bloom filters.
108 * <p>
109 * Also known as Orchini similarity and the Tucker coefficient of congruence or Ochiai similarity.
110 * </p>
111 * <p>
112 * If either filter is empty (no enabled bits) the result is 0 (zero)
113 * </p>
114 * <p>
115 * This is a version of cosineSimilarity optimized for Bloom filters.
116 * </p>
117 *
118 * @param first the first Bloom filter.
119 * @param second the second Bloom filter.
120 * @return the Cosine similarity.
121 */
122 public static double cosineSimilarity(final BloomFilter<?> first, final BloomFilter<?> second) {
123 final int numerator = andCardinality(first, second);
124 // Given that the cardinality is an int then the product as a double will not
125 // overflow, we can use one sqrt:
126 return numerator == 0 ? 0 : numerator / Math.sqrt(first.cardinality() * second.cardinality());
127 }
128
129 /**
130 * Calculates the Hamming distance between two BitMapExtractors.
131 *
132 * @param first the first BitMapExtractor.
133 * @param second the second BitMapExtractor.
134 * @return the Hamming distance.
135 */
136 public static int hammingDistance(final BitMapExtractor first, final BitMapExtractor second) {
137 return xorCardinality(first, second);
138 }
139
140 /**
141 * Calculates the Jaccard distance between two BitMapExtractor.
142 * <p>
143 * Jaccard distance is defined as {@code 1 - Jaccard similarity}
144 * </p>
145 *
146 * @param first the first BitMapExtractor.
147 * @param second the second BitMapExtractor.
148 * @return the Jaccard distance.
149 */
150 public static double jaccardDistance(final BitMapExtractor first, final BitMapExtractor second) {
151 return 1.0 - jaccardSimilarity(first, second);
152 }
153
154 /**
155 * Calculates the Jaccard similarity between two BitMapExtractor.
156 * <p>
157 * Also known as Jaccard index, Intersection over Union, and Jaccard similarity coefficient
158 * </p>
159 *
160 * @param first the first BitMapExtractor.
161 * @param second the second BitMapExtractor.
162 * @return the Jaccard similarity.
163 */
164 public static double jaccardSimilarity(final BitMapExtractor first, final BitMapExtractor second) {
165 final int[] cardinality = new int[2];
166 first.processBitMapPairs(second, (x, y) -> {
167 cardinality[0] += Long.bitCount(x & y);
168 cardinality[1] += Long.bitCount(x | y);
169 return true;
170 });
171 final int intersection = cardinality[0];
172 return intersection == 0 ? 0 : intersection / (double) cardinality[1];
173 }
174
175 /**
176 * Calculates the cardinality of the logical {@code OR} of the bit maps for the two filters.
177 *
178 * @param first the first BitMapExtractor.
179 * @param second the second BitMapExtractor
180 * @return the cardinality of the {@code OR} of the filters.
181 */
182 public static int orCardinality(final BitMapExtractor first, final BitMapExtractor second) {
183 return cardinality(first, second, (x, y) -> x | y);
184 }
185
186 /**
187 * Calculates the cardinality of the logical {@code XOR} of the bit maps for the two filters.
188 *
189 * @param first the first BitMapExtractor.
190 * @param second the second BitMapExtractor
191 * @return the cardinality of the {@code XOR} of the filters.
192 */
193 public static int xorCardinality(final BitMapExtractor first, final BitMapExtractor second) {
194 return cardinality(first, second, (x, y) -> x ^ y);
195 }
196
197 /**
198 * Do not instantiate.
199 */
200 private SetOperations() {
201 }
202 }