View Javadoc
1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one or more
3    * contributor license agreements.  See the NOTICE file distributed with
4    * this work for additional information regarding copyright ownership.
5    * The ASF licenses this file to You under the Apache License, Version 2.0
6    * (the "License"); you may not use this file except in compliance with
7    * the License.  You may obtain a copy of the License at
8    *
9    *      http://www.apache.org/licenses/LICENSE-2.0
10   *
11   * Unless required by applicable law or agreed to in writing, software
12   * distributed under the License is distributed on an "AS IS" BASIS,
13   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14   * See the License for the specific language governing permissions and
15   * limitations under the License.
16   */
17  package org.apache.commons.collections4.bloomfilter;
18  
19  import java.util.function.LongBinaryOperator;
20  
21  /**
22   * Implementations of set operations on BitMapExtractors.
23   *
24   * @since 4.5.0-M1
25   */
26  public final class SetOperations {
27  
28      /**
29       * Calculates the cardinality of the logical {@code AND} of the bit maps for the two filters.
30       *
31       * @param first  the first BitMapExtractor.
32       * @param second the second BitMapExtractor
33       * @return the cardinality of the {@code AND} of the filters.
34       */
35      public static int andCardinality(final BitMapExtractor first, final BitMapExtractor second) {
36          return cardinality(first, second, (x, y) -> x & y);
37      }
38  
39      /**
40       * Calculates the cardinality of a BitMapExtractor. By necessity this method will visit each bit map created by the bitMapExtractor.
41       *
42       * @param bitMapExtractor the extractor to calculate the cardinality for.
43       * @return the cardinality of the bit maps produced by the bitMapExtractor.
44       */
45      public static int cardinality(final BitMapExtractor bitMapExtractor) {
46          final int[] cardinality = new int[1];
47          bitMapExtractor.processBitMaps(l -> {
48              cardinality[0] += Long.bitCount(l);
49              return true;
50          });
51          return cardinality[0];
52      }
53  
54      /**
55       * Calculates the cardinality of the result of a LongBinaryOperator using the {@code BitMapExtractor.makePredicate} method.
56       *
57       * @param first  the first BitMapExtractor
58       * @param second the second BitMapExtractor
59       * @param op     a long binary operation on where x = {@code first} and y = {@code second} bitmap extractors.
60       * @return the calculated cardinality.
61       */
62      private static int cardinality(final BitMapExtractor first, final BitMapExtractor second, final LongBinaryOperator op) {
63          final int[] cardinality = new int[1];
64  
65          first.processBitMapPairs(second, (x, y) -> {
66              cardinality[0] += Long.bitCount(op.applyAsLong(x, y));
67              return true;
68          });
69          return cardinality[0];
70      }
71  
72      /**
73       * Calculates the Cosine distance between two BitMapExtractor.
74       * <p>
75       * Cosine distance is defined as {@code 1 - Cosine similarity}
76       * </p>
77       *
78       * @param first  the first BitMapExtractor.
79       * @param second the second BitMapExtractor.
80       * @return the jaccard distance.
81       */
82      public static double cosineDistance(final BitMapExtractor first, final BitMapExtractor second) {
83          return 1.0 - cosineSimilarity(first, second);
84      }
85  
86      /**
87       * Calculates the Cosine similarity between two BitMapExtractors.
88       * <p>
89       * Also known as Orchini similarity and the Tucker coefficient of congruence or Ochiai similarity.
90       * </p>
91       * <p>
92       * If either extractor is empty the result is 0 (zero)
93       * </p>
94       *
95       * @param first  the first BitMapExtractor.
96       * @param second the second BitMapExtractor.
97       * @return the Cosine similarity.
98       */
99      public static double cosineSimilarity(final BitMapExtractor first, final BitMapExtractor second) {
100         final int numerator = andCardinality(first, second);
101         // Given that the cardinality is an int then the product as a double will not
102         // overflow, we can use one sqrt:
103         return numerator == 0 ? 0 : numerator / Math.sqrt(cardinality(first) * cardinality(second));
104     }
105 
106     /**
107      * Calculates the Cosine similarity between two Bloom filters.
108      * <p>
109      * Also known as Orchini similarity and the Tucker coefficient of congruence or Ochiai similarity.
110      * </p>
111      * <p>
112      * If either filter is empty (no enabled bits) the result is 0 (zero)
113      * </p>
114      * <p>
115      * This is a version of cosineSimilarity optimized for Bloom filters.
116      * </p>
117      *
118      * @param first  the first Bloom filter.
119      * @param second the second Bloom filter.
120      * @return the Cosine similarity.
121      */
122     public static double cosineSimilarity(final BloomFilter<?> first, final BloomFilter<?> second) {
123         final int numerator = andCardinality(first, second);
124         // Given that the cardinality is an int then the product as a double will not
125         // overflow, we can use one sqrt:
126         return numerator == 0 ? 0 : numerator / Math.sqrt(first.cardinality() * second.cardinality());
127     }
128 
129     /**
130      * Calculates the Hamming distance between two BitMapExtractors.
131      *
132      * @param first  the first BitMapExtractor.
133      * @param second the second BitMapExtractor.
134      * @return the Hamming distance.
135      */
136     public static int hammingDistance(final BitMapExtractor first, final BitMapExtractor second) {
137         return xorCardinality(first, second);
138     }
139 
140     /**
141      * Calculates the Jaccard distance between two BitMapExtractor.
142      * <p>
143      * Jaccard distance is defined as {@code 1 - Jaccard similarity}
144      * </p>
145      *
146      * @param first  the first BitMapExtractor.
147      * @param second the second BitMapExtractor.
148      * @return the Jaccard distance.
149      */
150     public static double jaccardDistance(final BitMapExtractor first, final BitMapExtractor second) {
151         return 1.0 - jaccardSimilarity(first, second);
152     }
153 
154     /**
155      * Calculates the Jaccard similarity between two BitMapExtractor.
156      * <p>
157      * Also known as Jaccard index, Intersection over Union, and Jaccard similarity coefficient
158      * </p>
159      *
160      * @param first  the first BitMapExtractor.
161      * @param second the second BitMapExtractor.
162      * @return the Jaccard similarity.
163      */
164     public static double jaccardSimilarity(final BitMapExtractor first, final BitMapExtractor second) {
165         final int[] cardinality = new int[2];
166         first.processBitMapPairs(second, (x, y) -> {
167             cardinality[0] += Long.bitCount(x & y);
168             cardinality[1] += Long.bitCount(x | y);
169             return true;
170         });
171         final int intersection = cardinality[0];
172         return intersection == 0 ? 0 : intersection / (double) cardinality[1];
173     }
174 
175     /**
176      * Calculates the cardinality of the logical {@code OR} of the bit maps for the two filters.
177      *
178      * @param first  the first BitMapExtractor.
179      * @param second the second BitMapExtractor
180      * @return the cardinality of the {@code OR} of the filters.
181      */
182     public static int orCardinality(final BitMapExtractor first, final BitMapExtractor second) {
183         return cardinality(first, second, (x, y) -> x | y);
184     }
185 
186     /**
187      * Calculates the cardinality of the logical {@code XOR} of the bit maps for the two filters.
188      *
189      * @param first  the first BitMapExtractor.
190      * @param second the second BitMapExtractor
191      * @return the cardinality of the {@code XOR} of the filters.
192      */
193     public static int xorCardinality(final BitMapExtractor first, final BitMapExtractor second) {
194         return cardinality(first, second, (x, y) -> x ^ y);
195     }
196 
197     /**
198      * Do not instantiate.
199      */
200     private SetOperations() {
201     }
202 }