View Javadoc
1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one or more
3    * contributor license agreements.  See the NOTICE file distributed with
4    * this work for additional information regarding copyright ownership.
5    * The ASF licenses this file to You under the Apache License, Version 2.0
6    * (the "License"); you may not use this file except in compliance with
7    * the License.  You may obtain a copy of the License at
8    *
9    *      http://www.apache.org/licenses/LICENSE-2.0
10   *
11   * Unless required by applicable law or agreed to in writing, software
12   * distributed under the License is distributed on an "AS IS" BASIS,
13   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14   * See the License for the specific language governing permissions and
15   * limitations under the License.
16   */
17  package org.apache.commons.collections4.bloomfilter;
18  
19  import java.util.function.LongBinaryOperator;
20  
21  /**
22   * Implementations of set operations on BitMapProducers.
23   *
24   * @since 4.5
25   */
26  public final class SetOperations {
27  
28      /**
29       * Calculates the cardinality of the logical {@code AND} of the bit maps for the two filters.
30       * @param first the first BitMapProducer.
31       * @param second the second BitMapProducer
32       * @return the cardinality of the {@code AND} of the filters.
33       */
34      public static int andCardinality(final BitMapProducer first, final BitMapProducer second) {
35          return cardinality(first, second, (x, y) -> x & y);
36      }
37  
38      /**
39       * Calculates the cardinality of a BitMapProducer. By necessity this method will visit each bit map
40       * created by the producer.
41       * @param producer the Producer to calculate the cardinality for.
42       * @return the cardinality of the bit maps produced by the producer.
43       */
44      public static int cardinality(final BitMapProducer producer) {
45          final int[] cardinality = new int[1];
46          producer.forEachBitMap(l -> {
47              cardinality[0] += Long.bitCount(l);
48              return true;
49          });
50          return cardinality[0];
51      }
52  
53      /**
54       * Calculates the cardinality of the result of a LongBinaryOperator using the
55       * {@code BitMapProducer.makePredicate} method.
56       * @param first the first BitMapProducer
57       * @param second the second BitMapProducer
58       * @param op a long binary operation on where x = {@code first} and y = {@code second} bitmap producers.
59       * @return the calculated cardinality.
60       */
61      private static int cardinality(final BitMapProducer first, final BitMapProducer second, final LongBinaryOperator op) {
62          final int[] cardinality = new int[1];
63  
64          first.forEachBitMapPair(second, (x, y) -> {
65              cardinality[0] += Long.bitCount(op.applyAsLong(x, y));
66              return true;
67          });
68          return cardinality[0];
69      }
70  
71      /**
72       * Calculates the Cosine distance between two BitMapProducer.
73       *
74       * <p>Cosine distance is defined as {@code 1 - Cosine similarity}</p>
75       *
76       * @param first the first BitMapProducer.
77       * @param second the second BitMapProducer.
78       * @return the jaccard distance.
79       */
80      public static double cosineDistance(final BitMapProducer first, final BitMapProducer second) {
81          return 1.0 - cosineSimilarity(first, second);
82      }
83  
84      /**
85       * Calculates the Cosine similarity between two BitMapProducers.
86       * <p> Also known as Orchini similarity and the Tucker coefficient of congruence or
87       * Ochiai similarity.</p>
88       *
89       * <p>If either producer is empty the result is 0 (zero)</p>
90       *
91       * @param first the first BitMapProducer.
92       * @param second the second BitMapProducer.
93       * @return the Cosine similarity.
94       */
95      public static double cosineSimilarity(final BitMapProducer first, final BitMapProducer second) {
96          final int numerator = andCardinality(first, second);
97          // Given that the cardinality is an int then the product as a double will not
98          // overflow, we can use one sqrt:
99          return numerator == 0 ? 0 : numerator / Math.sqrt(cardinality(first) * cardinality(second));
100     }
101 
102     /**
103      * Calculates the Cosine similarity between two Bloom filters.
104      * <p> Also known as Orchini similarity and the Tucker coefficient of congruence or
105      * Ochiai similarity.</p>
106      *
107      * <p>If either filter is empty (no enabled bits) the result is 0 (zero)</p>
108      *
109      * <p>This is a version of cosineSimilarity optimized for Bloom filters.</p>
110      *
111      * @param first the first Bloom filter.
112      * @param second the second Bloom filter.
113      * @return the Cosine similarity.
114      */
115     public static double cosineSimilarity(final BloomFilter first, final BloomFilter second) {
116         final int numerator = andCardinality(first, second);
117         // Given that the cardinality is an int then the product as a double will not
118         // overflow, we can use one sqrt:
119         return numerator == 0 ? 0 : numerator / Math.sqrt(first.cardinality() * second.cardinality());
120     }
121 
122     /**
123      * Calculates the Hamming distance between two BitMapProducers.
124      *
125      * @param first the first BitMapProducer.
126      * @param second the second BitMapProducer.
127      * @return the Hamming distance.
128      */
129     public static int hammingDistance(final BitMapProducer first, final BitMapProducer second) {
130         return xorCardinality(first, second);
131     }
132 
133     /**
134      * Calculates the Jaccard distance between two BitMapProducer.
135      *
136      * <p>Jaccard distance is defined as {@code 1 - Jaccard similarity}</p>
137      *
138      * @param first the first BitMapProducer.
139      * @param second the second BitMapProducer.
140      * @return the Jaccard distance.
141      */
142     public static double jaccardDistance(final BitMapProducer first, final BitMapProducer second) {
143         return 1.0 - jaccardSimilarity(first, second);
144     }
145 
146     /**
147      * Calculates the Jaccard similarity between two BitMapProducer.
148      *
149      * <p>Also known as Jaccard index, Intersection over Union, and Jaccard similarity coefficient</p>
150      *
151      * @param first the first BitMapProducer.
152      * @param second the second BitMapProducer.
153      * @return the Jaccard similarity.
154      */
155     public static double jaccardSimilarity(final BitMapProducer first, final BitMapProducer second) {
156         final int[] cardinality = new int[2];
157         first.forEachBitMapPair(second, (x, y) -> {
158             cardinality[0] += Long.bitCount(x & y);
159             cardinality[1] += Long.bitCount(x | y);
160             return true;
161         });
162         final int intersection = cardinality[0];
163         return intersection == 0 ? 0 : intersection / (double) cardinality[1];
164     }
165 
166     /**
167      * Calculates the cardinality of the logical {@code OR} of the bit maps for the two filters.
168      * @param first the first BitMapProducer.
169      * @param second the second BitMapProducer
170      * @return the cardinality of the {@code OR} of the filters.
171      */
172     public static int orCardinality(final BitMapProducer first, final BitMapProducer second) {
173         return cardinality(first, second, (x, y) -> x | y);
174     }
175 
176     /**
177      * Calculates the cardinality of the logical {@code XOR} of the bit maps for the two filters.
178      * @param first the first BitMapProducer.
179      * @param second the second BitMapProducer
180      * @return the cardinality of the {@code XOR} of the filters.
181      */
182     public static int xorCardinality(final BitMapProducer first, final BitMapProducer second) {
183         return cardinality(first, second, (x, y) -> x ^ y);
184     }
185 
186     /**
187      * Do not instantiate.
188      */
189     private SetOperations() {
190     }
191 }