JaccardDistance.java

  1. /*
  2.  * Licensed to the Apache Software Foundation (ASF) under one or more
  3.  * contributor license agreements.  See the NOTICE file distributed with
  4.  * this work for additional information regarding copyright ownership.
  5.  * The ASF licenses this file to You under the Apache License, Version 2.0
  6.  * (the "License"); you may not use this file except in compliance with
  7.  * the License.  You may obtain a copy of the License at
  8.  *
  9.  *      http://www.apache.org/licenses/LICENSE-2.0
  10.  *
  11.  * Unless required by applicable law or agreed to in writing, software
  12.  * distributed under the License is distributed on an "AS IS" BASIS,
  13.  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  14.  * See the License for the specific language governing permissions and
  15.  * limitations under the License.
  16.  */
  17. package org.apache.commons.text.similarity;

  18. /**
  19.  * Measures the Jaccard distance of two sets of character sequence. Jaccard distance is the dissimilarity between two sets. It is the complementary of Jaccard
  20.  * similarity.
  21.  *
  22.  * <p>
  23.  * For further explanation about Jaccard Distance, refer https://en.wikipedia.org/wiki/Jaccard_index
  24.  * </p>
  25.  *
  26.  * @since 1.0
  27.  */
  28. public class JaccardDistance implements EditDistance<Double> {

  29.     /**
  30.      * Creates a new instance.
  31.      */
  32.     public JaccardDistance() {
  33.         // empty
  34.     }

  35.     /**
  36.      * Computes the Jaccard distance of two set character sequence passed as input. Calculates Jaccard similarity and returns the complement of it.
  37.      *
  38.      * @param left  first input sequence.
  39.      * @param right second input sequence.
  40.      * @return index
  41.      * @throws IllegalArgumentException if either String input {@code null}.
  42.      */
  43.     @Override
  44.     public Double apply(final CharSequence left, final CharSequence right) {
  45.         return apply(SimilarityInput.input(left), SimilarityInput.input(right));
  46.     }

  47.     /**
  48.      * Computes the Jaccard distance of two set character sequence passed as input. Calculates Jaccard similarity and returns the complement of it.
  49.      *
  50.      * @param <E>   The type of similarity score unit.
  51.      * @param left  first input sequence.
  52.      * @param right second input sequence.
  53.      * @return index
  54.      * @throws IllegalArgumentException if either String input {@code null}.
  55.      */
  56.     public <E> Double apply(final SimilarityInput<E> left, final SimilarityInput<E> right) {
  57.         return 1.0 - JaccardSimilarity.INSTANCE.apply(left, right).doubleValue();
  58.     }

  59. }