View Javadoc
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
17  package org.apache.commons.text.similarity;
18  
/**
 * Interface for the concept of a string similarity score.
 *
 * <p>
 * A string similarity score is intended to have <i>some</i> of the properties of a metric, yet
 * allowing for exceptions, namely the Jaro-Winkler similarity score.
 * </p>
 * <p>
 * We define a SimilarityScore to be a function {@code d: [X * X] -> [0, INFINITY)} with the
 * following properties:
 * </p>
 * <ul>
 *     <li>{@code d(x,y) >= 0}, non-negativity or separation axiom</li>
 *     <li>{@code d(x,y) == d(y,x)}, symmetry.</li>
 * </ul>
 *
 * <p>
 * Notice, these are two of the properties that contribute to d being a metric.
 * </p>
 *
 * <p>
 * Further, this is intended to be a {@code BiFunction<CharSequence, CharSequence, R>}.
 * The {@code apply} method accepts a pair of {@link CharSequence} parameters
 * and returns an {@code R} type similarity score. We have omitted the explicit
 * statement of extending {@code BiFunction} due to it only being available since Java 1.8,
 * and we wish to maintain Java 1.7 compatibility.
 * </p>
 *
 * @param <R> The type of similarity score unit used by this SimilarityScore.
 * @since 1.0
 */
public interface SimilarityScore<R> {

    /**
     * Compares two CharSequences.
     *
     * @param left the first CharSequence
     * @param right the second CharSequence
     * @return The similarity score between two CharSequences
     */
    R apply(CharSequence left, CharSequence right);

}