/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
17  package org.apache.commons.text.similarity;
18  
19  /**
20   * Interface for <a href="http://en.wikipedia.org/wiki/Edit_distance">Edit Distances</a>.
21   *
22   * <p>
23   * An edit distance is a formal metric on the Kleene closure ({@code X<sup>*</sup>}) over an
24   * alphabet ({@code X}). Note, that a <a href="https://en.wikipedia.org/wiki/Metric_(mathematics)">metric</a>
25   * on a set {@code S} is a function {@code d: [S * S] -&gt; [0, INFINITY)} such
26   * that the following hold for {@code x,y,z} in
27   * the set {@code S}:
28   * </p>
29   * <ul>
30   *     <li>{@code d(x,y) &gt;= 0}, non-negativity or separation axiom</li>
31   *     <li>{@code d(x,y) == 0}, if and only if, {@code x == y}</li>
32   *     <li>{@code d(x,y) == d(y,x)}, symmetry, and</li>
33   *     <li>{@code d(x,z) &lt;=  d(x,y) + d(y,z)}, the triangle inequality</li>
34   * </ul>
35   *
36   *
37   * <p>
38   * This is a BiFunction&lt;CharSequence, CharSequence, R&gt;.
39   * The {@code apply} method
40   * accepts a pair of {@link CharSequence} parameters
41   * and returns an {@code R} type similarity score.
42   * </p>
43   *
44   * @param <R> The type of similarity score unit used by this EditDistance.
45   * @since 1.0
46   */
47  public interface EditDistance<R> extends SimilarityScore<R> {
48  
49      /**
50       * Compares two CharSequences.
51       *
52       * @param left the first CharSequence
53       * @param right the second CharSequence
54       * @return The similarity score between two CharSequences
55       */
56      @Override
57      R apply(CharSequence left, CharSequence right);
58  
59  }