View Javadoc
1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one or more
3    * contributor license agreements.  See the NOTICE file distributed with
4    * this work for additional information regarding copyright ownership.
5    * The ASF licenses this file to You under the Apache License, Version 2.0
6    * (the "License"); you may not use this file except in compliance with
7    * the License.  You may obtain a copy of the License at
8    *
9    *      https://www.apache.org/licenses/LICENSE-2.0
10   *
11   * Unless required by applicable law or agreed to in writing, software
12   * distributed under the License is distributed on an "AS IS" BASIS,
13   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14   * See the License for the specific language governing permissions and
15   * limitations under the License.
16   */
17  package org.apache.commons.text.similarity;
18  
19  /**
20   * The hamming distance between two strings of equal length is the number of positions at which the corresponding symbols are different.
21   *
22   * <p>
23   * For further explanation about the Hamming Distance, take a look at its Wikipedia page at https://en.wikipedia.org/wiki/Hamming_distance.
24   * </p>
25   *
26   * @since 1.0
27   */
28  public class HammingDistance implements EditDistance<Integer> {
29  
30      /**
31       * Creates a new instance.
32       */
33      public HammingDistance() {
34          // empty
35      }
36  
37      /**
38       * Computes the Hamming Distance between two strings with the same length.
39       *
40       * <p>
41       * The distance starts with zero, and for each occurrence of a different character in either String, it increments the distance by 1, and finally return its
42       * value.
43       * </p>
44       *
45       * <p>
46       * Since the Hamming Distance can only be calculated between strings of equal length, input of different lengths will throw IllegalArgumentException
47       * </p>
48       *
49       * <pre>
50       * distance.apply("", "")               = 0
51       * distance.apply("pappa", "pappa")     = 0
52       * distance.apply("1011101", "1011111") = 1
53       * distance.apply("ATCG", "ACCC")       = 2
54       * distance.apply("karolin", "kerstin"  = 3
55       * </pre>
56       *
57       * @param left  the first input, must not be null.
58       * @param right the second input, must not be null.
59       * @return distance.
60       * @throws IllegalArgumentException if either input is {@code null} or if they do not have the same length.
61       */
62      @Override
63      public Integer apply(final CharSequence left, final CharSequence right) {
64          return apply(SimilarityInput.input(left), SimilarityInput.input(right));
65      }
66  
67      /**
68       * Computes the Hamming Distance between two strings with the same length.
69       *
70       * <p>
71       * The distance starts with zero, and for each occurrence of a different character in either String, it increments the distance by 1, and finally return its
72       * value.
73       * </p>
74       * <p>
75       * Since the Hamming Distance can only be calculated between strings of equal length, input of different lengths will throw IllegalArgumentException
76       * </p>
77       *
78       * <pre>
79       * distance.apply("", "")               = 0
80       * distance.apply("pappa", "pappa")     = 0
81       * distance.apply("1011101", "1011111") = 1
82       * distance.apply("ATCG", "ACCC")       = 2
83       * distance.apply("karolin", "kerstin"  = 3
84       * </pre>
85       *
86       * @param <E> The type of similarity score unit.
87       * @param left  the first input, must not be null.
88       * @param right the second input, must not be null.
89       * @return distance.
90       * @throws IllegalArgumentException if either input is {@code null} or if they do not have the same length.
91       * @since 1.13.0
92       */
93      public <E> Integer apply(final SimilarityInput<E> left, final SimilarityInput<E> right) {
94          if (left == null || right == null) {
95              throw new IllegalArgumentException("SimilarityInput must not be null");
96          }
97          if (left.length() != right.length()) {
98              throw new IllegalArgumentException("SimilarityInput must have the same length");
99          }
100         int distance = 0;
101         for (int i = 0; i < left.length(); i++) {
102             if (!left.at(i).equals(right.at(i))) {
103                 distance++;
104             }
105         }
106         return distance;
107     }
108 
109 }