View Javadoc
1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one or more
3    * contributor license agreements.  See the NOTICE file distributed with
4    * this work for additional information regarding copyright ownership.
5    * The ASF licenses this file to You under the Apache License, Version 2.0
6    * (the "License"); you may not use this file except in compliance with
7    * the License.  You may obtain a copy of the License at
8    *
9    *      http://www.apache.org/licenses/LICENSE-2.0
10   *
11   * Unless required by applicable law or agreed to in writing, software
12   * distributed under the License is distributed on an "AS IS" BASIS,
13   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14   * See the License for the specific language governing permissions and
15   * limitations under the License.
16   */
17  package org.apache.commons.text.similarity;
18  
19  /**
20   * Measures the Hamming distance of two equal-length sequences, that is, the count of positions at which the corresponding symbols differ.
21   *
22   * <p>
23   * Background on the metric is available on its Wikipedia page at https://en.wikipedia.org/wiki/Hamming_distance.
24   * </p>
25   *
26   * @since 1.0
27   */
28  public class HammingDistance implements EditDistance<Integer> {
29  
30      /**
31       * Calculates the Hamming distance between two character sequences of identical length.
32       *
33       * <p>
34       * Both arguments are wrapped with {@code SimilarityInput.input} and handed to {@link #apply(SimilarityInput, SimilarityInput)}, which counts the positions
35       * holding different characters.
36       * </p>
37       *
38       * <p>
39       * The metric is only defined for inputs of equal length, so inputs whose lengths differ are rejected with an IllegalArgumentException.
40       * </p>
41       *
42       * <pre>
43       * distance.apply("", "")               = 0
44       * distance.apply("pappa", "pappa")     = 0
45       * distance.apply("1011101", "1011111") = 1
46       * distance.apply("ATCG", "ACCC")       = 2
47       * distance.apply("karolin", "kerstin") = 3
48       * </pre>
49       *
50       * @param left  the first input, must not be null.
51       * @param right the second input, must not be null.
52       * @return the number of positions at which the inputs differ.
53       * @throws IllegalArgumentException if either input is {@code null} or if they do not have the same length.
54       */
55      @Override
56      public Integer apply(final CharSequence left, final CharSequence right) {
57          return apply(SimilarityInput.input(left), SimilarityInput.input(right));
58      }
59  
60      /**
61       * Calculates the Hamming distance between two similarity inputs of identical length.
62       *
63       * <p>
64       * The inputs are walked from the first position to the last; elements at the same position are compared with {@code equals} and every mismatch adds one to
65       * the result.
66       * </p>
67       * <p>
68       * The metric is only defined for inputs of equal length, so inputs whose lengths differ are rejected with an IllegalArgumentException.
69       * </p>
70       *
71       * <pre>
72       * distance.apply("", "")               = 0
73       * distance.apply("pappa", "pappa")     = 0
74       * distance.apply("1011101", "1011111") = 1
75       * distance.apply("ATCG", "ACCC")       = 2
76       * distance.apply("karolin", "kerstin") = 3
77       * </pre>
78       *
79       * @param <E> The type of similarity score unit.
80       * @param left  the first input, must not be null.
81       * @param right the second input, must not be null.
82       * @return the number of positions at which the inputs differ.
83       * @throws IllegalArgumentException if either input is {@code null} or if they do not have the same length.
84       * @since 1.13.0
85       */
86      public <E> Integer apply(final SimilarityInput<E> left, final SimilarityInput<E> right) {
87          final boolean bothPresent = left != null && right != null;
88          if (!bothPresent) {
89              throw new IllegalArgumentException("SimilarityInput must not be null");
90          }
91          final boolean sameLength = left.length() == right.length();
92          if (!sameLength) {
93              throw new IllegalArgumentException("SimilarityInput must have the same length");
94          }
95          int mismatches = 0;
96          for (int pos = 0; pos < left.length(); pos++) {
97              if (left.at(pos).equals(right.at(pos))) {
98                  // Matching symbols leave the count untouched.
99                  continue;
100             }
101             mismatches++;
102         }
103         return mismatches;
104     }
105 
106 }