1 /*
2 * Licensed to the Apache Software Foundation (ASF) under one or more
3 * contributor license agreements. See the NOTICE file distributed with
4 * this work for additional information regarding copyright ownership.
5 * The ASF licenses this file to You under the Apache License, Version 2.0
6 * (the "License"); you may not use this file except in compliance with
7 * the License. You may obtain a copy of the License at
8 *
9 * https://www.apache.org/licenses/LICENSE-2.0
10 *
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
16 */
17 package org.apache.commons.text.similarity;
18
19 /**
20 * The hamming distance between two strings of equal length is the number of positions at which the corresponding symbols are different.
21 *
22 * <p>
23 * For further explanation about the Hamming Distance, take a look at its Wikipedia page at https://en.wikipedia.org/wiki/Hamming_distance.
24 * </p>
25 *
26 * @since 1.0
27 */
28 public class HammingDistance implements EditDistance<Integer> {
29
30 /**
31 * Creates a new instance.
32 */
33 public HammingDistance() {
34 // empty
35 }
36
37 /**
38 * Computes the Hamming Distance between two strings with the same length.
39 *
40 * <p>
41 * The distance starts with zero, and for each occurrence of a different character in either String, it increments the distance by 1, and finally return its
42 * value.
43 * </p>
44 *
45 * <p>
46 * Since the Hamming Distance can only be calculated between strings of equal length, input of different lengths will throw IllegalArgumentException
47 * </p>
48 *
49 * <pre>
50 * distance.apply("", "") = 0
51 * distance.apply("pappa", "pappa") = 0
52 * distance.apply("1011101", "1011111") = 1
53 * distance.apply("ATCG", "ACCC") = 2
54 * distance.apply("karolin", "kerstin" = 3
55 * </pre>
56 *
57 * @param left the first input, must not be null.
58 * @param right the second input, must not be null.
59 * @return distance.
60 * @throws IllegalArgumentException if either input is {@code null} or if they do not have the same length.
61 */
62 @Override
63 public Integer apply(final CharSequence left, final CharSequence right) {
64 return apply(SimilarityInput.input(left), SimilarityInput.input(right));
65 }
66
67 /**
68 * Computes the Hamming Distance between two strings with the same length.
69 *
70 * <p>
71 * The distance starts with zero, and for each occurrence of a different character in either String, it increments the distance by 1, and finally return its
72 * value.
73 * </p>
74 * <p>
75 * Since the Hamming Distance can only be calculated between strings of equal length, input of different lengths will throw IllegalArgumentException
76 * </p>
77 *
78 * <pre>
79 * distance.apply("", "") = 0
80 * distance.apply("pappa", "pappa") = 0
81 * distance.apply("1011101", "1011111") = 1
82 * distance.apply("ATCG", "ACCC") = 2
83 * distance.apply("karolin", "kerstin" = 3
84 * </pre>
85 *
86 * @param <E> The type of similarity score unit.
87 * @param left the first input, must not be null.
88 * @param right the second input, must not be null.
89 * @return distance.
90 * @throws IllegalArgumentException if either input is {@code null} or if they do not have the same length.
91 * @since 1.13.0
92 */
93 public <E> Integer apply(final SimilarityInput<E> left, final SimilarityInput<E> right) {
94 if (left == null || right == null) {
95 throw new IllegalArgumentException("SimilarityInput must not be null");
96 }
97 if (left.length() != right.length()) {
98 throw new IllegalArgumentException("SimilarityInput must have the same length");
99 }
100 int distance = 0;
101 for (int i = 0; i < left.length(); i++) {
102 if (!left.at(i).equals(right.at(i))) {
103 distance++;
104 }
105 }
106 return distance;
107 }
108
109 }