1 /* 2 * Licensed to the Apache Software Foundation (ASF) under one or more 3 * contributor license agreements. See the NOTICE file distributed with 4 * this work for additional information regarding copyright ownership. 5 * The ASF licenses this file to You under the Apache License, Version 2.0 6 * (the "License"); you may not use this file except in compliance with 7 * the License. You may obtain a copy of the License at 8 * 9 * https://www.apache.org/licenses/LICENSE-2.0 10 * 11 * Unless required by applicable law or agreed to in writing, software 12 * distributed under the License is distributed on an "AS IS" BASIS, 13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 * See the License for the specific language governing permissions and 15 * limitations under the License. 16 */ 17 package org.apache.commons.text.similarity; 18 19 /** 20 * The hamming distance between two strings of equal length is the number of positions at which the corresponding symbols are different. 21 * 22 * <p> 23 * For further explanation about the Hamming Distance, take a look at its Wikipedia page at https://en.wikipedia.org/wiki/Hamming_distance. 24 * </p> 25 * 26 * @since 1.0 27 */ 28 public class HammingDistance implements EditDistance<Integer> { 29 30 /** 31 * Creates a new instance. 32 */ 33 public HammingDistance() { 34 // empty 35 } 36 37 /** 38 * Computes the Hamming Distance between two strings with the same length. 39 * 40 * <p> 41 * The distance starts with zero, and for each occurrence of a different character in either String, it increments the distance by 1, and finally return its 42 * value. 43 * </p> 44 * 45 * <p> 46 * Since the Hamming Distance can only be calculated between strings of equal length, input of different lengths will throw IllegalArgumentException 47 * </p> 48 * 49 * <pre> 50 * distance.apply("", "") = 0 51 * distance.apply("pappa", "pappa") = 0 52 * distance.apply("1011101", "1011111") = 1 53 * distance.apply("ATCG", "ACCC") = 2 54 * distance.apply("karolin", "kerstin" = 3 55 * </pre> 56 * 57 * @param left the first input, must not be null. 58 * @param right the second input, must not be null. 59 * @return distance. 60 * @throws IllegalArgumentException if either input is {@code null} or if they do not have the same length. 61 */ 62 @Override 63 public Integer apply(final CharSequence left, final CharSequence right) { 64 return apply(SimilarityInput.input(left), SimilarityInput.input(right)); 65 } 66 67 /** 68 * Computes the Hamming Distance between two strings with the same length. 69 * 70 * <p> 71 * The distance starts with zero, and for each occurrence of a different character in either String, it increments the distance by 1, and finally return its 72 * value. 73 * </p> 74 * <p> 75 * Since the Hamming Distance can only be calculated between strings of equal length, input of different lengths will throw IllegalArgumentException 76 * </p> 77 * 78 * <pre> 79 * distance.apply("", "") = 0 80 * distance.apply("pappa", "pappa") = 0 81 * distance.apply("1011101", "1011111") = 1 82 * distance.apply("ATCG", "ACCC") = 2 83 * distance.apply("karolin", "kerstin" = 3 84 * </pre> 85 * 86 * @param <E> The type of similarity score unit. 87 * @param left the first input, must not be null. 88 * @param right the second input, must not be null. 89 * @return distance. 90 * @throws IllegalArgumentException if either input is {@code null} or if they do not have the same length. 91 * @since 1.13.0 92 */ 93 public <E> Integer apply(final SimilarityInput<E> left, final SimilarityInput<E> right) { 94 if (left == null || right == null) { 95 throw new IllegalArgumentException("SimilarityInput must not be null"); 96 } 97 if (left.length() != right.length()) { 98 throw new IllegalArgumentException("SimilarityInput must have the same length"); 99 } 100 int distance = 0; 101 for (int i = 0; i < left.length(); i++) { 102 if (!left.at(i).equals(right.at(i))) { 103 distance++; 104 } 105 } 106 return distance; 107 } 108 109 }