001/* 002 * Licensed to the Apache Software Foundation (ASF) under one or more 003 * contributor license agreements. See the NOTICE file distributed with 004 * this work for additional information regarding copyright ownership. 005 * The ASF licenses this file to You under the Apache License, Version 2.0 006 * (the "License"); you may not use this file except in compliance with 007 * the License. You may obtain a copy of the License at 008 * 009 * http://www.apache.org/licenses/LICENSE-2.0 010 * 011 * Unless required by applicable law or agreed to in writing, software 012 * distributed under the License is distributed on an "AS IS" BASIS, 013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 014 * See the License for the specific language governing permissions and 015 * limitations under the License. 016 */ 017package org.apache.commons.text.similarity; 018 019/** 020 * The hamming distance between two strings of equal length is the number of positions at which the corresponding symbols are different. 021 * 022 * <p> 023 * For further explanation about the Hamming Distance, take a look at its Wikipedia page at https://en.wikipedia.org/wiki/Hamming_distance. 024 * </p> 025 * 026 * @since 1.0 027 */ 028public class HammingDistance implements EditDistance<Integer> { 029 030 /** 031 * Creates a new instance. 032 */ 033 public HammingDistance() { 034 // empty 035 } 036 037 /** 038 * Computes the Hamming Distance between two strings with the same length. 039 * 040 * <p> 041 * The distance starts with zero, and for each occurrence of a different character in either String, it increments the distance by 1, and finally return its 042 * value. 043 * </p> 044 * 045 * <p> 046 * Since the Hamming Distance can only be calculated between strings of equal length, input of different lengths will throw IllegalArgumentException 047 * </p> 048 * 049 * <pre> 050 * distance.apply("", "") = 0 051 * distance.apply("pappa", "pappa") = 0 052 * distance.apply("1011101", "1011111") = 1 053 * distance.apply("ATCG", "ACCC") = 2 054 * distance.apply("karolin", "kerstin" = 3 055 * </pre> 056 * 057 * @param left the first input, must not be null. 058 * @param right the second input, must not be null. 059 * @return distance. 060 * @throws IllegalArgumentException if either input is {@code null} or if they do not have the same length. 061 */ 062 @Override 063 public Integer apply(final CharSequence left, final CharSequence right) { 064 return apply(SimilarityInput.input(left), SimilarityInput.input(right)); 065 } 066 067 /** 068 * Computes the Hamming Distance between two strings with the same length. 069 * 070 * <p> 071 * The distance starts with zero, and for each occurrence of a different character in either String, it increments the distance by 1, and finally return its 072 * value. 073 * </p> 074 * <p> 075 * Since the Hamming Distance can only be calculated between strings of equal length, input of different lengths will throw IllegalArgumentException 076 * </p> 077 * 078 * <pre> 079 * distance.apply("", "") = 0 080 * distance.apply("pappa", "pappa") = 0 081 * distance.apply("1011101", "1011111") = 1 082 * distance.apply("ATCG", "ACCC") = 2 083 * distance.apply("karolin", "kerstin" = 3 084 * </pre> 085 * 086 * @param <E> The type of similarity score unit. 087 * @param left the first input, must not be null. 088 * @param right the second input, must not be null. 089 * @return distance. 090 * @throws IllegalArgumentException if either input is {@code null} or if they do not have the same length. 091 * @since 1.13.0 092 */ 093 public <E> Integer apply(final SimilarityInput<E> left, final SimilarityInput<E> right) { 094 if (left == null || right == null) { 095 throw new IllegalArgumentException("SimilarityInput must not be null"); 096 } 097 if (left.length() != right.length()) { 098 throw new IllegalArgumentException("SimilarityInput must have the same length"); 099 } 100 int distance = 0; 101 for (int i = 0; i < left.length(); i++) { 102 if (!left.at(i).equals(right.at(i))) { 103 distance++; 104 } 105 } 106 return distance; 107 } 108 109}