001/*
002 * Licensed to the Apache Software Foundation (ASF) under one or more
003 * contributor license agreements.  See the NOTICE file distributed with
004 * this work for additional information regarding copyright ownership.
005 * The ASF licenses this file to You under the Apache License, Version 2.0
006 * (the "License"); you may not use this file except in compliance with
007 * the License.  You may obtain a copy of the License at
008 *
009 *      http://www.apache.org/licenses/LICENSE-2.0
010 *
011 * Unless required by applicable law or agreed to in writing, software
012 * distributed under the License is distributed on an "AS IS" BASIS,
013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 * See the License for the specific language governing permissions and
015 * limitations under the License.
016 */
017package org.apache.commons.text.similarity;
018
019/**
020 * The hamming distance between two strings of equal length is the number of positions at which the corresponding symbols are different.
021 *
022 * <p>
023 * For further explanation about the Hamming Distance, take a look at its Wikipedia page at https://en.wikipedia.org/wiki/Hamming_distance.
024 * </p>
025 *
026 * @since 1.0
027 */
028public class HammingDistance implements EditDistance<Integer> {
029
030    /**
031     * Creates a new instance.
032     */
033    public HammingDistance() {
034        // empty
035    }
036
037    /**
038     * Computes the Hamming Distance between two strings with the same length.
039     *
040     * <p>
041     * The distance starts with zero, and for each occurrence of a different character in either String, it increments the distance by 1, and finally return its
042     * value.
043     * </p>
044     *
045     * <p>
046     * Since the Hamming Distance can only be calculated between strings of equal length, input of different lengths will throw IllegalArgumentException
047     * </p>
048     *
049     * <pre>
050     * distance.apply("", "")               = 0
051     * distance.apply("pappa", "pappa")     = 0
052     * distance.apply("1011101", "1011111") = 1
053     * distance.apply("ATCG", "ACCC")       = 2
054     * distance.apply("karolin", "kerstin"  = 3
055     * </pre>
056     *
057     * @param left  the first input, must not be null.
058     * @param right the second input, must not be null.
059     * @return distance.
060     * @throws IllegalArgumentException if either input is {@code null} or if they do not have the same length.
061     */
062    @Override
063    public Integer apply(final CharSequence left, final CharSequence right) {
064        return apply(SimilarityInput.input(left), SimilarityInput.input(right));
065    }
066
067    /**
068     * Computes the Hamming Distance between two strings with the same length.
069     *
070     * <p>
071     * The distance starts with zero, and for each occurrence of a different character in either String, it increments the distance by 1, and finally return its
072     * value.
073     * </p>
074     * <p>
075     * Since the Hamming Distance can only be calculated between strings of equal length, input of different lengths will throw IllegalArgumentException
076     * </p>
077     *
078     * <pre>
079     * distance.apply("", "")               = 0
080     * distance.apply("pappa", "pappa")     = 0
081     * distance.apply("1011101", "1011111") = 1
082     * distance.apply("ATCG", "ACCC")       = 2
083     * distance.apply("karolin", "kerstin"  = 3
084     * </pre>
085     *
086     * @param <E> The type of similarity score unit.
087     * @param left  the first input, must not be null.
088     * @param right the second input, must not be null.
089     * @return distance.
090     * @throws IllegalArgumentException if either input is {@code null} or if they do not have the same length.
091     * @since 1.13.0
092     */
093    public <E> Integer apply(final SimilarityInput<E> left, final SimilarityInput<E> right) {
094        if (left == null || right == null) {
095            throw new IllegalArgumentException("SimilarityInput must not be null");
096        }
097        if (left.length() != right.length()) {
098            throw new IllegalArgumentException("SimilarityInput must have the same length");
099        }
100        int distance = 0;
101        for (int i = 0; i < left.length(); i++) {
102            if (!left.at(i).equals(right.at(i))) {
103                distance++;
104            }
105        }
106        return distance;
107    }
108
109}