HammingDistance.java

  1. /*
  2.  * Licensed to the Apache Software Foundation (ASF) under one or more
  3.  * contributor license agreements.  See the NOTICE file distributed with
  4.  * this work for additional information regarding copyright ownership.
  5.  * The ASF licenses this file to You under the Apache License, Version 2.0
  6.  * (the "License"); you may not use this file except in compliance with
  7.  * the License.  You may obtain a copy of the License at
  8.  *
  9.  *      http://www.apache.org/licenses/LICENSE-2.0
  10.  *
  11.  * Unless required by applicable law or agreed to in writing, software
  12.  * distributed under the License is distributed on an "AS IS" BASIS,
  13.  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  14.  * See the License for the specific language governing permissions and
  15.  * limitations under the License.
  16.  */
  17. package org.apache.commons.text.similarity;

  18. /**
  19.  * The hamming distance between two strings of equal length is the number of positions at which the corresponding symbols are different.
  20.  *
  21.  * <p>
  22.  * For further explanation about the Hamming Distance, take a look at its Wikipedia page at https://en.wikipedia.org/wiki/Hamming_distance.
  23.  * </p>
  24.  *
  25.  * @since 1.0
  26.  */
  27. public class HammingDistance implements EditDistance<Integer> {

  28.     /**
  29.      * Creates a new instance.
  30.      */
  31.     public HammingDistance() {
  32.         // empty
  33.     }

  34.     /**
  35.      * Computes the Hamming Distance between two strings with the same length.
  36.      *
  37.      * <p>
  38.      * The distance starts with zero, and for each occurrence of a different character in either String, it increments the distance by 1, and finally return its
  39.      * value.
  40.      * </p>
  41.      *
  42.      * <p>
  43.      * Since the Hamming Distance can only be calculated between strings of equal length, input of different lengths will throw IllegalArgumentException
  44.      * </p>
  45.      *
  46.      * <pre>
  47.      * distance.apply("", "")               = 0
  48.      * distance.apply("pappa", "pappa")     = 0
  49.      * distance.apply("1011101", "1011111") = 1
  50.      * distance.apply("ATCG", "ACCC")       = 2
  51.      * distance.apply("karolin", "kerstin"  = 3
  52.      * </pre>
  53.      *
  54.      * @param left  the first input, must not be null.
  55.      * @param right the second input, must not be null.
  56.      * @return distance.
  57.      * @throws IllegalArgumentException if either input is {@code null} or if they do not have the same length.
  58.      */
  59.     @Override
  60.     public Integer apply(final CharSequence left, final CharSequence right) {
  61.         return apply(SimilarityInput.input(left), SimilarityInput.input(right));
  62.     }

  63.     /**
  64.      * Computes the Hamming Distance between two strings with the same length.
  65.      *
  66.      * <p>
  67.      * The distance starts with zero, and for each occurrence of a different character in either String, it increments the distance by 1, and finally return its
  68.      * value.
  69.      * </p>
  70.      * <p>
  71.      * Since the Hamming Distance can only be calculated between strings of equal length, input of different lengths will throw IllegalArgumentException
  72.      * </p>
  73.      *
  74.      * <pre>
  75.      * distance.apply("", "")               = 0
  76.      * distance.apply("pappa", "pappa")     = 0
  77.      * distance.apply("1011101", "1011111") = 1
  78.      * distance.apply("ATCG", "ACCC")       = 2
  79.      * distance.apply("karolin", "kerstin"  = 3
  80.      * </pre>
  81.      *
  82.      * @param <E> The type of similarity score unit.
  83.      * @param left  the first input, must not be null.
  84.      * @param right the second input, must not be null.
  85.      * @return distance.
  86.      * @throws IllegalArgumentException if either input is {@code null} or if they do not have the same length.
  87.      * @since 1.13.0
  88.      */
  89.     public <E> Integer apply(final SimilarityInput<E> left, final SimilarityInput<E> right) {
  90.         if (left == null || right == null) {
  91.             throw new IllegalArgumentException("SimilarityInput must not be null");
  92.         }
  93.         if (left.length() != right.length()) {
  94.             throw new IllegalArgumentException("SimilarityInput must have the same length");
  95.         }
  96.         int distance = 0;
  97.         for (int i = 0; i < left.length(); i++) {
  98.             if (!left.at(i).equals(right.at(i))) {
  99.                 distance++;
  100.             }
  101.         }
  102.         return distance;
  103.     }

  104. }