HammingDistance.java

  1. /*
  2.  * Licensed to the Apache Software Foundation (ASF) under one or more
  3.  * contributor license agreements.  See the NOTICE file distributed with
  4.  * this work for additional information regarding copyright ownership.
  5.  * The ASF licenses this file to You under the Apache License, Version 2.0
  6.  * (the "License"); you may not use this file except in compliance with
  7.  * the License.  You may obtain a copy of the License at
  8.  *
  9.  *      http://www.apache.org/licenses/LICENSE-2.0
  10.  *
  11.  * Unless required by applicable law or agreed to in writing, software
  12.  * distributed under the License is distributed on an "AS IS" BASIS,
  13.  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  14.  * See the License for the specific language governing permissions and
  15.  * limitations under the License.
  16.  */
  17. package org.apache.commons.text.similarity;

  18. /**
  19.  * The hamming distance between two strings of equal length is the number of
  20.  * positions at which the corresponding symbols are different.
  21.  *
  22.  * <p>
  23.  * For further explanation about the Hamming Distance, take a look at its
  24.  * Wikipedia page at http://en.wikipedia.org/wiki/Hamming_distance.
  25.  * </p>
  26.  *
  27.  * @since 1.0
  28.  */
  29. public class HammingDistance implements EditDistance<Integer> {

  30.     /**
  31.      * Find the Hamming Distance between two strings with the same
  32.      * length.
  33.      *
  34.      * <p>The distance starts with zero, and for each occurrence of a
  35.      * different character in either String, it increments the distance
  36.      * by 1, and finally return its value.</p>
  37.      *
  38.      * <p>Since the Hamming Distance can only be calculated between strings of equal length, input of different lengths
  39.      * will throw IllegalArgumentException</p>
  40.      *
  41.      * <pre>
  42.      * distance.apply("", "")               = 0
  43.      * distance.apply("pappa", "pappa")     = 0
  44.      * distance.apply("1011101", "1011111") = 1
  45.      * distance.apply("ATCG", "ACCC")       = 2
  46.      * distance.apply("karolin", "kerstin"  = 3
  47.      * </pre>
  48.      *
  49.      * @param left the first CharSequence, must not be null
  50.      * @param right the second CharSequence, must not be null
  51.      * @return distance
  52.      * @throws IllegalArgumentException if either input is {@code null} or
  53.      *             if they do not have the same length
  54.      */
  55.     @Override
  56.     public Integer apply(final CharSequence left, final CharSequence right) {
  57.         if (left == null || right == null) {
  58.             throw new IllegalArgumentException("Strings must not be null");
  59.         }

  60.         if (left.length() != right.length()) {
  61.             throw new IllegalArgumentException("Strings must have the same length");
  62.         }

  63.         int distance = 0;

  64.         for (int i = 0; i < left.length(); i++) {
  65.             if (left.charAt(i) != right.charAt(i)) {
  66.                 distance++;
  67.             }
  68.         }

  69.         return distance;
  70.     }

  71. }