View Javadoc
1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one or more
3    * contributor license agreements.  See the NOTICE file distributed with
4    * this work for additional information regarding copyright ownership.
5    * The ASF licenses this file to You under the Apache License, Version 2.0
6    * (the "License"); you may not use this file except in compliance with
7    * the License.  You may obtain a copy of the License at
8    *
9    *      https://www.apache.org/licenses/LICENSE-2.0
10   *
11   * Unless required by applicable law or agreed to in writing, software
12   * distributed under the License is distributed on an "AS IS" BASIS,
13   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14   * See the License for the specific language governing permissions and
15   * limitations under the License.
16   */
17  package org.apache.commons.text.similarity;
18  
19  import static org.junit.jupiter.api.Assertions.assertEquals;
20  import static org.junit.jupiter.api.Assertions.assertNull;
21  import static org.junit.jupiter.api.Assertions.assertThrows;
22  
23  import org.junit.jupiter.api.Test;
24  import org.junit.jupiter.params.ParameterizedTest;
25  import org.junit.jupiter.params.provider.MethodSource;
26  
27  /**
28   * Tests {@link LevenshteinDistance}.
29   */
30  class LevenshteinDistanceTest {
31  
32      private static final LevenshteinDistance UNLIMITED_DISTANCE = LevenshteinDistance.getDefaultInstance();
33  
34      @Test
35      void testApplyThrowsIllegalArgumentExceptionSimilarityInput() {
36          assertThrows(IllegalArgumentException.class, () -> new LevenshteinDistance(0).apply((SimilarityInput<Object>) null, (SimilarityInput<Object>) null));
37      }
38  
39      @Test
40      void testApplyThrowsIllegalArgumentExceptionString() {
41          assertThrows(IllegalArgumentException.class, () -> new LevenshteinDistance(0).apply((String) null, (String) null));
42      }
43  
44      @Test
45      void testConstructorWithNegativeThreshold() {
46          assertThrows(IllegalArgumentException.class, () -> new LevenshteinDistance(-1));
47      }
48  
49      @ParameterizedTest
50      @MethodSource("org.apache.commons.text.similarity.SimilarityInputTest#similarityInputs()")
51      void testGetLevenshteinDistance(final Class<?> cls) {
52          assertEquals(0, UNLIMITED_DISTANCE.apply(SimilarityInputTest.build(cls, ""), SimilarityInputTest.build(cls, "")));
53          assertEquals(1, UNLIMITED_DISTANCE.apply(SimilarityInputTest.build(cls, ""), SimilarityInputTest.build(cls, "a")));
54          assertEquals(7, UNLIMITED_DISTANCE.apply(SimilarityInputTest.build(cls, "aaapppp"), SimilarityInputTest.build(cls, "")));
55          assertEquals(1, UNLIMITED_DISTANCE.apply(SimilarityInputTest.build(cls, "frog"), SimilarityInputTest.build(cls, "fog")));
56          assertEquals(3, UNLIMITED_DISTANCE.apply(SimilarityInputTest.build(cls, "fly"), SimilarityInputTest.build(cls, "ant")));
57          assertEquals(7, UNLIMITED_DISTANCE.apply(SimilarityInputTest.build(cls, "elephant"), SimilarityInputTest.build(cls, "hippo")));
58          assertEquals(7, UNLIMITED_DISTANCE.apply(SimilarityInputTest.build(cls, "hippo"), SimilarityInputTest.build(cls, "elephant")));
59          assertEquals(8, UNLIMITED_DISTANCE.apply(SimilarityInputTest.build(cls, "hippo"), SimilarityInputTest.build(cls, "zzzzzzzz")));
60          assertEquals(8, UNLIMITED_DISTANCE.apply(SimilarityInputTest.build(cls, "zzzzzzzz"), SimilarityInputTest.build(cls, "hippo")));
61          assertEquals(1, UNLIMITED_DISTANCE.apply(SimilarityInputTest.build(cls, "hello"), SimilarityInputTest.build(cls, "hallo")));
62      }
63  
64      @ParameterizedTest
65      @MethodSource("org.apache.commons.text.similarity.SimilarityInputTest#similarityInputs()")
66      void testGetLevenshteinDistance_NullString(final Class<?> cls) {
67          assertThrows(IllegalArgumentException.class, () -> UNLIMITED_DISTANCE.apply(SimilarityInputTest.build(cls, "a"), SimilarityInputTest.build(cls, null)));
68      }
69  
70      @ParameterizedTest
71      @MethodSource("org.apache.commons.text.similarity.SimilarityInputTest#similarityInputs()")
72      void testGetLevenshteinDistance_NullStringInt(final Class<?> cls) {
73          assertThrows(IllegalArgumentException.class, () -> UNLIMITED_DISTANCE.apply(SimilarityInputTest.build(cls, null), SimilarityInputTest.build(cls, "a")));
74      }
75  
76      @ParameterizedTest
77      @MethodSource("org.apache.commons.text.similarity.SimilarityInputTest#similarityInputs()")
78      void testGetLevenshteinDistance_StringNull(final Class<?> cls) {
79          assertThrows(IllegalArgumentException.class, () -> UNLIMITED_DISTANCE.apply(SimilarityInputTest.build(cls, null), SimilarityInputTest.build(cls, "a")));
80      }
81  
82      @ParameterizedTest
83      @MethodSource("org.apache.commons.text.similarity.SimilarityInputTest#similarityInputs()")
84      void testGetLevenshteinDistance_StringNullInt(final Class<?> cls) {
85          assertThrows(IllegalArgumentException.class, () -> UNLIMITED_DISTANCE.apply(SimilarityInputTest.build(cls, "a"), SimilarityInputTest.build(cls, null)));
86      }
87  
88      @ParameterizedTest
89      @MethodSource("org.apache.commons.text.similarity.SimilarityInputTest#similarityInputs()")
90      void testGetLevenshteinDistance_StringString(final Class<?> cls) {
91          assertEquals(0, UNLIMITED_DISTANCE.apply(SimilarityInputTest.build(cls, ""), SimilarityInputTest.build(cls, "")));
92          assertEquals(1, UNLIMITED_DISTANCE.apply(SimilarityInputTest.build(cls, ""), SimilarityInputTest.build(cls, "a")));
93          assertEquals(7, UNLIMITED_DISTANCE.apply(SimilarityInputTest.build(cls, "aaapppp"), SimilarityInputTest.build(cls, "")));
94          assertEquals(1, UNLIMITED_DISTANCE.apply(SimilarityInputTest.build(cls, "frog"), SimilarityInputTest.build(cls, "fog")));
95          assertEquals(3, UNLIMITED_DISTANCE.apply(SimilarityInputTest.build(cls, "fly"), SimilarityInputTest.build(cls, "ant")));
96          assertEquals(7, UNLIMITED_DISTANCE.apply(SimilarityInputTest.build(cls, "elephant"), SimilarityInputTest.build(cls, "hippo")));
97          assertEquals(7, UNLIMITED_DISTANCE.apply(SimilarityInputTest.build(cls, "hippo"), SimilarityInputTest.build(cls, "elephant")));
98          assertEquals(8, UNLIMITED_DISTANCE.apply(SimilarityInputTest.build(cls, "hippo"), SimilarityInputTest.build(cls, "zzzzzzzz")));
99          assertEquals(8, UNLIMITED_DISTANCE.apply(SimilarityInputTest.build(cls, "zzzzzzzz"), SimilarityInputTest.build(cls, "hippo")));
100         assertEquals(1, UNLIMITED_DISTANCE.apply(SimilarityInputTest.build(cls, "hello"), SimilarityInputTest.build(cls, "hallo")));
101     }
102 
103     @ParameterizedTest
104     @MethodSource("org.apache.commons.text.similarity.SimilarityInputTest#similarityInputs()")
105     void testGetLevenshteinDistance_StringStringInt(final Class<?> cls) {
106         // empty strings
107         assertEquals(0, new LevenshteinDistance(0).apply(SimilarityInputTest.build(cls, ""), SimilarityInputTest.build(cls, "")));
108         assertEquals(7, new LevenshteinDistance(8).apply(SimilarityInputTest.build(cls, "aaapppp"), SimilarityInputTest.build(cls, "")));
109         assertEquals(7, new LevenshteinDistance(7).apply(SimilarityInputTest.build(cls, "aaapppp"), SimilarityInputTest.build(cls, "")));
110         assertEquals(-1, new LevenshteinDistance(6).apply(SimilarityInputTest.build(cls, "aaapppp"), SimilarityInputTest.build(cls, "")));
111 
112         // unequal strings, zero threshold
113         assertEquals(-1, new LevenshteinDistance(0).apply(SimilarityInputTest.build(cls, "b"), SimilarityInputTest.build(cls, "a")));
114         assertEquals(-1, new LevenshteinDistance(0).apply(SimilarityInputTest.build(cls, "a"), SimilarityInputTest.build(cls, "b")));
115 
116         // equal strings
117         assertEquals(0, new LevenshteinDistance(0).apply(SimilarityInputTest.build(cls, "aa"), SimilarityInputTest.build(cls, "aa")));
118         assertEquals(0, new LevenshteinDistance(2).apply(SimilarityInputTest.build(cls, "aa"), SimilarityInputTest.build(cls, "aa")));
119 
120         // same length
121         assertEquals(-1, new LevenshteinDistance(2).apply(SimilarityInputTest.build(cls, "aaa"), SimilarityInputTest.build(cls, "bbb")));
122         assertEquals(3, new LevenshteinDistance(3).apply(SimilarityInputTest.build(cls, "aaa"), SimilarityInputTest.build(cls, "bbb")));
123 
124         // big stripe
125         assertEquals(6, new LevenshteinDistance(10).apply(SimilarityInputTest.build(cls, "aaaaaa"), SimilarityInputTest.build(cls, "b")));
126 
127         // distance less than threshold
128         assertEquals(7, new LevenshteinDistance(8).apply(SimilarityInputTest.build(cls, "aaapppp"), SimilarityInputTest.build(cls, "b")));
129         assertEquals(3, new LevenshteinDistance(4).apply(SimilarityInputTest.build(cls, "a"), SimilarityInputTest.build(cls, "bbb")));
130 
131         // distance equal to threshold
132         assertEquals(7, new LevenshteinDistance(7).apply(SimilarityInputTest.build(cls, "aaapppp"), SimilarityInputTest.build(cls, "b")));
133         assertEquals(3, new LevenshteinDistance(3).apply(SimilarityInputTest.build(cls, "a"), SimilarityInputTest.build(cls, "bbb")));
134 
135         // distance greater than threshold
136         assertEquals(-1, new LevenshteinDistance(2).apply(SimilarityInputTest.build(cls, "a"), SimilarityInputTest.build(cls, "bbb")));
137         assertEquals(-1, new LevenshteinDistance(2).apply(SimilarityInputTest.build(cls, "bbb"), SimilarityInputTest.build(cls, "a")));
138         assertEquals(-1, new LevenshteinDistance(6).apply(SimilarityInputTest.build(cls, "aaapppp"), SimilarityInputTest.build(cls, "b")));
139 
140         // stripe runs off array, strings not similar
141         assertEquals(-1, new LevenshteinDistance(1).apply(SimilarityInputTest.build(cls, "a"), SimilarityInputTest.build(cls, "bbb")));
142         assertEquals(-1, new LevenshteinDistance(1).apply(SimilarityInputTest.build(cls, "bbb"), SimilarityInputTest.build(cls, "a")));
143 
144         // stripe runs off array, strings are similar
145         assertEquals(-1, new LevenshteinDistance(1).apply(SimilarityInputTest.build(cls, "12345"), SimilarityInputTest.build(cls, "1234567")));
146         assertEquals(-1, new LevenshteinDistance(1).apply(SimilarityInputTest.build(cls, "1234567"), SimilarityInputTest.build(cls, "12345")));
147 
148         // old getLevenshteinDistance test cases
149         assertEquals(1, new LevenshteinDistance(1).apply(SimilarityInputTest.build(cls, "frog"), SimilarityInputTest.build(cls, "fog")));
150         assertEquals(3, new LevenshteinDistance(3).apply(SimilarityInputTest.build(cls, "fly"), SimilarityInputTest.build(cls, "ant")));
151         assertEquals(7, new LevenshteinDistance(7).apply(SimilarityInputTest.build(cls, "elephant"), SimilarityInputTest.build(cls, "hippo")));
152         assertEquals(-1, new LevenshteinDistance(6).apply(SimilarityInputTest.build(cls, "elephant"), SimilarityInputTest.build(cls, "hippo")));
153         assertEquals(7, new LevenshteinDistance(7).apply(SimilarityInputTest.build(cls, "hippo"), SimilarityInputTest.build(cls, "elephant")));
154         assertEquals(-1, new LevenshteinDistance(6).apply(SimilarityInputTest.build(cls, "hippo"), SimilarityInputTest.build(cls, "elephant")));
155         assertEquals(8, new LevenshteinDistance(8).apply(SimilarityInputTest.build(cls, "hippo"), SimilarityInputTest.build(cls, "zzzzzzzz")));
156         assertEquals(8, new LevenshteinDistance(8).apply(SimilarityInputTest.build(cls, "zzzzzzzz"), SimilarityInputTest.build(cls, "hippo")));
157         assertEquals(1, new LevenshteinDistance(1).apply(SimilarityInputTest.build(cls, "hello"), SimilarityInputTest.build(cls, "hallo")));
158 
159         assertEquals(1, new LevenshteinDistance(Integer.MAX_VALUE).apply(SimilarityInputTest.build(cls, "frog"), SimilarityInputTest.build(cls, "fog")));
160         assertEquals(3, new LevenshteinDistance(Integer.MAX_VALUE).apply(SimilarityInputTest.build(cls, "fly"), SimilarityInputTest.build(cls, "ant")));
161         assertEquals(7, new LevenshteinDistance(Integer.MAX_VALUE).apply(SimilarityInputTest.build(cls, "elephant"), SimilarityInputTest.build(cls, "hippo")));
162         assertEquals(7, new LevenshteinDistance(Integer.MAX_VALUE).apply(SimilarityInputTest.build(cls, "hippo"), SimilarityInputTest.build(cls, "elephant")));
163         assertEquals(8, new LevenshteinDistance(Integer.MAX_VALUE).apply(SimilarityInputTest.build(cls, "hippo"), SimilarityInputTest.build(cls, "zzzzzzzz")));
164         assertEquals(8, new LevenshteinDistance(Integer.MAX_VALUE).apply(SimilarityInputTest.build(cls, "zzzzzzzz"), SimilarityInputTest.build(cls, "hippo")));
165         assertEquals(1, new LevenshteinDistance(Integer.MAX_VALUE).apply(SimilarityInputTest.build(cls, "hello"), SimilarityInputTest.build(cls, "hallo")));
166         assertEquals(-1, new LevenshteinDistance(1).apply(SimilarityInputTest.build(cls, "abc"), SimilarityInputTest.build(cls, "acb")));
167     }
168 
169     @Test
170     void testGetThresholdDirectlyAfterObjectInstantiation() {
171         assertNull(LevenshteinDistance.getDefaultInstance().getThreshold());
172     }
173 
174 }