View Javadoc
1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one or more
3    * contributor license agreements.  See the NOTICE file distributed with
4    * this work for additional information regarding copyright ownership.
5    * The ASF licenses this file to You under the Apache License, Version 2.0
6    * (the "License"); you may not use this file except in compliance with
7    * the License.  You may obtain a copy of the License at
8    *
9    *      https://www.apache.org/licenses/LICENSE-2.0
10   *
11   * Unless required by applicable law or agreed to in writing, software
12   * distributed under the License is distributed on an "AS IS" BASIS,
13   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14   * See the License for the specific language governing permissions and
15   * limitations under the License.
16   */
17  package org.apache.commons.text.similarity;
18  
19  import static org.junit.jupiter.api.Assertions.assertEquals;
20  import static org.junit.jupiter.api.Assertions.assertThrows;
21  
22  import org.apache.commons.text.TextStringBuilder;
23  import org.junit.jupiter.api.Test;
24  import org.junit.jupiter.params.ParameterizedTest;
25  import org.junit.jupiter.params.provider.MethodSource;
26  
27  class LevenshteinDetailedDistanceTest {
28  
    /** Shared instance obtained from {@code getDefaultInstance()}; the tests using it expect full distances rather than -1. */
    private static final LevenshteinDetailedDistance UNLIMITED_DISTANCE = LevenshteinDetailedDistance.getDefaultInstance();
30  
31      @Test
32      void testApplyThrowsIllegalArgumentExceptionAndCreatesLevenshteinDetailedDistanceTakingInteger() {
33          assertThrows(IllegalArgumentException.class, () -> {
34              final LevenshteinDetailedDistance levenshteinDetailedDistance = new LevenshteinDetailedDistance(0);
35              final CharSequence charSequence = new TextStringBuilder();
36              levenshteinDetailedDistance.apply(charSequence, null);
37          });
38      }
39  
40      @Test
41      void testApplyWithNullSimilarityInput() {
42          assertThrows(IllegalArgumentException.class,
43                  () -> new LevenshteinDetailedDistance(0).apply((SimilarityInput<Object>) null, (SimilarityInput<Object>) null));
44      }
45  
46      @Test
47      void testApplyWithNullString() {
48          assertThrows(IllegalArgumentException.class, () -> new LevenshteinDetailedDistance(0).apply((String) null, (String) null));
49      }
50  
51      @Test
52      void testConstructorWithNegativeThreshold() {
53          assertThrows(IllegalArgumentException.class, () -> new LevenshteinDetailedDistance(-1));
54      }
55  
56      @ParameterizedTest
57      @MethodSource("org.apache.commons.text.similarity.SimilarityInputTest#similarityInputs()")
58      void testCreatesLevenshteinDetailedDistanceTakingInteger6(final Class<?> cls) {
59          final LevenshteinDetailedDistance levenshteinDetailedDistance = new LevenshteinDetailedDistance(0);
60          final LevenshteinResults levenshteinResults = levenshteinDetailedDistance.apply("", "Distance: 38, Insert: 0, Delete: 0, Substitute: 0");
61          assertEquals(0, levenshteinResults.getSubstituteCount());
62          assertEquals(0, levenshteinResults.getDeleteCount());
63          assertEquals(0, levenshteinResults.getInsertCount());
64          assertEquals(-1, levenshteinResults.getDistance());
65          assertEquals(levenshteinResults, levenshteinDetailedDistance.apply(SimilarityInputTest.build(cls, ""),
66                  SimilarityInputTest.build(cls, "Distance: 38, Insert: 0, Delete: 0, Substitute: 0")));
67      }
68  
69      @ParameterizedTest
70      @MethodSource("org.apache.commons.text.similarity.SimilarityInputTest#similarityInputs()")
71      void testEquals(final Class<?> cls) {
72          final LevenshteinDetailedDistance classBeingTested = LevenshteinDetailedDistance.getDefaultInstance();
73          LevenshteinResults actualResult = classBeingTested.apply(SimilarityInputTest.build(cls, "hello"), SimilarityInputTest.build(cls, "hallo"));
74          LevenshteinResults expectedResult = new LevenshteinResults(1, 0, 0, 1);
75          assertEquals(expectedResult, actualResult);
76  
77          assertEquals(classBeingTested.apply("zzzzzzzz", "hippo"),
78                  classBeingTested.apply(SimilarityInputTest.build(cls, "zzzzzzzz"), SimilarityInputTest.build(cls, "hippo")));
79          actualResult = classBeingTested.apply(SimilarityInputTest.build(cls, "zzzzzzzz"), SimilarityInputTest.build(cls, "hippo"));
80          expectedResult = new LevenshteinResults(8, 0, 3, 5);
81          assertEquals(expectedResult, actualResult);
82          assertEquals(actualResult, actualResult); // intentionally added
83  
84          actualResult = classBeingTested.apply(SimilarityInputTest.build(cls, ""), SimilarityInputTest.build(cls, ""));
85          expectedResult = new LevenshteinResults(0, 0, 0, 0);
86          assertEquals(expectedResult, actualResult);
87      }
88  
89      @ParameterizedTest
90      @MethodSource("org.apache.commons.text.similarity.SimilarityInputTest#similarityInputs()")
91      void testGetDefaultInstanceOne(final Class<?> cls) {
92          final LevenshteinDetailedDistance levenshteinDetailedDistance = LevenshteinDetailedDistance.getDefaultInstance();
93          final LevenshteinResults levenshteinResults = levenshteinDetailedDistance.apply(
94                  SimilarityInputTest.build(cls, "Distance: -2147483643, Insert: 0, Delete: 0, Substitute: 0"),
95                  SimilarityInputTest.build(cls, "Distance: 0, Insert: 2147483536, Delete: 0, Substitute: 0"));
96  
97          assertEquals(21, levenshteinResults.getDistance());
98      }
99  
100     @ParameterizedTest
101     @MethodSource("org.apache.commons.text.similarity.SimilarityInputTest#similarityInputs()")
102     void testGetDefaultInstanceTwo(final Class<?> cls) {
103         final LevenshteinDetailedDistance levenshteinDetailedDistance = LevenshteinDetailedDistance.getDefaultInstance();
104         final LevenshteinResults levenshteinResults = levenshteinDetailedDistance.apply("Distance: 2147483647, Insert: 0, Delete: 0, Substitute: 0",
105                 "Distance: 0, Insert: 2147483647, Delete: 0, Substitute: 0");
106         assertEquals(20, levenshteinResults.getDistance());
107         assertEquals(levenshteinResults,
108                 levenshteinDetailedDistance.apply(SimilarityInputTest.build(cls, "Distance: 2147483647, Insert: 0, Delete: 0, Substitute: 0"),
109                         SimilarityInputTest.build(cls, "Distance: 0, Insert: 2147483647, Delete: 0, Substitute: 0")));
110     }
111 
112     @Test
113     void testGetLevenshteinDetailedDistance_NullString() {
114         assertThrows(IllegalArgumentException.class, () -> UNLIMITED_DISTANCE.apply("a", null));
115     }
116 
117     @Test
118     void testGetLevenshteinDetailedDistance_NullStringInt() {
119         assertThrows(IllegalArgumentException.class, () -> UNLIMITED_DISTANCE.apply(null, "a"));
120     }
121 
122     @Test
123     void testGetLevenshteinDetailedDistance_StringNull() {
124         assertThrows(IllegalArgumentException.class, () -> UNLIMITED_DISTANCE.apply(null, "a"));
125     }
126 
127     @Test
128     void testGetLevenshteinDetailedDistance_StringNullInt() {
129         assertThrows(IllegalArgumentException.class, () -> UNLIMITED_DISTANCE.apply("a", null));
130     }
131 
132     @ParameterizedTest
133     @MethodSource("org.apache.commons.text.similarity.SimilarityInputTest#similarityInputs()")
134     void testGetLevenshteinDetailedDistance_StringString(final Class<?> cls) {
135         LevenshteinResults result = UNLIMITED_DISTANCE.apply(SimilarityInputTest.build(cls, ""), SimilarityInputTest.build(cls, ""));
136         assertEquals(0, result.getDistance());
137         assertEquals(0, result.getInsertCount());
138         assertEquals(0, result.getDeleteCount());
139         assertEquals(0, result.getSubstituteCount());
140 
141         result = UNLIMITED_DISTANCE.apply(SimilarityInputTest.build(cls, ""), SimilarityInputTest.build(cls, "a"));
142         assertEquals(1, result.getDistance());
143         assertEquals(1, result.getInsertCount());
144         assertEquals(0, result.getDeleteCount());
145         assertEquals(0, result.getSubstituteCount());
146 
147         result = UNLIMITED_DISTANCE.apply(SimilarityInputTest.build(cls, "aaapppp"), SimilarityInputTest.build(cls, ""));
148         assertEquals(7, result.getDistance());
149         assertEquals(0, result.getInsertCount());
150         assertEquals(7, result.getDeleteCount());
151         assertEquals(0, result.getSubstituteCount());
152 
153         result = UNLIMITED_DISTANCE.apply(SimilarityInputTest.build(cls, "frog"), SimilarityInputTest.build(cls, "fog"));
154         assertEquals(1, result.getDistance());
155         assertEquals(0, result.getInsertCount());
156         assertEquals(1, result.getDeleteCount());
157         assertEquals(0, result.getSubstituteCount());
158 
159         result = UNLIMITED_DISTANCE.apply(SimilarityInputTest.build(cls, "fly"), SimilarityInputTest.build(cls, "ant"));
160         assertEquals(3, result.getDistance());
161         assertEquals(0, result.getInsertCount());
162         assertEquals(0, result.getDeleteCount());
163         assertEquals(3, result.getSubstituteCount());
164 
165         result = UNLIMITED_DISTANCE.apply(SimilarityInputTest.build(cls, "elephant"), SimilarityInputTest.build(cls, "hippo"));
166         assertEquals(7, result.getDistance());
167         assertEquals(0, result.getInsertCount());
168         assertEquals(3, result.getDeleteCount());
169         assertEquals(4, result.getSubstituteCount());
170 
171         result = UNLIMITED_DISTANCE.apply(SimilarityInputTest.build(cls, "hippo"), SimilarityInputTest.build(cls, "elephant"));
172         assertEquals(7, result.getDistance());
173         assertEquals(3, result.getInsertCount());
174         assertEquals(0, result.getDeleteCount());
175         assertEquals(4, result.getSubstituteCount());
176 
177         result = UNLIMITED_DISTANCE.apply(SimilarityInputTest.build(cls, "hippo"), SimilarityInputTest.build(cls, "zzzzzzzz"));
178         assertEquals(8, result.getDistance());
179         assertEquals(3, result.getInsertCount());
180         assertEquals(0, result.getDeleteCount());
181         assertEquals(5, result.getSubstituteCount());
182 
183         result = UNLIMITED_DISTANCE.apply(SimilarityInputTest.build(cls, "zzzzzzzz"), SimilarityInputTest.build(cls, "hippo"));
184         assertEquals(8, result.getDistance());
185         assertEquals(0, result.getInsertCount());
186         assertEquals(3, result.getDeleteCount());
187         assertEquals(5, result.getSubstituteCount());
188 
189         result = UNLIMITED_DISTANCE.apply(SimilarityInputTest.build(cls, "hello"), SimilarityInputTest.build(cls, "hallo"));
190         assertEquals(1, result.getDistance());
191         assertEquals(0, result.getInsertCount());
192         assertEquals(0, result.getDeleteCount());
193         assertEquals(1, result.getSubstituteCount());
194     }
195 
196     @Test
197     void testGetLevenshteinDetailedDistance_StringStringInt() {
198 
199         LevenshteinResults result = new LevenshteinDetailedDistance(0).apply("", "");
200 
201         assertEquals(0, result.getDistance());
202         assertEquals(0, result.getInsertCount());
203         assertEquals(0, result.getDeleteCount());
204         assertEquals(0, result.getSubstituteCount());
205 
206         result = new LevenshteinDetailedDistance(8).apply("aaapppp", "");
207         assertEquals(7, result.getDistance());
208         assertEquals(0, result.getInsertCount());
209         assertEquals(7, result.getDeleteCount());
210         assertEquals(0, result.getSubstituteCount());
211 
212         result = new LevenshteinDetailedDistance(7).apply("aaapppp", "");
213         assertEquals(7, result.getDistance());
214         assertEquals(0, result.getInsertCount());
215         assertEquals(7, result.getDeleteCount());
216         assertEquals(0, result.getSubstituteCount());
217 
218         result = new LevenshteinDetailedDistance(6).apply("aaapppp", "");
219         assertEquals(-1, result.getDistance());
220         assertEquals(0, result.getInsertCount());
221         assertEquals(0, result.getDeleteCount());
222         assertEquals(0, result.getSubstituteCount());
223 
224         result = new LevenshteinDetailedDistance(0).apply("b", "a");
225         assertEquals(-1, result.getDistance());
226         assertEquals(0, result.getInsertCount());
227         assertEquals(0, result.getDeleteCount());
228         assertEquals(0, result.getSubstituteCount());
229 
230         result = new LevenshteinDetailedDistance(0).apply("a", "b");
231         assertEquals(-1, result.getDistance());
232         assertEquals(0, result.getInsertCount());
233         assertEquals(0, result.getDeleteCount());
234         assertEquals(0, result.getSubstituteCount());
235 
236         result = new LevenshteinDetailedDistance(0).apply("aa", "aa");
237         assertEquals(0, result.getDistance());
238         assertEquals(0, result.getInsertCount());
239         assertEquals(0, result.getDeleteCount());
240         assertEquals(0, result.getSubstituteCount());
241 
242         result = new LevenshteinDetailedDistance(2).apply("aa", "aa");
243         assertEquals(0, result.getDistance());
244         assertEquals(0, result.getInsertCount());
245         assertEquals(0, result.getDeleteCount());
246         assertEquals(0, result.getSubstituteCount());
247 
248         result = new LevenshteinDetailedDistance(2).apply("aaa", "bbb");
249         assertEquals(-1, result.getDistance());
250         assertEquals(0, result.getInsertCount());
251         assertEquals(0, result.getDeleteCount());
252         assertEquals(0, result.getSubstituteCount());
253 
254         result = new LevenshteinDetailedDistance(3).apply("aaa", "bbb");
255         assertEquals(3, result.getDistance());
256         assertEquals(0, result.getInsertCount());
257         assertEquals(0, result.getDeleteCount());
258         assertEquals(3, result.getSubstituteCount());
259 
260         result = new LevenshteinDetailedDistance(10).apply("aaaaaa", "b");
261         assertEquals(6, result.getDistance());
262         assertEquals(0, result.getInsertCount());
263         assertEquals(5, result.getDeleteCount());
264         assertEquals(1, result.getSubstituteCount());
265 
266         result = new LevenshteinDetailedDistance(8).apply("aaapppp", "b");
267         assertEquals(7, result.getDistance());
268         assertEquals(0, result.getInsertCount());
269         assertEquals(6, result.getDeleteCount());
270         assertEquals(1, result.getSubstituteCount());
271 
272         result = new LevenshteinDetailedDistance(4).apply("a", "bbb");
273         assertEquals(3, result.getDistance());
274         assertEquals(2, result.getInsertCount());
275         assertEquals(0, result.getDeleteCount());
276         assertEquals(1, result.getSubstituteCount());
277 
278         result = new LevenshteinDetailedDistance(7).apply("aaapppp", "b");
279         assertEquals(7, result.getDistance());
280         assertEquals(0, result.getInsertCount());
281         assertEquals(6, result.getDeleteCount());
282         assertEquals(1, result.getSubstituteCount());
283 
284         result = new LevenshteinDetailedDistance(3).apply("a", "bbb");
285         assertEquals(3, result.getDistance());
286         assertEquals(2, result.getInsertCount());
287         assertEquals(0, result.getDeleteCount());
288         assertEquals(1, result.getSubstituteCount());
289 
290         result = new LevenshteinDetailedDistance(2).apply("a", "bbb");
291         assertEquals(-1, result.getDistance());
292         assertEquals(0, result.getInsertCount());
293         assertEquals(0, result.getDeleteCount());
294         assertEquals(0, result.getSubstituteCount());
295 
296         result = new LevenshteinDetailedDistance(2).apply("bbb", "a");
297         assertEquals(-1, result.getDistance());
298         assertEquals(0, result.getInsertCount());
299         assertEquals(0, result.getDeleteCount());
300         assertEquals(0, result.getSubstituteCount());
301 
302         result = new LevenshteinDetailedDistance(6).apply("aaapppp", "b");
303         assertEquals(-1, result.getDistance());
304         assertEquals(0, result.getInsertCount());
305         assertEquals(0, result.getDeleteCount());
306         assertEquals(0, result.getSubstituteCount());
307 
308         result = new LevenshteinDetailedDistance(1).apply("a", "bbb");
309         assertEquals(-1, result.getDistance());
310         assertEquals(0, result.getInsertCount());
311         assertEquals(0, result.getDeleteCount());
312         assertEquals(0, result.getSubstituteCount());
313 
314         result = new LevenshteinDetailedDistance(1).apply("bbb", "a");
315         assertEquals(-1, result.getDistance());
316         assertEquals(0, result.getInsertCount());
317         assertEquals(0, result.getDeleteCount());
318         assertEquals(0, result.getSubstituteCount());
319 
320         result = new LevenshteinDetailedDistance(1).apply("12345", "1234567");
321         assertEquals(-1, result.getDistance());
322         assertEquals(0, result.getInsertCount());
323         assertEquals(0, result.getDeleteCount());
324         assertEquals(0, result.getSubstituteCount());
325 
326         result = new LevenshteinDetailedDistance(1).apply("1234567", "12345");
327         assertEquals(-1, result.getDistance());
328         assertEquals(0, result.getInsertCount());
329         assertEquals(0, result.getDeleteCount());
330         assertEquals(0, result.getSubstituteCount());
331 
332         result = new LevenshteinDetailedDistance(1).apply("frog", "fog");
333         assertEquals(1, result.getDistance());
334         assertEquals(0, result.getInsertCount());
335         assertEquals(1, result.getDeleteCount());
336         assertEquals(0, result.getSubstituteCount());
337 
338         result = new LevenshteinDetailedDistance(3).apply("fly", "ant");
339         assertEquals(3, result.getDistance());
340         assertEquals(0, result.getInsertCount());
341         assertEquals(0, result.getDeleteCount());
342         assertEquals(3, result.getSubstituteCount());
343 
344         result = new LevenshteinDetailedDistance(7).apply("elephant", "hippo");
345         assertEquals(7, result.getDistance());
346         assertEquals(0, result.getInsertCount());
347         assertEquals(3, result.getDeleteCount());
348         assertEquals(4, result.getSubstituteCount());
349 
350         result = new LevenshteinDetailedDistance(6).apply("elephant", "hippo");
351         assertEquals(-1, result.getDistance());
352         assertEquals(0, result.getInsertCount());
353         assertEquals(0, result.getDeleteCount());
354         assertEquals(0, result.getSubstituteCount());
355 
356         result = new LevenshteinDetailedDistance(7).apply("hippo", "elephant");
357         assertEquals(7, result.getDistance());
358         assertEquals(3, result.getInsertCount());
359         assertEquals(0, result.getDeleteCount());
360         assertEquals(4, result.getSubstituteCount());
361 
362         result = new LevenshteinDetailedDistance(7).apply("hippo", "elephant");
363         assertEquals(7, result.getDistance());
364         assertEquals(3, result.getInsertCount());
365         assertEquals(0, result.getDeleteCount());
366         assertEquals(4, result.getSubstituteCount());
367 
368         result = new LevenshteinDetailedDistance(6).apply("hippo", "elephant");
369         assertEquals(-1, result.getDistance());
370         assertEquals(0, result.getInsertCount());
371         assertEquals(0, result.getDeleteCount());
372         assertEquals(0, result.getSubstituteCount());
373 
374         result = new LevenshteinDetailedDistance(8).apply("hippo", "zzzzzzzz");
375         assertEquals(8, result.getDistance());
376         assertEquals(3, result.getInsertCount());
377         assertEquals(0, result.getDeleteCount());
378         assertEquals(5, result.getSubstituteCount());
379 
380         result = new LevenshteinDetailedDistance(8).apply("zzzzzzzz", "hippo");
381         assertEquals(8, result.getDistance());
382         assertEquals(0, result.getInsertCount());
383         assertEquals(3, result.getDeleteCount());
384         assertEquals(5, result.getSubstituteCount());
385 
386         result = new LevenshteinDetailedDistance(1).apply("hello", "hallo");
387         assertEquals(1, result.getDistance());
388         assertEquals(0, result.getInsertCount());
389         assertEquals(0, result.getDeleteCount());
390         assertEquals(1, result.getSubstituteCount());
391 
392         result = new LevenshteinDetailedDistance(Integer.MAX_VALUE).apply("frog", "fog");
393         assertEquals(1, result.getDistance());
394         assertEquals(0, result.getInsertCount());
395         assertEquals(1, result.getDeleteCount());
396         assertEquals(0, result.getSubstituteCount());
397 
398         result = new LevenshteinDetailedDistance(Integer.MAX_VALUE).apply("fly", "ant");
399         assertEquals(3, result.getDistance());
400         assertEquals(0, result.getInsertCount());
401         assertEquals(0, result.getDeleteCount());
402         assertEquals(3, result.getSubstituteCount());
403 
404         result = new LevenshteinDetailedDistance(Integer.MAX_VALUE).apply("elephant", "hippo");
405         assertEquals(7, result.getDistance());
406         assertEquals(0, result.getInsertCount());
407         assertEquals(3, result.getDeleteCount());
408         assertEquals(4, result.getSubstituteCount());
409 
410         result = new LevenshteinDetailedDistance(Integer.MAX_VALUE).apply("hippo", "elephant");
411         assertEquals(7, result.getDistance());
412         assertEquals(3, result.getInsertCount());
413         assertEquals(0, result.getDeleteCount());
414         assertEquals(4, result.getSubstituteCount());
415 
416         result = new LevenshteinDetailedDistance(Integer.MAX_VALUE).apply("hippo", "zzzzzzzz");
417         assertEquals(8, result.getDistance());
418         assertEquals(3, result.getInsertCount());
419         assertEquals(0, result.getDeleteCount());
420         assertEquals(5, result.getSubstituteCount());
421 
422         result = new LevenshteinDetailedDistance(Integer.MAX_VALUE).apply("zzzzzzzz", "hippo");
423         assertEquals(8, result.getDistance());
424         assertEquals(0, result.getInsertCount());
425         assertEquals(3, result.getDeleteCount());
426         assertEquals(5, result.getSubstituteCount());
427 
428         result = new LevenshteinDetailedDistance(Integer.MAX_VALUE).apply("hello", "hallo");
429         assertEquals(1, result.getDistance());
430         assertEquals(0, result.getInsertCount());
431         assertEquals(0, result.getDeleteCount());
432         assertEquals(1, result.getSubstituteCount());
433     }
434 
435     @Test
436     void testGetThreshold() {
437         final LevenshteinDetailedDistance levenshteinDetailedDistance = new LevenshteinDetailedDistance(0);
438 
439         assertEquals(0, levenshteinDetailedDistance.getThreshold());
440     }
441 
442     @Test
443     void testHashCode() {
444         final LevenshteinDetailedDistance classBeingTested = LevenshteinDetailedDistance.getDefaultInstance();
445         LevenshteinResults actualResult = classBeingTested.apply("aaapppp", "");
446         LevenshteinResults expectedResult = new LevenshteinResults(7, 0, 7, 0);
447         assertEquals(expectedResult.hashCode(), actualResult.hashCode());
448 
449         actualResult = classBeingTested.apply("frog", "fog");
450         expectedResult = new LevenshteinResults(1, 0, 1, 0);
451         assertEquals(expectedResult.hashCode(), actualResult.hashCode());
452 
453         actualResult = classBeingTested.apply("elephant", "hippo");
454         expectedResult = new LevenshteinResults(7, 0, 3, 4);
455         assertEquals(expectedResult.hashCode(), actualResult.hashCode());
456     }
457 
458     @Test
459     void testToString() {
460         final LevenshteinDetailedDistance classBeingTested = LevenshteinDetailedDistance.getDefaultInstance();
461         LevenshteinResults actualResult = classBeingTested.apply("fly", "ant");
462         LevenshteinResults expectedResult = new LevenshteinResults(3, 0, 0, 3);
463         assertEquals(expectedResult.toString(), actualResult.toString());
464 
465         actualResult = classBeingTested.apply("hippo", "elephant");
466         expectedResult = new LevenshteinResults(7, 3, 0, 4);
467         assertEquals(expectedResult.toString(), actualResult.toString());
468 
469         actualResult = classBeingTested.apply("", "a");
470         expectedResult = new LevenshteinResults(1, 1, 0, 0);
471         assertEquals(expectedResult.toString(), actualResult.toString());
472     }
473 
474 }