View Javadoc
1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one or more
3    * contributor license agreements.  See the NOTICE file distributed with
4    * this work for additional information regarding copyright ownership.
5    * The ASF licenses this file to You under the Apache License, Version 2.0
6    * (the "License"); you may not use this file except in compliance with
7    * the License.  You may obtain a copy of the License at
8    *
9    *      https://www.apache.org/licenses/LICENSE-2.0
10   *
11   * Unless required by applicable law or agreed to in writing, software
12   * distributed under the License is distributed on an "AS IS" BASIS,
13   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14   * See the License for the specific language governing permissions and
15   * limitations under the License.
16   */
17  
18  package org.apache.commons.text.similarity;
19  
20  import static org.junit.jupiter.api.Assertions.assertEquals;
21  import static org.junit.jupiter.api.Assertions.assertThrows;
22  
23  import org.junit.jupiter.api.BeforeAll;
24  import org.junit.jupiter.api.Test;
25  import org.junit.jupiter.params.ParameterizedTest;
26  import org.junit.jupiter.params.provider.MethodSource;
27  
28  /**
29   * Tests {@link JaroWinklerSimilarity}.
30   */
31  class JaroWinklerSimilarityTest {
32  
33      private static JaroWinklerSimilarity similarity;
34  
35      @BeforeAll
36      public static void setUp() {
37          similarity = new JaroWinklerSimilarity();
38      }
39  
40      /**
41       * Wraps the string in a custom {@link CharSequence}. This ensures that using the {@link Object#equals(Object)} method on the input CharSequence to test for
42       * equality will fail.
43       *
44       * @param string the string
45       * @return the char sequence
46       */
47      private static CharSequence wrap(final String string) {
48          return new CharSequence() {
49  
50              @Override
51              public char charAt(final int index) {
52                  return string.charAt(index);
53              }
54  
55              @Override
56              public boolean equals(final Object obj) {
57                  return string.equals(obj);
58              }
59  
60              @Override
61              public int hashCode() {
62                  return string.hashCode();
63              }
64  
65              @Override
66              public int length() {
67                  return string.length();
68              }
69  
70              @Override
71              public CharSequence subSequence(final int start, final int end) {
72                  return string.subSequence(start, end);
73              }
74  
75              @Override
76              public String toString() {
77                  return string;
78              }
79          };
80      }
81  
82      @Test
83      void testApply_NullSimilarityInput() {
84          assertThrows(IllegalArgumentException.class, () -> similarity.apply(null, new SimilarityCharacterInput("a")));
85      }
86  
87      @Test
88      void testApply_SimilarityInputNull() {
89          assertThrows(IllegalArgumentException.class, () -> similarity.apply(new SimilarityCharacterInput("a"), null));
90      }
91  
92      @ParameterizedTest
93      @MethodSource("org.apache.commons.text.similarity.SimilarityInputTest#similarityInputsEquals()")
94      void testGetJaroWinklerSimilarity(final Class<?> cls) {
95          assertEquals(1d, similarity.apply(SimilarityInputTest.build(cls, ""), SimilarityInputTest.build(cls, "")), 0.00001d);
96          assertEquals(1d, similarity.apply(SimilarityInputTest.build(cls, "foo"), SimilarityInputTest.build(cls, "foo")), 0.00001d);
97          assertEquals(0.94166d, similarity.apply(SimilarityInputTest.build(cls, "foo"), SimilarityInputTest.build(cls, "foo ")), 0.00001d);
98          assertEquals(0.90666d, similarity.apply(SimilarityInputTest.build(cls, "foo"), SimilarityInputTest.build(cls, "foo  ")), 0.00001d);
99          assertEquals(0.86666d, similarity.apply(SimilarityInputTest.build(cls, "foo"), SimilarityInputTest.build(cls, " foo ")), 0.00001d);
100         assertEquals(0.51111d, similarity.apply(SimilarityInputTest.build(cls, "foo"), SimilarityInputTest.build(cls, "  foo")), 0.00001d);
101         assertEquals(0.92499d, similarity.apply(SimilarityInputTest.build(cls, "frog"), SimilarityInputTest.build(cls, "fog")), 0.00001d);
102         assertEquals(0.0d, similarity.apply(SimilarityInputTest.build(cls, "fly"), SimilarityInputTest.build(cls, "ant")), 0.00000000000000000001d);
103         assertEquals(0.44166d, similarity.apply(SimilarityInputTest.build(cls, "elephant"), SimilarityInputTest.build(cls, "hippo")), 0.00001d);
104         assertEquals(0.90666d, similarity.apply(SimilarityInputTest.build(cls, "ABC Corporation"), SimilarityInputTest.build(cls, "ABC Corp")), 0.00001d);
105         assertEquals(0.95251d,
106                 similarity.apply(SimilarityInputTest.build(cls, "D N H Enterprises Inc"), SimilarityInputTest.build(cls, "D & H Enterprises, Inc.")), 0.00001d);
107         assertEquals(0.942d, similarity.apply(SimilarityInputTest.build(cls, "My Gym Children's Fitness Center"),
108                 SimilarityInputTest.build(cls, "My Gym. Childrens Fitness")), 0.00001d);
109         assertEquals(0.898018d, similarity.apply(SimilarityInputTest.build(cls, "PENNSYLVANIA"), SimilarityInputTest.build(cls, "PENNCISYLVNIA")), 0.00001d);
110         assertEquals(0.971428d, similarity.apply(SimilarityInputTest.build(cls, "/opt/software1"), SimilarityInputTest.build(cls, "/opt/software2")), 0.00001d);
111         assertEquals(0.941666d, similarity.apply(SimilarityInputTest.build(cls, "aaabcd"), SimilarityInputTest.build(cls, "aaacdb")), 0.00001d);
112         assertEquals(0.911111d, similarity.apply(SimilarityInputTest.build(cls, "John Horn"), SimilarityInputTest.build(cls, "John Hopkins")), 0.00001d);
113     }
114 
115     @Test
116     void testGetJaroWinklerSimilarity_NullNull() {
117         assertThrows(IllegalArgumentException.class, () -> similarity.apply((String) null, null));
118     }
119 
120     @Test
121     void testGetJaroWinklerSimilarity_NullString() {
122         assertThrows(IllegalArgumentException.class, () -> similarity.apply(null, "clear"));
123     }
124 
125     @Test
126     void testGetJaroWinklerSimilarity_StringNull() {
127         assertThrows(IllegalArgumentException.class, () -> similarity.apply(" ", null));
128     }
129 
130     @Test
131     void testGetJaroWinklerSimilarity_StringString() {
132         assertEquals(1d, similarity.apply(wrap(""), ""), 0.00001d);
133         assertEquals(1d, similarity.apply(wrap("foo"), "foo"), 0.00001d);
134         assertEquals(0.94166d, similarity.apply(wrap("foo"), "foo "), 0.00001d);
135         assertEquals(0.90666d, similarity.apply(wrap("foo"), "foo  "), 0.00001d);
136         assertEquals(0.86666d, similarity.apply(wrap("foo"), " foo "), 0.00001d);
137         assertEquals(0.51111d, similarity.apply(wrap("foo"), "  foo"), 0.00001d);
138         assertEquals(0.92499d, similarity.apply(wrap("frog"), "fog"), 0.00001d);
139         assertEquals(0.0d, similarity.apply(wrap("fly"), "ant"), 0.00000000000000000001d);
140         assertEquals(0.44166d, similarity.apply(wrap("elephant"), "hippo"), 0.00001d);
141         assertEquals(0.90666d, similarity.apply(wrap("ABC Corporation"), "ABC Corp"), 0.00001d);
142         assertEquals(0.95251d, similarity.apply(wrap("D N H Enterprises Inc"), "D & H Enterprises, Inc."), 0.00001d);
143         assertEquals(0.942d, similarity.apply(wrap("My Gym Children's Fitness Center"), "My Gym. Childrens Fitness"), 0.00001d);
144         assertEquals(0.898018d, similarity.apply(wrap("PENNSYLVANIA"), "PENNCISYLVNIA"), 0.00001d);
145         assertEquals(0.971428d, similarity.apply(wrap("/opt/software1"), "/opt/software2"), 0.00001d);
146         assertEquals(0.941666d, similarity.apply(wrap("aaabcd"), "aaacdb"), 0.00001d);
147         assertEquals(0.911111d, similarity.apply(wrap("John Horn"), "John Hopkins"), 0.00001d);
148     }
149 }