1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18 package org.apache.commons.codec.language;
19
import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertThrows;
import static org.junit.jupiter.api.Assertions.assertTrue;

import java.util.HashSet;
import java.util.Locale;
import java.util.Set;
import java.util.regex.Pattern;

import org.apache.commons.codec.AbstractStringEncoderTest;
import org.apache.commons.codec.EncoderException;
import org.junit.jupiter.api.AfterAll;
import org.junit.jupiter.api.Test;
import org.opentest4j.AssertionFailedError;
33
34
35
36
37
38
39
40
41 public class ColognePhoneticTest extends AbstractStringEncoderTest<ColognePhonetic> {
42
43 private static final Set<String> TESTSET = new HashSet<>();
44
45
46
47 private static final String[] MATCHES = {
48 ".*[AEIOUJY].*",
49 ".*H.*",
50 ".*B.*",
51 ".*P[^H].*",
52 ".*[DT][^CSZ].*",
53 ".*[FVW].*",
54 ".*PH.*",
55 ".*[GKQ].*",
56 "C[AHKLOQRUX].*",
57 ".*[^SZ]C[AHKLOQRUX].*",
58 ".*[^CKQ]X.*",
59 ".*L.*",
60 ".*[MN].*",
61 ".*R.*",
62 ".*[SZ].*",
63 ".*[SZ]C.*",
64 "C[^AHKLOQRUX].*",
65 ".+C[^AHKLOQRUX].*",
66 ".*[DT][CSZ].*",
67 ".*[CKQ]X.*",
68
69 };
70
71 @AfterAll
72
73 public static void finishTests() {
74 int errors = 0;
75 for (final String m : MATCHES) {
76 if (!hasTestCase(m)) {
77 System.out.println(m + " has no test case");
78 errors++;
79 }
80 }
81 assertEquals(0, errors, "Not expecting any missing test cases");
82 }
83
84 private static boolean hasTestCase(final String re) {
85 for (final String s : TESTSET) {
86 if (s.matches(re)) {
87 return true;
88 }
89 }
90 return false;
91 }
92
93
94 public static void main(final String args[]) {
95 final ColognePhonetic coder = new ColognePhonetic();
96 for (final String arg : args) {
97 final String code = coder.encode(arg);
98 System.out.println("'" + arg + "' = '" + code + "'");
99 }
100 }
101
102 @Override
103
104 public void checkEncoding(final String expected, final String source) throws EncoderException {
105
106 TESTSET.add(source.toUpperCase(Locale.GERMAN).replace('Ä', 'A').replace('Ö', 'O').replace('Ü', 'U'));
107 super.checkEncoding(expected, source);
108 }
109
110 @Override
111 protected ColognePhonetic createStringEncoder() {
112 return new ColognePhonetic();
113 }
114
115 @Test
116 public void testAabjoe() throws EncoderException {
117 this.checkEncoding("01", "Aabjoe");
118 }
119
120 @Test
121 public void testAaclan() throws EncoderException {
122 this.checkEncoding("0856", "Aaclan");
123 }
124
125
126
127
128
129
130 @Test
131 public void testAychlmajrForCodec122() throws EncoderException {
132 this.checkEncoding("04567", "Aychlmajr");
133 }
134
135 @Test
136
137 public void testCanFail() {
138 assertThrows(AssertionFailedError.class, () -> this.checkEncoding("/", "Fehler"));
139 }
140
141 @Test
142 public void testEdgeCases() throws EncoderException {
143
144 final String[][] data = {
145 { "a", "0" },
146 { "e", "0" },
147 { "i", "0" },
148 { "o", "0" },
149 { "u", "0" },
150 { "\u00E4", "0" },
151 { "\u00F6", "0" },
152 { "\u00FC", "0" },
153 { "\u00DF", "8" },
154 { "aa", "0" },
155 { "ha", "0" },
156 { "h", "" },
157 { "aha", "0" },
158 { "b", "1" },
159 { "p", "1" },
160 { "ph", "3" },
161 { "f", "3" },
162 { "v", "3" },
163 { "w", "3" },
164 { "g", "4" },
165 { "k", "4" },
166 { "q", "4" },
167 { "x", "48" },
168 { "ax", "048" },
169 { "cx", "48" },
170 { "l", "5" },
171 { "cl", "45" },
172 { "acl", "085" },
173 { "mn", "6" },
174 { "{mn}", "6" },
175 { "r", "7" }
176 };
177
178 this.checkEncodings(data);
179 }
180
181 @Test
182 public void testExamples() throws EncoderException {
183
184 final String[][] data = {
185 { "m\u00DCller", "657" },
186 { "m\u00FCller", "657" },
187 { "schmidt", "862" },
188 { "schneider", "8627" },
189 { "fischer", "387" },
190 { "weber", "317" },
191 { "wagner", "3467" },
192 { "becker", "147" },
193 { "hoffmann", "0366" },
194 { "sch\u00C4fer", "837" },
195 { "sch\u00e4fer", "837" },
196 { "Breschnew", "17863" },
197 { "Wikipedia", "3412" },
198 { "peter", "127" },
199 { "pharma", "376" },
200 { "m\u00f6nchengladbach", "664645214" },
201 { "deutsch", "28" },
202 { "deutz", "28" },
203 { "hamburg", "06174" },
204 { "hannover", "0637" },
205 { "christstollen", "478256" },
206 { "Xanthippe", "48621" },
207 { "Zacharias", "8478" },
208 { "Holzbau", "0581" },
209 { "matsch", "68" },
210 { "matz", "68" },
211 { "Arbeitsamt", "071862" },
212 { "Eberhard", "01772" },
213 { "Eberhardt", "01772" },
214 { "Celsius", "8588" },
215 { "Ace", "08" },
216 { "shch", "84" },
217 { "xch", "484" },
218 { "heithabu", "021" }
219 };
220
221 this.checkEncodings(data);
222 }
223
224 @Test
225 public void testHyphen() throws EncoderException {
226 final String[][] data = { { "bergisch-gladbach", "174845214" }, { "M\u00fcller-L\u00fcdenscheidt", "65752682" } };
227 this.checkEncodings(data);
228 }
229
230 @Test
231 public void testIsEncodeEquals() {
232
233 final String[][] data = {
234 { "Muller", "M\u00fcller" },
235 { "Meyer", "Mayr" },
236 { "house", "house" },
237 { "House", "house" },
238 { "Haus", "house" },
239 { "ganz", "Gans" },
240 { "ganz", "G\u00e4nse" },
241 { "Miyagi", "Miyako" }
242 };
243
244 for (final String[] element : data) {
245 final boolean encodeEqual = this.getStringEncoder().isEncodeEqual(element[1], element[0]);
246 assertTrue(encodeEqual, element[1] + " != " + element[0]);
247 }
248 }
249
250 @Test
251 public void testSpecialCharsBetweenSameLetters() throws EncoderException {
252 final String[] data = { "Test test", "Testtest", "Test-test", "TesT#Test", "TesT?test" };
253 this.checkEncodingVariations("28282", data);
254 }
255
256 @Test
257 public void testVariationsMella() throws EncoderException {
258 final String[] data = { "mella", "milah", "moulla", "mellah", "muehle", "mule" };
259 this.checkEncodingVariations("65", data);
260 }
261
262 @Test
263 public void testVariationsMeyer() throws EncoderException {
264 final String[] data = { "Meier", "Maier", "Mair", "Meyer", "Meyr", "Mejer", "Major" };
265 this.checkEncodingVariations("67", data);
266 }
267 }