1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20 package org.apache.commons.codec.language;
21
22 import static org.junit.jupiter.api.Assertions.assertEquals;
23 import static org.junit.jupiter.api.Assertions.assertNull;
24 import static org.junit.jupiter.api.Assertions.assertThrows;
25
26 import org.apache.commons.codec.AbstractStringEncoderTest;
27 import org.apache.commons.codec.EncoderException;
28 import org.junit.jupiter.api.Test;
29
30
31
32
33
34
35 class SoundexTest extends AbstractStringEncoderTest<Soundex> {
36
37 @Override
38 protected Soundex createStringEncoder() {
39 return new Soundex();
40 }
41
42 @Test
43 void testB650() throws EncoderException {
44
45 checkEncodingVariations("B650",
46 "BARHAM",
47 "BARONE",
48 "BARRON",
49 "BERNA",
50 "BIRNEY",
51 "BIRNIE",
52 "BOOROM",
53 "BOREN",
54 "BORN",
55 "BOURN",
56 "BOURNE",
57 "BOWRON",
58 "BRAIN",
59 "BRAME",
60 "BRANN",
61 "BRAUN",
62 "BREEN",
63 "BRIEN",
64 "BRIM",
65 "BRIMM",
66 "BRINN",
67 "BRION",
68 "BROOM",
69 "BROOME",
70 "BROWN",
71 "BROWNE",
72 "BRUEN",
73 "BRUHN",
74 "BRUIN",
75 "BRUMM",
76 "BRUN",
77 "BRUNO",
78 "BRYAN",
79 "BURIAN",
80 "BURN",
81 "BURNEY",
82 "BYRAM",
83 "BYRNE",
84 "BYRON",
85 "BYRUM");
86
87 }
88
89 @Test
90 void testBadCharacters() {
91 assertEquals("H452", getStringEncoder().encode("HOL>MES"));
92 }
93
94 @Test
95 void testDifference() throws EncoderException {
96
97 assertEquals(0, getStringEncoder().difference(null, null));
98 assertEquals(0, getStringEncoder().difference("", ""));
99 assertEquals(0, getStringEncoder().difference(" ", " "));
100
101 assertEquals(4, getStringEncoder().difference("Smith", "Smythe"));
102 assertEquals(2, getStringEncoder().difference("Ann", "Andrew"));
103 assertEquals(1, getStringEncoder().difference("Margaret", "Andrew"));
104 assertEquals(0, getStringEncoder().difference("Janet", "Margaret"));
105
106 assertEquals(4, getStringEncoder().difference("Green", "Greene"));
107 assertEquals(0, getStringEncoder().difference("Blotchet-Halls", "Greene"));
108
109 assertEquals(4, getStringEncoder().difference("Smith", "Smythe"));
110 assertEquals(4, getStringEncoder().difference("Smithers", "Smythers"));
111 assertEquals(2, getStringEncoder().difference("Anothers", "Brothers"));
112 }
113
114 @Test
115 void testEncodeBasic() {
116 assertEquals("T235", getStringEncoder().encode("testing"));
117 assertEquals("T000", getStringEncoder().encode("The"));
118 assertEquals("Q200", getStringEncoder().encode("quick"));
119 assertEquals("B650", getStringEncoder().encode("brown"));
120 assertEquals("F200", getStringEncoder().encode("fox"));
121 assertEquals("J513", getStringEncoder().encode("jumped"));
122 assertEquals("O160", getStringEncoder().encode("over"));
123 assertEquals("T000", getStringEncoder().encode("the"));
124 assertEquals("L200", getStringEncoder().encode("lazy"));
125 assertEquals("D200", getStringEncoder().encode("dogs"));
126 }
127
128
129
130
131 @Test
132 void testEncodeBatch2() {
133 assertEquals("A462", getStringEncoder().encode("Allricht"));
134 assertEquals("E166", getStringEncoder().encode("Eberhard"));
135 assertEquals("E521", getStringEncoder().encode("Engebrethson"));
136 assertEquals("H512", getStringEncoder().encode("Heimbach"));
137 assertEquals("H524", getStringEncoder().encode("Hanselmann"));
138 assertEquals("H431", getStringEncoder().encode("Hildebrand"));
139 assertEquals("K152", getStringEncoder().encode("Kavanagh"));
140 assertEquals("L530", getStringEncoder().encode("Lind"));
141 assertEquals("L222", getStringEncoder().encode("Lukaschowsky"));
142 assertEquals("M235", getStringEncoder().encode("McDonnell"));
143 assertEquals("M200", getStringEncoder().encode("McGee"));
144 assertEquals("O155", getStringEncoder().encode("Opnian"));
145 assertEquals("O155", getStringEncoder().encode("Oppenheimer"));
146 assertEquals("R355", getStringEncoder().encode("Riedemanas"));
147 assertEquals("Z300", getStringEncoder().encode("Zita"));
148 assertEquals("Z325", getStringEncoder().encode("Zitzmeinn"));
149 }
150
151
152
153
154 @Test
155 void testEncodeBatch3() {
156 assertEquals("W252", getStringEncoder().encode("Washington"));
157 assertEquals("L000", getStringEncoder().encode("Lee"));
158 assertEquals("G362", getStringEncoder().encode("Gutierrez"));
159 assertEquals("P236", getStringEncoder().encode("Pfister"));
160 assertEquals("J250", getStringEncoder().encode("Jackson"));
161 assertEquals("T522", getStringEncoder().encode("Tymczak"));
162
163
164 assertEquals("V532", getStringEncoder().encode("VanDeusen"));
165 }
166
167
168
169
170 @Test
171 void testEncodeBatch4() {
172 assertEquals("H452", getStringEncoder().encode("HOLMES"));
173 assertEquals("A355", getStringEncoder().encode("ADOMOMI"));
174 assertEquals("V536", getStringEncoder().encode("VONDERLEHR"));
175 assertEquals("B400", getStringEncoder().encode("BALL"));
176 assertEquals("S000", getStringEncoder().encode("SHAW"));
177 assertEquals("J250", getStringEncoder().encode("JACKSON"));
178 assertEquals("S545", getStringEncoder().encode("SCANLON"));
179 assertEquals("S532", getStringEncoder().encode("SAINTJOHN"));
180
181 }
182
183 @Test
184 void testEncodeIgnoreApostrophes() throws EncoderException {
185
186 checkEncodingVariations("O165",
187 "OBrien",
188 "'OBrien",
189 "O'Brien",
190 "OB'rien",
191 "OBr'ien",
192 "OBri'en",
193 "OBrie'n",
194 "OBrien'");
195
196 }
197
198
199
200
201
202 @Test
203 void testEncodeIgnoreHyphens() throws EncoderException {
204
205 checkEncodingVariations("K525",
206 "KINGSMITH",
207 "-KINGSMITH",
208 "K-INGSMITH",
209 "KI-NGSMITH",
210 "KIN-GSMITH",
211 "KING-SMITH",
212 "KINGS-MITH",
213 "KINGSM-ITH",
214 "KINGSMI-TH",
215 "KINGSMIT-H",
216 "KINGSMITH-");
217
218 }
219
220 @Test
221 void testEncodeIgnoreTrimmable() {
222 assertEquals("W252", getStringEncoder().encode(" \t\n\r Washington \t\n\r "));
223 }
224
225 @Test
226
227 void testGenealogy() {
228 final Soundex s = Soundex.US_ENGLISH_GENEALOGY;
229 assertEquals("H251", s.encode("Heggenburger"));
230 assertEquals("B425", s.encode("Blackman"));
231 assertEquals("S530", s.encode("Schmidt"));
232 assertEquals("L150", s.encode("Lippmann"));
233
234 assertEquals("D200", s.encode("Dodds"));
235 assertEquals("D200", s.encode("Dhdds"));
236 assertEquals("D200", s.encode("Dwdds"));
237 }
238
239
240
241
242 @Test
243 void testHWRuleEx1() {
244
245
246
247
248 assertEquals("A261", getStringEncoder().encode("Ashcraft"));
249 assertEquals("A261", getStringEncoder().encode("Ashcroft"));
250 assertEquals("Y330", getStringEncoder().encode("yehudit"));
251 assertEquals("Y330", getStringEncoder().encode("yhwdyt"));
252 }
253
254
255
256
257
258
259 @Test
260 void testHWRuleEx2() {
261 assertEquals("B312", getStringEncoder().encode("BOOTHDAVIS"));
262 assertEquals("B312", getStringEncoder().encode("BOOTH-DAVIS"));
263 }
264
265
266
267
268
269 @Test
270 void testHWRuleEx3() throws EncoderException {
271 assertEquals("S460", getStringEncoder().encode("Sgler"));
272 assertEquals("S460", getStringEncoder().encode("Swhgler"));
273
274
275 checkEncodingVariations("S460",
276 "SAILOR",
277 "SALYER",
278 "SAYLOR",
279 "SCHALLER",
280 "SCHELLER",
281 "SCHILLER",
282 "SCHOOLER",
283 "SCHULER",
284 "SCHUYLER",
285 "SEILER",
286 "SEYLER",
287 "SHOLAR",
288 "SHULER",
289 "SILAR",
290 "SILER",
291 "SILLER");
292
293 }
294
295
296
297
298
299 @Test
300 void testMsSqlServer1() {
301 assertEquals("S530", getStringEncoder().encode("Smith"));
302 assertEquals("S530", getStringEncoder().encode("Smythe"));
303 }
304
305
306
307
308
309
310
311 @Test
312 void testMsSqlServer2() throws EncoderException {
313 checkEncodingVariations("E625", "Erickson", "Erickson", "Erikson", "Ericson", "Ericksen", "Ericsen");
314 }
315
316
317
318
319 @Test
320 void testMsSqlServer3() {
321 assertEquals("A500", getStringEncoder().encode("Ann"));
322 assertEquals("A536", getStringEncoder().encode("Andrew"));
323 assertEquals("J530", getStringEncoder().encode("Janet"));
324 assertEquals("M626", getStringEncoder().encode("Margaret"));
325 assertEquals("S315", getStringEncoder().encode("Steven"));
326 assertEquals("M240", getStringEncoder().encode("Michael"));
327 assertEquals("R163", getStringEncoder().encode("Robert"));
328 assertEquals("L600", getStringEncoder().encode("Laura"));
329 assertEquals("A500", getStringEncoder().encode("Anne"));
330 }
331
332
333
334
335 @Test
336 void testNewInstance() {
337 assertEquals("W452", new Soundex().soundex("Williams"));
338 }
339
340 @Test
341 void testNewInstance2() {
342 assertEquals("W452", new Soundex(Soundex.US_ENGLISH_MAPPING_STRING.toCharArray()).soundex("Williams"));
343 }
344
345 @Test
346 void testNewInstance3() {
347 assertEquals("W452", new Soundex(Soundex.US_ENGLISH_MAPPING_STRING).soundex("Williams"));
348 }
349
350 @Test
351
352 void testSimplifiedSoundex() {
353 final Soundex s = Soundex.US_ENGLISH_SIMPLIFIED;
354 assertEquals("W452", s.encode("WILLIAMS"));
355 assertEquals("B625", s.encode("BARAGWANATH"));
356 assertEquals("D540", s.encode("DONNELL"));
357 assertEquals("L300", s.encode("LLOYD"));
358 assertEquals("W422", s.encode("WOOLCOCK"));
359
360 assertEquals("D320", s.encode("Dodds"));
361 assertEquals("D320", s.encode("Dwdds"));
362 assertEquals("D320", s.encode("Dhdds"));
363 }
364
365 @Test
366 void testSoundexUtilsConstructable() {
367 new SoundexUtils();
368 }
369
370 @Test
371 void testSoundexUtilsNullBehaviour() {
372 assertNull(SoundexUtils.clean(null));
373 assertEquals("", SoundexUtils.clean(""));
374 assertEquals(0, SoundexUtils.differenceEncoded(null, ""));
375 assertEquals(0, SoundexUtils.differenceEncoded("", null));
376 }
377
378
379
380
381 @Test
382 void testUsEnglishStatic() {
383 assertEquals("W452", Soundex.US_ENGLISH.soundex("Williams"));
384 }
385
386
387
388
389
390
391 @Test
392 void testUsMappingEWithAcute() {
393 assertEquals("E000", getStringEncoder().encode("e"));
394 if (Character.isLetter('\u00e9')) {
395
396 assertThrows(IllegalArgumentException.class, () -> getStringEncoder().encode("\u00e9"));
397 } else {
398 assertEquals("", getStringEncoder().encode("\u00e9"));
399 }
400 }
401
402
403
404
405
406
407 @Test
408 void testUsMappingOWithDiaeresis() {
409 assertEquals("O000", getStringEncoder().encode("o"));
410 if (Character.isLetter('\u00f6')) {
411
412 assertThrows(IllegalArgumentException.class, () -> getStringEncoder().encode("\u00f6"));
413 } else {
414 assertEquals("", getStringEncoder().encode("\u00f6"));
415 }
416 }
417
418
419
420
421 @Test
422 void testWikipediaAmericanSoundex() {
423 assertEquals("R163", getStringEncoder().encode("Robert"));
424 assertEquals("R163", getStringEncoder().encode("Rupert"));
425 assertEquals("A261", getStringEncoder().encode("Ashcraft"));
426 assertEquals("A261", getStringEncoder().encode("Ashcroft"));
427 assertEquals("T522", getStringEncoder().encode("Tymczak"));
428 assertEquals("P236", getStringEncoder().encode("Pfister"));
429 }
430 }