1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20 package org.apache.commons.codec.language;
21
22 import static org.junit.jupiter.api.Assertions.assertEquals;
23 import static org.junit.jupiter.api.Assertions.assertNull;
24 import static org.junit.jupiter.api.Assertions.assertThrows;
25
26 import org.apache.commons.codec.AbstractStringEncoderTest;
27 import org.apache.commons.codec.EncoderException;
28 import org.junit.jupiter.api.Test;
29
30
31
32
33
34
35 public class SoundexTest extends AbstractStringEncoderTest<Soundex> {
36
37 @Override
38 protected Soundex createStringEncoder() {
39 return new Soundex();
40 }
41
42 @Test
43 public void testB650() throws EncoderException {
44 this.checkEncodingVariations("B650", new String[]{
45 "BARHAM",
46 "BARONE",
47 "BARRON",
48 "BERNA",
49 "BIRNEY",
50 "BIRNIE",
51 "BOOROM",
52 "BOREN",
53 "BORN",
54 "BOURN",
55 "BOURNE",
56 "BOWRON",
57 "BRAIN",
58 "BRAME",
59 "BRANN",
60 "BRAUN",
61 "BREEN",
62 "BRIEN",
63 "BRIM",
64 "BRIMM",
65 "BRINN",
66 "BRION",
67 "BROOM",
68 "BROOME",
69 "BROWN",
70 "BROWNE",
71 "BRUEN",
72 "BRUHN",
73 "BRUIN",
74 "BRUMM",
75 "BRUN",
76 "BRUNO",
77 "BRYAN",
78 "BURIAN",
79 "BURN",
80 "BURNEY",
81 "BYRAM",
82 "BYRNE",
83 "BYRON",
84 "BYRUM"});
85 }
86
87 @Test
88 public void testBadCharacters() {
89 assertEquals("H452", this.getStringEncoder().encode("HOL>MES"));
90
91 }
92
93 @Test
94 public void testDifference() throws EncoderException {
95
96 assertEquals(0, this.getStringEncoder().difference(null, null));
97 assertEquals(0, this.getStringEncoder().difference("", ""));
98 assertEquals(0, this.getStringEncoder().difference(" ", " "));
99
100 assertEquals(4, this.getStringEncoder().difference("Smith", "Smythe"));
101 assertEquals(2, this.getStringEncoder().difference("Ann", "Andrew"));
102 assertEquals(1, this.getStringEncoder().difference("Margaret", "Andrew"));
103 assertEquals(0, this.getStringEncoder().difference("Janet", "Margaret"));
104
105 assertEquals(4, this.getStringEncoder().difference("Green", "Greene"));
106 assertEquals(0, this.getStringEncoder().difference("Blotchet-Halls", "Greene"));
107
108 assertEquals(4, this.getStringEncoder().difference("Smith", "Smythe"));
109 assertEquals(4, this.getStringEncoder().difference("Smithers", "Smythers"));
110 assertEquals(2, this.getStringEncoder().difference("Anothers", "Brothers"));
111 }
112
113 @Test
114 public void testEncodeBasic() {
115 assertEquals("T235", this.getStringEncoder().encode("testing"));
116 assertEquals("T000", this.getStringEncoder().encode("The"));
117 assertEquals("Q200", this.getStringEncoder().encode("quick"));
118 assertEquals("B650", this.getStringEncoder().encode("brown"));
119 assertEquals("F200", this.getStringEncoder().encode("fox"));
120 assertEquals("J513", this.getStringEncoder().encode("jumped"));
121 assertEquals("O160", this.getStringEncoder().encode("over"));
122 assertEquals("T000", this.getStringEncoder().encode("the"));
123 assertEquals("L200", this.getStringEncoder().encode("lazy"));
124 assertEquals("D200", this.getStringEncoder().encode("dogs"));
125 }
126
127
128
129
130 @Test
131 public void testEncodeBatch2() {
132 assertEquals("A462", this.getStringEncoder().encode("Allricht"));
133 assertEquals("E166", this.getStringEncoder().encode("Eberhard"));
134 assertEquals("E521", this.getStringEncoder().encode("Engebrethson"));
135 assertEquals("H512", this.getStringEncoder().encode("Heimbach"));
136 assertEquals("H524", this.getStringEncoder().encode("Hanselmann"));
137 assertEquals("H431", this.getStringEncoder().encode("Hildebrand"));
138 assertEquals("K152", this.getStringEncoder().encode("Kavanagh"));
139 assertEquals("L530", this.getStringEncoder().encode("Lind"));
140 assertEquals("L222", this.getStringEncoder().encode("Lukaschowsky"));
141 assertEquals("M235", this.getStringEncoder().encode("McDonnell"));
142 assertEquals("M200", this.getStringEncoder().encode("McGee"));
143 assertEquals("O155", this.getStringEncoder().encode("Opnian"));
144 assertEquals("O155", this.getStringEncoder().encode("Oppenheimer"));
145 assertEquals("R355", this.getStringEncoder().encode("Riedemanas"));
146 assertEquals("Z300", this.getStringEncoder().encode("Zita"));
147 assertEquals("Z325", this.getStringEncoder().encode("Zitzmeinn"));
148 }
149
150
151
152
153 @Test
154 public void testEncodeBatch3() {
155 assertEquals("W252", this.getStringEncoder().encode("Washington"));
156 assertEquals("L000", this.getStringEncoder().encode("Lee"));
157 assertEquals("G362", this.getStringEncoder().encode("Gutierrez"));
158 assertEquals("P236", this.getStringEncoder().encode("Pfister"));
159 assertEquals("J250", this.getStringEncoder().encode("Jackson"));
160 assertEquals("T522", this.getStringEncoder().encode("Tymczak"));
161
162
163 assertEquals("V532", this.getStringEncoder().encode("VanDeusen"));
164 }
165
166
167
168
169 @Test
170 public void testEncodeBatch4() {
171 assertEquals("H452", this.getStringEncoder().encode("HOLMES"));
172 assertEquals("A355", this.getStringEncoder().encode("ADOMOMI"));
173 assertEquals("V536", this.getStringEncoder().encode("VONDERLEHR"));
174 assertEquals("B400", this.getStringEncoder().encode("BALL"));
175 assertEquals("S000", this.getStringEncoder().encode("SHAW"));
176 assertEquals("J250", this.getStringEncoder().encode("JACKSON"));
177 assertEquals("S545", this.getStringEncoder().encode("SCANLON"));
178 assertEquals("S532", this.getStringEncoder().encode("SAINTJOHN"));
179
180 }
181
182 @Test
183 public void testEncodeIgnoreApostrophes() throws EncoderException {
184 this.checkEncodingVariations("O165", new String[]{
185 "OBrien",
186 "'OBrien",
187 "O'Brien",
188 "OB'rien",
189 "OBr'ien",
190 "OBri'en",
191 "OBrie'n",
192 "OBrien'"});
193 }
194
195
196
197
198
199 @Test
200 public void testEncodeIgnoreHyphens() throws EncoderException {
201 this.checkEncodingVariations("K525", new String[]{
202 "KINGSMITH",
203 "-KINGSMITH",
204 "K-INGSMITH",
205 "KI-NGSMITH",
206 "KIN-GSMITH",
207 "KING-SMITH",
208 "KINGS-MITH",
209 "KINGSM-ITH",
210 "KINGSMI-TH",
211 "KINGSMIT-H",
212 "KINGSMITH-"});
213 }
214
215 @Test
216 public void testEncodeIgnoreTrimmable() {
217 assertEquals("W252", this.getStringEncoder().encode(" \t\n\r Washington \t\n\r "));
218 }
219
220 @Test
221
222 public void testGenealogy() {
223 final Soundex s = Soundex.US_ENGLISH_GENEALOGY;
224 assertEquals("H251", s.encode("Heggenburger"));
225 assertEquals("B425", s.encode("Blackman"));
226 assertEquals("S530", s.encode("Schmidt"));
227 assertEquals("L150", s.encode("Lippmann"));
228
229 assertEquals("D200", s.encode("Dodds"));
230 assertEquals("D200", s.encode("Dhdds"));
231 assertEquals("D200", s.encode("Dwdds"));
232 }
233
234
235
236
237 @Test
238 public void testHWRuleEx1() {
239
240
241
242
243 assertEquals("A261", this.getStringEncoder().encode("Ashcraft"));
244 assertEquals("A261", this.getStringEncoder().encode("Ashcroft"));
245 assertEquals("Y330", this.getStringEncoder().encode("yehudit"));
246 assertEquals("Y330", this.getStringEncoder().encode("yhwdyt"));
247 }
248
249
250
251
252
253
254 @Test
255 public void testHWRuleEx2() {
256 assertEquals("B312", this.getStringEncoder().encode("BOOTHDAVIS"));
257 assertEquals("B312", this.getStringEncoder().encode("BOOTH-DAVIS"));
258 }
259
260
261
262
263
264 @Test
265 public void testHWRuleEx3() throws EncoderException {
266 assertEquals("S460", this.getStringEncoder().encode("Sgler"));
267 assertEquals("S460", this.getStringEncoder().encode("Swhgler"));
268
269 this.checkEncodingVariations("S460", new String[]{
270 "SAILOR",
271 "SALYER",
272 "SAYLOR",
273 "SCHALLER",
274 "SCHELLER",
275 "SCHILLER",
276 "SCHOOLER",
277 "SCHULER",
278 "SCHUYLER",
279 "SEILER",
280 "SEYLER",
281 "SHOLAR",
282 "SHULER",
283 "SILAR",
284 "SILER",
285 "SILLER"});
286 }
287
288
289
290
291
292 @Test
293 public void testMsSqlServer1() {
294 assertEquals("S530", this.getStringEncoder().encode("Smith"));
295 assertEquals("S530", this.getStringEncoder().encode("Smythe"));
296 }
297
298
299
300
301
302
303
304 @Test
305 public void testMsSqlServer2() throws EncoderException {
306 this.checkEncodingVariations("E625", new String[]{"Erickson", "Erickson", "Erikson", "Ericson", "Ericksen", "Ericsen"});
307 }
308
309
310
311
312 @Test
313 public void testMsSqlServer3() {
314 assertEquals("A500", this.getStringEncoder().encode("Ann"));
315 assertEquals("A536", this.getStringEncoder().encode("Andrew"));
316 assertEquals("J530", this.getStringEncoder().encode("Janet"));
317 assertEquals("M626", this.getStringEncoder().encode("Margaret"));
318 assertEquals("S315", this.getStringEncoder().encode("Steven"));
319 assertEquals("M240", this.getStringEncoder().encode("Michael"));
320 assertEquals("R163", this.getStringEncoder().encode("Robert"));
321 assertEquals("L600", this.getStringEncoder().encode("Laura"));
322 assertEquals("A500", this.getStringEncoder().encode("Anne"));
323 }
324
325
326
327
328 @Test
329 public void testNewInstance() {
330 assertEquals("W452", new Soundex().soundex("Williams"));
331 }
332
333 @Test
334 public void testNewInstance2() {
335 assertEquals("W452", new Soundex(Soundex.US_ENGLISH_MAPPING_STRING.toCharArray()).soundex("Williams"));
336 }
337
338 @Test
339 public void testNewInstance3() {
340 assertEquals("W452", new Soundex(Soundex.US_ENGLISH_MAPPING_STRING).soundex("Williams"));
341 }
342
343 @Test
344
345 public void testSimplifiedSoundex() {
346 final Soundex s = Soundex.US_ENGLISH_SIMPLIFIED;
347 assertEquals("W452", s.encode("WILLIAMS"));
348 assertEquals("B625", s.encode("BARAGWANATH"));
349 assertEquals("D540", s.encode("DONNELL"));
350 assertEquals("L300", s.encode("LLOYD"));
351 assertEquals("W422", s.encode("WOOLCOCK"));
352
353 assertEquals("D320", s.encode("Dodds"));
354 assertEquals("D320", s.encode("Dwdds"));
355 assertEquals("D320", s.encode("Dhdds"));
356 }
357
358 @Test
359 public void testSoundexUtilsConstructable() {
360 new SoundexUtils();
361 }
362
363 @Test
364 public void testSoundexUtilsNullBehaviour() {
365 assertNull(SoundexUtils.clean(null));
366 assertEquals("", SoundexUtils.clean(""));
367 assertEquals(0, SoundexUtils.differenceEncoded(null, ""));
368 assertEquals(0, SoundexUtils.differenceEncoded("", null));
369 }
370
371
372
373
374 @Test
375 public void testUsEnglishStatic() {
376 assertEquals("W452", Soundex.US_ENGLISH.soundex("Williams"));
377 }
378
379
380
381
382
383
384 @Test
385 public void testUsMappingEWithAcute() {
386 assertEquals("E000", this.getStringEncoder().encode("e"));
387 if (Character.isLetter('\u00e9')) {
388
389 assertThrows(IllegalArgumentException.class, () -> getStringEncoder().encode("\u00e9"));
390 } else {
391 assertEquals("", this.getStringEncoder().encode("\u00e9"));
392 }
393 }
394
395
396
397
398
399
400 @Test
401 public void testUsMappingOWithDiaeresis() {
402 assertEquals("O000", this.getStringEncoder().encode("o"));
403 if (Character.isLetter('\u00f6')) {
404
405 assertThrows(IllegalArgumentException.class, () -> getStringEncoder().encode("\u00f6"));
406 } else {
407 assertEquals("", this.getStringEncoder().encode("\u00f6"));
408 }
409 }
410
411
412
413
414 @Test
415 public void testWikipediaAmericanSoundex() {
416 assertEquals("R163", this.getStringEncoder().encode("Robert"));
417 assertEquals("R163", this.getStringEncoder().encode("Rupert"));
418 assertEquals("A261", this.getStringEncoder().encode("Ashcraft"));
419 assertEquals("A261", this.getStringEncoder().encode("Ashcroft"));
420 assertEquals("T522", this.getStringEncoder().encode("Tymczak"));
421 assertEquals("P236", this.getStringEncoder().encode("Pfister"));
422 }
423 }