1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18 package org.apache.commons.codec.language;
19
20 import static org.junit.jupiter.api.Assertions.assertEquals;
21
22 import java.util.stream.IntStream;
23
24 import org.apache.commons.codec.AbstractStringEncoderTest;
25 import org.apache.commons.codec.EncoderException;
26 import org.junit.jupiter.api.Test;
27 import org.junit.jupiter.params.ParameterizedTest;
28 import org.junit.jupiter.params.provider.MethodSource;
29
30
31
32
33
34
35
36 class DaitchMokotoffSoundexTest extends AbstractStringEncoderTest<DaitchMokotoffSoundex> {
37
38 static IntStream getNonLetters() {
39 return IntStream.rangeClosed(Character.MIN_VALUE, Character.MAX_VALUE).filter(c -> !Character.isLetter(c));
40 }
41
42 @Override
43 protected DaitchMokotoffSoundex createStringEncoder() {
44 return new DaitchMokotoffSoundex();
45 }
46
47 private String encode(final String source) {
48 return getStringEncoder().encode(source);
49 }
50
51 private String soundex(final String source) {
52 return getStringEncoder().soundex(source);
53 }
54
55 @Test
56 void testAccentedCharacterFolding() {
57 assertEquals("294795", soundex("Straßburg"));
58 assertEquals("294795", soundex("Strasburg"));
59 assertEquals("095600", soundex("Éregon"));
60 assertEquals("095600", soundex("Eregon"));
61 }
62
63 @Test
64 void testAdjacentCodes() {
65
66
67
68
69 assertEquals("054800", soundex("AKSSOL"));
70
71
72
73
74 assertEquals("547830|545783|594783|594578", soundex("GERSCHFELD"));
75 }
76
77 @Test
78 void testEncodeBasic() {
79
80 assertEquals("097400", encode("AUERBACH"));
81 assertEquals("097400", encode("OHRBACH"));
82 assertEquals("874400", encode("LIPSHITZ"));
83 assertEquals("874400", encode("LIPPSZYC"));
84 assertEquals("876450", encode("LEWINSKY"));
85 assertEquals("876450", encode("LEVINSKI"));
86 assertEquals("486740", encode("SZLAMAWICZ"));
87 assertEquals("486740", encode("SHLAMOVITZ"));
88 }
89
90 @Test
91 void testEncodeIgnoreApostrophes() throws EncoderException {
92 checkEncodingVariations("079600", "OBrien", "'OBrien", "O'Brien", "OB'rien", "OBr'ien", "OBri'en", "OBrie'n", "OBrien'");
93 }
94
95
96
97
98
99
100 @Test
101 void testEncodeIgnoreHyphens() throws EncoderException {
102 checkEncodingVariations("565463", "KINGSMITH", "-KINGSMITH", "K-INGSMITH", "KI-NGSMITH", "KIN-GSMITH", "KING-SMITH", "KINGS-MITH", "KINGSM-ITH",
103 "KINGSMI-TH", "KINGSMIT-H", "KINGSMITH-");
104 }
105
106 @ParameterizedTest
107 @MethodSource("getNonLetters")
108 void testEncodeIgnoreNonLetters(final int nonLetterInt) throws EncoderException {
109 final char nonLetterChar = (char) nonLetterInt;
110 checkEncodingVariations("746536", "Washington" + nonLetterChar, nonLetterChar + "Washington", nonLetterChar + "Washington" + nonLetterChar,
111 "Washi" + nonLetterChar + "ngton");
112 }
113
114 @Test
115 void testEncodeIgnoreTrimmable() {
116 assertEquals("746536", encode(" \t\n\r Washington \t\n\r "));
117 assertEquals("746536", encode("Washington"));
118 }
119
120
121
122
123 @Test
124 void testSoundexBasic() {
125 assertEquals("583600", soundex("GOLDEN"));
126 assertEquals("087930", soundex("Alpert"));
127 assertEquals("791900", soundex("Breuer"));
128 assertEquals("579000", soundex("Haber"));
129 assertEquals("665600", soundex("Mannheim"));
130 assertEquals("664000", soundex("Mintz"));
131 assertEquals("370000", soundex("Topf"));
132 assertEquals("586660", soundex("Kleinmann"));
133 assertEquals("769600", soundex("Ben Aron"));
134 assertEquals("097400|097500", soundex("AUERBACH"));
135 assertEquals("097400|097500", soundex("OHRBACH"));
136 assertEquals("874400", soundex("LIPSHITZ"));
137 assertEquals("874400|874500", soundex("LIPPSZYC"));
138 assertEquals("876450", soundex("LEWINSKY"));
139 assertEquals("876450", soundex("LEVINSKI"));
140 assertEquals("486740", soundex("SZLAMAWICZ"));
141 assertEquals("486740", soundex("SHLAMOVITZ"));
142 }
143
144
145
146
147 @Test
148 void testSoundexBasic2() {
149 assertEquals("467000|567000", soundex("Ceniow"));
150 assertEquals("467000", soundex("Tsenyuv"));
151 assertEquals("587400|587500", soundex("Holubica"));
152 assertEquals("587400", soundex("Golubitsa"));
153 assertEquals("746480|794648", soundex("Przemysl"));
154 assertEquals("746480", soundex("Pshemeshil"));
155 assertEquals("944744|944745|944754|944755|945744|945745|945754|945755", soundex("Rosochowaciec"));
156 assertEquals("945744", soundex("Rosokhovatsets"));
157 }
158
159
160
161
162 @Test
163 void testSoundexBasic3() {
164 assertEquals("734000|739400", soundex("Peters"));
165 assertEquals("734600|739460", soundex("Peterson"));
166 assertEquals("645740", soundex("Moskowitz"));
167 assertEquals("645740", soundex("Moskovitz"));
168 assertEquals("154600|145460|454600|445460", soundex("Jackson"));
169 final String jacksonJackson = "154654|154645|154644|145465|145464|454654|454645|454644|445465|445464";
170 assertEquals(jacksonJackson, soundex("Jackson-Jackson"));
171 assertEquals(jacksonJackson, soundex("Jackson--Jackson"));
172 assertEquals(jacksonJackson, soundex("Jackson—Jackson"));
173 assertEquals(jacksonJackson, soundex("Jackson_Jackson"));
174 assertEquals(jacksonJackson, soundex("Jackson$Jackson"));
175 assertEquals(jacksonJackson, soundex("JacksonJackson"));
176 }
177
178 @Test
179 void testSpecialRomanianCharacters() {
180 assertEquals("364000|464000", soundex("ţamas"));
181 assertEquals("364000|464000", soundex("țamas"));
182 }
183 }