1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18 package org.apache.commons.codec.language;
19
import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertThrows;
import static org.junit.jupiter.api.Assertions.assertTrue;

import java.util.HashSet;
import java.util.Locale;
import java.util.Set;
import java.util.regex.Pattern;
import java.util.stream.Stream;

import org.apache.commons.codec.AbstractStringEncoderTest;
import org.apache.commons.codec.EncoderException;
import org.junit.jupiter.api.AfterAll;
import org.junit.jupiter.api.Test;
import org.junit.jupiter.params.ParameterizedTest;
import org.junit.jupiter.params.provider.Arguments;
import org.junit.jupiter.params.provider.MethodSource;
import org.opentest4j.AssertionFailedError;
37
38
39
40
41
42
43
44
45 class ColognePhoneticTest extends AbstractStringEncoderTest<ColognePhonetic> {
46
47 private static final Set<String> TESTSET = new HashSet<>();
48
49
50
51 private static final String[] MATCHES = {
52 ".*[AEIOUJY].*",
53 ".*H.*",
54 ".*B.*",
55 ".*P[^H].*",
56 ".*[DT][^CSZ].*",
57 ".*[FVW].*",
58 ".*PH.*",
59 ".*[GKQ].*",
60 "C[AHKLOQRUX].*",
61 ".*[^SZ]C[AHKLOQRUX].*",
62 ".*[^CKQ]X.*",
63 ".*L.*",
64 ".*[MN].*",
65 ".*R.*",
66 ".*[SZ].*",
67 ".*[SZ]C.*",
68 "C[^AHKLOQRUX].*",
69 ".+C[^AHKLOQRUX].*",
70 ".*[DT][CSZ].*",
71 ".*[CKQ]X.*",
72
73 };
74
75 @AfterAll
76
77 static void finishTests() {
78 int errors = 0;
79 for (final String m : MATCHES) {
80 if (!hasTestCase(m)) {
81 System.out.println(m + " has no test case");
82 errors++;
83 }
84 }
85 assertEquals(0, errors, "Not expecting any missing test cases");
86 }
87
88 private static boolean hasTestCase(final String re) {
89 for (final String s : TESTSET) {
90 if (s.matches(re)) {
91 return true;
92 }
93 }
94 return false;
95 }
96
97
98 public static void main(final String[] args) {
99 final ColognePhonetic coder = new ColognePhonetic();
100 for (final String arg : args) {
101 final String code = coder.encode(arg);
102 System.out.println("'" + arg + "' = '" + code + "'");
103 }
104 }
105
106 static Stream<Arguments> testBasicEncoding() {
107
108 return Stream.of(
109 Arguments.arguments("01", "Aabjoe"),
110 Arguments.arguments("0856", "Aaclan"),
111 Arguments.arguments("04567", "Aychlmajr")
112 );
113
114 }
115
116 static Stream<Arguments> testEdgeCases() {
117
118 return Stream.of(
119 Arguments.arguments("a", "0"),
120 Arguments.arguments("e", "0"),
121 Arguments.arguments("i", "0"),
122 Arguments.arguments("o", "0"),
123 Arguments.arguments("u", "0"),
124 Arguments.arguments("\u00E4", "0"),
125 Arguments.arguments("\u00F6", "0"),
126 Arguments.arguments("\u00FC", "0"),
127 Arguments.arguments("\u00DF", "8"),
128 Arguments.arguments("aa", "0"),
129 Arguments.arguments("ha", "0"),
130 Arguments.arguments("h", ""),
131 Arguments.arguments("aha", "0"),
132 Arguments.arguments("b", "1"),
133 Arguments.arguments("p", "1"),
134 Arguments.arguments("ph", "3"),
135 Arguments.arguments("f", "3"),
136 Arguments.arguments("v", "3"),
137 Arguments.arguments("w", "3"),
138 Arguments.arguments("g", "4"),
139 Arguments.arguments("k", "4"),
140 Arguments.arguments("q", "4"),
141 Arguments.arguments("x", "48"),
142 Arguments.arguments("ax", "048"),
143 Arguments.arguments("cx", "48"),
144 Arguments.arguments("l", "5"),
145 Arguments.arguments("cl", "45"),
146 Arguments.arguments("acl", "085"),
147 Arguments.arguments("mn", "6"),
148 Arguments.arguments("{mn}", "6"),
149 Arguments.arguments("r", "7")
150 );
151
152 }
153
154 static Stream<Arguments> testExamples() {
155
156 return Stream.of(
157 Arguments.arguments("m\u00DCller", "657"),
158 Arguments.arguments("m\u00FCller", "657"),
159 Arguments.arguments("schmidt", "862"),
160 Arguments.arguments("schneider", "8627"),
161 Arguments.arguments("fischer", "387"),
162 Arguments.arguments("weber", "317"),
163 Arguments.arguments("wagner", "3467"),
164 Arguments.arguments("becker", "147"),
165 Arguments.arguments("hoffmann", "036"),
166 Arguments.arguments("sch\u00C4fer", "837"),
167 Arguments.arguments("sch\u00e4fer", "837"),
168 Arguments.arguments("Breschnew", "17863"),
169 Arguments.arguments("Wikipedia", "3412"),
170 Arguments.arguments("peter", "127"),
171 Arguments.arguments("pharma", "376"),
172 Arguments.arguments("m\u00f6nchengladbach", "64645214"),
173 Arguments.arguments("deutsch", "28"),
174 Arguments.arguments("deutz", "28"),
175 Arguments.arguments("hamburg", "06174"),
176 Arguments.arguments("hannover", "0637"),
177 Arguments.arguments("christstollen", "478256"),
178 Arguments.arguments("Xanthippe", "48621"),
179 Arguments.arguments("Zacharias", "8478"),
180 Arguments.arguments("Holzbau", "0581"),
181 Arguments.arguments("matsch", "68"),
182 Arguments.arguments("matz", "68"),
183 Arguments.arguments("Arbeitsamt", "071862"),
184 Arguments.arguments("Eberhard", "0172"),
185 Arguments.arguments("Eberhardt", "0172"),
186 Arguments.arguments("Celsius", "858"),
187 Arguments.arguments("Ace", "08"),
188 Arguments.arguments("shch", "84"),
189 Arguments.arguments("xch", "484"),
190 Arguments.arguments("heithabu", "021")
191 );
192
193 }
194
195 static Stream<Arguments> testIsEncodeEquals() {
196
197 return Stream.of(
198 Arguments.arguments("Muller", "M\u00fcller"),
199 Arguments.arguments("Meyer", "Mayr"),
200 Arguments.arguments("house", "house"),
201 Arguments.arguments("House", "house"),
202 Arguments.arguments("Haus", "house"),
203 Arguments.arguments("ganz", "Gans"),
204 Arguments.arguments("ganz", "G\u00e4nse"),
205 Arguments.arguments("Miyagi", "Miyako")
206 );
207
208 }
209
210 @Override
211
212 public void checkEncoding(final String expected, final String source) throws EncoderException {
213
214 TESTSET.add(source.toUpperCase(Locale.GERMAN).replace('Ä', 'A').replace('Ö', 'O').replace('Ü', 'U'));
215 super.checkEncoding(expected, source);
216 }
217
218 @Override
219 protected ColognePhonetic createStringEncoder() {
220 return new ColognePhonetic();
221 }
222
223 @ParameterizedTest
224 @MethodSource
225 void testBasicEncoding(final String expected, final String source) throws EncoderException {
226 checkEncoding(expected, source);
227 }
228
229 @Test
230
231 void testCanFail() {
232 assertThrows(AssertionFailedError.class, () -> checkEncoding("/", "Fehler"));
233 }
234
235 @ParameterizedTest
236 @MethodSource
237 void testEdgeCases(final String source, final String expected) throws EncoderException {
238 checkEncoding(expected, source);
239 }
240
241 @ParameterizedTest
242 @MethodSource
243 void testExamples(final String source, final String expected) throws EncoderException {
244 checkEncoding(expected, source);
245 }
246
247 @Test
248 void testHyphen() throws EncoderException {
249
250 checkEncodings(new String[][] { { "bergisch-gladbach", "174845214" }, { "M\u00fcller-L\u00fcdenscheidt", "65752682" } });
251 }
252
253 @ParameterizedTest
254 @MethodSource
255 void testIsEncodeEquals(final String source, final String expected) {
256 final boolean encodeEqual = getStringEncoder().isEncodeEqual(expected, source);
257 assertTrue(encodeEqual, () -> expected + " != " + source);
258 }
259
260 @Test
261 void testSpecialCharsBetweenSameLetters() throws EncoderException {
262 checkEncodingVariations("28282", "Test test", "Testtest", "Test-test", "TesT#Test", "TesT?test");
263 }
264
265 @Test
266 void testVariationsMella() throws EncoderException {
267 checkEncodingVariations("65", "mella", "milah", "moulla", "mellah", "muehle", "mule");
268 }
269
270 @Test
271 void testVariationsMeyer() throws EncoderException {
272 checkEncodingVariations("67", "Meier", "Maier", "Mair", "Meyer", "Meyr", "Mejer", "Major");
273 }
274 }