1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18 package org.apache.commons.codec.language.bm;
19
20 import static org.junit.jupiter.api.Assertions.assertEquals;
21
22 import java.util.Arrays;
23 import java.util.HashSet;
24 import java.util.Map;
25 import java.util.TreeMap;
26 import java.util.regex.Pattern;
27
28 import org.junit.jupiter.api.Test;
29
30
31
32
33 class PhoneticEngineRegressionTest {
34
35 private static final Pattern COMMA_PATTERN = Pattern.compile(",");
36
37
38
39
40
41
42
43
44 private static String encode(final Map<String, String> args, final boolean concat, final String input) {
45 final Languages.LanguageSet languageSet;
46 final PhoneticEngine engine;
47
48
49
50 final String nameTypeArg = args.get("nameType");
51 final NameType nameType = nameTypeArg == null ? NameType.GENERIC : NameType.valueOf(nameTypeArg);
52
53 final String ruleTypeArg = args.get("ruleType");
54 final RuleType ruleType = ruleTypeArg == null ? RuleType.APPROX : RuleType.valueOf(ruleTypeArg);
55
56 engine = new PhoneticEngine(nameType, ruleType, concat);
57
58
59 final String languageSetArg = args.get("languageSet");
60 if (languageSetArg == null || languageSetArg.equals("auto")) {
61 languageSet = null;
62 } else {
63 languageSet = Languages.LanguageSet.from(new HashSet<>(Arrays.asList(COMMA_PATTERN.split(languageSetArg))));
64 }
65
66
67
68
69
70
71
72
73
74
75 if (languageSet == null) {
76 return engine.encode(input);
77 }
78 return engine.encode(input, languageSet);
79 }
80
81 @Test
82 void testCompatibilityWithOriginalVersion() {
83
84
85
86 final Map<String, String> args = new TreeMap<>();
87 args.put("nameType", "GENERIC");
88 args.put("ruleType", "APPROX");
89
90 assertEquals("Ybram|Ybrom|abram|abran|abrom|abron|avram|avrom|obram|obran|obrom|obron|ovram|ovrom",
91 encode(args, true, "abram"));
92 assertEquals("bndzn|bntsn|bnzn|vndzn|vntsn",
93 encode(args, true, "Bendzin"));
94
95 args.put("nameType", "ASHKENAZI");
96 args.put("ruleType", "APPROX");
97
98 assertEquals("Ybram|Ybrom|abram|abrom|avram|avrom|imbram|imbrom|obram|obrom|ombram|ombrom|ovram|ovrom",
99 encode(args, true, "abram"));
100 assertEquals("YlpYrn|Ylpirn|alpYrn|alpirn|olpYrn|olpirn|xalpirn|xolpirn",
101 encode(args, true, "Halpern"));
102
103 }
104
105 @Test
106 void testSolrASHKENAZI() {
107 Map<String, String> args;
108
109
110 args = new TreeMap<>();
111 args.put("nameType", "ASHKENAZI");
112 assertEquals("YngYlo|Yngilo|angYlo|angilo|anilo|anxilo|anzilo|ongYlo|ongilo|onilo|onxilo|onzilo",
113 encode(args, true, "Angelo"));
114 args.put("ruleType", "EXACT");
115 assertEquals("andZelo|angelo|anhelo|anxelo",
116 encode(args, true, "Angelo"));
117 assertEquals("dandZelo|dangelo|danhelo|danxelo",
118 encode(args, true, "D'Angelo"));
119 args.put("languageSet", "italian,greek,spanish");
120 assertEquals("angelo|anxelo",
121 encode(args, true, "Angelo"));
122 assertEquals(encode(args, true, "1234"), "");
123
124
125 args = new TreeMap<>();
126 args.put("nameType", "ASHKENAZI");
127 assertEquals("YngYlo|Yngilo|angYlo|angilo|anilo|anxilo|anzilo|ongYlo|ongilo|onilo|onxilo|onzilo",
128 encode(args, false, "Angelo"));
129 args.put("ruleType", "EXACT");
130 assertEquals("andZelo|angelo|anhelo|anxelo",
131 encode(args, false, "Angelo"));
132 assertEquals("dandZelo|dangelo|danhelo|danxelo",
133 encode(args, false, "D'Angelo"));
134 args.put("languageSet", "italian,greek,spanish");
135 assertEquals("angelo|anxelo",
136 encode(args, false, "Angelo"));
137 assertEquals(encode(args, false, "1234"), "");
138
139
140 args = new TreeMap<>();
141 args.put("nameType", "ASHKENAZI");
142 assertEquals("YngYlo|Yngilo|angYlo|angilo|anilo|anxilo|anzilo|ongYlo|ongilo|onilo|onxilo|onzilo",
143 encode(args, true, "Angelo"));
144 args.put("ruleType", "APPROX");
145 assertEquals("YngYlo|Yngilo|angYlo|angilo|anilo|anxilo|anzilo|ongYlo|ongilo|onilo|onxilo|onzilo",
146 encode(args, true, "Angelo"));
147 assertEquals("dYngYlo|dYngilo|dangYlo|dangilo|danilo|danxilo|danzilo|dongYlo|dongilo|donilo|donxilo|donzilo",
148 encode(args, true, "D'Angelo"));
149 args.put("languageSet", "italian,greek,spanish");
150 assertEquals("angilo|anxilo|ongilo|onxilo",
151 encode(args, true, "Angelo"));
152 assertEquals(encode(args, true, "1234"), "");
153
154
155 args = new TreeMap<>();
156 args.put("nameType", "ASHKENAZI");
157 assertEquals("YngYlo|Yngilo|angYlo|angilo|anilo|anxilo|anzilo|ongYlo|ongilo|onilo|onxilo|onzilo",
158 encode(args, false, "Angelo"));
159 args.put("ruleType", "APPROX");
160 assertEquals("YngYlo|Yngilo|angYlo|angilo|anilo|anxilo|anzilo|ongYlo|ongilo|onilo|onxilo|onzilo",
161 encode(args, false, "Angelo"));
162 assertEquals("dYngYlo|dYngilo|dangYlo|dangilo|danilo|danxilo|danzilo|dongYlo|dongilo|donilo|donxilo|donzilo",
163 encode(args, false, "D'Angelo"));
164 args.put("languageSet", "italian,greek,spanish");
165 assertEquals("angilo|anxilo|ongilo|onxilo",
166 encode(args, false, "Angelo"));
167 assertEquals(encode(args, false, "1234"), "");
168 }
169
170 @Test
171 void testSolrGENERIC() {
172 Map<String, String> args;
173
174
175 args = new TreeMap<>();
176 args.put("nameType", "GENERIC");
177 assertEquals("YngYlo|Yngilo|agilo|angYlo|angilo|aniilo|anilo|anxilo|anzilo|ogilo|ongYlo|ongilo|oniilo|onilo|onxilo|onzilo",
178 encode(args, true, "Angelo"));
179 args.put("ruleType", "EXACT");
180 assertEquals("anZelo|andZelo|angelo|anhelo|anjelo|anxelo",
181 encode(args, true, "Angelo"));
182 assertEquals("(anZelo|andZelo|angelo|anhelo|anjelo|anxelo)-(danZelo|dandZelo|dangelo|danhelo|danjelo|danxelo)",
183 encode(args, true, "D'Angelo"));
184 args.put("languageSet", "italian,greek,spanish");
185 assertEquals("andZelo|angelo|anxelo",
186 encode(args, true, "Angelo"));
187 assertEquals(encode(args, true, "1234"), "");
188
189
190 args = new TreeMap<>();
191 assertEquals("YngYlo|Yngilo|agilo|angYlo|angilo|aniilo|anilo|anxilo|anzilo|ogilo|ongYlo|ongilo|oniilo|onilo|onxilo|onzilo",
192 encode(args, false, "Angelo"));
193 args.put("ruleType", "EXACT");
194 assertEquals("anZelo|andZelo|angelo|anhelo|anjelo|anxelo",
195 encode(args, false, "Angelo"));
196 assertEquals("(anZelo|andZelo|angelo|anhelo|anjelo|anxelo)-(danZelo|dandZelo|dangelo|danhelo|danjelo|danxelo)",
197 encode(args, false, "D'Angelo"));
198 args.put("languageSet", "italian,greek,spanish");
199 assertEquals("andZelo|angelo|anxelo",
200 encode(args, false, "Angelo"));
201 assertEquals(encode(args, false, "1234"), "");
202
203
204 args = new TreeMap<>();
205 assertEquals("YngYlo|Yngilo|agilo|angYlo|angilo|aniilo|anilo|anxilo|anzilo|ogilo|ongYlo|ongilo|oniilo|onilo|onxilo|onzilo",
206 encode(args, true, "Angelo"));
207 args.put("ruleType", "APPROX");
208 assertEquals("YngYlo|Yngilo|agilo|angYlo|angilo|aniilo|anilo|anxilo|anzilo|ogilo|ongYlo|ongilo|oniilo|onilo|onxilo|onzilo",
209 encode(args, true, "Angelo"));
210 assertEquals("(YngYlo|Yngilo|agilo|angYlo|angilo|aniilo|anilo|anxilo|anzilo|ogilo|ongYlo|ongilo|oniilo|onilo|onxilo|onzilo)-(dYngYlo|dYngilo|dagilo|dangYlo|dangilo|daniilo|danilo|danxilo|danzilo|dogilo|dongYlo|dongilo|doniilo|donilo|donxilo|donzilo)",
211 encode(args, true, "D'Angelo"));
212 args.put("languageSet", "italian,greek,spanish");
213 assertEquals("angilo|anxilo|anzilo|ongilo|onxilo|onzilo",
214 encode(args, true, "Angelo"));
215 assertEquals(encode(args, true, "1234"), "");
216
217
218 args = new TreeMap<>();
219 assertEquals("YngYlo|Yngilo|agilo|angYlo|angilo|aniilo|anilo|anxilo|anzilo|ogilo|ongYlo|ongilo|oniilo|onilo|onxilo|onzilo",
220 encode(args, false, "Angelo"));
221 args.put("ruleType", "APPROX");
222 assertEquals("YngYlo|Yngilo|agilo|angYlo|angilo|aniilo|anilo|anxilo|anzilo|ogilo|ongYlo|ongilo|oniilo|onilo|onxilo|onzilo",
223 encode(args, false, "Angelo"));
224 assertEquals("(YngYlo|Yngilo|agilo|angYlo|angilo|aniilo|anilo|anxilo|anzilo|ogilo|ongYlo|ongilo|oniilo|onilo|onxilo|onzilo)-(dYngYlo|dYngilo|dagilo|dangYlo|dangilo|daniilo|danilo|danxilo|danzilo|dogilo|dongYlo|dongilo|doniilo|donilo|donxilo|donzilo)",
225 encode(args, false, "D'Angelo"));
226 args.put("languageSet", "italian,greek,spanish");
227 assertEquals("angilo|anxilo|anzilo|ongilo|onxilo|onzilo",
228 encode(args, false, "Angelo"));
229 assertEquals(encode(args, false, "1234"), "");
230 }
231
232 @Test
233 void testSolrSEPHARDIC() {
234 Map<String, String> args;
235
236
237 args = new TreeMap<>();
238 args.put("nameType", "SEPHARDIC");
239 assertEquals("anhila|anhilu|anzila|anzilu|nhila|nhilu|nzila|nzilu",
240 encode(args, true, "Angelo"));
241 args.put("ruleType", "EXACT");
242 assertEquals("anZelo|andZelo|anxelo",
243 encode(args, true, "Angelo"));
244 assertEquals("anZelo|andZelo|anxelo",
245 encode(args, true, "D'Angelo"));
246 args.put("languageSet", "italian,greek,spanish");
247 assertEquals("andZelo|anxelo",
248 encode(args, true, "Angelo"));
249 assertEquals(encode(args, true, "1234"), "");
250
251
252 args = new TreeMap<>();
253 args.put("nameType", "SEPHARDIC");
254 assertEquals("anhila|anhilu|anzila|anzilu|nhila|nhilu|nzila|nzilu",
255 encode(args, false, "Angelo"));
256 args.put("ruleType", "EXACT");
257 assertEquals("anZelo|andZelo|anxelo",
258 encode(args, false, "Angelo"));
259 assertEquals("danZelo|dandZelo|danxelo",
260 encode(args, false, "D'Angelo"));
261 args.put("languageSet", "italian,greek,spanish");
262 assertEquals("andZelo|anxelo",
263 encode(args, false, "Angelo"));
264 assertEquals(encode(args, false, "1234"), "");
265
266
267 args = new TreeMap<>();
268 args.put("nameType", "SEPHARDIC");
269 assertEquals("anhila|anhilu|anzila|anzilu|nhila|nhilu|nzila|nzilu",
270 encode(args, true, "Angelo"));
271 args.put("ruleType", "APPROX");
272 assertEquals("anhila|anhilu|anzila|anzilu|nhila|nhilu|nzila|nzilu",
273 encode(args, true, "Angelo"));
274 assertEquals("anhila|anhilu|anzila|anzilu|nhila|nhilu|nzila|nzilu",
275 encode(args, true, "D'Angelo"));
276 args.put("languageSet", "italian,greek,spanish");
277 assertEquals("anhila|anhilu|anzila|anzilu|nhila|nhilu|nzila|nzilu",
278 encode(args, true, "Angelo"));
279 assertEquals(encode(args, true, "1234"), "");
280
281
282 args = new TreeMap<>();
283 args.put("nameType", "SEPHARDIC");
284 assertEquals("anhila|anhilu|anzila|anzilu|nhila|nhilu|nzila|nzilu",
285 encode(args, false, "Angelo"));
286 args.put("ruleType", "APPROX");
287 assertEquals("anhila|anhilu|anzila|anzilu|nhila|nhilu|nzila|nzilu",
288 encode(args, false, "Angelo"));
289 assertEquals("danhila|danhilu|danzila|danzilu|nhila|nhilu|nzila|nzilu",
290 encode(args, false, "D'Angelo"));
291 args.put("languageSet", "italian,greek,spanish");
292 assertEquals("anhila|anhilu|anzila|anzilu|nhila|nhilu|nzila|nzilu",
293 encode(args, false, "Angelo"));
294 assertEquals(encode(args, false, "1234"), "");
295 }
296 }