1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18 package org.apache.commons.codec.language.bm;
19
20 import static org.junit.jupiter.api.Assertions.assertEquals;
21
22 import java.util.Arrays;
23 import java.util.HashSet;
24 import java.util.Map;
25 import java.util.TreeMap;
26
27 import org.junit.jupiter.api.Test;
28
29
30
31
32 public class PhoneticEngineRegressionTest {
33
34
35
36
37
38
39
40
41 private static String encode(final Map<String, String> args, final boolean concat, final String input) {
42 final Languages.LanguageSet languageSet;
43 final PhoneticEngine engine;
44
45
46
47 final String nameTypeArg = args.get("nameType");
48 final NameType nameType = nameTypeArg == null ? NameType.GENERIC : NameType.valueOf(nameTypeArg);
49
50 final String ruleTypeArg = args.get("ruleType");
51 final RuleType ruleType = ruleTypeArg == null ? RuleType.APPROX : RuleType.valueOf(ruleTypeArg);
52
53 engine = new PhoneticEngine(nameType, ruleType, concat);
54
55
56 final String languageSetArg = args.get("languageSet");
57 if (languageSetArg == null || languageSetArg.equals("auto")) {
58 languageSet = null;
59 } else {
60 languageSet = Languages.LanguageSet.from(new HashSet<>(Arrays.asList(languageSetArg.split(","))));
61 }
62
63
64
65
66
67
68
69
70
71
72 if (languageSet == null) {
73 return engine.encode(input);
74 }
75 return engine.encode(input, languageSet);
76 }
77
78 @Test
79 public void testCompatibilityWithOriginalVersion() {
80
81
82
83 final Map<String, String> args = new TreeMap<>();
84 args.put("nameType", "GENERIC");
85 args.put("ruleType", "APPROX");
86
87 assertEquals("Ybram|Ybrom|abram|abran|abrom|abron|avram|avrom|obram|obran|obrom|obron|ovram|ovrom",
88 encode(args, true, "abram"));
89 assertEquals("bndzn|bntsn|bnzn|vndzn|vntsn",
90 encode(args, true, "Bendzin"));
91
92 args.put("nameType", "ASHKENAZI");
93 args.put("ruleType", "APPROX");
94
95 assertEquals("Ybram|Ybrom|abram|abrom|avram|avrom|imbram|imbrom|obram|obrom|ombram|ombrom|ovram|ovrom",
96 encode(args, true, "abram"));
97 assertEquals("YlpYrn|Ylpirn|alpYrn|alpirn|olpYrn|olpirn|xalpirn|xolpirn",
98 encode(args, true, "Halpern"));
99
100 }
101
102 @Test
103 public void testSolrASHKENAZI() {
104 Map<String, String> args;
105
106
107 args = new TreeMap<>();
108 args.put("nameType", "ASHKENAZI");
109 assertEquals("YngYlo|Yngilo|angYlo|angilo|anilo|anxilo|anzilo|ongYlo|ongilo|onilo|onxilo|onzilo",
110 encode(args, true, "Angelo"));
111 args.put("ruleType", "EXACT");
112 assertEquals("andZelo|angelo|anhelo|anxelo",
113 encode(args, true, "Angelo"));
114 assertEquals("dandZelo|dangelo|danhelo|danxelo",
115 encode(args, true, "D'Angelo"));
116 args.put("languageSet", "italian,greek,spanish");
117 assertEquals("angelo|anxelo",
118 encode(args, true, "Angelo"));
119 assertEquals(encode(args, true, "1234"), "");
120
121
122 args = new TreeMap<>();
123 args.put("nameType", "ASHKENAZI");
124 assertEquals("YngYlo|Yngilo|angYlo|angilo|anilo|anxilo|anzilo|ongYlo|ongilo|onilo|onxilo|onzilo",
125 encode(args, false, "Angelo"));
126 args.put("ruleType", "EXACT");
127 assertEquals("andZelo|angelo|anhelo|anxelo",
128 encode(args, false, "Angelo"));
129 assertEquals("dandZelo|dangelo|danhelo|danxelo",
130 encode(args, false, "D'Angelo"));
131 args.put("languageSet", "italian,greek,spanish");
132 assertEquals("angelo|anxelo",
133 encode(args, false, "Angelo"));
134 assertEquals(encode(args, false, "1234"), "");
135
136
137 args = new TreeMap<>();
138 args.put("nameType", "ASHKENAZI");
139 assertEquals("YngYlo|Yngilo|angYlo|angilo|anilo|anxilo|anzilo|ongYlo|ongilo|onilo|onxilo|onzilo",
140 encode(args, true, "Angelo"));
141 args.put("ruleType", "APPROX");
142 assertEquals("YngYlo|Yngilo|angYlo|angilo|anilo|anxilo|anzilo|ongYlo|ongilo|onilo|onxilo|onzilo",
143 encode(args, true, "Angelo"));
144 assertEquals("dYngYlo|dYngilo|dangYlo|dangilo|danilo|danxilo|danzilo|dongYlo|dongilo|donilo|donxilo|donzilo",
145 encode(args, true, "D'Angelo"));
146 args.put("languageSet", "italian,greek,spanish");
147 assertEquals("angilo|anxilo|ongilo|onxilo",
148 encode(args, true, "Angelo"));
149 assertEquals(encode(args, true, "1234"), "");
150
151
152 args = new TreeMap<>();
153 args.put("nameType", "ASHKENAZI");
154 assertEquals("YngYlo|Yngilo|angYlo|angilo|anilo|anxilo|anzilo|ongYlo|ongilo|onilo|onxilo|onzilo",
155 encode(args, false, "Angelo"));
156 args.put("ruleType", "APPROX");
157 assertEquals("YngYlo|Yngilo|angYlo|angilo|anilo|anxilo|anzilo|ongYlo|ongilo|onilo|onxilo|onzilo",
158 encode(args, false, "Angelo"));
159 assertEquals("dYngYlo|dYngilo|dangYlo|dangilo|danilo|danxilo|danzilo|dongYlo|dongilo|donilo|donxilo|donzilo",
160 encode(args, false, "D'Angelo"));
161 args.put("languageSet", "italian,greek,spanish");
162 assertEquals("angilo|anxilo|ongilo|onxilo",
163 encode(args, false, "Angelo"));
164 assertEquals(encode(args, false, "1234"), "");
165 }
166
167 @Test
168 public void testSolrGENERIC() {
169 Map<String, String> args;
170
171
172 args = new TreeMap<>();
173 args.put("nameType", "GENERIC");
174 assertEquals("YngYlo|Yngilo|agilo|angYlo|angilo|aniilo|anilo|anxilo|anzilo|ogilo|ongYlo|ongilo|oniilo|onilo|onxilo|onzilo",
175 encode(args, true, "Angelo"));
176 args.put("ruleType", "EXACT");
177 assertEquals("anZelo|andZelo|angelo|anhelo|anjelo|anxelo",
178 encode(args, true, "Angelo"));
179 assertEquals("(anZelo|andZelo|angelo|anhelo|anjelo|anxelo)-(danZelo|dandZelo|dangelo|danhelo|danjelo|danxelo)",
180 encode(args, true, "D'Angelo"));
181 args.put("languageSet", "italian,greek,spanish");
182 assertEquals("andZelo|angelo|anxelo",
183 encode(args, true, "Angelo"));
184 assertEquals(encode(args, true, "1234"), "");
185
186
187 args = new TreeMap<>();
188 assertEquals("YngYlo|Yngilo|agilo|angYlo|angilo|aniilo|anilo|anxilo|anzilo|ogilo|ongYlo|ongilo|oniilo|onilo|onxilo|onzilo",
189 encode(args, false, "Angelo"));
190 args.put("ruleType", "EXACT");
191 assertEquals("anZelo|andZelo|angelo|anhelo|anjelo|anxelo",
192 encode(args, false, "Angelo"));
193 assertEquals("(anZelo|andZelo|angelo|anhelo|anjelo|anxelo)-(danZelo|dandZelo|dangelo|danhelo|danjelo|danxelo)",
194 encode(args, false, "D'Angelo"));
195 args.put("languageSet", "italian,greek,spanish");
196 assertEquals("andZelo|angelo|anxelo",
197 encode(args, false, "Angelo"));
198 assertEquals(encode(args, false, "1234"), "");
199
200
201 args = new TreeMap<>();
202 assertEquals("YngYlo|Yngilo|agilo|angYlo|angilo|aniilo|anilo|anxilo|anzilo|ogilo|ongYlo|ongilo|oniilo|onilo|onxilo|onzilo",
203 encode(args, true, "Angelo"));
204 args.put("ruleType", "APPROX");
205 assertEquals("YngYlo|Yngilo|agilo|angYlo|angilo|aniilo|anilo|anxilo|anzilo|ogilo|ongYlo|ongilo|oniilo|onilo|onxilo|onzilo",
206 encode(args, true, "Angelo"));
207 assertEquals("(YngYlo|Yngilo|agilo|angYlo|angilo|aniilo|anilo|anxilo|anzilo|ogilo|ongYlo|ongilo|oniilo|onilo|onxilo|onzilo)-(dYngYlo|dYngilo|dagilo|dangYlo|dangilo|daniilo|danilo|danxilo|danzilo|dogilo|dongYlo|dongilo|doniilo|donilo|donxilo|donzilo)",
208 encode(args, true, "D'Angelo"));
209 args.put("languageSet", "italian,greek,spanish");
210 assertEquals("angilo|anxilo|anzilo|ongilo|onxilo|onzilo",
211 encode(args, true, "Angelo"));
212 assertEquals(encode(args, true, "1234"), "");
213
214
215 args = new TreeMap<>();
216 assertEquals("YngYlo|Yngilo|agilo|angYlo|angilo|aniilo|anilo|anxilo|anzilo|ogilo|ongYlo|ongilo|oniilo|onilo|onxilo|onzilo",
217 encode(args, false, "Angelo"));
218 args.put("ruleType", "APPROX");
219 assertEquals("YngYlo|Yngilo|agilo|angYlo|angilo|aniilo|anilo|anxilo|anzilo|ogilo|ongYlo|ongilo|oniilo|onilo|onxilo|onzilo",
220 encode(args, false, "Angelo"));
221 assertEquals("(YngYlo|Yngilo|agilo|angYlo|angilo|aniilo|anilo|anxilo|anzilo|ogilo|ongYlo|ongilo|oniilo|onilo|onxilo|onzilo)-(dYngYlo|dYngilo|dagilo|dangYlo|dangilo|daniilo|danilo|danxilo|danzilo|dogilo|dongYlo|dongilo|doniilo|donilo|donxilo|donzilo)",
222 encode(args, false, "D'Angelo"));
223 args.put("languageSet", "italian,greek,spanish");
224 assertEquals("angilo|anxilo|anzilo|ongilo|onxilo|onzilo",
225 encode(args, false, "Angelo"));
226 assertEquals(encode(args, false, "1234"), "");
227 }
228
229 @Test
230 public void testSolrSEPHARDIC() {
231 Map<String, String> args;
232
233
234 args = new TreeMap<>();
235 args.put("nameType", "SEPHARDIC");
236 assertEquals("anhila|anhilu|anzila|anzilu|nhila|nhilu|nzila|nzilu",
237 encode(args, true, "Angelo"));
238 args.put("ruleType", "EXACT");
239 assertEquals("anZelo|andZelo|anxelo",
240 encode(args, true, "Angelo"));
241 assertEquals("anZelo|andZelo|anxelo",
242 encode(args, true, "D'Angelo"));
243 args.put("languageSet", "italian,greek,spanish");
244 assertEquals("andZelo|anxelo",
245 encode(args, true, "Angelo"));
246 assertEquals(encode(args, true, "1234"), "");
247
248
249 args = new TreeMap<>();
250 args.put("nameType", "SEPHARDIC");
251 assertEquals("anhila|anhilu|anzila|anzilu|nhila|nhilu|nzila|nzilu",
252 encode(args, false, "Angelo"));
253 args.put("ruleType", "EXACT");
254 assertEquals("anZelo|andZelo|anxelo",
255 encode(args, false, "Angelo"));
256 assertEquals("danZelo|dandZelo|danxelo",
257 encode(args, false, "D'Angelo"));
258 args.put("languageSet", "italian,greek,spanish");
259 assertEquals("andZelo|anxelo",
260 encode(args, false, "Angelo"));
261 assertEquals(encode(args, false, "1234"), "");
262
263
264 args = new TreeMap<>();
265 args.put("nameType", "SEPHARDIC");
266 assertEquals("anhila|anhilu|anzila|anzilu|nhila|nhilu|nzila|nzilu",
267 encode(args, true, "Angelo"));
268 args.put("ruleType", "APPROX");
269 assertEquals("anhila|anhilu|anzila|anzilu|nhila|nhilu|nzila|nzilu",
270 encode(args, true, "Angelo"));
271 assertEquals("anhila|anhilu|anzila|anzilu|nhila|nhilu|nzila|nzilu",
272 encode(args, true, "D'Angelo"));
273 args.put("languageSet", "italian,greek,spanish");
274 assertEquals("anhila|anhilu|anzila|anzilu|nhila|nhilu|nzila|nzilu",
275 encode(args, true, "Angelo"));
276 assertEquals(encode(args, true, "1234"), "");
277
278
279 args = new TreeMap<>();
280 args.put("nameType", "SEPHARDIC");
281 assertEquals("anhila|anhilu|anzila|anzilu|nhila|nhilu|nzila|nzilu",
282 encode(args, false, "Angelo"));
283 args.put("ruleType", "APPROX");
284 assertEquals("anhila|anhilu|anzila|anzilu|nhila|nhilu|nzila|nzilu",
285 encode(args, false, "Angelo"));
286 assertEquals("danhila|danhilu|danzila|danzilu|nhila|nhilu|nzila|nzilu",
287 encode(args, false, "D'Angelo"));
288 args.put("languageSet", "italian,greek,spanish");
289 assertEquals("anhila|anhilu|anzila|anzilu|nhila|nhilu|nzila|nzilu",
290 encode(args, false, "Angelo"));
291 assertEquals(encode(args, false, "1234"), "");
292 }
293 }