001 /*
002 * Licensed to the Apache Software Foundation (ASF) under one or more
003 * contributor license agreements. See the NOTICE file distributed with
004 * this work for additional information regarding copyright ownership.
005 * The ASF licenses this file to You under the Apache License, Version 2.0
006 * (the "License"); you may not use this file except in compliance with
007 * the License. You may obtain a copy of the License at
008 *
009 * http://www.apache.org/licenses/LICENSE-2.0
010 *
011 * Unless required by applicable law or agreed to in writing, software
012 * distributed under the License is distributed on an "AS IS" BASIS,
013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 * See the License for the specific language governing permissions and
015 * limitations under the License.
016 */
017
018 package org.apache.commons.codec.language;
019
020 import static org.junit.Assert.assertEquals;
021 import static org.junit.Assert.assertTrue;
022 import static org.junit.Assert.fail;
023
024 import org.apache.commons.codec.StringEncoder;
025 import org.apache.commons.codec.StringEncoderAbstractTest;
026 import org.junit.Test;
027
028 /**
029 * @version $Id: MetaphoneTest.html 889935 2013-12-11 05:05:13Z ggregory $
030 */
031 public class MetaphoneTest extends StringEncoderAbstractTest {
032
033 public void assertIsMetaphoneEqual(String source, String[] matches) {
034 // match source to all matches
035 for (String matche : matches) {
036 assertTrue("Source: " + source + ", should have same Metaphone as: " + matche,
037 this.getMetaphone().isMetaphoneEqual(source, matche));
038 }
039 // match to each other
040 for (String matche : matches) {
041 for (String matche2 : matches) {
042 assertTrue(this.getMetaphone().isMetaphoneEqual(matche, matche2));
043 }
044 }
045 }
046
047 public void assertMetaphoneEqual(String[][] pairs) {
048 this.validateFixture(pairs);
049 for (String[] pair : pairs) {
050 String name0 = pair[0];
051 String name1 = pair[1];
052 String failMsg = "Expected match between " + name0 + " and " + name1;
053 assertTrue(failMsg, this.getMetaphone().isMetaphoneEqual(name0, name1));
054 assertTrue(failMsg, this.getMetaphone().isMetaphoneEqual(name1, name0));
055 }
056 }
057
058 /**
059 * @return Returns the metaphone.
060 */
061 private Metaphone getMetaphone() {
062 return (Metaphone) this.getStringEncoder();
063 }
064
065 @Override
066 protected StringEncoder createStringEncoder() {
067 return new Metaphone();
068 }
069
070 @Test
071 public void testIsMetaphoneEqual1() {
072 this.assertMetaphoneEqual(new String[][] { { "Case", "case" }, {
073 "CASE", "Case" }, {
074 "caSe", "cAsE" }, {
075 "quick", "cookie" }
076 });
077 }
078
079 /**
080 * Matches computed from http://www.lanw.com/java/phonetic/default.htm
081 */
082 @Test
083 public void testIsMetaphoneEqual2() {
084 this.assertMetaphoneEqual(new String[][] { { "Lawrence", "Lorenza" }, {
085 "Gary", "Cahra" }, });
086 }
087
088 /**
089 * Initial AE case.
090 *
091 * Match data computed from http://www.lanw.com/java/phonetic/default.htm
092 */
093 @Test
094 public void testIsMetaphoneEqualAero() {
095 this.assertIsMetaphoneEqual("Aero", new String[] { "Eure" });
096 }
097
098 /**
099 * Initial WH case.
100 *
101 * Match data computed from http://www.lanw.com/java/phonetic/default.htm
102 */
103 @Test
104 public void testIsMetaphoneEqualWhite() {
105 this.assertIsMetaphoneEqual(
106 "White",
107 new String[] { "Wade", "Wait", "Waite", "Wat", "Whit", "Wiatt", "Wit", "Wittie", "Witty", "Wood", "Woodie", "Woody" });
108 }
109
110 /**
111 * Initial A, not followed by an E case.
112 *
113 * Match data computed from http://www.lanw.com/java/phonetic/default.htm
114 */
115 @Test
116 public void testIsMetaphoneEqualAlbert() {
117 this.assertIsMetaphoneEqual("Albert", new String[] { "Ailbert", "Alberik", "Albert", "Alberto", "Albrecht" });
118 }
119
120 /**
121 * Match data computed from http://www.lanw.com/java/phonetic/default.htm
122 */
123 @Test
124 public void testIsMetaphoneEqualGary() {
125 this.assertIsMetaphoneEqual(
126 "Gary",
127 new String[] {
128 "Cahra",
129 "Cara",
130 "Carey",
131 "Cari",
132 "Caria",
133 "Carie",
134 "Caro",
135 "Carree",
136 "Carri",
137 "Carrie",
138 "Carry",
139 "Cary",
140 "Cora",
141 "Corey",
142 "Cori",
143 "Corie",
144 "Correy",
145 "Corri",
146 "Corrie",
147 "Corry",
148 "Cory",
149 "Gray",
150 "Kara",
151 "Kare",
152 "Karee",
153 "Kari",
154 "Karia",
155 "Karie",
156 "Karrah",
157 "Karrie",
158 "Karry",
159 "Kary",
160 "Keri",
161 "Kerri",
162 "Kerrie",
163 "Kerry",
164 "Kira",
165 "Kiri",
166 "Kora",
167 "Kore",
168 "Kori",
169 "Korie",
170 "Korrie",
171 "Korry" });
172 }
173
174 /**
175 * Match data computed from http://www.lanw.com/java/phonetic/default.htm
176 */
177 @Test
178 public void testIsMetaphoneEqualJohn() {
179 this.assertIsMetaphoneEqual(
180 "John",
181 new String[] {
182 "Gena",
183 "Gene",
184 "Genia",
185 "Genna",
186 "Genni",
187 "Gennie",
188 "Genny",
189 "Giana",
190 "Gianna",
191 "Gina",
192 "Ginni",
193 "Ginnie",
194 "Ginny",
195 "Jaine",
196 "Jan",
197 "Jana",
198 "Jane",
199 "Janey",
200 "Jania",
201 "Janie",
202 "Janna",
203 "Jany",
204 "Jayne",
205 "Jean",
206 "Jeana",
207 "Jeane",
208 "Jeanie",
209 "Jeanna",
210 "Jeanne",
211 "Jeannie",
212 "Jen",
213 "Jena",
214 "Jeni",
215 "Jenn",
216 "Jenna",
217 "Jennee",
218 "Jenni",
219 "Jennie",
220 "Jenny",
221 "Jinny",
222 "Jo Ann",
223 "Jo-Ann",
224 "Jo-Anne",
225 "Joan",
226 "Joana",
227 "Joane",
228 "Joanie",
229 "Joann",
230 "Joanna",
231 "Joanne",
232 "Joeann",
233 "Johna",
234 "Johnna",
235 "Joni",
236 "Jonie",
237 "Juana",
238 "June",
239 "Junia",
240 "Junie" });
241 }
242
243 /**
244 * Initial KN case.
245 *
246 * Match data computed from http://www.lanw.com/java/phonetic/default.htm
247 */
248 @Test
249 public void testIsMetaphoneEqualKnight() {
250 this.assertIsMetaphoneEqual(
251 "Knight",
252 new String[] {
253 "Hynda",
254 "Nada",
255 "Nadia",
256 "Nady",
257 "Nat",
258 "Nata",
259 "Natty",
260 "Neda",
261 "Nedda",
262 "Nedi",
263 "Netta",
264 "Netti",
265 "Nettie",
266 "Netty",
267 "Nita",
268 "Nydia" });
269 }
270 /**
271 * Match data computed from http://www.lanw.com/java/phonetic/default.htm
272 */
273 @Test
274 public void testIsMetaphoneEqualMary() {
275 this.assertIsMetaphoneEqual(
276 "Mary",
277 new String[] {
278 "Mair",
279 "Maire",
280 "Mara",
281 "Mareah",
282 "Mari",
283 "Maria",
284 "Marie",
285 "Mary",
286 "Maura",
287 "Maure",
288 "Meara",
289 "Merrie",
290 "Merry",
291 "Mira",
292 "Moira",
293 "Mora",
294 "Moria",
295 "Moyra",
296 "Muire",
297 "Myra",
298 "Myrah" });
299 }
300
301 /**
302 * Match data computed from http://www.lanw.com/java/phonetic/default.htm
303 */
304 @Test
305 public void testIsMetaphoneEqualParis() {
306 this.assertIsMetaphoneEqual("Paris", new String[] { "Pearcy", "Perris", "Piercy", "Pierz", "Pryse" });
307 }
308
309 /**
310 * Match data computed from http://www.lanw.com/java/phonetic/default.htm
311 */
312 @Test
313 public void testIsMetaphoneEqualPeter() {
314 this.assertIsMetaphoneEqual(
315 "Peter",
316 new String[] { "Peadar", "Peder", "Pedro", "Peter", "Petr", "Peyter", "Pieter", "Pietro", "Piotr" });
317 }
318
319 /**
320 * Match data computed from http://www.lanw.com/java/phonetic/default.htm
321 */
322 @Test
323 public void testIsMetaphoneEqualRay() {
324 this.assertIsMetaphoneEqual("Ray", new String[] { "Ray", "Rey", "Roi", "Roy", "Ruy" });
325 }
326
327 /**
328 * Match data computed from http://www.lanw.com/java/phonetic/default.htm
329 */
330 @Test
331 public void testIsMetaphoneEqualSusan() {
332 this.assertIsMetaphoneEqual(
333 "Susan",
334 new String[] {
335 "Siusan",
336 "Sosanna",
337 "Susan",
338 "Susana",
339 "Susann",
340 "Susanna",
341 "Susannah",
342 "Susanne",
343 "Suzann",
344 "Suzanna",
345 "Suzanne",
346 "Zuzana" });
347 }
348
349 /**
350 * Initial WR case.
351 *
352 * Match data computed from http://www.lanw.com/java/phonetic/default.htm
353 */
354 @Test
355 public void testIsMetaphoneEqualWright() {
356 this.assertIsMetaphoneEqual("Wright", new String[] { "Rota", "Rudd", "Ryde" });
357 }
358
359 /**
360 * Match data computed from http://www.lanw.com/java/phonetic/default.htm
361 */
362 @Test
363 public void testIsMetaphoneEqualXalan() {
364 this.assertIsMetaphoneEqual(
365 "Xalan",
366 new String[] { "Celene", "Celina", "Celine", "Selena", "Selene", "Selina", "Seline", "Suellen", "Xylina" });
367 }
368
369 @Test
370 public void testMetaphone() {
371 assertEquals("HL", this.getMetaphone().metaphone("howl"));
372 assertEquals("TSTN", this.getMetaphone().metaphone("testing"));
373 assertEquals("0", this.getMetaphone().metaphone("The"));
374 assertEquals("KK", this.getMetaphone().metaphone("quick"));
375 assertEquals("BRN", this.getMetaphone().metaphone("brown"));
376 assertEquals("FKS", this.getMetaphone().metaphone("fox"));
377 assertEquals("JMPT", this.getMetaphone().metaphone("jumped"));
378 assertEquals("OFR", this.getMetaphone().metaphone("over"));
379 assertEquals("0", this.getMetaphone().metaphone("the"));
380 assertEquals("LS", this.getMetaphone().metaphone("lazy"));
381 assertEquals("TKS", this.getMetaphone().metaphone("dogs"));
382 }
383
384 @Test
385 public void testWordEndingInMB() {
386 assertEquals( "KM", this.getMetaphone().metaphone("COMB") );
387 assertEquals( "TM", this.getMetaphone().metaphone("TOMB") );
388 assertEquals( "WM", this.getMetaphone().metaphone("WOMB") );
389 }
390
391 @Test
392 public void testDiscardOfSCEOrSCIOrSCY() {
393 assertEquals( "SNS", this.getMetaphone().metaphone("SCIENCE") );
394 assertEquals( "SN", this.getMetaphone().metaphone("SCENE") );
395 assertEquals( "S", this.getMetaphone().metaphone("SCY") );
396 }
397
398 /**
399 * Tests (CODEC-57) Metaphone.metaphone(String) returns an empty string when passed the word "why"
400 */
401 @Test
402 public void testWhy() {
403 // PHP returns "H". The original metaphone returns an empty string.
404 assertEquals("", this.getMetaphone().metaphone("WHY"));
405 }
406
407 @Test
408 public void testWordsWithCIA() {
409 assertEquals( "XP", this.getMetaphone().metaphone("CIAPO") );
410 }
411
412 @Test
413 public void testTranslateOfSCHAndCH() {
414 assertEquals( "SKTL", this.getMetaphone().metaphone("SCHEDULE") );
415 assertEquals( "SKMT", this.getMetaphone().metaphone("SCHEMATIC") );
416
417 assertEquals( "KRKT", this.getMetaphone().metaphone("CHARACTER") );
418 assertEquals( "TX", this.getMetaphone().metaphone("TEACH") );
419 }
420
421 @Test
422 public void testTranslateToJOfDGEOrDGIOrDGY() {
423 assertEquals( "TJ", this.getMetaphone().metaphone("DODGY") );
424 assertEquals( "TJ", this.getMetaphone().metaphone("DODGE") );
425 assertEquals( "AJMT", this.getMetaphone().metaphone("ADGIEMTI") );
426 }
427
428 @Test
429 public void testDiscardOfSilentHAfterG() {
430 assertEquals( "KNT", this.getMetaphone().metaphone("GHENT") );
431 assertEquals( "B", this.getMetaphone().metaphone("BAUGH") );
432 }
433
434 @Test
435 public void testDiscardOfSilentGN() {
436 // NOTE: This does not test for silent GN, but for starting with GN
437 assertEquals( "N", this.getMetaphone().metaphone("GNU") );
438
439 // NOTE: Trying to test for GNED, but expected code does not appear to execute
440 assertEquals( "SNT", this.getMetaphone().metaphone("SIGNED") );
441 }
442
443 @Test
444 public void testPHTOF() {
445 assertEquals( "FX", this.getMetaphone().metaphone("PHISH") );
446 }
447
448 @Test
449 public void testSHAndSIOAndSIAToX() {
450 assertEquals( "XT", this.getMetaphone().metaphone("SHOT") );
451 assertEquals( "OTXN", this.getMetaphone().metaphone("ODSIAN") );
452 assertEquals( "PLXN", this.getMetaphone().metaphone("PULSION") );
453 }
454
455 @Test
456 public void testTIOAndTIAToX() {
457 assertEquals( "OX", this.getMetaphone().metaphone("OTIA") );
458 assertEquals( "PRXN", this.getMetaphone().metaphone("PORTION") );
459 }
460
461 @Test
462 public void testTCH() {
463 assertEquals( "RX", this.getMetaphone().metaphone("RETCH") );
464 assertEquals( "WX", this.getMetaphone().metaphone("WATCH") );
465 }
466
467 @Test
468 public void testExceedLength() {
469 // should be AKSKS, but istruncated by Max Code Length
470 assertEquals( "AKSK", this.getMetaphone().metaphone("AXEAXE") );
471 }
472
473 @Test
474 public void testSetMaxLengthWithTruncation() {
475 // should be AKSKS, but istruncated by Max Code Length
476 this.getMetaphone().setMaxCodeLen( 6 );
477 assertEquals( "AKSKSK", this.getMetaphone().metaphone("AXEAXEAXE") );
478 }
479
480 public void validateFixture(String[][] pairs) {
481 if (pairs.length == 0) {
482 fail("Test fixture is empty");
483 }
484 for (int i = 0; i < pairs.length; i++) {
485 if (pairs[i].length != 2) {
486 fail("Error in test fixture in the data array at index " + i);
487 }
488 }
489 }
490
491 }