001 /*
002 * Licensed to the Apache Software Foundation (ASF) under one or more
003 * contributor license agreements. See the NOTICE file distributed with
004 * this work for additional information regarding copyright ownership.
005 * The ASF licenses this file to You under the Apache License, Version 2.0
006 * (the "License"); you may not use this file except in compliance with
007 * the License. You may obtain a copy of the License at
008 *
009 * http://www.apache.org/licenses/LICENSE-2.0
010 *
011 * Unless required by applicable law or agreed to in writing, software
012 * distributed under the License is distributed on an "AS IS" BASIS,
013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 * See the License for the specific language governing permissions and
015 * limitations under the License.
016 */
017
018 package org.apache.commons.codec.language;
019
020 import static org.junit.Assert.assertEquals;
021 import static org.junit.Assert.assertTrue;
022 import static org.junit.Assert.fail;
023
024 import org.apache.commons.codec.StringEncoderAbstractTest;
025 import org.junit.Test;
026
027 /**
028 * @version $Id: MetaphoneTest.html 889935 2013-12-11 05:05:13Z ggregory $
029 */
030 public class MetaphoneTest extends StringEncoderAbstractTest<Metaphone> {
031
032 public void assertIsMetaphoneEqual(final String source, final String[] matches) {
033 // match source to all matches
034 for (final String matche : matches) {
035 assertTrue("Source: " + source + ", should have same Metaphone as: " + matche,
036 this.getStringEncoder().isMetaphoneEqual(source, matche));
037 }
038 // match to each other
039 for (final String matche : matches) {
040 for (final String matche2 : matches) {
041 assertTrue(this.getStringEncoder().isMetaphoneEqual(matche, matche2));
042 }
043 }
044 }
045
046 public void assertMetaphoneEqual(final String[][] pairs) {
047 this.validateFixture(pairs);
048 for (final String[] pair : pairs) {
049 final String name0 = pair[0];
050 final String name1 = pair[1];
051 final String failMsg = "Expected match between " + name0 + " and " + name1;
052 assertTrue(failMsg, this.getStringEncoder().isMetaphoneEqual(name0, name1));
053 assertTrue(failMsg, this.getStringEncoder().isMetaphoneEqual(name1, name0));
054 }
055 }
056
057 @Override
058 protected Metaphone createStringEncoder() {
059 return new Metaphone();
060 }
061
062 @Test
063 public void testIsMetaphoneEqual1() {
064 this.assertMetaphoneEqual(new String[][] { { "Case", "case" }, {
065 "CASE", "Case" }, {
066 "caSe", "cAsE" }, {
067 "quick", "cookie" }
068 });
069 }
070
071 /**
072 * Matches computed from http://www.lanw.com/java/phonetic/default.htm
073 */
074 @Test
075 public void testIsMetaphoneEqual2() {
076 this.assertMetaphoneEqual(new String[][] { { "Lawrence", "Lorenza" }, {
077 "Gary", "Cahra" }, });
078 }
079
080 /**
081 * Initial AE case.
082 *
083 * Match data computed from http://www.lanw.com/java/phonetic/default.htm
084 */
085 @Test
086 public void testIsMetaphoneEqualAero() {
087 this.assertIsMetaphoneEqual("Aero", new String[] { "Eure" });
088 }
089
090 /**
091 * Initial WH case.
092 *
093 * Match data computed from http://www.lanw.com/java/phonetic/default.htm
094 */
095 @Test
096 public void testIsMetaphoneEqualWhite() {
097 this.assertIsMetaphoneEqual(
098 "White",
099 new String[] { "Wade", "Wait", "Waite", "Wat", "Whit", "Wiatt", "Wit", "Wittie", "Witty", "Wood", "Woodie", "Woody" });
100 }
101
102 /**
103 * Initial A, not followed by an E case.
104 *
105 * Match data computed from http://www.lanw.com/java/phonetic/default.htm
106 */
107 @Test
108 public void testIsMetaphoneEqualAlbert() {
109 this.assertIsMetaphoneEqual("Albert", new String[] { "Ailbert", "Alberik", "Albert", "Alberto", "Albrecht" });
110 }
111
112 /**
113 * Match data computed from http://www.lanw.com/java/phonetic/default.htm
114 */
115 @Test
116 public void testIsMetaphoneEqualGary() {
117 this.assertIsMetaphoneEqual(
118 "Gary",
119 new String[] {
120 "Cahra",
121 "Cara",
122 "Carey",
123 "Cari",
124 "Caria",
125 "Carie",
126 "Caro",
127 "Carree",
128 "Carri",
129 "Carrie",
130 "Carry",
131 "Cary",
132 "Cora",
133 "Corey",
134 "Cori",
135 "Corie",
136 "Correy",
137 "Corri",
138 "Corrie",
139 "Corry",
140 "Cory",
141 "Gray",
142 "Kara",
143 "Kare",
144 "Karee",
145 "Kari",
146 "Karia",
147 "Karie",
148 "Karrah",
149 "Karrie",
150 "Karry",
151 "Kary",
152 "Keri",
153 "Kerri",
154 "Kerrie",
155 "Kerry",
156 "Kira",
157 "Kiri",
158 "Kora",
159 "Kore",
160 "Kori",
161 "Korie",
162 "Korrie",
163 "Korry" });
164 }
165
166 /**
167 * Match data computed from http://www.lanw.com/java/phonetic/default.htm
168 */
169 @Test
170 public void testIsMetaphoneEqualJohn() {
171 this.assertIsMetaphoneEqual(
172 "John",
173 new String[] {
174 "Gena",
175 "Gene",
176 "Genia",
177 "Genna",
178 "Genni",
179 "Gennie",
180 "Genny",
181 "Giana",
182 "Gianna",
183 "Gina",
184 "Ginni",
185 "Ginnie",
186 "Ginny",
187 "Jaine",
188 "Jan",
189 "Jana",
190 "Jane",
191 "Janey",
192 "Jania",
193 "Janie",
194 "Janna",
195 "Jany",
196 "Jayne",
197 "Jean",
198 "Jeana",
199 "Jeane",
200 "Jeanie",
201 "Jeanna",
202 "Jeanne",
203 "Jeannie",
204 "Jen",
205 "Jena",
206 "Jeni",
207 "Jenn",
208 "Jenna",
209 "Jennee",
210 "Jenni",
211 "Jennie",
212 "Jenny",
213 "Jinny",
214 "Jo Ann",
215 "Jo-Ann",
216 "Jo-Anne",
217 "Joan",
218 "Joana",
219 "Joane",
220 "Joanie",
221 "Joann",
222 "Joanna",
223 "Joanne",
224 "Joeann",
225 "Johna",
226 "Johnna",
227 "Joni",
228 "Jonie",
229 "Juana",
230 "June",
231 "Junia",
232 "Junie" });
233 }
234
235 /**
236 * Initial KN case.
237 *
238 * Match data computed from http://www.lanw.com/java/phonetic/default.htm
239 */
240 @Test
241 public void testIsMetaphoneEqualKnight() {
242 this.assertIsMetaphoneEqual(
243 "Knight",
244 new String[] {
245 "Hynda",
246 "Nada",
247 "Nadia",
248 "Nady",
249 "Nat",
250 "Nata",
251 "Natty",
252 "Neda",
253 "Nedda",
254 "Nedi",
255 "Netta",
256 "Netti",
257 "Nettie",
258 "Netty",
259 "Nita",
260 "Nydia" });
261 }
262 /**
263 * Match data computed from http://www.lanw.com/java/phonetic/default.htm
264 */
265 @Test
266 public void testIsMetaphoneEqualMary() {
267 this.assertIsMetaphoneEqual(
268 "Mary",
269 new String[] {
270 "Mair",
271 "Maire",
272 "Mara",
273 "Mareah",
274 "Mari",
275 "Maria",
276 "Marie",
277 "Mary",
278 "Maura",
279 "Maure",
280 "Meara",
281 "Merrie",
282 "Merry",
283 "Mira",
284 "Moira",
285 "Mora",
286 "Moria",
287 "Moyra",
288 "Muire",
289 "Myra",
290 "Myrah" });
291 }
292
293 /**
294 * Match data computed from http://www.lanw.com/java/phonetic/default.htm
295 */
296 @Test
297 public void testIsMetaphoneEqualParis() {
298 this.assertIsMetaphoneEqual("Paris", new String[] { "Pearcy", "Perris", "Piercy", "Pierz", "Pryse" });
299 }
300
301 /**
302 * Match data computed from http://www.lanw.com/java/phonetic/default.htm
303 */
304 @Test
305 public void testIsMetaphoneEqualPeter() {
306 this.assertIsMetaphoneEqual(
307 "Peter",
308 new String[] { "Peadar", "Peder", "Pedro", "Peter", "Petr", "Peyter", "Pieter", "Pietro", "Piotr" });
309 }
310
311 /**
312 * Match data computed from http://www.lanw.com/java/phonetic/default.htm
313 */
314 @Test
315 public void testIsMetaphoneEqualRay() {
316 this.assertIsMetaphoneEqual("Ray", new String[] { "Ray", "Rey", "Roi", "Roy", "Ruy" });
317 }
318
319 /**
320 * Match data computed from http://www.lanw.com/java/phonetic/default.htm
321 */
322 @Test
323 public void testIsMetaphoneEqualSusan() {
324 this.assertIsMetaphoneEqual(
325 "Susan",
326 new String[] {
327 "Siusan",
328 "Sosanna",
329 "Susan",
330 "Susana",
331 "Susann",
332 "Susanna",
333 "Susannah",
334 "Susanne",
335 "Suzann",
336 "Suzanna",
337 "Suzanne",
338 "Zuzana" });
339 }
340
341 /**
342 * Initial WR case.
343 *
344 * Match data computed from http://www.lanw.com/java/phonetic/default.htm
345 */
346 @Test
347 public void testIsMetaphoneEqualWright() {
348 this.assertIsMetaphoneEqual("Wright", new String[] { "Rota", "Rudd", "Ryde" });
349 }
350
351 /**
352 * Match data computed from http://www.lanw.com/java/phonetic/default.htm
353 */
354 @Test
355 public void testIsMetaphoneEqualXalan() {
356 this.assertIsMetaphoneEqual(
357 "Xalan",
358 new String[] { "Celene", "Celina", "Celine", "Selena", "Selene", "Selina", "Seline", "Suellen", "Xylina" });
359 }
360
361 @Test
362 public void testMetaphone() {
363 assertEquals("HL", this.getStringEncoder().metaphone("howl"));
364 assertEquals("TSTN", this.getStringEncoder().metaphone("testing"));
365 assertEquals("0", this.getStringEncoder().metaphone("The"));
366 assertEquals("KK", this.getStringEncoder().metaphone("quick"));
367 assertEquals("BRN", this.getStringEncoder().metaphone("brown"));
368 assertEquals("FKS", this.getStringEncoder().metaphone("fox"));
369 assertEquals("JMPT", this.getStringEncoder().metaphone("jumped"));
370 assertEquals("OFR", this.getStringEncoder().metaphone("over"));
371 assertEquals("0", this.getStringEncoder().metaphone("the"));
372 assertEquals("LS", this.getStringEncoder().metaphone("lazy"));
373 assertEquals("TKS", this.getStringEncoder().metaphone("dogs"));
374 }
375
376 @Test
377 public void testWordEndingInMB() {
378 assertEquals( "KM", this.getStringEncoder().metaphone("COMB") );
379 assertEquals( "TM", this.getStringEncoder().metaphone("TOMB") );
380 assertEquals( "WM", this.getStringEncoder().metaphone("WOMB") );
381 }
382
383 @Test
384 public void testDiscardOfSCEOrSCIOrSCY() {
385 assertEquals( "SNS", this.getStringEncoder().metaphone("SCIENCE") );
386 assertEquals( "SN", this.getStringEncoder().metaphone("SCENE") );
387 assertEquals( "S", this.getStringEncoder().metaphone("SCY") );
388 }
389
390 /**
391 * Tests (CODEC-57) Metaphone.metaphone(String) returns an empty string when passed the word "why"
392 */
393 @Test
394 public void testWhy() {
395 // PHP returns "H". The original metaphone returns an empty string.
396 assertEquals("", this.getStringEncoder().metaphone("WHY"));
397 }
398
399 @Test
400 public void testWordsWithCIA() {
401 assertEquals( "XP", this.getStringEncoder().metaphone("CIAPO") );
402 }
403
404 @Test
405 public void testTranslateOfSCHAndCH() {
406 assertEquals( "SKTL", this.getStringEncoder().metaphone("SCHEDULE") );
407 assertEquals( "SKMT", this.getStringEncoder().metaphone("SCHEMATIC") );
408
409 assertEquals( "KRKT", this.getStringEncoder().metaphone("CHARACTER") );
410 assertEquals( "TX", this.getStringEncoder().metaphone("TEACH") );
411 }
412
413 @Test
414 public void testTranslateToJOfDGEOrDGIOrDGY() {
415 assertEquals( "TJ", this.getStringEncoder().metaphone("DODGY") );
416 assertEquals( "TJ", this.getStringEncoder().metaphone("DODGE") );
417 assertEquals( "AJMT", this.getStringEncoder().metaphone("ADGIEMTI") );
418 }
419
420 @Test
421 public void testDiscardOfSilentHAfterG() {
422 assertEquals( "KNT", this.getStringEncoder().metaphone("GHENT") );
423 assertEquals( "B", this.getStringEncoder().metaphone("BAUGH") );
424 }
425
426 @Test
427 public void testDiscardOfSilentGN() {
428 // NOTE: This does not test for silent GN, but for starting with GN
429 assertEquals( "N", this.getStringEncoder().metaphone("GNU") );
430
431 // NOTE: Trying to test for GNED, but expected code does not appear to execute
432 assertEquals( "SNT", this.getStringEncoder().metaphone("SIGNED") );
433 }
434
435 @Test
436 public void testPHTOF() {
437 assertEquals( "FX", this.getStringEncoder().metaphone("PHISH") );
438 }
439
440 @Test
441 public void testSHAndSIOAndSIAToX() {
442 assertEquals( "XT", this.getStringEncoder().metaphone("SHOT") );
443 assertEquals( "OTXN", this.getStringEncoder().metaphone("ODSIAN") );
444 assertEquals( "PLXN", this.getStringEncoder().metaphone("PULSION") );
445 }
446
447 @Test
448 public void testTIOAndTIAToX() {
449 assertEquals( "OX", this.getStringEncoder().metaphone("OTIA") );
450 assertEquals( "PRXN", this.getStringEncoder().metaphone("PORTION") );
451 }
452
453 @Test
454 public void testTCH() {
455 assertEquals( "RX", this.getStringEncoder().metaphone("RETCH") );
456 assertEquals( "WX", this.getStringEncoder().metaphone("WATCH") );
457 }
458
459 @Test
460 public void testExceedLength() {
461 // should be AKSKS, but istruncated by Max Code Length
462 assertEquals( "AKSK", this.getStringEncoder().metaphone("AXEAXE") );
463 }
464
465 @Test
466 public void testSetMaxLengthWithTruncation() {
467 // should be AKSKS, but istruncated by Max Code Length
468 this.getStringEncoder().setMaxCodeLen( 6 );
469 assertEquals( "AKSKSK", this.getStringEncoder().metaphone("AXEAXEAXE") );
470 }
471
472 public void validateFixture(final String[][] pairs) {
473 if (pairs.length == 0) {
474 fail("Test fixture is empty");
475 }
476 for (int i = 0; i < pairs.length; i++) {
477 if (pairs[i].length != 2) {
478 fail("Error in test fixture in the data array at index " + i);
479 }
480 }
481 }
482
483 }