1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one or more
3    * contributor license agreements.  See the NOTICE file distributed with
4    * this work for additional information regarding copyright ownership.
5    * The ASF licenses this file to You under the Apache License, Version 2.0
6    * (the "License"); you may not use this file except in compliance with
7    * the License.  You may obtain a copy of the License at
8    * 
9    *      http://www.apache.org/licenses/LICENSE-2.0
10   * 
11   * Unless required by applicable law or agreed to in writing, software
12   * distributed under the License is distributed on an "AS IS" BASIS,
13   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14   * See the License for the specific language governing permissions and
15   * limitations under the License.
16   */ 
17  
18  package org.apache.commons.codec.language;
19  
20  import junit.framework.Test;
21  import junit.framework.TestSuite;
22  
23  import org.apache.commons.codec.StringEncoder;
24  import org.apache.commons.codec.StringEncoderAbstractTest;
25  
26  /**
27   * @author Apache Software Foundation
28   * @version $Id: MetaphoneTest.java 588074 2007-10-24 23:04:56Z ggregory $
29   */
30  public class MetaphoneTest extends StringEncoderAbstractTest {
31  
32      public static Test suite() {
33          return new TestSuite(MetaphoneTest.class);
34      }
35  
36      private Metaphone metaphone = null;
37  
38      public MetaphoneTest(String name) {
39          super(name);
40      }
41  
42      public void assertIsMetaphoneEqual(String source, String[] matches) {
43          // match source to all matches
44          for (int i = 0; i < matches.length; i++) {
45              assertTrue("Source: " + source + ", should have same Metaphone as: " + matches[i],
46                         this.getMetaphone().isMetaphoneEqual(source, matches[i]));
47          }
48          // match to each other
49          for (int i = 0; i < matches.length; i++) {
50              for (int j = 0; j < matches.length; j++) {
51                  assertTrue(this.getMetaphone().isMetaphoneEqual(matches[i], matches[j]));
52              }
53          }
54      }
55  
56      public void assertMetaphoneEqual(String[][] pairs) {
57          this.validateFixture(pairs);
58          for (int i = 0; i < pairs.length; i++) {
59              String name0 = pairs[i][0];
60              String name1 = pairs[i][1];
61              String failMsg = "Expected match between " + name0 + " and " + name1;
62              assertTrue(failMsg, this.getMetaphone().isMetaphoneEqual(name0, name1));
63              assertTrue(failMsg, this.getMetaphone().isMetaphoneEqual(name1, name0));
64          }
65      }
66      /**
67  	 * @return Returns the metaphone.
68  	 */
69      private Metaphone getMetaphone() {
70          return this.metaphone;
71      }
72  
73      protected StringEncoder makeEncoder() {
74          return new Metaphone();
75      }
76  
77      /**
78  	 * @param metaphone
79  	 *                  The metaphone to set.
80  	 */
81      private void setMetaphone(Metaphone metaphone) {
82          this.metaphone = metaphone;
83      }
84  
85      public void setUp() throws Exception {
86          super.setUp();
87          this.setMetaphone(new Metaphone());
88      }
89  
90      public void tearDown() throws Exception {
91          super.tearDown();
92          this.setMetaphone(null);
93      }
94  
95      public void testIsMetaphoneEqual1() {
96          this.assertMetaphoneEqual(new String[][] { { "Case", "case" }, {
97                  "CASE", "Case" }, {
98                  "caSe", "cAsE" }, {
99                  "quick", "cookie" }
100         });
101     }
102 
103     /**
104 	 * Matches computed from http://www.lanw.com/java/phonetic/default.htm
105 	 */
106     public void testIsMetaphoneEqual2() {
107         this.assertMetaphoneEqual(new String[][] { { "Lawrence", "Lorenza" }, {
108                 "Gary", "Cahra" }, });
109     }
110 
111     /**
112 	 * Initial AE case.
113 	 * 
114 	 * Match data computed from http://www.lanw.com/java/phonetic/default.htm
115 	 */
116     public void testIsMetaphoneEqualAero() {
117         this.assertIsMetaphoneEqual("Aero", new String[] { "Eure" });
118     }
119 
120     /**
121 	 * Initial WH case.
122 	 * 
123 	 * Match data computed from http://www.lanw.com/java/phonetic/default.htm
124 	 */
125     public void testIsMetaphoneEqualWhite() {
126         this.assertIsMetaphoneEqual(
127             "White",
128             new String[] { "Wade", "Wait", "Waite", "Wat", "Whit", "Wiatt", "Wit", "Wittie", "Witty", "Wood", "Woodie", "Woody" });
129     }
130 
131     /**
132 	 * Initial A, not followed by an E case.
133 	 * 
134 	 * Match data computed from http://www.lanw.com/java/phonetic/default.htm
135 	 */
136     public void testIsMetaphoneEqualAlbert() {
137         this.assertIsMetaphoneEqual("Albert", new String[] { "Ailbert", "Alberik", "Albert", "Alberto", "Albrecht" });
138     }
139 
140     /**
141 	 * Match data computed from http://www.lanw.com/java/phonetic/default.htm
142 	 */
143     public void testIsMetaphoneEqualGary() {
144         this.assertIsMetaphoneEqual(
145             "Gary",
146             new String[] {
147                 "Cahra",
148                 "Cara",
149                 "Carey",
150                 "Cari",
151                 "Caria",
152                 "Carie",
153                 "Caro",
154                 "Carree",
155                 "Carri",
156                 "Carrie",
157                 "Carry",
158                 "Cary",
159                 "Cora",
160                 "Corey",
161                 "Cori",
162                 "Corie",
163                 "Correy",
164                 "Corri",
165                 "Corrie",
166                 "Corry",
167                 "Cory",
168                 "Gray",
169                 "Kara",
170                 "Kare",
171                 "Karee",
172                 "Kari",
173                 "Karia",
174                 "Karie",
175                 "Karrah",
176                 "Karrie",
177                 "Karry",
178                 "Kary",
179                 "Keri",
180                 "Kerri",
181                 "Kerrie",
182                 "Kerry",
183                 "Kira",
184                 "Kiri",
185                 "Kora",
186                 "Kore",
187                 "Kori",
188                 "Korie",
189                 "Korrie",
190                 "Korry" });
191     }
192 
193     /**
194 	 * Match data computed from http://www.lanw.com/java/phonetic/default.htm
195 	 */
196     public void testIsMetaphoneEqualJohn() {
197         this.assertIsMetaphoneEqual(
198             "John",
199             new String[] {
200                 "Gena",
201                 "Gene",
202                 "Genia",
203                 "Genna",
204                 "Genni",
205                 "Gennie",
206                 "Genny",
207                 "Giana",
208                 "Gianna",
209                 "Gina",
210                 "Ginni",
211                 "Ginnie",
212                 "Ginny",
213                 "Jaine",
214                 "Jan",
215                 "Jana",
216                 "Jane",
217                 "Janey",
218                 "Jania",
219                 "Janie",
220                 "Janna",
221                 "Jany",
222                 "Jayne",
223                 "Jean",
224                 "Jeana",
225                 "Jeane",
226                 "Jeanie",
227                 "Jeanna",
228                 "Jeanne",
229                 "Jeannie",
230                 "Jen",
231                 "Jena",
232                 "Jeni",
233                 "Jenn",
234                 "Jenna",
235                 "Jennee",
236                 "Jenni",
237                 "Jennie",
238                 "Jenny",
239                 "Jinny",
240                 "Jo Ann",
241                 "Jo-Ann",
242                 "Jo-Anne",
243                 "Joan",
244                 "Joana",
245                 "Joane",
246                 "Joanie",
247                 "Joann",
248                 "Joanna",
249                 "Joanne",
250                 "Joeann",
251                 "Johna",
252                 "Johnna",
253                 "Joni",
254                 "Jonie",
255                 "Juana",
256                 "June",
257                 "Junia",
258                 "Junie" });
259     }
260 
261     /**
262 	 * Initial KN case.
263 	 * 
264 	 * Match data computed from http://www.lanw.com/java/phonetic/default.htm
265 	 */
266     public void testIsMetaphoneEqualKnight() {
267         this.assertIsMetaphoneEqual(
268             "Knight",
269             new String[] {
270                 "Hynda",
271                 "Nada",
272                 "Nadia",
273                 "Nady",
274                 "Nat",
275                 "Nata",
276                 "Natty",
277                 "Neda",
278                 "Nedda",
279                 "Nedi",
280                 "Netta",
281                 "Netti",
282                 "Nettie",
283                 "Netty",
284                 "Nita",
285                 "Nydia" });
286     }
287     /**
288 	 * Match data computed from http://www.lanw.com/java/phonetic/default.htm
289 	 */
290     public void testIsMetaphoneEqualMary() {
291         this.assertIsMetaphoneEqual(
292             "Mary",
293             new String[] {
294                 "Mair",
295                 "Maire",
296                 "Mara",
297                 "Mareah",
298                 "Mari",
299                 "Maria",
300                 "Marie",
301                 "Mary",
302                 "Maura",
303                 "Maure",
304                 "Meara",
305                 "Merrie",
306                 "Merry",
307                 "Mira",
308                 "Moira",
309                 "Mora",
310                 "Moria",
311                 "Moyra",
312                 "Muire",
313                 "Myra",
314                 "Myrah" });
315     }
316 
317     /**
318 	 * Match data computed from http://www.lanw.com/java/phonetic/default.htm
319 	 */
320     public void testIsMetaphoneEqualParis() {
321         this.assertIsMetaphoneEqual("Paris", new String[] { "Pearcy", "Perris", "Piercy", "Pierz", "Pryse" });
322     }
323 
324     /**
325 	 * Match data computed from http://www.lanw.com/java/phonetic/default.htm
326 	 */
327     public void testIsMetaphoneEqualPeter() {
328         this.assertIsMetaphoneEqual(
329             "Peter",
330             new String[] { "Peadar", "Peder", "Pedro", "Peter", "Petr", "Peyter", "Pieter", "Pietro", "Piotr" });
331     }
332 
333     /**
334 	 * Match data computed from http://www.lanw.com/java/phonetic/default.htm
335 	 */
336     public void testIsMetaphoneEqualRay() {
337         this.assertIsMetaphoneEqual("Ray", new String[] { "Ray", "Rey", "Roi", "Roy", "Ruy" });
338     }
339 
340     /**
341 	 * Match data computed from http://www.lanw.com/java/phonetic/default.htm
342 	 */
343     public void testIsMetaphoneEqualSusan() {
344         this.assertIsMetaphoneEqual(
345             "Susan",
346             new String[] {
347                 "Siusan",
348                 "Sosanna",
349                 "Susan",
350                 "Susana",
351                 "Susann",
352                 "Susanna",
353                 "Susannah",
354                 "Susanne",
355                 "Suzann",
356                 "Suzanna",
357                 "Suzanne",
358                 "Zuzana" });
359     }
360 
361     /**
362 	 * Initial WR case.
363 	 * 
364 	 * Match data computed from http://www.lanw.com/java/phonetic/default.htm
365 	 */
366     public void testIsMetaphoneEqualWright() {
367         this.assertIsMetaphoneEqual("Wright", new String[] { "Rota", "Rudd", "Ryde" });
368     }
369 
370     /**
371 	 * Match data computed from http://www.lanw.com/java/phonetic/default.htm
372 	 */
373     public void testIsMetaphoneEqualXalan() {
374         this.assertIsMetaphoneEqual(
375             "Xalan",
376             new String[] { "Celene", "Celina", "Celine", "Selena", "Selene", "Selina", "Seline", "Suellen", "Xylina" });
377     }
378 
379     public void testMetaphone() {
380 		assertEquals("HL", this.getMetaphone().metaphone("howl"));
381         assertEquals("TSTN", this.getMetaphone().metaphone("testing"));
382         assertEquals("0", this.getMetaphone().metaphone("The"));
383         assertEquals("KK", this.getMetaphone().metaphone("quick"));
384         assertEquals("BRN", this.getMetaphone().metaphone("brown"));
385         assertEquals("FKS", this.getMetaphone().metaphone("fox"));
386         assertEquals("JMPT", this.getMetaphone().metaphone("jumped"));
387         assertEquals("OFR", this.getMetaphone().metaphone("over"));
388         assertEquals("0", this.getMetaphone().metaphone("the"));
389         assertEquals("LS", this.getMetaphone().metaphone("lazy"));
390         assertEquals("TKS", this.getMetaphone().metaphone("dogs"));
391     }
392 	
393 	public void testWordEndingInMB() {
394 		assertEquals( "KM", this.getMetaphone().metaphone("COMB") );
395 		assertEquals( "TM", this.getMetaphone().metaphone("TOMB") );
396 		assertEquals( "WM", this.getMetaphone().metaphone("WOMB") );
397 	}
398 
399 	public void testDiscardOfSCEOrSCIOrSCY() {
400 		assertEquals( "SNS", this.getMetaphone().metaphone("SCIENCE") );
401 		assertEquals( "SN", this.getMetaphone().metaphone("SCENE") );
402 		assertEquals( "S", this.getMetaphone().metaphone("SCY") );
403 	}
404 
405     /**
406      * Tests (CODEC-57) Metaphone.metaphone(String) returns an empty string when passed the word "why"
407      */
408     public void testWhy() {
409         assertEquals("H", this.getMetaphone().metaphone("WHY"));
410     }
411 
412     public void testWordsWithCIA() {
413         assertEquals( "XP", this.getMetaphone().metaphone("CIAPO") );
414     }
415 
416 	public void testTranslateOfSCHAndCH() {
417 		assertEquals( "SKTL", this.getMetaphone().metaphone("SCHEDULE") );
418 		assertEquals( "SKMT", this.getMetaphone().metaphone("SCHEMATIC") );
419 
420 		assertEquals( "KRKT", this.getMetaphone().metaphone("CHARACTER") );
421 		assertEquals( "TX", this.getMetaphone().metaphone("TEACH") );
422 	}
423 
424 	public void testTranslateToJOfDGEOrDGIOrDGY() {
425 		assertEquals( "TJ", this.getMetaphone().metaphone("DODGY") );
426 		assertEquals( "TJ", this.getMetaphone().metaphone("DODGE") );
427 		assertEquals( "AJMT", this.getMetaphone().metaphone("ADGIEMTI") );
428 	}
429 
430 	public void testDiscardOfSilentHAfterG() {
431 		assertEquals( "KNT", this.getMetaphone().metaphone("GHENT") );
432 		assertEquals( "B", this.getMetaphone().metaphone("BAUGH") );
433 	}
434 
435 	public void testDiscardOfSilentGN() {
436 		assertEquals( "N", this.getMetaphone().metaphone("GNU") );
437 		assertEquals( "SNT", this.getMetaphone().metaphone("SIGNED") );
438 	}
439 
440 	public void testPHTOF() {
441 		assertEquals( "FX", this.getMetaphone().metaphone("PHISH") );
442 	}
443 
444 	public void testSHAndSIOAndSIAToX() {
445 		assertEquals( "XT", this.getMetaphone().metaphone("SHOT") );
446 		assertEquals( "OTXN", this.getMetaphone().metaphone("ODSIAN") );
447 		assertEquals( "PLXN", this.getMetaphone().metaphone("PULSION") );
448 	}
449 
450 	public void testTIOAndTIAToX() {
451 		assertEquals( "OX", this.getMetaphone().metaphone("OTIA") );
452 		assertEquals( "PRXN", this.getMetaphone().metaphone("PORTION") );
453 	}
454 
455 	public void testTCH() {
456 		assertEquals( "RX", this.getMetaphone().metaphone("RETCH") );
457 		assertEquals( "WX", this.getMetaphone().metaphone("WATCH") );
458 	}
459 
460 	public void testExceedLength() {
461 		// should be AKSKS, but istruncated by Max Code Length
462 		assertEquals( "AKSK", this.getMetaphone().metaphone("AXEAXE") );
463 	}
464 
465 	public void testSetMaxLengthWithTruncation() {
466 		// should be AKSKS, but istruncated by Max Code Length
467 		this.getMetaphone().setMaxCodeLen( 6 );
468 		assertEquals( "AKSKSK", this.getMetaphone().metaphone("AXEAXEAXE") );
469 	}
470 
471     public void validateFixture(String[][] pairs) {
472         if (pairs.length == 0) {
473             fail("Test fixture is empty");
474         }
475         for (int i = 0; i < pairs.length; i++) {
476             if (pairs[i].length != 2) {
477                 fail("Error in test fixture in the data array at index " + i);
478             }
479         }
480     }
481 
482 }