View Javadoc

1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one or more
3    * contributor license agreements.  See the NOTICE file distributed with
4    * this work for additional information regarding copyright ownership.
5    * The ASF licenses this file to You under the Apache License, Version 2.0
6    * (the "License"); you may not use this file except in compliance with
7    * the License.  You may obtain a copy of the License at
8    *
9    *      http://www.apache.org/licenses/LICENSE-2.0
10   *
11   * Unless required by applicable law or agreed to in writing, software
12   * distributed under the License is distributed on an "AS IS" BASIS,
13   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14   * See the License for the specific language governing permissions and
15   * limitations under the License.
16   */
17  
18  package org.apache.commons.codec.language;
19  
20  import static org.junit.Assert.assertEquals;
21  import static org.junit.Assert.assertTrue;
22  import static org.junit.Assert.fail;
23  
24  import org.apache.commons.codec.StringEncoder;
25  import org.apache.commons.codec.StringEncoderAbstractTest;
26  import org.junit.Test;
27  
28  /**
29   * @version $Id: MetaphoneTest.html 889935 2013-12-11 05:05:13Z ggregory $
30   */
31  public class MetaphoneTest extends StringEncoderAbstractTest {
32  
33      public void assertIsMetaphoneEqual(String source, String[] matches) {
34          // match source to all matches
35          for (String matche : matches) {
36              assertTrue("Source: " + source + ", should have same Metaphone as: " + matche,
37                         this.getMetaphone().isMetaphoneEqual(source, matche));
38          }
39          // match to each other
40          for (String matche : matches) {
41              for (String matche2 : matches) {
42                  assertTrue(this.getMetaphone().isMetaphoneEqual(matche, matche2));
43              }
44          }
45      }
46  
47      public void assertMetaphoneEqual(String[][] pairs) {
48          this.validateFixture(pairs);
49          for (String[] pair : pairs) {
50              String name0 = pair[0];
51              String name1 = pair[1];
52              String failMsg = "Expected match between " + name0 + " and " + name1;
53              assertTrue(failMsg, this.getMetaphone().isMetaphoneEqual(name0, name1));
54              assertTrue(failMsg, this.getMetaphone().isMetaphoneEqual(name1, name0));
55          }
56      }
57  
58      /**
59       * @return Returns the metaphone.
60       */
61      private Metaphone getMetaphone() {
62          return (Metaphone) this.getStringEncoder();
63      }
64  
65      @Override
66      protected StringEncoder createStringEncoder() {
67          return new Metaphone();
68      }
69  
70      @Test
71      public void testIsMetaphoneEqual1() {
72          this.assertMetaphoneEqual(new String[][] { { "Case", "case" }, {
73                  "CASE", "Case" }, {
74                  "caSe", "cAsE" }, {
75                  "quick", "cookie" }
76          });
77      }
78  
79      /**
80       * Matches computed from http://www.lanw.com/java/phonetic/default.htm
81       */
82      @Test
83      public void testIsMetaphoneEqual2() {
84          this.assertMetaphoneEqual(new String[][] { { "Lawrence", "Lorenza" }, {
85                  "Gary", "Cahra" }, });
86      }
87  
88      /**
89       * Initial AE case.
90       *
91       * Match data computed from http://www.lanw.com/java/phonetic/default.htm
92       */
93      @Test
94      public void testIsMetaphoneEqualAero() {
95          this.assertIsMetaphoneEqual("Aero", new String[] { "Eure" });
96      }
97  
98      /**
99       * Initial WH case.
100      *
101      * Match data computed from http://www.lanw.com/java/phonetic/default.htm
102      */
103     @Test
104     public void testIsMetaphoneEqualWhite() {
105         this.assertIsMetaphoneEqual(
106             "White",
107             new String[] { "Wade", "Wait", "Waite", "Wat", "Whit", "Wiatt", "Wit", "Wittie", "Witty", "Wood", "Woodie", "Woody" });
108     }
109 
110     /**
111      * Initial A, not followed by an E case.
112      *
113      * Match data computed from http://www.lanw.com/java/phonetic/default.htm
114      */
115     @Test
116     public void testIsMetaphoneEqualAlbert() {
117         this.assertIsMetaphoneEqual("Albert", new String[] { "Ailbert", "Alberik", "Albert", "Alberto", "Albrecht" });
118     }
119 
120     /**
121      * Match data computed from http://www.lanw.com/java/phonetic/default.htm
122      */
123     @Test
124     public void testIsMetaphoneEqualGary() {
125         this.assertIsMetaphoneEqual(
126             "Gary",
127             new String[] {
128                 "Cahra",
129                 "Cara",
130                 "Carey",
131                 "Cari",
132                 "Caria",
133                 "Carie",
134                 "Caro",
135                 "Carree",
136                 "Carri",
137                 "Carrie",
138                 "Carry",
139                 "Cary",
140                 "Cora",
141                 "Corey",
142                 "Cori",
143                 "Corie",
144                 "Correy",
145                 "Corri",
146                 "Corrie",
147                 "Corry",
148                 "Cory",
149                 "Gray",
150                 "Kara",
151                 "Kare",
152                 "Karee",
153                 "Kari",
154                 "Karia",
155                 "Karie",
156                 "Karrah",
157                 "Karrie",
158                 "Karry",
159                 "Kary",
160                 "Keri",
161                 "Kerri",
162                 "Kerrie",
163                 "Kerry",
164                 "Kira",
165                 "Kiri",
166                 "Kora",
167                 "Kore",
168                 "Kori",
169                 "Korie",
170                 "Korrie",
171                 "Korry" });
172     }
173 
174     /**
175      * Match data computed from http://www.lanw.com/java/phonetic/default.htm
176      */
177     @Test
178     public void testIsMetaphoneEqualJohn() {
179         this.assertIsMetaphoneEqual(
180             "John",
181             new String[] {
182                 "Gena",
183                 "Gene",
184                 "Genia",
185                 "Genna",
186                 "Genni",
187                 "Gennie",
188                 "Genny",
189                 "Giana",
190                 "Gianna",
191                 "Gina",
192                 "Ginni",
193                 "Ginnie",
194                 "Ginny",
195                 "Jaine",
196                 "Jan",
197                 "Jana",
198                 "Jane",
199                 "Janey",
200                 "Jania",
201                 "Janie",
202                 "Janna",
203                 "Jany",
204                 "Jayne",
205                 "Jean",
206                 "Jeana",
207                 "Jeane",
208                 "Jeanie",
209                 "Jeanna",
210                 "Jeanne",
211                 "Jeannie",
212                 "Jen",
213                 "Jena",
214                 "Jeni",
215                 "Jenn",
216                 "Jenna",
217                 "Jennee",
218                 "Jenni",
219                 "Jennie",
220                 "Jenny",
221                 "Jinny",
222                 "Jo Ann",
223                 "Jo-Ann",
224                 "Jo-Anne",
225                 "Joan",
226                 "Joana",
227                 "Joane",
228                 "Joanie",
229                 "Joann",
230                 "Joanna",
231                 "Joanne",
232                 "Joeann",
233                 "Johna",
234                 "Johnna",
235                 "Joni",
236                 "Jonie",
237                 "Juana",
238                 "June",
239                 "Junia",
240                 "Junie" });
241     }
242 
243     /**
244      * Initial KN case.
245      *
246      * Match data computed from http://www.lanw.com/java/phonetic/default.htm
247      */
248     @Test
249     public void testIsMetaphoneEqualKnight() {
250         this.assertIsMetaphoneEqual(
251             "Knight",
252             new String[] {
253                 "Hynda",
254                 "Nada",
255                 "Nadia",
256                 "Nady",
257                 "Nat",
258                 "Nata",
259                 "Natty",
260                 "Neda",
261                 "Nedda",
262                 "Nedi",
263                 "Netta",
264                 "Netti",
265                 "Nettie",
266                 "Netty",
267                 "Nita",
268                 "Nydia" });
269     }
270     /**
271      * Match data computed from http://www.lanw.com/java/phonetic/default.htm
272      */
273     @Test
274     public void testIsMetaphoneEqualMary() {
275         this.assertIsMetaphoneEqual(
276             "Mary",
277             new String[] {
278                 "Mair",
279                 "Maire",
280                 "Mara",
281                 "Mareah",
282                 "Mari",
283                 "Maria",
284                 "Marie",
285                 "Mary",
286                 "Maura",
287                 "Maure",
288                 "Meara",
289                 "Merrie",
290                 "Merry",
291                 "Mira",
292                 "Moira",
293                 "Mora",
294                 "Moria",
295                 "Moyra",
296                 "Muire",
297                 "Myra",
298                 "Myrah" });
299     }
300 
301     /**
302      * Match data computed from http://www.lanw.com/java/phonetic/default.htm
303      */
304     @Test
305     public void testIsMetaphoneEqualParis() {
306         this.assertIsMetaphoneEqual("Paris", new String[] { "Pearcy", "Perris", "Piercy", "Pierz", "Pryse" });
307     }
308 
309     /**
310      * Match data computed from http://www.lanw.com/java/phonetic/default.htm
311      */
312     @Test
313     public void testIsMetaphoneEqualPeter() {
314         this.assertIsMetaphoneEqual(
315             "Peter",
316             new String[] { "Peadar", "Peder", "Pedro", "Peter", "Petr", "Peyter", "Pieter", "Pietro", "Piotr" });
317     }
318 
319     /**
320      * Match data computed from http://www.lanw.com/java/phonetic/default.htm
321      */
322     @Test
323     public void testIsMetaphoneEqualRay() {
324         this.assertIsMetaphoneEqual("Ray", new String[] { "Ray", "Rey", "Roi", "Roy", "Ruy" });
325     }
326 
327     /**
328      * Match data computed from http://www.lanw.com/java/phonetic/default.htm
329      */
330     @Test
331     public void testIsMetaphoneEqualSusan() {
332         this.assertIsMetaphoneEqual(
333             "Susan",
334             new String[] {
335                 "Siusan",
336                 "Sosanna",
337                 "Susan",
338                 "Susana",
339                 "Susann",
340                 "Susanna",
341                 "Susannah",
342                 "Susanne",
343                 "Suzann",
344                 "Suzanna",
345                 "Suzanne",
346                 "Zuzana" });
347     }
348 
349     /**
350      * Initial WR case.
351      *
352      * Match data computed from http://www.lanw.com/java/phonetic/default.htm
353      */
354     @Test
355     public void testIsMetaphoneEqualWright() {
356         this.assertIsMetaphoneEqual("Wright", new String[] { "Rota", "Rudd", "Ryde" });
357     }
358 
359     /**
360      * Match data computed from http://www.lanw.com/java/phonetic/default.htm
361      */
362     @Test
363     public void testIsMetaphoneEqualXalan() {
364         this.assertIsMetaphoneEqual(
365             "Xalan",
366             new String[] { "Celene", "Celina", "Celine", "Selena", "Selene", "Selina", "Seline", "Suellen", "Xylina" });
367     }
368 
369     @Test
370     public void testMetaphone() {
371         assertEquals("HL", this.getMetaphone().metaphone("howl"));
372         assertEquals("TSTN", this.getMetaphone().metaphone("testing"));
373         assertEquals("0", this.getMetaphone().metaphone("The"));
374         assertEquals("KK", this.getMetaphone().metaphone("quick"));
375         assertEquals("BRN", this.getMetaphone().metaphone("brown"));
376         assertEquals("FKS", this.getMetaphone().metaphone("fox"));
377         assertEquals("JMPT", this.getMetaphone().metaphone("jumped"));
378         assertEquals("OFR", this.getMetaphone().metaphone("over"));
379         assertEquals("0", this.getMetaphone().metaphone("the"));
380         assertEquals("LS", this.getMetaphone().metaphone("lazy"));
381         assertEquals("TKS", this.getMetaphone().metaphone("dogs"));
382     }
383 
384     @Test
385     public void testWordEndingInMB() {
386         assertEquals( "KM", this.getMetaphone().metaphone("COMB") );
387         assertEquals( "TM", this.getMetaphone().metaphone("TOMB") );
388         assertEquals( "WM", this.getMetaphone().metaphone("WOMB") );
389     }
390 
391     @Test
392     public void testDiscardOfSCEOrSCIOrSCY() {
393         assertEquals( "SNS", this.getMetaphone().metaphone("SCIENCE") );
394         assertEquals( "SN", this.getMetaphone().metaphone("SCENE") );
395         assertEquals( "S", this.getMetaphone().metaphone("SCY") );
396     }
397 
398     /**
399      * Tests (CODEC-57) Metaphone.metaphone(String) returns an empty string when passed the word "why"
400      */
401     @Test
402     public void testWhy() {
403         // PHP returns "H". The original metaphone returns an empty string.
404         assertEquals("", this.getMetaphone().metaphone("WHY"));
405     }
406 
407     @Test
408     public void testWordsWithCIA() {
409         assertEquals( "XP", this.getMetaphone().metaphone("CIAPO") );
410     }
411 
412     @Test
413     public void testTranslateOfSCHAndCH() {
414         assertEquals( "SKTL", this.getMetaphone().metaphone("SCHEDULE") );
415         assertEquals( "SKMT", this.getMetaphone().metaphone("SCHEMATIC") );
416 
417         assertEquals( "KRKT", this.getMetaphone().metaphone("CHARACTER") );
418         assertEquals( "TX", this.getMetaphone().metaphone("TEACH") );
419     }
420 
421     @Test
422     public void testTranslateToJOfDGEOrDGIOrDGY() {
423         assertEquals( "TJ", this.getMetaphone().metaphone("DODGY") );
424         assertEquals( "TJ", this.getMetaphone().metaphone("DODGE") );
425         assertEquals( "AJMT", this.getMetaphone().metaphone("ADGIEMTI") );
426     }
427 
428     @Test
429     public void testDiscardOfSilentHAfterG() {
430         assertEquals( "KNT", this.getMetaphone().metaphone("GHENT") );
431         assertEquals( "B", this.getMetaphone().metaphone("BAUGH") );
432     }
433 
434     @Test
435     public void testDiscardOfSilentGN() {
436         // NOTE: This does not test for silent GN, but for starting with GN
437         assertEquals( "N", this.getMetaphone().metaphone("GNU") );
438 
439         // NOTE: Trying to test for GNED, but expected code does not appear to execute
440         assertEquals( "SNT", this.getMetaphone().metaphone("SIGNED") );
441     }
442 
443     @Test
444     public void testPHTOF() {
445         assertEquals( "FX", this.getMetaphone().metaphone("PHISH") );
446     }
447 
448     @Test
449     public void testSHAndSIOAndSIAToX() {
450         assertEquals( "XT", this.getMetaphone().metaphone("SHOT") );
451         assertEquals( "OTXN", this.getMetaphone().metaphone("ODSIAN") );
452         assertEquals( "PLXN", this.getMetaphone().metaphone("PULSION") );
453     }
454 
455     @Test
456     public void testTIOAndTIAToX() {
457         assertEquals( "OX", this.getMetaphone().metaphone("OTIA") );
458         assertEquals( "PRXN", this.getMetaphone().metaphone("PORTION") );
459     }
460 
461     @Test
462     public void testTCH() {
463         assertEquals( "RX", this.getMetaphone().metaphone("RETCH") );
464         assertEquals( "WX", this.getMetaphone().metaphone("WATCH") );
465     }
466 
467     @Test
468     public void testExceedLength() {
469         // should be AKSKS, but istruncated by Max Code Length
470         assertEquals( "AKSK", this.getMetaphone().metaphone("AXEAXE") );
471     }
472 
473     @Test
474     public void testSetMaxLengthWithTruncation() {
475         // should be AKSKS, but istruncated by Max Code Length
476         this.getMetaphone().setMaxCodeLen( 6 );
477         assertEquals( "AKSKSK", this.getMetaphone().metaphone("AXEAXEAXE") );
478     }
479 
480     public void validateFixture(String[][] pairs) {
481         if (pairs.length == 0) {
482             fail("Test fixture is empty");
483         }
484         for (int i = 0; i < pairs.length; i++) {
485             if (pairs[i].length != 2) {
486                 fail("Error in test fixture in the data array at index " + i);
487             }
488         }
489     }
490 
491 }