View Javadoc

1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one or more
3    * contributor license agreements.  See the NOTICE file distributed with
4    * this work for additional information regarding copyright ownership.
5    * The ASF licenses this file to You under the Apache License, Version 2.0
6    * (the "License"); you may not use this file except in compliance with
7    * the License.  You may obtain a copy of the License at
8    *
9    *      http://www.apache.org/licenses/LICENSE-2.0
10   *
11   * Unless required by applicable law or agreed to in writing, software
12   * distributed under the License is distributed on an "AS IS" BASIS,
13   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14   * See the License for the specific language governing permissions and
15   * limitations under the License.
16   */
17  
18  package org.apache.commons.codec.language;
19  
20  import static org.junit.Assert.assertEquals;
21  import static org.junit.Assert.assertTrue;
22  import static org.junit.Assert.fail;
23  
24  import org.apache.commons.codec.StringEncoderAbstractTest;
25  import org.junit.Test;
26  
/**
 * Tests {@link Metaphone}.
 *
 * @version $Id: MetaphoneTest.html 889935 2013-12-11 05:05:13Z ggregory $
 */
30  public class MetaphoneTest extends StringEncoderAbstractTest<Metaphone> {
31  
32      public void assertIsMetaphoneEqual(final String source, final String[] matches) {
33          // match source to all matches
34          for (final String matche : matches) {
35              assertTrue("Source: " + source + ", should have same Metaphone as: " + matche,
36                         this.getStringEncoder().isMetaphoneEqual(source, matche));
37          }
38          // match to each other
39          for (final String matche : matches) {
40              for (final String matche2 : matches) {
41                  assertTrue(this.getStringEncoder().isMetaphoneEqual(matche, matche2));
42              }
43          }
44      }
45  
46      public void assertMetaphoneEqual(final String[][] pairs) {
47          this.validateFixture(pairs);
48          for (final String[] pair : pairs) {
49              final String name0 = pair[0];
50              final String name1 = pair[1];
51              final String failMsg = "Expected match between " + name0 + " and " + name1;
52              assertTrue(failMsg, this.getStringEncoder().isMetaphoneEqual(name0, name1));
53              assertTrue(failMsg, this.getStringEncoder().isMetaphoneEqual(name1, name0));
54          }
55      }
56  
57      @Override
58      protected Metaphone createStringEncoder() {
59          return new Metaphone();
60      }
61  
62      @Test
63      public void testIsMetaphoneEqual1() {
64          this.assertMetaphoneEqual(new String[][] { { "Case", "case" }, {
65                  "CASE", "Case" }, {
66                  "caSe", "cAsE" }, {
67                  "quick", "cookie" }
68          });
69      }
70  
71      /**
72       * Matches computed from http://www.lanw.com/java/phonetic/default.htm
73       */
74      @Test
75      public void testIsMetaphoneEqual2() {
76          this.assertMetaphoneEqual(new String[][] { { "Lawrence", "Lorenza" }, {
77                  "Gary", "Cahra" }, });
78      }
79  
80      /**
81       * Initial AE case.
82       *
83       * Match data computed from http://www.lanw.com/java/phonetic/default.htm
84       */
85      @Test
86      public void testIsMetaphoneEqualAero() {
87          this.assertIsMetaphoneEqual("Aero", new String[] { "Eure" });
88      }
89  
90      /**
91       * Initial WH case.
92       *
93       * Match data computed from http://www.lanw.com/java/phonetic/default.htm
94       */
95      @Test
96      public void testIsMetaphoneEqualWhite() {
97          this.assertIsMetaphoneEqual(
98              "White",
99              new String[] { "Wade", "Wait", "Waite", "Wat", "Whit", "Wiatt", "Wit", "Wittie", "Witty", "Wood", "Woodie", "Woody" });
100     }
101 
102     /**
103      * Initial A, not followed by an E case.
104      *
105      * Match data computed from http://www.lanw.com/java/phonetic/default.htm
106      */
107     @Test
108     public void testIsMetaphoneEqualAlbert() {
109         this.assertIsMetaphoneEqual("Albert", new String[] { "Ailbert", "Alberik", "Albert", "Alberto", "Albrecht" });
110     }
111 
112     /**
113      * Match data computed from http://www.lanw.com/java/phonetic/default.htm
114      */
115     @Test
116     public void testIsMetaphoneEqualGary() {
117         this.assertIsMetaphoneEqual(
118             "Gary",
119             new String[] {
120                 "Cahra",
121                 "Cara",
122                 "Carey",
123                 "Cari",
124                 "Caria",
125                 "Carie",
126                 "Caro",
127                 "Carree",
128                 "Carri",
129                 "Carrie",
130                 "Carry",
131                 "Cary",
132                 "Cora",
133                 "Corey",
134                 "Cori",
135                 "Corie",
136                 "Correy",
137                 "Corri",
138                 "Corrie",
139                 "Corry",
140                 "Cory",
141                 "Gray",
142                 "Kara",
143                 "Kare",
144                 "Karee",
145                 "Kari",
146                 "Karia",
147                 "Karie",
148                 "Karrah",
149                 "Karrie",
150                 "Karry",
151                 "Kary",
152                 "Keri",
153                 "Kerri",
154                 "Kerrie",
155                 "Kerry",
156                 "Kira",
157                 "Kiri",
158                 "Kora",
159                 "Kore",
160                 "Kori",
161                 "Korie",
162                 "Korrie",
163                 "Korry" });
164     }
165 
166     /**
167      * Match data computed from http://www.lanw.com/java/phonetic/default.htm
168      */
169     @Test
170     public void testIsMetaphoneEqualJohn() {
171         this.assertIsMetaphoneEqual(
172             "John",
173             new String[] {
174                 "Gena",
175                 "Gene",
176                 "Genia",
177                 "Genna",
178                 "Genni",
179                 "Gennie",
180                 "Genny",
181                 "Giana",
182                 "Gianna",
183                 "Gina",
184                 "Ginni",
185                 "Ginnie",
186                 "Ginny",
187                 "Jaine",
188                 "Jan",
189                 "Jana",
190                 "Jane",
191                 "Janey",
192                 "Jania",
193                 "Janie",
194                 "Janna",
195                 "Jany",
196                 "Jayne",
197                 "Jean",
198                 "Jeana",
199                 "Jeane",
200                 "Jeanie",
201                 "Jeanna",
202                 "Jeanne",
203                 "Jeannie",
204                 "Jen",
205                 "Jena",
206                 "Jeni",
207                 "Jenn",
208                 "Jenna",
209                 "Jennee",
210                 "Jenni",
211                 "Jennie",
212                 "Jenny",
213                 "Jinny",
214                 "Jo Ann",
215                 "Jo-Ann",
216                 "Jo-Anne",
217                 "Joan",
218                 "Joana",
219                 "Joane",
220                 "Joanie",
221                 "Joann",
222                 "Joanna",
223                 "Joanne",
224                 "Joeann",
225                 "Johna",
226                 "Johnna",
227                 "Joni",
228                 "Jonie",
229                 "Juana",
230                 "June",
231                 "Junia",
232                 "Junie" });
233     }
234 
235     /**
236      * Initial KN case.
237      *
238      * Match data computed from http://www.lanw.com/java/phonetic/default.htm
239      */
240     @Test
241     public void testIsMetaphoneEqualKnight() {
242         this.assertIsMetaphoneEqual(
243             "Knight",
244             new String[] {
245                 "Hynda",
246                 "Nada",
247                 "Nadia",
248                 "Nady",
249                 "Nat",
250                 "Nata",
251                 "Natty",
252                 "Neda",
253                 "Nedda",
254                 "Nedi",
255                 "Netta",
256                 "Netti",
257                 "Nettie",
258                 "Netty",
259                 "Nita",
260                 "Nydia" });
261     }
262     /**
263      * Match data computed from http://www.lanw.com/java/phonetic/default.htm
264      */
265     @Test
266     public void testIsMetaphoneEqualMary() {
267         this.assertIsMetaphoneEqual(
268             "Mary",
269             new String[] {
270                 "Mair",
271                 "Maire",
272                 "Mara",
273                 "Mareah",
274                 "Mari",
275                 "Maria",
276                 "Marie",
277                 "Mary",
278                 "Maura",
279                 "Maure",
280                 "Meara",
281                 "Merrie",
282                 "Merry",
283                 "Mira",
284                 "Moira",
285                 "Mora",
286                 "Moria",
287                 "Moyra",
288                 "Muire",
289                 "Myra",
290                 "Myrah" });
291     }
292 
293     /**
294      * Match data computed from http://www.lanw.com/java/phonetic/default.htm
295      */
296     @Test
297     public void testIsMetaphoneEqualParis() {
298         this.assertIsMetaphoneEqual("Paris", new String[] { "Pearcy", "Perris", "Piercy", "Pierz", "Pryse" });
299     }
300 
301     /**
302      * Match data computed from http://www.lanw.com/java/phonetic/default.htm
303      */
304     @Test
305     public void testIsMetaphoneEqualPeter() {
306         this.assertIsMetaphoneEqual(
307             "Peter",
308             new String[] { "Peadar", "Peder", "Pedro", "Peter", "Petr", "Peyter", "Pieter", "Pietro", "Piotr" });
309     }
310 
311     /**
312      * Match data computed from http://www.lanw.com/java/phonetic/default.htm
313      */
314     @Test
315     public void testIsMetaphoneEqualRay() {
316         this.assertIsMetaphoneEqual("Ray", new String[] { "Ray", "Rey", "Roi", "Roy", "Ruy" });
317     }
318 
319     /**
320      * Match data computed from http://www.lanw.com/java/phonetic/default.htm
321      */
322     @Test
323     public void testIsMetaphoneEqualSusan() {
324         this.assertIsMetaphoneEqual(
325             "Susan",
326             new String[] {
327                 "Siusan",
328                 "Sosanna",
329                 "Susan",
330                 "Susana",
331                 "Susann",
332                 "Susanna",
333                 "Susannah",
334                 "Susanne",
335                 "Suzann",
336                 "Suzanna",
337                 "Suzanne",
338                 "Zuzana" });
339     }
340 
341     /**
342      * Initial WR case.
343      *
344      * Match data computed from http://www.lanw.com/java/phonetic/default.htm
345      */
346     @Test
347     public void testIsMetaphoneEqualWright() {
348         this.assertIsMetaphoneEqual("Wright", new String[] { "Rota", "Rudd", "Ryde" });
349     }
350 
351     /**
352      * Match data computed from http://www.lanw.com/java/phonetic/default.htm
353      */
354     @Test
355     public void testIsMetaphoneEqualXalan() {
356         this.assertIsMetaphoneEqual(
357             "Xalan",
358             new String[] { "Celene", "Celina", "Celine", "Selena", "Selene", "Selina", "Seline", "Suellen", "Xylina" });
359     }
360 
361     @Test
362     public void testMetaphone() {
363         assertEquals("HL", this.getStringEncoder().metaphone("howl"));
364         assertEquals("TSTN", this.getStringEncoder().metaphone("testing"));
365         assertEquals("0", this.getStringEncoder().metaphone("The"));
366         assertEquals("KK", this.getStringEncoder().metaphone("quick"));
367         assertEquals("BRN", this.getStringEncoder().metaphone("brown"));
368         assertEquals("FKS", this.getStringEncoder().metaphone("fox"));
369         assertEquals("JMPT", this.getStringEncoder().metaphone("jumped"));
370         assertEquals("OFR", this.getStringEncoder().metaphone("over"));
371         assertEquals("0", this.getStringEncoder().metaphone("the"));
372         assertEquals("LS", this.getStringEncoder().metaphone("lazy"));
373         assertEquals("TKS", this.getStringEncoder().metaphone("dogs"));
374     }
375 
376     @Test
377     public void testWordEndingInMB() {
378         assertEquals( "KM", this.getStringEncoder().metaphone("COMB") );
379         assertEquals( "TM", this.getStringEncoder().metaphone("TOMB") );
380         assertEquals( "WM", this.getStringEncoder().metaphone("WOMB") );
381     }
382 
383     @Test
384     public void testDiscardOfSCEOrSCIOrSCY() {
385         assertEquals( "SNS", this.getStringEncoder().metaphone("SCIENCE") );
386         assertEquals( "SN", this.getStringEncoder().metaphone("SCENE") );
387         assertEquals( "S", this.getStringEncoder().metaphone("SCY") );
388     }
389 
390     /**
391      * Tests (CODEC-57) Metaphone.metaphone(String) returns an empty string when passed the word "why"
392      */
393     @Test
394     public void testWhy() {
395         // PHP returns "H". The original metaphone returns an empty string.
396         assertEquals("", this.getStringEncoder().metaphone("WHY"));
397     }
398 
399     @Test
400     public void testWordsWithCIA() {
401         assertEquals( "XP", this.getStringEncoder().metaphone("CIAPO") );
402     }
403 
404     @Test
405     public void testTranslateOfSCHAndCH() {
406         assertEquals( "SKTL", this.getStringEncoder().metaphone("SCHEDULE") );
407         assertEquals( "SKMT", this.getStringEncoder().metaphone("SCHEMATIC") );
408 
409         assertEquals( "KRKT", this.getStringEncoder().metaphone("CHARACTER") );
410         assertEquals( "TX", this.getStringEncoder().metaphone("TEACH") );
411     }
412 
413     @Test
414     public void testTranslateToJOfDGEOrDGIOrDGY() {
415         assertEquals( "TJ", this.getStringEncoder().metaphone("DODGY") );
416         assertEquals( "TJ", this.getStringEncoder().metaphone("DODGE") );
417         assertEquals( "AJMT", this.getStringEncoder().metaphone("ADGIEMTI") );
418     }
419 
420     @Test
421     public void testDiscardOfSilentHAfterG() {
422         assertEquals( "KNT", this.getStringEncoder().metaphone("GHENT") );
423         assertEquals( "B", this.getStringEncoder().metaphone("BAUGH") );
424     }
425 
426     @Test
427     public void testDiscardOfSilentGN() {
428         // NOTE: This does not test for silent GN, but for starting with GN
429         assertEquals( "N", this.getStringEncoder().metaphone("GNU") );
430 
431         // NOTE: Trying to test for GNED, but expected code does not appear to execute
432         assertEquals( "SNT", this.getStringEncoder().metaphone("SIGNED") );
433     }
434 
435     @Test
436     public void testPHTOF() {
437         assertEquals( "FX", this.getStringEncoder().metaphone("PHISH") );
438     }
439 
440     @Test
441     public void testSHAndSIOAndSIAToX() {
442         assertEquals( "XT", this.getStringEncoder().metaphone("SHOT") );
443         assertEquals( "OTXN", this.getStringEncoder().metaphone("ODSIAN") );
444         assertEquals( "PLXN", this.getStringEncoder().metaphone("PULSION") );
445     }
446 
447     @Test
448     public void testTIOAndTIAToX() {
449         assertEquals( "OX", this.getStringEncoder().metaphone("OTIA") );
450         assertEquals( "PRXN", this.getStringEncoder().metaphone("PORTION") );
451     }
452 
453     @Test
454     public void testTCH() {
455         assertEquals( "RX", this.getStringEncoder().metaphone("RETCH") );
456         assertEquals( "WX", this.getStringEncoder().metaphone("WATCH") );
457     }
458 
459     @Test
460     public void testExceedLength() {
461         // should be AKSKS, but istruncated by Max Code Length
462         assertEquals( "AKSK", this.getStringEncoder().metaphone("AXEAXE") );
463     }
464 
465     @Test
466     public void testSetMaxLengthWithTruncation() {
467         // should be AKSKS, but istruncated by Max Code Length
468         this.getStringEncoder().setMaxCodeLen( 6 );
469         assertEquals( "AKSKSK", this.getStringEncoder().metaphone("AXEAXEAXE") );
470     }
471 
472     public void validateFixture(final String[][] pairs) {
473         if (pairs.length == 0) {
474             fail("Test fixture is empty");
475         }
476         for (int i = 0; i < pairs.length; i++) {
477             if (pairs[i].length != 2) {
478                 fail("Error in test fixture in the data array at index " + i);
479             }
480         }
481     }
482 
483 }