001/*
002 * Licensed to the Apache Software Foundation (ASF) under one or more
003 * contributor license agreements.  See the NOTICE file distributed with
004 * this work for additional information regarding copyright ownership.
005 * The ASF licenses this file to You under the Apache License, Version 2.0
006 * (the "License"); you may not use this file except in compliance with
007 * the License.  You may obtain a copy of the License at
008 *
009 *      http://www.apache.org/licenses/LICENSE-2.0
010 *
011 * Unless required by applicable law or agreed to in writing, software
012 * distributed under the License is distributed on an "AS IS" BASIS,
013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 * See the License for the specific language governing permissions and
015 * limitations under the License.
016 */
017
018package org.apache.commons.codec.language;
019
020import static org.junit.Assert.assertEquals;
021import static org.junit.Assert.assertTrue;
022import static org.junit.Assert.fail;
023
024import org.apache.commons.codec.StringEncoderAbstractTest;
025import org.junit.Test;
026
027/**
028 * @version $Id: MetaphoneTest.html 891688 2013-12-24 20:49:46Z ggregory $
029 */
030public class MetaphoneTest extends StringEncoderAbstractTest<Metaphone> {
031
032    public void assertIsMetaphoneEqual(final String source, final String[] matches) {
033        // match source to all matches
034        for (final String matche : matches) {
035            assertTrue("Source: " + source + ", should have same Metaphone as: " + matche,
036                       this.getStringEncoder().isMetaphoneEqual(source, matche));
037        }
038        // match to each other
039        for (final String matche : matches) {
040            for (final String matche2 : matches) {
041                assertTrue(this.getStringEncoder().isMetaphoneEqual(matche, matche2));
042            }
043        }
044    }
045
046    public void assertMetaphoneEqual(final String[][] pairs) {
047        this.validateFixture(pairs);
048        for (final String[] pair : pairs) {
049            final String name0 = pair[0];
050            final String name1 = pair[1];
051            final String failMsg = "Expected match between " + name0 + " and " + name1;
052            assertTrue(failMsg, this.getStringEncoder().isMetaphoneEqual(name0, name1));
053            assertTrue(failMsg, this.getStringEncoder().isMetaphoneEqual(name1, name0));
054        }
055    }
056
057    @Override
058    protected Metaphone createStringEncoder() {
059        return new Metaphone();
060    }
061
062    @Test
063    public void testIsMetaphoneEqual1() {
064        this.assertMetaphoneEqual(new String[][] { { "Case", "case" }, {
065                "CASE", "Case" }, {
066                "caSe", "cAsE" }, {
067                "quick", "cookie" }
068        });
069    }
070
071    /**
072     * Matches computed from http://www.lanw.com/java/phonetic/default.htm
073     */
074    @Test
075    public void testIsMetaphoneEqual2() {
076        this.assertMetaphoneEqual(new String[][] { { "Lawrence", "Lorenza" }, {
077                "Gary", "Cahra" }, });
078    }
079
080    /**
081     * Initial AE case.
082     *
083     * Match data computed from http://www.lanw.com/java/phonetic/default.htm
084     */
085    @Test
086    public void testIsMetaphoneEqualAero() {
087        this.assertIsMetaphoneEqual("Aero", new String[] { "Eure" });
088    }
089
090    /**
091     * Initial WH case.
092     *
093     * Match data computed from http://www.lanw.com/java/phonetic/default.htm
094     */
095    @Test
096    public void testIsMetaphoneEqualWhite() {
097        this.assertIsMetaphoneEqual(
098            "White",
099            new String[] { "Wade", "Wait", "Waite", "Wat", "Whit", "Wiatt", "Wit", "Wittie", "Witty", "Wood", "Woodie", "Woody" });
100    }
101
102    /**
103     * Initial A, not followed by an E case.
104     *
105     * Match data computed from http://www.lanw.com/java/phonetic/default.htm
106     */
107    @Test
108    public void testIsMetaphoneEqualAlbert() {
109        this.assertIsMetaphoneEqual("Albert", new String[] { "Ailbert", "Alberik", "Albert", "Alberto", "Albrecht" });
110    }
111
112    /**
113     * Match data computed from http://www.lanw.com/java/phonetic/default.htm
114     */
115    @Test
116    public void testIsMetaphoneEqualGary() {
117        this.assertIsMetaphoneEqual(
118            "Gary",
119            new String[] {
120                "Cahra",
121                "Cara",
122                "Carey",
123                "Cari",
124                "Caria",
125                "Carie",
126                "Caro",
127                "Carree",
128                "Carri",
129                "Carrie",
130                "Carry",
131                "Cary",
132                "Cora",
133                "Corey",
134                "Cori",
135                "Corie",
136                "Correy",
137                "Corri",
138                "Corrie",
139                "Corry",
140                "Cory",
141                "Gray",
142                "Kara",
143                "Kare",
144                "Karee",
145                "Kari",
146                "Karia",
147                "Karie",
148                "Karrah",
149                "Karrie",
150                "Karry",
151                "Kary",
152                "Keri",
153                "Kerri",
154                "Kerrie",
155                "Kerry",
156                "Kira",
157                "Kiri",
158                "Kora",
159                "Kore",
160                "Kori",
161                "Korie",
162                "Korrie",
163                "Korry" });
164    }
165
166    /**
167     * Match data computed from http://www.lanw.com/java/phonetic/default.htm
168     */
169    @Test
170    public void testIsMetaphoneEqualJohn() {
171        this.assertIsMetaphoneEqual(
172            "John",
173            new String[] {
174                "Gena",
175                "Gene",
176                "Genia",
177                "Genna",
178                "Genni",
179                "Gennie",
180                "Genny",
181                "Giana",
182                "Gianna",
183                "Gina",
184                "Ginni",
185                "Ginnie",
186                "Ginny",
187                "Jaine",
188                "Jan",
189                "Jana",
190                "Jane",
191                "Janey",
192                "Jania",
193                "Janie",
194                "Janna",
195                "Jany",
196                "Jayne",
197                "Jean",
198                "Jeana",
199                "Jeane",
200                "Jeanie",
201                "Jeanna",
202                "Jeanne",
203                "Jeannie",
204                "Jen",
205                "Jena",
206                "Jeni",
207                "Jenn",
208                "Jenna",
209                "Jennee",
210                "Jenni",
211                "Jennie",
212                "Jenny",
213                "Jinny",
214                "Jo Ann",
215                "Jo-Ann",
216                "Jo-Anne",
217                "Joan",
218                "Joana",
219                "Joane",
220                "Joanie",
221                "Joann",
222                "Joanna",
223                "Joanne",
224                "Joeann",
225                "Johna",
226                "Johnna",
227                "Joni",
228                "Jonie",
229                "Juana",
230                "June",
231                "Junia",
232                "Junie" });
233    }
234
235    /**
236     * Initial KN case.
237     *
238     * Match data computed from http://www.lanw.com/java/phonetic/default.htm
239     */
240    @Test
241    public void testIsMetaphoneEqualKnight() {
242        this.assertIsMetaphoneEqual(
243            "Knight",
244            new String[] {
245                "Hynda",
246                "Nada",
247                "Nadia",
248                "Nady",
249                "Nat",
250                "Nata",
251                "Natty",
252                "Neda",
253                "Nedda",
254                "Nedi",
255                "Netta",
256                "Netti",
257                "Nettie",
258                "Netty",
259                "Nita",
260                "Nydia" });
261    }
262    /**
263     * Match data computed from http://www.lanw.com/java/phonetic/default.htm
264     */
265    @Test
266    public void testIsMetaphoneEqualMary() {
267        this.assertIsMetaphoneEqual(
268            "Mary",
269            new String[] {
270                "Mair",
271                "Maire",
272                "Mara",
273                "Mareah",
274                "Mari",
275                "Maria",
276                "Marie",
277                "Mary",
278                "Maura",
279                "Maure",
280                "Meara",
281                "Merrie",
282                "Merry",
283                "Mira",
284                "Moira",
285                "Mora",
286                "Moria",
287                "Moyra",
288                "Muire",
289                "Myra",
290                "Myrah" });
291    }
292
293    /**
294     * Match data computed from http://www.lanw.com/java/phonetic/default.htm
295     */
296    @Test
297    public void testIsMetaphoneEqualParis() {
298        this.assertIsMetaphoneEqual("Paris", new String[] { "Pearcy", "Perris", "Piercy", "Pierz", "Pryse" });
299    }
300
301    /**
302     * Match data computed from http://www.lanw.com/java/phonetic/default.htm
303     */
304    @Test
305    public void testIsMetaphoneEqualPeter() {
306        this.assertIsMetaphoneEqual(
307            "Peter",
308            new String[] { "Peadar", "Peder", "Pedro", "Peter", "Petr", "Peyter", "Pieter", "Pietro", "Piotr" });
309    }
310
311    /**
312     * Match data computed from http://www.lanw.com/java/phonetic/default.htm
313     */
314    @Test
315    public void testIsMetaphoneEqualRay() {
316        this.assertIsMetaphoneEqual("Ray", new String[] { "Ray", "Rey", "Roi", "Roy", "Ruy" });
317    }
318
319    /**
320     * Match data computed from http://www.lanw.com/java/phonetic/default.htm
321     */
322    @Test
323    public void testIsMetaphoneEqualSusan() {
324        this.assertIsMetaphoneEqual(
325            "Susan",
326            new String[] {
327                "Siusan",
328                "Sosanna",
329                "Susan",
330                "Susana",
331                "Susann",
332                "Susanna",
333                "Susannah",
334                "Susanne",
335                "Suzann",
336                "Suzanna",
337                "Suzanne",
338                "Zuzana" });
339    }
340
341    /**
342     * Initial WR case.
343     *
344     * Match data computed from http://www.lanw.com/java/phonetic/default.htm
345     */
346    @Test
347    public void testIsMetaphoneEqualWright() {
348        this.assertIsMetaphoneEqual("Wright", new String[] { "Rota", "Rudd", "Ryde" });
349    }
350
351    /**
352     * Match data computed from http://www.lanw.com/java/phonetic/default.htm
353     */
354    @Test
355    public void testIsMetaphoneEqualXalan() {
356        this.assertIsMetaphoneEqual(
357            "Xalan",
358            new String[] { "Celene", "Celina", "Celine", "Selena", "Selene", "Selina", "Seline", "Suellen", "Xylina" });
359    }
360
361    @Test
362    public void testMetaphone() {
363        assertEquals("HL", this.getStringEncoder().metaphone("howl"));
364        assertEquals("TSTN", this.getStringEncoder().metaphone("testing"));
365        assertEquals("0", this.getStringEncoder().metaphone("The"));
366        assertEquals("KK", this.getStringEncoder().metaphone("quick"));
367        assertEquals("BRN", this.getStringEncoder().metaphone("brown"));
368        assertEquals("FKS", this.getStringEncoder().metaphone("fox"));
369        assertEquals("JMPT", this.getStringEncoder().metaphone("jumped"));
370        assertEquals("OFR", this.getStringEncoder().metaphone("over"));
371        assertEquals("0", this.getStringEncoder().metaphone("the"));
372        assertEquals("LS", this.getStringEncoder().metaphone("lazy"));
373        assertEquals("TKS", this.getStringEncoder().metaphone("dogs"));
374    }
375
376    @Test
377    public void testWordEndingInMB() {
378        assertEquals( "KM", this.getStringEncoder().metaphone("COMB") );
379        assertEquals( "TM", this.getStringEncoder().metaphone("TOMB") );
380        assertEquals( "WM", this.getStringEncoder().metaphone("WOMB") );
381    }
382
383    @Test
384    public void testDiscardOfSCEOrSCIOrSCY() {
385        assertEquals( "SNS", this.getStringEncoder().metaphone("SCIENCE") );
386        assertEquals( "SN", this.getStringEncoder().metaphone("SCENE") );
387        assertEquals( "S", this.getStringEncoder().metaphone("SCY") );
388    }
389
390    /**
391     * Tests (CODEC-57) Metaphone.metaphone(String) returns an empty string when passed the word "why"
392     */
393    @Test
394    public void testWhy() {
395        // PHP returns "H". The original metaphone returns an empty string.
396        assertEquals("", this.getStringEncoder().metaphone("WHY"));
397    }
398
399    @Test
400    public void testWordsWithCIA() {
401        assertEquals( "XP", this.getStringEncoder().metaphone("CIAPO") );
402    }
403
404    @Test
405    public void testTranslateOfSCHAndCH() {
406        assertEquals( "SKTL", this.getStringEncoder().metaphone("SCHEDULE") );
407        assertEquals( "SKMT", this.getStringEncoder().metaphone("SCHEMATIC") );
408
409        assertEquals( "KRKT", this.getStringEncoder().metaphone("CHARACTER") );
410        assertEquals( "TX", this.getStringEncoder().metaphone("TEACH") );
411    }
412
413    @Test
414    public void testTranslateToJOfDGEOrDGIOrDGY() {
415        assertEquals( "TJ", this.getStringEncoder().metaphone("DODGY") );
416        assertEquals( "TJ", this.getStringEncoder().metaphone("DODGE") );
417        assertEquals( "AJMT", this.getStringEncoder().metaphone("ADGIEMTI") );
418    }
419
420    @Test
421    public void testDiscardOfSilentHAfterG() {
422        assertEquals( "KNT", this.getStringEncoder().metaphone("GHENT") );
423        assertEquals( "B", this.getStringEncoder().metaphone("BAUGH") );
424    }
425
426    @Test
427    public void testDiscardOfSilentGN() {
428        // NOTE: This does not test for silent GN, but for starting with GN
429        assertEquals( "N", this.getStringEncoder().metaphone("GNU") );
430
431        // NOTE: Trying to test for GNED, but expected code does not appear to execute
432        assertEquals( "SNT", this.getStringEncoder().metaphone("SIGNED") );
433    }
434
435    @Test
436    public void testPHTOF() {
437        assertEquals( "FX", this.getStringEncoder().metaphone("PHISH") );
438    }
439
440    @Test
441    public void testSHAndSIOAndSIAToX() {
442        assertEquals( "XT", this.getStringEncoder().metaphone("SHOT") );
443        assertEquals( "OTXN", this.getStringEncoder().metaphone("ODSIAN") );
444        assertEquals( "PLXN", this.getStringEncoder().metaphone("PULSION") );
445    }
446
447    @Test
448    public void testTIOAndTIAToX() {
449        assertEquals( "OX", this.getStringEncoder().metaphone("OTIA") );
450        assertEquals( "PRXN", this.getStringEncoder().metaphone("PORTION") );
451    }
452
453    @Test
454    public void testTCH() {
455        assertEquals( "RX", this.getStringEncoder().metaphone("RETCH") );
456        assertEquals( "WX", this.getStringEncoder().metaphone("WATCH") );
457    }
458
459    @Test
460    public void testExceedLength() {
461        // should be AKSKS, but istruncated by Max Code Length
462        assertEquals( "AKSK", this.getStringEncoder().metaphone("AXEAXE") );
463    }
464
465    @Test
466    public void testSetMaxLengthWithTruncation() {
467        // should be AKSKS, but istruncated by Max Code Length
468        this.getStringEncoder().setMaxCodeLen( 6 );
469        assertEquals( "AKSKSK", this.getStringEncoder().metaphone("AXEAXEAXE") );
470    }
471
472    public void validateFixture(final String[][] pairs) {
473        if (pairs.length == 0) {
474            fail("Test fixture is empty");
475        }
476        for (int i = 0; i < pairs.length; i++) {
477            if (pairs[i].length != 2) {
478                fail("Error in test fixture in the data array at index " + i);
479            }
480        }
481    }
482
483}