001    /*
002     * Licensed to the Apache Software Foundation (ASF) under one or more
003     * contributor license agreements.  See the NOTICE file distributed with
004     * this work for additional information regarding copyright ownership.
005     * The ASF licenses this file to You under the Apache License, Version 2.0
006     * (the "License"); you may not use this file except in compliance with
007     * the License.  You may obtain a copy of the License at
008     *
009     *      http://www.apache.org/licenses/LICENSE-2.0
010     *
011     * Unless required by applicable law or agreed to in writing, software
012     * distributed under the License is distributed on an "AS IS" BASIS,
013     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014     * See the License for the specific language governing permissions and
015     * limitations under the License.
016     */
017    
018    package org.apache.commons.codec.language;
019    
020    import static org.junit.Assert.assertEquals;
021    import static org.junit.Assert.assertTrue;
022    import static org.junit.Assert.fail;
023    
024    import org.apache.commons.codec.StringEncoder;
025    import org.apache.commons.codec.StringEncoderAbstractTest;
026    import org.junit.Test;
027    
028    /**
029     * @version $Id: MetaphoneTest.html 889935 2013-12-11 05:05:13Z ggregory $
030     */
031    public class MetaphoneTest extends StringEncoderAbstractTest {
032    
033        public void assertIsMetaphoneEqual(String source, String[] matches) {
034            // match source to all matches
035            for (String matche : matches) {
036                assertTrue("Source: " + source + ", should have same Metaphone as: " + matche,
037                           this.getMetaphone().isMetaphoneEqual(source, matche));
038            }
039            // match to each other
040            for (String matche : matches) {
041                for (String matche2 : matches) {
042                    assertTrue(this.getMetaphone().isMetaphoneEqual(matche, matche2));
043                }
044            }
045        }
046    
047        public void assertMetaphoneEqual(String[][] pairs) {
048            this.validateFixture(pairs);
049            for (String[] pair : pairs) {
050                String name0 = pair[0];
051                String name1 = pair[1];
052                String failMsg = "Expected match between " + name0 + " and " + name1;
053                assertTrue(failMsg, this.getMetaphone().isMetaphoneEqual(name0, name1));
054                assertTrue(failMsg, this.getMetaphone().isMetaphoneEqual(name1, name0));
055            }
056        }
057    
058        /**
059         * @return Returns the metaphone.
060         */
061        private Metaphone getMetaphone() {
062            return (Metaphone) this.getStringEncoder();
063        }
064    
065        @Override
066        protected StringEncoder createStringEncoder() {
067            return new Metaphone();
068        }
069    
070        @Test
071        public void testIsMetaphoneEqual1() {
072            this.assertMetaphoneEqual(new String[][] { { "Case", "case" }, {
073                    "CASE", "Case" }, {
074                    "caSe", "cAsE" }, {
075                    "quick", "cookie" }
076            });
077        }
078    
079        /**
080         * Matches computed from http://www.lanw.com/java/phonetic/default.htm
081         */
082        @Test
083        public void testIsMetaphoneEqual2() {
084            this.assertMetaphoneEqual(new String[][] { { "Lawrence", "Lorenza" }, {
085                    "Gary", "Cahra" }, });
086        }
087    
088        /**
089         * Initial AE case.
090         *
091         * Match data computed from http://www.lanw.com/java/phonetic/default.htm
092         */
093        @Test
094        public void testIsMetaphoneEqualAero() {
095            this.assertIsMetaphoneEqual("Aero", new String[] { "Eure" });
096        }
097    
098        /**
099         * Initial WH case.
100         *
101         * Match data computed from http://www.lanw.com/java/phonetic/default.htm
102         */
103        @Test
104        public void testIsMetaphoneEqualWhite() {
105            this.assertIsMetaphoneEqual(
106                "White",
107                new String[] { "Wade", "Wait", "Waite", "Wat", "Whit", "Wiatt", "Wit", "Wittie", "Witty", "Wood", "Woodie", "Woody" });
108        }
109    
110        /**
111         * Initial A, not followed by an E case.
112         *
113         * Match data computed from http://www.lanw.com/java/phonetic/default.htm
114         */
115        @Test
116        public void testIsMetaphoneEqualAlbert() {
117            this.assertIsMetaphoneEqual("Albert", new String[] { "Ailbert", "Alberik", "Albert", "Alberto", "Albrecht" });
118        }
119    
120        /**
121         * Match data computed from http://www.lanw.com/java/phonetic/default.htm
122         */
123        @Test
124        public void testIsMetaphoneEqualGary() {
125            this.assertIsMetaphoneEqual(
126                "Gary",
127                new String[] {
128                    "Cahra",
129                    "Cara",
130                    "Carey",
131                    "Cari",
132                    "Caria",
133                    "Carie",
134                    "Caro",
135                    "Carree",
136                    "Carri",
137                    "Carrie",
138                    "Carry",
139                    "Cary",
140                    "Cora",
141                    "Corey",
142                    "Cori",
143                    "Corie",
144                    "Correy",
145                    "Corri",
146                    "Corrie",
147                    "Corry",
148                    "Cory",
149                    "Gray",
150                    "Kara",
151                    "Kare",
152                    "Karee",
153                    "Kari",
154                    "Karia",
155                    "Karie",
156                    "Karrah",
157                    "Karrie",
158                    "Karry",
159                    "Kary",
160                    "Keri",
161                    "Kerri",
162                    "Kerrie",
163                    "Kerry",
164                    "Kira",
165                    "Kiri",
166                    "Kora",
167                    "Kore",
168                    "Kori",
169                    "Korie",
170                    "Korrie",
171                    "Korry" });
172        }
173    
174        /**
175         * Match data computed from http://www.lanw.com/java/phonetic/default.htm
176         */
177        @Test
178        public void testIsMetaphoneEqualJohn() {
179            this.assertIsMetaphoneEqual(
180                "John",
181                new String[] {
182                    "Gena",
183                    "Gene",
184                    "Genia",
185                    "Genna",
186                    "Genni",
187                    "Gennie",
188                    "Genny",
189                    "Giana",
190                    "Gianna",
191                    "Gina",
192                    "Ginni",
193                    "Ginnie",
194                    "Ginny",
195                    "Jaine",
196                    "Jan",
197                    "Jana",
198                    "Jane",
199                    "Janey",
200                    "Jania",
201                    "Janie",
202                    "Janna",
203                    "Jany",
204                    "Jayne",
205                    "Jean",
206                    "Jeana",
207                    "Jeane",
208                    "Jeanie",
209                    "Jeanna",
210                    "Jeanne",
211                    "Jeannie",
212                    "Jen",
213                    "Jena",
214                    "Jeni",
215                    "Jenn",
216                    "Jenna",
217                    "Jennee",
218                    "Jenni",
219                    "Jennie",
220                    "Jenny",
221                    "Jinny",
222                    "Jo Ann",
223                    "Jo-Ann",
224                    "Jo-Anne",
225                    "Joan",
226                    "Joana",
227                    "Joane",
228                    "Joanie",
229                    "Joann",
230                    "Joanna",
231                    "Joanne",
232                    "Joeann",
233                    "Johna",
234                    "Johnna",
235                    "Joni",
236                    "Jonie",
237                    "Juana",
238                    "June",
239                    "Junia",
240                    "Junie" });
241        }
242    
243        /**
244         * Initial KN case.
245         *
246         * Match data computed from http://www.lanw.com/java/phonetic/default.htm
247         */
248        @Test
249        public void testIsMetaphoneEqualKnight() {
250            this.assertIsMetaphoneEqual(
251                "Knight",
252                new String[] {
253                    "Hynda",
254                    "Nada",
255                    "Nadia",
256                    "Nady",
257                    "Nat",
258                    "Nata",
259                    "Natty",
260                    "Neda",
261                    "Nedda",
262                    "Nedi",
263                    "Netta",
264                    "Netti",
265                    "Nettie",
266                    "Netty",
267                    "Nita",
268                    "Nydia" });
269        }
270        /**
271         * Match data computed from http://www.lanw.com/java/phonetic/default.htm
272         */
273        @Test
274        public void testIsMetaphoneEqualMary() {
275            this.assertIsMetaphoneEqual(
276                "Mary",
277                new String[] {
278                    "Mair",
279                    "Maire",
280                    "Mara",
281                    "Mareah",
282                    "Mari",
283                    "Maria",
284                    "Marie",
285                    "Mary",
286                    "Maura",
287                    "Maure",
288                    "Meara",
289                    "Merrie",
290                    "Merry",
291                    "Mira",
292                    "Moira",
293                    "Mora",
294                    "Moria",
295                    "Moyra",
296                    "Muire",
297                    "Myra",
298                    "Myrah" });
299        }
300    
301        /**
302         * Match data computed from http://www.lanw.com/java/phonetic/default.htm
303         */
304        @Test
305        public void testIsMetaphoneEqualParis() {
306            this.assertIsMetaphoneEqual("Paris", new String[] { "Pearcy", "Perris", "Piercy", "Pierz", "Pryse" });
307        }
308    
309        /**
310         * Match data computed from http://www.lanw.com/java/phonetic/default.htm
311         */
312        @Test
313        public void testIsMetaphoneEqualPeter() {
314            this.assertIsMetaphoneEqual(
315                "Peter",
316                new String[] { "Peadar", "Peder", "Pedro", "Peter", "Petr", "Peyter", "Pieter", "Pietro", "Piotr" });
317        }
318    
319        /**
320         * Match data computed from http://www.lanw.com/java/phonetic/default.htm
321         */
322        @Test
323        public void testIsMetaphoneEqualRay() {
324            this.assertIsMetaphoneEqual("Ray", new String[] { "Ray", "Rey", "Roi", "Roy", "Ruy" });
325        }
326    
327        /**
328         * Match data computed from http://www.lanw.com/java/phonetic/default.htm
329         */
330        @Test
331        public void testIsMetaphoneEqualSusan() {
332            this.assertIsMetaphoneEqual(
333                "Susan",
334                new String[] {
335                    "Siusan",
336                    "Sosanna",
337                    "Susan",
338                    "Susana",
339                    "Susann",
340                    "Susanna",
341                    "Susannah",
342                    "Susanne",
343                    "Suzann",
344                    "Suzanna",
345                    "Suzanne",
346                    "Zuzana" });
347        }
348    
349        /**
350         * Initial WR case.
351         *
352         * Match data computed from http://www.lanw.com/java/phonetic/default.htm
353         */
354        @Test
355        public void testIsMetaphoneEqualWright() {
356            this.assertIsMetaphoneEqual("Wright", new String[] { "Rota", "Rudd", "Ryde" });
357        }
358    
359        /**
360         * Match data computed from http://www.lanw.com/java/phonetic/default.htm
361         */
362        @Test
363        public void testIsMetaphoneEqualXalan() {
364            this.assertIsMetaphoneEqual(
365                "Xalan",
366                new String[] { "Celene", "Celina", "Celine", "Selena", "Selene", "Selina", "Seline", "Suellen", "Xylina" });
367        }
368    
369        @Test
370        public void testMetaphone() {
371            assertEquals("HL", this.getMetaphone().metaphone("howl"));
372            assertEquals("TSTN", this.getMetaphone().metaphone("testing"));
373            assertEquals("0", this.getMetaphone().metaphone("The"));
374            assertEquals("KK", this.getMetaphone().metaphone("quick"));
375            assertEquals("BRN", this.getMetaphone().metaphone("brown"));
376            assertEquals("FKS", this.getMetaphone().metaphone("fox"));
377            assertEquals("JMPT", this.getMetaphone().metaphone("jumped"));
378            assertEquals("OFR", this.getMetaphone().metaphone("over"));
379            assertEquals("0", this.getMetaphone().metaphone("the"));
380            assertEquals("LS", this.getMetaphone().metaphone("lazy"));
381            assertEquals("TKS", this.getMetaphone().metaphone("dogs"));
382        }
383    
384        @Test
385        public void testWordEndingInMB() {
386            assertEquals( "KM", this.getMetaphone().metaphone("COMB") );
387            assertEquals( "TM", this.getMetaphone().metaphone("TOMB") );
388            assertEquals( "WM", this.getMetaphone().metaphone("WOMB") );
389        }
390    
391        @Test
392        public void testDiscardOfSCEOrSCIOrSCY() {
393            assertEquals( "SNS", this.getMetaphone().metaphone("SCIENCE") );
394            assertEquals( "SN", this.getMetaphone().metaphone("SCENE") );
395            assertEquals( "S", this.getMetaphone().metaphone("SCY") );
396        }
397    
398        /**
399         * Tests (CODEC-57) Metaphone.metaphone(String) returns an empty string when passed the word "why"
400         */
401        @Test
402        public void testWhy() {
403            // PHP returns "H". The original metaphone returns an empty string.
404            assertEquals("", this.getMetaphone().metaphone("WHY"));
405        }
406    
407        @Test
408        public void testWordsWithCIA() {
409            assertEquals( "XP", this.getMetaphone().metaphone("CIAPO") );
410        }
411    
412        @Test
413        public void testTranslateOfSCHAndCH() {
414            assertEquals( "SKTL", this.getMetaphone().metaphone("SCHEDULE") );
415            assertEquals( "SKMT", this.getMetaphone().metaphone("SCHEMATIC") );
416    
417            assertEquals( "KRKT", this.getMetaphone().metaphone("CHARACTER") );
418            assertEquals( "TX", this.getMetaphone().metaphone("TEACH") );
419        }
420    
421        @Test
422        public void testTranslateToJOfDGEOrDGIOrDGY() {
423            assertEquals( "TJ", this.getMetaphone().metaphone("DODGY") );
424            assertEquals( "TJ", this.getMetaphone().metaphone("DODGE") );
425            assertEquals( "AJMT", this.getMetaphone().metaphone("ADGIEMTI") );
426        }
427    
428        @Test
429        public void testDiscardOfSilentHAfterG() {
430            assertEquals( "KNT", this.getMetaphone().metaphone("GHENT") );
431            assertEquals( "B", this.getMetaphone().metaphone("BAUGH") );
432        }
433    
434        @Test
435        public void testDiscardOfSilentGN() {
436            // NOTE: This does not test for silent GN, but for starting with GN
437            assertEquals( "N", this.getMetaphone().metaphone("GNU") );
438    
439            // NOTE: Trying to test for GNED, but expected code does not appear to execute
440            assertEquals( "SNT", this.getMetaphone().metaphone("SIGNED") );
441        }
442    
443        @Test
444        public void testPHTOF() {
445            assertEquals( "FX", this.getMetaphone().metaphone("PHISH") );
446        }
447    
448        @Test
449        public void testSHAndSIOAndSIAToX() {
450            assertEquals( "XT", this.getMetaphone().metaphone("SHOT") );
451            assertEquals( "OTXN", this.getMetaphone().metaphone("ODSIAN") );
452            assertEquals( "PLXN", this.getMetaphone().metaphone("PULSION") );
453        }
454    
455        @Test
456        public void testTIOAndTIAToX() {
457            assertEquals( "OX", this.getMetaphone().metaphone("OTIA") );
458            assertEquals( "PRXN", this.getMetaphone().metaphone("PORTION") );
459        }
460    
461        @Test
462        public void testTCH() {
463            assertEquals( "RX", this.getMetaphone().metaphone("RETCH") );
464            assertEquals( "WX", this.getMetaphone().metaphone("WATCH") );
465        }
466    
467        @Test
468        public void testExceedLength() {
469            // should be AKSKS, but istruncated by Max Code Length
470            assertEquals( "AKSK", this.getMetaphone().metaphone("AXEAXE") );
471        }
472    
473        @Test
474        public void testSetMaxLengthWithTruncation() {
475            // should be AKSKS, but istruncated by Max Code Length
476            this.getMetaphone().setMaxCodeLen( 6 );
477            assertEquals( "AKSKSK", this.getMetaphone().metaphone("AXEAXEAXE") );
478        }
479    
480        public void validateFixture(String[][] pairs) {
481            if (pairs.length == 0) {
482                fail("Test fixture is empty");
483            }
484            for (int i = 0; i < pairs.length; i++) {
485                if (pairs[i].length != 2) {
486                    fail("Error in test fixture in the data array at index " + i);
487                }
488            }
489        }
490    
491    }