001    /*
002     * Licensed to the Apache Software Foundation (ASF) under one or more
003     * contributor license agreements.  See the NOTICE file distributed with
004     * this work for additional information regarding copyright ownership.
005     * The ASF licenses this file to You under the Apache License, Version 2.0
006     * (the "License"); you may not use this file except in compliance with
007     * the License.  You may obtain a copy of the License at
008     *
009     *      http://www.apache.org/licenses/LICENSE-2.0
010     *
011     * Unless required by applicable law or agreed to in writing, software
012     * distributed under the License is distributed on an "AS IS" BASIS,
013     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014     * See the License for the specific language governing permissions and
015     * limitations under the License.
016     */
017    
018    package org.apache.commons.codec.language;
019    
020    import static org.junit.Assert.assertEquals;
021    import static org.junit.Assert.assertTrue;
022    import static org.junit.Assert.fail;
023    
024    import org.apache.commons.codec.StringEncoderAbstractTest;
025    import org.junit.Test;
026    
027    /**
028     * @version $Id: MetaphoneTest.html 889935 2013-12-11 05:05:13Z ggregory $
029     */
030    public class MetaphoneTest extends StringEncoderAbstractTest<Metaphone> {
031    
032        public void assertIsMetaphoneEqual(final String source, final String[] matches) {
033            // match source to all matches
034            for (final String matche : matches) {
035                assertTrue("Source: " + source + ", should have same Metaphone as: " + matche,
036                           this.getStringEncoder().isMetaphoneEqual(source, matche));
037            }
038            // match to each other
039            for (final String matche : matches) {
040                for (final String matche2 : matches) {
041                    assertTrue(this.getStringEncoder().isMetaphoneEqual(matche, matche2));
042                }
043            }
044        }
045    
046        public void assertMetaphoneEqual(final String[][] pairs) {
047            this.validateFixture(pairs);
048            for (final String[] pair : pairs) {
049                final String name0 = pair[0];
050                final String name1 = pair[1];
051                final String failMsg = "Expected match between " + name0 + " and " + name1;
052                assertTrue(failMsg, this.getStringEncoder().isMetaphoneEqual(name0, name1));
053                assertTrue(failMsg, this.getStringEncoder().isMetaphoneEqual(name1, name0));
054            }
055        }
056    
057        @Override
058        protected Metaphone createStringEncoder() {
059            return new Metaphone();
060        }
061    
062        @Test
063        public void testIsMetaphoneEqual1() {
064            this.assertMetaphoneEqual(new String[][] { { "Case", "case" }, {
065                    "CASE", "Case" }, {
066                    "caSe", "cAsE" }, {
067                    "quick", "cookie" }
068            });
069        }
070    
071        /**
072         * Matches computed from http://www.lanw.com/java/phonetic/default.htm
073         */
074        @Test
075        public void testIsMetaphoneEqual2() {
076            this.assertMetaphoneEqual(new String[][] { { "Lawrence", "Lorenza" }, {
077                    "Gary", "Cahra" }, });
078        }
079    
080        /**
081         * Initial AE case.
082         *
083         * Match data computed from http://www.lanw.com/java/phonetic/default.htm
084         */
085        @Test
086        public void testIsMetaphoneEqualAero() {
087            this.assertIsMetaphoneEqual("Aero", new String[] { "Eure" });
088        }
089    
090        /**
091         * Initial WH case.
092         *
093         * Match data computed from http://www.lanw.com/java/phonetic/default.htm
094         */
095        @Test
096        public void testIsMetaphoneEqualWhite() {
097            this.assertIsMetaphoneEqual(
098                "White",
099                new String[] { "Wade", "Wait", "Waite", "Wat", "Whit", "Wiatt", "Wit", "Wittie", "Witty", "Wood", "Woodie", "Woody" });
100        }
101    
102        /**
103         * Initial A, not followed by an E case.
104         *
105         * Match data computed from http://www.lanw.com/java/phonetic/default.htm
106         */
107        @Test
108        public void testIsMetaphoneEqualAlbert() {
109            this.assertIsMetaphoneEqual("Albert", new String[] { "Ailbert", "Alberik", "Albert", "Alberto", "Albrecht" });
110        }
111    
112        /**
113         * Match data computed from http://www.lanw.com/java/phonetic/default.htm
114         */
115        @Test
116        public void testIsMetaphoneEqualGary() {
117            this.assertIsMetaphoneEqual(
118                "Gary",
119                new String[] {
120                    "Cahra",
121                    "Cara",
122                    "Carey",
123                    "Cari",
124                    "Caria",
125                    "Carie",
126                    "Caro",
127                    "Carree",
128                    "Carri",
129                    "Carrie",
130                    "Carry",
131                    "Cary",
132                    "Cora",
133                    "Corey",
134                    "Cori",
135                    "Corie",
136                    "Correy",
137                    "Corri",
138                    "Corrie",
139                    "Corry",
140                    "Cory",
141                    "Gray",
142                    "Kara",
143                    "Kare",
144                    "Karee",
145                    "Kari",
146                    "Karia",
147                    "Karie",
148                    "Karrah",
149                    "Karrie",
150                    "Karry",
151                    "Kary",
152                    "Keri",
153                    "Kerri",
154                    "Kerrie",
155                    "Kerry",
156                    "Kira",
157                    "Kiri",
158                    "Kora",
159                    "Kore",
160                    "Kori",
161                    "Korie",
162                    "Korrie",
163                    "Korry" });
164        }
165    
166        /**
167         * Match data computed from http://www.lanw.com/java/phonetic/default.htm
168         */
169        @Test
170        public void testIsMetaphoneEqualJohn() {
171            this.assertIsMetaphoneEqual(
172                "John",
173                new String[] {
174                    "Gena",
175                    "Gene",
176                    "Genia",
177                    "Genna",
178                    "Genni",
179                    "Gennie",
180                    "Genny",
181                    "Giana",
182                    "Gianna",
183                    "Gina",
184                    "Ginni",
185                    "Ginnie",
186                    "Ginny",
187                    "Jaine",
188                    "Jan",
189                    "Jana",
190                    "Jane",
191                    "Janey",
192                    "Jania",
193                    "Janie",
194                    "Janna",
195                    "Jany",
196                    "Jayne",
197                    "Jean",
198                    "Jeana",
199                    "Jeane",
200                    "Jeanie",
201                    "Jeanna",
202                    "Jeanne",
203                    "Jeannie",
204                    "Jen",
205                    "Jena",
206                    "Jeni",
207                    "Jenn",
208                    "Jenna",
209                    "Jennee",
210                    "Jenni",
211                    "Jennie",
212                    "Jenny",
213                    "Jinny",
214                    "Jo Ann",
215                    "Jo-Ann",
216                    "Jo-Anne",
217                    "Joan",
218                    "Joana",
219                    "Joane",
220                    "Joanie",
221                    "Joann",
222                    "Joanna",
223                    "Joanne",
224                    "Joeann",
225                    "Johna",
226                    "Johnna",
227                    "Joni",
228                    "Jonie",
229                    "Juana",
230                    "June",
231                    "Junia",
232                    "Junie" });
233        }
234    
235        /**
236         * Initial KN case.
237         *
238         * Match data computed from http://www.lanw.com/java/phonetic/default.htm
239         */
240        @Test
241        public void testIsMetaphoneEqualKnight() {
242            this.assertIsMetaphoneEqual(
243                "Knight",
244                new String[] {
245                    "Hynda",
246                    "Nada",
247                    "Nadia",
248                    "Nady",
249                    "Nat",
250                    "Nata",
251                    "Natty",
252                    "Neda",
253                    "Nedda",
254                    "Nedi",
255                    "Netta",
256                    "Netti",
257                    "Nettie",
258                    "Netty",
259                    "Nita",
260                    "Nydia" });
261        }
262        /**
263         * Match data computed from http://www.lanw.com/java/phonetic/default.htm
264         */
265        @Test
266        public void testIsMetaphoneEqualMary() {
267            this.assertIsMetaphoneEqual(
268                "Mary",
269                new String[] {
270                    "Mair",
271                    "Maire",
272                    "Mara",
273                    "Mareah",
274                    "Mari",
275                    "Maria",
276                    "Marie",
277                    "Mary",
278                    "Maura",
279                    "Maure",
280                    "Meara",
281                    "Merrie",
282                    "Merry",
283                    "Mira",
284                    "Moira",
285                    "Mora",
286                    "Moria",
287                    "Moyra",
288                    "Muire",
289                    "Myra",
290                    "Myrah" });
291        }
292    
293        /**
294         * Match data computed from http://www.lanw.com/java/phonetic/default.htm
295         */
296        @Test
297        public void testIsMetaphoneEqualParis() {
298            this.assertIsMetaphoneEqual("Paris", new String[] { "Pearcy", "Perris", "Piercy", "Pierz", "Pryse" });
299        }
300    
301        /**
302         * Match data computed from http://www.lanw.com/java/phonetic/default.htm
303         */
304        @Test
305        public void testIsMetaphoneEqualPeter() {
306            this.assertIsMetaphoneEqual(
307                "Peter",
308                new String[] { "Peadar", "Peder", "Pedro", "Peter", "Petr", "Peyter", "Pieter", "Pietro", "Piotr" });
309        }
310    
311        /**
312         * Match data computed from http://www.lanw.com/java/phonetic/default.htm
313         */
314        @Test
315        public void testIsMetaphoneEqualRay() {
316            this.assertIsMetaphoneEqual("Ray", new String[] { "Ray", "Rey", "Roi", "Roy", "Ruy" });
317        }
318    
319        /**
320         * Match data computed from http://www.lanw.com/java/phonetic/default.htm
321         */
322        @Test
323        public void testIsMetaphoneEqualSusan() {
324            this.assertIsMetaphoneEqual(
325                "Susan",
326                new String[] {
327                    "Siusan",
328                    "Sosanna",
329                    "Susan",
330                    "Susana",
331                    "Susann",
332                    "Susanna",
333                    "Susannah",
334                    "Susanne",
335                    "Suzann",
336                    "Suzanna",
337                    "Suzanne",
338                    "Zuzana" });
339        }
340    
341        /**
342         * Initial WR case.
343         *
344         * Match data computed from http://www.lanw.com/java/phonetic/default.htm
345         */
346        @Test
347        public void testIsMetaphoneEqualWright() {
348            this.assertIsMetaphoneEqual("Wright", new String[] { "Rota", "Rudd", "Ryde" });
349        }
350    
351        /**
352         * Match data computed from http://www.lanw.com/java/phonetic/default.htm
353         */
354        @Test
355        public void testIsMetaphoneEqualXalan() {
356            this.assertIsMetaphoneEqual(
357                "Xalan",
358                new String[] { "Celene", "Celina", "Celine", "Selena", "Selene", "Selina", "Seline", "Suellen", "Xylina" });
359        }
360    
361        @Test
362        public void testMetaphone() {
363            assertEquals("HL", this.getStringEncoder().metaphone("howl"));
364            assertEquals("TSTN", this.getStringEncoder().metaphone("testing"));
365            assertEquals("0", this.getStringEncoder().metaphone("The"));
366            assertEquals("KK", this.getStringEncoder().metaphone("quick"));
367            assertEquals("BRN", this.getStringEncoder().metaphone("brown"));
368            assertEquals("FKS", this.getStringEncoder().metaphone("fox"));
369            assertEquals("JMPT", this.getStringEncoder().metaphone("jumped"));
370            assertEquals("OFR", this.getStringEncoder().metaphone("over"));
371            assertEquals("0", this.getStringEncoder().metaphone("the"));
372            assertEquals("LS", this.getStringEncoder().metaphone("lazy"));
373            assertEquals("TKS", this.getStringEncoder().metaphone("dogs"));
374        }
375    
376        @Test
377        public void testWordEndingInMB() {
378            assertEquals( "KM", this.getStringEncoder().metaphone("COMB") );
379            assertEquals( "TM", this.getStringEncoder().metaphone("TOMB") );
380            assertEquals( "WM", this.getStringEncoder().metaphone("WOMB") );
381        }
382    
383        @Test
384        public void testDiscardOfSCEOrSCIOrSCY() {
385            assertEquals( "SNS", this.getStringEncoder().metaphone("SCIENCE") );
386            assertEquals( "SN", this.getStringEncoder().metaphone("SCENE") );
387            assertEquals( "S", this.getStringEncoder().metaphone("SCY") );
388        }
389    
390        /**
391         * Tests (CODEC-57) Metaphone.metaphone(String) returns an empty string when passed the word "why"
392         */
393        @Test
394        public void testWhy() {
395            // PHP returns "H". The original metaphone returns an empty string.
396            assertEquals("", this.getStringEncoder().metaphone("WHY"));
397        }
398    
399        @Test
400        public void testWordsWithCIA() {
401            assertEquals( "XP", this.getStringEncoder().metaphone("CIAPO") );
402        }
403    
404        @Test
405        public void testTranslateOfSCHAndCH() {
406            assertEquals( "SKTL", this.getStringEncoder().metaphone("SCHEDULE") );
407            assertEquals( "SKMT", this.getStringEncoder().metaphone("SCHEMATIC") );
408    
409            assertEquals( "KRKT", this.getStringEncoder().metaphone("CHARACTER") );
410            assertEquals( "TX", this.getStringEncoder().metaphone("TEACH") );
411        }
412    
413        @Test
414        public void testTranslateToJOfDGEOrDGIOrDGY() {
415            assertEquals( "TJ", this.getStringEncoder().metaphone("DODGY") );
416            assertEquals( "TJ", this.getStringEncoder().metaphone("DODGE") );
417            assertEquals( "AJMT", this.getStringEncoder().metaphone("ADGIEMTI") );
418        }
419    
420        @Test
421        public void testDiscardOfSilentHAfterG() {
422            assertEquals( "KNT", this.getStringEncoder().metaphone("GHENT") );
423            assertEquals( "B", this.getStringEncoder().metaphone("BAUGH") );
424        }
425    
426        @Test
427        public void testDiscardOfSilentGN() {
428            // NOTE: This does not test for silent GN, but for starting with GN
429            assertEquals( "N", this.getStringEncoder().metaphone("GNU") );
430    
431            // NOTE: Trying to test for GNED, but expected code does not appear to execute
432            assertEquals( "SNT", this.getStringEncoder().metaphone("SIGNED") );
433        }
434    
435        @Test
436        public void testPHTOF() {
437            assertEquals( "FX", this.getStringEncoder().metaphone("PHISH") );
438        }
439    
440        @Test
441        public void testSHAndSIOAndSIAToX() {
442            assertEquals( "XT", this.getStringEncoder().metaphone("SHOT") );
443            assertEquals( "OTXN", this.getStringEncoder().metaphone("ODSIAN") );
444            assertEquals( "PLXN", this.getStringEncoder().metaphone("PULSION") );
445        }
446    
447        @Test
448        public void testTIOAndTIAToX() {
449            assertEquals( "OX", this.getStringEncoder().metaphone("OTIA") );
450            assertEquals( "PRXN", this.getStringEncoder().metaphone("PORTION") );
451        }
452    
453        @Test
454        public void testTCH() {
455            assertEquals( "RX", this.getStringEncoder().metaphone("RETCH") );
456            assertEquals( "WX", this.getStringEncoder().metaphone("WATCH") );
457        }
458    
459        @Test
460        public void testExceedLength() {
461            // should be AKSKS, but istruncated by Max Code Length
462            assertEquals( "AKSK", this.getStringEncoder().metaphone("AXEAXE") );
463        }
464    
465        @Test
466        public void testSetMaxLengthWithTruncation() {
467            // should be AKSKS, but istruncated by Max Code Length
468            this.getStringEncoder().setMaxCodeLen( 6 );
469            assertEquals( "AKSKSK", this.getStringEncoder().metaphone("AXEAXEAXE") );
470        }
471    
472        public void validateFixture(final String[][] pairs) {
473            if (pairs.length == 0) {
474                fail("Test fixture is empty");
475            }
476            for (int i = 0; i < pairs.length; i++) {
477                if (pairs[i].length != 2) {
478                    fail("Error in test fixture in the data array at index " + i);
479                }
480            }
481        }
482    
483    }