View Javadoc
1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one or more
3    * contributor license agreements.  See the NOTICE file distributed with
4    * this work for additional information regarding copyright ownership.
5    * The ASF licenses this file to You under the Apache License, Version 2.0
6    * (the "License"); you may not use this file except in compliance with
7    * the License.  You may obtain a copy of the License at
8    *
9    *      http://www.apache.org/licenses/LICENSE-2.0
10   *
11   * Unless required by applicable law or agreed to in writing, software
12   * distributed under the License is distributed on an "AS IS" BASIS,
13   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14   * See the License for the specific language governing permissions and
15   * limitations under the License.
16   */
17  
18  package org.apache.commons.codec.language;
19  
20  import static org.junit.jupiter.api.Assertions.assertEquals;
21  import static org.junit.jupiter.api.Assertions.assertTrue;
22  import static org.junit.jupiter.api.Assertions.fail;
23  
24  import org.apache.commons.codec.AbstractStringEncoderTest;
25  import org.junit.jupiter.api.Test;
26  
27  /**
28   */
29  public class MetaphoneTest extends AbstractStringEncoderTest<Metaphone> {
30  
31      public void assertIsMetaphoneEqual(final String source, final String[] matches) {
32          // match source to all matches
33          for (final String matche : matches) {
34              assertTrue(this.getStringEncoder().isMetaphoneEqual(source, matche), "Source: " + source + ", should have same Metaphone as: " + matche);
35          }
36          // match to each other
37          for (final String matche : matches) {
38              for (final String matche2 : matches) {
39                  assertTrue(this.getStringEncoder().isMetaphoneEqual(matche, matche2));
40              }
41          }
42      }
43  
44      public void assertMetaphoneEqual(final String[][] pairs) {
45          this.validateFixture(pairs);
46          for (final String[] pair : pairs) {
47              final String name0 = pair[0];
48              final String name1 = pair[1];
49              final String failMsg = "Expected match between " + name0 + " and " + name1;
50              assertTrue(this.getStringEncoder().isMetaphoneEqual(name0, name1), failMsg);
51              assertTrue(this.getStringEncoder().isMetaphoneEqual(name1, name0), failMsg);
52          }
53      }
54  
55      @Override
56      protected Metaphone createStringEncoder() {
57          return new Metaphone();
58      }
59  
60      @Test
61      public void testDiscardOfSCEOrSCIOrSCY() {
62          assertEquals("SNS", this.getStringEncoder().metaphone("SCIENCE"));
63          assertEquals("SN", this.getStringEncoder().metaphone("SCENE"));
64          assertEquals("S", this.getStringEncoder().metaphone("SCY"));
65      }
66  
67      @Test
68      public void testDiscardOfSilentGN() {
69          // NOTE: This does not test for silent GN, but for starting with GN
70          assertEquals("N", this.getStringEncoder().metaphone("GNU"));
71  
72          // NOTE: Trying to test for GNED, but expected code does not appear to execute
73          assertEquals("SNT", this.getStringEncoder().metaphone("SIGNED"));
74      }
75  
76      @Test
77      public void testDiscardOfSilentHAfterG() {
78          assertEquals("KNT", this.getStringEncoder().metaphone("GHENT"));
79          assertEquals("B", this.getStringEncoder().metaphone("BAUGH"));
80      }
81  
82      @Test
83      public void testExceedLength() {
84          // should be AKSKS, but is truncated by Max Code Length
85          assertEquals("AKSK", this.getStringEncoder().metaphone("AXEAXE"));
86      }
87  
88      @Test
89      public void testIsMetaphoneEqual1() {
90          this.assertMetaphoneEqual(new String[][] { { "Case", "case" }, { "CASE", "Case" }, { "caSe", "cAsE" }, { "quick", "cookie" } });
91      }
92  
93      /**
94       * Matches computed from http://www.lanw.com/java/phonetic/default.htm
95       */
96      @Test
97      public void testIsMetaphoneEqual2() {
98          this.assertMetaphoneEqual(new String[][] { { "Lawrence", "Lorenza" }, { "Gary", "Cahra" }, });
99      }
100 
101     /**
102      * Initial AE case.
103      *
104      * Match data computed from http://www.lanw.com/java/phonetic/default.htm
105      */
106     @Test
107     public void testIsMetaphoneEqualAero() {
108         this.assertIsMetaphoneEqual("Aero", new String[] { "Eure" });
109     }
110 
111     /**
112      * Initial A, not followed by an E case.
113      *
114      * Match data computed from http://www.lanw.com/java/phonetic/default.htm
115      */
116     @Test
117     public void testIsMetaphoneEqualAlbert() {
118         this.assertIsMetaphoneEqual("Albert", new String[] { "Ailbert", "Alberik", "Albert", "Alberto", "Albrecht" });
119     }
120 
121     /**
122      * Match data computed from http://www.lanw.com/java/phonetic/default.htm
123      */
124     @Test
125     public void testIsMetaphoneEqualGary() {
126         this.assertIsMetaphoneEqual("Gary",
127                 new String[] { "Cahra", "Cara", "Carey", "Cari", "Caria", "Carie", "Caro", "Carree", "Carri", "Carrie", "Carry", "Cary", "Cora", "Corey",
128                         "Cori", "Corie", "Correy", "Corri", "Corrie", "Corry", "Cory", "Gray", "Kara", "Kare", "Karee", "Kari", "Karia", "Karie", "Karrah",
129                         "Karrie", "Karry", "Kary", "Keri", "Kerri", "Kerrie", "Kerry", "Kira", "Kiri", "Kora", "Kore", "Kori", "Korie", "Korrie", "Korry" });
130     }
131 
132     /**
133      * Match data computed from http://www.lanw.com/java/phonetic/default.htm
134      */
135     @Test
136     public void testIsMetaphoneEqualJohn() {
137         this.assertIsMetaphoneEqual("John",
138                 new String[] { "Gena", "Gene", "Genia", "Genna", "Genni", "Gennie", "Genny", "Giana", "Gianna", "Gina", "Ginni", "Ginnie", "Ginny", "Jaine",
139                         "Jan", "Jana", "Jane", "Janey", "Jania", "Janie", "Janna", "Jany", "Jayne", "Jean", "Jeana", "Jeane", "Jeanie", "Jeanna", "Jeanne",
140                         "Jeannie", "Jen", "Jena", "Jeni", "Jenn", "Jenna", "Jennee", "Jenni", "Jennie", "Jenny", "Jinny", "Jo Ann", "Jo-Ann", "Jo-Anne", "Joan",
141                         "Joana", "Joane", "Joanie", "Joann", "Joanna", "Joanne", "Joeann", "Johna", "Johnna", "Joni", "Jonie", "Juana", "June", "Junia",
142                         "Junie" });
143     }
144 
145     /**
146      * Initial KN case.
147      *
148      * Match data computed from http://www.lanw.com/java/phonetic/default.htm
149      */
150     @Test
151     public void testIsMetaphoneEqualKnight() {
152         this.assertIsMetaphoneEqual("Knight", new String[] { "Hynda", "Nada", "Nadia", "Nady", "Nat", "Nata", "Natty", "Neda", "Nedda", "Nedi", "Netta",
153                 "Netti", "Nettie", "Netty", "Nita", "Nydia" });
154     }
155 
156     /**
157      * Match data computed from http://www.lanw.com/java/phonetic/default.htm
158      */
159     @Test
160     public void testIsMetaphoneEqualMary() {
161         this.assertIsMetaphoneEqual("Mary", new String[] { "Mair", "Maire", "Mara", "Mareah", "Mari", "Maria", "Marie", "Mary", "Maura", "Maure", "Meara",
162                 "Merrie", "Merry", "Mira", "Moira", "Mora", "Moria", "Moyra", "Muire", "Myra", "Myrah" });
163     }
164 
165     /**
166      * Match data computed from http://www.lanw.com/java/phonetic/default.htm
167      */
168     @Test
169     public void testIsMetaphoneEqualParis() {
170         this.assertIsMetaphoneEqual("Paris", new String[] { "Pearcy", "Perris", "Piercy", "Pierz", "Pryse" });
171     }
172 
173     /**
174      * Match data computed from http://www.lanw.com/java/phonetic/default.htm
175      */
176     @Test
177     public void testIsMetaphoneEqualPeter() {
178         this.assertIsMetaphoneEqual("Peter", new String[] { "Peadar", "Peder", "Pedro", "Peter", "Petr", "Peyter", "Pieter", "Pietro", "Piotr" });
179     }
180 
181     /**
182      * Match data computed from http://www.lanw.com/java/phonetic/default.htm
183      */
184     @Test
185     public void testIsMetaphoneEqualRay() {
186         this.assertIsMetaphoneEqual("Ray", new String[] { "Ray", "Rey", "Roi", "Roy", "Ruy" });
187     }
188 
189     /**
190      * Match data computed from http://www.lanw.com/java/phonetic/default.htm
191      */
192     @Test
193     public void testIsMetaphoneEqualSusan() {
194         this.assertIsMetaphoneEqual("Susan",
195                 new String[] { "Siusan", "Sosanna", "Susan", "Susana", "Susann", "Susanna", "Susannah", "Susanne", "Suzann", "Suzanna", "Suzanne", "Zuzana" });
196     }
197 
198     /**
199      * Initial WH case.
200      *
201      * Match data computed from http://www.lanw.com/java/phonetic/default.htm
202      */
203     @Test
204     public void testIsMetaphoneEqualWhite() {
205         this.assertIsMetaphoneEqual("White",
206                 new String[] { "Wade", "Wait", "Waite", "Wat", "Whit", "Wiatt", "Wit", "Wittie", "Witty", "Wood", "Woodie", "Woody" });
207     }
208 
209     /**
210      * Initial WR case.
211      *
212      * Match data computed from http://www.lanw.com/java/phonetic/default.htm
213      */
214     @Test
215     public void testIsMetaphoneEqualWright() {
216         this.assertIsMetaphoneEqual("Wright", new String[] { "Rota", "Rudd", "Ryde" });
217     }
218 
219     /**
220      * Match data computed from http://www.lanw.com/java/phonetic/default.htm
221      */
222     @Test
223     public void testIsMetaphoneEqualXalan() {
224         this.assertIsMetaphoneEqual("Xalan", new String[] { "Celene", "Celina", "Celine", "Selena", "Selene", "Selina", "Seline", "Suellen", "Xylina" });
225     }
226 
227     @Test
228     public void testMetaphone() {
229         assertEquals("HL", this.getStringEncoder().metaphone("howl"));
230         assertEquals("TSTN", this.getStringEncoder().metaphone("testing"));
231         assertEquals("0", this.getStringEncoder().metaphone("The"));
232         assertEquals("KK", this.getStringEncoder().metaphone("quick"));
233         assertEquals("BRN", this.getStringEncoder().metaphone("brown"));
234         assertEquals("FKS", this.getStringEncoder().metaphone("fox"));
235         assertEquals("JMPT", this.getStringEncoder().metaphone("jumped"));
236         assertEquals("OFR", this.getStringEncoder().metaphone("over"));
237         assertEquals("0", this.getStringEncoder().metaphone("the"));
238         assertEquals("LS", this.getStringEncoder().metaphone("lazy"));
239         assertEquals("TKS", this.getStringEncoder().metaphone("dogs"));
240     }
241 
242     @Test
243     public void testPHTOF() {
244         assertEquals("FX", this.getStringEncoder().metaphone("PHISH"));
245     }
246 
247     @Test
248     public void testSetMaxLengthWithTruncation() {
249         // should be AKSKS, but istruncated by Max Code Length
250         this.getStringEncoder().setMaxCodeLen(6);
251         assertEquals("AKSKSK", this.getStringEncoder().metaphone("AXEAXEAXE"));
252     }
253 
254     @Test
255     public void testSHAndSIOAndSIAToX() {
256         assertEquals("XT", this.getStringEncoder().metaphone("SHOT"));
257         assertEquals("OTXN", this.getStringEncoder().metaphone("ODSIAN"));
258         assertEquals("PLXN", this.getStringEncoder().metaphone("PULSION"));
259     }
260 
261     @Test
262     public void testTCH() {
263         assertEquals("RX", this.getStringEncoder().metaphone("RETCH"));
264         assertEquals("WX", this.getStringEncoder().metaphone("WATCH"));
265     }
266 
267     @Test
268     public void testTIOAndTIAToX() {
269         assertEquals("OX", this.getStringEncoder().metaphone("OTIA"));
270         assertEquals("PRXN", this.getStringEncoder().metaphone("PORTION"));
271     }
272 
273     @Test
274     public void testTranslateOfSCHAndCH() {
275         assertEquals("SKTL", this.getStringEncoder().metaphone("SCHEDULE"));
276         assertEquals("SKMT", this.getStringEncoder().metaphone("SCHEMATIC"));
277 
278         assertEquals("KRKT", this.getStringEncoder().metaphone("CHARACTER"));
279         assertEquals("TX", this.getStringEncoder().metaphone("TEACH"));
280     }
281 
282     @Test
283     public void testTranslateToJOfDGEOrDGIOrDGY() {
284         assertEquals("TJ", this.getStringEncoder().metaphone("DODGY"));
285         assertEquals("TJ", this.getStringEncoder().metaphone("DODGE"));
286         assertEquals("AJMT", this.getStringEncoder().metaphone("ADGIEMTI"));
287     }
288 
289     /**
290      * Tests (CODEC-57) Metaphone.metaphone(String) returns an empty string when passed the word "why"
291      */
292     @Test
293     public void testWhy() {
294         // PHP returns "H". The original metaphone returns an empty string.
295         assertEquals("", this.getStringEncoder().metaphone("WHY"));
296     }
297 
298     @Test
299     public void testWordEndingInMB() {
300         assertEquals("KM", this.getStringEncoder().metaphone("COMB"));
301         assertEquals("TM", this.getStringEncoder().metaphone("TOMB"));
302         assertEquals("WM", this.getStringEncoder().metaphone("WOMB"));
303     }
304 
305     @Test
306     public void testWordsWithCIA() {
307         assertEquals("XP", this.getStringEncoder().metaphone("CIAPO"));
308     }
309 
310     public void validateFixture(final String[][] pairs) {
311         if (pairs.length == 0) {
312             fail("Test fixture is empty");
313         }
314         for (int i = 0; i < pairs.length; i++) {
315             if (pairs[i].length != 2) {
316                 fail("Error in test fixture in the data array at index " + i);
317             }
318         }
319     }
320 
321 }