001 /* 002 * Licensed to the Apache Software Foundation (ASF) under one or more 003 * contributor license agreements. See the NOTICE file distributed with 004 * this work for additional information regarding copyright ownership. 005 * The ASF licenses this file to You under the Apache License, Version 2.0 006 * (the "License"); you may not use this file except in compliance with 007 * the License. You may obtain a copy of the License at 008 * 009 * http://www.apache.org/licenses/LICENSE-2.0 010 * 011 * Unless required by applicable law or agreed to in writing, software 012 * distributed under the License is distributed on an "AS IS" BASIS, 013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 014 * See the License for the specific language governing permissions and 015 * limitations under the License. 016 */ 017 018 package org.apache.commons.codec.language; 019 020 import static org.junit.Assert.assertEquals; 021 import static org.junit.Assert.assertTrue; 022 import static org.junit.Assert.fail; 023 024 import org.apache.commons.codec.StringEncoder; 025 import org.apache.commons.codec.StringEncoderAbstractTest; 026 import org.junit.Test; 027 028 /** 029 * @version $Id: MetaphoneTest.html 889935 2013-12-11 05:05:13Z ggregory $ 030 */ 031 public class MetaphoneTest extends StringEncoderAbstractTest { 032 033 public void assertIsMetaphoneEqual(String source, String[] matches) { 034 // match source to all matches 035 for (String matche : matches) { 036 assertTrue("Source: " + source + ", should have same Metaphone as: " + matche, 037 this.getMetaphone().isMetaphoneEqual(source, matche)); 038 } 039 // match to each other 040 for (String matche : matches) { 041 for (String matche2 : matches) { 042 assertTrue(this.getMetaphone().isMetaphoneEqual(matche, matche2)); 043 } 044 } 045 } 046 047 public void assertMetaphoneEqual(String[][] pairs) { 048 this.validateFixture(pairs); 049 for (String[] pair : pairs) { 050 String name0 = pair[0]; 051 String name1 = pair[1]; 052 String failMsg = "Expected match between " + name0 + " and " + name1; 053 assertTrue(failMsg, this.getMetaphone().isMetaphoneEqual(name0, name1)); 054 assertTrue(failMsg, this.getMetaphone().isMetaphoneEqual(name1, name0)); 055 } 056 } 057 058 /** 059 * @return Returns the metaphone. 060 */ 061 private Metaphone getMetaphone() { 062 return (Metaphone) this.getStringEncoder(); 063 } 064 065 @Override 066 protected StringEncoder createStringEncoder() { 067 return new Metaphone(); 068 } 069 070 @Test 071 public void testIsMetaphoneEqual1() { 072 this.assertMetaphoneEqual(new String[][] { { "Case", "case" }, { 073 "CASE", "Case" }, { 074 "caSe", "cAsE" }, { 075 "quick", "cookie" } 076 }); 077 } 078 079 /** 080 * Matches computed from http://www.lanw.com/java/phonetic/default.htm 081 */ 082 @Test 083 public void testIsMetaphoneEqual2() { 084 this.assertMetaphoneEqual(new String[][] { { "Lawrence", "Lorenza" }, { 085 "Gary", "Cahra" }, }); 086 } 087 088 /** 089 * Initial AE case. 090 * 091 * Match data computed from http://www.lanw.com/java/phonetic/default.htm 092 */ 093 @Test 094 public void testIsMetaphoneEqualAero() { 095 this.assertIsMetaphoneEqual("Aero", new String[] { "Eure" }); 096 } 097 098 /** 099 * Initial WH case. 100 * 101 * Match data computed from http://www.lanw.com/java/phonetic/default.htm 102 */ 103 @Test 104 public void testIsMetaphoneEqualWhite() { 105 this.assertIsMetaphoneEqual( 106 "White", 107 new String[] { "Wade", "Wait", "Waite", "Wat", "Whit", "Wiatt", "Wit", "Wittie", "Witty", "Wood", "Woodie", "Woody" }); 108 } 109 110 /** 111 * Initial A, not followed by an E case. 112 * 113 * Match data computed from http://www.lanw.com/java/phonetic/default.htm 114 */ 115 @Test 116 public void testIsMetaphoneEqualAlbert() { 117 this.assertIsMetaphoneEqual("Albert", new String[] { "Ailbert", "Alberik", "Albert", "Alberto", "Albrecht" }); 118 } 119 120 /** 121 * Match data computed from http://www.lanw.com/java/phonetic/default.htm 122 */ 123 @Test 124 public void testIsMetaphoneEqualGary() { 125 this.assertIsMetaphoneEqual( 126 "Gary", 127 new String[] { 128 "Cahra", 129 "Cara", 130 "Carey", 131 "Cari", 132 "Caria", 133 "Carie", 134 "Caro", 135 "Carree", 136 "Carri", 137 "Carrie", 138 "Carry", 139 "Cary", 140 "Cora", 141 "Corey", 142 "Cori", 143 "Corie", 144 "Correy", 145 "Corri", 146 "Corrie", 147 "Corry", 148 "Cory", 149 "Gray", 150 "Kara", 151 "Kare", 152 "Karee", 153 "Kari", 154 "Karia", 155 "Karie", 156 "Karrah", 157 "Karrie", 158 "Karry", 159 "Kary", 160 "Keri", 161 "Kerri", 162 "Kerrie", 163 "Kerry", 164 "Kira", 165 "Kiri", 166 "Kora", 167 "Kore", 168 "Kori", 169 "Korie", 170 "Korrie", 171 "Korry" }); 172 } 173 174 /** 175 * Match data computed from http://www.lanw.com/java/phonetic/default.htm 176 */ 177 @Test 178 public void testIsMetaphoneEqualJohn() { 179 this.assertIsMetaphoneEqual( 180 "John", 181 new String[] { 182 "Gena", 183 "Gene", 184 "Genia", 185 "Genna", 186 "Genni", 187 "Gennie", 188 "Genny", 189 "Giana", 190 "Gianna", 191 "Gina", 192 "Ginni", 193 "Ginnie", 194 "Ginny", 195 "Jaine", 196 "Jan", 197 "Jana", 198 "Jane", 199 "Janey", 200 "Jania", 201 "Janie", 202 "Janna", 203 "Jany", 204 "Jayne", 205 "Jean", 206 "Jeana", 207 "Jeane", 208 "Jeanie", 209 "Jeanna", 210 "Jeanne", 211 "Jeannie", 212 "Jen", 213 "Jena", 214 "Jeni", 215 "Jenn", 216 "Jenna", 217 "Jennee", 218 "Jenni", 219 "Jennie", 220 "Jenny", 221 "Jinny", 222 "Jo Ann", 223 "Jo-Ann", 224 "Jo-Anne", 225 "Joan", 226 "Joana", 227 "Joane", 228 "Joanie", 229 "Joann", 230 "Joanna", 231 "Joanne", 232 "Joeann", 233 "Johna", 234 "Johnna", 235 "Joni", 236 "Jonie", 237 "Juana", 238 "June", 239 "Junia", 240 "Junie" }); 241 } 242 243 /** 244 * Initial KN case. 245 * 246 * Match data computed from http://www.lanw.com/java/phonetic/default.htm 247 */ 248 @Test 249 public void testIsMetaphoneEqualKnight() { 250 this.assertIsMetaphoneEqual( 251 "Knight", 252 new String[] { 253 "Hynda", 254 "Nada", 255 "Nadia", 256 "Nady", 257 "Nat", 258 "Nata", 259 "Natty", 260 "Neda", 261 "Nedda", 262 "Nedi", 263 "Netta", 264 "Netti", 265 "Nettie", 266 "Netty", 267 "Nita", 268 "Nydia" }); 269 } 270 /** 271 * Match data computed from http://www.lanw.com/java/phonetic/default.htm 272 */ 273 @Test 274 public void testIsMetaphoneEqualMary() { 275 this.assertIsMetaphoneEqual( 276 "Mary", 277 new String[] { 278 "Mair", 279 "Maire", 280 "Mara", 281 "Mareah", 282 "Mari", 283 "Maria", 284 "Marie", 285 "Mary", 286 "Maura", 287 "Maure", 288 "Meara", 289 "Merrie", 290 "Merry", 291 "Mira", 292 "Moira", 293 "Mora", 294 "Moria", 295 "Moyra", 296 "Muire", 297 "Myra", 298 "Myrah" }); 299 } 300 301 /** 302 * Match data computed from http://www.lanw.com/java/phonetic/default.htm 303 */ 304 @Test 305 public void testIsMetaphoneEqualParis() { 306 this.assertIsMetaphoneEqual("Paris", new String[] { "Pearcy", "Perris", "Piercy", "Pierz", "Pryse" }); 307 } 308 309 /** 310 * Match data computed from http://www.lanw.com/java/phonetic/default.htm 311 */ 312 @Test 313 public void testIsMetaphoneEqualPeter() { 314 this.assertIsMetaphoneEqual( 315 "Peter", 316 new String[] { "Peadar", "Peder", "Pedro", "Peter", "Petr", "Peyter", "Pieter", "Pietro", "Piotr" }); 317 } 318 319 /** 320 * Match data computed from http://www.lanw.com/java/phonetic/default.htm 321 */ 322 @Test 323 public void testIsMetaphoneEqualRay() { 324 this.assertIsMetaphoneEqual("Ray", new String[] { "Ray", "Rey", "Roi", "Roy", "Ruy" }); 325 } 326 327 /** 328 * Match data computed from http://www.lanw.com/java/phonetic/default.htm 329 */ 330 @Test 331 public void testIsMetaphoneEqualSusan() { 332 this.assertIsMetaphoneEqual( 333 "Susan", 334 new String[] { 335 "Siusan", 336 "Sosanna", 337 "Susan", 338 "Susana", 339 "Susann", 340 "Susanna", 341 "Susannah", 342 "Susanne", 343 "Suzann", 344 "Suzanna", 345 "Suzanne", 346 "Zuzana" }); 347 } 348 349 /** 350 * Initial WR case. 351 * 352 * Match data computed from http://www.lanw.com/java/phonetic/default.htm 353 */ 354 @Test 355 public void testIsMetaphoneEqualWright() { 356 this.assertIsMetaphoneEqual("Wright", new String[] { "Rota", "Rudd", "Ryde" }); 357 } 358 359 /** 360 * Match data computed from http://www.lanw.com/java/phonetic/default.htm 361 */ 362 @Test 363 public void testIsMetaphoneEqualXalan() { 364 this.assertIsMetaphoneEqual( 365 "Xalan", 366 new String[] { "Celene", "Celina", "Celine", "Selena", "Selene", "Selina", "Seline", "Suellen", "Xylina" }); 367 } 368 369 @Test 370 public void testMetaphone() { 371 assertEquals("HL", this.getMetaphone().metaphone("howl")); 372 assertEquals("TSTN", this.getMetaphone().metaphone("testing")); 373 assertEquals("0", this.getMetaphone().metaphone("The")); 374 assertEquals("KK", this.getMetaphone().metaphone("quick")); 375 assertEquals("BRN", this.getMetaphone().metaphone("brown")); 376 assertEquals("FKS", this.getMetaphone().metaphone("fox")); 377 assertEquals("JMPT", this.getMetaphone().metaphone("jumped")); 378 assertEquals("OFR", this.getMetaphone().metaphone("over")); 379 assertEquals("0", this.getMetaphone().metaphone("the")); 380 assertEquals("LS", this.getMetaphone().metaphone("lazy")); 381 assertEquals("TKS", this.getMetaphone().metaphone("dogs")); 382 } 383 384 @Test 385 public void testWordEndingInMB() { 386 assertEquals( "KM", this.getMetaphone().metaphone("COMB") ); 387 assertEquals( "TM", this.getMetaphone().metaphone("TOMB") ); 388 assertEquals( "WM", this.getMetaphone().metaphone("WOMB") ); 389 } 390 391 @Test 392 public void testDiscardOfSCEOrSCIOrSCY() { 393 assertEquals( "SNS", this.getMetaphone().metaphone("SCIENCE") ); 394 assertEquals( "SN", this.getMetaphone().metaphone("SCENE") ); 395 assertEquals( "S", this.getMetaphone().metaphone("SCY") ); 396 } 397 398 /** 399 * Tests (CODEC-57) Metaphone.metaphone(String) returns an empty string when passed the word "why" 400 */ 401 @Test 402 public void testWhy() { 403 // PHP returns "H". The original metaphone returns an empty string. 404 assertEquals("", this.getMetaphone().metaphone("WHY")); 405 } 406 407 @Test 408 public void testWordsWithCIA() { 409 assertEquals( "XP", this.getMetaphone().metaphone("CIAPO") ); 410 } 411 412 @Test 413 public void testTranslateOfSCHAndCH() { 414 assertEquals( "SKTL", this.getMetaphone().metaphone("SCHEDULE") ); 415 assertEquals( "SKMT", this.getMetaphone().metaphone("SCHEMATIC") ); 416 417 assertEquals( "KRKT", this.getMetaphone().metaphone("CHARACTER") ); 418 assertEquals( "TX", this.getMetaphone().metaphone("TEACH") ); 419 } 420 421 @Test 422 public void testTranslateToJOfDGEOrDGIOrDGY() { 423 assertEquals( "TJ", this.getMetaphone().metaphone("DODGY") ); 424 assertEquals( "TJ", this.getMetaphone().metaphone("DODGE") ); 425 assertEquals( "AJMT", this.getMetaphone().metaphone("ADGIEMTI") ); 426 } 427 428 @Test 429 public void testDiscardOfSilentHAfterG() { 430 assertEquals( "KNT", this.getMetaphone().metaphone("GHENT") ); 431 assertEquals( "B", this.getMetaphone().metaphone("BAUGH") ); 432 } 433 434 @Test 435 public void testDiscardOfSilentGN() { 436 // NOTE: This does not test for silent GN, but for starting with GN 437 assertEquals( "N", this.getMetaphone().metaphone("GNU") ); 438 439 // NOTE: Trying to test for GNED, but expected code does not appear to execute 440 assertEquals( "SNT", this.getMetaphone().metaphone("SIGNED") ); 441 } 442 443 @Test 444 public void testPHTOF() { 445 assertEquals( "FX", this.getMetaphone().metaphone("PHISH") ); 446 } 447 448 @Test 449 public void testSHAndSIOAndSIAToX() { 450 assertEquals( "XT", this.getMetaphone().metaphone("SHOT") ); 451 assertEquals( "OTXN", this.getMetaphone().metaphone("ODSIAN") ); 452 assertEquals( "PLXN", this.getMetaphone().metaphone("PULSION") ); 453 } 454 455 @Test 456 public void testTIOAndTIAToX() { 457 assertEquals( "OX", this.getMetaphone().metaphone("OTIA") ); 458 assertEquals( "PRXN", this.getMetaphone().metaphone("PORTION") ); 459 } 460 461 @Test 462 public void testTCH() { 463 assertEquals( "RX", this.getMetaphone().metaphone("RETCH") ); 464 assertEquals( "WX", this.getMetaphone().metaphone("WATCH") ); 465 } 466 467 @Test 468 public void testExceedLength() { 469 // should be AKSKS, but istruncated by Max Code Length 470 assertEquals( "AKSK", this.getMetaphone().metaphone("AXEAXE") ); 471 } 472 473 @Test 474 public void testSetMaxLengthWithTruncation() { 475 // should be AKSKS, but istruncated by Max Code Length 476 this.getMetaphone().setMaxCodeLen( 6 ); 477 assertEquals( "AKSKSK", this.getMetaphone().metaphone("AXEAXEAXE") ); 478 } 479 480 public void validateFixture(String[][] pairs) { 481 if (pairs.length == 0) { 482 fail("Test fixture is empty"); 483 } 484 for (int i = 0; i < pairs.length; i++) { 485 if (pairs[i].length != 2) { 486 fail("Error in test fixture in the data array at index " + i); 487 } 488 } 489 } 490 491 }