001 /* 002 * Licensed to the Apache Software Foundation (ASF) under one or more 003 * contributor license agreements. See the NOTICE file distributed with 004 * this work for additional information regarding copyright ownership. 005 * The ASF licenses this file to You under the Apache License, Version 2.0 006 * (the "License"); you may not use this file except in compliance with 007 * the License. You may obtain a copy of the License at 008 * 009 * http://www.apache.org/licenses/LICENSE-2.0 010 * 011 * Unless required by applicable law or agreed to in writing, software 012 * distributed under the License is distributed on an "AS IS" BASIS, 013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 014 * See the License for the specific language governing permissions and 015 * limitations under the License. 016 */ 017 018 package org.apache.commons.codec.language; 019 020 import static org.junit.Assert.assertEquals; 021 import static org.junit.Assert.assertTrue; 022 import static org.junit.Assert.fail; 023 024 import org.apache.commons.codec.StringEncoderAbstractTest; 025 import org.junit.Test; 026 027 /** 028 * @version $Id: MetaphoneTest.html 889935 2013-12-11 05:05:13Z ggregory $ 029 */ 030 public class MetaphoneTest extends StringEncoderAbstractTest<Metaphone> { 031 032 public void assertIsMetaphoneEqual(final String source, final String[] matches) { 033 // match source to all matches 034 for (final String matche : matches) { 035 assertTrue("Source: " + source + ", should have same Metaphone as: " + matche, 036 this.getStringEncoder().isMetaphoneEqual(source, matche)); 037 } 038 // match to each other 039 for (final String matche : matches) { 040 for (final String matche2 : matches) { 041 assertTrue(this.getStringEncoder().isMetaphoneEqual(matche, matche2)); 042 } 043 } 044 } 045 046 public void assertMetaphoneEqual(final String[][] pairs) { 047 this.validateFixture(pairs); 048 for (final String[] pair : pairs) { 049 final String name0 = pair[0]; 050 final String name1 = pair[1]; 051 final String failMsg = "Expected match between " + name0 + " and " + name1; 052 assertTrue(failMsg, this.getStringEncoder().isMetaphoneEqual(name0, name1)); 053 assertTrue(failMsg, this.getStringEncoder().isMetaphoneEqual(name1, name0)); 054 } 055 } 056 057 @Override 058 protected Metaphone createStringEncoder() { 059 return new Metaphone(); 060 } 061 062 @Test 063 public void testIsMetaphoneEqual1() { 064 this.assertMetaphoneEqual(new String[][] { { "Case", "case" }, { 065 "CASE", "Case" }, { 066 "caSe", "cAsE" }, { 067 "quick", "cookie" } 068 }); 069 } 070 071 /** 072 * Matches computed from http://www.lanw.com/java/phonetic/default.htm 073 */ 074 @Test 075 public void testIsMetaphoneEqual2() { 076 this.assertMetaphoneEqual(new String[][] { { "Lawrence", "Lorenza" }, { 077 "Gary", "Cahra" }, }); 078 } 079 080 /** 081 * Initial AE case. 082 * 083 * Match data computed from http://www.lanw.com/java/phonetic/default.htm 084 */ 085 @Test 086 public void testIsMetaphoneEqualAero() { 087 this.assertIsMetaphoneEqual("Aero", new String[] { "Eure" }); 088 } 089 090 /** 091 * Initial WH case. 092 * 093 * Match data computed from http://www.lanw.com/java/phonetic/default.htm 094 */ 095 @Test 096 public void testIsMetaphoneEqualWhite() { 097 this.assertIsMetaphoneEqual( 098 "White", 099 new String[] { "Wade", "Wait", "Waite", "Wat", "Whit", "Wiatt", "Wit", "Wittie", "Witty", "Wood", "Woodie", "Woody" }); 100 } 101 102 /** 103 * Initial A, not followed by an E case. 104 * 105 * Match data computed from http://www.lanw.com/java/phonetic/default.htm 106 */ 107 @Test 108 public void testIsMetaphoneEqualAlbert() { 109 this.assertIsMetaphoneEqual("Albert", new String[] { "Ailbert", "Alberik", "Albert", "Alberto", "Albrecht" }); 110 } 111 112 /** 113 * Match data computed from http://www.lanw.com/java/phonetic/default.htm 114 */ 115 @Test 116 public void testIsMetaphoneEqualGary() { 117 this.assertIsMetaphoneEqual( 118 "Gary", 119 new String[] { 120 "Cahra", 121 "Cara", 122 "Carey", 123 "Cari", 124 "Caria", 125 "Carie", 126 "Caro", 127 "Carree", 128 "Carri", 129 "Carrie", 130 "Carry", 131 "Cary", 132 "Cora", 133 "Corey", 134 "Cori", 135 "Corie", 136 "Correy", 137 "Corri", 138 "Corrie", 139 "Corry", 140 "Cory", 141 "Gray", 142 "Kara", 143 "Kare", 144 "Karee", 145 "Kari", 146 "Karia", 147 "Karie", 148 "Karrah", 149 "Karrie", 150 "Karry", 151 "Kary", 152 "Keri", 153 "Kerri", 154 "Kerrie", 155 "Kerry", 156 "Kira", 157 "Kiri", 158 "Kora", 159 "Kore", 160 "Kori", 161 "Korie", 162 "Korrie", 163 "Korry" }); 164 } 165 166 /** 167 * Match data computed from http://www.lanw.com/java/phonetic/default.htm 168 */ 169 @Test 170 public void testIsMetaphoneEqualJohn() { 171 this.assertIsMetaphoneEqual( 172 "John", 173 new String[] { 174 "Gena", 175 "Gene", 176 "Genia", 177 "Genna", 178 "Genni", 179 "Gennie", 180 "Genny", 181 "Giana", 182 "Gianna", 183 "Gina", 184 "Ginni", 185 "Ginnie", 186 "Ginny", 187 "Jaine", 188 "Jan", 189 "Jana", 190 "Jane", 191 "Janey", 192 "Jania", 193 "Janie", 194 "Janna", 195 "Jany", 196 "Jayne", 197 "Jean", 198 "Jeana", 199 "Jeane", 200 "Jeanie", 201 "Jeanna", 202 "Jeanne", 203 "Jeannie", 204 "Jen", 205 "Jena", 206 "Jeni", 207 "Jenn", 208 "Jenna", 209 "Jennee", 210 "Jenni", 211 "Jennie", 212 "Jenny", 213 "Jinny", 214 "Jo Ann", 215 "Jo-Ann", 216 "Jo-Anne", 217 "Joan", 218 "Joana", 219 "Joane", 220 "Joanie", 221 "Joann", 222 "Joanna", 223 "Joanne", 224 "Joeann", 225 "Johna", 226 "Johnna", 227 "Joni", 228 "Jonie", 229 "Juana", 230 "June", 231 "Junia", 232 "Junie" }); 233 } 234 235 /** 236 * Initial KN case. 237 * 238 * Match data computed from http://www.lanw.com/java/phonetic/default.htm 239 */ 240 @Test 241 public void testIsMetaphoneEqualKnight() { 242 this.assertIsMetaphoneEqual( 243 "Knight", 244 new String[] { 245 "Hynda", 246 "Nada", 247 "Nadia", 248 "Nady", 249 "Nat", 250 "Nata", 251 "Natty", 252 "Neda", 253 "Nedda", 254 "Nedi", 255 "Netta", 256 "Netti", 257 "Nettie", 258 "Netty", 259 "Nita", 260 "Nydia" }); 261 } 262 /** 263 * Match data computed from http://www.lanw.com/java/phonetic/default.htm 264 */ 265 @Test 266 public void testIsMetaphoneEqualMary() { 267 this.assertIsMetaphoneEqual( 268 "Mary", 269 new String[] { 270 "Mair", 271 "Maire", 272 "Mara", 273 "Mareah", 274 "Mari", 275 "Maria", 276 "Marie", 277 "Mary", 278 "Maura", 279 "Maure", 280 "Meara", 281 "Merrie", 282 "Merry", 283 "Mira", 284 "Moira", 285 "Mora", 286 "Moria", 287 "Moyra", 288 "Muire", 289 "Myra", 290 "Myrah" }); 291 } 292 293 /** 294 * Match data computed from http://www.lanw.com/java/phonetic/default.htm 295 */ 296 @Test 297 public void testIsMetaphoneEqualParis() { 298 this.assertIsMetaphoneEqual("Paris", new String[] { "Pearcy", "Perris", "Piercy", "Pierz", "Pryse" }); 299 } 300 301 /** 302 * Match data computed from http://www.lanw.com/java/phonetic/default.htm 303 */ 304 @Test 305 public void testIsMetaphoneEqualPeter() { 306 this.assertIsMetaphoneEqual( 307 "Peter", 308 new String[] { "Peadar", "Peder", "Pedro", "Peter", "Petr", "Peyter", "Pieter", "Pietro", "Piotr" }); 309 } 310 311 /** 312 * Match data computed from http://www.lanw.com/java/phonetic/default.htm 313 */ 314 @Test 315 public void testIsMetaphoneEqualRay() { 316 this.assertIsMetaphoneEqual("Ray", new String[] { "Ray", "Rey", "Roi", "Roy", "Ruy" }); 317 } 318 319 /** 320 * Match data computed from http://www.lanw.com/java/phonetic/default.htm 321 */ 322 @Test 323 public void testIsMetaphoneEqualSusan() { 324 this.assertIsMetaphoneEqual( 325 "Susan", 326 new String[] { 327 "Siusan", 328 "Sosanna", 329 "Susan", 330 "Susana", 331 "Susann", 332 "Susanna", 333 "Susannah", 334 "Susanne", 335 "Suzann", 336 "Suzanna", 337 "Suzanne", 338 "Zuzana" }); 339 } 340 341 /** 342 * Initial WR case. 343 * 344 * Match data computed from http://www.lanw.com/java/phonetic/default.htm 345 */ 346 @Test 347 public void testIsMetaphoneEqualWright() { 348 this.assertIsMetaphoneEqual("Wright", new String[] { "Rota", "Rudd", "Ryde" }); 349 } 350 351 /** 352 * Match data computed from http://www.lanw.com/java/phonetic/default.htm 353 */ 354 @Test 355 public void testIsMetaphoneEqualXalan() { 356 this.assertIsMetaphoneEqual( 357 "Xalan", 358 new String[] { "Celene", "Celina", "Celine", "Selena", "Selene", "Selina", "Seline", "Suellen", "Xylina" }); 359 } 360 361 @Test 362 public void testMetaphone() { 363 assertEquals("HL", this.getStringEncoder().metaphone("howl")); 364 assertEquals("TSTN", this.getStringEncoder().metaphone("testing")); 365 assertEquals("0", this.getStringEncoder().metaphone("The")); 366 assertEquals("KK", this.getStringEncoder().metaphone("quick")); 367 assertEquals("BRN", this.getStringEncoder().metaphone("brown")); 368 assertEquals("FKS", this.getStringEncoder().metaphone("fox")); 369 assertEquals("JMPT", this.getStringEncoder().metaphone("jumped")); 370 assertEquals("OFR", this.getStringEncoder().metaphone("over")); 371 assertEquals("0", this.getStringEncoder().metaphone("the")); 372 assertEquals("LS", this.getStringEncoder().metaphone("lazy")); 373 assertEquals("TKS", this.getStringEncoder().metaphone("dogs")); 374 } 375 376 @Test 377 public void testWordEndingInMB() { 378 assertEquals( "KM", this.getStringEncoder().metaphone("COMB") ); 379 assertEquals( "TM", this.getStringEncoder().metaphone("TOMB") ); 380 assertEquals( "WM", this.getStringEncoder().metaphone("WOMB") ); 381 } 382 383 @Test 384 public void testDiscardOfSCEOrSCIOrSCY() { 385 assertEquals( "SNS", this.getStringEncoder().metaphone("SCIENCE") ); 386 assertEquals( "SN", this.getStringEncoder().metaphone("SCENE") ); 387 assertEquals( "S", this.getStringEncoder().metaphone("SCY") ); 388 } 389 390 /** 391 * Tests (CODEC-57) Metaphone.metaphone(String) returns an empty string when passed the word "why" 392 */ 393 @Test 394 public void testWhy() { 395 // PHP returns "H". The original metaphone returns an empty string. 396 assertEquals("", this.getStringEncoder().metaphone("WHY")); 397 } 398 399 @Test 400 public void testWordsWithCIA() { 401 assertEquals( "XP", this.getStringEncoder().metaphone("CIAPO") ); 402 } 403 404 @Test 405 public void testTranslateOfSCHAndCH() { 406 assertEquals( "SKTL", this.getStringEncoder().metaphone("SCHEDULE") ); 407 assertEquals( "SKMT", this.getStringEncoder().metaphone("SCHEMATIC") ); 408 409 assertEquals( "KRKT", this.getStringEncoder().metaphone("CHARACTER") ); 410 assertEquals( "TX", this.getStringEncoder().metaphone("TEACH") ); 411 } 412 413 @Test 414 public void testTranslateToJOfDGEOrDGIOrDGY() { 415 assertEquals( "TJ", this.getStringEncoder().metaphone("DODGY") ); 416 assertEquals( "TJ", this.getStringEncoder().metaphone("DODGE") ); 417 assertEquals( "AJMT", this.getStringEncoder().metaphone("ADGIEMTI") ); 418 } 419 420 @Test 421 public void testDiscardOfSilentHAfterG() { 422 assertEquals( "KNT", this.getStringEncoder().metaphone("GHENT") ); 423 assertEquals( "B", this.getStringEncoder().metaphone("BAUGH") ); 424 } 425 426 @Test 427 public void testDiscardOfSilentGN() { 428 // NOTE: This does not test for silent GN, but for starting with GN 429 assertEquals( "N", this.getStringEncoder().metaphone("GNU") ); 430 431 // NOTE: Trying to test for GNED, but expected code does not appear to execute 432 assertEquals( "SNT", this.getStringEncoder().metaphone("SIGNED") ); 433 } 434 435 @Test 436 public void testPHTOF() { 437 assertEquals( "FX", this.getStringEncoder().metaphone("PHISH") ); 438 } 439 440 @Test 441 public void testSHAndSIOAndSIAToX() { 442 assertEquals( "XT", this.getStringEncoder().metaphone("SHOT") ); 443 assertEquals( "OTXN", this.getStringEncoder().metaphone("ODSIAN") ); 444 assertEquals( "PLXN", this.getStringEncoder().metaphone("PULSION") ); 445 } 446 447 @Test 448 public void testTIOAndTIAToX() { 449 assertEquals( "OX", this.getStringEncoder().metaphone("OTIA") ); 450 assertEquals( "PRXN", this.getStringEncoder().metaphone("PORTION") ); 451 } 452 453 @Test 454 public void testTCH() { 455 assertEquals( "RX", this.getStringEncoder().metaphone("RETCH") ); 456 assertEquals( "WX", this.getStringEncoder().metaphone("WATCH") ); 457 } 458 459 @Test 460 public void testExceedLength() { 461 // should be AKSKS, but istruncated by Max Code Length 462 assertEquals( "AKSK", this.getStringEncoder().metaphone("AXEAXE") ); 463 } 464 465 @Test 466 public void testSetMaxLengthWithTruncation() { 467 // should be AKSKS, but istruncated by Max Code Length 468 this.getStringEncoder().setMaxCodeLen( 6 ); 469 assertEquals( "AKSKSK", this.getStringEncoder().metaphone("AXEAXEAXE") ); 470 } 471 472 public void validateFixture(final String[][] pairs) { 473 if (pairs.length == 0) { 474 fail("Test fixture is empty"); 475 } 476 for (int i = 0; i < pairs.length; i++) { 477 if (pairs[i].length != 2) { 478 fail("Error in test fixture in the data array at index " + i); 479 } 480 } 481 } 482 483 }