001 /* 002 * Licensed to the Apache Software Foundation (ASF) under one or more 003 * contributor license agreements. See the NOTICE file distributed with 004 * this work for additional information regarding copyright ownership. 005 * The ASF licenses this file to You under the Apache License, Version 2.0 006 * (the "License"); you may not use this file except in compliance with 007 * the License. You may obtain a copy of the License at 008 * 009 * http://www.apache.org/licenses/LICENSE-2.0 010 * 011 * Unless required by applicable law or agreed to in writing, software 012 * distributed under the License is distributed on an "AS IS" BASIS, 013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 014 * See the License for the specific language governing permissions and 015 * limitations under the License. 016 */ 017 018 package org.apache.commons.codec.language; 019 020 import static org.junit.Assert.assertEquals; 021 import static org.junit.Assert.assertFalse; 022 import static org.junit.Assert.assertTrue; 023 import static org.junit.Assert.fail; 024 025 import org.apache.commons.codec.EncoderException; 026 import org.apache.commons.codec.StringEncoder; 027 import org.apache.commons.codec.StringEncoderAbstractTest; 028 import org.junit.Test; 029 030 /** 031 * Tests {@link DoubleMetaphone}. 032 * 033 * <p>Keep this file in UTF-8 encoding for proper Javadoc processing.</p> 034 * 035 * @see "http://www.cuj.com/documents/s=8038/cuj0006philips/" 036 * @version $Id: DoubleMetaphoneTest.html 889935 2013-12-11 05:05:13Z ggregory $ 037 */ 038 public class DoubleMetaphoneTest extends StringEncoderAbstractTest { 039 040 /** 041 * Test data from http://aspell.sourceforge.net/test/batch0.tab. 042 * 043 * "Copyright (C) 2002 Kevin Atkinson (kevina@gnu.org). Verbatim copying 044 * and distribution of this entire article is permitted in any medium, 045 * provided this notice is preserved." 046 * 047 * Massaged the test data in the array below. 048 */ 049 private static final String[][] FIXTURE = { { "Accosinly", "Occasionally" }, { 050 "Ciculer", "Circler" }, { 051 "Circue", "Circle" }, { 052 "Maddness", "Madness" }, { 053 "Occusionaly", "Occasionally" }, { 054 "Steffen", "Stephen" }, { 055 "Thw", "The" }, { 056 "Unformanlly", "Unfortunately" }, { 057 "Unfortally", "Unfortunately" }, { 058 "abilitey", "ability" }, { 059 "abouy", "about" }, { 060 "absorbtion", "absorption" }, { 061 "accidently", "accidentally" }, { 062 "accomodate", "accommodate" }, { 063 "acommadate", "accommodate" }, { 064 "acord", "accord" }, { 065 "adultry", "adultery" }, { 066 "aggresive", "aggressive" }, { 067 "alchohol", "alcohol" }, { 068 "alchoholic", "alcoholic" }, { 069 "allieve", "alive" }, { 070 "alot", "a lot" }, { 071 "alright", "all right" }, { 072 "amature", "amateur" }, { 073 "ambivilant", "ambivalent" }, { 074 "amification", "amplification" }, { 075 "amourfous", "amorphous" }, { 076 "annoint", "anoint" }, { 077 "annonsment", "announcement" }, { 078 "annoyting", "anting" }, { 079 "annuncio", "announce" }, { 080 "anonomy", "anatomy" }, { 081 "anotomy", "anatomy" }, { 082 "antidesestablishmentarianism", "antidisestablishmentarianism" }, { 083 "antidisestablishmentarism", "antidisestablishmentarianism" }, { 084 "anynomous", "anonymous" }, { 085 "appelet", "applet" }, { 086 "appreceiated", "appreciated" }, { 087 "appresteate", "appreciate" }, { 088 "aquantance", "acquaintance" }, { 089 "aratictature", "architecture" }, { 090 "archeype", "archetype" }, { 091 "aricticure", "architecture" }, { 092 "artic", "arctic" }, { 093 "asentote", "asymptote" }, { 094 "ast", "at" }, { 095 "asterick", "asterisk" }, { 096 "asymetric", "asymmetric" }, { 097 "atentively", "attentively" }, { 098 "autoamlly", "automatically" }, { 099 "bankrot", "bankrupt" }, { 100 "basicly", "basically" }, { 101 "batallion", "battalion" }, { 102 "bbrose", "browse" }, { 103 "beauro", "bureau" }, { 104 "beaurocracy", "bureaucracy" }, { 105 "beggining", "beginning" }, { 106 "beging", "beginning" }, { 107 "behaviour", "behavior" }, { 108 "beleive", "believe" }, { 109 "belive", "believe" }, { 110 "benidifs", "benefits" }, { 111 "bigginging", "beginning" }, { 112 "blait", "bleat" }, { 113 "bouyant", "buoyant" }, { 114 "boygot", "boycott" }, { 115 "brocolli", "broccoli" }, { 116 "buch", "bush" }, { 117 "buder", "butter" }, { 118 "budr", "butter" }, { 119 "budter", "butter" }, { 120 "buracracy", "bureaucracy" }, { 121 "burracracy", "bureaucracy" }, { 122 "buton", "button" }, { 123 "byby", "by by" }, { 124 "cauler", "caller" }, { 125 "ceasar", "caesar" }, { 126 "cemetary", "cemetery" }, { 127 "changeing", "changing" }, { 128 "cheet", "cheat" }, { 129 "cicle", "circle" }, { 130 "cimplicity", "simplicity" }, { 131 "circumstaces", "circumstances" }, { 132 "clob", "club" }, { 133 "coaln", "colon" }, { 134 "cocamena", "cockamamie" }, { 135 "colleaque", "colleague" }, { 136 "colloquilism", "colloquialism" }, { 137 "columne", "column" }, { 138 "comiler", "compiler" }, { 139 "comitmment", "commitment" }, { 140 "comitte", "committee" }, { 141 "comittmen", "commitment" }, { 142 "comittmend", "commitment" }, { 143 "commerciasl", "commercials" }, { 144 "commited", "committed" }, { 145 "commitee", "committee" }, { 146 "companys", "companies" }, { 147 "compicated", "complicated" }, { 148 "comupter", "computer" }, { 149 "concensus", "consensus" }, { 150 "confusionism", "confucianism" }, { 151 "congradulations", "congratulations" }, { 152 "conibation", "contribution" }, { 153 "consident", "consistent" }, { 154 "consident", "consonant" }, { 155 "contast", "constant" }, { 156 "contastant", "constant" }, { 157 "contunie", "continue" }, { 158 "cooly", "coolly" }, { 159 "copping", "coping" }, { 160 "cosmoplyton", "cosmopolitan" }, { 161 "courst", "court" }, { 162 "crasy", "crazy" }, { 163 "cravets", "caveats" }, { 164 "credetability", "credibility" }, { 165 "criqitue", "critique" }, { 166 "croke", "croak" }, { 167 "crucifiction", "crucifixion" }, { 168 "crusifed", "crucified" }, { 169 "ctitique", "critique" }, { 170 "cumba", "combo" }, { 171 "custamisation", "customization" }, { 172 "dag", "dog" }, { 173 "daly", "daily" }, { 174 "danguages", "dangerous" }, { 175 "deaft", "draft" }, { 176 "defence", "defense" }, { 177 "defenly", "defiantly" }, { 178 "definate", "definite" }, { 179 "definately", "definitely" }, { 180 "dependeble", "dependable" }, { 181 "descrption", "description" }, { 182 "descrptn", "description" }, { 183 "desparate", "desperate" }, { 184 "dessicate", "desiccate" }, { 185 "destint", "distant" }, { 186 "develepment", "developments" }, { 187 "developement", "development" }, { 188 "develpond", "development" }, { 189 "devulge", "divulge" }, { 190 "diagree", "disagree" }, { 191 "dieties", "deities" }, { 192 "dinasaur", "dinosaur" }, { 193 "dinasour", "dinosaur" }, { 194 "direcyly", "directly" }, { 195 "discuess", "discuss" }, { 196 "disect", "dissect" }, { 197 "disippate", "dissipate" }, { 198 "disition", "decision" }, { 199 "dispair", "despair" }, { 200 "disssicion", "discussion" }, { 201 "distarct", "distract" }, { 202 "distart", "distort" }, { 203 "distroy", "destroy" }, { 204 "documtations", "documentation" }, { 205 "doenload", "download" }, { 206 "dongle", "dangle" }, { 207 "doog", "dog" }, { 208 "dramaticly", "dramatically" }, { 209 "drunkeness", "drunkenness" }, { 210 "ductioneery", "dictionary" }, { 211 "dur", "due" }, { 212 "duren", "during" }, { 213 "dymatic", "dynamic" }, { 214 "dynaic", "dynamic" }, { 215 "ecstacy", "ecstasy" }, { 216 "efficat", "efficient" }, { 217 "efficity", "efficacy" }, { 218 "effots", "efforts" }, { 219 "egsistence", "existence" }, { 220 "eitiology", "etiology" }, { 221 "elagent", "elegant" }, { 222 "elligit", "elegant" }, { 223 "embarass", "embarrass" }, { 224 "embarassment", "embarrassment" }, { 225 "embaress", "embarrass" }, { 226 "encapsualtion", "encapsulation" }, { 227 "encyclapidia", "encyclopedia" }, { 228 "encyclopia", "encyclopedia" }, { 229 "engins", "engine" }, { 230 "enhence", "enhance" }, { 231 "enligtment", "Enlightenment" }, { 232 "ennuui", "ennui" }, { 233 "enought", "enough" }, { 234 "enventions", "inventions" }, { 235 "envireminakl", "environmental" }, { 236 "enviroment", "environment" }, { 237 "epitomy", "epitome" }, { 238 "equire", "acquire" }, { 239 "errara", "error" }, { 240 "erro", "error" }, { 241 "evaualtion", "evaluation" }, { 242 "evething", "everything" }, { 243 "evtually", "eventually" }, { 244 "excede", "exceed" }, { 245 "excercise", "exercise" }, { 246 "excpt", "except" }, { 247 "excution", "execution" }, { 248 "exhileration", "exhilaration" }, { 249 "existance", "existence" }, { 250 "expleyly", "explicitly" }, { 251 "explity", "explicitly" }, { 252 "expresso", "espresso" }, { 253 "exspidient", "expedient" }, { 254 "extions", "extensions" }, { 255 "factontion", "factorization" }, { 256 "failer", "failure" }, { 257 "famdasy", "fantasy" }, { 258 "faver", "favor" }, { 259 "faxe", "fax" }, { 260 "febuary", "february" }, { 261 "firey", "fiery" }, { 262 "fistival", "festival" }, { 263 "flatterring", "flattering" }, { 264 "fluk", "flux" }, { 265 "flukse", "flux" }, { 266 "fone", "phone" }, { 267 "forsee", "foresee" }, { 268 "frustartaion", "frustrating" }, { 269 "fuction", "function" }, { 270 "funetik", "phonetic" }, { 271 "futs", "guts" }, { 272 "gamne", "came" }, { 273 "gaurd", "guard" }, { 274 "generly", "generally" }, { 275 "ghandi", "gandhi" }, { 276 "goberment", "government" }, { 277 "gobernement", "government" }, { 278 "gobernment", "government" }, { 279 "gotton", "gotten" }, { 280 "gracefull", "graceful" }, { 281 "gradualy", "gradually" }, { 282 "grammer", "grammar" }, { 283 "hallo", "hello" }, { 284 "hapily", "happily" }, { 285 "harrass", "harass" }, { 286 "havne", "have" }, { 287 "heellp", "help" }, { 288 "heighth", "height" }, { 289 "hellp", "help" }, { 290 "helo", "hello" }, { 291 "herlo", "hello" }, { 292 "hifin", "hyphen" }, { 293 "hifine", "hyphen" }, { 294 "higer", "higher" }, { 295 "hiphine", "hyphen" }, { 296 "hippie", "hippy" }, { 297 "hippopotamous", "hippopotamus" }, { 298 "hlp", "help" }, { 299 "hourse", "horse" }, { 300 "houssing", "housing" }, { 301 "howaver", "however" }, { 302 "howver", "however" }, { 303 "humaniti", "humanity" }, { 304 "hyfin", "hyphen" }, { 305 "hypotathes", "hypothesis" }, { 306 "hypotathese", "hypothesis" }, { 307 "hystrical", "hysterical" }, { 308 "ident", "indent" }, { 309 "illegitament", "illegitimate" }, { 310 "imbed", "embed" }, { 311 "imediaetly", "immediately" }, { 312 "imfamy", "infamy" }, { 313 "immenant", "immanent" }, { 314 "implemtes", "implements" }, { 315 "inadvertant", "inadvertent" }, { 316 "incase", "in case" }, { 317 "incedious", "insidious" }, { 318 "incompleet", "incomplete" }, { 319 "incomplot", "incomplete" }, { 320 "inconvenant", "inconvenient" }, { 321 "inconvience", "inconvenience" }, { 322 "independant", "independent" }, { 323 "independenent", "independent" }, { 324 "indepnends", "independent" }, { 325 "indepth", "in depth" }, { 326 "indispensible", "indispensable" }, { 327 "inefficite", "inefficient" }, { 328 "inerface", "interface" }, { 329 "infact", "in fact" }, { 330 "influencial", "influential" }, { 331 "inital", "initial" }, { 332 "initinized", "initialized" }, { 333 "initized", "initialized" }, { 334 "innoculate", "inoculate" }, { 335 "insistant", "insistent" }, { 336 "insistenet", "insistent" }, { 337 "instulation", "installation" }, { 338 "intealignt", "intelligent" }, { 339 "intejilent", "intelligent" }, { 340 "intelegent", "intelligent" }, { 341 "intelegnent", "intelligent" }, { 342 "intelejent", "intelligent" }, { 343 "inteligent", "intelligent" }, { 344 "intelignt", "intelligent" }, { 345 "intellagant", "intelligent" }, { 346 "intellegent", "intelligent" }, { 347 "intellegint", "intelligent" }, { 348 "intellgnt", "intelligent" }, { 349 "intensionality", "intensionally" }, { 350 "interate", "iterate" }, { 351 "internation", "international" }, { 352 "interpretate", "interpret" }, { 353 "interpretter", "interpreter" }, { 354 "intertes", "interested" }, { 355 "intertesd", "interested" }, { 356 "invermeantial", "environmental" }, { 357 "irregardless", "regardless" }, { 358 "irresistable", "irresistible" }, { 359 "irritible", "irritable" }, { 360 "islams", "muslims" }, { 361 "isotrop", "isotope" }, { 362 "isreal", "israel" }, { 363 "johhn", "john" }, { 364 "judgement", "judgment" }, { 365 "kippur", "kipper" }, { 366 "knawing", "knowing" }, { 367 "latext", "latest" }, { 368 "leasve", "leave" }, { 369 "lesure", "leisure" }, { 370 "liasion", "lesion" }, { 371 "liason", "liaison" }, { 372 "libary", "library" }, { 373 "likly", "likely" }, { 374 "lilometer", "kilometer" }, { 375 "liquify", "liquefy" }, { 376 "lloyer", "layer" }, { 377 "lossing", "losing" }, { 378 "luser", "laser" }, { 379 "maintanence", "maintenance" }, { 380 "majaerly", "majority" }, { 381 "majoraly", "majority" }, { 382 "maks", "masks" }, { 383 "mandelbrot", "Mandelbrot" }, { 384 "mant", "want" }, { 385 "marshall", "marshal" }, { 386 "maxium", "maximum" }, { 387 "meory", "memory" }, { 388 "metter", "better" }, { 389 "mic", "mike" }, { 390 "midia", "media" }, { 391 "millenium", "millennium" }, { 392 "miniscule", "minuscule" }, { 393 "minkay", "monkey" }, { 394 "minum", "minimum" }, { 395 "mischievious", "mischievous" }, { 396 "misilous", "miscellaneous" }, { 397 "momento", "memento" }, { 398 "monkay", "monkey" }, { 399 "mosaik", "mosaic" }, { 400 "mostlikely", "most likely" }, { 401 "mousr", "mouser" }, { 402 "mroe", "more" }, { 403 "neccessary", "necessary" }, { 404 "necesary", "necessary" }, { 405 "necesser", "necessary" }, { 406 "neice", "niece" }, { 407 "neighbour", "neighbor" }, { 408 "nemonic", "pneumonic" }, { 409 "nevade", "Nevada" }, { 410 "nickleodeon", "nickelodeon" }, { 411 "nieve", "naive" }, { 412 "noone", "no one" }, { 413 "noticably", "noticeably" }, { 414 "notin", "not in" }, { 415 "nozled", "nuzzled" }, { 416 "objectsion", "objects" }, { 417 "obsfuscate", "obfuscate" }, { 418 "ocassion", "occasion" }, { 419 "occuppied", "occupied" }, { 420 "occurence", "occurrence" }, { 421 "octagenarian", "octogenarian" }, { 422 "olf", "old" }, { 423 "opposim", "opossum" }, { 424 "organise", "organize" }, { 425 "organiz", "organize" }, { 426 "orientate", "orient" }, { 427 "oscilascope", "oscilloscope" }, { 428 "oving", "moving" }, { 429 "paramers", "parameters" }, { 430 "parametic", "parameter" }, { 431 "paranets", "parameters" }, { 432 "partrucal", "particular" }, { 433 "pataphysical", "metaphysical" }, { 434 "patten", "pattern" }, { 435 "permissable", "permissible" }, { 436 "permition", "permission" }, { 437 "permmasivie", "permissive" }, { 438 "perogative", "prerogative" }, { 439 "persue", "pursue" }, { 440 "phantasia", "fantasia" }, { 441 "phenominal", "phenomenal" }, { 442 "picaresque", "picturesque" }, { 443 "playwrite", "playwright" }, { 444 "poeses", "poesies" }, { 445 "polation", "politician" }, { 446 "poligamy", "polygamy" }, { 447 "politict", "politic" }, { 448 "pollice", "police" }, { 449 "polypropalene", "polypropylene" }, { 450 "pompom", "pompon" }, { 451 "possable", "possible" }, { 452 "practicle", "practical" }, { 453 "pragmaticism", "pragmatism" }, { 454 "preceeding", "preceding" }, { 455 "precion", "precision" }, { 456 "precios", "precision" }, { 457 "preemptory", "peremptory" }, { 458 "prefices", "prefixes" }, { 459 "prefixt", "prefixed" }, { 460 "presbyterian", "Presbyterian" }, { 461 "presue", "pursue" }, { 462 "presued", "pursued" }, { 463 "privielage", "privilege" }, { 464 "priviledge", "privilege" }, { 465 "proceedures", "procedures" }, { 466 "pronensiation", "pronunciation" }, { 467 "pronisation", "pronunciation" }, { 468 "pronounciation", "pronunciation" }, { 469 "properally", "properly" }, { 470 "proplematic", "problematic" }, { 471 "protray", "portray" }, { 472 "pscolgst", "psychologist" }, { 473 "psicolagest", "psychologist" }, { 474 "psycolagest", "psychologist" }, { 475 "quoz", "quiz" }, { 476 "radious", "radius" }, { 477 "ramplily", "rampantly" }, { 478 "reccomend", "recommend" }, { 479 "reccona", "raccoon" }, { 480 "recieve", "receive" }, { 481 "reconise", "recognize" }, { 482 "rectangeles", "rectangle" }, { 483 "redign", "redesign" }, { 484 "reoccurring", "recurring" }, { 485 "repitition", "repetition" }, { 486 "replasments", "replacement" }, { 487 "reposable", "responsible" }, { 488 "reseblence", "resemblance" }, { 489 "respct", "respect" }, { 490 "respecally", "respectfully" }, { 491 "roon", "room" }, { 492 "rought", "roughly" }, { 493 "rsx", "RSX" }, { 494 "rudemtry", "rudimentary" }, { 495 "runnung", "running" }, { 496 "sacreligious", "sacrilegious" }, { 497 "saftly", "safely" }, { 498 "salut", "salute" }, { 499 "satifly", "satisfy" }, { 500 "scrabdle", "scrabble" }, { 501 "searcheable", "searchable" }, { 502 "secion", "section" }, { 503 "seferal", "several" }, { 504 "segements", "segments" }, { 505 "sence", "sense" }, { 506 "seperate", "separate" }, { 507 "sherbert", "sherbet" }, { 508 "sicolagest", "psychologist" }, { 509 "sieze", "seize" }, { 510 "simpfilty", "simplicity" }, { 511 "simplye", "simply" }, { 512 "singal", "signal" }, { 513 "sitte", "site" }, { 514 "situration", "situation" }, { 515 "slyph", "sylph" }, { 516 "smil", "smile" }, { 517 "snuck", "sneaked" }, { 518 "sometmes", "sometimes" }, { 519 "soonec", "sonic" }, { 520 "specificialy", "specifically" }, { 521 "spel", "spell" }, { 522 "spoak", "spoke" }, { 523 "sponsered", "sponsored" }, { 524 "stering", "steering" }, { 525 "straightjacket", "straitjacket" }, { 526 "stumach", "stomach" }, { 527 "stutent", "student" }, { 528 "styleguide", "style guide" }, { 529 "subisitions", "substitutions" }, { 530 "subjecribed", "subscribed" }, { 531 "subpena", "subpoena" }, { 532 "substations", "substitutions" }, { 533 "suger", "sugar" }, { 534 "supercede", "supersede" }, { 535 "superfulous", "superfluous" }, { 536 "susan", "Susan" }, { 537 "swimwear", "swim wear" }, { 538 "syncorization", "synchronization" }, { 539 "taff", "tough" }, { 540 "taht", "that" }, { 541 "tattos", "tattoos" }, { 542 "techniquely", "technically" }, { 543 "teh", "the" }, { 544 "tem", "team" }, { 545 "teo", "two" }, { 546 "teridical", "theoretical" }, { 547 "tesst", "test" }, { 548 "tets", "tests" }, { 549 "thanot", "than or" }, { 550 "theirselves", "themselves" }, { 551 "theridically", "theoretical" }, { 552 "thredically", "theoretically" }, { 553 "thruout", "throughout" }, { 554 "ths", "this" }, { 555 "titalate", "titillate" }, { 556 "tobagan", "tobaggon" }, { 557 "tommorrow", "tomorrow" }, { 558 "tomorow", "tomorrow" }, { 559 "tradegy", "tragedy" }, { 560 "trubbel", "trouble" }, { 561 "ttest", "test" }, { 562 "tunnellike", "tunnel like" }, { 563 "tured", "turned" }, { 564 "tyrrany", "tyranny" }, { 565 "unatourral", "unnatural" }, { 566 "unaturral", "unnatural" }, { 567 "unconisitional", "unconstitutional" }, { 568 "unconscience", "unconscious" }, { 569 "underladder", "under ladder" }, { 570 "unentelegible", "unintelligible" }, { 571 "unfortunently", "unfortunately" }, { 572 "unnaturral", "unnatural" }, { 573 "upcast", "up cast" }, { 574 "upmost", "utmost" }, { 575 "uranisium", "uranium" }, { 576 "verison", "version" }, { 577 "vinagarette", "vinaigrette" }, { 578 "volumptuous", "voluptuous" }, { 579 "volunteerism", "voluntarism" }, { 580 "volye", "volley" }, { 581 "wadting", "wasting" }, { 582 "waite", "wait" }, { 583 "wan't", "won't" }, { 584 "warloord", "warlord" }, { 585 "whaaat", "what" }, { 586 "whard", "ward" }, { 587 "whimp", "wimp" }, { 588 "wicken", "weaken" }, { 589 "wierd", "weird" }, { 590 "wrank", "rank" }, { 591 "writeen", "righten" }, { 592 "writting", "writing" }, { 593 "wundeews", "windows" }, { 594 "yeild", "yield" }, { 595 "youe", "your" } 596 }; 597 598 /** 599 * A subset of FIXTURE generated by this test. 600 */ 601 private static final String[][] MATCHES = { { "Accosinly", "Occasionally" }, { 602 "Maddness", "Madness" }, { 603 "Occusionaly", "Occasionally" }, { 604 "Steffen", "Stephen" }, { 605 "Thw", "The" }, { 606 "Unformanlly", "Unfortunately" }, { 607 "Unfortally", "Unfortunately" }, { 608 "abilitey", "ability" }, { 609 "absorbtion", "absorption" }, { 610 "accidently", "accidentally" }, { 611 "accomodate", "accommodate" }, { 612 "acommadate", "accommodate" }, { 613 "acord", "accord" }, { 614 "adultry", "adultery" }, { 615 "aggresive", "aggressive" }, { 616 "alchohol", "alcohol" }, { 617 "alchoholic", "alcoholic" }, { 618 "allieve", "alive" }, { 619 "alot", "a lot" }, { 620 "alright", "all right" }, { 621 "amature", "amateur" }, { 622 "ambivilant", "ambivalent" }, { 623 "amourfous", "amorphous" }, { 624 "annoint", "anoint" }, { 625 "annonsment", "announcement" }, { 626 "annoyting", "anting" }, { 627 "annuncio", "announce" }, { 628 "anotomy", "anatomy" }, { 629 "antidesestablishmentarianism", "antidisestablishmentarianism" }, { 630 "antidisestablishmentarism", "antidisestablishmentarianism" }, { 631 "anynomous", "anonymous" }, { 632 "appelet", "applet" }, { 633 "appreceiated", "appreciated" }, { 634 "appresteate", "appreciate" }, { 635 "aquantance", "acquaintance" }, { 636 "aricticure", "architecture" }, { 637 "asterick", "asterisk" }, { 638 "asymetric", "asymmetric" }, { 639 "atentively", "attentively" }, { 640 "bankrot", "bankrupt" }, { 641 "basicly", "basically" }, { 642 "batallion", "battalion" }, { 643 "bbrose", "browse" }, { 644 "beauro", "bureau" }, { 645 "beaurocracy", "bureaucracy" }, { 646 "beggining", "beginning" }, { 647 "behaviour", "behavior" }, { 648 "beleive", "believe" }, { 649 "belive", "believe" }, { 650 "blait", "bleat" }, { 651 "bouyant", "buoyant" }, { 652 "boygot", "boycott" }, { 653 "brocolli", "broccoli" }, { 654 "buder", "butter" }, { 655 "budr", "butter" }, { 656 "budter", "butter" }, { 657 "buracracy", "bureaucracy" }, { 658 "burracracy", "bureaucracy" }, { 659 "buton", "button" }, { 660 "byby", "by by" }, { 661 "cauler", "caller" }, { 662 "ceasar", "caesar" }, { 663 "cemetary", "cemetery" }, { 664 "changeing", "changing" }, { 665 "cheet", "cheat" }, { 666 "cimplicity", "simplicity" }, { 667 "circumstaces", "circumstances" }, { 668 "clob", "club" }, { 669 "coaln", "colon" }, { 670 "colleaque", "colleague" }, { 671 "colloquilism", "colloquialism" }, { 672 "columne", "column" }, { 673 "comitmment", "commitment" }, { 674 "comitte", "committee" }, { 675 "comittmen", "commitment" }, { 676 "comittmend", "commitment" }, { 677 "commerciasl", "commercials" }, { 678 "commited", "committed" }, { 679 "commitee", "committee" }, { 680 "companys", "companies" }, { 681 "comupter", "computer" }, { 682 "concensus", "consensus" }, { 683 "confusionism", "confucianism" }, { 684 "congradulations", "congratulations" }, { 685 "contunie", "continue" }, { 686 "cooly", "coolly" }, { 687 "copping", "coping" }, { 688 "cosmoplyton", "cosmopolitan" }, { 689 "crasy", "crazy" }, { 690 "croke", "croak" }, { 691 "crucifiction", "crucifixion" }, { 692 "crusifed", "crucified" }, { 693 "cumba", "combo" }, { 694 "custamisation", "customization" }, { 695 "dag", "dog" }, { 696 "daly", "daily" }, { 697 "defence", "defense" }, { 698 "definate", "definite" }, { 699 "definately", "definitely" }, { 700 "dependeble", "dependable" }, { 701 "descrption", "description" }, { 702 "descrptn", "description" }, { 703 "desparate", "desperate" }, { 704 "dessicate", "desiccate" }, { 705 "destint", "distant" }, { 706 "develepment", "developments" }, { 707 "developement", "development" }, { 708 "develpond", "development" }, { 709 "devulge", "divulge" }, { 710 "dieties", "deities" }, { 711 "dinasaur", "dinosaur" }, { 712 "dinasour", "dinosaur" }, { 713 "discuess", "discuss" }, { 714 "disect", "dissect" }, { 715 "disippate", "dissipate" }, { 716 "disition", "decision" }, { 717 "dispair", "despair" }, { 718 "distarct", "distract" }, { 719 "distart", "distort" }, { 720 "distroy", "destroy" }, { 721 "doenload", "download" }, { 722 "dongle", "dangle" }, { 723 "doog", "dog" }, { 724 "dramaticly", "dramatically" }, { 725 "drunkeness", "drunkenness" }, { 726 "ductioneery", "dictionary" }, { 727 "ecstacy", "ecstasy" }, { 728 "egsistence", "existence" }, { 729 "eitiology", "etiology" }, { 730 "elagent", "elegant" }, { 731 "embarass", "embarrass" }, { 732 "embarassment", "embarrassment" }, { 733 "embaress", "embarrass" }, { 734 "encapsualtion", "encapsulation" }, { 735 "encyclapidia", "encyclopedia" }, { 736 "encyclopia", "encyclopedia" }, { 737 "engins", "engine" }, { 738 "enhence", "enhance" }, { 739 "ennuui", "ennui" }, { 740 "enventions", "inventions" }, { 741 "envireminakl", "environmental" }, { 742 "enviroment", "environment" }, { 743 "epitomy", "epitome" }, { 744 "equire", "acquire" }, { 745 "errara", "error" }, { 746 "evaualtion", "evaluation" }, { 747 "excede", "exceed" }, { 748 "excercise", "exercise" }, { 749 "excpt", "except" }, { 750 "exhileration", "exhilaration" }, { 751 "existance", "existence" }, { 752 "expleyly", "explicitly" }, { 753 "explity", "explicitly" }, { 754 "failer", "failure" }, { 755 "faver", "favor" }, { 756 "faxe", "fax" }, { 757 "firey", "fiery" }, { 758 "fistival", "festival" }, { 759 "flatterring", "flattering" }, { 760 "flukse", "flux" }, { 761 "fone", "phone" }, { 762 "forsee", "foresee" }, { 763 "frustartaion", "frustrating" }, { 764 "funetik", "phonetic" }, { 765 "gaurd", "guard" }, { 766 "generly", "generally" }, { 767 "ghandi", "gandhi" }, { 768 "gotton", "gotten" }, { 769 "gracefull", "graceful" }, { 770 "gradualy", "gradually" }, { 771 "grammer", "grammar" }, { 772 "hallo", "hello" }, { 773 "hapily", "happily" }, { 774 "harrass", "harass" }, { 775 "heellp", "help" }, { 776 "heighth", "height" }, { 777 "hellp", "help" }, { 778 "helo", "hello" }, { 779 "hifin", "hyphen" }, { 780 "hifine", "hyphen" }, { 781 "hiphine", "hyphen" }, { 782 "hippie", "hippy" }, { 783 "hippopotamous", "hippopotamus" }, { 784 "hourse", "horse" }, { 785 "houssing", "housing" }, { 786 "howaver", "however" }, { 787 "howver", "however" }, { 788 "humaniti", "humanity" }, { 789 "hyfin", "hyphen" }, { 790 "hystrical", "hysterical" }, { 791 "illegitament", "illegitimate" }, { 792 "imbed", "embed" }, { 793 "imediaetly", "immediately" }, { 794 "immenant", "immanent" }, { 795 "implemtes", "implements" }, { 796 "inadvertant", "inadvertent" }, { 797 "incase", "in case" }, { 798 "incedious", "insidious" }, { 799 "incompleet", "incomplete" }, { 800 "incomplot", "incomplete" }, { 801 "inconvenant", "inconvenient" }, { 802 "inconvience", "inconvenience" }, { 803 "independant", "independent" }, { 804 "independenent", "independent" }, { 805 "indepnends", "independent" }, { 806 "indepth", "in depth" }, { 807 "indispensible", "indispensable" }, { 808 "inefficite", "inefficient" }, { 809 "infact", "in fact" }, { 810 "influencial", "influential" }, { 811 "innoculate", "inoculate" }, { 812 "insistant", "insistent" }, { 813 "insistenet", "insistent" }, { 814 "instulation", "installation" }, { 815 "intealignt", "intelligent" }, { 816 "intelegent", "intelligent" }, { 817 "intelegnent", "intelligent" }, { 818 "intelejent", "intelligent" }, { 819 "inteligent", "intelligent" }, { 820 "intelignt", "intelligent" }, { 821 "intellagant", "intelligent" }, { 822 "intellegent", "intelligent" }, { 823 "intellegint", "intelligent" }, { 824 "intellgnt", "intelligent" }, { 825 "intensionality", "intensionally" }, { 826 "internation", "international" }, { 827 "interpretate", "interpret" }, { 828 "interpretter", "interpreter" }, { 829 "intertes", "interested" }, { 830 "intertesd", "interested" }, { 831 "invermeantial", "environmental" }, { 832 "irresistable", "irresistible" }, { 833 "irritible", "irritable" }, { 834 "isreal", "israel" }, { 835 "johhn", "john" }, { 836 "kippur", "kipper" }, { 837 "knawing", "knowing" }, { 838 "lesure", "leisure" }, { 839 "liasion", "lesion" }, { 840 "liason", "liaison" }, { 841 "likly", "likely" }, { 842 "liquify", "liquefy" }, { 843 "lloyer", "layer" }, { 844 "lossing", "losing" }, { 845 "luser", "laser" }, { 846 "maintanence", "maintenance" }, { 847 "mandelbrot", "Mandelbrot" }, { 848 "marshall", "marshal" }, { 849 "maxium", "maximum" }, { 850 "mic", "mike" }, { 851 "midia", "media" }, { 852 "millenium", "millennium" }, { 853 "miniscule", "minuscule" }, { 854 "minkay", "monkey" }, { 855 "mischievious", "mischievous" }, { 856 "momento", "memento" }, { 857 "monkay", "monkey" }, { 858 "mosaik", "mosaic" }, { 859 "mostlikely", "most likely" }, { 860 "mousr", "mouser" }, { 861 "mroe", "more" }, { 862 "necesary", "necessary" }, { 863 "necesser", "necessary" }, { 864 "neice", "niece" }, { 865 "neighbour", "neighbor" }, { 866 "nemonic", "pneumonic" }, { 867 "nevade", "Nevada" }, { 868 "nickleodeon", "nickelodeon" }, { 869 "nieve", "naive" }, { 870 "noone", "no one" }, { 871 "notin", "not in" }, { 872 "nozled", "nuzzled" }, { 873 "objectsion", "objects" }, { 874 "ocassion", "occasion" }, { 875 "occuppied", "occupied" }, { 876 "occurence", "occurrence" }, { 877 "octagenarian", "octogenarian" }, { 878 "opposim", "opossum" }, { 879 "organise", "organize" }, { 880 "organiz", "organize" }, { 881 "orientate", "orient" }, { 882 "oscilascope", "oscilloscope" }, { 883 "parametic", "parameter" }, { 884 "permissable", "permissible" }, { 885 "permmasivie", "permissive" }, { 886 "persue", "pursue" }, { 887 "phantasia", "fantasia" }, { 888 "phenominal", "phenomenal" }, { 889 "playwrite", "playwright" }, { 890 "poeses", "poesies" }, { 891 "poligamy", "polygamy" }, { 892 "politict", "politic" }, { 893 "pollice", "police" }, { 894 "polypropalene", "polypropylene" }, { 895 "possable", "possible" }, { 896 "practicle", "practical" }, { 897 "pragmaticism", "pragmatism" }, { 898 "preceeding", "preceding" }, { 899 "precios", "precision" }, { 900 "preemptory", "peremptory" }, { 901 "prefixt", "prefixed" }, { 902 "presbyterian", "Presbyterian" }, { 903 "presue", "pursue" }, { 904 "presued", "pursued" }, { 905 "privielage", "privilege" }, { 906 "priviledge", "privilege" }, { 907 "proceedures", "procedures" }, { 908 "pronensiation", "pronunciation" }, { 909 "pronounciation", "pronunciation" }, { 910 "properally", "properly" }, { 911 "proplematic", "problematic" }, { 912 "protray", "portray" }, { 913 "pscolgst", "psychologist" }, { 914 "psicolagest", "psychologist" }, { 915 "psycolagest", "psychologist" }, { 916 "quoz", "quiz" }, { 917 "radious", "radius" }, { 918 "reccomend", "recommend" }, { 919 "reccona", "raccoon" }, { 920 "recieve", "receive" }, { 921 "reconise", "recognize" }, { 922 "rectangeles", "rectangle" }, { 923 "reoccurring", "recurring" }, { 924 "repitition", "repetition" }, { 925 "replasments", "replacement" }, { 926 "respct", "respect" }, { 927 "respecally", "respectfully" }, { 928 "rsx", "RSX" }, { 929 "runnung", "running" }, { 930 "sacreligious", "sacrilegious" }, { 931 "salut", "salute" }, { 932 "searcheable", "searchable" }, { 933 "seferal", "several" }, { 934 "segements", "segments" }, { 935 "sence", "sense" }, { 936 "seperate", "separate" }, { 937 "sicolagest", "psychologist" }, { 938 "sieze", "seize" }, { 939 "simplye", "simply" }, { 940 "sitte", "site" }, { 941 "slyph", "sylph" }, { 942 "smil", "smile" }, { 943 "sometmes", "sometimes" }, { 944 "soonec", "sonic" }, { 945 "specificialy", "specifically" }, { 946 "spel", "spell" }, { 947 "spoak", "spoke" }, { 948 "sponsered", "sponsored" }, { 949 "stering", "steering" }, { 950 "straightjacket", "straitjacket" }, { 951 "stumach", "stomach" }, { 952 "stutent", "student" }, { 953 "styleguide", "style guide" }, { 954 "subpena", "subpoena" }, { 955 "substations", "substitutions" }, { 956 "supercede", "supersede" }, { 957 "superfulous", "superfluous" }, { 958 "susan", "Susan" }, { 959 "swimwear", "swim wear" }, { 960 "syncorization", "synchronization" }, { 961 "taff", "tough" }, { 962 "taht", "that" }, { 963 "tattos", "tattoos" }, { 964 "techniquely", "technically" }, { 965 "teh", "the" }, { 966 "tem", "team" }, { 967 "teo", "two" }, { 968 "teridical", "theoretical" }, { 969 "tesst", "test" }, { 970 "theridically", "theoretical" }, { 971 "thredically", "theoretically" }, { 972 "thruout", "throughout" }, { 973 "ths", "this" }, { 974 "titalate", "titillate" }, { 975 "tobagan", "tobaggon" }, { 976 "tommorrow", "tomorrow" }, { 977 "tomorow", "tomorrow" }, { 978 "trubbel", "trouble" }, { 979 "ttest", "test" }, { 980 "tyrrany", "tyranny" }, { 981 "unatourral", "unnatural" }, { 982 "unaturral", "unnatural" }, { 983 "unconisitional", "unconstitutional" }, { 984 "unconscience", "unconscious" }, { 985 "underladder", "under ladder" }, { 986 "unentelegible", "unintelligible" }, { 987 "unfortunently", "unfortunately" }, { 988 "unnaturral", "unnatural" }, { 989 "upcast", "up cast" }, { 990 "verison", "version" }, { 991 "vinagarette", "vinaigrette" }, { 992 "volunteerism", "voluntarism" }, { 993 "volye", "volley" }, { 994 "waite", "wait" }, { 995 "wan't", "won't" }, { 996 "warloord", "warlord" }, { 997 "whaaat", "what" }, { 998 "whard", "ward" }, { 999 "whimp", "wimp" }, { 1000 "wicken", "weaken" }, { 1001 "wierd", "weird" }, { 1002 "wrank", "rank" }, { 1003 "writeen", "righten" }, { 1004 "writting", "writing" }, { 1005 "wundeews", "windows" }, { 1006 "yeild", "yield" }, }; 1007 1008 /** 1009 * Tests encoding APIs in one place. 1010 */ 1011 private void assertDoubleMetaphone(String expected, String source) { 1012 assertEquals(expected, this.getDoubleMetaphone().encode(source)); 1013 try { 1014 assertEquals(expected, this.getDoubleMetaphone().encode((Object) source)); 1015 } catch (EncoderException e) { 1016 fail("Unexpected expection: " + e); 1017 } 1018 assertEquals(expected, this.getDoubleMetaphone().doubleMetaphone(source)); 1019 assertEquals(expected, this.getDoubleMetaphone().doubleMetaphone(source, false)); 1020 } 1021 1022 /** 1023 * Tests encoding APIs in one place. 1024 */ 1025 public void assertDoubleMetaphoneAlt(String expected, String source) { 1026 assertEquals(expected, this.getDoubleMetaphone().doubleMetaphone(source, true)); 1027 } 1028 1029 public void doubleMetaphoneEqualTest(String[][] pairs, boolean useAlternate) { 1030 this.validateFixture(pairs); 1031 for (String[] pair : pairs) { 1032 String name0 = pair[0]; 1033 String name1 = pair[1]; 1034 String failMsg = "Expected match between " + name0 + " and " + name1 + " (use alternate: " + useAlternate + ")"; 1035 assertTrue(failMsg, this.getDoubleMetaphone().isDoubleMetaphoneEqual(name0, name1, useAlternate)); 1036 assertTrue(failMsg, this.getDoubleMetaphone().isDoubleMetaphoneEqual(name1, name0, useAlternate)); 1037 if (!useAlternate) { 1038 assertTrue(failMsg, this.getDoubleMetaphone().isDoubleMetaphoneEqual(name0, name1)); 1039 assertTrue(failMsg, this.getDoubleMetaphone().isDoubleMetaphoneEqual(name1, name0)); 1040 } 1041 } 1042 } 1043 1044 public void doubleMetaphoneNotEqualTest(boolean alternate) { 1045 assertFalse(this.getDoubleMetaphone().isDoubleMetaphoneEqual("Brain", "Band", alternate)); 1046 assertFalse(this.getDoubleMetaphone().isDoubleMetaphoneEqual("Band", "Brain", alternate)); 1047 1048 if (!alternate) { 1049 assertFalse(this.getDoubleMetaphone().isDoubleMetaphoneEqual("Brain", "Band")); 1050 assertFalse(this.getDoubleMetaphone().isDoubleMetaphoneEqual("Band", "Brain")); 1051 } 1052 } 1053 1054 private DoubleMetaphone getDoubleMetaphone() { 1055 return (DoubleMetaphone) this.getStringEncoder(); 1056 } 1057 1058 @Override 1059 protected StringEncoder createStringEncoder() { 1060 return new DoubleMetaphone(); 1061 } 1062 1063 @Test 1064 public void testDoubleMetaphone() { 1065 assertDoubleMetaphone("TSTN", "testing"); 1066 assertDoubleMetaphone("0", "The"); 1067 assertDoubleMetaphone("KK", "quick"); 1068 assertDoubleMetaphone("PRN", "brown"); 1069 assertDoubleMetaphone("FKS", "fox"); 1070 assertDoubleMetaphone("JMPT", "jumped"); 1071 assertDoubleMetaphone("AFR", "over"); 1072 assertDoubleMetaphone("0", "the"); 1073 assertDoubleMetaphone("LS", "lazy"); 1074 assertDoubleMetaphone("TKS", "dogs"); 1075 assertDoubleMetaphone("MKFR", "MacCafferey"); 1076 assertDoubleMetaphone("STFN", "Stephan"); 1077 assertDoubleMetaphone("KSSK", "Kuczewski"); 1078 assertDoubleMetaphone("MKLL", "McClelland"); 1079 assertDoubleMetaphone("SNHS", "san jose"); 1080 assertDoubleMetaphone("SNFP", "xenophobia"); 1081 1082 assertDoubleMetaphoneAlt("TSTN", "testing"); 1083 assertDoubleMetaphoneAlt("T", "The"); 1084 assertDoubleMetaphoneAlt("KK", "quick"); 1085 assertDoubleMetaphoneAlt("PRN", "brown"); 1086 assertDoubleMetaphoneAlt("FKS", "fox"); 1087 assertDoubleMetaphoneAlt("AMPT", "jumped"); 1088 assertDoubleMetaphoneAlt("AFR", "over"); 1089 assertDoubleMetaphoneAlt("T", "the"); 1090 assertDoubleMetaphoneAlt("LS", "lazy"); 1091 assertDoubleMetaphoneAlt("TKS", "dogs"); 1092 assertDoubleMetaphoneAlt("MKFR", "MacCafferey"); 1093 assertDoubleMetaphoneAlt("STFN", "Stephan"); 1094 assertDoubleMetaphoneAlt("KXFS", "Kutchefski"); 1095 assertDoubleMetaphoneAlt("MKLL", "McClelland"); 1096 assertDoubleMetaphoneAlt("SNHS", "san jose"); 1097 assertDoubleMetaphoneAlt("SNFP", "xenophobia"); 1098 assertDoubleMetaphoneAlt("FKR", "Fokker"); 1099 assertDoubleMetaphoneAlt("AK", "Joqqi"); 1100 assertDoubleMetaphoneAlt("HF", "Hovvi"); 1101 assertDoubleMetaphoneAlt("XRN", "Czerny"); 1102 } 1103 1104 @Test 1105 public void testEmpty() { 1106 assertEquals(null, this.getDoubleMetaphone().doubleMetaphone(null)); 1107 assertEquals(null, this.getDoubleMetaphone().doubleMetaphone("")); 1108 assertEquals(null, this.getDoubleMetaphone().doubleMetaphone(" ")); 1109 assertEquals(null, this.getDoubleMetaphone().doubleMetaphone("\t\n\r ")); 1110 } 1111 1112 /** 1113 * Test setting maximum length 1114 */ 1115 @Test 1116 public void testSetMaxCodeLength() { 1117 String value = "jumped"; 1118 1119 DoubleMetaphone doubleMetaphone = new DoubleMetaphone(); 1120 1121 // Sanity check of default settings 1122 assertEquals("Default Max Code Length", 4, doubleMetaphone.getMaxCodeLen()); 1123 assertEquals("Default Primary", "JMPT", doubleMetaphone.doubleMetaphone(value, false)); 1124 assertEquals("Default Alternate", "AMPT", doubleMetaphone.doubleMetaphone(value, true)); 1125 1126 // Check setting Max Code Length 1127 doubleMetaphone.setMaxCodeLen(3); 1128 assertEquals("Set Max Code Length", 3, doubleMetaphone.getMaxCodeLen()); 1129 assertEquals("Max=3 Primary", "JMP", doubleMetaphone.doubleMetaphone(value, false)); 1130 assertEquals("Max=3 Alternate", "AMP", doubleMetaphone.doubleMetaphone(value, true)); 1131 } 1132 1133 @Test 1134 public void testIsDoubleMetaphoneEqualBasic() { 1135 String[][] testFixture = new String[][] { { "Case", "case" }, { 1136 "CASE", "Case" }, { 1137 "caSe", "cAsE" }, { 1138 "cookie", "quick" }, { 1139 "quick", "cookie" }, { 1140 "Brian", "Bryan" }, { 1141 "Auto", "Otto" }, { 1142 "Steven", "Stefan" }, { 1143 "Philipowitz", "Filipowicz" } 1144 }; 1145 doubleMetaphoneEqualTest(testFixture, false); 1146 doubleMetaphoneEqualTest(testFixture, true); 1147 } 1148 1149 /** 1150 * Example in the original article but failures in this Java impl: 1151 */ 1152 @Test 1153 public void testIsDoubleMetaphoneEqualExtended1() { 1154 // String[][] testFixture = new String[][] { { "Smith", "Schmidt" } 1155 // }; 1156 // doubleMetaphoneEqualTest(testFixture, false); 1157 // doubleMetaphoneEqualTest(testFixture, true); 1158 } 1159 1160 @Test 1161 public void testIsDoubleMetaphoneEqualExtended2() { 1162 String[][] testFixture = new String[][] { { "Jablonski", "Yablonsky" } 1163 }; 1164 //doubleMetaphoneEqualTest(testFixture, false); 1165 doubleMetaphoneEqualTest(testFixture, true); 1166 } 1167 1168 /** 1169 * Used to generate the MATCHES array and test possible matches from the 1170 * FIXTURE array. 1171 */ 1172 @Test 1173 public void testIsDoubleMetaphoneEqualExtended3() { 1174 this.validateFixture(FIXTURE); 1175 StringBuilder failures = new StringBuilder(); 1176 StringBuilder matches = new StringBuilder(); 1177 String cr = System.getProperty("line.separator"); 1178 matches.append("private static final String[][] MATCHES = {" + cr); 1179 int failCount = 0; 1180 for (int i = 0; i < FIXTURE.length; i++) { 1181 String name0 = FIXTURE[i][0]; 1182 String name1 = FIXTURE[i][1]; 1183 boolean match1 = this.getDoubleMetaphone().isDoubleMetaphoneEqual(name0, name1, false); 1184 boolean match2 = this.getDoubleMetaphone().isDoubleMetaphoneEqual(name0, name1, true); 1185 if (match1 == false && match2 == false) { 1186 String failMsg = "[" + i + "] " + name0 + " and " + name1 + cr; 1187 failures.append(failMsg); 1188 failCount++; 1189 } else { 1190 matches.append("{\"" + name0 + "\", \"" + name1 + "\"}," + cr); 1191 } 1192 } 1193 matches.append("};"); 1194 // Turn on to print a new MATCH array 1195 //System.out.println(matches.toString()); 1196 if (failCount > 0) { 1197 // Turn on to see which pairs do NOT match. 1198 // String msg = failures.toString(); 1199 //fail(failCount + " failures out of " + FIXTURE.length + ". The 1200 // following could be made to match: " + cr + msg); 1201 } 1202 } 1203 1204 @Test 1205 public void testIsDoubleMetaphoneEqualWithMATCHES() { 1206 this.validateFixture(MATCHES); 1207 for (int i = 0; i < MATCHES.length; i++) { 1208 String name0 = MATCHES[i][0]; 1209 String name1 = MATCHES[i][1]; 1210 boolean match1 = this.getDoubleMetaphone().isDoubleMetaphoneEqual(name0, name1, false); 1211 boolean match2 = this.getDoubleMetaphone().isDoubleMetaphoneEqual(name0, name1, true); 1212 if (match1 == false && match2 == false) { 1213 fail("Expected match [" + i + "] " + name0 + " and " + name1); 1214 } 1215 } 1216 } 1217 1218 @Test 1219 public void testIsDoubleMetaphoneNotEqual() { 1220 doubleMetaphoneNotEqualTest(false); 1221 doubleMetaphoneNotEqualTest(true); 1222 } 1223 1224 @Test 1225 public void testCCedilla() { 1226 assertTrue(this.getDoubleMetaphone().isDoubleMetaphoneEqual("\u00e7", "S")); // c-cedilla 1227 } 1228 1229 @Test 1230 public void testNTilde() { 1231 assertTrue(this.getDoubleMetaphone().isDoubleMetaphoneEqual("\u00f1", "N")); // n-tilde 1232 } 1233 1234 public void validateFixture(String[][] pairs) { 1235 if (pairs.length == 0) { 1236 fail("Test fixture is empty"); 1237 } 1238 for (int i = 0; i < pairs.length; i++) { 1239 if (pairs[i].length != 2) { 1240 fail("Error in test fixture in the data array at index " + i); 1241 } 1242 } 1243 } 1244 }