001 /* 002 * Licensed to the Apache Software Foundation (ASF) under one or more 003 * contributor license agreements. See the NOTICE file distributed with 004 * this work for additional information regarding copyright ownership. 005 * The ASF licenses this file to You under the Apache License, Version 2.0 006 * (the "License"); you may not use this file except in compliance with 007 * the License. You may obtain a copy of the License at 008 * 009 * http://www.apache.org/licenses/LICENSE-2.0 010 * 011 * Unless required by applicable law or agreed to in writing, software 012 * distributed under the License is distributed on an "AS IS" BASIS, 013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 014 * See the License for the specific language governing permissions and 015 * limitations under the License. 016 */ 017 018 package org.apache.commons.codec.language; 019 020 import static org.junit.Assert.assertEquals; 021 import static org.junit.Assert.assertFalse; 022 import static org.junit.Assert.assertTrue; 023 import static org.junit.Assert.fail; 024 025 import org.apache.commons.codec.EncoderException; 026 import org.apache.commons.codec.StringEncoderAbstractTest; 027 import org.junit.Test; 028 029 /** 030 * Tests {@link DoubleMetaphone}. 031 * 032 * <p>Keep this file in UTF-8 encoding for proper Javadoc processing.</p> 033 * 034 * @see "http://www.cuj.com/documents/s=8038/cuj0006philips/" 035 * @version $Id: DoubleMetaphoneTest.html 889935 2013-12-11 05:05:13Z ggregory $ 036 */ 037 public class DoubleMetaphoneTest extends StringEncoderAbstractTest<DoubleMetaphone> { 038 039 /** 040 * Test data from http://aspell.net/test/orig/batch0.tab. 041 * 042 * "Copyright (C) 2002 Kevin Atkinson (kevina@gnu.org). Verbatim copying 043 * and distribution of this entire article is permitted in any medium, 044 * provided this notice is preserved." 045 * 046 * Massaged the test data in the array below. 047 */ 048 private static final String[][] FIXTURE = { { "Accosinly", "Occasionally" }, { 049 "Ciculer", "Circler" }, { 050 "Circue", "Circle" }, { 051 "Maddness", "Madness" }, { 052 "Occusionaly", "Occasionally" }, { 053 "Steffen", "Stephen" }, { 054 "Thw", "The" }, { 055 "Unformanlly", "Unfortunately" }, { 056 "Unfortally", "Unfortunately" }, { 057 "abilitey", "ability" }, { 058 "abouy", "about" }, { 059 "absorbtion", "absorption" }, { 060 "accidently", "accidentally" }, { 061 "accomodate", "accommodate" }, { 062 "acommadate", "accommodate" }, { 063 "acord", "accord" }, { 064 "adultry", "adultery" }, { 065 "aggresive", "aggressive" }, { 066 "alchohol", "alcohol" }, { 067 "alchoholic", "alcoholic" }, { 068 "allieve", "alive" }, { 069 "alot", "a lot" }, { 070 "alright", "all right" }, { 071 "amature", "amateur" }, { 072 "ambivilant", "ambivalent" }, { 073 "amification", "amplification" }, { 074 "amourfous", "amorphous" }, { 075 "annoint", "anoint" }, { 076 "annonsment", "announcement" }, { 077 "annoyting", "anting" }, { 078 "annuncio", "announce" }, { 079 "anonomy", "anatomy" }, { 080 "anotomy", "anatomy" }, { 081 "antidesestablishmentarianism", "antidisestablishmentarianism" }, { 082 "antidisestablishmentarism", "antidisestablishmentarianism" }, { 083 "anynomous", "anonymous" }, { 084 "appelet", "applet" }, { 085 "appreceiated", "appreciated" }, { 086 "appresteate", "appreciate" }, { 087 "aquantance", "acquaintance" }, { 088 "aratictature", "architecture" }, { 089 "archeype", "archetype" }, { 090 "aricticure", "architecture" }, { 091 "artic", "arctic" }, { 092 "asentote", "asymptote" }, { 093 "ast", "at" }, { 094 "asterick", "asterisk" }, { 095 "asymetric", "asymmetric" }, { 096 "atentively", "attentively" }, { 097 "autoamlly", "automatically" }, { 098 "bankrot", "bankrupt" }, { 099 "basicly", "basically" }, { 100 "batallion", "battalion" }, { 101 "bbrose", "browse" }, { 102 "beauro", "bureau" }, { 103 "beaurocracy", "bureaucracy" }, { 104 "beggining", "beginning" }, { 105 "beging", "beginning" }, { 106 "behaviour", "behavior" }, { 107 "beleive", "believe" }, { 108 "belive", "believe" }, { 109 "benidifs", "benefits" }, { 110 "bigginging", "beginning" }, { 111 "blait", "bleat" }, { 112 "bouyant", "buoyant" }, { 113 "boygot", "boycott" }, { 114 "brocolli", "broccoli" }, { 115 "buch", "bush" }, { 116 "buder", "butter" }, { 117 "budr", "butter" }, { 118 "budter", "butter" }, { 119 "buracracy", "bureaucracy" }, { 120 "burracracy", "bureaucracy" }, { 121 "buton", "button" }, { 122 "byby", "by by" }, { 123 "cauler", "caller" }, { 124 "ceasar", "caesar" }, { 125 "cemetary", "cemetery" }, { 126 "changeing", "changing" }, { 127 "cheet", "cheat" }, { 128 "cicle", "circle" }, { 129 "cimplicity", "simplicity" }, { 130 "circumstaces", "circumstances" }, { 131 "clob", "club" }, { 132 "coaln", "colon" }, { 133 "cocamena", "cockamamie" }, { 134 "colleaque", "colleague" }, { 135 "colloquilism", "colloquialism" }, { 136 "columne", "column" }, { 137 "comiler", "compiler" }, { 138 "comitmment", "commitment" }, { 139 "comitte", "committee" }, { 140 "comittmen", "commitment" }, { 141 "comittmend", "commitment" }, { 142 "commerciasl", "commercials" }, { 143 "commited", "committed" }, { 144 "commitee", "committee" }, { 145 "companys", "companies" }, { 146 "compicated", "complicated" }, { 147 "comupter", "computer" }, { 148 "concensus", "consensus" }, { 149 "confusionism", "confucianism" }, { 150 "congradulations", "congratulations" }, { 151 "conibation", "contribution" }, { 152 "consident", "consistent" }, { 153 "consident", "consonant" }, { 154 "contast", "constant" }, { 155 "contastant", "constant" }, { 156 "contunie", "continue" }, { 157 "cooly", "coolly" }, { 158 "copping", "coping" }, { 159 "cosmoplyton", "cosmopolitan" }, { 160 "courst", "court" }, { 161 "crasy", "crazy" }, { 162 "cravets", "caveats" }, { 163 "credetability", "credibility" }, { 164 "criqitue", "critique" }, { 165 "croke", "croak" }, { 166 "crucifiction", "crucifixion" }, { 167 "crusifed", "crucified" }, { 168 "ctitique", "critique" }, { 169 "cumba", "combo" }, { 170 "custamisation", "customization" }, { 171 "dag", "dog" }, { 172 "daly", "daily" }, { 173 "danguages", "dangerous" }, { 174 "deaft", "draft" }, { 175 "defence", "defense" }, { 176 "defenly", "defiantly" }, { 177 "definate", "definite" }, { 178 "definately", "definitely" }, { 179 "dependeble", "dependable" }, { 180 "descrption", "description" }, { 181 "descrptn", "description" }, { 182 "desparate", "desperate" }, { 183 "dessicate", "desiccate" }, { 184 "destint", "distant" }, { 185 "develepment", "developments" }, { 186 "developement", "development" }, { 187 "develpond", "development" }, { 188 "devulge", "divulge" }, { 189 "diagree", "disagree" }, { 190 "dieties", "deities" }, { 191 "dinasaur", "dinosaur" }, { 192 "dinasour", "dinosaur" }, { 193 "direcyly", "directly" }, { 194 "discuess", "discuss" }, { 195 "disect", "dissect" }, { 196 "disippate", "dissipate" }, { 197 "disition", "decision" }, { 198 "dispair", "despair" }, { 199 "disssicion", "discussion" }, { 200 "distarct", "distract" }, { 201 "distart", "distort" }, { 202 "distroy", "destroy" }, { 203 "documtations", "documentation" }, { 204 "doenload", "download" }, { 205 "dongle", "dangle" }, { 206 "doog", "dog" }, { 207 "dramaticly", "dramatically" }, { 208 "drunkeness", "drunkenness" }, { 209 "ductioneery", "dictionary" }, { 210 "dur", "due" }, { 211 "duren", "during" }, { 212 "dymatic", "dynamic" }, { 213 "dynaic", "dynamic" }, { 214 "ecstacy", "ecstasy" }, { 215 "efficat", "efficient" }, { 216 "efficity", "efficacy" }, { 217 "effots", "efforts" }, { 218 "egsistence", "existence" }, { 219 "eitiology", "etiology" }, { 220 "elagent", "elegant" }, { 221 "elligit", "elegant" }, { 222 "embarass", "embarrass" }, { 223 "embarassment", "embarrassment" }, { 224 "embaress", "embarrass" }, { 225 "encapsualtion", "encapsulation" }, { 226 "encyclapidia", "encyclopedia" }, { 227 "encyclopia", "encyclopedia" }, { 228 "engins", "engine" }, { 229 "enhence", "enhance" }, { 230 "enligtment", "Enlightenment" }, { 231 "ennuui", "ennui" }, { 232 "enought", "enough" }, { 233 "enventions", "inventions" }, { 234 "envireminakl", "environmental" }, { 235 "enviroment", "environment" }, { 236 "epitomy", "epitome" }, { 237 "equire", "acquire" }, { 238 "errara", "error" }, { 239 "erro", "error" }, { 240 "evaualtion", "evaluation" }, { 241 "evething", "everything" }, { 242 "evtually", "eventually" }, { 243 "excede", "exceed" }, { 244 "excercise", "exercise" }, { 245 "excpt", "except" }, { 246 "excution", "execution" }, { 247 "exhileration", "exhilaration" }, { 248 "existance", "existence" }, { 249 "expleyly", "explicitly" }, { 250 "explity", "explicitly" }, { 251 "expresso", "espresso" }, { 252 "exspidient", "expedient" }, { 253 "extions", "extensions" }, { 254 "factontion", "factorization" }, { 255 "failer", "failure" }, { 256 "famdasy", "fantasy" }, { 257 "faver", "favor" }, { 258 "faxe", "fax" }, { 259 "febuary", "february" }, { 260 "firey", "fiery" }, { 261 "fistival", "festival" }, { 262 "flatterring", "flattering" }, { 263 "fluk", "flux" }, { 264 "flukse", "flux" }, { 265 "fone", "phone" }, { 266 "forsee", "foresee" }, { 267 "frustartaion", "frustrating" }, { 268 "fuction", "function" }, { 269 "funetik", "phonetic" }, { 270 "futs", "guts" }, { 271 "gamne", "came" }, { 272 "gaurd", "guard" }, { 273 "generly", "generally" }, { 274 "ghandi", "gandhi" }, { 275 "goberment", "government" }, { 276 "gobernement", "government" }, { 277 "gobernment", "government" }, { 278 "gotton", "gotten" }, { 279 "gracefull", "graceful" }, { 280 "gradualy", "gradually" }, { 281 "grammer", "grammar" }, { 282 "hallo", "hello" }, { 283 "hapily", "happily" }, { 284 "harrass", "harass" }, { 285 "havne", "have" }, { 286 "heellp", "help" }, { 287 "heighth", "height" }, { 288 "hellp", "help" }, { 289 "helo", "hello" }, { 290 "herlo", "hello" }, { 291 "hifin", "hyphen" }, { 292 "hifine", "hyphen" }, { 293 "higer", "higher" }, { 294 "hiphine", "hyphen" }, { 295 "hippie", "hippy" }, { 296 "hippopotamous", "hippopotamus" }, { 297 "hlp", "help" }, { 298 "hourse", "horse" }, { 299 "houssing", "housing" }, { 300 "howaver", "however" }, { 301 "howver", "however" }, { 302 "humaniti", "humanity" }, { 303 "hyfin", "hyphen" }, { 304 "hypotathes", "hypothesis" }, { 305 "hypotathese", "hypothesis" }, { 306 "hystrical", "hysterical" }, { 307 "ident", "indent" }, { 308 "illegitament", "illegitimate" }, { 309 "imbed", "embed" }, { 310 "imediaetly", "immediately" }, { 311 "imfamy", "infamy" }, { 312 "immenant", "immanent" }, { 313 "implemtes", "implements" }, { 314 "inadvertant", "inadvertent" }, { 315 "incase", "in case" }, { 316 "incedious", "insidious" }, { 317 "incompleet", "incomplete" }, { 318 "incomplot", "incomplete" }, { 319 "inconvenant", "inconvenient" }, { 320 "inconvience", "inconvenience" }, { 321 "independant", "independent" }, { 322 "independenent", "independent" }, { 323 "indepnends", "independent" }, { 324 "indepth", "in depth" }, { 325 "indispensible", "indispensable" }, { 326 "inefficite", "inefficient" }, { 327 "inerface", "interface" }, { 328 "infact", "in fact" }, { 329 "influencial", "influential" }, { 330 "inital", "initial" }, { 331 "initinized", "initialized" }, { 332 "initized", "initialized" }, { 333 "innoculate", "inoculate" }, { 334 "insistant", "insistent" }, { 335 "insistenet", "insistent" }, { 336 "instulation", "installation" }, { 337 "intealignt", "intelligent" }, { 338 "intejilent", "intelligent" }, { 339 "intelegent", "intelligent" }, { 340 "intelegnent", "intelligent" }, { 341 "intelejent", "intelligent" }, { 342 "inteligent", "intelligent" }, { 343 "intelignt", "intelligent" }, { 344 "intellagant", "intelligent" }, { 345 "intellegent", "intelligent" }, { 346 "intellegint", "intelligent" }, { 347 "intellgnt", "intelligent" }, { 348 "intensionality", "intensionally" }, { 349 "interate", "iterate" }, { 350 "internation", "international" }, { 351 "interpretate", "interpret" }, { 352 "interpretter", "interpreter" }, { 353 "intertes", "interested" }, { 354 "intertesd", "interested" }, { 355 "invermeantial", "environmental" }, { 356 "irregardless", "regardless" }, { 357 "irresistable", "irresistible" }, { 358 "irritible", "irritable" }, { 359 "islams", "muslims" }, { 360 "isotrop", "isotope" }, { 361 "isreal", "israel" }, { 362 "johhn", "john" }, { 363 "judgement", "judgment" }, { 364 "kippur", "kipper" }, { 365 "knawing", "knowing" }, { 366 "latext", "latest" }, { 367 "leasve", "leave" }, { 368 "lesure", "leisure" }, { 369 "liasion", "lesion" }, { 370 "liason", "liaison" }, { 371 "libary", "library" }, { 372 "likly", "likely" }, { 373 "lilometer", "kilometer" }, { 374 "liquify", "liquefy" }, { 375 "lloyer", "layer" }, { 376 "lossing", "losing" }, { 377 "luser", "laser" }, { 378 "maintanence", "maintenance" }, { 379 "majaerly", "majority" }, { 380 "majoraly", "majority" }, { 381 "maks", "masks" }, { 382 "mandelbrot", "Mandelbrot" }, { 383 "mant", "want" }, { 384 "marshall", "marshal" }, { 385 "maxium", "maximum" }, { 386 "meory", "memory" }, { 387 "metter", "better" }, { 388 "mic", "mike" }, { 389 "midia", "media" }, { 390 "millenium", "millennium" }, { 391 "miniscule", "minuscule" }, { 392 "minkay", "monkey" }, { 393 "minum", "minimum" }, { 394 "mischievious", "mischievous" }, { 395 "misilous", "miscellaneous" }, { 396 "momento", "memento" }, { 397 "monkay", "monkey" }, { 398 "mosaik", "mosaic" }, { 399 "mostlikely", "most likely" }, { 400 "mousr", "mouser" }, { 401 "mroe", "more" }, { 402 "neccessary", "necessary" }, { 403 "necesary", "necessary" }, { 404 "necesser", "necessary" }, { 405 "neice", "niece" }, { 406 "neighbour", "neighbor" }, { 407 "nemonic", "pneumonic" }, { 408 "nevade", "Nevada" }, { 409 "nickleodeon", "nickelodeon" }, { 410 "nieve", "naive" }, { 411 "noone", "no one" }, { 412 "noticably", "noticeably" }, { 413 "notin", "not in" }, { 414 "nozled", "nuzzled" }, { 415 "objectsion", "objects" }, { 416 "obsfuscate", "obfuscate" }, { 417 "ocassion", "occasion" }, { 418 "occuppied", "occupied" }, { 419 "occurence", "occurrence" }, { 420 "octagenarian", "octogenarian" }, { 421 "olf", "old" }, { 422 "opposim", "opossum" }, { 423 "organise", "organize" }, { 424 "organiz", "organize" }, { 425 "orientate", "orient" }, { 426 "oscilascope", "oscilloscope" }, { 427 "oving", "moving" }, { 428 "paramers", "parameters" }, { 429 "parametic", "parameter" }, { 430 "paranets", "parameters" }, { 431 "partrucal", "particular" }, { 432 "pataphysical", "metaphysical" }, { 433 "patten", "pattern" }, { 434 "permissable", "permissible" }, { 435 "permition", "permission" }, { 436 "permmasivie", "permissive" }, { 437 "perogative", "prerogative" }, { 438 "persue", "pursue" }, { 439 "phantasia", "fantasia" }, { 440 "phenominal", "phenomenal" }, { 441 "picaresque", "picturesque" }, { 442 "playwrite", "playwright" }, { 443 "poeses", "poesies" }, { 444 "polation", "politician" }, { 445 "poligamy", "polygamy" }, { 446 "politict", "politic" }, { 447 "pollice", "police" }, { 448 "polypropalene", "polypropylene" }, { 449 "pompom", "pompon" }, { 450 "possable", "possible" }, { 451 "practicle", "practical" }, { 452 "pragmaticism", "pragmatism" }, { 453 "preceeding", "preceding" }, { 454 "precion", "precision" }, { 455 "precios", "precision" }, { 456 "preemptory", "peremptory" }, { 457 "prefices", "prefixes" }, { 458 "prefixt", "prefixed" }, { 459 "presbyterian", "Presbyterian" }, { 460 "presue", "pursue" }, { 461 "presued", "pursued" }, { 462 "privielage", "privilege" }, { 463 "priviledge", "privilege" }, { 464 "proceedures", "procedures" }, { 465 "pronensiation", "pronunciation" }, { 466 "pronisation", "pronunciation" }, { 467 "pronounciation", "pronunciation" }, { 468 "properally", "properly" }, { 469 "proplematic", "problematic" }, { 470 "protray", "portray" }, { 471 "pscolgst", "psychologist" }, { 472 "psicolagest", "psychologist" }, { 473 "psycolagest", "psychologist" }, { 474 "quoz", "quiz" }, { 475 "radious", "radius" }, { 476 "ramplily", "rampantly" }, { 477 "reccomend", "recommend" }, { 478 "reccona", "raccoon" }, { 479 "recieve", "receive" }, { 480 "reconise", "recognize" }, { 481 "rectangeles", "rectangle" }, { 482 "redign", "redesign" }, { 483 "reoccurring", "recurring" }, { 484 "repitition", "repetition" }, { 485 "replasments", "replacement" }, { 486 "reposable", "responsible" }, { 487 "reseblence", "resemblance" }, { 488 "respct", "respect" }, { 489 "respecally", "respectfully" }, { 490 "roon", "room" }, { 491 "rought", "roughly" }, { 492 "rsx", "RSX" }, { 493 "rudemtry", "rudimentary" }, { 494 "runnung", "running" }, { 495 "sacreligious", "sacrilegious" }, { 496 "saftly", "safely" }, { 497 "salut", "salute" }, { 498 "satifly", "satisfy" }, { 499 "scrabdle", "scrabble" }, { 500 "searcheable", "searchable" }, { 501 "secion", "section" }, { 502 "seferal", "several" }, { 503 "segements", "segments" }, { 504 "sence", "sense" }, { 505 "seperate", "separate" }, { 506 "sherbert", "sherbet" }, { 507 "sicolagest", "psychologist" }, { 508 "sieze", "seize" }, { 509 "simpfilty", "simplicity" }, { 510 "simplye", "simply" }, { 511 "singal", "signal" }, { 512 "sitte", "site" }, { 513 "situration", "situation" }, { 514 "slyph", "sylph" }, { 515 "smil", "smile" }, { 516 "snuck", "sneaked" }, { 517 "sometmes", "sometimes" }, { 518 "soonec", "sonic" }, { 519 "specificialy", "specifically" }, { 520 "spel", "spell" }, { 521 "spoak", "spoke" }, { 522 "sponsered", "sponsored" }, { 523 "stering", "steering" }, { 524 "straightjacket", "straitjacket" }, { 525 "stumach", "stomach" }, { 526 "stutent", "student" }, { 527 "styleguide", "style guide" }, { 528 "subisitions", "substitutions" }, { 529 "subjecribed", "subscribed" }, { 530 "subpena", "subpoena" }, { 531 "substations", "substitutions" }, { 532 "suger", "sugar" }, { 533 "supercede", "supersede" }, { 534 "superfulous", "superfluous" }, { 535 "susan", "Susan" }, { 536 "swimwear", "swim wear" }, { 537 "syncorization", "synchronization" }, { 538 "taff", "tough" }, { 539 "taht", "that" }, { 540 "tattos", "tattoos" }, { 541 "techniquely", "technically" }, { 542 "teh", "the" }, { 543 "tem", "team" }, { 544 "teo", "two" }, { 545 "teridical", "theoretical" }, { 546 "tesst", "test" }, { 547 "tets", "tests" }, { 548 "thanot", "than or" }, { 549 "theirselves", "themselves" }, { 550 "theridically", "theoretical" }, { 551 "thredically", "theoretically" }, { 552 "thruout", "throughout" }, { 553 "ths", "this" }, { 554 "titalate", "titillate" }, { 555 "tobagan", "tobaggon" }, { 556 "tommorrow", "tomorrow" }, { 557 "tomorow", "tomorrow" }, { 558 "tradegy", "tragedy" }, { 559 "trubbel", "trouble" }, { 560 "ttest", "test" }, { 561 "tunnellike", "tunnel like" }, { 562 "tured", "turned" }, { 563 "tyrrany", "tyranny" }, { 564 "unatourral", "unnatural" }, { 565 "unaturral", "unnatural" }, { 566 "unconisitional", "unconstitutional" }, { 567 "unconscience", "unconscious" }, { 568 "underladder", "under ladder" }, { 569 "unentelegible", "unintelligible" }, { 570 "unfortunently", "unfortunately" }, { 571 "unnaturral", "unnatural" }, { 572 "upcast", "up cast" }, { 573 "upmost", "utmost" }, { 574 "uranisium", "uranium" }, { 575 "verison", "version" }, { 576 "vinagarette", "vinaigrette" }, { 577 "volumptuous", "voluptuous" }, { 578 "volunteerism", "voluntarism" }, { 579 "volye", "volley" }, { 580 "wadting", "wasting" }, { 581 "waite", "wait" }, { 582 "wan't", "won't" }, { 583 "warloord", "warlord" }, { 584 "whaaat", "what" }, { 585 "whard", "ward" }, { 586 "whimp", "wimp" }, { 587 "wicken", "weaken" }, { 588 "wierd", "weird" }, { 589 "wrank", "rank" }, { 590 "writeen", "righten" }, { 591 "writting", "writing" }, { 592 "wundeews", "windows" }, { 593 "yeild", "yield" }, { 594 "youe", "your" } 595 }; 596 597 /** 598 * A subset of FIXTURE generated by this test. 599 */ 600 private static final String[][] MATCHES = { { "Accosinly", "Occasionally" }, { 601 "Maddness", "Madness" }, { 602 "Occusionaly", "Occasionally" }, { 603 "Steffen", "Stephen" }, { 604 "Thw", "The" }, { 605 "Unformanlly", "Unfortunately" }, { 606 "Unfortally", "Unfortunately" }, { 607 "abilitey", "ability" }, { 608 "absorbtion", "absorption" }, { 609 "accidently", "accidentally" }, { 610 "accomodate", "accommodate" }, { 611 "acommadate", "accommodate" }, { 612 "acord", "accord" }, { 613 "adultry", "adultery" }, { 614 "aggresive", "aggressive" }, { 615 "alchohol", "alcohol" }, { 616 "alchoholic", "alcoholic" }, { 617 "allieve", "alive" }, { 618 "alot", "a lot" }, { 619 "alright", "all right" }, { 620 "amature", "amateur" }, { 621 "ambivilant", "ambivalent" }, { 622 "amourfous", "amorphous" }, { 623 "annoint", "anoint" }, { 624 "annonsment", "announcement" }, { 625 "annoyting", "anting" }, { 626 "annuncio", "announce" }, { 627 "anotomy", "anatomy" }, { 628 "antidesestablishmentarianism", "antidisestablishmentarianism" }, { 629 "antidisestablishmentarism", "antidisestablishmentarianism" }, { 630 "anynomous", "anonymous" }, { 631 "appelet", "applet" }, { 632 "appreceiated", "appreciated" }, { 633 "appresteate", "appreciate" }, { 634 "aquantance", "acquaintance" }, { 635 "aricticure", "architecture" }, { 636 "asterick", "asterisk" }, { 637 "asymetric", "asymmetric" }, { 638 "atentively", "attentively" }, { 639 "bankrot", "bankrupt" }, { 640 "basicly", "basically" }, { 641 "batallion", "battalion" }, { 642 "bbrose", "browse" }, { 643 "beauro", "bureau" }, { 644 "beaurocracy", "bureaucracy" }, { 645 "beggining", "beginning" }, { 646 "behaviour", "behavior" }, { 647 "beleive", "believe" }, { 648 "belive", "believe" }, { 649 "blait", "bleat" }, { 650 "bouyant", "buoyant" }, { 651 "boygot", "boycott" }, { 652 "brocolli", "broccoli" }, { 653 "buder", "butter" }, { 654 "budr", "butter" }, { 655 "budter", "butter" }, { 656 "buracracy", "bureaucracy" }, { 657 "burracracy", "bureaucracy" }, { 658 "buton", "button" }, { 659 "byby", "by by" }, { 660 "cauler", "caller" }, { 661 "ceasar", "caesar" }, { 662 "cemetary", "cemetery" }, { 663 "changeing", "changing" }, { 664 "cheet", "cheat" }, { 665 "cimplicity", "simplicity" }, { 666 "circumstaces", "circumstances" }, { 667 "clob", "club" }, { 668 "coaln", "colon" }, { 669 "colleaque", "colleague" }, { 670 "colloquilism", "colloquialism" }, { 671 "columne", "column" }, { 672 "comitmment", "commitment" }, { 673 "comitte", "committee" }, { 674 "comittmen", "commitment" }, { 675 "comittmend", "commitment" }, { 676 "commerciasl", "commercials" }, { 677 "commited", "committed" }, { 678 "commitee", "committee" }, { 679 "companys", "companies" }, { 680 "comupter", "computer" }, { 681 "concensus", "consensus" }, { 682 "confusionism", "confucianism" }, { 683 "congradulations", "congratulations" }, { 684 "contunie", "continue" }, { 685 "cooly", "coolly" }, { 686 "copping", "coping" }, { 687 "cosmoplyton", "cosmopolitan" }, { 688 "crasy", "crazy" }, { 689 "croke", "croak" }, { 690 "crucifiction", "crucifixion" }, { 691 "crusifed", "crucified" }, { 692 "cumba", "combo" }, { 693 "custamisation", "customization" }, { 694 "dag", "dog" }, { 695 "daly", "daily" }, { 696 "defence", "defense" }, { 697 "definate", "definite" }, { 698 "definately", "definitely" }, { 699 "dependeble", "dependable" }, { 700 "descrption", "description" }, { 701 "descrptn", "description" }, { 702 "desparate", "desperate" }, { 703 "dessicate", "desiccate" }, { 704 "destint", "distant" }, { 705 "develepment", "developments" }, { 706 "developement", "development" }, { 707 "develpond", "development" }, { 708 "devulge", "divulge" }, { 709 "dieties", "deities" }, { 710 "dinasaur", "dinosaur" }, { 711 "dinasour", "dinosaur" }, { 712 "discuess", "discuss" }, { 713 "disect", "dissect" }, { 714 "disippate", "dissipate" }, { 715 "disition", "decision" }, { 716 "dispair", "despair" }, { 717 "distarct", "distract" }, { 718 "distart", "distort" }, { 719 "distroy", "destroy" }, { 720 "doenload", "download" }, { 721 "dongle", "dangle" }, { 722 "doog", "dog" }, { 723 "dramaticly", "dramatically" }, { 724 "drunkeness", "drunkenness" }, { 725 "ductioneery", "dictionary" }, { 726 "ecstacy", "ecstasy" }, { 727 "egsistence", "existence" }, { 728 "eitiology", "etiology" }, { 729 "elagent", "elegant" }, { 730 "embarass", "embarrass" }, { 731 "embarassment", "embarrassment" }, { 732 "embaress", "embarrass" }, { 733 "encapsualtion", "encapsulation" }, { 734 "encyclapidia", "encyclopedia" }, { 735 "encyclopia", "encyclopedia" }, { 736 "engins", "engine" }, { 737 "enhence", "enhance" }, { 738 "ennuui", "ennui" }, { 739 "enventions", "inventions" }, { 740 "envireminakl", "environmental" }, { 741 "enviroment", "environment" }, { 742 "epitomy", "epitome" }, { 743 "equire", "acquire" }, { 744 "errara", "error" }, { 745 "evaualtion", "evaluation" }, { 746 "excede", "exceed" }, { 747 "excercise", "exercise" }, { 748 "excpt", "except" }, { 749 "exhileration", "exhilaration" }, { 750 "existance", "existence" }, { 751 "expleyly", "explicitly" }, { 752 "explity", "explicitly" }, { 753 "failer", "failure" }, { 754 "faver", "favor" }, { 755 "faxe", "fax" }, { 756 "firey", "fiery" }, { 757 "fistival", "festival" }, { 758 "flatterring", "flattering" }, { 759 "flukse", "flux" }, { 760 "fone", "phone" }, { 761 "forsee", "foresee" }, { 762 "frustartaion", "frustrating" }, { 763 "funetik", "phonetic" }, { 764 "gaurd", "guard" }, { 765 "generly", "generally" }, { 766 "ghandi", "gandhi" }, { 767 "gotton", "gotten" }, { 768 "gracefull", "graceful" }, { 769 "gradualy", "gradually" }, { 770 "grammer", "grammar" }, { 771 "hallo", "hello" }, { 772 "hapily", "happily" }, { 773 "harrass", "harass" }, { 774 "heellp", "help" }, { 775 "heighth", "height" }, { 776 "hellp", "help" }, { 777 "helo", "hello" }, { 778 "hifin", "hyphen" }, { 779 "hifine", "hyphen" }, { 780 "hiphine", "hyphen" }, { 781 "hippie", "hippy" }, { 782 "hippopotamous", "hippopotamus" }, { 783 "hourse", "horse" }, { 784 "houssing", "housing" }, { 785 "howaver", "however" }, { 786 "howver", "however" }, { 787 "humaniti", "humanity" }, { 788 "hyfin", "hyphen" }, { 789 "hystrical", "hysterical" }, { 790 "illegitament", "illegitimate" }, { 791 "imbed", "embed" }, { 792 "imediaetly", "immediately" }, { 793 "immenant", "immanent" }, { 794 "implemtes", "implements" }, { 795 "inadvertant", "inadvertent" }, { 796 "incase", "in case" }, { 797 "incedious", "insidious" }, { 798 "incompleet", "incomplete" }, { 799 "incomplot", "incomplete" }, { 800 "inconvenant", "inconvenient" }, { 801 "inconvience", "inconvenience" }, { 802 "independant", "independent" }, { 803 "independenent", "independent" }, { 804 "indepnends", "independent" }, { 805 "indepth", "in depth" }, { 806 "indispensible", "indispensable" }, { 807 "inefficite", "inefficient" }, { 808 "infact", "in fact" }, { 809 "influencial", "influential" }, { 810 "innoculate", "inoculate" }, { 811 "insistant", "insistent" }, { 812 "insistenet", "insistent" }, { 813 "instulation", "installation" }, { 814 "intealignt", "intelligent" }, { 815 "intelegent", "intelligent" }, { 816 "intelegnent", "intelligent" }, { 817 "intelejent", "intelligent" }, { 818 "inteligent", "intelligent" }, { 819 "intelignt", "intelligent" }, { 820 "intellagant", "intelligent" }, { 821 "intellegent", "intelligent" }, { 822 "intellegint", "intelligent" }, { 823 "intellgnt", "intelligent" }, { 824 "intensionality", "intensionally" }, { 825 "internation", "international" }, { 826 "interpretate", "interpret" }, { 827 "interpretter", "interpreter" }, { 828 "intertes", "interested" }, { 829 "intertesd", "interested" }, { 830 "invermeantial", "environmental" }, { 831 "irresistable", "irresistible" }, { 832 "irritible", "irritable" }, { 833 "isreal", "israel" }, { 834 "johhn", "john" }, { 835 "kippur", "kipper" }, { 836 "knawing", "knowing" }, { 837 "lesure", "leisure" }, { 838 "liasion", "lesion" }, { 839 "liason", "liaison" }, { 840 "likly", "likely" }, { 841 "liquify", "liquefy" }, { 842 "lloyer", "layer" }, { 843 "lossing", "losing" }, { 844 "luser", "laser" }, { 845 "maintanence", "maintenance" }, { 846 "mandelbrot", "Mandelbrot" }, { 847 "marshall", "marshal" }, { 848 "maxium", "maximum" }, { 849 "mic", "mike" }, { 850 "midia", "media" }, { 851 "millenium", "millennium" }, { 852 "miniscule", "minuscule" }, { 853 "minkay", "monkey" }, { 854 "mischievious", "mischievous" }, { 855 "momento", "memento" }, { 856 "monkay", "monkey" }, { 857 "mosaik", "mosaic" }, { 858 "mostlikely", "most likely" }, { 859 "mousr", "mouser" }, { 860 "mroe", "more" }, { 861 "necesary", "necessary" }, { 862 "necesser", "necessary" }, { 863 "neice", "niece" }, { 864 "neighbour", "neighbor" }, { 865 "nemonic", "pneumonic" }, { 866 "nevade", "Nevada" }, { 867 "nickleodeon", "nickelodeon" }, { 868 "nieve", "naive" }, { 869 "noone", "no one" }, { 870 "notin", "not in" }, { 871 "nozled", "nuzzled" }, { 872 "objectsion", "objects" }, { 873 "ocassion", "occasion" }, { 874 "occuppied", "occupied" }, { 875 "occurence", "occurrence" }, { 876 "octagenarian", "octogenarian" }, { 877 "opposim", "opossum" }, { 878 "organise", "organize" }, { 879 "organiz", "organize" }, { 880 "orientate", "orient" }, { 881 "oscilascope", "oscilloscope" }, { 882 "parametic", "parameter" }, { 883 "permissable", "permissible" }, { 884 "permmasivie", "permissive" }, { 885 "persue", "pursue" }, { 886 "phantasia", "fantasia" }, { 887 "phenominal", "phenomenal" }, { 888 "playwrite", "playwright" }, { 889 "poeses", "poesies" }, { 890 "poligamy", "polygamy" }, { 891 "politict", "politic" }, { 892 "pollice", "police" }, { 893 "polypropalene", "polypropylene" }, { 894 "possable", "possible" }, { 895 "practicle", "practical" }, { 896 "pragmaticism", "pragmatism" }, { 897 "preceeding", "preceding" }, { 898 "precios", "precision" }, { 899 "preemptory", "peremptory" }, { 900 "prefixt", "prefixed" }, { 901 "presbyterian", "Presbyterian" }, { 902 "presue", "pursue" }, { 903 "presued", "pursued" }, { 904 "privielage", "privilege" }, { 905 "priviledge", "privilege" }, { 906 "proceedures", "procedures" }, { 907 "pronensiation", "pronunciation" }, { 908 "pronounciation", "pronunciation" }, { 909 "properally", "properly" }, { 910 "proplematic", "problematic" }, { 911 "protray", "portray" }, { 912 "pscolgst", "psychologist" }, { 913 "psicolagest", "psychologist" }, { 914 "psycolagest", "psychologist" }, { 915 "quoz", "quiz" }, { 916 "radious", "radius" }, { 917 "reccomend", "recommend" }, { 918 "reccona", "raccoon" }, { 919 "recieve", "receive" }, { 920 "reconise", "recognize" }, { 921 "rectangeles", "rectangle" }, { 922 "reoccurring", "recurring" }, { 923 "repitition", "repetition" }, { 924 "replasments", "replacement" }, { 925 "respct", "respect" }, { 926 "respecally", "respectfully" }, { 927 "rsx", "RSX" }, { 928 "runnung", "running" }, { 929 "sacreligious", "sacrilegious" }, { 930 "salut", "salute" }, { 931 "searcheable", "searchable" }, { 932 "seferal", "several" }, { 933 "segements", "segments" }, { 934 "sence", "sense" }, { 935 "seperate", "separate" }, { 936 "sicolagest", "psychologist" }, { 937 "sieze", "seize" }, { 938 "simplye", "simply" }, { 939 "sitte", "site" }, { 940 "slyph", "sylph" }, { 941 "smil", "smile" }, { 942 "sometmes", "sometimes" }, { 943 "soonec", "sonic" }, { 944 "specificialy", "specifically" }, { 945 "spel", "spell" }, { 946 "spoak", "spoke" }, { 947 "sponsered", "sponsored" }, { 948 "stering", "steering" }, { 949 "straightjacket", "straitjacket" }, { 950 "stumach", "stomach" }, { 951 "stutent", "student" }, { 952 "styleguide", "style guide" }, { 953 "subpena", "subpoena" }, { 954 "substations", "substitutions" }, { 955 "supercede", "supersede" }, { 956 "superfulous", "superfluous" }, { 957 "susan", "Susan" }, { 958 "swimwear", "swim wear" }, { 959 "syncorization", "synchronization" }, { 960 "taff", "tough" }, { 961 "taht", "that" }, { 962 "tattos", "tattoos" }, { 963 "techniquely", "technically" }, { 964 "teh", "the" }, { 965 "tem", "team" }, { 966 "teo", "two" }, { 967 "teridical", "theoretical" }, { 968 "tesst", "test" }, { 969 "theridically", "theoretical" }, { 970 "thredically", "theoretically" }, { 971 "thruout", "throughout" }, { 972 "ths", "this" }, { 973 "titalate", "titillate" }, { 974 "tobagan", "tobaggon" }, { 975 "tommorrow", "tomorrow" }, { 976 "tomorow", "tomorrow" }, { 977 "trubbel", "trouble" }, { 978 "ttest", "test" }, { 979 "tyrrany", "tyranny" }, { 980 "unatourral", "unnatural" }, { 981 "unaturral", "unnatural" }, { 982 "unconisitional", "unconstitutional" }, { 983 "unconscience", "unconscious" }, { 984 "underladder", "under ladder" }, { 985 "unentelegible", "unintelligible" }, { 986 "unfortunently", "unfortunately" }, { 987 "unnaturral", "unnatural" }, { 988 "upcast", "up cast" }, { 989 "verison", "version" }, { 990 "vinagarette", "vinaigrette" }, { 991 "volunteerism", "voluntarism" }, { 992 "volye", "volley" }, { 993 "waite", "wait" }, { 994 "wan't", "won't" }, { 995 "warloord", "warlord" }, { 996 "whaaat", "what" }, { 997 "whard", "ward" }, { 998 "whimp", "wimp" }, { 999 "wicken", "weaken" }, { 1000 "wierd", "weird" }, { 1001 "wrank", "rank" }, { 1002 "writeen", "righten" }, { 1003 "writting", "writing" }, { 1004 "wundeews", "windows" }, { 1005 "yeild", "yield" }, }; 1006 1007 /** 1008 * Tests encoding APIs in one place. 1009 */ 1010 private void assertDoubleMetaphone(final String expected, final String source) { 1011 assertEquals(expected, this.getStringEncoder().encode(source)); 1012 try { 1013 assertEquals(expected, this.getStringEncoder().encode((Object) source)); 1014 } catch (final EncoderException e) { 1015 fail("Unexpected expection: " + e); 1016 } 1017 assertEquals(expected, this.getStringEncoder().doubleMetaphone(source)); 1018 assertEquals(expected, this.getStringEncoder().doubleMetaphone(source, false)); 1019 } 1020 1021 /** 1022 * Tests encoding APIs in one place. 1023 */ 1024 public void assertDoubleMetaphoneAlt(final String expected, final String source) { 1025 assertEquals(expected, this.getStringEncoder().doubleMetaphone(source, true)); 1026 } 1027 1028 public void doubleMetaphoneEqualTest(final String[][] pairs, final boolean useAlternate) { 1029 this.validateFixture(pairs); 1030 for (final String[] pair : pairs) { 1031 final String name0 = pair[0]; 1032 final String name1 = pair[1]; 1033 final String failMsg = "Expected match between " + name0 + " and " + name1 + " (use alternate: " + useAlternate + ")"; 1034 assertTrue(failMsg, this.getStringEncoder().isDoubleMetaphoneEqual(name0, name1, useAlternate)); 1035 assertTrue(failMsg, this.getStringEncoder().isDoubleMetaphoneEqual(name1, name0, useAlternate)); 1036 if (!useAlternate) { 1037 assertTrue(failMsg, this.getStringEncoder().isDoubleMetaphoneEqual(name0, name1)); 1038 assertTrue(failMsg, this.getStringEncoder().isDoubleMetaphoneEqual(name1, name0)); 1039 } 1040 } 1041 } 1042 1043 public void doubleMetaphoneNotEqualTest(final boolean alternate) { 1044 assertFalse(this.getStringEncoder().isDoubleMetaphoneEqual("Brain", "Band", alternate)); 1045 assertFalse(this.getStringEncoder().isDoubleMetaphoneEqual("Band", "Brain", alternate)); 1046 1047 if (!alternate) { 1048 assertFalse(this.getStringEncoder().isDoubleMetaphoneEqual("Brain", "Band")); 1049 assertFalse(this.getStringEncoder().isDoubleMetaphoneEqual("Band", "Brain")); 1050 } 1051 } 1052 1053 @Override 1054 protected DoubleMetaphone createStringEncoder() { 1055 return new DoubleMetaphone(); 1056 } 1057 1058 @Test 1059 public void testDoubleMetaphone() { 1060 assertDoubleMetaphone("TSTN", "testing"); 1061 assertDoubleMetaphone("0", "The"); 1062 assertDoubleMetaphone("KK", "quick"); 1063 assertDoubleMetaphone("PRN", "brown"); 1064 assertDoubleMetaphone("FKS", "fox"); 1065 assertDoubleMetaphone("JMPT", "jumped"); 1066 assertDoubleMetaphone("AFR", "over"); 1067 assertDoubleMetaphone("0", "the"); 1068 assertDoubleMetaphone("LS", "lazy"); 1069 assertDoubleMetaphone("TKS", "dogs"); 1070 assertDoubleMetaphone("MKFR", "MacCafferey"); 1071 assertDoubleMetaphone("STFN", "Stephan"); 1072 assertDoubleMetaphone("KSSK", "Kuczewski"); 1073 assertDoubleMetaphone("MKLL", "McClelland"); 1074 assertDoubleMetaphone("SNHS", "san jose"); 1075 assertDoubleMetaphone("SNFP", "xenophobia"); 1076 1077 assertDoubleMetaphoneAlt("TSTN", "testing"); 1078 assertDoubleMetaphoneAlt("T", "The"); 1079 assertDoubleMetaphoneAlt("KK", "quick"); 1080 assertDoubleMetaphoneAlt("PRN", "brown"); 1081 assertDoubleMetaphoneAlt("FKS", "fox"); 1082 assertDoubleMetaphoneAlt("AMPT", "jumped"); 1083 assertDoubleMetaphoneAlt("AFR", "over"); 1084 assertDoubleMetaphoneAlt("T", "the"); 1085 assertDoubleMetaphoneAlt("LS", "lazy"); 1086 assertDoubleMetaphoneAlt("TKS", "dogs"); 1087 assertDoubleMetaphoneAlt("MKFR", "MacCafferey"); 1088 assertDoubleMetaphoneAlt("STFN", "Stephan"); 1089 assertDoubleMetaphoneAlt("KXFS", "Kutchefski"); 1090 assertDoubleMetaphoneAlt("MKLL", "McClelland"); 1091 assertDoubleMetaphoneAlt("SNHS", "san jose"); 1092 assertDoubleMetaphoneAlt("SNFP", "xenophobia"); 1093 assertDoubleMetaphoneAlt("FKR", "Fokker"); 1094 assertDoubleMetaphoneAlt("AK", "Joqqi"); 1095 assertDoubleMetaphoneAlt("HF", "Hovvi"); 1096 assertDoubleMetaphoneAlt("XRN", "Czerny"); 1097 } 1098 1099 @Test 1100 public void testEmpty() { 1101 assertEquals(null, this.getStringEncoder().doubleMetaphone(null)); 1102 assertEquals(null, this.getStringEncoder().doubleMetaphone("")); 1103 assertEquals(null, this.getStringEncoder().doubleMetaphone(" ")); 1104 assertEquals(null, this.getStringEncoder().doubleMetaphone("\t\n\r ")); 1105 } 1106 1107 /** 1108 * Test setting maximum length 1109 */ 1110 @Test 1111 public void testSetMaxCodeLength() { 1112 final String value = "jumped"; 1113 1114 final DoubleMetaphone doubleMetaphone = new DoubleMetaphone(); 1115 1116 // Sanity check of default settings 1117 assertEquals("Default Max Code Length", 4, doubleMetaphone.getMaxCodeLen()); 1118 assertEquals("Default Primary", "JMPT", doubleMetaphone.doubleMetaphone(value, false)); 1119 assertEquals("Default Alternate", "AMPT", doubleMetaphone.doubleMetaphone(value, true)); 1120 1121 // Check setting Max Code Length 1122 doubleMetaphone.setMaxCodeLen(3); 1123 assertEquals("Set Max Code Length", 3, doubleMetaphone.getMaxCodeLen()); 1124 assertEquals("Max=3 Primary", "JMP", doubleMetaphone.doubleMetaphone(value, false)); 1125 assertEquals("Max=3 Alternate", "AMP", doubleMetaphone.doubleMetaphone(value, true)); 1126 } 1127 1128 @Test 1129 public void testIsDoubleMetaphoneEqualBasic() { 1130 final String[][] testFixture = new String[][] { { "Case", "case" }, { 1131 "CASE", "Case" }, { 1132 "caSe", "cAsE" }, { 1133 "cookie", "quick" }, { 1134 "quick", "cookie" }, { 1135 "Brian", "Bryan" }, { 1136 "Auto", "Otto" }, { 1137 "Steven", "Stefan" }, { 1138 "Philipowitz", "Filipowicz" } 1139 }; 1140 doubleMetaphoneEqualTest(testFixture, false); 1141 doubleMetaphoneEqualTest(testFixture, true); 1142 } 1143 1144 /** 1145 * Example in the original article but failures in this Java impl: 1146 */ 1147 @Test 1148 public void testIsDoubleMetaphoneEqualExtended1() { 1149 // String[][] testFixture = new String[][] { { "Smith", "Schmidt" } 1150 // }; 1151 // doubleMetaphoneEqualTest(testFixture, false); 1152 // doubleMetaphoneEqualTest(testFixture, true); 1153 } 1154 1155 @Test 1156 public void testIsDoubleMetaphoneEqualExtended2() { 1157 final String[][] testFixture = new String[][] { { "Jablonski", "Yablonsky" } 1158 }; 1159 //doubleMetaphoneEqualTest(testFixture, false); 1160 doubleMetaphoneEqualTest(testFixture, true); 1161 } 1162 1163 /** 1164 * Used to generate the MATCHES array and test possible matches from the 1165 * FIXTURE array. 1166 */ 1167 @Test 1168 public void testIsDoubleMetaphoneEqualExtended3() { 1169 this.validateFixture(FIXTURE); 1170 final StringBuilder failures = new StringBuilder(); 1171 final StringBuilder matches = new StringBuilder(); 1172 final String cr = System.getProperty("line.separator"); 1173 matches.append("private static final String[][] MATCHES = {" + cr); 1174 int failCount = 0; 1175 for (int i = 0; i < FIXTURE.length; i++) { 1176 final String name0 = FIXTURE[i][0]; 1177 final String name1 = FIXTURE[i][1]; 1178 final boolean match1 = this.getStringEncoder().isDoubleMetaphoneEqual(name0, name1, false); 1179 final boolean match2 = this.getStringEncoder().isDoubleMetaphoneEqual(name0, name1, true); 1180 if (match1 == false && match2 == false) { 1181 final String failMsg = "[" + i + "] " + name0 + " and " + name1 + cr; 1182 failures.append(failMsg); 1183 failCount++; 1184 } else { 1185 matches.append("{\"" + name0 + "\", \"" + name1 + "\"}," + cr); 1186 } 1187 } 1188 matches.append("};"); 1189 // Turn on to print a new MATCH array 1190 //System.out.println(matches.toString()); 1191 if (failCount > 0) { 1192 // Turn on to see which pairs do NOT match. 1193 // String msg = failures.toString(); 1194 //fail(failCount + " failures out of " + FIXTURE.length + ". The 1195 // following could be made to match: " + cr + msg); 1196 } 1197 } 1198 1199 @Test 1200 public void testIsDoubleMetaphoneEqualWithMATCHES() { 1201 this.validateFixture(MATCHES); 1202 for (int i = 0; i < MATCHES.length; i++) { 1203 final String name0 = MATCHES[i][0]; 1204 final String name1 = MATCHES[i][1]; 1205 final boolean match1 = this.getStringEncoder().isDoubleMetaphoneEqual(name0, name1, false); 1206 final boolean match2 = this.getStringEncoder().isDoubleMetaphoneEqual(name0, name1, true); 1207 if (match1 == false && match2 == false) { 1208 fail("Expected match [" + i + "] " + name0 + " and " + name1); 1209 } 1210 } 1211 } 1212 1213 @Test 1214 public void testIsDoubleMetaphoneNotEqual() { 1215 doubleMetaphoneNotEqualTest(false); 1216 doubleMetaphoneNotEqualTest(true); 1217 } 1218 1219 @Test 1220 public void testCCedilla() { 1221 assertTrue(this.getStringEncoder().isDoubleMetaphoneEqual("\u00e7", "S")); // c-cedilla 1222 } 1223 1224 @Test 1225 public void testNTilde() { 1226 assertTrue(this.getStringEncoder().isDoubleMetaphoneEqual("\u00f1", "N")); // n-tilde 1227 } 1228 1229 public void validateFixture(final String[][] pairs) { 1230 if (pairs.length == 0) { 1231 fail("Test fixture is empty"); 1232 } 1233 for (int i = 0; i < pairs.length; i++) { 1234 if (pairs[i].length != 2) { 1235 fail("Error in test fixture in the data array at index " + i); 1236 } 1237 } 1238 } 1239 }