001    /*
002     * Licensed to the Apache Software Foundation (ASF) under one or more
003     * contributor license agreements.  See the NOTICE file distributed with
004     * this work for additional information regarding copyright ownership.
005     * The ASF licenses this file to You under the Apache License, Version 2.0
006     * (the "License"); you may not use this file except in compliance with
007     * the License.  You may obtain a copy of the License at
008     *
009     *      http://www.apache.org/licenses/LICENSE-2.0
010     *
011     * Unless required by applicable law or agreed to in writing, software
012     * distributed under the License is distributed on an "AS IS" BASIS,
013     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014     * See the License for the specific language governing permissions and
015     * limitations under the License.
016     */
017    
018    package org.apache.commons.codec.language;
019    
020    import static org.junit.Assert.assertEquals;
021    import static org.junit.Assert.assertFalse;
022    import static org.junit.Assert.assertTrue;
023    import static org.junit.Assert.fail;
024    
025    import org.apache.commons.codec.EncoderException;
026    import org.apache.commons.codec.StringEncoder;
027    import org.apache.commons.codec.StringEncoderAbstractTest;
028    import org.junit.Test;
029    
030    /**
031     * Tests {@link DoubleMetaphone}.
032     *
033     * <p>Keep this file in UTF-8 encoding for proper Javadoc processing.</p>
034     *
035     * @see "http://www.cuj.com/documents/s=8038/cuj0006philips/"
036     * @version $Id: DoubleMetaphoneTest.html 889935 2013-12-11 05:05:13Z ggregory $
037     */
038    public class DoubleMetaphoneTest extends StringEncoderAbstractTest {
039    
040        /**
041         * Test data from http://aspell.sourceforge.net/test/batch0.tab.
042         *
043         * "Copyright (C) 2002 Kevin Atkinson (kevina@gnu.org). Verbatim copying
044         * and distribution of this entire article is permitted in any medium,
045         * provided this notice is preserved."
046         *
047         * The test data was massaged into the array below; each entry pairs a misspelling with its intended word.
048         */
049        private static final String[][] FIXTURE = { { "Accosinly", "Occasionally" }, {
050                "Ciculer", "Circler" }, {
051                "Circue", "Circle" }, {
052                "Maddness", "Madness" }, {
053                "Occusionaly", "Occasionally" }, {
054                "Steffen", "Stephen" }, {
055                "Thw", "The" }, {
056                "Unformanlly", "Unfortunately" }, {
057                "Unfortally", "Unfortunately" }, {
058                "abilitey", "ability" }, {
059                "abouy", "about" }, {
060                "absorbtion", "absorption" }, {
061                "accidently", "accidentally" }, {
062                "accomodate", "accommodate" }, {
063                "acommadate", "accommodate" }, {
064                "acord", "accord" }, {
065                "adultry", "adultery" }, {
066                "aggresive", "aggressive" }, {
067                "alchohol", "alcohol" }, {
068                "alchoholic", "alcoholic" }, {
069                "allieve", "alive" }, {
070                "alot", "a lot" }, {
071                "alright", "all right" }, {
072                "amature", "amateur" }, {
073                "ambivilant", "ambivalent" }, {
074                "amification", "amplification" }, {
075                "amourfous", "amorphous" }, {
076                "annoint", "anoint" }, {
077                "annonsment", "announcement" }, {
078                "annoyting", "anting" }, {
079                "annuncio", "announce" }, {
080                "anonomy", "anatomy" }, {
081                "anotomy", "anatomy" }, {
082                "antidesestablishmentarianism", "antidisestablishmentarianism" }, {
083                "antidisestablishmentarism", "antidisestablishmentarianism" }, {
084                "anynomous", "anonymous" }, {
085                "appelet", "applet" }, {
086                "appreceiated", "appreciated" }, {
087                "appresteate", "appreciate" }, {
088                "aquantance", "acquaintance" }, {
089                "aratictature", "architecture" }, {
090                "archeype", "archetype" }, {
091                "aricticure", "architecture" }, {
092                "artic", "arctic" }, {
093                "asentote", "asymptote" }, {
094                "ast", "at" }, {
095                "asterick", "asterisk" }, {
096                "asymetric", "asymmetric" }, {
097                "atentively", "attentively" }, {
098                "autoamlly", "automatically" }, {
099                "bankrot", "bankrupt" }, {
100                "basicly", "basically" }, {
101                "batallion", "battalion" }, {
102                "bbrose", "browse" }, {
103                "beauro", "bureau" }, {
104                "beaurocracy", "bureaucracy" }, {
105                "beggining", "beginning" }, {
106                "beging", "beginning" }, {
107                "behaviour", "behavior" }, {
108                "beleive", "believe" }, {
109                "belive", "believe" }, {
110                "benidifs", "benefits" }, {
111                "bigginging", "beginning" }, {
112                "blait", "bleat" }, {
113                "bouyant", "buoyant" }, {
114                "boygot", "boycott" }, {
115                "brocolli", "broccoli" }, {
116                "buch", "bush" }, {
117                "buder", "butter" }, {
118                "budr", "butter" }, {
119                "budter", "butter" }, {
120                "buracracy", "bureaucracy" }, {
121                "burracracy", "bureaucracy" }, {
122                "buton", "button" }, {
123                "byby", "by by" }, {
124                "cauler", "caller" }, {
125                "ceasar", "caesar" }, {
126                "cemetary", "cemetery" }, {
127                "changeing", "changing" }, {
128                "cheet", "cheat" }, {
129                "cicle", "circle" }, {
130                "cimplicity", "simplicity" }, {
131                "circumstaces", "circumstances" }, {
132                "clob", "club" }, {
133                "coaln", "colon" }, {
134                "cocamena", "cockamamie" }, {
135                "colleaque", "colleague" }, {
136                "colloquilism", "colloquialism" }, {
137                "columne", "column" }, {
138                "comiler", "compiler" }, {
139                "comitmment", "commitment" }, {
140                "comitte", "committee" }, {
141                "comittmen", "commitment" }, {
142                "comittmend", "commitment" }, {
143                "commerciasl", "commercials" }, {
144                "commited", "committed" }, {
145                "commitee", "committee" }, {
146                "companys", "companies" }, {
147                "compicated", "complicated" }, {
148                "comupter", "computer" }, {
149                "concensus", "consensus" }, {
150                "confusionism", "confucianism" }, {
151                "congradulations", "congratulations" }, {
152                "conibation", "contribution" }, {
153                "consident", "consistent" }, {
154                "consident", "consonant" }, {
155                "contast", "constant" }, {
156                "contastant", "constant" }, {
157                "contunie", "continue" }, {
158                "cooly", "coolly" }, {
159                "copping", "coping" }, {
160                "cosmoplyton", "cosmopolitan" }, {
161                "courst", "court" }, {
162                "crasy", "crazy" }, {
163                "cravets", "caveats" }, {
164                "credetability", "credibility" }, {
165                "criqitue", "critique" }, {
166                "croke", "croak" }, {
167                "crucifiction", "crucifixion" }, {
168                "crusifed", "crucified" }, {
169                "ctitique", "critique" }, {
170                "cumba", "combo" }, {
171                "custamisation", "customization" }, {
172                "dag", "dog" }, {
173                "daly", "daily" }, {
174                "danguages", "dangerous" }, {
175                "deaft", "draft" }, {
176                "defence", "defense" }, {
177                "defenly", "defiantly" }, {
178                "definate", "definite" }, {
179                "definately", "definitely" }, {
180                "dependeble", "dependable" }, {
181                "descrption", "description" }, {
182                "descrptn", "description" }, {
183                "desparate", "desperate" }, {
184                "dessicate", "desiccate" }, {
185                "destint", "distant" }, {
186                "develepment", "developments" }, {
187                "developement", "development" }, {
188                "develpond", "development" }, {
189                "devulge", "divulge" }, {
190                "diagree", "disagree" }, {
191                "dieties", "deities" }, {
192                "dinasaur", "dinosaur" }, {
193                "dinasour", "dinosaur" }, {
194                "direcyly", "directly" }, {
195                "discuess", "discuss" }, {
196                "disect", "dissect" }, {
197                "disippate", "dissipate" }, {
198                "disition", "decision" }, {
199                "dispair", "despair" }, {
200                "disssicion", "discussion" }, {
201                "distarct", "distract" }, {
202                "distart", "distort" }, {
203                "distroy", "destroy" }, {
204                "documtations", "documentation" }, {
205                "doenload", "download" }, {
206                "dongle", "dangle" }, {
207                "doog", "dog" }, {
208                "dramaticly", "dramatically" }, {
209                "drunkeness", "drunkenness" }, {
210                "ductioneery", "dictionary" }, {
211                "dur", "due" }, {
212                "duren", "during" }, {
213                "dymatic", "dynamic" }, {
214                "dynaic", "dynamic" }, {
215                "ecstacy", "ecstasy" }, {
216                "efficat", "efficient" }, {
217                "efficity", "efficacy" }, {
218                "effots", "efforts" }, {
219                "egsistence", "existence" }, {
220                "eitiology", "etiology" }, {
221                "elagent", "elegant" }, {
222                "elligit", "elegant" }, {
223                "embarass", "embarrass" }, {
224                "embarassment", "embarrassment" }, {
225                "embaress", "embarrass" }, {
226                "encapsualtion", "encapsulation" }, {
227                "encyclapidia", "encyclopedia" }, {
228                "encyclopia", "encyclopedia" }, {
229                "engins", "engine" }, {
230                "enhence", "enhance" }, {
231                "enligtment", "Enlightenment" }, {
232                "ennuui", "ennui" }, {
233                "enought", "enough" }, {
234                "enventions", "inventions" }, {
235                "envireminakl", "environmental" }, {
236                "enviroment", "environment" }, {
237                "epitomy", "epitome" }, {
238                "equire", "acquire" }, {
239                "errara", "error" }, {
240                "erro", "error" }, {
241                "evaualtion", "evaluation" }, {
242                "evething", "everything" }, {
243                "evtually", "eventually" }, {
244                "excede", "exceed" }, {
245                "excercise", "exercise" }, {
246                "excpt", "except" }, {
247                "excution", "execution" }, {
248                "exhileration", "exhilaration" }, {
249                "existance", "existence" }, {
250                "expleyly", "explicitly" }, {
251                "explity", "explicitly" }, {
252                "expresso", "espresso" }, {
253                "exspidient", "expedient" }, {
254                "extions", "extensions" }, {
255                "factontion", "factorization" }, {
256                "failer", "failure" }, {
257                "famdasy", "fantasy" }, {
258                "faver", "favor" }, {
259                "faxe", "fax" }, {
260                "febuary", "february" }, {
261                "firey", "fiery" }, {
262                "fistival", "festival" }, {
263                "flatterring", "flattering" }, {
264                "fluk", "flux" }, {
265                "flukse", "flux" }, {
266                "fone", "phone" }, {
267                "forsee", "foresee" }, {
268                "frustartaion", "frustrating" }, {
269                "fuction", "function" }, {
270                "funetik", "phonetic" }, {
271                "futs", "guts" }, {
272                "gamne", "came" }, {
273                "gaurd", "guard" }, {
274                "generly", "generally" }, {
275                "ghandi", "gandhi" }, {
276                "goberment", "government" }, {
277                "gobernement", "government" }, {
278                "gobernment", "government" }, {
279                "gotton", "gotten" }, {
280                "gracefull", "graceful" }, {
281                "gradualy", "gradually" }, {
282                "grammer", "grammar" }, {
283                "hallo", "hello" }, {
284                "hapily", "happily" }, {
285                "harrass", "harass" }, {
286                "havne", "have" }, {
287                "heellp", "help" }, {
288                "heighth", "height" }, {
289                "hellp", "help" }, {
290                "helo", "hello" }, {
291                "herlo", "hello" }, {
292                "hifin", "hyphen" }, {
293                "hifine", "hyphen" }, {
294                "higer", "higher" }, {
295                "hiphine", "hyphen" }, {
296                "hippie", "hippy" }, {
297                "hippopotamous", "hippopotamus" }, {
298                "hlp", "help" }, {
299                "hourse", "horse" }, {
300                "houssing", "housing" }, {
301                "howaver", "however" }, {
302                "howver", "however" }, {
303                "humaniti", "humanity" }, {
304                "hyfin", "hyphen" }, {
305                "hypotathes", "hypothesis" }, {
306                "hypotathese", "hypothesis" }, {
307                "hystrical", "hysterical" }, {
308                "ident", "indent" }, {
309                "illegitament", "illegitimate" }, {
310                "imbed", "embed" }, {
311                "imediaetly", "immediately" }, {
312                "imfamy", "infamy" }, {
313                "immenant", "immanent" }, {
314                "implemtes", "implements" }, {
315                "inadvertant", "inadvertent" }, {
316                "incase", "in case" }, {
317                "incedious", "insidious" }, {
318                "incompleet", "incomplete" }, {
319                "incomplot", "incomplete" }, {
320                "inconvenant", "inconvenient" }, {
321                "inconvience", "inconvenience" }, {
322                "independant", "independent" }, {
323                "independenent", "independent" }, {
324                "indepnends", "independent" }, {
325                "indepth", "in depth" }, {
326                "indispensible", "indispensable" }, {
327                "inefficite", "inefficient" }, {
328                "inerface", "interface" }, {
329                "infact", "in fact" }, {
330                "influencial", "influential" }, {
331                "inital", "initial" }, {
332                "initinized", "initialized" }, {
333                "initized", "initialized" }, {
334                "innoculate", "inoculate" }, {
335                "insistant", "insistent" }, {
336                "insistenet", "insistent" }, {
337                "instulation", "installation" }, {
338                "intealignt", "intelligent" }, {
339                "intejilent", "intelligent" }, {
340                "intelegent", "intelligent" }, {
341                "intelegnent", "intelligent" }, {
342                "intelejent", "intelligent" }, {
343                "inteligent", "intelligent" }, {
344                "intelignt", "intelligent" }, {
345                "intellagant", "intelligent" }, {
346                "intellegent", "intelligent" }, {
347                "intellegint", "intelligent" }, {
348                "intellgnt", "intelligent" }, {
349                "intensionality", "intensionally" }, {
350                "interate", "iterate" }, {
351                "internation", "international" }, {
352                "interpretate", "interpret" }, {
353                "interpretter", "interpreter" }, {
354                "intertes", "interested" }, {
355                "intertesd", "interested" }, {
356                "invermeantial", "environmental" }, {
357                "irregardless", "regardless" }, {
358                "irresistable", "irresistible" }, {
359                "irritible", "irritable" }, {
360                "islams", "muslims" }, {
361                "isotrop", "isotope" }, {
362                "isreal", "israel" }, {
363                "johhn", "john" }, {
364                "judgement", "judgment" }, {
365                "kippur", "kipper" }, {
366                "knawing", "knowing" }, {
367                "latext", "latest" }, {
368                "leasve", "leave" }, {
369                "lesure", "leisure" }, {
370                "liasion", "lesion" }, {
371                "liason", "liaison" }, {
372                "libary", "library" }, {
373                "likly", "likely" }, {
374                "lilometer", "kilometer" }, {
375                "liquify", "liquefy" }, {
376                "lloyer", "layer" }, {
377                "lossing", "losing" }, {
378                "luser", "laser" }, {
379                "maintanence", "maintenance" }, {
380                "majaerly", "majority" }, {
381                "majoraly", "majority" }, {
382                "maks", "masks" }, {
383                "mandelbrot", "Mandelbrot" }, {
384                "mant", "want" }, {
385                "marshall", "marshal" }, {
386                "maxium", "maximum" }, {
387                "meory", "memory" }, {
388                "metter", "better" }, {
389                "mic", "mike" }, {
390                "midia", "media" }, {
391                "millenium", "millennium" }, {
392                "miniscule", "minuscule" }, {
393                "minkay", "monkey" }, {
394                "minum", "minimum" }, {
395                "mischievious", "mischievous" }, {
396                "misilous", "miscellaneous" }, {
397                "momento", "memento" }, {
398                "monkay", "monkey" }, {
399                "mosaik", "mosaic" }, {
400                "mostlikely", "most likely" }, {
401                "mousr", "mouser" }, {
402                "mroe", "more" }, {
403                "neccessary", "necessary" }, {
404                "necesary", "necessary" }, {
405                "necesser", "necessary" }, {
406                "neice", "niece" }, {
407                "neighbour", "neighbor" }, {
408                "nemonic", "pneumonic" }, {
409                "nevade", "Nevada" }, {
410                "nickleodeon", "nickelodeon" }, {
411                "nieve", "naive" }, {
412                "noone", "no one" }, {
413                "noticably", "noticeably" }, {
414                "notin", "not in" }, {
415                "nozled", "nuzzled" }, {
416                "objectsion", "objects" }, {
417                "obsfuscate", "obfuscate" }, {
418                "ocassion", "occasion" }, {
419                "occuppied", "occupied" }, {
420                "occurence", "occurrence" }, {
421                "octagenarian", "octogenarian" }, {
422                "olf", "old" }, {
423                "opposim", "opossum" }, {
424                "organise", "organize" }, {
425                "organiz", "organize" }, {
426                "orientate", "orient" }, {
427                "oscilascope", "oscilloscope" }, {
428                "oving", "moving" }, {
429                "paramers", "parameters" }, {
430                "parametic", "parameter" }, {
431                "paranets", "parameters" }, {
432                "partrucal", "particular" }, {
433                "pataphysical", "metaphysical" }, {
434                "patten", "pattern" }, {
435                "permissable", "permissible" }, {
436                "permition", "permission" }, {
437                "permmasivie", "permissive" }, {
438                "perogative", "prerogative" }, {
439                "persue", "pursue" }, {
440                "phantasia", "fantasia" }, {
441                "phenominal", "phenomenal" }, {
442                "picaresque", "picturesque" }, {
443                "playwrite", "playwright" }, {
444                "poeses", "poesies" }, {
445                "polation", "politician" }, {
446                "poligamy", "polygamy" }, {
447                "politict", "politic" }, {
448                "pollice", "police" }, {
449                "polypropalene", "polypropylene" }, {
450                "pompom", "pompon" }, {
451                "possable", "possible" }, {
452                "practicle", "practical" }, {
453                "pragmaticism", "pragmatism" }, {
454                "preceeding", "preceding" }, {
455                "precion", "precision" }, {
456                "precios", "precision" }, {
457                "preemptory", "peremptory" }, {
458                "prefices", "prefixes" }, {
459                "prefixt", "prefixed" }, {
460                "presbyterian", "Presbyterian" }, {
461                "presue", "pursue" }, {
462                "presued", "pursued" }, {
463                "privielage", "privilege" }, {
464                "priviledge", "privilege" }, {
465                "proceedures", "procedures" }, {
466                "pronensiation", "pronunciation" }, {
467                "pronisation", "pronunciation" }, {
468                "pronounciation", "pronunciation" }, {
469                "properally", "properly" }, {
470                "proplematic", "problematic" }, {
471                "protray", "portray" }, {
472                "pscolgst", "psychologist" }, {
473                "psicolagest", "psychologist" }, {
474                "psycolagest", "psychologist" }, {
475                "quoz", "quiz" }, {
476                "radious", "radius" }, {
477                "ramplily", "rampantly" }, {
478                "reccomend", "recommend" }, {
479                "reccona", "raccoon" }, {
480                "recieve", "receive" }, {
481                "reconise", "recognize" }, {
482                "rectangeles", "rectangle" }, {
483                "redign", "redesign" }, {
484                "reoccurring", "recurring" }, {
485                "repitition", "repetition" }, {
486                "replasments", "replacement" }, {
487                "reposable", "responsible" }, {
488                "reseblence", "resemblance" }, {
489                "respct", "respect" }, {
490                "respecally", "respectfully" }, {
491                "roon", "room" }, {
492                "rought", "roughly" }, {
493                "rsx", "RSX" }, {
494                "rudemtry", "rudimentary" }, {
495                "runnung", "running" }, {
496                "sacreligious", "sacrilegious" }, {
497                "saftly", "safely" }, {
498                "salut", "salute" }, {
499                "satifly", "satisfy" }, {
500                "scrabdle", "scrabble" }, {
501                "searcheable", "searchable" }, {
502                "secion", "section" }, {
503                "seferal", "several" }, {
504                "segements", "segments" }, {
505                "sence", "sense" }, {
506                "seperate", "separate" }, {
507                "sherbert", "sherbet" }, {
508                "sicolagest", "psychologist" }, {
509                "sieze", "seize" }, {
510                "simpfilty", "simplicity" }, {
511                "simplye", "simply" }, {
512                "singal", "signal" }, {
513                "sitte", "site" }, {
514                "situration", "situation" }, {
515                "slyph", "sylph" }, {
516                "smil", "smile" }, {
517                "snuck", "sneaked" }, {
518                "sometmes", "sometimes" }, {
519                "soonec", "sonic" }, {
520                "specificialy", "specifically" }, {
521                "spel", "spell" }, {
522                "spoak", "spoke" }, {
523                "sponsered", "sponsored" }, {
524                "stering", "steering" }, {
525                "straightjacket", "straitjacket" }, {
526                "stumach", "stomach" }, {
527                "stutent", "student" }, {
528                "styleguide", "style guide" }, {
529                "subisitions", "substitutions" }, {
530                "subjecribed", "subscribed" }, {
531                "subpena", "subpoena" }, {
532                "substations", "substitutions" }, {
533                "suger", "sugar" }, {
534                "supercede", "supersede" }, {
535                "superfulous", "superfluous" }, {
536                "susan", "Susan" }, {
537                "swimwear", "swim wear" }, {
538                "syncorization", "synchronization" }, {
539                "taff", "tough" }, {
540                "taht", "that" }, {
541                "tattos", "tattoos" }, {
542                "techniquely", "technically" }, {
543                "teh", "the" }, {
544                "tem", "team" }, {
545                "teo", "two" }, {
546                "teridical", "theoretical" }, {
547                "tesst", "test" }, {
548                "tets", "tests" }, {
549                "thanot", "than or" }, {
550                "theirselves", "themselves" }, {
551                "theridically", "theoretical" }, {
552                "thredically", "theoretically" }, {
553                "thruout", "throughout" }, {
554                "ths", "this" }, {
555                "titalate", "titillate" }, {
556                "tobagan", "tobaggon" }, {
557                "tommorrow", "tomorrow" }, {
558                "tomorow", "tomorrow" }, {
559                "tradegy", "tragedy" }, {
560                "trubbel", "trouble" }, {
561                "ttest", "test" }, {
562                "tunnellike", "tunnel like" }, {
563                "tured", "turned" }, {
564                "tyrrany", "tyranny" }, {
565                "unatourral", "unnatural" }, {
566                "unaturral", "unnatural" }, {
567                "unconisitional", "unconstitutional" }, {
568                "unconscience", "unconscious" }, {
569                "underladder", "under ladder" }, {
570                "unentelegible", "unintelligible" }, {
571                "unfortunently", "unfortunately" }, {
572                "unnaturral", "unnatural" }, {
573                "upcast", "up cast" }, {
574                "upmost", "utmost" }, {
575                "uranisium", "uranium" }, {
576                "verison", "version" }, {
577                "vinagarette", "vinaigrette" }, {
578                "volumptuous", "voluptuous" }, {
579                "volunteerism", "voluntarism" }, {
580                "volye", "volley" }, {
581                "wadting", "wasting" }, {
582                "waite", "wait" }, {
583                "wan't", "won't" }, {
584                "warloord", "warlord" }, {
585                "whaaat", "what" }, {
586                "whard", "ward" }, {
587                "whimp", "wimp" }, {
588                "wicken", "weaken" }, {
589                "wierd", "weird" }, {
590                "wrank", "rank" }, {
591                "writeen", "righten" }, {
592                "writting", "writing" }, {
593                "wundeews", "windows" }, {
594                "yeild", "yield" }, {
595                "youe", "your" }
596        };
597    
598        /**
599         * A subset of FIXTURE generated by this test.
600         */
601        private static final String[][] MATCHES = { { "Accosinly", "Occasionally" }, {
602                "Maddness", "Madness" }, {
603                "Occusionaly", "Occasionally" }, {
604                "Steffen", "Stephen" }, {
605                "Thw", "The" }, {
606                "Unformanlly", "Unfortunately" }, {
607                "Unfortally", "Unfortunately" }, {
608                "abilitey", "ability" }, {
609                "absorbtion", "absorption" }, {
610                "accidently", "accidentally" }, {
611                "accomodate", "accommodate" }, {
612                "acommadate", "accommodate" }, {
613                "acord", "accord" }, {
614                "adultry", "adultery" }, {
615                "aggresive", "aggressive" }, {
616                "alchohol", "alcohol" }, {
617                "alchoholic", "alcoholic" }, {
618                "allieve", "alive" }, {
619                "alot", "a lot" }, {
620                "alright", "all right" }, {
621                "amature", "amateur" }, {
622                "ambivilant", "ambivalent" }, {
623                "amourfous", "amorphous" }, {
624                "annoint", "anoint" }, {
625                "annonsment", "announcement" }, {
626                "annoyting", "anting" }, {
627                "annuncio", "announce" }, {
628                "anotomy", "anatomy" }, {
629                "antidesestablishmentarianism", "antidisestablishmentarianism" }, {
630                "antidisestablishmentarism", "antidisestablishmentarianism" }, {
631                "anynomous", "anonymous" }, {
632                "appelet", "applet" }, {
633                "appreceiated", "appreciated" }, {
634                "appresteate", "appreciate" }, {
635                "aquantance", "acquaintance" }, {
636                "aricticure", "architecture" }, {
637                "asterick", "asterisk" }, {
638                "asymetric", "asymmetric" }, {
639                "atentively", "attentively" }, {
640                "bankrot", "bankrupt" }, {
641                "basicly", "basically" }, {
642                "batallion", "battalion" }, {
643                "bbrose", "browse" }, {
644                "beauro", "bureau" }, {
645                "beaurocracy", "bureaucracy" }, {
646                "beggining", "beginning" }, {
647                "behaviour", "behavior" }, {
648                "beleive", "believe" }, {
649                "belive", "believe" }, {
650                "blait", "bleat" }, {
651                "bouyant", "buoyant" }, {
652                "boygot", "boycott" }, {
653                "brocolli", "broccoli" }, {
654                "buder", "butter" }, {
655                "budr", "butter" }, {
656                "budter", "butter" }, {
657                "buracracy", "bureaucracy" }, {
658                "burracracy", "bureaucracy" }, {
659                "buton", "button" }, {
660                "byby", "by by" }, {
661                "cauler", "caller" }, {
662                "ceasar", "caesar" }, {
663                "cemetary", "cemetery" }, {
664                "changeing", "changing" }, {
665                "cheet", "cheat" }, {
666                "cimplicity", "simplicity" }, {
667                "circumstaces", "circumstances" }, {
668                "clob", "club" }, {
669                "coaln", "colon" }, {
670                "colleaque", "colleague" }, {
671                "colloquilism", "colloquialism" }, {
672                "columne", "column" }, {
673                "comitmment", "commitment" }, {
674                "comitte", "committee" }, {
675                "comittmen", "commitment" }, {
676                "comittmend", "commitment" }, {
677                "commerciasl", "commercials" }, {
678                "commited", "committed" }, {
679                "commitee", "committee" }, {
680                "companys", "companies" }, {
681                "comupter", "computer" }, {
682                "concensus", "consensus" }, {
683                "confusionism", "confucianism" }, {
684                "congradulations", "congratulations" }, {
685                "contunie", "continue" }, {
686                "cooly", "coolly" }, {
687                "copping", "coping" }, {
688                "cosmoplyton", "cosmopolitan" }, {
689                "crasy", "crazy" }, {
690                "croke", "croak" }, {
691                "crucifiction", "crucifixion" }, {
692                "crusifed", "crucified" }, {
693                "cumba", "combo" }, {
694                "custamisation", "customization" }, {
695                "dag", "dog" }, {
696                "daly", "daily" }, {
697                "defence", "defense" }, {
698                "definate", "definite" }, {
699                "definately", "definitely" }, {
700                "dependeble", "dependable" }, {
701                "descrption", "description" }, {
702                "descrptn", "description" }, {
703                "desparate", "desperate" }, {
704                "dessicate", "desiccate" }, {
705                "destint", "distant" }, {
706                "develepment", "developments" }, {
707                "developement", "development" }, {
708                "develpond", "development" }, {
709                "devulge", "divulge" }, {
710                "dieties", "deities" }, {
711                "dinasaur", "dinosaur" }, {
712                "dinasour", "dinosaur" }, {
713                "discuess", "discuss" }, {
714                "disect", "dissect" }, {
715                "disippate", "dissipate" }, {
716                "disition", "decision" }, {
717                "dispair", "despair" }, {
718                "distarct", "distract" }, {
719                "distart", "distort" }, {
720                "distroy", "destroy" }, {
721                "doenload", "download" }, {
722                "dongle", "dangle" }, {
723                "doog", "dog" }, {
724                "dramaticly", "dramatically" }, {
725                "drunkeness", "drunkenness" }, {
726                "ductioneery", "dictionary" }, {
727                "ecstacy", "ecstasy" }, {
728                "egsistence", "existence" }, {
729                "eitiology", "etiology" }, {
730                "elagent", "elegant" }, {
731                "embarass", "embarrass" }, {
732                "embarassment", "embarrassment" }, {
733                "embaress", "embarrass" }, {
734                "encapsualtion", "encapsulation" }, {
735                "encyclapidia", "encyclopedia" }, {
736                "encyclopia", "encyclopedia" }, {
737                "engins", "engine" }, {
738                "enhence", "enhance" }, {
739                "ennuui", "ennui" }, {
740                "enventions", "inventions" }, {
741                "envireminakl", "environmental" }, {
742                "enviroment", "environment" }, {
743                "epitomy", "epitome" }, {
744                "equire", "acquire" }, {
745                "errara", "error" }, {
746                "evaualtion", "evaluation" }, {
747                "excede", "exceed" }, {
748                "excercise", "exercise" }, {
749                "excpt", "except" }, {
750                "exhileration", "exhilaration" }, {
751                "existance", "existence" }, {
752                "expleyly", "explicitly" }, {
753                "explity", "explicitly" }, {
754                "failer", "failure" }, {
755                "faver", "favor" }, {
756                "faxe", "fax" }, {
757                "firey", "fiery" }, {
758                "fistival", "festival" }, {
759                "flatterring", "flattering" }, {
760                "flukse", "flux" }, {
761                "fone", "phone" }, {
762                "forsee", "foresee" }, {
763                "frustartaion", "frustrating" }, {
764                "funetik", "phonetic" }, {
765                "gaurd", "guard" }, {
766                "generly", "generally" }, {
767                "ghandi", "gandhi" }, {
768                "gotton", "gotten" }, {
769                "gracefull", "graceful" }, {
770                "gradualy", "gradually" }, {
771                "grammer", "grammar" }, {
772                "hallo", "hello" }, {
773                "hapily", "happily" }, {
774                "harrass", "harass" }, {
775                "heellp", "help" }, {
776                "heighth", "height" }, {
777                "hellp", "help" }, {
778                "helo", "hello" }, {
779                "hifin", "hyphen" }, {
780                "hifine", "hyphen" }, {
781                "hiphine", "hyphen" }, {
782                "hippie", "hippy" }, {
783                "hippopotamous", "hippopotamus" }, {
784                "hourse", "horse" }, {
785                "houssing", "housing" }, {
786                "howaver", "however" }, {
787                "howver", "however" }, {
788                "humaniti", "humanity" }, {
789                "hyfin", "hyphen" }, {
790                "hystrical", "hysterical" }, {
791                "illegitament", "illegitimate" }, {
792                "imbed", "embed" }, {
793                "imediaetly", "immediately" }, {
794                "immenant", "immanent" }, {
795                "implemtes", "implements" }, {
796                "inadvertant", "inadvertent" }, {
797                "incase", "in case" }, {
798                "incedious", "insidious" }, {
799                "incompleet", "incomplete" }, {
800                "incomplot", "incomplete" }, {
801                "inconvenant", "inconvenient" }, {
802                "inconvience", "inconvenience" }, {
803                "independant", "independent" }, {
804                "independenent", "independent" }, {
805                "indepnends", "independent" }, {
806                "indepth", "in depth" }, {
807                "indispensible", "indispensable" }, {
808                "inefficite", "inefficient" }, {
809                "infact", "in fact" }, {
810                "influencial", "influential" }, {
811                "innoculate", "inoculate" }, {
812                "insistant", "insistent" }, {
813                "insistenet", "insistent" }, {
814                "instulation", "installation" }, {
815                "intealignt", "intelligent" }, {
816                "intelegent", "intelligent" }, {
817                "intelegnent", "intelligent" }, {
818                "intelejent", "intelligent" }, {
819                "inteligent", "intelligent" }, {
820                "intelignt", "intelligent" }, {
821                "intellagant", "intelligent" }, {
822                "intellegent", "intelligent" }, {
823                "intellegint", "intelligent" }, {
824                "intellgnt", "intelligent" }, {
825                "intensionality", "intensionally" }, {
826                "internation", "international" }, {
827                "interpretate", "interpret" }, {
828                "interpretter", "interpreter" }, {
829                "intertes", "interested" }, {
830                "intertesd", "interested" }, {
831                "invermeantial", "environmental" }, {
832                "irresistable", "irresistible" }, {
833                "irritible", "irritable" }, {
834                "isreal", "israel" }, {
835                "johhn", "john" }, {
836                "kippur", "kipper" }, {
837                "knawing", "knowing" }, {
838                "lesure", "leisure" }, {
839                "liasion", "lesion" }, {
840                "liason", "liaison" }, {
841                "likly", "likely" }, {
842                "liquify", "liquefy" }, {
843                "lloyer", "layer" }, {
844                "lossing", "losing" }, {
845                "luser", "laser" }, {
846                "maintanence", "maintenance" }, {
847                "mandelbrot", "Mandelbrot" }, {
848                "marshall", "marshal" }, {
849                "maxium", "maximum" }, {
850                "mic", "mike" }, {
851                "midia", "media" }, {
852                "millenium", "millennium" }, {
853                "miniscule", "minuscule" }, {
854                "minkay", "monkey" }, {
855                "mischievious", "mischievous" }, {
856                "momento", "memento" }, {
857                "monkay", "monkey" }, {
858                "mosaik", "mosaic" }, {
859                "mostlikely", "most likely" }, {
860                "mousr", "mouser" }, {
861                "mroe", "more" }, {
862                "necesary", "necessary" }, {
863                "necesser", "necessary" }, {
864                "neice", "niece" }, {
865                "neighbour", "neighbor" }, {
866                "nemonic", "pneumonic" }, {
867                "nevade", "Nevada" }, {
868                "nickleodeon", "nickelodeon" }, {
869                "nieve", "naive" }, {
870                "noone", "no one" }, {
871                "notin", "not in" }, {
872                "nozled", "nuzzled" }, {
873                "objectsion", "objects" }, {
874                "ocassion", "occasion" }, {
875                "occuppied", "occupied" }, {
876                "occurence", "occurrence" }, {
877                "octagenarian", "octogenarian" }, {
878                "opposim", "opossum" }, {
879                "organise", "organize" }, {
880                "organiz", "organize" }, {
881                "orientate", "orient" }, {
882                "oscilascope", "oscilloscope" }, {
883                "parametic", "parameter" }, {
884                "permissable", "permissible" }, {
885                "permmasivie", "permissive" }, {
886                "persue", "pursue" }, {
887                "phantasia", "fantasia" }, {
888                "phenominal", "phenomenal" }, {
889                "playwrite", "playwright" }, {
890                "poeses", "poesies" }, {
891                "poligamy", "polygamy" }, {
892                "politict", "politic" }, {
893                "pollice", "police" }, {
894                "polypropalene", "polypropylene" }, {
895                "possable", "possible" }, {
896                "practicle", "practical" }, {
897                "pragmaticism", "pragmatism" }, {
898                "preceeding", "preceding" }, {
899                "precios", "precision" }, {
900                "preemptory", "peremptory" }, {
901                "prefixt", "prefixed" }, {
902                "presbyterian", "Presbyterian" }, {
903                "presue", "pursue" }, {
904                "presued", "pursued" }, {
905                "privielage", "privilege" }, {
906                "priviledge", "privilege" }, {
907                "proceedures", "procedures" }, {
908                "pronensiation", "pronunciation" }, {
909                "pronounciation", "pronunciation" }, {
910                "properally", "properly" }, {
911                "proplematic", "problematic" }, {
912                "protray", "portray" }, {
913                "pscolgst", "psychologist" }, {
914                "psicolagest", "psychologist" }, {
915                "psycolagest", "psychologist" }, {
916                "quoz", "quiz" }, {
917                "radious", "radius" }, {
918                "reccomend", "recommend" }, {
919                "reccona", "raccoon" }, {
920                "recieve", "receive" }, {
921                "reconise", "recognize" }, {
922                "rectangeles", "rectangle" }, {
923                "reoccurring", "recurring" }, {
924                "repitition", "repetition" }, {
925                "replasments", "replacement" }, {
926                "respct", "respect" }, {
927                "respecally", "respectfully" }, {
928                "rsx", "RSX" }, {
929                "runnung", "running" }, {
930                "sacreligious", "sacrilegious" }, {
931                "salut", "salute" }, {
932                "searcheable", "searchable" }, {
933                "seferal", "several" }, {
934                "segements", "segments" }, {
935                "sence", "sense" }, {
936                "seperate", "separate" }, {
937                "sicolagest", "psychologist" }, {
938                "sieze", "seize" }, {
939                "simplye", "simply" }, {
940                "sitte", "site" }, {
941                "slyph", "sylph" }, {
942                "smil", "smile" }, {
943                "sometmes", "sometimes" }, {
944                "soonec", "sonic" }, {
945                "specificialy", "specifically" }, {
946                "spel", "spell" }, {
947                "spoak", "spoke" }, {
948                "sponsered", "sponsored" }, {
949                "stering", "steering" }, {
950                "straightjacket", "straitjacket" }, {
951                "stumach", "stomach" }, {
952                "stutent", "student" }, {
953                "styleguide", "style guide" }, {
954                "subpena", "subpoena" }, {
955                "substations", "substitutions" }, {
956                "supercede", "supersede" }, {
957                "superfulous", "superfluous" }, {
958                "susan", "Susan" }, {
959                "swimwear", "swim wear" }, {
960                "syncorization", "synchronization" }, {
961                "taff", "tough" }, {
962                "taht", "that" }, {
963                "tattos", "tattoos" }, {
964                "techniquely", "technically" }, {
965                "teh", "the" }, {
966                "tem", "team" }, {
967                "teo", "two" }, {
968                "teridical", "theoretical" }, {
969                "tesst", "test" }, {
970                "theridically", "theoretical" }, {
971                "thredically", "theoretically" }, {
972                "thruout", "throughout" }, {
973                "ths", "this" }, {
974                "titalate", "titillate" }, {
975                "tobagan", "tobaggon" }, {
976                "tommorrow", "tomorrow" }, {
977                "tomorow", "tomorrow" }, {
978                "trubbel", "trouble" }, {
979                "ttest", "test" }, {
980                "tyrrany", "tyranny" }, {
981                "unatourral", "unnatural" }, {
982                "unaturral", "unnatural" }, {
983                "unconisitional", "unconstitutional" }, {
984                "unconscience", "unconscious" }, {
985                "underladder", "under ladder" }, {
986                "unentelegible", "unintelligible" }, {
987                "unfortunently", "unfortunately" }, {
988                "unnaturral", "unnatural" }, {
989                "upcast", "up cast" }, {
990                "verison", "version" }, {
991                "vinagarette", "vinaigrette" }, {
992                "volunteerism", "voluntarism" }, {
993                "volye", "volley" }, {
994                "waite", "wait" }, {
995                "wan't", "won't" }, {
996                "warloord", "warlord" }, {
997                "whaaat", "what" }, {
998                "whard", "ward" }, {
999                "whimp", "wimp" }, {
1000                "wicken", "weaken" }, {
1001                "wierd", "weird" }, {
1002                "wrank", "rank" }, {
1003                "writeen", "righten" }, {
1004                "writting", "writing" }, {
1005                "wundeews", "windows" }, {
1006                "yeild", "yield" }, };
1007    
1008        /**
1009         * Tests encoding APIs in one place.
1010         */
1011        private void assertDoubleMetaphone(String expected, String source) {
1012            assertEquals(expected, this.getDoubleMetaphone().encode(source));
1013            try {
1014                assertEquals(expected, this.getDoubleMetaphone().encode((Object) source));
1015            } catch (EncoderException e) {
1016                fail("Unexpected expection: " + e);
1017            }
1018            assertEquals(expected, this.getDoubleMetaphone().doubleMetaphone(source));
1019            assertEquals(expected, this.getDoubleMetaphone().doubleMetaphone(source, false));
1020        }
1021    
1022        /**
1023         * Tests encoding APIs in one place.
1024         */
1025        public void assertDoubleMetaphoneAlt(String expected, String source) {
1026            assertEquals(expected, this.getDoubleMetaphone().doubleMetaphone(source, true));
1027        }
1028    
1029        public void doubleMetaphoneEqualTest(String[][] pairs, boolean useAlternate) {
1030            this.validateFixture(pairs);
1031            for (String[] pair : pairs) {
1032                String name0 = pair[0];
1033                String name1 = pair[1];
1034                String failMsg = "Expected match between " + name0 + " and " + name1 + " (use alternate: " + useAlternate + ")";
1035                assertTrue(failMsg, this.getDoubleMetaphone().isDoubleMetaphoneEqual(name0, name1, useAlternate));
1036                assertTrue(failMsg, this.getDoubleMetaphone().isDoubleMetaphoneEqual(name1, name0, useAlternate));
1037                if (!useAlternate) {
1038                    assertTrue(failMsg, this.getDoubleMetaphone().isDoubleMetaphoneEqual(name0, name1));
1039                    assertTrue(failMsg, this.getDoubleMetaphone().isDoubleMetaphoneEqual(name1, name0));
1040                }
1041            }
1042        }
1043    
1044        public void doubleMetaphoneNotEqualTest(boolean alternate) {
1045            assertFalse(this.getDoubleMetaphone().isDoubleMetaphoneEqual("Brain", "Band", alternate));
1046            assertFalse(this.getDoubleMetaphone().isDoubleMetaphoneEqual("Band", "Brain", alternate));
1047    
1048            if (!alternate) {
1049                assertFalse(this.getDoubleMetaphone().isDoubleMetaphoneEqual("Brain", "Band"));
1050                assertFalse(this.getDoubleMetaphone().isDoubleMetaphoneEqual("Band", "Brain"));
1051            }
1052        }
1053    
1054        private DoubleMetaphone getDoubleMetaphone() {
1055            return (DoubleMetaphone) this.getStringEncoder();
1056        }
1057    
1058        @Override
1059        protected StringEncoder createStringEncoder() {
1060            return new DoubleMetaphone();
1061        }
1062    
1063        @Test
1064        public void testDoubleMetaphone() {
1065            assertDoubleMetaphone("TSTN", "testing");
1066            assertDoubleMetaphone("0", "The");
1067            assertDoubleMetaphone("KK", "quick");
1068            assertDoubleMetaphone("PRN", "brown");
1069            assertDoubleMetaphone("FKS", "fox");
1070            assertDoubleMetaphone("JMPT", "jumped");
1071            assertDoubleMetaphone("AFR", "over");
1072            assertDoubleMetaphone("0", "the");
1073            assertDoubleMetaphone("LS", "lazy");
1074            assertDoubleMetaphone("TKS", "dogs");
1075            assertDoubleMetaphone("MKFR", "MacCafferey");
1076            assertDoubleMetaphone("STFN", "Stephan");
1077            assertDoubleMetaphone("KSSK", "Kuczewski");
1078            assertDoubleMetaphone("MKLL", "McClelland");
1079            assertDoubleMetaphone("SNHS", "san jose");
1080            assertDoubleMetaphone("SNFP", "xenophobia");
1081    
1082            assertDoubleMetaphoneAlt("TSTN", "testing");
1083            assertDoubleMetaphoneAlt("T", "The");
1084            assertDoubleMetaphoneAlt("KK", "quick");
1085            assertDoubleMetaphoneAlt("PRN", "brown");
1086            assertDoubleMetaphoneAlt("FKS", "fox");
1087            assertDoubleMetaphoneAlt("AMPT", "jumped");
1088            assertDoubleMetaphoneAlt("AFR", "over");
1089            assertDoubleMetaphoneAlt("T", "the");
1090            assertDoubleMetaphoneAlt("LS", "lazy");
1091            assertDoubleMetaphoneAlt("TKS", "dogs");
1092            assertDoubleMetaphoneAlt("MKFR", "MacCafferey");
1093            assertDoubleMetaphoneAlt("STFN", "Stephan");
1094            assertDoubleMetaphoneAlt("KXFS", "Kutchefski");
1095            assertDoubleMetaphoneAlt("MKLL", "McClelland");
1096            assertDoubleMetaphoneAlt("SNHS", "san jose");
1097            assertDoubleMetaphoneAlt("SNFP", "xenophobia");
1098            assertDoubleMetaphoneAlt("FKR", "Fokker");
1099            assertDoubleMetaphoneAlt("AK", "Joqqi");
1100            assertDoubleMetaphoneAlt("HF", "Hovvi");
1101            assertDoubleMetaphoneAlt("XRN", "Czerny");
1102        }
1103    
1104        @Test
1105        public void testEmpty() {
1106            assertEquals(null, this.getDoubleMetaphone().doubleMetaphone(null));
1107            assertEquals(null, this.getDoubleMetaphone().doubleMetaphone(""));
1108            assertEquals(null, this.getDoubleMetaphone().doubleMetaphone(" "));
1109            assertEquals(null, this.getDoubleMetaphone().doubleMetaphone("\t\n\r "));
1110        }
1111    
1112        /**
1113         * Test setting maximum length
1114         */
1115        @Test
1116        public void testSetMaxCodeLength() {
1117            String value = "jumped";
1118    
1119            DoubleMetaphone doubleMetaphone = new DoubleMetaphone();
1120    
1121            // Sanity check of default settings
1122            assertEquals("Default Max Code Length", 4, doubleMetaphone.getMaxCodeLen());
1123            assertEquals("Default Primary",   "JMPT", doubleMetaphone.doubleMetaphone(value, false));
1124            assertEquals("Default Alternate", "AMPT", doubleMetaphone.doubleMetaphone(value, true));
1125    
1126            // Check setting Max Code Length
1127            doubleMetaphone.setMaxCodeLen(3);
1128            assertEquals("Set Max Code Length", 3, doubleMetaphone.getMaxCodeLen());
1129            assertEquals("Max=3 Primary",   "JMP", doubleMetaphone.doubleMetaphone(value, false));
1130            assertEquals("Max=3 Alternate", "AMP", doubleMetaphone.doubleMetaphone(value, true));
1131        }
1132    
1133        @Test
1134        public void testIsDoubleMetaphoneEqualBasic() {
1135            String[][] testFixture = new String[][] { { "Case", "case" }, {
1136                    "CASE", "Case" }, {
1137                    "caSe", "cAsE" }, {
1138                    "cookie", "quick" }, {
1139                    "quick", "cookie" }, {
1140                    "Brian", "Bryan" }, {
1141                    "Auto", "Otto" }, {
1142                    "Steven", "Stefan" }, {
1143                    "Philipowitz", "Filipowicz" }
1144            };
1145            doubleMetaphoneEqualTest(testFixture, false);
1146            doubleMetaphoneEqualTest(testFixture, true);
1147        }
1148    
1149        /**
1150         * Example in the original article but failures in this Java impl:
1151         */
1152        @Test
1153        public void testIsDoubleMetaphoneEqualExtended1() {
1154            //        String[][] testFixture = new String[][] { { "Smith", "Schmidt" }
1155            //        };
1156            //        doubleMetaphoneEqualTest(testFixture, false);
1157            //        doubleMetaphoneEqualTest(testFixture, true);
1158        }
1159    
1160        @Test
1161        public void testIsDoubleMetaphoneEqualExtended2() {
1162            String[][] testFixture = new String[][] { { "Jablonski", "Yablonsky" }
1163            };
1164            //doubleMetaphoneEqualTest(testFixture, false);
1165            doubleMetaphoneEqualTest(testFixture, true);
1166        }
1167    
1168        /**
1169         * Used to generate the MATCHES array and test possible matches from the
1170         * FIXTURE array.
1171         */
1172        @Test
1173        public void testIsDoubleMetaphoneEqualExtended3() {
1174            this.validateFixture(FIXTURE);
1175            StringBuilder failures = new StringBuilder();
1176            StringBuilder matches = new StringBuilder();
1177            String cr = System.getProperty("line.separator");
1178            matches.append("private static final String[][] MATCHES = {" + cr);
1179            int failCount = 0;
1180            for (int i = 0; i < FIXTURE.length; i++) {
1181                String name0 = FIXTURE[i][0];
1182                String name1 = FIXTURE[i][1];
1183                boolean match1 = this.getDoubleMetaphone().isDoubleMetaphoneEqual(name0, name1, false);
1184                boolean match2 = this.getDoubleMetaphone().isDoubleMetaphoneEqual(name0, name1, true);
1185                if (match1 == false && match2 == false) {
1186                    String failMsg = "[" + i + "] " + name0 + " and " + name1 + cr;
1187                    failures.append(failMsg);
1188                    failCount++;
1189                } else {
1190                    matches.append("{\"" + name0 + "\", \"" + name1 + "\"}," + cr);
1191                }
1192            }
1193            matches.append("};");
1194            // Turn on to print a new MATCH array
1195            //System.out.println(matches.toString());
1196            if (failCount > 0) {
1197                // Turn on to see which pairs do NOT match.
1198                // String msg = failures.toString();
1199                //fail(failCount + " failures out of " + FIXTURE.length + ". The
1200                // following could be made to match: " + cr + msg);
1201            }
1202        }
1203    
1204        @Test
1205        public void testIsDoubleMetaphoneEqualWithMATCHES() {
1206            this.validateFixture(MATCHES);
1207            for (int i = 0; i < MATCHES.length; i++) {
1208                String name0 = MATCHES[i][0];
1209                String name1 = MATCHES[i][1];
1210                boolean match1 = this.getDoubleMetaphone().isDoubleMetaphoneEqual(name0, name1, false);
1211                boolean match2 = this.getDoubleMetaphone().isDoubleMetaphoneEqual(name0, name1, true);
1212                if (match1 == false && match2 == false) {
1213                    fail("Expected match [" + i + "] " + name0 + " and " + name1);
1214                }
1215            }
1216        }
1217    
1218        @Test
1219        public void testIsDoubleMetaphoneNotEqual() {
1220            doubleMetaphoneNotEqualTest(false);
1221            doubleMetaphoneNotEqualTest(true);
1222        }
1223    
1224        @Test
1225        public void testCCedilla() {
1226            assertTrue(this.getDoubleMetaphone().isDoubleMetaphoneEqual("\u00e7", "S")); // c-cedilla
1227        }
1228    
1229        @Test
1230        public void testNTilde() {
1231            assertTrue(this.getDoubleMetaphone().isDoubleMetaphoneEqual("\u00f1", "N")); // n-tilde
1232        }
1233    
1234        public void validateFixture(String[][] pairs) {
1235            if (pairs.length == 0) {
1236                fail("Test fixture is empty");
1237            }
1238            for (int i = 0; i < pairs.length; i++) {
1239                if (pairs[i].length != 2) {
1240                    fail("Error in test fixture in the data array at index " + i);
1241                }
1242            }
1243        }
1244    }