001    /*
002     * Licensed to the Apache Software Foundation (ASF) under one or more
003     * contributor license agreements.  See the NOTICE file distributed with
004     * this work for additional information regarding copyright ownership.
005     * The ASF licenses this file to You under the Apache License, Version 2.0
006     * (the "License"); you may not use this file except in compliance with
007     * the License.  You may obtain a copy of the License at
008     *
009     *      http://www.apache.org/licenses/LICENSE-2.0
010     *
011     * Unless required by applicable law or agreed to in writing, software
012     * distributed under the License is distributed on an "AS IS" BASIS,
013     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014     * See the License for the specific language governing permissions and
015     * limitations under the License.
016     */
017    
018    package org.apache.commons.codec.language;
019    
020    import static org.junit.Assert.assertEquals;
021    import static org.junit.Assert.assertFalse;
022    import static org.junit.Assert.assertTrue;
023    import static org.junit.Assert.fail;
024    
025    import org.apache.commons.codec.EncoderException;
026    import org.apache.commons.codec.StringEncoderAbstractTest;
027    import org.junit.Test;
028    
029    /**
030     * Tests {@link DoubleMetaphone}.
031     *
032     * <p>Keep this file in UTF-8 encoding for proper Javadoc processing.</p>
033     *
034     * @see "http://www.cuj.com/documents/s=8038/cuj0006philips/"
035     * @version $Id: DoubleMetaphoneTest.html 889935 2013-12-11 05:05:13Z ggregory $
036     */
037    public class DoubleMetaphoneTest extends StringEncoderAbstractTest<DoubleMetaphone> {
038    
039        /**
040         * Word pairs from http://aspell.net/test/orig/batch0.tab; each row appears to be { misspelled form, intended word }.
041         *
042         * "Copyright (C) 2002 Kevin Atkinson (kevina@gnu.org). Verbatim copying
043         * and distribution of this entire article is permitted in any medium,
044         * provided this notice is preserved."
045         *
046         * The upstream test data was massaged (reformatted) into the two-column array literal below.
047         */
048        private static final String[][] FIXTURE = { { "Accosinly", "Occasionally" }, {
049                "Ciculer", "Circler" }, {
050                "Circue", "Circle" }, {
051                "Maddness", "Madness" }, {
052                "Occusionaly", "Occasionally" }, {
053                "Steffen", "Stephen" }, {
054                "Thw", "The" }, {
055                "Unformanlly", "Unfortunately" }, {
056                "Unfortally", "Unfortunately" }, {
057                "abilitey", "ability" }, {
058                "abouy", "about" }, {
059                "absorbtion", "absorption" }, {
060                "accidently", "accidentally" }, {
061                "accomodate", "accommodate" }, {
062                "acommadate", "accommodate" }, {
063                "acord", "accord" }, {
064                "adultry", "adultery" }, {
065                "aggresive", "aggressive" }, {
066                "alchohol", "alcohol" }, {
067                "alchoholic", "alcoholic" }, {
068                "allieve", "alive" }, {
069                "alot", "a lot" }, {
070                "alright", "all right" }, {
071                "amature", "amateur" }, {
072                "ambivilant", "ambivalent" }, {
073                "amification", "amplification" }, {
074                "amourfous", "amorphous" }, {
075                "annoint", "anoint" }, {
076                "annonsment", "announcement" }, {
077                "annoyting", "anting" }, {
078                "annuncio", "announce" }, {
079                "anonomy", "anatomy" }, {
080                "anotomy", "anatomy" }, {
081                "antidesestablishmentarianism", "antidisestablishmentarianism" }, {
082                "antidisestablishmentarism", "antidisestablishmentarianism" }, {
083                "anynomous", "anonymous" }, {
084                "appelet", "applet" }, {
085                "appreceiated", "appreciated" }, {
086                "appresteate", "appreciate" }, {
087                "aquantance", "acquaintance" }, {
088                "aratictature", "architecture" }, {
089                "archeype", "archetype" }, {
090                "aricticure", "architecture" }, {
091                "artic", "arctic" }, {
092                "asentote", "asymptote" }, {
093                "ast", "at" }, {
094                "asterick", "asterisk" }, {
095                "asymetric", "asymmetric" }, {
096                "atentively", "attentively" }, {
097                "autoamlly", "automatically" }, {
098                "bankrot", "bankrupt" }, {
099                "basicly", "basically" }, {
100                "batallion", "battalion" }, {
101                "bbrose", "browse" }, {
102                "beauro", "bureau" }, {
103                "beaurocracy", "bureaucracy" }, {
104                "beggining", "beginning" }, {
105                "beging", "beginning" }, {
106                "behaviour", "behavior" }, {
107                "beleive", "believe" }, {
108                "belive", "believe" }, {
109                "benidifs", "benefits" }, {
110                "bigginging", "beginning" }, {
111                "blait", "bleat" }, {
112                "bouyant", "buoyant" }, {
113                "boygot", "boycott" }, {
114                "brocolli", "broccoli" }, {
115                "buch", "bush" }, {
116                "buder", "butter" }, {
117                "budr", "butter" }, {
118                "budter", "butter" }, {
119                "buracracy", "bureaucracy" }, {
120                "burracracy", "bureaucracy" }, {
121                "buton", "button" }, {
122                "byby", "by by" }, {
123                "cauler", "caller" }, {
124                "ceasar", "caesar" }, {
125                "cemetary", "cemetery" }, {
126                "changeing", "changing" }, {
127                "cheet", "cheat" }, {
128                "cicle", "circle" }, {
129                "cimplicity", "simplicity" }, {
130                "circumstaces", "circumstances" }, {
131                "clob", "club" }, {
132                "coaln", "colon" }, {
133                "cocamena", "cockamamie" }, {
134                "colleaque", "colleague" }, {
135                "colloquilism", "colloquialism" }, {
136                "columne", "column" }, {
137                "comiler", "compiler" }, {
138                "comitmment", "commitment" }, {
139                "comitte", "committee" }, {
140                "comittmen", "commitment" }, {
141                "comittmend", "commitment" }, {
142                "commerciasl", "commercials" }, {
143                "commited", "committed" }, {
144                "commitee", "committee" }, {
145                "companys", "companies" }, {
146                "compicated", "complicated" }, {
147                "comupter", "computer" }, {
148                "concensus", "consensus" }, {
149                "confusionism", "confucianism" }, {
150                "congradulations", "congratulations" }, {
151                "conibation", "contribution" }, {
152                "consident", "consistent" }, {
153                "consident", "consonant" }, {
154                "contast", "constant" }, {
155                "contastant", "constant" }, {
156                "contunie", "continue" }, {
157                "cooly", "coolly" }, {
158                "copping", "coping" }, {
159                "cosmoplyton", "cosmopolitan" }, {
160                "courst", "court" }, {
161                "crasy", "crazy" }, {
162                "cravets", "caveats" }, {
163                "credetability", "credibility" }, {
164                "criqitue", "critique" }, {
165                "croke", "croak" }, {
166                "crucifiction", "crucifixion" }, {
167                "crusifed", "crucified" }, {
168                "ctitique", "critique" }, {
169                "cumba", "combo" }, {
170                "custamisation", "customization" }, {
171                "dag", "dog" }, {
172                "daly", "daily" }, {
173                "danguages", "dangerous" }, {
174                "deaft", "draft" }, {
175                "defence", "defense" }, {
176                "defenly", "defiantly" }, {
177                "definate", "definite" }, {
178                "definately", "definitely" }, {
179                "dependeble", "dependable" }, {
180                "descrption", "description" }, {
181                "descrptn", "description" }, {
182                "desparate", "desperate" }, {
183                "dessicate", "desiccate" }, {
184                "destint", "distant" }, {
185                "develepment", "developments" }, {
186                "developement", "development" }, {
187                "develpond", "development" }, {
188                "devulge", "divulge" }, {
189                "diagree", "disagree" }, {
190                "dieties", "deities" }, {
191                "dinasaur", "dinosaur" }, {
192                "dinasour", "dinosaur" }, {
193                "direcyly", "directly" }, {
194                "discuess", "discuss" }, {
195                "disect", "dissect" }, {
196                "disippate", "dissipate" }, {
197                "disition", "decision" }, {
198                "dispair", "despair" }, {
199                "disssicion", "discussion" }, {
200                "distarct", "distract" }, {
201                "distart", "distort" }, {
202                "distroy", "destroy" }, {
203                "documtations", "documentation" }, {
204                "doenload", "download" }, {
205                "dongle", "dangle" }, {
206                "doog", "dog" }, {
207                "dramaticly", "dramatically" }, {
208                "drunkeness", "drunkenness" }, {
209                "ductioneery", "dictionary" }, {
210                "dur", "due" }, {
211                "duren", "during" }, {
212                "dymatic", "dynamic" }, {
213                "dynaic", "dynamic" }, {
214                "ecstacy", "ecstasy" }, {
215                "efficat", "efficient" }, {
216                "efficity", "efficacy" }, {
217                "effots", "efforts" }, {
218                "egsistence", "existence" }, {
219                "eitiology", "etiology" }, {
220                "elagent", "elegant" }, {
221                "elligit", "elegant" }, {
222                "embarass", "embarrass" }, {
223                "embarassment", "embarrassment" }, {
224                "embaress", "embarrass" }, {
225                "encapsualtion", "encapsulation" }, {
226                "encyclapidia", "encyclopedia" }, {
227                "encyclopia", "encyclopedia" }, {
228                "engins", "engine" }, {
229                "enhence", "enhance" }, {
230                "enligtment", "Enlightenment" }, {
231                "ennuui", "ennui" }, {
232                "enought", "enough" }, {
233                "enventions", "inventions" }, {
234                "envireminakl", "environmental" }, {
235                "enviroment", "environment" }, {
236                "epitomy", "epitome" }, {
237                "equire", "acquire" }, {
238                "errara", "error" }, {
239                "erro", "error" }, {
240                "evaualtion", "evaluation" }, {
241                "evething", "everything" }, {
242                "evtually", "eventually" }, {
243                "excede", "exceed" }, {
244                "excercise", "exercise" }, {
245                "excpt", "except" }, {
246                "excution", "execution" }, {
247                "exhileration", "exhilaration" }, {
248                "existance", "existence" }, {
249                "expleyly", "explicitly" }, {
250                "explity", "explicitly" }, {
251                "expresso", "espresso" }, {
252                "exspidient", "expedient" }, {
253                "extions", "extensions" }, {
254                "factontion", "factorization" }, {
255                "failer", "failure" }, {
256                "famdasy", "fantasy" }, {
257                "faver", "favor" }, {
258                "faxe", "fax" }, {
259                "febuary", "february" }, {
260                "firey", "fiery" }, {
261                "fistival", "festival" }, {
262                "flatterring", "flattering" }, {
263                "fluk", "flux" }, {
264                "flukse", "flux" }, {
265                "fone", "phone" }, {
266                "forsee", "foresee" }, {
267                "frustartaion", "frustrating" }, {
268                "fuction", "function" }, {
269                "funetik", "phonetic" }, {
270                "futs", "guts" }, {
271                "gamne", "came" }, {
272                "gaurd", "guard" }, {
273                "generly", "generally" }, {
274                "ghandi", "gandhi" }, {
275                "goberment", "government" }, {
276                "gobernement", "government" }, {
277                "gobernment", "government" }, {
278                "gotton", "gotten" }, {
279                "gracefull", "graceful" }, {
280                "gradualy", "gradually" }, {
281                "grammer", "grammar" }, {
282                "hallo", "hello" }, {
283                "hapily", "happily" }, {
284                "harrass", "harass" }, {
285                "havne", "have" }, {
286                "heellp", "help" }, {
287                "heighth", "height" }, {
288                "hellp", "help" }, {
289                "helo", "hello" }, {
290                "herlo", "hello" }, {
291                "hifin", "hyphen" }, {
292                "hifine", "hyphen" }, {
293                "higer", "higher" }, {
294                "hiphine", "hyphen" }, {
295                "hippie", "hippy" }, {
296                "hippopotamous", "hippopotamus" }, {
297                "hlp", "help" }, {
298                "hourse", "horse" }, {
299                "houssing", "housing" }, {
300                "howaver", "however" }, {
301                "howver", "however" }, {
302                "humaniti", "humanity" }, {
303                "hyfin", "hyphen" }, {
304                "hypotathes", "hypothesis" }, {
305                "hypotathese", "hypothesis" }, {
306                "hystrical", "hysterical" }, {
307                "ident", "indent" }, {
308                "illegitament", "illegitimate" }, {
309                "imbed", "embed" }, {
310                "imediaetly", "immediately" }, {
311                "imfamy", "infamy" }, {
312                "immenant", "immanent" }, {
313                "implemtes", "implements" }, {
314                "inadvertant", "inadvertent" }, {
315                "incase", "in case" }, {
316                "incedious", "insidious" }, {
317                "incompleet", "incomplete" }, {
318                "incomplot", "incomplete" }, {
319                "inconvenant", "inconvenient" }, {
320                "inconvience", "inconvenience" }, {
321                "independant", "independent" }, {
322                "independenent", "independent" }, {
323                "indepnends", "independent" }, {
324                "indepth", "in depth" }, {
325                "indispensible", "indispensable" }, {
326                "inefficite", "inefficient" }, {
327                "inerface", "interface" }, {
328                "infact", "in fact" }, {
329                "influencial", "influential" }, {
330                "inital", "initial" }, {
331                "initinized", "initialized" }, {
332                "initized", "initialized" }, {
333                "innoculate", "inoculate" }, {
334                "insistant", "insistent" }, {
335                "insistenet", "insistent" }, {
336                "instulation", "installation" }, {
337                "intealignt", "intelligent" }, {
338                "intejilent", "intelligent" }, {
339                "intelegent", "intelligent" }, {
340                "intelegnent", "intelligent" }, {
341                "intelejent", "intelligent" }, {
342                "inteligent", "intelligent" }, {
343                "intelignt", "intelligent" }, {
344                "intellagant", "intelligent" }, {
345                "intellegent", "intelligent" }, {
346                "intellegint", "intelligent" }, {
347                "intellgnt", "intelligent" }, {
348                "intensionality", "intensionally" }, {
349                "interate", "iterate" }, {
350                "internation", "international" }, {
351                "interpretate", "interpret" }, {
352                "interpretter", "interpreter" }, {
353                "intertes", "interested" }, {
354                "intertesd", "interested" }, {
355                "invermeantial", "environmental" }, {
356                "irregardless", "regardless" }, {
357                "irresistable", "irresistible" }, {
358                "irritible", "irritable" }, {
359                "islams", "muslims" }, {
360                "isotrop", "isotope" }, {
361                "isreal", "israel" }, {
362                "johhn", "john" }, {
363                "judgement", "judgment" }, {
364                "kippur", "kipper" }, {
365                "knawing", "knowing" }, {
366                "latext", "latest" }, {
367                "leasve", "leave" }, {
368                "lesure", "leisure" }, {
369                "liasion", "lesion" }, {
370                "liason", "liaison" }, {
371                "libary", "library" }, {
372                "likly", "likely" }, {
373                "lilometer", "kilometer" }, {
374                "liquify", "liquefy" }, {
375                "lloyer", "layer" }, {
376                "lossing", "losing" }, {
377                "luser", "laser" }, {
378                "maintanence", "maintenance" }, {
379                "majaerly", "majority" }, {
380                "majoraly", "majority" }, {
381                "maks", "masks" }, {
382                "mandelbrot", "Mandelbrot" }, {
383                "mant", "want" }, {
384                "marshall", "marshal" }, {
385                "maxium", "maximum" }, {
386                "meory", "memory" }, {
387                "metter", "better" }, {
388                "mic", "mike" }, {
389                "midia", "media" }, {
390                "millenium", "millennium" }, {
391                "miniscule", "minuscule" }, {
392                "minkay", "monkey" }, {
393                "minum", "minimum" }, {
394                "mischievious", "mischievous" }, {
395                "misilous", "miscellaneous" }, {
396                "momento", "memento" }, {
397                "monkay", "monkey" }, {
398                "mosaik", "mosaic" }, {
399                "mostlikely", "most likely" }, {
400                "mousr", "mouser" }, {
401                "mroe", "more" }, {
402                "neccessary", "necessary" }, {
403                "necesary", "necessary" }, {
404                "necesser", "necessary" }, {
405                "neice", "niece" }, {
406                "neighbour", "neighbor" }, {
407                "nemonic", "pneumonic" }, {
408                "nevade", "Nevada" }, {
409                "nickleodeon", "nickelodeon" }, {
410                "nieve", "naive" }, {
411                "noone", "no one" }, {
412                "noticably", "noticeably" }, {
413                "notin", "not in" }, {
414                "nozled", "nuzzled" }, {
415                "objectsion", "objects" }, {
416                "obsfuscate", "obfuscate" }, {
417                "ocassion", "occasion" }, {
418                "occuppied", "occupied" }, {
419                "occurence", "occurrence" }, {
420                "octagenarian", "octogenarian" }, {
421                "olf", "old" }, {
422                "opposim", "opossum" }, {
423                "organise", "organize" }, {
424                "organiz", "organize" }, {
425                "orientate", "orient" }, {
426                "oscilascope", "oscilloscope" }, {
427                "oving", "moving" }, {
428                "paramers", "parameters" }, {
429                "parametic", "parameter" }, {
430                "paranets", "parameters" }, {
431                "partrucal", "particular" }, {
432                "pataphysical", "metaphysical" }, {
433                "patten", "pattern" }, {
434                "permissable", "permissible" }, {
435                "permition", "permission" }, {
436                "permmasivie", "permissive" }, {
437                "perogative", "prerogative" }, {
438                "persue", "pursue" }, {
439                "phantasia", "fantasia" }, {
440                "phenominal", "phenomenal" }, {
441                "picaresque", "picturesque" }, {
442                "playwrite", "playwright" }, {
443                "poeses", "poesies" }, {
444                "polation", "politician" }, {
445                "poligamy", "polygamy" }, {
446                "politict", "politic" }, {
447                "pollice", "police" }, {
448                "polypropalene", "polypropylene" }, {
449                "pompom", "pompon" }, {
450                "possable", "possible" }, {
451                "practicle", "practical" }, {
452                "pragmaticism", "pragmatism" }, {
453                "preceeding", "preceding" }, {
454                "precion", "precision" }, {
455                "precios", "precision" }, {
456                "preemptory", "peremptory" }, {
457                "prefices", "prefixes" }, {
458                "prefixt", "prefixed" }, {
459                "presbyterian", "Presbyterian" }, {
460                "presue", "pursue" }, {
461                "presued", "pursued" }, {
462                "privielage", "privilege" }, {
463                "priviledge", "privilege" }, {
464                "proceedures", "procedures" }, {
465                "pronensiation", "pronunciation" }, {
466                "pronisation", "pronunciation" }, {
467                "pronounciation", "pronunciation" }, {
468                "properally", "properly" }, {
469                "proplematic", "problematic" }, {
470                "protray", "portray" }, {
471                "pscolgst", "psychologist" }, {
472                "psicolagest", "psychologist" }, {
473                "psycolagest", "psychologist" }, {
474                "quoz", "quiz" }, {
475                "radious", "radius" }, {
476                "ramplily", "rampantly" }, {
477                "reccomend", "recommend" }, {
478                "reccona", "raccoon" }, {
479                "recieve", "receive" }, {
480                "reconise", "recognize" }, {
481                "rectangeles", "rectangle" }, {
482                "redign", "redesign" }, {
483                "reoccurring", "recurring" }, {
484                "repitition", "repetition" }, {
485                "replasments", "replacement" }, {
486                "reposable", "responsible" }, {
487                "reseblence", "resemblance" }, {
488                "respct", "respect" }, {
489                "respecally", "respectfully" }, {
490                "roon", "room" }, {
491                "rought", "roughly" }, {
492                "rsx", "RSX" }, {
493                "rudemtry", "rudimentary" }, {
494                "runnung", "running" }, {
495                "sacreligious", "sacrilegious" }, {
496                "saftly", "safely" }, {
497                "salut", "salute" }, {
498                "satifly", "satisfy" }, {
499                "scrabdle", "scrabble" }, {
500                "searcheable", "searchable" }, {
501                "secion", "section" }, {
502                "seferal", "several" }, {
503                "segements", "segments" }, {
504                "sence", "sense" }, {
505                "seperate", "separate" }, {
506                "sherbert", "sherbet" }, {
507                "sicolagest", "psychologist" }, {
508                "sieze", "seize" }, {
509                "simpfilty", "simplicity" }, {
510                "simplye", "simply" }, {
511                "singal", "signal" }, {
512                "sitte", "site" }, {
513                "situration", "situation" }, {
514                "slyph", "sylph" }, {
515                "smil", "smile" }, {
516                "snuck", "sneaked" }, {
517                "sometmes", "sometimes" }, {
518                "soonec", "sonic" }, {
519                "specificialy", "specifically" }, {
520                "spel", "spell" }, {
521                "spoak", "spoke" }, {
522                "sponsered", "sponsored" }, {
523                "stering", "steering" }, {
524                "straightjacket", "straitjacket" }, {
525                "stumach", "stomach" }, {
526                "stutent", "student" }, {
527                "styleguide", "style guide" }, {
528                "subisitions", "substitutions" }, {
529                "subjecribed", "subscribed" }, {
530                "subpena", "subpoena" }, {
531                "substations", "substitutions" }, {
532                "suger", "sugar" }, {
533                "supercede", "supersede" }, {
534                "superfulous", "superfluous" }, {
535                "susan", "Susan" }, {
536                "swimwear", "swim wear" }, {
537                "syncorization", "synchronization" }, {
538                "taff", "tough" }, {
539                "taht", "that" }, {
540                "tattos", "tattoos" }, {
541                "techniquely", "technically" }, {
542                "teh", "the" }, {
543                "tem", "team" }, {
544                "teo", "two" }, {
545                "teridical", "theoretical" }, {
546                "tesst", "test" }, {
547                "tets", "tests" }, {
548                "thanot", "than or" }, {
549                "theirselves", "themselves" }, {
550                "theridically", "theoretical" }, {
551                "thredically", "theoretically" }, {
552                "thruout", "throughout" }, {
553                "ths", "this" }, {
554                "titalate", "titillate" }, {
555                "tobagan", "tobaggon" }, {
556                "tommorrow", "tomorrow" }, {
557                "tomorow", "tomorrow" }, {
558                "tradegy", "tragedy" }, {
559                "trubbel", "trouble" }, {
560                "ttest", "test" }, {
561                "tunnellike", "tunnel like" }, {
562                "tured", "turned" }, {
563                "tyrrany", "tyranny" }, {
564                "unatourral", "unnatural" }, {
565                "unaturral", "unnatural" }, {
566                "unconisitional", "unconstitutional" }, {
567                "unconscience", "unconscious" }, {
568                "underladder", "under ladder" }, {
569                "unentelegible", "unintelligible" }, {
570                "unfortunently", "unfortunately" }, {
571                "unnaturral", "unnatural" }, {
572                "upcast", "up cast" }, {
573                "upmost", "utmost" }, {
574                "uranisium", "uranium" }, {
575                "verison", "version" }, {
576                "vinagarette", "vinaigrette" }, {
577                "volumptuous", "voluptuous" }, {
578                "volunteerism", "voluntarism" }, {
579                "volye", "volley" }, {
580                "wadting", "wasting" }, {
581                "waite", "wait" }, {
582                "wan't", "won't" }, {
583                "warloord", "warlord" }, {
584                "whaaat", "what" }, {
585                "whard", "ward" }, {
586                "whimp", "wimp" }, {
587                "wicken", "weaken" }, {
588                "wierd", "weird" }, {
589                "wrank", "rank" }, {
590                "writeen", "righten" }, {
591                "writting", "writing" }, {
592                "wundeews", "windows" }, {
593                "yeild", "yield" }, {
594                "youe", "your" }
595        };
596    
597        /**
598         * A subset of FIXTURE generated by this test.
599         */
600        private static final String[][] MATCHES = { { "Accosinly", "Occasionally" }, {
601                "Maddness", "Madness" }, {
602                "Occusionaly", "Occasionally" }, {
603                "Steffen", "Stephen" }, {
604                "Thw", "The" }, {
605                "Unformanlly", "Unfortunately" }, {
606                "Unfortally", "Unfortunately" }, {
607                "abilitey", "ability" }, {
608                "absorbtion", "absorption" }, {
609                "accidently", "accidentally" }, {
610                "accomodate", "accommodate" }, {
611                "acommadate", "accommodate" }, {
612                "acord", "accord" }, {
613                "adultry", "adultery" }, {
614                "aggresive", "aggressive" }, {
615                "alchohol", "alcohol" }, {
616                "alchoholic", "alcoholic" }, {
617                "allieve", "alive" }, {
618                "alot", "a lot" }, {
619                "alright", "all right" }, {
620                "amature", "amateur" }, {
621                "ambivilant", "ambivalent" }, {
622                "amourfous", "amorphous" }, {
623                "annoint", "anoint" }, {
624                "annonsment", "announcement" }, {
625                "annoyting", "anting" }, {
626                "annuncio", "announce" }, {
627                "anotomy", "anatomy" }, {
628                "antidesestablishmentarianism", "antidisestablishmentarianism" }, {
629                "antidisestablishmentarism", "antidisestablishmentarianism" }, {
630                "anynomous", "anonymous" }, {
631                "appelet", "applet" }, {
632                "appreceiated", "appreciated" }, {
633                "appresteate", "appreciate" }, {
634                "aquantance", "acquaintance" }, {
635                "aricticure", "architecture" }, {
636                "asterick", "asterisk" }, {
637                "asymetric", "asymmetric" }, {
638                "atentively", "attentively" }, {
639                "bankrot", "bankrupt" }, {
640                "basicly", "basically" }, {
641                "batallion", "battalion" }, {
642                "bbrose", "browse" }, {
643                "beauro", "bureau" }, {
644                "beaurocracy", "bureaucracy" }, {
645                "beggining", "beginning" }, {
646                "behaviour", "behavior" }, {
647                "beleive", "believe" }, {
648                "belive", "believe" }, {
649                "blait", "bleat" }, {
650                "bouyant", "buoyant" }, {
651                "boygot", "boycott" }, {
652                "brocolli", "broccoli" }, {
653                "buder", "butter" }, {
654                "budr", "butter" }, {
655                "budter", "butter" }, {
656                "buracracy", "bureaucracy" }, {
657                "burracracy", "bureaucracy" }, {
658                "buton", "button" }, {
659                "byby", "by by" }, {
660                "cauler", "caller" }, {
661                "ceasar", "caesar" }, {
662                "cemetary", "cemetery" }, {
663                "changeing", "changing" }, {
664                "cheet", "cheat" }, {
665                "cimplicity", "simplicity" }, {
666                "circumstaces", "circumstances" }, {
667                "clob", "club" }, {
668                "coaln", "colon" }, {
669                "colleaque", "colleague" }, {
670                "colloquilism", "colloquialism" }, {
671                "columne", "column" }, {
672                "comitmment", "commitment" }, {
673                "comitte", "committee" }, {
674                "comittmen", "commitment" }, {
675                "comittmend", "commitment" }, {
676                "commerciasl", "commercials" }, {
677                "commited", "committed" }, {
678                "commitee", "committee" }, {
679                "companys", "companies" }, {
680                "comupter", "computer" }, {
681                "concensus", "consensus" }, {
682                "confusionism", "confucianism" }, {
683                "congradulations", "congratulations" }, {
684                "contunie", "continue" }, {
685                "cooly", "coolly" }, {
686                "copping", "coping" }, {
687                "cosmoplyton", "cosmopolitan" }, {
688                "crasy", "crazy" }, {
689                "croke", "croak" }, {
690                "crucifiction", "crucifixion" }, {
691                "crusifed", "crucified" }, {
692                "cumba", "combo" }, {
693                "custamisation", "customization" }, {
694                "dag", "dog" }, {
695                "daly", "daily" }, {
696                "defence", "defense" }, {
697                "definate", "definite" }, {
698                "definately", "definitely" }, {
699                "dependeble", "dependable" }, {
700                "descrption", "description" }, {
701                "descrptn", "description" }, {
702                "desparate", "desperate" }, {
703                "dessicate", "desiccate" }, {
704                "destint", "distant" }, {
705                "develepment", "developments" }, {
706                "developement", "development" }, {
707                "develpond", "development" }, {
708                "devulge", "divulge" }, {
709                "dieties", "deities" }, {
710                "dinasaur", "dinosaur" }, {
711                "dinasour", "dinosaur" }, {
712                "discuess", "discuss" }, {
713                "disect", "dissect" }, {
714                "disippate", "dissipate" }, {
715                "disition", "decision" }, {
716                "dispair", "despair" }, {
717                "distarct", "distract" }, {
718                "distart", "distort" }, {
719                "distroy", "destroy" }, {
720                "doenload", "download" }, {
721                "dongle", "dangle" }, {
722                "doog", "dog" }, {
723                "dramaticly", "dramatically" }, {
724                "drunkeness", "drunkenness" }, {
725                "ductioneery", "dictionary" }, {
726                "ecstacy", "ecstasy" }, {
727                "egsistence", "existence" }, {
728                "eitiology", "etiology" }, {
729                "elagent", "elegant" }, {
730                "embarass", "embarrass" }, {
731                "embarassment", "embarrassment" }, {
732                "embaress", "embarrass" }, {
733                "encapsualtion", "encapsulation" }, {
734                "encyclapidia", "encyclopedia" }, {
735                "encyclopia", "encyclopedia" }, {
736                "engins", "engine" }, {
737                "enhence", "enhance" }, {
738                "ennuui", "ennui" }, {
739                "enventions", "inventions" }, {
740                "envireminakl", "environmental" }, {
741                "enviroment", "environment" }, {
742                "epitomy", "epitome" }, {
743                "equire", "acquire" }, {
744                "errara", "error" }, {
745                "evaualtion", "evaluation" }, {
746                "excede", "exceed" }, {
747                "excercise", "exercise" }, {
748                "excpt", "except" }, {
749                "exhileration", "exhilaration" }, {
750                "existance", "existence" }, {
751                "expleyly", "explicitly" }, {
752                "explity", "explicitly" }, {
753                "failer", "failure" }, {
754                "faver", "favor" }, {
755                "faxe", "fax" }, {
756                "firey", "fiery" }, {
757                "fistival", "festival" }, {
758                "flatterring", "flattering" }, {
759                "flukse", "flux" }, {
760                "fone", "phone" }, {
761                "forsee", "foresee" }, {
762                "frustartaion", "frustrating" }, {
763                "funetik", "phonetic" }, {
764                "gaurd", "guard" }, {
765                "generly", "generally" }, {
766                "ghandi", "gandhi" }, {
767                "gotton", "gotten" }, {
768                "gracefull", "graceful" }, {
769                "gradualy", "gradually" }, {
770                "grammer", "grammar" }, {
771                "hallo", "hello" }, {
772                "hapily", "happily" }, {
773                "harrass", "harass" }, {
774                "heellp", "help" }, {
775                "heighth", "height" }, {
776                "hellp", "help" }, {
777                "helo", "hello" }, {
778                "hifin", "hyphen" }, {
779                "hifine", "hyphen" }, {
780                "hiphine", "hyphen" }, {
781                "hippie", "hippy" }, {
782                "hippopotamous", "hippopotamus" }, {
783                "hourse", "horse" }, {
784                "houssing", "housing" }, {
785                "howaver", "however" }, {
786                "howver", "however" }, {
787                "humaniti", "humanity" }, {
788                "hyfin", "hyphen" }, {
789                "hystrical", "hysterical" }, {
790                "illegitament", "illegitimate" }, {
791                "imbed", "embed" }, {
792                "imediaetly", "immediately" }, {
793                "immenant", "immanent" }, {
794                "implemtes", "implements" }, {
795                "inadvertant", "inadvertent" }, {
796                "incase", "in case" }, {
797                "incedious", "insidious" }, {
798                "incompleet", "incomplete" }, {
799                "incomplot", "incomplete" }, {
800                "inconvenant", "inconvenient" }, {
801                "inconvience", "inconvenience" }, {
802                "independant", "independent" }, {
803                "independenent", "independent" }, {
804                "indepnends", "independent" }, {
805                "indepth", "in depth" }, {
806                "indispensible", "indispensable" }, {
807                "inefficite", "inefficient" }, {
808                "infact", "in fact" }, {
809                "influencial", "influential" }, {
810                "innoculate", "inoculate" }, {
811                "insistant", "insistent" }, {
812                "insistenet", "insistent" }, {
813                "instulation", "installation" }, {
814                "intealignt", "intelligent" }, {
815                "intelegent", "intelligent" }, {
816                "intelegnent", "intelligent" }, {
817                "intelejent", "intelligent" }, {
818                "inteligent", "intelligent" }, {
819                "intelignt", "intelligent" }, {
820                "intellagant", "intelligent" }, {
821                "intellegent", "intelligent" }, {
822                "intellegint", "intelligent" }, {
823                "intellgnt", "intelligent" }, {
824                "intensionality", "intensionally" }, {
825                "internation", "international" }, {
826                "interpretate", "interpret" }, {
827                "interpretter", "interpreter" }, {
828                "intertes", "interested" }, {
829                "intertesd", "interested" }, {
830                "invermeantial", "environmental" }, {
831                "irresistable", "irresistible" }, {
832                "irritible", "irritable" }, {
833                "isreal", "israel" }, {
834                "johhn", "john" }, {
835                "kippur", "kipper" }, {
836                "knawing", "knowing" }, {
837                "lesure", "leisure" }, {
838                "liasion", "lesion" }, {
839                "liason", "liaison" }, {
840                "likly", "likely" }, {
841                "liquify", "liquefy" }, {
842                "lloyer", "layer" }, {
843                "lossing", "losing" }, {
844                "luser", "laser" }, {
845                "maintanence", "maintenance" }, {
846                "mandelbrot", "Mandelbrot" }, {
847                "marshall", "marshal" }, {
848                "maxium", "maximum" }, {
849                "mic", "mike" }, {
850                "midia", "media" }, {
851                "millenium", "millennium" }, {
852                "miniscule", "minuscule" }, {
853                "minkay", "monkey" }, {
854                "mischievious", "mischievous" }, {
855                "momento", "memento" }, {
856                "monkay", "monkey" }, {
857                "mosaik", "mosaic" }, {
858                "mostlikely", "most likely" }, {
859                "mousr", "mouser" }, {
860                "mroe", "more" }, {
861                "necesary", "necessary" }, {
862                "necesser", "necessary" }, {
863                "neice", "niece" }, {
864                "neighbour", "neighbor" }, {
865                "nemonic", "pneumonic" }, {
866                "nevade", "Nevada" }, {
867                "nickleodeon", "nickelodeon" }, {
868                "nieve", "naive" }, {
869                "noone", "no one" }, {
870                "notin", "not in" }, {
871                "nozled", "nuzzled" }, {
872                "objectsion", "objects" }, {
873                "ocassion", "occasion" }, {
874                "occuppied", "occupied" }, {
875                "occurence", "occurrence" }, {
876                "octagenarian", "octogenarian" }, {
877                "opposim", "opossum" }, {
878                "organise", "organize" }, {
879                "organiz", "organize" }, {
880                "orientate", "orient" }, {
881                "oscilascope", "oscilloscope" }, {
882                "parametic", "parameter" }, {
883                "permissable", "permissible" }, {
884                "permmasivie", "permissive" }, {
885                "persue", "pursue" }, {
886                "phantasia", "fantasia" }, {
887                "phenominal", "phenomenal" }, {
888                "playwrite", "playwright" }, {
889                "poeses", "poesies" }, {
890                "poligamy", "polygamy" }, {
891                "politict", "politic" }, {
892                "pollice", "police" }, {
893                "polypropalene", "polypropylene" }, {
894                "possable", "possible" }, {
895                "practicle", "practical" }, {
896                "pragmaticism", "pragmatism" }, {
897                "preceeding", "preceding" }, {
898                "precios", "precision" }, {
899                "preemptory", "peremptory" }, {
900                "prefixt", "prefixed" }, {
901                "presbyterian", "Presbyterian" }, {
902                "presue", "pursue" }, {
903                "presued", "pursued" }, {
904                "privielage", "privilege" }, {
905                "priviledge", "privilege" }, {
906                "proceedures", "procedures" }, {
907                "pronensiation", "pronunciation" }, {
908                "pronounciation", "pronunciation" }, {
909                "properally", "properly" }, {
910                "proplematic", "problematic" }, {
911                "protray", "portray" }, {
912                "pscolgst", "psychologist" }, {
913                "psicolagest", "psychologist" }, {
914                "psycolagest", "psychologist" }, {
915                "quoz", "quiz" }, {
916                "radious", "radius" }, {
917                "reccomend", "recommend" }, {
918                "reccona", "raccoon" }, {
919                "recieve", "receive" }, {
920                "reconise", "recognize" }, {
921                "rectangeles", "rectangle" }, {
922                "reoccurring", "recurring" }, {
923                "repitition", "repetition" }, {
924                "replasments", "replacement" }, {
925                "respct", "respect" }, {
926                "respecally", "respectfully" }, {
927                "rsx", "RSX" }, {
928                "runnung", "running" }, {
929                "sacreligious", "sacrilegious" }, {
930                "salut", "salute" }, {
931                "searcheable", "searchable" }, {
932                "seferal", "several" }, {
933                "segements", "segments" }, {
934                "sence", "sense" }, {
935                "seperate", "separate" }, {
936                "sicolagest", "psychologist" }, {
937                "sieze", "seize" }, {
938                "simplye", "simply" }, {
939                "sitte", "site" }, {
940                "slyph", "sylph" }, {
941                "smil", "smile" }, {
942                "sometmes", "sometimes" }, {
943                "soonec", "sonic" }, {
944                "specificialy", "specifically" }, {
945                "spel", "spell" }, {
946                "spoak", "spoke" }, {
947                "sponsered", "sponsored" }, {
948                "stering", "steering" }, {
949                "straightjacket", "straitjacket" }, {
950                "stumach", "stomach" }, {
951                "stutent", "student" }, {
952                "styleguide", "style guide" }, {
953                "subpena", "subpoena" }, {
954                "substations", "substitutions" }, {
955                "supercede", "supersede" }, {
956                "superfulous", "superfluous" }, {
957                "susan", "Susan" }, {
958                "swimwear", "swim wear" }, {
959                "syncorization", "synchronization" }, {
960                "taff", "tough" }, {
961                "taht", "that" }, {
962                "tattos", "tattoos" }, {
963                "techniquely", "technically" }, {
964                "teh", "the" }, {
965                "tem", "team" }, {
966                "teo", "two" }, {
967                "teridical", "theoretical" }, {
968                "tesst", "test" }, {
969                "theridically", "theoretical" }, {
970                "thredically", "theoretically" }, {
971                "thruout", "throughout" }, {
972                "ths", "this" }, {
973                "titalate", "titillate" }, {
974                "tobagan", "tobaggon" }, {
975                "tommorrow", "tomorrow" }, {
976                "tomorow", "tomorrow" }, {
977                "trubbel", "trouble" }, {
978                "ttest", "test" }, {
979                "tyrrany", "tyranny" }, {
980                "unatourral", "unnatural" }, {
981                "unaturral", "unnatural" }, {
982                "unconisitional", "unconstitutional" }, {
983                "unconscience", "unconscious" }, {
984                "underladder", "under ladder" }, {
985                "unentelegible", "unintelligible" }, {
986                "unfortunently", "unfortunately" }, {
987                "unnaturral", "unnatural" }, {
988                "upcast", "up cast" }, {
989                "verison", "version" }, {
990                "vinagarette", "vinaigrette" }, {
991                "volunteerism", "voluntarism" }, {
992                "volye", "volley" }, {
993                "waite", "wait" }, {
994                "wan't", "won't" }, {
995                "warloord", "warlord" }, {
996                "whaaat", "what" }, {
997                "whard", "ward" }, {
998                "whimp", "wimp" }, {
999                "wicken", "weaken" }, {
1000                "wierd", "weird" }, {
1001                "wrank", "rank" }, {
1002                "writeen", "righten" }, {
1003                "writting", "writing" }, {
1004                "wundeews", "windows" }, {
1005                "yeild", "yield" }, };
1006    
1007        /**
1008         * Tests encoding APIs in one place.
1009         */
1010        private void assertDoubleMetaphone(final String expected, final String source) {
1011            assertEquals(expected, this.getStringEncoder().encode(source));
1012            try {
1013                assertEquals(expected, this.getStringEncoder().encode((Object) source));
1014            } catch (final EncoderException e) {
1015                fail("Unexpected expection: " + e);
1016            }
1017            assertEquals(expected, this.getStringEncoder().doubleMetaphone(source));
1018            assertEquals(expected, this.getStringEncoder().doubleMetaphone(source, false));
1019        }
1020    
1021        /**
1022         * Tests encoding APIs in one place.
1023         */
1024        public void assertDoubleMetaphoneAlt(final String expected, final String source) {
1025            assertEquals(expected, this.getStringEncoder().doubleMetaphone(source, true));
1026        }
1027    
1028        public void doubleMetaphoneEqualTest(final String[][] pairs, final boolean useAlternate) {
1029            this.validateFixture(pairs);
1030            for (final String[] pair : pairs) {
1031                final String name0 = pair[0];
1032                final String name1 = pair[1];
1033                final String failMsg = "Expected match between " + name0 + " and " + name1 + " (use alternate: " + useAlternate + ")";
1034                assertTrue(failMsg, this.getStringEncoder().isDoubleMetaphoneEqual(name0, name1, useAlternate));
1035                assertTrue(failMsg, this.getStringEncoder().isDoubleMetaphoneEqual(name1, name0, useAlternate));
1036                if (!useAlternate) {
1037                    assertTrue(failMsg, this.getStringEncoder().isDoubleMetaphoneEqual(name0, name1));
1038                    assertTrue(failMsg, this.getStringEncoder().isDoubleMetaphoneEqual(name1, name0));
1039                }
1040            }
1041        }
1042    
1043        public void doubleMetaphoneNotEqualTest(final boolean alternate) {
1044            assertFalse(this.getStringEncoder().isDoubleMetaphoneEqual("Brain", "Band", alternate));
1045            assertFalse(this.getStringEncoder().isDoubleMetaphoneEqual("Band", "Brain", alternate));
1046    
1047            if (!alternate) {
1048                assertFalse(this.getStringEncoder().isDoubleMetaphoneEqual("Brain", "Band"));
1049                assertFalse(this.getStringEncoder().isDoubleMetaphoneEqual("Band", "Brain"));
1050            }
1051        }
1052    
1053        @Override
1054        protected DoubleMetaphone createStringEncoder() {
1055            return new DoubleMetaphone();
1056        }
1057    
1058        @Test
1059        public void testDoubleMetaphone() {
1060            assertDoubleMetaphone("TSTN", "testing");
1061            assertDoubleMetaphone("0", "The");
1062            assertDoubleMetaphone("KK", "quick");
1063            assertDoubleMetaphone("PRN", "brown");
1064            assertDoubleMetaphone("FKS", "fox");
1065            assertDoubleMetaphone("JMPT", "jumped");
1066            assertDoubleMetaphone("AFR", "over");
1067            assertDoubleMetaphone("0", "the");
1068            assertDoubleMetaphone("LS", "lazy");
1069            assertDoubleMetaphone("TKS", "dogs");
1070            assertDoubleMetaphone("MKFR", "MacCafferey");
1071            assertDoubleMetaphone("STFN", "Stephan");
1072            assertDoubleMetaphone("KSSK", "Kuczewski");
1073            assertDoubleMetaphone("MKLL", "McClelland");
1074            assertDoubleMetaphone("SNHS", "san jose");
1075            assertDoubleMetaphone("SNFP", "xenophobia");
1076    
1077            assertDoubleMetaphoneAlt("TSTN", "testing");
1078            assertDoubleMetaphoneAlt("T", "The");
1079            assertDoubleMetaphoneAlt("KK", "quick");
1080            assertDoubleMetaphoneAlt("PRN", "brown");
1081            assertDoubleMetaphoneAlt("FKS", "fox");
1082            assertDoubleMetaphoneAlt("AMPT", "jumped");
1083            assertDoubleMetaphoneAlt("AFR", "over");
1084            assertDoubleMetaphoneAlt("T", "the");
1085            assertDoubleMetaphoneAlt("LS", "lazy");
1086            assertDoubleMetaphoneAlt("TKS", "dogs");
1087            assertDoubleMetaphoneAlt("MKFR", "MacCafferey");
1088            assertDoubleMetaphoneAlt("STFN", "Stephan");
1089            assertDoubleMetaphoneAlt("KXFS", "Kutchefski");
1090            assertDoubleMetaphoneAlt("MKLL", "McClelland");
1091            assertDoubleMetaphoneAlt("SNHS", "san jose");
1092            assertDoubleMetaphoneAlt("SNFP", "xenophobia");
1093            assertDoubleMetaphoneAlt("FKR", "Fokker");
1094            assertDoubleMetaphoneAlt("AK", "Joqqi");
1095            assertDoubleMetaphoneAlt("HF", "Hovvi");
1096            assertDoubleMetaphoneAlt("XRN", "Czerny");
1097        }
1098    
1099        @Test
1100        public void testEmpty() {
1101            assertEquals(null, this.getStringEncoder().doubleMetaphone(null));
1102            assertEquals(null, this.getStringEncoder().doubleMetaphone(""));
1103            assertEquals(null, this.getStringEncoder().doubleMetaphone(" "));
1104            assertEquals(null, this.getStringEncoder().doubleMetaphone("\t\n\r "));
1105        }
1106    
1107        /**
1108         * Test setting maximum length
1109         */
1110        @Test
1111        public void testSetMaxCodeLength() {
1112            final String value = "jumped";
1113    
1114            final DoubleMetaphone doubleMetaphone = new DoubleMetaphone();
1115    
1116            // Sanity check of default settings
1117            assertEquals("Default Max Code Length", 4, doubleMetaphone.getMaxCodeLen());
1118            assertEquals("Default Primary",   "JMPT", doubleMetaphone.doubleMetaphone(value, false));
1119            assertEquals("Default Alternate", "AMPT", doubleMetaphone.doubleMetaphone(value, true));
1120    
1121            // Check setting Max Code Length
1122            doubleMetaphone.setMaxCodeLen(3);
1123            assertEquals("Set Max Code Length", 3, doubleMetaphone.getMaxCodeLen());
1124            assertEquals("Max=3 Primary",   "JMP", doubleMetaphone.doubleMetaphone(value, false));
1125            assertEquals("Max=3 Alternate", "AMP", doubleMetaphone.doubleMetaphone(value, true));
1126        }
1127    
1128        @Test
1129        public void testIsDoubleMetaphoneEqualBasic() {
1130            final String[][] testFixture = new String[][] { { "Case", "case" }, {
1131                    "CASE", "Case" }, {
1132                    "caSe", "cAsE" }, {
1133                    "cookie", "quick" }, {
1134                    "quick", "cookie" }, {
1135                    "Brian", "Bryan" }, {
1136                    "Auto", "Otto" }, {
1137                    "Steven", "Stefan" }, {
1138                    "Philipowitz", "Filipowicz" }
1139            };
1140            doubleMetaphoneEqualTest(testFixture, false);
1141            doubleMetaphoneEqualTest(testFixture, true);
1142        }
1143    
1144        /**
1145         * Example in the original article but failures in this Java impl:
1146         */
1147        @Test
1148        public void testIsDoubleMetaphoneEqualExtended1() {
1149            //        String[][] testFixture = new String[][] { { "Smith", "Schmidt" }
1150            //        };
1151            //        doubleMetaphoneEqualTest(testFixture, false);
1152            //        doubleMetaphoneEqualTest(testFixture, true);
1153        }
1154    
1155        @Test
1156        public void testIsDoubleMetaphoneEqualExtended2() {
1157            final String[][] testFixture = new String[][] { { "Jablonski", "Yablonsky" }
1158            };
1159            //doubleMetaphoneEqualTest(testFixture, false);
1160            doubleMetaphoneEqualTest(testFixture, true);
1161        }
1162    
1163        /**
1164         * Used to generate the MATCHES array and test possible matches from the
1165         * FIXTURE array.
1166         */
1167        @Test
1168        public void testIsDoubleMetaphoneEqualExtended3() {
1169            this.validateFixture(FIXTURE);
1170            final StringBuilder failures = new StringBuilder();
1171            final StringBuilder matches = new StringBuilder();
1172            final String cr = System.getProperty("line.separator");
1173            matches.append("private static final String[][] MATCHES = {" + cr);
1174            int failCount = 0;
1175            for (int i = 0; i < FIXTURE.length; i++) {
1176                final String name0 = FIXTURE[i][0];
1177                final String name1 = FIXTURE[i][1];
1178                final boolean match1 = this.getStringEncoder().isDoubleMetaphoneEqual(name0, name1, false);
1179                final boolean match2 = this.getStringEncoder().isDoubleMetaphoneEqual(name0, name1, true);
1180                if (match1 == false && match2 == false) {
1181                    final String failMsg = "[" + i + "] " + name0 + " and " + name1 + cr;
1182                    failures.append(failMsg);
1183                    failCount++;
1184                } else {
1185                    matches.append("{\"" + name0 + "\", \"" + name1 + "\"}," + cr);
1186                }
1187            }
1188            matches.append("};");
1189            // Turn on to print a new MATCH array
1190            //System.out.println(matches.toString());
1191            if (failCount > 0) {
1192                // Turn on to see which pairs do NOT match.
1193                // String msg = failures.toString();
1194                //fail(failCount + " failures out of " + FIXTURE.length + ". The
1195                // following could be made to match: " + cr + msg);
1196            }
1197        }
1198    
1199        @Test
1200        public void testIsDoubleMetaphoneEqualWithMATCHES() {
1201            this.validateFixture(MATCHES);
1202            for (int i = 0; i < MATCHES.length; i++) {
1203                final String name0 = MATCHES[i][0];
1204                final String name1 = MATCHES[i][1];
1205                final boolean match1 = this.getStringEncoder().isDoubleMetaphoneEqual(name0, name1, false);
1206                final boolean match2 = this.getStringEncoder().isDoubleMetaphoneEqual(name0, name1, true);
1207                if (match1 == false && match2 == false) {
1208                    fail("Expected match [" + i + "] " + name0 + " and " + name1);
1209                }
1210            }
1211        }
1212    
1213        @Test
1214        public void testIsDoubleMetaphoneNotEqual() {
1215            doubleMetaphoneNotEqualTest(false);
1216            doubleMetaphoneNotEqualTest(true);
1217        }
1218    
1219        @Test
1220        public void testCCedilla() {
1221            assertTrue(this.getStringEncoder().isDoubleMetaphoneEqual("\u00e7", "S")); // c-cedilla
1222        }
1223    
1224        @Test
1225        public void testNTilde() {
1226            assertTrue(this.getStringEncoder().isDoubleMetaphoneEqual("\u00f1", "N")); // n-tilde
1227        }
1228    
1229        public void validateFixture(final String[][] pairs) {
1230            if (pairs.length == 0) {
1231                fail("Test fixture is empty");
1232            }
1233            for (int i = 0; i < pairs.length; i++) {
1234                if (pairs[i].length != 2) {
1235                    fail("Error in test fixture in the data array at index " + i);
1236                }
1237            }
1238        }
1239    }