001/*
002 * Licensed to the Apache Software Foundation (ASF) under one or more
003 * contributor license agreements.  See the NOTICE file distributed with
004 * this work for additional information regarding copyright ownership.
005 * The ASF licenses this file to You under the Apache License, Version 2.0
006 * (the "License"); you may not use this file except in compliance with
007 * the License.  You may obtain a copy of the License at
008 *
009 *      http://www.apache.org/licenses/LICENSE-2.0
010 *
011 * Unless required by applicable law or agreed to in writing, software
012 * distributed under the License is distributed on an "AS IS" BASIS,
013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 * See the License for the specific language governing permissions and
015 * limitations under the License.
016 */
017
018package org.apache.commons.codec.language;
019
020import static org.junit.Assert.assertEquals;
021import static org.junit.Assert.assertFalse;
022import static org.junit.Assert.assertTrue;
023import static org.junit.Assert.fail;
024
025import org.apache.commons.codec.EncoderException;
026import org.apache.commons.codec.StringEncoderAbstractTest;
027import org.junit.Test;
028
029/**
030 * Tests {@link DoubleMetaphone}.
031 *
032 * <p>Keep this file in UTF-8 encoding for proper Javadoc processing.</p>
033 *
034 * @see "http://www.cuj.com/documents/s=8038/cuj0006philips/"
035 * @version $Id: DoubleMetaphoneTest.html 891688 2013-12-24 20:49:46Z ggregory $
036 */
037public class DoubleMetaphoneTest extends StringEncoderAbstractTest<DoubleMetaphone> {
038
039    /**
040     * Word pairs taken from the test data at http://aspell.net/test/orig/batch0.tab.
041     *
042     * "Copyright (C) 2002 Kevin Atkinson (kevina@gnu.org). Verbatim copying
043     * and distribution of this entire article is permitted in any medium,
044     * provided this notice is preserved."
045     *
046     * The test data was massaged into the array below: each row holds a misspelled word followed by the intended word or phrase.
047     */
048    private static final String[][] FIXTURE = { { "Accosinly", "Occasionally" }, {
049            "Ciculer", "Circler" }, {
050            "Circue", "Circle" }, {
051            "Maddness", "Madness" }, {
052            "Occusionaly", "Occasionally" }, {
053            "Steffen", "Stephen" }, {
054            "Thw", "The" }, {
055            "Unformanlly", "Unfortunately" }, {
056            "Unfortally", "Unfortunately" }, {
057            "abilitey", "ability" }, {
058            "abouy", "about" }, {
059            "absorbtion", "absorption" }, {
060            "accidently", "accidentally" }, {
061            "accomodate", "accommodate" }, {
062            "acommadate", "accommodate" }, {
063            "acord", "accord" }, {
064            "adultry", "adultery" }, {
065            "aggresive", "aggressive" }, {
066            "alchohol", "alcohol" }, {
067            "alchoholic", "alcoholic" }, {
068            "allieve", "alive" }, {
069            "alot", "a lot" }, {
070            "alright", "all right" }, {
071            "amature", "amateur" }, {
072            "ambivilant", "ambivalent" }, {
073            "amification", "amplification" }, {
074            "amourfous", "amorphous" }, {
075            "annoint", "anoint" }, {
076            "annonsment", "announcement" }, {
077            "annoyting", "anting" }, {
078            "annuncio", "announce" }, {
079            "anonomy", "anatomy" }, {
080            "anotomy", "anatomy" }, {
081            "antidesestablishmentarianism", "antidisestablishmentarianism" }, {
082            "antidisestablishmentarism", "antidisestablishmentarianism" }, {
083            "anynomous", "anonymous" }, {
084            "appelet", "applet" }, {
085            "appreceiated", "appreciated" }, {
086            "appresteate", "appreciate" }, {
087            "aquantance", "acquaintance" }, {
088            "aratictature", "architecture" }, {
089            "archeype", "archetype" }, {
090            "aricticure", "architecture" }, {
091            "artic", "arctic" }, {
092            "asentote", "asymptote" }, {
093            "ast", "at" }, {
094            "asterick", "asterisk" }, {
095            "asymetric", "asymmetric" }, {
096            "atentively", "attentively" }, {
097            "autoamlly", "automatically" }, {
098            "bankrot", "bankrupt" }, {
099            "basicly", "basically" }, {
100            "batallion", "battalion" }, {
101            "bbrose", "browse" }, {
102            "beauro", "bureau" }, {
103            "beaurocracy", "bureaucracy" }, {
104            "beggining", "beginning" }, {
105            "beging", "beginning" }, {
106            "behaviour", "behavior" }, {
107            "beleive", "believe" }, {
108            "belive", "believe" }, {
109            "benidifs", "benefits" }, {
110            "bigginging", "beginning" }, {
111            "blait", "bleat" }, {
112            "bouyant", "buoyant" }, {
113            "boygot", "boycott" }, {
114            "brocolli", "broccoli" }, {
115            "buch", "bush" }, {
116            "buder", "butter" }, {
117            "budr", "butter" }, {
118            "budter", "butter" }, {
119            "buracracy", "bureaucracy" }, {
120            "burracracy", "bureaucracy" }, {
121            "buton", "button" }, {
122            "byby", "by by" }, {
123            "cauler", "caller" }, {
124            "ceasar", "caesar" }, {
125            "cemetary", "cemetery" }, {
126            "changeing", "changing" }, {
127            "cheet", "cheat" }, {
128            "cicle", "circle" }, {
129            "cimplicity", "simplicity" }, {
130            "circumstaces", "circumstances" }, {
131            "clob", "club" }, {
132            "coaln", "colon" }, {
133            "cocamena", "cockamamie" }, {
134            "colleaque", "colleague" }, {
135            "colloquilism", "colloquialism" }, {
136            "columne", "column" }, {
137            "comiler", "compiler" }, {
138            "comitmment", "commitment" }, {
139            "comitte", "committee" }, {
140            "comittmen", "commitment" }, {
141            "comittmend", "commitment" }, {
142            "commerciasl", "commercials" }, {
143            "commited", "committed" }, {
144            "commitee", "committee" }, {
145            "companys", "companies" }, {
146            "compicated", "complicated" }, {
147            "comupter", "computer" }, {
148            "concensus", "consensus" }, {
149            "confusionism", "confucianism" }, {
150            "congradulations", "congratulations" }, {
151            "conibation", "contribution" }, {
152            "consident", "consistent" }, {
153            "consident", "consonant" }, {
154            "contast", "constant" }, {
155            "contastant", "constant" }, {
156            "contunie", "continue" }, {
157            "cooly", "coolly" }, {
158            "copping", "coping" }, {
159            "cosmoplyton", "cosmopolitan" }, {
160            "courst", "court" }, {
161            "crasy", "crazy" }, {
162            "cravets", "caveats" }, {
163            "credetability", "credibility" }, {
164            "criqitue", "critique" }, {
165            "croke", "croak" }, {
166            "crucifiction", "crucifixion" }, {
167            "crusifed", "crucified" }, {
168            "ctitique", "critique" }, {
169            "cumba", "combo" }, {
170            "custamisation", "customization" }, {
171            "dag", "dog" }, {
172            "daly", "daily" }, {
173            "danguages", "dangerous" }, {
174            "deaft", "draft" }, {
175            "defence", "defense" }, {
176            "defenly", "defiantly" }, {
177            "definate", "definite" }, {
178            "definately", "definitely" }, {
179            "dependeble", "dependable" }, {
180            "descrption", "description" }, {
181            "descrptn", "description" }, {
182            "desparate", "desperate" }, {
183            "dessicate", "desiccate" }, {
184            "destint", "distant" }, {
185            "develepment", "developments" }, {
186            "developement", "development" }, {
187            "develpond", "development" }, {
188            "devulge", "divulge" }, {
189            "diagree", "disagree" }, {
190            "dieties", "deities" }, {
191            "dinasaur", "dinosaur" }, {
192            "dinasour", "dinosaur" }, {
193            "direcyly", "directly" }, {
194            "discuess", "discuss" }, {
195            "disect", "dissect" }, {
196            "disippate", "dissipate" }, {
197            "disition", "decision" }, {
198            "dispair", "despair" }, {
199            "disssicion", "discussion" }, {
200            "distarct", "distract" }, {
201            "distart", "distort" }, {
202            "distroy", "destroy" }, {
203            "documtations", "documentation" }, {
204            "doenload", "download" }, {
205            "dongle", "dangle" }, {
206            "doog", "dog" }, {
207            "dramaticly", "dramatically" }, {
208            "drunkeness", "drunkenness" }, {
209            "ductioneery", "dictionary" }, {
210            "dur", "due" }, {
211            "duren", "during" }, {
212            "dymatic", "dynamic" }, {
213            "dynaic", "dynamic" }, {
214            "ecstacy", "ecstasy" }, {
215            "efficat", "efficient" }, {
216            "efficity", "efficacy" }, {
217            "effots", "efforts" }, {
218            "egsistence", "existence" }, {
219            "eitiology", "etiology" }, {
220            "elagent", "elegant" }, {
221            "elligit", "elegant" }, {
222            "embarass", "embarrass" }, {
223            "embarassment", "embarrassment" }, {
224            "embaress", "embarrass" }, {
225            "encapsualtion", "encapsulation" }, {
226            "encyclapidia", "encyclopedia" }, {
227            "encyclopia", "encyclopedia" }, {
228            "engins", "engine" }, {
229            "enhence", "enhance" }, {
230            "enligtment", "Enlightenment" }, {
231            "ennuui", "ennui" }, {
232            "enought", "enough" }, {
233            "enventions", "inventions" }, {
234            "envireminakl", "environmental" }, {
235            "enviroment", "environment" }, {
236            "epitomy", "epitome" }, {
237            "equire", "acquire" }, {
238            "errara", "error" }, {
239            "erro", "error" }, {
240            "evaualtion", "evaluation" }, {
241            "evething", "everything" }, {
242            "evtually", "eventually" }, {
243            "excede", "exceed" }, {
244            "excercise", "exercise" }, {
245            "excpt", "except" }, {
246            "excution", "execution" }, {
247            "exhileration", "exhilaration" }, {
248            "existance", "existence" }, {
249            "expleyly", "explicitly" }, {
250            "explity", "explicitly" }, {
251            "expresso", "espresso" }, {
252            "exspidient", "expedient" }, {
253            "extions", "extensions" }, {
254            "factontion", "factorization" }, {
255            "failer", "failure" }, {
256            "famdasy", "fantasy" }, {
257            "faver", "favor" }, {
258            "faxe", "fax" }, {
259            "febuary", "february" }, {
260            "firey", "fiery" }, {
261            "fistival", "festival" }, {
262            "flatterring", "flattering" }, {
263            "fluk", "flux" }, {
264            "flukse", "flux" }, {
265            "fone", "phone" }, {
266            "forsee", "foresee" }, {
267            "frustartaion", "frustrating" }, {
268            "fuction", "function" }, {
269            "funetik", "phonetic" }, {
270            "futs", "guts" }, {
271            "gamne", "came" }, {
272            "gaurd", "guard" }, {
273            "generly", "generally" }, {
274            "ghandi", "gandhi" }, {
275            "goberment", "government" }, {
276            "gobernement", "government" }, {
277            "gobernment", "government" }, {
278            "gotton", "gotten" }, {
279            "gracefull", "graceful" }, {
280            "gradualy", "gradually" }, {
281            "grammer", "grammar" }, {
282            "hallo", "hello" }, {
283            "hapily", "happily" }, {
284            "harrass", "harass" }, {
285            "havne", "have" }, {
286            "heellp", "help" }, {
287            "heighth", "height" }, {
288            "hellp", "help" }, {
289            "helo", "hello" }, {
290            "herlo", "hello" }, {
291            "hifin", "hyphen" }, {
292            "hifine", "hyphen" }, {
293            "higer", "higher" }, {
294            "hiphine", "hyphen" }, {
295            "hippie", "hippy" }, {
296            "hippopotamous", "hippopotamus" }, {
297            "hlp", "help" }, {
298            "hourse", "horse" }, {
299            "houssing", "housing" }, {
300            "howaver", "however" }, {
301            "howver", "however" }, {
302            "humaniti", "humanity" }, {
303            "hyfin", "hyphen" }, {
304            "hypotathes", "hypothesis" }, {
305            "hypotathese", "hypothesis" }, {
306            "hystrical", "hysterical" }, {
307            "ident", "indent" }, {
308            "illegitament", "illegitimate" }, {
309            "imbed", "embed" }, {
310            "imediaetly", "immediately" }, {
311            "imfamy", "infamy" }, {
312            "immenant", "immanent" }, {
313            "implemtes", "implements" }, {
314            "inadvertant", "inadvertent" }, {
315            "incase", "in case" }, {
316            "incedious", "insidious" }, {
317            "incompleet", "incomplete" }, {
318            "incomplot", "incomplete" }, {
319            "inconvenant", "inconvenient" }, {
320            "inconvience", "inconvenience" }, {
321            "independant", "independent" }, {
322            "independenent", "independent" }, {
323            "indepnends", "independent" }, {
324            "indepth", "in depth" }, {
325            "indispensible", "indispensable" }, {
326            "inefficite", "inefficient" }, {
327            "inerface", "interface" }, {
328            "infact", "in fact" }, {
329            "influencial", "influential" }, {
330            "inital", "initial" }, {
331            "initinized", "initialized" }, {
332            "initized", "initialized" }, {
333            "innoculate", "inoculate" }, {
334            "insistant", "insistent" }, {
335            "insistenet", "insistent" }, {
336            "instulation", "installation" }, {
337            "intealignt", "intelligent" }, {
338            "intejilent", "intelligent" }, {
339            "intelegent", "intelligent" }, {
340            "intelegnent", "intelligent" }, {
341            "intelejent", "intelligent" }, {
342            "inteligent", "intelligent" }, {
343            "intelignt", "intelligent" }, {
344            "intellagant", "intelligent" }, {
345            "intellegent", "intelligent" }, {
346            "intellegint", "intelligent" }, {
347            "intellgnt", "intelligent" }, {
348            "intensionality", "intensionally" }, {
349            "interate", "iterate" }, {
350            "internation", "international" }, {
351            "interpretate", "interpret" }, {
352            "interpretter", "interpreter" }, {
353            "intertes", "interested" }, {
354            "intertesd", "interested" }, {
355            "invermeantial", "environmental" }, {
356            "irregardless", "regardless" }, {
357            "irresistable", "irresistible" }, {
358            "irritible", "irritable" }, {
359            "islams", "muslims" }, {
360            "isotrop", "isotope" }, {
361            "isreal", "israel" }, {
362            "johhn", "john" }, {
363            "judgement", "judgment" }, {
364            "kippur", "kipper" }, {
365            "knawing", "knowing" }, {
366            "latext", "latest" }, {
367            "leasve", "leave" }, {
368            "lesure", "leisure" }, {
369            "liasion", "lesion" }, {
370            "liason", "liaison" }, {
371            "libary", "library" }, {
372            "likly", "likely" }, {
373            "lilometer", "kilometer" }, {
374            "liquify", "liquefy" }, {
375            "lloyer", "layer" }, {
376            "lossing", "losing" }, {
377            "luser", "laser" }, {
378            "maintanence", "maintenance" }, {
379            "majaerly", "majority" }, {
380            "majoraly", "majority" }, {
381            "maks", "masks" }, {
382            "mandelbrot", "Mandelbrot" }, {
383            "mant", "want" }, {
384            "marshall", "marshal" }, {
385            "maxium", "maximum" }, {
386            "meory", "memory" }, {
387            "metter", "better" }, {
388            "mic", "mike" }, {
389            "midia", "media" }, {
390            "millenium", "millennium" }, {
391            "miniscule", "minuscule" }, {
392            "minkay", "monkey" }, {
393            "minum", "minimum" }, {
394            "mischievious", "mischievous" }, {
395            "misilous", "miscellaneous" }, {
396            "momento", "memento" }, {
397            "monkay", "monkey" }, {
398            "mosaik", "mosaic" }, {
399            "mostlikely", "most likely" }, {
400            "mousr", "mouser" }, {
401            "mroe", "more" }, {
402            "neccessary", "necessary" }, {
403            "necesary", "necessary" }, {
404            "necesser", "necessary" }, {
405            "neice", "niece" }, {
406            "neighbour", "neighbor" }, {
407            "nemonic", "pneumonic" }, {
408            "nevade", "Nevada" }, {
409            "nickleodeon", "nickelodeon" }, {
410            "nieve", "naive" }, {
411            "noone", "no one" }, {
412            "noticably", "noticeably" }, {
413            "notin", "not in" }, {
414            "nozled", "nuzzled" }, {
415            "objectsion", "objects" }, {
416            "obsfuscate", "obfuscate" }, {
417            "ocassion", "occasion" }, {
418            "occuppied", "occupied" }, {
419            "occurence", "occurrence" }, {
420            "octagenarian", "octogenarian" }, {
421            "olf", "old" }, {
422            "opposim", "opossum" }, {
423            "organise", "organize" }, {
424            "organiz", "organize" }, {
425            "orientate", "orient" }, {
426            "oscilascope", "oscilloscope" }, {
427            "oving", "moving" }, {
428            "paramers", "parameters" }, {
429            "parametic", "parameter" }, {
430            "paranets", "parameters" }, {
431            "partrucal", "particular" }, {
432            "pataphysical", "metaphysical" }, {
433            "patten", "pattern" }, {
434            "permissable", "permissible" }, {
435            "permition", "permission" }, {
436            "permmasivie", "permissive" }, {
437            "perogative", "prerogative" }, {
438            "persue", "pursue" }, {
439            "phantasia", "fantasia" }, {
440            "phenominal", "phenomenal" }, {
441            "picaresque", "picturesque" }, {
442            "playwrite", "playwright" }, {
443            "poeses", "poesies" }, {
444            "polation", "politician" }, {
445            "poligamy", "polygamy" }, {
446            "politict", "politic" }, {
447            "pollice", "police" }, {
448            "polypropalene", "polypropylene" }, {
449            "pompom", "pompon" }, {
450            "possable", "possible" }, {
451            "practicle", "practical" }, {
452            "pragmaticism", "pragmatism" }, {
453            "preceeding", "preceding" }, {
454            "precion", "precision" }, {
455            "precios", "precision" }, {
456            "preemptory", "peremptory" }, {
457            "prefices", "prefixes" }, {
458            "prefixt", "prefixed" }, {
459            "presbyterian", "Presbyterian" }, {
460            "presue", "pursue" }, {
461            "presued", "pursued" }, {
462            "privielage", "privilege" }, {
463            "priviledge", "privilege" }, {
464            "proceedures", "procedures" }, {
465            "pronensiation", "pronunciation" }, {
466            "pronisation", "pronunciation" }, {
467            "pronounciation", "pronunciation" }, {
468            "properally", "properly" }, {
469            "proplematic", "problematic" }, {
470            "protray", "portray" }, {
471            "pscolgst", "psychologist" }, {
472            "psicolagest", "psychologist" }, {
473            "psycolagest", "psychologist" }, {
474            "quoz", "quiz" }, {
475            "radious", "radius" }, {
476            "ramplily", "rampantly" }, {
477            "reccomend", "recommend" }, {
478            "reccona", "raccoon" }, {
479            "recieve", "receive" }, {
480            "reconise", "recognize" }, {
481            "rectangeles", "rectangle" }, {
482            "redign", "redesign" }, {
483            "reoccurring", "recurring" }, {
484            "repitition", "repetition" }, {
485            "replasments", "replacement" }, {
486            "reposable", "responsible" }, {
487            "reseblence", "resemblance" }, {
488            "respct", "respect" }, {
489            "respecally", "respectfully" }, {
490            "roon", "room" }, {
491            "rought", "roughly" }, {
492            "rsx", "RSX" }, {
493            "rudemtry", "rudimentary" }, {
494            "runnung", "running" }, {
495            "sacreligious", "sacrilegious" }, {
496            "saftly", "safely" }, {
497            "salut", "salute" }, {
498            "satifly", "satisfy" }, {
499            "scrabdle", "scrabble" }, {
500            "searcheable", "searchable" }, {
501            "secion", "section" }, {
502            "seferal", "several" }, {
503            "segements", "segments" }, {
504            "sence", "sense" }, {
505            "seperate", "separate" }, {
506            "sherbert", "sherbet" }, {
507            "sicolagest", "psychologist" }, {
508            "sieze", "seize" }, {
509            "simpfilty", "simplicity" }, {
510            "simplye", "simply" }, {
511            "singal", "signal" }, {
512            "sitte", "site" }, {
513            "situration", "situation" }, {
514            "slyph", "sylph" }, {
515            "smil", "smile" }, {
516            "snuck", "sneaked" }, {
517            "sometmes", "sometimes" }, {
518            "soonec", "sonic" }, {
519            "specificialy", "specifically" }, {
520            "spel", "spell" }, {
521            "spoak", "spoke" }, {
522            "sponsered", "sponsored" }, {
523            "stering", "steering" }, {
524            "straightjacket", "straitjacket" }, {
525            "stumach", "stomach" }, {
526            "stutent", "student" }, {
527            "styleguide", "style guide" }, {
528            "subisitions", "substitutions" }, {
529            "subjecribed", "subscribed" }, {
530            "subpena", "subpoena" }, {
531            "substations", "substitutions" }, {
532            "suger", "sugar" }, {
533            "supercede", "supersede" }, {
534            "superfulous", "superfluous" }, {
535            "susan", "Susan" }, {
536            "swimwear", "swim wear" }, {
537            "syncorization", "synchronization" }, {
538            "taff", "tough" }, {
539            "taht", "that" }, {
540            "tattos", "tattoos" }, {
541            "techniquely", "technically" }, {
542            "teh", "the" }, {
543            "tem", "team" }, {
544            "teo", "two" }, {
545            "teridical", "theoretical" }, {
546            "tesst", "test" }, {
547            "tets", "tests" }, {
548            "thanot", "than or" }, {
549            "theirselves", "themselves" }, {
550            "theridically", "theoretical" }, {
551            "thredically", "theoretically" }, {
552            "thruout", "throughout" }, {
553            "ths", "this" }, {
554            "titalate", "titillate" }, {
555            "tobagan", "tobaggon" }, {
556            "tommorrow", "tomorrow" }, {
557            "tomorow", "tomorrow" }, {
558            "tradegy", "tragedy" }, {
559            "trubbel", "trouble" }, {
560            "ttest", "test" }, {
561            "tunnellike", "tunnel like" }, {
562            "tured", "turned" }, {
563            "tyrrany", "tyranny" }, {
564            "unatourral", "unnatural" }, {
565            "unaturral", "unnatural" }, {
566            "unconisitional", "unconstitutional" }, {
567            "unconscience", "unconscious" }, {
568            "underladder", "under ladder" }, {
569            "unentelegible", "unintelligible" }, {
570            "unfortunently", "unfortunately" }, {
571            "unnaturral", "unnatural" }, {
572            "upcast", "up cast" }, {
573            "upmost", "utmost" }, {
574            "uranisium", "uranium" }, {
575            "verison", "version" }, {
576            "vinagarette", "vinaigrette" }, {
577            "volumptuous", "voluptuous" }, {
578            "volunteerism", "voluntarism" }, {
579            "volye", "volley" }, {
580            "wadting", "wasting" }, {
581            "waite", "wait" }, {
582            "wan't", "won't" }, {
583            "warloord", "warlord" }, {
584            "whaaat", "what" }, {
585            "whard", "ward" }, {
586            "whimp", "wimp" }, {
587            "wicken", "weaken" }, {
588            "wierd", "weird" }, {
589            "wrank", "rank" }, {
590            "writeen", "righten" }, {
591            "writting", "writing" }, {
592            "wundeews", "windows" }, {
593            "yeild", "yield" }, {
594            "youe", "your" }
595    };
596
597    /**
598     * A subset of FIXTURE generated by this test.
599     */
600    private static final String[][] MATCHES = { { "Accosinly", "Occasionally" }, {
601            "Maddness", "Madness" }, {
602            "Occusionaly", "Occasionally" }, {
603            "Steffen", "Stephen" }, {
604            "Thw", "The" }, {
605            "Unformanlly", "Unfortunately" }, {
606            "Unfortally", "Unfortunately" }, {
607            "abilitey", "ability" }, {
608            "absorbtion", "absorption" }, {
609            "accidently", "accidentally" }, {
610            "accomodate", "accommodate" }, {
611            "acommadate", "accommodate" }, {
612            "acord", "accord" }, {
613            "adultry", "adultery" }, {
614            "aggresive", "aggressive" }, {
615            "alchohol", "alcohol" }, {
616            "alchoholic", "alcoholic" }, {
617            "allieve", "alive" }, {
618            "alot", "a lot" }, {
619            "alright", "all right" }, {
620            "amature", "amateur" }, {
621            "ambivilant", "ambivalent" }, {
622            "amourfous", "amorphous" }, {
623            "annoint", "anoint" }, {
624            "annonsment", "announcement" }, {
625            "annoyting", "anting" }, {
626            "annuncio", "announce" }, {
627            "anotomy", "anatomy" }, {
628            "antidesestablishmentarianism", "antidisestablishmentarianism" }, {
629            "antidisestablishmentarism", "antidisestablishmentarianism" }, {
630            "anynomous", "anonymous" }, {
631            "appelet", "applet" }, {
632            "appreceiated", "appreciated" }, {
633            "appresteate", "appreciate" }, {
634            "aquantance", "acquaintance" }, {
635            "aricticure", "architecture" }, {
636            "asterick", "asterisk" }, {
637            "asymetric", "asymmetric" }, {
638            "atentively", "attentively" }, {
639            "bankrot", "bankrupt" }, {
640            "basicly", "basically" }, {
641            "batallion", "battalion" }, {
642            "bbrose", "browse" }, {
643            "beauro", "bureau" }, {
644            "beaurocracy", "bureaucracy" }, {
645            "beggining", "beginning" }, {
646            "behaviour", "behavior" }, {
647            "beleive", "believe" }, {
648            "belive", "believe" }, {
649            "blait", "bleat" }, {
650            "bouyant", "buoyant" }, {
651            "boygot", "boycott" }, {
652            "brocolli", "broccoli" }, {
653            "buder", "butter" }, {
654            "budr", "butter" }, {
655            "budter", "butter" }, {
656            "buracracy", "bureaucracy" }, {
657            "burracracy", "bureaucracy" }, {
658            "buton", "button" }, {
659            "byby", "by by" }, {
660            "cauler", "caller" }, {
661            "ceasar", "caesar" }, {
662            "cemetary", "cemetery" }, {
663            "changeing", "changing" }, {
664            "cheet", "cheat" }, {
665            "cimplicity", "simplicity" }, {
666            "circumstaces", "circumstances" }, {
667            "clob", "club" }, {
668            "coaln", "colon" }, {
669            "colleaque", "colleague" }, {
670            "colloquilism", "colloquialism" }, {
671            "columne", "column" }, {
672            "comitmment", "commitment" }, {
673            "comitte", "committee" }, {
674            "comittmen", "commitment" }, {
675            "comittmend", "commitment" }, {
676            "commerciasl", "commercials" }, {
677            "commited", "committed" }, {
678            "commitee", "committee" }, {
679            "companys", "companies" }, {
680            "comupter", "computer" }, {
681            "concensus", "consensus" }, {
682            "confusionism", "confucianism" }, {
683            "congradulations", "congratulations" }, {
684            "contunie", "continue" }, {
685            "cooly", "coolly" }, {
686            "copping", "coping" }, {
687            "cosmoplyton", "cosmopolitan" }, {
688            "crasy", "crazy" }, {
689            "croke", "croak" }, {
690            "crucifiction", "crucifixion" }, {
691            "crusifed", "crucified" }, {
692            "cumba", "combo" }, {
693            "custamisation", "customization" }, {
694            "dag", "dog" }, {
695            "daly", "daily" }, {
696            "defence", "defense" }, {
697            "definate", "definite" }, {
698            "definately", "definitely" }, {
699            "dependeble", "dependable" }, {
700            "descrption", "description" }, {
701            "descrptn", "description" }, {
702            "desparate", "desperate" }, {
703            "dessicate", "desiccate" }, {
704            "destint", "distant" }, {
705            "develepment", "developments" }, {
706            "developement", "development" }, {
707            "develpond", "development" }, {
708            "devulge", "divulge" }, {
709            "dieties", "deities" }, {
710            "dinasaur", "dinosaur" }, {
711            "dinasour", "dinosaur" }, {
712            "discuess", "discuss" }, {
713            "disect", "dissect" }, {
714            "disippate", "dissipate" }, {
715            "disition", "decision" }, {
716            "dispair", "despair" }, {
717            "distarct", "distract" }, {
718            "distart", "distort" }, {
719            "distroy", "destroy" }, {
720            "doenload", "download" }, {
721            "dongle", "dangle" }, {
722            "doog", "dog" }, {
723            "dramaticly", "dramatically" }, {
724            "drunkeness", "drunkenness" }, {
725            "ductioneery", "dictionary" }, {
726            "ecstacy", "ecstasy" }, {
727            "egsistence", "existence" }, {
728            "eitiology", "etiology" }, {
729            "elagent", "elegant" }, {
730            "embarass", "embarrass" }, {
731            "embarassment", "embarrassment" }, {
732            "embaress", "embarrass" }, {
733            "encapsualtion", "encapsulation" }, {
734            "encyclapidia", "encyclopedia" }, {
735            "encyclopia", "encyclopedia" }, {
736            "engins", "engine" }, {
737            "enhence", "enhance" }, {
738            "ennuui", "ennui" }, {
739            "enventions", "inventions" }, {
740            "envireminakl", "environmental" }, {
741            "enviroment", "environment" }, {
742            "epitomy", "epitome" }, {
743            "equire", "acquire" }, {
744            "errara", "error" }, {
745            "evaualtion", "evaluation" }, {
746            "excede", "exceed" }, {
747            "excercise", "exercise" }, {
748            "excpt", "except" }, {
749            "exhileration", "exhilaration" }, {
750            "existance", "existence" }, {
751            "expleyly", "explicitly" }, {
752            "explity", "explicitly" }, {
753            "failer", "failure" }, {
754            "faver", "favor" }, {
755            "faxe", "fax" }, {
756            "firey", "fiery" }, {
757            "fistival", "festival" }, {
758            "flatterring", "flattering" }, {
759            "flukse", "flux" }, {
760            "fone", "phone" }, {
761            "forsee", "foresee" }, {
762            "frustartaion", "frustrating" }, {
763            "funetik", "phonetic" }, {
764            "gaurd", "guard" }, {
765            "generly", "generally" }, {
766            "ghandi", "gandhi" }, {
767            "gotton", "gotten" }, {
768            "gracefull", "graceful" }, {
769            "gradualy", "gradually" }, {
770            "grammer", "grammar" }, {
771            "hallo", "hello" }, {
772            "hapily", "happily" }, {
773            "harrass", "harass" }, {
774            "heellp", "help" }, {
775            "heighth", "height" }, {
776            "hellp", "help" }, {
777            "helo", "hello" }, {
778            "hifin", "hyphen" }, {
779            "hifine", "hyphen" }, {
780            "hiphine", "hyphen" }, {
781            "hippie", "hippy" }, {
782            "hippopotamous", "hippopotamus" }, {
783            "hourse", "horse" }, {
784            "houssing", "housing" }, {
785            "howaver", "however" }, {
786            "howver", "however" }, {
787            "humaniti", "humanity" }, {
788            "hyfin", "hyphen" }, {
789            "hystrical", "hysterical" }, {
790            "illegitament", "illegitimate" }, {
791            "imbed", "embed" }, {
792            "imediaetly", "immediately" }, {
793            "immenant", "immanent" }, {
794            "implemtes", "implements" }, {
795            "inadvertant", "inadvertent" }, {
796            "incase", "in case" }, {
797            "incedious", "insidious" }, {
798            "incompleet", "incomplete" }, {
799            "incomplot", "incomplete" }, {
800            "inconvenant", "inconvenient" }, {
801            "inconvience", "inconvenience" }, {
802            "independant", "independent" }, {
803            "independenent", "independent" }, {
804            "indepnends", "independent" }, {
805            "indepth", "in depth" }, {
806            "indispensible", "indispensable" }, {
807            "inefficite", "inefficient" }, {
808            "infact", "in fact" }, {
809            "influencial", "influential" }, {
810            "innoculate", "inoculate" }, {
811            "insistant", "insistent" }, {
812            "insistenet", "insistent" }, {
813            "instulation", "installation" }, {
814            "intealignt", "intelligent" }, {
815            "intelegent", "intelligent" }, {
816            "intelegnent", "intelligent" }, {
817            "intelejent", "intelligent" }, {
818            "inteligent", "intelligent" }, {
819            "intelignt", "intelligent" }, {
820            "intellagant", "intelligent" }, {
821            "intellegent", "intelligent" }, {
822            "intellegint", "intelligent" }, {
823            "intellgnt", "intelligent" }, {
824            "intensionality", "intensionally" }, {
825            "internation", "international" }, {
826            "interpretate", "interpret" }, {
827            "interpretter", "interpreter" }, {
828            "intertes", "interested" }, {
829            "intertesd", "interested" }, {
830            "invermeantial", "environmental" }, {
831            "irresistable", "irresistible" }, {
832            "irritible", "irritable" }, {
833            "isreal", "israel" }, {
834            "johhn", "john" }, {
835            "kippur", "kipper" }, {
836            "knawing", "knowing" }, {
837            "lesure", "leisure" }, {
838            "liasion", "lesion" }, {
839            "liason", "liaison" }, {
840            "likly", "likely" }, {
841            "liquify", "liquefy" }, {
842            "lloyer", "layer" }, {
843            "lossing", "losing" }, {
844            "luser", "laser" }, {
845            "maintanence", "maintenance" }, {
846            "mandelbrot", "Mandelbrot" }, {
847            "marshall", "marshal" }, {
848            "maxium", "maximum" }, {
849            "mic", "mike" }, {
850            "midia", "media" }, {
851            "millenium", "millennium" }, {
852            "miniscule", "minuscule" }, {
853            "minkay", "monkey" }, {
854            "mischievious", "mischievous" }, {
855            "momento", "memento" }, {
856            "monkay", "monkey" }, {
857            "mosaik", "mosaic" }, {
858            "mostlikely", "most likely" }, {
859            "mousr", "mouser" }, {
860            "mroe", "more" }, {
861            "necesary", "necessary" }, {
862            "necesser", "necessary" }, {
863            "neice", "niece" }, {
864            "neighbour", "neighbor" }, {
865            "nemonic", "pneumonic" }, {
866            "nevade", "Nevada" }, {
867            "nickleodeon", "nickelodeon" }, {
868            "nieve", "naive" }, {
869            "noone", "no one" }, {
870            "notin", "not in" }, {
871            "nozled", "nuzzled" }, {
872            "objectsion", "objects" }, {
873            "ocassion", "occasion" }, {
874            "occuppied", "occupied" }, {
875            "occurence", "occurrence" }, {
876            "octagenarian", "octogenarian" }, {
877            "opposim", "opossum" }, {
878            "organise", "organize" }, {
879            "organiz", "organize" }, {
880            "orientate", "orient" }, {
881            "oscilascope", "oscilloscope" }, {
882            "parametic", "parameter" }, {
883            "permissable", "permissible" }, {
884            "permmasivie", "permissive" }, {
885            "persue", "pursue" }, {
886            "phantasia", "fantasia" }, {
887            "phenominal", "phenomenal" }, {
888            "playwrite", "playwright" }, {
889            "poeses", "poesies" }, {
890            "poligamy", "polygamy" }, {
891            "politict", "politic" }, {
892            "pollice", "police" }, {
893            "polypropalene", "polypropylene" }, {
894            "possable", "possible" }, {
895            "practicle", "practical" }, {
896            "pragmaticism", "pragmatism" }, {
897            "preceeding", "preceding" }, {
898            "precios", "precision" }, {
899            "preemptory", "peremptory" }, {
900            "prefixt", "prefixed" }, {
901            "presbyterian", "Presbyterian" }, {
902            "presue", "pursue" }, {
903            "presued", "pursued" }, {
904            "privielage", "privilege" }, {
905            "priviledge", "privilege" }, {
906            "proceedures", "procedures" }, {
907            "pronensiation", "pronunciation" }, {
908            "pronounciation", "pronunciation" }, {
909            "properally", "properly" }, {
910            "proplematic", "problematic" }, {
911            "protray", "portray" }, {
912            "pscolgst", "psychologist" }, {
913            "psicolagest", "psychologist" }, {
914            "psycolagest", "psychologist" }, {
915            "quoz", "quiz" }, {
916            "radious", "radius" }, {
917            "reccomend", "recommend" }, {
918            "reccona", "raccoon" }, {
919            "recieve", "receive" }, {
920            "reconise", "recognize" }, {
921            "rectangeles", "rectangle" }, {
922            "reoccurring", "recurring" }, {
923            "repitition", "repetition" }, {
924            "replasments", "replacement" }, {
925            "respct", "respect" }, {
926            "respecally", "respectfully" }, {
927            "rsx", "RSX" }, {
928            "runnung", "running" }, {
929            "sacreligious", "sacrilegious" }, {
930            "salut", "salute" }, {
931            "searcheable", "searchable" }, {
932            "seferal", "several" }, {
933            "segements", "segments" }, {
934            "sence", "sense" }, {
935            "seperate", "separate" }, {
936            "sicolagest", "psychologist" }, {
937            "sieze", "seize" }, {
938            "simplye", "simply" }, {
939            "sitte", "site" }, {
940            "slyph", "sylph" }, {
941            "smil", "smile" }, {
942            "sometmes", "sometimes" }, {
943            "soonec", "sonic" }, {
944            "specificialy", "specifically" }, {
945            "spel", "spell" }, {
946            "spoak", "spoke" }, {
947            "sponsered", "sponsored" }, {
948            "stering", "steering" }, {
949            "straightjacket", "straitjacket" }, {
950            "stumach", "stomach" }, {
951            "stutent", "student" }, {
952            "styleguide", "style guide" }, {
953            "subpena", "subpoena" }, {
954            "substations", "substitutions" }, {
955            "supercede", "supersede" }, {
956            "superfulous", "superfluous" }, {
957            "susan", "Susan" }, {
958            "swimwear", "swim wear" }, {
959            "syncorization", "synchronization" }, {
960            "taff", "tough" }, {
961            "taht", "that" }, {
962            "tattos", "tattoos" }, {
963            "techniquely", "technically" }, {
964            "teh", "the" }, {
965            "tem", "team" }, {
966            "teo", "two" }, {
967            "teridical", "theoretical" }, {
968            "tesst", "test" }, {
969            "theridically", "theoretical" }, {
970            "thredically", "theoretically" }, {
971            "thruout", "throughout" }, {
972            "ths", "this" }, {
973            "titalate", "titillate" }, {
974            "tobagan", "tobaggon" }, {
975            "tommorrow", "tomorrow" }, {
976            "tomorow", "tomorrow" }, {
977            "trubbel", "trouble" }, {
978            "ttest", "test" }, {
979            "tyrrany", "tyranny" }, {
980            "unatourral", "unnatural" }, {
981            "unaturral", "unnatural" }, {
982            "unconisitional", "unconstitutional" }, {
983            "unconscience", "unconscious" }, {
984            "underladder", "under ladder" }, {
985            "unentelegible", "unintelligible" }, {
986            "unfortunently", "unfortunately" }, {
987            "unnaturral", "unnatural" }, {
988            "upcast", "up cast" }, {
989            "verison", "version" }, {
990            "vinagarette", "vinaigrette" }, {
991            "volunteerism", "voluntarism" }, {
992            "volye", "volley" }, {
993            "waite", "wait" }, {
994            "wan't", "won't" }, {
995            "warloord", "warlord" }, {
996            "whaaat", "what" }, {
997            "whard", "ward" }, {
998            "whimp", "wimp" }, {
999            "wicken", "weaken" }, {
1000            "wierd", "weird" }, {
1001            "wrank", "rank" }, {
1002            "writeen", "righten" }, {
1003            "writting", "writing" }, {
1004            "wundeews", "windows" }, {
1005            "yeild", "yield" }, };
1006
1007    /**
1008     * Tests encoding APIs in one place.
1009     */
1010    private void assertDoubleMetaphone(final String expected, final String source) {
1011        assertEquals(expected, this.getStringEncoder().encode(source));
1012        try {
1013            assertEquals(expected, this.getStringEncoder().encode((Object) source));
1014        } catch (final EncoderException e) {
1015            fail("Unexpected expection: " + e);
1016        }
1017        assertEquals(expected, this.getStringEncoder().doubleMetaphone(source));
1018        assertEquals(expected, this.getStringEncoder().doubleMetaphone(source, false));
1019    }
1020
1021    /**
1022     * Tests encoding APIs in one place.
1023     */
1024    public void assertDoubleMetaphoneAlt(final String expected, final String source) {
1025        assertEquals(expected, this.getStringEncoder().doubleMetaphone(source, true));
1026    }
1027
1028    public void doubleMetaphoneEqualTest(final String[][] pairs, final boolean useAlternate) {
1029        this.validateFixture(pairs);
1030        for (final String[] pair : pairs) {
1031            final String name0 = pair[0];
1032            final String name1 = pair[1];
1033            final String failMsg = "Expected match between " + name0 + " and " + name1 + " (use alternate: " + useAlternate + ")";
1034            assertTrue(failMsg, this.getStringEncoder().isDoubleMetaphoneEqual(name0, name1, useAlternate));
1035            assertTrue(failMsg, this.getStringEncoder().isDoubleMetaphoneEqual(name1, name0, useAlternate));
1036            if (!useAlternate) {
1037                assertTrue(failMsg, this.getStringEncoder().isDoubleMetaphoneEqual(name0, name1));
1038                assertTrue(failMsg, this.getStringEncoder().isDoubleMetaphoneEqual(name1, name0));
1039            }
1040        }
1041    }
1042
1043    public void doubleMetaphoneNotEqualTest(final boolean alternate) {
1044        assertFalse(this.getStringEncoder().isDoubleMetaphoneEqual("Brain", "Band", alternate));
1045        assertFalse(this.getStringEncoder().isDoubleMetaphoneEqual("Band", "Brain", alternate));
1046
1047        if (!alternate) {
1048            assertFalse(this.getStringEncoder().isDoubleMetaphoneEqual("Brain", "Band"));
1049            assertFalse(this.getStringEncoder().isDoubleMetaphoneEqual("Band", "Brain"));
1050        }
1051    }
1052
1053    @Override
1054    protected DoubleMetaphone createStringEncoder() {
1055        return new DoubleMetaphone();
1056    }
1057
1058    @Test
1059    public void testDoubleMetaphone() {
1060        assertDoubleMetaphone("TSTN", "testing");
1061        assertDoubleMetaphone("0", "The");
1062        assertDoubleMetaphone("KK", "quick");
1063        assertDoubleMetaphone("PRN", "brown");
1064        assertDoubleMetaphone("FKS", "fox");
1065        assertDoubleMetaphone("JMPT", "jumped");
1066        assertDoubleMetaphone("AFR", "over");
1067        assertDoubleMetaphone("0", "the");
1068        assertDoubleMetaphone("LS", "lazy");
1069        assertDoubleMetaphone("TKS", "dogs");
1070        assertDoubleMetaphone("MKFR", "MacCafferey");
1071        assertDoubleMetaphone("STFN", "Stephan");
1072        assertDoubleMetaphone("KSSK", "Kuczewski");
1073        assertDoubleMetaphone("MKLL", "McClelland");
1074        assertDoubleMetaphone("SNHS", "san jose");
1075        assertDoubleMetaphone("SNFP", "xenophobia");
1076
1077        assertDoubleMetaphoneAlt("TSTN", "testing");
1078        assertDoubleMetaphoneAlt("T", "The");
1079        assertDoubleMetaphoneAlt("KK", "quick");
1080        assertDoubleMetaphoneAlt("PRN", "brown");
1081        assertDoubleMetaphoneAlt("FKS", "fox");
1082        assertDoubleMetaphoneAlt("AMPT", "jumped");
1083        assertDoubleMetaphoneAlt("AFR", "over");
1084        assertDoubleMetaphoneAlt("T", "the");
1085        assertDoubleMetaphoneAlt("LS", "lazy");
1086        assertDoubleMetaphoneAlt("TKS", "dogs");
1087        assertDoubleMetaphoneAlt("MKFR", "MacCafferey");
1088        assertDoubleMetaphoneAlt("STFN", "Stephan");
1089        assertDoubleMetaphoneAlt("KXFS", "Kutchefski");
1090        assertDoubleMetaphoneAlt("MKLL", "McClelland");
1091        assertDoubleMetaphoneAlt("SNHS", "san jose");
1092        assertDoubleMetaphoneAlt("SNFP", "xenophobia");
1093        assertDoubleMetaphoneAlt("FKR", "Fokker");
1094        assertDoubleMetaphoneAlt("AK", "Joqqi");
1095        assertDoubleMetaphoneAlt("HF", "Hovvi");
1096        assertDoubleMetaphoneAlt("XRN", "Czerny");
1097    }
1098
1099    @Test
1100    public void testEmpty() {
1101        assertEquals(null, this.getStringEncoder().doubleMetaphone(null));
1102        assertEquals(null, this.getStringEncoder().doubleMetaphone(""));
1103        assertEquals(null, this.getStringEncoder().doubleMetaphone(" "));
1104        assertEquals(null, this.getStringEncoder().doubleMetaphone("\t\n\r "));
1105    }
1106
1107    /**
1108     * Test setting maximum length
1109     */
1110    @Test
1111    public void testSetMaxCodeLength() {
1112        final String value = "jumped";
1113
1114        final DoubleMetaphone doubleMetaphone = new DoubleMetaphone();
1115
1116        // Sanity check of default settings
1117        assertEquals("Default Max Code Length", 4, doubleMetaphone.getMaxCodeLen());
1118        assertEquals("Default Primary",   "JMPT", doubleMetaphone.doubleMetaphone(value, false));
1119        assertEquals("Default Alternate", "AMPT", doubleMetaphone.doubleMetaphone(value, true));
1120
1121        // Check setting Max Code Length
1122        doubleMetaphone.setMaxCodeLen(3);
1123        assertEquals("Set Max Code Length", 3, doubleMetaphone.getMaxCodeLen());
1124        assertEquals("Max=3 Primary",   "JMP", doubleMetaphone.doubleMetaphone(value, false));
1125        assertEquals("Max=3 Alternate", "AMP", doubleMetaphone.doubleMetaphone(value, true));
1126    }
1127
1128    @Test
1129    public void testIsDoubleMetaphoneEqualBasic() {
1130        final String[][] testFixture = new String[][] { { "Case", "case" }, {
1131                "CASE", "Case" }, {
1132                "caSe", "cAsE" }, {
1133                "cookie", "quick" }, {
1134                "quick", "cookie" }, {
1135                "Brian", "Bryan" }, {
1136                "Auto", "Otto" }, {
1137                "Steven", "Stefan" }, {
1138                "Philipowitz", "Filipowicz" }
1139        };
1140        doubleMetaphoneEqualTest(testFixture, false);
1141        doubleMetaphoneEqualTest(testFixture, true);
1142    }
1143
1144    /**
1145     * Example in the original article but failures in this Java impl:
1146     */
1147    @Test
1148    public void testIsDoubleMetaphoneEqualExtended1() {
1149        //        String[][] testFixture = new String[][] { { "Smith", "Schmidt" }
1150        //        };
1151        //        doubleMetaphoneEqualTest(testFixture, false);
1152        //        doubleMetaphoneEqualTest(testFixture, true);
1153    }
1154
1155    @Test
1156    public void testIsDoubleMetaphoneEqualExtended2() {
1157        final String[][] testFixture = new String[][] { { "Jablonski", "Yablonsky" }
1158        };
1159        //doubleMetaphoneEqualTest(testFixture, false);
1160        doubleMetaphoneEqualTest(testFixture, true);
1161    }
1162
1163    /**
1164     * Used to generate the MATCHES array and test possible matches from the
1165     * FIXTURE array.
1166     */
1167    @Test
1168    public void testIsDoubleMetaphoneEqualExtended3() {
1169        this.validateFixture(FIXTURE);
1170        final StringBuilder failures = new StringBuilder();
1171        final StringBuilder matches = new StringBuilder();
1172        final String cr = System.getProperty("line.separator");
1173        matches.append("private static final String[][] MATCHES = {" + cr);
1174        int failCount = 0;
1175        for (int i = 0; i < FIXTURE.length; i++) {
1176            final String name0 = FIXTURE[i][0];
1177            final String name1 = FIXTURE[i][1];
1178            final boolean match1 = this.getStringEncoder().isDoubleMetaphoneEqual(name0, name1, false);
1179            final boolean match2 = this.getStringEncoder().isDoubleMetaphoneEqual(name0, name1, true);
1180            if (match1 == false && match2 == false) {
1181                final String failMsg = "[" + i + "] " + name0 + " and " + name1 + cr;
1182                failures.append(failMsg);
1183                failCount++;
1184            } else {
1185                matches.append("{\"" + name0 + "\", \"" + name1 + "\"}," + cr);
1186            }
1187        }
1188        matches.append("};");
1189        // Turn on to print a new MATCH array
1190        //System.out.println(matches.toString());
1191        if (failCount > 0) {
1192            // Turn on to see which pairs do NOT match.
1193            // String msg = failures.toString();
1194            //fail(failCount + " failures out of " + FIXTURE.length + ". The
1195            // following could be made to match: " + cr + msg);
1196        }
1197    }
1198
1199    @Test
1200    public void testIsDoubleMetaphoneEqualWithMATCHES() {
1201        this.validateFixture(MATCHES);
1202        for (int i = 0; i < MATCHES.length; i++) {
1203            final String name0 = MATCHES[i][0];
1204            final String name1 = MATCHES[i][1];
1205            final boolean match1 = this.getStringEncoder().isDoubleMetaphoneEqual(name0, name1, false);
1206            final boolean match2 = this.getStringEncoder().isDoubleMetaphoneEqual(name0, name1, true);
1207            if (match1 == false && match2 == false) {
1208                fail("Expected match [" + i + "] " + name0 + " and " + name1);
1209            }
1210        }
1211    }
1212
1213    @Test
1214    public void testIsDoubleMetaphoneNotEqual() {
1215        doubleMetaphoneNotEqualTest(false);
1216        doubleMetaphoneNotEqualTest(true);
1217    }
1218
1219    @Test
1220    public void testCCedilla() {
1221        assertTrue(this.getStringEncoder().isDoubleMetaphoneEqual("\u00e7", "S")); // c-cedilla
1222    }
1223
1224    @Test
1225    public void testNTilde() {
1226        assertTrue(this.getStringEncoder().isDoubleMetaphoneEqual("\u00f1", "N")); // n-tilde
1227    }
1228
1229    public void validateFixture(final String[][] pairs) {
1230        if (pairs.length == 0) {
1231            fail("Test fixture is empty");
1232        }
1233        for (int i = 0; i < pairs.length; i++) {
1234            if (pairs[i].length != 2) {
1235                fail("Error in test fixture in the data array at index " + i);
1236            }
1237        }
1238    }
1239}