View Javadoc
1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one or more
3    * contributor license agreements.  See the NOTICE file distributed with
4    * this work for additional information regarding copyright ownership.
5    * The ASF licenses this file to You under the Apache License, Version 2.0
6    * (the "License"); you may not use this file except in compliance with
7    * the License.  You may obtain a copy of the License at
8    *
9    *      http://www.apache.org/licenses/LICENSE-2.0
10   *
11   * Unless required by applicable law or agreed to in writing, software
12   * distributed under the License is distributed on an "AS IS" BASIS,
13   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14   * See the License for the specific language governing permissions and
15   * limitations under the License.
16   */
17  
18  package org.apache.commons.codec.language;
19  
20  import static org.junit.jupiter.api.Assertions.assertEquals;
21  import static org.junit.jupiter.api.Assertions.assertFalse;
22  import static org.junit.jupiter.api.Assertions.assertNull;
23  import static org.junit.jupiter.api.Assertions.assertTrue;
24  import static org.junit.jupiter.api.Assertions.fail;
25  
26  import org.apache.commons.codec.AbstractStringEncoderTest;
27  import org.apache.commons.codec.EncoderException;
28  import org.junit.jupiter.api.Test;
29  
30  /**
31   * Tests {@link DoubleMetaphone}.
32   *
33   * <p>Keep this file in UTF-8 encoding for proper Javadoc processing.</p>
34   *
35   * @see "http://www.cuj.com/documents/s=8038/cuj0006philips/"
36   */
37  public class DoubleMetaphoneTest extends AbstractStringEncoderTest<DoubleMetaphone> {
38  
39      /**
40       * Pairs of (misspelling, intended word) taken from https://aspell.net/test/orig/batch0.tab.
41       *
42       * "Copyright (C) 2002 Kevin Atkinson (kevina@gnu.org). Verbatim copying
43       * and distribution of this entire article is permitted in any medium,
44       * provided this notice is preserved."
45       *
46       * The test data was massaged into the array below; a misspelling may appear more than once (see "consident").
47       */
48      private static final String[][] FIXTURE = { { "Accosinly", "Occasionally" }, {
49              "Ciculer", "Circler" }, {
50              "Circue", "Circle" }, {
51              "Maddness", "Madness" }, {
52              "Occusionaly", "Occasionally" }, {
53              "Steffen", "Stephen" }, {
54              "Thw", "The" }, {
55              "Unformanlly", "Unfortunately" }, {
56              "Unfortally", "Unfortunately" }, {
57              "abilitey", "ability" }, {
58              "abouy", "about" }, {
59              "absorbtion", "absorption" }, {
60              "accidently", "accidentally" }, {
61              "accomodate", "accommodate" }, {
62              "acommadate", "accommodate" }, {
63              "acord", "accord" }, {
64              "adultry", "adultery" }, {
65              "aggresive", "aggressive" }, {
66              "alchohol", "alcohol" }, {
67              "alchoholic", "alcoholic" }, {
68              "allieve", "alive" }, {
69              "alot", "a lot" }, {
70              "alright", "all right" }, {
71              "amature", "amateur" }, {
72              "ambivilant", "ambivalent" }, {
73              "amification", "amplification" }, {
74              "amourfous", "amorphous" }, {
75              "annoint", "anoint" }, {
76              "annonsment", "announcement" }, {
77              "annoyting", "anting" }, {
78              "annuncio", "announce" }, {
79              "anonomy", "anatomy" }, {
80              "anotomy", "anatomy" }, {
81              "antidesestablishmentarianism", "antidisestablishmentarianism" }, {
82              "antidisestablishmentarism", "antidisestablishmentarianism" }, {
83              "anynomous", "anonymous" }, {
84              "appelet", "applet" }, {
85              "appreceiated", "appreciated" }, {
86              "appresteate", "appreciate" }, {
87              "aquantance", "acquaintance" }, {
88              "aratictature", "architecture" }, {
89              "archeype", "archetype" }, {
90              "aricticure", "architecture" }, {
91              "artic", "arctic" }, {
92              "asentote", "asymptote" }, {
93              "ast", "at" }, {
94              "asterick", "asterisk" }, {
95              "asymetric", "asymmetric" }, {
96              "atentively", "attentively" }, {
97              "autoamlly", "automatically" }, {
98              "bankrot", "bankrupt" }, {
99              "basicly", "basically" }, {
100             "batallion", "battalion" }, {
101             "bbrose", "browse" }, {
102             "beauro", "bureau" }, {
103             "beaurocracy", "bureaucracy" }, {
104             "beggining", "beginning" }, {
105             "beging", "beginning" }, {
106             "behaviour", "behavior" }, {
107             "beleive", "believe" }, {
108             "belive", "believe" }, {
109             "benidifs", "benefits" }, {
110             "bigginging", "beginning" }, {
111             "blait", "bleat" }, {
112             "bouyant", "buoyant" }, {
113             "boygot", "boycott" }, {
114             "brocolli", "broccoli" }, {
115             "buch", "bush" }, {
116             "buder", "butter" }, {
117             "budr", "butter" }, {
118             "budter", "butter" }, {
119             "buracracy", "bureaucracy" }, {
120             "burracracy", "bureaucracy" }, {
121             "buton", "button" }, {
122             "byby", "by by" }, {
123             "cauler", "caller" }, {
124             "ceasar", "caesar" }, {
125             "cemetary", "cemetery" }, {
126             "changeing", "changing" }, {
127             "cheet", "cheat" }, {
128             "cicle", "circle" }, {
129             "cimplicity", "simplicity" }, {
130             "circumstaces", "circumstances" }, {
131             "clob", "club" }, {
132             "coaln", "colon" }, {
133             "cocamena", "cockamamie" }, {
134             "colleaque", "colleague" }, {
135             "colloquilism", "colloquialism" }, {
136             "columne", "column" }, {
137             "comiler", "compiler" }, {
138             "comitmment", "commitment" }, {
139             "comitte", "committee" }, {
140             "comittmen", "commitment" }, {
141             "comittmend", "commitment" }, {
142             "commerciasl", "commercials" }, {
143             "commited", "committed" }, {
144             "commitee", "committee" }, {
145             "companys", "companies" }, {
146             "compicated", "complicated" }, {
147             "comupter", "computer" }, {
148             "concensus", "consensus" }, {
149             "confusionism", "confucianism" }, {
150             "congradulations", "congratulations" }, {
151             "conibation", "contribution" }, {
152             "consident", "consistent" }, {
153             "consident", "consonant" }, {
154             "contast", "constant" }, {
155             "contastant", "constant" }, {
156             "contunie", "continue" }, {
157             "cooly", "coolly" }, {
158             "copping", "coping" }, {
159             "cosmoplyton", "cosmopolitan" }, {
160             "courst", "court" }, {
161             "crasy", "crazy" }, {
162             "cravets", "caveats" }, {
163             "credetability", "credibility" }, {
164             "criqitue", "critique" }, {
165             "croke", "croak" }, {
166             "crucifiction", "crucifixion" }, {
167             "crusifed", "crucified" }, {
168             "ctitique", "critique" }, {
169             "cumba", "combo" }, {
170             "custamisation", "customization" }, {
171             "dag", "dog" }, {
172             "daly", "daily" }, {
173             "danguages", "dangerous" }, {
174             "deaft", "draft" }, {
175             "defence", "defense" }, {
176             "defenly", "defiantly" }, {
177             "definate", "definite" }, {
178             "definately", "definitely" }, {
179             "dependeble", "dependable" }, {
180             "descrption", "description" }, {
181             "descrptn", "description" }, {
182             "desparate", "desperate" }, {
183             "dessicate", "desiccate" }, {
184             "destint", "distant" }, {
185             "develepment", "developments" }, {
186             "developement", "development" }, {
187             "develpond", "development" }, {
188             "devulge", "divulge" }, {
189             "diagree", "disagree" }, {
190             "dieties", "deities" }, {
191             "dinasaur", "dinosaur" }, {
192             "dinasour", "dinosaur" }, {
193             "direcyly", "directly" }, {
194             "discuess", "discuss" }, {
195             "disect", "dissect" }, {
196             "disippate", "dissipate" }, {
197             "disition", "decision" }, {
198             "dispair", "despair" }, {
199             "disssicion", "discussion" }, {
200             "distarct", "distract" }, {
201             "distart", "distort" }, {
202             "distroy", "destroy" }, {
203             "documtations", "documentation" }, {
204             "doenload", "download" }, {
205             "dongle", "dangle" }, {
206             "doog", "dog" }, {
207             "dramaticly", "dramatically" }, {
208             "drunkeness", "drunkenness" }, {
209             "ductioneery", "dictionary" }, {
210             "dur", "due" }, {
211             "duren", "during" }, {
212             "dymatic", "dynamic" }, {
213             "dynaic", "dynamic" }, {
214             "ecstacy", "ecstasy" }, {
215             "efficat", "efficient" }, {
216             "efficity", "efficacy" }, {
217             "effots", "efforts" }, {
218             "egsistence", "existence" }, {
219             "eitiology", "etiology" }, {
220             "elagent", "elegant" }, {
221             "elligit", "elegant" }, {
222             "embarass", "embarrass" }, {
223             "embarassment", "embarrassment" }, {
224             "embaress", "embarrass" }, {
225             "encapsualtion", "encapsulation" }, {
226             "encyclapidia", "encyclopedia" }, {
227             "encyclopia", "encyclopedia" }, {
228             "engins", "engine" }, {
229             "enhence", "enhance" }, {
230             "enligtment", "Enlightenment" }, {
231             "ennuui", "ennui" }, {
232             "enought", "enough" }, {
233             "enventions", "inventions" }, {
234             "envireminakl", "environmental" }, {
235             "enviroment", "environment" }, {
236             "epitomy", "epitome" }, {
237             "equire", "acquire" }, {
238             "errara", "error" }, {
239             "erro", "error" }, {
240             "evaualtion", "evaluation" }, {
241             "evething", "everything" }, {
242             "evtually", "eventually" }, {
243             "excede", "exceed" }, {
244             "excercise", "exercise" }, {
245             "excpt", "except" }, {
246             "excution", "execution" }, {
247             "exhileration", "exhilaration" }, {
248             "existance", "existence" }, {
249             "expleyly", "explicitly" }, {
250             "explity", "explicitly" }, {
251             "expresso", "espresso" }, {
252             "exspidient", "expedient" }, {
253             "extions", "extensions" }, {
254             "factontion", "factorization" }, {
255             "failer", "failure" }, {
256             "famdasy", "fantasy" }, {
257             "faver", "favor" }, {
258             "faxe", "fax" }, {
259             "febuary", "february" }, {
260             "firey", "fiery" }, {
261             "fistival", "festival" }, {
262             "flatterring", "flattering" }, {
263             "fluk", "flux" }, {
264             "flukse", "flux" }, {
265             "fone", "phone" }, {
266             "forsee", "foresee" }, {
267             "frustartaion", "frustrating" }, {
268             "fuction", "function" }, {
269             "funetik", "phonetic" }, {
270             "futs", "guts" }, {
271             "gamne", "came" }, {
272             "gaurd", "guard" }, {
273             "generly", "generally" }, {
274             "ghandi", "gandhi" }, {
275             "goberment", "government" }, {
276             "gobernement", "government" }, {
277             "gobernment", "government" }, {
278             "gotton", "gotten" }, {
279             "gracefull", "graceful" }, {
280             "gradualy", "gradually" }, {
281             "grammer", "grammar" }, {
282             "hallo", "hello" }, {
283             "hapily", "happily" }, {
284             "harrass", "harass" }, {
285             "havne", "have" }, {
286             "heellp", "help" }, {
287             "heighth", "height" }, {
288             "hellp", "help" }, {
289             "helo", "hello" }, {
290             "herlo", "hello" }, {
291             "hifin", "hyphen" }, {
292             "hifine", "hyphen" }, {
293             "higer", "higher" }, {
294             "hiphine", "hyphen" }, {
295             "hippie", "hippy" }, {
296             "hippopotamous", "hippopotamus" }, {
297             "hlp", "help" }, {
298             "hourse", "horse" }, {
299             "houssing", "housing" }, {
300             "howaver", "however" }, {
301             "howver", "however" }, {
302             "humaniti", "humanity" }, {
303             "hyfin", "hyphen" }, {
304             "hypotathes", "hypothesis" }, {
305             "hypotathese", "hypothesis" }, {
306             "hystrical", "hysterical" }, {
307             "ident", "indent" }, {
308             "illegitament", "illegitimate" }, {
309             "imbed", "embed" }, {
310             "imediaetly", "immediately" }, {
311             "imfamy", "infamy" }, {
312             "immenant", "immanent" }, {
313             "implemtes", "implements" }, {
314             "inadvertant", "inadvertent" }, {
315             "incase", "in case" }, {
316             "incedious", "insidious" }, {
317             "incompleet", "incomplete" }, {
318             "incomplot", "incomplete" }, {
319             "inconvenant", "inconvenient" }, {
320             "inconvience", "inconvenience" }, {
321             "independant", "independent" }, {
322             "independenent", "independent" }, {
323             "indepnends", "independent" }, {
324             "indepth", "in depth" }, {
325             "indispensible", "indispensable" }, {
326             "inefficite", "inefficient" }, {
327             "inerface", "interface" }, {
328             "infact", "in fact" }, {
329             "influencial", "influential" }, {
330             "inital", "initial" }, {
331             "initinized", "initialized" }, {
332             "initized", "initialized" }, {
333             "innoculate", "inoculate" }, {
334             "insistant", "insistent" }, {
335             "insistenet", "insistent" }, {
336             "instulation", "installation" }, {
337             "intealignt", "intelligent" }, {
338             "intejilent", "intelligent" }, {
339             "intelegent", "intelligent" }, {
340             "intelegnent", "intelligent" }, {
341             "intelejent", "intelligent" }, {
342             "inteligent", "intelligent" }, {
343             "intelignt", "intelligent" }, {
344             "intellagant", "intelligent" }, {
345             "intellegent", "intelligent" }, {
346             "intellegint", "intelligent" }, {
347             "intellgnt", "intelligent" }, {
348             "intensionality", "intensionally" }, {
349             "interate", "iterate" }, {
350             "internation", "international" }, {
351             "interpretate", "interpret" }, {
352             "interpretter", "interpreter" }, {
353             "intertes", "interested" }, {
354             "intertesd", "interested" }, {
355             "invermeantial", "environmental" }, {
356             "irregardless", "regardless" }, {
357             "irresistable", "irresistible" }, {
358             "irritible", "irritable" }, {
359             "islams", "muslims" }, {
360             "isotrop", "isotope" }, {
361             "isreal", "israel" }, {
362             "johhn", "john" }, {
363             "judgement", "judgment" }, {
364             "kippur", "kipper" }, {
365             "knawing", "knowing" }, {
366             "latext", "latest" }, {
367             "leasve", "leave" }, {
368             "lesure", "leisure" }, {
369             "liasion", "lesion" }, {
370             "liason", "liaison" }, {
371             "libary", "library" }, {
372             "likly", "likely" }, {
373             "lilometer", "kilometer" }, {
374             "liquify", "liquefy" }, {
375             "lloyer", "layer" }, {
376             "lossing", "losing" }, {
377             "luser", "laser" }, {
378             "maintanence", "maintenance" }, {
379             "majaerly", "majority" }, {
380             "majoraly", "majority" }, {
381             "maks", "masks" }, {
382             "mandelbrot", "Mandelbrot" }, {
383             "mant", "want" }, {
384             "marshall", "marshal" }, {
385             "maxium", "maximum" }, {
386             "meory", "memory" }, {
387             "metter", "better" }, {
388             "mic", "mike" }, {
389             "midia", "media" }, {
390             "millenium", "millennium" }, {
391             "miniscule", "minuscule" }, {
392             "minkay", "monkey" }, {
393             "minum", "minimum" }, {
394             "mischievious", "mischievous" }, {
395             "misilous", "miscellaneous" }, {
396             "momento", "memento" }, {
397             "monkay", "monkey" }, {
398             "mosaik", "mosaic" }, {
399             "mostlikely", "most likely" }, {
400             "mousr", "mouser" }, {
401             "mroe", "more" }, {
402             "neccessary", "necessary" }, {
403             "necesary", "necessary" }, {
404             "necesser", "necessary" }, {
405             "neice", "niece" }, {
406             "neighbour", "neighbor" }, {
407             "nemonic", "pneumonic" }, {
408             "nevade", "Nevada" }, {
409             "nickleodeon", "nickelodeon" }, {
410             "nieve", "naive" }, {
411             "noone", "no one" }, {
412             "noticably", "noticeably" }, {
413             "notin", "not in" }, {
414             "nozled", "nuzzled" }, {
415             "objectsion", "objects" }, {
416             "obsfuscate", "obfuscate" }, {
417             "ocassion", "occasion" }, {
418             "occuppied", "occupied" }, {
419             "occurence", "occurrence" }, {
420             "octagenarian", "octogenarian" }, {
421             "olf", "old" }, {
422             "opposim", "opossum" }, {
423             "organise", "organize" }, {
424             "organiz", "organize" }, {
425             "orientate", "orient" }, {
426             "oscilascope", "oscilloscope" }, {
427             "oving", "moving" }, {
428             "paramers", "parameters" }, {
429             "parametic", "parameter" }, {
430             "paranets", "parameters" }, {
431             "partrucal", "particular" }, {
432             "pataphysical", "metaphysical" }, {
433             "patten", "pattern" }, {
434             "permissable", "permissible" }, {
435             "permition", "permission" }, {
436             "permmasivie", "permissive" }, {
437             "perogative", "prerogative" }, {
438             "persue", "pursue" }, {
439             "phantasia", "fantasia" }, {
440             "phenominal", "phenomenal" }, {
441             "picaresque", "picturesque" }, {
442             "playwrite", "playwright" }, {
443             "poeses", "poesies" }, {
444             "polation", "politician" }, {
445             "poligamy", "polygamy" }, {
446             "politict", "politic" }, {
447             "pollice", "police" }, {
448             "polypropalene", "polypropylene" }, {
449             "pompom", "pompon" }, {
450             "possable", "possible" }, {
451             "practicle", "practical" }, {
452             "pragmaticism", "pragmatism" }, {
453             "preceeding", "preceding" }, {
454             "precion", "precision" }, {
455             "precios", "precision" }, {
456             "preemptory", "peremptory" }, {
457             "prefices", "prefixes" }, {
458             "prefixt", "prefixed" }, {
459             "presbyterian", "Presbyterian" }, {
460             "presue", "pursue" }, {
461             "presued", "pursued" }, {
462             "privielage", "privilege" }, {
463             "priviledge", "privilege" }, {
464             "proceedures", "procedures" }, {
465             "pronensiation", "pronunciation" }, {
466             "pronisation", "pronunciation" }, {
467             "pronounciation", "pronunciation" }, {
468             "properally", "properly" }, {
469             "proplematic", "problematic" }, {
470             "protray", "portray" }, {
471             "pscolgst", "psychologist" }, {
472             "psicolagest", "psychologist" }, {
473             "psycolagest", "psychologist" }, {
474             "quoz", "quiz" }, {
475             "radious", "radius" }, {
476             "ramplily", "rampantly" }, {
477             "reccomend", "recommend" }, {
478             "reccona", "raccoon" }, {
479             "recieve", "receive" }, {
480             "reconise", "recognize" }, {
481             "rectangeles", "rectangle" }, {
482             "redign", "redesign" }, {
483             "reoccurring", "recurring" }, {
484             "repitition", "repetition" }, {
485             "replasments", "replacement" }, {
486             "reposable", "responsible" }, {
487             "reseblence", "resemblance" }, {
488             "respct", "respect" }, {
489             "respecally", "respectfully" }, {
490             "roon", "room" }, {
491             "rought", "roughly" }, {
492             "rsx", "RSX" }, {
493             "rudemtry", "rudimentary" }, {
494             "runnung", "running" }, {
495             "sacreligious", "sacrilegious" }, {
496             "saftly", "safely" }, {
497             "salut", "salute" }, {
498             "satifly", "satisfy" }, {
499             "scrabdle", "scrabble" }, {
500             "searcheable", "searchable" }, {
501             "secion", "section" }, {
502             "seferal", "several" }, {
503             "segements", "segments" }, {
504             "sence", "sense" }, {
505             "seperate", "separate" }, {
506             "sherbert", "sherbet" }, {
507             "sicolagest", "psychologist" }, {
508             "sieze", "seize" }, {
509             "simpfilty", "simplicity" }, {
510             "simplye", "simply" }, {
511             "singal", "signal" }, {
512             "sitte", "site" }, {
513             "situration", "situation" }, {
514             "slyph", "sylph" }, {
515             "smil", "smile" }, {
516             "snuck", "sneaked" }, {
517             "sometmes", "sometimes" }, {
518             "soonec", "sonic" }, {
519             "specificialy", "specifically" }, {
520             "spel", "spell" }, {
521             "spoak", "spoke" }, {
522             "sponsered", "sponsored" }, {
523             "stering", "steering" }, {
524             "straightjacket", "straitjacket" }, {
525             "stumach", "stomach" }, {
526             "stutent", "student" }, {
527             "styleguide", "style guide" }, {
528             "subisitions", "substitutions" }, {
529             "subjecribed", "subscribed" }, {
530             "subpena", "subpoena" }, {
531             "substations", "substitutions" }, {
532             "suger", "sugar" }, {
533             "supercede", "supersede" }, {
534             "superfulous", "superfluous" }, {
535             "susan", "Susan" }, {
536             "swimwear", "swim wear" }, {
537             "syncorization", "synchronization" }, {
538             "taff", "tough" }, {
539             "taht", "that" }, {
540             "tattos", "tattoos" }, {
541             "techniquely", "technically" }, {
542             "teh", "the" }, {
543             "tem", "team" }, {
544             "teo", "two" }, {
545             "teridical", "theoretical" }, {
546             "tesst", "test" }, {
547             "tets", "tests" }, {
548             "thanot", "than or" }, {
549             "theirselves", "themselves" }, {
550             "theridically", "theoretical" }, {
551             "thredically", "theoretically" }, {
552             "thruout", "throughout" }, {
553             "ths", "this" }, {
554             "titalate", "titillate" }, {
555             "tobagan", "tobaggon" }, {
556             "tommorrow", "tomorrow" }, {
557             "tomorow", "tomorrow" }, {
558             "tradegy", "tragedy" }, {
559             "trubbel", "trouble" }, {
560             "ttest", "test" }, {
561             "tunnellike", "tunnel like" }, {
562             "tured", "turned" }, {
563             "tyrrany", "tyranny" }, {
564             "unatourral", "unnatural" }, {
565             "unaturral", "unnatural" }, {
566             "unconisitional", "unconstitutional" }, {
567             "unconscience", "unconscious" }, {
568             "underladder", "under ladder" }, {
569             "unentelegible", "unintelligible" }, {
570             "unfortunently", "unfortunately" }, {
571             "unnaturral", "unnatural" }, {
572             "upcast", "up cast" }, {
573             "upmost", "utmost" }, {
574             "uranisium", "uranium" }, {
575             "verison", "version" }, {
576             "vinagarette", "vinaigrette" }, {
577             "volumptuous", "voluptuous" }, {
578             "volunteerism", "voluntarism" }, {
579             "volye", "volley" }, {
580             "wadting", "wasting" }, {
581             "waite", "wait" }, {
582             "wan't", "won't" }, {
583             "warloord", "warlord" }, {
584             "whaaat", "what" }, {
585             "whard", "ward" }, {
586             "whimp", "wimp" }, {
587             "wicken", "weaken" }, {
588             "wierd", "weird" }, {
589             "wrank", "rank" }, {
590             "writeen", "righten" }, {
591             "writting", "writing" }, {
592             "wundeews", "windows" }, {
593             "yeild", "yield" }, {
594             "youe", "your" }
595     };
596 
597     /**
598      * A subset of {@link #FIXTURE} generated by this test.
599      */
600     private static final String[][] MATCHES = { { "Accosinly", "Occasionally" }, {
601             "Maddness", "Madness" }, {
602             "Occusionaly", "Occasionally" }, {
603             "Steffen", "Stephen" }, {
604             "Thw", "The" }, {
605             "Unformanlly", "Unfortunately" }, {
606             "Unfortally", "Unfortunately" }, {
607             "abilitey", "ability" }, {
608             "absorbtion", "absorption" }, {
609             "accidently", "accidentally" }, {
610             "accomodate", "accommodate" }, {
611             "acommadate", "accommodate" }, {
612             "acord", "accord" }, {
613             "adultry", "adultery" }, {
614             "aggresive", "aggressive" }, {
615             "alchohol", "alcohol" }, {
616             "alchoholic", "alcoholic" }, {
617             "allieve", "alive" }, {
618             "alot", "a lot" }, {
619             "alright", "all right" }, {
620             "amature", "amateur" }, {
621             "ambivilant", "ambivalent" }, {
622             "amourfous", "amorphous" }, {
623             "annoint", "anoint" }, {
624             "annonsment", "announcement" }, {
625             "annoyting", "anting" }, {
626             "annuncio", "announce" }, {
627             "anotomy", "anatomy" }, {
628             "antidesestablishmentarianism", "antidisestablishmentarianism" }, {
629             "antidisestablishmentarism", "antidisestablishmentarianism" }, {
630             "anynomous", "anonymous" }, {
631             "appelet", "applet" }, {
632             "appreceiated", "appreciated" }, {
633             "appresteate", "appreciate" }, {
634             "aquantance", "acquaintance" }, {
635             "aricticure", "architecture" }, {
636             "asterick", "asterisk" }, {
637             "asymetric", "asymmetric" }, {
638             "atentively", "attentively" }, {
639             "bankrot", "bankrupt" }, {
640             "basicly", "basically" }, {
641             "batallion", "battalion" }, {
642             "bbrose", "browse" }, {
643             "beauro", "bureau" }, {
644             "beaurocracy", "bureaucracy" }, {
645             "beggining", "beginning" }, {
646             "behaviour", "behavior" }, {
647             "beleive", "believe" }, {
648             "belive", "believe" }, {
649             "blait", "bleat" }, {
650             "bouyant", "buoyant" }, {
651             "boygot", "boycott" }, {
652             "brocolli", "broccoli" }, {
653             "buder", "butter" }, {
654             "budr", "butter" }, {
655             "budter", "butter" }, {
656             "buracracy", "bureaucracy" }, {
657             "burracracy", "bureaucracy" }, {
658             "buton", "button" }, {
659             "byby", "by by" }, {
660             "cauler", "caller" }, {
661             "ceasar", "caesar" }, {
662             "cemetary", "cemetery" }, {
663             "changeing", "changing" }, {
664             "cheet", "cheat" }, {
665             "cimplicity", "simplicity" }, {
666             "circumstaces", "circumstances" }, {
667             "clob", "club" }, {
668             "coaln", "colon" }, {
669             "colleaque", "colleague" }, {
670             "colloquilism", "colloquialism" }, {
671             "columne", "column" }, {
672             "comitmment", "commitment" }, {
673             "comitte", "committee" }, {
674             "comittmen", "commitment" }, {
675             "comittmend", "commitment" }, {
676             "commerciasl", "commercials" }, {
677             "commited", "committed" }, {
678             "commitee", "committee" }, {
679             "companys", "companies" }, {
680             "comupter", "computer" }, {
681             "concensus", "consensus" }, {
682             "confusionism", "confucianism" }, {
683             "congradulations", "congratulations" }, {
684             "contunie", "continue" }, {
685             "cooly", "coolly" }, {
686             "copping", "coping" }, {
687             "cosmoplyton", "cosmopolitan" }, {
688             "crasy", "crazy" }, {
689             "croke", "croak" }, {
690             "crucifiction", "crucifixion" }, {
691             "crusifed", "crucified" }, {
692             "cumba", "combo" }, {
693             "custamisation", "customization" }, {
694             "dag", "dog" }, {
695             "daly", "daily" }, {
696             "defence", "defense" }, {
697             "definate", "definite" }, {
698             "definately", "definitely" }, {
699             "dependeble", "dependable" }, {
700             "descrption", "description" }, {
701             "descrptn", "description" }, {
702             "desparate", "desperate" }, {
703             "dessicate", "desiccate" }, {
704             "destint", "distant" }, {
705             "develepment", "developments" }, {
706             "developement", "development" }, {
707             "develpond", "development" }, {
708             "devulge", "divulge" }, {
709             "dieties", "deities" }, {
710             "dinasaur", "dinosaur" }, {
711             "dinasour", "dinosaur" }, {
712             "discuess", "discuss" }, {
713             "disect", "dissect" }, {
714             "disippate", "dissipate" }, {
715             "disition", "decision" }, {
716             "dispair", "despair" }, {
717             "distarct", "distract" }, {
718             "distart", "distort" }, {
719             "distroy", "destroy" }, {
720             "doenload", "download" }, {
721             "dongle", "dangle" }, {
722             "doog", "dog" }, {
723             "dramaticly", "dramatically" }, {
724             "drunkeness", "drunkenness" }, {
725             "ductioneery", "dictionary" }, {
726             "ecstacy", "ecstasy" }, {
727             "egsistence", "existence" }, {
728             "eitiology", "etiology" }, {
729             "elagent", "elegant" }, {
730             "embarass", "embarrass" }, {
731             "embarassment", "embarrassment" }, {
732             "embaress", "embarrass" }, {
733             "encapsualtion", "encapsulation" }, {
734             "encyclapidia", "encyclopedia" }, {
735             "encyclopia", "encyclopedia" }, {
736             "engins", "engine" }, {
737             "enhence", "enhance" }, {
738             "ennuui", "ennui" }, {
739             "enventions", "inventions" }, {
740             "envireminakl", "environmental" }, {
741             "enviroment", "environment" }, {
742             "epitomy", "epitome" }, {
743             "equire", "acquire" }, {
744             "errara", "error" }, {
745             "evaualtion", "evaluation" }, {
746             "excede", "exceed" }, {
747             "excercise", "exercise" }, {
748             "excpt", "except" }, {
749             "exhileration", "exhilaration" }, {
750             "existance", "existence" }, {
751             "expleyly", "explicitly" }, {
752             "explity", "explicitly" }, {
753             "failer", "failure" }, {
754             "faver", "favor" }, {
755             "faxe", "fax" }, {
756             "firey", "fiery" }, {
757             "fistival", "festival" }, {
758             "flatterring", "flattering" }, {
759             "flukse", "flux" }, {
760             "fone", "phone" }, {
761             "forsee", "foresee" }, {
762             "frustartaion", "frustrating" }, {
763             "funetik", "phonetic" }, {
764             "gaurd", "guard" }, {
765             "generly", "generally" }, {
766             "ghandi", "gandhi" }, {
767             "gotton", "gotten" }, {
768             "gracefull", "graceful" }, {
769             "gradualy", "gradually" }, {
770             "grammer", "grammar" }, {
771             "hallo", "hello" }, {
772             "hapily", "happily" }, {
773             "harrass", "harass" }, {
774             "heellp", "help" }, {
775             "heighth", "height" }, {
776             "hellp", "help" }, {
777             "helo", "hello" }, {
778             "hifin", "hyphen" }, {
779             "hifine", "hyphen" }, {
780             "hiphine", "hyphen" }, {
781             "hippie", "hippy" }, {
782             "hippopotamous", "hippopotamus" }, {
783             "hourse", "horse" }, {
784             "houssing", "housing" }, {
785             "howaver", "however" }, {
786             "howver", "however" }, {
787             "humaniti", "humanity" }, {
788             "hyfin", "hyphen" }, {
789             "hystrical", "hysterical" }, {
790             "illegitament", "illegitimate" }, {
791             "imbed", "embed" }, {
792             "imediaetly", "immediately" }, {
793             "immenant", "immanent" }, {
794             "implemtes", "implements" }, {
795             "inadvertant", "inadvertent" }, {
796             "incase", "in case" }, {
797             "incedious", "insidious" }, {
798             "incompleet", "incomplete" }, {
799             "incomplot", "incomplete" }, {
800             "inconvenant", "inconvenient" }, {
801             "inconvience", "inconvenience" }, {
802             "independant", "independent" }, {
803             "independenent", "independent" }, {
804             "indepnends", "independent" }, {
805             "indepth", "in depth" }, {
806             "indispensible", "indispensable" }, {
807             "inefficite", "inefficient" }, {
808             "infact", "in fact" }, {
809             "influencial", "influential" }, {
810             "innoculate", "inoculate" }, {
811             "insistant", "insistent" }, {
812             "insistenet", "insistent" }, {
813             "instulation", "installation" }, {
814             "intealignt", "intelligent" }, {
815             "intelegent", "intelligent" }, {
816             "intelegnent", "intelligent" }, {
817             "intelejent", "intelligent" }, {
818             "inteligent", "intelligent" }, {
819             "intelignt", "intelligent" }, {
820             "intellagant", "intelligent" }, {
821             "intellegent", "intelligent" }, {
822             "intellegint", "intelligent" }, {
823             "intellgnt", "intelligent" }, {
824             "intensionality", "intensionally" }, {
825             "internation", "international" }, {
826             "interpretate", "interpret" }, {
827             "interpretter", "interpreter" }, {
828             "intertes", "interested" }, {
829             "intertesd", "interested" }, {
830             "invermeantial", "environmental" }, {
831             "irresistable", "irresistible" }, {
832             "irritible", "irritable" }, {
833             "isreal", "israel" }, {
834             "johhn", "john" }, {
835             "kippur", "kipper" }, {
836             "knawing", "knowing" }, {
837             "lesure", "leisure" }, {
838             "liasion", "lesion" }, {
839             "liason", "liaison" }, {
840             "likly", "likely" }, {
841             "liquify", "liquefy" }, {
842             "lloyer", "layer" }, {
843             "lossing", "losing" }, {
844             "luser", "laser" }, {
845             "maintanence", "maintenance" }, {
846             "mandelbrot", "Mandelbrot" }, {
847             "marshall", "marshal" }, {
848             "maxium", "maximum" }, {
849             "mic", "mike" }, {
850             "midia", "media" }, {
851             "millenium", "millennium" }, {
852             "miniscule", "minuscule" }, {
853             "minkay", "monkey" }, {
854             "mischievious", "mischievous" }, {
855             "momento", "memento" }, {
856             "monkay", "monkey" }, {
857             "mosaik", "mosaic" }, {
858             "mostlikely", "most likely" }, {
859             "mousr", "mouser" }, {
860             "mroe", "more" }, {
861             "necesary", "necessary" }, {
862             "necesser", "necessary" }, {
863             "neice", "niece" }, {
864             "neighbour", "neighbor" }, {
865             "nemonic", "pneumonic" }, {
866             "nevade", "Nevada" }, {
867             "nickleodeon", "nickelodeon" }, {
868             "nieve", "naive" }, {
869             "noone", "no one" }, {
870             "notin", "not in" }, {
871             "nozled", "nuzzled" }, {
872             "objectsion", "objects" }, {
873             "ocassion", "occasion" }, {
874             "occuppied", "occupied" }, {
875             "occurence", "occurrence" }, {
876             "octagenarian", "octogenarian" }, {
877             "opposim", "opossum" }, {
878             "organise", "organize" }, {
879             "organiz", "organize" }, {
880             "orientate", "orient" }, {
881             "oscilascope", "oscilloscope" }, {
882             "parametic", "parameter" }, {
883             "permissable", "permissible" }, {
884             "permmasivie", "permissive" }, {
885             "persue", "pursue" }, {
886             "phantasia", "fantasia" }, {
887             "phenominal", "phenomenal" }, {
888             "playwrite", "playwright" }, {
889             "poeses", "poesies" }, {
890             "poligamy", "polygamy" }, {
891             "politict", "politic" }, {
892             "pollice", "police" }, {
893             "polypropalene", "polypropylene" }, {
894             "possable", "possible" }, {
895             "practicle", "practical" }, {
896             "pragmaticism", "pragmatism" }, {
897             "preceeding", "preceding" }, {
898             "precios", "precision" }, {
899             "preemptory", "peremptory" }, {
900             "prefixt", "prefixed" }, {
901             "presbyterian", "Presbyterian" }, {
902             "presue", "pursue" }, {
903             "presued", "pursued" }, {
904             "privielage", "privilege" }, {
905             "priviledge", "privilege" }, {
906             "proceedures", "procedures" }, {
907             "pronensiation", "pronunciation" }, {
908             "pronounciation", "pronunciation" }, {
909             "properally", "properly" }, {
910             "proplematic", "problematic" }, {
911             "protray", "portray" }, {
912             "pscolgst", "psychologist" }, {
913             "psicolagest", "psychologist" }, {
914             "psycolagest", "psychologist" }, {
915             "quoz", "quiz" }, {
916             "radious", "radius" }, {
917             "reccomend", "recommend" }, {
918             "reccona", "raccoon" }, {
919             "recieve", "receive" }, {
920             "reconise", "recognize" }, {
921             "rectangeles", "rectangle" }, {
922             "reoccurring", "recurring" }, {
923             "repitition", "repetition" }, {
924             "replasments", "replacement" }, {
925             "respct", "respect" }, {
926             "respecally", "respectfully" }, {
927             "rsx", "RSX" }, {
928             "runnung", "running" }, {
929             "sacreligious", "sacrilegious" }, {
930             "salut", "salute" }, {
931             "searcheable", "searchable" }, {
932             "seferal", "several" }, {
933             "segements", "segments" }, {
934             "sence", "sense" }, {
935             "seperate", "separate" }, {
936             "sicolagest", "psychologist" }, {
937             "sieze", "seize" }, {
938             "simplye", "simply" }, {
939             "sitte", "site" }, {
940             "slyph", "sylph" }, {
941             "smil", "smile" }, {
942             "sometmes", "sometimes" }, {
943             "soonec", "sonic" }, {
944             "specificialy", "specifically" }, {
945             "spel", "spell" }, {
946             "spoak", "spoke" }, {
947             "sponsered", "sponsored" }, {
948             "stering", "steering" }, {
949             "straightjacket", "straitjacket" }, {
950             "stumach", "stomach" }, {
951             "stutent", "student" }, {
952             "styleguide", "style guide" }, {
953             "subpena", "subpoena" }, {
954             "substations", "substitutions" }, {
955             "supercede", "supersede" }, {
956             "superfulous", "superfluous" }, {
957             "susan", "Susan" }, {
958             "swimwear", "swim wear" }, {
959             "syncorization", "synchronization" }, {
960             "taff", "tough" }, {
961             "taht", "that" }, {
962             "tattos", "tattoos" }, {
963             "techniquely", "technically" }, {
964             "teh", "the" }, {
965             "tem", "team" }, {
966             "teo", "two" }, {
967             "teridical", "theoretical" }, {
968             "tesst", "test" }, {
969             "theridically", "theoretical" }, {
970             "thredically", "theoretically" }, {
971             "thruout", "throughout" }, {
972             "ths", "this" }, {
973             "titalate", "titillate" }, {
974             "tobagan", "tobaggon" }, {
975             "tommorrow", "tomorrow" }, {
976             "tomorow", "tomorrow" }, {
977             "trubbel", "trouble" }, {
978             "ttest", "test" }, {
979             "tyrrany", "tyranny" }, {
980             "unatourral", "unnatural" }, {
981             "unaturral", "unnatural" }, {
982             "unconisitional", "unconstitutional" }, {
983             "unconscience", "unconscious" }, {
984             "underladder", "under ladder" }, {
985             "unentelegible", "unintelligible" }, {
986             "unfortunently", "unfortunately" }, {
987             "unnaturral", "unnatural" }, {
988             "upcast", "up cast" }, {
989             "verison", "version" }, {
990             "vinagarette", "vinaigrette" }, {
991             "volunteerism", "voluntarism" }, {
992             "volye", "volley" }, {
993             "waite", "wait" }, {
994             "wan't", "won't" }, {
995             "warloord", "warlord" }, {
996             "whaaat", "what" }, {
997             "whard", "ward" }, {
998             "whimp", "wimp" }, {
999             "wicken", "weaken" }, {
1000             "wierd", "weird" }, {
1001             "wrank", "rank" }, {
1002             "writeen", "righten" }, {
1003             "writting", "writing" }, {
1004             "wundeews", "windows" }, {
1005             "yeild", "yield" }, };
1006 
1007     /**
1008      * Tests encoding APIs in one place.
1009      */
1010     private void assertDoubleMetaphone(final String expected, final String source) {
1011         assertEquals(expected, this.getStringEncoder().encode(source));
1012         try {
1013             assertEquals(expected, this.getStringEncoder().encode((Object) source));
1014         } catch (final EncoderException e) {
1015             fail("Unexpected exception: " + e);
1016         }
1017         assertEquals(expected, this.getStringEncoder().doubleMetaphone(source));
1018         assertEquals(expected, this.getStringEncoder().doubleMetaphone(source, false));
1019     }
1020 
1021     /**
1022      * Tests encoding APIs in one place.
1023      */
1024     public void assertDoubleMetaphoneAlt(final String expected, final String source) {
1025         assertEquals(expected, this.getStringEncoder().doubleMetaphone(source, true));
1026     }
1027 
1028     @Override
1029     protected DoubleMetaphone createStringEncoder() {
1030         return new DoubleMetaphone();
1031     }
1032 
1033     public void doubleMetaphoneEqualTest(final String[][] pairs, final boolean useAlternate) {
1034         this.validateFixture(pairs);
1035         for (final String[] pair : pairs) {
1036             final String name0 = pair[0];
1037             final String name1 = pair[1];
1038             final String failMsg = "Expected match between " + name0 + " and " + name1 + " (use alternate: " + useAlternate + ")";
1039             assertTrue(this.getStringEncoder().isDoubleMetaphoneEqual(name0, name1, useAlternate), failMsg);
1040             assertTrue(this.getStringEncoder().isDoubleMetaphoneEqual(name1, name0, useAlternate), failMsg);
1041             if (!useAlternate) {
1042                 assertTrue(this.getStringEncoder().isDoubleMetaphoneEqual(name0, name1), failMsg);
1043                 assertTrue(this.getStringEncoder().isDoubleMetaphoneEqual(name1, name0), failMsg);
1044             }
1045         }
1046     }
1047 
1048     public void doubleMetaphoneNotEqualTest(final boolean alternate) {
1049         assertFalse(this.getStringEncoder().isDoubleMetaphoneEqual("Brain", "Band", alternate));
1050         assertFalse(this.getStringEncoder().isDoubleMetaphoneEqual("Band", "Brain", alternate));
1051 
1052         if (!alternate) {
1053             assertFalse(this.getStringEncoder().isDoubleMetaphoneEqual("Brain", "Band"));
1054             assertFalse(this.getStringEncoder().isDoubleMetaphoneEqual("Band", "Brain"));
1055         }
1056     }
1057 
1058     @Test
1059     public void testCCedilla() {
1060         assertTrue(this.getStringEncoder().isDoubleMetaphoneEqual("\u00e7", "S")); // c-cedilla
1061     }
1062 
1063     @Test
1064     public void testCodec184() throws Throwable {
1065         assertTrue(new DoubleMetaphone().isDoubleMetaphoneEqual("", "", false));
1066         assertTrue(new DoubleMetaphone().isDoubleMetaphoneEqual("", "", true));
1067         assertFalse(new DoubleMetaphone().isDoubleMetaphoneEqual("aa", "", false));
1068         assertFalse(new DoubleMetaphone().isDoubleMetaphoneEqual("aa", "", true));
1069         assertFalse(new DoubleMetaphone().isDoubleMetaphoneEqual("", "aa", false));
1070         assertFalse(new DoubleMetaphone().isDoubleMetaphoneEqual("", "aa", true));
1071       }
1072 
1073     @Test
1074     public void testDoubleMetaphone() {
1075         assertDoubleMetaphone("TSTN", "testing");
1076         assertDoubleMetaphone("0", "The");
1077         assertDoubleMetaphone("KK", "quick");
1078         assertDoubleMetaphone("PRN", "brown");
1079         assertDoubleMetaphone("FKS", "fox");
1080         assertDoubleMetaphone("JMPT", "jumped");
1081         assertDoubleMetaphone("AFR", "over");
1082         assertDoubleMetaphone("0", "the");
1083         assertDoubleMetaphone("LS", "lazy");
1084         assertDoubleMetaphone("TKS", "dogs");
1085         assertDoubleMetaphone("MKFR", "MacCafferey");
1086         assertDoubleMetaphone("STFN", "Stephan");
1087         assertDoubleMetaphone("KSSK", "Kuczewski");
1088         assertDoubleMetaphone("MKLL", "McClelland");
1089         assertDoubleMetaphone("SNHS", "san jose");
1090         assertDoubleMetaphone("SNFP", "xenophobia");
1091 
1092         assertDoubleMetaphoneAlt("TSTN", "testing");
1093         assertDoubleMetaphoneAlt("T", "The");
1094         assertDoubleMetaphoneAlt("KK", "quick");
1095         assertDoubleMetaphoneAlt("PRN", "brown");
1096         assertDoubleMetaphoneAlt("FKS", "fox");
1097         assertDoubleMetaphoneAlt("AMPT", "jumped");
1098         assertDoubleMetaphoneAlt("AFR", "over");
1099         assertDoubleMetaphoneAlt("T", "the");
1100         assertDoubleMetaphoneAlt("LS", "lazy");
1101         assertDoubleMetaphoneAlt("TKS", "dogs");
1102         assertDoubleMetaphoneAlt("MKFR", "MacCafferey");
1103         assertDoubleMetaphoneAlt("STFN", "Stephan");
1104         assertDoubleMetaphoneAlt("KXFS", "Kutchefski");
1105         assertDoubleMetaphoneAlt("MKLL", "McClelland");
1106         assertDoubleMetaphoneAlt("SNHS", "san jose");
1107         assertDoubleMetaphoneAlt("SNFP", "xenophobia");
1108         assertDoubleMetaphoneAlt("FKR", "Fokker");
1109         assertDoubleMetaphoneAlt("AK", "Joqqi");
1110         assertDoubleMetaphoneAlt("HF", "Hovvi");
1111         assertDoubleMetaphoneAlt("XRN", "Czerny");
1112     }
1113 
1114     @Test
1115     public void testEmpty() {
1116         assertNull(this.getStringEncoder().doubleMetaphone(null));
1117         assertNull(this.getStringEncoder().doubleMetaphone(""));
1118         assertNull(this.getStringEncoder().doubleMetaphone(" "));
1119         assertNull(this.getStringEncoder().doubleMetaphone("\t\n\r "));
1120     }
1121 
1122     @Test
1123     public void testIsDoubleMetaphoneEqualBasic() {
1124         final String[][] testFixture = { {
1125                 "", "" }, {
1126                 "Case", "case" }, {
1127                 "CASE", "Case" }, {
1128                 "caSe", "cAsE" }, {
1129                 "cookie", "quick" }, {
1130                 "quick", "cookie" }, {
1131                 "Brian", "Bryan" }, {
1132                 "Auto", "Otto" }, {
1133                 "Steven", "Stefan" }, {
1134                 "Philipowitz", "Filipowicz" }
1135         };
1136         doubleMetaphoneEqualTest(testFixture, false);
1137         doubleMetaphoneEqualTest(testFixture, true);
1138     }
1139 
1140     /**
1141      * Example in the original article but failures in this Java impl:
1142      */
1143     @Test
1144     public void testIsDoubleMetaphoneEqualExtended1() {
1145         //        String[][] testFixture = new String[][] { { "Smith", "Schmidt" }
1146         //        };
1147         //        doubleMetaphoneEqualTest(testFixture, false);
1148         //        doubleMetaphoneEqualTest(testFixture, true);
1149     }
1150 
1151     @Test
1152     public void testIsDoubleMetaphoneEqualExtended2() {
1153         final String[][] testFixture = { { "Jablonski", "Yablonsky" }
1154         };
1155         //doubleMetaphoneEqualTest(testFixture, false);
1156         doubleMetaphoneEqualTest(testFixture, true);
1157     }
1158 
1159     /**
1160      * Used to generate the MATCHES array and test possible matches from the
1161      * FIXTURE array.
1162      */
1163     @Test
1164     public void testIsDoubleMetaphoneEqualExtended3() {
1165         this.validateFixture(FIXTURE);
1166         final StringBuilder failures = new StringBuilder();
1167         final StringBuilder matches = new StringBuilder();
1168         final String cr = System.lineSeparator();
1169         matches.append("private static final String[][] MATCHES = {" + cr);
1170         int failCount = 0;
1171         for (int i = 0; i < FIXTURE.length; i++) {
1172             final String name0 = FIXTURE[i][0];
1173             final String name1 = FIXTURE[i][1];
1174             final boolean match1 = this.getStringEncoder().isDoubleMetaphoneEqual(name0, name1, false);
1175             final boolean match2 = this.getStringEncoder().isDoubleMetaphoneEqual(name0, name1, true);
1176             if (!match1 && !match2) {
1177                 final String failMsg = "[" + i + "] " + name0 + " and " + name1 + cr;
1178                 failures.append(failMsg);
1179                 failCount++;
1180             } else {
1181                 matches.append("{\"" + name0 + "\", \"" + name1 + "\"}," + cr);
1182             }
1183         }
1184         matches.append("};");
1185         // Turn on to print a new MATCH array
1186         //System.out.println(matches.toString());
1187         if (failCount > 0) {
1188             // Turn on to see which pairs do NOT match.
1189             // String msg = failures.toString();
1190             //fail(failCount + " failures out of " + FIXTURE.length + ". The
1191             // following could be made to match: " + cr + msg);
1192         }
1193     }
1194 
1195     @Test
1196     public void testIsDoubleMetaphoneEqualWithMATCHES() {
1197         this.validateFixture(MATCHES);
1198         for (int i = 0; i < MATCHES.length; i++) {
1199             final String name0 = MATCHES[i][0];
1200             final String name1 = MATCHES[i][1];
1201             final boolean match1 = this.getStringEncoder().isDoubleMetaphoneEqual(name0, name1, false);
1202             final boolean match2 = this.getStringEncoder().isDoubleMetaphoneEqual(name0, name1, true);
1203             if (!match1 && !match2) {
1204                 fail("Expected match [" + i + "] " + name0 + " and " + name1);
1205             }
1206         }
1207     }
1208 
1209     @Test
1210     public void testIsDoubleMetaphoneNotEqual() {
1211         doubleMetaphoneNotEqualTest(false);
1212         doubleMetaphoneNotEqualTest(true);
1213     }
1214 
1215     @Test
1216     public void testNTilde() {
1217         assertTrue(this.getStringEncoder().isDoubleMetaphoneEqual("\u00f1", "N")); // n-tilde
1218     }
1219 
1220     /**
1221      * Test setting maximum length
1222      */
1223     @Test
1224     public void testSetMaxCodeLength() {
1225         final String value = "jumped";
1226 
1227         final DoubleMetaphone doubleMetaphone = new DoubleMetaphone();
1228 
1229         // Sanity check of default settings
1230         assertEquals(4, doubleMetaphone.getMaxCodeLen(), "Default Max Code Length");
1231         assertEquals("JMPT", doubleMetaphone.doubleMetaphone(value, false), "Default Primary");
1232         assertEquals("AMPT", doubleMetaphone.doubleMetaphone(value, true), "Default Alternate");
1233 
1234         // Check setting Max Code Length
1235         doubleMetaphone.setMaxCodeLen(3);
1236         assertEquals(3, doubleMetaphone.getMaxCodeLen(), "Set Max Code Length");
1237         assertEquals("JMP", doubleMetaphone.doubleMetaphone(value, false), "Max=3 Primary");
1238         assertEquals("AMP", doubleMetaphone.doubleMetaphone(value, true), "Max=3 Alternate");
1239     }
1240 
1241     public void validateFixture(final String[][] pairs) {
1242         if (pairs.length == 0) {
1243             fail("Test fixture is empty");
1244         }
1245         for (int i = 0; i < pairs.length; i++) {
1246             if (pairs[i].length != 2) {
1247                 fail("Error in test fixture in the data array at index " + i);
1248             }
1249         }
1250     }
1251 }