1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18 package org.apache.commons.codec.language;
19
20 import static org.junit.Assert.assertEquals;
21 import static org.junit.Assert.assertFalse;
22 import static org.junit.Assert.assertTrue;
23 import static org.junit.Assert.fail;
24
25 import org.apache.commons.codec.EncoderException;
26 import org.apache.commons.codec.StringEncoder;
27 import org.apache.commons.codec.StringEncoderAbstractTest;
28 import org.junit.Test;
29
30
31
32
33
34
35
36
37
38 public class DoubleMetaphoneTest extends StringEncoderAbstractTest {
39
40
41
42
43
44
45
46
47
48
49 private static final String[][] FIXTURE = { { "Accosinly", "Occasionally" }, {
50 "Ciculer", "Circler" }, {
51 "Circue", "Circle" }, {
52 "Maddness", "Madness" }, {
53 "Occusionaly", "Occasionally" }, {
54 "Steffen", "Stephen" }, {
55 "Thw", "The" }, {
56 "Unformanlly", "Unfortunately" }, {
57 "Unfortally", "Unfortunately" }, {
58 "abilitey", "ability" }, {
59 "abouy", "about" }, {
60 "absorbtion", "absorption" }, {
61 "accidently", "accidentally" }, {
62 "accomodate", "accommodate" }, {
63 "acommadate", "accommodate" }, {
64 "acord", "accord" }, {
65 "adultry", "adultery" }, {
66 "aggresive", "aggressive" }, {
67 "alchohol", "alcohol" }, {
68 "alchoholic", "alcoholic" }, {
69 "allieve", "alive" }, {
70 "alot", "a lot" }, {
71 "alright", "all right" }, {
72 "amature", "amateur" }, {
73 "ambivilant", "ambivalent" }, {
74 "amification", "amplification" }, {
75 "amourfous", "amorphous" }, {
76 "annoint", "anoint" }, {
77 "annonsment", "announcement" }, {
78 "annoyting", "anting" }, {
79 "annuncio", "announce" }, {
80 "anonomy", "anatomy" }, {
81 "anotomy", "anatomy" }, {
82 "antidesestablishmentarianism", "antidisestablishmentarianism" }, {
83 "antidisestablishmentarism", "antidisestablishmentarianism" }, {
84 "anynomous", "anonymous" }, {
85 "appelet", "applet" }, {
86 "appreceiated", "appreciated" }, {
87 "appresteate", "appreciate" }, {
88 "aquantance", "acquaintance" }, {
89 "aratictature", "architecture" }, {
90 "archeype", "archetype" }, {
91 "aricticure", "architecture" }, {
92 "artic", "arctic" }, {
93 "asentote", "asymptote" }, {
94 "ast", "at" }, {
95 "asterick", "asterisk" }, {
96 "asymetric", "asymmetric" }, {
97 "atentively", "attentively" }, {
98 "autoamlly", "automatically" }, {
99 "bankrot", "bankrupt" }, {
100 "basicly", "basically" }, {
101 "batallion", "battalion" }, {
102 "bbrose", "browse" }, {
103 "beauro", "bureau" }, {
104 "beaurocracy", "bureaucracy" }, {
105 "beggining", "beginning" }, {
106 "beging", "beginning" }, {
107 "behaviour", "behavior" }, {
108 "beleive", "believe" }, {
109 "belive", "believe" }, {
110 "benidifs", "benefits" }, {
111 "bigginging", "beginning" }, {
112 "blait", "bleat" }, {
113 "bouyant", "buoyant" }, {
114 "boygot", "boycott" }, {
115 "brocolli", "broccoli" }, {
116 "buch", "bush" }, {
117 "buder", "butter" }, {
118 "budr", "butter" }, {
119 "budter", "butter" }, {
120 "buracracy", "bureaucracy" }, {
121 "burracracy", "bureaucracy" }, {
122 "buton", "button" }, {
123 "byby", "by by" }, {
124 "cauler", "caller" }, {
125 "ceasar", "caesar" }, {
126 "cemetary", "cemetery" }, {
127 "changeing", "changing" }, {
128 "cheet", "cheat" }, {
129 "cicle", "circle" }, {
130 "cimplicity", "simplicity" }, {
131 "circumstaces", "circumstances" }, {
132 "clob", "club" }, {
133 "coaln", "colon" }, {
134 "cocamena", "cockamamie" }, {
135 "colleaque", "colleague" }, {
136 "colloquilism", "colloquialism" }, {
137 "columne", "column" }, {
138 "comiler", "compiler" }, {
139 "comitmment", "commitment" }, {
140 "comitte", "committee" }, {
141 "comittmen", "commitment" }, {
142 "comittmend", "commitment" }, {
143 "commerciasl", "commercials" }, {
144 "commited", "committed" }, {
145 "commitee", "committee" }, {
146 "companys", "companies" }, {
147 "compicated", "complicated" }, {
148 "comupter", "computer" }, {
149 "concensus", "consensus" }, {
150 "confusionism", "confucianism" }, {
151 "congradulations", "congratulations" }, {
152 "conibation", "contribution" }, {
153 "consident", "consistent" }, {
154 "consident", "consonant" }, {
155 "contast", "constant" }, {
156 "contastant", "constant" }, {
157 "contunie", "continue" }, {
158 "cooly", "coolly" }, {
159 "copping", "coping" }, {
160 "cosmoplyton", "cosmopolitan" }, {
161 "courst", "court" }, {
162 "crasy", "crazy" }, {
163 "cravets", "caveats" }, {
164 "credetability", "credibility" }, {
165 "criqitue", "critique" }, {
166 "croke", "croak" }, {
167 "crucifiction", "crucifixion" }, {
168 "crusifed", "crucified" }, {
169 "ctitique", "critique" }, {
170 "cumba", "combo" }, {
171 "custamisation", "customization" }, {
172 "dag", "dog" }, {
173 "daly", "daily" }, {
174 "danguages", "dangerous" }, {
175 "deaft", "draft" }, {
176 "defence", "defense" }, {
177 "defenly", "defiantly" }, {
178 "definate", "definite" }, {
179 "definately", "definitely" }, {
180 "dependeble", "dependable" }, {
181 "descrption", "description" }, {
182 "descrptn", "description" }, {
183 "desparate", "desperate" }, {
184 "dessicate", "desiccate" }, {
185 "destint", "distant" }, {
186 "develepment", "developments" }, {
187 "developement", "development" }, {
188 "develpond", "development" }, {
189 "devulge", "divulge" }, {
190 "diagree", "disagree" }, {
191 "dieties", "deities" }, {
192 "dinasaur", "dinosaur" }, {
193 "dinasour", "dinosaur" }, {
194 "direcyly", "directly" }, {
195 "discuess", "discuss" }, {
196 "disect", "dissect" }, {
197 "disippate", "dissipate" }, {
198 "disition", "decision" }, {
199 "dispair", "despair" }, {
200 "disssicion", "discussion" }, {
201 "distarct", "distract" }, {
202 "distart", "distort" }, {
203 "distroy", "destroy" }, {
204 "documtations", "documentation" }, {
205 "doenload", "download" }, {
206 "dongle", "dangle" }, {
207 "doog", "dog" }, {
208 "dramaticly", "dramatically" }, {
209 "drunkeness", "drunkenness" }, {
210 "ductioneery", "dictionary" }, {
211 "dur", "due" }, {
212 "duren", "during" }, {
213 "dymatic", "dynamic" }, {
214 "dynaic", "dynamic" }, {
215 "ecstacy", "ecstasy" }, {
216 "efficat", "efficient" }, {
217 "efficity", "efficacy" }, {
218 "effots", "efforts" }, {
219 "egsistence", "existence" }, {
220 "eitiology", "etiology" }, {
221 "elagent", "elegant" }, {
222 "elligit", "elegant" }, {
223 "embarass", "embarrass" }, {
224 "embarassment", "embarrassment" }, {
225 "embaress", "embarrass" }, {
226 "encapsualtion", "encapsulation" }, {
227 "encyclapidia", "encyclopedia" }, {
228 "encyclopia", "encyclopedia" }, {
229 "engins", "engine" }, {
230 "enhence", "enhance" }, {
231 "enligtment", "Enlightenment" }, {
232 "ennuui", "ennui" }, {
233 "enought", "enough" }, {
234 "enventions", "inventions" }, {
235 "envireminakl", "environmental" }, {
236 "enviroment", "environment" }, {
237 "epitomy", "epitome" }, {
238 "equire", "acquire" }, {
239 "errara", "error" }, {
240 "erro", "error" }, {
241 "evaualtion", "evaluation" }, {
242 "evething", "everything" }, {
243 "evtually", "eventually" }, {
244 "excede", "exceed" }, {
245 "excercise", "exercise" }, {
246 "excpt", "except" }, {
247 "excution", "execution" }, {
248 "exhileration", "exhilaration" }, {
249 "existance", "existence" }, {
250 "expleyly", "explicitly" }, {
251 "explity", "explicitly" }, {
252 "expresso", "espresso" }, {
253 "exspidient", "expedient" }, {
254 "extions", "extensions" }, {
255 "factontion", "factorization" }, {
256 "failer", "failure" }, {
257 "famdasy", "fantasy" }, {
258 "faver", "favor" }, {
259 "faxe", "fax" }, {
260 "febuary", "february" }, {
261 "firey", "fiery" }, {
262 "fistival", "festival" }, {
263 "flatterring", "flattering" }, {
264 "fluk", "flux" }, {
265 "flukse", "flux" }, {
266 "fone", "phone" }, {
267 "forsee", "foresee" }, {
268 "frustartaion", "frustrating" }, {
269 "fuction", "function" }, {
270 "funetik", "phonetic" }, {
271 "futs", "guts" }, {
272 "gamne", "came" }, {
273 "gaurd", "guard" }, {
274 "generly", "generally" }, {
275 "ghandi", "gandhi" }, {
276 "goberment", "government" }, {
277 "gobernement", "government" }, {
278 "gobernment", "government" }, {
279 "gotton", "gotten" }, {
280 "gracefull", "graceful" }, {
281 "gradualy", "gradually" }, {
282 "grammer", "grammar" }, {
283 "hallo", "hello" }, {
284 "hapily", "happily" }, {
285 "harrass", "harass" }, {
286 "havne", "have" }, {
287 "heellp", "help" }, {
288 "heighth", "height" }, {
289 "hellp", "help" }, {
290 "helo", "hello" }, {
291 "herlo", "hello" }, {
292 "hifin", "hyphen" }, {
293 "hifine", "hyphen" }, {
294 "higer", "higher" }, {
295 "hiphine", "hyphen" }, {
296 "hippie", "hippy" }, {
297 "hippopotamous", "hippopotamus" }, {
298 "hlp", "help" }, {
299 "hourse", "horse" }, {
300 "houssing", "housing" }, {
301 "howaver", "however" }, {
302 "howver", "however" }, {
303 "humaniti", "humanity" }, {
304 "hyfin", "hyphen" }, {
305 "hypotathes", "hypothesis" }, {
306 "hypotathese", "hypothesis" }, {
307 "hystrical", "hysterical" }, {
308 "ident", "indent" }, {
309 "illegitament", "illegitimate" }, {
310 "imbed", "embed" }, {
311 "imediaetly", "immediately" }, {
312 "imfamy", "infamy" }, {
313 "immenant", "immanent" }, {
314 "implemtes", "implements" }, {
315 "inadvertant", "inadvertent" }, {
316 "incase", "in case" }, {
317 "incedious", "insidious" }, {
318 "incompleet", "incomplete" }, {
319 "incomplot", "incomplete" }, {
320 "inconvenant", "inconvenient" }, {
321 "inconvience", "inconvenience" }, {
322 "independant", "independent" }, {
323 "independenent", "independent" }, {
324 "indepnends", "independent" }, {
325 "indepth", "in depth" }, {
326 "indispensible", "indispensable" }, {
327 "inefficite", "inefficient" }, {
328 "inerface", "interface" }, {
329 "infact", "in fact" }, {
330 "influencial", "influential" }, {
331 "inital", "initial" }, {
332 "initinized", "initialized" }, {
333 "initized", "initialized" }, {
334 "innoculate", "inoculate" }, {
335 "insistant", "insistent" }, {
336 "insistenet", "insistent" }, {
337 "instulation", "installation" }, {
338 "intealignt", "intelligent" }, {
339 "intejilent", "intelligent" }, {
340 "intelegent", "intelligent" }, {
341 "intelegnent", "intelligent" }, {
342 "intelejent", "intelligent" }, {
343 "inteligent", "intelligent" }, {
344 "intelignt", "intelligent" }, {
345 "intellagant", "intelligent" }, {
346 "intellegent", "intelligent" }, {
347 "intellegint", "intelligent" }, {
348 "intellgnt", "intelligent" }, {
349 "intensionality", "intensionally" }, {
350 "interate", "iterate" }, {
351 "internation", "international" }, {
352 "interpretate", "interpret" }, {
353 "interpretter", "interpreter" }, {
354 "intertes", "interested" }, {
355 "intertesd", "interested" }, {
356 "invermeantial", "environmental" }, {
357 "irregardless", "regardless" }, {
358 "irresistable", "irresistible" }, {
359 "irritible", "irritable" }, {
360 "islams", "muslims" }, {
361 "isotrop", "isotope" }, {
362 "isreal", "israel" }, {
363 "johhn", "john" }, {
364 "judgement", "judgment" }, {
365 "kippur", "kipper" }, {
366 "knawing", "knowing" }, {
367 "latext", "latest" }, {
368 "leasve", "leave" }, {
369 "lesure", "leisure" }, {
370 "liasion", "lesion" }, {
371 "liason", "liaison" }, {
372 "libary", "library" }, {
373 "likly", "likely" }, {
374 "lilometer", "kilometer" }, {
375 "liquify", "liquefy" }, {
376 "lloyer", "layer" }, {
377 "lossing", "losing" }, {
378 "luser", "laser" }, {
379 "maintanence", "maintenance" }, {
380 "majaerly", "majority" }, {
381 "majoraly", "majority" }, {
382 "maks", "masks" }, {
383 "mandelbrot", "Mandelbrot" }, {
384 "mant", "want" }, {
385 "marshall", "marshal" }, {
386 "maxium", "maximum" }, {
387 "meory", "memory" }, {
388 "metter", "better" }, {
389 "mic", "mike" }, {
390 "midia", "media" }, {
391 "millenium", "millennium" }, {
392 "miniscule", "minuscule" }, {
393 "minkay", "monkey" }, {
394 "minum", "minimum" }, {
395 "mischievious", "mischievous" }, {
396 "misilous", "miscellaneous" }, {
397 "momento", "memento" }, {
398 "monkay", "monkey" }, {
399 "mosaik", "mosaic" }, {
400 "mostlikely", "most likely" }, {
401 "mousr", "mouser" }, {
402 "mroe", "more" }, {
403 "neccessary", "necessary" }, {
404 "necesary", "necessary" }, {
405 "necesser", "necessary" }, {
406 "neice", "niece" }, {
407 "neighbour", "neighbor" }, {
408 "nemonic", "pneumonic" }, {
409 "nevade", "Nevada" }, {
410 "nickleodeon", "nickelodeon" }, {
411 "nieve", "naive" }, {
412 "noone", "no one" }, {
413 "noticably", "noticeably" }, {
414 "notin", "not in" }, {
415 "nozled", "nuzzled" }, {
416 "objectsion", "objects" }, {
417 "obsfuscate", "obfuscate" }, {
418 "ocassion", "occasion" }, {
419 "occuppied", "occupied" }, {
420 "occurence", "occurrence" }, {
421 "octagenarian", "octogenarian" }, {
422 "olf", "old" }, {
423 "opposim", "opossum" }, {
424 "organise", "organize" }, {
425 "organiz", "organize" }, {
426 "orientate", "orient" }, {
427 "oscilascope", "oscilloscope" }, {
428 "oving", "moving" }, {
429 "paramers", "parameters" }, {
430 "parametic", "parameter" }, {
431 "paranets", "parameters" }, {
432 "partrucal", "particular" }, {
433 "pataphysical", "metaphysical" }, {
434 "patten", "pattern" }, {
435 "permissable", "permissible" }, {
436 "permition", "permission" }, {
437 "permmasivie", "permissive" }, {
438 "perogative", "prerogative" }, {
439 "persue", "pursue" }, {
440 "phantasia", "fantasia" }, {
441 "phenominal", "phenomenal" }, {
442 "picaresque", "picturesque" }, {
443 "playwrite", "playwright" }, {
444 "poeses", "poesies" }, {
445 "polation", "politician" }, {
446 "poligamy", "polygamy" }, {
447 "politict", "politic" }, {
448 "pollice", "police" }, {
449 "polypropalene", "polypropylene" }, {
450 "pompom", "pompon" }, {
451 "possable", "possible" }, {
452 "practicle", "practical" }, {
453 "pragmaticism", "pragmatism" }, {
454 "preceeding", "preceding" }, {
455 "precion", "precision" }, {
456 "precios", "precision" }, {
457 "preemptory", "peremptory" }, {
458 "prefices", "prefixes" }, {
459 "prefixt", "prefixed" }, {
460 "presbyterian", "Presbyterian" }, {
461 "presue", "pursue" }, {
462 "presued", "pursued" }, {
463 "privielage", "privilege" }, {
464 "priviledge", "privilege" }, {
465 "proceedures", "procedures" }, {
466 "pronensiation", "pronunciation" }, {
467 "pronisation", "pronunciation" }, {
468 "pronounciation", "pronunciation" }, {
469 "properally", "properly" }, {
470 "proplematic", "problematic" }, {
471 "protray", "portray" }, {
472 "pscolgst", "psychologist" }, {
473 "psicolagest", "psychologist" }, {
474 "psycolagest", "psychologist" }, {
475 "quoz", "quiz" }, {
476 "radious", "radius" }, {
477 "ramplily", "rampantly" }, {
478 "reccomend", "recommend" }, {
479 "reccona", "raccoon" }, {
480 "recieve", "receive" }, {
481 "reconise", "recognize" }, {
482 "rectangeles", "rectangle" }, {
483 "redign", "redesign" }, {
484 "reoccurring", "recurring" }, {
485 "repitition", "repetition" }, {
486 "replasments", "replacement" }, {
487 "reposable", "responsible" }, {
488 "reseblence", "resemblance" }, {
489 "respct", "respect" }, {
490 "respecally", "respectfully" }, {
491 "roon", "room" }, {
492 "rought", "roughly" }, {
493 "rsx", "RSX" }, {
494 "rudemtry", "rudimentary" }, {
495 "runnung", "running" }, {
496 "sacreligious", "sacrilegious" }, {
497 "saftly", "safely" }, {
498 "salut", "salute" }, {
499 "satifly", "satisfy" }, {
500 "scrabdle", "scrabble" }, {
501 "searcheable", "searchable" }, {
502 "secion", "section" }, {
503 "seferal", "several" }, {
504 "segements", "segments" }, {
505 "sence", "sense" }, {
506 "seperate", "separate" }, {
507 "sherbert", "sherbet" }, {
508 "sicolagest", "psychologist" }, {
509 "sieze", "seize" }, {
510 "simpfilty", "simplicity" }, {
511 "simplye", "simply" }, {
512 "singal", "signal" }, {
513 "sitte", "site" }, {
514 "situration", "situation" }, {
515 "slyph", "sylph" }, {
516 "smil", "smile" }, {
517 "snuck", "sneaked" }, {
518 "sometmes", "sometimes" }, {
519 "soonec", "sonic" }, {
520 "specificialy", "specifically" }, {
521 "spel", "spell" }, {
522 "spoak", "spoke" }, {
523 "sponsered", "sponsored" }, {
524 "stering", "steering" }, {
525 "straightjacket", "straitjacket" }, {
526 "stumach", "stomach" }, {
527 "stutent", "student" }, {
528 "styleguide", "style guide" }, {
529 "subisitions", "substitutions" }, {
530 "subjecribed", "subscribed" }, {
531 "subpena", "subpoena" }, {
532 "substations", "substitutions" }, {
533 "suger", "sugar" }, {
534 "supercede", "supersede" }, {
535 "superfulous", "superfluous" }, {
536 "susan", "Susan" }, {
537 "swimwear", "swim wear" }, {
538 "syncorization", "synchronization" }, {
539 "taff", "tough" }, {
540 "taht", "that" }, {
541 "tattos", "tattoos" }, {
542 "techniquely", "technically" }, {
543 "teh", "the" }, {
544 "tem", "team" }, {
545 "teo", "two" }, {
546 "teridical", "theoretical" }, {
547 "tesst", "test" }, {
548 "tets", "tests" }, {
549 "thanot", "than or" }, {
550 "theirselves", "themselves" }, {
551 "theridically", "theoretical" }, {
552 "thredically", "theoretically" }, {
553 "thruout", "throughout" }, {
554 "ths", "this" }, {
555 "titalate", "titillate" }, {
556 "tobagan", "tobaggon" }, {
557 "tommorrow", "tomorrow" }, {
558 "tomorow", "tomorrow" }, {
559 "tradegy", "tragedy" }, {
560 "trubbel", "trouble" }, {
561 "ttest", "test" }, {
562 "tunnellike", "tunnel like" }, {
563 "tured", "turned" }, {
564 "tyrrany", "tyranny" }, {
565 "unatourral", "unnatural" }, {
566 "unaturral", "unnatural" }, {
567 "unconisitional", "unconstitutional" }, {
568 "unconscience", "unconscious" }, {
569 "underladder", "under ladder" }, {
570 "unentelegible", "unintelligible" }, {
571 "unfortunently", "unfortunately" }, {
572 "unnaturral", "unnatural" }, {
573 "upcast", "up cast" }, {
574 "upmost", "utmost" }, {
575 "uranisium", "uranium" }, {
576 "verison", "version" }, {
577 "vinagarette", "vinaigrette" }, {
578 "volumptuous", "voluptuous" }, {
579 "volunteerism", "voluntarism" }, {
580 "volye", "volley" }, {
581 "wadting", "wasting" }, {
582 "waite", "wait" }, {
583 "wan't", "won't" }, {
584 "warloord", "warlord" }, {
585 "whaaat", "what" }, {
586 "whard", "ward" }, {
587 "whimp", "wimp" }, {
588 "wicken", "weaken" }, {
589 "wierd", "weird" }, {
590 "wrank", "rank" }, {
591 "writeen", "righten" }, {
592 "writting", "writing" }, {
593 "wundeews", "windows" }, {
594 "yeild", "yield" }, {
595 "youe", "your" }
596 };
597
598
599
600
601 private static final String[][] MATCHES = { { "Accosinly", "Occasionally" }, {
602 "Maddness", "Madness" }, {
603 "Occusionaly", "Occasionally" }, {
604 "Steffen", "Stephen" }, {
605 "Thw", "The" }, {
606 "Unformanlly", "Unfortunately" }, {
607 "Unfortally", "Unfortunately" }, {
608 "abilitey", "ability" }, {
609 "absorbtion", "absorption" }, {
610 "accidently", "accidentally" }, {
611 "accomodate", "accommodate" }, {
612 "acommadate", "accommodate" }, {
613 "acord", "accord" }, {
614 "adultry", "adultery" }, {
615 "aggresive", "aggressive" }, {
616 "alchohol", "alcohol" }, {
617 "alchoholic", "alcoholic" }, {
618 "allieve", "alive" }, {
619 "alot", "a lot" }, {
620 "alright", "all right" }, {
621 "amature", "amateur" }, {
622 "ambivilant", "ambivalent" }, {
623 "amourfous", "amorphous" }, {
624 "annoint", "anoint" }, {
625 "annonsment", "announcement" }, {
626 "annoyting", "anting" }, {
627 "annuncio", "announce" }, {
628 "anotomy", "anatomy" }, {
629 "antidesestablishmentarianism", "antidisestablishmentarianism" }, {
630 "antidisestablishmentarism", "antidisestablishmentarianism" }, {
631 "anynomous", "anonymous" }, {
632 "appelet", "applet" }, {
633 "appreceiated", "appreciated" }, {
634 "appresteate", "appreciate" }, {
635 "aquantance", "acquaintance" }, {
636 "aricticure", "architecture" }, {
637 "asterick", "asterisk" }, {
638 "asymetric", "asymmetric" }, {
639 "atentively", "attentively" }, {
640 "bankrot", "bankrupt" }, {
641 "basicly", "basically" }, {
642 "batallion", "battalion" }, {
643 "bbrose", "browse" }, {
644 "beauro", "bureau" }, {
645 "beaurocracy", "bureaucracy" }, {
646 "beggining", "beginning" }, {
647 "behaviour", "behavior" }, {
648 "beleive", "believe" }, {
649 "belive", "believe" }, {
650 "blait", "bleat" }, {
651 "bouyant", "buoyant" }, {
652 "boygot", "boycott" }, {
653 "brocolli", "broccoli" }, {
654 "buder", "butter" }, {
655 "budr", "butter" }, {
656 "budter", "butter" }, {
657 "buracracy", "bureaucracy" }, {
658 "burracracy", "bureaucracy" }, {
659 "buton", "button" }, {
660 "byby", "by by" }, {
661 "cauler", "caller" }, {
662 "ceasar", "caesar" }, {
663 "cemetary", "cemetery" }, {
664 "changeing", "changing" }, {
665 "cheet", "cheat" }, {
666 "cimplicity", "simplicity" }, {
667 "circumstaces", "circumstances" }, {
668 "clob", "club" }, {
669 "coaln", "colon" }, {
670 "colleaque", "colleague" }, {
671 "colloquilism", "colloquialism" }, {
672 "columne", "column" }, {
673 "comitmment", "commitment" }, {
674 "comitte", "committee" }, {
675 "comittmen", "commitment" }, {
676 "comittmend", "commitment" }, {
677 "commerciasl", "commercials" }, {
678 "commited", "committed" }, {
679 "commitee", "committee" }, {
680 "companys", "companies" }, {
681 "comupter", "computer" }, {
682 "concensus", "consensus" }, {
683 "confusionism", "confucianism" }, {
684 "congradulations", "congratulations" }, {
685 "contunie", "continue" }, {
686 "cooly", "coolly" }, {
687 "copping", "coping" }, {
688 "cosmoplyton", "cosmopolitan" }, {
689 "crasy", "crazy" }, {
690 "croke", "croak" }, {
691 "crucifiction", "crucifixion" }, {
692 "crusifed", "crucified" }, {
693 "cumba", "combo" }, {
694 "custamisation", "customization" }, {
695 "dag", "dog" }, {
696 "daly", "daily" }, {
697 "defence", "defense" }, {
698 "definate", "definite" }, {
699 "definately", "definitely" }, {
700 "dependeble", "dependable" }, {
701 "descrption", "description" }, {
702 "descrptn", "description" }, {
703 "desparate", "desperate" }, {
704 "dessicate", "desiccate" }, {
705 "destint", "distant" }, {
706 "develepment", "developments" }, {
707 "developement", "development" }, {
708 "develpond", "development" }, {
709 "devulge", "divulge" }, {
710 "dieties", "deities" }, {
711 "dinasaur", "dinosaur" }, {
712 "dinasour", "dinosaur" }, {
713 "discuess", "discuss" }, {
714 "disect", "dissect" }, {
715 "disippate", "dissipate" }, {
716 "disition", "decision" }, {
717 "dispair", "despair" }, {
718 "distarct", "distract" }, {
719 "distart", "distort" }, {
720 "distroy", "destroy" }, {
721 "doenload", "download" }, {
722 "dongle", "dangle" }, {
723 "doog", "dog" }, {
724 "dramaticly", "dramatically" }, {
725 "drunkeness", "drunkenness" }, {
726 "ductioneery", "dictionary" }, {
727 "ecstacy", "ecstasy" }, {
728 "egsistence", "existence" }, {
729 "eitiology", "etiology" }, {
730 "elagent", "elegant" }, {
731 "embarass", "embarrass" }, {
732 "embarassment", "embarrassment" }, {
733 "embaress", "embarrass" }, {
734 "encapsualtion", "encapsulation" }, {
735 "encyclapidia", "encyclopedia" }, {
736 "encyclopia", "encyclopedia" }, {
737 "engins", "engine" }, {
738 "enhence", "enhance" }, {
739 "ennuui", "ennui" }, {
740 "enventions", "inventions" }, {
741 "envireminakl", "environmental" }, {
742 "enviroment", "environment" }, {
743 "epitomy", "epitome" }, {
744 "equire", "acquire" }, {
745 "errara", "error" }, {
746 "evaualtion", "evaluation" }, {
747 "excede", "exceed" }, {
748 "excercise", "exercise" }, {
749 "excpt", "except" }, {
750 "exhileration", "exhilaration" }, {
751 "existance", "existence" }, {
752 "expleyly", "explicitly" }, {
753 "explity", "explicitly" }, {
754 "failer", "failure" }, {
755 "faver", "favor" }, {
756 "faxe", "fax" }, {
757 "firey", "fiery" }, {
758 "fistival", "festival" }, {
759 "flatterring", "flattering" }, {
760 "flukse", "flux" }, {
761 "fone", "phone" }, {
762 "forsee", "foresee" }, {
763 "frustartaion", "frustrating" }, {
764 "funetik", "phonetic" }, {
765 "gaurd", "guard" }, {
766 "generly", "generally" }, {
767 "ghandi", "gandhi" }, {
768 "gotton", "gotten" }, {
769 "gracefull", "graceful" }, {
770 "gradualy", "gradually" }, {
771 "grammer", "grammar" }, {
772 "hallo", "hello" }, {
773 "hapily", "happily" }, {
774 "harrass", "harass" }, {
775 "heellp", "help" }, {
776 "heighth", "height" }, {
777 "hellp", "help" }, {
778 "helo", "hello" }, {
779 "hifin", "hyphen" }, {
780 "hifine", "hyphen" }, {
781 "hiphine", "hyphen" }, {
782 "hippie", "hippy" }, {
783 "hippopotamous", "hippopotamus" }, {
784 "hourse", "horse" }, {
785 "houssing", "housing" }, {
786 "howaver", "however" }, {
787 "howver", "however" }, {
788 "humaniti", "humanity" }, {
789 "hyfin", "hyphen" }, {
790 "hystrical", "hysterical" }, {
791 "illegitament", "illegitimate" }, {
792 "imbed", "embed" }, {
793 "imediaetly", "immediately" }, {
794 "immenant", "immanent" }, {
795 "implemtes", "implements" }, {
796 "inadvertant", "inadvertent" }, {
797 "incase", "in case" }, {
798 "incedious", "insidious" }, {
799 "incompleet", "incomplete" }, {
800 "incomplot", "incomplete" }, {
801 "inconvenant", "inconvenient" }, {
802 "inconvience", "inconvenience" }, {
803 "independant", "independent" }, {
804 "independenent", "independent" }, {
805 "indepnends", "independent" }, {
806 "indepth", "in depth" }, {
807 "indispensible", "indispensable" }, {
808 "inefficite", "inefficient" }, {
809 "infact", "in fact" }, {
810 "influencial", "influential" }, {
811 "innoculate", "inoculate" }, {
812 "insistant", "insistent" }, {
813 "insistenet", "insistent" }, {
814 "instulation", "installation" }, {
815 "intealignt", "intelligent" }, {
816 "intelegent", "intelligent" }, {
817 "intelegnent", "intelligent" }, {
818 "intelejent", "intelligent" }, {
819 "inteligent", "intelligent" }, {
820 "intelignt", "intelligent" }, {
821 "intellagant", "intelligent" }, {
822 "intellegent", "intelligent" }, {
823 "intellegint", "intelligent" }, {
824 "intellgnt", "intelligent" }, {
825 "intensionality", "intensionally" }, {
826 "internation", "international" }, {
827 "interpretate", "interpret" }, {
828 "interpretter", "interpreter" }, {
829 "intertes", "interested" }, {
830 "intertesd", "interested" }, {
831 "invermeantial", "environmental" }, {
832 "irresistable", "irresistible" }, {
833 "irritible", "irritable" }, {
834 "isreal", "israel" }, {
835 "johhn", "john" }, {
836 "kippur", "kipper" }, {
837 "knawing", "knowing" }, {
838 "lesure", "leisure" }, {
839 "liasion", "lesion" }, {
840 "liason", "liaison" }, {
841 "likly", "likely" }, {
842 "liquify", "liquefy" }, {
843 "lloyer", "layer" }, {
844 "lossing", "losing" }, {
845 "luser", "laser" }, {
846 "maintanence", "maintenance" }, {
847 "mandelbrot", "Mandelbrot" }, {
848 "marshall", "marshal" }, {
849 "maxium", "maximum" }, {
850 "mic", "mike" }, {
851 "midia", "media" }, {
852 "millenium", "millennium" }, {
853 "miniscule", "minuscule" }, {
854 "minkay", "monkey" }, {
855 "mischievious", "mischievous" }, {
856 "momento", "memento" }, {
857 "monkay", "monkey" }, {
858 "mosaik", "mosaic" }, {
859 "mostlikely", "most likely" }, {
860 "mousr", "mouser" }, {
861 "mroe", "more" }, {
862 "necesary", "necessary" }, {
863 "necesser", "necessary" }, {
864 "neice", "niece" }, {
865 "neighbour", "neighbor" }, {
866 "nemonic", "pneumonic" }, {
867 "nevade", "Nevada" }, {
868 "nickleodeon", "nickelodeon" }, {
869 "nieve", "naive" }, {
870 "noone", "no one" }, {
871 "notin", "not in" }, {
872 "nozled", "nuzzled" }, {
873 "objectsion", "objects" }, {
874 "ocassion", "occasion" }, {
875 "occuppied", "occupied" }, {
876 "occurence", "occurrence" }, {
877 "octagenarian", "octogenarian" }, {
878 "opposim", "opossum" }, {
879 "organise", "organize" }, {
880 "organiz", "organize" }, {
881 "orientate", "orient" }, {
882 "oscilascope", "oscilloscope" }, {
883 "parametic", "parameter" }, {
884 "permissable", "permissible" }, {
885 "permmasivie", "permissive" }, {
886 "persue", "pursue" }, {
887 "phantasia", "fantasia" }, {
888 "phenominal", "phenomenal" }, {
889 "playwrite", "playwright" }, {
890 "poeses", "poesies" }, {
891 "poligamy", "polygamy" }, {
892 "politict", "politic" }, {
893 "pollice", "police" }, {
894 "polypropalene", "polypropylene" }, {
895 "possable", "possible" }, {
896 "practicle", "practical" }, {
897 "pragmaticism", "pragmatism" }, {
898 "preceeding", "preceding" }, {
899 "precios", "precision" }, {
900 "preemptory", "peremptory" }, {
901 "prefixt", "prefixed" }, {
902 "presbyterian", "Presbyterian" }, {
903 "presue", "pursue" }, {
904 "presued", "pursued" }, {
905 "privielage", "privilege" }, {
906 "priviledge", "privilege" }, {
907 "proceedures", "procedures" }, {
908 "pronensiation", "pronunciation" }, {
909 "pronounciation", "pronunciation" }, {
910 "properally", "properly" }, {
911 "proplematic", "problematic" }, {
912 "protray", "portray" }, {
913 "pscolgst", "psychologist" }, {
914 "psicolagest", "psychologist" }, {
915 "psycolagest", "psychologist" }, {
916 "quoz", "quiz" }, {
917 "radious", "radius" }, {
918 "reccomend", "recommend" }, {
919 "reccona", "raccoon" }, {
920 "recieve", "receive" }, {
921 "reconise", "recognize" }, {
922 "rectangeles", "rectangle" }, {
923 "reoccurring", "recurring" }, {
924 "repitition", "repetition" }, {
925 "replasments", "replacement" }, {
926 "respct", "respect" }, {
927 "respecally", "respectfully" }, {
928 "rsx", "RSX" }, {
929 "runnung", "running" }, {
930 "sacreligious", "sacrilegious" }, {
931 "salut", "salute" }, {
932 "searcheable", "searchable" }, {
933 "seferal", "several" }, {
934 "segements", "segments" }, {
935 "sence", "sense" }, {
936 "seperate", "separate" }, {
937 "sicolagest", "psychologist" }, {
938 "sieze", "seize" }, {
939 "simplye", "simply" }, {
940 "sitte", "site" }, {
941 "slyph", "sylph" }, {
942 "smil", "smile" }, {
943 "sometmes", "sometimes" }, {
944 "soonec", "sonic" }, {
945 "specificialy", "specifically" }, {
946 "spel", "spell" }, {
947 "spoak", "spoke" }, {
948 "sponsered", "sponsored" }, {
949 "stering", "steering" }, {
950 "straightjacket", "straitjacket" }, {
951 "stumach", "stomach" }, {
952 "stutent", "student" }, {
953 "styleguide", "style guide" }, {
954 "subpena", "subpoena" }, {
955 "substations", "substitutions" }, {
956 "supercede", "supersede" }, {
957 "superfulous", "superfluous" }, {
958 "susan", "Susan" }, {
959 "swimwear", "swim wear" }, {
960 "syncorization", "synchronization" }, {
961 "taff", "tough" }, {
962 "taht", "that" }, {
963 "tattos", "tattoos" }, {
964 "techniquely", "technically" }, {
965 "teh", "the" }, {
966 "tem", "team" }, {
967 "teo", "two" }, {
968 "teridical", "theoretical" }, {
969 "tesst", "test" }, {
970 "theridically", "theoretical" }, {
971 "thredically", "theoretically" }, {
972 "thruout", "throughout" }, {
973 "ths", "this" }, {
974 "titalate", "titillate" }, {
975 "tobagan", "tobaggon" }, {
976 "tommorrow", "tomorrow" }, {
977 "tomorow", "tomorrow" }, {
978 "trubbel", "trouble" }, {
979 "ttest", "test" }, {
980 "tyrrany", "tyranny" }, {
981 "unatourral", "unnatural" }, {
982 "unaturral", "unnatural" }, {
983 "unconisitional", "unconstitutional" }, {
984 "unconscience", "unconscious" }, {
985 "underladder", "under ladder" }, {
986 "unentelegible", "unintelligible" }, {
987 "unfortunently", "unfortunately" }, {
988 "unnaturral", "unnatural" }, {
989 "upcast", "up cast" }, {
990 "verison", "version" }, {
991 "vinagarette", "vinaigrette" }, {
992 "volunteerism", "voluntarism" }, {
993 "volye", "volley" }, {
994 "waite", "wait" }, {
995 "wan't", "won't" }, {
996 "warloord", "warlord" }, {
997 "whaaat", "what" }, {
998 "whard", "ward" }, {
999 "whimp", "wimp" }, {
1000 "wicken", "weaken" }, {
1001 "wierd", "weird" }, {
1002 "wrank", "rank" }, {
1003 "writeen", "righten" }, {
1004 "writting", "writing" }, {
1005 "wundeews", "windows" }, {
1006 "yeild", "yield" }, };
1007
1008
1009
1010
1011 private void assertDoubleMetaphone(String expected, String source) {
1012 assertEquals(expected, this.getDoubleMetaphone().encode(source));
1013 try {
1014 assertEquals(expected, this.getDoubleMetaphone().encode((Object) source));
1015 } catch (EncoderException e) {
1016 fail("Unexpected expection: " + e);
1017 }
1018 assertEquals(expected, this.getDoubleMetaphone().doubleMetaphone(source));
1019 assertEquals(expected, this.getDoubleMetaphone().doubleMetaphone(source, false));
1020 }
1021
1022
1023
1024
1025 public void assertDoubleMetaphoneAlt(String expected, String source) {
1026 assertEquals(expected, this.getDoubleMetaphone().doubleMetaphone(source, true));
1027 }
1028
1029 public void doubleMetaphoneEqualTest(String[][] pairs, boolean useAlternate) {
1030 this.validateFixture(pairs);
1031 for (String[] pair : pairs) {
1032 String name0 = pair[0];
1033 String name1 = pair[1];
1034 String failMsg = "Expected match between " + name0 + " and " + name1 + " (use alternate: " + useAlternate + ")";
1035 assertTrue(failMsg, this.getDoubleMetaphone().isDoubleMetaphoneEqual(name0, name1, useAlternate));
1036 assertTrue(failMsg, this.getDoubleMetaphone().isDoubleMetaphoneEqual(name1, name0, useAlternate));
1037 if (!useAlternate) {
1038 assertTrue(failMsg, this.getDoubleMetaphone().isDoubleMetaphoneEqual(name0, name1));
1039 assertTrue(failMsg, this.getDoubleMetaphone().isDoubleMetaphoneEqual(name1, name0));
1040 }
1041 }
1042 }
1043
1044 public void doubleMetaphoneNotEqualTest(boolean alternate) {
1045 assertFalse(this.getDoubleMetaphone().isDoubleMetaphoneEqual("Brain", "Band", alternate));
1046 assertFalse(this.getDoubleMetaphone().isDoubleMetaphoneEqual("Band", "Brain", alternate));
1047
1048 if (!alternate) {
1049 assertFalse(this.getDoubleMetaphone().isDoubleMetaphoneEqual("Brain", "Band"));
1050 assertFalse(this.getDoubleMetaphone().isDoubleMetaphoneEqual("Band", "Brain"));
1051 }
1052 }
1053
1054 private DoubleMetaphone getDoubleMetaphone() {
1055 return (DoubleMetaphone) this.getStringEncoder();
1056 }
1057
1058 @Override
1059 protected StringEncoder createStringEncoder() {
1060 return new DoubleMetaphone();
1061 }
1062
1063 @Test
1064 public void testDoubleMetaphone() {
1065 assertDoubleMetaphone("TSTN", "testing");
1066 assertDoubleMetaphone("0", "The");
1067 assertDoubleMetaphone("KK", "quick");
1068 assertDoubleMetaphone("PRN", "brown");
1069 assertDoubleMetaphone("FKS", "fox");
1070 assertDoubleMetaphone("JMPT", "jumped");
1071 assertDoubleMetaphone("AFR", "over");
1072 assertDoubleMetaphone("0", "the");
1073 assertDoubleMetaphone("LS", "lazy");
1074 assertDoubleMetaphone("TKS", "dogs");
1075 assertDoubleMetaphone("MKFR", "MacCafferey");
1076 assertDoubleMetaphone("STFN", "Stephan");
1077 assertDoubleMetaphone("KSSK", "Kuczewski");
1078 assertDoubleMetaphone("MKLL", "McClelland");
1079 assertDoubleMetaphone("SNHS", "san jose");
1080 assertDoubleMetaphone("SNFP", "xenophobia");
1081
1082 assertDoubleMetaphoneAlt("TSTN", "testing");
1083 assertDoubleMetaphoneAlt("T", "The");
1084 assertDoubleMetaphoneAlt("KK", "quick");
1085 assertDoubleMetaphoneAlt("PRN", "brown");
1086 assertDoubleMetaphoneAlt("FKS", "fox");
1087 assertDoubleMetaphoneAlt("AMPT", "jumped");
1088 assertDoubleMetaphoneAlt("AFR", "over");
1089 assertDoubleMetaphoneAlt("T", "the");
1090 assertDoubleMetaphoneAlt("LS", "lazy");
1091 assertDoubleMetaphoneAlt("TKS", "dogs");
1092 assertDoubleMetaphoneAlt("MKFR", "MacCafferey");
1093 assertDoubleMetaphoneAlt("STFN", "Stephan");
1094 assertDoubleMetaphoneAlt("KXFS", "Kutchefski");
1095 assertDoubleMetaphoneAlt("MKLL", "McClelland");
1096 assertDoubleMetaphoneAlt("SNHS", "san jose");
1097 assertDoubleMetaphoneAlt("SNFP", "xenophobia");
1098 assertDoubleMetaphoneAlt("FKR", "Fokker");
1099 assertDoubleMetaphoneAlt("AK", "Joqqi");
1100 assertDoubleMetaphoneAlt("HF", "Hovvi");
1101 assertDoubleMetaphoneAlt("XRN", "Czerny");
1102 }
1103
1104 @Test
1105 public void testEmpty() {
1106 assertEquals(null, this.getDoubleMetaphone().doubleMetaphone(null));
1107 assertEquals(null, this.getDoubleMetaphone().doubleMetaphone(""));
1108 assertEquals(null, this.getDoubleMetaphone().doubleMetaphone(" "));
1109 assertEquals(null, this.getDoubleMetaphone().doubleMetaphone("\t\n\r "));
1110 }
1111
1112
1113
1114
1115 @Test
1116 public void testSetMaxCodeLength() {
1117 String value = "jumped";
1118
1119 DoubleMetaphone doubleMetaphone = new DoubleMetaphone();
1120
1121
1122 assertEquals("Default Max Code Length", 4, doubleMetaphone.getMaxCodeLen());
1123 assertEquals("Default Primary", "JMPT", doubleMetaphone.doubleMetaphone(value, false));
1124 assertEquals("Default Alternate", "AMPT", doubleMetaphone.doubleMetaphone(value, true));
1125
1126
1127 doubleMetaphone.setMaxCodeLen(3);
1128 assertEquals("Set Max Code Length", 3, doubleMetaphone.getMaxCodeLen());
1129 assertEquals("Max=3 Primary", "JMP", doubleMetaphone.doubleMetaphone(value, false));
1130 assertEquals("Max=3 Alternate", "AMP", doubleMetaphone.doubleMetaphone(value, true));
1131 }
1132
1133 @Test
1134 public void testIsDoubleMetaphoneEqualBasic() {
1135 String[][] testFixture = new String[][] { { "Case", "case" }, {
1136 "CASE", "Case" }, {
1137 "caSe", "cAsE" }, {
1138 "cookie", "quick" }, {
1139 "quick", "cookie" }, {
1140 "Brian", "Bryan" }, {
1141 "Auto", "Otto" }, {
1142 "Steven", "Stefan" }, {
1143 "Philipowitz", "Filipowicz" }
1144 };
1145 doubleMetaphoneEqualTest(testFixture, false);
1146 doubleMetaphoneEqualTest(testFixture, true);
1147 }
1148
1149
1150
1151
/**
 * Placeholder test: the body contains no statements, so it asserts nothing
 * and always passes.
 */
@Test
public void testIsDoubleMetaphoneEqualExtended1() {
    // NOTE(review): empty body; confirm whether checks are meant to be added here.




}
1159
1160 @Test
1161 public void testIsDoubleMetaphoneEqualExtended2() {
1162 String[][] testFixture = new String[][] { { "Jablonski", "Yablonsky" }
1163 };
1164
1165 doubleMetaphoneEqualTest(testFixture, true);
1166 }
1167
1168
1169
1170
1171
1172 @Test
1173 public void testIsDoubleMetaphoneEqualExtended3() {
1174 this.validateFixture(FIXTURE);
1175 StringBuilder failures = new StringBuilder();
1176 StringBuilder matches = new StringBuilder();
1177 String cr = System.getProperty("line.separator");
1178 matches.append("private static final String[][] MATCHES = {" + cr);
1179 int failCount = 0;
1180 for (int i = 0; i < FIXTURE.length; i++) {
1181 String name0 = FIXTURE[i][0];
1182 String name1 = FIXTURE[i][1];
1183 boolean match1 = this.getDoubleMetaphone().isDoubleMetaphoneEqual(name0, name1, false);
1184 boolean match2 = this.getDoubleMetaphone().isDoubleMetaphoneEqual(name0, name1, true);
1185 if (match1 == false && match2 == false) {
1186 String failMsg = "[" + i + "] " + name0 + " and " + name1 + cr;
1187 failures.append(failMsg);
1188 failCount++;
1189 } else {
1190 matches.append("{\"" + name0 + "\", \"" + name1 + "\"}," + cr);
1191 }
1192 }
1193 matches.append("};");
1194
1195
1196 if (failCount > 0) {
1197
1198
1199
1200
1201 }
1202 }
1203
1204 @Test
1205 public void testIsDoubleMetaphoneEqualWithMATCHES() {
1206 this.validateFixture(MATCHES);
1207 for (int i = 0; i < MATCHES.length; i++) {
1208 String name0 = MATCHES[i][0];
1209 String name1 = MATCHES[i][1];
1210 boolean match1 = this.getDoubleMetaphone().isDoubleMetaphoneEqual(name0, name1, false);
1211 boolean match2 = this.getDoubleMetaphone().isDoubleMetaphoneEqual(name0, name1, true);
1212 if (match1 == false && match2 == false) {
1213 fail("Expected match [" + i + "] " + name0 + " and " + name1);
1214 }
1215 }
1216 }
1217
1218 @Test
1219 public void testIsDoubleMetaphoneNotEqual() {
1220 doubleMetaphoneNotEqualTest(false);
1221 doubleMetaphoneNotEqualTest(true);
1222 }
1223
1224 @Test
1225 public void testCCedilla() {
1226 assertTrue(this.getDoubleMetaphone().isDoubleMetaphoneEqual("\u00e7", "S"));
1227 }
1228
1229 @Test
1230 public void testNTilde() {
1231 assertTrue(this.getDoubleMetaphone().isDoubleMetaphoneEqual("\u00f1", "N"));
1232 }
1233
1234 public void validateFixture(String[][] pairs) {
1235 if (pairs.length == 0) {
1236 fail("Test fixture is empty");
1237 }
1238 for (int i = 0; i < pairs.length; i++) {
1239 if (pairs[i].length != 2) {
1240 fail("Error in test fixture in the data array at index " + i);
1241 }
1242 }
1243 }
1244 }