View Javadoc

1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one or more
3    * contributor license agreements.  See the NOTICE file distributed with
4    * this work for additional information regarding copyright ownership.
5    * The ASF licenses this file to You under the Apache License, Version 2.0
6    * (the "License"); you may not use this file except in compliance with
7    * the License.  You may obtain a copy of the License at
8    * 
9    *      http://www.apache.org/licenses/LICENSE-2.0
10   * 
11   * Unless required by applicable law or agreed to in writing, software
12   * distributed under the License is distributed on an "AS IS" BASIS,
13   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14   * See the License for the specific language governing permissions and
15   * limitations under the License.
16   */ 
17  
18  package org.apache.commons.codec.language;
19  
20  import org.apache.commons.codec.EncoderException;
21  import org.apache.commons.codec.StringEncoder;
22  
23  /**
24   * Encodes a string into a double metaphone value.
25   * This Implementation is based on the algorithm by <CITE>Lawrence Philips</CITE>.
26   * <ul>
27   * <li>Original Article: <a 
28   * href="http://www.cuj.com/documents/s=8038/cuj0006philips/">
29   * http://www.cuj.com/documents/s=8038/cuj0006philips/</a></li>
30   * <li>Original Source Code: <a href="ftp://ftp.cuj.com/pub/2000/1806/philips.zip">
31   * ftp://ftp.cuj.com/pub/2000/1806/philips.zip</a></li>
32   * </ul>
33   * 
34   * @author Apache Software Foundation
35   * @version $Id: DoubleMetaphone.java 480406 2006-11-29 04:56:58Z bayard $
36   */
37  public class DoubleMetaphone implements StringEncoder {
38  
    /**
     * Characters treated as "vowels" by the encoder; note that 'Y' is included.
     */
    private static final String VOWELS = "AEIOUY";

    /**
     * Two-letter prefixes which, when present at the start of a word, are not
     * pronounced as written.
     * NOTE(review): presumably consulted by <code>isSilentStart</code>, which is
     * not visible in this part of the file - confirm.
     */
    private static final String[] SILENT_START = 
    { "GN", "KN", "PN", "WR", "PS" };
    /**
     * Single characters tested two positions after the 'C' of a "CH" pair
     * (see <code>conditionCH1</code>); the list includes a blank.
     */
    private static final String[] L_R_N_M_B_H_F_V_W_SPACE = 
    { "L", "R", "N", "M", "B", "H", "F", "V", "W", " " };
    /**
     * Two-letter sequences tested directly after a 'G' at the start of the
     * input (see <code>handleG</code>).
     */
    private static final String[] ES_EP_EB_EL_EY_IB_IL_IN_IE_EI_ER = 
    { "ES", "EP", "EB", "EL", "EY", "IB", "IL", "IN", "IE", "EI", "ER" };
    /**
     * Single characters tested directly after a 'J' (see <code>handleJ</code>).
     */
    private static final String[] L_T_K_S_N_M_B_Z = 
    { "L", "T", "K", "S", "N", "M", "B", "Z" };

    /**
     * Maximum length of an encoding, default is 4.  The value is read through
     * <code>getMaxCodeLen()</code> each time an encoding is started.
     */
    protected int maxCodeLen = 4;
60  
61      /**
62       * Creates an instance of this DoubleMetaphone encoder
63       */
64      public DoubleMetaphone() {
65          super();
66      }
67      
68      /**
69       * Encode a value with Double Metaphone
70       *
71       * @param value String to encode
72       * @return an encoded string
73       */
74      public String doubleMetaphone(String value) {
75          return doubleMetaphone(value, false);
76      }
77      
78      /**
79       * Encode a value with Double Metaphone, optionally using the alternate
80       * encoding.
81       *
82       * @param value String to encode
83       * @param alternate use alternate encode
84       * @return an encoded string
85       */
86      public String doubleMetaphone(String value, boolean alternate) {
87          value = cleanInput(value);
88          if (value == null) {
89              return null;
90          }
91          
92          boolean slavoGermanic = isSlavoGermanic(value);
93          int index = isSilentStart(value) ? 1 : 0;
94          
95          DoubleMetaphoneResult result = new DoubleMetaphoneResult(this.getMaxCodeLen());
96          
97          while (!result.isComplete() && index <= value.length() - 1) {
98              switch (value.charAt(index)) {
99              case 'A':
100             case 'E':
101             case 'I':
102             case 'O':
103             case 'U':
104             case 'Y':
105                 index = handleAEIOUY(value, result, index);
106                 break;
107             case 'B':
108                 result.append('P');
109                 index = charAt(value, index + 1) == 'B' ? index + 2 : index + 1;
110                 break;
111             case '\u00C7':
112                 // A C with a Cedilla
113                 result.append('S');
114                 index++;
115                 break; 
116             case 'C':
117                 index = handleC(value, result, index);
118                 break;
119             case 'D':
120                 index = handleD(value, result, index);
121                 break;
122             case 'F':
123                 result.append('F');
124                 index = charAt(value, index + 1) == 'F' ? index + 2 : index + 1;
125                 break;
126             case 'G':
127                 index = handleG(value, result, index, slavoGermanic);
128                 break;
129             case 'H':
130                 index = handleH(value, result, index);
131                 break;
132             case 'J':
133                 index = handleJ(value, result, index, slavoGermanic);
134                 break;
135             case 'K':
136                 result.append('K');
137                 index = charAt(value, index + 1) == 'K' ? index + 2 : index + 1;
138                 break;
139             case 'L':
140                 index = handleL(value, result, index);
141                 break;
142             case 'M':
143                 result.append('M');
144                 index = conditionM0(value, index) ? index + 2 : index + 1;
145                 break;
146             case 'N':
147                 result.append('N');
148                 index = charAt(value, index + 1) == 'N' ? index + 2 : index + 1;
149                 break;
150             case '\u00D1':
151                 // N with a tilde (spanish ene)
152                 result.append('N');
153                 index++;
154                 break;
155             case 'P':
156                 index = handleP(value, result, index);
157                 break;
158             case 'Q':
159                 result.append('K');
160                 index = charAt(value, index + 1) == 'Q' ? index + 2 : index + 1;
161                 break;
162             case 'R':
163                 index = handleR(value, result, index, slavoGermanic);
164                 break;
165             case 'S':
166                 index = handleS(value, result, index, slavoGermanic);
167                 break;
168             case 'T':
169                 index = handleT(value, result, index);
170                 break;
171             case 'V':
172                 result.append('F');
173                 index = charAt(value, index + 1) == 'V' ? index + 2 : index + 1;
174                 break;
175             case 'W':
176                 index = handleW(value, result, index);
177                 break;
178             case 'X':
179                 index = handleX(value, result, index);
180                 break;
181             case 'Z':
182                 index = handleZ(value, result, index, slavoGermanic);
183                 break;
184             default:
185                 index++;
186                 break;
187             }
188         }
189 
190         return alternate ? result.getAlternate() : result.getPrimary();
191     }
192     
193     /**
194      * Encode the value using DoubleMetaphone.  It will only work if 
195      * <code>obj</code> is a <code>String</code> (like <code>Metaphone</code>).
196      *
197      * @param obj Object to encode (should be of type String)
198      * @return An encoded Object (will be of type String)
199      * @throws EncoderException encode parameter is not of type String
200      */
201     public Object encode(Object obj) throws EncoderException {
202         if (!(obj instanceof String)) {
203             throw new EncoderException("DoubleMetaphone encode parameter is not of type String"); 
204         } 
205         return doubleMetaphone((String) obj);
206     }
207 
208     /**
209      * Encode the value using DoubleMetaphone.
210      *
211      * @param value String to encode
212      * @return An encoded String
213      */
214     public String encode(String value) {
215         return doubleMetaphone(value);   
216     }
217 
218     /**
219      * Check if the Double Metaphone values of two <code>String</code> values
220      * are equal.
221      * 
222      * @param value1 The left-hand side of the encoded {@link String#equals(Object)}.
223      * @param value2 The right-hand side of the encoded {@link String#equals(Object)}.
224      * @return <code>true</code> if the encoded <code>String</code>s are equal;
225      *          <code>false</code> otherwise.
226      * @see #isDoubleMetaphoneEqual(String,String,boolean)
227      */
228     public boolean isDoubleMetaphoneEqual(String value1, String value2) {
229         return isDoubleMetaphoneEqual(value1, value2, false);
230     }
231     
232     /**
233      * Check if the Double Metaphone values of two <code>String</code> values
234      * are equal, optionally using the alternate value.
235      * 
236      * @param value1 The left-hand side of the encoded {@link String#equals(Object)}.
237      * @param value2 The right-hand side of the encoded {@link String#equals(Object)}.
238      * @param alternate use the alternate value if <code>true</code>.
239      * @return <code>true</code> if the encoded <code>String</code>s are equal;
240      *          <code>false</code> otherwise.
241      */
242     public boolean isDoubleMetaphoneEqual(String value1, 
243                                           String value2, 
244                                           boolean alternate) {
245         return doubleMetaphone(value1, alternate).equals(doubleMetaphone
246                                                          (value2, alternate));
247     }
248     
249     /**
250      * Returns the maxCodeLen.
251      * @return int
252      */
253     public int getMaxCodeLen() {
254         return this.maxCodeLen;
255     }
256 
257     /**
258      * Sets the maxCodeLen.
259      * @param maxCodeLen The maxCodeLen to set
260      */
261     public void setMaxCodeLen(int maxCodeLen) {
262         this.maxCodeLen = maxCodeLen;
263     }
264 
265     //-- BEGIN HANDLERS --//
266 
267     /**
268      * Handles 'A', 'E', 'I', 'O', 'U', and 'Y' cases
269      */
270     private int handleAEIOUY(String value, DoubleMetaphoneResult result, int 
271                              index) {
272         if (index == 0) {
273             result.append('A');
274         }
275         return index + 1;
276     }
277     
278     /**
279      * Handles 'C' cases
280      */
281     private int handleC(String value, 
282                         DoubleMetaphoneResult result, 
283                         int index) {
284         if (conditionC0(value, index)) {  // very confusing, moved out
285             result.append('K');
286             index += 2;
287         } else if (index == 0 && contains(value, index, 6, "CAESAR")) {
288             result.append('S');
289             index += 2;
290         } else if (contains(value, index, 2, "CH")) {
291             index = handleCH(value, result, index);
292         } else if (contains(value, index, 2, "CZ") && 
293                    !contains(value, index - 2, 4, "WICZ")) {
294             //-- "Czerny" --//
295             result.append('S', 'X');
296             index += 2;
297         } else if (contains(value, index + 1, 3, "CIA")) {
298             //-- "focaccia" --//
299             result.append('X');
300             index += 3;
301         } else if (contains(value, index, 2, "CC") && 
302                    !(index == 1 && charAt(value, 0) == 'M')) {
303             //-- double "cc" but not "McClelland" --//
304             return handleCC(value, result, index);
305         } else if (contains(value, index, 2, "CK", "CG", "CQ")) {
306             result.append('K');
307             index += 2;
308         } else if (contains(value, index, 2, "CI", "CE", "CY")) {
309             //-- Italian vs. English --//
310             if (contains(value, index, 3, "CIO", "CIE", "CIA")) {
311                 result.append('S', 'X');
312             } else {
313                 result.append('S');
314             }
315             index += 2;
316         } else {
317             result.append('K');
318             if (contains(value, index + 1, 2, " C", " Q", " G")) { 
319                 //-- Mac Caffrey, Mac Gregor --//
320                 index += 3;
321             } else if (contains(value, index + 1, 1, "C", "K", "Q") && 
322                        !contains(value, index + 1, 2, "CE", "CI")) {
323                 index += 2;
324             } else {
325                 index++;
326             }
327         }
328         
329         return index;
330     }
331 
332     /**
333      * Handles 'CC' cases
334      */
335     private int handleCC(String value, 
336                          DoubleMetaphoneResult result, 
337                          int index) {
338         if (contains(value, index + 2, 1, "I", "E", "H") && 
339             !contains(value, index + 2, 2, "HU")) {
340             //-- "bellocchio" but not "bacchus" --//
341             if ((index == 1 && charAt(value, index - 1) == 'A') || 
342                 contains(value, index - 1, 5, "UCCEE", "UCCES")) {
343                 //-- "accident", "accede", "succeed" --//
344                 result.append("KS");
345             } else {
346                 //-- "bacci", "bertucci", other Italian --//
347                 result.append('X');
348             }
349             index += 3;
350         } else {    // Pierce's rule
351             result.append('K');
352             index += 2;
353         }
354         
355         return index;
356     }
357     
358     /**
359      * Handles 'CH' cases
360      */
361     private int handleCH(String value, 
362                          DoubleMetaphoneResult result, 
363                          int index) {
364         if (index > 0 && contains(value, index, 4, "CHAE")) {   // Michael
365             result.append('K', 'X');
366             return index + 2;
367         } else if (conditionCH0(value, index)) {
368             //-- Greek roots ("chemistry", "chorus", etc.) --//
369             result.append('K');
370             return index + 2;
371         } else if (conditionCH1(value, index)) {
372             //-- Germanic, Greek, or otherwise 'ch' for 'kh' sound --//
373             result.append('K');
374             return index + 2;
375         } else {
376             if (index > 0) {
377                 if (contains(value, 0, 2, "MC")) {
378                     result.append('K');
379                 } else {
380                     result.append('X', 'K');
381                 }
382             } else {
383                 result.append('X');
384             }
385             return index + 2;
386         }
387     }
388 
389     /**
390      * Handles 'D' cases
391      */
392     private int handleD(String value, 
393                         DoubleMetaphoneResult result, 
394                         int index) {
395         if (contains(value, index, 2, "DG")) {
396             //-- "Edge" --//
397             if (contains(value, index + 2, 1, "I", "E", "Y")) {
398                 result.append('J');
399                 index += 3;
400                 //-- "Edgar" --//
401             } else {
402                 result.append("TK");
403                 index += 2;
404             }
405         } else if (contains(value, index, 2, "DT", "DD")) {
406             result.append('T');
407             index += 2;
408         } else {
409             result.append('T');
410             index++;
411         }
412         return index;
413     }
414 
415     /**
416      * Handles 'G' cases
417      */
418     private int handleG(String value, 
419                         DoubleMetaphoneResult result, 
420                         int index, 
421                         boolean slavoGermanic) {
422         if (charAt(value, index + 1) == 'H') {
423             index = handleGH(value, result, index);
424         } else if (charAt(value, index + 1) == 'N') {
425             if (index == 1 && isVowel(charAt(value, 0)) && !slavoGermanic) {
426                 result.append("KN", "N");
427             } else if (!contains(value, index + 2, 2, "EY") && 
428                        charAt(value, index + 1) != 'Y' && !slavoGermanic) {
429                 result.append("N", "KN");
430             } else {
431                 result.append("KN");
432             }
433             index = index + 2;
434         } else if (contains(value, index + 1, 2, "LI") && !slavoGermanic) {
435             result.append("KL", "L");
436             index += 2;
437         } else if (index == 0 && (charAt(value, index + 1) == 'Y' || contains(value, index + 1, 2, ES_EP_EB_EL_EY_IB_IL_IN_IE_EI_ER))) {
438             //-- -ges-, -gep-, -gel-, -gie- at beginning --//
439             result.append('K', 'J');
440             index += 2;
441         } else if ((contains(value, index + 1, 2, "ER") || 
442                     charAt(value, index + 1) == 'Y') &&
443                    !contains(value, 0, 6, "DANGER", "RANGER", "MANGER") &&
444                    !contains(value, index - 1, 1, "E", "I") && 
445                    !contains(value, index - 1, 3, "RGY", "OGY")) {
446             //-- -ger-, -gy- --//
447             result.append('K', 'J');
448             index += 2;
449         } else if (contains(value, index + 1, 1, "E", "I", "Y") || 
450                    contains(value, index - 1, 4, "AGGI", "OGGI")) {
451             //-- Italian "biaggi" --//
452             if ((contains(value, 0 ,4, "VAN ", "VON ") || contains(value, 0, 3, "SCH")) || contains(value, index + 1, 2, "ET")) {
453                 //-- obvious germanic --//
454                 result.append('K');
455             } else if (contains(value, index + 1, 4, "IER")) {
456                 result.append('J');
457             } else {
458                 result.append('J', 'K');
459             }
460             index += 2;
461         } else if (charAt(value, index + 1) == 'G') {
462             index += 2;
463             result.append('K');
464         } else {
465             index++;
466             result.append('K');
467         }
468         return index;
469     }
470     
471     /**
472      * Handles 'GH' cases
473      */
474     private int handleGH(String value, 
475                          DoubleMetaphoneResult result, 
476                          int index) {
477         if (index > 0 && !isVowel(charAt(value, index - 1))) {
478             result.append('K');
479             index += 2;
480         } else if (index == 0) {
481             if (charAt(value, index + 2) == 'I') {
482                 result.append('J');
483             } else {
484                 result.append('K');
485             }
486             index += 2;
487         } else if ((index > 1 && contains(value, index - 2, 1, "B", "H", "D")) ||
488                    (index > 2 && contains(value, index - 3, 1, "B", "H", "D")) ||
489                    (index > 3 && contains(value, index - 4, 1, "B", "H"))) {
490             //-- Parker's rule (with some further refinements) - "hugh"
491             index += 2;
492         } else {
493             if (index > 2 && charAt(value, index - 1) == 'U' && 
494                 contains(value, index - 3, 1, "C", "G", "L", "R", "T")) {
495                 //-- "laugh", "McLaughlin", "cough", "gough", "rough", "tough"
496                 result.append('F');
497             } else if (index > 0 && charAt(value, index - 1) != 'I') {
498                 result.append('K');
499             }
500             index += 2;
501         }
502         return index;
503     }
504 
505     /**
506      * Handles 'H' cases
507      */
508     private int handleH(String value, 
509                         DoubleMetaphoneResult result, 
510                         int index) {
511         //-- only keep if first & before vowel or between 2 vowels --//
512         if ((index == 0 || isVowel(charAt(value, index - 1))) && 
513             isVowel(charAt(value, index + 1))) {
514             result.append('H');
515             index += 2;
516             //-- also takes car of "HH" --//
517         } else {
518             index++;
519         }
520         return index;
521     }
522     
523     /**
524      * Handles 'J' cases
525      */
526     private int handleJ(String value, DoubleMetaphoneResult result, int index, 
527                         boolean slavoGermanic) {
528         if (contains(value, index, 4, "JOSE") || contains(value, 0, 4, "SAN ")) {
529                 //-- obvious Spanish, "Jose", "San Jacinto" --//
530                 if ((index == 0 && (charAt(value, index + 4) == ' ') || 
531                      value.length() == 4) || contains(value, 0, 4, "SAN ")) {
532                     result.append('H');
533                 } else {
534                     result.append('J', 'H');
535                 }
536                 index++;
537             } else {
538                 if (index == 0 && !contains(value, index, 4, "JOSE")) {
539                     result.append('J', 'A');
540                 } else if (isVowel(charAt(value, index - 1)) && !slavoGermanic && 
541                               (charAt(value, index + 1) == 'A' || charAt(value, index + 1) == 'O')) {
542                     result.append('J', 'H');
543                 } else if (index == value.length() - 1) {
544                     result.append('J', ' ');
545                 } else if (!contains(value, index + 1, 1, L_T_K_S_N_M_B_Z) && !contains(value, index - 1, 1, "S", "K", "L")) {
546                     result.append('J');
547                 }
548 
549                 if (charAt(value, index + 1) == 'J') {
550                     index += 2;
551                 } else {
552                     index++;
553                 }
554             }
555         return index;
556     }
557     
558     /**
559      * Handles 'L' cases
560      */
561     private int handleL(String value, 
562                         DoubleMetaphoneResult result, 
563                         int index) {
564         result.append('L');
565         if (charAt(value, index + 1) == 'L') {
566             if (conditionL0(value, index)) {
567                 result.appendAlternate(' ');
568             }
569             index += 2;
570         } else {
571             index++;
572         }
573         return index;
574     }
575 
576     /**
577      * Handles 'P' cases
578      */
579     private int handleP(String value, 
580                         DoubleMetaphoneResult result, 
581                         int index) {
582         if (charAt(value, index + 1) == 'H') {
583             result.append('F');
584             index += 2;
585         } else {
586             result.append('P');
587             index = contains(value, index + 1, 1, "P", "B") ? index + 2 : index + 1;
588         }
589         return index;
590     }
591 
592     /**
593      * Handles 'R' cases
594      */
595     private int handleR(String value, 
596                         DoubleMetaphoneResult result, 
597                         int index, 
598                         boolean slavoGermanic) {
599         if (index == value.length() - 1 && !slavoGermanic && 
600             contains(value, index - 2, 2, "IE") && 
601             !contains(value, index - 4, 2, "ME", "MA")) {
602             result.appendAlternate('R');
603         } else {
604             result.append('R');
605         }
606         return charAt(value, index + 1) == 'R' ? index + 2 : index + 1;
607     }
608 
609     /**
610      * Handles 'S' cases
611      */
612     private int handleS(String value, 
613                         DoubleMetaphoneResult result, 
614                         int index, 
615                         boolean slavoGermanic) {
616         if (contains(value, index - 1, 3, "ISL", "YSL")) {
617             //-- special cases "island", "isle", "carlisle", "carlysle" --//
618             index++;
619         } else if (index == 0 && contains(value, index, 5, "SUGAR")) {
620             //-- special case "sugar-" --//
621             result.append('X', 'S');
622             index++;
623         } else if (contains(value, index, 2, "SH")) {
624             if (contains(value, index + 1, 4, 
625                          "HEIM", "HOEK", "HOLM", "HOLZ")) {
626                 //-- germanic --//
627                 result.append('S');
628             } else {
629                 result.append('X');
630             }
631             index += 2;
632         } else if (contains(value, index, 3, "SIO", "SIA") || contains(value, index, 4, "SIAN")) {
633             //-- Italian and Armenian --//
634             if (slavoGermanic) {
635                 result.append('S');
636             } else {
637                 result.append('S', 'X');
638             }
639             index += 3;
640         } else if ((index == 0 && contains(value, index + 1, 1, "M", "N", "L", "W")) || contains(value, index + 1, 1, "Z")) {
641             //-- german & anglicisations, e.g. "smith" match "schmidt" //
642             // "snider" match "schneider" --//
643             //-- also, -sz- in slavic language altho in hungarian it //
644             //   is pronounced "s" --//
645             result.append('S', 'X');
646             index = contains(value, index + 1, 1, "Z") ? index + 2 : index + 1;
647         } else if (contains(value, index, 2, "SC")) {
648             index = handleSC(value, result, index);
649         } else {
650             if (index == value.length() - 1 && contains(value, index - 2, 
651                                                         2, "AI", "OI")){
652                 //-- french e.g. "resnais", "artois" --//
653                 result.appendAlternate('S');
654             } else {
655                 result.append('S');
656             }
657             index = contains(value, index + 1, 1, "S", "Z") ? index + 2 : index + 1;
658         }
659         return index;
660     }
661 
662     /**
663      * Handles 'SC' cases
664      */
665     private int handleSC(String value, 
666                          DoubleMetaphoneResult result, 
667                          int index) {
668         if (charAt(value, index + 2) == 'H') {
669             //-- Schlesinger's rule --//
670             if (contains(value, index + 3, 
671                          2, "OO", "ER", "EN", "UY", "ED", "EM")) {
672                 //-- Dutch origin, e.g. "school", "schooner" --//
673                 if (contains(value, index + 3, 2, "ER", "EN")) {
674                     //-- "schermerhorn", "schenker" --//
675                     result.append("X", "SK");
676                 } else {
677                     result.append("SK");
678                 }
679             } else {
680                 if (index == 0 && !isVowel(charAt(value, 3)) && charAt(value, 3) != 'W') {
681                     result.append('X', 'S');
682                 } else {
683                     result.append('X');
684                 }
685             }
686         } else if (contains(value, index + 2, 1, "I", "E", "Y")) {
687             result.append('S');
688         } else {
689             result.append("SK");
690         }
691         return index + 3;
692     }
693 
694     /**
695      * Handles 'T' cases
696      */
697     private int handleT(String value, 
698                         DoubleMetaphoneResult result, 
699                         int index) {
700         if (contains(value, index, 4, "TION")) {
701             result.append('X');
702             index += 3;
703         } else if (contains(value, index, 3, "TIA", "TCH")) {
704             result.append('X');
705             index += 3;
706         } else if (contains(value, index, 2, "TH") || contains(value, index, 
707                                                                3, "TTH")) {
708             if (contains(value, index + 2, 2, "OM", "AM") || 
709                 //-- special case "thomas", "thames" or germanic --//
710                 contains(value, 0, 4, "VAN ", "VON ") || 
711                 contains(value, 0, 3, "SCH")) {
712                 result.append('T');
713             } else {
714                 result.append('0', 'T');
715             }
716             index += 2;
717         } else {
718             result.append('T');
719             index = contains(value, index + 1, 1, "T", "D") ? index + 2 : index + 1;
720         }
721         return index;
722     }
723 
724     /**
725      * Handles 'W' cases
726      */
727     private int handleW(String value, 
728                         DoubleMetaphoneResult result, 
729                         int index) {
730         if (contains(value, index, 2, "WR")) {
731             //-- can also be in middle of word --//
732             result.append('R');
733             index += 2;
734         } else {
735             if (index == 0 && (isVowel(charAt(value, index + 1)) || 
736                                contains(value, index, 2, "WH"))) {
737                 if (isVowel(charAt(value, index + 1))) {
738                     //-- Wasserman should match Vasserman --//
739                     result.append('A', 'F');
740                 } else {
741                     //-- need Uomo to match Womo --//
742                     result.append('A');
743                 }
744                 index++;
745             } else if ((index == value.length() - 1 && isVowel(charAt(value, index - 1))) ||
746                        contains(value, index - 1, 
747                                 5, "EWSKI", "EWSKY", "OWSKI", "OWSKY") ||
748                        contains(value, 0, 3, "SCH")) {
749                 //-- Arnow should match Arnoff --//
750                 result.appendAlternate('F');
751                 index++;
752             } else if (contains(value, index, 4, "WICZ", "WITZ")) {
753                 //-- Polish e.g. "filipowicz" --//
754                 result.append("TS", "FX");
755                 index += 4;
756             } else {
757                 index++;
758             }
759         }
760         return index;
761     }
762     
763     /**
764      * Handles 'X' cases
765      */
766     private int handleX(String value, 
767                         DoubleMetaphoneResult result, 
768                         int index) {
769         if (index == 0) {
770             result.append('S');
771             index++;
772         } else {
773             if (!((index == value.length() - 1) && 
774                   (contains(value, index - 3, 3, "IAU", "EAU") || 
775                    contains(value, index - 2, 2, "AU", "OU")))) {
776                 //-- French e.g. breaux --//
777                 result.append("KS");
778             }
779             index = contains(value, index + 1, 1, "C", "X") ? index + 2 : index + 1;
780         }
781         return index;
782     }
783 
784     /**
785      * Handles 'Z' cases
786      */
787     private int handleZ(String value, DoubleMetaphoneResult result, int index, 
788                         boolean slavoGermanic) {
789         if (charAt(value, index + 1) == 'H') {
790             //-- Chinese pinyin e.g. "zhao" or Angelina "Zhang" --//
791             result.append('J');
792             index += 2;
793         } else {
794             if (contains(value, index + 1, 2, "ZO", "ZI", "ZA") || (slavoGermanic && (index > 0 && charAt(value, index - 1) != 'T'))) {
795                 result.append("S", "TS");
796             } else {
797                 result.append('S');
798             }
799             index = charAt(value, index + 1) == 'Z' ? index + 2 : index + 1;
800         }
801         return index;
802     }
803 
804     //-- BEGIN CONDITIONS --//
805 
806     /**
807      * Complex condition 0 for 'C'
808      */
809     private boolean conditionC0(String value, int index) {
810 		if (contains(value, index, 4, "CHIA")) {
811 			return true;
812 		} else if (index <= 1) {
813 			return false;
814 		} else if (isVowel(charAt(value, index - 2))) {
815 			return false;
816 		} else if (!contains(value, index - 1, 3, "ACH")) {
817 			return false;
818 		} else {
819 			char c = charAt(value, index + 2);
820 			return (c != 'I' && c != 'E')
821 					|| contains(value, index - 2, 6, "BACHER", "MACHER");
822 		}
823 	}
824     
825     /**
826 	 * Complex condition 0 for 'CH'
827 	 */
828     private boolean conditionCH0(String value, int index) {
829         if (index != 0) {
830             return false;
831         } else if (!contains(value, index + 1, 5, "HARAC", "HARIS") && 
832                    !contains(value, index + 1, 3, "HOR", "HYM", "HIA", "HEM")) {
833             return false;
834         } else if (contains(value, 0, 5, "CHORE")) {
835             return false;
836         } else {
837             return true;
838         }
839     }
840     
841     /**
842      * Complex condition 1 for 'CH'
843      */
844     private boolean conditionCH1(String value, int index) {
845         return ((contains(value, 0, 4, "VAN ", "VON ") || contains(value, 0, 
846                                                                    3, "SCH")) ||
847                 contains(value, index - 2, 6, "ORCHES", "ARCHIT", "ORCHID") ||
848                 contains(value, index + 2, 1, "T", "S") ||
849                 ((contains(value, index - 1, 1, "A", "O", "U", "E") || index == 0) &&
850                  (contains(value, index + 2, 1, L_R_N_M_B_H_F_V_W_SPACE) || index + 1 == value.length() - 1)));
851     }
852     
853     /**
854      * Complex condition 0 for 'L'
855      */
856     private boolean conditionL0(String value, int index) {
857         if (index == value.length() - 3 && 
858             contains(value, index - 1, 4, "ILLO", "ILLA", "ALLE")) {
859             return true;
860         } else if ((contains(value, index - 1, 2, "AS", "OS") || 
861                     contains(value, value.length() - 1, 1, "A", "O")) &&
862                    contains(value, index - 1, 4, "ALLE")) {
863             return true;
864         } else {
865             return false;
866         }
867     }
868     
869     /**
870      * Complex condition 0 for 'M'
871      */
872     private boolean conditionM0(String value, int index) {
873 		if (charAt(value, index + 1) == 'M') {
874 			return true;
875 		}
876 		return contains(value, index - 1, 3, "UMB")
877 				&& ((index + 1) == value.length() - 1 || contains(value,
878 						index + 2, 2, "ER"));
879 	}
880     
881     //-- BEGIN HELPER FUNCTIONS --//
882 
883     /**
884 	 * Determines whether or not a value is of slavo-germanic orgin. A value is
885 	 * of slavo-germanic origin if it contians any of 'W', 'K', 'CZ', or 'WITZ'.
886 	 */
887     private boolean isSlavoGermanic(String value) {
888         return value.indexOf('W') > -1 || value.indexOf('K') > -1 || 
889             value.indexOf("CZ") > -1 || value.indexOf("WITZ") > -1;
890     }
891 
892     /**
893      * Determines whether or not a character is a vowel or not
894      */
895     private boolean isVowel(char ch) {
896         return VOWELS.indexOf(ch) != -1;
897     }
898 
899     /**
900      * Determines whether or not the value starts with a silent letter.  It will
901      * return <code>true</code> if the value starts with any of 'GN', 'KN',
902      * 'PN', 'WR' or 'PS'.
903      */    
904     private boolean isSilentStart(String value) {
905         boolean result = false;
906         for (int i = 0; i < SILENT_START.length; i++) {
907             if (value.startsWith(SILENT_START[i])) {
908                 result = true;
909                 break;
910             }
911         }
912         return result;
913     }
914 
915     /**
916      * Cleans the input
917      */    
918     private String cleanInput(String input) {
919 		if (input == null) {
920 			return null;
921 		}
922 		input = input.trim();
923 		if (input.length() == 0) {
924 			return null;
925 		}
926 		return input.toUpperCase();
927 	}
928 
929     /**
930 	 * Gets the character at index <code>index</code> if available, otherwise
931 	 * it returns <code>Character.MIN_VALUE</code> so that there is some sort
932 	 * of a default
933 	 */    
934     protected char charAt(String value, int index) {
935         if (index < 0 || index >= value.length()) {
936             return Character.MIN_VALUE;
937         } 
938         return value.charAt(index);
939     }
940 
941     /**
942      * Shortcut method with 1 criteria
943      */    
944     private static boolean contains(String value, int start, int length, 
945                                     String criteria) {
946         return contains(value, start, length, 
947                         new String[] { criteria });
948     }
949 
950     /**
951      * Shortcut method with 2 criteria
952      */    
953     private static boolean contains(String value, int start, int length, 
954                                     String criteria1, String criteria2) {
955         return contains(value, start, length, 
956                         new String[] { criteria1, criteria2 });
957     }
958 
959     /**
960      * Shortcut method with 3 criteria
961      */    
962     private static boolean contains(String value, int start, int length, 
963                                     String criteria1, String criteria2, 
964                                     String criteria3) {
965         return contains(value, start, length, 
966                         new String[] { criteria1, criteria2, criteria3 });
967     }
968 
969     /**
970      * Shortcut method with 4 criteria
971      */    
972     private static boolean contains(String value, int start, int length, 
973                                     String criteria1, String criteria2, 
974                                     String criteria3, String criteria4) {
975         return contains(value, start, length, 
976                         new String[] { criteria1, criteria2, criteria3, 
977                                        criteria4 });
978     }
979 
980     /**
981      * Shortcut method with 5 criteria
982      */    
983     private static boolean contains(String value, int start, int length, 
984                                     String criteria1, String criteria2, 
985                                     String criteria3, String criteria4, 
986                                     String criteria5) {
987         return contains(value, start, length, 
988                         new String[] { criteria1, criteria2, criteria3, 
989                                        criteria4, criteria5 });
990     }
991 
992     /**
993      * Shortcut method with 6 criteria
994      */    
995     private static boolean contains(String value, int start, int length, 
996                                     String criteria1, String criteria2, 
997                                     String criteria3, String criteria4, 
998                                     String criteria5, String criteria6) {
999         return contains(value, start, length, 
1000                         new String[] { criteria1, criteria2, criteria3, 
1001                                        criteria4, criteria5, criteria6 });
1002     }
1003     
1004     /**
1005      * Determines whether <code>value</code> contains any of the criteria 
1006      starting
1007      * at index <code>start</code> and matching up to length <code>length</code>
1008      */    
1009     protected static boolean contains(String value, int start, int length, 
1010                                       String[] criteria) {
1011         boolean result = false;
1012         if (start >= 0 && start + length <= value.length()) {
1013             String target = value.substring(start, start + length);
1014 
1015             for (int i = 0; i < criteria.length; i++) {
1016                 if (target.equals(criteria[i])) {
1017                     result = true;
1018                     break;
1019                 }
1020             }
1021         }
1022         return result;
1023     }
1024     
1025     //-- BEGIN INNER CLASSES --//
1026     
1027     /**
1028      * Inner class for storing results, since there is the optional alternate
1029      * encoding.
1030      */
1031     public class DoubleMetaphoneResult {
1032 
1033         private StringBuffer primary = new StringBuffer(getMaxCodeLen());
1034         private StringBuffer alternate = new StringBuffer(getMaxCodeLen());
1035         private int maxLength;
1036 
1037         public DoubleMetaphoneResult(int maxLength) {
1038             this.maxLength = maxLength;
1039         }
1040 
1041         public void append(char value) {
1042             appendPrimary(value);
1043             appendAlternate(value);
1044         }
1045 
1046         public void append(char primary, char alternate) {
1047             appendPrimary(primary);
1048             appendAlternate(alternate);
1049         }
1050 
1051         public void appendPrimary(char value) {
1052             if (this.primary.length() < this.maxLength) {
1053                 this.primary.append(value);
1054             }
1055         }
1056 
1057         public void appendAlternate(char value) {
1058             if (this.alternate.length() < this.maxLength) {
1059                 this.alternate.append(value);
1060             }
1061         }
1062 
1063         public void append(String value) {
1064             appendPrimary(value);
1065             appendAlternate(value);
1066         }
1067 
1068         public void append(String primary, String alternate) {
1069             appendPrimary(primary);
1070             appendAlternate(alternate);
1071         }
1072 
1073         public void appendPrimary(String value) {
1074             int addChars = this.maxLength - this.primary.length();
1075             if (value.length() <= addChars) {
1076                 this.primary.append(value);
1077             } else {
1078                 this.primary.append(value.substring(0, addChars));
1079             }
1080         }
1081 
1082         public void appendAlternate(String value) {
1083             int addChars = this.maxLength - this.alternate.length();
1084             if (value.length() <= addChars) {
1085                 this.alternate.append(value);
1086             } else {
1087                 this.alternate.append(value.substring(0, addChars));
1088             }
1089         }
1090 
1091         public String getPrimary() {
1092             return this.primary.toString();
1093         }
1094 
1095         public String getAlternate() {
1096             return this.alternate.toString();
1097         }
1098 
1099         public boolean isComplete() {
1100             return this.primary.length() >= this.maxLength && 
1101                 this.alternate.length() >= this.maxLength;
1102         }
1103     }
1104 }