View Javadoc
1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one or more
3    * contributor license agreements.  See the NOTICE file distributed with
4    * this work for additional information regarding copyright ownership.
5    * The ASF licenses this file to You under the Apache License, Version 2.0
6    * (the "License"); you may not use this file except in compliance with
7    * the License.  You may obtain a copy of the License at
8    *
9    *      http://www.apache.org/licenses/LICENSE-2.0
10   *
11   * Unless required by applicable law or agreed to in writing, software
12   * distributed under the License is distributed on an "AS IS" BASIS,
13   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14   * See the License for the specific language governing permissions and
15   * limitations under the License.
16   */
17  
18  package org.apache.commons.codec.language;
19  
20  import org.apache.commons.codec.EncoderException;
21  import org.apache.commons.codec.StringEncoder;
22  import org.apache.commons.codec.binary.StringUtils;
23  
24  /**
25   * Encodes a string into a double metaphone value. This implementation is based on the algorithm by <CITE>Lawrence
26   * Philips</CITE>.
27   * <p>
28   * This class is conditionally thread-safe. The instance field {@link #maxCodeLen} is mutable via
29   * {@link #setMaxCodeLen(int)} but is not volatile, and accesses are not synchronized. If an instance of the class is
30   * shared between threads, the caller needs to ensure that suitable synchronization is used to ensure safe publication
31   * of the value between threads, and must not invoke {@link #setMaxCodeLen(int)} after initial setup.
32   *
33   * @see <a href="http://drdobbs.com/184401251?pgno=2">Original Article</a>
34   * @see <a href="http://en.wikipedia.org/wiki/Metaphone">http://en.wikipedia.org/wiki/Metaphone</a>
35   *
36   * @version $Id: DoubleMetaphone.html 928559 2014-11-10 02:53:54Z ggregory $
37   */
38  public class DoubleMetaphone implements StringEncoder {
39  
40      /**
41       * Characters this encoder treats as "vowels"; note that 'Y' is included.
42       */
43      private static final String VOWELS = "AEIOUY";
44  
45      /**
46       * Two-letter prefixes which, when present at the start of a value, are not pronounced.
47       * NOTE(review): presumably consulted by isSilentStart, which is not visible here; when that
47       * method reports a match the encoder begins scanning at the second character.
47       */
48      private static final String[] SILENT_START =
49          { "GN", "KN", "PN", "WR", "PS" };
        /**
         * Single characters (including a space) compared against the character two positions after
         * the 'C' of a "CH" pair in conditionCH1.
         */
50      private static final String[] L_R_N_M_B_H_F_V_W_SPACE =
51          { "L", "R", "N", "M", "B", "H", "F", "V", "W", " " };
        /**
         * Two-letter sequences compared against the letters that follow a 'G' at the very start of
         * the value in handleG.
         */
52      private static final String[] ES_EP_EB_EL_EY_IB_IL_IN_IE_EI_ER =
53          { "ES", "EP", "EB", "EL", "EY", "IB", "IL", "IN", "IE", "EI", "ER" };
        /**
         * Single letters compared against the character that follows a 'J' in handleJ.
         */
54      private static final String[] L_T_K_S_N_M_B_Z =
55          { "L", "T", "K", "S", "N", "M", "B", "Z" };
56  
57      /**
58       * Maximum length of an encoding, default is 4. The current value is handed to each new
58       * DoubleMetaphoneResult created by doubleMetaphone(String, boolean) and can be changed
58       * through setMaxCodeLen(int).
59       */
60      private int maxCodeLen = 4;
61  
62      /**
63       * Creates an instance of this DoubleMetaphone encoder
64       */
65      public DoubleMetaphone() {
66          super();
67      }
68  
69      /**
70       * Encode a value with Double Metaphone.
71       *
72       * @param value String to encode
73       * @return an encoded string
74       */
75      public String doubleMetaphone(final String value) {
76          return doubleMetaphone(value, false);
77      }
78  
79      /**
80       * Encode a value with Double Metaphone, optionally using the alternate encoding.
81       *
82       * @param value String to encode
83       * @param alternate use alternate encode
84       * @return an encoded string
85       */
86      public String doubleMetaphone(String value, final boolean alternate) {
87          value = cleanInput(value);
88          if (value == null) {
89              return null;
90          }
91  
92          final boolean slavoGermanic = isSlavoGermanic(value);
93          int index = isSilentStart(value) ? 1 : 0;
94  
95          final DoubleMetaphoneResult result = new DoubleMetaphoneResult(this.getMaxCodeLen());
96  
97          while (!result.isComplete() && index <= value.length() - 1) {
98              switch (value.charAt(index)) {
99              case 'A':
100             case 'E':
101             case 'I':
102             case 'O':
103             case 'U':
104             case 'Y':
105                 index = handleAEIOUY(result, index);
106                 break;
107             case 'B':
108                 result.append('P');
109                 index = charAt(value, index + 1) == 'B' ? index + 2 : index + 1;
110                 break;
111             case '\u00C7':
112                 // A C with a Cedilla
113                 result.append('S');
114                 index++;
115                 break;
116             case 'C':
117                 index = handleC(value, result, index);
118                 break;
119             case 'D':
120                 index = handleD(value, result, index);
121                 break;
122             case 'F':
123                 result.append('F');
124                 index = charAt(value, index + 1) == 'F' ? index + 2 : index + 1;
125                 break;
126             case 'G':
127                 index = handleG(value, result, index, slavoGermanic);
128                 break;
129             case 'H':
130                 index = handleH(value, result, index);
131                 break;
132             case 'J':
133                 index = handleJ(value, result, index, slavoGermanic);
134                 break;
135             case 'K':
136                 result.append('K');
137                 index = charAt(value, index + 1) == 'K' ? index + 2 : index + 1;
138                 break;
139             case 'L':
140                 index = handleL(value, result, index);
141                 break;
142             case 'M':
143                 result.append('M');
144                 index = conditionM0(value, index) ? index + 2 : index + 1;
145                 break;
146             case 'N':
147                 result.append('N');
148                 index = charAt(value, index + 1) == 'N' ? index + 2 : index + 1;
149                 break;
150             case '\u00D1':
151                 // N with a tilde (spanish ene)
152                 result.append('N');
153                 index++;
154                 break;
155             case 'P':
156                 index = handleP(value, result, index);
157                 break;
158             case 'Q':
159                 result.append('K');
160                 index = charAt(value, index + 1) == 'Q' ? index + 2 : index + 1;
161                 break;
162             case 'R':
163                 index = handleR(value, result, index, slavoGermanic);
164                 break;
165             case 'S':
166                 index = handleS(value, result, index, slavoGermanic);
167                 break;
168             case 'T':
169                 index = handleT(value, result, index);
170                 break;
171             case 'V':
172                 result.append('F');
173                 index = charAt(value, index + 1) == 'V' ? index + 2 : index + 1;
174                 break;
175             case 'W':
176                 index = handleW(value, result, index);
177                 break;
178             case 'X':
179                 index = handleX(value, result, index);
180                 break;
181             case 'Z':
182                 index = handleZ(value, result, index, slavoGermanic);
183                 break;
184             default:
185                 index++;
186                 break;
187             }
188         }
189 
190         return alternate ? result.getAlternate() : result.getPrimary();
191     }
192 
193     /**
194      * Encode the value using DoubleMetaphone.  It will only work if
195      * <code>obj</code> is a <code>String</code> (like <code>Metaphone</code>).
196      *
197      * @param obj Object to encode (should be of type String)
198      * @return An encoded Object (will be of type String)
199      * @throws EncoderException encode parameter is not of type String
200      */
201     @Override
202     public Object encode(final Object obj) throws EncoderException {
203         if (!(obj instanceof String)) {
204             throw new EncoderException("DoubleMetaphone encode parameter is not of type String");
205         }
206         return doubleMetaphone((String) obj);
207     }
208 
209     /**
210      * Encode the value using DoubleMetaphone.
211      *
212      * @param value String to encode
213      * @return An encoded String
214      */
215     @Override
216     public String encode(final String value) {
217         return doubleMetaphone(value);
218     }
219 
220     /**
221      * Check if the Double Metaphone values of two <code>String</code> values
222      * are equal.
223      *
224      * @param value1 The left-hand side of the encoded {@link String#equals(Object)}.
225      * @param value2 The right-hand side of the encoded {@link String#equals(Object)}.
226      * @return <code>true</code> if the encoded <code>String</code>s are equal;
227      *          <code>false</code> otherwise.
228      * @see #isDoubleMetaphoneEqual(String,String,boolean)
229      */
230     public boolean isDoubleMetaphoneEqual(final String value1, final String value2) {
231         return isDoubleMetaphoneEqual(value1, value2, false);
232     }
233 
234     /**
235      * Check if the Double Metaphone values of two <code>String</code> values
236      * are equal, optionally using the alternate value.
237      *
238      * @param value1 The left-hand side of the encoded {@link String#equals(Object)}.
239      * @param value2 The right-hand side of the encoded {@link String#equals(Object)}.
240      * @param alternate use the alternate value if <code>true</code>.
241      * @return <code>true</code> if the encoded <code>String</code>s are equal;
242      *          <code>false</code> otherwise.
243      */
244     public boolean isDoubleMetaphoneEqual(final String value1, final String value2, final boolean alternate) {
245         return StringUtils.equals(doubleMetaphone(value1, alternate), doubleMetaphone(value2, alternate));
246     }
247 
248     /**
249      * Returns the maxCodeLen.
250      * @return int
251      */
252     public int getMaxCodeLen() {
253         return this.maxCodeLen;
254     }
255 
256     /**
257      * Sets the maxCodeLen.
258      * @param maxCodeLen The maxCodeLen to set
259      */
260     public void setMaxCodeLen(final int maxCodeLen) {
261         this.maxCodeLen = maxCodeLen;
262     }
263 
264     //-- BEGIN HANDLERS --//
265 
266     /**
267      * Handles 'A', 'E', 'I', 'O', 'U', and 'Y' cases.
268      */
269     private int handleAEIOUY(final DoubleMetaphoneResult result, final int index) {
270         if (index == 0) {
271             result.append('A');
272         }
273         return index + 1;
274     }
275 
276     /**
277      * Handles 'C' cases.
278      */
279     private int handleC(final String value, final DoubleMetaphoneResult result, int index) {
280         if (conditionC0(value, index)) {  // very confusing, moved out
281             result.append('K');
282             index += 2;
283         } else if (index == 0 && contains(value, index, 6, "CAESAR")) {
284             result.append('S');
285             index += 2;
286         } else if (contains(value, index, 2, "CH")) {
287             index = handleCH(value, result, index);
288         } else if (contains(value, index, 2, "CZ") &&
289                    !contains(value, index - 2, 4, "WICZ")) {
290             //-- "Czerny" --//
291             result.append('S', 'X');
292             index += 2;
293         } else if (contains(value, index + 1, 3, "CIA")) {
294             //-- "focaccia" --//
295             result.append('X');
296             index += 3;
297         } else if (contains(value, index, 2, "CC") &&
298                    !(index == 1 && charAt(value, 0) == 'M')) {
299             //-- double "cc" but not "McClelland" --//
300             return handleCC(value, result, index);
301         } else if (contains(value, index, 2, "CK", "CG", "CQ")) {
302             result.append('K');
303             index += 2;
304         } else if (contains(value, index, 2, "CI", "CE", "CY")) {
305             //-- Italian vs. English --//
306             if (contains(value, index, 3, "CIO", "CIE", "CIA")) {
307                 result.append('S', 'X');
308             } else {
309                 result.append('S');
310             }
311             index += 2;
312         } else {
313             result.append('K');
314             if (contains(value, index + 1, 2, " C", " Q", " G")) {
315                 //-- Mac Caffrey, Mac Gregor --//
316                 index += 3;
317             } else if (contains(value, index + 1, 1, "C", "K", "Q") &&
318                        !contains(value, index + 1, 2, "CE", "CI")) {
319                 index += 2;
320             } else {
321                 index++;
322             }
323         }
324 
325         return index;
326     }
327 
328     /**
329      * Handles 'CC' cases.
330      */
331     private int handleCC(final String value, final DoubleMetaphoneResult result, int index) {
332         if (contains(value, index + 2, 1, "I", "E", "H") &&
333             !contains(value, index + 2, 2, "HU")) {
334             //-- "bellocchio" but not "bacchus" --//
335             if ((index == 1 && charAt(value, index - 1) == 'A') ||
336                 contains(value, index - 1, 5, "UCCEE", "UCCES")) {
337                 //-- "accident", "accede", "succeed" --//
338                 result.append("KS");
339             } else {
340                 //-- "bacci", "bertucci", other Italian --//
341                 result.append('X');
342             }
343             index += 3;
344         } else {    // Pierce's rule
345             result.append('K');
346             index += 2;
347         }
348 
349         return index;
350     }
351 
352     /**
353      * Handles 'CH' cases.
354      */
355     private int handleCH(final String value, final DoubleMetaphoneResult result, final int index) {
356         if (index > 0 && contains(value, index, 4, "CHAE")) {   // Michael
357             result.append('K', 'X');
358             return index + 2;
359         } else if (conditionCH0(value, index)) {
360             //-- Greek roots ("chemistry", "chorus", etc.) --//
361             result.append('K');
362             return index + 2;
363         } else if (conditionCH1(value, index)) {
364             //-- Germanic, Greek, or otherwise 'ch' for 'kh' sound --//
365             result.append('K');
366             return index + 2;
367         } else {
368             if (index > 0) {
369                 if (contains(value, 0, 2, "MC")) {
370                     result.append('K');
371                 } else {
372                     result.append('X', 'K');
373                 }
374             } else {
375                 result.append('X');
376             }
377             return index + 2;
378         }
379     }
380 
381     /**
382      * Handles 'D' cases.
383      */
384     private int handleD(final String value, final DoubleMetaphoneResult result, int index) {
385         if (contains(value, index, 2, "DG")) {
386             //-- "Edge" --//
387             if (contains(value, index + 2, 1, "I", "E", "Y")) {
388                 result.append('J');
389                 index += 3;
390                 //-- "Edgar" --//
391             } else {
392                 result.append("TK");
393                 index += 2;
394             }
395         } else if (contains(value, index, 2, "DT", "DD")) {
396             result.append('T');
397             index += 2;
398         } else {
399             result.append('T');
400             index++;
401         }
402         return index;
403     }
404 
405     /**
406      * Handles 'G' cases.
407      */
408     private int handleG(final String value, final DoubleMetaphoneResult result, int index,
409                         final boolean slavoGermanic) {
410         if (charAt(value, index + 1) == 'H') {
411             index = handleGH(value, result, index);
412         } else if (charAt(value, index + 1) == 'N') {
413             if (index == 1 && isVowel(charAt(value, 0)) && !slavoGermanic) {
414                 result.append("KN", "N");
415             } else if (!contains(value, index + 2, 2, "EY") &&
416                        charAt(value, index + 1) != 'Y' && !slavoGermanic) {
417                 result.append("N", "KN");
418             } else {
419                 result.append("KN");
420             }
421             index = index + 2;
422         } else if (contains(value, index + 1, 2, "LI") && !slavoGermanic) {
423             result.append("KL", "L");
424             index += 2;
425         } else if (index == 0 &&
426                    (charAt(value, index + 1) == 'Y' ||
427                     contains(value, index + 1, 2, ES_EP_EB_EL_EY_IB_IL_IN_IE_EI_ER))) {
428             //-- -ges-, -gep-, -gel-, -gie- at beginning --//
429             result.append('K', 'J');
430             index += 2;
431         } else if ((contains(value, index + 1, 2, "ER") ||
432                     charAt(value, index + 1) == 'Y') &&
433                    !contains(value, 0, 6, "DANGER", "RANGER", "MANGER") &&
434                    !contains(value, index - 1, 1, "E", "I") &&
435                    !contains(value, index - 1, 3, "RGY", "OGY")) {
436             //-- -ger-, -gy- --//
437             result.append('K', 'J');
438             index += 2;
439         } else if (contains(value, index + 1, 1, "E", "I", "Y") ||
440                    contains(value, index - 1, 4, "AGGI", "OGGI")) {
441             //-- Italian "biaggi" --//
442             if (contains(value, 0 ,4, "VAN ", "VON ") ||
443                 contains(value, 0, 3, "SCH") ||
444                 contains(value, index + 1, 2, "ET")) {
445                 //-- obvious germanic --//
446                 result.append('K');
447             } else if (contains(value, index + 1, 3, "IER")) {
448                 result.append('J');
449             } else {
450                 result.append('J', 'K');
451             }
452             index += 2;
453         } else if (charAt(value, index + 1) == 'G') {
454             index += 2;
455             result.append('K');
456         } else {
457             index++;
458             result.append('K');
459         }
460         return index;
461     }
462 
463     /**
464      * Handles 'GH' cases.
465      */
466     private int handleGH(final String value, final DoubleMetaphoneResult result, int index) {
467         if (index > 0 && !isVowel(charAt(value, index - 1))) {
468             result.append('K');
469             index += 2;
470         } else if (index == 0) {
471             if (charAt(value, index + 2) == 'I') {
472                 result.append('J');
473             } else {
474                 result.append('K');
475             }
476             index += 2;
477         } else if ((index > 1 && contains(value, index - 2, 1, "B", "H", "D")) ||
478                    (index > 2 && contains(value, index - 3, 1, "B", "H", "D")) ||
479                    (index > 3 && contains(value, index - 4, 1, "B", "H"))) {
480             //-- Parker's rule (with some further refinements) - "hugh"
481             index += 2;
482         } else {
483             if (index > 2 && charAt(value, index - 1) == 'U' &&
484                 contains(value, index - 3, 1, "C", "G", "L", "R", "T")) {
485                 //-- "laugh", "McLaughlin", "cough", "gough", "rough", "tough"
486                 result.append('F');
487             } else if (index > 0 && charAt(value, index - 1) != 'I') {
488                 result.append('K');
489             }
490             index += 2;
491         }
492         return index;
493     }
494 
495     /**
496      * Handles 'H' cases.
497      */
498     private int handleH(final String value, final DoubleMetaphoneResult result, int index) {
499         //-- only keep if first & before vowel or between 2 vowels --//
500         if ((index == 0 || isVowel(charAt(value, index - 1))) &&
501             isVowel(charAt(value, index + 1))) {
502             result.append('H');
503             index += 2;
504             //-- also takes car of "HH" --//
505         } else {
506             index++;
507         }
508         return index;
509     }
510 
511     /**
512      * Handles 'J' cases.
513      */
514     private int handleJ(final String value, final DoubleMetaphoneResult result, int index,
515                         final boolean slavoGermanic) {
516         if (contains(value, index, 4, "JOSE") || contains(value, 0, 4, "SAN ")) {
517                 //-- obvious Spanish, "Jose", "San Jacinto" --//
518                 if ((index == 0 && (charAt(value, index + 4) == ' ') ||
519                      value.length() == 4) || contains(value, 0, 4, "SAN ")) {
520                     result.append('H');
521                 } else {
522                     result.append('J', 'H');
523                 }
524                 index++;
525             } else {
526                 if (index == 0 && !contains(value, index, 4, "JOSE")) {
527                     result.append('J', 'A');
528                 } else if (isVowel(charAt(value, index - 1)) && !slavoGermanic &&
529                            (charAt(value, index + 1) == 'A' || charAt(value, index + 1) == 'O')) {
530                     result.append('J', 'H');
531                 } else if (index == value.length() - 1) {
532                     result.append('J', ' ');
533                 } else if (!contains(value, index + 1, 1, L_T_K_S_N_M_B_Z) &&
534                            !contains(value, index - 1, 1, "S", "K", "L")) {
535                     result.append('J');
536                 }
537 
538                 if (charAt(value, index + 1) == 'J') {
539                     index += 2;
540                 } else {
541                     index++;
542                 }
543             }
544         return index;
545     }
546 
547     /**
548      * Handles 'L' cases.
549      */
550     private int handleL(final String value, final DoubleMetaphoneResult result, int index) {
551         if (charAt(value, index + 1) == 'L') {
552             if (conditionL0(value, index)) {
553                 result.appendPrimary('L');
554             } else {
555                 result.append('L');
556             }
557             index += 2;
558         } else {
559             index++;
560             result.append('L');
561         }
562         return index;
563     }
564 
565     /**
566      * Handles 'P' cases.
567      */
568     private int handleP(final String value, final DoubleMetaphoneResult result, int index) {
569         if (charAt(value, index + 1) == 'H') {
570             result.append('F');
571             index += 2;
572         } else {
573             result.append('P');
574             index = contains(value, index + 1, 1, "P", "B") ? index + 2 : index + 1;
575         }
576         return index;
577     }
578 
579     /**
580      * Handles 'R' cases.
581      */
582     private int handleR(final String value, final DoubleMetaphoneResult result, final int index,
583                         final boolean slavoGermanic) {
584         if (index == value.length() - 1 && !slavoGermanic &&
585             contains(value, index - 2, 2, "IE") &&
586             !contains(value, index - 4, 2, "ME", "MA")) {
587             result.appendAlternate('R');
588         } else {
589             result.append('R');
590         }
591         return charAt(value, index + 1) == 'R' ? index + 2 : index + 1;
592     }
593 
594     /**
595      * Handles 'S' cases.
596      */
597     private int handleS(final String value, final DoubleMetaphoneResult result, int index,
598                         final boolean slavoGermanic) {
599         if (contains(value, index - 1, 3, "ISL", "YSL")) {
600             //-- special cases "island", "isle", "carlisle", "carlysle" --//
601             index++;
602         } else if (index == 0 && contains(value, index, 5, "SUGAR")) {
603             //-- special case "sugar-" --//
604             result.append('X', 'S');
605             index++;
606         } else if (contains(value, index, 2, "SH")) {
607             if (contains(value, index + 1, 4, "HEIM", "HOEK", "HOLM", "HOLZ")) {
608                 //-- germanic --//
609                 result.append('S');
610             } else {
611                 result.append('X');
612             }
613             index += 2;
614         } else if (contains(value, index, 3, "SIO", "SIA") || contains(value, index, 4, "SIAN")) {
615             //-- Italian and Armenian --//
616             if (slavoGermanic) {
617                 result.append('S');
618             } else {
619                 result.append('S', 'X');
620             }
621             index += 3;
622         } else if ((index == 0 && contains(value, index + 1, 1, "M", "N", "L", "W")) ||
623                    contains(value, index + 1, 1, "Z")) {
624             //-- german & anglicisations, e.g. "smith" match "schmidt" //
625             // "snider" match "schneider" --//
626             //-- also, -sz- in slavic language although in hungarian it //
627             //   is pronounced "s" --//
628             result.append('S', 'X');
629             index = contains(value, index + 1, 1, "Z") ? index + 2 : index + 1;
630         } else if (contains(value, index, 2, "SC")) {
631             index = handleSC(value, result, index);
632         } else {
633             if (index == value.length() - 1 && contains(value, index - 2, 2, "AI", "OI")) {
634                 //-- french e.g. "resnais", "artois" --//
635                 result.appendAlternate('S');
636             } else {
637                 result.append('S');
638             }
639             index = contains(value, index + 1, 1, "S", "Z") ? index + 2 : index + 1;
640         }
641         return index;
642     }
643 
644     /**
645      * Handles 'SC' cases.
646      */
647     private int handleSC(final String value, final DoubleMetaphoneResult result, final int index) {
648         if (charAt(value, index + 2) == 'H') {
649             //-- Schlesinger's rule --//
650             if (contains(value, index + 3, 2, "OO", "ER", "EN", "UY", "ED", "EM")) {
651                 //-- Dutch origin, e.g. "school", "schooner" --//
652                 if (contains(value, index + 3, 2, "ER", "EN")) {
653                     //-- "schermerhorn", "schenker" --//
654                     result.append("X", "SK");
655                 } else {
656                     result.append("SK");
657                 }
658             } else {
659                 if (index == 0 && !isVowel(charAt(value, 3)) && charAt(value, 3) != 'W') {
660                     result.append('X', 'S');
661                 } else {
662                     result.append('X');
663                 }
664             }
665         } else if (contains(value, index + 2, 1, "I", "E", "Y")) {
666             result.append('S');
667         } else {
668             result.append("SK");
669         }
670         return index + 3;
671     }
672 
673     /**
674      * Handles 'T' cases.
675      */
676     private int handleT(final String value, final DoubleMetaphoneResult result, int index) {
677         if (contains(value, index, 4, "TION")) {
678             result.append('X');
679             index += 3;
680         } else if (contains(value, index, 3, "TIA", "TCH")) {
681             result.append('X');
682             index += 3;
683         } else if (contains(value, index, 2, "TH") || contains(value, index, 3, "TTH")) {
684             if (contains(value, index + 2, 2, "OM", "AM") ||
685                 //-- special case "thomas", "thames" or germanic --//
686                 contains(value, 0, 4, "VAN ", "VON ") ||
687                 contains(value, 0, 3, "SCH")) {
688                 result.append('T');
689             } else {
690                 result.append('0', 'T');
691             }
692             index += 2;
693         } else {
694             result.append('T');
695             index = contains(value, index + 1, 1, "T", "D") ? index + 2 : index + 1;
696         }
697         return index;
698     }
699 
700     /**
701      * Handles 'W' cases.
702      */
703     private int handleW(final String value, final DoubleMetaphoneResult result, int index) {
704         if (contains(value, index, 2, "WR")) {
705             //-- can also be in middle of word --//
706             result.append('R');
707             index += 2;
708         } else {
709             if (index == 0 && (isVowel(charAt(value, index + 1)) ||
710                                contains(value, index, 2, "WH"))) {
711                 if (isVowel(charAt(value, index + 1))) {
712                     //-- Wasserman should match Vasserman --//
713                     result.append('A', 'F');
714                 } else {
715                     //-- need Uomo to match Womo --//
716                     result.append('A');
717                 }
718                 index++;
719             } else if ((index == value.length() - 1 && isVowel(charAt(value, index - 1))) ||
720                        contains(value, index - 1, 5, "EWSKI", "EWSKY", "OWSKI", "OWSKY") ||
721                        contains(value, 0, 3, "SCH")) {
722                 //-- Arnow should match Arnoff --//
723                 result.appendAlternate('F');
724                 index++;
725             } else if (contains(value, index, 4, "WICZ", "WITZ")) {
726                 //-- Polish e.g. "filipowicz" --//
727                 result.append("TS", "FX");
728                 index += 4;
729             } else {
730                 index++;
731             }
732         }
733         return index;
734     }
735 
736     /**
737      * Handles 'X' cases.
738      */
739     private int handleX(final String value, final DoubleMetaphoneResult result, int index) {
740         if (index == 0) {
741             result.append('S');
742             index++;
743         } else {
744             if (!((index == value.length() - 1) &&
745                   (contains(value, index - 3, 3, "IAU", "EAU") ||
746                    contains(value, index - 2, 2, "AU", "OU")))) {
747                 //-- French e.g. breaux --//
748                 result.append("KS");
749             }
750             index = contains(value, index + 1, 1, "C", "X") ? index + 2 : index + 1;
751         }
752         return index;
753     }
754 
755     /**
756      * Handles 'Z' cases.
757      */
758     private int handleZ(final String value, final DoubleMetaphoneResult result, int index,
759                         final boolean slavoGermanic) {
760         if (charAt(value, index + 1) == 'H') {
761             //-- Chinese pinyin e.g. "zhao" or Angelina "Zhang" --//
762             result.append('J');
763             index += 2;
764         } else {
765             if (contains(value, index + 1, 2, "ZO", "ZI", "ZA") ||
766                 (slavoGermanic && (index > 0 && charAt(value, index - 1) != 'T'))) {
767                 result.append("S", "TS");
768             } else {
769                 result.append('S');
770             }
771             index = charAt(value, index + 1) == 'Z' ? index + 2 : index + 1;
772         }
773         return index;
774     }
775 
776     //-- BEGIN CONDITIONS --//
777 
778     /**
779      * Complex condition 0 for 'C'.
780      */
781     private boolean conditionC0(final String value, final int index) {
782         if (contains(value, index, 4, "CHIA")) {
783             return true;
784         } else if (index <= 1) {
785             return false;
786         } else if (isVowel(charAt(value, index - 2))) {
787             return false;
788         } else if (!contains(value, index - 1, 3, "ACH")) {
789             return false;
790         } else {
791             final char c = charAt(value, index + 2);
792             return (c != 'I' && c != 'E') ||
793                     contains(value, index - 2, 6, "BACHER", "MACHER");
794         }
795     }
796 
797     /**
798      * Complex condition 0 for 'CH'.
799      */
800     private boolean conditionCH0(final String value, final int index) {
801         if (index != 0) {
802             return false;
803         } else if (!contains(value, index + 1, 5, "HARAC", "HARIS") &&
804                    !contains(value, index + 1, 3, "HOR", "HYM", "HIA", "HEM")) {
805             return false;
806         } else if (contains(value, 0, 5, "CHORE")) {
807             return false;
808         } else {
809             return true;
810         }
811     }
812 
813     /**
814      * Complex condition 1 for 'CH'.
815      */
816     private boolean conditionCH1(final String value, final int index) {
817         return ((contains(value, 0, 4, "VAN ", "VON ") || contains(value, 0, 3, "SCH")) ||
818                 contains(value, index - 2, 6, "ORCHES", "ARCHIT", "ORCHID") ||
819                 contains(value, index + 2, 1, "T", "S") ||
820                 ((contains(value, index - 1, 1, "A", "O", "U", "E") || index == 0) &&
821                  (contains(value, index + 2, 1, L_R_N_M_B_H_F_V_W_SPACE) || index + 1 == value.length() - 1)));
822     }
823 
824     /**
825      * Complex condition 0 for 'L'.
826      */
827     private boolean conditionL0(final String value, final int index) {
828         if (index == value.length() - 3 &&
829             contains(value, index - 1, 4, "ILLO", "ILLA", "ALLE")) {
830             return true;
831         } else if ((contains(value, value.length() - 2, 2, "AS", "OS") ||
832                     contains(value, value.length() - 1, 1, "A", "O")) &&
833                    contains(value, index - 1, 4, "ALLE")) {
834             return true;
835         } else {
836             return false;
837         }
838     }
839 
840     /**
841      * Complex condition 0 for 'M'.
842      */
843     private boolean conditionM0(final String value, final int index) {
844         if (charAt(value, index + 1) == 'M') {
845             return true;
846         }
847         return contains(value, index - 1, 3, "UMB") &&
848                ((index + 1) == value.length() - 1 || contains(value, index + 2, 2, "ER"));
849     }
850 
851     //-- BEGIN HELPER FUNCTIONS --//
852 
853     /**
854      * Determines whether or not a value is of slavo-germanic origin. A value is
855      * of slavo-germanic origin if it contians any of 'W', 'K', 'CZ', or 'WITZ'.
856      */
857     private boolean isSlavoGermanic(final String value) {
858         return value.indexOf('W') > -1 || value.indexOf('K') > -1 ||
859             value.indexOf("CZ") > -1 || value.indexOf("WITZ") > -1;
860     }
861 
862     /**
863      * Determines whether or not a character is a vowel or not
864      */
865     private boolean isVowel(final char ch) {
866         return VOWELS.indexOf(ch) != -1;
867     }
868 
869     /**
870      * Determines whether or not the value starts with a silent letter.  It will
871      * return <code>true</code> if the value starts with any of 'GN', 'KN',
872      * 'PN', 'WR' or 'PS'.
873      */
874     private boolean isSilentStart(final String value) {
875         boolean result = false;
876         for (final String element : SILENT_START) {
877             if (value.startsWith(element)) {
878                 result = true;
879                 break;
880             }
881         }
882         return result;
883     }
884 
885     /**
886      * Cleans the input.
887      */
888     private String cleanInput(String input) {
889         if (input == null) {
890             return null;
891         }
892         input = input.trim();
893         if (input.length() == 0) {
894             return null;
895         }
896         return input.toUpperCase(java.util.Locale.ENGLISH);
897     }
898 
899     /*
900      * Gets the character at index <code>index</code> if available, otherwise
901      * it returns <code>Character.MIN_VALUE</code> so that there is some sort
902      * of a default.
903      */
904     protected char charAt(final String value, final int index) {
905         if (index < 0 || index >= value.length()) {
906             return Character.MIN_VALUE;
907         }
908         return value.charAt(index);
909     }
910 
911     /*
912      * Determines whether <code>value</code> contains any of the criteria starting at index <code>start</code> and
913      * matching up to length <code>length</code>.
914      */
915     protected static boolean contains(final String value, final int start, final int length,
916                                       final String... criteria) {
917         boolean result = false;
918         if (start >= 0 && start + length <= value.length()) {
919             final String target = value.substring(start, start + length);
920 
921             for (final String element : criteria) {
922                 if (target.equals(element)) {
923                     result = true;
924                     break;
925                 }
926             }
927         }
928         return result;
929     }
930 
931     //-- BEGIN INNER CLASSES --//
932 
/**
 * Inner class for storing results, since there is the optional alternate encoding.
 * <p>
 * Both the primary and the alternate code are capped at the <code>maxLength</code> given to the
 * constructor: characters appended beyond that limit are silently dropped. A limit of zero or
 * less yields a result that accepts no characters and is complete from the start.
 */
public class DoubleMetaphoneResult {

    private final StringBuilder primary;
    private final StringBuilder alternate;
    private final int maxLength;

    /**
     * Creates an empty result.
     *
     * @param maxLength the maximum number of characters kept in each of the two codes
     */
    public DoubleMetaphoneResult(final int maxLength) {
        this.maxLength = maxLength;
        // Size the buffers from this result's own limit (not from the enclosing encoder's
        // current setting) and never request a negative capacity.
        final int capacity = Math.max(maxLength, 0);
        this.primary = new StringBuilder(capacity);
        this.alternate = new StringBuilder(capacity);
    }

    /**
     * Appends the same character to both codes.
     *
     * @param value the character to append
     */
    public void append(final char value) {
        appendPrimary(value);
        appendAlternate(value);
    }

    /**
     * Appends one character to each code.
     *
     * @param primary the character for the primary code
     * @param alternate the character for the alternate code
     */
    public void append(final char primary, final char alternate) {
        appendPrimary(primary);
        appendAlternate(alternate);
    }

    /**
     * Appends a character to the primary code unless the limit is already reached.
     *
     * @param value the character to append
     */
    public void appendPrimary(final char value) {
        if (this.primary.length() < this.maxLength) {
            this.primary.append(value);
        }
    }

    /**
     * Appends a character to the alternate code unless the limit is already reached.
     *
     * @param value the character to append
     */
    public void appendAlternate(final char value) {
        if (this.alternate.length() < this.maxLength) {
            this.alternate.append(value);
        }
    }

    /**
     * Appends the same string to both codes.
     *
     * @param value the string to append
     */
    public void append(final String value) {
        appendPrimary(value);
        appendAlternate(value);
    }

    /**
     * Appends one string to each code.
     *
     * @param primary the string for the primary code
     * @param alternate the string for the alternate code
     */
    public void append(final String primary, final String alternate) {
        appendPrimary(primary);
        appendAlternate(alternate);
    }

    /**
     * Appends as much of a string to the primary code as the limit allows.
     *
     * @param value the string to append
     */
    public void appendPrimary(final String value) {
        appendBounded(this.primary, value);
    }

    /**
     * Appends as much of a string to the alternate code as the limit allows.
     *
     * @param value the string to append
     */
    public void appendAlternate(final String value) {
        appendBounded(this.alternate, value);
    }

    /**
     * Returns the primary code collected so far.
     *
     * @return the primary code
     */
    public String getPrimary() {
        return this.primary.toString();
    }

    /**
     * Returns the alternate code collected so far.
     *
     * @return the alternate code
     */
    public String getAlternate() {
        return this.alternate.toString();
    }

    /**
     * Tells whether both codes have reached the limit.
     *
     * @return <code>true</code> if neither code can accept further characters
     */
    public boolean isComplete() {
        return this.primary.length() >= this.maxLength &&
               this.alternate.length() >= this.maxLength;
    }

    /**
     * Appends the leading part of <code>value</code> that still fits into <code>target</code>.
     */
    private void appendBounded(final StringBuilder target, final String value) {
        // Clamp at zero so that a non-positive limit drops the text instead of
        // asking for a negative-length prefix.
        final int room = Math.max(this.maxLength - target.length(), 0);
        target.append(value, 0, Math.min(value.length(), room));
    }
}
1009 }