View Javadoc

1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one or more
3    * contributor license agreements.  See the NOTICE file distributed with
4    * this work for additional information regarding copyright ownership.
5    * The ASF licenses this file to You under the Apache License, Version 2.0
6    * (the "License"); you may not use this file except in compliance with
7    * the License.  You may obtain a copy of the License at
8    *
9    *      http://www.apache.org/licenses/LICENSE-2.0
10   *
11   * Unless required by applicable law or agreed to in writing, software
12   * distributed under the License is distributed on an "AS IS" BASIS,
13   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14   * See the License for the specific language governing permissions and
15   * limitations under the License.
16   */
17  
18  package org.apache.commons.codec.language;
19  
20  import org.apache.commons.codec.EncoderException;
21  import org.apache.commons.codec.StringEncoder;
22  
23  /**
24   * Encodes a string into a double metaphone value. This Implementation is based on the algorithm by <CITE>Lawrence
25   * Philips</CITE>.
26   * <p>
27   * This class is conditionally thread-safe. The instance field {@link #maxCodeLen} is mutable
28   * {@link #setMaxCodeLen(int)} but is not volatile, and accesses are not synchronized. If an instance of the class is
29   * shared between threads, the caller needs to ensure that suitable synchronization is used to ensure safe publication
30   * of the value between threads, and must not invoke {@link #setMaxCodeLen(int)} after initial setup.
31   *
32   * @see <a href="http://drdobbs.com/184401251?pgno=2">Original Article</a>
33   * @see <a href="http://en.wikipedia.org/wiki/Metaphone">http://en.wikipedia.org/wiki/Metaphone</a>
34   *
35   * @version $Id: DoubleMetaphone.html 889935 2013-12-11 05:05:13Z ggregory $
36   */
37  public class DoubleMetaphone implements StringEncoder {
38  
39      /**
40       * "Vowels" to test for
41       */
42      private static final String VOWELS = "AEIOUY";
43  
44      /**
45       * Prefixes when present which are not pronounced
46       */
47      private static final String[] SILENT_START =
48          { "GN", "KN", "PN", "WR", "PS" };
49      private static final String[] L_R_N_M_B_H_F_V_W_SPACE =
50          { "L", "R", "N", "M", "B", "H", "F", "V", "W", " " };
51      private static final String[] ES_EP_EB_EL_EY_IB_IL_IN_IE_EI_ER =
52          { "ES", "EP", "EB", "EL", "EY", "IB", "IL", "IN", "IE", "EI", "ER" };
53      private static final String[] L_T_K_S_N_M_B_Z =
54          { "L", "T", "K", "S", "N", "M", "B", "Z" };
55  
56      /**
57       * Maximum length of an encoding, default is 4
58       */
59      private int maxCodeLen = 4;
60  
61      /**
62       * Creates an instance of this DoubleMetaphone encoder
63       */
64      public DoubleMetaphone() {
65          super();
66      }
67  
68      /**
69       * Encode a value with Double Metaphone.
70       *
71       * @param value String to encode
72       * @return an encoded string
73       */
74      public String doubleMetaphone(String value) {
75          return doubleMetaphone(value, false);
76      }
77  
78      /**
79       * Encode a value with Double Metaphone, optionally using the alternate encoding.
80       *
81       * @param value String to encode
82       * @param alternate use alternate encode
83       * @return an encoded string
84       */
85      public String doubleMetaphone(String value, boolean alternate) {
86          value = cleanInput(value);
87          if (value == null) {
88              return null;
89          }
90  
91          boolean slavoGermanic = isSlavoGermanic(value);
92          int index = isSilentStart(value) ? 1 : 0;
93  
94          DoubleMetaphoneResult result = new DoubleMetaphoneResult(this.getMaxCodeLen());
95  
96          while (!result.isComplete() && index <= value.length() - 1) {
97              switch (value.charAt(index)) {
98              case 'A':
99              case 'E':
100             case 'I':
101             case 'O':
102             case 'U':
103             case 'Y':
104                 index = handleAEIOUY(result, index);
105                 break;
106             case 'B':
107                 result.append('P');
108                 index = charAt(value, index + 1) == 'B' ? index + 2 : index + 1;
109                 break;
110             case '\u00C7':
111                 // A C with a Cedilla
112                 result.append('S');
113                 index++;
114                 break;
115             case 'C':
116                 index = handleC(value, result, index);
117                 break;
118             case 'D':
119                 index = handleD(value, result, index);
120                 break;
121             case 'F':
122                 result.append('F');
123                 index = charAt(value, index + 1) == 'F' ? index + 2 : index + 1;
124                 break;
125             case 'G':
126                 index = handleG(value, result, index, slavoGermanic);
127                 break;
128             case 'H':
129                 index = handleH(value, result, index);
130                 break;
131             case 'J':
132                 index = handleJ(value, result, index, slavoGermanic);
133                 break;
134             case 'K':
135                 result.append('K');
136                 index = charAt(value, index + 1) == 'K' ? index + 2 : index + 1;
137                 break;
138             case 'L':
139                 index = handleL(value, result, index);
140                 break;
141             case 'M':
142                 result.append('M');
143                 index = conditionM0(value, index) ? index + 2 : index + 1;
144                 break;
145             case 'N':
146                 result.append('N');
147                 index = charAt(value, index + 1) == 'N' ? index + 2 : index + 1;
148                 break;
149             case '\u00D1':
150                 // N with a tilde (spanish ene)
151                 result.append('N');
152                 index++;
153                 break;
154             case 'P':
155                 index = handleP(value, result, index);
156                 break;
157             case 'Q':
158                 result.append('K');
159                 index = charAt(value, index + 1) == 'Q' ? index + 2 : index + 1;
160                 break;
161             case 'R':
162                 index = handleR(value, result, index, slavoGermanic);
163                 break;
164             case 'S':
165                 index = handleS(value, result, index, slavoGermanic);
166                 break;
167             case 'T':
168                 index = handleT(value, result, index);
169                 break;
170             case 'V':
171                 result.append('F');
172                 index = charAt(value, index + 1) == 'V' ? index + 2 : index + 1;
173                 break;
174             case 'W':
175                 index = handleW(value, result, index);
176                 break;
177             case 'X':
178                 index = handleX(value, result, index);
179                 break;
180             case 'Z':
181                 index = handleZ(value, result, index, slavoGermanic);
182                 break;
183             default:
184                 index++;
185                 break;
186             }
187         }
188 
189         return alternate ? result.getAlternate() : result.getPrimary();
190     }
191 
192     /**
193      * Encode the value using DoubleMetaphone.  It will only work if
194      * <code>obj</code> is a <code>String</code> (like <code>Metaphone</code>).
195      *
196      * @param obj Object to encode (should be of type String)
197      * @return An encoded Object (will be of type String)
198      * @throws EncoderException encode parameter is not of type String
199      */
200     @Override
201     public Object encode(Object obj) throws EncoderException {
202         if (!(obj instanceof String)) {
203             throw new EncoderException("DoubleMetaphone encode parameter is not of type String");
204         }
205         return doubleMetaphone((String) obj);
206     }
207 
208     /**
209      * Encode the value using DoubleMetaphone.
210      *
211      * @param value String to encode
212      * @return An encoded String
213      */
214     @Override
215     public String encode(String value) {
216         return doubleMetaphone(value);
217     }
218 
219     /**
220      * Check if the Double Metaphone values of two <code>String</code> values
221      * are equal.
222      *
223      * @param value1 The left-hand side of the encoded {@link String#equals(Object)}.
224      * @param value2 The right-hand side of the encoded {@link String#equals(Object)}.
225      * @return {@code true} if the encoded <code>String</code>s are equal;
226      *          {@code false} otherwise.
227      * @see #isDoubleMetaphoneEqual(String,String,boolean)
228      */
229     public boolean isDoubleMetaphoneEqual(String value1, String value2) {
230         return isDoubleMetaphoneEqual(value1, value2, false);
231     }
232 
233     /**
234      * Check if the Double Metaphone values of two <code>String</code> values
235      * are equal, optionally using the alternate value.
236      *
237      * @param value1 The left-hand side of the encoded {@link String#equals(Object)}.
238      * @param value2 The right-hand side of the encoded {@link String#equals(Object)}.
239      * @param alternate use the alternate value if {@code true}.
240      * @return {@code true} if the encoded <code>String</code>s are equal;
241      *          {@code false} otherwise.
242      */
243     public boolean isDoubleMetaphoneEqual(String value1, String value2, boolean alternate) {
244         return doubleMetaphone(value1, alternate).equals(doubleMetaphone(value2, alternate));
245     }
246 
247     /**
248      * Returns the maxCodeLen.
249      * @return int
250      */
251     public int getMaxCodeLen() {
252         return this.maxCodeLen;
253     }
254 
255     /**
256      * Sets the maxCodeLen.
257      * @param maxCodeLen The maxCodeLen to set
258      */
259     public void setMaxCodeLen(int maxCodeLen) {
260         this.maxCodeLen = maxCodeLen;
261     }
262 
263     //-- BEGIN HANDLERS --//
264 
265     /**
266      * Handles 'A', 'E', 'I', 'O', 'U', and 'Y' cases.
267      */
268     private int handleAEIOUY(DoubleMetaphoneResult result, int index) {
269         if (index == 0) {
270             result.append('A');
271         }
272         return index + 1;
273     }
274 
275     /**
276      * Handles 'C' cases.
277      */
278     private int handleC(String value, DoubleMetaphoneResult result, int index) {
279         if (conditionC0(value, index)) {  // very confusing, moved out
280             result.append('K');
281             index += 2;
282         } else if (index == 0 && contains(value, index, 6, "CAESAR")) {
283             result.append('S');
284             index += 2;
285         } else if (contains(value, index, 2, "CH")) {
286             index = handleCH(value, result, index);
287         } else if (contains(value, index, 2, "CZ") &&
288                    !contains(value, index - 2, 4, "WICZ")) {
289             //-- "Czerny" --//
290             result.append('S', 'X');
291             index += 2;
292         } else if (contains(value, index + 1, 3, "CIA")) {
293             //-- "focaccia" --//
294             result.append('X');
295             index += 3;
296         } else if (contains(value, index, 2, "CC") &&
297                    !(index == 1 && charAt(value, 0) == 'M')) {
298             //-- double "cc" but not "McClelland" --//
299             return handleCC(value, result, index);
300         } else if (contains(value, index, 2, "CK", "CG", "CQ")) {
301             result.append('K');
302             index += 2;
303         } else if (contains(value, index, 2, "CI", "CE", "CY")) {
304             //-- Italian vs. English --//
305             if (contains(value, index, 3, "CIO", "CIE", "CIA")) {
306                 result.append('S', 'X');
307             } else {
308                 result.append('S');
309             }
310             index += 2;
311         } else {
312             result.append('K');
313             if (contains(value, index + 1, 2, " C", " Q", " G")) {
314                 //-- Mac Caffrey, Mac Gregor --//
315                 index += 3;
316             } else if (contains(value, index + 1, 1, "C", "K", "Q") &&
317                        !contains(value, index + 1, 2, "CE", "CI")) {
318                 index += 2;
319             } else {
320                 index++;
321             }
322         }
323 
324         return index;
325     }
326 
327     /**
328      * Handles 'CC' cases.
329      */
330     private int handleCC(String value, DoubleMetaphoneResult result, int index) {
331         if (contains(value, index + 2, 1, "I", "E", "H") &&
332             !contains(value, index + 2, 2, "HU")) {
333             //-- "bellocchio" but not "bacchus" --//
334             if ((index == 1 && charAt(value, index - 1) == 'A') ||
335                 contains(value, index - 1, 5, "UCCEE", "UCCES")) {
336                 //-- "accident", "accede", "succeed" --//
337                 result.append("KS");
338             } else {
339                 //-- "bacci", "bertucci", other Italian --//
340                 result.append('X');
341             }
342             index += 3;
343         } else {    // Pierce's rule
344             result.append('K');
345             index += 2;
346         }
347 
348         return index;
349     }
350 
351     /**
352      * Handles 'CH' cases.
353      */
354     private int handleCH(String value, DoubleMetaphoneResult result, int index) {
355         if (index > 0 && contains(value, index, 4, "CHAE")) {   // Michael
356             result.append('K', 'X');
357             return index + 2;
358         } else if (conditionCH0(value, index)) {
359             //-- Greek roots ("chemistry", "chorus", etc.) --//
360             result.append('K');
361             return index + 2;
362         } else if (conditionCH1(value, index)) {
363             //-- Germanic, Greek, or otherwise 'ch' for 'kh' sound --//
364             result.append('K');
365             return index + 2;
366         } else {
367             if (index > 0) {
368                 if (contains(value, 0, 2, "MC")) {
369                     result.append('K');
370                 } else {
371                     result.append('X', 'K');
372                 }
373             } else {
374                 result.append('X');
375             }
376             return index + 2;
377         }
378     }
379 
380     /**
381      * Handles 'D' cases.
382      */
383     private int handleD(String value, DoubleMetaphoneResult result, int index) {
384         if (contains(value, index, 2, "DG")) {
385             //-- "Edge" --//
386             if (contains(value, index + 2, 1, "I", "E", "Y")) {
387                 result.append('J');
388                 index += 3;
389                 //-- "Edgar" --//
390             } else {
391                 result.append("TK");
392                 index += 2;
393             }
394         } else if (contains(value, index, 2, "DT", "DD")) {
395             result.append('T');
396             index += 2;
397         } else {
398             result.append('T');
399             index++;
400         }
401         return index;
402     }
403 
404     /**
405      * Handles 'G' cases.
406      */
407     private int handleG(String value, DoubleMetaphoneResult result, int index, boolean slavoGermanic) {
408         if (charAt(value, index + 1) == 'H') {
409             index = handleGH(value, result, index);
410         } else if (charAt(value, index + 1) == 'N') {
411             if (index == 1 && isVowel(charAt(value, 0)) && !slavoGermanic) {
412                 result.append("KN", "N");
413             } else if (!contains(value, index + 2, 2, "EY") &&
414                        charAt(value, index + 1) != 'Y' && !slavoGermanic) {
415                 result.append("N", "KN");
416             } else {
417                 result.append("KN");
418             }
419             index = index + 2;
420         } else if (contains(value, index + 1, 2, "LI") && !slavoGermanic) {
421             result.append("KL", "L");
422             index += 2;
423         } else if (index == 0 &&
424                    (charAt(value, index + 1) == 'Y' ||
425                     contains(value, index + 1, 2, ES_EP_EB_EL_EY_IB_IL_IN_IE_EI_ER))) {
426             //-- -ges-, -gep-, -gel-, -gie- at beginning --//
427             result.append('K', 'J');
428             index += 2;
429         } else if ((contains(value, index + 1, 2, "ER") ||
430                     charAt(value, index + 1) == 'Y') &&
431                    !contains(value, 0, 6, "DANGER", "RANGER", "MANGER") &&
432                    !contains(value, index - 1, 1, "E", "I") &&
433                    !contains(value, index - 1, 3, "RGY", "OGY")) {
434             //-- -ger-, -gy- --//
435             result.append('K', 'J');
436             index += 2;
437         } else if (contains(value, index + 1, 1, "E", "I", "Y") ||
438                    contains(value, index - 1, 4, "AGGI", "OGGI")) {
439             //-- Italian "biaggi" --//
440             if (contains(value, 0 ,4, "VAN ", "VON ") ||
441                 contains(value, 0, 3, "SCH") ||
442                 contains(value, index + 1, 2, "ET")) {
443                 //-- obvious germanic --//
444                 result.append('K');
445             } else if (contains(value, index + 1, 3, "IER")) {
446                 result.append('J');
447             } else {
448                 result.append('J', 'K');
449             }
450             index += 2;
451         } else if (charAt(value, index + 1) == 'G') {
452             index += 2;
453             result.append('K');
454         } else {
455             index++;
456             result.append('K');
457         }
458         return index;
459     }
460 
461     /**
462      * Handles 'GH' cases.
463      */
464     private int handleGH(String value, DoubleMetaphoneResult result, int index) {
465         if (index > 0 && !isVowel(charAt(value, index - 1))) {
466             result.append('K');
467             index += 2;
468         } else if (index == 0) {
469             if (charAt(value, index + 2) == 'I') {
470                 result.append('J');
471             } else {
472                 result.append('K');
473             }
474             index += 2;
475         } else if ((index > 1 && contains(value, index - 2, 1, "B", "H", "D")) ||
476                    (index > 2 && contains(value, index - 3, 1, "B", "H", "D")) ||
477                    (index > 3 && contains(value, index - 4, 1, "B", "H"))) {
478             //-- Parker's rule (with some further refinements) - "hugh"
479             index += 2;
480         } else {
481             if (index > 2 && charAt(value, index - 1) == 'U' &&
482                 contains(value, index - 3, 1, "C", "G", "L", "R", "T")) {
483                 //-- "laugh", "McLaughlin", "cough", "gough", "rough", "tough"
484                 result.append('F');
485             } else if (index > 0 && charAt(value, index - 1) != 'I') {
486                 result.append('K');
487             }
488             index += 2;
489         }
490         return index;
491     }
492 
493     /**
494      * Handles 'H' cases.
495      */
496     private int handleH(String value, DoubleMetaphoneResult result, int index) {
497         //-- only keep if first & before vowel or between 2 vowels --//
498         if ((index == 0 || isVowel(charAt(value, index - 1))) &&
499             isVowel(charAt(value, index + 1))) {
500             result.append('H');
501             index += 2;
502             //-- also takes car of "HH" --//
503         } else {
504             index++;
505         }
506         return index;
507     }
508 
509     /**
510      * Handles 'J' cases.
511      */
512     private int handleJ(String value, DoubleMetaphoneResult result, int index, boolean slavoGermanic) {
513         if (contains(value, index, 4, "JOSE") || contains(value, 0, 4, "SAN ")) {
514                 //-- obvious Spanish, "Jose", "San Jacinto" --//
515                 if ((index == 0 && (charAt(value, index + 4) == ' ') ||
516                      value.length() == 4) || contains(value, 0, 4, "SAN ")) {
517                     result.append('H');
518                 } else {
519                     result.append('J', 'H');
520                 }
521                 index++;
522             } else {
523                 if (index == 0 && !contains(value, index, 4, "JOSE")) {
524                     result.append('J', 'A');
525                 } else if (isVowel(charAt(value, index - 1)) && !slavoGermanic &&
526                            (charAt(value, index + 1) == 'A' || charAt(value, index + 1) == 'O')) {
527                     result.append('J', 'H');
528                 } else if (index == value.length() - 1) {
529                     result.append('J', ' ');
530                 } else if (!contains(value, index + 1, 1, L_T_K_S_N_M_B_Z) &&
531                            !contains(value, index - 1, 1, "S", "K", "L")) {
532                     result.append('J');
533                 }
534 
535                 if (charAt(value, index + 1) == 'J') {
536                     index += 2;
537                 } else {
538                     index++;
539                 }
540             }
541         return index;
542     }
543 
544     /**
545      * Handles 'L' cases.
546      */
547     private int handleL(String value, DoubleMetaphoneResult result, int index) {
548         if (charAt(value, index + 1) == 'L') {
549             if (conditionL0(value, index)) {
550                 result.appendPrimary('L');
551             } else {
552                 result.append('L');
553             }
554             index += 2;
555         } else {
556             index++;
557             result.append('L');
558         }
559         return index;
560     }
561 
562     /**
563      * Handles 'P' cases.
564      */
565     private int handleP(String value, DoubleMetaphoneResult result, int index) {
566         if (charAt(value, index + 1) == 'H') {
567             result.append('F');
568             index += 2;
569         } else {
570             result.append('P');
571             index = contains(value, index + 1, 1, "P", "B") ? index + 2 : index + 1;
572         }
573         return index;
574     }
575 
576     /**
577      * Handles 'R' cases.
578      */
579     private int handleR(String value, DoubleMetaphoneResult result, int index, boolean slavoGermanic) {
580         if (index == value.length() - 1 && !slavoGermanic &&
581             contains(value, index - 2, 2, "IE") &&
582             !contains(value, index - 4, 2, "ME", "MA")) {
583             result.appendAlternate('R');
584         } else {
585             result.append('R');
586         }
587         return charAt(value, index + 1) == 'R' ? index + 2 : index + 1;
588     }
589 
590     /**
591      * Handles 'S' cases.
592      */
593     private int handleS(String value, DoubleMetaphoneResult result, int index, boolean slavoGermanic) {
594         if (contains(value, index - 1, 3, "ISL", "YSL")) {
595             //-- special cases "island", "isle", "carlisle", "carlysle" --//
596             index++;
597         } else if (index == 0 && contains(value, index, 5, "SUGAR")) {
598             //-- special case "sugar-" --//
599             result.append('X', 'S');
600             index++;
601         } else if (contains(value, index, 2, "SH")) {
602             if (contains(value, index + 1, 4, "HEIM", "HOEK", "HOLM", "HOLZ")) {
603                 //-- germanic --//
604                 result.append('S');
605             } else {
606                 result.append('X');
607             }
608             index += 2;
609         } else if (contains(value, index, 3, "SIO", "SIA") || contains(value, index, 4, "SIAN")) {
610             //-- Italian and Armenian --//
611             if (slavoGermanic) {
612                 result.append('S');
613             } else {
614                 result.append('S', 'X');
615             }
616             index += 3;
617         } else if ((index == 0 && contains(value, index + 1, 1, "M", "N", "L", "W")) ||
618                    contains(value, index + 1, 1, "Z")) {
619             //-- german & anglicisations, e.g. "smith" match "schmidt" //
620             // "snider" match "schneider" --//
621             //-- also, -sz- in slavic language altho in hungarian it //
622             //   is pronounced "s" --//
623             result.append('S', 'X');
624             index = contains(value, index + 1, 1, "Z") ? index + 2 : index + 1;
625         } else if (contains(value, index, 2, "SC")) {
626             index = handleSC(value, result, index);
627         } else {
628             if (index == value.length() - 1 && contains(value, index - 2, 2, "AI", "OI")) {
629                 //-- french e.g. "resnais", "artois" --//
630                 result.appendAlternate('S');
631             } else {
632                 result.append('S');
633             }
634             index = contains(value, index + 1, 1, "S", "Z") ? index + 2 : index + 1;
635         }
636         return index;
637     }
638 
639     /**
640      * Handles 'SC' cases.
641      */
642     private int handleSC(String value, DoubleMetaphoneResult result, int index) {
643         if (charAt(value, index + 2) == 'H') {
644             //-- Schlesinger's rule --//
645             if (contains(value, index + 3, 2, "OO", "ER", "EN", "UY", "ED", "EM")) {
646                 //-- Dutch origin, e.g. "school", "schooner" --//
647                 if (contains(value, index + 3, 2, "ER", "EN")) {
648                     //-- "schermerhorn", "schenker" --//
649                     result.append("X", "SK");
650                 } else {
651                     result.append("SK");
652                 }
653             } else {
654                 if (index == 0 && !isVowel(charAt(value, 3)) && charAt(value, 3) != 'W') {
655                     result.append('X', 'S');
656                 } else {
657                     result.append('X');
658                 }
659             }
660         } else if (contains(value, index + 2, 1, "I", "E", "Y")) {
661             result.append('S');
662         } else {
663             result.append("SK");
664         }
665         return index + 3;
666     }
667 
668     /**
669      * Handles 'T' cases.
670      */
671     private int handleT(String value, DoubleMetaphoneResult result, int index) {
672         if (contains(value, index, 4, "TION")) {
673             result.append('X');
674             index += 3;
675         } else if (contains(value, index, 3, "TIA", "TCH")) {
676             result.append('X');
677             index += 3;
678         } else if (contains(value, index, 2, "TH") || contains(value, index, 3, "TTH")) {
679             if (contains(value, index + 2, 2, "OM", "AM") ||
680                 //-- special case "thomas", "thames" or germanic --//
681                 contains(value, 0, 4, "VAN ", "VON ") ||
682                 contains(value, 0, 3, "SCH")) {
683                 result.append('T');
684             } else {
685                 result.append('0', 'T');
686             }
687             index += 2;
688         } else {
689             result.append('T');
690             index = contains(value, index + 1, 1, "T", "D") ? index + 2 : index + 1;
691         }
692         return index;
693     }
694 
695     /**
696      * Handles 'W' cases.
697      */
698     private int handleW(String value, DoubleMetaphoneResult result, int index) {
699         if (contains(value, index, 2, "WR")) {
700             //-- can also be in middle of word --//
701             result.append('R');
702             index += 2;
703         } else {
704             if (index == 0 && (isVowel(charAt(value, index + 1)) ||
705                                contains(value, index, 2, "WH"))) {
706                 if (isVowel(charAt(value, index + 1))) {
707                     //-- Wasserman should match Vasserman --//
708                     result.append('A', 'F');
709                 } else {
710                     //-- need Uomo to match Womo --//
711                     result.append('A');
712                 }
713                 index++;
714             } else if ((index == value.length() - 1 && isVowel(charAt(value, index - 1))) ||
715                        contains(value, index - 1, 5, "EWSKI", "EWSKY", "OWSKI", "OWSKY") ||
716                        contains(value, 0, 3, "SCH")) {
717                 //-- Arnow should match Arnoff --//
718                 result.appendAlternate('F');
719                 index++;
720             } else if (contains(value, index, 4, "WICZ", "WITZ")) {
721                 //-- Polish e.g. "filipowicz" --//
722                 result.append("TS", "FX");
723                 index += 4;
724             } else {
725                 index++;
726             }
727         }
728         return index;
729     }
730 
731     /**
732      * Handles 'X' cases.
733      */
734     private int handleX(String value, DoubleMetaphoneResult result, int index) {
735         if (index == 0) {
736             result.append('S');
737             index++;
738         } else {
739             if (!((index == value.length() - 1) &&
740                   (contains(value, index - 3, 3, "IAU", "EAU") ||
741                    contains(value, index - 2, 2, "AU", "OU")))) {
742                 //-- French e.g. breaux --//
743                 result.append("KS");
744             }
745             index = contains(value, index + 1, 1, "C", "X") ? index + 2 : index + 1;
746         }
747         return index;
748     }
749 
750     /**
751      * Handles 'Z' cases.
752      */
753     private int handleZ(String value, DoubleMetaphoneResult result, int index, boolean slavoGermanic) {
754         if (charAt(value, index + 1) == 'H') {
755             //-- Chinese pinyin e.g. "zhao" or Angelina "Zhang" --//
756             result.append('J');
757             index += 2;
758         } else {
759             if (contains(value, index + 1, 2, "ZO", "ZI", "ZA") ||
760                 (slavoGermanic && (index > 0 && charAt(value, index - 1) != 'T'))) {
761                 result.append("S", "TS");
762             } else {
763                 result.append('S');
764             }
765             index = charAt(value, index + 1) == 'Z' ? index + 2 : index + 1;
766         }
767         return index;
768     }
769 
770     //-- BEGIN CONDITIONS --//
771 
772     /**
773      * Complex condition 0 for 'C'.
774      */
775     private boolean conditionC0(String value, int index) {
776         if (contains(value, index, 4, "CHIA")) {
777             return true;
778         } else if (index <= 1) {
779             return false;
780         } else if (isVowel(charAt(value, index - 2))) {
781             return false;
782         } else if (!contains(value, index - 1, 3, "ACH")) {
783             return false;
784         } else {
785             char c = charAt(value, index + 2);
786             return (c != 'I' && c != 'E') ||
787                     contains(value, index - 2, 6, "BACHER", "MACHER");
788         }
789     }
790 
791     /**
792      * Complex condition 0 for 'CH'.
793      */
794     private boolean conditionCH0(String value, int index) {
795         if (index != 0) {
796             return false;
797         } else if (!contains(value, index + 1, 5, "HARAC", "HARIS") &&
798                    !contains(value, index + 1, 3, "HOR", "HYM", "HIA", "HEM")) {
799             return false;
800         } else if (contains(value, 0, 5, "CHORE")) {
801             return false;
802         } else {
803             return true;
804         }
805     }
806 
807     /**
808      * Complex condition 1 for 'CH'.
809      */
810     private boolean conditionCH1(String value, int index) {
811         return ((contains(value, 0, 4, "VAN ", "VON ") || contains(value, 0, 3, "SCH")) ||
812                 contains(value, index - 2, 6, "ORCHES", "ARCHIT", "ORCHID") ||
813                 contains(value, index + 2, 1, "T", "S") ||
814                 ((contains(value, index - 1, 1, "A", "O", "U", "E") || index == 0) &&
815                  (contains(value, index + 2, 1, L_R_N_M_B_H_F_V_W_SPACE) || index + 1 == value.length() - 1)));
816     }
817 
818     /**
819      * Complex condition 0 for 'L'.
820      */
821     private boolean conditionL0(String value, int index) {
822         if (index == value.length() - 3 &&
823             contains(value, index - 1, 4, "ILLO", "ILLA", "ALLE")) {
824             return true;
825         } else if ((contains(value, value.length() - 2, 2, "AS", "OS") ||
826                     contains(value, value.length() - 1, 1, "A", "O")) &&
827                    contains(value, index - 1, 4, "ALLE")) {
828             return true;
829         } else {
830             return false;
831         }
832     }
833 
834     /**
835      * Complex condition 0 for 'M'.
836      */
837     private boolean conditionM0(String value, int index) {
838         if (charAt(value, index + 1) == 'M') {
839             return true;
840         }
841         return contains(value, index - 1, 3, "UMB") &&
842                ((index + 1) == value.length() - 1 || contains(value, index + 2, 2, "ER"));
843     }
844 
845     //-- BEGIN HELPER FUNCTIONS --//
846 
847     /**
848      * Determines whether or not a value is of slavo-germanic orgin. A value is
849      * of slavo-germanic origin if it contians any of 'W', 'K', 'CZ', or 'WITZ'.
850      */
851     private boolean isSlavoGermanic(String value) {
852         return value.indexOf('W') > -1 || value.indexOf('K') > -1 ||
853             value.indexOf("CZ") > -1 || value.indexOf("WITZ") > -1;
854     }
855 
856     /**
857      * Determines whether or not a character is a vowel or not
858      */
859     private boolean isVowel(char ch) {
860         return VOWELS.indexOf(ch) != -1;
861     }
862 
863     /**
864      * Determines whether or not the value starts with a silent letter.  It will
865      * return {@code true} if the value starts with any of 'GN', 'KN',
866      * 'PN', 'WR' or 'PS'.
867      */
868     private boolean isSilentStart(String value) {
869         boolean result = false;
870         for (String element : SILENT_START) {
871             if (value.startsWith(element)) {
872                 result = true;
873                 break;
874             }
875         }
876         return result;
877     }
878 
879     /**
880      * Cleans the input.
881      */
882     private String cleanInput(String input) {
883         if (input == null) {
884             return null;
885         }
886         input = input.trim();
887         if (input.length() == 0) {
888             return null;
889         }
890         return input.toUpperCase(java.util.Locale.ENGLISH);
891     }
892 
893     /**
894      * Gets the character at index <code>index</code> if available, otherwise
895      * it returns <code>Character.MIN_VALUE</code> so that there is some sort
896      * of a default.
897      */
898     protected char charAt(String value, int index) {
899         if (index < 0 || index >= value.length()) {
900             return Character.MIN_VALUE;
901         }
902         return value.charAt(index);
903     }
904 
905     /**
906      * Shortcut method with 1 criteria.
907      */
908     private static boolean contains(String value, int start, int length, String criteria) {
909         return contains(value, start, length, new String[] { criteria });
910     }
911 
912     /**
913      * Shortcut method with 2 criteria.
914      */
915     private static boolean contains(String value, int start, int length,
916                                     String criteria1, String criteria2) {
917         return contains(value, start, length, new String[] { criteria1, criteria2 });
918     }
919 
920     /**
921      * Shortcut method with 3 criteria.
922      */
923     private static boolean contains(String value, int start, int length,
924                                     String criteria1, String criteria2, String criteria3) {
925         return contains(value, start, length, new String[] { criteria1, criteria2, criteria3 });
926     }
927 
928     /**
929      * Shortcut method with 4 criteria.
930      */
931     private static boolean contains(String value, int start, int length,
932                                     String criteria1, String criteria2,
933                                     String criteria3, String criteria4) {
934         return contains(value, start, length,
935                         new String[] { criteria1, criteria2, criteria3, criteria4 });
936     }
937 
938     /**
939      * Shortcut method with 5 criteria.
940      */
941     private static boolean contains(String value, int start, int length,
942                                     String criteria1, String criteria2,
943                                     String criteria3, String criteria4,
944                                     String criteria5) {
945         return contains(value, start, length,
946                         new String[] { criteria1, criteria2, criteria3,
947                                        criteria4, criteria5 });
948     }
949 
950     /**
951      * Shortcut method with 6 criteria.
952      */
953     private static boolean contains(String value, int start, int length,
954                                     String criteria1, String criteria2,
955                                     String criteria3, String criteria4,
956                                     String criteria5, String criteria6) {
957         return contains(value, start, length,
958                         new String[] { criteria1, criteria2, criteria3,
959                                        criteria4, criteria5, criteria6 });
960     }
961 
962     /**
963      * Determines whether <code>value</code> contains any of the criteria starting at index <code>start</code> and
964      * matching up to length <code>length</code>.
965      */
966     protected static boolean contains(String value, int start, int length,
967                                       String[] criteria) {
968         boolean result = false;
969         if (start >= 0 && start + length <= value.length()) {
970             String target = value.substring(start, start + length);
971 
972             for (String element : criteria) {
973                 if (target.equals(element)) {
974                     result = true;
975                     break;
976                 }
977             }
978         }
979         return result;
980     }
981 
982     //-- BEGIN INNER CLASSES --//
983 
984     /**
985      * Inner class for storing results, since there is the optional alternate encoding.
986      */
987     public class DoubleMetaphoneResult {
988 
989         private final StringBuilder primary = new StringBuilder(getMaxCodeLen());
990         private final StringBuilder alternate = new StringBuilder(getMaxCodeLen());
991         private final int maxLength;
992 
993         public DoubleMetaphoneResult(int maxLength) {
994             this.maxLength = maxLength;
995         }
996 
997         public void append(char value) {
998             appendPrimary(value);
999             appendAlternate(value);
1000         }
1001 
1002         public void append(char primary, char alternate) {
1003             appendPrimary(primary);
1004             appendAlternate(alternate);
1005         }
1006 
1007         public void appendPrimary(char value) {
1008             if (this.primary.length() < this.maxLength) {
1009                 this.primary.append(value);
1010             }
1011         }
1012 
1013         public void appendAlternate(char value) {
1014             if (this.alternate.length() < this.maxLength) {
1015                 this.alternate.append(value);
1016             }
1017         }
1018 
1019         public void append(String value) {
1020             appendPrimary(value);
1021             appendAlternate(value);
1022         }
1023 
1024         public void append(String primary, String alternate) {
1025             appendPrimary(primary);
1026             appendAlternate(alternate);
1027         }
1028 
1029         public void appendPrimary(String value) {
1030             int addChars = this.maxLength - this.primary.length();
1031             if (value.length() <= addChars) {
1032                 this.primary.append(value);
1033             } else {
1034                 this.primary.append(value.substring(0, addChars));
1035             }
1036         }
1037 
1038         public void appendAlternate(String value) {
1039             int addChars = this.maxLength - this.alternate.length();
1040             if (value.length() <= addChars) {
1041                 this.alternate.append(value);
1042             } else {
1043                 this.alternate.append(value.substring(0, addChars));
1044             }
1045         }
1046 
1047         public String getPrimary() {
1048             return this.primary.toString();
1049         }
1050 
1051         public String getAlternate() {
1052             return this.alternate.toString();
1053         }
1054 
1055         public boolean isComplete() {
1056             return this.primary.length() >= this.maxLength &&
1057                    this.alternate.length() >= this.maxLength;
1058         }
1059     }
1060 }