View Javadoc
1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one or more
3    * contributor license agreements.  See the NOTICE file distributed with
4    * this work for additional information regarding copyright ownership.
5    * The ASF licenses this file to You under the Apache License, Version 2.0
6    * (the "License"); you may not use this file except in compliance with
7    * the License.  You may obtain a copy of the License at
8    *
9    *      http://www.apache.org/licenses/LICENSE-2.0
10   *
11   * Unless required by applicable law or agreed to in writing, software
12   * distributed under the License is distributed on an "AS IS" BASIS,
13   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14   * See the License for the specific language governing permissions and
15   * limitations under the License.
16   */
17  
18  package org.apache.commons.codec.language;
19  
20  import org.apache.commons.codec.EncoderException;
21  import org.apache.commons.codec.StringEncoder;
22  import org.apache.commons.codec.binary.StringUtils;
23  
24  /**
25   * Encodes a string into a double metaphone value. This Implementation is based on the algorithm by <CITE>Lawrence
26   * Philips</CITE>.
27   * <p>
28   * This class is conditionally thread-safe. The instance field for the maximum code length is mutable
29   * {@link #setMaxCodeLen(int)} but is not volatile, and accesses are not synchronized. If an instance of the class is
30   * shared between threads, the caller needs to ensure that suitable synchronization is used to ensure safe publication
31   * of the value between threads, and must not invoke {@link #setMaxCodeLen(int)} after initial setup.
32   * </p>
33   *
34   * @see <a href="https://drdobbs.com/the-double-metaphone-search-algorithm/184401251?pgno=2">Dr. Dobbs Original Article</a>
35   * @see <a href="https://en.wikipedia.org/wiki/Metaphone">Wikipedia Metaphone</a>
36   */
37  public class DoubleMetaphone implements StringEncoder {
38  
39      /**
40       * Inner class for storing results, since there is the optional alternate encoding.
41       */
42      public class DoubleMetaphoneResult {
43  
44          private final StringBuilder primary = new StringBuilder(getMaxCodeLen());
45          private final StringBuilder alternate = new StringBuilder(getMaxCodeLen());
46          private final int maxLength;
47  
48          public DoubleMetaphoneResult(final int maxLength) {
49              this.maxLength = maxLength;
50          }
51  
52          public void append(final char value) {
53              appendPrimary(value);
54              appendAlternate(value);
55          }
56  
57          public void append(final char primary, final char alternate) {
58              appendPrimary(primary);
59              appendAlternate(alternate);
60          }
61  
62          public void append(final String value) {
63              appendPrimary(value);
64              appendAlternate(value);
65          }
66  
67          public void append(final String primary, final String alternate) {
68              appendPrimary(primary);
69              appendAlternate(alternate);
70          }
71  
72          public void appendAlternate(final char value) {
73              if (this.alternate.length() < this.maxLength) {
74                  this.alternate.append(value);
75              }
76          }
77  
78          public void appendAlternate(final String value) {
79              final int addChars = this.maxLength - this.alternate.length();
80              if (value.length() <= addChars) {
81                  this.alternate.append(value);
82              } else {
83                  this.alternate.append(value, 0, addChars);
84              }
85          }
86  
87          public void appendPrimary(final char value) {
88              if (this.primary.length() < this.maxLength) {
89                  this.primary.append(value);
90              }
91          }
92  
93          public void appendPrimary(final String value) {
94              final int addChars = this.maxLength - this.primary.length();
95              if (value.length() <= addChars) {
96                  this.primary.append(value);
97              } else {
98                  this.primary.append(value, 0, addChars);
99              }
100         }
101 
102         public String getAlternate() {
103             return this.alternate.toString();
104         }
105 
106         public String getPrimary() {
107             return this.primary.toString();
108         }
109 
110         public boolean isComplete() {
111             return this.primary.length() >= this.maxLength &&
112                    this.alternate.length() >= this.maxLength;
113         }
114     }
115 
    /**
     * "Vowels" to test for. Note that 'Y' is included.
     */
    private static final String VOWELS = "AEIOUY";
    /**
     * Two-letter prefixes which, when present at the start of a value, are not pronounced.
     */
    private static final String[] SILENT_START =
        { "GN", "KN", "PN", "WR", "PS" };
    /**
     * Single characters (including a space) tested against the character following "CH"
     * by {@code conditionCH1}.
     */
    private static final String[] L_R_N_M_B_H_F_V_W_SPACE =
        { "L", "R", "N", "M", "B", "H", "F", "V", "W", " " };
    /**
     * Two-letter sequences tested against the characters following a leading 'G' by {@code handleG}.
     */
    private static final String[] ES_EP_EB_EL_EY_IB_IL_IN_IE_EI_ER =
        { "ES", "EP", "EB", "EL", "EY", "IB", "IL", "IN", "IE", "EI", "ER" };

    /**
     * Single characters tested against the character following 'J' by {@code handleJ}.
     */
    private static final String[] L_T_K_S_N_M_B_Z =
        { "L", "T", "K", "S", "N", "M", "B", "Z" };
132 
133     /*
134      * Determines whether {@code value} contains any of the criteria starting at index {@code start} and
135      * matching up to length {@code length}.
136      */
137     protected static boolean contains(final String value, final int start, final int length,
138                                       final String... criteria) {
139         boolean result = false;
140         if (start >= 0 && start + length <= value.length()) {
141             final String target = value.substring(start, start + length);
142 
143             for (final String element : criteria) {
144                 if (target.equals(element)) {
145                     result = true;
146                     break;
147                 }
148             }
149         }
150         return result;
151     }
152 
    /**
     * Maximum length of an encoding; the default is 4. This is the value returned by
     * {@link #getMaxCodeLen()}.
     */
    private int maxCodeLen = 4;
157 
158     /*
159      * Gets the character at index {@code index} if available, otherwise
160      * it returns {@code Character.MIN_VALUE} so that there is some sort
161      * of default.
162      */
163     protected char charAt(final String value, final int index) {
164         if (index < 0 || index >= value.length()) {
165             return Character.MIN_VALUE;
166         }
167         return value.charAt(index);
168     }
169 
170     /**
171      * Cleans the input.
172      */
173     private String cleanInput(String input) {
174         if (input == null) {
175             return null;
176         }
177         input = input.trim();
178         if (input.isEmpty()) {
179             return null;
180         }
181         return input.toUpperCase(java.util.Locale.ENGLISH);
182     }
183 
184     /**
185      * Complex condition 0 for 'C'.
186      */
187     private boolean conditionC0(final String value, final int index) {
188         if (contains(value, index, 4, "CHIA")) {
189             return true;
190         }
191         if (index <= 1) {
192             return false;
193         }
194         if (isVowel(charAt(value, index - 2))) {
195             return false;
196         }
197         if (!contains(value, index - 1, 3, "ACH")) {
198             return false;
199         }
200         final char c = charAt(value, index + 2);
201         return c != 'I' && c != 'E' ||
202                 contains(value, index - 2, 6, "BACHER", "MACHER");
203     }
204 
205     /**
206      * Complex condition 0 for 'CH'.
207      */
208     private boolean conditionCH0(final String value, final int index) {
209         if (index != 0) {
210             return false;
211         }
212         if (!contains(value, index + 1, 5, "HARAC", "HARIS") &&
213                    !contains(value, index + 1, 3, "HOR", "HYM", "HIA", "HEM")) {
214             return false;
215         }
216         return !contains(value, 0, 5, "CHORE");
217     }
218 
219     /**
220      * Complex condition 1 for 'CH'.
221      */
222     private boolean conditionCH1(final String value, final int index) {
223         return contains(value, 0, 4, "VAN ", "VON ") || contains(value, 0, 3, "SCH") ||
224                 contains(value, index - 2, 6, "ORCHES", "ARCHIT", "ORCHID") ||
225                 contains(value, index + 2, 1, "T", "S") ||
226                 (contains(value, index - 1, 1, "A", "O", "U", "E") || index == 0) &&
227                  (contains(value, index + 2, 1, L_R_N_M_B_H_F_V_W_SPACE) || index + 1 == value.length() - 1);
228     }
229 
230     /**
231      * Complex condition 0 for 'L'.
232      */
233     private boolean conditionL0(final String value, final int index) {
234         if (index == value.length() - 3 &&
235             contains(value, index - 1, 4, "ILLO", "ILLA", "ALLE")) {
236             return true;
237         }
238         return (contains(value, value.length() - 2, 2, "AS", "OS") ||
239                 contains(value, value.length() - 1, 1, "A", "O")) &&
240                 contains(value, index - 1, 4, "ALLE");
241     }
242 
243     //-- BEGIN HANDLERS --//
244 
245     /**
246      * Complex condition 0 for 'M'.
247      */
248     private boolean conditionM0(final String value, final int index) {
249         if (charAt(value, index + 1) == 'M') {
250             return true;
251         }
252         return contains(value, index - 1, 3, "UMB") &&
253                (index + 1 == value.length() - 1 || contains(value, index + 2, 2, "ER"));
254     }
255 
256     /**
257      * Encode a value with Double Metaphone.
258      *
259      * @param value String to encode
260      * @return an encoded string
261      */
262     public String doubleMetaphone(final String value) {
263         return doubleMetaphone(value, false);
264     }
265 
266     /**
267      * Encode a value with Double Metaphone, optionally using the alternate encoding.
268      *
269      * @param value String to encode
270      * @param alternate use alternate encode
271      * @return an encoded string
272      */
273     public String doubleMetaphone(String value, final boolean alternate) {
274         value = cleanInput(value);
275         if (value == null) {
276             return null;
277         }
278 
279         final boolean slavoGermanic = isSlavoGermanic(value);
280         int index = isSilentStart(value) ? 1 : 0;
281 
282         final DoubleMetaphoneResult result = new DoubleMetaphoneResult(this.getMaxCodeLen());
283 
284         while (!result.isComplete() && index <= value.length() - 1) {
285             switch (value.charAt(index)) {
286             case 'A':
287             case 'E':
288             case 'I':
289             case 'O':
290             case 'U':
291             case 'Y':
292                 index = handleAEIOUY(result, index);
293                 break;
294             case 'B':
295                 result.append('P');
296                 index = charAt(value, index + 1) == 'B' ? index + 2 : index + 1;
297                 break;
298             case '\u00C7':
299                 // A C with a Cedilla
300                 result.append('S');
301                 index++;
302                 break;
303             case 'C':
304                 index = handleC(value, result, index);
305                 break;
306             case 'D':
307                 index = handleD(value, result, index);
308                 break;
309             case 'F':
310                 result.append('F');
311                 index = charAt(value, index + 1) == 'F' ? index + 2 : index + 1;
312                 break;
313             case 'G':
314                 index = handleG(value, result, index, slavoGermanic);
315                 break;
316             case 'H':
317                 index = handleH(value, result, index);
318                 break;
319             case 'J':
320                 index = handleJ(value, result, index, slavoGermanic);
321                 break;
322             case 'K':
323                 result.append('K');
324                 index = charAt(value, index + 1) == 'K' ? index + 2 : index + 1;
325                 break;
326             case 'L':
327                 index = handleL(value, result, index);
328                 break;
329             case 'M':
330                 result.append('M');
331                 index = conditionM0(value, index) ? index + 2 : index + 1;
332                 break;
333             case 'N':
334                 result.append('N');
335                 index = charAt(value, index + 1) == 'N' ? index + 2 : index + 1;
336                 break;
337             case '\u00D1':
338                 // N with a tilde (spanish ene)
339                 result.append('N');
340                 index++;
341                 break;
342             case 'P':
343                 index = handleP(value, result, index);
344                 break;
345             case 'Q':
346                 result.append('K');
347                 index = charAt(value, index + 1) == 'Q' ? index + 2 : index + 1;
348                 break;
349             case 'R':
350                 index = handleR(value, result, index, slavoGermanic);
351                 break;
352             case 'S':
353                 index = handleS(value, result, index, slavoGermanic);
354                 break;
355             case 'T':
356                 index = handleT(value, result, index);
357                 break;
358             case 'V':
359                 result.append('F');
360                 index = charAt(value, index + 1) == 'V' ? index + 2 : index + 1;
361                 break;
362             case 'W':
363                 index = handleW(value, result, index);
364                 break;
365             case 'X':
366                 index = handleX(value, result, index);
367                 break;
368             case 'Z':
369                 index = handleZ(value, result, index, slavoGermanic);
370                 break;
371             default:
372                 index++;
373                 break;
374             }
375         }
376 
377         return alternate ? result.getAlternate() : result.getPrimary();
378     }
379 
380     /**
381      * Encode the value using DoubleMetaphone.  It will only work if
382      * {@code obj} is a {@code String} (like {@code Metaphone}).
383      *
384      * @param obj Object to encode (should be of type String)
385      * @return An encoded Object (will be of type String)
386      * @throws EncoderException encode parameter is not of type String
387      */
388     @Override
389     public Object encode(final Object obj) throws EncoderException {
390         if (!(obj instanceof String)) {
391             throw new EncoderException("DoubleMetaphone encode parameter is not of type String");
392         }
393         return doubleMetaphone((String) obj);
394     }
395 
396     /**
397      * Encode the value using DoubleMetaphone.
398      *
399      * @param value String to encode
400      * @return An encoded String
401      */
402     @Override
403     public String encode(final String value) {
404         return doubleMetaphone(value);
405     }
406 
407     /**
408      * Returns the maxCodeLen.
409      * @return int
410      */
411     public int getMaxCodeLen() {
412         return this.maxCodeLen;
413     }
414 
415     /**
416      * Handles 'A', 'E', 'I', 'O', 'U', and 'Y' cases.
417      */
418     private int handleAEIOUY(final DoubleMetaphoneResult result, final int index) {
419         if (index == 0) {
420             result.append('A');
421         }
422         return index + 1;
423     }
424 
425     /**
426      * Handles 'C' cases.
427      */
428     private int handleC(final String value, final DoubleMetaphoneResult result, int index) {
429         if (conditionC0(value, index)) {  // very confusing, moved out
430             result.append('K');
431             index += 2;
432         } else if (index == 0 && contains(value, index, 6, "CAESAR")) {
433             result.append('S');
434             index += 2;
435         } else if (contains(value, index, 2, "CH")) {
436             index = handleCH(value, result, index);
437         } else if (contains(value, index, 2, "CZ") &&
438                    !contains(value, index - 2, 4, "WICZ")) {
439             //-- "Czerny" --//
440             result.append('S', 'X');
441             index += 2;
442         } else if (contains(value, index + 1, 3, "CIA")) {
443             //-- "focaccia" --//
444             result.append('X');
445             index += 3;
446         } else if (contains(value, index, 2, "CC") &&
447                    !(index == 1 && charAt(value, 0) == 'M')) {
448             //-- double "cc" but not "McClelland" --//
449             return handleCC(value, result, index);
450         } else if (contains(value, index, 2, "CK", "CG", "CQ")) {
451             result.append('K');
452             index += 2;
453         } else if (contains(value, index, 2, "CI", "CE", "CY")) {
454             //-- Italian vs. English --//
455             if (contains(value, index, 3, "CIO", "CIE", "CIA")) {
456                 result.append('S', 'X');
457             } else {
458                 result.append('S');
459             }
460             index += 2;
461         } else {
462             result.append('K');
463             if (contains(value, index + 1, 2, " C", " Q", " G")) {
464                 //-- Mac Caffrey, Mac Gregor --//
465                 index += 3;
466             } else if (contains(value, index + 1, 1, "C", "K", "Q") &&
467                        !contains(value, index + 1, 2, "CE", "CI")) {
468                 index += 2;
469             } else {
470                 index++;
471             }
472         }
473 
474         return index;
475     }
476 
477     /**
478      * Handles 'CC' cases.
479      */
480     private int handleCC(final String value, final DoubleMetaphoneResult result, int index) {
481         if (contains(value, index + 2, 1, "I", "E", "H") &&
482             !contains(value, index + 2, 2, "HU")) {
483             //-- "bellocchio" but not "bacchus" --//
484             if (index == 1 && charAt(value, index - 1) == 'A' ||
485                 contains(value, index - 1, 5, "UCCEE", "UCCES")) {
486                 //-- "accident", "accede", "succeed" --//
487                 result.append("KS");
488             } else {
489                 //-- "bacci", "bertucci", other Italian --//
490                 result.append('X');
491             }
492             index += 3;
493         } else {    // Pierce's rule
494             result.append('K');
495             index += 2;
496         }
497 
498         return index;
499     }
500 
501     /**
502      * Handles 'CH' cases.
503      */
504     private int handleCH(final String value, final DoubleMetaphoneResult result, final int index) {
505         if (index > 0 && contains(value, index, 4, "CHAE")) {   // Michael
506             result.append('K', 'X');
507             return index + 2;
508         }
509         if (conditionCH0(value, index)) {
510             //-- Greek roots ("chemistry", "chorus", etc.) --//
511             result.append('K');
512             return index + 2;
513         }
514         if (conditionCH1(value, index)) {
515             //-- Germanic, Greek, or otherwise 'ch' for 'kh' sound --//
516             result.append('K');
517             return index + 2;
518         }
519         if (index > 0) {
520             if (contains(value, 0, 2, "MC")) {
521                 result.append('K');
522             } else {
523                 result.append('X', 'K');
524             }
525         } else {
526             result.append('X');
527         }
528         return index + 2;
529     }
530 
531     /**
532      * Handles 'D' cases.
533      */
534     private int handleD(final String value, final DoubleMetaphoneResult result, int index) {
535         if (contains(value, index, 2, "DG")) {
536             //-- "Edge" --//
537             if (contains(value, index + 2, 1, "I", "E", "Y")) {
538                 result.append('J');
539                 index += 3;
540                 //-- "Edgar" --//
541             } else {
542                 result.append("TK");
543                 index += 2;
544             }
545         } else if (contains(value, index, 2, "DT", "DD")) {
546             result.append('T');
547             index += 2;
548         } else {
549             result.append('T');
550             index++;
551         }
552         return index;
553     }
554 
555     /**
556      * Handles 'G' cases.
557      */
558     private int handleG(final String value, final DoubleMetaphoneResult result, int index,
559                         final boolean slavoGermanic) {
560         if (charAt(value, index + 1) == 'H') {
561             index = handleGH(value, result, index);
562         } else if (charAt(value, index + 1) == 'N') {
563             if (index == 1 && isVowel(charAt(value, 0)) && !slavoGermanic) {
564                 result.append("KN", "N");
565             } else if (!contains(value, index + 2, 2, "EY") &&
566                        charAt(value, index + 1) != 'Y' && !slavoGermanic) {
567                 result.append("N", "KN");
568             } else {
569                 result.append("KN");
570             }
571             index += 2;
572         } else if (contains(value, index + 1, 2, "LI") && !slavoGermanic) {
573             result.append("KL", "L");
574             index += 2;
575         } else if (index == 0 &&
576                    (charAt(value, index + 1) == 'Y' ||
577                     contains(value, index + 1, 2, ES_EP_EB_EL_EY_IB_IL_IN_IE_EI_ER))) {
578             //-- -ges-, -gep-, -gel-, -gie- at beginning --//
579             result.append('K', 'J');
580             index += 2;
581         } else if ((contains(value, index + 1, 2, "ER") ||
582                     charAt(value, index + 1) == 'Y') &&
583                    !contains(value, 0, 6, "DANGER", "RANGER", "MANGER") &&
584                    !contains(value, index - 1, 1, "E", "I") &&
585                    !contains(value, index - 1, 3, "RGY", "OGY")) {
586             //-- -ger-, -gy- --//
587             result.append('K', 'J');
588             index += 2;
589         } else if (contains(value, index + 1, 1, "E", "I", "Y") ||
590                    contains(value, index - 1, 4, "AGGI", "OGGI")) {
591             //-- Italian "biaggi" --//
592             if (contains(value, 0, 4, "VAN ", "VON ") ||
593                 contains(value, 0, 3, "SCH") ||
594                 contains(value, index + 1, 2, "ET")) {
595                 //-- obvious germanic --//
596                 result.append('K');
597             } else if (contains(value, index + 1, 3, "IER")) {
598                 result.append('J');
599             } else {
600                 result.append('J', 'K');
601             }
602             index += 2;
603         } else {
604             if (charAt(value, index + 1) == 'G') {
605                 index += 2;
606             } else {
607                 index++;
608             }
609             result.append('K');
610         }
611         return index;
612     }
613 
614     /**
615      * Handles 'GH' cases.
616      */
617     private int handleGH(final String value, final DoubleMetaphoneResult result, int index) {
618         if (index > 0 && !isVowel(charAt(value, index - 1))) {
619             result.append('K');
620             index += 2;
621         } else if (index == 0) {
622             if (charAt(value, index + 2) == 'I') {
623                 result.append('J');
624             } else {
625                 result.append('K');
626             }
627             index += 2;
628         } else if (index > 1 && contains(value, index - 2, 1, "B", "H", "D") ||
629                    index > 2 && contains(value, index - 3, 1, "B", "H", "D") ||
630                    index > 3 && contains(value, index - 4, 1, "B", "H")) {
631             //-- Parker's rule (with some further refinements) - "hugh"
632             index += 2;
633         } else {
634             if (index > 2 && charAt(value, index - 1) == 'U' &&
635                 contains(value, index - 3, 1, "C", "G", "L", "R", "T")) {
636                 //-- "laugh", "McLaughlin", "cough", "gough", "rough", "tough"
637                 result.append('F');
638             } else if (index > 0 && charAt(value, index - 1) != 'I') {
639                 result.append('K');
640             }
641             index += 2;
642         }
643         return index;
644     }
645 
646     /**
647      * Handles 'H' cases.
648      */
649     private int handleH(final String value, final DoubleMetaphoneResult result, int index) {
650         //-- only keep if first & before vowel or between 2 vowels --//
651         if ((index == 0 || isVowel(charAt(value, index - 1))) &&
652             isVowel(charAt(value, index + 1))) {
653             result.append('H');
654             index += 2;
655             //-- also takes car of "HH" --//
656         } else {
657             index++;
658         }
659         return index;
660     }
661 
662     /**
663      * Handles 'J' cases.
664      */
665     private int handleJ(final String value, final DoubleMetaphoneResult result, int index,
666                         final boolean slavoGermanic) {
667         if (contains(value, index, 4, "JOSE") || contains(value, 0, 4, "SAN ")) {
668                 //-- obvious Spanish, "Jose", "San Jacinto" --//
669                 if (index == 0 && charAt(value, index + 4) == ' ' ||
670                      value.length() == 4 || contains(value, 0, 4, "SAN ")) {
671                     result.append('H');
672                 } else {
673                     result.append('J', 'H');
674                 }
675                 index++;
676             } else {
677                 if (index == 0 && !contains(value, index, 4, "JOSE")) {
678                     result.append('J', 'A');
679                 } else if (isVowel(charAt(value, index - 1)) && !slavoGermanic &&
680                            (charAt(value, index + 1) == 'A' || charAt(value, index + 1) == 'O')) {
681                     result.append('J', 'H');
682                 } else if (index == value.length() - 1) {
683                     result.append('J', ' ');
684                 } else if (!contains(value, index + 1, 1, L_T_K_S_N_M_B_Z) &&
685                            !contains(value, index - 1, 1, "S", "K", "L")) {
686                     result.append('J');
687                 }
688 
689                 if (charAt(value, index + 1) == 'J') {
690                     index += 2;
691                 } else {
692                     index++;
693                 }
694             }
695         return index;
696     }
697 
698     /**
699      * Handles 'L' cases.
700      */
701     private int handleL(final String value, final DoubleMetaphoneResult result, int index) {
702         if (charAt(value, index + 1) == 'L') {
703             if (conditionL0(value, index)) {
704                 result.appendPrimary('L');
705             } else {
706                 result.append('L');
707             }
708             index += 2;
709         } else {
710             index++;
711             result.append('L');
712         }
713         return index;
714     }
715 
716     /**
717      * Handles 'P' cases.
718      */
719     private int handleP(final String value, final DoubleMetaphoneResult result, int index) {
720         if (charAt(value, index + 1) == 'H') {
721             result.append('F');
722             index += 2;
723         } else {
724             result.append('P');
725             index = contains(value, index + 1, 1, "P", "B") ? index + 2 : index + 1;
726         }
727         return index;
728     }
729 
730     /**
731      * Handles 'R' cases.
732      */
733     private int handleR(final String value, final DoubleMetaphoneResult result, final int index,
734                         final boolean slavoGermanic) {
735         if (index == value.length() - 1 && !slavoGermanic &&
736             contains(value, index - 2, 2, "IE") &&
737             !contains(value, index - 4, 2, "ME", "MA")) {
738             result.appendAlternate('R');
739         } else {
740             result.append('R');
741         }
742         return charAt(value, index + 1) == 'R' ? index + 2 : index + 1;
743     }
744 
745     //-- BEGIN CONDITIONS --//
746 
747     /**
748      * Handles 'S' cases.
749      */
750     private int handleS(final String value, final DoubleMetaphoneResult result, int index,
751                         final boolean slavoGermanic) {
752         if (contains(value, index - 1, 3, "ISL", "YSL")) {
753             //-- special cases "island", "isle", "carlisle", "carlysle" --//
754             index++;
755         } else if (index == 0 && contains(value, index, 5, "SUGAR")) {
756             //-- special case "sugar-" --//
757             result.append('X', 'S');
758             index++;
759         } else if (contains(value, index, 2, "SH")) {
760             if (contains(value, index + 1, 4, "HEIM", "HOEK", "HOLM", "HOLZ")) {
761                 //-- germanic --//
762                 result.append('S');
763             } else {
764                 result.append('X');
765             }
766             index += 2;
767         } else if (contains(value, index, 3, "SIO", "SIA") || contains(value, index, 4, "SIAN")) {
768             //-- Italian and Armenian --//
769             if (slavoGermanic) {
770                 result.append('S');
771             } else {
772                 result.append('S', 'X');
773             }
774             index += 3;
775         } else if (index == 0 && contains(value, index + 1, 1, "M", "N", "L", "W") ||
776                    contains(value, index + 1, 1, "Z")) {
777             //-- german & anglicisations, e.g. "smith" match "schmidt" //
778             // "snider" match "schneider" --//
779             //-- also, -sz- in slavic language although in hungarian it //
780             //   is pronounced "s" --//
781             result.append('S', 'X');
782             index = contains(value, index + 1, 1, "Z") ? index + 2 : index + 1;
783         } else if (contains(value, index, 2, "SC")) {
784             index = handleSC(value, result, index);
785         } else {
786             if (index == value.length() - 1 && contains(value, index - 2, 2, "AI", "OI")) {
787                 //-- french e.g. "resnais", "artois" --//
788                 result.appendAlternate('S');
789             } else {
790                 result.append('S');
791             }
792             index = contains(value, index + 1, 1, "S", "Z") ? index + 2 : index + 1;
793         }
794         return index;
795     }
796 
797     /**
798      * Handles 'SC' cases.
799      */
800     private int handleSC(final String value, final DoubleMetaphoneResult result, final int index) {
801         if (charAt(value, index + 2) == 'H') {
802             //-- Schlesinger's rule --//
803             if (contains(value, index + 3, 2, "OO", "ER", "EN", "UY", "ED", "EM")) {
804                 //-- Dutch origin, e.g. "school", "schooner" --//
805                 if (contains(value, index + 3, 2, "ER", "EN")) {
806                     //-- "schermerhorn", "schenker" --//
807                     result.append("X", "SK");
808                 } else {
809                     result.append("SK");
810                 }
811             } else if (index == 0 && !isVowel(charAt(value, 3)) && charAt(value, 3) != 'W') {
812                 result.append('X', 'S');
813             } else {
814                 result.append('X');
815             }
816         } else if (contains(value, index + 2, 1, "I", "E", "Y")) {
817             result.append('S');
818         } else {
819             result.append("SK");
820         }
821         return index + 3;
822     }
823 
824     /**
825      * Handles 'T' cases.
826      */
827     private int handleT(final String value, final DoubleMetaphoneResult result, int index) {
828         if (contains(value, index, 4, "TION") || contains(value, index, 3, "TIA", "TCH")) {
829             result.append('X');
830             index += 3;
831         } else if (contains(value, index, 2, "TH") || contains(value, index, 3, "TTH")) {
832             if (contains(value, index + 2, 2, "OM", "AM") ||
833                 //-- special case "thomas", "thames" or germanic --//
834                 contains(value, 0, 4, "VAN ", "VON ") ||
835                 contains(value, 0, 3, "SCH")) {
836                 result.append('T');
837             } else {
838                 result.append('0', 'T');
839             }
840             index += 2;
841         } else {
842             result.append('T');
843             index = contains(value, index + 1, 1, "T", "D") ? index + 2 : index + 1;
844         }
845         return index;
846     }
847 
848     /**
849      * Handles 'W' cases.
850      */
851     private int handleW(final String value, final DoubleMetaphoneResult result, int index) {
852         if (contains(value, index, 2, "WR")) {
853             //-- can also be in middle of word --//
854             result.append('R');
855             index += 2;
856         } else if (index == 0 && (isVowel(charAt(value, index + 1)) ||
857                            contains(value, index, 2, "WH"))) {
858             if (isVowel(charAt(value, index + 1))) {
859                 //-- Wasserman should match Vasserman --//
860                 result.append('A', 'F');
861             } else {
862                 //-- need Uomo to match Womo --//
863                 result.append('A');
864             }
865             index++;
866         } else if (index == value.length() - 1 && isVowel(charAt(value, index - 1)) ||
867                    contains(value, index - 1, 5, "EWSKI", "EWSKY", "OWSKI", "OWSKY") ||
868                    contains(value, 0, 3, "SCH")) {
869             //-- Arnow should match Arnoff --//
870             result.appendAlternate('F');
871             index++;
872         } else if (contains(value, index, 4, "WICZ", "WITZ")) {
873             //-- Polish e.g. "filipowicz" --//
874             result.append("TS", "FX");
875             index += 4;
876         } else {
877             index++;
878         }
879         return index;
880     }
881 
882     /**
883      * Handles 'X' cases.
884      */
885     private int handleX(final String value, final DoubleMetaphoneResult result, int index) {
886         if (index == 0) {
887             result.append('S');
888             index++;
889         } else {
890             if (!(index == value.length() - 1 &&
891                   (contains(value, index - 3, 3, "IAU", "EAU") ||
892                    contains(value, index - 2, 2, "AU", "OU")))) {
893                 //-- French e.g. breaux --//
894                 result.append("KS");
895             }
896             index = contains(value, index + 1, 1, "C", "X") ? index + 2 : index + 1;
897         }
898         return index;
899     }
900 
901     //-- BEGIN HELPER FUNCTIONS --//
902 
903     /**
904      * Handles 'Z' cases.
905      */
906     private int handleZ(final String value, final DoubleMetaphoneResult result, int index,
907                         final boolean slavoGermanic) {
908         if (charAt(value, index + 1) == 'H') {
909             //-- Chinese pinyin e.g. "zhao" or Angelina "Zhang" --//
910             result.append('J');
911             index += 2;
912         } else {
913             if (contains(value, index + 1, 2, "ZO", "ZI", "ZA") ||
914                 slavoGermanic && index > 0 && charAt(value, index - 1) != 'T') {
915                 result.append("S", "TS");
916             } else {
917                 result.append('S');
918             }
919             index = charAt(value, index + 1) == 'Z' ? index + 2 : index + 1;
920         }
921         return index;
922     }
923 
924     /**
925      * Check if the Double Metaphone values of two {@code String} values
926      * are equal.
927      *
928      * @param value1 The left-hand side of the encoded {@link String#equals(Object)}.
929      * @param value2 The right-hand side of the encoded {@link String#equals(Object)}.
930      * @return {@code true} if the encoded {@code String}s are equal;
931      *          {@code false} otherwise.
932      * @see #isDoubleMetaphoneEqual(String,String,boolean)
933      */
934     public boolean isDoubleMetaphoneEqual(final String value1, final String value2) {
935         return isDoubleMetaphoneEqual(value1, value2, false);
936     }
937 
938     /**
939      * Check if the Double Metaphone values of two {@code String} values
940      * are equal, optionally using the alternate value.
941      *
942      * @param value1 The left-hand side of the encoded {@link String#equals(Object)}.
943      * @param value2 The right-hand side of the encoded {@link String#equals(Object)}.
944      * @param alternate use the alternate value if {@code true}.
945      * @return {@code true} if the encoded {@code String}s are equal;
946      *          {@code false} otherwise.
947      */
948     public boolean isDoubleMetaphoneEqual(final String value1, final String value2, final boolean alternate) {
949         return StringUtils.equals(doubleMetaphone(value1, alternate), doubleMetaphone(value2, alternate));
950     }
951 
952     /**
953      * Determines whether or not the value starts with a silent letter.  It will
954      * return {@code true} if the value starts with any of 'GN', 'KN',
955      * 'PN', 'WR' or 'PS'.
956      */
957     private boolean isSilentStart(final String value) {
958         boolean result = false;
959         for (final String element : SILENT_START) {
960             if (value.startsWith(element)) {
961                 result = true;
962                 break;
963             }
964         }
965         return result;
966     }
967 
968     /**
969      * Determines whether or not a value is of slavo-germanic origin. A value is
970      * of slavo-germanic origin if it contians any of 'W', 'K', 'CZ', or 'WITZ'.
971      */
972     private boolean isSlavoGermanic(final String value) {
973         return value.indexOf('W') > -1 || value.indexOf('K') > -1 ||
974                 value.contains("CZ") || value.contains("WITZ");
975     }
976 
977     /**
978      * Determines whether or not a character is a vowel or not
979      */
980     private boolean isVowel(final char ch) {
981         return VOWELS.indexOf(ch) != -1;
982     }
983 
984     //-- BEGIN INNER CLASSES --//
985 
986     /**
987      * Sets the maxCodeLen.
988      * @param maxCodeLen The maxCodeLen to set
989      */
990     public void setMaxCodeLen(final int maxCodeLen) {
991         this.maxCodeLen = maxCodeLen;
992     }
993 }