View Javadoc
1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one or more
3    * contributor license agreements.  See the NOTICE file distributed with
4    * this work for additional information regarding copyright ownership.
5    * The ASF licenses this file to You under the Apache License, Version 2.0
6    * (the "License"); you may not use this file except in compliance with
7    * the License.  You may obtain a copy of the License at
8    *
9    *      https://www.apache.org/licenses/LICENSE-2.0
10   *
11   * Unless required by applicable law or agreed to in writing, software
12   * distributed under the License is distributed on an "AS IS" BASIS,
13   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14   * See the License for the specific language governing permissions and
15   * limitations under the License.
16   */
17  
18  package org.apache.commons.codec.language;
19  
20  import java.util.Locale;
21  
22  import org.apache.commons.codec.EncoderException;
23  import org.apache.commons.codec.StringEncoder;
24  import org.apache.commons.codec.binary.StringUtils;
25  
26  /**
27   * Encodes a string into a Double Metaphone value. This Implementation is based on the algorithm by <CITE>Lawrence
28   * Philips</CITE>.
29   * <p>
30   * This class is conditionally thread-safe. The instance field for the maximum code length is mutable
31   * {@link #setMaxCodeLen(int)} but is not volatile, and accesses are not synchronized. If an instance of the class is
32   * shared between threads, the caller needs to ensure that suitable synchronization is used to ensure safe publication
33   * of the value between threads, and must not invoke {@link #setMaxCodeLen(int)} after initial setup.
34   * </p>
35   *
36   * @see <a href="https://drdobbs.com/the-double-metaphone-search-algorithm/184401251?pgno=2">Dr. Dobbs Original Article</a>
37   * @see <a href="https://en.wikipedia.org/wiki/Metaphone">Wikipedia Metaphone</a>
38   * @see <a href="http://aspell.net/metaphone/dmetaph.cpp">Double Metaphone C++ source by Lawrence Philips</a>
39   */
40  public class DoubleMetaphone implements StringEncoder {
41  
42      /**
43       * Stores results, since there is the optional alternate encoding.
44       */
45      public class DoubleMetaphoneResult {
46  
47          private final StringBuilder primary = new StringBuilder(getMaxCodeLen());
48          private final StringBuilder alternate = new StringBuilder(getMaxCodeLen());
49          private final int maxLength;
50  
51          /**
52           * Constructs a new instance.
53           *
54           * @param maxLength The maximum length.
55           */
56          public DoubleMetaphoneResult(final int maxLength) {
57              this.maxLength = maxLength;
58          }
59  
60          /**
61           * Appends the given value as primary and alternative.
62           *
63           * @param value The value to append.
64           */
65          public void append(final char value) {
66              appendPrimary(value);
67              appendAlternate(value);
68          }
69  
70          /**
71           * Appends the given primary and alternative values.
72           *
73           * @param primary   The primary value.
74           * @param alternate The alternate value.
75           */
76          public void append(final char primary, final char alternate) {
77              appendPrimary(primary);
78              appendAlternate(alternate);
79          }
80  
81          /**
82           * Appends the given value as primary and alternative.
83           *
84           * @param value The value to append.
85           */
86          public void append(final String value) {
87              appendPrimary(value);
88              appendAlternate(value);
89          }
90  
91          /**
92           * Appends the given primary and alternative values.
93           *
94           * @param primary   The primary value.
95           * @param alternate The alternate value.
96           */
97          public void append(final String primary, final String alternate) {
98              appendPrimary(primary);
99              appendAlternate(alternate);
100         }
101 
102         /**
103          * Appends the given value as alternative.
104          *
105          * @param value The value to append.
106          */
107         public void appendAlternate(final char value) {
108             if (alternate.length() < maxLength) {
109                 alternate.append(value);
110             }
111         }
112 
113         /**
114          * Appends the given value as alternative.
115          *
116          * @param value The value to append.
117          */
118         public void appendAlternate(final String value) {
119             final int addChars = maxLength - alternate.length();
120             if (value.length() <= addChars) {
121                 alternate.append(value);
122             } else {
123                 alternate.append(value, 0, addChars);
124             }
125         }
126 
127         /**
128          * Appends the given value as primary.
129          *
130          * @param value The value to append.
131          */
132         public void appendPrimary(final char value) {
133             if (primary.length() < maxLength) {
134                 primary.append(value);
135             }
136         }
137 
138         /**
139          * Appends the given value as primary.
140          *
141          * @param value The value to append.
142          */
143         public void appendPrimary(final String value) {
144             final int addChars = maxLength - primary.length();
145             if (value.length() <= addChars) {
146                 primary.append(value);
147             } else {
148                 primary.append(value, 0, addChars);
149             }
150         }
151 
152         /**
153          * Gets the alternate string.
154          *
155          * @return the alternate string.
156          */
157         public String getAlternate() {
158             return alternate.toString();
159         }
160 
161         /**
162          * Gets the primary string.
163          *
164          * @return the primary string.
165          */
166         public String getPrimary() {
167             return primary.toString();
168         }
169 
170         /**
171          * Tests whether this result is complete.
172          *
173          * @return whether this result is complete.
174          */
175         public boolean isComplete() {
176             return primary.length() >= maxLength && alternate.length() >= maxLength;
177         }
178     }
179 
    /**
     * "Vowels" to test. Note that 'Y' is included.
     * NOTE(review): presumably consulted by {@code isVowel}, which is defined outside this section - confirm.
     */
    private static final String VOWELS = "AEIOUY";

    /**
     * Prefixes when present which are not pronounced.
     * NOTE(review): presumably consulted by {@code isSilentStart}, which is defined outside this section - confirm.
     */
    private static final String[] SILENT_START = { "GN", "KN", "PN", "WR", "PS" };

    /** Single characters tested at the position right after a "CH" pair (see {@code conditionCH1}). */
    private static final String[] L_R_N_M_B_H_F_V_W_SPACE = { "L", "R", "N", "M", "B", "H", "F", "V", "W", " " };
    /** Two-letter sequences tested right after a 'G' at index 0 (see {@code handleG}). */
    private static final String[] ES_EP_EB_EL_EY_IB_IL_IN_IE_EI_ER = { "ES", "EP", "EB", "EL", "EY", "IB", "IL", "IN", "IE", "EI", "ER" };
    /** Single characters tested at the position right after a 'J' (see {@code handleJ}). */
    private static final String[] L_T_K_S_N_M_B_Z = { "L", "T", "K", "S", "N", "M", "B", "Z" };
193 
194     /**
195      * Tests whether {@code value} contains any of the {@code criteria} starting at index {@code start} and matching up to length {@code length}.
196      *
197      * @param value    The value to test.
198      * @param start    Where in {@code value} to start testing.
199      * @param length   How many to test.
200      * @param criteria The search criteria.
201      * @return Whether there was a match.
202      */
203     protected static boolean contains(final String value, final int start, final int length, final String... criteria) {
204         boolean result = false;
205         if (start >= 0 && start + length <= value.length()) {
206             final String target = value.substring(start, start + length);
207             for (final String element : criteria) {
208                 if (target.equals(element)) {
209                     result = true;
210                     break;
211                 }
212             }
213         }
214         return result;
215     }
216 
    /**
     * Maximum length of an encoding, default is 4. The value is returned by {@link #getMaxCodeLen()} and is handed
     * to each new {@link DoubleMetaphoneResult} as its length limit.
     */
    private int maxCodeLen = 4;
221 
    /**
     * Constructs a new instance. Nothing is configured here; the maximum code length keeps its field default.
     */
    public DoubleMetaphone() {
        // empty
    }
228 
229     /**
230      * Gets the character at index {@code index} if available, or {@link Character#MIN_VALUE} if out of bounds.
231      *
232      * @param value The String to query.
233      * @param index A string index.
234      * @return The character at the index or {@link Character#MIN_VALUE} if out of bounds.
235      */
236     protected char charAt(final String value, final int index) {
237         if (index < 0 || index >= value.length()) {
238             return Character.MIN_VALUE;
239         }
240         return value.charAt(index);
241     }
242 
243     /**
244      * Cleans the input.
245      */
246     private String cleanInput(String input) {
247         if (input == null) {
248             return null;
249         }
250         input = input.trim();
251         if (input.isEmpty()) {
252             return null;
253         }
254         return input.toUpperCase(Locale.ENGLISH);
255     }
256 
257     /**
258      * Complex condition 0 for 'C'.
259      */
260     private boolean conditionC0(final String value, final int index) {
261         if (contains(value, index, 4, "CHIA")) {
262             return true;
263         }
264         if (index <= 1) {
265             return false;
266         }
267         if (isVowel(charAt(value, index - 2))) {
268             return false;
269         }
270         if (!contains(value, index - 1, 3, "ACH")) {
271             return false;
272         }
273         final char c = charAt(value, index + 2);
274         return c != 'I' && c != 'E' ||
275                 contains(value, index - 2, 6, "BACHER", "MACHER");
276     }
277 
278     /**
279      * Complex condition 0 for 'CH'.
280      */
281     private boolean conditionCH0(final String value, final int index) {
282         if (index != 0) {
283             return false;
284         }
285         if (!contains(value, index + 1, 5, "HARAC", "HARIS") &&
286                    !contains(value, index + 1, 3, "HOR", "HYM", "HIA", "HEM")) {
287             return false;
288         }
289         return !contains(value, 0, 5, "CHORE");
290     }
291 
292     /**
293      * Complex condition 1 for 'CH'.
294      */
295     private boolean conditionCH1(final String value, final int index) {
296         return contains(value, 0, 4, "VAN ", "VON ") || contains(value, 0, 3, "SCH") ||
297                 contains(value, index - 2, 6, "ORCHES", "ARCHIT", "ORCHID") ||
298                 contains(value, index + 2, 1, "T", "S") ||
299                 (contains(value, index - 1, 1, "A", "O", "U", "E") || index == 0) &&
300                  (contains(value, index + 2, 1, L_R_N_M_B_H_F_V_W_SPACE) || index + 1 == value.length() - 1);
301     }
302 
303     /**
304      * Complex condition 0 for 'L'.
305      */
306     private boolean conditionL0(final String value, final int index) {
307         if (index == value.length() - 3 &&
308             contains(value, index - 1, 4, "ILLO", "ILLA", "ALLE")) {
309             return true;
310         }
311         return (contains(value, value.length() - 2, 2, "AS", "OS") ||
312                 contains(value, value.length() - 1, 1, "A", "O")) &&
313                 contains(value, index - 1, 4, "ALLE");
314     }
315 
316     /**
317      * Complex condition 0 for 'M'.
318      */
319     private boolean conditionM0(final String value, final int index) {
320         if (charAt(value, index + 1) == 'M') {
321             return true;
322         }
323         return contains(value, index - 1, 3, "UMB") &&
324                (index + 1 == value.length() - 1 || contains(value, index + 2, 2, "ER"));
325     }
326 
327     /**
328      * Encodes a value with Double Metaphone.
329      *
330      * @param value String to encode.
331      * @return an encoded string.
332      */
333     public String doubleMetaphone(final String value) {
334         return doubleMetaphone(value, false);
335     }
336 
337     /**
338      * Encodes a value with Double Metaphone, optionally using the alternate encoding.
339      *
340      * @param value String to encode.
341      * @param alternate use alternate encode.
342      * @return an encoded string.
343      */
344     public String doubleMetaphone(String value, final boolean alternate) {
345         value = cleanInput(value);
346         if (value == null) {
347             return null;
348         }
349 
350         final boolean slavoGermanic = isSlavoGermanic(value);
351         int index = isSilentStart(value) ? 1 : 0;
352 
353         final DoubleMetaphoneResult result = new DoubleMetaphoneResult(getMaxCodeLen());
354 
355         while (!result.isComplete() && index <= value.length() - 1) {
356             switch (value.charAt(index)) {
357             case 'A':
358             case 'E':
359             case 'I':
360             case 'O':
361             case 'U':
362             case 'Y':
363                 index = handleAEIOUY(result, index);
364                 break;
365             case 'B':
366                 result.append('P');
367                 index = charAt(value, index + 1) == 'B' ? index + 2 : index + 1;
368                 break;
369             case '\u00C7':
370                 // C with a Cedilla
371                 result.append('S');
372                 index++;
373                 break;
374             case 'C':
375                 index = handleC(value, result, index);
376                 break;
377             case 'D':
378                 index = handleD(value, result, index);
379                 break;
380             case 'F':
381                 result.append('F');
382                 index = charAt(value, index + 1) == 'F' ? index + 2 : index + 1;
383                 break;
384             case 'G':
385                 index = handleG(value, result, index, slavoGermanic);
386                 break;
387             case 'H':
388                 index = handleH(value, result, index);
389                 break;
390             case 'J':
391                 index = handleJ(value, result, index, slavoGermanic);
392                 break;
393             case 'K':
394                 result.append('K');
395                 index = charAt(value, index + 1) == 'K' ? index + 2 : index + 1;
396                 break;
397             case 'L':
398                 index = handleL(value, result, index);
399                 break;
400             case 'M':
401                 result.append('M');
402                 index = conditionM0(value, index) ? index + 2 : index + 1;
403                 break;
404             case 'N':
405                 result.append('N');
406                 index = charAt(value, index + 1) == 'N' ? index + 2 : index + 1;
407                 break;
408             case '\u00D1':
409                 // N with a tilde (Spanish ene)
410                 result.append('N');
411                 index++;
412                 break;
413             case 'P':
414                 index = handleP(value, result, index);
415                 break;
416             case 'Q':
417                 result.append('K');
418                 index = charAt(value, index + 1) == 'Q' ? index + 2 : index + 1;
419                 break;
420             case 'R':
421                 index = handleR(value, result, index, slavoGermanic);
422                 break;
423             case 'S':
424                 index = handleS(value, result, index, slavoGermanic);
425                 break;
426             case 'T':
427                 index = handleT(value, result, index);
428                 break;
429             case 'V':
430                 result.append('F');
431                 index = charAt(value, index + 1) == 'V' ? index + 2 : index + 1;
432                 break;
433             case 'W':
434                 index = handleW(value, result, index);
435                 break;
436             case 'X':
437                 index = handleX(value, result, index);
438                 break;
439             case 'Z':
440                 index = handleZ(value, result, index, slavoGermanic);
441                 break;
442             default:
443                 index++;
444                 break;
445             }
446         }
447 
448         return alternate ? result.getAlternate() : result.getPrimary();
449     }
450 
451     /**
452      * Encodes the value using DoubleMetaphone.  It will only work if
453      * {@code obj} is a {@code String} (like {@code Metaphone}).
454      *
455      * @param obj Object to encode (should be of type String).
456      * @return An encoded Object (will be of type String).
457      * @throws EncoderException encode parameter is not of type String.
458      */
459     @Override
460     public Object encode(final Object obj) throws EncoderException {
461         if (!(obj instanceof String)) {
462             throw new EncoderException("DoubleMetaphone encode parameter is not of type String");
463         }
464         return doubleMetaphone((String) obj);
465     }
466 
467     /**
468      * Encodes the value using DoubleMetaphone.
469      *
470      * @param value String to encode.
471      * @return An encoded String.
472      */
473     @Override
474     public String encode(final String value) {
475         return doubleMetaphone(value);
476     }
477 
478     /**
479      * Gets the maxCodeLen.
480      *
481      * @return the maxCodeLen.
482      */
483     public int getMaxCodeLen() {
484         return maxCodeLen;
485     }
486 
487     /**
488      * Handles 'A', 'E', 'I', 'O', 'U', and 'Y' cases.
489      */
490     private int handleAEIOUY(final DoubleMetaphoneResult result, final int index) {
491         if (index == 0) {
492             result.append('A');
493         }
494         return index + 1;
495     }
496 
497     /**
498      * Handles 'C' cases.
499      */
500     private int handleC(final String value, final DoubleMetaphoneResult result, int index) {
501         if (conditionC0(value, index)) {  // very confusing, moved out
502             result.append('K');
503             index += 2;
504         } else if (index == 0 && contains(value, index, 6, "CAESAR")) {
505             result.append('S');
506             index += 2;
507         } else if (contains(value, index, 2, "CH")) {
508             index = handleCH(value, result, index);
509         } else if (contains(value, index, 2, "CZ") &&
510                    !contains(value, index - 2, 4, "WICZ")) {
511             //-- "Czerny" --//
512             result.append('S', 'X');
513             index += 2;
514         } else if (contains(value, index + 1, 3, "CIA")) {
515             //-- "focaccia" --//
516             result.append('X');
517             index += 3;
518         } else if (contains(value, index, 2, "CC") &&
519                    !(index == 1 && charAt(value, 0) == 'M')) {
520             //-- double "cc" but not "McClelland" --//
521             return handleCC(value, result, index);
522         } else if (contains(value, index, 2, "CK", "CG", "CQ")) {
523             result.append('K');
524             index += 2;
525         } else if (contains(value, index, 2, "CI", "CE", "CY")) {
526             //-- Italian vs. English --//
527             if (contains(value, index, 3, "CIO", "CIE", "CIA")) {
528                 result.append('S', 'X');
529             } else {
530                 result.append('S');
531             }
532             index += 2;
533         } else {
534             result.append('K');
535             if (contains(value, index + 1, 2, " C", " Q", " G")) {
536                 //-- Mac Caffrey, Mac Gregor --//
537                 index += 3;
538             } else if (contains(value, index + 1, 1, "C", "K", "Q") &&
539                        !contains(value, index + 1, 2, "CE", "CI")) {
540                 index += 2;
541             } else {
542                 index++;
543             }
544         }
545 
546         return index;
547     }
548 
549     /**
550      * Handles 'CC' cases.
551      */
552     private int handleCC(final String value, final DoubleMetaphoneResult result, int index) {
553         if (contains(value, index + 2, 1, "I", "E", "H") &&
554             !contains(value, index + 2, 2, "HU")) {
555             //-- "bellocchio" but not "bacchus" --//
556             if (index == 1 && charAt(value, index - 1) == 'A' ||
557                 contains(value, index - 1, 5, "UCCEE", "UCCES")) {
558                 //-- "accident", "accede", "succeed" --//
559                 result.append("KS");
560             } else {
561                 //-- "bacci", "bertucci", other Italian --//
562                 result.append('X');
563             }
564             index += 3;
565         } else {    // Pierce's rule
566             result.append('K');
567             index += 2;
568         }
569 
570         return index;
571     }
572 
573     /**
574      * Handles 'CH' cases.
575      */
576     private int handleCH(final String value, final DoubleMetaphoneResult result, final int index) {
577         if (index > 0 && contains(value, index, 4, "CHAE")) {   // Michael
578             result.append('K', 'X');
579             return index + 2;
580         }
581         if (conditionCH0(value, index)) {
582             //-- Greek roots ("chemistry", "chorus", etc.) --//
583             result.append('K');
584             return index + 2;
585         }
586         if (conditionCH1(value, index)) {
587             //-- Germanic, Greek, or otherwise 'ch' for 'kh' sound --//
588             result.append('K');
589             return index + 2;
590         }
591         if (index > 0) {
592             if (contains(value, 0, 2, "MC")) {
593                 result.append('K');
594             } else {
595                 result.append('X', 'K');
596             }
597         } else {
598             result.append('X');
599         }
600         return index + 2;
601     }
602 
603     /**
604      * Handles 'D' cases.
605      */
606     private int handleD(final String value, final DoubleMetaphoneResult result, int index) {
607         if (contains(value, index, 2, "DG")) {
608             //-- "Edge" --//
609             if (contains(value, index + 2, 1, "I", "E", "Y")) {
610                 result.append('J');
611                 index += 3;
612                 //-- "Edgar" --//
613             } else {
614                 result.append("TK");
615                 index += 2;
616             }
617         } else if (contains(value, index, 2, "DT", "DD")) {
618             result.append('T');
619             index += 2;
620         } else {
621             result.append('T');
622             index++;
623         }
624         return index;
625     }
626 
627     /**
628      * Handles 'G' cases.
629      */
630     private int handleG(final String value, final DoubleMetaphoneResult result, int index, final boolean slavoGermanic) {
631         if (charAt(value, index + 1) == 'H') {
632             index = handleGH(value, result, index);
633         } else if (charAt(value, index + 1) == 'N') {
634             if (index == 1 && isVowel(charAt(value, 0)) && !slavoGermanic) {
635                 result.append("KN", "N");
636             } else if (!contains(value, index + 2, 2, "EY") &&
637                        charAt(value, index + 1) != 'Y' && !slavoGermanic) {
638                 result.append("N", "KN");
639             } else {
640                 result.append("KN");
641             }
642             index += 2;
643         } else if (contains(value, index + 1, 2, "LI") && !slavoGermanic) {
644             result.append("KL", "L");
645             index += 2;
646         } else if (index == 0 &&
647                    (charAt(value, index + 1) == 'Y' ||
648                     contains(value, index + 1, 2, ES_EP_EB_EL_EY_IB_IL_IN_IE_EI_ER))) {
649             //-- -ges-, -gep-, -gel-, -gie- at beginning --//
650             result.append('K', 'J');
651             index += 2;
652         } else if ((contains(value, index + 1, 2, "ER") ||
653                     charAt(value, index + 1) == 'Y') &&
654                    !contains(value, 0, 6, "DANGER", "RANGER", "MANGER") &&
655                    !contains(value, index - 1, 1, "E", "I") &&
656                    !contains(value, index - 1, 3, "RGY", "OGY")) {
657             //-- -ger-, -gy- --//
658             result.append('K', 'J');
659             index += 2;
660         } else if (contains(value, index + 1, 1, "E", "I", "Y") ||
661                    contains(value, index - 1, 4, "AGGI", "OGGI")) {
662             //-- Italian "biaggi" --//
663             if (contains(value, 0, 4, "VAN ", "VON ") ||
664                 contains(value, 0, 3, "SCH") ||
665                 contains(value, index + 1, 2, "ET")) {
666                 //-- obvious germanic --//
667                 result.append('K');
668             } else if (contains(value, index + 1, 3, "IER")) {
669                 result.append('J');
670             } else {
671                 result.append('J', 'K');
672             }
673             index += 2;
674         } else {
675             if (charAt(value, index + 1) == 'G') {
676                 index += 2;
677             } else {
678                 index++;
679             }
680             result.append('K');
681         }
682         return index;
683     }
684 
685     /**
686      * Handles 'GH' cases.
687      */
688     private int handleGH(final String value, final DoubleMetaphoneResult result, int index) {
689         if (index > 0 && !isVowel(charAt(value, index - 1))) {
690             result.append('K');
691             index += 2;
692         } else if (index == 0) {
693             if (charAt(value, index + 2) == 'I') {
694                 result.append('J');
695             } else {
696                 result.append('K');
697             }
698             index += 2;
699         } else if (index > 1 && contains(value, index - 2, 1, "B", "H", "D") ||
700                    index > 2 && contains(value, index - 3, 1, "B", "H", "D") ||
701                    index > 3 && contains(value, index - 4, 1, "B", "H")) {
702             //-- Parker's rule (with some further refinements) - "hugh"
703             index += 2;
704         } else {
705             if (index > 2 && charAt(value, index - 1) == 'U' &&
706                 contains(value, index - 3, 1, "C", "G", "L", "R", "T")) {
707                 //-- "laugh", "McLaughlin", "cough", "gough", "rough", "tough"
708                 result.append('F');
709             } else if (index > 0 && charAt(value, index - 1) != 'I') {
710                 result.append('K');
711             }
712             index += 2;
713         }
714         return index;
715     }
716 
717     /**
718      * Handles 'H' cases.
719      */
720     private int handleH(final String value, final DoubleMetaphoneResult result, int index) {
721         //-- only keep if first & before vowel or between 2 vowels --//
722         if ((index == 0 || isVowel(charAt(value, index - 1))) &&
723             isVowel(charAt(value, index + 1))) {
724             result.append('H');
725             index += 2;
726             //-- also takes car of "HH" --//
727         } else {
728             index++;
729         }
730         return index;
731     }
732 
733     /**
734      * Handles 'J' cases.
735      */
736     private int handleJ(final String value, final DoubleMetaphoneResult result, int index, final boolean slavoGermanic) {
737         if (contains(value, index, 4, "JOSE") || contains(value, 0, 4, "SAN ")) {
738                 //-- obvious Spanish, "Jose", "San Jacinto" --//
739                 if (index == 0 && charAt(value, index + 4) == ' ' ||
740                      value.length() == 4 || contains(value, 0, 4, "SAN ")) {
741                     result.append('H');
742                 } else {
743                     result.append('J', 'H');
744                 }
745                 index++;
746             } else {
747                 if (index == 0 && !contains(value, index, 4, "JOSE")) {
748                     result.append('J', 'A');
749                 } else if (isVowel(charAt(value, index - 1)) && !slavoGermanic &&
750                            (charAt(value, index + 1) == 'A' || charAt(value, index + 1) == 'O')) {
751                     result.append('J', 'H');
752                 } else if (index == value.length() - 1) {
753                     result.append('J', ' ');
754                 } else if (!contains(value, index + 1, 1, L_T_K_S_N_M_B_Z) &&
755                            !contains(value, index - 1, 1, "S", "K", "L")) {
756                     result.append('J');
757                 }
758 
759                 if (charAt(value, index + 1) == 'J') {
760                     index += 2;
761                 } else {
762                     index++;
763                 }
764             }
765         return index;
766     }
767 
768     /**
769      * Handles 'L' cases.
770      */
771     private int handleL(final String value, final DoubleMetaphoneResult result, int index) {
772         if (charAt(value, index + 1) == 'L') {
773             if (conditionL0(value, index)) {
774                 result.appendPrimary('L');
775             } else {
776                 result.append('L');
777             }
778             index += 2;
779         } else {
780             index++;
781             result.append('L');
782         }
783         return index;
784     }
785 
786     /**
787      * Handles 'P' cases.
788      */
789     private int handleP(final String value, final DoubleMetaphoneResult result, int index) {
790         if (charAt(value, index + 1) == 'H') {
791             result.append('F');
792             index += 2;
793         } else {
794             result.append('P');
795             index = contains(value, index + 1, 1, "P", "B") ? index + 2 : index + 1;
796         }
797         return index;
798     }
799 
800     /**
801      * Handles 'R' cases.
802      */
803     private int handleR(final String value, final DoubleMetaphoneResult result, final int index, final boolean slavoGermanic) {
804         if (index == value.length() - 1 && !slavoGermanic &&
805             contains(value, index - 2, 2, "IE") &&
806             !contains(value, index - 4, 2, "ME", "MA")) {
807             result.appendAlternate('R');
808         } else {
809             result.append('R');
810         }
811         return charAt(value, index + 1) == 'R' ? index + 2 : index + 1;
812     }
813 
814     /**
815      * Handles 'S' cases.
816      */
817     private int handleS(final String value, final DoubleMetaphoneResult result, int index, final boolean slavoGermanic) {
818         if (contains(value, index - 1, 3, "ISL", "YSL")) {
819             //-- special cases "island", "isle", "carlisle", "carlysle" --//
820             index++;
821         } else if (index == 0 && contains(value, index, 5, "SUGAR")) {
822             //-- special case "sugar-" --//
823             result.append('X', 'S');
824             index++;
825         } else if (contains(value, index, 2, "SH")) {
826             if (contains(value, index + 1, 4, "HEIM", "HOEK", "HOLM", "HOLZ")) {
827                 //-- germanic --//
828                 result.append('S');
829             } else {
830                 result.append('X');
831             }
832             index += 2;
833         } else if (contains(value, index, 3, "SIO", "SIA") || contains(value, index, 4, "SIAN")) {
834             //-- Italian and Armenian --//
835             if (slavoGermanic) {
836                 result.append('S');
837             } else {
838                 result.append('S', 'X');
839             }
840             index += 3;
841         } else if (index == 0 && contains(value, index + 1, 1, "M", "N", "L", "W") ||
842                    contains(value, index + 1, 1, "Z")) {
843             //-- german & anglicisations, for example "smith" match "schmidt" //
844             // "snider" match "schneider" --//
845             //-- also, -sz- in slavic language although in hungarian it //
846             //   is pronounced "s" --//
847             result.append('S', 'X');
848             index = contains(value, index + 1, 1, "Z") ? index + 2 : index + 1;
849         } else if (contains(value, index, 2, "SC")) {
850             index = handleSC(value, result, index);
851         } else {
852             if (index == value.length() - 1 && contains(value, index - 2, 2, "AI", "OI")) {
853                 //-- french for example "resnais", "artois" --//
854                 result.appendAlternate('S');
855             } else {
856                 result.append('S');
857             }
858             index = contains(value, index + 1, 1, "S", "Z") ? index + 2 : index + 1;
859         }
860         return index;
861     }
862 
863     /**
864      * Handles 'SC' cases.
865      */
866     private int handleSC(final String value, final DoubleMetaphoneResult result, final int index) {
867         if (charAt(value, index + 2) == 'H') {
868             //-- Schlesinger's rule --//
869             if (contains(value, index + 3, 2, "OO", "ER", "EN", "UY", "ED", "EM")) {
870                 //-- Dutch origin, for example "school", "schooner" --//
871                 if (contains(value, index + 3, 2, "ER", "EN")) {
872                     //-- "schermerhorn", "schenker" --//
873                     result.append("X", "SK");
874                 } else {
875                     result.append("SK");
876                 }
877             } else if (index == 0 && !isVowel(charAt(value, 3)) && charAt(value, 3) != 'W') {
878                 result.append('X', 'S');
879             } else {
880                 result.append('X');
881             }
882         } else if (contains(value, index + 2, 1, "I", "E", "Y")) {
883             result.append('S');
884         } else {
885             result.append("SK");
886         }
887         return index + 3;
888     }
889 
890     /**
891      * Handles 'T' cases.
892      */
893     private int handleT(final String value, final DoubleMetaphoneResult result, int index) {
894         if (contains(value, index, 4, "TION") || contains(value, index, 3, "TIA", "TCH")) {
895             result.append('X');
896             index += 3;
897         } else if (contains(value, index, 2, "TH") || contains(value, index, 3, "TTH")) {
898             if (contains(value, index + 2, 2, "OM", "AM") ||
899                 //-- special case "thomas", "thames" or germanic --//
900                 contains(value, 0, 4, "VAN ", "VON ") ||
901                 contains(value, 0, 3, "SCH")) {
902                 result.append('T');
903             } else {
904                 result.append('0', 'T');
905             }
906             index += 2;
907         } else {
908             result.append('T');
909             index = contains(value, index + 1, 1, "T", "D") ? index + 2 : index + 1;
910         }
911         return index;
912     }
913 
914     /**
915      * Handles 'W' cases.
916      */
917     private int handleW(final String value, final DoubleMetaphoneResult result, int index) {
918         if (contains(value, index, 2, "WR")) {
919             //-- can also be in middle of word --//
920             result.append('R');
921             index += 2;
922         } else if (index == 0 && (isVowel(charAt(value, index + 1)) ||
923                            contains(value, index, 2, "WH"))) {
924             if (isVowel(charAt(value, index + 1))) {
925                 //-- Wasserman should match Vasserman --//
926                 result.append('A', 'F');
927             } else {
928                 //-- need Uomo to match Womo --//
929                 result.append('A');
930             }
931             index++;
932         } else if (index == value.length() - 1 && isVowel(charAt(value, index - 1)) ||
933                    contains(value, index - 1, 5, "EWSKI", "EWSKY", "OWSKI", "OWSKY") ||
934                    contains(value, 0, 3, "SCH")) {
935             //-- Arnow should match Arnoff --//
936             result.appendAlternate('F');
937             index++;
938         } else if (contains(value, index, 4, "WICZ", "WITZ")) {
939             //-- Polish for example "filipowicz" --//
940             result.append("TS", "FX");
941             index += 4;
942         } else {
943             index++;
944         }
945         return index;
946     }
947 
948     /**
949      * Handles 'X' cases.
950      */
951     private int handleX(final String value, final DoubleMetaphoneResult result, int index) {
952         if (index == 0) {
953             result.append('S');
954             index++;
955         } else {
956             if (!(index == value.length() - 1 &&
957                   (contains(value, index - 3, 3, "IAU", "EAU") ||
958                    contains(value, index - 2, 2, "AU", "OU")))) {
959                 //-- French for example breaux --//
960                 result.append("KS");
961             }
962             index = contains(value, index + 1, 1, "C", "X") ? index + 2 : index + 1;
963         }
964         return index;
965     }
966 
967     /**
968      * Handles 'Z' cases.
969      */
970     private int handleZ(final String value, final DoubleMetaphoneResult result, int index, final boolean slavoGermanic) {
971         if (charAt(value, index + 1) == 'H') {
972             //-- Chinese pinyin for example "zhao" or Angelina "Zhang" --//
973             result.append('J');
974             index += 2;
975         } else {
976             if (contains(value, index + 1, 2, "ZO", "ZI", "ZA") ||
977                 slavoGermanic && index > 0 && charAt(value, index - 1) != 'T') {
978                 result.append("S", "TS");
979             } else {
980                 result.append('S');
981             }
982             index = charAt(value, index + 1) == 'Z' ? index + 2 : index + 1;
983         }
984         return index;
985     }
986 
987     /**
988      * Tests whether the Double Metaphone values of two {@code String} values
989      * are equal.
990      *
991      * @param value1 The left-hand side of the encoded {@link String#equals(Object)}.
992      * @param value2 The right-hand side of the encoded {@link String#equals(Object)}.
993      * @return {@code true} if the encoded {@code String}s are equal;
994      *          {@code false} otherwise.
995      * @see #isDoubleMetaphoneEqual(String,String,boolean)
996      */
997     public boolean isDoubleMetaphoneEqual(final String value1, final String value2) {
998         return isDoubleMetaphoneEqual(value1, value2, false);
999     }
1000 
1001     /**
1002      * Tests whether the Double Metaphone values of two {@code String} values
1003      * are equal, optionally using the alternate value.
1004      *
1005      * @param value1 The left-hand side of the encoded {@link String#equals(Object)}.
1006      * @param value2 The right-hand side of the encoded {@link String#equals(Object)}.
1007      * @param alternate use the alternate value if {@code true}.
1008      * @return {@code true} if the encoded {@code String}s are equal;
1009      *          {@code false} otherwise.
1010      */
1011     public boolean isDoubleMetaphoneEqual(final String value1, final String value2, final boolean alternate) {
1012         return StringUtils.equals(doubleMetaphone(value1, alternate), doubleMetaphone(value2, alternate));
1013     }
1014 
1015     /**
1016      * Tests whether or not the value starts with a silent letter.  It will
1017      * return {@code true} if the value starts with any of 'GN', 'KN',
1018      * 'PN', 'WR' or 'PS'.
1019      */
1020     private boolean isSilentStart(final String value) {
1021         boolean result = false;
1022         for (final String element : SILENT_START) {
1023             if (value.startsWith(element)) {
1024                 result = true;
1025                 break;
1026             }
1027         }
1028         return result;
1029     }
1030 
1031     /**
1032      * Tests whether or not a value is of slavo-germanic origin. A value is
1033      * of Slavo-Germanic origin if it contains any of 'W', 'K', 'CZ', or 'WITZ'.
1034      */
1035     private boolean isSlavoGermanic(final String value) {
1036         return value.indexOf('W') > -1 || value.indexOf('K') > -1 ||
1037                 value.contains("CZ") || value.contains("WITZ");
1038     }
1039 
1040     /**
1041      * Tests whether or not a character is a vowel or not.
1042      */
1043     private boolean isVowel(final char ch) {
1044         return VOWELS.indexOf(ch) != -1;
1045     }
1046 
1047     /**
1048      * Sets the maxCodeLen.
1049      *
1050      * @param maxCodeLen The maxCodeLen to set.
1051      */
1052     public void setMaxCodeLen(final int maxCodeLen) {
1053         this.maxCodeLen = maxCodeLen;
1054     }
1055 }