001/*
002 * Licensed to the Apache Software Foundation (ASF) under one or more
003 * contributor license agreements.  See the NOTICE file distributed with
004 * this work for additional information regarding copyright ownership.
005 * The ASF licenses this file to You under the Apache License, Version 2.0
006 * (the "License"); you may not use this file except in compliance with
007 * the License.  You may obtain a copy of the License at
008 *
009 *      http://www.apache.org/licenses/LICENSE-2.0
010 *
011 * Unless required by applicable law or agreed to in writing, software
012 * distributed under the License is distributed on an "AS IS" BASIS,
013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 * See the License for the specific language governing permissions and
015 * limitations under the License.
016 */
017
018package org.apache.commons.codec.language;
019
020import org.apache.commons.codec.EncoderException;
021import org.apache.commons.codec.StringEncoder;
022import org.apache.commons.codec.binary.StringUtils;
023
024/**
025 * Encodes a string into a double metaphone value. This Implementation is based on the algorithm by <CITE>Lawrence
026 * Philips</CITE>.
027 * <p>
028 * This class is conditionally thread-safe. The instance field for the maximum code length is mutable
029 * {@link #setMaxCodeLen(int)} but is not volatile, and accesses are not synchronized. If an instance of the class is
030 * shared between threads, the caller needs to ensure that suitable synchronization is used to ensure safe publication
031 * of the value between threads, and must not invoke {@link #setMaxCodeLen(int)} after initial setup.
032 * </p>
033 *
034 * @see <a href="https://drdobbs.com/the-double-metaphone-search-algorithm/184401251?pgno=2">Dr. Dobbs Original Article</a>
035 * @see <a href="https://en.wikipedia.org/wiki/Metaphone">Wikipedia Metaphone</a>
036 */
037public class DoubleMetaphone implements StringEncoder {
038
039    /**
040     * Inner class for storing results, since there is the optional alternate encoding.
041     */
042    public class DoubleMetaphoneResult {
043
044        private final StringBuilder primary = new StringBuilder(getMaxCodeLen());
045        private final StringBuilder alternate = new StringBuilder(getMaxCodeLen());
046        private final int maxLength;
047
048        public DoubleMetaphoneResult(final int maxLength) {
049            this.maxLength = maxLength;
050        }
051
052        public void append(final char value) {
053            appendPrimary(value);
054            appendAlternate(value);
055        }
056
057        public void append(final char primary, final char alternate) {
058            appendPrimary(primary);
059            appendAlternate(alternate);
060        }
061
062        public void append(final String value) {
063            appendPrimary(value);
064            appendAlternate(value);
065        }
066
067        public void append(final String primary, final String alternate) {
068            appendPrimary(primary);
069            appendAlternate(alternate);
070        }
071
072        public void appendAlternate(final char value) {
073            if (this.alternate.length() < this.maxLength) {
074                this.alternate.append(value);
075            }
076        }
077
078        public void appendAlternate(final String value) {
079            final int addChars = this.maxLength - this.alternate.length();
080            if (value.length() <= addChars) {
081                this.alternate.append(value);
082            } else {
083                this.alternate.append(value, 0, addChars);
084            }
085        }
086
087        public void appendPrimary(final char value) {
088            if (this.primary.length() < this.maxLength) {
089                this.primary.append(value);
090            }
091        }
092
093        public void appendPrimary(final String value) {
094            final int addChars = this.maxLength - this.primary.length();
095            if (value.length() <= addChars) {
096                this.primary.append(value);
097            } else {
098                this.primary.append(value, 0, addChars);
099            }
100        }
101
102        public String getAlternate() {
103            return this.alternate.toString();
104        }
105
106        public String getPrimary() {
107            return this.primary.toString();
108        }
109
110        public boolean isComplete() {
111            return this.primary.length() >= this.maxLength &&
112                   this.alternate.length() >= this.maxLength;
113        }
114    }
115
    /**
     * Characters the algorithm treats as "vowels"; note that 'Y' is included.
     */
    private static final String VOWELS = "AEIOUY";
    /**
     * Two-letter prefixes that are listed as not pronounced when a word starts with them.
     */
    private static final String[] SILENT_START =
        { "GN", "KN", "PN", "WR", "PS" };
    /**
     * Single characters (including a space) that {@code conditionCH1} tests for directly after "CH".
     */
    private static final String[] L_R_N_M_B_H_F_V_W_SPACE =
        { "L", "R", "N", "M", "B", "H", "F", "V", "W", " " };
    /**
     * Two-letter sequences that {@code handleG} tests for directly after a word-initial 'G'.
     */
    private static final String[] ES_EP_EB_EL_EY_IB_IL_IN_IE_EI_ER =
        { "ES", "EP", "EB", "EL", "EY", "IB", "IL", "IN", "IE", "EI", "ER" };

    /**
     * Single characters that {@code handleJ} tests for directly after a 'J'.
     */
    private static final String[] L_T_K_S_N_M_B_Z =
        { "L", "T", "K", "S", "N", "M", "B", "Z" };
132
133    /*
134     * Determines whether {@code value} contains any of the criteria starting at index {@code start} and
135     * matching up to length {@code length}.
136     */
137    protected static boolean contains(final String value, final int start, final int length,
138                                      final String... criteria) {
139        boolean result = false;
140        if (start >= 0 && start + length <= value.length()) {
141            final String target = value.substring(start, start + length);
142
143            for (final String element : criteria) {
144                if (target.equals(element)) {
145                    result = true;
146                    break;
147                }
148            }
149        }
150        return result;
151    }
152
    /**
     * Maximum length of an encoding; the default is 4. It is read through {@code getMaxCodeLen()},
     * whose value {@code doubleMetaphone} hands to each new {@code DoubleMetaphoneResult} as the
     * cap for both the primary and the alternate code.
     */
    private int maxCodeLen = 4;
157
158    /*
159     * Gets the character at index {@code index} if available, otherwise
160     * it returns {@code Character.MIN_VALUE} so that there is some sort
161     * of default.
162     */
163    protected char charAt(final String value, final int index) {
164        if (index < 0 || index >= value.length()) {
165            return Character.MIN_VALUE;
166        }
167        return value.charAt(index);
168    }
169
170    /**
171     * Cleans the input.
172     */
173    private String cleanInput(String input) {
174        if (input == null) {
175            return null;
176        }
177        input = input.trim();
178        if (input.isEmpty()) {
179            return null;
180        }
181        return input.toUpperCase(java.util.Locale.ENGLISH);
182    }
183
184    /**
185     * Complex condition 0 for 'C'.
186     */
187    private boolean conditionC0(final String value, final int index) {
188        if (contains(value, index, 4, "CHIA")) {
189            return true;
190        }
191        if (index <= 1) {
192            return false;
193        }
194        if (isVowel(charAt(value, index - 2))) {
195            return false;
196        }
197        if (!contains(value, index - 1, 3, "ACH")) {
198            return false;
199        }
200        final char c = charAt(value, index + 2);
201        return c != 'I' && c != 'E' ||
202                contains(value, index - 2, 6, "BACHER", "MACHER");
203    }
204
205    /**
206     * Complex condition 0 for 'CH'.
207     */
208    private boolean conditionCH0(final String value, final int index) {
209        if (index != 0) {
210            return false;
211        }
212        if (!contains(value, index + 1, 5, "HARAC", "HARIS") &&
213                   !contains(value, index + 1, 3, "HOR", "HYM", "HIA", "HEM")) {
214            return false;
215        }
216        return !contains(value, 0, 5, "CHORE");
217    }
218
219    /**
220     * Complex condition 1 for 'CH'.
221     */
222    private boolean conditionCH1(final String value, final int index) {
223        return contains(value, 0, 4, "VAN ", "VON ") || contains(value, 0, 3, "SCH") ||
224                contains(value, index - 2, 6, "ORCHES", "ARCHIT", "ORCHID") ||
225                contains(value, index + 2, 1, "T", "S") ||
226                (contains(value, index - 1, 1, "A", "O", "U", "E") || index == 0) &&
227                 (contains(value, index + 2, 1, L_R_N_M_B_H_F_V_W_SPACE) || index + 1 == value.length() - 1);
228    }
229
230    /**
231     * Complex condition 0 for 'L'.
232     */
233    private boolean conditionL0(final String value, final int index) {
234        if (index == value.length() - 3 &&
235            contains(value, index - 1, 4, "ILLO", "ILLA", "ALLE")) {
236            return true;
237        }
238        return (contains(value, value.length() - 2, 2, "AS", "OS") ||
239                contains(value, value.length() - 1, 1, "A", "O")) &&
240                contains(value, index - 1, 4, "ALLE");
241    }
242
243    //-- BEGIN HANDLERS --//
244
245    /**
246     * Complex condition 0 for 'M'.
247     */
248    private boolean conditionM0(final String value, final int index) {
249        if (charAt(value, index + 1) == 'M') {
250            return true;
251        }
252        return contains(value, index - 1, 3, "UMB") &&
253               (index + 1 == value.length() - 1 || contains(value, index + 2, 2, "ER"));
254    }
255
256    /**
257     * Encode a value with Double Metaphone.
258     *
259     * @param value String to encode
260     * @return an encoded string
261     */
262    public String doubleMetaphone(final String value) {
263        return doubleMetaphone(value, false);
264    }
265
266    /**
267     * Encode a value with Double Metaphone, optionally using the alternate encoding.
268     *
269     * @param value String to encode
270     * @param alternate use alternate encode
271     * @return an encoded string
272     */
273    public String doubleMetaphone(String value, final boolean alternate) {
274        value = cleanInput(value);
275        if (value == null) {
276            return null;
277        }
278
279        final boolean slavoGermanic = isSlavoGermanic(value);
280        int index = isSilentStart(value) ? 1 : 0;
281
282        final DoubleMetaphoneResult result = new DoubleMetaphoneResult(this.getMaxCodeLen());
283
284        while (!result.isComplete() && index <= value.length() - 1) {
285            switch (value.charAt(index)) {
286            case 'A':
287            case 'E':
288            case 'I':
289            case 'O':
290            case 'U':
291            case 'Y':
292                index = handleAEIOUY(result, index);
293                break;
294            case 'B':
295                result.append('P');
296                index = charAt(value, index + 1) == 'B' ? index + 2 : index + 1;
297                break;
298            case '\u00C7':
299                // A C with a Cedilla
300                result.append('S');
301                index++;
302                break;
303            case 'C':
304                index = handleC(value, result, index);
305                break;
306            case 'D':
307                index = handleD(value, result, index);
308                break;
309            case 'F':
310                result.append('F');
311                index = charAt(value, index + 1) == 'F' ? index + 2 : index + 1;
312                break;
313            case 'G':
314                index = handleG(value, result, index, slavoGermanic);
315                break;
316            case 'H':
317                index = handleH(value, result, index);
318                break;
319            case 'J':
320                index = handleJ(value, result, index, slavoGermanic);
321                break;
322            case 'K':
323                result.append('K');
324                index = charAt(value, index + 1) == 'K' ? index + 2 : index + 1;
325                break;
326            case 'L':
327                index = handleL(value, result, index);
328                break;
329            case 'M':
330                result.append('M');
331                index = conditionM0(value, index) ? index + 2 : index + 1;
332                break;
333            case 'N':
334                result.append('N');
335                index = charAt(value, index + 1) == 'N' ? index + 2 : index + 1;
336                break;
337            case '\u00D1':
338                // N with a tilde (spanish ene)
339                result.append('N');
340                index++;
341                break;
342            case 'P':
343                index = handleP(value, result, index);
344                break;
345            case 'Q':
346                result.append('K');
347                index = charAt(value, index + 1) == 'Q' ? index + 2 : index + 1;
348                break;
349            case 'R':
350                index = handleR(value, result, index, slavoGermanic);
351                break;
352            case 'S':
353                index = handleS(value, result, index, slavoGermanic);
354                break;
355            case 'T':
356                index = handleT(value, result, index);
357                break;
358            case 'V':
359                result.append('F');
360                index = charAt(value, index + 1) == 'V' ? index + 2 : index + 1;
361                break;
362            case 'W':
363                index = handleW(value, result, index);
364                break;
365            case 'X':
366                index = handleX(value, result, index);
367                break;
368            case 'Z':
369                index = handleZ(value, result, index, slavoGermanic);
370                break;
371            default:
372                index++;
373                break;
374            }
375        }
376
377        return alternate ? result.getAlternate() : result.getPrimary();
378    }
379
380    /**
381     * Encode the value using DoubleMetaphone.  It will only work if
382     * {@code obj} is a {@code String} (like {@code Metaphone}).
383     *
384     * @param obj Object to encode (should be of type String)
385     * @return An encoded Object (will be of type String)
386     * @throws EncoderException encode parameter is not of type String
387     */
388    @Override
389    public Object encode(final Object obj) throws EncoderException {
390        if (!(obj instanceof String)) {
391            throw new EncoderException("DoubleMetaphone encode parameter is not of type String");
392        }
393        return doubleMetaphone((String) obj);
394    }
395
396    /**
397     * Encode the value using DoubleMetaphone.
398     *
399     * @param value String to encode
400     * @return An encoded String
401     */
402    @Override
403    public String encode(final String value) {
404        return doubleMetaphone(value);
405    }
406
407    /**
408     * Returns the maxCodeLen.
409     * @return int
410     */
411    public int getMaxCodeLen() {
412        return this.maxCodeLen;
413    }
414
415    /**
416     * Handles 'A', 'E', 'I', 'O', 'U', and 'Y' cases.
417     */
418    private int handleAEIOUY(final DoubleMetaphoneResult result, final int index) {
419        if (index == 0) {
420            result.append('A');
421        }
422        return index + 1;
423    }
424
425    /**
426     * Handles 'C' cases.
427     */
428    private int handleC(final String value, final DoubleMetaphoneResult result, int index) {
429        if (conditionC0(value, index)) {  // very confusing, moved out
430            result.append('K');
431            index += 2;
432        } else if (index == 0 && contains(value, index, 6, "CAESAR")) {
433            result.append('S');
434            index += 2;
435        } else if (contains(value, index, 2, "CH")) {
436            index = handleCH(value, result, index);
437        } else if (contains(value, index, 2, "CZ") &&
438                   !contains(value, index - 2, 4, "WICZ")) {
439            //-- "Czerny" --//
440            result.append('S', 'X');
441            index += 2;
442        } else if (contains(value, index + 1, 3, "CIA")) {
443            //-- "focaccia" --//
444            result.append('X');
445            index += 3;
446        } else if (contains(value, index, 2, "CC") &&
447                   !(index == 1 && charAt(value, 0) == 'M')) {
448            //-- double "cc" but not "McClelland" --//
449            return handleCC(value, result, index);
450        } else if (contains(value, index, 2, "CK", "CG", "CQ")) {
451            result.append('K');
452            index += 2;
453        } else if (contains(value, index, 2, "CI", "CE", "CY")) {
454            //-- Italian vs. English --//
455            if (contains(value, index, 3, "CIO", "CIE", "CIA")) {
456                result.append('S', 'X');
457            } else {
458                result.append('S');
459            }
460            index += 2;
461        } else {
462            result.append('K');
463            if (contains(value, index + 1, 2, " C", " Q", " G")) {
464                //-- Mac Caffrey, Mac Gregor --//
465                index += 3;
466            } else if (contains(value, index + 1, 1, "C", "K", "Q") &&
467                       !contains(value, index + 1, 2, "CE", "CI")) {
468                index += 2;
469            } else {
470                index++;
471            }
472        }
473
474        return index;
475    }
476
477    /**
478     * Handles 'CC' cases.
479     */
480    private int handleCC(final String value, final DoubleMetaphoneResult result, int index) {
481        if (contains(value, index + 2, 1, "I", "E", "H") &&
482            !contains(value, index + 2, 2, "HU")) {
483            //-- "bellocchio" but not "bacchus" --//
484            if (index == 1 && charAt(value, index - 1) == 'A' ||
485                contains(value, index - 1, 5, "UCCEE", "UCCES")) {
486                //-- "accident", "accede", "succeed" --//
487                result.append("KS");
488            } else {
489                //-- "bacci", "bertucci", other Italian --//
490                result.append('X');
491            }
492            index += 3;
493        } else {    // Pierce's rule
494            result.append('K');
495            index += 2;
496        }
497
498        return index;
499    }
500
501    /**
502     * Handles 'CH' cases.
503     */
504    private int handleCH(final String value, final DoubleMetaphoneResult result, final int index) {
505        if (index > 0 && contains(value, index, 4, "CHAE")) {   // Michael
506            result.append('K', 'X');
507            return index + 2;
508        }
509        if (conditionCH0(value, index)) {
510            //-- Greek roots ("chemistry", "chorus", etc.) --//
511            result.append('K');
512            return index + 2;
513        }
514        if (conditionCH1(value, index)) {
515            //-- Germanic, Greek, or otherwise 'ch' for 'kh' sound --//
516            result.append('K');
517            return index + 2;
518        }
519        if (index > 0) {
520            if (contains(value, 0, 2, "MC")) {
521                result.append('K');
522            } else {
523                result.append('X', 'K');
524            }
525        } else {
526            result.append('X');
527        }
528        return index + 2;
529    }
530
531    /**
532     * Handles 'D' cases.
533     */
534    private int handleD(final String value, final DoubleMetaphoneResult result, int index) {
535        if (contains(value, index, 2, "DG")) {
536            //-- "Edge" --//
537            if (contains(value, index + 2, 1, "I", "E", "Y")) {
538                result.append('J');
539                index += 3;
540                //-- "Edgar" --//
541            } else {
542                result.append("TK");
543                index += 2;
544            }
545        } else if (contains(value, index, 2, "DT", "DD")) {
546            result.append('T');
547            index += 2;
548        } else {
549            result.append('T');
550            index++;
551        }
552        return index;
553    }
554
555    /**
556     * Handles 'G' cases.
557     */
558    private int handleG(final String value, final DoubleMetaphoneResult result, int index,
559                        final boolean slavoGermanic) {
560        if (charAt(value, index + 1) == 'H') {
561            index = handleGH(value, result, index);
562        } else if (charAt(value, index + 1) == 'N') {
563            if (index == 1 && isVowel(charAt(value, 0)) && !slavoGermanic) {
564                result.append("KN", "N");
565            } else if (!contains(value, index + 2, 2, "EY") &&
566                       charAt(value, index + 1) != 'Y' && !slavoGermanic) {
567                result.append("N", "KN");
568            } else {
569                result.append("KN");
570            }
571            index += 2;
572        } else if (contains(value, index + 1, 2, "LI") && !slavoGermanic) {
573            result.append("KL", "L");
574            index += 2;
575        } else if (index == 0 &&
576                   (charAt(value, index + 1) == 'Y' ||
577                    contains(value, index + 1, 2, ES_EP_EB_EL_EY_IB_IL_IN_IE_EI_ER))) {
578            //-- -ges-, -gep-, -gel-, -gie- at beginning --//
579            result.append('K', 'J');
580            index += 2;
581        } else if ((contains(value, index + 1, 2, "ER") ||
582                    charAt(value, index + 1) == 'Y') &&
583                   !contains(value, 0, 6, "DANGER", "RANGER", "MANGER") &&
584                   !contains(value, index - 1, 1, "E", "I") &&
585                   !contains(value, index - 1, 3, "RGY", "OGY")) {
586            //-- -ger-, -gy- --//
587            result.append('K', 'J');
588            index += 2;
589        } else if (contains(value, index + 1, 1, "E", "I", "Y") ||
590                   contains(value, index - 1, 4, "AGGI", "OGGI")) {
591            //-- Italian "biaggi" --//
592            if (contains(value, 0, 4, "VAN ", "VON ") ||
593                contains(value, 0, 3, "SCH") ||
594                contains(value, index + 1, 2, "ET")) {
595                //-- obvious germanic --//
596                result.append('K');
597            } else if (contains(value, index + 1, 3, "IER")) {
598                result.append('J');
599            } else {
600                result.append('J', 'K');
601            }
602            index += 2;
603        } else {
604            if (charAt(value, index + 1) == 'G') {
605                index += 2;
606            } else {
607                index++;
608            }
609            result.append('K');
610        }
611        return index;
612    }
613
614    /**
615     * Handles 'GH' cases.
616     */
617    private int handleGH(final String value, final DoubleMetaphoneResult result, int index) {
618        if (index > 0 && !isVowel(charAt(value, index - 1))) {
619            result.append('K');
620            index += 2;
621        } else if (index == 0) {
622            if (charAt(value, index + 2) == 'I') {
623                result.append('J');
624            } else {
625                result.append('K');
626            }
627            index += 2;
628        } else if (index > 1 && contains(value, index - 2, 1, "B", "H", "D") ||
629                   index > 2 && contains(value, index - 3, 1, "B", "H", "D") ||
630                   index > 3 && contains(value, index - 4, 1, "B", "H")) {
631            //-- Parker's rule (with some further refinements) - "hugh"
632            index += 2;
633        } else {
634            if (index > 2 && charAt(value, index - 1) == 'U' &&
635                contains(value, index - 3, 1, "C", "G", "L", "R", "T")) {
636                //-- "laugh", "McLaughlin", "cough", "gough", "rough", "tough"
637                result.append('F');
638            } else if (index > 0 && charAt(value, index - 1) != 'I') {
639                result.append('K');
640            }
641            index += 2;
642        }
643        return index;
644    }
645
646    /**
647     * Handles 'H' cases.
648     */
649    private int handleH(final String value, final DoubleMetaphoneResult result, int index) {
650        //-- only keep if first & before vowel or between 2 vowels --//
651        if ((index == 0 || isVowel(charAt(value, index - 1))) &&
652            isVowel(charAt(value, index + 1))) {
653            result.append('H');
654            index += 2;
655            //-- also takes car of "HH" --//
656        } else {
657            index++;
658        }
659        return index;
660    }
661
662    /**
663     * Handles 'J' cases.
664     */
665    private int handleJ(final String value, final DoubleMetaphoneResult result, int index,
666                        final boolean slavoGermanic) {
667        if (contains(value, index, 4, "JOSE") || contains(value, 0, 4, "SAN ")) {
668                //-- obvious Spanish, "Jose", "San Jacinto" --//
669                if (index == 0 && charAt(value, index + 4) == ' ' ||
670                     value.length() == 4 || contains(value, 0, 4, "SAN ")) {
671                    result.append('H');
672                } else {
673                    result.append('J', 'H');
674                }
675                index++;
676            } else {
677                if (index == 0 && !contains(value, index, 4, "JOSE")) {
678                    result.append('J', 'A');
679                } else if (isVowel(charAt(value, index - 1)) && !slavoGermanic &&
680                           (charAt(value, index + 1) == 'A' || charAt(value, index + 1) == 'O')) {
681                    result.append('J', 'H');
682                } else if (index == value.length() - 1) {
683                    result.append('J', ' ');
684                } else if (!contains(value, index + 1, 1, L_T_K_S_N_M_B_Z) &&
685                           !contains(value, index - 1, 1, "S", "K", "L")) {
686                    result.append('J');
687                }
688
689                if (charAt(value, index + 1) == 'J') {
690                    index += 2;
691                } else {
692                    index++;
693                }
694            }
695        return index;
696    }
697
698    /**
699     * Handles 'L' cases.
700     */
701    private int handleL(final String value, final DoubleMetaphoneResult result, int index) {
702        if (charAt(value, index + 1) == 'L') {
703            if (conditionL0(value, index)) {
704                result.appendPrimary('L');
705            } else {
706                result.append('L');
707            }
708            index += 2;
709        } else {
710            index++;
711            result.append('L');
712        }
713        return index;
714    }
715
716    /**
717     * Handles 'P' cases.
718     */
719    private int handleP(final String value, final DoubleMetaphoneResult result, int index) {
720        if (charAt(value, index + 1) == 'H') {
721            result.append('F');
722            index += 2;
723        } else {
724            result.append('P');
725            index = contains(value, index + 1, 1, "P", "B") ? index + 2 : index + 1;
726        }
727        return index;
728    }
729
730    /**
731     * Handles 'R' cases.
732     */
733    private int handleR(final String value, final DoubleMetaphoneResult result, final int index,
734                        final boolean slavoGermanic) {
735        if (index == value.length() - 1 && !slavoGermanic &&
736            contains(value, index - 2, 2, "IE") &&
737            !contains(value, index - 4, 2, "ME", "MA")) {
738            result.appendAlternate('R');
739        } else {
740            result.append('R');
741        }
742        return charAt(value, index + 1) == 'R' ? index + 2 : index + 1;
743    }
744
745    //-- BEGIN CONDITIONS --//
746
747    /**
748     * Handles 'S' cases.
749     */
750    private int handleS(final String value, final DoubleMetaphoneResult result, int index,
751                        final boolean slavoGermanic) {
752        if (contains(value, index - 1, 3, "ISL", "YSL")) {
753            //-- special cases "island", "isle", "carlisle", "carlysle" --//
754            index++;
755        } else if (index == 0 && contains(value, index, 5, "SUGAR")) {
756            //-- special case "sugar-" --//
757            result.append('X', 'S');
758            index++;
759        } else if (contains(value, index, 2, "SH")) {
760            if (contains(value, index + 1, 4, "HEIM", "HOEK", "HOLM", "HOLZ")) {
761                //-- germanic --//
762                result.append('S');
763            } else {
764                result.append('X');
765            }
766            index += 2;
767        } else if (contains(value, index, 3, "SIO", "SIA") || contains(value, index, 4, "SIAN")) {
768            //-- Italian and Armenian --//
769            if (slavoGermanic) {
770                result.append('S');
771            } else {
772                result.append('S', 'X');
773            }
774            index += 3;
775        } else if (index == 0 && contains(value, index + 1, 1, "M", "N", "L", "W") ||
776                   contains(value, index + 1, 1, "Z")) {
777            //-- german & anglicisations, e.g. "smith" match "schmidt" //
778            // "snider" match "schneider" --//
779            //-- also, -sz- in slavic language although in hungarian it //
780            //   is pronounced "s" --//
781            result.append('S', 'X');
782            index = contains(value, index + 1, 1, "Z") ? index + 2 : index + 1;
783        } else if (contains(value, index, 2, "SC")) {
784            index = handleSC(value, result, index);
785        } else {
786            if (index == value.length() - 1 && contains(value, index - 2, 2, "AI", "OI")) {
787                //-- french e.g. "resnais", "artois" --//
788                result.appendAlternate('S');
789            } else {
790                result.append('S');
791            }
792            index = contains(value, index + 1, 1, "S", "Z") ? index + 2 : index + 1;
793        }
794        return index;
795    }
796
797    /**
798     * Handles 'SC' cases.
799     */
800    private int handleSC(final String value, final DoubleMetaphoneResult result, final int index) {
801        if (charAt(value, index + 2) == 'H') {
802            //-- Schlesinger's rule --//
803            if (contains(value, index + 3, 2, "OO", "ER", "EN", "UY", "ED", "EM")) {
804                //-- Dutch origin, e.g. "school", "schooner" --//
805                if (contains(value, index + 3, 2, "ER", "EN")) {
806                    //-- "schermerhorn", "schenker" --//
807                    result.append("X", "SK");
808                } else {
809                    result.append("SK");
810                }
811            } else if (index == 0 && !isVowel(charAt(value, 3)) && charAt(value, 3) != 'W') {
812                result.append('X', 'S');
813            } else {
814                result.append('X');
815            }
816        } else if (contains(value, index + 2, 1, "I", "E", "Y")) {
817            result.append('S');
818        } else {
819            result.append("SK");
820        }
821        return index + 3;
822    }
823
824    /**
825     * Handles 'T' cases.
826     */
827    private int handleT(final String value, final DoubleMetaphoneResult result, int index) {
828        if (contains(value, index, 4, "TION") || contains(value, index, 3, "TIA", "TCH")) {
829            result.append('X');
830            index += 3;
831        } else if (contains(value, index, 2, "TH") || contains(value, index, 3, "TTH")) {
832            if (contains(value, index + 2, 2, "OM", "AM") ||
833                //-- special case "thomas", "thames" or germanic --//
834                contains(value, 0, 4, "VAN ", "VON ") ||
835                contains(value, 0, 3, "SCH")) {
836                result.append('T');
837            } else {
838                result.append('0', 'T');
839            }
840            index += 2;
841        } else {
842            result.append('T');
843            index = contains(value, index + 1, 1, "T", "D") ? index + 2 : index + 1;
844        }
845        return index;
846    }
847
848    /**
849     * Handles 'W' cases.
850     */
851    private int handleW(final String value, final DoubleMetaphoneResult result, int index) {
852        if (contains(value, index, 2, "WR")) {
853            //-- can also be in middle of word --//
854            result.append('R');
855            index += 2;
856        } else if (index == 0 && (isVowel(charAt(value, index + 1)) ||
857                           contains(value, index, 2, "WH"))) {
858            if (isVowel(charAt(value, index + 1))) {
859                //-- Wasserman should match Vasserman --//
860                result.append('A', 'F');
861            } else {
862                //-- need Uomo to match Womo --//
863                result.append('A');
864            }
865            index++;
866        } else if (index == value.length() - 1 && isVowel(charAt(value, index - 1)) ||
867                   contains(value, index - 1, 5, "EWSKI", "EWSKY", "OWSKI", "OWSKY") ||
868                   contains(value, 0, 3, "SCH")) {
869            //-- Arnow should match Arnoff --//
870            result.appendAlternate('F');
871            index++;
872        } else if (contains(value, index, 4, "WICZ", "WITZ")) {
873            //-- Polish e.g. "filipowicz" --//
874            result.append("TS", "FX");
875            index += 4;
876        } else {
877            index++;
878        }
879        return index;
880    }
881
882    /**
883     * Handles 'X' cases.
884     */
885    private int handleX(final String value, final DoubleMetaphoneResult result, int index) {
886        if (index == 0) {
887            result.append('S');
888            index++;
889        } else {
890            if (!(index == value.length() - 1 &&
891                  (contains(value, index - 3, 3, "IAU", "EAU") ||
892                   contains(value, index - 2, 2, "AU", "OU")))) {
893                //-- French e.g. breaux --//
894                result.append("KS");
895            }
896            index = contains(value, index + 1, 1, "C", "X") ? index + 2 : index + 1;
897        }
898        return index;
899    }
900
901    //-- BEGIN HELPER FUNCTIONS --//
902
903    /**
904     * Handles 'Z' cases.
905     */
906    private int handleZ(final String value, final DoubleMetaphoneResult result, int index,
907                        final boolean slavoGermanic) {
908        if (charAt(value, index + 1) == 'H') {
909            //-- Chinese pinyin e.g. "zhao" or Angelina "Zhang" --//
910            result.append('J');
911            index += 2;
912        } else {
913            if (contains(value, index + 1, 2, "ZO", "ZI", "ZA") ||
914                slavoGermanic && index > 0 && charAt(value, index - 1) != 'T') {
915                result.append("S", "TS");
916            } else {
917                result.append('S');
918            }
919            index = charAt(value, index + 1) == 'Z' ? index + 2 : index + 1;
920        }
921        return index;
922    }
923
924    /**
925     * Check if the Double Metaphone values of two {@code String} values
926     * are equal.
927     *
928     * @param value1 The left-hand side of the encoded {@link String#equals(Object)}.
929     * @param value2 The right-hand side of the encoded {@link String#equals(Object)}.
930     * @return {@code true} if the encoded {@code String}s are equal;
931     *          {@code false} otherwise.
932     * @see #isDoubleMetaphoneEqual(String,String,boolean)
933     */
934    public boolean isDoubleMetaphoneEqual(final String value1, final String value2) {
935        return isDoubleMetaphoneEqual(value1, value2, false);
936    }
937
938    /**
939     * Check if the Double Metaphone values of two {@code String} values
940     * are equal, optionally using the alternate value.
941     *
942     * @param value1 The left-hand side of the encoded {@link String#equals(Object)}.
943     * @param value2 The right-hand side of the encoded {@link String#equals(Object)}.
944     * @param alternate use the alternate value if {@code true}.
945     * @return {@code true} if the encoded {@code String}s are equal;
946     *          {@code false} otherwise.
947     */
948    public boolean isDoubleMetaphoneEqual(final String value1, final String value2, final boolean alternate) {
949        return StringUtils.equals(doubleMetaphone(value1, alternate), doubleMetaphone(value2, alternate));
950    }
951
952    /**
953     * Determines whether or not the value starts with a silent letter.  It will
954     * return {@code true} if the value starts with any of 'GN', 'KN',
955     * 'PN', 'WR' or 'PS'.
956     */
957    private boolean isSilentStart(final String value) {
958        boolean result = false;
959        for (final String element : SILENT_START) {
960            if (value.startsWith(element)) {
961                result = true;
962                break;
963            }
964        }
965        return result;
966    }
967
968    /**
969     * Determines whether or not a value is of slavo-germanic origin. A value is
970     * of slavo-germanic origin if it contians any of 'W', 'K', 'CZ', or 'WITZ'.
971     */
972    private boolean isSlavoGermanic(final String value) {
973        return value.indexOf('W') > -1 || value.indexOf('K') > -1 ||
974                value.contains("CZ") || value.contains("WITZ");
975    }
976
977    /**
978     * Determines whether or not a character is a vowel or not
979     */
980    private boolean isVowel(final char ch) {
981        return VOWELS.indexOf(ch) != -1;
982    }
983
    //-- BEGIN SETTERS --//
985
986    /**
987     * Sets the maxCodeLen.
988     * @param maxCodeLen The maxCodeLen to set
989     */
990    public void setMaxCodeLen(final int maxCodeLen) {
991        this.maxCodeLen = maxCodeLen;
992    }
993}