001/*
002 * Licensed to the Apache Software Foundation (ASF) under one or more
003 * contributor license agreements.  See the NOTICE file distributed with
004 * this work for additional information regarding copyright ownership.
005 * The ASF licenses this file to You under the Apache License, Version 2.0
006 * (the "License"); you may not use this file except in compliance with
007 * the License.  You may obtain a copy of the License at
008 *
009 *      https://www.apache.org/licenses/LICENSE-2.0
010 *
011 * Unless required by applicable law or agreed to in writing, software
012 * distributed under the License is distributed on an "AS IS" BASIS,
013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 * See the License for the specific language governing permissions and
015 * limitations under the License.
016 */
017
018package org.apache.commons.codec.language;
019
020import java.util.Locale;
021
022import org.apache.commons.codec.EncoderException;
023import org.apache.commons.codec.StringEncoder;
024import org.apache.commons.codec.binary.StringUtils;
025
026/**
027 * Encodes a string into a Double Metaphone value. This Implementation is based on the algorithm by <CITE>Lawrence
028 * Philips</CITE>.
029 * <p>
030 * This class is conditionally thread-safe. The instance field for the maximum code length is mutable
031 * {@link #setMaxCodeLen(int)} but is not volatile, and accesses are not synchronized. If an instance of the class is
032 * shared between threads, the caller needs to ensure that suitable synchronization is used to ensure safe publication
033 * of the value between threads, and must not invoke {@link #setMaxCodeLen(int)} after initial setup.
034 * </p>
035 *
036 * @see <a href="https://drdobbs.com/the-double-metaphone-search-algorithm/184401251?pgno=2">Dr. Dobbs Original Article</a>
037 * @see <a href="https://en.wikipedia.org/wiki/Metaphone">Wikipedia Metaphone</a>
038 * @see <a href="http://aspell.net/metaphone/dmetaph.cpp">Double Metaphone C++ source by Lawrence Philips</a>
039 */
040public class DoubleMetaphone implements StringEncoder {
041
042    /**
043     * Stores results, since there is the optional alternate encoding.
044     */
045    public class DoubleMetaphoneResult {
046
047        private final StringBuilder primary = new StringBuilder(getMaxCodeLen());
048        private final StringBuilder alternate = new StringBuilder(getMaxCodeLen());
049        private final int maxLength;
050
051        /**
052         * Constructs a new instance.
053         *
054         * @param maxLength The maximum length.
055         */
056        public DoubleMetaphoneResult(final int maxLength) {
057            this.maxLength = maxLength;
058        }
059
060        /**
061         * Appends the given value as primary and alternative.
062         *
063         * @param value The value to append.
064         */
065        public void append(final char value) {
066            appendPrimary(value);
067            appendAlternate(value);
068        }
069
070        /**
071         * Appends the given primary and alternative values.
072         *
073         * @param primary   The primary value.
074         * @param alternate The alternate value.
075         */
076        public void append(final char primary, final char alternate) {
077            appendPrimary(primary);
078            appendAlternate(alternate);
079        }
080
081        /**
082         * Appends the given value as primary and alternative.
083         *
084         * @param value The value to append.
085         */
086        public void append(final String value) {
087            appendPrimary(value);
088            appendAlternate(value);
089        }
090
091        /**
092         * Appends the given primary and alternative values.
093         *
094         * @param primary   The primary value.
095         * @param alternate The alternate value.
096         */
097        public void append(final String primary, final String alternate) {
098            appendPrimary(primary);
099            appendAlternate(alternate);
100        }
101
102        /**
103         * Appends the given value as alternative.
104         *
105         * @param value The value to append.
106         */
107        public void appendAlternate(final char value) {
108            if (alternate.length() < maxLength) {
109                alternate.append(value);
110            }
111        }
112
113        /**
114         * Appends the given value as alternative.
115         *
116         * @param value The value to append.
117         */
118        public void appendAlternate(final String value) {
119            final int addChars = maxLength - alternate.length();
120            if (value.length() <= addChars) {
121                alternate.append(value);
122            } else {
123                alternate.append(value, 0, addChars);
124            }
125        }
126
127        /**
128         * Appends the given value as primary.
129         *
130         * @param value The value to append.
131         */
132        public void appendPrimary(final char value) {
133            if (primary.length() < maxLength) {
134                primary.append(value);
135            }
136        }
137
138        /**
139         * Appends the given value as primary.
140         *
141         * @param value The value to append.
142         */
143        public void appendPrimary(final String value) {
144            final int addChars = maxLength - primary.length();
145            if (value.length() <= addChars) {
146                primary.append(value);
147            } else {
148                primary.append(value, 0, addChars);
149            }
150        }
151
152        /**
153         * Gets the alternate string.
154         *
155         * @return the alternate string.
156         */
157        public String getAlternate() {
158            return alternate.toString();
159        }
160
161        /**
162         * Gets the primary string.
163         *
164         * @return the primary string.
165         */
166        public String getPrimary() {
167            return primary.toString();
168        }
169
170        /**
171         * Tests whether this result is complete.
172         *
173         * @return whether this result is complete.
174         */
175        public boolean isComplete() {
176            return primary.length() >= maxLength && alternate.length() >= maxLength;
177        }
178    }
179
    /**
     * "Vowels" to test. Note that 'Y' is included.
     */
    private static final String VOWELS = "AEIOUY";

    /**
     * Prefixes when present which are not pronounced.
     */
    private static final String[] SILENT_START = { "GN", "KN", "PN", "WR", "PS" };

    /**
     * Single-character strings tested against the character that follows "CH" when deciding whether "CH" is encoded
     * as 'K'.
     */
    private static final String[] L_R_N_M_B_H_F_V_W_SPACE = { "L", "R", "N", "M", "B", "H", "F", "V", "W", " " };

    /**
     * Two-character sequences that, directly after a 'G' at the start of the input, make the 'G' encode as primary
     * 'K' and alternate 'J'.
     */
    private static final String[] ES_EP_EB_EL_EY_IB_IL_IN_IE_EI_ER = { "ES", "EP", "EB", "EL", "EY", "IB", "IL", "IN", "IE", "EI", "ER" };

    /**
     * Single-character strings tested against the character that follows a 'J'; in the fall-through case of the 'J'
     * handling a match means that no code is appended for the 'J'.
     */
    private static final String[] L_T_K_S_N_M_B_Z = { "L", "T", "K", "S", "N", "M", "B", "Z" };
193
194    /**
195     * Tests whether {@code value} contains any of the {@code criteria} starting at index {@code start} and matching up to length {@code length}.
196     *
197     * @param value    The value to test.
198     * @param start    Where in {@code value} to start testing.
199     * @param length   How many to test.
200     * @param criteria The search criteria.
201     * @return Whether there was a match.
202     */
203    protected static boolean contains(final String value, final int start, final int length, final String... criteria) {
204        boolean result = false;
205        if (start >= 0 && start + length <= value.length()) {
206            final String target = value.substring(start, start + length);
207            for (final String element : criteria) {
208                if (target.equals(element)) {
209                    result = true;
210                    break;
211                }
212            }
213        }
214        return result;
215    }
216
    /**
     * Maximum length of an encoding, default is 4.
     * <p>
     * Read through {@link #getMaxCodeLen()}; the field is neither {@code final} nor {@code volatile}.
     * </p>
     */
    private int maxCodeLen = 4;
221
    /**
     * Constructs a new instance that starts with the default maximum code length of 4.
     */
    public DoubleMetaphone() {
        // empty: the maximum code length keeps the value from its field initializer
    }
228
229    /**
230     * Gets the character at index {@code index} if available, or {@link Character#MIN_VALUE} if out of bounds.
231     *
232     * @param value The String to query.
233     * @param index A string index.
234     * @return The character at the index or {@link Character#MIN_VALUE} if out of bounds.
235     */
236    protected char charAt(final String value, final int index) {
237        if (index < 0 || index >= value.length()) {
238            return Character.MIN_VALUE;
239        }
240        return value.charAt(index);
241    }
242
243    /**
244     * Cleans the input.
245     */
246    private String cleanInput(String input) {
247        if (input == null) {
248            return null;
249        }
250        input = input.trim();
251        if (input.isEmpty()) {
252            return null;
253        }
254        return input.toUpperCase(Locale.ENGLISH);
255    }
256
257    /**
258     * Complex condition 0 for 'C'.
259     */
260    private boolean conditionC0(final String value, final int index) {
261        if (contains(value, index, 4, "CHIA")) {
262            return true;
263        }
264        if (index <= 1) {
265            return false;
266        }
267        if (isVowel(charAt(value, index - 2))) {
268            return false;
269        }
270        if (!contains(value, index - 1, 3, "ACH")) {
271            return false;
272        }
273        final char c = charAt(value, index + 2);
274        return c != 'I' && c != 'E' ||
275                contains(value, index - 2, 6, "BACHER", "MACHER");
276    }
277
278    /**
279     * Complex condition 0 for 'CH'.
280     */
281    private boolean conditionCH0(final String value, final int index) {
282        if (index != 0) {
283            return false;
284        }
285        if (!contains(value, index + 1, 5, "HARAC", "HARIS") &&
286                   !contains(value, index + 1, 3, "HOR", "HYM", "HIA", "HEM")) {
287            return false;
288        }
289        return !contains(value, 0, 5, "CHORE");
290    }
291
292    /**
293     * Complex condition 1 for 'CH'.
294     */
295    private boolean conditionCH1(final String value, final int index) {
296        return contains(value, 0, 4, "VAN ", "VON ") || contains(value, 0, 3, "SCH") ||
297                contains(value, index - 2, 6, "ORCHES", "ARCHIT", "ORCHID") ||
298                contains(value, index + 2, 1, "T", "S") ||
299                (contains(value, index - 1, 1, "A", "O", "U", "E") || index == 0) &&
300                 (contains(value, index + 2, 1, L_R_N_M_B_H_F_V_W_SPACE) || index + 1 == value.length() - 1);
301    }
302
303    /**
304     * Complex condition 0 for 'L'.
305     */
306    private boolean conditionL0(final String value, final int index) {
307        if (index == value.length() - 3 &&
308            contains(value, index - 1, 4, "ILLO", "ILLA", "ALLE")) {
309            return true;
310        }
311        return (contains(value, value.length() - 2, 2, "AS", "OS") ||
312                contains(value, value.length() - 1, 1, "A", "O")) &&
313                contains(value, index - 1, 4, "ALLE");
314    }
315
316    /**
317     * Complex condition 0 for 'M'.
318     */
319    private boolean conditionM0(final String value, final int index) {
320        if (charAt(value, index + 1) == 'M') {
321            return true;
322        }
323        return contains(value, index - 1, 3, "UMB") &&
324               (index + 1 == value.length() - 1 || contains(value, index + 2, 2, "ER"));
325    }
326
327    /**
328     * Encodes a value with Double Metaphone.
329     *
330     * @param value String to encode.
331     * @return an encoded string.
332     */
333    public String doubleMetaphone(final String value) {
334        return doubleMetaphone(value, false);
335    }
336
337    /**
338     * Encodes a value with Double Metaphone, optionally using the alternate encoding.
339     *
340     * @param value String to encode.
341     * @param alternate use alternate encode.
342     * @return an encoded string.
343     */
344    public String doubleMetaphone(String value, final boolean alternate) {
345        value = cleanInput(value);
346        if (value == null) {
347            return null;
348        }
349
350        final boolean slavoGermanic = isSlavoGermanic(value);
351        int index = isSilentStart(value) ? 1 : 0;
352
353        final DoubleMetaphoneResult result = new DoubleMetaphoneResult(getMaxCodeLen());
354
355        while (!result.isComplete() && index <= value.length() - 1) {
356            switch (value.charAt(index)) {
357            case 'A':
358            case 'E':
359            case 'I':
360            case 'O':
361            case 'U':
362            case 'Y':
363                index = handleAEIOUY(result, index);
364                break;
365            case 'B':
366                result.append('P');
367                index = charAt(value, index + 1) == 'B' ? index + 2 : index + 1;
368                break;
369            case '\u00C7':
370                // C with a Cedilla
371                result.append('S');
372                index++;
373                break;
374            case 'C':
375                index = handleC(value, result, index);
376                break;
377            case 'D':
378                index = handleD(value, result, index);
379                break;
380            case 'F':
381                result.append('F');
382                index = charAt(value, index + 1) == 'F' ? index + 2 : index + 1;
383                break;
384            case 'G':
385                index = handleG(value, result, index, slavoGermanic);
386                break;
387            case 'H':
388                index = handleH(value, result, index);
389                break;
390            case 'J':
391                index = handleJ(value, result, index, slavoGermanic);
392                break;
393            case 'K':
394                result.append('K');
395                index = charAt(value, index + 1) == 'K' ? index + 2 : index + 1;
396                break;
397            case 'L':
398                index = handleL(value, result, index);
399                break;
400            case 'M':
401                result.append('M');
402                index = conditionM0(value, index) ? index + 2 : index + 1;
403                break;
404            case 'N':
405                result.append('N');
406                index = charAt(value, index + 1) == 'N' ? index + 2 : index + 1;
407                break;
408            case '\u00D1':
409                // N with a tilde (Spanish ene)
410                result.append('N');
411                index++;
412                break;
413            case 'P':
414                index = handleP(value, result, index);
415                break;
416            case 'Q':
417                result.append('K');
418                index = charAt(value, index + 1) == 'Q' ? index + 2 : index + 1;
419                break;
420            case 'R':
421                index = handleR(value, result, index, slavoGermanic);
422                break;
423            case 'S':
424                index = handleS(value, result, index, slavoGermanic);
425                break;
426            case 'T':
427                index = handleT(value, result, index);
428                break;
429            case 'V':
430                result.append('F');
431                index = charAt(value, index + 1) == 'V' ? index + 2 : index + 1;
432                break;
433            case 'W':
434                index = handleW(value, result, index);
435                break;
436            case 'X':
437                index = handleX(value, result, index);
438                break;
439            case 'Z':
440                index = handleZ(value, result, index, slavoGermanic);
441                break;
442            default:
443                index++;
444                break;
445            }
446        }
447
448        return alternate ? result.getAlternate() : result.getPrimary();
449    }
450
451    /**
452     * Encodes the value using DoubleMetaphone.  It will only work if
453     * {@code obj} is a {@code String} (like {@code Metaphone}).
454     *
455     * @param obj Object to encode (should be of type String).
456     * @return An encoded Object (will be of type String).
457     * @throws EncoderException encode parameter is not of type String.
458     */
459    @Override
460    public Object encode(final Object obj) throws EncoderException {
461        if (!(obj instanceof String)) {
462            throw new EncoderException("DoubleMetaphone encode parameter is not of type String");
463        }
464        return doubleMetaphone((String) obj);
465    }
466
467    /**
468     * Encodes the value using DoubleMetaphone.
469     *
470     * @param value String to encode.
471     * @return An encoded String.
472     */
473    @Override
474    public String encode(final String value) {
475        return doubleMetaphone(value);
476    }
477
478    /**
479     * Gets the maxCodeLen.
480     *
481     * @return the maxCodeLen.
482     */
483    public int getMaxCodeLen() {
484        return maxCodeLen;
485    }
486
487    /**
488     * Handles 'A', 'E', 'I', 'O', 'U', and 'Y' cases.
489     */
490    private int handleAEIOUY(final DoubleMetaphoneResult result, final int index) {
491        if (index == 0) {
492            result.append('A');
493        }
494        return index + 1;
495    }
496
497    /**
498     * Handles 'C' cases.
499     */
500    private int handleC(final String value, final DoubleMetaphoneResult result, int index) {
501        if (conditionC0(value, index)) {  // very confusing, moved out
502            result.append('K');
503            index += 2;
504        } else if (index == 0 && contains(value, index, 6, "CAESAR")) {
505            result.append('S');
506            index += 2;
507        } else if (contains(value, index, 2, "CH")) {
508            index = handleCH(value, result, index);
509        } else if (contains(value, index, 2, "CZ") &&
510                   !contains(value, index - 2, 4, "WICZ")) {
511            //-- "Czerny" --//
512            result.append('S', 'X');
513            index += 2;
514        } else if (contains(value, index + 1, 3, "CIA")) {
515            //-- "focaccia" --//
516            result.append('X');
517            index += 3;
518        } else if (contains(value, index, 2, "CC") &&
519                   !(index == 1 && charAt(value, 0) == 'M')) {
520            //-- double "cc" but not "McClelland" --//
521            return handleCC(value, result, index);
522        } else if (contains(value, index, 2, "CK", "CG", "CQ")) {
523            result.append('K');
524            index += 2;
525        } else if (contains(value, index, 2, "CI", "CE", "CY")) {
526            //-- Italian vs. English --//
527            if (contains(value, index, 3, "CIO", "CIE", "CIA")) {
528                result.append('S', 'X');
529            } else {
530                result.append('S');
531            }
532            index += 2;
533        } else {
534            result.append('K');
535            if (contains(value, index + 1, 2, " C", " Q", " G")) {
536                //-- Mac Caffrey, Mac Gregor --//
537                index += 3;
538            } else if (contains(value, index + 1, 1, "C", "K", "Q") &&
539                       !contains(value, index + 1, 2, "CE", "CI")) {
540                index += 2;
541            } else {
542                index++;
543            }
544        }
545
546        return index;
547    }
548
549    /**
550     * Handles 'CC' cases.
551     */
552    private int handleCC(final String value, final DoubleMetaphoneResult result, int index) {
553        if (contains(value, index + 2, 1, "I", "E", "H") &&
554            !contains(value, index + 2, 2, "HU")) {
555            //-- "bellocchio" but not "bacchus" --//
556            if (index == 1 && charAt(value, index - 1) == 'A' ||
557                contains(value, index - 1, 5, "UCCEE", "UCCES")) {
558                //-- "accident", "accede", "succeed" --//
559                result.append("KS");
560            } else {
561                //-- "bacci", "bertucci", other Italian --//
562                result.append('X');
563            }
564            index += 3;
565        } else {    // Pierce's rule
566            result.append('K');
567            index += 2;
568        }
569
570        return index;
571    }
572
573    /**
574     * Handles 'CH' cases.
575     */
576    private int handleCH(final String value, final DoubleMetaphoneResult result, final int index) {
577        if (index > 0 && contains(value, index, 4, "CHAE")) {   // Michael
578            result.append('K', 'X');
579            return index + 2;
580        }
581        if (conditionCH0(value, index)) {
582            //-- Greek roots ("chemistry", "chorus", etc.) --//
583            result.append('K');
584            return index + 2;
585        }
586        if (conditionCH1(value, index)) {
587            //-- Germanic, Greek, or otherwise 'ch' for 'kh' sound --//
588            result.append('K');
589            return index + 2;
590        }
591        if (index > 0) {
592            if (contains(value, 0, 2, "MC")) {
593                result.append('K');
594            } else {
595                result.append('X', 'K');
596            }
597        } else {
598            result.append('X');
599        }
600        return index + 2;
601    }
602
603    /**
604     * Handles 'D' cases.
605     */
606    private int handleD(final String value, final DoubleMetaphoneResult result, int index) {
607        if (contains(value, index, 2, "DG")) {
608            //-- "Edge" --//
609            if (contains(value, index + 2, 1, "I", "E", "Y")) {
610                result.append('J');
611                index += 3;
612                //-- "Edgar" --//
613            } else {
614                result.append("TK");
615                index += 2;
616            }
617        } else if (contains(value, index, 2, "DT", "DD")) {
618            result.append('T');
619            index += 2;
620        } else {
621            result.append('T');
622            index++;
623        }
624        return index;
625    }
626
    /**
     * Handles 'G' cases. The branches are tried in order and the first one that matches decides the code that is
     * appended and how many characters are consumed.
     *
     * @param value         The input being encoded.
     * @param result        The result to append to.
     * @param index         The position of the 'G'.
     * @param slavoGermanic Whether the input was classified as Slavo-Germanic by the caller.
     * @return The position at which to continue.
     */
    private int handleG(final String value, final DoubleMetaphoneResult result, int index, final boolean slavoGermanic) {
        if (charAt(value, index + 1) == 'H') {
            // "GH" has its own handler
            index = handleGH(value, result, index);
        } else if (charAt(value, index + 1) == 'N') {
            if (index == 1 && isVowel(charAt(value, 0)) && !slavoGermanic) {
                // "GN" directly after a leading vowel
                result.append("KN", "N");
            } else if (!contains(value, index + 2, 2, "EY") &&
                       charAt(value, index + 1) != 'Y' && !slavoGermanic) {
                // NOTE(review): the next character is 'N' in this branch, so the != 'Y' test is always true
                result.append("N", "KN");
            } else {
                result.append("KN");
            }
            index += 2;
        } else if (contains(value, index + 1, 2, "LI") && !slavoGermanic) {
            // "GLI"
            result.append("KL", "L");
            index += 2;
        } else if (index == 0 &&
                   (charAt(value, index + 1) == 'Y' ||
                    contains(value, index + 1, 2, ES_EP_EB_EL_EY_IB_IL_IN_IE_EI_ER))) {
            //-- -ges-, -gep-, -gel-, -gie- at beginning --//
            result.append('K', 'J');
            index += 2;
        } else if ((contains(value, index + 1, 2, "ER") ||
                    charAt(value, index + 1) == 'Y') &&
                   !contains(value, 0, 6, "DANGER", "RANGER", "MANGER") &&
                   !contains(value, index - 1, 1, "E", "I") &&
                   !contains(value, index - 1, 3, "RGY", "OGY")) {
            //-- -ger-, -gy- --//
            result.append('K', 'J');
            index += 2;
        } else if (contains(value, index + 1, 1, "E", "I", "Y") ||
                   contains(value, index - 1, 4, "AGGI", "OGGI")) {
            //-- Italian "biaggi" --//
            if (contains(value, 0, 4, "VAN ", "VON ") ||
                contains(value, 0, 3, "SCH") ||
                contains(value, index + 1, 2, "ET")) {
                //-- obvious germanic --//
                result.append('K');
            } else if (contains(value, index + 1, 3, "IER")) {
                result.append('J');
            } else {
                result.append('J', 'K');
            }
            index += 2;
        } else {
            // plain 'G': a doubled "GG" is consumed as one
            if (charAt(value, index + 1) == 'G') {
                index += 2;
            } else {
                index++;
            }
            result.append('K');
        }
        return index;
    }
684
    /**
     * Handles 'GH' cases. Two characters are consumed in every branch; depending on the surrounding characters the
     * pair is encoded as 'K', 'J', 'F' or not at all.
     *
     * @param value  The input being encoded.
     * @param result The result to append to.
     * @param index  The position of the 'G'.
     * @return The position at which to continue.
     */
    private int handleGH(final String value, final DoubleMetaphoneResult result, int index) {
        if (index > 0 && !isVowel(charAt(value, index - 1))) {
            // "GH" after a non-vowel
            result.append('K');
            index += 2;
        } else if (index == 0) {
            // "GH" at the start of the input: 'J' before an 'I', otherwise 'K'
            if (charAt(value, index + 2) == 'I') {
                result.append('J');
            } else {
                result.append('K');
            }
            index += 2;
        } else if (index > 1 && contains(value, index - 2, 1, "B", "H", "D") ||
                   index > 2 && contains(value, index - 3, 1, "B", "H", "D") ||
                   index > 3 && contains(value, index - 4, 1, "B", "H")) {
            //-- Parker's rule (with some further refinements) - "hugh"
            // silent: nothing is appended
            index += 2;
        } else {
            if (index > 2 && charAt(value, index - 1) == 'U' &&
                contains(value, index - 3, 1, "C", "G", "L", "R", "T")) {
                //-- "laugh", "McLaughlin", "cough", "gough", "rough", "tough"
                result.append('F');
            } else if (index > 0 && charAt(value, index - 1) != 'I') {
                result.append('K');
            }
            // when preceded by 'I' neither branch above appends anything
            index += 2;
        }
        return index;
    }
716
717    /**
718     * Handles 'H' cases.
719     */
720    private int handleH(final String value, final DoubleMetaphoneResult result, int index) {
721        //-- only keep if first & before vowel or between 2 vowels --//
722        if ((index == 0 || isVowel(charAt(value, index - 1))) &&
723            isVowel(charAt(value, index + 1))) {
724            result.append('H');
725            index += 2;
726            //-- also takes car of "HH" --//
727        } else {
728            index++;
729        }
730        return index;
731    }
732
    /**
     * Handles 'J' cases.
     *
     * @param value         The input being encoded.
     * @param result        The result to append to.
     * @param index         The position of the 'J'.
     * @param slavoGermanic Whether the input was classified as Slavo-Germanic by the caller.
     * @return The position at which to continue.
     */
    private int handleJ(final String value, final DoubleMetaphoneResult result, int index, final boolean slavoGermanic) {
        if (contains(value, index, 4, "JOSE") || contains(value, 0, 4, "SAN ")) {
                //-- obvious Spanish, "Jose", "San Jacinto" --//
                if (index == 0 && charAt(value, index + 4) == ' ' ||
                     value.length() == 4 || contains(value, 0, 4, "SAN ")) {
                    result.append('H');
                } else {
                    result.append('J', 'H');
                }
                index++;
            } else {
                // NOTE(review): "JOSE" at index was already ruled out by the outer condition, so the
                // !contains(..., "JOSE") test below is always true here
                if (index == 0 && !contains(value, index, 4, "JOSE")) {
                    result.append('J', 'A');
                } else if (isVowel(charAt(value, index - 1)) && !slavoGermanic &&
                           (charAt(value, index + 1) == 'A' || charAt(value, index + 1) == 'O')) {
                    // between a vowel and an 'A' or 'O'
                    result.append('J', 'H');
                } else if (index == value.length() - 1) {
                    // 'J' as the last character: the alternate code receives a space character
                    result.append('J', ' ');
                } else if (!contains(value, index + 1, 1, L_T_K_S_N_M_B_Z) &&
                           !contains(value, index - 1, 1, "S", "K", "L")) {
                    result.append('J');
                }

                // a doubled "JJ" is consumed as one
                if (charAt(value, index + 1) == 'J') {
                    index += 2;
                } else {
                    index++;
                }
            }
        return index;
    }
767
768    /**
769     * Handles 'L' cases.
770     */
771    private int handleL(final String value, final DoubleMetaphoneResult result, int index) {
772        if (charAt(value, index + 1) == 'L') {
773            if (conditionL0(value, index)) {
774                result.appendPrimary('L');
775            } else {
776                result.append('L');
777            }
778            index += 2;
779        } else {
780            index++;
781            result.append('L');
782        }
783        return index;
784    }
785
786    /**
787     * Handles 'P' cases.
788     */
789    private int handleP(final String value, final DoubleMetaphoneResult result, int index) {
790        if (charAt(value, index + 1) == 'H') {
791            result.append('F');
792            index += 2;
793        } else {
794            result.append('P');
795            index = contains(value, index + 1, 1, "P", "B") ? index + 2 : index + 1;
796        }
797        return index;
798    }
799
800    /**
801     * Handles 'R' cases.
802     */
803    private int handleR(final String value, final DoubleMetaphoneResult result, final int index, final boolean slavoGermanic) {
804        if (index == value.length() - 1 && !slavoGermanic &&
805            contains(value, index - 2, 2, "IE") &&
806            !contains(value, index - 4, 2, "ME", "MA")) {
807            result.appendAlternate('R');
808        } else {
809            result.append('R');
810        }
811        return charAt(value, index + 1) == 'R' ? index + 2 : index + 1;
812    }
813
    /**
     * Handles 'S' cases. The branches are tried in order and the first one that matches decides the code that is
     * appended and how many characters are consumed.
     *
     * @param value         The input being encoded.
     * @param result        The result to append to.
     * @param index         The position of the 'S'.
     * @param slavoGermanic Whether the input was classified as Slavo-Germanic by the caller.
     * @return The position at which to continue.
     */
    private int handleS(final String value, final DoubleMetaphoneResult result, int index, final boolean slavoGermanic) {
        if (contains(value, index - 1, 3, "ISL", "YSL")) {
            //-- special cases "island", "isle", "carlisle", "carlysle" --//
            // the 'S' is silent: nothing is appended
            index++;
        } else if (index == 0 && contains(value, index, 5, "SUGAR")) {
            //-- special case "sugar-" --//
            result.append('X', 'S');
            index++;
        } else if (contains(value, index, 2, "SH")) {
            if (contains(value, index + 1, 4, "HEIM", "HOEK", "HOLM", "HOLZ")) {
                //-- germanic --//
                result.append('S');
            } else {
                result.append('X');
            }
            index += 2;
        } else if (contains(value, index, 3, "SIO", "SIA") || contains(value, index, 4, "SIAN")) {
            //-- Italian and Armenian --//
            if (slavoGermanic) {
                result.append('S');
            } else {
                result.append('S', 'X');
            }
            index += 3;
        } else if (index == 0 && contains(value, index + 1, 1, "M", "N", "L", "W") ||
                   contains(value, index + 1, 1, "Z")) {
            //-- german & anglicisations, for example "smith" match "schmidt" //
            // "snider" match "schneider" --//
            //-- also, -sz- in slavic language although in hungarian it //
            //   is pronounced "s" --//
            result.append('S', 'X');
            // only a following 'Z' is consumed together with the 'S'
            index = contains(value, index + 1, 1, "Z") ? index + 2 : index + 1;
        } else if (contains(value, index, 2, "SC")) {
            // "SC" has its own handler
            index = handleSC(value, result, index);
        } else {
            if (index == value.length() - 1 && contains(value, index - 2, 2, "AI", "OI")) {
                //-- french for example "resnais", "artois" --//
                result.appendAlternate('S');
            } else {
                result.append('S');
            }
            // a following 'S' or 'Z' is consumed together with the 'S'
            index = contains(value, index + 1, 1, "S", "Z") ? index + 2 : index + 1;
        }
        return index;
    }
862
863    /**
864     * Handles 'SC' cases.
865     */
866    private int handleSC(final String value, final DoubleMetaphoneResult result, final int index) {
867        if (charAt(value, index + 2) == 'H') {
868            //-- Schlesinger's rule --//
869            if (contains(value, index + 3, 2, "OO", "ER", "EN", "UY", "ED", "EM")) {
870                //-- Dutch origin, for example "school", "schooner" --//
871                if (contains(value, index + 3, 2, "ER", "EN")) {
872                    //-- "schermerhorn", "schenker" --//
873                    result.append("X", "SK");
874                } else {
875                    result.append("SK");
876                }
877            } else if (index == 0 && !isVowel(charAt(value, 3)) && charAt(value, 3) != 'W') {
878                result.append('X', 'S');
879            } else {
880                result.append('X');
881            }
882        } else if (contains(value, index + 2, 1, "I", "E", "Y")) {
883            result.append('S');
884        } else {
885            result.append("SK");
886        }
887        return index + 3;
888    }
889
890    /**
891     * Handles 'T' cases.
892     */
893    private int handleT(final String value, final DoubleMetaphoneResult result, int index) {
894        if (contains(value, index, 4, "TION") || contains(value, index, 3, "TIA", "TCH")) {
895            result.append('X');
896            index += 3;
897        } else if (contains(value, index, 2, "TH") || contains(value, index, 3, "TTH")) {
898            if (contains(value, index + 2, 2, "OM", "AM") ||
899                //-- special case "thomas", "thames" or germanic --//
900                contains(value, 0, 4, "VAN ", "VON ") ||
901                contains(value, 0, 3, "SCH")) {
902                result.append('T');
903            } else {
904                result.append('0', 'T');
905            }
906            index += 2;
907        } else {
908            result.append('T');
909            index = contains(value, index + 1, 1, "T", "D") ? index + 2 : index + 1;
910        }
911        return index;
912    }
913
914    /**
915     * Handles 'W' cases.
916     */
917    private int handleW(final String value, final DoubleMetaphoneResult result, int index) {
918        if (contains(value, index, 2, "WR")) {
919            //-- can also be in middle of word --//
920            result.append('R');
921            index += 2;
922        } else if (index == 0 && (isVowel(charAt(value, index + 1)) ||
923                           contains(value, index, 2, "WH"))) {
924            if (isVowel(charAt(value, index + 1))) {
925                //-- Wasserman should match Vasserman --//
926                result.append('A', 'F');
927            } else {
928                //-- need Uomo to match Womo --//
929                result.append('A');
930            }
931            index++;
932        } else if (index == value.length() - 1 && isVowel(charAt(value, index - 1)) ||
933                   contains(value, index - 1, 5, "EWSKI", "EWSKY", "OWSKI", "OWSKY") ||
934                   contains(value, 0, 3, "SCH")) {
935            //-- Arnow should match Arnoff --//
936            result.appendAlternate('F');
937            index++;
938        } else if (contains(value, index, 4, "WICZ", "WITZ")) {
939            //-- Polish for example "filipowicz" --//
940            result.append("TS", "FX");
941            index += 4;
942        } else {
943            index++;
944        }
945        return index;
946    }
947
948    /**
949     * Handles 'X' cases.
950     */
951    private int handleX(final String value, final DoubleMetaphoneResult result, int index) {
952        if (index == 0) {
953            result.append('S');
954            index++;
955        } else {
956            if (!(index == value.length() - 1 &&
957                  (contains(value, index - 3, 3, "IAU", "EAU") ||
958                   contains(value, index - 2, 2, "AU", "OU")))) {
959                //-- French for example breaux --//
960                result.append("KS");
961            }
962            index = contains(value, index + 1, 1, "C", "X") ? index + 2 : index + 1;
963        }
964        return index;
965    }
966
967    /**
968     * Handles 'Z' cases.
969     */
970    private int handleZ(final String value, final DoubleMetaphoneResult result, int index, final boolean slavoGermanic) {
971        if (charAt(value, index + 1) == 'H') {
972            //-- Chinese pinyin for example "zhao" or Angelina "Zhang" --//
973            result.append('J');
974            index += 2;
975        } else {
976            if (contains(value, index + 1, 2, "ZO", "ZI", "ZA") ||
977                slavoGermanic && index > 0 && charAt(value, index - 1) != 'T') {
978                result.append("S", "TS");
979            } else {
980                result.append('S');
981            }
982            index = charAt(value, index + 1) == 'Z' ? index + 2 : index + 1;
983        }
984        return index;
985    }
986
987    /**
988     * Tests whether the Double Metaphone values of two {@code String} values
989     * are equal.
990     *
991     * @param value1 The left-hand side of the encoded {@link String#equals(Object)}.
992     * @param value2 The right-hand side of the encoded {@link String#equals(Object)}.
993     * @return {@code true} if the encoded {@code String}s are equal;
994     *          {@code false} otherwise.
995     * @see #isDoubleMetaphoneEqual(String,String,boolean)
996     */
997    public boolean isDoubleMetaphoneEqual(final String value1, final String value2) {
998        return isDoubleMetaphoneEqual(value1, value2, false);
999    }
1000
1001    /**
1002     * Tests whether the Double Metaphone values of two {@code String} values
1003     * are equal, optionally using the alternate value.
1004     *
1005     * @param value1 The left-hand side of the encoded {@link String#equals(Object)}.
1006     * @param value2 The right-hand side of the encoded {@link String#equals(Object)}.
1007     * @param alternate use the alternate value if {@code true}.
1008     * @return {@code true} if the encoded {@code String}s are equal;
1009     *          {@code false} otherwise.
1010     */
1011    public boolean isDoubleMetaphoneEqual(final String value1, final String value2, final boolean alternate) {
1012        return StringUtils.equals(doubleMetaphone(value1, alternate), doubleMetaphone(value2, alternate));
1013    }
1014
1015    /**
1016     * Tests whether or not the value starts with a silent letter.  It will
1017     * return {@code true} if the value starts with any of 'GN', 'KN',
1018     * 'PN', 'WR' or 'PS'.
1019     */
1020    private boolean isSilentStart(final String value) {
1021        boolean result = false;
1022        for (final String element : SILENT_START) {
1023            if (value.startsWith(element)) {
1024                result = true;
1025                break;
1026            }
1027        }
1028        return result;
1029    }
1030
1031    /**
1032     * Tests whether or not a value is of slavo-germanic origin. A value is
1033     * of Slavo-Germanic origin if it contains any of 'W', 'K', 'CZ', or 'WITZ'.
1034     */
1035    private boolean isSlavoGermanic(final String value) {
1036        return value.indexOf('W') > -1 || value.indexOf('K') > -1 ||
1037                value.contains("CZ") || value.contains("WITZ");
1038    }
1039
1040    /**
1041     * Tests whether or not a character is a vowel or not.
1042     */
1043    private boolean isVowel(final char ch) {
1044        return VOWELS.indexOf(ch) != -1;
1045    }
1046
1047    /**
1048     * Sets the maxCodeLen.
1049     *
1050     * @param maxCodeLen The maxCodeLen to set.
1051     */
1052    public void setMaxCodeLen(final int maxCodeLen) {
1053        this.maxCodeLen = maxCodeLen;
1054    }
1055}