001/*
002 * Licensed to the Apache Software Foundation (ASF) under one or more
003 * contributor license agreements.  See the NOTICE file distributed with
004 * this work for additional information regarding copyright ownership.
005 * The ASF licenses this file to You under the Apache License, Version 2.0
006 * (the "License"); you may not use this file except in compliance with
007 * the License.  You may obtain a copy of the License at
008 *
009 *      http://www.apache.org/licenses/LICENSE-2.0
010 *
011 * Unless required by applicable law or agreed to in writing, software
012 * distributed under the License is distributed on an "AS IS" BASIS,
013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 * See the License for the specific language governing permissions and
015 * limitations under the License.
016 */
017
018package org.apache.commons.codec.language;
019
020import org.apache.commons.codec.EncoderException;
021import org.apache.commons.codec.StringEncoder;
022import org.apache.commons.codec.binary.StringUtils;
023
024/**
 * Encodes a string into a double metaphone value. This implementation is based on the algorithm by <CITE>Lawrence
 * Philips</CITE>.
 * <p>
 * This class is conditionally thread-safe. The instance field {@link #maxCodeLen} is mutable (see
 * {@link #setMaxCodeLen(int)}) but is not volatile, and accesses are not synchronized. If an instance of the class is
 * shared between threads, the caller needs to ensure that suitable synchronization is used to ensure safe publication
 * of the value between threads, and must not invoke {@link #setMaxCodeLen(int)} after initial setup.
032 *
033 * @see <a href="http://drdobbs.com/184401251?pgno=2">Original Article</a>
034 * @see <a href="http://en.wikipedia.org/wiki/Metaphone">http://en.wikipedia.org/wiki/Metaphone</a>
035 *
036 * @version $Id: DoubleMetaphone.java 1634417 2014-10-27 00:42:28Z ggregory $
037 */
038public class DoubleMetaphone implements StringEncoder {
039
040    /**
041     * "Vowels" to test for
042     */
043    private static final String VOWELS = "AEIOUY";
044
045    /**
046     * Prefixes when present which are not pronounced
047     */
048    private static final String[] SILENT_START =
049        { "GN", "KN", "PN", "WR", "PS" };
050    private static final String[] L_R_N_M_B_H_F_V_W_SPACE =
051        { "L", "R", "N", "M", "B", "H", "F", "V", "W", " " };
052    private static final String[] ES_EP_EB_EL_EY_IB_IL_IN_IE_EI_ER =
053        { "ES", "EP", "EB", "EL", "EY", "IB", "IL", "IN", "IE", "EI", "ER" };
054    private static final String[] L_T_K_S_N_M_B_Z =
055        { "L", "T", "K", "S", "N", "M", "B", "Z" };
056
057    /**
058     * Maximum length of an encoding, default is 4
059     */
060    private int maxCodeLen = 4;
061
062    /**
063     * Creates an instance of this DoubleMetaphone encoder
064     */
065    public DoubleMetaphone() {
066        super();
067    }
068
069    /**
070     * Encode a value with Double Metaphone.
071     *
072     * @param value String to encode
073     * @return an encoded string
074     */
075    public String doubleMetaphone(final String value) {
076        return doubleMetaphone(value, false);
077    }
078
079    /**
080     * Encode a value with Double Metaphone, optionally using the alternate encoding.
081     *
082     * @param value String to encode
083     * @param alternate use alternate encode
084     * @return an encoded string
085     */
086    public String doubleMetaphone(String value, final boolean alternate) {
087        value = cleanInput(value);
088        if (value == null) {
089            return null;
090        }
091
092        final boolean slavoGermanic = isSlavoGermanic(value);
093        int index = isSilentStart(value) ? 1 : 0;
094
095        final DoubleMetaphoneResult result = new DoubleMetaphoneResult(this.getMaxCodeLen());
096
097        while (!result.isComplete() && index <= value.length() - 1) {
098            switch (value.charAt(index)) {
099            case 'A':
100            case 'E':
101            case 'I':
102            case 'O':
103            case 'U':
104            case 'Y':
105                index = handleAEIOUY(result, index);
106                break;
107            case 'B':
108                result.append('P');
109                index = charAt(value, index + 1) == 'B' ? index + 2 : index + 1;
110                break;
111            case '\u00C7':
112                // A C with a Cedilla
113                result.append('S');
114                index++;
115                break;
116            case 'C':
117                index = handleC(value, result, index);
118                break;
119            case 'D':
120                index = handleD(value, result, index);
121                break;
122            case 'F':
123                result.append('F');
124                index = charAt(value, index + 1) == 'F' ? index + 2 : index + 1;
125                break;
126            case 'G':
127                index = handleG(value, result, index, slavoGermanic);
128                break;
129            case 'H':
130                index = handleH(value, result, index);
131                break;
132            case 'J':
133                index = handleJ(value, result, index, slavoGermanic);
134                break;
135            case 'K':
136                result.append('K');
137                index = charAt(value, index + 1) == 'K' ? index + 2 : index + 1;
138                break;
139            case 'L':
140                index = handleL(value, result, index);
141                break;
142            case 'M':
143                result.append('M');
144                index = conditionM0(value, index) ? index + 2 : index + 1;
145                break;
146            case 'N':
147                result.append('N');
148                index = charAt(value, index + 1) == 'N' ? index + 2 : index + 1;
149                break;
150            case '\u00D1':
151                // N with a tilde (spanish ene)
152                result.append('N');
153                index++;
154                break;
155            case 'P':
156                index = handleP(value, result, index);
157                break;
158            case 'Q':
159                result.append('K');
160                index = charAt(value, index + 1) == 'Q' ? index + 2 : index + 1;
161                break;
162            case 'R':
163                index = handleR(value, result, index, slavoGermanic);
164                break;
165            case 'S':
166                index = handleS(value, result, index, slavoGermanic);
167                break;
168            case 'T':
169                index = handleT(value, result, index);
170                break;
171            case 'V':
172                result.append('F');
173                index = charAt(value, index + 1) == 'V' ? index + 2 : index + 1;
174                break;
175            case 'W':
176                index = handleW(value, result, index);
177                break;
178            case 'X':
179                index = handleX(value, result, index);
180                break;
181            case 'Z':
182                index = handleZ(value, result, index, slavoGermanic);
183                break;
184            default:
185                index++;
186                break;
187            }
188        }
189
190        return alternate ? result.getAlternate() : result.getPrimary();
191    }
192
193    /**
194     * Encode the value using DoubleMetaphone.  It will only work if
195     * <code>obj</code> is a <code>String</code> (like <code>Metaphone</code>).
196     *
197     * @param obj Object to encode (should be of type String)
198     * @return An encoded Object (will be of type String)
199     * @throws EncoderException encode parameter is not of type String
200     */
201    @Override
202    public Object encode(final Object obj) throws EncoderException {
203        if (!(obj instanceof String)) {
204            throw new EncoderException("DoubleMetaphone encode parameter is not of type String");
205        }
206        return doubleMetaphone((String) obj);
207    }
208
209    /**
210     * Encode the value using DoubleMetaphone.
211     *
212     * @param value String to encode
213     * @return An encoded String
214     */
215    @Override
216    public String encode(final String value) {
217        return doubleMetaphone(value);
218    }
219
220    /**
221     * Check if the Double Metaphone values of two <code>String</code> values
222     * are equal.
223     *
224     * @param value1 The left-hand side of the encoded {@link String#equals(Object)}.
225     * @param value2 The right-hand side of the encoded {@link String#equals(Object)}.
226     * @return <code>true</code> if the encoded <code>String</code>s are equal;
227     *          <code>false</code> otherwise.
228     * @see #isDoubleMetaphoneEqual(String,String,boolean)
229     */
230    public boolean isDoubleMetaphoneEqual(final String value1, final String value2) {
231        return isDoubleMetaphoneEqual(value1, value2, false);
232    }
233
234    /**
235     * Check if the Double Metaphone values of two <code>String</code> values
236     * are equal, optionally using the alternate value.
237     *
238     * @param value1 The left-hand side of the encoded {@link String#equals(Object)}.
239     * @param value2 The right-hand side of the encoded {@link String#equals(Object)}.
240     * @param alternate use the alternate value if <code>true</code>.
241     * @return <code>true</code> if the encoded <code>String</code>s are equal;
242     *          <code>false</code> otherwise.
243     */
244    public boolean isDoubleMetaphoneEqual(final String value1, final String value2, final boolean alternate) {
245        return StringUtils.equals(doubleMetaphone(value1, alternate), doubleMetaphone(value2, alternate));
246    }
247
248    /**
249     * Returns the maxCodeLen.
250     * @return int
251     */
252    public int getMaxCodeLen() {
253        return this.maxCodeLen;
254    }
255
256    /**
257     * Sets the maxCodeLen.
258     * @param maxCodeLen The maxCodeLen to set
259     */
260    public void setMaxCodeLen(final int maxCodeLen) {
261        this.maxCodeLen = maxCodeLen;
262    }
263
264    //-- BEGIN HANDLERS --//
265
266    /**
267     * Handles 'A', 'E', 'I', 'O', 'U', and 'Y' cases.
268     */
269    private int handleAEIOUY(final DoubleMetaphoneResult result, final int index) {
270        if (index == 0) {
271            result.append('A');
272        }
273        return index + 1;
274    }
275
276    /**
277     * Handles 'C' cases.
278     */
279    private int handleC(final String value, final DoubleMetaphoneResult result, int index) {
280        if (conditionC0(value, index)) {  // very confusing, moved out
281            result.append('K');
282            index += 2;
283        } else if (index == 0 && contains(value, index, 6, "CAESAR")) {
284            result.append('S');
285            index += 2;
286        } else if (contains(value, index, 2, "CH")) {
287            index = handleCH(value, result, index);
288        } else if (contains(value, index, 2, "CZ") &&
289                   !contains(value, index - 2, 4, "WICZ")) {
290            //-- "Czerny" --//
291            result.append('S', 'X');
292            index += 2;
293        } else if (contains(value, index + 1, 3, "CIA")) {
294            //-- "focaccia" --//
295            result.append('X');
296            index += 3;
297        } else if (contains(value, index, 2, "CC") &&
298                   !(index == 1 && charAt(value, 0) == 'M')) {
299            //-- double "cc" but not "McClelland" --//
300            return handleCC(value, result, index);
301        } else if (contains(value, index, 2, "CK", "CG", "CQ")) {
302            result.append('K');
303            index += 2;
304        } else if (contains(value, index, 2, "CI", "CE", "CY")) {
305            //-- Italian vs. English --//
306            if (contains(value, index, 3, "CIO", "CIE", "CIA")) {
307                result.append('S', 'X');
308            } else {
309                result.append('S');
310            }
311            index += 2;
312        } else {
313            result.append('K');
314            if (contains(value, index + 1, 2, " C", " Q", " G")) {
315                //-- Mac Caffrey, Mac Gregor --//
316                index += 3;
317            } else if (contains(value, index + 1, 1, "C", "K", "Q") &&
318                       !contains(value, index + 1, 2, "CE", "CI")) {
319                index += 2;
320            } else {
321                index++;
322            }
323        }
324
325        return index;
326    }
327
328    /**
329     * Handles 'CC' cases.
330     */
331    private int handleCC(final String value, final DoubleMetaphoneResult result, int index) {
332        if (contains(value, index + 2, 1, "I", "E", "H") &&
333            !contains(value, index + 2, 2, "HU")) {
334            //-- "bellocchio" but not "bacchus" --//
335            if ((index == 1 && charAt(value, index - 1) == 'A') ||
336                contains(value, index - 1, 5, "UCCEE", "UCCES")) {
337                //-- "accident", "accede", "succeed" --//
338                result.append("KS");
339            } else {
340                //-- "bacci", "bertucci", other Italian --//
341                result.append('X');
342            }
343            index += 3;
344        } else {    // Pierce's rule
345            result.append('K');
346            index += 2;
347        }
348
349        return index;
350    }
351
352    /**
353     * Handles 'CH' cases.
354     */
355    private int handleCH(final String value, final DoubleMetaphoneResult result, final int index) {
356        if (index > 0 && contains(value, index, 4, "CHAE")) {   // Michael
357            result.append('K', 'X');
358            return index + 2;
359        } else if (conditionCH0(value, index)) {
360            //-- Greek roots ("chemistry", "chorus", etc.) --//
361            result.append('K');
362            return index + 2;
363        } else if (conditionCH1(value, index)) {
364            //-- Germanic, Greek, or otherwise 'ch' for 'kh' sound --//
365            result.append('K');
366            return index + 2;
367        } else {
368            if (index > 0) {
369                if (contains(value, 0, 2, "MC")) {
370                    result.append('K');
371                } else {
372                    result.append('X', 'K');
373                }
374            } else {
375                result.append('X');
376            }
377            return index + 2;
378        }
379    }
380
381    /**
382     * Handles 'D' cases.
383     */
384    private int handleD(final String value, final DoubleMetaphoneResult result, int index) {
385        if (contains(value, index, 2, "DG")) {
386            //-- "Edge" --//
387            if (contains(value, index + 2, 1, "I", "E", "Y")) {
388                result.append('J');
389                index += 3;
390                //-- "Edgar" --//
391            } else {
392                result.append("TK");
393                index += 2;
394            }
395        } else if (contains(value, index, 2, "DT", "DD")) {
396            result.append('T');
397            index += 2;
398        } else {
399            result.append('T');
400            index++;
401        }
402        return index;
403    }
404
405    /**
406     * Handles 'G' cases.
407     */
408    private int handleG(final String value, final DoubleMetaphoneResult result, int index,
409                        final boolean slavoGermanic) {
410        if (charAt(value, index + 1) == 'H') {
411            index = handleGH(value, result, index);
412        } else if (charAt(value, index + 1) == 'N') {
413            if (index == 1 && isVowel(charAt(value, 0)) && !slavoGermanic) {
414                result.append("KN", "N");
415            } else if (!contains(value, index + 2, 2, "EY") &&
416                       charAt(value, index + 1) != 'Y' && !slavoGermanic) {
417                result.append("N", "KN");
418            } else {
419                result.append("KN");
420            }
421            index = index + 2;
422        } else if (contains(value, index + 1, 2, "LI") && !slavoGermanic) {
423            result.append("KL", "L");
424            index += 2;
425        } else if (index == 0 &&
426                   (charAt(value, index + 1) == 'Y' ||
427                    contains(value, index + 1, 2, ES_EP_EB_EL_EY_IB_IL_IN_IE_EI_ER))) {
428            //-- -ges-, -gep-, -gel-, -gie- at beginning --//
429            result.append('K', 'J');
430            index += 2;
431        } else if ((contains(value, index + 1, 2, "ER") ||
432                    charAt(value, index + 1) == 'Y') &&
433                   !contains(value, 0, 6, "DANGER", "RANGER", "MANGER") &&
434                   !contains(value, index - 1, 1, "E", "I") &&
435                   !contains(value, index - 1, 3, "RGY", "OGY")) {
436            //-- -ger-, -gy- --//
437            result.append('K', 'J');
438            index += 2;
439        } else if (contains(value, index + 1, 1, "E", "I", "Y") ||
440                   contains(value, index - 1, 4, "AGGI", "OGGI")) {
441            //-- Italian "biaggi" --//
442            if (contains(value, 0 ,4, "VAN ", "VON ") ||
443                contains(value, 0, 3, "SCH") ||
444                contains(value, index + 1, 2, "ET")) {
445                //-- obvious germanic --//
446                result.append('K');
447            } else if (contains(value, index + 1, 3, "IER")) {
448                result.append('J');
449            } else {
450                result.append('J', 'K');
451            }
452            index += 2;
453        } else if (charAt(value, index + 1) == 'G') {
454            index += 2;
455            result.append('K');
456        } else {
457            index++;
458            result.append('K');
459        }
460        return index;
461    }
462
463    /**
464     * Handles 'GH' cases.
465     */
466    private int handleGH(final String value, final DoubleMetaphoneResult result, int index) {
467        if (index > 0 && !isVowel(charAt(value, index - 1))) {
468            result.append('K');
469            index += 2;
470        } else if (index == 0) {
471            if (charAt(value, index + 2) == 'I') {
472                result.append('J');
473            } else {
474                result.append('K');
475            }
476            index += 2;
477        } else if ((index > 1 && contains(value, index - 2, 1, "B", "H", "D")) ||
478                   (index > 2 && contains(value, index - 3, 1, "B", "H", "D")) ||
479                   (index > 3 && contains(value, index - 4, 1, "B", "H"))) {
480            //-- Parker's rule (with some further refinements) - "hugh"
481            index += 2;
482        } else {
483            if (index > 2 && charAt(value, index - 1) == 'U' &&
484                contains(value, index - 3, 1, "C", "G", "L", "R", "T")) {
485                //-- "laugh", "McLaughlin", "cough", "gough", "rough", "tough"
486                result.append('F');
487            } else if (index > 0 && charAt(value, index - 1) != 'I') {
488                result.append('K');
489            }
490            index += 2;
491        }
492        return index;
493    }
494
495    /**
496     * Handles 'H' cases.
497     */
498    private int handleH(final String value, final DoubleMetaphoneResult result, int index) {
499        //-- only keep if first & before vowel or between 2 vowels --//
500        if ((index == 0 || isVowel(charAt(value, index - 1))) &&
501            isVowel(charAt(value, index + 1))) {
502            result.append('H');
503            index += 2;
504            //-- also takes car of "HH" --//
505        } else {
506            index++;
507        }
508        return index;
509    }
510
511    /**
512     * Handles 'J' cases.
513     */
514    private int handleJ(final String value, final DoubleMetaphoneResult result, int index,
515                        final boolean slavoGermanic) {
516        if (contains(value, index, 4, "JOSE") || contains(value, 0, 4, "SAN ")) {
517                //-- obvious Spanish, "Jose", "San Jacinto" --//
518                if ((index == 0 && (charAt(value, index + 4) == ' ') ||
519                     value.length() == 4) || contains(value, 0, 4, "SAN ")) {
520                    result.append('H');
521                } else {
522                    result.append('J', 'H');
523                }
524                index++;
525            } else {
526                if (index == 0 && !contains(value, index, 4, "JOSE")) {
527                    result.append('J', 'A');
528                } else if (isVowel(charAt(value, index - 1)) && !slavoGermanic &&
529                           (charAt(value, index + 1) == 'A' || charAt(value, index + 1) == 'O')) {
530                    result.append('J', 'H');
531                } else if (index == value.length() - 1) {
532                    result.append('J', ' ');
533                } else if (!contains(value, index + 1, 1, L_T_K_S_N_M_B_Z) &&
534                           !contains(value, index - 1, 1, "S", "K", "L")) {
535                    result.append('J');
536                }
537
538                if (charAt(value, index + 1) == 'J') {
539                    index += 2;
540                } else {
541                    index++;
542                }
543            }
544        return index;
545    }
546
547    /**
548     * Handles 'L' cases.
549     */
550    private int handleL(final String value, final DoubleMetaphoneResult result, int index) {
551        if (charAt(value, index + 1) == 'L') {
552            if (conditionL0(value, index)) {
553                result.appendPrimary('L');
554            } else {
555                result.append('L');
556            }
557            index += 2;
558        } else {
559            index++;
560            result.append('L');
561        }
562        return index;
563    }
564
565    /**
566     * Handles 'P' cases.
567     */
568    private int handleP(final String value, final DoubleMetaphoneResult result, int index) {
569        if (charAt(value, index + 1) == 'H') {
570            result.append('F');
571            index += 2;
572        } else {
573            result.append('P');
574            index = contains(value, index + 1, 1, "P", "B") ? index + 2 : index + 1;
575        }
576        return index;
577    }
578
579    /**
580     * Handles 'R' cases.
581     */
582    private int handleR(final String value, final DoubleMetaphoneResult result, final int index,
583                        final boolean slavoGermanic) {
584        if (index == value.length() - 1 && !slavoGermanic &&
585            contains(value, index - 2, 2, "IE") &&
586            !contains(value, index - 4, 2, "ME", "MA")) {
587            result.appendAlternate('R');
588        } else {
589            result.append('R');
590        }
591        return charAt(value, index + 1) == 'R' ? index + 2 : index + 1;
592    }
593
594    /**
595     * Handles 'S' cases.
596     */
597    private int handleS(final String value, final DoubleMetaphoneResult result, int index,
598                        final boolean slavoGermanic) {
599        if (contains(value, index - 1, 3, "ISL", "YSL")) {
600            //-- special cases "island", "isle", "carlisle", "carlysle" --//
601            index++;
602        } else if (index == 0 && contains(value, index, 5, "SUGAR")) {
603            //-- special case "sugar-" --//
604            result.append('X', 'S');
605            index++;
606        } else if (contains(value, index, 2, "SH")) {
607            if (contains(value, index + 1, 4, "HEIM", "HOEK", "HOLM", "HOLZ")) {
608                //-- germanic --//
609                result.append('S');
610            } else {
611                result.append('X');
612            }
613            index += 2;
614        } else if (contains(value, index, 3, "SIO", "SIA") || contains(value, index, 4, "SIAN")) {
615            //-- Italian and Armenian --//
616            if (slavoGermanic) {
617                result.append('S');
618            } else {
619                result.append('S', 'X');
620            }
621            index += 3;
622        } else if ((index == 0 && contains(value, index + 1, 1, "M", "N", "L", "W")) ||
623                   contains(value, index + 1, 1, "Z")) {
624            //-- german & anglicisations, e.g. "smith" match "schmidt" //
625            // "snider" match "schneider" --//
626            //-- also, -sz- in slavic language although in hungarian it //
627            //   is pronounced "s" --//
628            result.append('S', 'X');
629            index = contains(value, index + 1, 1, "Z") ? index + 2 : index + 1;
630        } else if (contains(value, index, 2, "SC")) {
631            index = handleSC(value, result, index);
632        } else {
633            if (index == value.length() - 1 && contains(value, index - 2, 2, "AI", "OI")) {
634                //-- french e.g. "resnais", "artois" --//
635                result.appendAlternate('S');
636            } else {
637                result.append('S');
638            }
639            index = contains(value, index + 1, 1, "S", "Z") ? index + 2 : index + 1;
640        }
641        return index;
642    }
643
644    /**
645     * Handles 'SC' cases.
646     */
647    private int handleSC(final String value, final DoubleMetaphoneResult result, final int index) {
648        if (charAt(value, index + 2) == 'H') {
649            //-- Schlesinger's rule --//
650            if (contains(value, index + 3, 2, "OO", "ER", "EN", "UY", "ED", "EM")) {
651                //-- Dutch origin, e.g. "school", "schooner" --//
652                if (contains(value, index + 3, 2, "ER", "EN")) {
653                    //-- "schermerhorn", "schenker" --//
654                    result.append("X", "SK");
655                } else {
656                    result.append("SK");
657                }
658            } else {
659                if (index == 0 && !isVowel(charAt(value, 3)) && charAt(value, 3) != 'W') {
660                    result.append('X', 'S');
661                } else {
662                    result.append('X');
663                }
664            }
665        } else if (contains(value, index + 2, 1, "I", "E", "Y")) {
666            result.append('S');
667        } else {
668            result.append("SK");
669        }
670        return index + 3;
671    }
672
673    /**
674     * Handles 'T' cases.
675     */
676    private int handleT(final String value, final DoubleMetaphoneResult result, int index) {
677        if (contains(value, index, 4, "TION")) {
678            result.append('X');
679            index += 3;
680        } else if (contains(value, index, 3, "TIA", "TCH")) {
681            result.append('X');
682            index += 3;
683        } else if (contains(value, index, 2, "TH") || contains(value, index, 3, "TTH")) {
684            if (contains(value, index + 2, 2, "OM", "AM") ||
685                //-- special case "thomas", "thames" or germanic --//
686                contains(value, 0, 4, "VAN ", "VON ") ||
687                contains(value, 0, 3, "SCH")) {
688                result.append('T');
689            } else {
690                result.append('0', 'T');
691            }
692            index += 2;
693        } else {
694            result.append('T');
695            index = contains(value, index + 1, 1, "T", "D") ? index + 2 : index + 1;
696        }
697        return index;
698    }
699
700    /**
701     * Handles 'W' cases.
702     */
703    private int handleW(final String value, final DoubleMetaphoneResult result, int index) {
704        if (contains(value, index, 2, "WR")) {
705            //-- can also be in middle of word --//
706            result.append('R');
707            index += 2;
708        } else {
709            if (index == 0 && (isVowel(charAt(value, index + 1)) ||
710                               contains(value, index, 2, "WH"))) {
711                if (isVowel(charAt(value, index + 1))) {
712                    //-- Wasserman should match Vasserman --//
713                    result.append('A', 'F');
714                } else {
715                    //-- need Uomo to match Womo --//
716                    result.append('A');
717                }
718                index++;
719            } else if ((index == value.length() - 1 && isVowel(charAt(value, index - 1))) ||
720                       contains(value, index - 1, 5, "EWSKI", "EWSKY", "OWSKI", "OWSKY") ||
721                       contains(value, 0, 3, "SCH")) {
722                //-- Arnow should match Arnoff --//
723                result.appendAlternate('F');
724                index++;
725            } else if (contains(value, index, 4, "WICZ", "WITZ")) {
726                //-- Polish e.g. "filipowicz" --//
727                result.append("TS", "FX");
728                index += 4;
729            } else {
730                index++;
731            }
732        }
733        return index;
734    }
735
736    /**
737     * Handles 'X' cases.
738     */
739    private int handleX(final String value, final DoubleMetaphoneResult result, int index) {
740        if (index == 0) {
741            result.append('S');
742            index++;
743        } else {
744            if (!((index == value.length() - 1) &&
745                  (contains(value, index - 3, 3, "IAU", "EAU") ||
746                   contains(value, index - 2, 2, "AU", "OU")))) {
747                //-- French e.g. breaux --//
748                result.append("KS");
749            }
750            index = contains(value, index + 1, 1, "C", "X") ? index + 2 : index + 1;
751        }
752        return index;
753    }
754
755    /**
756     * Handles 'Z' cases.
757     */
758    private int handleZ(final String value, final DoubleMetaphoneResult result, int index,
759                        final boolean slavoGermanic) {
760        if (charAt(value, index + 1) == 'H') {
761            //-- Chinese pinyin e.g. "zhao" or Angelina "Zhang" --//
762            result.append('J');
763            index += 2;
764        } else {
765            if (contains(value, index + 1, 2, "ZO", "ZI", "ZA") ||
766                (slavoGermanic && (index > 0 && charAt(value, index - 1) != 'T'))) {
767                result.append("S", "TS");
768            } else {
769                result.append('S');
770            }
771            index = charAt(value, index + 1) == 'Z' ? index + 2 : index + 1;
772        }
773        return index;
774    }
775
776    //-- BEGIN CONDITIONS --//
777
778    /**
779     * Complex condition 0 for 'C'.
780     */
781    private boolean conditionC0(final String value, final int index) {
782        if (contains(value, index, 4, "CHIA")) {
783            return true;
784        } else if (index <= 1) {
785            return false;
786        } else if (isVowel(charAt(value, index - 2))) {
787            return false;
788        } else if (!contains(value, index - 1, 3, "ACH")) {
789            return false;
790        } else {
791            final char c = charAt(value, index + 2);
792            return (c != 'I' && c != 'E') ||
793                    contains(value, index - 2, 6, "BACHER", "MACHER");
794        }
795    }
796
797    /**
798     * Complex condition 0 for 'CH'.
799     */
800    private boolean conditionCH0(final String value, final int index) {
801        if (index != 0) {
802            return false;
803        } else if (!contains(value, index + 1, 5, "HARAC", "HARIS") &&
804                   !contains(value, index + 1, 3, "HOR", "HYM", "HIA", "HEM")) {
805            return false;
806        } else if (contains(value, 0, 5, "CHORE")) {
807            return false;
808        } else {
809            return true;
810        }
811    }
812
813    /**
814     * Complex condition 1 for 'CH'.
815     */
816    private boolean conditionCH1(final String value, final int index) {
817        return ((contains(value, 0, 4, "VAN ", "VON ") || contains(value, 0, 3, "SCH")) ||
818                contains(value, index - 2, 6, "ORCHES", "ARCHIT", "ORCHID") ||
819                contains(value, index + 2, 1, "T", "S") ||
820                ((contains(value, index - 1, 1, "A", "O", "U", "E") || index == 0) &&
821                 (contains(value, index + 2, 1, L_R_N_M_B_H_F_V_W_SPACE) || index + 1 == value.length() - 1)));
822    }
823
824    /**
825     * Complex condition 0 for 'L'.
826     */
827    private boolean conditionL0(final String value, final int index) {
828        if (index == value.length() - 3 &&
829            contains(value, index - 1, 4, "ILLO", "ILLA", "ALLE")) {
830            return true;
831        } else if ((contains(value, value.length() - 2, 2, "AS", "OS") ||
832                    contains(value, value.length() - 1, 1, "A", "O")) &&
833                   contains(value, index - 1, 4, "ALLE")) {
834            return true;
835        } else {
836            return false;
837        }
838    }
839
840    /**
841     * Complex condition 0 for 'M'.
842     */
843    private boolean conditionM0(final String value, final int index) {
844        if (charAt(value, index + 1) == 'M') {
845            return true;
846        }
847        return contains(value, index - 1, 3, "UMB") &&
848               ((index + 1) == value.length() - 1 || contains(value, index + 2, 2, "ER"));
849    }
850
851    //-- BEGIN HELPER FUNCTIONS --//
852
853    /**
854     * Determines whether or not a value is of slavo-germanic origin. A value is
855     * of slavo-germanic origin if it contians any of 'W', 'K', 'CZ', or 'WITZ'.
856     */
857    private boolean isSlavoGermanic(final String value) {
858        return value.indexOf('W') > -1 || value.indexOf('K') > -1 ||
859            value.indexOf("CZ") > -1 || value.indexOf("WITZ") > -1;
860    }
861
862    /**
863     * Determines whether or not a character is a vowel or not
864     */
865    private boolean isVowel(final char ch) {
866        return VOWELS.indexOf(ch) != -1;
867    }
868
869    /**
870     * Determines whether or not the value starts with a silent letter.  It will
871     * return <code>true</code> if the value starts with any of 'GN', 'KN',
872     * 'PN', 'WR' or 'PS'.
873     */
874    private boolean isSilentStart(final String value) {
875        boolean result = false;
876        for (final String element : SILENT_START) {
877            if (value.startsWith(element)) {
878                result = true;
879                break;
880            }
881        }
882        return result;
883    }
884
885    /**
886     * Cleans the input.
887     */
888    private String cleanInput(String input) {
889        if (input == null) {
890            return null;
891        }
892        input = input.trim();
893        if (input.length() == 0) {
894            return null;
895        }
896        return input.toUpperCase(java.util.Locale.ENGLISH);
897    }
898
899    /*
900     * Gets the character at index <code>index</code> if available, otherwise
901     * it returns <code>Character.MIN_VALUE</code> so that there is some sort
902     * of a default.
903     */
904    protected char charAt(final String value, final int index) {
905        if (index < 0 || index >= value.length()) {
906            return Character.MIN_VALUE;
907        }
908        return value.charAt(index);
909    }
910
911    /*
912     * Determines whether <code>value</code> contains any of the criteria starting at index <code>start</code> and
913     * matching up to length <code>length</code>.
914     */
915    protected static boolean contains(final String value, final int start, final int length,
916                                      final String... criteria) {
917        boolean result = false;
918        if (start >= 0 && start + length <= value.length()) {
919            final String target = value.substring(start, start + length);
920
921            for (final String element : criteria) {
922                if (target.equals(element)) {
923                    result = true;
924                    break;
925                }
926            }
927        }
928        return result;
929    }
930
931    //-- BEGIN INNER CLASSES --//
932
933    /**
934     * Inner class for storing results, since there is the optional alternate encoding.
935     */
936    public class DoubleMetaphoneResult {
937
938        private final StringBuilder primary = new StringBuilder(getMaxCodeLen());
939        private final StringBuilder alternate = new StringBuilder(getMaxCodeLen());
940        private final int maxLength;
941
942        public DoubleMetaphoneResult(final int maxLength) {
943            this.maxLength = maxLength;
944        }
945
946        public void append(final char value) {
947            appendPrimary(value);
948            appendAlternate(value);
949        }
950
951        public void append(final char primary, final char alternate) {
952            appendPrimary(primary);
953            appendAlternate(alternate);
954        }
955
956        public void appendPrimary(final char value) {
957            if (this.primary.length() < this.maxLength) {
958                this.primary.append(value);
959            }
960        }
961
962        public void appendAlternate(final char value) {
963            if (this.alternate.length() < this.maxLength) {
964                this.alternate.append(value);
965            }
966        }
967
968        public void append(final String value) {
969            appendPrimary(value);
970            appendAlternate(value);
971        }
972
973        public void append(final String primary, final String alternate) {
974            appendPrimary(primary);
975            appendAlternate(alternate);
976        }
977
978        public void appendPrimary(final String value) {
979            final int addChars = this.maxLength - this.primary.length();
980            if (value.length() <= addChars) {
981                this.primary.append(value);
982            } else {
983                this.primary.append(value.substring(0, addChars));
984            }
985        }
986
987        public void appendAlternate(final String value) {
988            final int addChars = this.maxLength - this.alternate.length();
989            if (value.length() <= addChars) {
990                this.alternate.append(value);
991            } else {
992                this.alternate.append(value.substring(0, addChars));
993            }
994        }
995
996        public String getPrimary() {
997            return this.primary.toString();
998        }
999
1000        public String getAlternate() {
1001            return this.alternate.toString();
1002        }
1003
1004        public boolean isComplete() {
1005            return this.primary.length() >= this.maxLength &&
1006                   this.alternate.length() >= this.maxLength;
1007        }
1008    }
1009}