001/*
002 * Licensed to the Apache Software Foundation (ASF) under one or more
003 * contributor license agreements.  See the NOTICE file distributed with
004 * this work for additional information regarding copyright ownership.
005 * The ASF licenses this file to You under the Apache License, Version 2.0
006 * (the "License"); you may not use this file except in compliance with
007 * the License.  You may obtain a copy of the License at
008 *
009 *      http://www.apache.org/licenses/LICENSE-2.0
010 *
011 * Unless required by applicable law or agreed to in writing, software
012 * distributed under the License is distributed on an "AS IS" BASIS,
013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 * See the License for the specific language governing permissions and
015 * limitations under the License.
016 */
017
018package org.apache.commons.codec.language;
019
import org.apache.commons.codec.EncoderException;
import org.apache.commons.codec.StringEncoderAbstractTest;
import org.junit.Assert;
import org.junit.Test;
023
024/**
025 * Tests the {@code ColognePhonetic} class.
026 *
027 * <p>Keep this file in UTF-8 encoding for proper Javadoc processing.</p>
028 *
029 */
030public class ColognePhoneticTest extends StringEncoderAbstractTest<ColognePhonetic> {
031
032    @Override
033    protected ColognePhonetic createStringEncoder() {
034        return new ColognePhonetic();
035    }
036
037    @Test
038    public void testAabjoe() throws EncoderException {
039        this.checkEncoding("01", "Aabjoe");
040    }
041
042    @Test
043    public void testAaclan() throws EncoderException {
044        this.checkEncoding("0856", "Aaclan");
045    }
046
047    /**
048     * Tests [CODEC-122]
049     *
050     * @throws EncoderException
051     */
052    @Test
053    public void testAychlmajrForCodec122() throws EncoderException {
054        this.checkEncoding("04567", "Aychlmajr");
055    }
056
057    @Test
058    public void testEdgeCases() throws EncoderException {
059        final String[][] data = {
060            {"a", "0"},
061            {"e", "0"},
062            {"i", "0"},
063            {"o", "0"},
064            {"u", "0"},
065            {"\u00E4", "0"}, // a-umlaut
066            {"\u00F6", "0"}, // o-umlaut
067            {"\u00FC", "0"}, // u-umlaut
068            {"aa", "0"},
069            {"ha", "0"},
070            {"h", ""},
071            {"aha", "0"},
072            {"b", "1"},
073            {"p", "1"},
074            {"ph", "3"},
075            {"f", "3"},
076            {"v", "3"},
077            {"w", "3"},
078            {"g", "4"},
079            {"k", "4"},
080            {"q", "4"},
081            {"x", "48"},
082            {"ax", "048"},
083            {"cx", "48"},
084            {"l", "5"},
085            {"cl", "45"},
086            {"acl", "085"},
087            {"mn", "6"},
088            {"r", "7"}};
089        this.checkEncodings(data);
090    }
091
092    @Test
093    public void testExamples() throws EncoderException {
094        final String[][] data = {
095            {"m\u00DCller", "657"}, // m�ller - why upper case U-umlaut?
096            {"schmidt", "862"},
097            {"schneider", "8627"},
098            {"fischer", "387"},
099            {"weber", "317"},
100            {"wagner", "3467"},
101            {"becker", "147"},
102            {"hoffmann", "0366"},
103            {"sch\u00C4fer", "837"}, // sch�fer - why upper case A-umlaut ?
104            {"Breschnew", "17863"},
105            {"Wikipedia", "3412"},
106            {"peter", "127"},
107            {"pharma", "376"},
108            {"m\u00f6nchengladbach", "664645214"}, // m�nchengladbach
109            {"deutsch", "28"},
110            {"deutz", "28"},
111            {"hamburg", "06174"},
112            {"hannover", "0637"},
113            {"christstollen", "478256"},
114            {"Xanthippe", "48621"},
115            {"Zacharias", "8478"},
116            {"Holzbau", "0581"},
117            {"matsch", "68"},
118            {"matz", "68"},
119            {"Arbeitsamt", "071862"},
120            {"Eberhard", "01772"},
121            {"Eberhardt", "01772"},
122            {"heithabu", "021"}};
123        this.checkEncodings(data);
124    }
125
126    @Test
127    public void testHyphen() throws EncoderException {
128        final String[][] data = {{"bergisch-gladbach", "174845214"},
129                {"M\u00fcller-L\u00fcdenscheidt", "65752682"}}; // M�ller-L�denscheidt
130        this.checkEncodings(data);
131    }
132
133    @Test
134    public void testIsEncodeEquals() {
135        final String[][] data = {
136            {"Meyer", "M\u00fcller"}, // M�ller
137            {"Meyer", "Mayr"},
138            {"house", "house"},
139            {"House", "house"},
140            {"Haus", "house"},
141            {"ganz", "Gans"},
142            {"ganz", "G\u00e4nse"}, // G�nse
143            {"Miyagi", "Miyako"}};
144        for (final String[] element : data) {
145            this.getStringEncoder().isEncodeEqual(element[1], element[0]);
146        }
147    }
148
149    @Test
150    public void testVariationsMella() throws EncoderException {
151        final String data[] = {"mella", "milah", "moulla", "mellah", "muehle", "mule"};
152        this.checkEncodingVariations("65", data);
153    }
154
155    @Test
156    public void testVariationsMeyer() throws EncoderException {
157        final String data[] = {"Meier", "Maier", "Mair", "Meyer", "Meyr", "Mejer", "Major"};
158        this.checkEncodingVariations("67", data);
159    }
160}