001    /*
002     * Licensed to the Apache Software Foundation (ASF) under one or more
003     * contributor license agreements.  See the NOTICE file distributed with
004     * this work for additional information regarding copyright ownership.
005     * The ASF licenses this file to You under the Apache License, Version 2.0
006     * (the "License"); you may not use this file except in compliance with
007     * the License.  You may obtain a copy of the License at
008     *
009     *      http://www.apache.org/licenses/LICENSE-2.0
010     *
011     * Unless required by applicable law or agreed to in writing, software
012     * distributed under the License is distributed on an "AS IS" BASIS,
013     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014     * See the License for the specific language governing permissions and
015     * limitations under the License.
016     */
017    
018    package org.apache.commons.codec.language;
019    
import org.apache.commons.codec.EncoderException;
import org.apache.commons.codec.StringEncoder;
import org.apache.commons.codec.StringEncoderAbstractTest;
import org.junit.Assert;
import org.junit.Test;
024    
025    /**
026     * Tests the {@code ColognePhonetic} class.
027     *
028     * <p>Keep this file in UTF-8 encoding for proper Javadoc processing.</p>
029     *
030     */
031    public class ColognePhoneticTest extends StringEncoderAbstractTest {
032    
033        @Override
034        protected StringEncoder createStringEncoder() {
035            return new ColognePhonetic();
036        }
037    
038        @Test
039        public void testAabjoe() throws EncoderException {
040            this.checkEncoding("01", "Aabjoe");
041        }
042    
043        @Test
044        public void testAaclan() throws EncoderException {
045            this.checkEncoding("0856", "Aaclan");
046        }
047    
048        /**
049         * Tests [CODEC-122]
050         *
051         * @throws EncoderException
052         */
053        @Test
054        public void testAychlmajrForCodec122() throws EncoderException {
055            this.checkEncoding("04567", "Aychlmajr");
056        }
057    
058        @Test
059        public void testEdgeCases() throws EncoderException {
060            String[][] data = {
061                {"a", "0"},
062                {"e", "0"},
063                {"i", "0"},
064                {"o", "0"},
065                {"u", "0"},
066                {"\u00E4", "0"}, // a-umlaut
067                {"\u00F6", "0"}, // o-umlaut
068                {"\u00FC", "0"}, // u-umlaut
069                {"aa", "0"},
070                {"ha", "0"},
071                {"h", ""},
072                {"aha", "0"},
073                {"b", "1"},
074                {"p", "1"},
075                {"ph", "3"},
076                {"f", "3"},
077                {"v", "3"},
078                {"w", "3"},
079                {"g", "4"},
080                {"k", "4"},
081                {"q", "4"},
082                {"x", "48"},
083                {"ax", "048"},
084                {"cx", "48"},
085                {"l", "5"},
086                {"cl", "45"},
087                {"acl", "085"},
088                {"mn", "6"},
089                {"r", "7"}};
090            this.checkEncodings(data);
091        }
092    
093        @Test
094        public void testExamples() throws EncoderException {
095            String[][] data = {
096                {"m\u00DCller", "657"}, // mÜller - why upper case U-umlaut?
097                {"schmidt", "862"},
098                {"schneider", "8627"},
099                {"fischer", "387"},
100                {"weber", "317"},
101                {"wagner", "3467"},
102                {"becker", "147"},
103                {"hoffmann", "0366"},
104                {"sch\u00C4fer", "837"}, // schÄfer - why upper case A-umlaut ?
105                {"Breschnew", "17863"},
106                {"Wikipedia", "3412"},
107                {"peter", "127"},
108                {"pharma", "376"},
109                {"m\u00f6nchengladbach", "664645214"}, // mönchengladbach
110                {"deutsch", "28"},
111                {"deutz", "28"},
112                {"hamburg", "06174"},
113                {"hannover", "0637"},
114                {"christstollen", "478256"},
115                {"Xanthippe", "48621"},
116                {"Zacharias", "8478"},
117                {"Holzbau", "0581"},
118                {"matsch", "68"},
119                {"matz", "68"},
120                {"Arbeitsamt", "071862"},
121                {"Eberhard", "01772"},
122                {"Eberhardt", "01772"},
123                {"heithabu", "021"}};
124            this.checkEncodings(data);
125        }
126    
127        @Test
128        public void testHyphen() throws EncoderException {
129            String[][] data = {{"bergisch-gladbach", "174845214"},
130                    {"M\u00fcller-L\u00fcdenscheidt", "65752682"}}; // Müller-Lüdenscheidt
131            this.checkEncodings(data);
132        }
133    
134        @Test
135        public void testIsEncodeEquals() {
136            String[][] data = {
137                {"Meyer", "M\u00fcller"}, // Müller
138                {"Meyer", "Mayr"},
139                {"house", "house"},
140                {"House", "house"},
141                {"Haus", "house"},
142                {"ganz", "Gans"},
143                {"ganz", "G\u00e4nse"}, // Gänse
144                {"Miyagi", "Miyako"}};
145            for (String[] element : data) {
146                ((ColognePhonetic) this.getStringEncoder()).isEncodeEqual(element[1], element[0]);
147            }
148        }
149    
150        @Test
151        public void testVariationsMella() throws EncoderException {
152            String data[] = {"mella", "milah", "moulla", "mellah", "muehle", "mule"};
153            this.checkEncodingVariations("65", data);
154        }
155    
156        @Test
157        public void testVariationsMeyer() throws EncoderException {
158            String data[] = {"Meier", "Maier", "Mair", "Meyer", "Meyr", "Mejer", "Major"};
159            this.checkEncodingVariations("67", data);
160        }
161    }