001 /*
002 * Licensed to the Apache Software Foundation (ASF) under one or more
003 * contributor license agreements. See the NOTICE file distributed with
004 * this work for additional information regarding copyright ownership.
005 * The ASF licenses this file to You under the Apache License, Version 2.0
006 * (the "License"); you may not use this file except in compliance with
007 * the License. You may obtain a copy of the License at
008 *
009 * http://www.apache.org/licenses/LICENSE-2.0
010 *
011 * Unless required by applicable law or agreed to in writing, software
012 * distributed under the License is distributed on an "AS IS" BASIS,
013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 * See the License for the specific language governing permissions and
015 * limitations under the License.
016 */
017
018 package org.apache.commons.codec.language;
019
020 import org.apache.commons.codec.EncoderException;
021 import org.apache.commons.codec.StringEncoder;
022 import org.apache.commons.codec.StringEncoderAbstractTest;
023 import org.junit.Test;
024
025 /**
026 * Tests the {@code ColognePhonetic} class.
027 *
028 * <p>Keep this file in UTF-8 encoding for proper Javadoc processing.</p>
029 *
030 */
031 public class ColognePhoneticTest extends StringEncoderAbstractTest {
032
033 @Override
034 protected StringEncoder createStringEncoder() {
035 return new ColognePhonetic();
036 }
037
038 @Test
039 public void testAabjoe() throws EncoderException {
040 this.checkEncoding("01", "Aabjoe");
041 }
042
043 @Test
044 public void testAaclan() throws EncoderException {
045 this.checkEncoding("0856", "Aaclan");
046 }
047
048 /**
049 * Tests [CODEC-122]
050 *
051 * @throws EncoderException
052 */
053 @Test
054 public void testAychlmajrForCodec122() throws EncoderException {
055 this.checkEncoding("04567", "Aychlmajr");
056 }
057
058 @Test
059 public void testEdgeCases() throws EncoderException {
060 String[][] data = {
061 {"a", "0"},
062 {"e", "0"},
063 {"i", "0"},
064 {"o", "0"},
065 {"u", "0"},
066 {"\u00E4", "0"}, // a-umlaut
067 {"\u00F6", "0"}, // o-umlaut
068 {"\u00FC", "0"}, // u-umlaut
069 {"aa", "0"},
070 {"ha", "0"},
071 {"h", ""},
072 {"aha", "0"},
073 {"b", "1"},
074 {"p", "1"},
075 {"ph", "3"},
076 {"f", "3"},
077 {"v", "3"},
078 {"w", "3"},
079 {"g", "4"},
080 {"k", "4"},
081 {"q", "4"},
082 {"x", "48"},
083 {"ax", "048"},
084 {"cx", "48"},
085 {"l", "5"},
086 {"cl", "45"},
087 {"acl", "085"},
088 {"mn", "6"},
089 {"r", "7"}};
090 this.checkEncodings(data);
091 }
092
093 @Test
094 public void testExamples() throws EncoderException {
095 String[][] data = {
096 {"m\u00DCller", "657"}, // mÜller - why upper case U-umlaut?
097 {"schmidt", "862"},
098 {"schneider", "8627"},
099 {"fischer", "387"},
100 {"weber", "317"},
101 {"wagner", "3467"},
102 {"becker", "147"},
103 {"hoffmann", "0366"},
104 {"sch\u00C4fer", "837"}, // schÄfer - why upper case A-umlaut ?
105 {"Breschnew", "17863"},
106 {"Wikipedia", "3412"},
107 {"peter", "127"},
108 {"pharma", "376"},
109 {"m\u00f6nchengladbach", "664645214"}, // mönchengladbach
110 {"deutsch", "28"},
111 {"deutz", "28"},
112 {"hamburg", "06174"},
113 {"hannover", "0637"},
114 {"christstollen", "478256"},
115 {"Xanthippe", "48621"},
116 {"Zacharias", "8478"},
117 {"Holzbau", "0581"},
118 {"matsch", "68"},
119 {"matz", "68"},
120 {"Arbeitsamt", "071862"},
121 {"Eberhard", "01772"},
122 {"Eberhardt", "01772"},
123 {"heithabu", "021"}};
124 this.checkEncodings(data);
125 }
126
127 @Test
128 public void testHyphen() throws EncoderException {
129 String[][] data = {{"bergisch-gladbach", "174845214"},
130 {"M\u00fcller-L\u00fcdenscheidt", "65752682"}}; // Müller-Lüdenscheidt
131 this.checkEncodings(data);
132 }
133
134 @Test
135 public void testIsEncodeEquals() {
136 String[][] data = {
137 {"Meyer", "M\u00fcller"}, // Müller
138 {"Meyer", "Mayr"},
139 {"house", "house"},
140 {"House", "house"},
141 {"Haus", "house"},
142 {"ganz", "Gans"},
143 {"ganz", "G\u00e4nse"}, // Gänse
144 {"Miyagi", "Miyako"}};
145 for (String[] element : data) {
146 ((ColognePhonetic) this.getStringEncoder()).isEncodeEqual(element[1], element[0]);
147 }
148 }
149
150 @Test
151 public void testVariationsMella() throws EncoderException {
152 String data[] = {"mella", "milah", "moulla", "mellah", "muehle", "mule"};
153 this.checkEncodingVariations("65", data);
154 }
155
156 @Test
157 public void testVariationsMeyer() throws EncoderException {
158 String data[] = {"Meier", "Maier", "Mair", "Meyer", "Meyr", "Mejer", "Major"};
159 this.checkEncodingVariations("67", data);
160 }
161 }