/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.commons.codec.language;

import org.apache.commons.codec.EncoderException;
import org.apache.commons.codec.StringEncoder;
import org.apache.commons.codec.StringEncoderAbstractTest;
import org.junit.Assert;
import org.junit.Test;

25  /**
26   * Tests the {@code ColognePhonetic} class.
27   *
28   * <p>Keep this file in UTF-8 encoding for proper Javadoc processing.</p>
29   *
30   */
31  public class ColognePhoneticTest extends StringEncoderAbstractTest {
32  
33      @Override
34      protected StringEncoder createStringEncoder() {
35          return new ColognePhonetic();
36      }
37  
38      @Test
39      public void testAabjoe() throws EncoderException {
40          this.checkEncoding("01", "Aabjoe");
41      }
42  
43      @Test
44      public void testAaclan() throws EncoderException {
45          this.checkEncoding("0856", "Aaclan");
46      }
47  
48      /**
49       * Tests [CODEC-122]
50       *
51       * @throws EncoderException
52       */
53      @Test
54      public void testAychlmajrForCodec122() throws EncoderException {
55          this.checkEncoding("04567", "Aychlmajr");
56      }
57  
58      @Test
59      public void testEdgeCases() throws EncoderException {
60          String[][] data = {
61              {"a", "0"},
62              {"e", "0"},
63              {"i", "0"},
64              {"o", "0"},
65              {"u", "0"},
66              {"\u00E4", "0"}, // a-umlaut
67              {"\u00F6", "0"}, // o-umlaut
68              {"\u00FC", "0"}, // u-umlaut
69              {"aa", "0"},
70              {"ha", "0"},
71              {"h", ""},
72              {"aha", "0"},
73              {"b", "1"},
74              {"p", "1"},
75              {"ph", "3"},
76              {"f", "3"},
77              {"v", "3"},
78              {"w", "3"},
79              {"g", "4"},
80              {"k", "4"},
81              {"q", "4"},
82              {"x", "48"},
83              {"ax", "048"},
84              {"cx", "48"},
85              {"l", "5"},
86              {"cl", "45"},
87              {"acl", "085"},
88              {"mn", "6"},
89              {"r", "7"}};
90          this.checkEncodings(data);
91      }
92  
93      @Test
94      public void testExamples() throws EncoderException {
95          String[][] data = {
96              {"m\u00DCller", "657"}, // mÜller - why upper case U-umlaut?
97              {"schmidt", "862"},
98              {"schneider", "8627"},
99              {"fischer", "387"},
100             {"weber", "317"},
101             {"wagner", "3467"},
102             {"becker", "147"},
103             {"hoffmann", "0366"},
104             {"sch\u00C4fer", "837"}, // schÄfer - why upper case A-umlaut ?
105             {"Breschnew", "17863"},
106             {"Wikipedia", "3412"},
107             {"peter", "127"},
108             {"pharma", "376"},
109             {"m\u00f6nchengladbach", "664645214"}, // mönchengladbach
110             {"deutsch", "28"},
111             {"deutz", "28"},
112             {"hamburg", "06174"},
113             {"hannover", "0637"},
114             {"christstollen", "478256"},
115             {"Xanthippe", "48621"},
116             {"Zacharias", "8478"},
117             {"Holzbau", "0581"},
118             {"matsch", "68"},
119             {"matz", "68"},
120             {"Arbeitsamt", "071862"},
121             {"Eberhard", "01772"},
122             {"Eberhardt", "01772"},
123             {"heithabu", "021"}};
124         this.checkEncodings(data);
125     }
126 
127     @Test
128     public void testHyphen() throws EncoderException {
129         String[][] data = {{"bergisch-gladbach", "174845214"},
130                 {"M\u00fcller-L\u00fcdenscheidt", "65752682"}}; // Müller-Lüdenscheidt
131         this.checkEncodings(data);
132     }
133 
134     @Test
135     public void testIsEncodeEquals() {
136         String[][] data = {
137             {"Meyer", "M\u00fcller"}, // Müller
138             {"Meyer", "Mayr"},
139             {"house", "house"},
140             {"House", "house"},
141             {"Haus", "house"},
142             {"ganz", "Gans"},
143             {"ganz", "G\u00e4nse"}, // Gänse
144             {"Miyagi", "Miyako"}};
145         for (String[] element : data) {
146             ((ColognePhonetic) this.getStringEncoder()).isEncodeEqual(element[1], element[0]);
147         }
148     }
149 
150     @Test
151     public void testVariationsMella() throws EncoderException {
152         String data[] = {"mella", "milah", "moulla", "mellah", "muehle", "mule"};
153         this.checkEncodingVariations("65", data);
154     }
155 
156     @Test
157     public void testVariationsMeyer() throws EncoderException {
158         String data[] = {"Meier", "Maier", "Mair", "Meyer", "Meyr", "Mejer", "Major"};
159         this.checkEncodingVariations("67", data);
160     }
161 }