View Javadoc
1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one or more
3    * contributor license agreements.  See the NOTICE file distributed with
4    * this work for additional information regarding copyright ownership.
5    * The ASF licenses this file to You under the Apache License, Version 2.0
6    * (the "License"); you may not use this file except in compliance with
7    * the License.  You may obtain a copy of the License at
8    *
9    *      http://www.apache.org/licenses/LICENSE-2.0
10   *
11   * Unless required by applicable law or agreed to in writing, software
12   * distributed under the License is distributed on an "AS IS" BASIS,
13   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14   * See the License for the specific language governing permissions and
15   * limitations under the License.
16   */
17  
18  package org.apache.commons.codec.language;
19  
import org.apache.commons.codec.EncoderException;
import org.apache.commons.codec.StringEncoderAbstractTest;
import org.junit.Assert;
import org.junit.Test;
23  
24  /**
25   * Tests the <code>ColognePhonetic</code> class.
26   *
27   * <p>Keep this file in UTF-8 encoding for proper Javadoc processing.</p>
28   *
29   */
30  public class ColognePhoneticTest extends StringEncoderAbstractTest<ColognePhonetic> {
31  
32      @Override
33      protected ColognePhonetic createStringEncoder() {
34          return new ColognePhonetic();
35      }
36  
37      @Test
38      public void testAabjoe() throws EncoderException {
39          this.checkEncoding("01", "Aabjoe");
40      }
41  
42      @Test
43      public void testAaclan() throws EncoderException {
44          this.checkEncoding("0856", "Aaclan");
45      }
46  
47      /**
48       * Tests [CODEC-122]
49       *
50       * @throws EncoderException
51       */
52      @Test
53      public void testAychlmajrForCodec122() throws EncoderException {
54          this.checkEncoding("04567", "Aychlmajr");
55      }
56  
57      @Test
58      public void testEdgeCases() throws EncoderException {
59          final String[][] data = {
60              {"a", "0"},
61              {"e", "0"},
62              {"i", "0"},
63              {"o", "0"},
64              {"u", "0"},
65              {"\u00E4", "0"}, // a-umlaut
66              {"\u00F6", "0"}, // o-umlaut
67              {"\u00FC", "0"}, // u-umlaut
68              {"aa", "0"},
69              {"ha", "0"},
70              {"h", ""},
71              {"aha", "0"},
72              {"b", "1"},
73              {"p", "1"},
74              {"ph", "3"},
75              {"f", "3"},
76              {"v", "3"},
77              {"w", "3"},
78              {"g", "4"},
79              {"k", "4"},
80              {"q", "4"},
81              {"x", "48"},
82              {"ax", "048"},
83              {"cx", "48"},
84              {"l", "5"},
85              {"cl", "45"},
86              {"acl", "085"},
87              {"mn", "6"},
88              {"r", "7"}};
89          this.checkEncodings(data);
90      }
91  
92      @Test
93      public void testExamples() throws EncoderException {
94          final String[][] data = {
95              {"m\u00DCller", "657"}, // mÜller - why upper case U-umlaut?
96              {"schmidt", "862"},
97              {"schneider", "8627"},
98              {"fischer", "387"},
99              {"weber", "317"},
100             {"wagner", "3467"},
101             {"becker", "147"},
102             {"hoffmann", "0366"},
103             {"sch\u00C4fer", "837"}, // schÄfer - why upper case A-umlaut ?
104             {"Breschnew", "17863"},
105             {"Wikipedia", "3412"},
106             {"peter", "127"},
107             {"pharma", "376"},
108             {"m\u00f6nchengladbach", "664645214"}, // mönchengladbach
109             {"deutsch", "28"},
110             {"deutz", "28"},
111             {"hamburg", "06174"},
112             {"hannover", "0637"},
113             {"christstollen", "478256"},
114             {"Xanthippe", "48621"},
115             {"Zacharias", "8478"},
116             {"Holzbau", "0581"},
117             {"matsch", "68"},
118             {"matz", "68"},
119             {"Arbeitsamt", "071862"},
120             {"Eberhard", "01772"},
121             {"Eberhardt", "01772"},
122             {"heithabu", "021"}};
123         this.checkEncodings(data);
124     }
125 
126     @Test
127     public void testHyphen() throws EncoderException {
128         final String[][] data = {{"bergisch-gladbach", "174845214"},
129                 {"M\u00fcller-L\u00fcdenscheidt", "65752682"}}; // Müller-Lüdenscheidt
130         this.checkEncodings(data);
131     }
132 
133     @Test
134     public void testIsEncodeEquals() {
135         final String[][] data = {
136             {"Meyer", "M\u00fcller"}, // Müller
137             {"Meyer", "Mayr"},
138             {"house", "house"},
139             {"House", "house"},
140             {"Haus", "house"},
141             {"ganz", "Gans"},
142             {"ganz", "G\u00e4nse"}, // Gänse
143             {"Miyagi", "Miyako"}};
144         for (final String[] element : data) {
145             this.getStringEncoder().isEncodeEqual(element[1], element[0]);
146         }
147     }
148 
149     @Test
150     public void testVariationsMella() throws EncoderException {
151         final String data[] = {"mella", "milah", "moulla", "mellah", "muehle", "mule"};
152         this.checkEncodingVariations("65", data);
153     }
154 
155     @Test
156     public void testVariationsMeyer() throws EncoderException {
157         final String data[] = {"Meier", "Maier", "Mair", "Meyer", "Meyr", "Mejer", "Major"};
158         this.checkEncodingVariations("67", data);
159     }
160 }