001 /* 002 * Licensed to the Apache Software Foundation (ASF) under one or more 003 * contributor license agreements. See the NOTICE file distributed with 004 * this work for additional information regarding copyright ownership. 005 * The ASF licenses this file to You under the Apache License, Version 2.0 006 * (the "License"); you may not use this file except in compliance with 007 * the License. You may obtain a copy of the License at 008 * 009 * http://www.apache.org/licenses/LICENSE-2.0 010 * 011 * Unless required by applicable law or agreed to in writing, software 012 * distributed under the License is distributed on an "AS IS" BASIS, 013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 014 * See the License for the specific language governing permissions and 015 * limitations under the License. 016 */ 017 018 package org.apache.commons.codec.language; 019 020 import org.apache.commons.codec.EncoderException; 021 import org.apache.commons.codec.StringEncoder; 022 import org.apache.commons.codec.StringEncoderAbstractTest; 023 import org.junit.Test; 024 025 /** 026 * Tests the {@code ColognePhonetic} class. 027 * 028 * <p>Keep this file in UTF-8 encoding for proper Javadoc processing.</p> 029 * 030 */ 031 public class ColognePhoneticTest extends StringEncoderAbstractTest { 032 033 @Override 034 protected StringEncoder createStringEncoder() { 035 return new ColognePhonetic(); 036 } 037 038 @Test 039 public void testAabjoe() throws EncoderException { 040 this.checkEncoding("01", "Aabjoe"); 041 } 042 043 @Test 044 public void testAaclan() throws EncoderException { 045 this.checkEncoding("0856", "Aaclan"); 046 } 047 048 /** 049 * Tests [CODEC-122] 050 * 051 * @throws EncoderException 052 */ 053 @Test 054 public void testAychlmajrForCodec122() throws EncoderException { 055 this.checkEncoding("04567", "Aychlmajr"); 056 } 057 058 @Test 059 public void testEdgeCases() throws EncoderException { 060 String[][] data = { 061 {"a", "0"}, 062 {"e", "0"}, 063 {"i", "0"}, 064 {"o", "0"}, 065 {"u", "0"}, 066 {"\u00E4", "0"}, // a-umlaut 067 {"\u00F6", "0"}, // o-umlaut 068 {"\u00FC", "0"}, // u-umlaut 069 {"aa", "0"}, 070 {"ha", "0"}, 071 {"h", ""}, 072 {"aha", "0"}, 073 {"b", "1"}, 074 {"p", "1"}, 075 {"ph", "3"}, 076 {"f", "3"}, 077 {"v", "3"}, 078 {"w", "3"}, 079 {"g", "4"}, 080 {"k", "4"}, 081 {"q", "4"}, 082 {"x", "48"}, 083 {"ax", "048"}, 084 {"cx", "48"}, 085 {"l", "5"}, 086 {"cl", "45"}, 087 {"acl", "085"}, 088 {"mn", "6"}, 089 {"r", "7"}}; 090 this.checkEncodings(data); 091 } 092 093 @Test 094 public void testExamples() throws EncoderException { 095 String[][] data = { 096 {"m\u00DCller", "657"}, // mÜller - why upper case U-umlaut? 097 {"schmidt", "862"}, 098 {"schneider", "8627"}, 099 {"fischer", "387"}, 100 {"weber", "317"}, 101 {"wagner", "3467"}, 102 {"becker", "147"}, 103 {"hoffmann", "0366"}, 104 {"sch\u00C4fer", "837"}, // schÄfer - why upper case A-umlaut ? 105 {"Breschnew", "17863"}, 106 {"Wikipedia", "3412"}, 107 {"peter", "127"}, 108 {"pharma", "376"}, 109 {"m\u00f6nchengladbach", "664645214"}, // mönchengladbach 110 {"deutsch", "28"}, 111 {"deutz", "28"}, 112 {"hamburg", "06174"}, 113 {"hannover", "0637"}, 114 {"christstollen", "478256"}, 115 {"Xanthippe", "48621"}, 116 {"Zacharias", "8478"}, 117 {"Holzbau", "0581"}, 118 {"matsch", "68"}, 119 {"matz", "68"}, 120 {"Arbeitsamt", "071862"}, 121 {"Eberhard", "01772"}, 122 {"Eberhardt", "01772"}, 123 {"heithabu", "021"}}; 124 this.checkEncodings(data); 125 } 126 127 @Test 128 public void testHyphen() throws EncoderException { 129 String[][] data = {{"bergisch-gladbach", "174845214"}, 130 {"M\u00fcller-L\u00fcdenscheidt", "65752682"}}; // Müller-Lüdenscheidt 131 this.checkEncodings(data); 132 } 133 134 @Test 135 public void testIsEncodeEquals() { 136 String[][] data = { 137 {"Meyer", "M\u00fcller"}, // Müller 138 {"Meyer", "Mayr"}, 139 {"house", "house"}, 140 {"House", "house"}, 141 {"Haus", "house"}, 142 {"ganz", "Gans"}, 143 {"ganz", "G\u00e4nse"}, // Gänse 144 {"Miyagi", "Miyako"}}; 145 for (String[] element : data) { 146 ((ColognePhonetic) this.getStringEncoder()).isEncodeEqual(element[1], element[0]); 147 } 148 } 149 150 @Test 151 public void testVariationsMella() throws EncoderException { 152 String data[] = {"mella", "milah", "moulla", "mellah", "muehle", "mule"}; 153 this.checkEncodingVariations("65", data); 154 } 155 156 @Test 157 public void testVariationsMeyer() throws EncoderException { 158 String data[] = {"Meier", "Maier", "Mair", "Meyer", "Meyr", "Mejer", "Major"}; 159 this.checkEncodingVariations("67", data); 160 } 161 }