1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18 package org.apache.commons.codec.language;
19
20 import java.util.Locale;
21
22 import org.apache.commons.codec.EncoderException;
23 import org.apache.commons.codec.StringEncoder;
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180 public class ColognePhonetic implements StringEncoder {
181
182
183
184
185
186
187 private abstract class CologneBuffer {
188
189 protected final char[] data;
190
191 protected int length = 0;
192
193 public CologneBuffer(char[] data) {
194 this.data = data;
195 this.length = data.length;
196 }
197
198 public CologneBuffer(int buffSize) {
199 this.data = new char[buffSize];
200 this.length = 0;
201 }
202
203 protected abstract char[] copyData(int start, final int length);
204
205 public int length() {
206 return length;
207 }
208
209 @Override
210 public String toString() {
211 return new String(copyData(0, length));
212 }
213 }
214
215 private class CologneOutputBuffer extends CologneBuffer {
216
217 public CologneOutputBuffer(int buffSize) {
218 super(buffSize);
219 }
220
221 public void addRight(char chr) {
222 data[length] = chr;
223 length++;
224 }
225
226 @Override
227 protected char[] copyData(int start, final int length) {
228 char[] newData = new char[length];
229 System.arraycopy(data, start, newData, 0, length);
230 return newData;
231 }
232 }
233
234 private class CologneInputBuffer extends CologneBuffer {
235
236 public CologneInputBuffer(char[] data) {
237 super(data);
238 }
239
240 public void addLeft(char ch) {
241 length++;
242 data[getNextPos()] = ch;
243 }
244
245 @Override
246 protected char[] copyData(int start, final int length) {
247 char[] newData = new char[length];
248 System.arraycopy(data, data.length - this.length + start, newData, 0, length);
249 return newData;
250 }
251
252 public char getNextChar() {
253 return data[getNextPos()];
254 }
255
256 protected int getNextPos() {
257 return data.length - length;
258 }
259
260 public char removeNext() {
261 char ch = getNextChar();
262 length--;
263 return ch;
264 }
265 }
266
267
268
269
270
271
272
273
274
275
276 private static final char[][] PREPROCESS_MAP = new char[][]{
277 {'\u00C4', 'A'},
278 {'\u00DC', 'U'},
279 {'\u00D6', 'O'},
280 {'\u00DF', 'S'}
281 };
282
283
284
285
286 private static boolean arrayContains(char[] arr, char key) {
287 for (char element : arr) {
288 if (element == key) {
289 return true;
290 }
291 }
292 return false;
293 }
294
295
296
297
298
299
300
301
302
303
304
305
306 public String colognePhonetic(String text) {
307 if (text == null) {
308 return null;
309 }
310
311 text = preprocess(text);
312
313 CologneOutputBuffer output = new CologneOutputBuffer(text.length() * 2);
314 CologneInputBuffer input = new CologneInputBuffer(text.toCharArray());
315
316 char nextChar;
317
318 char lastChar = '-';
319 char lastCode = '/';
320 char code;
321 char chr;
322
323 int rightLength = input.length();
324
325 while (rightLength > 0) {
326 chr = input.removeNext();
327
328 if ((rightLength = input.length()) > 0) {
329 nextChar = input.getNextChar();
330 } else {
331 nextChar = '-';
332 }
333
334 if (arrayContains(new char[]{'A', 'E', 'I', 'J', 'O', 'U', 'Y'}, chr)) {
335 code = '0';
336 } else if (chr == 'H' || chr < 'A' || chr > 'Z') {
337 if (lastCode == '/') {
338 continue;
339 }
340 code = '-';
341 } else if (chr == 'B' || (chr == 'P' && nextChar != 'H')) {
342 code = '1';
343 } else if ((chr == 'D' || chr == 'T') && !arrayContains(new char[]{'S', 'C', 'Z'}, nextChar)) {
344 code = '2';
345 } else if (arrayContains(new char[]{'W', 'F', 'P', 'V'}, chr)) {
346 code = '3';
347 } else if (arrayContains(new char[]{'G', 'K', 'Q'}, chr)) {
348 code = '4';
349 } else if (chr == 'X' && !arrayContains(new char[]{'C', 'K', 'Q'}, lastChar)) {
350 code = '4';
351 input.addLeft('S');
352 rightLength++;
353 } else if (chr == 'S' || chr == 'Z') {
354 code = '8';
355 } else if (chr == 'C') {
356 if (lastCode == '/') {
357 if (arrayContains(new char[]{'A', 'H', 'K', 'L', 'O', 'Q', 'R', 'U', 'X'}, nextChar)) {
358 code = '4';
359 } else {
360 code = '8';
361 }
362 } else {
363 if (arrayContains(new char[]{'S', 'Z'}, lastChar) ||
364 !arrayContains(new char[]{'A', 'H', 'O', 'U', 'K', 'Q', 'X'}, nextChar)) {
365 code = '8';
366 } else {
367 code = '4';
368 }
369 }
370 } else if (arrayContains(new char[]{'T', 'D', 'X'}, chr)) {
371 code = '8';
372 } else if (chr == 'R') {
373 code = '7';
374 } else if (chr == 'L') {
375 code = '5';
376 } else if (chr == 'M' || chr == 'N') {
377 code = '6';
378 } else {
379 code = chr;
380 }
381
382 if (code != '-' && (lastCode != code && (code != '0' || lastCode == '/') || code < '0' || code > '8')) {
383 output.addRight(code);
384 }
385
386 lastChar = chr;
387 lastCode = code;
388 }
389 return output.toString();
390 }
391
392 @Override
393 public Object encode(Object object) throws EncoderException {
394 if (!(object instanceof String)) {
395 throw new EncoderException("This method's parameter was expected to be of the type " +
396 String.class.getName() +
397 ". But actually it was of the type " +
398 object.getClass().getName() +
399 ".");
400 }
401 return encode((String) object);
402 }
403
404 @Override
405 public String encode(String text) {
406 return colognePhonetic(text);
407 }
408
409 public boolean isEncodeEqual(String text1, String text2) {
410 return colognePhonetic(text1).equals(colognePhonetic(text2));
411 }
412
413
414
415
416 private String preprocess(String text) {
417 text = text.toUpperCase(Locale.GERMAN);
418
419 char[] chrs = text.toCharArray();
420
421 for (int index = 0; index < chrs.length; index++) {
422 if (chrs[index] > 'Z') {
423 for (char[] element : PREPROCESS_MAP) {
424 if (chrs[index] == element[0]) {
425 chrs[index] = element[1];
426 break;
427 }
428 }
429 }
430 }
431 return new String(chrs);
432 }
433 }