1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18 package org.apache.commons.codec.language;
19
20 import java.util.Locale;
21
22 import org.apache.commons.codec.EncoderException;
23 import org.apache.commons.codec.StringEncoder;
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182 public class ColognePhonetic implements StringEncoder {
183
184
185 private static final char[] AEIJOUY = new char[] { 'A', 'E', 'I', 'J', 'O', 'U', 'Y' };
186 private static final char[] SCZ = new char[] { 'S', 'C', 'Z' };
187 private static final char[] WFPV = new char[] { 'W', 'F', 'P', 'V' };
188 private static final char[] GKQ = new char[] { 'G', 'K', 'Q' };
189 private static final char[] CKQ = new char[] { 'C', 'K', 'Q' };
190 private static final char[] AHKLOQRUX = new char[] { 'A', 'H', 'K', 'L', 'O', 'Q', 'R', 'U', 'X' };
191 private static final char[] SZ = new char[] { 'S', 'Z' };
192 private static final char[] AHOUKQX = new char[] { 'A', 'H', 'O', 'U', 'K', 'Q', 'X' };
193 private static final char[] TDX = new char[] { 'T', 'D', 'X' };
194
195
196
197
198
199
200 private abstract class CologneBuffer {
201
202 protected final char[] data;
203
204 protected int length = 0;
205
206 public CologneBuffer(final char[] data) {
207 this.data = data;
208 this.length = data.length;
209 }
210
211 public CologneBuffer(final int buffSize) {
212 this.data = new char[buffSize];
213 this.length = 0;
214 }
215
216 protected abstract char[] copyData(int start, final int length);
217
218 public int length() {
219 return length;
220 }
221
222 @Override
223 public String toString() {
224 return new String(copyData(0, length));
225 }
226 }
227
228 private class CologneOutputBuffer extends CologneBuffer {
229
230 public CologneOutputBuffer(final int buffSize) {
231 super(buffSize);
232 }
233
234 public void addRight(final char chr) {
235 data[length] = chr;
236 length++;
237 }
238
239 @Override
240 protected char[] copyData(final int start, final int length) {
241 final char[] newData = new char[length];
242 System.arraycopy(data, start, newData, 0, length);
243 return newData;
244 }
245 }
246
247 private class CologneInputBuffer extends CologneBuffer {
248
249 public CologneInputBuffer(final char[] data) {
250 super(data);
251 }
252
253 public void addLeft(final char ch) {
254 length++;
255 data[getNextPos()] = ch;
256 }
257
258 @Override
259 protected char[] copyData(final int start, final int length) {
260 final char[] newData = new char[length];
261 System.arraycopy(data, data.length - this.length + start, newData, 0, length);
262 return newData;
263 }
264
265 public char getNextChar() {
266 return data[getNextPos()];
267 }
268
269 protected int getNextPos() {
270 return data.length - length;
271 }
272
273 public char removeNext() {
274 final char ch = getNextChar();
275 length--;
276 return ch;
277 }
278 }
279
280
281
282
283
284
285
286
287
288
289 private static final char[][] PREPROCESS_MAP = new char[][]{
290 {'\u00C4', 'A'},
291 {'\u00DC', 'U'},
292 {'\u00D6', 'O'},
293 {'\u00DF', 'S'}
294 };
295
296
297
298
299 private static boolean arrayContains(final char[] arr, final char key) {
300 for (final char element : arr) {
301 if (element == key) {
302 return true;
303 }
304 }
305 return false;
306 }
307
308
309
310
311
312
313
314
315
316
317
318
319 public String colognePhonetic(String text) {
320 if (text == null) {
321 return null;
322 }
323
324 text = preprocess(text);
325
326 final CologneOutputBuffer output = new CologneOutputBuffer(text.length() * 2);
327 final CologneInputBuffer input = new CologneInputBuffer(text.toCharArray());
328
329 char nextChar;
330
331 char lastChar = '-';
332 char lastCode = '/';
333 char code;
334 char chr;
335
336 int rightLength = input.length();
337
338 while (rightLength > 0) {
339 chr = input.removeNext();
340
341 if ((rightLength = input.length()) > 0) {
342 nextChar = input.getNextChar();
343 } else {
344 nextChar = '-';
345 }
346
347 if (arrayContains(AEIJOUY, chr)) {
348 code = '0';
349 } else if (chr == 'H' || chr < 'A' || chr > 'Z') {
350 if (lastCode == '/') {
351 continue;
352 }
353 code = '-';
354 } else if (chr == 'B' || (chr == 'P' && nextChar != 'H')) {
355 code = '1';
356 } else if ((chr == 'D' || chr == 'T') && !arrayContains(SCZ, nextChar)) {
357 code = '2';
358 } else if (arrayContains(WFPV, chr)) {
359 code = '3';
360 } else if (arrayContains(GKQ, chr)) {
361 code = '4';
362 } else if (chr == 'X' && !arrayContains(CKQ, lastChar)) {
363 code = '4';
364 input.addLeft('S');
365 rightLength++;
366 } else if (chr == 'S' || chr == 'Z') {
367 code = '8';
368 } else if (chr == 'C') {
369 if (lastCode == '/') {
370 if (arrayContains(AHKLOQRUX, nextChar)) {
371 code = '4';
372 } else {
373 code = '8';
374 }
375 } else {
376 if (arrayContains(SZ, lastChar) || !arrayContains(AHOUKQX, nextChar)) {
377 code = '8';
378 } else {
379 code = '4';
380 }
381 }
382 } else if (arrayContains(TDX, chr)) {
383 code = '8';
384 } else if (chr == 'R') {
385 code = '7';
386 } else if (chr == 'L') {
387 code = '5';
388 } else if (chr == 'M' || chr == 'N') {
389 code = '6';
390 } else {
391 code = chr;
392 }
393
394 if (code != '-' && (lastCode != code && (code != '0' || lastCode == '/') || code < '0' || code > '8')) {
395 output.addRight(code);
396 }
397
398 lastChar = chr;
399 lastCode = code;
400 }
401 return output.toString();
402 }
403
404 @Override
405 public Object encode(final Object object) throws EncoderException {
406 if (!(object instanceof String)) {
407 throw new EncoderException("This method's parameter was expected to be of the type " +
408 String.class.getName() +
409 ". But actually it was of the type " +
410 object.getClass().getName() +
411 ".");
412 }
413 return encode((String) object);
414 }
415
416 @Override
417 public String encode(final String text) {
418 return colognePhonetic(text);
419 }
420
421 public boolean isEncodeEqual(final String text1, final String text2) {
422 return colognePhonetic(text1).equals(colognePhonetic(text2));
423 }
424
425
426
427
428 private String preprocess(String text) {
429 text = text.toUpperCase(Locale.GERMAN);
430
431 final char[] chrs = text.toCharArray();
432
433 for (int index = 0; index < chrs.length; index++) {
434 if (chrs[index] > 'Z') {
435 for (final char[] element : PREPROCESS_MAP) {
436 if (chrs[index] == element[0]) {
437 chrs[index] = element[1];
438 break;
439 }
440 }
441 }
442 }
443 return new String(chrs);
444 }
445 }