1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18 package org.apache.commons.codec.language;
19
20 import java.util.Locale;
21
22 import org.apache.commons.codec.EncoderException;
23 import org.apache.commons.codec.StringEncoder;
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180 public class ColognePhonetic implements StringEncoder {
181
182
183 private static final char[] AEIJOUY = new char[] { 'A', 'E', 'I', 'J', 'O', 'U', 'Y' };
184 private static final char[] SCZ = new char[] { 'S', 'C', 'Z' };
185 private static final char[] WFPV = new char[] { 'W', 'F', 'P', 'V' };
186 private static final char[] GKQ = new char[] { 'G', 'K', 'Q' };
187 private static final char[] CKQ = new char[] { 'C', 'K', 'Q' };
188 private static final char[] AHKLOQRUX = new char[] { 'A', 'H', 'K', 'L', 'O', 'Q', 'R', 'U', 'X' };
189 private static final char[] SZ = new char[] { 'S', 'Z' };
190 private static final char[] AHOUKQX = new char[] { 'A', 'H', 'O', 'U', 'K', 'Q', 'X' };
191 private static final char[] TDX = new char[] { 'T', 'D', 'X' };
192
193
194
195
196
197
198 private abstract class CologneBuffer {
199
200 protected final char[] data;
201
202 protected int length = 0;
203
204 public CologneBuffer(final char[] data) {
205 this.data = data;
206 this.length = data.length;
207 }
208
209 public CologneBuffer(final int buffSize) {
210 this.data = new char[buffSize];
211 this.length = 0;
212 }
213
214 protected abstract char[] copyData(int start, final int length);
215
216 public int length() {
217 return length;
218 }
219
220 @Override
221 public String toString() {
222 return new String(copyData(0, length));
223 }
224 }
225
226 private class CologneOutputBuffer extends CologneBuffer {
227
228 public CologneOutputBuffer(final int buffSize) {
229 super(buffSize);
230 }
231
232 public void addRight(final char chr) {
233 data[length] = chr;
234 length++;
235 }
236
237 @Override
238 protected char[] copyData(final int start, final int length) {
239 final char[] newData = new char[length];
240 System.arraycopy(data, start, newData, 0, length);
241 return newData;
242 }
243 }
244
245 private class CologneInputBuffer extends CologneBuffer {
246
247 public CologneInputBuffer(final char[] data) {
248 super(data);
249 }
250
251 public void addLeft(final char ch) {
252 length++;
253 data[getNextPos()] = ch;
254 }
255
256 @Override
257 protected char[] copyData(final int start, final int length) {
258 final char[] newData = new char[length];
259 System.arraycopy(data, data.length - this.length + start, newData, 0, length);
260 return newData;
261 }
262
263 public char getNextChar() {
264 return data[getNextPos()];
265 }
266
267 protected int getNextPos() {
268 return data.length - length;
269 }
270
271 public char removeNext() {
272 final char ch = getNextChar();
273 length--;
274 return ch;
275 }
276 }
277
278
279
280
281
282
283
284
285
286
287 private static final char[][] PREPROCESS_MAP = new char[][]{
288 {'\u00C4', 'A'},
289 {'\u00DC', 'U'},
290 {'\u00D6', 'O'},
291 {'\u00DF', 'S'}
292 };
293
294
295
296
297 private static boolean arrayContains(final char[] arr, final char key) {
298 for (final char element : arr) {
299 if (element == key) {
300 return true;
301 }
302 }
303 return false;
304 }
305
306
307
308
309
310
311
312
313
314
315
316
317 public String colognePhonetic(String text) {
318 if (text == null) {
319 return null;
320 }
321
322 text = preprocess(text);
323
324 final CologneOutputBuffer output = new CologneOutputBuffer(text.length() * 2);
325 final CologneInputBuffer input = new CologneInputBuffer(text.toCharArray());
326
327 char nextChar;
328
329 char lastChar = '-';
330 char lastCode = '/';
331 char code;
332 char chr;
333
334 int rightLength = input.length();
335
336 while (rightLength > 0) {
337 chr = input.removeNext();
338
339 if ((rightLength = input.length()) > 0) {
340 nextChar = input.getNextChar();
341 } else {
342 nextChar = '-';
343 }
344
345 if (arrayContains(AEIJOUY, chr)) {
346 code = '0';
347 } else if (chr == 'H' || chr < 'A' || chr > 'Z') {
348 if (lastCode == '/') {
349 continue;
350 }
351 code = '-';
352 } else if (chr == 'B' || (chr == 'P' && nextChar != 'H')) {
353 code = '1';
354 } else if ((chr == 'D' || chr == 'T') && !arrayContains(SCZ, nextChar)) {
355 code = '2';
356 } else if (arrayContains(WFPV, chr)) {
357 code = '3';
358 } else if (arrayContains(GKQ, chr)) {
359 code = '4';
360 } else if (chr == 'X' && !arrayContains(CKQ, lastChar)) {
361 code = '4';
362 input.addLeft('S');
363 rightLength++;
364 } else if (chr == 'S' || chr == 'Z') {
365 code = '8';
366 } else if (chr == 'C') {
367 if (lastCode == '/') {
368 if (arrayContains(AHKLOQRUX, nextChar)) {
369 code = '4';
370 } else {
371 code = '8';
372 }
373 } else {
374 if (arrayContains(SZ, lastChar) || !arrayContains(AHOUKQX, nextChar)) {
375 code = '8';
376 } else {
377 code = '4';
378 }
379 }
380 } else if (arrayContains(TDX, chr)) {
381 code = '8';
382 } else if (chr == 'R') {
383 code = '7';
384 } else if (chr == 'L') {
385 code = '5';
386 } else if (chr == 'M' || chr == 'N') {
387 code = '6';
388 } else {
389 code = chr;
390 }
391
392 if (code != '-' && (lastCode != code && (code != '0' || lastCode == '/') || code < '0' || code > '8')) {
393 output.addRight(code);
394 }
395
396 lastChar = chr;
397 lastCode = code;
398 }
399 return output.toString();
400 }
401
402 @Override
403 public Object encode(final Object object) throws EncoderException {
404 if (!(object instanceof String)) {
405 throw new EncoderException("This method's parameter was expected to be of the type " +
406 String.class.getName() +
407 ". But actually it was of the type " +
408 object.getClass().getName() +
409 ".");
410 }
411 return encode((String) object);
412 }
413
414 @Override
415 public String encode(final String text) {
416 return colognePhonetic(text);
417 }
418
419 public boolean isEncodeEqual(final String text1, final String text2) {
420 return colognePhonetic(text1).equals(colognePhonetic(text2));
421 }
422
423
424
425
426 private String preprocess(String text) {
427 text = text.toUpperCase(Locale.GERMAN);
428
429 final char[] chrs = text.toCharArray();
430
431 for (int index = 0; index < chrs.length; index++) {
432 if (chrs[index] > 'Z') {
433 for (final char[] element : PREPROCESS_MAP) {
434 if (chrs[index] == element[0]) {
435 chrs[index] = element[1];
436 break;
437 }
438 }
439 }
440 }
441 return new String(chrs);
442 }
443 }