1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17 package org.apache.commons.text;
18
19 import java.io.UnsupportedEncodingException;
20 import java.util.Arrays;
21 import java.util.Collection;
22 import java.util.Collections;
23 import java.util.HashMap;
24 import java.util.Iterator;
25 import java.util.LinkedHashMap;
26 import java.util.LinkedHashSet;
27 import java.util.Map;
28 import java.util.Map.Entry;
29 import java.util.Objects;
30 import java.util.Set;
31
32 import org.apache.commons.lang3.ArrayUtils;
33 import org.apache.commons.lang3.StringUtils;
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
/**
 * Converts text between an "original" alphabet and an "encoding" alphabet.
 *
 * <p>A converter holds two lookup tables: one from each original code point to
 * its encoded string, and the reverse table from encoded string back to the
 * original character. Characters placed on the "do not encode" list map to
 * themselves in both directions.</p>
 *
 * <p>Converters built by {@link #createConverter(Integer[], Integer[], Integer[])}
 * give every encoded (non "do not encode") character the same number of
 * encoding letters, reported by {@link #getEncodedCharLength()}. When decoding,
 * that length is measured in Java {@code char}s (UTF-16 code units), because
 * groups are cut out of the input with {@link String#substring(int, int)}.</p>
 *
 * <p>Instances are obtained through the static factory methods; the
 * constructor is private.</p>
 */
public final class AlphabetConverter {

    /** Separator written between a character and its encoding by {@link #toString()}. */
    private static final String ARROW = " -> ";

    /**
     * Creates a string holding the single given code point.
     *
     * @param i the code point to convert
     * @return a one-{@code char} string for code points that fit in a single
     *         {@code char}, otherwise the surrogate pair as a two-{@code char} string
     */
    private static String codePointToString(final int i) {
        if (Character.charCount(i) == 1) {
            return String.valueOf((char) i);
        }
        return new String(Character.toChars(i));
    }

    /**
     * Converts an array of {@code Character} to an array of their integer values.
     *
     * @param chars the characters to convert, may be {@code null} or empty
     * @return an array of the same length holding each character's numeric
     *         value, or an empty array when {@code chars} is {@code null} or empty
     */
    private static Integer[] convertCharsToIntegers(final Character[] chars) {
        if (chars == null || chars.length == 0) {
            return new Integer[0];
        }
        final Integer[] integers = new Integer[chars.length];
        Arrays.setAll(integers, i -> (int) chars[i]);
        return integers;
    }

    /**
     * Creates an alphabet converter for code points of the original alphabet
     * to strings over the encoding alphabet.
     *
     * <p>Duplicates in any of the three arrays are ignored; the first
     * occurrence determines iteration order. If the encoding alphabet is at
     * least as large as the original alphabet, every original letter is
     * mapped to a single encoding letter. Otherwise each original letter is
     * mapped to a fixed-length sequence of encoding letters. Letters on the
     * "do not encode" list always map to themselves and are never used as the
     * first letter of a multi-letter encoding.</p>
     *
     * @param original    code points of the alphabet to encode from
     * @param encoding    code points of the alphabet to encode into
     * @param doNotEncode code points that must map to themselves; each one
     *                    must be present in both {@code original} and {@code encoding}
     * @return the new converter
     * @throws IllegalArgumentException if a "do not encode" code point is
     *         missing from {@code original} or {@code encoding}, or if a
     *         multi-letter encoding is required but fewer than two encoding
     *         letters remain once the "do not encode" letters are excluded
     */
    public static AlphabetConverter createConverter(
            final Integer[] original,
            final Integer[] encoding,
            final Integer[] doNotEncode) {
        // Insertion-ordered sets: de-duplicate while keeping the caller's order.
        final Set<Integer> originalCopy = new LinkedHashSet<>(Arrays.asList(original));
        final Set<Integer> encodingCopy = new LinkedHashSet<>(Arrays.asList(encoding));
        final Set<Integer> doNotEncodeCopy = new LinkedHashSet<>(Arrays.asList(doNotEncode));

        final Map<Integer, String> originalToEncoded = new LinkedHashMap<>();
        final Map<String, String> encodedToOriginal = new LinkedHashMap<>();
        final Map<Integer, String> doNotEncodeMap = new HashMap<>();

        final int encodedLetterLength;

        for (final int i : doNotEncodeCopy) {
            if (!originalCopy.contains(i)) {
                throw new IllegalArgumentException(
                        "Can not use 'do not encode' list because original "
                                + "alphabet does not contain '"
                                + codePointToString(i) + "'");
            }

            if (!encodingCopy.contains(i)) {
                throw new IllegalArgumentException(
                        "Can not use 'do not encode' list because encoding alphabet does not contain '"
                                + codePointToString(i) + "'");
            }

            doNotEncodeMap.put(i, codePointToString(i));
        }

        if (encodingCopy.size() >= originalCopy.size()) {
            // Enough encoding letters for a one-to-one mapping.
            encodedLetterLength = 1;

            final Iterator<Integer> it = encodingCopy.iterator();

            for (final int originalLetter : originalCopy) {
                final String originalLetterAsString = codePointToString(originalLetter);

                if (doNotEncodeMap.containsKey(originalLetter)) {
                    originalToEncoded.put(originalLetter, originalLetterAsString);
                    encodedToOriginal.put(originalLetterAsString, originalLetterAsString);
                } else {
                    Integer next = it.next();

                    // Skip encoding letters reserved for "do not encode" characters.
                    while (doNotEncodeCopy.contains(next)) {
                        next = it.next();
                    }

                    final String encodedLetter = codePointToString(next);

                    originalToEncoded.put(originalLetter, encodedLetter);
                    encodedToOriginal.put(encodedLetter, originalLetterAsString);
                }
            }

            return new AlphabetConverter(originalToEncoded, encodedToOriginal, encodedLetterLength);
        }

        if (encodingCopy.size() - doNotEncodeCopy.size() < 2) {
            throw new IllegalArgumentException(
                    "Must have at least two encoding characters (excluding "
                            + "those in the 'do not encode' list), but has "
                            + (encodingCopy.size() - doNotEncodeCopy.size()));
        }

        // Work out how many encoding letters each original letter needs.
        // The first position may only use letters that are not on the
        // "do not encode" list; subsequent positions may use any encoding letter.
        int lettersSoFar = 1;

        int lettersLeft = (originalCopy.size() - doNotEncodeCopy.size())
                / (encodingCopy.size() - doNotEncodeCopy.size());

        while (lettersLeft / encodingCopy.size() >= 1) {
            lettersLeft /= encodingCopy.size();
            lettersSoFar++;
        }

        encodedLetterLength = lettersSoFar + 1;

        final AlphabetConverter ac =
                new AlphabetConverter(originalToEncoded, encodedToOriginal, encodedLetterLength);

        // Fills both maps (shared with the new instance) recursively.
        ac.addSingleEncoding(encodedLetterLength,
                "",
                encodingCopy,
                originalCopy.iterator(),
                doNotEncodeMap);

        return ac;
    }

    /**
     * Creates an alphabet converter from {@code Character} arrays by
     * converting them to integer arrays and delegating to
     * {@link #createConverter(Integer[], Integer[], Integer[])}.
     *
     * @param original    characters of the alphabet to encode from; {@code null} is treated as empty
     * @param encoding    characters of the alphabet to encode into; {@code null} is treated as empty
     * @param doNotEncode characters that must map to themselves; {@code null} is treated as empty
     * @return the new converter
     * @throws IllegalArgumentException under the same conditions as
     *         {@link #createConverter(Integer[], Integer[], Integer[])}
     */
    public static AlphabetConverter createConverterFromChars(
            final Character[] original,
            final Character[] encoding,
            final Character[] doNotEncode) {
        return AlphabetConverter.createConverter(
                convertCharsToIntegers(original),
                convertCharsToIntegers(encoding),
                convertCharsToIntegers(doNotEncode));
    }

    /**
     * Creates a converter from an explicit "original code point to encoded
     * string" map.
     *
     * <p>The map is copied, so later changes to the argument do not affect
     * the returned converter. The reverse (decoding) table is derived from
     * the map, and the encoded letter length is the length in {@code char}s
     * of the longest encoded value (at least 1).</p>
     *
     * @param originalToEncoded the mapping from original code points to their encodings
     * @return the new converter
     * @throws NullPointerException if {@code originalToEncoded} is {@code null}
     */
    public static AlphabetConverter createConverterFromMap(final Map<Integer, String> originalToEncoded) {
        Objects.requireNonNull(originalToEncoded, "originalToEncoded");
        // Defensive copy: a mere unmodifiable view would let the caller change
        // the forward table afterwards and leave the reverse table stale.
        final Map<Integer, String> unmodifiableOriginalToEncoded =
                Collections.unmodifiableMap(new LinkedHashMap<>(originalToEncoded));
        final Map<String, String> encodedToOriginal = new LinkedHashMap<>();

        int encodedLetterLength = 1;

        for (final Entry<Integer, String> e : unmodifiableOriginalToEncoded.entrySet()) {
            final String encoded = e.getValue();
            encodedToOriginal.put(encoded, codePointToString(e.getKey()));

            if (encoded.length() > encodedLetterLength) {
                encodedLetterLength = encoded.length();
            }
        }

        return new AlphabetConverter(unmodifiableOriginalToEncoded, encodedToOriginal, encodedLetterLength);
    }

    /** Original code point to encoded string. */
    private final Map<Integer, String> originalToEncoded;

    /** Encoded string to original character (as a string). */
    private final Map<String, String> encodedToOriginal;

    /** Number of {@code char}s read per encoded group while decoding. */
    private final int encodedLetterLength;

    /**
     * Creates a converter around the given tables. The maps are stored as
     * given, not copied.
     *
     * @param originalToEncoded   original code point to encoded string
     * @param encodedToOriginal   encoded string to original character
     * @param encodedLetterLength length of each encoded group
     */
    private AlphabetConverter(final Map<Integer, String> originalToEncoded,
                              final Map<String, String> encodedToOriginal,
                              final int encodedLetterLength) {
        this.originalToEncoded = originalToEncoded;
        this.encodedToOriginal = encodedToOriginal;
        this.encodedLetterLength = encodedLetterLength;
    }

    /**
     * Recursively assigns multi-letter encodings to the remaining original
     * letters, filling both lookup tables.
     *
     * <p>Each recursion level appends one encoding letter to
     * {@code currentEncoding}; when {@code level} reaches zero the accumulated
     * string is assigned to the next original letter that is not on the
     * "do not encode" list. "Do not encode" letters met on the way are mapped
     * to themselves.</p>
     *
     * @param level           number of encoding letters still to append
     * @param currentEncoding the encoding prefix built so far
     * @param encoding        the encoding alphabet
     * @param originals       iterator over the original letters still to be mapped
     * @param doNotEncodeMap  letters that map to themselves
     */
    private void addSingleEncoding(final int level,
                                   final String currentEncoding,
                                   final Collection<Integer> encoding,
                                   final Iterator<Integer> originals,
                                   final Map<Integer, String> doNotEncodeMap) {

        if (level > 0) {
            for (final int encodingLetter : encoding) {
                if (!originals.hasNext()) {
                    return; // every original letter has been mapped
                }

                // A "do not encode" letter must not start an encoding (the
                // top level is the first position), otherwise decode() would
                // take it for a self-mapped character.
                if (level != encodedLetterLength
                        || !doNotEncodeMap.containsKey(encodingLetter)) {
                    addSingleEncoding(level - 1,
                            currentEncoding + codePointToString(encodingLetter),
                            encoding,
                            originals,
                            doNotEncodeMap);
                }
            }
        } else {
            Integer next = originals.next();

            // Self-map any "do not encode" letters before consuming the encoding.
            while (doNotEncodeMap.containsKey(next)) {
                final String originalLetterAsString = codePointToString(next);

                originalToEncoded.put(next, originalLetterAsString);
                encodedToOriginal.put(originalLetterAsString, originalLetterAsString);

                if (!originals.hasNext()) {
                    return;
                }

                next = originals.next();
            }

            final String originalLetterAsString = codePointToString(next);

            originalToEncoded.put(next, currentEncoding);
            encodedToOriginal.put(currentEncoding, originalLetterAsString);
        }
    }

    /**
     * Decodes a string produced by {@link #encode(String)}.
     *
     * <p>A code point that maps to itself in the encoding table is copied
     * through unchanged. Anything else is read as a group of
     * {@link #getEncodedCharLength()} {@code char}s and looked up in the
     * decoding table.</p>
     *
     * @param encoded the encoded string, may be {@code null}
     * @return the decoded string, or {@code null} if {@code encoded} is {@code null}
     * @throws UnsupportedEncodingException if the input ends in the middle of
     *         an encoded group, or contains a group that has no decoding
     */
    public String decode(final String encoded)
            throws UnsupportedEncodingException {
        if (encoded == null) {
            return null;
        }

        final StringBuilder result = new StringBuilder();

        for (int j = 0; j < encoded.length();) {
            final int i = encoded.codePointAt(j);
            final String s = codePointToString(i);

            if (s.equals(originalToEncoded.get(i))) {
                result.append(s);
                // Step over the whole code point: a supplementary character
                // occupies two chars, and stopping on its low surrogate would
                // make the next lookup fail.
                j += Character.charCount(i);
            } else {
                if (j + encodedLetterLength > encoded.length()) {
                    throw new UnsupportedEncodingException("Unexpected end "
                            + "of string while decoding " + encoded);
                }
                final String nextGroup = encoded.substring(j, j + encodedLetterLength);
                final String next = encodedToOriginal.get(nextGroup);
                if (next == null) {
                    throw new UnsupportedEncodingException(
                            "Unexpected string without decoding ("
                                    + nextGroup + ") in " + encoded);
                }
                result.append(next);
                j += encodedLetterLength;
            }
        }

        return result.toString();
    }

    /**
     * Encodes a string by replacing each code point with its encoding.
     *
     * @param original the string to encode, may be {@code null}
     * @return the encoded string, or {@code null} if {@code original} is {@code null}
     * @throws UnsupportedEncodingException if {@code original} contains a
     *         code point that has no encoding in this converter
     */
    public String encode(final String original)
            throws UnsupportedEncodingException {
        if (original == null) {
            return null;
        }

        final StringBuilder sb = new StringBuilder();

        for (int i = 0; i < original.length();) {
            final int codePoint = original.codePointAt(i);

            final String nextLetter = originalToEncoded.get(codePoint);

            if (nextLetter == null) {
                throw new UnsupportedEncodingException(
                        "Couldn't find encoding for '"
                                + codePointToString(codePoint)
                                + "' in "
                                + original);
            }

            sb.append(nextLetter);

            i += Character.charCount(codePoint);
        }

        return sb.toString();
    }

    /**
     * Compares this converter with another object. Two converters are equal
     * when both lookup tables and the encoded letter length are equal.
     *
     * @param obj the object to compare with
     * @return {@code true} if {@code obj} is an equal {@code AlphabetConverter}
     */
    @Override
    public boolean equals(final Object obj) {
        if (obj == this) {
            return true;
        }
        // instanceof is false for null, so no separate null check is needed.
        if (!(obj instanceof AlphabetConverter)) {
            return false;
        }
        final AlphabetConverter other = (AlphabetConverter) obj;
        return originalToEncoded.equals(other.originalToEncoded)
                && encodedToOriginal.equals(other.encodedToOriginal)
                && encodedLetterLength == other.encodedLetterLength;
    }

    /**
     * Gets the length of an encoded group, as used by {@link #decode(String)}.
     *
     * @return the encoded letter length
     */
    public int getEncodedCharLength() {
        return encodedLetterLength;
    }

    /**
     * Gets the mapping from original code points to their encodings.
     *
     * @return an unmodifiable view of the encoding table
     */
    public Map<Integer, String> getOriginalToEncoded() {
        return Collections.unmodifiableMap(originalToEncoded);
    }

    /**
     * Computes a hash code from both lookup tables and the encoded letter
     * length, consistent with {@link #equals(Object)}.
     *
     * @return the hash code
     */
    @Override
    public int hashCode() {
        return Objects.hash(originalToEncoded,
                encodedToOriginal,
                encodedLetterLength);
    }

    /**
     * Lists the encoding table, one {@code original -> encoded} pair per line.
     *
     * @return the textual form of the encoding table
     */
    @Override
    public String toString() {
        final StringBuilder sb = new StringBuilder();

        originalToEncoded.forEach((k, v) ->
                sb.append(codePointToString(k))
                        .append(ARROW)
                        .append(v)
                        .append(System.lineSeparator()));

        return sb.toString();
    }
}