1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17 package org.apache.commons.text;
18
19 import java.io.UnsupportedEncodingException;
20 import java.util.Arrays;
21 import java.util.Collection;
22 import java.util.Collections;
23 import java.util.HashMap;
24 import java.util.Iterator;
25 import java.util.LinkedHashMap;
26 import java.util.LinkedHashSet;
27 import java.util.Map;
28 import java.util.Map.Entry;
29 import java.util.Objects;
30 import java.util.Set;
31
32 import org.apache.commons.lang3.ArrayUtils;
33 import org.apache.commons.lang3.StringUtils;
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75 public final class AlphabetConverter {
76
77
78
79
80 private static final String ARROW = " -> ";
81
82
83
84
85
86
87
88
89 private static String codePointToString(final int i) {
90 if (Character.charCount(i) == 1) {
91 return String.valueOf((char) i);
92 }
93 return new String(Character.toChars(i));
94 }
95
96
97
98
99
100
101
102 private static Integer[] convertCharsToIntegers(final Character[] chars) {
103 if (ArrayUtils.isEmpty(chars)) {
104 return ArrayUtils.EMPTY_INTEGER_OBJECT_ARRAY;
105 }
106 final Integer[] integers = new Integer[chars.length];
107 Arrays.setAll(integers, i -> (int) chars[i]);
108 return integers;
109 }
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126 public static AlphabetConverter createConverter(
127 final Integer[] original,
128 final Integer[] encoding,
129 final Integer[] doNotEncode) {
130 final Set<Integer> originalCopy = new LinkedHashSet<>(Arrays.asList(original));
131 final Set<Integer> encodingCopy = new LinkedHashSet<>(Arrays.asList(encoding));
132 final Set<Integer> doNotEncodeCopy = new LinkedHashSet<>(Arrays.asList(doNotEncode));
133
134 final Map<Integer, String> originalToEncoded = new LinkedHashMap<>();
135 final Map<String, String> encodedToOriginal = new LinkedHashMap<>();
136 final Map<Integer, String> doNotEncodeMap = new HashMap<>();
137
138 final int encodedLetterLength;
139
140 for (final int i : doNotEncodeCopy) {
141 if (!originalCopy.contains(i)) {
142 throw new IllegalArgumentException(
143 "Can not use 'do not encode' list because original "
144 + "alphabet does not contain '"
145 + codePointToString(i) + "'");
146 }
147
148 if (!encodingCopy.contains(i)) {
149 throw new IllegalArgumentException(
150 "Can not use 'do not encode' list because encoding alphabet does not contain '"
151 + codePointToString(i) + "'");
152 }
153
154 doNotEncodeMap.put(i, codePointToString(i));
155 }
156
157 if (encodingCopy.size() >= originalCopy.size()) {
158 encodedLetterLength = 1;
159
160 final Iterator<Integer> it = encodingCopy.iterator();
161
162 for (final int originalLetter : originalCopy) {
163 final String originalLetterAsString = codePointToString(originalLetter);
164
165 if (doNotEncodeMap.containsKey(originalLetter)) {
166 originalToEncoded.put(originalLetter, originalLetterAsString);
167 encodedToOriginal.put(originalLetterAsString, originalLetterAsString);
168 } else {
169 Integer next = it.next();
170
171 while (doNotEncodeCopy.contains(next)) {
172 next = it.next();
173 }
174
175 final String encodedLetter = codePointToString(next);
176
177 originalToEncoded.put(originalLetter, encodedLetter);
178 encodedToOriginal.put(encodedLetter, originalLetterAsString);
179 }
180 }
181
182 return new AlphabetConverter(originalToEncoded, encodedToOriginal, encodedLetterLength);
183
184 }
185 if (encodingCopy.size() - doNotEncodeCopy.size() < 2) {
186 throw new IllegalArgumentException(
187 "Must have at least two encoding characters (excluding "
188 + "those in the 'do not encode' list), but has "
189 + (encodingCopy.size() - doNotEncodeCopy.size()));
190 }
191
192
193 int lettersSoFar = 1;
194
195
196
197 int lettersLeft = (originalCopy.size() - doNotEncodeCopy.size())
198 / (encodingCopy.size() - doNotEncodeCopy.size());
199
200 while (lettersLeft / encodingCopy.size() >= 1) {
201 lettersLeft /= encodingCopy.size();
202 lettersSoFar++;
203 }
204
205 encodedLetterLength = lettersSoFar + 1;
206
207 final AlphabetConverter ac =
208 new AlphabetConverter(originalToEncoded,
209 encodedToOriginal,
210 encodedLetterLength);
211
212 ac.addSingleEncoding(encodedLetterLength,
213 StringUtils.EMPTY,
214 encodingCopy,
215 originalCopy.iterator(),
216 doNotEncodeMap);
217
218 return ac;
219 }
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238 public static AlphabetConverter createConverterFromChars(
239 final Character[] original,
240 final Character[] encoding,
241 final Character[] doNotEncode) {
242 return createConverter(
243 convertCharsToIntegers(original),
244 convertCharsToIntegers(encoding),
245 convertCharsToIntegers(doNotEncode));
246 }
247
248
249
250
251
252
253
254
255 public static AlphabetConverter createConverterFromMap(final Map<Integer, String> originalToEncoded) {
256 final Map<Integer, String> unmodifiableOriginalToEncoded = Collections.unmodifiableMap(originalToEncoded);
257 final Map<String, String> encodedToOriginal = new LinkedHashMap<>();
258
259 int encodedLetterLength = 1;
260
261 for (final Entry<Integer, String> e : unmodifiableOriginalToEncoded.entrySet()) {
262 encodedToOriginal.put(e.getValue(), codePointToString(e.getKey()));
263
264 if (e.getValue().length() > encodedLetterLength) {
265 encodedLetterLength = e.getValue().length();
266 }
267 }
268
269 return new AlphabetConverter(unmodifiableOriginalToEncoded, encodedToOriginal, encodedLetterLength);
270 }
271
272
273
274
275 private final Map<Integer, String> originalToEncoded;
276
277
278
279
280 private final Map<String, String> encodedToOriginal;
281
282
283
284
285 private final int encodedLetterLength;
286
287
288
289
290
291
292
293
294 private AlphabetConverter(final Map<Integer, String> originalToEncoded,
295 final Map<String, String> encodedToOriginal,
296 final int encodedLetterLength) {
297
298 this.originalToEncoded = originalToEncoded;
299 this.encodedToOriginal = encodedToOriginal;
300 this.encodedLetterLength = encodedLetterLength;
301 }
302
303
304
305
306
307
308
309
310
311
312 private void addSingleEncoding(final int level,
313 final String currentEncoding,
314 final Collection<Integer> encoding,
315 final Iterator<Integer> originals,
316 final Map<Integer, String> doNotEncodeMap) {
317
318 if (level > 0) {
319 for (final int encodingLetter : encoding) {
320 if (!originals.hasNext()) {
321 return;
322 }
323
324
325 if (level != encodedLetterLength
326 || !doNotEncodeMap.containsKey(encodingLetter)) {
327 addSingleEncoding(level - 1,
328 currentEncoding
329 + codePointToString(encodingLetter),
330 encoding,
331 originals,
332 doNotEncodeMap
333 );
334 }
335 }
336 } else {
337 Integer next = originals.next();
338
339 while (doNotEncodeMap.containsKey(next)) {
340 final String originalLetterAsString = codePointToString(next);
341
342 originalToEncoded.put(next, originalLetterAsString);
343 encodedToOriginal.put(originalLetterAsString,
344 originalLetterAsString);
345
346 if (!originals.hasNext()) {
347 return;
348 }
349
350 next = originals.next();
351 }
352
353 final String originalLetterAsString = codePointToString(next);
354
355 originalToEncoded.put(next, currentEncoding);
356 encodedToOriginal.put(currentEncoding, originalLetterAsString);
357 }
358 }
359
360
361
362
363
364
365
366
367 public String decode(final String encoded)
368 throws UnsupportedEncodingException {
369 if (encoded == null) {
370 return null;
371 }
372
373 final StringBuilder result = new StringBuilder();
374
375 for (int j = 0; j < encoded.length();) {
376 final int i = encoded.codePointAt(j);
377 final String s = codePointToString(i);
378
379 if (s.equals(originalToEncoded.get(i))) {
380 result.append(s);
381 j++;
382
383 } else {
384 if (j + encodedLetterLength > encoded.length()) {
385 throw new UnsupportedEncodingException("Unexpected end "
386 + "of string while decoding " + encoded);
387 }
388 final String nextGroup = encoded.substring(j,
389 j + encodedLetterLength);
390 final String next = encodedToOriginal.get(nextGroup);
391 if (next == null) {
392 throw new UnsupportedEncodingException(
393 "Unexpected string without decoding ("
394 + nextGroup + ") in " + encoded);
395 }
396 result.append(next);
397 j += encodedLetterLength;
398 }
399 }
400
401 return result.toString();
402 }
403
404
405
406
407
408
409
410
411 public String encode(final String original)
412 throws UnsupportedEncodingException {
413 if (original == null) {
414 return null;
415 }
416
417 final StringBuilder sb = new StringBuilder();
418
419 for (int i = 0; i < original.length();) {
420 final int codePoint = original.codePointAt(i);
421
422 final String nextLetter = originalToEncoded.get(codePoint);
423
424 if (nextLetter == null) {
425 throw new UnsupportedEncodingException(
426 "Couldn't find encoding for '"
427 + codePointToString(codePoint)
428 + "' in "
429 + original
430 );
431 }
432
433 sb.append(nextLetter);
434
435 i += Character.charCount(codePoint);
436 }
437
438 return sb.toString();
439 }
440
441 @Override
442 public boolean equals(final Object obj) {
443 if (obj == null) {
444 return false;
445 }
446 if (obj == this) {
447 return true;
448 }
449 if (!(obj instanceof AlphabetConverter)) {
450 return false;
451 }
452 final AlphabetConverter other = (AlphabetConverter) obj;
453 return originalToEncoded.equals(other.originalToEncoded)
454 && encodedToOriginal.equals(other.encodedToOriginal)
455 && encodedLetterLength == other.encodedLetterLength;
456 }
457
458
459
460
461
462
463 public int getEncodedCharLength() {
464 return encodedLetterLength;
465 }
466
467
468
469
470
471
472 public Map<Integer, String> getOriginalToEncoded() {
473 return Collections.unmodifiableMap(originalToEncoded);
474 }
475
476 @Override
477 public int hashCode() {
478 return Objects.hash(originalToEncoded,
479 encodedToOriginal,
480 encodedLetterLength);
481 }
482
483 @Override
484 public String toString() {
485 final StringBuilder sb = new StringBuilder();
486
487 originalToEncoded.forEach((k, v) ->
488 sb.append(codePointToString(k))
489 .append(ARROW)
490 .append(k)
491 .append(System.lineSeparator()));
492
493 return sb.toString();
494 }
495 }