1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17 package org.apache.commons.text;
18
19 import java.io.UnsupportedEncodingException;
20 import java.util.Arrays;
21 import java.util.Collection;
22 import java.util.Collections;
23 import java.util.HashMap;
24 import java.util.Iterator;
25 import java.util.LinkedHashMap;
26 import java.util.LinkedHashSet;
27 import java.util.Map;
28 import java.util.Map.Entry;
29 import java.util.Objects;
30 import java.util.Set;
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
/**
 * Converts strings from one alphabet (set of Unicode code points) to another and back.
 *
 * <p>Each letter of the original alphabet is mapped to a fixed-width string made of letters
 * from the encoding alphabet. Letters listed as "do not encode" are mapped to themselves.
 * Instances are created through the static factory methods
 * {@link #createConverterFromMap(Map)}, {@link #createConverterFromChars(Character[], Character[], Character[])}
 * and {@link #createConverter(Integer[], Integer[], Integer[])}.</p>
 */
public final class AlphabetConverter {

    /** Separator printed between an original letter and its encoding in {@link #toString()}. */
    private static final String ARROW = " -> ";

    /** Platform line separator appended after every mapping in {@link #toString()}. */
    private static final String LINE_SEPARATOR = System.lineSeparator();

    /** Maps an original code point to its encoded string. */
    private final Map<Integer, String> originalToEncoded;

    /** Maps an encoded string back to the original letter (as a string). */
    private final Map<String, String> encodedToOriginal;

    /** Width, in UTF-16 chars, of the groups {@link #decode(String)} reads for encoded letters. */
    private final int encodedLetterLength;

    /**
     * Creates a converter around the given lookup tables; the maps are stored as passed, not copied.
     *
     * @param originalToEncoded original code point to encoded string
     * @param encodedToOriginal encoded string to original letter
     * @param encodedLetterLength width of one encoded letter
     */
    private AlphabetConverter(final Map<Integer, String> originalToEncoded, final Map<String, String> encodedToOriginal,
            final int encodedLetterLength) {
        this.originalToEncoded = originalToEncoded;
        this.encodedToOriginal = encodedToOriginal;
        this.encodedLetterLength = encodedLetterLength;
    }

    /**
     * Encodes a string, code point by code point, into the encoding alphabet.
     *
     * @param original the string to encode, may be {@code null}
     * @return the encoded string, or {@code null} if {@code original} is {@code null}
     * @throws UnsupportedEncodingException if a code point of {@code original} has no encoding
     */
    public String encode(final String original) throws UnsupportedEncodingException {
        if (original == null) {
            return null;
        }

        final StringBuilder sb = new StringBuilder();

        for (int i = 0; i < original.length();) {
            final int codepoint = original.codePointAt(i);
            final String nextLetter = originalToEncoded.get(codepoint);

            if (nextLetter == null) {
                throw new UnsupportedEncodingException(
                        "Couldn't find encoding for '" + codePointToString(codepoint) + "' in " + original);
            }

            sb.append(nextLetter);

            // Step over both surrogates when the code point is supplementary.
            i += Character.charCount(codepoint);
        }

        return sb.toString();
    }

    /**
     * Decodes a string produced by {@link #encode(String)} back into the original alphabet.
     *
     * <p>A code point that is mapped to itself is copied through unchanged. Anything else is read
     * as a group of {@link #getEncodedCharLength()} UTF-16 chars and looked up in the reverse
     * table. Because the groups are cut with {@link String#substring(int, int)}, encodings that
     * themselves contain supplementary code points are not handled by this fixed-width lookup.</p>
     *
     * @param encoded the string to decode, may be {@code null}
     * @return the decoded string, or {@code null} if {@code encoded} is {@code null}
     * @throws UnsupportedEncodingException if the input ends in the middle of a group or contains
     *         a group that has no known decoding
     */
    public String decode(final String encoded) throws UnsupportedEncodingException {
        if (encoded == null) {
            return null;
        }

        final StringBuilder result = new StringBuilder();

        for (int j = 0; j < encoded.length();) {
            final int codepoint = encoded.codePointAt(j);
            final String letter = codePointToString(codepoint);

            if (letter.equals(originalToEncoded.get(codepoint))) {
                // "Do not encode" letter: copy it and advance by its real width. Advancing by a
                // single char would land on the low surrogate of a supplementary code point.
                result.append(letter);
                j += Character.charCount(codepoint);
            } else {
                if (j + encodedLetterLength > encoded.length()) {
                    throw new UnsupportedEncodingException("Unexpected end of string while decoding " + encoded);
                }
                final String nextGroup = encoded.substring(j, j + encodedLetterLength);
                final String next = encodedToOriginal.get(nextGroup);
                if (next == null) {
                    throw new UnsupportedEncodingException(
                            "Unexpected string without decoding (" + nextGroup + ") in " + encoded);
                }
                result.append(next);
                j += encodedLetterLength;
            }
        }

        return result.toString();
    }

    /**
     * Returns the width of the encoding of a single letter.
     *
     * @return the encoded letter length used by this converter
     */
    public int getEncodedCharLength() {
        return encodedLetterLength;
    }

    /**
     * Returns the mapping from original code points to their encoded strings.
     *
     * @return an unmodifiable view of the original-to-encoded map
     */
    public Map<Integer, String> getOriginalToEncoded() {
        return Collections.unmodifiableMap(originalToEncoded);
    }

    /**
     * Recursively assigns fixed-width encodings to the remaining original letters.
     *
     * <p>While {@code level} is positive, every letter of the encoding alphabet is appended in turn
     * to {@code currentEncoding} and the method recurses one level down. At level zero the
     * accumulated string is assigned to the next original letter; "do not encode" letters met on
     * the way are mapped to themselves.</p>
     *
     * @param level number of letters still to append to {@code currentEncoding}
     * @param currentEncoding the encoding prefix built so far
     * @param encoding the letters of the encoding alphabet
     * @param originals iterator over the original letters not yet assigned
     * @param doNotEncodeMap letters that must map to themselves, keyed by code point
     */
    @SuppressWarnings("PMD")
    private void addSingleEncoding(final int level, final String currentEncoding, final Collection<Integer> encoding,
            final Iterator<Integer> originals, final Map<Integer, String> doNotEncodeMap) {

        if (level > 0) {
            for (final int encodingLetter : encoding) {
                if (!originals.hasNext()) {
                    return; // every original letter already has an encoding
                }

                // An encoded group must not start with a "do not encode" letter, otherwise decode
                // could not tell the group apart from the untouched letter itself.
                if (level != encodedLetterLength || !doNotEncodeMap.containsKey(encodingLetter)) {
                    addSingleEncoding(level - 1, currentEncoding + codePointToString(encodingLetter), encoding,
                            originals, doNotEncodeMap);
                }
            }
        } else {
            Integer next = originals.next();

            // Map any run of "do not encode" letters to themselves before using currentEncoding.
            while (doNotEncodeMap.containsKey(next)) {
                final String originalLetterAsString = codePointToString(next);

                originalToEncoded.put(next, originalLetterAsString);
                encodedToOriginal.put(originalLetterAsString, originalLetterAsString);

                if (!originals.hasNext()) {
                    return;
                }

                next = originals.next();
            }

            final String originalLetterAsString = codePointToString(next);

            originalToEncoded.put(next, currentEncoding);
            encodedToOriginal.put(currentEncoding, originalLetterAsString);
        }
    }

    /**
     * Lists every mapping as {@code original -> encoded}, one per line.
     *
     * @return a textual dump of the original-to-encoded table
     */
    @Override
    public String toString() {
        final StringBuilder sb = new StringBuilder();

        for (final Entry<Integer, String> entry : originalToEncoded.entrySet()) {
            sb.append(codePointToString(entry.getKey())).append(ARROW).append(entry.getValue()).append(LINE_SEPARATOR);
        }

        return sb.toString();
    }

    /**
     * Two converters are equal when both lookup tables and the encoded letter length are equal.
     *
     * @param obj the object to compare with
     * @return {@code true} if {@code obj} is an equivalent converter
     */
    @Override
    public boolean equals(final Object obj) {
        if (obj == null) {
            return false;
        }
        if (obj == this) {
            return true;
        }
        if (!(obj instanceof AlphabetConverter)) {
            return false;
        }
        final AlphabetConverter other = (AlphabetConverter) obj;
        return originalToEncoded.equals(other.originalToEncoded) && encodedToOriginal.equals(other.encodedToOriginal)
                && encodedLetterLength == other.encodedLetterLength;
    }

    @Override
    public int hashCode() {
        return Objects.hash(originalToEncoded, encodedToOriginal, encodedLetterLength);
    }

    /**
     * Creates a converter from an explicit original-to-encoded map.
     *
     * <p>The map is copied, so later changes to the argument do not affect the converter. The
     * encoded letter length is the longest value length in the map, and at least 1.</p>
     *
     * @param originalToEncoded mapping from original code point to encoded string
     * @return a converter backed by a copy of the given mapping
     */
    public static AlphabetConverter createConverterFromMap(final Map<Integer, String> originalToEncoded) {
        // Defensive copy: the reverse table below is built once, so the forward table must not
        // change behind our back if the caller keeps mutating their map.
        final Map<Integer, String> originalToEncodedCopy = new LinkedHashMap<>(originalToEncoded);
        final Map<Integer, String> unmodifiableOriginalToEncoded = Collections.unmodifiableMap(originalToEncodedCopy);
        final Map<String, String> encodedToOriginal = new LinkedHashMap<>();

        int encodedLetterLength = 1;

        for (final Entry<Integer, String> e : unmodifiableOriginalToEncoded.entrySet()) {
            encodedToOriginal.put(e.getValue(), codePointToString(e.getKey()));

            if (e.getValue().length() > encodedLetterLength) {
                encodedLetterLength = e.getValue().length();
            }
        }

        return new AlphabetConverter(unmodifiableOriginalToEncoded, encodedToOriginal, encodedLetterLength);
    }

    /**
     * Creates a converter from alphabets given as {@link Character} arrays.
     *
     * <p>Delegates to {@link #createConverter(Integer[], Integer[], Integer[])} after converting
     * each array to code points; a {@code null} or empty array is treated as an empty alphabet.</p>
     *
     * @param original the letters of the original alphabet
     * @param encoding the letters of the encoding alphabet
     * @param doNotEncode letters that must map to themselves
     * @return the converter
     * @throws IllegalArgumentException under the conditions described for
     *         {@link #createConverter(Integer[], Integer[], Integer[])}
     */
    public static AlphabetConverter createConverterFromChars(final Character[] original, final Character[] encoding,
            final Character[] doNotEncode) {
        return AlphabetConverter.createConverter(convertCharsToIntegers(original), convertCharsToIntegers(encoding),
                convertCharsToIntegers(doNotEncode));
    }

    /**
     * Converts characters to their integer (code point) values.
     *
     * @param chars the characters to convert, may be {@code null}
     * @return an array of the same length, or an empty array for {@code null} or empty input
     */
    private static Integer[] convertCharsToIntegers(final Character[] chars) {
        if (chars == null || chars.length == 0) {
            return new Integer[0];
        }
        final Integer[] integers = new Integer[chars.length];
        for (int i = 0; i < chars.length; i++) {
            integers[i] = (int) chars[i];
        }
        return integers;
    }

    /**
     * Creates a converter from alphabets given as code points.
     *
     * <p>Duplicates are ignored and the order of first occurrence is kept. When the encoding
     * alphabet is at least as large as the original one, every original letter is mapped to a
     * single encoding letter; otherwise fixed-width multi-letter encodings are generated.</p>
     *
     * @param original the code points of the original alphabet
     * @param encoding the code points of the encoding alphabet
     * @param doNotEncode code points that must map to themselves
     * @return the converter
     * @throws IllegalArgumentException if a "do not encode" letter is missing from either
     *         alphabet, or if multi-letter encodings are needed and fewer than two encoding
     *         letters remain once the "do not encode" letters are excluded
     */
    public static AlphabetConverter createConverter(final Integer[] original, final Integer[] encoding, final Integer[] doNotEncode) {

        final Set<Integer> originalCopy = new LinkedHashSet<>(Arrays.asList(original));
        final Set<Integer> encodingCopy = new LinkedHashSet<>(Arrays.asList(encoding));
        final Set<Integer> doNotEncodeCopy = new LinkedHashSet<>(Arrays.asList(doNotEncode));

        final Map<Integer, String> originalToEncoded = new LinkedHashMap<>();
        final Map<String, String> encodedToOriginal = new LinkedHashMap<>();
        final Map<Integer, String> doNotEncodeMap = new HashMap<>();

        final int encodedLetterLength;

        // A letter can only stay untouched if it belongs to both alphabets.
        for (final int i : doNotEncodeCopy) {
            if (!originalCopy.contains(i)) {
                throw new IllegalArgumentException(
                        "Can not use 'do not encode' list because original alphabet does not contain '"
                                + codePointToString(i) + "'");
            }

            if (!encodingCopy.contains(i)) {
                throw new IllegalArgumentException(
                        "Can not use 'do not encode' list because encoding alphabet does not contain '"
                                + codePointToString(i) + "'");
            }

            doNotEncodeMap.put(i, codePointToString(i));
        }

        if (encodingCopy.size() >= originalCopy.size()) {
            // Enough encoding letters for a one-to-one mapping.
            encodedLetterLength = 1;

            final Iterator<Integer> it = encodingCopy.iterator();

            for (final int originalLetter : originalCopy) {
                final String originalLetterAsString = codePointToString(originalLetter);

                if (doNotEncodeMap.containsKey(originalLetter)) {
                    originalToEncoded.put(originalLetter, originalLetterAsString);
                    encodedToOriginal.put(originalLetterAsString, originalLetterAsString);
                } else {
                    Integer next = it.next();

                    // Encoding letters reserved for "do not encode" cannot be reused as encodings.
                    while (doNotEncodeCopy.contains(next)) {
                        next = it.next();
                    }

                    final String encodedLetter = codePointToString(next);

                    originalToEncoded.put(originalLetter, encodedLetter);
                    encodedToOriginal.put(encodedLetter, originalLetterAsString);
                }
            }

            return new AlphabetConverter(originalToEncoded, encodedToOriginal, encodedLetterLength);

        } else if (encodingCopy.size() - doNotEncodeCopy.size() < 2) {
            throw new IllegalArgumentException(
                    "Must have at least two encoding characters (excluding those in the 'do not encode' list), but has "
                            + (encodingCopy.size() - doNotEncodeCopy.size()));
        } else {
            // Work out how many encoding letters each original letter needs. The first letter of a
            // group cannot be a "do not encode" letter, the following ones can be any encoding letter.
            int lettersSoFar = 1;

            int lettersLeft = (originalCopy.size() - doNotEncodeCopy.size())
                    / (encodingCopy.size() - doNotEncodeCopy.size());

            while (lettersLeft / encodingCopy.size() >= 1) {
                lettersLeft = lettersLeft / encodingCopy.size();
                lettersSoFar++;
            }

            encodedLetterLength = lettersSoFar + 1;

            final AlphabetConverter ac = new AlphabetConverter(originalToEncoded, encodedToOriginal, encodedLetterLength);

            // Fills the two (still empty) maps held by the new converter.
            ac.addSingleEncoding(encodedLetterLength, "", encodingCopy, originalCopy.iterator(), doNotEncodeMap);

            return ac;
        }
    }

    /**
     * Converts a code point to the string that represents it.
     *
     * @param i the code point
     * @return a one-char string for a code point that fits in one char, otherwise the string
     *         built from {@link Character#toChars(int)}
     */
    private static String codePointToString(final int i) {
        if (Character.charCount(i) == 1) {
            return String.valueOf((char) i);
        }
        return new String(Character.toChars(i));
    }
}