1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17 package org.apache.commons.text;
18
19 import static org.junit.jupiter.api.Assertions.assertEquals;
20 import static org.junit.jupiter.api.Assertions.assertFalse;
21 import static org.junit.jupiter.api.Assertions.assertNull;
22 import static org.junit.jupiter.api.Assertions.assertThrows;
23 import static org.junit.jupiter.api.Assertions.assertTrue;
24
25 import java.io.UnsupportedEncodingException;
26 import java.util.Arrays;
27 import java.util.HashMap;
28 import java.util.List;
29 import java.util.Map;
30
31 import org.apache.commons.lang3.ArrayUtils;
32 import org.junit.jupiter.api.Test;
33
34
35
36
37 class AlphabetConverterTest {
38
39 private static final Character[] LOWER_CASE_ENGLISH = { ' ', 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's',
40 't', 'u', 'v', 'w', 'x', 'y', 'z' };
41
42 private static final Character[] ENGLISH_AND_NUMBERS = { '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j',
43 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M',
44 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', ' ' };
45
46 private static final Character[] LOWER_CASE_ENGLISH_AND_NUMBERS = { '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'a', 'b', 'c', 'd', 'e', 'f', 'g',
47 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', ' ' };
48
49 private static final Character[] NUMBERS = { '0', '1', '2', '3', '4', '5', '6', '7', '8', '9' };
50
51 private static final Character[] BINARY = { '0', '1' };
52
53 private static final Character[] HEBREW = { '_', ' ', '\u05e7', '\u05e8', '\u05d0', '\u05d8', '\u05d5', '\u05df', '\u05dd', '\u05e4', '\u05e9', '\u05d3',
54 '\u05d2', '\u05db', '\u05e2', '\u05d9', '\u05d7', '\u05dc', '\u05da', '\u05e3', '\u05d6', '\u05e1', '\u05d1', '\u05d4', '\u05e0', '\u05de',
55 '\u05e6', '\u05ea', '\u05e5' };
56
57 private static final Integer[] UNICODE = { 32, 35395, 35397, 36302, 36291, 35203, 35201, 35215, 35219, 35268, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106,
58 107, 108, 109, 110, 1001, 1002, 1003, 1004, 1005 };
59
60 private static final Integer[] LOWER_CASE_ENGLISH_CODEPOINTS = { 32, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114,
61 115, 116, 117, 118, 119, 120, 121, 122 };
62
63 private static final Integer[] DO_NOT_ENCODE_CODEPOINTS = { 32, 97, 98, 99 };
64
65 private AlphabetConverter createJavadocExample() {
66 final Character[] original = { 'a', 'b', 'c', 'd' };
67 final Character[] encoding = { '0', '1', 'd' };
68 final Character[] doNotEncode = { 'd' };
69
70 return AlphabetConverter.createConverterFromChars(original, encoding, doNotEncode);
71 }
72
73 private void test(final Character[] originalChars, final Character[] encodingChars, final Character[] doNotEncodeChars, final String... strings)
74 throws UnsupportedEncodingException {
75
76 final AlphabetConverter ac = AlphabetConverter.createConverterFromChars(originalChars, encodingChars, doNotEncodeChars);
77
78 final AlphabetConverter reconstructedAlphabetConverter = AlphabetConverter.createConverterFromMap(ac.getOriginalToEncoded());
79
80 assertEquals(ac, reconstructedAlphabetConverter);
81 assertEquals(ac.hashCode(), reconstructedAlphabetConverter.hashCode());
82 assertEquals(ac.toString(), reconstructedAlphabetConverter.toString());
83 assertNull(ac.encode(null));
84 assertEquals("", ac.encode(""));
85
86
87 for (final String s : strings) {
88 final String encoded = ac.encode(s);
89
90
91 final List<Character> originalEncodingChars = Arrays.asList(encodingChars);
92 for (int i = 0; i < encoded.length(); i++) {
93 assertTrue(originalEncodingChars.contains(encoded.charAt(i)));
94 }
95
96 final String decoded = ac.decode(encoded);
97
98
99 final List<Character> originalCharsList = Arrays.asList(originalChars);
100 for (int i = 0; i < decoded.length(); i++) {
101 assertTrue(originalCharsList.contains(decoded.charAt(i)));
102 }
103
104 assertEquals(s, decoded, () -> "Encoded '" + s + "' into '" + encoded + "', but decoded into '" + decoded + "'");
105 }
106 }
107
108 @Test
109 void testBinaryTest() throws UnsupportedEncodingException {
110 test(BINARY, NUMBERS, ArrayUtils.EMPTY_CHARACTER_OBJECT_ARRAY, "0", "1", "10", "11");
111 test(NUMBERS, BINARY, ArrayUtils.EMPTY_CHARACTER_OBJECT_ARRAY, "12345", "0");
112 test(LOWER_CASE_ENGLISH, BINARY, ArrayUtils.EMPTY_CHARACTER_OBJECT_ARRAY, "abc", "a");
113 }
114
115 @Test
116 void testCreateConverterFromCharsAndEquals() {
117 final Character[] characterArray = new Character[2];
118 final char charOne = '+';
119 final char character = '+';
120 characterArray[0] = character;
121 characterArray[1] = characterArray[0];
122 final AlphabetConverter alphabetConverter = AlphabetConverter.createConverterFromChars(characterArray, characterArray, characterArray);
123
124 assertFalse(alphabetConverter.equals(charOne));
125 }
126
127 @Test
128 void testCreateConverterFromCharsOne() {
129 final Character[] characterArray = new Character[2];
130 characterArray[0] = '5';
131 characterArray[1] = characterArray[0];
132 final AlphabetConverter alphabetConverter = AlphabetConverter.createConverterFromChars(characterArray, characterArray, characterArray);
133
134 assertEquals(1, alphabetConverter.getEncodedCharLength());
135 }
136
137 @Test
138 void testCreateConverterFromCharsWithNullAndNull() {
139 assertThrows(IllegalArgumentException.class, () -> {
140 final Character[] characterArray = new Character[2];
141 characterArray[0] = '$';
142 characterArray[1] = characterArray[0];
143 AlphabetConverter.createConverterFromChars(characterArray, null, null);
144 });
145 }
146
147 @Test
148 void testCreateConverterFromMapAndEquals() {
149 final Map<Integer, String> hashMap = new HashMap<>();
150 final AlphabetConverter alphabetConverter = AlphabetConverter.createConverterFromMap(hashMap);
151 hashMap.put(0, "CtDs");
152 final AlphabetConverter alphabetConverterTwo = AlphabetConverter.createConverterFromMap(hashMap);
153 assertFalse(alphabetConverter.equals(alphabetConverterTwo));
154 assertEquals(1, alphabetConverter.getEncodedCharLength());
155 }
156
157 @Test
158 void testDecodeReturningNull() throws UnsupportedEncodingException {
159 final Map<Integer, String> map = new HashMap<>();
160 final AlphabetConverter alphabetConverter = AlphabetConverter.createConverterFromMap(map);
161 alphabetConverter.decode(null);
162 assertEquals(1, alphabetConverter.getEncodedCharLength());
163 }
164
165 @Test
166 void testDoNotEncodeTest() throws UnsupportedEncodingException {
167 test(ENGLISH_AND_NUMBERS, LOWER_CASE_ENGLISH_AND_NUMBERS, LOWER_CASE_ENGLISH, "1", "456", "abc", "ABC", "this will not be converted but THIS WILL");
168 test(ENGLISH_AND_NUMBERS, LOWER_CASE_ENGLISH_AND_NUMBERS, NUMBERS, "1", "456", "abc", "ABC", "this will be converted but 12345 and this will be");
169 }
170
171 @Test
172 void testEncodeFailureTest() {
173 assertEquals("Couldn't find encoding for '3' in 3",
174 assertThrows(UnsupportedEncodingException.class, () -> test(BINARY, NUMBERS, ArrayUtils.EMPTY_CHARACTER_OBJECT_ARRAY, "3")).getMessage());
175 }
176
177 @Test
178 void testEquals() {
179 final Character[] characterArray = new Character[2];
180 final char character = 'R';
181 characterArray[0] = character;
182 characterArray[1] = character;
183 final AlphabetConverter alphabetConverter = AlphabetConverter.createConverterFromChars(characterArray, characterArray, characterArray);
184 final Map<Integer, String> map = new HashMap<>();
185 final AlphabetConverter alphabetConverterTwo = AlphabetConverter.createConverterFromMap(map);
186
187 assertEquals(1, alphabetConverterTwo.getEncodedCharLength());
188 assertFalse(alphabetConverter.equals(alphabetConverterTwo));
189 }
190
191 @Test
192 void testEqualsWithNull() {
193 final Character[] characterArray = ArrayUtils.EMPTY_CHARACTER_OBJECT_ARRAY;
194 final AlphabetConverter alphabetConverter = AlphabetConverter.createConverterFromChars(characterArray, null, null);
195
196 assertFalse(alphabetConverter.equals(null));
197 }
198
199 @Test
200 void testEqualsWithSameObject() {
201 final Character[] characterArray = new Character[2];
202 final char character = 'R';
203 characterArray[0] = character;
204 characterArray[1] = character;
205 final AlphabetConverter alphabetConverter = AlphabetConverter.createConverterFromChars(characterArray, characterArray, characterArray);
206
207 assertTrue(alphabetConverter.equals(alphabetConverter));
208 }
209
210 @Test
211 void testHebrewTest() throws UnsupportedEncodingException {
212 test(HEBREW, BINARY, ArrayUtils.EMPTY_CHARACTER_OBJECT_ARRAY, "\u05d0", "\u05e2",
213 "\u05d0\u05dc\u05e3_\u05d0\u05d5\u05d4\u05d1\u05dc_\u05d1\u05d9\u05ea_\u05d6\u05d4_\u05d1\u05d9\u05ea_"
214 + "\u05d2\u05d9\u05de\u05dc_\u05d6\u05d4_\u05db\u05de\u05dc_\u05d2\u05d3\u05d5\u05dc");
215 test(HEBREW, NUMBERS, ArrayUtils.EMPTY_CHARACTER_OBJECT_ARRAY, "\u05d0", "\u05e2",
216 "\u05d0\u05dc\u05e3_\u05d0\u05d5\u05d4\u05d1\u05dc_\u05d1\u05d9\u05ea_\u05d6\u05d4_\u05d1\u05d9\u05ea_"
217 + "\u05d2\u05d9\u05de\u05dc_\u05d6\u05d4_\u05db\u05de\u05dc_\u05d2\u05d3\u05d5\u05dc");
218 test(NUMBERS, HEBREW, ArrayUtils.EMPTY_CHARACTER_OBJECT_ARRAY, "123456789", "1", "5");
219 test(LOWER_CASE_ENGLISH, HEBREW, ArrayUtils.EMPTY_CHARACTER_OBJECT_ARRAY, "this is a test");
220 }
221
222
223
224
225 @Test
226 void testJavadocExampleTest() throws UnsupportedEncodingException {
227 final AlphabetConverter ac = createJavadocExample();
228
229 assertEquals("00", ac.encode("a"));
230 assertEquals("01", ac.encode("b"));
231 assertEquals("0d", ac.encode("c"));
232 assertEquals("d", ac.encode("d"));
233 assertEquals("00010dd", ac.encode("abcd"));
234 }
235
236 @Test
237 void testMissingDoNotEncodeLettersFromEncodingTest() {
238 assertEquals("Can not use 'do not encode' list because encoding alphabet does not contain '0'",
239 assertThrows(IllegalArgumentException.class, () -> AlphabetConverter.createConverterFromChars(ENGLISH_AND_NUMBERS, LOWER_CASE_ENGLISH, NUMBERS))
240 .getMessage());
241 }
242
243 @Test
244 void testMissingDoNotEncodeLettersFromOriginalTest() {
245 assertEquals("Can not use 'do not encode' list because original alphabet does not contain '0'",
246 assertThrows(IllegalArgumentException.class, () -> AlphabetConverter.createConverterFromChars(LOWER_CASE_ENGLISH, ENGLISH_AND_NUMBERS, NUMBERS))
247 .getMessage());
248 }
249
250 @Test
251 void testNoEncodingLettersTest() {
252 assertEquals("Must have at least two encoding characters (excluding those in the 'do not encode' list), but has 0",
253 assertThrows(IllegalArgumentException.class, () -> AlphabetConverter.createConverterFromChars(ENGLISH_AND_NUMBERS, NUMBERS, NUMBERS))
254 .getMessage());
255 }
256
257 @Test
258 void testOnlyOneEncodingLettersTest() {
259 assertEquals("Must have at least two encoding characters (excluding those in the 'do not encode' list), but has 1",
260 assertThrows(IllegalArgumentException.class, () -> {
261 final Character[] numbersPlusUnderscore = Arrays.copyOf(NUMBERS, NUMBERS.length + 1);
262 numbersPlusUnderscore[numbersPlusUnderscore.length - 1] = '_';
263
264 AlphabetConverter.createConverterFromChars(ENGLISH_AND_NUMBERS, numbersPlusUnderscore, NUMBERS);
265 }).getMessage());
266 }
267
268 @Test
269 void testUnexpectedEndWhileDecodingTest() {
270 final String toDecode = "00d01d0";
271 assertEquals("Unexpected end of string while decoding " + toDecode,
272 assertThrows(UnsupportedEncodingException.class, () -> createJavadocExample().decode(toDecode)).getMessage());
273 }
274
275 @Test
276 void testUnexpectedStringWhileDecodingTest() {
277 final String toDecode = "00XX";
278 assertEquals("Unexpected string without decoding (XX) in " + toDecode,
279 assertThrows(UnsupportedEncodingException.class, () -> createJavadocExample().decode(toDecode)).getMessage());
280 }
281
282
283
284
285 @Test
286 void testUnicodeTest() throws UnsupportedEncodingException {
287 final AlphabetConverter ac = AlphabetConverter.createConverter(UNICODE, LOWER_CASE_ENGLISH_CODEPOINTS, DO_NOT_ENCODE_CODEPOINTS);
288 assertEquals(2, ac.getEncodedCharLength());
289 final String original = "\u8a43\u8a45 \u8dce ab \u8dc3 c \u8983";
290 final String encoded = ac.encode(original);
291 final String decoded = ac.decode(encoded);
292 assertEquals(original, decoded, () -> "Encoded '" + original + "' into '" + encoded + "', but decoded into '" + decoded + "'");
293 }
294
295 }