/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *      https://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
17  package org.apache.commons.text;
18  
19  import static org.junit.jupiter.api.Assertions.assertEquals;
20  import static org.junit.jupiter.api.Assertions.assertFalse;
21  import static org.junit.jupiter.api.Assertions.assertNull;
22  import static org.junit.jupiter.api.Assertions.assertThrows;
23  import static org.junit.jupiter.api.Assertions.assertTrue;
24  
25  import java.io.UnsupportedEncodingException;
26  import java.util.Arrays;
27  import java.util.HashMap;
28  import java.util.List;
29  import java.util.Map;
30  
31  import org.apache.commons.lang3.ArrayUtils;
32  import org.junit.jupiter.api.Test;
33  
34  /**
35   * Tests {@link AlphabetConverter}.
36   */
37  class AlphabetConverterTest {
38  
39      private static final Character[] LOWER_CASE_ENGLISH = { ' ', 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's',
40              't', 'u', 'v', 'w', 'x', 'y', 'z' };
41  
42      private static final Character[] ENGLISH_AND_NUMBERS = { '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j',
43              'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M',
44              'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', ' ' };
45  
46      private static final Character[] LOWER_CASE_ENGLISH_AND_NUMBERS = { '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'a', 'b', 'c', 'd', 'e', 'f', 'g',
47              'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', ' ' };
48  
49      private static final Character[] NUMBERS = { '0', '1', '2', '3', '4', '5', '6', '7', '8', '9' };
50  
51      private static final Character[] BINARY = { '0', '1' };
52  
53      private static final Character[] HEBREW = { '_', ' ', '\u05e7', '\u05e8', '\u05d0', '\u05d8', '\u05d5', '\u05df', '\u05dd', '\u05e4', '\u05e9', '\u05d3',
54              '\u05d2', '\u05db', '\u05e2', '\u05d9', '\u05d7', '\u05dc', '\u05da', '\u05e3', '\u05d6', '\u05e1', '\u05d1', '\u05d4', '\u05e0', '\u05de',
55              '\u05e6', '\u05ea', '\u05e5' };
56  
57      private static final Integer[] UNICODE = { 32, 35395, 35397, 36302, 36291, 35203, 35201, 35215, 35219, 35268, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106,
58              107, 108, 109, 110, 1001, 1002, 1003, 1004, 1005 };
59  
60      private static final Integer[] LOWER_CASE_ENGLISH_CODEPOINTS = { 32, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114,
61              115, 116, 117, 118, 119, 120, 121, 122 };
62  
63      private static final Integer[] DO_NOT_ENCODE_CODEPOINTS = { 32, 97, 98, 99 }; // space, a, b, c
64  
65      private AlphabetConverter createJavadocExample() {
66          final Character[] original = { 'a', 'b', 'c', 'd' };
67          final Character[] encoding = { '0', '1', 'd' };
68          final Character[] doNotEncode = { 'd' };
69  
70          return AlphabetConverter.createConverterFromChars(original, encoding, doNotEncode);
71      }
72  
73      private void test(final Character[] originalChars, final Character[] encodingChars, final Character[] doNotEncodeChars, final String... strings)
74              throws UnsupportedEncodingException {
75  
76          final AlphabetConverter ac = AlphabetConverter.createConverterFromChars(originalChars, encodingChars, doNotEncodeChars);
77  
78          final AlphabetConverter reconstructedAlphabetConverter = AlphabetConverter.createConverterFromMap(ac.getOriginalToEncoded());
79  
80          assertEquals(ac, reconstructedAlphabetConverter);
81          assertEquals(ac.hashCode(), reconstructedAlphabetConverter.hashCode());
82          assertEquals(ac.toString(), reconstructedAlphabetConverter.toString());
83          assertNull(ac.encode(null)); // test null conversions
84          assertEquals("", ac.encode("")); // test empty conversion
85  
86          // test all the trial strings
87          for (final String s : strings) {
88              final String encoded = ac.encode(s);
89  
90              // test that only encoding chars are used
91              final List<Character> originalEncodingChars = Arrays.asList(encodingChars);
92              for (int i = 0; i < encoded.length(); i++) {
93                  assertTrue(originalEncodingChars.contains(encoded.charAt(i)));
94              }
95  
96              final String decoded = ac.decode(encoded);
97  
98              // test that only the original alphabet is used after decoding
99              final List<Character> originalCharsList = Arrays.asList(originalChars);
100             for (int i = 0; i < decoded.length(); i++) {
101                 assertTrue(originalCharsList.contains(decoded.charAt(i)));
102             }
103 
104             assertEquals(s, decoded, () -> "Encoded '" + s + "' into '" + encoded + "', but decoded into '" + decoded + "'");
105         }
106     }
107 
108     @Test
109     void testBinaryTest() throws UnsupportedEncodingException {
110         test(BINARY, NUMBERS, ArrayUtils.EMPTY_CHARACTER_OBJECT_ARRAY, "0", "1", "10", "11");
111         test(NUMBERS, BINARY, ArrayUtils.EMPTY_CHARACTER_OBJECT_ARRAY, "12345", "0");
112         test(LOWER_CASE_ENGLISH, BINARY, ArrayUtils.EMPTY_CHARACTER_OBJECT_ARRAY, "abc", "a");
113     }
114 
115     @Test
116     void testCreateConverterFromCharsAndEquals() {
117         final Character[] characterArray = new Character[2];
118         final char charOne = '+';
119         final char character = '+';
120         characterArray[0] = character;
121         characterArray[1] = characterArray[0];
122         final AlphabetConverter alphabetConverter = AlphabetConverter.createConverterFromChars(characterArray, characterArray, characterArray);
123 
124         assertFalse(alphabetConverter.equals(charOne));
125     }
126 
127     @Test
128     void testCreateConverterFromCharsOne() {
129         final Character[] characterArray = new Character[2];
130         characterArray[0] = '5';
131         characterArray[1] = characterArray[0];
132         final AlphabetConverter alphabetConverter = AlphabetConverter.createConverterFromChars(characterArray, characterArray, characterArray);
133 
134         assertEquals(1, alphabetConverter.getEncodedCharLength());
135     }
136 
137     @Test
138     void testCreateConverterFromCharsWithNullAndNull() {
139         assertThrows(IllegalArgumentException.class, () -> {
140             final Character[] characterArray = new Character[2];
141             characterArray[0] = '$';
142             characterArray[1] = characterArray[0];
143             AlphabetConverter.createConverterFromChars(characterArray, null, null);
144         });
145     }
146 
147     @Test
148     void testCreateConverterFromMapAndEquals() {
149         final Map<Integer, String> hashMap = new HashMap<>();
150         final AlphabetConverter alphabetConverter = AlphabetConverter.createConverterFromMap(hashMap);
151         hashMap.put(0, "CtDs");
152         final AlphabetConverter alphabetConverterTwo = AlphabetConverter.createConverterFromMap(hashMap);
153         assertFalse(alphabetConverter.equals(alphabetConverterTwo));
154         assertEquals(1, alphabetConverter.getEncodedCharLength());
155     }
156 
157     @Test
158     void testDecodeReturningNull() throws UnsupportedEncodingException {
159         final Map<Integer, String> map = new HashMap<>();
160         final AlphabetConverter alphabetConverter = AlphabetConverter.createConverterFromMap(map);
161         alphabetConverter.decode(null);
162         assertEquals(1, alphabetConverter.getEncodedCharLength());
163     }
164 
165     @Test
166     void testDoNotEncodeTest() throws UnsupportedEncodingException {
167         test(ENGLISH_AND_NUMBERS, LOWER_CASE_ENGLISH_AND_NUMBERS, LOWER_CASE_ENGLISH, "1", "456", "abc", "ABC", "this will not be converted but THIS WILL");
168         test(ENGLISH_AND_NUMBERS, LOWER_CASE_ENGLISH_AND_NUMBERS, NUMBERS, "1", "456", "abc", "ABC", "this will be converted but 12345 and this will be");
169     }
170 
171     @Test
172     void testEncodeFailureTest() {
173         assertEquals("Couldn't find encoding for '3' in 3",
174                 assertThrows(UnsupportedEncodingException.class, () -> test(BINARY, NUMBERS, ArrayUtils.EMPTY_CHARACTER_OBJECT_ARRAY, "3")).getMessage());
175     }
176 
177     @Test
178     void testEquals() {
179         final Character[] characterArray = new Character[2];
180         final char character = 'R';
181         characterArray[0] = character;
182         characterArray[1] = character;
183         final AlphabetConverter alphabetConverter = AlphabetConverter.createConverterFromChars(characterArray, characterArray, characterArray);
184         final Map<Integer, String> map = new HashMap<>();
185         final AlphabetConverter alphabetConverterTwo = AlphabetConverter.createConverterFromMap(map);
186 
187         assertEquals(1, alphabetConverterTwo.getEncodedCharLength());
188         assertFalse(alphabetConverter.equals(alphabetConverterTwo));
189     }
190 
191     @Test
192     void testEqualsWithNull() {
193         final Character[] characterArray = ArrayUtils.EMPTY_CHARACTER_OBJECT_ARRAY;
194         final AlphabetConverter alphabetConverter = AlphabetConverter.createConverterFromChars(characterArray, null, null);
195 
196         assertFalse(alphabetConverter.equals(null));
197     }
198 
199     @Test
200     void testEqualsWithSameObject() {
201         final Character[] characterArray = new Character[2];
202         final char character = 'R';
203         characterArray[0] = character;
204         characterArray[1] = character;
205         final AlphabetConverter alphabetConverter = AlphabetConverter.createConverterFromChars(characterArray, characterArray, characterArray);
206 
207         assertTrue(alphabetConverter.equals(alphabetConverter));
208     }
209 
210     @Test
211     void testHebrewTest() throws UnsupportedEncodingException {
212         test(HEBREW, BINARY, ArrayUtils.EMPTY_CHARACTER_OBJECT_ARRAY, "\u05d0", "\u05e2",
213                 "\u05d0\u05dc\u05e3_\u05d0\u05d5\u05d4\u05d1\u05dc_\u05d1\u05d9\u05ea_\u05d6\u05d4_\u05d1\u05d9\u05ea_"
214               + "\u05d2\u05d9\u05de\u05dc_\u05d6\u05d4_\u05db\u05de\u05dc_\u05d2\u05d3\u05d5\u05dc");
215         test(HEBREW, NUMBERS, ArrayUtils.EMPTY_CHARACTER_OBJECT_ARRAY, "\u05d0", "\u05e2",
216                 "\u05d0\u05dc\u05e3_\u05d0\u05d5\u05d4\u05d1\u05dc_\u05d1\u05d9\u05ea_\u05d6\u05d4_\u05d1\u05d9\u05ea_"
217               + "\u05d2\u05d9\u05de\u05dc_\u05d6\u05d4_\u05db\u05de\u05dc_\u05d2\u05d3\u05d5\u05dc");
218         test(NUMBERS, HEBREW, ArrayUtils.EMPTY_CHARACTER_OBJECT_ARRAY, "123456789", "1", "5");
219         test(LOWER_CASE_ENGLISH, HEBREW, ArrayUtils.EMPTY_CHARACTER_OBJECT_ARRAY, "this is a test");
220     }
221 
222     /*
223      * Test example in javadocs for consistency
224      */
225     @Test
226     void testJavadocExampleTest() throws UnsupportedEncodingException {
227         final AlphabetConverter ac = createJavadocExample();
228 
229         assertEquals("00", ac.encode("a"));
230         assertEquals("01", ac.encode("b"));
231         assertEquals("0d", ac.encode("c"));
232         assertEquals("d", ac.encode("d"));
233         assertEquals("00010dd", ac.encode("abcd"));
234     }
235 
236     @Test
237     void testMissingDoNotEncodeLettersFromEncodingTest() {
238         assertEquals("Can not use 'do not encode' list because encoding alphabet does not contain '0'",
239                 assertThrows(IllegalArgumentException.class, () -> AlphabetConverter.createConverterFromChars(ENGLISH_AND_NUMBERS, LOWER_CASE_ENGLISH, NUMBERS))
240                         .getMessage());
241     }
242 
243     @Test
244     void testMissingDoNotEncodeLettersFromOriginalTest() {
245         assertEquals("Can not use 'do not encode' list because original alphabet does not contain '0'",
246                 assertThrows(IllegalArgumentException.class, () -> AlphabetConverter.createConverterFromChars(LOWER_CASE_ENGLISH, ENGLISH_AND_NUMBERS, NUMBERS))
247                         .getMessage());
248     }
249 
250     @Test
251     void testNoEncodingLettersTest() {
252         assertEquals("Must have at least two encoding characters (excluding those in the 'do not encode' list), but has 0",
253                 assertThrows(IllegalArgumentException.class, () -> AlphabetConverter.createConverterFromChars(ENGLISH_AND_NUMBERS, NUMBERS, NUMBERS))
254                         .getMessage());
255     }
256 
257     @Test
258     void testOnlyOneEncodingLettersTest() {
259         assertEquals("Must have at least two encoding characters (excluding those in the 'do not encode' list), but has 1",
260                 assertThrows(IllegalArgumentException.class, () -> {
261                     final Character[] numbersPlusUnderscore = Arrays.copyOf(NUMBERS, NUMBERS.length + 1);
262                     numbersPlusUnderscore[numbersPlusUnderscore.length - 1] = '_';
263 
264                     AlphabetConverter.createConverterFromChars(ENGLISH_AND_NUMBERS, numbersPlusUnderscore, NUMBERS);
265                 }).getMessage());
266     }
267 
268     @Test
269     void testUnexpectedEndWhileDecodingTest() {
270         final String toDecode = "00d01d0";
271         assertEquals("Unexpected end of string while decoding " + toDecode,
272                 assertThrows(UnsupportedEncodingException.class, () -> createJavadocExample().decode(toDecode)).getMessage());
273     }
274 
275     @Test
276     void testUnexpectedStringWhileDecodingTest() {
277         final String toDecode = "00XX";
278         assertEquals("Unexpected string without decoding (XX) in " + toDecode,
279                 assertThrows(UnsupportedEncodingException.class, () -> createJavadocExample().decode(toDecode)).getMessage());
280     }
281 
282     /**
283      * Test constructor from code points
284      */
285     @Test
286     void testUnicodeTest() throws UnsupportedEncodingException {
287         final AlphabetConverter ac = AlphabetConverter.createConverter(UNICODE, LOWER_CASE_ENGLISH_CODEPOINTS, DO_NOT_ENCODE_CODEPOINTS);
288         assertEquals(2, ac.getEncodedCharLength());
289         final String original = "\u8a43\u8a45 \u8dce ab \u8dc3 c \u8983";
290         final String encoded = ac.encode(original);
291         final String decoded = ac.decode(encoded);
292         assertEquals(original, decoded, () -> "Encoded '" + original + "' into '" + encoded + "', but decoded into '" + decoded + "'");
293     }
294 
295 }