/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */
19  package org.apache.commons.lang3;
20  
21  import static org.junit.jupiter.api.Assertions.assertEquals;
22  import static org.junit.jupiter.api.Assertions.assertFalse;
23  import static org.junit.jupiter.api.Assertions.assertNotEquals;
24  import static org.junit.jupiter.api.Assertions.assertNull;
25  import static org.junit.jupiter.api.Assertions.assertSame;
26  import static org.junit.jupiter.api.Assertions.assertTrue;
27  
28  import java.lang.reflect.Modifier;
29  
30  import org.junit.jupiter.api.Test;
31  
32  /**
33   * Unit tests {@link org.apache.commons.lang3.CharSet}.
34   */
35  public class CharSetTest extends AbstractLangTest {
36  
37      @Test
38      public void testClass() {
39          assertTrue(Modifier.isPublic(CharSet.class.getModifiers()));
40          assertFalse(Modifier.isFinal(CharSet.class.getModifiers()));
41      }
42  
43      @Test
44      public void testConstructor_String_combo() {
45          CharSet set;
46          CharRange[] array;
47  
48          set = CharSet.getInstance("abc");
49          array = set.getCharRanges();
50          assertEquals(3, array.length);
51          assertTrue(ArrayUtils.contains(array, CharRange.is('a')));
52          assertTrue(ArrayUtils.contains(array, CharRange.is('b')));
53          assertTrue(ArrayUtils.contains(array, CharRange.is('c')));
54  
55          set = CharSet.getInstance("a-ce-f");
56          array = set.getCharRanges();
57          assertEquals(2, array.length);
58          assertTrue(ArrayUtils.contains(array, CharRange.isIn('a', 'c')));
59          assertTrue(ArrayUtils.contains(array, CharRange.isIn('e', 'f')));
60  
61          set = CharSet.getInstance("ae-f");
62          array = set.getCharRanges();
63          assertEquals(2, array.length);
64          assertTrue(ArrayUtils.contains(array, CharRange.is('a')));
65          assertTrue(ArrayUtils.contains(array, CharRange.isIn('e', 'f')));
66  
67          set = CharSet.getInstance("e-fa");
68          array = set.getCharRanges();
69          assertEquals(2, array.length);
70          assertTrue(ArrayUtils.contains(array, CharRange.is('a')));
71          assertTrue(ArrayUtils.contains(array, CharRange.isIn('e', 'f')));
72  
73          set = CharSet.getInstance("ae-fm-pz");
74          array = set.getCharRanges();
75          assertEquals(4, array.length);
76          assertTrue(ArrayUtils.contains(array, CharRange.is('a')));
77          assertTrue(ArrayUtils.contains(array, CharRange.isIn('e', 'f')));
78          assertTrue(ArrayUtils.contains(array, CharRange.isIn('m', 'p')));
79          assertTrue(ArrayUtils.contains(array, CharRange.is('z')));
80      }
81  
82      @Test
83      public void testConstructor_String_comboNegated() {
84          CharSet set;
85          CharRange[] array;
86  
87          set = CharSet.getInstance("^abc");
88          array = set.getCharRanges();
89          assertEquals(3, array.length);
90          assertTrue(ArrayUtils.contains(array, CharRange.isNot('a')));
91          assertTrue(ArrayUtils.contains(array, CharRange.is('b')));
92          assertTrue(ArrayUtils.contains(array, CharRange.is('c')));
93  
94          set = CharSet.getInstance("b^ac");
95          array = set.getCharRanges();
96          assertEquals(3, array.length);
97          assertTrue(ArrayUtils.contains(array, CharRange.is('b')));
98          assertTrue(ArrayUtils.contains(array, CharRange.isNot('a')));
99          assertTrue(ArrayUtils.contains(array, CharRange.is('c')));
100 
101         set = CharSet.getInstance("db^ac");
102         array = set.getCharRanges();
103         assertEquals(4, array.length);
104         assertTrue(ArrayUtils.contains(array, CharRange.is('d')));
105         assertTrue(ArrayUtils.contains(array, CharRange.is('b')));
106         assertTrue(ArrayUtils.contains(array, CharRange.isNot('a')));
107         assertTrue(ArrayUtils.contains(array, CharRange.is('c')));
108 
109         set = CharSet.getInstance("^b^a");
110         array = set.getCharRanges();
111         assertEquals(2, array.length);
112         assertTrue(ArrayUtils.contains(array, CharRange.isNot('b')));
113         assertTrue(ArrayUtils.contains(array, CharRange.isNot('a')));
114 
115         set = CharSet.getInstance("b^a-c^z");
116         array = set.getCharRanges();
117         assertEquals(3, array.length);
118         assertTrue(ArrayUtils.contains(array, CharRange.isNotIn('a', 'c')));
119         assertTrue(ArrayUtils.contains(array, CharRange.isNot('z')));
120         assertTrue(ArrayUtils.contains(array, CharRange.is('b')));
121     }
122 
123     @Test
124     public void testConstructor_String_oddCombinations() {
125         CharSet set;
126         CharRange[] array;
127 
128         set = CharSet.getInstance("a-^c");
129         array = set.getCharRanges();
130         assertTrue(ArrayUtils.contains(array, CharRange.isIn('a', '^'))); // "a-^"
131         assertTrue(ArrayUtils.contains(array, CharRange.is('c'))); // "c"
132         assertFalse(set.contains('b'));
133         assertTrue(set.contains('^'));
134         assertTrue(set.contains('_')); // between ^ and a
135         assertTrue(set.contains('c'));
136 
137         set = CharSet.getInstance("^a-^c");
138         array = set.getCharRanges();
139         assertTrue(ArrayUtils.contains(array, CharRange.isNotIn('a', '^'))); // "^a-^"
140         assertTrue(ArrayUtils.contains(array, CharRange.is('c'))); // "c"
141         assertTrue(set.contains('b'));
142         assertFalse(set.contains('^'));
143         assertFalse(set.contains('_')); // between ^ and a
144 
145         set = CharSet.getInstance("a- ^-- "); //contains everything
146         array = set.getCharRanges();
147         assertTrue(ArrayUtils.contains(array, CharRange.isIn('a', ' '))); // "a- "
148         assertTrue(ArrayUtils.contains(array, CharRange.isNotIn('-', ' '))); // "^-- "
149         assertTrue(set.contains('#'));
150         assertTrue(set.contains('^'));
151         assertTrue(set.contains('a'));
152         assertTrue(set.contains('*'));
153         assertTrue(set.contains('A'));
154 
155         set = CharSet.getInstance("^-b");
156         array = set.getCharRanges();
157         assertTrue(ArrayUtils.contains(array, CharRange.isIn('^', 'b'))); // "^-b"
158         assertTrue(set.contains('b'));
159         assertTrue(set.contains('_')); // between ^ and a
160         assertFalse(set.contains('A'));
161         assertTrue(set.contains('^'));
162 
163         set = CharSet.getInstance("b-^");
164         array = set.getCharRanges();
165         assertTrue(ArrayUtils.contains(array, CharRange.isIn('^', 'b'))); // "b-^"
166         assertTrue(set.contains('b'));
167         assertTrue(set.contains('^'));
168         assertTrue(set.contains('a')); // between ^ and b
169         assertFalse(set.contains('c'));
170     }
171 
172     @Test
173     public void testConstructor_String_oddDash() {
174         CharSet set;
175         CharRange[] array;
176 
177         set = CharSet.getInstance("-");
178         array = set.getCharRanges();
179         assertEquals(1, array.length);
180         assertTrue(ArrayUtils.contains(array, CharRange.is('-')));
181 
182         set = CharSet.getInstance("--");
183         array = set.getCharRanges();
184         assertEquals(1, array.length);
185         assertTrue(ArrayUtils.contains(array, CharRange.is('-')));
186 
187         set = CharSet.getInstance("---");
188         array = set.getCharRanges();
189         assertEquals(1, array.length);
190         assertTrue(ArrayUtils.contains(array, CharRange.is('-')));
191 
192         set = CharSet.getInstance("----");
193         array = set.getCharRanges();
194         assertEquals(1, array.length);
195         assertTrue(ArrayUtils.contains(array, CharRange.is('-')));
196 
197         set = CharSet.getInstance("-a");
198         array = set.getCharRanges();
199         assertEquals(2, array.length);
200         assertTrue(ArrayUtils.contains(array, CharRange.is('-')));
201         assertTrue(ArrayUtils.contains(array, CharRange.is('a')));
202 
203         set = CharSet.getInstance("a-");
204         array = set.getCharRanges();
205         assertEquals(2, array.length);
206         assertTrue(ArrayUtils.contains(array, CharRange.is('a')));
207         assertTrue(ArrayUtils.contains(array, CharRange.is('-')));
208 
209         set = CharSet.getInstance("a--");
210         array = set.getCharRanges();
211         assertEquals(1, array.length);
212         assertTrue(ArrayUtils.contains(array, CharRange.isIn('a', '-')));
213 
214         set = CharSet.getInstance("--a");
215         array = set.getCharRanges();
216         assertEquals(1, array.length);
217         assertTrue(ArrayUtils.contains(array, CharRange.isIn('-', 'a')));
218     }
219 
220     @Test
221     public void testConstructor_String_oddNegate() {
222         CharSet set;
223         CharRange[] array;
224         set = CharSet.getInstance("^");
225         array = set.getCharRanges();
226         assertEquals(1, array.length);
227         assertTrue(ArrayUtils.contains(array, CharRange.is('^'))); // "^"
228 
229         set = CharSet.getInstance("^^");
230         array = set.getCharRanges();
231         assertEquals(1, array.length);
232         assertTrue(ArrayUtils.contains(array, CharRange.isNot('^'))); // "^^"
233 
234         set = CharSet.getInstance("^^^");
235         array = set.getCharRanges();
236         assertEquals(2, array.length);
237         assertTrue(ArrayUtils.contains(array, CharRange.isNot('^'))); // "^^"
238         assertTrue(ArrayUtils.contains(array, CharRange.is('^'))); // "^"
239 
240         set = CharSet.getInstance("^^^^");
241         array = set.getCharRanges();
242         assertEquals(1, array.length);
243         assertTrue(ArrayUtils.contains(array, CharRange.isNot('^'))); // "^^" x2
244 
245         set = CharSet.getInstance("a^");
246         array = set.getCharRanges();
247         assertEquals(2, array.length);
248         assertTrue(ArrayUtils.contains(array, CharRange.is('a'))); // "a"
249         assertTrue(ArrayUtils.contains(array, CharRange.is('^'))); // "^"
250 
251         set = CharSet.getInstance("^a-");
252         array = set.getCharRanges();
253         assertEquals(2, array.length);
254         assertTrue(ArrayUtils.contains(array, CharRange.isNot('a'))); // "^a"
255         assertTrue(ArrayUtils.contains(array, CharRange.is('-'))); // "-"
256 
257         set = CharSet.getInstance("^^-c");
258         array = set.getCharRanges();
259         assertEquals(1, array.length);
260         assertTrue(ArrayUtils.contains(array, CharRange.isNotIn('^', 'c'))); // "^^-c"
261 
262         set = CharSet.getInstance("^c-^");
263         array = set.getCharRanges();
264         assertEquals(1, array.length);
265         assertTrue(ArrayUtils.contains(array, CharRange.isNotIn('c', '^'))); // "^c-^"
266 
267         set = CharSet.getInstance("^c-^d");
268         array = set.getCharRanges();
269         assertEquals(2, array.length);
270         assertTrue(ArrayUtils.contains(array, CharRange.isNotIn('c', '^'))); // "^c-^"
271         assertTrue(ArrayUtils.contains(array, CharRange.is('d'))); // "d"
272 
273         set = CharSet.getInstance("^^-");
274         array = set.getCharRanges();
275         assertEquals(2, array.length);
276         assertTrue(ArrayUtils.contains(array, CharRange.isNot('^'))); // "^^"
277         assertTrue(ArrayUtils.contains(array, CharRange.is('-'))); // "-"
278     }
279 
280     @Test
281     public void testConstructor_String_simple() {
282         CharSet set;
283         CharRange[] array;
284 
285         set = CharSet.getInstance((String) null);
286         array = set.getCharRanges();
287         assertEquals("[]", set.toString());
288         assertEquals(0, array.length);
289 
290         set = CharSet.getInstance("");
291         array = set.getCharRanges();
292         assertEquals("[]", set.toString());
293         assertEquals(0, array.length);
294 
295         set = CharSet.getInstance("a");
296         array = set.getCharRanges();
297         assertEquals("[a]", set.toString());
298         assertEquals(1, array.length);
299         assertEquals("a", array[0].toString());
300 
301         set = CharSet.getInstance("^a");
302         array = set.getCharRanges();
303         assertEquals("[^a]", set.toString());
304         assertEquals(1, array.length);
305         assertEquals("^a", array[0].toString());
306 
307         set = CharSet.getInstance("a-e");
308         array = set.getCharRanges();
309         assertEquals("[a-e]", set.toString());
310         assertEquals(1, array.length);
311         assertEquals("a-e", array[0].toString());
312 
313         set = CharSet.getInstance("^a-e");
314         array = set.getCharRanges();
315         assertEquals("[^a-e]", set.toString());
316         assertEquals(1, array.length);
317         assertEquals("^a-e", array[0].toString());
318     }
319 
320     @Test
321     public void testContains_Char() {
322         final CharSet btod = CharSet.getInstance("b-d");
323         final CharSet dtob = CharSet.getInstance("d-b");
324         final CharSet bcd = CharSet.getInstance("bcd");
325         final CharSet bd = CharSet.getInstance("bd");
326         final CharSet notbtod = CharSet.getInstance("^b-d");
327 
328         assertFalse(btod.contains('a'));
329         assertTrue(btod.contains('b'));
330         assertTrue(btod.contains('c'));
331         assertTrue(btod.contains('d'));
332         assertFalse(btod.contains('e'));
333 
334         assertFalse(bcd.contains('a'));
335         assertTrue(bcd.contains('b'));
336         assertTrue(bcd.contains('c'));
337         assertTrue(bcd.contains('d'));
338         assertFalse(bcd.contains('e'));
339 
340         assertFalse(bd.contains('a'));
341         assertTrue(bd.contains('b'));
342         assertFalse(bd.contains('c'));
343         assertTrue(bd.contains('d'));
344         assertFalse(bd.contains('e'));
345 
346         assertTrue(notbtod.contains('a'));
347         assertFalse(notbtod.contains('b'));
348         assertFalse(notbtod.contains('c'));
349         assertFalse(notbtod.contains('d'));
350         assertTrue(notbtod.contains('e'));
351 
352         assertFalse(dtob.contains('a'));
353         assertTrue(dtob.contains('b'));
354         assertTrue(dtob.contains('c'));
355         assertTrue(dtob.contains('d'));
356         assertFalse(dtob.contains('e'));
357 
358         final CharRange[] array = dtob.getCharRanges();
359         assertEquals("[b-d]", dtob.toString());
360         assertEquals(1, array.length);
361     }
362 
363     @Test
364     public void testEquals_Object() {
365         final CharSet abc = CharSet.getInstance("abc");
366         final CharSet abc2 = CharSet.getInstance("abc");
367         final CharSet atoc = CharSet.getInstance("a-c");
368         final CharSet atoc2 = CharSet.getInstance("a-c");
369         final CharSet notatoc = CharSet.getInstance("^a-c");
370         final CharSet notatoc2 = CharSet.getInstance("^a-c");
371 
372         assertNotEquals(null, abc);
373 
374         assertEquals(abc, abc);
375         assertEquals(abc, abc2);
376         assertNotEquals(abc, atoc);
377         assertNotEquals(abc, notatoc);
378 
379         assertNotEquals(atoc, abc);
380         assertEquals(atoc, atoc);
381         assertEquals(atoc, atoc2);
382         assertNotEquals(atoc, notatoc);
383 
384         assertNotEquals(notatoc, abc);
385         assertNotEquals(notatoc, atoc);
386         assertEquals(notatoc, notatoc);
387         assertEquals(notatoc, notatoc2);
388     }
389 
390     @Test
391     public void testGetInstance() {
392         assertSame(CharSet.EMPTY, CharSet.getInstance( (String) null));
393         assertSame(CharSet.EMPTY, CharSet.getInstance(""));
394         assertSame(CharSet.ASCII_ALPHA, CharSet.getInstance("a-zA-Z"));
395         assertSame(CharSet.ASCII_ALPHA, CharSet.getInstance("A-Za-z"));
396         assertSame(CharSet.ASCII_ALPHA_LOWER, CharSet.getInstance("a-z"));
397         assertSame(CharSet.ASCII_ALPHA_UPPER, CharSet.getInstance("A-Z"));
398         assertSame(CharSet.ASCII_NUMERIC, CharSet.getInstance("0-9"));
399     }
400 
401     @Test
402     public void testGetInstance_Stringarray() {
403         assertNull(CharSet.getInstance((String[]) null));
404         assertEquals("[]", CharSet.getInstance(new String[0]).toString());
405         assertEquals("[]", CharSet.getInstance(new String[] {null}).toString());
406         assertEquals("[a-e]", CharSet.getInstance(new String[] {"a-e"}).toString());
407     }
408 
409     @Test
410     public void testHashCode() {
411         final CharSet abc = CharSet.getInstance("abc");
412         final CharSet abc2 = CharSet.getInstance("abc");
413         final CharSet atoc = CharSet.getInstance("a-c");
414         final CharSet atoc2 = CharSet.getInstance("a-c");
415         final CharSet notatoc = CharSet.getInstance("^a-c");
416         final CharSet notatoc2 = CharSet.getInstance("^a-c");
417 
418         assertEquals(abc.hashCode(), abc.hashCode());
419         assertEquals(abc.hashCode(), abc2.hashCode());
420         assertEquals(atoc.hashCode(), atoc.hashCode());
421         assertEquals(atoc.hashCode(), atoc2.hashCode());
422         assertEquals(notatoc.hashCode(), notatoc.hashCode());
423         assertEquals(notatoc.hashCode(), notatoc2.hashCode());
424     }
425 
426     @Test
427     public void testJavadocExamples() {
428         assertFalse(CharSet.getInstance("^a-c").contains('a'));
429         assertTrue(CharSet.getInstance("^a-c").contains('d'));
430         assertTrue(CharSet.getInstance("^^a-c").contains('a'));
431         assertFalse(CharSet.getInstance("^^a-c").contains('^'));
432         assertTrue(CharSet.getInstance("^a-cd-f").contains('d'));
433         assertTrue(CharSet.getInstance("a-c^").contains('^'));
434         assertTrue(CharSet.getInstance("^", "a-c").contains('^'));
435     }
436 
437     @Test
438     public void testSerialization() {
439         CharSet set = CharSet.getInstance("a");
440         assertEquals(set, SerializationUtils.clone(set));
441         set = CharSet.getInstance("a-e");
442         assertEquals(set, SerializationUtils.clone(set));
443         set = CharSet.getInstance("be-f^a-z");
444         assertEquals(set, SerializationUtils.clone(set));
445     }
446 
447     @Test
448     public void testStatics() {
449         CharRange[] array;
450 
451         array = CharSet.EMPTY.getCharRanges();
452         assertEquals(0, array.length);
453 
454         array = CharSet.ASCII_ALPHA.getCharRanges();
455         assertEquals(2, array.length);
456         assertTrue(ArrayUtils.contains(array, CharRange.isIn('a', 'z')));
457         assertTrue(ArrayUtils.contains(array, CharRange.isIn('A', 'Z')));
458 
459         array = CharSet.ASCII_ALPHA_LOWER.getCharRanges();
460         assertEquals(1, array.length);
461         assertTrue(ArrayUtils.contains(array, CharRange.isIn('a', 'z')));
462 
463         array = CharSet.ASCII_ALPHA_UPPER.getCharRanges();
464         assertEquals(1, array.length);
465         assertTrue(ArrayUtils.contains(array, CharRange.isIn('A', 'Z')));
466 
467         array = CharSet.ASCII_NUMERIC.getCharRanges();
468         assertEquals(1, array.length);
469         assertTrue(ArrayUtils.contains(array, CharRange.isIn('0', '9')));
470     }
471 }