View Javadoc
1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one or more
3    * contributor license agreements.  See the NOTICE file distributed with
4    * this work for additional information regarding copyright ownership.
5    * The ASF licenses this file to You under the Apache License, Version 2.0
6    * (the "License"); you may not use this file except in compliance with
7    * the License.  You may obtain a copy of the License at
8    *
9    *      http://www.apache.org/licenses/LICENSE-2.0
10   *
11   * Unless required by applicable law or agreed to in writing, software
12   * distributed under the License is distributed on an "AS IS" BASIS,
13   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14   * See the License for the specific language governing permissions and
15   * limitations under the License.
16   */
17  package org.apache.commons.lang3;
18  
19  import java.io.Serializable;
20  import java.util.Collections;
21  import java.util.HashMap;
22  import java.util.HashSet;
23  import java.util.Map;
24  import java.util.Set;
25  
26  /**
27   * <p>A set of characters.</p>
28   *
29   * <p>Instances are immutable, but instances of subclasses may not be.</p>
30   *
31   * <p>#ThreadSafe#</p>
32   * @since 1.0
33   */
34  public class CharSet implements Serializable {
35  
36      /**
37       * Required for serialization support. Lang version 2.0.
38       *
39       * @see java.io.Serializable
40       */
41      private static final long serialVersionUID = 5947847346149275958L;
42  
43      /**
44       * A CharSet defining no characters.
45       * @since 2.0
46       */
47      public static final CharSet EMPTY = new CharSet((String) null);
48  
49      /**
50       * A CharSet defining ASCII alphabetic characters "a-zA-Z".
51       * @since 2.0
52       */
53      public static final CharSet ASCII_ALPHA = new CharSet("a-zA-Z");
54  
55      /**
56       * A CharSet defining ASCII alphabetic characters "a-z".
57       * @since 2.0
58       */
59      public static final CharSet ASCII_ALPHA_LOWER = new CharSet("a-z");
60  
61      /**
62       * A CharSet defining ASCII alphabetic characters "A-Z".
63       * @since 2.0
64       */
65      public static final CharSet ASCII_ALPHA_UPPER = new CharSet("A-Z");
66  
67      /**
68       * A CharSet defining ASCII alphabetic characters "0-9".
69       * @since 2.0
70       */
71      public static final CharSet ASCII_NUMERIC = new CharSet("0-9");
72  
73      /**
74       * A Map of the common cases used in the factory.
75       * Subclasses can add more common patterns if desired
76       * @since 2.0
77       */
78      protected static final Map<String, CharSet> COMMON = Collections.synchronizedMap(new HashMap<String, CharSet>());
79  
80      static {
81          COMMON.put(null, EMPTY);
82          COMMON.put(StringUtils.EMPTY, EMPTY);
83          COMMON.put("a-zA-Z", ASCII_ALPHA);
84          COMMON.put("A-Za-z", ASCII_ALPHA);
85          COMMON.put("a-z", ASCII_ALPHA_LOWER);
86          COMMON.put("A-Z", ASCII_ALPHA_UPPER);
87          COMMON.put("0-9", ASCII_NUMERIC);
88      }
89  
90      /** The set of CharRange objects. */
91      private final Set<CharRange> set = Collections.synchronizedSet(new HashSet<CharRange>());
92  
93      //-----------------------------------------------------------------------
94      /**
95       * <p>Factory method to create a new CharSet using a special syntax.</p>
96       *
97       * <ul>
98       *  <li>{@code null} or empty string ("")
99       * - set containing no characters</li>
100      *  <li>Single character, such as "a"
101      *  - set containing just that character</li>
102      *  <li>Multi character, such as "a-e"
103      *  - set containing characters from one character to the other</li>
104      *  <li>Negated, such as "^a" or "^a-e"
105      *  - set containing all characters except those defined</li>
106      *  <li>Combinations, such as "abe-g"
107      *  - set containing all the characters from the individual sets</li>
108      * </ul>
109      *
110      * <p>The matching order is:</p>
111      * <ol>
112      *  <li>Negated multi character range, such as "^a-e"
113      *  <li>Ordinary multi character range, such as "a-e"
114      *  <li>Negated single character, such as "^a"
115      *  <li>Ordinary single character, such as "a"
116      * </ol>
117      *
118      * <p>Matching works left to right. Once a match is found the
119      * search starts again from the next character.</p>
120      *
121      * <p>If the same range is defined twice using the same syntax, only
122      * one range will be kept.
123      * Thus, "a-ca-c" creates only one range of "a-c".</p>
124      *
125      * <p>If the start and end of a range are in the wrong order,
126      * they are reversed. Thus "a-e" is the same as "e-a".
127      * As a result, "a-ee-a" would create only one range,
128      * as the "a-e" and "e-a" are the same.</p>
129      *
130      * <p>The set of characters represented is the union of the specified ranges.</p>
131      *
132      * <p>There are two ways to add a literal negation character ({@code ^}):</p>
133      * <ul>
134      *     <li>As the last character in a string, e.g. {@code CharSet.getInstance("a-z^")}</li>
135      *     <li>As a separate element, e.g. {@code CharSet.getInstance("^","a-z")}</li>
136      * </ul>
137      *
138      * <p>Examples using the negation character:</p>
139      * <pre>
140      *     CharSet.getInstance("^a-c").contains('a') = false
141      *     CharSet.getInstance("^a-c").contains('d') = true
142      *     CharSet.getInstance("^^a-c").contains('a') = true // (only '^' is negated)
143      *     CharSet.getInstance("^^a-c").contains('^') = false
144      *     CharSet.getInstance("^a-cd-f").contains('d') = true
145      *     CharSet.getInstance("a-c^").contains('^') = true
146      *     CharSet.getInstance("^", "a-c").contains('^') = true
147      * </pre>
148      *
149      * <p>All CharSet objects returned by this method will be immutable.</p>
150      *
151      * @param setStrs  Strings to merge into the set, may be null
152      * @return a CharSet instance
153      * @since 2.4
154      */
155     public static CharSet getInstance(final String... setStrs) {
156         if (setStrs == null) {
157             return null;
158         }
159         if (setStrs.length == 1) {
160             final CharSet common = COMMON.get(setStrs[0]);
161             if (common != null) {
162                 return common;
163             }
164         }
165         return new CharSet(setStrs);
166     }
167 
168     //-----------------------------------------------------------------------
169     /**
170      * <p>Constructs a new CharSet using the set syntax.
171      * Each string is merged in with the set.</p>
172      *
173      * @param set  Strings to merge into the initial set
174      * @throws NullPointerException if set is {@code null}
175      */
176     protected CharSet(final String... set) {
177         super();
178         for (String s : set) {
179             add(s);
180         }
181     }
182 
183     //-----------------------------------------------------------------------
184     /**
185      * <p>Add a set definition string to the {@code CharSet}.</p>
186      *
187      * @param str  set definition string
188      */
189     protected void add(final String str) {
190         if (str == null) {
191             return;
192         }
193 
194         final int len = str.length();
195         int pos = 0;
196         while (pos < len) {
197             final int remainder = len - pos;
198             if (remainder >= 4 && str.charAt(pos) == '^' && str.charAt(pos + 2) == '-') {
199                 // negated range
200                 set.add(CharRange.isNotIn(str.charAt(pos + 1), str.charAt(pos + 3)));
201                 pos += 4;
202             } else if (remainder >= 3 && str.charAt(pos + 1) == '-') {
203                 // range
204                 set.add(CharRange.isIn(str.charAt(pos), str.charAt(pos + 2)));
205                 pos += 3;
206             } else if (remainder >= 2 && str.charAt(pos) == '^') {
207                 // negated char
208                 set.add(CharRange.isNot(str.charAt(pos + 1)));
209                 pos += 2;
210             } else {
211                 // char
212                 set.add(CharRange.is(str.charAt(pos)));
213                 pos += 1;
214             }
215         }
216     }
217 
218     //-----------------------------------------------------------------------
219     /**
220      * <p>Gets the internal set as an array of CharRange objects.</p>
221      *
222      * @return an array of immutable CharRange objects
223      * @since 2.0
224      */
225 // NOTE: This is no longer public as CharRange is no longer a public class.
226 //       It may be replaced when CharSet moves to Range.
227     /*public*/ CharRange[] getCharRanges() {
228         return set.toArray(new CharRange[set.size()]);
229     }
230 
231     //-----------------------------------------------------------------------
232     /**
233      * <p>Does the {@code CharSet} contain the specified
234      * character {@code ch}.</p>
235      *
236      * @param ch  the character to check for
237      * @return {@code true} if the set contains the characters
238      */
239     public boolean contains(final char ch) {
240         for (final CharRange range : set) {
241             if (range.contains(ch)) {
242                 return true;
243             }
244         }
245         return false;
246     }
247 
248     // Basics
249     //-----------------------------------------------------------------------
250     /**
251      * <p>Compares two {@code CharSet} objects, returning true if they represent
252      * exactly the same set of characters defined in the same way.</p>
253      *
254      * <p>The two sets {@code abc} and {@code a-c} are <i>not</i>
255      * equal according to this method.</p>
256      *
257      * @param obj  the object to compare to
258      * @return true if equal
259      * @since 2.0
260      */
261     @Override
262     public boolean equals(final Object obj) {
263         if (obj == this) {
264             return true;
265         }
266         if (obj instanceof CharSet == false) {
267             return false;
268         }
269         final CharSet other = (CharSet) obj;
270         return set.equals(other.set);
271     }
272 
273     /**
274      * <p>Gets a hash code compatible with the equals method.</p>
275      *
276      * @return a suitable hash code
277      * @since 2.0
278      */
279     @Override
280     public int hashCode() {
281         return 89 + set.hashCode();
282     }
283 
284     /**
285      * <p>Gets a string representation of the set.</p>
286      *
287      * @return string representation of the set
288      */
289     @Override
290     public String toString() {
291         return set.toString();
292     }
293 
294 }