/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
17  package org.apache.commons.lang3;
18  
19  import java.io.Serializable;
20  import java.util.Collections;
21  import java.util.HashMap;
22  import java.util.HashSet;
23  import java.util.Map;
24  import java.util.Set;
25  
26  /**
27   * <p>A set of characters.</p>
28   *
29   * <p>Instances are immutable, but instances of subclasses may not be.</p>
30   *
31   * <p>#ThreadSafe#</p>
32   * @since 1.0
33   * @version $Id: CharSet.java 1436770 2013-01-22 07:09:45Z ggregory $
34   */
35  public class CharSet implements Serializable {
36  
37      /**
38       * Required for serialization support. Lang version 2.0. 
39       * 
40       * @see java.io.Serializable
41       */
42      private static final long serialVersionUID = 5947847346149275958L;
43  
44      /** 
45       * A CharSet defining no characters. 
46       * @since 2.0
47       */
48      public static final CharSet EMPTY = new CharSet((String) null);
49  
50      /** 
51       * A CharSet defining ASCII alphabetic characters "a-zA-Z".
52       * @since 2.0
53       */
54      public static final CharSet ASCII_ALPHA = new CharSet("a-zA-Z");
55  
56      /** 
57       * A CharSet defining ASCII alphabetic characters "a-z".
58       * @since 2.0
59       */
60      public static final CharSet ASCII_ALPHA_LOWER = new CharSet("a-z");
61  
62      /** 
63       * A CharSet defining ASCII alphabetic characters "A-Z".
64       * @since 2.0
65       */
66      public static final CharSet ASCII_ALPHA_UPPER = new CharSet("A-Z");
67  
68      /** 
69       * A CharSet defining ASCII alphabetic characters "0-9".
70       * @since 2.0
71       */
72      public static final CharSet ASCII_NUMERIC = new CharSet("0-9");
73  
74      /**
75       * A Map of the common cases used in the factory.
76       * Subclasses can add more common patterns if desired
77       * @since 2.0
78       */
79      protected static final Map<String, CharSet> COMMON = Collections.synchronizedMap(new HashMap<String, CharSet>());
80      
81      static {
82          COMMON.put(null, EMPTY);
83          COMMON.put("", EMPTY);
84          COMMON.put("a-zA-Z", ASCII_ALPHA);
85          COMMON.put("A-Za-z", ASCII_ALPHA);
86          COMMON.put("a-z", ASCII_ALPHA_LOWER);
87          COMMON.put("A-Z", ASCII_ALPHA_UPPER);
88          COMMON.put("0-9", ASCII_NUMERIC);
89      }
90  
91      /** The set of CharRange objects. */
92      private final Set<CharRange> set = Collections.synchronizedSet(new HashSet<CharRange>());
93  
94      //-----------------------------------------------------------------------
95      /**
96       * <p>Factory method to create a new CharSet using a special syntax.</p>
97       *
98       * <ul>
99       *  <li>{@code null} or empty string ("")
100      * - set containing no characters</li>
101      *  <li>Single character, such as "a"
102      *  - set containing just that character</li>
103      *  <li>Multi character, such as "a-e"
104      *  - set containing characters from one character to the other</li>
105      *  <li>Negated, such as "^a" or "^a-e"
106      *  - set containing all characters except those defined</li>
107      *  <li>Combinations, such as "abe-g"
108      *  - set containing all the characters from the individual sets</li>
109      * </ul>
110      *
111      * <p>The matching order is:</p>
112      * <ol>
113      *  <li>Negated multi character range, such as "^a-e"
114      *  <li>Ordinary multi character range, such as "a-e"
115      *  <li>Negated single character, such as "^a"
116      *  <li>Ordinary single character, such as "a"
117      * </ol>
118      * <p>Matching works left to right. Once a match is found the
119      * search starts again from the next character.</p>
120      *
121      * <p>If the same range is defined twice using the same syntax, only
122      * one range will be kept.
123      * Thus, "a-ca-c" creates only one range of "a-c".</p>
124      *
125      * <p>If the start and end of a range are in the wrong order,
126      * they are reversed. Thus "a-e" is the same as "e-a".
127      * As a result, "a-ee-a" would create only one range,
128      * as the "a-e" and "e-a" are the same.</p>
129      *
130      * <p>The set of characters represented is the union of the specified ranges.</p>
131      *
132      * <p>All CharSet objects returned by this method will be immutable.</p>
133      *
134      * @param setStrs  Strings to merge into the set, may be null
135      * @return a CharSet instance
136      * @since 2.4
137      */
138     public static CharSet getInstance(final String... setStrs) {
139         if (setStrs == null) {
140             return null;
141         }
142         if (setStrs.length == 1) {
143             final CharSet common = COMMON.get(setStrs[0]);
144             if (common != null) {
145                 return common;
146             }
147         }
148         return new CharSet(setStrs); 
149     }
150 
151     //-----------------------------------------------------------------------
152     /**
153      * <p>Constructs a new CharSet using the set syntax.
154      * Each string is merged in with the set.</p>
155      *
156      * @param set  Strings to merge into the initial set
157      * @throws NullPointerException if set is {@code null}
158      */
159     protected CharSet(final String... set) {
160         super();
161         final int sz = set.length;
162         for (int i = 0; i < sz; i++) {
163             add(set[i]);
164         }
165     }
166 
167     //-----------------------------------------------------------------------
168     /**
169      * <p>Add a set definition string to the {@code CharSet}.</p>
170      *
171      * @param str  set definition string
172      */
173     protected void add(final String str) {
174         if (str == null) {
175             return;
176         }
177 
178         final int len = str.length();
179         int pos = 0;
180         while (pos < len) {
181             final int remainder = len - pos;
182             if (remainder >= 4 && str.charAt(pos) == '^' && str.charAt(pos + 2) == '-') {
183                 // negated range
184                 set.add(CharRange.isNotIn(str.charAt(pos + 1), str.charAt(pos + 3)));
185                 pos += 4;
186             } else if (remainder >= 3 && str.charAt(pos + 1) == '-') {
187                 // range
188                 set.add(CharRange.isIn(str.charAt(pos), str.charAt(pos + 2)));
189                 pos += 3;
190             } else if (remainder >= 2 && str.charAt(pos) == '^') {
191                 // negated char
192                 set.add(CharRange.isNot(str.charAt(pos + 1)));
193                 pos += 2;
194             } else {
195                 // char
196                 set.add(CharRange.is(str.charAt(pos)));
197                 pos += 1;
198             }
199         }
200     }
201 
202     //-----------------------------------------------------------------------
203     /**
204      * <p>Gets the internal set as an array of CharRange objects.</p>
205      *
206      * @return an array of immutable CharRange objects
207      * @since 2.0
208      */
209 // NOTE: This is no longer public as CharRange is no longer a public class. 
210 //       It may be replaced when CharSet moves to Range.
211     /*public*/ CharRange[] getCharRanges() {
212         return set.toArray(new CharRange[set.size()]);
213     }
214 
215     //-----------------------------------------------------------------------
216     /**
217      * <p>Does the {@code CharSet} contain the specified
218      * character {@code ch}.</p>
219      *
220      * @param ch  the character to check for
221      * @return {@code true} if the set contains the characters
222      */
223     public boolean contains(final char ch) {
224         for (final CharRange range : set) {
225             if (range.contains(ch)) {
226                 return true;
227             }
228         }
229         return false;
230     }
231 
232     // Basics
233     //-----------------------------------------------------------------------
234     /**
235      * <p>Compares two {@code CharSet} objects, returning true if they represent
236      * exactly the same set of characters defined in the same way.</p>
237      *
238      * <p>The two sets {@code abc} and {@code a-c} are <i>not</i>
239      * equal according to this method.</p>
240      *
241      * @param obj  the object to compare to
242      * @return true if equal
243      * @since 2.0
244      */
245     @Override
246     public boolean equals(final Object obj) {
247         if (obj == this) {
248             return true;
249         }
250         if (obj instanceof CharSet == false) {
251             return false;
252         }
253         final CharSet other = (CharSet) obj;
254         return set.equals(other.set);
255     }
256 
257     /**
258      * <p>Gets a hash code compatible with the equals method.</p>
259      *
260      * @return a suitable hash code
261      * @since 2.0
262      */
263     @Override
264     public int hashCode() {
265         return 89 + set.hashCode();
266     }
267 
268     /**
269      * <p>Gets a string representation of the set.</p>
270      *
271      * @return string representation of the set
272      */
273     @Override
274     public String toString() {
275         return set.toString();
276     }
277 
278 }