/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
17  package org.apache.commons.lang;
18  
19  import java.io.Serializable;
20  import java.util.HashMap;
21  import java.util.HashSet;
22  import java.util.Iterator;
23  import java.util.Map;
24  import java.util.Set;
25  
26  /**
27   * <p>A set of characters.</p>
28   *
29   * <p>Instances are immutable, but instances of subclasses may not be.</p>
30   *
31   * @author Stephen Colebourne
32   * @author Phil Steitz
33   * @author Pete Gieser
34   * @author Gary Gregory
35   * @since 1.0
36   * @version $Id: CharSet.java 618884 2008-02-06 04:37:17Z bayard $
37   */
38  public class CharSet implements Serializable {
39  
40      /**
41       * Required for serialization support. Lang version 2.0. 
42       * 
43       * @see java.io.Serializable
44       */
45      private static final long serialVersionUID = 5947847346149275958L;
46  
47      /** 
48       * A CharSet defining no characters. 
49       * @since 2.0
50       */
51      public static final CharSet EMPTY = new CharSet((String) null);
52  
53      /** 
54       * A CharSet defining ASCII alphabetic characters "a-zA-Z".
55       * @since 2.0
56       */
57      public static final CharSet ASCII_ALPHA = new CharSet("a-zA-Z");
58  
59      /** 
60       * A CharSet defining ASCII alphabetic characters "a-z".
61       * @since 2.0
62       */
63      public static final CharSet ASCII_ALPHA_LOWER = new CharSet("a-z");
64  
65      /** 
66       * A CharSet defining ASCII alphabetic characters "A-Z".
67       * @since 2.0
68       */
69      public static final CharSet ASCII_ALPHA_UPPER = new CharSet("A-Z");
70  
71      /** 
72       * A CharSet defining ASCII alphabetic characters "0-9".
73       * @since 2.0
74       */
75      public static final CharSet ASCII_NUMERIC = new CharSet("0-9");
76  
77      /**
78       * A Map of the common cases used in the factory.
79       * Subclasses can add more common patterns if desired.
80       * @since 2.0
81       */
82      protected static final Map COMMON = new HashMap();
83      
84      static {
85          COMMON.put(null, EMPTY);
86          COMMON.put("", EMPTY);
87          COMMON.put("a-zA-Z", ASCII_ALPHA);
88          COMMON.put("A-Za-z", ASCII_ALPHA);
89          COMMON.put("a-z", ASCII_ALPHA_LOWER);
90          COMMON.put("A-Z", ASCII_ALPHA_UPPER);
91          COMMON.put("0-9", ASCII_NUMERIC);
92      }
93  
94      /** The set of CharRange objects. */
95      private Set set = new HashSet();
96  
97      //-----------------------------------------------------------------------
98      /**
99       * <p>Factory method to create a new CharSet using a special syntax.</p>
100      *
101      * <ul>
102      *  <li><code>null</code> or empty string ("")
103      * - set containing no characters</li>
104      *  <li>Single character, such as "a"
105      *  - set containing just that character</li>
106      *  <li>Multi character, such as "a-e"
107      *  - set containing characters from one character to the other</li>
108      *  <li>Negated, such as "^a" or "^a-e"
109      *  - set containing all characters except those defined</li>
110      *  <li>Combinations, such as "abe-g"
111      *  - set containing all the characters from the individual sets</li>
112      * </ul>
113      *
114      * <p>The matching order is:</p>
115      * <ol>
116      *  <li>Negated multi character range, such as "^a-e"
117      *  <li>Ordinary multi character range, such as "a-e"
118      *  <li>Negated single character, such as "^a"
119      *  <li>Ordinary single character, such as "a"
120      * </ol>
121      * <p>Matching works left to right. Once a match is found the
122      * search starts again from the next character.</p>
123      *
124      * <p>If the same range is defined twice using the same syntax, only
125      * one range will be kept.
126      * Thus, "a-ca-c" creates only one range of "a-c".</p>
127      *
128      * <p>If the start and end of a range are in the wrong order,
129      * they are reversed. Thus "a-e" is the same as "e-a".
130      * As a result, "a-ee-a" would create only one range,
131      * as the "a-e" and "e-a" are the same.</p>
132      *
133      * <p>The set of characters represented is the union of the specified ranges.</p>
134      *
135      * <p>All CharSet objects returned by this method will be immutable.</p>
136      *
137      * @param setStr  the String describing the set, may be null
138      * @return a CharSet instance
139      * @since 2.0
140      */
141     public static CharSet getInstance(String setStr) {
142         Object set = COMMON.get(setStr);
143         if (set != null) {
144             return (CharSet) set;
145         }
146         return new CharSet(setStr);
147     }
148 
149     /**
150      * <p>Constructs a new CharSet using the set syntax.
151      * Each string is merged in with the set.</p>
152      *
153      * @param setStrs  Strings to merge into the initial set, may be null
154      * @return a CharSet instance
155      * @since 2.4
156      */
157     public static CharSet getInstance(String[] setStrs) {
158         if (setStrs == null) {
159             return null;
160         }
161         return new CharSet(setStrs); 
162     }
163 
164     //-----------------------------------------------------------------------
165     /**
166      * <p>Constructs a new CharSet using the set syntax.</p>
167      *
168      * @param setStr  the String describing the set, may be null
169      * @since 2.0
170      */
171     protected CharSet(String setStr) {
172         super();
173         add(setStr);
174     }
175 
176     /**
177      * <p>Constructs a new CharSet using the set syntax.
178      * Each string is merged in with the set.</p>
179      *
180      * @param set  Strings to merge into the initial set
181      * @throws NullPointerException if set is <code>null</code>
182      */
183     protected CharSet(String[] set) {
184         super();
185         int sz = set.length;
186         for (int i = 0; i < sz; i++) {
187             add(set[i]);
188         }
189     }
190 
191     //-----------------------------------------------------------------------
192     /**
193      * <p>Add a set definition string to the <code>CharSet</code>.</p>
194      *
195      * @param str  set definition string
196      */
197     protected void add(String str) {
198         if (str == null) {
199             return;
200         }
201 
202         int len = str.length();
203         int pos = 0;
204         while (pos < len) {
205             int remainder = (len - pos);
206             if (remainder >= 4 && str.charAt(pos) == '^' && str.charAt(pos + 2) == '-') {
207                 // negated range
208                 set.add(new CharRange(str.charAt(pos + 1), str.charAt(pos + 3), true));
209                 pos += 4;
210             } else if (remainder >= 3 && str.charAt(pos + 1) == '-') {
211                 // range
212                 set.add(new CharRange(str.charAt(pos), str.charAt(pos + 2)));
213                 pos += 3;
214             } else if (remainder >= 2 && str.charAt(pos) == '^') {
215                 // negated char
216                 set.add(new CharRange(str.charAt(pos + 1), true));
217                 pos += 2;
218             } else {
219                 // char
220                 set.add(new CharRange(str.charAt(pos)));
221                 pos += 1;
222             }
223         }
224     }
225 
226     //-----------------------------------------------------------------------
227     /**
228      * <p>Gets the internal set as an array of CharRange objects.</p>
229      *
230      * @return an array of immutable CharRange objects
231      * @since 2.0
232      */
233     public CharRange[] getCharRanges() {
234         return (CharRange[]) set.toArray(new CharRange[set.size()]);
235     }
236 
237     //-----------------------------------------------------------------------
238     /**
239      * <p>Does the <code>CharSet</code> contain the specified
240      * character <code>ch</code>.</p>
241      *
242      * @param ch  the character to check for
243      * @return <code>true</code> if the set contains the characters
244      */
245     public boolean contains(char ch) {
246         for (Iterator it = set.iterator(); it.hasNext();) {
247             CharRange range = (CharRange) it.next();
248             if (range.contains(ch)) {
249                 return true;
250             }
251         }
252         return false;
253     }
254 
255     // Basics
256     //-----------------------------------------------------------------------
257     /**
258      * <p>Compares two CharSet objects, returning true if they represent
259      * exactly the same set of characters defined in the same way.</p>
260      *
261      * <p>The two sets <code>abc</code> and <code>a-c</code> are <i>not</i>
262      * equal according to this method.</p>
263      *
264      * @param obj  the object to compare to
265      * @return true if equal
266      * @since 2.0
267      */
268     public boolean equals(Object obj) {
269         if (obj == this) {
270             return true;
271         }
272         if (obj instanceof CharSet == false) {
273             return false;
274         }
275         CharSet other = (CharSet) obj;
276         return set.equals(other.set);
277     }
278 
279     /**
280      * <p>Gets a hashCode compatible with the equals method.</p>
281      *
282      * @return a suitable hashCode
283      * @since 2.0
284      */
285     public int hashCode() {
286         return 89 + set.hashCode();
287     }
288 
289     /**
290      * <p>Gets a string representation of the set.</p>
291      *
292      * @return string representation of the set
293      */
294     public String toString() {
295         return set.toString();
296     }
297 
298 }