1 /*
2 * Licensed to the Apache Software Foundation (ASF) under one or more
3 * contributor license agreements. See the NOTICE file distributed with
4 * this work for additional information regarding copyright ownership.
5 * The ASF licenses this file to You under the Apache License, Version 2.0
6 * (the "License"); you may not use this file except in compliance with
7 * the License. You may obtain a copy of the License at
8 *
9 * http://www.apache.org/licenses/LICENSE-2.0
10 *
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
16 */
17 package org.apache.commons.lang;
18
import java.io.Serializable;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.Map;
import java.util.Set;
25
26 /**
27 * <p>A set of characters.</p>
28 *
29 * <p>Instances are immutable, but instances of subclasses may not be.</p>
30 *
31 * @author Stephen Colebourne
32 * @author Phil Steitz
33 * @author Pete Gieser
34 * @author Gary Gregory
35 * @since 1.0
36 * @version $Id: CharSet.java 618884 2008-02-06 04:37:17Z bayard $
37 */
38 public class CharSet implements Serializable {
39
40 /**
41 * Required for serialization support. Lang version 2.0.
42 *
43 * @see java.io.Serializable
44 */
45 private static final long serialVersionUID = 5947847346149275958L;
46
47 /**
48 * A CharSet defining no characters.
49 * @since 2.0
50 */
51 public static final CharSet EMPTY = new CharSet((String) null);
52
53 /**
54 * A CharSet defining ASCII alphabetic characters "a-zA-Z".
55 * @since 2.0
56 */
57 public static final CharSet ASCII_ALPHA = new CharSet("a-zA-Z");
58
59 /**
60 * A CharSet defining ASCII alphabetic characters "a-z".
61 * @since 2.0
62 */
63 public static final CharSet ASCII_ALPHA_LOWER = new CharSet("a-z");
64
65 /**
66 * A CharSet defining ASCII alphabetic characters "A-Z".
67 * @since 2.0
68 */
69 public static final CharSet ASCII_ALPHA_UPPER = new CharSet("A-Z");
70
71 /**
72 * A CharSet defining ASCII alphabetic characters "0-9".
73 * @since 2.0
74 */
75 public static final CharSet ASCII_NUMERIC = new CharSet("0-9");
76
77 /**
78 * A Map of the common cases used in the factory.
79 * Subclasses can add more common patterns if desired.
80 * @since 2.0
81 */
82 protected static final Map COMMON = new HashMap();
83
84 static {
85 COMMON.put(null, EMPTY);
86 COMMON.put("", EMPTY);
87 COMMON.put("a-zA-Z", ASCII_ALPHA);
88 COMMON.put("A-Za-z", ASCII_ALPHA);
89 COMMON.put("a-z", ASCII_ALPHA_LOWER);
90 COMMON.put("A-Z", ASCII_ALPHA_UPPER);
91 COMMON.put("0-9", ASCII_NUMERIC);
92 }
93
94 /** The set of CharRange objects. */
95 private Set set = new HashSet();
96
97 //-----------------------------------------------------------------------
98 /**
99 * <p>Factory method to create a new CharSet using a special syntax.</p>
100 *
101 * <ul>
102 * <li><code>null</code> or empty string ("")
103 * - set containing no characters</li>
104 * <li>Single character, such as "a"
105 * - set containing just that character</li>
106 * <li>Multi character, such as "a-e"
107 * - set containing characters from one character to the other</li>
108 * <li>Negated, such as "^a" or "^a-e"
109 * - set containing all characters except those defined</li>
110 * <li>Combinations, such as "abe-g"
111 * - set containing all the characters from the individual sets</li>
112 * </ul>
113 *
114 * <p>The matching order is:</p>
115 * <ol>
116 * <li>Negated multi character range, such as "^a-e"
117 * <li>Ordinary multi character range, such as "a-e"
118 * <li>Negated single character, such as "^a"
119 * <li>Ordinary single character, such as "a"
120 * </ol>
121 * <p>Matching works left to right. Once a match is found the
122 * search starts again from the next character.</p>
123 *
124 * <p>If the same range is defined twice using the same syntax, only
125 * one range will be kept.
126 * Thus, "a-ca-c" creates only one range of "a-c".</p>
127 *
128 * <p>If the start and end of a range are in the wrong order,
129 * they are reversed. Thus "a-e" is the same as "e-a".
130 * As a result, "a-ee-a" would create only one range,
131 * as the "a-e" and "e-a" are the same.</p>
132 *
133 * <p>The set of characters represented is the union of the specified ranges.</p>
134 *
135 * <p>All CharSet objects returned by this method will be immutable.</p>
136 *
137 * @param setStr the String describing the set, may be null
138 * @return a CharSet instance
139 * @since 2.0
140 */
141 public static CharSet getInstance(String setStr) {
142 Object set = COMMON.get(setStr);
143 if (set != null) {
144 return (CharSet) set;
145 }
146 return new CharSet(setStr);
147 }
148
149 /**
150 * <p>Constructs a new CharSet using the set syntax.
151 * Each string is merged in with the set.</p>
152 *
153 * @param setStrs Strings to merge into the initial set, may be null
154 * @return a CharSet instance
155 * @since 2.4
156 */
157 public static CharSet getInstance(String[] setStrs) {
158 if (setStrs == null) {
159 return null;
160 }
161 return new CharSet(setStrs);
162 }
163
164 //-----------------------------------------------------------------------
165 /**
166 * <p>Constructs a new CharSet using the set syntax.</p>
167 *
168 * @param setStr the String describing the set, may be null
169 * @since 2.0
170 */
171 protected CharSet(String setStr) {
172 super();
173 add(setStr);
174 }
175
176 /**
177 * <p>Constructs a new CharSet using the set syntax.
178 * Each string is merged in with the set.</p>
179 *
180 * @param set Strings to merge into the initial set
181 * @throws NullPointerException if set is <code>null</code>
182 */
183 protected CharSet(String[] set) {
184 super();
185 int sz = set.length;
186 for (int i = 0; i < sz; i++) {
187 add(set[i]);
188 }
189 }
190
191 //-----------------------------------------------------------------------
192 /**
193 * <p>Add a set definition string to the <code>CharSet</code>.</p>
194 *
195 * @param str set definition string
196 */
197 protected void add(String str) {
198 if (str == null) {
199 return;
200 }
201
202 int len = str.length();
203 int pos = 0;
204 while (pos < len) {
205 int remainder = (len - pos);
206 if (remainder >= 4 && str.charAt(pos) == '^' && str.charAt(pos + 2) == '-') {
207 // negated range
208 set.add(new CharRange(str.charAt(pos + 1), str.charAt(pos + 3), true));
209 pos += 4;
210 } else if (remainder >= 3 && str.charAt(pos + 1) == '-') {
211 // range
212 set.add(new CharRange(str.charAt(pos), str.charAt(pos + 2)));
213 pos += 3;
214 } else if (remainder >= 2 && str.charAt(pos) == '^') {
215 // negated char
216 set.add(new CharRange(str.charAt(pos + 1), true));
217 pos += 2;
218 } else {
219 // char
220 set.add(new CharRange(str.charAt(pos)));
221 pos += 1;
222 }
223 }
224 }
225
226 //-----------------------------------------------------------------------
227 /**
228 * <p>Gets the internal set as an array of CharRange objects.</p>
229 *
230 * @return an array of immutable CharRange objects
231 * @since 2.0
232 */
233 public CharRange[] getCharRanges() {
234 return (CharRange[]) set.toArray(new CharRange[set.size()]);
235 }
236
237 //-----------------------------------------------------------------------
238 /**
239 * <p>Does the <code>CharSet</code> contain the specified
240 * character <code>ch</code>.</p>
241 *
242 * @param ch the character to check for
243 * @return <code>true</code> if the set contains the characters
244 */
245 public boolean contains(char ch) {
246 for (Iterator it = set.iterator(); it.hasNext();) {
247 CharRange range = (CharRange) it.next();
248 if (range.contains(ch)) {
249 return true;
250 }
251 }
252 return false;
253 }
254
255 // Basics
256 //-----------------------------------------------------------------------
257 /**
258 * <p>Compares two CharSet objects, returning true if they represent
259 * exactly the same set of characters defined in the same way.</p>
260 *
261 * <p>The two sets <code>abc</code> and <code>a-c</code> are <i>not</i>
262 * equal according to this method.</p>
263 *
264 * @param obj the object to compare to
265 * @return true if equal
266 * @since 2.0
267 */
268 public boolean equals(Object obj) {
269 if (obj == this) {
270 return true;
271 }
272 if (obj instanceof CharSet == false) {
273 return false;
274 }
275 CharSet other = (CharSet) obj;
276 return set.equals(other.set);
277 }
278
279 /**
280 * <p>Gets a hashCode compatible with the equals method.</p>
281 *
282 * @return a suitable hashCode
283 * @since 2.0
284 */
285 public int hashCode() {
286 return 89 + set.hashCode();
287 }
288
289 /**
290 * <p>Gets a string representation of the set.</p>
291 *
292 * @return string representation of the set
293 */
294 public String toString() {
295 return set.toString();
296 }
297
298 }