001    /*
002     * Licensed to the Apache Software Foundation (ASF) under one or more
003     * contributor license agreements.  See the NOTICE file distributed with
004     * this work for additional information regarding copyright ownership.
005     * The ASF licenses this file to You under the Apache License, Version 2.0
006     * (the "License"); you may not use this file except in compliance with
007     * the License.  You may obtain a copy of the License at
008     * 
009     *      http://www.apache.org/licenses/LICENSE-2.0
010     * 
011     * Unless required by applicable law or agreed to in writing, software
012     * distributed under the License is distributed on an "AS IS" BASIS,
013     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014     * See the License for the specific language governing permissions and
015     * limitations under the License.
016     */
017    package org.apache.commons.lang3;
018    
019    import java.io.Serializable;
020    import java.util.Collections;
021    import java.util.HashMap;
022    import java.util.HashSet;
023    import java.util.Map;
024    import java.util.Set;
025    
026    /**
027     * <p>A set of characters.</p>
028     *
029     * <p>Instances are immutable, but instances of subclasses may not be.</p>
030     *
031     * <p>#ThreadSafe#</p>
032     * @since 1.0
033     * @version $Id: CharSet.java 1090427 2011-04-08 20:17:10Z bayard $
034     */
035    public class CharSet implements Serializable {
036    
037        /**
038         * Required for serialization support. Lang version 2.0. 
039         * 
040         * @see java.io.Serializable
041         */
042        private static final long serialVersionUID = 5947847346149275958L;
043    
044        /** 
045         * A CharSet defining no characters. 
046         * @since 2.0
047         */
048        public static final CharSet EMPTY = new CharSet((String) null);
049    
050        /** 
051         * A CharSet defining ASCII alphabetic characters "a-zA-Z".
052         * @since 2.0
053         */
054        public static final CharSet ASCII_ALPHA = new CharSet("a-zA-Z");
055    
056        /** 
057         * A CharSet defining ASCII alphabetic characters "a-z".
058         * @since 2.0
059         */
060        public static final CharSet ASCII_ALPHA_LOWER = new CharSet("a-z");
061    
062        /** 
063         * A CharSet defining ASCII alphabetic characters "A-Z".
064         * @since 2.0
065         */
066        public static final CharSet ASCII_ALPHA_UPPER = new CharSet("A-Z");
067    
068        /** 
069         * A CharSet defining ASCII alphabetic characters "0-9".
070         * @since 2.0
071         */
072        public static final CharSet ASCII_NUMERIC = new CharSet("0-9");
073    
074        /**
075         * A Map of the common cases used in the factory.
076         * Subclasses can add more common patterns if desired
077         * @since 2.0
078         */
079        protected static final Map<String, CharSet> COMMON = Collections.synchronizedMap(new HashMap<String, CharSet>());
080        
081        static {
082            COMMON.put(null, EMPTY);
083            COMMON.put("", EMPTY);
084            COMMON.put("a-zA-Z", ASCII_ALPHA);
085            COMMON.put("A-Za-z", ASCII_ALPHA);
086            COMMON.put("a-z", ASCII_ALPHA_LOWER);
087            COMMON.put("A-Z", ASCII_ALPHA_UPPER);
088            COMMON.put("0-9", ASCII_NUMERIC);
089        }
090    
091        /** The set of CharRange objects. */
092        private final Set<CharRange> set = Collections.synchronizedSet(new HashSet<CharRange>());
093    
094        //-----------------------------------------------------------------------
095        /**
096         * <p>Factory method to create a new CharSet using a special syntax.</p>
097         *
098         * <ul>
099         *  <li>{@code null} or empty string ("")
100         * - set containing no characters</li>
101         *  <li>Single character, such as "a"
102         *  - set containing just that character</li>
103         *  <li>Multi character, such as "a-e"
104         *  - set containing characters from one character to the other</li>
105         *  <li>Negated, such as "^a" or "^a-e"
106         *  - set containing all characters except those defined</li>
107         *  <li>Combinations, such as "abe-g"
108         *  - set containing all the characters from the individual sets</li>
109         * </ul>
110         *
111         * <p>The matching order is:</p>
112         * <ol>
113         *  <li>Negated multi character range, such as "^a-e"
114         *  <li>Ordinary multi character range, such as "a-e"
115         *  <li>Negated single character, such as "^a"
116         *  <li>Ordinary single character, such as "a"
117         * </ol>
118         * <p>Matching works left to right. Once a match is found the
119         * search starts again from the next character.</p>
120         *
121         * <p>If the same range is defined twice using the same syntax, only
122         * one range will be kept.
123         * Thus, "a-ca-c" creates only one range of "a-c".</p>
124         *
125         * <p>If the start and end of a range are in the wrong order,
126         * they are reversed. Thus "a-e" is the same as "e-a".
127         * As a result, "a-ee-a" would create only one range,
128         * as the "a-e" and "e-a" are the same.</p>
129         *
130         * <p>The set of characters represented is the union of the specified ranges.</p>
131         *
132         * <p>All CharSet objects returned by this method will be immutable.</p>
133         *
134         * @param setStrs  Strings to merge into the set, may be null
135         * @return a CharSet instance
136         * @since 2.4
137         */
138        public static CharSet getInstance(String... setStrs) {
139            if (setStrs == null) {
140                return null;
141            }
142            if (setStrs.length == 1) {
143                CharSet common = COMMON.get(setStrs[0]);
144                if (common != null) {
145                    return common;
146                }
147            }
148            return new CharSet(setStrs); 
149        }
150    
151        //-----------------------------------------------------------------------
152        /**
153         * <p>Constructs a new CharSet using the set syntax.
154         * Each string is merged in with the set.</p>
155         *
156         * @param set  Strings to merge into the initial set
157         * @throws NullPointerException if set is {@code null}
158         */
159        protected CharSet(String... set) {
160            super();
161            int sz = set.length;
162            for (int i = 0; i < sz; i++) {
163                add(set[i]);
164            }
165        }
166    
167        //-----------------------------------------------------------------------
168        /**
169         * <p>Add a set definition string to the {@code CharSet}.</p>
170         *
171         * @param str  set definition string
172         */
173        protected void add(String str) {
174            if (str == null) {
175                return;
176            }
177    
178            int len = str.length();
179            int pos = 0;
180            while (pos < len) {
181                int remainder = (len - pos);
182                if (remainder >= 4 && str.charAt(pos) == '^' && str.charAt(pos + 2) == '-') {
183                    // negated range
184                    set.add(CharRange.isNotIn(str.charAt(pos + 1), str.charAt(pos + 3)));
185                    pos += 4;
186                } else if (remainder >= 3 && str.charAt(pos + 1) == '-') {
187                    // range
188                    set.add(CharRange.isIn(str.charAt(pos), str.charAt(pos + 2)));
189                    pos += 3;
190                } else if (remainder >= 2 && str.charAt(pos) == '^') {
191                    // negated char
192                    set.add(CharRange.isNot(str.charAt(pos + 1)));
193                    pos += 2;
194                } else {
195                    // char
196                    set.add(CharRange.is(str.charAt(pos)));
197                    pos += 1;
198                }
199            }
200        }
201    
202        //-----------------------------------------------------------------------
203        /**
204         * <p>Gets the internal set as an array of CharRange objects.</p>
205         *
206         * @return an array of immutable CharRange objects
207         * @since 2.0
208         */
209    // NOTE: This is no longer public as CharRange is no longer a public class. 
210    //       It may be replaced when CharSet moves to Range.
211        /*public*/ CharRange[] getCharRanges() {
212            return set.toArray(new CharRange[set.size()]);
213        }
214    
215        //-----------------------------------------------------------------------
216        /**
217         * <p>Does the {@code CharSet} contain the specified
218         * character {@code ch}.</p>
219         *
220         * @param ch  the character to check for
221         * @return {@code true} if the set contains the characters
222         */
223        public boolean contains(char ch) {
224            for (CharRange range : set) {
225                if (range.contains(ch)) {
226                    return true;
227                }
228            }
229            return false;
230        }
231    
232        // Basics
233        //-----------------------------------------------------------------------
234        /**
235         * <p>Compares two {@code CharSet} objects, returning true if they represent
236         * exactly the same set of characters defined in the same way.</p>
237         *
238         * <p>The two sets {@code abc} and {@code a-c} are <i>not</i>
239         * equal according to this method.</p>
240         *
241         * @param obj  the object to compare to
242         * @return true if equal
243         * @since 2.0
244         */
245        @Override
246        public boolean equals(Object obj) {
247            if (obj == this) {
248                return true;
249            }
250            if (obj instanceof CharSet == false) {
251                return false;
252            }
253            CharSet other = (CharSet) obj;
254            return set.equals(other.set);
255        }
256    
257        /**
258         * <p>Gets a hash code compatible with the equals method.</p>
259         *
260         * @return a suitable hash code
261         * @since 2.0
262         */
263        @Override
264        public int hashCode() {
265            return 89 + set.hashCode();
266        }
267    
268        /**
269         * <p>Gets a string representation of the set.</p>
270         *
271         * @return string representation of the set
272         */
273        @Override
274        public String toString() {
275            return set.toString();
276        }
277    
278    }