001 /* 002 * Licensed to the Apache Software Foundation (ASF) under one or more 003 * contributor license agreements. See the NOTICE file distributed with 004 * this work for additional information regarding copyright ownership. 005 * The ASF licenses this file to You under the Apache License, Version 2.0 006 * (the "License"); you may not use this file except in compliance with 007 * the License. You may obtain a copy of the License at 008 * 009 * http://www.apache.org/licenses/LICENSE-2.0 010 * 011 * Unless required by applicable law or agreed to in writing, software 012 * distributed under the License is distributed on an "AS IS" BASIS, 013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 014 * See the License for the specific language governing permissions and 015 * limitations under the License. 016 */ 017 package org.apache.commons.lang; 018 019 import java.io.Serializable; 020 import java.util.Collections; 021 import java.util.HashMap; 022 import java.util.HashSet; 023 import java.util.Iterator; 024 import java.util.Map; 025 import java.util.Set; 026 027 /** 028 * <p>A set of characters.</p> 029 * 030 * <p>Instances are immutable, but instances of subclasses may not be.</p> 031 * 032 * <p>#ThreadSafe#</p> 033 * @author Apache Software Foundation 034 * @author Phil Steitz 035 * @author Pete Gieser 036 * @author Gary Gregory 037 * @since 1.0 038 * @version $Id: CharSet.java 1056988 2011-01-09 17:58:53Z niallp $ 039 */ 040 public class CharSet implements Serializable { 041 042 /** 043 * Required for serialization support. Lang version 2.0. 044 * 045 * @see java.io.Serializable 046 */ 047 private static final long serialVersionUID = 5947847346149275958L; 048 049 /** 050 * A CharSet defining no characters. 051 * @since 2.0 052 */ 053 public static final CharSet EMPTY = new CharSet((String) null); 054 055 /** 056 * A CharSet defining ASCII alphabetic characters "a-zA-Z". 057 * @since 2.0 058 */ 059 public static final CharSet ASCII_ALPHA = new CharSet("a-zA-Z"); 060 061 /** 062 * A CharSet defining ASCII alphabetic characters "a-z". 063 * @since 2.0 064 */ 065 public static final CharSet ASCII_ALPHA_LOWER = new CharSet("a-z"); 066 067 /** 068 * A CharSet defining ASCII alphabetic characters "A-Z". 069 * @since 2.0 070 */ 071 public static final CharSet ASCII_ALPHA_UPPER = new CharSet("A-Z"); 072 073 /** 074 * A CharSet defining ASCII alphabetic characters "0-9". 075 * @since 2.0 076 */ 077 public static final CharSet ASCII_NUMERIC = new CharSet("0-9"); 078 079 /** 080 * A Map of the common cases used in the factory. 081 * Subclasses can add more common patterns if desired 082 * @since 2.0 083 */ 084 protected static final Map COMMON = Collections.synchronizedMap(new HashMap()); 085 086 static { 087 COMMON.put(null, EMPTY); 088 COMMON.put("", EMPTY); 089 COMMON.put("a-zA-Z", ASCII_ALPHA); 090 COMMON.put("A-Za-z", ASCII_ALPHA); 091 COMMON.put("a-z", ASCII_ALPHA_LOWER); 092 COMMON.put("A-Z", ASCII_ALPHA_UPPER); 093 COMMON.put("0-9", ASCII_NUMERIC); 094 } 095 096 /** The set of CharRange objects. */ 097 private final Set set = Collections.synchronizedSet(new HashSet()); 098 099 //----------------------------------------------------------------------- 100 /** 101 * <p>Factory method to create a new CharSet using a special syntax.</p> 102 * 103 * <ul> 104 * <li><code>null</code> or empty string ("") 105 * - set containing no characters</li> 106 * <li>Single character, such as "a" 107 * - set containing just that character</li> 108 * <li>Multi character, such as "a-e" 109 * - set containing characters from one character to the other</li> 110 * <li>Negated, such as "^a" or "^a-e" 111 * - set containing all characters except those defined</li> 112 * <li>Combinations, such as "abe-g" 113 * - set containing all the characters from the individual sets</li> 114 * </ul> 115 * 116 * <p>The matching order is:</p> 117 * <ol> 118 * <li>Negated multi character range, such as "^a-e" 119 * <li>Ordinary multi character range, such as "a-e" 120 * <li>Negated single character, such as "^a" 121 * <li>Ordinary single character, such as "a" 122 * </ol> 123 * <p>Matching works left to right. Once a match is found the 124 * search starts again from the next character.</p> 125 * 126 * <p>If the same range is defined twice using the same syntax, only 127 * one range will be kept. 128 * Thus, "a-ca-c" creates only one range of "a-c".</p> 129 * 130 * <p>If the start and end of a range are in the wrong order, 131 * they are reversed. Thus "a-e" is the same as "e-a". 132 * As a result, "a-ee-a" would create only one range, 133 * as the "a-e" and "e-a" are the same.</p> 134 * 135 * <p>The set of characters represented is the union of the specified ranges.</p> 136 * 137 * <p>All CharSet objects returned by this method will be immutable.</p> 138 * 139 * @param setStr the String describing the set, may be null 140 * @return a CharSet instance 141 * @since 2.0 142 */ 143 public static CharSet getInstance(String setStr) { 144 Object set = COMMON.get(setStr); 145 if (set != null) { 146 return (CharSet) set; 147 } 148 return new CharSet(setStr); 149 } 150 151 /** 152 * <p>Constructs a new CharSet using the set syntax. 153 * Each string is merged in with the set.</p> 154 * 155 * @param setStrs Strings to merge into the initial set, may be null 156 * @return a CharSet instance 157 * @since 2.4 158 */ 159 public static CharSet getInstance(String[] setStrs) { 160 if (setStrs == null) { 161 return null; 162 } 163 return new CharSet(setStrs); 164 } 165 166 //----------------------------------------------------------------------- 167 /** 168 * <p>Constructs a new CharSet using the set syntax.</p> 169 * 170 * @param setStr the String describing the set, may be null 171 * @since 2.0 172 */ 173 protected CharSet(String setStr) { 174 super(); 175 add(setStr); 176 } 177 178 /** 179 * <p>Constructs a new CharSet using the set syntax. 180 * Each string is merged in with the set.</p> 181 * 182 * @param set Strings to merge into the initial set 183 * @throws NullPointerException if set is <code>null</code> 184 */ 185 protected CharSet(String[] set) { 186 super(); 187 int sz = set.length; 188 for (int i = 0; i < sz; i++) { 189 add(set[i]); 190 } 191 } 192 193 //----------------------------------------------------------------------- 194 /** 195 * <p>Add a set definition string to the <code>CharSet</code>.</p> 196 * 197 * @param str set definition string 198 */ 199 protected void add(String str) { 200 if (str == null) { 201 return; 202 } 203 204 int len = str.length(); 205 int pos = 0; 206 while (pos < len) { 207 int remainder = (len - pos); 208 if (remainder >= 4 && str.charAt(pos) == '^' && str.charAt(pos + 2) == '-') { 209 // negated range 210 set.add(CharRange.isNotIn(str.charAt(pos + 1), str.charAt(pos + 3))); 211 pos += 4; 212 } else if (remainder >= 3 && str.charAt(pos + 1) == '-') { 213 // range 214 set.add(CharRange.isIn(str.charAt(pos), str.charAt(pos + 2))); 215 pos += 3; 216 } else if (remainder >= 2 && str.charAt(pos) == '^') { 217 // negated char 218 set.add(CharRange.isNot(str.charAt(pos + 1))); 219 pos += 2; 220 } else { 221 // char 222 set.add(CharRange.is(str.charAt(pos))); 223 pos += 1; 224 } 225 } 226 } 227 228 //----------------------------------------------------------------------- 229 /** 230 * <p>Gets the internal set as an array of CharRange objects.</p> 231 * 232 * @return an array of immutable CharRange objects 233 * @since 2.0 234 */ 235 public CharRange[] getCharRanges() { 236 return (CharRange[]) set.toArray(new CharRange[set.size()]); 237 } 238 239 //----------------------------------------------------------------------- 240 /** 241 * <p>Does the <code>CharSet</code> contain the specified 242 * character <code>ch</code>.</p> 243 * 244 * @param ch the character to check for 245 * @return <code>true</code> if the set contains the characters 246 */ 247 public boolean contains(char ch) { 248 for (Iterator it = set.iterator(); it.hasNext();) { 249 CharRange range = (CharRange) it.next(); 250 if (range.contains(ch)) { 251 return true; 252 } 253 } 254 return false; 255 } 256 257 // Basics 258 //----------------------------------------------------------------------- 259 /** 260 * <p>Compares two CharSet objects, returning true if they represent 261 * exactly the same set of characters defined in the same way.</p> 262 * 263 * <p>The two sets <code>abc</code> and <code>a-c</code> are <i>not</i> 264 * equal according to this method.</p> 265 * 266 * @param obj the object to compare to 267 * @return true if equal 268 * @since 2.0 269 */ 270 public boolean equals(Object obj) { 271 if (obj == this) { 272 return true; 273 } 274 if (obj instanceof CharSet == false) { 275 return false; 276 } 277 CharSet other = (CharSet) obj; 278 return set.equals(other.set); 279 } 280 281 /** 282 * <p>Gets a hashCode compatible with the equals method.</p> 283 * 284 * @return a suitable hashCode 285 * @since 2.0 286 */ 287 public int hashCode() { 288 return 89 + set.hashCode(); 289 } 290 291 /** 292 * <p>Gets a string representation of the set.</p> 293 * 294 * @return string representation of the set 295 */ 296 public String toString() { 297 return set.toString(); 298 } 299 300 }