| Classes in this File | Line Coverage | Branch Coverage | Complexity | ||||
| CharSet |
|
| 3.875;3.875 |
| 1 | /* | |
| 2 | * Licensed to the Apache Software Foundation (ASF) under one or more | |
| 3 | * contributor license agreements. See the NOTICE file distributed with | |
| 4 | * this work for additional information regarding copyright ownership. | |
| 5 | * The ASF licenses this file to You under the Apache License, Version 2.0 | |
| 6 | * (the "License"); you may not use this file except in compliance with | |
| 7 | * the License. You may obtain a copy of the License at | |
| 8 | * | |
| 9 | * http://www.apache.org/licenses/LICENSE-2.0 | |
| 10 | * | |
| 11 | * Unless required by applicable law or agreed to in writing, software | |
| 12 | * distributed under the License is distributed on an "AS IS" BASIS, | |
| 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |
| 14 | * See the License for the specific language governing permissions and | |
| 15 | * limitations under the License. | |
| 16 | */ | |
| 17 | package org.apache.commons.lang3; | |
| 18 | ||
| 19 | import java.io.Serializable; | |
| 20 | import java.util.Collections; | |
| 21 | import java.util.HashMap; | |
| 22 | import java.util.HashSet; | |
| 23 | import java.util.Map; | |
| 24 | import java.util.Set; | |
| 25 | ||
| 26 | /** | |
| 27 | * <p>A set of characters.</p> | |
| 28 | * | |
| 29 | * <p>Instances are immutable, but instances of subclasses may not be.</p> | |
| 30 | * | |
| 31 | * <p>#ThreadSafe#</p> | |
| 32 | * @since 1.0 | |
| 33 | * @version $Id: CharSet.java 1436770 2013-01-22 07:09:45Z ggregory $ | |
| 34 | */ | |
| 35 | public class CharSet implements Serializable { | |
| 36 | ||
| 37 | /** | |
| 38 | * Required for serialization support. Lang version 2.0. | |
| 39 | * | |
| 40 | * @see java.io.Serializable | |
| 41 | */ | |
| 42 | private static final long serialVersionUID = 5947847346149275958L; | |
| 43 | ||
| 44 | /** | |
| 45 | * A CharSet defining no characters. | |
| 46 | * @since 2.0 | |
| 47 | */ | |
| 48 | 1 | public static final CharSet EMPTY = new CharSet((String) null); |
| 49 | ||
| 50 | /** | |
| 51 | * A CharSet defining ASCII alphabetic characters "a-zA-Z". | |
| 52 | * @since 2.0 | |
| 53 | */ | |
| 54 | 1 | public static final CharSet ASCII_ALPHA = new CharSet("a-zA-Z"); |
| 55 | ||
| 56 | /** | |
| 57 | * A CharSet defining ASCII alphabetic characters "a-z". | |
| 58 | * @since 2.0 | |
| 59 | */ | |
| 60 | 1 | public static final CharSet ASCII_ALPHA_LOWER = new CharSet("a-z"); |
| 61 | ||
| 62 | /** | |
| 63 | * A CharSet defining ASCII alphabetic characters "A-Z". | |
| 64 | * @since 2.0 | |
| 65 | */ | |
| 66 | 1 | public static final CharSet ASCII_ALPHA_UPPER = new CharSet("A-Z"); |
| 67 | ||
| 68 | /** | |
| 69 | * A CharSet defining ASCII alphabetic characters "0-9". | |
| 70 | * @since 2.0 | |
| 71 | */ | |
| 72 | 1 | public static final CharSet ASCII_NUMERIC = new CharSet("0-9"); |
| 73 | ||
| 74 | /** | |
| 75 | * A Map of the common cases used in the factory. | |
| 76 | * Subclasses can add more common patterns if desired | |
| 77 | * @since 2.0 | |
| 78 | */ | |
| 79 | 1 | protected static final Map<String, CharSet> COMMON = Collections.synchronizedMap(new HashMap<String, CharSet>()); |
| 80 | ||
| 81 | static { | |
| 82 | 1 | COMMON.put(null, EMPTY); |
| 83 | 1 | COMMON.put("", EMPTY); |
| 84 | 1 | COMMON.put("a-zA-Z", ASCII_ALPHA); |
| 85 | 1 | COMMON.put("A-Za-z", ASCII_ALPHA); |
| 86 | 1 | COMMON.put("a-z", ASCII_ALPHA_LOWER); |
| 87 | 1 | COMMON.put("A-Z", ASCII_ALPHA_UPPER); |
| 88 | 1 | COMMON.put("0-9", ASCII_NUMERIC); |
| 89 | 1 | } |
| 90 | ||
| 91 | /** The set of CharRange objects. */ | |
| 92 | 96 | private final Set<CharRange> set = Collections.synchronizedSet(new HashSet<CharRange>()); |
| 93 | ||
| 94 | //----------------------------------------------------------------------- | |
| 95 | /** | |
| 96 | * <p>Factory method to create a new CharSet using a special syntax.</p> | |
| 97 | * | |
| 98 | * <ul> | |
| 99 | * <li>{@code null} or empty string ("") | |
| 100 | * - set containing no characters</li> | |
| 101 | * <li>Single character, such as "a" | |
| 102 | * - set containing just that character</li> | |
| 103 | * <li>Multi character, such as "a-e" | |
| 104 | * - set containing characters from one character to the other</li> | |
| 105 | * <li>Negated, such as "^a" or "^a-e" | |
| 106 | * - set containing all characters except those defined</li> | |
| 107 | * <li>Combinations, such as "abe-g" | |
| 108 | * - set containing all the characters from the individual sets</li> | |
| 109 | * </ul> | |
| 110 | * | |
| 111 | * <p>The matching order is:</p> | |
| 112 | * <ol> | |
| 113 | * <li>Negated multi character range, such as "^a-e" | |
| 114 | * <li>Ordinary multi character range, such as "a-e" | |
| 115 | * <li>Negated single character, such as "^a" | |
| 116 | * <li>Ordinary single character, such as "a" | |
| 117 | * </ol> | |
| 118 | * <p>Matching works left to right. Once a match is found the | |
| 119 | * search starts again from the next character.</p> | |
| 120 | * | |
| 121 | * <p>If the same range is defined twice using the same syntax, only | |
| 122 | * one range will be kept. | |
| 123 | * Thus, "a-ca-c" creates only one range of "a-c".</p> | |
| 124 | * | |
| 125 | * <p>If the start and end of a range are in the wrong order, | |
| 126 | * they are reversed. Thus "a-e" is the same as "e-a". | |
| 127 | * As a result, "a-ee-a" would create only one range, | |
| 128 | * as the "a-e" and "e-a" are the same.</p> | |
| 129 | * | |
| 130 | * <p>The set of characters represented is the union of the specified ranges.</p> | |
| 131 | * | |
| 132 | * <p>All CharSet objects returned by this method will be immutable.</p> | |
| 133 | * | |
| 134 | * @param setStrs Strings to merge into the set, may be null | |
| 135 | * @return a CharSet instance | |
| 136 | * @since 2.4 | |
| 137 | */ | |
| 138 | public static CharSet getInstance(final String... setStrs) { | |
| 139 | 106 | if (setStrs == null) { |
| 140 | 1 | return null; |
| 141 | } | |
| 142 | 105 | if (setStrs.length == 1) { |
| 143 | 104 | final CharSet common = COMMON.get(setStrs[0]); |
| 144 | 104 | if (common != null) { |
| 145 | 14 | return common; |
| 146 | } | |
| 147 | } | |
| 148 | 91 | return new CharSet(setStrs); |
| 149 | } | |
| 150 | ||
| 151 | //----------------------------------------------------------------------- | |
| 152 | /** | |
| 153 | * <p>Constructs a new CharSet using the set syntax. | |
| 154 | * Each string is merged in with the set.</p> | |
| 155 | * | |
| 156 | * @param set Strings to merge into the initial set | |
| 157 | * @throws NullPointerException if set is {@code null} | |
| 158 | */ | |
| 159 | protected CharSet(final String... set) { | |
| 160 | 96 | super(); |
| 161 | 96 | final int sz = set.length; |
| 162 | 191 | for (int i = 0; i < sz; i++) { |
| 163 | 95 | add(set[i]); |
| 164 | } | |
| 165 | 96 | } |
| 166 | ||
| 167 | //----------------------------------------------------------------------- | |
| 168 | /** | |
| 169 | * <p>Add a set definition string to the {@code CharSet}.</p> | |
| 170 | * | |
| 171 | * @param str set definition string | |
| 172 | */ | |
| 173 | protected void add(final String str) { | |
| 174 | 95 | if (str == null) { |
| 175 | 1 | return; |
| 176 | } | |
| 177 | ||
| 178 | 94 | final int len = str.length(); |
| 179 | 94 | int pos = 0; |
| 180 | 253 | while (pos < len) { |
| 181 | 159 | final int remainder = len - pos; |
| 182 | 159 | if (remainder >= 4 && str.charAt(pos) == '^' && str.charAt(pos + 2) == '-') { |
| 183 | // negated range | |
| 184 | 13 | set.add(CharRange.isNotIn(str.charAt(pos + 1), str.charAt(pos + 3))); |
| 185 | 13 | pos += 4; |
| 186 | 146 | } else if (remainder >= 3 && str.charAt(pos + 1) == '-') { |
| 187 | // range | |
| 188 | 40 | set.add(CharRange.isIn(str.charAt(pos), str.charAt(pos + 2))); |
| 189 | 40 | pos += 3; |
| 190 | 106 | } else if (remainder >= 2 && str.charAt(pos) == '^') { |
| 191 | // negated char | |
| 192 | 14 | set.add(CharRange.isNot(str.charAt(pos + 1))); |
| 193 | 14 | pos += 2; |
| 194 | } else { | |
| 195 | // char | |
| 196 | 92 | set.add(CharRange.is(str.charAt(pos))); |
| 197 | 92 | pos += 1; |
| 198 | } | |
| 199 | 159 | } |
| 200 | 94 | } |
| 201 | ||
| 202 | //----------------------------------------------------------------------- | |
| 203 | /** | |
| 204 | * <p>Gets the internal set as an array of CharRange objects.</p> | |
| 205 | * | |
| 206 | * @return an array of immutable CharRange objects | |
| 207 | * @since 2.0 | |
| 208 | */ | |
| 209 | // NOTE: This is no longer public as CharRange is no longer a public class. | |
| 210 | // It may be replaced when CharSet moves to Range. | |
| 211 | /*public*/ CharRange[] getCharRanges() { | |
| 212 | 45 | return set.toArray(new CharRange[set.size()]); |
| 213 | } | |
| 214 | ||
| 215 | //----------------------------------------------------------------------- | |
| 216 | /** | |
| 217 | * <p>Does the {@code CharSet} contain the specified | |
| 218 | * character {@code ch}.</p> | |
| 219 | * | |
| 220 | * @param ch the character to check for | |
| 221 | * @return {@code true} if the set contains the characters | |
| 222 | */ | |
| 223 | public boolean contains(final char ch) { | |
| 224 | 195 | for (final CharRange range : set) { |
| 225 | 280 | if (range.contains(ch)) { |
| 226 | 112 | return true; |
| 227 | } | |
| 228 | 168 | } |
| 229 | 83 | return false; |
| 230 | } | |
| 231 | ||
| 232 | // Basics | |
| 233 | //----------------------------------------------------------------------- | |
| 234 | /** | |
| 235 | * <p>Compares two {@code CharSet} objects, returning true if they represent | |
| 236 | * exactly the same set of characters defined in the same way.</p> | |
| 237 | * | |
| 238 | * <p>The two sets {@code abc} and {@code a-c} are <i>not</i> | |
| 239 | * equal according to this method.</p> | |
| 240 | * | |
| 241 | * @param obj the object to compare to | |
| 242 | * @return true if equal | |
| 243 | * @since 2.0 | |
| 244 | */ | |
| 245 | @Override | |
| 246 | public boolean equals(final Object obj) { | |
| 247 | 16 | if (obj == this) { |
| 248 | 3 | return true; |
| 249 | } | |
| 250 | 13 | if (obj instanceof CharSet == false) { |
| 251 | 1 | return false; |
| 252 | } | |
| 253 | 12 | final CharSet other = (CharSet) obj; |
| 254 | 12 | return set.equals(other.set); |
| 255 | } | |
| 256 | ||
| 257 | /** | |
| 258 | * <p>Gets a hash code compatible with the equals method.</p> | |
| 259 | * | |
| 260 | * @return a suitable hash code | |
| 261 | * @since 2.0 | |
| 262 | */ | |
| 263 | @Override | |
| 264 | public int hashCode() { | |
| 265 | 12 | return 89 + set.hashCode(); |
| 266 | } | |
| 267 | ||
| 268 | /** | |
| 269 | * <p>Gets a string representation of the set.</p> | |
| 270 | * | |
| 271 | * @return string representation of the set | |
| 272 | */ | |
| 273 | @Override | |
| 274 | public String toString() { | |
| 275 | 10 | return set.toString(); |
| 276 | } | |
| 277 | ||
| 278 | } |