Coverage Report - org.apache.commons.lang3.CharSet
 
Classes in this File Line Coverage Branch Coverage Complexity
CharSet
100%
60/60
100%
34/34
3,875
 
 1  
 /*
 2  
  * Licensed to the Apache Software Foundation (ASF) under one or more
 3  
  * contributor license agreements.  See the NOTICE file distributed with
 4  
  * this work for additional information regarding copyright ownership.
 5  
  * The ASF licenses this file to You under the Apache License, Version 2.0
 6  
  * (the "License"); you may not use this file except in compliance with
 7  
  * the License.  You may obtain a copy of the License at
 8  
  * 
 9  
  *      http://www.apache.org/licenses/LICENSE-2.0
 10  
  * 
 11  
  * Unless required by applicable law or agreed to in writing, software
 12  
  * distributed under the License is distributed on an "AS IS" BASIS,
 13  
  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 14  
  * See the License for the specific language governing permissions and
 15  
  * limitations under the License.
 16  
  */
 17  
 package org.apache.commons.lang3;
 18  
 
 19  
 import java.io.Serializable;
 20  
 import java.util.Collections;
 21  
 import java.util.HashMap;
 22  
 import java.util.HashSet;
 23  
 import java.util.Map;
 24  
 import java.util.Set;
 25  
 
 26  
 /**
 27  
  * <p>A set of characters.</p>
 28  
  *
 29  
  * <p>Instances are immutable, but instances of subclasses may not be.</p>
 30  
  *
 31  
  * <p>#ThreadSafe#</p>
 32  
  * @since 1.0
 33  
  */
 34  
 public class CharSet implements Serializable {
 35  
 
 36  
     /**
 37  
      * Required for serialization support. Lang version 2.0. 
 38  
      * 
 39  
      * @see java.io.Serializable
 40  
      */
 41  
     private static final long serialVersionUID = 5947847346149275958L;
 42  
 
 43  
     /** 
 44  
      * A CharSet defining no characters. 
 45  
      * @since 2.0
 46  
      */
 47  1
     public static final CharSet EMPTY = new CharSet((String) null);
 48  
 
 49  
     /** 
 50  
      * A CharSet defining ASCII alphabetic characters "a-zA-Z".
 51  
      * @since 2.0
 52  
      */
 53  1
     public static final CharSet ASCII_ALPHA = new CharSet("a-zA-Z");
 54  
 
 55  
     /** 
 56  
      * A CharSet defining ASCII alphabetic characters "a-z".
 57  
      * @since 2.0
 58  
      */
 59  1
     public static final CharSet ASCII_ALPHA_LOWER = new CharSet("a-z");
 60  
 
 61  
     /** 
 62  
      * A CharSet defining ASCII alphabetic characters "A-Z".
 63  
      * @since 2.0
 64  
      */
 65  1
     public static final CharSet ASCII_ALPHA_UPPER = new CharSet("A-Z");
 66  
 
 67  
     /** 
 68  
      * A CharSet defining ASCII alphabetic characters "0-9".
 69  
      * @since 2.0
 70  
      */
 71  1
     public static final CharSet ASCII_NUMERIC = new CharSet("0-9");
 72  
 
 73  
     /**
 74  
      * A Map of the common cases used in the factory.
 75  
      * Subclasses can add more common patterns if desired
 76  
      * @since 2.0
 77  
      */
 78  1
     protected static final Map<String, CharSet> COMMON = Collections.synchronizedMap(new HashMap<String, CharSet>());
 79  
     
 80  
     static {
 81  1
         COMMON.put(null, EMPTY);
 82  1
         COMMON.put("", EMPTY);
 83  1
         COMMON.put("a-zA-Z", ASCII_ALPHA);
 84  1
         COMMON.put("A-Za-z", ASCII_ALPHA);
 85  1
         COMMON.put("a-z", ASCII_ALPHA_LOWER);
 86  1
         COMMON.put("A-Z", ASCII_ALPHA_UPPER);
 87  1
         COMMON.put("0-9", ASCII_NUMERIC);
 88  1
     }
 89  
 
 90  
     /** The set of CharRange objects. */
 91  109
     private final Set<CharRange> set = Collections.synchronizedSet(new HashSet<CharRange>());
 92  
 
 93  
     //-----------------------------------------------------------------------
 94  
     /**
 95  
      * <p>Factory method to create a new CharSet using a special syntax.</p>
 96  
      *
 97  
      * <ul>
 98  
      *  <li>{@code null} or empty string ("")
 99  
      * - set containing no characters</li>
 100  
      *  <li>Single character, such as "a"
 101  
      *  - set containing just that character</li>
 102  
      *  <li>Multi character, such as "a-e"
 103  
      *  - set containing characters from one character to the other</li>
 104  
      *  <li>Negated, such as "^a" or "^a-e"
 105  
      *  - set containing all characters except those defined</li>
 106  
      *  <li>Combinations, such as "abe-g"
 107  
      *  - set containing all the characters from the individual sets</li>
 108  
      * </ul>
 109  
      *
 110  
      * <p>The matching order is:</p>
 111  
      * <ol>
 112  
      *  <li>Negated multi character range, such as "^a-e"
 113  
      *  <li>Ordinary multi character range, such as "a-e"
 114  
      *  <li>Negated single character, such as "^a"
 115  
      *  <li>Ordinary single character, such as "a"
 116  
      * </ol>
 117  
      * 
 118  
      * <p>Matching works left to right. Once a match is found the
 119  
      * search starts again from the next character.</p>
 120  
      *
 121  
      * <p>If the same range is defined twice using the same syntax, only
 122  
      * one range will be kept.
 123  
      * Thus, "a-ca-c" creates only one range of "a-c".</p>
 124  
      *
 125  
      * <p>If the start and end of a range are in the wrong order,
 126  
      * they are reversed. Thus "a-e" is the same as "e-a".
 127  
      * As a result, "a-ee-a" would create only one range,
 128  
      * as the "a-e" and "e-a" are the same.</p>
 129  
      *
 130  
      * <p>The set of characters represented is the union of the specified ranges.</p>
 131  
      * 
 132  
      * <p>There are two ways to add a literal negation character ({@code ^}):</p>
 133  
      * <ul>
 134  
      *     <li>As the last character in a string, e.g. {@code CharSet.getInstance("a-z^")}</li>
 135  
      *     <li>As a separate element, e.g. {@code CharSet.getInstance("^","a-z")}</li>
 136  
      * </ul>
 137  
      *
 138  
      * <p>Examples using the negation character:</p>
 139  
      * <pre>
 140  
      *     CharSet.getInstance("^a-c").contains('a') = false
 141  
      *     CharSet.getInstance("^a-c").contains('d') = true
 142  
      *     CharSet.getInstance("^^a-c").contains('a') = true // (only '^' is negated)
 143  
      *     CharSet.getInstance("^^a-c").contains('^') = false
 144  
      *     CharSet.getInstance("^a-cd-f").contains('d') = true 
 145  
      *     CharSet.getInstance("a-c^").contains('^') = true
 146  
      *     CharSet.getInstance("^", "a-c").contains('^') = true
 147  
      * </pre>
 148  
      * 
 149  
      * <p>All CharSet objects returned by this method will be immutable.</p>
 150  
      *
 151  
      * @param setStrs  Strings to merge into the set, may be null
 152  
      * @return a CharSet instance
 153  
      * @since 2.4
 154  
      */
 155  
     public static CharSet getInstance(final String... setStrs) {
 156  120
         if (setStrs == null) {
 157  1
             return null;
 158  
         }
 159  119
         if (setStrs.length == 1) {
 160  117
             final CharSet common = COMMON.get(setStrs[0]);
 161  117
             if (common != null) {
 162  15
                 return common;
 163  
             }
 164  
         }
 165  104
         return new CharSet(setStrs); 
 166  
     }
 167  
 
 168  
     //-----------------------------------------------------------------------
 169  
     /**
 170  
      * <p>Constructs a new CharSet using the set syntax.
 171  
      * Each string is merged in with the set.</p>
 172  
      *
 173  
      * @param set  Strings to merge into the initial set
 174  
      * @throws NullPointerException if set is {@code null}
 175  
      */
 176  
     protected CharSet(final String... set) {
 177  109
         super();
 178  109
         final int sz = set.length;
 179  218
         for (int i = 0; i < sz; i++) {
 180  109
             add(set[i]);
 181  
         }
 182  109
     }
 183  
 
 184  
     //-----------------------------------------------------------------------
 185  
     /**
 186  
      * <p>Add a set definition string to the {@code CharSet}.</p>
 187  
      *
 188  
      * @param str  set definition string
 189  
      */
 190  
     protected void add(final String str) {
 191  109
         if (str == null) {
 192  1
             return;
 193  
         }
 194  
 
 195  108
         final int len = str.length();
 196  108
         int pos = 0;
 197  286
         while (pos < len) {
 198  178
             final int remainder = len - pos;
 199  178
             if (remainder >= 4 && str.charAt(pos) == '^' && str.charAt(pos + 2) == '-') {
 200  
                 // negated range
 201  16
                 set.add(CharRange.isNotIn(str.charAt(pos + 1), str.charAt(pos + 3)));
 202  16
                 pos += 4;
 203  162
             } else if (remainder >= 3 && str.charAt(pos + 1) == '-') {
 204  
                 // range
 205  49
                 set.add(CharRange.isIn(str.charAt(pos), str.charAt(pos + 2)));
 206  49
                 pos += 3;
 207  113
             } else if (remainder >= 2 && str.charAt(pos) == '^') {
 208  
                 // negated char
 209  16
                 set.add(CharRange.isNot(str.charAt(pos + 1)));
 210  16
                 pos += 2;
 211  
             } else {
 212  
                 // char
 213  97
                 set.add(CharRange.is(str.charAt(pos)));
 214  97
                 pos += 1;
 215  
             }
 216  178
         }
 217  108
     }
 218  
 
 219  
     //-----------------------------------------------------------------------
 220  
     /**
 221  
      * <p>Gets the internal set as an array of CharRange objects.</p>
 222  
      *
 223  
      * @return an array of immutable CharRange objects
 224  
      * @since 2.0
 225  
      */
 226  
 // NOTE: This is no longer public as CharRange is no longer a public class. 
 227  
 //       It may be replaced when CharSet moves to Range.
 228  
     /*public*/ CharRange[] getCharRanges() {
 229  45
         return set.toArray(new CharRange[set.size()]);
 230  
     }
 231  
 
 232  
     //-----------------------------------------------------------------------
 233  
     /**
 234  
      * <p>Does the {@code CharSet} contain the specified
 235  
      * character {@code ch}.</p>
 236  
      *
 237  
      * @param ch  the character to check for
 238  
      * @return {@code true} if the set contains the characters
 239  
      */
 240  
     public boolean contains(final char ch) {
 241  218
         for (final CharRange range : set) {
 242  305
             if (range.contains(ch)) {
 243  123
                 return true;
 244  
             }
 245  182
         }
 246  95
         return false;
 247  
     }
 248  
 
 249  
     // Basics
 250  
     //-----------------------------------------------------------------------
 251  
     /**
 252  
      * <p>Compares two {@code CharSet} objects, returning true if they represent
 253  
      * exactly the same set of characters defined in the same way.</p>
 254  
      *
 255  
      * <p>The two sets {@code abc} and {@code a-c} are <i>not</i>
 256  
      * equal according to this method.</p>
 257  
      *
 258  
      * @param obj  the object to compare to
 259  
      * @return true if equal
 260  
      * @since 2.0
 261  
      */
 262  
     @Override
 263  
     public boolean equals(final Object obj) {
 264  16
         if (obj == this) {
 265  3
             return true;
 266  
         }
 267  13
         if (obj instanceof CharSet == false) {
 268  1
             return false;
 269  
         }
 270  12
         final CharSet other = (CharSet) obj;
 271  12
         return set.equals(other.set);
 272  
     }
 273  
 
 274  
     /**
 275  
      * <p>Gets a hash code compatible with the equals method.</p>
 276  
      *
 277  
      * @return a suitable hash code
 278  
      * @since 2.0
 279  
      */
 280  
     @Override
 281  
     public int hashCode() {
 282  12
         return 89 + set.hashCode();
 283  
     }
 284  
 
 285  
     /**
 286  
      * <p>Gets a string representation of the set.</p>
 287  
      *
 288  
      * @return string representation of the set
 289  
      */
 290  
     @Override
 291  
     public String toString() {
 292  10
         return set.toString();
 293  
     }
 294  
 
 295  
 }