Coverage Report - org.apache.commons.lang3.CharSet
 
Classes in this File Line Coverage Branch Coverage Complexity
CharSet
100%
60/60
100%
34/34
3,875
 
 1  
 /*
 2  
  * Licensed to the Apache Software Foundation (ASF) under one or more
 3  
  * contributor license agreements.  See the NOTICE file distributed with
 4  
  * this work for additional information regarding copyright ownership.
 5  
  * The ASF licenses this file to You under the Apache License, Version 2.0
 6  
  * (the "License"); you may not use this file except in compliance with
 7  
  * the License.  You may obtain a copy of the License at
 8  
  * 
 9  
  *      http://www.apache.org/licenses/LICENSE-2.0
 10  
  * 
 11  
  * Unless required by applicable law or agreed to in writing, software
 12  
  * distributed under the License is distributed on an "AS IS" BASIS,
 13  
  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 14  
  * See the License for the specific language governing permissions and
 15  
  * limitations under the License.
 16  
  */
 17  
 package org.apache.commons.lang3;
 18  
 
 19  
 import java.io.Serializable;
 20  
 import java.util.Collections;
 21  
 import java.util.HashMap;
 22  
 import java.util.HashSet;
 23  
 import java.util.Map;
 24  
 import java.util.Set;
 25  
 
 26  
 /**
 27  
  * <p>A set of characters.</p>
 28  
  *
 29  
  * <p>Instances are immutable, but instances of subclasses may not be.</p>
 30  
  *
 31  
  * <p>#ThreadSafe#</p>
 32  
  * @since 1.0
 33  
  * @version $Id: CharSet.java 1436770 2013-01-22 07:09:45Z ggregory $
 34  
  */
 35  
 public class CharSet implements Serializable {
 36  
 
 37  
     /**
 38  
      * Required for serialization support. Lang version 2.0. 
 39  
      * 
 40  
      * @see java.io.Serializable
 41  
      */
 42  
     private static final long serialVersionUID = 5947847346149275958L;
 43  
 
 44  
     /** 
 45  
      * A CharSet defining no characters. 
 46  
      * @since 2.0
 47  
      */
 48  1
     public static final CharSet EMPTY = new CharSet((String) null);
 49  
 
 50  
     /** 
 51  
      * A CharSet defining ASCII alphabetic characters "a-zA-Z".
 52  
      * @since 2.0
 53  
      */
 54  1
     public static final CharSet ASCII_ALPHA = new CharSet("a-zA-Z");
 55  
 
 56  
     /** 
 57  
      * A CharSet defining ASCII alphabetic characters "a-z".
 58  
      * @since 2.0
 59  
      */
 60  1
     public static final CharSet ASCII_ALPHA_LOWER = new CharSet("a-z");
 61  
 
 62  
     /** 
 63  
      * A CharSet defining ASCII alphabetic characters "A-Z".
 64  
      * @since 2.0
 65  
      */
 66  1
     public static final CharSet ASCII_ALPHA_UPPER = new CharSet("A-Z");
 67  
 
 68  
     /** 
 69  
      * A CharSet defining ASCII alphabetic characters "0-9".
 70  
      * @since 2.0
 71  
      */
 72  1
     public static final CharSet ASCII_NUMERIC = new CharSet("0-9");
 73  
 
 74  
     /**
 75  
      * A Map of the common cases used in the factory.
 76  
      * Subclasses can add more common patterns if desired
 77  
      * @since 2.0
 78  
      */
 79  1
     protected static final Map<String, CharSet> COMMON = Collections.synchronizedMap(new HashMap<String, CharSet>());
 80  
     
 81  
     static {
 82  1
         COMMON.put(null, EMPTY);
 83  1
         COMMON.put("", EMPTY);
 84  1
         COMMON.put("a-zA-Z", ASCII_ALPHA);
 85  1
         COMMON.put("A-Za-z", ASCII_ALPHA);
 86  1
         COMMON.put("a-z", ASCII_ALPHA_LOWER);
 87  1
         COMMON.put("A-Z", ASCII_ALPHA_UPPER);
 88  1
         COMMON.put("0-9", ASCII_NUMERIC);
 89  1
     }
 90  
 
 91  
     /** The set of CharRange objects. */
 92  102
     private final Set<CharRange> set = Collections.synchronizedSet(new HashSet<CharRange>());
 93  
 
 94  
     //-----------------------------------------------------------------------
 95  
     /**
 96  
      * <p>Factory method to create a new CharSet using a special syntax.</p>
 97  
      *
 98  
      * <ul>
 99  
      *  <li>{@code null} or empty string ("")
 100  
      * - set containing no characters</li>
 101  
      *  <li>Single character, such as "a"
 102  
      *  - set containing just that character</li>
 103  
      *  <li>Multi character, such as "a-e"
 104  
      *  - set containing characters from one character to the other</li>
 105  
      *  <li>Negated, such as "^a" or "^a-e"
 106  
      *  - set containing all characters except those defined</li>
 107  
      *  <li>Combinations, such as "abe-g"
 108  
      *  - set containing all the characters from the individual sets</li>
 109  
      * </ul>
 110  
      *
 111  
      * <p>The matching order is:</p>
 112  
      * <ol>
 113  
      *  <li>Negated multi character range, such as "^a-e"
 114  
      *  <li>Ordinary multi character range, such as "a-e"
 115  
      *  <li>Negated single character, such as "^a"
 116  
      *  <li>Ordinary single character, such as "a"
 117  
      * </ol>
 118  
      * <p>Matching works left to right. Once a match is found the
 119  
      * search starts again from the next character.</p>
 120  
      *
 121  
      * <p>If the same range is defined twice using the same syntax, only
 122  
      * one range will be kept.
 123  
      * Thus, "a-ca-c" creates only one range of "a-c".</p>
 124  
      *
 125  
      * <p>If the start and end of a range are in the wrong order,
 126  
      * they are reversed. Thus "a-e" is the same as "e-a".
 127  
      * As a result, "a-ee-a" would create only one range,
 128  
      * as the "a-e" and "e-a" are the same.</p>
 129  
      *
 130  
      * <p>The set of characters represented is the union of the specified ranges.</p>
 131  
      *
 132  
      * <p>All CharSet objects returned by this method will be immutable.</p>
 133  
      *
 134  
      * @param setStrs  Strings to merge into the set, may be null
 135  
      * @return a CharSet instance
 136  
      * @since 2.4
 137  
      */
 138  
     public static CharSet getInstance(final String... setStrs) {
 139  113
         if (setStrs == null) {
 140  1
             return null;
 141  
         }
 142  112
         if (setStrs.length == 1) {
 143  111
             final CharSet common = COMMON.get(setStrs[0]);
 144  111
             if (common != null) {
 145  15
                 return common;
 146  
             }
 147  
         }
 148  97
         return new CharSet(setStrs); 
 149  
     }
 150  
 
 151  
     //-----------------------------------------------------------------------
 152  
     /**
 153  
      * <p>Constructs a new CharSet using the set syntax.
 154  
      * Each string is merged in with the set.</p>
 155  
      *
 156  
      * @param set  Strings to merge into the initial set
 157  
      * @throws NullPointerException if set is {@code null}
 158  
      */
 159  
     protected CharSet(final String... set) {
 160  102
         super();
 161  102
         final int sz = set.length;
 162  203
         for (int i = 0; i < sz; i++) {
 163  101
             add(set[i]);
 164  
         }
 165  102
     }
 166  
 
 167  
     //-----------------------------------------------------------------------
 168  
     /**
 169  
      * <p>Add a set definition string to the {@code CharSet}.</p>
 170  
      *
 171  
      * @param str  set definition string
 172  
      */
 173  
     protected void add(final String str) {
 174  101
         if (str == null) {
 175  1
             return;
 176  
         }
 177  
 
 178  100
         final int len = str.length();
 179  100
         int pos = 0;
 180  266
         while (pos < len) {
 181  166
             final int remainder = len - pos;
 182  166
             if (remainder >= 4 && str.charAt(pos) == '^' && str.charAt(pos + 2) == '-') {
 183  
                 // negated range
 184  13
                 set.add(CharRange.isNotIn(str.charAt(pos + 1), str.charAt(pos + 3)));
 185  13
                 pos += 4;
 186  153
             } else if (remainder >= 3 && str.charAt(pos + 1) == '-') {
 187  
                 // range
 188  44
                 set.add(CharRange.isIn(str.charAt(pos), str.charAt(pos + 2)));
 189  44
                 pos += 3;
 190  109
             } else if (remainder >= 2 && str.charAt(pos) == '^') {
 191  
                 // negated char
 192  14
                 set.add(CharRange.isNot(str.charAt(pos + 1)));
 193  14
                 pos += 2;
 194  
             } else {
 195  
                 // char
 196  95
                 set.add(CharRange.is(str.charAt(pos)));
 197  95
                 pos += 1;
 198  
             }
 199  166
         }
 200  100
     }
 201  
 
 202  
     //-----------------------------------------------------------------------
 203  
     /**
 204  
      * <p>Gets the internal set as an array of CharRange objects.</p>
 205  
      *
 206  
      * @return an array of immutable CharRange objects
 207  
      * @since 2.0
 208  
      */
 209  
 // NOTE: This is no longer public as CharRange is no longer a public class. 
 210  
 //       It may be replaced when CharSet moves to Range.
 211  
     /*public*/ CharRange[] getCharRanges() {
 212  45
         return set.toArray(new CharRange[set.size()]);
 213  
     }
 214  
 
 215  
     //-----------------------------------------------------------------------
 216  
     /**
 217  
      * <p>Does the {@code CharSet} contain the specified
 218  
      * character {@code ch}.</p>
 219  
      *
 220  
      * @param ch  the character to check for
 221  
      * @return {@code true} if the set contains the characters
 222  
      */
 223  
     public boolean contains(final char ch) {
 224  211
         for (final CharRange range : set) {
 225  297
             if (range.contains(ch)) {
 226  118
                 return true;
 227  
             }
 228  179
         }
 229  93
         return false;
 230  
     }
 231  
 
 232  
     // Basics
 233  
     //-----------------------------------------------------------------------
 234  
     /**
 235  
      * <p>Compares two {@code CharSet} objects, returning true if they represent
 236  
      * exactly the same set of characters defined in the same way.</p>
 237  
      *
 238  
      * <p>The two sets {@code abc} and {@code a-c} are <i>not</i>
 239  
      * equal according to this method.</p>
 240  
      *
 241  
      * @param obj  the object to compare to
 242  
      * @return true if equal
 243  
      * @since 2.0
 244  
      */
 245  
     @Override
 246  
     public boolean equals(final Object obj) {
 247  16
         if (obj == this) {
 248  3
             return true;
 249  
         }
 250  13
         if (obj instanceof CharSet == false) {
 251  1
             return false;
 252  
         }
 253  12
         final CharSet other = (CharSet) obj;
 254  12
         return set.equals(other.set);
 255  
     }
 256  
 
 257  
     /**
 258  
      * <p>Gets a hash code compatible with the equals method.</p>
 259  
      *
 260  
      * @return a suitable hash code
 261  
      * @since 2.0
 262  
      */
 263  
     @Override
 264  
     public int hashCode() {
 265  12
         return 89 + set.hashCode();
 266  
     }
 267  
 
 268  
     /**
 269  
      * <p>Gets a string representation of the set.</p>
 270  
      *
 271  
      * @return string representation of the set
 272  
      */
 273  
     @Override
 274  
     public String toString() {
 275  10
         return set.toString();
 276  
     }
 277  
 
 278  
 }