Coverage Report - org.apache.commons.lang3.CharSequenceUtils
 
Classes in this File Line Coverage Branch Coverage Complexity
CharSequenceUtils
94%
69/73
82%
58/70
7,125
 
 1  
 /*
 2  
  * Licensed to the Apache Software Foundation (ASF) under one or more
 3  
  * contributor license agreements.  See the NOTICE file distributed with
 4  
  * this work for additional information regarding copyright ownership.
 5  
  * The ASF licenses this file to You under the Apache License, Version 2.0
 6  
  * (the "License"); you may not use this file except in compliance with
 7  
  * the License.  You may obtain a copy of the License at
 8  
  *
 9  
  *      http://www.apache.org/licenses/LICENSE-2.0
 10  
  *
 11  
  * Unless required by applicable law or agreed to in writing, software
 12  
  * distributed under the License is distributed on an "AS IS" BASIS,
 13  
  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 14  
  * See the License for the specific language governing permissions and
 15  
  * limitations under the License.
 16  
  */
 17  
 package org.apache.commons.lang3;
 18  
 
 19  
 /**
 20  
  * <p>Operations on {@link CharSequence} that are
 21  
  * {@code null} safe.</p>
 22  
  *
 23  
  * @see CharSequence
 24  
  * @since 3.0
 25  
  */
 26  
 public class CharSequenceUtils {
 27  
 
 28  
     private static final int NOT_FOUND = -1;
 29  
 
 30  
     /**
 31  
      * <p>{@code CharSequenceUtils} instances should NOT be constructed in
 32  
      * standard programming. </p>
 33  
      *
 34  
      * <p>This constructor is public to permit tools that require a JavaBean
 35  
      * instance to operate.</p>
 36  
      */
 37  
     public CharSequenceUtils() {
 38  1
         super();
 39  1
     }
 40  
 
 41  
     //-----------------------------------------------------------------------
 42  
     /**
 43  
      * <p>Returns a new {@code CharSequence} that is a subsequence of this
 44  
      * sequence starting with the {@code char} value at the specified index.</p>
 45  
      *
 46  
      * <p>This provides the {@code CharSequence} equivalent to {@link String#substring(int)}.
 47  
      * The length (in {@code char}) of the returned sequence is {@code length() - start},
 48  
      * so if {@code start == end} then an empty sequence is returned.</p>
 49  
      *
 50  
      * @param cs  the specified subsequence, null returns null
 51  
      * @param start  the start index, inclusive, valid
 52  
      * @return a new subsequence, may be null
 53  
      * @throws IndexOutOfBoundsException if {@code start} is negative or if
 54  
      *  {@code start} is greater than {@code length()}
 55  
      */
 56  
     public static CharSequence subSequence(final CharSequence cs, final int start) {
 57  10
         return cs == null ? null : cs.subSequence(start, cs.length());
 58  
     }
 59  
 
 60  
     //-----------------------------------------------------------------------
 61  
     /**
 62  
      * Returns the index within <code>cs</code> of the first occurrence of the
 63  
      * specified character, starting the search at the specified index.
 64  
      * <p>
 65  
      * If a character with value <code>searchChar</code> occurs in the
 66  
      * character sequence represented by the <code>cs</code>
 67  
      * object at an index no smaller than <code>start</code>, then
 68  
      * the index of the first such occurrence is returned. For values
 69  
      * of <code>searchChar</code> in the range from 0 to 0xFFFF (inclusive),
 70  
      * this is the smallest value <i>k</i> such that:
 71  
      * <blockquote><pre>
 72  
      * (this.charAt(<i>k</i>) == searchChar) &amp;&amp; (<i>k</i> &gt;= start)
 73  
      * </pre></blockquote>
 74  
      * is true. For other values of <code>searchChar</code>, it is the
 75  
      * smallest value <i>k</i> such that:
 76  
      * <blockquote><pre>
 77  
      * (this.codePointAt(<i>k</i>) == searchChar) &amp;&amp; (<i>k</i> &gt;= start)
 78  
      * </pre></blockquote>
 79  
      * is true. In either case, if no such character occurs inm <code>cs</code>
 80  
      * at or after position <code>start</code>, then
 81  
      * <code>-1</code> is returned.
 82  
      *
 83  
      * <p>
 84  
      * There is no restriction on the value of <code>start</code>. If it
 85  
      * is negative, it has the same effect as if it were zero: the entire
 86  
      * <code>CharSequence</code> may be searched. If it is greater than
 87  
      * the length of <code>cs</code>, it has the same effect as if it were
 88  
      * equal to the length of <code>cs</code>: <code>-1</code> is returned.
 89  
      *
 90  
      * <p>All indices are specified in <code>char</code> values
 91  
      * (Unicode code units).
 92  
      *
 93  
      * @param cs  the {@code CharSequence} to be processed, not null
 94  
      * @param searchChar  the char to be searched for
 95  
      * @param start  the start index, negative starts at the string start
 96  
      * @return the index where the search char was found, -1 if not found
 97  
      * @since 3.6 updated to behave more like <code>String</code>
 98  
      */
 99  
     static int indexOf(final CharSequence cs, final int searchChar, int start) {
 100  41
         if (cs instanceof String) {
 101  34
             return ((String) cs).indexOf(searchChar, start);
 102  
         }
 103  7
         final int sz = cs.length();
 104  7
         if (start < 0) {
 105  0
             start = 0;
 106  
         }
 107  7
         if (searchChar < Character.MIN_SUPPLEMENTARY_CODE_POINT) {
 108  6
             for (int i = start; i < sz; i++) {
 109  6
                 if (cs.charAt(i) == searchChar) {
 110  2
                     return i;
 111  
                 }
 112  
             }
 113  
         }
 114  
         //supplementary characters (LANG1300)
 115  5
         if (searchChar <= Character.MAX_CODE_POINT) {
 116  5
             char[] chars = Character.toChars(searchChar);
 117  7
             for (int i = start; i < sz - 1; i++) {
 118  5
                 char high = cs.charAt(i);
 119  5
                 char low = cs.charAt(i + 1);
 120  5
                 if (high == chars[0] && low == chars[1]) {
 121  3
                     return i;
 122  
                 }
 123  
             }
 124  
         }
 125  2
         return NOT_FOUND;
 126  
     }
 127  
 
 128  
     /**
 129  
      * Used by the indexOf(CharSequence methods) as a green implementation of indexOf.
 130  
      *
 131  
      * @param cs the {@code CharSequence} to be processed
 132  
      * @param searchChar the {@code CharSequence} to be searched for
 133  
      * @param start the start index
 134  
      * @return the index where the search sequence was found
 135  
      */
 136  
     static int indexOf(final CharSequence cs, final CharSequence searchChar, final int start) {
 137  206
         return cs.toString().indexOf(searchChar.toString(), start);
 138  
 //        if (cs instanceof String && searchChar instanceof String) {
 139  
 //            // TODO: Do we assume searchChar is usually relatively small;
 140  
 //            //       If so then calling toString() on it is better than reverting to
 141  
 //            //       the green implementation in the else block
 142  
 //            return ((String) cs).indexOf((String) searchChar, start);
 143  
 //        } else {
 144  
 //            // TODO: Implement rather than convert to String
 145  
 //            return cs.toString().indexOf(searchChar.toString(), start);
 146  
 //        }
 147  
     }
 148  
 
 149  
     /**
 150  
      * Returns the index within <code>cs</code> of the last occurrence of
 151  
      * the specified character, searching backward starting at the
 152  
      * specified index. For values of <code>searchChar</code> in the range
 153  
      * from 0 to 0xFFFF (inclusive), the index returned is the largest
 154  
      * value <i>k</i> such that:
 155  
      * <blockquote><pre>
 156  
      * (this.charAt(<i>k</i>) == searchChar) &amp;&amp; (<i>k</i> &lt;= start)
 157  
      * </pre></blockquote>
 158  
      * is true. For other values of <code>searchChar</code>, it is the
 159  
      * largest value <i>k</i> such that:
 160  
      * <blockquote><pre>
 161  
      * (this.codePointAt(<i>k</i>) == searchChar) &amp;&amp; (<i>k</i> &lt;= start)
 162  
      * </pre></blockquote>
 163  
      * is true. In either case, if no such character occurs in <code>cs</code>
 164  
      * at or before position <code>start</code>, then <code>-1</code> is returned.
 165  
      *
 166  
      * <p>All indices are specified in <code>char</code> values
 167  
      * (Unicode code units).
 168  
      *
 169  
      * @param cs  the {@code CharSequence} to be processed
 170  
      * @param searchChar  the char to be searched for
 171  
      * @param start  the start index, negative returns -1, beyond length starts at end
 172  
      * @return the index where the search char was found, -1 if not found
 173  
      * @since 3.6 updated to behave more like <code>String</code>
 174  
      */
 175  
     static int lastIndexOf(final CharSequence cs, final int searchChar, int start) {
 176  20
         if (cs instanceof String) {
 177  10
             return ((String) cs).lastIndexOf(searchChar, start);
 178  
         }
 179  10
         final int sz = cs.length();
 180  10
         if (start < 0) {
 181  0
             return NOT_FOUND;
 182  
         }
 183  10
         if (start >= sz) {
 184  1
             start = sz - 1;
 185  
         }
 186  10
         if (searchChar < Character.MIN_SUPPLEMENTARY_CODE_POINT) {
 187  4
             for (int i = start; i >= 0; --i) {
 188  4
                 if (cs.charAt(i) == searchChar) {
 189  2
                     return i;
 190  
                 }
 191  
             }
 192  
         }
 193  
         //supplementary characters (LANG1300)
 194  
         //NOTE - we must do a forward traversal for this to avoid duplicating code points
 195  8
         if (searchChar <= Character.MAX_CODE_POINT) {
 196  8
             char[] chars = Character.toChars(searchChar);
 197  
             //make sure it's not the last index
 198  8
             if (start == sz - 1) {
 199  1
                 return NOT_FOUND;
 200  
             }
 201  11
             for (int i = start; i >= 0; i--) {
 202  10
                 char high = cs.charAt(i);
 203  10
                 char low = cs.charAt(i + 1);
 204  10
                 if (chars[0] == high && chars[1] == low) {
 205  6
                     return i;
 206  
                 }
 207  
             }
 208  
         }
 209  1
         return NOT_FOUND;
 210  
     }
 211  
 
 212  
     /**
 213  
      * Used by the lastIndexOf(CharSequence methods) as a green implementation of lastIndexOf
 214  
      *
 215  
      * @param cs the {@code CharSequence} to be processed
 216  
      * @param searchChar the {@code CharSequence} to be searched for
 217  
      * @param start the start index
 218  
      * @return the index where the search sequence was found
 219  
      */
 220  
     static int lastIndexOf(final CharSequence cs, final CharSequence searchChar, final int start) {
 221  46
         return cs.toString().lastIndexOf(searchChar.toString(), start);
 222  
 //        if (cs instanceof String && searchChar instanceof String) {
 223  
 //            // TODO: Do we assume searchChar is usually relatively small;
 224  
 //            //       If so then calling toString() on it is better than reverting to
 225  
 //            //       the green implementation in the else block
 226  
 //            return ((String) cs).lastIndexOf((String) searchChar, start);
 227  
 //        } else {
 228  
 //            // TODO: Implement rather than convert to String
 229  
 //            return cs.toString().lastIndexOf(searchChar.toString(), start);
 230  
 //        }
 231  
     }
 232  
 
 233  
     /**
 234  
      * Green implementation of toCharArray.
 235  
      *
 236  
      * @param cs the {@code CharSequence} to be processed
 237  
      * @return the resulting char array
 238  
      */
 239  
     static char[] toCharArray(final CharSequence cs) {
 240  20
         if (cs instanceof String) {
 241  19
             return ((String) cs).toCharArray();
 242  
         }
 243  1
         final int sz = cs.length();
 244  1
         final char[] array = new char[cs.length()];
 245  8
         for (int i = 0; i < sz; i++) {
 246  7
             array[i] = cs.charAt(i);
 247  
         }
 248  1
         return array;
 249  
     }
 250  
 
 251  
     /**
 252  
      * Green implementation of regionMatches.
 253  
      *
 254  
      * @param cs the {@code CharSequence} to be processed
 255  
      * @param ignoreCase whether or not to be case insensitive
 256  
      * @param thisStart the index to start on the {@code cs} CharSequence
 257  
      * @param substring the {@code CharSequence} to be looked for
 258  
      * @param start the index to start on the {@code substring} CharSequence
 259  
      * @param length character length of the region
 260  
      * @return whether the region matched
 261  
      */
 262  
     static boolean regionMatches(final CharSequence cs, final boolean ignoreCase, final int thisStart,
 263  
             final CharSequence substring, final int start, final int length)    {
 264  289
         if (cs instanceof String && substring instanceof String) {
 265  255
             return ((String) cs).regionMatches(ignoreCase, thisStart, (String) substring, start, length);
 266  
         }
 267  34
         int index1 = thisStart;
 268  34
         int index2 = start;
 269  34
         int tmpLen = length;
 270  
 
 271  
         // Extract these first so we detect NPEs the same as the java.lang.String version
 272  34
         final int srcLen = cs.length() - thisStart;
 273  32
         final int otherLen = substring.length() - start;
 274  
 
 275  
         // Check for invalid parameters
 276  30
         if (thisStart < 0 || start < 0 || length < 0) {
 277  1
             return false;
 278  
         }
 279  
 
 280  
         // Check that the regions are long enough
 281  29
         if (srcLen < length || otherLen < length) {
 282  1
             return false;
 283  
         }
 284  
 
 285  79
         while (tmpLen-- > 0) {
 286  60
             final char c1 = cs.charAt(index1++);
 287  60
             final char c2 = substring.charAt(index2++);
 288  
 
 289  60
             if (c1 == c2) {
 290  48
                 continue;
 291  
             }
 292  
 
 293  12
             if (!ignoreCase) {
 294  9
                 return false;
 295  
             }
 296  
 
 297  
             // The same check as in String.regionMatches():
 298  3
             if (Character.toUpperCase(c1) != Character.toUpperCase(c2)
 299  0
                     && Character.toLowerCase(c1) != Character.toLowerCase(c2)) {
 300  0
                 return false;
 301  
             }
 302  3
         }
 303  
 
 304  19
         return true;
 305  
     }
 306  
 }