1 /* 2 * Licensed to the Apache Software Foundation (ASF) under one or more 3 * contributor license agreements. See the NOTICE file distributed with 4 * this work for additional information regarding copyright ownership. 5 * The ASF licenses this file to You under the Apache License, Version 2.0 6 * (the "License"); you may not use this file except in compliance with 7 * the License. You may obtain a copy of the License at 8 * 9 * https://www.apache.org/licenses/LICENSE-2.0 10 * 11 * Unless required by applicable law or agreed to in writing, software 12 * distributed under the License is distributed on an "AS IS" BASIS, 13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 * See the License for the specific language governing permissions and 15 * limitations under the License. 16 */ 17 package org.apache.commons.lang3; 18 19 /** 20 * Operations on {@link CharSequence} that are 21 * {@code null} safe. 22 * 23 * @see CharSequence 24 * @since 3.0 25 */ 26 public class CharSequenceUtils { 27 28 private static final int NOT_FOUND = -1; 29 30 static final int TO_STRING_LIMIT = 16; 31 32 private static boolean checkLaterThan1(final CharSequence cs, final CharSequence searchChar, final int len2, final int start1) { 33 for (int i = 1, j = len2 - 1; i <= j; i++, j--) { 34 if (cs.charAt(start1 + i) != searchChar.charAt(i) || cs.charAt(start1 + j) != searchChar.charAt(j)) { 35 return false; 36 } 37 } 38 return true; 39 } 40 41 /** 42 * Used by the indexOf(CharSequence methods) as a green implementation of indexOf. 43 * 44 * @param cs the {@link CharSequence} to be processed 45 * @param searchChar the {@link CharSequence} to be searched for 46 * @param start the start index 47 * @return the index where the search sequence was found, or {@code -1} if there is no such occurrence. 48 */ 49 static int indexOf(final CharSequence cs, final CharSequence searchChar, final int start) { 50 if (cs == null || searchChar == null) { 51 return StringUtils.INDEX_NOT_FOUND; 52 } 53 if (cs instanceof String) { 54 return ((String) cs).indexOf(searchChar.toString(), start); 55 } 56 if (cs instanceof StringBuilder) { 57 return ((StringBuilder) cs).indexOf(searchChar.toString(), start); 58 } 59 if (cs instanceof StringBuffer) { 60 return ((StringBuffer) cs).indexOf(searchChar.toString(), start); 61 } 62 return cs.toString().indexOf(searchChar.toString(), start); 63 // if (cs instanceof String && searchChar instanceof String) { 64 // // TODO: Do we assume searchChar is usually relatively small; 65 // // If so then calling toString() on it is better than reverting to 66 // // the green implementation in the else block 67 // return ((String) cs).indexOf((String) searchChar, start); 68 // } else { 69 // // TODO: Implement rather than convert to String 70 // return cs.toString().indexOf(searchChar.toString(), start); 71 // } 72 } 73 74 /** 75 * Returns the index within {@code cs} of the first occurrence of the 76 * specified character, starting the search at the specified index. 77 * <p> 78 * If a character with value {@code searchChar} occurs in the 79 * character sequence represented by the {@code cs} 80 * object at an index no smaller than {@code start}, then 81 * the index of the first such occurrence is returned. For values 82 * of {@code searchChar} in the range from 0 to 0xFFFF (inclusive), 83 * this is the smallest value <em>k</em> such that: 84 * </p> 85 * <blockquote><pre> 86 * (this.charAt(<em>k</em>) == searchChar) && (<em>k</em> >= start) 87 * </pre></blockquote> 88 * is true. For other values of {@code searchChar}, it is the 89 * smallest value <em>k</em> such that: 90 * <blockquote><pre> 91 * (this.codePointAt(<em>k</em>) == searchChar) && (<em>k</em> >= start) 92 * </pre></blockquote> 93 * <p> 94 * is true. In either case, if no such character occurs inm {@code cs} 95 * at or after position {@code start}, then 96 * {@code -1} is returned. 97 * </p> 98 * <p> 99 * There is no restriction on the value of {@code start}. If it 100 * is negative, it has the same effect as if it were zero: the entire 101 * {@link CharSequence} may be searched. If it is greater than 102 * the length of {@code cs}, it has the same effect as if it were 103 * equal to the length of {@code cs}: {@code -1} is returned. 104 * </p> 105 * <p>All indices are specified in {@code char} values 106 * (Unicode code units). 107 * </p> 108 * 109 * @param cs the {@link CharSequence} to be processed, not null 110 * @param searchChar the char to be searched for 111 * @param start the start index, negative starts at the string start 112 * @return the index where the search char was found, -1 if not found 113 * @since 3.6 updated to behave more like {@link String} 114 */ 115 static int indexOf(final CharSequence cs, final int searchChar, int start) { 116 if (cs instanceof String) { 117 return ((String) cs).indexOf(searchChar, start); 118 } 119 final int sz = cs.length(); 120 if (start < 0) { 121 start = 0; 122 } 123 if (searchChar < Character.MIN_SUPPLEMENTARY_CODE_POINT) { 124 for (int i = start; i < sz; i++) { 125 if (cs.charAt(i) == searchChar) { 126 return i; 127 } 128 } 129 return NOT_FOUND; 130 } 131 //supplementary characters (LANG1300) 132 if (searchChar <= Character.MAX_CODE_POINT) { 133 final char[] chars = Character.toChars(searchChar); 134 for (int i = start; i < sz - 1; i++) { 135 final char high = cs.charAt(i); 136 final char low = cs.charAt(i + 1); 137 if (high == chars[0] && low == chars[1]) { 138 return i; 139 } 140 } 141 } 142 return NOT_FOUND; 143 } 144 145 /** 146 * Used by the lastIndexOf(CharSequence methods) as a green implementation of lastIndexOf 147 * 148 * @param cs the {@link CharSequence} to be processed 149 * @param searchChar the {@link CharSequence} to find 150 * @param start the start index 151 * @return the index where the search sequence was found 152 */ 153 static int lastIndexOf(final CharSequence cs, final CharSequence searchChar, int start) { 154 if (searchChar == null || cs == null) { 155 return NOT_FOUND; 156 } 157 if (searchChar instanceof String) { 158 if (cs instanceof String) { 159 return ((String) cs).lastIndexOf((String) searchChar, start); 160 } 161 if (cs instanceof StringBuilder) { 162 return ((StringBuilder) cs).lastIndexOf((String) searchChar, start); 163 } 164 if (cs instanceof StringBuffer) { 165 return ((StringBuffer) cs).lastIndexOf((String) searchChar, start); 166 } 167 } 168 169 final int len1 = cs.length(); 170 final int len2 = searchChar.length(); 171 172 if (start > len1) { 173 start = len1; 174 } 175 176 if (start < 0 || len2 > len1) { 177 return NOT_FOUND; 178 } 179 180 if (len2 == 0) { 181 return start; 182 } 183 184 if (len2 <= TO_STRING_LIMIT) { 185 if (cs instanceof String) { 186 return ((String) cs).lastIndexOf(searchChar.toString(), start); 187 } 188 if (cs instanceof StringBuilder) { 189 return ((StringBuilder) cs).lastIndexOf(searchChar.toString(), start); 190 } 191 if (cs instanceof StringBuffer) { 192 return ((StringBuffer) cs).lastIndexOf(searchChar.toString(), start); 193 } 194 } 195 196 if (start + len2 > len1) { 197 start = len1 - len2; 198 } 199 200 final char char0 = searchChar.charAt(0); 201 202 int i = start; 203 while (true) { 204 while (cs.charAt(i) != char0) { 205 i--; 206 if (i < 0) { 207 return NOT_FOUND; 208 } 209 } 210 if (checkLaterThan1(cs, searchChar, len2, i)) { 211 return i; 212 } 213 i--; 214 if (i < 0) { 215 return NOT_FOUND; 216 } 217 } 218 } 219 220 /** 221 * Returns the index within {@code cs} of the last occurrence of 222 * the specified character, searching backward starting at the 223 * specified index. For values of {@code searchChar} in the range 224 * from 0 to 0xFFFF (inclusive), the index returned is the largest 225 * value <em>k</em> such that: 226 * <blockquote><pre> 227 * (this.charAt(<em>k</em>) == searchChar) && (<em>k</em> <= start) 228 * </pre></blockquote> 229 * is true. For other values of {@code searchChar}, it is the 230 * largest value <em>k</em> such that: 231 * <blockquote><pre> 232 * (this.codePointAt(<em>k</em>) == searchChar) && (<em>k</em> <= start) 233 * </pre></blockquote> 234 * is true. In either case, if no such character occurs in {@code cs} 235 * at or before position {@code start}, then {@code -1} is returned. 236 * 237 * <p> 238 * All indices are specified in {@code char} values 239 * (Unicode code units). 240 * </p> 241 * 242 * @param cs the {@link CharSequence} to be processed 243 * @param searchChar the char to be searched for 244 * @param start the start index, negative returns -1, beyond length starts at end 245 * @return the index where the search char was found, -1 if not found 246 * @since 3.6 updated to behave more like {@link String} 247 */ 248 static int lastIndexOf(final CharSequence cs, final int searchChar, int start) { 249 if (cs instanceof String) { 250 return ((String) cs).lastIndexOf(searchChar, start); 251 } 252 final int sz = cs.length(); 253 if (start < 0) { 254 return NOT_FOUND; 255 } 256 if (start >= sz) { 257 start = sz - 1; 258 } 259 if (searchChar < Character.MIN_SUPPLEMENTARY_CODE_POINT) { 260 for (int i = start; i >= 0; --i) { 261 if (cs.charAt(i) == searchChar) { 262 return i; 263 } 264 } 265 return NOT_FOUND; 266 } 267 //supplementary characters (LANG1300) 268 //NOTE - we must do a forward traversal for this to avoid duplicating code points 269 if (searchChar <= Character.MAX_CODE_POINT) { 270 final char[] chars = Character.toChars(searchChar); 271 //make sure it's not the last index 272 if (start == sz - 1) { 273 return NOT_FOUND; 274 } 275 for (int i = start; i >= 0; i--) { 276 final char high = cs.charAt(i); 277 final char low = cs.charAt(i + 1); 278 if (chars[0] == high && chars[1] == low) { 279 return i; 280 } 281 } 282 } 283 return NOT_FOUND; 284 } 285 286 /** 287 * Green implementation of regionMatches. 288 * 289 * @param cs the {@link CharSequence} to be processed 290 * @param ignoreCase whether or not to be case-insensitive 291 * @param thisStart the index to start on the {@code cs} CharSequence 292 * @param substring the {@link CharSequence} to be looked for 293 * @param start the index to start on the {@code substring} CharSequence 294 * @param length character length of the region 295 * @return whether the region matched 296 * @see String#regionMatches(boolean, int, String, int, int) 297 */ 298 static boolean regionMatches(final CharSequence cs, final boolean ignoreCase, final int thisStart, 299 final CharSequence substring, final int start, final int length) { 300 if (cs instanceof String && substring instanceof String) { 301 return ((String) cs).regionMatches(ignoreCase, thisStart, (String) substring, start, length); 302 } 303 int index1 = thisStart; 304 int index2 = start; 305 int tmpLen = length; 306 307 // Extract these first so we detect NPEs the same as the java.lang.String version 308 final int srcLen = cs.length() - thisStart; 309 final int otherLen = substring.length() - start; 310 311 // Check for invalid parameters 312 if (thisStart < 0 || start < 0 || length < 0) { 313 return false; 314 } 315 316 // Check that the regions are long enough 317 if (srcLen < length || otherLen < length) { 318 return false; 319 } 320 321 while (tmpLen-- > 0) { 322 final char c1 = cs.charAt(index1++); 323 final char c2 = substring.charAt(index2++); 324 325 if (c1 == c2) { 326 continue; 327 } 328 329 if (!ignoreCase) { 330 return false; 331 } 332 333 // The real same check as in String#regionMatches(boolean, int, String, int, int): 334 final char u1 = Character.toUpperCase(c1); 335 final char u2 = Character.toUpperCase(c2); 336 if (u1 != u2 && Character.toLowerCase(u1) != Character.toLowerCase(u2)) { 337 return false; 338 } 339 } 340 341 return true; 342 } 343 344 /** 345 * Returns a new {@link CharSequence} that is a subsequence of this 346 * sequence starting with the {@code char} value at the specified index. 347 * 348 * <p>This provides the {@link CharSequence} equivalent to {@link String#substring(int)}. 349 * The length (in {@code char}) of the returned sequence is {@code length() - start}, 350 * so if {@code start == end} then an empty sequence is returned.</p> 351 * 352 * @param cs the specified subsequence, null returns null 353 * @param start the start index, inclusive, valid 354 * @return a new subsequence, may be null 355 * @throws IndexOutOfBoundsException if {@code start} is negative or if 356 * {@code start} is greater than {@code length()} 357 */ 358 public static CharSequence subSequence(final CharSequence cs, final int start) { 359 return cs == null ? null : cs.subSequence(start, cs.length()); 360 } 361 362 /** 363 * Converts the given CharSequence to a char[]. 364 * 365 * @param source the {@link CharSequence} to be processed. 366 * @return the resulting char array, never null. 367 * @since 3.11 368 */ 369 public static char[] toCharArray(final CharSequence source) { 370 final int len = StringUtils.length(source); 371 if (len == 0) { 372 return ArrayUtils.EMPTY_CHAR_ARRAY; 373 } 374 if (source instanceof String) { 375 return ((String) source).toCharArray(); 376 } 377 final char[] array = new char[len]; 378 for (int i = 0; i < len; i++) { 379 array[i] = source.charAt(i); 380 } 381 return array; 382 } 383 384 /** 385 * {@link CharSequenceUtils} instances should NOT be constructed in 386 * standard programming. 387 * 388 * <p>This constructor is public to permit tools that require a JavaBean 389 * instance to operate.</p> 390 * 391 * @deprecated TODO Make private in 4.0. 392 */ 393 @Deprecated 394 public CharSequenceUtils() { 395 // empty 396 } 397 }