001/* 002 * Licensed to the Apache Software Foundation (ASF) under one or more 003 * contributor license agreements. See the NOTICE file distributed with 004 * this work for additional information regarding copyright ownership. 005 * The ASF licenses this file to You under the Apache License, Version 2.0 006 * (the "License"); you may not use this file except in compliance with 007 * the License. You may obtain a copy of the License at 008 * 009 * http://www.apache.org/licenses/LICENSE-2.0 010 * 011 * Unless required by applicable law or agreed to in writing, software 012 * distributed under the License is distributed on an "AS IS" BASIS, 013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 014 * See the License for the specific language governing permissions and 015 * limitations under the License. 016 */ 017package org.apache.commons.lang3; 018 019/** 020 * <p>Operations on {@link CharSequence} that are 021 * {@code null} safe.</p> 022 * 023 * @see CharSequence 024 * @since 3.0 025 */ 026public class CharSequenceUtils { 027 028 private static final int NOT_FOUND = -1; 029 030 /** 031 * <p>{@code CharSequenceUtils} instances should NOT be constructed in 032 * standard programming. </p> 033 * 034 * <p>This constructor is public to permit tools that require a JavaBean 035 * instance to operate.</p> 036 */ 037 public CharSequenceUtils() { 038 super(); 039 } 040 041 //----------------------------------------------------------------------- 042 /** 043 * <p>Returns a new {@code CharSequence} that is a subsequence of this 044 * sequence starting with the {@code char} value at the specified index.</p> 045 * 046 * <p>This provides the {@code CharSequence} equivalent to {@link String#substring(int)}. 047 * The length (in {@code char}) of the returned sequence is {@code length() - start}, 048 * so if {@code start == end} then an empty sequence is returned.</p> 049 * 050 * @param cs the specified subsequence, null returns null 051 * @param start the start index, inclusive, valid 052 * @return a new subsequence, may be null 053 * @throws IndexOutOfBoundsException if {@code start} is negative or if 054 * {@code start} is greater than {@code length()} 055 */ 056 public static CharSequence subSequence(final CharSequence cs, final int start) { 057 return cs == null ? null : cs.subSequence(start, cs.length()); 058 } 059 060 //----------------------------------------------------------------------- 061 /** 062 * Returns the index within <code>cs</code> of the first occurrence of the 063 * specified character, starting the search at the specified index. 064 * <p> 065 * If a character with value <code>searchChar</code> occurs in the 066 * character sequence represented by the <code>cs</code> 067 * object at an index no smaller than <code>start</code>, then 068 * the index of the first such occurrence is returned. For values 069 * of <code>searchChar</code> in the range from 0 to 0xFFFF (inclusive), 070 * this is the smallest value <i>k</i> such that: 071 * <blockquote><pre> 072 * (this.charAt(<i>k</i>) == searchChar) && (<i>k</i> >= start) 073 * </pre></blockquote> 074 * is true. For other values of <code>searchChar</code>, it is the 075 * smallest value <i>k</i> such that: 076 * <blockquote><pre> 077 * (this.codePointAt(<i>k</i>) == searchChar) && (<i>k</i> >= start) 078 * </pre></blockquote> 079 * is true. In either case, if no such character occurs inm <code>cs</code> 080 * at or after position <code>start</code>, then 081 * <code>-1</code> is returned. 082 * 083 * <p> 084 * There is no restriction on the value of <code>start</code>. If it 085 * is negative, it has the same effect as if it were zero: the entire 086 * <code>CharSequence</code> may be searched. If it is greater than 087 * the length of <code>cs</code>, it has the same effect as if it were 088 * equal to the length of <code>cs</code>: <code>-1</code> is returned. 089 * 090 * <p>All indices are specified in <code>char</code> values 091 * (Unicode code units). 092 * 093 * @param cs the {@code CharSequence} to be processed, not null 094 * @param searchChar the char to be searched for 095 * @param start the start index, negative starts at the string start 096 * @return the index where the search char was found, -1 if not found 097 * @since 3.6 updated to behave more like <code>String</code> 098 */ 099 static int indexOf(final CharSequence cs, final int searchChar, int start) { 100 if (cs instanceof String) { 101 return ((String) cs).indexOf(searchChar, start); 102 } 103 final int sz = cs.length(); 104 if (start < 0) { 105 start = 0; 106 } 107 if (searchChar < Character.MIN_SUPPLEMENTARY_CODE_POINT) { 108 for (int i = start; i < sz; i++) { 109 if (cs.charAt(i) == searchChar) { 110 return i; 111 } 112 } 113 } 114 //supplementary characters (LANG1300) 115 if (searchChar <= Character.MAX_CODE_POINT) { 116 char[] chars = Character.toChars(searchChar); 117 for (int i = start; i < sz - 1; i++) { 118 char high = cs.charAt(i); 119 char low = cs.charAt(i + 1); 120 if (high == chars[0] && low == chars[1]) { 121 return i; 122 } 123 } 124 } 125 return NOT_FOUND; 126 } 127 128 /** 129 * Used by the indexOf(CharSequence methods) as a green implementation of indexOf. 130 * 131 * @param cs the {@code CharSequence} to be processed 132 * @param searchChar the {@code CharSequence} to be searched for 133 * @param start the start index 134 * @return the index where the search sequence was found 135 */ 136 static int indexOf(final CharSequence cs, final CharSequence searchChar, final int start) { 137 return cs.toString().indexOf(searchChar.toString(), start); 138// if (cs instanceof String && searchChar instanceof String) { 139// // TODO: Do we assume searchChar is usually relatively small; 140// // If so then calling toString() on it is better than reverting to 141// // the green implementation in the else block 142// return ((String) cs).indexOf((String) searchChar, start); 143// } else { 144// // TODO: Implement rather than convert to String 145// return cs.toString().indexOf(searchChar.toString(), start); 146// } 147 } 148 149 /** 150 * Returns the index within <code>cs</code> of the last occurrence of 151 * the specified character, searching backward starting at the 152 * specified index. For values of <code>searchChar</code> in the range 153 * from 0 to 0xFFFF (inclusive), the index returned is the largest 154 * value <i>k</i> such that: 155 * <blockquote><pre> 156 * (this.charAt(<i>k</i>) == searchChar) && (<i>k</i> <= start) 157 * </pre></blockquote> 158 * is true. For other values of <code>searchChar</code>, it is the 159 * largest value <i>k</i> such that: 160 * <blockquote><pre> 161 * (this.codePointAt(<i>k</i>) == searchChar) && (<i>k</i> <= start) 162 * </pre></blockquote> 163 * is true. In either case, if no such character occurs in <code>cs</code> 164 * at or before position <code>start</code>, then <code>-1</code> is returned. 165 * 166 * <p>All indices are specified in <code>char</code> values 167 * (Unicode code units). 168 * 169 * @param cs the {@code CharSequence} to be processed 170 * @param searchChar the char to be searched for 171 * @param start the start index, negative returns -1, beyond length starts at end 172 * @return the index where the search char was found, -1 if not found 173 * @since 3.6 updated to behave more like <code>String</code> 174 */ 175 static int lastIndexOf(final CharSequence cs, final int searchChar, int start) { 176 if (cs instanceof String) { 177 return ((String) cs).lastIndexOf(searchChar, start); 178 } 179 final int sz = cs.length(); 180 if (start < 0) { 181 return NOT_FOUND; 182 } 183 if (start >= sz) { 184 start = sz - 1; 185 } 186 if (searchChar < Character.MIN_SUPPLEMENTARY_CODE_POINT) { 187 for (int i = start; i >= 0; --i) { 188 if (cs.charAt(i) == searchChar) { 189 return i; 190 } 191 } 192 } 193 //supplementary characters (LANG1300) 194 //NOTE - we must do a forward traversal for this to avoid duplicating code points 195 if (searchChar <= Character.MAX_CODE_POINT) { 196 char[] chars = Character.toChars(searchChar); 197 //make sure it's not the last index 198 if (start == sz - 1) { 199 return NOT_FOUND; 200 } 201 for (int i = start; i >= 0; i--) { 202 char high = cs.charAt(i); 203 char low = cs.charAt(i + 1); 204 if (chars[0] == high && chars[1] == low) { 205 return i; 206 } 207 } 208 } 209 return NOT_FOUND; 210 } 211 212 /** 213 * Used by the lastIndexOf(CharSequence methods) as a green implementation of lastIndexOf 214 * 215 * @param cs the {@code CharSequence} to be processed 216 * @param searchChar the {@code CharSequence} to be searched for 217 * @param start the start index 218 * @return the index where the search sequence was found 219 */ 220 static int lastIndexOf(final CharSequence cs, final CharSequence searchChar, final int start) { 221 return cs.toString().lastIndexOf(searchChar.toString(), start); 222// if (cs instanceof String && searchChar instanceof String) { 223// // TODO: Do we assume searchChar is usually relatively small; 224// // If so then calling toString() on it is better than reverting to 225// // the green implementation in the else block 226// return ((String) cs).lastIndexOf((String) searchChar, start); 227// } else { 228// // TODO: Implement rather than convert to String 229// return cs.toString().lastIndexOf(searchChar.toString(), start); 230// } 231 } 232 233 /** 234 * Green implementation of toCharArray. 235 * 236 * @param cs the {@code CharSequence} to be processed 237 * @return the resulting char array 238 */ 239 static char[] toCharArray(final CharSequence cs) { 240 if (cs instanceof String) { 241 return ((String) cs).toCharArray(); 242 } 243 final int sz = cs.length(); 244 final char[] array = new char[cs.length()]; 245 for (int i = 0; i < sz; i++) { 246 array[i] = cs.charAt(i); 247 } 248 return array; 249 } 250 251 /** 252 * Green implementation of regionMatches. 253 * 254 * @param cs the {@code CharSequence} to be processed 255 * @param ignoreCase whether or not to be case insensitive 256 * @param thisStart the index to start on the {@code cs} CharSequence 257 * @param substring the {@code CharSequence} to be looked for 258 * @param start the index to start on the {@code substring} CharSequence 259 * @param length character length of the region 260 * @return whether the region matched 261 */ 262 static boolean regionMatches(final CharSequence cs, final boolean ignoreCase, final int thisStart, 263 final CharSequence substring, final int start, final int length) { 264 if (cs instanceof String && substring instanceof String) { 265 return ((String) cs).regionMatches(ignoreCase, thisStart, (String) substring, start, length); 266 } 267 int index1 = thisStart; 268 int index2 = start; 269 int tmpLen = length; 270 271 // Extract these first so we detect NPEs the same as the java.lang.String version 272 final int srcLen = cs.length() - thisStart; 273 final int otherLen = substring.length() - start; 274 275 // Check for invalid parameters 276 if (thisStart < 0 || start < 0 || length < 0) { 277 return false; 278 } 279 280 // Check that the regions are long enough 281 if (srcLen < length || otherLen < length) { 282 return false; 283 } 284 285 while (tmpLen-- > 0) { 286 final char c1 = cs.charAt(index1++); 287 final char c2 = substring.charAt(index2++); 288 289 if (c1 == c2) { 290 continue; 291 } 292 293 if (!ignoreCase) { 294 return false; 295 } 296 297 // The same check as in String.regionMatches(): 298 if (Character.toUpperCase(c1) != Character.toUpperCase(c2) 299 && Character.toLowerCase(c1) != Character.toLowerCase(c2)) { 300 return false; 301 } 302 } 303 304 return true; 305 } 306}