1 /*
2 * Licensed to the Apache Software Foundation (ASF) under one or more
3 * contributor license agreements. See the NOTICE file distributed with
4 * this work for additional information regarding copyright ownership.
5 * The ASF licenses this file to You under the Apache License, Version 2.0
6 * (the "License"); you may not use this file except in compliance with
7 * the License. You may obtain a copy of the License at
8 *
9 * https://www.apache.org/licenses/LICENSE-2.0
10 *
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
16 */
17 package org.apache.commons.lang3;
18
19 /**
20 * Operations on {@link CharSequence} that are
21 * {@code null} safe.
22 *
23 * @see CharSequence
24 * @since 3.0
25 */
26 public class CharSequenceUtils {
27
28 private static final int NOT_FOUND = -1;
29
30 static final int TO_STRING_LIMIT = 16;
31
32 private static boolean checkLaterThan1(final CharSequence cs, final CharSequence searchChar, final int len2, final int start1) {
33 for (int i = 1, j = len2 - 1; i <= j; i++, j--) {
34 if (cs.charAt(start1 + i) != searchChar.charAt(i) || cs.charAt(start1 + j) != searchChar.charAt(j)) {
35 return false;
36 }
37 }
38 return true;
39 }
40
41 /**
42 * Used by the indexOf(CharSequence methods) as a green implementation of indexOf.
43 *
44 * @param cs the {@link CharSequence} to be processed
45 * @param searchChar the {@link CharSequence} to be searched for
46 * @param start the start index
47 * @return the index where the search sequence was found, or {@code -1} if there is no such occurrence.
48 */
49 static int indexOf(final CharSequence cs, final CharSequence searchChar, final int start) {
50 if (cs == null || searchChar == null) {
51 return StringUtils.INDEX_NOT_FOUND;
52 }
53 if (cs instanceof String) {
54 return ((String) cs).indexOf(searchChar.toString(), start);
55 }
56 if (cs instanceof StringBuilder) {
57 return ((StringBuilder) cs).indexOf(searchChar.toString(), start);
58 }
59 if (cs instanceof StringBuffer) {
60 return ((StringBuffer) cs).indexOf(searchChar.toString(), start);
61 }
62 return cs.toString().indexOf(searchChar.toString(), start);
63 // if (cs instanceof String && searchChar instanceof String) {
64 // // TODO: Do we assume searchChar is usually relatively small;
65 // // If so then calling toString() on it is better than reverting to
66 // // the green implementation in the else block
67 // return ((String) cs).indexOf((String) searchChar, start);
68 // } else {
69 // // TODO: Implement rather than convert to String
70 // return cs.toString().indexOf(searchChar.toString(), start);
71 // }
72 }
73
74 /**
75 * Returns the index within {@code cs} of the first occurrence of the specified character, starting the search at the specified index.
76 * <p>
77 * If a character with value {@code searchChar} occurs in the character sequence represented by the {@code cs} object at an index no smaller than
78 * {@code start}, then the index of the first such occurrence is returned. For values of {@code searchChar} in the range from 0 to 0xFFFF (inclusive), this
79 * is the smallest value <em>k</em> such that:
80 * </p>
81 *
82 * <pre>
83 * (this.charAt(<em>k</em>) == searchChar) && (<em>k</em> >= start)
84 * </pre>
85 * <p>
86 * is true. For other values of {@code searchChar}, it is the smallest value <em>k</em> such that:
87 * </p>
88 *
89 * <pre>
90 * (this.codePointAt(<em>k</em>) == searchChar) && (<em>k</em> >= start)
91 * </pre>
92 * <p>
93 * is true. In either case, if no such character occurs inm {@code cs} at or after position {@code start}, then {@code -1} is returned.
94 * </p>
95 * <p>
96 * There is no restriction on the value of {@code start}. If it is negative, it has the same effect as if it were zero: the entire {@link CharSequence} may
97 * be searched. If it is greater than the length of {@code cs}, it has the same effect as if it were equal to the length of {@code cs}: {@code -1} is
98 * returned.
99 * </p>
100 * <p>
101 * All indices are specified in {@code char} values (Unicode code units).
102 * </p>
103 *
104 * @param cs the {@link CharSequence} to be processed, not null
105 * @param searchChar the char to be searched for
106 * @param start the start index, negative starts at the string start
107 * @return the index where the search char was found, -1 if not found
108 * @since 3.6 updated to behave more like {@link String}
109 */
110 static int indexOf(final CharSequence cs, final int searchChar, int start) {
111 if (cs instanceof String) {
112 return ((String) cs).indexOf(searchChar, start);
113 }
114 final int sz = cs.length();
115 if (start < 0) {
116 start = 0;
117 }
118 if (searchChar < Character.MIN_SUPPLEMENTARY_CODE_POINT) {
119 for (int i = start; i < sz; i++) {
120 if (cs.charAt(i) == searchChar) {
121 return i;
122 }
123 }
124 return NOT_FOUND;
125 }
126 //supplementary characters (LANG1300)
127 if (searchChar <= Character.MAX_CODE_POINT) {
128 final char[] chars = Character.toChars(searchChar);
129 for (int i = start; i < sz - 1; i++) {
130 final char high = cs.charAt(i);
131 final char low = cs.charAt(i + 1);
132 if (high == chars[0] && low == chars[1]) {
133 return i;
134 }
135 }
136 }
137 return NOT_FOUND;
138 }
139
140 /**
141 * Used by the lastIndexOf(CharSequence methods) as a green implementation of lastIndexOf
142 *
143 * @param cs the {@link CharSequence} to be processed
144 * @param searchChar the {@link CharSequence} to find
145 * @param start the start index
146 * @return the index where the search sequence was found
147 */
148 static int lastIndexOf(final CharSequence cs, final CharSequence searchChar, int start) {
149 if (searchChar == null || cs == null) {
150 return NOT_FOUND;
151 }
152 if (searchChar instanceof String) {
153 if (cs instanceof String) {
154 return ((String) cs).lastIndexOf((String) searchChar, start);
155 }
156 if (cs instanceof StringBuilder) {
157 return ((StringBuilder) cs).lastIndexOf((String) searchChar, start);
158 }
159 if (cs instanceof StringBuffer) {
160 return ((StringBuffer) cs).lastIndexOf((String) searchChar, start);
161 }
162 }
163
164 final int len1 = cs.length();
165 final int len2 = searchChar.length();
166
167 if (start > len1) {
168 start = len1;
169 }
170
171 if (start < 0 || len2 > len1) {
172 return NOT_FOUND;
173 }
174
175 if (len2 == 0) {
176 return start;
177 }
178
179 if (len2 <= TO_STRING_LIMIT) {
180 if (cs instanceof String) {
181 return ((String) cs).lastIndexOf(searchChar.toString(), start);
182 }
183 if (cs instanceof StringBuilder) {
184 return ((StringBuilder) cs).lastIndexOf(searchChar.toString(), start);
185 }
186 if (cs instanceof StringBuffer) {
187 return ((StringBuffer) cs).lastIndexOf(searchChar.toString(), start);
188 }
189 }
190
191 if (start + len2 > len1) {
192 start = len1 - len2;
193 }
194
195 final char char0 = searchChar.charAt(0);
196
197 int i = start;
198 while (true) {
199 while (cs.charAt(i) != char0) {
200 i--;
201 if (i < 0) {
202 return NOT_FOUND;
203 }
204 }
205 if (checkLaterThan1(cs, searchChar, len2, i)) {
206 return i;
207 }
208 i--;
209 if (i < 0) {
210 return NOT_FOUND;
211 }
212 }
213 }
214
215 /**
216 * Returns the index within {@code cs} of the last occurrence of the specified character, searching backward starting at the specified index. For values of
217 * {@code searchChar} in the range from 0 to 0xFFFF (inclusive), the index returned is the largest value <em>k</em> such that:
218 * </p>
219 *
220 * <pre>
221 * (this.charAt(<em>k</em>) == searchChar) && (<em>k</em> <= start)
222 * </pre>
223 *
224 * <p>
225 * is true. For other values of {@code searchChar}, it is the largest value <em>k</em> such that:
226 * <p>
227 *
228 * <pre>
229 * (this.codePointAt(<em>k</em>) == searchChar) && (<em>k</em> <= start)
230 * </pre>
231 *
232 * <p>
233 * is true. In either case, if no such character occurs in {@code cs} at or before position {@code start}, then {@code -1} is returned.
234 * </p>
235 * <p>
236 * All indices are specified in {@code char} values (Unicode code units).
237 * </p>
238 *
239 * @param cs the {@link CharSequence} to be processed.
240 * @param searchChar the char to be searched for.
241 * @param start the start index, negative returns -1, beyond length starts at end.
242 * @return the index where the search char was found, -1 if not found.
243 * @since 3.6 updated to behave more like {@link String}
244 */
245 static int lastIndexOf(final CharSequence cs, final int searchChar, int start) {
246 if (cs instanceof String) {
247 return ((String) cs).lastIndexOf(searchChar, start);
248 }
249 final int sz = cs.length();
250 if (start < 0) {
251 return NOT_FOUND;
252 }
253 if (start >= sz) {
254 start = sz - 1;
255 }
256 if (searchChar < Character.MIN_SUPPLEMENTARY_CODE_POINT) {
257 for (int i = start; i >= 0; --i) {
258 if (cs.charAt(i) == searchChar) {
259 return i;
260 }
261 }
262 return NOT_FOUND;
263 }
264 //supplementary characters (LANG1300)
265 //NOTE - we must do a forward traversal for this to avoid duplicating code points
266 if (searchChar <= Character.MAX_CODE_POINT) {
267 final char[] chars = Character.toChars(searchChar);
268 //make sure it's not the last index
269 if (start == sz - 1) {
270 return NOT_FOUND;
271 }
272 for (int i = start; i >= 0; i--) {
273 final char high = cs.charAt(i);
274 final char low = cs.charAt(i + 1);
275 if (chars[0] == high && chars[1] == low) {
276 return i;
277 }
278 }
279 }
280 return NOT_FOUND;
281 }
282
283 /**
284 * Green implementation of regionMatches.
285 *
286 * @param cs the {@link CharSequence} to be processed
287 * @param ignoreCase whether or not to be case-insensitive
288 * @param thisStart the index to start on the {@code cs} CharSequence
289 * @param substring the {@link CharSequence} to be looked for
290 * @param start the index to start on the {@code substring} CharSequence
291 * @param length character length of the region
292 * @return whether the region matched
293 * @see String#regionMatches(boolean, int, String, int, int)
294 */
295 static boolean regionMatches(final CharSequence cs, final boolean ignoreCase, final int thisStart,
296 final CharSequence substring, final int start, final int length) {
297 if (cs instanceof String && substring instanceof String) {
298 return ((String) cs).regionMatches(ignoreCase, thisStart, (String) substring, start, length);
299 }
300 int index1 = thisStart;
301 int index2 = start;
302 int tmpLen = length;
303
304 // Extract these first so we detect NPEs the same as the java.lang.String version
305 final int srcLen = cs.length() - thisStart;
306 final int otherLen = substring.length() - start;
307
308 // Check for invalid parameters
309 if (thisStart < 0 || start < 0 || length < 0) {
310 return false;
311 }
312
313 // Check that the regions are long enough
314 if (srcLen < length || otherLen < length) {
315 return false;
316 }
317
318 while (tmpLen-- > 0) {
319 final char c1 = cs.charAt(index1++);
320 final char c2 = substring.charAt(index2++);
321
322 if (c1 == c2) {
323 continue;
324 }
325
326 if (!ignoreCase) {
327 return false;
328 }
329
330 // The real same check as in String#regionMatches(boolean, int, String, int, int):
331 final char u1 = Character.toUpperCase(c1);
332 final char u2 = Character.toUpperCase(c2);
333 if (u1 != u2 && Character.toLowerCase(u1) != Character.toLowerCase(u2)) {
334 return false;
335 }
336 }
337
338 return true;
339 }
340
341 /**
342 * Returns a new {@link CharSequence} that is a subsequence of this
343 * sequence starting with the {@code char} value at the specified index.
344 *
345 * <p>This provides the {@link CharSequence} equivalent to {@link String#substring(int)}.
346 * The length (in {@code char}) of the returned sequence is {@code length() - start},
347 * so if {@code start == end} then an empty sequence is returned.</p>
348 *
349 * @param cs the specified subsequence, null returns null
350 * @param start the start index, inclusive, valid
351 * @return a new subsequence, may be null
352 * @throws IndexOutOfBoundsException if {@code start} is negative or if
353 * {@code start} is greater than {@code length()}
354 */
355 public static CharSequence subSequence(final CharSequence cs, final int start) {
356 return cs == null ? null : cs.subSequence(start, cs.length());
357 }
358
359 /**
360 * Converts the given CharSequence to a char[].
361 *
362 * @param source the {@link CharSequence} to be processed.
363 * @return the resulting char array, never null.
364 * @since 3.11
365 */
366 public static char[] toCharArray(final CharSequence source) {
367 final int len = StringUtils.length(source);
368 if (len == 0) {
369 return ArrayUtils.EMPTY_CHAR_ARRAY;
370 }
371 if (source instanceof String) {
372 return ((String) source).toCharArray();
373 }
374 final char[] array = new char[len];
375 for (int i = 0; i < len; i++) {
376 array[i] = source.charAt(i);
377 }
378 return array;
379 }
380
381 /**
382 * {@link CharSequenceUtils} instances should NOT be constructed in
383 * standard programming.
384 *
385 * <p>This constructor is public to permit tools that require a JavaBean
386 * instance to operate.</p>
387 *
388 * @deprecated TODO Make private in 4.0.
389 */
390 @Deprecated
391 public CharSequenceUtils() {
392 // empty
393 }
394 }