1 /*
2 * Licensed to the Apache Software Foundation (ASF) under one or more
3 * contributor license agreements. See the NOTICE file distributed with
4 * this work for additional information regarding copyright ownership.
5 * The ASF licenses this file to You under the Apache License, Version 2.0
6 * (the "License"); you may not use this file except in compliance with
7 * the License. You may obtain a copy of the License at
8 *
9 * http://www.apache.org/licenses/LICENSE-2.0
10 *
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
16 */
17 package org.apache.commons.lang3;
18
19 import java.io.UnsupportedEncodingException;
20 import java.text.Normalizer;
21 import java.util.ArrayList;
22 import java.util.Arrays;
23 import java.util.Iterator;
24 import java.util.List;
25 import java.util.Locale;
26 import java.util.regex.Pattern;
27
28 /**
29 * <p>Operations on {@link java.lang.String} that are
30 * {@code null} safe.</p>
31 *
32 * <ul>
33 * <li><b>IsEmpty/IsBlank</b>
34 * - checks if a String contains text</li>
35 * <li><b>Trim/Strip</b>
36 * - removes leading and trailing whitespace</li>
37 * <li><b>Equals</b>
38 * - compares two strings null-safe</li>
39 * <li><b>startsWith</b>
40 * - check if a String starts with a prefix null-safe</li>
41 * <li><b>endsWith</b>
42 * - check if a String ends with a suffix null-safe</li>
43 * <li><b>IndexOf/LastIndexOf/Contains</b>
 *      - null-safe index-of checks</li>
45 * <li><b>IndexOfAny/LastIndexOfAny/IndexOfAnyBut/LastIndexOfAnyBut</b>
46 * - index-of any of a set of Strings</li>
47 * <li><b>ContainsOnly/ContainsNone/ContainsAny</b>
48 * - does String contains only/none/any of these characters</li>
49 * <li><b>Substring/Left/Right/Mid</b>
50 * - null-safe substring extractions</li>
51 * <li><b>SubstringBefore/SubstringAfter/SubstringBetween</b>
52 * - substring extraction relative to other strings</li>
53 * <li><b>Split/Join</b>
54 * - splits a String into an array of substrings and vice versa</li>
55 * <li><b>Remove/Delete</b>
56 * - removes part of a String</li>
57 * <li><b>Replace/Overlay</b>
58 * - Searches a String and replaces one String with another</li>
59 * <li><b>Chomp/Chop</b>
60 * - removes the last part of a String</li>
61 * <li><b>LeftPad/RightPad/Center/Repeat</b>
62 * - pads a String</li>
63 * <li><b>UpperCase/LowerCase/SwapCase/Capitalize/Uncapitalize</b>
64 * - changes the case of a String</li>
65 * <li><b>CountMatches</b>
66 * - counts the number of occurrences of one String in another</li>
67 * <li><b>IsAlpha/IsNumeric/IsWhitespace/IsAsciiPrintable</b>
68 * - checks the characters in a String</li>
69 * <li><b>DefaultString</b>
70 * - protects against a null input String</li>
71 * <li><b>Reverse/ReverseDelimited</b>
72 * - reverses a String</li>
73 * <li><b>Abbreviate</b>
74 * - abbreviates a string using ellipsis</li>
75 * <li><b>Difference</b>
76 * - compares Strings and reports on their differences</li>
77 * <li><b>LevenshteinDistance</b>
78 * - the number of changes needed to change one String into another</li>
79 * </ul>
80 *
81 * <p>The {@code StringUtils} class defines certain words related to
82 * String handling.</p>
83 *
84 * <ul>
85 * <li>null - {@code null}</li>
86 * <li>empty - a zero-length string ({@code ""})</li>
87 * <li>space - the space character ({@code ' '}, char 32)</li>
88 * <li>whitespace - the characters defined by {@link Character#isWhitespace(char)}</li>
 *  <li>trim - the characters &lt;= 32 as in {@link String#trim()}</li>
90 * </ul>
91 *
92 * <p>{@code StringUtils} handles {@code null} input Strings quietly.
93 * That is to say that a {@code null} input will return {@code null}.
94 * Where a {@code boolean} or {@code int} is being returned
95 * details vary by method.</p>
96 *
97 * <p>A side effect of the {@code null} handling is that a
98 * {@code NullPointerException} should be considered a bug in
99 * {@code StringUtils}.</p>
100 *
101 * <p>Methods in this class give sample code to explain their operation.
102 * The symbol {@code *} is used to indicate any input including {@code null}.</p>
103 *
104 * <p>#ThreadSafe#</p>
105 * @see java.lang.String
106 * @since 1.0
107 * @version $Id: StringUtils.java 1437065 2013-01-22 17:29:33Z ggregory $
108 */
109 //@Immutable
110 public class StringUtils {
111 // Performance testing notes (JDK 1.4, Jul03, scolebourne)
112 // Whitespace:
113 // Character.isWhitespace() is faster than WHITESPACE.indexOf()
114 // where WHITESPACE is a string of all whitespace characters
115 //
116 // Character access:
117 // String.charAt(n) versus toCharArray(), then array[n]
118 // String.charAt(n) is about 15% worse for a 10K string
119 // They are about equal for a length 50 string
120 // String.charAt(n) is about 4 times better for a length 3 string
121 // String.charAt(n) is best bet overall
122 //
123 // Append:
124 // String.concat about twice as fast as StringBuffer.append
125 // (not sure who tested this)
126
127 /**
128 * A String for a space character.
129 *
130 * @since 3.2
131 */
132 public static final String SPACE = " ";
133
134 /**
135 * The empty String {@code ""}.
136 * @since 2.0
137 */
138 public static final String EMPTY = "";
139
140 /**
141 * Represents a failed index search.
142 * @since 2.1
143 */
144 public static final int INDEX_NOT_FOUND = -1;
145
146 /**
147 * <p>The maximum size to which the padding constant(s) can expand.</p>
148 */
149 private static final int PAD_LIMIT = 8192;
150
151 /**
152 * A regex pattern for recognizing blocks of whitespace characters.
153 * The apparent convolutedness of the pattern serves the purpose of
154 * ignoring "blocks" consisting of only a single space: the pattern
155 * is used only to normalize whitespace, condensing "blocks" down to a
156 * single space, thus matching the same would likely cause a great
157 * many noop replacements.
158 */
159 private static final Pattern WHITESPACE_PATTERN = Pattern.compile("(?: \\s|[\\s&&[^ ]])\\s*");
160
161 /**
162 * <p>{@code StringUtils} instances should NOT be constructed in
163 * standard programming. Instead, the class should be used as
164 * {@code StringUtils.trim(" foo ");}.</p>
165 *
166 * <p>This constructor is public to permit tools that require a JavaBean
167 * instance to operate.</p>
168 */
169 public StringUtils() {
170 super();
171 }
172
173 // Empty checks
174 //-----------------------------------------------------------------------
175 /**
176 * <p>Checks if a CharSequence is empty ("") or null.</p>
177 *
178 * <pre>
179 * StringUtils.isEmpty(null) = true
180 * StringUtils.isEmpty("") = true
181 * StringUtils.isEmpty(" ") = false
182 * StringUtils.isEmpty("bob") = false
183 * StringUtils.isEmpty(" bob ") = false
184 * </pre>
185 *
186 * <p>NOTE: This method changed in Lang version 2.0.
187 * It no longer trims the CharSequence.
188 * That functionality is available in isBlank().</p>
189 *
190 * @param cs the CharSequence to check, may be null
191 * @return {@code true} if the CharSequence is empty or null
192 * @since 3.0 Changed signature from isEmpty(String) to isEmpty(CharSequence)
193 */
194 public static boolean isEmpty(final CharSequence cs) {
195 return cs == null || cs.length() == 0;
196 }
197
198 /**
199 * <p>Checks if a CharSequence is not empty ("") and not null.</p>
200 *
201 * <pre>
202 * StringUtils.isNotEmpty(null) = false
203 * StringUtils.isNotEmpty("") = false
204 * StringUtils.isNotEmpty(" ") = true
205 * StringUtils.isNotEmpty("bob") = true
206 * StringUtils.isNotEmpty(" bob ") = true
207 * </pre>
208 *
209 * @param cs the CharSequence to check, may be null
210 * @return {@code true} if the CharSequence is not empty and not null
211 * @since 3.0 Changed signature from isNotEmpty(String) to isNotEmpty(CharSequence)
212 */
213 public static boolean isNotEmpty(final CharSequence cs) {
214 return !StringUtils.isEmpty(cs);
215 }
216
217 /**
218 * <p>Checks if a CharSequence is whitespace, empty ("") or null.</p>
219 *
220 * <pre>
221 * StringUtils.isBlank(null) = true
222 * StringUtils.isBlank("") = true
223 * StringUtils.isBlank(" ") = true
224 * StringUtils.isBlank("bob") = false
225 * StringUtils.isBlank(" bob ") = false
226 * </pre>
227 *
228 * @param cs the CharSequence to check, may be null
229 * @return {@code true} if the CharSequence is null, empty or whitespace
230 * @since 2.0
231 * @since 3.0 Changed signature from isBlank(String) to isBlank(CharSequence)
232 */
233 public static boolean isBlank(final CharSequence cs) {
234 int strLen;
235 if (cs == null || (strLen = cs.length()) == 0) {
236 return true;
237 }
238 for (int i = 0; i < strLen; i++) {
239 if (Character.isWhitespace(cs.charAt(i)) == false) {
240 return false;
241 }
242 }
243 return true;
244 }
245
246 /**
247 * <p>Checks if a CharSequence is not empty (""), not null and not whitespace only.</p>
248 *
249 * <pre>
250 * StringUtils.isNotBlank(null) = false
251 * StringUtils.isNotBlank("") = false
252 * StringUtils.isNotBlank(" ") = false
253 * StringUtils.isNotBlank("bob") = true
254 * StringUtils.isNotBlank(" bob ") = true
255 * </pre>
256 *
257 * @param cs the CharSequence to check, may be null
258 * @return {@code true} if the CharSequence is
259 * not empty and not null and not whitespace
260 * @since 2.0
261 * @since 3.0 Changed signature from isNotBlank(String) to isNotBlank(CharSequence)
262 */
263 public static boolean isNotBlank(final CharSequence cs) {
264 return !StringUtils.isBlank(cs);
265 }
266
267 // Trim
268 //-----------------------------------------------------------------------
269 /**
 * <p>Removes control characters (char &lt;= 32) from both
271 * ends of this String, handling {@code null} by returning
272 * {@code null}.</p>
273 *
274 * <p>The String is trimmed using {@link String#trim()}.
 * Trim removes start and end characters &lt;= 32.
276 * To strip whitespace use {@link #strip(String)}.</p>
277 *
278 * <p>To trim your choice of characters, use the
279 * {@link #strip(String, String)} methods.</p>
280 *
281 * <pre>
282 * StringUtils.trim(null) = null
283 * StringUtils.trim("") = ""
284 * StringUtils.trim(" ") = ""
285 * StringUtils.trim("abc") = "abc"
286 * StringUtils.trim(" abc ") = "abc"
287 * </pre>
288 *
289 * @param str the String to be trimmed, may be null
290 * @return the trimmed string, {@code null} if null String input
291 */
292 public static String trim(final String str) {
293 return str == null ? null : str.trim();
294 }
295
296 /**
 * <p>Removes control characters (char &lt;= 32) from both
 * ends of this String returning {@code null} if the String is
 * empty ("") after the trim or if it is {@code null}.</p>
300 *
301 * <p>The String is trimmed using {@link String#trim()}.
 * Trim removes start and end characters &lt;= 32.
303 * To strip whitespace use {@link #stripToNull(String)}.</p>
304 *
305 * <pre>
306 * StringUtils.trimToNull(null) = null
307 * StringUtils.trimToNull("") = null
308 * StringUtils.trimToNull(" ") = null
309 * StringUtils.trimToNull("abc") = "abc"
310 * StringUtils.trimToNull(" abc ") = "abc"
311 * </pre>
312 *
313 * @param str the String to be trimmed, may be null
314 * @return the trimmed String,
 *  {@code null} if only chars &lt;= 32, empty or null String input
316 * @since 2.0
317 */
318 public static String trimToNull(final String str) {
319 final String ts = trim(str);
320 return isEmpty(ts) ? null : ts;
321 }
322
323 /**
 * <p>Removes control characters (char &lt;= 32) from both
 * ends of this String returning an empty String ("") if the String
 * is empty ("") after the trim or if it is {@code null}.</p>
327 *
328 * <p>The String is trimmed using {@link String#trim()}.
 * Trim removes start and end characters &lt;= 32.
330 * To strip whitespace use {@link #stripToEmpty(String)}.</p>
331 *
332 * <pre>
333 * StringUtils.trimToEmpty(null) = ""
334 * StringUtils.trimToEmpty("") = ""
335 * StringUtils.trimToEmpty(" ") = ""
336 * StringUtils.trimToEmpty("abc") = "abc"
337 * StringUtils.trimToEmpty(" abc ") = "abc"
338 * </pre>
339 *
340 * @param str the String to be trimmed, may be null
341 * @return the trimmed String, or an empty String if {@code null} input
342 * @since 2.0
343 */
344 public static String trimToEmpty(final String str) {
345 return str == null ? EMPTY : str.trim();
346 }
347
348 // Stripping
349 //-----------------------------------------------------------------------
350 /**
351 * <p>Strips whitespace from the start and end of a String.</p>
352 *
353 * <p>This is similar to {@link #trim(String)} but removes whitespace.
354 * Whitespace is defined by {@link Character#isWhitespace(char)}.</p>
355 *
356 * <p>A {@code null} input String returns {@code null}.</p>
357 *
358 * <pre>
359 * StringUtils.strip(null) = null
360 * StringUtils.strip("") = ""
361 * StringUtils.strip(" ") = ""
362 * StringUtils.strip("abc") = "abc"
363 * StringUtils.strip(" abc") = "abc"
364 * StringUtils.strip("abc ") = "abc"
365 * StringUtils.strip(" abc ") = "abc"
366 * StringUtils.strip(" ab c ") = "ab c"
367 * </pre>
368 *
369 * @param str the String to remove whitespace from, may be null
370 * @return the stripped String, {@code null} if null String input
371 */
372 public static String strip(final String str) {
373 return strip(str, null);
374 }
375
376 /**
377 * <p>Strips whitespace from the start and end of a String returning
378 * {@code null} if the String is empty ("") after the strip.</p>
379 *
380 * <p>This is similar to {@link #trimToNull(String)} but removes whitespace.
381 * Whitespace is defined by {@link Character#isWhitespace(char)}.</p>
382 *
383 * <pre>
384 * StringUtils.stripToNull(null) = null
385 * StringUtils.stripToNull("") = null
386 * StringUtils.stripToNull(" ") = null
387 * StringUtils.stripToNull("abc") = "abc"
388 * StringUtils.stripToNull(" abc") = "abc"
389 * StringUtils.stripToNull("abc ") = "abc"
390 * StringUtils.stripToNull(" abc ") = "abc"
391 * StringUtils.stripToNull(" ab c ") = "ab c"
392 * </pre>
393 *
394 * @param str the String to be stripped, may be null
395 * @return the stripped String,
396 * {@code null} if whitespace, empty or null String input
397 * @since 2.0
398 */
399 public static String stripToNull(String str) {
400 if (str == null) {
401 return null;
402 }
403 str = strip(str, null);
404 return str.length() == 0 ? null : str;
405 }
406
407 /**
408 * <p>Strips whitespace from the start and end of a String returning
409 * an empty String if {@code null} input.</p>
410 *
411 * <p>This is similar to {@link #trimToEmpty(String)} but removes whitespace.
412 * Whitespace is defined by {@link Character#isWhitespace(char)}.</p>
413 *
414 * <pre>
415 * StringUtils.stripToEmpty(null) = ""
416 * StringUtils.stripToEmpty("") = ""
417 * StringUtils.stripToEmpty(" ") = ""
418 * StringUtils.stripToEmpty("abc") = "abc"
419 * StringUtils.stripToEmpty(" abc") = "abc"
420 * StringUtils.stripToEmpty("abc ") = "abc"
421 * StringUtils.stripToEmpty(" abc ") = "abc"
422 * StringUtils.stripToEmpty(" ab c ") = "ab c"
423 * </pre>
424 *
425 * @param str the String to be stripped, may be null
426 * @return the trimmed String, or an empty String if {@code null} input
427 * @since 2.0
428 */
429 public static String stripToEmpty(final String str) {
430 return str == null ? EMPTY : strip(str, null);
431 }
432
433 /**
434 * <p>Strips any of a set of characters from the start and end of a String.
435 * This is similar to {@link String#trim()} but allows the characters
436 * to be stripped to be controlled.</p>
437 *
438 * <p>A {@code null} input String returns {@code null}.
439 * An empty string ("") input returns the empty string.</p>
440 *
441 * <p>If the stripChars String is {@code null}, whitespace is
442 * stripped as defined by {@link Character#isWhitespace(char)}.
443 * Alternatively use {@link #strip(String)}.</p>
444 *
445 * <pre>
446 * StringUtils.strip(null, *) = null
447 * StringUtils.strip("", *) = ""
448 * StringUtils.strip("abc", null) = "abc"
449 * StringUtils.strip(" abc", null) = "abc"
450 * StringUtils.strip("abc ", null) = "abc"
451 * StringUtils.strip(" abc ", null) = "abc"
452 * StringUtils.strip(" abcyx", "xyz") = " abc"
453 * </pre>
454 *
455 * @param str the String to remove characters from, may be null
456 * @param stripChars the characters to remove, null treated as whitespace
457 * @return the stripped String, {@code null} if null String input
458 */
459 public static String strip(String str, final String stripChars) {
460 if (isEmpty(str)) {
461 return str;
462 }
463 str = stripStart(str, stripChars);
464 return stripEnd(str, stripChars);
465 }
466
467 /**
468 * <p>Strips any of a set of characters from the start of a String.</p>
469 *
470 * <p>A {@code null} input String returns {@code null}.
471 * An empty string ("") input returns the empty string.</p>
472 *
473 * <p>If the stripChars String is {@code null}, whitespace is
474 * stripped as defined by {@link Character#isWhitespace(char)}.</p>
475 *
476 * <pre>
477 * StringUtils.stripStart(null, *) = null
478 * StringUtils.stripStart("", *) = ""
479 * StringUtils.stripStart("abc", "") = "abc"
480 * StringUtils.stripStart("abc", null) = "abc"
481 * StringUtils.stripStart(" abc", null) = "abc"
482 * StringUtils.stripStart("abc ", null) = "abc "
483 * StringUtils.stripStart(" abc ", null) = "abc "
484 * StringUtils.stripStart("yxabc ", "xyz") = "abc "
485 * </pre>
486 *
487 * @param str the String to remove characters from, may be null
488 * @param stripChars the characters to remove, null treated as whitespace
489 * @return the stripped String, {@code null} if null String input
490 */
491 public static String stripStart(final String str, final String stripChars) {
492 int strLen;
493 if (str == null || (strLen = str.length()) == 0) {
494 return str;
495 }
496 int start = 0;
497 if (stripChars == null) {
498 while (start != strLen && Character.isWhitespace(str.charAt(start))) {
499 start++;
500 }
501 } else if (stripChars.length() == 0) {
502 return str;
503 } else {
504 while (start != strLen && stripChars.indexOf(str.charAt(start)) != INDEX_NOT_FOUND) {
505 start++;
506 }
507 }
508 return str.substring(start);
509 }
510
511 /**
512 * <p>Strips any of a set of characters from the end of a String.</p>
513 *
514 * <p>A {@code null} input String returns {@code null}.
515 * An empty string ("") input returns the empty string.</p>
516 *
517 * <p>If the stripChars String is {@code null}, whitespace is
518 * stripped as defined by {@link Character#isWhitespace(char)}.</p>
519 *
520 * <pre>
521 * StringUtils.stripEnd(null, *) = null
522 * StringUtils.stripEnd("", *) = ""
523 * StringUtils.stripEnd("abc", "") = "abc"
524 * StringUtils.stripEnd("abc", null) = "abc"
525 * StringUtils.stripEnd(" abc", null) = " abc"
526 * StringUtils.stripEnd("abc ", null) = "abc"
527 * StringUtils.stripEnd(" abc ", null) = " abc"
528 * StringUtils.stripEnd(" abcyx", "xyz") = " abc"
529 * StringUtils.stripEnd("120.00", ".0") = "12"
530 * </pre>
531 *
532 * @param str the String to remove characters from, may be null
533 * @param stripChars the set of characters to remove, null treated as whitespace
534 * @return the stripped String, {@code null} if null String input
535 */
536 public static String stripEnd(final String str, final String stripChars) {
537 int end;
538 if (str == null || (end = str.length()) == 0) {
539 return str;
540 }
541
542 if (stripChars == null) {
543 while (end != 0 && Character.isWhitespace(str.charAt(end - 1))) {
544 end--;
545 }
546 } else if (stripChars.length() == 0) {
547 return str;
548 } else {
549 while (end != 0 && stripChars.indexOf(str.charAt(end - 1)) != INDEX_NOT_FOUND) {
550 end--;
551 }
552 }
553 return str.substring(0, end);
554 }
555
556 // StripAll
557 //-----------------------------------------------------------------------
558 /**
559 * <p>Strips whitespace from the start and end of every String in an array.
560 * Whitespace is defined by {@link Character#isWhitespace(char)}.</p>
561 *
562 * <p>A new array is returned each time, except for length zero.
563 * A {@code null} array will return {@code null}.
564 * An empty array will return itself.
565 * A {@code null} array entry will be ignored.</p>
566 *
567 * <pre>
568 * StringUtils.stripAll(null) = null
569 * StringUtils.stripAll([]) = []
570 * StringUtils.stripAll(["abc", " abc"]) = ["abc", "abc"]
571 * StringUtils.stripAll(["abc ", null]) = ["abc", null]
572 * </pre>
573 *
574 * @param strs the array to remove whitespace from, may be null
575 * @return the stripped Strings, {@code null} if null array input
576 */
577 public static String[] stripAll(final String... strs) {
578 return stripAll(strs, null);
579 }
580
581 /**
582 * <p>Strips any of a set of characters from the start and end of every
 * String in an array.
584 * Whitespace is defined by {@link Character#isWhitespace(char)}.</p>
585 *
586 * <p>A new array is returned each time, except for length zero.
587 * A {@code null} array will return {@code null}.
588 * An empty array will return itself.
589 * A {@code null} array entry will be ignored.
590 * A {@code null} stripChars will strip whitespace as defined by
591 * {@link Character#isWhitespace(char)}.</p>
592 *
593 * <pre>
594 * StringUtils.stripAll(null, *) = null
595 * StringUtils.stripAll([], *) = []
596 * StringUtils.stripAll(["abc", " abc"], null) = ["abc", "abc"]
597 * StringUtils.stripAll(["abc ", null], null) = ["abc", null]
598 * StringUtils.stripAll(["abc ", null], "yz") = ["abc ", null]
599 * StringUtils.stripAll(["yabcz", null], "yz") = ["abc", null]
600 * </pre>
601 *
602 * @param strs the array to remove characters from, may be null
603 * @param stripChars the characters to remove, null treated as whitespace
604 * @return the stripped Strings, {@code null} if null array input
605 */
606 public static String[] stripAll(final String[] strs, final String stripChars) {
607 int strsLen;
608 if (strs == null || (strsLen = strs.length) == 0) {
609 return strs;
610 }
611 final String[] newArr = new String[strsLen];
612 for (int i = 0; i < strsLen; i++) {
613 newArr[i] = strip(strs[i], stripChars);
614 }
615 return newArr;
616 }
617
618 /**
619 * <p>Removes diacritics (~= accents) from a string. The case will not be altered.</p>
620 * <p>For instance, 'à' will be replaced by 'a'.</p>
621 * <p>Note that ligatures will be left as is.</p>
622 *
623 * <pre>
624 * StringUtils.stripAccents(null) = null
625 * StringUtils.stripAccents("") = ""
626 * StringUtils.stripAccents("control") = "control"
627 * StringUtils.stripAccents("éclair") = "eclair"
628 * </pre>
629 *
630 * @param input String to be stripped
631 * @return input text with diacritics removed
632 *
633 * @since 3.0
634 */
635 // See also Lucene's ASCIIFoldingFilter (Lucene 2.9) that replaces accented characters by their unaccented equivalent (and uncommitted bug fix: https://issues.apache.org/jira/browse/LUCENE-1343?focusedCommentId=12858907&page=com.atlassian.jira.plugin.system.issuetabpanels%3Acomment-tabpanel#action_12858907).
636 public static String stripAccents(final String input) {
637 if(input == null) {
638 return null;
639 }
640 final Pattern pattern = Pattern.compile("\\p{InCombiningDiacriticalMarks}+");//$NON-NLS-1$
641 final String decomposed = Normalizer.normalize(input, Normalizer.Form.NFD);
642 // Note that this doesn't correctly remove ligatures...
643 return pattern.matcher(decomposed).replaceAll("");//$NON-NLS-1$
644 }
645
646 // Equals
647 //-----------------------------------------------------------------------
648 /**
649 * <p>Compares two CharSequences, returning {@code true} if they represent
650 * equal sequences of characters.</p>
651 *
652 * <p>{@code null}s are handled without exceptions. Two {@code null}
653 * references are considered to be equal. The comparison is case sensitive.</p>
654 *
655 * <pre>
656 * StringUtils.equals(null, null) = true
657 * StringUtils.equals(null, "abc") = false
658 * StringUtils.equals("abc", null) = false
659 * StringUtils.equals("abc", "abc") = true
660 * StringUtils.equals("abc", "ABC") = false
661 * </pre>
662 *
663 * @see Object#equals(Object)
664 * @param cs1 the first CharSequence, may be {@code null}
665 * @param cs2 the second CharSequence, may be {@code null}
666 * @return {@code true} if the CharSequences are equal (case-sensitive), or both {@code null}
667 * @since 3.0 Changed signature from equals(String, String) to equals(CharSequence, CharSequence)
668 */
669 public static boolean equals(final CharSequence cs1, final CharSequence cs2) {
670 if (cs1 == cs2) {
671 return true;
672 }
673 if (cs1 == null || cs2 == null) {
674 return false;
675 }
676 if (cs1 instanceof String && cs2 instanceof String) {
677 return cs1.equals(cs2);
678 }
679 return CharSequenceUtils.regionMatches(cs1, false, 0, cs2, 0, Math.max(cs1.length(), cs2.length()));
680 }
681
682 /**
683 * <p>Compares two CharSequences, returning {@code true} if they represent
684 * equal sequences of characters, ignoring case.</p>
685 *
686 * <p>{@code null}s are handled without exceptions. Two {@code null}
687 * references are considered equal. Comparison is case insensitive.</p>
688 *
689 * <pre>
690 * StringUtils.equalsIgnoreCase(null, null) = true
691 * StringUtils.equalsIgnoreCase(null, "abc") = false
692 * StringUtils.equalsIgnoreCase("abc", null) = false
693 * StringUtils.equalsIgnoreCase("abc", "abc") = true
694 * StringUtils.equalsIgnoreCase("abc", "ABC") = true
695 * </pre>
696 *
697 * @param str1 the first CharSequence, may be null
698 * @param str2 the second CharSequence, may be null
699 * @return {@code true} if the CharSequence are equal, case insensitive, or
700 * both {@code null}
701 * @since 3.0 Changed signature from equalsIgnoreCase(String, String) to equalsIgnoreCase(CharSequence, CharSequence)
702 */
703 public static boolean equalsIgnoreCase(final CharSequence str1, final CharSequence str2) {
704 if (str1 == null || str2 == null) {
705 return str1 == str2;
706 } else if (str1 == str2) {
707 return true;
708 } else if (str1.length() != str2.length()) {
709 return false;
710 } else {
711 return CharSequenceUtils.regionMatches(str1, true, 0, str2, 0, str1.length());
712 }
713 }
714
715 // IndexOf
716 //-----------------------------------------------------------------------
717 /**
718 * <p>Finds the first index within a CharSequence, handling {@code null}.
719 * This method uses {@link String#indexOf(int, int)} if possible.</p>
720 *
721 * <p>A {@code null} or empty ("") CharSequence will return {@code INDEX_NOT_FOUND (-1)}.</p>
722 *
723 * <pre>
724 * StringUtils.indexOf(null, *) = -1
725 * StringUtils.indexOf("", *) = -1
726 * StringUtils.indexOf("aabaabaa", 'a') = 0
727 * StringUtils.indexOf("aabaabaa", 'b') = 2
728 * </pre>
729 *
730 * @param seq the CharSequence to check, may be null
731 * @param searchChar the character to find
732 * @return the first index of the search character,
733 * -1 if no match or {@code null} string input
734 * @since 2.0
735 * @since 3.0 Changed signature from indexOf(String, int) to indexOf(CharSequence, int)
736 */
737 public static int indexOf(final CharSequence seq, final int searchChar) {
738 if (isEmpty(seq)) {
739 return INDEX_NOT_FOUND;
740 }
741 return CharSequenceUtils.indexOf(seq, searchChar, 0);
742 }
743
744 /**
745 * <p>Finds the first index within a CharSequence from a start position,
746 * handling {@code null}.
747 * This method uses {@link String#indexOf(int, int)} if possible.</p>
748 *
749 * <p>A {@code null} or empty ("") CharSequence will return {@code (INDEX_NOT_FOUND) -1}.
750 * A negative start position is treated as zero.
751 * A start position greater than the string length returns {@code -1}.</p>
752 *
753 * <pre>
754 * StringUtils.indexOf(null, *, *) = -1
755 * StringUtils.indexOf("", *, *) = -1
756 * StringUtils.indexOf("aabaabaa", 'b', 0) = 2
757 * StringUtils.indexOf("aabaabaa", 'b', 3) = 5
758 * StringUtils.indexOf("aabaabaa", 'b', 9) = -1
759 * StringUtils.indexOf("aabaabaa", 'b', -1) = 2
760 * </pre>
761 *
762 * @param seq the CharSequence to check, may be null
763 * @param searchChar the character to find
764 * @param startPos the start position, negative treated as zero
765 * @return the first index of the search character,
766 * -1 if no match or {@code null} string input
767 * @since 2.0
768 * @since 3.0 Changed signature from indexOf(String, int, int) to indexOf(CharSequence, int, int)
769 */
770 public static int indexOf(final CharSequence seq, final int searchChar, final int startPos) {
771 if (isEmpty(seq)) {
772 return INDEX_NOT_FOUND;
773 }
774 return CharSequenceUtils.indexOf(seq, searchChar, startPos);
775 }
776
777 /**
778 * <p>Finds the first index within a CharSequence, handling {@code null}.
779 * This method uses {@link String#indexOf(String, int)} if possible.</p>
780 *
781 * <p>A {@code null} CharSequence will return {@code -1}.</p>
782 *
783 * <pre>
784 * StringUtils.indexOf(null, *) = -1
785 * StringUtils.indexOf(*, null) = -1
786 * StringUtils.indexOf("", "") = 0
787 * StringUtils.indexOf("", *) = -1 (except when * = "")
788 * StringUtils.indexOf("aabaabaa", "a") = 0
789 * StringUtils.indexOf("aabaabaa", "b") = 2
790 * StringUtils.indexOf("aabaabaa", "ab") = 1
791 * StringUtils.indexOf("aabaabaa", "") = 0
792 * </pre>
793 *
794 * @param seq the CharSequence to check, may be null
795 * @param searchSeq the CharSequence to find, may be null
796 * @return the first index of the search CharSequence,
797 * -1 if no match or {@code null} string input
798 * @since 2.0
799 * @since 3.0 Changed signature from indexOf(String, String) to indexOf(CharSequence, CharSequence)
800 */
801 public static int indexOf(final CharSequence seq, final CharSequence searchSeq) {
802 if (seq == null || searchSeq == null) {
803 return INDEX_NOT_FOUND;
804 }
805 return CharSequenceUtils.indexOf(seq, searchSeq, 0);
806 }
807
808 /**
809 * <p>Finds the first index within a CharSequence, handling {@code null}.
810 * This method uses {@link String#indexOf(String, int)} if possible.</p>
811 *
812 * <p>A {@code null} CharSequence will return {@code -1}.
813 * A negative start position is treated as zero.
814 * An empty ("") search CharSequence always matches.
815 * A start position greater than the string length only matches
816 * an empty search CharSequence.</p>
817 *
818 * <pre>
819 * StringUtils.indexOf(null, *, *) = -1
820 * StringUtils.indexOf(*, null, *) = -1
821 * StringUtils.indexOf("", "", 0) = 0
822 * StringUtils.indexOf("", *, 0) = -1 (except when * = "")
823 * StringUtils.indexOf("aabaabaa", "a", 0) = 0
824 * StringUtils.indexOf("aabaabaa", "b", 0) = 2
825 * StringUtils.indexOf("aabaabaa", "ab", 0) = 1
826 * StringUtils.indexOf("aabaabaa", "b", 3) = 5
827 * StringUtils.indexOf("aabaabaa", "b", 9) = -1
828 * StringUtils.indexOf("aabaabaa", "b", -1) = 2
829 * StringUtils.indexOf("aabaabaa", "", 2) = 2
830 * StringUtils.indexOf("abc", "", 9) = 3
831 * </pre>
832 *
833 * @param seq the CharSequence to check, may be null
834 * @param searchSeq the CharSequence to find, may be null
835 * @param startPos the start position, negative treated as zero
836 * @return the first index of the search CharSequence,
837 * -1 if no match or {@code null} string input
838 * @since 2.0
839 * @since 3.0 Changed signature from indexOf(String, String, int) to indexOf(CharSequence, CharSequence, int)
840 */
841 public static int indexOf(final CharSequence seq, final CharSequence searchSeq, final int startPos) {
842 if (seq == null || searchSeq == null) {
843 return INDEX_NOT_FOUND;
844 }
845 return CharSequenceUtils.indexOf(seq, searchSeq, startPos);
846 }
847
848 /**
849 * <p>Finds the n-th index within a CharSequence, handling {@code null}.
850 * This method uses {@link String#indexOf(String)} if possible.</p>
851 *
852 * <p>A {@code null} CharSequence will return {@code -1}.</p>
853 *
854 * <pre>
855 * StringUtils.ordinalIndexOf(null, *, *) = -1
856 * StringUtils.ordinalIndexOf(*, null, *) = -1
857 * StringUtils.ordinalIndexOf("", "", *) = 0
858 * StringUtils.ordinalIndexOf("aabaabaa", "a", 1) = 0
859 * StringUtils.ordinalIndexOf("aabaabaa", "a", 2) = 1
860 * StringUtils.ordinalIndexOf("aabaabaa", "b", 1) = 2
861 * StringUtils.ordinalIndexOf("aabaabaa", "b", 2) = 5
862 * StringUtils.ordinalIndexOf("aabaabaa", "ab", 1) = 1
863 * StringUtils.ordinalIndexOf("aabaabaa", "ab", 2) = 4
864 * StringUtils.ordinalIndexOf("aabaabaa", "", 1) = 0
865 * StringUtils.ordinalIndexOf("aabaabaa", "", 2) = 0
866 * </pre>
867 *
868 * <p>Note that 'head(CharSequence str, int n)' may be implemented as: </p>
869 *
870 * <pre>
871 * str.substring(0, lastOrdinalIndexOf(str, "\n", n))
872 * </pre>
873 *
874 * @param str the CharSequence to check, may be null
875 * @param searchStr the CharSequence to find, may be null
876 * @param ordinal the n-th {@code searchStr} to find
877 * @return the n-th index of the search CharSequence,
878 * {@code -1} ({@code INDEX_NOT_FOUND}) if no match or {@code null} string input
879 * @since 2.1
880 * @since 3.0 Changed signature from ordinalIndexOf(String, String, int) to ordinalIndexOf(CharSequence, CharSequence, int)
881 */
882 public static int ordinalIndexOf(final CharSequence str, final CharSequence searchStr, final int ordinal) {
883 return ordinalIndexOf(str, searchStr, ordinal, false);
884 }
885
886 /**
887 * <p>Finds the n-th index within a String, handling {@code null}.
888 * This method uses {@link String#indexOf(String)} if possible.</p>
889 *
890 * <p>A {@code null} CharSequence will return {@code -1}.</p>
891 *
892 * @param str the CharSequence to check, may be null
893 * @param searchStr the CharSequence to find, may be null
894 * @param ordinal the n-th {@code searchStr} to find
895 * @param lastIndex true if lastOrdinalIndexOf() otherwise false if ordinalIndexOf()
896 * @return the n-th index of the search CharSequence,
897 * {@code -1} ({@code INDEX_NOT_FOUND}) if no match or {@code null} string input
898 */
899 // Shared code between ordinalIndexOf(String,String,int) and lastOrdinalIndexOf(String,String,int)
900 private static int ordinalIndexOf(final CharSequence str, final CharSequence searchStr, final int ordinal, final boolean lastIndex) {
901 if (str == null || searchStr == null || ordinal <= 0) {
902 return INDEX_NOT_FOUND;
903 }
904 if (searchStr.length() == 0) {
905 return lastIndex ? str.length() : 0;
906 }
907 int found = 0;
908 int index = lastIndex ? str.length() : INDEX_NOT_FOUND;
909 do {
910 if (lastIndex) {
911 index = CharSequenceUtils.lastIndexOf(str, searchStr, index - 1);
912 } else {
913 index = CharSequenceUtils.indexOf(str, searchStr, index + 1);
914 }
915 if (index < 0) {
916 return index;
917 }
918 found++;
919 } while (found < ordinal);
920 return index;
921 }
922
923 /**
924 * <p>Case in-sensitive find of the first index within a CharSequence.</p>
925 *
926 * <p>A {@code null} CharSequence will return {@code -1}.
927 * A negative start position is treated as zero.
928 * An empty ("") search CharSequence always matches.
929 * A start position greater than the string length only matches
930 * an empty search CharSequence.</p>
931 *
932 * <pre>
933 * StringUtils.indexOfIgnoreCase(null, *) = -1
934 * StringUtils.indexOfIgnoreCase(*, null) = -1
935 * StringUtils.indexOfIgnoreCase("", "") = 0
936 * StringUtils.indexOfIgnoreCase("aabaabaa", "a") = 0
937 * StringUtils.indexOfIgnoreCase("aabaabaa", "b") = 2
938 * StringUtils.indexOfIgnoreCase("aabaabaa", "ab") = 1
939 * </pre>
940 *
941 * @param str the CharSequence to check, may be null
942 * @param searchStr the CharSequence to find, may be null
943 * @return the first index of the search CharSequence,
944 * -1 if no match or {@code null} string input
945 * @since 2.5
946 * @since 3.0 Changed signature from indexOfIgnoreCase(String, String) to indexOfIgnoreCase(CharSequence, CharSequence)
947 */
948 public static int indexOfIgnoreCase(final CharSequence str, final CharSequence searchStr) {
949 return indexOfIgnoreCase(str, searchStr, 0);
950 }
951
952 /**
953 * <p>Case in-sensitive find of the first index within a CharSequence
954 * from the specified position.</p>
955 *
956 * <p>A {@code null} CharSequence will return {@code -1}.
957 * A negative start position is treated as zero.
958 * An empty ("") search CharSequence always matches.
959 * A start position greater than the string length only matches
960 * an empty search CharSequence.</p>
961 *
962 * <pre>
963 * StringUtils.indexOfIgnoreCase(null, *, *) = -1
964 * StringUtils.indexOfIgnoreCase(*, null, *) = -1
965 * StringUtils.indexOfIgnoreCase("", "", 0) = 0
966 * StringUtils.indexOfIgnoreCase("aabaabaa", "A", 0) = 0
967 * StringUtils.indexOfIgnoreCase("aabaabaa", "B", 0) = 2
968 * StringUtils.indexOfIgnoreCase("aabaabaa", "AB", 0) = 1
969 * StringUtils.indexOfIgnoreCase("aabaabaa", "B", 3) = 5
970 * StringUtils.indexOfIgnoreCase("aabaabaa", "B", 9) = -1
971 * StringUtils.indexOfIgnoreCase("aabaabaa", "B", -1) = 2
972 * StringUtils.indexOfIgnoreCase("aabaabaa", "", 2) = 2
973 * StringUtils.indexOfIgnoreCase("abc", "", 9) = 3
974 * </pre>
975 *
976 * @param str the CharSequence to check, may be null
977 * @param searchStr the CharSequence to find, may be null
978 * @param startPos the start position, negative treated as zero
979 * @return the first index of the search CharSequence,
980 * -1 if no match or {@code null} string input
981 * @since 2.5
982 * @since 3.0 Changed signature from indexOfIgnoreCase(String, String, int) to indexOfIgnoreCase(CharSequence, CharSequence, int)
983 */
984 public static int indexOfIgnoreCase(final CharSequence str, final CharSequence searchStr, int startPos) {
985 if (str == null || searchStr == null) {
986 return INDEX_NOT_FOUND;
987 }
988 if (startPos < 0) {
989 startPos = 0;
990 }
991 final int endLimit = str.length() - searchStr.length() + 1;
992 if (startPos > endLimit) {
993 return INDEX_NOT_FOUND;
994 }
995 if (searchStr.length() == 0) {
996 return startPos;
997 }
998 for (int i = startPos; i < endLimit; i++) {
999 if (CharSequenceUtils.regionMatches(str, true, i, searchStr, 0, searchStr.length())) {
1000 return i;
1001 }
1002 }
1003 return INDEX_NOT_FOUND;
1004 }
1005
1006 // LastIndexOf
1007 //-----------------------------------------------------------------------
1008 /**
1009 * <p>Finds the last index within a CharSequence, handling {@code null}.
1010 * This method uses {@link String#lastIndexOf(int)} if possible.</p>
1011 *
1012 * <p>A {@code null} or empty ("") CharSequence will return {@code -1}.</p>
1013 *
1014 * <pre>
1015 * StringUtils.lastIndexOf(null, *) = -1
1016 * StringUtils.lastIndexOf("", *) = -1
1017 * StringUtils.lastIndexOf("aabaabaa", 'a') = 7
1018 * StringUtils.lastIndexOf("aabaabaa", 'b') = 5
1019 * </pre>
1020 *
1021 * @param seq the CharSequence to check, may be null
1022 * @param searchChar the character to find
1023 * @return the last index of the search character,
1024 * -1 if no match or {@code null} string input
1025 * @since 2.0
1026 * @since 3.0 Changed signature from lastIndexOf(String, int) to lastIndexOf(CharSequence, int)
1027 */
1028 public static int lastIndexOf(final CharSequence seq, final int searchChar) {
1029 if (isEmpty(seq)) {
1030 return INDEX_NOT_FOUND;
1031 }
1032 return CharSequenceUtils.lastIndexOf(seq, searchChar, seq.length());
1033 }
1034
1035 /**
1036 * <p>Finds the last index within a CharSequence from a start position,
1037 * handling {@code null}.
1038 * This method uses {@link String#lastIndexOf(int, int)} if possible.</p>
1039 *
1040 * <p>A {@code null} or empty ("") CharSequence will return {@code -1}.
1041 * A negative start position returns {@code -1}.
1042 * A start position greater than the string length searches the whole string.</p>
1043 *
1044 * <pre>
1045 * StringUtils.lastIndexOf(null, *, *) = -1
1046 * StringUtils.lastIndexOf("", *, *) = -1
1047 * StringUtils.lastIndexOf("aabaabaa", 'b', 8) = 5
1048 * StringUtils.lastIndexOf("aabaabaa", 'b', 4) = 2
1049 * StringUtils.lastIndexOf("aabaabaa", 'b', 0) = -1
1050 * StringUtils.lastIndexOf("aabaabaa", 'b', 9) = 5
1051 * StringUtils.lastIndexOf("aabaabaa", 'b', -1) = -1
1052 * StringUtils.lastIndexOf("aabaabaa", 'a', 0) = 0
1053 * </pre>
1054 *
1055 * @param seq the CharSequence to check, may be null
1056 * @param searchChar the character to find
1057 * @param startPos the start position
1058 * @return the last index of the search character,
1059 * -1 if no match or {@code null} string input
1060 * @since 2.0
1061 * @since 3.0 Changed signature from lastIndexOf(String, int, int) to lastIndexOf(CharSequence, int, int)
1062 */
1063 public static int lastIndexOf(final CharSequence seq, final int searchChar, final int startPos) {
1064 if (isEmpty(seq)) {
1065 return INDEX_NOT_FOUND;
1066 }
1067 return CharSequenceUtils.lastIndexOf(seq, searchChar, startPos);
1068 }
1069
1070 /**
1071 * <p>Finds the last index within a CharSequence, handling {@code null}.
1072 * This method uses {@link String#lastIndexOf(String)} if possible.</p>
1073 *
1074 * <p>A {@code null} CharSequence will return {@code -1}.</p>
1075 *
1076 * <pre>
1077 * StringUtils.lastIndexOf(null, *) = -1
1078 * StringUtils.lastIndexOf(*, null) = -1
1079 * StringUtils.lastIndexOf("", "") = 0
1080 * StringUtils.lastIndexOf("aabaabaa", "a") = 7
1081 * StringUtils.lastIndexOf("aabaabaa", "b") = 5
1082 * StringUtils.lastIndexOf("aabaabaa", "ab") = 4
1083 * StringUtils.lastIndexOf("aabaabaa", "") = 8
1084 * </pre>
1085 *
1086 * @param seq the CharSequence to check, may be null
1087 * @param searchSeq the CharSequence to find, may be null
1088 * @return the last index of the search String,
1089 * -1 if no match or {@code null} string input
1090 * @since 2.0
1091 * @since 3.0 Changed signature from lastIndexOf(String, String) to lastIndexOf(CharSequence, CharSequence)
1092 */
1093 public static int lastIndexOf(final CharSequence seq, final CharSequence searchSeq) {
1094 if (seq == null || searchSeq == null) {
1095 return INDEX_NOT_FOUND;
1096 }
1097 return CharSequenceUtils.lastIndexOf(seq, searchSeq, seq.length());
1098 }
1099
1100 /**
1101 * <p>Finds the n-th last index within a String, handling {@code null}.
1102 * This method uses {@link String#lastIndexOf(String)}.</p>
1103 *
1104 * <p>A {@code null} String will return {@code -1}.</p>
1105 *
1106 * <pre>
1107 * StringUtils.lastOrdinalIndexOf(null, *, *) = -1
1108 * StringUtils.lastOrdinalIndexOf(*, null, *) = -1
1109 * StringUtils.lastOrdinalIndexOf("", "", *) = 0
1110 * StringUtils.lastOrdinalIndexOf("aabaabaa", "a", 1) = 7
1111 * StringUtils.lastOrdinalIndexOf("aabaabaa", "a", 2) = 6
1112 * StringUtils.lastOrdinalIndexOf("aabaabaa", "b", 1) = 5
1113 * StringUtils.lastOrdinalIndexOf("aabaabaa", "b", 2) = 2
1114 * StringUtils.lastOrdinalIndexOf("aabaabaa", "ab", 1) = 4
1115 * StringUtils.lastOrdinalIndexOf("aabaabaa", "ab", 2) = 1
1116 * StringUtils.lastOrdinalIndexOf("aabaabaa", "", 1) = 8
1117 * StringUtils.lastOrdinalIndexOf("aabaabaa", "", 2) = 8
1118 * </pre>
1119 *
1120 * <p>Note that 'tail(CharSequence str, int n)' may be implemented as: </p>
1121 *
1122 * <pre>
1123 * str.substring(lastOrdinalIndexOf(str, "\n", n) + 1)
1124 * </pre>
1125 *
1126 * @param str the CharSequence to check, may be null
1127 * @param searchStr the CharSequence to find, may be null
1128 * @param ordinal the n-th last {@code searchStr} to find
1129 * @return the n-th last index of the search CharSequence,
1130 * {@code -1} ({@code INDEX_NOT_FOUND}) if no match or {@code null} string input
1131 * @since 2.5
1132 * @since 3.0 Changed signature from lastOrdinalIndexOf(String, String, int) to lastOrdinalIndexOf(CharSequence, CharSequence, int)
1133 */
1134 public static int lastOrdinalIndexOf(final CharSequence str, final CharSequence searchStr, final int ordinal) {
1135 return ordinalIndexOf(str, searchStr, ordinal, true);
1136 }
1137
1138 /**
 * <p>Finds the last index within a CharSequence, searching backward from a start position and handling {@code null}.
1140 * This method uses {@link String#lastIndexOf(String, int)} if possible.</p>
1141 *
1142 * <p>A {@code null} CharSequence will return {@code -1}.
1143 * A negative start position returns {@code -1}.
1144 * An empty ("") search CharSequence always matches unless the start position is negative.
1145 * A start position greater than the string length searches the whole string.</p>
1146 *
1147 * <pre>
1148 * StringUtils.lastIndexOf(null, *, *) = -1
1149 * StringUtils.lastIndexOf(*, null, *) = -1
1150 * StringUtils.lastIndexOf("aabaabaa", "a", 8) = 7
1151 * StringUtils.lastIndexOf("aabaabaa", "b", 8) = 5
1152 * StringUtils.lastIndexOf("aabaabaa", "ab", 8) = 4
1153 * StringUtils.lastIndexOf("aabaabaa", "b", 9) = 5
1154 * StringUtils.lastIndexOf("aabaabaa", "b", -1) = -1
1155 * StringUtils.lastIndexOf("aabaabaa", "a", 0) = 0
1156 * StringUtils.lastIndexOf("aabaabaa", "b", 0) = -1
1157 * </pre>
1158 *
1159 * @param seq the CharSequence to check, may be null
1160 * @param searchSeq the CharSequence to find, may be null
 * @param startPos the start position, a negative value returns {@code -1}
 * @return the last index of the search CharSequence (always &le; startPos),
1163 * -1 if no match or {@code null} string input
1164 * @since 2.0
1165 * @since 3.0 Changed signature from lastIndexOf(String, String, int) to lastIndexOf(CharSequence, CharSequence, int)
1166 */
1167 public static int lastIndexOf(final CharSequence seq, final CharSequence searchSeq, final int startPos) {
1168 if (seq == null || searchSeq == null) {
1169 return INDEX_NOT_FOUND;
1170 }
1171 return CharSequenceUtils.lastIndexOf(seq, searchSeq, startPos);
1172 }
1173
1174 /**
1175 * <p>Case in-sensitive find of the last index within a CharSequence.</p>
1176 *
1177 * <p>A {@code null} CharSequence will return {@code -1}.
1178 * A negative start position returns {@code -1}.
1179 * An empty ("") search CharSequence always matches unless the start position is negative.
1180 * A start position greater than the string length searches the whole string.</p>
1181 *
1182 * <pre>
1183 * StringUtils.lastIndexOfIgnoreCase(null, *) = -1
1184 * StringUtils.lastIndexOfIgnoreCase(*, null) = -1
1185 * StringUtils.lastIndexOfIgnoreCase("aabaabaa", "A") = 7
1186 * StringUtils.lastIndexOfIgnoreCase("aabaabaa", "B") = 5
1187 * StringUtils.lastIndexOfIgnoreCase("aabaabaa", "AB") = 4
1188 * </pre>
1189 *
1190 * @param str the CharSequence to check, may be null
1191 * @param searchStr the CharSequence to find, may be null
 * @return the last index of the search CharSequence,
1193 * -1 if no match or {@code null} string input
1194 * @since 2.5
1195 * @since 3.0 Changed signature from lastIndexOfIgnoreCase(String, String) to lastIndexOfIgnoreCase(CharSequence, CharSequence)
1196 */
1197 public static int lastIndexOfIgnoreCase(final CharSequence str, final CharSequence searchStr) {
1198 if (str == null || searchStr == null) {
1199 return INDEX_NOT_FOUND;
1200 }
1201 return lastIndexOfIgnoreCase(str, searchStr, str.length());
1202 }
1203
1204 /**
1205 * <p>Case in-sensitive find of the last index within a CharSequence
1206 * from the specified position.</p>
1207 *
1208 * <p>A {@code null} CharSequence will return {@code -1}.
1209 * A negative start position returns {@code -1}.
1210 * An empty ("") search CharSequence always matches unless the start position is negative.
1211 * A start position greater than the string length searches the whole string.</p>
1212 *
1213 * <pre>
1214 * StringUtils.lastIndexOfIgnoreCase(null, *, *) = -1
1215 * StringUtils.lastIndexOfIgnoreCase(*, null, *) = -1
1216 * StringUtils.lastIndexOfIgnoreCase("aabaabaa", "A", 8) = 7
1217 * StringUtils.lastIndexOfIgnoreCase("aabaabaa", "B", 8) = 5
1218 * StringUtils.lastIndexOfIgnoreCase("aabaabaa", "AB", 8) = 4
1219 * StringUtils.lastIndexOfIgnoreCase("aabaabaa", "B", 9) = 5
1220 * StringUtils.lastIndexOfIgnoreCase("aabaabaa", "B", -1) = -1
1221 * StringUtils.lastIndexOfIgnoreCase("aabaabaa", "A", 0) = 0
1222 * StringUtils.lastIndexOfIgnoreCase("aabaabaa", "B", 0) = -1
1223 * </pre>
1224 *
1225 * @param str the CharSequence to check, may be null
1226 * @param searchStr the CharSequence to find, may be null
1227 * @param startPos the start position
 * @return the last index of the search CharSequence (always &le; startPos),
1229 * -1 if no match or {@code null} input
1230 * @since 2.5
1231 * @since 3.0 Changed signature from lastIndexOfIgnoreCase(String, String, int) to lastIndexOfIgnoreCase(CharSequence, CharSequence, int)
1232 */
1233 public static int lastIndexOfIgnoreCase(final CharSequence str, final CharSequence searchStr, int startPos) {
1234 if (str == null || searchStr == null) {
1235 return INDEX_NOT_FOUND;
1236 }
1237 if (startPos > str.length() - searchStr.length()) {
1238 startPos = str.length() - searchStr.length();
1239 }
1240 if (startPos < 0) {
1241 return INDEX_NOT_FOUND;
1242 }
1243 if (searchStr.length() == 0) {
1244 return startPos;
1245 }
1246
1247 for (int i = startPos; i >= 0; i--) {
1248 if (CharSequenceUtils.regionMatches(str, true, i, searchStr, 0, searchStr.length())) {
1249 return i;
1250 }
1251 }
1252 return INDEX_NOT_FOUND;
1253 }
1254
1255 // Contains
1256 //-----------------------------------------------------------------------
1257 /**
1258 * <p>Checks if CharSequence contains a search character, handling {@code null}.
1259 * This method uses {@link String#indexOf(int)} if possible.</p>
1260 *
1261 * <p>A {@code null} or empty ("") CharSequence will return {@code false}.</p>
1262 *
1263 * <pre>
1264 * StringUtils.contains(null, *) = false
1265 * StringUtils.contains("", *) = false
1266 * StringUtils.contains("abc", 'a') = true
1267 * StringUtils.contains("abc", 'z') = false
1268 * </pre>
1269 *
1270 * @param seq the CharSequence to check, may be null
1271 * @param searchChar the character to find
1272 * @return true if the CharSequence contains the search character,
1273 * false if not or {@code null} string input
1274 * @since 2.0
1275 * @since 3.0 Changed signature from contains(String, int) to contains(CharSequence, int)
1276 */
1277 public static boolean contains(final CharSequence seq, final int searchChar) {
1278 if (isEmpty(seq)) {
1279 return false;
1280 }
1281 return CharSequenceUtils.indexOf(seq, searchChar, 0) >= 0;
1282 }
1283
1284 /**
1285 * <p>Checks if CharSequence contains a search CharSequence, handling {@code null}.
1286 * This method uses {@link String#indexOf(String)} if possible.</p>
1287 *
1288 * <p>A {@code null} CharSequence will return {@code false}.</p>
1289 *
1290 * <pre>
1291 * StringUtils.contains(null, *) = false
1292 * StringUtils.contains(*, null) = false
1293 * StringUtils.contains("", "") = true
1294 * StringUtils.contains("abc", "") = true
1295 * StringUtils.contains("abc", "a") = true
1296 * StringUtils.contains("abc", "z") = false
1297 * </pre>
1298 *
1299 * @param seq the CharSequence to check, may be null
1300 * @param searchSeq the CharSequence to find, may be null
1301 * @return true if the CharSequence contains the search CharSequence,
1302 * false if not or {@code null} string input
1303 * @since 2.0
1304 * @since 3.0 Changed signature from contains(String, String) to contains(CharSequence, CharSequence)
1305 */
1306 public static boolean contains(final CharSequence seq, final CharSequence searchSeq) {
1307 if (seq == null || searchSeq == null) {
1308 return false;
1309 }
1310 return CharSequenceUtils.indexOf(seq, searchSeq, 0) >= 0;
1311 }
1312
1313 /**
1314 * <p>Checks if CharSequence contains a search CharSequence irrespective of case,
1315 * handling {@code null}. Case-insensitivity is defined as by
 * {@link String#equalsIgnoreCase(String)}.</p>
1317 *
1318 * <p>A {@code null} CharSequence will return {@code false}.</p>
1319 *
1320 * <pre>
 * StringUtils.containsIgnoreCase(null, *) = false
 * StringUtils.containsIgnoreCase(*, null) = false
 * StringUtils.containsIgnoreCase("", "") = true
 * StringUtils.containsIgnoreCase("abc", "") = true
 * StringUtils.containsIgnoreCase("abc", "a") = true
 * StringUtils.containsIgnoreCase("abc", "z") = false
 * StringUtils.containsIgnoreCase("abc", "A") = true
 * StringUtils.containsIgnoreCase("abc", "Z") = false
1329 * </pre>
1330 *
1331 * @param str the CharSequence to check, may be null
1332 * @param searchStr the CharSequence to find, may be null
1333 * @return true if the CharSequence contains the search CharSequence irrespective of
1334 * case or false if not or {@code null} string input
1335 * @since 3.0 Changed signature from containsIgnoreCase(String, String) to containsIgnoreCase(CharSequence, CharSequence)
1336 */
1337 public static boolean containsIgnoreCase(final CharSequence str, final CharSequence searchStr) {
1338 if (str == null || searchStr == null) {
1339 return false;
1340 }
1341 final int len = searchStr.length();
1342 final int max = str.length() - len;
1343 for (int i = 0; i <= max; i++) {
1344 if (CharSequenceUtils.regionMatches(str, true, i, searchStr, 0, len)) {
1345 return true;
1346 }
1347 }
1348 return false;
1349 }
1350
1351 /**
1352 * Check whether the given CharSequence contains any whitespace characters.
1353 * @param seq the CharSequence to check (may be {@code null})
1354 * @return {@code true} if the CharSequence is not empty and
1355 * contains at least 1 whitespace character
1356 * @see java.lang.Character#isWhitespace
1357 * @since 3.0
1358 */
1359 // From org.springframework.util.StringUtils, under Apache License 2.0
1360 public static boolean containsWhitespace(final CharSequence seq) {
1361 if (isEmpty(seq)) {
1362 return false;
1363 }
1364 final int strLen = seq.length();
1365 for (int i = 0; i < strLen; i++) {
1366 if (Character.isWhitespace(seq.charAt(i))) {
1367 return true;
1368 }
1369 }
1370 return false;
1371 }
1372
1373 // IndexOfAny chars
1374 //-----------------------------------------------------------------------
1375 /**
1376 * <p>Search a CharSequence to find the first index of any
1377 * character in the given set of characters.</p>
1378 *
1379 * <p>A {@code null} String will return {@code -1}.
1380 * A {@code null} or zero length search array will return {@code -1}.</p>
1381 *
1382 * <pre>
1383 * StringUtils.indexOfAny(null, *) = -1
1384 * StringUtils.indexOfAny("", *) = -1
1385 * StringUtils.indexOfAny(*, null) = -1
1386 * StringUtils.indexOfAny(*, []) = -1
1387 * StringUtils.indexOfAny("zzabyycdxx",['z','a']) = 0
1388 * StringUtils.indexOfAny("zzabyycdxx",['b','y']) = 3
1389 * StringUtils.indexOfAny("aba", ['z']) = -1
1390 * </pre>
1391 *
1392 * @param cs the CharSequence to check, may be null
1393 * @param searchChars the chars to search for, may be null
1394 * @return the index of any of the chars, -1 if no match or null input
1395 * @since 2.0
1396 * @since 3.0 Changed signature from indexOfAny(String, char[]) to indexOfAny(CharSequence, char...)
1397 */
1398 public static int indexOfAny(final CharSequence cs, final char... searchChars) {
1399 if (isEmpty(cs) || ArrayUtils.isEmpty(searchChars)) {
1400 return INDEX_NOT_FOUND;
1401 }
1402 final int csLen = cs.length();
1403 final int csLast = csLen - 1;
1404 final int searchLen = searchChars.length;
1405 final int searchLast = searchLen - 1;
1406 for (int i = 0; i < csLen; i++) {
1407 final char ch = cs.charAt(i);
1408 for (int j = 0; j < searchLen; j++) {
1409 if (searchChars[j] == ch) {
1410 if (i < csLast && j < searchLast && Character.isHighSurrogate(ch)) {
1411 // ch is a supplementary character
1412 if (searchChars[j + 1] == cs.charAt(i + 1)) {
1413 return i;
1414 }
1415 } else {
1416 return i;
1417 }
1418 }
1419 }
1420 }
1421 return INDEX_NOT_FOUND;
1422 }
1423
1424 /**
1425 * <p>Search a CharSequence to find the first index of any
1426 * character in the given set of characters.</p>
1427 *
1428 * <p>A {@code null} String will return {@code -1}.
1429 * A {@code null} search string will return {@code -1}.</p>
1430 *
1431 * <pre>
1432 * StringUtils.indexOfAny(null, *) = -1
1433 * StringUtils.indexOfAny("", *) = -1
1434 * StringUtils.indexOfAny(*, null) = -1
1435 * StringUtils.indexOfAny(*, "") = -1
1436 * StringUtils.indexOfAny("zzabyycdxx", "za") = 0
1437 * StringUtils.indexOfAny("zzabyycdxx", "by") = 3
1438 * StringUtils.indexOfAny("aba","z") = -1
1439 * </pre>
1440 *
1441 * @param cs the CharSequence to check, may be null
1442 * @param searchChars the chars to search for, may be null
1443 * @return the index of any of the chars, -1 if no match or null input
1444 * @since 2.0
1445 * @since 3.0 Changed signature from indexOfAny(String, String) to indexOfAny(CharSequence, String)
1446 */
1447 public static int indexOfAny(final CharSequence cs, final String searchChars) {
1448 if (isEmpty(cs) || isEmpty(searchChars)) {
1449 return INDEX_NOT_FOUND;
1450 }
1451 return indexOfAny(cs, searchChars.toCharArray());
1452 }
1453
1454 // ContainsAny
1455 //-----------------------------------------------------------------------
1456 /**
1457 * <p>Checks if the CharSequence contains any character in the given
1458 * set of characters.</p>
1459 *
1460 * <p>A {@code null} CharSequence will return {@code false}.
1461 * A {@code null} or zero length search array will return {@code false}.</p>
1462 *
1463 * <pre>
1464 * StringUtils.containsAny(null, *) = false
1465 * StringUtils.containsAny("", *) = false
1466 * StringUtils.containsAny(*, null) = false
1467 * StringUtils.containsAny(*, []) = false
1468 * StringUtils.containsAny("zzabyycdxx",['z','a']) = true
1469 * StringUtils.containsAny("zzabyycdxx",['b','y']) = true
1470 * StringUtils.containsAny("aba", ['z']) = false
1471 * </pre>
1472 *
1473 * @param cs the CharSequence to check, may be null
1474 * @param searchChars the chars to search for, may be null
 * @return {@code true} if any of the chars are found,
1476 * {@code false} if no match or null input
1477 * @since 2.4
1478 * @since 3.0 Changed signature from containsAny(String, char[]) to containsAny(CharSequence, char...)
1479 */
1480 public static boolean containsAny(final CharSequence cs, final char... searchChars) {
1481 if (isEmpty(cs) || ArrayUtils.isEmpty(searchChars)) {
1482 return false;
1483 }
1484 final int csLength = cs.length();
1485 final int searchLength = searchChars.length;
1486 final int csLast = csLength - 1;
1487 final int searchLast = searchLength - 1;
1488 for (int i = 0; i < csLength; i++) {
1489 final char ch = cs.charAt(i);
1490 for (int j = 0; j < searchLength; j++) {
1491 if (searchChars[j] == ch) {
1492 if (Character.isHighSurrogate(ch)) {
1493 if (j == searchLast) {
1494 // missing low surrogate, fine, like String.indexOf(String)
1495 return true;
1496 }
1497 if (i < csLast && searchChars[j + 1] == cs.charAt(i + 1)) {
1498 return true;
1499 }
1500 } else {
1501 // ch is in the Basic Multilingual Plane
1502 return true;
1503 }
1504 }
1505 }
1506 }
1507 return false;
1508 }
1509
1510 /**
1511 * <p>
1512 * Checks if the CharSequence contains any character in the given set of characters.
1513 * </p>
1514 *
1515 * <p>
1516 * A {@code null} CharSequence will return {@code false}. A {@code null} search CharSequence will return
1517 * {@code false}.
1518 * </p>
1519 *
1520 * <pre>
1521 * StringUtils.containsAny(null, *) = false
1522 * StringUtils.containsAny("", *) = false
1523 * StringUtils.containsAny(*, null) = false
1524 * StringUtils.containsAny(*, "") = false
1525 * StringUtils.containsAny("zzabyycdxx", "za") = true
1526 * StringUtils.containsAny("zzabyycdxx", "by") = true
1527 * StringUtils.containsAny("aba","z") = false
1528 * </pre>
1529 *
1530 * @param cs
1531 * the CharSequence to check, may be null
1532 * @param searchChars
1533 * the chars to search for, may be null
 * @return {@code true} if any of the chars are found, {@code false} if no match or null input
1535 * @since 2.4
1536 * @since 3.0 Changed signature from containsAny(String, String) to containsAny(CharSequence, CharSequence)
1537 */
1538 public static boolean containsAny(final CharSequence cs, final CharSequence searchChars) {
1539 if (searchChars == null) {
1540 return false;
1541 }
1542 return containsAny(cs, CharSequenceUtils.toCharArray(searchChars));
1543 }
1544
1545 // IndexOfAnyBut chars
1546 //-----------------------------------------------------------------------
1547 /**
1548 * <p>Searches a CharSequence to find the first index of any
1549 * character not in the given set of characters.</p>
1550 *
1551 * <p>A {@code null} CharSequence will return {@code -1}.
1552 * A {@code null} or zero length search array will return {@code -1}.</p>
1553 *
1554 * <pre>
1555 * StringUtils.indexOfAnyBut(null, *) = -1
1556 * StringUtils.indexOfAnyBut("", *) = -1
1557 * StringUtils.indexOfAnyBut(*, null) = -1
1558 * StringUtils.indexOfAnyBut(*, []) = -1
1559 * StringUtils.indexOfAnyBut("zzabyycdxx", new char[] {'z', 'a'} ) = 3
1560 * StringUtils.indexOfAnyBut("aba", new char[] {'z'} ) = 0
1561 * StringUtils.indexOfAnyBut("aba", new char[] {'a', 'b'} ) = -1
1562
1563 * </pre>
1564 *
1565 * @param cs the CharSequence to check, may be null
1566 * @param searchChars the chars to search for, may be null
1567 * @return the index of any of the chars, -1 if no match or null input
1568 * @since 2.0
1569 * @since 3.0 Changed signature from indexOfAnyBut(String, char[]) to indexOfAnyBut(CharSequence, char...)
1570 */
1571 public static int indexOfAnyBut(final CharSequence cs, final char... searchChars) {
1572 if (isEmpty(cs) || ArrayUtils.isEmpty(searchChars)) {
1573 return INDEX_NOT_FOUND;
1574 }
1575 final int csLen = cs.length();
1576 final int csLast = csLen - 1;
1577 final int searchLen = searchChars.length;
1578 final int searchLast = searchLen - 1;
1579 outer:
1580 for (int i = 0; i < csLen; i++) {
1581 final char ch = cs.charAt(i);
1582 for (int j = 0; j < searchLen; j++) {
1583 if (searchChars[j] == ch) {
1584 if (i < csLast && j < searchLast && Character.isHighSurrogate(ch)) {
1585 if (searchChars[j + 1] == cs.charAt(i + 1)) {
1586 continue outer;
1587 }
1588 } else {
1589 continue outer;
1590 }
1591 }
1592 }
1593 return i;
1594 }
1595 return INDEX_NOT_FOUND;
1596 }
1597
1598 /**
1599 * <p>Search a CharSequence to find the first index of any
1600 * character not in the given set of characters.</p>
1601 *
1602 * <p>A {@code null} CharSequence will return {@code -1}.
1603 * A {@code null} or empty search string will return {@code -1}.</p>
1604 *
1605 * <pre>
1606 * StringUtils.indexOfAnyBut(null, *) = -1
1607 * StringUtils.indexOfAnyBut("", *) = -1
1608 * StringUtils.indexOfAnyBut(*, null) = -1
1609 * StringUtils.indexOfAnyBut(*, "") = -1
1610 * StringUtils.indexOfAnyBut("zzabyycdxx", "za") = 3
1611 * StringUtils.indexOfAnyBut("zzabyycdxx", "") = -1
1612 * StringUtils.indexOfAnyBut("aba","ab") = -1
1613 * </pre>
1614 *
1615 * @param seq the CharSequence to check, may be null
1616 * @param searchChars the chars to search for, may be null
1617 * @return the index of any of the chars, -1 if no match or null input
1618 * @since 2.0
1619 * @since 3.0 Changed signature from indexOfAnyBut(String, String) to indexOfAnyBut(CharSequence, CharSequence)
1620 */
1621 public static int indexOfAnyBut(final CharSequence seq, final CharSequence searchChars) {
1622 if (isEmpty(seq) || isEmpty(searchChars)) {
1623 return INDEX_NOT_FOUND;
1624 }
1625 final int strLen = seq.length();
1626 for (int i = 0; i < strLen; i++) {
1627 final char ch = seq.charAt(i);
1628 final boolean chFound = CharSequenceUtils.indexOf(searchChars, ch, 0) >= 0;
1629 if (i + 1 < strLen && Character.isHighSurrogate(ch)) {
1630 final char ch2 = seq.charAt(i + 1);
1631 if (chFound && CharSequenceUtils.indexOf(searchChars, ch2, 0) < 0) {
1632 return i;
1633 }
1634 } else {
1635 if (!chFound) {
1636 return i;
1637 }
1638 }
1639 }
1640 return INDEX_NOT_FOUND;
1641 }
1642
1643 // ContainsOnly
1644 //-----------------------------------------------------------------------
1645 /**
1646 * <p>Checks if the CharSequence contains only certain characters.</p>
1647 *
1648 * <p>A {@code null} CharSequence will return {@code false}.
1649 * A {@code null} valid character array will return {@code false}.
1650 * An empty CharSequence (length()=0) always returns {@code true}.</p>
1651 *
1652 * <pre>
1653 * StringUtils.containsOnly(null, *) = false
1654 * StringUtils.containsOnly(*, null) = false
1655 * StringUtils.containsOnly("", *) = true
1656 * StringUtils.containsOnly("ab", '') = false
1657 * StringUtils.containsOnly("abab", 'abc') = true
1658 * StringUtils.containsOnly("ab1", 'abc') = false
1659 * StringUtils.containsOnly("abz", 'abc') = false
1660 * </pre>
1661 *
1662 * @param cs the String to check, may be null
1663 * @param valid an array of valid chars, may be null
1664 * @return true if it only contains valid chars and is non-null
1665 * @since 3.0 Changed signature from containsOnly(String, char[]) to containsOnly(CharSequence, char...)
1666 */
1667 public static boolean containsOnly(final CharSequence cs, final char... valid) {
1668 // All these pre-checks are to maintain API with an older version
1669 if (valid == null || cs == null) {
1670 return false;
1671 }
1672 if (cs.length() == 0) {
1673 return true;
1674 }
1675 if (valid.length == 0) {
1676 return false;
1677 }
1678 return indexOfAnyBut(cs, valid) == INDEX_NOT_FOUND;
1679 }
1680
1681 /**
1682 * <p>Checks if the CharSequence contains only certain characters.</p>
1683 *
1684 * <p>A {@code null} CharSequence will return {@code false}.
1685 * A {@code null} valid character String will return {@code false}.
1686 * An empty String (length()=0) always returns {@code true}.</p>
1687 *
1688 * <pre>
1689 * StringUtils.containsOnly(null, *) = false
1690 * StringUtils.containsOnly(*, null) = false
1691 * StringUtils.containsOnly("", *) = true
1692 * StringUtils.containsOnly("ab", "") = false
1693 * StringUtils.containsOnly("abab", "abc") = true
1694 * StringUtils.containsOnly("ab1", "abc") = false
1695 * StringUtils.containsOnly("abz", "abc") = false
1696 * </pre>
1697 *
1698 * @param cs the CharSequence to check, may be null
1699 * @param validChars a String of valid chars, may be null
1700 * @return true if it only contains valid chars and is non-null
1701 * @since 2.0
1702 * @since 3.0 Changed signature from containsOnly(String, String) to containsOnly(CharSequence, String)
1703 */
1704 public static boolean containsOnly(final CharSequence cs, final String validChars) {
1705 if (cs == null || validChars == null) {
1706 return false;
1707 }
1708 return containsOnly(cs, validChars.toCharArray());
1709 }
1710
1711 // ContainsNone
1712 //-----------------------------------------------------------------------
1713 /**
1714 * <p>Checks that the CharSequence does not contain certain characters.</p>
1715 *
1716 * <p>A {@code null} CharSequence will return {@code true}.
1717 * A {@code null} invalid character array will return {@code true}.
1718 * An empty CharSequence (length()=0) always returns true.</p>
1719 *
1720 * <pre>
1721 * StringUtils.containsNone(null, *) = true
1722 * StringUtils.containsNone(*, null) = true
1723 * StringUtils.containsNone("", *) = true
1724 * StringUtils.containsNone("ab", '') = true
1725 * StringUtils.containsNone("abab", 'xyz') = true
1726 * StringUtils.containsNone("ab1", 'xyz') = true
1727 * StringUtils.containsNone("abz", 'xyz') = false
1728 * </pre>
1729 *
1730 * @param cs the CharSequence to check, may be null
1731 * @param searchChars an array of invalid chars, may be null
1732 * @return true if it contains none of the invalid chars, or is null
1733 * @since 2.0
1734 * @since 3.0 Changed signature from containsNone(String, char[]) to containsNone(CharSequence, char...)
1735 */
1736 public static boolean containsNone(final CharSequence cs, final char... searchChars) {
1737 if (cs == null || searchChars == null) {
1738 return true;
1739 }
1740 final int csLen = cs.length();
1741 final int csLast = csLen - 1;
1742 final int searchLen = searchChars.length;
1743 final int searchLast = searchLen - 1;
1744 for (int i = 0; i < csLen; i++) {
1745 final char ch = cs.charAt(i);
1746 for (int j = 0; j < searchLen; j++) {
1747 if (searchChars[j] == ch) {
1748 if (Character.isHighSurrogate(ch)) {
1749 if (j == searchLast) {
1750 // missing low surrogate, fine, like String.indexOf(String)
1751 return false;
1752 }
1753 if (i < csLast && searchChars[j + 1] == cs.charAt(i + 1)) {
1754 return false;
1755 }
1756 } else {
1757 // ch is in the Basic Multilingual Plane
1758 return false;
1759 }
1760 }
1761 }
1762 }
1763 return true;
1764 }
1765
1766 /**
1767 * <p>Checks that the CharSequence does not contain certain characters.</p>
1768 *
1769 * <p>A {@code null} CharSequence will return {@code true}.
1770 * A {@code null} invalid character array will return {@code true}.
1771 * An empty String ("") always returns true.</p>
1772 *
1773 * <pre>
1774 * StringUtils.containsNone(null, *) = true
1775 * StringUtils.containsNone(*, null) = true
1776 * StringUtils.containsNone("", *) = true
1777 * StringUtils.containsNone("ab", "") = true
1778 * StringUtils.containsNone("abab", "xyz") = true
1779 * StringUtils.containsNone("ab1", "xyz") = true
1780 * StringUtils.containsNone("abz", "xyz") = false
1781 * </pre>
1782 *
1783 * @param cs the CharSequence to check, may be null
1784 * @param invalidChars a String of invalid chars, may be null
1785 * @return true if it contains none of the invalid chars, or is null
1786 * @since 2.0
1787 * @since 3.0 Changed signature from containsNone(String, String) to containsNone(CharSequence, String)
1788 */
1789 public static boolean containsNone(final CharSequence cs, final String invalidChars) {
1790 if (cs == null || invalidChars == null) {
1791 return true;
1792 }
1793 return containsNone(cs, invalidChars.toCharArray());
1794 }
1795
1796 // IndexOfAny strings
1797 //-----------------------------------------------------------------------
1798 /**
1799 * <p>Find the first index of any of a set of potential substrings.</p>
1800 *
1801 * <p>A {@code null} CharSequence will return {@code -1}.
1802 * A {@code null} or zero length search array will return {@code -1}.
1803 * A {@code null} search array entry will be ignored, but a search
1804 * array containing "" will return {@code 0} if {@code str} is not
1805 * null. This method uses {@link String#indexOf(String)} if possible.</p>
1806 *
1807 * <pre>
1808 * StringUtils.indexOfAny(null, *) = -1
1809 * StringUtils.indexOfAny(*, null) = -1
1810 * StringUtils.indexOfAny(*, []) = -1
1811 * StringUtils.indexOfAny("zzabyycdxx", ["ab","cd"]) = 2
1812 * StringUtils.indexOfAny("zzabyycdxx", ["cd","ab"]) = 2
1813 * StringUtils.indexOfAny("zzabyycdxx", ["mn","op"]) = -1
1814 * StringUtils.indexOfAny("zzabyycdxx", ["zab","aby"]) = 1
1815 * StringUtils.indexOfAny("zzabyycdxx", [""]) = 0
1816 * StringUtils.indexOfAny("", [""]) = 0
1817 * StringUtils.indexOfAny("", ["a"]) = -1
1818 * </pre>
1819 *
1820 * @param str the CharSequence to check, may be null
1821 * @param searchStrs the CharSequences to search for, may be null
1822 * @return the first index of any of the searchStrs in str, -1 if no match
1823 * @since 3.0 Changed signature from indexOfAny(String, String[]) to indexOfAny(CharSequence, CharSequence...)
1824 */
1825 public static int indexOfAny(final CharSequence str, final CharSequence... searchStrs) {
1826 if (str == null || searchStrs == null) {
1827 return INDEX_NOT_FOUND;
1828 }
1829 final int sz = searchStrs.length;
1830
1831 // String's can't have a MAX_VALUEth index.
1832 int ret = Integer.MAX_VALUE;
1833
1834 int tmp = 0;
1835 for (int i = 0; i < sz; i++) {
1836 final CharSequence search = searchStrs[i];
1837 if (search == null) {
1838 continue;
1839 }
1840 tmp = CharSequenceUtils.indexOf(str, search, 0);
1841 if (tmp == INDEX_NOT_FOUND) {
1842 continue;
1843 }
1844
1845 if (tmp < ret) {
1846 ret = tmp;
1847 }
1848 }
1849
1850 return ret == Integer.MAX_VALUE ? INDEX_NOT_FOUND : ret;
1851 }
1852
1853 /**
1854 * <p>Find the latest index of any of a set of potential substrings.</p>
1855 *
 * <p>A {@code null} CharSequence will return {@code -1}.
 * A {@code null} or zero length search array will return {@code -1}.
 * A {@code null} search array entry will be ignored,
 * but a search array containing "" will return the length of {@code str}
 * if {@code str} is not null. This method uses {@link String#lastIndexOf(String)} if possible.</p>
1861 *
1862 * <pre>
1863 * StringUtils.lastIndexOfAny(null, *) = -1
1864 * StringUtils.lastIndexOfAny(*, null) = -1
1865 * StringUtils.lastIndexOfAny(*, []) = -1
1866 * StringUtils.lastIndexOfAny(*, [null]) = -1
1867 * StringUtils.lastIndexOfAny("zzabyycdxx", ["ab","cd"]) = 6
1868 * StringUtils.lastIndexOfAny("zzabyycdxx", ["cd","ab"]) = 6
1869 * StringUtils.lastIndexOfAny("zzabyycdxx", ["mn","op"]) = -1
1870 * StringUtils.lastIndexOfAny("zzabyycdxx", ["mn","op"]) = -1
1871 * StringUtils.lastIndexOfAny("zzabyycdxx", ["mn",""]) = 10
1872 * </pre>
1873 *
1874 * @param str the CharSequence to check, may be null
1875 * @param searchStrs the CharSequences to search for, may be null
1876 * @return the last index of any of the CharSequences, -1 if no match
1877 * @since 3.0 Changed signature from lastIndexOfAny(String, String[]) to lastIndexOfAny(CharSequence, CharSequence)
1878 */
1879 public static int lastIndexOfAny(final CharSequence str, final CharSequence... searchStrs) {
1880 if (str == null || searchStrs == null) {
1881 return INDEX_NOT_FOUND;
1882 }
1883 final int sz = searchStrs.length;
1884 int ret = INDEX_NOT_FOUND;
1885 int tmp = 0;
1886 for (int i = 0; i < sz; i++) {
1887 final CharSequence search = searchStrs[i];
1888 if (search == null) {
1889 continue;
1890 }
1891 tmp = CharSequenceUtils.lastIndexOf(str, search, str.length());
1892 if (tmp > ret) {
1893 ret = tmp;
1894 }
1895 }
1896 return ret;
1897 }
1898
1899 // Substring
1900 //-----------------------------------------------------------------------
1901 /**
1902 * <p>Gets a substring from the specified String avoiding exceptions.</p>
1903 *
1904 * <p>A negative start position can be used to start {@code n}
1905 * characters from the end of the String.</p>
1906 *
1907 * <p>A {@code null} String will return {@code null}.
1908 * An empty ("") String will return "".</p>
1909 *
1910 * <pre>
1911 * StringUtils.substring(null, *) = null
1912 * StringUtils.substring("", *) = ""
1913 * StringUtils.substring("abc", 0) = "abc"
1914 * StringUtils.substring("abc", 2) = "c"
1915 * StringUtils.substring("abc", 4) = ""
1916 * StringUtils.substring("abc", -2) = "bc"
1917 * StringUtils.substring("abc", -4) = "abc"
1918 * </pre>
1919 *
1920 * @param str the String to get the substring from, may be null
1921 * @param start the position to start from, negative means
1922 * count back from the end of the String by this many characters
1923 * @return substring from start position, {@code null} if null String input
1924 */
1925 public static String substring(final String str, int start) {
1926 if (str == null) {
1927 return null;
1928 }
1929
1930 // handle negatives, which means last n characters
1931 if (start < 0) {
1932 start = str.length() + start; // remember start is negative
1933 }
1934
1935 if (start < 0) {
1936 start = 0;
1937 }
1938 if (start > str.length()) {
1939 return EMPTY;
1940 }
1941
1942 return str.substring(start);
1943 }
1944
1945 /**
1946 * <p>Gets a substring from the specified String avoiding exceptions.</p>
1947 *
1948 * <p>A negative start position can be used to start/end {@code n}
1949 * characters from the end of the String.</p>
1950 *
1951 * <p>The returned substring starts with the character in the {@code start}
1952 * position and ends before the {@code end} position. All position counting is
1953 * zero-based -- i.e., to start at the beginning of the string use
1954 * {@code start = 0}. Negative start and end positions can be used to
1955 * specify offsets relative to the end of the String.</p>
1956 *
1957 * <p>If {@code start} is not strictly to the left of {@code end}, ""
1958 * is returned.</p>
1959 *
1960 * <pre>
1961 * StringUtils.substring(null, *, *) = null
1962 * StringUtils.substring("", * , *) = "";
1963 * StringUtils.substring("abc", 0, 2) = "ab"
1964 * StringUtils.substring("abc", 2, 0) = ""
1965 * StringUtils.substring("abc", 2, 4) = "c"
1966 * StringUtils.substring("abc", 4, 6) = ""
1967 * StringUtils.substring("abc", 2, 2) = ""
1968 * StringUtils.substring("abc", -2, -1) = "b"
1969 * StringUtils.substring("abc", -4, 2) = "ab"
1970 * </pre>
1971 *
1972 * @param str the String to get the substring from, may be null
1973 * @param start the position to start from, negative means
1974 * count back from the end of the String by this many characters
1975 * @param end the position to end at (exclusive), negative means
1976 * count back from the end of the String by this many characters
1977 * @return substring from start position to end position,
1978 * {@code null} if null String input
1979 */
1980 public static String substring(final String str, int start, int end) {
1981 if (str == null) {
1982 return null;
1983 }
1984
1985 // handle negatives
1986 if (end < 0) {
1987 end = str.length() + end; // remember end is negative
1988 }
1989 if (start < 0) {
1990 start = str.length() + start; // remember start is negative
1991 }
1992
1993 // check length next
1994 if (end > str.length()) {
1995 end = str.length();
1996 }
1997
1998 // if start is greater than end, return ""
1999 if (start > end) {
2000 return EMPTY;
2001 }
2002
2003 if (start < 0) {
2004 start = 0;
2005 }
2006 if (end < 0) {
2007 end = 0;
2008 }
2009
2010 return str.substring(start, end);
2011 }
2012
2013 // Left/Right/Mid
2014 //-----------------------------------------------------------------------
2015 /**
2016 * <p>Gets the leftmost {@code len} characters of a String.</p>
2017 *
2018 * <p>If {@code len} characters are not available, or the
2019 * String is {@code null}, the String will be returned without
2020 * an exception. An empty String is returned if len is negative.</p>
2021 *
2022 * <pre>
2023 * StringUtils.left(null, *) = null
2024 * StringUtils.left(*, -ve) = ""
2025 * StringUtils.left("", *) = ""
2026 * StringUtils.left("abc", 0) = ""
2027 * StringUtils.left("abc", 2) = "ab"
2028 * StringUtils.left("abc", 4) = "abc"
2029 * </pre>
2030 *
2031 * @param str the String to get the leftmost characters from, may be null
2032 * @param len the length of the required String
2033 * @return the leftmost characters, {@code null} if null String input
2034 */
2035 public static String left(final String str, final int len) {
2036 if (str == null) {
2037 return null;
2038 }
2039 if (len < 0) {
2040 return EMPTY;
2041 }
2042 if (str.length() <= len) {
2043 return str;
2044 }
2045 return str.substring(0, len);
2046 }
2047
2048 /**
2049 * <p>Gets the rightmost {@code len} characters of a String.</p>
2050 *
 * <p>If {@code len} characters are not available, or the String
 * is {@code null}, the String will be returned without
 * an exception. An empty String is returned if len is negative.</p>
2054 *
2055 * <pre>
2056 * StringUtils.right(null, *) = null
2057 * StringUtils.right(*, -ve) = ""
2058 * StringUtils.right("", *) = ""
2059 * StringUtils.right("abc", 0) = ""
2060 * StringUtils.right("abc", 2) = "bc"
2061 * StringUtils.right("abc", 4) = "abc"
2062 * </pre>
2063 *
2064 * @param str the String to get the rightmost characters from, may be null
2065 * @param len the length of the required String
2066 * @return the rightmost characters, {@code null} if null String input
2067 */
2068 public static String right(final String str, final int len) {
2069 if (str == null) {
2070 return null;
2071 }
2072 if (len < 0) {
2073 return EMPTY;
2074 }
2075 if (str.length() <= len) {
2076 return str;
2077 }
2078 return str.substring(str.length() - len);
2079 }
2080
2081 /**
2082 * <p>Gets {@code len} characters from the middle of a String.</p>
2083 *
 * <p>If {@code len} characters are not available, the remainder
 * of the String will be returned without an exception. If the
 * String is {@code null}, {@code null} will be returned.
 * An empty String is returned if {@code len} is negative or
 * {@code pos} exceeds the length of {@code str}.</p>
2089 *
2090 * <pre>
2091 * StringUtils.mid(null, *, *) = null
2092 * StringUtils.mid(*, *, -ve) = ""
2093 * StringUtils.mid("", 0, *) = ""
2094 * StringUtils.mid("abc", 0, 2) = "ab"
2095 * StringUtils.mid("abc", 0, 4) = "abc"
2096 * StringUtils.mid("abc", 2, 4) = "c"
2097 * StringUtils.mid("abc", 4, 2) = ""
2098 * StringUtils.mid("abc", -2, 2) = "ab"
2099 * </pre>
2100 *
2101 * @param str the String to get the characters from, may be null
2102 * @param pos the position to start from, negative treated as zero
2103 * @param len the length of the required String
2104 * @return the middle characters, {@code null} if null String input
2105 */
2106 public static String mid(final String str, int pos, final int len) {
2107 if (str == null) {
2108 return null;
2109 }
2110 if (len < 0 || pos > str.length()) {
2111 return EMPTY;
2112 }
2113 if (pos < 0) {
2114 pos = 0;
2115 }
2116 if (str.length() <= pos + len) {
2117 return str.substring(pos);
2118 }
2119 return str.substring(pos, pos + len);
2120 }
2121
2122 // SubStringAfter/SubStringBefore
2123 //-----------------------------------------------------------------------
2124 /**
2125 * <p>Gets the substring before the first occurrence of a separator.
2126 * The separator is not returned.</p>
2127 *
2128 * <p>A {@code null} string input will return {@code null}.
2129 * An empty ("") string input will return the empty string.
2130 * A {@code null} separator will return the input string.</p>
2131 *
2132 * <p>If nothing is found, the string input is returned.</p>
2133 *
2134 * <pre>
2135 * StringUtils.substringBefore(null, *) = null
2136 * StringUtils.substringBefore("", *) = ""
2137 * StringUtils.substringBefore("abc", "a") = ""
2138 * StringUtils.substringBefore("abcba", "b") = "a"
2139 * StringUtils.substringBefore("abc", "c") = "ab"
2140 * StringUtils.substringBefore("abc", "d") = "abc"
2141 * StringUtils.substringBefore("abc", "") = ""
2142 * StringUtils.substringBefore("abc", null) = "abc"
2143 * </pre>
2144 *
2145 * @param str the String to get a substring from, may be null
2146 * @param separator the String to search for, may be null
2147 * @return the substring before the first occurrence of the separator,
2148 * {@code null} if null String input
2149 * @since 2.0
2150 */
2151 public static String substringBefore(final String str, final String separator) {
2152 if (isEmpty(str) || separator == null) {
2153 return str;
2154 }
2155 if (separator.length() == 0) {
2156 return EMPTY;
2157 }
2158 final int pos = str.indexOf(separator);
2159 if (pos == INDEX_NOT_FOUND) {
2160 return str;
2161 }
2162 return str.substring(0, pos);
2163 }
2164
2165 /**
2166 * <p>Gets the substring after the first occurrence of a separator.
2167 * The separator is not returned.</p>
2168 *
2169 * <p>A {@code null} string input will return {@code null}.
2170 * An empty ("") string input will return the empty string.
2171 * A {@code null} separator will return the empty string if the
2172 * input string is not {@code null}.</p>
2173 *
2174 * <p>If nothing is found, the empty string is returned.</p>
2175 *
2176 * <pre>
2177 * StringUtils.substringAfter(null, *) = null
2178 * StringUtils.substringAfter("", *) = ""
2179 * StringUtils.substringAfter(*, null) = ""
2180 * StringUtils.substringAfter("abc", "a") = "bc"
2181 * StringUtils.substringAfter("abcba", "b") = "cba"
2182 * StringUtils.substringAfter("abc", "c") = ""
2183 * StringUtils.substringAfter("abc", "d") = ""
2184 * StringUtils.substringAfter("abc", "") = "abc"
2185 * </pre>
2186 *
2187 * @param str the String to get a substring from, may be null
2188 * @param separator the String to search for, may be null
2189 * @return the substring after the first occurrence of the separator,
2190 * {@code null} if null String input
2191 * @since 2.0
2192 */
2193 public static String substringAfter(final String str, final String separator) {
2194 if (isEmpty(str)) {
2195 return str;
2196 }
2197 if (separator == null) {
2198 return EMPTY;
2199 }
2200 final int pos = str.indexOf(separator);
2201 if (pos == INDEX_NOT_FOUND) {
2202 return EMPTY;
2203 }
2204 return str.substring(pos + separator.length());
2205 }
2206
2207 /**
2208 * <p>Gets the substring before the last occurrence of a separator.
2209 * The separator is not returned.</p>
2210 *
2211 * <p>A {@code null} string input will return {@code null}.
2212 * An empty ("") string input will return the empty string.
2213 * An empty or {@code null} separator will return the input string.</p>
2214 *
2215 * <p>If nothing is found, the string input is returned.</p>
2216 *
2217 * <pre>
2218 * StringUtils.substringBeforeLast(null, *) = null
2219 * StringUtils.substringBeforeLast("", *) = ""
2220 * StringUtils.substringBeforeLast("abcba", "b") = "abc"
2221 * StringUtils.substringBeforeLast("abc", "c") = "ab"
2222 * StringUtils.substringBeforeLast("a", "a") = ""
2223 * StringUtils.substringBeforeLast("a", "z") = "a"
2224 * StringUtils.substringBeforeLast("a", null) = "a"
2225 * StringUtils.substringBeforeLast("a", "") = "a"
2226 * </pre>
2227 *
2228 * @param str the String to get a substring from, may be null
2229 * @param separator the String to search for, may be null
2230 * @return the substring before the last occurrence of the separator,
2231 * {@code null} if null String input
2232 * @since 2.0
2233 */
2234 public static String substringBeforeLast(final String str, final String separator) {
2235 if (isEmpty(str) || isEmpty(separator)) {
2236 return str;
2237 }
2238 final int pos = str.lastIndexOf(separator);
2239 if (pos == INDEX_NOT_FOUND) {
2240 return str;
2241 }
2242 return str.substring(0, pos);
2243 }
2244
2245 /**
2246 * <p>Gets the substring after the last occurrence of a separator.
2247 * The separator is not returned.</p>
2248 *
2249 * <p>A {@code null} string input will return {@code null}.
2250 * An empty ("") string input will return the empty string.
2251 * An empty or {@code null} separator will return the empty string if
2252 * the input string is not {@code null}.</p>
2253 *
2254 * <p>If nothing is found, the empty string is returned.</p>
2255 *
2256 * <pre>
2257 * StringUtils.substringAfterLast(null, *) = null
2258 * StringUtils.substringAfterLast("", *) = ""
2259 * StringUtils.substringAfterLast(*, "") = ""
2260 * StringUtils.substringAfterLast(*, null) = ""
2261 * StringUtils.substringAfterLast("abc", "a") = "bc"
2262 * StringUtils.substringAfterLast("abcba", "b") = "a"
2263 * StringUtils.substringAfterLast("abc", "c") = ""
2264 * StringUtils.substringAfterLast("a", "a") = ""
2265 * StringUtils.substringAfterLast("a", "z") = ""
2266 * </pre>
2267 *
2268 * @param str the String to get a substring from, may be null
2269 * @param separator the String to search for, may be null
2270 * @return the substring after the last occurrence of the separator,
2271 * {@code null} if null String input
2272 * @since 2.0
2273 */
2274 public static String substringAfterLast(final String str, final String separator) {
2275 if (isEmpty(str)) {
2276 return str;
2277 }
2278 if (isEmpty(separator)) {
2279 return EMPTY;
2280 }
2281 final int pos = str.lastIndexOf(separator);
2282 if (pos == INDEX_NOT_FOUND || pos == str.length() - separator.length()) {
2283 return EMPTY;
2284 }
2285 return str.substring(pos + separator.length());
2286 }
2287
2288 // Substring between
2289 //-----------------------------------------------------------------------
2290 /**
2291 * <p>Gets the String that is nested in between two instances of the
2292 * same String.</p>
2293 *
2294 * <p>A {@code null} input String returns {@code null}.
2295 * A {@code null} tag returns {@code null}.</p>
2296 *
2297 * <pre>
2298 * StringUtils.substringBetween(null, *) = null
2299 * StringUtils.substringBetween("", "") = ""
2300 * StringUtils.substringBetween("", "tag") = null
2301 * StringUtils.substringBetween("tagabctag", null) = null
2302 * StringUtils.substringBetween("tagabctag", "") = ""
2303 * StringUtils.substringBetween("tagabctag", "tag") = "abc"
2304 * </pre>
2305 *
2306 * @param str the String containing the substring, may be null
2307 * @param tag the String before and after the substring, may be null
2308 * @return the substring, {@code null} if no match
2309 * @since 2.0
2310 */
2311 public static String substringBetween(final String str, final String tag) {
2312 return substringBetween(str, tag, tag);
2313 }
2314
2315 /**
2316 * <p>Gets the String that is nested in between two Strings.
2317 * Only the first match is returned.</p>
2318 *
2319 * <p>A {@code null} input String returns {@code null}.
2320 * A {@code null} open/close returns {@code null} (no match).
2321 * An empty ("") open and close returns an empty string.</p>
2322 *
2323 * <pre>
2324 * StringUtils.substringBetween("wx[b]yz", "[", "]") = "b"
2325 * StringUtils.substringBetween(null, *, *) = null
2326 * StringUtils.substringBetween(*, null, *) = null
2327 * StringUtils.substringBetween(*, *, null) = null
2328 * StringUtils.substringBetween("", "", "") = ""
2329 * StringUtils.substringBetween("", "", "]") = null
2330 * StringUtils.substringBetween("", "[", "]") = null
2331 * StringUtils.substringBetween("yabcz", "", "") = ""
2332 * StringUtils.substringBetween("yabcz", "y", "z") = "abc"
2333 * StringUtils.substringBetween("yabczyabcz", "y", "z") = "abc"
2334 * </pre>
2335 *
2336 * @param str the String containing the substring, may be null
2337 * @param open the String before the substring, may be null
2338 * @param close the String after the substring, may be null
2339 * @return the substring, {@code null} if no match
2340 * @since 2.0
2341 */
2342 public static String substringBetween(final String str, final String open, final String close) {
2343 if (str == null || open == null || close == null) {
2344 return null;
2345 }
2346 final int start = str.indexOf(open);
2347 if (start != INDEX_NOT_FOUND) {
2348 final int end = str.indexOf(close, start + open.length());
2349 if (end != INDEX_NOT_FOUND) {
2350 return str.substring(start + open.length(), end);
2351 }
2352 }
2353 return null;
2354 }
2355
2356 /**
2357 * <p>Searches a String for substrings delimited by a start and end tag,
2358 * returning all matching substrings in an array.</p>
2359 *
2360 * <p>A {@code null} input String returns {@code null}.
2361 * A {@code null} open/close returns {@code null} (no match).
2362 * An empty ("") open/close returns {@code null} (no match).</p>
2363 *
2364 * <pre>
2365 * StringUtils.substringsBetween("[a][b][c]", "[", "]") = ["a","b","c"]
2366 * StringUtils.substringsBetween(null, *, *) = null
2367 * StringUtils.substringsBetween(*, null, *) = null
2368 * StringUtils.substringsBetween(*, *, null) = null
2369 * StringUtils.substringsBetween("", "[", "]") = []
2370 * </pre>
2371 *
2372 * @param str the String containing the substrings, null returns null, empty returns empty
2373 * @param open the String identifying the start of the substring, empty returns null
2374 * @param close the String identifying the end of the substring, empty returns null
2375 * @return a String Array of substrings, or {@code null} if no match
2376 * @since 2.3
2377 */
2378 public static String[] substringsBetween(final String str, final String open, final String close) {
2379 if (str == null || isEmpty(open) || isEmpty(close)) {
2380 return null;
2381 }
2382 final int strLen = str.length();
2383 if (strLen == 0) {
2384 return ArrayUtils.EMPTY_STRING_ARRAY;
2385 }
2386 final int closeLen = close.length();
2387 final int openLen = open.length();
2388 final List<String> list = new ArrayList<String>();
2389 int pos = 0;
2390 while (pos < strLen - closeLen) {
2391 int start = str.indexOf(open, pos);
2392 if (start < 0) {
2393 break;
2394 }
2395 start += openLen;
2396 final int end = str.indexOf(close, start);
2397 if (end < 0) {
2398 break;
2399 }
2400 list.add(str.substring(start, end));
2401 pos = end + closeLen;
2402 }
2403 if (list.isEmpty()) {
2404 return null;
2405 }
2406 return list.toArray(new String [list.size()]);
2407 }
2408
2409 // Nested extraction
2410 //-----------------------------------------------------------------------
2411
2412 // Splitting
2413 //-----------------------------------------------------------------------
2414 /**
2415 * <p>Splits the provided text into an array, using whitespace as the
2416 * separator.
2417 * Whitespace is defined by {@link Character#isWhitespace(char)}.</p>
2418 *
2419 * <p>The separator is not included in the returned String array.
2420 * Adjacent separators are treated as one separator.
2421 * For more control over the split use the StrTokenizer class.</p>
2422 *
2423 * <p>A {@code null} input String returns {@code null}.</p>
2424 *
2425 * <pre>
2426 * StringUtils.split(null) = null
2427 * StringUtils.split("") = []
2428 * StringUtils.split("abc def") = ["abc", "def"]
2429 * StringUtils.split("abc def") = ["abc", "def"]
2430 * StringUtils.split(" abc ") = ["abc"]
2431 * </pre>
2432 *
2433 * @param str the String to parse, may be null
2434 * @return an array of parsed Strings, {@code null} if null String input
2435 */
2436 public static String[] split(final String str) {
2437 return split(str, null, -1);
2438 }
2439
2440 /**
2441 * <p>Splits the provided text into an array, separator specified.
2442 * This is an alternative to using StringTokenizer.</p>
2443 *
2444 * <p>The separator is not included in the returned String array.
2445 * Adjacent separators are treated as one separator.
2446 * For more control over the split use the StrTokenizer class.</p>
2447 *
2448 * <p>A {@code null} input String returns {@code null}.</p>
2449 *
2450 * <pre>
2451 * StringUtils.split(null, *) = null
2452 * StringUtils.split("", *) = []
2453 * StringUtils.split("a.b.c", '.') = ["a", "b", "c"]
2454 * StringUtils.split("a..b.c", '.') = ["a", "b", "c"]
2455 * StringUtils.split("a:b:c", '.') = ["a:b:c"]
2456 * StringUtils.split("a b c", ' ') = ["a", "b", "c"]
2457 * </pre>
2458 *
2459 * @param str the String to parse, may be null
2460 * @param separatorChar the character used as the delimiter
2461 * @return an array of parsed Strings, {@code null} if null String input
2462 * @since 2.0
2463 */
2464 public static String[] split(final String str, final char separatorChar) {
2465 return splitWorker(str, separatorChar, false);
2466 }
2467
2468 /**
2469 * <p>Splits the provided text into an array, separators specified.
2470 * This is an alternative to using StringTokenizer.</p>
2471 *
2472 * <p>The separator is not included in the returned String array.
2473 * Adjacent separators are treated as one separator.
2474 * For more control over the split use the StrTokenizer class.</p>
2475 *
2476 * <p>A {@code null} input String returns {@code null}.
2477 * A {@code null} separatorChars splits on whitespace.</p>
2478 *
2479 * <pre>
2480 * StringUtils.split(null, *) = null
2481 * StringUtils.split("", *) = []
2482 * StringUtils.split("abc def", null) = ["abc", "def"]
2483 * StringUtils.split("abc def", " ") = ["abc", "def"]
2484 * StringUtils.split("abc def", " ") = ["abc", "def"]
2485 * StringUtils.split("ab:cd:ef", ":") = ["ab", "cd", "ef"]
2486 * </pre>
2487 *
2488 * @param str the String to parse, may be null
2489 * @param separatorChars the characters used as the delimiters,
2490 * {@code null} splits on whitespace
2491 * @return an array of parsed Strings, {@code null} if null String input
2492 */
2493 public static String[] split(final String str, final String separatorChars) {
2494 return splitWorker(str, separatorChars, -1, false);
2495 }
2496
2497 /**
2498 * <p>Splits the provided text into an array with a maximum length,
2499 * separators specified.</p>
2500 *
2501 * <p>The separator is not included in the returned String array.
2502 * Adjacent separators are treated as one separator.</p>
2503 *
2504 * <p>A {@code null} input String returns {@code null}.
2505 * A {@code null} separatorChars splits on whitespace.</p>
2506 *
2507 * <p>If more than {@code max} delimited substrings are found, the last
2508 * returned string includes all characters after the first {@code max - 1}
2509 * returned strings (including separator characters).</p>
2510 *
2511 * <pre>
2512 * StringUtils.split(null, *, *) = null
2513 * StringUtils.split("", *, *) = []
2514 * StringUtils.split("ab cd ef", null, 0) = ["ab", "cd", "ef"]
2515 * StringUtils.split("ab cd ef", null, 0) = ["ab", "cd", "ef"]
2516 * StringUtils.split("ab:cd:ef", ":", 0) = ["ab", "cd", "ef"]
2517 * StringUtils.split("ab:cd:ef", ":", 2) = ["ab", "cd:ef"]
2518 * </pre>
2519 *
2520 * @param str the String to parse, may be null
2521 * @param separatorChars the characters used as the delimiters,
2522 * {@code null} splits on whitespace
2523 * @param max the maximum number of elements to include in the
2524 * array. A zero or negative value implies no limit
2525 * @return an array of parsed Strings, {@code null} if null String input
2526 */
2527 public static String[] split(final String str, final String separatorChars, final int max) {
2528 return splitWorker(str, separatorChars, max, false);
2529 }
2530
2531 /**
2532 * <p>Splits the provided text into an array, separator string specified.</p>
2533 *
2534 * <p>The separator(s) will not be included in the returned String array.
2535 * Adjacent separators are treated as one separator.</p>
2536 *
2537 * <p>A {@code null} input String returns {@code null}.
2538 * A {@code null} separator splits on whitespace.</p>
2539 *
2540 * <pre>
2541 * StringUtils.splitByWholeSeparator(null, *) = null
2542 * StringUtils.splitByWholeSeparator("", *) = []
2543 * StringUtils.splitByWholeSeparator("ab de fg", null) = ["ab", "de", "fg"]
2544 * StringUtils.splitByWholeSeparator("ab de fg", null) = ["ab", "de", "fg"]
2545 * StringUtils.splitByWholeSeparator("ab:cd:ef", ":") = ["ab", "cd", "ef"]
2546 * StringUtils.splitByWholeSeparator("ab-!-cd-!-ef", "-!-") = ["ab", "cd", "ef"]
2547 * </pre>
2548 *
2549 * @param str the String to parse, may be null
2550 * @param separator String containing the String to be used as a delimiter,
2551 * {@code null} splits on whitespace
2552 * @return an array of parsed Strings, {@code null} if null String was input
2553 */
2554 public static String[] splitByWholeSeparator(final String str, final String separator) {
2555 return splitByWholeSeparatorWorker( str, separator, -1, false ) ;
2556 }
2557
2558 /**
2559 * <p>Splits the provided text into an array, separator string specified.
2560 * Returns a maximum of {@code max} substrings.</p>
2561 *
2562 * <p>The separator(s) will not be included in the returned String array.
2563 * Adjacent separators are treated as one separator.</p>
2564 *
2565 * <p>A {@code null} input String returns {@code null}.
2566 * A {@code null} separator splits on whitespace.</p>
2567 *
2568 * <pre>
2569 * StringUtils.splitByWholeSeparator(null, *, *) = null
2570 * StringUtils.splitByWholeSeparator("", *, *) = []
2571 * StringUtils.splitByWholeSeparator("ab de fg", null, 0) = ["ab", "de", "fg"]
2572 * StringUtils.splitByWholeSeparator("ab de fg", null, 0) = ["ab", "de", "fg"]
2573 * StringUtils.splitByWholeSeparator("ab:cd:ef", ":", 2) = ["ab", "cd:ef"]
2574 * StringUtils.splitByWholeSeparator("ab-!-cd-!-ef", "-!-", 5) = ["ab", "cd", "ef"]
2575 * StringUtils.splitByWholeSeparator("ab-!-cd-!-ef", "-!-", 2) = ["ab", "cd-!-ef"]
2576 * </pre>
2577 *
2578 * @param str the String to parse, may be null
2579 * @param separator String containing the String to be used as a delimiter,
2580 * {@code null} splits on whitespace
2581 * @param max the maximum number of elements to include in the returned
2582 * array. A zero or negative value implies no limit.
2583 * @return an array of parsed Strings, {@code null} if null String was input
2584 */
2585 public static String[] splitByWholeSeparator( final String str, final String separator, final int max ) {
2586 return splitByWholeSeparatorWorker(str, separator, max, false);
2587 }
2588
2589 /**
2590 * <p>Splits the provided text into an array, separator string specified. </p>
2591 *
2592 * <p>The separator is not included in the returned String array.
2593 * Adjacent separators are treated as separators for empty tokens.
2594 * For more control over the split use the StrTokenizer class.</p>
2595 *
2596 * <p>A {@code null} input String returns {@code null}.
2597 * A {@code null} separator splits on whitespace.</p>
2598 *
2599 * <pre>
2600 * StringUtils.splitByWholeSeparatorPreserveAllTokens(null, *) = null
2601 * StringUtils.splitByWholeSeparatorPreserveAllTokens("", *) = []
2602 * StringUtils.splitByWholeSeparatorPreserveAllTokens("ab de fg", null) = ["ab", "de", "fg"]
2603 * StringUtils.splitByWholeSeparatorPreserveAllTokens("ab de fg", null) = ["ab", "", "", "de", "fg"]
2604 * StringUtils.splitByWholeSeparatorPreserveAllTokens("ab:cd:ef", ":") = ["ab", "cd", "ef"]
2605 * StringUtils.splitByWholeSeparatorPreserveAllTokens("ab-!-cd-!-ef", "-!-") = ["ab", "cd", "ef"]
2606 * </pre>
2607 *
2608 * @param str the String to parse, may be null
2609 * @param separator String containing the String to be used as a delimiter,
2610 * {@code null} splits on whitespace
2611 * @return an array of parsed Strings, {@code null} if null String was input
2612 * @since 2.4
2613 */
2614 public static String[] splitByWholeSeparatorPreserveAllTokens(final String str, final String separator) {
2615 return splitByWholeSeparatorWorker(str, separator, -1, true);
2616 }
2617
2618 /**
2619 * <p>Splits the provided text into an array, separator string specified.
2620 * Returns a maximum of {@code max} substrings.</p>
2621 *
2622 * <p>The separator is not included in the returned String array.
2623 * Adjacent separators are treated as separators for empty tokens.
2624 * For more control over the split use the StrTokenizer class.</p>
2625 *
2626 * <p>A {@code null} input String returns {@code null}.
2627 * A {@code null} separator splits on whitespace.</p>
2628 *
2629 * <pre>
2630 * StringUtils.splitByWholeSeparatorPreserveAllTokens(null, *, *) = null
2631 * StringUtils.splitByWholeSeparatorPreserveAllTokens("", *, *) = []
2632 * StringUtils.splitByWholeSeparatorPreserveAllTokens("ab de fg", null, 0) = ["ab", "de", "fg"]
2633 * StringUtils.splitByWholeSeparatorPreserveAllTokens("ab de fg", null, 0) = ["ab", "", "", "de", "fg"]
2634 * StringUtils.splitByWholeSeparatorPreserveAllTokens("ab:cd:ef", ":", 2) = ["ab", "cd:ef"]
2635 * StringUtils.splitByWholeSeparatorPreserveAllTokens("ab-!-cd-!-ef", "-!-", 5) = ["ab", "cd", "ef"]
2636 * StringUtils.splitByWholeSeparatorPreserveAllTokens("ab-!-cd-!-ef", "-!-", 2) = ["ab", "cd-!-ef"]
2637 * </pre>
2638 *
2639 * @param str the String to parse, may be null
2640 * @param separator String containing the String to be used as a delimiter,
2641 * {@code null} splits on whitespace
2642 * @param max the maximum number of elements to include in the returned
2643 * array. A zero or negative value implies no limit.
2644 * @return an array of parsed Strings, {@code null} if null String was input
2645 * @since 2.4
2646 */
2647 public static String[] splitByWholeSeparatorPreserveAllTokens(final String str, final String separator, final int max) {
2648 return splitByWholeSeparatorWorker(str, separator, max, true);
2649 }
2650
2651 /**
2652 * Performs the logic for the {@code splitByWholeSeparatorPreserveAllTokens} methods.
2653 *
2654 * @param str the String to parse, may be {@code null}
2655 * @param separator String containing the String to be used as a delimiter,
2656 * {@code null} splits on whitespace
2657 * @param max the maximum number of elements to include in the returned
2658 * array. A zero or negative value implies no limit.
2659 * @param preserveAllTokens if {@code true}, adjacent separators are
2660 * treated as empty token separators; if {@code false}, adjacent
2661 * separators are treated as one separator.
2662 * @return an array of parsed Strings, {@code null} if null String input
2663 * @since 2.4
2664 */
2665 private static String[] splitByWholeSeparatorWorker(
2666 final String str, final String separator, final int max, final boolean preserveAllTokens) {
2667 if (str == null) {
2668 return null;
2669 }
2670
2671 final int len = str.length();
2672
2673 if (len == 0) {
2674 return ArrayUtils.EMPTY_STRING_ARRAY;
2675 }
2676
2677 if (separator == null || EMPTY.equals(separator)) {
2678 // Split on whitespace.
2679 return splitWorker(str, null, max, preserveAllTokens);
2680 }
2681
2682 final int separatorLength = separator.length();
2683
2684 final ArrayList<String> substrings = new ArrayList<String>();
2685 int numberOfSubstrings = 0;
2686 int beg = 0;
2687 int end = 0;
2688 while (end < len) {
2689 end = str.indexOf(separator, beg);
2690
2691 if (end > -1) {
2692 if (end > beg) {
2693 numberOfSubstrings += 1;
2694
2695 if (numberOfSubstrings == max) {
2696 end = len;
2697 substrings.add(str.substring(beg));
2698 } else {
2699 // The following is OK, because String.substring( beg, end ) excludes
2700 // the character at the position 'end'.
2701 substrings.add(str.substring(beg, end));
2702
2703 // Set the starting point for the next search.
2704 // The following is equivalent to beg = end + (separatorLength - 1) + 1,
2705 // which is the right calculation:
2706 beg = end + separatorLength;
2707 }
2708 } else {
2709 // We found a consecutive occurrence of the separator, so skip it.
2710 if (preserveAllTokens) {
2711 numberOfSubstrings += 1;
2712 if (numberOfSubstrings == max) {
2713 end = len;
2714 substrings.add(str.substring(beg));
2715 } else {
2716 substrings.add(EMPTY);
2717 }
2718 }
2719 beg = end + separatorLength;
2720 }
2721 } else {
2722 // String.substring( beg ) goes from 'beg' to the end of the String.
2723 substrings.add(str.substring(beg));
2724 end = len;
2725 }
2726 }
2727
2728 return substrings.toArray(new String[substrings.size()]);
2729 }
2730
2731 // -----------------------------------------------------------------------
2732 /**
2733 * <p>Splits the provided text into an array, using whitespace as the
2734 * separator, preserving all tokens, including empty tokens created by
2735 * adjacent separators. This is an alternative to using StringTokenizer.
2736 * Whitespace is defined by {@link Character#isWhitespace(char)}.</p>
2737 *
2738 * <p>The separator is not included in the returned String array.
2739 * Adjacent separators are treated as separators for empty tokens.
2740 * For more control over the split use the StrTokenizer class.</p>
2741 *
2742 * <p>A {@code null} input String returns {@code null}.</p>
2743 *
2744 * <pre>
2745 * StringUtils.splitPreserveAllTokens(null) = null
2746 * StringUtils.splitPreserveAllTokens("") = []
2747 * StringUtils.splitPreserveAllTokens("abc def") = ["abc", "def"]
2748 * StringUtils.splitPreserveAllTokens("abc def") = ["abc", "", "def"]
2749 * StringUtils.splitPreserveAllTokens(" abc ") = ["", "abc", ""]
2750 * </pre>
2751 *
2752 * @param str the String to parse, may be {@code null}
2753 * @return an array of parsed Strings, {@code null} if null String input
2754 * @since 2.1
2755 */
2756 public static String[] splitPreserveAllTokens(final String str) {
2757 return splitWorker(str, null, -1, true);
2758 }
2759
2760 /**
2761 * <p>Splits the provided text into an array, separator specified,
2762 * preserving all tokens, including empty tokens created by adjacent
2763 * separators. This is an alternative to using StringTokenizer.</p>
2764 *
2765 * <p>The separator is not included in the returned String array.
2766 * Adjacent separators are treated as separators for empty tokens.
2767 * For more control over the split use the StrTokenizer class.</p>
2768 *
2769 * <p>A {@code null} input String returns {@code null}.</p>
2770 *
2771 * <pre>
2772 * StringUtils.splitPreserveAllTokens(null, *) = null
2773 * StringUtils.splitPreserveAllTokens("", *) = []
2774 * StringUtils.splitPreserveAllTokens("a.b.c", '.') = ["a", "b", "c"]
2775 * StringUtils.splitPreserveAllTokens("a..b.c", '.') = ["a", "", "b", "c"]
2776 * StringUtils.splitPreserveAllTokens("a:b:c", '.') = ["a:b:c"]
2777 * StringUtils.splitPreserveAllTokens("a\tb\nc", null) = ["a", "b", "c"]
2778 * StringUtils.splitPreserveAllTokens("a b c", ' ') = ["a", "b", "c"]
2779 * StringUtils.splitPreserveAllTokens("a b c ", ' ') = ["a", "b", "c", ""]
2780 * StringUtils.splitPreserveAllTokens("a b c ", ' ') = ["a", "b", "c", "", ""]
2781 * StringUtils.splitPreserveAllTokens(" a b c", ' ') = ["", a", "b", "c"]
2782 * StringUtils.splitPreserveAllTokens(" a b c", ' ') = ["", "", a", "b", "c"]
2783 * StringUtils.splitPreserveAllTokens(" a b c ", ' ') = ["", a", "b", "c", ""]
2784 * </pre>
2785 *
2786 * @param str the String to parse, may be {@code null}
2787 * @param separatorChar the character used as the delimiter,
2788 * {@code null} splits on whitespace
2789 * @return an array of parsed Strings, {@code null} if null String input
2790 * @since 2.1
2791 */
2792 public static String[] splitPreserveAllTokens(final String str, final char separatorChar) {
2793 return splitWorker(str, separatorChar, true);
2794 }
2795
2796 /**
2797 * Performs the logic for the {@code split} and
2798 * {@code splitPreserveAllTokens} methods that do not return a
2799 * maximum array length.
2800 *
2801 * @param str the String to parse, may be {@code null}
2802 * @param separatorChar the separate character
2803 * @param preserveAllTokens if {@code true}, adjacent separators are
2804 * treated as empty token separators; if {@code false}, adjacent
2805 * separators are treated as one separator.
2806 * @return an array of parsed Strings, {@code null} if null String input
2807 */
2808 private static String[] splitWorker(final String str, final char separatorChar, final boolean preserveAllTokens) {
2809 // Performance tuned for 2.0 (JDK1.4)
2810
2811 if (str == null) {
2812 return null;
2813 }
2814 final int len = str.length();
2815 if (len == 0) {
2816 return ArrayUtils.EMPTY_STRING_ARRAY;
2817 }
2818 final List<String> list = new ArrayList<String>();
2819 int i = 0, start = 0;
2820 boolean match = false;
2821 boolean lastMatch = false;
2822 while (i < len) {
2823 if (str.charAt(i) == separatorChar) {
2824 if (match || preserveAllTokens) {
2825 list.add(str.substring(start, i));
2826 match = false;
2827 lastMatch = true;
2828 }
2829 start = ++i;
2830 continue;
2831 }
2832 lastMatch = false;
2833 match = true;
2834 i++;
2835 }
2836 if (match || preserveAllTokens && lastMatch) {
2837 list.add(str.substring(start, i));
2838 }
2839 return list.toArray(new String[list.size()]);
2840 }
2841
2842 /**
2843 * <p>Splits the provided text into an array, separators specified,
2844 * preserving all tokens, including empty tokens created by adjacent
2845 * separators. This is an alternative to using StringTokenizer.</p>
2846 *
2847 * <p>The separator is not included in the returned String array.
2848 * Adjacent separators are treated as separators for empty tokens.
2849 * For more control over the split use the StrTokenizer class.</p>
2850 *
2851 * <p>A {@code null} input String returns {@code null}.
2852 * A {@code null} separatorChars splits on whitespace.</p>
2853 *
2854 * <pre>
2855 * StringUtils.splitPreserveAllTokens(null, *) = null
2856 * StringUtils.splitPreserveAllTokens("", *) = []
2857 * StringUtils.splitPreserveAllTokens("abc def", null) = ["abc", "def"]
2858 * StringUtils.splitPreserveAllTokens("abc def", " ") = ["abc", "def"]
2859 * StringUtils.splitPreserveAllTokens("abc def", " ") = ["abc", "", def"]
2860 * StringUtils.splitPreserveAllTokens("ab:cd:ef", ":") = ["ab", "cd", "ef"]
2861 * StringUtils.splitPreserveAllTokens("ab:cd:ef:", ":") = ["ab", "cd", "ef", ""]
2862 * StringUtils.splitPreserveAllTokens("ab:cd:ef::", ":") = ["ab", "cd", "ef", "", ""]
2863 * StringUtils.splitPreserveAllTokens("ab::cd:ef", ":") = ["ab", "", cd", "ef"]
2864 * StringUtils.splitPreserveAllTokens(":cd:ef", ":") = ["", cd", "ef"]
2865 * StringUtils.splitPreserveAllTokens("::cd:ef", ":") = ["", "", cd", "ef"]
2866 * StringUtils.splitPreserveAllTokens(":cd:ef:", ":") = ["", cd", "ef", ""]
2867 * </pre>
2868 *
2869 * @param str the String to parse, may be {@code null}
2870 * @param separatorChars the characters used as the delimiters,
2871 * {@code null} splits on whitespace
2872 * @return an array of parsed Strings, {@code null} if null String input
2873 * @since 2.1
2874 */
2875 public static String[] splitPreserveAllTokens(final String str, final String separatorChars) {
2876 return splitWorker(str, separatorChars, -1, true);
2877 }
2878
2879 /**
2880 * <p>Splits the provided text into an array with a maximum length,
2881 * separators specified, preserving all tokens, including empty tokens
2882 * created by adjacent separators.</p>
2883 *
2884 * <p>The separator is not included in the returned String array.
2885 * Adjacent separators are treated as separators for empty tokens.
2886 * Adjacent separators are treated as one separator.</p>
2887 *
2888 * <p>A {@code null} input String returns {@code null}.
2889 * A {@code null} separatorChars splits on whitespace.</p>
2890 *
2891 * <p>If more than {@code max} delimited substrings are found, the last
2892 * returned string includes all characters after the first {@code max - 1}
2893 * returned strings (including separator characters).</p>
2894 *
2895 * <pre>
2896 * StringUtils.splitPreserveAllTokens(null, *, *) = null
2897 * StringUtils.splitPreserveAllTokens("", *, *) = []
2898 * StringUtils.splitPreserveAllTokens("ab de fg", null, 0) = ["ab", "cd", "ef"]
2899 * StringUtils.splitPreserveAllTokens("ab de fg", null, 0) = ["ab", "cd", "ef"]
2900 * StringUtils.splitPreserveAllTokens("ab:cd:ef", ":", 0) = ["ab", "cd", "ef"]
2901 * StringUtils.splitPreserveAllTokens("ab:cd:ef", ":", 2) = ["ab", "cd:ef"]
2902 * StringUtils.splitPreserveAllTokens("ab de fg", null, 2) = ["ab", " de fg"]
2903 * StringUtils.splitPreserveAllTokens("ab de fg", null, 3) = ["ab", "", " de fg"]
2904 * StringUtils.splitPreserveAllTokens("ab de fg", null, 4) = ["ab", "", "", "de fg"]
2905 * </pre>
2906 *
2907 * @param str the String to parse, may be {@code null}
2908 * @param separatorChars the characters used as the delimiters,
2909 * {@code null} splits on whitespace
2910 * @param max the maximum number of elements to include in the
2911 * array. A zero or negative value implies no limit
2912 * @return an array of parsed Strings, {@code null} if null String input
2913 * @since 2.1
2914 */
2915 public static String[] splitPreserveAllTokens(final String str, final String separatorChars, final int max) {
2916 return splitWorker(str, separatorChars, max, true);
2917 }
2918
2919 /**
2920 * Performs the logic for the {@code split} and
2921 * {@code splitPreserveAllTokens} methods that return a maximum array
2922 * length.
2923 *
2924 * @param str the String to parse, may be {@code null}
2925 * @param separatorChars the separate character
2926 * @param max the maximum number of elements to include in the
2927 * array. A zero or negative value implies no limit.
2928 * @param preserveAllTokens if {@code true}, adjacent separators are
2929 * treated as empty token separators; if {@code false}, adjacent
2930 * separators are treated as one separator.
2931 * @return an array of parsed Strings, {@code null} if null String input
2932 */
2933 private static String[] splitWorker(final String str, final String separatorChars, final int max, final boolean preserveAllTokens) {
2934 // Performance tuned for 2.0 (JDK1.4)
2935 // Direct code is quicker than StringTokenizer.
2936 // Also, StringTokenizer uses isSpace() not isWhitespace()
2937
2938 if (str == null) {
2939 return null;
2940 }
2941 final int len = str.length();
2942 if (len == 0) {
2943 return ArrayUtils.EMPTY_STRING_ARRAY;
2944 }
2945 final List<String> list = new ArrayList<String>();
2946 int sizePlus1 = 1;
2947 int i = 0, start = 0;
2948 boolean match = false;
2949 boolean lastMatch = false;
2950 if (separatorChars == null) {
2951 // Null separator means use whitespace
2952 while (i < len) {
2953 if (Character.isWhitespace(str.charAt(i))) {
2954 if (match || preserveAllTokens) {
2955 lastMatch = true;
2956 if (sizePlus1++ == max) {
2957 i = len;
2958 lastMatch = false;
2959 }
2960 list.add(str.substring(start, i));
2961 match = false;
2962 }
2963 start = ++i;
2964 continue;
2965 }
2966 lastMatch = false;
2967 match = true;
2968 i++;
2969 }
2970 } else if (separatorChars.length() == 1) {
2971 // Optimise 1 character case
2972 final char sep = separatorChars.charAt(0);
2973 while (i < len) {
2974 if (str.charAt(i) == sep) {
2975 if (match || preserveAllTokens) {
2976 lastMatch = true;
2977 if (sizePlus1++ == max) {
2978 i = len;
2979 lastMatch = false;
2980 }
2981 list.add(str.substring(start, i));
2982 match = false;
2983 }
2984 start = ++i;
2985 continue;
2986 }
2987 lastMatch = false;
2988 match = true;
2989 i++;
2990 }
2991 } else {
2992 // standard case
2993 while (i < len) {
2994 if (separatorChars.indexOf(str.charAt(i)) >= 0) {
2995 if (match || preserveAllTokens) {
2996 lastMatch = true;
2997 if (sizePlus1++ == max) {
2998 i = len;
2999 lastMatch = false;
3000 }
3001 list.add(str.substring(start, i));
3002 match = false;
3003 }
3004 start = ++i;
3005 continue;
3006 }
3007 lastMatch = false;
3008 match = true;
3009 i++;
3010 }
3011 }
3012 if (match || preserveAllTokens && lastMatch) {
3013 list.add(str.substring(start, i));
3014 }
3015 return list.toArray(new String[list.size()]);
3016 }
3017
3018 /**
3019 * <p>Splits a String by Character type as returned by
3020 * {@code java.lang.Character.getType(char)}. Groups of contiguous
3021 * characters of the same type are returned as complete tokens.
3022 * <pre>
3023 * StringUtils.splitByCharacterType(null) = null
3024 * StringUtils.splitByCharacterType("") = []
3025 * StringUtils.splitByCharacterType("ab de fg") = ["ab", " ", "de", " ", "fg"]
3026 * StringUtils.splitByCharacterType("ab de fg") = ["ab", " ", "de", " ", "fg"]
3027 * StringUtils.splitByCharacterType("ab:cd:ef") = ["ab", ":", "cd", ":", "ef"]
3028 * StringUtils.splitByCharacterType("number5") = ["number", "5"]
3029 * StringUtils.splitByCharacterType("fooBar") = ["foo", "B", "ar"]
3030 * StringUtils.splitByCharacterType("foo200Bar") = ["foo", "200", "B", "ar"]
3031 * StringUtils.splitByCharacterType("ASFRules") = ["ASFR", "ules"]
3032 * </pre>
3033 * @param str the String to split, may be {@code null}
3034 * @return an array of parsed Strings, {@code null} if null String input
3035 * @since 2.4
3036 */
3037 public static String[] splitByCharacterType(final String str) {
3038 return splitByCharacterType(str, false);
3039 }
3040
3041 /**
3042 * <p>Splits a String by Character type as returned by
3043 * {@code java.lang.Character.getType(char)}. Groups of contiguous
3044 * characters of the same type are returned as complete tokens, with the
3045 * following exception: the character of type
3046 * {@code Character.UPPERCASE_LETTER}, if any, immediately
3047 * preceding a token of type {@code Character.LOWERCASE_LETTER}
3048 * will belong to the following token rather than to the preceding, if any,
3049 * {@code Character.UPPERCASE_LETTER} token.
3050 * <pre>
3051 * StringUtils.splitByCharacterTypeCamelCase(null) = null
3052 * StringUtils.splitByCharacterTypeCamelCase("") = []
3053 * StringUtils.splitByCharacterTypeCamelCase("ab de fg") = ["ab", " ", "de", " ", "fg"]
3054 * StringUtils.splitByCharacterTypeCamelCase("ab de fg") = ["ab", " ", "de", " ", "fg"]
3055 * StringUtils.splitByCharacterTypeCamelCase("ab:cd:ef") = ["ab", ":", "cd", ":", "ef"]
3056 * StringUtils.splitByCharacterTypeCamelCase("number5") = ["number", "5"]
3057 * StringUtils.splitByCharacterTypeCamelCase("fooBar") = ["foo", "Bar"]
3058 * StringUtils.splitByCharacterTypeCamelCase("foo200Bar") = ["foo", "200", "Bar"]
3059 * StringUtils.splitByCharacterTypeCamelCase("ASFRules") = ["ASF", "Rules"]
3060 * </pre>
3061 * @param str the String to split, may be {@code null}
3062 * @return an array of parsed Strings, {@code null} if null String input
3063 * @since 2.4
3064 */
3065 public static String[] splitByCharacterTypeCamelCase(final String str) {
3066 return splitByCharacterType(str, true);
3067 }
3068
3069 /**
3070 * <p>Splits a String by Character type as returned by
3071 * {@code java.lang.Character.getType(char)}. Groups of contiguous
3072 * characters of the same type are returned as complete tokens, with the
3073 * following exception: if {@code camelCase} is {@code true},
3074 * the character of type {@code Character.UPPERCASE_LETTER}, if any,
3075 * immediately preceding a token of type {@code Character.LOWERCASE_LETTER}
3076 * will belong to the following token rather than to the preceding, if any,
3077 * {@code Character.UPPERCASE_LETTER} token.
3078 * @param str the String to split, may be {@code null}
3079 * @param camelCase whether to use so-called "camel-case" for letter types
3080 * @return an array of parsed Strings, {@code null} if null String input
3081 * @since 2.4
3082 */
3083 private static String[] splitByCharacterType(final String str, final boolean camelCase) {
3084 if (str == null) {
3085 return null;
3086 }
3087 if (str.length() == 0) {
3088 return ArrayUtils.EMPTY_STRING_ARRAY;
3089 }
3090 final char[] c = str.toCharArray();
3091 final List<String> list = new ArrayList<String>();
3092 int tokenStart = 0;
3093 int currentType = Character.getType(c[tokenStart]);
3094 for (int pos = tokenStart + 1; pos < c.length; pos++) {
3095 final int type = Character.getType(c[pos]);
3096 if (type == currentType) {
3097 continue;
3098 }
3099 if (camelCase && type == Character.LOWERCASE_LETTER && currentType == Character.UPPERCASE_LETTER) {
3100 final int newTokenStart = pos - 1;
3101 if (newTokenStart != tokenStart) {
3102 list.add(new String(c, tokenStart, newTokenStart - tokenStart));
3103 tokenStart = newTokenStart;
3104 }
3105 } else {
3106 list.add(new String(c, tokenStart, pos - tokenStart));
3107 tokenStart = pos;
3108 }
3109 currentType = type;
3110 }
3111 list.add(new String(c, tokenStart, c.length - tokenStart));
3112 return list.toArray(new String[list.size()]);
3113 }
3114
3115 // Joining
3116 //-----------------------------------------------------------------------
3117 /**
3118 * <p>Joins the elements of the provided array into a single String
3119 * containing the provided list of elements.</p>
3120 *
3121 * <p>No separator is added to the joined String.
3122 * Null objects or empty strings within the array are represented by
3123 * empty strings.</p>
3124 *
3125 * <pre>
3126 * StringUtils.join(null) = null
3127 * StringUtils.join([]) = ""
3128 * StringUtils.join([null]) = ""
3129 * StringUtils.join(["a", "b", "c"]) = "abc"
3130 * StringUtils.join([null, "", "a"]) = "a"
3131 * </pre>
3132 *
3133 * @param <T> the specific type of values to join together
3134 * @param elements the values to join together, may be null
3135 * @return the joined String, {@code null} if null array input
3136 * @since 2.0
3137 * @since 3.0 Changed signature to use varargs
3138 */
3139 public static <T> String join(final T... elements) {
3140 return join(elements, null);
3141 }
3142
3143 /**
3144 * <p>Joins the elements of the provided array into a single String
3145 * containing the provided list of elements.</p>
3146 *
3147 * <p>No delimiter is added before or after the list.
3148 * Null objects or empty strings within the array are represented by
3149 * empty strings.</p>
3150 *
3151 * <pre>
3152 * StringUtils.join(null, *) = null
3153 * StringUtils.join([], *) = ""
3154 * StringUtils.join([null], *) = ""
3155 * StringUtils.join(["a", "b", "c"], ';') = "a;b;c"
3156 * StringUtils.join(["a", "b", "c"], null) = "abc"
3157 * StringUtils.join([null, "", "a"], ';') = ";;a"
3158 * </pre>
3159 *
3160 * @param array the array of values to join together, may be null
3161 * @param separator the separator character to use
3162 * @return the joined String, {@code null} if null array input
3163 * @since 2.0
3164 */
3165 public static String join(final Object[] array, final char separator) {
3166 if (array == null) {
3167 return null;
3168 }
3169 return join(array, separator, 0, array.length);
3170 }
3171
3172 /**
3173 * <p>
3174 * Joins the elements of the provided array into a single String containing the provided list of elements.
3175 * </p>
3176 *
3177 * <p>
3178 * No delimiter is added before or after the list. Null objects or empty strings within the array are represented
3179 * by empty strings.
3180 * </p>
3181 *
3182 * <pre>
3183 * StringUtils.join(null, *) = null
3184 * StringUtils.join([], *) = ""
3185 * StringUtils.join([null], *) = ""
3186 * StringUtils.join([1, 2, 3], ';') = "1;2;3"
3187 * StringUtils.join([1, 2, 3], null) = "123"
3188 * </pre>
3189 *
3190 * @param array
3191 * the array of values to join together, may be null
3192 * @param separator
3193 * the separator character to use
3194 * @return the joined String, {@code null} if null array input
3195 * @since 3.2
3196 */
3197 public static String join(final long[] array, final char separator) {
3198 if (array == null) {
3199 return null;
3200 }
3201 return join(array, separator, 0, array.length);
3202 }
3203
3204 /**
3205 * <p>
3206 * Joins the elements of the provided array into a single String containing the provided list of elements.
3207 * </p>
3208 *
3209 * <p>
3210 * No delimiter is added before or after the list. Null objects or empty strings within the array are represented
3211 * by empty strings.
3212 * </p>
3213 *
3214 * <pre>
3215 * StringUtils.join(null, *) = null
3216 * StringUtils.join([], *) = ""
3217 * StringUtils.join([null], *) = ""
3218 * StringUtils.join([1, 2, 3], ';') = "1;2;3"
3219 * StringUtils.join([1, 2, 3], null) = "123"
3220 * </pre>
3221 *
3222 * @param array
3223 * the array of values to join together, may be null
3224 * @param separator
3225 * the separator character to use
3226 * @return the joined String, {@code null} if null array input
3227 * @since 3.2
3228 */
3229 public static String join(final int[] array, final char separator) {
3230 if (array == null) {
3231 return null;
3232 }
3233 return join(array, separator, 0, array.length);
3234 }
3235
3236 /**
3237 * <p>
3238 * Joins the elements of the provided array into a single String containing the provided list of elements.
3239 * </p>
3240 *
3241 * <p>
3242 * No delimiter is added before or after the list. Null objects or empty strings within the array are represented
3243 * by empty strings.
3244 * </p>
3245 *
3246 * <pre>
3247 * StringUtils.join(null, *) = null
3248 * StringUtils.join([], *) = ""
3249 * StringUtils.join([null], *) = ""
3250 * StringUtils.join([1, 2, 3], ';') = "1;2;3"
3251 * StringUtils.join([1, 2, 3], null) = "123"
3252 * </pre>
3253 *
3254 * @param array
3255 * the array of values to join together, may be null
3256 * @param separator
3257 * the separator character to use
3258 * @return the joined String, {@code null} if null array input
3259 * @since 3.2
3260 */
3261 public static String join(final short[] array, final char separator) {
3262 if (array == null) {
3263 return null;
3264 }
3265 return join(array, separator, 0, array.length);
3266 }
3267
3268 /**
3269 * <p>
3270 * Joins the elements of the provided array into a single String containing the provided list of elements.
3271 * </p>
3272 *
3273 * <p>
3274 * No delimiter is added before or after the list. Null objects or empty strings within the array are represented
3275 * by empty strings.
3276 * </p>
3277 *
3278 * <pre>
3279 * StringUtils.join(null, *) = null
3280 * StringUtils.join([], *) = ""
3281 * StringUtils.join([null], *) = ""
3282 * StringUtils.join([1, 2, 3], ';') = "1;2;3"
3283 * StringUtils.join([1, 2, 3], null) = "123"
3284 * </pre>
3285 *
3286 * @param array
3287 * the array of values to join together, may be null
3288 * @param separator
3289 * the separator character to use
3290 * @return the joined String, {@code null} if null array input
3291 * @since 3.2
3292 */
3293 public static String join(final byte[] array, final char separator) {
3294 if (array == null) {
3295 return null;
3296 }
3297 return join(array, separator, 0, array.length);
3298 }
3299
3300 /**
3301 * <p>
3302 * Joins the elements of the provided array into a single String containing the provided list of elements.
3303 * </p>
3304 *
3305 * <p>
3306 * No delimiter is added before or after the list. Null objects or empty strings within the array are represented
3307 * by empty strings.
3308 * </p>
3309 *
3310 * <pre>
3311 * StringUtils.join(null, *) = null
3312 * StringUtils.join([], *) = ""
3313 * StringUtils.join([null], *) = ""
3314 * StringUtils.join([1, 2, 3], ';') = "1;2;3"
3315 * StringUtils.join([1, 2, 3], null) = "123"
3316 * </pre>
3317 *
3318 * @param array
3319 * the array of values to join together, may be null
3320 * @param separator
3321 * the separator character to use
3322 * @return the joined String, {@code null} if null array input
3323 * @since 3.2
3324 */
3325 public static String join(final char[] array, final char separator) {
3326 if (array == null) {
3327 return null;
3328 }
3329 return join(array, separator, 0, array.length);
3330 }
3331
3332 /**
3333 * <p>
3334 * Joins the elements of the provided array into a single String containing the provided list of elements.
3335 * </p>
3336 *
3337 * <p>
3338 * No delimiter is added before or after the list. Null objects or empty strings within the array are represented
3339 * by empty strings.
3340 * </p>
3341 *
3342 * <pre>
3343 * StringUtils.join(null, *) = null
3344 * StringUtils.join([], *) = ""
3345 * StringUtils.join([null], *) = ""
3346 * StringUtils.join([1, 2, 3], ';') = "1;2;3"
3347 * StringUtils.join([1, 2, 3], null) = "123"
3348 * </pre>
3349 *
3350 * @param array
3351 * the array of values to join together, may be null
3352 * @param separator
3353 * the separator character to use
3354 * @return the joined String, {@code null} if null array input
3355 * @since 3.2
3356 */
3357 public static String join(final float[] array, final char separator) {
3358 if (array == null) {
3359 return null;
3360 }
3361 return join(array, separator, 0, array.length);
3362 }
3363
3364 /**
3365 * <p>
3366 * Joins the elements of the provided array into a single String containing the provided list of elements.
3367 * </p>
3368 *
3369 * <p>
3370 * No delimiter is added before or after the list. Null objects or empty strings within the array are represented
3371 * by empty strings.
3372 * </p>
3373 *
3374 * <pre>
3375 * StringUtils.join(null, *) = null
3376 * StringUtils.join([], *) = ""
3377 * StringUtils.join([null], *) = ""
3378 * StringUtils.join([1, 2, 3], ';') = "1;2;3"
3379 * StringUtils.join([1, 2, 3], null) = "123"
3380 * </pre>
3381 *
3382 * @param array
3383 * the array of values to join together, may be null
3384 * @param separator
3385 * the separator character to use
3386 * @return the joined String, {@code null} if null array input
3387 * @since 3.2
3388 */
3389 public static String join(final double[] array, final char separator) {
3390 if (array == null) {
3391 return null;
3392 }
3393 return join(array, separator, 0, array.length);
3394 }
3395
3396
3397 /**
3398 * <p>Joins the elements of the provided array into a single String
3399 * containing the provided list of elements.</p>
3400 *
3401 * <p>No delimiter is added before or after the list.
3402 * Null objects or empty strings within the array are represented by
3403 * empty strings.</p>
3404 *
3405 * <pre>
3406 * StringUtils.join(null, *) = null
3407 * StringUtils.join([], *) = ""
3408 * StringUtils.join([null], *) = ""
3409 * StringUtils.join(["a", "b", "c"], ';') = "a;b;c"
3410 * StringUtils.join(["a", "b", "c"], null) = "abc"
3411 * StringUtils.join([null, "", "a"], ';') = ";;a"
3412 * </pre>
3413 *
3414 * @param array the array of values to join together, may be null
3415 * @param separator the separator character to use
3416 * @param startIndex the first index to start joining from. It is
3417 * an error to pass in an end index past the end of the array
3418 * @param endIndex the index to stop joining from (exclusive). It is
3419 * an error to pass in an end index past the end of the array
3420 * @return the joined String, {@code null} if null array input
3421 * @since 2.0
3422 */
3423 public static String join(final Object[] array, final char separator, final int startIndex, final int endIndex) {
3424 if (array == null) {
3425 return null;
3426 }
3427 final int noOfItems = endIndex - startIndex;
3428 if (noOfItems <= 0) {
3429 return EMPTY;
3430 }
3431 final StringBuilder buf = new StringBuilder(noOfItems * 16);
3432 for (int i = startIndex; i < endIndex; i++) {
3433 if (i > startIndex) {
3434 buf.append(separator);
3435 }
3436 if (array[i] != null) {
3437 buf.append(array[i]);
3438 }
3439 }
3440 return buf.toString();
3441 }
3442
3443 /**
3444 * <p>
3445 * Joins the elements of the provided array into a single String containing the provided list of elements.
3446 * </p>
3447 *
3448 * <p>
3449 * No delimiter is added before or after the list. Null objects or empty strings within the array are represented
3450 * by empty strings.
3451 * </p>
3452 *
3453 * <pre>
3454 * StringUtils.join(null, *) = null
3455 * StringUtils.join([], *) = ""
3456 * StringUtils.join([null], *) = ""
3457 * StringUtils.join([1, 2, 3], ';') = "1;2;3"
3458 * StringUtils.join([1, 2, 3], null) = "123"
3459 * </pre>
3460 *
3461 * @param array
3462 * the array of values to join together, may be null
3463 * @param separator
3464 * the separator character to use
3465 * @param startIndex
3466 * the first index to start joining from. It is an error to pass in an end index past the end of the
3467 * array
3468 * @param endIndex
3469 * the index to stop joining from (exclusive). It is an error to pass in an end index past the end of
3470 * the array
3471 * @return the joined String, {@code null} if null array input
3472 * @since 3.2
3473 */
3474 public static String join(final long[] array, final char separator, final int startIndex, final int endIndex) {
3475 if (array == null) {
3476 return null;
3477 }
3478 final int noOfItems = endIndex - startIndex;
3479 if (noOfItems <= 0) {
3480 return EMPTY;
3481 }
3482 final StringBuilder buf = new StringBuilder(noOfItems * 16);
3483 for (int i = startIndex; i < endIndex; i++) {
3484 if (i > startIndex) {
3485 buf.append(separator);
3486 }
3487 buf.append(array[i]);
3488 }
3489 return buf.toString();
3490 }
3491
3492 /**
3493 * <p>
3494 * Joins the elements of the provided array into a single String containing the provided list of elements.
3495 * </p>
3496 *
3497 * <p>
3498 * No delimiter is added before or after the list. Null objects or empty strings within the array are represented
3499 * by empty strings.
3500 * </p>
3501 *
3502 * <pre>
3503 * StringUtils.join(null, *) = null
3504 * StringUtils.join([], *) = ""
3505 * StringUtils.join([null], *) = ""
3506 * StringUtils.join([1, 2, 3], ';') = "1;2;3"
3507 * StringUtils.join([1, 2, 3], null) = "123"
3508 * </pre>
3509 *
3510 * @param array
3511 * the array of values to join together, may be null
3512 * @param separator
3513 * the separator character to use
3514 * @param startIndex
3515 * the first index to start joining from. It is an error to pass in an end index past the end of the
3516 * array
3517 * @param endIndex
3518 * the index to stop joining from (exclusive). It is an error to pass in an end index past the end of
3519 * the array
3520 * @return the joined String, {@code null} if null array input
3521 * @since 3.2
3522 */
3523 public static String join(final int[] array, final char separator, final int startIndex, final int endIndex) {
3524 if (array == null) {
3525 return null;
3526 }
3527 final int noOfItems = endIndex - startIndex;
3528 if (noOfItems <= 0) {
3529 return EMPTY;
3530 }
3531 final StringBuilder buf = new StringBuilder(noOfItems * 16);
3532 for (int i = startIndex; i < endIndex; i++) {
3533 if (i > startIndex) {
3534 buf.append(separator);
3535 }
3536 buf.append(array[i]);
3537 }
3538 return buf.toString();
3539 }
3540
3541 /**
3542 * <p>
3543 * Joins the elements of the provided array into a single String containing the provided list of elements.
3544 * </p>
3545 *
3546 * <p>
3547 * No delimiter is added before or after the list. Null objects or empty strings within the array are represented
3548 * by empty strings.
3549 * </p>
3550 *
3551 * <pre>
3552 * StringUtils.join(null, *) = null
3553 * StringUtils.join([], *) = ""
3554 * StringUtils.join([null], *) = ""
3555 * StringUtils.join([1, 2, 3], ';') = "1;2;3"
3556 * StringUtils.join([1, 2, 3], null) = "123"
3557 * </pre>
3558 *
3559 * @param array
3560 * the array of values to join together, may be null
3561 * @param separator
3562 * the separator character to use
3563 * @param startIndex
3564 * the first index to start joining from. It is an error to pass in an end index past the end of the
3565 * array
3566 * @param endIndex
3567 * the index to stop joining from (exclusive). It is an error to pass in an end index past the end of
3568 * the array
3569 * @return the joined String, {@code null} if null array input
3570 * @since 3.2
3571 */
3572 public static String join(final byte[] array, final char separator, final int startIndex, final int endIndex) {
3573 if (array == null) {
3574 return null;
3575 }
3576 final int noOfItems = endIndex - startIndex;
3577 if (noOfItems <= 0) {
3578 return EMPTY;
3579 }
3580 final StringBuilder buf = new StringBuilder(noOfItems * 16);
3581 for (int i = startIndex; i < endIndex; i++) {
3582 if (i > startIndex) {
3583 buf.append(separator);
3584 }
3585 buf.append(array[i]);
3586 }
3587 return buf.toString();
3588 }
3589
3590 /**
3591 * <p>
3592 * Joins the elements of the provided array into a single String containing the provided list of elements.
3593 * </p>
3594 *
3595 * <p>
3596 * No delimiter is added before or after the list. Null objects or empty strings within the array are represented
3597 * by empty strings.
3598 * </p>
3599 *
3600 * <pre>
3601 * StringUtils.join(null, *) = null
3602 * StringUtils.join([], *) = ""
3603 * StringUtils.join([null], *) = ""
3604 * StringUtils.join([1, 2, 3], ';') = "1;2;3"
3605 * StringUtils.join([1, 2, 3], null) = "123"
3606 * </pre>
3607 *
3608 * @param array
3609 * the array of values to join together, may be null
3610 * @param separator
3611 * the separator character to use
3612 * @param startIndex
3613 * the first index to start joining from. It is an error to pass in an end index past the end of the
3614 * array
3615 * @param endIndex
3616 * the index to stop joining from (exclusive). It is an error to pass in an end index past the end of
3617 * the array
3618 * @return the joined String, {@code null} if null array input
3619 * @since 3.2
3620 */
3621 public static String join(final short[] array, final char separator, final int startIndex, final int endIndex) {
3622 if (array == null) {
3623 return null;
3624 }
3625 final int noOfItems = endIndex - startIndex;
3626 if (noOfItems <= 0) {
3627 return EMPTY;
3628 }
3629 final StringBuilder buf = new StringBuilder(noOfItems * 16);
3630 for (int i = startIndex; i < endIndex; i++) {
3631 if (i > startIndex) {
3632 buf.append(separator);
3633 }
3634 buf.append(array[i]);
3635 }
3636 return buf.toString();
3637 }
3638
3639 /**
3640 * <p>
3641 * Joins the elements of the provided array into a single String containing the provided list of elements.
3642 * </p>
3643 *
3644 * <p>
3645 * No delimiter is added before or after the list. Null objects or empty strings within the array are represented
3646 * by empty strings.
3647 * </p>
3648 *
3649 * <pre>
3650 * StringUtils.join(null, *) = null
3651 * StringUtils.join([], *) = ""
3652 * StringUtils.join([null], *) = ""
3653 * StringUtils.join([1, 2, 3], ';') = "1;2;3"
3654 * StringUtils.join([1, 2, 3], null) = "123"
3655 * </pre>
3656 *
3657 * @param array
3658 * the array of values to join together, may be null
3659 * @param separator
3660 * the separator character to use
3661 * @param startIndex
3662 * the first index to start joining from. It is an error to pass in an end index past the end of the
3663 * array
3664 * @param endIndex
3665 * the index to stop joining from (exclusive). It is an error to pass in an end index past the end of
3666 * the array
3667 * @return the joined String, {@code null} if null array input
3668 * @since 3.2
3669 */
3670 public static String join(final char[] array, final char separator, final int startIndex, final int endIndex) {
3671 if (array == null) {
3672 return null;
3673 }
3674 final int noOfItems = endIndex - startIndex;
3675 if (noOfItems <= 0) {
3676 return EMPTY;
3677 }
3678 final StringBuilder buf = new StringBuilder(noOfItems * 16);
3679 for (int i = startIndex; i < endIndex; i++) {
3680 if (i > startIndex) {
3681 buf.append(separator);
3682 }
3683 buf.append(array[i]);
3684 }
3685 return buf.toString();
3686 }
3687
3688 /**
3689 * <p>
3690 * Joins the elements of the provided array into a single String containing the provided list of elements.
3691 * </p>
3692 *
3693 * <p>
3694 * No delimiter is added before or after the list. Null objects or empty strings within the array are represented
3695 * by empty strings.
3696 * </p>
3697 *
3698 * <pre>
3699 * StringUtils.join(null, *) = null
3700 * StringUtils.join([], *) = ""
3701 * StringUtils.join([null], *) = ""
3702 * StringUtils.join([1, 2, 3], ';') = "1;2;3"
3703 * StringUtils.join([1, 2, 3], null) = "123"
3704 * </pre>
3705 *
3706 * @param array
3707 * the array of values to join together, may be null
3708 * @param separator
3709 * the separator character to use
3710 * @param startIndex
3711 * the first index to start joining from. It is an error to pass in an end index past the end of the
3712 * array
3713 * @param endIndex
3714 * the index to stop joining from (exclusive). It is an error to pass in an end index past the end of
3715 * the array
3716 * @return the joined String, {@code null} if null array input
3717 * @since 3.2
3718 */
3719 public static String join(final double[] array, final char separator, final int startIndex, final int endIndex) {
3720 if (array == null) {
3721 return null;
3722 }
3723 final int noOfItems = endIndex - startIndex;
3724 if (noOfItems <= 0) {
3725 return EMPTY;
3726 }
3727 final StringBuilder buf = new StringBuilder(noOfItems * 16);
3728 for (int i = startIndex; i < endIndex; i++) {
3729 if (i > startIndex) {
3730 buf.append(separator);
3731 }
3732 buf.append(array[i]);
3733 }
3734 return buf.toString();
3735 }
3736
3737 /**
3738 * <p>
3739 * Joins the elements of the provided array into a single String containing the provided list of elements.
3740 * </p>
3741 *
3742 * <p>
3743 * No delimiter is added before or after the list. Null objects or empty strings within the array are represented
3744 * by empty strings.
3745 * </p>
3746 *
3747 * <pre>
3748 * StringUtils.join(null, *) = null
3749 * StringUtils.join([], *) = ""
3750 * StringUtils.join([null], *) = ""
3751 * StringUtils.join([1, 2, 3], ';') = "1;2;3"
3752 * StringUtils.join([1, 2, 3], null) = "123"
3753 * </pre>
3754 *
3755 * @param array
3756 * the array of values to join together, may be null
3757 * @param separator
3758 * the separator character to use
3759 * @param startIndex
3760 * the first index to start joining from. It is an error to pass in an end index past the end of the
3761 * array
3762 * @param endIndex
3763 * the index to stop joining from (exclusive). It is an error to pass in an end index past the end of
3764 * the array
3765 * @return the joined String, {@code null} if null array input
3766 * @since 3.2
3767 */
3768 public static String join(final float[] array, final char separator, final int startIndex, final int endIndex) {
3769 if (array == null) {
3770 return null;
3771 }
3772 final int noOfItems = endIndex - startIndex;
3773 if (noOfItems <= 0) {
3774 return EMPTY;
3775 }
3776 final StringBuilder buf = new StringBuilder(noOfItems * 16);
3777 for (int i = startIndex; i < endIndex; i++) {
3778 if (i > startIndex) {
3779 buf.append(separator);
3780 }
3781 buf.append(array[i]);
3782 }
3783 return buf.toString();
3784 }
3785
3786
3787 /**
3788 * <p>Joins the elements of the provided array into a single String
3789 * containing the provided list of elements.</p>
3790 *
3791 * <p>No delimiter is added before or after the list.
3792 * A {@code null} separator is the same as an empty String ("").
3793 * Null objects or empty strings within the array are represented by
3794 * empty strings.</p>
3795 *
3796 * <pre>
3797 * StringUtils.join(null, *) = null
3798 * StringUtils.join([], *) = ""
3799 * StringUtils.join([null], *) = ""
3800 * StringUtils.join(["a", "b", "c"], "--") = "a--b--c"
3801 * StringUtils.join(["a", "b", "c"], null) = "abc"
3802 * StringUtils.join(["a", "b", "c"], "") = "abc"
3803 * StringUtils.join([null, "", "a"], ',') = ",,a"
3804 * </pre>
3805 *
3806 * @param array the array of values to join together, may be null
3807 * @param separator the separator character to use, null treated as ""
3808 * @return the joined String, {@code null} if null array input
3809 */
3810 public static String join(final Object[] array, final String separator) {
3811 if (array == null) {
3812 return null;
3813 }
3814 return join(array, separator, 0, array.length);
3815 }
3816
3817 /**
3818 * <p>Joins the elements of the provided array into a single String
3819 * containing the provided list of elements.</p>
3820 *
3821 * <p>No delimiter is added before or after the list.
3822 * A {@code null} separator is the same as an empty String ("").
3823 * Null objects or empty strings within the array are represented by
3824 * empty strings.</p>
3825 *
3826 * <pre>
3827 * StringUtils.join(null, *, *, *) = null
3828 * StringUtils.join([], *, *, *) = ""
3829 * StringUtils.join([null], *, *, *) = ""
3830 * StringUtils.join(["a", "b", "c"], "--", 0, 3) = "a--b--c"
3831 * StringUtils.join(["a", "b", "c"], "--", 1, 3) = "b--c"
3832 * StringUtils.join(["a", "b", "c"], "--", 2, 3) = "c"
3833 * StringUtils.join(["a", "b", "c"], "--", 2, 2) = ""
3834 * StringUtils.join(["a", "b", "c"], null, 0, 3) = "abc"
3835 * StringUtils.join(["a", "b", "c"], "", 0, 3) = "abc"
3836 * StringUtils.join([null, "", "a"], ',', 0, 3) = ",,a"
3837 * </pre>
3838 *
3839 * @param array the array of values to join together, may be null
3840 * @param separator the separator character to use, null treated as ""
3841 * @param startIndex the first index to start joining from.
3842 * @param endIndex the index to stop joining from (exclusive).
3843 * @return the joined String, {@code null} if null array input; or the empty string
3844 * if {@code endIndex - startIndex <= 0}. The number of joined entries is given by
3845 * {@code endIndex - startIndex}
3846 * @throws ArrayIndexOutOfBoundsException ife<br/>
3847 * {@code startIndex < 0} or <br/>
3848 * {@code startIndex >= array.length()} or <br/>
3849 * {@code endIndex < 0} or <br/>
3850 * {@code endIndex > array.length()}
3851 */
3852 public static String join(final Object[] array, String separator, final int startIndex, final int endIndex) {
3853 if (array == null) {
3854 return null;
3855 }
3856 if (separator == null) {
3857 separator = EMPTY;
3858 }
3859
3860 // endIndex - startIndex > 0: Len = NofStrings *(len(firstString) + len(separator))
3861 // (Assuming that all Strings are roughly equally long)
3862 final int noOfItems = endIndex - startIndex;
3863 if (noOfItems <= 0) {
3864 return EMPTY;
3865 }
3866
3867 final StringBuilder buf = new StringBuilder(noOfItems * 16);
3868
3869 for (int i = startIndex; i < endIndex; i++) {
3870 if (i > startIndex) {
3871 buf.append(separator);
3872 }
3873 if (array[i] != null) {
3874 buf.append(array[i]);
3875 }
3876 }
3877 return buf.toString();
3878 }
3879
3880 /**
3881 * <p>Joins the elements of the provided {@code Iterator} into
3882 * a single String containing the provided elements.</p>
3883 *
3884 * <p>No delimiter is added before or after the list. Null objects or empty
3885 * strings within the iteration are represented by empty strings.</p>
3886 *
3887 * <p>See the examples here: {@link #join(Object[],char)}. </p>
3888 *
3889 * @param iterator the {@code Iterator} of values to join together, may be null
3890 * @param separator the separator character to use
3891 * @return the joined String, {@code null} if null iterator input
3892 * @since 2.0
3893 */
3894 public static String join(final Iterator<?> iterator, final char separator) {
3895
3896 // handle null, zero and one elements before building a buffer
3897 if (iterator == null) {
3898 return null;
3899 }
3900 if (!iterator.hasNext()) {
3901 return EMPTY;
3902 }
3903 final Object first = iterator.next();
3904 if (!iterator.hasNext()) {
3905 return ObjectUtils.toString(first);
3906 }
3907
3908 // two or more elements
3909 final StringBuilder buf = new StringBuilder(256); // Java default is 16, probably too small
3910 if (first != null) {
3911 buf.append(first);
3912 }
3913
3914 while (iterator.hasNext()) {
3915 buf.append(separator);
3916 final Object obj = iterator.next();
3917 if (obj != null) {
3918 buf.append(obj);
3919 }
3920 }
3921
3922 return buf.toString();
3923 }
3924
3925 /**
3926 * <p>Joins the elements of the provided {@code Iterator} into
3927 * a single String containing the provided elements.</p>
3928 *
3929 * <p>No delimiter is added before or after the list.
3930 * A {@code null} separator is the same as an empty String ("").</p>
3931 *
3932 * <p>See the examples here: {@link #join(Object[],String)}. </p>
3933 *
3934 * @param iterator the {@code Iterator} of values to join together, may be null
3935 * @param separator the separator character to use, null treated as ""
3936 * @return the joined String, {@code null} if null iterator input
3937 */
3938 public static String join(final Iterator<?> iterator, final String separator) {
3939
3940 // handle null, zero and one elements before building a buffer
3941 if (iterator == null) {
3942 return null;
3943 }
3944 if (!iterator.hasNext()) {
3945 return EMPTY;
3946 }
3947 final Object first = iterator.next();
3948 if (!iterator.hasNext()) {
3949 return ObjectUtils.toString(first);
3950 }
3951
3952 // two or more elements
3953 final StringBuilder buf = new StringBuilder(256); // Java default is 16, probably too small
3954 if (first != null) {
3955 buf.append(first);
3956 }
3957
3958 while (iterator.hasNext()) {
3959 if (separator != null) {
3960 buf.append(separator);
3961 }
3962 final Object obj = iterator.next();
3963 if (obj != null) {
3964 buf.append(obj);
3965 }
3966 }
3967 return buf.toString();
3968 }
3969
3970 /**
3971 * <p>Joins the elements of the provided {@code Iterable} into
3972 * a single String containing the provided elements.</p>
3973 *
3974 * <p>No delimiter is added before or after the list. Null objects or empty
3975 * strings within the iteration are represented by empty strings.</p>
3976 *
3977 * <p>See the examples here: {@link #join(Object[],char)}. </p>
3978 *
3979 * @param iterable the {@code Iterable} providing the values to join together, may be null
3980 * @param separator the separator character to use
3981 * @return the joined String, {@code null} if null iterator input
3982 * @since 2.3
3983 */
3984 public static String join(final Iterable<?> iterable, final char separator) {
3985 if (iterable == null) {
3986 return null;
3987 }
3988 return join(iterable.iterator(), separator);
3989 }
3990
3991 /**
3992 * <p>Joins the elements of the provided {@code Iterable} into
3993 * a single String containing the provided elements.</p>
3994 *
3995 * <p>No delimiter is added before or after the list.
3996 * A {@code null} separator is the same as an empty String ("").</p>
3997 *
3998 * <p>See the examples here: {@link #join(Object[],String)}. </p>
3999 *
4000 * @param iterable the {@code Iterable} providing the values to join together, may be null
4001 * @param separator the separator character to use, null treated as ""
4002 * @return the joined String, {@code null} if null iterator input
4003 * @since 2.3
4004 */
4005 public static String join(final Iterable<?> iterable, final String separator) {
4006 if (iterable == null) {
4007 return null;
4008 }
4009 return join(iterable.iterator(), separator);
4010 }
4011
4012 // Delete
4013 //-----------------------------------------------------------------------
4014 /**
4015 * <p>Deletes all whitespaces from a String as defined by
4016 * {@link Character#isWhitespace(char)}.</p>
4017 *
4018 * <pre>
4019 * StringUtils.deleteWhitespace(null) = null
4020 * StringUtils.deleteWhitespace("") = ""
4021 * StringUtils.deleteWhitespace("abc") = "abc"
4022 * StringUtils.deleteWhitespace(" ab c ") = "abc"
4023 * </pre>
4024 *
4025 * @param str the String to delete whitespace from, may be null
4026 * @return the String without whitespaces, {@code null} if null String input
4027 */
4028 public static String deleteWhitespace(final String str) {
4029 if (isEmpty(str)) {
4030 return str;
4031 }
4032 final int sz = str.length();
4033 final char[] chs = new char[sz];
4034 int count = 0;
4035 for (int i = 0; i < sz; i++) {
4036 if (!Character.isWhitespace(str.charAt(i))) {
4037 chs[count++] = str.charAt(i);
4038 }
4039 }
4040 if (count == sz) {
4041 return str;
4042 }
4043 return new String(chs, 0, count);
4044 }
4045
4046 // Remove
4047 //-----------------------------------------------------------------------
4048 /**
4049 * <p>Removes a substring only if it is at the beginning of a source string,
4050 * otherwise returns the source string.</p>
4051 *
4052 * <p>A {@code null} source string will return {@code null}.
4053 * An empty ("") source string will return the empty string.
4054 * A {@code null} search string will return the source string.</p>
4055 *
4056 * <pre>
4057 * StringUtils.removeStart(null, *) = null
4058 * StringUtils.removeStart("", *) = ""
4059 * StringUtils.removeStart(*, null) = *
4060 * StringUtils.removeStart("www.domain.com", "www.") = "domain.com"
4061 * StringUtils.removeStart("domain.com", "www.") = "domain.com"
4062 * StringUtils.removeStart("www.domain.com", "domain") = "www.domain.com"
4063 * StringUtils.removeStart("abc", "") = "abc"
4064 * </pre>
4065 *
4066 * @param str the source String to search, may be null
4067 * @param remove the String to search for and remove, may be null
4068 * @return the substring with the string removed if found,
4069 * {@code null} if null String input
4070 * @since 2.1
4071 */
4072 public static String removeStart(final String str, final String remove) {
4073 if (isEmpty(str) || isEmpty(remove)) {
4074 return str;
4075 }
4076 if (str.startsWith(remove)){
4077 return str.substring(remove.length());
4078 }
4079 return str;
4080 }
4081
4082 /**
4083 * <p>Case insensitive removal of a substring if it is at the beginning of a source string,
4084 * otherwise returns the source string.</p>
4085 *
4086 * <p>A {@code null} source string will return {@code null}.
4087 * An empty ("") source string will return the empty string.
4088 * A {@code null} search string will return the source string.</p>
4089 *
4090 * <pre>
4091 * StringUtils.removeStartIgnoreCase(null, *) = null
4092 * StringUtils.removeStartIgnoreCase("", *) = ""
4093 * StringUtils.removeStartIgnoreCase(*, null) = *
4094 * StringUtils.removeStartIgnoreCase("www.domain.com", "www.") = "domain.com"
4095 * StringUtils.removeStartIgnoreCase("www.domain.com", "WWW.") = "domain.com"
4096 * StringUtils.removeStartIgnoreCase("domain.com", "www.") = "domain.com"
4097 * StringUtils.removeStartIgnoreCase("www.domain.com", "domain") = "www.domain.com"
4098 * StringUtils.removeStartIgnoreCase("abc", "") = "abc"
4099 * </pre>
4100 *
4101 * @param str the source String to search, may be null
4102 * @param remove the String to search for (case insensitive) and remove, may be null
4103 * @return the substring with the string removed if found,
4104 * {@code null} if null String input
4105 * @since 2.4
4106 */
4107 public static String removeStartIgnoreCase(final String str, final String remove) {
4108 if (isEmpty(str) || isEmpty(remove)) {
4109 return str;
4110 }
4111 if (startsWithIgnoreCase(str, remove)) {
4112 return str.substring(remove.length());
4113 }
4114 return str;
4115 }
4116
4117 /**
4118 * <p>Removes a substring only if it is at the end of a source string,
4119 * otherwise returns the source string.</p>
4120 *
4121 * <p>A {@code null} source string will return {@code null}.
4122 * An empty ("") source string will return the empty string.
4123 * A {@code null} search string will return the source string.</p>
4124 *
4125 * <pre>
4126 * StringUtils.removeEnd(null, *) = null
4127 * StringUtils.removeEnd("", *) = ""
4128 * StringUtils.removeEnd(*, null) = *
4129 * StringUtils.removeEnd("www.domain.com", ".com.") = "www.domain.com"
4130 * StringUtils.removeEnd("www.domain.com", ".com") = "www.domain"
4131 * StringUtils.removeEnd("www.domain.com", "domain") = "www.domain.com"
4132 * StringUtils.removeEnd("abc", "") = "abc"
4133 * </pre>
4134 *
4135 * @param str the source String to search, may be null
4136 * @param remove the String to search for and remove, may be null
4137 * @return the substring with the string removed if found,
4138 * {@code null} if null String input
4139 * @since 2.1
4140 */
4141 public static String removeEnd(final String str, final String remove) {
4142 if (isEmpty(str) || isEmpty(remove)) {
4143 return str;
4144 }
4145 if (str.endsWith(remove)) {
4146 return str.substring(0, str.length() - remove.length());
4147 }
4148 return str;
4149 }
4150
4151 /**
4152 * <p>Case insensitive removal of a substring if it is at the end of a source string,
4153 * otherwise returns the source string.</p>
4154 *
4155 * <p>A {@code null} source string will return {@code null}.
4156 * An empty ("") source string will return the empty string.
4157 * A {@code null} search string will return the source string.</p>
4158 *
4159 * <pre>
4160 * StringUtils.removeEndIgnoreCase(null, *) = null
4161 * StringUtils.removeEndIgnoreCase("", *) = ""
4162 * StringUtils.removeEndIgnoreCase(*, null) = *
4163 * StringUtils.removeEndIgnoreCase("www.domain.com", ".com.") = "www.domain.com"
4164 * StringUtils.removeEndIgnoreCase("www.domain.com", ".com") = "www.domain"
4165 * StringUtils.removeEndIgnoreCase("www.domain.com", "domain") = "www.domain.com"
4166 * StringUtils.removeEndIgnoreCase("abc", "") = "abc"
4167 * StringUtils.removeEndIgnoreCase("www.domain.com", ".COM") = "www.domain")
4168 * StringUtils.removeEndIgnoreCase("www.domain.COM", ".com") = "www.domain")
4169 * </pre>
4170 *
4171 * @param str the source String to search, may be null
4172 * @param remove the String to search for (case insensitive) and remove, may be null
4173 * @return the substring with the string removed if found,
4174 * {@code null} if null String input
4175 * @since 2.4
4176 */
4177 public static String removeEndIgnoreCase(final String str, final String remove) {
4178 if (isEmpty(str) || isEmpty(remove)) {
4179 return str;
4180 }
4181 if (endsWithIgnoreCase(str, remove)) {
4182 return str.substring(0, str.length() - remove.length());
4183 }
4184 return str;
4185 }
4186
4187 /**
4188 * <p>Removes all occurrences of a substring from within the source string.</p>
4189 *
4190 * <p>A {@code null} source string will return {@code null}.
4191 * An empty ("") source string will return the empty string.
4192 * A {@code null} remove string will return the source string.
4193 * An empty ("") remove string will return the source string.</p>
4194 *
4195 * <pre>
4196 * StringUtils.remove(null, *) = null
4197 * StringUtils.remove("", *) = ""
4198 * StringUtils.remove(*, null) = *
4199 * StringUtils.remove(*, "") = *
4200 * StringUtils.remove("queued", "ue") = "qd"
4201 * StringUtils.remove("queued", "zz") = "queued"
4202 * </pre>
4203 *
4204 * @param str the source String to search, may be null
4205 * @param remove the String to search for and remove, may be null
4206 * @return the substring with the string removed if found,
4207 * {@code null} if null String input
4208 * @since 2.1
4209 */
4210 public static String remove(final String str, final String remove) {
4211 if (isEmpty(str) || isEmpty(remove)) {
4212 return str;
4213 }
4214 return replace(str, remove, EMPTY, -1);
4215 }
4216
4217 /**
4218 * <p>Removes all occurrences of a character from within the source string.</p>
4219 *
4220 * <p>A {@code null} source string will return {@code null}.
4221 * An empty ("") source string will return the empty string.</p>
4222 *
4223 * <pre>
4224 * StringUtils.remove(null, *) = null
4225 * StringUtils.remove("", *) = ""
4226 * StringUtils.remove("queued", 'u') = "qeed"
4227 * StringUtils.remove("queued", 'z') = "queued"
4228 * </pre>
4229 *
4230 * @param str the source String to search, may be null
 * @param remove the char to search for and remove
4232 * @return the substring with the char removed if found,
4233 * {@code null} if null String input
4234 * @since 2.1
4235 */
4236 public static String remove(final String str, final char remove) {
4237 if (isEmpty(str) || str.indexOf(remove) == INDEX_NOT_FOUND) {
4238 return str;
4239 }
4240 final char[] chars = str.toCharArray();
4241 int pos = 0;
4242 for (int i = 0; i < chars.length; i++) {
4243 if (chars[i] != remove) {
4244 chars[pos++] = chars[i];
4245 }
4246 }
4247 return new String(chars, 0, pos);
4248 }
4249
4250 // Replacing
4251 //-----------------------------------------------------------------------
4252 /**
4253 * <p>Replaces a String with another String inside a larger String, once.</p>
4254 *
4255 * <p>A {@code null} reference passed to this method is a no-op.</p>
4256 *
4257 * <pre>
4258 * StringUtils.replaceOnce(null, *, *) = null
4259 * StringUtils.replaceOnce("", *, *) = ""
4260 * StringUtils.replaceOnce("any", null, *) = "any"
4261 * StringUtils.replaceOnce("any", *, null) = "any"
4262 * StringUtils.replaceOnce("any", "", *) = "any"
4263 * StringUtils.replaceOnce("aba", "a", null) = "aba"
4264 * StringUtils.replaceOnce("aba", "a", "") = "ba"
4265 * StringUtils.replaceOnce("aba", "a", "z") = "zba"
4266 * </pre>
4267 *
4268 * @see #replace(String text, String searchString, String replacement, int max)
4269 * @param text text to search and replace in, may be null
4270 * @param searchString the String to search for, may be null
4271 * @param replacement the String to replace with, may be null
4272 * @return the text with any replacements processed,
4273 * {@code null} if null String input
4274 */
4275 public static String replaceOnce(final String text, final String searchString, final String replacement) {
4276 return replace(text, searchString, replacement, 1);
4277 }
4278
4279 /**
4280 * Replaces each substring of the source String that matches the given regular expression with the given
4281 * replacement using the {@link Pattern#DOTALL} option. DOTALL is also know as single-line mode in Perl. This call
4282 * is also equivalent to:
4283 * <ul>
4284 * <li>{@code source.replaceAll("(?s)" + regex, replacement)}</li>
4285 * <li>{@code Pattern.compile(regex, Pattern.DOTALL).matcher(source).replaceAll(replacement)}</li>
4286 * </ul>
4287 *
4288 * @param source
4289 * the source string
4290 * @param regex
4291 * the regular expression to which this string is to be matched
4292 * @param replacement
4293 * the string to be substituted for each match
4294 * @return The resulting {@code String}
4295 * @see String#replaceAll(String, String)
4296 * @see Pattern#DOTALL
4297 * @since 3.2
4298 */
4299 public static String replacePattern(final String source, final String regex, final String replacement) {
4300 return Pattern.compile(regex, Pattern.DOTALL).matcher(source).replaceAll(replacement);
4301 }
4302
4303 /**
4304 * Removes each substring of the source String that matches the given regular expression using the DOTALL option.
4305 *
4306 * @param source
4307 * the source string
4308 * @param regex
4309 * the regular expression to which this string is to be matched
4310 * @return The resulting {@code String}
4311 * @see String#replaceAll(String, String)
4312 * @see Pattern#DOTALL
4313 * @since 3.2
4314 */
4315 public static String removePattern(final String source, final String regex) {
4316 return replacePattern(source, regex, StringUtils.EMPTY);
4317 }
4318
4319 /**
4320 * <p>Replaces all occurrences of a String within another String.</p>
4321 *
4322 * <p>A {@code null} reference passed to this method is a no-op.</p>
4323 *
4324 * <pre>
4325 * StringUtils.replace(null, *, *) = null
4326 * StringUtils.replace("", *, *) = ""
4327 * StringUtils.replace("any", null, *) = "any"
4328 * StringUtils.replace("any", *, null) = "any"
4329 * StringUtils.replace("any", "", *) = "any"
4330 * StringUtils.replace("aba", "a", null) = "aba"
4331 * StringUtils.replace("aba", "a", "") = "b"
4332 * StringUtils.replace("aba", "a", "z") = "zbz"
4333 * </pre>
4334 *
4335 * @see #replace(String text, String searchString, String replacement, int max)
4336 * @param text text to search and replace in, may be null
4337 * @param searchString the String to search for, may be null
4338 * @param replacement the String to replace it with, may be null
4339 * @return the text with any replacements processed,
4340 * {@code null} if null String input
4341 */
4342 public static String replace(final String text, final String searchString, final String replacement) {
4343 return replace(text, searchString, replacement, -1);
4344 }
4345
4346 /**
4347 * <p>Replaces a String with another String inside a larger String,
4348 * for the first {@code max} values of the search String.</p>
4349 *
4350 * <p>A {@code null} reference passed to this method is a no-op.</p>
4351 *
4352 * <pre>
4353 * StringUtils.replace(null, *, *, *) = null
4354 * StringUtils.replace("", *, *, *) = ""
4355 * StringUtils.replace("any", null, *, *) = "any"
4356 * StringUtils.replace("any", *, null, *) = "any"
4357 * StringUtils.replace("any", "", *, *) = "any"
4358 * StringUtils.replace("any", *, *, 0) = "any"
4359 * StringUtils.replace("abaa", "a", null, -1) = "abaa"
4360 * StringUtils.replace("abaa", "a", "", -1) = "b"
4361 * StringUtils.replace("abaa", "a", "z", 0) = "abaa"
4362 * StringUtils.replace("abaa", "a", "z", 1) = "zbaa"
4363 * StringUtils.replace("abaa", "a", "z", 2) = "zbza"
4364 * StringUtils.replace("abaa", "a", "z", -1) = "zbzz"
4365 * </pre>
4366 *
4367 * @param text text to search and replace in, may be null
4368 * @param searchString the String to search for, may be null
4369 * @param replacement the String to replace it with, may be null
4370 * @param max maximum number of values to replace, or {@code -1} if no maximum
4371 * @return the text with any replacements processed,
4372 * {@code null} if null String input
4373 */
4374 public static String replace(final String text, final String searchString, final String replacement, int max) {
4375 if (isEmpty(text) || isEmpty(searchString) || replacement == null || max == 0) {
4376 return text;
4377 }
4378 int start = 0;
4379 int end = text.indexOf(searchString, start);
4380 if (end == INDEX_NOT_FOUND) {
4381 return text;
4382 }
4383 final int replLength = searchString.length();
4384 int increase = replacement.length() - replLength;
4385 increase = increase < 0 ? 0 : increase;
4386 increase *= max < 0 ? 16 : max > 64 ? 64 : max;
4387 final StringBuilder buf = new StringBuilder(text.length() + increase);
4388 while (end != INDEX_NOT_FOUND) {
4389 buf.append(text.substring(start, end)).append(replacement);
4390 start = end + replLength;
4391 if (--max == 0) {
4392 break;
4393 }
4394 end = text.indexOf(searchString, start);
4395 }
4396 buf.append(text.substring(start));
4397 return buf.toString();
4398 }
4399
4400 /**
4401 * <p>
4402 * Replaces all occurrences of Strings within another String.
4403 * </p>
4404 *
4405 * <p>
4406 * A {@code null} reference passed to this method is a no-op, or if
4407 * any "search string" or "string to replace" is null, that replace will be
4408 * ignored. This will not repeat. For repeating replaces, call the
4409 * overloaded method.
4410 * </p>
4411 *
4412 * <pre>
4413 * StringUtils.replaceEach(null, *, *) = null
4414 * StringUtils.replaceEach("", *, *) = ""
4415 * StringUtils.replaceEach("aba", null, null) = "aba"
4416 * StringUtils.replaceEach("aba", new String[0], null) = "aba"
4417 * StringUtils.replaceEach("aba", null, new String[0]) = "aba"
4418 * StringUtils.replaceEach("aba", new String[]{"a"}, null) = "aba"
4419 * StringUtils.replaceEach("aba", new String[]{"a"}, new String[]{""}) = "b"
4420 * StringUtils.replaceEach("aba", new String[]{null}, new String[]{"a"}) = "aba"
4421 * StringUtils.replaceEach("abcde", new String[]{"ab", "d"}, new String[]{"w", "t"}) = "wcte"
4422 * (example of how it does not repeat)
4423 * StringUtils.replaceEach("abcde", new String[]{"ab", "d"}, new String[]{"d", "t"}) = "dcte"
4424 * </pre>
4425 *
4426 * @param text
4427 * text to search and replace in, no-op if null
4428 * @param searchList
4429 * the Strings to search for, no-op if null
4430 * @param replacementList
4431 * the Strings to replace them with, no-op if null
4432 * @return the text with any replacements processed, {@code null} if
4433 * null String input
4434 * @throws IllegalArgumentException
4435 * if the lengths of the arrays are not the same (null is ok,
4436 * and/or size 0)
4437 * @since 2.4
4438 */
4439 public static String replaceEach(final String text, final String[] searchList, final String[] replacementList) {
4440 return replaceEach(text, searchList, replacementList, false, 0);
4441 }
4442
4443 /**
4444 * <p>
4445 * Replaces all occurrences of Strings within another String.
4446 * </p>
4447 *
4448 * <p>
4449 * A {@code null} reference passed to this method is a no-op, or if
4450 * any "search string" or "string to replace" is null, that replace will be
4451 * ignored.
4452 * </p>
4453 *
4454 * <pre>
 * StringUtils.replaceEachRepeatedly(null, *, *) = null
 * StringUtils.replaceEachRepeatedly("", *, *) = ""
 * StringUtils.replaceEachRepeatedly("aba", null, null) = "aba"
 * StringUtils.replaceEachRepeatedly("aba", new String[0], null) = "aba"
 * StringUtils.replaceEachRepeatedly("aba", null, new String[0]) = "aba"
 * StringUtils.replaceEachRepeatedly("aba", new String[]{"a"}, null) = "aba"
 * StringUtils.replaceEachRepeatedly("aba", new String[]{"a"}, new String[]{""}) = "b"
 * StringUtils.replaceEachRepeatedly("aba", new String[]{null}, new String[]{"a"}) = "aba"
 * StringUtils.replaceEachRepeatedly("abcde", new String[]{"ab", "d"}, new String[]{"w", "t"}) = "wcte"
 * (example of how it repeats)
 * StringUtils.replaceEachRepeatedly("abcde", new String[]{"ab", "d"}, new String[]{"d", "t"}) = "tcte"
 * StringUtils.replaceEachRepeatedly("abcde", new String[]{"ab", "d"}, new String[]{"d", "ab"}) = IllegalStateException
4469 * </pre>
4470 *
4471 * @param text
4472 * text to search and replace in, no-op if null
4473 * @param searchList
4474 * the Strings to search for, no-op if null
4475 * @param replacementList
4476 * the Strings to replace them with, no-op if null
4477 * @return the text with any replacements processed, {@code null} if
4478 * null String input
4479 * @throws IllegalStateException
4480 * if the search is repeating and there is an endless loop due
4481 * to outputs of one being inputs to another
4482 * @throws IllegalArgumentException
4483 * if the lengths of the arrays are not the same (null is ok,
4484 * and/or size 0)
4485 * @since 2.4
4486 */
4487 public static String replaceEachRepeatedly(final String text, final String[] searchList, final String[] replacementList) {
4488 // timeToLive should be 0 if not used or nothing to replace, else it's
4489 // the length of the replace array
4490 final int timeToLive = searchList == null ? 0 : searchList.length;
4491 return replaceEach(text, searchList, replacementList, true, timeToLive);
4492 }
4493
4494 /**
4495 * <p>
4496 * Replaces all occurrences of Strings within another String.
4497 * </p>
4498 *
4499 * <p>
4500 * A {@code null} reference passed to this method is a no-op, or if
4501 * any "search string" or "string to replace" is null, that replace will be
4502 * ignored.
4503 * </p>
4504 *
4505 * <pre>
4506 * StringUtils.replaceEach(null, *, *, *) = null
4507 * StringUtils.replaceEach("", *, *, *) = ""
4508 * StringUtils.replaceEach("aba", null, null, *) = "aba"
4509 * StringUtils.replaceEach("aba", new String[0], null, *) = "aba"
4510 * StringUtils.replaceEach("aba", null, new String[0], *) = "aba"
4511 * StringUtils.replaceEach("aba", new String[]{"a"}, null, *) = "aba"
4512 * StringUtils.replaceEach("aba", new String[]{"a"}, new String[]{""}, *) = "b"
4513 * StringUtils.replaceEach("aba", new String[]{null}, new String[]{"a"}, *) = "aba"
4514 * StringUtils.replaceEach("abcde", new String[]{"ab", "d"}, new String[]{"w", "t"}, *) = "wcte"
4515 * (example of how it repeats)
4516 * StringUtils.replaceEach("abcde", new String[]{"ab", "d"}, new String[]{"d", "t"}, false) = "dcte"
4517 * StringUtils.replaceEach("abcde", new String[]{"ab", "d"}, new String[]{"d", "t"}, true) = "tcte"
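 * StringUtils.replaceEach("abcde", new String[]{"ab", "d"}, new String[]{"d", "ab"}, true) = IllegalStateException
 * StringUtils.replaceEach("abcde", new String[]{"ab", "d"}, new String[]{"d", "ab"}, false) = "dcabe"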
4519 * </pre>
4520 *
4521 * @param text
4522 * text to search and replace in, no-op if null
4523 * @param searchList
4524 * the Strings to search for, no-op if null
4525 * @param replacementList
4526 * the Strings to replace them with, no-op if null
 * @param repeat if true, then replace repeatedly
 * until there are no more possible replacements or timeToLive &lt; 0
 * @param timeToLive
 * number of further passes allowed when repeating; a value less than 0
 * is treated as a circular reference (endless loop)
4532 * @return the text with any replacements processed, {@code null} if
4533 * null String input
4534 * @throws IllegalStateException
4535 * if the search is repeating and there is an endless loop due
4536 * to outputs of one being inputs to another
4537 * @throws IllegalArgumentException
4538 * if the lengths of the arrays are not the same (null is ok,
4539 * and/or size 0)
4540 * @since 2.4
4541 */
4542 private static String replaceEach(
4543 final String text, final String[] searchList, final String[] replacementList, final boolean repeat, final int timeToLive) {
4544
4545 // mchyzer Performance note: This creates very few new objects (one major goal)
4546 // let me know if there are performance requests, we can create a harness to measure
4547
4548 if (text == null || text.length() == 0 || searchList == null ||
4549 searchList.length == 0 || replacementList == null || replacementList.length == 0) {
4550 return text;
4551 }
4552
4553 // if recursing, this shouldn't be less than 0
4554 if (timeToLive < 0) {
4555 throw new IllegalStateException("Aborting to protect against StackOverflowError - " +
4556 "output of one loop is the input of another");
4557 }
4558
4559 final int searchLength = searchList.length;
4560 final int replacementLength = replacementList.length;
4561
4562 // make sure lengths are ok, these need to be equal
4563 if (searchLength != replacementLength) {
4564 throw new IllegalArgumentException("Search and Replace array lengths don't match: "
4565 + searchLength
4566 + " vs "
4567 + replacementLength);
4568 }
4569
4570 // keep track of which still have matches
4571 final boolean[] noMoreMatchesForReplIndex = new boolean[searchLength];
4572
4573 // index on index that the match was found
4574 int textIndex = -1;
4575 int replaceIndex = -1;
4576 int tempIndex = -1;
4577
4578 // index of replace array that will replace the search string found
4579 // NOTE: logic duplicated below START
4580 for (int i = 0; i < searchLength; i++) {
4581 if (noMoreMatchesForReplIndex[i] || searchList[i] == null ||
4582 searchList[i].length() == 0 || replacementList[i] == null) {
4583 continue;
4584 }
4585 tempIndex = text.indexOf(searchList[i]);
4586
4587 // see if we need to keep searching for this
4588 if (tempIndex == -1) {
4589 noMoreMatchesForReplIndex[i] = true;
4590 } else {
4591 if (textIndex == -1 || tempIndex < textIndex) {
4592 textIndex = tempIndex;
4593 replaceIndex = i;
4594 }
4595 }
4596 }
4597 // NOTE: logic mostly below END
4598
4599 // no search strings found, we are done
4600 if (textIndex == -1) {
4601 return text;
4602 }
4603
4604 int start = 0;
4605
4606 // get a good guess on the size of the result buffer so it doesn't have to double if it goes over a bit
4607 int increase = 0;
4608
4609 // count the replacement text elements that are larger than their corresponding text being replaced
4610 for (int i = 0; i < searchList.length; i++) {
4611 if (searchList[i] == null || replacementList[i] == null) {
4612 continue;
4613 }
4614 final int greater = replacementList[i].length() - searchList[i].length();
4615 if (greater > 0) {
4616 increase += 3 * greater; // assume 3 matches
4617 }
4618 }
4619 // have upper-bound at 20% increase, then let Java take over
4620 increase = Math.min(increase, text.length() / 5);
4621
4622 final StringBuilder buf = new StringBuilder(text.length() + increase);
4623
4624 while (textIndex != -1) {
4625
4626 for (int i = start; i < textIndex; i++) {
4627 buf.append(text.charAt(i));
4628 }
4629 buf.append(replacementList[replaceIndex]);
4630
4631 start = textIndex + searchList[replaceIndex].length();
4632
4633 textIndex = -1;
4634 replaceIndex = -1;
4635 tempIndex = -1;
4636 // find the next earliest match
4637 // NOTE: logic mostly duplicated above START
4638 for (int i = 0; i < searchLength; i++) {
4639 if (noMoreMatchesForReplIndex[i] || searchList[i] == null ||
4640 searchList[i].length() == 0 || replacementList[i] == null) {
4641 continue;
4642 }
4643 tempIndex = text.indexOf(searchList[i], start);
4644
4645 // see if we need to keep searching for this
4646 if (tempIndex == -1) {
4647 noMoreMatchesForReplIndex[i] = true;
4648 } else {
4649 if (textIndex == -1 || tempIndex < textIndex) {
4650 textIndex = tempIndex;
4651 replaceIndex = i;
4652 }
4653 }
4654 }
4655 // NOTE: logic duplicated above END
4656
4657 }
4658 final int textLength = text.length();
4659 for (int i = start; i < textLength; i++) {
4660 buf.append(text.charAt(i));
4661 }
4662 final String result = buf.toString();
4663 if (!repeat) {
4664 return result;
4665 }
4666
4667 return replaceEach(result, searchList, replacementList, repeat, timeToLive - 1);
4668 }
4669
4670 // Replace, character based
4671 //-----------------------------------------------------------------------
4672 /**
4673 * <p>Replaces all occurrences of a character in a String with another.
4674 * This is a null-safe version of {@link String#replace(char, char)}.</p>
4675 *
4676 * <p>A {@code null} string input returns {@code null}.
4677 * An empty ("") string input returns an empty string.</p>
4678 *
4679 * <pre>
4680 * StringUtils.replaceChars(null, *, *) = null
4681 * StringUtils.replaceChars("", *, *) = ""
4682 * StringUtils.replaceChars("abcba", 'b', 'y') = "aycya"
4683 * StringUtils.replaceChars("abcba", 'z', 'y') = "abcba"
4684 * </pre>
4685 *
4686 * @param str String to replace characters in, may be null
 * @param searchChar the character to search for
 * @param replaceChar the character to replace it with
4689 * @return modified String, {@code null} if null string input
4690 * @since 2.0
4691 */
4692 public static String replaceChars(final String str, final char searchChar, final char replaceChar) {
4693 if (str == null) {
4694 return null;
4695 }
4696 return str.replace(searchChar, replaceChar);
4697 }
4698
4699 /**
4700 * <p>Replaces multiple characters in a String in one go.
4701 * This method can also be used to delete characters.</p>
4702 *
4703 * <p>For example:<br />
4704 * <code>replaceChars("hello", "ho", "jy") = jelly</code>.</p>
4705 *
4706 * <p>A {@code null} string input returns {@code null}.
4707 * An empty ("") string input returns an empty string.
4708 * A null or empty set of search characters returns the input string.</p>
4709 *
4710 * <p>The length of the search characters should normally equal the length
4711 * of the replace characters.
4712 * If the search characters is longer, then the extra search characters
4713 * are deleted.
4714 * If the search characters is shorter, then the extra replace characters
4715 * are ignored.</p>
4716 *
4717 * <pre>
4718 * StringUtils.replaceChars(null, *, *) = null
4719 * StringUtils.replaceChars("", *, *) = ""
4720 * StringUtils.replaceChars("abc", null, *) = "abc"
4721 * StringUtils.replaceChars("abc", "", *) = "abc"
4722 * StringUtils.replaceChars("abc", "b", null) = "ac"
4723 * StringUtils.replaceChars("abc", "b", "") = "ac"
4724 * StringUtils.replaceChars("abcba", "bc", "yz") = "ayzya"
4725 * StringUtils.replaceChars("abcba", "bc", "y") = "ayya"
4726 * StringUtils.replaceChars("abcba", "bc", "yzx") = "ayzya"
4727 * </pre>
4728 *
4729 * @param str String to replace characters in, may be null
4730 * @param searchChars a set of characters to search for, may be null
4731 * @param replaceChars a set of characters to replace, may be null
4732 * @return modified String, {@code null} if null string input
4733 * @since 2.0
4734 */
4735 public static String replaceChars(final String str, final String searchChars, String replaceChars) {
4736 if (isEmpty(str) || isEmpty(searchChars)) {
4737 return str;
4738 }
4739 if (replaceChars == null) {
4740 replaceChars = EMPTY;
4741 }
4742 boolean modified = false;
4743 final int replaceCharsLength = replaceChars.length();
4744 final int strLength = str.length();
4745 final StringBuilder buf = new StringBuilder(strLength);
4746 for (int i = 0; i < strLength; i++) {
4747 final char ch = str.charAt(i);
4748 final int index = searchChars.indexOf(ch);
4749 if (index >= 0) {
4750 modified = true;
4751 if (index < replaceCharsLength) {
4752 buf.append(replaceChars.charAt(index));
4753 }
4754 } else {
4755 buf.append(ch);
4756 }
4757 }
4758 if (modified) {
4759 return buf.toString();
4760 }
4761 return str;
4762 }
4763
4764 // Overlay
4765 //-----------------------------------------------------------------------
4766 /**
4767 * <p>Overlays part of a String with another String.</p>
4768 *
4769 * <p>A {@code null} string input returns {@code null}.
4770 * A negative index is treated as zero.
4771 * An index greater than the string length is treated as the string length.
4772 * The start index is always the smaller of the two indices.</p>
4773 *
4774 * <pre>
4775 * StringUtils.overlay(null, *, *, *) = null
4776 * StringUtils.overlay("", "abc", 0, 0) = "abc"
4777 * StringUtils.overlay("abcdef", null, 2, 4) = "abef"
4778 * StringUtils.overlay("abcdef", "", 2, 4) = "abef"
4779 * StringUtils.overlay("abcdef", "", 4, 2) = "abef"
4780 * StringUtils.overlay("abcdef", "zzzz", 2, 4) = "abzzzzef"
4781 * StringUtils.overlay("abcdef", "zzzz", 4, 2) = "abzzzzef"
4782 * StringUtils.overlay("abcdef", "zzzz", -1, 4) = "zzzzef"
4783 * StringUtils.overlay("abcdef", "zzzz", 2, 8) = "abzzzz"
4784 * StringUtils.overlay("abcdef", "zzzz", -2, -3) = "zzzzabcdef"
4785 * StringUtils.overlay("abcdef", "zzzz", 8, 10) = "abcdefzzzz"
4786 * </pre>
4787 *
4788 * @param str the String to do overlaying in, may be null
4789 * @param overlay the String to overlay, may be null
4790 * @param start the position to start overlaying at
4791 * @param end the position to stop overlaying before
4792 * @return overlayed String, {@code null} if null String input
4793 * @since 2.0
4794 */
4795 public static String overlay(final String str, String overlay, int start, int end) {
4796 if (str == null) {
4797 return null;
4798 }
4799 if (overlay == null) {
4800 overlay = EMPTY;
4801 }
4802 final int len = str.length();
4803 if (start < 0) {
4804 start = 0;
4805 }
4806 if (start > len) {
4807 start = len;
4808 }
4809 if (end < 0) {
4810 end = 0;
4811 }
4812 if (end > len) {
4813 end = len;
4814 }
4815 if (start > end) {
4816 final int temp = start;
4817 start = end;
4818 end = temp;
4819 }
4820 return new StringBuilder(len + start - end + overlay.length() + 1)
4821 .append(str.substring(0, start))
4822 .append(overlay)
4823 .append(str.substring(end))
4824 .toString();
4825 }
4826
4827 // Chomping
4828 //-----------------------------------------------------------------------
4829 /**
4830 * <p>Removes one newline from end of a String if it's there,
4831 * otherwise leave it alone. A newline is "{@code \n}",
4832 * "{@code \r}", or "{@code \r\n}".</p>
4833 *
4834 * <p>NOTE: This method changed in 2.0.
4835 * It now more closely matches Perl chomp.</p>
4836 *
4837 * <pre>
4838 * StringUtils.chomp(null) = null
4839 * StringUtils.chomp("") = ""
4840 * StringUtils.chomp("abc \r") = "abc "
4841 * StringUtils.chomp("abc\n") = "abc"
4842 * StringUtils.chomp("abc\r\n") = "abc"
4843 * StringUtils.chomp("abc\r\n\r\n") = "abc\r\n"
4844 * StringUtils.chomp("abc\n\r") = "abc\n"
4845 * StringUtils.chomp("abc\n\rabc") = "abc\n\rabc"
4846 * StringUtils.chomp("\r") = ""
4847 * StringUtils.chomp("\n") = ""
4848 * StringUtils.chomp("\r\n") = ""
4849 * </pre>
4850 *
4851 * @param str the String to chomp a newline from, may be null
4852 * @return String without newline, {@code null} if null String input
4853 */
4854 public static String chomp(final String str) {
4855 if (isEmpty(str)) {
4856 return str;
4857 }
4858
4859 if (str.length() == 1) {
4860 final char ch = str.charAt(0);
4861 if (ch == CharUtils.CR || ch == CharUtils.LF) {
4862 return EMPTY;
4863 }
4864 return str;
4865 }
4866
4867 int lastIdx = str.length() - 1;
4868 final char last = str.charAt(lastIdx);
4869
4870 if (last == CharUtils.LF) {
4871 if (str.charAt(lastIdx - 1) == CharUtils.CR) {
4872 lastIdx--;
4873 }
4874 } else if (last != CharUtils.CR) {
4875 lastIdx++;
4876 }
4877 return str.substring(0, lastIdx);
4878 }
4879
4880 /**
4881 * <p>Removes {@code separator} from the end of
4882 * {@code str} if it's there, otherwise leave it alone.</p>
4883 *
4884 * <p>NOTE: This method changed in version 2.0.
4885 * It now more closely matches Perl chomp.
4886 * For the previous behavior, use {@link #substringBeforeLast(String, String)}.
4887 * This method uses {@link String#endsWith(String)}.</p>
4888 *
4889 * <pre>
4890 * StringUtils.chomp(null, *) = null
4891 * StringUtils.chomp("", *) = ""
4892 * StringUtils.chomp("foobar", "bar") = "foo"
4893 * StringUtils.chomp("foobar", "baz") = "foobar"
4894 * StringUtils.chomp("foo", "foo") = ""
4895 * StringUtils.chomp("foo ", "foo") = "foo "
4896 * StringUtils.chomp(" foo", "foo") = " "
4897 * StringUtils.chomp("foo", "foooo") = "foo"
4898 * StringUtils.chomp("foo", "") = "foo"
4899 * StringUtils.chomp("foo", null) = "foo"
4900 * </pre>
4901 *
4902 * @param str the String to chomp from, may be null
4903 * @param separator separator String, may be null
4904 * @return String without trailing separator, {@code null} if null String input
4905 * @deprecated This feature will be removed in Lang 4.0, use {@link StringUtils#removeEnd(String, String)} instead
4906 */
4907 @Deprecated
4908 public static String chomp(final String str, final String separator) {
4909 return removeEnd(str,separator);
4910 }
4911
4912 // Chopping
4913 //-----------------------------------------------------------------------
4914 /**
4915 * <p>Remove the last character from a String.</p>
4916 *
4917 * <p>If the String ends in {@code \r\n}, then remove both
4918 * of them.</p>
4919 *
4920 * <pre>
4921 * StringUtils.chop(null) = null
4922 * StringUtils.chop("") = ""
4923 * StringUtils.chop("abc \r") = "abc "
4924 * StringUtils.chop("abc\n") = "abc"
4925 * StringUtils.chop("abc\r\n") = "abc"
4926 * StringUtils.chop("abc") = "ab"
4927 * StringUtils.chop("abc\nabc") = "abc\nab"
4928 * StringUtils.chop("a") = ""
4929 * StringUtils.chop("\r") = ""
4930 * StringUtils.chop("\n") = ""
4931 * StringUtils.chop("\r\n") = ""
4932 * </pre>
4933 *
4934 * @param str the String to chop last character from, may be null
4935 * @return String without last character, {@code null} if null String input
4936 */
4937 public static String chop(final String str) {
4938 if (str == null) {
4939 return null;
4940 }
4941 final int strLen = str.length();
4942 if (strLen < 2) {
4943 return EMPTY;
4944 }
4945 final int lastIdx = strLen - 1;
4946 final String ret = str.substring(0, lastIdx);
4947 final char last = str.charAt(lastIdx);
4948 if (last == CharUtils.LF && ret.charAt(lastIdx - 1) == CharUtils.CR) {
4949 return ret.substring(0, lastIdx - 1);
4950 }
4951 return ret;
4952 }
4953
4954 // Conversion
4955 //-----------------------------------------------------------------------
4956
4957 // Padding
4958 //-----------------------------------------------------------------------
4959 /**
4960 * <p>Repeat a String {@code repeat} times to form a
4961 * new String.</p>
4962 *
4963 * <pre>
4964 * StringUtils.repeat(null, 2) = null
4965 * StringUtils.repeat("", 0) = ""
4966 * StringUtils.repeat("", 2) = ""
4967 * StringUtils.repeat("a", 3) = "aaa"
4968 * StringUtils.repeat("ab", 2) = "abab"
4969 * StringUtils.repeat("a", -2) = ""
4970 * </pre>
4971 *
4972 * @param str the String to repeat, may be null
4973 * @param repeat number of times to repeat str, negative treated as zero
4974 * @return a new String consisting of the original String repeated,
4975 * {@code null} if null String input
4976 */
4977 public static String repeat(final String str, final int repeat) {
4978 // Performance tuned for 2.0 (JDK1.4)
4979
4980 if (str == null) {
4981 return null;
4982 }
4983 if (repeat <= 0) {
4984 return EMPTY;
4985 }
4986 final int inputLength = str.length();
4987 if (repeat == 1 || inputLength == 0) {
4988 return str;
4989 }
4990 if (inputLength == 1 && repeat <= PAD_LIMIT) {
4991 return repeat(str.charAt(0), repeat);
4992 }
4993
4994 final int outputLength = inputLength * repeat;
4995 switch (inputLength) {
4996 case 1 :
4997 return repeat(str.charAt(0), repeat);
4998 case 2 :
4999 final char ch0 = str.charAt(0);
5000 final char ch1 = str.charAt(1);
5001 final char[] output2 = new char[outputLength];
5002 for (int i = repeat * 2 - 2; i >= 0; i--, i--) {
5003 output2[i] = ch0;
5004 output2[i + 1] = ch1;
5005 }
5006 return new String(output2);
5007 default :
5008 final StringBuilder buf = new StringBuilder(outputLength);
5009 for (int i = 0; i < repeat; i++) {
5010 buf.append(str);
5011 }
5012 return buf.toString();
5013 }
5014 }
5015
5016 /**
5017 * <p>Repeat a String {@code repeat} times to form a
5018 * new String, with a String separator injected each time. </p>
5019 *
5020 * <pre>
5021 * StringUtils.repeat(null, null, 2) = null
5022 * StringUtils.repeat(null, "x", 2) = null
5023 * StringUtils.repeat("", null, 0) = ""
5024 * StringUtils.repeat("", "", 2) = ""
 * StringUtils.repeat("", "x", 3) = "xx"
5026 * StringUtils.repeat("?", ", ", 3) = "?, ?, ?"
5027 * </pre>
5028 *
5029 * @param str the String to repeat, may be null
5030 * @param separator the String to inject, may be null
5031 * @param repeat number of times to repeat str, negative treated as zero
5032 * @return a new String consisting of the original String repeated,
5033 * {@code null} if null String input
5034 * @since 2.5
5035 */
5036 public static String repeat(final String str, final String separator, final int repeat) {
5037 if(str == null || separator == null) {
5038 return repeat(str, repeat);
5039 } else {
5040 // given that repeat(String, int) is quite optimized, better to rely on it than try and splice this into it
5041 final String result = repeat(str + separator, repeat);
5042 return removeEnd(result, separator);
5043 }
5044 }
5045
5046 /**
5047 * <p>Returns padding using the specified delimiter repeated
5048 * to a given length.</p>
5049 *
5050 * <pre>
5051 * StringUtils.repeat('e', 0) = ""
5052 * StringUtils.repeat('e', 3) = "eee"
5053 * StringUtils.repeat('e', -2) = ""
5054 * </pre>
5055 *
 * <p>Note: this method does not support padding with
5057 * <a href="http://www.unicode.org/glossary/#supplementary_character">Unicode Supplementary Characters</a>
5058 * as they require a pair of {@code char}s to be represented.
5059 * If you are needing to support full I18N of your applications
5060 * consider using {@link #repeat(String, int)} instead.
5061 * </p>
5062 *
5063 * @param ch character to repeat
5064 * @param repeat number of times to repeat char, negative treated as zero
5065 * @return String with repeated character
5066 * @see #repeat(String, int)
5067 */
5068 public static String repeat(final char ch, final int repeat) {
5069 final char[] buf = new char[repeat];
5070 for (int i = repeat - 1; i >= 0; i--) {
5071 buf[i] = ch;
5072 }
5073 return new String(buf);
5074 }
5075
5076 /**
5077 * <p>Right pad a String with spaces (' ').</p>
5078 *
5079 * <p>The String is padded to the size of {@code size}.</p>
5080 *
5081 * <pre>
5082 * StringUtils.rightPad(null, *) = null
5083 * StringUtils.rightPad("", 3) = " "
5084 * StringUtils.rightPad("bat", 3) = "bat"
5085 * StringUtils.rightPad("bat", 5) = "bat "
5086 * StringUtils.rightPad("bat", 1) = "bat"
5087 * StringUtils.rightPad("bat", -1) = "bat"
5088 * </pre>
5089 *
5090 * @param str the String to pad out, may be null
5091 * @param size the size to pad to
5092 * @return right padded String or original String if no padding is necessary,
5093 * {@code null} if null String input
5094 */
5095 public static String rightPad(final String str, final int size) {
5096 return rightPad(str, size, ' ');
5097 }
5098
5099 /**
5100 * <p>Right pad a String with a specified character.</p>
5101 *
5102 * <p>The String is padded to the size of {@code size}.</p>
5103 *
5104 * <pre>
5105 * StringUtils.rightPad(null, *, *) = null
5106 * StringUtils.rightPad("", 3, 'z') = "zzz"
5107 * StringUtils.rightPad("bat", 3, 'z') = "bat"
5108 * StringUtils.rightPad("bat", 5, 'z') = "batzz"
5109 * StringUtils.rightPad("bat", 1, 'z') = "bat"
5110 * StringUtils.rightPad("bat", -1, 'z') = "bat"
5111 * </pre>
5112 *
5113 * @param str the String to pad out, may be null
5114 * @param size the size to pad to
5115 * @param padChar the character to pad with
5116 * @return right padded String or original String if no padding is necessary,
5117 * {@code null} if null String input
5118 * @since 2.0
5119 */
5120 public static String rightPad(final String str, final int size, final char padChar) {
5121 if (str == null) {
5122 return null;
5123 }
5124 final int pads = size - str.length();
5125 if (pads <= 0) {
5126 return str; // returns original String when possible
5127 }
5128 if (pads > PAD_LIMIT) {
5129 return rightPad(str, size, String.valueOf(padChar));
5130 }
5131 return str.concat(repeat(padChar, pads));
5132 }
5133
5134 /**
5135 * <p>Right pad a String with a specified String.</p>
5136 *
5137 * <p>The String is padded to the size of {@code size}.</p>
5138 *
5139 * <pre>
5140 * StringUtils.rightPad(null, *, *) = null
5141 * StringUtils.rightPad("", 3, "z") = "zzz"
5142 * StringUtils.rightPad("bat", 3, "yz") = "bat"
5143 * StringUtils.rightPad("bat", 5, "yz") = "batyz"
5144 * StringUtils.rightPad("bat", 8, "yz") = "batyzyzy"
5145 * StringUtils.rightPad("bat", 1, "yz") = "bat"
5146 * StringUtils.rightPad("bat", -1, "yz") = "bat"
5147 * StringUtils.rightPad("bat", 5, null) = "bat "
5148 * StringUtils.rightPad("bat", 5, "") = "bat "
5149 * </pre>
5150 *
5151 * @param str the String to pad out, may be null
5152 * @param size the size to pad to
5153 * @param padStr the String to pad with, null or empty treated as single space
5154 * @return right padded String or original String if no padding is necessary,
5155 * {@code null} if null String input
5156 */
5157 public static String rightPad(final String str, final int size, String padStr) {
5158 if (str == null) {
5159 return null;
5160 }
5161 if (isEmpty(padStr)) {
5162 padStr = SPACE;
5163 }
5164 final int padLen = padStr.length();
5165 final int strLen = str.length();
5166 final int pads = size - strLen;
5167 if (pads <= 0) {
5168 return str; // returns original String when possible
5169 }
5170 if (padLen == 1 && pads <= PAD_LIMIT) {
5171 return rightPad(str, size, padStr.charAt(0));
5172 }
5173
5174 if (pads == padLen) {
5175 return str.concat(padStr);
5176 } else if (pads < padLen) {
5177 return str.concat(padStr.substring(0, pads));
5178 } else {
5179 final char[] padding = new char[pads];
5180 final char[] padChars = padStr.toCharArray();
5181 for (int i = 0; i < pads; i++) {
5182 padding[i] = padChars[i % padLen];
5183 }
5184 return str.concat(new String(padding));
5185 }
5186 }
5187
5188 /**
5189 * <p>Left pad a String with spaces (' ').</p>
5190 *
5191 * <p>The String is padded to the size of {@code size}.</p>
5192 *
5193 * <pre>
5194 * StringUtils.leftPad(null, *) = null
5195 * StringUtils.leftPad("", 3) = " "
5196 * StringUtils.leftPad("bat", 3) = "bat"
5197 * StringUtils.leftPad("bat", 5) = " bat"
5198 * StringUtils.leftPad("bat", 1) = "bat"
5199 * StringUtils.leftPad("bat", -1) = "bat"
5200 * </pre>
5201 *
5202 * @param str the String to pad out, may be null
5203 * @param size the size to pad to
5204 * @return left padded String or original String if no padding is necessary,
5205 * {@code null} if null String input
5206 */
5207 public static String leftPad(final String str, final int size) {
5208 return leftPad(str, size, ' ');
5209 }
5210
5211 /**
5212 * <p>Left pad a String with a specified character.</p>
5213 *
5214 * <p>Pad to a size of {@code size}.</p>
5215 *
5216 * <pre>
5217 * StringUtils.leftPad(null, *, *) = null
5218 * StringUtils.leftPad("", 3, 'z') = "zzz"
5219 * StringUtils.leftPad("bat", 3, 'z') = "bat"
5220 * StringUtils.leftPad("bat", 5, 'z') = "zzbat"
5221 * StringUtils.leftPad("bat", 1, 'z') = "bat"
5222 * StringUtils.leftPad("bat", -1, 'z') = "bat"
5223 * </pre>
5224 *
5225 * @param str the String to pad out, may be null
5226 * @param size the size to pad to
5227 * @param padChar the character to pad with
5228 * @return left padded String or original String if no padding is necessary,
5229 * {@code null} if null String input
5230 * @since 2.0
5231 */
5232 public static String leftPad(final String str, final int size, final char padChar) {
5233 if (str == null) {
5234 return null;
5235 }
5236 final int pads = size - str.length();
5237 if (pads <= 0) {
5238 return str; // returns original String when possible
5239 }
5240 if (pads > PAD_LIMIT) {
5241 return leftPad(str, size, String.valueOf(padChar));
5242 }
5243 return repeat(padChar, pads).concat(str);
5244 }
5245
5246 /**
5247 * <p>Left pad a String with a specified String.</p>
5248 *
5249 * <p>Pad to a size of {@code size}.</p>
5250 *
5251 * <pre>
5252 * StringUtils.leftPad(null, *, *) = null
5253 * StringUtils.leftPad("", 3, "z") = "zzz"
5254 * StringUtils.leftPad("bat", 3, "yz") = "bat"
5255 * StringUtils.leftPad("bat", 5, "yz") = "yzbat"
5256 * StringUtils.leftPad("bat", 8, "yz") = "yzyzybat"
5257 * StringUtils.leftPad("bat", 1, "yz") = "bat"
5258 * StringUtils.leftPad("bat", -1, "yz") = "bat"
5259 * StringUtils.leftPad("bat", 5, null) = " bat"
5260 * StringUtils.leftPad("bat", 5, "") = " bat"
5261 * </pre>
5262 *
5263 * @param str the String to pad out, may be null
5264 * @param size the size to pad to
5265 * @param padStr the String to pad with, null or empty treated as single space
5266 * @return left padded String or original String if no padding is necessary,
5267 * {@code null} if null String input
5268 */
5269 public static String leftPad(final String str, final int size, String padStr) {
5270 if (str == null) {
5271 return null;
5272 }
5273 if (isEmpty(padStr)) {
5274 padStr = SPACE;
5275 }
5276 final int padLen = padStr.length();
5277 final int strLen = str.length();
5278 final int pads = size - strLen;
5279 if (pads <= 0) {
5280 return str; // returns original String when possible
5281 }
5282 if (padLen == 1 && pads <= PAD_LIMIT) {
5283 return leftPad(str, size, padStr.charAt(0));
5284 }
5285
5286 if (pads == padLen) {
5287 return padStr.concat(str);
5288 } else if (pads < padLen) {
5289 return padStr.substring(0, pads).concat(str);
5290 } else {
5291 final char[] padding = new char[pads];
5292 final char[] padChars = padStr.toCharArray();
5293 for (int i = 0; i < pads; i++) {
5294 padding[i] = padChars[i % padLen];
5295 }
5296 return new String(padding).concat(str);
5297 }
5298 }
5299
5300 /**
5301 * Gets a CharSequence length or {@code 0} if the CharSequence is
5302 * {@code null}.
5303 *
5304 * @param cs
5305 * a CharSequence or {@code null}
5306 * @return CharSequence length or {@code 0} if the CharSequence is
5307 * {@code null}.
5308 * @since 2.4
5309 * @since 3.0 Changed signature from length(String) to length(CharSequence)
5310 */
5311 public static int length(final CharSequence cs) {
5312 return cs == null ? 0 : cs.length();
5313 }
5314
5315 // Centering
5316 //-----------------------------------------------------------------------
5317 /**
5318 * <p>Centers a String in a larger String of size {@code size}
* using the space character (' ').</p>
5320 *
5321 * <p>If the size is less than the String length, the String is returned.
5322 * A {@code null} String returns {@code null}.
5323 * A negative size is treated as zero.</p>
5324 *
5325 * <p>Equivalent to {@code center(str, size, " ")}.</p>
5326 *
5327 * <pre>
5328 * StringUtils.center(null, *) = null
5329 * StringUtils.center("", 4) = " "
5330 * StringUtils.center("ab", -1) = "ab"
5331 * StringUtils.center("ab", 4) = " ab "
5332 * StringUtils.center("abcd", 2) = "abcd"
5333 * StringUtils.center("a", 4) = " a "
5334 * </pre>
5335 *
5336 * @param str the String to center, may be null
5337 * @param size the int size of new String, negative treated as zero
5338 * @return centered String, {@code null} if null String input
5339 */
5340 public static String center(final String str, final int size) {
5341 return center(str, size, ' ');
5342 }
5343
5344 /**
5345 * <p>Centers a String in a larger String of size {@code size}.
5346 * Uses a supplied character as the value to pad the String with.</p>
5347 *
5348 * <p>If the size is less than the String length, the String is returned.
5349 * A {@code null} String returns {@code null}.
5350 * A negative size is treated as zero.</p>
5351 *
5352 * <pre>
5353 * StringUtils.center(null, *, *) = null
5354 * StringUtils.center("", 4, ' ') = " "
5355 * StringUtils.center("ab", -1, ' ') = "ab"
5356 * StringUtils.center("ab", 4, ' ') = " ab "
5357 * StringUtils.center("abcd", 2, ' ') = "abcd"
5358 * StringUtils.center("a", 4, ' ') = " a "
5359 * StringUtils.center("a", 4, 'y') = "yayy"
5360 * </pre>
5361 *
5362 * @param str the String to center, may be null
5363 * @param size the int size of new String, negative treated as zero
5364 * @param padChar the character to pad the new String with
5365 * @return centered String, {@code null} if null String input
5366 * @since 2.0
5367 */
5368 public static String center(String str, final int size, final char padChar) {
5369 if (str == null || size <= 0) {
5370 return str;
5371 }
5372 final int strLen = str.length();
5373 final int pads = size - strLen;
5374 if (pads <= 0) {
5375 return str;
5376 }
5377 str = leftPad(str, strLen + pads / 2, padChar);
5378 str = rightPad(str, size, padChar);
5379 return str;
5380 }
5381
5382 /**
5383 * <p>Centers a String in a larger String of size {@code size}.
5384 * Uses a supplied String as the value to pad the String with.</p>
5385 *
5386 * <p>If the size is less than the String length, the String is returned.
5387 * A {@code null} String returns {@code null}.
5388 * A negative size is treated as zero.</p>
5389 *
5390 * <pre>
5391 * StringUtils.center(null, *, *) = null
5392 * StringUtils.center("", 4, " ") = " "
5393 * StringUtils.center("ab", -1, " ") = "ab"
5394 * StringUtils.center("ab", 4, " ") = " ab "
5395 * StringUtils.center("abcd", 2, " ") = "abcd"
5396 * StringUtils.center("a", 4, " ") = " a "
5397 * StringUtils.center("a", 4, "yz") = "yayz"
5398 * StringUtils.center("abc", 7, null) = " abc "
5399 * StringUtils.center("abc", 7, "") = " abc "
5400 * </pre>
5401 *
5402 * @param str the String to center, may be null
5403 * @param size the int size of new String, negative treated as zero
* @param padStr the String to pad the new String with, null or empty treated as single space
* @return centered String, {@code null} if null String input
5407 */
5408 public static String center(String str, final int size, String padStr) {
5409 if (str == null || size <= 0) {
5410 return str;
5411 }
5412 if (isEmpty(padStr)) {
5413 padStr = SPACE;
5414 }
5415 final int strLen = str.length();
5416 final int pads = size - strLen;
5417 if (pads <= 0) {
5418 return str;
5419 }
5420 str = leftPad(str, strLen + pads / 2, padStr);
5421 str = rightPad(str, size, padStr);
5422 return str;
5423 }
5424
5425 // Case conversion
5426 //-----------------------------------------------------------------------
5427 /**
5428 * <p>Converts a String to upper case as per {@link String#toUpperCase()}.</p>
5429 *
5430 * <p>A {@code null} input String returns {@code null}.</p>
5431 *
5432 * <pre>
5433 * StringUtils.upperCase(null) = null
5434 * StringUtils.upperCase("") = ""
5435 * StringUtils.upperCase("aBc") = "ABC"
5436 * </pre>
5437 *
5438 * <p><strong>Note:</strong> As described in the documentation for {@link String#toUpperCase()},
5439 * the result of this method is affected by the current locale.
* For platform-independent case transformations, the method {@link #upperCase(String, Locale)}
5441 * should be used with a specific locale (e.g. {@link Locale#ENGLISH}).</p>
5442 *
5443 * @param str the String to upper case, may be null
5444 * @return the upper cased String, {@code null} if null String input
5445 */
5446 public static String upperCase(final String str) {
5447 if (str == null) {
5448 return null;
5449 }
5450 return str.toUpperCase();
5451 }
5452
5453 /**
5454 * <p>Converts a String to upper case as per {@link String#toUpperCase(Locale)}.</p>
5455 *
5456 * <p>A {@code null} input String returns {@code null}.</p>
5457 *
5458 * <pre>
5459 * StringUtils.upperCase(null, Locale.ENGLISH) = null
5460 * StringUtils.upperCase("", Locale.ENGLISH) = ""
5461 * StringUtils.upperCase("aBc", Locale.ENGLISH) = "ABC"
5462 * </pre>
5463 *
5464 * @param str the String to upper case, may be null
5465 * @param locale the locale that defines the case transformation rules, must not be null
5466 * @return the upper cased String, {@code null} if null String input
5467 * @since 2.5
5468 */
5469 public static String upperCase(final String str, final Locale locale) {
5470 if (str == null) {
5471 return null;
5472 }
5473 return str.toUpperCase(locale);
5474 }
5475
5476 /**
5477 * <p>Converts a String to lower case as per {@link String#toLowerCase()}.</p>
5478 *
5479 * <p>A {@code null} input String returns {@code null}.</p>
5480 *
5481 * <pre>
5482 * StringUtils.lowerCase(null) = null
5483 * StringUtils.lowerCase("") = ""
5484 * StringUtils.lowerCase("aBc") = "abc"
5485 * </pre>
5486 *
5487 * <p><strong>Note:</strong> As described in the documentation for {@link String#toLowerCase()},
5488 * the result of this method is affected by the current locale.
5489 * For platform-independent case transformations, the method {@link #lowerCase(String, Locale)}
5490 * should be used with a specific locale (e.g. {@link Locale#ENGLISH}).</p>
5491 *
5492 * @param str the String to lower case, may be null
5493 * @return the lower cased String, {@code null} if null String input
5494 */
5495 public static String lowerCase(final String str) {
5496 if (str == null) {
5497 return null;
5498 }
5499 return str.toLowerCase();
5500 }
5501
5502 /**
5503 * <p>Converts a String to lower case as per {@link String#toLowerCase(Locale)}.</p>
5504 *
5505 * <p>A {@code null} input String returns {@code null}.</p>
5506 *
5507 * <pre>
5508 * StringUtils.lowerCase(null, Locale.ENGLISH) = null
5509 * StringUtils.lowerCase("", Locale.ENGLISH) = ""
5510 * StringUtils.lowerCase("aBc", Locale.ENGLISH) = "abc"
5511 * </pre>
5512 *
5513 * @param str the String to lower case, may be null
5514 * @param locale the locale that defines the case transformation rules, must not be null
5515 * @return the lower cased String, {@code null} if null String input
5516 * @since 2.5
5517 */
5518 public static String lowerCase(final String str, final Locale locale) {
5519 if (str == null) {
5520 return null;
5521 }
5522 return str.toLowerCase(locale);
5523 }
5524
5525 /**
5526 * <p>Capitalizes a String changing the first letter to title case as
5527 * per {@link Character#toTitleCase(char)}. No other letters are changed.</p>
5528 *
5529 * <p>For a word based algorithm, see {@link org.apache.commons.lang3.text.WordUtils#capitalize(String)}.
5530 * A {@code null} input String returns {@code null}.</p>
5531 *
5532 * <pre>
5533 * StringUtils.capitalize(null) = null
5534 * StringUtils.capitalize("") = ""
5535 * StringUtils.capitalize("cat") = "Cat"
5536 * StringUtils.capitalize("cAt") = "CAt"
5537 * </pre>
5538 *
5539 * @param str the String to capitalize, may be null
5540 * @return the capitalized String, {@code null} if null String input
5541 * @see org.apache.commons.lang3.text.WordUtils#capitalize(String)
5542 * @see #uncapitalize(String)
5543 * @since 2.0
5544 */
5545 public static String capitalize(final String str) {
5546 int strLen;
5547 if (str == null || (strLen = str.length()) == 0) {
5548 return str;
5549 }
5550 return new StringBuilder(strLen)
5551 .append(Character.toTitleCase(str.charAt(0)))
5552 .append(str.substring(1))
5553 .toString();
5554 }
5555
5556 /**
* <p>Uncapitalizes a String changing the first letter to lower case as
5558 * per {@link Character#toLowerCase(char)}. No other letters are changed.</p>
5559 *
5560 * <p>For a word based algorithm, see {@link org.apache.commons.lang3.text.WordUtils#uncapitalize(String)}.
5561 * A {@code null} input String returns {@code null}.</p>
5562 *
5563 * <pre>
5564 * StringUtils.uncapitalize(null) = null
5565 * StringUtils.uncapitalize("") = ""
5566 * StringUtils.uncapitalize("Cat") = "cat"
5567 * StringUtils.uncapitalize("CAT") = "cAT"
5568 * </pre>
5569 *
5570 * @param str the String to uncapitalize, may be null
5571 * @return the uncapitalized String, {@code null} if null String input
5572 * @see org.apache.commons.lang3.text.WordUtils#uncapitalize(String)
5573 * @see #capitalize(String)
5574 * @since 2.0
5575 */
5576 public static String uncapitalize(final String str) {
5577 int strLen;
5578 if (str == null || (strLen = str.length()) == 0) {
5579 return str;
5580 }
5581 return new StringBuilder(strLen)
5582 .append(Character.toLowerCase(str.charAt(0)))
5583 .append(str.substring(1))
5584 .toString();
5585 }
5586
5587 /**
5588 * <p>Swaps the case of a String changing upper and title case to
5589 * lower case, and lower case to upper case.</p>
5590 *
5591 * <ul>
5592 * <li>Upper case character converts to Lower case</li>
5593 * <li>Title case character converts to Lower case</li>
5594 * <li>Lower case character converts to Upper case</li>
5595 * </ul>
5596 *
5597 * <p>For a word based algorithm, see {@link org.apache.commons.lang3.text.WordUtils#swapCase(String)}.
5598 * A {@code null} input String returns {@code null}.</p>
5599 *
5600 * <pre>
5601 * StringUtils.swapCase(null) = null
5602 * StringUtils.swapCase("") = ""
5603 * StringUtils.swapCase("The dog has a BONE") = "tHE DOG HAS A bone"
5604 * </pre>
5605 *
5606 * <p>NOTE: This method changed in Lang version 2.0.
5607 * It no longer performs a word based algorithm.
5608 * If you only use ASCII, you will notice no change.
5609 * That functionality is available in org.apache.commons.lang3.text.WordUtils.</p>
5610 *
5611 * @param str the String to swap case, may be null
5612 * @return the changed String, {@code null} if null String input
5613 */
5614 public static String swapCase(final String str) {
5615 if (StringUtils.isEmpty(str)) {
5616 return str;
5617 }
5618
5619 final char[] buffer = str.toCharArray();
5620
5621 for (int i = 0; i < buffer.length; i++) {
5622 final char ch = buffer[i];
5623 if (Character.isUpperCase(ch)) {
5624 buffer[i] = Character.toLowerCase(ch);
5625 } else if (Character.isTitleCase(ch)) {
5626 buffer[i] = Character.toLowerCase(ch);
5627 } else if (Character.isLowerCase(ch)) {
5628 buffer[i] = Character.toUpperCase(ch);
5629 }
5630 }
5631 return new String(buffer);
5632 }
5633
5634 // Count matches
5635 //-----------------------------------------------------------------------
5636 /**
5637 * <p>Counts how many times the substring appears in the larger string.</p>
5638 *
5639 * <p>A {@code null} or empty ("") String input returns {@code 0}.</p>
5640 *
5641 * <pre>
5642 * StringUtils.countMatches(null, *) = 0
5643 * StringUtils.countMatches("", *) = 0
5644 * StringUtils.countMatches("abba", null) = 0
5645 * StringUtils.countMatches("abba", "") = 0
5646 * StringUtils.countMatches("abba", "a") = 2
5647 * StringUtils.countMatches("abba", "ab") = 1
5648 * StringUtils.countMatches("abba", "xxx") = 0
5649 * </pre>
5650 *
5651 * @param str the CharSequence to check, may be null
5652 * @param sub the substring to count, may be null
5653 * @return the number of occurrences, 0 if either CharSequence is {@code null}
5654 * @since 3.0 Changed signature from countMatches(String, String) to countMatches(CharSequence, CharSequence)
5655 */
5656 public static int countMatches(final CharSequence str, final CharSequence sub) {
5657 if (isEmpty(str) || isEmpty(sub)) {
5658 return 0;
5659 }
5660 int count = 0;
5661 int idx = 0;
5662 while ((idx = CharSequenceUtils.indexOf(str, sub, idx)) != INDEX_NOT_FOUND) {
5663 count++;
5664 idx += sub.length();
5665 }
5666 return count;
5667 }
5668
5669 // Character Tests
5670 //-----------------------------------------------------------------------
5671 /**
5672 * <p>Checks if the CharSequence contains only Unicode letters.</p>
5673 *
5674 * <p>{@code null} will return {@code false}.
5675 * An empty CharSequence (length()=0) will return {@code false}.</p>
5676 *
5677 * <pre>
5678 * StringUtils.isAlpha(null) = false
5679 * StringUtils.isAlpha("") = false
5680 * StringUtils.isAlpha(" ") = false
5681 * StringUtils.isAlpha("abc") = true
5682 * StringUtils.isAlpha("ab2c") = false
5683 * StringUtils.isAlpha("ab-c") = false
5684 * </pre>
5685 *
5686 * @param cs the CharSequence to check, may be null
5687 * @return {@code true} if only contains letters, and is non-null
5688 * @since 3.0 Changed signature from isAlpha(String) to isAlpha(CharSequence)
5689 * @since 3.0 Changed "" to return false and not true
5690 */
5691 public static boolean isAlpha(final CharSequence cs) {
5692 if (cs == null || cs.length() == 0) {
5693 return false;
5694 }
5695 final int sz = cs.length();
5696 for (int i = 0; i < sz; i++) {
5697 if (Character.isLetter(cs.charAt(i)) == false) {
5698 return false;
5699 }
5700 }
5701 return true;
5702 }
5703
5704 /**
5705 * <p>Checks if the CharSequence contains only Unicode letters and
5706 * space (' ').</p>
5707 *
* <p>{@code null} will return {@code false}.
5709 * An empty CharSequence (length()=0) will return {@code true}.</p>
5710 *
5711 * <pre>
5712 * StringUtils.isAlphaSpace(null) = false
5713 * StringUtils.isAlphaSpace("") = true
5714 * StringUtils.isAlphaSpace(" ") = true
5715 * StringUtils.isAlphaSpace("abc") = true
5716 * StringUtils.isAlphaSpace("ab c") = true
5717 * StringUtils.isAlphaSpace("ab2c") = false
5718 * StringUtils.isAlphaSpace("ab-c") = false
5719 * </pre>
5720 *
5721 * @param cs the CharSequence to check, may be null
5722 * @return {@code true} if only contains letters and space,
5723 * and is non-null
5724 * @since 3.0 Changed signature from isAlphaSpace(String) to isAlphaSpace(CharSequence)
5725 */
5726 public static boolean isAlphaSpace(final CharSequence cs) {
5727 if (cs == null) {
5728 return false;
5729 }
5730 final int sz = cs.length();
5731 for (int i = 0; i < sz; i++) {
5732 if (Character.isLetter(cs.charAt(i)) == false && cs.charAt(i) != ' ') {
5733 return false;
5734 }
5735 }
5736 return true;
5737 }
5738
5739 /**
5740 * <p>Checks if the CharSequence contains only Unicode letters or digits.</p>
5741 *
5742 * <p>{@code null} will return {@code false}.
5743 * An empty CharSequence (length()=0) will return {@code false}.</p>
5744 *
5745 * <pre>
5746 * StringUtils.isAlphanumeric(null) = false
5747 * StringUtils.isAlphanumeric("") = false
5748 * StringUtils.isAlphanumeric(" ") = false
5749 * StringUtils.isAlphanumeric("abc") = true
5750 * StringUtils.isAlphanumeric("ab c") = false
5751 * StringUtils.isAlphanumeric("ab2c") = true
5752 * StringUtils.isAlphanumeric("ab-c") = false
5753 * </pre>
5754 *
5755 * @param cs the CharSequence to check, may be null
5756 * @return {@code true} if only contains letters or digits,
5757 * and is non-null
5758 * @since 3.0 Changed signature from isAlphanumeric(String) to isAlphanumeric(CharSequence)
5759 * @since 3.0 Changed "" to return false and not true
5760 */
5761 public static boolean isAlphanumeric(final CharSequence cs) {
5762 if (cs == null || cs.length() == 0) {
5763 return false;
5764 }
5765 final int sz = cs.length();
5766 for (int i = 0; i < sz; i++) {
5767 if (Character.isLetterOrDigit(cs.charAt(i)) == false) {
5768 return false;
5769 }
5770 }
5771 return true;
5772 }
5773
5774 /**
5775 * <p>Checks if the CharSequence contains only Unicode letters, digits
5776 * or space ({@code ' '}).</p>
5777 *
5778 * <p>{@code null} will return {@code false}.
5779 * An empty CharSequence (length()=0) will return {@code true}.</p>
5780 *
5781 * <pre>
5782 * StringUtils.isAlphanumericSpace(null) = false
5783 * StringUtils.isAlphanumericSpace("") = true
5784 * StringUtils.isAlphanumericSpace(" ") = true
5785 * StringUtils.isAlphanumericSpace("abc") = true
5786 * StringUtils.isAlphanumericSpace("ab c") = true
5787 * StringUtils.isAlphanumericSpace("ab2c") = true
5788 * StringUtils.isAlphanumericSpace("ab-c") = false
5789 * </pre>
5790 *
5791 * @param cs the CharSequence to check, may be null
5792 * @return {@code true} if only contains letters, digits or space,
5793 * and is non-null
5794 * @since 3.0 Changed signature from isAlphanumericSpace(String) to isAlphanumericSpace(CharSequence)
5795 */
5796 public static boolean isAlphanumericSpace(final CharSequence cs) {
5797 if (cs == null) {
5798 return false;
5799 }
5800 final int sz = cs.length();
5801 for (int i = 0; i < sz; i++) {
5802 if (Character.isLetterOrDigit(cs.charAt(i)) == false && cs.charAt(i) != ' ') {
5803 return false;
5804 }
5805 }
5806 return true;
5807 }
5808
5809 /**
5810 * <p>Checks if the CharSequence contains only ASCII printable characters.</p>
5811 *
5812 * <p>{@code null} will return {@code false}.
5813 * An empty CharSequence (length()=0) will return {@code true}.</p>
5814 *
5815 * <pre>
5816 * StringUtils.isAsciiPrintable(null) = false
5817 * StringUtils.isAsciiPrintable("") = true
5818 * StringUtils.isAsciiPrintable(" ") = true
5819 * StringUtils.isAsciiPrintable("Ceki") = true
5820 * StringUtils.isAsciiPrintable("ab2c") = true
5821 * StringUtils.isAsciiPrintable("!ab-c~") = true
5822 * StringUtils.isAsciiPrintable("\u0020") = true
5823 * StringUtils.isAsciiPrintable("\u0021") = true
5824 * StringUtils.isAsciiPrintable("\u007e") = true
5825 * StringUtils.isAsciiPrintable("\u007f") = false
5826 * StringUtils.isAsciiPrintable("Ceki G\u00fclc\u00fc") = false
5827 * </pre>
5828 *
5829 * @param cs the CharSequence to check, may be null
5830 * @return {@code true} if every character is in the range
5831 * 32 thru 126
5832 * @since 2.1
5833 * @since 3.0 Changed signature from isAsciiPrintable(String) to isAsciiPrintable(CharSequence)
5834 */
5835 public static boolean isAsciiPrintable(final CharSequence cs) {
5836 if (cs == null) {
5837 return false;
5838 }
5839 final int sz = cs.length();
5840 for (int i = 0; i < sz; i++) {
5841 if (CharUtils.isAsciiPrintable(cs.charAt(i)) == false) {
5842 return false;
5843 }
5844 }
5845 return true;
5846 }
5847
5848 /**
5849 * <p>Checks if the CharSequence contains only Unicode digits.
5850 * A decimal point is not a Unicode digit and returns false.</p>
5851 *
5852 * <p>{@code null} will return {@code false}.
5853 * An empty CharSequence (length()=0) will return {@code false}.</p>
5854 *
5855 * <p>Note that the method does not allow for a leading sign, either positive or negative.
5856 * Also, if a String passes the numeric test, it may still generate a NumberFormatException
5857 * when parsed by Integer.parseInt or Long.parseLong, e.g. if the value is outside the range
5858 * for int or long respectively.</p>
5859 *
5860 * <pre>
5861 * StringUtils.isNumeric(null) = false
5862 * StringUtils.isNumeric("") = false
5863 * StringUtils.isNumeric(" ") = false
5864 * StringUtils.isNumeric("123") = true
5865 * StringUtils.isNumeric("12 3") = false
5866 * StringUtils.isNumeric("ab2c") = false
5867 * StringUtils.isNumeric("12-3") = false
5868 * StringUtils.isNumeric("12.3") = false
5869 * StringUtils.isNumeric("-123") = false
5870 * StringUtils.isNumeric("+123") = false
5871 * </pre>
5872 *
5873 * @param cs the CharSequence to check, may be null
5874 * @return {@code true} if only contains digits, and is non-null
5875 * @since 3.0 Changed signature from isNumeric(String) to isNumeric(CharSequence)
5876 * @since 3.0 Changed "" to return false and not true
5877 */
5878 public static boolean isNumeric(final CharSequence cs) {
5879 if (cs == null || cs.length() == 0) {
5880 return false;
5881 }
5882 final int sz = cs.length();
5883 for (int i = 0; i < sz; i++) {
5884 if (Character.isDigit(cs.charAt(i)) == false) {
5885 return false;
5886 }
5887 }
5888 return true;
5889 }
5890
5891 /**
5892 * <p>Checks if the CharSequence contains only Unicode digits or space
5893 * ({@code ' '}).
5894 * A decimal point is not a Unicode digit and returns false.</p>
5895 *
5896 * <p>{@code null} will return {@code false}.
5897 * An empty CharSequence (length()=0) will return {@code true}.</p>
5898 *
5899 * <pre>
5900 * StringUtils.isNumericSpace(null) = false
5901 * StringUtils.isNumericSpace("") = true
5902 * StringUtils.isNumericSpace(" ") = true
5903 * StringUtils.isNumericSpace("123") = true
5904 * StringUtils.isNumericSpace("12 3") = true
5905 * StringUtils.isNumericSpace("ab2c") = false
5906 * StringUtils.isNumericSpace("12-3") = false
5907 * StringUtils.isNumericSpace("12.3") = false
5908 * </pre>
5909 *
5910 * @param cs the CharSequence to check, may be null
5911 * @return {@code true} if only contains digits or space,
5912 * and is non-null
5913 * @since 3.0 Changed signature from isNumericSpace(String) to isNumericSpace(CharSequence)
5914 */
5915 public static boolean isNumericSpace(final CharSequence cs) {
5916 if (cs == null) {
5917 return false;
5918 }
5919 final int sz = cs.length();
5920 for (int i = 0; i < sz; i++) {
5921 if (Character.isDigit(cs.charAt(i)) == false && cs.charAt(i) != ' ') {
5922 return false;
5923 }
5924 }
5925 return true;
5926 }
5927
5928 /**
5929 * <p>Checks if the CharSequence contains only whitespace.</p>
5930 *
5931 * <p>{@code null} will return {@code false}.
5932 * An empty CharSequence (length()=0) will return {@code true}.</p>
5933 *
5934 * <pre>
5935 * StringUtils.isWhitespace(null) = false
5936 * StringUtils.isWhitespace("") = true
5937 * StringUtils.isWhitespace(" ") = true
5938 * StringUtils.isWhitespace("abc") = false
5939 * StringUtils.isWhitespace("ab2c") = false
5940 * StringUtils.isWhitespace("ab-c") = false
5941 * </pre>
5942 *
5943 * @param cs the CharSequence to check, may be null
5944 * @return {@code true} if only contains whitespace, and is non-null
5945 * @since 2.0
5946 * @since 3.0 Changed signature from isWhitespace(String) to isWhitespace(CharSequence)
5947 */
5948 public static boolean isWhitespace(final CharSequence cs) {
5949 if (cs == null) {
5950 return false;
5951 }
5952 final int sz = cs.length();
5953 for (int i = 0; i < sz; i++) {
5954 if (Character.isWhitespace(cs.charAt(i)) == false) {
5955 return false;
5956 }
5957 }
5958 return true;
5959 }
5960
5961 /**
5962 * <p>Checks if the CharSequence contains only lowercase characters.</p>
5963 *
5964 * <p>{@code null} will return {@code false}.
5965 * An empty CharSequence (length()=0) will return {@code false}.</p>
5966 *
5967 * <pre>
5968 * StringUtils.isAllLowerCase(null) = false
5969 * StringUtils.isAllLowerCase("") = false
5970 * StringUtils.isAllLowerCase(" ") = false
5971 * StringUtils.isAllLowerCase("abc") = true
5972 * StringUtils.isAllLowerCase("abC") = false
5973 * </pre>
5974 *
5975 * @param cs the CharSequence to check, may be null
5976 * @return {@code true} if only contains lowercase characters, and is non-null
5977 * @since 2.5
5978 * @since 3.0 Changed signature from isAllLowerCase(String) to isAllLowerCase(CharSequence)
5979 */
5980 public static boolean isAllLowerCase(final CharSequence cs) {
5981 if (cs == null || isEmpty(cs)) {
5982 return false;
5983 }
5984 final int sz = cs.length();
5985 for (int i = 0; i < sz; i++) {
5986 if (Character.isLowerCase(cs.charAt(i)) == false) {
5987 return false;
5988 }
5989 }
5990 return true;
5991 }
5992
5993 /**
5994 * <p>Checks if the CharSequence contains only uppercase characters.</p>
5995 *
5996 * <p>{@code null} will return {@code false}.
5997 * An empty String (length()=0) will return {@code false}.</p>
5998 *
5999 * <pre>
6000 * StringUtils.isAllUpperCase(null) = false
6001 * StringUtils.isAllUpperCase("") = false
6002 * StringUtils.isAllUpperCase(" ") = false
6003 * StringUtils.isAllUpperCase("ABC") = true
6004 * StringUtils.isAllUpperCase("aBC") = false
6005 * </pre>
6006 *
6007 * @param cs the CharSequence to check, may be null
6008 * @return {@code true} if only contains uppercase characters, and is non-null
6009 * @since 2.5
6010 * @since 3.0 Changed signature from isAllUpperCase(String) to isAllUpperCase(CharSequence)
6011 */
6012 public static boolean isAllUpperCase(final CharSequence cs) {
6013 if (cs == null || isEmpty(cs)) {
6014 return false;
6015 }
6016 final int sz = cs.length();
6017 for (int i = 0; i < sz; i++) {
6018 if (Character.isUpperCase(cs.charAt(i)) == false) {
6019 return false;
6020 }
6021 }
6022 return true;
6023 }
6024
6025 // Defaults
6026 //-----------------------------------------------------------------------
6027 /**
6028 * <p>Returns either the passed in String,
6029 * or if the String is {@code null}, an empty String ("").</p>
6030 *
6031 * <pre>
6032 * StringUtils.defaultString(null) = ""
6033 * StringUtils.defaultString("") = ""
6034 * StringUtils.defaultString("bat") = "bat"
6035 * </pre>
6036 *
6037 * @see ObjectUtils#toString(Object)
6038 * @see String#valueOf(Object)
6039 * @param str the String to check, may be null
6040 * @return the passed in String, or the empty String if it
6041 * was {@code null}
6042 */
6043 public static String defaultString(final String str) {
6044 return str == null ? EMPTY : str;
6045 }
6046
6047 /**
6048 * <p>Returns either the passed in String, or if the String is
6049 * {@code null}, the value of {@code defaultStr}.</p>
6050 *
6051 * <pre>
6052 * StringUtils.defaultString(null, "NULL") = "NULL"
6053 * StringUtils.defaultString("", "NULL") = ""
6054 * StringUtils.defaultString("bat", "NULL") = "bat"
6055 * </pre>
6056 *
6057 * @see ObjectUtils#toString(Object,String)
6058 * @see String#valueOf(Object)
6059 * @param str the String to check, may be null
6060 * @param defaultStr the default String to return
6061 * if the input is {@code null}, may be null
6062 * @return the passed in String, or the default if it was {@code null}
6063 */
6064 public static String defaultString(final String str, final String defaultStr) {
6065 return str == null ? defaultStr : str;
6066 }
6067
6068 /**
6069 * <p>Returns either the passed in CharSequence, or if the CharSequence is
6070 * whitespace, empty ("") or {@code null}, the value of {@code defaultStr}.</p>
6071 *
6072 * <pre>
6073 * StringUtils.defaultIfBlank(null, "NULL") = "NULL"
6074 * StringUtils.defaultIfBlank("", "NULL") = "NULL"
6075 * StringUtils.defaultIfBlank(" ", "NULL") = "NULL"
6076 * StringUtils.defaultIfBlank("bat", "NULL") = "bat"
6077 * StringUtils.defaultIfBlank("", null) = null
6078 * </pre>
6079 * @param <T> the specific kind of CharSequence
6080 * @param str the CharSequence to check, may be null
6081 * @param defaultStr the default CharSequence to return
6082 * if the input is whitespace, empty ("") or {@code null}, may be null
6083 * @return the passed in CharSequence, or the default
6084 * @see StringUtils#defaultString(String, String)
6085 */
6086 public static <T extends CharSequence> T defaultIfBlank(final T str, final T defaultStr) {
6087 return StringUtils.isBlank(str) ? defaultStr : str;
6088 }
6089
6090 /**
6091 * <p>Returns either the passed in CharSequence, or if the CharSequence is
6092 * empty or {@code null}, the value of {@code defaultStr}.</p>
6093 *
6094 * <pre>
6095 * StringUtils.defaultIfEmpty(null, "NULL") = "NULL"
6096 * StringUtils.defaultIfEmpty("", "NULL") = "NULL"
6097 * StringUtils.defaultIfEmpty(" ", "NULL") = " "
6098 * StringUtils.defaultIfEmpty("bat", "NULL") = "bat"
6099 * StringUtils.defaultIfEmpty("", null) = null
6100 * </pre>
6101 * @param <T> the specific kind of CharSequence
6102 * @param str the CharSequence to check, may be null
6103 * @param defaultStr the default CharSequence to return
6104 * if the input is empty ("") or {@code null}, may be null
6105 * @return the passed in CharSequence, or the default
6106 * @see StringUtils#defaultString(String, String)
6107 */
6108 public static <T extends CharSequence> T defaultIfEmpty(final T str, final T defaultStr) {
6109 return StringUtils.isEmpty(str) ? defaultStr : str;
6110 }
6111
6112 // Reversing
6113 //-----------------------------------------------------------------------
6114 /**
6115 * <p>Reverses a String as per {@link StringBuilder#reverse()}.</p>
6116 *
6117 * <p>A {@code null} String returns {@code null}.</p>
6118 *
6119 * <pre>
6120 * StringUtils.reverse(null) = null
6121 * StringUtils.reverse("") = ""
6122 * StringUtils.reverse("bat") = "tab"
6123 * </pre>
6124 *
6125 * @param str the String to reverse, may be null
6126 * @return the reversed String, {@code null} if null String input
6127 */
6128 public static String reverse(final String str) {
6129 if (str == null) {
6130 return null;
6131 }
6132 return new StringBuilder(str).reverse().toString();
6133 }
6134
6135 /**
6136 * <p>Reverses a String that is delimited by a specific character.</p>
6137 *
6138 * <p>The Strings between the delimiters are not reversed.
6139 * Thus java.lang.String becomes String.lang.java (if the delimiter
6140 * is {@code '.'}).</p>
6141 *
6142 * <pre>
6143 * StringUtils.reverseDelimited(null, *) = null
6144 * StringUtils.reverseDelimited("", *) = ""
6145 * StringUtils.reverseDelimited("a.b.c", 'x') = "a.b.c"
6146 * StringUtils.reverseDelimited("a.b.c", ".") = "c.b.a"
6147 * </pre>
6148 *
6149 * @param str the String to reverse, may be null
6150 * @param separatorChar the separator character to use
6151 * @return the reversed String, {@code null} if null String input
6152 * @since 2.0
6153 */
6154 public static String reverseDelimited(final String str, final char separatorChar) {
6155 if (str == null) {
6156 return null;
6157 }
6158 // could implement manually, but simple way is to reuse other,
6159 // probably slower, methods.
6160 final String[] strs = split(str, separatorChar);
6161 ArrayUtils.reverse(strs);
6162 return join(strs, separatorChar);
6163 }
6164
6165 // Abbreviating
6166 //-----------------------------------------------------------------------
6167 /**
6168 * <p>Abbreviates a String using ellipses. This will turn
6169 * "Now is the time for all good men" into "Now is the time for..."</p>
6170 *
6171 * <p>Specifically:
6172 * <ul>
6173 * <li>If {@code str} is less than {@code maxWidth} characters
6174 * long, return it.</li>
6175 * <li>Else abbreviate it to {@code (substring(str, 0, max-3) + "...")}.</li>
6176 * <li>If {@code maxWidth} is less than {@code 4}, throw an
6177 * {@code IllegalArgumentException}.</li>
6178 * <li>In no case will it return a String of length greater than
6179 * {@code maxWidth}.</li>
6180 * </ul>
6181 * </p>
6182 *
6183 * <pre>
6184 * StringUtils.abbreviate(null, *) = null
6185 * StringUtils.abbreviate("", 4) = ""
6186 * StringUtils.abbreviate("abcdefg", 6) = "abc..."
6187 * StringUtils.abbreviate("abcdefg", 7) = "abcdefg"
6188 * StringUtils.abbreviate("abcdefg", 8) = "abcdefg"
6189 * StringUtils.abbreviate("abcdefg", 4) = "a..."
6190 * StringUtils.abbreviate("abcdefg", 3) = IllegalArgumentException
6191 * </pre>
6192 *
6193 * @param str the String to check, may be null
6194 * @param maxWidth maximum length of result String, must be at least 4
6195 * @return abbreviated String, {@code null} if null String input
6196 * @throws IllegalArgumentException if the width is too small
6197 * @since 2.0
6198 */
6199 public static String abbreviate(final String str, final int maxWidth) {
6200 return abbreviate(str, 0, maxWidth);
6201 }
6202
6203 /**
6204 * <p>Abbreviates a String using ellipses. This will turn
6205 * "Now is the time for all good men" into "...is the time for..."</p>
6206 *
6207 * <p>Works like {@code abbreviate(String, int)}, but allows you to specify
6208 * a "left edge" offset. Note that this left edge is not necessarily going to
6209 * be the leftmost character in the result, or the first character following the
6210 * ellipses, but it will appear somewhere in the result.
6211 *
6212 * <p>In no case will it return a String of length greater than
6213 * {@code maxWidth}.</p>
6214 *
6215 * <pre>
6216 * StringUtils.abbreviate(null, *, *) = null
6217 * StringUtils.abbreviate("", 0, 4) = ""
6218 * StringUtils.abbreviate("abcdefghijklmno", -1, 10) = "abcdefg..."
6219 * StringUtils.abbreviate("abcdefghijklmno", 0, 10) = "abcdefg..."
6220 * StringUtils.abbreviate("abcdefghijklmno", 1, 10) = "abcdefg..."
6221 * StringUtils.abbreviate("abcdefghijklmno", 4, 10) = "abcdefg..."
6222 * StringUtils.abbreviate("abcdefghijklmno", 5, 10) = "...fghi..."
6223 * StringUtils.abbreviate("abcdefghijklmno", 6, 10) = "...ghij..."
6224 * StringUtils.abbreviate("abcdefghijklmno", 8, 10) = "...ijklmno"
6225 * StringUtils.abbreviate("abcdefghijklmno", 10, 10) = "...ijklmno"
6226 * StringUtils.abbreviate("abcdefghijklmno", 12, 10) = "...ijklmno"
6227 * StringUtils.abbreviate("abcdefghij", 0, 3) = IllegalArgumentException
6228 * StringUtils.abbreviate("abcdefghij", 5, 6) = IllegalArgumentException
6229 * </pre>
6230 *
6231 * @param str the String to check, may be null
6232 * @param offset left edge of source String
6233 * @param maxWidth maximum length of result String, must be at least 4
6234 * @return abbreviated String, {@code null} if null String input
6235 * @throws IllegalArgumentException if the width is too small
6236 * @since 2.0
6237 */
6238 public static String abbreviate(final String str, int offset, final int maxWidth) {
6239 if (str == null) {
6240 return null;
6241 }
6242 if (maxWidth < 4) {
6243 throw new IllegalArgumentException("Minimum abbreviation width is 4");
6244 }
6245 if (str.length() <= maxWidth) {
6246 return str;
6247 }
6248 if (offset > str.length()) {
6249 offset = str.length();
6250 }
6251 if (str.length() - offset < maxWidth - 3) {
6252 offset = str.length() - (maxWidth - 3);
6253 }
6254 final String abrevMarker = "...";
6255 if (offset <= 4) {
6256 return str.substring(0, maxWidth - 3) + abrevMarker;
6257 }
6258 if (maxWidth < 7) {
6259 throw new IllegalArgumentException("Minimum abbreviation width with offset is 7");
6260 }
6261 if (offset + maxWidth - 3 < str.length()) {
6262 return abrevMarker + abbreviate(str.substring(offset), maxWidth - 3);
6263 }
6264 return abrevMarker + str.substring(str.length() - (maxWidth - 3));
6265 }
6266
6267 /**
6268 * <p>Abbreviates a String to the length passed, replacing the middle characters with the supplied
6269 * replacement String.</p>
6270 *
6271 * <p>This abbreviation only occurs if the following criteria is met:
6272 * <ul>
6273 * <li>Neither the String for abbreviation nor the replacement String are null or empty </li>
6274 * <li>The length to truncate to is less than the length of the supplied String</li>
6275 * <li>The length to truncate to is greater than 0</li>
6276 * <li>The abbreviated String will have enough room for the length supplied replacement String
6277 * and the first and last characters of the supplied String for abbreviation</li>
6278 * </ul>
6279 * Otherwise, the returned String will be the same as the supplied String for abbreviation.
6280 * </p>
6281 *
6282 * <pre>
6283 * StringUtils.abbreviateMiddle(null, null, 0) = null
6284 * StringUtils.abbreviateMiddle("abc", null, 0) = "abc"
6285 * StringUtils.abbreviateMiddle("abc", ".", 0) = "abc"
6286 * StringUtils.abbreviateMiddle("abc", ".", 3) = "abc"
6287 * StringUtils.abbreviateMiddle("abcdef", ".", 4) = "ab.f"
6288 * </pre>
6289 *
6290 * @param str the String to abbreviate, may be null
6291 * @param middle the String to replace the middle characters with, may be null
6292 * @param length the length to abbreviate {@code str} to.
6293 * @return the abbreviated String if the above criteria is met, or the original String supplied for abbreviation.
6294 * @since 2.5
6295 */
6296 public static String abbreviateMiddle(final String str, final String middle, final int length) {
6297 if (isEmpty(str) || isEmpty(middle)) {
6298 return str;
6299 }
6300
6301 if (length >= str.length() || length < middle.length()+2) {
6302 return str;
6303 }
6304
6305 final int targetSting = length-middle.length();
6306 final int startOffset = targetSting/2+targetSting%2;
6307 final int endOffset = str.length()-targetSting/2;
6308
6309 final StringBuilder builder = new StringBuilder(length);
6310 builder.append(str.substring(0,startOffset));
6311 builder.append(middle);
6312 builder.append(str.substring(endOffset));
6313
6314 return builder.toString();
6315 }
6316
6317 // Difference
6318 //-----------------------------------------------------------------------
6319 /**
6320 * <p>Compares two Strings, and returns the portion where they differ.
6321 * (More precisely, return the remainder of the second String,
6322 * starting from where it's different from the first.)</p>
6323 *
6324 * <p>For example,
6325 * {@code difference("i am a machine", "i am a robot") -> "robot"}.</p>
6326 *
6327 * <pre>
6328 * StringUtils.difference(null, null) = null
6329 * StringUtils.difference("", "") = ""
6330 * StringUtils.difference("", "abc") = "abc"
6331 * StringUtils.difference("abc", "") = ""
6332 * StringUtils.difference("abc", "abc") = ""
6333 * StringUtils.difference("ab", "abxyz") = "xyz"
6334 * StringUtils.difference("abcde", "abxyz") = "xyz"
6335 * StringUtils.difference("abcde", "xyz") = "xyz"
6336 * </pre>
6337 *
6338 * @param str1 the first String, may be null
6339 * @param str2 the second String, may be null
6340 * @return the portion of str2 where it differs from str1; returns the
6341 * empty String if they are equal
6342 * @since 2.0
6343 */
6344 public static String difference(final String str1, final String str2) {
6345 if (str1 == null) {
6346 return str2;
6347 }
6348 if (str2 == null) {
6349 return str1;
6350 }
6351 final int at = indexOfDifference(str1, str2);
6352 if (at == INDEX_NOT_FOUND) {
6353 return EMPTY;
6354 }
6355 return str2.substring(at);
6356 }
6357
6358 /**
6359 * <p>Compares two CharSequences, and returns the index at which the
6360 * CharSequences begin to differ.</p>
6361 *
6362 * <p>For example,
6363 * {@code indexOfDifference("i am a machine", "i am a robot") -> 7}</p>
6364 *
6365 * <pre>
6366 * StringUtils.indexOfDifference(null, null) = -1
6367 * StringUtils.indexOfDifference("", "") = -1
6368 * StringUtils.indexOfDifference("", "abc") = 0
6369 * StringUtils.indexOfDifference("abc", "") = 0
6370 * StringUtils.indexOfDifference("abc", "abc") = -1
6371 * StringUtils.indexOfDifference("ab", "abxyz") = 2
6372 * StringUtils.indexOfDifference("abcde", "abxyz") = 2
6373 * StringUtils.indexOfDifference("abcde", "xyz") = 0
6374 * </pre>
6375 *
6376 * @param cs1 the first CharSequence, may be null
6377 * @param cs2 the second CharSequence, may be null
6378 * @return the index where cs1 and cs2 begin to differ; -1 if they are equal
6379 * @since 2.0
6380 * @since 3.0 Changed signature from indexOfDifference(String, String) to
6381 * indexOfDifference(CharSequence, CharSequence)
6382 */
6383 public static int indexOfDifference(final CharSequence cs1, final CharSequence cs2) {
6384 if (cs1 == cs2) {
6385 return INDEX_NOT_FOUND;
6386 }
6387 if (cs1 == null || cs2 == null) {
6388 return 0;
6389 }
6390 int i;
6391 for (i = 0; i < cs1.length() && i < cs2.length(); ++i) {
6392 if (cs1.charAt(i) != cs2.charAt(i)) {
6393 break;
6394 }
6395 }
6396 if (i < cs2.length() || i < cs1.length()) {
6397 return i;
6398 }
6399 return INDEX_NOT_FOUND;
6400 }
6401
6402 /**
6403 * <p>Compares all CharSequences in an array and returns the index at which the
6404 * CharSequences begin to differ.</p>
6405 *
6406 * <p>For example,
6407 * <code>indexOfDifference(new String[] {"i am a machine", "i am a robot"}) -> 7</code></p>
6408 *
6409 * <pre>
6410 * StringUtils.indexOfDifference(null) = -1
6411 * StringUtils.indexOfDifference(new String[] {}) = -1
6412 * StringUtils.indexOfDifference(new String[] {"abc"}) = -1
6413 * StringUtils.indexOfDifference(new String[] {null, null}) = -1
6414 * StringUtils.indexOfDifference(new String[] {"", ""}) = -1
6415 * StringUtils.indexOfDifference(new String[] {"", null}) = 0
6416 * StringUtils.indexOfDifference(new String[] {"abc", null, null}) = 0
6417 * StringUtils.indexOfDifference(new String[] {null, null, "abc"}) = 0
6418 * StringUtils.indexOfDifference(new String[] {"", "abc"}) = 0
6419 * StringUtils.indexOfDifference(new String[] {"abc", ""}) = 0
6420 * StringUtils.indexOfDifference(new String[] {"abc", "abc"}) = -1
6421 * StringUtils.indexOfDifference(new String[] {"abc", "a"}) = 1
6422 * StringUtils.indexOfDifference(new String[] {"ab", "abxyz"}) = 2
6423 * StringUtils.indexOfDifference(new String[] {"abcde", "abxyz"}) = 2
6424 * StringUtils.indexOfDifference(new String[] {"abcde", "xyz"}) = 0
6425 * StringUtils.indexOfDifference(new String[] {"xyz", "abcde"}) = 0
6426 * StringUtils.indexOfDifference(new String[] {"i am a machine", "i am a robot"}) = 7
6427 * </pre>
6428 *
6429 * @param css array of CharSequences, entries may be null
6430 * @return the index where the strings begin to differ; -1 if they are all equal
6431 * @since 2.4
6432 * @since 3.0 Changed signature from indexOfDifference(String...) to indexOfDifference(CharSequence...)
6433 */
6434 public static int indexOfDifference(final CharSequence... css) {
6435 if (css == null || css.length <= 1) {
6436 return INDEX_NOT_FOUND;
6437 }
6438 boolean anyStringNull = false;
6439 boolean allStringsNull = true;
6440 final int arrayLen = css.length;
6441 int shortestStrLen = Integer.MAX_VALUE;
6442 int longestStrLen = 0;
6443
6444 // find the min and max string lengths; this avoids checking to make
6445 // sure we are not exceeding the length of the string each time through
6446 // the bottom loop.
6447 for (int i = 0; i < arrayLen; i++) {
6448 if (css[i] == null) {
6449 anyStringNull = true;
6450 shortestStrLen = 0;
6451 } else {
6452 allStringsNull = false;
6453 shortestStrLen = Math.min(css[i].length(), shortestStrLen);
6454 longestStrLen = Math.max(css[i].length(), longestStrLen);
6455 }
6456 }
6457
6458 // handle lists containing all nulls or all empty strings
6459 if (allStringsNull || longestStrLen == 0 && !anyStringNull) {
6460 return INDEX_NOT_FOUND;
6461 }
6462
6463 // handle lists containing some nulls or some empty strings
6464 if (shortestStrLen == 0) {
6465 return 0;
6466 }
6467
6468 // find the position with the first difference across all strings
6469 int firstDiff = -1;
6470 for (int stringPos = 0; stringPos < shortestStrLen; stringPos++) {
6471 final char comparisonChar = css[0].charAt(stringPos);
6472 for (int arrayPos = 1; arrayPos < arrayLen; arrayPos++) {
6473 if (css[arrayPos].charAt(stringPos) != comparisonChar) {
6474 firstDiff = stringPos;
6475 break;
6476 }
6477 }
6478 if (firstDiff != -1) {
6479 break;
6480 }
6481 }
6482
6483 if (firstDiff == -1 && shortestStrLen != longestStrLen) {
6484 // we compared all of the characters up to the length of the
6485 // shortest string and didn't find a match, but the string lengths
6486 // vary, so return the length of the shortest string.
6487 return shortestStrLen;
6488 }
6489 return firstDiff;
6490 }
6491
6492 /**
6493 * <p>Compares all Strings in an array and returns the initial sequence of
6494 * characters that is common to all of them.</p>
6495 *
6496 * <p>For example,
6497 * <code>getCommonPrefix(new String[] {"i am a machine", "i am a robot"}) -> "i am a "</code></p>
6498 *
6499 * <pre>
6500 * StringUtils.getCommonPrefix(null) = ""
6501 * StringUtils.getCommonPrefix(new String[] {}) = ""
6502 * StringUtils.getCommonPrefix(new String[] {"abc"}) = "abc"
6503 * StringUtils.getCommonPrefix(new String[] {null, null}) = ""
6504 * StringUtils.getCommonPrefix(new String[] {"", ""}) = ""
6505 * StringUtils.getCommonPrefix(new String[] {"", null}) = ""
6506 * StringUtils.getCommonPrefix(new String[] {"abc", null, null}) = ""
6507 * StringUtils.getCommonPrefix(new String[] {null, null, "abc"}) = ""
6508 * StringUtils.getCommonPrefix(new String[] {"", "abc"}) = ""
6509 * StringUtils.getCommonPrefix(new String[] {"abc", ""}) = ""
6510 * StringUtils.getCommonPrefix(new String[] {"abc", "abc"}) = "abc"
6511 * StringUtils.getCommonPrefix(new String[] {"abc", "a"}) = "a"
6512 * StringUtils.getCommonPrefix(new String[] {"ab", "abxyz"}) = "ab"
6513 * StringUtils.getCommonPrefix(new String[] {"abcde", "abxyz"}) = "ab"
6514 * StringUtils.getCommonPrefix(new String[] {"abcde", "xyz"}) = ""
6515 * StringUtils.getCommonPrefix(new String[] {"xyz", "abcde"}) = ""
6516 * StringUtils.getCommonPrefix(new String[] {"i am a machine", "i am a robot"}) = "i am a "
6517 * </pre>
6518 *
6519 * @param strs array of String objects, entries may be null
6520 * @return the initial sequence of characters that are common to all Strings
6521 * in the array; empty String if the array is null, the elements are all null
6522 * or if there is no common prefix.
6523 * @since 2.4
6524 */
6525 public static String getCommonPrefix(final String... strs) {
6526 if (strs == null || strs.length == 0) {
6527 return EMPTY;
6528 }
6529 final int smallestIndexOfDiff = indexOfDifference(strs);
6530 if (smallestIndexOfDiff == INDEX_NOT_FOUND) {
6531 // all strings were identical
6532 if (strs[0] == null) {
6533 return EMPTY;
6534 }
6535 return strs[0];
6536 } else if (smallestIndexOfDiff == 0) {
6537 // there were no common initial characters
6538 return EMPTY;
6539 } else {
6540 // we found a common initial character sequence
6541 return strs[0].substring(0, smallestIndexOfDiff);
6542 }
6543 }
6544
6545 // Misc
6546 //-----------------------------------------------------------------------
6547 /**
6548 * <p>Find the Levenshtein distance between two Strings.</p>
6549 *
6550 * <p>This is the number of changes needed to change one String into
6551 * another, where each change is a single character modification (deletion,
6552 * insertion or substitution).</p>
6553 *
6554 * <p>The previous implementation of the Levenshtein distance algorithm
6555 * was from <a href="http://www.merriampark.com/ld.htm">http://www.merriampark.com/ld.htm</a></p>
6556 *
6557 * <p>Chas Emerick has written an implementation in Java, which avoids an OutOfMemoryError
6558 * which can occur when my Java implementation is used with very large strings.<br>
6559 * This implementation of the Levenshtein distance algorithm
6560 * is from <a href="http://www.merriampark.com/ldjava.htm">http://www.merriampark.com/ldjava.htm</a></p>
6561 *
6562 * <pre>
6563 * StringUtils.getLevenshteinDistance(null, *) = IllegalArgumentException
6564 * StringUtils.getLevenshteinDistance(*, null) = IllegalArgumentException
6565 * StringUtils.getLevenshteinDistance("","") = 0
6566 * StringUtils.getLevenshteinDistance("","a") = 1
6567 * StringUtils.getLevenshteinDistance("aaapppp", "") = 7
6568 * StringUtils.getLevenshteinDistance("frog", "fog") = 1
6569 * StringUtils.getLevenshteinDistance("fly", "ant") = 3
6570 * StringUtils.getLevenshteinDistance("elephant", "hippo") = 7
6571 * StringUtils.getLevenshteinDistance("hippo", "elephant") = 7
6572 * StringUtils.getLevenshteinDistance("hippo", "zzzzzzzz") = 8
6573 * StringUtils.getLevenshteinDistance("hello", "hallo") = 1
6574 * </pre>
6575 *
6576 * @param s the first String, must not be null
6577 * @param t the second String, must not be null
6578 * @return result distance
6579 * @throws IllegalArgumentException if either String input {@code null}
6580 * @since 3.0 Changed signature from getLevenshteinDistance(String, String) to
6581 * getLevenshteinDistance(CharSequence, CharSequence)
6582 */
6583 public static int getLevenshteinDistance(CharSequence s, CharSequence t) {
6584 if (s == null || t == null) {
6585 throw new IllegalArgumentException("Strings must not be null");
6586 }
6587
6588 /*
6589 The difference between this impl. and the previous is that, rather
6590 than creating and retaining a matrix of size s.length() + 1 by t.length() + 1,
6591 we maintain two single-dimensional arrays of length s.length() + 1. The first, d,
6592 is the 'current working' distance array that maintains the newest distance cost
6593 counts as we iterate through the characters of String s. Each time we increment
6594 the index of String t we are comparing, d is copied to p, the second int[]. Doing so
6595 allows us to retain the previous cost counts as required by the algorithm (taking
6596 the minimum of the cost count to the left, up one, and diagonally up and to the left
6597 of the current cost count being calculated). (Note that the arrays aren't really
6598 copied anymore, just switched...this is clearly much better than cloning an array
6599 or doing a System.arraycopy() each time through the outer loop.)
6600
6601 Effectively, the difference between the two implementations is this one does not
6602 cause an out of memory condition when calculating the LD over two very large strings.
6603 */
6604
6605 int n = s.length(); // length of s
6606 int m = t.length(); // length of t
6607
6608 if (n == 0) {
6609 return m;
6610 } else if (m == 0) {
6611 return n;
6612 }
6613
6614 if (n > m) {
6615 // swap the input strings to consume less memory
6616 final CharSequence tmp = s;
6617 s = t;
6618 t = tmp;
6619 n = m;
6620 m = t.length();
6621 }
6622
6623 int p[] = new int[n + 1]; //'previous' cost array, horizontally
6624 int d[] = new int[n + 1]; // cost array, horizontally
6625 int _d[]; //placeholder to assist in swapping p and d
6626
6627 // indexes into strings s and t
6628 int i; // iterates through s
6629 int j; // iterates through t
6630
6631 char t_j; // jth character of t
6632
6633 int cost; // cost
6634
6635 for (i = 0; i <= n; i++) {
6636 p[i] = i;
6637 }
6638
6639 for (j = 1; j <= m; j++) {
6640 t_j = t.charAt(j - 1);
6641 d[0] = j;
6642
6643 for (i = 1; i <= n; i++) {
6644 cost = s.charAt(i - 1) == t_j ? 0 : 1;
6645 // minimum of cell to the left+1, to the top+1, diagonally left and up +cost
6646 d[i] = Math.min(Math.min(d[i - 1] + 1, p[i] + 1), p[i - 1] + cost);
6647 }
6648
6649 // copy current distance counts to 'previous row' distance counts
6650 _d = p;
6651 p = d;
6652 d = _d;
6653 }
6654
6655 // our last action in the above loop was to switch d and p, so p now
6656 // actually has the most recent cost counts
6657 return p[n];
6658 }
6659
6660 /**
6661 * <p>Find the Levenshtein distance between two Strings if it's less than or equal to a given
6662 * threshold.</p>
6663 *
6664 * <p>This is the number of changes needed to change one String into
6665 * another, where each change is a single character modification (deletion,
6666 * insertion or substitution).</p>
6667 *
6668 * <p>This implementation follows from Algorithms on Strings, Trees and Sequences by Dan Gusfield
6669 * and Chas Emerick's implementation of the Levenshtein distance algorithm from
6670 * <a href="http://www.merriampark.com/ld.htm">http://www.merriampark.com/ld.htm</a></p>
6671 *
6672 * <pre>
6673 * StringUtils.getLevenshteinDistance(null, *, *) = IllegalArgumentException
6674 * StringUtils.getLevenshteinDistance(*, null, *) = IllegalArgumentException
6675 * StringUtils.getLevenshteinDistance(*, *, -1) = IllegalArgumentException
6676 * StringUtils.getLevenshteinDistance("","", 0) = 0
6677 * StringUtils.getLevenshteinDistance("aaapppp", "", 8) = 7
6678 * StringUtils.getLevenshteinDistance("aaapppp", "", 7) = 7
6679 * StringUtils.getLevenshteinDistance("aaapppp", "", 6)) = -1
6680 * StringUtils.getLevenshteinDistance("elephant", "hippo", 7) = 7
6681 * StringUtils.getLevenshteinDistance("elephant", "hippo", 6) = -1
6682 * StringUtils.getLevenshteinDistance("hippo", "elephant", 7) = 7
6683 * StringUtils.getLevenshteinDistance("hippo", "elephant", 6) = -1
6684 * </pre>
6685 *
6686 * @param s the first String, must not be null
6687 * @param t the second String, must not be null
6688 * @param threshold the target threshold, must not be negative
6689 * @return result distance, or {@code -1} if the distance would be greater than the threshold
6690 * @throws IllegalArgumentException if either String input {@code null} or negative threshold
6691 */
6692 public static int getLevenshteinDistance(CharSequence s, CharSequence t, final int threshold) {
6693 if (s == null || t == null) {
6694 throw new IllegalArgumentException("Strings must not be null");
6695 }
6696 if (threshold < 0) {
6697 throw new IllegalArgumentException("Threshold must not be negative");
6698 }
6699
6700 /*
6701 This implementation only computes the distance if it's less than or equal to the
6702 threshold value, returning -1 if it's greater. The advantage is performance: unbounded
6703 distance is O(nm), but a bound of k allows us to reduce it to O(km) time by only
6704 computing a diagonal stripe of width 2k + 1 of the cost table.
6705 It is also possible to use this to compute the unbounded Levenshtein distance by starting
6706 the threshold at 1 and doubling each time until the distance is found; this is O(dm), where
6707 d is the distance.
6708
6709 One subtlety comes from needing to ignore entries on the border of our stripe
6710 eg.
6711 p[] = |#|#|#|*
6712 d[] = *|#|#|#|
6713 We must ignore the entry to the left of the leftmost member
6714 We must ignore the entry above the rightmost member
6715
6716 Another subtlety comes from our stripe running off the matrix if the strings aren't
6717 of the same size. Since string s is always swapped to be the shorter of the two,
6718 the stripe will always run off to the upper right instead of the lower left of the matrix.
6719
6720 As a concrete example, suppose s is of length 5, t is of length 7, and our threshold is 1.
6721 In this case we're going to walk a stripe of length 3. The matrix would look like so:
6722
6723 1 2 3 4 5
6724 1 |#|#| | | |
6725 2 |#|#|#| | |
6726 3 | |#|#|#| |
6727 4 | | |#|#|#|
6728 5 | | | |#|#|
6729 6 | | | | |#|
6730 7 | | | | | |
6731
6732 Note how the stripe leads off the table as there is no possible way to turn a string of length 5
6733 into one of length 7 in edit distance of 1.
6734
6735 Additionally, this implementation decreases memory usage by using two
6736 single-dimensional arrays and swapping them back and forth instead of allocating
6737 an entire n by m matrix. This requires a few minor changes, such as immediately returning
6738 when it's detected that the stripe has run off the matrix and initially filling the arrays with
6739 large values so that entries we don't compute are ignored.
6740
6741 See Algorithms on Strings, Trees and Sequences by Dan Gusfield for some discussion.
6742 */
6743
6744 int n = s.length(); // length of s
6745 int m = t.length(); // length of t
6746
6747 // if one string is empty, the edit distance is necessarily the length of the other
6748 if (n == 0) {
6749 return m <= threshold ? m : -1;
6750 } else if (m == 0) {
6751 return n <= threshold ? n : -1;
6752 }
6753
6754 if (n > m) {
6755 // swap the two strings to consume less memory
6756 final CharSequence tmp = s;
6757 s = t;
6758 t = tmp;
6759 n = m;
6760 m = t.length();
6761 }
6762
6763 int p[] = new int[n + 1]; // 'previous' cost array, horizontally
6764 int d[] = new int[n + 1]; // cost array, horizontally
6765 int _d[]; // placeholder to assist in swapping p and d
6766
6767 // fill in starting table values
6768 final int boundary = Math.min(n, threshold) + 1;
6769 for (int i = 0; i < boundary; i++) {
6770 p[i] = i;
6771 }
6772 // these fills ensure that the value above the rightmost entry of our
6773 // stripe will be ignored in following loop iterations
6774 Arrays.fill(p, boundary, p.length, Integer.MAX_VALUE);
6775 Arrays.fill(d, Integer.MAX_VALUE);
6776
6777 // iterates through t
6778 for (int j = 1; j <= m; j++) {
6779 final char t_j = t.charAt(j - 1); // jth character of t
6780 d[0] = j;
6781
6782 // compute stripe indices, constrain to array size
6783 final int min = Math.max(1, j - threshold);
6784 final int max = Math.min(n, j + threshold);
6785
6786 // the stripe may lead off of the table if s and t are of different sizes
6787 if (min > max) {
6788 return -1;
6789 }
6790
6791 // ignore entry left of leftmost
6792 if (min > 1) {
6793 d[min - 1] = Integer.MAX_VALUE;
6794 }
6795
6796 // iterates through [min, max] in s
6797 for (int i = min; i <= max; i++) {
6798 if (s.charAt(i - 1) == t_j) {
6799 // diagonally left and up
6800 d[i] = p[i - 1];
6801 } else {
6802 // 1 + minimum of cell to the left, to the top, diagonally left and up
6803 d[i] = 1 + Math.min(Math.min(d[i - 1], p[i]), p[i - 1]);
6804 }
6805 }
6806
6807 // copy current distance counts to 'previous row' distance counts
6808 _d = p;
6809 p = d;
6810 d = _d;
6811 }
6812
6813 // if p[n] is greater than the threshold, there's no guarantee on it being the correct
6814 // distance
6815 if (p[n] <= threshold) {
6816 return p[n];
6817 } else {
6818 return -1;
6819 }
6820 }
6821
6822 // startsWith
6823 //-----------------------------------------------------------------------
6824
6825 /**
6826 * <p>Check if a CharSequence starts with a specified prefix.</p>
6827 *
6828 * <p>{@code null}s are handled without exceptions. Two {@code null}
6829 * references are considered to be equal. The comparison is case sensitive.</p>
6830 *
6831 * <pre>
6832 * StringUtils.startsWith(null, null) = true
6833 * StringUtils.startsWith(null, "abc") = false
6834 * StringUtils.startsWith("abcdef", null) = false
6835 * StringUtils.startsWith("abcdef", "abc") = true
6836 * StringUtils.startsWith("ABCDEF", "abc") = false
6837 * </pre>
6838 *
6839 * @see java.lang.String#startsWith(String)
6840 * @param str the CharSequence to check, may be null
6841 * @param prefix the prefix to find, may be null
6842 * @return {@code true} if the CharSequence starts with the prefix, case sensitive, or
6843 * both {@code null}
6844 * @since 2.4
6845 * @since 3.0 Changed signature from startsWith(String, String) to startsWith(CharSequence, CharSequence)
6846 */
6847 public static boolean startsWith(final CharSequence str, final CharSequence prefix) {
6848 return startsWith(str, prefix, false);
6849 }
6850
6851 /**
6852 * <p>Case insensitive check if a CharSequence starts with a specified prefix.</p>
6853 *
6854 * <p>{@code null}s are handled without exceptions. Two {@code null}
6855 * references are considered to be equal. The comparison is case insensitive.</p>
6856 *
6857 * <pre>
6858 * StringUtils.startsWithIgnoreCase(null, null) = true
6859 * StringUtils.startsWithIgnoreCase(null, "abc") = false
6860 * StringUtils.startsWithIgnoreCase("abcdef", null) = false
6861 * StringUtils.startsWithIgnoreCase("abcdef", "abc") = true
6862 * StringUtils.startsWithIgnoreCase("ABCDEF", "abc") = true
6863 * </pre>
6864 *
6865 * @see java.lang.String#startsWith(String)
6866 * @param str the CharSequence to check, may be null
6867 * @param prefix the prefix to find, may be null
6868 * @return {@code true} if the CharSequence starts with the prefix, case insensitive, or
6869 * both {@code null}
6870 * @since 2.4
6871 * @since 3.0 Changed signature from startsWithIgnoreCase(String, String) to startsWithIgnoreCase(CharSequence, CharSequence)
6872 */
6873 public static boolean startsWithIgnoreCase(final CharSequence str, final CharSequence prefix) {
6874 return startsWith(str, prefix, true);
6875 }
6876
6877 /**
6878 * <p>Check if a CharSequence starts with a specified prefix (optionally case insensitive).</p>
6879 *
6880 * @see java.lang.String#startsWith(String)
6881 * @param str the CharSequence to check, may be null
6882 * @param prefix the prefix to find, may be null
6883 * @param ignoreCase indicates whether the compare should ignore case
6884 * (case insensitive) or not.
6885 * @return {@code true} if the CharSequence starts with the prefix or
6886 * both {@code null}
6887 */
6888 private static boolean startsWith(final CharSequence str, final CharSequence prefix, final boolean ignoreCase) {
6889 if (str == null || prefix == null) {
6890 return str == null && prefix == null;
6891 }
6892 if (prefix.length() > str.length()) {
6893 return false;
6894 }
6895 return CharSequenceUtils.regionMatches(str, ignoreCase, 0, prefix, 0, prefix.length());
6896 }
6897
6898 /**
6899 * <p>Check if a CharSequence starts with any of an array of specified strings.</p>
6900 *
6901 * <pre>
6902 * StringUtils.startsWithAny(null, null) = false
6903 * StringUtils.startsWithAny(null, new String[] {"abc"}) = false
6904 * StringUtils.startsWithAny("abcxyz", null) = false
6905 * StringUtils.startsWithAny("abcxyz", new String[] {""}) = false
6906 * StringUtils.startsWithAny("abcxyz", new String[] {"abc"}) = true
6907 * StringUtils.startsWithAny("abcxyz", new String[] {null, "xyz", "abc"}) = true
6908 * </pre>
6909 *
6910 * @param string the CharSequence to check, may be null
6911 * @param searchStrings the CharSequences to find, may be null or empty
6912 * @return {@code true} if the CharSequence starts with any of the the prefixes, case insensitive, or
6913 * both {@code null}
6914 * @since 2.5
6915 * @since 3.0 Changed signature from startsWithAny(String, String[]) to startsWithAny(CharSequence, CharSequence...)
6916 */
6917 public static boolean startsWithAny(final CharSequence string, final CharSequence... searchStrings) {
6918 if (isEmpty(string) || ArrayUtils.isEmpty(searchStrings)) {
6919 return false;
6920 }
6921 for (final CharSequence searchString : searchStrings) {
6922 if (StringUtils.startsWith(string, searchString)) {
6923 return true;
6924 }
6925 }
6926 return false;
6927 }
6928
6929 // endsWith
6930 //-----------------------------------------------------------------------
6931
6932 /**
6933 * <p>Check if a CharSequence ends with a specified suffix.</p>
6934 *
6935 * <p>{@code null}s are handled without exceptions. Two {@code null}
6936 * references are considered to be equal. The comparison is case sensitive.</p>
6937 *
6938 * <pre>
6939 * StringUtils.endsWith(null, null) = true
6940 * StringUtils.endsWith(null, "def") = false
6941 * StringUtils.endsWith("abcdef", null) = false
6942 * StringUtils.endsWith("abcdef", "def") = true
6943 * StringUtils.endsWith("ABCDEF", "def") = false
6944 * StringUtils.endsWith("ABCDEF", "cde") = false
6945 * </pre>
6946 *
6947 * @see java.lang.String#endsWith(String)
6948 * @param str the CharSequence to check, may be null
6949 * @param suffix the suffix to find, may be null
6950 * @return {@code true} if the CharSequence ends with the suffix, case sensitive, or
6951 * both {@code null}
6952 * @since 2.4
6953 * @since 3.0 Changed signature from endsWith(String, String) to endsWith(CharSequence, CharSequence)
6954 */
6955 public static boolean endsWith(final CharSequence str, final CharSequence suffix) {
6956 return endsWith(str, suffix, false);
6957 }
6958
6959 /**
6960 * <p>Case insensitive check if a CharSequence ends with a specified suffix.</p>
6961 *
6962 * <p>{@code null}s are handled without exceptions. Two {@code null}
6963 * references are considered to be equal. The comparison is case insensitive.</p>
6964 *
6965 * <pre>
6966 * StringUtils.endsWithIgnoreCase(null, null) = true
6967 * StringUtils.endsWithIgnoreCase(null, "def") = false
6968 * StringUtils.endsWithIgnoreCase("abcdef", null) = false
6969 * StringUtils.endsWithIgnoreCase("abcdef", "def") = true
6970 * StringUtils.endsWithIgnoreCase("ABCDEF", "def") = true
6971 * StringUtils.endsWithIgnoreCase("ABCDEF", "cde") = false
6972 * </pre>
6973 *
6974 * @see java.lang.String#endsWith(String)
6975 * @param str the CharSequence to check, may be null
6976 * @param suffix the suffix to find, may be null
6977 * @return {@code true} if the CharSequence ends with the suffix, case insensitive, or
6978 * both {@code null}
6979 * @since 2.4
6980 * @since 3.0 Changed signature from endsWithIgnoreCase(String, String) to endsWithIgnoreCase(CharSequence, CharSequence)
6981 */
6982 public static boolean endsWithIgnoreCase(final CharSequence str, final CharSequence suffix) {
6983 return endsWith(str, suffix, true);
6984 }
6985
6986 /**
6987 * <p>Check if a CharSequence ends with a specified suffix (optionally case insensitive).</p>
6988 *
6989 * @see java.lang.String#endsWith(String)
6990 * @param str the CharSequence to check, may be null
6991 * @param suffix the suffix to find, may be null
6992 * @param ignoreCase indicates whether the compare should ignore case
6993 * (case insensitive) or not.
6994 * @return {@code true} if the CharSequence starts with the prefix or
6995 * both {@code null}
6996 */
6997 private static boolean endsWith(final CharSequence str, final CharSequence suffix, final boolean ignoreCase) {
6998 if (str == null || suffix == null) {
6999 return str == null && suffix == null;
7000 }
7001 if (suffix.length() > str.length()) {
7002 return false;
7003 }
7004 final int strOffset = str.length() - suffix.length();
7005 return CharSequenceUtils.regionMatches(str, ignoreCase, strOffset, suffix, 0, suffix.length());
7006 }
7007
7008 /**
7009 * <p>
7010 * Similar to <a
7011 * href="http://www.w3.org/TR/xpath/#function-normalize-space">http://www.w3.org/TR/xpath/#function-normalize
7012 * -space</a>
7013 * </p>
7014 * <p>
7015 * The function returns the argument string with whitespace normalized by using
7016 * <code>{@link #trim(String)}</code> to remove leading and trailing whitespace
7017 * and then replacing sequences of whitespace characters by a single space.
7018 * </p>
7019 * In XML Whitespace characters are the same as those allowed by the <a
7020 * href="http://www.w3.org/TR/REC-xml/#NT-S">S</a> production, which is S ::= (#x20 | #x9 | #xD | #xA)+
7021 * <p>
7022 * Java's regexp pattern \s defines whitespace as [ \t\n\x0B\f\r]
7023 * <p>
7024 * For reference:
7025 * <ul>
7026 * <li>\x0B = vertical tab</li>
7027 * <li>\f = #xC = form feed</li>
7028 * <li>#x20 = space</li>
7029 * <li>#x9 = \t</li>
7030 * <li>#xA = \n</li>
7031 * <li>#xD = \r</li>
7032 * </ul>
7033 * </p>
7034 * <p>
7035 * The difference is that Java's whitespace includes vertical tab and form feed, which this functional will also
7036 * normalize. Additionally <code>{@link #trim(String)}</code> removes control characters (char <= 32) from both
7037 * ends of this String.
7038 * </p>
7039 *
7040 * @see Pattern
7041 * @see #trim(String)
7042 * @see <a
7043 * href="http://www.w3.org/TR/xpath/#function-normalize-space">http://www.w3.org/TR/xpath/#function-normalize-space</a>
7044 * @param str the source String to normalize whitespaces from, may be null
7045 * @return the modified string with whitespace normalized, {@code null} if null String input
7046 *
7047 * @since 3.0
7048 */
7049 public static String normalizeSpace(final String str) {
7050 if (str == null) {
7051 return null;
7052 }
7053 return WHITESPACE_PATTERN.matcher(trim(str)).replaceAll(SPACE);
7054 }
7055
7056 /**
7057 * <p>Check if a CharSequence ends with any of an array of specified strings.</p>
7058 *
7059 * <pre>
7060 * StringUtils.endsWithAny(null, null) = false
7061 * StringUtils.endsWithAny(null, new String[] {"abc"}) = false
7062 * StringUtils.endsWithAny("abcxyz", null) = false
7063 * StringUtils.endsWithAny("abcxyz", new String[] {""}) = true
7064 * StringUtils.endsWithAny("abcxyz", new String[] {"xyz"}) = true
7065 * StringUtils.endsWithAny("abcxyz", new String[] {null, "xyz", "abc"}) = true
7066 * </pre>
7067 *
7068 * @param string the CharSequence to check, may be null
7069 * @param searchStrings the CharSequences to find, may be null or empty
7070 * @return {@code true} if the CharSequence ends with any of the the prefixes, case insensitive, or
7071 * both {@code null}
7072 * @since 3.0
7073 */
7074 public static boolean endsWithAny(final CharSequence string, final CharSequence... searchStrings) {
7075 if (isEmpty(string) || ArrayUtils.isEmpty(searchStrings)) {
7076 return false;
7077 }
7078 for (final CharSequence searchString : searchStrings) {
7079 if (StringUtils.endsWith(string, searchString)) {
7080 return true;
7081 }
7082 }
7083 return false;
7084 }
7085
7086 /**
7087 * Converts a <code>byte[]</code> to a String using the specified character encoding.
7088 *
7089 * @param bytes
7090 * the byte array to read from
7091 * @param charsetName
7092 * the encoding to use, if null then use the platform default
7093 * @return a new String
7094 * @throws UnsupportedEncodingException
7095 * If the named charset is not supported
7096 * @throws NullPointerException
7097 * if the input is null
7098 * @since 3.1
7099 */
7100 public static String toString(final byte[] bytes, final String charsetName) throws UnsupportedEncodingException {
7101 return charsetName == null ? new String(bytes) : new String(bytes, charsetName);
7102 }
7103
7104 }