1 /*
2 * Licensed to the Apache Software Foundation (ASF) under one or more
3 * contributor license agreements. See the NOTICE file distributed with
4 * this work for additional information regarding copyright ownership.
5 * The ASF licenses this file to You under the Apache License, Version 2.0
6 * (the "License"); you may not use this file except in compliance with
7 * the License. You may obtain a copy of the License at
8 *
9 * https://www.apache.org/licenses/LICENSE-2.0
10 *
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
16 */
17 package org.apache.commons.text;
18
19 import java.util.HashSet;
20 import java.util.Set;
21 import java.util.function.Predicate;
22 import java.util.regex.Matcher;
23 import java.util.regex.Pattern;
24
25 import org.apache.commons.lang3.ArrayUtils;
26 import org.apache.commons.lang3.StringUtils;
27 import org.apache.commons.lang3.Strings;
28 import org.apache.commons.lang3.Validate;
29
30 /**
31 * Operations on Strings that contain words.
32 *
33 * <p>
34 * This class tries to handle {@code null} input gracefully. An exception will not be thrown for a
35 * {@code null} input. Each method documents its behavior in more detail.
36 * </p>
37 *
38 * @since 1.1
39 */
40 public class WordUtils {
41
42 /**
43 * Abbreviates the words nicely.
44 *
45 * <p>
46 * This method searches for the first space after the lower limit and abbreviates
47 * the String there. It will also append any String passed as a parameter
48 * to the end of the String. The upper limit can be specified to forcibly
49 * abbreviate a String.
50 * </p>
51 *
52 * @param str the string to be abbreviated. If null is passed, null is returned.
53 * If the empty String is passed, the empty string is returned.
54 * @param lower the lower limit; negative value is treated as zero.
55 * @param upper the upper limit; specify -1 if no limit is desired.
56 * The upper limit cannot be lower than the lower limit.
57 * @param appendToEnd String to be appended to the end of the abbreviated string.
58 * This is appended ONLY if the string was indeed abbreviated.
59 * The append does not count towards the lower or upper limits.
60 * @return The abbreviated String.
61 *
62 * <pre>
63 * WordUtils.abbreviate("Now is the time for all good men", 0, 40, null)); = "Now"
64 * WordUtils.abbreviate("Now is the time for all good men", 10, 40, null)); = "Now is the"
65 * WordUtils.abbreviate("Now is the time for all good men", 20, 40, null)); = "Now is the time for all"
66 * WordUtils.abbreviate("Now is the time for all good men", 0, 40, "")); = "Now"
67 * WordUtils.abbreviate("Now is the time for all good men", 10, 40, "")); = "Now is the"
68 * WordUtils.abbreviate("Now is the time for all good men", 20, 40, "")); = "Now is the time for all"
69 * WordUtils.abbreviate("Now is the time for all good men", 0, 40, " ...")); = "Now ..."
70 * WordUtils.abbreviate("Now is the time for all good men", 10, 40, " ...")); = "Now is the ..."
71 * WordUtils.abbreviate("Now is the time for all good men", 20, 40, " ...")); = "Now is the time for all ..."
72 * WordUtils.abbreviate("Now is the time for all good men", 0, -1, "")); = "Now"
73 * WordUtils.abbreviate("Now is the time for all good men", 10, -1, "")); = "Now is the"
74 * WordUtils.abbreviate("Now is the time for all good men", 20, -1, "")); = "Now is the time for all"
75 * WordUtils.abbreviate("Now is the time for all good men", 50, -1, "")); = "Now is the time for all good men"
76 * WordUtils.abbreviate("Now is the time for all good men", 1000, -1, "")); = "Now is the time for all good men"
77 * WordUtils.abbreviate("Now is the time for all good men", 9, -10, null)); = Throws {@link IllegalArgumentException}
78 * WordUtils.abbreviate("Now is the time for all good men", 10, 5, null)); = Throws {@link IllegalArgumentException}
79 * </pre>
80 */
81 public static String abbreviate(final String str, int lower, int upper, final String appendToEnd) {
82 Validate.isTrue(upper >= -1, "upper value cannot be less than -1");
83 Validate.isTrue(upper >= lower || upper == -1, "upper value is less than lower value");
84 if (StringUtils.isEmpty(str)) {
85 return str;
86 }
87
88 // if the lower value is greater than the length of the string,
89 // set to the length of the string
90 if (lower > str.length()) {
91 lower = str.length();
92 }
93
94 // if the upper value is -1 (i.e. no limit) or is greater
95 // than the length of the string, set to the length of the string
96 if (upper == -1 || upper > str.length()) {
97 upper = str.length();
98 }
99
100 final StringBuilder result = new StringBuilder();
101 final int index = Strings.CS.indexOf(str, " ", lower);
102 if (index == -1) {
103 result.append(str, 0, upper);
104 // only if abbreviation has occurred do we append the appendToEnd value
105 if (upper != str.length()) {
106 result.append(StringUtils.defaultString(appendToEnd));
107 }
108 } else {
109 result.append(str, 0, Math.min(index, upper));
110 result.append(StringUtils.defaultString(appendToEnd));
111 }
112
113 return result.toString();
114 }
115
116 /**
117 * Capitalizes all the whitespace separated words in a String.
118 * Only the first character of each word is changed. To convert the
119 * rest of each word to lowercase at the same time,
120 * use {@link #capitalizeFully(String)}.
121 *
122 * <p>Whitespace is defined by {@link Character#isWhitespace(char)}.
123 * A {@code null} input String returns {@code null}.
124 * Capitalization uses the Unicode title case, normally equivalent to
125 * upper case.</p>
126 *
127 * <pre>
128 * WordUtils.capitalize(null) = null
129 * WordUtils.capitalize("") = ""
130 * WordUtils.capitalize("i am FINE") = "I Am FINE"
131 * </pre>
132 *
133 * @param str the String to capitalize, may be null.
134 * @return capitalized String, {@code null} if null String input.
135 * @see #uncapitalize(String)
136 * @see #capitalizeFully(String)
137 */
138 public static String capitalize(final String str) {
139 return capitalize(str, null);
140 }
141
142 /**
143 * Capitalizes all the delimiter separated words in a String.
144 * Only the first character of each word is changed. To convert the
145 * rest of each word to lowercase at the same time,
146 * use {@link #capitalizeFully(String, char[])}.
147 *
148 * <p>The delimiters represent a set of characters understood to separate words.
149 * The first string character and the first non-delimiter character after a
150 * delimiter will be capitalized.</p>
151 *
152 * <p>A {@code null} input String returns {@code null}.
153 * Capitalization uses the Unicode title case, normally equivalent to
154 * upper case.</p>
155 *
156 * <pre>
157 * WordUtils.capitalize(null, *) = null
158 * WordUtils.capitalize("", *) = ""
159 * WordUtils.capitalize(*, new char[0]) = *
160 * WordUtils.capitalize("i am fine", null) = "I Am Fine"
161 * WordUtils.capitalize("i aM.fine", {'.'}) = "I aM.Fine"
162 * WordUtils.capitalize("i am fine", new char[]{}) = "I am fine"
163 * </pre>
164 *
165 * @param str the String to capitalize, may be null.
166 * @param delimiters set of characters to determine capitalization, null means whitespace.
167 * @return capitalized String, {@code null} if null String input.
168 * @see #uncapitalize(String)
169 * @see #capitalizeFully(String)
170 */
171 public static String capitalize(final String str, final char... delimiters) {
172 if (StringUtils.isEmpty(str)) {
173 return str;
174 }
175 final Predicate<Integer> isDelimiter = generateIsDelimiterFunction(delimiters);
176 final int strLen = str.length();
177 final int[] newCodePoints = new int[strLen];
178 int outOffset = 0;
179
180 boolean capitalizeNext = true;
181 for (int index = 0; index < strLen;) {
182 final int codePoint = str.codePointAt(index);
183
184 if (isDelimiter.test(codePoint)) {
185 capitalizeNext = true;
186 newCodePoints[outOffset++] = codePoint;
187 index += Character.charCount(codePoint);
188 } else if (capitalizeNext) {
189 final int titleCaseCodePoint = Character.toTitleCase(codePoint);
190 newCodePoints[outOffset++] = titleCaseCodePoint;
191 index += Character.charCount(titleCaseCodePoint);
192 capitalizeNext = false;
193 } else {
194 newCodePoints[outOffset++] = codePoint;
195 index += Character.charCount(codePoint);
196 }
197 }
198 return new String(newCodePoints, 0, outOffset);
199 }
200
201 /**
202 * Converts all the whitespace separated words in a String into capitalized words,
203 * that is each word is made up of a titlecase character and then a series of
204 * lowercase characters.
205 *
206 * <p>Whitespace is defined by {@link Character#isWhitespace(char)}.
207 * A {@code null} input String returns {@code null}.
208 * Capitalization uses the Unicode title case, normally equivalent to
209 * upper case.</p>
210 *
211 * <pre>
212 * WordUtils.capitalizeFully(null) = null
213 * WordUtils.capitalizeFully("") = ""
214 * WordUtils.capitalizeFully("i am FINE") = "I Am Fine"
215 * </pre>
216 *
217 * @param str the String to capitalize, may be null.
218 * @return capitalized String, {@code null} if null String input.
219 */
220 public static String capitalizeFully(final String str) {
221 return capitalizeFully(str, null);
222 }
223
224 /**
225 * Converts all the delimiter separated words in a String into capitalized words,
226 * that is each word is made up of a titlecase character and then a series of
227 * lowercase characters.
228 *
229 * <p>The delimiters represent a set of characters understood to separate words.
230 * The first string character and the first non-delimiter character after a
231 * delimiter will be capitalized.</p>
232 *
233 * <p>A {@code null} input String returns {@code null}.
234 * Capitalization uses the Unicode title case, normally equivalent to
235 * upper case.</p>
236 *
237 * <pre>
238 * WordUtils.capitalizeFully(null, *) = null
239 * WordUtils.capitalizeFully("", *) = ""
240 * WordUtils.capitalizeFully(*, null) = *
241 * WordUtils.capitalizeFully(*, new char[0]) = *
242 * WordUtils.capitalizeFully("i aM.fine", {'.'}) = "I am.Fine"
243 * </pre>
244 *
245 * @param str the String to capitalize, may be null.
246 * @param delimiters set of characters to determine capitalization, null means whitespace.
247 * @return capitalized String, {@code null} if null String input.
248 */
249 public static String capitalizeFully(String str, final char... delimiters) {
250 if (StringUtils.isEmpty(str)) {
251 return str;
252 }
253 str = str.toLowerCase();
254 return capitalize(str, delimiters);
255 }
256
257 /**
258 * Checks if the String contains all words in the given array.
259 *
260 * <p>
261 * A {@code null} String will return {@code false}. A {@code null}, zero
262 * length search array or if one element of array is null will return {@code false}.
263 * </p>
264 *
265 * <pre>
266 * WordUtils.containsAllWords(null, *) = false
267 * WordUtils.containsAllWords("", *) = false
268 * WordUtils.containsAllWords(*, null) = false
269 * WordUtils.containsAllWords(*, []) = false
270 * WordUtils.containsAllWords("abcd", "ab", "cd") = false
271 * WordUtils.containsAllWords("abc def", "def", "abc") = true
272 * </pre>
273 *
274 * @param word The CharSequence to check, may be null.
275 * @param words The array of String words to search for, may be null.
276 * @return {@code true} if all search words are found, {@code false} otherwise.
277 */
278 public static boolean containsAllWords(final CharSequence word, final CharSequence... words) {
279 if (StringUtils.isEmpty(word) || ArrayUtils.isEmpty(words)) {
280 return false;
281 }
282 for (final CharSequence w : words) {
283 if (StringUtils.isBlank(w)) {
284 return false;
285 }
286 final Pattern p = Pattern.compile(".*\\b" + Pattern.quote(w.toString()) + "\\b.*");
287 if (!p.matcher(word).matches()) {
288 return false;
289 }
290 }
291 return true;
292 }
293
294 /**
295 * Given the array of delimiters supplied; returns a function determining whether a character code point is a delimiter.
296 * The function provides O(1) lookup time.
297 * Whitespace is defined by {@link Character#isWhitespace(char)} and is used as the defaultvalue if delimiters is null.
298 *
299 * @param delimiters set of characters to determine delimiters, null means whitespace.
300 * @return Predicate<Integer> taking a code point value as an argument and returning true if a delimiter.
301 */
302 private static Predicate<Integer> generateIsDelimiterFunction(final char[] delimiters) {
303 final Predicate<Integer> isDelimiter;
304 if (delimiters == null || delimiters.length == 0) {
305 isDelimiter = delimiters == null ? Character::isWhitespace : c -> false;
306 } else {
307 final Set<Integer> delimiterSet = new HashSet<>();
308 for (int index = 0; index < delimiters.length; index++) {
309 delimiterSet.add(Character.codePointAt(delimiters, index));
310 }
311 isDelimiter = delimiterSet::contains;
312 }
313
314 return isDelimiter;
315 }
316
317 /**
318 * Extracts the initial characters from each word in the String.
319 *
320 * <p>All first characters after whitespace are returned as a new string.
321 * Their case is not changed.</p>
322 *
323 * <p>Whitespace is defined by {@link Character#isWhitespace(char)}.
324 * A {@code null} input String returns {@code null}.</p>
325 *
326 * <pre>
327 * WordUtils.initials(null) = null
328 * WordUtils.initials("") = ""
329 * WordUtils.initials("Ben John Lee") = "BJL"
330 * WordUtils.initials("Ben J.Lee") = "BJ"
331 * </pre>
332 *
333 * @param str the String to get initials from, may be null.
334 * @return String of initial letters, {@code null} if null String input.
335 * @see #initials(String,char[])
336 */
337 public static String initials(final String str) {
338 return initials(str, null);
339 }
340
341 /**
342 * Extracts the initial characters from each word in the String.
343 *
344 * <p>All first characters after the defined delimiters are returned as a new string.
345 * Their case is not changed.</p>
346 *
347 * <p>If the delimiters array is null, then Whitespace is used.
348 * Whitespace is defined by {@link Character#isWhitespace(char)}.
349 * A {@code null} input String returns {@code null}.
350 * An empty delimiter array returns an empty String.</p>
351 *
352 * <pre>
353 * WordUtils.initials(null, *) = null
354 * WordUtils.initials("", *) = ""
355 * WordUtils.initials("Ben John Lee", null) = "BJL"
356 * WordUtils.initials("Ben J.Lee", null) = "BJ"
357 * WordUtils.initials("Ben J.Lee", [' ','.']) = "BJL"
358 * WordUtils.initials(*, new char[0]) = ""
359 * </pre>
360 *
361 * @param str the String to get initials from, may be null.
362 * @param delimiters set of characters to determine words, null means whitespace.
363 * @return String of initial characters, {@code null} if null String input.
364 * @see #initials(String)
365 */
366 public static String initials(final String str, final char... delimiters) {
367 if (StringUtils.isEmpty(str)) {
368 return str;
369 }
370 if (delimiters != null && delimiters.length == 0) {
371 return StringUtils.EMPTY;
372 }
373 final Predicate<Integer> isDelimiter = generateIsDelimiterFunction(delimiters);
374 final int strLen = str.length();
375 final int[] newCodePoints = new int[strLen / 2 + 1];
376 int count = 0;
377 boolean lastWasGap = true;
378 for (int i = 0; i < strLen;) {
379 final int codePoint = str.codePointAt(i);
380
381 if (isDelimiter.test(codePoint)) {
382 lastWasGap = true;
383 } else if (lastWasGap) {
384 newCodePoints[count++] = codePoint;
385 lastWasGap = false;
386 }
387
388 i += Character.charCount(codePoint);
389 }
390 return new String(newCodePoints, 0, count);
391 }
392
393 /**
394 * Is the character a delimiter.
395 *
396 * @param ch the character to check.
397 * @param delimiters the delimiters.
398 * @return true if it is a delimiter.
399 * @deprecated as of 1.2 and will be removed in 2.0.
400 */
401 @Deprecated
402 public static boolean isDelimiter(final char ch, final char[] delimiters) {
403 if (delimiters == null) {
404 return Character.isWhitespace(ch);
405 }
406 for (final char delimiter : delimiters) {
407 if (ch == delimiter) {
408 return true;
409 }
410 }
411 return false;
412 }
413
414 /**
415 * Is the codePoint a delimiter.
416 *
417 * @param codePoint the codePint to check.
418 * @param delimiters the delimiters.
419 * @return true if it is a delimiter.
420 * @deprecated as of 1.2 and will be removed in 2.0.
421 */
422 @Deprecated
423 public static boolean isDelimiter(final int codePoint, final char[] delimiters) {
424 if (delimiters == null) {
425 return Character.isWhitespace(codePoint);
426 }
427 for (int index = 0; index < delimiters.length; index++) {
428 final int delimiterCodePoint = Character.codePointAt(delimiters, index);
429 if (delimiterCodePoint == codePoint) {
430 return true;
431 }
432 }
433 return false;
434 }
435
436 /**
437 * Swaps the case of a String using a word based algorithm.
438 *
439 * <ul>
440 * <li>Upper case character converts to Lower case</li>
441 * <li>Title case character converts to Lower case</li>
442 * <li>Lower case character after Whitespace or at start converts to Title case</li>
443 * <li>Other Lower case character converts to Upper case</li>
444 * </ul>
445 *
446 * <p>Whitespace is defined by {@link Character#isWhitespace(char)}.
447 * A {@code null} input String returns {@code null}.</p>
448 *
449 * <pre>
450 * StringUtils.swapCase(null) = null
451 * StringUtils.swapCase("") = ""
452 * StringUtils.swapCase("The dog has a BONE") = "tHE DOG HAS A bone"
453 * </pre>
454 *
455 * @param str the String to swap case, may be null.
456 * @return The changed String, {@code null} if null String input.
457 */
458 public static String swapCase(final String str) {
459 if (StringUtils.isEmpty(str)) {
460 return str;
461 }
462 final int strLen = str.length();
463 final int[] newCodePoints = new int[strLen];
464 int outOffset = 0;
465 boolean whitespace = true;
466 for (int index = 0; index < strLen;) {
467 final int oldCodepoint = str.codePointAt(index);
468 final int newCodePoint;
469 if (Character.isUpperCase(oldCodepoint) || Character.isTitleCase(oldCodepoint)) {
470 newCodePoint = Character.toLowerCase(oldCodepoint);
471 whitespace = false;
472 } else if (Character.isLowerCase(oldCodepoint)) {
473 if (whitespace) {
474 newCodePoint = Character.toTitleCase(oldCodepoint);
475 whitespace = false;
476 } else {
477 newCodePoint = Character.toUpperCase(oldCodepoint);
478 }
479 } else {
480 whitespace = Character.isWhitespace(oldCodepoint);
481 newCodePoint = oldCodepoint;
482 }
483 newCodePoints[outOffset++] = newCodePoint;
484 index += Character.charCount(newCodePoint);
485 }
486 return new String(newCodePoints, 0, outOffset);
487 }
488
489 /**
490 * Uncapitalizes all the whitespace separated words in a String.
491 * Only the first character of each word is changed.
492 *
493 * <p>Whitespace is defined by {@link Character#isWhitespace(char)}.
494 * A {@code null} input String returns {@code null}.</p>
495 *
496 * <pre>
497 * WordUtils.uncapitalize(null) = null
498 * WordUtils.uncapitalize("") = ""
499 * WordUtils.uncapitalize("I Am FINE") = "i am fINE"
500 * </pre>
501 *
502 * @param str the String to uncapitalize, may be null.
503 * @return uncapitalized String, {@code null} if null String input.
504 * @see #capitalize(String)
505 */
506 public static String uncapitalize(final String str) {
507 return uncapitalize(str, null);
508 }
509
510 /**
511 * Uncapitalizes all the whitespace separated words in a String.
512 * Only the first character of each word is changed.
513 *
514 * <p>The delimiters represent a set of characters understood to separate words.
515 * The first string character and the first non-delimiter character after a
516 * delimiter will be uncapitalized.</p>
517 *
518 * <p>Whitespace is defined by {@link Character#isWhitespace(char)}.
519 * A {@code null} input String returns {@code null}.</p>
520 *
521 * <pre>
522 * WordUtils.uncapitalize(null, *) = null
523 * WordUtils.uncapitalize("", *) = ""
524 * WordUtils.uncapitalize(*, null) = *
525 * WordUtils.uncapitalize(*, new char[0]) = *
526 * WordUtils.uncapitalize("I AM.FINE", {'.'}) = "i AM.fINE"
527 * WordUtils.uncapitalize("I am fine", new char[]{}) = "i am fine"
528 * </pre>
529 *
530 * @param str the String to uncapitalize, may be null.
531 * @param delimiters set of characters to determine uncapitalization, null means whitespace.
532 * @return uncapitalized String, {@code null} if null String input.
533 * @see #capitalize(String)
534 */
535 public static String uncapitalize(final String str, final char... delimiters) {
536 if (StringUtils.isEmpty(str)) {
537 return str;
538 }
539 final Predicate<Integer> isDelimiter = generateIsDelimiterFunction(delimiters);
540 final int strLen = str.length();
541 final int[] newCodePoints = new int[strLen];
542 int outOffset = 0;
543
544 boolean uncapitalizeNext = true;
545 for (int index = 0; index < strLen;) {
546 final int codePoint = str.codePointAt(index);
547
548 if (isDelimiter.test(codePoint)) {
549 uncapitalizeNext = true;
550 newCodePoints[outOffset++] = codePoint;
551 index += Character.charCount(codePoint);
552 } else if (uncapitalizeNext) {
553 final int titleCaseCodePoint = Character.toLowerCase(codePoint);
554 newCodePoints[outOffset++] = titleCaseCodePoint;
555 index += Character.charCount(titleCaseCodePoint);
556 uncapitalizeNext = false;
557 } else {
558 newCodePoints[outOffset++] = codePoint;
559 index += Character.charCount(codePoint);
560 }
561 }
562 return new String(newCodePoints, 0, outOffset);
563 }
564
565 /**
566 * Wraps a single line of text, identifying words by {@code ' '}.
567 *
568 * <p>New lines will be separated by the system property line separator.
569 * Very long words, such as URLs will <em>not</em> be wrapped.</p>
570 *
571 * <p>Leading spaces on a new line are stripped.
572 * Trailing spaces are not stripped.</p>
573 *
574 * <table border="1">
575 * <caption>Examples</caption>
576 * <tr>
577 * <th>input</th>
578 * <th>wrapLength</th>
579 * <th>result</th>
580 * </tr>
581 * <tr>
582 * <td>null</td>
583 * <td>*</td>
584 * <td>null</td>
585 * </tr>
586 * <tr>
587 * <td>""</td>
588 * <td>*</td>
589 * <td>""</td>
590 * </tr>
591 * <tr>
592 * <td>"Here is one line of text that is going to be wrapped after 20 columns."</td>
593 * <td>20</td>
594 * <td>"Here is one line of\ntext that is going\nto be wrapped after\n20 columns."</td>
595 * </tr>
596 * <tr>
597 * <td>"Click here to jump to the commons website - https://commons.apache.org"</td>
598 * <td>20</td>
599 * <td>"Click here to jump\nto the commons\nwebsite -\nhttps://commons.apache.org"</td>
600 * </tr>
601 * <tr>
602 * <td>"Click here, https://commons.apache.org, to jump to the commons website"</td>
603 * <td>20</td>
604 * <td>"Click here,\nhttps://commons.apache.org,\nto jump to the\ncommons website"</td>
605 * </tr>
606 * </table>
607 *
 * (assuming that '\n' is the system's line separator)
609 *
610 * @param str the String to be word wrapped, may be null.
611 * @param wrapLength the column to wrap the words at, less than 1 is treated as 1.
612 * @return a line with newlines inserted, {@code null} if null input.
613 */
614 public static String wrap(final String str, final int wrapLength) {
615 return wrap(str, wrapLength, null, false);
616 }
617
618 /**
619 * Wraps a single line of text, identifying words by {@code ' '}.
620 *
621 * <p>Leading spaces on a new line are stripped.
622 * Trailing spaces are not stripped.</p>
623 *
624 * <table border="1">
625 * <caption>Examples</caption>
626 * <tr>
627 * <th>input</th>
628 * <th>wrapLength</th>
629 * <th>newLineString</th>
630 * <th>wrapLongWords</th>
631 * <th>result</th>
632 * </tr>
633 * <tr>
634 * <td>null</td>
635 * <td>*</td>
636 * <td>*</td>
637 * <td>true/false</td>
638 * <td>null</td>
639 * </tr>
640 * <tr>
641 * <td>""</td>
642 * <td>*</td>
643 * <td>*</td>
644 * <td>true/false</td>
645 * <td>""</td>
646 * </tr>
647 * <tr>
648 * <td>"Here is one line of text that is going to be wrapped after 20 columns."</td>
649 * <td>20</td>
650 * <td>"\n"</td>
651 * <td>true/false</td>
652 * <td>"Here is one line of\ntext that is going\nto be wrapped after\n20 columns."</td>
653 * </tr>
654 * <tr>
655 * <td>"Here is one line of text that is going to be wrapped after 20 columns."</td>
656 * <td>20</td>
 * <td>"&lt;br /&gt;"</td>
 * <td>true/false</td>
 * <td>"Here is one line of&lt;br /&gt;text that is going&lt;br /&gt;to be wrapped after&lt;br /&gt;20 columns."</td>
661 * </tr>
662 * <tr>
663 * <td>"Here is one line of text that is going to be wrapped after 20 columns."</td>
664 * <td>20</td>
665 * <td>null</td>
666 * <td>true/false</td>
667 * <td>"Here is one line of" + systemNewLine + "text that is going"
668 * + systemNewLine + "to be wrapped after" + systemNewLine + "20 columns."</td>
669 * </tr>
670 * <tr>
671 * <td>"Click here to jump to the commons website - https://commons.apache.org"</td>
672 * <td>20</td>
673 * <td>"\n"</td>
674 * <td>false</td>
675 * <td>"Click here to jump\nto the commons\nwebsite -\nhttps://commons.apache.org"</td>
676 * </tr>
677 * <tr>
678 * <td>"Click here to jump to the commons website - https://commons.apache.org"</td>
679 * <td>20</td>
680 * <td>"\n"</td>
681 * <td>true</td>
682 * <td>"Click here to jump\nto the commons\nwebsite -\nhttps://commons.apach\ne.org"</td>
683 * </tr>
684 * </table>
685 *
686 * @param str the String to be word wrapped, may be null.
687 * @param wrapLength the column to wrap the words at, less than 1 is treated as 1.
688 * @param newLineStr the string to insert for a new line, {@code null} uses the system property line separator.
689 * @param wrapLongWords true if long words (such as URLs) should be wrapped.
690 * @return a line with newlines inserted, {@code null} if null input.
691 */
692 public static String wrap(final String str,
693 final int wrapLength,
694 final String newLineStr,
695 final boolean wrapLongWords) {
696 return wrap(str, wrapLength, newLineStr, wrapLongWords, " ");
697 }
698
699 /**
700 * Wraps a single line of text, identifying words by {@code wrapOn}.
701 *
702 * <p>Leading spaces on a new line are stripped.
703 * Trailing spaces are not stripped.</p>
704 *
705 * <table border="1">
706 * <caption>Examples</caption>
707 * <tr>
708 * <th>input</th>
709 * <th>wrapLength</th>
710 * <th>newLineString</th>
711 * <th>wrapLongWords</th>
712 * <th>wrapOn</th>
713 * <th>result</th>
714 * </tr>
715 * <tr>
716 * <td>null</td>
717 * <td>*</td>
718 * <td>*</td>
719 * <td>true/false</td>
720 * <td>*</td>
721 * <td>null</td>
722 * </tr>
723 * <tr>
724 * <td>""</td>
725 * <td>*</td>
726 * <td>*</td>
727 * <td>true/false</td>
728 * <td>*</td>
729 * <td>""</td>
730 * </tr>
731 * <tr>
732 * <td>"Here is one line of text that is going to be wrapped after 20 columns."</td>
733 * <td>20</td>
734 * <td>"\n"</td>
735 * <td>true/false</td>
736 * <td>" "</td>
737 * <td>"Here is one line of\ntext that is going\nto be wrapped after\n20 columns."</td>
738 * </tr>
739 * <tr>
740 * <td>"Here is one line of text that is going to be wrapped after 20 columns."</td>
741 * <td>20</td>
 * <td>"&lt;br /&gt;"</td>
 * <td>true/false</td>
 * <td>" "</td>
 * <td>"Here is one line of&lt;br /&gt;text that is going&lt;br /&gt;to be wrapped after&lt;br /&gt;20 columns."</td>
747 * </tr>
748 * <tr>
749 * <td>"Here is one line of text that is going to be wrapped after 20 columns."</td>
750 * <td>20</td>
751 * <td>null</td>
752 * <td>true/false</td>
753 * <td>" "</td>
754 * <td>"Here is one line of" + systemNewLine + "text that is going"
755 * + systemNewLine + "to be wrapped after" + systemNewLine + "20 columns."</td>
756 * </tr>
757 * <tr>
758 * <td>"Click here to jump to the commons website - https://commons.apache.org"</td>
759 * <td>20</td>
760 * <td>"\n"</td>
761 * <td>false</td>
762 * <td>" "</td>
763 * <td>"Click here to jump\nto the commons\nwebsite -\nhttps://commons.apache.org"</td>
764 * </tr>
765 * <tr>
766 * <td>"Click here to jump to the commons website - https://commons.apache.org"</td>
767 * <td>20</td>
768 * <td>"\n"</td>
769 * <td>true</td>
770 * <td>" "</td>
771 * <td>"Click here to jump\nto the commons\nwebsite -\nhttps://commons.apach\ne.org"</td>
772 * </tr>
773 * <tr>
774 * <td>"flammable/inflammable"</td>
775 * <td>20</td>
776 * <td>"\n"</td>
777 * <td>true</td>
778 * <td>"/"</td>
779 * <td>"flammable\ninflammable"</td>
780 * </tr>
781 * </table>
782 * @param str the String to be word wrapped, may be null.
783 * @param wrapLength the column to wrap the words at, less than 1 is treated as 1.
784 * @param newLineStr the string to insert for a new line, {@code null} uses the system property line separator.
785 * @param wrapLongWords true if long words (such as URLs) should be wrapped.
 * @param wrapOn regular expression identifying the breakable characters; if {@code null} or a blank string is provided, a space character is used.
787 * @return a line with newlines inserted, {@code null} if null input.
788 */
public static String wrap(final String str,
                          int wrapLength,
                          String newLineStr,
                          final boolean wrapLongWords,
                          String wrapOn) {
    // Normalize the arguments: null text is returned as-is, the other parameters get defaults.
    if (str == null) {
        return null;
    }
    if (newLineStr == null) {
        newLineStr = System.lineSeparator();
    }
    if (wrapLength < 1) {
        wrapLength = 1;
    }
    if (StringUtils.isBlank(wrapOn)) {
        wrapOn = " ";
    }
    final Pattern patternToWrapOn = Pattern.compile(wrapOn);
    final int inputLineLength = str.length();
    // Index of the first character of str that has not been written to wrappedLine yet.
    int offset = 0;
    final StringBuilder wrappedLine = new StringBuilder(inputLineLength + 32);
    // Length of the most recent match recorded below (a match at the start of the search
    // window, or the fallback match for an unwrapped long word); -1 while none is recorded.
    // A value of 0 marks a zero-width match.
    int matcherSize = -1;

    while (offset < inputLineLength) {
        // Absolute index in str at which the current line will be broken; -1 if none found.
        int spaceToWrapAt = -1;
        // Search for break candidates in the next wrapLength + 1 characters only. The sum is
        // computed as a long and capped so that a huge wrapLength cannot overflow an int.
        Matcher matcher = patternToWrapOn.matcher(str.substring(offset,
                Math.min((int) Math.min(Integer.MAX_VALUE, offset + wrapLength + 1L), inputLineLength)));
        if (matcher.find()) {
            if (matcher.start() == 0) {
                // The window starts with a match: a non-empty one is skipped entirely and the
                // window is recomputed, so a line never starts with the delimiter.
                matcherSize = matcher.end();
                if (matcherSize != 0) {
                    offset += matcher.end();
                    continue;
                }
                // Zero-width match at the window start: step over one character instead.
                // NOTE(review): presumably this guarantees forward progress - confirm.
                offset += 1;
            }
            spaceToWrapAt = matcher.start() + offset;
        }

        // only last line without leading spaces is left
        if (inputLineLength - offset <= wrapLength) {
            break;
        }

        // Keep the LAST match inside the window, so the line becomes as long as it may be.
        while (matcher.find()) {
            spaceToWrapAt = matcher.start() + offset;
        }

        if (spaceToWrapAt >= offset) {
            // normal case
            wrappedLine.append(str, offset, spaceToWrapAt);
            wrappedLine.append(newLineStr);
            // Resume one character after the break position.
            offset = spaceToWrapAt + 1;

        } else // really long word or URL
        if (wrapLongWords) {
            // NOTE(review): appears to undo the one-character step taken above for a
            // zero-width match at the window start - confirm.
            if (matcherSize == 0) {
                offset--;
            }
            // wrap really long word one line at a time
            wrappedLine.append(str, offset, wrapLength + offset);
            wrappedLine.append(newLineStr);
            offset += wrapLength;
            matcherSize = -1;
        } else {
            // do not wrap really long word, just extend beyond limit
            // Search the remainder of the text, beyond the window, for the next break position.
            matcher = patternToWrapOn.matcher(str.substring(offset + wrapLength));
            if (matcher.find()) {
                matcherSize = matcher.end() - matcher.start();
                spaceToWrapAt = matcher.start() + offset + wrapLength;
            }

            if (spaceToWrapAt >= 0) {
                // A later break position exists: emit the over-long word as its own line.
                if (matcherSize == 0 && offset != 0) {
                    offset--;
                }
                wrappedLine.append(str, offset, spaceToWrapAt);
                wrappedLine.append(newLineStr);
                offset = spaceToWrapAt + 1;
            } else {
                // No further break position: the rest of the text is emitted unbroken.
                if (matcherSize == 0 && offset != 0) {
                    offset--;
                }
                wrappedLine.append(str, offset, str.length());
                offset = inputLineLength;
                matcherSize = -1;
            }
        }
    }

    // NOTE(review): same zero-width compensation as inside the loop, applied to the tail - confirm.
    if (matcherSize == 0 && offset < inputLineLength) {
        offset--;
    }

    // Whatever is left in line is short enough to just pass through
    wrappedLine.append(str, offset, str.length());

    return wrappedLine.toString();
}
888
/**
 * Creates a new instance.
 *
 * <p>{@code WordUtils} instances should NOT be constructed in standard programming;
 * call the static methods directly instead, for example
 * {@code WordUtils.wrap("foo bar", 20);}.</p>
 *
 * <p>This constructor is public to permit tools that require a JavaBean
 * instance to operate.</p>
 */
public WordUtils() {
    // Nothing to initialize: the class declares no instance state.
    super();
}
899 }