View Javadoc
1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one or more
3    * contributor license agreements.  See the NOTICE file distributed with
4    * this work for additional information regarding copyright ownership.
5    * The ASF licenses this file to You under the Apache License, Version 2.0
6    * (the "License"); you may not use this file except in compliance with
7    * the License.  You may obtain a copy of the License at
8    *
9    *      http://www.apache.org/licenses/LICENSE-2.0
10   *
11   * Unless required by applicable law or agreed to in writing, software
12   * distributed under the License is distributed on an "AS IS" BASIS,
13   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14   * See the License for the specific language governing permissions and
15   * limitations under the License.
16   */
17  package org.apache.commons.text;
18  
19  import java.util.HashSet;
20  import java.util.Set;
21  import java.util.function.Predicate;
22  import java.util.regex.Matcher;
23  import java.util.regex.Pattern;
24  
25  import org.apache.commons.lang3.ArrayUtils;
26  import org.apache.commons.lang3.StringUtils;
27  import org.apache.commons.lang3.Validate;
28  
29  /**
30   * Operations on Strings that contain words.
31   *
32   * <p>
33   * This class tries to handle {@code null} input gracefully. An exception will not be thrown for a
34   * {@code null} input. Each method documents its behavior in more detail.
35   * </p>
36   *
37   * @since 1.1
38   */
39  public class WordUtils {
40  
41      /**
42       * Abbreviates the words nicely.
43       *
44       * <p>
45       * This method searches for the first space after the lower limit and abbreviates
46       * the String there. It will also append any String passed as a parameter
47       * to the end of the String. The upper limit can be specified to forcibly
48       * abbreviate a String.
49       * </p>
50       *
51       * @param str         the string to be abbreviated. If null is passed, null is returned.
52       *                    If the empty String is passed, the empty string is returned.
53       * @param lower       the lower limit; negative value is treated as zero.
54       * @param upper       the upper limit; specify -1 if no limit is desired.
55       *                    The upper limit cannot be lower than the lower limit.
56       * @param appendToEnd String to be appended to the end of the abbreviated string.
57       *                    This is appended ONLY if the string was indeed abbreviated.
58       *                    The append does not count towards the lower or upper limits.
59       * @return The abbreviated String.
60       *
61       * <pre>
62       * WordUtils.abbreviate("Now is the time for all good men", 0, 40, null));     = "Now"
63       * WordUtils.abbreviate("Now is the time for all good men", 10, 40, null));    = "Now is the"
64       * WordUtils.abbreviate("Now is the time for all good men", 20, 40, null));    = "Now is the time for all"
65       * WordUtils.abbreviate("Now is the time for all good men", 0, 40, ""));       = "Now"
66       * WordUtils.abbreviate("Now is the time for all good men", 10, 40, ""));      = "Now is the"
67       * WordUtils.abbreviate("Now is the time for all good men", 20, 40, ""));      = "Now is the time for all"
68       * WordUtils.abbreviate("Now is the time for all good men", 0, 40, " ..."));   = "Now ..."
69       * WordUtils.abbreviate("Now is the time for all good men", 10, 40, " ..."));  = "Now is the ..."
70       * WordUtils.abbreviate("Now is the time for all good men", 20, 40, " ..."));  = "Now is the time for all ..."
71       * WordUtils.abbreviate("Now is the time for all good men", 0, -1, ""));       = "Now"
72       * WordUtils.abbreviate("Now is the time for all good men", 10, -1, ""));      = "Now is the"
73       * WordUtils.abbreviate("Now is the time for all good men", 20, -1, ""));      = "Now is the time for all"
74       * WordUtils.abbreviate("Now is the time for all good men", 50, -1, ""));      = "Now is the time for all good men"
75       * WordUtils.abbreviate("Now is the time for all good men", 1000, -1, ""));    = "Now is the time for all good men"
76       * WordUtils.abbreviate("Now is the time for all good men", 9, -10, null));    = IllegalArgumentException
77       * WordUtils.abbreviate("Now is the time for all good men", 10, 5, null));     = IllegalArgumentException
78       * </pre>
79       */
80      public static String abbreviate(final String str, int lower, int upper, final String appendToEnd) {
81          Validate.isTrue(upper >= -1, "upper value cannot be less than -1");
82          Validate.isTrue(upper >= lower || upper == -1, "upper value is less than lower value");
83          if (StringUtils.isEmpty(str)) {
84              return str;
85          }
86  
87          // if the lower value is greater than the length of the string,
88          // set to the length of the string
89          if (lower > str.length()) {
90              lower = str.length();
91          }
92  
93          // if the upper value is -1 (i.e. no limit) or is greater
94          // than the length of the string, set to the length of the string
95          if (upper == -1 || upper > str.length()) {
96              upper = str.length();
97          }
98  
99          final StringBuilder result = new StringBuilder();
100         final int index = StringUtils.indexOf(str, " ", lower);
101         if (index == -1) {
102             result.append(str, 0, upper);
103             // only if abbreviation has occurred do we append the appendToEnd value
104             if (upper != str.length()) {
105                 result.append(StringUtils.defaultString(appendToEnd));
106             }
107         } else {
108             result.append(str, 0, Math.min(index, upper));
109             result.append(StringUtils.defaultString(appendToEnd));
110         }
111 
112         return result.toString();
113     }
114 
115     /**
116      * Capitalizes all the whitespace separated words in a String.
117      * Only the first character of each word is changed. To convert the
118      * rest of each word to lowercase at the same time,
119      * use {@link #capitalizeFully(String)}.
120      *
121      * <p>Whitespace is defined by {@link Character#isWhitespace(char)}.
122      * A {@code null} input String returns {@code null}.
123      * Capitalization uses the Unicode title case, normally equivalent to
124      * upper case.</p>
125      *
126      * <pre>
127      * WordUtils.capitalize(null)        = null
128      * WordUtils.capitalize("")          = ""
129      * WordUtils.capitalize("i am FINE") = "I Am FINE"
130      * </pre>
131      *
132      * @param str  the String to capitalize, may be null
133      * @return capitalized String, {@code null} if null String input
134      * @see #uncapitalize(String)
135      * @see #capitalizeFully(String)
136      */
137     public static String capitalize(final String str) {
138         return capitalize(str, null);
139     }
140 
141     /**
142      * Capitalizes all the delimiter separated words in a String.
143      * Only the first character of each word is changed. To convert the
144      * rest of each word to lowercase at the same time,
145      * use {@link #capitalizeFully(String, char[])}.
146      *
147      * <p>The delimiters represent a set of characters understood to separate words.
148      * The first string character and the first non-delimiter character after a
149      * delimiter will be capitalized.</p>
150      *
151      * <p>A {@code null} input String returns {@code null}.
152      * Capitalization uses the Unicode title case, normally equivalent to
153      * upper case.</p>
154      *
155      * <pre>
156      * WordUtils.capitalize(null, *)            = null
157      * WordUtils.capitalize("", *)              = ""
158      * WordUtils.capitalize(*, new char[0])     = *
159      * WordUtils.capitalize("i am fine", null)  = "I Am Fine"
160      * WordUtils.capitalize("i aM.fine", {'.'}) = "I aM.Fine"
161      * WordUtils.capitalize("i am fine", new char[]{}) = "I am fine"
162      * </pre>
163      *
164      * @param str  the String to capitalize, may be null
165      * @param delimiters  set of characters to determine capitalization, null means whitespace
166      * @return capitalized String, {@code null} if null String input
167      * @see #uncapitalize(String)
168      * @see #capitalizeFully(String)
169      */
170     public static String capitalize(final String str, final char... delimiters) {
171         if (StringUtils.isEmpty(str)) {
172             return str;
173         }
174         final Predicate<Integer> isDelimiter = generateIsDelimiterFunction(delimiters);
175         final int strLen = str.length();
176         final int[] newCodePoints = new int[strLen];
177         int outOffset = 0;
178 
179         boolean capitalizeNext = true;
180         for (int index = 0; index < strLen;) {
181             final int codePoint = str.codePointAt(index);
182 
183             if (isDelimiter.test(codePoint)) {
184                 capitalizeNext = true;
185                 newCodePoints[outOffset++] = codePoint;
186                 index += Character.charCount(codePoint);
187             } else if (capitalizeNext) {
188                 final int titleCaseCodePoint = Character.toTitleCase(codePoint);
189                 newCodePoints[outOffset++] = titleCaseCodePoint;
190                 index += Character.charCount(titleCaseCodePoint);
191                 capitalizeNext = false;
192             } else {
193                 newCodePoints[outOffset++] = codePoint;
194                 index += Character.charCount(codePoint);
195             }
196         }
197         return new String(newCodePoints, 0, outOffset);
198     }
199 
200     /**
201      * Converts all the whitespace separated words in a String into capitalized words,
202      * that is each word is made up of a titlecase character and then a series of
203      * lowercase characters.
204      *
205      * <p>Whitespace is defined by {@link Character#isWhitespace(char)}.
206      * A {@code null} input String returns {@code null}.
207      * Capitalization uses the Unicode title case, normally equivalent to
208      * upper case.</p>
209      *
210      * <pre>
211      * WordUtils.capitalizeFully(null)        = null
212      * WordUtils.capitalizeFully("")          = ""
213      * WordUtils.capitalizeFully("i am FINE") = "I Am Fine"
214      * </pre>
215      *
216      * @param str  the String to capitalize, may be null
217      * @return capitalized String, {@code null} if null String input
218      */
219     public static String capitalizeFully(final String str) {
220         return capitalizeFully(str, null);
221     }
222 
223     /**
224      * Converts all the delimiter separated words in a String into capitalized words,
225      * that is each word is made up of a titlecase character and then a series of
226      * lowercase characters.
227      *
228      * <p>The delimiters represent a set of characters understood to separate words.
229      * The first string character and the first non-delimiter character after a
230      * delimiter will be capitalized.</p>
231      *
232      * <p>A {@code null} input String returns {@code null}.
233      * Capitalization uses the Unicode title case, normally equivalent to
234      * upper case.</p>
235      *
236      * <pre>
237      * WordUtils.capitalizeFully(null, *)            = null
238      * WordUtils.capitalizeFully("", *)              = ""
239      * WordUtils.capitalizeFully(*, null)            = *
240      * WordUtils.capitalizeFully(*, new char[0])     = *
241      * WordUtils.capitalizeFully("i aM.fine", {'.'}) = "I am.Fine"
242      * </pre>
243      *
244      * @param str  the String to capitalize, may be null
245      * @param delimiters  set of characters to determine capitalization, null means whitespace
246      * @return capitalized String, {@code null} if null String input
247      */
248     public static String capitalizeFully(String str, final char... delimiters) {
249         if (StringUtils.isEmpty(str)) {
250             return str;
251         }
252         str = str.toLowerCase();
253         return capitalize(str, delimiters);
254     }
255 
256     /**
257      * Checks if the String contains all words in the given array.
258      *
259      * <p>
260      * A {@code null} String will return {@code false}. A {@code null}, zero
261      * length search array or if one element of array is null will return {@code false}.
262      * </p>
263      *
264      * <pre>
265      * WordUtils.containsAllWords(null, *)            = false
266      * WordUtils.containsAllWords("", *)              = false
267      * WordUtils.containsAllWords(*, null)            = false
268      * WordUtils.containsAllWords(*, [])              = false
269      * WordUtils.containsAllWords("abcd", "ab", "cd") = false
270      * WordUtils.containsAllWords("abc def", "def", "abc") = true
271      * </pre>
272      *
273      * @param word The CharSequence to check, may be null
274      * @param words The array of String words to search for, may be null
275      * @return {@code true} if all search words are found, {@code false} otherwise
276      */
277     public static boolean containsAllWords(final CharSequence word, final CharSequence... words) {
278         if (StringUtils.isEmpty(word) || ArrayUtils.isEmpty(words)) {
279             return false;
280         }
281         for (final CharSequence w : words) {
282             if (StringUtils.isBlank(w)) {
283                 return false;
284             }
285             final Pattern p = Pattern.compile(".*\\b" + Pattern.quote(w.toString()) + "\\b.*");
286             if (!p.matcher(word).matches()) {
287                 return false;
288             }
289         }
290         return true;
291     }
292 
293     /**
294      * Given the array of delimiters supplied; returns a function determining whether a character code point is a delimiter.
295      * The function provides O(1) lookup time.
296      * Whitespace is defined by {@link Character#isWhitespace(char)} and is used as the defaultvalue if delimiters is null.
297      *
298      * @param delimiters set of characters to determine delimiters, null means whitespace
299      * @return Predicate<Integer> taking a code point value as an argument and returning true if a delimiter.
300      */
301     private static Predicate<Integer> generateIsDelimiterFunction(final char[] delimiters) {
302         final Predicate<Integer> isDelimiter;
303         if (delimiters == null || delimiters.length == 0) {
304             isDelimiter = delimiters == null ? Character::isWhitespace : c -> false;
305         } else {
306             final Set<Integer> delimiterSet = new HashSet<>();
307             for (int index = 0; index < delimiters.length; index++) {
308                 delimiterSet.add(Character.codePointAt(delimiters, index));
309             }
310             isDelimiter = delimiterSet::contains;
311         }
312 
313         return isDelimiter;
314     }
315 
316     /**
317      * Extracts the initial characters from each word in the String.
318      *
319      * <p>All first characters after whitespace are returned as a new string.
320      * Their case is not changed.</p>
321      *
322      * <p>Whitespace is defined by {@link Character#isWhitespace(char)}.
323      * A {@code null} input String returns {@code null}.</p>
324      *
325      * <pre>
326      * WordUtils.initials(null)             = null
327      * WordUtils.initials("")               = ""
328      * WordUtils.initials("Ben John Lee")   = "BJL"
329      * WordUtils.initials("Ben J.Lee")      = "BJ"
330      * </pre>
331      *
332      * @param str  the String to get initials from, may be null
333      * @return String of initial letters, {@code null} if null String input
334      * @see #initials(String,char[])
335      */
336     public static String initials(final String str) {
337         return initials(str, null);
338     }
339 
340     /**
341      * Extracts the initial characters from each word in the String.
342      *
343      * <p>All first characters after the defined delimiters are returned as a new string.
344      * Their case is not changed.</p>
345      *
346      * <p>If the delimiters array is null, then Whitespace is used.
347      * Whitespace is defined by {@link Character#isWhitespace(char)}.
348      * A {@code null} input String returns {@code null}.
349      * An empty delimiter array returns an empty String.</p>
350      *
351      * <pre>
352      * WordUtils.initials(null, *)                = null
353      * WordUtils.initials("", *)                  = ""
354      * WordUtils.initials("Ben John Lee", null)   = "BJL"
355      * WordUtils.initials("Ben J.Lee", null)      = "BJ"
356      * WordUtils.initials("Ben J.Lee", [' ','.']) = "BJL"
357      * WordUtils.initials(*, new char[0])         = ""
358      * </pre>
359      *
360      * @param str  the String to get initials from, may be null
361      * @param delimiters  set of characters to determine words, null means whitespace
362      * @return String of initial characters, {@code null} if null String input
363      * @see #initials(String)
364      */
365     public static String initials(final String str, final char... delimiters) {
366         if (StringUtils.isEmpty(str)) {
367             return str;
368         }
369         if (delimiters != null && delimiters.length == 0) {
370             return StringUtils.EMPTY;
371         }
372         final Predicate<Integer> isDelimiter = generateIsDelimiterFunction(delimiters);
373         final int strLen = str.length();
374         final int[] newCodePoints = new int[strLen / 2 + 1];
375         int count = 0;
376         boolean lastWasGap = true;
377         for (int i = 0; i < strLen;) {
378             final int codePoint = str.codePointAt(i);
379 
380             if (isDelimiter.test(codePoint)) {
381                 lastWasGap = true;
382             } else if (lastWasGap) {
383                 newCodePoints[count++] = codePoint;
384                 lastWasGap = false;
385             }
386 
387             i += Character.charCount(codePoint);
388         }
389         return new String(newCodePoints, 0, count);
390     }
391 
392     /**
393      * Is the character a delimiter.
394      *
395      * @param ch the character to check
396      * @param delimiters the delimiters
397      * @return true if it is a delimiter
398      * @deprecated as of 1.2 and will be removed in 2.0
399      */
400     @Deprecated
401     public static boolean isDelimiter(final char ch, final char[] delimiters) {
402         if (delimiters == null) {
403             return Character.isWhitespace(ch);
404         }
405         for (final char delimiter : delimiters) {
406             if (ch == delimiter) {
407                 return true;
408             }
409         }
410         return false;
411     }
412 
413     /**
414      * Is the codePoint a delimiter.
415      *
416      * @param codePoint the codePint to check
417      * @param delimiters the delimiters
418      * @return true if it is a delimiter
419      * @deprecated as of 1.2 and will be removed in 2.0
420      */
421     @Deprecated
422     public static boolean isDelimiter(final int codePoint, final char[] delimiters) {
423         if (delimiters == null) {
424             return Character.isWhitespace(codePoint);
425         }
426         for (int index = 0; index < delimiters.length; index++) {
427             final int delimiterCodePoint = Character.codePointAt(delimiters, index);
428             if (delimiterCodePoint == codePoint) {
429                 return true;
430             }
431         }
432         return false;
433     }
434 
435     /**
436      * Swaps the case of a String using a word based algorithm.
437      *
438      * <ul>
439      *  <li>Upper case character converts to Lower case</li>
440      *  <li>Title case character converts to Lower case</li>
441      *  <li>Lower case character after Whitespace or at start converts to Title case</li>
442      *  <li>Other Lower case character converts to Upper case</li>
443      * </ul>
444      *
445      * <p>Whitespace is defined by {@link Character#isWhitespace(char)}.
446      * A {@code null} input String returns {@code null}.</p>
447      *
448      * <pre>
449      * StringUtils.swapCase(null)                 = null
450      * StringUtils.swapCase("")                   = ""
451      * StringUtils.swapCase("The dog has a BONE") = "tHE DOG HAS A bone"
452      * </pre>
453      *
454      * @param str  the String to swap case, may be null
455      * @return The changed String, {@code null} if null String input
456      */
457     public static String swapCase(final String str) {
458         if (StringUtils.isEmpty(str)) {
459             return str;
460         }
461         final int strLen = str.length();
462         final int[] newCodePoints = new int[strLen];
463         int outOffset = 0;
464         boolean whitespace = true;
465         for (int index = 0; index < strLen;) {
466             final int oldCodepoint = str.codePointAt(index);
467             final int newCodePoint;
468             if (Character.isUpperCase(oldCodepoint) || Character.isTitleCase(oldCodepoint)) {
469                 newCodePoint = Character.toLowerCase(oldCodepoint);
470                 whitespace = false;
471             } else if (Character.isLowerCase(oldCodepoint)) {
472                 if (whitespace) {
473                     newCodePoint = Character.toTitleCase(oldCodepoint);
474                     whitespace = false;
475                 } else {
476                     newCodePoint = Character.toUpperCase(oldCodepoint);
477                 }
478             } else {
479                 whitespace = Character.isWhitespace(oldCodepoint);
480                 newCodePoint = oldCodepoint;
481             }
482             newCodePoints[outOffset++] = newCodePoint;
483             index += Character.charCount(newCodePoint);
484         }
485         return new String(newCodePoints, 0, outOffset);
486     }
487 
488     /**
489      * Uncapitalizes all the whitespace separated words in a String.
490      * Only the first character of each word is changed.
491      *
492      * <p>Whitespace is defined by {@link Character#isWhitespace(char)}.
493      * A {@code null} input String returns {@code null}.</p>
494      *
495      * <pre>
496      * WordUtils.uncapitalize(null)        = null
497      * WordUtils.uncapitalize("")          = ""
498      * WordUtils.uncapitalize("I Am FINE") = "i am fINE"
499      * </pre>
500      *
501      * @param str  the String to uncapitalize, may be null
502      * @return uncapitalized String, {@code null} if null String input
503      * @see #capitalize(String)
504      */
505     public static String uncapitalize(final String str) {
506         return uncapitalize(str, null);
507     }
508 
509     /**
510      * Uncapitalizes all the whitespace separated words in a String.
511      * Only the first character of each word is changed.
512      *
513      * <p>The delimiters represent a set of characters understood to separate words.
514      * The first string character and the first non-delimiter character after a
515      * delimiter will be uncapitalized.</p>
516      *
517      * <p>Whitespace is defined by {@link Character#isWhitespace(char)}.
518      * A {@code null} input String returns {@code null}.</p>
519      *
520      * <pre>
521      * WordUtils.uncapitalize(null, *)            = null
522      * WordUtils.uncapitalize("", *)              = ""
523      * WordUtils.uncapitalize(*, null)            = *
524      * WordUtils.uncapitalize(*, new char[0])     = *
525      * WordUtils.uncapitalize("I AM.FINE", {'.'}) = "i AM.fINE"
526      * WordUtils.uncapitalize("I am fine", new char[]{}) = "i am fine"
527      * </pre>
528      *
529      * @param str  the String to uncapitalize, may be null
530      * @param delimiters  set of characters to determine uncapitalization, null means whitespace
531      * @return uncapitalized String, {@code null} if null String input
532      * @see #capitalize(String)
533      */
534     public static String uncapitalize(final String str, final char... delimiters) {
535         if (StringUtils.isEmpty(str)) {
536             return str;
537         }
538         final Predicate<Integer> isDelimiter = generateIsDelimiterFunction(delimiters);
539         final int strLen = str.length();
540         final int[] newCodePoints = new int[strLen];
541         int outOffset = 0;
542 
543         boolean uncapitalizeNext = true;
544         for (int index = 0; index < strLen;) {
545             final int codePoint = str.codePointAt(index);
546 
547             if (isDelimiter.test(codePoint)) {
548                 uncapitalizeNext = true;
549                 newCodePoints[outOffset++] = codePoint;
550                 index += Character.charCount(codePoint);
551             } else if (uncapitalizeNext) {
552                 final int titleCaseCodePoint = Character.toLowerCase(codePoint);
553                 newCodePoints[outOffset++] = titleCaseCodePoint;
554                 index += Character.charCount(titleCaseCodePoint);
555                 uncapitalizeNext = false;
556             } else {
557                 newCodePoints[outOffset++] = codePoint;
558                 index += Character.charCount(codePoint);
559             }
560         }
561         return new String(newCodePoints, 0, outOffset);
562     }
563 
564     /**
565      * Wraps a single line of text, identifying words by {@code ' '}.
566      *
567      * <p>New lines will be separated by the system property line separator.
568      * Very long words, such as URLs will <i>not</i> be wrapped.</p>
569      *
570      * <p>Leading spaces on a new line are stripped.
571      * Trailing spaces are not stripped.</p>
572      *
573      * <table border="1">
574      *  <caption>Examples</caption>
575      *  <tr>
576      *   <th>input</th>
577      *   <th>wrapLength</th>
578      *   <th>result</th>
579      *  </tr>
580      *  <tr>
581      *   <td>null</td>
582      *   <td>*</td>
583      *   <td>null</td>
584      *  </tr>
585      *  <tr>
586      *   <td>""</td>
587      *   <td>*</td>
588      *   <td>""</td>
589      *  </tr>
590      *  <tr>
591      *   <td>"Here is one line of text that is going to be wrapped after 20 columns."</td>
592      *   <td>20</td>
593      *   <td>"Here is one line of\ntext that is going\nto be wrapped after\n20 columns."</td>
594      *  </tr>
595      *  <tr>
596      *   <td>"Click here to jump to the commons website - https://commons.apache.org"</td>
597      *   <td>20</td>
598      *   <td>"Click here to jump\nto the commons\nwebsite -\nhttps://commons.apache.org"</td>
599      *  </tr>
600      *  <tr>
601      *   <td>"Click here, https://commons.apache.org, to jump to the commons website"</td>
602      *   <td>20</td>
603      *   <td>"Click here,\nhttps://commons.apache.org,\nto jump to the\ncommons website"</td>
604      *  </tr>
605      * </table>
606      *
607      * (assuming that '\n' is the systems line separator)
608      *
609      * @param str  the String to be word wrapped, may be null
610      * @param wrapLength  the column to wrap the words at, less than 1 is treated as 1
611      * @return a line with newlines inserted, {@code null} if null input
612      */
613     public static String wrap(final String str, final int wrapLength) {
614         return wrap(str, wrapLength, null, false);
615     }
616 
617     /**
618      * Wraps a single line of text, identifying words by {@code ' '}.
619      *
620      * <p>Leading spaces on a new line are stripped.
621      * Trailing spaces are not stripped.</p>
622      *
623      * <table border="1">
624      *  <caption>Examples</caption>
625      *  <tr>
626      *   <th>input</th>
627      *   <th>wrapLength</th>
628      *   <th>newLineString</th>
629      *   <th>wrapLongWords</th>
630      *   <th>result</th>
631      *  </tr>
632      *  <tr>
633      *   <td>null</td>
634      *   <td>*</td>
635      *   <td>*</td>
636      *   <td>true/false</td>
637      *   <td>null</td>
638      *  </tr>
639      *  <tr>
640      *   <td>""</td>
641      *   <td>*</td>
642      *   <td>*</td>
643      *   <td>true/false</td>
644      *   <td>""</td>
645      *  </tr>
646      *  <tr>
647      *   <td>"Here is one line of text that is going to be wrapped after 20 columns."</td>
648      *   <td>20</td>
649      *   <td>"\n"</td>
650      *   <td>true/false</td>
651      *   <td>"Here is one line of\ntext that is going\nto be wrapped after\n20 columns."</td>
652      *  </tr>
653      *  <tr>
654      *   <td>"Here is one line of text that is going to be wrapped after 20 columns."</td>
655      *   <td>20</td>
656      *   <td>"&lt;br /&gt;"</td>
657      *   <td>true/false</td>
658      *   <td>"Here is one line of&lt;br /&gt;text that is
659      *   going&lt;br /&gt;to be wrapped after&lt;br /&gt;20 columns."</td>
660      *  </tr>
661      *  <tr>
662      *   <td>"Here is one line of text that is going to be wrapped after 20 columns."</td>
663      *   <td>20</td>
664      *   <td>null</td>
665      *   <td>true/false</td>
666      *   <td>"Here is one line of" + systemNewLine + "text that is going"
667      *   + systemNewLine + "to be wrapped after" + systemNewLine + "20 columns."</td>
668      *  </tr>
669      *  <tr>
670      *   <td>"Click here to jump to the commons website - https://commons.apache.org"</td>
671      *   <td>20</td>
672      *   <td>"\n"</td>
673      *   <td>false</td>
674      *   <td>"Click here to jump\nto the commons\nwebsite -\nhttps://commons.apache.org"</td>
675      *  </tr>
676      *  <tr>
677      *   <td>"Click here to jump to the commons website - https://commons.apache.org"</td>
678      *   <td>20</td>
679      *   <td>"\n"</td>
680      *   <td>true</td>
681      *   <td>"Click here to jump\nto the commons\nwebsite -\nhttps://commons.apach\ne.org"</td>
682      *  </tr>
683      * </table>
684      *
685      * @param str  the String to be word wrapped, may be null
686      * @param wrapLength  the column to wrap the words at, less than 1 is treated as 1
687      * @param newLineStr  the string to insert for a new line,
688      *  {@code null} uses the system property line separator
689      * @param wrapLongWords  true if long words (such as URLs) should be wrapped
690      * @return a line with newlines inserted, {@code null} if null input
691      */
692     public static String wrap(final String str,
693                               final int wrapLength,
694                               final String newLineStr,
695                               final boolean wrapLongWords) {
696         return wrap(str, wrapLength, newLineStr, wrapLongWords, " ");
697     }
698 
699     /**
700      * Wraps a single line of text, identifying words by {@code wrapOn}.
701      *
702      * <p>Leading spaces on a new line are stripped.
703      * Trailing spaces are not stripped.</p>
704      *
705      * <table border="1">
706      *  <caption>Examples</caption>
707      *  <tr>
708      *   <th>input</th>
709      *   <th>wrapLength</th>
710      *   <th>newLineString</th>
711      *   <th>wrapLongWords</th>
712      *   <th>wrapOn</th>
713      *   <th>result</th>
714      *  </tr>
715      *  <tr>
716      *   <td>null</td>
717      *   <td>*</td>
718      *   <td>*</td>
719      *   <td>true/false</td>
720      *   <td>*</td>
721      *   <td>null</td>
722      *  </tr>
723      *  <tr>
724      *   <td>""</td>
725      *   <td>*</td>
726      *   <td>*</td>
727      *   <td>true/false</td>
728      *   <td>*</td>
729      *   <td>""</td>
730      *  </tr>
731      *  <tr>
732      *   <td>"Here is one line of text that is going to be wrapped after 20 columns."</td>
733      *   <td>20</td>
734      *   <td>"\n"</td>
735      *   <td>true/false</td>
736      *   <td>" "</td>
737      *   <td>"Here is one line of\ntext that is going\nto be wrapped after\n20 columns."</td>
738      *  </tr>
739      *  <tr>
740      *   <td>"Here is one line of text that is going to be wrapped after 20 columns."</td>
741      *   <td>20</td>
742      *   <td>"&lt;br /&gt;"</td>
743      *   <td>true/false</td>
744      *   <td>" "</td>
745      *   <td>"Here is one line of&lt;br /&gt;text that is going&lt;br /&gt;
746      *   to be wrapped after&lt;br /&gt;20 columns."</td>
747      *  </tr>
748      *  <tr>
749      *   <td>"Here is one line of text that is going to be wrapped after 20 columns."</td>
750      *   <td>20</td>
751      *   <td>null</td>
752      *   <td>true/false</td>
753      *   <td>" "</td>
754      *   <td>"Here is one line of" + systemNewLine + "text that is going"
755      *   + systemNewLine + "to be wrapped after" + systemNewLine + "20 columns."</td>
756      *  </tr>
757      *  <tr>
758      *   <td>"Click here to jump to the commons website - https://commons.apache.org"</td>
759      *   <td>20</td>
760      *   <td>"\n"</td>
761      *   <td>false</td>
762      *   <td>" "</td>
763      *   <td>"Click here to jump\nto the commons\nwebsite -\nhttps://commons.apache.org"</td>
764      *  </tr>
765      *  <tr>
766      *   <td>"Click here to jump to the commons website - https://commons.apache.org"</td>
767      *   <td>20</td>
768      *   <td>"\n"</td>
769      *   <td>true</td>
770      *   <td>" "</td>
771      *   <td>"Click here to jump\nto the commons\nwebsite -\nhttps://commons.apach\ne.org"</td>
772      *  </tr>
773      *  <tr>
774      *   <td>"flammable/inflammable"</td>
775      *   <td>20</td>
776      *   <td>"\n"</td>
777      *   <td>true</td>
778      *   <td>"/"</td>
779      *   <td>"flammable\ninflammable"</td>
780      *  </tr>
781      * </table>
782      * @param str  the String to be word wrapped, may be null
783      * @param wrapLength  the column to wrap the words at, less than 1 is treated as 1
784      * @param newLineStr  the string to insert for a new line,
785      *  {@code null} uses the system property line separator
786      * @param wrapLongWords  true if long words (such as URLs) should be wrapped
787      * @param wrapOn regex expression to be used as a breakable characters,
788      *               if blank string is provided a space character will be used
789      * @return a line with newlines inserted, {@code null} if null input
790      */
791     public static String wrap(final String str,
792                               int wrapLength,
793                               String newLineStr,
794                               final boolean wrapLongWords,
795                               String wrapOn) {
796         if (str == null) {
797             return null;
798         }
799         if (newLineStr == null) {
800             newLineStr = System.lineSeparator();
801         }
802         if (wrapLength < 1) {
803             wrapLength = 1;
804         }
805         if (StringUtils.isBlank(wrapOn)) {
806             wrapOn = " ";
807         }
808         final Pattern patternToWrapOn = Pattern.compile(wrapOn);
809         final int inputLineLength = str.length();
810         int offset = 0;
811         final StringBuilder wrappedLine = new StringBuilder(inputLineLength + 32);
812         int matcherSize = -1;
813 
814         while (offset < inputLineLength) {
815             int spaceToWrapAt = -1;
816             Matcher matcher = patternToWrapOn.matcher(str.substring(offset,
817                     Math.min((int) Math.min(Integer.MAX_VALUE, offset + wrapLength + 1L), inputLineLength)));
818             if (matcher.find()) {
819                 if (matcher.start() == 0) {
820                     matcherSize = matcher.end();
821                     if (matcherSize != 0) {
822                         offset += matcher.end();
823                         continue;
824                     }
825                     offset += 1;
826                 }
827                 spaceToWrapAt = matcher.start() + offset;
828             }
829 
830             // only last line without leading spaces is left
831             if (inputLineLength - offset <= wrapLength) {
832                 break;
833             }
834 
835             while (matcher.find()) {
836                 spaceToWrapAt = matcher.start() + offset;
837             }
838 
839             if (spaceToWrapAt >= offset) {
840                 // normal case
841                 wrappedLine.append(str, offset, spaceToWrapAt);
842                 wrappedLine.append(newLineStr);
843                 offset = spaceToWrapAt + 1;
844 
845             } else // really long word or URL
846             if (wrapLongWords) {
847                 if (matcherSize == 0) {
848                     offset--;
849                 }
850                 // wrap really long word one line at a time
851                 wrappedLine.append(str, offset, wrapLength + offset);
852                 wrappedLine.append(newLineStr);
853                 offset += wrapLength;
854                 matcherSize = -1;
855             } else {
856                 // do not wrap really long word, just extend beyond limit
857                 matcher = patternToWrapOn.matcher(str.substring(offset + wrapLength));
858                 if (matcher.find()) {
859                     matcherSize = matcher.end() - matcher.start();
860                     spaceToWrapAt = matcher.start() + offset + wrapLength;
861                 }
862 
863                 if (spaceToWrapAt >= 0) {
864                     if (matcherSize == 0 && offset != 0) {
865                         offset--;
866                     }
867                     wrappedLine.append(str, offset, spaceToWrapAt);
868                     wrappedLine.append(newLineStr);
869                     offset = spaceToWrapAt + 1;
870                 } else {
871                     if (matcherSize == 0 && offset != 0) {
872                         offset--;
873                     }
874                     wrappedLine.append(str, offset, str.length());
875                     offset = inputLineLength;
876                     matcherSize = -1;
877                 }
878             }
879         }
880 
881         if (matcherSize == 0 && offset < inputLineLength) {
882             offset--;
883         }
884 
885         // Whatever is left in line is short enough to just pass through
886         wrappedLine.append(str, offset, str.length());
887 
888         return wrappedLine.toString();
889     }
890 
891     /**
892      * {@code WordUtils} instances should NOT be constructed in
893      * standard programming. Instead, the class should be used as
894      * {@code WordUtils.wrap("foo bar", 20);}.
895      *
896      * <p>This constructor is public to permit tools that require a JavaBean
897      * instance to operate.</p>
898      */
899     public WordUtils() {
900     }
901  }