View Javadoc
1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one or more
3    * contributor license agreements.  See the NOTICE file distributed with
4    * this work for additional information regarding copyright ownership.
5    * The ASF licenses this file to You under the Apache License, Version 2.0
6    * (the "License"); you may not use this file except in compliance with
7    * the License.  You may obtain a copy of the License at
8    *
9    *      https://www.apache.org/licenses/LICENSE-2.0
10   *
11   * Unless required by applicable law or agreed to in writing, software
12   * distributed under the License is distributed on an "AS IS" BASIS,
13   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14   * See the License for the specific language governing permissions and
15   * limitations under the License.
16   */
17  package org.apache.commons.text;
18  
19  import java.util.HashSet;
20  import java.util.Set;
21  import java.util.function.Predicate;
22  import java.util.regex.Matcher;
23  import java.util.regex.Pattern;
24  
25  import org.apache.commons.lang3.ArrayUtils;
26  import org.apache.commons.lang3.StringUtils;
27  import org.apache.commons.lang3.Strings;
28  import org.apache.commons.lang3.Validate;
29  
30  /**
31   * Operations on Strings that contain words.
32   *
33   * <p>
34   * This class tries to handle {@code null} input gracefully. An exception will not be thrown for a
35   * {@code null} input. Each method documents its behavior in more detail.
36   * </p>
37   *
38   * @since 1.1
39   */
40  public class WordUtils {
41  
42      /**
43       * Abbreviates the words nicely.
44       *
45       * <p>
46       * This method searches for the first space after the lower limit and abbreviates
47       * the String there. It will also append any String passed as a parameter
48       * to the end of the String. The upper limit can be specified to forcibly
49       * abbreviate a String.
50       * </p>
51       *
52       * @param str         the string to be abbreviated. If null is passed, null is returned.
53       *                    If the empty String is passed, the empty string is returned.
54       * @param lower       the lower limit; negative value is treated as zero.
55       * @param upper       the upper limit; specify -1 if no limit is desired.
56       *                    The upper limit cannot be lower than the lower limit.
57       * @param appendToEnd String to be appended to the end of the abbreviated string.
58       *                    This is appended ONLY if the string was indeed abbreviated.
59       *                    The append does not count towards the lower or upper limits.
60       * @return The abbreviated String.
61       *
62       * <pre>
63       * WordUtils.abbreviate("Now is the time for all good men", 0, 40, null));     = "Now"
64       * WordUtils.abbreviate("Now is the time for all good men", 10, 40, null));    = "Now is the"
65       * WordUtils.abbreviate("Now is the time for all good men", 20, 40, null));    = "Now is the time for all"
66       * WordUtils.abbreviate("Now is the time for all good men", 0, 40, ""));       = "Now"
67       * WordUtils.abbreviate("Now is the time for all good men", 10, 40, ""));      = "Now is the"
68       * WordUtils.abbreviate("Now is the time for all good men", 20, 40, ""));      = "Now is the time for all"
69       * WordUtils.abbreviate("Now is the time for all good men", 0, 40, " ..."));   = "Now ..."
70       * WordUtils.abbreviate("Now is the time for all good men", 10, 40, " ..."));  = "Now is the ..."
71       * WordUtils.abbreviate("Now is the time for all good men", 20, 40, " ..."));  = "Now is the time for all ..."
72       * WordUtils.abbreviate("Now is the time for all good men", 0, -1, ""));       = "Now"
73       * WordUtils.abbreviate("Now is the time for all good men", 10, -1, ""));      = "Now is the"
74       * WordUtils.abbreviate("Now is the time for all good men", 20, -1, ""));      = "Now is the time for all"
75       * WordUtils.abbreviate("Now is the time for all good men", 50, -1, ""));      = "Now is the time for all good men"
76       * WordUtils.abbreviate("Now is the time for all good men", 1000, -1, ""));    = "Now is the time for all good men"
77       * WordUtils.abbreviate("Now is the time for all good men", 9, -10, null));    = Throws {@link IllegalArgumentException}
78       * WordUtils.abbreviate("Now is the time for all good men", 10, 5, null));     = Throws {@link IllegalArgumentException}
79       * </pre>
80       */
81      public static String abbreviate(final String str, int lower, int upper, final String appendToEnd) {
82          Validate.isTrue(upper >= -1, "upper value cannot be less than -1");
83          Validate.isTrue(upper >= lower || upper == -1, "upper value is less than lower value");
84          if (StringUtils.isEmpty(str)) {
85              return str;
86          }
87  
88          // if the lower value is greater than the length of the string,
89          // set to the length of the string
90          if (lower > str.length()) {
91              lower = str.length();
92          }
93  
94          // if the upper value is -1 (i.e. no limit) or is greater
95          // than the length of the string, set to the length of the string
96          if (upper == -1 || upper > str.length()) {
97              upper = str.length();
98          }
99  
100         final StringBuilder result = new StringBuilder();
101         final int index = Strings.CS.indexOf(str, " ", lower);
102         if (index == -1) {
103             result.append(str, 0, upper);
104             // only if abbreviation has occurred do we append the appendToEnd value
105             if (upper != str.length()) {
106                 result.append(StringUtils.defaultString(appendToEnd));
107             }
108         } else {
109             result.append(str, 0, Math.min(index, upper));
110             result.append(StringUtils.defaultString(appendToEnd));
111         }
112 
113         return result.toString();
114     }
115 
116     /**
117      * Capitalizes all the whitespace separated words in a String.
118      * Only the first character of each word is changed. To convert the
119      * rest of each word to lowercase at the same time,
120      * use {@link #capitalizeFully(String)}.
121      *
122      * <p>Whitespace is defined by {@link Character#isWhitespace(char)}.
123      * A {@code null} input String returns {@code null}.
124      * Capitalization uses the Unicode title case, normally equivalent to
125      * upper case.</p>
126      *
127      * <pre>
128      * WordUtils.capitalize(null)        = null
129      * WordUtils.capitalize("")          = ""
130      * WordUtils.capitalize("i am FINE") = "I Am FINE"
131      * </pre>
132      *
133      * @param str  the String to capitalize, may be null.
134      * @return capitalized String, {@code null} if null String input.
135      * @see #uncapitalize(String)
136      * @see #capitalizeFully(String)
137      */
138     public static String capitalize(final String str) {
139         return capitalize(str, null);
140     }
141 
142     /**
143      * Capitalizes all the delimiter separated words in a String.
144      * Only the first character of each word is changed. To convert the
145      * rest of each word to lowercase at the same time,
146      * use {@link #capitalizeFully(String, char[])}.
147      *
148      * <p>The delimiters represent a set of characters understood to separate words.
149      * The first string character and the first non-delimiter character after a
150      * delimiter will be capitalized.</p>
151      *
152      * <p>A {@code null} input String returns {@code null}.
153      * Capitalization uses the Unicode title case, normally equivalent to
154      * upper case.</p>
155      *
156      * <pre>
157      * WordUtils.capitalize(null, *)            = null
158      * WordUtils.capitalize("", *)              = ""
159      * WordUtils.capitalize(*, new char[0])     = *
160      * WordUtils.capitalize("i am fine", null)  = "I Am Fine"
161      * WordUtils.capitalize("i aM.fine", {'.'}) = "I aM.Fine"
162      * WordUtils.capitalize("i am fine", new char[]{}) = "I am fine"
163      * </pre>
164      *
165      * @param str  the String to capitalize, may be null.
166      * @param delimiters  set of characters to determine capitalization, null means whitespace.
167      * @return capitalized String, {@code null} if null String input.
168      * @see #uncapitalize(String)
169      * @see #capitalizeFully(String)
170      */
171     public static String capitalize(final String str, final char... delimiters) {
172         if (StringUtils.isEmpty(str)) {
173             return str;
174         }
175         final Predicate<Integer> isDelimiter = generateIsDelimiterFunction(delimiters);
176         final int strLen = str.length();
177         final int[] newCodePoints = new int[strLen];
178         int outOffset = 0;
179 
180         boolean capitalizeNext = true;
181         for (int index = 0; index < strLen;) {
182             final int codePoint = str.codePointAt(index);
183 
184             if (isDelimiter.test(codePoint)) {
185                 capitalizeNext = true;
186                 newCodePoints[outOffset++] = codePoint;
187                 index += Character.charCount(codePoint);
188             } else if (capitalizeNext) {
189                 final int titleCaseCodePoint = Character.toTitleCase(codePoint);
190                 newCodePoints[outOffset++] = titleCaseCodePoint;
191                 index += Character.charCount(titleCaseCodePoint);
192                 capitalizeNext = false;
193             } else {
194                 newCodePoints[outOffset++] = codePoint;
195                 index += Character.charCount(codePoint);
196             }
197         }
198         return new String(newCodePoints, 0, outOffset);
199     }
200 
201     /**
202      * Converts all the whitespace separated words in a String into capitalized words,
203      * that is each word is made up of a titlecase character and then a series of
204      * lowercase characters.
205      *
206      * <p>Whitespace is defined by {@link Character#isWhitespace(char)}.
207      * A {@code null} input String returns {@code null}.
208      * Capitalization uses the Unicode title case, normally equivalent to
209      * upper case.</p>
210      *
211      * <pre>
212      * WordUtils.capitalizeFully(null)        = null
213      * WordUtils.capitalizeFully("")          = ""
214      * WordUtils.capitalizeFully("i am FINE") = "I Am Fine"
215      * </pre>
216      *
217      * @param str  the String to capitalize, may be null.
218      * @return capitalized String, {@code null} if null String input.
219      */
220     public static String capitalizeFully(final String str) {
221         return capitalizeFully(str, null);
222     }
223 
224     /**
225      * Converts all the delimiter separated words in a String into capitalized words,
226      * that is each word is made up of a titlecase character and then a series of
227      * lowercase characters.
228      *
229      * <p>The delimiters represent a set of characters understood to separate words.
230      * The first string character and the first non-delimiter character after a
231      * delimiter will be capitalized.</p>
232      *
233      * <p>A {@code null} input String returns {@code null}.
234      * Capitalization uses the Unicode title case, normally equivalent to
235      * upper case.</p>
236      *
237      * <pre>
238      * WordUtils.capitalizeFully(null, *)            = null
239      * WordUtils.capitalizeFully("", *)              = ""
240      * WordUtils.capitalizeFully(*, null)            = *
241      * WordUtils.capitalizeFully(*, new char[0])     = *
242      * WordUtils.capitalizeFully("i aM.fine", {'.'}) = "I am.Fine"
243      * </pre>
244      *
245      * @param str  the String to capitalize, may be null.
246      * @param delimiters  set of characters to determine capitalization, null means whitespace.
247      * @return capitalized String, {@code null} if null String input.
248      */
249     public static String capitalizeFully(String str, final char... delimiters) {
250         if (StringUtils.isEmpty(str)) {
251             return str;
252         }
253         str = str.toLowerCase();
254         return capitalize(str, delimiters);
255     }
256 
257     /**
258      * Checks if the String contains all words in the given array.
259      *
260      * <p>
261      * A {@code null} String will return {@code false}. A {@code null}, zero
262      * length search array or if one element of array is null will return {@code false}.
263      * </p>
264      *
265      * <pre>
266      * WordUtils.containsAllWords(null, *)            = false
267      * WordUtils.containsAllWords("", *)              = false
268      * WordUtils.containsAllWords(*, null)            = false
269      * WordUtils.containsAllWords(*, [])              = false
270      * WordUtils.containsAllWords("abcd", "ab", "cd") = false
271      * WordUtils.containsAllWords("abc def", "def", "abc") = true
272      * </pre>
273      *
274      * @param word The CharSequence to check, may be null.
275      * @param words The array of String words to search for, may be null.
276      * @return {@code true} if all search words are found, {@code false} otherwise.
277      */
278     public static boolean containsAllWords(final CharSequence word, final CharSequence... words) {
279         if (StringUtils.isEmpty(word) || ArrayUtils.isEmpty(words)) {
280             return false;
281         }
282         for (final CharSequence w : words) {
283             if (StringUtils.isBlank(w)) {
284                 return false;
285             }
286             final Pattern p = Pattern.compile(".*\\b" + Pattern.quote(w.toString()) + "\\b.*");
287             if (!p.matcher(word).matches()) {
288                 return false;
289             }
290         }
291         return true;
292     }
293 
294     /**
295      * Given the array of delimiters supplied; returns a function determining whether a character code point is a delimiter.
296      * The function provides O(1) lookup time.
297      * Whitespace is defined by {@link Character#isWhitespace(char)} and is used as the defaultvalue if delimiters is null.
298      *
299      * @param delimiters set of characters to determine delimiters, null means whitespace.
300      * @return Predicate<Integer> taking a code point value as an argument and returning true if a delimiter.
301      */
302     private static Predicate<Integer> generateIsDelimiterFunction(final char[] delimiters) {
303         final Predicate<Integer> isDelimiter;
304         if (delimiters == null || delimiters.length == 0) {
305             isDelimiter = delimiters == null ? Character::isWhitespace : c -> false;
306         } else {
307             final Set<Integer> delimiterSet = new HashSet<>();
308             for (int index = 0; index < delimiters.length; index++) {
309                 delimiterSet.add(Character.codePointAt(delimiters, index));
310             }
311             isDelimiter = delimiterSet::contains;
312         }
313 
314         return isDelimiter;
315     }
316 
317     /**
318      * Extracts the initial characters from each word in the String.
319      *
320      * <p>All first characters after whitespace are returned as a new string.
321      * Their case is not changed.</p>
322      *
323      * <p>Whitespace is defined by {@link Character#isWhitespace(char)}.
324      * A {@code null} input String returns {@code null}.</p>
325      *
326      * <pre>
327      * WordUtils.initials(null)             = null
328      * WordUtils.initials("")               = ""
329      * WordUtils.initials("Ben John Lee")   = "BJL"
330      * WordUtils.initials("Ben J.Lee")      = "BJ"
331      * </pre>
332      *
333      * @param str  the String to get initials from, may be null.
334      * @return String of initial letters, {@code null} if null String input.
335      * @see #initials(String,char[])
336      */
337     public static String initials(final String str) {
338         return initials(str, null);
339     }
340 
341     /**
342      * Extracts the initial characters from each word in the String.
343      *
344      * <p>All first characters after the defined delimiters are returned as a new string.
345      * Their case is not changed.</p>
346      *
347      * <p>If the delimiters array is null, then Whitespace is used.
348      * Whitespace is defined by {@link Character#isWhitespace(char)}.
349      * A {@code null} input String returns {@code null}.
350      * An empty delimiter array returns an empty String.</p>
351      *
352      * <pre>
353      * WordUtils.initials(null, *)                = null
354      * WordUtils.initials("", *)                  = ""
355      * WordUtils.initials("Ben John Lee", null)   = "BJL"
356      * WordUtils.initials("Ben J.Lee", null)      = "BJ"
357      * WordUtils.initials("Ben J.Lee", [' ','.']) = "BJL"
358      * WordUtils.initials(*, new char[0])         = ""
359      * </pre>
360      *
361      * @param str  the String to get initials from, may be null.
362      * @param delimiters  set of characters to determine words, null means whitespace.
363      * @return String of initial characters, {@code null} if null String input.
364      * @see #initials(String)
365      */
366     public static String initials(final String str, final char... delimiters) {
367         if (StringUtils.isEmpty(str)) {
368             return str;
369         }
370         if (delimiters != null && delimiters.length == 0) {
371             return StringUtils.EMPTY;
372         }
373         final Predicate<Integer> isDelimiter = generateIsDelimiterFunction(delimiters);
374         final int strLen = str.length();
375         final int[] newCodePoints = new int[strLen / 2 + 1];
376         int count = 0;
377         boolean lastWasGap = true;
378         for (int i = 0; i < strLen;) {
379             final int codePoint = str.codePointAt(i);
380 
381             if (isDelimiter.test(codePoint)) {
382                 lastWasGap = true;
383             } else if (lastWasGap) {
384                 newCodePoints[count++] = codePoint;
385                 lastWasGap = false;
386             }
387 
388             i += Character.charCount(codePoint);
389         }
390         return new String(newCodePoints, 0, count);
391     }
392 
393     /**
394      * Is the character a delimiter.
395      *
396      * @param ch the character to check.
397      * @param delimiters the delimiters.
398      * @return true if it is a delimiter.
399      * @deprecated as of 1.2 and will be removed in 2.0.
400      */
401     @Deprecated
402     public static boolean isDelimiter(final char ch, final char[] delimiters) {
403         if (delimiters == null) {
404             return Character.isWhitespace(ch);
405         }
406         for (final char delimiter : delimiters) {
407             if (ch == delimiter) {
408                 return true;
409             }
410         }
411         return false;
412     }
413 
414     /**
415      * Is the codePoint a delimiter.
416      *
417      * @param codePoint the codePint to check.
418      * @param delimiters the delimiters.
419      * @return true if it is a delimiter.
420      * @deprecated as of 1.2 and will be removed in 2.0.
421      */
422     @Deprecated
423     public static boolean isDelimiter(final int codePoint, final char[] delimiters) {
424         if (delimiters == null) {
425             return Character.isWhitespace(codePoint);
426         }
427         for (int index = 0; index < delimiters.length; index++) {
428             final int delimiterCodePoint = Character.codePointAt(delimiters, index);
429             if (delimiterCodePoint == codePoint) {
430                 return true;
431             }
432         }
433         return false;
434     }
435 
436     /**
437      * Swaps the case of a String using a word based algorithm.
438      *
439      * <ul>
440      *  <li>Upper case character converts to Lower case</li>
441      *  <li>Title case character converts to Lower case</li>
442      *  <li>Lower case character after Whitespace or at start converts to Title case</li>
443      *  <li>Other Lower case character converts to Upper case</li>
444      * </ul>
445      *
446      * <p>Whitespace is defined by {@link Character#isWhitespace(char)}.
447      * A {@code null} input String returns {@code null}.</p>
448      *
449      * <pre>
450      * StringUtils.swapCase(null)                 = null
451      * StringUtils.swapCase("")                   = ""
452      * StringUtils.swapCase("The dog has a BONE") = "tHE DOG HAS A bone"
453      * </pre>
454      *
455      * @param str  the String to swap case, may be null.
456      * @return The changed String, {@code null} if null String input.
457      */
458     public static String swapCase(final String str) {
459         if (StringUtils.isEmpty(str)) {
460             return str;
461         }
462         final int strLen = str.length();
463         final int[] newCodePoints = new int[strLen];
464         int outOffset = 0;
465         boolean whitespace = true;
466         for (int index = 0; index < strLen;) {
467             final int oldCodepoint = str.codePointAt(index);
468             final int newCodePoint;
469             if (Character.isUpperCase(oldCodepoint) || Character.isTitleCase(oldCodepoint)) {
470                 newCodePoint = Character.toLowerCase(oldCodepoint);
471                 whitespace = false;
472             } else if (Character.isLowerCase(oldCodepoint)) {
473                 if (whitespace) {
474                     newCodePoint = Character.toTitleCase(oldCodepoint);
475                     whitespace = false;
476                 } else {
477                     newCodePoint = Character.toUpperCase(oldCodepoint);
478                 }
479             } else {
480                 whitespace = Character.isWhitespace(oldCodepoint);
481                 newCodePoint = oldCodepoint;
482             }
483             newCodePoints[outOffset++] = newCodePoint;
484             index += Character.charCount(newCodePoint);
485         }
486         return new String(newCodePoints, 0, outOffset);
487     }
488 
489     /**
490      * Uncapitalizes all the whitespace separated words in a String.
491      * Only the first character of each word is changed.
492      *
493      * <p>Whitespace is defined by {@link Character#isWhitespace(char)}.
494      * A {@code null} input String returns {@code null}.</p>
495      *
496      * <pre>
497      * WordUtils.uncapitalize(null)        = null
498      * WordUtils.uncapitalize("")          = ""
499      * WordUtils.uncapitalize("I Am FINE") = "i am fINE"
500      * </pre>
501      *
502      * @param str  the String to uncapitalize, may be null.
503      * @return uncapitalized String, {@code null} if null String input.
504      * @see #capitalize(String)
505      */
506     public static String uncapitalize(final String str) {
507         return uncapitalize(str, null);
508     }
509 
510     /**
511      * Uncapitalizes all the whitespace separated words in a String.
512      * Only the first character of each word is changed.
513      *
514      * <p>The delimiters represent a set of characters understood to separate words.
515      * The first string character and the first non-delimiter character after a
516      * delimiter will be uncapitalized.</p>
517      *
518      * <p>Whitespace is defined by {@link Character#isWhitespace(char)}.
519      * A {@code null} input String returns {@code null}.</p>
520      *
521      * <pre>
522      * WordUtils.uncapitalize(null, *)            = null
523      * WordUtils.uncapitalize("", *)              = ""
524      * WordUtils.uncapitalize(*, null)            = *
525      * WordUtils.uncapitalize(*, new char[0])     = *
526      * WordUtils.uncapitalize("I AM.FINE", {'.'}) = "i AM.fINE"
527      * WordUtils.uncapitalize("I am fine", new char[]{}) = "i am fine"
528      * </pre>
529      *
530      * @param str  the String to uncapitalize, may be null.
531      * @param delimiters  set of characters to determine uncapitalization, null means whitespace.
532      * @return uncapitalized String, {@code null} if null String input.
533      * @see #capitalize(String)
534      */
535     public static String uncapitalize(final String str, final char... delimiters) {
536         if (StringUtils.isEmpty(str)) {
537             return str;
538         }
539         final Predicate<Integer> isDelimiter = generateIsDelimiterFunction(delimiters);
540         final int strLen = str.length();
541         final int[] newCodePoints = new int[strLen];
542         int outOffset = 0;
543 
544         boolean uncapitalizeNext = true;
545         for (int index = 0; index < strLen;) {
546             final int codePoint = str.codePointAt(index);
547 
548             if (isDelimiter.test(codePoint)) {
549                 uncapitalizeNext = true;
550                 newCodePoints[outOffset++] = codePoint;
551                 index += Character.charCount(codePoint);
552             } else if (uncapitalizeNext) {
553                 final int titleCaseCodePoint = Character.toLowerCase(codePoint);
554                 newCodePoints[outOffset++] = titleCaseCodePoint;
555                 index += Character.charCount(titleCaseCodePoint);
556                 uncapitalizeNext = false;
557             } else {
558                 newCodePoints[outOffset++] = codePoint;
559                 index += Character.charCount(codePoint);
560             }
561         }
562         return new String(newCodePoints, 0, outOffset);
563     }
564 
565     /**
566      * Wraps a single line of text, identifying words by {@code ' '}.
567      *
568      * <p>New lines will be separated by the system property line separator.
569      * Very long words, such as URLs will <em>not</em> be wrapped.</p>
570      *
571      * <p>Leading spaces on a new line are stripped.
572      * Trailing spaces are not stripped.</p>
573      *
574      * <table border="1">
575      *  <caption>Examples</caption>
576      *  <tr>
577      *   <th>input</th>
578      *   <th>wrapLength</th>
579      *   <th>result</th>
580      *  </tr>
581      *  <tr>
582      *   <td>null</td>
583      *   <td>*</td>
584      *   <td>null</td>
585      *  </tr>
586      *  <tr>
587      *   <td>""</td>
588      *   <td>*</td>
589      *   <td>""</td>
590      *  </tr>
591      *  <tr>
592      *   <td>"Here is one line of text that is going to be wrapped after 20 columns."</td>
593      *   <td>20</td>
594      *   <td>"Here is one line of\ntext that is going\nto be wrapped after\n20 columns."</td>
595      *  </tr>
596      *  <tr>
597      *   <td>"Click here to jump to the commons website - https://commons.apache.org"</td>
598      *   <td>20</td>
599      *   <td>"Click here to jump\nto the commons\nwebsite -\nhttps://commons.apache.org"</td>
600      *  </tr>
601      *  <tr>
602      *   <td>"Click here, https://commons.apache.org, to jump to the commons website"</td>
603      *   <td>20</td>
604      *   <td>"Click here,\nhttps://commons.apache.org,\nto jump to the\ncommons website"</td>
605      *  </tr>
606      * </table>
607      *
608      * (assuming that '\n' is the systems line separator)
609      *
610      * @param str  the String to be word wrapped, may be null.
611      * @param wrapLength  the column to wrap the words at, less than 1 is treated as 1.
612      * @return a line with newlines inserted, {@code null} if null input.
613      */
614     public static String wrap(final String str, final int wrapLength) {
615         return wrap(str, wrapLength, null, false);
616     }
617 
618     /**
619      * Wraps a single line of text, identifying words by {@code ' '}.
620      *
621      * <p>Leading spaces on a new line are stripped.
622      * Trailing spaces are not stripped.</p>
623      *
624      * <table border="1">
625      *  <caption>Examples</caption>
626      *  <tr>
627      *   <th>input</th>
628      *   <th>wrapLength</th>
629      *   <th>newLineString</th>
630      *   <th>wrapLongWords</th>
631      *   <th>result</th>
632      *  </tr>
633      *  <tr>
634      *   <td>null</td>
635      *   <td>*</td>
636      *   <td>*</td>
637      *   <td>true/false</td>
638      *   <td>null</td>
639      *  </tr>
640      *  <tr>
641      *   <td>""</td>
642      *   <td>*</td>
643      *   <td>*</td>
644      *   <td>true/false</td>
645      *   <td>""</td>
646      *  </tr>
647      *  <tr>
648      *   <td>"Here is one line of text that is going to be wrapped after 20 columns."</td>
649      *   <td>20</td>
650      *   <td>"\n"</td>
651      *   <td>true/false</td>
652      *   <td>"Here is one line of\ntext that is going\nto be wrapped after\n20 columns."</td>
653      *  </tr>
654      *  <tr>
655      *   <td>"Here is one line of text that is going to be wrapped after 20 columns."</td>
656      *   <td>20</td>
657      *   <td>"&lt;br /&gt;"</td>
658      *   <td>true/false</td>
659      *   <td>"Here is one line of&lt;br /&gt;text that is going&lt;
660      *   br /&gt;to be wrapped after&lt;br /&gt;20 columns."</td>
661      *  </tr>
662      *  <tr>
663      *   <td>"Here is one line of text that is going to be wrapped after 20 columns."</td>
664      *   <td>20</td>
665      *   <td>null</td>
666      *   <td>true/false</td>
667      *   <td>"Here is one line of" + systemNewLine + "text that is going"
668      *   + systemNewLine + "to be wrapped after" + systemNewLine + "20 columns."</td>
669      *  </tr>
670      *  <tr>
671      *   <td>"Click here to jump to the commons website - https://commons.apache.org"</td>
672      *   <td>20</td>
673      *   <td>"\n"</td>
674      *   <td>false</td>
675      *   <td>"Click here to jump\nto the commons\nwebsite -\nhttps://commons.apache.org"</td>
676      *  </tr>
677      *  <tr>
678      *   <td>"Click here to jump to the commons website - https://commons.apache.org"</td>
679      *   <td>20</td>
680      *   <td>"\n"</td>
681      *   <td>true</td>
682      *   <td>"Click here to jump\nto the commons\nwebsite -\nhttps://commons.apach\ne.org"</td>
683      *  </tr>
684      * </table>
685      *
686      * @param str  the String to be word wrapped, may be null.
687      * @param wrapLength  the column to wrap the words at, less than 1 is treated as 1.
688      * @param newLineStr  the string to insert for a new line, {@code null} uses the system property line separator.
689      * @param wrapLongWords  true if long words (such as URLs) should be wrapped.
690      * @return a line with newlines inserted, {@code null} if null input.
691      */
692     public static String wrap(final String str,
693                               final int wrapLength,
694                               final String newLineStr,
695                               final boolean wrapLongWords) {
696         return wrap(str, wrapLength, newLineStr, wrapLongWords, " ");
697     }
698 
699     /**
700      * Wraps a single line of text, identifying words by {@code wrapOn}.
701      *
702      * <p>Leading spaces on a new line are stripped.
703      * Trailing spaces are not stripped.</p>
704      *
705      * <table border="1">
706      *  <caption>Examples</caption>
707      *  <tr>
708      *   <th>input</th>
709      *   <th>wrapLength</th>
710      *   <th>newLineString</th>
711      *   <th>wrapLongWords</th>
712      *   <th>wrapOn</th>
713      *   <th>result</th>
714      *  </tr>
715      *  <tr>
716      *   <td>null</td>
717      *   <td>*</td>
718      *   <td>*</td>
719      *   <td>true/false</td>
720      *   <td>*</td>
721      *   <td>null</td>
722      *  </tr>
723      *  <tr>
724      *   <td>""</td>
725      *   <td>*</td>
726      *   <td>*</td>
727      *   <td>true/false</td>
728      *   <td>*</td>
729      *   <td>""</td>
730      *  </tr>
731      *  <tr>
732      *   <td>"Here is one line of text that is going to be wrapped after 20 columns."</td>
733      *   <td>20</td>
734      *   <td>"\n"</td>
735      *   <td>true/false</td>
736      *   <td>" "</td>
737      *   <td>"Here is one line of\ntext that is going\nto be wrapped after\n20 columns."</td>
738      *  </tr>
739      *  <tr>
740      *   <td>"Here is one line of text that is going to be wrapped after 20 columns."</td>
741      *   <td>20</td>
742      *   <td>"&lt;br /&gt;"</td>
743      *   <td>true/false</td>
744      *   <td>" "</td>
745      *   <td>"Here is one line of&lt;br /&gt;text that is going&lt;br /&gt;
746      *   to be wrapped after&lt;br /&gt;20 columns."</td>
747      *  </tr>
748      *  <tr>
749      *   <td>"Here is one line of text that is going to be wrapped after 20 columns."</td>
750      *   <td>20</td>
751      *   <td>null</td>
752      *   <td>true/false</td>
753      *   <td>" "</td>
754      *   <td>"Here is one line of" + systemNewLine + "text that is going"
755      *   + systemNewLine + "to be wrapped after" + systemNewLine + "20 columns."</td>
756      *  </tr>
757      *  <tr>
758      *   <td>"Click here to jump to the commons website - https://commons.apache.org"</td>
759      *   <td>20</td>
760      *   <td>"\n"</td>
761      *   <td>false</td>
762      *   <td>" "</td>
763      *   <td>"Click here to jump\nto the commons\nwebsite -\nhttps://commons.apache.org"</td>
764      *  </tr>
765      *  <tr>
766      *   <td>"Click here to jump to the commons website - https://commons.apache.org"</td>
767      *   <td>20</td>
768      *   <td>"\n"</td>
769      *   <td>true</td>
770      *   <td>" "</td>
771      *   <td>"Click here to jump\nto the commons\nwebsite -\nhttps://commons.apach\ne.org"</td>
772      *  </tr>
773      *  <tr>
774      *   <td>"flammable/inflammable"</td>
775      *   <td>20</td>
776      *   <td>"\n"</td>
777      *   <td>true</td>
778      *   <td>"/"</td>
779      *   <td>"flammable\ninflammable"</td>
780      *  </tr>
781      * </table>
782      * @param str  the String to be word wrapped, may be null.
783      * @param wrapLength  the column to wrap the words at, less than 1 is treated as 1.
784      * @param newLineStr  the string to insert for a new line, {@code null} uses the system property line separator.
785      * @param wrapLongWords  true if long words (such as URLs) should be wrapped.
786      * @param wrapOn regex expression to be used as a breakable characters, if blank string is provided a space character will be used.
787      * @return a line with newlines inserted, {@code null} if null input.
788      */
789     public static String wrap(final String str,
790                               int wrapLength,
791                               String newLineStr,
792                               final boolean wrapLongWords,
793                               String wrapOn) {
794         if (str == null) {
795             return null;
796         }
797         if (newLineStr == null) {
798             newLineStr = System.lineSeparator();
799         }
800         if (wrapLength < 1) {
801             wrapLength = 1;
802         }
803         if (StringUtils.isBlank(wrapOn)) {
804             wrapOn = " ";
805         }
806         final Pattern patternToWrapOn = Pattern.compile(wrapOn);
807         final int inputLineLength = str.length();
808         int offset = 0;
809         final StringBuilder wrappedLine = new StringBuilder(inputLineLength + 32);
810         int matcherSize = -1;
811 
812         while (offset < inputLineLength) {
813             int spaceToWrapAt = -1;
814             Matcher matcher = patternToWrapOn.matcher(str.substring(offset,
815                     Math.min((int) Math.min(Integer.MAX_VALUE, offset + wrapLength + 1L), inputLineLength)));
816             if (matcher.find()) {
817                 if (matcher.start() == 0) {
818                     matcherSize = matcher.end();
819                     if (matcherSize != 0) {
820                         offset += matcher.end();
821                         continue;
822                     }
823                     offset += 1;
824                 }
825                 spaceToWrapAt = matcher.start() + offset;
826             }
827 
828             // only last line without leading spaces is left
829             if (inputLineLength - offset <= wrapLength) {
830                 break;
831             }
832 
833             while (matcher.find()) {
834                 spaceToWrapAt = matcher.start() + offset;
835             }
836 
837             if (spaceToWrapAt >= offset) {
838                 // normal case
839                 wrappedLine.append(str, offset, spaceToWrapAt);
840                 wrappedLine.append(newLineStr);
841                 offset = spaceToWrapAt + 1;
842 
843             } else // really long word or URL
844             if (wrapLongWords) {
845                 if (matcherSize == 0) {
846                     offset--;
847                 }
848                 // wrap really long word one line at a time
849                 wrappedLine.append(str, offset, wrapLength + offset);
850                 wrappedLine.append(newLineStr);
851                 offset += wrapLength;
852                 matcherSize = -1;
853             } else {
854                 // do not wrap really long word, just extend beyond limit
855                 matcher = patternToWrapOn.matcher(str.substring(offset + wrapLength));
856                 if (matcher.find()) {
857                     matcherSize = matcher.end() - matcher.start();
858                     spaceToWrapAt = matcher.start() + offset + wrapLength;
859                 }
860 
861                 if (spaceToWrapAt >= 0) {
862                     if (matcherSize == 0 && offset != 0) {
863                         offset--;
864                     }
865                     wrappedLine.append(str, offset, spaceToWrapAt);
866                     wrappedLine.append(newLineStr);
867                     offset = spaceToWrapAt + 1;
868                 } else {
869                     if (matcherSize == 0 && offset != 0) {
870                         offset--;
871                     }
872                     wrappedLine.append(str, offset, str.length());
873                     offset = inputLineLength;
874                     matcherSize = -1;
875                 }
876             }
877         }
878 
879         if (matcherSize == 0 && offset < inputLineLength) {
880             offset--;
881         }
882 
883         // Whatever is left in line is short enough to just pass through
884         wrappedLine.append(str, offset, str.length());
885 
886         return wrappedLine.toString();
887     }
888 
889     /**
890      * {@code WordUtils} instances should NOT be constructed in
891      * standard programming. Instead, the class should be used as
892      * {@code WordUtils.wrap("foo bar", 20);}.
893      *
894      * <p>This constructor is public to permit tools that require a JavaBean
895      * instance to operate.</p>
896      */
897     public WordUtils() {
898     }
899  }