001/*
002 * Licensed to the Apache Software Foundation (ASF) under one or more
003 * contributor license agreements.  See the NOTICE file distributed with
004 * this work for additional information regarding copyright ownership.
005 * The ASF licenses this file to You under the Apache License, Version 2.0
006 * (the "License"); you may not use this file except in compliance with
007 * the License.  You may obtain a copy of the License at
008 *
009 *      https://www.apache.org/licenses/LICENSE-2.0
010 *
011 * Unless required by applicable law or agreed to in writing, software
012 * distributed under the License is distributed on an "AS IS" BASIS,
013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 * See the License for the specific language governing permissions and
015 * limitations under the License.
016 */
017package org.apache.commons.text;
018
019import java.util.HashSet;
020import java.util.Set;
021import java.util.function.Predicate;
022import java.util.regex.Matcher;
023import java.util.regex.Pattern;
024
025import org.apache.commons.lang3.ArrayUtils;
026import org.apache.commons.lang3.StringUtils;
027import org.apache.commons.lang3.Strings;
028import org.apache.commons.lang3.Validate;
029
030/**
031 * Operations on Strings that contain words.
032 *
033 * <p>
034 * This class tries to handle {@code null} input gracefully. An exception will not be thrown for a
035 * {@code null} input. Each method documents its behavior in more detail.
036 * </p>
037 *
038 * @since 1.1
039 */
040public class WordUtils {
041
042    /**
043     * Abbreviates the words nicely.
044     *
045     * <p>
046     * This method searches for the first space after the lower limit and abbreviates
047     * the String there. It will also append any String passed as a parameter
048     * to the end of the String. The upper limit can be specified to forcibly
049     * abbreviate a String.
050     * </p>
051     *
052     * @param str         the string to be abbreviated. If null is passed, null is returned.
053     *                    If the empty String is passed, the empty string is returned.
054     * @param lower       the lower limit; negative value is treated as zero.
055     * @param upper       the upper limit; specify -1 if no limit is desired.
056     *                    The upper limit cannot be lower than the lower limit.
057     * @param appendToEnd String to be appended to the end of the abbreviated string.
058     *                    This is appended ONLY if the string was indeed abbreviated.
059     *                    The append does not count towards the lower or upper limits.
060     * @return The abbreviated String.
061     *
062     * <pre>
063     * WordUtils.abbreviate("Now is the time for all good men", 0, 40, null));     = "Now"
064     * WordUtils.abbreviate("Now is the time for all good men", 10, 40, null));    = "Now is the"
065     * WordUtils.abbreviate("Now is the time for all good men", 20, 40, null));    = "Now is the time for all"
066     * WordUtils.abbreviate("Now is the time for all good men", 0, 40, ""));       = "Now"
067     * WordUtils.abbreviate("Now is the time for all good men", 10, 40, ""));      = "Now is the"
068     * WordUtils.abbreviate("Now is the time for all good men", 20, 40, ""));      = "Now is the time for all"
069     * WordUtils.abbreviate("Now is the time for all good men", 0, 40, " ..."));   = "Now ..."
070     * WordUtils.abbreviate("Now is the time for all good men", 10, 40, " ..."));  = "Now is the ..."
071     * WordUtils.abbreviate("Now is the time for all good men", 20, 40, " ..."));  = "Now is the time for all ..."
072     * WordUtils.abbreviate("Now is the time for all good men", 0, -1, ""));       = "Now"
073     * WordUtils.abbreviate("Now is the time for all good men", 10, -1, ""));      = "Now is the"
074     * WordUtils.abbreviate("Now is the time for all good men", 20, -1, ""));      = "Now is the time for all"
075     * WordUtils.abbreviate("Now is the time for all good men", 50, -1, ""));      = "Now is the time for all good men"
076     * WordUtils.abbreviate("Now is the time for all good men", 1000, -1, ""));    = "Now is the time for all good men"
077     * WordUtils.abbreviate("Now is the time for all good men", 9, -10, null));    = Throws {@link IllegalArgumentException}
078     * WordUtils.abbreviate("Now is the time for all good men", 10, 5, null));     = Throws {@link IllegalArgumentException}
079     * </pre>
080     */
081    public static String abbreviate(final String str, int lower, int upper, final String appendToEnd) {
082        Validate.isTrue(upper >= -1, "upper value cannot be less than -1");
083        Validate.isTrue(upper >= lower || upper == -1, "upper value is less than lower value");
084        if (StringUtils.isEmpty(str)) {
085            return str;
086        }
087
088        // if the lower value is greater than the length of the string,
089        // set to the length of the string
090        if (lower > str.length()) {
091            lower = str.length();
092        }
093
094        // if the upper value is -1 (i.e. no limit) or is greater
095        // than the length of the string, set to the length of the string
096        if (upper == -1 || upper > str.length()) {
097            upper = str.length();
098        }
099
100        final StringBuilder result = new StringBuilder();
101        final int index = Strings.CS.indexOf(str, " ", lower);
102        if (index == -1) {
103            result.append(str, 0, upper);
104            // only if abbreviation has occurred do we append the appendToEnd value
105            if (upper != str.length()) {
106                result.append(StringUtils.defaultString(appendToEnd));
107            }
108        } else {
109            result.append(str, 0, Math.min(index, upper));
110            result.append(StringUtils.defaultString(appendToEnd));
111        }
112
113        return result.toString();
114    }
115
116    /**
117     * Capitalizes all the whitespace separated words in a String.
118     * Only the first character of each word is changed. To convert the
119     * rest of each word to lowercase at the same time,
120     * use {@link #capitalizeFully(String)}.
121     *
122     * <p>Whitespace is defined by {@link Character#isWhitespace(char)}.
123     * A {@code null} input String returns {@code null}.
124     * Capitalization uses the Unicode title case, normally equivalent to
125     * upper case.</p>
126     *
127     * <pre>
128     * WordUtils.capitalize(null)        = null
129     * WordUtils.capitalize("")          = ""
130     * WordUtils.capitalize("i am FINE") = "I Am FINE"
131     * </pre>
132     *
133     * @param str  the String to capitalize, may be null.
134     * @return capitalized String, {@code null} if null String input.
135     * @see #uncapitalize(String)
136     * @see #capitalizeFully(String)
137     */
138    public static String capitalize(final String str) {
139        return capitalize(str, null);
140    }
141
142    /**
143     * Capitalizes all the delimiter separated words in a String.
144     * Only the first character of each word is changed. To convert the
145     * rest of each word to lowercase at the same time,
146     * use {@link #capitalizeFully(String, char[])}.
147     *
148     * <p>The delimiters represent a set of characters understood to separate words.
149     * The first string character and the first non-delimiter character after a
150     * delimiter will be capitalized.</p>
151     *
152     * <p>A {@code null} input String returns {@code null}.
153     * Capitalization uses the Unicode title case, normally equivalent to
154     * upper case.</p>
155     *
156     * <pre>
157     * WordUtils.capitalize(null, *)            = null
158     * WordUtils.capitalize("", *)              = ""
159     * WordUtils.capitalize(*, new char[0])     = *
160     * WordUtils.capitalize("i am fine", null)  = "I Am Fine"
161     * WordUtils.capitalize("i aM.fine", {'.'}) = "I aM.Fine"
162     * WordUtils.capitalize("i am fine", new char[]{}) = "I am fine"
163     * </pre>
164     *
165     * @param str  the String to capitalize, may be null.
166     * @param delimiters  set of characters to determine capitalization, null means whitespace.
167     * @return capitalized String, {@code null} if null String input.
168     * @see #uncapitalize(String)
169     * @see #capitalizeFully(String)
170     */
171    public static String capitalize(final String str, final char... delimiters) {
172        if (StringUtils.isEmpty(str)) {
173            return str;
174        }
175        final Predicate<Integer> isDelimiter = generateIsDelimiterFunction(delimiters);
176        final int strLen = str.length();
177        final int[] newCodePoints = new int[strLen];
178        int outOffset = 0;
179
180        boolean capitalizeNext = true;
181        for (int index = 0; index < strLen;) {
182            final int codePoint = str.codePointAt(index);
183
184            if (isDelimiter.test(codePoint)) {
185                capitalizeNext = true;
186                newCodePoints[outOffset++] = codePoint;
187                index += Character.charCount(codePoint);
188            } else if (capitalizeNext) {
189                final int titleCaseCodePoint = Character.toTitleCase(codePoint);
190                newCodePoints[outOffset++] = titleCaseCodePoint;
191                index += Character.charCount(titleCaseCodePoint);
192                capitalizeNext = false;
193            } else {
194                newCodePoints[outOffset++] = codePoint;
195                index += Character.charCount(codePoint);
196            }
197        }
198        return new String(newCodePoints, 0, outOffset);
199    }
200
201    /**
202     * Converts all the whitespace separated words in a String into capitalized words,
203     * that is each word is made up of a titlecase character and then a series of
204     * lowercase characters.
205     *
206     * <p>Whitespace is defined by {@link Character#isWhitespace(char)}.
207     * A {@code null} input String returns {@code null}.
208     * Capitalization uses the Unicode title case, normally equivalent to
209     * upper case.</p>
210     *
211     * <pre>
212     * WordUtils.capitalizeFully(null)        = null
213     * WordUtils.capitalizeFully("")          = ""
214     * WordUtils.capitalizeFully("i am FINE") = "I Am Fine"
215     * </pre>
216     *
217     * @param str  the String to capitalize, may be null.
218     * @return capitalized String, {@code null} if null String input.
219     */
220    public static String capitalizeFully(final String str) {
221        return capitalizeFully(str, null);
222    }
223
224    /**
225     * Converts all the delimiter separated words in a String into capitalized words,
226     * that is each word is made up of a titlecase character and then a series of
227     * lowercase characters.
228     *
229     * <p>The delimiters represent a set of characters understood to separate words.
230     * The first string character and the first non-delimiter character after a
231     * delimiter will be capitalized.</p>
232     *
233     * <p>A {@code null} input String returns {@code null}.
234     * Capitalization uses the Unicode title case, normally equivalent to
235     * upper case.</p>
236     *
237     * <pre>
238     * WordUtils.capitalizeFully(null, *)            = null
239     * WordUtils.capitalizeFully("", *)              = ""
240     * WordUtils.capitalizeFully(*, null)            = *
241     * WordUtils.capitalizeFully(*, new char[0])     = *
242     * WordUtils.capitalizeFully("i aM.fine", {'.'}) = "I am.Fine"
243     * </pre>
244     *
245     * @param str  the String to capitalize, may be null.
246     * @param delimiters  set of characters to determine capitalization, null means whitespace.
247     * @return capitalized String, {@code null} if null String input.
248     */
249    public static String capitalizeFully(String str, final char... delimiters) {
250        if (StringUtils.isEmpty(str)) {
251            return str;
252        }
253        str = str.toLowerCase();
254        return capitalize(str, delimiters);
255    }
256
257    /**
258     * Checks if the String contains all words in the given array.
259     *
260     * <p>
261     * A {@code null} String will return {@code false}. A {@code null}, zero
262     * length search array or if one element of array is null will return {@code false}.
263     * </p>
264     *
265     * <pre>
266     * WordUtils.containsAllWords(null, *)            = false
267     * WordUtils.containsAllWords("", *)              = false
268     * WordUtils.containsAllWords(*, null)            = false
269     * WordUtils.containsAllWords(*, [])              = false
270     * WordUtils.containsAllWords("abcd", "ab", "cd") = false
271     * WordUtils.containsAllWords("abc def", "def", "abc") = true
272     * </pre>
273     *
274     * @param word The CharSequence to check, may be null.
275     * @param words The array of String words to search for, may be null.
276     * @return {@code true} if all search words are found, {@code false} otherwise.
277     */
278    public static boolean containsAllWords(final CharSequence word, final CharSequence... words) {
279        if (StringUtils.isEmpty(word) || ArrayUtils.isEmpty(words)) {
280            return false;
281        }
282        for (final CharSequence w : words) {
283            if (StringUtils.isBlank(w)) {
284                return false;
285            }
286            final Pattern p = Pattern.compile(".*\\b" + Pattern.quote(w.toString()) + "\\b.*");
287            if (!p.matcher(word).matches()) {
288                return false;
289            }
290        }
291        return true;
292    }
293
294    /**
295     * Given the array of delimiters supplied; returns a function determining whether a character code point is a delimiter.
296     * The function provides O(1) lookup time.
297     * Whitespace is defined by {@link Character#isWhitespace(char)} and is used as the defaultvalue if delimiters is null.
298     *
299     * @param delimiters set of characters to determine delimiters, null means whitespace.
300     * @return Predicate<Integer> taking a code point value as an argument and returning true if a delimiter.
301     */
302    private static Predicate<Integer> generateIsDelimiterFunction(final char[] delimiters) {
303        final Predicate<Integer> isDelimiter;
304        if (delimiters == null || delimiters.length == 0) {
305            isDelimiter = delimiters == null ? Character::isWhitespace : c -> false;
306        } else {
307            final Set<Integer> delimiterSet = new HashSet<>();
308            for (int index = 0; index < delimiters.length; index++) {
309                delimiterSet.add(Character.codePointAt(delimiters, index));
310            }
311            isDelimiter = delimiterSet::contains;
312        }
313
314        return isDelimiter;
315    }
316
317    /**
318     * Extracts the initial characters from each word in the String.
319     *
320     * <p>All first characters after whitespace are returned as a new string.
321     * Their case is not changed.</p>
322     *
323     * <p>Whitespace is defined by {@link Character#isWhitespace(char)}.
324     * A {@code null} input String returns {@code null}.</p>
325     *
326     * <pre>
327     * WordUtils.initials(null)             = null
328     * WordUtils.initials("")               = ""
329     * WordUtils.initials("Ben John Lee")   = "BJL"
330     * WordUtils.initials("Ben J.Lee")      = "BJ"
331     * </pre>
332     *
333     * @param str  the String to get initials from, may be null.
334     * @return String of initial letters, {@code null} if null String input.
335     * @see #initials(String,char[])
336     */
337    public static String initials(final String str) {
338        return initials(str, null);
339    }
340
341    /**
342     * Extracts the initial characters from each word in the String.
343     *
344     * <p>All first characters after the defined delimiters are returned as a new string.
345     * Their case is not changed.</p>
346     *
347     * <p>If the delimiters array is null, then Whitespace is used.
348     * Whitespace is defined by {@link Character#isWhitespace(char)}.
349     * A {@code null} input String returns {@code null}.
350     * An empty delimiter array returns an empty String.</p>
351     *
352     * <pre>
353     * WordUtils.initials(null, *)                = null
354     * WordUtils.initials("", *)                  = ""
355     * WordUtils.initials("Ben John Lee", null)   = "BJL"
356     * WordUtils.initials("Ben J.Lee", null)      = "BJ"
357     * WordUtils.initials("Ben J.Lee", [' ','.']) = "BJL"
358     * WordUtils.initials(*, new char[0])         = ""
359     * </pre>
360     *
361     * @param str  the String to get initials from, may be null.
362     * @param delimiters  set of characters to determine words, null means whitespace.
363     * @return String of initial characters, {@code null} if null String input.
364     * @see #initials(String)
365     */
366    public static String initials(final String str, final char... delimiters) {
367        if (StringUtils.isEmpty(str)) {
368            return str;
369        }
370        if (delimiters != null && delimiters.length == 0) {
371            return StringUtils.EMPTY;
372        }
373        final Predicate<Integer> isDelimiter = generateIsDelimiterFunction(delimiters);
374        final int strLen = str.length();
375        final int[] newCodePoints = new int[strLen / 2 + 1];
376        int count = 0;
377        boolean lastWasGap = true;
378        for (int i = 0; i < strLen;) {
379            final int codePoint = str.codePointAt(i);
380
381            if (isDelimiter.test(codePoint)) {
382                lastWasGap = true;
383            } else if (lastWasGap) {
384                newCodePoints[count++] = codePoint;
385                lastWasGap = false;
386            }
387
388            i += Character.charCount(codePoint);
389        }
390        return new String(newCodePoints, 0, count);
391    }
392
393    /**
394     * Is the character a delimiter.
395     *
396     * @param ch the character to check.
397     * @param delimiters the delimiters.
398     * @return true if it is a delimiter.
399     * @deprecated as of 1.2 and will be removed in 2.0.
400     */
401    @Deprecated
402    public static boolean isDelimiter(final char ch, final char[] delimiters) {
403        if (delimiters == null) {
404            return Character.isWhitespace(ch);
405        }
406        for (final char delimiter : delimiters) {
407            if (ch == delimiter) {
408                return true;
409            }
410        }
411        return false;
412    }
413
414    /**
415     * Is the codePoint a delimiter.
416     *
417     * @param codePoint the codePint to check.
418     * @param delimiters the delimiters.
419     * @return true if it is a delimiter.
420     * @deprecated as of 1.2 and will be removed in 2.0.
421     */
422    @Deprecated
423    public static boolean isDelimiter(final int codePoint, final char[] delimiters) {
424        if (delimiters == null) {
425            return Character.isWhitespace(codePoint);
426        }
427        for (int index = 0; index < delimiters.length; index++) {
428            final int delimiterCodePoint = Character.codePointAt(delimiters, index);
429            if (delimiterCodePoint == codePoint) {
430                return true;
431            }
432        }
433        return false;
434    }
435
436    /**
437     * Swaps the case of a String using a word based algorithm.
438     *
439     * <ul>
440     *  <li>Upper case character converts to Lower case</li>
441     *  <li>Title case character converts to Lower case</li>
442     *  <li>Lower case character after Whitespace or at start converts to Title case</li>
443     *  <li>Other Lower case character converts to Upper case</li>
444     * </ul>
445     *
446     * <p>Whitespace is defined by {@link Character#isWhitespace(char)}.
447     * A {@code null} input String returns {@code null}.</p>
448     *
449     * <pre>
450     * StringUtils.swapCase(null)                 = null
451     * StringUtils.swapCase("")                   = ""
452     * StringUtils.swapCase("The dog has a BONE") = "tHE DOG HAS A bone"
453     * </pre>
454     *
455     * @param str  the String to swap case, may be null.
456     * @return The changed String, {@code null} if null String input.
457     */
458    public static String swapCase(final String str) {
459        if (StringUtils.isEmpty(str)) {
460            return str;
461        }
462        final int strLen = str.length();
463        final int[] newCodePoints = new int[strLen];
464        int outOffset = 0;
465        boolean whitespace = true;
466        for (int index = 0; index < strLen;) {
467            final int oldCodepoint = str.codePointAt(index);
468            final int newCodePoint;
469            if (Character.isUpperCase(oldCodepoint) || Character.isTitleCase(oldCodepoint)) {
470                newCodePoint = Character.toLowerCase(oldCodepoint);
471                whitespace = false;
472            } else if (Character.isLowerCase(oldCodepoint)) {
473                if (whitespace) {
474                    newCodePoint = Character.toTitleCase(oldCodepoint);
475                    whitespace = false;
476                } else {
477                    newCodePoint = Character.toUpperCase(oldCodepoint);
478                }
479            } else {
480                whitespace = Character.isWhitespace(oldCodepoint);
481                newCodePoint = oldCodepoint;
482            }
483            newCodePoints[outOffset++] = newCodePoint;
484            index += Character.charCount(newCodePoint);
485        }
486        return new String(newCodePoints, 0, outOffset);
487    }
488
489    /**
490     * Uncapitalizes all the whitespace separated words in a String.
491     * Only the first character of each word is changed.
492     *
493     * <p>Whitespace is defined by {@link Character#isWhitespace(char)}.
494     * A {@code null} input String returns {@code null}.</p>
495     *
496     * <pre>
497     * WordUtils.uncapitalize(null)        = null
498     * WordUtils.uncapitalize("")          = ""
499     * WordUtils.uncapitalize("I Am FINE") = "i am fINE"
500     * </pre>
501     *
502     * @param str  the String to uncapitalize, may be null.
503     * @return uncapitalized String, {@code null} if null String input.
504     * @see #capitalize(String)
505     */
506    public static String uncapitalize(final String str) {
507        return uncapitalize(str, null);
508    }
509
510    /**
511     * Uncapitalizes all the whitespace separated words in a String.
512     * Only the first character of each word is changed.
513     *
514     * <p>The delimiters represent a set of characters understood to separate words.
515     * The first string character and the first non-delimiter character after a
516     * delimiter will be uncapitalized.</p>
517     *
518     * <p>Whitespace is defined by {@link Character#isWhitespace(char)}.
519     * A {@code null} input String returns {@code null}.</p>
520     *
521     * <pre>
522     * WordUtils.uncapitalize(null, *)            = null
523     * WordUtils.uncapitalize("", *)              = ""
524     * WordUtils.uncapitalize(*, null)            = *
525     * WordUtils.uncapitalize(*, new char[0])     = *
526     * WordUtils.uncapitalize("I AM.FINE", {'.'}) = "i AM.fINE"
527     * WordUtils.uncapitalize("I am fine", new char[]{}) = "i am fine"
528     * </pre>
529     *
530     * @param str  the String to uncapitalize, may be null.
531     * @param delimiters  set of characters to determine uncapitalization, null means whitespace.
532     * @return uncapitalized String, {@code null} if null String input.
533     * @see #capitalize(String)
534     */
535    public static String uncapitalize(final String str, final char... delimiters) {
536        if (StringUtils.isEmpty(str)) {
537            return str;
538        }
539        final Predicate<Integer> isDelimiter = generateIsDelimiterFunction(delimiters);
540        final int strLen = str.length();
541        final int[] newCodePoints = new int[strLen];
542        int outOffset = 0;
543
544        boolean uncapitalizeNext = true;
545        for (int index = 0; index < strLen;) {
546            final int codePoint = str.codePointAt(index);
547
548            if (isDelimiter.test(codePoint)) {
549                uncapitalizeNext = true;
550                newCodePoints[outOffset++] = codePoint;
551                index += Character.charCount(codePoint);
552            } else if (uncapitalizeNext) {
553                final int titleCaseCodePoint = Character.toLowerCase(codePoint);
554                newCodePoints[outOffset++] = titleCaseCodePoint;
555                index += Character.charCount(titleCaseCodePoint);
556                uncapitalizeNext = false;
557            } else {
558                newCodePoints[outOffset++] = codePoint;
559                index += Character.charCount(codePoint);
560            }
561        }
562        return new String(newCodePoints, 0, outOffset);
563    }
564
565    /**
566     * Wraps a single line of text, identifying words by {@code ' '}.
567     *
568     * <p>New lines will be separated by the system property line separator.
569     * Very long words, such as URLs will <em>not</em> be wrapped.</p>
570     *
571     * <p>Leading spaces on a new line are stripped.
572     * Trailing spaces are not stripped.</p>
573     *
574     * <table border="1">
575     *  <caption>Examples</caption>
576     *  <tr>
577     *   <th>input</th>
578     *   <th>wrapLength</th>
579     *   <th>result</th>
580     *  </tr>
581     *  <tr>
582     *   <td>null</td>
583     *   <td>*</td>
584     *   <td>null</td>
585     *  </tr>
586     *  <tr>
587     *   <td>""</td>
588     *   <td>*</td>
589     *   <td>""</td>
590     *  </tr>
591     *  <tr>
592     *   <td>"Here is one line of text that is going to be wrapped after 20 columns."</td>
593     *   <td>20</td>
594     *   <td>"Here is one line of\ntext that is going\nto be wrapped after\n20 columns."</td>
595     *  </tr>
596     *  <tr>
597     *   <td>"Click here to jump to the commons website - https://commons.apache.org"</td>
598     *   <td>20</td>
599     *   <td>"Click here to jump\nto the commons\nwebsite -\nhttps://commons.apache.org"</td>
600     *  </tr>
601     *  <tr>
602     *   <td>"Click here, https://commons.apache.org, to jump to the commons website"</td>
603     *   <td>20</td>
604     *   <td>"Click here,\nhttps://commons.apache.org,\nto jump to the\ncommons website"</td>
605     *  </tr>
606     * </table>
607     *
608     * (assuming that '\n' is the systems line separator)
609     *
610     * @param str  the String to be word wrapped, may be null.
611     * @param wrapLength  the column to wrap the words at, less than 1 is treated as 1.
612     * @return a line with newlines inserted, {@code null} if null input.
613     */
614    public static String wrap(final String str, final int wrapLength) {
615        return wrap(str, wrapLength, null, false);
616    }
617
618    /**
619     * Wraps a single line of text, identifying words by {@code ' '}.
620     *
621     * <p>Leading spaces on a new line are stripped.
622     * Trailing spaces are not stripped.</p>
623     *
624     * <table border="1">
625     *  <caption>Examples</caption>
626     *  <tr>
627     *   <th>input</th>
628     *   <th>wrapLength</th>
629     *   <th>newLineString</th>
630     *   <th>wrapLongWords</th>
631     *   <th>result</th>
632     *  </tr>
633     *  <tr>
634     *   <td>null</td>
635     *   <td>*</td>
636     *   <td>*</td>
637     *   <td>true/false</td>
638     *   <td>null</td>
639     *  </tr>
640     *  <tr>
641     *   <td>""</td>
642     *   <td>*</td>
643     *   <td>*</td>
644     *   <td>true/false</td>
645     *   <td>""</td>
646     *  </tr>
647     *  <tr>
648     *   <td>"Here is one line of text that is going to be wrapped after 20 columns."</td>
649     *   <td>20</td>
650     *   <td>"\n"</td>
651     *   <td>true/false</td>
652     *   <td>"Here is one line of\ntext that is going\nto be wrapped after\n20 columns."</td>
653     *  </tr>
654     *  <tr>
655     *   <td>"Here is one line of text that is going to be wrapped after 20 columns."</td>
656     *   <td>20</td>
657     *   <td>"&lt;br /&gt;"</td>
658     *   <td>true/false</td>
     *   <td>"Here is one line of&lt;br /&gt;text that is going&lt;br /&gt;
     *   to be wrapped after&lt;br /&gt;20 columns."</td>
661     *  </tr>
662     *  <tr>
663     *   <td>"Here is one line of text that is going to be wrapped after 20 columns."</td>
664     *   <td>20</td>
665     *   <td>null</td>
666     *   <td>true/false</td>
667     *   <td>"Here is one line of" + systemNewLine + "text that is going"
668     *   + systemNewLine + "to be wrapped after" + systemNewLine + "20 columns."</td>
669     *  </tr>
670     *  <tr>
671     *   <td>"Click here to jump to the commons website - https://commons.apache.org"</td>
672     *   <td>20</td>
673     *   <td>"\n"</td>
674     *   <td>false</td>
675     *   <td>"Click here to jump\nto the commons\nwebsite -\nhttps://commons.apache.org"</td>
676     *  </tr>
677     *  <tr>
678     *   <td>"Click here to jump to the commons website - https://commons.apache.org"</td>
679     *   <td>20</td>
680     *   <td>"\n"</td>
681     *   <td>true</td>
682     *   <td>"Click here to jump\nto the commons\nwebsite -\nhttps://commons.apach\ne.org"</td>
683     *  </tr>
684     * </table>
685     *
686     * @param str  the String to be word wrapped, may be null.
687     * @param wrapLength  the column to wrap the words at, less than 1 is treated as 1.
688     * @param newLineStr  the string to insert for a new line, {@code null} uses the system property line separator.
689     * @param wrapLongWords  true if long words (such as URLs) should be wrapped.
690     * @return a line with newlines inserted, {@code null} if null input.
691     */
692    public static String wrap(final String str,
693                              final int wrapLength,
694                              final String newLineStr,
695                              final boolean wrapLongWords) {
696        return wrap(str, wrapLength, newLineStr, wrapLongWords, " ");
697    }
698
699    /**
700     * Wraps a single line of text, identifying words by {@code wrapOn}.
701     *
702     * <p>Leading spaces on a new line are stripped.
703     * Trailing spaces are not stripped.</p>
704     *
705     * <table border="1">
706     *  <caption>Examples</caption>
707     *  <tr>
708     *   <th>input</th>
709     *   <th>wrapLength</th>
710     *   <th>newLineString</th>
711     *   <th>wrapLongWords</th>
712     *   <th>wrapOn</th>
713     *   <th>result</th>
714     *  </tr>
715     *  <tr>
716     *   <td>null</td>
717     *   <td>*</td>
718     *   <td>*</td>
719     *   <td>true/false</td>
720     *   <td>*</td>
721     *   <td>null</td>
722     *  </tr>
723     *  <tr>
724     *   <td>""</td>
725     *   <td>*</td>
726     *   <td>*</td>
727     *   <td>true/false</td>
728     *   <td>*</td>
729     *   <td>""</td>
730     *  </tr>
731     *  <tr>
732     *   <td>"Here is one line of text that is going to be wrapped after 20 columns."</td>
733     *   <td>20</td>
734     *   <td>"\n"</td>
735     *   <td>true/false</td>
736     *   <td>" "</td>
737     *   <td>"Here is one line of\ntext that is going\nto be wrapped after\n20 columns."</td>
738     *  </tr>
739     *  <tr>
740     *   <td>"Here is one line of text that is going to be wrapped after 20 columns."</td>
741     *   <td>20</td>
742     *   <td>"&lt;br /&gt;"</td>
743     *   <td>true/false</td>
744     *   <td>" "</td>
745     *   <td>"Here is one line of&lt;br /&gt;text that is going&lt;br /&gt;
746     *   to be wrapped after&lt;br /&gt;20 columns."</td>
747     *  </tr>
748     *  <tr>
749     *   <td>"Here is one line of text that is going to be wrapped after 20 columns."</td>
750     *   <td>20</td>
751     *   <td>null</td>
752     *   <td>true/false</td>
753     *   <td>" "</td>
754     *   <td>"Here is one line of" + systemNewLine + "text that is going"
755     *   + systemNewLine + "to be wrapped after" + systemNewLine + "20 columns."</td>
756     *  </tr>
757     *  <tr>
758     *   <td>"Click here to jump to the commons website - https://commons.apache.org"</td>
759     *   <td>20</td>
760     *   <td>"\n"</td>
761     *   <td>false</td>
762     *   <td>" "</td>
763     *   <td>"Click here to jump\nto the commons\nwebsite -\nhttps://commons.apache.org"</td>
764     *  </tr>
765     *  <tr>
766     *   <td>"Click here to jump to the commons website - https://commons.apache.org"</td>
767     *   <td>20</td>
768     *   <td>"\n"</td>
769     *   <td>true</td>
770     *   <td>" "</td>
     *   <td>"Click here to jump\nto the commons\nwebsite -\nhttps://commons.apac\nhe.org"</td>
772     *  </tr>
773     *  <tr>
774     *   <td>"flammable/inflammable"</td>
775     *   <td>20</td>
776     *   <td>"\n"</td>
777     *   <td>true</td>
778     *   <td>"/"</td>
779     *   <td>"flammable\ninflammable"</td>
780     *  </tr>
781     * </table>
782     * @param str  the String to be word wrapped, may be null.
783     * @param wrapLength  the column to wrap the words at, less than 1 is treated as 1.
784     * @param newLineStr  the string to insert for a new line, {@code null} uses the system property line separator.
785     * @param wrapLongWords  true if long words (such as URLs) should be wrapped.
     * @param wrapOn  regular expression matching the text at which a line may be broken; a {@code null} or blank value
     *                is treated as a single space.
787     * @return a line with newlines inserted, {@code null} if null input.
788     */
789    public static String wrap(final String str,
790                              int wrapLength,
791                              String newLineStr,
792                              final boolean wrapLongWords,
793                              String wrapOn) {
794        if (str == null) {
795            return null;
796        }
797        if (newLineStr == null) {
798            newLineStr = System.lineSeparator();
799        }
800        if (wrapLength < 1) {
801            wrapLength = 1;
802        }
803        if (StringUtils.isBlank(wrapOn)) {
804            wrapOn = " ";
805        }
806        final Pattern patternToWrapOn = Pattern.compile(wrapOn);
807        final int inputLineLength = str.length();
808        int offset = 0;
809        final StringBuilder wrappedLine = new StringBuilder(inputLineLength + 32);
810        int matcherSize = -1;
811
812        while (offset < inputLineLength) {
813            int spaceToWrapAt = -1;
814            Matcher matcher = patternToWrapOn.matcher(str.substring(offset,
815                    Math.min((int) Math.min(Integer.MAX_VALUE, offset + wrapLength + 1L), inputLineLength)));
816            if (matcher.find()) {
817                if (matcher.start() == 0) {
818                    matcherSize = matcher.end();
819                    if (matcherSize != 0) {
820                        offset += matcher.end();
821                        continue;
822                    }
823                    offset += 1;
824                }
825                spaceToWrapAt = matcher.start() + offset;
826            }
827
828            // only last line without leading spaces is left
829            if (inputLineLength - offset <= wrapLength) {
830                break;
831            }
832
833            while (matcher.find()) {
834                spaceToWrapAt = matcher.start() + offset;
835            }
836
837            if (spaceToWrapAt >= offset) {
838                // normal case
839                wrappedLine.append(str, offset, spaceToWrapAt);
840                wrappedLine.append(newLineStr);
841                offset = spaceToWrapAt + 1;
842
843            } else // really long word or URL
844            if (wrapLongWords) {
845                if (matcherSize == 0) {
846                    offset--;
847                }
848                // wrap really long word one line at a time
849                wrappedLine.append(str, offset, wrapLength + offset);
850                wrappedLine.append(newLineStr);
851                offset += wrapLength;
852                matcherSize = -1;
853            } else {
854                // do not wrap really long word, just extend beyond limit
855                matcher = patternToWrapOn.matcher(str.substring(offset + wrapLength));
856                if (matcher.find()) {
857                    matcherSize = matcher.end() - matcher.start();
858                    spaceToWrapAt = matcher.start() + offset + wrapLength;
859                }
860
861                if (spaceToWrapAt >= 0) {
862                    if (matcherSize == 0 && offset != 0) {
863                        offset--;
864                    }
865                    wrappedLine.append(str, offset, spaceToWrapAt);
866                    wrappedLine.append(newLineStr);
867                    offset = spaceToWrapAt + 1;
868                } else {
869                    if (matcherSize == 0 && offset != 0) {
870                        offset--;
871                    }
872                    wrappedLine.append(str, offset, str.length());
873                    offset = inputLineLength;
874                    matcherSize = -1;
875                }
876            }
877        }
878
879        if (matcherSize == 0 && offset < inputLineLength) {
880            offset--;
881        }
882
883        // Whatever is left in line is short enough to just pass through
884        wrappedLine.append(str, offset, str.length());
885
886        return wrappedLine.toString();
887    }
888
889    /**
890     * {@code WordUtils} instances should NOT be constructed in
891     * standard programming. Instead, the class should be used as
892     * {@code WordUtils.wrap("foo bar", 20);}.
893     *
894     * <p>This constructor is public to permit tools that require a JavaBean
895     * instance to operate.</p>
896     */
897    public WordUtils() {
898    }
899 }