WordUtils.java

  1. /*
  2.  * Licensed to the Apache Software Foundation (ASF) under one or more
  3.  * contributor license agreements.  See the NOTICE file distributed with
  4.  * this work for additional information regarding copyright ownership.
  5.  * The ASF licenses this file to You under the Apache License, Version 2.0
  6.  * (the "License"); you may not use this file except in compliance with
  7.  * the License.  You may obtain a copy of the License at
  8.  *
  9.  *      https://www.apache.org/licenses/LICENSE-2.0
  10.  *
  11.  * Unless required by applicable law or agreed to in writing, software
  12.  * distributed under the License is distributed on an "AS IS" BASIS,
  13.  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  14.  * See the License for the specific language governing permissions and
  15.  * limitations under the License.
  16.  */
  17. package org.apache.commons.text;

  18. import java.util.HashSet;
  19. import java.util.Set;
  20. import java.util.function.Predicate;
  21. import java.util.regex.Matcher;
  22. import java.util.regex.Pattern;

  23. import org.apache.commons.lang3.ArrayUtils;
  24. import org.apache.commons.lang3.StringUtils;
  25. import org.apache.commons.lang3.Validate;

  26. /**
  27.  * Operations on Strings that contain words.
  28.  *
  29.  * <p>
  30.  * This class tries to handle {@code null} input gracefully. An exception will not be thrown for a
  31.  * {@code null} input. Each method documents its behavior in more detail.
  32.  * </p>
  33.  *
  34.  * @since 1.1
  35.  */
  36. public class WordUtils {

  37.     /**
  38.      * Abbreviates the words nicely.
  39.      *
  40.      * <p>
  41.      * This method searches for the first space after the lower limit and abbreviates
  42.      * the String there. It will also append any String passed as a parameter
  43.      * to the end of the String. The upper limit can be specified to forcibly
  44.      * abbreviate a String.
  45.      * </p>
  46.      *
  47.      * @param str         the string to be abbreviated. If null is passed, null is returned.
  48.      *                    If the empty String is passed, the empty string is returned.
  49.      * @param lower       the lower limit; negative value is treated as zero.
  50.      * @param upper       the upper limit; specify -1 if no limit is desired.
  51.      *                    The upper limit cannot be lower than the lower limit.
  52.      * @param appendToEnd String to be appended to the end of the abbreviated string.
  53.      *                    This is appended ONLY if the string was indeed abbreviated.
  54.      *                    The append does not count towards the lower or upper limits.
  55.      * @return The abbreviated String.
  56.      *
  57.      * <pre>
  58.      * WordUtils.abbreviate("Now is the time for all good men", 0, 40, null));     = "Now"
  59.      * WordUtils.abbreviate("Now is the time for all good men", 10, 40, null));    = "Now is the"
  60.      * WordUtils.abbreviate("Now is the time for all good men", 20, 40, null));    = "Now is the time for all"
  61.      * WordUtils.abbreviate("Now is the time for all good men", 0, 40, ""));       = "Now"
  62.      * WordUtils.abbreviate("Now is the time for all good men", 10, 40, ""));      = "Now is the"
  63.      * WordUtils.abbreviate("Now is the time for all good men", 20, 40, ""));      = "Now is the time for all"
  64.      * WordUtils.abbreviate("Now is the time for all good men", 0, 40, " ..."));   = "Now ..."
  65.      * WordUtils.abbreviate("Now is the time for all good men", 10, 40, " ..."));  = "Now is the ..."
  66.      * WordUtils.abbreviate("Now is the time for all good men", 20, 40, " ..."));  = "Now is the time for all ..."
  67.      * WordUtils.abbreviate("Now is the time for all good men", 0, -1, ""));       = "Now"
  68.      * WordUtils.abbreviate("Now is the time for all good men", 10, -1, ""));      = "Now is the"
  69.      * WordUtils.abbreviate("Now is the time for all good men", 20, -1, ""));      = "Now is the time for all"
  70.      * WordUtils.abbreviate("Now is the time for all good men", 50, -1, ""));      = "Now is the time for all good men"
  71.      * WordUtils.abbreviate("Now is the time for all good men", 1000, -1, ""));    = "Now is the time for all good men"
  72.      * WordUtils.abbreviate("Now is the time for all good men", 9, -10, null));    = IllegalArgumentException
  73.      * WordUtils.abbreviate("Now is the time for all good men", 10, 5, null));     = IllegalArgumentException
  74.      * </pre>
  75.      */
  76.     public static String abbreviate(final String str, int lower, int upper, final String appendToEnd) {
  77.         Validate.isTrue(upper >= -1, "upper value cannot be less than -1");
  78.         Validate.isTrue(upper >= lower || upper == -1, "upper value is less than lower value");
  79.         if (StringUtils.isEmpty(str)) {
  80.             return str;
  81.         }

  82.         // if the lower value is greater than the length of the string,
  83.         // set to the length of the string
  84.         if (lower > str.length()) {
  85.             lower = str.length();
  86.         }

  87.         // if the upper value is -1 (i.e. no limit) or is greater
  88.         // than the length of the string, set to the length of the string
  89.         if (upper == -1 || upper > str.length()) {
  90.             upper = str.length();
  91.         }

  92.         final StringBuilder result = new StringBuilder();
  93.         final int index = StringUtils.indexOf(str, " ", lower);
  94.         if (index == -1) {
  95.             result.append(str, 0, upper);
  96.             // only if abbreviation has occurred do we append the appendToEnd value
  97.             if (upper != str.length()) {
  98.                 result.append(StringUtils.defaultString(appendToEnd));
  99.             }
  100.         } else {
  101.             result.append(str, 0, Math.min(index, upper));
  102.             result.append(StringUtils.defaultString(appendToEnd));
  103.         }

  104.         return result.toString();
  105.     }

  106.     /**
  107.      * Capitalizes all the whitespace separated words in a String.
  108.      * Only the first character of each word is changed. To convert the
  109.      * rest of each word to lowercase at the same time,
  110.      * use {@link #capitalizeFully(String)}.
  111.      *
  112.      * <p>Whitespace is defined by {@link Character#isWhitespace(char)}.
  113.      * A {@code null} input String returns {@code null}.
  114.      * Capitalization uses the Unicode title case, normally equivalent to
  115.      * upper case.</p>
  116.      *
  117.      * <pre>
  118.      * WordUtils.capitalize(null)        = null
  119.      * WordUtils.capitalize("")          = ""
  120.      * WordUtils.capitalize("i am FINE") = "I Am FINE"
  121.      * </pre>
  122.      *
  123.      * @param str  the String to capitalize, may be null
  124.      * @return capitalized String, {@code null} if null String input
  125.      * @see #uncapitalize(String)
  126.      * @see #capitalizeFully(String)
  127.      */
  128.     public static String capitalize(final String str) {
  129.         return capitalize(str, null);
  130.     }

  131.     /**
  132.      * Capitalizes all the delimiter separated words in a String.
  133.      * Only the first character of each word is changed. To convert the
  134.      * rest of each word to lowercase at the same time,
  135.      * use {@link #capitalizeFully(String, char[])}.
  136.      *
  137.      * <p>The delimiters represent a set of characters understood to separate words.
  138.      * The first string character and the first non-delimiter character after a
  139.      * delimiter will be capitalized.</p>
  140.      *
  141.      * <p>A {@code null} input String returns {@code null}.
  142.      * Capitalization uses the Unicode title case, normally equivalent to
  143.      * upper case.</p>
  144.      *
  145.      * <pre>
  146.      * WordUtils.capitalize(null, *)            = null
  147.      * WordUtils.capitalize("", *)              = ""
  148.      * WordUtils.capitalize(*, new char[0])     = *
  149.      * WordUtils.capitalize("i am fine", null)  = "I Am Fine"
  150.      * WordUtils.capitalize("i aM.fine", {'.'}) = "I aM.Fine"
  151.      * WordUtils.capitalize("i am fine", new char[]{}) = "I am fine"
  152.      * </pre>
  153.      *
  154.      * @param str  the String to capitalize, may be null
  155.      * @param delimiters  set of characters to determine capitalization, null means whitespace
  156.      * @return capitalized String, {@code null} if null String input
  157.      * @see #uncapitalize(String)
  158.      * @see #capitalizeFully(String)
  159.      */
  160.     public static String capitalize(final String str, final char... delimiters) {
  161.         if (StringUtils.isEmpty(str)) {
  162.             return str;
  163.         }
  164.         final Predicate<Integer> isDelimiter = generateIsDelimiterFunction(delimiters);
  165.         final int strLen = str.length();
  166.         final int[] newCodePoints = new int[strLen];
  167.         int outOffset = 0;

  168.         boolean capitalizeNext = true;
  169.         for (int index = 0; index < strLen;) {
  170.             final int codePoint = str.codePointAt(index);

  171.             if (isDelimiter.test(codePoint)) {
  172.                 capitalizeNext = true;
  173.                 newCodePoints[outOffset++] = codePoint;
  174.                 index += Character.charCount(codePoint);
  175.             } else if (capitalizeNext) {
  176.                 final int titleCaseCodePoint = Character.toTitleCase(codePoint);
  177.                 newCodePoints[outOffset++] = titleCaseCodePoint;
  178.                 index += Character.charCount(titleCaseCodePoint);
  179.                 capitalizeNext = false;
  180.             } else {
  181.                 newCodePoints[outOffset++] = codePoint;
  182.                 index += Character.charCount(codePoint);
  183.             }
  184.         }
  185.         return new String(newCodePoints, 0, outOffset);
  186.     }

  187.     /**
  188.      * Converts all the whitespace separated words in a String into capitalized words,
  189.      * that is each word is made up of a titlecase character and then a series of
  190.      * lowercase characters.
  191.      *
  192.      * <p>Whitespace is defined by {@link Character#isWhitespace(char)}.
  193.      * A {@code null} input String returns {@code null}.
  194.      * Capitalization uses the Unicode title case, normally equivalent to
  195.      * upper case.</p>
  196.      *
  197.      * <pre>
  198.      * WordUtils.capitalizeFully(null)        = null
  199.      * WordUtils.capitalizeFully("")          = ""
  200.      * WordUtils.capitalizeFully("i am FINE") = "I Am Fine"
  201.      * </pre>
  202.      *
  203.      * @param str  the String to capitalize, may be null
  204.      * @return capitalized String, {@code null} if null String input
  205.      */
  206.     public static String capitalizeFully(final String str) {
  207.         return capitalizeFully(str, null);
  208.     }

  209.     /**
  210.      * Converts all the delimiter separated words in a String into capitalized words,
  211.      * that is each word is made up of a titlecase character and then a series of
  212.      * lowercase characters.
  213.      *
  214.      * <p>The delimiters represent a set of characters understood to separate words.
  215.      * The first string character and the first non-delimiter character after a
  216.      * delimiter will be capitalized.</p>
  217.      *
  218.      * <p>A {@code null} input String returns {@code null}.
  219.      * Capitalization uses the Unicode title case, normally equivalent to
  220.      * upper case.</p>
  221.      *
  222.      * <pre>
  223.      * WordUtils.capitalizeFully(null, *)            = null
  224.      * WordUtils.capitalizeFully("", *)              = ""
  225.      * WordUtils.capitalizeFully(*, null)            = *
  226.      * WordUtils.capitalizeFully(*, new char[0])     = *
  227.      * WordUtils.capitalizeFully("i aM.fine", {'.'}) = "I am.Fine"
  228.      * </pre>
  229.      *
  230.      * @param str  the String to capitalize, may be null
  231.      * @param delimiters  set of characters to determine capitalization, null means whitespace
  232.      * @return capitalized String, {@code null} if null String input
  233.      */
  234.     public static String capitalizeFully(String str, final char... delimiters) {
  235.         if (StringUtils.isEmpty(str)) {
  236.             return str;
  237.         }
  238.         str = str.toLowerCase();
  239.         return capitalize(str, delimiters);
  240.     }

  241.     /**
  242.      * Checks if the String contains all words in the given array.
  243.      *
  244.      * <p>
  245.      * A {@code null} String will return {@code false}. A {@code null}, zero
  246.      * length search array or if one element of array is null will return {@code false}.
  247.      * </p>
  248.      *
  249.      * <pre>
  250.      * WordUtils.containsAllWords(null, *)            = false
  251.      * WordUtils.containsAllWords("", *)              = false
  252.      * WordUtils.containsAllWords(*, null)            = false
  253.      * WordUtils.containsAllWords(*, [])              = false
  254.      * WordUtils.containsAllWords("abcd", "ab", "cd") = false
  255.      * WordUtils.containsAllWords("abc def", "def", "abc") = true
  256.      * </pre>
  257.      *
  258.      * @param word The CharSequence to check, may be null
  259.      * @param words The array of String words to search for, may be null
  260.      * @return {@code true} if all search words are found, {@code false} otherwise
  261.      */
  262.     public static boolean containsAllWords(final CharSequence word, final CharSequence... words) {
  263.         if (StringUtils.isEmpty(word) || ArrayUtils.isEmpty(words)) {
  264.             return false;
  265.         }
  266.         for (final CharSequence w : words) {
  267.             if (StringUtils.isBlank(w)) {
  268.                 return false;
  269.             }
  270.             final Pattern p = Pattern.compile(".*\\b" + Pattern.quote(w.toString()) + "\\b.*");
  271.             if (!p.matcher(word).matches()) {
  272.                 return false;
  273.             }
  274.         }
  275.         return true;
  276.     }

  277.     /**
  278.      * Given the array of delimiters supplied; returns a function determining whether a character code point is a delimiter.
  279.      * The function provides O(1) lookup time.
  280.      * Whitespace is defined by {@link Character#isWhitespace(char)} and is used as the defaultvalue if delimiters is null.
  281.      *
  282.      * @param delimiters set of characters to determine delimiters, null means whitespace
  283.      * @return Predicate<Integer> taking a code point value as an argument and returning true if a delimiter.
  284.      */
  285.     private static Predicate<Integer> generateIsDelimiterFunction(final char[] delimiters) {
  286.         final Predicate<Integer> isDelimiter;
  287.         if (delimiters == null || delimiters.length == 0) {
  288.             isDelimiter = delimiters == null ? Character::isWhitespace : c -> false;
  289.         } else {
  290.             final Set<Integer> delimiterSet = new HashSet<>();
  291.             for (int index = 0; index < delimiters.length; index++) {
  292.                 delimiterSet.add(Character.codePointAt(delimiters, index));
  293.             }
  294.             isDelimiter = delimiterSet::contains;
  295.         }

  296.         return isDelimiter;
  297.     }

  298.     /**
  299.      * Extracts the initial characters from each word in the String.
  300.      *
  301.      * <p>All first characters after whitespace are returned as a new string.
  302.      * Their case is not changed.</p>
  303.      *
  304.      * <p>Whitespace is defined by {@link Character#isWhitespace(char)}.
  305.      * A {@code null} input String returns {@code null}.</p>
  306.      *
  307.      * <pre>
  308.      * WordUtils.initials(null)             = null
  309.      * WordUtils.initials("")               = ""
  310.      * WordUtils.initials("Ben John Lee")   = "BJL"
  311.      * WordUtils.initials("Ben J.Lee")      = "BJ"
  312.      * </pre>
  313.      *
  314.      * @param str  the String to get initials from, may be null
  315.      * @return String of initial letters, {@code null} if null String input
  316.      * @see #initials(String,char[])
  317.      */
  318.     public static String initials(final String str) {
  319.         return initials(str, null);
  320.     }

  321.     /**
  322.      * Extracts the initial characters from each word in the String.
  323.      *
  324.      * <p>All first characters after the defined delimiters are returned as a new string.
  325.      * Their case is not changed.</p>
  326.      *
  327.      * <p>If the delimiters array is null, then Whitespace is used.
  328.      * Whitespace is defined by {@link Character#isWhitespace(char)}.
  329.      * A {@code null} input String returns {@code null}.
  330.      * An empty delimiter array returns an empty String.</p>
  331.      *
  332.      * <pre>
  333.      * WordUtils.initials(null, *)                = null
  334.      * WordUtils.initials("", *)                  = ""
  335.      * WordUtils.initials("Ben John Lee", null)   = "BJL"
  336.      * WordUtils.initials("Ben J.Lee", null)      = "BJ"
  337.      * WordUtils.initials("Ben J.Lee", [' ','.']) = "BJL"
  338.      * WordUtils.initials(*, new char[0])         = ""
  339.      * </pre>
  340.      *
  341.      * @param str  the String to get initials from, may be null
  342.      * @param delimiters  set of characters to determine words, null means whitespace
  343.      * @return String of initial characters, {@code null} if null String input
  344.      * @see #initials(String)
  345.      */
  346.     public static String initials(final String str, final char... delimiters) {
  347.         if (StringUtils.isEmpty(str)) {
  348.             return str;
  349.         }
  350.         if (delimiters != null && delimiters.length == 0) {
  351.             return StringUtils.EMPTY;
  352.         }
  353.         final Predicate<Integer> isDelimiter = generateIsDelimiterFunction(delimiters);
  354.         final int strLen = str.length();
  355.         final int[] newCodePoints = new int[strLen / 2 + 1];
  356.         int count = 0;
  357.         boolean lastWasGap = true;
  358.         for (int i = 0; i < strLen;) {
  359.             final int codePoint = str.codePointAt(i);

  360.             if (isDelimiter.test(codePoint)) {
  361.                 lastWasGap = true;
  362.             } else if (lastWasGap) {
  363.                 newCodePoints[count++] = codePoint;
  364.                 lastWasGap = false;
  365.             }

  366.             i += Character.charCount(codePoint);
  367.         }
  368.         return new String(newCodePoints, 0, count);
  369.     }

  370.     /**
  371.      * Is the character a delimiter.
  372.      *
  373.      * @param ch the character to check
  374.      * @param delimiters the delimiters
  375.      * @return true if it is a delimiter
  376.      * @deprecated as of 1.2 and will be removed in 2.0
  377.      */
  378.     @Deprecated
  379.     public static boolean isDelimiter(final char ch, final char[] delimiters) {
  380.         if (delimiters == null) {
  381.             return Character.isWhitespace(ch);
  382.         }
  383.         for (final char delimiter : delimiters) {
  384.             if (ch == delimiter) {
  385.                 return true;
  386.             }
  387.         }
  388.         return false;
  389.     }

  390.     /**
  391.      * Is the codePoint a delimiter.
  392.      *
  393.      * @param codePoint the codePint to check
  394.      * @param delimiters the delimiters
  395.      * @return true if it is a delimiter
  396.      * @deprecated as of 1.2 and will be removed in 2.0
  397.      */
  398.     @Deprecated
  399.     public static boolean isDelimiter(final int codePoint, final char[] delimiters) {
  400.         if (delimiters == null) {
  401.             return Character.isWhitespace(codePoint);
  402.         }
  403.         for (int index = 0; index < delimiters.length; index++) {
  404.             final int delimiterCodePoint = Character.codePointAt(delimiters, index);
  405.             if (delimiterCodePoint == codePoint) {
  406.                 return true;
  407.             }
  408.         }
  409.         return false;
  410.     }

  411.     /**
  412.      * Swaps the case of a String using a word based algorithm.
  413.      *
  414.      * <ul>
  415.      *  <li>Upper case character converts to Lower case</li>
  416.      *  <li>Title case character converts to Lower case</li>
  417.      *  <li>Lower case character after Whitespace or at start converts to Title case</li>
  418.      *  <li>Other Lower case character converts to Upper case</li>
  419.      * </ul>
  420.      *
  421.      * <p>Whitespace is defined by {@link Character#isWhitespace(char)}.
  422.      * A {@code null} input String returns {@code null}.</p>
  423.      *
  424.      * <pre>
  425.      * StringUtils.swapCase(null)                 = null
  426.      * StringUtils.swapCase("")                   = ""
  427.      * StringUtils.swapCase("The dog has a BONE") = "tHE DOG HAS A bone"
  428.      * </pre>
  429.      *
  430.      * @param str  the String to swap case, may be null
  431.      * @return The changed String, {@code null} if null String input
  432.      */
  433.     public static String swapCase(final String str) {
  434.         if (StringUtils.isEmpty(str)) {
  435.             return str;
  436.         }
  437.         final int strLen = str.length();
  438.         final int[] newCodePoints = new int[strLen];
  439.         int outOffset = 0;
  440.         boolean whitespace = true;
  441.         for (int index = 0; index < strLen;) {
  442.             final int oldCodepoint = str.codePointAt(index);
  443.             final int newCodePoint;
  444.             if (Character.isUpperCase(oldCodepoint) || Character.isTitleCase(oldCodepoint)) {
  445.                 newCodePoint = Character.toLowerCase(oldCodepoint);
  446.                 whitespace = false;
  447.             } else if (Character.isLowerCase(oldCodepoint)) {
  448.                 if (whitespace) {
  449.                     newCodePoint = Character.toTitleCase(oldCodepoint);
  450.                     whitespace = false;
  451.                 } else {
  452.                     newCodePoint = Character.toUpperCase(oldCodepoint);
  453.                 }
  454.             } else {
  455.                 whitespace = Character.isWhitespace(oldCodepoint);
  456.                 newCodePoint = oldCodepoint;
  457.             }
  458.             newCodePoints[outOffset++] = newCodePoint;
  459.             index += Character.charCount(newCodePoint);
  460.         }
  461.         return new String(newCodePoints, 0, outOffset);
  462.     }

  463.     /**
  464.      * Uncapitalizes all the whitespace separated words in a String.
  465.      * Only the first character of each word is changed.
  466.      *
  467.      * <p>Whitespace is defined by {@link Character#isWhitespace(char)}.
  468.      * A {@code null} input String returns {@code null}.</p>
  469.      *
  470.      * <pre>
  471.      * WordUtils.uncapitalize(null)        = null
  472.      * WordUtils.uncapitalize("")          = ""
  473.      * WordUtils.uncapitalize("I Am FINE") = "i am fINE"
  474.      * </pre>
  475.      *
  476.      * @param str  the String to uncapitalize, may be null
  477.      * @return uncapitalized String, {@code null} if null String input
  478.      * @see #capitalize(String)
  479.      */
  480.     public static String uncapitalize(final String str) {
  481.         return uncapitalize(str, null);
  482.     }

  483.     /**
  484.      * Uncapitalizes all the whitespace separated words in a String.
  485.      * Only the first character of each word is changed.
  486.      *
  487.      * <p>The delimiters represent a set of characters understood to separate words.
  488.      * The first string character and the first non-delimiter character after a
  489.      * delimiter will be uncapitalized.</p>
  490.      *
  491.      * <p>Whitespace is defined by {@link Character#isWhitespace(char)}.
  492.      * A {@code null} input String returns {@code null}.</p>
  493.      *
  494.      * <pre>
  495.      * WordUtils.uncapitalize(null, *)            = null
  496.      * WordUtils.uncapitalize("", *)              = ""
  497.      * WordUtils.uncapitalize(*, null)            = *
  498.      * WordUtils.uncapitalize(*, new char[0])     = *
  499.      * WordUtils.uncapitalize("I AM.FINE", {'.'}) = "i AM.fINE"
  500.      * WordUtils.uncapitalize("I am fine", new char[]{}) = "i am fine"
  501.      * </pre>
  502.      *
  503.      * @param str  the String to uncapitalize, may be null
  504.      * @param delimiters  set of characters to determine uncapitalization, null means whitespace
  505.      * @return uncapitalized String, {@code null} if null String input
  506.      * @see #capitalize(String)
  507.      */
  508.     public static String uncapitalize(final String str, final char... delimiters) {
  509.         if (StringUtils.isEmpty(str)) {
  510.             return str;
  511.         }
  512.         final Predicate<Integer> isDelimiter = generateIsDelimiterFunction(delimiters);
  513.         final int strLen = str.length();
  514.         final int[] newCodePoints = new int[strLen];
  515.         int outOffset = 0;

  516.         boolean uncapitalizeNext = true;
  517.         for (int index = 0; index < strLen;) {
  518.             final int codePoint = str.codePointAt(index);

  519.             if (isDelimiter.test(codePoint)) {
  520.                 uncapitalizeNext = true;
  521.                 newCodePoints[outOffset++] = codePoint;
  522.                 index += Character.charCount(codePoint);
  523.             } else if (uncapitalizeNext) {
  524.                 final int titleCaseCodePoint = Character.toLowerCase(codePoint);
  525.                 newCodePoints[outOffset++] = titleCaseCodePoint;
  526.                 index += Character.charCount(titleCaseCodePoint);
  527.                 uncapitalizeNext = false;
  528.             } else {
  529.                 newCodePoints[outOffset++] = codePoint;
  530.                 index += Character.charCount(codePoint);
  531.             }
  532.         }
  533.         return new String(newCodePoints, 0, outOffset);
  534.     }

  535.     /**
  536.      * Wraps a single line of text, identifying words by {@code ' '}.
  537.      *
  538.      * <p>New lines will be separated by the system property line separator.
  539.      * Very long words, such as URLs will <em>not</em> be wrapped.</p>
  540.      *
  541.      * <p>Leading spaces on a new line are stripped.
  542.      * Trailing spaces are not stripped.</p>
  543.      *
  544.      * <table border="1">
  545.      *  <caption>Examples</caption>
  546.      *  <tr>
  547.      *   <th>input</th>
  548.      *   <th>wrapLength</th>
  549.      *   <th>result</th>
  550.      *  </tr>
  551.      *  <tr>
  552.      *   <td>null</td>
  553.      *   <td>*</td>
  554.      *   <td>null</td>
  555.      *  </tr>
  556.      *  <tr>
  557.      *   <td>""</td>
  558.      *   <td>*</td>
  559.      *   <td>""</td>
  560.      *  </tr>
  561.      *  <tr>
  562.      *   <td>"Here is one line of text that is going to be wrapped after 20 columns."</td>
  563.      *   <td>20</td>
  564.      *   <td>"Here is one line of\ntext that is going\nto be wrapped after\n20 columns."</td>
  565.      *  </tr>
  566.      *  <tr>
  567.      *   <td>"Click here to jump to the commons website - https://commons.apache.org"</td>
  568.      *   <td>20</td>
  569.      *   <td>"Click here to jump\nto the commons\nwebsite -\nhttps://commons.apache.org"</td>
  570.      *  </tr>
  571.      *  <tr>
  572.      *   <td>"Click here, https://commons.apache.org, to jump to the commons website"</td>
  573.      *   <td>20</td>
  574.      *   <td>"Click here,\nhttps://commons.apache.org,\nto jump to the\ncommons website"</td>
  575.      *  </tr>
  576.      * </table>
  577.      *
  578.      * (assuming that '\n' is the systems line separator)
  579.      *
  580.      * @param str  the String to be word wrapped, may be null
  581.      * @param wrapLength  the column to wrap the words at, less than 1 is treated as 1
  582.      * @return a line with newlines inserted, {@code null} if null input
  583.      */
  584.     public static String wrap(final String str, final int wrapLength) {
  585.         return wrap(str, wrapLength, null, false);
  586.     }

  587.     /**
  588.      * Wraps a single line of text, identifying words by {@code ' '}.
  589.      *
  590.      * <p>Leading spaces on a new line are stripped.
  591.      * Trailing spaces are not stripped.</p>
  592.      *
  593.      * <table border="1">
  594.      *  <caption>Examples</caption>
  595.      *  <tr>
  596.      *   <th>input</th>
  597.      *   <th>wrapLength</th>
  598.      *   <th>newLineString</th>
  599.      *   <th>wrapLongWords</th>
  600.      *   <th>result</th>
  601.      *  </tr>
  602.      *  <tr>
  603.      *   <td>null</td>
  604.      *   <td>*</td>
  605.      *   <td>*</td>
  606.      *   <td>true/false</td>
  607.      *   <td>null</td>
  608.      *  </tr>
  609.      *  <tr>
  610.      *   <td>""</td>
  611.      *   <td>*</td>
  612.      *   <td>*</td>
  613.      *   <td>true/false</td>
  614.      *   <td>""</td>
  615.      *  </tr>
  616.      *  <tr>
  617.      *   <td>"Here is one line of text that is going to be wrapped after 20 columns."</td>
  618.      *   <td>20</td>
  619.      *   <td>"\n"</td>
  620.      *   <td>true/false</td>
  621.      *   <td>"Here is one line of\ntext that is going\nto be wrapped after\n20 columns."</td>
  622.      *  </tr>
  623.      *  <tr>
  624.      *   <td>"Here is one line of text that is going to be wrapped after 20 columns."</td>
  625.      *   <td>20</td>
  626.      *   <td>"&lt;br /&gt;"</td>
  627.      *   <td>true/false</td>
  628.      *   <td>"Here is one line of&lt;br /&gt;text that is going&lt;
  629.      *   br /&gt;to be wrapped after&lt;br /&gt;20 columns."</td>
  630.      *  </tr>
  631.      *  <tr>
  632.      *   <td>"Here is one line of text that is going to be wrapped after 20 columns."</td>
  633.      *   <td>20</td>
  634.      *   <td>null</td>
  635.      *   <td>true/false</td>
  636.      *   <td>"Here is one line of" + systemNewLine + "text that is going"
  637.      *   + systemNewLine + "to be wrapped after" + systemNewLine + "20 columns."</td>
  638.      *  </tr>
  639.      *  <tr>
  640.      *   <td>"Click here to jump to the commons website - https://commons.apache.org"</td>
  641.      *   <td>20</td>
  642.      *   <td>"\n"</td>
  643.      *   <td>false</td>
  644.      *   <td>"Click here to jump\nto the commons\nwebsite -\nhttps://commons.apache.org"</td>
  645.      *  </tr>
  646.      *  <tr>
  647.      *   <td>"Click here to jump to the commons website - https://commons.apache.org"</td>
  648.      *   <td>20</td>
  649.      *   <td>"\n"</td>
  650.      *   <td>true</td>
  651.      *   <td>"Click here to jump\nto the commons\nwebsite -\nhttps://commons.apach\ne.org"</td>
  652.      *  </tr>
  653.      * </table>
  654.      *
  655.      * @param str  the String to be word wrapped, may be null
  656.      * @param wrapLength  the column to wrap the words at, less than 1 is treated as 1
  657.      * @param newLineStr  the string to insert for a new line,
  658.      *  {@code null} uses the system property line separator
  659.      * @param wrapLongWords  true if long words (such as URLs) should be wrapped
  660.      * @return a line with newlines inserted, {@code null} if null input
  661.      */
  662.     public static String wrap(final String str,
  663.                               final int wrapLength,
  664.                               final String newLineStr,
  665.                               final boolean wrapLongWords) {
  666.         return wrap(str, wrapLength, newLineStr, wrapLongWords, " ");
  667.     }

  668.     /**
  669.      * Wraps a single line of text, identifying words by {@code wrapOn}.
  670.      *
  671.      * <p>Leading spaces on a new line are stripped.
  672.      * Trailing spaces are not stripped.</p>
  673.      *
  674.      * <table border="1">
  675.      *  <caption>Examples</caption>
  676.      *  <tr>
  677.      *   <th>input</th>
  678.      *   <th>wrapLength</th>
  679.      *   <th>newLineString</th>
  680.      *   <th>wrapLongWords</th>
  681.      *   <th>wrapOn</th>
  682.      *   <th>result</th>
  683.      *  </tr>
  684.      *  <tr>
  685.      *   <td>null</td>
  686.      *   <td>*</td>
  687.      *   <td>*</td>
  688.      *   <td>true/false</td>
  689.      *   <td>*</td>
  690.      *   <td>null</td>
  691.      *  </tr>
  692.      *  <tr>
  693.      *   <td>""</td>
  694.      *   <td>*</td>
  695.      *   <td>*</td>
  696.      *   <td>true/false</td>
  697.      *   <td>*</td>
  698.      *   <td>""</td>
  699.      *  </tr>
  700.      *  <tr>
  701.      *   <td>"Here is one line of text that is going to be wrapped after 20 columns."</td>
  702.      *   <td>20</td>
  703.      *   <td>"\n"</td>
  704.      *   <td>true/false</td>
  705.      *   <td>" "</td>
  706.      *   <td>"Here is one line of\ntext that is going\nto be wrapped after\n20 columns."</td>
  707.      *  </tr>
  708.      *  <tr>
  709.      *   <td>"Here is one line of text that is going to be wrapped after 20 columns."</td>
  710.      *   <td>20</td>
  711.      *   <td>"&lt;br /&gt;"</td>
  712.      *   <td>true/false</td>
  713.      *   <td>" "</td>
  714.      *   <td>"Here is one line of&lt;br /&gt;text that is going&lt;br /&gt;
  715.      *   to be wrapped after&lt;br /&gt;20 columns."</td>
  716.      *  </tr>
  717.      *  <tr>
  718.      *   <td>"Here is one line of text that is going to be wrapped after 20 columns."</td>
  719.      *   <td>20</td>
  720.      *   <td>null</td>
  721.      *   <td>true/false</td>
  722.      *   <td>" "</td>
  723.      *   <td>"Here is one line of" + systemNewLine + "text that is going"
  724.      *   + systemNewLine + "to be wrapped after" + systemNewLine + "20 columns."</td>
  725.      *  </tr>
  726.      *  <tr>
  727.      *   <td>"Click here to jump to the commons website - https://commons.apache.org"</td>
  728.      *   <td>20</td>
  729.      *   <td>"\n"</td>
  730.      *   <td>false</td>
  731.      *   <td>" "</td>
  732.      *   <td>"Click here to jump\nto the commons\nwebsite -\nhttps://commons.apache.org"</td>
  733.      *  </tr>
  734.      *  <tr>
  735.      *   <td>"Click here to jump to the commons website - https://commons.apache.org"</td>
  736.      *   <td>20</td>
  737.      *   <td>"\n"</td>
  738.      *   <td>true</td>
  739.      *   <td>" "</td>
  740.      *   <td>"Click here to jump\nto the commons\nwebsite -\nhttps://commons.apach\ne.org"</td>
  741.      *  </tr>
  742.      *  <tr>
  743.      *   <td>"flammable/inflammable"</td>
  744.      *   <td>20</td>
  745.      *   <td>"\n"</td>
  746.      *   <td>true</td>
  747.      *   <td>"/"</td>
  748.      *   <td>"flammable\ninflammable"</td>
  749.      *  </tr>
  750.      * </table>
  751.      * @param str  the String to be word wrapped, may be null
  752.      * @param wrapLength  the column to wrap the words at, less than 1 is treated as 1
  753.      * @param newLineStr  the string to insert for a new line,
  754.      *  {@code null} uses the system property line separator
  755.      * @param wrapLongWords  true if long words (such as URLs) should be wrapped
  756.      * @param wrapOn  regular expression matching the characters at which the line may be broken;
  757.      *               if a blank string is provided, a space character will be used
  758.      * @return a line with newlines inserted, {@code null} if null input
  759.      */
  760.     public static String wrap(final String str,
  761.                               int wrapLength,
  762.                               String newLineStr,
  763.                               final boolean wrapLongWords,
  764.                               String wrapOn) {
  765.         if (str == null) {
  766.             return null;
  767.         }
  768.         if (newLineStr == null) {
  769.             newLineStr = System.lineSeparator();
  770.         }
  771.         if (wrapLength < 1) {
  772.             wrapLength = 1;
  773.         }
  774.         if (StringUtils.isBlank(wrapOn)) {
  775.             wrapOn = " ";
  776.         }
  777.         final Pattern patternToWrapOn = Pattern.compile(wrapOn);
  778.         final int inputLineLength = str.length();
  779.         int offset = 0;
  780.         final StringBuilder wrappedLine = new StringBuilder(inputLineLength + 32);
  781.         int matcherSize = -1;

  782.         while (offset < inputLineLength) {
  783.             int spaceToWrapAt = -1;
  784.             Matcher matcher = patternToWrapOn.matcher(str.substring(offset,
  785.                     Math.min((int) Math.min(Integer.MAX_VALUE, offset + wrapLength + 1L), inputLineLength)));
  786.             if (matcher.find()) {
  787.                 if (matcher.start() == 0) {
  788.                     matcherSize = matcher.end();
  789.                     if (matcherSize != 0) {
  790.                         offset += matcher.end();
  791.                         continue;
  792.                     }
  793.                     offset += 1;
  794.                 }
  795.                 spaceToWrapAt = matcher.start() + offset;
  796.             }

  797.             // only last line without leading spaces is left
  798.             if (inputLineLength - offset <= wrapLength) {
  799.                 break;
  800.             }

  801.             while (matcher.find()) {
  802.                 spaceToWrapAt = matcher.start() + offset;
  803.             }

  804.             if (spaceToWrapAt >= offset) {
  805.                 // normal case
  806.                 wrappedLine.append(str, offset, spaceToWrapAt);
  807.                 wrappedLine.append(newLineStr);
  808.                 offset = spaceToWrapAt + 1;

  809.             } else // really long word or URL
  810.             if (wrapLongWords) {
  811.                 if (matcherSize == 0) {
  812.                     offset--;
  813.                 }
  814.                 // wrap really long word one line at a time
  815.                 wrappedLine.append(str, offset, wrapLength + offset);
  816.                 wrappedLine.append(newLineStr);
  817.                 offset += wrapLength;
  818.                 matcherSize = -1;
  819.             } else {
  820.                 // do not wrap really long word, just extend beyond limit
  821.                 matcher = patternToWrapOn.matcher(str.substring(offset + wrapLength));
  822.                 if (matcher.find()) {
  823.                     matcherSize = matcher.end() - matcher.start();
  824.                     spaceToWrapAt = matcher.start() + offset + wrapLength;
  825.                 }

  826.                 if (spaceToWrapAt >= 0) {
  827.                     if (matcherSize == 0 && offset != 0) {
  828.                         offset--;
  829.                     }
  830.                     wrappedLine.append(str, offset, spaceToWrapAt);
  831.                     wrappedLine.append(newLineStr);
  832.                     offset = spaceToWrapAt + 1;
  833.                 } else {
  834.                     if (matcherSize == 0 && offset != 0) {
  835.                         offset--;
  836.                     }
  837.                     wrappedLine.append(str, offset, str.length());
  838.                     offset = inputLineLength;
  839.                     matcherSize = -1;
  840.                 }
  841.             }
  842.         }

  843.         if (matcherSize == 0 && offset < inputLineLength) {
  844.             offset--;
  845.         }

  846.         // Whatever is left in line is short enough to just pass through
  847.         wrappedLine.append(str, offset, str.length());

  848.         return wrappedLine.toString();
  849.     }

  850.     /**
  851.      * {@code WordUtils} instances should NOT be constructed in
  852.      * standard programming. Instead, the class should be used as
  853.      * {@code WordUtils.wrap("foo bar", 20);}.
  854.      *
  855.      * <p>This constructor is public to permit tools that require a JavaBean
  856.      * instance to operate.</p>
  857.      */
  858.     public WordUtils() {
  859.     }
  860.  }