WordUtils.java
- /*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * https://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
- package org.apache.commons.text;
- import java.util.HashSet;
- import java.util.Set;
- import java.util.function.Predicate;
- import java.util.regex.Matcher;
- import java.util.regex.Pattern;
- import org.apache.commons.lang3.ArrayUtils;
- import org.apache.commons.lang3.StringUtils;
- import org.apache.commons.lang3.Validate;
- /**
- * Operations on Strings that contain words.
- *
- * <p>
- * This class tries to handle {@code null} input gracefully. An exception will not be thrown for a
- * {@code null} input. Each method documents its behavior in more detail.
- * </p>
- *
- * @since 1.1
- */
- public class WordUtils {
- /**
- * Abbreviates the words nicely.
- *
- * <p>
- * This method searches for the first space after the lower limit and abbreviates
- * the String there. It will also append any String passed as a parameter
- * to the end of the String. The upper limit can be specified to forcibly
- * abbreviate a String.
- * </p>
- *
- * @param str the string to be abbreviated. If null is passed, null is returned.
- * If the empty String is passed, the empty string is returned.
- * @param lower the lower limit; negative value is treated as zero.
- * @param upper the upper limit; specify -1 if no limit is desired.
- * The upper limit cannot be lower than the lower limit.
- * @param appendToEnd String to be appended to the end of the abbreviated string.
- * This is appended ONLY if the string was indeed abbreviated.
- * The append does not count towards the lower or upper limits.
- * @return The abbreviated String.
- *
- * <pre>
- * WordUtils.abbreviate("Now is the time for all good men", 0, 40, null)); = "Now"
- * WordUtils.abbreviate("Now is the time for all good men", 10, 40, null)); = "Now is the"
- * WordUtils.abbreviate("Now is the time for all good men", 20, 40, null)); = "Now is the time for all"
- * WordUtils.abbreviate("Now is the time for all good men", 0, 40, "")); = "Now"
- * WordUtils.abbreviate("Now is the time for all good men", 10, 40, "")); = "Now is the"
- * WordUtils.abbreviate("Now is the time for all good men", 20, 40, "")); = "Now is the time for all"
- * WordUtils.abbreviate("Now is the time for all good men", 0, 40, " ...")); = "Now ..."
- * WordUtils.abbreviate("Now is the time for all good men", 10, 40, " ...")); = "Now is the ..."
- * WordUtils.abbreviate("Now is the time for all good men", 20, 40, " ...")); = "Now is the time for all ..."
- * WordUtils.abbreviate("Now is the time for all good men", 0, -1, "")); = "Now"
- * WordUtils.abbreviate("Now is the time for all good men", 10, -1, "")); = "Now is the"
- * WordUtils.abbreviate("Now is the time for all good men", 20, -1, "")); = "Now is the time for all"
- * WordUtils.abbreviate("Now is the time for all good men", 50, -1, "")); = "Now is the time for all good men"
- * WordUtils.abbreviate("Now is the time for all good men", 1000, -1, "")); = "Now is the time for all good men"
- * WordUtils.abbreviate("Now is the time for all good men", 9, -10, null)); = IllegalArgumentException
- * WordUtils.abbreviate("Now is the time for all good men", 10, 5, null)); = IllegalArgumentException
- * </pre>
- */
- public static String abbreviate(final String str, int lower, int upper, final String appendToEnd) {
- Validate.isTrue(upper >= -1, "upper value cannot be less than -1");
- Validate.isTrue(upper >= lower || upper == -1, "upper value is less than lower value");
- if (StringUtils.isEmpty(str)) {
- return str;
- }
- // if the lower value is greater than the length of the string,
- // set to the length of the string
- if (lower > str.length()) {
- lower = str.length();
- }
- // if the upper value is -1 (i.e. no limit) or is greater
- // than the length of the string, set to the length of the string
- if (upper == -1 || upper > str.length()) {
- upper = str.length();
- }
- final StringBuilder result = new StringBuilder();
- final int index = StringUtils.indexOf(str, " ", lower);
- if (index == -1) {
- result.append(str, 0, upper);
- // only if abbreviation has occurred do we append the appendToEnd value
- if (upper != str.length()) {
- result.append(StringUtils.defaultString(appendToEnd));
- }
- } else {
- result.append(str, 0, Math.min(index, upper));
- result.append(StringUtils.defaultString(appendToEnd));
- }
- return result.toString();
- }
- /**
- * Capitalizes all the whitespace separated words in a String.
- * Only the first character of each word is changed. To convert the
- * rest of each word to lowercase at the same time,
- * use {@link #capitalizeFully(String)}.
- *
- * <p>Whitespace is defined by {@link Character#isWhitespace(char)}.
- * A {@code null} input String returns {@code null}.
- * Capitalization uses the Unicode title case, normally equivalent to
- * upper case.</p>
- *
- * <pre>
- * WordUtils.capitalize(null) = null
- * WordUtils.capitalize("") = ""
- * WordUtils.capitalize("i am FINE") = "I Am FINE"
- * </pre>
- *
- * @param str the String to capitalize, may be null
- * @return capitalized String, {@code null} if null String input
- * @see #uncapitalize(String)
- * @see #capitalizeFully(String)
- */
- public static String capitalize(final String str) {
- return capitalize(str, null);
- }
- /**
- * Capitalizes all the delimiter separated words in a String.
- * Only the first character of each word is changed. To convert the
- * rest of each word to lowercase at the same time,
- * use {@link #capitalizeFully(String, char[])}.
- *
- * <p>The delimiters represent a set of characters understood to separate words.
- * The first string character and the first non-delimiter character after a
- * delimiter will be capitalized.</p>
- *
- * <p>A {@code null} input String returns {@code null}.
- * Capitalization uses the Unicode title case, normally equivalent to
- * upper case.</p>
- *
- * <pre>
- * WordUtils.capitalize(null, *) = null
- * WordUtils.capitalize("", *) = ""
- * WordUtils.capitalize(*, new char[0]) = *
- * WordUtils.capitalize("i am fine", null) = "I Am Fine"
- * WordUtils.capitalize("i aM.fine", {'.'}) = "I aM.Fine"
- * WordUtils.capitalize("i am fine", new char[]{}) = "I am fine"
- * </pre>
- *
- * @param str the String to capitalize, may be null
- * @param delimiters set of characters to determine capitalization, null means whitespace
- * @return capitalized String, {@code null} if null String input
- * @see #uncapitalize(String)
- * @see #capitalizeFully(String)
- */
- public static String capitalize(final String str, final char... delimiters) {
- if (StringUtils.isEmpty(str)) {
- return str;
- }
- final Predicate<Integer> isDelimiter = generateIsDelimiterFunction(delimiters);
- final int strLen = str.length();
- final int[] newCodePoints = new int[strLen];
- int outOffset = 0;
- boolean capitalizeNext = true;
- for (int index = 0; index < strLen;) {
- final int codePoint = str.codePointAt(index);
- if (isDelimiter.test(codePoint)) {
- capitalizeNext = true;
- newCodePoints[outOffset++] = codePoint;
- index += Character.charCount(codePoint);
- } else if (capitalizeNext) {
- final int titleCaseCodePoint = Character.toTitleCase(codePoint);
- newCodePoints[outOffset++] = titleCaseCodePoint;
- index += Character.charCount(titleCaseCodePoint);
- capitalizeNext = false;
- } else {
- newCodePoints[outOffset++] = codePoint;
- index += Character.charCount(codePoint);
- }
- }
- return new String(newCodePoints, 0, outOffset);
- }
- /**
- * Converts all the whitespace separated words in a String into capitalized words,
- * that is each word is made up of a titlecase character and then a series of
- * lowercase characters.
- *
- * <p>Whitespace is defined by {@link Character#isWhitespace(char)}.
- * A {@code null} input String returns {@code null}.
- * Capitalization uses the Unicode title case, normally equivalent to
- * upper case.</p>
- *
- * <pre>
- * WordUtils.capitalizeFully(null) = null
- * WordUtils.capitalizeFully("") = ""
- * WordUtils.capitalizeFully("i am FINE") = "I Am Fine"
- * </pre>
- *
- * @param str the String to capitalize, may be null
- * @return capitalized String, {@code null} if null String input
- */
- public static String capitalizeFully(final String str) {
- return capitalizeFully(str, null);
- }
- /**
- * Converts all the delimiter separated words in a String into capitalized words,
- * that is each word is made up of a titlecase character and then a series of
- * lowercase characters.
- *
- * <p>The delimiters represent a set of characters understood to separate words.
- * The first string character and the first non-delimiter character after a
- * delimiter will be capitalized.</p>
- *
- * <p>A {@code null} input String returns {@code null}.
- * Capitalization uses the Unicode title case, normally equivalent to
- * upper case.</p>
- *
- * <pre>
- * WordUtils.capitalizeFully(null, *) = null
- * WordUtils.capitalizeFully("", *) = ""
- * WordUtils.capitalizeFully(*, null) = *
- * WordUtils.capitalizeFully(*, new char[0]) = *
- * WordUtils.capitalizeFully("i aM.fine", {'.'}) = "I am.Fine"
- * </pre>
- *
- * @param str the String to capitalize, may be null
- * @param delimiters set of characters to determine capitalization, null means whitespace
- * @return capitalized String, {@code null} if null String input
- */
- public static String capitalizeFully(String str, final char... delimiters) {
- if (StringUtils.isEmpty(str)) {
- return str;
- }
- str = str.toLowerCase();
- return capitalize(str, delimiters);
- }
- /**
- * Checks if the String contains all words in the given array.
- *
- * <p>
- * A {@code null} String will return {@code false}. A {@code null}, zero
- * length search array or if one element of array is null will return {@code false}.
- * </p>
- *
- * <pre>
- * WordUtils.containsAllWords(null, *) = false
- * WordUtils.containsAllWords("", *) = false
- * WordUtils.containsAllWords(*, null) = false
- * WordUtils.containsAllWords(*, []) = false
- * WordUtils.containsAllWords("abcd", "ab", "cd") = false
- * WordUtils.containsAllWords("abc def", "def", "abc") = true
- * </pre>
- *
- * @param word The CharSequence to check, may be null
- * @param words The array of String words to search for, may be null
- * @return {@code true} if all search words are found, {@code false} otherwise
- */
- public static boolean containsAllWords(final CharSequence word, final CharSequence... words) {
- if (StringUtils.isEmpty(word) || ArrayUtils.isEmpty(words)) {
- return false;
- }
- for (final CharSequence w : words) {
- if (StringUtils.isBlank(w)) {
- return false;
- }
- final Pattern p = Pattern.compile(".*\\b" + Pattern.quote(w.toString()) + "\\b.*");
- if (!p.matcher(word).matches()) {
- return false;
- }
- }
- return true;
- }
- /**
- * Given the array of delimiters supplied; returns a function determining whether a character code point is a delimiter.
- * The function provides O(1) lookup time.
- * Whitespace is defined by {@link Character#isWhitespace(char)} and is used as the defaultvalue if delimiters is null.
- *
- * @param delimiters set of characters to determine delimiters, null means whitespace
- * @return Predicate<Integer> taking a code point value as an argument and returning true if a delimiter.
- */
- private static Predicate<Integer> generateIsDelimiterFunction(final char[] delimiters) {
- final Predicate<Integer> isDelimiter;
- if (delimiters == null || delimiters.length == 0) {
- isDelimiter = delimiters == null ? Character::isWhitespace : c -> false;
- } else {
- final Set<Integer> delimiterSet = new HashSet<>();
- for (int index = 0; index < delimiters.length; index++) {
- delimiterSet.add(Character.codePointAt(delimiters, index));
- }
- isDelimiter = delimiterSet::contains;
- }
- return isDelimiter;
- }
- /**
- * Extracts the initial characters from each word in the String.
- *
- * <p>All first characters after whitespace are returned as a new string.
- * Their case is not changed.</p>
- *
- * <p>Whitespace is defined by {@link Character#isWhitespace(char)}.
- * A {@code null} input String returns {@code null}.</p>
- *
- * <pre>
- * WordUtils.initials(null) = null
- * WordUtils.initials("") = ""
- * WordUtils.initials("Ben John Lee") = "BJL"
- * WordUtils.initials("Ben J.Lee") = "BJ"
- * </pre>
- *
- * @param str the String to get initials from, may be null
- * @return String of initial letters, {@code null} if null String input
- * @see #initials(String,char[])
- */
- public static String initials(final String str) {
- return initials(str, null);
- }
- /**
- * Extracts the initial characters from each word in the String.
- *
- * <p>All first characters after the defined delimiters are returned as a new string.
- * Their case is not changed.</p>
- *
- * <p>If the delimiters array is null, then Whitespace is used.
- * Whitespace is defined by {@link Character#isWhitespace(char)}.
- * A {@code null} input String returns {@code null}.
- * An empty delimiter array returns an empty String.</p>
- *
- * <pre>
- * WordUtils.initials(null, *) = null
- * WordUtils.initials("", *) = ""
- * WordUtils.initials("Ben John Lee", null) = "BJL"
- * WordUtils.initials("Ben J.Lee", null) = "BJ"
- * WordUtils.initials("Ben J.Lee", [' ','.']) = "BJL"
- * WordUtils.initials(*, new char[0]) = ""
- * </pre>
- *
- * @param str the String to get initials from, may be null
- * @param delimiters set of characters to determine words, null means whitespace
- * @return String of initial characters, {@code null} if null String input
- * @see #initials(String)
- */
- public static String initials(final String str, final char... delimiters) {
- if (StringUtils.isEmpty(str)) {
- return str;
- }
- if (delimiters != null && delimiters.length == 0) {
- return StringUtils.EMPTY;
- }
- final Predicate<Integer> isDelimiter = generateIsDelimiterFunction(delimiters);
- final int strLen = str.length();
- final int[] newCodePoints = new int[strLen / 2 + 1];
- int count = 0;
- boolean lastWasGap = true;
- for (int i = 0; i < strLen;) {
- final int codePoint = str.codePointAt(i);
- if (isDelimiter.test(codePoint)) {
- lastWasGap = true;
- } else if (lastWasGap) {
- newCodePoints[count++] = codePoint;
- lastWasGap = false;
- }
- i += Character.charCount(codePoint);
- }
- return new String(newCodePoints, 0, count);
- }
- /**
- * Is the character a delimiter.
- *
- * @param ch the character to check
- * @param delimiters the delimiters
- * @return true if it is a delimiter
- * @deprecated as of 1.2 and will be removed in 2.0
- */
- @Deprecated
- public static boolean isDelimiter(final char ch, final char[] delimiters) {
- if (delimiters == null) {
- return Character.isWhitespace(ch);
- }
- for (final char delimiter : delimiters) {
- if (ch == delimiter) {
- return true;
- }
- }
- return false;
- }
- /**
- * Is the codePoint a delimiter.
- *
- * @param codePoint the codePint to check
- * @param delimiters the delimiters
- * @return true if it is a delimiter
- * @deprecated as of 1.2 and will be removed in 2.0
- */
- @Deprecated
- public static boolean isDelimiter(final int codePoint, final char[] delimiters) {
- if (delimiters == null) {
- return Character.isWhitespace(codePoint);
- }
- for (int index = 0; index < delimiters.length; index++) {
- final int delimiterCodePoint = Character.codePointAt(delimiters, index);
- if (delimiterCodePoint == codePoint) {
- return true;
- }
- }
- return false;
- }
- /**
- * Swaps the case of a String using a word based algorithm.
- *
- * <ul>
- * <li>Upper case character converts to Lower case</li>
- * <li>Title case character converts to Lower case</li>
- * <li>Lower case character after Whitespace or at start converts to Title case</li>
- * <li>Other Lower case character converts to Upper case</li>
- * </ul>
- *
- * <p>Whitespace is defined by {@link Character#isWhitespace(char)}.
- * A {@code null} input String returns {@code null}.</p>
- *
- * <pre>
- * StringUtils.swapCase(null) = null
- * StringUtils.swapCase("") = ""
- * StringUtils.swapCase("The dog has a BONE") = "tHE DOG HAS A bone"
- * </pre>
- *
- * @param str the String to swap case, may be null
- * @return The changed String, {@code null} if null String input
- */
- public static String swapCase(final String str) {
- if (StringUtils.isEmpty(str)) {
- return str;
- }
- final int strLen = str.length();
- final int[] newCodePoints = new int[strLen];
- int outOffset = 0;
- boolean whitespace = true;
- for (int index = 0; index < strLen;) {
- final int oldCodepoint = str.codePointAt(index);
- final int newCodePoint;
- if (Character.isUpperCase(oldCodepoint) || Character.isTitleCase(oldCodepoint)) {
- newCodePoint = Character.toLowerCase(oldCodepoint);
- whitespace = false;
- } else if (Character.isLowerCase(oldCodepoint)) {
- if (whitespace) {
- newCodePoint = Character.toTitleCase(oldCodepoint);
- whitespace = false;
- } else {
- newCodePoint = Character.toUpperCase(oldCodepoint);
- }
- } else {
- whitespace = Character.isWhitespace(oldCodepoint);
- newCodePoint = oldCodepoint;
- }
- newCodePoints[outOffset++] = newCodePoint;
- index += Character.charCount(newCodePoint);
- }
- return new String(newCodePoints, 0, outOffset);
- }
- /**
- * Uncapitalizes all the whitespace separated words in a String.
- * Only the first character of each word is changed.
- *
- * <p>Whitespace is defined by {@link Character#isWhitespace(char)}.
- * A {@code null} input String returns {@code null}.</p>
- *
- * <pre>
- * WordUtils.uncapitalize(null) = null
- * WordUtils.uncapitalize("") = ""
- * WordUtils.uncapitalize("I Am FINE") = "i am fINE"
- * </pre>
- *
- * @param str the String to uncapitalize, may be null
- * @return uncapitalized String, {@code null} if null String input
- * @see #capitalize(String)
- */
- public static String uncapitalize(final String str) {
- return uncapitalize(str, null);
- }
- /**
- * Uncapitalizes all the whitespace separated words in a String.
- * Only the first character of each word is changed.
- *
- * <p>The delimiters represent a set of characters understood to separate words.
- * The first string character and the first non-delimiter character after a
- * delimiter will be uncapitalized.</p>
- *
- * <p>Whitespace is defined by {@link Character#isWhitespace(char)}.
- * A {@code null} input String returns {@code null}.</p>
- *
- * <pre>
- * WordUtils.uncapitalize(null, *) = null
- * WordUtils.uncapitalize("", *) = ""
- * WordUtils.uncapitalize(*, null) = *
- * WordUtils.uncapitalize(*, new char[0]) = *
- * WordUtils.uncapitalize("I AM.FINE", {'.'}) = "i AM.fINE"
- * WordUtils.uncapitalize("I am fine", new char[]{}) = "i am fine"
- * </pre>
- *
- * @param str the String to uncapitalize, may be null
- * @param delimiters set of characters to determine uncapitalization, null means whitespace
- * @return uncapitalized String, {@code null} if null String input
- * @see #capitalize(String)
- */
- public static String uncapitalize(final String str, final char... delimiters) {
- if (StringUtils.isEmpty(str)) {
- return str;
- }
- final Predicate<Integer> isDelimiter = generateIsDelimiterFunction(delimiters);
- final int strLen = str.length();
- final int[] newCodePoints = new int[strLen];
- int outOffset = 0;
- boolean uncapitalizeNext = true;
- for (int index = 0; index < strLen;) {
- final int codePoint = str.codePointAt(index);
- if (isDelimiter.test(codePoint)) {
- uncapitalizeNext = true;
- newCodePoints[outOffset++] = codePoint;
- index += Character.charCount(codePoint);
- } else if (uncapitalizeNext) {
- final int titleCaseCodePoint = Character.toLowerCase(codePoint);
- newCodePoints[outOffset++] = titleCaseCodePoint;
- index += Character.charCount(titleCaseCodePoint);
- uncapitalizeNext = false;
- } else {
- newCodePoints[outOffset++] = codePoint;
- index += Character.charCount(codePoint);
- }
- }
- return new String(newCodePoints, 0, outOffset);
- }
- /**
- * Wraps a single line of text, identifying words by {@code ' '}.
- *
- * <p>New lines will be separated by the system property line separator.
- * Very long words, such as URLs will <em>not</em> be wrapped.</p>
- *
- * <p>Leading spaces on a new line are stripped.
- * Trailing spaces are not stripped.</p>
- *
- * <table border="1">
- * <caption>Examples</caption>
- * <tr>
- * <th>input</th>
- * <th>wrapLength</th>
- * <th>result</th>
- * </tr>
- * <tr>
- * <td>null</td>
- * <td>*</td>
- * <td>null</td>
- * </tr>
- * <tr>
- * <td>""</td>
- * <td>*</td>
- * <td>""</td>
- * </tr>
- * <tr>
- * <td>"Here is one line of text that is going to be wrapped after 20 columns."</td>
- * <td>20</td>
- * <td>"Here is one line of\ntext that is going\nto be wrapped after\n20 columns."</td>
- * </tr>
- * <tr>
- * <td>"Click here to jump to the commons website - https://commons.apache.org"</td>
- * <td>20</td>
- * <td>"Click here to jump\nto the commons\nwebsite -\nhttps://commons.apache.org"</td>
- * </tr>
- * <tr>
- * <td>"Click here, https://commons.apache.org, to jump to the commons website"</td>
- * <td>20</td>
- * <td>"Click here,\nhttps://commons.apache.org,\nto jump to the\ncommons website"</td>
- * </tr>
- * </table>
- *
- * (assuming that '\n' is the systems line separator)
- *
- * @param str the String to be word wrapped, may be null
- * @param wrapLength the column to wrap the words at, less than 1 is treated as 1
- * @return a line with newlines inserted, {@code null} if null input
- */
- public static String wrap(final String str, final int wrapLength) {
- return wrap(str, wrapLength, null, false);
- }
- /**
- * Wraps a single line of text, identifying words by {@code ' '}.
- *
- * <p>Leading spaces on a new line are stripped.
- * Trailing spaces are not stripped.</p>
- *
- * <table border="1">
- * <caption>Examples</caption>
- * <tr>
- * <th>input</th>
- * <th>wrapLength</th>
- * <th>newLineString</th>
- * <th>wrapLongWords</th>
- * <th>result</th>
- * </tr>
- * <tr>
- * <td>null</td>
- * <td>*</td>
- * <td>*</td>
- * <td>true/false</td>
- * <td>null</td>
- * </tr>
- * <tr>
- * <td>""</td>
- * <td>*</td>
- * <td>*</td>
- * <td>true/false</td>
- * <td>""</td>
- * </tr>
- * <tr>
- * <td>"Here is one line of text that is going to be wrapped after 20 columns."</td>
- * <td>20</td>
- * <td>"\n"</td>
- * <td>true/false</td>
- * <td>"Here is one line of\ntext that is going\nto be wrapped after\n20 columns."</td>
- * </tr>
- * <tr>
- * <td>"Here is one line of text that is going to be wrapped after 20 columns."</td>
- * <td>20</td>
- * <td>"<br />"</td>
- * <td>true/false</td>
- * <td>"Here is one line of<br />text that is going<
- * br />to be wrapped after<br />20 columns."</td>
- * </tr>
- * <tr>
- * <td>"Here is one line of text that is going to be wrapped after 20 columns."</td>
- * <td>20</td>
- * <td>null</td>
- * <td>true/false</td>
- * <td>"Here is one line of" + systemNewLine + "text that is going"
- * + systemNewLine + "to be wrapped after" + systemNewLine + "20 columns."</td>
- * </tr>
- * <tr>
- * <td>"Click here to jump to the commons website - https://commons.apache.org"</td>
- * <td>20</td>
- * <td>"\n"</td>
- * <td>false</td>
- * <td>"Click here to jump\nto the commons\nwebsite -\nhttps://commons.apache.org"</td>
- * </tr>
- * <tr>
- * <td>"Click here to jump to the commons website - https://commons.apache.org"</td>
- * <td>20</td>
- * <td>"\n"</td>
- * <td>true</td>
- * <td>"Click here to jump\nto the commons\nwebsite -\nhttps://commons.apach\ne.org"</td>
- * </tr>
- * </table>
- *
- * @param str the String to be word wrapped, may be null
- * @param wrapLength the column to wrap the words at, less than 1 is treated as 1
- * @param newLineStr the string to insert for a new line,
- * {@code null} uses the system property line separator
- * @param wrapLongWords true if long words (such as URLs) should be wrapped
- * @return a line with newlines inserted, {@code null} if null input
- */
- public static String wrap(final String str,
- final int wrapLength,
- final String newLineStr,
- final boolean wrapLongWords) {
- return wrap(str, wrapLength, newLineStr, wrapLongWords, " ");
- }
- /**
- * Wraps a single line of text, identifying words by {@code wrapOn}.
- *
- * <p>Leading spaces on a new line are stripped.
- * Trailing spaces are not stripped.</p>
- *
- * <table border="1">
- * <caption>Examples</caption>
- * <tr>
- * <th>input</th>
- * <th>wrapLength</th>
- * <th>newLineString</th>
- * <th>wrapLongWords</th>
- * <th>wrapOn</th>
- * <th>result</th>
- * </tr>
- * <tr>
- * <td>null</td>
- * <td>*</td>
- * <td>*</td>
- * <td>true/false</td>
- * <td>*</td>
- * <td>null</td>
- * </tr>
- * <tr>
- * <td>""</td>
- * <td>*</td>
- * <td>*</td>
- * <td>true/false</td>
- * <td>*</td>
- * <td>""</td>
- * </tr>
- * <tr>
- * <td>"Here is one line of text that is going to be wrapped after 20 columns."</td>
- * <td>20</td>
- * <td>"\n"</td>
- * <td>true/false</td>
- * <td>" "</td>
- * <td>"Here is one line of\ntext that is going\nto be wrapped after\n20 columns."</td>
- * </tr>
- * <tr>
- * <td>"Here is one line of text that is going to be wrapped after 20 columns."</td>
- * <td>20</td>
- * <td>"<br />"</td>
- * <td>true/false</td>
- * <td>" "</td>
- * <td>"Here is one line of<br />text that is going<br />
- * to be wrapped after<br />20 columns."</td>
- * </tr>
- * <tr>
- * <td>"Here is one line of text that is going to be wrapped after 20 columns."</td>
- * <td>20</td>
- * <td>null</td>
- * <td>true/false</td>
- * <td>" "</td>
- * <td>"Here is one line of" + systemNewLine + "text that is going"
- * + systemNewLine + "to be wrapped after" + systemNewLine + "20 columns."</td>
- * </tr>
- * <tr>
- * <td>"Click here to jump to the commons website - https://commons.apache.org"</td>
- * <td>20</td>
- * <td>"\n"</td>
- * <td>false</td>
- * <td>" "</td>
- * <td>"Click here to jump\nto the commons\nwebsite -\nhttps://commons.apache.org"</td>
- * </tr>
- * <tr>
- * <td>"Click here to jump to the commons website - https://commons.apache.org"</td>
- * <td>20</td>
- * <td>"\n"</td>
- * <td>true</td>
- * <td>" "</td>
- * <td>"Click here to jump\nto the commons\nwebsite -\nhttps://commons.apach\ne.org"</td>
- * </tr>
- * <tr>
- * <td>"flammable/inflammable"</td>
- * <td>20</td>
- * <td>"\n"</td>
- * <td>true</td>
- * <td>"/"</td>
- * <td>"flammable\ninflammable"</td>
- * </tr>
- * </table>
- * @param str the String to be word wrapped, may be null
- * @param wrapLength the column to wrap the words at, less than 1 is treated as 1
- * @param newLineStr the string to insert for a new line,
- * {@code null} uses the system property line separator
- * @param wrapLongWords true if long words (such as URLs) should be wrapped
- * @param wrapOn regex expression to be used as a breakable characters,
- * if blank string is provided a space character will be used
- * @return a line with newlines inserted, {@code null} if null input
- */
- public static String wrap(final String str,
- int wrapLength,
- String newLineStr,
- final boolean wrapLongWords,
- String wrapOn) {
- if (str == null) {
- return null;
- }
- if (newLineStr == null) {
- newLineStr = System.lineSeparator();
- }
- if (wrapLength < 1) {
- wrapLength = 1;
- }
- if (StringUtils.isBlank(wrapOn)) {
- wrapOn = " ";
- }
- final Pattern patternToWrapOn = Pattern.compile(wrapOn);
- final int inputLineLength = str.length();
- int offset = 0;
- final StringBuilder wrappedLine = new StringBuilder(inputLineLength + 32);
- int matcherSize = -1;
- while (offset < inputLineLength) {
- int spaceToWrapAt = -1;
- Matcher matcher = patternToWrapOn.matcher(str.substring(offset,
- Math.min((int) Math.min(Integer.MAX_VALUE, offset + wrapLength + 1L), inputLineLength)));
- if (matcher.find()) {
- if (matcher.start() == 0) {
- matcherSize = matcher.end();
- if (matcherSize != 0) {
- offset += matcher.end();
- continue;
- }
- offset += 1;
- }
- spaceToWrapAt = matcher.start() + offset;
- }
- // only last line without leading spaces is left
- if (inputLineLength - offset <= wrapLength) {
- break;
- }
- while (matcher.find()) {
- spaceToWrapAt = matcher.start() + offset;
- }
- if (spaceToWrapAt >= offset) {
- // normal case
- wrappedLine.append(str, offset, spaceToWrapAt);
- wrappedLine.append(newLineStr);
- offset = spaceToWrapAt + 1;
- } else // really long word or URL
- if (wrapLongWords) {
- if (matcherSize == 0) {
- offset--;
- }
- // wrap really long word one line at a time
- wrappedLine.append(str, offset, wrapLength + offset);
- wrappedLine.append(newLineStr);
- offset += wrapLength;
- matcherSize = -1;
- } else {
- // do not wrap really long word, just extend beyond limit
- matcher = patternToWrapOn.matcher(str.substring(offset + wrapLength));
- if (matcher.find()) {
- matcherSize = matcher.end() - matcher.start();
- spaceToWrapAt = matcher.start() + offset + wrapLength;
- }
- if (spaceToWrapAt >= 0) {
- if (matcherSize == 0 && offset != 0) {
- offset--;
- }
- wrappedLine.append(str, offset, spaceToWrapAt);
- wrappedLine.append(newLineStr);
- offset = spaceToWrapAt + 1;
- } else {
- if (matcherSize == 0 && offset != 0) {
- offset--;
- }
- wrappedLine.append(str, offset, str.length());
- offset = inputLineLength;
- matcherSize = -1;
- }
- }
- }
- if (matcherSize == 0 && offset < inputLineLength) {
- offset--;
- }
- // Whatever is left in line is short enough to just pass through
- wrappedLine.append(str, offset, str.length());
- return wrappedLine.toString();
- }
- /**
- * {@code WordUtils} instances should NOT be constructed in
- * standard programming. Instead, the class should be used as
- * {@code WordUtils.wrap("foo bar", 20);}.
- *
- * <p>This constructor is public to permit tools that require a JavaBean
- * instance to operate.</p>
- */
- public WordUtils() {
- }
- }