001/* 002 * Licensed to the Apache Software Foundation (ASF) under one or more 003 * contributor license agreements. See the NOTICE file distributed with 004 * this work for additional information regarding copyright ownership. 005 * The ASF licenses this file to You under the Apache License, Version 2.0 006 * (the "License"); you may not use this file except in compliance with 007 * the License. You may obtain a copy of the License at 008 * 009 * https://www.apache.org/licenses/LICENSE-2.0 010 * 011 * Unless required by applicable law or agreed to in writing, software 012 * distributed under the License is distributed on an "AS IS" BASIS, 013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 014 * See the License for the specific language governing permissions and 015 * limitations under the License. 016 */ 017package org.apache.commons.text; 018 019import java.util.HashSet; 020import java.util.Set; 021import java.util.function.Predicate; 022import java.util.regex.Matcher; 023import java.util.regex.Pattern; 024 025import org.apache.commons.lang3.ArrayUtils; 026import org.apache.commons.lang3.StringUtils; 027import org.apache.commons.lang3.Strings; 028import org.apache.commons.lang3.Validate; 029 030/** 031 * Operations on Strings that contain words. 032 * 033 * <p> 034 * This class tries to handle {@code null} input gracefully. An exception will not be thrown for a 035 * {@code null} input. Each method documents its behavior in more detail. 036 * </p> 037 * 038 * @since 1.1 039 */ 040public class WordUtils { 041 042 /** 043 * Abbreviates the words nicely. 044 * 045 * <p> 046 * This method searches for the first space after the lower limit and abbreviates 047 * the String there. It will also append any String passed as a parameter 048 * to the end of the String. The upper limit can be specified to forcibly 049 * abbreviate a String. 050 * </p> 051 * 052 * @param str the string to be abbreviated. If null is passed, null is returned. 053 * If the empty String is passed, the empty string is returned. 054 * @param lower the lower limit; negative value is treated as zero. 055 * @param upper the upper limit; specify -1 if no limit is desired. 056 * The upper limit cannot be lower than the lower limit. 057 * @param appendToEnd String to be appended to the end of the abbreviated string. 058 * This is appended ONLY if the string was indeed abbreviated. 059 * The append does not count towards the lower or upper limits. 060 * @return The abbreviated String. 061 * 062 * <pre> 063 * WordUtils.abbreviate("Now is the time for all good men", 0, 40, null)); = "Now" 064 * WordUtils.abbreviate("Now is the time for all good men", 10, 40, null)); = "Now is the" 065 * WordUtils.abbreviate("Now is the time for all good men", 20, 40, null)); = "Now is the time for all" 066 * WordUtils.abbreviate("Now is the time for all good men", 0, 40, "")); = "Now" 067 * WordUtils.abbreviate("Now is the time for all good men", 10, 40, "")); = "Now is the" 068 * WordUtils.abbreviate("Now is the time for all good men", 20, 40, "")); = "Now is the time for all" 069 * WordUtils.abbreviate("Now is the time for all good men", 0, 40, " ...")); = "Now ..." 070 * WordUtils.abbreviate("Now is the time for all good men", 10, 40, " ...")); = "Now is the ..." 071 * WordUtils.abbreviate("Now is the time for all good men", 20, 40, " ...")); = "Now is the time for all ..." 072 * WordUtils.abbreviate("Now is the time for all good men", 0, -1, "")); = "Now" 073 * WordUtils.abbreviate("Now is the time for all good men", 10, -1, "")); = "Now is the" 074 * WordUtils.abbreviate("Now is the time for all good men", 20, -1, "")); = "Now is the time for all" 075 * WordUtils.abbreviate("Now is the time for all good men", 50, -1, "")); = "Now is the time for all good men" 076 * WordUtils.abbreviate("Now is the time for all good men", 1000, -1, "")); = "Now is the time for all good men" 077 * WordUtils.abbreviate("Now is the time for all good men", 9, -10, null)); = Throws {@link IllegalArgumentException} 078 * WordUtils.abbreviate("Now is the time for all good men", 10, 5, null)); = Throws {@link IllegalArgumentException} 079 * </pre> 080 */ 081 public static String abbreviate(final String str, int lower, int upper, final String appendToEnd) { 082 Validate.isTrue(upper >= -1, "upper value cannot be less than -1"); 083 Validate.isTrue(upper >= lower || upper == -1, "upper value is less than lower value"); 084 if (StringUtils.isEmpty(str)) { 085 return str; 086 } 087 088 // if the lower value is greater than the length of the string, 089 // set to the length of the string 090 if (lower > str.length()) { 091 lower = str.length(); 092 } 093 094 // if the upper value is -1 (i.e. no limit) or is greater 095 // than the length of the string, set to the length of the string 096 if (upper == -1 || upper > str.length()) { 097 upper = str.length(); 098 } 099 100 final StringBuilder result = new StringBuilder(); 101 final int index = Strings.CS.indexOf(str, " ", lower); 102 if (index == -1) { 103 result.append(str, 0, upper); 104 // only if abbreviation has occurred do we append the appendToEnd value 105 if (upper != str.length()) { 106 result.append(StringUtils.defaultString(appendToEnd)); 107 } 108 } else { 109 result.append(str, 0, Math.min(index, upper)); 110 result.append(StringUtils.defaultString(appendToEnd)); 111 } 112 113 return result.toString(); 114 } 115 116 /** 117 * Capitalizes all the whitespace separated words in a String. 118 * Only the first character of each word is changed. To convert the 119 * rest of each word to lowercase at the same time, 120 * use {@link #capitalizeFully(String)}. 121 * 122 * <p>Whitespace is defined by {@link Character#isWhitespace(char)}. 123 * A {@code null} input String returns {@code null}. 124 * Capitalization uses the Unicode title case, normally equivalent to 125 * upper case.</p> 126 * 127 * <pre> 128 * WordUtils.capitalize(null) = null 129 * WordUtils.capitalize("") = "" 130 * WordUtils.capitalize("i am FINE") = "I Am FINE" 131 * </pre> 132 * 133 * @param str the String to capitalize, may be null. 134 * @return capitalized String, {@code null} if null String input. 135 * @see #uncapitalize(String) 136 * @see #capitalizeFully(String) 137 */ 138 public static String capitalize(final String str) { 139 return capitalize(str, null); 140 } 141 142 /** 143 * Capitalizes all the delimiter separated words in a String. 144 * Only the first character of each word is changed. To convert the 145 * rest of each word to lowercase at the same time, 146 * use {@link #capitalizeFully(String, char[])}. 147 * 148 * <p>The delimiters represent a set of characters understood to separate words. 149 * The first string character and the first non-delimiter character after a 150 * delimiter will be capitalized.</p> 151 * 152 * <p>A {@code null} input String returns {@code null}. 153 * Capitalization uses the Unicode title case, normally equivalent to 154 * upper case.</p> 155 * 156 * <pre> 157 * WordUtils.capitalize(null, *) = null 158 * WordUtils.capitalize("", *) = "" 159 * WordUtils.capitalize(*, new char[0]) = * 160 * WordUtils.capitalize("i am fine", null) = "I Am Fine" 161 * WordUtils.capitalize("i aM.fine", {'.'}) = "I aM.Fine" 162 * WordUtils.capitalize("i am fine", new char[]{}) = "I am fine" 163 * </pre> 164 * 165 * @param str the String to capitalize, may be null. 166 * @param delimiters set of characters to determine capitalization, null means whitespace. 167 * @return capitalized String, {@code null} if null String input. 168 * @see #uncapitalize(String) 169 * @see #capitalizeFully(String) 170 */ 171 public static String capitalize(final String str, final char... delimiters) { 172 if (StringUtils.isEmpty(str)) { 173 return str; 174 } 175 final Predicate<Integer> isDelimiter = generateIsDelimiterFunction(delimiters); 176 final int strLen = str.length(); 177 final int[] newCodePoints = new int[strLen]; 178 int outOffset = 0; 179 180 boolean capitalizeNext = true; 181 for (int index = 0; index < strLen;) { 182 final int codePoint = str.codePointAt(index); 183 184 if (isDelimiter.test(codePoint)) { 185 capitalizeNext = true; 186 newCodePoints[outOffset++] = codePoint; 187 index += Character.charCount(codePoint); 188 } else if (capitalizeNext) { 189 final int titleCaseCodePoint = Character.toTitleCase(codePoint); 190 newCodePoints[outOffset++] = titleCaseCodePoint; 191 index += Character.charCount(titleCaseCodePoint); 192 capitalizeNext = false; 193 } else { 194 newCodePoints[outOffset++] = codePoint; 195 index += Character.charCount(codePoint); 196 } 197 } 198 return new String(newCodePoints, 0, outOffset); 199 } 200 201 /** 202 * Converts all the whitespace separated words in a String into capitalized words, 203 * that is each word is made up of a titlecase character and then a series of 204 * lowercase characters. 205 * 206 * <p>Whitespace is defined by {@link Character#isWhitespace(char)}. 207 * A {@code null} input String returns {@code null}. 208 * Capitalization uses the Unicode title case, normally equivalent to 209 * upper case.</p> 210 * 211 * <pre> 212 * WordUtils.capitalizeFully(null) = null 213 * WordUtils.capitalizeFully("") = "" 214 * WordUtils.capitalizeFully("i am FINE") = "I Am Fine" 215 * </pre> 216 * 217 * @param str the String to capitalize, may be null. 218 * @return capitalized String, {@code null} if null String input. 219 */ 220 public static String capitalizeFully(final String str) { 221 return capitalizeFully(str, null); 222 } 223 224 /** 225 * Converts all the delimiter separated words in a String into capitalized words, 226 * that is each word is made up of a titlecase character and then a series of 227 * lowercase characters. 228 * 229 * <p>The delimiters represent a set of characters understood to separate words. 230 * The first string character and the first non-delimiter character after a 231 * delimiter will be capitalized.</p> 232 * 233 * <p>A {@code null} input String returns {@code null}. 234 * Capitalization uses the Unicode title case, normally equivalent to 235 * upper case.</p> 236 * 237 * <pre> 238 * WordUtils.capitalizeFully(null, *) = null 239 * WordUtils.capitalizeFully("", *) = "" 240 * WordUtils.capitalizeFully(*, null) = * 241 * WordUtils.capitalizeFully(*, new char[0]) = * 242 * WordUtils.capitalizeFully("i aM.fine", {'.'}) = "I am.Fine" 243 * </pre> 244 * 245 * @param str the String to capitalize, may be null. 246 * @param delimiters set of characters to determine capitalization, null means whitespace. 247 * @return capitalized String, {@code null} if null String input. 248 */ 249 public static String capitalizeFully(String str, final char... delimiters) { 250 if (StringUtils.isEmpty(str)) { 251 return str; 252 } 253 str = str.toLowerCase(); 254 return capitalize(str, delimiters); 255 } 256 257 /** 258 * Checks if the String contains all words in the given array. 259 * 260 * <p> 261 * A {@code null} String will return {@code false}. A {@code null}, zero 262 * length search array or if one element of array is null will return {@code false}. 263 * </p> 264 * 265 * <pre> 266 * WordUtils.containsAllWords(null, *) = false 267 * WordUtils.containsAllWords("", *) = false 268 * WordUtils.containsAllWords(*, null) = false 269 * WordUtils.containsAllWords(*, []) = false 270 * WordUtils.containsAllWords("abcd", "ab", "cd") = false 271 * WordUtils.containsAllWords("abc def", "def", "abc") = true 272 * </pre> 273 * 274 * @param word The CharSequence to check, may be null. 275 * @param words The array of String words to search for, may be null. 276 * @return {@code true} if all search words are found, {@code false} otherwise. 277 */ 278 public static boolean containsAllWords(final CharSequence word, final CharSequence... words) { 279 if (StringUtils.isEmpty(word) || ArrayUtils.isEmpty(words)) { 280 return false; 281 } 282 for (final CharSequence w : words) { 283 if (StringUtils.isBlank(w)) { 284 return false; 285 } 286 final Pattern p = Pattern.compile(".*\\b" + Pattern.quote(w.toString()) + "\\b.*"); 287 if (!p.matcher(word).matches()) { 288 return false; 289 } 290 } 291 return true; 292 } 293 294 /** 295 * Given the array of delimiters supplied; returns a function determining whether a character code point is a delimiter. 296 * The function provides O(1) lookup time. 297 * Whitespace is defined by {@link Character#isWhitespace(char)} and is used as the defaultvalue if delimiters is null. 298 * 299 * @param delimiters set of characters to determine delimiters, null means whitespace. 300 * @return Predicate<Integer> taking a code point value as an argument and returning true if a delimiter. 301 */ 302 private static Predicate<Integer> generateIsDelimiterFunction(final char[] delimiters) { 303 final Predicate<Integer> isDelimiter; 304 if (delimiters == null || delimiters.length == 0) { 305 isDelimiter = delimiters == null ? Character::isWhitespace : c -> false; 306 } else { 307 final Set<Integer> delimiterSet = new HashSet<>(); 308 for (int index = 0; index < delimiters.length; index++) { 309 delimiterSet.add(Character.codePointAt(delimiters, index)); 310 } 311 isDelimiter = delimiterSet::contains; 312 } 313 314 return isDelimiter; 315 } 316 317 /** 318 * Extracts the initial characters from each word in the String. 319 * 320 * <p>All first characters after whitespace are returned as a new string. 321 * Their case is not changed.</p> 322 * 323 * <p>Whitespace is defined by {@link Character#isWhitespace(char)}. 324 * A {@code null} input String returns {@code null}.</p> 325 * 326 * <pre> 327 * WordUtils.initials(null) = null 328 * WordUtils.initials("") = "" 329 * WordUtils.initials("Ben John Lee") = "BJL" 330 * WordUtils.initials("Ben J.Lee") = "BJ" 331 * </pre> 332 * 333 * @param str the String to get initials from, may be null. 334 * @return String of initial letters, {@code null} if null String input. 335 * @see #initials(String,char[]) 336 */ 337 public static String initials(final String str) { 338 return initials(str, null); 339 } 340 341 /** 342 * Extracts the initial characters from each word in the String. 343 * 344 * <p>All first characters after the defined delimiters are returned as a new string. 345 * Their case is not changed.</p> 346 * 347 * <p>If the delimiters array is null, then Whitespace is used. 348 * Whitespace is defined by {@link Character#isWhitespace(char)}. 349 * A {@code null} input String returns {@code null}. 350 * An empty delimiter array returns an empty String.</p> 351 * 352 * <pre> 353 * WordUtils.initials(null, *) = null 354 * WordUtils.initials("", *) = "" 355 * WordUtils.initials("Ben John Lee", null) = "BJL" 356 * WordUtils.initials("Ben J.Lee", null) = "BJ" 357 * WordUtils.initials("Ben J.Lee", [' ','.']) = "BJL" 358 * WordUtils.initials(*, new char[0]) = "" 359 * </pre> 360 * 361 * @param str the String to get initials from, may be null. 362 * @param delimiters set of characters to determine words, null means whitespace. 363 * @return String of initial characters, {@code null} if null String input. 364 * @see #initials(String) 365 */ 366 public static String initials(final String str, final char... delimiters) { 367 if (StringUtils.isEmpty(str)) { 368 return str; 369 } 370 if (delimiters != null && delimiters.length == 0) { 371 return StringUtils.EMPTY; 372 } 373 final Predicate<Integer> isDelimiter = generateIsDelimiterFunction(delimiters); 374 final int strLen = str.length(); 375 final int[] newCodePoints = new int[strLen / 2 + 1]; 376 int count = 0; 377 boolean lastWasGap = true; 378 for (int i = 0; i < strLen;) { 379 final int codePoint = str.codePointAt(i); 380 381 if (isDelimiter.test(codePoint)) { 382 lastWasGap = true; 383 } else if (lastWasGap) { 384 newCodePoints[count++] = codePoint; 385 lastWasGap = false; 386 } 387 388 i += Character.charCount(codePoint); 389 } 390 return new String(newCodePoints, 0, count); 391 } 392 393 /** 394 * Is the character a delimiter. 395 * 396 * @param ch the character to check. 397 * @param delimiters the delimiters. 398 * @return true if it is a delimiter. 399 * @deprecated as of 1.2 and will be removed in 2.0. 400 */ 401 @Deprecated 402 public static boolean isDelimiter(final char ch, final char[] delimiters) { 403 if (delimiters == null) { 404 return Character.isWhitespace(ch); 405 } 406 for (final char delimiter : delimiters) { 407 if (ch == delimiter) { 408 return true; 409 } 410 } 411 return false; 412 } 413 414 /** 415 * Is the codePoint a delimiter. 416 * 417 * @param codePoint the codePint to check. 418 * @param delimiters the delimiters. 419 * @return true if it is a delimiter. 420 * @deprecated as of 1.2 and will be removed in 2.0. 421 */ 422 @Deprecated 423 public static boolean isDelimiter(final int codePoint, final char[] delimiters) { 424 if (delimiters == null) { 425 return Character.isWhitespace(codePoint); 426 } 427 for (int index = 0; index < delimiters.length; index++) { 428 final int delimiterCodePoint = Character.codePointAt(delimiters, index); 429 if (delimiterCodePoint == codePoint) { 430 return true; 431 } 432 } 433 return false; 434 } 435 436 /** 437 * Swaps the case of a String using a word based algorithm. 438 * 439 * <ul> 440 * <li>Upper case character converts to Lower case</li> 441 * <li>Title case character converts to Lower case</li> 442 * <li>Lower case character after Whitespace or at start converts to Title case</li> 443 * <li>Other Lower case character converts to Upper case</li> 444 * </ul> 445 * 446 * <p>Whitespace is defined by {@link Character#isWhitespace(char)}. 447 * A {@code null} input String returns {@code null}.</p> 448 * 449 * <pre> 450 * StringUtils.swapCase(null) = null 451 * StringUtils.swapCase("") = "" 452 * StringUtils.swapCase("The dog has a BONE") = "tHE DOG HAS A bone" 453 * </pre> 454 * 455 * @param str the String to swap case, may be null. 456 * @return The changed String, {@code null} if null String input. 457 */ 458 public static String swapCase(final String str) { 459 if (StringUtils.isEmpty(str)) { 460 return str; 461 } 462 final int strLen = str.length(); 463 final int[] newCodePoints = new int[strLen]; 464 int outOffset = 0; 465 boolean whitespace = true; 466 for (int index = 0; index < strLen;) { 467 final int oldCodepoint = str.codePointAt(index); 468 final int newCodePoint; 469 if (Character.isUpperCase(oldCodepoint) || Character.isTitleCase(oldCodepoint)) { 470 newCodePoint = Character.toLowerCase(oldCodepoint); 471 whitespace = false; 472 } else if (Character.isLowerCase(oldCodepoint)) { 473 if (whitespace) { 474 newCodePoint = Character.toTitleCase(oldCodepoint); 475 whitespace = false; 476 } else { 477 newCodePoint = Character.toUpperCase(oldCodepoint); 478 } 479 } else { 480 whitespace = Character.isWhitespace(oldCodepoint); 481 newCodePoint = oldCodepoint; 482 } 483 newCodePoints[outOffset++] = newCodePoint; 484 index += Character.charCount(newCodePoint); 485 } 486 return new String(newCodePoints, 0, outOffset); 487 } 488 489 /** 490 * Uncapitalizes all the whitespace separated words in a String. 491 * Only the first character of each word is changed. 492 * 493 * <p>Whitespace is defined by {@link Character#isWhitespace(char)}. 494 * A {@code null} input String returns {@code null}.</p> 495 * 496 * <pre> 497 * WordUtils.uncapitalize(null) = null 498 * WordUtils.uncapitalize("") = "" 499 * WordUtils.uncapitalize("I Am FINE") = "i am fINE" 500 * </pre> 501 * 502 * @param str the String to uncapitalize, may be null. 503 * @return uncapitalized String, {@code null} if null String input. 504 * @see #capitalize(String) 505 */ 506 public static String uncapitalize(final String str) { 507 return uncapitalize(str, null); 508 } 509 510 /** 511 * Uncapitalizes all the whitespace separated words in a String. 512 * Only the first character of each word is changed. 513 * 514 * <p>The delimiters represent a set of characters understood to separate words. 515 * The first string character and the first non-delimiter character after a 516 * delimiter will be uncapitalized.</p> 517 * 518 * <p>Whitespace is defined by {@link Character#isWhitespace(char)}. 519 * A {@code null} input String returns {@code null}.</p> 520 * 521 * <pre> 522 * WordUtils.uncapitalize(null, *) = null 523 * WordUtils.uncapitalize("", *) = "" 524 * WordUtils.uncapitalize(*, null) = * 525 * WordUtils.uncapitalize(*, new char[0]) = * 526 * WordUtils.uncapitalize("I AM.FINE", {'.'}) = "i AM.fINE" 527 * WordUtils.uncapitalize("I am fine", new char[]{}) = "i am fine" 528 * </pre> 529 * 530 * @param str the String to uncapitalize, may be null. 531 * @param delimiters set of characters to determine uncapitalization, null means whitespace. 532 * @return uncapitalized String, {@code null} if null String input. 533 * @see #capitalize(String) 534 */ 535 public static String uncapitalize(final String str, final char... delimiters) { 536 if (StringUtils.isEmpty(str)) { 537 return str; 538 } 539 final Predicate<Integer> isDelimiter = generateIsDelimiterFunction(delimiters); 540 final int strLen = str.length(); 541 final int[] newCodePoints = new int[strLen]; 542 int outOffset = 0; 543 544 boolean uncapitalizeNext = true; 545 for (int index = 0; index < strLen;) { 546 final int codePoint = str.codePointAt(index); 547 548 if (isDelimiter.test(codePoint)) { 549 uncapitalizeNext = true; 550 newCodePoints[outOffset++] = codePoint; 551 index += Character.charCount(codePoint); 552 } else if (uncapitalizeNext) { 553 final int titleCaseCodePoint = Character.toLowerCase(codePoint); 554 newCodePoints[outOffset++] = titleCaseCodePoint; 555 index += Character.charCount(titleCaseCodePoint); 556 uncapitalizeNext = false; 557 } else { 558 newCodePoints[outOffset++] = codePoint; 559 index += Character.charCount(codePoint); 560 } 561 } 562 return new String(newCodePoints, 0, outOffset); 563 } 564 565 /** 566 * Wraps a single line of text, identifying words by {@code ' '}. 567 * 568 * <p>New lines will be separated by the system property line separator. 569 * Very long words, such as URLs will <em>not</em> be wrapped.</p> 570 * 571 * <p>Leading spaces on a new line are stripped. 572 * Trailing spaces are not stripped.</p> 573 * 574 * <table border="1"> 575 * <caption>Examples</caption> 576 * <tr> 577 * <th>input</th> 578 * <th>wrapLength</th> 579 * <th>result</th> 580 * </tr> 581 * <tr> 582 * <td>null</td> 583 * <td>*</td> 584 * <td>null</td> 585 * </tr> 586 * <tr> 587 * <td>""</td> 588 * <td>*</td> 589 * <td>""</td> 590 * </tr> 591 * <tr> 592 * <td>"Here is one line of text that is going to be wrapped after 20 columns."</td> 593 * <td>20</td> 594 * <td>"Here is one line of\ntext that is going\nto be wrapped after\n20 columns."</td> 595 * </tr> 596 * <tr> 597 * <td>"Click here to jump to the commons website - https://commons.apache.org"</td> 598 * <td>20</td> 599 * <td>"Click here to jump\nto the commons\nwebsite -\nhttps://commons.apache.org"</td> 600 * </tr> 601 * <tr> 602 * <td>"Click here, https://commons.apache.org, to jump to the commons website"</td> 603 * <td>20</td> 604 * <td>"Click here,\nhttps://commons.apache.org,\nto jump to the\ncommons website"</td> 605 * </tr> 606 * </table> 607 * 608 * (assuming that '\n' is the systems line separator) 609 * 610 * @param str the String to be word wrapped, may be null. 611 * @param wrapLength the column to wrap the words at, less than 1 is treated as 1. 612 * @return a line with newlines inserted, {@code null} if null input. 613 */ 614 public static String wrap(final String str, final int wrapLength) { 615 return wrap(str, wrapLength, null, false); 616 } 617 618 /** 619 * Wraps a single line of text, identifying words by {@code ' '}. 620 * 621 * <p>Leading spaces on a new line are stripped. 622 * Trailing spaces are not stripped.</p> 623 * 624 * <table border="1"> 625 * <caption>Examples</caption> 626 * <tr> 627 * <th>input</th> 628 * <th>wrapLength</th> 629 * <th>newLineString</th> 630 * <th>wrapLongWords</th> 631 * <th>result</th> 632 * </tr> 633 * <tr> 634 * <td>null</td> 635 * <td>*</td> 636 * <td>*</td> 637 * <td>true/false</td> 638 * <td>null</td> 639 * </tr> 640 * <tr> 641 * <td>""</td> 642 * <td>*</td> 643 * <td>*</td> 644 * <td>true/false</td> 645 * <td>""</td> 646 * </tr> 647 * <tr> 648 * <td>"Here is one line of text that is going to be wrapped after 20 columns."</td> 649 * <td>20</td> 650 * <td>"\n"</td> 651 * <td>true/false</td> 652 * <td>"Here is one line of\ntext that is going\nto be wrapped after\n20 columns."</td> 653 * </tr> 654 * <tr> 655 * <td>"Here is one line of text that is going to be wrapped after 20 columns."</td> 656 * <td>20</td> 657 * <td>"<br />"</td> 658 * <td>true/false</td> 659 * <td>"Here is one line of<br />text that is going< 660 * br />to be wrapped after<br />20 columns."</td> 661 * </tr> 662 * <tr> 663 * <td>"Here is one line of text that is going to be wrapped after 20 columns."</td> 664 * <td>20</td> 665 * <td>null</td> 666 * <td>true/false</td> 667 * <td>"Here is one line of" + systemNewLine + "text that is going" 668 * + systemNewLine + "to be wrapped after" + systemNewLine + "20 columns."</td> 669 * </tr> 670 * <tr> 671 * <td>"Click here to jump to the commons website - https://commons.apache.org"</td> 672 * <td>20</td> 673 * <td>"\n"</td> 674 * <td>false</td> 675 * <td>"Click here to jump\nto the commons\nwebsite -\nhttps://commons.apache.org"</td> 676 * </tr> 677 * <tr> 678 * <td>"Click here to jump to the commons website - https://commons.apache.org"</td> 679 * <td>20</td> 680 * <td>"\n"</td> 681 * <td>true</td> 682 * <td>"Click here to jump\nto the commons\nwebsite -\nhttps://commons.apach\ne.org"</td> 683 * </tr> 684 * </table> 685 * 686 * @param str the String to be word wrapped, may be null. 687 * @param wrapLength the column to wrap the words at, less than 1 is treated as 1. 688 * @param newLineStr the string to insert for a new line, {@code null} uses the system property line separator. 689 * @param wrapLongWords true if long words (such as URLs) should be wrapped. 690 * @return a line with newlines inserted, {@code null} if null input. 691 */ 692 public static String wrap(final String str, 693 final int wrapLength, 694 final String newLineStr, 695 final boolean wrapLongWords) { 696 return wrap(str, wrapLength, newLineStr, wrapLongWords, " "); 697 } 698 699 /** 700 * Wraps a single line of text, identifying words by {@code wrapOn}. 701 * 702 * <p>Leading spaces on a new line are stripped. 703 * Trailing spaces are not stripped.</p> 704 * 705 * <table border="1"> 706 * <caption>Examples</caption> 707 * <tr> 708 * <th>input</th> 709 * <th>wrapLength</th> 710 * <th>newLineString</th> 711 * <th>wrapLongWords</th> 712 * <th>wrapOn</th> 713 * <th>result</th> 714 * </tr> 715 * <tr> 716 * <td>null</td> 717 * <td>*</td> 718 * <td>*</td> 719 * <td>true/false</td> 720 * <td>*</td> 721 * <td>null</td> 722 * </tr> 723 * <tr> 724 * <td>""</td> 725 * <td>*</td> 726 * <td>*</td> 727 * <td>true/false</td> 728 * <td>*</td> 729 * <td>""</td> 730 * </tr> 731 * <tr> 732 * <td>"Here is one line of text that is going to be wrapped after 20 columns."</td> 733 * <td>20</td> 734 * <td>"\n"</td> 735 * <td>true/false</td> 736 * <td>" "</td> 737 * <td>"Here is one line of\ntext that is going\nto be wrapped after\n20 columns."</td> 738 * </tr> 739 * <tr> 740 * <td>"Here is one line of text that is going to be wrapped after 20 columns."</td> 741 * <td>20</td> 742 * <td>"<br />"</td> 743 * <td>true/false</td> 744 * <td>" "</td> 745 * <td>"Here is one line of<br />text that is going<br /> 746 * to be wrapped after<br />20 columns."</td> 747 * </tr> 748 * <tr> 749 * <td>"Here is one line of text that is going to be wrapped after 20 columns."</td> 750 * <td>20</td> 751 * <td>null</td> 752 * <td>true/false</td> 753 * <td>" "</td> 754 * <td>"Here is one line of" + systemNewLine + "text that is going" 755 * + systemNewLine + "to be wrapped after" + systemNewLine + "20 columns."</td> 756 * </tr> 757 * <tr> 758 * <td>"Click here to jump to the commons website - https://commons.apache.org"</td> 759 * <td>20</td> 760 * <td>"\n"</td> 761 * <td>false</td> 762 * <td>" "</td> 763 * <td>"Click here to jump\nto the commons\nwebsite -\nhttps://commons.apache.org"</td> 764 * </tr> 765 * <tr> 766 * <td>"Click here to jump to the commons website - https://commons.apache.org"</td> 767 * <td>20</td> 768 * <td>"\n"</td> 769 * <td>true</td> 770 * <td>" "</td> 771 * <td>"Click here to jump\nto the commons\nwebsite -\nhttps://commons.apach\ne.org"</td> 772 * </tr> 773 * <tr> 774 * <td>"flammable/inflammable"</td> 775 * <td>20</td> 776 * <td>"\n"</td> 777 * <td>true</td> 778 * <td>"/"</td> 779 * <td>"flammable\ninflammable"</td> 780 * </tr> 781 * </table> 782 * @param str the String to be word wrapped, may be null. 783 * @param wrapLength the column to wrap the words at, less than 1 is treated as 1. 784 * @param newLineStr the string to insert for a new line, {@code null} uses the system property line separator. 785 * @param wrapLongWords true if long words (such as URLs) should be wrapped. 786 * @param wrapOn regex expression to be used as a breakable characters, if blank string is provided a space character will be used. 787 * @return a line with newlines inserted, {@code null} if null input. 788 */ 789 public static String wrap(final String str, 790 int wrapLength, 791 String newLineStr, 792 final boolean wrapLongWords, 793 String wrapOn) { 794 if (str == null) { 795 return null; 796 } 797 if (newLineStr == null) { 798 newLineStr = System.lineSeparator(); 799 } 800 if (wrapLength < 1) { 801 wrapLength = 1; 802 } 803 if (StringUtils.isBlank(wrapOn)) { 804 wrapOn = " "; 805 } 806 final Pattern patternToWrapOn = Pattern.compile(wrapOn); 807 final int inputLineLength = str.length(); 808 int offset = 0; 809 final StringBuilder wrappedLine = new StringBuilder(inputLineLength + 32); 810 int matcherSize = -1; 811 812 while (offset < inputLineLength) { 813 int spaceToWrapAt = -1; 814 Matcher matcher = patternToWrapOn.matcher(str.substring(offset, 815 Math.min((int) Math.min(Integer.MAX_VALUE, offset + wrapLength + 1L), inputLineLength))); 816 if (matcher.find()) { 817 if (matcher.start() == 0) { 818 matcherSize = matcher.end(); 819 if (matcherSize != 0) { 820 offset += matcher.end(); 821 continue; 822 } 823 offset += 1; 824 } 825 spaceToWrapAt = matcher.start() + offset; 826 } 827 828 // only last line without leading spaces is left 829 if (inputLineLength - offset <= wrapLength) { 830 break; 831 } 832 833 while (matcher.find()) { 834 spaceToWrapAt = matcher.start() + offset; 835 } 836 837 if (spaceToWrapAt >= offset) { 838 // normal case 839 wrappedLine.append(str, offset, spaceToWrapAt); 840 wrappedLine.append(newLineStr); 841 offset = spaceToWrapAt + 1; 842 843 } else // really long word or URL 844 if (wrapLongWords) { 845 if (matcherSize == 0) { 846 offset--; 847 } 848 // wrap really long word one line at a time 849 wrappedLine.append(str, offset, wrapLength + offset); 850 wrappedLine.append(newLineStr); 851 offset += wrapLength; 852 matcherSize = -1; 853 } else { 854 // do not wrap really long word, just extend beyond limit 855 matcher = patternToWrapOn.matcher(str.substring(offset + wrapLength)); 856 if (matcher.find()) { 857 matcherSize = matcher.end() - matcher.start(); 858 spaceToWrapAt = matcher.start() + offset + wrapLength; 859 } 860 861 if (spaceToWrapAt >= 0) { 862 if (matcherSize == 0 && offset != 0) { 863 offset--; 864 } 865 wrappedLine.append(str, offset, spaceToWrapAt); 866 wrappedLine.append(newLineStr); 867 offset = spaceToWrapAt + 1; 868 } else { 869 if (matcherSize == 0 && offset != 0) { 870 offset--; 871 } 872 wrappedLine.append(str, offset, str.length()); 873 offset = inputLineLength; 874 matcherSize = -1; 875 } 876 } 877 } 878 879 if (matcherSize == 0 && offset < inputLineLength) { 880 offset--; 881 } 882 883 // Whatever is left in line is short enough to just pass through 884 wrappedLine.append(str, offset, str.length()); 885 886 return wrappedLine.toString(); 887 } 888 889 /** 890 * {@code WordUtils} instances should NOT be constructed in 891 * standard programming. Instead, the class should be used as 892 * {@code WordUtils.wrap("foo bar", 20);}. 893 * 894 * <p>This constructor is public to permit tools that require a JavaBean 895 * instance to operate.</p> 896 */ 897 public WordUtils() { 898 } 899 }