001/* 002 * Licensed to the Apache Software Foundation (ASF) under one or more 003 * contributor license agreements. See the NOTICE file distributed with 004 * this work for additional information regarding copyright ownership. 005 * The ASF licenses this file to You under the Apache License, Version 2.0 006 * (the "License"); you may not use this file except in compliance with 007 * the License. You may obtain a copy of the License at 008 * 009 * http://www.apache.org/licenses/LICENSE-2.0 010 * 011 * Unless required by applicable law or agreed to in writing, software 012 * distributed under the License is distributed on an "AS IS" BASIS, 013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 014 * See the License for the specific language governing permissions and 015 * limitations under the License. 016 */ 017package org.apache.commons.lang3.text; 018 019import org.apache.commons.lang3.StringUtils; 020import org.apache.commons.lang3.SystemUtils; 021 022/** 023 * <p>Operations on Strings that contain words.</p> 024 * 025 * <p>This class tries to handle <code>null</code> input gracefully. 026 * An exception will not be thrown for a <code>null</code> input. 027 * Each method documents its behaviour in more detail.</p> 028 * 029 * @since 2.0 030 * @version $Id: WordUtils.java 1561036 2014-01-24 15:42:19Z britter $ 031 */ 032public class WordUtils { 033 034 /** 035 * <p><code>WordUtils</code> instances should NOT be constructed in 036 * standard programming. Instead, the class should be used as 037 * <code>WordUtils.wrap("foo bar", 20);</code>.</p> 038 * 039 * <p>This constructor is public to permit tools that require a JavaBean 040 * instance to operate.</p> 041 */ 042 public WordUtils() { 043 super(); 044 } 045 046 // Wrapping 047 //-------------------------------------------------------------------------- 048 /** 049 * <p>Wraps a single line of text, identifying words by <code>' '</code>.</p> 050 * 051 * <p>New lines will be separated by the system property line separator. 052 * Very long words, such as URLs will <i>not</i> be wrapped.</p> 053 * 054 * <p>Leading spaces on a new line are stripped. 055 * Trailing spaces are not stripped.</p> 056 * 057 * <table border="1"> 058 * <tr> 059 * <th>input</th> 060 * <th>wrapLenght</th> 061 * <th>result</th> 062 * </tr> 063 * <tr> 064 * <td>null</td> 065 * <td>*</td> 066 * <td>null</td> 067 * </tr> 068 * <tr> 069 * <td>""</td> 070 * <td>*</td> 071 * <td>""</td> 072 * </tr> 073 * <tr> 074 * <td>"Here is one line of text that is going to be wrapped after 20 columns."</td> 075 * <td>20</td> 076 * <td>"Here is one line of\ntext that is going\nto be wrapped after\n20 columns."</td> 077 * </tr> 078 * <tr> 079 * <td>"Click here to jump to the commons website - http://commons.apache.org"</td> 080 * <td>20</td> 081 * <td>"Click here to jump\nto the commons\nwebsite -\nhttp://commons.apache.org"</td> 082 * </tr> 083 * <tr> 084 * <td>"Click here, http://commons.apache.org, to jump to the commons website"</td> 085 * <td>20</td> 086 * <td>"Click here,\nhttp://commons.apache.org,\nto jump to the\ncommons website"</td> 087 * </tr> 088 * </table> 089 * 090 * (assuming that '\n' is the systems line separator) 091 * 092 * @param str the String to be word wrapped, may be null 093 * @param wrapLength the column to wrap the words at, less than 1 is treated as 1 094 * @return a line with newlines inserted, <code>null</code> if null input 095 */ 096 public static String wrap(final String str, final int wrapLength) { 097 return wrap(str, wrapLength, null, false); 098 } 099 100 /** 101 * <p>Wraps a single line of text, identifying words by <code>' '</code>.</p> 102 * 103 * <p>Leading spaces on a new line are stripped. 104 * Trailing spaces are not stripped.</p> 105 * 106 * <table border="1"> 107 * <tr> 108 * <th>input</th> 109 * <th>wrapLenght</th> 110 * <th>newLineString</th> 111 * <th>wrapLongWords</th> 112 * <th>result</th> 113 * </tr> 114 * <tr> 115 * <td>null</td> 116 * <td>*</td> 117 * <td>*</td> 118 * <td>true/false</td> 119 * <td>null</td> 120 * </tr> 121 * <tr> 122 * <td>""</td> 123 * <td>*</td> 124 * <td>*</td> 125 * <td>true/false</td> 126 * <td>""</td> 127 * </tr> 128 * <tr> 129 * <td>"Here is one line of text that is going to be wrapped after 20 columns."</td> 130 * <td>20</td> 131 * <td>"\n"</td> 132 * <td>true/false</td> 133 * <td>"Here is one line of\ntext that is going\nto be wrapped after\n20 columns."</td> 134 * </tr> 135 * <tr> 136 * <td>"Here is one line of text that is going to be wrapped after 20 columns."</td> 137 * <td>20</td> 138 * <td>"<br />"</td> 139 * <td>true/false</td> 140 * <td>"Here is one line of<br />text that is going<br />to be wrapped after<br />20 columns."</td> 141 * </tr> 142 * <tr> 143 * <td>"Here is one line of text that is going to be wrapped after 20 columns."</td> 144 * <td>20</td> 145 * <td>null</td> 146 * <td>true/false</td> 147 * <td>"Here is one line of" + systemNewLine + "text that is going" + systemNewLine + "to be wrapped after" + systemNewLine + "20 columns."</td> 148 * </tr> 149 * <tr> 150 * <td>"Click here to jump to the commons website - http://commons.apache.org"</td> 151 * <td>20</td> 152 * <td>"\n"</td> 153 * <td>false</td> 154 * <td>"Click here to jump\nto the commons\nwebsite -\nhttp://commons.apache.org"</td> 155 * </tr> 156 * <tr> 157 * <td>"Click here to jump to the commons website - http://commons.apache.org"</td> 158 * <td>20</td> 159 * <td>"\n"</td> 160 * <td>true</td> 161 * <td>"Click here to jump\nto the commons\nwebsite -\nhttp://commons.apach\ne.org"</td> 162 * </tr> 163 * </table> 164 * 165 * @param str the String to be word wrapped, may be null 166 * @param wrapLength the column to wrap the words at, less than 1 is treated as 1 167 * @param newLineStr the string to insert for a new line, 168 * <code>null</code> uses the system property line separator 169 * @param wrapLongWords true if long words (such as URLs) should be wrapped 170 * @return a line with newlines inserted, <code>null</code> if null input 171 */ 172 public static String wrap(final String str, int wrapLength, String newLineStr, final boolean wrapLongWords) { 173 if (str == null) { 174 return null; 175 } 176 if (newLineStr == null) { 177 newLineStr = SystemUtils.LINE_SEPARATOR; 178 } 179 if (wrapLength < 1) { 180 wrapLength = 1; 181 } 182 final int inputLineLength = str.length(); 183 int offset = 0; 184 final StringBuilder wrappedLine = new StringBuilder(inputLineLength + 32); 185 186 while (inputLineLength - offset > wrapLength) { 187 if (str.charAt(offset) == ' ') { 188 offset++; 189 continue; 190 } 191 int spaceToWrapAt = str.lastIndexOf(' ', wrapLength + offset); 192 193 if (spaceToWrapAt >= offset) { 194 // normal case 195 wrappedLine.append(str.substring(offset, spaceToWrapAt)); 196 wrappedLine.append(newLineStr); 197 offset = spaceToWrapAt + 1; 198 199 } else { 200 // really long word or URL 201 if (wrapLongWords) { 202 // wrap really long word one line at a time 203 wrappedLine.append(str.substring(offset, wrapLength + offset)); 204 wrappedLine.append(newLineStr); 205 offset += wrapLength; 206 } else { 207 // do not wrap really long word, just extend beyond limit 208 spaceToWrapAt = str.indexOf(' ', wrapLength + offset); 209 if (spaceToWrapAt >= 0) { 210 wrappedLine.append(str.substring(offset, spaceToWrapAt)); 211 wrappedLine.append(newLineStr); 212 offset = spaceToWrapAt + 1; 213 } else { 214 wrappedLine.append(str.substring(offset)); 215 offset = inputLineLength; 216 } 217 } 218 } 219 } 220 221 // Whatever is left in line is short enough to just pass through 222 wrappedLine.append(str.substring(offset)); 223 224 return wrappedLine.toString(); 225 } 226 227 // Capitalizing 228 //----------------------------------------------------------------------- 229 /** 230 * <p>Capitalizes all the whitespace separated words in a String. 231 * Only the first letter of each word is changed. To convert the 232 * rest of each word to lowercase at the same time, 233 * use {@link #capitalizeFully(String)}.</p> 234 * 235 * <p>Whitespace is defined by {@link Character#isWhitespace(char)}. 236 * A <code>null</code> input String returns <code>null</code>. 237 * Capitalization uses the Unicode title case, normally equivalent to 238 * upper case.</p> 239 * 240 * <pre> 241 * WordUtils.capitalize(null) = null 242 * WordUtils.capitalize("") = "" 243 * WordUtils.capitalize("i am FINE") = "I Am FINE" 244 * </pre> 245 * 246 * @param str the String to capitalize, may be null 247 * @return capitalized String, <code>null</code> if null String input 248 * @see #uncapitalize(String) 249 * @see #capitalizeFully(String) 250 */ 251 public static String capitalize(final String str) { 252 return capitalize(str, null); 253 } 254 255 /** 256 * <p>Capitalizes all the delimiter separated words in a String. 257 * Only the first letter of each word is changed. To convert the 258 * rest of each word to lowercase at the same time, 259 * use {@link #capitalizeFully(String, char[])}.</p> 260 * 261 * <p>The delimiters represent a set of characters understood to separate words. 262 * The first string character and the first non-delimiter character after a 263 * delimiter will be capitalized. </p> 264 * 265 * <p>A <code>null</code> input String returns <code>null</code>. 266 * Capitalization uses the Unicode title case, normally equivalent to 267 * upper case.</p> 268 * 269 * <pre> 270 * WordUtils.capitalize(null, *) = null 271 * WordUtils.capitalize("", *) = "" 272 * WordUtils.capitalize(*, new char[0]) = * 273 * WordUtils.capitalize("i am fine", null) = "I Am Fine" 274 * WordUtils.capitalize("i aM.fine", {'.'}) = "I aM.Fine" 275 * </pre> 276 * 277 * @param str the String to capitalize, may be null 278 * @param delimiters set of characters to determine capitalization, null means whitespace 279 * @return capitalized String, <code>null</code> if null String input 280 * @see #uncapitalize(String) 281 * @see #capitalizeFully(String) 282 * @since 2.1 283 */ 284 public static String capitalize(final String str, final char... delimiters) { 285 final int delimLen = delimiters == null ? -1 : delimiters.length; 286 if (StringUtils.isEmpty(str) || delimLen == 0) { 287 return str; 288 } 289 final char[] buffer = str.toCharArray(); 290 boolean capitalizeNext = true; 291 for (int i = 0; i < buffer.length; i++) { 292 final char ch = buffer[i]; 293 if (isDelimiter(ch, delimiters)) { 294 capitalizeNext = true; 295 } else if (capitalizeNext) { 296 buffer[i] = Character.toTitleCase(ch); 297 capitalizeNext = false; 298 } 299 } 300 return new String(buffer); 301 } 302 303 //----------------------------------------------------------------------- 304 /** 305 * <p>Converts all the whitespace separated words in a String into capitalized words, 306 * that is each word is made up of a titlecase character and then a series of 307 * lowercase characters. </p> 308 * 309 * <p>Whitespace is defined by {@link Character#isWhitespace(char)}. 310 * A <code>null</code> input String returns <code>null</code>. 311 * Capitalization uses the Unicode title case, normally equivalent to 312 * upper case.</p> 313 * 314 * <pre> 315 * WordUtils.capitalizeFully(null) = null 316 * WordUtils.capitalizeFully("") = "" 317 * WordUtils.capitalizeFully("i am FINE") = "I Am Fine" 318 * </pre> 319 * 320 * @param str the String to capitalize, may be null 321 * @return capitalized String, <code>null</code> if null String input 322 */ 323 public static String capitalizeFully(final String str) { 324 return capitalizeFully(str, null); 325 } 326 327 /** 328 * <p>Converts all the delimiter separated words in a String into capitalized words, 329 * that is each word is made up of a titlecase character and then a series of 330 * lowercase characters. </p> 331 * 332 * <p>The delimiters represent a set of characters understood to separate words. 333 * The first string character and the first non-delimiter character after a 334 * delimiter will be capitalized. </p> 335 * 336 * <p>A <code>null</code> input String returns <code>null</code>. 337 * Capitalization uses the Unicode title case, normally equivalent to 338 * upper case.</p> 339 * 340 * <pre> 341 * WordUtils.capitalizeFully(null, *) = null 342 * WordUtils.capitalizeFully("", *) = "" 343 * WordUtils.capitalizeFully(*, null) = * 344 * WordUtils.capitalizeFully(*, new char[0]) = * 345 * WordUtils.capitalizeFully("i aM.fine", {'.'}) = "I am.Fine" 346 * </pre> 347 * 348 * @param str the String to capitalize, may be null 349 * @param delimiters set of characters to determine capitalization, null means whitespace 350 * @return capitalized String, <code>null</code> if null String input 351 * @since 2.1 352 */ 353 public static String capitalizeFully(String str, final char... delimiters) { 354 final int delimLen = delimiters == null ? -1 : delimiters.length; 355 if (StringUtils.isEmpty(str) || delimLen == 0) { 356 return str; 357 } 358 str = str.toLowerCase(); 359 return capitalize(str, delimiters); 360 } 361 362 //----------------------------------------------------------------------- 363 /** 364 * <p>Uncapitalizes all the whitespace separated words in a String. 365 * Only the first letter of each word is changed.</p> 366 * 367 * <p>Whitespace is defined by {@link Character#isWhitespace(char)}. 368 * A <code>null</code> input String returns <code>null</code>.</p> 369 * 370 * <pre> 371 * WordUtils.uncapitalize(null) = null 372 * WordUtils.uncapitalize("") = "" 373 * WordUtils.uncapitalize("I Am FINE") = "i am fINE" 374 * </pre> 375 * 376 * @param str the String to uncapitalize, may be null 377 * @return uncapitalized String, <code>null</code> if null String input 378 * @see #capitalize(String) 379 */ 380 public static String uncapitalize(final String str) { 381 return uncapitalize(str, null); 382 } 383 384 /** 385 * <p>Uncapitalizes all the whitespace separated words in a String. 386 * Only the first letter of each word is changed.</p> 387 * 388 * <p>The delimiters represent a set of characters understood to separate words. 389 * The first string character and the first non-delimiter character after a 390 * delimiter will be uncapitalized. </p> 391 * 392 * <p>Whitespace is defined by {@link Character#isWhitespace(char)}. 393 * A <code>null</code> input String returns <code>null</code>.</p> 394 * 395 * <pre> 396 * WordUtils.uncapitalize(null, *) = null 397 * WordUtils.uncapitalize("", *) = "" 398 * WordUtils.uncapitalize(*, null) = * 399 * WordUtils.uncapitalize(*, new char[0]) = * 400 * WordUtils.uncapitalize("I AM.FINE", {'.'}) = "i AM.fINE" 401 * </pre> 402 * 403 * @param str the String to uncapitalize, may be null 404 * @param delimiters set of characters to determine uncapitalization, null means whitespace 405 * @return uncapitalized String, <code>null</code> if null String input 406 * @see #capitalize(String) 407 * @since 2.1 408 */ 409 public static String uncapitalize(final String str, final char... delimiters) { 410 final int delimLen = delimiters == null ? -1 : delimiters.length; 411 if (StringUtils.isEmpty(str) || delimLen == 0) { 412 return str; 413 } 414 final char[] buffer = str.toCharArray(); 415 boolean uncapitalizeNext = true; 416 for (int i = 0; i < buffer.length; i++) { 417 final char ch = buffer[i]; 418 if (isDelimiter(ch, delimiters)) { 419 uncapitalizeNext = true; 420 } else if (uncapitalizeNext) { 421 buffer[i] = Character.toLowerCase(ch); 422 uncapitalizeNext = false; 423 } 424 } 425 return new String(buffer); 426 } 427 428 //----------------------------------------------------------------------- 429 /** 430 * <p>Swaps the case of a String using a word based algorithm.</p> 431 * 432 * <ul> 433 * <li>Upper case character converts to Lower case</li> 434 * <li>Title case character converts to Lower case</li> 435 * <li>Lower case character after Whitespace or at start converts to Title case</li> 436 * <li>Other Lower case character converts to Upper case</li> 437 * </ul> 438 * 439 * <p>Whitespace is defined by {@link Character#isWhitespace(char)}. 440 * A <code>null</code> input String returns <code>null</code>.</p> 441 * 442 * <pre> 443 * StringUtils.swapCase(null) = null 444 * StringUtils.swapCase("") = "" 445 * StringUtils.swapCase("The dog has a BONE") = "tHE DOG HAS A bone" 446 * </pre> 447 * 448 * @param str the String to swap case, may be null 449 * @return the changed String, <code>null</code> if null String input 450 */ 451 public static String swapCase(final String str) { 452 if (StringUtils.isEmpty(str)) { 453 return str; 454 } 455 final char[] buffer = str.toCharArray(); 456 457 boolean whitespace = true; 458 459 for (int i = 0; i < buffer.length; i++) { 460 final char ch = buffer[i]; 461 if (Character.isUpperCase(ch)) { 462 buffer[i] = Character.toLowerCase(ch); 463 whitespace = false; 464 } else if (Character.isTitleCase(ch)) { 465 buffer[i] = Character.toLowerCase(ch); 466 whitespace = false; 467 } else if (Character.isLowerCase(ch)) { 468 if (whitespace) { 469 buffer[i] = Character.toTitleCase(ch); 470 whitespace = false; 471 } else { 472 buffer[i] = Character.toUpperCase(ch); 473 } 474 } else { 475 whitespace = Character.isWhitespace(ch); 476 } 477 } 478 return new String(buffer); 479 } 480 481 //----------------------------------------------------------------------- 482 /** 483 * <p>Extracts the initial letters from each word in the String.</p> 484 * 485 * <p>The first letter of the string and all first letters after 486 * whitespace are returned as a new string. 487 * Their case is not changed.</p> 488 * 489 * <p>Whitespace is defined by {@link Character#isWhitespace(char)}. 490 * A <code>null</code> input String returns <code>null</code>.</p> 491 * 492 * <pre> 493 * WordUtils.initials(null) = null 494 * WordUtils.initials("") = "" 495 * WordUtils.initials("Ben John Lee") = "BJL" 496 * WordUtils.initials("Ben J.Lee") = "BJ" 497 * </pre> 498 * 499 * @param str the String to get initials from, may be null 500 * @return String of initial letters, <code>null</code> if null String input 501 * @see #initials(String,char[]) 502 * @since 2.2 503 */ 504 public static String initials(final String str) { 505 return initials(str, null); 506 } 507 508 /** 509 * <p>Extracts the initial letters from each word in the String.</p> 510 * 511 * <p>The first letter of the string and all first letters after the 512 * defined delimiters are returned as a new string. 513 * Their case is not changed.</p> 514 * 515 * <p>If the delimiters array is null, then Whitespace is used. 516 * Whitespace is defined by {@link Character#isWhitespace(char)}. 517 * A <code>null</code> input String returns <code>null</code>. 518 * An empty delimiter array returns an empty String.</p> 519 * 520 * <pre> 521 * WordUtils.initials(null, *) = null 522 * WordUtils.initials("", *) = "" 523 * WordUtils.initials("Ben John Lee", null) = "BJL" 524 * WordUtils.initials("Ben J.Lee", null) = "BJ" 525 * WordUtils.initials("Ben J.Lee", [' ','.']) = "BJL" 526 * WordUtils.initials(*, new char[0]) = "" 527 * </pre> 528 * 529 * @param str the String to get initials from, may be null 530 * @param delimiters set of characters to determine words, null means whitespace 531 * @return String of initial letters, <code>null</code> if null String input 532 * @see #initials(String) 533 * @since 2.2 534 */ 535 public static String initials(final String str, final char... delimiters) { 536 if (StringUtils.isEmpty(str)) { 537 return str; 538 } 539 if (delimiters != null && delimiters.length == 0) { 540 return ""; 541 } 542 final int strLen = str.length(); 543 final char[] buf = new char[strLen / 2 + 1]; 544 int count = 0; 545 boolean lastWasGap = true; 546 for (int i = 0; i < strLen; i++) { 547 final char ch = str.charAt(i); 548 549 if (isDelimiter(ch, delimiters)) { 550 lastWasGap = true; 551 } else if (lastWasGap) { 552 buf[count++] = ch; 553 lastWasGap = false; 554 } else { 555 continue; // ignore ch 556 } 557 } 558 return new String(buf, 0, count); 559 } 560 561 //----------------------------------------------------------------------- 562 /** 563 * Is the character a delimiter. 564 * 565 * @param ch the character to check 566 * @param delimiters the delimiters 567 * @return true if it is a delimiter 568 */ 569 private static boolean isDelimiter(final char ch, final char[] delimiters) { 570 if (delimiters == null) { 571 return Character.isWhitespace(ch); 572 } 573 for (final char delimiter : delimiters) { 574 if (ch == delimiter) { 575 return true; 576 } 577 } 578 return false; 579 } 580 581}