001/* 002 * Licensed to the Apache Software Foundation (ASF) under one or more 003 * contributor license agreements. See the NOTICE file distributed with 004 * this work for additional information regarding copyright ownership. 005 * The ASF licenses this file to You under the Apache License, Version 2.0 006 * (the "License"); you may not use this file except in compliance with 007 * the License. You may obtain a copy of the License at 008 * 009 * http://www.apache.org/licenses/LICENSE-2.0 010 * 011 * Unless required by applicable law or agreed to in writing, software 012 * distributed under the License is distributed on an "AS IS" BASIS, 013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 014 * See the License for the specific language governing permissions and 015 * limitations under the License. 016 */ 017package org.apache.commons.lang3.text; 018 019import org.apache.commons.lang3.StringUtils; 020import org.apache.commons.lang3.SystemUtils; 021 022/** 023 * <p>Operations on Strings that contain words.</p> 024 * 025 * <p>This class tries to handle <code>null</code> input gracefully. 026 * An exception will not be thrown for a <code>null</code> input. 027 * Each method documents its behaviour in more detail.</p> 028 * 029 * @since 2.0 030 * @version $Id: WordUtils.java 1586649 2014-04-11 13:28:30Z britter $ 031 */ 032public class WordUtils { 033 034 /** 035 * <p><code>WordUtils</code> instances should NOT be constructed in 036 * standard programming. Instead, the class should be used as 037 * <code>WordUtils.wrap("foo bar", 20);</code>.</p> 038 * 039 * <p>This constructor is public to permit tools that require a JavaBean 040 * instance to operate.</p> 041 */ 042 public WordUtils() { 043 super(); 044 } 045 046 // Wrapping 047 //-------------------------------------------------------------------------- 048 /** 049 * <p>Wraps a single line of text, identifying words by <code>' '</code>.</p> 050 * 051 * <p>New lines will be separated by the system property line separator. 052 * Very long words, such as URLs will <i>not</i> be wrapped.</p> 053 * 054 * <p>Leading spaces on a new line are stripped. 055 * Trailing spaces are not stripped.</p> 056 * 057 * <table border="1" summary="Wrap Results"> 058 * <tr> 059 * <th>input</th> 060 * <th>wrapLength</th> 061 * <th>result</th> 062 * </tr> 063 * <tr> 064 * <td>null</td> 065 * <td>*</td> 066 * <td>null</td> 067 * </tr> 068 * <tr> 069 * <td>""</td> 070 * <td>*</td> 071 * <td>""</td> 072 * </tr> 073 * <tr> 074 * <td>"Here is one line of text that is going to be wrapped after 20 columns."</td> 075 * <td>20</td> 076 * <td>"Here is one line of\ntext that is going\nto be wrapped after\n20 columns."</td> 077 * </tr> 078 * <tr> 079 * <td>"Click here to jump to the commons website - http://commons.apache.org"</td> 080 * <td>20</td> 081 * <td>"Click here to jump\nto the commons\nwebsite -\nhttp://commons.apache.org"</td> 082 * </tr> 083 * <tr> 084 * <td>"Click here, http://commons.apache.org, to jump to the commons website"</td> 085 * <td>20</td> 086 * <td>"Click here,\nhttp://commons.apache.org,\nto jump to the\ncommons website"</td> 087 * </tr> 088 * </table> 089 * 090 * (assuming that '\n' is the systems line separator) 091 * 092 * @param str the String to be word wrapped, may be null 093 * @param wrapLength the column to wrap the words at, less than 1 is treated as 1 094 * @return a line with newlines inserted, <code>null</code> if null input 095 */ 096 public static String wrap(final String str, final int wrapLength) { 097 return wrap(str, wrapLength, null, false); 098 } 099 100 /** 101 * <p>Wraps a single line of text, identifying words by <code>' '</code>.</p> 102 * 103 * <p>Leading spaces on a new line are stripped. 104 * Trailing spaces are not stripped.</p> 105 * 106 * <table border="1" summary="Wrap Results"> 107 * <tr> 108 * <th>input</th> 109 * <th>wrapLenght</th> 110 * <th>newLineString</th> 111 * <th>wrapLongWords</th> 112 * <th>result</th> 113 * </tr> 114 * <tr> 115 * <td>null</td> 116 * <td>*</td> 117 * <td>*</td> 118 * <td>true/false</td> 119 * <td>null</td> 120 * </tr> 121 * <tr> 122 * <td>""</td> 123 * <td>*</td> 124 * <td>*</td> 125 * <td>true/false</td> 126 * <td>""</td> 127 * </tr> 128 * <tr> 129 * <td>"Here is one line of text that is going to be wrapped after 20 columns."</td> 130 * <td>20</td> 131 * <td>"\n"</td> 132 * <td>true/false</td> 133 * <td>"Here is one line of\ntext that is going\nto be wrapped after\n20 columns."</td> 134 * </tr> 135 * <tr> 136 * <td>"Here is one line of text that is going to be wrapped after 20 columns."</td> 137 * <td>20</td> 138 * <td>"<br />"</td> 139 * <td>true/false</td> 140 * <td>"Here is one line of<br />text that is going<br />to be wrapped after<br />20 columns."</td> 141 * </tr> 142 * <tr> 143 * <td>"Here is one line of text that is going to be wrapped after 20 columns."</td> 144 * <td>20</td> 145 * <td>null</td> 146 * <td>true/false</td> 147 * <td>"Here is one line of" + systemNewLine + "text that is going" + systemNewLine + "to be wrapped after" + systemNewLine + "20 columns."</td> 148 * </tr> 149 * <tr> 150 * <td>"Click here to jump to the commons website - http://commons.apache.org"</td> 151 * <td>20</td> 152 * <td>"\n"</td> 153 * <td>false</td> 154 * <td>"Click here to jump\nto the commons\nwebsite -\nhttp://commons.apache.org"</td> 155 * </tr> 156 * <tr> 157 * <td>"Click here to jump to the commons website - http://commons.apache.org"</td> 158 * <td>20</td> 159 * <td>"\n"</td> 160 * <td>true</td> 161 * <td>"Click here to jump\nto the commons\nwebsite -\nhttp://commons.apach\ne.org"</td> 162 * </tr> 163 * </table> 164 * 165 * @param str the String to be word wrapped, may be null 166 * @param wrapLength the column to wrap the words at, less than 1 is treated as 1 167 * @param newLineStr the string to insert for a new line, 168 * <code>null</code> uses the system property line separator 169 * @param wrapLongWords true if long words (such as URLs) should be wrapped 170 * @return a line with newlines inserted, <code>null</code> if null input 171 */ 172 public static String wrap(final String str, int wrapLength, String newLineStr, final boolean wrapLongWords) { 173 if (str == null) { 174 return null; 175 } 176 if (newLineStr == null) { 177 newLineStr = SystemUtils.LINE_SEPARATOR; 178 } 179 if (wrapLength < 1) { 180 wrapLength = 1; 181 } 182 final int inputLineLength = str.length(); 183 int offset = 0; 184 final StringBuilder wrappedLine = new StringBuilder(inputLineLength + 32); 185 186 while (offset < inputLineLength) { 187 if (str.charAt(offset) == ' ') { 188 offset++; 189 continue; 190 } 191 // only last line without leading spaces is left 192 if(inputLineLength - offset <= wrapLength) { 193 break; 194 } 195 int spaceToWrapAt = str.lastIndexOf(' ', wrapLength + offset); 196 197 if (spaceToWrapAt >= offset) { 198 // normal case 199 wrappedLine.append(str.substring(offset, spaceToWrapAt)); 200 wrappedLine.append(newLineStr); 201 offset = spaceToWrapAt + 1; 202 203 } else { 204 // really long word or URL 205 if (wrapLongWords) { 206 // wrap really long word one line at a time 207 wrappedLine.append(str.substring(offset, wrapLength + offset)); 208 wrappedLine.append(newLineStr); 209 offset += wrapLength; 210 } else { 211 // do not wrap really long word, just extend beyond limit 212 spaceToWrapAt = str.indexOf(' ', wrapLength + offset); 213 if (spaceToWrapAt >= 0) { 214 wrappedLine.append(str.substring(offset, spaceToWrapAt)); 215 wrappedLine.append(newLineStr); 216 offset = spaceToWrapAt + 1; 217 } else { 218 wrappedLine.append(str.substring(offset)); 219 offset = inputLineLength; 220 } 221 } 222 } 223 } 224 225 // Whatever is left in line is short enough to just pass through 226 wrappedLine.append(str.substring(offset)); 227 228 return wrappedLine.toString(); 229 } 230 231 // Capitalizing 232 //----------------------------------------------------------------------- 233 /** 234 * <p>Capitalizes all the whitespace separated words in a String. 235 * Only the first letter of each word is changed. To convert the 236 * rest of each word to lowercase at the same time, 237 * use {@link #capitalizeFully(String)}.</p> 238 * 239 * <p>Whitespace is defined by {@link Character#isWhitespace(char)}. 240 * A <code>null</code> input String returns <code>null</code>. 241 * Capitalization uses the Unicode title case, normally equivalent to 242 * upper case.</p> 243 * 244 * <pre> 245 * WordUtils.capitalize(null) = null 246 * WordUtils.capitalize("") = "" 247 * WordUtils.capitalize("i am FINE") = "I Am FINE" 248 * </pre> 249 * 250 * @param str the String to capitalize, may be null 251 * @return capitalized String, <code>null</code> if null String input 252 * @see #uncapitalize(String) 253 * @see #capitalizeFully(String) 254 */ 255 public static String capitalize(final String str) { 256 return capitalize(str, null); 257 } 258 259 /** 260 * <p>Capitalizes all the delimiter separated words in a String. 261 * Only the first letter of each word is changed. To convert the 262 * rest of each word to lowercase at the same time, 263 * use {@link #capitalizeFully(String, char[])}.</p> 264 * 265 * <p>The delimiters represent a set of characters understood to separate words. 266 * The first string character and the first non-delimiter character after a 267 * delimiter will be capitalized. </p> 268 * 269 * <p>A <code>null</code> input String returns <code>null</code>. 270 * Capitalization uses the Unicode title case, normally equivalent to 271 * upper case.</p> 272 * 273 * <pre> 274 * WordUtils.capitalize(null, *) = null 275 * WordUtils.capitalize("", *) = "" 276 * WordUtils.capitalize(*, new char[0]) = * 277 * WordUtils.capitalize("i am fine", null) = "I Am Fine" 278 * WordUtils.capitalize("i aM.fine", {'.'}) = "I aM.Fine" 279 * </pre> 280 * 281 * @param str the String to capitalize, may be null 282 * @param delimiters set of characters to determine capitalization, null means whitespace 283 * @return capitalized String, <code>null</code> if null String input 284 * @see #uncapitalize(String) 285 * @see #capitalizeFully(String) 286 * @since 2.1 287 */ 288 public static String capitalize(final String str, final char... delimiters) { 289 final int delimLen = delimiters == null ? -1 : delimiters.length; 290 if (StringUtils.isEmpty(str) || delimLen == 0) { 291 return str; 292 } 293 final char[] buffer = str.toCharArray(); 294 boolean capitalizeNext = true; 295 for (int i = 0; i < buffer.length; i++) { 296 final char ch = buffer[i]; 297 if (isDelimiter(ch, delimiters)) { 298 capitalizeNext = true; 299 } else if (capitalizeNext) { 300 buffer[i] = Character.toTitleCase(ch); 301 capitalizeNext = false; 302 } 303 } 304 return new String(buffer); 305 } 306 307 //----------------------------------------------------------------------- 308 /** 309 * <p>Converts all the whitespace separated words in a String into capitalized words, 310 * that is each word is made up of a titlecase character and then a series of 311 * lowercase characters. </p> 312 * 313 * <p>Whitespace is defined by {@link Character#isWhitespace(char)}. 314 * A <code>null</code> input String returns <code>null</code>. 315 * Capitalization uses the Unicode title case, normally equivalent to 316 * upper case.</p> 317 * 318 * <pre> 319 * WordUtils.capitalizeFully(null) = null 320 * WordUtils.capitalizeFully("") = "" 321 * WordUtils.capitalizeFully("i am FINE") = "I Am Fine" 322 * </pre> 323 * 324 * @param str the String to capitalize, may be null 325 * @return capitalized String, <code>null</code> if null String input 326 */ 327 public static String capitalizeFully(final String str) { 328 return capitalizeFully(str, null); 329 } 330 331 /** 332 * <p>Converts all the delimiter separated words in a String into capitalized words, 333 * that is each word is made up of a titlecase character and then a series of 334 * lowercase characters. </p> 335 * 336 * <p>The delimiters represent a set of characters understood to separate words. 337 * The first string character and the first non-delimiter character after a 338 * delimiter will be capitalized. </p> 339 * 340 * <p>A <code>null</code> input String returns <code>null</code>. 341 * Capitalization uses the Unicode title case, normally equivalent to 342 * upper case.</p> 343 * 344 * <pre> 345 * WordUtils.capitalizeFully(null, *) = null 346 * WordUtils.capitalizeFully("", *) = "" 347 * WordUtils.capitalizeFully(*, null) = * 348 * WordUtils.capitalizeFully(*, new char[0]) = * 349 * WordUtils.capitalizeFully("i aM.fine", {'.'}) = "I am.Fine" 350 * </pre> 351 * 352 * @param str the String to capitalize, may be null 353 * @param delimiters set of characters to determine capitalization, null means whitespace 354 * @return capitalized String, <code>null</code> if null String input 355 * @since 2.1 356 */ 357 public static String capitalizeFully(String str, final char... delimiters) { 358 final int delimLen = delimiters == null ? -1 : delimiters.length; 359 if (StringUtils.isEmpty(str) || delimLen == 0) { 360 return str; 361 } 362 str = str.toLowerCase(); 363 return capitalize(str, delimiters); 364 } 365 366 //----------------------------------------------------------------------- 367 /** 368 * <p>Uncapitalizes all the whitespace separated words in a String. 369 * Only the first letter of each word is changed.</p> 370 * 371 * <p>Whitespace is defined by {@link Character#isWhitespace(char)}. 372 * A <code>null</code> input String returns <code>null</code>.</p> 373 * 374 * <pre> 375 * WordUtils.uncapitalize(null) = null 376 * WordUtils.uncapitalize("") = "" 377 * WordUtils.uncapitalize("I Am FINE") = "i am fINE" 378 * </pre> 379 * 380 * @param str the String to uncapitalize, may be null 381 * @return uncapitalized String, <code>null</code> if null String input 382 * @see #capitalize(String) 383 */ 384 public static String uncapitalize(final String str) { 385 return uncapitalize(str, null); 386 } 387 388 /** 389 * <p>Uncapitalizes all the whitespace separated words in a String. 390 * Only the first letter of each word is changed.</p> 391 * 392 * <p>The delimiters represent a set of characters understood to separate words. 393 * The first string character and the first non-delimiter character after a 394 * delimiter will be uncapitalized. </p> 395 * 396 * <p>Whitespace is defined by {@link Character#isWhitespace(char)}. 397 * A <code>null</code> input String returns <code>null</code>.</p> 398 * 399 * <pre> 400 * WordUtils.uncapitalize(null, *) = null 401 * WordUtils.uncapitalize("", *) = "" 402 * WordUtils.uncapitalize(*, null) = * 403 * WordUtils.uncapitalize(*, new char[0]) = * 404 * WordUtils.uncapitalize("I AM.FINE", {'.'}) = "i AM.fINE" 405 * </pre> 406 * 407 * @param str the String to uncapitalize, may be null 408 * @param delimiters set of characters to determine uncapitalization, null means whitespace 409 * @return uncapitalized String, <code>null</code> if null String input 410 * @see #capitalize(String) 411 * @since 2.1 412 */ 413 public static String uncapitalize(final String str, final char... delimiters) { 414 final int delimLen = delimiters == null ? -1 : delimiters.length; 415 if (StringUtils.isEmpty(str) || delimLen == 0) { 416 return str; 417 } 418 final char[] buffer = str.toCharArray(); 419 boolean uncapitalizeNext = true; 420 for (int i = 0; i < buffer.length; i++) { 421 final char ch = buffer[i]; 422 if (isDelimiter(ch, delimiters)) { 423 uncapitalizeNext = true; 424 } else if (uncapitalizeNext) { 425 buffer[i] = Character.toLowerCase(ch); 426 uncapitalizeNext = false; 427 } 428 } 429 return new String(buffer); 430 } 431 432 //----------------------------------------------------------------------- 433 /** 434 * <p>Swaps the case of a String using a word based algorithm.</p> 435 * 436 * <ul> 437 * <li>Upper case character converts to Lower case</li> 438 * <li>Title case character converts to Lower case</li> 439 * <li>Lower case character after Whitespace or at start converts to Title case</li> 440 * <li>Other Lower case character converts to Upper case</li> 441 * </ul> 442 * 443 * <p>Whitespace is defined by {@link Character#isWhitespace(char)}. 444 * A <code>null</code> input String returns <code>null</code>.</p> 445 * 446 * <pre> 447 * StringUtils.swapCase(null) = null 448 * StringUtils.swapCase("") = "" 449 * StringUtils.swapCase("The dog has a BONE") = "tHE DOG HAS A bone" 450 * </pre> 451 * 452 * @param str the String to swap case, may be null 453 * @return the changed String, <code>null</code> if null String input 454 */ 455 public static String swapCase(final String str) { 456 if (StringUtils.isEmpty(str)) { 457 return str; 458 } 459 final char[] buffer = str.toCharArray(); 460 461 boolean whitespace = true; 462 463 for (int i = 0; i < buffer.length; i++) { 464 final char ch = buffer[i]; 465 if (Character.isUpperCase(ch)) { 466 buffer[i] = Character.toLowerCase(ch); 467 whitespace = false; 468 } else if (Character.isTitleCase(ch)) { 469 buffer[i] = Character.toLowerCase(ch); 470 whitespace = false; 471 } else if (Character.isLowerCase(ch)) { 472 if (whitespace) { 473 buffer[i] = Character.toTitleCase(ch); 474 whitespace = false; 475 } else { 476 buffer[i] = Character.toUpperCase(ch); 477 } 478 } else { 479 whitespace = Character.isWhitespace(ch); 480 } 481 } 482 return new String(buffer); 483 } 484 485 //----------------------------------------------------------------------- 486 /** 487 * <p>Extracts the initial letters from each word in the String.</p> 488 * 489 * <p>The first letter of the string and all first letters after 490 * whitespace are returned as a new string. 491 * Their case is not changed.</p> 492 * 493 * <p>Whitespace is defined by {@link Character#isWhitespace(char)}. 494 * A <code>null</code> input String returns <code>null</code>.</p> 495 * 496 * <pre> 497 * WordUtils.initials(null) = null 498 * WordUtils.initials("") = "" 499 * WordUtils.initials("Ben John Lee") = "BJL" 500 * WordUtils.initials("Ben J.Lee") = "BJ" 501 * </pre> 502 * 503 * @param str the String to get initials from, may be null 504 * @return String of initial letters, <code>null</code> if null String input 505 * @see #initials(String,char[]) 506 * @since 2.2 507 */ 508 public static String initials(final String str) { 509 return initials(str, null); 510 } 511 512 /** 513 * <p>Extracts the initial letters from each word in the String.</p> 514 * 515 * <p>The first letter of the string and all first letters after the 516 * defined delimiters are returned as a new string. 517 * Their case is not changed.</p> 518 * 519 * <p>If the delimiters array is null, then Whitespace is used. 520 * Whitespace is defined by {@link Character#isWhitespace(char)}. 521 * A <code>null</code> input String returns <code>null</code>. 522 * An empty delimiter array returns an empty String.</p> 523 * 524 * <pre> 525 * WordUtils.initials(null, *) = null 526 * WordUtils.initials("", *) = "" 527 * WordUtils.initials("Ben John Lee", null) = "BJL" 528 * WordUtils.initials("Ben J.Lee", null) = "BJ" 529 * WordUtils.initials("Ben J.Lee", [' ','.']) = "BJL" 530 * WordUtils.initials(*, new char[0]) = "" 531 * </pre> 532 * 533 * @param str the String to get initials from, may be null 534 * @param delimiters set of characters to determine words, null means whitespace 535 * @return String of initial letters, <code>null</code> if null String input 536 * @see #initials(String) 537 * @since 2.2 538 */ 539 public static String initials(final String str, final char... delimiters) { 540 if (StringUtils.isEmpty(str)) { 541 return str; 542 } 543 if (delimiters != null && delimiters.length == 0) { 544 return ""; 545 } 546 final int strLen = str.length(); 547 final char[] buf = new char[strLen / 2 + 1]; 548 int count = 0; 549 boolean lastWasGap = true; 550 for (int i = 0; i < strLen; i++) { 551 final char ch = str.charAt(i); 552 553 if (isDelimiter(ch, delimiters)) { 554 lastWasGap = true; 555 } else if (lastWasGap) { 556 buf[count++] = ch; 557 lastWasGap = false; 558 } else { 559 continue; // ignore ch 560 } 561 } 562 return new String(buf, 0, count); 563 } 564 565 //----------------------------------------------------------------------- 566 /** 567 * Is the character a delimiter. 568 * 569 * @param ch the character to check 570 * @param delimiters the delimiters 571 * @return true if it is a delimiter 572 */ 573 private static boolean isDelimiter(final char ch, final char[] delimiters) { 574 if (delimiters == null) { 575 return Character.isWhitespace(ch); 576 } 577 for (final char delimiter : delimiters) { 578 if (ch == delimiter) { 579 return true; 580 } 581 } 582 return false; 583 } 584 585}