001 /* 002 * Licensed to the Apache Software Foundation (ASF) under one or more 003 * contributor license agreements. See the NOTICE file distributed with 004 * this work for additional information regarding copyright ownership. 005 * The ASF licenses this file to You under the Apache License, Version 2.0 006 * (the "License"); you may not use this file except in compliance with 007 * the License. You may obtain a copy of the License at 008 * 009 * http://www.apache.org/licenses/LICENSE-2.0 010 * 011 * Unless required by applicable law or agreed to in writing, software 012 * distributed under the License is distributed on an "AS IS" BASIS, 013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 014 * See the License for the specific language governing permissions and 015 * limitations under the License. 016 */ 017 package org.apache.commons.lang; 018 019 /** 020 * <p>Operations on Strings that contain words.</p> 021 * 022 * <p>This class tries to handle <code>null</code> input gracefully. 023 * An exception will not be thrown for a <code>null</code> input. 024 * Each method documents its behaviour in more detail.</p> 025 * 026 * @author Apache Jakarta Velocity 027 * @author Apache Software Foundation 028 * @author <a href="mailto:hps@intermeta.de">Henning P. Schmiedehausen</a> 029 * @author Gary Gregory 030 * @since 2.0 031 * @version $Id: WordUtils.java 905636 2010-02-02 14:03:32Z niallp $ 032 */ 033 public class WordUtils { 034 035 /** 036 * <p><code>WordUtils</code> instances should NOT be constructed in 037 * standard programming. Instead, the class should be used as 038 * <code>WordUtils.wrap("foo bar", 20);</code>.</p> 039 * 040 * <p>This constructor is public to permit tools that require a JavaBean 041 * instance to operate.</p> 042 */ 043 public WordUtils() { 044 super(); 045 } 046 047 // Wrapping 048 //-------------------------------------------------------------------------- 049 // /** 050 // * <p>Wraps a block of text to a specified line length using '\n' as 051 // * a newline.</p> 052 // * 053 // * <p>This method takes a block of text, which might have long lines in it 054 // * and wraps the long lines based on the supplied lineLength parameter.</p> 055 // * 056 // * <p>If a single word is longer than the line length (eg. a URL), it will 057 // * not be broken, and will display beyond the expected width.</p> 058 // * 059 // * <p>If there are tabs in inString, you are going to get results that are 060 // * a bit strange. Tabs are a single character but are displayed as 4 or 8 061 // * spaces. Remove the tabs.</p> 062 // * 063 // * @param str text which is in need of word-wrapping, may be null 064 // * @param lineLength the column to wrap the words at 065 // * @return the text with all the long lines word-wrapped 066 // * <code>null</code> if null string input 067 // */ 068 // public static String wrapText(String str, int lineLength) { 069 // return wrap(str, null, lineLength); 070 // } 071 072 // /** 073 // * <p>Wraps a block of text to a specified line length.</p> 074 // * 075 // * <p>This method takes a block of text, which might have long lines in it 076 // * and wraps the long lines based on the supplied lineLength parameter.</p> 077 // * 078 // * <p>If a single word is longer than the wrapColumn (eg. a URL), it will 079 // * not be broken, and will display beyond the expected width.</p> 080 // * 081 // * <p>If there are tabs in inString, you are going to get results that are 082 // * a bit strange. Tabs are a single character but are displayed as 4 or 8 083 // * spaces. Remove the tabs.</p> 084 // * 085 // * @param str text which is in need of word-wrapping, may be null 086 // * @param newLineChars the characters that define a newline, null treated as \n 087 // * @param lineLength the column to wrap the words at 088 // * @return the text with all the long lines word-wrapped 089 // * <code>null</code> if null string input 090 // */ 091 // public static String wrapText(String str, String newLineChars, int lineLength) { 092 // if (str == null) { 093 // return null; 094 // } 095 // if (newLineChars == null) { 096 // newLineChars = "\n"; 097 // } 098 // StringTokenizer lineTokenizer = new StringTokenizer(str, newLineChars, true); 099 // StringBuffer stringBuffer = new StringBuffer(); 100 // 101 // while (lineTokenizer.hasMoreTokens()) { 102 // try { 103 // String nextLine = lineTokenizer.nextToken(); 104 // 105 // if (nextLine.length() > lineLength) { 106 // // This line is long enough to be wrapped. 107 // nextLine = wrapLine(nextLine, null, lineLength, false); 108 // } 109 // 110 // stringBuffer.append(nextLine); 111 // 112 // } catch (NoSuchElementException nsee) { 113 // // thrown by nextToken(), but I don't know why it would 114 // break; 115 // } 116 // } 117 // 118 // return stringBuffer.toString(); 119 // } 120 121 // Wrapping 122 //----------------------------------------------------------------------- 123 /** 124 * <p>Wraps a single line of text, identifying words by <code>' '</code>.</p> 125 * 126 * <p>New lines will be separated by the system property line separator. 127 * Very long words, such as URLs will <i>not</i> be wrapped.</p> 128 * 129 * <p>Leading spaces on a new line are stripped. 130 * Trailing spaces are not stripped.</p> 131 * 132 * <pre> 133 * WordUtils.wrap(null, *) = null 134 * WordUtils.wrap("", *) = "" 135 * </pre> 136 * 137 * @param str the String to be word wrapped, may be null 138 * @param wrapLength the column to wrap the words at, less than 1 is treated as 1 139 * @return a line with newlines inserted, <code>null</code> if null input 140 */ 141 public static String wrap(String str, int wrapLength) { 142 return wrap(str, wrapLength, null, false); 143 } 144 145 /** 146 * <p>Wraps a single line of text, identifying words by <code>' '</code>.</p> 147 * 148 * <p>Leading spaces on a new line are stripped. 149 * Trailing spaces are not stripped.</p> 150 * 151 * <pre> 152 * WordUtils.wrap(null, *, *, *) = null 153 * WordUtils.wrap("", *, *, *) = "" 154 * </pre> 155 * 156 * @param str the String to be word wrapped, may be null 157 * @param wrapLength the column to wrap the words at, less than 1 is treated as 1 158 * @param newLineStr the string to insert for a new line, 159 * <code>null</code> uses the system property line separator 160 * @param wrapLongWords true if long words (such as URLs) should be wrapped 161 * @return a line with newlines inserted, <code>null</code> if null input 162 */ 163 public static String wrap(String str, int wrapLength, String newLineStr, boolean wrapLongWords) { 164 if (str == null) { 165 return null; 166 } 167 if (newLineStr == null) { 168 newLineStr = SystemUtils.LINE_SEPARATOR; 169 } 170 if (wrapLength < 1) { 171 wrapLength = 1; 172 } 173 int inputLineLength = str.length(); 174 int offset = 0; 175 StringBuffer wrappedLine = new StringBuffer(inputLineLength + 32); 176 177 while ((inputLineLength - offset) > wrapLength) { 178 if (str.charAt(offset) == ' ') { 179 offset++; 180 continue; 181 } 182 int spaceToWrapAt = str.lastIndexOf(' ', wrapLength + offset); 183 184 if (spaceToWrapAt >= offset) { 185 // normal case 186 wrappedLine.append(str.substring(offset, spaceToWrapAt)); 187 wrappedLine.append(newLineStr); 188 offset = spaceToWrapAt + 1; 189 190 } else { 191 // really long word or URL 192 if (wrapLongWords) { 193 // wrap really long word one line at a time 194 wrappedLine.append(str.substring(offset, wrapLength + offset)); 195 wrappedLine.append(newLineStr); 196 offset += wrapLength; 197 } else { 198 // do not wrap really long word, just extend beyond limit 199 spaceToWrapAt = str.indexOf(' ', wrapLength + offset); 200 if (spaceToWrapAt >= 0) { 201 wrappedLine.append(str.substring(offset, spaceToWrapAt)); 202 wrappedLine.append(newLineStr); 203 offset = spaceToWrapAt + 1; 204 } else { 205 wrappedLine.append(str.substring(offset)); 206 offset = inputLineLength; 207 } 208 } 209 } 210 } 211 212 // Whatever is left in line is short enough to just pass through 213 wrappedLine.append(str.substring(offset)); 214 215 return wrappedLine.toString(); 216 } 217 218 // Capitalizing 219 //----------------------------------------------------------------------- 220 /** 221 * <p>Capitalizes all the whitespace separated words in a String. 222 * Only the first letter of each word is changed. To convert the 223 * rest of each word to lowercase at the same time, 224 * use {@link #capitalizeFully(String)}.</p> 225 * 226 * <p>Whitespace is defined by {@link Character#isWhitespace(char)}. 227 * A <code>null</code> input String returns <code>null</code>. 228 * Capitalization uses the unicode title case, normally equivalent to 229 * upper case.</p> 230 * 231 * <pre> 232 * WordUtils.capitalize(null) = null 233 * WordUtils.capitalize("") = "" 234 * WordUtils.capitalize("i am FINE") = "I Am FINE" 235 * </pre> 236 * 237 * @param str the String to capitalize, may be null 238 * @return capitalized String, <code>null</code> if null String input 239 * @see #uncapitalize(String) 240 * @see #capitalizeFully(String) 241 */ 242 public static String capitalize(String str) { 243 return capitalize(str, null); 244 } 245 246 /** 247 * <p>Capitalizes all the delimiter separated words in a String. 248 * Only the first letter of each word is changed. To convert the 249 * rest of each word to lowercase at the same time, 250 * use {@link #capitalizeFully(String, char[])}.</p> 251 * 252 * <p>The delimiters represent a set of characters understood to separate words. 253 * The first string character and the first non-delimiter character after a 254 * delimiter will be capitalized. </p> 255 * 256 * <p>A <code>null</code> input String returns <code>null</code>. 257 * Capitalization uses the unicode title case, normally equivalent to 258 * upper case.</p> 259 * 260 * <pre> 261 * WordUtils.capitalize(null, *) = null 262 * WordUtils.capitalize("", *) = "" 263 * WordUtils.capitalize(*, new char[0]) = * 264 * WordUtils.capitalize("i am fine", null) = "I Am Fine" 265 * WordUtils.capitalize("i aM.fine", {'.'}) = "I aM.Fine" 266 * </pre> 267 * 268 * @param str the String to capitalize, may be null 269 * @param delimiters set of characters to determine capitalization, null means whitespace 270 * @return capitalized String, <code>null</code> if null String input 271 * @see #uncapitalize(String) 272 * @see #capitalizeFully(String) 273 * @since 2.1 274 */ 275 public static String capitalize(String str, char[] delimiters) { 276 int delimLen = (delimiters == null ? -1 : delimiters.length); 277 if (str == null || str.length() == 0 || delimLen == 0) { 278 return str; 279 } 280 int strLen = str.length(); 281 StringBuffer buffer = new StringBuffer(strLen); 282 boolean capitalizeNext = true; 283 for (int i = 0; i < strLen; i++) { 284 char ch = str.charAt(i); 285 286 if (isDelimiter(ch, delimiters)) { 287 buffer.append(ch); 288 capitalizeNext = true; 289 } else if (capitalizeNext) { 290 buffer.append(Character.toTitleCase(ch)); 291 capitalizeNext = false; 292 } else { 293 buffer.append(ch); 294 } 295 } 296 return buffer.toString(); 297 } 298 299 //----------------------------------------------------------------------- 300 /** 301 * <p>Converts all the whitespace separated words in a String into capitalized words, 302 * that is each word is made up of a titlecase character and then a series of 303 * lowercase characters. </p> 304 * 305 * <p>Whitespace is defined by {@link Character#isWhitespace(char)}. 306 * A <code>null</code> input String returns <code>null</code>. 307 * Capitalization uses the unicode title case, normally equivalent to 308 * upper case.</p> 309 * 310 * <pre> 311 * WordUtils.capitalizeFully(null) = null 312 * WordUtils.capitalizeFully("") = "" 313 * WordUtils.capitalizeFully("i am FINE") = "I Am Fine" 314 * </pre> 315 * 316 * @param str the String to capitalize, may be null 317 * @return capitalized String, <code>null</code> if null String input 318 */ 319 public static String capitalizeFully(String str) { 320 return capitalizeFully(str, null); 321 } 322 323 /** 324 * <p>Converts all the delimiter separated words in a String into capitalized words, 325 * that is each word is made up of a titlecase character and then a series of 326 * lowercase characters. </p> 327 * 328 * <p>The delimiters represent a set of characters understood to separate words. 329 * The first string character and the first non-delimiter character after a 330 * delimiter will be capitalized. </p> 331 * 332 * <p>A <code>null</code> input String returns <code>null</code>. 333 * Capitalization uses the unicode title case, normally equivalent to 334 * upper case.</p> 335 * 336 * <pre> 337 * WordUtils.capitalizeFully(null, *) = null 338 * WordUtils.capitalizeFully("", *) = "" 339 * WordUtils.capitalizeFully(*, null) = * 340 * WordUtils.capitalizeFully(*, new char[0]) = * 341 * WordUtils.capitalizeFully("i aM.fine", {'.'}) = "I am.Fine" 342 * </pre> 343 * 344 * @param str the String to capitalize, may be null 345 * @param delimiters set of characters to determine capitalization, null means whitespace 346 * @return capitalized String, <code>null</code> if null String input 347 * @since 2.1 348 */ 349 public static String capitalizeFully(String str, char[] delimiters) { 350 int delimLen = (delimiters == null ? -1 : delimiters.length); 351 if (str == null || str.length() == 0 || delimLen == 0) { 352 return str; 353 } 354 str = str.toLowerCase(); 355 return capitalize(str, delimiters); 356 } 357 358 //----------------------------------------------------------------------- 359 /** 360 * <p>Uncapitalizes all the whitespace separated words in a String. 361 * Only the first letter of each word is changed.</p> 362 * 363 * <p>Whitespace is defined by {@link Character#isWhitespace(char)}. 364 * A <code>null</code> input String returns <code>null</code>.</p> 365 * 366 * <pre> 367 * WordUtils.uncapitalize(null) = null 368 * WordUtils.uncapitalize("") = "" 369 * WordUtils.uncapitalize("I Am FINE") = "i am fINE" 370 * </pre> 371 * 372 * @param str the String to uncapitalize, may be null 373 * @return uncapitalized String, <code>null</code> if null String input 374 * @see #capitalize(String) 375 */ 376 public static String uncapitalize(String str) { 377 return uncapitalize(str, null); 378 } 379 380 /** 381 * <p>Uncapitalizes all the whitespace separated words in a String. 382 * Only the first letter of each word is changed.</p> 383 * 384 * <p>The delimiters represent a set of characters understood to separate words. 385 * The first string character and the first non-delimiter character after a 386 * delimiter will be uncapitalized. </p> 387 * 388 * <p>Whitespace is defined by {@link Character#isWhitespace(char)}. 389 * A <code>null</code> input String returns <code>null</code>.</p> 390 * 391 * <pre> 392 * WordUtils.uncapitalize(null, *) = null 393 * WordUtils.uncapitalize("", *) = "" 394 * WordUtils.uncapitalize(*, null) = * 395 * WordUtils.uncapitalize(*, new char[0]) = * 396 * WordUtils.uncapitalize("I AM.FINE", {'.'}) = "i AM.fINE" 397 * </pre> 398 * 399 * @param str the String to uncapitalize, may be null 400 * @param delimiters set of characters to determine uncapitalization, null means whitespace 401 * @return uncapitalized String, <code>null</code> if null String input 402 * @see #capitalize(String) 403 * @since 2.1 404 */ 405 public static String uncapitalize(String str, char[] delimiters) { 406 int delimLen = (delimiters == null ? -1 : delimiters.length); 407 if (str == null || str.length() == 0 || delimLen == 0) { 408 return str; 409 } 410 int strLen = str.length(); 411 StringBuffer buffer = new StringBuffer(strLen); 412 boolean uncapitalizeNext = true; 413 for (int i = 0; i < strLen; i++) { 414 char ch = str.charAt(i); 415 416 if (isDelimiter(ch, delimiters)) { 417 buffer.append(ch); 418 uncapitalizeNext = true; 419 } else if (uncapitalizeNext) { 420 buffer.append(Character.toLowerCase(ch)); 421 uncapitalizeNext = false; 422 } else { 423 buffer.append(ch); 424 } 425 } 426 return buffer.toString(); 427 } 428 429 //----------------------------------------------------------------------- 430 /** 431 * <p>Swaps the case of a String using a word based algorithm.</p> 432 * 433 * <ul> 434 * <li>Upper case character converts to Lower case</li> 435 * <li>Title case character converts to Lower case</li> 436 * <li>Lower case character after Whitespace or at start converts to Title case</li> 437 * <li>Other Lower case character converts to Upper case</li> 438 * </ul> 439 * 440 * <p>Whitespace is defined by {@link Character#isWhitespace(char)}. 441 * A <code>null</code> input String returns <code>null</code>.</p> 442 * 443 * <pre> 444 * StringUtils.swapCase(null) = null 445 * StringUtils.swapCase("") = "" 446 * StringUtils.swapCase("The dog has a BONE") = "tHE DOG HAS A bone" 447 * </pre> 448 * 449 * @param str the String to swap case, may be null 450 * @return the changed String, <code>null</code> if null String input 451 */ 452 public static String swapCase(String str) { 453 int strLen; 454 if (str == null || (strLen = str.length()) == 0) { 455 return str; 456 } 457 StringBuffer buffer = new StringBuffer(strLen); 458 459 boolean whitespace = true; 460 char ch = 0; 461 char tmp = 0; 462 463 for (int i = 0; i < strLen; i++) { 464 ch = str.charAt(i); 465 if (Character.isUpperCase(ch)) { 466 tmp = Character.toLowerCase(ch); 467 } else if (Character.isTitleCase(ch)) { 468 tmp = Character.toLowerCase(ch); 469 } else if (Character.isLowerCase(ch)) { 470 if (whitespace) { 471 tmp = Character.toTitleCase(ch); 472 } else { 473 tmp = Character.toUpperCase(ch); 474 } 475 } else { 476 tmp = ch; 477 } 478 buffer.append(tmp); 479 whitespace = Character.isWhitespace(ch); 480 } 481 return buffer.toString(); 482 } 483 484 //----------------------------------------------------------------------- 485 /** 486 * <p>Extracts the initial letters from each word in the String.</p> 487 * 488 * <p>The first letter of the string and all first letters after 489 * whitespace are returned as a new string. 490 * Their case is not changed.</p> 491 * 492 * <p>Whitespace is defined by {@link Character#isWhitespace(char)}. 493 * A <code>null</code> input String returns <code>null</code>.</p> 494 * 495 * <pre> 496 * WordUtils.initials(null) = null 497 * WordUtils.initials("") = "" 498 * WordUtils.initials("Ben John Lee") = "BJL" 499 * WordUtils.initials("Ben J.Lee") = "BJ" 500 * </pre> 501 * 502 * @param str the String to get initials from, may be null 503 * @return String of initial letters, <code>null</code> if null String input 504 * @see #initials(String,char[]) 505 * @since 2.2 506 */ 507 public static String initials(String str) { 508 return initials(str, null); 509 } 510 511 /** 512 * <p>Extracts the initial letters from each word in the String.</p> 513 * 514 * <p>The first letter of the string and all first letters after the 515 * defined delimiters are returned as a new string. 516 * Their case is not changed.</p> 517 * 518 * <p>If the delimiters array is null, then Whitespace is used. 519 * Whitespace is defined by {@link Character#isWhitespace(char)}. 520 * A <code>null</code> input String returns <code>null</code>. 521 * An empty delimiter array returns an empty String.</p> 522 * 523 * <pre> 524 * WordUtils.initials(null, *) = null 525 * WordUtils.initials("", *) = "" 526 * WordUtils.initials("Ben John Lee", null) = "BJL" 527 * WordUtils.initials("Ben J.Lee", null) = "BJ" 528 * WordUtils.initials("Ben J.Lee", [' ','.']) = "BJL" 529 * WordUtils.initials(*, new char[0]) = "" 530 * </pre> 531 * 532 * @param str the String to get initials from, may be null 533 * @param delimiters set of characters to determine words, null means whitespace 534 * @return String of initial letters, <code>null</code> if null String input 535 * @see #initials(String) 536 * @since 2.2 537 */ 538 public static String initials(String str, char[] delimiters) { 539 if (str == null || str.length() == 0) { 540 return str; 541 } 542 if (delimiters != null && delimiters.length == 0) { 543 return ""; 544 } 545 int strLen = str.length(); 546 char[] buf = new char[strLen / 2 + 1]; 547 int count = 0; 548 boolean lastWasGap = true; 549 for (int i = 0; i < strLen; i++) { 550 char ch = str.charAt(i); 551 552 if (isDelimiter(ch, delimiters)) { 553 lastWasGap = true; 554 } else if (lastWasGap) { 555 buf[count++] = ch; 556 lastWasGap = false; 557 } else { 558 // ignore ch 559 } 560 } 561 return new String(buf, 0, count); 562 } 563 564 //----------------------------------------------------------------------- 565 /** 566 * Is the character a delimiter. 567 * 568 * @param ch the character to check 569 * @param delimiters the delimiters 570 * @return true if it is a delimiter 571 */ 572 private static boolean isDelimiter(char ch, char[] delimiters) { 573 if (delimiters == null) { 574 return Character.isWhitespace(ch); 575 } 576 for (int i = 0, isize = delimiters.length; i < isize; i++) { 577 if (ch == delimiters[i]) { 578 return true; 579 } 580 } 581 return false; 582 } 583 584 //----------------------------------------------------------------------- 585 /** 586 * Abbreviates a string nicely. 587 * 588 * This method searches for the first space after the lower limit and abbreviates 589 * the String there. It will also append any String passed as a parameter 590 * to the end of the String. The upper limit can be specified to forcibly 591 * abbreviate a String. 592 * 593 * @param str the string to be abbreviated. If null is passed, null is returned. 594 * If the empty String is passed, the empty string is returned. 595 * @param lower the lower limit. 596 * @param upper the upper limit; specify -1 if no limit is desired. 597 * If the upper limit is lower than the lower limit, it will be 598 * adjusted to be the same as the lower limit. 599 * @param appendToEnd String to be appended to the end of the abbreviated string. 600 * This is appended ONLY if the string was indeed abbreviated. 601 * The append does not count towards the lower or upper limits. 602 * @return the abbreviated String. 603 * @since 2.4 604 */ 605 public static String abbreviate(String str, int lower, int upper, String appendToEnd) { 606 // initial parameter checks 607 if (str == null) { 608 return null; 609 } 610 if (str.length() == 0) { 611 return StringUtils.EMPTY; 612 } 613 614 // if the lower value is greater than the length of the string, 615 // set to the length of the string 616 if (lower > str.length()) { 617 lower = str.length(); 618 } 619 // if the upper value is -1 (i.e. no limit) or is greater 620 // than the length of the string, set to the length of the string 621 if (upper == -1 || upper > str.length()) { 622 upper = str.length(); 623 } 624 // if upper is less than lower, raise it to lower 625 if (upper < lower) { 626 upper = lower; 627 } 628 629 StringBuffer result = new StringBuffer(); 630 int index = StringUtils.indexOf(str, " ", lower); 631 if (index == -1) { 632 result.append(str.substring(0, upper)); 633 // only if abbreviation has occured do we append the appendToEnd value 634 if (upper != str.length()) { 635 result.append(StringUtils.defaultString(appendToEnd)); 636 } 637 } else if (index > upper) { 638 result.append(str.substring(0, upper)); 639 result.append(StringUtils.defaultString(appendToEnd)); 640 } else { 641 result.append(str.substring(0, index)); 642 result.append(StringUtils.defaultString(appendToEnd)); 643 } 644 return result.toString(); 645 } 646 647 }