001/* 002 * Licensed to the Apache Software Foundation (ASF) under one or more 003 * contributor license agreements. See the NOTICE file distributed with 004 * this work for additional information regarding copyright ownership. 005 * The ASF licenses this file to You under the Apache License, Version 2.0 006 * (the "License"); you may not use this file except in compliance with 007 * the License. You may obtain a copy of the License at 008 * 009 * http://www.apache.org/licenses/LICENSE-2.0 010 * 011 * Unless required by applicable law or agreed to in writing, software 012 * distributed under the License is distributed on an "AS IS" BASIS, 013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 014 * See the License for the specific language governing permissions and 015 * limitations under the License. 016 */ 017package org.apache.commons.lang3.text; 018 019import org.apache.commons.lang3.StringUtils; 020import org.apache.commons.lang3.SystemUtils; 021 022/** 023 * <p>Operations on Strings that contain words.</p> 024 * 025 * <p>This class tries to handle <code>null</code> input gracefully. 026 * An exception will not be thrown for a <code>null</code> input. 027 * Each method documents its behaviour in more detail.</p> 028 * 029 * @since 2.0 030 * @version $Id: WordUtils.java 1436770 2013-01-22 07:09:45Z ggregory $ 031 */ 032public class WordUtils { 033 034 /** 035 * <p><code>WordUtils</code> instances should NOT be constructed in 036 * standard programming. Instead, the class should be used as 037 * <code>WordUtils.wrap("foo bar", 20);</code>.</p> 038 * 039 * <p>This constructor is public to permit tools that require a JavaBean 040 * instance to operate.</p> 041 */ 042 public WordUtils() { 043 super(); 044 } 045 046 // Wrapping 047 //-------------------------------------------------------------------------- 048 /** 049 * <p>Wraps a single line of text, identifying words by <code>' '</code>.</p> 050 * 051 * <p>New lines will be separated by the system property line separator. 052 * Very long words, such as URLs will <i>not</i> be wrapped.</p> 053 * 054 * <p>Leading spaces on a new line are stripped. 055 * Trailing spaces are not stripped.</p> 056 * 057 * <pre> 058 * WordUtils.wrap(null, *) = null 059 * WordUtils.wrap("", *) = "" 060 * </pre> 061 * 062 * @param str the String to be word wrapped, may be null 063 * @param wrapLength the column to wrap the words at, less than 1 is treated as 1 064 * @return a line with newlines inserted, <code>null</code> if null input 065 */ 066 public static String wrap(final String str, final int wrapLength) { 067 return wrap(str, wrapLength, null, false); 068 } 069 070 /** 071 * <p>Wraps a single line of text, identifying words by <code>' '</code>.</p> 072 * 073 * <p>Leading spaces on a new line are stripped. 074 * Trailing spaces are not stripped.</p> 075 * 076 * <pre> 077 * WordUtils.wrap(null, *, *, *) = null 078 * WordUtils.wrap("", *, *, *) = "" 079 * </pre> 080 * 081 * @param str the String to be word wrapped, may be null 082 * @param wrapLength the column to wrap the words at, less than 1 is treated as 1 083 * @param newLineStr the string to insert for a new line, 084 * <code>null</code> uses the system property line separator 085 * @param wrapLongWords true if long words (such as URLs) should be wrapped 086 * @return a line with newlines inserted, <code>null</code> if null input 087 */ 088 public static String wrap(final String str, int wrapLength, String newLineStr, final boolean wrapLongWords) { 089 if (str == null) { 090 return null; 091 } 092 if (newLineStr == null) { 093 newLineStr = SystemUtils.LINE_SEPARATOR; 094 } 095 if (wrapLength < 1) { 096 wrapLength = 1; 097 } 098 final int inputLineLength = str.length(); 099 int offset = 0; 100 final StringBuilder wrappedLine = new StringBuilder(inputLineLength + 32); 101 102 while (inputLineLength - offset > wrapLength) { 103 if (str.charAt(offset) == ' ') { 104 offset++; 105 continue; 106 } 107 int spaceToWrapAt = str.lastIndexOf(' ', wrapLength + offset); 108 109 if (spaceToWrapAt >= offset) { 110 // normal case 111 wrappedLine.append(str.substring(offset, spaceToWrapAt)); 112 wrappedLine.append(newLineStr); 113 offset = spaceToWrapAt + 1; 114 115 } else { 116 // really long word or URL 117 if (wrapLongWords) { 118 // wrap really long word one line at a time 119 wrappedLine.append(str.substring(offset, wrapLength + offset)); 120 wrappedLine.append(newLineStr); 121 offset += wrapLength; 122 } else { 123 // do not wrap really long word, just extend beyond limit 124 spaceToWrapAt = str.indexOf(' ', wrapLength + offset); 125 if (spaceToWrapAt >= 0) { 126 wrappedLine.append(str.substring(offset, spaceToWrapAt)); 127 wrappedLine.append(newLineStr); 128 offset = spaceToWrapAt + 1; 129 } else { 130 wrappedLine.append(str.substring(offset)); 131 offset = inputLineLength; 132 } 133 } 134 } 135 } 136 137 // Whatever is left in line is short enough to just pass through 138 wrappedLine.append(str.substring(offset)); 139 140 return wrappedLine.toString(); 141 } 142 143 // Capitalizing 144 //----------------------------------------------------------------------- 145 /** 146 * <p>Capitalizes all the whitespace separated words in a String. 147 * Only the first letter of each word is changed. To convert the 148 * rest of each word to lowercase at the same time, 149 * use {@link #capitalizeFully(String)}.</p> 150 * 151 * <p>Whitespace is defined by {@link Character#isWhitespace(char)}. 152 * A <code>null</code> input String returns <code>null</code>. 153 * Capitalization uses the Unicode title case, normally equivalent to 154 * upper case.</p> 155 * 156 * <pre> 157 * WordUtils.capitalize(null) = null 158 * WordUtils.capitalize("") = "" 159 * WordUtils.capitalize("i am FINE") = "I Am FINE" 160 * </pre> 161 * 162 * @param str the String to capitalize, may be null 163 * @return capitalized String, <code>null</code> if null String input 164 * @see #uncapitalize(String) 165 * @see #capitalizeFully(String) 166 */ 167 public static String capitalize(final String str) { 168 return capitalize(str, null); 169 } 170 171 /** 172 * <p>Capitalizes all the delimiter separated words in a String. 173 * Only the first letter of each word is changed. To convert the 174 * rest of each word to lowercase at the same time, 175 * use {@link #capitalizeFully(String, char[])}.</p> 176 * 177 * <p>The delimiters represent a set of characters understood to separate words. 178 * The first string character and the first non-delimiter character after a 179 * delimiter will be capitalized. </p> 180 * 181 * <p>A <code>null</code> input String returns <code>null</code>. 182 * Capitalization uses the Unicode title case, normally equivalent to 183 * upper case.</p> 184 * 185 * <pre> 186 * WordUtils.capitalize(null, *) = null 187 * WordUtils.capitalize("", *) = "" 188 * WordUtils.capitalize(*, new char[0]) = * 189 * WordUtils.capitalize("i am fine", null) = "I Am Fine" 190 * WordUtils.capitalize("i aM.fine", {'.'}) = "I aM.Fine" 191 * </pre> 192 * 193 * @param str the String to capitalize, may be null 194 * @param delimiters set of characters to determine capitalization, null means whitespace 195 * @return capitalized String, <code>null</code> if null String input 196 * @see #uncapitalize(String) 197 * @see #capitalizeFully(String) 198 * @since 2.1 199 */ 200 public static String capitalize(final String str, final char... delimiters) { 201 final int delimLen = delimiters == null ? -1 : delimiters.length; 202 if (StringUtils.isEmpty(str) || delimLen == 0) { 203 return str; 204 } 205 final char[] buffer = str.toCharArray(); 206 boolean capitalizeNext = true; 207 for (int i = 0; i < buffer.length; i++) { 208 final char ch = buffer[i]; 209 if (isDelimiter(ch, delimiters)) { 210 capitalizeNext = true; 211 } else if (capitalizeNext) { 212 buffer[i] = Character.toTitleCase(ch); 213 capitalizeNext = false; 214 } 215 } 216 return new String(buffer); 217 } 218 219 //----------------------------------------------------------------------- 220 /** 221 * <p>Converts all the whitespace separated words in a String into capitalized words, 222 * that is each word is made up of a titlecase character and then a series of 223 * lowercase characters. </p> 224 * 225 * <p>Whitespace is defined by {@link Character#isWhitespace(char)}. 226 * A <code>null</code> input String returns <code>null</code>. 227 * Capitalization uses the Unicode title case, normally equivalent to 228 * upper case.</p> 229 * 230 * <pre> 231 * WordUtils.capitalizeFully(null) = null 232 * WordUtils.capitalizeFully("") = "" 233 * WordUtils.capitalizeFully("i am FINE") = "I Am Fine" 234 * </pre> 235 * 236 * @param str the String to capitalize, may be null 237 * @return capitalized String, <code>null</code> if null String input 238 */ 239 public static String capitalizeFully(final String str) { 240 return capitalizeFully(str, null); 241 } 242 243 /** 244 * <p>Converts all the delimiter separated words in a String into capitalized words, 245 * that is each word is made up of a titlecase character and then a series of 246 * lowercase characters. </p> 247 * 248 * <p>The delimiters represent a set of characters understood to separate words. 249 * The first string character and the first non-delimiter character after a 250 * delimiter will be capitalized. </p> 251 * 252 * <p>A <code>null</code> input String returns <code>null</code>. 253 * Capitalization uses the Unicode title case, normally equivalent to 254 * upper case.</p> 255 * 256 * <pre> 257 * WordUtils.capitalizeFully(null, *) = null 258 * WordUtils.capitalizeFully("", *) = "" 259 * WordUtils.capitalizeFully(*, null) = * 260 * WordUtils.capitalizeFully(*, new char[0]) = * 261 * WordUtils.capitalizeFully("i aM.fine", {'.'}) = "I am.Fine" 262 * </pre> 263 * 264 * @param str the String to capitalize, may be null 265 * @param delimiters set of characters to determine capitalization, null means whitespace 266 * @return capitalized String, <code>null</code> if null String input 267 * @since 2.1 268 */ 269 public static String capitalizeFully(String str, final char... delimiters) { 270 final int delimLen = delimiters == null ? -1 : delimiters.length; 271 if (StringUtils.isEmpty(str) || delimLen == 0) { 272 return str; 273 } 274 str = str.toLowerCase(); 275 return capitalize(str, delimiters); 276 } 277 278 //----------------------------------------------------------------------- 279 /** 280 * <p>Uncapitalizes all the whitespace separated words in a String. 281 * Only the first letter of each word is changed.</p> 282 * 283 * <p>Whitespace is defined by {@link Character#isWhitespace(char)}. 284 * A <code>null</code> input String returns <code>null</code>.</p> 285 * 286 * <pre> 287 * WordUtils.uncapitalize(null) = null 288 * WordUtils.uncapitalize("") = "" 289 * WordUtils.uncapitalize("I Am FINE") = "i am fINE" 290 * </pre> 291 * 292 * @param str the String to uncapitalize, may be null 293 * @return uncapitalized String, <code>null</code> if null String input 294 * @see #capitalize(String) 295 */ 296 public static String uncapitalize(final String str) { 297 return uncapitalize(str, null); 298 } 299 300 /** 301 * <p>Uncapitalizes all the whitespace separated words in a String. 302 * Only the first letter of each word is changed.</p> 303 * 304 * <p>The delimiters represent a set of characters understood to separate words. 305 * The first string character and the first non-delimiter character after a 306 * delimiter will be uncapitalized. </p> 307 * 308 * <p>Whitespace is defined by {@link Character#isWhitespace(char)}. 309 * A <code>null</code> input String returns <code>null</code>.</p> 310 * 311 * <pre> 312 * WordUtils.uncapitalize(null, *) = null 313 * WordUtils.uncapitalize("", *) = "" 314 * WordUtils.uncapitalize(*, null) = * 315 * WordUtils.uncapitalize(*, new char[0]) = * 316 * WordUtils.uncapitalize("I AM.FINE", {'.'}) = "i AM.fINE" 317 * </pre> 318 * 319 * @param str the String to uncapitalize, may be null 320 * @param delimiters set of characters to determine uncapitalization, null means whitespace 321 * @return uncapitalized String, <code>null</code> if null String input 322 * @see #capitalize(String) 323 * @since 2.1 324 */ 325 public static String uncapitalize(final String str, final char... delimiters) { 326 final int delimLen = delimiters == null ? -1 : delimiters.length; 327 if (StringUtils.isEmpty(str) || delimLen == 0) { 328 return str; 329 } 330 final char[] buffer = str.toCharArray(); 331 boolean uncapitalizeNext = true; 332 for (int i = 0; i < buffer.length; i++) { 333 final char ch = buffer[i]; 334 if (isDelimiter(ch, delimiters)) { 335 uncapitalizeNext = true; 336 } else if (uncapitalizeNext) { 337 buffer[i] = Character.toLowerCase(ch); 338 uncapitalizeNext = false; 339 } 340 } 341 return new String(buffer); 342 } 343 344 //----------------------------------------------------------------------- 345 /** 346 * <p>Swaps the case of a String using a word based algorithm.</p> 347 * 348 * <ul> 349 * <li>Upper case character converts to Lower case</li> 350 * <li>Title case character converts to Lower case</li> 351 * <li>Lower case character after Whitespace or at start converts to Title case</li> 352 * <li>Other Lower case character converts to Upper case</li> 353 * </ul> 354 * 355 * <p>Whitespace is defined by {@link Character#isWhitespace(char)}. 356 * A <code>null</code> input String returns <code>null</code>.</p> 357 * 358 * <pre> 359 * StringUtils.swapCase(null) = null 360 * StringUtils.swapCase("") = "" 361 * StringUtils.swapCase("The dog has a BONE") = "tHE DOG HAS A bone" 362 * </pre> 363 * 364 * @param str the String to swap case, may be null 365 * @return the changed String, <code>null</code> if null String input 366 */ 367 public static String swapCase(final String str) { 368 if (StringUtils.isEmpty(str)) { 369 return str; 370 } 371 final char[] buffer = str.toCharArray(); 372 373 boolean whitespace = true; 374 375 for (int i = 0; i < buffer.length; i++) { 376 final char ch = buffer[i]; 377 if (Character.isUpperCase(ch)) { 378 buffer[i] = Character.toLowerCase(ch); 379 whitespace = false; 380 } else if (Character.isTitleCase(ch)) { 381 buffer[i] = Character.toLowerCase(ch); 382 whitespace = false; 383 } else if (Character.isLowerCase(ch)) { 384 if (whitespace) { 385 buffer[i] = Character.toTitleCase(ch); 386 whitespace = false; 387 } else { 388 buffer[i] = Character.toUpperCase(ch); 389 } 390 } else { 391 whitespace = Character.isWhitespace(ch); 392 } 393 } 394 return new String(buffer); 395 } 396 397 //----------------------------------------------------------------------- 398 /** 399 * <p>Extracts the initial letters from each word in the String.</p> 400 * 401 * <p>The first letter of the string and all first letters after 402 * whitespace are returned as a new string. 403 * Their case is not changed.</p> 404 * 405 * <p>Whitespace is defined by {@link Character#isWhitespace(char)}. 406 * A <code>null</code> input String returns <code>null</code>.</p> 407 * 408 * <pre> 409 * WordUtils.initials(null) = null 410 * WordUtils.initials("") = "" 411 * WordUtils.initials("Ben John Lee") = "BJL" 412 * WordUtils.initials("Ben J.Lee") = "BJ" 413 * </pre> 414 * 415 * @param str the String to get initials from, may be null 416 * @return String of initial letters, <code>null</code> if null String input 417 * @see #initials(String,char[]) 418 * @since 2.2 419 */ 420 public static String initials(final String str) { 421 return initials(str, null); 422 } 423 424 /** 425 * <p>Extracts the initial letters from each word in the String.</p> 426 * 427 * <p>The first letter of the string and all first letters after the 428 * defined delimiters are returned as a new string. 429 * Their case is not changed.</p> 430 * 431 * <p>If the delimiters array is null, then Whitespace is used. 432 * Whitespace is defined by {@link Character#isWhitespace(char)}. 433 * A <code>null</code> input String returns <code>null</code>. 434 * An empty delimiter array returns an empty String.</p> 435 * 436 * <pre> 437 * WordUtils.initials(null, *) = null 438 * WordUtils.initials("", *) = "" 439 * WordUtils.initials("Ben John Lee", null) = "BJL" 440 * WordUtils.initials("Ben J.Lee", null) = "BJ" 441 * WordUtils.initials("Ben J.Lee", [' ','.']) = "BJL" 442 * WordUtils.initials(*, new char[0]) = "" 443 * </pre> 444 * 445 * @param str the String to get initials from, may be null 446 * @param delimiters set of characters to determine words, null means whitespace 447 * @return String of initial letters, <code>null</code> if null String input 448 * @see #initials(String) 449 * @since 2.2 450 */ 451 public static String initials(final String str, final char... delimiters) { 452 if (StringUtils.isEmpty(str)) { 453 return str; 454 } 455 if (delimiters != null && delimiters.length == 0) { 456 return ""; 457 } 458 final int strLen = str.length(); 459 final char[] buf = new char[strLen / 2 + 1]; 460 int count = 0; 461 boolean lastWasGap = true; 462 for (int i = 0; i < strLen; i++) { 463 final char ch = str.charAt(i); 464 465 if (isDelimiter(ch, delimiters)) { 466 lastWasGap = true; 467 } else if (lastWasGap) { 468 buf[count++] = ch; 469 lastWasGap = false; 470 } else { 471 continue; // ignore ch 472 } 473 } 474 return new String(buf, 0, count); 475 } 476 477 //----------------------------------------------------------------------- 478 /** 479 * Is the character a delimiter. 480 * 481 * @param ch the character to check 482 * @param delimiters the delimiters 483 * @return true if it is a delimiter 484 */ 485 private static boolean isDelimiter(final char ch, final char[] delimiters) { 486 if (delimiters == null) { 487 return Character.isWhitespace(ch); 488 } 489 for (final char delimiter : delimiters) { 490 if (ch == delimiter) { 491 return true; 492 } 493 } 494 return false; 495 } 496 497}