001 /*
002 * Licensed to the Apache Software Foundation (ASF) under one or more
003 * contributor license agreements. See the NOTICE file distributed with
004 * this work for additional information regarding copyright ownership.
005 * The ASF licenses this file to You under the Apache License, Version 2.0
006 * (the "License"); you may not use this file except in compliance with
007 * the License. You may obtain a copy of the License at
008 *
009 * http://www.apache.org/licenses/LICENSE-2.0
010 *
011 * Unless required by applicable law or agreed to in writing, software
012 * distributed under the License is distributed on an "AS IS" BASIS,
013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 * See the License for the specific language governing permissions and
015 * limitations under the License.
016 */
017 package org.apache.commons.lang3.text;
018
019 import org.apache.commons.lang3.StringUtils;
020 import org.apache.commons.lang3.SystemUtils;
021
/**
 * <p>Operations on Strings that contain words.</p>
 *
 * <p>This class tries to handle <code>null</code> input gracefully.
 * An exception will not be thrown for a <code>null</code> input.
 * Each method documents its behaviour in more detail.</p>
 *
 * <p>All case conversions work on individual <code>char</code> values, so
 * supplementary characters (encoded as surrogate pairs) are passed through
 * unchanged.</p>
 *
 * @since 2.0
 * @version $Id: WordUtils.java 1144929 2011-07-10 18:26:16Z ggregory $
 */
public class WordUtils {

    /**
     * <p><code>WordUtils</code> instances should NOT be constructed in
     * standard programming. Instead, the class should be used as
     * <code>WordUtils.wrap("foo bar", 20);</code>.</p>
     *
     * <p>This constructor is public to permit tools that require a JavaBean
     * instance to operate.</p>
     */
    public WordUtils() {
        super();
    }

    // Wrapping
    //--------------------------------------------------------------------------
    /**
     * <p>Wraps a single line of text, identifying words by <code>' '</code>.</p>
     *
     * <p>New lines will be separated by the system property line separator.
     * Very long words, such as URLs will <i>not</i> be wrapped.</p>
     *
     * <p>Leading spaces on a new line are stripped.
     * Trailing spaces are not stripped.</p>
     *
     * <pre>
     * WordUtils.wrap(null, *) = null
     * WordUtils.wrap("", *) = ""
     * </pre>
     *
     * @param str  the String to be word wrapped, may be null
     * @param wrapLength  the column to wrap the words at, less than 1 is treated as 1
     * @return a line with newlines inserted, <code>null</code> if null input
     */
    public static String wrap(String str, int wrapLength) {
        return wrap(str, wrapLength, null, false);
    }

    /**
     * <p>Wraps a single line of text, identifying words by <code>' '</code>.</p>
     *
     * <p>Leading spaces on a new line are stripped, including on the last
     * line produced by wrapping. Trailing spaces are not stripped.
     * Input that already fits within <code>wrapLength</code> is returned
     * unchanged.</p>
     *
     * <pre>
     * WordUtils.wrap(null, *, *, *) = null
     * WordUtils.wrap("", *, *, *) = ""
     * WordUtils.wrap("abc  de", 3, "\n", false) = "abc\nde"
     * WordUtils.wrap("abcdefgh", 3, "\n", true) = "abc\ndef\ngh"
     * WordUtils.wrap("abcdefgh ij", 3, "\n", false) = "abcdefgh\nij"
     * </pre>
     *
     * @param str  the String to be word wrapped, may be null
     * @param wrapLength  the column to wrap the words at, less than 1 is treated as 1
     * @param newLineStr  the string to insert for a new line,
     *  <code>null</code> uses the system property line separator
     * @param wrapLongWords  true if long words (such as URLs) should be wrapped
     * @return a line with newlines inserted, <code>null</code> if null input
     */
    public static String wrap(String str, int wrapLength, String newLineStr, boolean wrapLongWords) {
        if (str == null) {
            return null;
        }
        if (newLineStr == null) {
            newLineStr = System.lineSeparator();
        }
        if (wrapLength < 1) {
            wrapLength = 1;
        }
        final int inputLineLength = str.length();
        int offset = 0;
        final StringBuilder wrappedLine = new StringBuilder(inputLineLength + 32);

        // Keep emitting lines while the unprocessed tail is too long for one line.
        while (inputLineLength - offset > wrapLength) {
            if (str.charAt(offset) == ' ') {
                // skip a leading space
                offset++;
                continue;
            }
            int spaceToWrapAt = str.lastIndexOf(' ', wrapLength + offset);

            if (spaceToWrapAt >= offset) {
                // normal case: break at the last space that fits
                wrappedLine.append(str, offset, spaceToWrapAt).append(newLineStr);
                offset = spaceToWrapAt + 1;
            } else if (wrapLongWords) {
                // really long word or URL: cut it one full line at a time
                wrappedLine.append(str, offset, wrapLength + offset).append(newLineStr);
                offset += wrapLength;
            } else {
                // really long word or URL: let it run past the limit up to the next space
                spaceToWrapAt = str.indexOf(' ', wrapLength + offset);
                if (spaceToWrapAt >= 0) {
                    wrappedLine.append(str, offset, spaceToWrapAt).append(newLineStr);
                    offset = spaceToWrapAt + 1;
                } else {
                    wrappedLine.append(str, offset, inputLineLength);
                    offset = inputLineLength;
                }
            }
        }

        // Output is non-empty only if at least one line was emitted above, in
        // which case the remainder starts a new line and must not begin with spaces.
        if (wrappedLine.length() > 0) {
            while (offset < inputLineLength && str.charAt(offset) == ' ') {
                offset++;
            }
        }

        // Whatever is left is short enough to just pass through
        wrappedLine.append(str, offset, inputLineLength);

        return wrappedLine.toString();
    }

    // Capitalizing
    //-----------------------------------------------------------------------
    /**
     * <p>Capitalizes all the whitespace separated words in a String.
     * Only the first letter of each word is changed. To convert the
     * rest of each word to lowercase at the same time,
     * use {@link #capitalizeFully(String)}.</p>
     *
     * <p>Whitespace is defined by {@link Character#isWhitespace(char)}.
     * A <code>null</code> input String returns <code>null</code>.
     * Capitalization uses the unicode title case, normally equivalent to
     * upper case.</p>
     *
     * <pre>
     * WordUtils.capitalize(null) = null
     * WordUtils.capitalize("") = ""
     * WordUtils.capitalize("i am FINE") = "I Am FINE"
     * </pre>
     *
     * @param str  the String to capitalize, may be null
     * @return capitalized String, <code>null</code> if null String input
     * @see #uncapitalize(String)
     * @see #capitalizeFully(String)
     */
    public static String capitalize(String str) {
        return capitalize(str, (char[]) null);
    }

    /**
     * <p>Capitalizes all the delimiter separated words in a String.
     * Only the first letter of each word is changed. To convert the
     * rest of each word to lowercase at the same time,
     * use {@link #capitalizeFully(String, char[])}.</p>
     *
     * <p>The delimiters represent a set of characters understood to separate words.
     * The first string character and the first non-delimiter character after a
     * delimiter will be capitalized.</p>
     *
     * <p>A <code>null</code> input String returns <code>null</code>.
     * Capitalization uses the unicode title case, normally equivalent to
     * upper case.</p>
     *
     * <pre>
     * WordUtils.capitalize(null, *) = null
     * WordUtils.capitalize("", *) = ""
     * WordUtils.capitalize(*, new char[0]) = *
     * WordUtils.capitalize("i am fine", null) = "I Am Fine"
     * WordUtils.capitalize("i aM.fine", {'.'}) = "I aM.Fine"
     * </pre>
     *
     * @param str  the String to capitalize, may be null
     * @param delimiters  set of characters to determine capitalization, null means whitespace
     * @return capitalized String, <code>null</code> if null String input
     * @see #uncapitalize(String)
     * @see #capitalizeFully(String)
     * @since 2.1
     */
    public static String capitalize(String str, char... delimiters) {
        if (isEmpty(str) || isEmptyDelimiterSet(delimiters)) {
            return str;
        }
        final char[] buffer = str.toCharArray();
        boolean capitalizeNext = true;
        for (int i = 0; i < buffer.length; i++) {
            final char ch = buffer[i];
            if (isDelimiter(ch, delimiters)) {
                capitalizeNext = true;
            } else if (capitalizeNext) {
                buffer[i] = Character.toTitleCase(ch);
                capitalizeNext = false;
            }
        }
        return new String(buffer);
    }

    //-----------------------------------------------------------------------
    /**
     * <p>Converts all the whitespace separated words in a String into capitalized words,
     * that is each word is made up of a titlecase character and then a series of
     * lowercase characters.</p>
     *
     * <p>Whitespace is defined by {@link Character#isWhitespace(char)}.
     * A <code>null</code> input String returns <code>null</code>.
     * Capitalization uses the unicode title case, normally equivalent to
     * upper case.</p>
     *
     * <pre>
     * WordUtils.capitalizeFully(null) = null
     * WordUtils.capitalizeFully("") = ""
     * WordUtils.capitalizeFully("i am FINE") = "I Am Fine"
     * </pre>
     *
     * @param str  the String to capitalize, may be null
     * @return capitalized String, <code>null</code> if null String input
     */
    public static String capitalizeFully(String str) {
        return capitalizeFully(str, (char[]) null);
    }

    /**
     * <p>Converts all the delimiter separated words in a String into capitalized words,
     * that is each word is made up of a titlecase character and then a series of
     * lowercase characters.</p>
     *
     * <p>The delimiters represent a set of characters understood to separate words.
     * The first string character and the first non-delimiter character after a
     * delimiter will be capitalized.</p>
     *
     * <p>A <code>null</code> input String returns <code>null</code>.
     * Capitalization uses the unicode title case, normally equivalent to
     * upper case. The preliminary lower-casing is done with
     * {@link String#toLowerCase()}, which applies the rules of the default locale.</p>
     *
     * <pre>
     * WordUtils.capitalizeFully(null, *) = null
     * WordUtils.capitalizeFully("", *) = ""
     * WordUtils.capitalizeFully(*, new char[0]) = *
     * WordUtils.capitalizeFully("i aM fine", null) = "I Am Fine"
     * WordUtils.capitalizeFully("i aM.fine", {'.'}) = "I am.Fine"
     * </pre>
     *
     * @param str  the String to capitalize, may be null
     * @param delimiters  set of characters to determine capitalization, null means whitespace
     * @return capitalized String, <code>null</code> if null String input
     * @since 2.1
     */
    public static String capitalizeFully(String str, char... delimiters) {
        if (isEmpty(str) || isEmptyDelimiterSet(delimiters)) {
            return str;
        }
        return capitalize(str.toLowerCase(), delimiters);
    }

    //-----------------------------------------------------------------------
    /**
     * <p>Uncapitalizes all the whitespace separated words in a String.
     * Only the first letter of each word is changed.</p>
     *
     * <p>Whitespace is defined by {@link Character#isWhitespace(char)}.
     * A <code>null</code> input String returns <code>null</code>.</p>
     *
     * <pre>
     * WordUtils.uncapitalize(null) = null
     * WordUtils.uncapitalize("") = ""
     * WordUtils.uncapitalize("I Am FINE") = "i am fINE"
     * </pre>
     *
     * @param str  the String to uncapitalize, may be null
     * @return uncapitalized String, <code>null</code> if null String input
     * @see #capitalize(String)
     */
    public static String uncapitalize(String str) {
        return uncapitalize(str, (char[]) null);
    }

    /**
     * <p>Uncapitalizes all the delimiter separated words in a String.
     * Only the first letter of each word is changed.</p>
     *
     * <p>The delimiters represent a set of characters understood to separate words.
     * The first string character and the first non-delimiter character after a
     * delimiter will be uncapitalized.</p>
     *
     * <p>If the delimiters array is null, then whitespace is used.
     * Whitespace is defined by {@link Character#isWhitespace(char)}.
     * A <code>null</code> input String returns <code>null</code>.</p>
     *
     * <pre>
     * WordUtils.uncapitalize(null, *) = null
     * WordUtils.uncapitalize("", *) = ""
     * WordUtils.uncapitalize(*, new char[0]) = *
     * WordUtils.uncapitalize("I Am FINE", null) = "i am fINE"
     * WordUtils.uncapitalize("I AM.FINE", {'.'}) = "i AM.fINE"
     * </pre>
     *
     * @param str  the String to uncapitalize, may be null
     * @param delimiters  set of characters to determine uncapitalization, null means whitespace
     * @return uncapitalized String, <code>null</code> if null String input
     * @see #capitalize(String)
     * @since 2.1
     */
    public static String uncapitalize(String str, char... delimiters) {
        if (isEmpty(str) || isEmptyDelimiterSet(delimiters)) {
            return str;
        }
        final char[] buffer = str.toCharArray();
        boolean uncapitalizeNext = true;
        for (int i = 0; i < buffer.length; i++) {
            final char ch = buffer[i];
            if (isDelimiter(ch, delimiters)) {
                uncapitalizeNext = true;
            } else if (uncapitalizeNext) {
                buffer[i] = Character.toLowerCase(ch);
                uncapitalizeNext = false;
            }
        }
        return new String(buffer);
    }

    //-----------------------------------------------------------------------
    /**
     * <p>Swaps the case of a String using a word based algorithm.</p>
     *
     * <ul>
     *  <li>Upper case character converts to Lower case</li>
     *  <li>Title case character converts to Lower case</li>
     *  <li>Lower case character after Whitespace or at start converts to Title case</li>
     *  <li>Other Lower case character converts to Upper case</li>
     * </ul>
     *
     * <p>Whitespace is defined by {@link Character#isWhitespace(char)}.
     * A <code>null</code> input String returns <code>null</code>.</p>
     *
     * <pre>
     * WordUtils.swapCase(null) = null
     * WordUtils.swapCase("") = ""
     * WordUtils.swapCase("The dog has a BONE") = "tHE DOG HAS A bone"
     * </pre>
     *
     * @param str  the String to swap case, may be null
     * @return the changed String, <code>null</code> if null String input
     */
    public static String swapCase(String str) {
        if (isEmpty(str)) {
            return str;
        }
        final char[] buffer = str.toCharArray();

        // true at the start of the string and directly after a whitespace character
        boolean afterWhitespace = true;

        for (int i = 0; i < buffer.length; i++) {
            final char ch = buffer[i];
            if (Character.isUpperCase(ch) || Character.isTitleCase(ch)) {
                buffer[i] = Character.toLowerCase(ch);
                afterWhitespace = false;
            } else if (Character.isLowerCase(ch)) {
                buffer[i] = afterWhitespace ? Character.toTitleCase(ch) : Character.toUpperCase(ch);
                afterWhitespace = false;
            } else {
                // caseless character: only whitespace starts a new word
                afterWhitespace = Character.isWhitespace(ch);
            }
        }
        return new String(buffer);
    }

    //-----------------------------------------------------------------------
    /**
     * <p>Extracts the initial letters from each word in the String.</p>
     *
     * <p>The first letter of the string and all first letters after
     * whitespace are returned as a new string.
     * Their case is not changed.</p>
     *
     * <p>Whitespace is defined by {@link Character#isWhitespace(char)}.
     * A <code>null</code> input String returns <code>null</code>.</p>
     *
     * <pre>
     * WordUtils.initials(null) = null
     * WordUtils.initials("") = ""
     * WordUtils.initials("Ben John Lee") = "BJL"
     * WordUtils.initials("Ben J.Lee") = "BJ"
     * </pre>
     *
     * @param str  the String to get initials from, may be null
     * @return String of initial letters, <code>null</code> if null String input
     * @see #initials(String,char[])
     * @since 2.2
     */
    public static String initials(String str) {
        return initials(str, (char[]) null);
    }

    /**
     * <p>Extracts the initial letters from each word in the String.</p>
     *
     * <p>The first letter of the string and all first letters after the
     * defined delimiters are returned as a new string.
     * Their case is not changed.</p>
     *
     * <p>If the delimiters array is null, then Whitespace is used.
     * Whitespace is defined by {@link Character#isWhitespace(char)}.
     * A <code>null</code> input String returns <code>null</code>.
     * An empty delimiter array returns an empty String.</p>
     *
     * <pre>
     * WordUtils.initials(null, *) = null
     * WordUtils.initials("", *) = ""
     * WordUtils.initials("Ben John Lee", null) = "BJL"
     * WordUtils.initials("Ben J.Lee", null) = "BJ"
     * WordUtils.initials("Ben J.Lee", [' ','.']) = "BJL"
     * WordUtils.initials(*, new char[0]) = ""
     * </pre>
     *
     * @param str  the String to get initials from, may be null
     * @param delimiters  set of characters to determine words, null means whitespace
     * @return String of initial letters, <code>null</code> if null String input
     * @see #initials(String)
     * @since 2.2
     */
    public static String initials(String str, char... delimiters) {
        if (isEmpty(str)) {
            return str;
        }
        if (isEmptyDelimiterSet(delimiters)) {
            return "";
        }
        final int strLen = str.length();
        // Every initial but the first needs at least one delimiter before it,
        // so there can never be more than strLen / 2 + 1 of them.
        final char[] buf = new char[strLen / 2 + 1];
        int count = 0;
        boolean lastWasGap = true;
        for (int i = 0; i < strLen; i++) {
            final char ch = str.charAt(i);
            if (isDelimiter(ch, delimiters)) {
                lastWasGap = true;
            } else if (lastWasGap) {
                buf[count++] = ch;
                lastWasGap = false;
            }
            // any other character is inside a word and is ignored
        }
        return new String(buf, 0, count);
    }

    //-----------------------------------------------------------------------
    /**
     * Is the character a delimiter.
     *
     * @param ch  the character to check
     * @param delimiters  the delimiters, <code>null</code> means whitespace
     *  as defined by {@link Character#isWhitespace(char)}
     * @return true if it is a delimiter
     */
    private static boolean isDelimiter(char ch, char[] delimiters) {
        if (delimiters == null) {
            return Character.isWhitespace(ch);
        }
        for (char delimiter : delimiters) {
            if (ch == delimiter) {
                return true;
            }
        }
        return false;
    }

    /**
     * Is the delimiter set explicitly empty, meaning nothing separates words.
     *
     * @param delimiters  the delimiters, may be null
     * @return true only for a non-null array of length zero
     */
    private static boolean isEmptyDelimiterSet(char[] delimiters) {
        return delimiters != null && delimiters.length == 0;
    }

    /**
     * Is the String <code>null</code> or of zero length.
     *
     * @param str  the String to check, may be null
     * @return true if there are no characters to process
     */
    private static boolean isEmpty(String str) {
        return str == null || str.length() == 0;
    }

}