View Javadoc

1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one or more
3    * contributor license agreements.  See the NOTICE file distributed with
4    * this work for additional information regarding copyright ownership.
5    * The ASF licenses this file to You under the Apache License, Version 2.0
6    * (the "License"); you may not use this file except in compliance with
7    * the License.  You may obtain a copy of the License at
8    * 
9    *      http://www.apache.org/licenses/LICENSE-2.0
10   * 
11   * Unless required by applicable law or agreed to in writing, software
12   * distributed under the License is distributed on an "AS IS" BASIS,
13   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14   * See the License for the specific language governing permissions and
15   * limitations under the License.
16   */
17  package org.apache.commons.lang3.text;
18  
19  import org.apache.commons.lang3.StringUtils;
20  import org.apache.commons.lang3.SystemUtils;
21  
22  /**
23   * <p>Operations on Strings that contain words.</p>
24   * 
25   * <p>This class tries to handle <code>null</code> input gracefully.
26   * An exception will not be thrown for a <code>null</code> input.
27   * Each method documents its behaviour in more detail.</p>
28   * 
29   * @since 2.0
30   * @version $Id: WordUtils.java 1436770 2013-01-22 07:09:45Z ggregory $
31   */
public class WordUtils {

    /**
     * <p><code>WordUtils</code> instances should NOT be constructed in
     * standard programming. Instead, the class should be used as
     * <code>WordUtils.wrap("foo bar", 20);</code>.</p>
     *
     * <p>This constructor is public to permit tools that require a JavaBean
     * instance to operate.</p>
     */
    public WordUtils() {
        super();
    }

    // Wrapping
    //--------------------------------------------------------------------------
    /**
     * <p>Wraps a single line of text, identifying words by <code>' '</code>.</p>
     *
     * <p>New lines will be separated by the system property line separator.
     * Very long words, such as URLs will <i>not</i> be wrapped.</p>
     *
     * <p>Leading spaces on a new line are stripped.
     * Input that already fits within <code>wrapLength</code> is returned
     * unchanged.</p>
     *
     * <pre>
     * WordUtils.wrap(null, *) = null
     * WordUtils.wrap("", *) = ""
     * </pre>
     *
     * @param str  the String to be word wrapped, may be null
     * @param wrapLength  the column to wrap the words at, less than 1 is treated as 1
     * @return a line with newlines inserted, <code>null</code> if null input
     */
    public static String wrap(final String str, final int wrapLength) {
        return wrap(str, wrapLength, null, false);
    }

    /**
     * <p>Wraps a single line of text, identifying words by <code>' '</code>.</p>
     *
     * <p>Leading spaces on a new line are stripped, including on the last
     * line. Spaces inside a line are kept. Input that already fits within
     * <code>wrapLength</code> is returned unchanged.</p>
     *
     * <pre>
     * WordUtils.wrap(null, *, *, *) = null
     * WordUtils.wrap("", *, *, *) = ""
     * WordUtils.wrap("Here is one line of text", 10, "\n", false) = "Here is\none line\nof text"
     * WordUtils.wrap("abc   de", 3, "\n", false) = "abc\nde"
     * WordUtils.wrap("abcdefghij", 3, "\n", true) = "abc\ndef\nghi\nj"
     * </pre>
     *
     * @param str  the String to be word wrapped, may be null
     * @param wrapLength  the column to wrap the words at, less than 1 is treated as 1
     * @param newLineStr  the string to insert for a new line,
     *  <code>null</code> uses the system property line separator
     * @param wrapLongWords  true if long words (such as URLs) should be wrapped
     * @return a line with newlines inserted, <code>null</code> if null input
     */
    public static String wrap(final String str, int wrapLength, String newLineStr, final boolean wrapLongWords) {
        if (str == null) {
            return null;
        }
        if (newLineStr == null) {
            newLineStr = System.getProperty("line.separator");
        }
        if (wrapLength < 1) {
            wrapLength = 1;
        }
        final int inputLineLength = str.length();
        int offset = 0;
        final StringBuilder wrappedLine = new StringBuilder(inputLineLength + 32);

        while (inputLineLength - offset > wrapLength) {
            if (str.charAt(offset) == ' ') {
                // strip a leading space and re-evaluate what is left
                offset++;
                continue;
            }
            int spaceToWrapAt = str.lastIndexOf(' ', wrapLength + offset);

            if (spaceToWrapAt >= offset) {
                // normal case: break at the last space that fits on the line
                wrappedLine.append(str, offset, spaceToWrapAt);
                wrappedLine.append(newLineStr);
                offset = spaceToWrapAt + 1;
            } else if (wrapLongWords) {
                // really long word or URL: cut it one full line at a time
                wrappedLine.append(str, offset, wrapLength + offset);
                wrappedLine.append(newLineStr);
                offset += wrapLength;
            } else {
                // do not wrap really long word, just extend beyond limit
                spaceToWrapAt = str.indexOf(' ', wrapLength + offset);
                if (spaceToWrapAt >= 0) {
                    wrappedLine.append(str, offset, spaceToWrapAt);
                    wrappedLine.append(newLineStr);
                    offset = spaceToWrapAt + 1;
                } else {
                    wrappedLine.append(str, offset, inputLineLength);
                    offset = inputLineLength;
                }
            }
        }

        // The loop stops as soon as the remainder fits, which may be in the
        // middle of a run of spaces. Once anything has been consumed, finish
        // stripping so the last line does not start with spaces. Input that
        // was never touched (offset == 0) is passed through unchanged.
        if (offset > 0) {
            while (offset < inputLineLength && str.charAt(offset) == ' ') {
                offset++;
            }
        }

        // Whatever is left in line is short enough to just pass through
        wrappedLine.append(str, offset, inputLineLength);

        return wrappedLine.toString();
    }

    // Capitalizing
    //-----------------------------------------------------------------------
    /**
     * <p>Capitalizes all the whitespace separated words in a String.
     * Only the first letter of each word is changed. To convert the
     * rest of each word to lowercase at the same time,
     * use {@link #capitalizeFully(String)}.</p>
     *
     * <p>Whitespace is defined by {@link Character#isWhitespace(int)}.
     * A <code>null</code> input String returns <code>null</code>.
     * Capitalization uses the Unicode title case, normally equivalent to
     * upper case.</p>
     *
     * <pre>
     * WordUtils.capitalize(null)        = null
     * WordUtils.capitalize("")          = ""
     * WordUtils.capitalize("i am FINE") = "I Am FINE"
     * </pre>
     *
     * @param str  the String to capitalize, may be null
     * @return capitalized String, <code>null</code> if null String input
     * @see #uncapitalize(String)
     * @see #capitalizeFully(String)
     */
    public static String capitalize(final String str) {
        return capitalize(str, (char[]) null);
    }

    /**
     * <p>Capitalizes all the delimiter separated words in a String.
     * Only the first letter of each word is changed. To convert the
     * rest of each word to lowercase at the same time,
     * use {@link #capitalizeFully(String, char[])}.</p>
     *
     * <p>The delimiters represent a set of characters understood to separate words.
     * The first string character and the first non-delimiter character after a
     * delimiter will be capitalized. </p>
     *
     * <p>A <code>null</code> input String returns <code>null</code>.
     * Capitalization uses the Unicode title case, normally equivalent to
     * upper case. The String is processed by Unicode code point, so letters
     * encoded as surrogate pairs are capitalized as well.</p>
     *
     * <pre>
     * WordUtils.capitalize(null, *)            = null
     * WordUtils.capitalize("", *)              = ""
     * WordUtils.capitalize(*, new char[0])     = *
     * WordUtils.capitalize("i am fine", null)  = "I Am Fine"
     * WordUtils.capitalize("i aM.fine", {'.'}) = "I aM.Fine"
     * </pre>
     *
     * @param str  the String to capitalize, may be null
     * @param delimiters  set of characters to determine capitalization, null means whitespace
     * @return capitalized String, <code>null</code> if null String input
     * @see #uncapitalize(String, char[])
     * @see #capitalizeFully(String, char[])
     * @since 2.1
     */
    public static String capitalize(final String str, final char... delimiters) {
        if (str == null || str.isEmpty() || (delimiters != null && delimiters.length == 0)) {
            return str;
        }
        return recaseWordStarts(str, delimiters, true);
    }

    //-----------------------------------------------------------------------
    /**
     * <p>Converts all the whitespace separated words in a String into capitalized words,
     * that is each word is made up of a titlecase character and then a series of
     * lowercase characters.  </p>
     *
     * <p>Whitespace is defined by {@link Character#isWhitespace(int)}.
     * A <code>null</code> input String returns <code>null</code>.
     * Capitalization uses the Unicode title case, normally equivalent to
     * upper case.</p>
     *
     * <pre>
     * WordUtils.capitalizeFully(null)        = null
     * WordUtils.capitalizeFully("")          = ""
     * WordUtils.capitalizeFully("i am FINE") = "I Am Fine"
     * </pre>
     *
     * @param str  the String to capitalize, may be null
     * @return capitalized String, <code>null</code> if null String input
     */
    public static String capitalizeFully(final String str) {
        return capitalizeFully(str, (char[]) null);
    }

    /**
     * <p>Converts all the delimiter separated words in a String into capitalized words,
     * that is each word is made up of a titlecase character and then a series of
     * lowercase characters. </p>
     *
     * <p>The delimiters represent a set of characters understood to separate words.
     * The first string character and the first non-delimiter character after a
     * delimiter will be capitalized. </p>
     *
     * <p>A <code>null</code> input String returns <code>null</code>.
     * Capitalization uses the Unicode title case, normally equivalent to
     * upper case. The String is first lower-cased with
     * {@link String#toLowerCase()}, which uses the rules of the default
     * locale.</p>
     *
     * <pre>
     * WordUtils.capitalizeFully(null, *)            = null
     * WordUtils.capitalizeFully("", *)              = ""
     * WordUtils.capitalizeFully(*, new char[0])     = *
     * WordUtils.capitalizeFully("i am FINE", null)  = "I Am Fine"
     * WordUtils.capitalizeFully("i aM.fine", {'.'}) = "I am.Fine"
     * </pre>
     *
     * @param str  the String to capitalize, may be null
     * @param delimiters  set of characters to determine capitalization, null means whitespace
     * @return capitalized String, <code>null</code> if null String input
     * @since 2.1
     */
    public static String capitalizeFully(String str, final char... delimiters) {
        if (str == null || str.isEmpty() || (delimiters != null && delimiters.length == 0)) {
            return str;
        }
        str = str.toLowerCase();
        return capitalize(str, delimiters);
    }

    //-----------------------------------------------------------------------
    /**
     * <p>Uncapitalizes all the whitespace separated words in a String.
     * Only the first letter of each word is changed.</p>
     *
     * <p>Whitespace is defined by {@link Character#isWhitespace(int)}.
     * A <code>null</code> input String returns <code>null</code>.</p>
     *
     * <pre>
     * WordUtils.uncapitalize(null)        = null
     * WordUtils.uncapitalize("")          = ""
     * WordUtils.uncapitalize("I Am FINE") = "i am fINE"
     * </pre>
     *
     * @param str  the String to uncapitalize, may be null
     * @return uncapitalized String, <code>null</code> if null String input
     * @see #capitalize(String)
     */
    public static String uncapitalize(final String str) {
        return uncapitalize(str, (char[]) null);
    }

    /**
     * <p>Uncapitalizes all the delimiter separated words in a String.
     * Only the first letter of each word is changed.</p>
     *
     * <p>The delimiters represent a set of characters understood to separate words.
     * The first string character and the first non-delimiter character after a
     * delimiter will be uncapitalized. </p>
     *
     * <p>A <code>null</code> input String returns <code>null</code>.
     * The String is processed by Unicode code point, so letters encoded as
     * surrogate pairs are uncapitalized as well.</p>
     *
     * <pre>
     * WordUtils.uncapitalize(null, *)            = null
     * WordUtils.uncapitalize("", *)              = ""
     * WordUtils.uncapitalize(*, new char[0])     = *
     * WordUtils.uncapitalize("I Am FINE", null)  = "i am fINE"
     * WordUtils.uncapitalize("I AM.FINE", {'.'}) = "i AM.fINE"
     * </pre>
     *
     * @param str  the String to uncapitalize, may be null
     * @param delimiters  set of characters to determine uncapitalization, null means whitespace
     * @return uncapitalized String, <code>null</code> if null String input
     * @see #capitalize(String, char[])
     * @since 2.1
     */
    public static String uncapitalize(final String str, final char... delimiters) {
        if (str == null || str.isEmpty() || (delimiters != null && delimiters.length == 0)) {
            return str;
        }
        return recaseWordStarts(str, delimiters, false);
    }

    /**
     * Re-cases the first code point of every delimiter separated word and
     * copies everything else unchanged.
     *
     * @param str  the non-empty String to process
     * @param delimiters  the delimiters, null means whitespace
     * @param toTitleCase  true to title-case word starts, false to lower-case them
     * @return the converted String
     */
    private static String recaseWordStarts(final String str, final char[] delimiters, final boolean toTitleCase) {
        final int strLen = str.length();
        final StringBuilder result = new StringBuilder(strLen);
        boolean atWordStart = true;
        for (int index = 0; index < strLen;) {
            // read whole code points so surrogate pairs are never split
            final int codePoint = str.codePointAt(index);
            index += Character.charCount(codePoint);
            if (isDelimiter(codePoint, delimiters)) {
                atWordStart = true;
                result.appendCodePoint(codePoint);
            } else if (atWordStart) {
                result.appendCodePoint(
                        toTitleCase ? Character.toTitleCase(codePoint) : Character.toLowerCase(codePoint));
                atWordStart = false;
            } else {
                result.appendCodePoint(codePoint);
            }
        }
        return result.toString();
    }

    //-----------------------------------------------------------------------
    /**
     * <p>Swaps the case of a String using a word based algorithm.</p>
     *
     * <ul>
     *  <li>Upper case character converts to Lower case</li>
     *  <li>Title case character converts to Lower case</li>
     *  <li>Lower case character after Whitespace or at start converts to Title case</li>
     *  <li>Other Lower case character converts to Upper case</li>
     * </ul>
     *
     * <p>Whitespace is defined by {@link Character#isWhitespace(int)}.
     * A <code>null</code> input String returns <code>null</code>.
     * The String is processed by Unicode code point, so letters encoded as
     * surrogate pairs are swapped as well.</p>
     *
     * <pre>
     * WordUtils.swapCase(null)                 = null
     * WordUtils.swapCase("")                   = ""
     * WordUtils.swapCase("The dog has a BONE") = "tHE DOG HAS A bone"
     * </pre>
     *
     * @param str  the String to swap case, may be null
     * @return the changed String, <code>null</code> if null String input
     */
    public static String swapCase(final String str) {
        if (str == null || str.isEmpty()) {
            return str;
        }
        final int strLen = str.length();
        final StringBuilder swapped = new StringBuilder(strLen);

        // true while the previous non-cased code point was whitespace (or at the start)
        boolean whitespace = true;

        for (int index = 0; index < strLen;) {
            final int codePoint = str.codePointAt(index);
            index += Character.charCount(codePoint);
            final int converted;
            if (Character.isUpperCase(codePoint) || Character.isTitleCase(codePoint)) {
                converted = Character.toLowerCase(codePoint);
                whitespace = false;
            } else if (Character.isLowerCase(codePoint)) {
                if (whitespace) {
                    converted = Character.toTitleCase(codePoint);
                    whitespace = false;
                } else {
                    converted = Character.toUpperCase(codePoint);
                }
            } else {
                converted = codePoint;
                whitespace = Character.isWhitespace(codePoint);
            }
            swapped.appendCodePoint(converted);
        }
        return swapped.toString();
    }

    //-----------------------------------------------------------------------
    /**
     * <p>Extracts the initial letters from each word in the String.</p>
     *
     * <p>The first letter of the string and all first letters after
     * whitespace are returned as a new string.
     * Their case is not changed.</p>
     *
     * <p>Whitespace is defined by {@link Character#isWhitespace(int)}.
     * A <code>null</code> input String returns <code>null</code>.</p>
     *
     * <pre>
     * WordUtils.initials(null)             = null
     * WordUtils.initials("")               = ""
     * WordUtils.initials("Ben John Lee")   = "BJL"
     * WordUtils.initials("Ben J.Lee")      = "BJ"
     * </pre>
     *
     * @param str  the String to get initials from, may be null
     * @return String of initial letters, <code>null</code> if null String input
     * @see #initials(String,char[])
     * @since 2.2
     */
    public static String initials(final String str) {
        return initials(str, (char[]) null);
    }

    /**
     * <p>Extracts the initial letters from each word in the String.</p>
     *
     * <p>The first letter of the string and all first letters after the
     * defined delimiters are returned as a new string.
     * Their case is not changed.</p>
     *
     * <p>If the delimiters array is null, then Whitespace is used.
     * Whitespace is defined by {@link Character#isWhitespace(int)}.
     * A <code>null</code> input String returns <code>null</code>.
     * An empty delimiter array returns an empty String.
     * Each initial is a whole Unicode code point, so a letter encoded as a
     * surrogate pair is returned intact.</p>
     *
     * <pre>
     * WordUtils.initials(null, *)                = null
     * WordUtils.initials("", *)                  = ""
     * WordUtils.initials("Ben John Lee", null)   = "BJL"
     * WordUtils.initials("Ben J.Lee", null)      = "BJ"
     * WordUtils.initials("Ben J.Lee", [' ','.']) = "BJL"
     * WordUtils.initials(*, new char[0])         = ""
     * </pre>
     *
     * @param str  the String to get initials from, may be null
     * @param delimiters  set of characters to determine words, null means whitespace
     * @return String of initial letters, <code>null</code> if null String input
     * @see #initials(String)
     * @since 2.2
     */
    public static String initials(final String str, final char... delimiters) {
        if (str == null || str.isEmpty()) {
            return str;
        }
        if (delimiters != null && delimiters.length == 0) {
            return "";
        }
        final int strLen = str.length();
        // every initial but the last is followed by at least one delimiter,
        // so there can never be more than strLen / 2 + 1 of them
        final int[] buf = new int[strLen / 2 + 1];
        int count = 0;
        boolean lastWasGap = true;
        for (int index = 0; index < strLen;) {
            final int codePoint = str.codePointAt(index);
            index += Character.charCount(codePoint);

            if (isDelimiter(codePoint, delimiters)) {
                lastWasGap = true;
            } else if (lastWasGap) {
                buf[count++] = codePoint;
                lastWasGap = false;
            }
        }
        return new String(buf, 0, count);
    }

    //-----------------------------------------------------------------------
    /**
     * Is the code point a delimiter.
     *
     * @param codePoint  the code point to check
     * @param delimiters  the delimiters, null means whitespace as defined
     *  by {@link Character#isWhitespace(int)}
     * @return true if it is a delimiter
     */
    private static boolean isDelimiter(final int codePoint, final char[] delimiters) {
        if (delimiters == null) {
            return Character.isWhitespace(codePoint);
        }
        for (final char delimiter : delimiters) {
            if (codePoint == delimiter) {
                return true;
            }
        }
        return false;
    }

}