View Javadoc
1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one or more
3    * contributor license agreements.  See the NOTICE file distributed with
4    * this work for additional information regarding copyright ownership.
5    * The ASF licenses this file to You under the Apache License, Version 2.0
6    * (the "License"); you may not use this file except in compliance with
7    * the License.  You may obtain a copy of the License at
8    * 
9    *      http://www.apache.org/licenses/LICENSE-2.0
10   * 
11   * Unless required by applicable law or agreed to in writing, software
12   * distributed under the License is distributed on an "AS IS" BASIS,
13   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14   * See the License for the specific language governing permissions and
15   * limitations under the License.
16   */
17  package org.apache.commons.lang3.text;
18  
19  import org.apache.commons.lang3.StringUtils;
20  import org.apache.commons.lang3.SystemUtils;
21  
/**
 * <p>Operations on Strings that contain words.</p>
 * 
 * <p>This class tries to handle <code>null</code> input gracefully.
 * An exception will not be thrown for a <code>null</code> input.
 * Each method documents its behaviour in more detail.</p>
 *
 * <p>The case-changing methods and {@link #initials(String, char[])} walk the
 * input by Unicode code point, so letters outside the Basic Multilingual Plane
 * (encoded as surrogate pairs) are handled as single characters.</p>
 * 
 * @since 2.0
 * @version $Id: WordUtils.java 1586649 2014-04-11 13:28:30Z britter $
 */
public class WordUtils {

    /**
     * <p><code>WordUtils</code> instances should NOT be constructed in
     * standard programming. Instead, the class should be used as
     * <code>WordUtils.wrap("foo bar", 20);</code>.</p>
     *
     * <p>This constructor is public to permit tools that require a JavaBean
     * instance to operate.</p>
     */
    public WordUtils() {
      super();
    }

    // Wrapping
    //--------------------------------------------------------------------------
    /**
     * <p>Wraps a single line of text, identifying words by <code>' '</code>.</p>
     * 
     * <p>New lines will be separated by the system property line separator.
     * Very long words, such as URLs will <i>not</i> be wrapped.</p>
     * 
     * <p>Leading spaces on a line (including the first) are stripped.
     * Trailing spaces are not stripped.</p>
     *
     * <table border="1" summary="Wrap Results">
     *  <tr>
     *   <th>input</th>
     *   <th>wrapLength</th>
     *   <th>result</th>
     *  </tr>
     *  <tr>
     *   <td>null</td>
     *   <td>*</td>
     *   <td>null</td>
     *  </tr>
     *  <tr>
     *   <td>""</td>
     *   <td>*</td>
     *   <td>""</td>
     *  </tr>
     *  <tr>
     *   <td>"Here is one line of text that is going to be wrapped after 20 columns."</td>
     *   <td>20</td>
     *   <td>"Here is one line of\ntext that is going\nto be wrapped after\n20 columns."</td>
     *  </tr>
     *  <tr>
     *   <td>"Click here to jump to the commons website - http://commons.apache.org"</td>
     *   <td>20</td>
     *   <td>"Click here to jump\nto the commons\nwebsite -\nhttp://commons.apache.org"</td>
     *  </tr>
     *  <tr>
     *   <td>"Click here, http://commons.apache.org, to jump to the commons website"</td>
     *   <td>20</td>
     *   <td>"Click here,\nhttp://commons.apache.org,\nto jump to the\ncommons website"</td>
     *  </tr>
     * </table>
     *
     * (assuming that '\n' is the systems line separator)
     *
     * @param str  the String to be word wrapped, may be null
     * @param wrapLength  the column to wrap the words at, less than 1 is treated as 1
     * @return a line with newlines inserted, <code>null</code> if null input
     */
    public static String wrap(final String str, final int wrapLength) {
        return wrap(str, wrapLength, null, false);
    }

    /**
     * <p>Wraps a single line of text, identifying words by <code>' '</code>.</p>
     * 
     * <p>Leading spaces on a line (including the first) are stripped.
     * Trailing spaces are not stripped.</p>
     *
     * <table border="1" summary="Wrap Results">
     *  <tr>
     *   <th>input</th>
     *   <th>wrapLength</th>
     *   <th>newLineString</th>
     *   <th>wrapLongWords</th>
     *   <th>result</th>
     *  </tr>
     *  <tr>
     *   <td>null</td>
     *   <td>*</td>
     *   <td>*</td>
     *   <td>true/false</td>
     *   <td>null</td>
     *  </tr>
     *  <tr>
     *   <td>""</td>
     *   <td>*</td>
     *   <td>*</td>
     *   <td>true/false</td>
     *   <td>""</td>
     *  </tr>
     *  <tr>
     *   <td>"Here is one line of text that is going to be wrapped after 20 columns."</td>
     *   <td>20</td>
     *   <td>"\n"</td>
     *   <td>true/false</td>
     *   <td>"Here is one line of\ntext that is going\nto be wrapped after\n20 columns."</td>
     *  </tr>
     *  <tr>
     *   <td>"Here is one line of text that is going to be wrapped after 20 columns."</td>
     *   <td>20</td>
     *   <td>"&lt;br /&gt;"</td>
     *   <td>true/false</td>
     *   <td>"Here is one line of&lt;br /&gt;text that is going&lt;br /&gt;to be wrapped after&lt;br /&gt;20 columns."</td>
     *  </tr>
     *  <tr>
     *   <td>"Here is one line of text that is going to be wrapped after 20 columns."</td>
     *   <td>20</td>
     *   <td>null</td>
     *   <td>true/false</td>
     *   <td>"Here is one line of" + systemNewLine + "text that is going" + systemNewLine + "to be wrapped after" + systemNewLine + "20 columns."</td>
     *  </tr>
     *  <tr>
     *   <td>"Click here to jump to the commons website - http://commons.apache.org"</td>
     *   <td>20</td>
     *   <td>"\n"</td>
     *   <td>false</td>
     *   <td>"Click here to jump\nto the commons\nwebsite -\nhttp://commons.apache.org"</td>
     *  </tr>
     *  <tr>
     *   <td>"Click here to jump to the commons website - http://commons.apache.org"</td>
     *   <td>20</td>
     *   <td>"\n"</td>
     *   <td>true</td>
     *   <td>"Click here to jump\nto the commons\nwebsite -\nhttp://commons.apach\ne.org"</td>
     *  </tr>
     * </table>
     *
     * <p>Lengths are measured in UTF-16 <code>char</code> units; when
     * <code>wrapLongWords</code> is true a long word is cut every
     * <code>wrapLength</code> chars.</p>
     *
     * @param str  the String to be word wrapped, may be null
     * @param wrapLength  the column to wrap the words at, less than 1 is treated as 1
     * @param newLineStr  the string to insert for a new line, 
     *  <code>null</code> uses the system property line separator
     * @param wrapLongWords  true if long words (such as URLs) should be wrapped
     * @return a line with newlines inserted, <code>null</code> if null input
     */
    public static String wrap(final String str, int wrapLength, String newLineStr, final boolean wrapLongWords) {
        if (str == null) {
            return null;
        }
        if (newLineStr == null) {
            newLineStr = System.getProperty("line.separator");
        }
        if (wrapLength < 1) {
            wrapLength = 1;
        }
        final int inputLineLength = str.length();
        int offset = 0;
        final StringBuilder wrappedLine = new StringBuilder(inputLineLength + 32);

        while (offset < inputLineLength) {
            // strip leading spaces of the line that is about to be emitted
            if (str.charAt(offset) == ' ') {
                offset++;
                continue;
            }
            // the remainder fits on one line: it is appended after the loop
            if (inputLineLength - offset <= wrapLength) {
                break;
            }
            // last space at or before the wrap column (searching backwards)
            int spaceToWrapAt = str.lastIndexOf(' ', wrapLength + offset);

            if (spaceToWrapAt >= offset) {
                // normal case: break the line at that space, dropping the space itself
                wrappedLine.append(str, offset, spaceToWrapAt).append(newLineStr);
                offset = spaceToWrapAt + 1;
            } else if (wrapLongWords) {
                // really long word or URL: cut it one full line at a time
                wrappedLine.append(str, offset, wrapLength + offset).append(newLineStr);
                offset += wrapLength;
            } else {
                // do not cut the long word, let it extend beyond the limit
                // and break at the first space that follows it, if any
                spaceToWrapAt = str.indexOf(' ', wrapLength + offset);
                if (spaceToWrapAt >= 0) {
                    wrappedLine.append(str, offset, spaceToWrapAt).append(newLineStr);
                    offset = spaceToWrapAt + 1;
                } else {
                    wrappedLine.append(str, offset, inputLineLength);
                    offset = inputLineLength;
                }
            }
        }

        // Whatever is left in line is short enough to just pass through
        wrappedLine.append(str, offset, inputLineLength);

        return wrappedLine.toString();
    }

    // Capitalizing
    //-----------------------------------------------------------------------
    /**
     * <p>Capitalizes all the whitespace separated words in a String.
     * Only the first letter of each word is changed. To convert the 
     * rest of each word to lowercase at the same time, 
     * use {@link #capitalizeFully(String)}.</p>
     *
     * <p>Whitespace is defined by {@link Character#isWhitespace(int)}.
     * A <code>null</code> input String returns <code>null</code>.
     * Capitalization uses the Unicode title case, normally equivalent to
     * upper case.</p>
     *
     * <pre>
     * WordUtils.capitalize(null)        = null
     * WordUtils.capitalize("")          = ""
     * WordUtils.capitalize("i am FINE") = "I Am FINE"
     * </pre>
     * 
     * @param str  the String to capitalize, may be null
     * @return capitalized String, <code>null</code> if null String input
     * @see #uncapitalize(String)
     * @see #capitalizeFully(String)
     */
    public static String capitalize(final String str) {
        return capitalize(str, (char[]) null);
    }

    /**
     * <p>Capitalizes all the delimiter separated words in a String.
     * Only the first letter of each word is changed. To convert the 
     * rest of each word to lowercase at the same time, 
     * use {@link #capitalizeFully(String, char[])}.</p>
     *
     * <p>The delimiters represent a set of characters understood to separate words.
     * The first string character and the first non-delimiter character after a
     * delimiter will be capitalized. </p>
     *
     * <p>A <code>null</code> input String returns <code>null</code>.
     * Capitalization uses the Unicode title case, normally equivalent to
     * upper case. The String is processed by code point, so supplementary
     * characters (surrogate pairs) are capitalized as well.</p>
     *
     * <pre>
     * WordUtils.capitalize(null, *)            = null
     * WordUtils.capitalize("", *)              = ""
     * WordUtils.capitalize(*, new char[0])     = *
     * WordUtils.capitalize("i am fine", null)  = "I Am Fine"
     * WordUtils.capitalize("i aM.fine", {'.'}) = "I aM.Fine"
     * </pre>
     * 
     * @param str  the String to capitalize, may be null
     * @param delimiters  set of characters to determine capitalization, null means whitespace
     * @return capitalized String, <code>null</code> if null String input
     * @see #uncapitalize(String)
     * @see #capitalizeFully(String)
     * @since 2.1
     */
    public static String capitalize(final String str, final char... delimiters) {
        if (str == null || str.isEmpty() || (delimiters != null && delimiters.length == 0)) {
            return str;
        }
        final int strLen = str.length();
        final StringBuilder result = new StringBuilder(strLen);
        boolean capitalizeNext = true;
        for (int index = 0; index < strLen; ) {
            final int codePoint = str.codePointAt(index);
            index += Character.charCount(codePoint);
            if (isDelimiter(codePoint, delimiters)) {
                capitalizeNext = true;
                result.appendCodePoint(codePoint);
            } else if (capitalizeNext) {
                result.appendCodePoint(Character.toTitleCase(codePoint));
                capitalizeNext = false;
            } else {
                result.appendCodePoint(codePoint);
            }
        }
        return result.toString();
    }

    //-----------------------------------------------------------------------
    /**
     * <p>Converts all the whitespace separated words in a String into capitalized words, 
     * that is each word is made up of a titlecase character and then a series of 
     * lowercase characters.  </p>
     *
     * <p>Whitespace is defined by {@link Character#isWhitespace(int)}.
     * A <code>null</code> input String returns <code>null</code>.
     * Capitalization uses the Unicode title case, normally equivalent to
     * upper case.</p>
     *
     * <pre>
     * WordUtils.capitalizeFully(null)        = null
     * WordUtils.capitalizeFully("")          = ""
     * WordUtils.capitalizeFully("i am FINE") = "I Am Fine"
     * </pre>
     * 
     * @param str  the String to capitalize, may be null
     * @return capitalized String, <code>null</code> if null String input
     */
    public static String capitalizeFully(final String str) {
        return capitalizeFully(str, (char[]) null);
    }

    /**
     * <p>Converts all the delimiter separated words in a String into capitalized words, 
     * that is each word is made up of a titlecase character and then a series of 
     * lowercase characters. </p>
     *
     * <p>The delimiters represent a set of characters understood to separate words.
     * The first string character and the first non-delimiter character after a
     * delimiter will be capitalized. </p>
     *
     * <p>A <code>null</code> input String returns <code>null</code>.
     * Capitalization uses the Unicode title case, normally equivalent to
     * upper case. The String is first lower-cased with
     * {@link String#toLowerCase()}, i.e. using the default locale.</p>
     *
     * <pre>
     * WordUtils.capitalizeFully(null, *)             = null
     * WordUtils.capitalizeFully("", *)               = ""
     * WordUtils.capitalizeFully("i am FINE", null)   = "I Am Fine"
     * WordUtils.capitalizeFully(*, new char[0])      = *
     * WordUtils.capitalizeFully("i aM.fine", {'.'})  = "I am.Fine"
     * </pre>
     * 
     * @param str  the String to capitalize, may be null
     * @param delimiters  set of characters to determine capitalization, null means whitespace
     * @return capitalized String, <code>null</code> if null String input
     * @since 2.1
     */
    public static String capitalizeFully(String str, final char... delimiters) {
        if (str == null || str.isEmpty() || (delimiters != null && delimiters.length == 0)) {
            return str;
        }
        str = str.toLowerCase();
        return capitalize(str, delimiters);
    }

    //-----------------------------------------------------------------------
    /**
     * <p>Uncapitalizes all the whitespace separated words in a String.
     * Only the first letter of each word is changed.</p>
     *
     * <p>Whitespace is defined by {@link Character#isWhitespace(int)}.
     * A <code>null</code> input String returns <code>null</code>.</p>
     *
     * <pre>
     * WordUtils.uncapitalize(null)        = null
     * WordUtils.uncapitalize("")          = ""
     * WordUtils.uncapitalize("I Am FINE") = "i am fINE"
     * </pre>
     * 
     * @param str  the String to uncapitalize, may be null
     * @return uncapitalized String, <code>null</code> if null String input
     * @see #capitalize(String)
     */
    public static String uncapitalize(final String str) {
        return uncapitalize(str, (char[]) null);
    }

    /**
     * <p>Uncapitalizes all the delimiter separated words in a String.
     * Only the first letter of each word is changed.</p>
     *
     * <p>The delimiters represent a set of characters understood to separate words.
     * The first string character and the first non-delimiter character after a
     * delimiter will be uncapitalized. </p>
     *
     * <p>If the delimiters array is null, whitespace as defined by
     * {@link Character#isWhitespace(int)} separates the words.
     * A <code>null</code> input String returns <code>null</code>.
     * The String is processed by code point, so supplementary characters
     * (surrogate pairs) are uncapitalized as well.</p>
     *
     * <pre>
     * WordUtils.uncapitalize(null, *)             = null
     * WordUtils.uncapitalize("", *)               = ""
     * WordUtils.uncapitalize("I Am FINE", null)   = "i am fINE"
     * WordUtils.uncapitalize(*, new char[0])      = *
     * WordUtils.uncapitalize("I AM.FINE", {'.'})  = "i AM.fINE"
     * </pre>
     * 
     * @param str  the String to uncapitalize, may be null
     * @param delimiters  set of characters to determine uncapitalization, null means whitespace
     * @return uncapitalized String, <code>null</code> if null String input
     * @see #capitalize(String)
     * @since 2.1
     */
    public static String uncapitalize(final String str, final char... delimiters) {
        if (str == null || str.isEmpty() || (delimiters != null && delimiters.length == 0)) {
            return str;
        }
        final int strLen = str.length();
        final StringBuilder result = new StringBuilder(strLen);
        boolean uncapitalizeNext = true;
        for (int index = 0; index < strLen; ) {
            final int codePoint = str.codePointAt(index);
            index += Character.charCount(codePoint);
            if (isDelimiter(codePoint, delimiters)) {
                uncapitalizeNext = true;
                result.appendCodePoint(codePoint);
            } else if (uncapitalizeNext) {
                result.appendCodePoint(Character.toLowerCase(codePoint));
                uncapitalizeNext = false;
            } else {
                result.appendCodePoint(codePoint);
            }
        }
        return result.toString();
    }

    //-----------------------------------------------------------------------
    /**
     * <p>Swaps the case of a String using a word based algorithm.</p>
     * 
     * <ul>
     *  <li>Upper case character converts to Lower case</li>
     *  <li>Title case character converts to Lower case</li>
     *  <li>Lower case character after Whitespace or at start converts to Title case</li>
     *  <li>Other Lower case character converts to Upper case</li>
     * </ul>
     * 
     * <p>Whitespace is defined by {@link Character#isWhitespace(int)}.
     * A <code>null</code> input String returns <code>null</code>.
     * The String is processed by code point, so supplementary characters
     * (surrogate pairs) have their case swapped as well.</p>
     * 
     * <pre>
     * WordUtils.swapCase(null)                 = null
     * WordUtils.swapCase("")                   = ""
     * WordUtils.swapCase("The dog has a BONE") = "tHE DOG HAS A bone"
     * </pre>
     * 
     * @param str  the String to swap case, may be null
     * @return the changed String, <code>null</code> if null String input
     */
    public static String swapCase(final String str) {
        if (str == null || str.isEmpty()) {
            return str;
        }
        final int strLen = str.length();
        final StringBuilder result = new StringBuilder(strLen);

        // true while no cased character has been seen since the start or the last whitespace
        boolean whitespace = true;

        for (int index = 0; index < strLen; ) {
            final int codePoint = str.codePointAt(index);
            index += Character.charCount(codePoint);
            final int swapped;
            if (Character.isUpperCase(codePoint) || Character.isTitleCase(codePoint)) {
                swapped = Character.toLowerCase(codePoint);
                whitespace = false;
            } else if (Character.isLowerCase(codePoint)) {
                if (whitespace) {
                    swapped = Character.toTitleCase(codePoint);
                    whitespace = false;
                } else {
                    swapped = Character.toUpperCase(codePoint);
                }
            } else {
                // caseless character: copied unchanged, only updates the word-start state
                swapped = codePoint;
                whitespace = Character.isWhitespace(codePoint);
            }
            result.appendCodePoint(swapped);
        }
        return result.toString();
    }

    //-----------------------------------------------------------------------
    /**
     * <p>Extracts the initial letters from each word in the String.</p>
     * 
     * <p>The first letter of the string and all first letters after
     * whitespace are returned as a new string.
     * Their case is not changed.</p>
     *
     * <p>Whitespace is defined by {@link Character#isWhitespace(int)}.
     * A <code>null</code> input String returns <code>null</code>.</p>
     *
     * <pre>
     * WordUtils.initials(null)             = null
     * WordUtils.initials("")               = ""
     * WordUtils.initials("Ben John Lee")   = "BJL"
     * WordUtils.initials("Ben J.Lee")      = "BJ"
     * </pre>
     *
     * @param str  the String to get initials from, may be null
     * @return String of initial letters, <code>null</code> if null String input
     * @see #initials(String,char[])
     * @since 2.2
     */
    public static String initials(final String str) {
        return initials(str, (char[]) null);
    }

    /**
     * <p>Extracts the initial letters from each word in the String.</p>
     * 
     * <p>The first letter of the string and all first letters after the
     * defined delimiters are returned as a new string.
     * Their case is not changed.</p>
     *
     * <p>If the delimiters array is null, then Whitespace is used.
     * Whitespace is defined by {@link Character#isWhitespace(int)}.
     * A <code>null</code> input String returns <code>null</code>.
     * An empty delimiter array returns an empty String.
     * An initial that is a supplementary character is returned as a complete
     * surrogate pair.</p>
     *
     * <pre>
     * WordUtils.initials(null, *)                = null
     * WordUtils.initials("", *)                  = ""
     * WordUtils.initials("Ben John Lee", null)   = "BJL"
     * WordUtils.initials("Ben J.Lee", null)      = "BJ"
     * WordUtils.initials("Ben J.Lee", [' ','.']) = "BJL"
     * WordUtils.initials(*, new char[0])         = ""
     * </pre>
     * 
     * @param str  the String to get initials from, may be null
     * @param delimiters  set of characters to determine words, null means whitespace
     * @return String of initial letters, <code>null</code> if null String input
     * @see #initials(String)
     * @since 2.2
     */
    public static String initials(final String str, final char... delimiters) {
        if (str == null || str.isEmpty()) {
            return str;
        }
        if (delimiters != null && delimiters.length == 0) {
            return "";
        }
        final int strLen = str.length();
        final StringBuilder result = new StringBuilder(strLen / 2 + 1);
        boolean lastWasGap = true;
        for (int index = 0; index < strLen; ) {
            final int codePoint = str.codePointAt(index);
            index += Character.charCount(codePoint);

            if (isDelimiter(codePoint, delimiters)) {
                lastWasGap = true;
            } else if (lastWasGap) {
                result.appendCodePoint(codePoint);
                lastWasGap = false;
            }
            // any other character is inside a word and is ignored
        }
        return result.toString();
    }

    //-----------------------------------------------------------------------
    /**
     * Is the code point a delimiter.
     *
     * <p>Delimiters are given as <code>char</code>s, so a supplementary
     * code point never matches an explicit delimiter.</p>
     *
     * @param codePoint  the code point to check
     * @param delimiters  the delimiters, null means whitespace as defined by
     *  {@link Character#isWhitespace(int)}
     * @return true if it is a delimiter
     */
    private static boolean isDelimiter(final int codePoint, final char[] delimiters) {
        if (delimiters == null) {
            return Character.isWhitespace(codePoint);
        }
        for (final char delimiter : delimiters) {
            if (codePoint == delimiter) {
                return true;
            }
        }
        return false;
    }

}