/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
17  package org.apache.commons.lang3.text;
18  
19  import org.apache.commons.lang3.ArrayUtils;
20  import org.apache.commons.lang3.StringUtils;
21  import org.apache.commons.lang3.SystemUtils;
22  
23  import java.util.regex.Pattern;
24  
25  /**
26   * <p>Operations on Strings that contain words.</p>
27   * 
28   * <p>This class tries to handle <code>null</code> input gracefully.
29   * An exception will not be thrown for a <code>null</code> input.
30   * Each method documents its behaviour in more detail.</p>
31   * 
32   * @since 2.0
33   */
34  public class WordUtils {
35  
36      /**
37       * <p><code>WordUtils</code> instances should NOT be constructed in
38       * standard programming. Instead, the class should be used as
39       * <code>WordUtils.wrap("foo bar", 20);</code>.</p>
40       *
41       * <p>This constructor is public to permit tools that require a JavaBean
42       * instance to operate.</p>
43       */
44      public WordUtils() {
45        super();
46      }
47  
48      // Wrapping
49      //--------------------------------------------------------------------------
50      /**
51       * <p>Wraps a single line of text, identifying words by <code>' '</code>.</p>
52       * 
53       * <p>New lines will be separated by the system property line separator.
54       * Very long words, such as URLs will <i>not</i> be wrapped.</p>
55       * 
56       * <p>Leading spaces on a new line are stripped.
57       * Trailing spaces are not stripped.</p>
58       *
59       * <table border="1" summary="Wrap Results">
60       *  <tr>
61       *   <th>input</th>
62       *   <th>wrapLength</th>
63       *   <th>result</th>
64       *  </tr>
65       *  <tr>
66       *   <td>null</td>
67       *   <td>*</td>
68       *   <td>null</td>
69       *  </tr>
70       *  <tr>
71       *   <td>""</td>
72       *   <td>*</td>
73       *   <td>""</td>
74       *  </tr>
75       *  <tr>
76       *   <td>"Here is one line of text that is going to be wrapped after 20 columns."</td>
77       *   <td>20</td>
78       *   <td>"Here is one line of\ntext that is going\nto be wrapped after\n20 columns."</td>
79       *  </tr>
80       *  <tr>
81       *   <td>"Click here to jump to the commons website - http://commons.apache.org"</td>
82       *   <td>20</td>
83       *   <td>"Click here to jump\nto the commons\nwebsite -\nhttp://commons.apache.org"</td>
84       *  </tr>
85       *  <tr>
86       *   <td>"Click here, http://commons.apache.org, to jump to the commons website"</td>
87       *   <td>20</td>
88       *   <td>"Click here,\nhttp://commons.apache.org,\nto jump to the\ncommons website"</td>
89       *  </tr>
90       * </table>
91       *
92       * (assuming that '\n' is the systems line separator)
93       *
94       * @param str  the String to be word wrapped, may be null
95       * @param wrapLength  the column to wrap the words at, less than 1 is treated as 1
96       * @return a line with newlines inserted, <code>null</code> if null input
97       */
98      public static String wrap(final String str, final int wrapLength) {
99          return wrap(str, wrapLength, null, false);
100     }
101     
102     /**
103      * <p>Wraps a single line of text, identifying words by <code>' '</code>.</p>
104      * 
105      * <p>Leading spaces on a new line are stripped.
106      * Trailing spaces are not stripped.</p>
107      *
108      * <table border="1" summary="Wrap Results">
109      *  <tr>
110      *   <th>input</th>
111      *   <th>wrapLenght</th>
112      *   <th>newLineString</th>
113      *   <th>wrapLongWords</th>
114      *   <th>result</th>
115      *  </tr>
116      *  <tr>
117      *   <td>null</td>
118      *   <td>*</td>
119      *   <td>*</td>
120      *   <td>true/false</td>
121      *   <td>null</td>
122      *  </tr>
123      *  <tr>
124      *   <td>""</td>
125      *   <td>*</td>
126      *   <td>*</td>
127      *   <td>true/false</td>
128      *   <td>""</td>
129      *  </tr>
130      *  <tr>
131      *   <td>"Here is one line of text that is going to be wrapped after 20 columns."</td>
132      *   <td>20</td>
133      *   <td>"\n"</td>
134      *   <td>true/false</td>
135      *   <td>"Here is one line of\ntext that is going\nto be wrapped after\n20 columns."</td>
136      *  </tr>
137      *  <tr>
138      *   <td>"Here is one line of text that is going to be wrapped after 20 columns."</td>
139      *   <td>20</td>
140      *   <td>"&lt;br /&gt;"</td>
141      *   <td>true/false</td>
142      *   <td>"Here is one line of&lt;br /&gt;text that is going&lt;br /&gt;to be wrapped after&lt;br /&gt;20 columns."</td>
143      *  </tr>
144      *  <tr>
145      *   <td>"Here is one line of text that is going to be wrapped after 20 columns."</td>
146      *   <td>20</td>
147      *   <td>null</td>
148      *   <td>true/false</td>
149      *   <td>"Here is one line of" + systemNewLine + "text that is going" + systemNewLine + "to be wrapped after" + systemNewLine + "20 columns."</td>
150      *  </tr>
151      *  <tr>
152      *   <td>"Click here to jump to the commons website - http://commons.apache.org"</td>
153      *   <td>20</td>
154      *   <td>"\n"</td>
155      *   <td>false</td>
156      *   <td>"Click here to jump\nto the commons\nwebsite -\nhttp://commons.apache.org"</td>
157      *  </tr>
158      *  <tr>
159      *   <td>"Click here to jump to the commons website - http://commons.apache.org"</td>
160      *   <td>20</td>
161      *   <td>"\n"</td>
162      *   <td>true</td>
163      *   <td>"Click here to jump\nto the commons\nwebsite -\nhttp://commons.apach\ne.org"</td>
164      *  </tr>
165      * </table>
166      *
167      * @param str  the String to be word wrapped, may be null
168      * @param wrapLength  the column to wrap the words at, less than 1 is treated as 1
169      * @param newLineStr  the string to insert for a new line, 
170      *  <code>null</code> uses the system property line separator
171      * @param wrapLongWords  true if long words (such as URLs) should be wrapped
172      * @return a line with newlines inserted, <code>null</code> if null input
173      */
174     public static String wrap(final String str, int wrapLength, String newLineStr, final boolean wrapLongWords) {
175         if (str == null) {
176             return null;
177         }
178         if (newLineStr == null) {
179             newLineStr = SystemUtils.LINE_SEPARATOR;
180         }
181         if (wrapLength < 1) {
182             wrapLength = 1;
183         }
184         final int inputLineLength = str.length();
185         int offset = 0;
186         final StringBuilder wrappedLine = new StringBuilder(inputLineLength + 32);
187         
188         while (offset < inputLineLength) {
189             if (str.charAt(offset) == ' ') {
190                 offset++;
191                 continue;
192             }
193             // only last line without leading spaces is left
194             if(inputLineLength - offset <= wrapLength) {
195                 break;
196             }
197             int spaceToWrapAt = str.lastIndexOf(' ', wrapLength + offset);
198 
199             if (spaceToWrapAt >= offset) {
200                 // normal case
201                 wrappedLine.append(str.substring(offset, spaceToWrapAt));
202                 wrappedLine.append(newLineStr);
203                 offset = spaceToWrapAt + 1;
204                 
205             } else {
206                 // really long word or URL
207                 if (wrapLongWords) {
208                     // wrap really long word one line at a time
209                     wrappedLine.append(str.substring(offset, wrapLength + offset));
210                     wrappedLine.append(newLineStr);
211                     offset += wrapLength;
212                 } else {
213                     // do not wrap really long word, just extend beyond limit
214                     spaceToWrapAt = str.indexOf(' ', wrapLength + offset);
215                     if (spaceToWrapAt >= 0) {
216                         wrappedLine.append(str.substring(offset, spaceToWrapAt));
217                         wrappedLine.append(newLineStr);
218                         offset = spaceToWrapAt + 1;
219                     } else {
220                         wrappedLine.append(str.substring(offset));
221                         offset = inputLineLength;
222                     }
223                 }
224             }
225         }
226 
227         // Whatever is left in line is short enough to just pass through
228         wrappedLine.append(str.substring(offset));
229 
230         return wrappedLine.toString();
231     }
232 
233     // Capitalizing
234     //-----------------------------------------------------------------------
235     /**
236      * <p>Capitalizes all the whitespace separated words in a String.
237      * Only the first letter of each word is changed. To convert the 
238      * rest of each word to lowercase at the same time, 
239      * use {@link #capitalizeFully(String)}.</p>
240      *
241      * <p>Whitespace is defined by {@link Character#isWhitespace(char)}.
242      * A <code>null</code> input String returns <code>null</code>.
243      * Capitalization uses the Unicode title case, normally equivalent to
244      * upper case.</p>
245      *
246      * <pre>
247      * WordUtils.capitalize(null)        = null
248      * WordUtils.capitalize("")          = ""
249      * WordUtils.capitalize("i am FINE") = "I Am FINE"
250      * </pre>
251      * 
252      * @param str  the String to capitalize, may be null
253      * @return capitalized String, <code>null</code> if null String input
254      * @see #uncapitalize(String)
255      * @see #capitalizeFully(String)
256      */
257     public static String capitalize(final String str) {
258         return capitalize(str, null);
259     }
260 
261     /**
262      * <p>Capitalizes all the delimiter separated words in a String.
263      * Only the first letter of each word is changed. To convert the 
264      * rest of each word to lowercase at the same time, 
265      * use {@link #capitalizeFully(String, char[])}.</p>
266      *
267      * <p>The delimiters represent a set of characters understood to separate words.
268      * The first string character and the first non-delimiter character after a
269      * delimiter will be capitalized. </p>
270      *
271      * <p>A <code>null</code> input String returns <code>null</code>.
272      * Capitalization uses the Unicode title case, normally equivalent to
273      * upper case.</p>
274      *
275      * <pre>
276      * WordUtils.capitalize(null, *)            = null
277      * WordUtils.capitalize("", *)              = ""
278      * WordUtils.capitalize(*, new char[0])     = *
279      * WordUtils.capitalize("i am fine", null)  = "I Am Fine"
280      * WordUtils.capitalize("i aM.fine", {'.'}) = "I aM.Fine"
281      * </pre>
282      * 
283      * @param str  the String to capitalize, may be null
284      * @param delimiters  set of characters to determine capitalization, null means whitespace
285      * @return capitalized String, <code>null</code> if null String input
286      * @see #uncapitalize(String)
287      * @see #capitalizeFully(String)
288      * @since 2.1
289      */
290     public static String capitalize(final String str, final char... delimiters) {
291         final int delimLen = delimiters == null ? -1 : delimiters.length;
292         if (StringUtils.isEmpty(str) || delimLen == 0) {
293             return str;
294         }
295         final char[] buffer = str.toCharArray();
296         boolean capitalizeNext = true;
297         for (int i = 0; i < buffer.length; i++) {
298             final char ch = buffer[i];
299             if (isDelimiter(ch, delimiters)) {
300                 capitalizeNext = true;
301             } else if (capitalizeNext) {
302                 buffer[i] = Character.toTitleCase(ch);
303                 capitalizeNext = false;
304             }
305         }
306         return new String(buffer);
307     }
308 
309     //-----------------------------------------------------------------------
310     /**
311      * <p>Converts all the whitespace separated words in a String into capitalized words, 
312      * that is each word is made up of a titlecase character and then a series of 
313      * lowercase characters.  </p>
314      *
315      * <p>Whitespace is defined by {@link Character#isWhitespace(char)}.
316      * A <code>null</code> input String returns <code>null</code>.
317      * Capitalization uses the Unicode title case, normally equivalent to
318      * upper case.</p>
319      *
320      * <pre>
321      * WordUtils.capitalizeFully(null)        = null
322      * WordUtils.capitalizeFully("")          = ""
323      * WordUtils.capitalizeFully("i am FINE") = "I Am Fine"
324      * </pre>
325      * 
326      * @param str  the String to capitalize, may be null
327      * @return capitalized String, <code>null</code> if null String input
328      */
329     public static String capitalizeFully(final String str) {
330         return capitalizeFully(str, null);
331     }
332 
333     /**
334      * <p>Converts all the delimiter separated words in a String into capitalized words, 
335      * that is each word is made up of a titlecase character and then a series of 
336      * lowercase characters. </p>
337      *
338      * <p>The delimiters represent a set of characters understood to separate words.
339      * The first string character and the first non-delimiter character after a
340      * delimiter will be capitalized. </p>
341      *
342      * <p>A <code>null</code> input String returns <code>null</code>.
343      * Capitalization uses the Unicode title case, normally equivalent to
344      * upper case.</p>
345      *
346      * <pre>
347      * WordUtils.capitalizeFully(null, *)            = null
348      * WordUtils.capitalizeFully("", *)              = ""
349      * WordUtils.capitalizeFully(*, null)            = *
350      * WordUtils.capitalizeFully(*, new char[0])     = *
351      * WordUtils.capitalizeFully("i aM.fine", {'.'}) = "I am.Fine"
352      * </pre>
353      * 
354      * @param str  the String to capitalize, may be null
355      * @param delimiters  set of characters to determine capitalization, null means whitespace
356      * @return capitalized String, <code>null</code> if null String input
357      * @since 2.1
358      */
359     public static String capitalizeFully(String str, final char... delimiters) {
360         final int delimLen = delimiters == null ? -1 : delimiters.length;
361         if (StringUtils.isEmpty(str) || delimLen == 0) {
362             return str;
363         }
364         str = str.toLowerCase();
365         return capitalize(str, delimiters);
366     }
367 
368     //-----------------------------------------------------------------------
369     /**
370      * <p>Uncapitalizes all the whitespace separated words in a String.
371      * Only the first letter of each word is changed.</p>
372      *
373      * <p>Whitespace is defined by {@link Character#isWhitespace(char)}.
374      * A <code>null</code> input String returns <code>null</code>.</p>
375      *
376      * <pre>
377      * WordUtils.uncapitalize(null)        = null
378      * WordUtils.uncapitalize("")          = ""
379      * WordUtils.uncapitalize("I Am FINE") = "i am fINE"
380      * </pre>
381      * 
382      * @param str  the String to uncapitalize, may be null
383      * @return uncapitalized String, <code>null</code> if null String input
384      * @see #capitalize(String)
385      */
386     public static String uncapitalize(final String str) {
387         return uncapitalize(str, null);
388     }
389 
390     /**
391      * <p>Uncapitalizes all the whitespace separated words in a String.
392      * Only the first letter of each word is changed.</p>
393      *
394      * <p>The delimiters represent a set of characters understood to separate words.
395      * The first string character and the first non-delimiter character after a
396      * delimiter will be uncapitalized. </p>
397      *
398      * <p>Whitespace is defined by {@link Character#isWhitespace(char)}.
399      * A <code>null</code> input String returns <code>null</code>.</p>
400      *
401      * <pre>
402      * WordUtils.uncapitalize(null, *)            = null
403      * WordUtils.uncapitalize("", *)              = ""
404      * WordUtils.uncapitalize(*, null)            = *
405      * WordUtils.uncapitalize(*, new char[0])     = *
406      * WordUtils.uncapitalize("I AM.FINE", {'.'}) = "i AM.fINE"
407      * </pre>
408      * 
409      * @param str  the String to uncapitalize, may be null
410      * @param delimiters  set of characters to determine uncapitalization, null means whitespace
411      * @return uncapitalized String, <code>null</code> if null String input
412      * @see #capitalize(String)
413      * @since 2.1
414      */
415     public static String uncapitalize(final String str, final char... delimiters) {
416         final int delimLen = delimiters == null ? -1 : delimiters.length;
417         if (StringUtils.isEmpty(str) || delimLen == 0) {
418             return str;
419         }
420         final char[] buffer = str.toCharArray();
421         boolean uncapitalizeNext = true;
422         for (int i = 0; i < buffer.length; i++) {
423             final char ch = buffer[i];
424             if (isDelimiter(ch, delimiters)) {
425                 uncapitalizeNext = true;
426             } else if (uncapitalizeNext) {
427                 buffer[i] = Character.toLowerCase(ch);
428                 uncapitalizeNext = false;
429             }
430         }
431         return new String(buffer);
432     }
433 
434     //-----------------------------------------------------------------------
435     /**
436      * <p>Swaps the case of a String using a word based algorithm.</p>
437      * 
438      * <ul>
439      *  <li>Upper case character converts to Lower case</li>
440      *  <li>Title case character converts to Lower case</li>
441      *  <li>Lower case character after Whitespace or at start converts to Title case</li>
442      *  <li>Other Lower case character converts to Upper case</li>
443      * </ul>
444      * 
445      * <p>Whitespace is defined by {@link Character#isWhitespace(char)}.
446      * A <code>null</code> input String returns <code>null</code>.</p>
447      * 
448      * <pre>
449      * StringUtils.swapCase(null)                 = null
450      * StringUtils.swapCase("")                   = ""
451      * StringUtils.swapCase("The dog has a BONE") = "tHE DOG HAS A bone"
452      * </pre>
453      * 
454      * @param str  the String to swap case, may be null
455      * @return the changed String, <code>null</code> if null String input
456      */
457     public static String swapCase(final String str) {
458         if (StringUtils.isEmpty(str)) {
459             return str;
460         }
461         final char[] buffer = str.toCharArray();
462 
463         boolean whitespace = true;
464 
465         for (int i = 0; i < buffer.length; i++) {
466             final char ch = buffer[i];
467             if (Character.isUpperCase(ch)) {
468                 buffer[i] = Character.toLowerCase(ch);
469                 whitespace = false;
470             } else if (Character.isTitleCase(ch)) {
471                 buffer[i] = Character.toLowerCase(ch);
472                 whitespace = false;
473             } else if (Character.isLowerCase(ch)) {
474                 if (whitespace) {
475                     buffer[i] = Character.toTitleCase(ch);
476                     whitespace = false;
477                 } else {
478                     buffer[i] = Character.toUpperCase(ch);
479                 }
480             } else {
481                 whitespace = Character.isWhitespace(ch);
482             }
483         }
484         return new String(buffer);
485     }
486 
487     //-----------------------------------------------------------------------
488     /**
489      * <p>Extracts the initial letters from each word in the String.</p>
490      * 
491      * <p>The first letter of the string and all first letters after
492      * whitespace are returned as a new string.
493      * Their case is not changed.</p>
494      *
495      * <p>Whitespace is defined by {@link Character#isWhitespace(char)}.
496      * A <code>null</code> input String returns <code>null</code>.</p>
497      *
498      * <pre>
499      * WordUtils.initials(null)             = null
500      * WordUtils.initials("")               = ""
501      * WordUtils.initials("Ben John Lee")   = "BJL"
502      * WordUtils.initials("Ben J.Lee")      = "BJ"
503      * </pre>
504      *
505      * @param str  the String to get initials from, may be null
506      * @return String of initial letters, <code>null</code> if null String input
507      * @see #initials(String,char[])
508      * @since 2.2
509      */
510     public static String initials(final String str) {
511         return initials(str, null);
512     }
513 
514     /**
515      * <p>Extracts the initial letters from each word in the String.</p>
516      * 
517      * <p>The first letter of the string and all first letters after the
518      * defined delimiters are returned as a new string.
519      * Their case is not changed.</p>
520      *
521      * <p>If the delimiters array is null, then Whitespace is used.
522      * Whitespace is defined by {@link Character#isWhitespace(char)}.
523      * A <code>null</code> input String returns <code>null</code>.
524      * An empty delimiter array returns an empty String.</p>
525      *
526      * <pre>
527      * WordUtils.initials(null, *)                = null
528      * WordUtils.initials("", *)                  = ""
529      * WordUtils.initials("Ben John Lee", null)   = "BJL"
530      * WordUtils.initials("Ben J.Lee", null)      = "BJ"
531      * WordUtils.initials("Ben J.Lee", [' ','.']) = "BJL"
532      * WordUtils.initials(*, new char[0])         = ""
533      * </pre>
534      * 
535      * @param str  the String to get initials from, may be null
536      * @param delimiters  set of characters to determine words, null means whitespace
537      * @return String of initial letters, <code>null</code> if null String input
538      * @see #initials(String)
539      * @since 2.2
540      */
541     public static String initials(final String str, final char... delimiters) {
542         if (StringUtils.isEmpty(str)) {
543             return str;
544         }
545         if (delimiters != null && delimiters.length == 0) {
546             return "";
547         }
548         final int strLen = str.length();
549         final char[] buf = new char[strLen / 2 + 1];
550         int count = 0;
551         boolean lastWasGap = true;
552         for (int i = 0; i < strLen; i++) {
553             final char ch = str.charAt(i);
554 
555             if (isDelimiter(ch, delimiters)) {
556                 lastWasGap = true;
557             } else if (lastWasGap) {
558                 buf[count++] = ch;
559                 lastWasGap = false;
560             } else {
561                 continue; // ignore ch
562             }
563         }
564         return new String(buf, 0, count);
565     }
566 
567     //-----------------------------------------------------------------------
568     /**
569      * <p>Checks if the String contains all words in the given array.</p>
570      *
571      * <p>
572      * A {@code null} String will return {@code false}. A {@code null, zero
573      * length search array or if one element of array is null will return {@code false}.
574      * </p>
575      *
576      * <pre>
577      * WordUtils.containsAllWords(null, *)            = false
578      * WordUtils.containsAllWords("", *)              = false
579      * WordUtils.containsAllWords(*, null)            = false
580      * WordUtils.containsAllWords(*, [])              = false
581      * WordUtils.containsAllWords("abcd", "ab", "cd") = false
582      * WordUtils.containsAllWords("abc def", "def", "abc") = true
583      * </pre>
584      *
585      *
586      * @param str The str to check, may be null
587      * @param words The array of String words to search for, may be null
588      * @return {@code true} if all search words are found, {@code false} otherwise
589      */
590     public static boolean containsAllWords(CharSequence word, CharSequence... words) {
591         if (StringUtils.isEmpty(word) || ArrayUtils.isEmpty(words)) {
592             return false;
593         }
594         for (CharSequence w : words) {
595             if (StringUtils.isBlank(w)) {
596                 return false;
597             }
598             Pattern p = Pattern.compile(".*\\b" + w + "\\b.*");
599             if (!p.matcher(word).matches()) {
600                 return false;
601             }
602         }
603         return true;
604     }
605 
606     //-----------------------------------------------------------------------
607     /**
608      * Is the character a delimiter.
609      *
610      * @param ch  the character to check
611      * @param delimiters  the delimiters
612      * @return true if it is a delimiter
613      */
614     private static boolean isDelimiter(final char ch, final char[] delimiters) {
615         if (delimiters == null) {
616             return Character.isWhitespace(ch);
617         }
618         for (final char delimiter : delimiters) {
619             if (ch == delimiter) {
620                 return true;
621             }
622         }
623         return false;
624     }
625 
626 }