/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
17  package org.apache.commons.lang;
18  
/**
 * <p>Operations on Strings that contain words.</p>
 *
 * <p>This class tries to handle <code>null</code> input gracefully.
 * An exception will not be thrown for a <code>null</code> input.
 * Each method documents its behaviour in more detail.</p>
 *
 * @author Apache Jakarta Velocity
 * @author Stephen Colebourne
 * @author <a href="mailto:hps@intermeta.de">Henning P. Schmiedehausen</a>
 * @author Gary Gregory
 * @since 2.0
 * @version $Id: WordUtils.java 618884 2008-02-06 04:37:17Z bayard $
 */
33  public class WordUtils {
34  
35      /**
36       * <p><code>WordUtils</code> instances should NOT be constructed in
37       * standard programming. Instead, the class should be used as
38       * <code>WordUtils.wrap("foo bar", 20);</code>.</p>
39       *
40       * <p>This constructor is public to permit tools that require a JavaBean
41       * instance to operate.</p>
42       */
43      public WordUtils() {
44        super();
45      }
46  
47      // Wrapping
48      //--------------------------------------------------------------------------
49  //    /**
50  //     * <p>Wraps a block of text to a specified line length using '\n' as
51  //     * a newline.</p>
52  //     *
53  //     * <p>This method takes a block of text, which might have long lines in it
54  //     * and wraps the long lines based on the supplied lineLength parameter.</p>
55  //     * 
56  //     * <p>If a single word is longer than the line length (eg. a URL), it will
57  //     * not be broken, and will display beyond the expected width.</p>
58  //     * 
59  //     * <p>If there are tabs in inString, you are going to get results that are
60  //     * a bit strange. Tabs are a single character but are displayed as 4 or 8
61  //     * spaces. Remove the tabs.</p>
62  //     *
63  //     * @param str  text which is in need of word-wrapping, may be null
64  //     * @param lineLength  the column to wrap the words at
65  //     * @return the text with all the long lines word-wrapped
66  //     *  <code>null</code> if null string input
67  //     */
68  //    public static String wrapText(String str, int lineLength) {
69  //        return wrap(str, null, lineLength);
70  //    }
71      
72  //    /**
73  //     * <p>Wraps a block of text to a specified line length.</p>
74  //     *
75  //     * <p>This method takes a block of text, which might have long lines in it
76  //     * and wraps the long lines based on the supplied lineLength parameter.</p>
77  //     * 
78  //     * <p>If a single word is longer than the wrapColumn (eg. a URL), it will
79  //     * not be broken, and will display beyond the expected width.</p>
80  //     * 
81  //     * <p>If there are tabs in inString, you are going to get results that are
82  //     * a bit strange. Tabs are a single character but are displayed as 4 or 8
83  //     * spaces. Remove the tabs.</p>
84  //     *
85  //     * @param str  text which is in need of word-wrapping, may be null
86  //     * @param newLineChars  the characters that define a newline, null treated as \n
87  //     * @param lineLength  the column to wrap the words at
88  //     * @return the text with all the long lines word-wrapped
89  //     *  <code>null</code> if null string input
90  //     */
91  //    public static String wrapText(String str, String newLineChars, int lineLength) {
92  //        if (str == null) {
93  //            return null;
94  //        }
95  //        if (newLineChars == null) {
96  //            newLineChars = "\n";
97  //        }
98  //        StringTokenizer lineTokenizer = new StringTokenizer(str, newLineChars, true);
99  //        StringBuffer stringBuffer = new StringBuffer();
100 //
101 //        while (lineTokenizer.hasMoreTokens()) {
102 //            try {
103 //                String nextLine = lineTokenizer.nextToken();
104 //
105 //                if (nextLine.length() > lineLength) {
106 //                    // This line is long enough to be wrapped.
107 //                    nextLine = wrapLine(nextLine, null, lineLength, false);
108 //                }
109 //
110 //                stringBuffer.append(nextLine);
111 //
112 //            } catch (NoSuchElementException nsee) {
113 //                // thrown by nextToken(), but I don't know why it would
114 //                break;
115 //            }
116 //        }
117 //
118 //        return stringBuffer.toString();
119 //    }
120 
121     // Wrapping
122     //-----------------------------------------------------------------------
123     /**
124      * <p>Wraps a single line of text, identifying words by <code>' '</code>.</p>
125      * 
126      * <p>New lines will be separated by the system property line separator.
127      * Very long words, such as URLs will <i>not</i> be wrapped.</p>
128      * 
129      * <p>Leading spaces on a new line are stripped.
130      * Trailing spaces are not stripped.</p>
131      *
132      * <pre>
133      * WordUtils.wrap(null, *) = null
134      * WordUtils.wrap("", *) = ""
135      * </pre>
136      *
137      * @param str  the String to be word wrapped, may be null
138      * @param wrapLength  the column to wrap the words at, less than 1 is treated as 1
139      * @return a line with newlines inserted, <code>null</code> if null input
140      */
141     public static String wrap(String str, int wrapLength) {
142         return wrap(str, wrapLength, null, false);
143     }
144     
145     /**
146      * <p>Wraps a single line of text, identifying words by <code>' '</code>.</p>
147      * 
148      * <p>Leading spaces on a new line are stripped.
149      * Trailing spaces are not stripped.</p>
150      * 
151      * <pre>
152      * WordUtils.wrap(null, *, *, *) = null
153      * WordUtils.wrap("", *, *, *) = ""
154      * </pre>
155      *
156      * @param str  the String to be word wrapped, may be null
157      * @param wrapLength  the column to wrap the words at, less than 1 is treated as 1
158      * @param newLineStr  the string to insert for a new line, 
159      *  <code>null</code> uses the system property line separator
160      * @param wrapLongWords  true if long words (such as URLs) should be wrapped
161      * @return a line with newlines inserted, <code>null</code> if null input
162      */
163     public static String wrap(String str, int wrapLength, String newLineStr, boolean wrapLongWords) {
164         if (str == null) {
165             return null;
166         }
167         if (newLineStr == null) {
168             newLineStr = SystemUtils.LINE_SEPARATOR;
169         }
170         if (wrapLength < 1) {
171             wrapLength = 1;
172         }
173         int inputLineLength = str.length();
174         int offset = 0;
175         StringBuffer wrappedLine = new StringBuffer(inputLineLength + 32);
176         
177         while ((inputLineLength - offset) > wrapLength) {
178             if (str.charAt(offset) == ' ') {
179                 offset++;
180                 continue;
181             }
182             int spaceToWrapAt = str.lastIndexOf(' ', wrapLength + offset);
183 
184             if (spaceToWrapAt >= offset) {
185                 // normal case
186                 wrappedLine.append(str.substring(offset, spaceToWrapAt));
187                 wrappedLine.append(newLineStr);
188                 offset = spaceToWrapAt + 1;
189                 
190             } else {
191                 // really long word or URL
192                 if (wrapLongWords) {
193                     // wrap really long word one line at a time
194                     wrappedLine.append(str.substring(offset, wrapLength + offset));
195                     wrappedLine.append(newLineStr);
196                     offset += wrapLength;
197                 } else {
198                     // do not wrap really long word, just extend beyond limit
199                     spaceToWrapAt = str.indexOf(' ', wrapLength + offset);
200                     if (spaceToWrapAt >= 0) {
201                         wrappedLine.append(str.substring(offset, spaceToWrapAt));
202                         wrappedLine.append(newLineStr);
203                         offset = spaceToWrapAt + 1;
204                     } else {
205                         wrappedLine.append(str.substring(offset));
206                         offset = inputLineLength;
207                     }
208                 }
209             }
210         }
211 
212         // Whatever is left in line is short enough to just pass through
213         wrappedLine.append(str.substring(offset));
214 
215         return wrappedLine.toString();
216     }
217 
218     // Capitalizing
219     //-----------------------------------------------------------------------
220     /**
221      * <p>Capitalizes all the whitespace separated words in a String.
222      * Only the first letter of each word is changed. To convert the 
223      * rest of each word to lowercase at the same time, 
224      * use {@link #capitalizeFully(String)}.</p>
225      *
226      * <p>Whitespace is defined by {@link Character#isWhitespace(char)}.
227      * A <code>null</code> input String returns <code>null</code>.
228      * Capitalization uses the unicode title case, normally equivalent to
229      * upper case.</p>
230      *
231      * <pre>
232      * WordUtils.capitalize(null)        = null
233      * WordUtils.capitalize("")          = ""
234      * WordUtils.capitalize("i am FINE") = "I Am FINE"
235      * </pre>
236      * 
237      * @param str  the String to capitalize, may be null
238      * @return capitalized String, <code>null</code> if null String input
239      * @see #uncapitalize(String)
240      * @see #capitalizeFully(String)
241      */
242     public static String capitalize(String str) {
243         return capitalize(str, null);
244     }
245 
246     /**
247      * <p>Capitalizes all the delimiter separated words in a String.
248      * Only the first letter of each word is changed. To convert the 
249      * rest of each word to lowercase at the same time, 
250      * use {@link #capitalizeFully(String, char[])}.</p>
251      *
252      * <p>The delimiters represent a set of characters understood to separate words.
253      * The first string character and the first non-delimiter character after a
254      * delimiter will be capitalized. </p>
255      *
256      * <p>A <code>null</code> input String returns <code>null</code>.
257      * Capitalization uses the unicode title case, normally equivalent to
258      * upper case.</p>
259      *
260      * <pre>
261      * WordUtils.capitalize(null, *)            = null
262      * WordUtils.capitalize("", *)              = ""
263      * WordUtils.capitalize(*, new char[0])     = *
264      * WordUtils.capitalize("i am fine", null)  = "I Am Fine"
265      * WordUtils.capitalize("i aM.fine", {'.'}) = "I aM.Fine"
266      * </pre>
267      * 
268      * @param str  the String to capitalize, may be null
269      * @param delimiters  set of characters to determine capitalization, null means whitespace
270      * @return capitalized String, <code>null</code> if null String input
271      * @see #uncapitalize(String)
272      * @see #capitalizeFully(String)
273      * @since 2.1
274      */
275     public static String capitalize(String str, char[] delimiters) {
276         int delimLen = (delimiters == null ? -1 : delimiters.length);
277         if (str == null || str.length() == 0 || delimLen == 0) {
278             return str;
279         }
280         int strLen = str.length();
281         StringBuffer buffer = new StringBuffer(strLen);
282         boolean capitalizeNext = true;
283         for (int i = 0; i < strLen; i++) {
284             char ch = str.charAt(i);
285 
286             if (isDelimiter(ch, delimiters)) {
287                 buffer.append(ch);
288                 capitalizeNext = true;
289             } else if (capitalizeNext) {
290                 buffer.append(Character.toTitleCase(ch));
291                 capitalizeNext = false;
292             } else {
293                 buffer.append(ch);
294             }
295         }
296         return buffer.toString();
297     }
298 
299     //-----------------------------------------------------------------------
300     /**
301      * <p>Converts all the whitespace separated words in a String into capitalized words, 
302      * that is each word is made up of a titlecase character and then a series of 
303      * lowercase characters.  </p>
304      *
305      * <p>Whitespace is defined by {@link Character#isWhitespace(char)}.
306      * A <code>null</code> input String returns <code>null</code>.
307      * Capitalization uses the unicode title case, normally equivalent to
308      * upper case.</p>
309      *
310      * <pre>
311      * WordUtils.capitalizeFully(null)        = null
312      * WordUtils.capitalizeFully("")          = ""
313      * WordUtils.capitalizeFully("i am FINE") = "I Am Fine"
314      * </pre>
315      * 
316      * @param str  the String to capitalize, may be null
317      * @return capitalized String, <code>null</code> if null String input
318      */
319     public static String capitalizeFully(String str) {
320         return capitalizeFully(str, null);
321     }
322 
323     /**
324      * <p>Converts all the delimiter separated words in a String into capitalized words, 
325      * that is each word is made up of a titlecase character and then a series of 
326      * lowercase characters. </p>
327      *
328      * <p>The delimiters represent a set of characters understood to separate words.
329      * The first string character and the first non-delimiter character after a
330      * delimiter will be capitalized. </p>
331      *
332      * <p>A <code>null</code> input String returns <code>null</code>.
333      * Capitalization uses the unicode title case, normally equivalent to
334      * upper case.</p>
335      *
336      * <pre>
337      * WordUtils.capitalizeFully(null, *)            = null
338      * WordUtils.capitalizeFully("", *)              = ""
339      * WordUtils.capitalizeFully(*, null)            = *
340      * WordUtils.capitalizeFully(*, new char[0])     = *
341      * WordUtils.capitalizeFully("i aM.fine", {'.'}) = "I am.Fine"
342      * </pre>
343      * 
344      * @param str  the String to capitalize, may be null
345      * @param delimiters  set of characters to determine capitalization, null means whitespace
346      * @return capitalized String, <code>null</code> if null String input
347      * @since 2.1
348      */
349     public static String capitalizeFully(String str, char[] delimiters) {
350         int delimLen = (delimiters == null ? -1 : delimiters.length);
351         if (str == null || str.length() == 0 || delimLen == 0) {
352             return str;
353         }
354         str = str.toLowerCase();
355         return capitalize(str, delimiters);
356     }
357 
358     //-----------------------------------------------------------------------
359     /**
360      * <p>Uncapitalizes all the whitespace separated words in a String.
361      * Only the first letter of each word is changed.</p>
362      *
363      * <p>Whitespace is defined by {@link Character#isWhitespace(char)}.
364      * A <code>null</code> input String returns <code>null</code>.</p>
365      *
366      * <pre>
367      * WordUtils.uncapitalize(null)        = null
368      * WordUtils.uncapitalize("")          = ""
369      * WordUtils.uncapitalize("I Am FINE") = "i am fINE"
370      * </pre>
371      * 
372      * @param str  the String to uncapitalize, may be null
373      * @return uncapitalized String, <code>null</code> if null String input
374      * @see #capitalize(String)
375      */
376     public static String uncapitalize(String str) {
377         return uncapitalize(str, null);
378     }
379 
380     /**
381      * <p>Uncapitalizes all the whitespace separated words in a String.
382      * Only the first letter of each word is changed.</p>
383      *
384      * <p>The delimiters represent a set of characters understood to separate words.
385      * The first string character and the first non-delimiter character after a
386      * delimiter will be uncapitalized. </p>
387      *
388      * <p>Whitespace is defined by {@link Character#isWhitespace(char)}.
389      * A <code>null</code> input String returns <code>null</code>.</p>
390      *
391      * <pre>
392      * WordUtils.uncapitalize(null, *)            = null
393      * WordUtils.uncapitalize("", *)              = ""
394      * WordUtils.uncapitalize(*, null)            = *
395      * WordUtils.uncapitalize(*, new char[0])     = *
396      * WordUtils.uncapitalize("I AM.FINE", {'.'}) = "i AM.fINE"
397      * </pre>
398      * 
399      * @param str  the String to uncapitalize, may be null
400      * @param delimiters  set of characters to determine uncapitalization, null means whitespace
401      * @return uncapitalized String, <code>null</code> if null String input
402      * @see #capitalize(String)
403      * @since 2.1
404      */
405     public static String uncapitalize(String str, char[] delimiters) {
406         int delimLen = (delimiters == null ? -1 : delimiters.length);
407         if (str == null || str.length() == 0 || delimLen == 0) {
408             return str;
409         }
410         int strLen = str.length();
411         StringBuffer buffer = new StringBuffer(strLen);
412         boolean uncapitalizeNext = true;
413         for (int i = 0; i < strLen; i++) {
414             char ch = str.charAt(i);
415 
416             if (isDelimiter(ch, delimiters)) {
417                 buffer.append(ch);
418                 uncapitalizeNext = true;
419             } else if (uncapitalizeNext) {
420                 buffer.append(Character.toLowerCase(ch));
421                 uncapitalizeNext = false;
422             } else {
423                 buffer.append(ch);
424             }
425         }
426         return buffer.toString();
427     }
428 
429     //-----------------------------------------------------------------------
430     /**
431      * <p>Swaps the case of a String using a word based algorithm.</p>
432      * 
433      * <ul>
434      *  <li>Upper case character converts to Lower case</li>
435      *  <li>Title case character converts to Lower case</li>
436      *  <li>Lower case character after Whitespace or at start converts to Title case</li>
437      *  <li>Other Lower case character converts to Upper case</li>
438      * </ul>
439      * 
440      * <p>Whitespace is defined by {@link Character#isWhitespace(char)}.
441      * A <code>null</code> input String returns <code>null</code>.</p>
442      * 
443      * <pre>
444      * StringUtils.swapCase(null)                 = null
445      * StringUtils.swapCase("")                   = ""
446      * StringUtils.swapCase("The dog has a BONE") = "tHE DOG HAS A bone"
447      * </pre>
448      * 
449      * @param str  the String to swap case, may be null
450      * @return the changed String, <code>null</code> if null String input
451      */
452     public static String swapCase(String str) {
453         int strLen;
454         if (str == null || (strLen = str.length()) == 0) {
455             return str;
456         }
457         StringBuffer buffer = new StringBuffer(strLen);
458 
459         boolean whitespace = true;
460         char ch = 0;
461         char tmp = 0;
462 
463         for (int i = 0; i < strLen; i++) {
464             ch = str.charAt(i);
465             if (Character.isUpperCase(ch)) {
466                 tmp = Character.toLowerCase(ch);
467             } else if (Character.isTitleCase(ch)) {
468                 tmp = Character.toLowerCase(ch);
469             } else if (Character.isLowerCase(ch)) {
470                 if (whitespace) {
471                     tmp = Character.toTitleCase(ch);
472                 } else {
473                     tmp = Character.toUpperCase(ch);
474                 }
475             } else {
476                 tmp = ch;
477             }
478             buffer.append(tmp);
479             whitespace = Character.isWhitespace(ch);
480         }
481         return buffer.toString();
482     }
483 
484     //-----------------------------------------------------------------------
485     /**
486      * <p>Extracts the initial letters from each word in the String.</p>
487      * 
488      * <p>The first letter of the string and all first letters after
489      * whitespace are returned as a new string.
490      * Their case is not changed.</p>
491      *
492      * <p>Whitespace is defined by {@link Character#isWhitespace(char)}.
493      * A <code>null</code> input String returns <code>null</code>.</p>
494      *
495      * <pre>
496      * WordUtils.initials(null)             = null
497      * WordUtils.initials("")               = ""
498      * WordUtils.initials("Ben John Lee")   = "BJL"
499      * WordUtils.initials("Ben J.Lee")      = "BJ"
500      * </pre>
501      *
502      * @param str  the String to get initials from, may be null
503      * @return String of initial letters, <code>null</code> if null String input
504      * @see #initials(String,char[])
505      * @since 2.2
506      */
507     public static String initials(String str) {
508         return initials(str, null);
509     }
510 
511     /**
512      * <p>Extracts the initial letters from each word in the String.</p>
513      * 
514      * <p>The first letter of the string and all first letters after the
515      * defined delimiters are returned as a new string.
516      * Their case is not changed.</p>
517      *
518      * <p>If the delimiters array is null, then Whitespace is used.
519      * Whitespace is defined by {@link Character#isWhitespace(char)}.
520      * A <code>null</code> input String returns <code>null</code>.
521      * An empty delimiter array returns an empty String.</p>
522      *
523      * <pre>
524      * WordUtils.initials(null, *)                = null
525      * WordUtils.initials("", *)                  = ""
526      * WordUtils.initials("Ben John Lee", null)   = "BJL"
527      * WordUtils.initials("Ben J.Lee", null)      = "BJ"
528      * WordUtils.initials("Ben J.Lee", [' ','.']) = "BJL"
529      * WordUtils.initials(*, new char[0])         = ""
530      * </pre>
531      * 
532      * @param str  the String to get initials from, may be null
533      * @param delimiters  set of characters to determine words, null means whitespace
534      * @return String of initial letters, <code>null</code> if null String input
535      * @see #initials(String)
536      * @since 2.2
537      */
538     public static String initials(String str, char[] delimiters) {
539         if (str == null || str.length() == 0) {
540             return str;
541         }
542         if (delimiters != null && delimiters.length == 0) {
543             return "";
544         }
545         int strLen = str.length();
546         char[] buf = new char[strLen / 2 + 1];
547         int count = 0;
548         boolean lastWasGap = true;
549         for (int i = 0; i < strLen; i++) {
550             char ch = str.charAt(i);
551 
552             if (isDelimiter(ch, delimiters)) {
553                 lastWasGap = true;
554             } else if (lastWasGap) {
555                 buf[count++] = ch;
556                 lastWasGap = false;
557             } else {
558                 // ignore ch
559             }
560         }
561         return new String(buf, 0, count);
562     }
563 
564     //-----------------------------------------------------------------------
565     /**
566      * Is the character a delimiter.
567      *
568      * @param ch  the character to check
569      * @param delimiters  the delimiters
570      * @return true if it is a delimiter
571      */
572     private static boolean isDelimiter(char ch, char[] delimiters) {
573         if (delimiters == null) {
574             return Character.isWhitespace(ch);
575         }
576         for (int i = 0, isize = delimiters.length; i < isize; i++) {
577             if (ch == delimiters[i]) {
578                 return true;
579             }
580         }
581         return false;
582     }
583 
584     //-----------------------------------------------------------------------
585     /**
586      * Abbreviates a string nicely.
587      * 
588      * This method searches for the first space after the lower limit and abbreviates
589      * the String there. It will also append any String passed as a parameter
590      * to the end of the String. The upper limit can be specified to forcibly
591      * abbreviate a String.
592      * 
593      * @param str         the string to be abbreviated. If null is passed, null is returned.
594      *                    If the empty String is passed, the empty string is returned.
595      * @param lower       the lower limit.
596      * @param upper       the upper limit; specify -1 if no limit is desired.
597      *                    If the upper limit is lower than the lower limit, it will be
598      *                    adjusted to be the same as the lower limit.
599      * @param appendToEnd String to be appended to the end of the abbreviated string.
600      *                    This is appended ONLY if the string was indeed abbreviated.
601      *                    The append does not count towards the lower or upper limits.
602      * @return the abbreviated String.
603      * @since 2.4
604      */
605     public static String abbreviate(String str, int lower, int upper, String appendToEnd) {
606         // initial parameter checks
607         if (str == null) {
608             return null;
609         }
610         if (str.length() == 0) {
611             return StringUtils.EMPTY;
612         }
613 
614         // if the upper value is -1 (i.e. no limit) or is greater
615         // than the length of the string, set to the length of the string
616         if (upper == -1 || upper > str.length()) {
617             upper = str.length();
618         }
619         // if upper is less than lower, raise it to lower
620         if (upper < lower) {
621             upper = lower;
622         }
623 
624         StringBuffer result = new StringBuffer();
625         int index = StringUtils.indexOf(str, " ", lower);
626         if (index == -1) {
627             result.append(str.substring(0, upper));
628             // only if abbreviation has occured do we append the appendToEnd value
629             if (upper != str.length()) {
630                 result.append(StringUtils.defaultString(appendToEnd));
631             }
632         } else if (index > upper) {
633             result.append(str.substring(0, upper));
634             result.append(StringUtils.defaultString(appendToEnd));
635         } else {
636             result.append(str.substring(0, index));
637             result.append(StringUtils.defaultString(appendToEnd));
638         }
639         return result.toString();
640     }
641 
642 }