001    /*
002     * Licensed to the Apache Software Foundation (ASF) under one or more
003     * contributor license agreements.  See the NOTICE file distributed with
004     * this work for additional information regarding copyright ownership.
005     * The ASF licenses this file to You under the Apache License, Version 2.0
006     * (the "License"); you may not use this file except in compliance with
007     * the License.  You may obtain a copy of the License at
008     * 
009     *      http://www.apache.org/licenses/LICENSE-2.0
010     * 
011     * Unless required by applicable law or agreed to in writing, software
012     * distributed under the License is distributed on an "AS IS" BASIS,
013     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014     * See the License for the specific language governing permissions and
015     * limitations under the License.
016     */
017    package org.apache.commons.lang;
018    
/**
 * <p>Operations on Strings that contain words.</p>
 * 
 * <p>This class tries to handle <code>null</code> input gracefully.
 * An exception will not be thrown for a <code>null</code> input.
 * Each method documents its behaviour in more detail.</p>
 * 
 * @author Apache Jakarta Velocity
 * @author Apache Software Foundation
 * @author <a href="mailto:hps@intermeta.de">Henning P. Schmiedehausen</a>
 * @author Gary Gregory
 * @since 2.0
 * @version $Id: WordUtils.java 905636 2010-02-02 14:03:32Z niallp $
 */
public class WordUtils {

    /**
     * <p><code>WordUtils</code> instances should NOT be constructed in
     * standard programming. Instead, the class should be used as
     * <code>WordUtils.wrap("foo bar", 20);</code>.</p>
     *
     * <p>This constructor is public to permit tools that require a JavaBean
     * instance to operate.</p>
     */
    public WordUtils() {
        super();
    }

    // Wrapping
    //-----------------------------------------------------------------------
    /**
     * <p>Wraps a single line of text, identifying words by <code>' '</code>.</p>
     * 
     * <p>New lines will be separated by the system property line separator.
     * Very long words, such as URLs will <i>not</i> be wrapped.</p>
     * 
     * <p>Leading spaces on a new line are stripped.
     * Trailing spaces are not stripped.</p>
     *
     * <pre>
     * WordUtils.wrap(null, *) = null
     * WordUtils.wrap("", *) = ""
     * </pre>
     *
     * @param str  the String to be word wrapped, may be null
     * @param wrapLength  the column to wrap the words at, less than 1 is treated as 1
     * @return a line with newlines inserted, <code>null</code> if null input
     */
    public static String wrap(String str, int wrapLength) {
        return wrap(str, wrapLength, null, false);
    }

    /**
     * <p>Wraps a single line of text, identifying words by <code>' '</code>.</p>
     * 
     * <p>Leading spaces on a new line are stripped.
     * Trailing spaces are not stripped.</p>
     * 
     * <pre>
     * WordUtils.wrap(null, *, *, *) = null
     * WordUtils.wrap("", *, *, *) = ""
     * WordUtils.wrap("Click here to jump to the jakarta website - http://jakarta.apache.org", 20, "\n", false)
     *   = "Click here to jump\nto the jakarta\nwebsite -\nhttp://jakarta.apache.org"
     * WordUtils.wrap("Click here to jump to the jakarta website - http://jakarta.apache.org", 20, "\n", true)
     *   = "Click here to jump\nto the jakarta\nwebsite -\nhttp://jakarta.apach\ne.org"
     * </pre>
     *
     * @param str  the String to be word wrapped, may be null
     * @param wrapLength  the column to wrap the words at, less than 1 is treated as 1
     * @param newLineStr  the string to insert for a new line, 
     *  <code>null</code> uses the system property line separator
     * @param wrapLongWords  true if long words (such as URLs) should be wrapped
     * @return a line with newlines inserted, <code>null</code> if null input
     */
    public static String wrap(String str, int wrapLength, String newLineStr, boolean wrapLongWords) {
        if (str == null) {
            return null;
        }
        if (newLineStr == null) {
            newLineStr = System.getProperty("line.separator");
        }
        if (wrapLength < 1) {
            wrapLength = 1;
        }
        int inputLineLength = str.length();
        int offset = 0;
        StringBuffer wrappedLine = new StringBuffer(inputLineLength + 32);

        // keep going while the unconsumed remainder is too long for one line
        while ((inputLineLength - offset) > wrapLength) {
            if (str.charAt(offset) == ' ') {
                // skip a space sitting at the start of the next output line
                offset++;
                continue;
            }
            // last space at or before the wrap column, counted from the current offset
            int spaceToWrapAt = str.lastIndexOf(' ', wrapLength + offset);

            if (spaceToWrapAt >= offset) {
                // normal case: break at that space, which is itself dropped
                wrappedLine.append(str.substring(offset, spaceToWrapAt));
                wrappedLine.append(newLineStr);
                offset = spaceToWrapAt + 1;

            } else if (wrapLongWords) {
                // really long word or URL: cut it at exactly wrapLength characters
                wrappedLine.append(str.substring(offset, wrapLength + offset));
                wrappedLine.append(newLineStr);
                offset += wrapLength;

            } else {
                // do not wrap really long word, just extend beyond the limit
                // up to the next space, or to the end of the input if there is none
                spaceToWrapAt = str.indexOf(' ', wrapLength + offset);
                if (spaceToWrapAt >= 0) {
                    wrappedLine.append(str.substring(offset, spaceToWrapAt));
                    wrappedLine.append(newLineStr);
                    offset = spaceToWrapAt + 1;
                } else {
                    wrappedLine.append(str.substring(offset));
                    offset = inputLineLength;
                }
            }
        }

        // Whatever is left in line is short enough to just pass through
        wrappedLine.append(str.substring(offset));

        return wrappedLine.toString();
    }

    // Capitalizing
    //-----------------------------------------------------------------------
    /**
     * <p>Capitalizes all the whitespace separated words in a String.
     * Only the first letter of each word is changed. To convert the 
     * rest of each word to lowercase at the same time, 
     * use {@link #capitalizeFully(String)}.</p>
     *
     * <p>Whitespace is defined by {@link Character#isWhitespace(char)}.
     * A <code>null</code> input String returns <code>null</code>.
     * Capitalization uses the unicode title case, normally equivalent to
     * upper case.</p>
     *
     * <pre>
     * WordUtils.capitalize(null)        = null
     * WordUtils.capitalize("")          = ""
     * WordUtils.capitalize("i am FINE") = "I Am FINE"
     * </pre>
     * 
     * @param str  the String to capitalize, may be null
     * @return capitalized String, <code>null</code> if null String input
     * @see #uncapitalize(String)
     * @see #capitalizeFully(String)
     */
    public static String capitalize(String str) {
        return capitalize(str, null);
    }

    /**
     * <p>Capitalizes all the delimiter separated words in a String.
     * Only the first letter of each word is changed. To convert the 
     * rest of each word to lowercase at the same time, 
     * use {@link #capitalizeFully(String, char[])}.</p>
     *
     * <p>The delimiters represent a set of characters understood to separate words.
     * The first string character and the first non-delimiter character after a
     * delimiter will be capitalized. </p>
     *
     * <p>A <code>null</code> input String returns <code>null</code>.
     * Capitalization uses the unicode title case, normally equivalent to
     * upper case.</p>
     *
     * <pre>
     * WordUtils.capitalize(null, *)            = null
     * WordUtils.capitalize("", *)              = ""
     * WordUtils.capitalize(*, new char[0])     = *
     * WordUtils.capitalize("i am fine", null)  = "I Am Fine"
     * WordUtils.capitalize("i aM.fine", {'.'}) = "I aM.Fine"
     * </pre>
     * 
     * @param str  the String to capitalize, may be null
     * @param delimiters  set of characters to determine capitalization, null means whitespace
     * @return capitalized String, <code>null</code> if null String input
     * @see #uncapitalize(String)
     * @see #capitalizeFully(String)
     * @since 2.1
     */
    public static String capitalize(String str, char[] delimiters) {
        // -1 marks "no delimiter array given", which selects whitespace below
        int delimLen = (delimiters == null ? -1 : delimiters.length);
        if (str == null || str.length() == 0 || delimLen == 0) {
            return str;
        }
        int strLen = str.length();
        StringBuffer buffer = new StringBuffer(strLen);
        boolean capitalizeNext = true;
        for (int i = 0; i < strLen; i++) {
            char ch = str.charAt(i);

            if (isDelimiter(ch, delimiters)) {
                buffer.append(ch);
                capitalizeNext = true;
            } else if (capitalizeNext) {
                buffer.append(Character.toTitleCase(ch));
                capitalizeNext = false;
            } else {
                buffer.append(ch);
            }
        }
        return buffer.toString();
    }

    //-----------------------------------------------------------------------
    /**
     * <p>Converts all the whitespace separated words in a String into capitalized words, 
     * that is each word is made up of a titlecase character and then a series of 
     * lowercase characters.  </p>
     *
     * <p>Whitespace is defined by {@link Character#isWhitespace(char)}.
     * A <code>null</code> input String returns <code>null</code>.
     * Capitalization uses the unicode title case, normally equivalent to
     * upper case.</p>
     *
     * <pre>
     * WordUtils.capitalizeFully(null)        = null
     * WordUtils.capitalizeFully("")          = ""
     * WordUtils.capitalizeFully("i am FINE") = "I Am Fine"
     * </pre>
     * 
     * @param str  the String to capitalize, may be null
     * @return capitalized String, <code>null</code> if null String input
     */
    public static String capitalizeFully(String str) {
        return capitalizeFully(str, null);
    }

    /**
     * <p>Converts all the delimiter separated words in a String into capitalized words, 
     * that is each word is made up of a titlecase character and then a series of 
     * lowercase characters. </p>
     *
     * <p>The delimiters represent a set of characters understood to separate words.
     * The first string character and the first non-delimiter character after a
     * delimiter will be capitalized. </p>
     *
     * <p>A <code>null</code> input String returns <code>null</code>.
     * Capitalization uses the unicode title case, normally equivalent to
     * upper case. The String is lower cased with {@link String#toLowerCase()}
     * before it is capitalized.</p>
     *
     * <pre>
     * WordUtils.capitalizeFully(null, *)            = null
     * WordUtils.capitalizeFully("", *)              = ""
     * WordUtils.capitalizeFully(*, new char[0])     = *
     * WordUtils.capitalizeFully("i am FINE", null)  = "I Am Fine"
     * WordUtils.capitalizeFully("i aM.fine", {'.'}) = "I am.Fine"
     * </pre>
     * 
     * @param str  the String to capitalize, may be null
     * @param delimiters  set of characters to determine capitalization, null means whitespace
     * @return capitalized String, <code>null</code> if null String input
     * @since 2.1
     */
    public static String capitalizeFully(String str, char[] delimiters) {
        int delimLen = (delimiters == null ? -1 : delimiters.length);
        // this guard must stay ahead of toLowerCase(): with an empty delimiter
        // array the input is returned untouched, not lower cased
        if (str == null || str.length() == 0 || delimLen == 0) {
            return str;
        }
        str = str.toLowerCase();
        return capitalize(str, delimiters);
    }

    //-----------------------------------------------------------------------
    /**
     * <p>Uncapitalizes all the whitespace separated words in a String.
     * Only the first letter of each word is changed.</p>
     *
     * <p>Whitespace is defined by {@link Character#isWhitespace(char)}.
     * A <code>null</code> input String returns <code>null</code>.</p>
     *
     * <pre>
     * WordUtils.uncapitalize(null)        = null
     * WordUtils.uncapitalize("")          = ""
     * WordUtils.uncapitalize("I Am FINE") = "i am fINE"
     * </pre>
     * 
     * @param str  the String to uncapitalize, may be null
     * @return uncapitalized String, <code>null</code> if null String input
     * @see #capitalize(String)
     */
    public static String uncapitalize(String str) {
        return uncapitalize(str, null);
    }

    /**
     * <p>Uncapitalizes all the delimiter separated words in a String.
     * Only the first letter of each word is changed.</p>
     *
     * <p>The delimiters represent a set of characters understood to separate words.
     * The first string character and the first non-delimiter character after a
     * delimiter will be uncapitalized. </p>
     *
     * <p>If the delimiters array is null, then whitespace is used.
     * Whitespace is defined by {@link Character#isWhitespace(char)}.
     * A <code>null</code> input String returns <code>null</code>.</p>
     *
     * <pre>
     * WordUtils.uncapitalize(null, *)            = null
     * WordUtils.uncapitalize("", *)              = ""
     * WordUtils.uncapitalize(*, new char[0])     = *
     * WordUtils.uncapitalize("I Am FINE", null)  = "i am fINE"
     * WordUtils.uncapitalize("I AM.FINE", {'.'}) = "i AM.fINE"
     * </pre>
     * 
     * @param str  the String to uncapitalize, may be null
     * @param delimiters  set of characters to determine uncapitalization, null means whitespace
     * @return uncapitalized String, <code>null</code> if null String input
     * @see #capitalize(String)
     * @since 2.1
     */
    public static String uncapitalize(String str, char[] delimiters) {
        // -1 marks "no delimiter array given", which selects whitespace below
        int delimLen = (delimiters == null ? -1 : delimiters.length);
        if (str == null || str.length() == 0 || delimLen == 0) {
            return str;
        }
        int strLen = str.length();
        StringBuffer buffer = new StringBuffer(strLen);
        boolean uncapitalizeNext = true;
        for (int i = 0; i < strLen; i++) {
            char ch = str.charAt(i);

            if (isDelimiter(ch, delimiters)) {
                buffer.append(ch);
                uncapitalizeNext = true;
            } else if (uncapitalizeNext) {
                buffer.append(Character.toLowerCase(ch));
                uncapitalizeNext = false;
            } else {
                buffer.append(ch);
            }
        }
        return buffer.toString();
    }

    //-----------------------------------------------------------------------
    /**
     * <p>Swaps the case of a String using a word based algorithm.</p>
     * 
     * <ul>
     *  <li>Upper case character converts to Lower case</li>
     *  <li>Title case character converts to Lower case</li>
     *  <li>Lower case character after Whitespace or at start converts to Title case</li>
     *  <li>Other Lower case character converts to Upper case</li>
     * </ul>
     * 
     * <p>Whitespace is defined by {@link Character#isWhitespace(char)}.
     * A <code>null</code> input String returns <code>null</code>.</p>
     * 
     * <pre>
     * WordUtils.swapCase(null)                 = null
     * WordUtils.swapCase("")                   = ""
     * WordUtils.swapCase("The dog has a BONE") = "tHE DOG HAS A bone"
     * </pre>
     * 
     * @param str  the String to swap case, may be null
     * @return the changed String, <code>null</code> if null String input
     */
    public static String swapCase(String str) {
        int strLen;
        if (str == null || (strLen = str.length()) == 0) {
            return str;
        }
        StringBuffer buffer = new StringBuffer(strLen);

        // true at the start of the String and right after a whitespace character
        boolean whitespace = true;

        for (int i = 0; i < strLen; i++) {
            char ch = str.charAt(i);
            char swapped;
            if (Character.isUpperCase(ch) || Character.isTitleCase(ch)) {
                swapped = Character.toLowerCase(ch);
            } else if (Character.isLowerCase(ch)) {
                swapped = (whitespace ? Character.toTitleCase(ch) : Character.toUpperCase(ch));
            } else {
                swapped = ch;
            }
            buffer.append(swapped);
            whitespace = Character.isWhitespace(ch);
        }
        return buffer.toString();
    }

    //-----------------------------------------------------------------------
    /**
     * <p>Extracts the initial letters from each word in the String.</p>
     * 
     * <p>The first letter of the string and all first letters after
     * whitespace are returned as a new string.
     * Their case is not changed.</p>
     *
     * <p>Whitespace is defined by {@link Character#isWhitespace(char)}.
     * A <code>null</code> input String returns <code>null</code>.</p>
     *
     * <pre>
     * WordUtils.initials(null)             = null
     * WordUtils.initials("")               = ""
     * WordUtils.initials("Ben John Lee")   = "BJL"
     * WordUtils.initials("Ben J.Lee")      = "BJ"
     * </pre>
     *
     * @param str  the String to get initials from, may be null
     * @return String of initial letters, <code>null</code> if null String input
     * @see #initials(String,char[])
     * @since 2.2
     */
    public static String initials(String str) {
        return initials(str, null);
    }

    /**
     * <p>Extracts the initial letters from each word in the String.</p>
     * 
     * <p>The first letter of the string and all first letters after the
     * defined delimiters are returned as a new string.
     * Their case is not changed.</p>
     *
     * <p>If the delimiters array is null, then Whitespace is used.
     * Whitespace is defined by {@link Character#isWhitespace(char)}.
     * A <code>null</code> input String returns <code>null</code>.
     * An empty delimiter array returns an empty String.</p>
     *
     * <pre>
     * WordUtils.initials(null, *)                = null
     * WordUtils.initials("", *)                  = ""
     * WordUtils.initials("Ben John Lee", null)   = "BJL"
     * WordUtils.initials("Ben J.Lee", null)      = "BJ"
     * WordUtils.initials("Ben J.Lee", [' ','.']) = "BJL"
     * WordUtils.initials(*, new char[0])         = ""
     * </pre>
     * 
     * @param str  the String to get initials from, may be null
     * @param delimiters  set of characters to determine words, null means whitespace
     * @return String of initial letters, <code>null</code> if null String input
     * @see #initials(String)
     * @since 2.2
     */
    public static String initials(String str, char[] delimiters) {
        if (str == null || str.length() == 0) {
            return str;
        }
        if (delimiters != null && delimiters.length == 0) {
            return "";
        }
        int strLen = str.length();
        // two initials always have at least one delimiter between them,
        // so there can never be more than strLen / 2 + 1 of them
        char[] buf = new char[strLen / 2 + 1];
        int count = 0;
        boolean lastWasGap = true;
        for (int i = 0; i < strLen; i++) {
            char ch = str.charAt(i);

            if (isDelimiter(ch, delimiters)) {
                lastWasGap = true;
            } else if (lastWasGap) {
                buf[count++] = ch;
                lastWasGap = false;
            }
            // any other character is inside a word and is ignored
        }
        return new String(buf, 0, count);
    }

    //-----------------------------------------------------------------------
    /**
     * Is the character a delimiter.
     *
     * @param ch  the character to check
     * @param delimiters  the delimiters, <code>null</code> means
     *  {@link Character#isWhitespace(char)} decides
     * @return true if it is a delimiter
     */
    private static boolean isDelimiter(char ch, char[] delimiters) {
        if (delimiters == null) {
            return Character.isWhitespace(ch);
        }
        for (int i = 0, isize = delimiters.length; i < isize; i++) {
            if (ch == delimiters[i]) {
                return true;
            }
        }
        return false;
    }

    //-----------------------------------------------------------------------
    /**
     * Abbreviates a string nicely.
     * 
     * This method searches for the first space at or after the lower limit and
     * abbreviates the String there. It will also append any String passed as a
     * parameter to the end of the String. The upper limit can be specified to
     * forcibly abbreviate a String.
     *
     * <pre>
     * WordUtils.abbreviate(null, *, *, *)                  = null
     * WordUtils.abbreviate("", *, *, *)                    = ""
     * WordUtils.abbreviate("Now is the time", 0, -1, "...") = "Now..."
     * WordUtils.abbreviate("Now is the time", 10, -1, "")   = "Now is the"
     * WordUtils.abbreviate("0123456789", 0, 5, "...")       = "01234..."
     * WordUtils.abbreviate("0123456789", 0, -1, "...")      = "0123456789"
     * </pre>
     * 
     * @param str         the string to be abbreviated. If null is passed, null is returned.
     *                    If the empty String is passed, the empty string is returned.
     * @param lower       the lower limit. A negative value is treated as 0 and a value
     *                    greater than the length of the string as that length.
     * @param upper       the upper limit; specify -1 if no limit is desired.
     *                    If the upper limit is lower than the lower limit, it will be
     *                    adjusted to be the same as the lower limit.
     * @param appendToEnd String to be appended to the end of the abbreviated string,
     *                    <code>null</code> is treated as the empty string.
     *                    This is appended ONLY if the string was indeed abbreviated.
     *                    The append does not count towards the lower or upper limits.
     * @return the abbreviated String.
     * @since 2.4
     */
    public static String abbreviate(String str, int lower, int upper, String appendToEnd) {
        // initial parameter checks
        if (str == null) {
            return null;
        }
        int strLen = str.length();
        if (strLen == 0) {
            return "";
        }

        // a negative lower limit would let a negative upper limit (other than -1)
        // slip through the "upper < lower" adjustment and reach substring()
        if (lower < 0) {
            lower = 0;
        }
        // if the lower value is greater than the length of the string,
        // set to the length of the string
        if (lower > strLen) {
            lower = strLen;
        }
        // if the upper value is -1 (i.e. no limit) or is greater
        // than the length of the string, set to the length of the string
        if (upper == -1 || upper > strLen) {
            upper = strLen;
        }
        // if upper is less than lower, raise it to lower
        if (upper < lower) {
            upper = lower;
        }

        // cut at the first space at or after lower, unless there is none
        // or it lies beyond upper, in which case cut at upper
        int index = str.indexOf(' ', lower);
        int cut = (index == -1 || index > upper) ? upper : index;
        String head = str.substring(0, cut);

        // only if abbreviation has occurred do we append the appendToEnd value
        if (cut == strLen) {
            return head;
        }
        return head + (appendToEnd == null ? "" : appendToEnd);
    }

}