001    /*
002     * Licensed to the Apache Software Foundation (ASF) under one or more
003     * contributor license agreements.  See the NOTICE file distributed with
004     * this work for additional information regarding copyright ownership.
005     * The ASF licenses this file to You under the Apache License, Version 2.0
006     * (the "License"); you may not use this file except in compliance with
007     * the License.  You may obtain a copy of the License at
008     * 
009     *      http://www.apache.org/licenses/LICENSE-2.0
010     * 
011     * Unless required by applicable law or agreed to in writing, software
012     * distributed under the License is distributed on an "AS IS" BASIS,
013     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014     * See the License for the specific language governing permissions and
015     * limitations under the License.
016     */
017    package org.apache.commons.lang3.text;
018    
019    import org.apache.commons.lang3.StringUtils;
020    import org.apache.commons.lang3.SystemUtils;
021    
/**
 * <p>Operations on Strings that contain words.</p>
 *
 * <p>This class tries to handle <code>null</code> input gracefully.
 * An exception will not be thrown for a <code>null</code> input String.
 * Each method documents its behaviour in more detail.</p>
 *
 * <p>The capitalizing, uncapitalizing, case swapping and initials methods
 * iterate over Unicode code points rather than UTF-16 <code>char</code>
 * values, so a character outside the Basic Multilingual Plane is always
 * treated as one character and is never split into its surrogates.</p>
 *
 * @author Apache Software Foundation
 * @author Apache Jakarta Velocity
 * @author <a href="mailto:hps@intermeta.de">Henning P. Schmiedehausen</a>
 * @author Gary Gregory
 * @since 2.0
 * @version $Id: WordUtils.java 925967 2010-03-22 06:16:49Z bayard $
 */
public class WordUtils {

    /**
     * <p><code>WordUtils</code> instances should NOT be constructed in
     * standard programming. Instead, the class should be used as
     * <code>WordUtils.wrap("foo bar", 20);</code>.</p>
     *
     * <p>This constructor is public to permit tools that require a JavaBean
     * instance to operate.</p>
     */
    public WordUtils() {
        super();
    }

    // Wrapping
    //--------------------------------------------------------------------------
    /**
     * <p>Wraps a single line of text, identifying words by <code>' '</code>.</p>
     *
     * <p>New lines will be separated by the system property line separator.
     * Very long words, such as URLs will <i>not</i> be wrapped.</p>
     *
     * <p>This is equivalent to calling
     * {@link #wrap(String, int, String, boolean)} with a <code>null</code>
     * new line string and <code>wrapLongWords</code> set to <code>false</code>;
     * see that method for how spaces are treated.</p>
     *
     * <pre>
     * WordUtils.wrap(null, *) = null
     * WordUtils.wrap("", *) = ""
     * </pre>
     *
     * @param str  the String to be word wrapped, may be null
     * @param wrapLength  the column to wrap the words at, less than 1 is treated as 1
     * @return a line with newlines inserted, <code>null</code> if null input
     */
    public static String wrap(String str, int wrapLength) {
        return wrap(str, wrapLength, null, false);
    }

    /**
     * <p>Wraps a single line of text, identifying words by <code>' '</code>.</p>
     *
     * <p>While the unprocessed text is longer than <code>wrapLength</code>,
     * spaces found where a new line would start are skipped. Once the
     * remaining text is no longer than <code>wrapLength</code> it is appended
     * unchanged, so the last line may still start with spaces and an input
     * that never needs wrapping is returned with all of its spaces.
     * Trailing spaces are not stripped.</p>
     *
     * <p>The space at which a line is broken is replaced by the new line
     * string; it is not copied to the result.</p>
     *
     * <pre>
     * WordUtils.wrap(null, *, *, *) = null
     * WordUtils.wrap("", *, *, *) = ""
     * </pre>
     *
     * @param str  the String to be word wrapped, may be null
     * @param wrapLength  the column to wrap the words at, less than 1 is treated as 1
     * @param newLineStr  the string to insert for a new line,
     *  <code>null</code> uses the <code>line.separator</code> system property
     * @param wrapLongWords  true if long words (such as URLs) should be wrapped
     * @return a line with newlines inserted, <code>null</code> if null input
     */
    public static String wrap(String str, int wrapLength, String newLineStr, boolean wrapLongWords) {
        if (str == null) {
            return null;
        }
        if (newLineStr == null) {
            newLineStr = System.getProperty("line.separator");
        }
        if (wrapLength < 1) {
            wrapLength = 1;
        }
        final int inputLineLength = str.length();
        int offset = 0;
        // a little head room for the inserted line separators
        StringBuilder wrappedLine = new StringBuilder(inputLineLength + 32);

        while (inputLineLength - offset > wrapLength) {
            if (str.charAt(offset) == ' ') {
                // skip a space that would otherwise start the next line
                offset++;
                continue;
            }
            // last space that still keeps the line within wrapLength columns
            int spaceToWrapAt = str.lastIndexOf(' ', wrapLength + offset);

            if (spaceToWrapAt >= offset) {
                // normal case: break at that space and drop it
                wrappedLine.append(str, offset, spaceToWrapAt).append(newLineStr);
                offset = spaceToWrapAt + 1;
            } else if (wrapLongWords) {
                // really long word: cut it after exactly wrapLength characters
                wrappedLine.append(str, offset, wrapLength + offset).append(newLineStr);
                offset += wrapLength;
            } else {
                // really long word kept whole: extend the line to the next space, if any
                spaceToWrapAt = str.indexOf(' ', wrapLength + offset);
                if (spaceToWrapAt >= 0) {
                    wrappedLine.append(str, offset, spaceToWrapAt).append(newLineStr);
                    offset = spaceToWrapAt + 1;
                } else {
                    wrappedLine.append(str, offset, inputLineLength);
                    offset = inputLineLength;
                }
            }
        }

        // Whatever is left in line is short enough to just pass through
        wrappedLine.append(str, offset, inputLineLength);

        return wrappedLine.toString();
    }

    // Capitalizing
    //-----------------------------------------------------------------------
    /**
     * <p>Capitalizes all the whitespace separated words in a String.
     * Only the first letter of each word is changed. To convert the
     * rest of each word to lowercase at the same time,
     * use {@link #capitalizeFully(String)}.</p>
     *
     * <p>Whitespace is defined by {@link Character#isWhitespace(int)}.
     * A <code>null</code> input String returns <code>null</code>.
     * Capitalization uses the unicode title case, normally equivalent to
     * upper case.</p>
     *
     * <pre>
     * WordUtils.capitalize(null)        = null
     * WordUtils.capitalize("")          = ""
     * WordUtils.capitalize("i am FINE") = "I Am FINE"
     * </pre>
     *
     * @param str  the String to capitalize, may be null
     * @return capitalized String, <code>null</code> if null String input
     * @see #uncapitalize(String)
     * @see #capitalizeFully(String)
     */
    public static String capitalize(String str) {
        return capitalize(str, (char[]) null);
    }

    /**
     * <p>Capitalizes all the delimiter separated words in a String.
     * Only the first letter of each word is changed. To convert the
     * rest of each word to lowercase at the same time,
     * use {@link #capitalizeFully(String, char[])}.</p>
     *
     * <p>The delimiters represent a set of characters understood to separate words.
     * The first string character and the first non-delimiter character after a
     * delimiter will be capitalized. Delimiters are compared against whole
     * code points.</p>
     *
     * <p>A <code>null</code> input String returns <code>null</code>.
     * Capitalization uses the unicode title case, normally equivalent to
     * upper case.</p>
     *
     * <pre>
     * WordUtils.capitalize(null, *)            = null
     * WordUtils.capitalize("", *)              = ""
     * WordUtils.capitalize(*, new char[0])     = *
     * WordUtils.capitalize("i am fine", null)  = "I Am Fine"
     * WordUtils.capitalize("i aM.fine", {'.'}) = "I aM.Fine"
     * </pre>
     *
     * @param str  the String to capitalize, may be null
     * @param delimiters  set of characters to determine capitalization, null means whitespace
     * @return capitalized String, <code>null</code> if null String input
     * @see #uncapitalize(String)
     * @see #capitalizeFully(String)
     * @since 2.1
     */
    public static String capitalize(String str, char... delimiters) {
        int delimLen = (delimiters == null ? -1 : delimiters.length);
        if (str == null || str.length() == 0 || delimLen == 0) {
            return str;
        }
        final int strLen = str.length();
        StringBuilder buffer = new StringBuilder(strLen);
        boolean capitalizeNext = true;
        for (int i = 0; i < strLen; ) {
            // step by code point so that surrogate pairs are converted as one character
            final int codePoint = str.codePointAt(i);
            i += Character.charCount(codePoint);

            if (isDelimiter(codePoint, delimiters)) {
                buffer.appendCodePoint(codePoint);
                capitalizeNext = true;
            } else if (capitalizeNext) {
                buffer.appendCodePoint(Character.toTitleCase(codePoint));
                capitalizeNext = false;
            } else {
                buffer.appendCodePoint(codePoint);
            }
        }
        return buffer.toString();
    }

    //-----------------------------------------------------------------------
    /**
     * <p>Converts all the whitespace separated words in a String into capitalized words,
     * that is each word is made up of a titlecase character and then a series of
     * lowercase characters.  </p>
     *
     * <p>Whitespace is defined by {@link Character#isWhitespace(int)}.
     * A <code>null</code> input String returns <code>null</code>.
     * Capitalization uses the unicode title case, normally equivalent to
     * upper case.</p>
     *
     * <pre>
     * WordUtils.capitalizeFully(null)        = null
     * WordUtils.capitalizeFully("")          = ""
     * WordUtils.capitalizeFully("i am FINE") = "I Am Fine"
     * </pre>
     *
     * @param str  the String to capitalize, may be null
     * @return capitalized String, <code>null</code> if null String input
     */
    public static String capitalizeFully(String str) {
        return capitalizeFully(str, (char[]) null);
    }

    /**
     * <p>Converts all the delimiter separated words in a String into capitalized words,
     * that is each word is made up of a titlecase character and then a series of
     * lowercase characters. </p>
     *
     * <p>The delimiters represent a set of characters understood to separate words.
     * The first string character and the first non-delimiter character after a
     * delimiter will be capitalized. </p>
     *
     * <p>A <code>null</code> input String returns <code>null</code>.
     * Capitalization uses the unicode title case, normally equivalent to
     * upper case. The String is first lower-cased with
     * {@link String#toLowerCase()}, which applies the rules of the default
     * locale.</p>
     *
     * <pre>
     * WordUtils.capitalizeFully(null, *)            = null
     * WordUtils.capitalizeFully("", *)              = ""
     * WordUtils.capitalizeFully(*, new char[0])     = *
     * WordUtils.capitalizeFully("i am FINE", null)  = "I Am Fine"
     * WordUtils.capitalizeFully("i aM.fine", {'.'}) = "I am.Fine"
     * </pre>
     *
     * @param str  the String to capitalize, may be null
     * @param delimiters  set of characters to determine capitalization, null means whitespace
     * @return capitalized String, <code>null</code> if null String input
     * @since 2.1
     */
    public static String capitalizeFully(String str, char... delimiters) {
        int delimLen = (delimiters == null ? -1 : delimiters.length);
        if (str == null || str.length() == 0 || delimLen == 0) {
            return str;
        }
        return capitalize(str.toLowerCase(), delimiters);
    }

    //-----------------------------------------------------------------------
    /**
     * <p>Uncapitalizes all the whitespace separated words in a String.
     * Only the first letter of each word is changed.</p>
     *
     * <p>Whitespace is defined by {@link Character#isWhitespace(int)}.
     * A <code>null</code> input String returns <code>null</code>.</p>
     *
     * <pre>
     * WordUtils.uncapitalize(null)        = null
     * WordUtils.uncapitalize("")          = ""
     * WordUtils.uncapitalize("I Am FINE") = "i am fINE"
     * </pre>
     *
     * @param str  the String to uncapitalize, may be null
     * @return uncapitalized String, <code>null</code> if null String input
     * @see #capitalize(String)
     */
    public static String uncapitalize(String str) {
        return uncapitalize(str, (char[]) null);
    }

    /**
     * <p>Uncapitalizes all the delimiter separated words in a String.
     * Only the first letter of each word is changed.</p>
     *
     * <p>The delimiters represent a set of characters understood to separate words.
     * The first string character and the first non-delimiter character after a
     * delimiter will be uncapitalized. Delimiters are compared against whole
     * code points.</p>
     *
     * <p>If the delimiters array is <code>null</code>, whitespace as defined by
     * {@link Character#isWhitespace(int)} is used.
     * A <code>null</code> input String returns <code>null</code>.</p>
     *
     * <pre>
     * WordUtils.uncapitalize(null, *)            = null
     * WordUtils.uncapitalize("", *)              = ""
     * WordUtils.uncapitalize(*, new char[0])     = *
     * WordUtils.uncapitalize("I Am FINE", null)  = "i am fINE"
     * WordUtils.uncapitalize("I AM.FINE", {'.'}) = "i AM.fINE"
     * </pre>
     *
     * @param str  the String to uncapitalize, may be null
     * @param delimiters  set of characters to determine uncapitalization, null means whitespace
     * @return uncapitalized String, <code>null</code> if null String input
     * @see #capitalize(String)
     * @since 2.1
     */
    public static String uncapitalize(String str, char... delimiters) {
        int delimLen = (delimiters == null ? -1 : delimiters.length);
        if (str == null || str.length() == 0 || delimLen == 0) {
            return str;
        }
        final int strLen = str.length();
        StringBuilder buffer = new StringBuilder(strLen);
        boolean uncapitalizeNext = true;
        for (int i = 0; i < strLen; ) {
            // step by code point so that surrogate pairs are converted as one character
            final int codePoint = str.codePointAt(i);
            i += Character.charCount(codePoint);

            if (isDelimiter(codePoint, delimiters)) {
                buffer.appendCodePoint(codePoint);
                uncapitalizeNext = true;
            } else if (uncapitalizeNext) {
                buffer.appendCodePoint(Character.toLowerCase(codePoint));
                uncapitalizeNext = false;
            } else {
                buffer.appendCodePoint(codePoint);
            }
        }
        return buffer.toString();
    }

    //-----------------------------------------------------------------------
    /**
     * <p>Swaps the case of a String using a word based algorithm.</p>
     *
     * <ul>
     *  <li>Upper case character converts to Lower case</li>
     *  <li>Title case character converts to Lower case</li>
     *  <li>Lower case character after Whitespace or at start converts to Title case</li>
     *  <li>Other Lower case character converts to Upper case</li>
     * </ul>
     *
     * <p>Whitespace is defined by {@link Character#isWhitespace(int)}.
     * A <code>null</code> input String returns <code>null</code>.</p>
     *
     * <pre>
     * WordUtils.swapCase(null)                 = null
     * WordUtils.swapCase("")                   = ""
     * WordUtils.swapCase("The dog has a BONE") = "tHE DOG HAS A bone"
     * </pre>
     *
     * @param str  the String to swap case, may be null
     * @return the changed String, <code>null</code> if null String input
     */
    public static String swapCase(String str) {
        if (str == null || str.length() == 0) {
            return str;
        }
        final int strLen = str.length();
        StringBuilder buffer = new StringBuilder(strLen);

        // true at the start of the String and directly after a whitespace character
        boolean whitespace = true;

        for (int i = 0; i < strLen; ) {
            final int codePoint = str.codePointAt(i);
            i += Character.charCount(codePoint);

            final int swapped;
            if (Character.isUpperCase(codePoint) || Character.isTitleCase(codePoint)) {
                swapped = Character.toLowerCase(codePoint);
            } else if (Character.isLowerCase(codePoint)) {
                swapped = whitespace
                        ? Character.toTitleCase(codePoint)
                        : Character.toUpperCase(codePoint);
            } else {
                swapped = codePoint;
            }
            buffer.appendCodePoint(swapped);
            whitespace = Character.isWhitespace(codePoint);
        }
        return buffer.toString();
    }

    //-----------------------------------------------------------------------
    /**
     * <p>Extracts the initial letters from each word in the String.</p>
     *
     * <p>The first letter of the string and all first letters after
     * whitespace are returned as a new string.
     * Their case is not changed.</p>
     *
     * <p>Whitespace is defined by {@link Character#isWhitespace(int)}.
     * A <code>null</code> input String returns <code>null</code>.</p>
     *
     * <pre>
     * WordUtils.initials(null)             = null
     * WordUtils.initials("")               = ""
     * WordUtils.initials("Ben John Lee")   = "BJL"
     * WordUtils.initials("Ben J.Lee")      = "BJ"
     * </pre>
     *
     * @param str  the String to get initials from, may be null
     * @return String of initial letters, <code>null</code> if null String input
     * @see #initials(String,char[])
     * @since 2.2
     */
    public static String initials(String str) {
        return initials(str, (char[]) null);
    }

    /**
     * <p>Extracts the initial letters from each word in the String.</p>
     *
     * <p>The first letter of the string and all first letters after the
     * defined delimiters are returned as a new string.
     * Their case is not changed. An initial outside the Basic Multilingual
     * Plane is returned as a complete surrogate pair.</p>
     *
     * <p>If the delimiters array is null, then Whitespace is used.
     * Whitespace is defined by {@link Character#isWhitespace(int)}.
     * A <code>null</code> input String returns <code>null</code>.
     * An empty delimiter array returns an empty String.</p>
     *
     * <pre>
     * WordUtils.initials(null, *)                = null
     * WordUtils.initials("", *)                  = ""
     * WordUtils.initials("Ben John Lee", null)   = "BJL"
     * WordUtils.initials("Ben J.Lee", null)      = "BJ"
     * WordUtils.initials("Ben J.Lee", [' ','.']) = "BJL"
     * WordUtils.initials(*, new char[0])         = ""
     * </pre>
     *
     * @param str  the String to get initials from, may be null
     * @param delimiters  set of characters to determine words, null means whitespace
     * @return String of initial letters, <code>null</code> if null String input
     * @see #initials(String)
     * @since 2.2
     */
    public static String initials(String str, char... delimiters) {
        if (str == null || str.length() == 0) {
            return str;
        }
        if (delimiters != null && delimiters.length == 0) {
            return "";
        }
        final int strLen = str.length();
        StringBuilder buffer = new StringBuilder(strLen / 2 + 1);
        boolean lastWasGap = true;
        for (int i = 0; i < strLen; ) {
            final int codePoint = str.codePointAt(i);
            i += Character.charCount(codePoint);

            if (isDelimiter(codePoint, delimiters)) {
                lastWasGap = true;
            } else if (lastWasGap) {
                // first character of a word: keep it, ignore the rest of the word
                buffer.appendCodePoint(codePoint);
                lastWasGap = false;
            }
        }
        return buffer.toString();
    }

    //-----------------------------------------------------------------------
    /**
     * Is the character a delimiter.
     *
     * @param codePoint  the Unicode code point to check
     * @param delimiters  the delimiters, <code>null</code> means whitespace
     *  as defined by {@link Character#isWhitespace(int)}
     * @return true if it is a delimiter
     */
    private static boolean isDelimiter(int codePoint, char[] delimiters) {
        if (delimiters == null) {
            return Character.isWhitespace(codePoint);
        }
        for (char delimiter : delimiters) {
            if (codePoint == delimiter) {
                return true;
            }
        }
        return false;
    }

    //-----------------------------------------------------------------------
    /**
     * Abbreviates a string nicely.
     *
     * This method searches for the first space at or after the lower limit and
     * abbreviates the String there. It will also append any String passed as a
     * parameter to the end of the String. The upper limit can be specified to
     * forcibly abbreviate a String.
     *
     * NOTE(review): apart from <code>upper == -1</code>, negative limits are
     * not validated by this method.
     *
     * @param str         the string to be abbreviated. If null is passed, null is returned.
     *                    If the empty String is passed, the empty string is returned.
     * @param lower       the lower limit; a value greater than the length of
     *                    the string is treated as the length of the string.
     * @param upper       the upper limit; specify -1 if no limit is desired.
     *                    A value greater than the length of the string is
     *                    treated as the length of the string.
     *                    If the upper limit is lower than the lower limit, it will be
     *                    adjusted to be the same as the lower limit.
     * @param appendToEnd String to be appended to the end of the abbreviated string,
     *                    <code>null</code> appends nothing.
     *                    This is appended ONLY if the string was indeed abbreviated.
     *                    The append does not count towards the lower or upper limits.
     * @return the abbreviated String.
     * @since 2.4
     */
    public static String abbreviate(String str, int lower, int upper, String appendToEnd) {
        // initial parameter checks
        if (str == null) {
            return null;
        }
        final int length = str.length();
        if (length == 0) {
            return "";
        }

        // if the lower value is greater than the length of the string,
        // set to the length of the string
        if (lower > length) {
            lower = length;
        }
        // if the upper value is -1 (i.e. no limit) or is greater
        // than the length of the string, set to the length of the string
        if (upper == -1 || upper > length) {
            upper = length;
        }
        // if upper is less than lower, raise it to lower
        if (upper < lower) {
            upper = lower;
        }

        final String suffix = (appendToEnd == null ? "" : appendToEnd);
        final int index = str.indexOf(' ', lower);
        if (index == -1) {
            // no space to break at: cut at the upper limit, and only append
            // the suffix if abbreviation has actually occurred
            if (upper == length) {
                return str;
            }
            return str.substring(0, upper) + suffix;
        }
        // break at the space unless it lies beyond the upper limit
        return str.substring(0, Math.min(index, upper)) + suffix;
    }

}