001    /*
002     * Licensed to the Apache Software Foundation (ASF) under one or more
003     * contributor license agreements.  See the NOTICE file distributed with
004     * this work for additional information regarding copyright ownership.
005     * The ASF licenses this file to You under the Apache License, Version 2.0
006     * (the "License"); you may not use this file except in compliance with
007     * the License.  You may obtain a copy of the License at
008     * 
009     *      http://www.apache.org/licenses/LICENSE-2.0
010     * 
011     * Unless required by applicable law or agreed to in writing, software
012     * distributed under the License is distributed on an "AS IS" BASIS,
013     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014     * See the License for the specific language governing permissions and
015     * limitations under the License.
016     */
017    package org.apache.commons.lang3.text;
018    
019    import org.apache.commons.lang3.StringUtils;
020    import org.apache.commons.lang3.SystemUtils;
021    
022    /**
023     * <p>Operations on Strings that contain words.</p>
024     * 
025     * <p>This class tries to handle <code>null</code> input gracefully.
026     * An exception will not be thrown for a <code>null</code> input.
027     * Each method documents its behaviour in more detail.</p>
028     * 
029     * @since 2.0
030     * @version $Id: WordUtils.java 1144929 2011-07-10 18:26:16Z ggregory $
031     */
032    public class WordUtils {
033    
034        /**
035         * <p><code>WordUtils</code> instances should NOT be constructed in
036         * standard programming. Instead, the class should be used as
037         * <code>WordUtils.wrap("foo bar", 20);</code>.</p>
038         *
039         * <p>This constructor is public to permit tools that require a JavaBean
040         * instance to operate.</p>
041         */
042        public WordUtils() {
043          super();
044        }
045    
046        // Wrapping
047        //--------------------------------------------------------------------------
048        /**
049         * <p>Wraps a single line of text, identifying words by <code>' '</code>.</p>
050         * 
051         * <p>New lines will be separated by the system property line separator.
052         * Very long words, such as URLs will <i>not</i> be wrapped.</p>
053         * 
054         * <p>Leading spaces on a new line are stripped.
055         * Trailing spaces are not stripped.</p>
056         *
057         * <pre>
058         * WordUtils.wrap(null, *) = null
059         * WordUtils.wrap("", *) = ""
060         * </pre>
061         *
062         * @param str  the String to be word wrapped, may be null
063         * @param wrapLength  the column to wrap the words at, less than 1 is treated as 1
064         * @return a line with newlines inserted, <code>null</code> if null input
065         */
066        public static String wrap(String str, int wrapLength) {
067            return wrap(str, wrapLength, null, false);
068        }
069        
070        /**
071         * <p>Wraps a single line of text, identifying words by <code>' '</code>.</p>
072         * 
073         * <p>Leading spaces on a new line are stripped.
074         * Trailing spaces are not stripped.</p>
075         * 
076         * <pre>
077         * WordUtils.wrap(null, *, *, *) = null
078         * WordUtils.wrap("", *, *, *) = ""
079         * </pre>
080         *
081         * @param str  the String to be word wrapped, may be null
082         * @param wrapLength  the column to wrap the words at, less than 1 is treated as 1
083         * @param newLineStr  the string to insert for a new line, 
084         *  <code>null</code> uses the system property line separator
085         * @param wrapLongWords  true if long words (such as URLs) should be wrapped
086         * @return a line with newlines inserted, <code>null</code> if null input
087         */
088        public static String wrap(String str, int wrapLength, String newLineStr, boolean wrapLongWords) {
089            if (str == null) {
090                return null;
091            }
092            if (newLineStr == null) {
093                newLineStr = SystemUtils.LINE_SEPARATOR;
094            }
095            if (wrapLength < 1) {
096                wrapLength = 1;
097            }
098            int inputLineLength = str.length();
099            int offset = 0;
100            StringBuilder wrappedLine = new StringBuilder(inputLineLength + 32);
101            
102            while ((inputLineLength - offset) > wrapLength) {
103                if (str.charAt(offset) == ' ') {
104                    offset++;
105                    continue;
106                }
107                int spaceToWrapAt = str.lastIndexOf(' ', wrapLength + offset);
108    
109                if (spaceToWrapAt >= offset) {
110                    // normal case
111                    wrappedLine.append(str.substring(offset, spaceToWrapAt));
112                    wrappedLine.append(newLineStr);
113                    offset = spaceToWrapAt + 1;
114                    
115                } else {
116                    // really long word or URL
117                    if (wrapLongWords) {
118                        // wrap really long word one line at a time
119                        wrappedLine.append(str.substring(offset, wrapLength + offset));
120                        wrappedLine.append(newLineStr);
121                        offset += wrapLength;
122                    } else {
123                        // do not wrap really long word, just extend beyond limit
124                        spaceToWrapAt = str.indexOf(' ', wrapLength + offset);
125                        if (spaceToWrapAt >= 0) {
126                            wrappedLine.append(str.substring(offset, spaceToWrapAt));
127                            wrappedLine.append(newLineStr);
128                            offset = spaceToWrapAt + 1;
129                        } else {
130                            wrappedLine.append(str.substring(offset));
131                            offset = inputLineLength;
132                        }
133                    }
134                }
135            }
136    
137            // Whatever is left in line is short enough to just pass through
138            wrappedLine.append(str.substring(offset));
139    
140            return wrappedLine.toString();
141        }
142    
143        // Capitalizing
144        //-----------------------------------------------------------------------
145        /**
146         * <p>Capitalizes all the whitespace separated words in a String.
147         * Only the first letter of each word is changed. To convert the 
148         * rest of each word to lowercase at the same time, 
149         * use {@link #capitalizeFully(String)}.</p>
150         *
151         * <p>Whitespace is defined by {@link Character#isWhitespace(char)}.
152         * A <code>null</code> input String returns <code>null</code>.
153         * Capitalization uses the unicode title case, normally equivalent to
154         * upper case.</p>
155         *
156         * <pre>
157         * WordUtils.capitalize(null)        = null
158         * WordUtils.capitalize("")          = ""
159         * WordUtils.capitalize("i am FINE") = "I Am FINE"
160         * </pre>
161         * 
162         * @param str  the String to capitalize, may be null
163         * @return capitalized String, <code>null</code> if null String input
164         * @see #uncapitalize(String)
165         * @see #capitalizeFully(String)
166         */
167        public static String capitalize(String str) {
168            return capitalize(str, null);
169        }
170    
171        /**
172         * <p>Capitalizes all the delimiter separated words in a String.
173         * Only the first letter of each word is changed. To convert the 
174         * rest of each word to lowercase at the same time, 
175         * use {@link #capitalizeFully(String, char[])}.</p>
176         *
177         * <p>The delimiters represent a set of characters understood to separate words.
178         * The first string character and the first non-delimiter character after a
179         * delimiter will be capitalized. </p>
180         *
181         * <p>A <code>null</code> input String returns <code>null</code>.
182         * Capitalization uses the unicode title case, normally equivalent to
183         * upper case.</p>
184         *
185         * <pre>
186         * WordUtils.capitalize(null, *)            = null
187         * WordUtils.capitalize("", *)              = ""
188         * WordUtils.capitalize(*, new char[0])     = *
189         * WordUtils.capitalize("i am fine", null)  = "I Am Fine"
190         * WordUtils.capitalize("i aM.fine", {'.'}) = "I aM.Fine"
191         * </pre>
192         * 
193         * @param str  the String to capitalize, may be null
194         * @param delimiters  set of characters to determine capitalization, null means whitespace
195         * @return capitalized String, <code>null</code> if null String input
196         * @see #uncapitalize(String)
197         * @see #capitalizeFully(String)
198         * @since 2.1
199         */
200        public static String capitalize(String str, char... delimiters) {
201            int delimLen = delimiters == null ? -1 : delimiters.length;
202            if (StringUtils.isEmpty(str) || delimLen == 0) {
203                return str;
204            }
205            char[] buffer = str.toCharArray();
206            boolean capitalizeNext = true;
207            for (int i = 0; i < buffer.length; i++) {
208                char ch = buffer[i];
209                if (isDelimiter(ch, delimiters)) {
210                    capitalizeNext = true;
211                } else if (capitalizeNext) {
212                    buffer[i] = Character.toTitleCase(ch);
213                    capitalizeNext = false;
214                }
215            }
216            return new String(buffer);
217        }
218    
219        //-----------------------------------------------------------------------
220        /**
221         * <p>Converts all the whitespace separated words in a String into capitalized words, 
222         * that is each word is made up of a titlecase character and then a series of 
223         * lowercase characters.  </p>
224         *
225         * <p>Whitespace is defined by {@link Character#isWhitespace(char)}.
226         * A <code>null</code> input String returns <code>null</code>.
227         * Capitalization uses the unicode title case, normally equivalent to
228         * upper case.</p>
229         *
230         * <pre>
231         * WordUtils.capitalizeFully(null)        = null
232         * WordUtils.capitalizeFully("")          = ""
233         * WordUtils.capitalizeFully("i am FINE") = "I Am Fine"
234         * </pre>
235         * 
236         * @param str  the String to capitalize, may be null
237         * @return capitalized String, <code>null</code> if null String input
238         */
239        public static String capitalizeFully(String str) {
240            return capitalizeFully(str, null);
241        }
242    
243        /**
244         * <p>Converts all the delimiter separated words in a String into capitalized words, 
245         * that is each word is made up of a titlecase character and then a series of 
246         * lowercase characters. </p>
247         *
248         * <p>The delimiters represent a set of characters understood to separate words.
249         * The first string character and the first non-delimiter character after a
250         * delimiter will be capitalized. </p>
251         *
252         * <p>A <code>null</code> input String returns <code>null</code>.
253         * Capitalization uses the unicode title case, normally equivalent to
254         * upper case.</p>
255         *
256         * <pre>
257         * WordUtils.capitalizeFully(null, *)            = null
258         * WordUtils.capitalizeFully("", *)              = ""
259         * WordUtils.capitalizeFully(*, null)            = *
260         * WordUtils.capitalizeFully(*, new char[0])     = *
261         * WordUtils.capitalizeFully("i aM.fine", {'.'}) = "I am.Fine"
262         * </pre>
263         * 
264         * @param str  the String to capitalize, may be null
265         * @param delimiters  set of characters to determine capitalization, null means whitespace
266         * @return capitalized String, <code>null</code> if null String input
267         * @since 2.1
268         */
269        public static String capitalizeFully(String str, char... delimiters) {
270            int delimLen = (delimiters == null ? -1 : delimiters.length);
271            if (StringUtils.isEmpty(str) || delimLen == 0) {
272                return str;
273            }
274            str = str.toLowerCase();
275            return capitalize(str, delimiters);
276        }
277    
278        //-----------------------------------------------------------------------
279        /**
280         * <p>Uncapitalizes all the whitespace separated words in a String.
281         * Only the first letter of each word is changed.</p>
282         *
283         * <p>Whitespace is defined by {@link Character#isWhitespace(char)}.
284         * A <code>null</code> input String returns <code>null</code>.</p>
285         *
286         * <pre>
287         * WordUtils.uncapitalize(null)        = null
288         * WordUtils.uncapitalize("")          = ""
289         * WordUtils.uncapitalize("I Am FINE") = "i am fINE"
290         * </pre>
291         * 
292         * @param str  the String to uncapitalize, may be null
293         * @return uncapitalized String, <code>null</code> if null String input
294         * @see #capitalize(String)
295         */
296        public static String uncapitalize(String str) {
297            return uncapitalize(str, null);
298        }
299    
300        /**
301         * <p>Uncapitalizes all the whitespace separated words in a String.
302         * Only the first letter of each word is changed.</p>
303         *
304         * <p>The delimiters represent a set of characters understood to separate words.
305         * The first string character and the first non-delimiter character after a
306         * delimiter will be uncapitalized. </p>
307         *
308         * <p>Whitespace is defined by {@link Character#isWhitespace(char)}.
309         * A <code>null</code> input String returns <code>null</code>.</p>
310         *
311         * <pre>
312         * WordUtils.uncapitalize(null, *)            = null
313         * WordUtils.uncapitalize("", *)              = ""
314         * WordUtils.uncapitalize(*, null)            = *
315         * WordUtils.uncapitalize(*, new char[0])     = *
316         * WordUtils.uncapitalize("I AM.FINE", {'.'}) = "i AM.fINE"
317         * </pre>
318         * 
319         * @param str  the String to uncapitalize, may be null
320         * @param delimiters  set of characters to determine uncapitalization, null means whitespace
321         * @return uncapitalized String, <code>null</code> if null String input
322         * @see #capitalize(String)
323         * @since 2.1
324         */
325        public static String uncapitalize(String str, char... delimiters) {
326            int delimLen = (delimiters == null ? -1 : delimiters.length);
327            if (StringUtils.isEmpty(str) || delimLen == 0) {
328                return str;
329            }
330            char[] buffer = str.toCharArray();
331            boolean uncapitalizeNext = true;
332            for (int i = 0; i < buffer.length; i++) {
333                char ch = buffer[i];
334                if (isDelimiter(ch, delimiters)) {
335                    uncapitalizeNext = true;
336                } else if (uncapitalizeNext) {
337                    buffer[i] = Character.toLowerCase(ch);
338                    uncapitalizeNext = false;
339                }
340            }
341            return new String(buffer);
342        }
343    
344        //-----------------------------------------------------------------------
345        /**
346         * <p>Swaps the case of a String using a word based algorithm.</p>
347         * 
348         * <ul>
349         *  <li>Upper case character converts to Lower case</li>
350         *  <li>Title case character converts to Lower case</li>
351         *  <li>Lower case character after Whitespace or at start converts to Title case</li>
352         *  <li>Other Lower case character converts to Upper case</li>
353         * </ul>
354         * 
355         * <p>Whitespace is defined by {@link Character#isWhitespace(char)}.
356         * A <code>null</code> input String returns <code>null</code>.</p>
357         * 
358         * <pre>
359         * StringUtils.swapCase(null)                 = null
360         * StringUtils.swapCase("")                   = ""
361         * StringUtils.swapCase("The dog has a BONE") = "tHE DOG HAS A bone"
362         * </pre>
363         * 
364         * @param str  the String to swap case, may be null
365         * @return the changed String, <code>null</code> if null String input
366         */
367        public static String swapCase(String str) {
368            if (StringUtils.isEmpty(str)) {
369                return str;
370            }
371            char[] buffer = str.toCharArray();
372    
373            boolean whitespace = true;
374    
375            for (int i = 0; i < buffer.length; i++) {
376                char ch = buffer[i];
377                if (Character.isUpperCase(ch)) {
378                    buffer[i] = Character.toLowerCase(ch);
379                    whitespace = false;
380                } else if (Character.isTitleCase(ch)) {
381                    buffer[i] = Character.toLowerCase(ch);
382                    whitespace = false;
383                } else if (Character.isLowerCase(ch)) {
384                    if (whitespace) {
385                        buffer[i] = Character.toTitleCase(ch);
386                        whitespace = false;
387                    } else {
388                        buffer[i] = Character.toUpperCase(ch);
389                    }
390                } else {
391                    whitespace = Character.isWhitespace(ch);
392                }
393            }
394            return new String(buffer);
395        }
396    
397        //-----------------------------------------------------------------------
398        /**
399         * <p>Extracts the initial letters from each word in the String.</p>
400         * 
401         * <p>The first letter of the string and all first letters after
402         * whitespace are returned as a new string.
403         * Their case is not changed.</p>
404         *
405         * <p>Whitespace is defined by {@link Character#isWhitespace(char)}.
406         * A <code>null</code> input String returns <code>null</code>.</p>
407         *
408         * <pre>
409         * WordUtils.initials(null)             = null
410         * WordUtils.initials("")               = ""
411         * WordUtils.initials("Ben John Lee")   = "BJL"
412         * WordUtils.initials("Ben J.Lee")      = "BJ"
413         * </pre>
414         *
415         * @param str  the String to get initials from, may be null
416         * @return String of initial letters, <code>null</code> if null String input
417         * @see #initials(String,char[])
418         * @since 2.2
419         */
420        public static String initials(String str) {
421            return initials(str, null);
422        }
423    
424        /**
425         * <p>Extracts the initial letters from each word in the String.</p>
426         * 
427         * <p>The first letter of the string and all first letters after the
428         * defined delimiters are returned as a new string.
429         * Their case is not changed.</p>
430         *
431         * <p>If the delimiters array is null, then Whitespace is used.
432         * Whitespace is defined by {@link Character#isWhitespace(char)}.
433         * A <code>null</code> input String returns <code>null</code>.
434         * An empty delimiter array returns an empty String.</p>
435         *
436         * <pre>
437         * WordUtils.initials(null, *)                = null
438         * WordUtils.initials("", *)                  = ""
439         * WordUtils.initials("Ben John Lee", null)   = "BJL"
440         * WordUtils.initials("Ben J.Lee", null)      = "BJ"
441         * WordUtils.initials("Ben J.Lee", [' ','.']) = "BJL"
442         * WordUtils.initials(*, new char[0])         = ""
443         * </pre>
444         * 
445         * @param str  the String to get initials from, may be null
446         * @param delimiters  set of characters to determine words, null means whitespace
447         * @return String of initial letters, <code>null</code> if null String input
448         * @see #initials(String)
449         * @since 2.2
450         */
451        public static String initials(String str, char... delimiters) {
452            if (StringUtils.isEmpty(str)) {
453                return str;
454            }
455            if (delimiters != null && delimiters.length == 0) {
456                return "";
457            }
458            int strLen = str.length();
459            char[] buf = new char[strLen / 2 + 1];
460            int count = 0;
461            boolean lastWasGap = true;
462            for (int i = 0; i < strLen; i++) {
463                char ch = str.charAt(i);
464    
465                if (isDelimiter(ch, delimiters)) {
466                    lastWasGap = true;
467                } else if (lastWasGap) {
468                    buf[count++] = ch;
469                    lastWasGap = false;
470                } else {
471                    continue; // ignore ch
472                }
473            }
474            return new String(buf, 0, count);
475        }
476    
477        //-----------------------------------------------------------------------
478        /**
479         * Is the character a delimiter.
480         *
481         * @param ch  the character to check
482         * @param delimiters  the delimiters
483         * @return true if it is a delimiter
484         */
485        private static boolean isDelimiter(char ch, char[] delimiters) {
486            if (delimiters == null) {
487                return Character.isWhitespace(ch);
488            }
489            for (char delimiter : delimiters) {
490                if (ch == delimiter) {
491                    return true;
492                }
493            }
494            return false;
495        }
496    
497    }