001/*
002 * Licensed to the Apache Software Foundation (ASF) under one or more
003 * contributor license agreements.  See the NOTICE file distributed with
004 * this work for additional information regarding copyright ownership.
005 * The ASF licenses this file to You under the Apache License, Version 2.0
006 * (the "License"); you may not use this file except in compliance with
007 * the License.  You may obtain a copy of the License at
008 * 
009 *      http://www.apache.org/licenses/LICENSE-2.0
010 * 
011 * Unless required by applicable law or agreed to in writing, software
012 * distributed under the License is distributed on an "AS IS" BASIS,
013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 * See the License for the specific language governing permissions and
015 * limitations under the License.
016 */
017package org.apache.commons.lang3.text;
018
019import org.apache.commons.lang3.StringUtils;
020import org.apache.commons.lang3.SystemUtils;
021
/**
 * <p>Operations on Strings that contain words.</p>
 *
 * <p>This class tries to handle <code>null</code> input gracefully.
 * An exception will not be thrown for a <code>null</code> input.
 * Each method documents its behaviour in more detail.</p>
 *
 * <p>The case-changing and initial-extracting methods process their input
 * one Unicode code point at a time, so a supplementary character (a
 * surrogate pair) is treated as a single letter and is never split.</p>
 *
 * @since 2.0
 * @version $Id: WordUtils.java 1436770 2013-01-22 07:09:45Z ggregory $
 */
public class WordUtils {

    /**
     * <p><code>WordUtils</code> instances should NOT be constructed in
     * standard programming. Instead, the class should be used as
     * <code>WordUtils.wrap("foo bar", 20);</code>.</p>
     *
     * <p>This constructor is public to permit tools that require a JavaBean
     * instance to operate.</p>
     */
    public WordUtils() {
      super();
    }

    // Wrapping
    //--------------------------------------------------------------------------
    /**
     * <p>Wraps a single line of text, identifying words by <code>' '</code>.</p>
     *
     * <p>New lines will be separated by the system line separator.
     * Very long words, such as URLs will <i>not</i> be wrapped.</p>
     *
     * <p>Leading spaces on a new line are stripped.
     * Trailing spaces are not stripped.</p>
     *
     * <pre>
     * WordUtils.wrap(null, *) = null
     * WordUtils.wrap("", *) = ""
     * </pre>
     *
     * @param str  the String to be word wrapped, may be null
     * @param wrapLength  the column to wrap the words at, less than 1 is treated as 1
     * @return a line with newlines inserted, <code>null</code> if null input
     */
    public static String wrap(final String str, final int wrapLength) {
        return wrap(str, wrapLength, null, false);
    }

    /**
     * <p>Wraps a single line of text, identifying words by <code>' '</code>.</p>
     *
     * <p>Leading spaces on a new line are stripped, including on the last
     * line produced. Trailing spaces are not stripped. A String that is
     * short enough not to need wrapping is returned unchanged.</p>
     *
     * <p>Lengths are measured in <code>char</code> units; when
     * <code>wrapLongWords</code> is <code>true</code> a long word is cut at a
     * <code>char</code> index, which may fall inside a surrogate pair.</p>
     *
     * <pre>
     * WordUtils.wrap(null, *, *, *) = null
     * WordUtils.wrap("", *, *, *) = ""
     * WordUtils.wrap("Here is a line", 7, "\n", false) = "Here is\na line"
     * WordUtils.wrap("abcdefghij kl", 4, "\n", false)  = "abcdefghij\nkl"
     * WordUtils.wrap("abcdefghij", 4, "\n", true)      = "abcd\nefgh\nij"
     * WordUtils.wrap("abc  de", 3, "\n", false)        = "abc\nde"
     * </pre>
     *
     * @param str  the String to be word wrapped, may be null
     * @param wrapLength  the column to wrap the words at, less than 1 is treated as 1
     * @param newLineStr  the string to insert for a new line,
     *  <code>null</code> uses the system line separator
     * @param wrapLongWords  true if long words (such as URLs) should be wrapped
     * @return a line with newlines inserted, <code>null</code> if null input
     */
    public static String wrap(final String str, int wrapLength, String newLineStr, final boolean wrapLongWords) {
        if (str == null) {
            return null;
        }
        if (newLineStr == null) {
            newLineStr = System.lineSeparator();
        }
        if (wrapLength < 1) {
            wrapLength = 1;
        }
        final int inputLineLength = str.length();
        int offset = 0;
        // becomes true as soon as one line break has been inserted
        boolean wrapped = false;
        final StringBuilder wrappedLine = new StringBuilder(inputLineLength + 32);

        // Loop only while the unprocessed tail is too long for a single line.
        // Inside the loop wrapLength + offset < inputLineLength, so the sum cannot overflow.
        while (inputLineLength - offset > wrapLength) {
            if (str.charAt(offset) == ' ') {
                // strip a leading space
                offset++;
                continue;
            }
            // last space that still keeps the line within wrapLength columns
            int spaceToWrapAt = str.lastIndexOf(' ', wrapLength + offset);

            if (spaceToWrapAt >= offset) {
                // normal case
                wrappedLine.append(str, offset, spaceToWrapAt);
                wrappedLine.append(newLineStr);
                offset = spaceToWrapAt + 1;
                wrapped = true;
            } else if (wrapLongWords) {
                // really long word or URL: cut it one full line at a time
                wrappedLine.append(str, offset, wrapLength + offset);
                wrappedLine.append(newLineStr);
                offset += wrapLength;
                wrapped = true;
            } else {
                // do not wrap really long word, just extend beyond limit
                spaceToWrapAt = str.indexOf(' ', wrapLength + offset);
                if (spaceToWrapAt >= 0) {
                    wrappedLine.append(str, offset, spaceToWrapAt);
                    wrappedLine.append(newLineStr);
                    offset = spaceToWrapAt + 1;
                    wrapped = true;
                } else {
                    // the long word runs to the end of the input
                    wrappedLine.append(str, offset, inputLineLength);
                    offset = inputLineLength;
                }
            }
        }

        // The remainder starts a new line if a break was inserted, so its
        // leading spaces are stripped just like on every other new line.
        if (wrapped) {
            while (offset < inputLineLength && str.charAt(offset) == ' ') {
                offset++;
            }
        }

        // Whatever is left in line is short enough to just pass through
        wrappedLine.append(str, offset, inputLineLength);

        return wrappedLine.toString();
    }

    // Capitalizing
    //-----------------------------------------------------------------------
    /**
     * <p>Capitalizes all the whitespace separated words in a String.
     * Only the first letter of each word is changed. To convert the
     * rest of each word to lowercase at the same time,
     * use {@link #capitalizeFully(String)}.</p>
     *
     * <p>Whitespace is defined by {@link Character#isWhitespace(int)}.
     * A <code>null</code> input String returns <code>null</code>.
     * Capitalization uses the Unicode title case, normally equivalent to
     * upper case.</p>
     *
     * <pre>
     * WordUtils.capitalize(null)        = null
     * WordUtils.capitalize("")          = ""
     * WordUtils.capitalize("i am FINE") = "I Am FINE"
     * </pre>
     *
     * @param str  the String to capitalize, may be null
     * @return capitalized String, <code>null</code> if null String input
     * @see #uncapitalize(String)
     * @see #capitalizeFully(String)
     */
    public static String capitalize(final String str) {
        return capitalize(str, (char[]) null);
    }

    /**
     * <p>Capitalizes all the delimiter separated words in a String.
     * Only the first letter of each word is changed. To convert the
     * rest of each word to lowercase at the same time,
     * use {@link #capitalizeFully(String, char[])}.</p>
     *
     * <p>The delimiters represent a set of characters understood to separate words.
     * The first string character and the first non-delimiter character after a
     * delimiter will be capitalized. </p>
     *
     * <p>A <code>null</code> input String returns <code>null</code>.
     * Capitalization uses the Unicode title case, normally equivalent to
     * upper case.</p>
     *
     * <pre>
     * WordUtils.capitalize(null, *)            = null
     * WordUtils.capitalize("", *)              = ""
     * WordUtils.capitalize(*, new char[0])     = *
     * WordUtils.capitalize("i am fine", null)  = "I Am Fine"
     * WordUtils.capitalize("i aM.fine", {'.'}) = "I aM.Fine"
     * </pre>
     *
     * @param str  the String to capitalize, may be null
     * @param delimiters  set of characters to determine capitalization, null means whitespace
     * @return capitalized String, <code>null</code> if null String input
     * @see #uncapitalize(String)
     * @see #capitalizeFully(String)
     * @since 2.1
     */
    public static String capitalize(final String str, final char... delimiters) {
        if (isEmpty(str) || isEmptyArray(delimiters)) {
            return str;
        }
        return convertWordStarts(str, delimiters, true);
    }

    //-----------------------------------------------------------------------
    /**
     * <p>Converts all the whitespace separated words in a String into capitalized words,
     * that is each word is made up of a titlecase character and then a series of
     * lowercase characters.  </p>
     *
     * <p>Whitespace is defined by {@link Character#isWhitespace(int)}.
     * A <code>null</code> input String returns <code>null</code>.
     * Capitalization uses the Unicode title case, normally equivalent to
     * upper case.</p>
     *
     * <pre>
     * WordUtils.capitalizeFully(null)        = null
     * WordUtils.capitalizeFully("")          = ""
     * WordUtils.capitalizeFully("i am FINE") = "I Am Fine"
     * </pre>
     *
     * @param str  the String to capitalize, may be null
     * @return capitalized String, <code>null</code> if null String input
     */
    public static String capitalizeFully(final String str) {
        return capitalizeFully(str, (char[]) null);
    }

    /**
     * <p>Converts all the delimiter separated words in a String into capitalized words,
     * that is each word is made up of a titlecase character and then a series of
     * lowercase characters. </p>
     *
     * <p>The delimiters represent a set of characters understood to separate words.
     * The first string character and the first non-delimiter character after a
     * delimiter will be capitalized. </p>
     *
     * <p>A <code>null</code> input String returns <code>null</code>.
     * Capitalization uses the Unicode title case, normally equivalent to
     * upper case. The lower-casing step uses {@link String#toLowerCase()},
     * that is the rules of the default locale.</p>
     *
     * <pre>
     * WordUtils.capitalizeFully(null, *)            = null
     * WordUtils.capitalizeFully("", *)              = ""
     * WordUtils.capitalizeFully(*, new char[0])     = *
     * WordUtils.capitalizeFully("i aM fine", null)  = "I Am Fine"
     * WordUtils.capitalizeFully("i aM.fine", {'.'}) = "I am.Fine"
     * </pre>
     *
     * @param str  the String to capitalize, may be null
     * @param delimiters  set of characters to determine capitalization, null means whitespace
     * @return capitalized String, <code>null</code> if null String input
     * @since 2.1
     */
    public static String capitalizeFully(final String str, final char... delimiters) {
        if (isEmpty(str) || isEmptyArray(delimiters)) {
            return str;
        }
        return convertWordStarts(str.toLowerCase(), delimiters, true);
    }

    //-----------------------------------------------------------------------
    /**
     * <p>Uncapitalizes all the whitespace separated words in a String.
     * Only the first letter of each word is changed.</p>
     *
     * <p>Whitespace is defined by {@link Character#isWhitespace(int)}.
     * A <code>null</code> input String returns <code>null</code>.</p>
     *
     * <pre>
     * WordUtils.uncapitalize(null)        = null
     * WordUtils.uncapitalize("")          = ""
     * WordUtils.uncapitalize("I Am FINE") = "i am fINE"
     * </pre>
     *
     * @param str  the String to uncapitalize, may be null
     * @return uncapitalized String, <code>null</code> if null String input
     * @see #capitalize(String)
     */
    public static String uncapitalize(final String str) {
        return uncapitalize(str, (char[]) null);
    }

    /**
     * <p>Uncapitalizes all the delimiter separated words in a String.
     * Only the first letter of each word is changed.</p>
     *
     * <p>The delimiters represent a set of characters understood to separate words.
     * The first string character and the first non-delimiter character after a
     * delimiter will be uncapitalized. </p>
     *
     * <p>If the delimiters array is <code>null</code>, whitespace as defined by
     * {@link Character#isWhitespace(int)} is used.
     * A <code>null</code> input String returns <code>null</code>.</p>
     *
     * <pre>
     * WordUtils.uncapitalize(null, *)            = null
     * WordUtils.uncapitalize("", *)              = ""
     * WordUtils.uncapitalize(*, new char[0])     = *
     * WordUtils.uncapitalize("I Am FINE", null)  = "i am fINE"
     * WordUtils.uncapitalize("I AM.FINE", {'.'}) = "i AM.fINE"
     * </pre>
     *
     * @param str  the String to uncapitalize, may be null
     * @param delimiters  set of characters to determine uncapitalization, null means whitespace
     * @return uncapitalized String, <code>null</code> if null String input
     * @see #capitalize(String)
     * @since 2.1
     */
    public static String uncapitalize(final String str, final char... delimiters) {
        if (isEmpty(str) || isEmptyArray(delimiters)) {
            return str;
        }
        return convertWordStarts(str, delimiters, false);
    }

    //-----------------------------------------------------------------------
    /**
     * <p>Swaps the case of a String using a word based algorithm.</p>
     *
     * <ul>
     *  <li>Upper case character converts to Lower case</li>
     *  <li>Title case character converts to Lower case</li>
     *  <li>Lower case character after Whitespace or at start converts to Title case</li>
     *  <li>Other Lower case character converts to Upper case</li>
     * </ul>
     *
     * <p>Whitespace is defined by {@link Character#isWhitespace(int)}.
     * A <code>null</code> input String returns <code>null</code>.</p>
     *
     * <pre>
     * WordUtils.swapCase(null)                 = null
     * WordUtils.swapCase("")                   = ""
     * WordUtils.swapCase("The dog has a BONE") = "tHE DOG HAS A bone"
     * </pre>
     *
     * @param str  the String to swap case, may be null
     * @return the changed String, <code>null</code> if null String input
     */
    public static String swapCase(final String str) {
        if (isEmpty(str)) {
            return str;
        }
        final int strLen = str.length();
        final StringBuilder swapped = new StringBuilder(strLen);

        // true at the start of the input and after a whitespace character
        boolean whitespace = true;

        int index = 0;
        while (index < strLen) {
            final int codePoint = str.codePointAt(index);
            index += Character.charCount(codePoint);

            final int converted;
            if (Character.isUpperCase(codePoint) || Character.isTitleCase(codePoint)) {
                converted = Character.toLowerCase(codePoint);
                whitespace = false;
            } else if (Character.isLowerCase(codePoint)) {
                if (whitespace) {
                    converted = Character.toTitleCase(codePoint);
                    whitespace = false;
                } else {
                    converted = Character.toUpperCase(codePoint);
                }
            } else {
                // caseless character: keep it and remember whether it separates words
                converted = codePoint;
                whitespace = Character.isWhitespace(codePoint);
            }
            swapped.appendCodePoint(converted);
        }
        return swapped.toString();
    }

    //-----------------------------------------------------------------------
    /**
     * <p>Extracts the initial letters from each word in the String.</p>
     *
     * <p>The first letter of the string and all first letters after
     * whitespace are returned as a new string.
     * Their case is not changed.</p>
     *
     * <p>Whitespace is defined by {@link Character#isWhitespace(int)}.
     * A <code>null</code> input String returns <code>null</code>.</p>
     *
     * <pre>
     * WordUtils.initials(null)             = null
     * WordUtils.initials("")               = ""
     * WordUtils.initials("Ben John Lee")   = "BJL"
     * WordUtils.initials("Ben J.Lee")      = "BJ"
     * </pre>
     *
     * @param str  the String to get initials from, may be null
     * @return String of initial letters, <code>null</code> if null String input
     * @see #initials(String,char[])
     * @since 2.2
     */
    public static String initials(final String str) {
        return initials(str, (char[]) null);
    }

    /**
     * <p>Extracts the initial letters from each word in the String.</p>
     *
     * <p>The first letter of the string and all first letters after the
     * defined delimiters are returned as a new string.
     * Their case is not changed.</p>
     *
     * <p>If the delimiters array is null, then Whitespace is used.
     * Whitespace is defined by {@link Character#isWhitespace(int)}.
     * A <code>null</code> input String returns <code>null</code>.
     * An empty delimiter array returns an empty String.</p>
     *
     * <pre>
     * WordUtils.initials(null, *)                = null
     * WordUtils.initials("", *)                  = ""
     * WordUtils.initials("Ben John Lee", null)   = "BJL"
     * WordUtils.initials("Ben J.Lee", null)      = "BJ"
     * WordUtils.initials("Ben J.Lee", [' ','.']) = "BJL"
     * WordUtils.initials(*, new char[0])         = ""
     * </pre>
     *
     * @param str  the String to get initials from, may be null
     * @param delimiters  set of characters to determine words, null means whitespace
     * @return String of initial letters, <code>null</code> if null String input
     * @see #initials(String)
     * @since 2.2
     */
    public static String initials(final String str, final char... delimiters) {
        if (isEmpty(str)) {
            return str;
        }
        if (isEmptyArray(delimiters)) {
            return "";
        }
        final int strLen = str.length();
        // every initial but the first needs at least one delimiter before it
        final StringBuilder initials = new StringBuilder(strLen / 2 + 1);
        boolean lastWasGap = true;
        int index = 0;
        while (index < strLen) {
            final int codePoint = str.codePointAt(index);
            index += Character.charCount(codePoint);

            if (isDelimiter(codePoint, delimiters)) {
                lastWasGap = true;
            } else if (lastWasGap) {
                // whole code point is copied, so a surrogate pair is never split
                initials.appendCodePoint(codePoint);
                lastWasGap = false;
            }
            // any other character is inside a word and is ignored
        }
        return initials.toString();
    }

    //-----------------------------------------------------------------------
    /**
     * Changes the case of the first letter of every delimiter separated word,
     * leaving all other characters untouched.
     *
     * @param str  the String to convert, must not be null
     * @param delimiters  the delimiters, null means whitespace
     * @param toTitleCase  true to title-case each word start, false to lower-case it
     * @return the converted String
     */
    private static String convertWordStarts(final String str, final char[] delimiters, final boolean toTitleCase) {
        final int strLen = str.length();
        final StringBuilder converted = new StringBuilder(strLen);
        boolean atWordStart = true;
        int index = 0;
        while (index < strLen) {
            final int codePoint = str.codePointAt(index);
            index += Character.charCount(codePoint);

            if (isDelimiter(codePoint, delimiters)) {
                atWordStart = true;
                converted.appendCodePoint(codePoint);
            } else if (atWordStart) {
                converted.appendCodePoint(
                        toTitleCase ? Character.toTitleCase(codePoint) : Character.toLowerCase(codePoint));
                atWordStart = false;
            } else {
                converted.appendCodePoint(codePoint);
            }
        }
        return converted.toString();
    }

    /**
     * Is the String <code>null</code> or of zero length.
     *
     * @param str  the String to check, may be null
     * @return true if the String is null or empty
     */
    private static boolean isEmpty(final String str) {
        return str == null || str.length() == 0;
    }

    /**
     * Is the delimiter array non-null but without any element.
     *
     * @param delimiters  the delimiters, may be null
     * @return true only for a non-null array of length zero
     */
    private static boolean isEmptyArray(final char[] delimiters) {
        return delimiters != null && delimiters.length == 0;
    }

    /**
     * Is the code point a delimiter.
     *
     * @param codePoint  the Unicode code point to check
     * @param delimiters  the delimiters, null means whitespace
     * @return true if it is a delimiter
     */
    private static boolean isDelimiter(final int codePoint, final char[] delimiters) {
        if (delimiters == null) {
            return Character.isWhitespace(codePoint);
        }
        for (final char delimiter : delimiters) {
            if (codePoint == delimiter) {
                return true;
            }
        }
        return false;
    }

}