001/*
002 * Licensed to the Apache Software Foundation (ASF) under one or more
003 * contributor license agreements.  See the NOTICE file distributed with
004 * this work for additional information regarding copyright ownership.
005 * The ASF licenses this file to You under the Apache License, Version 2.0
006 * (the "License"); you may not use this file except in compliance with
007 * the License.  You may obtain a copy of the License at
008 * 
009 *      http://www.apache.org/licenses/LICENSE-2.0
010 * 
011 * Unless required by applicable law or agreed to in writing, software
012 * distributed under the License is distributed on an "AS IS" BASIS,
013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 * See the License for the specific language governing permissions and
015 * limitations under the License.
016 */
017package org.apache.commons.lang3.text;
018
019import org.apache.commons.lang3.StringUtils;
020import org.apache.commons.lang3.SystemUtils;
021
022/**
023 * <p>Operations on Strings that contain words.</p>
024 * 
025 * <p>This class tries to handle <code>null</code> input gracefully.
026 * An exception will not be thrown for a <code>null</code> input.
027 * Each method documents its behaviour in more detail.</p>
028 * 
029 * @since 2.0
030 * @version $Id: WordUtils.java 1586649 2014-04-11 13:28:30Z britter $
031 */
032public class WordUtils {
033
034    /**
035     * <p><code>WordUtils</code> instances should NOT be constructed in
036     * standard programming. Instead, the class should be used as
037     * <code>WordUtils.wrap("foo bar", 20);</code>.</p>
038     *
039     * <p>This constructor is public to permit tools that require a JavaBean
040     * instance to operate.</p>
041     */
042    public WordUtils() {
043      super();
044    }
045
046    // Wrapping
047    //--------------------------------------------------------------------------
048    /**
049     * <p>Wraps a single line of text, identifying words by <code>' '</code>.</p>
050     * 
051     * <p>New lines will be separated by the system property line separator.
052     * Very long words, such as URLs will <i>not</i> be wrapped.</p>
053     * 
054     * <p>Leading spaces on a new line are stripped.
055     * Trailing spaces are not stripped.</p>
056     *
057     * <table border="1" summary="Wrap Results">
058     *  <tr>
059     *   <th>input</th>
060     *   <th>wrapLength</th>
061     *   <th>result</th>
062     *  </tr>
063     *  <tr>
064     *   <td>null</td>
065     *   <td>*</td>
066     *   <td>null</td>
067     *  </tr>
068     *  <tr>
069     *   <td>""</td>
070     *   <td>*</td>
071     *   <td>""</td>
072     *  </tr>
073     *  <tr>
074     *   <td>"Here is one line of text that is going to be wrapped after 20 columns."</td>
075     *   <td>20</td>
076     *   <td>"Here is one line of\ntext that is going\nto be wrapped after\n20 columns."</td>
077     *  </tr>
078     *  <tr>
079     *   <td>"Click here to jump to the commons website - http://commons.apache.org"</td>
080     *   <td>20</td>
081     *   <td>"Click here to jump\nto the commons\nwebsite -\nhttp://commons.apache.org"</td>
082     *  </tr>
083     *  <tr>
084     *   <td>"Click here, http://commons.apache.org, to jump to the commons website"</td>
085     *   <td>20</td>
086     *   <td>"Click here,\nhttp://commons.apache.org,\nto jump to the\ncommons website"</td>
087     *  </tr>
088     * </table>
089     *
090     * (assuming that '\n' is the systems line separator)
091     *
092     * @param str  the String to be word wrapped, may be null
093     * @param wrapLength  the column to wrap the words at, less than 1 is treated as 1
094     * @return a line with newlines inserted, <code>null</code> if null input
095     */
096    public static String wrap(final String str, final int wrapLength) {
097        return wrap(str, wrapLength, null, false);
098    }
099    
100    /**
101     * <p>Wraps a single line of text, identifying words by <code>' '</code>.</p>
102     * 
103     * <p>Leading spaces on a new line are stripped.
104     * Trailing spaces are not stripped.</p>
105     *
106     * <table border="1" summary="Wrap Results">
107     *  <tr>
108     *   <th>input</th>
109     *   <th>wrapLenght</th>
110     *   <th>newLineString</th>
111     *   <th>wrapLongWords</th>
112     *   <th>result</th>
113     *  </tr>
114     *  <tr>
115     *   <td>null</td>
116     *   <td>*</td>
117     *   <td>*</td>
118     *   <td>true/false</td>
119     *   <td>null</td>
120     *  </tr>
121     *  <tr>
122     *   <td>""</td>
123     *   <td>*</td>
124     *   <td>*</td>
125     *   <td>true/false</td>
126     *   <td>""</td>
127     *  </tr>
128     *  <tr>
129     *   <td>"Here is one line of text that is going to be wrapped after 20 columns."</td>
130     *   <td>20</td>
131     *   <td>"\n"</td>
132     *   <td>true/false</td>
133     *   <td>"Here is one line of\ntext that is going\nto be wrapped after\n20 columns."</td>
134     *  </tr>
135     *  <tr>
136     *   <td>"Here is one line of text that is going to be wrapped after 20 columns."</td>
137     *   <td>20</td>
138     *   <td>"&lt;br /&gt;"</td>
139     *   <td>true/false</td>
140     *   <td>"Here is one line of&lt;br /&gt;text that is going&lt;br /&gt;to be wrapped after&lt;br /&gt;20 columns."</td>
141     *  </tr>
142     *  <tr>
143     *   <td>"Here is one line of text that is going to be wrapped after 20 columns."</td>
144     *   <td>20</td>
145     *   <td>null</td>
146     *   <td>true/false</td>
147     *   <td>"Here is one line of" + systemNewLine + "text that is going" + systemNewLine + "to be wrapped after" + systemNewLine + "20 columns."</td>
148     *  </tr>
149     *  <tr>
150     *   <td>"Click here to jump to the commons website - http://commons.apache.org"</td>
151     *   <td>20</td>
152     *   <td>"\n"</td>
153     *   <td>false</td>
154     *   <td>"Click here to jump\nto the commons\nwebsite -\nhttp://commons.apache.org"</td>
155     *  </tr>
156     *  <tr>
157     *   <td>"Click here to jump to the commons website - http://commons.apache.org"</td>
158     *   <td>20</td>
159     *   <td>"\n"</td>
160     *   <td>true</td>
161     *   <td>"Click here to jump\nto the commons\nwebsite -\nhttp://commons.apach\ne.org"</td>
162     *  </tr>
163     * </table>
164     *
165     * @param str  the String to be word wrapped, may be null
166     * @param wrapLength  the column to wrap the words at, less than 1 is treated as 1
167     * @param newLineStr  the string to insert for a new line, 
168     *  <code>null</code> uses the system property line separator
169     * @param wrapLongWords  true if long words (such as URLs) should be wrapped
170     * @return a line with newlines inserted, <code>null</code> if null input
171     */
172    public static String wrap(final String str, int wrapLength, String newLineStr, final boolean wrapLongWords) {
173        if (str == null) {
174            return null;
175        }
176        if (newLineStr == null) {
177            newLineStr = SystemUtils.LINE_SEPARATOR;
178        }
179        if (wrapLength < 1) {
180            wrapLength = 1;
181        }
182        final int inputLineLength = str.length();
183        int offset = 0;
184        final StringBuilder wrappedLine = new StringBuilder(inputLineLength + 32);
185        
186        while (offset < inputLineLength) {
187            if (str.charAt(offset) == ' ') {
188                offset++;
189                continue;
190            }
191            // only last line without leading spaces is left
192            if(inputLineLength - offset <= wrapLength) {
193                break;
194            }
195            int spaceToWrapAt = str.lastIndexOf(' ', wrapLength + offset);
196
197            if (spaceToWrapAt >= offset) {
198                // normal case
199                wrappedLine.append(str.substring(offset, spaceToWrapAt));
200                wrappedLine.append(newLineStr);
201                offset = spaceToWrapAt + 1;
202                
203            } else {
204                // really long word or URL
205                if (wrapLongWords) {
206                    // wrap really long word one line at a time
207                    wrappedLine.append(str.substring(offset, wrapLength + offset));
208                    wrappedLine.append(newLineStr);
209                    offset += wrapLength;
210                } else {
211                    // do not wrap really long word, just extend beyond limit
212                    spaceToWrapAt = str.indexOf(' ', wrapLength + offset);
213                    if (spaceToWrapAt >= 0) {
214                        wrappedLine.append(str.substring(offset, spaceToWrapAt));
215                        wrappedLine.append(newLineStr);
216                        offset = spaceToWrapAt + 1;
217                    } else {
218                        wrappedLine.append(str.substring(offset));
219                        offset = inputLineLength;
220                    }
221                }
222            }
223        }
224
225        // Whatever is left in line is short enough to just pass through
226        wrappedLine.append(str.substring(offset));
227
228        return wrappedLine.toString();
229    }
230
231    // Capitalizing
232    //-----------------------------------------------------------------------
233    /**
234     * <p>Capitalizes all the whitespace separated words in a String.
235     * Only the first letter of each word is changed. To convert the 
236     * rest of each word to lowercase at the same time, 
237     * use {@link #capitalizeFully(String)}.</p>
238     *
239     * <p>Whitespace is defined by {@link Character#isWhitespace(char)}.
240     * A <code>null</code> input String returns <code>null</code>.
241     * Capitalization uses the Unicode title case, normally equivalent to
242     * upper case.</p>
243     *
244     * <pre>
245     * WordUtils.capitalize(null)        = null
246     * WordUtils.capitalize("")          = ""
247     * WordUtils.capitalize("i am FINE") = "I Am FINE"
248     * </pre>
249     * 
250     * @param str  the String to capitalize, may be null
251     * @return capitalized String, <code>null</code> if null String input
252     * @see #uncapitalize(String)
253     * @see #capitalizeFully(String)
254     */
255    public static String capitalize(final String str) {
256        return capitalize(str, null);
257    }
258
259    /**
260     * <p>Capitalizes all the delimiter separated words in a String.
261     * Only the first letter of each word is changed. To convert the 
262     * rest of each word to lowercase at the same time, 
263     * use {@link #capitalizeFully(String, char[])}.</p>
264     *
265     * <p>The delimiters represent a set of characters understood to separate words.
266     * The first string character and the first non-delimiter character after a
267     * delimiter will be capitalized. </p>
268     *
269     * <p>A <code>null</code> input String returns <code>null</code>.
270     * Capitalization uses the Unicode title case, normally equivalent to
271     * upper case.</p>
272     *
273     * <pre>
274     * WordUtils.capitalize(null, *)            = null
275     * WordUtils.capitalize("", *)              = ""
276     * WordUtils.capitalize(*, new char[0])     = *
277     * WordUtils.capitalize("i am fine", null)  = "I Am Fine"
278     * WordUtils.capitalize("i aM.fine", {'.'}) = "I aM.Fine"
279     * </pre>
280     * 
281     * @param str  the String to capitalize, may be null
282     * @param delimiters  set of characters to determine capitalization, null means whitespace
283     * @return capitalized String, <code>null</code> if null String input
284     * @see #uncapitalize(String)
285     * @see #capitalizeFully(String)
286     * @since 2.1
287     */
288    public static String capitalize(final String str, final char... delimiters) {
289        final int delimLen = delimiters == null ? -1 : delimiters.length;
290        if (StringUtils.isEmpty(str) || delimLen == 0) {
291            return str;
292        }
293        final char[] buffer = str.toCharArray();
294        boolean capitalizeNext = true;
295        for (int i = 0; i < buffer.length; i++) {
296            final char ch = buffer[i];
297            if (isDelimiter(ch, delimiters)) {
298                capitalizeNext = true;
299            } else if (capitalizeNext) {
300                buffer[i] = Character.toTitleCase(ch);
301                capitalizeNext = false;
302            }
303        }
304        return new String(buffer);
305    }
306
307    //-----------------------------------------------------------------------
308    /**
309     * <p>Converts all the whitespace separated words in a String into capitalized words, 
310     * that is each word is made up of a titlecase character and then a series of 
311     * lowercase characters.  </p>
312     *
313     * <p>Whitespace is defined by {@link Character#isWhitespace(char)}.
314     * A <code>null</code> input String returns <code>null</code>.
315     * Capitalization uses the Unicode title case, normally equivalent to
316     * upper case.</p>
317     *
318     * <pre>
319     * WordUtils.capitalizeFully(null)        = null
320     * WordUtils.capitalizeFully("")          = ""
321     * WordUtils.capitalizeFully("i am FINE") = "I Am Fine"
322     * </pre>
323     * 
324     * @param str  the String to capitalize, may be null
325     * @return capitalized String, <code>null</code> if null String input
326     */
327    public static String capitalizeFully(final String str) {
328        return capitalizeFully(str, null);
329    }
330
331    /**
332     * <p>Converts all the delimiter separated words in a String into capitalized words, 
333     * that is each word is made up of a titlecase character and then a series of 
334     * lowercase characters. </p>
335     *
336     * <p>The delimiters represent a set of characters understood to separate words.
337     * The first string character and the first non-delimiter character after a
338     * delimiter will be capitalized. </p>
339     *
340     * <p>A <code>null</code> input String returns <code>null</code>.
341     * Capitalization uses the Unicode title case, normally equivalent to
342     * upper case.</p>
343     *
344     * <pre>
345     * WordUtils.capitalizeFully(null, *)            = null
346     * WordUtils.capitalizeFully("", *)              = ""
347     * WordUtils.capitalizeFully(*, null)            = *
348     * WordUtils.capitalizeFully(*, new char[0])     = *
349     * WordUtils.capitalizeFully("i aM.fine", {'.'}) = "I am.Fine"
350     * </pre>
351     * 
352     * @param str  the String to capitalize, may be null
353     * @param delimiters  set of characters to determine capitalization, null means whitespace
354     * @return capitalized String, <code>null</code> if null String input
355     * @since 2.1
356     */
357    public static String capitalizeFully(String str, final char... delimiters) {
358        final int delimLen = delimiters == null ? -1 : delimiters.length;
359        if (StringUtils.isEmpty(str) || delimLen == 0) {
360            return str;
361        }
362        str = str.toLowerCase();
363        return capitalize(str, delimiters);
364    }
365
366    //-----------------------------------------------------------------------
367    /**
368     * <p>Uncapitalizes all the whitespace separated words in a String.
369     * Only the first letter of each word is changed.</p>
370     *
371     * <p>Whitespace is defined by {@link Character#isWhitespace(char)}.
372     * A <code>null</code> input String returns <code>null</code>.</p>
373     *
374     * <pre>
375     * WordUtils.uncapitalize(null)        = null
376     * WordUtils.uncapitalize("")          = ""
377     * WordUtils.uncapitalize("I Am FINE") = "i am fINE"
378     * </pre>
379     * 
380     * @param str  the String to uncapitalize, may be null
381     * @return uncapitalized String, <code>null</code> if null String input
382     * @see #capitalize(String)
383     */
384    public static String uncapitalize(final String str) {
385        return uncapitalize(str, null);
386    }
387
388    /**
389     * <p>Uncapitalizes all the whitespace separated words in a String.
390     * Only the first letter of each word is changed.</p>
391     *
392     * <p>The delimiters represent a set of characters understood to separate words.
393     * The first string character and the first non-delimiter character after a
394     * delimiter will be uncapitalized. </p>
395     *
396     * <p>Whitespace is defined by {@link Character#isWhitespace(char)}.
397     * A <code>null</code> input String returns <code>null</code>.</p>
398     *
399     * <pre>
400     * WordUtils.uncapitalize(null, *)            = null
401     * WordUtils.uncapitalize("", *)              = ""
402     * WordUtils.uncapitalize(*, null)            = *
403     * WordUtils.uncapitalize(*, new char[0])     = *
404     * WordUtils.uncapitalize("I AM.FINE", {'.'}) = "i AM.fINE"
405     * </pre>
406     * 
407     * @param str  the String to uncapitalize, may be null
408     * @param delimiters  set of characters to determine uncapitalization, null means whitespace
409     * @return uncapitalized String, <code>null</code> if null String input
410     * @see #capitalize(String)
411     * @since 2.1
412     */
413    public static String uncapitalize(final String str, final char... delimiters) {
414        final int delimLen = delimiters == null ? -1 : delimiters.length;
415        if (StringUtils.isEmpty(str) || delimLen == 0) {
416            return str;
417        }
418        final char[] buffer = str.toCharArray();
419        boolean uncapitalizeNext = true;
420        for (int i = 0; i < buffer.length; i++) {
421            final char ch = buffer[i];
422            if (isDelimiter(ch, delimiters)) {
423                uncapitalizeNext = true;
424            } else if (uncapitalizeNext) {
425                buffer[i] = Character.toLowerCase(ch);
426                uncapitalizeNext = false;
427            }
428        }
429        return new String(buffer);
430    }
431
432    //-----------------------------------------------------------------------
433    /**
434     * <p>Swaps the case of a String using a word based algorithm.</p>
435     * 
436     * <ul>
437     *  <li>Upper case character converts to Lower case</li>
438     *  <li>Title case character converts to Lower case</li>
439     *  <li>Lower case character after Whitespace or at start converts to Title case</li>
440     *  <li>Other Lower case character converts to Upper case</li>
441     * </ul>
442     * 
443     * <p>Whitespace is defined by {@link Character#isWhitespace(char)}.
444     * A <code>null</code> input String returns <code>null</code>.</p>
445     * 
446     * <pre>
447     * StringUtils.swapCase(null)                 = null
448     * StringUtils.swapCase("")                   = ""
449     * StringUtils.swapCase("The dog has a BONE") = "tHE DOG HAS A bone"
450     * </pre>
451     * 
452     * @param str  the String to swap case, may be null
453     * @return the changed String, <code>null</code> if null String input
454     */
455    public static String swapCase(final String str) {
456        if (StringUtils.isEmpty(str)) {
457            return str;
458        }
459        final char[] buffer = str.toCharArray();
460
461        boolean whitespace = true;
462
463        for (int i = 0; i < buffer.length; i++) {
464            final char ch = buffer[i];
465            if (Character.isUpperCase(ch)) {
466                buffer[i] = Character.toLowerCase(ch);
467                whitespace = false;
468            } else if (Character.isTitleCase(ch)) {
469                buffer[i] = Character.toLowerCase(ch);
470                whitespace = false;
471            } else if (Character.isLowerCase(ch)) {
472                if (whitespace) {
473                    buffer[i] = Character.toTitleCase(ch);
474                    whitespace = false;
475                } else {
476                    buffer[i] = Character.toUpperCase(ch);
477                }
478            } else {
479                whitespace = Character.isWhitespace(ch);
480            }
481        }
482        return new String(buffer);
483    }
484
485    //-----------------------------------------------------------------------
486    /**
487     * <p>Extracts the initial letters from each word in the String.</p>
488     * 
489     * <p>The first letter of the string and all first letters after
490     * whitespace are returned as a new string.
491     * Their case is not changed.</p>
492     *
493     * <p>Whitespace is defined by {@link Character#isWhitespace(char)}.
494     * A <code>null</code> input String returns <code>null</code>.</p>
495     *
496     * <pre>
497     * WordUtils.initials(null)             = null
498     * WordUtils.initials("")               = ""
499     * WordUtils.initials("Ben John Lee")   = "BJL"
500     * WordUtils.initials("Ben J.Lee")      = "BJ"
501     * </pre>
502     *
503     * @param str  the String to get initials from, may be null
504     * @return String of initial letters, <code>null</code> if null String input
505     * @see #initials(String,char[])
506     * @since 2.2
507     */
508    public static String initials(final String str) {
509        return initials(str, null);
510    }
511
512    /**
513     * <p>Extracts the initial letters from each word in the String.</p>
514     * 
515     * <p>The first letter of the string and all first letters after the
516     * defined delimiters are returned as a new string.
517     * Their case is not changed.</p>
518     *
519     * <p>If the delimiters array is null, then Whitespace is used.
520     * Whitespace is defined by {@link Character#isWhitespace(char)}.
521     * A <code>null</code> input String returns <code>null</code>.
522     * An empty delimiter array returns an empty String.</p>
523     *
524     * <pre>
525     * WordUtils.initials(null, *)                = null
526     * WordUtils.initials("", *)                  = ""
527     * WordUtils.initials("Ben John Lee", null)   = "BJL"
528     * WordUtils.initials("Ben J.Lee", null)      = "BJ"
529     * WordUtils.initials("Ben J.Lee", [' ','.']) = "BJL"
530     * WordUtils.initials(*, new char[0])         = ""
531     * </pre>
532     * 
533     * @param str  the String to get initials from, may be null
534     * @param delimiters  set of characters to determine words, null means whitespace
535     * @return String of initial letters, <code>null</code> if null String input
536     * @see #initials(String)
537     * @since 2.2
538     */
539    public static String initials(final String str, final char... delimiters) {
540        if (StringUtils.isEmpty(str)) {
541            return str;
542        }
543        if (delimiters != null && delimiters.length == 0) {
544            return "";
545        }
546        final int strLen = str.length();
547        final char[] buf = new char[strLen / 2 + 1];
548        int count = 0;
549        boolean lastWasGap = true;
550        for (int i = 0; i < strLen; i++) {
551            final char ch = str.charAt(i);
552
553            if (isDelimiter(ch, delimiters)) {
554                lastWasGap = true;
555            } else if (lastWasGap) {
556                buf[count++] = ch;
557                lastWasGap = false;
558            } else {
559                continue; // ignore ch
560            }
561        }
562        return new String(buf, 0, count);
563    }
564
565    //-----------------------------------------------------------------------
566    /**
567     * Is the character a delimiter.
568     *
569     * @param ch  the character to check
570     * @param delimiters  the delimiters
571     * @return true if it is a delimiter
572     */
573    private static boolean isDelimiter(final char ch, final char[] delimiters) {
574        if (delimiters == null) {
575            return Character.isWhitespace(ch);
576        }
577        for (final char delimiter : delimiters) {
578            if (ch == delimiter) {
579                return true;
580            }
581        }
582        return false;
583    }
584
585}