001    /*
002     * Licensed to the Apache Software Foundation (ASF) under one or more
003     * contributor license agreements.  See the NOTICE file distributed with
004     * this work for additional information regarding copyright ownership.
005     * The ASF licenses this file to You under the Apache License, Version 2.0
006     * (the "License"); you may not use this file except in compliance with
007     * the License.  You may obtain a copy of the License at
008     * 
009     *      http://www.apache.org/licenses/LICENSE-2.0
010     * 
011     * Unless required by applicable law or agreed to in writing, software
012     * distributed under the License is distributed on an "AS IS" BASIS,
013     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014     * See the License for the specific language governing permissions and
015     * limitations under the License.
016     */
017    package org.apache.commons.lang;
018    
/**
 * <p>Operations on char primitives and Character objects.</p>
 *
 * <p>This class tries to handle <code>null</code> input gracefully.
 * Most methods return <code>null</code> or a caller supplied default for a
 * <code>null</code> input. The exceptions are the <code>toChar</code> and
 * <code>toIntValue</code> overloads that take no default value: these throw
 * an <code>IllegalArgumentException</code>.
 * Each method documents its behaviour in more detail.</p>
 *
 * <p>#ThreadSafe#</p>
 * @author Apache Software Foundation
 * @since 2.1
 * @version $Id: CharUtils.java 1056988 2011-01-09 17:58:53Z niallp $
 */
public class CharUtils {

    /** Number of 7 bit ASCII characters; this is the size of both caches. */
    private static final int ASCII_SIZE = 128;

    /** Cache of one-character Strings, indexed by ASCII code. */
    private static final String[] CHAR_STRING_ARRAY = new String[ASCII_SIZE];

    /** Cache of Character objects, indexed by ASCII code. */
    private static final Character[] CHAR_ARRAY = new Character[ASCII_SIZE];

    /**
     * <code>&#92;u000a</code> linefeed LF ('\n').
     *
     * @see <a href="http://java.sun.com/docs/books/jls/third_edition/html/lexical.html#101089">JLS: Escape Sequences
     *      for Character and String Literals</a>
     * @since 2.2
     */
    public static final char LF = '\n';

    /**
     * <code>&#92;u000d</code> carriage return CR ('\r').
     *
     * @see <a href="http://java.sun.com/docs/books/jls/third_edition/html/lexical.html#101089">JLS: Escape Sequences
     *      for Character and String Literals</a>
     * @since 2.2
     */
    public static final char CR = '\r';

    static {
        // Fill both caches once, so that the ASCII lookups never allocate.
        for (int i = 0; i < ASCII_SIZE; i++) {
            final char c = (char) i;
            CHAR_STRING_ARRAY[i] = String.valueOf(c);
            // NOTE(review): the explicit constructor is kept because nothing in this
            // file shows a Java 5+ baseline; Character.valueOf(char) would be the
            // modern replacement - confirm the target version before switching.
            CHAR_ARRAY[i] = new Character(c);
        }
    }

    /**
     * <p><code>CharUtils</code> instances should NOT be constructed in standard programming.
     * Instead, the class should be used as <code>CharUtils.toString('c');</code>.</p>
     *
     * <p>This constructor is public to permit tools that require a JavaBean instance
     * to operate.</p>
     */
    public CharUtils() {
        super();
    }

    /**
     * <p>Checks whether a String is <code>null</code> or has zero length.</p>
     *
     * @param str  the String to check, may be null
     * @return true if the String is null or ""
     */
    private static boolean isEmpty(String str) {
        return str == null || str.length() == 0;
    }

    //-----------------------------------------------------------------------
    /**
     * <p>Converts the character to a Character.</p>
     *
     * <p>For ASCII 7 bit characters, this uses a cache that will return the
     * same Character object each time.</p>
     *
     * <pre>
     *   CharUtils.toCharacterObject(' ')  = ' '
     *   CharUtils.toCharacterObject('A')  = 'A'
     * </pre>
     *
     * @param ch  the character to convert
     * @return a Character of the specified character
     */
    public static Character toCharacterObject(char ch) {
        if (ch < CHAR_ARRAY.length) {
            return CHAR_ARRAY[ch];
        }
        return new Character(ch);
    }

    /**
     * <p>Converts the String to a Character using the first character, returning
     * null for empty Strings.</p>
     *
     * <p>For ASCII 7 bit characters, this uses a cache that will return the
     * same Character object each time.</p>
     *
     * <pre>
     *   CharUtils.toCharacterObject(null) = null
     *   CharUtils.toCharacterObject("")   = null
     *   CharUtils.toCharacterObject("A")  = 'A'
     *   CharUtils.toCharacterObject("BA") = 'B'
     * </pre>
     *
     * @param str  the String whose first character is converted, may be null
     * @return the Character value of the first letter of the String,
     *  null if the String is null or empty
     */
    public static Character toCharacterObject(String str) {
        if (isEmpty(str)) {
            return null;
        }
        return toCharacterObject(str.charAt(0));
    }

    //-----------------------------------------------------------------------
    /**
     * <p>Converts the Character to a char throwing an exception for <code>null</code>.</p>
     *
     * <pre>
     *   CharUtils.toChar(null) = IllegalArgumentException
     *   CharUtils.toChar(' ')  = ' '
     *   CharUtils.toChar('A')  = 'A'
     * </pre>
     *
     * @param ch  the character to convert
     * @return the char value of the Character
     * @throws IllegalArgumentException if the Character is null
     */
    public static char toChar(Character ch) {
        if (ch == null) {
            throw new IllegalArgumentException("The Character must not be null");
        }
        return ch.charValue();
    }

    /**
     * <p>Converts the Character to a char handling <code>null</code>.</p>
     *
     * <pre>
     *   CharUtils.toChar(null, 'X') = 'X'
     *   CharUtils.toChar(' ', 'X')  = ' '
     *   CharUtils.toChar('A', 'X')  = 'A'
     * </pre>
     *
     * @param ch  the character to convert, may be null
     * @param defaultValue  the value to use if the Character is null
     * @return the char value of the Character or the default if null
     */
    public static char toChar(Character ch, char defaultValue) {
        return ch == null ? defaultValue : ch.charValue();
    }

    //-----------------------------------------------------------------------
    /**
     * <p>Converts the String to a char using the first character, throwing
     * an exception on empty Strings.</p>
     *
     * <pre>
     *   CharUtils.toChar(null) = IllegalArgumentException
     *   CharUtils.toChar("")   = IllegalArgumentException
     *   CharUtils.toChar("A")  = 'A'
     *   CharUtils.toChar("BA") = 'B'
     * </pre>
     *
     * @param str  the String whose first character is converted
     * @return the char value of the first letter of the String
     * @throws IllegalArgumentException if the String is null or empty
     */
    public static char toChar(String str) {
        if (isEmpty(str)) {
            throw new IllegalArgumentException("The String must not be empty");
        }
        return str.charAt(0);
    }

    /**
     * <p>Converts the String to a char using the first character, defaulting
     * the value on empty Strings.</p>
     *
     * <pre>
     *   CharUtils.toChar(null, 'X') = 'X'
     *   CharUtils.toChar("", 'X')   = 'X'
     *   CharUtils.toChar("A", 'X')  = 'A'
     *   CharUtils.toChar("BA", 'X') = 'B'
     * </pre>
     *
     * @param str  the String whose first character is converted, may be null
     * @param defaultValue  the value to use if the String is null or empty
     * @return the char value of the first letter of the String or the default
     *  if the String is null or empty
     */
    public static char toChar(String str, char defaultValue) {
        return isEmpty(str) ? defaultValue : str.charAt(0);
    }

    //-----------------------------------------------------------------------
    /**
     * <p>Converts the character to the Integer it represents, throwing an
     * exception if the character is not numeric.</p>
     *
     * <p>This method converts the char '1' to the int 1 and so on.</p>
     *
     * <pre>
     *   CharUtils.toIntValue('3')  = 3
     *   CharUtils.toIntValue('A')  = IllegalArgumentException
     * </pre>
     *
     * @param ch  the character to convert
     * @return the int value of the character
     * @throws IllegalArgumentException if the character is not ASCII numeric
     */
    public static int toIntValue(char ch) {
        if (!isAsciiNumeric(ch)) {
            throw new IllegalArgumentException("The character " + ch + " is not in the range '0' - '9'");
        }
        return ch - '0';
    }

    /**
     * <p>Converts the character to the Integer it represents, returning a
     * default value if the character is not numeric.</p>
     *
     * <p>This method converts the char '1' to the int 1 and so on.</p>
     *
     * <pre>
     *   CharUtils.toIntValue('3', -1)  = 3
     *   CharUtils.toIntValue('A', -1)  = -1
     * </pre>
     *
     * @param ch  the character to convert
     * @param defaultValue  the default value to use if the character is not numeric
     * @return the int value of the character, or the default if it is not ASCII numeric
     */
    public static int toIntValue(char ch, int defaultValue) {
        return isAsciiNumeric(ch) ? ch - '0' : defaultValue;
    }

    /**
     * <p>Converts the character to the Integer it represents, throwing an
     * exception if the character is not numeric.</p>
     *
     * <p>This method converts the char '1' to the int 1 and so on.</p>
     *
     * <pre>
     *   CharUtils.toIntValue(null) = IllegalArgumentException
     *   CharUtils.toIntValue('3')  = 3
     *   CharUtils.toIntValue('A')  = IllegalArgumentException
     * </pre>
     *
     * @param ch  the character to convert, not null
     * @return the int value of the character
     * @throws IllegalArgumentException if the Character is not ASCII numeric or is null
     */
    public static int toIntValue(Character ch) {
        if (ch == null) {
            throw new IllegalArgumentException("The character must not be null");
        }
        return toIntValue(ch.charValue());
    }

    /**
     * <p>Converts the character to the Integer it represents, returning a
     * default value if the character is <code>null</code> or not numeric.</p>
     *
     * <p>This method converts the char '1' to the int 1 and so on.</p>
     *
     * <pre>
     *   CharUtils.toIntValue(null, -1) = -1
     *   CharUtils.toIntValue('3', -1)  = 3
     *   CharUtils.toIntValue('A', -1)  = -1
     * </pre>
     *
     * @param ch  the character to convert, may be null
     * @param defaultValue  the default value to use if the character is null or not numeric
     * @return the int value of the character, or the default if it is null or not ASCII numeric
     */
    public static int toIntValue(Character ch, int defaultValue) {
        if (ch == null) {
            return defaultValue;
        }
        return toIntValue(ch.charValue(), defaultValue);
    }

    //-----------------------------------------------------------------------
    /**
     * <p>Converts the character to a String that contains the one character.</p>
     *
     * <p>For ASCII 7 bit characters, this uses a cache that will return the
     * same String object each time.</p>
     *
     * <pre>
     *   CharUtils.toString(' ')  = " "
     *   CharUtils.toString('A')  = "A"
     * </pre>
     *
     * @param ch  the character to convert
     * @return a String containing the one specified character
     */
    public static String toString(char ch) {
        if (ch < CHAR_STRING_ARRAY.length) {
            return CHAR_STRING_ARRAY[ch];
        }
        return String.valueOf(ch);
    }

    /**
     * <p>Converts the character to a String that contains the one character.</p>
     *
     * <p>For ASCII 7 bit characters, this uses a cache that will return the
     * same String object each time.</p>
     *
     * <p>If <code>null</code> is passed in, <code>null</code> will be returned.</p>
     *
     * <pre>
     *   CharUtils.toString(null) = null
     *   CharUtils.toString(' ')  = " "
     *   CharUtils.toString('A')  = "A"
     * </pre>
     *
     * @param ch  the character to convert, may be null
     * @return a String containing the one specified character, null if null input
     */
    public static String toString(Character ch) {
        return ch == null ? null : toString(ch.charValue());
    }

    //--------------------------------------------------------------------------
    /**
     * <p>Converts the character to the unicode format '&#92;u0020'.</p>
     *
     * <p>This format is the Java source code format: a backslash, the letter
     * <code>u</code> and exactly four lower case hexadecimal digits.</p>
     *
     * <pre>
     *   CharUtils.unicodeEscaped(' ') = "&#92;u0020"
     *   CharUtils.unicodeEscaped('A') = "&#92;u0041"
     * </pre>
     *
     * @param ch  the character to convert
     * @return the escaped unicode string
     */
    public static String unicodeEscaped(char ch) {
        final String hex = Integer.toHexString(ch);
        // A char yields one to four hex digits; left-pad with zeros up to four.
        return "\\u" + "0000".substring(hex.length()) + hex;
    }

    /**
     * <p>Converts the character to the unicode format '&#92;u0020'.</p>
     *
     * <p>This format is the Java source code format.</p>
     *
     * <p>If <code>null</code> is passed in, <code>null</code> will be returned.</p>
     *
     * <pre>
     *   CharUtils.unicodeEscaped(null) = null
     *   CharUtils.unicodeEscaped(' ')  = "&#92;u0020"
     *   CharUtils.unicodeEscaped('A')  = "&#92;u0041"
     * </pre>
     *
     * @param ch  the character to convert, may be null
     * @return the escaped unicode string, null if null input
     */
    public static String unicodeEscaped(Character ch) {
        return ch == null ? null : unicodeEscaped(ch.charValue());
    }

    //--------------------------------------------------------------------------
    /**
     * <p>Checks whether the character is ASCII 7 bit.</p>
     *
     * <pre>
     *   CharUtils.isAscii('a')  = true
     *   CharUtils.isAscii('A')  = true
     *   CharUtils.isAscii('3')  = true
     *   CharUtils.isAscii('-')  = true
     *   CharUtils.isAscii('\n') = true
     *   CharUtils.isAscii('&copy;') = false
     * </pre>
     *
     * @param ch  the character to check
     * @return true if less than 128
     */
    public static boolean isAscii(char ch) {
        return ch < ASCII_SIZE;
    }

    /**
     * <p>Checks whether the character is ASCII 7 bit printable.</p>
     *
     * <pre>
     *   CharUtils.isAsciiPrintable('a')  = true
     *   CharUtils.isAsciiPrintable('A')  = true
     *   CharUtils.isAsciiPrintable('3')  = true
     *   CharUtils.isAsciiPrintable('-')  = true
     *   CharUtils.isAsciiPrintable('\n') = false
     *   CharUtils.isAsciiPrintable('&copy;') = false
     * </pre>
     *
     * @param ch  the character to check
     * @return true if between 32 and 126 inclusive
     */
    public static boolean isAsciiPrintable(char ch) {
        return ch >= 32 && ch < 127;
    }

    /**
     * <p>Checks whether the character is ASCII 7 bit control.</p>
     *
     * <pre>
     *   CharUtils.isAsciiControl('a')  = false
     *   CharUtils.isAsciiControl('A')  = false
     *   CharUtils.isAsciiControl('3')  = false
     *   CharUtils.isAsciiControl('-')  = false
     *   CharUtils.isAsciiControl('\n') = true
     *   CharUtils.isAsciiControl('&copy;') = false
     * </pre>
     *
     * @param ch  the character to check
     * @return true if less than 32 or equals 127
     */
    public static boolean isAsciiControl(char ch) {
        return ch < 32 || ch == 127;
    }

    /**
     * <p>Checks whether the character is ASCII 7 bit alphabetic.</p>
     *
     * <pre>
     *   CharUtils.isAsciiAlpha('a')  = true
     *   CharUtils.isAsciiAlpha('A')  = true
     *   CharUtils.isAsciiAlpha('3')  = false
     *   CharUtils.isAsciiAlpha('-')  = false
     *   CharUtils.isAsciiAlpha('\n') = false
     *   CharUtils.isAsciiAlpha('&copy;') = false
     * </pre>
     *
     * @param ch  the character to check
     * @return true if between 65 and 90 or 97 and 122 inclusive
     */
    public static boolean isAsciiAlpha(char ch) {
        return isAsciiAlphaUpper(ch) || isAsciiAlphaLower(ch);
    }

    /**
     * <p>Checks whether the character is ASCII 7 bit alphabetic upper case.</p>
     *
     * <pre>
     *   CharUtils.isAsciiAlphaUpper('a')  = false
     *   CharUtils.isAsciiAlphaUpper('A')  = true
     *   CharUtils.isAsciiAlphaUpper('3')  = false
     *   CharUtils.isAsciiAlphaUpper('-')  = false
     *   CharUtils.isAsciiAlphaUpper('\n') = false
     *   CharUtils.isAsciiAlphaUpper('&copy;') = false
     * </pre>
     *
     * @param ch  the character to check
     * @return true if between 65 and 90 inclusive
     */
    public static boolean isAsciiAlphaUpper(char ch) {
        return ch >= 'A' && ch <= 'Z';
    }

    /**
     * <p>Checks whether the character is ASCII 7 bit alphabetic lower case.</p>
     *
     * <pre>
     *   CharUtils.isAsciiAlphaLower('a')  = true
     *   CharUtils.isAsciiAlphaLower('A')  = false
     *   CharUtils.isAsciiAlphaLower('3')  = false
     *   CharUtils.isAsciiAlphaLower('-')  = false
     *   CharUtils.isAsciiAlphaLower('\n') = false
     *   CharUtils.isAsciiAlphaLower('&copy;') = false
     * </pre>
     *
     * @param ch  the character to check
     * @return true if between 97 and 122 inclusive
     */
    public static boolean isAsciiAlphaLower(char ch) {
        return ch >= 'a' && ch <= 'z';
    }

    /**
     * <p>Checks whether the character is ASCII 7 bit numeric.</p>
     *
     * <pre>
     *   CharUtils.isAsciiNumeric('a')  = false
     *   CharUtils.isAsciiNumeric('A')  = false
     *   CharUtils.isAsciiNumeric('3')  = true
     *   CharUtils.isAsciiNumeric('-')  = false
     *   CharUtils.isAsciiNumeric('\n') = false
     *   CharUtils.isAsciiNumeric('&copy;') = false
     * </pre>
     *
     * @param ch  the character to check
     * @return true if between 48 and 57 inclusive
     */
    public static boolean isAsciiNumeric(char ch) {
        return ch >= '0' && ch <= '9';
    }

    /**
     * <p>Checks whether the character is ASCII 7 bit alphanumeric, that is
     * either alphabetic or numeric.</p>
     *
     * <pre>
     *   CharUtils.isAsciiAlphanumeric('a')  = true
     *   CharUtils.isAsciiAlphanumeric('A')  = true
     *   CharUtils.isAsciiAlphanumeric('3')  = true
     *   CharUtils.isAsciiAlphanumeric('-')  = false
     *   CharUtils.isAsciiAlphanumeric('\n') = false
     *   CharUtils.isAsciiAlphanumeric('&copy;') = false
     * </pre>
     *
     * @param ch  the character to check
     * @return true if between 48 and 57 or 65 and 90 or 97 and 122 inclusive
     */
    public static boolean isAsciiAlphanumeric(char ch) {
        return isAsciiAlpha(ch) || isAsciiNumeric(ch);
    }

    // ----------------- Following code copied from Apache Harmony (Character class)
    /**
     * Indicates whether {@code ch} is a high- (or leading-) surrogate code unit
     * that is used for representing supplementary characters in UTF-16
     * encoding. The accepted range is U+D800 to U+DBFF inclusive.
     *
     * @param ch
     *            the character to test.
     * @return {@code true} if {@code ch} is a high-surrogate code unit;
     *         {@code false} otherwise.
     */
    static boolean isHighSurrogate(char ch) {
        return ch >= '\uD800' && ch <= '\uDBFF';
    }

}