001 /* 002 * Licensed to the Apache Software Foundation (ASF) under one or more 003 * contributor license agreements. See the NOTICE file distributed with 004 * this work for additional information regarding copyright ownership. 005 * The ASF licenses this file to You under the Apache License, Version 2.0 006 * (the "License"); you may not use this file except in compliance with 007 * the License. You may obtain a copy of the License at 008 * 009 * http://www.apache.org/licenses/LICENSE-2.0 010 * 011 * Unless required by applicable law or agreed to in writing, software 012 * distributed under the License is distributed on an "AS IS" BASIS, 013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 014 * See the License for the specific language governing permissions and 015 * limitations under the License. 016 */ 017 package org.apache.commons.lang; 018 019 /** 020 * <p>Operations on char primitives and Character objects.</p> 021 * 022 * <p>This class tries to handle <code>null</code> input gracefully. 023 * An exception will not be thrown for a <code>null</code> input. 024 * Each method documents its behaviour in more detail.</p> 025 * 026 * <p>#ThreadSafe#</p> 027 * @author Apache Software Foundation 028 * @since 2.1 029 * @version $Id: CharUtils.java 1056988 2011-01-09 17:58:53Z niallp $ 030 */ 031 public class CharUtils { 032 033 private static final String CHAR_STRING = 034 "\u0000\u0001\u0002\u0003\u0004\u0005\u0006\u0007" + 035 "\b\t\n\u000b\f\r\u000e\u000f" + 036 "\u0010\u0011\u0012\u0013\u0014\u0015\u0016\u0017" + 037 "\u0018\u0019\u001a\u001b\u001c\u001d\u001e\u001f" + 038 "\u0020\u0021\"\u0023\u0024\u0025\u0026\u0027" + 039 "\u0028\u0029\u002a\u002b\u002c\u002d\u002e\u002f" + 040 "\u0030\u0031\u0032\u0033\u0034\u0035\u0036\u0037" + 041 "\u0038\u0039\u003a\u003b\u003c\u003d\u003e\u003f" + 042 "\u0040\u0041\u0042\u0043\u0044\u0045\u0046\u0047" + 043 "\u0048\u0049\u004a\u004b\u004c\u004d\u004e\u004f" + 044 "\u0050\u0051\u0052\u0053\u0054\u0055\u0056\u0057" + 045 "\u0058\u0059\u005a\u005b\\\u005d\u005e\u005f" + 046 "\u0060\u0061\u0062\u0063\u0064\u0065\u0066\u0067" + 047 "\u0068\u0069\u006a\u006b\u006c\u006d\u006e\u006f" + 048 "\u0070\u0071\u0072\u0073\u0074\u0075\u0076\u0077" + 049 "\u0078\u0079\u007a\u007b\u007c\u007d\u007e\u007f"; 050 051 private static final String[] CHAR_STRING_ARRAY = new String[128]; 052 private static final Character[] CHAR_ARRAY = new Character[128]; 053 054 /** 055 * <code>\u000a</code> linefeed LF ('\n'). 056 * 057 * @see <a href="http://java.sun.com/docs/books/jls/third_edition/html/lexical.html#101089">JLF: Escape Sequences 058 * for Character and String Literals</a> 059 * @since 2.2 060 */ 061 public static final char LF = '\n'; 062 063 /** 064 * <code>\u000d</code> carriage return CR ('\r'). 065 * 066 * @see <a href="http://java.sun.com/docs/books/jls/third_edition/html/lexical.html#101089">JLF: Escape Sequences 067 * for Character and String Literals</a> 068 * @since 2.2 069 */ 070 public static final char CR = '\r'; 071 072 073 static { 074 for (int i = 127; i >= 0; i--) { 075 CHAR_STRING_ARRAY[i] = CHAR_STRING.substring(i, i + 1); 076 CHAR_ARRAY[i] = new Character((char) i); 077 } 078 } 079 080 /** 081 * <p><code>CharUtils</code> instances should NOT be constructed in standard programming. 082 * Instead, the class should be used as <code>CharUtils.toString('c');</code>.</p> 083 * 084 * <p>This constructor is public to permit tools that require a JavaBean instance 085 * to operate.</p> 086 */ 087 public CharUtils() { 088 super(); 089 } 090 091 //----------------------------------------------------------------------- 092 /** 093 * <p>Converts the character to a Character.</p> 094 * 095 * <p>For ASCII 7 bit characters, this uses a cache that will return the 096 * same Character object each time.</p> 097 * 098 * <pre> 099 * CharUtils.toCharacterObject(' ') = ' ' 100 * CharUtils.toCharacterObject('A') = 'A' 101 * </pre> 102 * 103 * @param ch the character to convert 104 * @return a Character of the specified character 105 */ 106 public static Character toCharacterObject(char ch) { 107 if (ch < CHAR_ARRAY.length) { 108 return CHAR_ARRAY[ch]; 109 } 110 return new Character(ch); 111 } 112 113 /** 114 * <p>Converts the String to a Character using the first character, returning 115 * null for empty Strings.</p> 116 * 117 * <p>For ASCII 7 bit characters, this uses a cache that will return the 118 * same Character object each time.</p> 119 * 120 * <pre> 121 * CharUtils.toCharacterObject(null) = null 122 * CharUtils.toCharacterObject("") = null 123 * CharUtils.toCharacterObject("A") = 'A' 124 * CharUtils.toCharacterObject("BA") = 'B' 125 * </pre> 126 * 127 * @param str the character to convert 128 * @return the Character value of the first letter of the String 129 */ 130 public static Character toCharacterObject(String str) { 131 if (StringUtils.isEmpty(str)) { 132 return null; 133 } 134 return toCharacterObject(str.charAt(0)); 135 } 136 137 //----------------------------------------------------------------------- 138 /** 139 * <p>Converts the Character to a char throwing an exception for <code>null</code>.</p> 140 * 141 * <pre> 142 * CharUtils.toChar(null) = IllegalArgumentException 143 * CharUtils.toChar(' ') = ' ' 144 * CharUtils.toChar('A') = 'A' 145 * </pre> 146 * 147 * @param ch the character to convert 148 * @return the char value of the Character 149 * @throws IllegalArgumentException if the Character is null 150 */ 151 public static char toChar(Character ch) { 152 if (ch == null) { 153 throw new IllegalArgumentException("The Character must not be null"); 154 } 155 return ch.charValue(); 156 } 157 158 /** 159 * <p>Converts the Character to a char handling <code>null</code>.</p> 160 * 161 * <pre> 162 * CharUtils.toChar(null, 'X') = 'X' 163 * CharUtils.toChar(' ', 'X') = ' ' 164 * CharUtils.toChar('A', 'X') = 'A' 165 * </pre> 166 * 167 * @param ch the character to convert 168 * @param defaultValue the value to use if the Character is null 169 * @return the char value of the Character or the default if null 170 */ 171 public static char toChar(Character ch, char defaultValue) { 172 if (ch == null) { 173 return defaultValue; 174 } 175 return ch.charValue(); 176 } 177 178 //----------------------------------------------------------------------- 179 /** 180 * <p>Converts the String to a char using the first character, throwing 181 * an exception on empty Strings.</p> 182 * 183 * <pre> 184 * CharUtils.toChar(null) = IllegalArgumentException 185 * CharUtils.toChar("") = IllegalArgumentException 186 * CharUtils.toChar("A") = 'A' 187 * CharUtils.toChar("BA") = 'B' 188 * </pre> 189 * 190 * @param str the character to convert 191 * @return the char value of the first letter of the String 192 * @throws IllegalArgumentException if the String is empty 193 */ 194 public static char toChar(String str) { 195 if (StringUtils.isEmpty(str)) { 196 throw new IllegalArgumentException("The String must not be empty"); 197 } 198 return str.charAt(0); 199 } 200 201 /** 202 * <p>Converts the String to a char using the first character, defaulting 203 * the value on empty Strings.</p> 204 * 205 * <pre> 206 * CharUtils.toChar(null, 'X') = 'X' 207 * CharUtils.toChar("", 'X') = 'X' 208 * CharUtils.toChar("A", 'X') = 'A' 209 * CharUtils.toChar("BA", 'X') = 'B' 210 * </pre> 211 * 212 * @param str the character to convert 213 * @param defaultValue the value to use if the Character is null 214 * @return the char value of the first letter of the String or the default if null 215 */ 216 public static char toChar(String str, char defaultValue) { 217 if (StringUtils.isEmpty(str)) { 218 return defaultValue; 219 } 220 return str.charAt(0); 221 } 222 223 //----------------------------------------------------------------------- 224 /** 225 * <p>Converts the character to the Integer it represents, throwing an 226 * exception if the character is not numeric.</p> 227 * 228 * <p>This method coverts the char '1' to the int 1 and so on.</p> 229 * 230 * <pre> 231 * CharUtils.toIntValue('3') = 3 232 * CharUtils.toIntValue('A') = IllegalArgumentException 233 * </pre> 234 * 235 * @param ch the character to convert 236 * @return the int value of the character 237 * @throws IllegalArgumentException if the character is not ASCII numeric 238 */ 239 public static int toIntValue(char ch) { 240 if (isAsciiNumeric(ch) == false) { 241 throw new IllegalArgumentException("The character " + ch + " is not in the range '0' - '9'"); 242 } 243 return ch - 48; 244 } 245 246 /** 247 * <p>Converts the character to the Integer it represents, throwing an 248 * exception if the character is not numeric.</p> 249 * 250 * <p>This method coverts the char '1' to the int 1 and so on.</p> 251 * 252 * <pre> 253 * CharUtils.toIntValue('3', -1) = 3 254 * CharUtils.toIntValue('A', -1) = -1 255 * </pre> 256 * 257 * @param ch the character to convert 258 * @param defaultValue the default value to use if the character is not numeric 259 * @return the int value of the character 260 */ 261 public static int toIntValue(char ch, int defaultValue) { 262 if (isAsciiNumeric(ch) == false) { 263 return defaultValue; 264 } 265 return ch - 48; 266 } 267 268 /** 269 * <p>Converts the character to the Integer it represents, throwing an 270 * exception if the character is not numeric.</p> 271 * 272 * <p>This method coverts the char '1' to the int 1 and so on.</p> 273 * 274 * <pre> 275 * CharUtils.toIntValue(null) = IllegalArgumentException 276 * CharUtils.toIntValue('3') = 3 277 * CharUtils.toIntValue('A') = IllegalArgumentException 278 * </pre> 279 * 280 * @param ch the character to convert, not null 281 * @return the int value of the character 282 * @throws IllegalArgumentException if the Character is not ASCII numeric or is null 283 */ 284 public static int toIntValue(Character ch) { 285 if (ch == null) { 286 throw new IllegalArgumentException("The character must not be null"); 287 } 288 return toIntValue(ch.charValue()); 289 } 290 291 /** 292 * <p>Converts the character to the Integer it represents, throwing an 293 * exception if the character is not numeric.</p> 294 * 295 * <p>This method coverts the char '1' to the int 1 and so on.</p> 296 * 297 * <pre> 298 * CharUtils.toIntValue(null, -1) = -1 299 * CharUtils.toIntValue('3', -1) = 3 300 * CharUtils.toIntValue('A', -1) = -1 301 * </pre> 302 * 303 * @param ch the character to convert 304 * @param defaultValue the default value to use if the character is not numeric 305 * @return the int value of the character 306 */ 307 public static int toIntValue(Character ch, int defaultValue) { 308 if (ch == null) { 309 return defaultValue; 310 } 311 return toIntValue(ch.charValue(), defaultValue); 312 } 313 314 //----------------------------------------------------------------------- 315 /** 316 * <p>Converts the character to a String that contains the one character.</p> 317 * 318 * <p>For ASCII 7 bit characters, this uses a cache that will return the 319 * same String object each time.</p> 320 * 321 * <pre> 322 * CharUtils.toString(' ') = " " 323 * CharUtils.toString('A') = "A" 324 * </pre> 325 * 326 * @param ch the character to convert 327 * @return a String containing the one specified character 328 */ 329 public static String toString(char ch) { 330 if (ch < 128) { 331 return CHAR_STRING_ARRAY[ch]; 332 } 333 return new String(new char[] {ch}); 334 } 335 336 /** 337 * <p>Converts the character to a String that contains the one character.</p> 338 * 339 * <p>For ASCII 7 bit characters, this uses a cache that will return the 340 * same String object each time.</p> 341 * 342 * <p>If <code>null</code> is passed in, <code>null</code> will be returned.</p> 343 * 344 * <pre> 345 * CharUtils.toString(null) = null 346 * CharUtils.toString(' ') = " " 347 * CharUtils.toString('A') = "A" 348 * </pre> 349 * 350 * @param ch the character to convert 351 * @return a String containing the one specified character 352 */ 353 public static String toString(Character ch) { 354 if (ch == null) { 355 return null; 356 } 357 return toString(ch.charValue()); 358 } 359 360 //-------------------------------------------------------------------------- 361 /** 362 * <p>Converts the string to the unicode format '\u0020'.</p> 363 * 364 * <p>This format is the Java source code format.</p> 365 * 366 * <pre> 367 * CharUtils.unicodeEscaped(' ') = "\u0020" 368 * CharUtils.unicodeEscaped('A') = "\u0041" 369 * </pre> 370 * 371 * @param ch the character to convert 372 * @return the escaped unicode string 373 */ 374 public static String unicodeEscaped(char ch) { 375 if (ch < 0x10) { 376 return "\\u000" + Integer.toHexString(ch); 377 } else if (ch < 0x100) { 378 return "\\u00" + Integer.toHexString(ch); 379 } else if (ch < 0x1000) { 380 return "\\u0" + Integer.toHexString(ch); 381 } 382 return "\\u" + Integer.toHexString(ch); 383 } 384 385 /** 386 * <p>Converts the string to the unicode format '\u0020'.</p> 387 * 388 * <p>This format is the Java source code format.</p> 389 * 390 * <p>If <code>null</code> is passed in, <code>null</code> will be returned.</p> 391 * 392 * <pre> 393 * CharUtils.unicodeEscaped(null) = null 394 * CharUtils.unicodeEscaped(' ') = "\u0020" 395 * CharUtils.unicodeEscaped('A') = "\u0041" 396 * </pre> 397 * 398 * @param ch the character to convert, may be null 399 * @return the escaped unicode string, null if null input 400 */ 401 public static String unicodeEscaped(Character ch) { 402 if (ch == null) { 403 return null; 404 } 405 return unicodeEscaped(ch.charValue()); 406 } 407 408 //-------------------------------------------------------------------------- 409 /** 410 * <p>Checks whether the character is ASCII 7 bit.</p> 411 * 412 * <pre> 413 * CharUtils.isAscii('a') = true 414 * CharUtils.isAscii('A') = true 415 * CharUtils.isAscii('3') = true 416 * CharUtils.isAscii('-') = true 417 * CharUtils.isAscii('\n') = true 418 * CharUtils.isAscii('©') = false 419 * </pre> 420 * 421 * @param ch the character to check 422 * @return true if less than 128 423 */ 424 public static boolean isAscii(char ch) { 425 return ch < 128; 426 } 427 428 /** 429 * <p>Checks whether the character is ASCII 7 bit printable.</p> 430 * 431 * <pre> 432 * CharUtils.isAsciiPrintable('a') = true 433 * CharUtils.isAsciiPrintable('A') = true 434 * CharUtils.isAsciiPrintable('3') = true 435 * CharUtils.isAsciiPrintable('-') = true 436 * CharUtils.isAsciiPrintable('\n') = false 437 * CharUtils.isAsciiPrintable('©') = false 438 * </pre> 439 * 440 * @param ch the character to check 441 * @return true if between 32 and 126 inclusive 442 */ 443 public static boolean isAsciiPrintable(char ch) { 444 return ch >= 32 && ch < 127; 445 } 446 447 /** 448 * <p>Checks whether the character is ASCII 7 bit control.</p> 449 * 450 * <pre> 451 * CharUtils.isAsciiControl('a') = false 452 * CharUtils.isAsciiControl('A') = false 453 * CharUtils.isAsciiControl('3') = false 454 * CharUtils.isAsciiControl('-') = false 455 * CharUtils.isAsciiControl('\n') = true 456 * CharUtils.isAsciiControl('©') = false 457 * </pre> 458 * 459 * @param ch the character to check 460 * @return true if less than 32 or equals 127 461 */ 462 public static boolean isAsciiControl(char ch) { 463 return ch < 32 || ch == 127; 464 } 465 466 /** 467 * <p>Checks whether the character is ASCII 7 bit alphabetic.</p> 468 * 469 * <pre> 470 * CharUtils.isAsciiAlpha('a') = true 471 * CharUtils.isAsciiAlpha('A') = true 472 * CharUtils.isAsciiAlpha('3') = false 473 * CharUtils.isAsciiAlpha('-') = false 474 * CharUtils.isAsciiAlpha('\n') = false 475 * CharUtils.isAsciiAlpha('©') = false 476 * </pre> 477 * 478 * @param ch the character to check 479 * @return true if between 65 and 90 or 97 and 122 inclusive 480 */ 481 public static boolean isAsciiAlpha(char ch) { 482 return (ch >= 'A' && ch <= 'Z') || (ch >= 'a' && ch <= 'z'); 483 } 484 485 /** 486 * <p>Checks whether the character is ASCII 7 bit alphabetic upper case.</p> 487 * 488 * <pre> 489 * CharUtils.isAsciiAlphaUpper('a') = false 490 * CharUtils.isAsciiAlphaUpper('A') = true 491 * CharUtils.isAsciiAlphaUpper('3') = false 492 * CharUtils.isAsciiAlphaUpper('-') = false 493 * CharUtils.isAsciiAlphaUpper('\n') = false 494 * CharUtils.isAsciiAlphaUpper('©') = false 495 * </pre> 496 * 497 * @param ch the character to check 498 * @return true if between 65 and 90 inclusive 499 */ 500 public static boolean isAsciiAlphaUpper(char ch) { 501 return ch >= 'A' && ch <= 'Z'; 502 } 503 504 /** 505 * <p>Checks whether the character is ASCII 7 bit alphabetic lower case.</p> 506 * 507 * <pre> 508 * CharUtils.isAsciiAlphaLower('a') = true 509 * CharUtils.isAsciiAlphaLower('A') = false 510 * CharUtils.isAsciiAlphaLower('3') = false 511 * CharUtils.isAsciiAlphaLower('-') = false 512 * CharUtils.isAsciiAlphaLower('\n') = false 513 * CharUtils.isAsciiAlphaLower('©') = false 514 * </pre> 515 * 516 * @param ch the character to check 517 * @return true if between 97 and 122 inclusive 518 */ 519 public static boolean isAsciiAlphaLower(char ch) { 520 return ch >= 'a' && ch <= 'z'; 521 } 522 523 /** 524 * <p>Checks whether the character is ASCII 7 bit numeric.</p> 525 * 526 * <pre> 527 * CharUtils.isAsciiNumeric('a') = false 528 * CharUtils.isAsciiNumeric('A') = false 529 * CharUtils.isAsciiNumeric('3') = true 530 * CharUtils.isAsciiNumeric('-') = false 531 * CharUtils.isAsciiNumeric('\n') = false 532 * CharUtils.isAsciiNumeric('©') = false 533 * </pre> 534 * 535 * @param ch the character to check 536 * @return true if between 48 and 57 inclusive 537 */ 538 public static boolean isAsciiNumeric(char ch) { 539 return ch >= '0' && ch <= '9'; 540 } 541 542 /** 543 * <p>Checks whether the character is ASCII 7 bit numeric.</p> 544 * 545 * <pre> 546 * CharUtils.isAsciiAlphanumeric('a') = true 547 * CharUtils.isAsciiAlphanumeric('A') = true 548 * CharUtils.isAsciiAlphanumeric('3') = true 549 * CharUtils.isAsciiAlphanumeric('-') = false 550 * CharUtils.isAsciiAlphanumeric('\n') = false 551 * CharUtils.isAsciiAlphanumeric('©') = false 552 * </pre> 553 * 554 * @param ch the character to check 555 * @return true if between 48 and 57 or 65 and 90 or 97 and 122 inclusive 556 */ 557 public static boolean isAsciiAlphanumeric(char ch) { 558 return (ch >= 'A' && ch <= 'Z') || (ch >= 'a' && ch <= 'z') || (ch >= '0' && ch <= '9'); 559 } 560 561 // ----------------- Following code copied from Apache Harmony (Character class) 562 /** 563 * Indicates whether {@code ch} is a high- (or leading-) surrogate code unit 564 * that is used for representing supplementary characters in UTF-16 565 * encoding. 566 * 567 * @param ch 568 * the character to test. 569 * @return {@code true} if {@code ch} is a high-surrogate code unit; 570 * {@code false} otherwise. 571 */ 572 static boolean isHighSurrogate(char ch) { 573 return ('\uD800' <= ch && '\uDBFF' >= ch); 574 } 575 576 }