1 /*
2 * Licensed to the Apache Software Foundation (ASF) under one or more
3 * contributor license agreements. See the NOTICE file distributed with
4 * this work for additional information regarding copyright ownership.
5 * The ASF licenses this file to You under the Apache License, Version 2.0
6 * (the "License"); you may not use this file except in compliance with
7 * the License. You may obtain a copy of the License at
8 *
9 * https://www.apache.org/licenses/LICENSE-2.0
10 *
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
16 */
17 package org.apache.commons.lang3;
18
19 import java.util.Objects;
20
/**
 * Operations on {@code char} primitives and {@link Character} objects.
 *
 * <p>This class tries to handle {@code null} input gracefully: unless a method
 * documents a {@link NullPointerException}, a {@code null} input does not raise
 * an exception. Each method documents its behavior in more detail.</p>
 *
 * <p>#ThreadSafe#</p>
 * @since 2.1
 */
public class CharUtils {

    /** Number of characters in the ASCII 7 bit range. */
    private static final int ASCII_SIZE = 128;

    /** Message used when a required String is {@code null} or empty. */
    private static final String EMPTY_STRING_MESSAGE = "The String must not be empty";

    /** Cache of the one-character Strings for the ASCII 7 bit characters, indexed by character code. */
    private static final String[] CHAR_STRING_ARRAY = newAsciiStringCache();

    /** Lower case hexadecimal digits, indexed by their numeric value. */
    private static final char[] HEX_DIGITS = {'0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'a', 'b', 'c', 'd', 'e', 'f'};

    /**
     * Linefeed character LF ({@code '\n'}, Unicode 000a).
     *
     * @see <a href="https://docs.oracle.com/javase/specs/jls/se8/html/jls-3.html#jls-3.10.6">JLS: Escape Sequences
     *      for Character and String Literals</a>
     * @since 2.2
     */
    public static final char LF = '\n';

    /**
     * Carriage return character CR ({@code '\r'}, Unicode 000d).
     *
     * @see <a href="https://docs.oracle.com/javase/specs/jls/se8/html/jls-3.html#jls-3.10.6">JLS: Escape Sequences
     *      for Character and String Literals</a>
     * @since 2.2
     */
    public static final char CR = '\r';

    /**
     * Null control character ({@code '\0'}, Unicode 0000), abbreviated NUL.
     *
     * @since 3.6
     */
    public static final char NUL = '\0';

    /**
     * Compares two {@code char} values numerically. This is the same functionality as
     * {@link Character#compare(char, char)}, available since Java 7.
     *
     * @param x the first {@code char} to compare
     * @param y the second {@code char} to compare
     * @return the value {@code 0} if {@code x == y};
     *         a value less than {@code 0} if {@code x < y}; and
     *         a value greater than {@code 0} if {@code x > y}
     * @since 3.4
     */
    public static int compare(final char x, final char y) {
        return Character.compare(x, y);
    }

    /**
     * Tests whether the character is ASCII 7 bit.
     *
     * <pre>
     *   CharUtils.isAscii('a')  = true
     *   CharUtils.isAscii('A')  = true
     *   CharUtils.isAscii('3')  = true
     *   CharUtils.isAscii('-')  = true
     *   CharUtils.isAscii('\n') = true
     *   CharUtils.isAscii('&copy;') = false
     * </pre>
     *
     * @param ch the character to check
     * @return true if less than 128
     */
    public static boolean isAscii(final char ch) {
        return ch < ASCII_SIZE;
    }

    /**
     * Tests whether the character is ASCII 7 bit alphabetic.
     *
     * <pre>
     *   CharUtils.isAsciiAlpha('a')  = true
     *   CharUtils.isAsciiAlpha('A')  = true
     *   CharUtils.isAsciiAlpha('3')  = false
     *   CharUtils.isAsciiAlpha('-')  = false
     *   CharUtils.isAsciiAlpha('\n') = false
     *   CharUtils.isAsciiAlpha('&copy;') = false
     * </pre>
     *
     * @param ch the character to check
     * @return true if between 65 and 90 or 97 and 122 inclusive
     */
    public static boolean isAsciiAlpha(final char ch) {
        return isAsciiAlphaUpper(ch) || isAsciiAlphaLower(ch);
    }

    /**
     * Tests whether the character is ASCII 7 bit alphabetic lower case.
     *
     * <pre>
     *   CharUtils.isAsciiAlphaLower('a')  = true
     *   CharUtils.isAsciiAlphaLower('A')  = false
     *   CharUtils.isAsciiAlphaLower('3')  = false
     *   CharUtils.isAsciiAlphaLower('-')  = false
     *   CharUtils.isAsciiAlphaLower('\n') = false
     *   CharUtils.isAsciiAlphaLower('&copy;') = false
     * </pre>
     *
     * @param ch the character to check
     * @return true if between 97 and 122 inclusive
     */
    public static boolean isAsciiAlphaLower(final char ch) {
        return ch >= 'a' && ch <= 'z';
    }

    /**
     * Tests whether the character is ASCII 7 bit alphanumeric.
     *
     * <pre>
     *   CharUtils.isAsciiAlphanumeric('a')  = true
     *   CharUtils.isAsciiAlphanumeric('A')  = true
     *   CharUtils.isAsciiAlphanumeric('3')  = true
     *   CharUtils.isAsciiAlphanumeric('-')  = false
     *   CharUtils.isAsciiAlphanumeric('\n') = false
     *   CharUtils.isAsciiAlphanumeric('&copy;') = false
     * </pre>
     *
     * @param ch the character to check
     * @return true if between 48 and 57 or 65 and 90 or 97 and 122 inclusive
     */
    public static boolean isAsciiAlphanumeric(final char ch) {
        return isAsciiAlpha(ch) || isAsciiNumeric(ch);
    }

    /**
     * Tests whether the character is ASCII 7 bit alphabetic upper case.
     *
     * <pre>
     *   CharUtils.isAsciiAlphaUpper('a')  = false
     *   CharUtils.isAsciiAlphaUpper('A')  = true
     *   CharUtils.isAsciiAlphaUpper('3')  = false
     *   CharUtils.isAsciiAlphaUpper('-')  = false
     *   CharUtils.isAsciiAlphaUpper('\n') = false
     *   CharUtils.isAsciiAlphaUpper('&copy;') = false
     * </pre>
     *
     * @param ch the character to check
     * @return true if between 65 and 90 inclusive
     */
    public static boolean isAsciiAlphaUpper(final char ch) {
        return ch >= 'A' && ch <= 'Z';
    }

    /**
     * Tests whether the character is ASCII 7 bit control.
     *
     * <pre>
     *   CharUtils.isAsciiControl('a')  = false
     *   CharUtils.isAsciiControl('A')  = false
     *   CharUtils.isAsciiControl('3')  = false
     *   CharUtils.isAsciiControl('-')  = false
     *   CharUtils.isAsciiControl('\n') = true
     *   CharUtils.isAsciiControl('&copy;') = false
     * </pre>
     *
     * @param ch the character to check
     * @return true if less than 32 or equals 127
     */
    public static boolean isAsciiControl(final char ch) {
        return ch < 32 || ch == 127;
    }

    /**
     * Tests whether the character is ASCII 7 bit numeric.
     *
     * <pre>
     *   CharUtils.isAsciiNumeric('a')  = false
     *   CharUtils.isAsciiNumeric('A')  = false
     *   CharUtils.isAsciiNumeric('3')  = true
     *   CharUtils.isAsciiNumeric('-')  = false
     *   CharUtils.isAsciiNumeric('\n') = false
     *   CharUtils.isAsciiNumeric('&copy;') = false
     * </pre>
     *
     * @param ch the character to check
     * @return true if between 48 and 57 inclusive
     */
    public static boolean isAsciiNumeric(final char ch) {
        return ch >= '0' && ch <= '9';
    }

    /**
     * Tests whether the character is ASCII 7 bit printable.
     *
     * <pre>
     *   CharUtils.isAsciiPrintable('a')  = true
     *   CharUtils.isAsciiPrintable('A')  = true
     *   CharUtils.isAsciiPrintable('3')  = true
     *   CharUtils.isAsciiPrintable('-')  = true
     *   CharUtils.isAsciiPrintable('\n') = false
     *   CharUtils.isAsciiPrintable('&copy;') = false
     * </pre>
     *
     * @param ch the character to check
     * @return true if between 32 and 126 inclusive
     */
    public static boolean isAsciiPrintable(final char ch) {
        return ch >= 32 && ch < 127;
    }

    /**
     * Tests whether a character is a hexadecimal character.
     *
     * <pre>
     *   CharUtils.isHex('0')  = true
     *   CharUtils.isHex('3')  = true
     *   CharUtils.isHex('9')  = true
     *   CharUtils.isHex('a')  = true
     *   CharUtils.isHex('f')  = true
     *   CharUtils.isHex('g')  = false
     *   CharUtils.isHex('A')  = true
     *   CharUtils.isHex('F')  = true
     *   CharUtils.isHex('G')  = false
     *   CharUtils.isHex('#')  = false
     *   CharUtils.isHex('-')  = false
     *   CharUtils.isHex('\n') = false
     *   CharUtils.isHex('&copy;') = false
     * </pre>
     *
     * @param ch the character to test.
     * @return true if character is a hexadecimal character.
     * @since 3.18.0
     */
    public static boolean isHex(final char ch) {
        return isAsciiNumeric(ch) || (ch >= 'a' && ch <= 'f') || (ch >= 'A' && ch <= 'F');
    }

    /**
     * Tests if the given char is an octal digit. Octal digits are the character representations of the digits 0 to 7.
     *
     * @param ch the char to check
     * @return true if the given char is the character representation of one of the digits from 0 to 7
     * @since 3.18.0
     */
    public static boolean isOctal(final char ch) {
        return ch >= '0' && ch <= '7';
    }

    /**
     * Converts the Character to a char throwing an exception for {@code null}.
     *
     * <pre>
     *   CharUtils.toChar(' ')  = ' '
     *   CharUtils.toChar('A')  = 'A'
     *   CharUtils.toChar(null) throws NullPointerException
     * </pre>
     *
     * @param ch the character to convert
     * @return the char value of the Character
     * @throws NullPointerException if the Character is null
     */
    public static char toChar(final Character ch) {
        return Objects.requireNonNull(ch, "ch").charValue();
    }

    /**
     * Converts the Character to a char handling {@code null}.
     *
     * <pre>
     *   CharUtils.toChar(null, 'X') = 'X'
     *   CharUtils.toChar(' ', 'X')  = ' '
     *   CharUtils.toChar('A', 'X')  = 'A'
     * </pre>
     *
     * @param ch the character to convert
     * @param defaultValue the value to use if the Character is null
     * @return the char value of the Character or the default if null
     */
    public static char toChar(final Character ch, final char defaultValue) {
        return ch != null ? ch.charValue() : defaultValue;
    }

    /**
     * Converts the String to a char using the first character, throwing
     * an exception on {@code null} or empty Strings.
     *
     * <pre>
     *   CharUtils.toChar("A")  = 'A'
     *   CharUtils.toChar("BA") = 'B'
     *   CharUtils.toChar(null) throws NullPointerException
     *   CharUtils.toChar("")   throws IllegalArgumentException
     * </pre>
     *
     * @param str the String to convert
     * @return the char value of the first letter of the String
     * @throws NullPointerException if the string is null
     * @throws IllegalArgumentException if the String is empty
     */
    public static char toChar(final String str) {
        // Both failure modes deliberately share one message; only the exception type differs.
        Objects.requireNonNull(str, EMPTY_STRING_MESSAGE);
        if (str.isEmpty()) {
            throw new IllegalArgumentException(EMPTY_STRING_MESSAGE);
        }
        return str.charAt(0);
    }

    /**
     * Converts the String to a char using the first character, defaulting
     * the value on {@code null} or empty Strings.
     *
     * <pre>
     *   CharUtils.toChar(null, 'X') = 'X'
     *   CharUtils.toChar("", 'X')   = 'X'
     *   CharUtils.toChar("A", 'X')  = 'A'
     *   CharUtils.toChar("BA", 'X') = 'B'
     * </pre>
     *
     * @param str the String to convert
     * @param defaultValue the value to use if the String is null or empty
     * @return the char value of the first letter of the String or the default if null or empty
     */
    public static char toChar(final String str, final char defaultValue) {
        return isNullOrEmpty(str) ? defaultValue : str.charAt(0);
    }

    /**
     * Delegates to {@link Character#valueOf(char)}.
     *
     * @param c the character to convert
     * @return a {@code Character} representing {@code c}.
     * @deprecated Use {@link Character#valueOf(char)}.
     */
    @Deprecated
    public static Character toCharacterObject(final char c) {
        return Character.valueOf(c);
    }

    /**
     * Converts the String to a Character using the first character, returning
     * {@code null} for {@code null} or empty Strings.
     *
     * <p>The result is obtained from {@link Character#valueOf(char)}, which caches
     * the Character objects for ASCII 7 bit characters.</p>
     *
     * <pre>
     *   CharUtils.toCharacterObject(null) = null
     *   CharUtils.toCharacterObject("")   = null
     *   CharUtils.toCharacterObject("A")  = 'A'
     *   CharUtils.toCharacterObject("BA") = 'B'
     * </pre>
     *
     * @param str the String to convert
     * @return the Character value of the first letter of the String, or {@code null} if the String is null or empty
     */
    public static Character toCharacterObject(final String str) {
        return isNullOrEmpty(str) ? null : Character.valueOf(str.charAt(0));
    }

    /**
     * Converts the character to the int it represents, throwing an
     * exception if the character is not numeric.
     *
     * <p>This method converts the char '1' to the int 1 and so on.</p>
     *
     * <pre>
     *   CharUtils.toIntValue('3') = 3
     *   CharUtils.toIntValue('A') throws IllegalArgumentException
     * </pre>
     *
     * @param ch the character to convert
     * @return the int value of the character
     * @throws IllegalArgumentException if the character is not ASCII numeric
     */
    public static int toIntValue(final char ch) {
        if (!isAsciiNumeric(ch)) {
            throw new IllegalArgumentException("The character " + ch + " is not in the range '0' - '9'");
        }
        return ch - '0';
    }

    /**
     * Converts the character to the int it represents, returning a
     * default value if the character is not numeric.
     *
     * <p>This method converts the char '1' to the int 1 and so on.</p>
     *
     * <pre>
     *   CharUtils.toIntValue('3', -1) = 3
     *   CharUtils.toIntValue('A', -1) = -1
     * </pre>
     *
     * @param ch the character to convert
     * @param defaultValue the default value to use if the character is not numeric
     * @return the int value of the character, or the default if it is not ASCII numeric
     */
    public static int toIntValue(final char ch, final int defaultValue) {
        return isAsciiNumeric(ch) ? ch - '0' : defaultValue;
    }

    /**
     * Converts the Character to the int it represents, throwing an
     * exception if the Character is {@code null} or not numeric.
     *
     * <p>This method converts the char '1' to the int 1 and so on.</p>
     *
     * <pre>
     *   CharUtils.toIntValue('3')  = 3
     *   CharUtils.toIntValue(null) throws NullPointerException
     *   CharUtils.toIntValue('A')  throws IllegalArgumentException
     * </pre>
     *
     * @param ch the character to convert, not null
     * @return the int value of the character
     * @throws NullPointerException if the Character is null
     * @throws IllegalArgumentException if the Character is not ASCII numeric
     */
    public static int toIntValue(final Character ch) {
        return toIntValue(toChar(ch));
    }

    /**
     * Converts the Character to the int it represents, returning a
     * default value if the Character is {@code null} or not numeric.
     *
     * <p>This method converts the char '1' to the int 1 and so on.</p>
     *
     * <pre>
     *   CharUtils.toIntValue(null, -1) = -1
     *   CharUtils.toIntValue('3', -1)  = 3
     *   CharUtils.toIntValue('A', -1)  = -1
     * </pre>
     *
     * @param ch the character to convert
     * @param defaultValue the default value to use if the character is null or not numeric
     * @return the int value of the character, or the default if it is null or not ASCII numeric
     */
    public static int toIntValue(final Character ch, final int defaultValue) {
        return ch != null ? toIntValue(ch.charValue(), defaultValue) : defaultValue;
    }

    /**
     * Converts the character to a String that contains the one character.
     *
     * <p>For ASCII 7 bit characters, this uses a cache that will return the
     * same String object each time.</p>
     *
     * <pre>
     *   CharUtils.toString(' ')  = " "
     *   CharUtils.toString('A')  = "A"
     * </pre>
     *
     * @param ch the character to convert
     * @return a String containing the one specified character
     */
    public static String toString(final char ch) {
        if (ch < CHAR_STRING_ARRAY.length) {
            return CHAR_STRING_ARRAY[ch];
        }
        return String.valueOf(ch);
    }

    /**
     * Converts the character to a String that contains the one character.
     *
     * <p>For ASCII 7 bit characters, this uses a cache that will return the
     * same String object each time.</p>
     *
     * <p>If {@code null} is passed in, {@code null} will be returned.</p>
     *
     * <pre>
     *   CharUtils.toString(null) = null
     *   CharUtils.toString(' ')  = " "
     *   CharUtils.toString('A')  = "A"
     * </pre>
     *
     * @param ch the character to convert
     * @return a String containing the one specified character, null if null input
     */
    public static String toString(final Character ch) {
        return ch != null ? toString(ch.charValue()) : null;
    }

    /**
     * Converts the character to the Unicode escape format, for example <code>&#92;u0020</code>.
     *
     * <p>This format is the Java source code format. The result always has four
     * lower case hexadecimal digits.</p>
     *
     * <pre>
     *   CharUtils.unicodeEscaped(' ') = "&#92;u0020"
     *   CharUtils.unicodeEscaped('A') = "&#92;u0041"
     * </pre>
     *
     * @param ch the character to convert
     * @return the escaped Unicode string
     */
    public static String unicodeEscaped(final char ch) {
        // One hex digit per 4-bit nibble, most significant nibble first.
        return "\\u"
                + HEX_DIGITS[(ch >> 12) & 0xF]
                + HEX_DIGITS[(ch >> 8) & 0xF]
                + HEX_DIGITS[(ch >> 4) & 0xF]
                + HEX_DIGITS[ch & 0xF];
    }

    /**
     * Converts the Character to the Unicode escape format, for example <code>&#92;u0020</code>.
     *
     * <p>This format is the Java source code format.</p>
     *
     * <p>If {@code null} is passed in, {@code null} will be returned.</p>
     *
     * <pre>
     *   CharUtils.unicodeEscaped(null) = null
     *   CharUtils.unicodeEscaped(' ')  = "&#92;u0020"
     *   CharUtils.unicodeEscaped('A')  = "&#92;u0041"
     * </pre>
     *
     * @param ch the character to convert, may be null
     * @return the escaped Unicode string, null if null input
     */
    public static String unicodeEscaped(final Character ch) {
        return ch != null ? unicodeEscaped(ch.charValue()) : null;
    }

    /**
     * Tests whether the String is {@code null} or has no characters.
     *
     * @param str the String to test, may be null
     * @return true if the String is null or of length zero
     */
    private static boolean isNullOrEmpty(final String str) {
        return str == null || str.isEmpty();
    }

    /**
     * Builds the cache of one-character Strings for the ASCII 7 bit characters.
     *
     * @return an array where the element at index {@code i} is the String containing only {@code (char) i}
     */
    private static String[] newAsciiStringCache() {
        final String[] cache = new String[ASCII_SIZE];
        for (int i = 0; i < cache.length; i++) {
            cache[i] = String.valueOf((char) i);
        }
        return cache;
    }

    /**
     * {@link CharUtils} instances should NOT be constructed in standard programming.
     * Instead, the class should be used as {@code CharUtils.toString('c');}.
     *
     * <p>This constructor is public to permit tools that require a JavaBean instance
     * to operate.</p>
     *
     * @deprecated TODO Make private in 4.0.
     */
    @Deprecated
    public CharUtils() {
        // empty
    }
}