CharUtils.java

  1. /*
  2.  * Licensed to the Apache Software Foundation (ASF) under one or more
  3.  * contributor license agreements.  See the NOTICE file distributed with
  4.  * this work for additional information regarding copyright ownership.
  5.  * The ASF licenses this file to You under the Apache License, Version 2.0
  6.  * (the "License"); you may not use this file except in compliance with
  7.  * the License.  You may obtain a copy of the License at
  8.  *
  9.  *      http://www.apache.org/licenses/LICENSE-2.0
  10.  *
  11.  * Unless required by applicable law or agreed to in writing, software
  12.  * distributed under the License is distributed on an "AS IS" BASIS,
  13.  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  14.  * See the License for the specific language governing permissions and
  15.  * limitations under the License.
  16.  */
  17. package org.apache.commons.lang3;

  18. import java.util.Objects;

  19. /**
  20.  * Operations on char primitives and Character objects.
  21.  *
  22.  * <p>This class tries to handle {@code null} input gracefully.
  23.  * An exception will not be thrown for a {@code null} input.
  24.  * Each method documents its behavior in more detail.</p>
  25.  *
  26.  * <p>#ThreadSafe#</p>
  27.  * @since 2.1
  28.  */
  29. public class CharUtils {

  30.     private static final String[] CHAR_STRING_ARRAY = new String[128];

  31.     private static final char[] HEX_DIGITS = {'0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'a', 'b', 'c', 'd', 'e', 'f'};

  32.     /**
  33.      * Linefeed character LF ({@code '\n'}, Unicode 000a).
  34.      *
  35.      * @see <a href="https://docs.oracle.com/javase/specs/jls/se8/html/jls-3.html#jls-3.10.6">JLF: Escape Sequences
  36.      *      for Character and String Literals</a>
  37.      * @since 2.2
  38.      */
  39.     public static final char LF = '\n';

  40.     /**
  41.      * Carriage return character CR ('\r', Unicode 000d).
  42.      *
  43.      * @see <a href="https://docs.oracle.com/javase/specs/jls/se8/html/jls-3.html#jls-3.10.6">JLF: Escape Sequences
  44.      *      for Character and String Literals</a>
  45.      * @since 2.2
  46.      */
  47.     public static final char CR = '\r';

  48.     /**
  49.      * {@code \u0000} null control character ('\0'), abbreviated NUL.
  50.      *
  51.      * @since 3.6
  52.      */
  53.     public static final char NUL = '\0';

  54.     static {
  55.         ArrayUtils.setAll(CHAR_STRING_ARRAY, i -> String.valueOf((char) i));
  56.     }

  57.     /**
  58.      * Compares two {@code char} values numerically. This is the same functionality as provided in Java 7.
  59.      *
  60.      * @param x the first {@code char} to compare
  61.      * @param y the second {@code char} to compare
  62.      * @return the value {@code 0} if {@code x == y};
  63.      *         a value less than {@code 0} if {@code x < y}; and
  64.      *         a value greater than {@code 0} if {@code x > y}
  65.      * @since 3.4
  66.      */
  67.     public static int compare(final char x, final char y) {
  68.         return x - y;
  69.     }

  70.     /**
  71.      * Checks whether the character is ASCII 7 bit.
  72.      *
  73.      * <pre>
  74.      *   CharUtils.isAscii('a')  = true
  75.      *   CharUtils.isAscii('A')  = true
  76.      *   CharUtils.isAscii('3')  = true
  77.      *   CharUtils.isAscii('-')  = true
  78.      *   CharUtils.isAscii('\n') = true
  79.      *   CharUtils.isAscii('&copy;') = false
  80.      * </pre>
  81.      *
  82.      * @param ch  the character to check
  83.      * @return true if less than 128
  84.      */
  85.     public static boolean isAscii(final char ch) {
  86.         return ch < 128;
  87.     }

  88.     /**
  89.      * Checks whether the character is ASCII 7 bit alphabetic.
  90.      *
  91.      * <pre>
  92.      *   CharUtils.isAsciiAlpha('a')  = true
  93.      *   CharUtils.isAsciiAlpha('A')  = true
  94.      *   CharUtils.isAsciiAlpha('3')  = false
  95.      *   CharUtils.isAsciiAlpha('-')  = false
  96.      *   CharUtils.isAsciiAlpha('\n') = false
  97.      *   CharUtils.isAsciiAlpha('&copy;') = false
  98.      * </pre>
  99.      *
  100.      * @param ch  the character to check
  101.      * @return true if between 65 and 90 or 97 and 122 inclusive
  102.      */
  103.     public static boolean isAsciiAlpha(final char ch) {
  104.         return isAsciiAlphaUpper(ch) || isAsciiAlphaLower(ch);
  105.     }

  106.     /**
  107.      * Checks whether the character is ASCII 7 bit alphabetic lower case.
  108.      *
  109.      * <pre>
  110.      *   CharUtils.isAsciiAlphaLower('a')  = true
  111.      *   CharUtils.isAsciiAlphaLower('A')  = false
  112.      *   CharUtils.isAsciiAlphaLower('3')  = false
  113.      *   CharUtils.isAsciiAlphaLower('-')  = false
  114.      *   CharUtils.isAsciiAlphaLower('\n') = false
  115.      *   CharUtils.isAsciiAlphaLower('&copy;') = false
  116.      * </pre>
  117.      *
  118.      * @param ch  the character to check
  119.      * @return true if between 97 and 122 inclusive
  120.      */
  121.     public static boolean isAsciiAlphaLower(final char ch) {
  122.         return ch >= 'a' && ch <= 'z';
  123.     }

  124.     /**
  125.      * Checks whether the character is ASCII 7 bit numeric.
  126.      *
  127.      * <pre>
  128.      *   CharUtils.isAsciiAlphanumeric('a')  = true
  129.      *   CharUtils.isAsciiAlphanumeric('A')  = true
  130.      *   CharUtils.isAsciiAlphanumeric('3')  = true
  131.      *   CharUtils.isAsciiAlphanumeric('-')  = false
  132.      *   CharUtils.isAsciiAlphanumeric('\n') = false
  133.      *   CharUtils.isAsciiAlphanumeric('&copy;') = false
  134.      * </pre>
  135.      *
  136.      * @param ch  the character to check
  137.      * @return true if between 48 and 57 or 65 and 90 or 97 and 122 inclusive
  138.      */
  139.     public static boolean isAsciiAlphanumeric(final char ch) {
  140.         return isAsciiAlpha(ch) || isAsciiNumeric(ch);
  141.     }

  142.     /**
  143.      * Checks whether the character is ASCII 7 bit alphabetic upper case.
  144.      *
  145.      * <pre>
  146.      *   CharUtils.isAsciiAlphaUpper('a')  = false
  147.      *   CharUtils.isAsciiAlphaUpper('A')  = true
  148.      *   CharUtils.isAsciiAlphaUpper('3')  = false
  149.      *   CharUtils.isAsciiAlphaUpper('-')  = false
  150.      *   CharUtils.isAsciiAlphaUpper('\n') = false
  151.      *   CharUtils.isAsciiAlphaUpper('&copy;') = false
  152.      * </pre>
  153.      *
  154.      * @param ch  the character to check
  155.      * @return true if between 65 and 90 inclusive
  156.      */
  157.     public static boolean isAsciiAlphaUpper(final char ch) {
  158.         return ch >= 'A' && ch <= 'Z';
  159.     }

  160.     /**
  161.      * Checks whether the character is ASCII 7 bit control.
  162.      *
  163.      * <pre>
  164.      *   CharUtils.isAsciiControl('a')  = false
  165.      *   CharUtils.isAsciiControl('A')  = false
  166.      *   CharUtils.isAsciiControl('3')  = false
  167.      *   CharUtils.isAsciiControl('-')  = false
  168.      *   CharUtils.isAsciiControl('\n') = true
  169.      *   CharUtils.isAsciiControl('&copy;') = false
  170.      * </pre>
  171.      *
  172.      * @param ch  the character to check
  173.      * @return true if less than 32 or equals 127
  174.      */
  175.     public static boolean isAsciiControl(final char ch) {
  176.         return ch < 32 || ch == 127;
  177.     }

  178.     /**
  179.      * Checks whether the character is ASCII 7 bit numeric.
  180.      *
  181.      * <pre>
  182.      *   CharUtils.isAsciiNumeric('a')  = false
  183.      *   CharUtils.isAsciiNumeric('A')  = false
  184.      *   CharUtils.isAsciiNumeric('3')  = true
  185.      *   CharUtils.isAsciiNumeric('-')  = false
  186.      *   CharUtils.isAsciiNumeric('\n') = false
  187.      *   CharUtils.isAsciiNumeric('&copy;') = false
  188.      * </pre>
  189.      *
  190.      * @param ch  the character to check
  191.      * @return true if between 48 and 57 inclusive
  192.      */
  193.     public static boolean isAsciiNumeric(final char ch) {
  194.         return ch >= '0' && ch <= '9';
  195.     }

  196.     /**
  197.      * Checks whether the character is ASCII 7 bit printable.
  198.      *
  199.      * <pre>
  200.      *   CharUtils.isAsciiPrintable('a')  = true
  201.      *   CharUtils.isAsciiPrintable('A')  = true
  202.      *   CharUtils.isAsciiPrintable('3')  = true
  203.      *   CharUtils.isAsciiPrintable('-')  = true
  204.      *   CharUtils.isAsciiPrintable('\n') = false
  205.      *   CharUtils.isAsciiPrintable('&copy;') = false
  206.      * </pre>
  207.      *
  208.      * @param ch  the character to check
  209.      * @return true if between 32 and 126 inclusive
  210.      */
  211.     public static boolean isAsciiPrintable(final char ch) {
  212.         return ch >= 32 && ch < 127;
  213.     }

  214.     /**
  215.      * Converts the Character to a char throwing an exception for {@code null}.
  216.      *
  217.      * <pre>
  218.      *   CharUtils.toChar(' ')  = ' '
  219.      *   CharUtils.toChar('A')  = 'A'
  220.      *   CharUtils.toChar(null) throws IllegalArgumentException
  221.      * </pre>
  222.      *
  223.      * @param ch  the character to convert
  224.      * @return the char value of the Character
  225.      * @throws NullPointerException if the Character is null
  226.      */
  227.     public static char toChar(final Character ch) {
  228.         return Objects.requireNonNull(ch, "ch").charValue();
  229.     }

  230.     /**
  231.      * Converts the Character to a char handling {@code null}.
  232.      *
  233.      * <pre>
  234.      *   CharUtils.toChar(null, 'X') = 'X'
  235.      *   CharUtils.toChar(' ', 'X')  = ' '
  236.      *   CharUtils.toChar('A', 'X')  = 'A'
  237.      * </pre>
  238.      *
  239.      * @param ch  the character to convert
  240.      * @param defaultValue  the value to use if the  Character is null
  241.      * @return the char value of the Character or the default if null
  242.      */
  243.     public static char toChar(final Character ch, final char defaultValue) {
  244.         return ch != null ? ch.charValue() : defaultValue;
  245.     }

  246.     /**
  247.      * Converts the String to a char using the first character, throwing
  248.      * an exception on empty Strings.
  249.      *
  250.      * <pre>
  251.      *   CharUtils.toChar("A")  = 'A'
  252.      *   CharUtils.toChar("BA") = 'B'
  253.      *   CharUtils.toChar(null) throws IllegalArgumentException
  254.      *   CharUtils.toChar("")   throws IllegalArgumentException
  255.      * </pre>
  256.      *
  257.      * @param str  the character to convert
  258.      * @return the char value of the first letter of the String
  259.      * @throws NullPointerException if the string is null
  260.      * @throws IllegalArgumentException if the String is empty
  261.      */
  262.     public static char toChar(final String str) {
  263.         Validate.notEmpty(str, "The String must not be empty");
  264.         return str.charAt(0);
  265.     }

  266.     /**
  267.      * Converts the String to a char using the first character, defaulting
  268.      * the value on empty Strings.
  269.      *
  270.      * <pre>
  271.      *   CharUtils.toChar(null, 'X') = 'X'
  272.      *   CharUtils.toChar("", 'X')   = 'X'
  273.      *   CharUtils.toChar("A", 'X')  = 'A'
  274.      *   CharUtils.toChar("BA", 'X') = 'B'
  275.      * </pre>
  276.      *
  277.      * @param str  the character to convert
  278.      * @param defaultValue  the value to use if the  Character is null
  279.      * @return the char value of the first letter of the String or the default if null
  280.      */
  281.     public static char toChar(final String str, final char defaultValue) {
  282.         return StringUtils.isEmpty(str) ? defaultValue : str.charAt(0);
  283.     }

  284.     /**
  285.      * Delegates to {@link Character#valueOf(char)}.
  286.      *
  287.      * @param c the character to convert
  288.      * @return a {@code Character} representing {@code c}.
  289.      * @deprecated Use {@link Character#valueOf(char)}.
  290.      */
  291.     @Deprecated
  292.     public static Character toCharacterObject(final char c) {
  293.         return Character.valueOf(c);
  294.     }

  295.     /**
  296.      * Converts the String to a Character using the first character, returning
  297.      * null for empty Strings.
  298.      *
  299.      * <p>For ASCII 7 bit characters, this uses a cache that will return the
  300.      * same Character object each time.</p>
  301.      *
  302.      * <pre>
  303.      *   CharUtils.toCharacterObject(null) = null
  304.      *   CharUtils.toCharacterObject("")   = null
  305.      *   CharUtils.toCharacterObject("A")  = 'A'
  306.      *   CharUtils.toCharacterObject("BA") = 'B'
  307.      * </pre>
  308.      *
  309.      * @param str  the character to convert
  310.      * @return the Character value of the first letter of the String
  311.      */
  312.     public static Character toCharacterObject(final String str) {
  313.         return StringUtils.isEmpty(str) ? null : Character.valueOf(str.charAt(0));
  314.     }

  315.     /**
  316.      * Converts the character to the Integer it represents, throwing an
  317.      * exception if the character is not numeric.
  318.      *
  319.      * <p>This method converts the char '1' to the int 1 and so on.</p>
  320.      *
  321.      * <pre>
  322.      *   CharUtils.toIntValue('3')  = 3
  323.      *   CharUtils.toIntValue('A')  throws IllegalArgumentException
  324.      * </pre>
  325.      *
  326.      * @param ch  the character to convert
  327.      * @return the int value of the character
  328.      * @throws IllegalArgumentException if the character is not ASCII numeric
  329.      */
  330.     public static int toIntValue(final char ch) {
  331.         if (!isAsciiNumeric(ch)) {
  332.             throw new IllegalArgumentException("The character " + ch + " is not in the range '0' - '9'");
  333.         }
  334.         return ch - 48;
  335.     }

  336.     /**
  337.      * Converts the character to the Integer it represents, throwing an
  338.      * exception if the character is not numeric.
  339.      *
  340.      * <p>This method converts the char '1' to the int 1 and so on.</p>
  341.      *
  342.      * <pre>
  343.      *   CharUtils.toIntValue('3', -1)  = 3
  344.      *   CharUtils.toIntValue('A', -1)  = -1
  345.      * </pre>
  346.      *
  347.      * @param ch  the character to convert
  348.      * @param defaultValue  the default value to use if the character is not numeric
  349.      * @return the int value of the character
  350.      */
  351.     public static int toIntValue(final char ch, final int defaultValue) {
  352.         return isAsciiNumeric(ch) ? ch - 48 : defaultValue;
  353.     }

  354.     /**
  355.      * Converts the character to the Integer it represents, throwing an
  356.      * exception if the character is not numeric.
  357.      *
  358.      * <p>This method converts the char '1' to the int 1 and so on.</p>
  359.      *
  360.      * <pre>
  361.      *   CharUtils.toIntValue('3')  = 3
  362.      *   CharUtils.toIntValue(null) throws IllegalArgumentException
  363.      *   CharUtils.toIntValue('A')  throws IllegalArgumentException
  364.      * </pre>
  365.      *
  366.      * @param ch  the character to convert, not null
  367.      * @return the int value of the character
  368.      * @throws NullPointerException if the Character is null
  369.      * @throws IllegalArgumentException if the Character is not ASCII numeric
  370.      */
  371.     public static int toIntValue(final Character ch) {
  372.         return toIntValue(toChar(ch));
  373.     }

  374.     /**
  375.      * Converts the character to the Integer it represents, throwing an
  376.      * exception if the character is not numeric.
  377.      *
  378.      * <p>This method converts the char '1' to the int 1 and so on.</p>
  379.      *
  380.      * <pre>
  381.      *   CharUtils.toIntValue(null, -1) = -1
  382.      *   CharUtils.toIntValue('3', -1)  = 3
  383.      *   CharUtils.toIntValue('A', -1)  = -1
  384.      * </pre>
  385.      *
  386.      * @param ch  the character to convert
  387.      * @param defaultValue  the default value to use if the character is not numeric
  388.      * @return the int value of the character
  389.      */
  390.     public static int toIntValue(final Character ch, final int defaultValue) {
  391.         return ch != null ? toIntValue(ch.charValue(), defaultValue) : defaultValue;
  392.     }

  393.     /**
  394.      * Converts the character to a String that contains the one character.
  395.      *
  396.      * <p>For ASCII 7 bit characters, this uses a cache that will return the
  397.      * same String object each time.</p>
  398.      *
  399.      * <pre>
  400.      *   CharUtils.toString(' ')  = " "
  401.      *   CharUtils.toString('A')  = "A"
  402.      * </pre>
  403.      *
  404.      * @param ch  the character to convert
  405.      * @return a String containing the one specified character
  406.      */
  407.     public static String toString(final char ch) {
  408.         if (ch < CHAR_STRING_ARRAY.length) {
  409.             return CHAR_STRING_ARRAY[ch];
  410.         }
  411.         return String.valueOf(ch);
  412.     }

  413.     /**
  414.      * Converts the character to a String that contains the one character.
  415.      *
  416.      * <p>For ASCII 7 bit characters, this uses a cache that will return the
  417.      * same String object each time.</p>
  418.      *
  419.      * <p>If {@code null} is passed in, {@code null} will be returned.</p>
  420.      *
  421.      * <pre>
  422.      *   CharUtils.toString(null) = null
  423.      *   CharUtils.toString(' ')  = " "
  424.      *   CharUtils.toString('A')  = "A"
  425.      * </pre>
  426.      *
  427.      * @param ch  the character to convert
  428.      * @return a String containing the one specified character
  429.      */
  430.     public static String toString(final Character ch) {
  431.         return ch != null ? toString(ch.charValue()) : null;
  432.     }

  433.     /**
  434.      * Converts the string to the Unicode format '\u0020'.
  435.      *
  436.      * <p>This format is the Java source code format.</p>
  437.      *
  438.      * <pre>
  439.      *   CharUtils.unicodeEscaped(' ') = "\u0020"
  440.      *   CharUtils.unicodeEscaped('A') = "\u0041"
  441.      * </pre>
  442.      *
  443.      * @param ch  the character to convert
  444.      * @return the escaped Unicode string
  445.      */
  446.     public static String unicodeEscaped(final char ch) {
  447.         return "\\u" +
  448.             HEX_DIGITS[ch >> 12 & 15] +
  449.             HEX_DIGITS[ch >> 8 & 15] +
  450.             HEX_DIGITS[ch >> 4 & 15] +
  451.             HEX_DIGITS[ch & 15];
  452.     }

  453.     /**
  454.      * Converts the string to the Unicode format '\u0020'.
  455.      *
  456.      * <p>This format is the Java source code format.</p>
  457.      *
  458.      * <p>If {@code null} is passed in, {@code null} will be returned.</p>
  459.      *
  460.      * <pre>
  461.      *   CharUtils.unicodeEscaped(null) = null
  462.      *   CharUtils.unicodeEscaped(' ')  = "\u0020"
  463.      *   CharUtils.unicodeEscaped('A')  = "\u0041"
  464.      * </pre>
  465.      *
  466.      * @param ch  the character to convert, may be null
  467.      * @return the escaped Unicode string, null if null input
  468.      */
  469.     public static String unicodeEscaped(final Character ch) {
  470.         return ch != null ? unicodeEscaped(ch.charValue()) : null;
  471.     }

  472.     /**
  473.      * {@link CharUtils} instances should NOT be constructed in standard programming.
  474.      * Instead, the class should be used as {@code CharUtils.toString('c');}.
  475.      *
  476.      * <p>This constructor is public to permit tools that require a JavaBean instance
  477.      * to operate.</p>
  478.      *
  479.      * @deprecated TODO Make private in 4.0.
  480.      */
  481.     @Deprecated
  482.     public CharUtils() {
  483.         // empty
  484.     }
  485. }