StringUtils.java

  1. /*
  2.  * Licensed to the Apache Software Foundation (ASF) under one or more
  3.  * contributor license agreements.  See the NOTICE file distributed with
  4.  * this work for additional information regarding copyright ownership.
  5.  * The ASF licenses this file to You under the Apache License, Version 2.0
  6.  * (the "License"); you may not use this file except in compliance with
  7.  * the License.  You may obtain a copy of the License at
  8.  *
  9.  *      https://www.apache.org/licenses/LICENSE-2.0
  10.  *
  11.  * Unless required by applicable law or agreed to in writing, software
  12.  * distributed under the License is distributed on an "AS IS" BASIS,
  13.  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  14.  * See the License for the specific language governing permissions and
  15.  * limitations under the License.
  16.  */

  17. package org.apache.commons.codec.binary;

  18. import java.io.UnsupportedEncodingException;
  19. import java.nio.ByteBuffer;
  20. import java.nio.charset.Charset;
  21. import java.nio.charset.StandardCharsets;

  22. import org.apache.commons.codec.CharEncoding;

  23. /**
  24.  * Converts String to and from bytes using the encodings required by the Java specification. These encodings are
  25.  * specified in standard {@link Charset}.
  26.  *
  27.  * <p>
  28.  * This class is immutable and thread-safe.
  29.  * </p>
  30.  *
  31.  * @see CharEncoding
  32.  * @see Charset
  33.  * @see StandardCharsets
  34.  * @since 1.4
  35.  */
  36. public class StringUtils {

  37.     /**
  38.      * <p>
  39.      * Compares two CharSequences, returning {@code true} if they represent equal sequences of characters.
  40.      * </p>
  41.      *
  42.      * <p>
  43.      * {@code null}s are handled without exceptions. Two {@code null} references are considered to be equal.
  44.      * The comparison is case sensitive.
  45.      * </p>
  46.      *
  47.      * <pre>
  48.      * StringUtils.equals(null, null)   = true
  49.      * StringUtils.equals(null, "abc")  = false
  50.      * StringUtils.equals("abc", null)  = false
  51.      * StringUtils.equals("abc", "abc") = true
  52.      * StringUtils.equals("abc", "ABC") = false
  53.      * </pre>
  54.      *
  55.      * <p>
  56.      * Copied from Apache Commons Lang r1583482 on April 10, 2014 (day of 3.3.2 release).
  57.      * </p>
  58.      *
  59.      * @see Object#equals(Object)
  60.      * @param cs1
  61.      *            the first CharSequence, may be {@code null}
  62.      * @param cs2
  63.      *            the second CharSequence, may be {@code null}
  64.      * @return {@code true} if the CharSequences are equal (case-sensitive), or both {@code null}
  65.      * @since 1.10
  66.      */
  67.     public static boolean equals(final CharSequence cs1, final CharSequence cs2) {
  68.         if (cs1 == cs2) {
  69.             return true;
  70.         }
  71.         if (cs1 == null || cs2 == null) {
  72.             return false;
  73.         }
  74.         if (cs1 instanceof String && cs2 instanceof String) {
  75.             return cs1.equals(cs2);
  76.         }
  77.         return cs1.length() == cs2.length() && CharSequenceUtils.regionMatches(cs1, false, 0, cs2, 0, cs1.length());
  78.     }

  79.     /**
  80.      * Calls {@link String#getBytes(Charset)}
  81.      *
  82.      * @param string
  83.      *            The string to encode (if null, return null).
  84.      * @param charset
  85.      *            The {@link Charset} to encode the {@code String}
  86.      * @return the encoded bytes
  87.      */
  88.     private static ByteBuffer getByteBuffer(final String string, final Charset charset) {
  89.         if (string == null) {
  90.             return null;
  91.         }
  92.         return ByteBuffer.wrap(string.getBytes(charset));
  93.     }

  94.     /**
  95.      * Encodes the given string into a byte buffer using the UTF-8 charset, storing the result into a new byte
  96.      * array.
  97.      *
  98.      * @param string
  99.      *            the String to encode, may be {@code null}
  100.      * @return encoded bytes, or {@code null} if the input string was {@code null}
  101.      * @throws NullPointerException
  102.      *             Thrown if {@link StandardCharsets#UTF_8} is not initialized, which should never happen since it is
  103.      *             required by the Java platform specification.
  104.      * @see Charset
  105.      * @see #getBytesUnchecked(String, String)
  106.      * @since 1.11
  107.      */
  108.     public static ByteBuffer getByteBufferUtf8(final String string) {
  109.         return getByteBuffer(string, StandardCharsets.UTF_8);
  110.     }

  111.     /**
  112.      * Calls {@link String#getBytes(Charset)}
  113.      *
  114.      * @param string
  115.      *            The string to encode (if null, return null).
  116.      * @param charset
  117.      *            The {@link Charset} to encode the {@code String}
  118.      * @return the encoded bytes
  119.      */
  120.     private static byte[] getBytes(final String string, final Charset charset) {
  121.         return string == null ? null : string.getBytes(charset);
  122.     }

  123.     /**
  124.      * Encodes the given string into a sequence of bytes using the ISO-8859-1 charset, storing the result into a new
  125.      * byte array.
  126.      *
  127.      * @param string
  128.      *            the String to encode, may be {@code null}
  129.      * @return encoded bytes, or {@code null} if the input string was {@code null}
  130.      * @throws NullPointerException
  131.      *             Thrown if {@link StandardCharsets#ISO_8859_1} is not initialized, which should never happen
  132.      *             since it is required by the Java platform specification.
  133.      * @since As of 1.7, throws {@link NullPointerException} instead of UnsupportedEncodingException
  134.      * @see Charset
  135.      * @see #getBytesUnchecked(String, String)
  136.      */
  137.     public static byte[] getBytesIso8859_1(final String string) {
  138.         return getBytes(string, StandardCharsets.ISO_8859_1);
  139.     }

  140.     /**
  141.      * Encodes the given string into a sequence of bytes using the named charset, storing the result into a new byte
  142.      * array.
  143.      * <p>
  144.      * This method catches {@link UnsupportedEncodingException} and rethrows it as {@link IllegalStateException}, which
  145.      * should never happen for a required charset name. Use this method when the encoding is required to be in the JRE.
  146.      * </p>
  147.      *
  148.      * @param string
  149.      *            the String to encode, may be {@code null}
  150.      * @param charsetName
  151.      *            The name of a required {@link java.nio.charset.Charset}
  152.      * @return encoded bytes, or {@code null} if the input string was {@code null}
  153.      * @throws IllegalStateException
  154.      *             Thrown when a {@link UnsupportedEncodingException} is caught, which should never happen for a
  155.      *             required charset name.
  156.      * @see CharEncoding
  157.      * @see String#getBytes(String)
  158.      */
  159.     public static byte[] getBytesUnchecked(final String string, final String charsetName) {
  160.         if (string == null) {
  161.             return null;
  162.         }
  163.         try {
  164.             return string.getBytes(charsetName);
  165.         } catch (final UnsupportedEncodingException e) {
  166.             throw newIllegalStateException(charsetName, e);
  167.         }
  168.     }

  169.     /**
  170.      * Encodes the given string into a sequence of bytes using the US-ASCII charset, storing the result into a new byte
  171.      * array.
  172.      *
  173.      * @param string
  174.      *            the String to encode, may be {@code null}
  175.      * @return encoded bytes, or {@code null} if the input string was {@code null}
  176.      * @throws NullPointerException
  177.      *             Thrown if {@link StandardCharsets#US_ASCII} is not initialized, which should never happen since it is
  178.      *             required by the Java platform specification.
  179.      * @since As of 1.7, throws {@link NullPointerException} instead of UnsupportedEncodingException
  180.      * @see Charset
  181.      * @see #getBytesUnchecked(String, String)
  182.      */
  183.     public static byte[] getBytesUsAscii(final String string) {
  184.         return getBytes(string, StandardCharsets.US_ASCII);
  185.     }

  186.     /**
  187.      * Encodes the given string into a sequence of bytes using the UTF-16 charset, storing the result into a new byte
  188.      * array.
  189.      *
  190.      * @param string
  191.      *            the String to encode, may be {@code null}
  192.      * @return encoded bytes, or {@code null} if the input string was {@code null}
  193.      * @throws NullPointerException
  194.      *             Thrown if {@link StandardCharsets#UTF_16} is not initialized, which should never happen since it is
  195.      *             required by the Java platform specification.
  196.      * @since As of 1.7, throws {@link NullPointerException} instead of UnsupportedEncodingException
  197.      * @see Charset
  198.      * @see #getBytesUnchecked(String, String)
  199.      */
  200.     public static byte[] getBytesUtf16(final String string) {
  201.         return getBytes(string, StandardCharsets.UTF_16);
  202.     }

  203.     /**
  204.      * Encodes the given string into a sequence of bytes using the UTF-16BE charset, storing the result into a new byte
  205.      * array.
  206.      *
  207.      * @param string
  208.      *            the String to encode, may be {@code null}
  209.      * @return encoded bytes, or {@code null} if the input string was {@code null}
  210.      * @throws NullPointerException
  211.      *             Thrown if {@link StandardCharsets#UTF_16BE} is not initialized, which should never happen since it is
  212.      *             required by the Java platform specification.
  213.      * @since As of 1.7, throws {@link NullPointerException} instead of UnsupportedEncodingException
  214.      * @see Charset
  215.      * @see #getBytesUnchecked(String, String)
  216.      */
  217.     public static byte[] getBytesUtf16Be(final String string) {
  218.         return getBytes(string, StandardCharsets.UTF_16BE);
  219.     }

  220.     /**
  221.      * Encodes the given string into a sequence of bytes using the UTF-16LE charset, storing the result into a new byte
  222.      * array.
  223.      *
  224.      * @param string
  225.      *            the String to encode, may be {@code null}
  226.      * @return encoded bytes, or {@code null} if the input string was {@code null}
  227.      * @throws NullPointerException
  228.      *             Thrown if {@link StandardCharsets#UTF_16LE} is not initialized, which should never happen since it is
  229.      *             required by the Java platform specification.
  230.      * @since As of 1.7, throws {@link NullPointerException} instead of UnsupportedEncodingException
  231.      * @see Charset
  232.      * @see #getBytesUnchecked(String, String)
  233.      */
  234.     public static byte[] getBytesUtf16Le(final String string) {
  235.         return getBytes(string, StandardCharsets.UTF_16LE);
  236.     }

  237.     /**
  238.      * Encodes the given string into a sequence of bytes using the UTF-8 charset, storing the result into a new byte
  239.      * array.
  240.      *
  241.      * @param string
  242.      *            the String to encode, may be {@code null}
  243.      * @return encoded bytes, or {@code null} if the input string was {@code null}
  244.      * @throws NullPointerException
  245.      *             Thrown if {@link StandardCharsets#UTF_8} is not initialized, which should never happen since it is
  246.      *             required by the Java platform specification.
  247.      * @since As of 1.7, throws {@link NullPointerException} instead of UnsupportedEncodingException
  248.      * @see Charset
  249.      * @see #getBytesUnchecked(String, String)
  250.      */
  251.     public static byte[] getBytesUtf8(final String string) {
  252.         return getBytes(string, StandardCharsets.UTF_8);
  253.     }

  254.     private static IllegalStateException newIllegalStateException(final String charsetName, final UnsupportedEncodingException e) {
  255.         return new IllegalStateException(charsetName + ": " + e);
  256.     }

  257.     /**
  258.      * Constructs a new {@code String} by decoding the specified array of bytes using the given charset.
  259.      *
  260.      * @param bytes
  261.      *            The bytes to be decoded into characters
  262.      * @param charset
  263.      *            The {@link Charset} to encode the {@code String}; not {@code null}
  264.      * @return A new {@code String} decoded from the specified array of bytes using the given charset,
  265.      *         or {@code null} if the input byte array was {@code null}.
  266.      * @throws NullPointerException
  267.      *             Thrown if charset is {@code null}
  268.      */
  269.     private static String newString(final byte[] bytes, final Charset charset) {
  270.         return bytes == null ? null : new String(bytes, charset);
  271.     }

  272.     /**
  273.      * Constructs a new {@code String} by decoding the specified array of bytes using the given charset.
  274.      * <p>
  275.      * This method catches {@link UnsupportedEncodingException} and re-throws it as {@link IllegalStateException}, which
  276.      * should never happen for a required charset name. Use this method when the encoding is required to be in the JRE.
  277.      * </p>
  278.      *
  279.      * @param bytes
  280.      *            The bytes to be decoded into characters, may be {@code null}
  281.      * @param charsetName
  282.      *            The name of a required {@link java.nio.charset.Charset}
  283.      * @return A new {@code String} decoded from the specified array of bytes using the given charset,
  284.      *         or {@code null} if the input byte array was {@code null}.
  285.      * @throws IllegalStateException
  286.      *             Thrown when a {@link UnsupportedEncodingException} is caught, which should never happen for a
  287.      *             required charset name.
  288.      * @see CharEncoding
  289.      * @see String#String(byte[], String)
  290.      */
  291.     public static String newString(final byte[] bytes, final String charsetName) {
  292.         if (bytes == null) {
  293.             return null;
  294.         }
  295.         try {
  296.             return new String(bytes, charsetName);
  297.         } catch (final UnsupportedEncodingException e) {
  298.             throw newIllegalStateException(charsetName, e);
  299.         }
  300.     }

  301.     /**
  302.      * Constructs a new {@code String} by decoding the specified array of bytes using the ISO-8859-1 charset.
  303.      *
  304.      * @param bytes
  305.      *            The bytes to be decoded into characters, may be {@code null}
  306.      * @return A new {@code String} decoded from the specified array of bytes using the ISO-8859-1 charset, or
  307.      *         {@code null} if the input byte array was {@code null}.
  308.      * @throws NullPointerException
  309.      *             Thrown if {@link StandardCharsets#ISO_8859_1} is not initialized, which should never happen
  310.      *             since it is required by the Java platform specification.
  311.      * @since As of 1.7, throws {@link NullPointerException} instead of UnsupportedEncodingException
  312.      */
  313.     public static String newStringIso8859_1(final byte[] bytes) {
  314.         return newString(bytes, StandardCharsets.ISO_8859_1);
  315.     }

  316.     /**
  317.      * Constructs a new {@code String} by decoding the specified array of bytes using the US-ASCII charset.
  318.      *
  319.      * @param bytes
  320.      *            The bytes to be decoded into characters
  321.      * @return A new {@code String} decoded from the specified array of bytes using the US-ASCII charset,
  322.      *         or {@code null} if the input byte array was {@code null}.
  323.      * @throws NullPointerException
  324.      *             Thrown if {@link StandardCharsets#US_ASCII} is not initialized, which should never happen since it is
  325.      *             required by the Java platform specification.
  326.      * @since As of 1.7, throws {@link NullPointerException} instead of UnsupportedEncodingException
  327.      */
  328.     public static String newStringUsAscii(final byte[] bytes) {
  329.         return newString(bytes, StandardCharsets.US_ASCII);
  330.     }

  331.     /**
  332.      * Constructs a new {@code String} by decoding the specified array of bytes using the UTF-16 charset.
  333.      *
  334.      * @param bytes
  335.      *            The bytes to be decoded into characters
  336.      * @return A new {@code String} decoded from the specified array of bytes using the UTF-16 charset
  337.      *         or {@code null} if the input byte array was {@code null}.
  338.      * @throws NullPointerException
  339.      *             Thrown if {@link StandardCharsets#UTF_16} is not initialized, which should never happen since it is
  340.      *             required by the Java platform specification.
  341.      * @since As of 1.7, throws {@link NullPointerException} instead of UnsupportedEncodingException
  342.      */
  343.     public static String newStringUtf16(final byte[] bytes) {
  344.         return newString(bytes, StandardCharsets.UTF_16);
  345.     }

  346.     /**
  347.      * Constructs a new {@code String} by decoding the specified array of bytes using the UTF-16BE charset.
  348.      *
  349.      * @param bytes
  350.      *            The bytes to be decoded into characters
  351.      * @return A new {@code String} decoded from the specified array of bytes using the UTF-16BE charset,
  352.      *         or {@code null} if the input byte array was {@code null}.
  353.      * @throws NullPointerException
  354.      *             Thrown if {@link StandardCharsets#UTF_16BE} is not initialized, which should never happen since it is
  355.      *             required by the Java platform specification.
  356.      * @since As of 1.7, throws {@link NullPointerException} instead of UnsupportedEncodingException
  357.      */
  358.     public static String newStringUtf16Be(final byte[] bytes) {
  359.         return newString(bytes, StandardCharsets.UTF_16BE);
  360.     }

  361.     /**
  362.      * Constructs a new {@code String} by decoding the specified array of bytes using the UTF-16LE charset.
  363.      *
  364.      * @param bytes
  365.      *            The bytes to be decoded into characters
  366.      * @return A new {@code String} decoded from the specified array of bytes using the UTF-16LE charset,
  367.      *         or {@code null} if the input byte array was {@code null}.
  368.      * @throws NullPointerException
  369.      *             Thrown if {@link StandardCharsets#UTF_16LE} is not initialized, which should never happen since it is
  370.      *             required by the Java platform specification.
  371.      * @since As of 1.7, throws {@link NullPointerException} instead of UnsupportedEncodingException
  372.      */
  373.     public static String newStringUtf16Le(final byte[] bytes) {
  374.         return newString(bytes, StandardCharsets.UTF_16LE);
  375.     }

  376.     /**
  377.      * Constructs a new {@code String} by decoding the specified array of bytes using the UTF-8 charset.
  378.      *
  379.      * @param bytes
  380.      *            The bytes to be decoded into characters
  381.      * @return A new {@code String} decoded from the specified array of bytes using the UTF-8 charset,
  382.      *         or {@code null} if the input byte array was {@code null}.
  383.      * @throws NullPointerException
  384.      *             Thrown if {@link StandardCharsets#UTF_8} is not initialized, which should never happen since it is
  385.      *             required by the Java platform specification.
  386.      * @since As of 1.7, throws {@link NullPointerException} instead of UnsupportedEncodingException
  387.      */
  388.     public static String newStringUtf8(final byte[] bytes) {
  389.         return newString(bytes, StandardCharsets.UTF_8);
  390.     }

  391.     /**
  392.      * TODO Make private in 2.0.
  393.      *
  394.      * @deprecated TODO Make private in 2.0.
  395.      */
  396.     @Deprecated
  397.     public StringUtils() {
  398.         // empty
  399.     }
  400. }