StringUtils.java

  1. /*
  2.  * Licensed to the Apache Software Foundation (ASF) under one or more
  3.  * contributor license agreements.  See the NOTICE file distributed with
  4.  * this work for additional information regarding copyright ownership.
  5.  * The ASF licenses this file to You under the Apache License, Version 2.0
  6.  * (the "License"); you may not use this file except in compliance with
  7.  * the License.  You may obtain a copy of the License at
  8.  *
  9.  *      https://www.apache.org/licenses/LICENSE-2.0
  10.  *
  11.  * Unless required by applicable law or agreed to in writing, software
  12.  * distributed under the License is distributed on an "AS IS" BASIS,
  13.  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  14.  * See the License for the specific language governing permissions and
  15.  * limitations under the License.
  16.  */

  17. package org.apache.commons.codec.binary;

  18. import java.io.UnsupportedEncodingException;
  19. import java.nio.ByteBuffer;
  20. import java.nio.charset.Charset;
  21. import java.nio.charset.StandardCharsets;

  22. import org.apache.commons.codec.CharEncoding;

  23. /**
  24.  * Converts String to and from bytes using the encodings required by the Java specification. These encodings are
  25.  * specified in standard {@link Charset}.
  26.  *
  27.  * <p>This class is immutable and thread-safe.</p>
  28.  *
  29.  * @see CharEncoding
  30.  * @see Charset
  31.  * @see StandardCharsets
  32.  * @since 1.4
  33.  */
  34. public class StringUtils {

  35.     /**
  36.      * <p>
  37.      * Compares two CharSequences, returning {@code true} if they represent equal sequences of characters.
  38.      * </p>
  39.      *
  40.      * <p>
  41.      * {@code null}s are handled without exceptions. Two {@code null} references are considered to be equal.
  42.      * The comparison is case sensitive.
  43.      * </p>
  44.      *
  45.      * <pre>
  46.      * StringUtils.equals(null, null)   = true
  47.      * StringUtils.equals(null, "abc")  = false
  48.      * StringUtils.equals("abc", null)  = false
  49.      * StringUtils.equals("abc", "abc") = true
  50.      * StringUtils.equals("abc", "ABC") = false
  51.      * </pre>
  52.      *
  53.      * <p>
  54.      * Copied from Apache Commons Lang r1583482 on April 10, 2014 (day of 3.3.2 release).
  55.      * </p>
  56.      *
  57.      * @see Object#equals(Object)
  58.      * @param cs1
  59.      *            the first CharSequence, may be {@code null}
  60.      * @param cs2
  61.      *            the second CharSequence, may be {@code null}
  62.      * @return {@code true} if the CharSequences are equal (case-sensitive), or both {@code null}
  63.      * @since 1.10
  64.      */
  65.     public static boolean equals(final CharSequence cs1, final CharSequence cs2) {
  66.         if (cs1 == cs2) {
  67.             return true;
  68.         }
  69.         if (cs1 == null || cs2 == null) {
  70.             return false;
  71.         }
  72.         if (cs1 instanceof String && cs2 instanceof String) {
  73.             return cs1.equals(cs2);
  74.         }
  75.         return cs1.length() == cs2.length() && CharSequenceUtils.regionMatches(cs1, false, 0, cs2, 0, cs1.length());
  76.     }

  77.     /**
  78.      * Calls {@link String#getBytes(Charset)}
  79.      *
  80.      * @param string
  81.      *            The string to encode (if null, return null).
  82.      * @param charset
  83.      *            The {@link Charset} to encode the {@code String}
  84.      * @return the encoded bytes
  85.      */
  86.     private static ByteBuffer getByteBuffer(final String string, final Charset charset) {
  87.         if (string == null) {
  88.             return null;
  89.         }
  90.         return ByteBuffer.wrap(string.getBytes(charset));
  91.     }

  92.     /**
  93.      * Encodes the given string into a byte buffer using the UTF-8 charset, storing the result into a new byte
  94.      * array.
  95.      *
  96.      * @param string
  97.      *            the String to encode, may be {@code null}
  98.      * @return encoded bytes, or {@code null} if the input string was {@code null}
  99.      * @throws NullPointerException
  100.      *             Thrown if {@link StandardCharsets#UTF_8} is not initialized, which should never happen since it is
  101.      *             required by the Java platform specification.
  102.      * @see Charset
  103.      * @see #getBytesUnchecked(String, String)
  104.      * @since 1.11
  105.      */
  106.     public static ByteBuffer getByteBufferUtf8(final String string) {
  107.         return getByteBuffer(string, StandardCharsets.UTF_8);
  108.     }

  109.     /**
  110.      * Calls {@link String#getBytes(Charset)}
  111.      *
  112.      * @param string
  113.      *            The string to encode (if null, return null).
  114.      * @param charset
  115.      *            The {@link Charset} to encode the {@code String}
  116.      * @return the encoded bytes
  117.      */
  118.     private static byte[] getBytes(final String string, final Charset charset) {
  119.         return string == null ? null : string.getBytes(charset);
  120.     }

  121.     /**
  122.      * Encodes the given string into a sequence of bytes using the ISO-8859-1 charset, storing the result into a new
  123.      * byte array.
  124.      *
  125.      * @param string
  126.      *            the String to encode, may be {@code null}
  127.      * @return encoded bytes, or {@code null} if the input string was {@code null}
  128.      * @throws NullPointerException
  129.      *             Thrown if {@link StandardCharsets#ISO_8859_1} is not initialized, which should never happen
  130.      *             since it is required by the Java platform specification.
  131.      * @since As of 1.7, throws {@link NullPointerException} instead of UnsupportedEncodingException
  132.      * @see Charset
  133.      * @see #getBytesUnchecked(String, String)
  134.      */
  135.     public static byte[] getBytesIso8859_1(final String string) {
  136.         return getBytes(string, StandardCharsets.ISO_8859_1);
  137.     }

  138.     /**
  139.      * Encodes the given string into a sequence of bytes using the named charset, storing the result into a new byte
  140.      * array.
  141.      * <p>
  142.      * This method catches {@link UnsupportedEncodingException} and rethrows it as {@link IllegalStateException}, which
  143.      * should never happen for a required charset name. Use this method when the encoding is required to be in the JRE.
  144.      * </p>
  145.      *
  146.      * @param string
  147.      *            the String to encode, may be {@code null}
  148.      * @param charsetName
  149.      *            The name of a required {@link java.nio.charset.Charset}
  150.      * @return encoded bytes, or {@code null} if the input string was {@code null}
  151.      * @throws IllegalStateException
  152.      *             Thrown when a {@link UnsupportedEncodingException} is caught, which should never happen for a
  153.      *             required charset name.
  154.      * @see CharEncoding
  155.      * @see String#getBytes(String)
  156.      */
  157.     public static byte[] getBytesUnchecked(final String string, final String charsetName) {
  158.         if (string == null) {
  159.             return null;
  160.         }
  161.         try {
  162.             return string.getBytes(charsetName);
  163.         } catch (final UnsupportedEncodingException e) {
  164.             throw StringUtils.newIllegalStateException(charsetName, e);
  165.         }
  166.     }

  167.     /**
  168.      * Encodes the given string into a sequence of bytes using the US-ASCII charset, storing the result into a new byte
  169.      * array.
  170.      *
  171.      * @param string
  172.      *            the String to encode, may be {@code null}
  173.      * @return encoded bytes, or {@code null} if the input string was {@code null}
  174.      * @throws NullPointerException
  175.      *             Thrown if {@link StandardCharsets#US_ASCII} is not initialized, which should never happen since it is
  176.      *             required by the Java platform specification.
  177.      * @since As of 1.7, throws {@link NullPointerException} instead of UnsupportedEncodingException
  178.      * @see Charset
  179.      * @see #getBytesUnchecked(String, String)
  180.      */
  181.     public static byte[] getBytesUsAscii(final String string) {
  182.         return getBytes(string, StandardCharsets.US_ASCII);
  183.     }

  184.     /**
  185.      * Encodes the given string into a sequence of bytes using the UTF-16 charset, storing the result into a new byte
  186.      * array.
  187.      *
  188.      * @param string
  189.      *            the String to encode, may be {@code null}
  190.      * @return encoded bytes, or {@code null} if the input string was {@code null}
  191.      * @throws NullPointerException
  192.      *             Thrown if {@link StandardCharsets#UTF_16} is not initialized, which should never happen since it is
  193.      *             required by the Java platform specification.
  194.      * @since As of 1.7, throws {@link NullPointerException} instead of UnsupportedEncodingException
  195.      * @see Charset
  196.      * @see #getBytesUnchecked(String, String)
  197.      */
  198.     public static byte[] getBytesUtf16(final String string) {
  199.         return getBytes(string, StandardCharsets.UTF_16);
  200.     }

  201.     /**
  202.      * Encodes the given string into a sequence of bytes using the UTF-16BE charset, storing the result into a new byte
  203.      * array.
  204.      *
  205.      * @param string
  206.      *            the String to encode, may be {@code null}
  207.      * @return encoded bytes, or {@code null} if the input string was {@code null}
  208.      * @throws NullPointerException
  209.      *             Thrown if {@link StandardCharsets#UTF_16BE} is not initialized, which should never happen since it is
  210.      *             required by the Java platform specification.
  211.      * @since As of 1.7, throws {@link NullPointerException} instead of UnsupportedEncodingException
  212.      * @see Charset
  213.      * @see #getBytesUnchecked(String, String)
  214.      */
  215.     public static byte[] getBytesUtf16Be(final String string) {
  216.         return getBytes(string, StandardCharsets.UTF_16BE);
  217.     }

  218.     /**
  219.      * Encodes the given string into a sequence of bytes using the UTF-16LE charset, storing the result into a new byte
  220.      * array.
  221.      *
  222.      * @param string
  223.      *            the String to encode, may be {@code null}
  224.      * @return encoded bytes, or {@code null} if the input string was {@code null}
  225.      * @throws NullPointerException
  226.      *             Thrown if {@link StandardCharsets#UTF_16LE} is not initialized, which should never happen since it is
  227.      *             required by the Java platform specification.
  228.      * @since As of 1.7, throws {@link NullPointerException} instead of UnsupportedEncodingException
  229.      * @see Charset
  230.      * @see #getBytesUnchecked(String, String)
  231.      */
  232.     public static byte[] getBytesUtf16Le(final String string) {
  233.         return getBytes(string, StandardCharsets.UTF_16LE);
  234.     }

  235.     /**
  236.      * Encodes the given string into a sequence of bytes using the UTF-8 charset, storing the result into a new byte
  237.      * array.
  238.      *
  239.      * @param string
  240.      *            the String to encode, may be {@code null}
  241.      * @return encoded bytes, or {@code null} if the input string was {@code null}
  242.      * @throws NullPointerException
  243.      *             Thrown if {@link StandardCharsets#UTF_8} is not initialized, which should never happen since it is
  244.      *             required by the Java platform specification.
  245.      * @since As of 1.7, throws {@link NullPointerException} instead of UnsupportedEncodingException
  246.      * @see Charset
  247.      * @see #getBytesUnchecked(String, String)
  248.      */
  249.     public static byte[] getBytesUtf8(final String string) {
  250.         return getBytes(string, StandardCharsets.UTF_8);
  251.     }

  252.     private static IllegalStateException newIllegalStateException(final String charsetName,
  253.                                                                   final UnsupportedEncodingException e) {
  254.         return new IllegalStateException(charsetName + ": " + e);
  255.     }

  256.     /**
  257.      * Constructs a new {@code String} by decoding the specified array of bytes using the given charset.
  258.      *
  259.      * @param bytes
  260.      *            The bytes to be decoded into characters
  261.      * @param charset
  262.      *            The {@link Charset} to encode the {@code String}; not {@code null}
  263.      * @return A new {@code String} decoded from the specified array of bytes using the given charset,
  264.      *         or {@code null} if the input byte array was {@code null}.
  265.      * @throws NullPointerException
  266.      *             Thrown if charset is {@code null}
  267.      */
  268.     private static String newString(final byte[] bytes, final Charset charset) {
  269.         return bytes == null ? null : new String(bytes, charset);
  270.     }

  271.     /**
  272.      * Constructs a new {@code String} by decoding the specified array of bytes using the given charset.
  273.      * <p>
  274.      * This method catches {@link UnsupportedEncodingException} and re-throws it as {@link IllegalStateException}, which
  275.      * should never happen for a required charset name. Use this method when the encoding is required to be in the JRE.
  276.      * </p>
  277.      *
  278.      * @param bytes
  279.      *            The bytes to be decoded into characters, may be {@code null}
  280.      * @param charsetName
  281.      *            The name of a required {@link java.nio.charset.Charset}
  282.      * @return A new {@code String} decoded from the specified array of bytes using the given charset,
  283.      *         or {@code null} if the input byte array was {@code null}.
  284.      * @throws IllegalStateException
  285.      *             Thrown when a {@link UnsupportedEncodingException} is caught, which should never happen for a
  286.      *             required charset name.
  287.      * @see CharEncoding
  288.      * @see String#String(byte[], String)
  289.      */
  290.     public static String newString(final byte[] bytes, final String charsetName) {
  291.         if (bytes == null) {
  292.             return null;
  293.         }
  294.         try {
  295.             return new String(bytes, charsetName);
  296.         } catch (final UnsupportedEncodingException e) {
  297.             throw StringUtils.newIllegalStateException(charsetName, e);
  298.         }
  299.     }

  300.     /**
  301.      * Constructs a new {@code String} by decoding the specified array of bytes using the ISO-8859-1 charset.
  302.      *
  303.      * @param bytes
  304.      *            The bytes to be decoded into characters, may be {@code null}
  305.      * @return A new {@code String} decoded from the specified array of bytes using the ISO-8859-1 charset, or
  306.      *         {@code null} if the input byte array was {@code null}.
  307.      * @throws NullPointerException
  308.      *             Thrown if {@link StandardCharsets#ISO_8859_1} is not initialized, which should never happen
  309.      *             since it is required by the Java platform specification.
  310.      * @since As of 1.7, throws {@link NullPointerException} instead of UnsupportedEncodingException
  311.      */
  312.     public static String newStringIso8859_1(final byte[] bytes) {
  313.         return newString(bytes, StandardCharsets.ISO_8859_1);
  314.     }

  315.     /**
  316.      * Constructs a new {@code String} by decoding the specified array of bytes using the US-ASCII charset.
  317.      *
  318.      * @param bytes
  319.      *            The bytes to be decoded into characters
  320.      * @return A new {@code String} decoded from the specified array of bytes using the US-ASCII charset,
  321.      *         or {@code null} if the input byte array was {@code null}.
  322.      * @throws NullPointerException
  323.      *             Thrown if {@link StandardCharsets#US_ASCII} is not initialized, which should never happen since it is
  324.      *             required by the Java platform specification.
  325.      * @since As of 1.7, throws {@link NullPointerException} instead of UnsupportedEncodingException
  326.      */
  327.     public static String newStringUsAscii(final byte[] bytes) {
  328.         return newString(bytes, StandardCharsets.US_ASCII);
  329.     }

  330.     /**
  331.      * Constructs a new {@code String} by decoding the specified array of bytes using the UTF-16 charset.
  332.      *
  333.      * @param bytes
  334.      *            The bytes to be decoded into characters
  335.      * @return A new {@code String} decoded from the specified array of bytes using the UTF-16 charset
  336.      *         or {@code null} if the input byte array was {@code null}.
  337.      * @throws NullPointerException
  338.      *             Thrown if {@link StandardCharsets#UTF_16} is not initialized, which should never happen since it is
  339.      *             required by the Java platform specification.
  340.      * @since As of 1.7, throws {@link NullPointerException} instead of UnsupportedEncodingException
  341.      */
  342.     public static String newStringUtf16(final byte[] bytes) {
  343.         return newString(bytes, StandardCharsets.UTF_16);
  344.     }

  345.     /**
  346.      * Constructs a new {@code String} by decoding the specified array of bytes using the UTF-16BE charset.
  347.      *
  348.      * @param bytes
  349.      *            The bytes to be decoded into characters
  350.      * @return A new {@code String} decoded from the specified array of bytes using the UTF-16BE charset,
  351.      *         or {@code null} if the input byte array was {@code null}.
  352.      * @throws NullPointerException
  353.      *             Thrown if {@link StandardCharsets#UTF_16BE} is not initialized, which should never happen since it is
  354.      *             required by the Java platform specification.
  355.      * @since As of 1.7, throws {@link NullPointerException} instead of UnsupportedEncodingException
  356.      */
  357.     public static String newStringUtf16Be(final byte[] bytes) {
  358.         return newString(bytes, StandardCharsets.UTF_16BE);
  359.     }

  360.     /**
  361.      * Constructs a new {@code String} by decoding the specified array of bytes using the UTF-16LE charset.
  362.      *
  363.      * @param bytes
  364.      *            The bytes to be decoded into characters
  365.      * @return A new {@code String} decoded from the specified array of bytes using the UTF-16LE charset,
  366.      *         or {@code null} if the input byte array was {@code null}.
  367.      * @throws NullPointerException
  368.      *             Thrown if {@link StandardCharsets#UTF_16LE} is not initialized, which should never happen since it is
  369.      *             required by the Java platform specification.
  370.      * @since As of 1.7, throws {@link NullPointerException} instead of UnsupportedEncodingException
  371.      */
  372.     public static String newStringUtf16Le(final byte[] bytes) {
  373.         return newString(bytes, StandardCharsets.UTF_16LE);
  374.     }

  375.     /**
  376.      * Constructs a new {@code String} by decoding the specified array of bytes using the UTF-8 charset.
  377.      *
  378.      * @param bytes
  379.      *            The bytes to be decoded into characters
  380.      * @return A new {@code String} decoded from the specified array of bytes using the UTF-8 charset,
  381.      *         or {@code null} if the input byte array was {@code null}.
  382.      * @throws NullPointerException
  383.      *             Thrown if {@link StandardCharsets#UTF_8} is not initialized, which should never happen since it is
  384.      *             required by the Java platform specification.
  385.      * @since As of 1.7, throws {@link NullPointerException} instead of UnsupportedEncodingException
  386.      */
  387.     public static String newStringUtf8(final byte[] bytes) {
  388.         return newString(bytes, StandardCharsets.UTF_8);
  389.     }

  390.     /**
  391.      * TODO Make private in 2.0.
  392.      *
  393.      * @deprecated TODO Make private in 2.0.
  394.      */
  395.     @Deprecated
  396.     public StringUtils() {
  397.         // empty
  398.     }
  399. }