StringUtils.java

  1. /*
  2.  * Licensed to the Apache Software Foundation (ASF) under one or more
  3.  * contributor license agreements.  See the NOTICE file distributed with
  4.  * this work for additional information regarding copyright ownership.
  5.  * The ASF licenses this file to You under the Apache License, Version 2.0
  6.  * (the "License"); you may not use this file except in compliance with
  7.  * the License.  You may obtain a copy of the License at
  8.  *
  9.  *      http://www.apache.org/licenses/LICENSE-2.0
  10.  *
  11.  * Unless required by applicable law or agreed to in writing, software
  12.  * distributed under the License is distributed on an "AS IS" BASIS,
  13.  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  14.  * See the License for the specific language governing permissions and
  15.  * limitations under the License.
  16.  */

  17. package org.apache.commons.codec.binary;

  18. import java.io.UnsupportedEncodingException;
  19. import java.nio.ByteBuffer;
  20. import java.nio.charset.Charset;

  21. import org.apache.commons.codec.CharEncoding;
  22. import org.apache.commons.codec.Charsets;

  23. /**
  24.  * Converts String to and from bytes using the encodings required by the Java specification. These encodings are
  25.  * specified in <a href="http://download.oracle.com/javase/6/docs/api/java/nio/charset/Charset.html">
  26.  * Standard charsets</a>.
  27.  *
  28.  * <p>This class is immutable and thread-safe.</p>
  29.  *
  30.  * @see CharEncoding
  31.  * @see <a href="http://download.oracle.com/javase/6/docs/api/java/nio/charset/Charset.html">Standard charsets</a>
  32.  * @version $Id: StringUtils.java 1789539 2017-03-30 16:36:28Z sebb $
  33.  * @since 1.4
  34.  */
  35. public class StringUtils {

  36.     /**
  37.      * <p>
  38.      * Compares two CharSequences, returning <code>true</code> if they represent equal sequences of characters.
  39.      * </p>
  40.      *
  41.      * <p>
  42.      * <code>null</code>s are handled without exceptions. Two <code>null</code> references are considered to be equal.
  43.      * The comparison is case sensitive.
  44.      * </p>
  45.      *
  46.      * <pre>
  47.      * StringUtils.equals(null, null)   = true
  48.      * StringUtils.equals(null, "abc")  = false
  49.      * StringUtils.equals("abc", null)  = false
  50.      * StringUtils.equals("abc", "abc") = true
  51.      * StringUtils.equals("abc", "ABC") = false
  52.      * </pre>
  53.      *
  54.      * <p>
  55.      * Copied from Apache Commons Lang r1583482 on April 10, 2014 (day of 3.3.2 release).
  56.      * </p>
  57.      *
  58.      * @see Object#equals(Object)
  59.      * @param cs1
  60.      *            the first CharSequence, may be <code>null</code>
  61.      * @param cs2
  62.      *            the second CharSequence, may be <code>null</code>
  63.      * @return <code>true</code> if the CharSequences are equal (case-sensitive), or both <code>null</code>
  64.      * @since 1.10
  65.      */
  66.     public static boolean equals(final CharSequence cs1, final CharSequence cs2) {
  67.         if (cs1 == cs2) {
  68.             return true;
  69.         }
  70.         if (cs1 == null || cs2 == null) {
  71.             return false;
  72.         }
  73.         if (cs1 instanceof String && cs2 instanceof String) {
  74.             return cs1.equals(cs2);
  75.         }
  76.         return cs1.length() == cs2.length() && CharSequenceUtils.regionMatches(cs1, false, 0, cs2, 0, cs1.length());
  77.     }

  78.     /**
  79.      * Calls {@link String#getBytes(Charset)}
  80.      *
  81.      * @param string
  82.      *            The string to encode (if null, return null).
  83.      * @param charset
  84.      *            The {@link Charset} to encode the <code>String</code>
  85.      * @return the encoded bytes
  86.      */
  87.     private static byte[] getBytes(final String string, final Charset charset) {
  88.         if (string == null) {
  89.             return null;
  90.         }
  91.         return string.getBytes(charset);
  92.     }

  93.     /**
  94.      * Calls {@link String#getBytes(Charset)}
  95.      *
  96.      * @param string
  97.      *            The string to encode (if null, return null).
  98.      * @param charset
  99.      *            The {@link Charset} to encode the <code>String</code>
  100.      * @return the encoded bytes
  101.      */
  102.     private static ByteBuffer getByteBuffer(final String string, final Charset charset) {
  103.         if (string == null) {
  104.             return null;
  105.         }
  106.         return ByteBuffer.wrap(string.getBytes(charset));
  107.     }

  108.     /**
  109.      * Encodes the given string into a byte buffer using the UTF-8 charset, storing the result into a new byte
  110.      * array.
  111.      *
  112.      * @param string
  113.      *            the String to encode, may be <code>null</code>
  114.      * @return encoded bytes, or <code>null</code> if the input string was <code>null</code>
  115.      * @throws NullPointerException
  116.      *             Thrown if {@link Charsets#UTF_8} is not initialized, which should never happen since it is
  117.      *             required by the Java platform specification.
  118.      * @see <a href="http://download.oracle.com/javase/6/docs/api/java/nio/charset/Charset.html">Standard charsets</a>
  119.      * @see #getBytesUnchecked(String, String)
  120.      * @since 1.11
  121.      */
  122.     public static ByteBuffer getByteBufferUtf8(final String string) {
  123.         return getByteBuffer(string, Charsets.UTF_8);
  124.     }

  125.     /**
  126.      * Encodes the given string into a sequence of bytes using the ISO-8859-1 charset, storing the result into a new
  127.      * byte array.
  128.      *
  129.      * @param string
  130.      *            the String to encode, may be <code>null</code>
  131.      * @return encoded bytes, or <code>null</code> if the input string was <code>null</code>
  132.      * @throws NullPointerException
  133.      *             Thrown if {@link Charsets#ISO_8859_1} is not initialized, which should never happen since it is
  134.      *             required by the Java platform specification.
  135.      * @since As of 1.7, throws {@link NullPointerException} instead of UnsupportedEncodingException
  136.      * @see <a href="http://download.oracle.com/javase/6/docs/api/java/nio/charset/Charset.html">Standard charsets</a>
  137.      * @see #getBytesUnchecked(String, String)
  138.      */
  139.     public static byte[] getBytesIso8859_1(final String string) {
  140.         return getBytes(string, Charsets.ISO_8859_1);
  141.     }


  142.     /**
  143.      * Encodes the given string into a sequence of bytes using the named charset, storing the result into a new byte
  144.      * array.
  145.      * <p>
  146.      * This method catches {@link UnsupportedEncodingException} and rethrows it as {@link IllegalStateException}, which
  147.      * should never happen for a required charset name. Use this method when the encoding is required to be in the JRE.
  148.      * </p>
  149.      *
  150.      * @param string
  151.      *            the String to encode, may be <code>null</code>
  152.      * @param charsetName
  153.      *            The name of a required {@link java.nio.charset.Charset}
  154.      * @return encoded bytes, or <code>null</code> if the input string was <code>null</code>
  155.      * @throws IllegalStateException
  156.      *             Thrown when a {@link UnsupportedEncodingException} is caught, which should never happen for a
  157.      *             required charset name.
  158.      * @see CharEncoding
  159.      * @see String#getBytes(String)
  160.      */
  161.     public static byte[] getBytesUnchecked(final String string, final String charsetName) {
  162.         if (string == null) {
  163.             return null;
  164.         }
  165.         try {
  166.             return string.getBytes(charsetName);
  167.         } catch (final UnsupportedEncodingException e) {
  168.             throw StringUtils.newIllegalStateException(charsetName, e);
  169.         }
  170.     }

  171.     /**
  172.      * Encodes the given string into a sequence of bytes using the US-ASCII charset, storing the result into a new byte
  173.      * array.
  174.      *
  175.      * @param string
  176.      *            the String to encode, may be <code>null</code>
  177.      * @return encoded bytes, or <code>null</code> if the input string was <code>null</code>
  178.      * @throws NullPointerException
  179.      *             Thrown if {@link Charsets#US_ASCII} is not initialized, which should never happen since it is
  180.      *             required by the Java platform specification.
  181.      * @since As of 1.7, throws {@link NullPointerException} instead of UnsupportedEncodingException
  182.      * @see <a href="http://download.oracle.com/javase/6/docs/api/java/nio/charset/Charset.html">Standard charsets</a>
  183.      * @see #getBytesUnchecked(String, String)
  184.      */
  185.     public static byte[] getBytesUsAscii(final String string) {
  186.         return getBytes(string, Charsets.US_ASCII);
  187.     }

  188.     /**
  189.      * Encodes the given string into a sequence of bytes using the UTF-16 charset, storing the result into a new byte
  190.      * array.
  191.      *
  192.      * @param string
  193.      *            the String to encode, may be <code>null</code>
  194.      * @return encoded bytes, or <code>null</code> if the input string was <code>null</code>
  195.      * @throws NullPointerException
  196.      *             Thrown if {@link Charsets#UTF_16} is not initialized, which should never happen since it is
  197.      *             required by the Java platform specification.
  198.      * @since As of 1.7, throws {@link NullPointerException} instead of UnsupportedEncodingException
  199.      * @see <a href="http://download.oracle.com/javase/6/docs/api/java/nio/charset/Charset.html">Standard charsets</a>
  200.      * @see #getBytesUnchecked(String, String)
  201.      */
  202.     public static byte[] getBytesUtf16(final String string) {
  203.         return getBytes(string, Charsets.UTF_16);
  204.     }

  205.     /**
  206.      * Encodes the given string into a sequence of bytes using the UTF-16BE charset, storing the result into a new byte
  207.      * array.
  208.      *
  209.      * @param string
  210.      *            the String to encode, may be <code>null</code>
  211.      * @return encoded bytes, or <code>null</code> if the input string was <code>null</code>
  212.      * @throws NullPointerException
  213.      *             Thrown if {@link Charsets#UTF_16BE} is not initialized, which should never happen since it is
  214.      *             required by the Java platform specification.
  215.      * @since As of 1.7, throws {@link NullPointerException} instead of UnsupportedEncodingException
  216.      * @see <a href="http://download.oracle.com/javase/6/docs/api/java/nio/charset/Charset.html">Standard charsets</a>
  217.      * @see #getBytesUnchecked(String, String)
  218.      */
  219.     public static byte[] getBytesUtf16Be(final String string) {
  220.         return getBytes(string, Charsets.UTF_16BE);
  221.     }

  222.     /**
  223.      * Encodes the given string into a sequence of bytes using the UTF-16LE charset, storing the result into a new byte
  224.      * array.
  225.      *
  226.      * @param string
  227.      *            the String to encode, may be <code>null</code>
  228.      * @return encoded bytes, or <code>null</code> if the input string was <code>null</code>
  229.      * @throws NullPointerException
  230.      *             Thrown if {@link Charsets#UTF_16LE} is not initialized, which should never happen since it is
  231.      *             required by the Java platform specification.
  232.      * @since As of 1.7, throws {@link NullPointerException} instead of UnsupportedEncodingException
  233.      * @see <a href="http://download.oracle.com/javase/6/docs/api/java/nio/charset/Charset.html">Standard charsets</a>
  234.      * @see #getBytesUnchecked(String, String)
  235.      */
  236.     public static byte[] getBytesUtf16Le(final String string) {
  237.         return getBytes(string, Charsets.UTF_16LE);
  238.     }

  239.     /**
  240.      * Encodes the given string into a sequence of bytes using the UTF-8 charset, storing the result into a new byte
  241.      * array.
  242.      *
  243.      * @param string
  244.      *            the String to encode, may be <code>null</code>
  245.      * @return encoded bytes, or <code>null</code> if the input string was <code>null</code>
  246.      * @throws NullPointerException
  247.      *             Thrown if {@link Charsets#UTF_8} is not initialized, which should never happen since it is
  248.      *             required by the Java platform specification.
  249.      * @since As of 1.7, throws {@link NullPointerException} instead of UnsupportedEncodingException
  250.      * @see <a href="http://download.oracle.com/javase/6/docs/api/java/nio/charset/Charset.html">Standard charsets</a>
  251.      * @see #getBytesUnchecked(String, String)
  252.      */
  253.     public static byte[] getBytesUtf8(final String string) {
  254.         return getBytes(string, Charsets.UTF_8);
  255.     }

  256.     private static IllegalStateException newIllegalStateException(final String charsetName,
  257.                                                                   final UnsupportedEncodingException e) {
  258.         return new IllegalStateException(charsetName + ": " + e);
  259.     }

  260.     /**
  261.      * Constructs a new <code>String</code> by decoding the specified array of bytes using the given charset.
  262.      *
  263.      * @param bytes
  264.      *            The bytes to be decoded into characters
  265.      * @param charset
  266.      *            The {@link Charset} to encode the <code>String</code>; not {@code null}
  267.      * @return A new <code>String</code> decoded from the specified array of bytes using the given charset,
  268.      *         or <code>null</code> if the input byte array was <code>null</code>.
  269.      * @throws NullPointerException
  270.      *             Thrown if charset is {@code null}
  271.      */
  272.     private static String newString(final byte[] bytes, final Charset charset) {
  273.         return bytes == null ? null : new String(bytes, charset);
  274.     }

  275.     /**
  276.      * Constructs a new <code>String</code> by decoding the specified array of bytes using the given charset.
  277.      * <p>
  278.      * This method catches {@link UnsupportedEncodingException} and re-throws it as {@link IllegalStateException}, which
  279.      * should never happen for a required charset name. Use this method when the encoding is required to be in the JRE.
  280.      * </p>
  281.      *
  282.      * @param bytes
  283.      *            The bytes to be decoded into characters, may be <code>null</code>
  284.      * @param charsetName
  285.      *            The name of a required {@link java.nio.charset.Charset}
  286.      * @return A new <code>String</code> decoded from the specified array of bytes using the given charset,
  287.      *         or <code>null</code> if the input byte array was <code>null</code>.
  288.      * @throws IllegalStateException
  289.      *             Thrown when a {@link UnsupportedEncodingException} is caught, which should never happen for a
  290.      *             required charset name.
  291.      * @see CharEncoding
  292.      * @see String#String(byte[], String)
  293.      */
  294.     public static String newString(final byte[] bytes, final String charsetName) {
  295.         if (bytes == null) {
  296.             return null;
  297.         }
  298.         try {
  299.             return new String(bytes, charsetName);
  300.         } catch (final UnsupportedEncodingException e) {
  301.             throw StringUtils.newIllegalStateException(charsetName, e);
  302.         }
  303.     }

  304.     /**
  305.      * Constructs a new <code>String</code> by decoding the specified array of bytes using the ISO-8859-1 charset.
  306.      *
  307.      * @param bytes
  308.      *            The bytes to be decoded into characters, may be <code>null</code>
  309.      * @return A new <code>String</code> decoded from the specified array of bytes using the ISO-8859-1 charset, or
  310.      *         <code>null</code> if the input byte array was <code>null</code>.
  311.      * @throws NullPointerException
  312.      *             Thrown if {@link Charsets#ISO_8859_1} is not initialized, which should never happen since it is
  313.      *             required by the Java platform specification.
  314.      * @since As of 1.7, throws {@link NullPointerException} instead of UnsupportedEncodingException
  315.      */
  316.     public static String newStringIso8859_1(final byte[] bytes) {
  317.         return newString(bytes, Charsets.ISO_8859_1);
  318.     }

  319.     /**
  320.      * Constructs a new <code>String</code> by decoding the specified array of bytes using the US-ASCII charset.
  321.      *
  322.      * @param bytes
  323.      *            The bytes to be decoded into characters
  324.      * @return A new <code>String</code> decoded from the specified array of bytes using the US-ASCII charset,
  325.      *         or <code>null</code> if the input byte array was <code>null</code>.
  326.      * @throws NullPointerException
  327.      *             Thrown if {@link Charsets#US_ASCII} is not initialized, which should never happen since it is
  328.      *             required by the Java platform specification.
  329.      * @since As of 1.7, throws {@link NullPointerException} instead of UnsupportedEncodingException
  330.      */
  331.     public static String newStringUsAscii(final byte[] bytes) {
  332.         return newString(bytes, Charsets.US_ASCII);
  333.     }

  334.     /**
  335.      * Constructs a new <code>String</code> by decoding the specified array of bytes using the UTF-16 charset.
  336.      *
  337.      * @param bytes
  338.      *            The bytes to be decoded into characters
  339.      * @return A new <code>String</code> decoded from the specified array of bytes using the UTF-16 charset
  340.      *         or <code>null</code> if the input byte array was <code>null</code>.
  341.      * @throws NullPointerException
  342.      *             Thrown if {@link Charsets#UTF_16} is not initialized, which should never happen since it is
  343.      *             required by the Java platform specification.
  344.      * @since As of 1.7, throws {@link NullPointerException} instead of UnsupportedEncodingException
  345.      */
  346.     public static String newStringUtf16(final byte[] bytes) {
  347.         return newString(bytes, Charsets.UTF_16);
  348.     }

  349.     /**
  350.      * Constructs a new <code>String</code> by decoding the specified array of bytes using the UTF-16BE charset.
  351.      *
  352.      * @param bytes
  353.      *            The bytes to be decoded into characters
  354.      * @return A new <code>String</code> decoded from the specified array of bytes using the UTF-16BE charset,
  355.      *         or <code>null</code> if the input byte array was <code>null</code>.
  356.      * @throws NullPointerException
  357.      *             Thrown if {@link Charsets#UTF_16BE} is not initialized, which should never happen since it is
  358.      *             required by the Java platform specification.
  359.      * @since As of 1.7, throws {@link NullPointerException} instead of UnsupportedEncodingException
  360.      */
  361.     public static String newStringUtf16Be(final byte[] bytes) {
  362.         return newString(bytes, Charsets.UTF_16BE);
  363.     }

  364.     /**
  365.      * Constructs a new <code>String</code> by decoding the specified array of bytes using the UTF-16LE charset.
  366.      *
  367.      * @param bytes
  368.      *            The bytes to be decoded into characters
  369.      * @return A new <code>String</code> decoded from the specified array of bytes using the UTF-16LE charset,
  370.      *         or <code>null</code> if the input byte array was <code>null</code>.
  371.      * @throws NullPointerException
  372.      *             Thrown if {@link Charsets#UTF_16LE} is not initialized, which should never happen since it is
  373.      *             required by the Java platform specification.
  374.      * @since As of 1.7, throws {@link NullPointerException} instead of UnsupportedEncodingException
  375.      */
  376.     public static String newStringUtf16Le(final byte[] bytes) {
  377.         return newString(bytes, Charsets.UTF_16LE);
  378.     }

  379.     /**
  380.      * Constructs a new <code>String</code> by decoding the specified array of bytes using the UTF-8 charset.
  381.      *
  382.      * @param bytes
  383.      *            The bytes to be decoded into characters
  384.      * @return A new <code>String</code> decoded from the specified array of bytes using the UTF-8 charset,
  385.      *         or <code>null</code> if the input byte array was <code>null</code>.
  386.      * @throws NullPointerException
  387.      *             Thrown if {@link Charsets#UTF_8} is not initialized, which should never happen since it is
  388.      *             required by the Java platform specification.
  389.      * @since As of 1.7, throws {@link NullPointerException} instead of UnsupportedEncodingException
  390.      */
  391.     public static String newStringUtf8(final byte[] bytes) {
  392.         return newString(bytes, Charsets.UTF_8);
  393.     }

  394. }