001/*
002 * Licensed to the Apache Software Foundation (ASF) under one or more
003 * contributor license agreements.  See the NOTICE file distributed with
004 * this work for additional information regarding copyright ownership.
005 * The ASF licenses this file to You under the Apache License, Version 2.0
006 * (the "License"); you may not use this file except in compliance with
007 * the License.  You may obtain a copy of the License at
008 *
009 *      http://www.apache.org/licenses/LICENSE-2.0
010 *
011 * Unless required by applicable law or agreed to in writing, software
012 * distributed under the License is distributed on an "AS IS" BASIS,
013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 * See the License for the specific language governing permissions and
015 * limitations under the License.
016 */
017
018package org.apache.commons.codec.binary;
019
020import java.io.UnsupportedEncodingException;
021import java.nio.ByteBuffer;
022import java.nio.charset.Charset;
023
024import org.apache.commons.codec.CharEncoding;
025import org.apache.commons.codec.Charsets;
026
027/**
028 * Converts String to and from bytes using the encodings required by the Java specification. These encodings are
029 * specified in <a href="http://download.oracle.com/javase/7/docs/api/java/nio/charset/Charset.html">
030 * Standard charsets</a>.
031 *
032 * <p>This class is immutable and thread-safe.</p>
033 *
034 * @see CharEncoding
035 * @see <a href="http://download.oracle.com/javase/7/docs/api/java/nio/charset/Charset.html">Standard charsets</a>
036 * @since 1.4
037 */
038public class StringUtils {
039
040    /**
041     * <p>
042     * Compares two CharSequences, returning <code>true</code> if they represent equal sequences of characters.
043     * </p>
044     *
045     * <p>
046     * <code>null</code>s are handled without exceptions. Two <code>null</code> references are considered to be equal.
047     * The comparison is case sensitive.
048     * </p>
049     *
050     * <pre>
051     * StringUtils.equals(null, null)   = true
052     * StringUtils.equals(null, "abc")  = false
053     * StringUtils.equals("abc", null)  = false
054     * StringUtils.equals("abc", "abc") = true
055     * StringUtils.equals("abc", "ABC") = false
056     * </pre>
057     *
058     * <p>
059     * Copied from Apache Commons Lang r1583482 on April 10, 2014 (day of 3.3.2 release).
060     * </p>
061     *
062     * @see Object#equals(Object)
063     * @param cs1
064     *            the first CharSequence, may be <code>null</code>
065     * @param cs2
066     *            the second CharSequence, may be <code>null</code>
067     * @return <code>true</code> if the CharSequences are equal (case-sensitive), or both <code>null</code>
068     * @since 1.10
069     */
070    public static boolean equals(final CharSequence cs1, final CharSequence cs2) {
071        if (cs1 == cs2) {
072            return true;
073        }
074        if (cs1 == null || cs2 == null) {
075            return false;
076        }
077        if (cs1 instanceof String && cs2 instanceof String) {
078            return cs1.equals(cs2);
079        }
080        return cs1.length() == cs2.length() && CharSequenceUtils.regionMatches(cs1, false, 0, cs2, 0, cs1.length());
081    }
082
083    /**
084     * Calls {@link String#getBytes(Charset)}
085     *
086     * @param string
087     *            The string to encode (if null, return null).
088     * @param charset
089     *            The {@link Charset} to encode the <code>String</code>
090     * @return the encoded bytes
091     */
092    private static byte[] getBytes(final String string, final Charset charset) {
093        if (string == null) {
094            return null;
095        }
096        return string.getBytes(charset);
097    }
098
099    /**
100     * Calls {@link String#getBytes(Charset)}
101     *
102     * @param string
103     *            The string to encode (if null, return null).
104     * @param charset
105     *            The {@link Charset} to encode the <code>String</code>
106     * @return the encoded bytes
107     */
108    private static ByteBuffer getByteBuffer(final String string, final Charset charset) {
109        if (string == null) {
110            return null;
111        }
112        return ByteBuffer.wrap(string.getBytes(charset));
113    }
114
115    /**
116     * Encodes the given string into a byte buffer using the UTF-8 charset, storing the result into a new byte
117     * array.
118     *
119     * @param string
120     *            the String to encode, may be <code>null</code>
121     * @return encoded bytes, or <code>null</code> if the input string was <code>null</code>
122     * @throws NullPointerException
123     *             Thrown if {@link Charsets#UTF_8} is not initialized, which should never happen since it is
124     *             required by the Java platform specification.
125     * @see <a href="http://download.oracle.com/javase/7/docs/api/java/nio/charset/Charset.html">Standard charsets</a>
126     * @see #getBytesUnchecked(String, String)
127     * @since 1.11
128     */
129    public static ByteBuffer getByteBufferUtf8(final String string) {
130        return getByteBuffer(string, Charsets.UTF_8);
131    }
132
133    /**
134     * Encodes the given string into a sequence of bytes using the ISO-8859-1 charset, storing the result into a new
135     * byte array.
136     *
137     * @param string
138     *            the String to encode, may be <code>null</code>
139     * @return encoded bytes, or <code>null</code> if the input string was <code>null</code>
140     * @throws NullPointerException
141     *             Thrown if {@link Charsets#ISO_8859_1} is not initialized, which should never happen since it is
142     *             required by the Java platform specification.
143     * @since As of 1.7, throws {@link NullPointerException} instead of UnsupportedEncodingException
144     * @see <a href="http://download.oracle.com/javase/7/docs/api/java/nio/charset/Charset.html">Standard charsets</a>
145     * @see #getBytesUnchecked(String, String)
146     */
147    public static byte[] getBytesIso8859_1(final String string) {
148        return getBytes(string, Charsets.ISO_8859_1);
149    }
150
151
152    /**
153     * Encodes the given string into a sequence of bytes using the named charset, storing the result into a new byte
154     * array.
155     * <p>
156     * This method catches {@link UnsupportedEncodingException} and rethrows it as {@link IllegalStateException}, which
157     * should never happen for a required charset name. Use this method when the encoding is required to be in the JRE.
158     * </p>
159     *
160     * @param string
161     *            the String to encode, may be <code>null</code>
162     * @param charsetName
163     *            The name of a required {@link java.nio.charset.Charset}
164     * @return encoded bytes, or <code>null</code> if the input string was <code>null</code>
165     * @throws IllegalStateException
166     *             Thrown when a {@link UnsupportedEncodingException} is caught, which should never happen for a
167     *             required charset name.
168     * @see CharEncoding
169     * @see String#getBytes(String)
170     */
171    public static byte[] getBytesUnchecked(final String string, final String charsetName) {
172        if (string == null) {
173            return null;
174        }
175        try {
176            return string.getBytes(charsetName);
177        } catch (final UnsupportedEncodingException e) {
178            throw StringUtils.newIllegalStateException(charsetName, e);
179        }
180    }
181
182    /**
183     * Encodes the given string into a sequence of bytes using the US-ASCII charset, storing the result into a new byte
184     * array.
185     *
186     * @param string
187     *            the String to encode, may be <code>null</code>
188     * @return encoded bytes, or <code>null</code> if the input string was <code>null</code>
189     * @throws NullPointerException
190     *             Thrown if {@link Charsets#US_ASCII} is not initialized, which should never happen since it is
191     *             required by the Java platform specification.
192     * @since As of 1.7, throws {@link NullPointerException} instead of UnsupportedEncodingException
193     * @see <a href="http://download.oracle.com/javase/7/docs/api/java/nio/charset/Charset.html">Standard charsets</a>
194     * @see #getBytesUnchecked(String, String)
195     */
196    public static byte[] getBytesUsAscii(final String string) {
197        return getBytes(string, Charsets.US_ASCII);
198    }
199
200    /**
201     * Encodes the given string into a sequence of bytes using the UTF-16 charset, storing the result into a new byte
202     * array.
203     *
204     * @param string
205     *            the String to encode, may be <code>null</code>
206     * @return encoded bytes, or <code>null</code> if the input string was <code>null</code>
207     * @throws NullPointerException
208     *             Thrown if {@link Charsets#UTF_16} is not initialized, which should never happen since it is
209     *             required by the Java platform specification.
210     * @since As of 1.7, throws {@link NullPointerException} instead of UnsupportedEncodingException
211     * @see <a href="http://download.oracle.com/javase/7/docs/api/java/nio/charset/Charset.html">Standard charsets</a>
212     * @see #getBytesUnchecked(String, String)
213     */
214    public static byte[] getBytesUtf16(final String string) {
215        return getBytes(string, Charsets.UTF_16);
216    }
217
218    /**
219     * Encodes the given string into a sequence of bytes using the UTF-16BE charset, storing the result into a new byte
220     * array.
221     *
222     * @param string
223     *            the String to encode, may be <code>null</code>
224     * @return encoded bytes, or <code>null</code> if the input string was <code>null</code>
225     * @throws NullPointerException
226     *             Thrown if {@link Charsets#UTF_16BE} is not initialized, which should never happen since it is
227     *             required by the Java platform specification.
228     * @since As of 1.7, throws {@link NullPointerException} instead of UnsupportedEncodingException
229     * @see <a href="http://download.oracle.com/javase/7/docs/api/java/nio/charset/Charset.html">Standard charsets</a>
230     * @see #getBytesUnchecked(String, String)
231     */
232    public static byte[] getBytesUtf16Be(final String string) {
233        return getBytes(string, Charsets.UTF_16BE);
234    }
235
236    /**
237     * Encodes the given string into a sequence of bytes using the UTF-16LE charset, storing the result into a new byte
238     * array.
239     *
240     * @param string
241     *            the String to encode, may be <code>null</code>
242     * @return encoded bytes, or <code>null</code> if the input string was <code>null</code>
243     * @throws NullPointerException
244     *             Thrown if {@link Charsets#UTF_16LE} is not initialized, which should never happen since it is
245     *             required by the Java platform specification.
246     * @since As of 1.7, throws {@link NullPointerException} instead of UnsupportedEncodingException
247     * @see <a href="http://download.oracle.com/javase/7/docs/api/java/nio/charset/Charset.html">Standard charsets</a>
248     * @see #getBytesUnchecked(String, String)
249     */
250    public static byte[] getBytesUtf16Le(final String string) {
251        return getBytes(string, Charsets.UTF_16LE);
252    }
253
254    /**
255     * Encodes the given string into a sequence of bytes using the UTF-8 charset, storing the result into a new byte
256     * array.
257     *
258     * @param string
259     *            the String to encode, may be <code>null</code>
260     * @return encoded bytes, or <code>null</code> if the input string was <code>null</code>
261     * @throws NullPointerException
262     *             Thrown if {@link Charsets#UTF_8} is not initialized, which should never happen since it is
263     *             required by the Java platform specification.
264     * @since As of 1.7, throws {@link NullPointerException} instead of UnsupportedEncodingException
265     * @see <a href="http://download.oracle.com/javase/7/docs/api/java/nio/charset/Charset.html">Standard charsets</a>
266     * @see #getBytesUnchecked(String, String)
267     */
268    public static byte[] getBytesUtf8(final String string) {
269        return getBytes(string, Charsets.UTF_8);
270    }
271
272    private static IllegalStateException newIllegalStateException(final String charsetName,
273                                                                  final UnsupportedEncodingException e) {
274        return new IllegalStateException(charsetName + ": " + e);
275    }
276
277    /**
278     * Constructs a new <code>String</code> by decoding the specified array of bytes using the given charset.
279     *
280     * @param bytes
281     *            The bytes to be decoded into characters
282     * @param charset
283     *            The {@link Charset} to encode the <code>String</code>; not {@code null}
284     * @return A new <code>String</code> decoded from the specified array of bytes using the given charset,
285     *         or <code>null</code> if the input byte array was <code>null</code>.
286     * @throws NullPointerException
287     *             Thrown if charset is {@code null}
288     */
289    private static String newString(final byte[] bytes, final Charset charset) {
290        return bytes == null ? null : new String(bytes, charset);
291    }
292
293    /**
294     * Constructs a new <code>String</code> by decoding the specified array of bytes using the given charset.
295     * <p>
296     * This method catches {@link UnsupportedEncodingException} and re-throws it as {@link IllegalStateException}, which
297     * should never happen for a required charset name. Use this method when the encoding is required to be in the JRE.
298     * </p>
299     *
300     * @param bytes
301     *            The bytes to be decoded into characters, may be <code>null</code>
302     * @param charsetName
303     *            The name of a required {@link java.nio.charset.Charset}
304     * @return A new <code>String</code> decoded from the specified array of bytes using the given charset,
305     *         or <code>null</code> if the input byte array was <code>null</code>.
306     * @throws IllegalStateException
307     *             Thrown when a {@link UnsupportedEncodingException} is caught, which should never happen for a
308     *             required charset name.
309     * @see CharEncoding
310     * @see String#String(byte[], String)
311     */
312    public static String newString(final byte[] bytes, final String charsetName) {
313        if (bytes == null) {
314            return null;
315        }
316        try {
317            return new String(bytes, charsetName);
318        } catch (final UnsupportedEncodingException e) {
319            throw StringUtils.newIllegalStateException(charsetName, e);
320        }
321    }
322
323    /**
324     * Constructs a new <code>String</code> by decoding the specified array of bytes using the ISO-8859-1 charset.
325     *
326     * @param bytes
327     *            The bytes to be decoded into characters, may be <code>null</code>
328     * @return A new <code>String</code> decoded from the specified array of bytes using the ISO-8859-1 charset, or
329     *         <code>null</code> if the input byte array was <code>null</code>.
330     * @throws NullPointerException
331     *             Thrown if {@link Charsets#ISO_8859_1} is not initialized, which should never happen since it is
332     *             required by the Java platform specification.
333     * @since As of 1.7, throws {@link NullPointerException} instead of UnsupportedEncodingException
334     */
335    public static String newStringIso8859_1(final byte[] bytes) {
336        return newString(bytes, Charsets.ISO_8859_1);
337    }
338
339    /**
340     * Constructs a new <code>String</code> by decoding the specified array of bytes using the US-ASCII charset.
341     *
342     * @param bytes
343     *            The bytes to be decoded into characters
344     * @return A new <code>String</code> decoded from the specified array of bytes using the US-ASCII charset,
345     *         or <code>null</code> if the input byte array was <code>null</code>.
346     * @throws NullPointerException
347     *             Thrown if {@link Charsets#US_ASCII} is not initialized, which should never happen since it is
348     *             required by the Java platform specification.
349     * @since As of 1.7, throws {@link NullPointerException} instead of UnsupportedEncodingException
350     */
351    public static String newStringUsAscii(final byte[] bytes) {
352        return newString(bytes, Charsets.US_ASCII);
353    }
354
355    /**
356     * Constructs a new <code>String</code> by decoding the specified array of bytes using the UTF-16 charset.
357     *
358     * @param bytes
359     *            The bytes to be decoded into characters
360     * @return A new <code>String</code> decoded from the specified array of bytes using the UTF-16 charset
361     *         or <code>null</code> if the input byte array was <code>null</code>.
362     * @throws NullPointerException
363     *             Thrown if {@link Charsets#UTF_16} is not initialized, which should never happen since it is
364     *             required by the Java platform specification.
365     * @since As of 1.7, throws {@link NullPointerException} instead of UnsupportedEncodingException
366     */
367    public static String newStringUtf16(final byte[] bytes) {
368        return newString(bytes, Charsets.UTF_16);
369    }
370
371    /**
372     * Constructs a new <code>String</code> by decoding the specified array of bytes using the UTF-16BE charset.
373     *
374     * @param bytes
375     *            The bytes to be decoded into characters
376     * @return A new <code>String</code> decoded from the specified array of bytes using the UTF-16BE charset,
377     *         or <code>null</code> if the input byte array was <code>null</code>.
378     * @throws NullPointerException
379     *             Thrown if {@link Charsets#UTF_16BE} is not initialized, which should never happen since it is
380     *             required by the Java platform specification.
381     * @since As of 1.7, throws {@link NullPointerException} instead of UnsupportedEncodingException
382     */
383    public static String newStringUtf16Be(final byte[] bytes) {
384        return newString(bytes, Charsets.UTF_16BE);
385    }
386
387    /**
388     * Constructs a new <code>String</code> by decoding the specified array of bytes using the UTF-16LE charset.
389     *
390     * @param bytes
391     *            The bytes to be decoded into characters
392     * @return A new <code>String</code> decoded from the specified array of bytes using the UTF-16LE charset,
393     *         or <code>null</code> if the input byte array was <code>null</code>.
394     * @throws NullPointerException
395     *             Thrown if {@link Charsets#UTF_16LE} is not initialized, which should never happen since it is
396     *             required by the Java platform specification.
397     * @since As of 1.7, throws {@link NullPointerException} instead of UnsupportedEncodingException
398     */
399    public static String newStringUtf16Le(final byte[] bytes) {
400        return newString(bytes, Charsets.UTF_16LE);
401    }
402
403    /**
404     * Constructs a new <code>String</code> by decoding the specified array of bytes using the UTF-8 charset.
405     *
406     * @param bytes
407     *            The bytes to be decoded into characters
408     * @return A new <code>String</code> decoded from the specified array of bytes using the UTF-8 charset,
409     *         or <code>null</code> if the input byte array was <code>null</code>.
410     * @throws NullPointerException
411     *             Thrown if {@link Charsets#UTF_8} is not initialized, which should never happen since it is
412     *             required by the Java platform specification.
413     * @since As of 1.7, throws {@link NullPointerException} instead of UnsupportedEncodingException
414     */
415    public static String newStringUtf8(final byte[] bytes) {
416        return newString(bytes, Charsets.UTF_8);
417    }
418
419}