001    /*
002     * Licensed to the Apache Software Foundation (ASF) under one or more
003     * contributor license agreements.  See the NOTICE file distributed with
004     * this work for additional information regarding copyright ownership.
005     * The ASF licenses this file to You under the Apache License, Version 2.0
006     * (the "License"); you may not use this file except in compliance with
007     * the License.  You may obtain a copy of the License at
008     *
009     *      http://www.apache.org/licenses/LICENSE-2.0
010     *
011     * Unless required by applicable law or agreed to in writing, software
012     * distributed under the License is distributed on an "AS IS" BASIS,
013     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014     * See the License for the specific language governing permissions and
015     * limitations under the License.
016     */
017    
018    package org.apache.commons.codec.binary;
019    
020    import java.io.UnsupportedEncodingException;
021    import java.nio.charset.Charset;
022    
023    import org.apache.commons.codec.CharEncoding;
024    import org.apache.commons.codec.Charsets;
025    
026    /**
027     * Converts String to and from bytes using the encodings required by the Java specification. These encodings are
028     * specified in <a href="http://download.oracle.com/javase/6/docs/api/java/nio/charset/Charset.html">
029     * Standard charsets</a>.
030     *
031     * <p>This class is immutable and thread-safe.</p>
032     *
033     * @see CharEncoding
034     * @see <a href="http://download.oracle.com/javase/6/docs/api/java/nio/charset/Charset.html">Standard charsets</a>
035     * @version $Id: StringUtils.html 889935 2013-12-11 05:05:13Z ggregory $
036     * @since 1.4
037     */
038    public class StringUtils {
039    
040        /**
041         * Calls {@link String#getBytes(Charset)}
042         *
043         * @param string
044         *            The string to encode (if null, return null).
045         * @param charset
046         *            The {@link Charset} to encode the {@code String}
047         * @return the encoded bytes
048         */
049        private static byte[] getBytes(final String string, final Charset charset) {
050            if (string == null) {
051                return null;
052            }
053            return string.getBytes(charset);
054        }
055    
056        /**
057         * Encodes the given string into a sequence of bytes using the ISO-8859-1 charset, storing the result into a new
058         * byte array.
059         *
060         * @param string
061         *            the String to encode, may be {@code null}
062         * @return encoded bytes, or {@code null} if the input string was {@code null}
063         * @throws NullPointerException
064         *             Thrown if {@link Charsets#ISO_8859_1} is not initialized, which should never happen since it is
065         *             required by the Java platform specification.
066         * @since As of 1.7, throws {@link NullPointerException} instead of UnsupportedEncodingException
067         * @see <a href="http://download.oracle.com/javase/6/docs/api/java/nio/charset/Charset.html">Standard charsets</a>
068         * @see #getBytesUnchecked(String, String)
069         */
070        public static byte[] getBytesIso8859_1(final String string) {
071            return getBytes(string, Charsets.ISO_8859_1);
072        }
073    
074    
075        /**
076         * Encodes the given string into a sequence of bytes using the named charset, storing the result into a new byte
077         * array.
078         * <p>
079         * This method catches {@link UnsupportedEncodingException} and rethrows it as {@link IllegalStateException}, which
080         * should never happen for a required charset name. Use this method when the encoding is required to be in the JRE.
081         * </p>
082         *
083         * @param string
084         *            the String to encode, may be {@code null}
085         * @param charsetName
086         *            The name of a required {@link java.nio.charset.Charset}
087         * @return encoded bytes, or {@code null} if the input string was {@code null}
088         * @throws IllegalStateException
089         *             Thrown when a {@link UnsupportedEncodingException} is caught, which should never happen for a
090         *             required charset name.
091         * @see CharEncoding
092         * @see String#getBytes(String)
093         */
094        public static byte[] getBytesUnchecked(final String string, final String charsetName) {
095            if (string == null) {
096                return null;
097            }
098            try {
099                return string.getBytes(charsetName);
100            } catch (final UnsupportedEncodingException e) {
101                throw StringUtils.newIllegalStateException(charsetName, e);
102            }
103        }
104    
105        /**
106         * Encodes the given string into a sequence of bytes using the US-ASCII charset, storing the result into a new byte
107         * array.
108         *
109         * @param string
110         *            the String to encode, may be {@code null}
111         * @return encoded bytes, or {@code null} if the input string was {@code null}
112         * @throws NullPointerException
113         *             Thrown if {@link Charsets#US_ASCII} is not initialized, which should never happen since it is
114         *             required by the Java platform specification.
115         * @since As of 1.7, throws {@link NullPointerException} instead of UnsupportedEncodingException
116         * @see <a href="http://download.oracle.com/javase/6/docs/api/java/nio/charset/Charset.html">Standard charsets</a>
117         * @see #getBytesUnchecked(String, String)
118         */
119        public static byte[] getBytesUsAscii(final String string) {
120            return getBytes(string, Charsets.US_ASCII);
121        }
122    
123        /**
124         * Encodes the given string into a sequence of bytes using the UTF-16 charset, storing the result into a new byte
125         * array.
126         *
127         * @param string
128         *            the String to encode, may be {@code null}
129         * @return encoded bytes, or {@code null} if the input string was {@code null}
130         * @throws NullPointerException
131         *             Thrown if {@link Charsets#UTF_16} is not initialized, which should never happen since it is
132         *             required by the Java platform specification.
133         * @since As of 1.7, throws {@link NullPointerException} instead of UnsupportedEncodingException
134         * @see <a href="http://download.oracle.com/javase/6/docs/api/java/nio/charset/Charset.html">Standard charsets</a>
135         * @see #getBytesUnchecked(String, String)
136         */
137        public static byte[] getBytesUtf16(final String string) {
138            return getBytes(string, Charsets.UTF_16);
139        }
140    
141        /**
142         * Encodes the given string into a sequence of bytes using the UTF-16BE charset, storing the result into a new byte
143         * array.
144         *
145         * @param string
146         *            the String to encode, may be {@code null}
147         * @return encoded bytes, or {@code null} if the input string was {@code null}
148         * @throws NullPointerException
149         *             Thrown if {@link Charsets#UTF_16BE} is not initialized, which should never happen since it is
150         *             required by the Java platform specification.
151         * @since As of 1.7, throws {@link NullPointerException} instead of UnsupportedEncodingException
152         * @see <a href="http://download.oracle.com/javase/6/docs/api/java/nio/charset/Charset.html">Standard charsets</a>
153         * @see #getBytesUnchecked(String, String)
154         */
155        public static byte[] getBytesUtf16Be(final String string) {
156            return getBytes(string, Charsets.UTF_16BE);
157        }
158    
159        /**
160         * Encodes the given string into a sequence of bytes using the UTF-16LE charset, storing the result into a new byte
161         * array.
162         *
163         * @param string
164         *            the String to encode, may be {@code null}
165         * @return encoded bytes, or {@code null} if the input string was {@code null}
166         * @throws NullPointerException
167         *             Thrown if {@link Charsets#UTF_16LE} is not initialized, which should never happen since it is
168         *             required by the Java platform specification.
169         * @since As of 1.7, throws {@link NullPointerException} instead of UnsupportedEncodingException
170         * @see <a href="http://download.oracle.com/javase/6/docs/api/java/nio/charset/Charset.html">Standard charsets</a>
171         * @see #getBytesUnchecked(String, String)
172         */
173        public static byte[] getBytesUtf16Le(final String string) {
174            return getBytes(string, Charsets.UTF_16LE);
175        }
176    
177        /**
178         * Encodes the given string into a sequence of bytes using the UTF-8 charset, storing the result into a new byte
179         * array.
180         *
181         * @param string
182         *            the String to encode, may be {@code null}
183         * @return encoded bytes, or {@code null} if the input string was {@code null}
184         * @throws NullPointerException
185         *             Thrown if {@link Charsets#UTF_8} is not initialized, which should never happen since it is
186         *             required by the Java platform specification.
187         * @since As of 1.7, throws {@link NullPointerException} instead of UnsupportedEncodingException
188         * @see <a href="http://download.oracle.com/javase/6/docs/api/java/nio/charset/Charset.html">Standard charsets</a>
189         * @see #getBytesUnchecked(String, String)
190         */
191        public static byte[] getBytesUtf8(final String string) {
192            return getBytes(string, Charsets.UTF_8);
193        }
194    
195        private static IllegalStateException newIllegalStateException(final String charsetName,
196                                                                      final UnsupportedEncodingException e) {
197            return new IllegalStateException(charsetName + ": " + e);
198        }
199    
200        /**
201         * Constructs a new <code>String</code> by decoding the specified array of bytes using the given charset.
202         *
203         * @param bytes
204         *            The bytes to be decoded into characters
205         * @param charset
206         *            The {@link Charset} to encode the {@code String}
207         * @return A new <code>String</code> decoded from the specified array of bytes using the given charset,
208         *         or {@code null} if the input byte array was {@code null}.
209         * @throws NullPointerException
210         *             Thrown if {@link Charsets#UTF_8} is not initialized, which should never happen since it is
211         *             required by the Java platform specification.
212         */
213        private static String newString(final byte[] bytes, final Charset charset) {
214            return bytes == null ? null : new String(bytes, charset);
215        }
216    
217        /**
218         * Constructs a new <code>String</code> by decoding the specified array of bytes using the given charset.
219         * <p>
220         * This method catches {@link UnsupportedEncodingException} and re-throws it as {@link IllegalStateException}, which
221         * should never happen for a required charset name. Use this method when the encoding is required to be in the JRE.
222         * </p>
223         *
224         * @param bytes
225         *            The bytes to be decoded into characters, may be {@code null}
226         * @param charsetName
227         *            The name of a required {@link java.nio.charset.Charset}
228         * @return A new <code>String</code> decoded from the specified array of bytes using the given charset,
229         *         or {@code null} if the input byte array was {@code null}.
230         * @throws IllegalStateException
231         *             Thrown when a {@link UnsupportedEncodingException} is caught, which should never happen for a
232         *             required charset name.
233         * @see CharEncoding
234         * @see String#String(byte[], String)
235         */
236        public static String newString(final byte[] bytes, final String charsetName) {
237            if (bytes == null) {
238                return null;
239            }
240            try {
241                return new String(bytes, charsetName);
242            } catch (final UnsupportedEncodingException e) {
243                throw StringUtils.newIllegalStateException(charsetName, e);
244            }
245        }
246    
247        /**
248         * Constructs a new <code>String</code> by decoding the specified array of bytes using the ISO-8859-1 charset.
249         *
250         * @param bytes
251         *            The bytes to be decoded into characters, may be {@code null}
252         * @return A new <code>String</code> decoded from the specified array of bytes using the ISO-8859-1 charset, or
253         *         {@code null} if the input byte array was {@code null}.
254         * @throws NullPointerException
255         *             Thrown if {@link Charsets#ISO_8859_1} is not initialized, which should never happen since it is
256         *             required by the Java platform specification.
257         * @since As of 1.7, throws {@link NullPointerException} instead of UnsupportedEncodingException
258         */
259        public static String newStringIso8859_1(final byte[] bytes) {
260            return new String(bytes, Charsets.ISO_8859_1);
261        }
262    
263        /**
264         * Constructs a new <code>String</code> by decoding the specified array of bytes using the US-ASCII charset.
265         *
266         * @param bytes
267         *            The bytes to be decoded into characters
268         * @return A new <code>String</code> decoded from the specified array of bytes using the US-ASCII charset,
269         *         or {@code null} if the input byte array was {@code null}.
270         * @throws NullPointerException
271         *             Thrown if {@link Charsets#US_ASCII} is not initialized, which should never happen since it is
272         *             required by the Java platform specification.
273         * @since As of 1.7, throws {@link NullPointerException} instead of UnsupportedEncodingException
274         */
275        public static String newStringUsAscii(final byte[] bytes) {
276            return new String(bytes, Charsets.US_ASCII);
277        }
278    
279        /**
280         * Constructs a new <code>String</code> by decoding the specified array of bytes using the UTF-16 charset.
281         *
282         * @param bytes
283         *            The bytes to be decoded into characters
284         * @return A new <code>String</code> decoded from the specified array of bytes using the UTF-16 charset
285         *         or {@code null} if the input byte array was {@code null}.
286         * @throws NullPointerException
287         *             Thrown if {@link Charsets#UTF_16} is not initialized, which should never happen since it is
288         *             required by the Java platform specification.
289         * @since As of 1.7, throws {@link NullPointerException} instead of UnsupportedEncodingException
290         */
291        public static String newStringUtf16(final byte[] bytes) {
292            return new String(bytes, Charsets.UTF_16);
293        }
294    
295        /**
296         * Constructs a new <code>String</code> by decoding the specified array of bytes using the UTF-16BE charset.
297         *
298         * @param bytes
299         *            The bytes to be decoded into characters
300         * @return A new <code>String</code> decoded from the specified array of bytes using the UTF-16BE charset,
301         *         or {@code null} if the input byte array was {@code null}.
302         * @throws NullPointerException
303         *             Thrown if {@link Charsets#UTF_16BE} is not initialized, which should never happen since it is
304         *             required by the Java platform specification.
305         * @since As of 1.7, throws {@link NullPointerException} instead of UnsupportedEncodingException
306         */
307        public static String newStringUtf16Be(final byte[] bytes) {
308            return new String(bytes, Charsets.UTF_16BE);
309        }
310    
311        /**
312         * Constructs a new <code>String</code> by decoding the specified array of bytes using the UTF-16LE charset.
313         *
314         * @param bytes
315         *            The bytes to be decoded into characters
316         * @return A new <code>String</code> decoded from the specified array of bytes using the UTF-16LE charset,
317         *         or {@code null} if the input byte array was {@code null}.
318         * @throws NullPointerException
319         *             Thrown if {@link Charsets#UTF_16LE} is not initialized, which should never happen since it is
320         *             required by the Java platform specification.
321         * @since As of 1.7, throws {@link NullPointerException} instead of UnsupportedEncodingException
322         */
323        public static String newStringUtf16Le(final byte[] bytes) {
324            return new String(bytes, Charsets.UTF_16LE);
325        }
326    
327        /**
328         * Constructs a new <code>String</code> by decoding the specified array of bytes using the UTF-8 charset.
329         *
330         * @param bytes
331         *            The bytes to be decoded into characters
332         * @return A new <code>String</code> decoded from the specified array of bytes using the UTF-8 charset,
333         *         or {@code null} if the input byte array was {@code null}.
334         * @throws NullPointerException
335         *             Thrown if {@link Charsets#UTF_8} is not initialized, which should never happen since it is
336         *             required by the Java platform specification.
337         * @since As of 1.7, throws {@link NullPointerException} instead of UnsupportedEncodingException
338         */
339        public static String newStringUtf8(final byte[] bytes) {
340            return newString(bytes, Charsets.UTF_8);
341        }
342    
343    }