001    /*
002     * Licensed to the Apache Software Foundation (ASF) under one or more
003     * contributor license agreements.  See the NOTICE file distributed with
004     * this work for additional information regarding copyright ownership.
005     * The ASF licenses this file to You under the Apache License, Version 2.0
006     * (the "License"); you may not use this file except in compliance with
007     * the License.  You may obtain a copy of the License at
008     *
009     *      http://www.apache.org/licenses/LICENSE-2.0
010     *
011     * Unless required by applicable law or agreed to in writing, software
012     * distributed under the License is distributed on an "AS IS" BASIS,
013     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014     * See the License for the specific language governing permissions and
015     * limitations under the License.
016     */
017    
018    package org.apache.commons.codec.binary;
019    
020    import java.io.UnsupportedEncodingException;
021    import java.nio.charset.Charset;
022    
023    import org.apache.commons.codec.CharEncoding;
024    import org.apache.commons.codec.Charsets;
025    
026    /**
027     * Converts String to and from bytes using the encodings required by the Java specification. These encodings are
028     * specified in <a href="http://download.oracle.com/javase/6/docs/api/java/nio/charset/Charset.html">
029     * Standard charsets</a>.
030     *
031     * <p>This class is immutable and thread-safe.</p>
032     *
033     * @see CharEncoding
034     * @see <a href="http://download.oracle.com/javase/6/docs/api/java/nio/charset/Charset.html">Standard charsets</a>
035     * @version $Id: StringUtils.html 889935 2013-12-11 05:05:13Z ggregory $
036     * @since 1.4
037     */
038    public class StringUtils {
039    
040        /**
041         * Calls {@link String#getBytes(Charset)}
042         *
043         * @param string
044         *            The string to encode (if null, return null).
045         * @param charset
046         *            The {@link Charset} to encode the {@code String}
047         * @return the encoded bytes
048         */
049        private static byte[] getBytes(String string, Charset charset) {
050            if (string == null) {
051                return null;
052            }
053            return string.getBytes(charset);
054        }
055    
056        /**
057         * Encodes the given string into a sequence of bytes using the ISO-8859-1 charset, storing the result into a new
058         * byte array.
059         *
060         * @param string
061         *            the String to encode, may be {@code null}
062         * @return encoded bytes, or {@code null} if the input string was {@code null}
063         * @throws NullPointerException
064         *             Thrown if {@link Charsets#ISO_8859_1} is not initialized, which should never happen since it is
065         *             required by the Java platform specification.
066         * @since As of 1.7, throws {@link NullPointerException} instead of UnsupportedEncodingException
067         * @see <a href="http://download.oracle.com/javase/6/docs/api/java/nio/charset/Charset.html">Standard charsets</a>
068         * @see #getBytesUnchecked(String, String)
069         */
070        public static byte[] getBytesIso8859_1(String string) {
071            return getBytes(string, Charsets.ISO_8859_1);
072        }
073    
074    
075        /**
076         * Encodes the given string into a sequence of bytes using the named charset, storing the result into a new byte
077         * array.
078         * <p>
079         * This method catches {@link UnsupportedEncodingException} and rethrows it as {@link IllegalStateException}, which
080         * should never happen for a required charset name. Use this method when the encoding is required to be in the JRE.
081         * </p>
082         *
083         * @param string
084         *            the String to encode, may be {@code null}
085         * @param charsetName
086         *            The name of a required {@link java.nio.charset.Charset}
087         * @return encoded bytes, or {@code null} if the input string was {@code null}
088         * @throws IllegalStateException
089         *             Thrown when a {@link UnsupportedEncodingException} is caught, which should never happen for a
090         *             required charset name.
091         * @see CharEncoding
092         * @see String#getBytes(String)
093         */
094        public static byte[] getBytesUnchecked(String string, String charsetName) {
095            if (string == null) {
096                return null;
097            }
098            try {
099                return string.getBytes(charsetName);
100            } catch (UnsupportedEncodingException e) {
101                throw StringUtils.newIllegalStateException(charsetName, e);
102            }
103        }
104    
105        /**
106         * Encodes the given string into a sequence of bytes using the US-ASCII charset, storing the result into a new byte
107         * array.
108         *
109         * @param string
110         *            the String to encode, may be {@code null}
111         * @return encoded bytes, or {@code null} if the input string was {@code null}
112         * @throws NullPointerException
113         *             Thrown if {@link Charsets#US_ASCII} is not initialized, which should never happen since it is
114         *             required by the Java platform specification.
115         * @since As of 1.7, throws {@link NullPointerException} instead of UnsupportedEncodingException
116         * @see <a href="http://download.oracle.com/javase/6/docs/api/java/nio/charset/Charset.html">Standard charsets</a>
117         * @see #getBytesUnchecked(String, String)
118         */
119        public static byte[] getBytesUsAscii(String string) {
120            return getBytes(string, Charsets.US_ASCII);
121        }
122    
123        /**
124         * Encodes the given string into a sequence of bytes using the UTF-16 charset, storing the result into a new byte
125         * array.
126         *
127         * @param string
128         *            the String to encode, may be {@code null}
129         * @return encoded bytes, or {@code null} if the input string was {@code null}
130         * @throws NullPointerException
131         *             Thrown if {@link Charsets#UTF_16} is not initialized, which should never happen since it is
132         *             required by the Java platform specification.
133         * @since As of 1.7, throws {@link NullPointerException} instead of UnsupportedEncodingException
134         * @see <a href="http://download.oracle.com/javase/6/docs/api/java/nio/charset/Charset.html">Standard charsets</a>
135         * @see #getBytesUnchecked(String, String)
136         */
137        public static byte[] getBytesUtf16(String string) {
138            return getBytes(string, Charsets.UTF_16);
139        }
140    
141        /**
142         * Encodes the given string into a sequence of bytes using the UTF-16BE charset, storing the result into a new byte
143         * array.
144         *
145         * @param string
146         *            the String to encode, may be {@code null}
147         * @return encoded bytes, or {@code null} if the input string was {@code null}
148         * @throws NullPointerException
149         *             Thrown if {@link Charsets#UTF_16BE} is not initialized, which should never happen since it is
150         *             required by the Java platform specification.
151         * @since As of 1.7, throws {@link NullPointerException} instead of UnsupportedEncodingException
152         * @see <a href="http://download.oracle.com/javase/6/docs/api/java/nio/charset/Charset.html">Standard charsets</a>
153         * @see #getBytesUnchecked(String, String)
154         */
155        public static byte[] getBytesUtf16Be(String string) {
156            return getBytes(string, Charsets.UTF_16BE);
157        }
158    
159        /**
160         * Encodes the given string into a sequence of bytes using the UTF-16LE charset, storing the result into a new byte
161         * array.
162         *
163         * @param string
164         *            the String to encode, may be {@code null}
165         * @return encoded bytes, or {@code null} if the input string was {@code null}
166         * @throws NullPointerException
167         *             Thrown if {@link Charsets#UTF_16LE} is not initialized, which should never happen since it is
168         *             required by the Java platform specification.
169         * @since As of 1.7, throws {@link NullPointerException} instead of UnsupportedEncodingException
170         * @see <a href="http://download.oracle.com/javase/6/docs/api/java/nio/charset/Charset.html">Standard charsets</a>
171         * @see #getBytesUnchecked(String, String)
172         */
173        public static byte[] getBytesUtf16Le(String string) {
174            return getBytes(string, Charsets.UTF_16LE);
175        }
176    
177        /**
178         * Encodes the given string into a sequence of bytes using the UTF-8 charset, storing the result into a new byte
179         * array.
180         *
181         * @param string
182         *            the String to encode, may be {@code null}
183         * @return encoded bytes, or {@code null} if the input string was {@code null}
184         * @throws NullPointerException
185         *             Thrown if {@link Charsets#UTF_8} is not initialized, which should never happen since it is
186         *             required by the Java platform specification.
187         * @since As of 1.7, throws {@link NullPointerException} instead of UnsupportedEncodingException
188         * @see <a href="http://download.oracle.com/javase/6/docs/api/java/nio/charset/Charset.html">Standard charsets</a>
189         * @see #getBytesUnchecked(String, String)
190         */
191        public static byte[] getBytesUtf8(String string) {
192            return getBytes(string, Charsets.UTF_8);
193        }
194    
195        private static IllegalStateException newIllegalStateException(String charsetName, UnsupportedEncodingException e) {
196            return new IllegalStateException(charsetName + ": " + e);
197        }
198    
199        /**
200         * Constructs a new <code>String</code> by decoding the specified array of bytes using the given charset.
201         *
202         * @param bytes
203         *            The bytes to be decoded into characters
204         * @param charset
205         *            The {@link Charset} to encode the {@code String}
206         * @return A new <code>String</code> decoded from the specified array of bytes using the given charset,
207         *         or {@code null} if the input byte array was {@code null}.
208         * @throws NullPointerException
209         *             Thrown if {@link Charsets#UTF_8} is not initialized, which should never happen since it is
210         *             required by the Java platform specification.
211         */
212        private static String newString(byte[] bytes, Charset charset) {
213            return bytes == null ? null : new String(bytes, charset);
214        }
215    
216        /**
217         * Constructs a new <code>String</code> by decoding the specified array of bytes using the given charset.
218         * <p>
219         * This method catches {@link UnsupportedEncodingException} and re-throws it as {@link IllegalStateException}, which
220         * should never happen for a required charset name. Use this method when the encoding is required to be in the JRE.
221         * </p>
222         *
223         * @param bytes
224         *            The bytes to be decoded into characters, may be {@code null}
225         * @param charsetName
226         *            The name of a required {@link java.nio.charset.Charset}
227         * @return A new <code>String</code> decoded from the specified array of bytes using the given charset,
228         *         or {@code null} if the input byte array was {@code null}.
229         * @throws IllegalStateException
230         *             Thrown when a {@link UnsupportedEncodingException} is caught, which should never happen for a
231         *             required charset name.
232         * @see CharEncoding
233         * @see String#String(byte[], String)
234         */
235        public static String newString(byte[] bytes, String charsetName) {
236            if (bytes == null) {
237                return null;
238            }
239            try {
240                return new String(bytes, charsetName);
241            } catch (UnsupportedEncodingException e) {
242                throw StringUtils.newIllegalStateException(charsetName, e);
243            }
244        }
245    
246        /**
247         * Constructs a new <code>String</code> by decoding the specified array of bytes using the ISO-8859-1 charset.
248         *
249         * @param bytes
250         *            The bytes to be decoded into characters, may be {@code null}
251         * @return A new <code>String</code> decoded from the specified array of bytes using the ISO-8859-1 charset, or
252         *         {@code null} if the input byte array was {@code null}.
253         * @throws NullPointerException
254         *             Thrown if {@link Charsets#ISO_8859_1} is not initialized, which should never happen since it is
255         *             required by the Java platform specification.
256         * @since As of 1.7, throws {@link NullPointerException} instead of UnsupportedEncodingException
257         */
258        public static String newStringIso8859_1(byte[] bytes) {
259            return new String(bytes, Charsets.ISO_8859_1);
260        }
261    
262        /**
263         * Constructs a new <code>String</code> by decoding the specified array of bytes using the US-ASCII charset.
264         *
265         * @param bytes
266         *            The bytes to be decoded into characters
267         * @return A new <code>String</code> decoded from the specified array of bytes using the US-ASCII charset,
268         *         or {@code null} if the input byte array was {@code null}.
269         * @throws NullPointerException
270         *             Thrown if {@link Charsets#US_ASCII} is not initialized, which should never happen since it is
271         *             required by the Java platform specification.
272         * @since As of 1.7, throws {@link NullPointerException} instead of UnsupportedEncodingException
273         */
274        public static String newStringUsAscii(byte[] bytes) {
275            return new String(bytes, Charsets.US_ASCII);
276        }
277    
278        /**
279         * Constructs a new <code>String</code> by decoding the specified array of bytes using the UTF-16 charset.
280         *
281         * @param bytes
282         *            The bytes to be decoded into characters
283         * @return A new <code>String</code> decoded from the specified array of bytes using the UTF-16 charset
284         *         or {@code null} if the input byte array was {@code null}.
285         * @throws NullPointerException
286         *             Thrown if {@link Charsets#UTF_16} is not initialized, which should never happen since it is
287         *             required by the Java platform specification.
288         * @since As of 1.7, throws {@link NullPointerException} instead of UnsupportedEncodingException
289         */
290        public static String newStringUtf16(byte[] bytes) {
291            return new String(bytes, Charsets.UTF_16);
292        }
293    
294        /**
295         * Constructs a new <code>String</code> by decoding the specified array of bytes using the UTF-16BE charset.
296         *
297         * @param bytes
298         *            The bytes to be decoded into characters
299         * @return A new <code>String</code> decoded from the specified array of bytes using the UTF-16BE charset,
300         *         or {@code null} if the input byte array was {@code null}.
301         * @throws NullPointerException
302         *             Thrown if {@link Charsets#UTF_16BE} is not initialized, which should never happen since it is
303         *             required by the Java platform specification.
304         * @since As of 1.7, throws {@link NullPointerException} instead of UnsupportedEncodingException
305         */
306        public static String newStringUtf16Be(byte[] bytes) {
307            return new String(bytes, Charsets.UTF_16BE);
308        }
309    
310        /**
311         * Constructs a new <code>String</code> by decoding the specified array of bytes using the UTF-16LE charset.
312         *
313         * @param bytes
314         *            The bytes to be decoded into characters
315         * @return A new <code>String</code> decoded from the specified array of bytes using the UTF-16LE charset,
316         *         or {@code null} if the input byte array was {@code null}.
317         * @throws NullPointerException
318         *             Thrown if {@link Charsets#UTF_16LE} is not initialized, which should never happen since it is
319         *             required by the Java platform specification.
320         * @since As of 1.7, throws {@link NullPointerException} instead of UnsupportedEncodingException
321         */
322        public static String newStringUtf16Le(byte[] bytes) {
323            return new String(bytes, Charsets.UTF_16LE);
324        }
325    
326        /**
327         * Constructs a new <code>String</code> by decoding the specified array of bytes using the UTF-8 charset.
328         *
329         * @param bytes
330         *            The bytes to be decoded into characters
331         * @return A new <code>String</code> decoded from the specified array of bytes using the UTF-8 charset,
332         *         or {@code null} if the input byte array was {@code null}.
333         * @throws NullPointerException
334         *             Thrown if {@link Charsets#UTF_8} is not initialized, which should never happen since it is
335         *             required by the Java platform specification.
336         * @since As of 1.7, throws {@link NullPointerException} instead of UnsupportedEncodingException
337         */
338        public static String newStringUtf8(byte[] bytes) {
339            return newString(bytes, Charsets.UTF_8);
340        }
341    
342    }