1 /*
2 * Licensed to the Apache Software Foundation (ASF) under one or more
3 * contributor license agreements. See the NOTICE file distributed with
4 * this work for additional information regarding copyright ownership.
5 * The ASF licenses this file to You under the Apache License, Version 2.0
6 * (the "License"); you may not use this file except in compliance with
7 * the License. You may obtain a copy of the License at
8 *
9 * http://www.apache.org/licenses/LICENSE-2.0
10 *
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
16 */
17
18 package org.apache.commons.codec.binary;
19
20 import java.io.UnsupportedEncodingException;
21 import java.nio.charset.Charset;
22
23 import org.apache.commons.codec.CharEncoding;
24 import org.apache.commons.codec.Charsets;
25
26 /**
27 * Converts String to and from bytes using the encodings required by the Java specification. These encodings are
28 * specified in <a href="http://download.oracle.com/javase/6/docs/api/java/nio/charset/Charset.html">
29 * Standard charsets</a>.
30 *
31 * <p>This class is immutable and thread-safe.</p>
32 *
33 * @see CharEncoding
34 * @see <a href="http://download.oracle.com/javase/6/docs/api/java/nio/charset/Charset.html">Standard charsets</a>
35 * @version $Id: StringUtils.html 889935 2013-12-11 05:05:13Z ggregory $
36 * @since 1.4
37 */
38 public class StringUtils {
39
40 /**
41 * Calls {@link String#getBytes(Charset)}
42 *
43 * @param string
44 * The string to encode (if null, return null).
45 * @param charset
46 * The {@link Charset} to encode the {@code String}
47 * @return the encoded bytes
48 */
49 private static byte[] getBytes(final String string, final Charset charset) {
50 if (string == null) {
51 return null;
52 }
53 return string.getBytes(charset);
54 }
55
56 /**
57 * Encodes the given string into a sequence of bytes using the ISO-8859-1 charset, storing the result into a new
58 * byte array.
59 *
60 * @param string
61 * the String to encode, may be {@code null}
62 * @return encoded bytes, or {@code null} if the input string was {@code null}
63 * @throws NullPointerException
64 * Thrown if {@link Charsets#ISO_8859_1} is not initialized, which should never happen since it is
65 * required by the Java platform specification.
66 * @since As of 1.7, throws {@link NullPointerException} instead of UnsupportedEncodingException
67 * @see <a href="http://download.oracle.com/javase/6/docs/api/java/nio/charset/Charset.html">Standard charsets</a>
68 * @see #getBytesUnchecked(String, String)
69 */
70 public static byte[] getBytesIso8859_1(final String string) {
71 return getBytes(string, Charsets.ISO_8859_1);
72 }
73
74
75 /**
76 * Encodes the given string into a sequence of bytes using the named charset, storing the result into a new byte
77 * array.
78 * <p>
79 * This method catches {@link UnsupportedEncodingException} and rethrows it as {@link IllegalStateException}, which
80 * should never happen for a required charset name. Use this method when the encoding is required to be in the JRE.
81 * </p>
82 *
83 * @param string
84 * the String to encode, may be {@code null}
85 * @param charsetName
86 * The name of a required {@link java.nio.charset.Charset}
87 * @return encoded bytes, or {@code null} if the input string was {@code null}
88 * @throws IllegalStateException
89 * Thrown when a {@link UnsupportedEncodingException} is caught, which should never happen for a
90 * required charset name.
91 * @see CharEncoding
92 * @see String#getBytes(String)
93 */
94 public static byte[] getBytesUnchecked(final String string, final String charsetName) {
95 if (string == null) {
96 return null;
97 }
98 try {
99 return string.getBytes(charsetName);
100 } catch (final UnsupportedEncodingException e) {
101 throw StringUtils.newIllegalStateException(charsetName, e);
102 }
103 }
104
105 /**
106 * Encodes the given string into a sequence of bytes using the US-ASCII charset, storing the result into a new byte
107 * array.
108 *
109 * @param string
110 * the String to encode, may be {@code null}
111 * @return encoded bytes, or {@code null} if the input string was {@code null}
112 * @throws NullPointerException
113 * Thrown if {@link Charsets#US_ASCII} is not initialized, which should never happen since it is
114 * required by the Java platform specification.
115 * @since As of 1.7, throws {@link NullPointerException} instead of UnsupportedEncodingException
116 * @see <a href="http://download.oracle.com/javase/6/docs/api/java/nio/charset/Charset.html">Standard charsets</a>
117 * @see #getBytesUnchecked(String, String)
118 */
119 public static byte[] getBytesUsAscii(final String string) {
120 return getBytes(string, Charsets.US_ASCII);
121 }
122
123 /**
124 * Encodes the given string into a sequence of bytes using the UTF-16 charset, storing the result into a new byte
125 * array.
126 *
127 * @param string
128 * the String to encode, may be {@code null}
129 * @return encoded bytes, or {@code null} if the input string was {@code null}
130 * @throws NullPointerException
131 * Thrown if {@link Charsets#UTF_16} is not initialized, which should never happen since it is
132 * required by the Java platform specification.
133 * @since As of 1.7, throws {@link NullPointerException} instead of UnsupportedEncodingException
134 * @see <a href="http://download.oracle.com/javase/6/docs/api/java/nio/charset/Charset.html">Standard charsets</a>
135 * @see #getBytesUnchecked(String, String)
136 */
137 public static byte[] getBytesUtf16(final String string) {
138 return getBytes(string, Charsets.UTF_16);
139 }
140
141 /**
142 * Encodes the given string into a sequence of bytes using the UTF-16BE charset, storing the result into a new byte
143 * array.
144 *
145 * @param string
146 * the String to encode, may be {@code null}
147 * @return encoded bytes, or {@code null} if the input string was {@code null}
148 * @throws NullPointerException
149 * Thrown if {@link Charsets#UTF_16BE} is not initialized, which should never happen since it is
150 * required by the Java platform specification.
151 * @since As of 1.7, throws {@link NullPointerException} instead of UnsupportedEncodingException
152 * @see <a href="http://download.oracle.com/javase/6/docs/api/java/nio/charset/Charset.html">Standard charsets</a>
153 * @see #getBytesUnchecked(String, String)
154 */
155 public static byte[] getBytesUtf16Be(final String string) {
156 return getBytes(string, Charsets.UTF_16BE);
157 }
158
159 /**
160 * Encodes the given string into a sequence of bytes using the UTF-16LE charset, storing the result into a new byte
161 * array.
162 *
163 * @param string
164 * the String to encode, may be {@code null}
165 * @return encoded bytes, or {@code null} if the input string was {@code null}
166 * @throws NullPointerException
167 * Thrown if {@link Charsets#UTF_16LE} is not initialized, which should never happen since it is
168 * required by the Java platform specification.
169 * @since As of 1.7, throws {@link NullPointerException} instead of UnsupportedEncodingException
170 * @see <a href="http://download.oracle.com/javase/6/docs/api/java/nio/charset/Charset.html">Standard charsets</a>
171 * @see #getBytesUnchecked(String, String)
172 */
173 public static byte[] getBytesUtf16Le(final String string) {
174 return getBytes(string, Charsets.UTF_16LE);
175 }
176
177 /**
178 * Encodes the given string into a sequence of bytes using the UTF-8 charset, storing the result into a new byte
179 * array.
180 *
181 * @param string
182 * the String to encode, may be {@code null}
183 * @return encoded bytes, or {@code null} if the input string was {@code null}
184 * @throws NullPointerException
185 * Thrown if {@link Charsets#UTF_8} is not initialized, which should never happen since it is
186 * required by the Java platform specification.
187 * @since As of 1.7, throws {@link NullPointerException} instead of UnsupportedEncodingException
188 * @see <a href="http://download.oracle.com/javase/6/docs/api/java/nio/charset/Charset.html">Standard charsets</a>
189 * @see #getBytesUnchecked(String, String)
190 */
191 public static byte[] getBytesUtf8(final String string) {
192 return getBytes(string, Charsets.UTF_8);
193 }
194
195 private static IllegalStateException newIllegalStateException(final String charsetName,
196 final UnsupportedEncodingException e) {
197 return new IllegalStateException(charsetName + ": " + e);
198 }
199
200 /**
201 * Constructs a new <code>String</code> by decoding the specified array of bytes using the given charset.
202 *
203 * @param bytes
204 * The bytes to be decoded into characters
205 * @param charset
206 * The {@link Charset} to encode the {@code String}
207 * @return A new <code>String</code> decoded from the specified array of bytes using the given charset,
208 * or {@code null} if the input byte array was {@code null}.
209 * @throws NullPointerException
210 * Thrown if {@link Charsets#UTF_8} is not initialized, which should never happen since it is
211 * required by the Java platform specification.
212 */
213 private static String newString(final byte[] bytes, final Charset charset) {
214 return bytes == null ? null : new String(bytes, charset);
215 }
216
217 /**
218 * Constructs a new <code>String</code> by decoding the specified array of bytes using the given charset.
219 * <p>
220 * This method catches {@link UnsupportedEncodingException} and re-throws it as {@link IllegalStateException}, which
221 * should never happen for a required charset name. Use this method when the encoding is required to be in the JRE.
222 * </p>
223 *
224 * @param bytes
225 * The bytes to be decoded into characters, may be {@code null}
226 * @param charsetName
227 * The name of a required {@link java.nio.charset.Charset}
228 * @return A new <code>String</code> decoded from the specified array of bytes using the given charset,
229 * or {@code null} if the input byte array was {@code null}.
230 * @throws IllegalStateException
231 * Thrown when a {@link UnsupportedEncodingException} is caught, which should never happen for a
232 * required charset name.
233 * @see CharEncoding
234 * @see String#String(byte[], String)
235 */
236 public static String newString(final byte[] bytes, final String charsetName) {
237 if (bytes == null) {
238 return null;
239 }
240 try {
241 return new String(bytes, charsetName);
242 } catch (final UnsupportedEncodingException e) {
243 throw StringUtils.newIllegalStateException(charsetName, e);
244 }
245 }
246
247 /**
248 * Constructs a new <code>String</code> by decoding the specified array of bytes using the ISO-8859-1 charset.
249 *
250 * @param bytes
251 * The bytes to be decoded into characters, may be {@code null}
252 * @return A new <code>String</code> decoded from the specified array of bytes using the ISO-8859-1 charset, or
253 * {@code null} if the input byte array was {@code null}.
254 * @throws NullPointerException
255 * Thrown if {@link Charsets#ISO_8859_1} is not initialized, which should never happen since it is
256 * required by the Java platform specification.
257 * @since As of 1.7, throws {@link NullPointerException} instead of UnsupportedEncodingException
258 */
259 public static String newStringIso8859_1(final byte[] bytes) {
260 return new String(bytes, Charsets.ISO_8859_1);
261 }
262
263 /**
264 * Constructs a new <code>String</code> by decoding the specified array of bytes using the US-ASCII charset.
265 *
266 * @param bytes
267 * The bytes to be decoded into characters
268 * @return A new <code>String</code> decoded from the specified array of bytes using the US-ASCII charset,
269 * or {@code null} if the input byte array was {@code null}.
270 * @throws NullPointerException
271 * Thrown if {@link Charsets#US_ASCII} is not initialized, which should never happen since it is
272 * required by the Java platform specification.
273 * @since As of 1.7, throws {@link NullPointerException} instead of UnsupportedEncodingException
274 */
275 public static String newStringUsAscii(final byte[] bytes) {
276 return new String(bytes, Charsets.US_ASCII);
277 }
278
279 /**
280 * Constructs a new <code>String</code> by decoding the specified array of bytes using the UTF-16 charset.
281 *
282 * @param bytes
283 * The bytes to be decoded into characters
284 * @return A new <code>String</code> decoded from the specified array of bytes using the UTF-16 charset
285 * or {@code null} if the input byte array was {@code null}.
286 * @throws NullPointerException
287 * Thrown if {@link Charsets#UTF_16} is not initialized, which should never happen since it is
288 * required by the Java platform specification.
289 * @since As of 1.7, throws {@link NullPointerException} instead of UnsupportedEncodingException
290 */
291 public static String newStringUtf16(final byte[] bytes) {
292 return new String(bytes, Charsets.UTF_16);
293 }
294
295 /**
296 * Constructs a new <code>String</code> by decoding the specified array of bytes using the UTF-16BE charset.
297 *
298 * @param bytes
299 * The bytes to be decoded into characters
300 * @return A new <code>String</code> decoded from the specified array of bytes using the UTF-16BE charset,
301 * or {@code null} if the input byte array was {@code null}.
302 * @throws NullPointerException
303 * Thrown if {@link Charsets#UTF_16BE} is not initialized, which should never happen since it is
304 * required by the Java platform specification.
305 * @since As of 1.7, throws {@link NullPointerException} instead of UnsupportedEncodingException
306 */
307 public static String newStringUtf16Be(final byte[] bytes) {
308 return new String(bytes, Charsets.UTF_16BE);
309 }
310
311 /**
312 * Constructs a new <code>String</code> by decoding the specified array of bytes using the UTF-16LE charset.
313 *
314 * @param bytes
315 * The bytes to be decoded into characters
316 * @return A new <code>String</code> decoded from the specified array of bytes using the UTF-16LE charset,
317 * or {@code null} if the input byte array was {@code null}.
318 * @throws NullPointerException
319 * Thrown if {@link Charsets#UTF_16LE} is not initialized, which should never happen since it is
320 * required by the Java platform specification.
321 * @since As of 1.7, throws {@link NullPointerException} instead of UnsupportedEncodingException
322 */
323 public static String newStringUtf16Le(final byte[] bytes) {
324 return new String(bytes, Charsets.UTF_16LE);
325 }
326
327 /**
328 * Constructs a new <code>String</code> by decoding the specified array of bytes using the UTF-8 charset.
329 *
330 * @param bytes
331 * The bytes to be decoded into characters
332 * @return A new <code>String</code> decoded from the specified array of bytes using the UTF-8 charset,
333 * or {@code null} if the input byte array was {@code null}.
334 * @throws NullPointerException
335 * Thrown if {@link Charsets#UTF_8} is not initialized, which should never happen since it is
336 * required by the Java platform specification.
337 * @since As of 1.7, throws {@link NullPointerException} instead of UnsupportedEncodingException
338 */
339 public static String newStringUtf8(final byte[] bytes) {
340 return newString(bytes, Charsets.UTF_8);
341 }
342
343 }