1 /*
2 * Licensed to the Apache Software Foundation (ASF) under one or more
3 * contributor license agreements. See the NOTICE file distributed with
4 * this work for additional information regarding copyright ownership.
5 * The ASF licenses this file to You under the Apache License, Version 2.0
6 * (the "License"); you may not use this file except in compliance with
7 * the License. You may obtain a copy of the License at
8 *
9 * http://www.apache.org/licenses/LICENSE-2.0
10 *
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
16 */
17
18 package org.apache.commons.codec.binary;
19
20 import java.io.UnsupportedEncodingException;
21 import java.nio.charset.Charset;
22
23 import org.apache.commons.codec.CharEncoding;
24 import org.apache.commons.codec.Charsets;
25
26 /**
27 * Converts String to and from bytes using the encodings required by the Java specification. These encodings are
28 * specified in <a href="http://download.oracle.com/javase/6/docs/api/java/nio/charset/Charset.html">
29 * Standard charsets</a>.
30 *
31 * <p>This class is immutable and thread-safe.</p>
32 *
33 * @see CharEncoding
34 * @see <a href="http://download.oracle.com/javase/6/docs/api/java/nio/charset/Charset.html">Standard charsets</a>
35 * @version $Id: StringUtils.html 889935 2013-12-11 05:05:13Z ggregory $
36 * @since 1.4
37 */
38 public class StringUtils {
39
40 /**
41 * Calls {@link String#getBytes(Charset)}
42 *
43 * @param string
44 * The string to encode (if null, return null).
45 * @param charset
46 * The {@link Charset} to encode the {@code String}
47 * @return the encoded bytes
48 */
49 private static byte[] getBytes(String string, Charset charset) {
50 if (string == null) {
51 return null;
52 }
53 return string.getBytes(charset);
54 }
55
56 /**
57 * Encodes the given string into a sequence of bytes using the ISO-8859-1 charset, storing the result into a new
58 * byte array.
59 *
60 * @param string
61 * the String to encode, may be {@code null}
62 * @return encoded bytes, or {@code null} if the input string was {@code null}
63 * @throws NullPointerException
64 * Thrown if {@link Charsets#ISO_8859_1} is not initialized, which should never happen since it is
65 * required by the Java platform specification.
66 * @since As of 1.7, throws {@link NullPointerException} instead of UnsupportedEncodingException
67 * @see <a href="http://download.oracle.com/javase/6/docs/api/java/nio/charset/Charset.html">Standard charsets</a>
68 * @see #getBytesUnchecked(String, String)
69 */
70 public static byte[] getBytesIso8859_1(String string) {
71 return getBytes(string, Charsets.ISO_8859_1);
72 }
73
74
75 /**
76 * Encodes the given string into a sequence of bytes using the named charset, storing the result into a new byte
77 * array.
78 * <p>
79 * This method catches {@link UnsupportedEncodingException} and rethrows it as {@link IllegalStateException}, which
80 * should never happen for a required charset name. Use this method when the encoding is required to be in the JRE.
81 * </p>
82 *
83 * @param string
84 * the String to encode, may be {@code null}
85 * @param charsetName
86 * The name of a required {@link java.nio.charset.Charset}
87 * @return encoded bytes, or {@code null} if the input string was {@code null}
88 * @throws IllegalStateException
89 * Thrown when a {@link UnsupportedEncodingException} is caught, which should never happen for a
90 * required charset name.
91 * @see CharEncoding
92 * @see String#getBytes(String)
93 */
94 public static byte[] getBytesUnchecked(String string, String charsetName) {
95 if (string == null) {
96 return null;
97 }
98 try {
99 return string.getBytes(charsetName);
100 } catch (UnsupportedEncodingException e) {
101 throw StringUtils.newIllegalStateException(charsetName, e);
102 }
103 }
104
105 /**
106 * Encodes the given string into a sequence of bytes using the US-ASCII charset, storing the result into a new byte
107 * array.
108 *
109 * @param string
110 * the String to encode, may be {@code null}
111 * @return encoded bytes, or {@code null} if the input string was {@code null}
112 * @throws NullPointerException
113 * Thrown if {@link Charsets#US_ASCII} is not initialized, which should never happen since it is
114 * required by the Java platform specification.
115 * @since As of 1.7, throws {@link NullPointerException} instead of UnsupportedEncodingException
116 * @see <a href="http://download.oracle.com/javase/6/docs/api/java/nio/charset/Charset.html">Standard charsets</a>
117 * @see #getBytesUnchecked(String, String)
118 */
119 public static byte[] getBytesUsAscii(String string) {
120 return getBytes(string, Charsets.US_ASCII);
121 }
122
123 /**
124 * Encodes the given string into a sequence of bytes using the UTF-16 charset, storing the result into a new byte
125 * array.
126 *
127 * @param string
128 * the String to encode, may be {@code null}
129 * @return encoded bytes, or {@code null} if the input string was {@code null}
130 * @throws NullPointerException
131 * Thrown if {@link Charsets#UTF_16} is not initialized, which should never happen since it is
132 * required by the Java platform specification.
133 * @since As of 1.7, throws {@link NullPointerException} instead of UnsupportedEncodingException
134 * @see <a href="http://download.oracle.com/javase/6/docs/api/java/nio/charset/Charset.html">Standard charsets</a>
135 * @see #getBytesUnchecked(String, String)
136 */
137 public static byte[] getBytesUtf16(String string) {
138 return getBytes(string, Charsets.UTF_16);
139 }
140
141 /**
142 * Encodes the given string into a sequence of bytes using the UTF-16BE charset, storing the result into a new byte
143 * array.
144 *
145 * @param string
146 * the String to encode, may be {@code null}
147 * @return encoded bytes, or {@code null} if the input string was {@code null}
148 * @throws NullPointerException
149 * Thrown if {@link Charsets#UTF_16BE} is not initialized, which should never happen since it is
150 * required by the Java platform specification.
151 * @since As of 1.7, throws {@link NullPointerException} instead of UnsupportedEncodingException
152 * @see <a href="http://download.oracle.com/javase/6/docs/api/java/nio/charset/Charset.html">Standard charsets</a>
153 * @see #getBytesUnchecked(String, String)
154 */
155 public static byte[] getBytesUtf16Be(String string) {
156 return getBytes(string, Charsets.UTF_16BE);
157 }
158
159 /**
160 * Encodes the given string into a sequence of bytes using the UTF-16LE charset, storing the result into a new byte
161 * array.
162 *
163 * @param string
164 * the String to encode, may be {@code null}
165 * @return encoded bytes, or {@code null} if the input string was {@code null}
166 * @throws NullPointerException
167 * Thrown if {@link Charsets#UTF_16LE} is not initialized, which should never happen since it is
168 * required by the Java platform specification.
169 * @since As of 1.7, throws {@link NullPointerException} instead of UnsupportedEncodingException
170 * @see <a href="http://download.oracle.com/javase/6/docs/api/java/nio/charset/Charset.html">Standard charsets</a>
171 * @see #getBytesUnchecked(String, String)
172 */
173 public static byte[] getBytesUtf16Le(String string) {
174 return getBytes(string, Charsets.UTF_16LE);
175 }
176
177 /**
178 * Encodes the given string into a sequence of bytes using the UTF-8 charset, storing the result into a new byte
179 * array.
180 *
181 * @param string
182 * the String to encode, may be {@code null}
183 * @return encoded bytes, or {@code null} if the input string was {@code null}
184 * @throws NullPointerException
185 * Thrown if {@link Charsets#UTF_8} is not initialized, which should never happen since it is
186 * required by the Java platform specification.
187 * @since As of 1.7, throws {@link NullPointerException} instead of UnsupportedEncodingException
188 * @see <a href="http://download.oracle.com/javase/6/docs/api/java/nio/charset/Charset.html">Standard charsets</a>
189 * @see #getBytesUnchecked(String, String)
190 */
191 public static byte[] getBytesUtf8(String string) {
192 return getBytes(string, Charsets.UTF_8);
193 }
194
195 private static IllegalStateException newIllegalStateException(String charsetName, UnsupportedEncodingException e) {
196 return new IllegalStateException(charsetName + ": " + e);
197 }
198
199 /**
200 * Constructs a new <code>String</code> by decoding the specified array of bytes using the given charset.
201 *
202 * @param bytes
203 * The bytes to be decoded into characters
204 * @param charset
205 * The {@link Charset} to encode the {@code String}
206 * @return A new <code>String</code> decoded from the specified array of bytes using the given charset,
207 * or {@code null} if the input byte array was {@code null}.
208 * @throws NullPointerException
209 * Thrown if {@link Charsets#UTF_8} is not initialized, which should never happen since it is
210 * required by the Java platform specification.
211 */
212 private static String newString(byte[] bytes, Charset charset) {
213 return bytes == null ? null : new String(bytes, charset);
214 }
215
216 /**
217 * Constructs a new <code>String</code> by decoding the specified array of bytes using the given charset.
218 * <p>
219 * This method catches {@link UnsupportedEncodingException} and re-throws it as {@link IllegalStateException}, which
220 * should never happen for a required charset name. Use this method when the encoding is required to be in the JRE.
221 * </p>
222 *
223 * @param bytes
224 * The bytes to be decoded into characters, may be {@code null}
225 * @param charsetName
226 * The name of a required {@link java.nio.charset.Charset}
227 * @return A new <code>String</code> decoded from the specified array of bytes using the given charset,
228 * or {@code null} if the input byte array was {@code null}.
229 * @throws IllegalStateException
230 * Thrown when a {@link UnsupportedEncodingException} is caught, which should never happen for a
231 * required charset name.
232 * @see CharEncoding
233 * @see String#String(byte[], String)
234 */
235 public static String newString(byte[] bytes, String charsetName) {
236 if (bytes == null) {
237 return null;
238 }
239 try {
240 return new String(bytes, charsetName);
241 } catch (UnsupportedEncodingException e) {
242 throw StringUtils.newIllegalStateException(charsetName, e);
243 }
244 }
245
246 /**
247 * Constructs a new <code>String</code> by decoding the specified array of bytes using the ISO-8859-1 charset.
248 *
249 * @param bytes
250 * The bytes to be decoded into characters, may be {@code null}
251 * @return A new <code>String</code> decoded from the specified array of bytes using the ISO-8859-1 charset, or
252 * {@code null} if the input byte array was {@code null}.
253 * @throws NullPointerException
254 * Thrown if {@link Charsets#ISO_8859_1} is not initialized, which should never happen since it is
255 * required by the Java platform specification.
256 * @since As of 1.7, throws {@link NullPointerException} instead of UnsupportedEncodingException
257 */
258 public static String newStringIso8859_1(byte[] bytes) {
259 return new String(bytes, Charsets.ISO_8859_1);
260 }
261
262 /**
263 * Constructs a new <code>String</code> by decoding the specified array of bytes using the US-ASCII charset.
264 *
265 * @param bytes
266 * The bytes to be decoded into characters
267 * @return A new <code>String</code> decoded from the specified array of bytes using the US-ASCII charset,
268 * or {@code null} if the input byte array was {@code null}.
269 * @throws NullPointerException
270 * Thrown if {@link Charsets#US_ASCII} is not initialized, which should never happen since it is
271 * required by the Java platform specification.
272 * @since As of 1.7, throws {@link NullPointerException} instead of UnsupportedEncodingException
273 */
274 public static String newStringUsAscii(byte[] bytes) {
275 return new String(bytes, Charsets.US_ASCII);
276 }
277
278 /**
279 * Constructs a new <code>String</code> by decoding the specified array of bytes using the UTF-16 charset.
280 *
281 * @param bytes
282 * The bytes to be decoded into characters
283 * @return A new <code>String</code> decoded from the specified array of bytes using the UTF-16 charset
284 * or {@code null} if the input byte array was {@code null}.
285 * @throws NullPointerException
286 * Thrown if {@link Charsets#UTF_16} is not initialized, which should never happen since it is
287 * required by the Java platform specification.
288 * @since As of 1.7, throws {@link NullPointerException} instead of UnsupportedEncodingException
289 */
290 public static String newStringUtf16(byte[] bytes) {
291 return new String(bytes, Charsets.UTF_16);
292 }
293
294 /**
295 * Constructs a new <code>String</code> by decoding the specified array of bytes using the UTF-16BE charset.
296 *
297 * @param bytes
298 * The bytes to be decoded into characters
299 * @return A new <code>String</code> decoded from the specified array of bytes using the UTF-16BE charset,
300 * or {@code null} if the input byte array was {@code null}.
301 * @throws NullPointerException
302 * Thrown if {@link Charsets#UTF_16BE} is not initialized, which should never happen since it is
303 * required by the Java platform specification.
304 * @since As of 1.7, throws {@link NullPointerException} instead of UnsupportedEncodingException
305 */
306 public static String newStringUtf16Be(byte[] bytes) {
307 return new String(bytes, Charsets.UTF_16BE);
308 }
309
310 /**
311 * Constructs a new <code>String</code> by decoding the specified array of bytes using the UTF-16LE charset.
312 *
313 * @param bytes
314 * The bytes to be decoded into characters
315 * @return A new <code>String</code> decoded from the specified array of bytes using the UTF-16LE charset,
316 * or {@code null} if the input byte array was {@code null}.
317 * @throws NullPointerException
318 * Thrown if {@link Charsets#UTF_16LE} is not initialized, which should never happen since it is
319 * required by the Java platform specification.
320 * @since As of 1.7, throws {@link NullPointerException} instead of UnsupportedEncodingException
321 */
322 public static String newStringUtf16Le(byte[] bytes) {
323 return new String(bytes, Charsets.UTF_16LE);
324 }
325
326 /**
327 * Constructs a new <code>String</code> by decoding the specified array of bytes using the UTF-8 charset.
328 *
329 * @param bytes
330 * The bytes to be decoded into characters
331 * @return A new <code>String</code> decoded from the specified array of bytes using the UTF-8 charset,
332 * or {@code null} if the input byte array was {@code null}.
333 * @throws NullPointerException
334 * Thrown if {@link Charsets#UTF_8} is not initialized, which should never happen since it is
335 * required by the Java platform specification.
336 * @since As of 1.7, throws {@link NullPointerException} instead of UnsupportedEncodingException
337 */
338 public static String newStringUtf8(byte[] bytes) {
339 return newString(bytes, Charsets.UTF_8);
340 }
341
342 }