001 /*
002 * Licensed to the Apache Software Foundation (ASF) under one or more
003 * contributor license agreements. See the NOTICE file distributed with
004 * this work for additional information regarding copyright ownership.
005 * The ASF licenses this file to You under the Apache License, Version 2.0
006 * (the "License"); you may not use this file except in compliance with
007 * the License. You may obtain a copy of the License at
008 *
009 * http://www.apache.org/licenses/LICENSE-2.0
010 *
011 * Unless required by applicable law or agreed to in writing, software
012 * distributed under the License is distributed on an "AS IS" BASIS,
013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 * See the License for the specific language governing permissions and
015 * limitations under the License.
016 */
017
018 package org.apache.commons.codec.binary;
019
020 import java.io.UnsupportedEncodingException;
021 import java.nio.charset.Charset;
022
023 import org.apache.commons.codec.CharEncoding;
024 import org.apache.commons.codec.Charsets;
025
026 /**
027 * Converts String to and from bytes using the encodings required by the Java specification. These encodings are
028 * specified in <a href="http://download.oracle.com/javase/6/docs/api/java/nio/charset/Charset.html">
029 * Standard charsets</a>.
030 *
031 * <p>This class is immutable and thread-safe.</p>
032 *
033 * @see CharEncoding
034 * @see <a href="http://download.oracle.com/javase/6/docs/api/java/nio/charset/Charset.html">Standard charsets</a>
035 * @version $Id: StringUtils.html 889935 2013-12-11 05:05:13Z ggregory $
036 * @since 1.4
037 */
038 public class StringUtils {
039
040 /**
041 * Calls {@link String#getBytes(Charset)}
042 *
043 * @param string
044 * The string to encode (if null, return null).
045 * @param charset
046 * The {@link Charset} to encode the {@code String}
047 * @return the encoded bytes
048 */
049 private static byte[] getBytes(String string, Charset charset) {
050 if (string == null) {
051 return null;
052 }
053 return string.getBytes(charset);
054 }
055
056 /**
057 * Encodes the given string into a sequence of bytes using the ISO-8859-1 charset, storing the result into a new
058 * byte array.
059 *
060 * @param string
061 * the String to encode, may be {@code null}
062 * @return encoded bytes, or {@code null} if the input string was {@code null}
063 * @throws NullPointerException
064 * Thrown if {@link Charsets#ISO_8859_1} is not initialized, which should never happen since it is
065 * required by the Java platform specification.
066 * @since As of 1.7, throws {@link NullPointerException} instead of UnsupportedEncodingException
067 * @see <a href="http://download.oracle.com/javase/6/docs/api/java/nio/charset/Charset.html">Standard charsets</a>
068 * @see #getBytesUnchecked(String, String)
069 */
070 public static byte[] getBytesIso8859_1(String string) {
071 return getBytes(string, Charsets.ISO_8859_1);
072 }
073
074
075 /**
076 * Encodes the given string into a sequence of bytes using the named charset, storing the result into a new byte
077 * array.
078 * <p>
079 * This method catches {@link UnsupportedEncodingException} and rethrows it as {@link IllegalStateException}, which
080 * should never happen for a required charset name. Use this method when the encoding is required to be in the JRE.
081 * </p>
082 *
083 * @param string
084 * the String to encode, may be {@code null}
085 * @param charsetName
086 * The name of a required {@link java.nio.charset.Charset}
087 * @return encoded bytes, or {@code null} if the input string was {@code null}
088 * @throws IllegalStateException
089 * Thrown when a {@link UnsupportedEncodingException} is caught, which should never happen for a
090 * required charset name.
091 * @see CharEncoding
092 * @see String#getBytes(String)
093 */
094 public static byte[] getBytesUnchecked(String string, String charsetName) {
095 if (string == null) {
096 return null;
097 }
098 try {
099 return string.getBytes(charsetName);
100 } catch (UnsupportedEncodingException e) {
101 throw StringUtils.newIllegalStateException(charsetName, e);
102 }
103 }
104
105 /**
106 * Encodes the given string into a sequence of bytes using the US-ASCII charset, storing the result into a new byte
107 * array.
108 *
109 * @param string
110 * the String to encode, may be {@code null}
111 * @return encoded bytes, or {@code null} if the input string was {@code null}
112 * @throws NullPointerException
113 * Thrown if {@link Charsets#US_ASCII} is not initialized, which should never happen since it is
114 * required by the Java platform specification.
115 * @since As of 1.7, throws {@link NullPointerException} instead of UnsupportedEncodingException
116 * @see <a href="http://download.oracle.com/javase/6/docs/api/java/nio/charset/Charset.html">Standard charsets</a>
117 * @see #getBytesUnchecked(String, String)
118 */
119 public static byte[] getBytesUsAscii(String string) {
120 return getBytes(string, Charsets.US_ASCII);
121 }
122
123 /**
124 * Encodes the given string into a sequence of bytes using the UTF-16 charset, storing the result into a new byte
125 * array.
126 *
127 * @param string
128 * the String to encode, may be {@code null}
129 * @return encoded bytes, or {@code null} if the input string was {@code null}
130 * @throws NullPointerException
131 * Thrown if {@link Charsets#UTF_16} is not initialized, which should never happen since it is
132 * required by the Java platform specification.
133 * @since As of 1.7, throws {@link NullPointerException} instead of UnsupportedEncodingException
134 * @see <a href="http://download.oracle.com/javase/6/docs/api/java/nio/charset/Charset.html">Standard charsets</a>
135 * @see #getBytesUnchecked(String, String)
136 */
137 public static byte[] getBytesUtf16(String string) {
138 return getBytes(string, Charsets.UTF_16);
139 }
140
141 /**
142 * Encodes the given string into a sequence of bytes using the UTF-16BE charset, storing the result into a new byte
143 * array.
144 *
145 * @param string
146 * the String to encode, may be {@code null}
147 * @return encoded bytes, or {@code null} if the input string was {@code null}
148 * @throws NullPointerException
149 * Thrown if {@link Charsets#UTF_16BE} is not initialized, which should never happen since it is
150 * required by the Java platform specification.
151 * @since As of 1.7, throws {@link NullPointerException} instead of UnsupportedEncodingException
152 * @see <a href="http://download.oracle.com/javase/6/docs/api/java/nio/charset/Charset.html">Standard charsets</a>
153 * @see #getBytesUnchecked(String, String)
154 */
155 public static byte[] getBytesUtf16Be(String string) {
156 return getBytes(string, Charsets.UTF_16BE);
157 }
158
159 /**
160 * Encodes the given string into a sequence of bytes using the UTF-16LE charset, storing the result into a new byte
161 * array.
162 *
163 * @param string
164 * the String to encode, may be {@code null}
165 * @return encoded bytes, or {@code null} if the input string was {@code null}
166 * @throws NullPointerException
167 * Thrown if {@link Charsets#UTF_16LE} is not initialized, which should never happen since it is
168 * required by the Java platform specification.
169 * @since As of 1.7, throws {@link NullPointerException} instead of UnsupportedEncodingException
170 * @see <a href="http://download.oracle.com/javase/6/docs/api/java/nio/charset/Charset.html">Standard charsets</a>
171 * @see #getBytesUnchecked(String, String)
172 */
173 public static byte[] getBytesUtf16Le(String string) {
174 return getBytes(string, Charsets.UTF_16LE);
175 }
176
177 /**
178 * Encodes the given string into a sequence of bytes using the UTF-8 charset, storing the result into a new byte
179 * array.
180 *
181 * @param string
182 * the String to encode, may be {@code null}
183 * @return encoded bytes, or {@code null} if the input string was {@code null}
184 * @throws NullPointerException
185 * Thrown if {@link Charsets#UTF_8} is not initialized, which should never happen since it is
186 * required by the Java platform specification.
187 * @since As of 1.7, throws {@link NullPointerException} instead of UnsupportedEncodingException
188 * @see <a href="http://download.oracle.com/javase/6/docs/api/java/nio/charset/Charset.html">Standard charsets</a>
189 * @see #getBytesUnchecked(String, String)
190 */
191 public static byte[] getBytesUtf8(String string) {
192 return getBytes(string, Charsets.UTF_8);
193 }
194
195 private static IllegalStateException newIllegalStateException(String charsetName, UnsupportedEncodingException e) {
196 return new IllegalStateException(charsetName + ": " + e);
197 }
198
199 /**
200 * Constructs a new <code>String</code> by decoding the specified array of bytes using the given charset.
201 *
202 * @param bytes
203 * The bytes to be decoded into characters
204 * @param charset
205 * The {@link Charset} to encode the {@code String}
206 * @return A new <code>String</code> decoded from the specified array of bytes using the given charset,
207 * or {@code null} if the input byte array was {@code null}.
208 * @throws NullPointerException
209 * Thrown if {@link Charsets#UTF_8} is not initialized, which should never happen since it is
210 * required by the Java platform specification.
211 */
212 private static String newString(byte[] bytes, Charset charset) {
213 return bytes == null ? null : new String(bytes, charset);
214 }
215
216 /**
217 * Constructs a new <code>String</code> by decoding the specified array of bytes using the given charset.
218 * <p>
219 * This method catches {@link UnsupportedEncodingException} and re-throws it as {@link IllegalStateException}, which
220 * should never happen for a required charset name. Use this method when the encoding is required to be in the JRE.
221 * </p>
222 *
223 * @param bytes
224 * The bytes to be decoded into characters, may be {@code null}
225 * @param charsetName
226 * The name of a required {@link java.nio.charset.Charset}
227 * @return A new <code>String</code> decoded from the specified array of bytes using the given charset,
228 * or {@code null} if the input byte array was {@code null}.
229 * @throws IllegalStateException
230 * Thrown when a {@link UnsupportedEncodingException} is caught, which should never happen for a
231 * required charset name.
232 * @see CharEncoding
233 * @see String#String(byte[], String)
234 */
235 public static String newString(byte[] bytes, String charsetName) {
236 if (bytes == null) {
237 return null;
238 }
239 try {
240 return new String(bytes, charsetName);
241 } catch (UnsupportedEncodingException e) {
242 throw StringUtils.newIllegalStateException(charsetName, e);
243 }
244 }
245
246 /**
247 * Constructs a new <code>String</code> by decoding the specified array of bytes using the ISO-8859-1 charset.
248 *
249 * @param bytes
250 * The bytes to be decoded into characters, may be {@code null}
251 * @return A new <code>String</code> decoded from the specified array of bytes using the ISO-8859-1 charset, or
252 * {@code null} if the input byte array was {@code null}.
253 * @throws NullPointerException
254 * Thrown if {@link Charsets#ISO_8859_1} is not initialized, which should never happen since it is
255 * required by the Java platform specification.
256 * @since As of 1.7, throws {@link NullPointerException} instead of UnsupportedEncodingException
257 */
258 public static String newStringIso8859_1(byte[] bytes) {
259 return new String(bytes, Charsets.ISO_8859_1);
260 }
261
262 /**
263 * Constructs a new <code>String</code> by decoding the specified array of bytes using the US-ASCII charset.
264 *
265 * @param bytes
266 * The bytes to be decoded into characters
267 * @return A new <code>String</code> decoded from the specified array of bytes using the US-ASCII charset,
268 * or {@code null} if the input byte array was {@code null}.
269 * @throws NullPointerException
270 * Thrown if {@link Charsets#US_ASCII} is not initialized, which should never happen since it is
271 * required by the Java platform specification.
272 * @since As of 1.7, throws {@link NullPointerException} instead of UnsupportedEncodingException
273 */
274 public static String newStringUsAscii(byte[] bytes) {
275 return new String(bytes, Charsets.US_ASCII);
276 }
277
278 /**
279 * Constructs a new <code>String</code> by decoding the specified array of bytes using the UTF-16 charset.
280 *
281 * @param bytes
282 * The bytes to be decoded into characters
283 * @return A new <code>String</code> decoded from the specified array of bytes using the UTF-16 charset
284 * or {@code null} if the input byte array was {@code null}.
285 * @throws NullPointerException
286 * Thrown if {@link Charsets#UTF_16} is not initialized, which should never happen since it is
287 * required by the Java platform specification.
288 * @since As of 1.7, throws {@link NullPointerException} instead of UnsupportedEncodingException
289 */
290 public static String newStringUtf16(byte[] bytes) {
291 return new String(bytes, Charsets.UTF_16);
292 }
293
294 /**
295 * Constructs a new <code>String</code> by decoding the specified array of bytes using the UTF-16BE charset.
296 *
297 * @param bytes
298 * The bytes to be decoded into characters
299 * @return A new <code>String</code> decoded from the specified array of bytes using the UTF-16BE charset,
300 * or {@code null} if the input byte array was {@code null}.
301 * @throws NullPointerException
302 * Thrown if {@link Charsets#UTF_16BE} is not initialized, which should never happen since it is
303 * required by the Java platform specification.
304 * @since As of 1.7, throws {@link NullPointerException} instead of UnsupportedEncodingException
305 */
306 public static String newStringUtf16Be(byte[] bytes) {
307 return new String(bytes, Charsets.UTF_16BE);
308 }
309
310 /**
311 * Constructs a new <code>String</code> by decoding the specified array of bytes using the UTF-16LE charset.
312 *
313 * @param bytes
314 * The bytes to be decoded into characters
315 * @return A new <code>String</code> decoded from the specified array of bytes using the UTF-16LE charset,
316 * or {@code null} if the input byte array was {@code null}.
317 * @throws NullPointerException
318 * Thrown if {@link Charsets#UTF_16LE} is not initialized, which should never happen since it is
319 * required by the Java platform specification.
320 * @since As of 1.7, throws {@link NullPointerException} instead of UnsupportedEncodingException
321 */
322 public static String newStringUtf16Le(byte[] bytes) {
323 return new String(bytes, Charsets.UTF_16LE);
324 }
325
326 /**
327 * Constructs a new <code>String</code> by decoding the specified array of bytes using the UTF-8 charset.
328 *
329 * @param bytes
330 * The bytes to be decoded into characters
331 * @return A new <code>String</code> decoded from the specified array of bytes using the UTF-8 charset,
332 * or {@code null} if the input byte array was {@code null}.
333 * @throws NullPointerException
334 * Thrown if {@link Charsets#UTF_8} is not initialized, which should never happen since it is
335 * required by the Java platform specification.
336 * @since As of 1.7, throws {@link NullPointerException} instead of UnsupportedEncodingException
337 */
338 public static String newStringUtf8(byte[] bytes) {
339 return newString(bytes, Charsets.UTF_8);
340 }
341
342 }