001 /*
002 * Licensed to the Apache Software Foundation (ASF) under one or more
003 * contributor license agreements. See the NOTICE file distributed with
004 * this work for additional information regarding copyright ownership.
005 * The ASF licenses this file to You under the Apache License, Version 2.0
006 * (the "License"); you may not use this file except in compliance with
007 * the License. You may obtain a copy of the License at
008 *
009 * http://www.apache.org/licenses/LICENSE-2.0
010 *
011 * Unless required by applicable law or agreed to in writing, software
012 * distributed under the License is distributed on an "AS IS" BASIS,
013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 * See the License for the specific language governing permissions and
015 * limitations under the License.
016 */
017
018 package org.apache.commons.codec.binary;
019
020 import java.io.UnsupportedEncodingException;
021 import java.nio.charset.Charset;
022
023 import org.apache.commons.codec.CharEncoding;
024 import org.apache.commons.codec.Charsets;
025
026 /**
027 * Converts String to and from bytes using the encodings required by the Java specification. These encodings are
028 * specified in <a href="http://download.oracle.com/javase/6/docs/api/java/nio/charset/Charset.html">
029 * Standard charsets</a>.
030 *
031 * <p>This class is immutable and thread-safe.</p>
032 *
033 * @see CharEncoding
034 * @see <a href="http://download.oracle.com/javase/6/docs/api/java/nio/charset/Charset.html">Standard charsets</a>
035 * @version $Id: StringUtils.html 889935 2013-12-11 05:05:13Z ggregory $
036 * @since 1.4
037 */
038 public class StringUtils {
039
040 /**
041 * Calls {@link String#getBytes(Charset)}
042 *
043 * @param string
044 * The string to encode (if null, return null).
045 * @param charset
046 * The {@link Charset} to encode the {@code String}
047 * @return the encoded bytes
048 */
049 private static byte[] getBytes(final String string, final Charset charset) {
050 if (string == null) {
051 return null;
052 }
053 return string.getBytes(charset);
054 }
055
056 /**
057 * Encodes the given string into a sequence of bytes using the ISO-8859-1 charset, storing the result into a new
058 * byte array.
059 *
060 * @param string
061 * the String to encode, may be {@code null}
062 * @return encoded bytes, or {@code null} if the input string was {@code null}
063 * @throws NullPointerException
064 * Thrown if {@link Charsets#ISO_8859_1} is not initialized, which should never happen since it is
065 * required by the Java platform specification.
066 * @since As of 1.7, throws {@link NullPointerException} instead of UnsupportedEncodingException
067 * @see <a href="http://download.oracle.com/javase/6/docs/api/java/nio/charset/Charset.html">Standard charsets</a>
068 * @see #getBytesUnchecked(String, String)
069 */
070 public static byte[] getBytesIso8859_1(final String string) {
071 return getBytes(string, Charsets.ISO_8859_1);
072 }
073
074
075 /**
076 * Encodes the given string into a sequence of bytes using the named charset, storing the result into a new byte
077 * array.
078 * <p>
079 * This method catches {@link UnsupportedEncodingException} and rethrows it as {@link IllegalStateException}, which
080 * should never happen for a required charset name. Use this method when the encoding is required to be in the JRE.
081 * </p>
082 *
083 * @param string
084 * the String to encode, may be {@code null}
085 * @param charsetName
086 * The name of a required {@link java.nio.charset.Charset}
087 * @return encoded bytes, or {@code null} if the input string was {@code null}
088 * @throws IllegalStateException
089 * Thrown when a {@link UnsupportedEncodingException} is caught, which should never happen for a
090 * required charset name.
091 * @see CharEncoding
092 * @see String#getBytes(String)
093 */
094 public static byte[] getBytesUnchecked(final String string, final String charsetName) {
095 if (string == null) {
096 return null;
097 }
098 try {
099 return string.getBytes(charsetName);
100 } catch (final UnsupportedEncodingException e) {
101 throw StringUtils.newIllegalStateException(charsetName, e);
102 }
103 }
104
105 /**
106 * Encodes the given string into a sequence of bytes using the US-ASCII charset, storing the result into a new byte
107 * array.
108 *
109 * @param string
110 * the String to encode, may be {@code null}
111 * @return encoded bytes, or {@code null} if the input string was {@code null}
112 * @throws NullPointerException
113 * Thrown if {@link Charsets#US_ASCII} is not initialized, which should never happen since it is
114 * required by the Java platform specification.
115 * @since As of 1.7, throws {@link NullPointerException} instead of UnsupportedEncodingException
116 * @see <a href="http://download.oracle.com/javase/6/docs/api/java/nio/charset/Charset.html">Standard charsets</a>
117 * @see #getBytesUnchecked(String, String)
118 */
119 public static byte[] getBytesUsAscii(final String string) {
120 return getBytes(string, Charsets.US_ASCII);
121 }
122
123 /**
124 * Encodes the given string into a sequence of bytes using the UTF-16 charset, storing the result into a new byte
125 * array.
126 *
127 * @param string
128 * the String to encode, may be {@code null}
129 * @return encoded bytes, or {@code null} if the input string was {@code null}
130 * @throws NullPointerException
131 * Thrown if {@link Charsets#UTF_16} is not initialized, which should never happen since it is
132 * required by the Java platform specification.
133 * @since As of 1.7, throws {@link NullPointerException} instead of UnsupportedEncodingException
134 * @see <a href="http://download.oracle.com/javase/6/docs/api/java/nio/charset/Charset.html">Standard charsets</a>
135 * @see #getBytesUnchecked(String, String)
136 */
137 public static byte[] getBytesUtf16(final String string) {
138 return getBytes(string, Charsets.UTF_16);
139 }
140
141 /**
142 * Encodes the given string into a sequence of bytes using the UTF-16BE charset, storing the result into a new byte
143 * array.
144 *
145 * @param string
146 * the String to encode, may be {@code null}
147 * @return encoded bytes, or {@code null} if the input string was {@code null}
148 * @throws NullPointerException
149 * Thrown if {@link Charsets#UTF_16BE} is not initialized, which should never happen since it is
150 * required by the Java platform specification.
151 * @since As of 1.7, throws {@link NullPointerException} instead of UnsupportedEncodingException
152 * @see <a href="http://download.oracle.com/javase/6/docs/api/java/nio/charset/Charset.html">Standard charsets</a>
153 * @see #getBytesUnchecked(String, String)
154 */
155 public static byte[] getBytesUtf16Be(final String string) {
156 return getBytes(string, Charsets.UTF_16BE);
157 }
158
159 /**
160 * Encodes the given string into a sequence of bytes using the UTF-16LE charset, storing the result into a new byte
161 * array.
162 *
163 * @param string
164 * the String to encode, may be {@code null}
165 * @return encoded bytes, or {@code null} if the input string was {@code null}
166 * @throws NullPointerException
167 * Thrown if {@link Charsets#UTF_16LE} is not initialized, which should never happen since it is
168 * required by the Java platform specification.
169 * @since As of 1.7, throws {@link NullPointerException} instead of UnsupportedEncodingException
170 * @see <a href="http://download.oracle.com/javase/6/docs/api/java/nio/charset/Charset.html">Standard charsets</a>
171 * @see #getBytesUnchecked(String, String)
172 */
173 public static byte[] getBytesUtf16Le(final String string) {
174 return getBytes(string, Charsets.UTF_16LE);
175 }
176
177 /**
178 * Encodes the given string into a sequence of bytes using the UTF-8 charset, storing the result into a new byte
179 * array.
180 *
181 * @param string
182 * the String to encode, may be {@code null}
183 * @return encoded bytes, or {@code null} if the input string was {@code null}
184 * @throws NullPointerException
185 * Thrown if {@link Charsets#UTF_8} is not initialized, which should never happen since it is
186 * required by the Java platform specification.
187 * @since As of 1.7, throws {@link NullPointerException} instead of UnsupportedEncodingException
188 * @see <a href="http://download.oracle.com/javase/6/docs/api/java/nio/charset/Charset.html">Standard charsets</a>
189 * @see #getBytesUnchecked(String, String)
190 */
191 public static byte[] getBytesUtf8(final String string) {
192 return getBytes(string, Charsets.UTF_8);
193 }
194
195 private static IllegalStateException newIllegalStateException(final String charsetName,
196 final UnsupportedEncodingException e) {
197 return new IllegalStateException(charsetName + ": " + e);
198 }
199
200 /**
201 * Constructs a new <code>String</code> by decoding the specified array of bytes using the given charset.
202 *
203 * @param bytes
204 * The bytes to be decoded into characters
205 * @param charset
206 * The {@link Charset} to encode the {@code String}
207 * @return A new <code>String</code> decoded from the specified array of bytes using the given charset,
208 * or {@code null} if the input byte array was {@code null}.
209 * @throws NullPointerException
210 * Thrown if {@link Charsets#UTF_8} is not initialized, which should never happen since it is
211 * required by the Java platform specification.
212 */
213 private static String newString(final byte[] bytes, final Charset charset) {
214 return bytes == null ? null : new String(bytes, charset);
215 }
216
217 /**
218 * Constructs a new <code>String</code> by decoding the specified array of bytes using the given charset.
219 * <p>
220 * This method catches {@link UnsupportedEncodingException} and re-throws it as {@link IllegalStateException}, which
221 * should never happen for a required charset name. Use this method when the encoding is required to be in the JRE.
222 * </p>
223 *
224 * @param bytes
225 * The bytes to be decoded into characters, may be {@code null}
226 * @param charsetName
227 * The name of a required {@link java.nio.charset.Charset}
228 * @return A new <code>String</code> decoded from the specified array of bytes using the given charset,
229 * or {@code null} if the input byte array was {@code null}.
230 * @throws IllegalStateException
231 * Thrown when a {@link UnsupportedEncodingException} is caught, which should never happen for a
232 * required charset name.
233 * @see CharEncoding
234 * @see String#String(byte[], String)
235 */
236 public static String newString(final byte[] bytes, final String charsetName) {
237 if (bytes == null) {
238 return null;
239 }
240 try {
241 return new String(bytes, charsetName);
242 } catch (final UnsupportedEncodingException e) {
243 throw StringUtils.newIllegalStateException(charsetName, e);
244 }
245 }
246
247 /**
248 * Constructs a new <code>String</code> by decoding the specified array of bytes using the ISO-8859-1 charset.
249 *
250 * @param bytes
251 * The bytes to be decoded into characters, may be {@code null}
252 * @return A new <code>String</code> decoded from the specified array of bytes using the ISO-8859-1 charset, or
253 * {@code null} if the input byte array was {@code null}.
254 * @throws NullPointerException
255 * Thrown if {@link Charsets#ISO_8859_1} is not initialized, which should never happen since it is
256 * required by the Java platform specification.
257 * @since As of 1.7, throws {@link NullPointerException} instead of UnsupportedEncodingException
258 */
259 public static String newStringIso8859_1(final byte[] bytes) {
260 return new String(bytes, Charsets.ISO_8859_1);
261 }
262
263 /**
264 * Constructs a new <code>String</code> by decoding the specified array of bytes using the US-ASCII charset.
265 *
266 * @param bytes
267 * The bytes to be decoded into characters
268 * @return A new <code>String</code> decoded from the specified array of bytes using the US-ASCII charset,
269 * or {@code null} if the input byte array was {@code null}.
270 * @throws NullPointerException
271 * Thrown if {@link Charsets#US_ASCII} is not initialized, which should never happen since it is
272 * required by the Java platform specification.
273 * @since As of 1.7, throws {@link NullPointerException} instead of UnsupportedEncodingException
274 */
275 public static String newStringUsAscii(final byte[] bytes) {
276 return new String(bytes, Charsets.US_ASCII);
277 }
278
279 /**
280 * Constructs a new <code>String</code> by decoding the specified array of bytes using the UTF-16 charset.
281 *
282 * @param bytes
283 * The bytes to be decoded into characters
284 * @return A new <code>String</code> decoded from the specified array of bytes using the UTF-16 charset
285 * or {@code null} if the input byte array was {@code null}.
286 * @throws NullPointerException
287 * Thrown if {@link Charsets#UTF_16} is not initialized, which should never happen since it is
288 * required by the Java platform specification.
289 * @since As of 1.7, throws {@link NullPointerException} instead of UnsupportedEncodingException
290 */
291 public static String newStringUtf16(final byte[] bytes) {
292 return new String(bytes, Charsets.UTF_16);
293 }
294
295 /**
296 * Constructs a new <code>String</code> by decoding the specified array of bytes using the UTF-16BE charset.
297 *
298 * @param bytes
299 * The bytes to be decoded into characters
300 * @return A new <code>String</code> decoded from the specified array of bytes using the UTF-16BE charset,
301 * or {@code null} if the input byte array was {@code null}.
302 * @throws NullPointerException
303 * Thrown if {@link Charsets#UTF_16BE} is not initialized, which should never happen since it is
304 * required by the Java platform specification.
305 * @since As of 1.7, throws {@link NullPointerException} instead of UnsupportedEncodingException
306 */
307 public static String newStringUtf16Be(final byte[] bytes) {
308 return new String(bytes, Charsets.UTF_16BE);
309 }
310
311 /**
312 * Constructs a new <code>String</code> by decoding the specified array of bytes using the UTF-16LE charset.
313 *
314 * @param bytes
315 * The bytes to be decoded into characters
316 * @return A new <code>String</code> decoded from the specified array of bytes using the UTF-16LE charset,
317 * or {@code null} if the input byte array was {@code null}.
318 * @throws NullPointerException
319 * Thrown if {@link Charsets#UTF_16LE} is not initialized, which should never happen since it is
320 * required by the Java platform specification.
321 * @since As of 1.7, throws {@link NullPointerException} instead of UnsupportedEncodingException
322 */
323 public static String newStringUtf16Le(final byte[] bytes) {
324 return new String(bytes, Charsets.UTF_16LE);
325 }
326
327 /**
328 * Constructs a new <code>String</code> by decoding the specified array of bytes using the UTF-8 charset.
329 *
330 * @param bytes
331 * The bytes to be decoded into characters
332 * @return A new <code>String</code> decoded from the specified array of bytes using the UTF-8 charset,
333 * or {@code null} if the input byte array was {@code null}.
334 * @throws NullPointerException
335 * Thrown if {@link Charsets#UTF_8} is not initialized, which should never happen since it is
336 * required by the Java platform specification.
337 * @since As of 1.7, throws {@link NullPointerException} instead of UnsupportedEncodingException
338 */
339 public static String newStringUtf8(final byte[] bytes) {
340 return newString(bytes, Charsets.UTF_8);
341 }
342
343 }