View Javadoc

1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one or more
3    * contributor license agreements.  See the NOTICE file distributed with
4    * this work for additional information regarding copyright ownership.
5    * The ASF licenses this file to You under the Apache License, Version 2.0
6    * (the "License"); you may not use this file except in compliance with
7    * the License.  You may obtain a copy of the License at
8    *
9    *      http://www.apache.org/licenses/LICENSE-2.0
10   *
11   * Unless required by applicable law or agreed to in writing, software
12   * distributed under the License is distributed on an "AS IS" BASIS,
13   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14   * See the License for the specific language governing permissions and
15   * limitations under the License.
16   */
17  
18  package org.apache.commons.codec.binary;
19  
20  import java.io.UnsupportedEncodingException;
21  import java.nio.charset.Charset;
22  
23  import org.apache.commons.codec.CharEncoding;
24  import org.apache.commons.codec.Charsets;
25  
26  /**
27   * Converts String to and from bytes using the encodings required by the Java specification. These encodings are
28   * specified in <a href="http://download.oracle.com/javase/6/docs/api/java/nio/charset/Charset.html">
29   * Standard charsets</a>.
30   *
31   * <p>This class is immutable and thread-safe.</p>
32   *
33   * @see CharEncoding
34   * @see <a href="http://download.oracle.com/javase/6/docs/api/java/nio/charset/Charset.html">Standard charsets</a>
35   * @version $Id: StringUtils.html 889935 2013-12-11 05:05:13Z ggregory $
36   * @since 1.4
37   */
38  public class StringUtils {
39  
40      /**
41       * Calls {@link String#getBytes(Charset)}
42       *
43       * @param string
44       *            The string to encode (if null, return null).
45       * @param charset
46       *            The {@link Charset} to encode the {@code String}
47       * @return the encoded bytes
48       */
49      private static byte[] getBytes(final String string, final Charset charset) {
50          if (string == null) {
51              return null;
52          }
53          return string.getBytes(charset);
54      }
55  
56      /**
57       * Encodes the given string into a sequence of bytes using the ISO-8859-1 charset, storing the result into a new
58       * byte array.
59       *
60       * @param string
61       *            the String to encode, may be {@code null}
62       * @return encoded bytes, or {@code null} if the input string was {@code null}
63       * @throws NullPointerException
64       *             Thrown if {@link Charsets#ISO_8859_1} is not initialized, which should never happen since it is
65       *             required by the Java platform specification.
66       * @since As of 1.7, throws {@link NullPointerException} instead of UnsupportedEncodingException
67       * @see <a href="http://download.oracle.com/javase/6/docs/api/java/nio/charset/Charset.html">Standard charsets</a>
68       * @see #getBytesUnchecked(String, String)
69       */
70      public static byte[] getBytesIso8859_1(final String string) {
71          return getBytes(string, Charsets.ISO_8859_1);
72      }
73  
74  
75      /**
76       * Encodes the given string into a sequence of bytes using the named charset, storing the result into a new byte
77       * array.
78       * <p>
79       * This method catches {@link UnsupportedEncodingException} and rethrows it as {@link IllegalStateException}, which
80       * should never happen for a required charset name. Use this method when the encoding is required to be in the JRE.
81       * </p>
82       *
83       * @param string
84       *            the String to encode, may be {@code null}
85       * @param charsetName
86       *            The name of a required {@link java.nio.charset.Charset}
87       * @return encoded bytes, or {@code null} if the input string was {@code null}
88       * @throws IllegalStateException
89       *             Thrown when a {@link UnsupportedEncodingException} is caught, which should never happen for a
90       *             required charset name.
91       * @see CharEncoding
92       * @see String#getBytes(String)
93       */
94      public static byte[] getBytesUnchecked(final String string, final String charsetName) {
95          if (string == null) {
96              return null;
97          }
98          try {
99              return string.getBytes(charsetName);
100         } catch (final UnsupportedEncodingException e) {
101             throw StringUtils.newIllegalStateException(charsetName, e);
102         }
103     }
104 
105     /**
106      * Encodes the given string into a sequence of bytes using the US-ASCII charset, storing the result into a new byte
107      * array.
108      *
109      * @param string
110      *            the String to encode, may be {@code null}
111      * @return encoded bytes, or {@code null} if the input string was {@code null}
112      * @throws NullPointerException
113      *             Thrown if {@link Charsets#US_ASCII} is not initialized, which should never happen since it is
114      *             required by the Java platform specification.
115      * @since As of 1.7, throws {@link NullPointerException} instead of UnsupportedEncodingException
116      * @see <a href="http://download.oracle.com/javase/6/docs/api/java/nio/charset/Charset.html">Standard charsets</a>
117      * @see #getBytesUnchecked(String, String)
118      */
119     public static byte[] getBytesUsAscii(final String string) {
120         return getBytes(string, Charsets.US_ASCII);
121     }
122 
123     /**
124      * Encodes the given string into a sequence of bytes using the UTF-16 charset, storing the result into a new byte
125      * array.
126      *
127      * @param string
128      *            the String to encode, may be {@code null}
129      * @return encoded bytes, or {@code null} if the input string was {@code null}
130      * @throws NullPointerException
131      *             Thrown if {@link Charsets#UTF_16} is not initialized, which should never happen since it is
132      *             required by the Java platform specification.
133      * @since As of 1.7, throws {@link NullPointerException} instead of UnsupportedEncodingException
134      * @see <a href="http://download.oracle.com/javase/6/docs/api/java/nio/charset/Charset.html">Standard charsets</a>
135      * @see #getBytesUnchecked(String, String)
136      */
137     public static byte[] getBytesUtf16(final String string) {
138         return getBytes(string, Charsets.UTF_16);
139     }
140 
141     /**
142      * Encodes the given string into a sequence of bytes using the UTF-16BE charset, storing the result into a new byte
143      * array.
144      *
145      * @param string
146      *            the String to encode, may be {@code null}
147      * @return encoded bytes, or {@code null} if the input string was {@code null}
148      * @throws NullPointerException
149      *             Thrown if {@link Charsets#UTF_16BE} is not initialized, which should never happen since it is
150      *             required by the Java platform specification.
151      * @since As of 1.7, throws {@link NullPointerException} instead of UnsupportedEncodingException
152      * @see <a href="http://download.oracle.com/javase/6/docs/api/java/nio/charset/Charset.html">Standard charsets</a>
153      * @see #getBytesUnchecked(String, String)
154      */
155     public static byte[] getBytesUtf16Be(final String string) {
156         return getBytes(string, Charsets.UTF_16BE);
157     }
158 
159     /**
160      * Encodes the given string into a sequence of bytes using the UTF-16LE charset, storing the result into a new byte
161      * array.
162      *
163      * @param string
164      *            the String to encode, may be {@code null}
165      * @return encoded bytes, or {@code null} if the input string was {@code null}
166      * @throws NullPointerException
167      *             Thrown if {@link Charsets#UTF_16LE} is not initialized, which should never happen since it is
168      *             required by the Java platform specification.
169      * @since As of 1.7, throws {@link NullPointerException} instead of UnsupportedEncodingException
170      * @see <a href="http://download.oracle.com/javase/6/docs/api/java/nio/charset/Charset.html">Standard charsets</a>
171      * @see #getBytesUnchecked(String, String)
172      */
173     public static byte[] getBytesUtf16Le(final String string) {
174         return getBytes(string, Charsets.UTF_16LE);
175     }
176 
177     /**
178      * Encodes the given string into a sequence of bytes using the UTF-8 charset, storing the result into a new byte
179      * array.
180      *
181      * @param string
182      *            the String to encode, may be {@code null}
183      * @return encoded bytes, or {@code null} if the input string was {@code null}
184      * @throws NullPointerException
185      *             Thrown if {@link Charsets#UTF_8} is not initialized, which should never happen since it is
186      *             required by the Java platform specification.
187      * @since As of 1.7, throws {@link NullPointerException} instead of UnsupportedEncodingException
188      * @see <a href="http://download.oracle.com/javase/6/docs/api/java/nio/charset/Charset.html">Standard charsets</a>
189      * @see #getBytesUnchecked(String, String)
190      */
191     public static byte[] getBytesUtf8(final String string) {
192         return getBytes(string, Charsets.UTF_8);
193     }
194 
195     private static IllegalStateException newIllegalStateException(final String charsetName,
196                                                                   final UnsupportedEncodingException e) {
197         return new IllegalStateException(charsetName + ": " + e);
198     }
199 
200     /**
201      * Constructs a new <code>String</code> by decoding the specified array of bytes using the given charset.
202      *
203      * @param bytes
204      *            The bytes to be decoded into characters
205      * @param charset
206      *            The {@link Charset} to encode the {@code String}
207      * @return A new <code>String</code> decoded from the specified array of bytes using the given charset,
208      *         or {@code null} if the input byte array was {@code null}.
209      * @throws NullPointerException
210      *             Thrown if {@link Charsets#UTF_8} is not initialized, which should never happen since it is
211      *             required by the Java platform specification.
212      */
213     private static String newString(final byte[] bytes, final Charset charset) {
214         return bytes == null ? null : new String(bytes, charset);
215     }
216 
217     /**
218      * Constructs a new <code>String</code> by decoding the specified array of bytes using the given charset.
219      * <p>
220      * This method catches {@link UnsupportedEncodingException} and re-throws it as {@link IllegalStateException}, which
221      * should never happen for a required charset name. Use this method when the encoding is required to be in the JRE.
222      * </p>
223      *
224      * @param bytes
225      *            The bytes to be decoded into characters, may be {@code null}
226      * @param charsetName
227      *            The name of a required {@link java.nio.charset.Charset}
228      * @return A new <code>String</code> decoded from the specified array of bytes using the given charset,
229      *         or {@code null} if the input byte array was {@code null}.
230      * @throws IllegalStateException
231      *             Thrown when a {@link UnsupportedEncodingException} is caught, which should never happen for a
232      *             required charset name.
233      * @see CharEncoding
234      * @see String#String(byte[], String)
235      */
236     public static String newString(final byte[] bytes, final String charsetName) {
237         if (bytes == null) {
238             return null;
239         }
240         try {
241             return new String(bytes, charsetName);
242         } catch (final UnsupportedEncodingException e) {
243             throw StringUtils.newIllegalStateException(charsetName, e);
244         }
245     }
246 
247     /**
248      * Constructs a new <code>String</code> by decoding the specified array of bytes using the ISO-8859-1 charset.
249      *
250      * @param bytes
251      *            The bytes to be decoded into characters, may be {@code null}
252      * @return A new <code>String</code> decoded from the specified array of bytes using the ISO-8859-1 charset, or
253      *         {@code null} if the input byte array was {@code null}.
254      * @throws NullPointerException
255      *             Thrown if {@link Charsets#ISO_8859_1} is not initialized, which should never happen since it is
256      *             required by the Java platform specification.
257      * @since As of 1.7, throws {@link NullPointerException} instead of UnsupportedEncodingException
258      */
259     public static String newStringIso8859_1(final byte[] bytes) {
260         return new String(bytes, Charsets.ISO_8859_1);
261     }
262 
263     /**
264      * Constructs a new <code>String</code> by decoding the specified array of bytes using the US-ASCII charset.
265      *
266      * @param bytes
267      *            The bytes to be decoded into characters
268      * @return A new <code>String</code> decoded from the specified array of bytes using the US-ASCII charset,
269      *         or {@code null} if the input byte array was {@code null}.
270      * @throws NullPointerException
271      *             Thrown if {@link Charsets#US_ASCII} is not initialized, which should never happen since it is
272      *             required by the Java platform specification.
273      * @since As of 1.7, throws {@link NullPointerException} instead of UnsupportedEncodingException
274      */
275     public static String newStringUsAscii(final byte[] bytes) {
276         return new String(bytes, Charsets.US_ASCII);
277     }
278 
279     /**
280      * Constructs a new <code>String</code> by decoding the specified array of bytes using the UTF-16 charset.
281      *
282      * @param bytes
283      *            The bytes to be decoded into characters
284      * @return A new <code>String</code> decoded from the specified array of bytes using the UTF-16 charset
285      *         or {@code null} if the input byte array was {@code null}.
286      * @throws NullPointerException
287      *             Thrown if {@link Charsets#UTF_16} is not initialized, which should never happen since it is
288      *             required by the Java platform specification.
289      * @since As of 1.7, throws {@link NullPointerException} instead of UnsupportedEncodingException
290      */
291     public static String newStringUtf16(final byte[] bytes) {
292         return new String(bytes, Charsets.UTF_16);
293     }
294 
295     /**
296      * Constructs a new <code>String</code> by decoding the specified array of bytes using the UTF-16BE charset.
297      *
298      * @param bytes
299      *            The bytes to be decoded into characters
300      * @return A new <code>String</code> decoded from the specified array of bytes using the UTF-16BE charset,
301      *         or {@code null} if the input byte array was {@code null}.
302      * @throws NullPointerException
303      *             Thrown if {@link Charsets#UTF_16BE} is not initialized, which should never happen since it is
304      *             required by the Java platform specification.
305      * @since As of 1.7, throws {@link NullPointerException} instead of UnsupportedEncodingException
306      */
307     public static String newStringUtf16Be(final byte[] bytes) {
308         return new String(bytes, Charsets.UTF_16BE);
309     }
310 
311     /**
312      * Constructs a new <code>String</code> by decoding the specified array of bytes using the UTF-16LE charset.
313      *
314      * @param bytes
315      *            The bytes to be decoded into characters
316      * @return A new <code>String</code> decoded from the specified array of bytes using the UTF-16LE charset,
317      *         or {@code null} if the input byte array was {@code null}.
318      * @throws NullPointerException
319      *             Thrown if {@link Charsets#UTF_16LE} is not initialized, which should never happen since it is
320      *             required by the Java platform specification.
321      * @since As of 1.7, throws {@link NullPointerException} instead of UnsupportedEncodingException
322      */
323     public static String newStringUtf16Le(final byte[] bytes) {
324         return new String(bytes, Charsets.UTF_16LE);
325     }
326 
327     /**
328      * Constructs a new <code>String</code> by decoding the specified array of bytes using the UTF-8 charset.
329      *
330      * @param bytes
331      *            The bytes to be decoded into characters
332      * @return A new <code>String</code> decoded from the specified array of bytes using the UTF-8 charset,
333      *         or {@code null} if the input byte array was {@code null}.
334      * @throws NullPointerException
335      *             Thrown if {@link Charsets#UTF_8} is not initialized, which should never happen since it is
336      *             required by the Java platform specification.
337      * @since As of 1.7, throws {@link NullPointerException} instead of UnsupportedEncodingException
338      */
339     public static String newStringUtf8(final byte[] bytes) {
340         return newString(bytes, Charsets.UTF_8);
341     }
342 
343 }