View Javadoc

1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one or more
3    * contributor license agreements.  See the NOTICE file distributed with
4    * this work for additional information regarding copyright ownership.
5    * The ASF licenses this file to You under the Apache License, Version 2.0
6    * (the "License"); you may not use this file except in compliance with
7    * the License.  You may obtain a copy of the License at
8    *
9    *      http://www.apache.org/licenses/LICENSE-2.0
10   *
11   * Unless required by applicable law or agreed to in writing, software
12   * distributed under the License is distributed on an "AS IS" BASIS,
13   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14   * See the License for the specific language governing permissions and
15   * limitations under the License.
16   */
17  
18  package org.apache.commons.codec.binary;
19  
20  import java.io.UnsupportedEncodingException;
21  import java.nio.charset.Charset;
22  
23  import org.apache.commons.codec.CharEncoding;
24  import org.apache.commons.codec.Charsets;
25  
26  /**
27   * Converts String to and from bytes using the encodings required by the Java specification. These encodings are
28   * specified in <a href="http://download.oracle.com/javase/6/docs/api/java/nio/charset/Charset.html">
29   * Standard charsets</a>.
30   *
31   * <p>This class is immutable and thread-safe.</p>
32   *
33   * @see CharEncoding
34   * @see <a href="http://download.oracle.com/javase/6/docs/api/java/nio/charset/Charset.html">Standard charsets</a>
35   * @version $Id: StringUtils.html 889935 2013-12-11 05:05:13Z ggregory $
36   * @since 1.4
37   */
38  public class StringUtils {
39  
40      /**
41       * Calls {@link String#getBytes(Charset)}
42       *
43       * @param string
44       *            The string to encode (if null, return null).
45       * @param charset
46       *            The {@link Charset} to encode the {@code String}
47       * @return the encoded bytes
48       */
49      private static byte[] getBytes(String string, Charset charset) {
50          if (string == null) {
51              return null;
52          }
53          return string.getBytes(charset);
54      }
55  
56      /**
57       * Encodes the given string into a sequence of bytes using the ISO-8859-1 charset, storing the result into a new
58       * byte array.
59       *
60       * @param string
61       *            the String to encode, may be {@code null}
62       * @return encoded bytes, or {@code null} if the input string was {@code null}
63       * @throws NullPointerException
64       *             Thrown if {@link Charsets#ISO_8859_1} is not initialized, which should never happen since it is
65       *             required by the Java platform specification.
66       * @since As of 1.7, throws {@link NullPointerException} instead of UnsupportedEncodingException
67       * @see <a href="http://download.oracle.com/javase/6/docs/api/java/nio/charset/Charset.html">Standard charsets</a>
68       * @see #getBytesUnchecked(String, String)
69       */
70      public static byte[] getBytesIso8859_1(String string) {
71          return getBytes(string, Charsets.ISO_8859_1);
72      }
73  
74  
75      /**
76       * Encodes the given string into a sequence of bytes using the named charset, storing the result into a new byte
77       * array.
78       * <p>
79       * This method catches {@link UnsupportedEncodingException} and rethrows it as {@link IllegalStateException}, which
80       * should never happen for a required charset name. Use this method when the encoding is required to be in the JRE.
81       * </p>
82       *
83       * @param string
84       *            the String to encode, may be {@code null}
85       * @param charsetName
86       *            The name of a required {@link java.nio.charset.Charset}
87       * @return encoded bytes, or {@code null} if the input string was {@code null}
88       * @throws IllegalStateException
89       *             Thrown when a {@link UnsupportedEncodingException} is caught, which should never happen for a
90       *             required charset name.
91       * @see CharEncoding
92       * @see String#getBytes(String)
93       */
94      public static byte[] getBytesUnchecked(String string, String charsetName) {
95          if (string == null) {
96              return null;
97          }
98          try {
99              return string.getBytes(charsetName);
100         } catch (UnsupportedEncodingException e) {
101             throw StringUtils.newIllegalStateException(charsetName, e);
102         }
103     }
104 
105     /**
106      * Encodes the given string into a sequence of bytes using the US-ASCII charset, storing the result into a new byte
107      * array.
108      *
109      * @param string
110      *            the String to encode, may be {@code null}
111      * @return encoded bytes, or {@code null} if the input string was {@code null}
112      * @throws NullPointerException
113      *             Thrown if {@link Charsets#US_ASCII} is not initialized, which should never happen since it is
114      *             required by the Java platform specification.
115      * @since As of 1.7, throws {@link NullPointerException} instead of UnsupportedEncodingException
116      * @see <a href="http://download.oracle.com/javase/6/docs/api/java/nio/charset/Charset.html">Standard charsets</a>
117      * @see #getBytesUnchecked(String, String)
118      */
119     public static byte[] getBytesUsAscii(String string) {
120         return getBytes(string, Charsets.US_ASCII);
121     }
122 
123     /**
124      * Encodes the given string into a sequence of bytes using the UTF-16 charset, storing the result into a new byte
125      * array.
126      *
127      * @param string
128      *            the String to encode, may be {@code null}
129      * @return encoded bytes, or {@code null} if the input string was {@code null}
130      * @throws NullPointerException
131      *             Thrown if {@link Charsets#UTF_16} is not initialized, which should never happen since it is
132      *             required by the Java platform specification.
133      * @since As of 1.7, throws {@link NullPointerException} instead of UnsupportedEncodingException
134      * @see <a href="http://download.oracle.com/javase/6/docs/api/java/nio/charset/Charset.html">Standard charsets</a>
135      * @see #getBytesUnchecked(String, String)
136      */
137     public static byte[] getBytesUtf16(String string) {
138         return getBytes(string, Charsets.UTF_16);
139     }
140 
141     /**
142      * Encodes the given string into a sequence of bytes using the UTF-16BE charset, storing the result into a new byte
143      * array.
144      *
145      * @param string
146      *            the String to encode, may be {@code null}
147      * @return encoded bytes, or {@code null} if the input string was {@code null}
148      * @throws NullPointerException
149      *             Thrown if {@link Charsets#UTF_16BE} is not initialized, which should never happen since it is
150      *             required by the Java platform specification.
151      * @since As of 1.7, throws {@link NullPointerException} instead of UnsupportedEncodingException
152      * @see <a href="http://download.oracle.com/javase/6/docs/api/java/nio/charset/Charset.html">Standard charsets</a>
153      * @see #getBytesUnchecked(String, String)
154      */
155     public static byte[] getBytesUtf16Be(String string) {
156         return getBytes(string, Charsets.UTF_16BE);
157     }
158 
159     /**
160      * Encodes the given string into a sequence of bytes using the UTF-16LE charset, storing the result into a new byte
161      * array.
162      *
163      * @param string
164      *            the String to encode, may be {@code null}
165      * @return encoded bytes, or {@code null} if the input string was {@code null}
166      * @throws NullPointerException
167      *             Thrown if {@link Charsets#UTF_16LE} is not initialized, which should never happen since it is
168      *             required by the Java platform specification.
169      * @since As of 1.7, throws {@link NullPointerException} instead of UnsupportedEncodingException
170      * @see <a href="http://download.oracle.com/javase/6/docs/api/java/nio/charset/Charset.html">Standard charsets</a>
171      * @see #getBytesUnchecked(String, String)
172      */
173     public static byte[] getBytesUtf16Le(String string) {
174         return getBytes(string, Charsets.UTF_16LE);
175     }
176 
177     /**
178      * Encodes the given string into a sequence of bytes using the UTF-8 charset, storing the result into a new byte
179      * array.
180      *
181      * @param string
182      *            the String to encode, may be {@code null}
183      * @return encoded bytes, or {@code null} if the input string was {@code null}
184      * @throws NullPointerException
185      *             Thrown if {@link Charsets#UTF_8} is not initialized, which should never happen since it is
186      *             required by the Java platform specification.
187      * @since As of 1.7, throws {@link NullPointerException} instead of UnsupportedEncodingException
188      * @see <a href="http://download.oracle.com/javase/6/docs/api/java/nio/charset/Charset.html">Standard charsets</a>
189      * @see #getBytesUnchecked(String, String)
190      */
191     public static byte[] getBytesUtf8(String string) {
192         return getBytes(string, Charsets.UTF_8);
193     }
194 
195     private static IllegalStateException newIllegalStateException(String charsetName, UnsupportedEncodingException e) {
196         return new IllegalStateException(charsetName + ": " + e);
197     }
198 
199     /**
200      * Constructs a new <code>String</code> by decoding the specified array of bytes using the given charset.
201      *
202      * @param bytes
203      *            The bytes to be decoded into characters
204      * @param charset
205      *            The {@link Charset} to encode the {@code String}
206      * @return A new <code>String</code> decoded from the specified array of bytes using the given charset,
207      *         or {@code null} if the input byte array was {@code null}.
208      * @throws NullPointerException
209      *             Thrown if {@link Charsets#UTF_8} is not initialized, which should never happen since it is
210      *             required by the Java platform specification.
211      */
212     private static String newString(byte[] bytes, Charset charset) {
213         return bytes == null ? null : new String(bytes, charset);
214     }
215 
216     /**
217      * Constructs a new <code>String</code> by decoding the specified array of bytes using the given charset.
218      * <p>
219      * This method catches {@link UnsupportedEncodingException} and re-throws it as {@link IllegalStateException}, which
220      * should never happen for a required charset name. Use this method when the encoding is required to be in the JRE.
221      * </p>
222      *
223      * @param bytes
224      *            The bytes to be decoded into characters, may be {@code null}
225      * @param charsetName
226      *            The name of a required {@link java.nio.charset.Charset}
227      * @return A new <code>String</code> decoded from the specified array of bytes using the given charset,
228      *         or {@code null} if the input byte array was {@code null}.
229      * @throws IllegalStateException
230      *             Thrown when a {@link UnsupportedEncodingException} is caught, which should never happen for a
231      *             required charset name.
232      * @see CharEncoding
233      * @see String#String(byte[], String)
234      */
235     public static String newString(byte[] bytes, String charsetName) {
236         if (bytes == null) {
237             return null;
238         }
239         try {
240             return new String(bytes, charsetName);
241         } catch (UnsupportedEncodingException e) {
242             throw StringUtils.newIllegalStateException(charsetName, e);
243         }
244     }
245 
246     /**
247      * Constructs a new <code>String</code> by decoding the specified array of bytes using the ISO-8859-1 charset.
248      *
249      * @param bytes
250      *            The bytes to be decoded into characters, may be {@code null}
251      * @return A new <code>String</code> decoded from the specified array of bytes using the ISO-8859-1 charset, or
252      *         {@code null} if the input byte array was {@code null}.
253      * @throws NullPointerException
254      *             Thrown if {@link Charsets#ISO_8859_1} is not initialized, which should never happen since it is
255      *             required by the Java platform specification.
256      * @since As of 1.7, throws {@link NullPointerException} instead of UnsupportedEncodingException
257      */
258     public static String newStringIso8859_1(byte[] bytes) {
259         return new String(bytes, Charsets.ISO_8859_1);
260     }
261 
262     /**
263      * Constructs a new <code>String</code> by decoding the specified array of bytes using the US-ASCII charset.
264      *
265      * @param bytes
266      *            The bytes to be decoded into characters
267      * @return A new <code>String</code> decoded from the specified array of bytes using the US-ASCII charset,
268      *         or {@code null} if the input byte array was {@code null}.
269      * @throws NullPointerException
270      *             Thrown if {@link Charsets#US_ASCII} is not initialized, which should never happen since it is
271      *             required by the Java platform specification.
272      * @since As of 1.7, throws {@link NullPointerException} instead of UnsupportedEncodingException
273      */
274     public static String newStringUsAscii(byte[] bytes) {
275         return new String(bytes, Charsets.US_ASCII);
276     }
277 
278     /**
279      * Constructs a new <code>String</code> by decoding the specified array of bytes using the UTF-16 charset.
280      *
281      * @param bytes
282      *            The bytes to be decoded into characters
283      * @return A new <code>String</code> decoded from the specified array of bytes using the UTF-16 charset
284      *         or {@code null} if the input byte array was {@code null}.
285      * @throws NullPointerException
286      *             Thrown if {@link Charsets#UTF_16} is not initialized, which should never happen since it is
287      *             required by the Java platform specification.
288      * @since As of 1.7, throws {@link NullPointerException} instead of UnsupportedEncodingException
289      */
290     public static String newStringUtf16(byte[] bytes) {
291         return new String(bytes, Charsets.UTF_16);
292     }
293 
294     /**
295      * Constructs a new <code>String</code> by decoding the specified array of bytes using the UTF-16BE charset.
296      *
297      * @param bytes
298      *            The bytes to be decoded into characters
299      * @return A new <code>String</code> decoded from the specified array of bytes using the UTF-16BE charset,
300      *         or {@code null} if the input byte array was {@code null}.
301      * @throws NullPointerException
302      *             Thrown if {@link Charsets#UTF_16BE} is not initialized, which should never happen since it is
303      *             required by the Java platform specification.
304      * @since As of 1.7, throws {@link NullPointerException} instead of UnsupportedEncodingException
305      */
306     public static String newStringUtf16Be(byte[] bytes) {
307         return new String(bytes, Charsets.UTF_16BE);
308     }
309 
310     /**
311      * Constructs a new <code>String</code> by decoding the specified array of bytes using the UTF-16LE charset.
312      *
313      * @param bytes
314      *            The bytes to be decoded into characters
315      * @return A new <code>String</code> decoded from the specified array of bytes using the UTF-16LE charset,
316      *         or {@code null} if the input byte array was {@code null}.
317      * @throws NullPointerException
318      *             Thrown if {@link Charsets#UTF_16LE} is not initialized, which should never happen since it is
319      *             required by the Java platform specification.
320      * @since As of 1.7, throws {@link NullPointerException} instead of UnsupportedEncodingException
321      */
322     public static String newStringUtf16Le(byte[] bytes) {
323         return new String(bytes, Charsets.UTF_16LE);
324     }
325 
326     /**
327      * Constructs a new <code>String</code> by decoding the specified array of bytes using the UTF-8 charset.
328      *
329      * @param bytes
330      *            The bytes to be decoded into characters
331      * @return A new <code>String</code> decoded from the specified array of bytes using the UTF-8 charset,
332      *         or {@code null} if the input byte array was {@code null}.
333      * @throws NullPointerException
334      *             Thrown if {@link Charsets#UTF_8} is not initialized, which should never happen since it is
335      *             required by the Java platform specification.
336      * @since As of 1.7, throws {@link NullPointerException} instead of UnsupportedEncodingException
337      */
338     public static String newStringUtf8(byte[] bytes) {
339         return newString(bytes, Charsets.UTF_8);
340     }
341 
342 }