View Javadoc
1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one or more
3    * contributor license agreements.  See the NOTICE file distributed with
4    * this work for additional information regarding copyright ownership.
5    * The ASF licenses this file to You under the Apache License, Version 2.0
6    * (the "License"); you may not use this file except in compliance with
7    * the License.  You may obtain a copy of the License at
8    *
9    *      http://www.apache.org/licenses/LICENSE-2.0
10   *
11   * Unless required by applicable law or agreed to in writing, software
12   * distributed under the License is distributed on an "AS IS" BASIS,
13   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14   * See the License for the specific language governing permissions and
15   * limitations under the License.
16   */
17  
18  package org.apache.commons.codec.binary;
19  
20  import java.io.UnsupportedEncodingException;
21  import java.nio.charset.Charset;
22  
23  import org.apache.commons.codec.CharEncoding;
24  import org.apache.commons.codec.Charsets;
25  
26  /**
27   * Converts String to and from bytes using the encodings required by the Java specification. These encodings are
28   * specified in <a href="http://download.oracle.com/javase/6/docs/api/java/nio/charset/Charset.html">
29   * Standard charsets</a>.
30   *
31   * <p>This class is immutable and thread-safe.</p>
32   *
33   * @see CharEncoding
34   * @see <a href="http://download.oracle.com/javase/6/docs/api/java/nio/charset/Charset.html">Standard charsets</a>
35   * @version $Id: StringUtils.html 928559 2014-11-10 02:53:54Z ggregory $
36   * @since 1.4
37   */
38  public class StringUtils {
39  
40      /**
41       * <p>
42       * Compares two CharSequences, returning <code>true</code> if they represent equal sequences of characters.
43       * </p>
44       *
45       * <p>
46       * <code>null</code>s are handled without exceptions. Two <code>null</code> references are considered to be equal.
47       * The comparison is case sensitive.
48       * </p>
49       *
50       * <pre>
51       * StringUtils.equals(null, null)   = true
52       * StringUtils.equals(null, "abc")  = false
53       * StringUtils.equals("abc", null)  = false
54       * StringUtils.equals("abc", "abc") = true
55       * StringUtils.equals("abc", "ABC") = false
56       * </pre>
57       *
58       * <p>
59       * Copied from Apache Commons Lang r1583482 on April 10, 2014 (day of 3.3.2 release).
60       * </p>
61       *
62       * @see Object#equals(Object)
63       * @param cs1
64       *            the first CharSequence, may be <code>null</code>
65       * @param cs2
66       *            the second CharSequence, may be <code>null</code>
67       * @return <code>true</code> if the CharSequences are equal (case-sensitive), or both <code>null</code>
68       * @since 1.10
69       */
70      public static boolean equals(final CharSequence cs1, final CharSequence cs2) {
71          if (cs1 == cs2) {
72              return true;
73          }
74          if (cs1 == null || cs2 == null) {
75              return false;
76          }
77          if (cs1 instanceof String && cs2 instanceof String) {
78              return cs1.equals(cs2);
79          }
80          return CharSequenceUtils.regionMatches(cs1, false, 0, cs2, 0, Math.max(cs1.length(), cs2.length()));
81      }
82  
83      /**
84       * Calls {@link String#getBytes(Charset)}
85       *
86       * @param string
87       *            The string to encode (if null, return null).
88       * @param charset
89       *            The {@link Charset} to encode the <code>String</code>
90       * @return the encoded bytes
91       */
92      private static byte[] getBytes(final String string, final Charset charset) {
93          if (string == null) {
94              return null;
95          }
96          return string.getBytes(charset);
97      }
98  
99      /**
100      * Encodes the given string into a sequence of bytes using the ISO-8859-1 charset, storing the result into a new
101      * byte array.
102      *
103      * @param string
104      *            the String to encode, may be <code>null</code>
105      * @return encoded bytes, or <code>null</code> if the input string was <code>null</code>
106      * @throws NullPointerException
107      *             Thrown if {@link Charsets#ISO_8859_1} is not initialized, which should never happen since it is
108      *             required by the Java platform specification.
109      * @since As of 1.7, throws {@link NullPointerException} instead of UnsupportedEncodingException
110      * @see <a href="http://download.oracle.com/javase/6/docs/api/java/nio/charset/Charset.html">Standard charsets</a>
111      * @see #getBytesUnchecked(String, String)
112      */
113     public static byte[] getBytesIso8859_1(final String string) {
114         return getBytes(string, Charsets.ISO_8859_1);
115     }
116 
117 
118     /**
119      * Encodes the given string into a sequence of bytes using the named charset, storing the result into a new byte
120      * array.
121      * <p>
122      * This method catches {@link UnsupportedEncodingException} and rethrows it as {@link IllegalStateException}, which
123      * should never happen for a required charset name. Use this method when the encoding is required to be in the JRE.
124      * </p>
125      *
126      * @param string
127      *            the String to encode, may be <code>null</code>
128      * @param charsetName
129      *            The name of a required {@link java.nio.charset.Charset}
130      * @return encoded bytes, or <code>null</code> if the input string was <code>null</code>
131      * @throws IllegalStateException
132      *             Thrown when a {@link UnsupportedEncodingException} is caught, which should never happen for a
133      *             required charset name.
134      * @see CharEncoding
135      * @see String#getBytes(String)
136      */
137     public static byte[] getBytesUnchecked(final String string, final String charsetName) {
138         if (string == null) {
139             return null;
140         }
141         try {
142             return string.getBytes(charsetName);
143         } catch (final UnsupportedEncodingException e) {
144             throw StringUtils.newIllegalStateException(charsetName, e);
145         }
146     }
147 
148     /**
149      * Encodes the given string into a sequence of bytes using the US-ASCII charset, storing the result into a new byte
150      * array.
151      *
152      * @param string
153      *            the String to encode, may be <code>null</code>
154      * @return encoded bytes, or <code>null</code> if the input string was <code>null</code>
155      * @throws NullPointerException
156      *             Thrown if {@link Charsets#US_ASCII} is not initialized, which should never happen since it is
157      *             required by the Java platform specification.
158      * @since As of 1.7, throws {@link NullPointerException} instead of UnsupportedEncodingException
159      * @see <a href="http://download.oracle.com/javase/6/docs/api/java/nio/charset/Charset.html">Standard charsets</a>
160      * @see #getBytesUnchecked(String, String)
161      */
162     public static byte[] getBytesUsAscii(final String string) {
163         return getBytes(string, Charsets.US_ASCII);
164     }
165 
166     /**
167      * Encodes the given string into a sequence of bytes using the UTF-16 charset, storing the result into a new byte
168      * array.
169      *
170      * @param string
171      *            the String to encode, may be <code>null</code>
172      * @return encoded bytes, or <code>null</code> if the input string was <code>null</code>
173      * @throws NullPointerException
174      *             Thrown if {@link Charsets#UTF_16} is not initialized, which should never happen since it is
175      *             required by the Java platform specification.
176      * @since As of 1.7, throws {@link NullPointerException} instead of UnsupportedEncodingException
177      * @see <a href="http://download.oracle.com/javase/6/docs/api/java/nio/charset/Charset.html">Standard charsets</a>
178      * @see #getBytesUnchecked(String, String)
179      */
180     public static byte[] getBytesUtf16(final String string) {
181         return getBytes(string, Charsets.UTF_16);
182     }
183 
184     /**
185      * Encodes the given string into a sequence of bytes using the UTF-16BE charset, storing the result into a new byte
186      * array.
187      *
188      * @param string
189      *            the String to encode, may be <code>null</code>
190      * @return encoded bytes, or <code>null</code> if the input string was <code>null</code>
191      * @throws NullPointerException
192      *             Thrown if {@link Charsets#UTF_16BE} is not initialized, which should never happen since it is
193      *             required by the Java platform specification.
194      * @since As of 1.7, throws {@link NullPointerException} instead of UnsupportedEncodingException
195      * @see <a href="http://download.oracle.com/javase/6/docs/api/java/nio/charset/Charset.html">Standard charsets</a>
196      * @see #getBytesUnchecked(String, String)
197      */
198     public static byte[] getBytesUtf16Be(final String string) {
199         return getBytes(string, Charsets.UTF_16BE);
200     }
201 
202     /**
203      * Encodes the given string into a sequence of bytes using the UTF-16LE charset, storing the result into a new byte
204      * array.
205      *
206      * @param string
207      *            the String to encode, may be <code>null</code>
208      * @return encoded bytes, or <code>null</code> if the input string was <code>null</code>
209      * @throws NullPointerException
210      *             Thrown if {@link Charsets#UTF_16LE} is not initialized, which should never happen since it is
211      *             required by the Java platform specification.
212      * @since As of 1.7, throws {@link NullPointerException} instead of UnsupportedEncodingException
213      * @see <a href="http://download.oracle.com/javase/6/docs/api/java/nio/charset/Charset.html">Standard charsets</a>
214      * @see #getBytesUnchecked(String, String)
215      */
216     public static byte[] getBytesUtf16Le(final String string) {
217         return getBytes(string, Charsets.UTF_16LE);
218     }
219 
220     /**
221      * Encodes the given string into a sequence of bytes using the UTF-8 charset, storing the result into a new byte
222      * array.
223      *
224      * @param string
225      *            the String to encode, may be <code>null</code>
226      * @return encoded bytes, or <code>null</code> if the input string was <code>null</code>
227      * @throws NullPointerException
228      *             Thrown if {@link Charsets#UTF_8} is not initialized, which should never happen since it is
229      *             required by the Java platform specification.
230      * @since As of 1.7, throws {@link NullPointerException} instead of UnsupportedEncodingException
231      * @see <a href="http://download.oracle.com/javase/6/docs/api/java/nio/charset/Charset.html">Standard charsets</a>
232      * @see #getBytesUnchecked(String, String)
233      */
234     public static byte[] getBytesUtf8(final String string) {
235         return getBytes(string, Charsets.UTF_8);
236     }
237 
238     private static IllegalStateException newIllegalStateException(final String charsetName,
239                                                                   final UnsupportedEncodingException e) {
240         return new IllegalStateException(charsetName + ": " + e);
241     }
242 
243     /**
244      * Constructs a new <code>String</code> by decoding the specified array of bytes using the given charset.
245      *
246      * @param bytes
247      *            The bytes to be decoded into characters
248      * @param charset
249      *            The {@link Charset} to encode the <code>String</code>
250      * @return A new <code>String</code> decoded from the specified array of bytes using the given charset,
251      *         or <code>null</code> if the input byte array was <code>null</code>.
252      * @throws NullPointerException
253      *             Thrown if {@link Charsets#UTF_8} is not initialized, which should never happen since it is
254      *             required by the Java platform specification.
255      */
256     private static String newString(final byte[] bytes, final Charset charset) {
257         return bytes == null ? null : new String(bytes, charset);
258     }
259 
260     /**
261      * Constructs a new <code>String</code> by decoding the specified array of bytes using the given charset.
262      * <p>
263      * This method catches {@link UnsupportedEncodingException} and re-throws it as {@link IllegalStateException}, which
264      * should never happen for a required charset name. Use this method when the encoding is required to be in the JRE.
265      * </p>
266      *
267      * @param bytes
268      *            The bytes to be decoded into characters, may be <code>null</code>
269      * @param charsetName
270      *            The name of a required {@link java.nio.charset.Charset}
271      * @return A new <code>String</code> decoded from the specified array of bytes using the given charset,
272      *         or <code>null</code> if the input byte array was <code>null</code>.
273      * @throws IllegalStateException
274      *             Thrown when a {@link UnsupportedEncodingException} is caught, which should never happen for a
275      *             required charset name.
276      * @see CharEncoding
277      * @see String#String(byte[], String)
278      */
279     public static String newString(final byte[] bytes, final String charsetName) {
280         if (bytes == null) {
281             return null;
282         }
283         try {
284             return new String(bytes, charsetName);
285         } catch (final UnsupportedEncodingException e) {
286             throw StringUtils.newIllegalStateException(charsetName, e);
287         }
288     }
289 
290     /**
291      * Constructs a new <code>String</code> by decoding the specified array of bytes using the ISO-8859-1 charset.
292      *
293      * @param bytes
294      *            The bytes to be decoded into characters, may be <code>null</code>
295      * @return A new <code>String</code> decoded from the specified array of bytes using the ISO-8859-1 charset, or
296      *         <code>null</code> if the input byte array was <code>null</code>.
297      * @throws NullPointerException
298      *             Thrown if {@link Charsets#ISO_8859_1} is not initialized, which should never happen since it is
299      *             required by the Java platform specification.
300      * @since As of 1.7, throws {@link NullPointerException} instead of UnsupportedEncodingException
301      */
302     public static String newStringIso8859_1(final byte[] bytes) {
303         return new String(bytes, Charsets.ISO_8859_1);
304     }
305 
306     /**
307      * Constructs a new <code>String</code> by decoding the specified array of bytes using the US-ASCII charset.
308      *
309      * @param bytes
310      *            The bytes to be decoded into characters
311      * @return A new <code>String</code> decoded from the specified array of bytes using the US-ASCII charset,
312      *         or <code>null</code> if the input byte array was <code>null</code>.
313      * @throws NullPointerException
314      *             Thrown if {@link Charsets#US_ASCII} is not initialized, which should never happen since it is
315      *             required by the Java platform specification.
316      * @since As of 1.7, throws {@link NullPointerException} instead of UnsupportedEncodingException
317      */
318     public static String newStringUsAscii(final byte[] bytes) {
319         return new String(bytes, Charsets.US_ASCII);
320     }
321 
322     /**
323      * Constructs a new <code>String</code> by decoding the specified array of bytes using the UTF-16 charset.
324      *
325      * @param bytes
326      *            The bytes to be decoded into characters
327      * @return A new <code>String</code> decoded from the specified array of bytes using the UTF-16 charset
328      *         or <code>null</code> if the input byte array was <code>null</code>.
329      * @throws NullPointerException
330      *             Thrown if {@link Charsets#UTF_16} is not initialized, which should never happen since it is
331      *             required by the Java platform specification.
332      * @since As of 1.7, throws {@link NullPointerException} instead of UnsupportedEncodingException
333      */
334     public static String newStringUtf16(final byte[] bytes) {
335         return new String(bytes, Charsets.UTF_16);
336     }
337 
338     /**
339      * Constructs a new <code>String</code> by decoding the specified array of bytes using the UTF-16BE charset.
340      *
341      * @param bytes
342      *            The bytes to be decoded into characters
343      * @return A new <code>String</code> decoded from the specified array of bytes using the UTF-16BE charset,
344      *         or <code>null</code> if the input byte array was <code>null</code>.
345      * @throws NullPointerException
346      *             Thrown if {@link Charsets#UTF_16BE} is not initialized, which should never happen since it is
347      *             required by the Java platform specification.
348      * @since As of 1.7, throws {@link NullPointerException} instead of UnsupportedEncodingException
349      */
350     public static String newStringUtf16Be(final byte[] bytes) {
351         return new String(bytes, Charsets.UTF_16BE);
352     }
353 
354     /**
355      * Constructs a new <code>String</code> by decoding the specified array of bytes using the UTF-16LE charset.
356      *
357      * @param bytes
358      *            The bytes to be decoded into characters
359      * @return A new <code>String</code> decoded from the specified array of bytes using the UTF-16LE charset,
360      *         or <code>null</code> if the input byte array was <code>null</code>.
361      * @throws NullPointerException
362      *             Thrown if {@link Charsets#UTF_16LE} is not initialized, which should never happen since it is
363      *             required by the Java platform specification.
364      * @since As of 1.7, throws {@link NullPointerException} instead of UnsupportedEncodingException
365      */
366     public static String newStringUtf16Le(final byte[] bytes) {
367         return new String(bytes, Charsets.UTF_16LE);
368     }
369 
370     /**
371      * Constructs a new <code>String</code> by decoding the specified array of bytes using the UTF-8 charset.
372      *
373      * @param bytes
374      *            The bytes to be decoded into characters
375      * @return A new <code>String</code> decoded from the specified array of bytes using the UTF-8 charset,
376      *         or <code>null</code> if the input byte array was <code>null</code>.
377      * @throws NullPointerException
378      *             Thrown if {@link Charsets#UTF_8} is not initialized, which should never happen since it is
379      *             required by the Java platform specification.
380      * @since As of 1.7, throws {@link NullPointerException} instead of UnsupportedEncodingException
381      */
382     public static String newStringUtf8(final byte[] bytes) {
383         return newString(bytes, Charsets.UTF_8);
384     }
385 
386 }