View Javadoc
1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one or more
3    * contributor license agreements.  See the NOTICE file distributed with
4    * this work for additional information regarding copyright ownership.
5    * The ASF licenses this file to You under the Apache License, Version 2.0
6    * (the "License"); you may not use this file except in compliance with
7    * the License.  You may obtain a copy of the License at
8    *
9    *      http://www.apache.org/licenses/LICENSE-2.0
10   *
11   * Unless required by applicable law or agreed to in writing, software
12   * distributed under the License is distributed on an "AS IS" BASIS,
13   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14   * See the License for the specific language governing permissions and
15   * limitations under the License.
16   */
17  
18  package org.apache.commons.codec.binary;
19  
20  import java.io.UnsupportedEncodingException;
21  import java.nio.ByteBuffer;
22  import java.nio.charset.Charset;
23  
24  import org.apache.commons.codec.CharEncoding;
25  import org.apache.commons.codec.Charsets;
26  
27  /**
28   * Converts String to and from bytes using the encodings required by the Java specification. These encodings are
29   * specified in <a href="http://download.oracle.com/javase/6/docs/api/java/nio/charset/Charset.html">
30   * Standard charsets</a>.
31   *
32   * <p>This class is immutable and thread-safe.</p>
33   *
34   * @see CharEncoding
35   * @see <a href="http://download.oracle.com/javase/6/docs/api/java/nio/charset/Charset.html">Standard charsets</a>
36   * @version $Id: StringUtils.java 1789539 2017-03-30 16:36:28Z sebb $
37   * @since 1.4
38   */
39  public class StringUtils {
40  
41      /**
42       * <p>
43       * Compares two CharSequences, returning <code>true</code> if they represent equal sequences of characters.
44       * </p>
45       *
46       * <p>
47       * <code>null</code>s are handled without exceptions. Two <code>null</code> references are considered to be equal.
48       * The comparison is case sensitive.
49       * </p>
50       *
51       * <pre>
52       * StringUtils.equals(null, null)   = true
53       * StringUtils.equals(null, "abc")  = false
54       * StringUtils.equals("abc", null)  = false
55       * StringUtils.equals("abc", "abc") = true
56       * StringUtils.equals("abc", "ABC") = false
57       * </pre>
58       *
59       * <p>
60       * Copied from Apache Commons Lang r1583482 on April 10, 2014 (day of 3.3.2 release).
61       * </p>
62       *
63       * @see Object#equals(Object)
64       * @param cs1
65       *            the first CharSequence, may be <code>null</code>
66       * @param cs2
67       *            the second CharSequence, may be <code>null</code>
68       * @return <code>true</code> if the CharSequences are equal (case-sensitive), or both <code>null</code>
69       * @since 1.10
70       */
71      public static boolean equals(final CharSequence cs1, final CharSequence cs2) {
72          if (cs1 == cs2) {
73              return true;
74          }
75          if (cs1 == null || cs2 == null) {
76              return false;
77          }
78          if (cs1 instanceof String && cs2 instanceof String) {
79              return cs1.equals(cs2);
80          }
81          return cs1.length() == cs2.length() && CharSequenceUtils.regionMatches(cs1, false, 0, cs2, 0, cs1.length());
82      }
83  
84      /**
85       * Calls {@link String#getBytes(Charset)}
86       *
87       * @param string
88       *            The string to encode (if null, return null).
89       * @param charset
90       *            The {@link Charset} to encode the <code>String</code>
91       * @return the encoded bytes
92       */
93      private static byte[] getBytes(final String string, final Charset charset) {
94          if (string == null) {
95              return null;
96          }
97          return string.getBytes(charset);
98      }
99  
100     /**
101      * Calls {@link String#getBytes(Charset)}
102      *
103      * @param string
104      *            The string to encode (if null, return null).
105      * @param charset
106      *            The {@link Charset} to encode the <code>String</code>
107      * @return the encoded bytes
108      */
109     private static ByteBuffer getByteBuffer(final String string, final Charset charset) {
110         if (string == null) {
111             return null;
112         }
113         return ByteBuffer.wrap(string.getBytes(charset));
114     }
115 
116     /**
117      * Encodes the given string into a byte buffer using the UTF-8 charset, storing the result into a new byte
118      * array.
119      *
120      * @param string
121      *            the String to encode, may be <code>null</code>
122      * @return encoded bytes, or <code>null</code> if the input string was <code>null</code>
123      * @throws NullPointerException
124      *             Thrown if {@link Charsets#UTF_8} is not initialized, which should never happen since it is
125      *             required by the Java platform specification.
126      * @see <a href="http://download.oracle.com/javase/6/docs/api/java/nio/charset/Charset.html">Standard charsets</a>
127      * @see #getBytesUnchecked(String, String)
128      * @since 1.11
129      */
130     public static ByteBuffer getByteBufferUtf8(final String string) {
131         return getByteBuffer(string, Charsets.UTF_8);
132     }
133 
134     /**
135      * Encodes the given string into a sequence of bytes using the ISO-8859-1 charset, storing the result into a new
136      * byte array.
137      *
138      * @param string
139      *            the String to encode, may be <code>null</code>
140      * @return encoded bytes, or <code>null</code> if the input string was <code>null</code>
141      * @throws NullPointerException
142      *             Thrown if {@link Charsets#ISO_8859_1} is not initialized, which should never happen since it is
143      *             required by the Java platform specification.
144      * @since As of 1.7, throws {@link NullPointerException} instead of UnsupportedEncodingException
145      * @see <a href="http://download.oracle.com/javase/6/docs/api/java/nio/charset/Charset.html">Standard charsets</a>
146      * @see #getBytesUnchecked(String, String)
147      */
148     public static byte[] getBytesIso8859_1(final String string) {
149         return getBytes(string, Charsets.ISO_8859_1);
150     }
151 
152 
153     /**
154      * Encodes the given string into a sequence of bytes using the named charset, storing the result into a new byte
155      * array.
156      * <p>
157      * This method catches {@link UnsupportedEncodingException} and rethrows it as {@link IllegalStateException}, which
158      * should never happen for a required charset name. Use this method when the encoding is required to be in the JRE.
159      * </p>
160      *
161      * @param string
162      *            the String to encode, may be <code>null</code>
163      * @param charsetName
164      *            The name of a required {@link java.nio.charset.Charset}
165      * @return encoded bytes, or <code>null</code> if the input string was <code>null</code>
166      * @throws IllegalStateException
167      *             Thrown when a {@link UnsupportedEncodingException} is caught, which should never happen for a
168      *             required charset name.
169      * @see CharEncoding
170      * @see String#getBytes(String)
171      */
172     public static byte[] getBytesUnchecked(final String string, final String charsetName) {
173         if (string == null) {
174             return null;
175         }
176         try {
177             return string.getBytes(charsetName);
178         } catch (final UnsupportedEncodingException e) {
179             throw StringUtils.newIllegalStateException(charsetName, e);
180         }
181     }
182 
183     /**
184      * Encodes the given string into a sequence of bytes using the US-ASCII charset, storing the result into a new byte
185      * array.
186      *
187      * @param string
188      *            the String to encode, may be <code>null</code>
189      * @return encoded bytes, or <code>null</code> if the input string was <code>null</code>
190      * @throws NullPointerException
191      *             Thrown if {@link Charsets#US_ASCII} is not initialized, which should never happen since it is
192      *             required by the Java platform specification.
193      * @since As of 1.7, throws {@link NullPointerException} instead of UnsupportedEncodingException
194      * @see <a href="http://download.oracle.com/javase/6/docs/api/java/nio/charset/Charset.html">Standard charsets</a>
195      * @see #getBytesUnchecked(String, String)
196      */
197     public static byte[] getBytesUsAscii(final String string) {
198         return getBytes(string, Charsets.US_ASCII);
199     }
200 
201     /**
202      * Encodes the given string into a sequence of bytes using the UTF-16 charset, storing the result into a new byte
203      * array.
204      *
205      * @param string
206      *            the String to encode, may be <code>null</code>
207      * @return encoded bytes, or <code>null</code> if the input string was <code>null</code>
208      * @throws NullPointerException
209      *             Thrown if {@link Charsets#UTF_16} is not initialized, which should never happen since it is
210      *             required by the Java platform specification.
211      * @since As of 1.7, throws {@link NullPointerException} instead of UnsupportedEncodingException
212      * @see <a href="http://download.oracle.com/javase/6/docs/api/java/nio/charset/Charset.html">Standard charsets</a>
213      * @see #getBytesUnchecked(String, String)
214      */
215     public static byte[] getBytesUtf16(final String string) {
216         return getBytes(string, Charsets.UTF_16);
217     }
218 
219     /**
220      * Encodes the given string into a sequence of bytes using the UTF-16BE charset, storing the result into a new byte
221      * array.
222      *
223      * @param string
224      *            the String to encode, may be <code>null</code>
225      * @return encoded bytes, or <code>null</code> if the input string was <code>null</code>
226      * @throws NullPointerException
227      *             Thrown if {@link Charsets#UTF_16BE} is not initialized, which should never happen since it is
228      *             required by the Java platform specification.
229      * @since As of 1.7, throws {@link NullPointerException} instead of UnsupportedEncodingException
230      * @see <a href="http://download.oracle.com/javase/6/docs/api/java/nio/charset/Charset.html">Standard charsets</a>
231      * @see #getBytesUnchecked(String, String)
232      */
233     public static byte[] getBytesUtf16Be(final String string) {
234         return getBytes(string, Charsets.UTF_16BE);
235     }
236 
237     /**
238      * Encodes the given string into a sequence of bytes using the UTF-16LE charset, storing the result into a new byte
239      * array.
240      *
241      * @param string
242      *            the String to encode, may be <code>null</code>
243      * @return encoded bytes, or <code>null</code> if the input string was <code>null</code>
244      * @throws NullPointerException
245      *             Thrown if {@link Charsets#UTF_16LE} is not initialized, which should never happen since it is
246      *             required by the Java platform specification.
247      * @since As of 1.7, throws {@link NullPointerException} instead of UnsupportedEncodingException
248      * @see <a href="http://download.oracle.com/javase/6/docs/api/java/nio/charset/Charset.html">Standard charsets</a>
249      * @see #getBytesUnchecked(String, String)
250      */
251     public static byte[] getBytesUtf16Le(final String string) {
252         return getBytes(string, Charsets.UTF_16LE);
253     }
254 
255     /**
256      * Encodes the given string into a sequence of bytes using the UTF-8 charset, storing the result into a new byte
257      * array.
258      *
259      * @param string
260      *            the String to encode, may be <code>null</code>
261      * @return encoded bytes, or <code>null</code> if the input string was <code>null</code>
262      * @throws NullPointerException
263      *             Thrown if {@link Charsets#UTF_8} is not initialized, which should never happen since it is
264      *             required by the Java platform specification.
265      * @since As of 1.7, throws {@link NullPointerException} instead of UnsupportedEncodingException
266      * @see <a href="http://download.oracle.com/javase/6/docs/api/java/nio/charset/Charset.html">Standard charsets</a>
267      * @see #getBytesUnchecked(String, String)
268      */
269     public static byte[] getBytesUtf8(final String string) {
270         return getBytes(string, Charsets.UTF_8);
271     }
272 
273     private static IllegalStateException newIllegalStateException(final String charsetName,
274                                                                   final UnsupportedEncodingException e) {
275         return new IllegalStateException(charsetName + ": " + e);
276     }
277 
278     /**
279      * Constructs a new <code>String</code> by decoding the specified array of bytes using the given charset.
280      *
281      * @param bytes
282      *            The bytes to be decoded into characters
283      * @param charset
284      *            The {@link Charset} to encode the <code>String</code>; not {@code null}
285      * @return A new <code>String</code> decoded from the specified array of bytes using the given charset,
286      *         or <code>null</code> if the input byte array was <code>null</code>.
287      * @throws NullPointerException
288      *             Thrown if charset is {@code null}
289      */
290     private static String newString(final byte[] bytes, final Charset charset) {
291         return bytes == null ? null : new String(bytes, charset);
292     }
293 
294     /**
295      * Constructs a new <code>String</code> by decoding the specified array of bytes using the given charset.
296      * <p>
297      * This method catches {@link UnsupportedEncodingException} and re-throws it as {@link IllegalStateException}, which
298      * should never happen for a required charset name. Use this method when the encoding is required to be in the JRE.
299      * </p>
300      *
301      * @param bytes
302      *            The bytes to be decoded into characters, may be <code>null</code>
303      * @param charsetName
304      *            The name of a required {@link java.nio.charset.Charset}
305      * @return A new <code>String</code> decoded from the specified array of bytes using the given charset,
306      *         or <code>null</code> if the input byte array was <code>null</code>.
307      * @throws IllegalStateException
308      *             Thrown when a {@link UnsupportedEncodingException} is caught, which should never happen for a
309      *             required charset name.
310      * @see CharEncoding
311      * @see String#String(byte[], String)
312      */
313     public static String newString(final byte[] bytes, final String charsetName) {
314         if (bytes == null) {
315             return null;
316         }
317         try {
318             return new String(bytes, charsetName);
319         } catch (final UnsupportedEncodingException e) {
320             throw StringUtils.newIllegalStateException(charsetName, e);
321         }
322     }
323 
324     /**
325      * Constructs a new <code>String</code> by decoding the specified array of bytes using the ISO-8859-1 charset.
326      *
327      * @param bytes
328      *            The bytes to be decoded into characters, may be <code>null</code>
329      * @return A new <code>String</code> decoded from the specified array of bytes using the ISO-8859-1 charset, or
330      *         <code>null</code> if the input byte array was <code>null</code>.
331      * @throws NullPointerException
332      *             Thrown if {@link Charsets#ISO_8859_1} is not initialized, which should never happen since it is
333      *             required by the Java platform specification.
334      * @since As of 1.7, throws {@link NullPointerException} instead of UnsupportedEncodingException
335      */
336     public static String newStringIso8859_1(final byte[] bytes) {
337         return newString(bytes, Charsets.ISO_8859_1);
338     }
339 
340     /**
341      * Constructs a new <code>String</code> by decoding the specified array of bytes using the US-ASCII charset.
342      *
343      * @param bytes
344      *            The bytes to be decoded into characters
345      * @return A new <code>String</code> decoded from the specified array of bytes using the US-ASCII charset,
346      *         or <code>null</code> if the input byte array was <code>null</code>.
347      * @throws NullPointerException
348      *             Thrown if {@link Charsets#US_ASCII} is not initialized, which should never happen since it is
349      *             required by the Java platform specification.
350      * @since As of 1.7, throws {@link NullPointerException} instead of UnsupportedEncodingException
351      */
352     public static String newStringUsAscii(final byte[] bytes) {
353         return newString(bytes, Charsets.US_ASCII);
354     }
355 
356     /**
357      * Constructs a new <code>String</code> by decoding the specified array of bytes using the UTF-16 charset.
358      *
359      * @param bytes
360      *            The bytes to be decoded into characters
361      * @return A new <code>String</code> decoded from the specified array of bytes using the UTF-16 charset
362      *         or <code>null</code> if the input byte array was <code>null</code>.
363      * @throws NullPointerException
364      *             Thrown if {@link Charsets#UTF_16} is not initialized, which should never happen since it is
365      *             required by the Java platform specification.
366      * @since As of 1.7, throws {@link NullPointerException} instead of UnsupportedEncodingException
367      */
368     public static String newStringUtf16(final byte[] bytes) {
369         return newString(bytes, Charsets.UTF_16);
370     }
371 
372     /**
373      * Constructs a new <code>String</code> by decoding the specified array of bytes using the UTF-16BE charset.
374      *
375      * @param bytes
376      *            The bytes to be decoded into characters
377      * @return A new <code>String</code> decoded from the specified array of bytes using the UTF-16BE charset,
378      *         or <code>null</code> if the input byte array was <code>null</code>.
379      * @throws NullPointerException
380      *             Thrown if {@link Charsets#UTF_16BE} is not initialized, which should never happen since it is
381      *             required by the Java platform specification.
382      * @since As of 1.7, throws {@link NullPointerException} instead of UnsupportedEncodingException
383      */
384     public static String newStringUtf16Be(final byte[] bytes) {
385         return newString(bytes, Charsets.UTF_16BE);
386     }
387 
388     /**
389      * Constructs a new <code>String</code> by decoding the specified array of bytes using the UTF-16LE charset.
390      *
391      * @param bytes
392      *            The bytes to be decoded into characters
393      * @return A new <code>String</code> decoded from the specified array of bytes using the UTF-16LE charset,
394      *         or <code>null</code> if the input byte array was <code>null</code>.
395      * @throws NullPointerException
396      *             Thrown if {@link Charsets#UTF_16LE} is not initialized, which should never happen since it is
397      *             required by the Java platform specification.
398      * @since As of 1.7, throws {@link NullPointerException} instead of UnsupportedEncodingException
399      */
400     public static String newStringUtf16Le(final byte[] bytes) {
401         return newString(bytes, Charsets.UTF_16LE);
402     }
403 
404     /**
405      * Constructs a new <code>String</code> by decoding the specified array of bytes using the UTF-8 charset.
406      *
407      * @param bytes
408      *            The bytes to be decoded into characters
409      * @return A new <code>String</code> decoded from the specified array of bytes using the UTF-8 charset,
410      *         or <code>null</code> if the input byte array was <code>null</code>.
411      * @throws NullPointerException
412      *             Thrown if {@link Charsets#UTF_8} is not initialized, which should never happen since it is
413      *             required by the Java platform specification.
414      * @since As of 1.7, throws {@link NullPointerException} instead of UnsupportedEncodingException
415      */
416     public static String newStringUtf8(final byte[] bytes) {
417         return newString(bytes, Charsets.UTF_8);
418     }
419 
420 }