View Javadoc
1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one or more
3    * contributor license agreements.  See the NOTICE file distributed with
4    * this work for additional information regarding copyright ownership.
5    * The ASF licenses this file to You under the Apache License, Version 2.0
6    * (the "License"); you may not use this file except in compliance with
7    * the License.  You may obtain a copy of the License at
8    *
9    *      https://www.apache.org/licenses/LICENSE-2.0
10   *
11   * Unless required by applicable law or agreed to in writing, software
12   * distributed under the License is distributed on an "AS IS" BASIS,
13   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14   * See the License for the specific language governing permissions and
15   * limitations under the License.
16   */
17  
18  package org.apache.commons.codec.binary;
19  
20  import java.io.UnsupportedEncodingException;
21  import java.nio.ByteBuffer;
22  import java.nio.charset.Charset;
23  import java.nio.charset.StandardCharsets;
24  
25  import org.apache.commons.codec.CharEncoding;
26  
/**
 * Converts String to and from bytes using the encodings required by the Java specification. These encodings are
 * specified in standard {@link Charset}.
 *
 * <p>
 * All methods are {@code null}-tolerant with respect to the data argument: a {@code null} input string or byte array
 * yields a {@code null} result rather than an exception.
 * </p>
 *
 * <p>
 * This class is immutable and thread-safe.
 * </p>
 *
 * @see CharEncoding
 * @see Charset
 * @see StandardCharsets
 * @since 1.4
 */
public class StringUtils {

    /**
     * <p>
     * Compares two CharSequences, returning {@code true} if they represent equal sequences of characters.
     * </p>
     *
     * <p>
     * {@code null}s are handled without exceptions. Two {@code null} references are considered to be equal.
     * The comparison is case sensitive.
     * </p>
     *
     * <pre>
     * StringUtils.equals(null, null)   = true
     * StringUtils.equals(null, "abc")  = false
     * StringUtils.equals("abc", null)  = false
     * StringUtils.equals("abc", "abc") = true
     * StringUtils.equals("abc", "ABC") = false
     * </pre>
     *
     * <p>
     * Originally copied from Apache Commons Lang r1583482 on April 10, 2014 (day of 3.3.2 release).
     * </p>
     *
     * @see Object#equals(Object)
     * @param cs1
     *            the first CharSequence, may be {@code null}
     * @param cs2
     *            the second CharSequence, may be {@code null}
     * @return {@code true} if the CharSequences are equal (case-sensitive), or both {@code null}
     * @since 1.10
     */
    public static boolean equals(final CharSequence cs1, final CharSequence cs2) {
        // Same reference (this also covers the "both null" case).
        if (cs1 == cs2) {
            return true;
        }
        if (cs1 == null || cs2 == null) {
            return false;
        }
        // Two Strings: let String.equals do the work.
        if (cs1 instanceof String && cs2 instanceof String) {
            return cs1.equals(cs2);
        }
        final int length = cs1.length();
        if (length != cs2.length()) {
            return false;
        }
        // Index-wise comparison: all that is needed for a full-length, case-sensitive match.
        for (int i = 0; i < length; i++) {
            if (cs1.charAt(i) != cs2.charAt(i)) {
                return false;
            }
        }
        return true;
    }

    /**
     * Encodes the given string with {@link String#getBytes(Charset)} and wraps the resulting array in a
     * {@link ByteBuffer}.
     *
     * @param string
     *            The string to encode (if null, return null).
     * @param charset
     *            The {@link Charset} to encode the {@code String}
     * @return a buffer wrapping the encoded bytes, or {@code null} if the input string was {@code null}
     */
    private static ByteBuffer getByteBuffer(final String string, final Charset charset) {
        if (string == null) {
            return null;
        }
        return ByteBuffer.wrap(string.getBytes(charset));
    }

    /**
     * Encodes the given string using the UTF-8 charset, returning the result as a byte buffer that wraps a new byte
     * array.
     *
     * @param string
     *            the String to encode, may be {@code null}
     * @return a buffer wrapping the encoded bytes, or {@code null} if the input string was {@code null}
     * @throws NullPointerException
     *             Thrown if {@link StandardCharsets#UTF_8} is not initialized, which should never happen since it is
     *             required by the Java platform specification.
     * @see Charset
     * @see #getBytesUnchecked(String, String)
     * @since 1.11
     */
    public static ByteBuffer getByteBufferUtf8(final String string) {
        return getByteBuffer(string, StandardCharsets.UTF_8);
    }

    /**
     * Calls {@link String#getBytes(Charset)}
     *
     * @param string
     *            The string to encode (if null, return null).
     * @param charset
     *            The {@link Charset} to encode the {@code String}
     * @return the encoded bytes, or {@code null} if the input string was {@code null}
     */
    private static byte[] getBytes(final String string, final Charset charset) {
        return string == null ? null : string.getBytes(charset);
    }

    /**
     * Encodes the given string into a sequence of bytes using the ISO-8859-1 charset, storing the result into a new
     * byte array.
     *
     * @param string
     *            the String to encode, may be {@code null}
     * @return encoded bytes, or {@code null} if the input string was {@code null}
     * @throws NullPointerException
     *             Thrown if {@link StandardCharsets#ISO_8859_1} is not initialized, which should never happen
     *             since it is required by the Java platform specification.
     * @since As of 1.7, throws {@link NullPointerException} instead of UnsupportedEncodingException
     * @see Charset
     * @see #getBytesUnchecked(String, String)
     */
    public static byte[] getBytesIso8859_1(final String string) {
        return getBytes(string, StandardCharsets.ISO_8859_1);
    }

    /**
     * Encodes the given string into a sequence of bytes using the named charset, storing the result into a new byte
     * array.
     * <p>
     * This method catches {@link UnsupportedEncodingException} and rethrows it as {@link IllegalStateException}, which
     * should never happen for a required charset name. Use this method when the encoding is required to be in the JRE.
     * The caught exception is attached as the cause of the {@link IllegalStateException}.
     * </p>
     *
     * @param string
     *            the String to encode, may be {@code null}
     * @param charsetName
     *            The name of a required {@link java.nio.charset.Charset}
     * @return encoded bytes, or {@code null} if the input string was {@code null}
     * @throws IllegalStateException
     *             Thrown when a {@link UnsupportedEncodingException} is caught, which should never happen for a
     *             required charset name.
     * @see CharEncoding
     * @see String#getBytes(String)
     */
    public static byte[] getBytesUnchecked(final String string, final String charsetName) {
        if (string == null) {
            return null;
        }
        try {
            return string.getBytes(charsetName);
        } catch (final UnsupportedEncodingException e) {
            throw newIllegalStateException(charsetName, e);
        }
    }

    /**
     * Encodes the given string into a sequence of bytes using the US-ASCII charset, storing the result into a new byte
     * array.
     *
     * @param string
     *            the String to encode, may be {@code null}
     * @return encoded bytes, or {@code null} if the input string was {@code null}
     * @throws NullPointerException
     *             Thrown if {@link StandardCharsets#US_ASCII} is not initialized, which should never happen since it is
     *             required by the Java platform specification.
     * @since As of 1.7, throws {@link NullPointerException} instead of UnsupportedEncodingException
     * @see Charset
     * @see #getBytesUnchecked(String, String)
     */
    public static byte[] getBytesUsAscii(final String string) {
        return getBytes(string, StandardCharsets.US_ASCII);
    }

    /**
     * Encodes the given string into a sequence of bytes using the UTF-16 charset, storing the result into a new byte
     * array.
     *
     * @param string
     *            the String to encode, may be {@code null}
     * @return encoded bytes, or {@code null} if the input string was {@code null}
     * @throws NullPointerException
     *             Thrown if {@link StandardCharsets#UTF_16} is not initialized, which should never happen since it is
     *             required by the Java platform specification.
     * @since As of 1.7, throws {@link NullPointerException} instead of UnsupportedEncodingException
     * @see Charset
     * @see #getBytesUnchecked(String, String)
     */
    public static byte[] getBytesUtf16(final String string) {
        return getBytes(string, StandardCharsets.UTF_16);
    }

    /**
     * Encodes the given string into a sequence of bytes using the UTF-16BE charset, storing the result into a new byte
     * array.
     *
     * @param string
     *            the String to encode, may be {@code null}
     * @return encoded bytes, or {@code null} if the input string was {@code null}
     * @throws NullPointerException
     *             Thrown if {@link StandardCharsets#UTF_16BE} is not initialized, which should never happen since it is
     *             required by the Java platform specification.
     * @since As of 1.7, throws {@link NullPointerException} instead of UnsupportedEncodingException
     * @see Charset
     * @see #getBytesUnchecked(String, String)
     */
    public static byte[] getBytesUtf16Be(final String string) {
        return getBytes(string, StandardCharsets.UTF_16BE);
    }

    /**
     * Encodes the given string into a sequence of bytes using the UTF-16LE charset, storing the result into a new byte
     * array.
     *
     * @param string
     *            the String to encode, may be {@code null}
     * @return encoded bytes, or {@code null} if the input string was {@code null}
     * @throws NullPointerException
     *             Thrown if {@link StandardCharsets#UTF_16LE} is not initialized, which should never happen since it is
     *             required by the Java platform specification.
     * @since As of 1.7, throws {@link NullPointerException} instead of UnsupportedEncodingException
     * @see Charset
     * @see #getBytesUnchecked(String, String)
     */
    public static byte[] getBytesUtf16Le(final String string) {
        return getBytes(string, StandardCharsets.UTF_16LE);
    }

    /**
     * Encodes the given string into a sequence of bytes using the UTF-8 charset, storing the result into a new byte
     * array.
     *
     * @param string
     *            the String to encode, may be {@code null}
     * @return encoded bytes, or {@code null} if the input string was {@code null}
     * @throws NullPointerException
     *             Thrown if {@link StandardCharsets#UTF_8} is not initialized, which should never happen since it is
     *             required by the Java platform specification.
     * @since As of 1.7, throws {@link NullPointerException} instead of UnsupportedEncodingException
     * @see Charset
     * @see #getBytesUnchecked(String, String)
     */
    public static byte[] getBytesUtf8(final String string) {
        return getBytes(string, StandardCharsets.UTF_8);
    }

    /**
     * Builds the unchecked exception thrown when a named charset turns out to be unsupported.
     *
     * @param charsetName
     *            the charset name that could not be resolved; used as the message prefix
     * @param e
     *            the caught exception; appended to the message and attached as the cause
     * @return a new {@link IllegalStateException} whose message is {@code charsetName + ": " + e} and whose cause is
     *         {@code e}
     */
    private static IllegalStateException newIllegalStateException(final String charsetName, final UnsupportedEncodingException e) {
        // Keep the original exception as the cause so its stack trace is not lost.
        return new IllegalStateException(charsetName + ": " + e, e);
    }

    /**
     * Constructs a new {@code String} by decoding the specified array of bytes using the given charset.
     *
     * @param bytes
     *            The bytes to be decoded into characters, may be {@code null}
     * @param charset
     *            The {@link Charset} used to decode the bytes; not {@code null}
     * @return A new {@code String} decoded from the specified array of bytes using the given charset,
     *         or {@code null} if the input byte array was {@code null}.
     * @throws NullPointerException
     *             Thrown if charset is {@code null}
     */
    private static String newString(final byte[] bytes, final Charset charset) {
        return bytes == null ? null : new String(bytes, charset);
    }

    /**
     * Constructs a new {@code String} by decoding the specified array of bytes using the given charset.
     * <p>
     * This method catches {@link UnsupportedEncodingException} and re-throws it as {@link IllegalStateException}, which
     * should never happen for a required charset name. Use this method when the encoding is required to be in the JRE.
     * The caught exception is attached as the cause of the {@link IllegalStateException}.
     * </p>
     *
     * @param bytes
     *            The bytes to be decoded into characters, may be {@code null}
     * @param charsetName
     *            The name of a required {@link java.nio.charset.Charset}
     * @return A new {@code String} decoded from the specified array of bytes using the given charset,
     *         or {@code null} if the input byte array was {@code null}.
     * @throws IllegalStateException
     *             Thrown when a {@link UnsupportedEncodingException} is caught, which should never happen for a
     *             required charset name.
     * @see CharEncoding
     * @see String#String(byte[], String)
     */
    public static String newString(final byte[] bytes, final String charsetName) {
        if (bytes == null) {
            return null;
        }
        try {
            return new String(bytes, charsetName);
        } catch (final UnsupportedEncodingException e) {
            throw newIllegalStateException(charsetName, e);
        }
    }

    /**
     * Constructs a new {@code String} by decoding the specified array of bytes using the ISO-8859-1 charset.
     *
     * @param bytes
     *            The bytes to be decoded into characters, may be {@code null}
     * @return A new {@code String} decoded from the specified array of bytes using the ISO-8859-1 charset, or
     *         {@code null} if the input byte array was {@code null}.
     * @throws NullPointerException
     *             Thrown if {@link StandardCharsets#ISO_8859_1} is not initialized, which should never happen
     *             since it is required by the Java platform specification.
     * @since As of 1.7, throws {@link NullPointerException} instead of UnsupportedEncodingException
     */
    public static String newStringIso8859_1(final byte[] bytes) {
        return newString(bytes, StandardCharsets.ISO_8859_1);
    }

    /**
     * Constructs a new {@code String} by decoding the specified array of bytes using the US-ASCII charset.
     *
     * @param bytes
     *            The bytes to be decoded into characters, may be {@code null}
     * @return A new {@code String} decoded from the specified array of bytes using the US-ASCII charset,
     *         or {@code null} if the input byte array was {@code null}.
     * @throws NullPointerException
     *             Thrown if {@link StandardCharsets#US_ASCII} is not initialized, which should never happen since it is
     *             required by the Java platform specification.
     * @since As of 1.7, throws {@link NullPointerException} instead of UnsupportedEncodingException
     */
    public static String newStringUsAscii(final byte[] bytes) {
        return newString(bytes, StandardCharsets.US_ASCII);
    }

    /**
     * Constructs a new {@code String} by decoding the specified array of bytes using the UTF-16 charset.
     *
     * @param bytes
     *            The bytes to be decoded into characters, may be {@code null}
     * @return A new {@code String} decoded from the specified array of bytes using the UTF-16 charset,
     *         or {@code null} if the input byte array was {@code null}.
     * @throws NullPointerException
     *             Thrown if {@link StandardCharsets#UTF_16} is not initialized, which should never happen since it is
     *             required by the Java platform specification.
     * @since As of 1.7, throws {@link NullPointerException} instead of UnsupportedEncodingException
     */
    public static String newStringUtf16(final byte[] bytes) {
        return newString(bytes, StandardCharsets.UTF_16);
    }

    /**
     * Constructs a new {@code String} by decoding the specified array of bytes using the UTF-16BE charset.
     *
     * @param bytes
     *            The bytes to be decoded into characters, may be {@code null}
     * @return A new {@code String} decoded from the specified array of bytes using the UTF-16BE charset,
     *         or {@code null} if the input byte array was {@code null}.
     * @throws NullPointerException
     *             Thrown if {@link StandardCharsets#UTF_16BE} is not initialized, which should never happen since it is
     *             required by the Java platform specification.
     * @since As of 1.7, throws {@link NullPointerException} instead of UnsupportedEncodingException
     */
    public static String newStringUtf16Be(final byte[] bytes) {
        return newString(bytes, StandardCharsets.UTF_16BE);
    }

    /**
     * Constructs a new {@code String} by decoding the specified array of bytes using the UTF-16LE charset.
     *
     * @param bytes
     *            The bytes to be decoded into characters, may be {@code null}
     * @return A new {@code String} decoded from the specified array of bytes using the UTF-16LE charset,
     *         or {@code null} if the input byte array was {@code null}.
     * @throws NullPointerException
     *             Thrown if {@link StandardCharsets#UTF_16LE} is not initialized, which should never happen since it is
     *             required by the Java platform specification.
     * @since As of 1.7, throws {@link NullPointerException} instead of UnsupportedEncodingException
     */
    public static String newStringUtf16Le(final byte[] bytes) {
        return newString(bytes, StandardCharsets.UTF_16LE);
    }

    /**
     * Constructs a new {@code String} by decoding the specified array of bytes using the UTF-8 charset.
     *
     * @param bytes
     *            The bytes to be decoded into characters, may be {@code null}
     * @return A new {@code String} decoded from the specified array of bytes using the UTF-8 charset,
     *         or {@code null} if the input byte array was {@code null}.
     * @throws NullPointerException
     *             Thrown if {@link StandardCharsets#UTF_8} is not initialized, which should never happen since it is
     *             required by the Java platform specification.
     * @since As of 1.7, throws {@link NullPointerException} instead of UnsupportedEncodingException
     */
    public static String newStringUtf8(final byte[] bytes) {
        return newString(bytes, StandardCharsets.UTF_8);
    }

    /**
     * Creates an instance. Every method of this class is static, so there is no reason to instantiate it; the
     * constructor remains public only for backward compatibility.
     *
     * TODO Make private in 2.0.
     *
     * @deprecated Use the static methods directly. TODO Make private in 2.0.
     */
    @Deprecated
    public StringUtils() {
        // empty
    }
}
425 }