1 /*
2 * Licensed to the Apache Software Foundation (ASF) under one or more
3 * contributor license agreements. See the NOTICE file distributed with
4 * this work for additional information regarding copyright ownership.
5 * The ASF licenses this file to You under the Apache License, Version 2.0
6 * (the "License"); you may not use this file except in compliance with
7 * the License. You may obtain a copy of the License at
8 *
9 * http://www.apache.org/licenses/LICENSE-2.0
10 *
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
16 */
17
18 package org.apache.commons.codec.binary;
19
20 import java.io.UnsupportedEncodingException;
21 import java.nio.ByteBuffer;
22 import java.nio.charset.Charset;
23
24 import org.apache.commons.codec.CharEncoding;
25 import org.apache.commons.codec.Charsets;
26
27 /**
28 * Converts String to and from bytes using the encodings required by the Java specification. These encodings are
29 * specified in <a href="http://download.oracle.com/javase/6/docs/api/java/nio/charset/Charset.html">
30 * Standard charsets</a>.
31 *
32 * <p>This class is immutable and thread-safe.</p>
33 *
34 * @see CharEncoding
35 * @see <a href="http://download.oracle.com/javase/6/docs/api/java/nio/charset/Charset.html">Standard charsets</a>
36 * @version $Id: StringUtils.java 1789539 2017-03-30 16:36:28Z sebb $
37 * @since 1.4
38 */
39 public class StringUtils {
40
41 /**
42 * <p>
43 * Compares two CharSequences, returning <code>true</code> if they represent equal sequences of characters.
44 * </p>
45 *
46 * <p>
47 * <code>null</code>s are handled without exceptions. Two <code>null</code> references are considered to be equal.
48 * The comparison is case sensitive.
49 * </p>
50 *
51 * <pre>
52 * StringUtils.equals(null, null) = true
53 * StringUtils.equals(null, "abc") = false
54 * StringUtils.equals("abc", null) = false
55 * StringUtils.equals("abc", "abc") = true
56 * StringUtils.equals("abc", "ABC") = false
57 * </pre>
58 *
59 * <p>
60 * Copied from Apache Commons Lang r1583482 on April 10, 2014 (day of 3.3.2 release).
61 * </p>
62 *
63 * @see Object#equals(Object)
64 * @param cs1
65 * the first CharSequence, may be <code>null</code>
66 * @param cs2
67 * the second CharSequence, may be <code>null</code>
68 * @return <code>true</code> if the CharSequences are equal (case-sensitive), or both <code>null</code>
69 * @since 1.10
70 */
71 public static boolean equals(final CharSequence cs1, final CharSequence cs2) {
72 if (cs1 == cs2) {
73 return true;
74 }
75 if (cs1 == null || cs2 == null) {
76 return false;
77 }
78 if (cs1 instanceof String && cs2 instanceof String) {
79 return cs1.equals(cs2);
80 }
81 return cs1.length() == cs2.length() && CharSequenceUtils.regionMatches(cs1, false, 0, cs2, 0, cs1.length());
82 }
83
84 /**
85 * Calls {@link String#getBytes(Charset)}
86 *
87 * @param string
88 * The string to encode (if null, return null).
89 * @param charset
90 * The {@link Charset} to encode the <code>String</code>
91 * @return the encoded bytes
92 */
93 private static byte[] getBytes(final String string, final Charset charset) {
94 if (string == null) {
95 return null;
96 }
97 return string.getBytes(charset);
98 }
99
100 /**
101 * Calls {@link String#getBytes(Charset)}
102 *
103 * @param string
104 * The string to encode (if null, return null).
105 * @param charset
106 * The {@link Charset} to encode the <code>String</code>
107 * @return the encoded bytes
108 */
109 private static ByteBuffer getByteBuffer(final String string, final Charset charset) {
110 if (string == null) {
111 return null;
112 }
113 return ByteBuffer.wrap(string.getBytes(charset));
114 }
115
116 /**
117 * Encodes the given string into a byte buffer using the UTF-8 charset, storing the result into a new byte
118 * array.
119 *
120 * @param string
121 * the String to encode, may be <code>null</code>
122 * @return encoded bytes, or <code>null</code> if the input string was <code>null</code>
123 * @throws NullPointerException
124 * Thrown if {@link Charsets#UTF_8} is not initialized, which should never happen since it is
125 * required by the Java platform specification.
126 * @see <a href="http://download.oracle.com/javase/6/docs/api/java/nio/charset/Charset.html">Standard charsets</a>
127 * @see #getBytesUnchecked(String, String)
128 * @since 1.11
129 */
130 public static ByteBuffer getByteBufferUtf8(final String string) {
131 return getByteBuffer(string, Charsets.UTF_8);
132 }
133
134 /**
135 * Encodes the given string into a sequence of bytes using the ISO-8859-1 charset, storing the result into a new
136 * byte array.
137 *
138 * @param string
139 * the String to encode, may be <code>null</code>
140 * @return encoded bytes, or <code>null</code> if the input string was <code>null</code>
141 * @throws NullPointerException
142 * Thrown if {@link Charsets#ISO_8859_1} is not initialized, which should never happen since it is
143 * required by the Java platform specification.
144 * @since As of 1.7, throws {@link NullPointerException} instead of UnsupportedEncodingException
145 * @see <a href="http://download.oracle.com/javase/6/docs/api/java/nio/charset/Charset.html">Standard charsets</a>
146 * @see #getBytesUnchecked(String, String)
147 */
148 public static byte[] getBytesIso8859_1(final String string) {
149 return getBytes(string, Charsets.ISO_8859_1);
150 }
151
152
153 /**
154 * Encodes the given string into a sequence of bytes using the named charset, storing the result into a new byte
155 * array.
156 * <p>
157 * This method catches {@link UnsupportedEncodingException} and rethrows it as {@link IllegalStateException}, which
158 * should never happen for a required charset name. Use this method when the encoding is required to be in the JRE.
159 * </p>
160 *
161 * @param string
162 * the String to encode, may be <code>null</code>
163 * @param charsetName
164 * The name of a required {@link java.nio.charset.Charset}
165 * @return encoded bytes, or <code>null</code> if the input string was <code>null</code>
166 * @throws IllegalStateException
167 * Thrown when a {@link UnsupportedEncodingException} is caught, which should never happen for a
168 * required charset name.
169 * @see CharEncoding
170 * @see String#getBytes(String)
171 */
172 public static byte[] getBytesUnchecked(final String string, final String charsetName) {
173 if (string == null) {
174 return null;
175 }
176 try {
177 return string.getBytes(charsetName);
178 } catch (final UnsupportedEncodingException e) {
179 throw StringUtils.newIllegalStateException(charsetName, e);
180 }
181 }
182
183 /**
184 * Encodes the given string into a sequence of bytes using the US-ASCII charset, storing the result into a new byte
185 * array.
186 *
187 * @param string
188 * the String to encode, may be <code>null</code>
189 * @return encoded bytes, or <code>null</code> if the input string was <code>null</code>
190 * @throws NullPointerException
191 * Thrown if {@link Charsets#US_ASCII} is not initialized, which should never happen since it is
192 * required by the Java platform specification.
193 * @since As of 1.7, throws {@link NullPointerException} instead of UnsupportedEncodingException
194 * @see <a href="http://download.oracle.com/javase/6/docs/api/java/nio/charset/Charset.html">Standard charsets</a>
195 * @see #getBytesUnchecked(String, String)
196 */
197 public static byte[] getBytesUsAscii(final String string) {
198 return getBytes(string, Charsets.US_ASCII);
199 }
200
201 /**
202 * Encodes the given string into a sequence of bytes using the UTF-16 charset, storing the result into a new byte
203 * array.
204 *
205 * @param string
206 * the String to encode, may be <code>null</code>
207 * @return encoded bytes, or <code>null</code> if the input string was <code>null</code>
208 * @throws NullPointerException
209 * Thrown if {@link Charsets#UTF_16} is not initialized, which should never happen since it is
210 * required by the Java platform specification.
211 * @since As of 1.7, throws {@link NullPointerException} instead of UnsupportedEncodingException
212 * @see <a href="http://download.oracle.com/javase/6/docs/api/java/nio/charset/Charset.html">Standard charsets</a>
213 * @see #getBytesUnchecked(String, String)
214 */
215 public static byte[] getBytesUtf16(final String string) {
216 return getBytes(string, Charsets.UTF_16);
217 }
218
219 /**
220 * Encodes the given string into a sequence of bytes using the UTF-16BE charset, storing the result into a new byte
221 * array.
222 *
223 * @param string
224 * the String to encode, may be <code>null</code>
225 * @return encoded bytes, or <code>null</code> if the input string was <code>null</code>
226 * @throws NullPointerException
227 * Thrown if {@link Charsets#UTF_16BE} is not initialized, which should never happen since it is
228 * required by the Java platform specification.
229 * @since As of 1.7, throws {@link NullPointerException} instead of UnsupportedEncodingException
230 * @see <a href="http://download.oracle.com/javase/6/docs/api/java/nio/charset/Charset.html">Standard charsets</a>
231 * @see #getBytesUnchecked(String, String)
232 */
233 public static byte[] getBytesUtf16Be(final String string) {
234 return getBytes(string, Charsets.UTF_16BE);
235 }
236
237 /**
238 * Encodes the given string into a sequence of bytes using the UTF-16LE charset, storing the result into a new byte
239 * array.
240 *
241 * @param string
242 * the String to encode, may be <code>null</code>
243 * @return encoded bytes, or <code>null</code> if the input string was <code>null</code>
244 * @throws NullPointerException
245 * Thrown if {@link Charsets#UTF_16LE} is not initialized, which should never happen since it is
246 * required by the Java platform specification.
247 * @since As of 1.7, throws {@link NullPointerException} instead of UnsupportedEncodingException
248 * @see <a href="http://download.oracle.com/javase/6/docs/api/java/nio/charset/Charset.html">Standard charsets</a>
249 * @see #getBytesUnchecked(String, String)
250 */
251 public static byte[] getBytesUtf16Le(final String string) {
252 return getBytes(string, Charsets.UTF_16LE);
253 }
254
255 /**
256 * Encodes the given string into a sequence of bytes using the UTF-8 charset, storing the result into a new byte
257 * array.
258 *
259 * @param string
260 * the String to encode, may be <code>null</code>
261 * @return encoded bytes, or <code>null</code> if the input string was <code>null</code>
262 * @throws NullPointerException
263 * Thrown if {@link Charsets#UTF_8} is not initialized, which should never happen since it is
264 * required by the Java platform specification.
265 * @since As of 1.7, throws {@link NullPointerException} instead of UnsupportedEncodingException
266 * @see <a href="http://download.oracle.com/javase/6/docs/api/java/nio/charset/Charset.html">Standard charsets</a>
267 * @see #getBytesUnchecked(String, String)
268 */
269 public static byte[] getBytesUtf8(final String string) {
270 return getBytes(string, Charsets.UTF_8);
271 }
272
273 private static IllegalStateException newIllegalStateException(final String charsetName,
274 final UnsupportedEncodingException e) {
275 return new IllegalStateException(charsetName + ": " + e);
276 }
277
278 /**
279 * Constructs a new <code>String</code> by decoding the specified array of bytes using the given charset.
280 *
281 * @param bytes
282 * The bytes to be decoded into characters
283 * @param charset
284 * The {@link Charset} to encode the <code>String</code>; not {@code null}
285 * @return A new <code>String</code> decoded from the specified array of bytes using the given charset,
286 * or <code>null</code> if the input byte array was <code>null</code>.
287 * @throws NullPointerException
288 * Thrown if charset is {@code null}
289 */
290 private static String newString(final byte[] bytes, final Charset charset) {
291 return bytes == null ? null : new String(bytes, charset);
292 }
293
294 /**
295 * Constructs a new <code>String</code> by decoding the specified array of bytes using the given charset.
296 * <p>
297 * This method catches {@link UnsupportedEncodingException} and re-throws it as {@link IllegalStateException}, which
298 * should never happen for a required charset name. Use this method when the encoding is required to be in the JRE.
299 * </p>
300 *
301 * @param bytes
302 * The bytes to be decoded into characters, may be <code>null</code>
303 * @param charsetName
304 * The name of a required {@link java.nio.charset.Charset}
305 * @return A new <code>String</code> decoded from the specified array of bytes using the given charset,
306 * or <code>null</code> if the input byte array was <code>null</code>.
307 * @throws IllegalStateException
308 * Thrown when a {@link UnsupportedEncodingException} is caught, which should never happen for a
309 * required charset name.
310 * @see CharEncoding
311 * @see String#String(byte[], String)
312 */
313 public static String newString(final byte[] bytes, final String charsetName) {
314 if (bytes == null) {
315 return null;
316 }
317 try {
318 return new String(bytes, charsetName);
319 } catch (final UnsupportedEncodingException e) {
320 throw StringUtils.newIllegalStateException(charsetName, e);
321 }
322 }
323
324 /**
325 * Constructs a new <code>String</code> by decoding the specified array of bytes using the ISO-8859-1 charset.
326 *
327 * @param bytes
328 * The bytes to be decoded into characters, may be <code>null</code>
329 * @return A new <code>String</code> decoded from the specified array of bytes using the ISO-8859-1 charset, or
330 * <code>null</code> if the input byte array was <code>null</code>.
331 * @throws NullPointerException
332 * Thrown if {@link Charsets#ISO_8859_1} is not initialized, which should never happen since it is
333 * required by the Java platform specification.
334 * @since As of 1.7, throws {@link NullPointerException} instead of UnsupportedEncodingException
335 */
336 public static String newStringIso8859_1(final byte[] bytes) {
337 return newString(bytes, Charsets.ISO_8859_1);
338 }
339
340 /**
341 * Constructs a new <code>String</code> by decoding the specified array of bytes using the US-ASCII charset.
342 *
343 * @param bytes
344 * The bytes to be decoded into characters
345 * @return A new <code>String</code> decoded from the specified array of bytes using the US-ASCII charset,
346 * or <code>null</code> if the input byte array was <code>null</code>.
347 * @throws NullPointerException
348 * Thrown if {@link Charsets#US_ASCII} is not initialized, which should never happen since it is
349 * required by the Java platform specification.
350 * @since As of 1.7, throws {@link NullPointerException} instead of UnsupportedEncodingException
351 */
352 public static String newStringUsAscii(final byte[] bytes) {
353 return newString(bytes, Charsets.US_ASCII);
354 }
355
356 /**
357 * Constructs a new <code>String</code> by decoding the specified array of bytes using the UTF-16 charset.
358 *
359 * @param bytes
360 * The bytes to be decoded into characters
361 * @return A new <code>String</code> decoded from the specified array of bytes using the UTF-16 charset
362 * or <code>null</code> if the input byte array was <code>null</code>.
363 * @throws NullPointerException
364 * Thrown if {@link Charsets#UTF_16} is not initialized, which should never happen since it is
365 * required by the Java platform specification.
366 * @since As of 1.7, throws {@link NullPointerException} instead of UnsupportedEncodingException
367 */
368 public static String newStringUtf16(final byte[] bytes) {
369 return newString(bytes, Charsets.UTF_16);
370 }
371
372 /**
373 * Constructs a new <code>String</code> by decoding the specified array of bytes using the UTF-16BE charset.
374 *
375 * @param bytes
376 * The bytes to be decoded into characters
377 * @return A new <code>String</code> decoded from the specified array of bytes using the UTF-16BE charset,
378 * or <code>null</code> if the input byte array was <code>null</code>.
379 * @throws NullPointerException
380 * Thrown if {@link Charsets#UTF_16BE} is not initialized, which should never happen since it is
381 * required by the Java platform specification.
382 * @since As of 1.7, throws {@link NullPointerException} instead of UnsupportedEncodingException
383 */
384 public static String newStringUtf16Be(final byte[] bytes) {
385 return newString(bytes, Charsets.UTF_16BE);
386 }
387
388 /**
389 * Constructs a new <code>String</code> by decoding the specified array of bytes using the UTF-16LE charset.
390 *
391 * @param bytes
392 * The bytes to be decoded into characters
393 * @return A new <code>String</code> decoded from the specified array of bytes using the UTF-16LE charset,
394 * or <code>null</code> if the input byte array was <code>null</code>.
395 * @throws NullPointerException
396 * Thrown if {@link Charsets#UTF_16LE} is not initialized, which should never happen since it is
397 * required by the Java platform specification.
398 * @since As of 1.7, throws {@link NullPointerException} instead of UnsupportedEncodingException
399 */
400 public static String newStringUtf16Le(final byte[] bytes) {
401 return newString(bytes, Charsets.UTF_16LE);
402 }
403
404 /**
405 * Constructs a new <code>String</code> by decoding the specified array of bytes using the UTF-8 charset.
406 *
407 * @param bytes
408 * The bytes to be decoded into characters
409 * @return A new <code>String</code> decoded from the specified array of bytes using the UTF-8 charset,
410 * or <code>null</code> if the input byte array was <code>null</code>.
411 * @throws NullPointerException
412 * Thrown if {@link Charsets#UTF_8} is not initialized, which should never happen since it is
413 * required by the Java platform specification.
414 * @since As of 1.7, throws {@link NullPointerException} instead of UnsupportedEncodingException
415 */
416 public static String newStringUtf8(final byte[] bytes) {
417 return newString(bytes, Charsets.UTF_8);
418 }
419
420 }