1 /*
2 * Licensed to the Apache Software Foundation (ASF) under one or more
3 * contributor license agreements. See the NOTICE file distributed with
4 * this work for additional information regarding copyright ownership.
5 * The ASF licenses this file to You under the Apache License, Version 2.0
6 * (the "License"); you may not use this file except in compliance with
7 * the License. You may obtain a copy of the License at
8 *
9 * https://www.apache.org/licenses/LICENSE-2.0
10 *
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
16 */
17
18 package org.apache.commons.codec.binary;
19
20 import java.io.UnsupportedEncodingException;
21 import java.nio.ByteBuffer;
22 import java.nio.charset.Charset;
23 import java.nio.charset.StandardCharsets;
24
25 import org.apache.commons.codec.CharEncoding;
26
/**
 * Converts String to and from bytes using the encodings required by the Java specification. These encodings are
 * specified in standard {@link Charset}.
 *
 * <p>
 * This class is immutable and thread-safe.
 * </p>
 *
 * @see CharEncoding
 * @see Charset
 * @see StandardCharsets
 * @since 1.4
 */
public class StringUtils {

    /**
     * <p>
     * Compares two CharSequences, returning {@code true} if they represent equal sequences of characters.
     * </p>
     *
     * <p>
     * {@code null}s are handled without exceptions. Two {@code null} references are considered to be equal.
     * The comparison is case sensitive.
     * </p>
     *
     * <pre>
     * StringUtils.equals(null, null)   = true
     * StringUtils.equals(null, "abc")  = false
     * StringUtils.equals("abc", null)  = false
     * StringUtils.equals("abc", "abc") = true
     * StringUtils.equals("abc", "ABC") = false
     * </pre>
     *
     * <p>
     * Copied from Apache Commons Lang r1583482 on April 10, 2014 (day of 3.3.2 release).
     * </p>
     *
     * @see Object#equals(Object)
     * @param cs1
     *            the first CharSequence, may be {@code null}
     * @param cs2
     *            the second CharSequence, may be {@code null}
     * @return {@code true} if the CharSequences are equal (case-sensitive), or both {@code null}
     * @since 1.10
     */
    public static boolean equals(final CharSequence cs1, final CharSequence cs2) {
        if (cs1 == cs2) {
            // Same reference, which also covers the case where both are null.
            return true;
        }
        if (cs1 == null || cs2 == null) {
            return false;
        }
        if (cs1 instanceof String && cs2 instanceof String) {
            // Delegate to String.equals when both sides are Strings.
            return cs1.equals(cs2);
        }
        final int length = cs1.length();
        if (length != cs2.length()) {
            return false;
        }
        // Case-sensitive, char-by-char comparison for arbitrary CharSequence implementations.
        for (int i = 0; i < length; i++) {
            if (cs1.charAt(i) != cs2.charAt(i)) {
                return false;
            }
        }
        return true;
    }

    /**
     * Encodes the given string with {@link String#getBytes(Charset)} and wraps the resulting byte array in a
     * {@link ByteBuffer}.
     *
     * @param string
     *            The string to encode (if null, return null).
     * @param charset
     *            The {@link Charset} to encode the {@code String}
     * @return a buffer wrapping the encoded bytes, or {@code null} if the input string was {@code null}
     */
    private static ByteBuffer getByteBuffer(final String string, final Charset charset) {
        if (string == null) {
            return null;
        }
        return ByteBuffer.wrap(string.getBytes(charset));
    }

    /**
     * Encodes the given string using the UTF-8 charset and returns the encoded bytes wrapped in a new
     * {@link ByteBuffer}.
     *
     * @param string
     *            the String to encode, may be {@code null}
     * @return a buffer wrapping the encoded bytes, or {@code null} if the input string was {@code null}
     * @throws NullPointerException
     *             Thrown if {@link StandardCharsets#UTF_8} is not initialized, which should never happen since it is
     *             required by the Java platform specification.
     * @see Charset
     * @see #getBytesUnchecked(String, String)
     * @since 1.11
     */
    public static ByteBuffer getByteBufferUtf8(final String string) {
        return getByteBuffer(string, StandardCharsets.UTF_8);
    }

    /**
     * Calls {@link String#getBytes(Charset)}
     *
     * @param string
     *            The string to encode (if null, return null).
     * @param charset
     *            The {@link Charset} to encode the {@code String}
     * @return the encoded bytes, or {@code null} if the input string was {@code null}
     */
    private static byte[] getBytes(final String string, final Charset charset) {
        return string == null ? null : string.getBytes(charset);
    }

    /**
     * Encodes the given string into a sequence of bytes using the ISO-8859-1 charset, storing the result into a new
     * byte array.
     *
     * @param string
     *            the String to encode, may be {@code null}
     * @return encoded bytes, or {@code null} if the input string was {@code null}
     * @throws NullPointerException
     *             Thrown if {@link StandardCharsets#ISO_8859_1} is not initialized, which should never happen
     *             since it is required by the Java platform specification.
     * @since As of 1.7, throws {@link NullPointerException} instead of UnsupportedEncodingException
     * @see Charset
     * @see #getBytesUnchecked(String, String)
     */
    public static byte[] getBytesIso8859_1(final String string) {
        return getBytes(string, StandardCharsets.ISO_8859_1);
    }

    /**
     * Encodes the given string into a sequence of bytes using the named charset, storing the result into a new byte
     * array.
     * <p>
     * This method catches {@link UnsupportedEncodingException} and rethrows it as {@link IllegalStateException}, which
     * should never happen for a required charset name. Use this method when the encoding is required to be in the JRE.
     * The caught exception is attached as the cause of the {@link IllegalStateException}.
     * </p>
     *
     * @param string
     *            the String to encode, may be {@code null}
     * @param charsetName
     *            The name of a required {@link java.nio.charset.Charset}
     * @return encoded bytes, or {@code null} if the input string was {@code null}
     * @throws IllegalStateException
     *             Thrown when a {@link UnsupportedEncodingException} is caught, which should never happen for a
     *             required charset name.
     * @see CharEncoding
     * @see String#getBytes(String)
     */
    public static byte[] getBytesUnchecked(final String string, final String charsetName) {
        if (string == null) {
            return null;
        }
        try {
            return string.getBytes(charsetName);
        } catch (final UnsupportedEncodingException e) {
            throw newIllegalStateException(charsetName, e);
        }
    }

    /**
     * Encodes the given string into a sequence of bytes using the US-ASCII charset, storing the result into a new byte
     * array.
     *
     * @param string
     *            the String to encode, may be {@code null}
     * @return encoded bytes, or {@code null} if the input string was {@code null}
     * @throws NullPointerException
     *             Thrown if {@link StandardCharsets#US_ASCII} is not initialized, which should never happen since it is
     *             required by the Java platform specification.
     * @since As of 1.7, throws {@link NullPointerException} instead of UnsupportedEncodingException
     * @see Charset
     * @see #getBytesUnchecked(String, String)
     */
    public static byte[] getBytesUsAscii(final String string) {
        return getBytes(string, StandardCharsets.US_ASCII);
    }

    /**
     * Encodes the given string into a sequence of bytes using the UTF-16 charset, storing the result into a new byte
     * array.
     *
     * @param string
     *            the String to encode, may be {@code null}
     * @return encoded bytes, or {@code null} if the input string was {@code null}
     * @throws NullPointerException
     *             Thrown if {@link StandardCharsets#UTF_16} is not initialized, which should never happen since it is
     *             required by the Java platform specification.
     * @since As of 1.7, throws {@link NullPointerException} instead of UnsupportedEncodingException
     * @see Charset
     * @see #getBytesUnchecked(String, String)
     */
    public static byte[] getBytesUtf16(final String string) {
        return getBytes(string, StandardCharsets.UTF_16);
    }

    /**
     * Encodes the given string into a sequence of bytes using the UTF-16BE charset, storing the result into a new byte
     * array.
     *
     * @param string
     *            the String to encode, may be {@code null}
     * @return encoded bytes, or {@code null} if the input string was {@code null}
     * @throws NullPointerException
     *             Thrown if {@link StandardCharsets#UTF_16BE} is not initialized, which should never happen since it is
     *             required by the Java platform specification.
     * @since As of 1.7, throws {@link NullPointerException} instead of UnsupportedEncodingException
     * @see Charset
     * @see #getBytesUnchecked(String, String)
     */
    public static byte[] getBytesUtf16Be(final String string) {
        return getBytes(string, StandardCharsets.UTF_16BE);
    }

    /**
     * Encodes the given string into a sequence of bytes using the UTF-16LE charset, storing the result into a new byte
     * array.
     *
     * @param string
     *            the String to encode, may be {@code null}
     * @return encoded bytes, or {@code null} if the input string was {@code null}
     * @throws NullPointerException
     *             Thrown if {@link StandardCharsets#UTF_16LE} is not initialized, which should never happen since it is
     *             required by the Java platform specification.
     * @since As of 1.7, throws {@link NullPointerException} instead of UnsupportedEncodingException
     * @see Charset
     * @see #getBytesUnchecked(String, String)
     */
    public static byte[] getBytesUtf16Le(final String string) {
        return getBytes(string, StandardCharsets.UTF_16LE);
    }

    /**
     * Encodes the given string into a sequence of bytes using the UTF-8 charset, storing the result into a new byte
     * array.
     *
     * @param string
     *            the String to encode, may be {@code null}
     * @return encoded bytes, or {@code null} if the input string was {@code null}
     * @throws NullPointerException
     *             Thrown if {@link StandardCharsets#UTF_8} is not initialized, which should never happen since it is
     *             required by the Java platform specification.
     * @since As of 1.7, throws {@link NullPointerException} instead of UnsupportedEncodingException
     * @see Charset
     * @see #getBytesUnchecked(String, String)
     */
    public static byte[] getBytesUtf8(final String string) {
        return getBytes(string, StandardCharsets.UTF_8);
    }

    /**
     * Creates the {@link IllegalStateException} used to report an unsupported charset name.
     *
     * @param charsetName
     *            the charset name that could not be resolved
     * @param e
     *            the caught exception; used in the message and kept as the cause so the original stack trace is not
     *            lost
     * @return a new exception whose message is {@code charsetName + ": " + e} and whose cause is {@code e}
     */
    private static IllegalStateException newIllegalStateException(final String charsetName, final UnsupportedEncodingException e) {
        return new IllegalStateException(charsetName + ": " + e, e);
    }

    /**
     * Constructs a new {@code String} by decoding the specified array of bytes using the given charset.
     *
     * @param bytes
     *            The bytes to be decoded into characters, may be {@code null}
     * @param charset
     *            The {@link Charset} used to decode the bytes; not {@code null}
     * @return A new {@code String} decoded from the specified array of bytes using the given charset,
     *         or {@code null} if the input byte array was {@code null}.
     * @throws NullPointerException
     *             Thrown if charset is {@code null}
     */
    private static String newString(final byte[] bytes, final Charset charset) {
        return bytes == null ? null : new String(bytes, charset);
    }

    /**
     * Constructs a new {@code String} by decoding the specified array of bytes using the given charset.
     * <p>
     * This method catches {@link UnsupportedEncodingException} and re-throws it as {@link IllegalStateException}, which
     * should never happen for a required charset name. Use this method when the encoding is required to be in the JRE.
     * The caught exception is attached as the cause of the {@link IllegalStateException}.
     * </p>
     *
     * @param bytes
     *            The bytes to be decoded into characters, may be {@code null}
     * @param charsetName
     *            The name of a required {@link java.nio.charset.Charset}
     * @return A new {@code String} decoded from the specified array of bytes using the given charset,
     *         or {@code null} if the input byte array was {@code null}.
     * @throws IllegalStateException
     *             Thrown when a {@link UnsupportedEncodingException} is caught, which should never happen for a
     *             required charset name.
     * @see CharEncoding
     * @see String#String(byte[], String)
     */
    public static String newString(final byte[] bytes, final String charsetName) {
        if (bytes == null) {
            return null;
        }
        try {
            return new String(bytes, charsetName);
        } catch (final UnsupportedEncodingException e) {
            throw newIllegalStateException(charsetName, e);
        }
    }

    /**
     * Constructs a new {@code String} by decoding the specified array of bytes using the ISO-8859-1 charset.
     *
     * @param bytes
     *            The bytes to be decoded into characters, may be {@code null}
     * @return A new {@code String} decoded from the specified array of bytes using the ISO-8859-1 charset, or
     *         {@code null} if the input byte array was {@code null}.
     * @throws NullPointerException
     *             Thrown if {@link StandardCharsets#ISO_8859_1} is not initialized, which should never happen
     *             since it is required by the Java platform specification.
     * @since As of 1.7, throws {@link NullPointerException} instead of UnsupportedEncodingException
     */
    public static String newStringIso8859_1(final byte[] bytes) {
        return newString(bytes, StandardCharsets.ISO_8859_1);
    }

    /**
     * Constructs a new {@code String} by decoding the specified array of bytes using the US-ASCII charset.
     *
     * @param bytes
     *            The bytes to be decoded into characters, may be {@code null}
     * @return A new {@code String} decoded from the specified array of bytes using the US-ASCII charset,
     *         or {@code null} if the input byte array was {@code null}.
     * @throws NullPointerException
     *             Thrown if {@link StandardCharsets#US_ASCII} is not initialized, which should never happen since it is
     *             required by the Java platform specification.
     * @since As of 1.7, throws {@link NullPointerException} instead of UnsupportedEncodingException
     */
    public static String newStringUsAscii(final byte[] bytes) {
        return newString(bytes, StandardCharsets.US_ASCII);
    }

    /**
     * Constructs a new {@code String} by decoding the specified array of bytes using the UTF-16 charset.
     *
     * @param bytes
     *            The bytes to be decoded into characters, may be {@code null}
     * @return A new {@code String} decoded from the specified array of bytes using the UTF-16 charset
     *         or {@code null} if the input byte array was {@code null}.
     * @throws NullPointerException
     *             Thrown if {@link StandardCharsets#UTF_16} is not initialized, which should never happen since it is
     *             required by the Java platform specification.
     * @since As of 1.7, throws {@link NullPointerException} instead of UnsupportedEncodingException
     */
    public static String newStringUtf16(final byte[] bytes) {
        return newString(bytes, StandardCharsets.UTF_16);
    }

    /**
     * Constructs a new {@code String} by decoding the specified array of bytes using the UTF-16BE charset.
     *
     * @param bytes
     *            The bytes to be decoded into characters, may be {@code null}
     * @return A new {@code String} decoded from the specified array of bytes using the UTF-16BE charset,
     *         or {@code null} if the input byte array was {@code null}.
     * @throws NullPointerException
     *             Thrown if {@link StandardCharsets#UTF_16BE} is not initialized, which should never happen since it is
     *             required by the Java platform specification.
     * @since As of 1.7, throws {@link NullPointerException} instead of UnsupportedEncodingException
     */
    public static String newStringUtf16Be(final byte[] bytes) {
        return newString(bytes, StandardCharsets.UTF_16BE);
    }

    /**
     * Constructs a new {@code String} by decoding the specified array of bytes using the UTF-16LE charset.
     *
     * @param bytes
     *            The bytes to be decoded into characters, may be {@code null}
     * @return A new {@code String} decoded from the specified array of bytes using the UTF-16LE charset,
     *         or {@code null} if the input byte array was {@code null}.
     * @throws NullPointerException
     *             Thrown if {@link StandardCharsets#UTF_16LE} is not initialized, which should never happen since it is
     *             required by the Java platform specification.
     * @since As of 1.7, throws {@link NullPointerException} instead of UnsupportedEncodingException
     */
    public static String newStringUtf16Le(final byte[] bytes) {
        return newString(bytes, StandardCharsets.UTF_16LE);
    }

    /**
     * Constructs a new {@code String} by decoding the specified array of bytes using the UTF-8 charset.
     *
     * @param bytes
     *            The bytes to be decoded into characters, may be {@code null}
     * @return A new {@code String} decoded from the specified array of bytes using the UTF-8 charset,
     *         or {@code null} if the input byte array was {@code null}.
     * @throws NullPointerException
     *             Thrown if {@link StandardCharsets#UTF_8} is not initialized, which should never happen since it is
     *             required by the Java platform specification.
     * @since As of 1.7, throws {@link NullPointerException} instead of UnsupportedEncodingException
     */
    public static String newStringUtf8(final byte[] bytes) {
        return newString(bytes, StandardCharsets.UTF_8);
    }

    /**
     * TODO Make private in 2.0.
     *
     * @deprecated TODO Make private in 2.0.
     */
    @Deprecated
    public StringUtils() {
        // empty
    }
}