/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
17
18 package org.apache.commons.codec.binary;
19
20 import java.io.UnsupportedEncodingException;
21 import java.nio.charset.Charset;
22
23 import org.apache.commons.codec.CharEncoding;
24 import org.apache.commons.codec.Charsets;
25
/**
 * Converts String to and from bytes using the encodings required by the Java specification. These encodings are
 * specified in <a href="http://download.oracle.com/javase/6/docs/api/java/nio/charset/Charset.html">
 * Standard charsets</a>.
 *
 * <p>This class is immutable and thread-safe.</p>
 *
 * @see CharEncoding
 * @see <a href="http://download.oracle.com/javase/6/docs/api/java/nio/charset/Charset.html">Standard charsets</a>
 * @version $Id: StringUtils.html 928559 2014-11-10 02:53:54Z ggregory $
 * @since 1.4
 */
38 public class StringUtils {
39
40 /**
41 * <p>
42 * Compares two CharSequences, returning <code>true</code> if they represent equal sequences of characters.
43 * </p>
44 *
45 * <p>
46 * <code>null</code>s are handled without exceptions. Two <code>null</code> references are considered to be equal.
47 * The comparison is case sensitive.
48 * </p>
49 *
50 * <pre>
51 * StringUtils.equals(null, null) = true
52 * StringUtils.equals(null, "abc") = false
53 * StringUtils.equals("abc", null) = false
54 * StringUtils.equals("abc", "abc") = true
55 * StringUtils.equals("abc", "ABC") = false
56 * </pre>
57 *
58 * <p>
59 * Copied from Apache Commons Lang r1583482 on April 10, 2014 (day of 3.3.2 release).
60 * </p>
61 *
62 * @see Object#equals(Object)
63 * @param cs1
64 * the first CharSequence, may be <code>null</code>
65 * @param cs2
66 * the second CharSequence, may be <code>null</code>
67 * @return <code>true</code> if the CharSequences are equal (case-sensitive), or both <code>null</code>
68 * @since 1.10
69 */
70 public static boolean equals(final CharSequence cs1, final CharSequence cs2) {
71 if (cs1 == cs2) {
72 return true;
73 }
74 if (cs1 == null || cs2 == null) {
75 return false;
76 }
77 if (cs1 instanceof String && cs2 instanceof String) {
78 return cs1.equals(cs2);
79 }
80 return CharSequenceUtils.regionMatches(cs1, false, 0, cs2, 0, Math.max(cs1.length(), cs2.length()));
81 }
82
83 /**
84 * Calls {@link String#getBytes(Charset)}
85 *
86 * @param string
87 * The string to encode (if null, return null).
88 * @param charset
89 * The {@link Charset} to encode the <code>String</code>
90 * @return the encoded bytes
91 */
92 private static byte[] getBytes(final String string, final Charset charset) {
93 if (string == null) {
94 return null;
95 }
96 return string.getBytes(charset);
97 }
98
99 /**
100 * Encodes the given string into a sequence of bytes using the ISO-8859-1 charset, storing the result into a new
101 * byte array.
102 *
103 * @param string
104 * the String to encode, may be <code>null</code>
105 * @return encoded bytes, or <code>null</code> if the input string was <code>null</code>
106 * @throws NullPointerException
107 * Thrown if {@link Charsets#ISO_8859_1} is not initialized, which should never happen since it is
108 * required by the Java platform specification.
109 * @since As of 1.7, throws {@link NullPointerException} instead of UnsupportedEncodingException
110 * @see <a href="http://download.oracle.com/javase/6/docs/api/java/nio/charset/Charset.html">Standard charsets</a>
111 * @see #getBytesUnchecked(String, String)
112 */
113 public static byte[] getBytesIso8859_1(final String string) {
114 return getBytes(string, Charsets.ISO_8859_1);
115 }
116
117
118 /**
119 * Encodes the given string into a sequence of bytes using the named charset, storing the result into a new byte
120 * array.
121 * <p>
122 * This method catches {@link UnsupportedEncodingException} and rethrows it as {@link IllegalStateException}, which
123 * should never happen for a required charset name. Use this method when the encoding is required to be in the JRE.
124 * </p>
125 *
126 * @param string
127 * the String to encode, may be <code>null</code>
128 * @param charsetName
129 * The name of a required {@link java.nio.charset.Charset}
130 * @return encoded bytes, or <code>null</code> if the input string was <code>null</code>
131 * @throws IllegalStateException
132 * Thrown when a {@link UnsupportedEncodingException} is caught, which should never happen for a
133 * required charset name.
134 * @see CharEncoding
135 * @see String#getBytes(String)
136 */
137 public static byte[] getBytesUnchecked(final String string, final String charsetName) {
138 if (string == null) {
139 return null;
140 }
141 try {
142 return string.getBytes(charsetName);
143 } catch (final UnsupportedEncodingException e) {
144 throw StringUtils.newIllegalStateException(charsetName, e);
145 }
146 }
147
148 /**
149 * Encodes the given string into a sequence of bytes using the US-ASCII charset, storing the result into a new byte
150 * array.
151 *
152 * @param string
153 * the String to encode, may be <code>null</code>
154 * @return encoded bytes, or <code>null</code> if the input string was <code>null</code>
155 * @throws NullPointerException
156 * Thrown if {@link Charsets#US_ASCII} is not initialized, which should never happen since it is
157 * required by the Java platform specification.
158 * @since As of 1.7, throws {@link NullPointerException} instead of UnsupportedEncodingException
159 * @see <a href="http://download.oracle.com/javase/6/docs/api/java/nio/charset/Charset.html">Standard charsets</a>
160 * @see #getBytesUnchecked(String, String)
161 */
162 public static byte[] getBytesUsAscii(final String string) {
163 return getBytes(string, Charsets.US_ASCII);
164 }
165
166 /**
167 * Encodes the given string into a sequence of bytes using the UTF-16 charset, storing the result into a new byte
168 * array.
169 *
170 * @param string
171 * the String to encode, may be <code>null</code>
172 * @return encoded bytes, or <code>null</code> if the input string was <code>null</code>
173 * @throws NullPointerException
174 * Thrown if {@link Charsets#UTF_16} is not initialized, which should never happen since it is
175 * required by the Java platform specification.
176 * @since As of 1.7, throws {@link NullPointerException} instead of UnsupportedEncodingException
177 * @see <a href="http://download.oracle.com/javase/6/docs/api/java/nio/charset/Charset.html">Standard charsets</a>
178 * @see #getBytesUnchecked(String, String)
179 */
180 public static byte[] getBytesUtf16(final String string) {
181 return getBytes(string, Charsets.UTF_16);
182 }
183
184 /**
185 * Encodes the given string into a sequence of bytes using the UTF-16BE charset, storing the result into a new byte
186 * array.
187 *
188 * @param string
189 * the String to encode, may be <code>null</code>
190 * @return encoded bytes, or <code>null</code> if the input string was <code>null</code>
191 * @throws NullPointerException
192 * Thrown if {@link Charsets#UTF_16BE} is not initialized, which should never happen since it is
193 * required by the Java platform specification.
194 * @since As of 1.7, throws {@link NullPointerException} instead of UnsupportedEncodingException
195 * @see <a href="http://download.oracle.com/javase/6/docs/api/java/nio/charset/Charset.html">Standard charsets</a>
196 * @see #getBytesUnchecked(String, String)
197 */
198 public static byte[] getBytesUtf16Be(final String string) {
199 return getBytes(string, Charsets.UTF_16BE);
200 }
201
202 /**
203 * Encodes the given string into a sequence of bytes using the UTF-16LE charset, storing the result into a new byte
204 * array.
205 *
206 * @param string
207 * the String to encode, may be <code>null</code>
208 * @return encoded bytes, or <code>null</code> if the input string was <code>null</code>
209 * @throws NullPointerException
210 * Thrown if {@link Charsets#UTF_16LE} is not initialized, which should never happen since it is
211 * required by the Java platform specification.
212 * @since As of 1.7, throws {@link NullPointerException} instead of UnsupportedEncodingException
213 * @see <a href="http://download.oracle.com/javase/6/docs/api/java/nio/charset/Charset.html">Standard charsets</a>
214 * @see #getBytesUnchecked(String, String)
215 */
216 public static byte[] getBytesUtf16Le(final String string) {
217 return getBytes(string, Charsets.UTF_16LE);
218 }
219
220 /**
221 * Encodes the given string into a sequence of bytes using the UTF-8 charset, storing the result into a new byte
222 * array.
223 *
224 * @param string
225 * the String to encode, may be <code>null</code>
226 * @return encoded bytes, or <code>null</code> if the input string was <code>null</code>
227 * @throws NullPointerException
228 * Thrown if {@link Charsets#UTF_8} is not initialized, which should never happen since it is
229 * required by the Java platform specification.
230 * @since As of 1.7, throws {@link NullPointerException} instead of UnsupportedEncodingException
231 * @see <a href="http://download.oracle.com/javase/6/docs/api/java/nio/charset/Charset.html">Standard charsets</a>
232 * @see #getBytesUnchecked(String, String)
233 */
234 public static byte[] getBytesUtf8(final String string) {
235 return getBytes(string, Charsets.UTF_8);
236 }
237
238 private static IllegalStateException newIllegalStateException(final String charsetName,
239 final UnsupportedEncodingException e) {
240 return new IllegalStateException(charsetName + ": " + e);
241 }
242
243 /**
244 * Constructs a new <code>String</code> by decoding the specified array of bytes using the given charset.
245 *
246 * @param bytes
247 * The bytes to be decoded into characters
248 * @param charset
249 * The {@link Charset} to encode the <code>String</code>
250 * @return A new <code>String</code> decoded from the specified array of bytes using the given charset,
251 * or <code>null</code> if the input byte array was <code>null</code>.
252 * @throws NullPointerException
253 * Thrown if {@link Charsets#UTF_8} is not initialized, which should never happen since it is
254 * required by the Java platform specification.
255 */
256 private static String newString(final byte[] bytes, final Charset charset) {
257 return bytes == null ? null : new String(bytes, charset);
258 }
259
260 /**
261 * Constructs a new <code>String</code> by decoding the specified array of bytes using the given charset.
262 * <p>
263 * This method catches {@link UnsupportedEncodingException} and re-throws it as {@link IllegalStateException}, which
264 * should never happen for a required charset name. Use this method when the encoding is required to be in the JRE.
265 * </p>
266 *
267 * @param bytes
268 * The bytes to be decoded into characters, may be <code>null</code>
269 * @param charsetName
270 * The name of a required {@link java.nio.charset.Charset}
271 * @return A new <code>String</code> decoded from the specified array of bytes using the given charset,
272 * or <code>null</code> if the input byte array was <code>null</code>.
273 * @throws IllegalStateException
274 * Thrown when a {@link UnsupportedEncodingException} is caught, which should never happen for a
275 * required charset name.
276 * @see CharEncoding
277 * @see String#String(byte[], String)
278 */
279 public static String newString(final byte[] bytes, final String charsetName) {
280 if (bytes == null) {
281 return null;
282 }
283 try {
284 return new String(bytes, charsetName);
285 } catch (final UnsupportedEncodingException e) {
286 throw StringUtils.newIllegalStateException(charsetName, e);
287 }
288 }
289
290 /**
291 * Constructs a new <code>String</code> by decoding the specified array of bytes using the ISO-8859-1 charset.
292 *
293 * @param bytes
294 * The bytes to be decoded into characters, may be <code>null</code>
295 * @return A new <code>String</code> decoded from the specified array of bytes using the ISO-8859-1 charset, or
296 * <code>null</code> if the input byte array was <code>null</code>.
297 * @throws NullPointerException
298 * Thrown if {@link Charsets#ISO_8859_1} is not initialized, which should never happen since it is
299 * required by the Java platform specification.
300 * @since As of 1.7, throws {@link NullPointerException} instead of UnsupportedEncodingException
301 */
302 public static String newStringIso8859_1(final byte[] bytes) {
303 return new String(bytes, Charsets.ISO_8859_1);
304 }
305
306 /**
307 * Constructs a new <code>String</code> by decoding the specified array of bytes using the US-ASCII charset.
308 *
309 * @param bytes
310 * The bytes to be decoded into characters
311 * @return A new <code>String</code> decoded from the specified array of bytes using the US-ASCII charset,
312 * or <code>null</code> if the input byte array was <code>null</code>.
313 * @throws NullPointerException
314 * Thrown if {@link Charsets#US_ASCII} is not initialized, which should never happen since it is
315 * required by the Java platform specification.
316 * @since As of 1.7, throws {@link NullPointerException} instead of UnsupportedEncodingException
317 */
318 public static String newStringUsAscii(final byte[] bytes) {
319 return new String(bytes, Charsets.US_ASCII);
320 }
321
322 /**
323 * Constructs a new <code>String</code> by decoding the specified array of bytes using the UTF-16 charset.
324 *
325 * @param bytes
326 * The bytes to be decoded into characters
327 * @return A new <code>String</code> decoded from the specified array of bytes using the UTF-16 charset
328 * or <code>null</code> if the input byte array was <code>null</code>.
329 * @throws NullPointerException
330 * Thrown if {@link Charsets#UTF_16} is not initialized, which should never happen since it is
331 * required by the Java platform specification.
332 * @since As of 1.7, throws {@link NullPointerException} instead of UnsupportedEncodingException
333 */
334 public static String newStringUtf16(final byte[] bytes) {
335 return new String(bytes, Charsets.UTF_16);
336 }
337
338 /**
339 * Constructs a new <code>String</code> by decoding the specified array of bytes using the UTF-16BE charset.
340 *
341 * @param bytes
342 * The bytes to be decoded into characters
343 * @return A new <code>String</code> decoded from the specified array of bytes using the UTF-16BE charset,
344 * or <code>null</code> if the input byte array was <code>null</code>.
345 * @throws NullPointerException
346 * Thrown if {@link Charsets#UTF_16BE} is not initialized, which should never happen since it is
347 * required by the Java platform specification.
348 * @since As of 1.7, throws {@link NullPointerException} instead of UnsupportedEncodingException
349 */
350 public static String newStringUtf16Be(final byte[] bytes) {
351 return new String(bytes, Charsets.UTF_16BE);
352 }
353
354 /**
355 * Constructs a new <code>String</code> by decoding the specified array of bytes using the UTF-16LE charset.
356 *
357 * @param bytes
358 * The bytes to be decoded into characters
359 * @return A new <code>String</code> decoded from the specified array of bytes using the UTF-16LE charset,
360 * or <code>null</code> if the input byte array was <code>null</code>.
361 * @throws NullPointerException
362 * Thrown if {@link Charsets#UTF_16LE} is not initialized, which should never happen since it is
363 * required by the Java platform specification.
364 * @since As of 1.7, throws {@link NullPointerException} instead of UnsupportedEncodingException
365 */
366 public static String newStringUtf16Le(final byte[] bytes) {
367 return new String(bytes, Charsets.UTF_16LE);
368 }
369
370 /**
371 * Constructs a new <code>String</code> by decoding the specified array of bytes using the UTF-8 charset.
372 *
373 * @param bytes
374 * The bytes to be decoded into characters
375 * @return A new <code>String</code> decoded from the specified array of bytes using the UTF-8 charset,
376 * or <code>null</code> if the input byte array was <code>null</code>.
377 * @throws NullPointerException
378 * Thrown if {@link Charsets#UTF_8} is not initialized, which should never happen since it is
379 * required by the Java platform specification.
380 * @since As of 1.7, throws {@link NullPointerException} instead of UnsupportedEncodingException
381 */
382 public static String newStringUtf8(final byte[] bytes) {
383 return newString(bytes, Charsets.UTF_8);
384 }
385
386 }