001/* 002 * Licensed to the Apache Software Foundation (ASF) under one or more 003 * contributor license agreements. See the NOTICE file distributed with 004 * this work for additional information regarding copyright ownership. 005 * The ASF licenses this file to You under the Apache License, Version 2.0 006 * (the "License"); you may not use this file except in compliance with 007 * the License. You may obtain a copy of the License at 008 * 009 * http://www.apache.org/licenses/LICENSE-2.0 010 * 011 * Unless required by applicable law or agreed to in writing, software 012 * distributed under the License is distributed on an "AS IS" BASIS, 013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 014 * See the License for the specific language governing permissions and 015 * limitations under the License. 016 */ 017 018package org.apache.commons.codec.binary; 019 020import java.io.UnsupportedEncodingException; 021import java.nio.ByteBuffer; 022import java.nio.charset.Charset; 023import java.nio.charset.StandardCharsets; 024 025import org.apache.commons.codec.CharEncoding; 026 027/** 028 * Converts String to and from bytes using the encodings required by the Java specification. These encodings are 029 * specified in standard {@link Charset}. 030 * 031 * <p>This class is immutable and thread-safe.</p> 032 * 033 * @see CharEncoding 034 * @see Charset 035 * @see StandardCharsets 036 * @since 1.4 037 */ 038public class StringUtils { 039 040 /** 041 * <p> 042 * Compares two CharSequences, returning {@code true} if they represent equal sequences of characters. 043 * </p> 044 * 045 * <p> 046 * {@code null}s are handled without exceptions. Two {@code null} references are considered to be equal. 047 * The comparison is case sensitive. 048 * </p> 049 * 050 * <pre> 051 * StringUtils.equals(null, null) = true 052 * StringUtils.equals(null, "abc") = false 053 * StringUtils.equals("abc", null) = false 054 * StringUtils.equals("abc", "abc") = true 055 * StringUtils.equals("abc", "ABC") = false 056 * </pre> 057 * 058 * <p> 059 * Copied from Apache Commons Lang r1583482 on April 10, 2014 (day of 3.3.2 release). 060 * </p> 061 * 062 * @see Object#equals(Object) 063 * @param cs1 064 * the first CharSequence, may be {@code null} 065 * @param cs2 066 * the second CharSequence, may be {@code null} 067 * @return {@code true} if the CharSequences are equal (case-sensitive), or both {@code null} 068 * @since 1.10 069 */ 070 public static boolean equals(final CharSequence cs1, final CharSequence cs2) { 071 if (cs1 == cs2) { 072 return true; 073 } 074 if (cs1 == null || cs2 == null) { 075 return false; 076 } 077 if (cs1 instanceof String && cs2 instanceof String) { 078 return cs1.equals(cs2); 079 } 080 return cs1.length() == cs2.length() && CharSequenceUtils.regionMatches(cs1, false, 0, cs2, 0, cs1.length()); 081 } 082 083 /** 084 * Calls {@link String#getBytes(Charset)} 085 * 086 * @param string 087 * The string to encode (if null, return null). 088 * @param charset 089 * The {@link Charset} to encode the {@code String} 090 * @return the encoded bytes 091 */ 092 private static ByteBuffer getByteBuffer(final String string, final Charset charset) { 093 if (string == null) { 094 return null; 095 } 096 return ByteBuffer.wrap(string.getBytes(charset)); 097 } 098 099 /** 100 * Encodes the given string into a byte buffer using the UTF-8 charset, storing the result into a new byte 101 * array. 102 * 103 * @param string 104 * the String to encode, may be {@code null} 105 * @return encoded bytes, or {@code null} if the input string was {@code null} 106 * @throws NullPointerException 107 * Thrown if {@link StandardCharsets#UTF_8} is not initialized, which should never happen since it is 108 * required by the Java platform specification. 109 * @see Charset 110 * @see #getBytesUnchecked(String, String) 111 * @since 1.11 112 */ 113 public static ByteBuffer getByteBufferUtf8(final String string) { 114 return getByteBuffer(string, StandardCharsets.UTF_8); 115 } 116 117 /** 118 * Calls {@link String#getBytes(Charset)} 119 * 120 * @param string 121 * The string to encode (if null, return null). 122 * @param charset 123 * The {@link Charset} to encode the {@code String} 124 * @return the encoded bytes 125 */ 126 private static byte[] getBytes(final String string, final Charset charset) { 127 return string == null ? null : string.getBytes(charset); 128 } 129 130 /** 131 * Encodes the given string into a sequence of bytes using the ISO-8859-1 charset, storing the result into a new 132 * byte array. 133 * 134 * @param string 135 * the String to encode, may be {@code null} 136 * @return encoded bytes, or {@code null} if the input string was {@code null} 137 * @throws NullPointerException 138 * Thrown if {@link StandardCharsets#ISO_8859_1} is not initialized, which should never happen 139 * since it is required by the Java platform specification. 140 * @since As of 1.7, throws {@link NullPointerException} instead of UnsupportedEncodingException 141 * @see Charset 142 * @see #getBytesUnchecked(String, String) 143 */ 144 public static byte[] getBytesIso8859_1(final String string) { 145 return getBytes(string, StandardCharsets.ISO_8859_1); 146 } 147 148 /** 149 * Encodes the given string into a sequence of bytes using the named charset, storing the result into a new byte 150 * array. 151 * <p> 152 * This method catches {@link UnsupportedEncodingException} and rethrows it as {@link IllegalStateException}, which 153 * should never happen for a required charset name. Use this method when the encoding is required to be in the JRE. 154 * </p> 155 * 156 * @param string 157 * the String to encode, may be {@code null} 158 * @param charsetName 159 * The name of a required {@link java.nio.charset.Charset} 160 * @return encoded bytes, or {@code null} if the input string was {@code null} 161 * @throws IllegalStateException 162 * Thrown when a {@link UnsupportedEncodingException} is caught, which should never happen for a 163 * required charset name. 164 * @see CharEncoding 165 * @see String#getBytes(String) 166 */ 167 public static byte[] getBytesUnchecked(final String string, final String charsetName) { 168 if (string == null) { 169 return null; 170 } 171 try { 172 return string.getBytes(charsetName); 173 } catch (final UnsupportedEncodingException e) { 174 throw StringUtils.newIllegalStateException(charsetName, e); 175 } 176 } 177 178 /** 179 * Encodes the given string into a sequence of bytes using the US-ASCII charset, storing the result into a new byte 180 * array. 181 * 182 * @param string 183 * the String to encode, may be {@code null} 184 * @return encoded bytes, or {@code null} if the input string was {@code null} 185 * @throws NullPointerException 186 * Thrown if {@link StandardCharsets#US_ASCII} is not initialized, which should never happen since it is 187 * required by the Java platform specification. 188 * @since As of 1.7, throws {@link NullPointerException} instead of UnsupportedEncodingException 189 * @see Charset 190 * @see #getBytesUnchecked(String, String) 191 */ 192 public static byte[] getBytesUsAscii(final String string) { 193 return getBytes(string, StandardCharsets.US_ASCII); 194 } 195 196 /** 197 * Encodes the given string into a sequence of bytes using the UTF-16 charset, storing the result into a new byte 198 * array. 199 * 200 * @param string 201 * the String to encode, may be {@code null} 202 * @return encoded bytes, or {@code null} if the input string was {@code null} 203 * @throws NullPointerException 204 * Thrown if {@link StandardCharsets#UTF_16} is not initialized, which should never happen since it is 205 * required by the Java platform specification. 206 * @since As of 1.7, throws {@link NullPointerException} instead of UnsupportedEncodingException 207 * @see Charset 208 * @see #getBytesUnchecked(String, String) 209 */ 210 public static byte[] getBytesUtf16(final String string) { 211 return getBytes(string, StandardCharsets.UTF_16); 212 } 213 214 /** 215 * Encodes the given string into a sequence of bytes using the UTF-16BE charset, storing the result into a new byte 216 * array. 217 * 218 * @param string 219 * the String to encode, may be {@code null} 220 * @return encoded bytes, or {@code null} if the input string was {@code null} 221 * @throws NullPointerException 222 * Thrown if {@link StandardCharsets#UTF_16BE} is not initialized, which should never happen since it is 223 * required by the Java platform specification. 224 * @since As of 1.7, throws {@link NullPointerException} instead of UnsupportedEncodingException 225 * @see Charset 226 * @see #getBytesUnchecked(String, String) 227 */ 228 public static byte[] getBytesUtf16Be(final String string) { 229 return getBytes(string, StandardCharsets.UTF_16BE); 230 } 231 232 /** 233 * Encodes the given string into a sequence of bytes using the UTF-16LE charset, storing the result into a new byte 234 * array. 235 * 236 * @param string 237 * the String to encode, may be {@code null} 238 * @return encoded bytes, or {@code null} if the input string was {@code null} 239 * @throws NullPointerException 240 * Thrown if {@link StandardCharsets#UTF_16LE} is not initialized, which should never happen since it is 241 * required by the Java platform specification. 242 * @since As of 1.7, throws {@link NullPointerException} instead of UnsupportedEncodingException 243 * @see Charset 244 * @see #getBytesUnchecked(String, String) 245 */ 246 public static byte[] getBytesUtf16Le(final String string) { 247 return getBytes(string, StandardCharsets.UTF_16LE); 248 } 249 250 /** 251 * Encodes the given string into a sequence of bytes using the UTF-8 charset, storing the result into a new byte 252 * array. 253 * 254 * @param string 255 * the String to encode, may be {@code null} 256 * @return encoded bytes, or {@code null} if the input string was {@code null} 257 * @throws NullPointerException 258 * Thrown if {@link StandardCharsets#UTF_8} is not initialized, which should never happen since it is 259 * required by the Java platform specification. 260 * @since As of 1.7, throws {@link NullPointerException} instead of UnsupportedEncodingException 261 * @see Charset 262 * @see #getBytesUnchecked(String, String) 263 */ 264 public static byte[] getBytesUtf8(final String string) { 265 return getBytes(string, StandardCharsets.UTF_8); 266 } 267 268 private static IllegalStateException newIllegalStateException(final String charsetName, 269 final UnsupportedEncodingException e) { 270 return new IllegalStateException(charsetName + ": " + e); 271 } 272 273 /** 274 * Constructs a new {@code String} by decoding the specified array of bytes using the given charset. 275 * 276 * @param bytes 277 * The bytes to be decoded into characters 278 * @param charset 279 * The {@link Charset} to encode the {@code String}; not {@code null} 280 * @return A new {@code String} decoded from the specified array of bytes using the given charset, 281 * or {@code null} if the input byte array was {@code null}. 282 * @throws NullPointerException 283 * Thrown if charset is {@code null} 284 */ 285 private static String newString(final byte[] bytes, final Charset charset) { 286 return bytes == null ? null : new String(bytes, charset); 287 } 288 289 /** 290 * Constructs a new {@code String} by decoding the specified array of bytes using the given charset. 291 * <p> 292 * This method catches {@link UnsupportedEncodingException} and re-throws it as {@link IllegalStateException}, which 293 * should never happen for a required charset name. Use this method when the encoding is required to be in the JRE. 294 * </p> 295 * 296 * @param bytes 297 * The bytes to be decoded into characters, may be {@code null} 298 * @param charsetName 299 * The name of a required {@link java.nio.charset.Charset} 300 * @return A new {@code String} decoded from the specified array of bytes using the given charset, 301 * or {@code null} if the input byte array was {@code null}. 302 * @throws IllegalStateException 303 * Thrown when a {@link UnsupportedEncodingException} is caught, which should never happen for a 304 * required charset name. 305 * @see CharEncoding 306 * @see String#String(byte[], String) 307 */ 308 public static String newString(final byte[] bytes, final String charsetName) { 309 if (bytes == null) { 310 return null; 311 } 312 try { 313 return new String(bytes, charsetName); 314 } catch (final UnsupportedEncodingException e) { 315 throw StringUtils.newIllegalStateException(charsetName, e); 316 } 317 } 318 319 /** 320 * Constructs a new {@code String} by decoding the specified array of bytes using the ISO-8859-1 charset. 321 * 322 * @param bytes 323 * The bytes to be decoded into characters, may be {@code null} 324 * @return A new {@code String} decoded from the specified array of bytes using the ISO-8859-1 charset, or 325 * {@code null} if the input byte array was {@code null}. 326 * @throws NullPointerException 327 * Thrown if {@link StandardCharsets#ISO_8859_1} is not initialized, which should never happen 328 * since it is required by the Java platform specification. 329 * @since As of 1.7, throws {@link NullPointerException} instead of UnsupportedEncodingException 330 */ 331 public static String newStringIso8859_1(final byte[] bytes) { 332 return newString(bytes, StandardCharsets.ISO_8859_1); 333 } 334 335 /** 336 * Constructs a new {@code String} by decoding the specified array of bytes using the US-ASCII charset. 337 * 338 * @param bytes 339 * The bytes to be decoded into characters 340 * @return A new {@code String} decoded from the specified array of bytes using the US-ASCII charset, 341 * or {@code null} if the input byte array was {@code null}. 342 * @throws NullPointerException 343 * Thrown if {@link StandardCharsets#US_ASCII} is not initialized, which should never happen since it is 344 * required by the Java platform specification. 345 * @since As of 1.7, throws {@link NullPointerException} instead of UnsupportedEncodingException 346 */ 347 public static String newStringUsAscii(final byte[] bytes) { 348 return newString(bytes, StandardCharsets.US_ASCII); 349 } 350 351 /** 352 * Constructs a new {@code String} by decoding the specified array of bytes using the UTF-16 charset. 353 * 354 * @param bytes 355 * The bytes to be decoded into characters 356 * @return A new {@code String} decoded from the specified array of bytes using the UTF-16 charset 357 * or {@code null} if the input byte array was {@code null}. 358 * @throws NullPointerException 359 * Thrown if {@link StandardCharsets#UTF_16} is not initialized, which should never happen since it is 360 * required by the Java platform specification. 361 * @since As of 1.7, throws {@link NullPointerException} instead of UnsupportedEncodingException 362 */ 363 public static String newStringUtf16(final byte[] bytes) { 364 return newString(bytes, StandardCharsets.UTF_16); 365 } 366 367 /** 368 * Constructs a new {@code String} by decoding the specified array of bytes using the UTF-16BE charset. 369 * 370 * @param bytes 371 * The bytes to be decoded into characters 372 * @return A new {@code String} decoded from the specified array of bytes using the UTF-16BE charset, 373 * or {@code null} if the input byte array was {@code null}. 374 * @throws NullPointerException 375 * Thrown if {@link StandardCharsets#UTF_16BE} is not initialized, which should never happen since it is 376 * required by the Java platform specification. 377 * @since As of 1.7, throws {@link NullPointerException} instead of UnsupportedEncodingException 378 */ 379 public static String newStringUtf16Be(final byte[] bytes) { 380 return newString(bytes, StandardCharsets.UTF_16BE); 381 } 382 383 /** 384 * Constructs a new {@code String} by decoding the specified array of bytes using the UTF-16LE charset. 385 * 386 * @param bytes 387 * The bytes to be decoded into characters 388 * @return A new {@code String} decoded from the specified array of bytes using the UTF-16LE charset, 389 * or {@code null} if the input byte array was {@code null}. 390 * @throws NullPointerException 391 * Thrown if {@link StandardCharsets#UTF_16LE} is not initialized, which should never happen since it is 392 * required by the Java platform specification. 393 * @since As of 1.7, throws {@link NullPointerException} instead of UnsupportedEncodingException 394 */ 395 public static String newStringUtf16Le(final byte[] bytes) { 396 return newString(bytes, StandardCharsets.UTF_16LE); 397 } 398 399 /** 400 * Constructs a new {@code String} by decoding the specified array of bytes using the UTF-8 charset. 401 * 402 * @param bytes 403 * The bytes to be decoded into characters 404 * @return A new {@code String} decoded from the specified array of bytes using the UTF-8 charset, 405 * or {@code null} if the input byte array was {@code null}. 406 * @throws NullPointerException 407 * Thrown if {@link StandardCharsets#UTF_8} is not initialized, which should never happen since it is 408 * required by the Java platform specification. 409 * @since As of 1.7, throws {@link NullPointerException} instead of UnsupportedEncodingException 410 */ 411 public static String newStringUtf8(final byte[] bytes) { 412 return newString(bytes, StandardCharsets.UTF_8); 413 } 414 415 /** 416 * TODO Make private in 2.0. 417 * 418 * @deprecated TODO Make private in 2.0. 419 */ 420 @Deprecated 421 public StringUtils() { 422 // empty 423 } 424}