001/* 002 * Licensed to the Apache Software Foundation (ASF) under one or more 003 * contributor license agreements. See the NOTICE file distributed with 004 * this work for additional information regarding copyright ownership. 005 * The ASF licenses this file to You under the Apache License, Version 2.0 006 * (the "License"); you may not use this file except in compliance with 007 * the License. You may obtain a copy of the License at 008 * 009 * https://www.apache.org/licenses/LICENSE-2.0 010 * 011 * Unless required by applicable law or agreed to in writing, software 012 * distributed under the License is distributed on an "AS IS" BASIS, 013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 014 * See the License for the specific language governing permissions and 015 * limitations under the License. 016 */ 017 018package org.apache.commons.codec.binary; 019 020import java.io.UnsupportedEncodingException; 021import java.nio.ByteBuffer; 022import java.nio.charset.Charset; 023import java.nio.charset.StandardCharsets; 024 025import org.apache.commons.codec.CharEncoding; 026 027/** 028 * Converts String to and from bytes using the encodings required by the Java specification. These encodings are 029 * specified in standard {@link Charset}. 030 * 031 * <p> 032 * This class is immutable and thread-safe. 033 * </p> 034 * 035 * @see CharEncoding 036 * @see Charset 037 * @see StandardCharsets 038 * @since 1.4 039 */ 040public class StringUtils { 041 042 /** 043 * <p> 044 * Compares two CharSequences, returning {@code true} if they represent equal sequences of characters. 045 * </p> 046 * 047 * <p> 048 * {@code null}s are handled without exceptions. Two {@code null} references are considered to be equal. 049 * The comparison is case sensitive. 050 * </p> 051 * 052 * <pre> 053 * StringUtils.equals(null, null) = true 054 * StringUtils.equals(null, "abc") = false 055 * StringUtils.equals("abc", null) = false 056 * StringUtils.equals("abc", "abc") = true 057 * StringUtils.equals("abc", "ABC") = false 058 * </pre> 059 * 060 * <p> 061 * Copied from Apache Commons Lang r1583482 on April 10, 2014 (day of 3.3.2 release). 062 * </p> 063 * 064 * @see Object#equals(Object) 065 * @param cs1 066 * the first CharSequence, may be {@code null} 067 * @param cs2 068 * the second CharSequence, may be {@code null} 069 * @return {@code true} if the CharSequences are equal (case-sensitive), or both {@code null} 070 * @since 1.10 071 */ 072 public static boolean equals(final CharSequence cs1, final CharSequence cs2) { 073 if (cs1 == cs2) { 074 return true; 075 } 076 if (cs1 == null || cs2 == null) { 077 return false; 078 } 079 if (cs1 instanceof String && cs2 instanceof String) { 080 return cs1.equals(cs2); 081 } 082 return cs1.length() == cs2.length() && CharSequenceUtils.regionMatches(cs1, false, 0, cs2, 0, cs1.length()); 083 } 084 085 /** 086 * Calls {@link String#getBytes(Charset)} 087 * 088 * @param string 089 * The string to encode (if null, return null). 090 * @param charset 091 * The {@link Charset} to encode the {@code String} 092 * @return the encoded bytes 093 */ 094 private static ByteBuffer getByteBuffer(final String string, final Charset charset) { 095 if (string == null) { 096 return null; 097 } 098 return ByteBuffer.wrap(string.getBytes(charset)); 099 } 100 101 /** 102 * Encodes the given string into a byte buffer using the UTF-8 charset, storing the result into a new byte 103 * array. 104 * 105 * @param string 106 * the String to encode, may be {@code null} 107 * @return encoded bytes, or {@code null} if the input string was {@code null} 108 * @throws NullPointerException 109 * Thrown if {@link StandardCharsets#UTF_8} is not initialized, which should never happen since it is 110 * required by the Java platform specification. 111 * @see Charset 112 * @see #getBytesUnchecked(String, String) 113 * @since 1.11 114 */ 115 public static ByteBuffer getByteBufferUtf8(final String string) { 116 return getByteBuffer(string, StandardCharsets.UTF_8); 117 } 118 119 /** 120 * Calls {@link String#getBytes(Charset)} 121 * 122 * @param string 123 * The string to encode (if null, return null). 124 * @param charset 125 * The {@link Charset} to encode the {@code String} 126 * @return the encoded bytes 127 */ 128 private static byte[] getBytes(final String string, final Charset charset) { 129 return string == null ? null : string.getBytes(charset); 130 } 131 132 /** 133 * Encodes the given string into a sequence of bytes using the ISO-8859-1 charset, storing the result into a new 134 * byte array. 135 * 136 * @param string 137 * the String to encode, may be {@code null} 138 * @return encoded bytes, or {@code null} if the input string was {@code null} 139 * @throws NullPointerException 140 * Thrown if {@link StandardCharsets#ISO_8859_1} is not initialized, which should never happen 141 * since it is required by the Java platform specification. 142 * @since As of 1.7, throws {@link NullPointerException} instead of UnsupportedEncodingException 143 * @see Charset 144 * @see #getBytesUnchecked(String, String) 145 */ 146 public static byte[] getBytesIso8859_1(final String string) { 147 return getBytes(string, StandardCharsets.ISO_8859_1); 148 } 149 150 /** 151 * Encodes the given string into a sequence of bytes using the named charset, storing the result into a new byte 152 * array. 153 * <p> 154 * This method catches {@link UnsupportedEncodingException} and rethrows it as {@link IllegalStateException}, which 155 * should never happen for a required charset name. Use this method when the encoding is required to be in the JRE. 156 * </p> 157 * 158 * @param string 159 * the String to encode, may be {@code null} 160 * @param charsetName 161 * The name of a required {@link java.nio.charset.Charset} 162 * @return encoded bytes, or {@code null} if the input string was {@code null} 163 * @throws IllegalStateException 164 * Thrown when a {@link UnsupportedEncodingException} is caught, which should never happen for a 165 * required charset name. 166 * @see CharEncoding 167 * @see String#getBytes(String) 168 */ 169 public static byte[] getBytesUnchecked(final String string, final String charsetName) { 170 if (string == null) { 171 return null; 172 } 173 try { 174 return string.getBytes(charsetName); 175 } catch (final UnsupportedEncodingException e) { 176 throw newIllegalStateException(charsetName, e); 177 } 178 } 179 180 /** 181 * Encodes the given string into a sequence of bytes using the US-ASCII charset, storing the result into a new byte 182 * array. 183 * 184 * @param string 185 * the String to encode, may be {@code null} 186 * @return encoded bytes, or {@code null} if the input string was {@code null} 187 * @throws NullPointerException 188 * Thrown if {@link StandardCharsets#US_ASCII} is not initialized, which should never happen since it is 189 * required by the Java platform specification. 190 * @since As of 1.7, throws {@link NullPointerException} instead of UnsupportedEncodingException 191 * @see Charset 192 * @see #getBytesUnchecked(String, String) 193 */ 194 public static byte[] getBytesUsAscii(final String string) { 195 return getBytes(string, StandardCharsets.US_ASCII); 196 } 197 198 /** 199 * Encodes the given string into a sequence of bytes using the UTF-16 charset, storing the result into a new byte 200 * array. 201 * 202 * @param string 203 * the String to encode, may be {@code null} 204 * @return encoded bytes, or {@code null} if the input string was {@code null} 205 * @throws NullPointerException 206 * Thrown if {@link StandardCharsets#UTF_16} is not initialized, which should never happen since it is 207 * required by the Java platform specification. 208 * @since As of 1.7, throws {@link NullPointerException} instead of UnsupportedEncodingException 209 * @see Charset 210 * @see #getBytesUnchecked(String, String) 211 */ 212 public static byte[] getBytesUtf16(final String string) { 213 return getBytes(string, StandardCharsets.UTF_16); 214 } 215 216 /** 217 * Encodes the given string into a sequence of bytes using the UTF-16BE charset, storing the result into a new byte 218 * array. 219 * 220 * @param string 221 * the String to encode, may be {@code null} 222 * @return encoded bytes, or {@code null} if the input string was {@code null} 223 * @throws NullPointerException 224 * Thrown if {@link StandardCharsets#UTF_16BE} is not initialized, which should never happen since it is 225 * required by the Java platform specification. 226 * @since As of 1.7, throws {@link NullPointerException} instead of UnsupportedEncodingException 227 * @see Charset 228 * @see #getBytesUnchecked(String, String) 229 */ 230 public static byte[] getBytesUtf16Be(final String string) { 231 return getBytes(string, StandardCharsets.UTF_16BE); 232 } 233 234 /** 235 * Encodes the given string into a sequence of bytes using the UTF-16LE charset, storing the result into a new byte 236 * array. 237 * 238 * @param string 239 * the String to encode, may be {@code null} 240 * @return encoded bytes, or {@code null} if the input string was {@code null} 241 * @throws NullPointerException 242 * Thrown if {@link StandardCharsets#UTF_16LE} is not initialized, which should never happen since it is 243 * required by the Java platform specification. 244 * @since As of 1.7, throws {@link NullPointerException} instead of UnsupportedEncodingException 245 * @see Charset 246 * @see #getBytesUnchecked(String, String) 247 */ 248 public static byte[] getBytesUtf16Le(final String string) { 249 return getBytes(string, StandardCharsets.UTF_16LE); 250 } 251 252 /** 253 * Encodes the given string into a sequence of bytes using the UTF-8 charset, storing the result into a new byte 254 * array. 255 * 256 * @param string 257 * the String to encode, may be {@code null} 258 * @return encoded bytes, or {@code null} if the input string was {@code null} 259 * @throws NullPointerException 260 * Thrown if {@link StandardCharsets#UTF_8} is not initialized, which should never happen since it is 261 * required by the Java platform specification. 262 * @since As of 1.7, throws {@link NullPointerException} instead of UnsupportedEncodingException 263 * @see Charset 264 * @see #getBytesUnchecked(String, String) 265 */ 266 public static byte[] getBytesUtf8(final String string) { 267 return getBytes(string, StandardCharsets.UTF_8); 268 } 269 270 private static IllegalStateException newIllegalStateException(final String charsetName, final UnsupportedEncodingException e) { 271 return new IllegalStateException(charsetName + ": " + e); 272 } 273 274 /** 275 * Constructs a new {@code String} by decoding the specified array of bytes using the given charset. 276 * 277 * @param bytes 278 * The bytes to be decoded into characters 279 * @param charset 280 * The {@link Charset} to encode the {@code String}; not {@code null} 281 * @return A new {@code String} decoded from the specified array of bytes using the given charset, 282 * or {@code null} if the input byte array was {@code null}. 283 * @throws NullPointerException 284 * Thrown if charset is {@code null} 285 */ 286 private static String newString(final byte[] bytes, final Charset charset) { 287 return bytes == null ? null : new String(bytes, charset); 288 } 289 290 /** 291 * Constructs a new {@code String} by decoding the specified array of bytes using the given charset. 292 * <p> 293 * This method catches {@link UnsupportedEncodingException} and re-throws it as {@link IllegalStateException}, which 294 * should never happen for a required charset name. Use this method when the encoding is required to be in the JRE. 295 * </p> 296 * 297 * @param bytes 298 * The bytes to be decoded into characters, may be {@code null} 299 * @param charsetName 300 * The name of a required {@link java.nio.charset.Charset} 301 * @return A new {@code String} decoded from the specified array of bytes using the given charset, 302 * or {@code null} if the input byte array was {@code null}. 303 * @throws IllegalStateException 304 * Thrown when a {@link UnsupportedEncodingException} is caught, which should never happen for a 305 * required charset name. 306 * @see CharEncoding 307 * @see String#String(byte[], String) 308 */ 309 public static String newString(final byte[] bytes, final String charsetName) { 310 if (bytes == null) { 311 return null; 312 } 313 try { 314 return new String(bytes, charsetName); 315 } catch (final UnsupportedEncodingException e) { 316 throw newIllegalStateException(charsetName, e); 317 } 318 } 319 320 /** 321 * Constructs a new {@code String} by decoding the specified array of bytes using the ISO-8859-1 charset. 322 * 323 * @param bytes 324 * The bytes to be decoded into characters, may be {@code null} 325 * @return A new {@code String} decoded from the specified array of bytes using the ISO-8859-1 charset, or 326 * {@code null} if the input byte array was {@code null}. 327 * @throws NullPointerException 328 * Thrown if {@link StandardCharsets#ISO_8859_1} is not initialized, which should never happen 329 * since it is required by the Java platform specification. 330 * @since As of 1.7, throws {@link NullPointerException} instead of UnsupportedEncodingException 331 */ 332 public static String newStringIso8859_1(final byte[] bytes) { 333 return newString(bytes, StandardCharsets.ISO_8859_1); 334 } 335 336 /** 337 * Constructs a new {@code String} by decoding the specified array of bytes using the US-ASCII charset. 338 * 339 * @param bytes 340 * The bytes to be decoded into characters 341 * @return A new {@code String} decoded from the specified array of bytes using the US-ASCII charset, 342 * or {@code null} if the input byte array was {@code null}. 343 * @throws NullPointerException 344 * Thrown if {@link StandardCharsets#US_ASCII} is not initialized, which should never happen since it is 345 * required by the Java platform specification. 346 * @since As of 1.7, throws {@link NullPointerException} instead of UnsupportedEncodingException 347 */ 348 public static String newStringUsAscii(final byte[] bytes) { 349 return newString(bytes, StandardCharsets.US_ASCII); 350 } 351 352 /** 353 * Constructs a new {@code String} by decoding the specified array of bytes using the UTF-16 charset. 354 * 355 * @param bytes 356 * The bytes to be decoded into characters 357 * @return A new {@code String} decoded from the specified array of bytes using the UTF-16 charset 358 * or {@code null} if the input byte array was {@code null}. 359 * @throws NullPointerException 360 * Thrown if {@link StandardCharsets#UTF_16} is not initialized, which should never happen since it is 361 * required by the Java platform specification. 362 * @since As of 1.7, throws {@link NullPointerException} instead of UnsupportedEncodingException 363 */ 364 public static String newStringUtf16(final byte[] bytes) { 365 return newString(bytes, StandardCharsets.UTF_16); 366 } 367 368 /** 369 * Constructs a new {@code String} by decoding the specified array of bytes using the UTF-16BE charset. 370 * 371 * @param bytes 372 * The bytes to be decoded into characters 373 * @return A new {@code String} decoded from the specified array of bytes using the UTF-16BE charset, 374 * or {@code null} if the input byte array was {@code null}. 375 * @throws NullPointerException 376 * Thrown if {@link StandardCharsets#UTF_16BE} is not initialized, which should never happen since it is 377 * required by the Java platform specification. 378 * @since As of 1.7, throws {@link NullPointerException} instead of UnsupportedEncodingException 379 */ 380 public static String newStringUtf16Be(final byte[] bytes) { 381 return newString(bytes, StandardCharsets.UTF_16BE); 382 } 383 384 /** 385 * Constructs a new {@code String} by decoding the specified array of bytes using the UTF-16LE charset. 386 * 387 * @param bytes 388 * The bytes to be decoded into characters 389 * @return A new {@code String} decoded from the specified array of bytes using the UTF-16LE charset, 390 * or {@code null} if the input byte array was {@code null}. 391 * @throws NullPointerException 392 * Thrown if {@link StandardCharsets#UTF_16LE} is not initialized, which should never happen since it is 393 * required by the Java platform specification. 394 * @since As of 1.7, throws {@link NullPointerException} instead of UnsupportedEncodingException 395 */ 396 public static String newStringUtf16Le(final byte[] bytes) { 397 return newString(bytes, StandardCharsets.UTF_16LE); 398 } 399 400 /** 401 * Constructs a new {@code String} by decoding the specified array of bytes using the UTF-8 charset. 402 * 403 * @param bytes 404 * The bytes to be decoded into characters 405 * @return A new {@code String} decoded from the specified array of bytes using the UTF-8 charset, 406 * or {@code null} if the input byte array was {@code null}. 407 * @throws NullPointerException 408 * Thrown if {@link StandardCharsets#UTF_8} is not initialized, which should never happen since it is 409 * required by the Java platform specification. 410 * @since As of 1.7, throws {@link NullPointerException} instead of UnsupportedEncodingException 411 */ 412 public static String newStringUtf8(final byte[] bytes) { 413 return newString(bytes, StandardCharsets.UTF_8); 414 } 415 416 /** 417 * TODO Make private in 2.0. 418 * 419 * @deprecated TODO Make private in 2.0. 420 */ 421 @Deprecated 422 public StringUtils() { 423 // empty 424 } 425}