/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.commons.codec.binary;

import java.io.UnsupportedEncodingException;
import java.nio.ByteBuffer;
import java.nio.charset.Charset;

import org.apache.commons.codec.CharEncoding;
import org.apache.commons.codec.Charsets;

/**
 * Converts String to and from bytes using the encodings required by the Java specification. These encodings are
 * specified in <a href="http://download.oracle.com/javase/6/docs/api/java/nio/charset/Charset.html">
 * Standard charsets</a>.
 *
 * <p>This class is immutable and thread-safe.</p>
 *
 * @see CharEncoding
 * @see <a href="http://download.oracle.com/javase/6/docs/api/java/nio/charset/Charset.html">Standard charsets</a>
 * @version $Id: StringUtils.java 1789539 2017-03-30 16:36:28Z sebb $
 * @since 1.4
 */
public class StringUtils {

    /**
     * <p>
     * Compares two CharSequences, returning <code>true</code> if they represent equal sequences of characters.
     * </p>
     *
     * <p>
     * <code>null</code>s are handled without exceptions. Two <code>null</code> references are considered to be equal.
     * The comparison is case sensitive.
     * </p>
     *
     * <pre>
     * StringUtils.equals(null, null)   = true
     * StringUtils.equals(null, "abc")  = false
     * StringUtils.equals("abc", null)  = false
     * StringUtils.equals("abc", "abc") = true
     * StringUtils.equals("abc", "ABC") = false
     * </pre>
     *
     * <p>
     * Copied from Apache Commons Lang r1583482 on April 10, 2014 (day of 3.3.2 release).
     * </p>
     *
     * @see Object#equals(Object)
     * @param cs1
     *            the first CharSequence, may be <code>null</code>
     * @param cs2
     *            the second CharSequence, may be <code>null</code>
     * @return <code>true</code> if the CharSequences are equal (case-sensitive), or both <code>null</code>
     * @since 1.10
     */
    public static boolean equals(final CharSequence cs1, final CharSequence cs2) {
        // Same reference (this also covers the case where both are null).
        if (cs1 == cs2) {
            return true;
        }
        // Exactly one of the two is null at this point.
        if (cs1 == null || cs2 == null) {
            return false;
        }
        // Two Strings can use String's own equals directly.
        if (cs1 instanceof String && cs2 instanceof String) {
            return cs1.equals(cs2);
        }
        // Mixed CharSequence types: equal length plus a case-sensitive comparison over the full length.
        return cs1.length() == cs2.length() && CharSequenceUtils.regionMatches(cs1, false, 0, cs2, 0, cs1.length());
    }

    /**
     * Calls {@link String#getBytes(Charset)}
     *
     * @param string
     *            The string to encode (if null, return null).
     * @param charset
     *            The {@link Charset} to encode the <code>String</code>
     * @return the encoded bytes, or <code>null</code> if the input string was <code>null</code>
     */
    private static byte[] getBytes(final String string, final Charset charset) {
        if (string == null) {
            return null;
        }
        return string.getBytes(charset);
    }

    /**
     * Calls {@link String#getBytes(Charset)} and wraps the resulting byte array with
     * {@link ByteBuffer#wrap(byte[])}.
     *
     * @param string
     *            The string to encode (if null, return null).
     * @param charset
     *            The {@link Charset} to encode the <code>String</code>
     * @return a byte buffer wrapping the encoded bytes, or <code>null</code> if the input string was
     *         <code>null</code>
     */
    private static ByteBuffer getByteBuffer(final String string, final Charset charset) {
        if (string == null) {
            return null;
        }
        return ByteBuffer.wrap(string.getBytes(charset));
    }

    /**
     * Encodes the given string using the UTF-8 charset and returns the result as a byte buffer that wraps the
     * newly encoded byte array.
     *
     * @param string
     *            the String to encode, may be <code>null</code>
     * @return a byte buffer wrapping the encoded bytes, or <code>null</code> if the input string was
     *         <code>null</code>
     * @throws NullPointerException
     *             Thrown if {@link Charsets#UTF_8} is not initialized, which should never happen since it is
     *             required by the Java platform specification.
     * @see <a href="http://download.oracle.com/javase/6/docs/api/java/nio/charset/Charset.html">Standard charsets</a>
     * @see #getBytesUnchecked(String, String)
     * @since 1.11
     */
    public static ByteBuffer getByteBufferUtf8(final String string) {
        return getByteBuffer(string, Charsets.UTF_8);
    }

    /**
     * Encodes the given string into a sequence of bytes using the ISO-8859-1 charset, storing the result into a new
     * byte array.
     *
     * @param string
     *            the String to encode, may be <code>null</code>
     * @return encoded bytes, or <code>null</code> if the input string was <code>null</code>
     * @throws NullPointerException
     *             Thrown if {@link Charsets#ISO_8859_1} is not initialized, which should never happen since it is
     *             required by the Java platform specification.
     * @since As of 1.7, throws {@link NullPointerException} instead of UnsupportedEncodingException
     * @see <a href="http://download.oracle.com/javase/6/docs/api/java/nio/charset/Charset.html">Standard charsets</a>
     * @see #getBytesUnchecked(String, String)
     */
    public static byte[] getBytesIso8859_1(final String string) {
        return getBytes(string, Charsets.ISO_8859_1);
    }

    /**
     * Encodes the given string into a sequence of bytes using the named charset, storing the result into a new byte
     * array.
     * <p>
     * This method catches {@link UnsupportedEncodingException} and rethrows it as {@link IllegalStateException}, which
     * should never happen for a required charset name. Use this method when the encoding is required to be in the JRE.
     * </p>
     *
     * @param string
     *            the String to encode, may be <code>null</code>
     * @param charsetName
     *            The name of a required {@link java.nio.charset.Charset}
     * @return encoded bytes, or <code>null</code> if the input string was <code>null</code>
     * @throws IllegalStateException
     *             Thrown when a {@link UnsupportedEncodingException} is caught, which should never happen for a
     *             required charset name. The caught exception is attached as the cause.
     * @see CharEncoding
     * @see String#getBytes(String)
     */
    public static byte[] getBytesUnchecked(final String string, final String charsetName) {
        if (string == null) {
            return null;
        }
        try {
            return string.getBytes(charsetName);
        } catch (final UnsupportedEncodingException e) {
            throw StringUtils.newIllegalStateException(charsetName, e);
        }
    }

    /**
     * Encodes the given string into a sequence of bytes using the US-ASCII charset, storing the result into a new byte
     * array.
     *
     * @param string
     *            the String to encode, may be <code>null</code>
     * @return encoded bytes, or <code>null</code> if the input string was <code>null</code>
     * @throws NullPointerException
     *             Thrown if {@link Charsets#US_ASCII} is not initialized, which should never happen since it is
     *             required by the Java platform specification.
     * @since As of 1.7, throws {@link NullPointerException} instead of UnsupportedEncodingException
     * @see <a href="http://download.oracle.com/javase/6/docs/api/java/nio/charset/Charset.html">Standard charsets</a>
     * @see #getBytesUnchecked(String, String)
     */
    public static byte[] getBytesUsAscii(final String string) {
        return getBytes(string, Charsets.US_ASCII);
    }

    /**
     * Encodes the given string into a sequence of bytes using the UTF-16 charset, storing the result into a new byte
     * array.
     *
     * @param string
     *            the String to encode, may be <code>null</code>
     * @return encoded bytes, or <code>null</code> if the input string was <code>null</code>
     * @throws NullPointerException
     *             Thrown if {@link Charsets#UTF_16} is not initialized, which should never happen since it is
     *             required by the Java platform specification.
     * @since As of 1.7, throws {@link NullPointerException} instead of UnsupportedEncodingException
     * @see <a href="http://download.oracle.com/javase/6/docs/api/java/nio/charset/Charset.html">Standard charsets</a>
     * @see #getBytesUnchecked(String, String)
     */
    public static byte[] getBytesUtf16(final String string) {
        return getBytes(string, Charsets.UTF_16);
    }

    /**
     * Encodes the given string into a sequence of bytes using the UTF-16BE charset, storing the result into a new byte
     * array.
     *
     * @param string
     *            the String to encode, may be <code>null</code>
     * @return encoded bytes, or <code>null</code> if the input string was <code>null</code>
     * @throws NullPointerException
     *             Thrown if {@link Charsets#UTF_16BE} is not initialized, which should never happen since it is
     *             required by the Java platform specification.
     * @since As of 1.7, throws {@link NullPointerException} instead of UnsupportedEncodingException
     * @see <a href="http://download.oracle.com/javase/6/docs/api/java/nio/charset/Charset.html">Standard charsets</a>
     * @see #getBytesUnchecked(String, String)
     */
    public static byte[] getBytesUtf16Be(final String string) {
        return getBytes(string, Charsets.UTF_16BE);
    }

    /**
     * Encodes the given string into a sequence of bytes using the UTF-16LE charset, storing the result into a new byte
     * array.
     *
     * @param string
     *            the String to encode, may be <code>null</code>
     * @return encoded bytes, or <code>null</code> if the input string was <code>null</code>
     * @throws NullPointerException
     *             Thrown if {@link Charsets#UTF_16LE} is not initialized, which should never happen since it is
     *             required by the Java platform specification.
     * @since As of 1.7, throws {@link NullPointerException} instead of UnsupportedEncodingException
     * @see <a href="http://download.oracle.com/javase/6/docs/api/java/nio/charset/Charset.html">Standard charsets</a>
     * @see #getBytesUnchecked(String, String)
     */
    public static byte[] getBytesUtf16Le(final String string) {
        return getBytes(string, Charsets.UTF_16LE);
    }

    /**
     * Encodes the given string into a sequence of bytes using the UTF-8 charset, storing the result into a new byte
     * array.
     *
     * @param string
     *            the String to encode, may be <code>null</code>
     * @return encoded bytes, or <code>null</code> if the input string was <code>null</code>
     * @throws NullPointerException
     *             Thrown if {@link Charsets#UTF_8} is not initialized, which should never happen since it is
     *             required by the Java platform specification.
     * @since As of 1.7, throws {@link NullPointerException} instead of UnsupportedEncodingException
     * @see <a href="http://download.oracle.com/javase/6/docs/api/java/nio/charset/Charset.html">Standard charsets</a>
     * @see #getBytesUnchecked(String, String)
     */
    public static byte[] getBytesUtf8(final String string) {
        return getBytes(string, Charsets.UTF_8);
    }

    /**
     * Builds the unchecked exception used to report an unsupported charset name.
     * <p>
     * The message is the charset name followed by the string form of the caught exception. The caught exception is
     * also attached as the cause so that its stack trace is not lost.
     * </p>
     *
     * @param charsetName
     *            the charset name that was rejected
     * @param e
     *            the exception raised for that charset name
     * @return a new {@link IllegalStateException} whose cause is <code>e</code>
     */
    private static IllegalStateException newIllegalStateException(final String charsetName,
            final UnsupportedEncodingException e) {
        return new IllegalStateException(charsetName + ": " + e, e);
    }

    /**
     * Constructs a new <code>String</code> by decoding the specified array of bytes using the given charset.
     *
     * @param bytes
     *            The bytes to be decoded into characters, may be <code>null</code>
     * @param charset
     *            The {@link Charset} used to decode the bytes; not {@code null}
     * @return A new <code>String</code> decoded from the specified array of bytes using the given charset,
     *         or <code>null</code> if the input byte array was <code>null</code>.
     * @throws NullPointerException
     *             Thrown if charset is {@code null}
     */
    private static String newString(final byte[] bytes, final Charset charset) {
        return bytes == null ? null : new String(bytes, charset);
    }

    /**
     * Constructs a new <code>String</code> by decoding the specified array of bytes using the given charset.
     * <p>
     * This method catches {@link UnsupportedEncodingException} and re-throws it as {@link IllegalStateException}, which
     * should never happen for a required charset name. Use this method when the encoding is required to be in the JRE.
     * </p>
     *
     * @param bytes
     *            The bytes to be decoded into characters, may be <code>null</code>
     * @param charsetName
     *            The name of a required {@link java.nio.charset.Charset}
     * @return A new <code>String</code> decoded from the specified array of bytes using the given charset,
     *         or <code>null</code> if the input byte array was <code>null</code>.
     * @throws IllegalStateException
     *             Thrown when a {@link UnsupportedEncodingException} is caught, which should never happen for a
     *             required charset name. The caught exception is attached as the cause.
     * @see CharEncoding
     * @see String#String(byte[], String)
     */
    public static String newString(final byte[] bytes, final String charsetName) {
        if (bytes == null) {
            return null;
        }
        try {
            return new String(bytes, charsetName);
        } catch (final UnsupportedEncodingException e) {
            throw StringUtils.newIllegalStateException(charsetName, e);
        }
    }

    /**
     * Constructs a new <code>String</code> by decoding the specified array of bytes using the ISO-8859-1 charset.
     *
     * @param bytes
     *            The bytes to be decoded into characters, may be <code>null</code>
     * @return A new <code>String</code> decoded from the specified array of bytes using the ISO-8859-1 charset, or
     *         <code>null</code> if the input byte array was <code>null</code>.
     * @throws NullPointerException
     *             Thrown if {@link Charsets#ISO_8859_1} is not initialized, which should never happen since it is
     *             required by the Java platform specification.
     * @since As of 1.7, throws {@link NullPointerException} instead of UnsupportedEncodingException
     */
    public static String newStringIso8859_1(final byte[] bytes) {
        return newString(bytes, Charsets.ISO_8859_1);
    }

    /**
     * Constructs a new <code>String</code> by decoding the specified array of bytes using the US-ASCII charset.
     *
     * @param bytes
     *            The bytes to be decoded into characters, may be <code>null</code>
     * @return A new <code>String</code> decoded from the specified array of bytes using the US-ASCII charset,
     *         or <code>null</code> if the input byte array was <code>null</code>.
     * @throws NullPointerException
     *             Thrown if {@link Charsets#US_ASCII} is not initialized, which should never happen since it is
     *             required by the Java platform specification.
     * @since As of 1.7, throws {@link NullPointerException} instead of UnsupportedEncodingException
     */
    public static String newStringUsAscii(final byte[] bytes) {
        return newString(bytes, Charsets.US_ASCII);
    }

    /**
     * Constructs a new <code>String</code> by decoding the specified array of bytes using the UTF-16 charset.
     *
     * @param bytes
     *            The bytes to be decoded into characters, may be <code>null</code>
     * @return A new <code>String</code> decoded from the specified array of bytes using the UTF-16 charset
     *         or <code>null</code> if the input byte array was <code>null</code>.
     * @throws NullPointerException
     *             Thrown if {@link Charsets#UTF_16} is not initialized, which should never happen since it is
     *             required by the Java platform specification.
     * @since As of 1.7, throws {@link NullPointerException} instead of UnsupportedEncodingException
     */
    public static String newStringUtf16(final byte[] bytes) {
        return newString(bytes, Charsets.UTF_16);
    }

    /**
     * Constructs a new <code>String</code> by decoding the specified array of bytes using the UTF-16BE charset.
     *
     * @param bytes
     *            The bytes to be decoded into characters, may be <code>null</code>
     * @return A new <code>String</code> decoded from the specified array of bytes using the UTF-16BE charset,
     *         or <code>null</code> if the input byte array was <code>null</code>.
     * @throws NullPointerException
     *             Thrown if {@link Charsets#UTF_16BE} is not initialized, which should never happen since it is
     *             required by the Java platform specification.
     * @since As of 1.7, throws {@link NullPointerException} instead of UnsupportedEncodingException
     */
    public static String newStringUtf16Be(final byte[] bytes) {
        return newString(bytes, Charsets.UTF_16BE);
    }

    /**
     * Constructs a new <code>String</code> by decoding the specified array of bytes using the UTF-16LE charset.
     *
     * @param bytes
     *            The bytes to be decoded into characters, may be <code>null</code>
     * @return A new <code>String</code> decoded from the specified array of bytes using the UTF-16LE charset,
     *         or <code>null</code> if the input byte array was <code>null</code>.
     * @throws NullPointerException
     *             Thrown if {@link Charsets#UTF_16LE} is not initialized, which should never happen since it is
     *             required by the Java platform specification.
     * @since As of 1.7, throws {@link NullPointerException} instead of UnsupportedEncodingException
     */
    public static String newStringUtf16Le(final byte[] bytes) {
        return newString(bytes, Charsets.UTF_16LE);
    }

    /**
     * Constructs a new <code>String</code> by decoding the specified array of bytes using the UTF-8 charset.
     *
     * @param bytes
     *            The bytes to be decoded into characters, may be <code>null</code>
     * @return A new <code>String</code> decoded from the specified array of bytes using the UTF-8 charset,
     *         or <code>null</code> if the input byte array was <code>null</code>.
     * @throws NullPointerException
     *             Thrown if {@link Charsets#UTF_8} is not initialized, which should never happen since it is
     *             required by the Java platform specification.
     * @since As of 1.7, throws {@link NullPointerException} instead of UnsupportedEncodingException
     */
    public static String newStringUtf8(final byte[] bytes) {
        return newString(bytes, Charsets.UTF_8);
    }

}