001/* 002 * Licensed to the Apache Software Foundation (ASF) under one or more 003 * contributor license agreements. See the NOTICE file distributed with 004 * this work for additional information regarding copyright ownership. 005 * The ASF licenses this file to You under the Apache License, Version 2.0 006 * (the "License"); you may not use this file except in compliance with 007 * the License. You may obtain a copy of the License at 008 * 009 * http://www.apache.org/licenses/LICENSE-2.0 010 * 011 * Unless required by applicable law or agreed to in writing, software 012 * distributed under the License is distributed on an "AS IS" BASIS, 013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 014 * See the License for the specific language governing permissions and 015 * limitations under the License. 016 */ 017 018package org.apache.commons.codec.binary; 019 020import java.io.UnsupportedEncodingException; 021import java.nio.ByteBuffer; 022import java.nio.charset.Charset; 023 024import org.apache.commons.codec.CharEncoding; 025import org.apache.commons.codec.Charsets; 026 027/** 028 * Converts String to and from bytes using the encodings required by the Java specification. These encodings are 029 * specified in <a href="http://download.oracle.com/javase/7/docs/api/java/nio/charset/Charset.html"> 030 * Standard charsets</a>. 031 * 032 * <p>This class is immutable and thread-safe.</p> 033 * 034 * @see CharEncoding 035 * @see <a href="http://download.oracle.com/javase/7/docs/api/java/nio/charset/Charset.html">Standard charsets</a> 036 * @since 1.4 037 */ 038public class StringUtils { 039 040 /** 041 * <p> 042 * Compares two CharSequences, returning <code>true</code> if they represent equal sequences of characters. 043 * </p> 044 * 045 * <p> 046 * <code>null</code>s are handled without exceptions. Two <code>null</code> references are considered to be equal. 047 * The comparison is case sensitive. 048 * </p> 049 * 050 * <pre> 051 * StringUtils.equals(null, null) = true 052 * StringUtils.equals(null, "abc") = false 053 * StringUtils.equals("abc", null) = false 054 * StringUtils.equals("abc", "abc") = true 055 * StringUtils.equals("abc", "ABC") = false 056 * </pre> 057 * 058 * <p> 059 * Copied from Apache Commons Lang r1583482 on April 10, 2014 (day of 3.3.2 release). 060 * </p> 061 * 062 * @see Object#equals(Object) 063 * @param cs1 064 * the first CharSequence, may be <code>null</code> 065 * @param cs2 066 * the second CharSequence, may be <code>null</code> 067 * @return <code>true</code> if the CharSequences are equal (case-sensitive), or both <code>null</code> 068 * @since 1.10 069 */ 070 public static boolean equals(final CharSequence cs1, final CharSequence cs2) { 071 if (cs1 == cs2) { 072 return true; 073 } 074 if (cs1 == null || cs2 == null) { 075 return false; 076 } 077 if (cs1 instanceof String && cs2 instanceof String) { 078 return cs1.equals(cs2); 079 } 080 return cs1.length() == cs2.length() && CharSequenceUtils.regionMatches(cs1, false, 0, cs2, 0, cs1.length()); 081 } 082 083 /** 084 * Calls {@link String#getBytes(Charset)} 085 * 086 * @param string 087 * The string to encode (if null, return null). 088 * @param charset 089 * The {@link Charset} to encode the <code>String</code> 090 * @return the encoded bytes 091 */ 092 private static byte[] getBytes(final String string, final Charset charset) { 093 if (string == null) { 094 return null; 095 } 096 return string.getBytes(charset); 097 } 098 099 /** 100 * Calls {@link String#getBytes(Charset)} 101 * 102 * @param string 103 * The string to encode (if null, return null). 104 * @param charset 105 * The {@link Charset} to encode the <code>String</code> 106 * @return the encoded bytes 107 */ 108 private static ByteBuffer getByteBuffer(final String string, final Charset charset) { 109 if (string == null) { 110 return null; 111 } 112 return ByteBuffer.wrap(string.getBytes(charset)); 113 } 114 115 /** 116 * Encodes the given string into a byte buffer using the UTF-8 charset, storing the result into a new byte 117 * array. 118 * 119 * @param string 120 * the String to encode, may be <code>null</code> 121 * @return encoded bytes, or <code>null</code> if the input string was <code>null</code> 122 * @throws NullPointerException 123 * Thrown if {@link Charsets#UTF_8} is not initialized, which should never happen since it is 124 * required by the Java platform specification. 125 * @see <a href="http://download.oracle.com/javase/7/docs/api/java/nio/charset/Charset.html">Standard charsets</a> 126 * @see #getBytesUnchecked(String, String) 127 * @since 1.11 128 */ 129 public static ByteBuffer getByteBufferUtf8(final String string) { 130 return getByteBuffer(string, Charsets.UTF_8); 131 } 132 133 /** 134 * Encodes the given string into a sequence of bytes using the ISO-8859-1 charset, storing the result into a new 135 * byte array. 136 * 137 * @param string 138 * the String to encode, may be <code>null</code> 139 * @return encoded bytes, or <code>null</code> if the input string was <code>null</code> 140 * @throws NullPointerException 141 * Thrown if {@link Charsets#ISO_8859_1} is not initialized, which should never happen since it is 142 * required by the Java platform specification. 143 * @since As of 1.7, throws {@link NullPointerException} instead of UnsupportedEncodingException 144 * @see <a href="http://download.oracle.com/javase/7/docs/api/java/nio/charset/Charset.html">Standard charsets</a> 145 * @see #getBytesUnchecked(String, String) 146 */ 147 public static byte[] getBytesIso8859_1(final String string) { 148 return getBytes(string, Charsets.ISO_8859_1); 149 } 150 151 152 /** 153 * Encodes the given string into a sequence of bytes using the named charset, storing the result into a new byte 154 * array. 155 * <p> 156 * This method catches {@link UnsupportedEncodingException} and rethrows it as {@link IllegalStateException}, which 157 * should never happen for a required charset name. Use this method when the encoding is required to be in the JRE. 158 * </p> 159 * 160 * @param string 161 * the String to encode, may be <code>null</code> 162 * @param charsetName 163 * The name of a required {@link java.nio.charset.Charset} 164 * @return encoded bytes, or <code>null</code> if the input string was <code>null</code> 165 * @throws IllegalStateException 166 * Thrown when a {@link UnsupportedEncodingException} is caught, which should never happen for a 167 * required charset name. 168 * @see CharEncoding 169 * @see String#getBytes(String) 170 */ 171 public static byte[] getBytesUnchecked(final String string, final String charsetName) { 172 if (string == null) { 173 return null; 174 } 175 try { 176 return string.getBytes(charsetName); 177 } catch (final UnsupportedEncodingException e) { 178 throw StringUtils.newIllegalStateException(charsetName, e); 179 } 180 } 181 182 /** 183 * Encodes the given string into a sequence of bytes using the US-ASCII charset, storing the result into a new byte 184 * array. 185 * 186 * @param string 187 * the String to encode, may be <code>null</code> 188 * @return encoded bytes, or <code>null</code> if the input string was <code>null</code> 189 * @throws NullPointerException 190 * Thrown if {@link Charsets#US_ASCII} is not initialized, which should never happen since it is 191 * required by the Java platform specification. 192 * @since As of 1.7, throws {@link NullPointerException} instead of UnsupportedEncodingException 193 * @see <a href="http://download.oracle.com/javase/7/docs/api/java/nio/charset/Charset.html">Standard charsets</a> 194 * @see #getBytesUnchecked(String, String) 195 */ 196 public static byte[] getBytesUsAscii(final String string) { 197 return getBytes(string, Charsets.US_ASCII); 198 } 199 200 /** 201 * Encodes the given string into a sequence of bytes using the UTF-16 charset, storing the result into a new byte 202 * array. 203 * 204 * @param string 205 * the String to encode, may be <code>null</code> 206 * @return encoded bytes, or <code>null</code> if the input string was <code>null</code> 207 * @throws NullPointerException 208 * Thrown if {@link Charsets#UTF_16} is not initialized, which should never happen since it is 209 * required by the Java platform specification. 210 * @since As of 1.7, throws {@link NullPointerException} instead of UnsupportedEncodingException 211 * @see <a href="http://download.oracle.com/javase/7/docs/api/java/nio/charset/Charset.html">Standard charsets</a> 212 * @see #getBytesUnchecked(String, String) 213 */ 214 public static byte[] getBytesUtf16(final String string) { 215 return getBytes(string, Charsets.UTF_16); 216 } 217 218 /** 219 * Encodes the given string into a sequence of bytes using the UTF-16BE charset, storing the result into a new byte 220 * array. 221 * 222 * @param string 223 * the String to encode, may be <code>null</code> 224 * @return encoded bytes, or <code>null</code> if the input string was <code>null</code> 225 * @throws NullPointerException 226 * Thrown if {@link Charsets#UTF_16BE} is not initialized, which should never happen since it is 227 * required by the Java platform specification. 228 * @since As of 1.7, throws {@link NullPointerException} instead of UnsupportedEncodingException 229 * @see <a href="http://download.oracle.com/javase/7/docs/api/java/nio/charset/Charset.html">Standard charsets</a> 230 * @see #getBytesUnchecked(String, String) 231 */ 232 public static byte[] getBytesUtf16Be(final String string) { 233 return getBytes(string, Charsets.UTF_16BE); 234 } 235 236 /** 237 * Encodes the given string into a sequence of bytes using the UTF-16LE charset, storing the result into a new byte 238 * array. 239 * 240 * @param string 241 * the String to encode, may be <code>null</code> 242 * @return encoded bytes, or <code>null</code> if the input string was <code>null</code> 243 * @throws NullPointerException 244 * Thrown if {@link Charsets#UTF_16LE} is not initialized, which should never happen since it is 245 * required by the Java platform specification. 246 * @since As of 1.7, throws {@link NullPointerException} instead of UnsupportedEncodingException 247 * @see <a href="http://download.oracle.com/javase/7/docs/api/java/nio/charset/Charset.html">Standard charsets</a> 248 * @see #getBytesUnchecked(String, String) 249 */ 250 public static byte[] getBytesUtf16Le(final String string) { 251 return getBytes(string, Charsets.UTF_16LE); 252 } 253 254 /** 255 * Encodes the given string into a sequence of bytes using the UTF-8 charset, storing the result into a new byte 256 * array. 257 * 258 * @param string 259 * the String to encode, may be <code>null</code> 260 * @return encoded bytes, or <code>null</code> if the input string was <code>null</code> 261 * @throws NullPointerException 262 * Thrown if {@link Charsets#UTF_8} is not initialized, which should never happen since it is 263 * required by the Java platform specification. 264 * @since As of 1.7, throws {@link NullPointerException} instead of UnsupportedEncodingException 265 * @see <a href="http://download.oracle.com/javase/7/docs/api/java/nio/charset/Charset.html">Standard charsets</a> 266 * @see #getBytesUnchecked(String, String) 267 */ 268 public static byte[] getBytesUtf8(final String string) { 269 return getBytes(string, Charsets.UTF_8); 270 } 271 272 private static IllegalStateException newIllegalStateException(final String charsetName, 273 final UnsupportedEncodingException e) { 274 return new IllegalStateException(charsetName + ": " + e); 275 } 276 277 /** 278 * Constructs a new <code>String</code> by decoding the specified array of bytes using the given charset. 279 * 280 * @param bytes 281 * The bytes to be decoded into characters 282 * @param charset 283 * The {@link Charset} to encode the <code>String</code>; not {@code null} 284 * @return A new <code>String</code> decoded from the specified array of bytes using the given charset, 285 * or <code>null</code> if the input byte array was <code>null</code>. 286 * @throws NullPointerException 287 * Thrown if charset is {@code null} 288 */ 289 private static String newString(final byte[] bytes, final Charset charset) { 290 return bytes == null ? null : new String(bytes, charset); 291 } 292 293 /** 294 * Constructs a new <code>String</code> by decoding the specified array of bytes using the given charset. 295 * <p> 296 * This method catches {@link UnsupportedEncodingException} and re-throws it as {@link IllegalStateException}, which 297 * should never happen for a required charset name. Use this method when the encoding is required to be in the JRE. 298 * </p> 299 * 300 * @param bytes 301 * The bytes to be decoded into characters, may be <code>null</code> 302 * @param charsetName 303 * The name of a required {@link java.nio.charset.Charset} 304 * @return A new <code>String</code> decoded from the specified array of bytes using the given charset, 305 * or <code>null</code> if the input byte array was <code>null</code>. 306 * @throws IllegalStateException 307 * Thrown when a {@link UnsupportedEncodingException} is caught, which should never happen for a 308 * required charset name. 309 * @see CharEncoding 310 * @see String#String(byte[], String) 311 */ 312 public static String newString(final byte[] bytes, final String charsetName) { 313 if (bytes == null) { 314 return null; 315 } 316 try { 317 return new String(bytes, charsetName); 318 } catch (final UnsupportedEncodingException e) { 319 throw StringUtils.newIllegalStateException(charsetName, e); 320 } 321 } 322 323 /** 324 * Constructs a new <code>String</code> by decoding the specified array of bytes using the ISO-8859-1 charset. 325 * 326 * @param bytes 327 * The bytes to be decoded into characters, may be <code>null</code> 328 * @return A new <code>String</code> decoded from the specified array of bytes using the ISO-8859-1 charset, or 329 * <code>null</code> if the input byte array was <code>null</code>. 330 * @throws NullPointerException 331 * Thrown if {@link Charsets#ISO_8859_1} is not initialized, which should never happen since it is 332 * required by the Java platform specification. 333 * @since As of 1.7, throws {@link NullPointerException} instead of UnsupportedEncodingException 334 */ 335 public static String newStringIso8859_1(final byte[] bytes) { 336 return newString(bytes, Charsets.ISO_8859_1); 337 } 338 339 /** 340 * Constructs a new <code>String</code> by decoding the specified array of bytes using the US-ASCII charset. 341 * 342 * @param bytes 343 * The bytes to be decoded into characters 344 * @return A new <code>String</code> decoded from the specified array of bytes using the US-ASCII charset, 345 * or <code>null</code> if the input byte array was <code>null</code>. 346 * @throws NullPointerException 347 * Thrown if {@link Charsets#US_ASCII} is not initialized, which should never happen since it is 348 * required by the Java platform specification. 349 * @since As of 1.7, throws {@link NullPointerException} instead of UnsupportedEncodingException 350 */ 351 public static String newStringUsAscii(final byte[] bytes) { 352 return newString(bytes, Charsets.US_ASCII); 353 } 354 355 /** 356 * Constructs a new <code>String</code> by decoding the specified array of bytes using the UTF-16 charset. 357 * 358 * @param bytes 359 * The bytes to be decoded into characters 360 * @return A new <code>String</code> decoded from the specified array of bytes using the UTF-16 charset 361 * or <code>null</code> if the input byte array was <code>null</code>. 362 * @throws NullPointerException 363 * Thrown if {@link Charsets#UTF_16} is not initialized, which should never happen since it is 364 * required by the Java platform specification. 365 * @since As of 1.7, throws {@link NullPointerException} instead of UnsupportedEncodingException 366 */ 367 public static String newStringUtf16(final byte[] bytes) { 368 return newString(bytes, Charsets.UTF_16); 369 } 370 371 /** 372 * Constructs a new <code>String</code> by decoding the specified array of bytes using the UTF-16BE charset. 373 * 374 * @param bytes 375 * The bytes to be decoded into characters 376 * @return A new <code>String</code> decoded from the specified array of bytes using the UTF-16BE charset, 377 * or <code>null</code> if the input byte array was <code>null</code>. 378 * @throws NullPointerException 379 * Thrown if {@link Charsets#UTF_16BE} is not initialized, which should never happen since it is 380 * required by the Java platform specification. 381 * @since As of 1.7, throws {@link NullPointerException} instead of UnsupportedEncodingException 382 */ 383 public static String newStringUtf16Be(final byte[] bytes) { 384 return newString(bytes, Charsets.UTF_16BE); 385 } 386 387 /** 388 * Constructs a new <code>String</code> by decoding the specified array of bytes using the UTF-16LE charset. 389 * 390 * @param bytes 391 * The bytes to be decoded into characters 392 * @return A new <code>String</code> decoded from the specified array of bytes using the UTF-16LE charset, 393 * or <code>null</code> if the input byte array was <code>null</code>. 394 * @throws NullPointerException 395 * Thrown if {@link Charsets#UTF_16LE} is not initialized, which should never happen since it is 396 * required by the Java platform specification. 397 * @since As of 1.7, throws {@link NullPointerException} instead of UnsupportedEncodingException 398 */ 399 public static String newStringUtf16Le(final byte[] bytes) { 400 return newString(bytes, Charsets.UTF_16LE); 401 } 402 403 /** 404 * Constructs a new <code>String</code> by decoding the specified array of bytes using the UTF-8 charset. 405 * 406 * @param bytes 407 * The bytes to be decoded into characters 408 * @return A new <code>String</code> decoded from the specified array of bytes using the UTF-8 charset, 409 * or <code>null</code> if the input byte array was <code>null</code>. 410 * @throws NullPointerException 411 * Thrown if {@link Charsets#UTF_8} is not initialized, which should never happen since it is 412 * required by the Java platform specification. 413 * @since As of 1.7, throws {@link NullPointerException} instead of UnsupportedEncodingException 414 */ 415 public static String newStringUtf8(final byte[] bytes) { 416 return newString(bytes, Charsets.UTF_8); 417 } 418 419}