001/* 002 * Licensed to the Apache Software Foundation (ASF) under one or more 003 * contributor license agreements. See the NOTICE file distributed with 004 * this work for additional information regarding copyright ownership. 005 * The ASF licenses this file to You under the Apache License, Version 2.0 006 * (the "License"); you may not use this file except in compliance with 007 * the License. You may obtain a copy of the License at 008 * 009 * http://www.apache.org/licenses/LICENSE-2.0 010 * 011 * Unless required by applicable law or agreed to in writing, software 012 * distributed under the License is distributed on an "AS IS" BASIS, 013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 014 * See the License for the specific language governing permissions and 015 * limitations under the License. 016 */ 017 018package org.apache.commons.codec.binary; 019 020import java.io.UnsupportedEncodingException; 021import java.nio.charset.Charset; 022 023import org.apache.commons.codec.CharEncoding; 024import org.apache.commons.codec.Charsets; 025 026/** 027 * Converts String to and from bytes using the encodings required by the Java specification. These encodings are 028 * specified in <a href="http://download.oracle.com/javase/6/docs/api/java/nio/charset/Charset.html"> 029 * Standard charsets</a>. 030 * 031 * <p>This class is immutable and thread-safe.</p> 032 * 033 * @see CharEncoding 034 * @see <a href="http://download.oracle.com/javase/6/docs/api/java/nio/charset/Charset.html">Standard charsets</a> 035 * @version $Id: StringUtils.html 928559 2014-11-10 02:53:54Z ggregory $ 036 * @since 1.4 037 */ 038public class StringUtils { 039 040 /** 041 * <p> 042 * Compares two CharSequences, returning <code>true</code> if they represent equal sequences of characters. 043 * </p> 044 * 045 * <p> 046 * <code>null</code>s are handled without exceptions. Two <code>null</code> references are considered to be equal. 047 * The comparison is case sensitive. 048 * </p> 049 * 050 * <pre> 051 * StringUtils.equals(null, null) = true 052 * StringUtils.equals(null, "abc") = false 053 * StringUtils.equals("abc", null) = false 054 * StringUtils.equals("abc", "abc") = true 055 * StringUtils.equals("abc", "ABC") = false 056 * </pre> 057 * 058 * <p> 059 * Copied from Apache Commons Lang r1583482 on April 10, 2014 (day of 3.3.2 release). 060 * </p> 061 * 062 * @see Object#equals(Object) 063 * @param cs1 064 * the first CharSequence, may be <code>null</code> 065 * @param cs2 066 * the second CharSequence, may be <code>null</code> 067 * @return <code>true</code> if the CharSequences are equal (case-sensitive), or both <code>null</code> 068 * @since 1.10 069 */ 070 public static boolean equals(final CharSequence cs1, final CharSequence cs2) { 071 if (cs1 == cs2) { 072 return true; 073 } 074 if (cs1 == null || cs2 == null) { 075 return false; 076 } 077 if (cs1 instanceof String && cs2 instanceof String) { 078 return cs1.equals(cs2); 079 } 080 return CharSequenceUtils.regionMatches(cs1, false, 0, cs2, 0, Math.max(cs1.length(), cs2.length())); 081 } 082 083 /** 084 * Calls {@link String#getBytes(Charset)} 085 * 086 * @param string 087 * The string to encode (if null, return null). 088 * @param charset 089 * The {@link Charset} to encode the <code>String</code> 090 * @return the encoded bytes 091 */ 092 private static byte[] getBytes(final String string, final Charset charset) { 093 if (string == null) { 094 return null; 095 } 096 return string.getBytes(charset); 097 } 098 099 /** 100 * Encodes the given string into a sequence of bytes using the ISO-8859-1 charset, storing the result into a new 101 * byte array. 102 * 103 * @param string 104 * the String to encode, may be <code>null</code> 105 * @return encoded bytes, or <code>null</code> if the input string was <code>null</code> 106 * @throws NullPointerException 107 * Thrown if {@link Charsets#ISO_8859_1} is not initialized, which should never happen since it is 108 * required by the Java platform specification. 109 * @since As of 1.7, throws {@link NullPointerException} instead of UnsupportedEncodingException 110 * @see <a href="http://download.oracle.com/javase/6/docs/api/java/nio/charset/Charset.html">Standard charsets</a> 111 * @see #getBytesUnchecked(String, String) 112 */ 113 public static byte[] getBytesIso8859_1(final String string) { 114 return getBytes(string, Charsets.ISO_8859_1); 115 } 116 117 118 /** 119 * Encodes the given string into a sequence of bytes using the named charset, storing the result into a new byte 120 * array. 121 * <p> 122 * This method catches {@link UnsupportedEncodingException} and rethrows it as {@link IllegalStateException}, which 123 * should never happen for a required charset name. Use this method when the encoding is required to be in the JRE. 124 * </p> 125 * 126 * @param string 127 * the String to encode, may be <code>null</code> 128 * @param charsetName 129 * The name of a required {@link java.nio.charset.Charset} 130 * @return encoded bytes, or <code>null</code> if the input string was <code>null</code> 131 * @throws IllegalStateException 132 * Thrown when a {@link UnsupportedEncodingException} is caught, which should never happen for a 133 * required charset name. 134 * @see CharEncoding 135 * @see String#getBytes(String) 136 */ 137 public static byte[] getBytesUnchecked(final String string, final String charsetName) { 138 if (string == null) { 139 return null; 140 } 141 try { 142 return string.getBytes(charsetName); 143 } catch (final UnsupportedEncodingException e) { 144 throw StringUtils.newIllegalStateException(charsetName, e); 145 } 146 } 147 148 /** 149 * Encodes the given string into a sequence of bytes using the US-ASCII charset, storing the result into a new byte 150 * array. 151 * 152 * @param string 153 * the String to encode, may be <code>null</code> 154 * @return encoded bytes, or <code>null</code> if the input string was <code>null</code> 155 * @throws NullPointerException 156 * Thrown if {@link Charsets#US_ASCII} is not initialized, which should never happen since it is 157 * required by the Java platform specification. 158 * @since As of 1.7, throws {@link NullPointerException} instead of UnsupportedEncodingException 159 * @see <a href="http://download.oracle.com/javase/6/docs/api/java/nio/charset/Charset.html">Standard charsets</a> 160 * @see #getBytesUnchecked(String, String) 161 */ 162 public static byte[] getBytesUsAscii(final String string) { 163 return getBytes(string, Charsets.US_ASCII); 164 } 165 166 /** 167 * Encodes the given string into a sequence of bytes using the UTF-16 charset, storing the result into a new byte 168 * array. 169 * 170 * @param string 171 * the String to encode, may be <code>null</code> 172 * @return encoded bytes, or <code>null</code> if the input string was <code>null</code> 173 * @throws NullPointerException 174 * Thrown if {@link Charsets#UTF_16} is not initialized, which should never happen since it is 175 * required by the Java platform specification. 176 * @since As of 1.7, throws {@link NullPointerException} instead of UnsupportedEncodingException 177 * @see <a href="http://download.oracle.com/javase/6/docs/api/java/nio/charset/Charset.html">Standard charsets</a> 178 * @see #getBytesUnchecked(String, String) 179 */ 180 public static byte[] getBytesUtf16(final String string) { 181 return getBytes(string, Charsets.UTF_16); 182 } 183 184 /** 185 * Encodes the given string into a sequence of bytes using the UTF-16BE charset, storing the result into a new byte 186 * array. 187 * 188 * @param string 189 * the String to encode, may be <code>null</code> 190 * @return encoded bytes, or <code>null</code> if the input string was <code>null</code> 191 * @throws NullPointerException 192 * Thrown if {@link Charsets#UTF_16BE} is not initialized, which should never happen since it is 193 * required by the Java platform specification. 194 * @since As of 1.7, throws {@link NullPointerException} instead of UnsupportedEncodingException 195 * @see <a href="http://download.oracle.com/javase/6/docs/api/java/nio/charset/Charset.html">Standard charsets</a> 196 * @see #getBytesUnchecked(String, String) 197 */ 198 public static byte[] getBytesUtf16Be(final String string) { 199 return getBytes(string, Charsets.UTF_16BE); 200 } 201 202 /** 203 * Encodes the given string into a sequence of bytes using the UTF-16LE charset, storing the result into a new byte 204 * array. 205 * 206 * @param string 207 * the String to encode, may be <code>null</code> 208 * @return encoded bytes, or <code>null</code> if the input string was <code>null</code> 209 * @throws NullPointerException 210 * Thrown if {@link Charsets#UTF_16LE} is not initialized, which should never happen since it is 211 * required by the Java platform specification. 212 * @since As of 1.7, throws {@link NullPointerException} instead of UnsupportedEncodingException 213 * @see <a href="http://download.oracle.com/javase/6/docs/api/java/nio/charset/Charset.html">Standard charsets</a> 214 * @see #getBytesUnchecked(String, String) 215 */ 216 public static byte[] getBytesUtf16Le(final String string) { 217 return getBytes(string, Charsets.UTF_16LE); 218 } 219 220 /** 221 * Encodes the given string into a sequence of bytes using the UTF-8 charset, storing the result into a new byte 222 * array. 223 * 224 * @param string 225 * the String to encode, may be <code>null</code> 226 * @return encoded bytes, or <code>null</code> if the input string was <code>null</code> 227 * @throws NullPointerException 228 * Thrown if {@link Charsets#UTF_8} is not initialized, which should never happen since it is 229 * required by the Java platform specification. 230 * @since As of 1.7, throws {@link NullPointerException} instead of UnsupportedEncodingException 231 * @see <a href="http://download.oracle.com/javase/6/docs/api/java/nio/charset/Charset.html">Standard charsets</a> 232 * @see #getBytesUnchecked(String, String) 233 */ 234 public static byte[] getBytesUtf8(final String string) { 235 return getBytes(string, Charsets.UTF_8); 236 } 237 238 private static IllegalStateException newIllegalStateException(final String charsetName, 239 final UnsupportedEncodingException e) { 240 return new IllegalStateException(charsetName + ": " + e); 241 } 242 243 /** 244 * Constructs a new <code>String</code> by decoding the specified array of bytes using the given charset. 245 * 246 * @param bytes 247 * The bytes to be decoded into characters 248 * @param charset 249 * The {@link Charset} to encode the <code>String</code> 250 * @return A new <code>String</code> decoded from the specified array of bytes using the given charset, 251 * or <code>null</code> if the input byte array was <code>null</code>. 252 * @throws NullPointerException 253 * Thrown if {@link Charsets#UTF_8} is not initialized, which should never happen since it is 254 * required by the Java platform specification. 255 */ 256 private static String newString(final byte[] bytes, final Charset charset) { 257 return bytes == null ? null : new String(bytes, charset); 258 } 259 260 /** 261 * Constructs a new <code>String</code> by decoding the specified array of bytes using the given charset. 262 * <p> 263 * This method catches {@link UnsupportedEncodingException} and re-throws it as {@link IllegalStateException}, which 264 * should never happen for a required charset name. Use this method when the encoding is required to be in the JRE. 265 * </p> 266 * 267 * @param bytes 268 * The bytes to be decoded into characters, may be <code>null</code> 269 * @param charsetName 270 * The name of a required {@link java.nio.charset.Charset} 271 * @return A new <code>String</code> decoded from the specified array of bytes using the given charset, 272 * or <code>null</code> if the input byte array was <code>null</code>. 273 * @throws IllegalStateException 274 * Thrown when a {@link UnsupportedEncodingException} is caught, which should never happen for a 275 * required charset name. 276 * @see CharEncoding 277 * @see String#String(byte[], String) 278 */ 279 public static String newString(final byte[] bytes, final String charsetName) { 280 if (bytes == null) { 281 return null; 282 } 283 try { 284 return new String(bytes, charsetName); 285 } catch (final UnsupportedEncodingException e) { 286 throw StringUtils.newIllegalStateException(charsetName, e); 287 } 288 } 289 290 /** 291 * Constructs a new <code>String</code> by decoding the specified array of bytes using the ISO-8859-1 charset. 292 * 293 * @param bytes 294 * The bytes to be decoded into characters, may be <code>null</code> 295 * @return A new <code>String</code> decoded from the specified array of bytes using the ISO-8859-1 charset, or 296 * <code>null</code> if the input byte array was <code>null</code>. 297 * @throws NullPointerException 298 * Thrown if {@link Charsets#ISO_8859_1} is not initialized, which should never happen since it is 299 * required by the Java platform specification. 300 * @since As of 1.7, throws {@link NullPointerException} instead of UnsupportedEncodingException 301 */ 302 public static String newStringIso8859_1(final byte[] bytes) { 303 return new String(bytes, Charsets.ISO_8859_1); 304 } 305 306 /** 307 * Constructs a new <code>String</code> by decoding the specified array of bytes using the US-ASCII charset. 308 * 309 * @param bytes 310 * The bytes to be decoded into characters 311 * @return A new <code>String</code> decoded from the specified array of bytes using the US-ASCII charset, 312 * or <code>null</code> if the input byte array was <code>null</code>. 313 * @throws NullPointerException 314 * Thrown if {@link Charsets#US_ASCII} is not initialized, which should never happen since it is 315 * required by the Java platform specification. 316 * @since As of 1.7, throws {@link NullPointerException} instead of UnsupportedEncodingException 317 */ 318 public static String newStringUsAscii(final byte[] bytes) { 319 return new String(bytes, Charsets.US_ASCII); 320 } 321 322 /** 323 * Constructs a new <code>String</code> by decoding the specified array of bytes using the UTF-16 charset. 324 * 325 * @param bytes 326 * The bytes to be decoded into characters 327 * @return A new <code>String</code> decoded from the specified array of bytes using the UTF-16 charset 328 * or <code>null</code> if the input byte array was <code>null</code>. 329 * @throws NullPointerException 330 * Thrown if {@link Charsets#UTF_16} is not initialized, which should never happen since it is 331 * required by the Java platform specification. 332 * @since As of 1.7, throws {@link NullPointerException} instead of UnsupportedEncodingException 333 */ 334 public static String newStringUtf16(final byte[] bytes) { 335 return new String(bytes, Charsets.UTF_16); 336 } 337 338 /** 339 * Constructs a new <code>String</code> by decoding the specified array of bytes using the UTF-16BE charset. 340 * 341 * @param bytes 342 * The bytes to be decoded into characters 343 * @return A new <code>String</code> decoded from the specified array of bytes using the UTF-16BE charset, 344 * or <code>null</code> if the input byte array was <code>null</code>. 345 * @throws NullPointerException 346 * Thrown if {@link Charsets#UTF_16BE} is not initialized, which should never happen since it is 347 * required by the Java platform specification. 348 * @since As of 1.7, throws {@link NullPointerException} instead of UnsupportedEncodingException 349 */ 350 public static String newStringUtf16Be(final byte[] bytes) { 351 return new String(bytes, Charsets.UTF_16BE); 352 } 353 354 /** 355 * Constructs a new <code>String</code> by decoding the specified array of bytes using the UTF-16LE charset. 356 * 357 * @param bytes 358 * The bytes to be decoded into characters 359 * @return A new <code>String</code> decoded from the specified array of bytes using the UTF-16LE charset, 360 * or <code>null</code> if the input byte array was <code>null</code>. 361 * @throws NullPointerException 362 * Thrown if {@link Charsets#UTF_16LE} is not initialized, which should never happen since it is 363 * required by the Java platform specification. 364 * @since As of 1.7, throws {@link NullPointerException} instead of UnsupportedEncodingException 365 */ 366 public static String newStringUtf16Le(final byte[] bytes) { 367 return new String(bytes, Charsets.UTF_16LE); 368 } 369 370 /** 371 * Constructs a new <code>String</code> by decoding the specified array of bytes using the UTF-8 charset. 372 * 373 * @param bytes 374 * The bytes to be decoded into characters 375 * @return A new <code>String</code> decoded from the specified array of bytes using the UTF-8 charset, 376 * or <code>null</code> if the input byte array was <code>null</code>. 377 * @throws NullPointerException 378 * Thrown if {@link Charsets#UTF_8} is not initialized, which should never happen since it is 379 * required by the Java platform specification. 380 * @since As of 1.7, throws {@link NullPointerException} instead of UnsupportedEncodingException 381 */ 382 public static String newStringUtf8(final byte[] bytes) { 383 return newString(bytes, Charsets.UTF_8); 384 } 385 386}