/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.commons.codec.binary;

import java.io.UnsupportedEncodingException;
import java.nio.charset.Charset;

import org.apache.commons.codec.CharEncoding;
import org.apache.commons.codec.Charsets;

/**
 * Converts String to and from bytes using the encodings required by the Java specification. These encodings are
 * specified in <a href="http://download.oracle.com/javase/6/docs/api/java/nio/charset/Charset.html">
 * Standard charsets</a>.
 *
 * <p>Every conversion method in this class is null-safe with respect to its data argument: a {@code null} string or
 * byte array is converted to {@code null} rather than causing an exception.</p>
 *
 * <p>This class is immutable and thread-safe.</p>
 *
 * @see CharEncoding
 * @see <a href="http://download.oracle.com/javase/6/docs/api/java/nio/charset/Charset.html">Standard charsets</a>
 * @version $Id: StringUtils.html 891688 2013-12-24 20:49:46Z ggregory $
 * @since 1.4
 */
public class StringUtils {

    /**
     * Null-safe wrapper around {@link String#getBytes(Charset)}.
     *
     * @param string
     *            The string to encode (if null, return null).
     * @param charset
     *            The {@link Charset} to encode the {@code String}
     * @return the encoded bytes, or {@code null} if the input string was {@code null}
     */
    private static byte[] getBytes(final String string, final Charset charset) {
        if (string == null) {
            return null;
        }
        return string.getBytes(charset);
    }

    /**
     * Encodes the given string into a sequence of bytes using the ISO-8859-1 charset, storing the result into a new
     * byte array.
     *
     * @param string
     *            the String to encode, may be {@code null}
     * @return encoded bytes, or {@code null} if the input string was {@code null}
     * @throws NullPointerException
     *             Thrown if {@link Charsets#ISO_8859_1} is not initialized, which should never happen since it is
     *             required by the Java platform specification.
     * @since As of 1.7, throws {@link NullPointerException} instead of UnsupportedEncodingException
     * @see <a href="http://download.oracle.com/javase/6/docs/api/java/nio/charset/Charset.html">Standard charsets</a>
     * @see #getBytesUnchecked(String, String)
     */
    public static byte[] getBytesIso8859_1(final String string) {
        return getBytes(string, Charsets.ISO_8859_1);
    }

    /**
     * Encodes the given string into a sequence of bytes using the named charset, storing the result into a new byte
     * array.
     * <p>
     * This method catches {@link UnsupportedEncodingException} and rethrows it as {@link IllegalStateException}, which
     * should never happen for a required charset name. Use this method when the encoding is required to be in the JRE.
     * </p>
     *
     * @param string
     *            the String to encode, may be {@code null}
     * @param charsetName
     *            The name of a required {@link java.nio.charset.Charset}
     * @return encoded bytes, or {@code null} if the input string was {@code null}
     * @throws IllegalStateException
     *             Thrown when a {@link UnsupportedEncodingException} is caught, which should never happen for a
     *             required charset name. The caught exception is attached as the cause.
     * @see CharEncoding
     * @see String#getBytes(String)
     */
    public static byte[] getBytesUnchecked(final String string, final String charsetName) {
        if (string == null) {
            return null;
        }
        try {
            return string.getBytes(charsetName);
        } catch (final UnsupportedEncodingException e) {
            throw StringUtils.newIllegalStateException(charsetName, e);
        }
    }

    /**
     * Encodes the given string into a sequence of bytes using the US-ASCII charset, storing the result into a new byte
     * array.
     *
     * @param string
     *            the String to encode, may be {@code null}
     * @return encoded bytes, or {@code null} if the input string was {@code null}
     * @throws NullPointerException
     *             Thrown if {@link Charsets#US_ASCII} is not initialized, which should never happen since it is
     *             required by the Java platform specification.
     * @since As of 1.7, throws {@link NullPointerException} instead of UnsupportedEncodingException
     * @see <a href="http://download.oracle.com/javase/6/docs/api/java/nio/charset/Charset.html">Standard charsets</a>
     * @see #getBytesUnchecked(String, String)
     */
    public static byte[] getBytesUsAscii(final String string) {
        return getBytes(string, Charsets.US_ASCII);
    }

    /**
     * Encodes the given string into a sequence of bytes using the UTF-16 charset, storing the result into a new byte
     * array.
     *
     * @param string
     *            the String to encode, may be {@code null}
     * @return encoded bytes, or {@code null} if the input string was {@code null}
     * @throws NullPointerException
     *             Thrown if {@link Charsets#UTF_16} is not initialized, which should never happen since it is
     *             required by the Java platform specification.
     * @since As of 1.7, throws {@link NullPointerException} instead of UnsupportedEncodingException
     * @see <a href="http://download.oracle.com/javase/6/docs/api/java/nio/charset/Charset.html">Standard charsets</a>
     * @see #getBytesUnchecked(String, String)
     */
    public static byte[] getBytesUtf16(final String string) {
        return getBytes(string, Charsets.UTF_16);
    }

    /**
     * Encodes the given string into a sequence of bytes using the UTF-16BE charset, storing the result into a new byte
     * array.
     *
     * @param string
     *            the String to encode, may be {@code null}
     * @return encoded bytes, or {@code null} if the input string was {@code null}
     * @throws NullPointerException
     *             Thrown if {@link Charsets#UTF_16BE} is not initialized, which should never happen since it is
     *             required by the Java platform specification.
     * @since As of 1.7, throws {@link NullPointerException} instead of UnsupportedEncodingException
     * @see <a href="http://download.oracle.com/javase/6/docs/api/java/nio/charset/Charset.html">Standard charsets</a>
     * @see #getBytesUnchecked(String, String)
     */
    public static byte[] getBytesUtf16Be(final String string) {
        return getBytes(string, Charsets.UTF_16BE);
    }

    /**
     * Encodes the given string into a sequence of bytes using the UTF-16LE charset, storing the result into a new byte
     * array.
     *
     * @param string
     *            the String to encode, may be {@code null}
     * @return encoded bytes, or {@code null} if the input string was {@code null}
     * @throws NullPointerException
     *             Thrown if {@link Charsets#UTF_16LE} is not initialized, which should never happen since it is
     *             required by the Java platform specification.
     * @since As of 1.7, throws {@link NullPointerException} instead of UnsupportedEncodingException
     * @see <a href="http://download.oracle.com/javase/6/docs/api/java/nio/charset/Charset.html">Standard charsets</a>
     * @see #getBytesUnchecked(String, String)
     */
    public static byte[] getBytesUtf16Le(final String string) {
        return getBytes(string, Charsets.UTF_16LE);
    }

    /**
     * Encodes the given string into a sequence of bytes using the UTF-8 charset, storing the result into a new byte
     * array.
     *
     * @param string
     *            the String to encode, may be {@code null}
     * @return encoded bytes, or {@code null} if the input string was {@code null}
     * @throws NullPointerException
     *             Thrown if {@link Charsets#UTF_8} is not initialized, which should never happen since it is
     *             required by the Java platform specification.
     * @since As of 1.7, throws {@link NullPointerException} instead of UnsupportedEncodingException
     * @see <a href="http://download.oracle.com/javase/6/docs/api/java/nio/charset/Charset.html">Standard charsets</a>
     * @see #getBytesUnchecked(String, String)
     */
    public static byte[] getBytesUtf8(final String string) {
        return getBytes(string, Charsets.UTF_8);
    }

    /**
     * Builds the unchecked exception used to report a charset name that the runtime does not support.
     *
     * @param charsetName
     *            the charset name that was rejected
     * @param e
     *            the checked exception raised by the JDK; it is kept as the cause so its stack trace is not lost
     * @return a new {@link IllegalStateException} whose message is {@code charsetName + ": " + e}
     */
    private static IllegalStateException newIllegalStateException(final String charsetName,
            final UnsupportedEncodingException e) {
        // The message text is unchanged from earlier behaviour; only the cause is added.
        return new IllegalStateException(charsetName + ": " + e, e);
    }

    /**
     * Constructs a new {@code String} by decoding the specified array of bytes using the given charset.
     * <p>
     * This is the single null-safe decoding helper that all the charset-specific {@code newStringXxx} methods
     * delegate to.
     * </p>
     *
     * @param bytes
     *            The bytes to be decoded into characters, may be {@code null}
     * @param charset
     *            The {@link Charset} used to decode the bytes
     * @return A new {@code String} decoded from the specified array of bytes using the given charset,
     *         or {@code null} if the input byte array was {@code null}.
     * @throws NullPointerException
     *             Thrown if {@code bytes} is not {@code null} and {@code charset} is {@code null}.
     */
    private static String newString(final byte[] bytes, final Charset charset) {
        return bytes == null ? null : new String(bytes, charset);
    }

    /**
     * Constructs a new {@code String} by decoding the specified array of bytes using the given charset.
     * <p>
     * This method catches {@link UnsupportedEncodingException} and re-throws it as {@link IllegalStateException}, which
     * should never happen for a required charset name. Use this method when the encoding is required to be in the JRE.
     * </p>
     *
     * @param bytes
     *            The bytes to be decoded into characters, may be {@code null}
     * @param charsetName
     *            The name of a required {@link java.nio.charset.Charset}
     * @return A new {@code String} decoded from the specified array of bytes using the given charset,
     *         or {@code null} if the input byte array was {@code null}.
     * @throws IllegalStateException
     *             Thrown when a {@link UnsupportedEncodingException} is caught, which should never happen for a
     *             required charset name. The caught exception is attached as the cause.
     * @see CharEncoding
     * @see String#String(byte[], String)
     */
    public static String newString(final byte[] bytes, final String charsetName) {
        if (bytes == null) {
            return null;
        }
        try {
            return new String(bytes, charsetName);
        } catch (final UnsupportedEncodingException e) {
            throw StringUtils.newIllegalStateException(charsetName, e);
        }
    }

    /**
     * Constructs a new {@code String} by decoding the specified array of bytes using the ISO-8859-1 charset.
     *
     * @param bytes
     *            The bytes to be decoded into characters, may be {@code null}
     * @return A new {@code String} decoded from the specified array of bytes using the ISO-8859-1 charset, or
     *         {@code null} if the input byte array was {@code null}.
     * @throws NullPointerException
     *             Thrown if {@link Charsets#ISO_8859_1} is not initialized, which should never happen since it is
     *             required by the Java platform specification.
     * @since As of 1.7, throws {@link NullPointerException} instead of UnsupportedEncodingException
     */
    public static String newStringIso8859_1(final byte[] bytes) {
        return newString(bytes, Charsets.ISO_8859_1);
    }

    /**
     * Constructs a new {@code String} by decoding the specified array of bytes using the US-ASCII charset.
     *
     * @param bytes
     *            The bytes to be decoded into characters, may be {@code null}
     * @return A new {@code String} decoded from the specified array of bytes using the US-ASCII charset,
     *         or {@code null} if the input byte array was {@code null}.
     * @throws NullPointerException
     *             Thrown if {@link Charsets#US_ASCII} is not initialized, which should never happen since it is
     *             required by the Java platform specification.
     * @since As of 1.7, throws {@link NullPointerException} instead of UnsupportedEncodingException
     */
    public static String newStringUsAscii(final byte[] bytes) {
        return newString(bytes, Charsets.US_ASCII);
    }

    /**
     * Constructs a new {@code String} by decoding the specified array of bytes using the UTF-16 charset.
     *
     * @param bytes
     *            The bytes to be decoded into characters, may be {@code null}
     * @return A new {@code String} decoded from the specified array of bytes using the UTF-16 charset
     *         or {@code null} if the input byte array was {@code null}.
     * @throws NullPointerException
     *             Thrown if {@link Charsets#UTF_16} is not initialized, which should never happen since it is
     *             required by the Java platform specification.
     * @since As of 1.7, throws {@link NullPointerException} instead of UnsupportedEncodingException
     */
    public static String newStringUtf16(final byte[] bytes) {
        return newString(bytes, Charsets.UTF_16);
    }

    /**
     * Constructs a new {@code String} by decoding the specified array of bytes using the UTF-16BE charset.
     *
     * @param bytes
     *            The bytes to be decoded into characters, may be {@code null}
     * @return A new {@code String} decoded from the specified array of bytes using the UTF-16BE charset,
     *         or {@code null} if the input byte array was {@code null}.
     * @throws NullPointerException
     *             Thrown if {@link Charsets#UTF_16BE} is not initialized, which should never happen since it is
     *             required by the Java platform specification.
     * @since As of 1.7, throws {@link NullPointerException} instead of UnsupportedEncodingException
     */
    public static String newStringUtf16Be(final byte[] bytes) {
        return newString(bytes, Charsets.UTF_16BE);
    }

    /**
     * Constructs a new {@code String} by decoding the specified array of bytes using the UTF-16LE charset.
     *
     * @param bytes
     *            The bytes to be decoded into characters, may be {@code null}
     * @return A new {@code String} decoded from the specified array of bytes using the UTF-16LE charset,
     *         or {@code null} if the input byte array was {@code null}.
     * @throws NullPointerException
     *             Thrown if {@link Charsets#UTF_16LE} is not initialized, which should never happen since it is
     *             required by the Java platform specification.
     * @since As of 1.7, throws {@link NullPointerException} instead of UnsupportedEncodingException
     */
    public static String newStringUtf16Le(final byte[] bytes) {
        return newString(bytes, Charsets.UTF_16LE);
    }

    /**
     * Constructs a new {@code String} by decoding the specified array of bytes using the UTF-8 charset.
     *
     * @param bytes
     *            The bytes to be decoded into characters, may be {@code null}
     * @return A new {@code String} decoded from the specified array of bytes using the UTF-8 charset,
     *         or {@code null} if the input byte array was {@code null}.
     * @throws NullPointerException
     *             Thrown if {@link Charsets#UTF_8} is not initialized, which should never happen since it is
     *             required by the Java platform specification.
     * @since As of 1.7, throws {@link NullPointerException} instead of UnsupportedEncodingException
     */
    public static String newStringUtf8(final byte[] bytes) {
        return newString(bytes, Charsets.UTF_8);
    }

}