/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.commons.codec.net;

import java.io.ByteArrayOutputStream;
import java.io.UnsupportedEncodingException;
import java.nio.charset.Charset;
import java.util.BitSet;

import org.apache.commons.codec.BinaryDecoder;
import org.apache.commons.codec.BinaryEncoder;
import org.apache.commons.codec.Charsets;
import org.apache.commons.codec.DecoderException;
import org.apache.commons.codec.EncoderException;
import org.apache.commons.codec.StringDecoder;
import org.apache.commons.codec.StringEncoder;
import org.apache.commons.codec.binary.StringUtils;

/**
 * Codec for the Quoted-Printable section of <a href="http://www.ietf.org/rfc/rfc1521.txt">RFC 1521</a>.
 * <p>
 * The Quoted-Printable encoding is intended to represent data that largely consists of octets that correspond to
 * printable characters in the ASCII character set. It encodes the data in such a way that the resulting octets are
 * unlikely to be modified by mail transport. If the data being encoded are mostly ASCII text, the encoded form of the
 * data remains largely recognizable by humans. A body which is entirely ASCII may also be encoded in Quoted-Printable
 * to ensure the integrity of the data should the message pass through a character-translating, and/or line-wrapping
 * gateway.
 * <p>
 * Note:
 * <p>
 * Rules #3, #4, and #5 of the quoted-printable spec are not implemented yet because the complete quoted-printable spec
 * does not lend itself well into the byte[] oriented codec framework. Complete the codec once the streamable codec
 * framework is ready. The motivation behind providing the codec in a partial form is that it can already come in handy
 * for those applications that do not require quoted-printable line formatting (rules #3, #4, #5), for instance Q codec.
 * <p>
 * This class is immutable and thread-safe.
 *
 * @see <a href="http://www.ietf.org/rfc/rfc1521.txt"> RFC 1521 MIME (Multipurpose Internet Mail Extensions) Part One:
 *          Mechanisms for Specifying and Describing the Format of Internet Message Bodies </a>
 *
 * @since 1.3
 * @version $Id: QuotedPrintableCodec.html 889935 2013-12-11 05:05:13Z ggregory $
 */
public class QuotedPrintableCodec implements BinaryEncoder, BinaryDecoder, StringEncoder, StringDecoder {
    /**
     * The default charset used for string decoding and encoding.
     */
    private final Charset charset;

    /**
     * BitSet of printable characters as defined in RFC 1521.
     */
    private static final BitSet PRINTABLE_CHARS = new BitSet(256);

    /** The byte that introduces a two-hex-digit escape sequence. */
    private static final byte ESCAPE_CHAR = '=';

    private static final byte TAB = 9;

    private static final byte SPACE = 32;

    // Static initializer for printable chars collection
    static {
        // ASCII 33..126 except 61 ('='): '=' is the escape character and must itself always be escaped
        for (int i = 33; i <= 60; i++) {
            PRINTABLE_CHARS.set(i);
        }
        for (int i = 62; i <= 126; i++) {
            PRINTABLE_CHARS.set(i);
        }
        // Whitespace that this (rule #1/#2 only) implementation passes through unescaped
        PRINTABLE_CHARS.set(TAB);
        PRINTABLE_CHARS.set(SPACE);
    }

    /**
     * Default constructor; uses UTF-8 as the default string charset.
     */
    public QuotedPrintableCodec() {
        this(Charsets.UTF_8);
    }

    /**
     * Constructor which allows for the selection of a default charset.
     * <p>
     * The charset is stored as given; no validation (including a null check) is performed here.
     *
     * @param charset
     *            the default string charset to use.
     * @since 1.7
     */
    public QuotedPrintableCodec(Charset charset) {
        this.charset = charset;
    }

    /**
     * Constructor which allows for the selection of a default charset by name. The name is resolved with
     * {@link Charset#forName(String)}.
     *
     * @param charsetName
     *            the name of the default string charset to use.
     * @throws java.nio.charset.IllegalCharsetNameException
     *             If the given charset name is illegal
     * @throws IllegalArgumentException
     *             If the given charsetName is null
     * @throws java.nio.charset.UnsupportedCharsetException
     *             If the named charset is unavailable
     * @since 1.7 throws UnsupportedCharsetException if the named charset is unavailable
     */
    public QuotedPrintableCodec(String charsetName) {
        this(Charset.forName(charsetName));
    }

    /**
     * Writes the escaped form of a single byte to the buffer: the escape character followed by two upper-case
     * hexadecimal digits (high nibble first).
     *
     * @param b
     *            byte to encode; only the low 8 bits are used
     * @param buffer
     *            the buffer to write to
     */
    private static void encodeQuotedPrintable(int b, ByteArrayOutputStream buffer) {
        buffer.write(ESCAPE_CHAR);
        char hex1 = Character.toUpperCase(Character.forDigit((b >> 4) & 0xF, 16));
        char hex2 = Character.toUpperCase(Character.forDigit(b & 0xF, 16));
        buffer.write(hex1);
        buffer.write(hex2);
    }

    /**
     * Encodes an array of bytes into an array of quoted-printable 7-bit characters. Unsafe characters are escaped.
     * <p>
     * This function implements a subset of quoted-printable encoding specification (rule #1 and rule #2) as defined in
     * RFC 1521 and is suitable for encoding binary data and unformatted text.
     *
     * @param printable
     *            bitset of characters deemed quoted-printable; if <code>null</code>, the default RFC 1521 set is used
     * @param bytes
     *            array of bytes to be encoded
     * @return array of bytes containing quoted-printable data, or <code>null</code> if <code>bytes</code> is
     *         <code>null</code>
     */
    public static final byte[] encodeQuotedPrintable(BitSet printable, byte[] bytes) {
        if (bytes == null) {
            return null;
        }
        if (printable == null) {
            printable = PRINTABLE_CHARS;
        }
        ByteArrayOutputStream buffer = new ByteArrayOutputStream();
        for (byte c : bytes) {
            // Java bytes are signed; map to the unsigned octet value 0..255 before the BitSet lookup
            int b = c & 0xFF;
            if (printable.get(b)) {
                buffer.write(b);
            } else {
                encodeQuotedPrintable(b, buffer);
            }
        }
        return buffer.toByteArray();
    }

    /**
     * Decodes an array quoted-printable characters into an array of original bytes. Escaped characters are converted
     * back to their original representation.
     * <p>
     * This function implements a subset of quoted-printable encoding specification (rule #1 and rule #2) as defined in
     * RFC 1521.
     *
     * @param bytes
     *            array of quoted-printable characters
     * @return array of original bytes, or <code>null</code> if <code>bytes</code> is <code>null</code>
     * @throws DecoderException
     *             Thrown if quoted-printable decoding is unsuccessful, i.e. an escape character is not followed by two
     *             hexadecimal digits
     */
    public static final byte[] decodeQuotedPrintable(byte[] bytes) throws DecoderException {
        if (bytes == null) {
            return null;
        }
        ByteArrayOutputStream buffer = new ByteArrayOutputStream();
        for (int i = 0; i < bytes.length; i++) {
            int b = bytes[i];
            if (b == ESCAPE_CHAR) {
                // Consume the two hex digits following '='; each read is bounds-checked so that a truncated
                // escape is reported as a DecoderException rather than surfacing as an index error.
                int u = Utils.digit16(escapeDigitAt(bytes, ++i));
                int l = Utils.digit16(escapeDigitAt(bytes, ++i));
                buffer.write((u << 4) + l);
            } else {
                buffer.write(b);
            }
        }
        return buffer.toByteArray();
    }

    /**
     * Returns the byte at the given index of an escape sequence, failing if the input ends before that index.
     *
     * @param bytes
     *            the quoted-printable input
     * @param index
     *            position of the expected hex digit
     * @return the byte at <code>index</code>
     * @throws DecoderException
     *             Thrown if <code>index</code> is past the end of the input (truncated escape sequence)
     */
    private static byte escapeDigitAt(byte[] bytes, int index) throws DecoderException {
        if (index >= bytes.length) {
            throw new DecoderException("Invalid quoted-printable encoding");
        }
        return bytes[index];
    }

    /**
     * Encodes an array of bytes into an array of quoted-printable 7-bit characters. Unsafe characters are escaped.
     * <p>
     * This function implements a subset of quoted-printable encoding specification (rule #1 and rule #2) as defined in
     * RFC 1521 and is suitable for encoding binary data and unformatted text.
     *
     * @param bytes
     *            array of bytes to be encoded
     * @return array of bytes containing quoted-printable data
     */
    @Override
    public byte[] encode(byte[] bytes) {
        return encodeQuotedPrintable(PRINTABLE_CHARS, bytes);
    }

    /**
     * Decodes an array of quoted-printable characters into an array of original bytes. Escaped characters are converted
     * back to their original representation.
     * <p>
     * This function implements a subset of quoted-printable encoding specification (rule #1 and rule #2) as defined in
     * RFC 1521.
     *
     * @param bytes
     *            array of quoted-printable characters
     * @return array of original bytes
     * @throws DecoderException
     *             Thrown if quoted-printable decoding is unsuccessful
     */
    @Override
    public byte[] decode(byte[] bytes) throws DecoderException {
        return decodeQuotedPrintable(bytes);
    }

    /**
     * Encodes a string into its quoted-printable form using the default string charset. Unsafe characters are escaped.
     * <p>
     * This function implements a subset of quoted-printable encoding specification (rule #1 and rule #2) as defined in
     * RFC 1521 and is suitable for encoding binary data.
     *
     * @param str
     *            string to convert to quoted-printable form
     * @return quoted-printable string
     * @throws EncoderException
     *             Thrown if quoted-printable encoding is unsuccessful
     *
     * @see #getCharset()
     */
    @Override
    public String encode(String str) throws EncoderException {
        return this.encode(str, getCharset());
    }

    /**
     * Decodes a quoted-printable string into its original form using the specified string charset. Escaped characters
     * are converted back to their original representation.
     *
     * @param str
     *            quoted-printable string to convert into its original form
     * @param charset
     *            the original string charset
     * @return original string, or <code>null</code> if <code>str</code> is <code>null</code>
     * @throws DecoderException
     *             Thrown if quoted-printable decoding is unsuccessful
     * @since 1.7
     */
    public String decode(String str, Charset charset) throws DecoderException {
        if (str == null) {
            return null;
        }
        return new String(this.decode(StringUtils.getBytesUsAscii(str)), charset);
    }

    /**
     * Decodes a quoted-printable string into its original form using the specified string charset. Escaped characters
     * are converted back to their original representation.
     *
     * @param str
     *            quoted-printable string to convert into its original form
     * @param charset
     *            the name of the original string charset
     * @return original string, or <code>null</code> if <code>str</code> is <code>null</code>
     * @throws DecoderException
     *             Thrown if quoted-printable decoding is unsuccessful
     * @throws UnsupportedEncodingException
     *             Thrown if charset is not supported
     */
    public String decode(String str, String charset) throws DecoderException, UnsupportedEncodingException {
        if (str == null) {
            return null;
        }
        return new String(decode(StringUtils.getBytesUsAscii(str)), charset);
    }

    /**
     * Decodes a quoted-printable string into its original form using the default string charset. Escaped characters are
     * converted back to their original representation.
     *
     * @param str
     *            quoted-printable string to convert into its original form
     * @return original string
     * @throws DecoderException
     *             Thrown if quoted-printable decoding is unsuccessful
     * @see #getCharset()
     */
    @Override
    public String decode(String str) throws DecoderException {
        return this.decode(str, this.getCharset());
    }

    /**
     * Encodes an object into its quoted-printable safe form. Unsafe characters are escaped.
     *
     * @param obj
     *            a <code>String</code> or <code>byte[]</code> to convert to a quoted-printable form
     * @return quoted-printable object of the same kind as the argument, or <code>null</code> if the argument is
     *         <code>null</code>
     * @throws EncoderException
     *             Thrown if quoted-printable encoding is not applicable to objects of this type or if encoding is
     *             unsuccessful
     */
    @Override
    public Object encode(Object obj) throws EncoderException {
        if (obj == null) {
            return null;
        } else if (obj instanceof byte[]) {
            return encode((byte[]) obj);
        } else if (obj instanceof String) {
            return encode((String) obj);
        } else {
            throw new EncoderException("Objects of type " +
                  obj.getClass().getName() +
                  " cannot be quoted-printable encoded");
        }
    }

    /**
     * Decodes a quoted-printable object into its original form. Escaped characters are converted back to their original
     * representation.
     *
     * @param obj
     *            quoted-printable <code>String</code> or <code>byte[]</code> to convert into its original form
     * @return original object of the same kind as the argument, or <code>null</code> if the argument is
     *         <code>null</code>
     * @throws DecoderException
     *             Thrown if the argument is not a <code>String</code> or <code>byte[]</code>. Thrown if a failure
     *             condition is encountered during the decode process.
     */
    @Override
    public Object decode(Object obj) throws DecoderException {
        if (obj == null) {
            return null;
        } else if (obj instanceof byte[]) {
            return decode((byte[]) obj);
        } else if (obj instanceof String) {
            return decode((String) obj);
        } else {
            throw new DecoderException("Objects of type " +
                  obj.getClass().getName() +
                  " cannot be quoted-printable decoded");
        }
    }

    /**
     * Gets the default charset used for string decoding and encoding.
     *
     * @return the default charset
     * @since 1.7
     */
    public Charset getCharset() {
        return this.charset;
    }

    /**
     * Gets the default charset name used for string decoding and encoding.
     *
     * @return the default charset name
     */
    public String getDefaultCharset() {
        return this.charset.name();
    }

    /**
     * Encodes a string into its quoted-printable form using the specified charset. Unsafe characters are escaped.
     * <p>
     * This function implements a subset of quoted-printable encoding specification (rule #1 and rule #2) as defined in
     * RFC 1521 and is suitable for encoding binary data and unformatted text.
     *
     * @param str
     *            string to convert to quoted-printable form
     * @param charset
     *            the charset for str
     * @return quoted-printable string, or <code>null</code> if <code>str</code> is <code>null</code>
     * @since 1.7
     */
    public String encode(String str, Charset charset) {
        if (str == null) {
            return null;
        }
        return StringUtils.newStringUsAscii(this.encode(str.getBytes(charset)));
    }

    /**
     * Encodes a string into its quoted-printable form using the specified charset. Unsafe characters are escaped.
     * <p>
     * This function implements a subset of quoted-printable encoding specification (rule #1 and rule #2) as defined in
     * RFC 1521 and is suitable for encoding binary data and unformatted text.
     *
     * @param str
     *            string to convert to quoted-printable form
     * @param charset
     *            the name of the charset for str
     * @return quoted-printable string, or <code>null</code> if <code>str</code> is <code>null</code>
     * @throws UnsupportedEncodingException
     *             Thrown if the charset is not supported
     */
    public String encode(String str, String charset) throws UnsupportedEncodingException {
        if (str == null) {
            return null;
        }
        return StringUtils.newStringUsAscii(encode(str.getBytes(charset)));
    }
}