001/* 002 * Licensed to the Apache Software Foundation (ASF) under one or more 003 * contributor license agreements. See the NOTICE file distributed with 004 * this work for additional information regarding copyright ownership. 005 * The ASF licenses this file to You under the Apache License, Version 2.0 006 * (the "License"); you may not use this file except in compliance with 007 * the License. You may obtain a copy of the License at 008 * 009 * http://www.apache.org/licenses/LICENSE-2.0 010 * 011 * Unless required by applicable law or agreed to in writing, software 012 * distributed under the License is distributed on an "AS IS" BASIS, 013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 014 * See the License for the specific language governing permissions and 015 * limitations under the License. 016 */ 017 018package org.apache.commons.codec.net; 019 020import java.io.ByteArrayOutputStream; 021import java.io.UnsupportedEncodingException; 022import java.nio.charset.Charset; 023import java.nio.charset.IllegalCharsetNameException; 024import java.nio.charset.UnsupportedCharsetException; 025import java.util.BitSet; 026 027import org.apache.commons.codec.BinaryDecoder; 028import org.apache.commons.codec.BinaryEncoder; 029import org.apache.commons.codec.Charsets; 030import org.apache.commons.codec.DecoderException; 031import org.apache.commons.codec.EncoderException; 032import org.apache.commons.codec.StringDecoder; 033import org.apache.commons.codec.StringEncoder; 034import org.apache.commons.codec.binary.StringUtils; 035 036/** 037 * Codec for the Quoted-Printable section of <a href="http://www.ietf.org/rfc/rfc1521.txt">RFC 1521</a>. 038 * <p> 039 * The Quoted-Printable encoding is intended to represent data that largely consists of octets that correspond to 040 * printable characters in the ASCII character set. It encodes the data in such a way that the resulting octets are 041 * unlikely to be modified by mail transport. If the data being encoded are mostly ASCII text, the encoded form of the 042 * data remains largely recognizable by humans. A body which is entirely ASCII may also be encoded in Quoted-Printable 043 * to ensure the integrity of the data should the message pass through a character- translating, and/or line-wrapping 044 * gateway. 045 * <p> 046 * Note: 047 * <p> 048 * Rules #3, #4, and #5 of the quoted-printable spec are not implemented yet because the complete quoted-printable spec 049 * does not lend itself well into the byte[] oriented codec framework. Complete the codec once the streamable codec 050 * framework is ready. The motivation behind providing the codec in a partial form is that it can already come in handy 051 * for those applications that do not require quoted-printable line formatting (rules #3, #4, #5), for instance Q codec. 052 * <p> 053 * This class is immutable and thread-safe. 054 * 055 * @see <a href="http://www.ietf.org/rfc/rfc1521.txt"> RFC 1521 MIME (Multipurpose Internet Mail Extensions) Part One: 056 * Mechanisms for Specifying and Describing the Format of Internet Message Bodies </a> 057 * 058 * @since 1.3 059 * @version $Id: QuotedPrintableCodec.html 891688 2013-12-24 20:49:46Z ggregory $ 060 */ 061public class QuotedPrintableCodec implements BinaryEncoder, BinaryDecoder, StringEncoder, StringDecoder { 062 /** 063 * The default charset used for string decoding and encoding. 064 */ 065 private final Charset charset; 066 067 /** 068 * BitSet of printable characters as defined in RFC 1521. 069 */ 070 private static final BitSet PRINTABLE_CHARS = new BitSet(256); 071 072 private static final byte ESCAPE_CHAR = '='; 073 074 private static final byte TAB = 9; 075 076 private static final byte SPACE = 32; 077 // Static initializer for printable chars collection 078 static { 079 // alpha characters 080 for (int i = 33; i <= 60; i++) { 081 PRINTABLE_CHARS.set(i); 082 } 083 for (int i = 62; i <= 126; i++) { 084 PRINTABLE_CHARS.set(i); 085 } 086 PRINTABLE_CHARS.set(TAB); 087 PRINTABLE_CHARS.set(SPACE); 088 } 089 090 /** 091 * Default constructor, assumes default charset of {@link Charsets#UTF_8} 092 */ 093 public QuotedPrintableCodec() { 094 this(Charsets.UTF_8); 095 } 096 097 /** 098 * Constructor which allows for the selection of a default charset. 099 * 100 * @param charset 101 * the default string charset to use. 102 * @since 1.7 103 */ 104 public QuotedPrintableCodec(final Charset charset) { 105 this.charset = charset; 106 } 107 108 /** 109 * Constructor which allows for the selection of a default charset. 110 * 111 * @param charsetName 112 * the default string charset to use. 113 * @throws UnsupportedCharsetException 114 * If no support for the named charset is available 115 * in this instance of the Java virtual machine 116 * @throws IllegalArgumentException 117 * If the given charsetName is null 118 * @throws IllegalCharsetNameException 119 * If the given charset name is illegal 120 * 121 * @since 1.7 throws UnsupportedCharsetException if the named charset is unavailable 122 */ 123 public QuotedPrintableCodec(final String charsetName) 124 throws IllegalCharsetNameException, IllegalArgumentException, UnsupportedCharsetException { 125 this(Charset.forName(charsetName)); 126 } 127 128 /** 129 * Encodes byte into its quoted-printable representation. 130 * 131 * @param b 132 * byte to encode 133 * @param buffer 134 * the buffer to write to 135 */ 136 private static final void encodeQuotedPrintable(final int b, final ByteArrayOutputStream buffer) { 137 buffer.write(ESCAPE_CHAR); 138 final char hex1 = Character.toUpperCase(Character.forDigit((b >> 4) & 0xF, 16)); 139 final char hex2 = Character.toUpperCase(Character.forDigit(b & 0xF, 16)); 140 buffer.write(hex1); 141 buffer.write(hex2); 142 } 143 144 /** 145 * Encodes an array of bytes into an array of quoted-printable 7-bit characters. Unsafe characters are escaped. 146 * <p> 147 * This function implements a subset of quoted-printable encoding specification (rule #1 and rule #2) as defined in 148 * RFC 1521 and is suitable for encoding binary data and unformatted text. 149 * 150 * @param printable 151 * bitset of characters deemed quoted-printable 152 * @param bytes 153 * array of bytes to be encoded 154 * @return array of bytes containing quoted-printable data 155 */ 156 public static final byte[] encodeQuotedPrintable(BitSet printable, final byte[] bytes) { 157 if (bytes == null) { 158 return null; 159 } 160 if (printable == null) { 161 printable = PRINTABLE_CHARS; 162 } 163 final ByteArrayOutputStream buffer = new ByteArrayOutputStream(); 164 for (final byte c : bytes) { 165 int b = c; 166 if (b < 0) { 167 b = 256 + b; 168 } 169 if (printable.get(b)) { 170 buffer.write(b); 171 } else { 172 encodeQuotedPrintable(b, buffer); 173 } 174 } 175 return buffer.toByteArray(); 176 } 177 178 /** 179 * Decodes an array quoted-printable characters into an array of original bytes. Escaped characters are converted 180 * back to their original representation. 181 * <p> 182 * This function implements a subset of quoted-printable encoding specification (rule #1 and rule #2) as defined in 183 * RFC 1521. 184 * 185 * @param bytes 186 * array of quoted-printable characters 187 * @return array of original bytes 188 * @throws DecoderException 189 * Thrown if quoted-printable decoding is unsuccessful 190 */ 191 public static final byte[] decodeQuotedPrintable(final byte[] bytes) throws DecoderException { 192 if (bytes == null) { 193 return null; 194 } 195 final ByteArrayOutputStream buffer = new ByteArrayOutputStream(); 196 for (int i = 0; i < bytes.length; i++) { 197 final int b = bytes[i]; 198 if (b == ESCAPE_CHAR) { 199 try { 200 final int u = Utils.digit16(bytes[++i]); 201 final int l = Utils.digit16(bytes[++i]); 202 buffer.write((char) ((u << 4) + l)); 203 } catch (final ArrayIndexOutOfBoundsException e) { 204 throw new DecoderException("Invalid quoted-printable encoding", e); 205 } 206 } else { 207 buffer.write(b); 208 } 209 } 210 return buffer.toByteArray(); 211 } 212 213 /** 214 * Encodes an array of bytes into an array of quoted-printable 7-bit characters. Unsafe characters are escaped. 215 * <p> 216 * This function implements a subset of quoted-printable encoding specification (rule #1 and rule #2) as defined in 217 * RFC 1521 and is suitable for encoding binary data and unformatted text. 218 * 219 * @param bytes 220 * array of bytes to be encoded 221 * @return array of bytes containing quoted-printable data 222 */ 223 @Override 224 public byte[] encode(final byte[] bytes) { 225 return encodeQuotedPrintable(PRINTABLE_CHARS, bytes); 226 } 227 228 /** 229 * Decodes an array of quoted-printable characters into an array of original bytes. Escaped characters are converted 230 * back to their original representation. 231 * <p> 232 * This function implements a subset of quoted-printable encoding specification (rule #1 and rule #2) as defined in 233 * RFC 1521. 234 * 235 * @param bytes 236 * array of quoted-printable characters 237 * @return array of original bytes 238 * @throws DecoderException 239 * Thrown if quoted-printable decoding is unsuccessful 240 */ 241 @Override 242 public byte[] decode(final byte[] bytes) throws DecoderException { 243 return decodeQuotedPrintable(bytes); 244 } 245 246 /** 247 * Encodes a string into its quoted-printable form using the default string charset. Unsafe characters are escaped. 248 * <p> 249 * This function implements a subset of quoted-printable encoding specification (rule #1 and rule #2) as defined in 250 * RFC 1521 and is suitable for encoding binary data. 251 * 252 * @param str 253 * string to convert to quoted-printable form 254 * @return quoted-printable string 255 * @throws EncoderException 256 * Thrown if quoted-printable encoding is unsuccessful 257 * 258 * @see #getCharset() 259 */ 260 @Override 261 public String encode(final String str) throws EncoderException { 262 return this.encode(str, getCharset()); 263 } 264 265 /** 266 * Decodes a quoted-printable string into its original form using the specified string charset. Escaped characters 267 * are converted back to their original representation. 268 * 269 * @param str 270 * quoted-printable string to convert into its original form 271 * @param charset 272 * the original string charset 273 * @return original string 274 * @throws DecoderException 275 * Thrown if quoted-printable decoding is unsuccessful 276 * @since 1.7 277 */ 278 public String decode(final String str, final Charset charset) throws DecoderException { 279 if (str == null) { 280 return null; 281 } 282 return new String(this.decode(StringUtils.getBytesUsAscii(str)), charset); 283 } 284 285 /** 286 * Decodes a quoted-printable string into its original form using the specified string charset. Escaped characters 287 * are converted back to their original representation. 288 * 289 * @param str 290 * quoted-printable string to convert into its original form 291 * @param charset 292 * the original string charset 293 * @return original string 294 * @throws DecoderException 295 * Thrown if quoted-printable decoding is unsuccessful 296 * @throws UnsupportedEncodingException 297 * Thrown if charset is not supported 298 */ 299 public String decode(final String str, final String charset) throws DecoderException, UnsupportedEncodingException { 300 if (str == null) { 301 return null; 302 } 303 return new String(decode(StringUtils.getBytesUsAscii(str)), charset); 304 } 305 306 /** 307 * Decodes a quoted-printable string into its original form using the default string charset. Escaped characters are 308 * converted back to their original representation. 309 * 310 * @param str 311 * quoted-printable string to convert into its original form 312 * @return original string 313 * @throws DecoderException 314 * Thrown if quoted-printable decoding is unsuccessful. Thrown if charset is not supported. 315 * @see #getCharset() 316 */ 317 @Override 318 public String decode(final String str) throws DecoderException { 319 return this.decode(str, this.getCharset()); 320 } 321 322 /** 323 * Encodes an object into its quoted-printable safe form. Unsafe characters are escaped. 324 * 325 * @param obj 326 * string to convert to a quoted-printable form 327 * @return quoted-printable object 328 * @throws EncoderException 329 * Thrown if quoted-printable encoding is not applicable to objects of this type or if encoding is 330 * unsuccessful 331 */ 332 @Override 333 public Object encode(final Object obj) throws EncoderException { 334 if (obj == null) { 335 return null; 336 } else if (obj instanceof byte[]) { 337 return encode((byte[]) obj); 338 } else if (obj instanceof String) { 339 return encode((String) obj); 340 } else { 341 throw new EncoderException("Objects of type " + 342 obj.getClass().getName() + 343 " cannot be quoted-printable encoded"); 344 } 345 } 346 347 /** 348 * Decodes a quoted-printable object into its original form. Escaped characters are converted back to their original 349 * representation. 350 * 351 * @param obj 352 * quoted-printable object to convert into its original form 353 * @return original object 354 * @throws DecoderException 355 * Thrown if the argument is not a <code>String</code> or <code>byte[]</code>. Thrown if a failure 356 * condition is encountered during the decode process. 357 */ 358 @Override 359 public Object decode(final Object obj) throws DecoderException { 360 if (obj == null) { 361 return null; 362 } else if (obj instanceof byte[]) { 363 return decode((byte[]) obj); 364 } else if (obj instanceof String) { 365 return decode((String) obj); 366 } else { 367 throw new DecoderException("Objects of type " + 368 obj.getClass().getName() + 369 " cannot be quoted-printable decoded"); 370 } 371 } 372 373 /** 374 * Gets the default charset name used for string decoding and encoding. 375 * 376 * @return the default charset name 377 * @since 1.7 378 */ 379 public Charset getCharset() { 380 return this.charset; 381 } 382 383 /** 384 * Gets the default charset name used for string decoding and encoding. 385 * 386 * @return the default charset name 387 */ 388 public String getDefaultCharset() { 389 return this.charset.name(); 390 } 391 392 /** 393 * Encodes a string into its quoted-printable form using the specified charset. Unsafe characters are escaped. 394 * <p> 395 * This function implements a subset of quoted-printable encoding specification (rule #1 and rule #2) as defined in 396 * RFC 1521 and is suitable for encoding binary data and unformatted text. 397 * 398 * @param str 399 * string to convert to quoted-printable form 400 * @param charset 401 * the charset for str 402 * @return quoted-printable string 403 * @since 1.7 404 */ 405 public String encode(final String str, final Charset charset) { 406 if (str == null) { 407 return null; 408 } 409 return StringUtils.newStringUsAscii(this.encode(str.getBytes(charset))); 410 } 411 412 /** 413 * Encodes a string into its quoted-printable form using the specified charset. Unsafe characters are escaped. 414 * <p> 415 * This function implements a subset of quoted-printable encoding specification (rule #1 and rule #2) as defined in 416 * RFC 1521 and is suitable for encoding binary data and unformatted text. 417 * 418 * @param str 419 * string to convert to quoted-printable form 420 * @param charset 421 * the charset for str 422 * @return quoted-printable string 423 * @throws UnsupportedEncodingException 424 * Thrown if the charset is not supported 425 */ 426 public String encode(final String str, final String charset) throws UnsupportedEncodingException { 427 if (str == null) { 428 return null; 429 } 430 return StringUtils.newStringUsAscii(encode(str.getBytes(charset))); 431 } 432}