001 /*
002 * Licensed to the Apache Software Foundation (ASF) under one or more
003 * contributor license agreements. See the NOTICE file distributed with
004 * this work for additional information regarding copyright ownership.
005 * The ASF licenses this file to You under the Apache License, Version 2.0
006 * (the "License"); you may not use this file except in compliance with
007 * the License. You may obtain a copy of the License at
008 *
009 * http://www.apache.org/licenses/LICENSE-2.0
010 *
011 * Unless required by applicable law or agreed to in writing, software
012 * distributed under the License is distributed on an "AS IS" BASIS,
013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 * See the License for the specific language governing permissions and
015 * limitations under the License.
016 */
017
018 package org.apache.commons.codec.net;
019
020 import java.io.ByteArrayOutputStream;
021 import java.io.UnsupportedEncodingException;
022 import java.nio.charset.Charset;
023 import java.util.BitSet;
024
025 import org.apache.commons.codec.BinaryDecoder;
026 import org.apache.commons.codec.BinaryEncoder;
027 import org.apache.commons.codec.Charsets;
028 import org.apache.commons.codec.DecoderException;
029 import org.apache.commons.codec.EncoderException;
030 import org.apache.commons.codec.StringDecoder;
031 import org.apache.commons.codec.StringEncoder;
032 import org.apache.commons.codec.binary.StringUtils;
033
034 /**
035 * Codec for the Quoted-Printable section of <a href="http://www.ietf.org/rfc/rfc1521.txt">RFC 1521</a>.
036 * <p>
037 * The Quoted-Printable encoding is intended to represent data that largely consists of octets that correspond to
038 * printable characters in the ASCII character set. It encodes the data in such a way that the resulting octets are
039 * unlikely to be modified by mail transport. If the data being encoded are mostly ASCII text, the encoded form of the
040 * data remains largely recognizable by humans. A body which is entirely ASCII may also be encoded in Quoted-Printable
041 * to ensure the integrity of the data should the message pass through a character-translating, and/or line-wrapping
042 * gateway.
043 * <p>
044 * Note:
045 * <p>
046 * Rules #3, #4, and #5 of the quoted-printable spec are not implemented yet because the complete quoted-printable spec
047 * does not lend itself well to the byte[] oriented codec framework. Complete the codec once the streamable codec
048 * framework is ready. The motivation behind providing the codec in a partial form is that it can already come in handy
049 * for those applications that do not require quoted-printable line formatting (rules #3, #4, #5), for instance Q codec.
050 * <p>
051 * This class is immutable and thread-safe.
052 *
053 * @see <a href="http://www.ietf.org/rfc/rfc1521.txt"> RFC 1521 MIME (Multipurpose Internet Mail Extensions) Part One:
054 * Mechanisms for Specifying and Describing the Format of Internet Message Bodies </a>
055 *
056 * @since 1.3
057 * @version $Id: QuotedPrintableCodec.html 889935 2013-12-11 05:05:13Z ggregory $
058 */
059 public class QuotedPrintableCodec implements BinaryEncoder, BinaryDecoder, StringEncoder, StringDecoder {
/**
 * Charset used by the String-based encode/decode overloads that do not take an explicit charset.
 */
private final Charset charset;

/**
 * BitSet of printable characters as defined in RFC 1521: the octets that are written out literally
 * instead of being escaped. Populated once by the static initializer below.
 */
private static final BitSet PRINTABLE_CHARS = new BitSet(256);

/** Octet that introduces a two-digit hexadecimal escape sequence. */
private static final byte ESCAPE_CHAR = '=';

/** Horizontal tab (ASCII 9). */
private static final byte TAB = 9;

/** Space (ASCII 32). */
private static final byte SPACE = 32;

// Fill the printable-character set when the class is loaded.
static {
    // every octet from '!' (33) through '~' (126) ...
    PRINTABLE_CHARS.set('!', '~' + 1);
    // ... except the escape character itself (61), which must always be escaped
    PRINTABLE_CHARS.clear(ESCAPE_CHAR);
    // the two whitespace octets that are passed through literally
    PRINTABLE_CHARS.set(TAB);
    PRINTABLE_CHARS.set(SPACE);
}
087
/**
 * Creates a codec whose default string charset is {@code Charsets.UTF_8}.
 */
public QuotedPrintableCodec() {
    this(Charsets.UTF_8);
}
094
/**
 * Creates a codec that uses the supplied charset as its default for string encoding and decoding.
 * <p>
 * The argument is stored as given; no validation is performed here.
 *
 * @param charset
 *            the default string charset to use.
 * @since 1.7
 */
public QuotedPrintableCodec(Charset charset) {
    this.charset = charset;
}
107
/**
 * Creates a codec whose default string charset is looked up by name via
 * {@link Charset#forName(String)}.
 *
 * @param charsetName
 *            the name of the default string charset to use.
 * @throws java.nio.charset.UnsupportedCharsetException
 *             If the named charset is unavailable (thrown since 1.7)
 * @throws java.nio.charset.IllegalCharsetNameException
 *             If the given charset name is illegal
 * @throws IllegalArgumentException
 *             If {@code charsetName} is {@code null}
 */
public QuotedPrintableCodec(String charsetName) {
    this(Charset.forName(charsetName));
}
120
121 /**
122 * Encodes byte into its quoted-printable representation.
123 *
124 * @param b
125 * byte to encode
126 * @param buffer
127 * the buffer to write to
128 */
129 private static final void encodeQuotedPrintable(int b, ByteArrayOutputStream buffer) {
130 buffer.write(ESCAPE_CHAR);
131 char hex1 = Character.toUpperCase(Character.forDigit((b >> 4) & 0xF, 16));
132 char hex2 = Character.toUpperCase(Character.forDigit(b & 0xF, 16));
133 buffer.write(hex1);
134 buffer.write(hex2);
135 }
136
137 /**
138 * Encodes an array of bytes into an array of quoted-printable 7-bit characters. Unsafe characters are escaped.
139 * <p>
140 * This function implements a subset of quoted-printable encoding specification (rule #1 and rule #2) as defined in
141 * RFC 1521 and is suitable for encoding binary data and unformatted text.
142 *
143 * @param printable
144 * bitset of characters deemed quoted-printable
145 * @param bytes
146 * array of bytes to be encoded
147 * @return array of bytes containing quoted-printable data
148 */
149 public static final byte[] encodeQuotedPrintable(BitSet printable, byte[] bytes) {
150 if (bytes == null) {
151 return null;
152 }
153 if (printable == null) {
154 printable = PRINTABLE_CHARS;
155 }
156 ByteArrayOutputStream buffer = new ByteArrayOutputStream();
157 for (byte c : bytes) {
158 int b = c;
159 if (b < 0) {
160 b = 256 + b;
161 }
162 if (printable.get(b)) {
163 buffer.write(b);
164 } else {
165 encodeQuotedPrintable(b, buffer);
166 }
167 }
168 return buffer.toByteArray();
169 }
170
171 /**
172 * Decodes an array quoted-printable characters into an array of original bytes. Escaped characters are converted
173 * back to their original representation.
174 * <p>
175 * This function implements a subset of quoted-printable encoding specification (rule #1 and rule #2) as defined in
176 * RFC 1521.
177 *
178 * @param bytes
179 * array of quoted-printable characters
180 * @return array of original bytes
181 * @throws DecoderException
182 * Thrown if quoted-printable decoding is unsuccessful
183 */
184 public static final byte[] decodeQuotedPrintable(byte[] bytes) throws DecoderException {
185 if (bytes == null) {
186 return null;
187 }
188 ByteArrayOutputStream buffer = new ByteArrayOutputStream();
189 for (int i = 0; i < bytes.length; i++) {
190 int b = bytes[i];
191 if (b == ESCAPE_CHAR) {
192 try {
193 int u = Utils.digit16(bytes[++i]);
194 int l = Utils.digit16(bytes[++i]);
195 buffer.write((char) ((u << 4) + l));
196 } catch (ArrayIndexOutOfBoundsException e) {
197 throw new DecoderException("Invalid quoted-printable encoding", e);
198 }
199 } else {
200 buffer.write(b);
201 }
202 }
203 return buffer.toByteArray();
204 }
205
206 /**
207 * Encodes an array of bytes into an array of quoted-printable 7-bit characters. Unsafe characters are escaped.
208 * <p>
209 * This function implements a subset of quoted-printable encoding specification (rule #1 and rule #2) as defined in
210 * RFC 1521 and is suitable for encoding binary data and unformatted text.
211 *
212 * @param bytes
213 * array of bytes to be encoded
214 * @return array of bytes containing quoted-printable data
215 */
216 @Override
217 public byte[] encode(byte[] bytes) {
218 return encodeQuotedPrintable(PRINTABLE_CHARS, bytes);
219 }
220
221 /**
222 * Decodes an array of quoted-printable characters into an array of original bytes. Escaped characters are converted
223 * back to their original representation.
224 * <p>
225 * This function implements a subset of quoted-printable encoding specification (rule #1 and rule #2) as defined in
226 * RFC 1521.
227 *
228 * @param bytes
229 * array of quoted-printable characters
230 * @return array of original bytes
231 * @throws DecoderException
232 * Thrown if quoted-printable decoding is unsuccessful
233 */
234 @Override
235 public byte[] decode(byte[] bytes) throws DecoderException {
236 return decodeQuotedPrintable(bytes);
237 }
238
239 /**
240 * Encodes a string into its quoted-printable form using the default string charset. Unsafe characters are escaped.
241 * <p>
242 * This function implements a subset of quoted-printable encoding specification (rule #1 and rule #2) as defined in
243 * RFC 1521 and is suitable for encoding binary data.
244 *
245 * @param str
246 * string to convert to quoted-printable form
247 * @return quoted-printable string
248 * @throws EncoderException
249 * Thrown if quoted-printable encoding is unsuccessful
250 *
251 * @see #getCharset()
252 */
253 @Override
254 public String encode(String str) throws EncoderException {
255 return this.encode(str, getCharset());
256 }
257
258 /**
259 * Decodes a quoted-printable string into its original form using the specified string charset. Escaped characters
260 * are converted back to their original representation.
261 *
262 * @param str
263 * quoted-printable string to convert into its original form
264 * @param charset
265 * the original string charset
266 * @return original string
267 * @throws DecoderException
268 * Thrown if quoted-printable decoding is unsuccessful
269 * @since 1.7
270 */
271 public String decode(String str, Charset charset) throws DecoderException {
272 if (str == null) {
273 return null;
274 }
275 return new String(this.decode(StringUtils.getBytesUsAscii(str)), charset);
276 }
277
278 /**
279 * Decodes a quoted-printable string into its original form using the specified string charset. Escaped characters
280 * are converted back to their original representation.
281 *
282 * @param str
283 * quoted-printable string to convert into its original form
284 * @param charset
285 * the original string charset
286 * @return original string
287 * @throws DecoderException
288 * Thrown if quoted-printable decoding is unsuccessful
289 * @throws UnsupportedEncodingException
290 * Thrown if charset is not supported
291 */
292 public String decode(String str, String charset) throws DecoderException, UnsupportedEncodingException {
293 if (str == null) {
294 return null;
295 }
296 return new String(decode(StringUtils.getBytesUsAscii(str)), charset);
297 }
298
299 /**
300 * Decodes a quoted-printable string into its original form using the default string charset. Escaped characters are
301 * converted back to their original representation.
302 *
303 * @param str
304 * quoted-printable string to convert into its original form
305 * @return original string
306 * @throws DecoderException
307 * Thrown if quoted-printable decoding is unsuccessful. Thrown if charset is not supported.
308 * @see #getCharset()
309 */
310 @Override
311 public String decode(String str) throws DecoderException {
312 return this.decode(str, this.getCharset());
313 }
314
315 /**
316 * Encodes an object into its quoted-printable safe form. Unsafe characters are escaped.
317 *
318 * @param obj
319 * string to convert to a quoted-printable form
320 * @return quoted-printable object
321 * @throws EncoderException
322 * Thrown if quoted-printable encoding is not applicable to objects of this type or if encoding is
323 * unsuccessful
324 */
325 @Override
326 public Object encode(Object obj) throws EncoderException {
327 if (obj == null) {
328 return null;
329 } else if (obj instanceof byte[]) {
330 return encode((byte[]) obj);
331 } else if (obj instanceof String) {
332 return encode((String) obj);
333 } else {
334 throw new EncoderException("Objects of type " +
335 obj.getClass().getName() +
336 " cannot be quoted-printable encoded");
337 }
338 }
339
340 /**
341 * Decodes a quoted-printable object into its original form. Escaped characters are converted back to their original
342 * representation.
343 *
344 * @param obj
345 * quoted-printable object to convert into its original form
346 * @return original object
347 * @throws DecoderException
348 * Thrown if the argument is not a <code>String</code> or <code>byte[]</code>. Thrown if a failure
349 * condition is encountered during the decode process.
350 */
351 @Override
352 public Object decode(Object obj) throws DecoderException {
353 if (obj == null) {
354 return null;
355 } else if (obj instanceof byte[]) {
356 return decode((byte[]) obj);
357 } else if (obj instanceof String) {
358 return decode((String) obj);
359 } else {
360 throw new DecoderException("Objects of type " +
361 obj.getClass().getName() +
362 " cannot be quoted-printable decoded");
363 }
364 }
365
366 /**
367 * Gets the default charset name used for string decoding and encoding.
368 *
369 * @return the default charset name
370 * @since 1.7
371 */
372 public Charset getCharset() {
373 return this.charset;
374 }
375
376 /**
377 * Gets the default charset name used for string decoding and encoding.
378 *
379 * @return the default charset name
380 */
381 public String getDefaultCharset() {
382 return this.charset.name();
383 }
384
385 /**
386 * Encodes a string into its quoted-printable form using the specified charset. Unsafe characters are escaped.
387 * <p>
388 * This function implements a subset of quoted-printable encoding specification (rule #1 and rule #2) as defined in
389 * RFC 1521 and is suitable for encoding binary data and unformatted text.
390 *
391 * @param str
392 * string to convert to quoted-printable form
393 * @param charset
394 * the charset for str
395 * @return quoted-printable string
396 * @since 1.7
397 */
398 public String encode(String str, Charset charset) {
399 if (str == null) {
400 return null;
401 }
402 return StringUtils.newStringUsAscii(this.encode(str.getBytes(charset)));
403 }
404
405 /**
406 * Encodes a string into its quoted-printable form using the specified charset. Unsafe characters are escaped.
407 * <p>
408 * This function implements a subset of quoted-printable encoding specification (rule #1 and rule #2) as defined in
409 * RFC 1521 and is suitable for encoding binary data and unformatted text.
410 *
411 * @param str
412 * string to convert to quoted-printable form
413 * @param charset
414 * the charset for str
415 * @return quoted-printable string
416 * @throws UnsupportedEncodingException
417 * Thrown if the charset is not supported
418 */
419 public String encode(String str, String charset) throws UnsupportedEncodingException {
420 if (str == null) {
421 return null;
422 }
423 return StringUtils.newStringUsAscii(encode(str.getBytes(charset)));
424 }
425 }