1 /*
2 * Licensed to the Apache Software Foundation (ASF) under one or more
3 * contributor license agreements. See the NOTICE file distributed with
4 * this work for additional information regarding copyright ownership.
5 * The ASF licenses this file to You under the Apache License, Version 2.0
6 * (the "License"); you may not use this file except in compliance with
7 * the License. You may obtain a copy of the License at
8 *
9 * http://www.apache.org/licenses/LICENSE-2.0
10 *
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
16 */
17
18 package org.apache.commons.codec.net;
19
20 import java.io.ByteArrayOutputStream;
21 import java.io.UnsupportedEncodingException;
22 import java.nio.charset.Charset;
23 import java.nio.charset.IllegalCharsetNameException;
24 import java.nio.charset.UnsupportedCharsetException;
25 import java.util.BitSet;
26
27 import org.apache.commons.codec.BinaryDecoder;
28 import org.apache.commons.codec.BinaryEncoder;
29 import org.apache.commons.codec.Charsets;
30 import org.apache.commons.codec.DecoderException;
31 import org.apache.commons.codec.EncoderException;
32 import org.apache.commons.codec.StringDecoder;
33 import org.apache.commons.codec.StringEncoder;
34 import org.apache.commons.codec.binary.StringUtils;
35
36 /**
37 * Codec for the Quoted-Printable section of <a href="http://www.ietf.org/rfc/rfc1521.txt">RFC 1521</a>.
38 * <p>
39 * The Quoted-Printable encoding is intended to represent data that largely consists of octets that correspond to
40 * printable characters in the ASCII character set. It encodes the data in such a way that the resulting octets are
41 * unlikely to be modified by mail transport. If the data being encoded are mostly ASCII text, the encoded form of the
42 * data remains largely recognizable by humans. A body which is entirely ASCII may also be encoded in Quoted-Printable
43 * to ensure the integrity of the data should the message pass through a character- translating, and/or line-wrapping
44 * gateway.
45 * <p>
46 * Note:
47 * <p>
48 * Rules #3, #4, and #5 of the quoted-printable spec are not implemented yet because the complete quoted-printable spec
49 * does not lend itself well into the byte[] oriented codec framework. Complete the codec once the streamable codec
50 * framework is ready. The motivation behind providing the codec in a partial form is that it can already come in handy
51 * for those applications that do not require quoted-printable line formatting (rules #3, #4, #5), for instance Q codec.
52 * <p>
53 * This class is immutable and thread-safe.
54 *
55 * @see <a href="http://www.ietf.org/rfc/rfc1521.txt"> RFC 1521 MIME (Multipurpose Internet Mail Extensions) Part One:
56 * Mechanisms for Specifying and Describing the Format of Internet Message Bodies </a>
57 *
58 * @since 1.3
59 * @version $Id: QuotedPrintableCodec.html 889935 2013-12-11 05:05:13Z ggregory $
60 */
61 public class QuotedPrintableCodec implements BinaryEncoder, BinaryDecoder, StringEncoder, StringDecoder {
62 /**
63 * The default charset used for string decoding and encoding.
64 */
65 private final Charset charset;
66
67 /**
68 * BitSet of printable characters as defined in RFC 1521.
69 */
70 private static final BitSet PRINTABLE_CHARS = new BitSet(256);
71
72 private static final byte ESCAPE_CHAR = '=';
73
74 private static final byte TAB = 9;
75
76 private static final byte SPACE = 32;
77 // Static initializer for printable chars collection
78 static {
79 // alpha characters
80 for (int i = 33; i <= 60; i++) {
81 PRINTABLE_CHARS.set(i);
82 }
83 for (int i = 62; i <= 126; i++) {
84 PRINTABLE_CHARS.set(i);
85 }
86 PRINTABLE_CHARS.set(TAB);
87 PRINTABLE_CHARS.set(SPACE);
88 }
89
90 /**
91 * Default constructor, assumes default charset of {@link Charsets#UTF_8}
92 */
93 public QuotedPrintableCodec() {
94 this(Charsets.UTF_8);
95 }
96
97 /**
98 * Constructor which allows for the selection of a default charset.
99 *
100 * @param charset
101 * the default string charset to use.
102 * @since 1.7
103 */
104 public QuotedPrintableCodec(final Charset charset) {
105 this.charset = charset;
106 }
107
108 /**
109 * Constructor which allows for the selection of a default charset.
110 *
111 * @param charsetName
112 * the default string charset to use.
113 * @throws UnsupportedCharsetException
114 * If no support for the named charset is available
115 * in this instance of the Java virtual machine
116 * @throws IllegalArgumentException
117 * If the given charsetName is null
118 * @throws IllegalCharsetNameException
119 * If the given charset name is illegal
120 *
121 * @since 1.7 throws UnsupportedCharsetException if the named charset is unavailable
122 */
123 public QuotedPrintableCodec(final String charsetName)
124 throws IllegalCharsetNameException, IllegalArgumentException, UnsupportedCharsetException {
125 this(Charset.forName(charsetName));
126 }
127
128 /**
129 * Encodes byte into its quoted-printable representation.
130 *
131 * @param b
132 * byte to encode
133 * @param buffer
134 * the buffer to write to
135 */
136 private static final void encodeQuotedPrintable(final int b, final ByteArrayOutputStream buffer) {
137 buffer.write(ESCAPE_CHAR);
138 final char hex1 = Character.toUpperCase(Character.forDigit((b >> 4) & 0xF, 16));
139 final char hex2 = Character.toUpperCase(Character.forDigit(b & 0xF, 16));
140 buffer.write(hex1);
141 buffer.write(hex2);
142 }
143
144 /**
145 * Encodes an array of bytes into an array of quoted-printable 7-bit characters. Unsafe characters are escaped.
146 * <p>
147 * This function implements a subset of quoted-printable encoding specification (rule #1 and rule #2) as defined in
148 * RFC 1521 and is suitable for encoding binary data and unformatted text.
149 *
150 * @param printable
151 * bitset of characters deemed quoted-printable
152 * @param bytes
153 * array of bytes to be encoded
154 * @return array of bytes containing quoted-printable data
155 */
156 public static final byte[] encodeQuotedPrintable(BitSet printable, final byte[] bytes) {
157 if (bytes == null) {
158 return null;
159 }
160 if (printable == null) {
161 printable = PRINTABLE_CHARS;
162 }
163 final ByteArrayOutputStream buffer = new ByteArrayOutputStream();
164 for (final byte c : bytes) {
165 int b = c;
166 if (b < 0) {
167 b = 256 + b;
168 }
169 if (printable.get(b)) {
170 buffer.write(b);
171 } else {
172 encodeQuotedPrintable(b, buffer);
173 }
174 }
175 return buffer.toByteArray();
176 }
177
178 /**
179 * Decodes an array quoted-printable characters into an array of original bytes. Escaped characters are converted
180 * back to their original representation.
181 * <p>
182 * This function implements a subset of quoted-printable encoding specification (rule #1 and rule #2) as defined in
183 * RFC 1521.
184 *
185 * @param bytes
186 * array of quoted-printable characters
187 * @return array of original bytes
188 * @throws DecoderException
189 * Thrown if quoted-printable decoding is unsuccessful
190 */
191 public static final byte[] decodeQuotedPrintable(final byte[] bytes) throws DecoderException {
192 if (bytes == null) {
193 return null;
194 }
195 final ByteArrayOutputStream buffer = new ByteArrayOutputStream();
196 for (int i = 0; i < bytes.length; i++) {
197 final int b = bytes[i];
198 if (b == ESCAPE_CHAR) {
199 try {
200 final int u = Utils.digit16(bytes[++i]);
201 final int l = Utils.digit16(bytes[++i]);
202 buffer.write((char) ((u << 4) + l));
203 } catch (final ArrayIndexOutOfBoundsException e) {
204 throw new DecoderException("Invalid quoted-printable encoding", e);
205 }
206 } else {
207 buffer.write(b);
208 }
209 }
210 return buffer.toByteArray();
211 }
212
213 /**
214 * Encodes an array of bytes into an array of quoted-printable 7-bit characters. Unsafe characters are escaped.
215 * <p>
216 * This function implements a subset of quoted-printable encoding specification (rule #1 and rule #2) as defined in
217 * RFC 1521 and is suitable for encoding binary data and unformatted text.
218 *
219 * @param bytes
220 * array of bytes to be encoded
221 * @return array of bytes containing quoted-printable data
222 */
223 @Override
224 public byte[] encode(final byte[] bytes) {
225 return encodeQuotedPrintable(PRINTABLE_CHARS, bytes);
226 }
227
228 /**
229 * Decodes an array of quoted-printable characters into an array of original bytes. Escaped characters are converted
230 * back to their original representation.
231 * <p>
232 * This function implements a subset of quoted-printable encoding specification (rule #1 and rule #2) as defined in
233 * RFC 1521.
234 *
235 * @param bytes
236 * array of quoted-printable characters
237 * @return array of original bytes
238 * @throws DecoderException
239 * Thrown if quoted-printable decoding is unsuccessful
240 */
241 @Override
242 public byte[] decode(final byte[] bytes) throws DecoderException {
243 return decodeQuotedPrintable(bytes);
244 }
245
246 /**
247 * Encodes a string into its quoted-printable form using the default string charset. Unsafe characters are escaped.
248 * <p>
249 * This function implements a subset of quoted-printable encoding specification (rule #1 and rule #2) as defined in
250 * RFC 1521 and is suitable for encoding binary data.
251 *
252 * @param str
253 * string to convert to quoted-printable form
254 * @return quoted-printable string
255 * @throws EncoderException
256 * Thrown if quoted-printable encoding is unsuccessful
257 *
258 * @see #getCharset()
259 */
260 @Override
261 public String encode(final String str) throws EncoderException {
262 return this.encode(str, getCharset());
263 }
264
265 /**
266 * Decodes a quoted-printable string into its original form using the specified string charset. Escaped characters
267 * are converted back to their original representation.
268 *
269 * @param str
270 * quoted-printable string to convert into its original form
271 * @param charset
272 * the original string charset
273 * @return original string
274 * @throws DecoderException
275 * Thrown if quoted-printable decoding is unsuccessful
276 * @since 1.7
277 */
278 public String decode(final String str, final Charset charset) throws DecoderException {
279 if (str == null) {
280 return null;
281 }
282 return new String(this.decode(StringUtils.getBytesUsAscii(str)), charset);
283 }
284
285 /**
286 * Decodes a quoted-printable string into its original form using the specified string charset. Escaped characters
287 * are converted back to their original representation.
288 *
289 * @param str
290 * quoted-printable string to convert into its original form
291 * @param charset
292 * the original string charset
293 * @return original string
294 * @throws DecoderException
295 * Thrown if quoted-printable decoding is unsuccessful
296 * @throws UnsupportedEncodingException
297 * Thrown if charset is not supported
298 */
299 public String decode(final String str, final String charset) throws DecoderException, UnsupportedEncodingException {
300 if (str == null) {
301 return null;
302 }
303 return new String(decode(StringUtils.getBytesUsAscii(str)), charset);
304 }
305
306 /**
307 * Decodes a quoted-printable string into its original form using the default string charset. Escaped characters are
308 * converted back to their original representation.
309 *
310 * @param str
311 * quoted-printable string to convert into its original form
312 * @return original string
313 * @throws DecoderException
314 * Thrown if quoted-printable decoding is unsuccessful. Thrown if charset is not supported.
315 * @see #getCharset()
316 */
317 @Override
318 public String decode(final String str) throws DecoderException {
319 return this.decode(str, this.getCharset());
320 }
321
322 /**
323 * Encodes an object into its quoted-printable safe form. Unsafe characters are escaped.
324 *
325 * @param obj
326 * string to convert to a quoted-printable form
327 * @return quoted-printable object
328 * @throws EncoderException
329 * Thrown if quoted-printable encoding is not applicable to objects of this type or if encoding is
330 * unsuccessful
331 */
332 @Override
333 public Object encode(final Object obj) throws EncoderException {
334 if (obj == null) {
335 return null;
336 } else if (obj instanceof byte[]) {
337 return encode((byte[]) obj);
338 } else if (obj instanceof String) {
339 return encode((String) obj);
340 } else {
341 throw new EncoderException("Objects of type " +
342 obj.getClass().getName() +
343 " cannot be quoted-printable encoded");
344 }
345 }
346
347 /**
348 * Decodes a quoted-printable object into its original form. Escaped characters are converted back to their original
349 * representation.
350 *
351 * @param obj
352 * quoted-printable object to convert into its original form
353 * @return original object
354 * @throws DecoderException
355 * Thrown if the argument is not a <code>String</code> or <code>byte[]</code>. Thrown if a failure
356 * condition is encountered during the decode process.
357 */
358 @Override
359 public Object decode(final Object obj) throws DecoderException {
360 if (obj == null) {
361 return null;
362 } else if (obj instanceof byte[]) {
363 return decode((byte[]) obj);
364 } else if (obj instanceof String) {
365 return decode((String) obj);
366 } else {
367 throw new DecoderException("Objects of type " +
368 obj.getClass().getName() +
369 " cannot be quoted-printable decoded");
370 }
371 }
372
373 /**
374 * Gets the default charset name used for string decoding and encoding.
375 *
376 * @return the default charset name
377 * @since 1.7
378 */
379 public Charset getCharset() {
380 return this.charset;
381 }
382
383 /**
384 * Gets the default charset name used for string decoding and encoding.
385 *
386 * @return the default charset name
387 */
388 public String getDefaultCharset() {
389 return this.charset.name();
390 }
391
392 /**
393 * Encodes a string into its quoted-printable form using the specified charset. Unsafe characters are escaped.
394 * <p>
395 * This function implements a subset of quoted-printable encoding specification (rule #1 and rule #2) as defined in
396 * RFC 1521 and is suitable for encoding binary data and unformatted text.
397 *
398 * @param str
399 * string to convert to quoted-printable form
400 * @param charset
401 * the charset for str
402 * @return quoted-printable string
403 * @since 1.7
404 */
405 public String encode(final String str, final Charset charset) {
406 if (str == null) {
407 return null;
408 }
409 return StringUtils.newStringUsAscii(this.encode(str.getBytes(charset)));
410 }
411
412 /**
413 * Encodes a string into its quoted-printable form using the specified charset. Unsafe characters are escaped.
414 * <p>
415 * This function implements a subset of quoted-printable encoding specification (rule #1 and rule #2) as defined in
416 * RFC 1521 and is suitable for encoding binary data and unformatted text.
417 *
418 * @param str
419 * string to convert to quoted-printable form
420 * @param charset
421 * the charset for str
422 * @return quoted-printable string
423 * @throws UnsupportedEncodingException
424 * Thrown if the charset is not supported
425 */
426 public String encode(final String str, final String charset) throws UnsupportedEncodingException {
427 if (str == null) {
428 return null;
429 }
430 return StringUtils.newStringUsAscii(encode(str.getBytes(charset)));
431 }
432 }