View Javadoc

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
17  
18  package org.apache.commons.codec.net;
19  
20  import java.io.ByteArrayOutputStream;
21  import java.io.UnsupportedEncodingException;
22  import java.nio.charset.Charset;
23  import java.util.BitSet;
24  
25  import org.apache.commons.codec.BinaryDecoder;
26  import org.apache.commons.codec.BinaryEncoder;
27  import org.apache.commons.codec.Charsets;
28  import org.apache.commons.codec.DecoderException;
29  import org.apache.commons.codec.EncoderException;
30  import org.apache.commons.codec.StringDecoder;
31  import org.apache.commons.codec.StringEncoder;
32  import org.apache.commons.codec.binary.StringUtils;
33  
34  /**
35   * Codec for the Quoted-Printable section of <a href="http://www.ietf.org/rfc/rfc1521.txt">RFC 1521</a>.
36   * <p>
37   * The Quoted-Printable encoding is intended to represent data that largely consists of octets that correspond to
38   * printable characters in the ASCII character set. It encodes the data in such a way that the resulting octets are
39   * unlikely to be modified by mail transport. If the data being encoded are mostly ASCII text, the encoded form of the
40   * data remains largely recognizable by humans. A body which is entirely ASCII may also be encoded in Quoted-Printable
41   * to ensure the integrity of the data should the message pass through a character- translating, and/or line-wrapping
42   * gateway.
43   * <p>
44   * Note:
45   * <p>
46   * Rules #3, #4, and #5 of the quoted-printable spec are not implemented yet because the complete quoted-printable spec
47   * does not lend itself well into the byte[] oriented codec framework. Complete the codec once the streamable codec
48   * framework is ready. The motivation behind providing the codec in a partial form is that it can already come in handy
49   * for those applications that do not require quoted-printable line formatting (rules #3, #4, #5), for instance Q codec.
50   * <p>
51   * This class is immutable and thread-safe.
52   *
53   * @see <a href="http://www.ietf.org/rfc/rfc1521.txt"> RFC 1521 MIME (Multipurpose Internet Mail Extensions) Part One:
54   *          Mechanisms for Specifying and Describing the Format of Internet Message Bodies </a>
55   *
56   * @since 1.3
57   * @version $Id: QuotedPrintableCodec.html 889935 2013-12-11 05:05:13Z ggregory $
58   */
59  public class QuotedPrintableCodec implements BinaryEncoder, BinaryDecoder, StringEncoder, StringDecoder {
60      /**
61       * The default charset used for string decoding and encoding.
62       */
63      private final Charset charset;
64  
65      /**
66       * BitSet of printable characters as defined in RFC 1521.
67       */
68      private static final BitSet PRINTABLE_CHARS = new BitSet(256);
69  
70      private static final byte ESCAPE_CHAR = '=';
71  
72      private static final byte TAB = 9;
73  
74      private static final byte SPACE = 32;
75      // Static initializer for printable chars collection
76      static {
77          // alpha characters
78          for (int i = 33; i <= 60; i++) {
79              PRINTABLE_CHARS.set(i);
80          }
81          for (int i = 62; i <= 126; i++) {
82              PRINTABLE_CHARS.set(i);
83          }
84          PRINTABLE_CHARS.set(TAB);
85          PRINTABLE_CHARS.set(SPACE);
86      }
87  
88      /**
89       * Default constructor.
90       */
91      public QuotedPrintableCodec() {
92          this(Charsets.UTF_8);
93      }
94  
95      /**
96       * Constructor which allows for the selection of a default charset.
97       *
98       * @param charset
99       *            the default string charset to use.
100      * @throws UnsupportedCharsetException
101      *             If the named charset is unavailable
102      * @since 1.7
103      */
104     public QuotedPrintableCodec(Charset charset) {
105         this.charset = charset;
106     }
107 
108     /**
109      * Constructor which allows for the selection of a default charset.
110      *
111      * @param charsetName
112      *            the default string charset to use.
113      * @throws java.nio.charset.UnsupportedCharsetException
114      *             If the named charset is unavailable
115      * @since 1.7 throws UnsupportedCharsetException if the named charset is unavailable
116      */
117     public QuotedPrintableCodec(String charsetName) {
118         this(Charset.forName(charsetName));
119     }
120 
121     /**
122      * Encodes byte into its quoted-printable representation.
123      *
124      * @param b
125      *            byte to encode
126      * @param buffer
127      *            the buffer to write to
128      */
129     private static final void encodeQuotedPrintable(int b, ByteArrayOutputStream buffer) {
130         buffer.write(ESCAPE_CHAR);
131         char hex1 = Character.toUpperCase(Character.forDigit((b >> 4) & 0xF, 16));
132         char hex2 = Character.toUpperCase(Character.forDigit(b & 0xF, 16));
133         buffer.write(hex1);
134         buffer.write(hex2);
135     }
136 
137     /**
138      * Encodes an array of bytes into an array of quoted-printable 7-bit characters. Unsafe characters are escaped.
139      * <p>
140      * This function implements a subset of quoted-printable encoding specification (rule #1 and rule #2) as defined in
141      * RFC 1521 and is suitable for encoding binary data and unformatted text.
142      *
143      * @param printable
144      *            bitset of characters deemed quoted-printable
145      * @param bytes
146      *            array of bytes to be encoded
147      * @return array of bytes containing quoted-printable data
148      */
149     public static final byte[] encodeQuotedPrintable(BitSet printable, byte[] bytes) {
150         if (bytes == null) {
151             return null;
152         }
153         if (printable == null) {
154             printable = PRINTABLE_CHARS;
155         }
156         ByteArrayOutputStream buffer = new ByteArrayOutputStream();
157         for (byte c : bytes) {
158             int b = c;
159             if (b < 0) {
160                 b = 256 + b;
161             }
162             if (printable.get(b)) {
163                 buffer.write(b);
164             } else {
165                 encodeQuotedPrintable(b, buffer);
166             }
167         }
168         return buffer.toByteArray();
169     }
170 
171     /**
172      * Decodes an array quoted-printable characters into an array of original bytes. Escaped characters are converted
173      * back to their original representation.
174      * <p>
175      * This function implements a subset of quoted-printable encoding specification (rule #1 and rule #2) as defined in
176      * RFC 1521.
177      *
178      * @param bytes
179      *            array of quoted-printable characters
180      * @return array of original bytes
181      * @throws DecoderException
182      *             Thrown if quoted-printable decoding is unsuccessful
183      */
184     public static final byte[] decodeQuotedPrintable(byte[] bytes) throws DecoderException {
185         if (bytes == null) {
186             return null;
187         }
188         ByteArrayOutputStream buffer = new ByteArrayOutputStream();
189         for (int i = 0; i < bytes.length; i++) {
190             int b = bytes[i];
191             if (b == ESCAPE_CHAR) {
192                 try {
193                     int u = Utils.digit16(bytes[++i]);
194                     int l = Utils.digit16(bytes[++i]);
195                     buffer.write((char) ((u << 4) + l));
196                 } catch (ArrayIndexOutOfBoundsException e) {
197                     throw new DecoderException("Invalid quoted-printable encoding", e);
198                 }
199             } else {
200                 buffer.write(b);
201             }
202         }
203         return buffer.toByteArray();
204     }
205 
206     /**
207      * Encodes an array of bytes into an array of quoted-printable 7-bit characters. Unsafe characters are escaped.
208      * <p>
209      * This function implements a subset of quoted-printable encoding specification (rule #1 and rule #2) as defined in
210      * RFC 1521 and is suitable for encoding binary data and unformatted text.
211      *
212      * @param bytes
213      *            array of bytes to be encoded
214      * @return array of bytes containing quoted-printable data
215      */
216     @Override
217     public byte[] encode(byte[] bytes) {
218         return encodeQuotedPrintable(PRINTABLE_CHARS, bytes);
219     }
220 
221     /**
222      * Decodes an array of quoted-printable characters into an array of original bytes. Escaped characters are converted
223      * back to their original representation.
224      * <p>
225      * This function implements a subset of quoted-printable encoding specification (rule #1 and rule #2) as defined in
226      * RFC 1521.
227      *
228      * @param bytes
229      *            array of quoted-printable characters
230      * @return array of original bytes
231      * @throws DecoderException
232      *             Thrown if quoted-printable decoding is unsuccessful
233      */
234     @Override
235     public byte[] decode(byte[] bytes) throws DecoderException {
236         return decodeQuotedPrintable(bytes);
237     }
238 
239     /**
240      * Encodes a string into its quoted-printable form using the default string charset. Unsafe characters are escaped.
241      * <p>
242      * This function implements a subset of quoted-printable encoding specification (rule #1 and rule #2) as defined in
243      * RFC 1521 and is suitable for encoding binary data.
244      *
245      * @param str
246      *            string to convert to quoted-printable form
247      * @return quoted-printable string
248      * @throws EncoderException
249      *             Thrown if quoted-printable encoding is unsuccessful
250      *
251      * @see #getCharset()
252      */
253     @Override
254     public String encode(String str) throws EncoderException {
255         return this.encode(str, getCharset());
256     }
257 
258     /**
259      * Decodes a quoted-printable string into its original form using the specified string charset. Escaped characters
260      * are converted back to their original representation.
261      *
262      * @param str
263      *            quoted-printable string to convert into its original form
264      * @param charset
265      *            the original string charset
266      * @return original string
267      * @throws DecoderException
268      *             Thrown if quoted-printable decoding is unsuccessful
269      * @since 1.7
270      */
271     public String decode(String str, Charset charset) throws DecoderException {
272         if (str == null) {
273             return null;
274         }
275         return new String(this.decode(StringUtils.getBytesUsAscii(str)), charset);
276     }
277 
278     /**
279      * Decodes a quoted-printable string into its original form using the specified string charset. Escaped characters
280      * are converted back to their original representation.
281      *
282      * @param str
283      *            quoted-printable string to convert into its original form
284      * @param charset
285      *            the original string charset
286      * @return original string
287      * @throws DecoderException
288      *             Thrown if quoted-printable decoding is unsuccessful
289      * @throws UnsupportedEncodingException
290      *             Thrown if charset is not supported
291      */
292     public String decode(String str, String charset) throws DecoderException, UnsupportedEncodingException {
293         if (str == null) {
294             return null;
295         }
296         return new String(decode(StringUtils.getBytesUsAscii(str)), charset);
297     }
298 
299     /**
300      * Decodes a quoted-printable string into its original form using the default string charset. Escaped characters are
301      * converted back to their original representation.
302      *
303      * @param str
304      *            quoted-printable string to convert into its original form
305      * @return original string
306      * @throws DecoderException
307      *             Thrown if quoted-printable decoding is unsuccessful. Thrown if charset is not supported.
308      * @see #getCharset()
309      */
310     @Override
311     public String decode(String str) throws DecoderException {
312         return this.decode(str, this.getCharset());
313     }
314 
315     /**
316      * Encodes an object into its quoted-printable safe form. Unsafe characters are escaped.
317      *
318      * @param obj
319      *            string to convert to a quoted-printable form
320      * @return quoted-printable object
321      * @throws EncoderException
322      *             Thrown if quoted-printable encoding is not applicable to objects of this type or if encoding is
323      *             unsuccessful
324      */
325     @Override
326     public Object encode(Object obj) throws EncoderException {
327         if (obj == null) {
328             return null;
329         } else if (obj instanceof byte[]) {
330             return encode((byte[]) obj);
331         } else if (obj instanceof String) {
332             return encode((String) obj);
333         } else {
334             throw new EncoderException("Objects of type " +
335                   obj.getClass().getName() +
336                   " cannot be quoted-printable encoded");
337         }
338     }
339 
340     /**
341      * Decodes a quoted-printable object into its original form. Escaped characters are converted back to their original
342      * representation.
343      *
344      * @param obj
345      *            quoted-printable object to convert into its original form
346      * @return original object
347      * @throws DecoderException
348      *             Thrown if the argument is not a <code>String</code> or <code>byte[]</code>. Thrown if a failure
349      *             condition is encountered during the decode process.
350      */
351     @Override
352     public Object decode(Object obj) throws DecoderException {
353         if (obj == null) {
354             return null;
355         } else if (obj instanceof byte[]) {
356             return decode((byte[]) obj);
357         } else if (obj instanceof String) {
358             return decode((String) obj);
359         } else {
360             throw new DecoderException("Objects of type " +
361                   obj.getClass().getName() +
362                   " cannot be quoted-printable decoded");
363         }
364     }
365 
366     /**
367      * Gets the default charset name used for string decoding and encoding.
368      *
369      * @return the default charset name
370      * @since 1.7
371      */
372     public Charset getCharset() {
373         return this.charset;
374     }
375 
376     /**
377      * Gets the default charset name used for string decoding and encoding.
378      *
379      * @return the default charset name
380      */
381     public String getDefaultCharset() {
382         return this.charset.name();
383     }
384 
385     /**
386      * Encodes a string into its quoted-printable form using the specified charset. Unsafe characters are escaped.
387      * <p>
388      * This function implements a subset of quoted-printable encoding specification (rule #1 and rule #2) as defined in
389      * RFC 1521 and is suitable for encoding binary data and unformatted text.
390      *
391      * @param str
392      *            string to convert to quoted-printable form
393      * @param charset
394      *            the charset for str
395      * @return quoted-printable string
396      * @since 1.7
397      */
398     public String encode(String str, Charset charset) {
399         if (str == null) {
400             return null;
401         }
402         return StringUtils.newStringUsAscii(this.encode(str.getBytes(charset)));
403     }
404 
405     /**
406      * Encodes a string into its quoted-printable form using the specified charset. Unsafe characters are escaped.
407      * <p>
408      * This function implements a subset of quoted-printable encoding specification (rule #1 and rule #2) as defined in
409      * RFC 1521 and is suitable for encoding binary data and unformatted text.
410      *
411      * @param str
412      *            string to convert to quoted-printable form
413      * @param charset
414      *            the charset for str
415      * @return quoted-printable string
416      * @throws UnsupportedEncodingException
417      *             Thrown if the charset is not supported
418      */
419     public String encode(String str, String charset) throws UnsupportedEncodingException {
420         if (str == null) {
421             return null;
422         }
423         return StringUtils.newStringUsAscii(encode(str.getBytes(charset)));
424     }
425 }