View Javadoc

1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one or more
3    * contributor license agreements.  See the NOTICE file distributed with
4    * this work for additional information regarding copyright ownership.
5    * The ASF licenses this file to You under the Apache License, Version 2.0
6    * (the "License"); you may not use this file except in compliance with
7    * the License.  You may obtain a copy of the License at
8    * 
9    *      http://www.apache.org/licenses/LICENSE-2.0
10   * 
11   * Unless required by applicable law or agreed to in writing, software
12   * distributed under the License is distributed on an "AS IS" BASIS,
13   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14   * See the License for the specific language governing permissions and
15   * limitations under the License.
16   */
17  
18  package org.apache.commons.codec.net;
19  
20  import java.io.ByteArrayOutputStream;
21  import java.io.UnsupportedEncodingException;
22  import java.util.BitSet;
23  
24  import org.apache.commons.codec.BinaryDecoder;
25  import org.apache.commons.codec.BinaryEncoder;
26  import org.apache.commons.codec.CharEncoding;
27  import org.apache.commons.codec.DecoderException;
28  import org.apache.commons.codec.EncoderException;
29  import org.apache.commons.codec.StringDecoder;
30  import org.apache.commons.codec.StringEncoder;
31  import org.apache.commons.codec.binary.StringUtils;
32  
33  /**
34   * <p>
35   * Codec for the Quoted-Printable section of <a href="http://www.ietf.org/rfc/rfc1521.txt">RFC 1521</a>.
36   * </p>
37   * <p>
38   * The Quoted-Printable encoding is intended to represent data that largely consists of octets that correspond to
39   * printable characters in the ASCII character set. It encodes the data in such a way that the resulting octets are
40   * unlikely to be modified by mail transport. If the data being encoded are mostly ASCII text, the encoded form of the
41   * data remains largely recognizable by humans. A body which is entirely ASCII may also be encoded in Quoted-Printable
42   * to ensure the integrity of the data should the message pass through a character- translating, and/or line-wrapping
43   * gateway.
44   * </p>
45   * 
46   * <p>
47   * Note:
48   * </p>
49   * <p>
50   * Rules #3, #4, and #5 of the quoted-printable spec are not implemented yet because the complete quoted-printable spec
51   * does not lend itself well into the byte[] oriented codec framework. Complete the codec once the streamable codec
52   * framework is ready. The motivation behind providing the codec in a partial form is that it can already come in handy
53   * for those applications that do not require quoted-printable line formatting (rules #3, #4, #5), for instance Q codec.
54   * </p>
55   * 
56   * @see <a href="http://www.ietf.org/rfc/rfc1521.txt"> RFC 1521 MIME (Multipurpose Internet Mail Extensions) Part One:
57   *          Mechanisms for Specifying and Describing the Format of Internet Message Bodies </a>
58   * 
59   * @author Apache Software Foundation
60   * @since 1.3
61   * @version $Id: QuotedPrintableCodec.java 1157192 2011-08-12 17:27:38Z ggregory $
62   */
63  public class QuotedPrintableCodec implements BinaryEncoder, BinaryDecoder, StringEncoder, StringDecoder {
64      /**
65       * The default charset used for string decoding and encoding.
66       */
67      private final String charset;
68  
69      /**
70       * BitSet of printable characters as defined in RFC 1521.
71       */
72      private static final BitSet PRINTABLE_CHARS = new BitSet(256);
73  
74      private static final byte ESCAPE_CHAR = '=';
75  
76      private static final byte TAB = 9;
77  
78      private static final byte SPACE = 32;
79      // Static initializer for printable chars collection
80      static {
81          // alpha characters
82          for (int i = 33; i <= 60; i++) {
83              PRINTABLE_CHARS.set(i);
84          }
85          for (int i = 62; i <= 126; i++) {
86              PRINTABLE_CHARS.set(i);
87          }
88          PRINTABLE_CHARS.set(TAB);
89          PRINTABLE_CHARS.set(SPACE);
90      }
91  
92      /**
93       * Default constructor.
94       */
95      public QuotedPrintableCodec() {
96          this(CharEncoding.UTF_8);
97      }
98  
99      /**
100      * Constructor which allows for the selection of a default charset
101      * 
102      * @param charset
103      *                  the default string charset to use.
104      */
105     public QuotedPrintableCodec(String charset) {
106         super();
107         this.charset = charset;
108     }
109 
110     /**
111      * Encodes byte into its quoted-printable representation.
112      * 
113      * @param b
114      *                  byte to encode
115      * @param buffer
116      *                  the buffer to write to
117      */
118     private static final void encodeQuotedPrintable(int b, ByteArrayOutputStream buffer) {
119         buffer.write(ESCAPE_CHAR);
120         char hex1 = Character.toUpperCase(Character.forDigit((b >> 4) & 0xF, 16));
121         char hex2 = Character.toUpperCase(Character.forDigit(b & 0xF, 16));
122         buffer.write(hex1);
123         buffer.write(hex2);
124     }
125 
126     /**
127      * Encodes an array of bytes into an array of quoted-printable 7-bit characters. Unsafe characters are escaped.
128      * 
129      * <p>
130      * This function implements a subset of quoted-printable encoding specification (rule #1 and rule #2) as defined in
131      * RFC 1521 and is suitable for encoding binary data and unformatted text.
132      * </p>
133      * 
134      * @param printable
135      *                  bitset of characters deemed quoted-printable
136      * @param bytes
137      *                  array of bytes to be encoded
138      * @return array of bytes containing quoted-printable data
139      */
140     public static final byte[] encodeQuotedPrintable(BitSet printable, byte[] bytes) {
141         if (bytes == null) {
142             return null;
143         }
144         if (printable == null) {
145             printable = PRINTABLE_CHARS;
146         }
147         ByteArrayOutputStream buffer = new ByteArrayOutputStream();
148         for (byte c : bytes) {
149             int b = c;
150             if (b < 0) {
151                 b = 256 + b;
152             }
153             if (printable.get(b)) {
154                 buffer.write(b);
155             } else {
156                 encodeQuotedPrintable(b, buffer);
157             }
158         }
159         return buffer.toByteArray();
160     }
161 
162     /**
163      * Decodes an array quoted-printable characters into an array of original bytes. Escaped characters are converted
164      * back to their original representation.
165      * 
166      * <p>
167      * This function implements a subset of quoted-printable encoding specification (rule #1 and rule #2) as defined in
168      * RFC 1521.
169      * </p>
170      * 
171      * @param bytes
172      *                  array of quoted-printable characters
173      * @return array of original bytes
174      * @throws DecoderException
175      *                  Thrown if quoted-printable decoding is unsuccessful
176      */
177     public static final byte[] decodeQuotedPrintable(byte[] bytes) throws DecoderException {
178         if (bytes == null) {
179             return null;
180         }
181         ByteArrayOutputStream buffer = new ByteArrayOutputStream();
182         for (int i = 0; i < bytes.length; i++) {
183             int b = bytes[i];
184             if (b == ESCAPE_CHAR) {
185                 try {
186                     int u = Utils.digit16(bytes[++i]);
187                     int l = Utils.digit16(bytes[++i]);
188                     buffer.write((char) ((u << 4) + l));
189                 } catch (ArrayIndexOutOfBoundsException e) {
190                     throw new DecoderException("Invalid quoted-printable encoding", e);
191                 }
192             } else {
193                 buffer.write(b);
194             }
195         }
196         return buffer.toByteArray();
197     }
198 
199     /**
200      * Encodes an array of bytes into an array of quoted-printable 7-bit characters. Unsafe characters are escaped.
201      * 
202      * <p>
203      * This function implements a subset of quoted-printable encoding specification (rule #1 and rule #2) as defined in
204      * RFC 1521 and is suitable for encoding binary data and unformatted text.
205      * </p>
206      * 
207      * @param bytes
208      *                  array of bytes to be encoded
209      * @return array of bytes containing quoted-printable data
210      */
211     public byte[] encode(byte[] bytes) {
212         return encodeQuotedPrintable(PRINTABLE_CHARS, bytes);
213     }
214 
215     /**
216      * Decodes an array of quoted-printable characters into an array of original bytes. Escaped characters are converted
217      * back to their original representation.
218      * 
219      * <p>
220      * This function implements a subset of quoted-printable encoding specification (rule #1 and rule #2) as defined in
221      * RFC 1521.
222      * </p>
223      * 
224      * @param bytes
225      *                  array of quoted-printable characters
226      * @return array of original bytes
227      * @throws DecoderException
228      *                  Thrown if quoted-printable decoding is unsuccessful
229      */
230     public byte[] decode(byte[] bytes) throws DecoderException {
231         return decodeQuotedPrintable(bytes);
232     }
233 
234     /**
235      * Encodes a string into its quoted-printable form using the default string charset. Unsafe characters are escaped.
236      * 
237      * <p>
238      * This function implements a subset of quoted-printable encoding specification (rule #1 and rule #2) as defined in
239      * RFC 1521 and is suitable for encoding binary data.
240      * </p>
241      * 
242      * @param pString
243      *                  string to convert to quoted-printable form
244      * @return quoted-printable string
245      * 
246      * @throws EncoderException
247      *                  Thrown if quoted-printable encoding is unsuccessful
248      * 
249      * @see #getDefaultCharset()
250      */
251     public String encode(String pString) throws EncoderException {
252         if (pString == null) {
253             return null;
254         }
255         try {
256             return encode(pString, getDefaultCharset());
257         } catch (UnsupportedEncodingException e) {
258             throw new EncoderException(e.getMessage(), e);
259         }
260     }
261 
262     /**
263      * Decodes a quoted-printable string into its original form using the specified string charset. Escaped characters
264      * are converted back to their original representation.
265      * 
266      * @param pString
267      *                  quoted-printable string to convert into its original form
268      * @param charset
269      *                  the original string charset
270      * @return original string
271      * @throws DecoderException
272      *                  Thrown if quoted-printable decoding is unsuccessful
273      * @throws UnsupportedEncodingException
274      *                  Thrown if charset is not supported
275      */
276     public String decode(String pString, String charset) throws DecoderException, UnsupportedEncodingException {
277         if (pString == null) {
278             return null;
279         }
280         return new String(decode(StringUtils.getBytesUsAscii(pString)), charset);
281     }
282 
283     /**
284      * Decodes a quoted-printable string into its original form using the default string charset. Escaped characters are
285      * converted back to their original representation.
286      * 
287      * @param pString
288      *                  quoted-printable string to convert into its original form
289      * @return original string
290      * @throws DecoderException
291      *                  Thrown if quoted-printable decoding is unsuccessful.
292      *                  Thrown if charset is not supported.
293      * @see #getDefaultCharset()
294      */
295     public String decode(String pString) throws DecoderException {
296         if (pString == null) {
297             return null;
298         }
299         try {
300             return decode(pString, getDefaultCharset());
301         } catch (UnsupportedEncodingException e) {
302             throw new DecoderException(e.getMessage(), e);
303         }
304     }
305 
306     /**
307      * Encodes an object into its quoted-printable safe form. Unsafe characters are escaped.
308      * 
309      * @param pObject
310      *                  string to convert to a quoted-printable form
311      * @return quoted-printable object
312      * @throws EncoderException
313      *                  Thrown if quoted-printable encoding is not applicable to objects of this type or if encoding is
314      *                  unsuccessful
315      */
316     public Object encode(Object pObject) throws EncoderException {
317         if (pObject == null) {
318             return null;
319         } else if (pObject instanceof byte[]) {
320             return encode((byte[]) pObject);
321         } else if (pObject instanceof String) {
322             return encode((String) pObject);
323         } else {
324             throw new EncoderException("Objects of type " + 
325                   pObject.getClass().getName() + 
326                   " cannot be quoted-printable encoded");
327         }
328     }
329 
330     /**
331      * Decodes a quoted-printable object into its original form. Escaped characters are converted back to their original
332      * representation.
333      * 
334      * @param pObject
335      *                  quoted-printable object to convert into its original form
336      * @return original object
337      * @throws DecoderException
338      *                  Thrown if the argument is not a <code>String</code> or <code>byte[]</code>. Thrown if a failure condition is
339      *                  encountered during the decode process.
340      */
341     public Object decode(Object pObject) throws DecoderException {
342         if (pObject == null) {
343             return null;
344         } else if (pObject instanceof byte[]) {
345             return decode((byte[]) pObject);
346         } else if (pObject instanceof String) {
347             return decode((String) pObject);
348         } else {
349             throw new DecoderException("Objects of type " + 
350                   pObject.getClass().getName() + 
351                   " cannot be quoted-printable decoded");
352         }
353     }
354 
355     /**
356      * Returns the default charset used for string decoding and encoding.
357      * 
358      * @return the default string charset.
359      */
360     public String getDefaultCharset() {
361         return this.charset;
362     }
363 
364     /**
365      * Encodes a string into its quoted-printable form using the specified charset. Unsafe characters are escaped.
366      * 
367      * <p>
368      * This function implements a subset of quoted-printable encoding specification (rule #1 and rule #2) as defined in
369      * RFC 1521 and is suitable for encoding binary data and unformatted text.
370      * </p>
371      * 
372      * @param pString
373      *                  string to convert to quoted-printable form
374      * @param charset
375      *                  the charset for pString
376      * @return quoted-printable string
377      * 
378      * @throws UnsupportedEncodingException
379      *                  Thrown if the charset is not supported
380      */
381     public String encode(String pString, String charset) throws UnsupportedEncodingException {
382         if (pString == null) {
383             return null;
384         }
385         return StringUtils.newStringUsAscii(encode(pString.getBytes(charset)));
386     }
387 }