View Javadoc

1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one or more
3    * contributor license agreements.  See the NOTICE file distributed with
4    * this work for additional information regarding copyright ownership.
5    * The ASF licenses this file to You under the Apache License, Version 2.0
6    * (the "License"); you may not use this file except in compliance with
7    * the License.  You may obtain a copy of the License at
8    * 
9    *      http://www.apache.org/licenses/LICENSE-2.0
10   * 
11   * Unless required by applicable law or agreed to in writing, software
12   * distributed under the License is distributed on an "AS IS" BASIS,
13   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14   * See the License for the specific language governing permissions and
15   * limitations under the License.
16   */ 
17  
18  package org.apache.commons.codec.net;
19  
20  import java.io.ByteArrayOutputStream;
21  import java.io.UnsupportedEncodingException;
22  import java.util.BitSet;
23  
24  import org.apache.commons.codec.BinaryDecoder;
25  import org.apache.commons.codec.BinaryEncoder;
26  import org.apache.commons.codec.DecoderException;
27  import org.apache.commons.codec.EncoderException;
28  import org.apache.commons.codec.StringDecoder;
29  import org.apache.commons.codec.StringEncoder;
30  
31  /**
32   * <p>
33   * Codec for the Quoted-Printable section of <a href="http://www.ietf.org/rfc/rfc1521.txt">RFC 1521 </a>.
34   * </p>
35   * <p>
36   * The Quoted-Printable encoding is intended to represent data that largely consists of octets that correspond to
37   * printable characters in the ASCII character set. It encodes the data in such a way that the resulting octets are
38   * unlikely to be modified by mail transport. If the data being encoded are mostly ASCII text, the encoded form of the
39   * data remains largely recognizable by humans. A body which is entirely ASCII may also be encoded in Quoted-Printable
40   * to ensure the integrity of the data should the message pass through a character-translating, and/or line-wrapping
41   * gateway.
42   * </p>
43   * 
44   * <p>
45   * Note:
46   * </p>
47   * <p>
48   * Rules #3, #4, and #5 of the quoted-printable spec are not implemented yet because the complete quoted-printable spec
49   * does not lend itself well to the byte[] oriented codec framework. Complete the codec once the streamable codec
50   * framework is ready. The motivation behind providing the codec in a partial form is that it can already come in handy
51   * for those applications that do not require quoted-printable line formatting (rules #3, #4, #5), for instance Q codec.
52   * </p>
53   * 
54   * @see <a href="http://www.ietf.org/rfc/rfc1521.txt"> RFC 1521 MIME (Multipurpose Internet Mail Extensions) Part One:
55   *          Mechanisms for Specifying and Describing the Format of Internet Message Bodies </a>
56   * 
57   * @author Apache Software Foundation
58   * @since 1.3
59   * @version $Id: QuotedPrintableCodec.java 582446 2007-10-06 04:11:49Z bayard $
60   */
61  public class QuotedPrintableCodec implements BinaryEncoder, BinaryDecoder, StringEncoder, StringDecoder {
62      /**
63       * The default charset used for string decoding and encoding.
64       */
65      private String charset = CharacterEncodingNames.UTF8;
66  
67      /**
68       * BitSet of printable characters as defined in RFC 1521.
69       */
70      private static final BitSet PRINTABLE_CHARS = new BitSet(256);
71  
72      private static final byte ESCAPE_CHAR = '=';
73  
74      private static final byte TAB = 9;
75  
76      private static final byte SPACE = 32;
77      // Static initializer for printable chars collection
78      static {
79          // alpha characters
80          for (int i = 33; i <= 60; i++) {
81              PRINTABLE_CHARS.set(i);
82          }
83          for (int i = 62; i <= 126; i++) {
84              PRINTABLE_CHARS.set(i);
85          }
86          PRINTABLE_CHARS.set(TAB);
87          PRINTABLE_CHARS.set(SPACE);
88      }
89  
90      /**
91       * Default constructor.
92       */
93      public QuotedPrintableCodec() {
94          super();
95      }
96  
97      /**
98       * Constructor which allows for the selection of a default charset
99       * 
100      * @param charset
101      *                  the default string charset to use.
102      */
103     public QuotedPrintableCodec(String charset) {
104         super();
105         this.charset = charset;
106     }
107 
108     /**
109      * Encodes byte into its quoted-printable representation.
110      * 
111      * @param b
112      *                  byte to encode
113      * @param buffer
114      *                  the buffer to write to
115      */
116     private static final void encodeQuotedPrintable(int b, ByteArrayOutputStream buffer) {
117         buffer.write(ESCAPE_CHAR);
118         char hex1 = Character.toUpperCase(Character.forDigit((b >> 4) & 0xF, 16));
119         char hex2 = Character.toUpperCase(Character.forDigit(b & 0xF, 16));
120         buffer.write(hex1);
121         buffer.write(hex2);
122     }
123 
124     /**
125      * Encodes an array of bytes into an array of quoted-printable 7-bit characters. Unsafe characters are escaped.
126      * 
127      * <p>
128      * This function implements a subset of quoted-printable encoding specification (rule #1 and rule #2) as defined in
129      * RFC 1521 and is suitable for encoding binary data and unformatted text.
130      * </p>
131      * 
132      * @param printable
133      *                  bitset of characters deemed quoted-printable
134      * @param bytes
135      *                  array of bytes to be encoded
136      * @return array of bytes containing quoted-printable data
137      */
138     public static final byte[] encodeQuotedPrintable(BitSet printable, byte[] bytes) {
139         if (bytes == null) {
140             return null;
141         }
142         if (printable == null) {
143             printable = PRINTABLE_CHARS;
144         }
145         ByteArrayOutputStream buffer = new ByteArrayOutputStream();
146         for (int i = 0; i < bytes.length; i++) {
147             int b = bytes[i];
148             if (b < 0) {
149                 b = 256 + b;
150             }
151             if (printable.get(b)) {
152                 buffer.write(b);
153             } else {
154                 encodeQuotedPrintable(b, buffer);
155             }
156         }
157         return buffer.toByteArray();
158     }
159 
160     /**
161      * Decodes an array quoted-printable characters into an array of original bytes. Escaped characters are converted
162      * back to their original representation.
163      * 
164      * <p>
165      * This function implements a subset of quoted-printable encoding specification (rule #1 and rule #2) as defined in
166      * RFC 1521.
167      * </p>
168      * 
169      * @param bytes
170      *                  array of quoted-printable characters
171      * @return array of original bytes
172      * @throws DecoderException
173      *                  Thrown if quoted-printable decoding is unsuccessful
174      */
175     public static final byte[] decodeQuotedPrintable(byte[] bytes) throws DecoderException {
176         if (bytes == null) {
177             return null;
178         }
179         ByteArrayOutputStream buffer = new ByteArrayOutputStream();
180         for (int i = 0; i < bytes.length; i++) {
181             int b = bytes[i];
182             if (b == ESCAPE_CHAR) {
183                 try {
184                     int u = Character.digit((char) bytes[++i], 16);
185                     int l = Character.digit((char) bytes[++i], 16);
186                     if (u == -1 || l == -1) {
187                         throw new DecoderException("Invalid quoted-printable encoding");
188                     }
189                     buffer.write((char) ((u << 4) + l));
190                 } catch (ArrayIndexOutOfBoundsException e) {
191                     throw new DecoderException("Invalid quoted-printable encoding");
192                 }
193             } else {
194                 buffer.write(b);
195             }
196         }
197         return buffer.toByteArray();
198     }
199 
200     /**
201      * Encodes an array of bytes into an array of quoted-printable 7-bit characters. Unsafe characters are escaped.
202      * 
203      * <p>
204      * This function implements a subset of quoted-printable encoding specification (rule #1 and rule #2) as defined in
205      * RFC 1521 and is suitable for encoding binary data and unformatted text.
206      * </p>
207      * 
208      * @param bytes
209      *                  array of bytes to be encoded
210      * @return array of bytes containing quoted-printable data
211      */
212     public byte[] encode(byte[] bytes) {
213         return encodeQuotedPrintable(PRINTABLE_CHARS, bytes);
214     }
215 
216     /**
217      * Decodes an array of quoted-printable characters into an array of original bytes. Escaped characters are converted
218      * back to their original representation.
219      * 
220      * <p>
221      * This function implements a subset of quoted-printable encoding specification (rule #1 and rule #2) as defined in
222      * RFC 1521.
223      * </p>
224      * 
225      * @param bytes
226      *                  array of quoted-printable characters
227      * @return array of original bytes
228      * @throws DecoderException
229      *                  Thrown if quoted-printable decoding is unsuccessful
230      */
231     public byte[] decode(byte[] bytes) throws DecoderException {
232         return decodeQuotedPrintable(bytes);
233     }
234 
235     /**
236      * Encodes a string into its quoted-printable form using the default string charset. Unsafe characters are escaped.
237      * 
238      * <p>
239      * This function implements a subset of quoted-printable encoding specification (rule #1 and rule #2) as defined in
240      * RFC 1521 and is suitable for encoding binary data.
241      * </p>
242      * 
243      * @param pString
244      *                  string to convert to quoted-printable form
245      * @return quoted-printable string
246      * 
247      * @throws EncoderException
248      *                  Thrown if quoted-printable encoding is unsuccessful
249      * 
250      * @see #getDefaultCharset()
251      */
252     public String encode(String pString) throws EncoderException {
253         if (pString == null) {
254             return null;
255         }
256         try {
257             return encode(pString, getDefaultCharset());
258         } catch (UnsupportedEncodingException e) {
259             throw new EncoderException(e.getMessage());
260         }
261     }
262 
263     /**
264      * Decodes a quoted-printable string into its original form using the specified string charset. Escaped characters
265      * are converted back to their original representation.
266      * 
267      * @param pString
268      *                  quoted-printable string to convert into its original form
269      * @param charset
270      *                  the original string charset
271      * @return original string
272      * @throws DecoderException
273      *                  Thrown if quoted-printable decoding is unsuccessful
274      * @throws UnsupportedEncodingException
275      *                  Thrown if charset is not supported
276      */
277     public String decode(String pString, String charset) throws DecoderException, UnsupportedEncodingException {
278         if (pString == null) {
279             return null;
280         }
281         return new String(decode(pString.getBytes(CharacterEncodingNames.US_ASCII)), charset);
282     }
283 
284     /**
285      * Decodes a quoted-printable string into its original form using the default string charset. Escaped characters are
286      * converted back to their original representation.
287      * 
288      * @param pString
289      *                  quoted-printable string to convert into its original form
290      * @return original string
291      * @throws DecoderException
292      *                  Thrown if quoted-printable decoding is unsuccessful.
293      *                  Thrown if charset is not supported.
294      * @see #getDefaultCharset()
295      */
296     public String decode(String pString) throws DecoderException {
297         if (pString == null) {
298             return null;
299         }
300         try {
301             return decode(pString, getDefaultCharset());
302         } catch (UnsupportedEncodingException e) {
303             throw new DecoderException(e.getMessage());
304         }
305     }
306 
307     /**
308      * Encodes an object into its quoted-printable safe form. Unsafe characters are escaped.
309      * 
310      * @param pObject
311      *                  string to convert to a quoted-printable form
312      * @return quoted-printable object
313      * @throws EncoderException
314      *                  Thrown if quoted-printable encoding is not applicable to objects of this type or if encoding is
315      *                  unsuccessful
316      */
317     public Object encode(Object pObject) throws EncoderException {
318         if (pObject == null) {
319             return null;
320         } else if (pObject instanceof byte[]) {
321             return encode((byte[]) pObject);
322         } else if (pObject instanceof String) {
323             return encode((String) pObject);
324         } else {
325             throw new EncoderException("Objects of type "
326                 + pObject.getClass().getName()
327                 + " cannot be quoted-printable encoded");
328         }
329     }
330 
331     /**
332      * Decodes a quoted-printable object into its original form. Escaped characters are converted back to their original
333      * representation.
334      * 
335      * @param pObject
336      *                  quoted-printable object to convert into its original form
337      * @return original object
338      * @throws DecoderException
339      *                  Thrown if the argument is not a <code>String</code> or <code>byte[]</code>. Thrown if a failure condition is
340      *                  encountered during the decode process.
341      */
342     public Object decode(Object pObject) throws DecoderException {
343         if (pObject == null) {
344             return null;
345         } else if (pObject instanceof byte[]) {
346             return decode((byte[]) pObject);
347         } else if (pObject instanceof String) {
348             return decode((String) pObject);
349         } else {
350             throw new DecoderException("Objects of type "
351                 + pObject.getClass().getName()
352                 + " cannot be quoted-printable decoded");
353         }
354     }
355 
356     /**
357      * Returns the default charset used for string decoding and encoding.
358      * 
359      * @return the default string charset.
360      */
361     public String getDefaultCharset() {
362         return this.charset;
363     }
364 
365     /**
366      * Encodes a string into its quoted-printable form using the specified charset. Unsafe characters are escaped.
367      * 
368      * <p>
369      * This function implements a subset of quoted-printable encoding specification (rule #1 and rule #2) as defined in
370      * RFC 1521 and is suitable for encoding binary data and unformatted text.
371      * </p>
372      * 
373      * @param pString
374      *                  string to convert to quoted-printable form
375      * @param charset
376      *                  the charset for pString
377      * @return quoted-printable string
378      * 
379      * @throws UnsupportedEncodingException
380      *                  Thrown if the charset is not supported
381      */
382     public String encode(String pString, String charset) throws UnsupportedEncodingException {
383         if (pString == null) {
384             return null;
385         }
386         return new String(encode(pString.getBytes(charset)), CharacterEncodingNames.US_ASCII);
387     }
388 }