001    /*
002     * Licensed to the Apache Software Foundation (ASF) under one or more
003     * contributor license agreements.  See the NOTICE file distributed with
004     * this work for additional information regarding copyright ownership.
005     * The ASF licenses this file to You under the Apache License, Version 2.0
006     * (the "License"); you may not use this file except in compliance with
007     * the License.  You may obtain a copy of the License at
008     *
009     *      http://www.apache.org/licenses/LICENSE-2.0
010     *
011     * Unless required by applicable law or agreed to in writing, software
012     * distributed under the License is distributed on an "AS IS" BASIS,
013     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014     * See the License for the specific language governing permissions and
015     * limitations under the License.
016     */
017    
018    package org.apache.commons.codec.net;
019    
020    import java.io.ByteArrayOutputStream;
021    import java.io.UnsupportedEncodingException;
022    import java.nio.charset.Charset;
023    import java.util.BitSet;
024    
025    import org.apache.commons.codec.BinaryDecoder;
026    import org.apache.commons.codec.BinaryEncoder;
027    import org.apache.commons.codec.Charsets;
028    import org.apache.commons.codec.DecoderException;
029    import org.apache.commons.codec.EncoderException;
030    import org.apache.commons.codec.StringDecoder;
031    import org.apache.commons.codec.StringEncoder;
032    import org.apache.commons.codec.binary.StringUtils;
033    
034    /**
035     * Codec for the Quoted-Printable section of <a href="http://www.ietf.org/rfc/rfc1521.txt">RFC 1521</a>.
036     * <p>
037     * The Quoted-Printable encoding is intended to represent data that largely consists of octets that correspond to
038     * printable characters in the ASCII character set. It encodes the data in such a way that the resulting octets are
039     * unlikely to be modified by mail transport. If the data being encoded are mostly ASCII text, the encoded form of the
040     * data remains largely recognizable by humans. A body which is entirely ASCII may also be encoded in Quoted-Printable
041     * to ensure the integrity of the data should the message pass through a character- translating, and/or line-wrapping
042     * gateway.
043     * <p>
044     * Note:
045     * <p>
046     * Rules #3, #4, and #5 of the quoted-printable spec are not implemented yet because the complete quoted-printable spec
047     * does not lend itself well into the byte[] oriented codec framework. Complete the codec once the streamable codec
048     * framework is ready. The motivation behind providing the codec in a partial form is that it can already come in handy
049     * for those applications that do not require quoted-printable line formatting (rules #3, #4, #5), for instance Q codec.
050     * <p>
051     * This class is immutable and thread-safe.
052     *
053     * @see <a href="http://www.ietf.org/rfc/rfc1521.txt"> RFC 1521 MIME (Multipurpose Internet Mail Extensions) Part One:
054     *          Mechanisms for Specifying and Describing the Format of Internet Message Bodies </a>
055     *
056     * @since 1.3
057     * @version $Id: QuotedPrintableCodec.html 889935 2013-12-11 05:05:13Z ggregory $
058     */
059    public class QuotedPrintableCodec implements BinaryEncoder, BinaryDecoder, StringEncoder, StringDecoder {
060        /**
061         * The default charset used for string decoding and encoding.
062         */
063        private final Charset charset;
064    
065        /**
066         * BitSet of printable characters as defined in RFC 1521.
067         */
068        private static final BitSet PRINTABLE_CHARS = new BitSet(256);
069    
070        private static final byte ESCAPE_CHAR = '=';
071    
072        private static final byte TAB = 9;
073    
074        private static final byte SPACE = 32;
075        // Static initializer for printable chars collection
076        static {
077            // alpha characters
078            for (int i = 33; i <= 60; i++) {
079                PRINTABLE_CHARS.set(i);
080            }
081            for (int i = 62; i <= 126; i++) {
082                PRINTABLE_CHARS.set(i);
083            }
084            PRINTABLE_CHARS.set(TAB);
085            PRINTABLE_CHARS.set(SPACE);
086        }
087    
088        /**
089         * Default constructor.
090         */
091        public QuotedPrintableCodec() {
092            this(Charsets.UTF_8);
093        }
094    
095        /**
096         * Constructor which allows for the selection of a default charset.
097         *
098         * @param charset
099         *            the default string charset to use.
100         * @throws UnsupportedCharsetException
101         *             If the named charset is unavailable
102         * @since 1.7
103         */
104        public QuotedPrintableCodec(Charset charset) {
105            this.charset = charset;
106        }
107    
108        /**
109         * Constructor which allows for the selection of a default charset.
110         *
111         * @param charsetName
112         *            the default string charset to use.
113         * @throws java.nio.charset.UnsupportedCharsetException
114         *             If the named charset is unavailable
115         * @since 1.7 throws UnsupportedCharsetException if the named charset is unavailable
116         */
117        public QuotedPrintableCodec(String charsetName) {
118            this(Charset.forName(charsetName));
119        }
120    
121        /**
122         * Encodes byte into its quoted-printable representation.
123         *
124         * @param b
125         *            byte to encode
126         * @param buffer
127         *            the buffer to write to
128         */
129        private static final void encodeQuotedPrintable(int b, ByteArrayOutputStream buffer) {
130            buffer.write(ESCAPE_CHAR);
131            char hex1 = Character.toUpperCase(Character.forDigit((b >> 4) & 0xF, 16));
132            char hex2 = Character.toUpperCase(Character.forDigit(b & 0xF, 16));
133            buffer.write(hex1);
134            buffer.write(hex2);
135        }
136    
137        /**
138         * Encodes an array of bytes into an array of quoted-printable 7-bit characters. Unsafe characters are escaped.
139         * <p>
140         * This function implements a subset of quoted-printable encoding specification (rule #1 and rule #2) as defined in
141         * RFC 1521 and is suitable for encoding binary data and unformatted text.
142         *
143         * @param printable
144         *            bitset of characters deemed quoted-printable
145         * @param bytes
146         *            array of bytes to be encoded
147         * @return array of bytes containing quoted-printable data
148         */
149        public static final byte[] encodeQuotedPrintable(BitSet printable, byte[] bytes) {
150            if (bytes == null) {
151                return null;
152            }
153            if (printable == null) {
154                printable = PRINTABLE_CHARS;
155            }
156            ByteArrayOutputStream buffer = new ByteArrayOutputStream();
157            for (byte c : bytes) {
158                int b = c;
159                if (b < 0) {
160                    b = 256 + b;
161                }
162                if (printable.get(b)) {
163                    buffer.write(b);
164                } else {
165                    encodeQuotedPrintable(b, buffer);
166                }
167            }
168            return buffer.toByteArray();
169        }
170    
171        /**
172         * Decodes an array quoted-printable characters into an array of original bytes. Escaped characters are converted
173         * back to their original representation.
174         * <p>
175         * This function implements a subset of quoted-printable encoding specification (rule #1 and rule #2) as defined in
176         * RFC 1521.
177         *
178         * @param bytes
179         *            array of quoted-printable characters
180         * @return array of original bytes
181         * @throws DecoderException
182         *             Thrown if quoted-printable decoding is unsuccessful
183         */
184        public static final byte[] decodeQuotedPrintable(byte[] bytes) throws DecoderException {
185            if (bytes == null) {
186                return null;
187            }
188            ByteArrayOutputStream buffer = new ByteArrayOutputStream();
189            for (int i = 0; i < bytes.length; i++) {
190                int b = bytes[i];
191                if (b == ESCAPE_CHAR) {
192                    try {
193                        int u = Utils.digit16(bytes[++i]);
194                        int l = Utils.digit16(bytes[++i]);
195                        buffer.write((char) ((u << 4) + l));
196                    } catch (ArrayIndexOutOfBoundsException e) {
197                        throw new DecoderException("Invalid quoted-printable encoding", e);
198                    }
199                } else {
200                    buffer.write(b);
201                }
202            }
203            return buffer.toByteArray();
204        }
205    
206        /**
207         * Encodes an array of bytes into an array of quoted-printable 7-bit characters. Unsafe characters are escaped.
208         * <p>
209         * This function implements a subset of quoted-printable encoding specification (rule #1 and rule #2) as defined in
210         * RFC 1521 and is suitable for encoding binary data and unformatted text.
211         *
212         * @param bytes
213         *            array of bytes to be encoded
214         * @return array of bytes containing quoted-printable data
215         */
216        @Override
217        public byte[] encode(byte[] bytes) {
218            return encodeQuotedPrintable(PRINTABLE_CHARS, bytes);
219        }
220    
221        /**
222         * Decodes an array of quoted-printable characters into an array of original bytes. Escaped characters are converted
223         * back to their original representation.
224         * <p>
225         * This function implements a subset of quoted-printable encoding specification (rule #1 and rule #2) as defined in
226         * RFC 1521.
227         *
228         * @param bytes
229         *            array of quoted-printable characters
230         * @return array of original bytes
231         * @throws DecoderException
232         *             Thrown if quoted-printable decoding is unsuccessful
233         */
234        @Override
235        public byte[] decode(byte[] bytes) throws DecoderException {
236            return decodeQuotedPrintable(bytes);
237        }
238    
239        /**
240         * Encodes a string into its quoted-printable form using the default string charset. Unsafe characters are escaped.
241         * <p>
242         * This function implements a subset of quoted-printable encoding specification (rule #1 and rule #2) as defined in
243         * RFC 1521 and is suitable for encoding binary data.
244         *
245         * @param str
246         *            string to convert to quoted-printable form
247         * @return quoted-printable string
248         * @throws EncoderException
249         *             Thrown if quoted-printable encoding is unsuccessful
250         *
251         * @see #getCharset()
252         */
253        @Override
254        public String encode(String str) throws EncoderException {
255            return this.encode(str, getCharset());
256        }
257    
258        /**
259         * Decodes a quoted-printable string into its original form using the specified string charset. Escaped characters
260         * are converted back to their original representation.
261         *
262         * @param str
263         *            quoted-printable string to convert into its original form
264         * @param charset
265         *            the original string charset
266         * @return original string
267         * @throws DecoderException
268         *             Thrown if quoted-printable decoding is unsuccessful
269         * @since 1.7
270         */
271        public String decode(String str, Charset charset) throws DecoderException {
272            if (str == null) {
273                return null;
274            }
275            return new String(this.decode(StringUtils.getBytesUsAscii(str)), charset);
276        }
277    
278        /**
279         * Decodes a quoted-printable string into its original form using the specified string charset. Escaped characters
280         * are converted back to their original representation.
281         *
282         * @param str
283         *            quoted-printable string to convert into its original form
284         * @param charset
285         *            the original string charset
286         * @return original string
287         * @throws DecoderException
288         *             Thrown if quoted-printable decoding is unsuccessful
289         * @throws UnsupportedEncodingException
290         *             Thrown if charset is not supported
291         */
292        public String decode(String str, String charset) throws DecoderException, UnsupportedEncodingException {
293            if (str == null) {
294                return null;
295            }
296            return new String(decode(StringUtils.getBytesUsAscii(str)), charset);
297        }
298    
299        /**
300         * Decodes a quoted-printable string into its original form using the default string charset. Escaped characters are
301         * converted back to their original representation.
302         *
303         * @param str
304         *            quoted-printable string to convert into its original form
305         * @return original string
306         * @throws DecoderException
307         *             Thrown if quoted-printable decoding is unsuccessful. Thrown if charset is not supported.
308         * @see #getCharset()
309         */
310        @Override
311        public String decode(String str) throws DecoderException {
312            return this.decode(str, this.getCharset());
313        }
314    
315        /**
316         * Encodes an object into its quoted-printable safe form. Unsafe characters are escaped.
317         *
318         * @param obj
319         *            string to convert to a quoted-printable form
320         * @return quoted-printable object
321         * @throws EncoderException
322         *             Thrown if quoted-printable encoding is not applicable to objects of this type or if encoding is
323         *             unsuccessful
324         */
325        @Override
326        public Object encode(Object obj) throws EncoderException {
327            if (obj == null) {
328                return null;
329            } else if (obj instanceof byte[]) {
330                return encode((byte[]) obj);
331            } else if (obj instanceof String) {
332                return encode((String) obj);
333            } else {
334                throw new EncoderException("Objects of type " +
335                      obj.getClass().getName() +
336                      " cannot be quoted-printable encoded");
337            }
338        }
339    
340        /**
341         * Decodes a quoted-printable object into its original form. Escaped characters are converted back to their original
342         * representation.
343         *
344         * @param obj
345         *            quoted-printable object to convert into its original form
346         * @return original object
347         * @throws DecoderException
348         *             Thrown if the argument is not a <code>String</code> or <code>byte[]</code>. Thrown if a failure
349         *             condition is encountered during the decode process.
350         */
351        @Override
352        public Object decode(Object obj) throws DecoderException {
353            if (obj == null) {
354                return null;
355            } else if (obj instanceof byte[]) {
356                return decode((byte[]) obj);
357            } else if (obj instanceof String) {
358                return decode((String) obj);
359            } else {
360                throw new DecoderException("Objects of type " +
361                      obj.getClass().getName() +
362                      " cannot be quoted-printable decoded");
363            }
364        }
365    
366        /**
367         * Gets the default charset name used for string decoding and encoding.
368         *
369         * @return the default charset name
370         * @since 1.7
371         */
372        public Charset getCharset() {
373            return this.charset;
374        }
375    
376        /**
377         * Gets the default charset name used for string decoding and encoding.
378         *
379         * @return the default charset name
380         */
381        public String getDefaultCharset() {
382            return this.charset.name();
383        }
384    
385        /**
386         * Encodes a string into its quoted-printable form using the specified charset. Unsafe characters are escaped.
387         * <p>
388         * This function implements a subset of quoted-printable encoding specification (rule #1 and rule #2) as defined in
389         * RFC 1521 and is suitable for encoding binary data and unformatted text.
390         *
391         * @param str
392         *            string to convert to quoted-printable form
393         * @param charset
394         *            the charset for str
395         * @return quoted-printable string
396         * @since 1.7
397         */
398        public String encode(String str, Charset charset) {
399            if (str == null) {
400                return null;
401            }
402            return StringUtils.newStringUsAscii(this.encode(str.getBytes(charset)));
403        }
404    
405        /**
406         * Encodes a string into its quoted-printable form using the specified charset. Unsafe characters are escaped.
407         * <p>
408         * This function implements a subset of quoted-printable encoding specification (rule #1 and rule #2) as defined in
409         * RFC 1521 and is suitable for encoding binary data and unformatted text.
410         *
411         * @param str
412         *            string to convert to quoted-printable form
413         * @param charset
414         *            the charset for str
415         * @return quoted-printable string
416         * @throws UnsupportedEncodingException
417         *             Thrown if the charset is not supported
418         */
419        public String encode(String str, String charset) throws UnsupportedEncodingException {
420            if (str == null) {
421                return null;
422            }
423            return StringUtils.newStringUsAscii(encode(str.getBytes(charset)));
424        }
425    }