001    /*
002     * Licensed to the Apache Software Foundation (ASF) under one or more
003     * contributor license agreements.  See the NOTICE file distributed with
004     * this work for additional information regarding copyright ownership.
005     * The ASF licenses this file to You under the Apache License, Version 2.0
006     * (the "License"); you may not use this file except in compliance with
007     * the License.  You may obtain a copy of the License at
008     *
009     *      http://www.apache.org/licenses/LICENSE-2.0
010     *
011     * Unless required by applicable law or agreed to in writing, software
012     * distributed under the License is distributed on an "AS IS" BASIS,
013     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014     * See the License for the specific language governing permissions and
015     * limitations under the License.
016     */
017    
018    package org.apache.commons.codec.net;
019    
020    import java.io.ByteArrayOutputStream;
021    import java.io.UnsupportedEncodingException;
022    import java.nio.charset.Charset;
023    import java.nio.charset.IllegalCharsetNameException;
024    import java.nio.charset.UnsupportedCharsetException;
025    import java.util.BitSet;
026    
027    import org.apache.commons.codec.BinaryDecoder;
028    import org.apache.commons.codec.BinaryEncoder;
029    import org.apache.commons.codec.Charsets;
030    import org.apache.commons.codec.DecoderException;
031    import org.apache.commons.codec.EncoderException;
032    import org.apache.commons.codec.StringDecoder;
033    import org.apache.commons.codec.StringEncoder;
034    import org.apache.commons.codec.binary.StringUtils;
035    
036    /**
037     * Codec for the Quoted-Printable section of <a href="http://www.ietf.org/rfc/rfc1521.txt">RFC 1521</a>.
038     * <p>
039     * The Quoted-Printable encoding is intended to represent data that largely consists of octets that correspond to
040     * printable characters in the ASCII character set. It encodes the data in such a way that the resulting octets are
041     * unlikely to be modified by mail transport. If the data being encoded are mostly ASCII text, the encoded form of the
042     * data remains largely recognizable by humans. A body which is entirely ASCII may also be encoded in Quoted-Printable
043     * to ensure the integrity of the data should the message pass through a character- translating, and/or line-wrapping
044     * gateway.
045     * <p>
046     * Note:
047     * <p>
048     * Rules #3, #4, and #5 of the quoted-printable spec are not implemented yet because the complete quoted-printable spec
049     * does not lend itself well into the byte[] oriented codec framework. Complete the codec once the streamable codec
050     * framework is ready. The motivation behind providing the codec in a partial form is that it can already come in handy
051     * for those applications that do not require quoted-printable line formatting (rules #3, #4, #5), for instance Q codec.
052     * <p>
053     * This class is immutable and thread-safe.
054     *
055     * @see <a href="http://www.ietf.org/rfc/rfc1521.txt"> RFC 1521 MIME (Multipurpose Internet Mail Extensions) Part One:
056     *          Mechanisms for Specifying and Describing the Format of Internet Message Bodies </a>
057     *
058     * @since 1.3
059     * @version $Id: QuotedPrintableCodec.html 889935 2013-12-11 05:05:13Z ggregory $
060     */
061    public class QuotedPrintableCodec implements BinaryEncoder, BinaryDecoder, StringEncoder, StringDecoder {
062        /**
063         * The default charset used for string decoding and encoding.
064         */
065        private final Charset charset;
066    
067        /**
068         * BitSet of printable characters as defined in RFC 1521.
069         */
070        private static final BitSet PRINTABLE_CHARS = new BitSet(256);
071    
072        private static final byte ESCAPE_CHAR = '=';
073    
074        private static final byte TAB = 9;
075    
076        private static final byte SPACE = 32;
077        // Static initializer for printable chars collection
078        static {
079            // alpha characters
080            for (int i = 33; i <= 60; i++) {
081                PRINTABLE_CHARS.set(i);
082            }
083            for (int i = 62; i <= 126; i++) {
084                PRINTABLE_CHARS.set(i);
085            }
086            PRINTABLE_CHARS.set(TAB);
087            PRINTABLE_CHARS.set(SPACE);
088        }
089    
090        /**
091         * Default constructor, assumes default charset of {@link Charsets#UTF_8}
092         */
093        public QuotedPrintableCodec() {
094            this(Charsets.UTF_8);
095        }
096    
097        /**
098         * Constructor which allows for the selection of a default charset.
099         *
100         * @param charset
101         *            the default string charset to use.
102         * @since 1.7
103         */
104        public QuotedPrintableCodec(final Charset charset) {
105            this.charset = charset;
106        }
107    
108        /**
109         * Constructor which allows for the selection of a default charset.
110         *
111         * @param charsetName
112         *            the default string charset to use.
113         * @throws UnsupportedCharsetException
114         *             If no support for the named charset is available
115         *             in this instance of the Java virtual machine
116         * @throws IllegalArgumentException
117         *             If the given charsetName is null
118         * @throws IllegalCharsetNameException
119         *             If the given charset name is illegal
120         *
121         * @since 1.7 throws UnsupportedCharsetException if the named charset is unavailable
122         */
123        public QuotedPrintableCodec(final String charsetName)
124                throws IllegalCharsetNameException, IllegalArgumentException, UnsupportedCharsetException {
125            this(Charset.forName(charsetName));
126        }
127    
128        /**
129         * Encodes byte into its quoted-printable representation.
130         *
131         * @param b
132         *            byte to encode
133         * @param buffer
134         *            the buffer to write to
135         */
136        private static final void encodeQuotedPrintable(final int b, final ByteArrayOutputStream buffer) {
137            buffer.write(ESCAPE_CHAR);
138            final char hex1 = Character.toUpperCase(Character.forDigit((b >> 4) & 0xF, 16));
139            final char hex2 = Character.toUpperCase(Character.forDigit(b & 0xF, 16));
140            buffer.write(hex1);
141            buffer.write(hex2);
142        }
143    
144        /**
145         * Encodes an array of bytes into an array of quoted-printable 7-bit characters. Unsafe characters are escaped.
146         * <p>
147         * This function implements a subset of quoted-printable encoding specification (rule #1 and rule #2) as defined in
148         * RFC 1521 and is suitable for encoding binary data and unformatted text.
149         *
150         * @param printable
151         *            bitset of characters deemed quoted-printable
152         * @param bytes
153         *            array of bytes to be encoded
154         * @return array of bytes containing quoted-printable data
155         */
156        public static final byte[] encodeQuotedPrintable(BitSet printable, final byte[] bytes) {
157            if (bytes == null) {
158                return null;
159            }
160            if (printable == null) {
161                printable = PRINTABLE_CHARS;
162            }
163            final ByteArrayOutputStream buffer = new ByteArrayOutputStream();
164            for (final byte c : bytes) {
165                int b = c;
166                if (b < 0) {
167                    b = 256 + b;
168                }
169                if (printable.get(b)) {
170                    buffer.write(b);
171                } else {
172                    encodeQuotedPrintable(b, buffer);
173                }
174            }
175            return buffer.toByteArray();
176        }
177    
178        /**
179         * Decodes an array quoted-printable characters into an array of original bytes. Escaped characters are converted
180         * back to their original representation.
181         * <p>
182         * This function implements a subset of quoted-printable encoding specification (rule #1 and rule #2) as defined in
183         * RFC 1521.
184         *
185         * @param bytes
186         *            array of quoted-printable characters
187         * @return array of original bytes
188         * @throws DecoderException
189         *             Thrown if quoted-printable decoding is unsuccessful
190         */
191        public static final byte[] decodeQuotedPrintable(final byte[] bytes) throws DecoderException {
192            if (bytes == null) {
193                return null;
194            }
195            final ByteArrayOutputStream buffer = new ByteArrayOutputStream();
196            for (int i = 0; i < bytes.length; i++) {
197                final int b = bytes[i];
198                if (b == ESCAPE_CHAR) {
199                    try {
200                        final int u = Utils.digit16(bytes[++i]);
201                        final int l = Utils.digit16(bytes[++i]);
202                        buffer.write((char) ((u << 4) + l));
203                    } catch (final ArrayIndexOutOfBoundsException e) {
204                        throw new DecoderException("Invalid quoted-printable encoding", e);
205                    }
206                } else {
207                    buffer.write(b);
208                }
209            }
210            return buffer.toByteArray();
211        }
212    
213        /**
214         * Encodes an array of bytes into an array of quoted-printable 7-bit characters. Unsafe characters are escaped.
215         * <p>
216         * This function implements a subset of quoted-printable encoding specification (rule #1 and rule #2) as defined in
217         * RFC 1521 and is suitable for encoding binary data and unformatted text.
218         *
219         * @param bytes
220         *            array of bytes to be encoded
221         * @return array of bytes containing quoted-printable data
222         */
223        @Override
224        public byte[] encode(final byte[] bytes) {
225            return encodeQuotedPrintable(PRINTABLE_CHARS, bytes);
226        }
227    
228        /**
229         * Decodes an array of quoted-printable characters into an array of original bytes. Escaped characters are converted
230         * back to their original representation.
231         * <p>
232         * This function implements a subset of quoted-printable encoding specification (rule #1 and rule #2) as defined in
233         * RFC 1521.
234         *
235         * @param bytes
236         *            array of quoted-printable characters
237         * @return array of original bytes
238         * @throws DecoderException
239         *             Thrown if quoted-printable decoding is unsuccessful
240         */
241        @Override
242        public byte[] decode(final byte[] bytes) throws DecoderException {
243            return decodeQuotedPrintable(bytes);
244        }
245    
246        /**
247         * Encodes a string into its quoted-printable form using the default string charset. Unsafe characters are escaped.
248         * <p>
249         * This function implements a subset of quoted-printable encoding specification (rule #1 and rule #2) as defined in
250         * RFC 1521 and is suitable for encoding binary data.
251         *
252         * @param str
253         *            string to convert to quoted-printable form
254         * @return quoted-printable string
255         * @throws EncoderException
256         *             Thrown if quoted-printable encoding is unsuccessful
257         *
258         * @see #getCharset()
259         */
260        @Override
261        public String encode(final String str) throws EncoderException {
262            return this.encode(str, getCharset());
263        }
264    
265        /**
266         * Decodes a quoted-printable string into its original form using the specified string charset. Escaped characters
267         * are converted back to their original representation.
268         *
269         * @param str
270         *            quoted-printable string to convert into its original form
271         * @param charset
272         *            the original string charset
273         * @return original string
274         * @throws DecoderException
275         *             Thrown if quoted-printable decoding is unsuccessful
276         * @since 1.7
277         */
278        public String decode(final String str, final Charset charset) throws DecoderException {
279            if (str == null) {
280                return null;
281            }
282            return new String(this.decode(StringUtils.getBytesUsAscii(str)), charset);
283        }
284    
285        /**
286         * Decodes a quoted-printable string into its original form using the specified string charset. Escaped characters
287         * are converted back to their original representation.
288         *
289         * @param str
290         *            quoted-printable string to convert into its original form
291         * @param charset
292         *            the original string charset
293         * @return original string
294         * @throws DecoderException
295         *             Thrown if quoted-printable decoding is unsuccessful
296         * @throws UnsupportedEncodingException
297         *             Thrown if charset is not supported
298         */
299        public String decode(final String str, final String charset) throws DecoderException, UnsupportedEncodingException {
300            if (str == null) {
301                return null;
302            }
303            return new String(decode(StringUtils.getBytesUsAscii(str)), charset);
304        }
305    
306        /**
307         * Decodes a quoted-printable string into its original form using the default string charset. Escaped characters are
308         * converted back to their original representation.
309         *
310         * @param str
311         *            quoted-printable string to convert into its original form
312         * @return original string
313         * @throws DecoderException
314         *             Thrown if quoted-printable decoding is unsuccessful. Thrown if charset is not supported.
315         * @see #getCharset()
316         */
317        @Override
318        public String decode(final String str) throws DecoderException {
319            return this.decode(str, this.getCharset());
320        }
321    
322        /**
323         * Encodes an object into its quoted-printable safe form. Unsafe characters are escaped.
324         *
325         * @param obj
326         *            string to convert to a quoted-printable form
327         * @return quoted-printable object
328         * @throws EncoderException
329         *             Thrown if quoted-printable encoding is not applicable to objects of this type or if encoding is
330         *             unsuccessful
331         */
332        @Override
333        public Object encode(final Object obj) throws EncoderException {
334            if (obj == null) {
335                return null;
336            } else if (obj instanceof byte[]) {
337                return encode((byte[]) obj);
338            } else if (obj instanceof String) {
339                return encode((String) obj);
340            } else {
341                throw new EncoderException("Objects of type " +
342                      obj.getClass().getName() +
343                      " cannot be quoted-printable encoded");
344            }
345        }
346    
347        /**
348         * Decodes a quoted-printable object into its original form. Escaped characters are converted back to their original
349         * representation.
350         *
351         * @param obj
352         *            quoted-printable object to convert into its original form
353         * @return original object
354         * @throws DecoderException
355         *             Thrown if the argument is not a <code>String</code> or <code>byte[]</code>. Thrown if a failure
356         *             condition is encountered during the decode process.
357         */
358        @Override
359        public Object decode(final Object obj) throws DecoderException {
360            if (obj == null) {
361                return null;
362            } else if (obj instanceof byte[]) {
363                return decode((byte[]) obj);
364            } else if (obj instanceof String) {
365                return decode((String) obj);
366            } else {
367                throw new DecoderException("Objects of type " +
368                      obj.getClass().getName() +
369                      " cannot be quoted-printable decoded");
370            }
371        }
372    
373        /**
374         * Gets the default charset name used for string decoding and encoding.
375         *
376         * @return the default charset name
377         * @since 1.7
378         */
379        public Charset getCharset() {
380            return this.charset;
381        }
382    
383        /**
384         * Gets the default charset name used for string decoding and encoding.
385         *
386         * @return the default charset name
387         */
388        public String getDefaultCharset() {
389            return this.charset.name();
390        }
391    
392        /**
393         * Encodes a string into its quoted-printable form using the specified charset. Unsafe characters are escaped.
394         * <p>
395         * This function implements a subset of quoted-printable encoding specification (rule #1 and rule #2) as defined in
396         * RFC 1521 and is suitable for encoding binary data and unformatted text.
397         *
398         * @param str
399         *            string to convert to quoted-printable form
400         * @param charset
401         *            the charset for str
402         * @return quoted-printable string
403         * @since 1.7
404         */
405        public String encode(final String str, final Charset charset) {
406            if (str == null) {
407                return null;
408            }
409            return StringUtils.newStringUsAscii(this.encode(str.getBytes(charset)));
410        }
411    
412        /**
413         * Encodes a string into its quoted-printable form using the specified charset. Unsafe characters are escaped.
414         * <p>
415         * This function implements a subset of quoted-printable encoding specification (rule #1 and rule #2) as defined in
416         * RFC 1521 and is suitable for encoding binary data and unformatted text.
417         *
418         * @param str
419         *            string to convert to quoted-printable form
420         * @param charset
421         *            the charset for str
422         * @return quoted-printable string
423         * @throws UnsupportedEncodingException
424         *             Thrown if the charset is not supported
425         */
426        public String encode(final String str, final String charset) throws UnsupportedEncodingException {
427            if (str == null) {
428                return null;
429            }
430            return StringUtils.newStringUsAscii(encode(str.getBytes(charset)));
431        }
432    }