001    /*
002     * Licensed to the Apache Software Foundation (ASF) under one or more
003     * contributor license agreements.  See the NOTICE file distributed with
004     * this work for additional information regarding copyright ownership.
005     * The ASF licenses this file to You under the Apache License, Version 2.0
006     * (the "License"); you may not use this file except in compliance with
007     * the License.  You may obtain a copy of the License at
008     * 
009     *      http://www.apache.org/licenses/LICENSE-2.0
010     * 
011     * Unless required by applicable law or agreed to in writing, software
012     * distributed under the License is distributed on an "AS IS" BASIS,
013     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014     * See the License for the specific language governing permissions and
015     * limitations under the License.
016     */
017    
018    package org.apache.commons.codec.net;
019    
020    import java.io.UnsupportedEncodingException;
021    import java.util.BitSet;
022    
023    import org.apache.commons.codec.CharEncoding;
024    import org.apache.commons.codec.DecoderException;
025    import org.apache.commons.codec.EncoderException;
026    import org.apache.commons.codec.StringDecoder;
027    import org.apache.commons.codec.StringEncoder;
028    
029    /**
030     * <p>
031     * Similar to the Quoted-Printable content-transfer-encoding defined in <a
032     * href="http://www.ietf.org/rfc/rfc1521.txt">RFC 1521</a> and designed to allow text containing mostly ASCII
033     * characters to be decipherable on an ASCII terminal without decoding.
034     * </p>
035     * 
036     * <p>
037     * <a href="http://www.ietf.org/rfc/rfc1522.txt">RFC 1522</a> describes techniques to allow the encoding of non-ASCII
038     * text in various portions of a RFC 822 [2] message header, in a manner which is unlikely to confuse existing message
039     * handling software.
040     * </p>
041     * 
042     * @see <a href="http://www.ietf.org/rfc/rfc1522.txt">MIME (Multipurpose Internet Mail Extensions) Part Two: Message
043     *          Header Extensions for Non-ASCII Text</a>
044     * 
045     * @author Apache Software Foundation
046     * @since 1.3
047     * @version $Id: QCodec.java 1170351 2011-09-13 21:09:09Z ggregory $
048     */
049    public class QCodec extends RFC1522Codec implements StringEncoder, StringDecoder {
050        /**
051         * The default charset used for string decoding and encoding.
052         */
053        private final String charset;
054    
055        /**
056         * BitSet of printable characters as defined in RFC 1522.
057         */
058        private static final BitSet PRINTABLE_CHARS = new BitSet(256);
059        // Static initializer for printable chars collection
060        static {
061            // alpha characters
062            PRINTABLE_CHARS.set(' ');
063            PRINTABLE_CHARS.set('!');
064            PRINTABLE_CHARS.set('"');
065            PRINTABLE_CHARS.set('#');
066            PRINTABLE_CHARS.set('$');
067            PRINTABLE_CHARS.set('%');
068            PRINTABLE_CHARS.set('&');
069            PRINTABLE_CHARS.set('\'');
070            PRINTABLE_CHARS.set('(');
071            PRINTABLE_CHARS.set(')');
072            PRINTABLE_CHARS.set('*');
073            PRINTABLE_CHARS.set('+');
074            PRINTABLE_CHARS.set(',');
075            PRINTABLE_CHARS.set('-');
076            PRINTABLE_CHARS.set('.');
077            PRINTABLE_CHARS.set('/');
078            for (int i = '0'; i <= '9'; i++) {
079                PRINTABLE_CHARS.set(i);
080            }
081            PRINTABLE_CHARS.set(':');
082            PRINTABLE_CHARS.set(';');
083            PRINTABLE_CHARS.set('<');
084            PRINTABLE_CHARS.set('>');
085            PRINTABLE_CHARS.set('@');
086            for (int i = 'A'; i <= 'Z'; i++) {
087                PRINTABLE_CHARS.set(i);
088            }
089            PRINTABLE_CHARS.set('[');
090            PRINTABLE_CHARS.set('\\');
091            PRINTABLE_CHARS.set(']');
092            PRINTABLE_CHARS.set('^');
093            PRINTABLE_CHARS.set('`');
094            for (int i = 'a'; i <= 'z'; i++) {
095                PRINTABLE_CHARS.set(i);
096            }
097            PRINTABLE_CHARS.set('{');
098            PRINTABLE_CHARS.set('|');
099            PRINTABLE_CHARS.set('}');
100            PRINTABLE_CHARS.set('~');
101        }
102    
103        private static final byte BLANK = 32;
104    
105        private static final byte UNDERSCORE = 95;
106    
107        private boolean encodeBlanks = false;
108    
109        /**
110         * Default constructor.
111         */
112        public QCodec() {
113            this(CharEncoding.UTF_8);
114        }
115    
116        /**
117         * Constructor which allows for the selection of a default charset
118         * 
119         * @param charset
120         *                  the default string charset to use.
121         * 
122         * @see <a href="http://download.oracle.com/javase/1.5.0/docs/api/java/nio/charset/Charset.html">Standard charsets</a>
123         */
124        public QCodec(final String charset) {
125            super();
126            this.charset = charset;
127        }
128    
129        @Override
130        protected String getEncoding() {
131            return "Q";
132        }
133    
134        @Override
135        protected byte[] doEncoding(byte[] bytes) {
136            if (bytes == null) {
137                return null;
138            }
139            byte[] data = QuotedPrintableCodec.encodeQuotedPrintable(PRINTABLE_CHARS, bytes);
140            if (this.encodeBlanks) {
141                for (int i = 0; i < data.length; i++) {
142                    if (data[i] == BLANK) {
143                        data[i] = UNDERSCORE;
144                    }
145                }
146            }
147            return data;
148        }
149    
150        @Override
151        protected byte[] doDecoding(byte[] bytes) throws DecoderException {
152            if (bytes == null) {
153                return null;
154            }
155            boolean hasUnderscores = false;
156            for (byte b : bytes) {
157                if (b == UNDERSCORE) {
158                    hasUnderscores = true;
159                    break;
160                }
161            }
162            if (hasUnderscores) {
163                byte[] tmp = new byte[bytes.length];
164                for (int i = 0; i < bytes.length; i++) {
165                    byte b = bytes[i];
166                    if (b != UNDERSCORE) {
167                        tmp[i] = b;
168                    } else {
169                        tmp[i] = BLANK;
170                    }
171                }
172                return QuotedPrintableCodec.decodeQuotedPrintable(tmp);
173            } 
174            return QuotedPrintableCodec.decodeQuotedPrintable(bytes);       
175        }
176    
177        /**
178         * Encodes a string into its quoted-printable form using the specified charset. Unsafe characters are escaped.
179         * 
180         * @param pString
181         *                  string to convert to quoted-printable form
182         * @param charset
183         *                  the charset for pString
184         * @return quoted-printable string
185         * 
186         * @throws EncoderException
187         *                  thrown if a failure condition is encountered during the encoding process.
188         */
189        public String encode(final String pString, final String charset) throws EncoderException {
190            if (pString == null) {
191                return null;
192            }
193            try {
194                return encodeText(pString, charset);
195            } catch (UnsupportedEncodingException e) {
196                throw new EncoderException(e.getMessage(), e);
197            }
198        }
199    
200        /**
201         * Encodes a string into its quoted-printable form using the default charset. Unsafe characters are escaped.
202         * 
203         * @param pString
204         *                  string to convert to quoted-printable form
205         * @return quoted-printable string
206         * 
207         * @throws EncoderException
208         *                  thrown if a failure condition is encountered during the encoding process.
209         */
210        public String encode(String pString) throws EncoderException {
211            if (pString == null) {
212                return null;
213            }
214            return encode(pString, getDefaultCharset());
215        }
216    
217        /**
218         * Decodes a quoted-printable string into its original form. Escaped characters are converted back to their original
219         * representation.
220         * 
221         * @param pString
222         *                  quoted-printable string to convert into its original form
223         * 
224         * @return original string
225         * 
226         * @throws DecoderException
227         *                  A decoder exception is thrown if a failure condition is encountered during the decode process.
228         */
229        public String decode(String pString) throws DecoderException {
230            if (pString == null) {
231                return null;
232            }
233            try {
234                return decodeText(pString);
235            } catch (UnsupportedEncodingException e) {
236                throw new DecoderException(e.getMessage(), e);
237            }
238        }
239    
240        /**
241         * Encodes an object into its quoted-printable form using the default charset. Unsafe characters are escaped.
242         * 
243         * @param pObject
244         *                  object to convert to quoted-printable form
245         * @return quoted-printable object
246         * 
247         * @throws EncoderException
248         *                  thrown if a failure condition is encountered during the encoding process.
249         */
250        public Object encode(Object pObject) throws EncoderException {
251            if (pObject == null) {
252                return null;
253            } else if (pObject instanceof String) {
254                return encode((String) pObject);
255            } else {
256                throw new EncoderException("Objects of type " + 
257                      pObject.getClass().getName() + 
258                      " cannot be encoded using Q codec");
259            }
260        }
261    
262        /**
263         * Decodes a quoted-printable object into its original form. Escaped characters are converted back to their original
264         * representation.
265         * 
266         * @param pObject
267         *                  quoted-printable object to convert into its original form
268         * 
269         * @return original object
270         * 
271         * @throws DecoderException
272         *                  Thrown if the argument is not a <code>String</code>. Thrown if a failure condition is
273         *                  encountered during the decode process.
274         */
275        public Object decode(Object pObject) throws DecoderException {
276            if (pObject == null) {
277                return null;
278            } else if (pObject instanceof String) {
279                return decode((String) pObject);
280            } else {
281                throw new DecoderException("Objects of type " + 
282                      pObject.getClass().getName() + 
283                      " cannot be decoded using Q codec");
284            }
285        }
286    
287        /**
288         * The default charset used for string decoding and encoding.
289         * 
290         * @return the default string charset.
291         */
292        public String getDefaultCharset() {
293            return this.charset;
294        }
295    
296        /**
297         * Tests if optional transformation of SPACE characters is to be used
298         * 
299         * @return <code>true</code> if SPACE characters are to be transformed, <code>false</code> otherwise
300         */
301        public boolean isEncodeBlanks() {
302            return this.encodeBlanks;
303        }
304    
305        /**
306         * Defines whether optional transformation of SPACE characters is to be used
307         * 
308         * @param b
309         *                  <code>true</code> if SPACE characters are to be transformed, <code>false</code> otherwise
310         */
311        public void setEncodeBlanks(boolean b) {
312            this.encodeBlanks = b;
313        }
314    }