001    /*
002     * Licensed to the Apache Software Foundation (ASF) under one or more
003     * contributor license agreements.  See the NOTICE file distributed with
004     * this work for additional information regarding copyright ownership.
005     * The ASF licenses this file to You under the Apache License, Version 2.0
006     * (the "License"); you may not use this file except in compliance with
007     * the License.  You may obtain a copy of the License at
008     *
009     *      http://www.apache.org/licenses/LICENSE-2.0
010     *
011     * Unless required by applicable law or agreed to in writing, software
012     * distributed under the License is distributed on an "AS IS" BASIS,
013     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014     * See the License for the specific language governing permissions and
015     * limitations under the License.
016     */
017    
018    package org.apache.commons.codec.net;
019    
020    import java.io.UnsupportedEncodingException;
021    import java.nio.charset.Charset;
022    import java.util.BitSet;
023    
024    import org.apache.commons.codec.Charsets;
025    import org.apache.commons.codec.DecoderException;
026    import org.apache.commons.codec.EncoderException;
027    import org.apache.commons.codec.StringDecoder;
028    import org.apache.commons.codec.StringEncoder;
029    
030    /**
031     * Similar to the Quoted-Printable content-transfer-encoding defined in
032     * <a href="http://www.ietf.org/rfc/rfc1521.txt">RFC 1521</a> and designed to allow text containing mostly ASCII
033     * characters to be decipherable on an ASCII terminal without decoding.
034     * <p>
035     * <a href="http://www.ietf.org/rfc/rfc1522.txt">RFC 1522</a> describes techniques to allow the encoding of non-ASCII
036     * text in various portions of a RFC 822 [2] message header, in a manner which is unlikely to confuse existing message
037     * handling software.
038     * <p>
039     * This class is conditionally thread-safe.
040     * The instance field {@link #encodeBlanks} is mutable {@link #setEncodeBlanks(boolean)}
041     * but is not volatile, and accesses are not synchronised.
042     * If an instance of the class is shared between threads, the caller needs to ensure that suitable synchronisation
043     * is used to ensure safe publication of the value between threads, and must not invoke
044     * {@link #setEncodeBlanks(boolean)} after initial setup.
045     *
046     * @see <a href="http://www.ietf.org/rfc/rfc1522.txt">MIME (Multipurpose Internet Mail Extensions) Part Two: Message
047     *          Header Extensions for Non-ASCII Text</a>
048     *
049     * @since 1.3
050     * @version $Id: QCodec.html 889935 2013-12-11 05:05:13Z ggregory $
051     */
052    public class QCodec extends RFC1522Codec implements StringEncoder, StringDecoder {
053        /**
054         * The default charset used for string decoding and encoding.
055         */
056        private final Charset charset;
057    
058        /**
059         * BitSet of printable characters as defined in RFC 1522.
060         */
061        private static final BitSet PRINTABLE_CHARS = new BitSet(256);
062        // Static initializer for printable chars collection
063        static {
064            // alpha characters
065            PRINTABLE_CHARS.set(' ');
066            PRINTABLE_CHARS.set('!');
067            PRINTABLE_CHARS.set('"');
068            PRINTABLE_CHARS.set('#');
069            PRINTABLE_CHARS.set('$');
070            PRINTABLE_CHARS.set('%');
071            PRINTABLE_CHARS.set('&');
072            PRINTABLE_CHARS.set('\'');
073            PRINTABLE_CHARS.set('(');
074            PRINTABLE_CHARS.set(')');
075            PRINTABLE_CHARS.set('*');
076            PRINTABLE_CHARS.set('+');
077            PRINTABLE_CHARS.set(',');
078            PRINTABLE_CHARS.set('-');
079            PRINTABLE_CHARS.set('.');
080            PRINTABLE_CHARS.set('/');
081            for (int i = '0'; i <= '9'; i++) {
082                PRINTABLE_CHARS.set(i);
083            }
084            PRINTABLE_CHARS.set(':');
085            PRINTABLE_CHARS.set(';');
086            PRINTABLE_CHARS.set('<');
087            PRINTABLE_CHARS.set('>');
088            PRINTABLE_CHARS.set('@');
089            for (int i = 'A'; i <= 'Z'; i++) {
090                PRINTABLE_CHARS.set(i);
091            }
092            PRINTABLE_CHARS.set('[');
093            PRINTABLE_CHARS.set('\\');
094            PRINTABLE_CHARS.set(']');
095            PRINTABLE_CHARS.set('^');
096            PRINTABLE_CHARS.set('`');
097            for (int i = 'a'; i <= 'z'; i++) {
098                PRINTABLE_CHARS.set(i);
099            }
100            PRINTABLE_CHARS.set('{');
101            PRINTABLE_CHARS.set('|');
102            PRINTABLE_CHARS.set('}');
103            PRINTABLE_CHARS.set('~');
104        }
105    
106        private static final byte BLANK = 32;
107    
108        private static final byte UNDERSCORE = 95;
109    
110        private boolean encodeBlanks = false;
111    
112        /**
113         * Default constructor.
114         */
115        public QCodec() {
116            this(Charsets.UTF_8);
117        }
118    
119        /**
120         * Constructor which allows for the selection of a default charset.
121         *
122         * @param charset
123         *            the default string charset to use.
124         *
125         * @see <a href="http://download.oracle.com/javase/6/docs/api/java/nio/charset/Charset.html">Standard charsets</a>
126         * @since 1.7
127         */
128        public QCodec(final Charset charset) {
129            super();
130            this.charset = charset;
131        }
132    
133        /**
134         * Constructor which allows for the selection of a default charset.
135         *
136         * @param charsetName
137         *            the charset to use.
138         * @throws java.nio.charset.UnsupportedCharsetException
139         *             If the named charset is unavailable
140         * @since 1.7 throws UnsupportedCharsetException if the named charset is unavailable
141         * @see <a href="http://download.oracle.com/javase/6/docs/api/java/nio/charset/Charset.html">Standard charsets</a>
142         */
143        public QCodec(final String charsetName) {
144            this(Charset.forName(charsetName));
145        }
146    
147        @Override
148        protected String getEncoding() {
149            return "Q";
150        }
151    
152        @Override
153        protected byte[] doEncoding(final byte[] bytes) {
154            if (bytes == null) {
155                return null;
156            }
157            final byte[] data = QuotedPrintableCodec.encodeQuotedPrintable(PRINTABLE_CHARS, bytes);
158            if (this.encodeBlanks) {
159                for (int i = 0; i < data.length; i++) {
160                    if (data[i] == BLANK) {
161                        data[i] = UNDERSCORE;
162                    }
163                }
164            }
165            return data;
166        }
167    
168        @Override
169        protected byte[] doDecoding(final byte[] bytes) throws DecoderException {
170            if (bytes == null) {
171                return null;
172            }
173            boolean hasUnderscores = false;
174            for (final byte b : bytes) {
175                if (b == UNDERSCORE) {
176                    hasUnderscores = true;
177                    break;
178                }
179            }
180            if (hasUnderscores) {
181                final byte[] tmp = new byte[bytes.length];
182                for (int i = 0; i < bytes.length; i++) {
183                    final byte b = bytes[i];
184                    if (b != UNDERSCORE) {
185                        tmp[i] = b;
186                    } else {
187                        tmp[i] = BLANK;
188                    }
189                }
190                return QuotedPrintableCodec.decodeQuotedPrintable(tmp);
191            }
192            return QuotedPrintableCodec.decodeQuotedPrintable(bytes);
193        }
194    
195        /**
196         * Encodes a string into its quoted-printable form using the specified charset. Unsafe characters are escaped.
197         *
198         * @param str
199         *            string to convert to quoted-printable form
200         * @param charset
201         *            the charset for str
202         * @return quoted-printable string
203         * @throws EncoderException
204         *             thrown if a failure condition is encountered during the encoding process.
205         * @since 1.7
206         */
207        public String encode(final String str, final Charset charset) throws EncoderException {
208            if (str == null) {
209                return null;
210            }
211            return encodeText(str, charset);
212        }
213    
214        /**
215         * Encodes a string into its quoted-printable form using the specified charset. Unsafe characters are escaped.
216         *
217         * @param str
218         *            string to convert to quoted-printable form
219         * @param charset
220         *            the charset for str
221         * @return quoted-printable string
222         * @throws EncoderException
223         *             thrown if a failure condition is encountered during the encoding process.
224         */
225        public String encode(final String str, final String charset) throws EncoderException {
226            if (str == null) {
227                return null;
228            }
229            try {
230                return encodeText(str, charset);
231            } catch (final UnsupportedEncodingException e) {
232                throw new EncoderException(e.getMessage(), e);
233            }
234        }
235    
236        /**
237         * Encodes a string into its quoted-printable form using the default charset. Unsafe characters are escaped.
238         *
239         * @param str
240         *            string to convert to quoted-printable form
241         * @return quoted-printable string
242         * @throws EncoderException
243         *             thrown if a failure condition is encountered during the encoding process.
244         */
245        @Override
246        public String encode(final String str) throws EncoderException {
247            if (str == null) {
248                return null;
249            }
250            return encode(str, getCharset());
251        }
252    
253        /**
254         * Decodes a quoted-printable string into its original form. Escaped characters are converted back to their original
255         * representation.
256         *
257         * @param str
258         *            quoted-printable string to convert into its original form
259         * @return original string
260         * @throws DecoderException
261         *             A decoder exception is thrown if a failure condition is encountered during the decode process.
262         */
263        @Override
264        public String decode(final String str) throws DecoderException {
265            if (str == null) {
266                return null;
267            }
268            try {
269                return decodeText(str);
270            } catch (final UnsupportedEncodingException e) {
271                throw new DecoderException(e.getMessage(), e);
272            }
273        }
274    
275        /**
276         * Encodes an object into its quoted-printable form using the default charset. Unsafe characters are escaped.
277         *
278         * @param obj
279         *            object to convert to quoted-printable form
280         * @return quoted-printable object
281         * @throws EncoderException
282         *             thrown if a failure condition is encountered during the encoding process.
283         */
284        @Override
285        public Object encode(final Object obj) throws EncoderException {
286            if (obj == null) {
287                return null;
288            } else if (obj instanceof String) {
289                return encode((String) obj);
290            } else {
291                throw new EncoderException("Objects of type " +
292                      obj.getClass().getName() +
293                      " cannot be encoded using Q codec");
294            }
295        }
296    
297        /**
298         * Decodes a quoted-printable object into its original form. Escaped characters are converted back to their original
299         * representation.
300         *
301         * @param obj
302         *            quoted-printable object to convert into its original form
303         * @return original object
304         * @throws DecoderException
305         *             Thrown if the argument is not a <code>String</code>. Thrown if a failure condition is encountered
306         *             during the decode process.
307         */
308        @Override
309        public Object decode(final Object obj) throws DecoderException {
310            if (obj == null) {
311                return null;
312            } else if (obj instanceof String) {
313                return decode((String) obj);
314            } else {
315                throw new DecoderException("Objects of type " +
316                      obj.getClass().getName() +
317                      " cannot be decoded using Q codec");
318            }
319        }
320    
321        /**
322         * Gets the default charset name used for string decoding and encoding.
323         *
324         * @return the default charset name
325         * @since 1.7
326         */
327        public Charset getCharset() {
328            return this.charset;
329        }
330    
331        /**
332         * Gets the default charset name used for string decoding and encoding.
333         *
334         * @return the default charset name
335         */
336        public String getDefaultCharset() {
337            return this.charset.name();
338        }
339    
340        /**
341         * Tests if optional transformation of SPACE characters is to be used
342         *
343         * @return {@code true} if SPACE characters are to be transformed, {@code false} otherwise
344         */
345        public boolean isEncodeBlanks() {
346            return this.encodeBlanks;
347        }
348    
349        /**
350         * Defines whether optional transformation of SPACE characters is to be used
351         *
352         * @param b
353         *            {@code true} if SPACE characters are to be transformed, {@code false} otherwise
354         */
355        public void setEncodeBlanks(final boolean b) {
356            this.encodeBlanks = b;
357        }
358    }