001/*
002 * Licensed to the Apache Software Foundation (ASF) under one or more
003 * contributor license agreements.  See the NOTICE file distributed with
004 * this work for additional information regarding copyright ownership.
005 * The ASF licenses this file to You under the Apache License, Version 2.0
006 * (the "License"); you may not use this file except in compliance with
007 * the License.  You may obtain a copy of the License at
008 *
009 *      http://www.apache.org/licenses/LICENSE-2.0
010 *
011 * Unless required by applicable law or agreed to in writing, software
012 * distributed under the License is distributed on an "AS IS" BASIS,
013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 * See the License for the specific language governing permissions and
015 * limitations under the License.
016 */
017
018package org.apache.commons.codec.net;
019
020import java.io.UnsupportedEncodingException;
021import java.nio.charset.Charset;
022import java.util.BitSet;
023
024import org.apache.commons.codec.Charsets;
025import org.apache.commons.codec.DecoderException;
026import org.apache.commons.codec.EncoderException;
027import org.apache.commons.codec.StringDecoder;
028import org.apache.commons.codec.StringEncoder;
029
030/**
031 * Similar to the Quoted-Printable content-transfer-encoding defined in
032 * <a href="http://www.ietf.org/rfc/rfc1521.txt">RFC 1521</a> and designed to allow text containing mostly ASCII
033 * characters to be decipherable on an ASCII terminal without decoding.
034 * <p>
 * <a href="http://www.ietf.org/rfc/rfc1522.txt">RFC 1522</a> describes techniques to allow the encoding of non-ASCII
 * text in various portions of an RFC 822 message header, in a manner which is unlikely to confuse existing message
 * handling software.
038 * <p>
 * This class is conditionally thread-safe.
 * The instance field {@link #encodeBlanks} is mutable (see {@link #setEncodeBlanks(boolean)}),
 * is not volatile, and accesses to it are not synchronised.
 * If an instance of the class is shared between threads, the caller must use suitable synchronisation
 * to ensure safe publication of the value between threads, and must not invoke
 * {@link #setEncodeBlanks(boolean)} after initial setup.
045 *
046 * @see <a href="http://www.ietf.org/rfc/rfc1522.txt">MIME (Multipurpose Internet Mail Extensions) Part Two: Message
047 *          Header Extensions for Non-ASCII Text</a>
048 *
049 * @since 1.3
050 */
051public class QCodec extends RFC1522Codec implements StringEncoder, StringDecoder {
052    /**
053     * The default Charset used for string decoding and encoding.
054     */
055    private final Charset charset;
056
057    /**
058     * BitSet of printable characters as defined in RFC 1522.
059     */
060    private static final BitSet PRINTABLE_CHARS = new BitSet(256);
061    // Static initializer for printable chars collection
062    static {
063        // alpha characters
064        PRINTABLE_CHARS.set(' ');
065        PRINTABLE_CHARS.set('!');
066        PRINTABLE_CHARS.set('"');
067        PRINTABLE_CHARS.set('#');
068        PRINTABLE_CHARS.set('$');
069        PRINTABLE_CHARS.set('%');
070        PRINTABLE_CHARS.set('&');
071        PRINTABLE_CHARS.set('\'');
072        PRINTABLE_CHARS.set('(');
073        PRINTABLE_CHARS.set(')');
074        PRINTABLE_CHARS.set('*');
075        PRINTABLE_CHARS.set('+');
076        PRINTABLE_CHARS.set(',');
077        PRINTABLE_CHARS.set('-');
078        PRINTABLE_CHARS.set('.');
079        PRINTABLE_CHARS.set('/');
080        for (int i = '0'; i <= '9'; i++) {
081            PRINTABLE_CHARS.set(i);
082        }
083        PRINTABLE_CHARS.set(':');
084        PRINTABLE_CHARS.set(';');
085        PRINTABLE_CHARS.set('<');
086        PRINTABLE_CHARS.set('>');
087        PRINTABLE_CHARS.set('@');
088        for (int i = 'A'; i <= 'Z'; i++) {
089            PRINTABLE_CHARS.set(i);
090        }
091        PRINTABLE_CHARS.set('[');
092        PRINTABLE_CHARS.set('\\');
093        PRINTABLE_CHARS.set(']');
094        PRINTABLE_CHARS.set('^');
095        PRINTABLE_CHARS.set('`');
096        for (int i = 'a'; i <= 'z'; i++) {
097            PRINTABLE_CHARS.set(i);
098        }
099        PRINTABLE_CHARS.set('{');
100        PRINTABLE_CHARS.set('|');
101        PRINTABLE_CHARS.set('}');
102        PRINTABLE_CHARS.set('~');
103    }
104
105    private static final byte SPACE = 32;
106
107    private static final byte UNDERSCORE = 95;
108
109    private boolean encodeBlanks = false;
110
111    /**
112     * Default constructor.
113     */
114    public QCodec() {
115        this(Charsets.UTF_8);
116    }
117
118    /**
119     * Constructor which allows for the selection of a default Charset.
120     *
121     * @param charset
122     *            the default string Charset to use.
123     *
124     * @see <a href="http://download.oracle.com/javase/7/docs/api/java/nio/charset/Charset.html">Standard charsets</a>
125     * @since 1.7
126     */
127    public QCodec(final Charset charset) {
128        super();
129        this.charset = charset;
130    }
131
132    /**
133     * Constructor which allows for the selection of a default Charset.
134     *
135     * @param charsetName
136     *            the Charset to use.
137     * @throws java.nio.charset.UnsupportedCharsetException
138     *             If the named Charset is unavailable
139     * @since 1.7 throws UnsupportedCharsetException if the named Charset is unavailable
140     * @see <a href="http://download.oracle.com/javase/7/docs/api/java/nio/charset/Charset.html">Standard charsets</a>
141     */
142    public QCodec(final String charsetName) {
143        this(Charset.forName(charsetName));
144    }
145
146    @Override
147    protected String getEncoding() {
148        return "Q";
149    }
150
151    @Override
152    protected byte[] doEncoding(final byte[] bytes) {
153        if (bytes == null) {
154            return null;
155        }
156        final byte[] data = QuotedPrintableCodec.encodeQuotedPrintable(PRINTABLE_CHARS, bytes);
157        if (this.encodeBlanks) {
158            for (int i = 0; i < data.length; i++) {
159                if (data[i] == SPACE) {
160                    data[i] = UNDERSCORE;
161                }
162            }
163        }
164        return data;
165    }
166
167    @Override
168    protected byte[] doDecoding(final byte[] bytes) throws DecoderException {
169        if (bytes == null) {
170            return null;
171        }
172        boolean hasUnderscores = false;
173        for (final byte b : bytes) {
174            if (b == UNDERSCORE) {
175                hasUnderscores = true;
176                break;
177            }
178        }
179        if (hasUnderscores) {
180            final byte[] tmp = new byte[bytes.length];
181            for (int i = 0; i < bytes.length; i++) {
182                final byte b = bytes[i];
183                if (b != UNDERSCORE) {
184                    tmp[i] = b;
185                } else {
186                    tmp[i] = SPACE;
187                }
188            }
189            return QuotedPrintableCodec.decodeQuotedPrintable(tmp);
190        }
191        return QuotedPrintableCodec.decodeQuotedPrintable(bytes);
192    }
193
194    /**
195     * Encodes a string into its quoted-printable form using the specified Charset. Unsafe characters are escaped.
196     *
197     * @param sourceStr
198     *            string to convert to quoted-printable form
199     * @param sourceCharset
200     *            the Charset for sourceStr
201     * @return quoted-printable string
202     * @throws EncoderException
203     *             thrown if a failure condition is encountered during the encoding process.
204     * @since 1.7
205     */
206    public String encode(final String sourceStr, final Charset sourceCharset) throws EncoderException {
207        if (sourceStr == null) {
208            return null;
209        }
210        return encodeText(sourceStr, sourceCharset);
211    }
212
213    /**
214     * Encodes a string into its quoted-printable form using the specified Charset. Unsafe characters are escaped.
215     *
216     * @param sourceStr
217     *            string to convert to quoted-printable form
218     * @param sourceCharset
219     *            the Charset for sourceStr
220     * @return quoted-printable string
221     * @throws EncoderException
222     *             thrown if a failure condition is encountered during the encoding process.
223     */
224    public String encode(final String sourceStr, final String sourceCharset) throws EncoderException {
225        if (sourceStr == null) {
226            return null;
227        }
228        try {
229            return encodeText(sourceStr, sourceCharset);
230        } catch (final UnsupportedEncodingException e) {
231            throw new EncoderException(e.getMessage(), e);
232        }
233    }
234
235    /**
236     * Encodes a string into its quoted-printable form using the default Charset. Unsafe characters are escaped.
237     *
238     * @param sourceStr
239     *            string to convert to quoted-printable form
240     * @return quoted-printable string
241     * @throws EncoderException
242     *             thrown if a failure condition is encountered during the encoding process.
243     */
244    @Override
245    public String encode(final String sourceStr) throws EncoderException {
246        if (sourceStr == null) {
247            return null;
248        }
249        return encode(sourceStr, getCharset());
250    }
251
252    /**
253     * Decodes a quoted-printable string into its original form. Escaped characters are converted back to their original
254     * representation.
255     *
256     * @param str
257     *            quoted-printable string to convert into its original form
258     * @return original string
259     * @throws DecoderException
260     *             A decoder exception is thrown if a failure condition is encountered during the decode process.
261     */
262    @Override
263    public String decode(final String str) throws DecoderException {
264        if (str == null) {
265            return null;
266        }
267        try {
268            return decodeText(str);
269        } catch (final UnsupportedEncodingException e) {
270            throw new DecoderException(e.getMessage(), e);
271        }
272    }
273
274    /**
275     * Encodes an object into its quoted-printable form using the default Charset. Unsafe characters are escaped.
276     *
277     * @param obj
278     *            object to convert to quoted-printable form
279     * @return quoted-printable object
280     * @throws EncoderException
281     *             thrown if a failure condition is encountered during the encoding process.
282     */
283    @Override
284    public Object encode(final Object obj) throws EncoderException {
285        if (obj == null) {
286            return null;
287        } else if (obj instanceof String) {
288            return encode((String) obj);
289        } else {
290            throw new EncoderException("Objects of type " +
291                  obj.getClass().getName() +
292                  " cannot be encoded using Q codec");
293        }
294    }
295
296    /**
297     * Decodes a quoted-printable object into its original form. Escaped characters are converted back to their original
298     * representation.
299     *
300     * @param obj
301     *            quoted-printable object to convert into its original form
302     * @return original object
303     * @throws DecoderException
304     *             Thrown if the argument is not a <code>String</code>. Thrown if a failure condition is encountered
305     *             during the decode process.
306     */
307    @Override
308    public Object decode(final Object obj) throws DecoderException {
309        if (obj == null) {
310            return null;
311        } else if (obj instanceof String) {
312            return decode((String) obj);
313        } else {
314            throw new DecoderException("Objects of type " +
315                  obj.getClass().getName() +
316                  " cannot be decoded using Q codec");
317        }
318    }
319
320    /**
321     * Gets the default Charset name used for string decoding and encoding.
322     *
323     * @return the default Charset name
324     * @since 1.7
325     */
326    public Charset getCharset() {
327        return this.charset;
328    }
329
330    /**
331     * Gets the default Charset name used for string decoding and encoding.
332     *
333     * @return the default Charset name
334     */
335    public String getDefaultCharset() {
336        return this.charset.name();
337    }
338
339    /**
340     * Tests if optional transformation of SPACE characters is to be used
341     *
342     * @return <code>true</code> if SPACE characters are to be transformed, <code>false</code> otherwise
343     */
344    public boolean isEncodeBlanks() {
345        return this.encodeBlanks;
346    }
347
348    /**
349     * Defines whether optional transformation of SPACE characters is to be used
350     *
351     * @param b
352     *            <code>true</code> if SPACE characters are to be transformed, <code>false</code> otherwise
353     */
354    public void setEncodeBlanks(final boolean b) {
355        this.encodeBlanks = b;
356    }
357}