001/*
002 * Licensed to the Apache Software Foundation (ASF) under one or more
003 * contributor license agreements.  See the NOTICE file distributed with
004 * this work for additional information regarding copyright ownership.
005 * The ASF licenses this file to You under the Apache License, Version 2.0
006 * (the "License"); you may not use this file except in compliance with
007 * the License.  You may obtain a copy of the License at
008 *
009 *      http://www.apache.org/licenses/LICENSE-2.0
010 *
011 * Unless required by applicable law or agreed to in writing, software
012 * distributed under the License is distributed on an "AS IS" BASIS,
013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 * See the License for the specific language governing permissions and
015 * limitations under the License.
016 */
017
018package org.apache.commons.codec.net;
019
020import java.io.UnsupportedEncodingException;
021import java.nio.charset.Charset;
022import java.util.BitSet;
023
024import org.apache.commons.codec.Charsets;
025import org.apache.commons.codec.DecoderException;
026import org.apache.commons.codec.EncoderException;
027import org.apache.commons.codec.StringDecoder;
028import org.apache.commons.codec.StringEncoder;
029
030/**
031 * Similar to the Quoted-Printable content-transfer-encoding defined in
032 * <a href="http://www.ietf.org/rfc/rfc1521.txt">RFC 1521</a> and designed to allow text containing mostly ASCII
033 * characters to be decipherable on an ASCII terminal without decoding.
034 * <p>
035 * <a href="http://www.ietf.org/rfc/rfc1522.txt">RFC 1522</a> describes techniques to allow the encoding of non-ASCII
036 * text in various portions of a RFC 822 [2] message header, in a manner which is unlikely to confuse existing message
037 * handling software.
038 * <p>
039 * This class is conditionally thread-safe.
040 * The instance field {@link #encodeBlanks} is mutable {@link #setEncodeBlanks(boolean)}
041 * but is not volatile, and accesses are not synchronised.
042 * If an instance of the class is shared between threads, the caller needs to ensure that suitable synchronisation
043 * is used to ensure safe publication of the value between threads, and must not invoke
044 * {@link #setEncodeBlanks(boolean)} after initial setup.
045 *
046 * @see <a href="http://www.ietf.org/rfc/rfc1522.txt">MIME (Multipurpose Internet Mail Extensions) Part Two: Message
047 *          Header Extensions for Non-ASCII Text</a>
048 *
049 * @since 1.3
050 * @version $Id: QCodec.html 928559 2014-11-10 02:53:54Z ggregory $
051 */
052public class QCodec extends RFC1522Codec implements StringEncoder, StringDecoder {
053    /**
054     * The default charset used for string decoding and encoding.
055     */
056    private final Charset charset;
057
058    /**
059     * BitSet of printable characters as defined in RFC 1522.
060     */
061    private static final BitSet PRINTABLE_CHARS = new BitSet(256);
062    // Static initializer for printable chars collection
063    static {
064        // alpha characters
065        PRINTABLE_CHARS.set(' ');
066        PRINTABLE_CHARS.set('!');
067        PRINTABLE_CHARS.set('"');
068        PRINTABLE_CHARS.set('#');
069        PRINTABLE_CHARS.set('$');
070        PRINTABLE_CHARS.set('%');
071        PRINTABLE_CHARS.set('&');
072        PRINTABLE_CHARS.set('\'');
073        PRINTABLE_CHARS.set('(');
074        PRINTABLE_CHARS.set(')');
075        PRINTABLE_CHARS.set('*');
076        PRINTABLE_CHARS.set('+');
077        PRINTABLE_CHARS.set(',');
078        PRINTABLE_CHARS.set('-');
079        PRINTABLE_CHARS.set('.');
080        PRINTABLE_CHARS.set('/');
081        for (int i = '0'; i <= '9'; i++) {
082            PRINTABLE_CHARS.set(i);
083        }
084        PRINTABLE_CHARS.set(':');
085        PRINTABLE_CHARS.set(';');
086        PRINTABLE_CHARS.set('<');
087        PRINTABLE_CHARS.set('>');
088        PRINTABLE_CHARS.set('@');
089        for (int i = 'A'; i <= 'Z'; i++) {
090            PRINTABLE_CHARS.set(i);
091        }
092        PRINTABLE_CHARS.set('[');
093        PRINTABLE_CHARS.set('\\');
094        PRINTABLE_CHARS.set(']');
095        PRINTABLE_CHARS.set('^');
096        PRINTABLE_CHARS.set('`');
097        for (int i = 'a'; i <= 'z'; i++) {
098            PRINTABLE_CHARS.set(i);
099        }
100        PRINTABLE_CHARS.set('{');
101        PRINTABLE_CHARS.set('|');
102        PRINTABLE_CHARS.set('}');
103        PRINTABLE_CHARS.set('~');
104    }
105
106    private static final byte BLANK = 32;
107
108    private static final byte UNDERSCORE = 95;
109
110    private boolean encodeBlanks = false;
111
112    /**
113     * Default constructor.
114     */
115    public QCodec() {
116        this(Charsets.UTF_8);
117    }
118
119    /**
120     * Constructor which allows for the selection of a default charset.
121     *
122     * @param charset
123     *            the default string charset to use.
124     *
125     * @see <a href="http://download.oracle.com/javase/6/docs/api/java/nio/charset/Charset.html">Standard charsets</a>
126     * @since 1.7
127     */
128    public QCodec(final Charset charset) {
129        super();
130        this.charset = charset;
131    }
132
133    /**
134     * Constructor which allows for the selection of a default charset.
135     *
136     * @param charsetName
137     *            the charset to use.
138     * @throws java.nio.charset.UnsupportedCharsetException
139     *             If the named charset is unavailable
140     * @since 1.7 throws UnsupportedCharsetException if the named charset is unavailable
141     * @see <a href="http://download.oracle.com/javase/6/docs/api/java/nio/charset/Charset.html">Standard charsets</a>
142     */
143    public QCodec(final String charsetName) {
144        this(Charset.forName(charsetName));
145    }
146
147    @Override
148    protected String getEncoding() {
149        return "Q";
150    }
151
152    @Override
153    protected byte[] doEncoding(final byte[] bytes) {
154        if (bytes == null) {
155            return null;
156        }
157        final byte[] data = QuotedPrintableCodec.encodeQuotedPrintable(PRINTABLE_CHARS, bytes);
158        if (this.encodeBlanks) {
159            for (int i = 0; i < data.length; i++) {
160                if (data[i] == BLANK) {
161                    data[i] = UNDERSCORE;
162                }
163            }
164        }
165        return data;
166    }
167
168    @Override
169    protected byte[] doDecoding(final byte[] bytes) throws DecoderException {
170        if (bytes == null) {
171            return null;
172        }
173        boolean hasUnderscores = false;
174        for (final byte b : bytes) {
175            if (b == UNDERSCORE) {
176                hasUnderscores = true;
177                break;
178            }
179        }
180        if (hasUnderscores) {
181            final byte[] tmp = new byte[bytes.length];
182            for (int i = 0; i < bytes.length; i++) {
183                final byte b = bytes[i];
184                if (b != UNDERSCORE) {
185                    tmp[i] = b;
186                } else {
187                    tmp[i] = BLANK;
188                }
189            }
190            return QuotedPrintableCodec.decodeQuotedPrintable(tmp);
191        }
192        return QuotedPrintableCodec.decodeQuotedPrintable(bytes);
193    }
194
195    /**
196     * Encodes a string into its quoted-printable form using the specified charset. Unsafe characters are escaped.
197     *
198     * @param str
199     *            string to convert to quoted-printable form
200     * @param charset
201     *            the charset for str
202     * @return quoted-printable string
203     * @throws EncoderException
204     *             thrown if a failure condition is encountered during the encoding process.
205     * @since 1.7
206     */
207    public String encode(final String str, final Charset charset) throws EncoderException {
208        if (str == null) {
209            return null;
210        }
211        return encodeText(str, charset);
212    }
213
214    /**
215     * Encodes a string into its quoted-printable form using the specified charset. Unsafe characters are escaped.
216     *
217     * @param str
218     *            string to convert to quoted-printable form
219     * @param charset
220     *            the charset for str
221     * @return quoted-printable string
222     * @throws EncoderException
223     *             thrown if a failure condition is encountered during the encoding process.
224     */
225    public String encode(final String str, final String charset) throws EncoderException {
226        if (str == null) {
227            return null;
228        }
229        try {
230            return encodeText(str, charset);
231        } catch (final UnsupportedEncodingException e) {
232            throw new EncoderException(e.getMessage(), e);
233        }
234    }
235
236    /**
237     * Encodes a string into its quoted-printable form using the default charset. Unsafe characters are escaped.
238     *
239     * @param str
240     *            string to convert to quoted-printable form
241     * @return quoted-printable string
242     * @throws EncoderException
243     *             thrown if a failure condition is encountered during the encoding process.
244     */
245    @Override
246    public String encode(final String str) throws EncoderException {
247        if (str == null) {
248            return null;
249        }
250        return encode(str, getCharset());
251    }
252
253    /**
254     * Decodes a quoted-printable string into its original form. Escaped characters are converted back to their original
255     * representation.
256     *
257     * @param str
258     *            quoted-printable string to convert into its original form
259     * @return original string
260     * @throws DecoderException
261     *             A decoder exception is thrown if a failure condition is encountered during the decode process.
262     */
263    @Override
264    public String decode(final String str) throws DecoderException {
265        if (str == null) {
266            return null;
267        }
268        try {
269            return decodeText(str);
270        } catch (final UnsupportedEncodingException e) {
271            throw new DecoderException(e.getMessage(), e);
272        }
273    }
274
275    /**
276     * Encodes an object into its quoted-printable form using the default charset. Unsafe characters are escaped.
277     *
278     * @param obj
279     *            object to convert to quoted-printable form
280     * @return quoted-printable object
281     * @throws EncoderException
282     *             thrown if a failure condition is encountered during the encoding process.
283     */
284    @Override
285    public Object encode(final Object obj) throws EncoderException {
286        if (obj == null) {
287            return null;
288        } else if (obj instanceof String) {
289            return encode((String) obj);
290        } else {
291            throw new EncoderException("Objects of type " +
292                  obj.getClass().getName() +
293                  " cannot be encoded using Q codec");
294        }
295    }
296
297    /**
298     * Decodes a quoted-printable object into its original form. Escaped characters are converted back to their original
299     * representation.
300     *
301     * @param obj
302     *            quoted-printable object to convert into its original form
303     * @return original object
304     * @throws DecoderException
305     *             Thrown if the argument is not a <code>String</code>. Thrown if a failure condition is encountered
306     *             during the decode process.
307     */
308    @Override
309    public Object decode(final Object obj) throws DecoderException {
310        if (obj == null) {
311            return null;
312        } else if (obj instanceof String) {
313            return decode((String) obj);
314        } else {
315            throw new DecoderException("Objects of type " +
316                  obj.getClass().getName() +
317                  " cannot be decoded using Q codec");
318        }
319    }
320
321    /**
322     * Gets the default charset name used for string decoding and encoding.
323     *
324     * @return the default charset name
325     * @since 1.7
326     */
327    public Charset getCharset() {
328        return this.charset;
329    }
330
331    /**
332     * Gets the default charset name used for string decoding and encoding.
333     *
334     * @return the default charset name
335     */
336    public String getDefaultCharset() {
337        return this.charset.name();
338    }
339
340    /**
341     * Tests if optional transformation of SPACE characters is to be used
342     *
343     * @return <code>true</code> if SPACE characters are to be transformed, <code>false</code> otherwise
344     */
345    public boolean isEncodeBlanks() {
346        return this.encodeBlanks;
347    }
348
349    /**
350     * Defines whether optional transformation of SPACE characters is to be used
351     *
352     * @param b
353     *            <code>true</code> if SPACE characters are to be transformed, <code>false</code> otherwise
354     */
355    public void setEncodeBlanks(final boolean b) {
356        this.encodeBlanks = b;
357    }
358}