001/*
002 * Licensed to the Apache Software Foundation (ASF) under one or more
003 * contributor license agreements.  See the NOTICE file distributed with
004 * this work for additional information regarding copyright ownership.
005 * The ASF licenses this file to You under the Apache License, Version 2.0
006 * (the "License"); you may not use this file except in compliance with
007 * the License.  You may obtain a copy of the License at
008 *
009 *      http://www.apache.org/licenses/LICENSE-2.0
010 *
011 * Unless required by applicable law or agreed to in writing, software
012 * distributed under the License is distributed on an "AS IS" BASIS,
013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 * See the License for the specific language governing permissions and
015 * limitations under the License.
016 */
017
018package org.apache.commons.codec.net;
019
020import java.io.UnsupportedEncodingException;
021import java.nio.charset.Charset;
022import java.nio.charset.StandardCharsets;
023import java.util.BitSet;
024
025import org.apache.commons.codec.DecoderException;
026import org.apache.commons.codec.EncoderException;
027import org.apache.commons.codec.StringDecoder;
028import org.apache.commons.codec.StringEncoder;
029
030/**
031 * Similar to the Quoted-Printable content-transfer-encoding defined in
032 * <a href="http://www.ietf.org/rfc/rfc1521.txt">RFC 1521</a> and designed to allow text containing mostly ASCII
033 * characters to be decipherable on an ASCII terminal without decoding.
034 * <p>
035 * <a href="http://www.ietf.org/rfc/rfc1522.txt">RFC 1522</a> describes techniques to allow the encoding of non-ASCII
036 * text in various portions of a RFC 822 [2] message header, in a manner which is unlikely to confuse existing message
037 * handling software.
038 * </p>
039 * <p>
040 * This class is conditionally thread-safe.
041 * The instance field for encoding blanks is mutable {@link #setEncodeBlanks(boolean)}
042 * but is not volatile, and accesses are not synchronized.
043 * If an instance of the class is shared between threads, the caller needs to ensure that suitable synchronization
044 * is used to ensure safe publication of the value between threads, and must not invoke
045 * {@link #setEncodeBlanks(boolean)} after initial setup.
046 * </p>
047 *
048 * @see <a href="http://www.ietf.org/rfc/rfc1522.txt">MIME (Multipurpose Internet Mail Extensions) Part Two: Message
049 *          Header Extensions for Non-ASCII Text</a>
050 *
051 * @since 1.3
052 */
053public class QCodec extends RFC1522Codec implements StringEncoder, StringDecoder {
054    /**
055     * BitSet of printable characters as defined in RFC 1522.
056     */
057    private static final BitSet PRINTABLE_CHARS = new BitSet(256);
058
059    // Static initializer for printable chars collection
060    static {
061        // alpha characters
062        PRINTABLE_CHARS.set(' ');
063        PRINTABLE_CHARS.set('!');
064        PRINTABLE_CHARS.set('"');
065        PRINTABLE_CHARS.set('#');
066        PRINTABLE_CHARS.set('$');
067        PRINTABLE_CHARS.set('%');
068        PRINTABLE_CHARS.set('&');
069        PRINTABLE_CHARS.set('\'');
070        PRINTABLE_CHARS.set('(');
071        PRINTABLE_CHARS.set(')');
072        PRINTABLE_CHARS.set('*');
073        PRINTABLE_CHARS.set('+');
074        PRINTABLE_CHARS.set(',');
075        PRINTABLE_CHARS.set('-');
076        PRINTABLE_CHARS.set('.');
077        PRINTABLE_CHARS.set('/');
078        for (int i = '0'; i <= '9'; i++) {
079            PRINTABLE_CHARS.set(i);
080        }
081        PRINTABLE_CHARS.set(':');
082        PRINTABLE_CHARS.set(';');
083        PRINTABLE_CHARS.set('<');
084        PRINTABLE_CHARS.set('>');
085        PRINTABLE_CHARS.set('@');
086        for (int i = 'A'; i <= 'Z'; i++) {
087            PRINTABLE_CHARS.set(i);
088        }
089        PRINTABLE_CHARS.set('[');
090        PRINTABLE_CHARS.set('\\');
091        PRINTABLE_CHARS.set(']');
092        PRINTABLE_CHARS.set('^');
093        PRINTABLE_CHARS.set('`');
094        for (int i = 'a'; i <= 'z'; i++) {
095            PRINTABLE_CHARS.set(i);
096        }
097        PRINTABLE_CHARS.set('{');
098        PRINTABLE_CHARS.set('|');
099        PRINTABLE_CHARS.set('}');
100        PRINTABLE_CHARS.set('~');
101    }
102    private static final byte SPACE = 32;
103
104    private static final byte UNDERSCORE = 95;
105
106    /**
107     * The default Charset used for string decoding and encoding.
108     */
109    private final Charset charset;
110
111    private boolean encodeBlanks;
112
113    /**
114     * Default constructor.
115     */
116    public QCodec() {
117        this(StandardCharsets.UTF_8);
118    }
119
120    /**
121     * Constructor which allows for the selection of a default Charset.
122     *
123     * @param charset
124     *            the default string Charset to use.
125     *
126     * @see Charset
127     * @since 1.7
128     */
129    public QCodec(final Charset charset) {
130        this.charset = charset;
131    }
132
133    /**
134     * Constructor which allows for the selection of a default Charset.
135     *
136     * @param charsetName
137     *            the Charset to use.
138     * @throws java.nio.charset.UnsupportedCharsetException
139     *             If the named Charset is unavailable
140     * @since 1.7 throws UnsupportedCharsetException if the named Charset is unavailable
141     * @see Charset
142     */
143    public QCodec(final String charsetName) {
144        this(Charset.forName(charsetName));
145    }
146
147    /**
148     * Decodes a quoted-printable object into its original form. Escaped characters are converted back to their original
149     * representation.
150     *
151     * @param obj
152     *            quoted-printable object to convert into its original form
153     * @return original object
154     * @throws DecoderException
155     *             Thrown if the argument is not a {@code String}. Thrown if a failure condition is encountered
156     *             during the decode process.
157     */
158    @Override
159    public Object decode(final Object obj) throws DecoderException {
160        if (obj == null) {
161            return null;
162        }
163        if (obj instanceof String) {
164            return decode((String) obj);
165        }
166        throw new DecoderException("Objects of type " +
167              obj.getClass().getName() +
168              " cannot be decoded using Q codec");
169    }
170
171    /**
172     * Decodes a quoted-printable string into its original form. Escaped characters are converted back to their original
173     * representation.
174     *
175     * @param str
176     *            quoted-printable string to convert into its original form
177     * @return original string
178     * @throws DecoderException
179     *             A decoder exception is thrown if a failure condition is encountered during the decode process.
180     */
181    @Override
182    public String decode(final String str) throws DecoderException {
183        if (str == null) {
184            return null;
185        }
186        try {
187            return decodeText(str);
188        } catch (final UnsupportedEncodingException e) {
189            throw new DecoderException(e.getMessage(), e);
190        }
191    }
192
193    @Override
194    protected byte[] doDecoding(final byte[] bytes) throws DecoderException {
195        if (bytes == null) {
196            return null;
197        }
198        boolean hasUnderscores = false;
199        for (final byte b : bytes) {
200            if (b == UNDERSCORE) {
201                hasUnderscores = true;
202                break;
203            }
204        }
205        if (hasUnderscores) {
206            final byte[] tmp = new byte[bytes.length];
207            for (int i = 0; i < bytes.length; i++) {
208                final byte b = bytes[i];
209                if (b != UNDERSCORE) {
210                    tmp[i] = b;
211                } else {
212                    tmp[i] = SPACE;
213                }
214            }
215            return QuotedPrintableCodec.decodeQuotedPrintable(tmp);
216        }
217        return QuotedPrintableCodec.decodeQuotedPrintable(bytes);
218    }
219
220    @Override
221    protected byte[] doEncoding(final byte[] bytes) {
222        if (bytes == null) {
223            return null;
224        }
225        final byte[] data = QuotedPrintableCodec.encodeQuotedPrintable(PRINTABLE_CHARS, bytes);
226        if (this.encodeBlanks) {
227            for (int i = 0; i < data.length; i++) {
228                if (data[i] == SPACE) {
229                    data[i] = UNDERSCORE;
230                }
231            }
232        }
233        return data;
234    }
235
236    /**
237     * Encodes an object into its quoted-printable form using the default Charset. Unsafe characters are escaped.
238     *
239     * @param obj
240     *            object to convert to quoted-printable form
241     * @return quoted-printable object
242     * @throws EncoderException
243     *             thrown if a failure condition is encountered during the encoding process.
244     */
245    @Override
246    public Object encode(final Object obj) throws EncoderException {
247        if (obj == null) {
248            return null;
249        }
250        if (obj instanceof String) {
251            return encode((String) obj);
252        }
253        throw new EncoderException("Objects of type " +
254              obj.getClass().getName() +
255              " cannot be encoded using Q codec");
256    }
257
258    /**
259     * Encodes a string into its quoted-printable form using the default Charset. Unsafe characters are escaped.
260     *
261     * @param sourceStr
262     *            string to convert to quoted-printable form
263     * @return quoted-printable string
264     * @throws EncoderException
265     *             thrown if a failure condition is encountered during the encoding process.
266     */
267    @Override
268    public String encode(final String sourceStr) throws EncoderException {
269        if (sourceStr == null) {
270            return null;
271        }
272        return encode(sourceStr, getCharset());
273    }
274
275    /**
276     * Encodes a string into its quoted-printable form using the specified Charset. Unsafe characters are escaped.
277     *
278     * @param sourceStr
279     *            string to convert to quoted-printable form
280     * @param sourceCharset
281     *            the Charset for sourceStr
282     * @return quoted-printable string
283     * @throws EncoderException
284     *             thrown if a failure condition is encountered during the encoding process.
285     * @since 1.7
286     */
287    public String encode(final String sourceStr, final Charset sourceCharset) throws EncoderException {
288        if (sourceStr == null) {
289            return null;
290        }
291        return encodeText(sourceStr, sourceCharset);
292    }
293
294    /**
295     * Encodes a string into its quoted-printable form using the specified Charset. Unsafe characters are escaped.
296     *
297     * @param sourceStr
298     *            string to convert to quoted-printable form
299     * @param sourceCharset
300     *            the Charset for sourceStr
301     * @return quoted-printable string
302     * @throws EncoderException
303     *             thrown if a failure condition is encountered during the encoding process.
304     */
305    public String encode(final String sourceStr, final String sourceCharset) throws EncoderException {
306        if (sourceStr == null) {
307            return null;
308        }
309        try {
310            return encodeText(sourceStr, sourceCharset);
311        } catch (final UnsupportedEncodingException e) {
312            throw new EncoderException(e.getMessage(), e);
313        }
314    }
315
316    /**
317     * Gets the default Charset name used for string decoding and encoding.
318     *
319     * @return the default Charset name
320     * @since 1.7
321     */
322    public Charset getCharset() {
323        return this.charset;
324    }
325
326    /**
327     * Gets the default Charset name used for string decoding and encoding.
328     *
329     * @return the default Charset name
330     */
331    public String getDefaultCharset() {
332        return this.charset.name();
333    }
334
335    @Override
336    protected String getEncoding() {
337        return "Q";
338    }
339
340    /**
341     * Tests if optional transformation of SPACE characters is to be used
342     *
343     * @return {@code true} if SPACE characters are to be transformed, {@code false} otherwise
344     */
345    public boolean isEncodeBlanks() {
346        return this.encodeBlanks;
347    }
348
349    /**
350     * Defines whether optional transformation of SPACE characters is to be used
351     *
352     * @param b
353     *            {@code true} if SPACE characters are to be transformed, {@code false} otherwise
354     */
355    public void setEncodeBlanks(final boolean b) {
356        this.encodeBlanks = b;
357    }
358}