QCodec.java

  1. /*
  2.  * Licensed to the Apache Software Foundation (ASF) under one or more
  3.  * contributor license agreements.  See the NOTICE file distributed with
  4.  * this work for additional information regarding copyright ownership.
  5.  * The ASF licenses this file to You under the Apache License, Version 2.0
  6.  * (the "License"); you may not use this file except in compliance with
  7.  * the License.  You may obtain a copy of the License at
  8.  *
  9.  *      http://www.apache.org/licenses/LICENSE-2.0
  10.  *
  11.  * Unless required by applicable law or agreed to in writing, software
  12.  * distributed under the License is distributed on an "AS IS" BASIS,
  13.  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  14.  * See the License for the specific language governing permissions and
  15.  * limitations under the License.
  16.  */

  17. package org.apache.commons.codec.net;

  18. import java.io.UnsupportedEncodingException;
  19. import java.nio.charset.Charset;
  20. import java.util.BitSet;

  21. import org.apache.commons.codec.Charsets;
  22. import org.apache.commons.codec.DecoderException;
  23. import org.apache.commons.codec.EncoderException;
  24. import org.apache.commons.codec.StringDecoder;
  25. import org.apache.commons.codec.StringEncoder;

  26. /**
  27.  * Similar to the Quoted-Printable content-transfer-encoding defined in
  28.  * <a href="http://www.ietf.org/rfc/rfc1521.txt">RFC 1521</a> and designed to allow text containing mostly ASCII
  29.  * characters to be decipherable on an ASCII terminal without decoding.
  30.  * <p>
  31.  * <a href="http://www.ietf.org/rfc/rfc1522.txt">RFC 1522</a> describes techniques to allow the encoding of non-ASCII
  32.  * text in various portions of a RFC 822 [2] message header, in a manner which is unlikely to confuse existing message
  33.  * handling software.
  34.  * <p>
  35.  * This class is conditionally thread-safe.
  36.  * The instance field {@link #encodeBlanks} is mutable {@link #setEncodeBlanks(boolean)}
  37.  * but is not volatile, and accesses are not synchronised.
  38.  * If an instance of the class is shared between threads, the caller needs to ensure that suitable synchronisation
  39.  * is used to ensure safe publication of the value between threads, and must not invoke
  40.  * {@link #setEncodeBlanks(boolean)} after initial setup.
  41.  *
  42.  * @see <a href="http://www.ietf.org/rfc/rfc1522.txt">MIME (Multipurpose Internet Mail Extensions) Part Two: Message
  43.  *          Header Extensions for Non-ASCII Text</a>
  44.  *
  45.  * @since 1.3
  46.  * @version $Id: QCodec.java 1619948 2014-08-22 22:53:55Z ggregory $
  47.  */
  48. public class QCodec extends RFC1522Codec implements StringEncoder, StringDecoder {
  49.     /**
  50.      * The default charset used for string decoding and encoding.
  51.      */
  52.     private final Charset charset;

  53.     /**
  54.      * BitSet of printable characters as defined in RFC 1522.
  55.      */
  56.     private static final BitSet PRINTABLE_CHARS = new BitSet(256);
  57.     // Static initializer for printable chars collection
  58.     static {
  59.         // alpha characters
  60.         PRINTABLE_CHARS.set(' ');
  61.         PRINTABLE_CHARS.set('!');
  62.         PRINTABLE_CHARS.set('"');
  63.         PRINTABLE_CHARS.set('#');
  64.         PRINTABLE_CHARS.set('$');
  65.         PRINTABLE_CHARS.set('%');
  66.         PRINTABLE_CHARS.set('&');
  67.         PRINTABLE_CHARS.set('\'');
  68.         PRINTABLE_CHARS.set('(');
  69.         PRINTABLE_CHARS.set(')');
  70.         PRINTABLE_CHARS.set('*');
  71.         PRINTABLE_CHARS.set('+');
  72.         PRINTABLE_CHARS.set(',');
  73.         PRINTABLE_CHARS.set('-');
  74.         PRINTABLE_CHARS.set('.');
  75.         PRINTABLE_CHARS.set('/');
  76.         for (int i = '0'; i <= '9'; i++) {
  77.             PRINTABLE_CHARS.set(i);
  78.         }
  79.         PRINTABLE_CHARS.set(':');
  80.         PRINTABLE_CHARS.set(';');
  81.         PRINTABLE_CHARS.set('<');
  82.         PRINTABLE_CHARS.set('>');
  83.         PRINTABLE_CHARS.set('@');
  84.         for (int i = 'A'; i <= 'Z'; i++) {
  85.             PRINTABLE_CHARS.set(i);
  86.         }
  87.         PRINTABLE_CHARS.set('[');
  88.         PRINTABLE_CHARS.set('\\');
  89.         PRINTABLE_CHARS.set(']');
  90.         PRINTABLE_CHARS.set('^');
  91.         PRINTABLE_CHARS.set('`');
  92.         for (int i = 'a'; i <= 'z'; i++) {
  93.             PRINTABLE_CHARS.set(i);
  94.         }
  95.         PRINTABLE_CHARS.set('{');
  96.         PRINTABLE_CHARS.set('|');
  97.         PRINTABLE_CHARS.set('}');
  98.         PRINTABLE_CHARS.set('~');
  99.     }

  100.     private static final byte BLANK = 32;

  101.     private static final byte UNDERSCORE = 95;

  102.     private boolean encodeBlanks = false;

  103.     /**
  104.      * Default constructor.
  105.      */
  106.     public QCodec() {
  107.         this(Charsets.UTF_8);
  108.     }

  109.     /**
  110.      * Constructor which allows for the selection of a default charset.
  111.      *
  112.      * @param charset
  113.      *            the default string charset to use.
  114.      *
  115.      * @see <a href="http://download.oracle.com/javase/6/docs/api/java/nio/charset/Charset.html">Standard charsets</a>
  116.      * @since 1.7
  117.      */
  118.     public QCodec(final Charset charset) {
  119.         super();
  120.         this.charset = charset;
  121.     }

  122.     /**
  123.      * Constructor which allows for the selection of a default charset.
  124.      *
  125.      * @param charsetName
  126.      *            the charset to use.
  127.      * @throws java.nio.charset.UnsupportedCharsetException
  128.      *             If the named charset is unavailable
  129.      * @since 1.7 throws UnsupportedCharsetException if the named charset is unavailable
  130.      * @see <a href="http://download.oracle.com/javase/6/docs/api/java/nio/charset/Charset.html">Standard charsets</a>
  131.      */
  132.     public QCodec(final String charsetName) {
  133.         this(Charset.forName(charsetName));
  134.     }

  135.     @Override
  136.     protected String getEncoding() {
  137.         return "Q";
  138.     }

  139.     @Override
  140.     protected byte[] doEncoding(final byte[] bytes) {
  141.         if (bytes == null) {
  142.             return null;
  143.         }
  144.         final byte[] data = QuotedPrintableCodec.encodeQuotedPrintable(PRINTABLE_CHARS, bytes);
  145.         if (this.encodeBlanks) {
  146.             for (int i = 0; i < data.length; i++) {
  147.                 if (data[i] == BLANK) {
  148.                     data[i] = UNDERSCORE;
  149.                 }
  150.             }
  151.         }
  152.         return data;
  153.     }

  154.     @Override
  155.     protected byte[] doDecoding(final byte[] bytes) throws DecoderException {
  156.         if (bytes == null) {
  157.             return null;
  158.         }
  159.         boolean hasUnderscores = false;
  160.         for (final byte b : bytes) {
  161.             if (b == UNDERSCORE) {
  162.                 hasUnderscores = true;
  163.                 break;
  164.             }
  165.         }
  166.         if (hasUnderscores) {
  167.             final byte[] tmp = new byte[bytes.length];
  168.             for (int i = 0; i < bytes.length; i++) {
  169.                 final byte b = bytes[i];
  170.                 if (b != UNDERSCORE) {
  171.                     tmp[i] = b;
  172.                 } else {
  173.                     tmp[i] = BLANK;
  174.                 }
  175.             }
  176.             return QuotedPrintableCodec.decodeQuotedPrintable(tmp);
  177.         }
  178.         return QuotedPrintableCodec.decodeQuotedPrintable(bytes);
  179.     }

  180.     /**
  181.      * Encodes a string into its quoted-printable form using the specified charset. Unsafe characters are escaped.
  182.      *
  183.      * @param str
  184.      *            string to convert to quoted-printable form
  185.      * @param charset
  186.      *            the charset for str
  187.      * @return quoted-printable string
  188.      * @throws EncoderException
  189.      *             thrown if a failure condition is encountered during the encoding process.
  190.      * @since 1.7
  191.      */
  192.     public String encode(final String str, final Charset charset) throws EncoderException {
  193.         if (str == null) {
  194.             return null;
  195.         }
  196.         return encodeText(str, charset);
  197.     }

  198.     /**
  199.      * Encodes a string into its quoted-printable form using the specified charset. Unsafe characters are escaped.
  200.      *
  201.      * @param str
  202.      *            string to convert to quoted-printable form
  203.      * @param charset
  204.      *            the charset for str
  205.      * @return quoted-printable string
  206.      * @throws EncoderException
  207.      *             thrown if a failure condition is encountered during the encoding process.
  208.      */
  209.     public String encode(final String str, final String charset) throws EncoderException {
  210.         if (str == null) {
  211.             return null;
  212.         }
  213.         try {
  214.             return encodeText(str, charset);
  215.         } catch (final UnsupportedEncodingException e) {
  216.             throw new EncoderException(e.getMessage(), e);
  217.         }
  218.     }

  219.     /**
  220.      * Encodes a string into its quoted-printable form using the default charset. Unsafe characters are escaped.
  221.      *
  222.      * @param str
  223.      *            string to convert to quoted-printable form
  224.      * @return quoted-printable string
  225.      * @throws EncoderException
  226.      *             thrown if a failure condition is encountered during the encoding process.
  227.      */
  228.     @Override
  229.     public String encode(final String str) throws EncoderException {
  230.         if (str == null) {
  231.             return null;
  232.         }
  233.         return encode(str, getCharset());
  234.     }

  235.     /**
  236.      * Decodes a quoted-printable string into its original form. Escaped characters are converted back to their original
  237.      * representation.
  238.      *
  239.      * @param str
  240.      *            quoted-printable string to convert into its original form
  241.      * @return original string
  242.      * @throws DecoderException
  243.      *             A decoder exception is thrown if a failure condition is encountered during the decode process.
  244.      */
  245.     @Override
  246.     public String decode(final String str) throws DecoderException {
  247.         if (str == null) {
  248.             return null;
  249.         }
  250.         try {
  251.             return decodeText(str);
  252.         } catch (final UnsupportedEncodingException e) {
  253.             throw new DecoderException(e.getMessage(), e);
  254.         }
  255.     }

  256.     /**
  257.      * Encodes an object into its quoted-printable form using the default charset. Unsafe characters are escaped.
  258.      *
  259.      * @param obj
  260.      *            object to convert to quoted-printable form
  261.      * @return quoted-printable object
  262.      * @throws EncoderException
  263.      *             thrown if a failure condition is encountered during the encoding process.
  264.      */
  265.     @Override
  266.     public Object encode(final Object obj) throws EncoderException {
  267.         if (obj == null) {
  268.             return null;
  269.         } else if (obj instanceof String) {
  270.             return encode((String) obj);
  271.         } else {
  272.             throw new EncoderException("Objects of type " +
  273.                   obj.getClass().getName() +
  274.                   " cannot be encoded using Q codec");
  275.         }
  276.     }

  277.     /**
  278.      * Decodes a quoted-printable object into its original form. Escaped characters are converted back to their original
  279.      * representation.
  280.      *
  281.      * @param obj
  282.      *            quoted-printable object to convert into its original form
  283.      * @return original object
  284.      * @throws DecoderException
  285.      *             Thrown if the argument is not a <code>String</code>. Thrown if a failure condition is encountered
  286.      *             during the decode process.
  287.      */
  288.     @Override
  289.     public Object decode(final Object obj) throws DecoderException {
  290.         if (obj == null) {
  291.             return null;
  292.         } else if (obj instanceof String) {
  293.             return decode((String) obj);
  294.         } else {
  295.             throw new DecoderException("Objects of type " +
  296.                   obj.getClass().getName() +
  297.                   " cannot be decoded using Q codec");
  298.         }
  299.     }

  300.     /**
  301.      * Gets the default charset name used for string decoding and encoding.
  302.      *
  303.      * @return the default charset name
  304.      * @since 1.7
  305.      */
  306.     public Charset getCharset() {
  307.         return this.charset;
  308.     }

  309.     /**
  310.      * Gets the default charset name used for string decoding and encoding.
  311.      *
  312.      * @return the default charset name
  313.      */
  314.     public String getDefaultCharset() {
  315.         return this.charset.name();
  316.     }

  317.     /**
  318.      * Tests if optional transformation of SPACE characters is to be used
  319.      *
  320.      * @return <code>true</code> if SPACE characters are to be transformed, <code>false</code> otherwise
  321.      */
  322.     public boolean isEncodeBlanks() {
  323.         return this.encodeBlanks;
  324.     }

  325.     /**
  326.      * Defines whether optional transformation of SPACE characters is to be used
  327.      *
  328.      * @param b
  329.      *            <code>true</code> if SPACE characters are to be transformed, <code>false</code> otherwise
  330.      */
  331.     public void setEncodeBlanks(final boolean b) {
  332.         this.encodeBlanks = b;
  333.     }
  334. }