View Javadoc
1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one or more
3    * contributor license agreements.  See the NOTICE file distributed with
4    * this work for additional information regarding copyright ownership.
5    * The ASF licenses this file to You under the Apache License, Version 2.0
6    * (the "License"); you may not use this file except in compliance with
7    * the License.  You may obtain a copy of the License at
8    *
9    *      http://www.apache.org/licenses/LICENSE-2.0
10   *
11   * Unless required by applicable law or agreed to in writing, software
12   * distributed under the License is distributed on an "AS IS" BASIS,
13   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14   * See the License for the specific language governing permissions and
15   * limitations under the License.
16   */
17  
18  package org.apache.commons.codec.net;
19  
20  import java.io.UnsupportedEncodingException;
21  import java.nio.charset.Charset;
22  
23  import org.apache.commons.codec.DecoderException;
24  import org.apache.commons.codec.EncoderException;
25  import org.apache.commons.codec.binary.StringUtils;
26  
27  /**
28   * Implements methods common to all codecs defined in RFC 1522.
29   * <p>
30   * <a href="http://www.ietf.org/rfc/rfc1522.txt">RFC 1522</a> describes techniques to allow the
31   * encoding of non-ASCII text in various portions of a RFC 822 [2] message header, in a manner which
32   * is unlikely to confuse existing message handling software.
33   * </p>
34   * <p>
35   * This class is immutable and thread-safe.
36   * </p>
37   *
38   * @see <a href="http://www.ietf.org/rfc/rfc1522.txt">MIME (Multipurpose Internet Mail Extensions) Part Two:
39   *          Message Header Extensions for Non-ASCII Text</a>
40   *
41   * @since 1.3
42   */
43  abstract class RFC1522Codec {
44  
45      /** Separator. */
46      protected static final char SEP = '?';
47  
48      /** Prefix. */
49      protected static final String POSTFIX = "?=";
50  
51      /** Postfix. */
52      protected static final String PREFIX = "=?";
53  
54      /**
55       * Applies an RFC 1522 compliant decoding scheme to the given string of text.
56       * <p>
57       * This method processes the "encoded-word" header common to all the RFC 1522 codecs and then invokes
58       * {@link #doDecoding(byte[])}  method of a concrete class to perform the specific decoding.
59       * </p>
60       *
61       * @param text
62       *            a string to decode
63       * @return A new decoded String or {@code null} if the input is {@code null}.
64       * @throws DecoderException
65       *             thrown if there is an error condition during the decoding process.
66       * @throws UnsupportedEncodingException
67       *             thrown if charset specified in the "encoded-word" header is not supported
68       */
69      protected String decodeText(final String text)
70              throws DecoderException, UnsupportedEncodingException {
71          if (text == null) {
72              return null;
73          }
74          if (!text.startsWith(PREFIX) || !text.endsWith(POSTFIX)) {
75              throw new DecoderException("RFC 1522 violation: malformed encoded content");
76          }
77          final int terminator = text.length() - 2;
78          int from = 2;
79          int to = text.indexOf(SEP, from);
80          if (to == terminator) {
81              throw new DecoderException("RFC 1522 violation: charset token not found");
82          }
83          final String charset = text.substring(from, to);
84          if (charset.isEmpty()) {
85              throw new DecoderException("RFC 1522 violation: charset not specified");
86          }
87          from = to + 1;
88          to = text.indexOf(SEP, from);
89          if (to == terminator) {
90              throw new DecoderException("RFC 1522 violation: encoding token not found");
91          }
92          final String encoding = text.substring(from, to);
93          if (!getEncoding().equalsIgnoreCase(encoding)) {
94              throw new DecoderException("This codec cannot decode " + encoding + " encoded content");
95          }
96          from = to + 1;
97          to = text.indexOf(SEP, from);
98          byte[] data = StringUtils.getBytesUsAscii(text.substring(from, to));
99          data = doDecoding(data);
100         return new String(data, charset);
101     }
102 
103     /**
104      * Decodes an array of bytes using the defined encoding scheme.
105      *
106      * @param bytes
107      *            Data to be decoded
108      * @return a byte array that contains decoded data
109      * @throws DecoderException
110      *             A decoder exception is thrown if a Decoder encounters a failure condition during the decode process.
111      */
112     protected abstract byte[] doDecoding(byte[] bytes) throws DecoderException;
113 
114     /**
115      * Encodes an array of bytes using the defined encoding scheme.
116      *
117      * @param bytes
118      *            Data to be encoded
119      * @return A byte array containing the encoded data
120      * @throws EncoderException
121      *             thrown if the Encoder encounters a failure condition during the encoding process.
122      */
123     protected abstract byte[] doEncoding(byte[] bytes) throws EncoderException;
124 
125     /**
126      * Applies an RFC 1522 compliant encoding scheme to the given string of text with the given charset.
127      * <p>
128      * This method constructs the "encoded-word" header common to all the RFC 1522 codecs and then invokes
129      * {@link #doEncoding(byte[])}  method of a concrete class to perform the specific encoding.
130      * </p>
131      *
132      * @param text
133      *            a string to encode
134      * @param charset
135      *            a charset to be used
136      * @return RFC 1522 compliant "encoded-word"
137      * @throws EncoderException
138      *             thrown if there is an error condition during the Encoding process.
139      * @see Charset
140      */
141     protected String encodeText(final String text, final Charset charset) throws EncoderException {
142         if (text == null) {
143             return null;
144         }
145         final StringBuilder buffer = new StringBuilder();
146         buffer.append(PREFIX);
147         buffer.append(charset);
148         buffer.append(SEP);
149         buffer.append(this.getEncoding());
150         buffer.append(SEP);
151         buffer.append(StringUtils.newStringUsAscii(this.doEncoding(text.getBytes(charset))));
152         buffer.append(POSTFIX);
153         return buffer.toString();
154     }
155 
156     /**
157      * Applies an RFC 1522 compliant encoding scheme to the given string of text with the given charset.
158      * <p>
159      * This method constructs the "encoded-word" header common to all the RFC 1522 codecs and then invokes
160      * {@link #doEncoding(byte[])}  method of a concrete class to perform the specific encoding.
161      * </p>
162      *
163      * @param text
164      *            a string to encode
165      * @param charsetName
166      *            the charset to use
167      * @return RFC 1522 compliant "encoded-word"
168      * @throws EncoderException
169      *             thrown if there is an error condition during the Encoding process.
170      * @throws UnsupportedEncodingException
171      *             if charset is not available
172      * @see Charset
173      */
174     protected String encodeText(final String text, final String charsetName)
175             throws EncoderException, UnsupportedEncodingException {
176         if (text == null) {
177             return null;
178         }
179         return this.encodeText(text, Charset.forName(charsetName));
180     }
181 
182     /**
183      * Returns the codec name (referred to as encoding in the RFC 1522).
184      *
185      * @return name of the codec
186      */
187     protected abstract String getEncoding();
188 }