View Javadoc

1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one or more
3    * contributor license agreements.  See the NOTICE file distributed with
4    * this work for additional information regarding copyright ownership.
5    * The ASF licenses this file to You under the Apache License, Version 2.0
6    * (the "License"); you may not use this file except in compliance with
7    * the License.  You may obtain a copy of the License at
8    *
9    *      http://www.apache.org/licenses/LICENSE-2.0
10   *
11   * Unless required by applicable law or agreed to in writing, software
12   * distributed under the License is distributed on an "AS IS" BASIS,
13   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14   * See the License for the specific language governing permissions and
15   * limitations under the License.
16   */
17  
18  package org.apache.commons.codec.net;
19  
20  import java.io.UnsupportedEncodingException;
21  import java.nio.charset.Charset;
22  
23  import org.apache.commons.codec.DecoderException;
24  import org.apache.commons.codec.EncoderException;
25  import org.apache.commons.codec.binary.StringUtils;
26  
27  /**
28   * Implements methods common to all codecs defined in RFC 1522.
29   * <p>
30   * <a href="http://www.ietf.org/rfc/rfc1522.txt">RFC 1522</a> describes techniques to allow the
31   * encoding of non-ASCII text in various portions of a RFC 822 [2] message header, in a manner which
32   * is unlikely to confuse existing message handling software.
33   * <p>
34   * This class is immutable and thread-safe.
35   *
36   * @see <a href="http://www.ietf.org/rfc/rfc1522.txt">MIME (Multipurpose Internet Mail Extensions) Part Two:
37   *          Message Header Extensions for Non-ASCII Text</a>
38   *
39   * @since 1.3
40   * @version $Id: RFC1522Codec.html 889935 2013-12-11 05:05:13Z ggregory $
41   */
42  abstract class RFC1522Codec {
43  
44      /** Separator. */
45      protected static final char SEP = '?';
46  
47      /** Prefix. */
48      protected static final String POSTFIX = "?=";
49  
50      /** Postfix. */
51      protected static final String PREFIX = "=?";
52  
53      /**
54       * Applies an RFC 1522 compliant encoding scheme to the given string of text with the given charset.
55       * <p>
56       * This method constructs the "encoded-word" header common to all the RFC 1522 codecs and then invokes
57       * {@link #doEncoding(byte [])} method of a concrete class to perform the specific encoding.
58       *
59       * @param text
60       *            a string to encode
61       * @param charset
62       *            a charset to be used
63       * @return RFC 1522 compliant "encoded-word"
64       * @throws EncoderException
65       *             thrown if there is an error condition during the Encoding process.
66       * @see <a href="http://download.oracle.com/javase/6/docs/api/java/nio/charset/Charset.html">Standard charsets</a>
67       */
68      protected String encodeText(final String text, final Charset charset) throws EncoderException {
69          if (text == null) {
70              return null;
71          }
72          StringBuilder buffer = new StringBuilder();
73          buffer.append(PREFIX);
74          buffer.append(charset);
75          buffer.append(SEP);
76          buffer.append(this.getEncoding());
77          buffer.append(SEP);
78          byte [] rawData = this.doEncoding(text.getBytes(charset));
79          buffer.append(StringUtils.newStringUsAscii(rawData));
80          buffer.append(POSTFIX);
81          return buffer.toString();
82      }
83  
84      /**
85       * Applies an RFC 1522 compliant encoding scheme to the given string of text with the given charset.
86       * <p>
87       * This method constructs the "encoded-word" header common to all the RFC 1522 codecs and then invokes
88       * {@link #doEncoding(byte [])} method of a concrete class to perform the specific encoding.
89       *
90       * @param text
91       *            a string to encode
92       * @param charsetName
93       *            the charset to use
94       * @return RFC 1522 compliant "encoded-word"
95       * @throws EncoderException
96       *             thrown if there is an error condition during the Encoding process.
97       * @throws UnsupportedEncodingException
98       *             if charset is not available
99       *
100      * @see <a href="http://download.oracle.com/javase/6/docs/api/java/nio/charset/Charset.html">Standard charsets</a>
101      */
102     protected String encodeText(final String text, final String charsetName)
103             throws EncoderException, UnsupportedEncodingException {
104         if (text == null) {
105             return null;
106         }
107         return this.encodeText(text, Charset.forName(charsetName));
108     }
109 
110     /**
111      * Applies an RFC 1522 compliant decoding scheme to the given string of text.
112      * <p>
113      * This method processes the "encoded-word" header common to all the RFC 1522 codecs and then invokes
114      * {@link #doEncoding(byte [])} method of a concrete class to perform the specific decoding.
115      *
116      * @param text
117      *            a string to decode
118      * @return A new decoded String or {@code null} if the input is {@code null}.
119      * @throws DecoderException
120      *             thrown if there is an error condition during the decoding process.
121      * @throws UnsupportedEncodingException
122      *             thrown if charset specified in the "encoded-word" header is not supported
123      */
124     protected String decodeText(final String text)
125             throws DecoderException, UnsupportedEncodingException {
126         if (text == null) {
127             return null;
128         }
129         if (!text.startsWith(PREFIX) || !text.endsWith(POSTFIX)) {
130             throw new DecoderException("RFC 1522 violation: malformed encoded content");
131         }
132         int terminator = text.length() - 2;
133         int from = 2;
134         int to = text.indexOf(SEP, from);
135         if (to == terminator) {
136             throw new DecoderException("RFC 1522 violation: charset token not found");
137         }
138         String charset = text.substring(from, to);
139         if (charset.equals("")) {
140             throw new DecoderException("RFC 1522 violation: charset not specified");
141         }
142         from = to + 1;
143         to = text.indexOf(SEP, from);
144         if (to == terminator) {
145             throw new DecoderException("RFC 1522 violation: encoding token not found");
146         }
147         String encoding = text.substring(from, to);
148         if (!getEncoding().equalsIgnoreCase(encoding)) {
149             throw new DecoderException("This codec cannot decode " + encoding + " encoded content");
150         }
151         from = to + 1;
152         to = text.indexOf(SEP, from);
153         byte[] data = StringUtils.getBytesUsAscii(text.substring(from, to));
154         data = doDecoding(data);
155         return new String(data, charset);
156     }
157 
158     /**
159      * Returns the codec name (referred to as encoding in the RFC 1522).
160      *
161      * @return name of the codec
162      */
163     protected abstract String getEncoding();
164 
165     /**
166      * Encodes an array of bytes using the defined encoding scheme.
167      *
168      * @param bytes
169      *            Data to be encoded
170      * @return A byte array containing the encoded data
171      * @throws EncoderException
172      *             thrown if the Encoder encounters a failure condition during the encoding process.
173      */
174     protected abstract byte[] doEncoding(byte[] bytes) throws EncoderException;
175 
176     /**
177      * Decodes an array of bytes using the defined encoding scheme.
178      *
179      * @param bytes
180      *            Data to be decoded
181      * @return a byte array that contains decoded data
182      * @throws DecoderException
183      *             A decoder exception is thrown if a Decoder encounters a failure condition during the decode process.
184      */
185     protected abstract byte[] doDecoding(byte[] bytes) throws DecoderException;
186 }