View Javadoc

1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one or more
3    * contributor license agreements.  See the NOTICE file distributed with
4    * this work for additional information regarding copyright ownership.
5    * The ASF licenses this file to You under the Apache License, Version 2.0
6    * (the "License"); you may not use this file except in compliance with
7    * the License.  You may obtain a copy of the License at
8    * 
9    *      http://www.apache.org/licenses/LICENSE-2.0
10   * 
11   * Unless required by applicable law or agreed to in writing, software
12   * distributed under the License is distributed on an "AS IS" BASIS,
13   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14   * See the License for the specific language governing permissions and
15   * limitations under the License.
16   */
17  
18  package org.apache.commons.codec.net;
19  
20  import java.io.UnsupportedEncodingException;
21  
22  import org.apache.commons.codec.DecoderException;
23  import org.apache.commons.codec.EncoderException;
24  import org.apache.commons.codec.binary.StringUtils;
25  
26  /**
27   * <p>
28   * Implements methods common to all codecs defined in RFC 1522.
29   * </p>
30   * 
31   * <p>
32   * <a href="http://www.ietf.org/rfc/rfc1522.txt">RFC 1522</a> 
33   * describes techniques to allow the encoding of non-ASCII text in 
34   * various portions of a RFC 822 [2] message header, in a manner which
35   * is unlikely to confuse existing message handling software.
36   * </p>
37  
38   * @see <a href="http://www.ietf.org/rfc/rfc1522.txt">
39   * MIME (Multipurpose Internet Mail Extensions) Part Two:
40   * Message Header Extensions for Non-ASCII Text</a>
41   * </p>
42   * 
43   * @author Apache Software Foundation
44   * @since 1.3
45   * @version $Id: RFC1522Codec.java 1170351 2011-09-13 21:09:09Z ggregory $
46   */
47  abstract class RFC1522Codec {
48      
49      /**
50       * Separator.
51       */
52      protected static final char SEP = '?';
53  
54      /**
55       * Prefix
56       */
57      protected static final String POSTFIX = "?=";
58  
59      /**
60       * Postfix
61       */
62      protected static final String PREFIX = "=?";
63  
64      /**
65       * Applies an RFC 1522 compliant encoding scheme to the given string of text with the 
66       * given charset. This method constructs the "encoded-word" header common to all the 
67       * RFC 1522 codecs and then invokes {@link #doEncoding(byte [])} method of a concrete 
68       * class to perform the specific encoding.
69       * 
70       * @param text a string to encode
71       * @param charset a charset to be used
72       * 
73       * @return RFC 1522 compliant "encoded-word"
74       * 
75       * @throws EncoderException thrown if there is an error condition during the Encoding 
76       *  process.
77       * @throws UnsupportedEncodingException thrown if charset is not supported 
78       * 
79       * @see <a href="http://download.oracle.com/javase/1.5.0/docs/api/java/nio/charset/Charset.html">Standard charsets</a>
80       */
81      protected String encodeText(final String text, final String charset)
82       throws EncoderException, UnsupportedEncodingException  
83      {
84          if (text == null) {
85              return null;
86          }
87          StringBuffer buffer = new StringBuffer();
88          buffer.append(PREFIX); 
89          buffer.append(charset);
90          buffer.append(SEP);
91          buffer.append(getEncoding());
92          buffer.append(SEP);
93          byte [] rawdata = doEncoding(text.getBytes(charset)); 
94          buffer.append(StringUtils.newStringUsAscii(rawdata));
95          buffer.append(POSTFIX); 
96          return buffer.toString();
97      }
98      
99      /**
100      * Applies an RFC 1522 compliant decoding scheme to the given string of text. This method 
101      * processes the "encoded-word" header common to all the RFC 1522 codecs and then invokes 
102      * {@link #doEncoding(byte [])} method of a concrete class to perform the specific decoding.
103      * 
104      * @param text a string to decode
105      * @return A new decoded String or <code>null</code> if the input is <code>null</code>.
106      * 
107      * @throws DecoderException thrown if there is an error condition during the decoding 
108      *  process.
109      * @throws UnsupportedEncodingException thrown if charset specified in the "encoded-word" 
110      *  header is not supported 
111      */
112     protected String decodeText(final String text)
113      throws DecoderException, UnsupportedEncodingException  
114     {
115         if (text == null) {
116             return null;
117         }
118         if ((!text.startsWith(PREFIX)) || (!text.endsWith(POSTFIX))) {
119             throw new DecoderException("RFC 1522 violation: malformed encoded content");
120         }
121         int terminator = text.length() - 2;
122         int from = 2;
123         int to = text.indexOf(SEP, from);
124         if (to == terminator) {
125             throw new DecoderException("RFC 1522 violation: charset token not found");
126         }
127         String charset = text.substring(from, to);
128         if (charset.equals("")) {
129             throw new DecoderException("RFC 1522 violation: charset not specified");
130         }
131         from = to + 1;
132         to = text.indexOf(SEP, from);
133         if (to == terminator) {
134             throw new DecoderException("RFC 1522 violation: encoding token not found");
135         }
136         String encoding = text.substring(from, to);
137         if (!getEncoding().equalsIgnoreCase(encoding)) {
138             throw new DecoderException("This codec cannot decode " + 
139                 encoding + " encoded content");
140         }
141         from = to + 1;
142         to = text.indexOf(SEP, from);
143         byte[] data = StringUtils.getBytesUsAscii(text.substring(from, to));
144         data = doDecoding(data); 
145         return new String(data, charset);
146     }
147 
148     /**
149      * Returns the codec name (referred to as encoding in the RFC 1522)
150      * 
151      * @return name of the codec
152      */    
153     protected abstract String getEncoding();
154 
155     /**
156      * Encodes an array of bytes using the defined encoding scheme
157      * 
158      * @param bytes Data to be encoded
159      *
160      * @return A byte array containing the encoded data
161      * 
162      * @throws EncoderException thrown if the Encoder encounters a failure condition 
163      *  during the encoding process.
164      */    
165     protected abstract byte[] doEncoding(byte[] bytes) throws EncoderException;
166 
167     /**
168      * Decodes an array of bytes using the defined encoding scheme
169      * 
170      * @param bytes Data to be decoded
171      *
172      * @return a byte array that contains decoded data
173      * 
174      * @throws DecoderException A decoder exception is thrown if a Decoder encounters a 
175      *  failure condition during the decode process.
176      */    
177     protected abstract byte[] doDecoding(byte[] bytes) throws DecoderException;
178 }