1 /*
2 * Licensed to the Apache Software Foundation (ASF) under one or more
3 * contributor license agreements. See the NOTICE file distributed with
4 * this work for additional information regarding copyright ownership.
5 * The ASF licenses this file to You under the Apache License, Version 2.0
6 * (the "License"); you may not use this file except in compliance with
7 * the License. You may obtain a copy of the License at
8 *
9 * http://www.apache.org/licenses/LICENSE-2.0
10 *
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
16 */
17
18 package org.apache.commons.codec.net;
19
20 import java.io.UnsupportedEncodingException;
21 import java.nio.charset.Charset;
22
23 import org.apache.commons.codec.DecoderException;
24 import org.apache.commons.codec.EncoderException;
25 import org.apache.commons.codec.binary.StringUtils;
26
27 /**
28 * Implements methods common to all codecs defined in RFC 1522.
29 * <p>
30 * <a href="http://www.ietf.org/rfc/rfc1522.txt">RFC 1522</a> describes techniques to allow the
31 * encoding of non-ASCII text in various portions of a RFC 822 [2] message header, in a manner which
32 * is unlikely to confuse existing message handling software.
33 * <p>
34 * This class is immutable and thread-safe.
35 *
36 * @see <a href="http://www.ietf.org/rfc/rfc1522.txt">MIME (Multipurpose Internet Mail Extensions) Part Two:
37 * Message Header Extensions for Non-ASCII Text</a>
38 *
39 * @since 1.3
40 * @version $Id: RFC1522Codec.html 889935 2013-12-11 05:05:13Z ggregory $
41 */
42 abstract class RFC1522Codec {
43
44 /** Separator. */
45 protected static final char SEP = '?';
46
47 /** Prefix. */
48 protected static final String POSTFIX = "?=";
49
50 /** Postfix. */
51 protected static final String PREFIX = "=?";
52
53 /**
54 * Applies an RFC 1522 compliant encoding scheme to the given string of text with the given charset.
55 * <p>
56 * This method constructs the "encoded-word" header common to all the RFC 1522 codecs and then invokes
57 * {@link #doEncoding(byte [])} method of a concrete class to perform the specific encoding.
58 *
59 * @param text
60 * a string to encode
61 * @param charset
62 * a charset to be used
63 * @return RFC 1522 compliant "encoded-word"
64 * @throws EncoderException
65 * thrown if there is an error condition during the Encoding process.
66 * @see <a href="http://download.oracle.com/javase/6/docs/api/java/nio/charset/Charset.html">Standard charsets</a>
67 */
68 protected String encodeText(final String text, final Charset charset) throws EncoderException {
69 if (text == null) {
70 return null;
71 }
72 final StringBuilder buffer = new StringBuilder();
73 buffer.append(PREFIX);
74 buffer.append(charset);
75 buffer.append(SEP);
76 buffer.append(this.getEncoding());
77 buffer.append(SEP);
78 final byte [] rawData = this.doEncoding(text.getBytes(charset));
79 buffer.append(StringUtils.newStringUsAscii(rawData));
80 buffer.append(POSTFIX);
81 return buffer.toString();
82 }
83
84 /**
85 * Applies an RFC 1522 compliant encoding scheme to the given string of text with the given charset.
86 * <p>
87 * This method constructs the "encoded-word" header common to all the RFC 1522 codecs and then invokes
88 * {@link #doEncoding(byte [])} method of a concrete class to perform the specific encoding.
89 *
90 * @param text
91 * a string to encode
92 * @param charsetName
93 * the charset to use
94 * @return RFC 1522 compliant "encoded-word"
95 * @throws EncoderException
96 * thrown if there is an error condition during the Encoding process.
97 * @throws UnsupportedEncodingException
98 * if charset is not available
99 *
100 * @see <a href="http://download.oracle.com/javase/6/docs/api/java/nio/charset/Charset.html">Standard charsets</a>
101 */
102 protected String encodeText(final String text, final String charsetName)
103 throws EncoderException, UnsupportedEncodingException {
104 if (text == null) {
105 return null;
106 }
107 return this.encodeText(text, Charset.forName(charsetName));
108 }
109
110 /**
111 * Applies an RFC 1522 compliant decoding scheme to the given string of text.
112 * <p>
113 * This method processes the "encoded-word" header common to all the RFC 1522 codecs and then invokes
114 * {@link #doEncoding(byte [])} method of a concrete class to perform the specific decoding.
115 *
116 * @param text
117 * a string to decode
118 * @return A new decoded String or {@code null} if the input is {@code null}.
119 * @throws DecoderException
120 * thrown if there is an error condition during the decoding process.
121 * @throws UnsupportedEncodingException
122 * thrown if charset specified in the "encoded-word" header is not supported
123 */
124 protected String decodeText(final String text)
125 throws DecoderException, UnsupportedEncodingException {
126 if (text == null) {
127 return null;
128 }
129 if (!text.startsWith(PREFIX) || !text.endsWith(POSTFIX)) {
130 throw new DecoderException("RFC 1522 violation: malformed encoded content");
131 }
132 final int terminator = text.length() - 2;
133 int from = 2;
134 int to = text.indexOf(SEP, from);
135 if (to == terminator) {
136 throw new DecoderException("RFC 1522 violation: charset token not found");
137 }
138 final String charset = text.substring(from, to);
139 if (charset.equals("")) {
140 throw new DecoderException("RFC 1522 violation: charset not specified");
141 }
142 from = to + 1;
143 to = text.indexOf(SEP, from);
144 if (to == terminator) {
145 throw new DecoderException("RFC 1522 violation: encoding token not found");
146 }
147 final String encoding = text.substring(from, to);
148 if (!getEncoding().equalsIgnoreCase(encoding)) {
149 throw new DecoderException("This codec cannot decode " + encoding + " encoded content");
150 }
151 from = to + 1;
152 to = text.indexOf(SEP, from);
153 byte[] data = StringUtils.getBytesUsAscii(text.substring(from, to));
154 data = doDecoding(data);
155 return new String(data, charset);
156 }
157
158 /**
159 * Returns the codec name (referred to as encoding in the RFC 1522).
160 *
161 * @return name of the codec
162 */
163 protected abstract String getEncoding();
164
165 /**
166 * Encodes an array of bytes using the defined encoding scheme.
167 *
168 * @param bytes
169 * Data to be encoded
170 * @return A byte array containing the encoded data
171 * @throws EncoderException
172 * thrown if the Encoder encounters a failure condition during the encoding process.
173 */
174 protected abstract byte[] doEncoding(byte[] bytes) throws EncoderException;
175
176 /**
177 * Decodes an array of bytes using the defined encoding scheme.
178 *
179 * @param bytes
180 * Data to be decoded
181 * @return a byte array that contains decoded data
182 * @throws DecoderException
183 * A decoder exception is thrown if a Decoder encounters a failure condition during the decode process.
184 */
185 protected abstract byte[] doDecoding(byte[] bytes) throws DecoderException;
186 }