1 /*
2 * Licensed to the Apache Software Foundation (ASF) under one or more
3 * contributor license agreements. See the NOTICE file distributed with
4 * this work for additional information regarding copyright ownership.
5 * The ASF licenses this file to You under the Apache License, Version 2.0
6 * (the "License"); you may not use this file except in compliance with
7 * the License. You may obtain a copy of the License at
8 *
9 * http://www.apache.org/licenses/LICENSE-2.0
10 *
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
16 */
17
18 package org.apache.commons.codec.net;
19
20 import java.io.ByteArrayOutputStream;
21 import java.io.UnsupportedEncodingException;
22 import java.util.BitSet;
23
24 import org.apache.commons.codec.BinaryDecoder;
25 import org.apache.commons.codec.BinaryEncoder;
26 import org.apache.commons.codec.DecoderException;
27 import org.apache.commons.codec.EncoderException;
28 import org.apache.commons.codec.StringDecoder;
29 import org.apache.commons.codec.StringEncoder;
30
31 /**
32 * <p>
33 * Codec for the Quoted-Printable section of <a href="http://www.ietf.org/rfc/rfc1521.txt">RFC 1521 </a>.
34 * </p>
35 * <p>
36 * The Quoted-Printable encoding is intended to represent data that largely consists of octets that correspond to
37 * printable characters in the ASCII character set. It encodes the data in such a way that the resulting octets are
38 * unlikely to be modified by mail transport. If the data being encoded are mostly ASCII text, the encoded form of the
39 * data remains largely recognizable by humans. A body which is entirely ASCII may also be encoded in Quoted-Printable
40 * to ensure the integrity of the data should the message pass through a character- translating, and/or line-wrapping
41 * gateway.
42 * </p>
43 *
44 * <p>
45 * Note:
46 * </p>
47 * <p>
48 * Rules #3, #4, and #5 of the quoted-printable spec are not implemented yet because the complete quoted-printable spec
49 * does not lend itself well into the byte[] oriented codec framework. Complete the codec once the steamable codec
50 * framework is ready. The motivation behind providing the codec in a partial form is that it can already come in handy
51 * for those applications that do not require quoted-printable line formatting (rules #3, #4, #5), for instance Q codec.
52 * </p>
53 *
54 * @see <a href="http://www.ietf.org/rfc/rfc1521.txt"> RFC 1521 MIME (Multipurpose Internet Mail Extensions) Part One:
55 * Mechanisms for Specifying and Describing the Format of Internet Message Bodies </a>
56 *
57 * @author Apache Software Foundation
58 * @since 1.3
59 * @version $Id: QuotedPrintableCodec.java 582446 2007-10-06 04:11:49Z bayard $
60 */
61 public class QuotedPrintableCodec implements BinaryEncoder, BinaryDecoder, StringEncoder, StringDecoder {
62 /**
63 * The default charset used for string decoding and encoding.
64 */
65 private String charset = CharacterEncodingNames.UTF8;
66
67 /**
68 * BitSet of printable characters as defined in RFC 1521.
69 */
70 private static final BitSet PRINTABLE_CHARS = new BitSet(256);
71
72 private static final byte ESCAPE_CHAR = '=';
73
74 private static final byte TAB = 9;
75
76 private static final byte SPACE = 32;
77 // Static initializer for printable chars collection
78 static {
79 // alpha characters
80 for (int i = 33; i <= 60; i++) {
81 PRINTABLE_CHARS.set(i);
82 }
83 for (int i = 62; i <= 126; i++) {
84 PRINTABLE_CHARS.set(i);
85 }
86 PRINTABLE_CHARS.set(TAB);
87 PRINTABLE_CHARS.set(SPACE);
88 }
89
90 /**
91 * Default constructor.
92 */
93 public QuotedPrintableCodec() {
94 super();
95 }
96
97 /**
98 * Constructor which allows for the selection of a default charset
99 *
100 * @param charset
101 * the default string charset to use.
102 */
103 public QuotedPrintableCodec(String charset) {
104 super();
105 this.charset = charset;
106 }
107
108 /**
109 * Encodes byte into its quoted-printable representation.
110 *
111 * @param b
112 * byte to encode
113 * @param buffer
114 * the buffer to write to
115 */
116 private static final void encodeQuotedPrintable(int b, ByteArrayOutputStream buffer) {
117 buffer.write(ESCAPE_CHAR);
118 char hex1 = Character.toUpperCase(Character.forDigit((b >> 4) & 0xF, 16));
119 char hex2 = Character.toUpperCase(Character.forDigit(b & 0xF, 16));
120 buffer.write(hex1);
121 buffer.write(hex2);
122 }
123
124 /**
125 * Encodes an array of bytes into an array of quoted-printable 7-bit characters. Unsafe characters are escaped.
126 *
127 * <p>
128 * This function implements a subset of quoted-printable encoding specification (rule #1 and rule #2) as defined in
129 * RFC 1521 and is suitable for encoding binary data and unformatted text.
130 * </p>
131 *
132 * @param printable
133 * bitset of characters deemed quoted-printable
134 * @param bytes
135 * array of bytes to be encoded
136 * @return array of bytes containing quoted-printable data
137 */
138 public static final byte[] encodeQuotedPrintable(BitSet printable, byte[] bytes) {
139 if (bytes == null) {
140 return null;
141 }
142 if (printable == null) {
143 printable = PRINTABLE_CHARS;
144 }
145 ByteArrayOutputStream buffer = new ByteArrayOutputStream();
146 for (int i = 0; i < bytes.length; i++) {
147 int b = bytes[i];
148 if (b < 0) {
149 b = 256 + b;
150 }
151 if (printable.get(b)) {
152 buffer.write(b);
153 } else {
154 encodeQuotedPrintable(b, buffer);
155 }
156 }
157 return buffer.toByteArray();
158 }
159
160 /**
161 * Decodes an array quoted-printable characters into an array of original bytes. Escaped characters are converted
162 * back to their original representation.
163 *
164 * <p>
165 * This function implements a subset of quoted-printable encoding specification (rule #1 and rule #2) as defined in
166 * RFC 1521.
167 * </p>
168 *
169 * @param bytes
170 * array of quoted-printable characters
171 * @return array of original bytes
172 * @throws DecoderException
173 * Thrown if quoted-printable decoding is unsuccessful
174 */
175 public static final byte[] decodeQuotedPrintable(byte[] bytes) throws DecoderException {
176 if (bytes == null) {
177 return null;
178 }
179 ByteArrayOutputStream buffer = new ByteArrayOutputStream();
180 for (int i = 0; i < bytes.length; i++) {
181 int b = bytes[i];
182 if (b == ESCAPE_CHAR) {
183 try {
184 int u = Character.digit((char) bytes[++i], 16);
185 int l = Character.digit((char) bytes[++i], 16);
186 if (u == -1 || l == -1) {
187 throw new DecoderException("Invalid quoted-printable encoding");
188 }
189 buffer.write((char) ((u << 4) + l));
190 } catch (ArrayIndexOutOfBoundsException e) {
191 throw new DecoderException("Invalid quoted-printable encoding");
192 }
193 } else {
194 buffer.write(b);
195 }
196 }
197 return buffer.toByteArray();
198 }
199
200 /**
201 * Encodes an array of bytes into an array of quoted-printable 7-bit characters. Unsafe characters are escaped.
202 *
203 * <p>
204 * This function implements a subset of quoted-printable encoding specification (rule #1 and rule #2) as defined in
205 * RFC 1521 and is suitable for encoding binary data and unformatted text.
206 * </p>
207 *
208 * @param bytes
209 * array of bytes to be encoded
210 * @return array of bytes containing quoted-printable data
211 */
212 public byte[] encode(byte[] bytes) {
213 return encodeQuotedPrintable(PRINTABLE_CHARS, bytes);
214 }
215
216 /**
217 * Decodes an array of quoted-printable characters into an array of original bytes. Escaped characters are converted
218 * back to their original representation.
219 *
220 * <p>
221 * This function implements a subset of quoted-printable encoding specification (rule #1 and rule #2) as defined in
222 * RFC 1521.
223 * </p>
224 *
225 * @param bytes
226 * array of quoted-printable characters
227 * @return array of original bytes
228 * @throws DecoderException
229 * Thrown if quoted-printable decoding is unsuccessful
230 */
231 public byte[] decode(byte[] bytes) throws DecoderException {
232 return decodeQuotedPrintable(bytes);
233 }
234
235 /**
236 * Encodes a string into its quoted-printable form using the default string charset. Unsafe characters are escaped.
237 *
238 * <p>
239 * This function implements a subset of quoted-printable encoding specification (rule #1 and rule #2) as defined in
240 * RFC 1521 and is suitable for encoding binary data.
241 * </p>
242 *
243 * @param pString
244 * string to convert to quoted-printable form
245 * @return quoted-printable string
246 *
247 * @throws EncoderException
248 * Thrown if quoted-printable encoding is unsuccessful
249 *
250 * @see #getDefaultCharset()
251 */
252 public String encode(String pString) throws EncoderException {
253 if (pString == null) {
254 return null;
255 }
256 try {
257 return encode(pString, getDefaultCharset());
258 } catch (UnsupportedEncodingException e) {
259 throw new EncoderException(e.getMessage());
260 }
261 }
262
263 /**
264 * Decodes a quoted-printable string into its original form using the specified string charset. Escaped characters
265 * are converted back to their original representation.
266 *
267 * @param pString
268 * quoted-printable string to convert into its original form
269 * @param charset
270 * the original string charset
271 * @return original string
272 * @throws DecoderException
273 * Thrown if quoted-printable decoding is unsuccessful
274 * @throws UnsupportedEncodingException
275 * Thrown if charset is not supported
276 */
277 public String decode(String pString, String charset) throws DecoderException, UnsupportedEncodingException {
278 if (pString == null) {
279 return null;
280 }
281 return new String(decode(pString.getBytes(CharacterEncodingNames.US_ASCII)), charset);
282 }
283
284 /**
285 * Decodes a quoted-printable string into its original form using the default string charset. Escaped characters are
286 * converted back to their original representation.
287 *
288 * @param pString
289 * quoted-printable string to convert into its original form
290 * @return original string
291 * @throws DecoderException
292 * Thrown if quoted-printable decoding is unsuccessful.
293 * Thrown if charset is not supported.
294 * @see #getDefaultCharset()
295 */
296 public String decode(String pString) throws DecoderException {
297 if (pString == null) {
298 return null;
299 }
300 try {
301 return decode(pString, getDefaultCharset());
302 } catch (UnsupportedEncodingException e) {
303 throw new DecoderException(e.getMessage());
304 }
305 }
306
307 /**
308 * Encodes an object into its quoted-printable safe form. Unsafe characters are escaped.
309 *
310 * @param pObject
311 * string to convert to a quoted-printable form
312 * @return quoted-printable object
313 * @throws EncoderException
314 * Thrown if quoted-printable encoding is not applicable to objects of this type or if encoding is
315 * unsuccessful
316 */
317 public Object encode(Object pObject) throws EncoderException {
318 if (pObject == null) {
319 return null;
320 } else if (pObject instanceof byte[]) {
321 return encode((byte[]) pObject);
322 } else if (pObject instanceof String) {
323 return encode((String) pObject);
324 } else {
325 throw new EncoderException("Objects of type "
326 + pObject.getClass().getName()
327 + " cannot be quoted-printable encoded");
328 }
329 }
330
331 /**
332 * Decodes a quoted-printable object into its original form. Escaped characters are converted back to their original
333 * representation.
334 *
335 * @param pObject
336 * quoted-printable object to convert into its original form
337 * @return original object
338 * @throws DecoderException
339 * Thrown if the argument is not a <code>String</code> or <code>byte[]</code>. Thrown if a failure condition is
340 * encountered during the decode process.
341 */
342 public Object decode(Object pObject) throws DecoderException {
343 if (pObject == null) {
344 return null;
345 } else if (pObject instanceof byte[]) {
346 return decode((byte[]) pObject);
347 } else if (pObject instanceof String) {
348 return decode((String) pObject);
349 } else {
350 throw new DecoderException("Objects of type "
351 + pObject.getClass().getName()
352 + " cannot be quoted-printable decoded");
353 }
354 }
355
356 /**
357 * Returns the default charset used for string decoding and encoding.
358 *
359 * @return the default string charset.
360 */
361 public String getDefaultCharset() {
362 return this.charset;
363 }
364
365 /**
366 * Encodes a string into its quoted-printable form using the specified charset. Unsafe characters are escaped.
367 *
368 * <p>
369 * This function implements a subset of quoted-printable encoding specification (rule #1 and rule #2) as defined in
370 * RFC 1521 and is suitable for encoding binary data and unformatted text.
371 * </p>
372 *
373 * @param pString
374 * string to convert to quoted-printable form
375 * @param charset
376 * the charset for pString
377 * @return quoted-printable string
378 *
379 * @throws UnsupportedEncodingException
380 * Thrown if the charset is not supported
381 */
382 public String encode(String pString, String charset) throws UnsupportedEncodingException {
383 if (pString == null) {
384 return null;
385 }
386 return new String(encode(pString.getBytes(charset)), CharacterEncodingNames.US_ASCII);
387 }
388 }