001 /*
002 * Licensed to the Apache Software Foundation (ASF) under one or more
003 * contributor license agreements. See the NOTICE file distributed with
004 * this work for additional information regarding copyright ownership.
005 * The ASF licenses this file to You under the Apache License, Version 2.0
006 * (the "License"); you may not use this file except in compliance with
007 * the License. You may obtain a copy of the License at
008 *
009 * http://www.apache.org/licenses/LICENSE-2.0
010 *
011 * Unless required by applicable law or agreed to in writing, software
012 * distributed under the License is distributed on an "AS IS" BASIS,
013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 * See the License for the specific language governing permissions and
015 * limitations under the License.
016 */
017
018 package org.apache.commons.codec.net;
019
020 import java.io.ByteArrayOutputStream;
021 import java.io.UnsupportedEncodingException;
022 import java.nio.charset.Charset;
023 import java.nio.charset.IllegalCharsetNameException;
024 import java.nio.charset.UnsupportedCharsetException;
025 import java.util.BitSet;
026
027 import org.apache.commons.codec.BinaryDecoder;
028 import org.apache.commons.codec.BinaryEncoder;
029 import org.apache.commons.codec.Charsets;
030 import org.apache.commons.codec.DecoderException;
031 import org.apache.commons.codec.EncoderException;
032 import org.apache.commons.codec.StringDecoder;
033 import org.apache.commons.codec.StringEncoder;
034 import org.apache.commons.codec.binary.StringUtils;
035
036 /**
037 * Codec for the Quoted-Printable section of <a href="http://www.ietf.org/rfc/rfc1521.txt">RFC 1521</a>.
038 * <p>
039 * The Quoted-Printable encoding is intended to represent data that largely consists of octets that correspond to
040 * printable characters in the ASCII character set. It encodes the data in such a way that the resulting octets are
041 * unlikely to be modified by mail transport. If the data being encoded are mostly ASCII text, the encoded form of the
042 * data remains largely recognizable by humans. A body which is entirely ASCII may also be encoded in Quoted-Printable
043 * to ensure the integrity of the data should the message pass through a character- translating, and/or line-wrapping
044 * gateway.
045 * <p>
046 * Note:
047 * <p>
048 * Rules #3, #4, and #5 of the quoted-printable spec are not implemented yet because the complete quoted-printable spec
049 * does not lend itself well into the byte[] oriented codec framework. Complete the codec once the streamable codec
050 * framework is ready. The motivation behind providing the codec in a partial form is that it can already come in handy
051 * for those applications that do not require quoted-printable line formatting (rules #3, #4, #5), for instance Q codec.
052 * <p>
053 * This class is immutable and thread-safe.
054 *
055 * @see <a href="http://www.ietf.org/rfc/rfc1521.txt"> RFC 1521 MIME (Multipurpose Internet Mail Extensions) Part One:
056 * Mechanisms for Specifying and Describing the Format of Internet Message Bodies </a>
057 *
058 * @since 1.3
059 * @version $Id: QuotedPrintableCodec.html 889935 2013-12-11 05:05:13Z ggregory $
060 */
061 public class QuotedPrintableCodec implements BinaryEncoder, BinaryDecoder, StringEncoder, StringDecoder {
062 /**
063 * The default charset used for string decoding and encoding.
064 */
065 private final Charset charset;
066
067 /**
068 * BitSet of printable characters as defined in RFC 1521.
069 */
070 private static final BitSet PRINTABLE_CHARS = new BitSet(256);
071
072 private static final byte ESCAPE_CHAR = '=';
073
074 private static final byte TAB = 9;
075
076 private static final byte SPACE = 32;
077 // Static initializer for printable chars collection
078 static {
079 // alpha characters
080 for (int i = 33; i <= 60; i++) {
081 PRINTABLE_CHARS.set(i);
082 }
083 for (int i = 62; i <= 126; i++) {
084 PRINTABLE_CHARS.set(i);
085 }
086 PRINTABLE_CHARS.set(TAB);
087 PRINTABLE_CHARS.set(SPACE);
088 }
089
090 /**
091 * Default constructor, assumes default charset of {@link Charsets#UTF_8}
092 */
093 public QuotedPrintableCodec() {
094 this(Charsets.UTF_8);
095 }
096
097 /**
098 * Constructor which allows for the selection of a default charset.
099 *
100 * @param charset
101 * the default string charset to use.
102 * @since 1.7
103 */
104 public QuotedPrintableCodec(final Charset charset) {
105 this.charset = charset;
106 }
107
108 /**
109 * Constructor which allows for the selection of a default charset.
110 *
111 * @param charsetName
112 * the default string charset to use.
113 * @throws UnsupportedCharsetException
114 * If no support for the named charset is available
115 * in this instance of the Java virtual machine
116 * @throws IllegalArgumentException
117 * If the given charsetName is null
118 * @throws IllegalCharsetNameException
119 * If the given charset name is illegal
120 *
121 * @since 1.7 throws UnsupportedCharsetException if the named charset is unavailable
122 */
123 public QuotedPrintableCodec(final String charsetName)
124 throws IllegalCharsetNameException, IllegalArgumentException, UnsupportedCharsetException {
125 this(Charset.forName(charsetName));
126 }
127
128 /**
129 * Encodes byte into its quoted-printable representation.
130 *
131 * @param b
132 * byte to encode
133 * @param buffer
134 * the buffer to write to
135 */
136 private static final void encodeQuotedPrintable(final int b, final ByteArrayOutputStream buffer) {
137 buffer.write(ESCAPE_CHAR);
138 final char hex1 = Character.toUpperCase(Character.forDigit((b >> 4) & 0xF, 16));
139 final char hex2 = Character.toUpperCase(Character.forDigit(b & 0xF, 16));
140 buffer.write(hex1);
141 buffer.write(hex2);
142 }
143
144 /**
145 * Encodes an array of bytes into an array of quoted-printable 7-bit characters. Unsafe characters are escaped.
146 * <p>
147 * This function implements a subset of quoted-printable encoding specification (rule #1 and rule #2) as defined in
148 * RFC 1521 and is suitable for encoding binary data and unformatted text.
149 *
150 * @param printable
151 * bitset of characters deemed quoted-printable
152 * @param bytes
153 * array of bytes to be encoded
154 * @return array of bytes containing quoted-printable data
155 */
156 public static final byte[] encodeQuotedPrintable(BitSet printable, final byte[] bytes) {
157 if (bytes == null) {
158 return null;
159 }
160 if (printable == null) {
161 printable = PRINTABLE_CHARS;
162 }
163 final ByteArrayOutputStream buffer = new ByteArrayOutputStream();
164 for (final byte c : bytes) {
165 int b = c;
166 if (b < 0) {
167 b = 256 + b;
168 }
169 if (printable.get(b)) {
170 buffer.write(b);
171 } else {
172 encodeQuotedPrintable(b, buffer);
173 }
174 }
175 return buffer.toByteArray();
176 }
177
178 /**
179 * Decodes an array quoted-printable characters into an array of original bytes. Escaped characters are converted
180 * back to their original representation.
181 * <p>
182 * This function implements a subset of quoted-printable encoding specification (rule #1 and rule #2) as defined in
183 * RFC 1521.
184 *
185 * @param bytes
186 * array of quoted-printable characters
187 * @return array of original bytes
188 * @throws DecoderException
189 * Thrown if quoted-printable decoding is unsuccessful
190 */
191 public static final byte[] decodeQuotedPrintable(final byte[] bytes) throws DecoderException {
192 if (bytes == null) {
193 return null;
194 }
195 final ByteArrayOutputStream buffer = new ByteArrayOutputStream();
196 for (int i = 0; i < bytes.length; i++) {
197 final int b = bytes[i];
198 if (b == ESCAPE_CHAR) {
199 try {
200 final int u = Utils.digit16(bytes[++i]);
201 final int l = Utils.digit16(bytes[++i]);
202 buffer.write((char) ((u << 4) + l));
203 } catch (final ArrayIndexOutOfBoundsException e) {
204 throw new DecoderException("Invalid quoted-printable encoding", e);
205 }
206 } else {
207 buffer.write(b);
208 }
209 }
210 return buffer.toByteArray();
211 }
212
213 /**
214 * Encodes an array of bytes into an array of quoted-printable 7-bit characters. Unsafe characters are escaped.
215 * <p>
216 * This function implements a subset of quoted-printable encoding specification (rule #1 and rule #2) as defined in
217 * RFC 1521 and is suitable for encoding binary data and unformatted text.
218 *
219 * @param bytes
220 * array of bytes to be encoded
221 * @return array of bytes containing quoted-printable data
222 */
223 @Override
224 public byte[] encode(final byte[] bytes) {
225 return encodeQuotedPrintable(PRINTABLE_CHARS, bytes);
226 }
227
228 /**
229 * Decodes an array of quoted-printable characters into an array of original bytes. Escaped characters are converted
230 * back to their original representation.
231 * <p>
232 * This function implements a subset of quoted-printable encoding specification (rule #1 and rule #2) as defined in
233 * RFC 1521.
234 *
235 * @param bytes
236 * array of quoted-printable characters
237 * @return array of original bytes
238 * @throws DecoderException
239 * Thrown if quoted-printable decoding is unsuccessful
240 */
241 @Override
242 public byte[] decode(final byte[] bytes) throws DecoderException {
243 return decodeQuotedPrintable(bytes);
244 }
245
246 /**
247 * Encodes a string into its quoted-printable form using the default string charset. Unsafe characters are escaped.
248 * <p>
249 * This function implements a subset of quoted-printable encoding specification (rule #1 and rule #2) as defined in
250 * RFC 1521 and is suitable for encoding binary data.
251 *
252 * @param str
253 * string to convert to quoted-printable form
254 * @return quoted-printable string
255 * @throws EncoderException
256 * Thrown if quoted-printable encoding is unsuccessful
257 *
258 * @see #getCharset()
259 */
260 @Override
261 public String encode(final String str) throws EncoderException {
262 return this.encode(str, getCharset());
263 }
264
265 /**
266 * Decodes a quoted-printable string into its original form using the specified string charset. Escaped characters
267 * are converted back to their original representation.
268 *
269 * @param str
270 * quoted-printable string to convert into its original form
271 * @param charset
272 * the original string charset
273 * @return original string
274 * @throws DecoderException
275 * Thrown if quoted-printable decoding is unsuccessful
276 * @since 1.7
277 */
278 public String decode(final String str, final Charset charset) throws DecoderException {
279 if (str == null) {
280 return null;
281 }
282 return new String(this.decode(StringUtils.getBytesUsAscii(str)), charset);
283 }
284
285 /**
286 * Decodes a quoted-printable string into its original form using the specified string charset. Escaped characters
287 * are converted back to their original representation.
288 *
289 * @param str
290 * quoted-printable string to convert into its original form
291 * @param charset
292 * the original string charset
293 * @return original string
294 * @throws DecoderException
295 * Thrown if quoted-printable decoding is unsuccessful
296 * @throws UnsupportedEncodingException
297 * Thrown if charset is not supported
298 */
299 public String decode(final String str, final String charset) throws DecoderException, UnsupportedEncodingException {
300 if (str == null) {
301 return null;
302 }
303 return new String(decode(StringUtils.getBytesUsAscii(str)), charset);
304 }
305
306 /**
307 * Decodes a quoted-printable string into its original form using the default string charset. Escaped characters are
308 * converted back to their original representation.
309 *
310 * @param str
311 * quoted-printable string to convert into its original form
312 * @return original string
313 * @throws DecoderException
314 * Thrown if quoted-printable decoding is unsuccessful. Thrown if charset is not supported.
315 * @see #getCharset()
316 */
317 @Override
318 public String decode(final String str) throws DecoderException {
319 return this.decode(str, this.getCharset());
320 }
321
322 /**
323 * Encodes an object into its quoted-printable safe form. Unsafe characters are escaped.
324 *
325 * @param obj
326 * string to convert to a quoted-printable form
327 * @return quoted-printable object
328 * @throws EncoderException
329 * Thrown if quoted-printable encoding is not applicable to objects of this type or if encoding is
330 * unsuccessful
331 */
332 @Override
333 public Object encode(final Object obj) throws EncoderException {
334 if (obj == null) {
335 return null;
336 } else if (obj instanceof byte[]) {
337 return encode((byte[]) obj);
338 } else if (obj instanceof String) {
339 return encode((String) obj);
340 } else {
341 throw new EncoderException("Objects of type " +
342 obj.getClass().getName() +
343 " cannot be quoted-printable encoded");
344 }
345 }
346
347 /**
348 * Decodes a quoted-printable object into its original form. Escaped characters are converted back to their original
349 * representation.
350 *
351 * @param obj
352 * quoted-printable object to convert into its original form
353 * @return original object
354 * @throws DecoderException
355 * Thrown if the argument is not a <code>String</code> or <code>byte[]</code>. Thrown if a failure
356 * condition is encountered during the decode process.
357 */
358 @Override
359 public Object decode(final Object obj) throws DecoderException {
360 if (obj == null) {
361 return null;
362 } else if (obj instanceof byte[]) {
363 return decode((byte[]) obj);
364 } else if (obj instanceof String) {
365 return decode((String) obj);
366 } else {
367 throw new DecoderException("Objects of type " +
368 obj.getClass().getName() +
369 " cannot be quoted-printable decoded");
370 }
371 }
372
373 /**
374 * Gets the default charset name used for string decoding and encoding.
375 *
376 * @return the default charset name
377 * @since 1.7
378 */
379 public Charset getCharset() {
380 return this.charset;
381 }
382
383 /**
384 * Gets the default charset name used for string decoding and encoding.
385 *
386 * @return the default charset name
387 */
388 public String getDefaultCharset() {
389 return this.charset.name();
390 }
391
392 /**
393 * Encodes a string into its quoted-printable form using the specified charset. Unsafe characters are escaped.
394 * <p>
395 * This function implements a subset of quoted-printable encoding specification (rule #1 and rule #2) as defined in
396 * RFC 1521 and is suitable for encoding binary data and unformatted text.
397 *
398 * @param str
399 * string to convert to quoted-printable form
400 * @param charset
401 * the charset for str
402 * @return quoted-printable string
403 * @since 1.7
404 */
405 public String encode(final String str, final Charset charset) {
406 if (str == null) {
407 return null;
408 }
409 return StringUtils.newStringUsAscii(this.encode(str.getBytes(charset)));
410 }
411
412 /**
413 * Encodes a string into its quoted-printable form using the specified charset. Unsafe characters are escaped.
414 * <p>
415 * This function implements a subset of quoted-printable encoding specification (rule #1 and rule #2) as defined in
416 * RFC 1521 and is suitable for encoding binary data and unformatted text.
417 *
418 * @param str
419 * string to convert to quoted-printable form
420 * @param charset
421 * the charset for str
422 * @return quoted-printable string
423 * @throws UnsupportedEncodingException
424 * Thrown if the charset is not supported
425 */
426 public String encode(final String str, final String charset) throws UnsupportedEncodingException {
427 if (str == null) {
428 return null;
429 }
430 return StringUtils.newStringUsAscii(encode(str.getBytes(charset)));
431 }
432 }