/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.commons.codec.net;

import java.io.UnsupportedEncodingException;
import java.nio.charset.Charset;

import org.apache.commons.codec.DecoderException;
import org.apache.commons.codec.EncoderException;
import org.apache.commons.codec.binary.StringUtils;

/**
 * Implements methods common to all codecs defined in RFC 1522.
 * <p>
 * <a href="http://www.ietf.org/rfc/rfc1522.txt">RFC 1522</a> describes techniques to allow the
 * encoding of non-ASCII text in various portions of a RFC 822 [2] message header, in a manner which
 * is unlikely to confuse existing message handling software.
 * <p>
 * An "encoded-word" produced and consumed by this class has the shape
 * {@code =?charset?encoding?encoded-text?=}.
 * <p>
 * This class is immutable and thread-safe.
 *
 * @see <a href="http://www.ietf.org/rfc/rfc1522.txt">MIME (Multipurpose Internet Mail Extensions) Part Two:
 *          Message Header Extensions for Non-ASCII Text</a>
 *
 * @since 1.3
 * @version $Id: RFC1522Codec.html 889935 2013-12-11 05:05:13Z ggregory $
 */
abstract class RFC1522Codec {

    /** Separator placed between the charset, encoding and encoded-text tokens. */
    protected static final char SEP = '?';

    /** Postfix that terminates an "encoded-word". */
    protected static final String POSTFIX = "?=";

    /** Prefix that starts an "encoded-word". */
    protected static final String PREFIX = "=?";

    /**
     * Applies an RFC 1522 compliant encoding scheme to the given string of text with the given charset.
     * <p>
     * This method constructs the "encoded-word" header common to all the RFC 1522 codecs and then invokes
     * {@link #doEncoding(byte [])} method of a concrete class to perform the specific encoding.
     *
     * @param text
     *            a string to encode
     * @param charset
     *            a charset to be used
     * @return RFC 1522 compliant "encoded-word", or {@code null} if {@code text} is {@code null}
     * @throws EncoderException
     *             thrown if there is an error condition during the Encoding process.
     * @see <a href="http://download.oracle.com/javase/6/docs/api/java/nio/charset/Charset.html">Standard charsets</a>
     */
    protected String encodeText(final String text, final Charset charset) throws EncoderException {
        if (text == null) {
            return null;
        }
        final StringBuilder buffer = new StringBuilder();
        buffer.append(PREFIX);
        buffer.append(charset);
        buffer.append(SEP);
        buffer.append(this.getEncoding());
        buffer.append(SEP);
        // The concrete codec's output is appended as US-ASCII text.
        final byte[] rawData = this.doEncoding(text.getBytes(charset));
        buffer.append(StringUtils.newStringUsAscii(rawData));
        buffer.append(POSTFIX);
        return buffer.toString();
    }

    /**
     * Applies an RFC 1522 compliant encoding scheme to the given string of text with the given charset.
     * <p>
     * This method resolves the charset name with {@link Charset#forName(String)} and delegates to
     * {@link #encodeText(String, Charset)}. The charset name is not looked up when {@code text} is
     * {@code null}.
     *
     * @param text
     *            a string to encode
     * @param charsetName
     *            the charset to use
     * @return RFC 1522 compliant "encoded-word", or {@code null} if {@code text} is {@code null}
     * @throws EncoderException
     *             thrown if there is an error condition during the Encoding process.
     * @throws UnsupportedEncodingException
     *             retained in the signature so that existing callers which catch it keep compiling; the charset
     *             lookup performed here reports failures through the unchecked exceptions listed below
     * @throws java.nio.charset.UnsupportedCharsetException
     *             if the named charset is not available
     * @throws java.nio.charset.IllegalCharsetNameException
     *             if the given charset name is illegal
     *
     * @see <a href="http://download.oracle.com/javase/6/docs/api/java/nio/charset/Charset.html">Standard charsets</a>
     */
    protected String encodeText(final String text, final String charsetName)
            throws EncoderException, UnsupportedEncodingException {
        if (text == null) {
            return null;
        }
        return this.encodeText(text, Charset.forName(charsetName));
    }

    /**
     * Applies an RFC 1522 compliant decoding scheme to the given string of text.
     * <p>
     * This method processes the "encoded-word" header common to all the RFC 1522 codecs and then invokes
     * {@link #doDecoding(byte [])} method of a concrete class to perform the specific decoding.
     *
     * @param text
     *            a string to decode
     * @return A new decoded String or {@code null} if the input is {@code null}.
     * @throws DecoderException
     *             thrown if the input is not a well-formed "encoded-word", if its encoding token does not match
     *             {@link #getEncoding()}, or if there is an error condition during the decoding process.
     * @throws UnsupportedEncodingException
     *             thrown if charset specified in the "encoded-word" header is not supported
     */
    protected String decodeText(final String text)
            throws DecoderException, UnsupportedEncodingException {
        if (text == null) {
            return null;
        }
        // The length check rejects "=?=", where prefix and postfix overlap: it passes both the startsWith and
        // endsWith tests but has no separator after the prefix, which previously surfaced as a
        // StringIndexOutOfBoundsException instead of a DecoderException.
        if (text.length() < PREFIX.length() + POSTFIX.length()
                || !text.startsWith(PREFIX) || !text.endsWith(POSTFIX)) {
            throw new DecoderException("RFC 1522 violation: malformed encoded content");
        }
        // Index of the separator character that begins the postfix.
        final int terminator = text.length() - POSTFIX.length();

        // Charset token.
        int from = PREFIX.length();
        int to = text.indexOf(SEP, from);
        if (to == terminator) {
            throw new DecoderException("RFC 1522 violation: charset token not found");
        }
        final String charset = text.substring(from, to);
        if (charset.isEmpty()) {
            throw new DecoderException("RFC 1522 violation: charset not specified");
        }

        // Encoding token.
        from = to + 1;
        to = text.indexOf(SEP, from);
        if (to == terminator) {
            throw new DecoderException("RFC 1522 violation: encoding token not found");
        }
        final String encoding = text.substring(from, to);
        if (!getEncoding().equalsIgnoreCase(encoding)) {
            throw new DecoderException("This codec cannot decode " + encoding + " encoded content");
        }

        // Encoded text: everything up to the next separator, which for well-formed input is the one that
        // begins the postfix.
        from = to + 1;
        to = text.indexOf(SEP, from);
        byte[] data = StringUtils.getBytesUsAscii(text.substring(from, to));
        data = doDecoding(data);
        return new String(data, charset);
    }

    /**
     * Returns the codec name (referred to as encoding in the RFC 1522).
     *
     * @return name of the codec
     */
    protected abstract String getEncoding();

    /**
     * Encodes an array of bytes using the defined encoding scheme.
     *
     * @param bytes
     *            Data to be encoded
     * @return A byte array containing the encoded data
     * @throws EncoderException
     *             thrown if the Encoder encounters a failure condition during the encoding process.
     */
    protected abstract byte[] doEncoding(byte[] bytes) throws EncoderException;

    /**
     * Decodes an array of bytes using the defined encoding scheme.
     *
     * @param bytes
     *            Data to be decoded
     * @return a byte array that contains decoded data
     * @throws DecoderException
     *             A decoder exception is thrown if a Decoder encounters a failure condition during the decode process.
     */
    protected abstract byte[] doDecoding(byte[] bytes) throws DecoderException;
}