/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.commons.codec.net;

import java.io.UnsupportedEncodingException;
import java.nio.charset.Charset;

import org.apache.commons.codec.DecoderException;
import org.apache.commons.codec.EncoderException;
import org.apache.commons.codec.binary.StringUtils;

/**
 * Implements methods common to all codecs defined in RFC 1522.
 * <p>
 * <a href="http://www.ietf.org/rfc/rfc1522.txt">RFC 1522</a> describes techniques to allow the
 * encoding of non-ASCII text in various portions of a RFC 822 [2] message header, in a manner which
 * is unlikely to confuse existing message handling software.
 * </p>
 * <p>
 * An "encoded-word" handled by this class has the shape {@code =?charset?encoding?encoded-text?=}.
 * </p>
 * <p>
 * This class is immutable and thread-safe.
 * </p>
 *
 * @see <a href="http://www.ietf.org/rfc/rfc1522.txt">MIME (Multipurpose Internet Mail Extensions) Part Two:
 *          Message Header Extensions for Non-ASCII Text</a>
 *
 * @since 1.3
 */
abstract class RFC1522Codec {

    /** Separator placed between the charset, encoding and encoded-text tokens. */
    protected static final char SEP = '?';

    /** Postfix that terminates an "encoded-word". */
    protected static final String POSTFIX = "?=";

    /** Prefix that starts an "encoded-word". */
    protected static final String PREFIX = "=?";

    /**
     * Applies an RFC 1522 compliant decoding scheme to the given string of text.
     * <p>
     * This method processes the "encoded-word" header common to all the RFC 1522 codecs and then invokes
     * {@link #doDecoding(byte[])} method of a concrete class to perform the specific decoding.
     * </p>
     * <p>
     * The encoded text is taken from the character after the second separator up to the next separator, converted
     * to US-ASCII bytes, decoded by the concrete codec and finally turned into a String using the charset named in
     * the header.
     * </p>
     *
     * @param text
     *            a string to decode
     * @return A new decoded String or {@code null} if the input is {@code null}.
     * @throws DecoderException
     *             thrown if the text is not enclosed in {@link #PREFIX} and {@link #POSTFIX}, if the charset or
     *             encoding token is missing, if the encoding token does not match {@link #getEncoding()}, or if
     *             the concrete codec fails to decode the data.
     * @throws UnsupportedEncodingException
     *             thrown if charset specified in the "encoded-word" header is not supported
     */
    protected String decodeText(final String text)
            throws DecoderException, UnsupportedEncodingException {
        if (text == null) {
            return null;
        }
        if (!text.startsWith(PREFIX) || !text.endsWith(POSTFIX)) {
            throw new DecoderException("RFC 1522 violation: malformed encoded content");
        }
        // Index of the separator that opens the postfix.
        final int terminator = text.length() - POSTFIX.length();
        int from = PREFIX.length();
        if (terminator < from) {
            // Prefix and postfix share their '?' (the input is "=?="), so there is no room for any token.
            // Without this guard indexOf below returns -1 and substring fails with an unchecked
            // StringIndexOutOfBoundsException instead of the documented DecoderException.
            throw new DecoderException("RFC 1522 violation: malformed encoded content");
        }
        // From here on a separator always exists at 'terminator', so indexOf cannot return -1.
        int to = text.indexOf(SEP, from);
        if (to == terminator) {
            throw new DecoderException("RFC 1522 violation: charset token not found");
        }
        final String charset = text.substring(from, to);
        if (charset.isEmpty()) {
            throw new DecoderException("RFC 1522 violation: charset not specified");
        }
        from = to + 1;
        to = text.indexOf(SEP, from);
        if (to == terminator) {
            throw new DecoderException("RFC 1522 violation: encoding token not found");
        }
        final String encoding = text.substring(from, to);
        if (!getEncoding().equalsIgnoreCase(encoding)) {
            throw new DecoderException("This codec cannot decode " + encoding + " encoded content");
        }
        from = to + 1;
        // The encoded text ends at the next separator found after the encoding token.
        to = text.indexOf(SEP, from);
        byte[] data = StringUtils.getBytesUsAscii(text.substring(from, to));
        data = doDecoding(data);
        return new String(data, charset);
    }

    /**
     * Decodes an array of bytes using the defined encoding scheme.
     *
     * @param bytes
     *            Data to be decoded
     * @return a byte array that contains decoded data
     * @throws DecoderException
     *             A decoder exception is thrown if a Decoder encounters a failure condition during the decode process.
     */
    protected abstract byte[] doDecoding(byte[] bytes) throws DecoderException;

    /**
     * Encodes an array of bytes using the defined encoding scheme.
     *
     * @param bytes
     *            Data to be encoded
     * @return A byte array containing the encoded data
     * @throws EncoderException
     *             thrown if the Encoder encounters a failure condition during the encoding process.
     */
    protected abstract byte[] doEncoding(byte[] bytes) throws EncoderException;

    /**
     * Applies an RFC 1522 compliant encoding scheme to the given string of text with the given charset.
     * <p>
     * This method constructs the "encoded-word" header common to all the RFC 1522 codecs and then invokes
     * {@link #doEncoding(byte[])} method of a concrete class to perform the specific encoding.
     * </p>
     * <p>
     * The result is {@link #PREFIX}, the charset, {@link #SEP}, {@link #getEncoding()}, {@link #SEP}, the encoded
     * bytes read as US-ASCII, and {@link #POSTFIX}, in that order.
     * </p>
     *
     * @param text
     *            a string to encode
     * @param charset
     *            a charset to be used
     * @return RFC 1522 compliant "encoded-word" or {@code null} if the input text is {@code null}
     * @throws EncoderException
     *             thrown if there is an error condition during the Encoding process.
     * @see Charset
     */
    protected String encodeText(final String text, final Charset charset) throws EncoderException {
        if (text == null) {
            return null;
        }
        final StringBuilder buffer = new StringBuilder();
        buffer.append(PREFIX);
        buffer.append(charset);
        buffer.append(SEP);
        buffer.append(this.getEncoding());
        buffer.append(SEP);
        buffer.append(StringUtils.newStringUsAscii(this.doEncoding(text.getBytes(charset))));
        buffer.append(POSTFIX);
        return buffer.toString();
    }

    /**
     * Applies an RFC 1522 compliant encoding scheme to the given string of text with the given charset.
     * <p>
     * This method resolves the charset name with {@link Charset#forName(String)} and delegates to
     * {@link #encodeText(String, Charset)}. A {@code null} text is returned as {@code null} before the charset
     * name is looked at.
     * </p>
     *
     * @param text
     *            a string to encode
     * @param charsetName
     *            the charset to use
     * @return RFC 1522 compliant "encoded-word" or {@code null} if the input text is {@code null}
     * @throws EncoderException
     *             thrown if there is an error condition during the Encoding process.
     * @throws UnsupportedEncodingException
     *             declared so that existing callers keep compiling; this implementation does not throw it because
     *             {@link Charset#forName(String)} reports problems with unchecked exceptions
     * @throws java.nio.charset.UnsupportedCharsetException
     *             if no support for the named charset is available
     * @throws java.nio.charset.IllegalCharsetNameException
     *             if the given charset name is illegal
     * @see Charset
     */
    protected String encodeText(final String text, final String charsetName)
            throws EncoderException, UnsupportedEncodingException {
        if (text == null) {
            // Do not attempt to resolve the charset name when there is nothing to encode.
            return null;
        }
        return this.encodeText(text, Charset.forName(charsetName));
    }

    /**
     * Returns the codec name (referred to as encoding in the RFC 1522).
     *
     * @return name of the codec
     */
    protected abstract String getEncoding();
}