1 /*
2 * Licensed to the Apache Software Foundation (ASF) under one or more
3 * contributor license agreements. See the NOTICE file distributed with
4 * this work for additional information regarding copyright ownership.
5 * The ASF licenses this file to You under the Apache License, Version 2.0
6 * (the "License"); you may not use this file except in compliance with
7 * the License. You may obtain a copy of the License at
8 *
9 * https://www.apache.org/licenses/LICENSE-2.0
10 *
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
16 */
17
18 package org.apache.commons.codec.net;
19
20 import java.io.UnsupportedEncodingException;
21 import java.nio.charset.Charset;
22 import java.nio.charset.UnsupportedCharsetException;
23 import java.util.Objects;
24
25 import org.apache.commons.codec.DecoderException;
26 import org.apache.commons.codec.EncoderException;
27 import org.apache.commons.codec.binary.StringUtils;
28
29 /**
30 * Implements methods common to all codecs defined in RFC 1522.
31 * <p>
32 * <a href="http://www.ietf.org/rfc/rfc1522.txt">RFC 1522</a> describes techniques to allow the
33 * encoding of non-ASCII text in various portions of a RFC 822 [2] message header, in a manner which
34 * is unlikely to confuse existing message handling software.
35 * </p>
36 * <p>
37 * This class is immutable and thread-safe.
38 * </p>
39 *
40 * @see <a href="http://www.ietf.org/rfc/rfc1522.txt">MIME (Multipurpose Internet Mail Extensions) Part Two:
41 * Message Header Extensions for Non-ASCII Text</a>
42 * @since 1.3
43 */
44 abstract class RFC1522Codec {
45
46 /** Separator. */
47 protected static final char SEP = '?';
48
49 /** Prefix. */
50 protected static final String POSTFIX = "?=";
51
52 /** Postfix. */
53 protected static final String PREFIX = "=?";
54
55 /**
56 * The default Charset used for string decoding and encoding.
57 */
58 protected final Charset charset;
59
60 RFC1522Codec(final Charset charset) {
61 this.charset = Objects.requireNonNull(charset, "charset");
62 }
63
64 /**
65 * Applies an RFC 1522 compliant decoding scheme to the given string of text.
66 * <p>
67 * This method processes the "encoded-word" header common to all the RFC 1522 codecs and then invokes
68 * {@link #doDecoding(byte[])} method of a concrete class to perform the specific decoding.
69 * </p>
70 *
71 * @param text
72 * a string to decode
73 * @return A new decoded String or {@code null} if the input is {@code null}.
74 * @throws DecoderException
75 * thrown if there is an error condition during the decoding process.
76 * @throws UnsupportedEncodingException
77 * thrown if charset specified in the "encoded-word" header is not supported
78 */
79 protected String decodeText(final String text) throws DecoderException, UnsupportedEncodingException {
80 if (text == null) {
81 return null;
82 }
83 if (!text.startsWith(PREFIX) || !text.endsWith(POSTFIX)) {
84 throw new DecoderException("RFC 1522 violation: malformed encoded content");
85 }
86 final int terminator = text.length() - 2;
87 int from = 2;
88 int to = text.indexOf(SEP, from);
89 if (to == terminator) {
90 throw new DecoderException("RFC 1522 violation: charset token not found");
91 }
92 final String charset = text.substring(from, to);
93 if (charset.isEmpty()) {
94 throw new DecoderException("RFC 1522 violation: charset not specified");
95 }
96 from = to + 1;
97 to = text.indexOf(SEP, from);
98 if (to == terminator) {
99 throw new DecoderException("RFC 1522 violation: encoding token not found");
100 }
101 final String encoding = text.substring(from, to);
102 if (!getEncoding().equalsIgnoreCase(encoding)) {
103 throw new DecoderException("This codec cannot decode " + encoding + " encoded content");
104 }
105 from = to + 1;
106 to = text.indexOf(SEP, from);
107 byte[] data = StringUtils.getBytesUsAscii(text.substring(from, to));
108 data = doDecoding(data);
109 return new String(data, charset);
110 }
111
112 /**
113 * Decodes an array of bytes using the defined encoding scheme.
114 *
115 * @param bytes
116 * Data to be decoded
117 * @return a byte array that contains decoded data
118 * @throws DecoderException
119 * A decoder exception is thrown if a Decoder encounters a failure condition during the decode process.
120 */
121 protected abstract byte[] doDecoding(byte[] bytes) throws DecoderException;
122
123 /**
124 * Encodes an array of bytes using the defined encoding scheme.
125 *
126 * @param bytes
127 * Data to be encoded
128 * @return A byte array containing the encoded data
129 * @throws EncoderException
130 * thrown if the Encoder encounters a failure condition during the encoding process.
131 */
132 protected abstract byte[] doEncoding(byte[] bytes) throws EncoderException;
133
134 /**
135 * Applies an RFC 1522 compliant encoding scheme to the given string of text with the given charset.
136 * <p>
137 * This method constructs the "encoded-word" header common to all the RFC 1522 codecs and then invokes
138 * {@link #doEncoding(byte[])} method of a concrete class to perform the specific encoding.
139 * </p>
140 *
141 * @param text
142 * a string to encode
143 * @param charset
144 * a charset to be used
145 * @return RFC 1522 compliant "encoded-word"
146 * @throws EncoderException
147 * thrown if there is an error condition during the Encoding process.
148 * @see Charset
149 */
150 protected String encodeText(final String text, final Charset charset) throws EncoderException {
151 if (text == null) {
152 return null;
153 }
154 final StringBuilder buffer = new StringBuilder();
155 buffer.append(PREFIX);
156 buffer.append(charset);
157 buffer.append(SEP);
158 buffer.append(getEncoding());
159 buffer.append(SEP);
160 buffer.append(StringUtils.newStringUsAscii(doEncoding(text.getBytes(charset))));
161 buffer.append(POSTFIX);
162 return buffer.toString();
163 }
164
165 /**
166 * Applies an RFC 1522 compliant encoding scheme to the given string of text with the given charset.
167 * <p>
168 * This method constructs the "encoded-word" header common to all the RFC 1522 codecs and then invokes
169 * {@link #doEncoding(byte[])} method of a concrete class to perform the specific encoding.
170 * </p>
171 *
172 * @param text
173 * a string to encode
174 * @param charsetName
175 * the charset to use
176 * @return RFC 1522 compliant "encoded-word"
177 * @throws EncoderException
178 * thrown if there is an error condition during the Encoding process.
179 * @throws UnsupportedCharsetException
180 * if charset is not available
181 * @see Charset
182 */
183 protected String encodeText(final String text, final String charsetName) throws EncoderException {
184 if (text == null) {
185 // Don't attempt charsetName conversion.
186 return null;
187 }
188 return encodeText(text, Charset.forName(charsetName));
189 }
190
191 /**
192 * Gets the default Charset name used for string decoding and encoding.
193 *
194 * @return the default Charset name
195 * @since 1.7
196 */
197 public Charset getCharset() {
198 return charset;
199 }
200
201 /**
202 * Gets the default Charset name used for string decoding and encoding.
203 *
204 * @return the default Charset name
205 */
206 public String getDefaultCharset() {
207 return charset.name();
208 }
209
210 /**
211 * Returns the codec name (referred to as encoding in the RFC 1522).
212 *
213 * @return name of the codec.
214 */
215 protected abstract String getEncoding();
216 }