1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17 package org.apache.commons.fileupload2.core;
18
19 import java.io.ByteArrayOutputStream;
20 import java.io.IOException;
21 import java.io.UnsupportedEncodingException;
22 import java.nio.charset.StandardCharsets;
23 import java.text.ParseException;
24 import java.util.Base64;
25 import java.util.HashMap;
26 import java.util.Locale;
27 import java.util.Map;
28
29
30
31
32 final class MimeUtils {
33
34
35
36
37 private static final String BASE64_ENCODING_MARKER = "B";
38
39
40
41
42 private static final String QUOTEDPRINTABLE_ENCODING_MARKER = "Q";
43
44
45
46
47 private static final String ENCODED_TOKEN_MARKER = "=?";
48
49
50
51
52 private static final String ENCODED_TOKEN_FINISHER = "?=";
53
54
55
56
57 private static final String LINEAR_WHITESPACE = " \t\r\n";
58
59
60
61
62 private static final Map<String, String> MIME2JAVA = new HashMap<>();
63
64 static {
65 MIME2JAVA.put("iso-2022-cn", "ISO2022CN");
66 MIME2JAVA.put("iso-2022-kr", "ISO2022KR");
67 MIME2JAVA.put("utf-8", "UTF8");
68 MIME2JAVA.put("utf8", "UTF8");
69 MIME2JAVA.put("ja_jp.iso2022-7", "ISO2022JP");
70 MIME2JAVA.put("ja_jp.eucjp", "EUCJIS");
71 MIME2JAVA.put("euc-kr", "KSC5601");
72 MIME2JAVA.put("euckr", "KSC5601");
73 MIME2JAVA.put("us-ascii", StandardCharsets.ISO_8859_1.name());
74 MIME2JAVA.put("x-us-ascii", StandardCharsets.ISO_8859_1.name());
75 }
76
77
78
79
80
81
82
83
84
85
86 static String decodeText(final String text) throws UnsupportedEncodingException {
87
88
89 if (!text.contains(ENCODED_TOKEN_MARKER)) {
90 return text;
91 }
92
93 var offset = 0;
94 final var endOffset = text.length();
95
96 var startWhiteSpace = -1;
97 var endWhiteSpace = -1;
98
99 final var decodedText = new StringBuilder(text.length());
100
101 var previousTokenEncoded = false;
102
103 while (offset < endOffset) {
104 var ch = text.charAt(offset);
105
106
107 if (LINEAR_WHITESPACE.indexOf(ch) != -1) {
108 startWhiteSpace = offset;
109 while (offset < endOffset) {
110
111 ch = text.charAt(offset);
112 if (LINEAR_WHITESPACE.indexOf(ch) == -1) {
113
114
115 endWhiteSpace = offset;
116 break;
117 }
118 offset++;
119 }
120 } else {
121
122 final var wordStart = offset;
123
124 while (offset < endOffset) {
125
126 ch = text.charAt(offset);
127 if (LINEAR_WHITESPACE.indexOf(ch) != -1) {
128 break;
129 }
130 offset++;
131
132
133 }
134
135 final var word = text.substring(wordStart, offset);
136
137 if (word.startsWith(ENCODED_TOKEN_MARKER)) {
138 try {
139
140 final var decodedWord = decodeWord(word);
141
142
143 if (!previousTokenEncoded && startWhiteSpace != -1) {
144 decodedText.append(text, startWhiteSpace, endWhiteSpace);
145 startWhiteSpace = -1;
146 }
147
148 previousTokenEncoded = true;
149
150 decodedText.append(decodedWord);
151
152
153 continue;
154
155 } catch (final ParseException ignored) {
156
157 }
158 }
159
160
161 if (startWhiteSpace != -1) {
162 decodedText.append(text, startWhiteSpace, endWhiteSpace);
163 startWhiteSpace = -1;
164 }
165
166 previousTokenEncoded = false;
167 decodedText.append(word);
168 }
169 }
170
171 return decodedText.toString();
172 }
173
174
175
176
177
178
179
180
181
182
183
184
185 private static String decodeWord(final String word) throws ParseException, UnsupportedEncodingException {
186
187
188
189 final var etmPos = word.indexOf(ENCODED_TOKEN_MARKER);
190 if (etmPos != 0) {
191 throw new ParseException("Invalid RFC 2047 encoded-word: " + word, etmPos);
192 }
193
194 final var charsetPos = word.indexOf('?', 2);
195 if (charsetPos == -1) {
196 throw new ParseException("Missing charset in RFC 2047 encoded-word: " + word, charsetPos);
197 }
198
199
200 final var charset = word.substring(2, charsetPos).toLowerCase(Locale.ENGLISH);
201
202
203 final var encodingPos = word.indexOf('?', charsetPos + 1);
204 if (encodingPos == -1) {
205 throw new ParseException("Missing encoding in RFC 2047 encoded-word: " + word, encodingPos);
206 }
207
208 final var encoding = word.substring(charsetPos + 1, encodingPos);
209
210
211 final var encodedTextPos = word.indexOf(ENCODED_TOKEN_FINISHER, encodingPos + 1);
212 if (encodedTextPos == -1) {
213 throw new ParseException("Missing encoded text in RFC 2047 encoded-word: " + word, encodedTextPos);
214 }
215
216 final var encodedText = word.substring(encodingPos + 1, encodedTextPos);
217
218
219 if (encodedText.isEmpty()) {
220 return "";
221 }
222
223 try {
224
225 final var out = new ByteArrayOutputStream(encodedText.length());
226
227 final var encodedData = encodedText.getBytes(StandardCharsets.US_ASCII);
228
229
230 if (encoding.equals(BASE64_ENCODING_MARKER)) {
231 out.write(Base64.getMimeDecoder().decode(encodedData));
232 } else if (encoding.equals(QUOTEDPRINTABLE_ENCODING_MARKER)) {
233 QuotedPrintableDecoder.decode(encodedData, out);
234 } else {
235 throw new UnsupportedEncodingException("Unknown RFC 2047 encoding: " + encoding);
236 }
237
238 final var decodedData = out.toByteArray();
239 return new String(decodedData, javaCharset(charset));
240 } catch (final IOException e) {
241 throw new UnsupportedEncodingException("Invalid RFC 2047 encoding");
242 }
243 }
244
245
246
247
248
249
250
251
252 private static String javaCharset(final String charset) {
253
254 if (charset == null) {
255 return null;
256 }
257 final var mappedCharset = MIME2JAVA.get(charset.toLowerCase(Locale.ENGLISH));
258
259
260 return mappedCharset == null ? charset : mappedCharset;
261 }
262
263
264
265
266 private MimeUtils() {
267
268 }
269
270 }