1 /*
2 * Licensed to the Apache Software Foundation (ASF) under one or more
3 * contributor license agreements. See the NOTICE file distributed with
4 * this work for additional information regarding copyright ownership.
5 * The ASF licenses this file to You under the Apache License, Version 2.0
6 * (the "License"); you may not use this file except in compliance with
7 * the License. You may obtain a copy of the License at
8 *
9 * http://www.apache.org/licenses/LICENSE-2.0
10 *
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
16 */
17 package org.apache.commons.fileupload2.core;
18
19 import java.io.ByteArrayOutputStream;
20 import java.io.UnsupportedEncodingException;
21
/**
 * Utility class to decode/encode character set on HTTP Header fields based on RFC 2231. This implementation adheres to RFC 5987 in particular, which was
 * defined for HTTP headers.
 * <p>
 * RFC 5987 builds on RFC 2231, but has a narrower scope, such as a <a href="https://tools.ietf.org/html/rfc5987#section-3.2">mandatory charset definition</a>
 * and <a href="https://tools.ietf.org/html/rfc5987#section-4">no parameter continuation</a>.
 * </p>
 *
 * @see <a href="https://tools.ietf.org/html/rfc2231">RFC 2231</a>
 * @see <a href="https://tools.ietf.org/html/rfc5987">RFC 5987</a>
 */
final class RFC2231Utils {

    /**
     * The hexadecimal digits, indexed by their numeric value.
     */
    private static final char[] HEX_DIGITS = "0123456789ABCDEF".toCharArray();

    /**
     * Bit mask (127) that keeps the low seven bits of a character, so that the result is always a valid index into {@link #HEX_DECODE}.
     */
    private static final byte MASK = 0x7f;

    /**
     * The size (128) of the {@link #HEX_DECODE} lookup table.
     */
    private static final int MASK_128 = 0x80;

    /**
     * Lookup table that maps the character code of a hexadecimal digit (upper or lower case) to its numeric value. All other entries are left at 0.
     */
    private static final byte[] HEX_DECODE = new byte[MASK_128];

    // Fill the lookup table for both the upper case and the lower case spelling of each hexadecimal digit.
    static {
        for (var i = 0; i < HEX_DIGITS.length; i++) {
            HEX_DECODE[HEX_DIGITS[i]] = (byte) i;
            HEX_DECODE[Character.toLowerCase(HEX_DIGITS[i])] = (byte) i;
        }
    }

    /**
     * Decodes a string of text obtained from a HTTP header as per RFC 2231.
     * <p>
     * <b>Eg 1.</b> {@code us-ascii'en-us'This%20is%20%2A%2A%2Afun%2A%2A%2A} will be decoded to {@code This is ***fun***}.
     * </p>
     * <p>
     * <b>Eg 2.</b> {@code iso-8859-1'en'%A3%20rate} will be decoded to {@code £ rate}.
     * </p>
     * <p>
     * <b>Eg 3.</b> {@code UTF-8''%c2%a3%20and%20%e2%82%ac%20rates} will be decoded to {@code £ and € rates}.
     * </p>
     * <p>
     * If the text does not contain both single quote delimiters, it is returned unchanged.
     * </p>
     *
     * @param encodedText - Text to be decoded has a format of {@code <charset>'<language>'<encoded_value>} and ASCII only
     * @return Decoded text based on charset encoding, or {@code encodedText} itself, if the charset or language delimiter is missing
     * @throws UnsupportedEncodingException The requested character set wasn't found.
     */
    static String decodeText(final String encodedText) throws UnsupportedEncodingException {
        final var langDelimitStart = encodedText.indexOf('\'');
        if (langDelimitStart == -1) {
            // missing charset
            return encodedText;
        }
        final var mimeCharset = encodedText.substring(0, langDelimitStart);
        final var langDelimitEnd = encodedText.indexOf('\'', langDelimitStart + 1);
        if (langDelimitEnd == -1) {
            // missing language
            return encodedText;
        }
        // The language tag between the two delimiters is not needed for decoding and is ignored.
        final var bytes = fromHex(encodedText.substring(langDelimitEnd + 1));
        return new String(bytes, getJavaCharset(mimeCharset));
    }

    /**
     * Converts percent-encoded {@code text} to the bytes it represents.
     * <p>
     * Every {@code %XX} sequence yields one byte; any other character is written as its low eight bits. Decoding is lenient: it stops silently at a
     * {@code %} that is not followed by two more characters, and non-hexadecimal characters inside a sequence are not rejected (an ASCII character that
     * is not a hexadecimal digit counts as 0).
     * </p>
     *
     * @param text - ASCII text input
     * @return Byte array of characters decoded from ASCII table
     */
    private static byte[] fromHex(final String text) {
        final var shift = 4;
        final var out = new ByteArrayOutputStream(text.length());
        for (var i = 0; i < text.length();) {
            final var c = text.charAt(i++);
            if (c == '%') {
                if (i > text.length() - 2) {
                    break; // unterminated sequence
                }
                // Masking keeps the index within the bounds of the lookup table, even for non-ASCII characters.
                final var b1 = HEX_DECODE[text.charAt(i++) & MASK];
                final var b2 = HEX_DECODE[text.charAt(i++) & MASK];
                out.write(b1 << shift | b2);
            } else {
                out.write((byte) c);
            }
        }
        return out.toByteArray();
    }

    /**
     * Maps a MIME character set name to the name that is passed to Java for decoding. Currently, the name is returned as is.
     *
     * @param mimeCharset The MIME character set name.
     * @return The given {@code mimeCharset}, unchanged.
     */
    private static String getJavaCharset(final String mimeCharset) {
        // good enough for standard values
        return mimeCharset;
    }

    /**
     * Tests if asterisk (*) at the end of parameter name to indicate, if it has charset and language information to decode the value.
     *
     * @param paramName The parameter, which is being checked. May be {@code null}.
     * @return {@code true}, if encoded as per RFC 2231, {@code false} otherwise (including for {@code null}, and empty names)
     */
    static boolean hasEncodedValue(final String paramName) {
        // endsWith is false for the empty string, whereas comparing lastIndexOf('*') with length() - 1 would match -1 == -1.
        return paramName != null && paramName.endsWith("*");
    }

    /**
     * If {@code paramName} has Asterisk (*) at the end, it will be stripped off, else the passed value will be returned.
     *
     * @param paramName The parameter, which is being inspected. May be {@code null}.
     * @return stripped {@code paramName} of Asterisk (*), if RFC2231 encoded
     */
    static String stripDelimiter(final String paramName) {
        if (hasEncodedValue(paramName)) {
            // The asterisk is known to be the last character, so dropping that character is sufficient.
            return paramName.substring(0, paramName.length() - 1);
        }
        return paramName;
    }

    /**
     * Private constructor so that no instances can be created. This class contains only static utility methods.
     */
    private RFC2231Utils() {
    }
}