View Javadoc
1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one or more
3    * contributor license agreements.  See the NOTICE file distributed with
4    * this work for additional information regarding copyright ownership.
5    * The ASF licenses this file to You under the Apache License, Version 2.0
6    * (the "License"); you may not use this file except in compliance with
7    * the License.  You may obtain a copy of the License at
8    *
9    *      http://www.apache.org/licenses/LICENSE-2.0
10   *
11   * Unless required by applicable law or agreed to in writing, software
12   * distributed under the License is distributed on an "AS IS" BASIS,
13   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14   * See the License for the specific language governing permissions and
15   * limitations under the License.
16   */
17  package org.apache.commons.fileupload2.core;
18  
19  import java.io.ByteArrayOutputStream;
20  import java.io.UnsupportedEncodingException;
21  
/**
 * Utility class to decode character sets on HTTP header fields based on RFC 2231. This implementation adheres to RFC 5987 in particular, which was
 * defined for HTTP headers.
 * <p>
 * RFC 5987 builds on RFC 2231, but has a narrower scope, such as a <a href="https://tools.ietf.org/html/rfc5987#section-3.2">mandatory charset
 * definition</a> and <a href="https://tools.ietf.org/html/rfc5987#section-4">no parameter continuation</a>.
 * </p>
 *
 * @see <a href="https://tools.ietf.org/html/rfc2231">RFC 2231</a>
 * @see <a href="https://tools.ietf.org/html/rfc5987">RFC 5987</a>
 */
final class RFC2231Utils {

    /**
     * The hexadecimal digits, indexed by their numeric value.
     */
    private static final char[] HEX_DIGITS = "0123456789ABCDEF".toCharArray();

    /**
     * Mask (127) applied to a character before it is used as an index into {@link #HEX_DECODE}, so the index always stays inside the table.
     */
    private static final byte MASK = 0x7f;

    /**
     * The size (128) of the {@link #HEX_DECODE} lookup table.
     */
    private static final int MASK_128 = 0x80;

    /**
     * Number of bits occupied by a single hexadecimal digit.
     */
    private static final int BITS_PER_HEX_DIGIT = 4;

    /**
     * Lookup table mapping an ASCII character to the value of the hexadecimal digit it represents. Entries for characters that are not hexadecimal digits
     * keep the array default of {@code 0}.
     */
    private static final byte[] HEX_DECODE = new byte[MASK_128];

    // Populate the lookup table for both the upper case and the lower case spelling of every hexadecimal digit.
    static {
        for (var i = 0; i < HEX_DIGITS.length; i++) {
            HEX_DECODE[HEX_DIGITS[i]] = (byte) i;
            HEX_DECODE[Character.toLowerCase(HEX_DIGITS[i])] = (byte) i;
        }
    }

    /**
     * Decodes a string of text obtained from an HTTP header as per RFC 2231.
     * <p>
     * <b>Eg 1.</b> {@code us-ascii'en-us'This%20is%20%2A%2A%2Afun%2A%2A%2A} will be decoded to {@code This is ***fun***}.
     * </p>
     * <p>
     * <b>Eg 2.</b> {@code iso-8859-1'en'%A3%20rate} will be decoded to <code>&pound; rate</code>.
     * </p>
     * <p>
     * <b>Eg 3.</b> {@code UTF-8''%c2%a3%20and%20%e2%82%ac%20rates} will be decoded to <code>&pound; and &euro; rates</code>.
     * </p>
     * <p>
     * If the text does not contain both single quote delimiters (the one ending the charset and the one ending the language), it is returned unchanged.
     * </p>
     *
     * @param encodedText Text to be decoded; has the format {@code <charset>'<language>'<encoded_value>} and is ASCII only.
     * @return Decoded text based on the charset encoding, or {@code encodedText} itself if a delimiter is missing.
     * @throws UnsupportedEncodingException The requested character set wasn't found.
     */
    static String decodeText(final String encodedText) throws UnsupportedEncodingException {
        final var langDelimitStart = encodedText.indexOf('\'');
        if (langDelimitStart == -1) {
            // missing charset
            return encodedText;
        }
        final var langDelimitEnd = encodedText.indexOf('\'', langDelimitStart + 1);
        if (langDelimitEnd == -1) {
            // missing language
            return encodedText;
        }
        final var mimeCharset = encodedText.substring(0, langDelimitStart);
        // The language tag between the two delimiters is not needed for decoding and is skipped.
        final var bytes = fromHex(encodedText.substring(langDelimitEnd + 1));
        return new String(bytes, getJavaCharset(mimeCharset));
    }

    /**
     * Converts percent-encoded {@code text} to the bytes it represents. A {@code %} followed by two characters is turned into a single byte; every other
     * character is written as its low-order byte.
     * <p>
     * Characters following a {@code %} that are not hexadecimal digits are treated as the digit {@code 0}. A {@code %} that is not followed by two more
     * characters ends the conversion; the incomplete sequence is dropped.
     * </p>
     *
     * @param text ASCII text input.
     * @return Byte array of the decoded characters.
     */
    private static byte[] fromHex(final String text) {
        final var length = text.length();
        final var out = new ByteArrayOutputStream(length);
        for (var i = 0; i < length;) {
            final var c = text.charAt(i++);
            if (c == '%') {
                if (i > length - 2) {
                    break; // unterminated sequence
                }
                final var high = HEX_DECODE[text.charAt(i++) & MASK];
                final var low = HEX_DECODE[text.charAt(i++) & MASK];
                out.write(high << BITS_PER_HEX_DIGIT | low);
            } else {
                out.write((byte) c);
            }
        }
        return out.toByteArray();
    }

    /**
     * Maps a MIME charset name to the name used to look up the Java charset.
     *
     * @param mimeCharset The charset name taken from the encoded text.
     * @return The given name, unchanged.
     */
    private static String getJavaCharset(final String mimeCharset) {
        // good enough for standard values
        return mimeCharset;
    }

    /**
     * Tests if the parameter name ends with an asterisk (*), which indicates that the value carries charset and language information to decode it.
     *
     * @param paramName The parameter, which is being checked; may be {@code null}.
     * @return {@code true}, if encoded as per RFC 2231, {@code false} otherwise (including for {@code null} and the empty string).
     */
    static boolean hasEncodedValue(final String paramName) {
        // endsWith is false for the empty string, whereas comparing lastIndexOf('*') with length() - 1 would match (-1 == -1).
        return paramName != null && paramName.endsWith("*");
    }

    /**
     * If {@code paramName} has an asterisk (*) at the end, it will be stripped off, else the passed value will be returned.
     *
     * @param paramName The parameter, which is being inspected; may be {@code null}.
     * @return {@code paramName} without its trailing asterisk (*), if RFC 2231 encoded, otherwise {@code paramName} itself.
     */
    static String stripDelimiter(final String paramName) {
        if (hasEncodedValue(paramName)) {
            return paramName.substring(0, paramName.length() - 1);
        }
        return paramName;
    }

    /**
     * Private constructor so that no instances can be created. This class contains only static utility methods.
     */
    private RFC2231Utils() {
    }
}