RFC2231Utils.java

  1. /*
  2.  * Licensed to the Apache Software Foundation (ASF) under one or more
  3.  * contributor license agreements.  See the NOTICE file distributed with
  4.  * this work for additional information regarding copyright ownership.
  5.  * The ASF licenses this file to You under the Apache License, Version 2.0
  6.  * (the "License"); you may not use this file except in compliance with
  7.  * the License.  You may obtain a copy of the License at
  8.  *
  9.  *      http://www.apache.org/licenses/LICENSE-2.0
  10.  *
  11.  * Unless required by applicable law or agreed to in writing, software
  12.  * distributed under the License is distributed on an "AS IS" BASIS,
  13.  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  14.  * See the License for the specific language governing permissions and
  15.  * limitations under the License.
  16.  */
  17. package org.apache.commons.fileupload2.core;

  18. import java.io.ByteArrayOutputStream;
  19. import java.io.UnsupportedEncodingException;

  20. /**
  21.  * Utility class to decode/encode character set on HTTP Header fields based on RFC 2231. This implementation adheres to RFC 5987 in particular, which was
  22.  * defined for HTTP headers
  23.  * <p>
  24.  * RFC 5987 builds on RFC 2231, but has lesser scope like <a href="https://tools.ietf.org/html/rfc5987#section-3.2">mandatory charset definition</a> and
  25.  * <a href="https://tools.ietf.org/html/rfc5987#section-4">no parameter continuation</a>
  26.  * </p>
  27.  *
  28.  * @see <a href="https://tools.ietf.org/html/rfc2231">RFC 2231</a>
  29.  * @see <a href="https://tools.ietf.org/html/rfc5987">RFC 5987</a>
  30.  */
  31. final class RFC2231Utils {

  32.     /**
  33.      * The Hexadecimal values char array.
  34.      */
  35.     private static final char[] HEX_DIGITS = "0123456789ABCDEF".toCharArray();
  36.     /**
  37.      * The Hexadecimal representation of 127.
  38.      */
  39.     private static final byte MASK = 0x7f;
  40.     /**
  41.      * The Hexadecimal representation of 128.
  42.      */
  43.     private static final int MASK_128 = 0x80;
  44.     /**
  45.      * The Hexadecimal decode value.
  46.      */
  47.     private static final byte[] HEX_DECODE = new byte[MASK_128];

  48.     // create a ASCII decoded array of Hexadecimal values
  49.     static {
  50.         for (var i = 0; i < HEX_DIGITS.length; i++) {
  51.             HEX_DECODE[HEX_DIGITS[i]] = (byte) i;
  52.             HEX_DECODE[Character.toLowerCase(HEX_DIGITS[i])] = (byte) i;
  53.         }
  54.     }

  55.     /**
  56.      * Decodes a string of text obtained from a HTTP header as per RFC 2231
  57.      *
  58.      * <strong>Eg 1.</strong> {@code us-ascii'en-us'This%20is%20%2A%2A%2Afun%2A%2A%2A} will be decoded to {@code This is ***fun***}
  59.      *
  60.      * <strong>Eg 2.</strong> {@code iso-8859-1'en'%A3%20rate} will be decoded to {@code £ rate}.
  61.      *
  62.      * <strong>Eg 3.</strong> {@code UTF-8''%c2%a3%20and%20%e2%82%ac%20rates} will be decoded to {@code £ and € rates}.
  63.      *
  64.      * @param encodedText   Text to be decoded has a format of {@code <charset>'<language>'<encoded_value>} and ASCII only
  65.      * @return Decoded text based on charset encoding
  66.      * @throws UnsupportedEncodingException The requested character set wasn't found.
  67.      */
  68.     static String decodeText(final String encodedText) throws UnsupportedEncodingException {
  69.         final var langDelimitStart = encodedText.indexOf('\'');
  70.         if (langDelimitStart == -1) {
  71.             // missing charset
  72.             return encodedText;
  73.         }
  74.         final var mimeCharset = encodedText.substring(0, langDelimitStart);
  75.         final var langDelimitEnd = encodedText.indexOf('\'', langDelimitStart + 1);
  76.         if (langDelimitEnd == -1) {
  77.             // missing language
  78.             return encodedText;
  79.         }
  80.         final var bytes = fromHex(encodedText.substring(langDelimitEnd + 1));
  81.         return new String(bytes, getJavaCharset(mimeCharset));
  82.     }

  83.     /**
  84.      * Converts {@code text} to their corresponding Hex value.
  85.      *
  86.      * @param text   ASCII text input
  87.      * @return Byte array of characters decoded from ASCII table
  88.      */
  89.     private static byte[] fromHex(final String text) {
  90.         final var shift = 4;
  91.         final var out = new ByteArrayOutputStream(text.length());
  92.         for (var i = 0; i < text.length();) {
  93.             final var c = text.charAt(i++);
  94.             if (c == '%') {
  95.                 if (i > text.length() - 2) {
  96.                     break; // unterminated sequence
  97.                 }
  98.                 final var b1 = HEX_DECODE[text.charAt(i++) & MASK];
  99.                 final var b2 = HEX_DECODE[text.charAt(i++) & MASK];
  100.                 out.write(b1 << shift | b2);
  101.             } else {
  102.                 out.write((byte) c);
  103.             }
  104.         }
  105.         return out.toByteArray();
  106.     }

  107.     private static String getJavaCharset(final String mimeCharset) {
  108.         // good enough for standard values
  109.         return mimeCharset;
  110.     }

  111.     /**
  112.      * Tests if asterisk (*) at the end of parameter name to indicate, if it has charset and language information to decode the value.
  113.      *
  114.      * @param paramName The parameter, which is being checked.
  115.      * @return {@code true}, if encoded as per RFC 2231, {@code false} otherwise
  116.      */
  117.     static boolean hasEncodedValue(final String paramName) {
  118.         if (paramName != null) {
  119.             return paramName.lastIndexOf('*') == paramName.length() - 1;
  120.         }
  121.         return false;
  122.     }

  123.     /**
  124.      * If {@code paramName} has Asterisk (*) at the end, it will be stripped off, else the passed value will be returned.
  125.      *
  126.      * @param paramName The parameter, which is being inspected.
  127.      * @return stripped {@code paramName} of Asterisk (*), if RFC2231 encoded
  128.      */
  129.     static String stripDelimiter(final String paramName) {
  130.         if (hasEncodedValue(paramName)) {
  131.             final var paramBuilder = new StringBuilder(paramName);
  132.             paramBuilder.deleteCharAt(paramName.lastIndexOf('*'));
  133.             return paramBuilder.toString();
  134.         }
  135.         return paramName;
  136.     }

  137.     /**
  138.      * Private constructor so that no instances can be created. This class contains only static utility methods.
  139.      */
  140.     private RFC2231Utils() {
  141.     }
  142. }