RFC2231Utils.java
- /*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
- package org.apache.commons.fileupload2.core;
- import java.io.ByteArrayOutputStream;
- import java.io.UnsupportedEncodingException;
/**
 * Utility class to decode the extended parameter values of HTTP header fields as described by RFC 2231. This implementation follows RFC 5987 in
 * particular, which profiles RFC 2231 for use in HTTP headers.
 * <p>
 * RFC 5987 builds on RFC 2231 but has a narrower scope: the <a href="https://tools.ietf.org/html/rfc5987#section-3.2">charset is mandatory</a> and
 * <a href="https://tools.ietf.org/html/rfc5987#section-4">parameter continuations are not supported</a>.
 * </p>
 *
 * @see <a href="https://tools.ietf.org/html/rfc2231">RFC 2231</a>
 * @see <a href="https://tools.ietf.org/html/rfc5987">RFC 5987</a>
 */
final class RFC2231Utils {

    /**
     * The hexadecimal digits (upper case), indexed by their numeric value.
     */
    private static final char[] HEX_DIGITS = "0123456789ABCDEF".toCharArray();

    /**
     * Mask (127, {@code 0x7f}) that keeps the low seven bits of a character so that the result is always a valid index into {@link #HEX_DECODE}.
     */
    private static final byte MASK = 0x7f;

    /**
     * The value 128 ({@code 0x80}); used as the size of the {@link #HEX_DECODE} lookup table.
     */
    private static final int MASK_128 = 0x80;

    /**
     * Lookup table mapping a 7-bit character code to the value of the hexadecimal digit it represents. Entries for characters that are not hexadecimal
     * digits keep the array default of {@code 0}.
     */
    private static final byte[] HEX_DECODE = new byte[MASK_128];

    // Fill the lookup table for both the upper-case and the lower-case form of every hexadecimal digit.
    static {
        for (var i = 0; i < HEX_DIGITS.length; i++) {
            HEX_DECODE[HEX_DIGITS[i]] = (byte) i;
            HEX_DECODE[Character.toLowerCase(HEX_DIGITS[i])] = (byte) i;
        }
    }

    /**
     * Decodes a string of text obtained from a HTTP header as per RFC 2231.
     * <p>
     * <strong>Eg 1.</strong> {@code us-ascii'en-us'This%20is%20%2A%2A%2Afun%2A%2A%2A} will be decoded to {@code This is ***fun***}
     * </p>
     * <p>
     * <strong>Eg 2.</strong> {@code iso-8859-1'en'%A3%20rate} will be decoded to <code>&pound; rate</code>.
     * </p>
     * <p>
     * <strong>Eg 3.</strong> {@code UTF-8''%c2%a3%20and%20%e2%82%ac%20rates} will be decoded to <code>&pound; and &euro; rates</code>.
     * </p>
     * <p>
     * If the text does not contain both single-quote delimiters it is returned unchanged.
     * </p>
     *
     * @param encodedText Text to be decoded; has a format of {@code <charset>'<language>'<encoded_value>} and is ASCII only
     * @return Decoded text based on charset encoding, or {@code encodedText} itself when a delimiter is missing
     * @throws UnsupportedEncodingException The requested character set wasn't found.
     */
    static String decodeText(final String encodedText) throws UnsupportedEncodingException {
        final var langDelimitStart = encodedText.indexOf('\'');
        if (langDelimitStart == -1) {
            // missing charset
            return encodedText;
        }
        final var mimeCharset = encodedText.substring(0, langDelimitStart);
        final var langDelimitEnd = encodedText.indexOf('\'', langDelimitStart + 1);
        if (langDelimitEnd == -1) {
            // missing language
            return encodedText;
        }
        // The language tag between the two delimiters is not needed for decoding and is ignored.
        final var bytes = fromHex(encodedText.substring(langDelimitEnd + 1));
        return new String(bytes, getJavaCharset(mimeCharset));
    }

    /**
     * Converts percent-encoded {@code text} to the bytes it represents.
     * <p>
     * Every {@code %XY} sequence becomes the single byte with hexadecimal value {@code XY}; every other character is written as its low eight bits. The
     * decoding is lenient: a character that is not a hexadecimal digit contributes {@code 0} to the byte, and a {@code %} that is not followed by two more
     * characters ends the decoding, dropping the incomplete sequence.
     * </p>
     *
     * @param text ASCII text input
     * @return Byte array of the decoded characters
     */
    private static byte[] fromHex(final String text) {
        final var shift = 4;
        final var out = new ByteArrayOutputStream(text.length());
        for (var i = 0; i < text.length();) {
            final var c = text.charAt(i++);
            if (c == '%') {
                if (i > text.length() - 2) {
                    break; // unterminated sequence
                }
                // Masking keeps the index inside the 128-entry lookup table.
                final var b1 = HEX_DECODE[text.charAt(i++) & MASK];
                final var b2 = HEX_DECODE[text.charAt(i++) & MASK];
                out.write(b1 << shift | b2);
            } else {
                out.write((byte) c);
            }
        }
        return out.toByteArray();
    }

    /**
     * Maps a MIME character set name to the name handed to the Java runtime. Currently the name is passed through unchanged.
     *
     * @param mimeCharset The character set name taken from the encoded value.
     * @return The character set name to use for decoding.
     */
    private static String getJavaCharset(final String mimeCharset) {
        // good enough for standard values
        return mimeCharset;
    }

    /**
     * Tests whether the parameter name ends with an asterisk (*), which indicates that its value carries charset and language information and must be
     * decoded.
     *
     * @param paramName The parameter name to check, may be {@code null}.
     * @return {@code true} if the name ends with an asterisk, {@code false} otherwise (including for {@code null} and the empty string)
     */
    static boolean hasEncodedValue(final String paramName) {
        // endsWith() is false for the empty string; comparing lastIndexOf('*') with length() - 1 would wrongly match it (-1 == -1).
        return paramName != null && paramName.endsWith("*");
    }

    /**
     * If {@code paramName} has an asterisk (*) at the end, it will be stripped off, else the passed value will be returned.
     *
     * @param paramName The parameter name to inspect, may be {@code null}.
     * @return {@code paramName} without its trailing asterisk if it has one, otherwise {@code paramName} unchanged
     */
    static String stripDelimiter(final String paramName) {
        if (hasEncodedValue(paramName)) {
            // The asterisk is known to be the last character, so drop exactly that one.
            return paramName.substring(0, paramName.length() - 1);
        }
        return paramName;
    }

    /**
     * Private constructor so that no instances can be created. This class contains only static utility methods.
     */
    private RFC2231Utils() {
    }
}