/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.commons.fileupload2.core;

import java.io.UnsupportedEncodingException;
import java.util.HashMap;
import java.util.Locale;
import java.util.Map;

/**
 * A simple parser intended to parse sequences of name/value pairs.
 * <p>
 * Parameter values are expected to be enclosed in quotes if they contain unsafe characters, such as '=' characters or separators. Parameter values are optional
 * and can be omitted.
 * </p>
 * <p>
 * {@code param1 = value; param2 = "anything goes; really"; param3}
 * </p>
 * <p>
 * The parser keeps its scanning state (input copy, cursor and token bounds) in instance fields that every {@code parse} call overwrites, so a single instance
 * must not be used by several threads at the same time.
 * </p>
 */
public class ParameterParser {

    /**
     * Private copy of the characters being parsed.
     */
    private char[] chars;

    /**
     * Current position in {@link #chars}.
     */
    private int pos;

    /**
     * Exclusive end position in {@link #chars}; parsing stops when {@link #pos} reaches it.
     */
    private int len;

    /**
     * Start (inclusive) of the token currently being extracted.
     */
    private int i1;

    /**
     * End (exclusive) of the token currently being extracted.
     */
    private int i2;

    /**
     * Whether names stored in the map should be converted to lower case.
     */
    private boolean lowerCaseNames;

    /**
     * Default ParameterParser constructor.
     */
    public ParameterParser() {
    }

    /**
     * A helper method to process the parsed token. This method removes leading and trailing blanks as well as enclosing quotation marks, when necessary.
     * The token is the range {@code [i1, i2)} of {@link #chars}; both bounds are adjusted in place.
     *
     * @param quoted {@code true} if quotation marks are expected, {@code false} otherwise.
     * @return the token, or {@code null} if nothing is left after trimming
     */
    private String getToken(final boolean quoted) {
        // Trim leading white spaces
        while (i1 < i2 && Character.isWhitespace(chars[i1])) {
            i1++;
        }
        // Trim trailing white spaces
        while (i2 > i1 && Character.isWhitespace(chars[i2 - 1])) {
            i2--;
        }
        // Strip away quotation marks if necessary; at least two characters are needed for a matching pair
        if (quoted && i2 - i1 >= 2 && chars[i1] == '"' && chars[i2 - 1] == '"') {
            i1++;
            i2--;
        }
        // An empty token is reported as null rather than as an empty string
        return i2 > i1 ? new String(chars, i1, i2 - i1) : null;
    }

    /**
     * Tests if there are any characters left to parse.
     *
     * @return {@code true} if there are unparsed characters, {@code false} otherwise.
     */
    private boolean hasChar() {
        return this.pos < this.len;
    }

    /**
     * Tests {@code true} if parameter names are to be converted to lower case when name/value pairs are parsed.
     *
     * @return {@code true} if parameter names are to be converted to lower case when name/value pairs are parsed. Otherwise returns {@code false}
     */
    public boolean isLowerCaseNames() {
        return this.lowerCaseNames;
    }

    /**
     * Tests if the given character is present in the array of characters.
     *
     * @param ch      the character to test for presence in the array of characters
     * @param charray the array of characters to test against
     * @return {@code true} if the character is present in the array of characters, {@code false} otherwise.
     */
    private boolean isOneOf(final char ch, final char[] charray) {
        for (final char element : charray) {
            if (ch == element) {
                return true;
            }
        }
        return false;
    }

    /**
     * Parses a map of name/value pairs from the given array of characters. Names are expected to be unique.
     *
     * @param charArray the array of characters that contains a sequence of name/value pairs
     * @param separator the name/value pairs separator
     * @return a map of name/value pairs; empty if {@code charArray} is {@code null}
     */
    public Map<String, String> parse(final char[] charArray, final char separator) {
        if (charArray == null) {
            return new HashMap<>();
        }
        return parse(charArray, 0, charArray.length, separator);
    }

    /**
     * Parses a map of name/value pairs from the given array of characters. Names are expected to be unique; if a name occurs more than once the last
     * occurrence wins.
     * <p>
     * Only the characters in the range {@code [offset, offset + length)} are parsed. A range that extends past the end of the array is silently limited to
     * the end of the array, and an empty or negative range yields an empty map.
     * </p>
     *
     * @param charArray the array of characters that contains a sequence of name/value pairs
     * @param offset    the index of the first character to parse; expected to be non-negative
     * @param length    the number of characters to parse, starting at {@code offset}
     * @param separator the name/value pairs separator
     * @return a map of name/value pairs; empty if {@code charArray} is {@code null}
     */
    public Map<String, String> parse(final char[] charArray, final int offset, final int length, final char separator) {
        if (charArray == null) {
            return new HashMap<>();
        }
        final var params = new HashMap<String, String>();
        // Work on a private copy so later changes to the caller's array cannot affect parsing
        this.chars = charArray.clone();
        this.pos = offset;
        // 'len' is an absolute (exclusive) end position, not a character count, so it has to include the offset.
        // The sum is computed as a long so it cannot overflow, then limited to the array end and to no less than offset.
        final long requestedEnd = (long) offset + length;
        this.len = (int) Math.max(offset, Math.min(requestedEnd, chars.length));

        while (hasChar()) {
            String paramName = parseToken(new char[] { '=', separator });
            String paramValue = null;
            if (hasChar() && chars[pos] == '=') {
                pos++; // skip '='
                paramValue = parseQuotedToken(new char[] { separator });

                if (paramValue != null) {
                    try {
                        paramValue = RFC2231Utils.hasEncodedValue(paramName) ? RFC2231Utils.decodeText(paramValue) : MimeUtils.decodeText(paramValue);
                    } catch (final UnsupportedEncodingException ignored) {
                        // let's keep the original value in this case
                    }
                }
            }
            if (hasChar() && chars[pos] == separator) {
                pos++; // skip separator
            }
            // Pairs without a name are dropped; a name without a value is stored with a null value
            if (paramName != null && !paramName.isEmpty()) {
                paramName = RFC2231Utils.stripDelimiter(paramName);
                if (this.lowerCaseNames) {
                    paramName = paramName.toLowerCase(Locale.ENGLISH);
                }
                params.put(paramName, paramValue);
            }
        }
        return params;
    }

    /**
     * Parses a map of name/value pairs from the given string. Names are expected to be unique.
     *
     * @param str       the string that contains a sequence of name/value pairs
     * @param separator the name/value pairs separator
     * @return a map of name/value pairs; empty if {@code str} is {@code null}
     */
    public Map<String, String> parse(final String str, final char separator) {
        if (str == null) {
            return new HashMap<>();
        }
        return parse(str.toCharArray(), separator);
    }

    /**
     * Parses a map of name/value pairs from the given string. Names are expected to be unique. Multiple separators may be specified and the earliest found in
     * the input string is used. If none of the separators occurs in the string, the first one in the array is used.
     *
     * @param str        the string that contains a sequence of name/value pairs
     * @param separators the name/value pairs separators
     * @return a map of name/value pairs; empty if {@code str} is {@code null} or no separators are given
     */
    public Map<String, String> parse(final String str, final char[] separators) {
        if (separators == null || separators.length == 0) {
            return new HashMap<>();
        }
        var separator = separators[0];
        if (str != null) {
            // Pick the candidate whose first occurrence is closest to the start of the string
            var idx = str.length();
            for (final char separator2 : separators) {
                final var tmp = str.indexOf(separator2);
                if (tmp != -1 && tmp < idx) {
                    idx = tmp;
                    separator = separator2;
                }
            }
        }
        return parse(str, separator);
    }

    /**
     * Parses out a token until any of the given terminators is encountered outside the quotation marks.
     *
     * @param terminators the array of terminating characters. Any of these characters when encountered outside the quotation marks signify the end of the token
     * @return the token, or {@code null} if it is empty
     */
    private String parseQuotedToken(final char[] terminators) {
        i1 = pos;
        i2 = pos;
        var quoted = false;
        var charEscaped = false;
        while (hasChar()) {
            final char ch = chars[pos];
            if (!quoted && isOneOf(ch, terminators)) {
                break;
            }
            // A quotation mark preceded by a backslash does not open or close a quoted section
            if (!charEscaped && ch == '"') {
                quoted = !quoted;
            }
            // A backslash escapes the next character unless the backslash itself was escaped
            charEscaped = !charEscaped && ch == '\\';
            i2++;
            pos++;
        }
        return getToken(true);
    }

    /**
     * Parses out a token until any of the given terminators is encountered.
     *
     * @param terminators the array of terminating characters. Any of these characters when encountered signify the end of the token
     * @return the token, or {@code null} if it is empty
     */
    private String parseToken(final char[] terminators) {
        i1 = pos;
        i2 = pos;
        while (hasChar()) {
            final char ch = chars[pos];
            if (isOneOf(ch, terminators)) {
                break;
            }
            i2++;
            pos++;
        }
        return getToken(false);
    }

    /**
     * Sets the flag if parameter names are to be converted to lower case when name/value pairs are parsed.
     *
     * @param lowerCaseNames {@code true} if parameter names are to be converted to lower case when name/value pairs are parsed. {@code false} otherwise.
     */
    public void setLowerCaseNames(final boolean lowerCaseNames) {
        this.lowerCaseNames = lowerCaseNames;
    }

}