View Javadoc
1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one or more
3    * contributor license agreements.  See the NOTICE file distributed with
4    * this work for additional information regarding copyright ownership.
5    * The ASF licenses this file to You under the Apache License, Version 2.0
6    * (the "License"); you may not use this file except in compliance with
7    * the License.  You may obtain a copy of the License at
8    *
9    *      http://www.apache.org/licenses/LICENSE-2.0
10   *
11   * Unless required by applicable law or agreed to in writing, software
12   * distributed under the License is distributed on an "AS IS" BASIS,
13   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14   * See the License for the specific language governing permissions and
15   * limitations under the License.
16   */
17  package org.apache.commons.fileupload2.core;
18  
19  import java.io.UnsupportedEncodingException;
20  import java.util.HashMap;
21  import java.util.Locale;
22  import java.util.Map;
23  
24  /**
25   * A simple parser intended to parse sequences of name/value pairs.
26   * <p>
27   * Parameter values are expected to be enclosed in quotes if they contain unsafe characters, such as '=' characters or separators. Parameter values are optional
28   * and can be omitted.
29   * </p>
30   * <p>
31   * {@code param1 = value; param2 = "anything goes; really"; param3}
32   * </p>
33   */
34  public class ParameterParser {
35  
36      /**
37       * String to be parsed.
38       */
39      private char[] chars;
40  
41      /**
42       * Current position in the string.
43       */
44      private int pos;
45  
46      /**
47       * Maximum position in the string.
48       */
49      private int len;
50  
51      /**
52       * Start of a token.
53       */
54      private int i1;
55  
56      /**
57       * End of a token.
58       */
59      private int i2;
60  
61      /**
62       * Whether names stored in the map should be converted to lower case.
63       */
64      private boolean lowerCaseNames;
65  
66      /**
67       * Default ParameterParser constructor.
68       */
69      public ParameterParser() {
70      }
71  
72      /**
73       * A helper method to process the parsed token. This method removes leading and trailing blanks as well as enclosing quotation marks, when necessary.
74       *
75       * @param quoted {@code true} if quotation marks are expected, {@code false} otherwise.
76       * @return the token
77       */
78      private String getToken(final boolean quoted) {
79          // Trim leading white spaces
80          while (i1 < i2 && Character.isWhitespace(chars[i1])) {
81              i1++;
82          }
83          // Trim trailing white spaces
84          while (i2 > i1 && Character.isWhitespace(chars[i2 - 1])) {
85              i2--;
86          }
87          // Strip away quotation marks if necessary
88          if (quoted && i2 - i1 >= 2 && chars[i1] == '"' && chars[i2 - 1] == '"') {
89              i1++;
90              i2--;
91          }
92          String result = null;
93          if (i2 > i1) {
94              result = new String(chars, i1, i2 - i1);
95          }
96          return result;
97      }
98  
99      /**
100      * Tests if there any characters left to parse.
101      *
102      * @return {@code true} if there are unparsed characters, {@code false} otherwise.
103      */
104     private boolean hasChar() {
105         return this.pos < this.len;
106     }
107 
108     /**
109      * Tests {@code true} if parameter names are to be converted to lower case when name/value pairs are parsed.
110      *
111      * @return {@code true} if parameter names are to be converted to lower case when name/value pairs are parsed. Otherwise returns {@code false}
112      */
113     public boolean isLowerCaseNames() {
114         return this.lowerCaseNames;
115     }
116 
117     /**
118      * Tests if the given character is present in the array of characters.
119      *
120      * @param ch      the character to test for presence in the array of characters
121      * @param charray the array of characters to test against
122      * @return {@code true} if the character is present in the array of characters, {@code false} otherwise.
123      */
124     private boolean isOneOf(final char ch, final char[] charray) {
125         var result = false;
126         for (final char element : charray) {
127             if (ch == element) {
128                 result = true;
129                 break;
130             }
131         }
132         return result;
133     }
134 
135     /**
136      * Parses a map of name/value pairs from the given array of characters. Names are expected to be unique.
137      *
138      * @param charArray the array of characters that contains a sequence of name/value pairs
139      * @param separator the name/value pairs separator
140      * @return a map of name/value pairs
141      */
142     public Map<String, String> parse(final char[] charArray, final char separator) {
143         if (charArray == null) {
144             return new HashMap<>();
145         }
146         return parse(charArray, 0, charArray.length, separator);
147     }
148 
149     /**
150      * Parses a map of name/value pairs from the given array of characters. Names are expected to be unique.
151      *
152      * @param charArray the array of characters that contains a sequence of name/value pairs
153      * @param offset    - the initial offset.
154      * @param length    - the length.
155      * @param separator the name/value pairs separator
156      * @return a map of name/value pairs
157      */
158     public Map<String, String> parse(final char[] charArray, final int offset, final int length, final char separator) {
159 
160         if (charArray == null) {
161             return new HashMap<>();
162         }
163         final var params = new HashMap<String, String>();
164         this.chars = charArray.clone();
165         this.pos = offset;
166         this.len = length;
167 
168         String paramName;
169         String paramValue;
170         while (hasChar()) {
171             paramName = parseToken(new char[] { '=', separator });
172             paramValue = null;
173             if (hasChar() && charArray[pos] == '=') {
174                 pos++; // skip '='
175                 paramValue = parseQuotedToken(new char[] { separator });
176 
177                 if (paramValue != null) {
178                     try {
179                         paramValue = RFC2231Utils.hasEncodedValue(paramName) ? RFC2231Utils.decodeText(paramValue) : MimeUtils.decodeText(paramValue);
180                     } catch (final UnsupportedEncodingException ignored) {
181                         // let's keep the original value in this case
182                     }
183                 }
184             }
185             if (hasChar() && charArray[pos] == separator) {
186                 pos++; // skip separator
187             }
188             if (paramName != null && !paramName.isEmpty()) {
189                 paramName = RFC2231Utils.stripDelimiter(paramName);
190                 if (this.lowerCaseNames) {
191                     paramName = paramName.toLowerCase(Locale.ENGLISH);
192                 }
193                 params.put(paramName, paramValue);
194             }
195         }
196         return params;
197     }
198 
199     /**
200      * Parses a map of name/value pairs from the given string. Names are expected to be unique.
201      *
202      * @param str       the string that contains a sequence of name/value pairs
203      * @param separator the name/value pairs separator
204      * @return a map of name/value pairs
205      */
206     public Map<String, String> parse(final String str, final char separator) {
207         if (str == null) {
208             return new HashMap<>();
209         }
210         return parse(str.toCharArray(), separator);
211     }
212 
213     /**
214      * Parses a map of name/value pairs from the given string. Names are expected to be unique. Multiple separators may be specified and the earliest found in
215      * the input string is used.
216      *
217      * @param str        the string that contains a sequence of name/value pairs
218      * @param separators the name/value pairs separators
219      * @return a map of name/value pairs
220      */
221     public Map<String, String> parse(final String str, final char[] separators) {
222         if (separators == null || separators.length == 0) {
223             return new HashMap<>();
224         }
225         var separator = separators[0];
226         if (str != null) {
227             var idx = str.length();
228             for (final char separator2 : separators) {
229                 final var tmp = str.indexOf(separator2);
230                 if (tmp != -1 && tmp < idx) {
231                     idx = tmp;
232                     separator = separator2;
233                 }
234             }
235         }
236         return parse(str, separator);
237     }
238 
239     /**
240      * Parses out a token until any of the given terminators is encountered outside the quotation marks.
241      *
242      * @param terminators the array of terminating characters. Any of these characters when encountered outside the quotation marks signify the end of the token
243      * @return the token
244      */
245     private String parseQuotedToken(final char[] terminators) {
246         char ch;
247         i1 = pos;
248         i2 = pos;
249         var quoted = false;
250         var charEscaped = false;
251         while (hasChar()) {
252             ch = chars[pos];
253             if (!quoted && isOneOf(ch, terminators)) {
254                 break;
255             }
256             if (!charEscaped && ch == '"') {
257                 quoted = !quoted;
258             }
259             charEscaped = !charEscaped && ch == '\\';
260             i2++;
261             pos++;
262 
263         }
264         return getToken(true);
265     }
266 
267     /**
268      * Parses out a token until any of the given terminators is encountered.
269      *
270      * @param terminators the array of terminating characters. Any of these characters when encountered signify the end of the token
271      * @return the token
272      */
273     private String parseToken(final char[] terminators) {
274         char ch;
275         i1 = pos;
276         i2 = pos;
277         while (hasChar()) {
278             ch = chars[pos];
279             if (isOneOf(ch, terminators)) {
280                 break;
281             }
282             i2++;
283             pos++;
284         }
285         return getToken(false);
286     }
287 
288     /**
289      * Sets the flag if parameter names are to be converted to lower case when name/value pairs are parsed.
290      *
291      * @param lowerCaseNames {@code true} if parameter names are to be converted to lower case when name/value pairs are parsed. {@code false} otherwise.
292      */
293     public void setLowerCaseNames(final boolean lowerCaseNames) {
294         this.lowerCaseNames = lowerCaseNames;
295     }
296 
297 }