001/*
002 * Licensed to the Apache Software Foundation (ASF) under one or more
003 * contributor license agreements.  See the NOTICE file distributed with
004 * this work for additional information regarding copyright ownership.
005 * The ASF licenses this file to You under the Apache License, Version 2.0
006 * (the "License"); you may not use this file except in compliance with
007 * the License.  You may obtain a copy of the License at
008 *
009 *      http://www.apache.org/licenses/LICENSE-2.0
010 *
011 * Unless required by applicable law or agreed to in writing, software
012 * distributed under the License is distributed on an "AS IS" BASIS,
013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 * See the License for the specific language governing permissions and
015 * limitations under the License.
016 */
017package org.apache.commons.fileupload2.core;
018
019import java.io.UnsupportedEncodingException;
020import java.util.HashMap;
021import java.util.Locale;
022import java.util.Map;
023
024/**
025 * A simple parser intended to parse sequences of name/value pairs.
026 * <p>
027 * Parameter values are expected to be enclosed in quotes if they contain unsafe characters, such as '=' characters or separators. Parameter values are optional
028 * and can be omitted.
029 * </p>
030 * <p>
031 * {@code param1 = value; param2 = "anything goes; really"; param3}
032 * </p>
033 */
034public class ParameterParser {
035
036    /**
037     * String to be parsed.
038     */
039    private char[] chars;
040
041    /**
042     * Current position in the string.
043     */
044    private int pos;
045
046    /**
047     * Maximum position in the string.
048     */
049    private int len;
050
051    /**
052     * Start of a token.
053     */
054    private int i1;
055
056    /**
057     * End of a token.
058     */
059    private int i2;
060
061    /**
062     * Whether names stored in the map should be converted to lower case.
063     */
064    private boolean lowerCaseNames;
065
066    /**
067     * Default ParameterParser constructor.
068     */
069    public ParameterParser() {
070    }
071
072    /**
073     * A helper method to process the parsed token. This method removes leading and trailing blanks as well as enclosing quotation marks, when necessary.
074     *
075     * @param quoted {@code true} if quotation marks are expected, {@code false} otherwise.
076     * @return the token
077     */
078    private String getToken(final boolean quoted) {
079        // Trim leading white spaces
080        while (i1 < i2 && Character.isWhitespace(chars[i1])) {
081            i1++;
082        }
083        // Trim trailing white spaces
084        while (i2 > i1 && Character.isWhitespace(chars[i2 - 1])) {
085            i2--;
086        }
087        // Strip away quotation marks if necessary
088        if (quoted && i2 - i1 >= 2 && chars[i1] == '"' && chars[i2 - 1] == '"') {
089            i1++;
090            i2--;
091        }
092        String result = null;
093        if (i2 > i1) {
094            result = new String(chars, i1, i2 - i1);
095        }
096        return result;
097    }
098
099    /**
100     * Tests if there any characters left to parse.
101     *
102     * @return {@code true} if there are unparsed characters, {@code false} otherwise.
103     */
104    private boolean hasChar() {
105        return this.pos < this.len;
106    }
107
108    /**
109     * Tests {@code true} if parameter names are to be converted to lower case when name/value pairs are parsed.
110     *
111     * @return {@code true} if parameter names are to be converted to lower case when name/value pairs are parsed. Otherwise returns {@code false}
112     */
113    public boolean isLowerCaseNames() {
114        return this.lowerCaseNames;
115    }
116
117    /**
118     * Tests if the given character is present in the array of characters.
119     *
120     * @param ch      the character to test for presence in the array of characters
121     * @param charray the array of characters to test against
122     * @return {@code true} if the character is present in the array of characters, {@code false} otherwise.
123     */
124    private boolean isOneOf(final char ch, final char[] charray) {
125        var result = false;
126        for (final char element : charray) {
127            if (ch == element) {
128                result = true;
129                break;
130            }
131        }
132        return result;
133    }
134
135    /**
136     * Parses a map of name/value pairs from the given array of characters. Names are expected to be unique.
137     *
138     * @param charArray the array of characters that contains a sequence of name/value pairs
139     * @param separator the name/value pairs separator
140     * @return a map of name/value pairs
141     */
142    public Map<String, String> parse(final char[] charArray, final char separator) {
143        if (charArray == null) {
144            return new HashMap<>();
145        }
146        return parse(charArray, 0, charArray.length, separator);
147    }
148
149    /**
150     * Parses a map of name/value pairs from the given array of characters. Names are expected to be unique.
151     *
152     * @param charArray the array of characters that contains a sequence of name/value pairs
153     * @param offset    - the initial offset.
154     * @param length    - the length.
155     * @param separator the name/value pairs separator
156     * @return a map of name/value pairs
157     */
158    public Map<String, String> parse(final char[] charArray, final int offset, final int length, final char separator) {
159
160        if (charArray == null) {
161            return new HashMap<>();
162        }
163        final var params = new HashMap<String, String>();
164        this.chars = charArray.clone();
165        this.pos = offset;
166        this.len = length;
167
168        String paramName;
169        String paramValue;
170        while (hasChar()) {
171            paramName = parseToken(new char[] { '=', separator });
172            paramValue = null;
173            if (hasChar() && charArray[pos] == '=') {
174                pos++; // skip '='
175                paramValue = parseQuotedToken(new char[] { separator });
176
177                if (paramValue != null) {
178                    try {
179                        paramValue = RFC2231Utils.hasEncodedValue(paramName) ? RFC2231Utils.decodeText(paramValue) : MimeUtils.decodeText(paramValue);
180                    } catch (final UnsupportedEncodingException ignored) {
181                        // let's keep the original value in this case
182                    }
183                }
184            }
185            if (hasChar() && charArray[pos] == separator) {
186                pos++; // skip separator
187            }
188            if (paramName != null && !paramName.isEmpty()) {
189                paramName = RFC2231Utils.stripDelimiter(paramName);
190                if (this.lowerCaseNames) {
191                    paramName = paramName.toLowerCase(Locale.ENGLISH);
192                }
193                params.put(paramName, paramValue);
194            }
195        }
196        return params;
197    }
198
199    /**
200     * Parses a map of name/value pairs from the given string. Names are expected to be unique.
201     *
202     * @param str       the string that contains a sequence of name/value pairs
203     * @param separator the name/value pairs separator
204     * @return a map of name/value pairs
205     */
206    public Map<String, String> parse(final String str, final char separator) {
207        if (str == null) {
208            return new HashMap<>();
209        }
210        return parse(str.toCharArray(), separator);
211    }
212
213    /**
214     * Parses a map of name/value pairs from the given string. Names are expected to be unique. Multiple separators may be specified and the earliest found in
215     * the input string is used.
216     *
217     * @param str        the string that contains a sequence of name/value pairs
218     * @param separators the name/value pairs separators
219     * @return a map of name/value pairs
220     */
221    public Map<String, String> parse(final String str, final char[] separators) {
222        if (separators == null || separators.length == 0) {
223            return new HashMap<>();
224        }
225        var separator = separators[0];
226        if (str != null) {
227            var idx = str.length();
228            for (final char separator2 : separators) {
229                final var tmp = str.indexOf(separator2);
230                if (tmp != -1 && tmp < idx) {
231                    idx = tmp;
232                    separator = separator2;
233                }
234            }
235        }
236        return parse(str, separator);
237    }
238
239    /**
240     * Parses out a token until any of the given terminators is encountered outside the quotation marks.
241     *
242     * @param terminators the array of terminating characters. Any of these characters when encountered outside the quotation marks signify the end of the token
243     * @return the token
244     */
245    private String parseQuotedToken(final char[] terminators) {
246        char ch;
247        i1 = pos;
248        i2 = pos;
249        var quoted = false;
250        var charEscaped = false;
251        while (hasChar()) {
252            ch = chars[pos];
253            if (!quoted && isOneOf(ch, terminators)) {
254                break;
255            }
256            if (!charEscaped && ch == '"') {
257                quoted = !quoted;
258            }
259            charEscaped = !charEscaped && ch == '\\';
260            i2++;
261            pos++;
262
263        }
264        return getToken(true);
265    }
266
267    /**
268     * Parses out a token until any of the given terminators is encountered.
269     *
270     * @param terminators the array of terminating characters. Any of these characters when encountered signify the end of the token
271     * @return the token
272     */
273    private String parseToken(final char[] terminators) {
274        char ch;
275        i1 = pos;
276        i2 = pos;
277        while (hasChar()) {
278            ch = chars[pos];
279            if (isOneOf(ch, terminators)) {
280                break;
281            }
282            i2++;
283            pos++;
284        }
285        return getToken(false);
286    }
287
288    /**
289     * Sets the flag if parameter names are to be converted to lower case when name/value pairs are parsed.
290     *
291     * @param lowerCaseNames {@code true} if parameter names are to be converted to lower case when name/value pairs are parsed. {@code false} otherwise.
292     */
293    public void setLowerCaseNames(final boolean lowerCaseNames) {
294        this.lowerCaseNames = lowerCaseNames;
295    }
296
297}