001    /*
002     * Licensed to the Apache Software Foundation (ASF) under one or more
003     * contributor license agreements.  See the NOTICE file distributed with
004     * this work for additional information regarding copyright ownership.
005     * The ASF licenses this file to You under the Apache License, Version 2.0
006     * (the "License"); you may not use this file except in compliance with
007     * the License.  You may obtain a copy of the License at
008     *
009     *      http://www.apache.org/licenses/LICENSE-2.0
010     *
011     * Unless required by applicable law or agreed to in writing, software
012     * distributed under the License is distributed on an "AS IS" BASIS,
013     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014     * See the License for the specific language governing permissions and
015     * limitations under the License.
016     */
017    package org.apache.commons.vfs2.provider;
018    
019    import org.apache.commons.vfs2.FileName;
020    import org.apache.commons.vfs2.FileSystemException;
021    import org.apache.commons.vfs2.FileType;
022    import org.apache.commons.vfs2.VFS;
023    import org.apache.commons.vfs2.util.Os;
024    
025    /**
026     * Utilities for dealing with URIs. See RFC 2396 for details.
027     *
028     * @author <a href="http://commons.apache.org/vfs/team-list.html">Commons VFS team</a>
029     *          2005) $
030     */
031    public final class UriParser
032    {
033        /**
034         * The set of valid separators. These are all converted to the normalised
035         * one. Does <i>not</i> contain the normalised separator
036         */
037        // public static final char[] separators = {'\\'};
038        public static final char TRANS_SEPARATOR = '\\';
039    
040        /**
041         * The normalised separator to use.
042         */
043        private static final char SEPARATOR_CHAR = FileName.SEPARATOR_CHAR;
044    
045        private static final int HEX_BASE = 16;
046    
047        private static final int BITS_IN_HALF_BYTE = 4;
048    
049        private static final char LOW_MASK = 0x0F;
050    
051        private UriParser()
052        {
053        }
054    
055        /**
056         * Extracts the first element of a path.
057         * @param name StringBuilder containing the path.
058         * @return The first element of the path.
059         */
060        public static String extractFirstElement(final StringBuilder name)
061        {
062            final int len = name.length();
063            if (len < 1)
064            {
065                return null;
066            }
067            int startPos = 0;
068            if (name.charAt(0) == SEPARATOR_CHAR)
069            {
070                startPos = 1;
071            }
072            for (int pos = startPos; pos < len; pos++)
073            {
074                if (name.charAt(pos) == SEPARATOR_CHAR)
075                {
076                    // Found a separator
077                    final String elem = name.substring(startPos, pos);
078                    name.delete(startPos, pos + 1);
079                    return elem;
080                }
081            }
082    
083            // No separator
084            final String elem = name.substring(startPos);
085            name.setLength(0);
086            return elem;
087        }
088    
089        /**
090         * Normalises a path. Does the following:
091         * <ul>
092         * <li>Removes empty path elements.
093         * <li>Handles '.' and '..' elements.
094         * <li>Removes trailing separator.
095         * </ul>
096         *
097         * Its assumed that the separators are already fixed.
098         *
099         * @param path The path to normalize.
100         * @return The FileType.
101         * @throws FileSystemException if an error occurs.
102         *
103         *  @see #fixSeparators
104         */
105        public static FileType normalisePath(final StringBuilder path)
106                throws FileSystemException
107        {
108            FileType fileType = FileType.FOLDER;
109            if (path.length() == 0)
110            {
111                return fileType;
112            }
113    
114            if (path.charAt(path.length() - 1) != '/')
115            {
116                fileType = FileType.FILE;
117            }
118    
119            // Adjust separators
120            // fixSeparators(path);
121    
122            // Determine the start of the first element
123            int startFirstElem = 0;
124            if (path.charAt(0) == SEPARATOR_CHAR)
125            {
126                if (path.length() == 1)
127                {
128                    return fileType;
129                }
130                startFirstElem = 1;
131            }
132    
133            // Iterate over each element
134            int startElem = startFirstElem;
135            int maxlen = path.length();
136            while (startElem < maxlen)
137            {
138                // Find the end of the element
139                int endElem = startElem;
140                for (; endElem < maxlen && path.charAt(endElem) != SEPARATOR_CHAR; endElem++)
141                {
142                }
143    
144                final int elemLen = endElem - startElem;
145                if (elemLen == 0)
146                {
147                    // An empty element - axe it
148                    path.delete(endElem, endElem + 1);
149                    maxlen = path.length();
150                    continue;
151                }
152                if (elemLen == 1 && path.charAt(startElem) == '.')
153                {
154                    // A '.' element - axe it
155                    path.delete(startElem, endElem + 1);
156                    maxlen = path.length();
157                    continue;
158                }
159                if (elemLen == 2 && path.charAt(startElem) == '.'
160                        && path.charAt(startElem + 1) == '.')
161                {
162                    // A '..' element - remove the previous element
163                    if (startElem == startFirstElem)
164                    {
165                        // Previous element is missing
166                        throw new FileSystemException(
167                                "vfs.provider/invalid-relative-path.error");
168                    }
169    
170                    // Find start of previous element
171                    int pos = startElem - 2;
172                    for (; pos >= 0 && path.charAt(pos) != SEPARATOR_CHAR; pos--)
173                    {
174                    }
175                    startElem = pos + 1;
176    
177                    path.delete(startElem, endElem + 1);
178                    maxlen = path.length();
179                    continue;
180                }
181    
182                // A regular element
183                startElem = endElem + 1;
184            }
185    
186            // Remove trailing separator
187            if (!VFS.isUriStyle())
188            {
189                if (maxlen > 0 && path.charAt(maxlen - 1) == SEPARATOR_CHAR
190                        && maxlen > 1)
191                {
192                    path.delete(maxlen - 1, maxlen);
193                }
194            }
195    
196            return fileType;
197        }
198    
199        /**
200         * Normalises the separators in a name.
201         * @param name The StringBuilder containing the name
202         * @return true if the StringBuilder was modified.
203         */
204        public static boolean fixSeparators(final StringBuilder name)
205        {
206            boolean changed = false;
207            final int maxlen = name.length();
208            for (int i = 0; i < maxlen; i++)
209            {
210                final char ch = name.charAt(i);
211                if (ch == TRANS_SEPARATOR)
212                {
213                    name.setCharAt(i, SEPARATOR_CHAR);
214                    changed = true;
215                }
216            }
217            return changed;
218        }
219    
220        /**
221         * Extracts the scheme from a URI.
222         *
223         * @param uri The URI.
224         * @return The scheme name. Returns null if there is no scheme.
225         */
226        public static String extractScheme(final String uri)
227        {
228            return extractScheme(uri, null);
229        }
230    
231        /**
232         * Extracts the scheme from a URI. Removes the scheme and ':' delimiter from
233         * the front of the URI.
234         *
235         * @param uri The URI.
236         * @param buffer Returns the remainder of the URI.
237         * @return The scheme name. Returns null if there is no scheme.
238         */
239        public static String extractScheme(final String uri, final StringBuilder buffer)
240        {
241            if (buffer != null)
242            {
243                buffer.setLength(0);
244                buffer.append(uri);
245            }
246    
247            final int maxPos = uri.length();
248            for (int pos = 0; pos < maxPos; pos++)
249            {
250                final char ch = uri.charAt(pos);
251    
252                if (ch == ':')
253                {
254                    // Found the end of the scheme
255                    final String scheme = uri.substring(0, pos);
256                    if (scheme.length() <= 1 && Os.isFamily(Os.OS_FAMILY_WINDOWS))
257                    {
258                        // This is not a scheme, but a Windows drive letter
259                        return null;
260                    }
261                    if (buffer != null)
262                    {
263                        buffer.delete(0, pos + 1);
264                    }
265                    return scheme.intern();
266                }
267    
268                if ((ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z'))
269                {
270                    // A scheme character
271                    continue;
272                }
273                if (pos > 0
274                        && ((ch >= '0' && ch <= '9') || ch == '+' || ch == '-' || ch == '.'))
275                {
276                    // A scheme character (these are not allowed as the first
277                    // character of the scheme, but can be used as subsequent
278                    // characters.
279                    continue;
280                }
281    
282                // Not a scheme character
283                break;
284            }
285    
286            // No scheme in URI
287            return null;
288        }
289    
290        /**
291         * Removes %nn encodings from a string.
292         * @param encodedStr The encoded String.
293         * @return The decoded String.
294         * @throws FileSystemException if an error occurs.
295         */
296        public static String decode(final String encodedStr)
297                throws FileSystemException
298        {
299            if (encodedStr == null)
300            {
301                return null;
302            }
303            if (encodedStr.indexOf('%') < 0)
304            {
305                return encodedStr;
306            }
307            final StringBuilder buffer = new StringBuilder(encodedStr);
308            decode(buffer, 0, buffer.length());
309            return buffer.toString();
310        }
311    
312        /**
313         * Removes %nn encodings from a string.
314         * @param buffer StringBuilder containing the string to decode.
315         * @param offset The position in the string to start decoding.
316         * @param length The number of characters to decode.
317         * @throws FileSystemException if an error occurs.
318         */
319        public static void decode(final StringBuilder buffer, final int offset, final int length)
320                throws FileSystemException
321        {
322            int index = offset;
323            int count = length;
324            for (; count > 0; count--, index++)
325            {
326                final char ch = buffer.charAt(index);
327                if (ch != '%')
328                {
329                    continue;
330                }
331                if (count < 3)
332                {
333                    throw new FileSystemException(
334                            "vfs.provider/invalid-escape-sequence.error", buffer
335                                    .substring(index, index + count));
336                }
337    
338                // Decode
339                int dig1 = Character.digit(buffer.charAt(index + 1), HEX_BASE);
340                int dig2 = Character.digit(buffer.charAt(index + 2), HEX_BASE);
341                if (dig1 == -1 || dig2 == -1)
342                {
343                    throw new FileSystemException(
344                            "vfs.provider/invalid-escape-sequence.error", buffer
345                                    .substring(index, index + 3));
346                }
347                char value = (char) (dig1 << BITS_IN_HALF_BYTE | dig2);
348    
349                // Replace
350                buffer.setCharAt(index, value);
351                buffer.delete(index + 1, index + 3);
352                count -= 2;
353            }
354        }
355    
356        /**
357         * Encodes and appends a string to a StringBuilder.
358         * @param buffer The StringBuilder to append to.
359         * @param unencodedValue The String to encode and append.
360         * @param reserved characters to encode.
361         */
362        public static void appendEncoded(final StringBuilder buffer,
363                final String unencodedValue, final char[] reserved)
364        {
365            final int offset = buffer.length();
366            buffer.append(unencodedValue);
367            encode(buffer, offset, unencodedValue.length(), reserved);
368        }
369    
370        /**
371         * Encodes a set of reserved characters in a StringBuilder, using the URI %nn
372         * encoding. Always encodes % characters.
373         * @param buffer The StringBuilder to append to.
374         * @param offset The position in the buffer to start encoding at.
375         * @param length The number of characters to encode.
376         * @param reserved characters to encode.
377         */
378        public static void encode(final StringBuilder buffer, final int offset,
379                final int length, final char[] reserved)
380        {
381            int index = offset;
382            int count = length;
383            for (; count > 0; index++, count--)
384            {
385                final char ch = buffer.charAt(index);
386                boolean match = ch == '%';
387                if (reserved != null)
388                {
389                    for (int i = 0; !match && i < reserved.length; i++)
390                    {
391                        if (ch == reserved[i])
392                        {
393                            match = true;
394                        }
395                    }
396                }
397                if (match)
398                {
399                    // Encode
400                    char[] digits =
401                        {Character.forDigit(((ch >> BITS_IN_HALF_BYTE) & LOW_MASK), HEX_BASE),
402                         Character.forDigit((ch & LOW_MASK), HEX_BASE)};
403                    buffer.setCharAt(index, '%');
404                    buffer.insert(index + 1, digits);
405                    index += 2;
406                }
407            }
408        }
409    
410        /**
411         * Removes %nn encodings from a string.
412         * @param decodedStr The decoded String.
413         * @return The encoded String.
414         */
415        public static String encode(final String decodedStr)
416        {
417            return encode(decodedStr, null);
418        }
419    
420        /**
421         * Converts "special" characters to their %nn value.
422         * @param decodedStr The decoded String.
423         * @param reserved Characters to encode.
424         * @return The encoded String
425         */
426        public static String encode(final String decodedStr, final char[] reserved)
427        {
428            if (decodedStr == null)
429            {
430                return null;
431            }
432            final StringBuilder buffer = new StringBuilder(decodedStr);
433            encode(buffer, 0, buffer.length(), reserved);
434            return buffer.toString();
435        }
436    
437        /**
438         * Encode an array of Strings.
439         * @param strings The array of Strings to encode.
440         * @return An array of encoded Strings.
441         */
442        public static String[] encode(String[] strings)
443        {
444            if (strings == null)
445            {
446                return null;
447            }
448            for (int i = 0; i < strings.length; i++)
449            {
450                strings[i] = encode(strings[i]);
451            }
452            return strings;
453        }
454    
455        /**
456         * Decodes the String.
457         * @param uri The String to decode.
458         * @throws FileSystemException if an error occurs.
459         */
460        public static void checkUriEncoding(String uri) throws FileSystemException
461        {
462            decode(uri);
463        }
464    
465        public static void canonicalizePath(StringBuilder buffer, int offset,
466                int length, FileNameParser fileNameParser)
467                throws FileSystemException
468        {
469            int index = offset;
470            int count = length;
471            for (; count > 0; count--, index++)
472            {
473                final char ch = buffer.charAt(index);
474                if (ch == '%')
475                {
476                    if (count < 3)
477                    {
478                        throw new FileSystemException(
479                                "vfs.provider/invalid-escape-sequence.error",
480                                buffer.substring(index, index + count));
481                    }
482    
483                    // Decode
484                    int dig1 = Character.digit(buffer.charAt(index + 1), HEX_BASE);
485                    int dig2 = Character.digit(buffer.charAt(index + 2), HEX_BASE);
486                    if (dig1 == -1 || dig2 == -1)
487                    {
488                        throw new FileSystemException(
489                                "vfs.provider/invalid-escape-sequence.error",
490                                buffer.substring(index, index + 3));
491                    }
492                    char value = (char) (dig1 << BITS_IN_HALF_BYTE | dig2);
493    
494                    boolean match = value == '%'
495                            || (fileNameParser != null && fileNameParser.encodeCharacter(value));
496    
497                    if (match)
498                    {
499                        // this is a reserved character, not allowed to decode
500                        index += 2;
501                        count -= 2;
502                        continue;
503                    }
504    
505                    // Replace
506                    buffer.setCharAt(index, value);
507                    buffer.delete(index + 1, index + 3);
508                    count -= 2;
509                }
510                else if (fileNameParser.encodeCharacter(ch))
511                {
512                    // Encode
513                    char[] digits =
514                        {Character.forDigit(((ch >> BITS_IN_HALF_BYTE) & LOW_MASK), HEX_BASE),
515                         Character.forDigit((ch & LOW_MASK), HEX_BASE) };
516                    buffer.setCharAt(index, '%');
517                    buffer.insert(index + 1, digits);
518                    index += 2;
519                }
520            }
521        }
522    
523        /**
524         * Extract the query String from the URI.
525         * @param name StringBuilder containing the URI.
526         * @return The query string, if any. null otherwise.
527         */
528        public static String extractQueryString(StringBuilder name)
529        {
530            for (int pos = 0; pos < name.length(); pos++)
531            {
532                if (name.charAt(pos) == '?')
533                {
534                    String queryString = name.substring(pos + 1);
535                    name.delete(pos, name.length());
536                    return queryString;
537                }
538            }
539    
540            return null;
541        }
542    }