View Javadoc
1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one or more
3    * contributor license agreements.  See the NOTICE file distributed with
4    * this work for additional information regarding copyright ownership.
5    * The ASF licenses this file to You under the Apache License, Version 2.0
6    * (the "License"); you may not use this file except in compliance with
7    * the License.  You may obtain a copy of the License at
8    *
9    *      http://www.apache.org/licenses/LICENSE-2.0
10   *
11   * Unless required by applicable law or agreed to in writing, software
12   * distributed under the License is distributed on an "AS IS" BASIS,
13   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14   * See the License for the specific language governing permissions and
15   * limitations under the License.
16   */
17  package org.apache.commons.vfs2.provider;
18  
19  import org.apache.commons.vfs2.FileName;
20  import org.apache.commons.vfs2.FileSystemException;
21  import org.apache.commons.vfs2.FileType;
22  import org.apache.commons.vfs2.VFS;
23  import org.apache.commons.vfs2.util.Os;
24  
25  /**
26   * Utilities for dealing with URIs. See RFC 2396 for details.
27   */
28  public final class UriParser {
29  
30      /**
31       * The set of valid separators. These are all converted to the normalized one. Does <i>not</i> contain the
32       * normalized separator
33       */
34      // public static final char[] separators = {'\\'};
35      public static final char TRANS_SEPARATOR = '\\';
36  
37      /**
38       * The normalised separator to use.
39       */
40      private static final char SEPARATOR_CHAR = FileName.SEPARATOR_CHAR;
41  
42      private static final int HEX_BASE = 16;
43  
44      private static final int BITS_IN_HALF_BYTE = 4;
45  
46      private static final char LOW_MASK = 0x0F;
47  
48      /**
49       * Encodes and appends a string to a StringBuilder.
50       *
51       * @param buffer The StringBuilder to append to.
52       * @param unencodedValue The String to encode and append.
53       * @param reserved characters to encode.
54       */
55      public static void appendEncoded(final StringBuilder buffer, final String unencodedValue, final char[] reserved) {
56          final int offset = buffer.length();
57          buffer.append(unencodedValue);
58          encode(buffer, offset, unencodedValue.length(), reserved);
59      }
60  
61      public static void canonicalizePath(final StringBuilder buffer, final int offset, final int length,
62              final FileNameParser fileNameParser) throws FileSystemException {
63          int index = offset;
64          int count = length;
65          for (; count > 0; count--, index++) {
66              final char ch = buffer.charAt(index);
67              if (ch == '%') {
68                  if (count < 3) {
69                      throw new FileSystemException("vfs.provider/invalid-escape-sequence.error",
70                              buffer.substring(index, index + count));
71                  }
72  
73                  // Decode
74                  final int dig1 = Character.digit(buffer.charAt(index + 1), HEX_BASE);
75                  final int dig2 = Character.digit(buffer.charAt(index + 2), HEX_BASE);
76                  if (dig1 == -1 || dig2 == -1) {
77                      throw new FileSystemException("vfs.provider/invalid-escape-sequence.error",
78                              buffer.substring(index, index + 3));
79                  }
80                  final char value = (char) (dig1 << BITS_IN_HALF_BYTE | dig2);
81  
82                  final boolean match = value == '%' || fileNameParser.encodeCharacter(value);
83  
84                  if (match) {
85                      // this is a reserved character, not allowed to decode
86                      index += 2;
87                      count -= 2;
88                      continue;
89                  }
90  
91                  // Replace
92                  buffer.setCharAt(index, value);
93                  buffer.delete(index + 1, index + 3);
94                  count -= 2;
95              } else if (fileNameParser.encodeCharacter(ch)) {
96                  // Encode
97                  final char[] digits = { Character.forDigit((ch >> BITS_IN_HALF_BYTE) & LOW_MASK, HEX_BASE),
98                          Character.forDigit(ch & LOW_MASK, HEX_BASE) };
99                  buffer.setCharAt(index, '%');
100                 buffer.insert(index + 1, digits);
101                 index += 2;
102             }
103         }
104     }
105 
106     /**
107      * Decodes the String.
108      *
109      * @param uri The String to decode.
110      * @throws FileSystemException if an error occurs.
111      */
112     public static void checkUriEncoding(final String uri) throws FileSystemException {
113         decode(uri);
114     }
115 
116     /**
117      * Removes %nn encodings from a string.
118      *
119      * @param encodedStr The encoded String.
120      * @return The decoded String.
121      * @throws FileSystemException if an error occurs.
122      */
123     public static String decode(final String encodedStr) throws FileSystemException {
124         if (encodedStr == null) {
125             return null;
126         }
127         if (encodedStr.indexOf('%') < 0) {
128             return encodedStr;
129         }
130         final StringBuilder buffer = new StringBuilder(encodedStr);
131         decode(buffer, 0, buffer.length());
132         return buffer.toString();
133     }
134 
135     /**
136      * Removes %nn encodings from a string.
137      *
138      * @param buffer StringBuilder containing the string to decode.
139      * @param offset The position in the string to start decoding.
140      * @param length The number of characters to decode.
141      * @throws FileSystemException if an error occurs.
142      */
143     public static void decode(final StringBuilder buffer, final int offset, final int length)
144             throws FileSystemException {
145         int index = offset;
146         int count = length;
147         for (; count > 0; count--, index++) {
148             final char ch = buffer.charAt(index);
149             if (ch != '%') {
150                 continue;
151             }
152             if (count < 3) {
153                 throw new FileSystemException("vfs.provider/invalid-escape-sequence.error",
154                         buffer.substring(index, index + count));
155             }
156 
157             // Decode
158             final int dig1 = Character.digit(buffer.charAt(index + 1), HEX_BASE);
159             final int dig2 = Character.digit(buffer.charAt(index + 2), HEX_BASE);
160             if (dig1 == -1 || dig2 == -1) {
161                 throw new FileSystemException("vfs.provider/invalid-escape-sequence.error",
162                         buffer.substring(index, index + 3));
163             }
164             final char value = (char) (dig1 << BITS_IN_HALF_BYTE | dig2);
165 
166             // Replace
167             buffer.setCharAt(index, value);
168             buffer.delete(index + 1, index + 3);
169             count -= 2;
170         }
171     }
172 
173     /**
174      * Removes %nn encodings from a string.
175      *
176      * @param decodedStr The decoded String.
177      * @return The encoded String.
178      */
179     public static String encode(final String decodedStr) {
180         return encode(decodedStr, null);
181     }
182 
183     /**
184      * Converts "special" characters to their %nn value.
185      *
186      * @param decodedStr The decoded String.
187      * @param reserved Characters to encode.
188      * @return The encoded String
189      */
190     public static String encode(final String decodedStr, final char[] reserved) {
191         if (decodedStr == null) {
192             return null;
193         }
194         final StringBuilder buffer = new StringBuilder(decodedStr);
195         encode(buffer, 0, buffer.length(), reserved);
196         return buffer.toString();
197     }
198 
199     /**
200      * Encode an array of Strings.
201      *
202      * @param strings The array of Strings to encode.
203      * @return An array of encoded Strings.
204      */
205     public static String[] encode(final String[] strings) {
206         if (strings == null) {
207             return null;
208         }
209         for (int i = 0; i < strings.length; i++) {
210             strings[i] = encode(strings[i]);
211         }
212         return strings;
213     }
214 
215     /**
216      * Encodes a set of reserved characters in a StringBuilder, using the URI %nn encoding. Always encodes % characters.
217      *
218      * @param buffer The StringBuilder to append to.
219      * @param offset The position in the buffer to start encoding at.
220      * @param length The number of characters to encode.
221      * @param reserved characters to encode.
222      */
223     public static void encode(final StringBuilder buffer, final int offset, final int length, final char[] reserved) {
224         int index = offset;
225         int count = length;
226         for (; count > 0; index++, count--) {
227             final char ch = buffer.charAt(index);
228             boolean match = ch == '%';
229             if (reserved != null) {
230                 for (int i = 0; !match && i < reserved.length; i++) {
231                     if (ch == reserved[i]) {
232                         match = true;
233                     }
234                 }
235             }
236             if (match) {
237                 // Encode
238                 final char[] digits = { Character.forDigit((ch >> BITS_IN_HALF_BYTE) & LOW_MASK, HEX_BASE),
239                         Character.forDigit(ch & LOW_MASK, HEX_BASE) };
240                 buffer.setCharAt(index, '%');
241                 buffer.insert(index + 1, digits);
242                 index += 2;
243             }
244         }
245     }
246 
247     /**
248      * Extracts the first element of a path.
249      *
250      * @param name StringBuilder containing the path.
251      * @return The first element of the path.
252      */
253     public static String extractFirstElement(final StringBuilder name) {
254         final int len = name.length();
255         if (len < 1) {
256             return null;
257         }
258         int startPos = 0;
259         if (name.charAt(0) == SEPARATOR_CHAR) {
260             startPos = 1;
261         }
262         for (int pos = startPos; pos < len; pos++) {
263             if (name.charAt(pos) == SEPARATOR_CHAR) {
264                 // Found a separator
265                 final String elem = name.substring(startPos, pos);
266                 name.delete(startPos, pos + 1);
267                 return elem;
268             }
269         }
270 
271         // No separator
272         final String elem = name.substring(startPos);
273         name.setLength(0);
274         return elem;
275     }
276 
277     /**
278      * Extract the query String from the URI.
279      *
280      * @param name StringBuilder containing the URI.
281      * @return The query string, if any. null otherwise.
282      */
283     public static String extractQueryString(final StringBuilder name) {
284         for (int pos = 0; pos < name.length(); pos++) {
285             if (name.charAt(pos) == '?') {
286                 final String queryString = name.substring(pos + 1);
287                 name.delete(pos, name.length());
288                 return queryString;
289             }
290         }
291 
292         return null;
293     }
294 
295     /**
296      * Extracts the scheme from a URI. Removes the scheme and ':' delimiter from the front of the URI.
297      * <p>
298      * The scheme is extracted based on the currently supported schemes in the system.  That is to say the schemes
299      * supported by the registered providers.
300      * </p>
301      * <p>
302      * This allows us to handle varying scheme's without making assumptions based on the ':' character.  Specifically
303      * handle scheme extraction calls for URI parameters that are not actually uri's, but may be names with ':' in them.
304      * </p>
305      * @param schemes The schemes to check.
306      * @param uri The potential URI. May also be a name.
307      * @return The scheme name. Returns null if there is no scheme.
308      * @since 2.3
309      */
310     public static String extractScheme(final String[] schemes, final String uri) {
311         return extractScheme(schemes, uri, null);
312     }
313 
314     /**
315      * Extracts the scheme from a URI. Removes the scheme and ':' delimiter from the front of the URI.
316      * <p>
317      * The scheme is extracted based on the given set of schemes. Normally, that is to say the schemes
318      * supported by the registered providers.
319      * </p>
320      * <p>
321      * This allows us to handle varying scheme's without making assumptions based on the ':' character. Specifically
322      * handle scheme extraction calls for URI parameters that are not actually URI's, but may be names with ':' in them.
323      * </p>
324      * @param schemes The schemes to check.
325      * @param uri The potential URI. May also just be a name.
326      * @param buffer Returns the remainder of the URI.
327      * @return The scheme name. Returns null if there is no scheme.
328      * @since 2.3
329      */
330     public static String extractScheme(final String[] schemes, final String uri, final StringBuilder buffer) {
331         if (buffer != null) {
332             buffer.setLength(0);
333             buffer.append(uri);
334         }
335         for(final String scheme : schemes) {
336             if(uri.startsWith(scheme + ":")) {
337                 if (buffer != null) {
338                     buffer.delete(0, uri.indexOf(':') + 1);
339                 }
340                 return scheme;
341             }
342         }
343         return null;
344     }
345 
346     /**
347      * Extracts the scheme from a URI.
348      *
349      * @param uri The URI.
350      * @return The scheme name. Returns null if there is no scheme.
351      * @deprecated Use instead {@link #extractScheme}.  Will be removed in 3.0.
352      */
353     @Deprecated
354     public static String extractScheme(final String uri) {
355         return extractScheme(uri, null);
356     }
357 
358     /**
359      * Extracts the scheme from a URI. Removes the scheme and ':' delimiter from the front of the URI.
360      *
361      * @param uri The URI.
362      * @param buffer Returns the remainder of the URI.
363      * @return The scheme name. Returns null if there is no scheme.
364      * @deprecated Use instead {@link #extractScheme}.  Will be removed in 3.0.
365      */
366     @Deprecated
367     public static String extractScheme(final String uri, final StringBuilder buffer) {
368         if (buffer != null) {
369             buffer.setLength(0);
370             buffer.append(uri);
371         }
372 
373         final int maxPos = uri.length();
374         for (int pos = 0; pos < maxPos; pos++) {
375             final char ch = uri.charAt(pos);
376 
377             if (ch == ':') {
378                 // Found the end of the scheme
379                 final String scheme = uri.substring(0, pos);
380                 if (scheme.length() <= 1 && Os.isFamily(Os.OS_FAMILY_WINDOWS)) {
381                     // This is not a scheme, but a Windows drive letter
382                     return null;
383                 }
384                 if (buffer != null) {
385                     buffer.delete(0, pos + 1);
386                 }
387                 return scheme.intern();
388             }
389 
390             if ((ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z')) {
391                 // A scheme character
392                 continue;
393             }
394             if (pos > 0 && ((ch >= '0' && ch <= '9') || ch == '+' || ch == '-' || ch == '.')) {
395                 // A scheme character (these are not allowed as the first
396                 // character of the scheme, but can be used as subsequent
397                 // characters.
398                 continue;
399             }
400 
401             // Not a scheme character
402             break;
403         }
404 
405         // No scheme in URI
406         return null;
407     }
408 
409     /**
410      * Normalises the separators in a name.
411      *
412      * @param name The StringBuilder containing the name
413      * @return true if the StringBuilder was modified.
414      */
415     public static boolean fixSeparators(final StringBuilder name) {
416         boolean changed = false;
417         final int maxlen = name.length();
418         for (int i = 0; i < maxlen; i++) {
419             final char ch = name.charAt(i);
420             if (ch == TRANS_SEPARATOR) {
421                 name.setCharAt(i, SEPARATOR_CHAR);
422                 changed = true;
423             }
424         }
425         return changed;
426     }
427 
428     /**
429      * Normalises a path. Does the following:
430      * <ul>
431      * <li>Removes empty path elements.
432      * <li>Handles '.' and '..' elements.
433      * <li>Removes trailing separator.
434      * </ul>
435      *
436      * Its assumed that the separators are already fixed.
437      *
438      * @param path The path to normalize.
439      * @return The FileType.
440      * @throws FileSystemException if an error occurs.
441      *
442      * @see #fixSeparators
443      */
444     public static FileType normalisePath(final StringBuilder path) throws FileSystemException {
445         FileType fileType = FileType.FOLDER;
446         if (path.length() == 0) {
447             return fileType;
448         }
449 
450         if (path.charAt(path.length() - 1) != '/') {
451             fileType = FileType.FILE;
452         }
453 
454         // Adjust separators
455         // fixSeparators(path);
456 
457         // Determine the start of the first element
458         int startFirstElem = 0;
459         if (path.charAt(0) == SEPARATOR_CHAR) {
460             if (path.length() == 1) {
461                 return fileType;
462             }
463             startFirstElem = 1;
464         }
465 
466         // Iterate over each element
467         int startElem = startFirstElem;
468         int maxlen = path.length();
469         while (startElem < maxlen) {
470             // Find the end of the element
471             int endElem = startElem;
472             for (; endElem < maxlen && path.charAt(endElem) != SEPARATOR_CHAR; endElem++) {
473             }
474 
475             final int elemLen = endElem - startElem;
476             if (elemLen == 0) {
477                 // An empty element - axe it
478                 path.delete(endElem, endElem + 1);
479                 maxlen = path.length();
480                 continue;
481             }
482             if (elemLen == 1 && path.charAt(startElem) == '.') {
483                 // A '.' element - axe it
484                 path.delete(startElem, endElem + 1);
485                 maxlen = path.length();
486                 continue;
487             }
488             if (elemLen == 2 && path.charAt(startElem) == '.' && path.charAt(startElem + 1) == '.') {
489                 // A '..' element - remove the previous element
490                 if (startElem == startFirstElem) {
491                     // Previous element is missing
492                     throw new FileSystemException("vfs.provider/invalid-relative-path.error");
493                 }
494 
495                 // Find start of previous element
496                 int pos = startElem - 2;
497                 for (; pos >= 0 && path.charAt(pos) != SEPARATOR_CHAR; pos--) {
498                 }
499                 startElem = pos + 1;
500 
501                 path.delete(startElem, endElem + 1);
502                 maxlen = path.length();
503                 continue;
504             }
505 
506             // A regular element
507             startElem = endElem + 1;
508         }
509 
510         // Remove trailing separator
511         if (!VFS.isUriStyle() && maxlen > 1 && path.charAt(maxlen - 1) == SEPARATOR_CHAR) {
512             path.delete(maxlen - 1, maxlen);
513         }
514 
515         return fileType;
516     }
517 
518     private UriParser() {
519     }
520 }