View Javadoc
1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one or more
3    * contributor license agreements.  See the NOTICE file distributed with
4    * this work for additional information regarding copyright ownership.
5    * The ASF licenses this file to You under the Apache License, Version 2.0
6    * (the "License"); you may not use this file except in compliance with
7    * the License.  You may obtain a copy of the License at
8    *
9    *      http://www.apache.org/licenses/LICENSE-2.0
10   *
11   * Unless required by applicable law or agreed to in writing, software
12   * distributed under the License is distributed on an "AS IS" BASIS,
13   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14   * See the License for the specific language governing permissions and
15   * limitations under the License.
16   */
17  package org.apache.commons.vfs2.provider;
18  
19  import java.util.Set;
20  import org.apache.commons.vfs2.FileName;
21  import org.apache.commons.vfs2.FileSystemException;
22  import org.apache.commons.vfs2.FileType;
23  import org.apache.commons.vfs2.VFS;
24  import org.apache.commons.vfs2.util.Os;
25  
26  /**
27   * Utilities for dealing with URIs. See RFC 2396 for details.
28   */
29  public final class UriParser {
30  
31      /**
32       * The set of valid separators. These are all converted to the normalized one. Does <i>not</i> contain the
33       * normalized separator
34       */
35      // public static final char[] separators = {'\\'};
36      public static final char TRANS_SEPARATOR = '\\';
37  
38      /**
39       * The normalised separator to use.
40       */
41      private static final char SEPARATOR_CHAR = FileName.SEPARATOR_CHAR;
42  
43      private static final int HEX_BASE = 16;
44  
45      private static final int BITS_IN_HALF_BYTE = 4;
46  
47      private static final char LOW_MASK = 0x0F;
48  
49      /**
50       * Encodes and appends a string to a StringBuilder.
51       *
52       * @param buffer The StringBuilder to append to.
53       * @param unencodedValue The String to encode and append.
54       * @param reserved characters to encode.
55       */
56      public static void appendEncoded(final StringBuilder buffer, final String unencodedValue, final char[] reserved) {
57          final int offset = buffer.length();
58          buffer.append(unencodedValue);
59          encode(buffer, offset, unencodedValue.length(), reserved);
60      }
61  
62      public static void canonicalizePath(final StringBuilder buffer, final int offset, final int length,
63              final FileNameParser fileNameParser) throws FileSystemException {
64          int index = offset;
65          int count = length;
66          for (; count > 0; count--, index++) {
67              final char ch = buffer.charAt(index);
68              if (ch == '%') {
69                  if (count < 3) {
70                      throw new FileSystemException("vfs.provider/invalid-escape-sequence.error",
71                              buffer.substring(index, index + count));
72                  }
73  
74                  // Decode
75                  final int dig1 = Character.digit(buffer.charAt(index + 1), HEX_BASE);
76                  final int dig2 = Character.digit(buffer.charAt(index + 2), HEX_BASE);
77                  if (dig1 == -1 || dig2 == -1) {
78                      throw new FileSystemException("vfs.provider/invalid-escape-sequence.error",
79                              buffer.substring(index, index + 3));
80                  }
81                  final char value = (char) (dig1 << BITS_IN_HALF_BYTE | dig2);
82  
83                  final boolean match = value == '%' || fileNameParser.encodeCharacter(value);
84  
85                  if (match) {
86                      // this is a reserved character, not allowed to decode
87                      index += 2;
88                      count -= 2;
89                      continue;
90                  }
91  
92                  // Replace
93                  buffer.setCharAt(index, value);
94                  buffer.delete(index + 1, index + 3);
95                  count -= 2;
96              } else if (fileNameParser.encodeCharacter(ch)) {
97                  // Encode
98                  final char[] digits = { Character.forDigit((ch >> BITS_IN_HALF_BYTE) & LOW_MASK, HEX_BASE),
99                          Character.forDigit(ch & LOW_MASK, HEX_BASE) };
100                 buffer.setCharAt(index, '%');
101                 buffer.insert(index + 1, digits);
102                 index += 2;
103             }
104         }
105     }
106 
107     /**
108      * Decodes the String.
109      *
110      * @param uri The String to decode.
111      * @throws FileSystemException if an error occurs.
112      */
113     public static void checkUriEncoding(final String uri) throws FileSystemException {
114         decode(uri);
115     }
116 
117     /**
118      * Removes %nn encodings from a string.
119      *
120      * @param encodedStr The encoded String.
121      * @return The decoded String.
122      * @throws FileSystemException if an error occurs.
123      */
124     public static String decode(final String encodedStr) throws FileSystemException {
125         if (encodedStr == null) {
126             return null;
127         }
128         if (encodedStr.indexOf('%') < 0) {
129             return encodedStr;
130         }
131         final StringBuilder buffer = new StringBuilder(encodedStr);
132         decode(buffer, 0, buffer.length());
133         return buffer.toString();
134     }
135 
136     /**
137      * Removes %nn encodings from a string.
138      *
139      * @param buffer StringBuilder containing the string to decode.
140      * @param offset The position in the string to start decoding.
141      * @param length The number of characters to decode.
142      * @throws FileSystemException if an error occurs.
143      */
144     public static void decode(final StringBuilder buffer, final int offset, final int length)
145             throws FileSystemException {
146         int index = offset;
147         int count = length;
148         for (; count > 0; count--, index++) {
149             final char ch = buffer.charAt(index);
150             if (ch != '%') {
151                 continue;
152             }
153             if (count < 3) {
154                 throw new FileSystemException("vfs.provider/invalid-escape-sequence.error",
155                         buffer.substring(index, index + count));
156             }
157 
158             // Decode
159             final int dig1 = Character.digit(buffer.charAt(index + 1), HEX_BASE);
160             final int dig2 = Character.digit(buffer.charAt(index + 2), HEX_BASE);
161             if (dig1 == -1 || dig2 == -1) {
162                 throw new FileSystemException("vfs.provider/invalid-escape-sequence.error",
163                         buffer.substring(index, index + 3));
164             }
165             final char value = (char) (dig1 << BITS_IN_HALF_BYTE | dig2);
166 
167             // Replace
168             buffer.setCharAt(index, value);
169             buffer.delete(index + 1, index + 3);
170             count -= 2;
171         }
172     }
173 
174     /**
175      * Removes %nn encodings from a string.
176      *
177      * @param decodedStr The decoded String.
178      * @return The encoded String.
179      */
180     public static String encode(final String decodedStr) {
181         return encode(decodedStr, null);
182     }
183 
184     /**
185      * Converts "special" characters to their %nn value.
186      *
187      * @param decodedStr The decoded String.
188      * @param reserved Characters to encode.
189      * @return The encoded String
190      */
191     public static String encode(final String decodedStr, final char[] reserved) {
192         if (decodedStr == null) {
193             return null;
194         }
195         final StringBuilder buffer = new StringBuilder(decodedStr);
196         encode(buffer, 0, buffer.length(), reserved);
197         return buffer.toString();
198     }
199 
200     /**
201      * Encode an array of Strings.
202      *
203      * @param strings The array of Strings to encode.
204      * @return An array of encoded Strings.
205      */
206     public static String[] encode(final String[] strings) {
207         if (strings == null) {
208             return null;
209         }
210         for (int i = 0; i < strings.length; i++) {
211             strings[i] = encode(strings[i]);
212         }
213         return strings;
214     }
215 
216     /**
217      * Encodes a set of reserved characters in a StringBuilder, using the URI %nn encoding. Always encodes % characters.
218      *
219      * @param buffer The StringBuilder to append to.
220      * @param offset The position in the buffer to start encoding at.
221      * @param length The number of characters to encode.
222      * @param reserved characters to encode.
223      */
224     public static void encode(final StringBuilder buffer, final int offset, final int length, final char[] reserved) {
225         int index = offset;
226         int count = length;
227         for (; count > 0; index++, count--) {
228             final char ch = buffer.charAt(index);
229             boolean match = ch == '%';
230             if (reserved != null) {
231                 for (int i = 0; !match && i < reserved.length; i++) {
232                     if (ch == reserved[i]) {
233                         match = true;
234                     }
235                 }
236             }
237             if (match) {
238                 // Encode
239                 final char[] digits = { Character.forDigit((ch >> BITS_IN_HALF_BYTE) & LOW_MASK, HEX_BASE),
240                         Character.forDigit(ch & LOW_MASK, HEX_BASE) };
241                 buffer.setCharAt(index, '%');
242                 buffer.insert(index + 1, digits);
243                 index += 2;
244             }
245         }
246     }
247 
248     /**
249      * Extracts the first element of a path.
250      *
251      * @param name StringBuilder containing the path.
252      * @return The first element of the path.
253      */
254     public static String extractFirstElement(final StringBuilder name) {
255         final int len = name.length();
256         if (len < 1) {
257             return null;
258         }
259         int startPos = 0;
260         if (name.charAt(0) == SEPARATOR_CHAR) {
261             startPos = 1;
262         }
263         for (int pos = startPos; pos < len; pos++) {
264             if (name.charAt(pos) == SEPARATOR_CHAR) {
265                 // Found a separator
266                 final String elem = name.substring(startPos, pos);
267                 name.delete(startPos, pos + 1);
268                 return elem;
269             }
270         }
271 
272         // No separator
273         final String elem = name.substring(startPos);
274         name.setLength(0);
275         return elem;
276     }
277 
278     /**
279      * Extract the query String from the URI.
280      *
281      * @param name StringBuilder containing the URI.
282      * @return The query string, if any. null otherwise.
283      */
284     public static String extractQueryString(final StringBuilder name) {
285         for (int pos = 0; pos < name.length(); pos++) {
286             if (name.charAt(pos) == '?') {
287                 final String queryString = name.substring(pos + 1);
288                 name.delete(pos, name.length());
289                 return queryString;
290             }
291         }
292 
293         return null;
294     }
295 
296     /**
297      * Extracts the scheme from a URI. Removes the scheme and ':' delimiter from the front of the URI.
298      * <p>
299      * The scheme is extracted based on the currently supported schemes in the system.  That is to say the schemes
300      * supported by the registered providers.
301      * </p>
302      * <p>
303      * This allows us to handle varying scheme's without making assumptions based on the ':' character.  Specifically
304      * handle scheme extraction calls for URI parameters that are not actually uri's, but may be names with ':' in them.
305      * </p>
306      * @param schemes The schemes to check.
307      * @param uri The potential URI. May also be a name.
308      * @return The scheme name. Returns null if there is no scheme.
309      * @since 2.3
310      */
311     public static String extractScheme(final String[] schemes, final String uri) {
312         return extractScheme(schemes, uri, null);
313     }
314 
315     /**
316      * Extracts the scheme from a URI. Removes the scheme and ':' delimiter from the front of the URI.
317      * <p>
318      * The scheme is extracted based on the given set of schemes. Normally, that is to say the schemes
319      * supported by the registered providers.
320      * </p>
321      * <p>
322      * This allows us to handle varying scheme's without making assumptions based on the ':' character. Specifically
323      * handle scheme extraction calls for URI parameters that are not actually uri's, but may be names with ':' in them.
324      * </p>
325      * @param schemes The schemes to check.
326      * @param uri The potential URI. May also just be a name.
327      * @param buffer Returns the remainder of the URI.
328      * @return The scheme name. Returns null if there is no scheme.
329      * @since 2.3
330      */
331     public static String extractScheme(final String[] schemes, final String uri, final StringBuilder buffer) {
332         if (buffer != null) {
333             buffer.setLength(0);
334             buffer.append(uri);
335         }
336         for(final String scheme : schemes) {
337             if(uri.startsWith(scheme + ":")) {
338                 if (buffer != null) {
339                     buffer.delete(0, uri.indexOf(':') + 1);
340                 }
341                 return scheme;
342             }
343         }
344         return null;
345     }
346 
347     /**
348      * Extracts the scheme from a URI.
349      *
350      * @param uri The URI.
351      * @return The scheme name. Returns null if there is no scheme.
352      * @deprecated Use instead {@link #extractScheme}.  Will be removed in 3.0.
353      */
354     @Deprecated
355     public static String extractScheme(final String uri) {
356         return extractScheme(uri, null);
357     }
358 
359     /**
360      * Extracts the scheme from a URI. Removes the scheme and ':' delimiter from the front of the URI.
361      *
362      * @param uri The URI.
363      * @param buffer Returns the remainder of the URI.
364      * @return The scheme name. Returns null if there is no scheme.
365      * @deprecated Use instead {@link #extractScheme}.  Will be removed in 3.0.
366      */
367     @Deprecated
368     public static String extractScheme(final String uri, final StringBuilder buffer) {
369         if (buffer != null) {
370             buffer.setLength(0);
371             buffer.append(uri);
372         }
373 
374         final int maxPos = uri.length();
375         for (int pos = 0; pos < maxPos; pos++) {
376             final char ch = uri.charAt(pos);
377 
378             if (ch == ':') {
379                 // Found the end of the scheme
380                 final String scheme = uri.substring(0, pos);
381                 if (scheme.length() <= 1 && Os.isFamily(Os.OS_FAMILY_WINDOWS)) {
382                     // This is not a scheme, but a Windows drive letter
383                     return null;
384                 }
385                 if (buffer != null) {
386                     buffer.delete(0, pos + 1);
387                 }
388                 return scheme.intern();
389             }
390 
391             if ((ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z')) {
392                 // A scheme character
393                 continue;
394             }
395             if (pos > 0 && ((ch >= '0' && ch <= '9') || ch == '+' || ch == '-' || ch == '.')) {
396                 // A scheme character (these are not allowed as the first
397                 // character of the scheme, but can be used as subsequent
398                 // characters.
399                 continue;
400             }
401 
402             // Not a scheme character
403             break;
404         }
405 
406         // No scheme in URI
407         return null;
408     }
409 
410     /**
411      * Normalises the separators in a name.
412      *
413      * @param name The StringBuilder containing the name
414      * @return true if the StringBuilder was modified.
415      */
416     public static boolean fixSeparators(final StringBuilder name) {
417         boolean changed = false;
418         final int maxlen = name.length();
419         for (int i = 0; i < maxlen; i++) {
420             final char ch = name.charAt(i);
421             if (ch == TRANS_SEPARATOR) {
422                 name.setCharAt(i, SEPARATOR_CHAR);
423                 changed = true;
424             }
425         }
426         return changed;
427     }
428 
429     /**
430      * Normalises a path. Does the following:
431      * <ul>
432      * <li>Removes empty path elements.
433      * <li>Handles '.' and '..' elements.
434      * <li>Removes trailing separator.
435      * </ul>
436      *
437      * Its assumed that the separators are already fixed.
438      *
439      * @param path The path to normalize.
440      * @return The FileType.
441      * @throws FileSystemException if an error occurs.
442      *
443      * @see #fixSeparators
444      */
445     public static FileType normalisePath(final StringBuilder path) throws FileSystemException {
446         FileType fileType = FileType.FOLDER;
447         if (path.length() == 0) {
448             return fileType;
449         }
450 
451         if (path.charAt(path.length() - 1) != '/') {
452             fileType = FileType.FILE;
453         }
454 
455         // Adjust separators
456         // fixSeparators(path);
457 
458         // Determine the start of the first element
459         int startFirstElem = 0;
460         if (path.charAt(0) == SEPARATOR_CHAR) {
461             if (path.length() == 1) {
462                 return fileType;
463             }
464             startFirstElem = 1;
465         }
466 
467         // Iterate over each element
468         int startElem = startFirstElem;
469         int maxlen = path.length();
470         while (startElem < maxlen) {
471             // Find the end of the element
472             int endElem = startElem;
473             for (; endElem < maxlen && path.charAt(endElem) != SEPARATOR_CHAR; endElem++) {
474             }
475 
476             final int elemLen = endElem - startElem;
477             if (elemLen == 0) {
478                 // An empty element - axe it
479                 path.delete(endElem, endElem + 1);
480                 maxlen = path.length();
481                 continue;
482             }
483             if (elemLen == 1 && path.charAt(startElem) == '.') {
484                 // A '.' element - axe it
485                 path.delete(startElem, endElem + 1);
486                 maxlen = path.length();
487                 continue;
488             }
489             if (elemLen == 2 && path.charAt(startElem) == '.' && path.charAt(startElem + 1) == '.') {
490                 // A '..' element - remove the previous element
491                 if (startElem == startFirstElem) {
492                     // Previous element is missing
493                     throw new FileSystemException("vfs.provider/invalid-relative-path.error");
494                 }
495 
496                 // Find start of previous element
497                 int pos = startElem - 2;
498                 for (; pos >= 0 && path.charAt(pos) != SEPARATOR_CHAR; pos--) {
499                 }
500                 startElem = pos + 1;
501 
502                 path.delete(startElem, endElem + 1);
503                 maxlen = path.length();
504                 continue;
505             }
506 
507             // A regular element
508             startElem = endElem + 1;
509         }
510 
511         // Remove trailing separator
512         if (!VFS.isUriStyle() && maxlen > 1 && path.charAt(maxlen - 1) == SEPARATOR_CHAR) {
513             path.delete(maxlen - 1, maxlen);
514         }
515 
516         return fileType;
517     }
518 
519     private UriParser() {
520     }
521 }