View Javadoc
1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one or more
3    * contributor license agreements.  See the NOTICE file distributed with
4    * this work for additional information regarding copyright ownership.
5    * The ASF licenses this file to You under the Apache License, Version 2.0
6    * (the "License"); you may not use this file except in compliance with
7    * the License.  You may obtain a copy of the License at
8    *
9    *      http://www.apache.org/licenses/LICENSE-2.0
10   *
11   * Unless required by applicable law or agreed to in writing, software
12   * distributed under the License is distributed on an "AS IS" BASIS,
13   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14   * See the License for the specific language governing permissions and
15   * limitations under the License.
16   */
17  package org.apache.commons.vfs2.provider;
18  
19  import java.util.Arrays;
20  
21  import org.apache.commons.lang3.SystemUtils;
22  import org.apache.commons.vfs2.FileName;
23  import org.apache.commons.vfs2.FileSystemException;
24  import org.apache.commons.vfs2.FileType;
25  import org.apache.commons.vfs2.VFS;
26  
27  /**
28   * Utilities for dealing with URIs. See RFC 2396 for details.
29   */
30  public final class UriParser {
31  
    /**
     * The alternative separator character (a backslash). {@link #fixSeparators} replaces every occurrence of it
     * with the normalized separator. This is <em>not</em> the normalized separator itself.
     */
    // public static final char[] separators = {'\\'};
    public static final char TRANS_SEPARATOR = '\\';

    /**
     * The normalized separator to use.
     */
    private static final char SEPARATOR_CHAR = FileName.SEPARATOR_CHAR;

    /**
     * Radix passed to {@link Character#digit(char, int)} and {@link Character#forDigit(int, int)} when reading or
     * writing the two digits of a %nn escape.
     */
    private static final int HEX_BASE = 16;

    /**
     * Shift distance between the value of the first hex digit and the value of the second hex digit of a %nn escape.
     */
    private static final int BITS_IN_HALF_BYTE = 4;

    /**
     * Mask that keeps only the lowest four bits of a value, i.e. what a single hex digit can express.
     */
    private static final char LOW_MASK = 0x0F;
49  
50      /**
51       * Encodes and appends a string to a StringBuilder.
52       *
53       * @param buffer The StringBuilder to append to.
54       * @param unencodedValue The String to encode and append.
55       * @param reserved characters to encode.
56       */
57      public static void appendEncoded(final StringBuilder buffer, final String unencodedValue, final char[] reserved) {
58          final int offset = buffer.length();
59          buffer.append(unencodedValue);
60          encode(buffer, offset, unencodedValue.length(), reserved);
61      }
62  
63      static void appendEncodedRfc2396(final StringBuilder buffer, final String unencodedValue, final char[] allowed) {
64          final int offset = buffer.length();
65          buffer.append(unencodedValue);
66          encodeRfc2396(buffer, offset, unencodedValue.length(), allowed);
67      }
68  
69      /**
70       * Canonicalizes a path.
71       *
72       * @param buffer Source data.
73       * @param offset Where to start reading.
74       * @param length How much to read.
75       * @param fileNameParser Now to encode and decode.
76       * @throws FileSystemException If an I/O error occurs.
77       */
78      public static void canonicalizePath(final StringBuilder buffer, final int offset, final int length,
79              final FileNameParser fileNameParser) throws FileSystemException {
80          int index = offset;
81          int count = length;
82          for (; count > 0; count--, index++) {
83              final char ch = buffer.charAt(index);
84              if (ch == '%') {
85                  if (count < 3) {
86                      throw new FileSystemException("vfs.provider/invalid-escape-sequence.error",
87                              buffer.substring(index, index + count));
88                  }
89  
90                  // Decode
91                  final int dig1 = Character.digit(buffer.charAt(index + 1), HEX_BASE);
92                  final int dig2 = Character.digit(buffer.charAt(index + 2), HEX_BASE);
93                  if (dig1 == -1 || dig2 == -1) {
94                      throw new FileSystemException("vfs.provider/invalid-escape-sequence.error",
95                              buffer.substring(index, index + 3));
96                  }
97                  final char value = (char) (dig1 << BITS_IN_HALF_BYTE | dig2);
98  
99                  final boolean match = value == '%' || fileNameParser.encodeCharacter(value);
100 
101                 if (match) {
102                     // this is a reserved character, not allowed to decode
103                     index += 2;
104                     count -= 2;
105                     continue;
106                 }
107 
108                 // Replace
109                 buffer.setCharAt(index, value);
110                 buffer.delete(index + 1, index + 3);
111                 count -= 2;
112             } else if (fileNameParser.encodeCharacter(ch)) {
113                 // Encode
114                 final char[] digits = {Character.forDigit(ch >> BITS_IN_HALF_BYTE & LOW_MASK, HEX_BASE), Character.forDigit(ch & LOW_MASK, HEX_BASE)};
115                 buffer.setCharAt(index, '%');
116                 buffer.insert(index + 1, digits);
117                 index += 2;
118             }
119         }
120     }
121 
    /**
     * Checks that the %nn escapes in a String are well formed.
     * <p>
     * The String is run through {@link #decode(String)} and the decoded result is discarded; only the exception, if
     * any, is of interest to the caller.
     * </p>
     *
     * @param uri The String to check.
     * @throws FileSystemException if decoding fails.
     */
    public static void checkUriEncoding(final String uri) throws FileSystemException {
        decode(uri);
    }
131 
132     /**
133      * Removes %nn encodings from a string.
134      *
135      * @param encodedStr The encoded String.
136      * @return The decoded String.
137      * @throws FileSystemException if an error occurs.
138      */
139     public static String decode(final String encodedStr) throws FileSystemException {
140         if (encodedStr == null) {
141             return null;
142         }
143         if (encodedStr.indexOf('%') < 0) {
144             return encodedStr;
145         }
146         final StringBuilder buffer = new StringBuilder(encodedStr);
147         decode(buffer, 0, buffer.length());
148         return buffer.toString();
149     }
150 
151     /**
152      * Removes %nn encodings from a string.
153      *
154      * @param buffer StringBuilder containing the string to decode.
155      * @param offset The position in the string to start decoding.
156      * @param length The number of characters to decode.
157      * @throws FileSystemException if an error occurs.
158      */
159     public static void decode(final StringBuilder buffer, final int offset, final int length)
160             throws FileSystemException {
161         int index = offset;
162         int count = length;
163         boolean ipv6Host = false;
164         for (; count > 0; count--, index++) {
165             final char ch = buffer.charAt(index);
166             if (ch == '[') {
167                 ipv6Host = true;
168             }
169             if (ch == ']') {
170                 ipv6Host = false;
171             }
172             if (ch != '%' || ipv6Host) {
173                 continue;
174             }
175 
176             if (count < 3) {
177                 throw new FileSystemException("vfs.provider/invalid-escape-sequence.error",
178                         buffer.substring(index, index + count));
179             }
180 
181             // Decode
182             final int dig1 = Character.digit(buffer.charAt(index + 1), HEX_BASE);
183             final int dig2 = Character.digit(buffer.charAt(index + 2), HEX_BASE);
184             if (dig1 == -1 || dig2 == -1) {
185                 throw new FileSystemException("vfs.provider/invalid-escape-sequence.error",
186                         buffer.substring(index, index + 3));
187             }
188             final char value = (char) (dig1 << BITS_IN_HALF_BYTE | dig2);
189 
190             // Replace
191             buffer.setCharAt(index, value);
192             buffer.delete(index + 1, index + 3);
193             count -= 2;
194         }
195     }
196 
    /**
     * Converts "special" characters to their %nn value.
     * <p>
     * Delegates to {@link #encode(String, char[])} with no reserved characters.
     * </p>
     *
     * @param decodedStr The decoded String.
     * @return The encoded String.
     */
    public static String encode(final String decodedStr) {
        return encode(decodedStr, null);
    }
206 
207     /**
208      * Converts "special" characters to their %nn value.
209      *
210      * @param decodedStr The decoded String.
211      * @param reserved Characters to encode.
212      * @return The encoded String
213      */
214     public static String encode(final String decodedStr, final char[] reserved) {
215         if (decodedStr == null) {
216             return null;
217         }
218         final StringBuilder buffer = new StringBuilder(decodedStr);
219         encode(buffer, 0, buffer.length(), reserved);
220         return buffer.toString();
221     }
222 
223     /**
224      * Encode an array of Strings.
225      *
226      * @param strings The array of Strings to encode.
227      * @return An array of encoded Strings.
228      */
229     public static String[] encode(final String[] strings) {
230         if (strings == null) {
231             return null;
232         }
233         Arrays.setAll(strings, i -> encode(strings[i]));
234         return strings;
235     }
236 
237     /**
238      * Encodes a set of reserved characters in a StringBuilder, using the URI %nn encoding. Always encodes % characters.
239      *
240      * @param buffer The StringBuilder to append to.
241      * @param offset The position in the buffer to start encoding at.
242      * @param length The number of characters to encode.
243      * @param reserved characters to encode.
244      */
245     public static void encode(final StringBuilder buffer, final int offset, final int length, final char[] reserved) {
246         int index = offset;
247         int count = length;
248         for (; count > 0; index++, count--) {
249             final char ch = buffer.charAt(index);
250             boolean match = ch == '%';
251             if (reserved != null) {
252                 for (int i = 0; !match && i < reserved.length; i++) {
253                     if (ch == reserved[i]) {
254                         match = true;
255                         break;
256                     }
257                 }
258             }
259             if (match) {
260                 // Encode
261                 final char[] digits = {Character.forDigit(ch >> BITS_IN_HALF_BYTE & LOW_MASK, HEX_BASE), Character.forDigit(ch & LOW_MASK, HEX_BASE)};
262                 buffer.setCharAt(index, '%');
263                 buffer.insert(index + 1, digits);
264                 index += 2;
265             }
266         }
267     }
268 
269     static void encodeRfc2396(final StringBuilder buffer, final int offset, final int length, final char[] allowed) {
270         int index = offset;
271         int count = length;
272         for (; count > 0; index++, count--) {
273             final char ch = buffer.charAt(index);
274             if (Arrays.binarySearch(allowed, ch) < 0) {
275                 // Encode
276                 final char[] digits = {Character.forDigit(ch >> BITS_IN_HALF_BYTE & LOW_MASK, HEX_BASE), Character.forDigit(ch & LOW_MASK, HEX_BASE)};
277                 buffer.setCharAt(index, '%');
278                 buffer.insert(index + 1, digits);
279                 index += 2;
280             }
281         }
282     }
283 
284     /**
285      * Extracts the first element of a path.
286      *
287      * @param name StringBuilder containing the path.
288      * @return The first element of the path.
289      */
290     public static String extractFirstElement(final StringBuilder name) {
291         final int len = name.length();
292         if (len < 1) {
293             return null;
294         }
295         int startPos = 0;
296         if (name.charAt(0) == SEPARATOR_CHAR) {
297             startPos = 1;
298         }
299         for (int pos = startPos; pos < len; pos++) {
300             if (name.charAt(pos) == SEPARATOR_CHAR) {
301                 // Found a separator
302                 final String elem = name.substring(startPos, pos);
303                 name.delete(startPos, pos + 1);
304                 return elem;
305             }
306         }
307 
308         // No separator
309         final String elem = name.substring(startPos);
310         name.setLength(0);
311         return elem;
312     }
313 
314     /**
315      * Extract the query String from the URI.
316      *
317      * @param name StringBuilder containing the URI.
318      * @return The query string, if any. null otherwise.
319      */
320     public static String extractQueryString(final StringBuilder name) {
321         for (int pos = 0; pos < name.length(); pos++) {
322             if (name.charAt(pos) == '?') {
323                 final String queryString = name.substring(pos + 1);
324                 name.delete(pos, name.length());
325                 return queryString;
326             }
327         }
328 
329         return null;
330     }
331 
    /**
     * Extracts the scheme from a URI.
     * <p>
     * Delegates to {@link #extractScheme(String, StringBuilder)} without a buffer, so nothing but the return value
     * is produced.
     * </p>
     *
     * @param uri The URI.
     * @return The scheme name. Returns null if there is no scheme.
     * @deprecated Use instead {@link #extractScheme(String[], String)}.  Will be removed in 3.0.
     */
    @Deprecated
    public static String extractScheme(final String uri) {
        return extractScheme(uri, null);
    }
343 
344     /**
345      * Extracts the scheme from a URI. Removes the scheme and ':' delimiter from the front of the URI.
346      *
347      * @param uri The URI.
348      * @param buffer Returns the remainder of the URI.
349      * @return The scheme name. Returns null if there is no scheme.
350      * @deprecated Use instead {@link #extractScheme}.  Will be removed in 3.0.
351      */
352     @Deprecated
353     public static String extractScheme(final String uri, final StringBuilder buffer) {
354         if (buffer != null) {
355             buffer.setLength(0);
356             buffer.append(uri);
357         }
358 
359         final int maxPos = uri.length();
360         for (int pos = 0; pos < maxPos; pos++) {
361             final char ch = uri.charAt(pos);
362 
363             if (ch == ':') {
364                 // Found the end of the scheme
365                 final String scheme = uri.substring(0, pos);
366                 if (scheme.length() <= 1 && SystemUtils.IS_OS_WINDOWS) {
367                     // This is not a scheme, but a Windows drive letter
368                     return null;
369                 }
370                 if (buffer != null) {
371                     buffer.delete(0, pos + 1);
372                 }
373                 return scheme.intern();
374             }
375 
376             if (ch >= 'a' && ch <= 'z' || ch >= 'A' && ch <= 'Z') {
377                 // A scheme character
378                 continue;
379             }
380             if (!(pos > 0 && (ch >= '0' && ch <= '9' || ch == '+' || ch == '-' || ch == '.'))) {
381                 // Not a scheme character
382                 break;
383             }
384             // A scheme character (these are not allowed as the first
385             // character of the scheme), but can be used as subsequent
386             // characters.
387         }
388 
389         // No scheme in URI
390         return null;
391     }
392 
    /**
     * Extracts the scheme from a URI. The URI itself is not modified; use
     * {@link #extractScheme(String[], String, StringBuilder)} to also obtain the remainder of the URI.
     * <p>
     * The scheme is extracted based on the given set of schemes. Normally, that is to say the schemes
     * supported by the registered providers.
     * </p>
     * <p>
     * This allows us to handle varying schemes without making assumptions based on the ':' character. Specifically
     * handle scheme extraction calls for URI parameters that are not actually URIs, but may be names with ':' in them.
     * </p>
     * @param schemes The schemes to check.
     * @param uri The potential URI. May also be a name.
     * @return The scheme name. Returns null if there is no scheme.
     * @since 2.3
     */
    public static String extractScheme(final String[] schemes, final String uri) {
        return extractScheme(schemes, uri, null);
    }
411 
412     /**
413      * Extracts the scheme from a URI. Removes the scheme and ':' delimiter from the front of the URI.
414      * <p>
415      * The scheme is extracted based on the given set of schemes. Normally, that is to say the schemes
416      * supported by the registered providers.
417      * </p>
418      * <p>
419      * This allows us to handle varying scheme's without making assumptions based on the ':' character. Specifically
420      * handle scheme extraction calls for URI parameters that are not actually URI's, but may be names with ':' in them.
421      * </p>
422      * @param schemes The schemes to check.
423      * @param uri The potential URI. May also just be a name.
424      * @param buffer Returns the remainder of the URI.
425      * @return The scheme name. Returns null if there is no scheme.
426      * @since 2.3
427      */
428     public static String extractScheme(final String[] schemes, final String uri, final StringBuilder buffer) {
429         if (buffer != null) {
430             buffer.setLength(0);
431             buffer.append(uri);
432         }
433         for (final String scheme : schemes) {
434             if (uri.startsWith(scheme + ":")) {
435                 if (buffer != null) {
436                     buffer.delete(0, uri.indexOf(':') + 1);
437                 }
438                 return scheme;
439             }
440         }
441         return null;
442     }
443 
444     /**
445      * Normalises the separators in a name.
446      *
447      * @param name The StringBuilder containing the name
448      * @return true if the StringBuilder was modified.
449      */
450     public static boolean fixSeparators(final StringBuilder name) {
451         boolean changed = false;
452         int maxlen = name.length();
453         for (int i = 0; i < maxlen; i++) {
454             final char ch = name.charAt(i);
455             if (ch == TRANS_SEPARATOR) {
456                 name.setCharAt(i, SEPARATOR_CHAR);
457                 changed = true;
458             }
459             if (i < maxlen - 2 && name.charAt(i) == '%' && name.charAt(i + 1) == '2') {
460                 if (name.charAt(i + 2) == 'f' || name.charAt(i + 2) == 'F') {
461                     name.setCharAt(i, SEPARATOR_CHAR);
462                     name.delete(i + 1, i + 3);
463                     maxlen -= 2;
464                     changed = true;
465                 } else if (name.charAt(i + 2) == 'e' || name.charAt(i + 2) == 'E') {
466                     name.setCharAt(i, '.');
467                     name.delete(i + 1, i + 3);
468                     maxlen -= 2;
469                     changed = true;
470                 }
471             }
472         }
473         return changed;
474     }
475 
476     /**
477      * Normalises a path. Does the following:
478      * <ul>
479      * <li>Removes empty path elements.
480      * <li>Handles '.' and '..' elements.
481      * <li>Removes trailing separator.
482      * </ul>
483      *
484      * Its assumed that the separators are already fixed.
485      *
486      * @param path The path to normalize.
487      * @return The FileType.
488      * @throws FileSystemException if an error occurs.
489      * @see #fixSeparators
490      */
491     public static FileType normalisePath(final StringBuilder path) throws FileSystemException {
492         FileType fileType = FileType.FOLDER;
493         if (path.length() == 0) {
494             return fileType;
495         }
496 
497         // '/' or '.' or '..' or anyPath/..' or 'anyPath/.'  should always be a path
498         if (path.charAt(path.length() - 1) != '/'
499                 && path.lastIndexOf("/..") != path.length() - 3
500                 && path.lastIndexOf("/.") != path.length() - 2
501                 && path.lastIndexOf("..") != 0
502                 && path.lastIndexOf(".") != 0
503         ) {
504             fileType = FileType.FILE;
505         }
506 
507         // Adjust separators
508         // fixSeparators(path);
509 
510         // Determine the start of the first element
511         int startFirstElem = 0;
512         if (path.charAt(0) == SEPARATOR_CHAR) {
513             if (path.length() == 1) {
514                 return fileType;
515             }
516             startFirstElem = 1;
517         }
518 
519         // Iterate over each element
520         int startElem = startFirstElem;
521         int maxlen = path.length();
522         while (startElem < maxlen) {
523             // Find the end of the element
524             int endElem = startElem;
525             while (endElem < maxlen && path.charAt(endElem) != SEPARATOR_CHAR) {
526                 endElem++;
527             }
528 
529             final int elemLen = endElem - startElem;
530             if (elemLen == 0) {
531                 // An empty element - axe it
532                 path.deleteCharAt(endElem);
533                 maxlen = path.length();
534                 continue;
535             }
536             if (elemLen == 1 && path.charAt(startElem) == '.') {
537                 // A '.' element - axe it
538                 path.deleteCharAt(startElem);
539                 maxlen = path.length();
540                 continue;
541             }
542             if (elemLen == 2 && path.charAt(startElem) == '.' && path.charAt(startElem + 1) == '.') {
543                 // A '..' element - remove the previous element
544                 if (startElem == startFirstElem) {
545                     // Previous element is missing
546                     throw new FileSystemException("vfs.provider/invalid-relative-path.error");
547                 }
548 
549                 // Find start of previous element
550                 int pos = startElem - 2;
551                 while (pos >= 0 && path.charAt(pos) != SEPARATOR_CHAR) {
552                     pos--;
553                 }
554                 startElem = pos + 1;
555 
556                 path.delete(startElem, endElem + 1);
557                 maxlen = path.length();
558                 continue;
559             }
560 
561             // A regular element
562             startElem = endElem + 1;
563         }
564 
565         // Remove trailing separator
566         if (!VFS.isUriStyle() && maxlen > 1 && path.charAt(maxlen - 1) == SEPARATOR_CHAR) {
567             path.deleteCharAt(maxlen - 1);
568         }
569 
570         return fileType;
571     }
572 
    /**
     * Private: the class only has static members and is not meant to be instantiated.
     */
    private UriParser() {
    }
575 }