001 /*
002 * Licensed to the Apache Software Foundation (ASF) under one or more
003 * contributor license agreements. See the NOTICE file distributed with
004 * this work for additional information regarding copyright ownership.
005 * The ASF licenses this file to You under the Apache License, Version 2.0
006 * (the "License"); you may not use this file except in compliance with
007 * the License. You may obtain a copy of the License at
008 *
009 * http://www.apache.org/licenses/LICENSE-2.0
010 *
011 * Unless required by applicable law or agreed to in writing, software
012 * distributed under the License is distributed on an "AS IS" BASIS,
013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 * See the License for the specific language governing permissions and
015 * limitations under the License.
016 */
017 package org.apache.commons.vfs2.provider;
018
019 import org.apache.commons.vfs2.FileName;
020 import org.apache.commons.vfs2.FileSystemException;
021 import org.apache.commons.vfs2.FileType;
022 import org.apache.commons.vfs2.VFS;
023 import org.apache.commons.vfs2.util.Os;
024
/**
 * Utilities for dealing with URIs. See RFC 2396 for details.
 *
 * @author <a href="http://commons.apache.org/vfs/team-list.html">Commons VFS team</a>
 */
031 public final class UriParser
032 {
033 /**
034 * The set of valid separators. These are all converted to the normalised
035 * one. Does <i>not</i> contain the normalised separator
036 */
037 // public static final char[] separators = {'\\'};
038 public static final char TRANS_SEPARATOR = '\\';
039
040 /**
041 * The normalised separator to use.
042 */
043 private static final char SEPARATOR_CHAR = FileName.SEPARATOR_CHAR;
044
045 private static final int HEX_BASE = 16;
046
047 private static final int BITS_IN_HALF_BYTE = 4;
048
049 private static final char LOW_MASK = 0x0F;
050
051 private UriParser()
052 {
053 }
054
055 /**
056 * Extracts the first element of a path.
057 * @param name StringBuilder containing the path.
058 * @return The first element of the path.
059 */
060 public static String extractFirstElement(final StringBuilder name)
061 {
062 final int len = name.length();
063 if (len < 1)
064 {
065 return null;
066 }
067 int startPos = 0;
068 if (name.charAt(0) == SEPARATOR_CHAR)
069 {
070 startPos = 1;
071 }
072 for (int pos = startPos; pos < len; pos++)
073 {
074 if (name.charAt(pos) == SEPARATOR_CHAR)
075 {
076 // Found a separator
077 final String elem = name.substring(startPos, pos);
078 name.delete(startPos, pos + 1);
079 return elem;
080 }
081 }
082
083 // No separator
084 final String elem = name.substring(startPos);
085 name.setLength(0);
086 return elem;
087 }
088
089 /**
090 * Normalises a path. Does the following:
091 * <ul>
092 * <li>Removes empty path elements.
093 * <li>Handles '.' and '..' elements.
094 * <li>Removes trailing separator.
095 * </ul>
096 *
097 * Its assumed that the separators are already fixed.
098 *
099 * @param path The path to normalize.
100 * @return The FileType.
101 * @throws FileSystemException if an error occurs.
102 *
103 * @see #fixSeparators
104 */
105 public static FileType normalisePath(final StringBuilder path)
106 throws FileSystemException
107 {
108 FileType fileType = FileType.FOLDER;
109 if (path.length() == 0)
110 {
111 return fileType;
112 }
113
114 if (path.charAt(path.length() - 1) != '/')
115 {
116 fileType = FileType.FILE;
117 }
118
119 // Adjust separators
120 // fixSeparators(path);
121
122 // Determine the start of the first element
123 int startFirstElem = 0;
124 if (path.charAt(0) == SEPARATOR_CHAR)
125 {
126 if (path.length() == 1)
127 {
128 return fileType;
129 }
130 startFirstElem = 1;
131 }
132
133 // Iterate over each element
134 int startElem = startFirstElem;
135 int maxlen = path.length();
136 while (startElem < maxlen)
137 {
138 // Find the end of the element
139 int endElem = startElem;
140 for (; endElem < maxlen && path.charAt(endElem) != SEPARATOR_CHAR; endElem++)
141 {
142 }
143
144 final int elemLen = endElem - startElem;
145 if (elemLen == 0)
146 {
147 // An empty element - axe it
148 path.delete(endElem, endElem + 1);
149 maxlen = path.length();
150 continue;
151 }
152 if (elemLen == 1 && path.charAt(startElem) == '.')
153 {
154 // A '.' element - axe it
155 path.delete(startElem, endElem + 1);
156 maxlen = path.length();
157 continue;
158 }
159 if (elemLen == 2 && path.charAt(startElem) == '.'
160 && path.charAt(startElem + 1) == '.')
161 {
162 // A '..' element - remove the previous element
163 if (startElem == startFirstElem)
164 {
165 // Previous element is missing
166 throw new FileSystemException(
167 "vfs.provider/invalid-relative-path.error");
168 }
169
170 // Find start of previous element
171 int pos = startElem - 2;
172 for (; pos >= 0 && path.charAt(pos) != SEPARATOR_CHAR; pos--)
173 {
174 }
175 startElem = pos + 1;
176
177 path.delete(startElem, endElem + 1);
178 maxlen = path.length();
179 continue;
180 }
181
182 // A regular element
183 startElem = endElem + 1;
184 }
185
186 // Remove trailing separator
187 if (!VFS.isUriStyle())
188 {
189 if (maxlen > 0 && path.charAt(maxlen - 1) == SEPARATOR_CHAR
190 && maxlen > 1)
191 {
192 path.delete(maxlen - 1, maxlen);
193 }
194 }
195
196 return fileType;
197 }
198
199 /**
200 * Normalises the separators in a name.
201 * @param name The StringBuilder containing the name
202 * @return true if the StringBuilder was modified.
203 */
204 public static boolean fixSeparators(final StringBuilder name)
205 {
206 boolean changed = false;
207 final int maxlen = name.length();
208 for (int i = 0; i < maxlen; i++)
209 {
210 final char ch = name.charAt(i);
211 if (ch == TRANS_SEPARATOR)
212 {
213 name.setCharAt(i, SEPARATOR_CHAR);
214 changed = true;
215 }
216 }
217 return changed;
218 }
219
220 /**
221 * Extracts the scheme from a URI.
222 *
223 * @param uri The URI.
224 * @return The scheme name. Returns null if there is no scheme.
225 */
226 public static String extractScheme(final String uri)
227 {
228 return extractScheme(uri, null);
229 }
230
231 /**
232 * Extracts the scheme from a URI. Removes the scheme and ':' delimiter from
233 * the front of the URI.
234 *
235 * @param uri The URI.
236 * @param buffer Returns the remainder of the URI.
237 * @return The scheme name. Returns null if there is no scheme.
238 */
239 public static String extractScheme(final String uri, final StringBuilder buffer)
240 {
241 if (buffer != null)
242 {
243 buffer.setLength(0);
244 buffer.append(uri);
245 }
246
247 final int maxPos = uri.length();
248 for (int pos = 0; pos < maxPos; pos++)
249 {
250 final char ch = uri.charAt(pos);
251
252 if (ch == ':')
253 {
254 // Found the end of the scheme
255 final String scheme = uri.substring(0, pos);
256 if (scheme.length() <= 1 && Os.isFamily(Os.OS_FAMILY_WINDOWS))
257 {
258 // This is not a scheme, but a Windows drive letter
259 return null;
260 }
261 if (buffer != null)
262 {
263 buffer.delete(0, pos + 1);
264 }
265 return scheme.intern();
266 }
267
268 if ((ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z'))
269 {
270 // A scheme character
271 continue;
272 }
273 if (pos > 0
274 && ((ch >= '0' && ch <= '9') || ch == '+' || ch == '-' || ch == '.'))
275 {
276 // A scheme character (these are not allowed as the first
277 // character of the scheme, but can be used as subsequent
278 // characters.
279 continue;
280 }
281
282 // Not a scheme character
283 break;
284 }
285
286 // No scheme in URI
287 return null;
288 }
289
290 /**
291 * Removes %nn encodings from a string.
292 * @param encodedStr The encoded String.
293 * @return The decoded String.
294 * @throws FileSystemException if an error occurs.
295 */
296 public static String decode(final String encodedStr)
297 throws FileSystemException
298 {
299 if (encodedStr == null)
300 {
301 return null;
302 }
303 if (encodedStr.indexOf('%') < 0)
304 {
305 return encodedStr;
306 }
307 final StringBuilder buffer = new StringBuilder(encodedStr);
308 decode(buffer, 0, buffer.length());
309 return buffer.toString();
310 }
311
312 /**
313 * Removes %nn encodings from a string.
314 * @param buffer StringBuilder containing the string to decode.
315 * @param offset The position in the string to start decoding.
316 * @param length The number of characters to decode.
317 * @throws FileSystemException if an error occurs.
318 */
319 public static void decode(final StringBuilder buffer, final int offset, final int length)
320 throws FileSystemException
321 {
322 int index = offset;
323 int count = length;
324 for (; count > 0; count--, index++)
325 {
326 final char ch = buffer.charAt(index);
327 if (ch != '%')
328 {
329 continue;
330 }
331 if (count < 3)
332 {
333 throw new FileSystemException(
334 "vfs.provider/invalid-escape-sequence.error", buffer
335 .substring(index, index + count));
336 }
337
338 // Decode
339 int dig1 = Character.digit(buffer.charAt(index + 1), HEX_BASE);
340 int dig2 = Character.digit(buffer.charAt(index + 2), HEX_BASE);
341 if (dig1 == -1 || dig2 == -1)
342 {
343 throw new FileSystemException(
344 "vfs.provider/invalid-escape-sequence.error", buffer
345 .substring(index, index + 3));
346 }
347 char value = (char) (dig1 << BITS_IN_HALF_BYTE | dig2);
348
349 // Replace
350 buffer.setCharAt(index, value);
351 buffer.delete(index + 1, index + 3);
352 count -= 2;
353 }
354 }
355
356 /**
357 * Encodes and appends a string to a StringBuilder.
358 * @param buffer The StringBuilder to append to.
359 * @param unencodedValue The String to encode and append.
360 * @param reserved characters to encode.
361 */
362 public static void appendEncoded(final StringBuilder buffer,
363 final String unencodedValue, final char[] reserved)
364 {
365 final int offset = buffer.length();
366 buffer.append(unencodedValue);
367 encode(buffer, offset, unencodedValue.length(), reserved);
368 }
369
370 /**
371 * Encodes a set of reserved characters in a StringBuilder, using the URI %nn
372 * encoding. Always encodes % characters.
373 * @param buffer The StringBuilder to append to.
374 * @param offset The position in the buffer to start encoding at.
375 * @param length The number of characters to encode.
376 * @param reserved characters to encode.
377 */
378 public static void encode(final StringBuilder buffer, final int offset,
379 final int length, final char[] reserved)
380 {
381 int index = offset;
382 int count = length;
383 for (; count > 0; index++, count--)
384 {
385 final char ch = buffer.charAt(index);
386 boolean match = ch == '%';
387 if (reserved != null)
388 {
389 for (int i = 0; !match && i < reserved.length; i++)
390 {
391 if (ch == reserved[i])
392 {
393 match = true;
394 }
395 }
396 }
397 if (match)
398 {
399 // Encode
400 char[] digits =
401 {Character.forDigit(((ch >> BITS_IN_HALF_BYTE) & LOW_MASK), HEX_BASE),
402 Character.forDigit((ch & LOW_MASK), HEX_BASE)};
403 buffer.setCharAt(index, '%');
404 buffer.insert(index + 1, digits);
405 index += 2;
406 }
407 }
408 }
409
410 /**
411 * Removes %nn encodings from a string.
412 * @param decodedStr The decoded String.
413 * @return The encoded String.
414 */
415 public static String encode(final String decodedStr)
416 {
417 return encode(decodedStr, null);
418 }
419
420 /**
421 * Converts "special" characters to their %nn value.
422 * @param decodedStr The decoded String.
423 * @param reserved Characters to encode.
424 * @return The encoded String
425 */
426 public static String encode(final String decodedStr, final char[] reserved)
427 {
428 if (decodedStr == null)
429 {
430 return null;
431 }
432 final StringBuilder buffer = new StringBuilder(decodedStr);
433 encode(buffer, 0, buffer.length(), reserved);
434 return buffer.toString();
435 }
436
437 /**
438 * Encode an array of Strings.
439 * @param strings The array of Strings to encode.
440 * @return An array of encoded Strings.
441 */
442 public static String[] encode(String[] strings)
443 {
444 if (strings == null)
445 {
446 return null;
447 }
448 for (int i = 0; i < strings.length; i++)
449 {
450 strings[i] = encode(strings[i]);
451 }
452 return strings;
453 }
454
455 /**
456 * Decodes the String.
457 * @param uri The String to decode.
458 * @throws FileSystemException if an error occurs.
459 */
460 public static void checkUriEncoding(String uri) throws FileSystemException
461 {
462 decode(uri);
463 }
464
465 public static void canonicalizePath(StringBuilder buffer, int offset,
466 int length, FileNameParser fileNameParser)
467 throws FileSystemException
468 {
469 int index = offset;
470 int count = length;
471 for (; count > 0; count--, index++)
472 {
473 final char ch = buffer.charAt(index);
474 if (ch == '%')
475 {
476 if (count < 3)
477 {
478 throw new FileSystemException(
479 "vfs.provider/invalid-escape-sequence.error",
480 buffer.substring(index, index + count));
481 }
482
483 // Decode
484 int dig1 = Character.digit(buffer.charAt(index + 1), HEX_BASE);
485 int dig2 = Character.digit(buffer.charAt(index + 2), HEX_BASE);
486 if (dig1 == -1 || dig2 == -1)
487 {
488 throw new FileSystemException(
489 "vfs.provider/invalid-escape-sequence.error",
490 buffer.substring(index, index + 3));
491 }
492 char value = (char) (dig1 << BITS_IN_HALF_BYTE | dig2);
493
494 boolean match = value == '%'
495 || (fileNameParser != null && fileNameParser.encodeCharacter(value));
496
497 if (match)
498 {
499 // this is a reserved character, not allowed to decode
500 index += 2;
501 count -= 2;
502 continue;
503 }
504
505 // Replace
506 buffer.setCharAt(index, value);
507 buffer.delete(index + 1, index + 3);
508 count -= 2;
509 }
510 else if (fileNameParser.encodeCharacter(ch))
511 {
512 // Encode
513 char[] digits =
514 {Character.forDigit(((ch >> BITS_IN_HALF_BYTE) & LOW_MASK), HEX_BASE),
515 Character.forDigit((ch & LOW_MASK), HEX_BASE) };
516 buffer.setCharAt(index, '%');
517 buffer.insert(index + 1, digits);
518 index += 2;
519 }
520 }
521 }
522
523 /**
524 * Extract the query String from the URI.
525 * @param name StringBuilder containing the URI.
526 * @return The query string, if any. null otherwise.
527 */
528 public static String extractQueryString(StringBuilder name)
529 {
530 for (int pos = 0; pos < name.length(); pos++)
531 {
532 if (name.charAt(pos) == '?')
533 {
534 String queryString = name.substring(pos + 1);
535 name.delete(pos, name.length());
536 return queryString;
537 }
538 }
539
540 return null;
541 }
542 }