View Javadoc
1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one or more
3    * contributor license agreements.  See the NOTICE file distributed with
4    * this work for additional information regarding copyright ownership.
5    * The ASF licenses this file to You under the Apache License, Version 2.0
6    * (the "License"); you may not use this file except in compliance with
7    * the License.  You may obtain a copy of the License at
8    *
9    *      http://www.apache.org/licenses/LICENSE-2.0
10   *
11   * Unless required by applicable law or agreed to in writing, software
12   * distributed under the License is distributed on an "AS IS" BASIS,
13   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14   * See the License for the specific language governing permissions and
15   * limitations under the License.
16   */
17  
18  package org.apache.commons.vfs2.provider;
19  
20  import org.apache.commons.lang3.ArraySorter;
21  import org.apache.commons.lang3.ArrayUtils;
22  
23  /**
24   * Helps deal with <a href="https://datatracker.ietf.org/doc/html/rfc2396">RFC 2396</a>.
25   * <p>
26   * The RFC 2396 Collected BNF for URI from <a href="https://datatracker.ietf.org/doc/html/rfc2396#appendix-A">Appendix
27   * A</a>:
28   * </p>
29   *
30   * <pre>
31        URI-reference = [ absoluteURI | relativeURI ] [ "#" fragment ]
32        absoluteURI   = scheme ":" ( hier_part | opaque_part )
33        relativeURI   = ( net_path | abs_path | rel_path ) [ "?" query ]
34  
35        hier_part     = ( net_path | abs_path ) [ "?" query ]
36        opaque_part   = uric_no_slash *uric
37  
38        uric_no_slash = unreserved | escaped | ";" | "?" | ":" | "@" |
39                        "&" | "=" | "+" | "$" | ","
40  
41        net_path      = "//" authority [ abs_path ]
42        abs_path      = "/"  path_segments
43        rel_path      = rel_segment [ abs_path ]
44  
45        rel_segment   = 1*( unreserved | escaped |
46                            ";" | "@" | "&" | "=" | "+" | "$" | "," )
47  
48        scheme        = alpha *( alpha | digit | "+" | "-" | "." )
49  
50        authority     = server | reg_name
51  
52        reg_name      = 1*( unreserved | escaped | "$" | "," |
53                            ";" | ":" | "@" | "&" | "=" | "+" )
54  
55        server        = [ [ userinfo "@" ] hostport ]
56        userinfo      = *( unreserved | escaped |
57                           ";" | ":" | "&" | "=" | "+" | "$" | "," )
58  
59        hostport      = host [ ":" port ]
60        host          = hostname | IPv4address
61        hostname      = *( domainlabel "." ) toplabel [ "." ]
62        domainlabel   = alphanum | alphanum *( alphanum | "-" ) alphanum
63        toplabel      = alpha | alpha *( alphanum | "-" ) alphanum
64        IPv4address   = 1*digit "." 1*digit "." 1*digit "." 1*digit
65        port          = *digit
66  
67        path          = [ abs_path | opaque_part ]
68        path_segments = segment *( "/" segment )
69        segment       = *pchar *( ";" param )
70        param         = *pchar
71        pchar         = unreserved | escaped |
72                        ":" | "@" | "&" | "=" | "+" | "$" | ","
73  
74        query         = *uric
75  
76        fragment      = *uric
77  
78        uric          = reserved | unreserved | escaped
79        reserved      = ";" | "/" | "?" | ":" | "@" | "&" | "=" | "+" |
80                        "$" | ","
81        unreserved    = alphanum | mark
82        mark          = "-" | "_" | "." | "!" | "~" | "*" | "'" |
83                        "(" | ")"
84  
85        escaped       = "%" hex hex
86        hex           = digit | "A" | "B" | "C" | "D" | "E" | "F" |
87                                "a" | "b" | "c" | "d" | "e" | "f"
88  
89        alphanum      = alpha | digit
90        alpha         = lowalpha | upalpha
91  
92        lowalpha = "a" | "b" | "c" | "d" | "e" | "f" | "g" | "h" | "i" |
93                   "j" | "k" | "l" | "m" | "n" | "o" | "p" | "q" | "r" |
94                   "s" | "t" | "u" | "v" | "w" | "x" | "y" | "z"
95        upalpha  = "A" | "B" | "C" | "D" | "E" | "F" | "G" | "H" | "I" |
96                   "J" | "K" | "L" | "M" | "N" | "O" | "P" | "Q" | "R" |
97                   "S" | "T" | "U" | "V" | "W" | "X" | "Y" | "Z"
98        digit    = "0" | "1" | "2" | "3" | "4" | "5" | "6" | "7" |
99                   "8" | "9"
100  * </pre>
101  */
102 final class RFC2396 {
103 
104     // RFC 2396 mark
105     static final char[] MARK = {'-', '_', '.', '!', '~', '*', '\'', '(', ')'};
106 
107     // RFC 2396 digit
108     static final char[] DIGIT = {'0', '1', '2', '3', '4', '5', '6', '7', '8', '9'};
109 
110     // RFC 2396 lowalpha
111     static final char[] LOWALPHA = {'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y',
112         'z'};
113 
114     // RFC 2396 upalpha
115     static final char[] UPALPHA = {'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y',
116         'Z'};
117 
118     // RFC 2396 alpha
119     static final char[] ALPHA = ArrayUtils.addAll(LOWALPHA, UPALPHA);
120 
121     // RFC 2396 alphanum
122     static final char[] ALPHANUM = ArrayUtils.addAll(ALPHA, DIGIT);
123 
124     // RFC 2396 reserved
125     static final char[] RESERVED = {';', '/', '?', ':', '@', '&', '=', '+', '$', ','};
126 
127     // RFC 2396 unreserved
128     static final char[] UNRESERVED = ArrayUtils.addAll(ALPHANUM, MARK);
129 
130     // RFC 2396 userinfo chars which are unescaped, here sorted.
131     static final char[] USERINFO_UNESCAPED = ArraySorter.sort(ArrayUtils.addAll(UNRESERVED, ';', ':', '&', '=', '+', '$', ','));
132 
133     private RFC2396() {
134     }
135 }