1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17 package org.apache.commons.vfs2.util;
18
19 import java.io.ByteArrayOutputStream;
20 import java.io.UnsupportedEncodingException;
21 import java.nio.charset.Charset;
22 import java.nio.charset.StandardCharsets;
23
24 import org.apache.commons.lang3.StringUtils;
25 import org.apache.commons.lang3.util.FluentBitSet;
26 import org.apache.commons.logging.Log;
27 import org.apache.commons.logging.LogFactory;
28 import org.apache.commons.vfs2.provider.GenericURLFileName;
29
30
31
32
33
34
35
36
37
38 public class URIUtils {
39
40
41
42
43
44
45
46
47
48
49 private static final class EncodingUtils {
50
51
52
53
54
55
56
57
58
59
60
61 static String getAsciiString(final byte[] data, final int offset, final int length) {
62 return new String(data, offset, length, StandardCharsets.US_ASCII);
63 }
64
65
66
67
68
69
70
71
72
73 static byte[] getBytes(final String data, final String charsetName) {
74 if (data == null) {
75 throw new IllegalArgumentException("data may not be null");
76 }
77
78 if (StringUtils.isEmpty(charsetName)) {
79 throw new IllegalArgumentException("charset may not be null or empty");
80 }
81
82 try {
83 return data.getBytes(charsetName);
84 } catch (final UnsupportedEncodingException e) {
85
86 if (LOG.isWarnEnabled()) {
87 LOG.warn("Unsupported encoding: " + charsetName + ". System encoding used.");
88 }
89
90 return data.getBytes(Charset.defaultCharset());
91 }
92 }
93
94 private EncodingUtils() {
95 }
96 }
97
98
99
100
101
102
103
104
105
106 private static final class URLCodecUtils {
107
108 private static final byte ESCAPE_CHAR = '%';
109
110 private static final int EIGHT_BIT_CHARSET_SIZE = 256;
111
112 private static final int FOUR_BITS = 4;
113
114 private static final int UNSIGNED_BYTE_MASK = 0xF;
115
116
117 private static final FluentBitSet WWW_FORM_URL_SAFE = URIBitSets.bitSet()
118
119 .setInclusive('a', 'z')
120 .setInclusive('A', 'Z')
121
122 .setInclusive('0', '9')
123
124 .set('-', '_', '.', '*')
125
126 .set(' ');
127
128
129
130
131
132 private static final int RADIX = 16;
133
134 static byte[] encodeUrl(FluentBitSet urlsafe, final byte[] bytes) {
135 if (bytes == null) {
136 return null;
137 }
138 if (urlsafe == null) {
139 urlsafe = WWW_FORM_URL_SAFE;
140 }
141
142 final ByteArrayOutputStream buffer = new ByteArrayOutputStream();
143 for (final byte c : bytes) {
144 int b = c;
145 if (b < 0) {
146 b = EIGHT_BIT_CHARSET_SIZE + b;
147 }
148 if (urlsafe.get(b)) {
149 if (b == ' ') {
150 b = '+';
151 }
152 buffer.write(b);
153 } else {
154 buffer.write(ESCAPE_CHAR);
155 final char hex1 = hexDigit(b >> FOUR_BITS);
156 final char hex2 = hexDigit(b);
157 buffer.write(hex1);
158 buffer.write(hex2);
159 }
160 }
161 return buffer.toByteArray();
162 }
163
164 private static char hexDigit(final int b) {
165 return Character.toUpperCase(Character.forDigit(b & UNSIGNED_BYTE_MASK, RADIX));
166 }
167
168 private URLCodecUtils() {
169 }
170 }
171
172 private static final Log LOG = LogFactory.getLog(URIUtils.class);
173
174
175
176
177 private static final String DEFAULT_PROTOCOL_CHARSET = StandardCharsets.UTF_8.name();
178
179 private static String encode(final String unescaped, final FluentBitSet allowed, final String charset) {
180 final byte[] rawdata = URLCodecUtils.encodeUrl(allowed, EncodingUtils.getBytes(unescaped, charset));
181 return EncodingUtils.getAsciiString(rawdata, 0, rawdata.length);
182 }
183
184
185
186
187
188
189
190
191 public static String encodePath(final String unescaped) {
192 return encodePath(unescaped, DEFAULT_PROTOCOL_CHARSET);
193 }
194
195
196
197
198
199
200
201
202
203 public static String encodePath(final String unescaped, final String charset) {
204 if (unescaped == null) {
205 throw new IllegalArgumentException("The string to encode may not be null.");
206 }
207
208 return encode(unescaped, URIBitSets.ALLOWED_ABS_PATH, charset);
209 }
210
211 private URIUtils() {
212 }
213
214 }