1 | |
|
2 | |
|
3 | |
|
4 | |
|
5 | |
|
6 | |
|
7 | |
|
8 | |
|
9 | |
|
10 | |
|
11 | |
|
12 | |
|
13 | |
|
14 | |
|
15 | |
|
16 | |
|
17 | |
package org.apache.commons.validator; |
18 | |
|
19 | |
import java.io.Serializable; |
20 | |
import java.util.Arrays; |
21 | |
import java.util.HashSet; |
22 | |
import java.util.Set; |
23 | |
import java.util.regex.Matcher; |
24 | |
import java.util.regex.Pattern; |
25 | |
|
26 | |
import org.apache.commons.validator.routines.InetAddressValidator; |
27 | |
import org.apache.commons.validator.util.Flags; |
28 | |
|
29 | |
|
30 | |
|
31 | |
|
32 | |
|
33 | |
|
34 | |
|
35 | |
|
36 | |
|
37 | |
|
38 | |
|
39 | |
|
40 | |
|
41 | |
|
42 | |
|
43 | |
|
44 | |
|
45 | |
|
46 | |
|
47 | |
|
48 | |
|
49 | |
|
50 | |
|
51 | |
|
52 | |
|
53 | |
|
54 | |
|
55 | |
|
56 | |
|
57 | |
|
58 | |
|
59 | |
|
60 | |
|
61 | |
|
62 | |
|
63 | |
|
64 | |
|
65 | |
|
66 | |
|
67 | |
|
68 | |
|
69 | |
|
70 | |
|
71 | |
|
72 | |
|
73 | |
|
74 | |
|
75 | |
|
76 | |
|
77 | |
|
78 | |
|
79 | |
@Deprecated |
80 | |
public class UrlValidator implements Serializable { |
81 | |
|
82 | |
private static final long serialVersionUID = 24137157400029593L; |
83 | |
|
84 | |
|
85 | |
|
86 | |
|
87 | |
|
88 | |
public static final int ALLOW_ALL_SCHEMES = 1 << 0; |
89 | |
|
90 | |
|
91 | |
|
92 | |
|
93 | |
public static final int ALLOW_2_SLASHES = 1 << 1; |
94 | |
|
95 | |
|
96 | |
|
97 | |
|
98 | |
public static final int NO_FRAGMENTS = 1 << 2; |
99 | |
|
100 | |
private static final String ALPHA_CHARS = "a-zA-Z"; |
101 | |
|
102 | |
|
103 | |
|
104 | |
private static final String SPECIAL_CHARS = ";/@&=,.?:+$"; |
105 | |
|
106 | |
private static final String VALID_CHARS = "[^\\s" + SPECIAL_CHARS + "]"; |
107 | |
|
108 | |
|
109 | |
private static final String AUTHORITY_CHARS_REGEX = "\\p{Alnum}\\-\\."; |
110 | |
|
111 | |
private static final String ATOM = VALID_CHARS + '+'; |
112 | |
|
113 | |
|
114 | |
|
115 | |
|
116 | |
private static final String URL_REGEX = |
117 | |
"^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\\?([^#]*))?(#(.*))?"; |
118 | |
|
119 | 1 | private static final Pattern URL_PATTERN = Pattern.compile(URL_REGEX); |
120 | |
|
121 | |
|
122 | |
|
123 | |
|
124 | |
private static final int PARSE_URL_SCHEME = 2; |
125 | |
|
126 | |
|
127 | |
|
128 | |
|
129 | |
private static final int PARSE_URL_AUTHORITY = 4; |
130 | |
|
131 | |
private static final int PARSE_URL_PATH = 5; |
132 | |
|
133 | |
private static final int PARSE_URL_QUERY = 7; |
134 | |
|
135 | |
private static final int PARSE_URL_FRAGMENT = 9; |
136 | |
|
137 | |
|
138 | |
|
139 | |
|
140 | 1 | private static final Pattern SCHEME_PATTERN = Pattern.compile("^\\p{Alpha}[\\p{Alnum}\\+\\-\\.]*"); |
141 | |
|
142 | |
private static final String AUTHORITY_REGEX = |
143 | |
"^([" + AUTHORITY_CHARS_REGEX + "]*)(:\\d*)?(.*)?"; |
144 | |
|
145 | 1 | private static final Pattern AUTHORITY_PATTERN = Pattern.compile(AUTHORITY_REGEX); |
146 | |
|
147 | |
private static final int PARSE_AUTHORITY_HOST_IP = 1; |
148 | |
|
149 | |
private static final int PARSE_AUTHORITY_PORT = 2; |
150 | |
|
151 | |
|
152 | |
|
153 | |
|
154 | |
private static final int PARSE_AUTHORITY_EXTRA = 3; |
155 | |
|
156 | 1 | private static final Pattern PATH_PATTERN = Pattern.compile("^(/[-\\w:@&?=+,.!/~*'%$_;]*)?$"); |
157 | |
|
158 | 1 | private static final Pattern QUERY_PATTERN = Pattern.compile("^(.*)$"); |
159 | |
|
160 | 1 | private static final Pattern LEGAL_ASCII_PATTERN = Pattern.compile("^\\p{ASCII}+$"); |
161 | |
|
162 | 1 | private static final Pattern DOMAIN_PATTERN = |
163 | |
Pattern.compile("^" + ATOM + "(\\." + ATOM + ")*$"); |
164 | |
|
165 | 1 | private static final Pattern PORT_PATTERN = Pattern.compile("^:(\\d{1,5})$"); |
166 | |
|
167 | 1 | private static final Pattern ATOM_PATTERN = Pattern.compile("^(" + ATOM + ").*?$"); |
168 | |
|
169 | 1 | private static final Pattern ALPHA_PATTERN = Pattern.compile("^[" + ALPHA_CHARS + "]"); |
170 | |
|
171 | |
|
172 | |
|
173 | |
|
174 | |
private final Flags options; |
175 | |
|
176 | |
|
177 | |
|
178 | |
|
179 | 5 | private final Set<String> allowedSchemes = new HashSet<String>(); |
180 | |
|
181 | |
|
182 | |
|
183 | |
|
184 | 5 | protected String[] defaultSchemes = {"http", "https", "ftp"}; |
185 | |
|
186 | |
|
187 | |
|
188 | |
|
189 | |
public UrlValidator() { |
190 | 0 | this(null); |
191 | 0 | } |
192 | |
|
193 | |
|
194 | |
|
195 | |
|
196 | |
|
197 | |
|
198 | |
|
199 | |
|
200 | |
|
201 | |
public UrlValidator(String[] schemes) { |
202 | 1 | this(schemes, 0); |
203 | 1 | } |
204 | |
|
205 | |
|
206 | |
|
207 | |
|
208 | |
|
209 | |
|
210 | |
|
211 | |
public UrlValidator(int options) { |
212 | 0 | this(null, options); |
213 | 0 | } |
214 | |
|
215 | |
|
216 | |
|
217 | |
|
218 | |
|
219 | |
|
220 | |
|
221 | |
|
222 | 5 | public UrlValidator(String[] schemes, int options) { |
223 | 5 | this.options = new Flags(options); |
224 | |
|
225 | 5 | if (this.options.isOn(ALLOW_ALL_SCHEMES)) { |
226 | 2 | return; |
227 | |
} |
228 | |
|
229 | 3 | if (schemes == null) { |
230 | 0 | schemes = this.defaultSchemes; |
231 | |
} |
232 | |
|
233 | 3 | this.allowedSchemes.addAll(Arrays.asList(schemes)); |
234 | 3 | } |
235 | |
|
236 | |
|
237 | |
|
238 | |
|
239 | |
|
240 | |
|
241 | |
|
242 | |
|
243 | |
public boolean isValid(String value) { |
244 | 75606 | if (value == null) { |
245 | 0 | return false; |
246 | |
} |
247 | 75606 | if (!LEGAL_ASCII_PATTERN.matcher(value).matches()) { |
248 | 0 | return false; |
249 | |
} |
250 | |
|
251 | |
|
252 | 75606 | Matcher urlMatcher = URL_PATTERN.matcher(value); |
253 | 75606 | if (!urlMatcher.matches()) { |
254 | 0 | return false; |
255 | |
} |
256 | |
|
257 | 75606 | if (!isValidScheme(urlMatcher.group(PARSE_URL_SCHEME))) { |
258 | 28350 | return false; |
259 | |
} |
260 | |
|
261 | 47256 | if (!isValidAuthority(urlMatcher.group(PARSE_URL_AUTHORITY))) { |
262 | 39375 | return false; |
263 | |
} |
264 | |
|
265 | 7881 | if (!isValidPath(urlMatcher.group(PARSE_URL_PATH))) { |
266 | 2520 | return false; |
267 | |
} |
268 | |
|
269 | 5361 | if (!isValidQuery(urlMatcher.group(PARSE_URL_QUERY))) { |
270 | 0 | return false; |
271 | |
} |
272 | |
|
273 | 5361 | if (!isValidFragment(urlMatcher.group(PARSE_URL_FRAGMENT))) { |
274 | 630 | return false; |
275 | |
} |
276 | |
|
277 | 4731 | return true; |
278 | |
} |
279 | |
|
280 | |
|
281 | |
|
282 | |
|
283 | |
|
284 | |
|
285 | |
|
286 | |
|
287 | |
|
288 | |
protected boolean isValidScheme(String scheme) { |
289 | 75610 | if (scheme == null) { |
290 | 18900 | return false; |
291 | |
} |
292 | |
|
293 | 56710 | if (!SCHEME_PATTERN.matcher(scheme).matches()) { |
294 | 9450 | return false; |
295 | |
} |
296 | |
|
297 | 47260 | if (options.isOff(ALLOW_ALL_SCHEMES) && !allowedSchemes.contains(scheme)) { |
298 | 3 | return false; |
299 | |
} |
300 | |
|
301 | 47257 | return true; |
302 | |
} |
303 | |
|
304 | |
|
305 | |
|
306 | |
|
307 | |
|
308 | |
|
309 | |
|
310 | |
protected boolean isValidAuthority(String authority) { |
311 | 47256 | if (authority == null) { |
312 | 18831 | return false; |
313 | |
} |
314 | |
|
315 | 28425 | InetAddressValidator inetAddressValidator = |
316 | |
InetAddressValidator.getInstance(); |
317 | |
|
318 | 28425 | Matcher authorityMatcher = AUTHORITY_PATTERN.matcher(authority); |
319 | 28425 | if (!authorityMatcher.matches()) { |
320 | 0 | return false; |
321 | |
} |
322 | |
|
323 | 28425 | boolean hostname = false; |
324 | |
|
325 | 28425 | String hostIP = authorityMatcher.group(PARSE_AUTHORITY_HOST_IP); |
326 | 28425 | boolean ipV4Address = inetAddressValidator.isValid(hostIP); |
327 | |
|
328 | 28425 | if (!ipV4Address) { |
329 | |
|
330 | 25275 | hostname = DOMAIN_PATTERN.matcher(hostIP).matches(); |
331 | |
} |
332 | |
|
333 | |
|
334 | 28425 | if (hostname) { |
335 | |
|
336 | |
|
337 | 17364 | char[] chars = hostIP.toCharArray(); |
338 | 17364 | int size = 1; |
339 | 144090 | for(int i=0; i<chars.length; i++) { |
340 | 126726 | if(chars[i] == '.') { |
341 | 26844 | size++; |
342 | |
} |
343 | |
} |
344 | 17364 | String[] domainSegment = new String[size]; |
345 | 17364 | boolean match = true; |
346 | 17364 | int segmentCount = 0; |
347 | 17364 | int segmentLength = 0; |
348 | |
|
349 | 78936 | while (match) { |
350 | 61572 | Matcher atomMatcher = ATOM_PATTERN.matcher(hostIP); |
351 | 61572 | match = atomMatcher.matches(); |
352 | 61572 | if (match) { |
353 | 44208 | domainSegment[segmentCount] = atomMatcher.group(1); |
354 | 44208 | segmentLength = domainSegment[segmentCount].length() + 1; |
355 | 44208 | hostIP = |
356 | |
(segmentLength >= hostIP.length()) |
357 | |
? "" |
358 | |
: hostIP.substring(segmentLength); |
359 | |
|
360 | 44208 | segmentCount++; |
361 | |
} |
362 | 61572 | } |
363 | 17364 | String topLevel = domainSegment[segmentCount - 1]; |
364 | 17364 | if (topLevel.length() < 2 || topLevel.length() > 4) { |
365 | 4749 | return false; |
366 | |
} |
367 | |
|
368 | |
|
369 | 12615 | if (!ALPHA_PATTERN.matcher(topLevel.substring(0, 1)).matches()) { |
370 | 3150 | return false; |
371 | |
} |
372 | |
|
373 | |
|
374 | 9465 | if (segmentCount < 2) { |
375 | 1584 | return false; |
376 | |
} |
377 | |
} |
378 | |
|
379 | 18942 | if (!hostname && !ipV4Address) { |
380 | 7911 | return false; |
381 | |
} |
382 | |
|
383 | 11031 | String port = authorityMatcher.group(PARSE_AUTHORITY_PORT); |
384 | 11031 | if (port != null && !PORT_PATTERN.matcher(port).matches()) { |
385 | 1575 | return false; |
386 | |
} |
387 | |
|
388 | 9456 | String extra = authorityMatcher.group(PARSE_AUTHORITY_EXTRA); |
389 | 9456 | if (!GenericValidator.isBlankOrNull(extra)) { |
390 | 1575 | return false; |
391 | |
} |
392 | |
|
393 | 7881 | return true; |
394 | |
} |
395 | |
|
396 | |
|
397 | |
|
398 | |
|
399 | |
|
400 | |
|
401 | |
protected boolean isValidPath(String path) { |
402 | 7881 | if (path == null) { |
403 | 0 | return false; |
404 | |
} |
405 | |
|
406 | 7881 | if (!PATH_PATTERN.matcher(path).matches()) { |
407 | 0 | return false; |
408 | |
} |
409 | |
|
410 | 7881 | int slash2Count = countToken("//", path); |
411 | 7881 | if (options.isOff(ALLOW_2_SLASHES) && (slash2Count > 0)) { |
412 | 630 | return false; |
413 | |
} |
414 | |
|
415 | 7251 | int slashCount = countToken("/", path); |
416 | 7251 | int dot2Count = countToken("..", path); |
417 | 7251 | if (dot2Count > 0 && (slashCount - slash2Count - 1) <= dot2Count){ |
418 | 1890 | return false; |
419 | |
} |
420 | |
|
421 | 5361 | return true; |
422 | |
} |
423 | |
|
424 | |
|
425 | |
|
426 | |
|
427 | |
|
428 | |
|
429 | |
protected boolean isValidQuery(String query) { |
430 | 5361 | if (query == null) { |
431 | 2211 | return true; |
432 | |
} |
433 | |
|
434 | 3150 | return QUERY_PATTERN.matcher(query).matches(); |
435 | |
} |
436 | |
|
437 | |
|
438 | |
|
439 | |
|
440 | |
|
441 | |
|
442 | |
protected boolean isValidFragment(String fragment) { |
443 | 5361 | if (fragment == null) { |
444 | 4731 | return true; |
445 | |
} |
446 | |
|
447 | 630 | return options.isOff(NO_FRAGMENTS); |
448 | |
} |
449 | |
|
450 | |
|
451 | |
|
452 | |
|
453 | |
|
454 | |
|
455 | |
|
456 | |
protected int countToken(String token, String target) { |
457 | 22383 | int tokenIndex = 0; |
458 | 22383 | int count = 0; |
459 | 58631 | while (tokenIndex != -1) { |
460 | 36248 | tokenIndex = target.indexOf(token, tokenIndex); |
461 | 36248 | if (tokenIndex > -1) { |
462 | 13865 | tokenIndex++; |
463 | 13865 | count++; |
464 | |
} |
465 | |
} |
466 | 22383 | return count; |
467 | |
} |
468 | |
} |