1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17 package org.apache.commons.validator;
18
19 import java.io.Serializable;
20 import java.util.Arrays;
21 import java.util.HashSet;
22 import java.util.Set;
23 import java.util.regex.Matcher;
24 import java.util.regex.Pattern;
25
26 import org.apache.commons.validator.routines.InetAddressValidator;
27 import org.apache.commons.validator.util.Flags;
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78 @Deprecated
79 public class UrlValidator implements Serializable {
80
81 private static final long serialVersionUID = 24137157400029593L;
82
83
84
85
86
87 public static final int ALLOW_ALL_SCHEMES = 1 << 0;
88
89
90
91
92 public static final int ALLOW_2_SLASHES = 1 << 1;
93
94
95
96
97 public static final int NO_FRAGMENTS = 1 << 2;
98
99 private static final String ALPHA_CHARS = "a-zA-Z";
100
101
102
103 private static final String SPECIAL_CHARS = ";/@&=,.?:+$";
104
105 private static final String VALID_CHARS = "[^\\s" + SPECIAL_CHARS + "]";
106
107
108 private static final String AUTHORITY_CHARS_REGEX = "\\p{Alnum}\\-\\.";
109
110 private static final String ATOM = VALID_CHARS + '+';
111
112
113
114
115 private static final String URL_REGEX =
116 "^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\\?([^#]*))?(#(.*))?";
117
118 private static final Pattern URL_PATTERN = Pattern.compile(URL_REGEX);
119
120
121
122
123 private static final int PARSE_URL_SCHEME = 2;
124
125
126
127
128 private static final int PARSE_URL_AUTHORITY = 4;
129
130 private static final int PARSE_URL_PATH = 5;
131
132 private static final int PARSE_URL_QUERY = 7;
133
134 private static final int PARSE_URL_FRAGMENT = 9;
135
136
137
138
139 private static final Pattern SCHEME_PATTERN = Pattern.compile("^\\p{Alpha}[\\p{Alnum}\\+\\-\\.]*");
140
141 private static final String AUTHORITY_REGEX =
142 "^([" + AUTHORITY_CHARS_REGEX + "]*)(:\\d*)?(.*)?";
143
144 private static final Pattern AUTHORITY_PATTERN = Pattern.compile(AUTHORITY_REGEX);
145
146 private static final int PARSE_AUTHORITY_HOST_IP = 1;
147
148 private static final int PARSE_AUTHORITY_PORT = 2;
149
150
151
152
153 private static final int PARSE_AUTHORITY_EXTRA = 3;
154
155 private static final Pattern PATH_PATTERN = Pattern.compile("^(/[-\\w:@&?=+,.!/~*'%$_;]*)?$");
156
157 private static final Pattern QUERY_PATTERN = Pattern.compile("^(.*)$");
158
159 private static final Pattern LEGAL_ASCII_PATTERN = Pattern.compile("^\\p{ASCII}+$");
160
161 private static final Pattern DOMAIN_PATTERN =
162 Pattern.compile("^" + ATOM + "(\\." + ATOM + ")*$");
163
164 private static final Pattern PORT_PATTERN = Pattern.compile("^:(\\d{1,5})$");
165
166 private static final Pattern ATOM_PATTERN = Pattern.compile("^(" + ATOM + ").*?$");
167
168 private static final Pattern ALPHA_PATTERN = Pattern.compile("^[" + ALPHA_CHARS + "]");
169
170
171
172
173 private final Flags options;
174
175
176
177
178 private final Set<String> allowedSchemes = new HashSet<>();
179
180
181
182
183 protected String[] defaultSchemes = {"http", "https", "ftp"};
184
185
186
187
188 public UrlValidator() {
189 this(null);
190 }
191
192
193
194
195
196
197
198 public UrlValidator(final int options) {
199 this(null, options);
200 }
201
202
203
204
205
206
207
208
209
210 public UrlValidator(final String[] schemes) {
211 this(schemes, 0);
212 }
213
214
215
216
217
218
219
220
221 public UrlValidator(String[] schemes, final int options) {
222 this.options = new Flags(options);
223
224 if (this.options.isOn(ALLOW_ALL_SCHEMES)) {
225 return;
226 }
227
228 if (schemes == null) {
229 schemes = this.defaultSchemes;
230 }
231
232 this.allowedSchemes.addAll(Arrays.asList(schemes));
233 }
234
235
236
237
238
239
240
241 protected int countToken(final String token, final String target) {
242 int tokenIndex = 0;
243 int count = 0;
244 while (tokenIndex != -1) {
245 tokenIndex = target.indexOf(token, tokenIndex);
246 if (tokenIndex > -1) {
247 tokenIndex++;
248 count++;
249 }
250 }
251 return count;
252 }
253
254
255
256
257
258
259
260
261 public boolean isValid(final String value) {
262 if (value == null) {
263 return false;
264 }
265 if (!LEGAL_ASCII_PATTERN.matcher(value).matches()) {
266 return false;
267 }
268
269
270 final Matcher urlMatcher = URL_PATTERN.matcher(value);
271 if (!urlMatcher.matches()) {
272 return false;
273 }
274
275 if (!isValidScheme(urlMatcher.group(PARSE_URL_SCHEME))) {
276 return false;
277 }
278
279 if (!isValidAuthority(urlMatcher.group(PARSE_URL_AUTHORITY))) {
280 return false;
281 }
282
283 if (!isValidPath(urlMatcher.group(PARSE_URL_PATH))) {
284 return false;
285 }
286
287 if (!isValidQuery(urlMatcher.group(PARSE_URL_QUERY))) {
288 return false;
289 }
290
291 if (!isValidFragment(urlMatcher.group(PARSE_URL_FRAGMENT))) {
292 return false;
293 }
294
295 return true;
296 }
297
298
299
300
301
302
303
304 protected boolean isValidAuthority(final String authority) {
305 if (authority == null) {
306 return false;
307 }
308
309 final InetAddressValidator inetAddressValidator =
310 InetAddressValidator.getInstance();
311
312 final Matcher authorityMatcher = AUTHORITY_PATTERN.matcher(authority);
313 if (!authorityMatcher.matches()) {
314 return false;
315 }
316
317 boolean hostname = false;
318
319 String hostIP = authorityMatcher.group(PARSE_AUTHORITY_HOST_IP);
320 final boolean ipV4Address = inetAddressValidator.isValid(hostIP);
321
322 if (!ipV4Address) {
323
324 hostname = DOMAIN_PATTERN.matcher(hostIP).matches();
325 }
326
327
328 if (hostname) {
329
330
331 final char[] chars = hostIP.toCharArray();
332 int size = 1;
333 for (final char element : chars) {
334 if (element == '.') {
335 size++;
336 }
337 }
338 final String[] domainSegment = new String[size];
339 boolean match = true;
340 int segmentCount = 0;
341 int segmentLength = 0;
342
343 while (match) {
344 final Matcher atomMatcher = ATOM_PATTERN.matcher(hostIP);
345 match = atomMatcher.matches();
346 if (match) {
347 domainSegment[segmentCount] = atomMatcher.group(1);
348 segmentLength = domainSegment[segmentCount].length() + 1;
349 hostIP =
350 segmentLength >= hostIP.length()
351 ? ""
352 : hostIP.substring(segmentLength);
353
354 segmentCount++;
355 }
356 }
357 final String topLevel = domainSegment[segmentCount - 1];
358 if (topLevel.length() < 2 || topLevel.length() > 4) {
359 return false;
360 }
361
362
363 if (!ALPHA_PATTERN.matcher(topLevel.substring(0, 1)).matches()) {
364 return false;
365 }
366
367
368 if (segmentCount < 2) {
369 return false;
370 }
371 }
372
373 if (!hostname && !ipV4Address) {
374 return false;
375 }
376
377 final String port = authorityMatcher.group(PARSE_AUTHORITY_PORT);
378 if (port != null && !PORT_PATTERN.matcher(port).matches()) {
379 return false;
380 }
381
382 final String extra = authorityMatcher.group(PARSE_AUTHORITY_EXTRA);
383 if (!GenericValidator.isBlankOrNull(extra)) {
384 return false;
385 }
386
387 return true;
388 }
389
390
391
392
393
394
395 protected boolean isValidFragment(final String fragment) {
396 if (fragment == null) {
397 return true;
398 }
399
400 return options.isOff(NO_FRAGMENTS);
401 }
402
403
404
405
406
407
408 protected boolean isValidPath(final String path) {
409 if (path == null) {
410 return false;
411 }
412
413 if (!PATH_PATTERN.matcher(path).matches()) {
414 return false;
415 }
416
417 final int slash2Count = countToken("//", path);
418 if (options.isOff(ALLOW_2_SLASHES) && slash2Count > 0) {
419 return false;
420 }
421
422 final int slashCount = countToken("/", path);
423 final int dot2Count = countToken("..", path);
424 if (dot2Count > 0 && slashCount - slash2Count - 1 <= dot2Count) {
425 return false;
426 }
427
428 return true;
429 }
430
431
432
433
434
435
436 protected boolean isValidQuery(final String query) {
437 if (query == null) {
438 return true;
439 }
440
441 return QUERY_PATTERN.matcher(query).matches();
442 }
443
444
445
446
447
448
449
450
451
452 protected boolean isValidScheme(final String scheme) {
453 if (scheme == null) {
454 return false;
455 }
456
457 if (!SCHEME_PATTERN.matcher(scheme).matches()) {
458 return false;
459 }
460
461 if (options.isOff(ALLOW_ALL_SCHEMES) && !allowedSchemes.contains(scheme)) {
462 return false;
463 }
464
465 return true;
466 }
467 }