1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17 package org.apache.commons.validator;
18
19 import java.io.Serializable;
20 import java.util.Arrays;
21 import java.util.HashSet;
22 import java.util.Set;
23 import java.util.regex.Matcher;
24 import java.util.regex.Pattern;
25
26 import org.apache.commons.validator.routines.InetAddressValidator;
27 import org.apache.commons.validator.util.Flags;
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78 @Deprecated
79 public class UrlValidator implements Serializable {
80
81 private static final int TOP_LEVEL_MAX_LEN = 4;
82
83 private static final int TOP_LEVEL_MIN_LEN = 2;
84
85 private static final long serialVersionUID = 24137157400029593L;
86
87
88
89
90
91 public static final int ALLOW_ALL_SCHEMES = 1 << 0;
92
93
94
95
96 public static final int ALLOW_2_SLASHES = 1 << 1;
97
98
99
100
101 public static final int NO_FRAGMENTS = 1 << 2;
102
103 private static final String ALPHA_CHARS = "a-zA-Z";
104
105
106
107 private static final String SPECIAL_CHARS = ";/@&=,.?:+$";
108
109 private static final String VALID_CHARS = "[^\\s" + SPECIAL_CHARS + "]";
110
111
112 private static final String AUTHORITY_CHARS_REGEX = "\\p{Alnum}\\-\\.";
113
114 private static final String ATOM = VALID_CHARS + '+';
115
116
117
118
119 private static final String URL_REGEX =
120 "^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\\?([^#]*))?(#(.*))?";
121
122 private static final Pattern URL_PATTERN = Pattern.compile(URL_REGEX);
123
124
125
126
127 private static final int PARSE_URL_SCHEME = 2;
128
129
130
131
132 private static final int PARSE_URL_AUTHORITY = 4;
133
134 private static final int PARSE_URL_PATH = 5;
135
136 private static final int PARSE_URL_QUERY = 7;
137
138 private static final int PARSE_URL_FRAGMENT = 9;
139
140
141
142
143 private static final Pattern SCHEME_PATTERN = Pattern.compile("^\\p{Alpha}[\\p{Alnum}\\+\\-\\.]*");
144
145 private static final String AUTHORITY_REGEX =
146 "^([" + AUTHORITY_CHARS_REGEX + "]*)(:\\d*)?(.*)?";
147
148 private static final Pattern AUTHORITY_PATTERN = Pattern.compile(AUTHORITY_REGEX);
149
150 private static final int PARSE_AUTHORITY_HOST_IP = 1;
151
152 private static final int PARSE_AUTHORITY_PORT = 2;
153
154
155
156
157 private static final int PARSE_AUTHORITY_EXTRA = 3;
158
159 private static final Pattern PATH_PATTERN = Pattern.compile("^(/[-\\w:@&?=+,.!/~*'%$_;]*)?$");
160
161 private static final Pattern QUERY_PATTERN = Pattern.compile("^(.*)$");
162
163 private static final Pattern LEGAL_ASCII_PATTERN = Pattern.compile("^\\p{ASCII}+$");
164
165 private static final Pattern DOMAIN_PATTERN =
166 Pattern.compile("^" + ATOM + "(\\." + ATOM + ")*$");
167
168 private static final Pattern PORT_PATTERN = Pattern.compile("^:(\\d{1,5})$");
169
170 private static final Pattern ATOM_PATTERN = Pattern.compile("^(" + ATOM + ").*?$");
171
172 private static final Pattern ALPHA_PATTERN = Pattern.compile("^[" + ALPHA_CHARS + "]");
173
174
175
176
177 private final Flags options;
178
179
180
181
182 private final Set<String> allowedSchemes = new HashSet<>();
183
184
185
186
187 protected String[] defaultSchemes = {"http", "https", "ftp"};
188
189
190
191
192 public UrlValidator() {
193 this(null);
194 }
195
196
197
198
199
200
201
202 public UrlValidator(final int options) {
203 this(null, options);
204 }
205
206
207
208
209
210
211
212
213
214 public UrlValidator(final String[] schemes) {
215 this(schemes, 0);
216 }
217
218
219
220
221
222
223
224
225 public UrlValidator(String[] schemes, final int options) {
226 this.options = new Flags(options);
227
228 if (this.options.isOn(ALLOW_ALL_SCHEMES)) {
229 return;
230 }
231
232 if (schemes == null) {
233 schemes = defaultSchemes;
234 }
235
236 allowedSchemes.addAll(Arrays.asList(schemes));
237 }
238
239
240
241
242
243
244
245 protected int countToken(final String token, final String target) {
246 int tokenIndex = 0;
247 int count = 0;
248 while (tokenIndex != -1) {
249 tokenIndex = target.indexOf(token, tokenIndex);
250 if (tokenIndex > -1) {
251 tokenIndex++;
252 count++;
253 }
254 }
255 return count;
256 }
257
258
259
260
261
262
263
264
265 public boolean isValid(final String value) {
266 if (value == null || !LEGAL_ASCII_PATTERN.matcher(value).matches()) {
267 return false;
268 }
269
270
271 final Matcher urlMatcher = URL_PATTERN.matcher(value);
272 if (!urlMatcher.matches() || !isValidScheme(urlMatcher.group(PARSE_URL_SCHEME)) || !isValidAuthority(urlMatcher.group(PARSE_URL_AUTHORITY)) || !isValidPath(urlMatcher.group(PARSE_URL_PATH))) {
273 return false;
274 }
275
276 if (!isValidQuery(urlMatcher.group(PARSE_URL_QUERY))) {
277 return false;
278 }
279
280 if (!isValidFragment(urlMatcher.group(PARSE_URL_FRAGMENT))) {
281 return false;
282 }
283
284 return true;
285 }
286
287
288
289
290
291
292
293 protected boolean isValidAuthority(final String authority) {
294 if (authority == null) {
295 return false;
296 }
297
298 final InetAddressValidator inetAddressValidator =
299 InetAddressValidator.getInstance();
300
301 final Matcher authorityMatcher = AUTHORITY_PATTERN.matcher(authority);
302 if (!authorityMatcher.matches()) {
303 return false;
304 }
305
306 boolean hostname = false;
307
308 String hostIP = authorityMatcher.group(PARSE_AUTHORITY_HOST_IP);
309 final boolean ipV4Address = inetAddressValidator.isValid(hostIP);
310
311 if (!ipV4Address) {
312
313 hostname = DOMAIN_PATTERN.matcher(hostIP).matches();
314 }
315
316
317 if (hostname) {
318
319
320 final char[] chars = hostIP.toCharArray();
321 int size = 1;
322 for (final char element : chars) {
323 if (element == '.') {
324 size++;
325 }
326 }
327 final String[] domainSegment = new String[size];
328 boolean match = true;
329 int segmentCount = 0;
330 int segmentLength = 0;
331
332 while (match) {
333 final Matcher atomMatcher = ATOM_PATTERN.matcher(hostIP);
334 match = atomMatcher.matches();
335 if (match) {
336 domainSegment[segmentCount] = atomMatcher.group(1);
337 segmentLength = domainSegment[segmentCount].length() + 1;
338 hostIP =
339 segmentLength >= hostIP.length()
340 ? ""
341 : hostIP.substring(segmentLength);
342
343 segmentCount++;
344 }
345 }
346 final String topLevel = domainSegment[segmentCount - 1];
347
348
349
350
351 if (topLevel.length() < TOP_LEVEL_MIN_LEN || topLevel.length() > TOP_LEVEL_MAX_LEN || !ALPHA_PATTERN.matcher(topLevel.substring(0, 1)).matches()
352 || segmentCount < 2) {
353 return false;
354 }
355 }
356
357 if (!hostname && !ipV4Address) {
358 return false;
359 }
360
361 final String port = authorityMatcher.group(PARSE_AUTHORITY_PORT);
362 if (port != null && !PORT_PATTERN.matcher(port).matches()) {
363 return false;
364 }
365
366 final String extra = authorityMatcher.group(PARSE_AUTHORITY_EXTRA);
367 if (!GenericValidator.isBlankOrNull(extra)) {
368 return false;
369 }
370
371 return true;
372 }
373
374
375
376
377
378
379 protected boolean isValidFragment(final String fragment) {
380 if (fragment == null) {
381 return true;
382 }
383
384 return options.isOff(NO_FRAGMENTS);
385 }
386
387
388
389
390
391
392 protected boolean isValidPath(final String path) {
393 if (path == null || !PATH_PATTERN.matcher(path).matches()) {
394 return false;
395 }
396
397 final int slash2Count = countToken("//", path);
398 if (options.isOff(ALLOW_2_SLASHES) && slash2Count > 0) {
399 return false;
400 }
401
402 final int slashCount = countToken("/", path);
403 final int dot2Count = countToken("..", path);
404 if (dot2Count > 0 && slashCount - slash2Count - 1 <= dot2Count) {
405 return false;
406 }
407
408 return true;
409 }
410
411
412
413
414
415
416 protected boolean isValidQuery(final String query) {
417 if (query == null) {
418 return true;
419 }
420
421 return QUERY_PATTERN.matcher(query).matches();
422 }
423
424
425
426
427
428
429
430
431
432 protected boolean isValidScheme(final String scheme) {
433 if (scheme == null || !SCHEME_PATTERN.matcher(scheme).matches() || options.isOff(ALLOW_ALL_SCHEMES) && !allowedSchemes.contains(scheme)) {
434 return false;
435 }
436
437 return true;
438 }
439 }