1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17 package org.apache.commons.validator.routines;
18
19 import static org.junit.jupiter.api.Assertions.assertEquals;
20 import static org.junit.jupiter.api.Assertions.assertFalse;
21 import static org.junit.jupiter.api.Assertions.assertNotNull;
22 import static org.junit.jupiter.api.Assertions.assertTrue;
23
24 import java.io.BufferedReader;
25 import java.io.Closeable;
26 import java.io.File;
27 import java.io.FileReader;
28 import java.io.IOException;
29 import java.io.InputStream;
30 import java.lang.reflect.Field;
31 import java.lang.reflect.Modifier;
32 import java.net.HttpURLConnection;
33 import java.net.IDN;
34 import java.net.URL;
35 import java.nio.file.Files;
36 import java.nio.file.StandardCopyOption;
37 import java.text.SimpleDateFormat;
38 import java.util.Date;
39 import java.util.HashMap;
40 import java.util.HashSet;
41 import java.util.Locale;
42 import java.util.Map;
43 import java.util.Map.Entry;
44 import java.util.Set;
45 import java.util.TreeMap;
46 import java.util.regex.Matcher;
47 import java.util.regex.Pattern;
48
49 import org.apache.commons.lang3.SystemProperties;
50 import org.apache.commons.validator.routines.DomainValidator.ArrayType;
51 import org.junit.jupiter.api.BeforeEach;
52 import org.junit.jupiter.api.Test;
53
54
55
56
57 public class DomainValidatorTest {
58
59 private static void closeQuietly(final Closeable in) {
60 if (in != null) {
61 try {
62 in.close();
63 } catch (final IOException ignore) {
64
65 }
66 }
67 }
68
69
70
71
72
73 private static long download(final File file, final String tldUrl, final long timestamp) throws IOException {
74 final int hour = 60 * 60 * 1000;
75 final long modTime;
76
77 if (file.canRead()) {
78 modTime = file.lastModified();
79 if (modTime > System.currentTimeMillis() - hour) {
80 System.out.println("Skipping download - found recent " + file);
81 return modTime;
82 }
83 } else {
84 modTime = 0;
85 }
86 final HttpURLConnection hc = (HttpURLConnection) new URL(tldUrl).openConnection();
87 if (modTime > 0) {
88 final SimpleDateFormat sdf = new SimpleDateFormat("EEE, dd MMM yyyy HH:mm:ss z");
89 final String since = sdf.format(new Date(modTime));
90 hc.addRequestProperty("If-Modified-Since", since);
91 System.out.println("Found " + file + " with date " + since);
92 }
93 if (hc.getResponseCode() == 304) {
94 System.out.println("Already have most recent " + tldUrl);
95 } else {
96 System.out.println("Downloading " + tldUrl);
97 try (InputStream is = hc.getInputStream()) {
98 Files.copy(is, file.toPath(), StandardCopyOption.REPLACE_EXISTING);
99 }
100 System.out.println("Done");
101 }
102 return file.lastModified();
103 }
104
105 private static Map<String, String[]> getHtmlInfo(final File f) throws IOException {
106 final Map<String, String[]> info = new HashMap<>();
107
108
109 final Pattern domain = Pattern.compile(".*<a href=\"/domains/root/db/([^.]+)\\.html");
110
111 final Pattern type = Pattern.compile("\\s+<td>([^<]+)</td>");
112
113
114 final Pattern comment = Pattern.compile("\\s+<td>([^<]+)</td>");
115
116 try (BufferedReader br = new BufferedReader(new FileReader(f))) {
117 String line;
118 while ((line = br.readLine()) != null) {
119 final Matcher m = domain.matcher(line);
120 if (m.lookingAt()) {
121 final String dom = m.group(1);
122 String typ = "??";
123 String com = "??";
124 line = br.readLine();
125 while (line.matches("^\\s*$")) {
126 line = br.readLine();
127 }
128 final Matcher t = type.matcher(line);
129 if (t.lookingAt()) {
130 typ = t.group(1);
131 line = br.readLine();
132 if (line.matches("\\s+<!--.*")) {
133 while (!line.matches(".*-->.*")) {
134 line = br.readLine();
135 }
136 line = br.readLine();
137 }
138
139 while (!line.matches(".*</td>.*")) {
140 line += " " + br.readLine();
141 }
142 final Matcher n = comment.matcher(line);
143 if (n.lookingAt()) {
144 com = n.group(1);
145 }
146
147 if (com.contains("Not assigned") || com.contains("Retired") || typ.equals("test")) {
148
149 } else {
150 info.put(dom.toLowerCase(Locale.ENGLISH), new String[] { typ, com });
151
152 }
153 } else {
154 System.err.println("Unexpected type: " + line);
155 }
156 }
157 }
158 }
159 return info;
160 }
161
162
163
164
165 private static boolean isInIanaList(final String arrayName, final Set<String> ianaTlds) throws Exception {
166 final Field f = DomainValidator.class.getDeclaredField(arrayName);
167 final boolean isPrivate = Modifier.isPrivate(f.getModifiers());
168 if (isPrivate) {
169 f.setAccessible(true);
170 }
171 final String[] array = (String[]) f.get(null);
172 try {
173 return isInIanaList(arrayName, array, ianaTlds);
174 } finally {
175 if (isPrivate) {
176 f.setAccessible(false);
177 }
178 }
179 }
180
181 private static boolean isInIanaList(final String name, final String[] array, final Set<String> ianaTlds) {
182 for (final String element : array) {
183 if (!ianaTlds.contains(element)) {
184 System.out.println(name + " contains unexpected value: " + element);
185 }
186 }
187 return true;
188 }
189
190 private static boolean isLowerCase(final String string) {
191 return string.equals(string.toLowerCase(Locale.ENGLISH));
192 }
193
194
195
196
197
198
199
200
201 private static boolean isNotInRootZone(final String domain) {
202 final String tldUrl = "http://www.iana.org/domains/root/db/" + domain + ".html";
203 final File rootCheck = new File("target", "tld_" + domain + ".html");
204 BufferedReader in = null;
205 try {
206 download(rootCheck, tldUrl, 0L);
207 in = new BufferedReader(new FileReader(rootCheck));
208 String inputLine;
209 while ((inputLine = in.readLine()) != null) {
210 if (inputLine.contains("This domain is not present in the root zone at this time.")) {
211 return true;
212 }
213 }
214 in.close();
215 } catch (final IOException ignore) {
216
217 } finally {
218 closeQuietly(in);
219 }
220 return false;
221 }
222
223 private static boolean isSortedLowerCase(final String arrayName) throws Exception {
224 final Field f = DomainValidator.class.getDeclaredField(arrayName);
225 final boolean isPrivate = Modifier.isPrivate(f.getModifiers());
226 if (isPrivate) {
227 f.setAccessible(true);
228 }
229 final String[] array = (String[]) f.get(null);
230 try {
231 return isSortedLowerCase(arrayName, array);
232 } finally {
233 if (isPrivate) {
234 f.setAccessible(false);
235 }
236 }
237 }
238
239
240 private static boolean isSortedLowerCase(final String name, final String[] array) {
241 boolean sorted = true;
242 boolean strictlySorted = true;
243 final int length = array.length;
244 boolean lowerCase = isLowerCase(array[length - 1]);
245 for (int i = 0; i < length - 1; i++) {
246 final String entry = array[i];
247 final String nextEntry = array[i + 1];
248 final int cmp = entry.compareTo(nextEntry);
249 if (cmp > 0) {
250 System.out.println("Out of order entry: " + entry + " < " + nextEntry + " in " + name);
251 sorted = false;
252 } else if (cmp == 0) {
253 strictlySorted = false;
254 System.out.println("Duplicated entry: " + entry + " in " + name);
255 }
256 if (!isLowerCase(entry)) {
257 System.out.println("Non lowerCase entry: " + entry + " in " + name);
258 lowerCase = false;
259 }
260 }
261 return sorted && strictlySorted && lowerCase;
262 }
263
264
265
266
267 public static void main(final String a[]) throws Exception {
268
269
270 boolean ok = true;
271 for (final String list : new String[] { "INFRASTRUCTURE_TLDS", "COUNTRY_CODE_TLDS", "GENERIC_TLDS", "LOCAL_TLDS" }) {
272 ok &= isSortedLowerCase(list);
273 }
274 if (!ok) {
275 System.out.println("Fix arrays before retrying; cannot continue");
276 return;
277 }
278 final Set<String> ianaTlds = new HashSet<>();
279 final DomainValidator dv = DomainValidator.getInstance();
280 final File txtFile = new File("target/tlds-alpha-by-domain.txt");
281 final long timestamp = download(txtFile, "https://data.iana.org/TLD/tlds-alpha-by-domain.txt", 0L);
282 final File htmlFile = new File("target/tlds-alpha-by-domain.html");
283
284
285 download(htmlFile, "https://www.iana.org/domains/root/db", timestamp);
286
287 final BufferedReader br = new BufferedReader(new FileReader(txtFile));
288 String line;
289 final String header;
290 line = br.readLine();
291 if (!line.startsWith("# Version ")) {
292 br.close();
293 throw new IOException("File does not have expected Version header");
294 }
295 header = line.substring(2);
296 final boolean generateUnicodeTlds = false;
297
298
299 final Map<String, String[]> htmlInfo = getHtmlInfo(htmlFile);
300 final Map<String, String> missingTLD = new TreeMap<>();
301 final Map<String, String> missingCC = new TreeMap<>();
302 while ((line = br.readLine()) != null) {
303 if (!line.startsWith("#")) {
304 final String unicodeTld;
305 final String asciiTld = line.toLowerCase(Locale.ENGLISH);
306 if (line.startsWith("XN--")) {
307 unicodeTld = IDN.toUnicode(line);
308 } else {
309 unicodeTld = asciiTld;
310 }
311 if (!dv.isValidTld(asciiTld)) {
312 final String[] info = htmlInfo.get(asciiTld);
313 if (info != null) {
314 final String type = info[0];
315 final String comment = info[1];
316 if ("country-code".equals(type)) {
317 missingCC.put(asciiTld, unicodeTld + " " + comment);
318 if (generateUnicodeTlds) {
319 missingCC.put(unicodeTld, asciiTld + " " + comment);
320 }
321 } else {
322 missingTLD.put(asciiTld, unicodeTld + " " + comment);
323 if (generateUnicodeTlds) {
324 missingTLD.put(unicodeTld, asciiTld + " " + comment);
325 }
326 }
327 } else {
328 System.err.println("Expected to find HTML info for " + asciiTld);
329 }
330 }
331 ianaTlds.add(asciiTld);
332
333 if (generateUnicodeTlds && !unicodeTld.equals(asciiTld)) {
334 ianaTlds.add(unicodeTld);
335 }
336 }
337 }
338 br.close();
339
340 for (final String key : new TreeMap<>(htmlInfo).keySet()) {
341 if (!ianaTlds.contains(key)) {
342 if (isNotInRootZone(key)) {
343 System.out.println("INFO: HTML entry not yet in root zone: " + key);
344 } else {
345 System.err.println("WARN: Expected to find text entry for html: " + key);
346 }
347 }
348 }
349 if (!missingTLD.isEmpty()) {
350 printMap(header, missingTLD, "GENERIC_TLDS");
351 }
352 if (!missingCC.isEmpty()) {
353 printMap(header, missingCC, "COUNTRY_CODE_TLDS");
354 }
355
356 isInIanaList("INFRASTRUCTURE_TLDS", ianaTlds);
357 isInIanaList("COUNTRY_CODE_TLDS", ianaTlds);
358 isInIanaList("GENERIC_TLDS", ianaTlds);
359
360 System.out.println("Finished checks");
361 }
362
363 private static void printMap(final String header, final Map<String, String> map, final String string) {
364 System.out.println("Entries missing from " + string + " List\n");
365 if (header != null) {
366 System.out.println(" // Taken from " + header);
367 }
368 for (final Entry<String, String> me : map.entrySet()) {
369 System.out.println(" \"" + me.getKey() + "\", // " + me.getValue());
370 }
371 System.out.println("\nDone");
372 }
373
374 private DomainValidator validator;
375
376 @BeforeEach
377 public void setUp() {
378 validator = DomainValidator.getInstance();
379 }
380
381
382 @Test
383 public void tesLocalTldsSortedAndLowerCase() throws Exception {
384 final boolean sorted = isSortedLowerCase("LOCAL_TLDS");
385 assertTrue(sorted);
386 }
387
388 @Test
389 public void testAllowLocal() {
390 final DomainValidator noLocal = DomainValidator.getInstance(false);
391 final DomainValidator allowLocal = DomainValidator.getInstance(true);
392
393
394 assertEquals(noLocal, validator);
395
396
397 assertFalse(noLocal.isValid("localhost.localdomain"), "localhost.localdomain should validate");
398 assertFalse(noLocal.isValid("localhost"), "localhost should validate");
399
400
401 assertTrue(allowLocal.isValid("localhost.localdomain"), "localhost.localdomain should validate");
402 assertTrue(allowLocal.isValid("localhost"), "localhost should validate");
403 assertTrue(allowLocal.isValid("hostname"), "hostname should validate");
404 assertTrue(allowLocal.isValid("machinename"), "machinename should validate");
405
406
407 assertTrue(allowLocal.isValid("apache.org"), "apache.org should validate");
408 assertFalse(allowLocal.isValid(" apache.org "), "domain name with spaces shouldn't validate");
409 }
410
411
412 @Test
413 public void testCountryCodeTldsSortedAndLowerCase() throws Exception {
414 final boolean sorted = isSortedLowerCase("COUNTRY_CODE_TLDS");
415 assertTrue(sorted);
416 }
417
418 @Test
419 public void testDomainNoDots() {
420 assertTrue(validator.isValidDomainSyntax("a"), "a (alpha) should validate");
421 assertTrue(validator.isValidDomainSyntax("9"), "9 (alphanum) should validate");
422 assertTrue(validator.isValidDomainSyntax("c-z"), "c-z (alpha - alpha) should validate");
423
424 assertFalse(validator.isValidDomainSyntax("c-"), "c- (alpha -) should fail");
425 assertFalse(validator.isValidDomainSyntax("-c"), "-c (- alpha) should fail");
426 assertFalse(validator.isValidDomainSyntax("-"), "- (-) should fail");
427 }
428
429 @Test
430 public void testEnumIsPublic() {
431 assertTrue(Modifier.isPublic(DomainValidator.ArrayType.class.getModifiers()));
432 }
433
434
435 @Test
436 public void testGenericTldsSortedAndLowerCase() throws Exception {
437 final boolean sorted = isSortedLowerCase("GENERIC_TLDS");
438 assertTrue(sorted);
439 }
440
441 @Test
442 public void testGetArray() {
443 assertNotNull(DomainValidator.getTLDEntries(ArrayType.COUNTRY_CODE_MINUS));
444 assertNotNull(DomainValidator.getTLDEntries(ArrayType.COUNTRY_CODE_PLUS));
445 assertNotNull(DomainValidator.getTLDEntries(ArrayType.GENERIC_MINUS));
446 assertNotNull(DomainValidator.getTLDEntries(ArrayType.GENERIC_PLUS));
447 assertNotNull(DomainValidator.getTLDEntries(ArrayType.LOCAL_MINUS));
448 assertNotNull(DomainValidator.getTLDEntries(ArrayType.LOCAL_PLUS));
449 assertNotNull(DomainValidator.getTLDEntries(ArrayType.COUNTRY_CODE_RO));
450 assertNotNull(DomainValidator.getTLDEntries(ArrayType.GENERIC_RO));
451 assertNotNull(DomainValidator.getTLDEntries(ArrayType.INFRASTRUCTURE_RO));
452 assertNotNull(DomainValidator.getTLDEntries(ArrayType.LOCAL_RO));
453 }
454
455 @Test
456 public void testIDN() {
457 assertTrue(validator.isValid("www.xn--bcher-kva.ch"), "b\u00fccher.ch in IDN should validate");
458 }
459
460 @Test
461 public void testIDNJava6OrLater() {
462 final String version = SystemProperties.getJavaVersion();
463 if (version.compareTo("1.6") < 0) {
464 System.out.println("Cannot run Unicode IDN tests");
465 return;
466 }
467 assertTrue(validator.isValid("www.b\u00fccher.ch"), "b\u00fccher.ch should validate");
468 assertTrue(validator.isValid("xn--d1abbgf6aiiy.xn--p1ai"), "xn--d1abbgf6aiiy.xn--p1ai should validate");
469 assertTrue(validator.isValid("президент.рф"), "президент.рф should validate");
470 assertFalse(validator.isValid("www.\uFFFD.ch"), "www.\uFFFD.ch FFFD should fail");
471 }
472
473
474 @Test
475 public void testInfrastructureTldsSortedAndLowerCase() throws Exception {
476 final boolean sorted = isSortedLowerCase("INFRASTRUCTURE_TLDS");
477 assertTrue(sorted);
478 }
479
480 @Test
481 public void testInvalidDomains() {
482 assertFalse(validator.isValid(".org"), "bare TLD .org shouldn't validate");
483 assertFalse(validator.isValid(" apache.org "), "domain name with spaces shouldn't validate");
484 assertFalse(validator.isValid("apa che.org"), "domain name containing spaces shouldn't validate");
485 assertFalse(validator.isValid("-testdomain.name"), "domain name starting with dash shouldn't validate");
486 assertFalse(validator.isValid("testdomain-.name"), "domain name ending with dash shouldn't validate");
487 assertFalse(validator.isValid("---c.com"), "domain name starting with multiple dashes shouldn't validate");
488 assertFalse(validator.isValid("c--.com"), "domain name ending with multiple dashes shouldn't validate");
489 assertFalse(validator.isValid("apache.rog"), "domain name with invalid TLD shouldn't validate");
490
491 assertFalse(validator.isValid("http://www.apache.org"), "URL shouldn't validate");
492 assertFalse(validator.isValid(" "), "Empty string shouldn't validate as domain name");
493 assertFalse(validator.isValid(null), "Null shouldn't validate as domain name");
494 }
495
496
497 @Test
498 public void testIsIDNtoASCIIBroken() {
499 System.out.println(">>DomainValidatorTest.testIsIDNtoASCIIBroken()");
500 final String input = ".";
501 final boolean ok = input.equals(IDN.toASCII(input));
502 System.out.println("IDN.toASCII is " + (ok ? "OK" : "BROKEN"));
503 final String[] props = { "java.version",
504 "java.vendor",
505 "java.vm.specification.version",
506 "java.vm.specification.vendor",
507 "java.vm.specification.name",
508 "java.vm.version",
509 "java.vm.vendor",
510 "java.vm.name",
511 "java.specification.version",
512 "java.specification.vendor",
513 "java.specification.name",
514 "java.class.version",
515 };
516 for (final String t : props) {
517 System.out.println(t + "=" + System.getProperty(t));
518 }
519 System.out.println("<<DomainValidatorTest.testIsIDNtoASCIIBroken()");
520 assertTrue(true);
521 }
522
523
524 @Test
525 public void testRFC2396domainlabel() {
526 assertTrue(validator.isValid("a.ch"), "a.ch should validate");
527 assertTrue(validator.isValid("9.ch"), "9.ch should validate");
528 assertTrue(validator.isValid("az.ch"), "az.ch should validate");
529 assertTrue(validator.isValid("09.ch"), "09.ch should validate");
530 assertTrue(validator.isValid("9-1.ch"), "9-1.ch should validate");
531 assertFalse(validator.isValid("91-.ch"), "91-.ch should not validate");
532 assertFalse(validator.isValid("-.ch"), "-.ch should not validate");
533 }
534
535
536 @Test
537 public void testRFC2396toplabel() {
538
539 assertTrue(validator.isValidDomainSyntax("a.c"), "a.c (alpha) should validate");
540 assertTrue(validator.isValidDomainSyntax("a.cc"), "a.cc (alpha alpha) should validate");
541 assertTrue(validator.isValidDomainSyntax("a.c9"), "a.c9 (alpha alphanum) should validate");
542 assertTrue(validator.isValidDomainSyntax("a.c-9"), "a.c-9 (alpha - alphanum) should validate");
543 assertTrue(validator.isValidDomainSyntax("a.c-z"), "a.c-z (alpha - alpha) should validate");
544
545 assertFalse(validator.isValidDomainSyntax("a.9c"), "a.9c (alphanum alpha) should fail");
546 assertFalse(validator.isValidDomainSyntax("a.c-"), "a.c- (alpha -) should fail");
547 assertFalse(validator.isValidDomainSyntax("a.-"), "a.- (-) should fail");
548 assertFalse(validator.isValidDomainSyntax("a.-9"), "a.-9 (- alphanum) should fail");
549 }
550
551 @Test
552 public void testTopLevelDomains() {
553
554 assertTrue(validator.isValidInfrastructureTld(".arpa"), ".arpa should validate as iTLD");
555 assertFalse(validator.isValidInfrastructureTld(".com"), ".com shouldn't validate as iTLD");
556
557
558 assertTrue(validator.isValidGenericTld(".name"), ".name should validate as gTLD");
559 assertFalse(validator.isValidGenericTld(".us"), ".us shouldn't validate as gTLD");
560
561
562 assertTrue(validator.isValidCountryCodeTld(".uk"), ".uk should validate as ccTLD");
563 assertFalse(validator.isValidCountryCodeTld(".org"), ".org shouldn't validate as ccTLD");
564
565
566 assertTrue(validator.isValidTld(".COM"), ".COM should validate as TLD");
567 assertTrue(validator.isValidTld(".BiZ"), ".BiZ should validate as TLD");
568
569
570 assertFalse(validator.isValid(".nope"), "invalid TLD shouldn't validate");
571 assertFalse(validator.isValid(""), "empty string shouldn't validate as TLD");
572 assertFalse(validator.isValid(null), "null shouldn't validate as TLD");
573 }
574
575
576
577 @Test
578 public void testUnicodeToASCII() {
579 final String[] asciidots = { "", ",", ".",
580 "a.",
581 "a.b", "a..b", "a...b", ".a", "..a", };
582 for (final String s : asciidots) {
583 assertEquals(s, DomainValidator.unicodeToASCII(s));
584 }
585
586
587
588
589
590 final String otherDots[][] = { { "b\u3002", "b.", }, { "b\uFF0E", "b.", }, { "b\uFF61", "b.", }, { "\u3002", ".", }, { "\uFF0E", ".", },
591 { "\uFF61", ".", }, };
592 for (final String s[] : otherDots) {
593 assertEquals(s[1], DomainValidator.unicodeToASCII(s[0]));
594 }
595 }
596
597 @Test
598 public void testValidator297() {
599 assertTrue(validator.isValid("xn--d1abbgf6aiiy.xn--p1ai"), "xn--d1abbgf6aiiy.xn--p1ai should validate");
600 }
601
602
603 @Test
604 public void testValidator306() {
605 final String longString = "abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyz0123456789A";
606 assertEquals(63, longString.length());
607
608 assertTrue(validator.isValidDomainSyntax(longString + ".com"), "63 chars label should validate");
609 assertFalse(validator.isValidDomainSyntax(longString + "x.com"), "64 chars label should fail");
610
611 assertTrue(validator.isValidDomainSyntax("test." + longString), "63 chars TLD should validate");
612 assertFalse(validator.isValidDomainSyntax("test.x" + longString), "64 chars TLD should fail");
613
614 final String longDomain = longString + "." + longString + "." + longString + "." + longString.substring(0, 61);
615 assertEquals(253, longDomain.length());
616 assertTrue(validator.isValidDomainSyntax(longDomain), "253 chars domain should validate");
617 assertFalse(validator.isValidDomainSyntax(longDomain + "x"), "254 chars domain should fail");
618 }
619
620 @Test
621 public void testValidDomains() {
622 assertTrue(validator.isValid("apache.org"), "apache.org should validate");
623 assertTrue(validator.isValid("www.google.com"), "www.google.com should validate");
624
625 assertTrue(validator.isValid("test-domain.com"), "test-domain.com should validate");
626 assertTrue(validator.isValid("test---domain.com"), "test---domain.com should validate");
627 assertTrue(validator.isValid("test-d-o-m-ain.com"), "test-d-o-m-ain.com should validate");
628 assertTrue(validator.isValid("as.uk"), "two-letter domain label should validate");
629
630 assertTrue(validator.isValid("ApAchE.Org"), "case-insensitive ApAchE.Org should validate");
631
632 assertTrue(validator.isValid("z.com"), "single-character domain label should validate");
633
634 assertTrue(validator.isValid("i.have.an-example.domain.name"), "i.have.an-example.domain.name should validate");
635 }
636 }