1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17 package org.apache.commons.validator.routines;
18
19 import static org.junit.jupiter.api.Assertions.assertEquals;
20 import static org.junit.jupiter.api.Assertions.assertFalse;
21 import static org.junit.jupiter.api.Assertions.assertNotNull;
22 import static org.junit.jupiter.api.Assertions.assertTrue;
23
24 import java.io.BufferedReader;
25 import java.io.Closeable;
26 import java.io.File;
27 import java.io.FileReader;
28 import java.io.IOException;
29 import java.io.InputStream;
30 import java.lang.reflect.Field;
31 import java.lang.reflect.Modifier;
32 import java.net.HttpURLConnection;
33 import java.net.IDN;
34 import java.net.URL;
35 import java.nio.file.Files;
36 import java.nio.file.StandardCopyOption;
37 import java.text.SimpleDateFormat;
38 import java.util.Date;
39 import java.util.HashMap;
40 import java.util.HashSet;
41 import java.util.Locale;
42 import java.util.Map;
43 import java.util.Map.Entry;
44 import java.util.Set;
45 import java.util.TreeMap;
46 import java.util.regex.Matcher;
47 import java.util.regex.Pattern;
48
49 import org.apache.commons.validator.routines.DomainValidator.ArrayType;
50 import org.junit.jupiter.api.BeforeEach;
51 import org.junit.jupiter.api.Test;
52
53
54
55
56 public class DomainValidatorTest {
57
58 private static void closeQuietly(final Closeable in) {
59 if (in != null) {
60 try {
61 in.close();
62 } catch (final IOException ignore) {
63
64 }
65 }
66 }
67
68
69
70
71
72 private static long download(final File file, final String tldUrl, final long timestamp) throws IOException {
73 final int hour = 60 * 60 * 1000;
74 final long modTime;
75
76 if (file.canRead()) {
77 modTime = file.lastModified();
78 if (modTime > System.currentTimeMillis() - hour) {
79 System.out.println("Skipping download - found recent " + file);
80 return modTime;
81 }
82 } else {
83 modTime = 0;
84 }
85 final HttpURLConnection hc = (HttpURLConnection) new URL(tldUrl).openConnection();
86 if (modTime > 0) {
87 final SimpleDateFormat sdf = new SimpleDateFormat("EEE, dd MMM yyyy HH:mm:ss z");
88 final String since = sdf.format(new Date(modTime));
89 hc.addRequestProperty("If-Modified-Since", since);
90 System.out.println("Found " + file + " with date " + since);
91 }
92 if (hc.getResponseCode() == 304) {
93 System.out.println("Already have most recent " + tldUrl);
94 } else {
95 System.out.println("Downloading " + tldUrl);
96 try (InputStream is = hc.getInputStream()) {
97 Files.copy(is, file.toPath(), StandardCopyOption.REPLACE_EXISTING);
98 }
99 System.out.println("Done");
100 }
101 return file.lastModified();
102 }
103
104 private static Map<String, String[]> getHtmlInfo(final File f) throws IOException {
105 final Map<String, String[]> info = new HashMap<>();
106
107
108 final Pattern domain = Pattern.compile(".*<a href=\"/domains/root/db/([^.]+)\\.html");
109
110 final Pattern type = Pattern.compile("\\s+<td>([^<]+)</td>");
111
112
113 final Pattern comment = Pattern.compile("\\s+<td>([^<]+)</td>");
114
115 try (BufferedReader br = new BufferedReader(new FileReader(f))) {
116 String line;
117 while ((line = br.readLine()) != null) {
118 final Matcher m = domain.matcher(line);
119 if (m.lookingAt()) {
120 final String dom = m.group(1);
121 String typ = "??";
122 String com = "??";
123 line = br.readLine();
124 while (line.matches("^\\s*$")) {
125 line = br.readLine();
126 }
127 final Matcher t = type.matcher(line);
128 if (t.lookingAt()) {
129 typ = t.group(1);
130 line = br.readLine();
131 if (line.matches("\\s+<!--.*")) {
132 while (!line.matches(".*-->.*")) {
133 line = br.readLine();
134 }
135 line = br.readLine();
136 }
137
138 while (!line.matches(".*</td>.*")) {
139 line += " " + br.readLine();
140 }
141 final Matcher n = comment.matcher(line);
142 if (n.lookingAt()) {
143 com = n.group(1);
144 }
145
146 if (com.contains("Not assigned") || com.contains("Retired") || typ.equals("test")) {
147
148 } else {
149 info.put(dom.toLowerCase(Locale.ENGLISH), new String[] { typ, com });
150
151 }
152 } else {
153 System.err.println("Unexpected type: " + line);
154 }
155 }
156 }
157 }
158 return info;
159 }
160
161
162
163
164 private static boolean isInIanaList(final String arrayName, final Set<String> ianaTlds) throws Exception {
165 final Field f = DomainValidator.class.getDeclaredField(arrayName);
166 final boolean isPrivate = Modifier.isPrivate(f.getModifiers());
167 if (isPrivate) {
168 f.setAccessible(true);
169 }
170 final String[] array = (String[]) f.get(null);
171 try {
172 return isInIanaList(arrayName, array, ianaTlds);
173 } finally {
174 if (isPrivate) {
175 f.setAccessible(false);
176 }
177 }
178 }
179
180 private static boolean isInIanaList(final String name, final String[] array, final Set<String> ianaTlds) {
181 for (final String element : array) {
182 if (!ianaTlds.contains(element)) {
183 System.out.println(name + " contains unexpected value: " + element);
184 return false;
185 }
186 }
187 return true;
188 }
189
190 private static boolean isLowerCase(final String string) {
191 return string.equals(string.toLowerCase(Locale.ENGLISH));
192 }
193
194
195
196
197
198
199
200
201 private static boolean isNotInRootZone(final String domain) {
202 final String tldUrl = "https://www.iana.org/domains/root/db/" + domain + ".html";
203 final File rootCheck = new File("target", "tld_" + domain + ".html");
204 BufferedReader in = null;
205 try {
206 download(rootCheck, tldUrl, 0L);
207 in = new BufferedReader(new FileReader(rootCheck));
208 String inputLine;
209 while ((inputLine = in.readLine()) != null) {
210 if (inputLine.contains("This domain is not present in the root zone at this time.")) {
211 return true;
212 }
213 }
214 in.close();
215 } catch (final IOException ignore) {
216
217 } finally {
218 closeQuietly(in);
219 }
220 return false;
221 }
222
223 private static boolean isSortedLowerCase(final String arrayName) throws Exception {
224 final Field f = DomainValidator.class.getDeclaredField(arrayName);
225 final boolean isPrivate = Modifier.isPrivate(f.getModifiers());
226 if (isPrivate) {
227 f.setAccessible(true);
228 }
229 final String[] array = (String[]) f.get(null);
230 try {
231 return isSortedLowerCase(arrayName, array);
232 } finally {
233 if (isPrivate) {
234 f.setAccessible(false);
235 }
236 }
237 }
238
239
240 private static boolean isSortedLowerCase(final String name, final String[] array) {
241 boolean sorted = true;
242 boolean strictlySorted = true;
243 final int length = array.length;
244 boolean lowerCase = isLowerCase(array[length - 1]);
245 for (int i = 0; i < length - 1; i++) {
246 final String entry = array[i];
247 final String nextEntry = array[i + 1];
248 final int cmp = entry.compareTo(nextEntry);
249 if (cmp > 0) {
250 System.out.println("Out of order entry: " + entry + " < " + nextEntry + " in " + name);
251 sorted = false;
252 } else if (cmp == 0) {
253 strictlySorted = false;
254 System.out.println("Duplicated entry: " + entry + " in " + name);
255 }
256 if (!isLowerCase(entry)) {
257 System.out.println("Non lowerCase entry: " + entry + " in " + name);
258 lowerCase = false;
259 }
260 }
261 return sorted && strictlySorted && lowerCase;
262 }
263
264
265
266
267
268 public static void main(final String[] a) throws Exception {
269
270
271 boolean ok = true;
272 for (final String list : new String[] { "INFRASTRUCTURE_TLDS", "COUNTRY_CODE_TLDS", "GENERIC_TLDS", "LOCAL_TLDS" }) {
273 ok &= isSortedLowerCase(list);
274 }
275 if (!ok) {
276 System.out.println("Fix arrays before retrying; cannot continue");
277 return;
278 }
279 final Set<String> ianaTlds = new HashSet<>();
280 final DomainValidator dv = DomainValidator.getInstance();
281 final File txtFile = new File("target/tlds-alpha-by-domain.txt");
282 final long timestamp = download(txtFile, "https://data.iana.org/TLD/tlds-alpha-by-domain.txt", 0L);
283 final File htmlFile = new File("target/tlds-alpha-by-domain.html");
284
285
286 download(htmlFile, "https://www.iana.org/domains/root/db", timestamp);
287
288 final BufferedReader br = new BufferedReader(new FileReader(txtFile));
289 String line;
290 final String header;
291 line = br.readLine();
292 if (!line.startsWith("# Version ")) {
293 br.close();
294 throw new IOException("File does not have expected Version header");
295 }
296 header = line.substring(2);
297 final boolean generateUnicodeTlds = false;
298
299
300 final Map<String, String[]> htmlInfo = getHtmlInfo(htmlFile);
301 final Map<String, String> missingTLD = new TreeMap<>();
302 final Map<String, String> missingCC = new TreeMap<>();
303 while ((line = br.readLine()) != null) {
304 if (!line.startsWith("#")) {
305 final String unicodeTld;
306 final String asciiTld = line.toLowerCase(Locale.ENGLISH);
307 if (line.startsWith("XN--")) {
308 unicodeTld = IDN.toUnicode(line);
309 } else {
310 unicodeTld = asciiTld;
311 }
312 if (!dv.isValidTld(asciiTld)) {
313 final String[] info = htmlInfo.get(asciiTld);
314 if (info != null) {
315 final String type = info[0];
316 final String comment = info[1];
317 if ("country-code".equals(type)) {
318 missingCC.put(asciiTld, unicodeTld + " " + comment);
319 if (generateUnicodeTlds) {
320 missingCC.put(unicodeTld, asciiTld + " " + comment);
321 }
322 } else {
323 missingTLD.put(asciiTld, unicodeTld + " " + comment);
324 if (generateUnicodeTlds) {
325 missingTLD.put(unicodeTld, asciiTld + " " + comment);
326 }
327 }
328 } else {
329 System.err.println("Expected to find HTML info for " + asciiTld);
330 }
331 }
332 ianaTlds.add(asciiTld);
333
334 if (generateUnicodeTlds && !unicodeTld.equals(asciiTld)) {
335 ianaTlds.add(unicodeTld);
336 }
337 }
338 }
339 br.close();
340 int errorsDetected = 0;
341
342 for (final String key : new TreeMap<>(htmlInfo).keySet()) {
343 if (!ianaTlds.contains(key)) {
344 if (isNotInRootZone(key)) {
345 System.out.println("INFO: HTML entry not yet in root zone: " + key);
346 } else {
347 errorsDetected ++;
348 System.err.println("WARN: Expected to find text entry for html: " + key);
349 }
350 }
351 }
352 if (!missingTLD.isEmpty()) {
353 errorsDetected ++;
354 printMap(header, missingTLD, "GENERIC_TLDS");
355 }
356 if (!missingCC.isEmpty()) {
357 errorsDetected ++;
358 printMap(header, missingCC, "COUNTRY_CODE_TLDS");
359 }
360
361 if (!isInIanaList("INFRASTRUCTURE_TLDS", ianaTlds)) {
362 errorsDetected ++;
363 }
364 if (!isInIanaList("COUNTRY_CODE_TLDS", ianaTlds)) {
365 errorsDetected ++;
366 }
367 if (!isInIanaList("GENERIC_TLDS", ianaTlds)) {
368 errorsDetected ++;
369 }
370
371 System.out.println("Finished checks");
372 if (errorsDetected > 0) {
373 throw new RuntimeException("Errors detected: " + errorsDetected);
374 }
375 }
376
377 private static void printMap(final String header, final Map<String, String> map, final String string) {
378 System.out.println("Entries missing from " + string + " List\n");
379 if (header != null) {
380 System.out.println(" // Taken from " + header);
381 }
382 for (final Entry<String, String> me : map.entrySet()) {
383 System.out.println(" \"" + me.getKey() + "\", // " + me.getValue());
384 }
385 System.out.println("\nDone");
386 }
387
388 private DomainValidator validator;
389
390 @BeforeEach
391 public void setUp() {
392 validator = DomainValidator.getInstance();
393 }
394
395
396 @Test
397 public void tesLocalTldsSortedAndLowerCase() throws Exception {
398 final boolean sorted = isSortedLowerCase("LOCAL_TLDS");
399 assertTrue(sorted);
400 }
401
402 @Test
403 void testAllowLocal() {
404 final DomainValidator noLocal = DomainValidator.getInstance(false);
405 final DomainValidator allowLocal = DomainValidator.getInstance(true);
406
407
408 assertEquals(noLocal, validator);
409
410
411 assertFalse(noLocal.isValid("localhost.localdomain"), "localhost.localdomain should validate");
412 assertFalse(noLocal.isValid("localhost"), "localhost should validate");
413
414
415 assertTrue(allowLocal.isValid("localhost.localdomain"), "localhost.localdomain should validate");
416 assertTrue(allowLocal.isValid("localhost"), "localhost should validate");
417 assertTrue(allowLocal.isValid("hostname"), "hostname should validate");
418 assertTrue(allowLocal.isValid("machinename"), "machinename should validate");
419
420
421 assertTrue(allowLocal.isValid("apache.org"), "apache.org should validate");
422 assertFalse(allowLocal.isValid(" apache.org "), "domain name with spaces shouldn't validate");
423 }
424
425
426 @Test
427 void testCountryCodeTldsSortedAndLowerCase() throws Exception {
428 final boolean sorted = isSortedLowerCase("COUNTRY_CODE_TLDS");
429 assertTrue(sorted);
430 }
431
432 @Test
433 void testDomainNoDots() {
434 assertTrue(validator.isValidDomainSyntax("a"), "a (alpha) should validate");
435 assertTrue(validator.isValidDomainSyntax("9"), "9 (alphanum) should validate");
436 assertTrue(validator.isValidDomainSyntax("c-z"), "c-z (alpha - alpha) should validate");
437
438 assertFalse(validator.isValidDomainSyntax("c-"), "c- (alpha -) should fail");
439 assertFalse(validator.isValidDomainSyntax("-c"), "-c (- alpha) should fail");
440 assertFalse(validator.isValidDomainSyntax("-"), "- (-) should fail");
441 }
442
443 @Test
444 void testEnumIsPublic() {
445 assertTrue(Modifier.isPublic(DomainValidator.ArrayType.class.getModifiers()));
446 }
447
448
449 @Test
450 void testGenericTldsSortedAndLowerCase() throws Exception {
451 final boolean sorted = isSortedLowerCase("GENERIC_TLDS");
452 assertTrue(sorted);
453 }
454
455 @Test
456 void testGetArray() {
457 assertNotNull(DomainValidator.getTLDEntries(ArrayType.COUNTRY_CODE_MINUS));
458 assertNotNull(DomainValidator.getTLDEntries(ArrayType.COUNTRY_CODE_PLUS));
459 assertNotNull(DomainValidator.getTLDEntries(ArrayType.GENERIC_MINUS));
460 assertNotNull(DomainValidator.getTLDEntries(ArrayType.GENERIC_PLUS));
461 assertNotNull(DomainValidator.getTLDEntries(ArrayType.LOCAL_MINUS));
462 assertNotNull(DomainValidator.getTLDEntries(ArrayType.LOCAL_PLUS));
463 assertNotNull(DomainValidator.getTLDEntries(ArrayType.COUNTRY_CODE_RO));
464 assertNotNull(DomainValidator.getTLDEntries(ArrayType.GENERIC_RO));
465 assertNotNull(DomainValidator.getTLDEntries(ArrayType.INFRASTRUCTURE_RO));
466 assertNotNull(DomainValidator.getTLDEntries(ArrayType.LOCAL_RO));
467 }
468
469 @Test
470 void testIDN() {
471 assertTrue(validator.isValid("www.xn--bcher-kva.ch"), "b\u00fccher.ch in IDN should validate");
472 }
473
474 @Test
475 void testIDNJava6OrLater() {
476
477 assertTrue(validator.isValid("www.b\u00fccher.ch"), "b\u00fccher.ch should validate");
478 assertTrue(validator.isValid("xn--d1abbgf6aiiy.xn--p1ai"), "xn--d1abbgf6aiiy.xn--p1ai should validate");
479 assertTrue(validator.isValid("президент.рф"), "президент.рф should validate");
480 assertFalse(validator.isValid("www.\uFFFD.ch"), "www.\uFFFD.ch FFFD should fail");
481 }
482
483
484 @Test
485 void testInfrastructureTldsSortedAndLowerCase() throws Exception {
486 final boolean sorted = isSortedLowerCase("INFRASTRUCTURE_TLDS");
487 assertTrue(sorted);
488 }
489
490 @Test
491 void testInvalidDomains() {
492 assertFalse(validator.isValid(".org"), "bare TLD .org shouldn't validate");
493 assertFalse(validator.isValid(" apache.org "), "domain name with spaces shouldn't validate");
494 assertFalse(validator.isValid("apa che.org"), "domain name containing spaces shouldn't validate");
495 assertFalse(validator.isValid("-testdomain.name"), "domain name starting with dash shouldn't validate");
496 assertFalse(validator.isValid("testdomain-.name"), "domain name ending with dash shouldn't validate");
497 assertFalse(validator.isValid("---c.com"), "domain name starting with multiple dashes shouldn't validate");
498 assertFalse(validator.isValid("c--.com"), "domain name ending with multiple dashes shouldn't validate");
499 assertFalse(validator.isValid("apache.rog"), "domain name with invalid TLD shouldn't validate");
500
501 assertFalse(validator.isValid("http://www.apache.org"), "URL shouldn't validate");
502 assertFalse(validator.isValid(" "), "Empty string shouldn't validate as domain name");
503 assertFalse(validator.isValid(null), "Null shouldn't validate as domain name");
504 }
505
506
507 @Test
508 void testIsIDNtoASCIIBroken() {
509 final String input = ".";
510 if (!input.equals(IDN.toASCII(input))) {
511 System.out.println(">>DomainValidatorTest.testIsIDNtoASCIIBroken()");
512 System.out.println("IDN.toASCII is BROKEN");
513 final String[] props = { "java.version",
514 "java.vendor",
515 "java.vm.specification.version",
516 "java.vm.specification.vendor",
517 "java.vm.specification.name",
518 "java.vm.version",
519 "java.vm.vendor",
520 "java.vm.name",
521 "java.specification.version",
522 "java.specification.vendor",
523 "java.specification.name",
524 "java.class.version",
525 };
526 for (final String t : props) {
527 System.out.println(t + "=" + System.getProperty(t));
528 }
529 System.out.println("<<DomainValidatorTest.testIsIDNtoASCIIBroken()");
530 }
531 assertTrue(true);
532 }
533
534
535 @Test
536 void testRFC2396domainlabel() {
537 assertTrue(validator.isValid("a.ch"), "a.ch should validate");
538 assertTrue(validator.isValid("9.ch"), "9.ch should validate");
539 assertTrue(validator.isValid("az.ch"), "az.ch should validate");
540 assertTrue(validator.isValid("09.ch"), "09.ch should validate");
541 assertTrue(validator.isValid("9-1.ch"), "9-1.ch should validate");
542 assertFalse(validator.isValid("91-.ch"), "91-.ch should not validate");
543 assertFalse(validator.isValid("-.ch"), "-.ch should not validate");
544 }
545
546
547 @Test
548 void testRFC2396toplabel() {
549
550 assertTrue(validator.isValidDomainSyntax("a.c"), "a.c (alpha) should validate");
551 assertTrue(validator.isValidDomainSyntax("a.cc"), "a.cc (alpha alpha) should validate");
552 assertTrue(validator.isValidDomainSyntax("a.c9"), "a.c9 (alpha alphanum) should validate");
553 assertTrue(validator.isValidDomainSyntax("a.c-9"), "a.c-9 (alpha - alphanum) should validate");
554 assertTrue(validator.isValidDomainSyntax("a.c-z"), "a.c-z (alpha - alpha) should validate");
555
556 assertFalse(validator.isValidDomainSyntax("a.9c"), "a.9c (alphanum alpha) should fail");
557 assertFalse(validator.isValidDomainSyntax("a.c-"), "a.c- (alpha -) should fail");
558 assertFalse(validator.isValidDomainSyntax("a.-"), "a.- (-) should fail");
559 assertFalse(validator.isValidDomainSyntax("a.-9"), "a.-9 (- alphanum) should fail");
560 }
561
562 @Test
563 void testTopLevelDomains() {
564
565 assertTrue(validator.isValidInfrastructureTld(".arpa"), ".arpa should validate as iTLD");
566 assertFalse(validator.isValidInfrastructureTld(".com"), ".com shouldn't validate as iTLD");
567
568
569 assertTrue(validator.isValidGenericTld(".name"), ".name should validate as gTLD");
570 assertFalse(validator.isValidGenericTld(".us"), ".us shouldn't validate as gTLD");
571
572
573 assertTrue(validator.isValidCountryCodeTld(".uk"), ".uk should validate as ccTLD");
574 assertFalse(validator.isValidCountryCodeTld(".org"), ".org shouldn't validate as ccTLD");
575
576
577 assertTrue(validator.isValidTld(".COM"), ".COM should validate as TLD");
578 assertTrue(validator.isValidTld(".BiZ"), ".BiZ should validate as TLD");
579
580
581 assertFalse(validator.isValid(".nope"), "invalid TLD shouldn't validate");
582 assertFalse(validator.isValid(""), "empty string shouldn't validate as TLD");
583 assertFalse(validator.isValid(null), "null shouldn't validate as TLD");
584 }
585
586
587
588 @Test
589 void testUnicodeToASCII() {
590 final String[] asciidots = { "", ",", ".",
591 "a.",
592 "a.b", "a..b", "a...b", ".a", "..a", };
593 for (final String s : asciidots) {
594 assertEquals(s, DomainValidator.unicodeToASCII(s));
595 }
596
597
598
599
600
601 final String[][] otherDots = { { "b\u3002", "b.", }, { "b\uFF0E", "b.", }, { "b\uFF61", "b.", }, { "\u3002", ".", }, { "\uFF0E", ".", },
602 { "\uFF61", ".", }, };
603 for (final String[] s : otherDots) {
604 assertEquals(s[1], DomainValidator.unicodeToASCII(s[0]));
605 }
606 }
607
608 @Test
609 void testValidator297() {
610 assertTrue(validator.isValid("xn--d1abbgf6aiiy.xn--p1ai"), "xn--d1abbgf6aiiy.xn--p1ai should validate");
611 }
612
613
614 @Test
615 void testValidator306() {
616 final String longString = "abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyz0123456789A";
617 assertEquals(63, longString.length());
618
619 assertTrue(validator.isValidDomainSyntax(longString + ".com"), "63 chars label should validate");
620 assertFalse(validator.isValidDomainSyntax(longString + "x.com"), "64 chars label should fail");
621
622 assertTrue(validator.isValidDomainSyntax("test." + longString), "63 chars TLD should validate");
623 assertFalse(validator.isValidDomainSyntax("test.x" + longString), "64 chars TLD should fail");
624
625 final String longDomain = longString + "." + longString + "." + longString + "." + longString.substring(0, 61);
626 assertEquals(253, longDomain.length());
627 assertTrue(validator.isValidDomainSyntax(longDomain), "253 chars domain should validate");
628 assertFalse(validator.isValidDomainSyntax(longDomain + "x"), "254 chars domain should fail");
629 }
630
631 @Test
632 void testValidDomains() {
633 assertTrue(validator.isValid("apache.org"), "apache.org should validate");
634 assertTrue(validator.isValid("www.google.com"), "www.google.com should validate");
635
636 assertTrue(validator.isValid("test-domain.com"), "test-domain.com should validate");
637 assertTrue(validator.isValid("test---domain.com"), "test---domain.com should validate");
638 assertTrue(validator.isValid("test-d-o-m-ain.com"), "test-d-o-m-ain.com should validate");
639 assertTrue(validator.isValid("as.uk"), "two-letter domain label should validate");
640
641 assertTrue(validator.isValid("ApAchE.Org"), "case-insensitive ApAchE.Org should validate");
642
643 assertTrue(validator.isValid("z.com"), "single-character domain label should validate");
644
645 assertTrue(validator.isValid("i.have.an-example.domain.name"), "i.have.an-example.domain.name should validate");
646 }
647 }