1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17 package org.apache.commons.validator.routines;
18
19 import static org.junit.jupiter.api.Assertions.assertEquals;
20 import static org.junit.jupiter.api.Assertions.assertFalse;
21 import static org.junit.jupiter.api.Assertions.assertNotNull;
22 import static org.junit.jupiter.api.Assertions.assertTrue;
23
24 import java.io.BufferedReader;
25 import java.io.Closeable;
26 import java.io.File;
27 import java.io.FileReader;
28 import java.io.IOException;
29 import java.io.InputStream;
30 import java.lang.reflect.Field;
31 import java.lang.reflect.Modifier;
32 import java.net.HttpURLConnection;
33 import java.net.IDN;
34 import java.net.URL;
35 import java.nio.file.Files;
36 import java.nio.file.StandardCopyOption;
37 import java.text.SimpleDateFormat;
38 import java.util.Date;
39 import java.util.HashMap;
40 import java.util.HashSet;
41 import java.util.Locale;
42 import java.util.Map;
43 import java.util.Map.Entry;
44 import java.util.Set;
45 import java.util.TreeMap;
46 import java.util.regex.Matcher;
47 import java.util.regex.Pattern;
48
49 import org.apache.commons.validator.routines.DomainValidator.ArrayType;
50 import org.junit.jupiter.api.BeforeEach;
51 import org.junit.jupiter.api.Test;
52
53
54
55
56 public class DomainValidatorTest {
57
58 private static void closeQuietly(final Closeable in) {
59 if (in != null) {
60 try {
61 in.close();
62 } catch (final IOException e) {
63 }
64 }
65 }
66
67
68
69
70
71 private static long download(final File file, final String tldUrl, final long timestamp) throws IOException {
72 final int HOUR = 60 * 60 * 1000;
73 final long modTime;
74
75 if (file.canRead()) {
76 modTime = file.lastModified();
77 if (modTime > System.currentTimeMillis() - HOUR) {
78 System.out.println("Skipping download - found recent " + file);
79 return modTime;
80 }
81 } else {
82 modTime = 0;
83 }
84 final HttpURLConnection hc = (HttpURLConnection) new URL(tldUrl).openConnection();
85 if (modTime > 0) {
86 final SimpleDateFormat sdf = new SimpleDateFormat("EEE, dd MMM yyyy HH:mm:ss z");
87 final String since = sdf.format(new Date(modTime));
88 hc.addRequestProperty("If-Modified-Since", since);
89 System.out.println("Found " + file + " with date " + since);
90 }
91 if (hc.getResponseCode() == 304) {
92 System.out.println("Already have most recent " + tldUrl);
93 } else {
94 System.out.println("Downloading " + tldUrl);
95 try (InputStream is = hc.getInputStream()) {
96 Files.copy(is, file.toPath(), StandardCopyOption.REPLACE_EXISTING);
97 }
98 System.out.println("Done");
99 }
100 return file.lastModified();
101 }
102
103 private static Map<String, String[]> getHtmlInfo(final File f) throws IOException {
104 final Map<String, String[]> info = new HashMap<>();
105
106
107 final Pattern domain = Pattern.compile(".*<a href=\"/domains/root/db/([^.]+)\\.html");
108
109 final Pattern type = Pattern.compile("\\s+<td>([^<]+)</td>");
110
111
112 final Pattern comment = Pattern.compile("\\s+<td>([^<]+)</td>");
113
114 try (BufferedReader br = new BufferedReader(new FileReader(f))) {
115 String line;
116 while ((line = br.readLine()) != null) {
117 final Matcher m = domain.matcher(line);
118 if (m.lookingAt()) {
119 final String dom = m.group(1);
120 String typ = "??";
121 String com = "??";
122 line = br.readLine();
123 while (line.matches("^\\s*$")) {
124 line = br.readLine();
125 }
126 final Matcher t = type.matcher(line);
127 if (t.lookingAt()) {
128 typ = t.group(1);
129 line = br.readLine();
130 if (line.matches("\\s+<!--.*")) {
131 while (!line.matches(".*-->.*")) {
132 line = br.readLine();
133 }
134 line = br.readLine();
135 }
136
137 while (!line.matches(".*</td>.*")) {
138 line += " " + br.readLine();
139 }
140 final Matcher n = comment.matcher(line);
141 if (n.lookingAt()) {
142 com = n.group(1);
143 }
144
145 if (com.contains("Not assigned") || com.contains("Retired") || typ.equals("test")) {
146
147 } else {
148 info.put(dom.toLowerCase(Locale.ENGLISH), new String[] { typ, com });
149
150 }
151 } else {
152 System.err.println("Unexpected type: " + line);
153 }
154 }
155 }
156 }
157 return info;
158 }
159
160
161
162
163 private static boolean isInIanaList(final String arrayName, final Set<String> ianaTlds) throws Exception {
164 final Field f = DomainValidator.class.getDeclaredField(arrayName);
165 final boolean isPrivate = Modifier.isPrivate(f.getModifiers());
166 if (isPrivate) {
167 f.setAccessible(true);
168 }
169 final String[] array = (String[]) f.get(null);
170 try {
171 return isInIanaList(arrayName, array, ianaTlds);
172 } finally {
173 if (isPrivate) {
174 f.setAccessible(false);
175 }
176 }
177 }
178
179 private static boolean isInIanaList(final String name, final String[] array, final Set<String> ianaTlds) {
180 for (final String element : array) {
181 if (!ianaTlds.contains(element)) {
182 System.out.println(name + " contains unexpected value: " + element);
183 }
184 }
185 return true;
186 }
187
188 private static boolean isLowerCase(final String string) {
189 return string.equals(string.toLowerCase(Locale.ENGLISH));
190 }
191
192
193
194
195
196
197
198
199 private static boolean isNotInRootZone(final String domain) {
200 final String tldUrl = "http://www.iana.org/domains/root/db/" + domain + ".html";
201 final File rootCheck = new File("target", "tld_" + domain + ".html");
202 BufferedReader in = null;
203 try {
204 download(rootCheck, tldUrl, 0L);
205 in = new BufferedReader(new FileReader(rootCheck));
206 String inputLine;
207 while ((inputLine = in.readLine()) != null) {
208 if (inputLine.contains("This domain is not present in the root zone at this time.")) {
209 return true;
210 }
211 }
212 in.close();
213 } catch (final IOException e) {
214 } finally {
215 closeQuietly(in);
216 }
217 return false;
218 }
219
220 private static boolean isSortedLowerCase(final String arrayName) throws Exception {
221 final Field f = DomainValidator.class.getDeclaredField(arrayName);
222 final boolean isPrivate = Modifier.isPrivate(f.getModifiers());
223 if (isPrivate) {
224 f.setAccessible(true);
225 }
226 final String[] array = (String[]) f.get(null);
227 try {
228 return isSortedLowerCase(arrayName, array);
229 } finally {
230 if (isPrivate) {
231 f.setAccessible(false);
232 }
233 }
234 }
235
236
237 private static boolean isSortedLowerCase(final String name, final String[] array) {
238 boolean sorted = true;
239 boolean strictlySorted = true;
240 final int length = array.length;
241 boolean lowerCase = isLowerCase(array[length - 1]);
242 for (int i = 0; i < length - 1; i++) {
243 final String entry = array[i];
244 final String nextEntry = array[i + 1];
245 final int cmp = entry.compareTo(nextEntry);
246 if (cmp > 0) {
247 System.out.println("Out of order entry: " + entry + " < " + nextEntry + " in " + name);
248 sorted = false;
249 } else if (cmp == 0) {
250 strictlySorted = false;
251 System.out.println("Duplicated entry: " + entry + " in " + name);
252 }
253 if (!isLowerCase(entry)) {
254 System.out.println("Non lowerCase entry: " + entry + " in " + name);
255 lowerCase = false;
256 }
257 }
258 return sorted && strictlySorted && lowerCase;
259 }
260
261
262
263
264 public static void main(final String a[]) throws Exception {
265
266
267 boolean OK = true;
268 for (final String list : new String[] { "INFRASTRUCTURE_TLDS", "COUNTRY_CODE_TLDS", "GENERIC_TLDS", "LOCAL_TLDS" }) {
269 OK &= isSortedLowerCase(list);
270 }
271 if (!OK) {
272 System.out.println("Fix arrays before retrying; cannot continue");
273 return;
274 }
275 final Set<String> ianaTlds = new HashSet<>();
276 final DomainValidator dv = DomainValidator.getInstance();
277 final File txtFile = new File("target/tlds-alpha-by-domain.txt");
278 final long timestamp = download(txtFile, "https://data.iana.org/TLD/tlds-alpha-by-domain.txt", 0L);
279 final File htmlFile = new File("target/tlds-alpha-by-domain.html");
280
281
282 download(htmlFile, "https://www.iana.org/domains/root/db", timestamp);
283
284 final BufferedReader br = new BufferedReader(new FileReader(txtFile));
285 String line;
286 final String header;
287 line = br.readLine();
288 if (!line.startsWith("# Version ")) {
289 br.close();
290 throw new IOException("File does not have expected Version header");
291 }
292 header = line.substring(2);
293 final boolean generateUnicodeTlds = false;
294
295
296 final Map<String, String[]> htmlInfo = getHtmlInfo(htmlFile);
297 final Map<String, String> missingTLD = new TreeMap<>();
298 final Map<String, String> missingCC = new TreeMap<>();
299 while ((line = br.readLine()) != null) {
300 if (!line.startsWith("#")) {
301 final String unicodeTld;
302 final String asciiTld = line.toLowerCase(Locale.ENGLISH);
303 if (line.startsWith("XN--")) {
304 unicodeTld = IDN.toUnicode(line);
305 } else {
306 unicodeTld = asciiTld;
307 }
308 if (!dv.isValidTld(asciiTld)) {
309 final String[] info = htmlInfo.get(asciiTld);
310 if (info != null) {
311 final String type = info[0];
312 final String comment = info[1];
313 if ("country-code".equals(type)) {
314 missingCC.put(asciiTld, unicodeTld + " " + comment);
315 if (generateUnicodeTlds) {
316 missingCC.put(unicodeTld, asciiTld + " " + comment);
317 }
318 } else {
319 missingTLD.put(asciiTld, unicodeTld + " " + comment);
320 if (generateUnicodeTlds) {
321 missingTLD.put(unicodeTld, asciiTld + " " + comment);
322 }
323 }
324 } else {
325 System.err.println("Expected to find HTML info for " + asciiTld);
326 }
327 }
328 ianaTlds.add(asciiTld);
329
330 if (generateUnicodeTlds && !unicodeTld.equals(asciiTld)) {
331 ianaTlds.add(unicodeTld);
332 }
333 }
334 }
335 br.close();
336
337 for (final String key : new TreeMap<>(htmlInfo).keySet()) {
338 if (!ianaTlds.contains(key)) {
339 if (isNotInRootZone(key)) {
340 System.out.println("INFO: HTML entry not yet in root zone: " + key);
341 } else {
342 System.err.println("WARN: Expected to find text entry for html: " + key);
343 }
344 }
345 }
346 if (!missingTLD.isEmpty()) {
347 printMap(header, missingTLD, "GENERIC_TLDS");
348 }
349 if (!missingCC.isEmpty()) {
350 printMap(header, missingCC, "COUNTRY_CODE_TLDS");
351 }
352
353 isInIanaList("INFRASTRUCTURE_TLDS", ianaTlds);
354 isInIanaList("COUNTRY_CODE_TLDS", ianaTlds);
355 isInIanaList("GENERIC_TLDS", ianaTlds);
356
357 System.out.println("Finished checks");
358 }
359
360 private static void printMap(final String header, final Map<String, String> map, final String string) {
361 System.out.println("Entries missing from " + string + " List\n");
362 if (header != null) {
363 System.out.println(" // Taken from " + header);
364 }
365 for (Entry<String, String> me : map.entrySet()) {
366 System.out.println(" \"" + me.getKey() + "\", // " + me.getValue());
367 }
368 System.out.println("\nDone");
369 }
370
371 private DomainValidator validator;
372
373 @BeforeEach
374 public void setUp() {
375 validator = DomainValidator.getInstance();
376 }
377
378
379 @Test
380 public void test_COUNTRY_CODE_TLDS_sortedAndLowerCase() throws Exception {
381 final boolean sorted = isSortedLowerCase("COUNTRY_CODE_TLDS");
382 assertTrue(sorted);
383 }
384
385
386 @Test
387 public void test_GENERIC_TLDS_sortedAndLowerCase() throws Exception {
388 final boolean sorted = isSortedLowerCase("GENERIC_TLDS");
389 assertTrue(sorted);
390 }
391
392
393 @Test
394 public void test_INFRASTRUCTURE_TLDS_sortedAndLowerCase() throws Exception {
395 final boolean sorted = isSortedLowerCase("INFRASTRUCTURE_TLDS");
396 assertTrue(sorted);
397 }
398
399
400 @Test
401 public void test_LOCAL_TLDS_sortedAndLowerCase() throws Exception {
402 final boolean sorted = isSortedLowerCase("LOCAL_TLDS");
403 assertTrue(sorted);
404 }
405
406 @Test
407 public void testAllowLocal() {
408 final DomainValidator noLocal = DomainValidator.getInstance(false);
409 final DomainValidator allowLocal = DomainValidator.getInstance(true);
410
411
412 assertEquals(noLocal, validator);
413
414
415 assertFalse(noLocal.isValid("localhost.localdomain"), "localhost.localdomain should validate");
416 assertFalse(noLocal.isValid("localhost"), "localhost should validate");
417
418
419 assertTrue(allowLocal.isValid("localhost.localdomain"), "localhost.localdomain should validate");
420 assertTrue(allowLocal.isValid("localhost"), "localhost should validate");
421 assertTrue(allowLocal.isValid("hostname"), "hostname should validate");
422 assertTrue(allowLocal.isValid("machinename"), "machinename should validate");
423
424
425 assertTrue(allowLocal.isValid("apache.org"), "apache.org should validate");
426 assertFalse(allowLocal.isValid(" apache.org "), "domain name with spaces shouldn't validate");
427 }
428
429 @Test
430 public void testDomainNoDots() {
431 assertTrue(validator.isValidDomainSyntax("a"), "a (alpha) should validate");
432 assertTrue(validator.isValidDomainSyntax("9"), "9 (alphanum) should validate");
433 assertTrue(validator.isValidDomainSyntax("c-z"), "c-z (alpha - alpha) should validate");
434
435 assertFalse(validator.isValidDomainSyntax("c-"), "c- (alpha -) should fail");
436 assertFalse(validator.isValidDomainSyntax("-c"), "-c (- alpha) should fail");
437 assertFalse(validator.isValidDomainSyntax("-"), "- (-) should fail");
438 }
439
440 @Test
441 public void testEnumIsPublic() {
442 assertTrue(Modifier.isPublic(DomainValidator.ArrayType.class.getModifiers()));
443 }
444
445 @Test
446 public void testGetArray() {
447 assertNotNull(DomainValidator.getTLDEntries(ArrayType.COUNTRY_CODE_MINUS));
448 assertNotNull(DomainValidator.getTLDEntries(ArrayType.COUNTRY_CODE_PLUS));
449 assertNotNull(DomainValidator.getTLDEntries(ArrayType.GENERIC_MINUS));
450 assertNotNull(DomainValidator.getTLDEntries(ArrayType.GENERIC_PLUS));
451 assertNotNull(DomainValidator.getTLDEntries(ArrayType.LOCAL_MINUS));
452 assertNotNull(DomainValidator.getTLDEntries(ArrayType.LOCAL_PLUS));
453 assertNotNull(DomainValidator.getTLDEntries(ArrayType.COUNTRY_CODE_RO));
454 assertNotNull(DomainValidator.getTLDEntries(ArrayType.GENERIC_RO));
455 assertNotNull(DomainValidator.getTLDEntries(ArrayType.INFRASTRUCTURE_RO));
456 assertNotNull(DomainValidator.getTLDEntries(ArrayType.LOCAL_RO));
457 }
458
459 @Test
460 public void testIDN() {
461 assertTrue(validator.isValid("www.xn--bcher-kva.ch"), "b\u00fccher.ch in IDN should validate");
462 }
463
464 @Test
465 public void testIDNJava6OrLater() {
466 final String version = System.getProperty("java.version");
467 if (version.compareTo("1.6") < 0) {
468 System.out.println("Cannot run Unicode IDN tests");
469 return;
470 }
471 assertTrue(validator.isValid("www.b\u00fccher.ch"), "b\u00fccher.ch should validate");
472 assertTrue(validator.isValid("xn--d1abbgf6aiiy.xn--p1ai"), "xn--d1abbgf6aiiy.xn--p1ai should validate");
473 assertTrue(validator.isValid("президент.рф"), "президент.рф should validate");
474 assertFalse(validator.isValid("www.\uFFFD.ch"), "www.\uFFFD.ch FFFD should fail");
475 }
476
477 @Test
478 public void testInvalidDomains() {
479 assertFalse(validator.isValid(".org"), "bare TLD .org shouldn't validate");
480 assertFalse(validator.isValid(" apache.org "), "domain name with spaces shouldn't validate");
481 assertFalse(validator.isValid("apa che.org"), "domain name containing spaces shouldn't validate");
482 assertFalse(validator.isValid("-testdomain.name"), "domain name starting with dash shouldn't validate");
483 assertFalse(validator.isValid("testdomain-.name"), "domain name ending with dash shouldn't validate");
484 assertFalse(validator.isValid("---c.com"), "domain name starting with multiple dashes shouldn't validate");
485 assertFalse(validator.isValid("c--.com"), "domain name ending with multiple dashes shouldn't validate");
486 assertFalse(validator.isValid("apache.rog"), "domain name with invalid TLD shouldn't validate");
487
488 assertFalse(validator.isValid("http://www.apache.org"), "URL shouldn't validate");
489 assertFalse(validator.isValid(" "), "Empty string shouldn't validate as domain name");
490 assertFalse(validator.isValid(null), "Null shouldn't validate as domain name");
491 }
492
493
494 @Test
495 public void testIsIDNtoASCIIBroken() {
496 System.out.println(">>DomainValidatorTest.testIsIDNtoASCIIBroken()");
497 final String input = ".";
498 final boolean ok = input.equals(IDN.toASCII(input));
499 System.out.println("IDN.toASCII is " + (ok ? "OK" : "BROKEN"));
500 final String[] props = { "java.version",
501 "java.vendor",
502 "java.vm.specification.version",
503 "java.vm.specification.vendor",
504 "java.vm.specification.name",
505 "java.vm.version",
506 "java.vm.vendor",
507 "java.vm.name",
508 "java.specification.version",
509 "java.specification.vendor",
510 "java.specification.name",
511 "java.class.version",
512 };
513 for (final String t : props) {
514 System.out.println(t + "=" + System.getProperty(t));
515 }
516 System.out.println("<<DomainValidatorTest.testIsIDNtoASCIIBroken()");
517 assertTrue(true);
518 }
519
520
521 @Test
522 public void testRFC2396domainlabel() {
523 assertTrue(validator.isValid("a.ch"), "a.ch should validate");
524 assertTrue(validator.isValid("9.ch"), "9.ch should validate");
525 assertTrue(validator.isValid("az.ch"), "az.ch should validate");
526 assertTrue(validator.isValid("09.ch"), "09.ch should validate");
527 assertTrue(validator.isValid("9-1.ch"), "9-1.ch should validate");
528 assertFalse(validator.isValid("91-.ch"), "91-.ch should not validate");
529 assertFalse(validator.isValid("-.ch"), "-.ch should not validate");
530 }
531
532
533 @Test
534 public void testRFC2396toplabel() {
535
536 assertTrue(validator.isValidDomainSyntax("a.c"), "a.c (alpha) should validate");
537 assertTrue(validator.isValidDomainSyntax("a.cc"), "a.cc (alpha alpha) should validate");
538 assertTrue(validator.isValidDomainSyntax("a.c9"), "a.c9 (alpha alphanum) should validate");
539 assertTrue(validator.isValidDomainSyntax("a.c-9"), "a.c-9 (alpha - alphanum) should validate");
540 assertTrue(validator.isValidDomainSyntax("a.c-z"), "a.c-z (alpha - alpha) should validate");
541
542 assertFalse(validator.isValidDomainSyntax("a.9c"), "a.9c (alphanum alpha) should fail");
543 assertFalse(validator.isValidDomainSyntax("a.c-"), "a.c- (alpha -) should fail");
544 assertFalse(validator.isValidDomainSyntax("a.-"), "a.- (-) should fail");
545 assertFalse(validator.isValidDomainSyntax("a.-9"), "a.-9 (- alphanum) should fail");
546 }
547
548 @Test
549 public void testTopLevelDomains() {
550
551 assertTrue(validator.isValidInfrastructureTld(".arpa"), ".arpa should validate as iTLD");
552 assertFalse(validator.isValidInfrastructureTld(".com"), ".com shouldn't validate as iTLD");
553
554
555 assertTrue(validator.isValidGenericTld(".name"), ".name should validate as gTLD");
556 assertFalse(validator.isValidGenericTld(".us"), ".us shouldn't validate as gTLD");
557
558
559 assertTrue(validator.isValidCountryCodeTld(".uk"), ".uk should validate as ccTLD");
560 assertFalse(validator.isValidCountryCodeTld(".org"), ".org shouldn't validate as ccTLD");
561
562
563 assertTrue(validator.isValidTld(".COM"), ".COM should validate as TLD");
564 assertTrue(validator.isValidTld(".BiZ"), ".BiZ should validate as TLD");
565
566
567 assertFalse(validator.isValid(".nope"), "invalid TLD shouldn't validate");
568 assertFalse(validator.isValid(""), "empty string shouldn't validate as TLD");
569 assertFalse(validator.isValid(null), "null shouldn't validate as TLD");
570 }
571
572
573
574 @Test
575 public void testUnicodeToASCII() {
576 final String[] asciidots = { "", ",", ".",
577 "a.",
578 "a.b", "a..b", "a...b", ".a", "..a", };
579 for (final String s : asciidots) {
580 assertEquals(s, DomainValidator.unicodeToASCII(s));
581 }
582
583
584
585
586
587 final String otherDots[][] = { { "b\u3002", "b.", }, { "b\uFF0E", "b.", }, { "b\uFF61", "b.", }, { "\u3002", ".", }, { "\uFF0E", ".", },
588 { "\uFF61", ".", }, };
589 for (final String s[] : otherDots) {
590 assertEquals(s[1], DomainValidator.unicodeToASCII(s[0]));
591 }
592 }
593
594 @Test
595 public void testValidator297() {
596 assertTrue(validator.isValid("xn--d1abbgf6aiiy.xn--p1ai"), "xn--d1abbgf6aiiy.xn--p1ai should validate");
597 }
598
599
600 @Test
601 public void testValidator306() {
602 final String longString = "abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyz0123456789A";
603 assertEquals(63, longString.length());
604
605 assertTrue(validator.isValidDomainSyntax(longString + ".com"), "63 chars label should validate");
606 assertFalse(validator.isValidDomainSyntax(longString + "x.com"), "64 chars label should fail");
607
608 assertTrue(validator.isValidDomainSyntax("test." + longString), "63 chars TLD should validate");
609 assertFalse(validator.isValidDomainSyntax("test.x" + longString), "64 chars TLD should fail");
610
611 final String longDomain = longString + "." + longString + "." + longString + "." + longString.substring(0, 61);
612 assertEquals(253, longDomain.length());
613 assertTrue(validator.isValidDomainSyntax(longDomain), "253 chars domain should validate");
614 assertFalse(validator.isValidDomainSyntax(longDomain + "x"), "254 chars domain should fail");
615 }
616
617 @Test
618 public void testValidDomains() {
619 assertTrue(validator.isValid("apache.org"), "apache.org should validate");
620 assertTrue(validator.isValid("www.google.com"), "www.google.com should validate");
621
622 assertTrue(validator.isValid("test-domain.com"), "test-domain.com should validate");
623 assertTrue(validator.isValid("test---domain.com"), "test---domain.com should validate");
624 assertTrue(validator.isValid("test-d-o-m-ain.com"), "test-d-o-m-ain.com should validate");
625 assertTrue(validator.isValid("as.uk"), "two-letter domain label should validate");
626
627 assertTrue(validator.isValid("ApAchE.Org"), "case-insensitive ApAchE.Org should validate");
628
629 assertTrue(validator.isValid("z.com"), "single-character domain label should validate");
630
631 assertTrue(validator.isValid("i.have.an-example.domain.name"), "i.have.an-example.domain.name should validate");
632 }
633 }