1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17 package org.apache.commons.validator.routines;
18
19 import java.io.BufferedReader;
20 import java.io.Closeable;
21 import java.io.File;
22 import java.io.FileOutputStream;
23 import java.io.FileReader;
24 import java.io.IOException;
25 import java.io.InputStream;
26 import java.lang.reflect.Field;
27 import java.lang.reflect.Modifier;
28 import java.net.HttpURLConnection;
29 import java.net.IDN;
30 import java.net.URL;
31 import java.text.SimpleDateFormat;
32 import java.util.Date;
33 import java.util.HashMap;
34 import java.util.HashSet;
35 import java.util.Iterator;
36 import java.util.Locale;
37 import java.util.Map;
38 import java.util.Set;
39 import java.util.TreeMap;
40 import java.util.regex.Matcher;
41 import java.util.regex.Pattern;
42
43 import org.apache.commons.validator.routines.DomainValidator.ArrayType;
44
45 import junit.framework.TestCase;
46
47
48
49
50
51
52 public class DomainValidatorTest extends TestCase {
53
54 private DomainValidator validator;
55
56 @Override
57 public void setUp() {
58 validator = DomainValidator.getInstance();
59 }
60
61 public void testValidDomains() {
62 assertTrue("apache.org should validate", validator.isValid("apache.org"));
63 assertTrue("www.google.com should validate", validator.isValid("www.google.com"));
64
65 assertTrue("test-domain.com should validate", validator.isValid("test-domain.com"));
66 assertTrue("test---domain.com should validate", validator.isValid("test---domain.com"));
67 assertTrue("test-d-o-m-ain.com should validate", validator.isValid("test-d-o-m-ain.com"));
68 assertTrue("two-letter domain label should validate", validator.isValid("as.uk"));
69
70 assertTrue("case-insensitive ApAchE.Org should validate", validator.isValid("ApAchE.Org"));
71
72 assertTrue("single-character domain label should validate", validator.isValid("z.com"));
73
74 assertTrue("i.have.an-example.domain.name should validate", validator.isValid("i.have.an-example.domain.name"));
75 }
76
77 public void testInvalidDomains() {
78 assertFalse("bare TLD .org shouldn't validate", validator.isValid(".org"));
79 assertFalse("domain name with spaces shouldn't validate", validator.isValid(" apache.org "));
80 assertFalse("domain name containing spaces shouldn't validate", validator.isValid("apa che.org"));
81 assertFalse("domain name starting with dash shouldn't validate", validator.isValid("-testdomain.name"));
82 assertFalse("domain name ending with dash shouldn't validate", validator.isValid("testdomain-.name"));
83 assertFalse("domain name starting with multiple dashes shouldn't validate", validator.isValid("---c.com"));
84 assertFalse("domain name ending with multiple dashes shouldn't validate", validator.isValid("c--.com"));
85 assertFalse("domain name with invalid TLD shouldn't validate", validator.isValid("apache.rog"));
86
87 assertFalse("URL shouldn't validate", validator.isValid("http://www.apache.org"));
88 assertFalse("Empty string shouldn't validate as domain name", validator.isValid(" "));
89 assertFalse("Null shouldn't validate as domain name", validator.isValid(null));
90 }
91
92 public void testTopLevelDomains() {
93
94 assertTrue(".arpa should validate as iTLD", validator.isValidInfrastructureTld(".arpa"));
95 assertFalse(".com shouldn't validate as iTLD", validator.isValidInfrastructureTld(".com"));
96
97
98 assertTrue(".name should validate as gTLD", validator.isValidGenericTld(".name"));
99 assertFalse(".us shouldn't validate as gTLD", validator.isValidGenericTld(".us"));
100
101
102 assertTrue(".uk should validate as ccTLD", validator.isValidCountryCodeTld(".uk"));
103 assertFalse(".org shouldn't validate as ccTLD", validator.isValidCountryCodeTld(".org"));
104
105
106 assertTrue(".COM should validate as TLD", validator.isValidTld(".COM"));
107 assertTrue(".BiZ should validate as TLD", validator.isValidTld(".BiZ"));
108
109
110 assertFalse("invalid TLD shouldn't validate", validator.isValid(".nope"));
111 assertFalse("empty string shouldn't validate as TLD", validator.isValid(""));
112 assertFalse("null shouldn't validate as TLD", validator.isValid(null));
113 }
114
115 public void testAllowLocal() {
116 DomainValidator noLocal = DomainValidator.getInstance(false);
117 DomainValidator allowLocal = DomainValidator.getInstance(true);
118
119
120 assertEquals(noLocal, validator);
121
122
123 assertFalse("localhost.localdomain should validate", noLocal.isValid("localhost.localdomain"));
124 assertFalse("localhost should validate", noLocal.isValid("localhost"));
125
126
127 assertTrue("localhost.localdomain should validate", allowLocal.isValid("localhost.localdomain"));
128 assertTrue("localhost should validate", allowLocal.isValid("localhost"));
129 assertTrue("hostname should validate", allowLocal.isValid("hostname"));
130 assertTrue("machinename should validate", allowLocal.isValid("machinename"));
131
132
133 assertTrue("apache.org should validate", allowLocal.isValid("apache.org"));
134 assertFalse("domain name with spaces shouldn't validate", allowLocal.isValid(" apache.org "));
135 }
136
137 public void testIDN() {
138 assertTrue("b\u00fccher.ch in IDN should validate", validator.isValid("www.xn--bcher-kva.ch"));
139 }
140
141 public void testIDNJava6OrLater() {
142 String version = System.getProperty("java.version");
143 if (version.compareTo("1.6") < 0) {
144 System.out.println("Cannot run Unicode IDN tests");
145 return;
146 }
147 assertTrue("b\u00fccher.ch should validate", validator.isValid("www.b\u00fccher.ch"));
148 assertTrue("xn--d1abbgf6aiiy.xn--p1ai should validate", validator.isValid("xn--d1abbgf6aiiy.xn--p1ai"));
149 assertTrue("президент.рф should validate", validator.isValid("президент.рф"));
150 assertFalse("www.\uFFFD.ch FFFD should fail", validator.isValid("www.\uFFFD.ch"));
151 }
152
153
154 public void testRFC2396domainlabel() {
155 assertTrue("a.ch should validate", validator.isValid("a.ch"));
156 assertTrue("9.ch should validate", validator.isValid("9.ch"));
157 assertTrue("az.ch should validate", validator.isValid("az.ch"));
158 assertTrue("09.ch should validate", validator.isValid("09.ch"));
159 assertTrue("9-1.ch should validate", validator.isValid("9-1.ch"));
160 assertFalse("91-.ch should not validate", validator.isValid("91-.ch"));
161 assertFalse("-.ch should not validate", validator.isValid("-.ch"));
162 }
163
164
165 public void testRFC2396toplabel() {
166
167 assertTrue("a.c (alpha) should validate", validator.isValidDomainSyntax("a.c"));
168 assertTrue("a.cc (alpha alpha) should validate", validator.isValidDomainSyntax("a.cc"));
169 assertTrue("a.c9 (alpha alphanum) should validate", validator.isValidDomainSyntax("a.c9"));
170 assertTrue("a.c-9 (alpha - alphanum) should validate", validator.isValidDomainSyntax("a.c-9"));
171 assertTrue("a.c-z (alpha - alpha) should validate", validator.isValidDomainSyntax("a.c-z"));
172
173 assertFalse("a.9c (alphanum alpha) should fail", validator.isValidDomainSyntax("a.9c"));
174 assertFalse("a.c- (alpha -) should fail", validator.isValidDomainSyntax("a.c-"));
175 assertFalse("a.- (-) should fail", validator.isValidDomainSyntax("a.-"));
176 assertFalse("a.-9 (- alphanum) should fail", validator.isValidDomainSyntax("a.-9"));
177 }
178
179 public void testDomainNoDots() {
180 assertTrue("a (alpha) should validate", validator.isValidDomainSyntax("a"));
181 assertTrue("9 (alphanum) should validate", validator.isValidDomainSyntax("9"));
182 assertTrue("c-z (alpha - alpha) should validate", validator.isValidDomainSyntax("c-z"));
183
184 assertFalse("c- (alpha -) should fail", validator.isValidDomainSyntax("c-"));
185 assertFalse("-c (- alpha) should fail", validator.isValidDomainSyntax("-c"));
186 assertFalse("- (-) should fail", validator.isValidDomainSyntax("-"));
187 }
188
189 public void testValidator297() {
190 assertTrue("xn--d1abbgf6aiiy.xn--p1ai should validate", validator.isValid("xn--d1abbgf6aiiy.xn--p1ai"));
191 }
192
193
194 public void testValidator306() {
195 final String longString = "abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyz0123456789A";
196 assertEquals(63, longString.length());
197
198 assertTrue("63 chars label should validate", validator.isValidDomainSyntax(longString+".com"));
199 assertFalse("64 chars label should fail", validator.isValidDomainSyntax(longString+"x.com"));
200
201 assertTrue("63 chars TLD should validate", validator.isValidDomainSyntax("test."+longString));
202 assertFalse("64 chars TLD should fail", validator.isValidDomainSyntax("test.x"+longString));
203
204 final String longDomain =
205 longString
206 + "." + longString
207 + "." + longString
208 + "." + longString.substring(0,61)
209 ;
210 assertEquals(253, longDomain.length());
211 assertTrue("253 chars domain should validate", validator.isValidDomainSyntax(longDomain));
212 assertFalse("254 chars domain should fail", validator.isValidDomainSyntax(longDomain+"x"));
213 }
214
215
216
217 public void testUnicodeToASCII() {
218 String[] asciidots = {
219 "",
220 ",",
221 ".",
222 "a.",
223 "a.b",
224 "a..b",
225 "a...b",
226 ".a",
227 "..a",
228 };
229 for(String s : asciidots) {
230 assertEquals(s,DomainValidator.unicodeToASCII(s));
231 }
232
233
234
235
236
237 final String otherDots[][] = {
238 {"b\u3002", "b.",},
239 {"b\uFF0E", "b.",},
240 {"b\uFF61", "b.",},
241 {"\u3002", ".",},
242 {"\uFF0E", ".",},
243 {"\uFF61", ".",},
244 };
245 for(String s[] : otherDots) {
246 assertEquals(s[1],DomainValidator.unicodeToASCII(s[0]));
247 }
248 }
249
250
251 public void testIsIDNtoASCIIBroken() {
252 System.out.println(">>DomainValidatorTest.testIsIDNtoASCIIBroken()");
253 final String input = ".";
254 final boolean ok = input.equals(IDN.toASCII(input));
255 System.out.println("IDN.toASCII is " + (ok? "OK" : "BROKEN"));
256 String props[] = {
257 "java.version",
258 "java.vendor",
259 "java.vm.specification.version",
260 "java.vm.specification.vendor",
261 "java.vm.specification.name",
262 "java.vm.version",
263 "java.vm.vendor",
264 "java.vm.name",
265 "java.specification.version",
266 "java.specification.vendor",
267 "java.specification.name",
268 "java.class.version",
269 };
270 for(String t : props) {
271 System.out.println(t + "=" + System.getProperty(t));
272 }
273 System.out.println("<<DomainValidatorTest.testIsIDNtoASCIIBroken()");
274 assertTrue(true);
275 }
276
277
278 public void test_INFRASTRUCTURE_TLDS_sortedAndLowerCase() throws Exception {
279 final boolean sorted = isSortedLowerCase("INFRASTRUCTURE_TLDS");
280 assertTrue(sorted);
281 }
282
283
284 public void test_COUNTRY_CODE_TLDS_sortedAndLowerCase() throws Exception {
285 final boolean sorted = isSortedLowerCase("COUNTRY_CODE_TLDS");
286 assertTrue(sorted);
287 }
288
289
290 public void test_GENERIC_TLDS_sortedAndLowerCase() throws Exception {
291 final boolean sorted = isSortedLowerCase("GENERIC_TLDS");
292 assertTrue(sorted);
293 }
294
295
296 public void test_LOCAL_TLDS_sortedAndLowerCase() throws Exception {
297 final boolean sorted = isSortedLowerCase("LOCAL_TLDS");
298 assertTrue(sorted);
299 }
300
301 public void testEnumIsPublic() {
302 assertTrue(Modifier.isPublic(DomainValidator.ArrayType.class.getModifiers()));
303 }
304
305 public void testGetArray() {
306 assertNotNull(DomainValidator.getTLDEntries(ArrayType.COUNTRY_CODE_MINUS));
307 assertNotNull(DomainValidator.getTLDEntries(ArrayType.COUNTRY_CODE_PLUS));
308 assertNotNull(DomainValidator.getTLDEntries(ArrayType.GENERIC_MINUS));
309 assertNotNull(DomainValidator.getTLDEntries(ArrayType.GENERIC_PLUS));
310 assertNotNull(DomainValidator.getTLDEntries(ArrayType.LOCAL_MINUS));
311 assertNotNull(DomainValidator.getTLDEntries(ArrayType.LOCAL_PLUS));
312 assertNotNull(DomainValidator.getTLDEntries(ArrayType.COUNTRY_CODE_RO));
313 assertNotNull(DomainValidator.getTLDEntries(ArrayType.GENERIC_RO));
314 assertNotNull(DomainValidator.getTLDEntries(ArrayType.INFRASTRUCTURE_RO));
315 assertNotNull(DomainValidator.getTLDEntries(ArrayType.LOCAL_RO));
316 }
317
318
319
320
321 public static void main(String a[]) throws Exception {
322
323
324 boolean OK = true;
325 for(String list : new String[]{"INFRASTRUCTURE_TLDS","COUNTRY_CODE_TLDS","GENERIC_TLDS","LOCAL_TLDS"}) {
326 OK &= isSortedLowerCase(list);
327 }
328 if (!OK) {
329 System.out.println("Fix arrays before retrying; cannot continue");
330 return;
331 }
332 Set<String> ianaTlds = new HashSet<String>();
333 DomainValidator dv = DomainValidator.getInstance();
334 File txtFile = new File("target/tlds-alpha-by-domain.txt");
335 long timestamp = download(txtFile, "https://data.iana.org/TLD/tlds-alpha-by-domain.txt", 0L);
336 final File htmlFile = new File("target/tlds-alpha-by-domain.html");
337
338
339 download(htmlFile,"https://www.iana.org/domains/root/db", timestamp);
340
341 BufferedReader br = new BufferedReader(new FileReader(txtFile));
342 String line;
343 final String header;
344 line = br.readLine();
345 if (line.startsWith("# Version ")) {
346 header = line.substring(2);
347 } else {
348 br.close();
349 throw new IOException("File does not have expected Version header");
350 }
351 final boolean generateUnicodeTlds = false;
352
353
354 Map<String, String[]> htmlInfo = getHtmlInfo(htmlFile);
355 Map<String, String> missingTLD = new TreeMap<String, String>();
356 Map<String, String> missingCC = new TreeMap<String, String>();
357 while((line = br.readLine()) != null) {
358 if (!line.startsWith("#")) {
359 final String unicodeTld;
360 final String asciiTld = line.toLowerCase(Locale.ENGLISH);
361 if (line.startsWith("XN--")) {
362 unicodeTld = IDN.toUnicode(line);
363 } else {
364 unicodeTld = asciiTld;
365 }
366 if (!dv.isValidTld(asciiTld)) {
367 String [] info = htmlInfo.get(asciiTld);
368 if (info != null) {
369 String type = info[0];
370 String comment = info[1];
371 if ("country-code".equals(type)) {
372 missingCC.put(asciiTld, unicodeTld + " " + comment);
373 if (generateUnicodeTlds) {
374 missingCC.put(unicodeTld, asciiTld + " " + comment);
375 }
376 } else {
377 missingTLD.put(asciiTld, unicodeTld + " " + comment);
378 if (generateUnicodeTlds) {
379 missingTLD.put(unicodeTld, asciiTld + " " + comment);
380 }
381 }
382 } else {
383 System.err.println("Expected to find HTML info for "+ asciiTld);
384 }
385 }
386 ianaTlds.add(asciiTld);
387
388 if (generateUnicodeTlds) {
389 if (!unicodeTld.equals(asciiTld)) {
390 ianaTlds.add(unicodeTld);
391 }
392 }
393 }
394 }
395 br.close();
396
397 for(String key : (new TreeMap<String, String[]>(htmlInfo)).keySet()) {
398 if (!ianaTlds.contains(key)) {
399 if (isNotInRootZone(key)) {
400 System.out.println("INFO: HTML entry not yet in root zone: "+key);
401 } else {
402 System.err.println("WARN: Expected to find text entry for html: "+key);
403 }
404 }
405 }
406 if (!missingTLD.isEmpty()) {
407 printMap(header, missingTLD, "TLD");
408 }
409 if (!missingCC.isEmpty()) {
410 printMap(header, missingCC, "CC");
411 }
412
413 isInIanaList("INFRASTRUCTURE_TLDS", ianaTlds);
414 isInIanaList("COUNTRY_CODE_TLDS", ianaTlds);
415 isInIanaList("GENERIC_TLDS", ianaTlds);
416
417 System.out.println("Finished checks");
418 }
419
420 private static void printMap(final String header, Map<String, String> map, String string) {
421 System.out.println("Entries missing from "+ string +" List\n");
422 if (header != null) {
423 System.out.println(" // Taken from " + header);
424 }
425 Iterator<Map.Entry<String, String>> it = map.entrySet().iterator();
426 while(it.hasNext()){
427 Map.Entry<String, String> me = it.next();
428 System.out.println(" \"" + me.getKey() + "\", // " + me.getValue());
429 }
430 System.out.println("\nDone");
431 }
432
433 private static Map<String, String[]> getHtmlInfo(final File f) throws IOException {
434 final Map<String, String[]> info = new HashMap<String, String[]>();
435
436
437 final Pattern domain = Pattern.compile(".*<a href=\"/domains/root/db/([^.]+)\\.html");
438
439 final Pattern type = Pattern.compile("\\s+<td>([^<]+)</td>");
440
441
442 final Pattern comment = Pattern.compile("\\s+<td>([^<]+)</td>");
443
444 final BufferedReader br = new BufferedReader(new FileReader(f));
445 String line;
446 while((line=br.readLine())!=null){
447 Matcher m = domain.matcher(line);
448 if (m.lookingAt()) {
449 String dom = m.group(1);
450 String typ = "??";
451 String com = "??";
452 line = br.readLine();
453 while (line.matches("^\\s*$")) {
454 line = br.readLine();
455 }
456 Matcher t = type.matcher(line);
457 if (t.lookingAt()) {
458 typ = t.group(1);
459 line = br.readLine();
460 if (line.matches("\\s+<!--.*")) {
461 while(!line.matches(".*-->.*")){
462 line = br.readLine();
463 }
464 line = br.readLine();
465 }
466
467 while(!line.matches(".*</td>.*")){
468 line += " " +br.readLine();
469 }
470 Matcher n = comment.matcher(line);
471 if (n.lookingAt()) {
472 com = n.group(1);
473 }
474
475 if (com.contains("Not assigned") || com.contains("Retired") || typ.equals("test")) {
476
477 } else {
478 info.put(dom.toLowerCase(Locale.ENGLISH), new String[]{typ, com});
479
480 }
481 } else {
482 System.err.println("Unexpected type: " + line);
483 }
484 }
485 }
486 br.close();
487 return info;
488 }
489
490
491
492
493
494
495 private static long download(File f, String tldurl, long timestamp) throws IOException {
496 final int HOUR = 60*60*1000;
497 final long modTime;
498
499 if (f.canRead()) {
500 modTime = f.lastModified();
501 if (modTime > System.currentTimeMillis()-HOUR) {
502 System.out.println("Skipping download - found recent " + f);
503 return modTime;
504 }
505 } else {
506 modTime = 0;
507 }
508 HttpURLConnection hc = (HttpURLConnection) new URL(tldurl).openConnection();
509 if (modTime > 0) {
510 SimpleDateFormat sdf = new SimpleDateFormat("EEE, dd MMM yyyy HH:mm:ss z");
511 String since = sdf.format(new Date(modTime));
512 hc.addRequestProperty("If-Modified-Since", since);
513 System.out.println("Found " + f + " with date " + since);
514 }
515 if (hc.getResponseCode() == 304) {
516 System.out.println("Already have most recent " + tldurl);
517 } else {
518 System.out.println("Downloading " + tldurl);
519 byte buff[] = new byte[1024];
520 InputStream is = hc.getInputStream();
521
522 FileOutputStream fos = new FileOutputStream(f);
523 int len;
524 while((len=is.read(buff)) != -1) {
525 fos.write(buff, 0, len);
526 }
527 fos.close();
528 is.close();
529 System.out.println("Done");
530 }
531 return f.lastModified();
532 }
533
534
535
536
537
538
539
540
541
542 private static boolean isNotInRootZone(String domain) {
543 String tldurl = "http://www.iana.org/domains/root/db/" + domain + ".html";
544 File rootCheck = new File("target","tld_" + domain + ".html");
545 BufferedReader in = null;
546 try {
547 download(rootCheck, tldurl, 0L);
548 in = new BufferedReader(new FileReader(rootCheck));
549 String inputLine;
550 while ((inputLine = in.readLine()) != null) {
551 if (inputLine.contains("This domain is not present in the root zone at this time.")) {
552 return true;
553 }
554 }
555 in.close();
556 } catch (IOException e) {
557 } finally {
558 closeQuietly(in);
559 }
560 return false;
561 }
562
563 private static void closeQuietly(Closeable in) {
564 if (in != null) {
565 try {
566 in.close();
567 } catch (IOException e) {
568 }
569 }
570 }
571
572
573
574
575 private static boolean isInIanaList(String arrayName, Set<String> ianaTlds) throws Exception {
576 Field f = DomainValidator.class.getDeclaredField(arrayName);
577 final boolean isPrivate = Modifier.isPrivate(f.getModifiers());
578 if (isPrivate) {
579 f.setAccessible(true);
580 }
581 String[] array = (String[]) f.get(null);
582 try {
583 return isInIanaList(arrayName, array, ianaTlds);
584 } finally {
585 if (isPrivate) {
586 f.setAccessible(false);
587 }
588 }
589 }
590
591 private static boolean isInIanaList(String name, String [] array, Set<String> ianaTlds) {
592 for(int i = 0; i < array.length; i++) {
593 if (!ianaTlds.contains(array[i])) {
594 System.out.println(name + " contains unexpected value: " + array[i]);
595 }
596 }
597 return true;
598 }
599
600 private static boolean isSortedLowerCase(String arrayName) throws Exception {
601 Field f = DomainValidator.class.getDeclaredField(arrayName);
602 final boolean isPrivate = Modifier.isPrivate(f.getModifiers());
603 if (isPrivate) {
604 f.setAccessible(true);
605 }
606 String[] array = (String[]) f.get(null);
607 try {
608 return isSortedLowerCase(arrayName, array);
609 } finally {
610 if (isPrivate) {
611 f.setAccessible(false);
612 }
613 }
614 }
615
616 private static boolean isLowerCase(String string) {
617 return string.equals(string.toLowerCase(Locale.ENGLISH));
618 }
619
620
621 private static boolean isSortedLowerCase(String name, String [] array) {
622 boolean sorted = true;
623 boolean strictlySorted = true;
624 final int length = array.length;
625 boolean lowerCase = isLowerCase(array[length-1]);
626 for(int i = 0; i < length-1; i++) {
627 final String entry = array[i];
628 final String nextEntry = array[i+1];
629 final int cmp = entry.compareTo(nextEntry);
630 if (cmp > 0) {
631 System.out.println("Out of order entry: " + entry + " < " + nextEntry + " in " + name);
632 sorted = false;
633 } else if (cmp == 0) {
634 strictlySorted = false;
635 System.out.println("Duplicated entry: " + entry + " in " + name);
636 }
637 if (!isLowerCase(entry)) {
638 System.out.println("Non lowerCase entry: " + entry + " in " + name);
639 lowerCase = false;
640 }
641 }
642 return sorted && strictlySorted && lowerCase;
643 }
644 }