View Javadoc
1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one or more
3    * contributor license agreements.  See the NOTICE file distributed with
4    * this work for additional information regarding copyright ownership.
5    * The ASF licenses this file to You under the Apache License, Version 2.0
6    * (the "License"); you may not use this file except in compliance with
7    * the License.  You may obtain a copy of the License at
8    *
9    *      http://www.apache.org/licenses/LICENSE-2.0
10   *
11   * Unless required by applicable law or agreed to in writing, software
12   * distributed under the License is distributed on an "AS IS" BASIS,
13   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14   * See the License for the specific language governing permissions and
15   * limitations under the License.
16   */
17  package org.apache.commons.validator.routines;
18  
19  import java.io.Serializable;
20  import java.lang.reflect.InvocationTargetException;
21  import java.lang.reflect.Method;
22  import java.util.Arrays;
23  import java.util.Locale;
24  
25  /**
26   * <p><b>Domain name</b> validation routines.</p>
27   *
28   * <p>
29   * This validator provides methods for validating Internet domain names
30   * and top-level domains.
31   * </p>
32   *
33   * <p>Domain names are evaluated according
34   * to the standards <a href="http://www.ietf.org/rfc/rfc1034.txt">RFC1034</a>,
35   * section 3, and <a href="http://www.ietf.org/rfc/rfc1123.txt">RFC1123</a>,
36   * section 2.1. No accommodation is provided for the specialized needs of
37   * other applications; if the domain name has been URL-encoded, for example,
38   * validation will fail even though the equivalent plaintext version of the
39   * same name would have passed.
40   * </p>
41   *
42   * <p>
43   * Validation is also provided for top-level domains (TLDs) as defined and
44   * maintained by the Internet Assigned Numbers Authority (IANA):
45   * </p>
46   *
47   *   <ul>
48   *     <li>{@link #isValidInfrastructureTld} - validates infrastructure TLDs
49   *         (<code>.arpa</code>, etc.)</li>
50   *     <li>{@link #isValidGenericTld} - validates generic TLDs
51   *         (<code>.com, .org</code>, etc.)</li>
52   *     <li>{@link #isValidCountryCodeTld} - validates country code TLDs
53   *         (<code>.us, .uk, .cn</code>, etc.)</li>
54   *   </ul>
55   *
56   * <p>
57   * (<b>NOTE</b>: This class does not provide IP address lookup for domain names or
58   * methods to ensure that a given domain name matches a specific IP; see
59   * {@link java.net.InetAddress} for that functionality.)
60   * </p>
61   *
62   * @version $Revision: 1650777 $
63   * @since Validator 1.4
64   */
65  public class DomainValidator implements Serializable {
66  
    /** Serialization version identifier for this {@link Serializable} class. */
    private static final long serialVersionUID = -4407125112880174009L;
68  
69      // Regular expression strings for hostnames (derived from RFC2396 and RFC 1123)
70  
    // RFC2396: domainlabel   = alphanum | alphanum *( alphanum | "-" ) alphanum
    // Max 63 characters: one leading alphanumeric, then optionally up to 61
    // alphanumerics/hyphens followed by one trailing alphanumeric (1 + 61 + 1).
    // A label may therefore neither start nor end with a hyphen.
    private static final String DOMAIN_LABEL_REGEX = "\\p{Alnum}(?>[\\p{Alnum}-]{0,61}\\p{Alnum})?";
74  
    // RFC2396 toplabel = alpha | alpha *( alphanum | "-" ) alphanum
    // Max 63 characters. Same shape as a domain label, except that the first
    // character must be alphabetic (a top label cannot start with a digit).
    private static final String TOP_LABEL_REGEX = "\\p{Alpha}(?>[\\p{Alnum}-]{0,61}\\p{Alnum})?";
78  
    // RFC2396 hostname = *( domainlabel "." ) toplabel [ "." ]
    // Note that the regex currently requires both a domain label and a top level label, whereas
    // the RFC does not. This is because the regex is used to detect if a TLD is present.
    // If the match fails, input is checked against DOMAIN_LABEL_REGEX (hostnameRegex)
    // RFC1123 sec 2.1 allows hostnames to start with a digit
    // The only capturing group is the top label; a single trailing dot is tolerated.
    private static final String DOMAIN_NAME_REGEX =
            "^(?:" + DOMAIN_LABEL_REGEX + "\\.)+" + "(" + TOP_LABEL_REGEX + ")\\.?$";
86  
    /**
     * Whether this instance accepts local names: when set, {@link #isValid}
     * falls back to the single-label hostname pattern and {@link #isValidTld}
     * also accepts local TLDs.
     */
    private final boolean allowLocal;
88  
    /**
     * Shared instance of this validator that does not
     * consider local addresses valid ({@code allowLocal == false}).
     */
    private static final DomainValidator DOMAIN_VALIDATOR = new DomainValidator(false);
94  
    /**
     * Shared instance of this validator that does
     * consider local addresses valid ({@code allowLocal == true}).
     */
    private static final DomainValidator DOMAIN_VALIDATOR_WITH_LOCAL = new DomainValidator(true);
100 
    /**
     * RegexValidator for matching full domain names (one or more labels
     * followed by a top label), built from {@code DOMAIN_NAME_REGEX}.
     */
    private final RegexValidator domainRegex =
            new RegexValidator(DOMAIN_NAME_REGEX);
    /**
     * RegexValidator for matching a local hostname, i.e. a single
     * label with no dots, built from {@code DOMAIN_LABEL_REGEX}.
     */
    // RFC1123 sec 2.1 allows hostnames to start with a digit
    private final RegexValidator hostnameRegex =
            new RegexValidator(DOMAIN_LABEL_REGEX);
112 
113     /**
114      * Returns the singleton instance of this validator. It
115      *  will not consider local addresses as valid.
116      * @return the singleton instance of this validator
117      */
118     public static DomainValidator getInstance() {
119         return DOMAIN_VALIDATOR;
120     }
121 
122     /**
123      * Returns the singleton instance of this validator,
124      *  with local validation as required.
125      * @param allowLocal Should local addresses be considered valid?
126      * @return the singleton instance of this validator
127      */
128     public static DomainValidator getInstance(boolean allowLocal) {
129        if(allowLocal) {
130           return DOMAIN_VALIDATOR_WITH_LOCAL;
131        }
132        return DOMAIN_VALIDATOR;
133     }
134 
    /**
     * Private constructor; instances are obtained through
     * {@link #getInstance()} or {@link #getInstance(boolean)}.
     *
     * @param allowLocal whether the new instance treats local names as valid
     */
    private DomainValidator(boolean allowLocal) {
       this.allowLocal = allowLocal;
    }
139 
140     /**
141      * Returns true if the specified <code>String</code> parses
142      * as a valid domain name with a recognized top-level domain.
143      * The parsing is case-insensitive.
144      * @param domain the parameter to check for domain name syntax
145      * @return true if the parameter is a valid domain name
146      */
147     public boolean isValid(String domain) {
148         if (domain == null) {
149             return false;
150         }
151         domain = unicodeToASCII(domain);
152         // hosts must be equally reachable via punycode and Unicode;
153         // Unicode is never shorter than punycode, so check punycode
154         // if domain did not convert, then it will be caught by ASCII
155         // checks in the regexes below
156         if (domain.length() > 253) {
157             return false;
158         }
159         String[] groups = domainRegex.match(domain);
160         if (groups != null && groups.length > 0) {
161             return isValidTld(groups[0]);
162         }
163         return allowLocal && hostnameRegex.isValid(domain);
164     }
165 
166     // package protected for unit test access
167     // must agree with isValid() above
168     final boolean isValidDomainSyntax(String domain) {
169         if (domain == null) {
170             return false;
171         }
172         domain = unicodeToASCII(domain);
173         // hosts must be equally reachable via punycode and Unicode;
174         // Unicode is never shorter than punycode, so check punycode
175         // if domain did not convert, then it will be caught by ASCII
176         // checks in the regexes below
177         if (domain.length() > 253) {
178             return false;
179         }
180         String[] groups = domainRegex.match(domain);
181         return (groups != null && groups.length > 0)
182                 || hostnameRegex.isValid(domain);
183     }
184 
185     /**
186      * Returns true if the specified <code>String</code> matches any
187      * IANA-defined top-level domain. Leading dots are ignored if present.
188      * The search is case-insensitive.
189      * @param tld the parameter to check for TLD status, not null
190      * @return true if the parameter is a TLD
191      */
192     public boolean isValidTld(String tld) {
193         tld = unicodeToASCII(tld);
194         if(allowLocal && isValidLocalTld(tld)) {
195            return true;
196         }
197         return isValidInfrastructureTld(tld)
198                 || isValidGenericTld(tld)
199                 || isValidCountryCodeTld(tld);
200     }
201 
202     /**
203      * Returns true if the specified <code>String</code> matches any
204      * IANA-defined infrastructure top-level domain. Leading dots are
205      * ignored if present. The search is case-insensitive.
206      * @param iTld the parameter to check for infrastructure TLD status, not null
207      * @return true if the parameter is an infrastructure TLD
208      */
209     public boolean isValidInfrastructureTld(String iTld) {
210         iTld = unicodeToASCII(iTld);
211         return Arrays.binarySearch(INFRASTRUCTURE_TLDS, (chompLeadingDot(iTld.toLowerCase(Locale.ENGLISH)))) >= 0;
212     }
213 
214     /**
215      * Returns true if the specified <code>String</code> matches any
216      * IANA-defined generic top-level domain. Leading dots are ignored
217      * if present. The search is case-insensitive.
218      * @param gTld the parameter to check for generic TLD status, not null
219      * @return true if the parameter is a generic TLD
220      */
221     public boolean isValidGenericTld(String gTld) {
222         gTld = unicodeToASCII(gTld);
223         return Arrays.binarySearch(GENERIC_TLDS, chompLeadingDot(gTld.toLowerCase(Locale.ENGLISH))) >= 0;
224     }
225 
226     /**
227      * Returns true if the specified <code>String</code> matches any
228      * IANA-defined country code top-level domain. Leading dots are
229      * ignored if present. The search is case-insensitive.
230      * @param ccTld the parameter to check for country code TLD status, not null
231      * @return true if the parameter is a country code TLD
232      */
233     public boolean isValidCountryCodeTld(String ccTld) {
234         ccTld = unicodeToASCII(ccTld);
235         return Arrays.binarySearch(COUNTRY_CODE_TLDS, chompLeadingDot(ccTld.toLowerCase(Locale.ENGLISH))) >= 0;
236     }
237 
238     /**
239      * Returns true if the specified <code>String</code> matches any
240      * widely used "local" domains (localhost or localdomain). Leading dots are
241      * ignored if present. The search is case-insensitive.
242      * @param lTld the parameter to check for local TLD status, not null
243      * @return true if the parameter is an local TLD
244      */
245     public boolean isValidLocalTld(String lTld) {
246         lTld = unicodeToASCII(lTld);
247         return Arrays.binarySearch(LOCAL_TLDS, chompLeadingDot(lTld.toLowerCase(Locale.ENGLISH))) >= 0;
248     }
249 
250     private String chompLeadingDot(String str) {
251         if (str.startsWith(".")) {
252             return str.substring(1);
253         }
254         return str;
255     }
256 
257     // ---------------------------------------------
258     // ----- TLDs defined by IANA
259     // ----- Authoritative and comprehensive list at:
260     // ----- http://data.iana.org/TLD/tlds-alpha-by-domain.txt
261 
262     // Note that the above list is in UPPER case.
263     // The code currently converts strings to lower case (as per the tables below)
264 
265     // IANA also provide an HTML list at http://www.iana.org/domains/root/db
266     // Note that this contains several country code entries which are NOT in
267     // the text file. These all have the "Not assigned" in the "Sponsoring Organisation" column
268     // For example (as of 2015-01-02):
269     // .bl  country-code    Not assigned
270     // .um  country-code    Not assigned
271 
272     // WARNING: this array MUST be sorted, others it cannot be searched reliably using binary search
273     private static final String[] INFRASTRUCTURE_TLDS = new String[] {
274         "arpa",               // internet infrastructure
275     };
276 
277     // WARNING: this array MUST be sorted, others it cannot be searched reliably using binary search
278     private static final String[] GENERIC_TLDS = new String[] {
279         "abogado",
280         "academy",
281         "accountants",
282         "active",
283         "actor",
284         "adult",
285         "aero",
286         "agency",
287         "airforce",
288         "allfinanz",
289         "alsace",
290         "amsterdam",
291         "android",
292         "aquarelle",
293         "archi",
294         "army",
295         "arpa",
296         "asia",
297         "associates",
298         "attorney",
299         "auction",
300         "audio",
301         "autos",
302         "axa",
303         "band",
304         "bar",
305         "bargains",
306         "bayern",
307         "beer",
308         "berlin",
309         "best",
310         "bid",
311         "bike",
312         "bio",
313         "biz",
314         "black",
315         "blackfriday",
316         "bloomberg",
317         "blue",
318         "bmw",
319         "bnpparibas",
320         "boo",
321         "boutique",
322         "brussels",
323         "budapest",
324         "build",
325         "builders",
326         "business",
327         "buzz",
328         "bzh",
329         "cab",
330         "cal",
331         "camera",
332         "camp",
333         "cancerresearch",
334         "capetown",
335         "capital",
336         "caravan",
337         "cards",
338         "care",
339         "career",
340         "careers",
341         "cartier",
342         "casa",
343         "cash",
344         "cat",
345         "catering",
346         "center",
347         "ceo",
348         "cern",
349         "channel",
350         "cheap",
351         "christmas",
352         "chrome",
353         "church",
354         "citic",
355         "city",
356         "claims",
357         "cleaning",
358         "click",
359         "clinic",
360         "clothing",
361         "club",
362         "coach",
363         "codes",
364         "coffee",
365         "college",
366         "cologne",
367         "com",
368         "community",
369         "company",
370         "computer",
371         "condos",
372         "construction",
373         "consulting",
374         "contractors",
375         "cooking",
376         "cool",
377         "coop",
378         "country",
379         "credit",
380         "creditcard",
381         "cricket",
382         "crs",
383         "cruises",
384         "cuisinella",
385         "cymru",
386         "dad",
387         "dance",
388         "dating",
389         "day",
390         "deals",
391         "degree",
392         "delivery",
393         "democrat",
394         "dental",
395         "dentist",
396         "desi",
397         "dev",
398         "diamonds",
399         "diet",
400         "digital",
401         "direct",
402         "directory",
403         "discount",
404         "dnp",
405         "docs",
406         "domains",
407         "doosan",
408         "durban",
409         "dvag",
410         "eat",
411         "edu",
412         "education",
413         "email",
414         "emerck",
415         "energy",
416         "engineer",
417         "engineering",
418         "enterprises",
419         "equipment",
420         "esq",
421         "estate",
422         "eurovision",
423         "eus",
424         "events",
425         "everbank",
426         "exchange",
427         "expert",
428         "exposed",
429         "fail",
430         "farm",
431         "fashion",
432         "feedback",
433         "finance",
434         "financial",
435         "firmdale",
436         "fish",
437         "fishing",
438         "fitness",
439         "flights",
440         "florist",
441         "flowers",
442         "flsmidth",
443         "fly",
444         "foo",
445         "forsale",
446         "foundation",
447         "frl",
448         "frogans",
449         "fund",
450         "furniture",
451         "futbol",
452         "gal",
453         "gallery",
454         "garden",
455         "gbiz",
456         "gent",
457         "ggee",
458         "gift",
459         "gifts",
460         "gives",
461         "glass",
462         "gle",
463         "global",
464         "globo",
465         "gmail",
466         "gmo",
467         "gmx",
468         "google",
469         "gop",
470         "gov",
471         "graphics",
472         "gratis",
473         "green",
474         "gripe",
475         "guide",
476         "guitars",
477         "guru",
478         "hamburg",
479         "haus",
480         "healthcare",
481         "help",
482         "here",
483         "hiphop",
484         "hiv",
485         "holdings",
486         "holiday",
487         "homes",
488         "horse",
489         "host",
490         "hosting",
491         "house",
492         "how",
493         "ibm",
494         "immo",
495         "immobilien",
496         "industries",
497         "info",
498         "ing",
499         "ink",
500         "institute",
501         "insure",
502         "int",
503         "international",
504         "investments",
505         "irish",
506         "iwc",
507         "jetzt",
508         "jobs",
509         "joburg",
510         "juegos",
511         "kaufen",
512         "kim",
513         "kitchen",
514         "kiwi",
515         "koeln",
516         "krd",
517         "kred",
518         "lacaixa",
519         "land",
520         "latrobe",
521         "lawyer",
522         "lds",
523         "lease",
524         "legal",
525         "lgbt",
526         "lidl",
527         "life",
528         "lighting",
529         "limited",
530         "limo",
531         "link",
532         "loans",
533         "london",
534         "lotto",
535         "ltda",
536         "luxe",
537         "luxury",
538         "madrid",
539         "maison",
540         "management",
541         "mango",
542         "market",
543         "marketing",
544         "media",
545         "meet",
546         "melbourne",
547         "meme",
548         "memorial",
549         "menu",
550         "miami",
551         "mil",
552         "mini",
553         "mobi",
554         "moda",
555         "moe",
556         "monash",
557         "money",
558         "mormon",
559         "mortgage",
560         "moscow",
561         "motorcycles",
562         "mov",
563         "museum",
564         "nagoya",
565         "name",
566         "navy",
567         "net",
568         "network",
569         "neustar",
570         "new",
571         "nexus",
572         "ngo",
573         "nhk",
574         "ninja",
575         "nra",
576         "nrw",
577         "nyc",
578         "okinawa",
579         "ong",
580         "onl",
581         "ooo",
582         "org",
583         "organic",
584         "osaka",
585         "otsuka",
586         "ovh",
587         "paris",
588         "partners",
589         "parts",
590         "party",
591         "pharmacy",
592         "photo",
593         "photography",
594         "photos",
595         "physio",
596         "pics",
597         "pictures",
598         "pink",
599         "pizza",
600         "place",
601         "plumbing",
602         "pohl",
603         "poker",
604         "porn",
605         "post",
606         "praxi",
607         "press",
608         "pro",
609         "prod",
610         "productions",
611         "prof",
612         "properties",
613         "property",
614         "pub",
615         "qpon",
616         "quebec",
617         "realtor",
618         "recipes",
619         "red",
620         "rehab",
621         "reise",
622         "reisen",
623         "reit",
624         "ren",
625         "rentals",
626         "repair",
627         "report",
628         "republican",
629         "rest",
630         "restaurant",
631         "reviews",
632         "rich",
633         "rio",
634         "rip",
635         "rocks",
636         "rodeo",
637         "rsvp",
638         "ruhr",
639         "ryukyu",
640         "saarland",
641         "sale",
642         "samsung",
643         "sarl",
644         "sca",
645         "scb",
646         "schmidt",
647         "schule",
648         "schwarz",
649         "science",
650         "scot",
651         "services",
652         "sew",
653         "sexy",
654         "shiksha",
655         "shoes",
656         "shriram",
657         "singles",
658         "sky",
659         "social",
660         "software",
661         "sohu",
662         "solar",
663         "solutions",
664         "soy",
665         "space",
666         "spiegel",
667         "supplies",
668         "supply",
669         "support",
670         "surf",
671         "surgery",
672         "suzuki",
673         "sydney",
674         "systems",
675         "taipei",
676         "tatar",
677         "tattoo",
678         "tax",
679         "technology",
680         "tel",
681         "tienda",
682         "tips",
683         "tires",
684         "tirol",
685         "today",
686         "tokyo",
687         "tools",
688         "top",
689         "town",
690         "toys",
691         "trade",
692         "training",
693         "travel",
694         "trust",
695         "tui",
696         "university",
697         "uno",
698         "uol",
699         "vacations",
700         "vegas",
701         "ventures",
702         "versicherung",
703         "vet",
704         "viajes",
705         "video",
706         "villas",
707         "vision",
708         "vlaanderen",
709         "vodka",
710         "vote",
711         "voting",
712         "voto",
713         "voyage",
714         "wales",
715         "wang",
716         "watch",
717         "webcam",
718         "website",
719         "wed",
720         "wedding",
721         "whoswho",
722         "wien",
723         "wiki",
724         "williamhill",
725         "wme",
726         "work",
727         "works",
728         "world",
729         "wtc",
730         "wtf",
731         "xn--1qqw23a", // 佛山 Guangzhou YU Wei Information Technology Co., Ltd.
732         "xn--3bst00m", // 集团 Eagle Horizon Limited
733         "xn--3ds443g", // 在线 TLD REGISTRY LIMITED
734         "xn--45q11c", // 八卦 Zodiac Scorpio Limited
735         "xn--4gbrim", // موقع Suhub Electronic Establishment
736         "xn--55qw42g", // 公益 China Organizational Name Administration Center
737         "xn--55qx5d", // 公司 Computer Network Information Center of Chinese Academy of Sciences (China Internet Network Information Center)
738         "xn--6frz82g", // 移动 Afilias Limited
739         "xn--6qq986b3xl", // 我爱你 Tycoon Treasure Limited
740         "xn--80adxhks", // москва Foundation for Assistance for Internet Technologies and Infrastructure Development (FAITID)
741         "xn--80asehdb", // онлайн CORE Association
742         "xn--80aswg", // сайт CORE Association
743         "xn--c1avg", // орг Public Interest Registry
744         "xn--cg4bki", // 삼성 SAMSUNG SDS CO., LTD
745         "xn--czr694b", // 商标 HU YI GLOBAL INFORMATION RESOURCES(HOLDING) COMPANY.HONGKONG LIMITED
746         "xn--czrs0t", // 商店 Wild Island, LLC
747         "xn--czru2d", // 商城 Zodiac Aquarius Limited
748         "xn--d1acj3b", // дети The Foundation for Network Initiatives “The Smart Internet”
749         "xn--fiq228c5hs", // 中文网 TLD REGISTRY LIMITED
750         "xn--fiq64b", // 中信 CITIC Group Corporation
751         "xn--flw351e", // 谷歌 Charleston Road Registry Inc.
752         "xn--hxt814e", // 网店 Zodiac Libra Limited
753         "xn--i1b6b1a6a2e", // संगठन Public Interest Registry
754         "xn--io0a7i", // 网络 Computer Network Information Center of Chinese Academy of Sciences (China Internet Network Information Center)
755         "xn--kput3i", // 手机 Beijing RITT-Net Technology Development Co., Ltd
756         "xn--mgbab2bd", // بازار CORE Association
757         "xn--ngbc5azd", // شبكة International Domain Registry Pty. Ltd.
758         "xn--nqv7f", // 机构 Public Interest Registry
759         "xn--nqv7fs00ema", // 组织机构 Public Interest Registry
760         "xn--p1acf", // рус Rusnames Limited
761         "xn--q9jyb4c", // みんな Charleston Road Registry Inc.
762         "xn--qcka1pmc", // グーグル Charleston Road Registry Inc.
763         "xn--rhqv96g", // 世界 Stable Tone Limited
764         "xn--ses554g", // 网址 HU YI GLOBAL INFORMATION RESOURCES (HOLDING) COMPANY. HONGKONG LIMITED
765         "xn--unup4y", // 游戏 Spring Fields, LLC
766         "xn--vermgensberater-ctb", // vermögensberater Deutsche Vermögensberatung Aktiengesellschaft DVAG
767         "xn--vermgensberatung-pwb", // vermögensberatung Deutsche Vermögensberatung Aktiengesellschaft DVAG
768         "xn--vhquv", // 企业 Dash McCook, LLC
769         "xn--xhq521b", // 广东 Guangzhou YU Wei Information Technology Co., Ltd.
770         "xn--zfr164b", // 政务 China Organizational Name Administration Center
771         "xxx",
772         "xyz",
773         "yachts",
774         "yandex",
775         "yoga",
776         "yokohama",
777         "youtube",
778         "zip",
779         "zone",
780         "zuerich",
781    };
782 
    // WARNING: this array MUST be sorted, otherwise it cannot be searched reliably using binary search
784     private static final String[] COUNTRY_CODE_TLDS = new String[] {
785         "ac",                 // Ascension Island
786         "ad",                 // Andorra
787         "ae",                 // United Arab Emirates
788         "af",                 // Afghanistan
789         "ag",                 // Antigua and Barbuda
790         "ai",                 // Anguilla
791         "al",                 // Albania
792         "am",                 // Armenia
793         "an",                 // Netherlands Antilles
794         "ao",                 // Angola
795         "aq",                 // Antarctica
796         "ar",                 // Argentina
797         "as",                 // American Samoa
798         "at",                 // Austria
799         "au",                 // Australia (includes Ashmore and Cartier Islands and Coral Sea Islands)
800         "aw",                 // Aruba
801         "ax",                 // Åland
802         "az",                 // Azerbaijan
803         "ba",                 // Bosnia and Herzegovina
804         "bb",                 // Barbados
805         "bd",                 // Bangladesh
806         "be",                 // Belgium
807         "bf",                 // Burkina Faso
808         "bg",                 // Bulgaria
809         "bh",                 // Bahrain
810         "bi",                 // Burundi
811         "bj",                 // Benin
812         "bm",                 // Bermuda
813         "bn",                 // Brunei Darussalam
814         "bo",                 // Bolivia
815         "br",                 // Brazil
816         "bs",                 // Bahamas
817         "bt",                 // Bhutan
818         "bv",                 // Bouvet Island
819         "bw",                 // Botswana
820         "by",                 // Belarus
821         "bz",                 // Belize
822         "ca",                 // Canada
823         "cc",                 // Cocos (Keeling) Islands
824         "cd",                 // Democratic Republic of the Congo (formerly Zaire)
825         "cf",                 // Central African Republic
826         "cg",                 // Republic of the Congo
827         "ch",                 // Switzerland
828         "ci",                 // Côte d'Ivoire
829         "ck",                 // Cook Islands
830         "cl",                 // Chile
831         "cm",                 // Cameroon
832         "cn",                 // China, mainland
833         "co",                 // Colombia
834         "cr",                 // Costa Rica
835         "cu",                 // Cuba
836         "cv",                 // Cape Verde
837         "cw",                 // Curaçao
838         "cx",                 // Christmas Island
839         "cy",                 // Cyprus
840         "cz",                 // Czech Republic
841         "de",                 // Germany
842         "dj",                 // Djibouti
843         "dk",                 // Denmark
844         "dm",                 // Dominica
845         "do",                 // Dominican Republic
846         "dz",                 // Algeria
847         "ec",                 // Ecuador
848         "ee",                 // Estonia
849         "eg",                 // Egypt
850         "er",                 // Eritrea
851         "es",                 // Spain
852         "et",                 // Ethiopia
853         "eu",                 // European Union
854         "fi",                 // Finland
855         "fj",                 // Fiji
856         "fk",                 // Falkland Islands
857         "fm",                 // Federated States of Micronesia
858         "fo",                 // Faroe Islands
859         "fr",                 // France
860         "ga",                 // Gabon
861         "gb",                 // Great Britain (United Kingdom)
862         "gd",                 // Grenada
863         "ge",                 // Georgia
864         "gf",                 // French Guiana
865         "gg",                 // Guernsey
866         "gh",                 // Ghana
867         "gi",                 // Gibraltar
868         "gl",                 // Greenland
869         "gm",                 // The Gambia
870         "gn",                 // Guinea
871         "gp",                 // Guadeloupe
872         "gq",                 // Equatorial Guinea
873         "gr",                 // Greece
874         "gs",                 // South Georgia and the South Sandwich Islands
875         "gt",                 // Guatemala
876         "gu",                 // Guam
877         "gw",                 // Guinea-Bissau
878         "gy",                 // Guyana
879         "hk",                 // Hong Kong
880         "hm",                 // Heard Island and McDonald Islands
881         "hn",                 // Honduras
882         "hr",                 // Croatia (Hrvatska)
883         "ht",                 // Haiti
884         "hu",                 // Hungary
885         "id",                 // Indonesia
886         "ie",                 // Ireland (Éire)
887         "il",                 // Israel
888         "im",                 // Isle of Man
889         "in",                 // India
890         "io",                 // British Indian Ocean Territory
891         "iq",                 // Iraq
892         "ir",                 // Iran
893         "is",                 // Iceland
894         "it",                 // Italy
895         "je",                 // Jersey
896         "jm",                 // Jamaica
897         "jo",                 // Jordan
898         "jp",                 // Japan
899         "ke",                 // Kenya
900         "kg",                 // Kyrgyzstan
901         "kh",                 // Cambodia (Khmer)
902         "ki",                 // Kiribati
903         "km",                 // Comoros
904         "kn",                 // Saint Kitts and Nevis
905         "kp",                 // North Korea
906         "kr",                 // South Korea
907         "kw",                 // Kuwait
908         "ky",                 // Cayman Islands
909         "kz",                 // Kazakhstan
910         "la",                 // Laos (currently being marketed as the official domain for Los Angeles)
911         "lb",                 // Lebanon
912         "lc",                 // Saint Lucia
913         "li",                 // Liechtenstein
914         "lk",                 // Sri Lanka
915         "lr",                 // Liberia
916         "ls",                 // Lesotho
917         "lt",                 // Lithuania
918         "lu",                 // Luxembourg
919         "lv",                 // Latvia
920         "ly",                 // Libya
921         "ma",                 // Morocco
922         "mc",                 // Monaco
923         "md",                 // Moldova
924         "me",                 // Montenegro
925         "mg",                 // Madagascar
926         "mh",                 // Marshall Islands
927         "mk",                 // Republic of Macedonia
928         "ml",                 // Mali
929         "mm",                 // Myanmar
930         "mn",                 // Mongolia
931         "mo",                 // Macau
932         "mp",                 // Northern Mariana Islands
933         "mq",                 // Martinique
934         "mr",                 // Mauritania
935         "ms",                 // Montserrat
936         "mt",                 // Malta
937         "mu",                 // Mauritius
938         "mv",                 // Maldives
939         "mw",                 // Malawi
940         "mx",                 // Mexico
941         "my",                 // Malaysia
942         "mz",                 // Mozambique
943         "na",                 // Namibia
944         "nc",                 // New Caledonia
945         "ne",                 // Niger
946         "nf",                 // Norfolk Island
947         "ng",                 // Nigeria
948         "ni",                 // Nicaragua
949         "nl",                 // Netherlands
950         "no",                 // Norway
951         "np",                 // Nepal
952         "nr",                 // Nauru
953         "nu",                 // Niue
954         "nz",                 // New Zealand
955         "om",                 // Oman
956         "pa",                 // Panama
957         "pe",                 // Peru
958         "pf",                 // French Polynesia With Clipperton Island
959         "pg",                 // Papua New Guinea
960         "ph",                 // Philippines
961         "pk",                 // Pakistan
962         "pl",                 // Poland
963         "pm",                 // Saint-Pierre and Miquelon
964         "pn",                 // Pitcairn Islands
965         "pr",                 // Puerto Rico
966         "ps",                 // Palestinian territories (PA-controlled West Bank and Gaza Strip)
967         "pt",                 // Portugal
968         "pw",                 // Palau
969         "py",                 // Paraguay
970         "qa",                 // Qatar
971         "re",                 // Réunion
972         "ro",                 // Romania
973         "rs",                 // Serbia
974         "ru",                 // Russia
975         "rw",                 // Rwanda
976         "sa",                 // Saudi Arabia
977         "sb",                 // Solomon Islands
978         "sc",                 // Seychelles
979         "sd",                 // Sudan
980         "se",                 // Sweden
981         "sg",                 // Singapore
982         "sh",                 // Saint Helena
983         "si",                 // Slovenia
984         "sj",                 // Svalbard and Jan Mayen Islands Not in use (Norwegian dependencies; see .no)
985         "sk",                 // Slovakia
986         "sl",                 // Sierra Leone
987         "sm",                 // San Marino
988         "sn",                 // Senegal
989         "so",                 // Somalia
990         "sr",                 // Suriname
991         "st",                 // São Tomé and Príncipe
992         "su",                 // Soviet Union (deprecated)
993         "sv",                 // El Salvador
994         "sx",                 // Sint Maarten
995         "sy",                 // Syria
996         "sz",                 // Swaziland
997         "tc",                 // Turks and Caicos Islands
998         "td",                 // Chad
999         "tf",                 // French Southern and Antarctic Lands
1000         "tg",                 // Togo
1001         "th",                 // Thailand
1002         "tj",                 // Tajikistan
1003         "tk",                 // Tokelau
1004         "tl",                 // East Timor (deprecated old code)
1005         "tm",                 // Turkmenistan
1006         "tn",                 // Tunisia
1007         "to",                 // Tonga
1008         "tp",                 // East Timor
1009         "tr",                 // Turkey
1010         "tt",                 // Trinidad and Tobago
1011         "tv",                 // Tuvalu
1012         "tw",                 // Taiwan, Republic of China
1013         "tz",                 // Tanzania
1014         "ua",                 // Ukraine
1015         "ug",                 // Uganda
1016         "uk",                 // United Kingdom
1017         "us",                 // United States of America
1018         "uy",                 // Uruguay
1019         "uz",                 // Uzbekistan
1020         "va",                 // Vatican City State
1021         "vc",                 // Saint Vincent and the Grenadines
1022         "ve",                 // Venezuela
1023         "vg",                 // British Virgin Islands
1024         "vi",                 // U.S. Virgin Islands
1025         "vn",                 // Vietnam
1026         "vu",                 // Vanuatu
1027         "wf",                 // Wallis and Futuna
1028         "ws",                 // Samoa (formerly Western Samoa)
1029         "xn--3e0b707e", // 한국 KISA (Korea Internet &amp; Security Agency)
1030         "xn--45brj9c", // ভারত National Internet Exchange of India
1031         "xn--80ao21a", // қаз Association of IT Companies of Kazakhstan
1032         "xn--90a3ac", // срб Serbian National Internet Domain Registry (RNIDS)
1033         "xn--clchc0ea0b2g2a9gcd", // சிங்கப்பூர் Singapore Network Information Centre (SGNIC) Pte Ltd
1034         "xn--d1alf", // мкд Macedonian Academic Research Network Skopje
1035         "xn--fiqs8s", // 中国 China Internet Network Information Center
1036         "xn--fiqz9s", // 中國 China Internet Network Information Center
1037         "xn--fpcrj9c3d", // భారత్ National Internet Exchange of India
1038         "xn--fzc2c9e2c", // ලංකා LK Domain Registry
1039         "xn--gecrj9c", // ભારત National Internet Exchange of India
1040         "xn--h2brj9c", // भारत National Internet Exchange of India
1041         "xn--j1amh", // укр Ukrainian Network Information Centre (UANIC), Inc.
1042         "xn--j6w193g", // 香港 Hong Kong Internet Registration Corporation Ltd.
1043         "xn--kprw13d", // 台湾 Taiwan Network Information Center (TWNIC)
1044         "xn--kpry57d", // 台灣 Taiwan Network Information Center (TWNIC)
1045         "xn--l1acc", // мон Datacom Co.,Ltd
1046         "xn--lgbbat1ad8j", // الجزائر CERIST
1047         "xn--mgb9awbf", // عمان Telecommunications Regulatory Authority (TRA)
1048         "xn--mgba3a4f16a", // ایران Institute for Research in Fundamental Sciences (IPM)
1049         "xn--mgbaam7a8h", // امارات Telecommunications Regulatory Authority (TRA)
1050         "xn--mgbayh7gpa", // الاردن National Information Technology Center (NITC)
1051         "xn--mgbbh1a71e", // بھارت National Internet Exchange of India
1052         "xn--mgbc0a9azcg", // المغرب Agence Nationale de Réglementation des Télécommunications (ANRT)
1053         "xn--mgberp4a5d4ar", // السعودية Communications and Information Technology Commission
1054         "xn--mgbx4cd0ab", // مليسيا MYNIC Berhad
1055         "xn--node", // გე Information Technologies Development Center (ITDC)
1056         "xn--o3cw4h", // ไทย Thai Network Information Center Foundation
1057         "xn--ogbpf8fl", // سورية National Agency for Network Services (NANS)
1058         "xn--p1ai", // рф Coordination Center for TLD RU
1059         "xn--pgbs0dh", // تونس Agence Tunisienne d&#39;Internet
1060         "xn--s9brj9c", // ਭਾਰਤ National Internet Exchange of India
1061         "xn--wgbh1c", // مصر National Telecommunication Regulatory Authority - NTRA
1062         "xn--wgbl6a", // قطر Communications Regulatory Authority
1063         "xn--xkc2al3hye2a", // இலங்கை LK Domain Registry
1064         "xn--xkc2dl3a5ee0h", // இந்தியா National Internet Exchange of India
1065         "xn--yfro4i67o", // 新加坡 Singapore Network Information Centre (SGNIC) Pte Ltd
1066         "xn--ygbi2ammx", // فلسطين Ministry of Telecom &amp; Information Technology (MTIT)
1067         "ye",                 // Yemen
1068         "yt",                 // Mayotte
1069         "za",                 // South Africa
1070         "zm",                 // Zambia
1071         "zw",                 // Zimbabwe
1072     };
1073 
1074     // WARNING: this array MUST be sorted, others it cannot be searched reliably using binary search
1075     private static final String[] LOCAL_TLDS = new String[] {
1076        "localdomain",         // Also widely used as localhost.localdomain
1077        "localhost",           // RFC2606 defined
1078     };
1079 
1080     /**
1081      * Converts potentially Unicode input to punycode.
1082      * If conversion fails, returns the original input.
1083      * 
1084      * @param input the string to convert, not null
1085      * @return converted input, or original input if conversion fails
1086      */
1087     // Needed by UrlValidator
1088     static String unicodeToASCII(String input) {
1089         try {
1090             return /* java.net.IDN. */ toASCII(input);
1091         } catch (IllegalArgumentException e) { // input is not valid
1092             return input;
1093         }
1094     }
1095 
1096     // ================= Code needed for Java 1.4 and 1.5 compatibility ===============
1097 
1098     private static class IDNHolder {
1099         private static Method getMethod() {
1100             try {
1101                 Class clazz = Class.forName("java.net.IDN", false, DomainValidator.class.getClassLoader());
1102                 return clazz.getDeclaredMethod("toASCII", new Class[]{String.class});
1103             } catch (Exception e) {
1104               return null;
1105             }
1106         }
1107         private static final Method JAVA_NET_IDN_TO_ASCII = getMethod();
1108     }
1109 
1110     /*
1111      * Helper method to invoke java.net.IDN.toAscii(String).
1112      * Allows code to be compiled with Java 1.4 and 1.5 
1113      * @throws IllegalArgumentException if the input string doesn't conform to RFC 3490 specification
1114      */
1115     private static final String toASCII(String line) throws IllegalArgumentException {
1116 //        java.net.IDN.toASCII(line); // Java 1.6+
1117         // implementation for Java 1.4 and 1.5
1118         // effectively this is done by IDN.toASCII but we want to skip the entire call
1119         if (isOnlyASCII(line)) {
1120             return line;
1121         }
1122         Method m = IDNHolder.JAVA_NET_IDN_TO_ASCII;
1123         if (m == null) { // avoid NPE
1124             return line;
1125         }
1126         try {
1127             return (String) m.invoke(null, new String[]{line.toLowerCase(Locale.ENGLISH)});
1128         } catch (IllegalAccessException e) {
1129             throw new RuntimeException(e); // Should not happen
1130         } catch (InvocationTargetException e) {
1131             Throwable t = e.getCause();
1132             if (t instanceof IllegalArgumentException) { // this is expected from toASCII method
1133                 throw (IllegalArgumentException) t;
1134             }
1135             throw new RuntimeException(e); // Should not happen
1136         }
1137     }
1138 
1139     /*
1140      * Check if input contains only ASCII
1141      * Treats null as all ASCII
1142      */
1143     private static boolean isOnlyASCII(String input) {
1144         if (input == null) {
1145             return true;
1146         }
1147         for(int i=0; i < input.length(); i++) {
1148             if (input.charAt(i) > 0x7F) {
1149                 return false;
1150             }
1151         }
1152         return true;
1153     }
1154 
1155 }