001/* 002 * Licensed to the Apache Software Foundation (ASF) under one or more 003 * contributor license agreements. See the NOTICE file distributed with 004 * this work for additional information regarding copyright ownership. 005 * The ASF licenses this file to You under the Apache License, Version 2.0 006 * (the "License"); you may not use this file except in compliance with 007 * the License. You may obtain a copy of the License at 008 * 009 * http://www.apache.org/licenses/LICENSE-2.0 010 * 011 * Unless required by applicable law or agreed to in writing, software 012 * distributed under the License is distributed on an "AS IS" BASIS, 013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 014 * See the License for the specific language governing permissions and 015 * limitations under the License. 016 */ 017package org.apache.commons.validator.routines; 018 019import java.io.Serializable; 020import java.util.Arrays; 021import java.util.List; 022 023/** 024 * <p><b>Domain name</b> validation routines.</p> 025 * 026 * <p> 027 * This validator provides methods for validating Internet domain names 028 * and top-level domains. 029 * </p> 030 * 031 * <p>Domain names are evaluated according 032 * to the standards <a href="http://www.ietf.org/rfc/rfc1034.txt">RFC1034</a>, 033 * section 3, and <a href="http://www.ietf.org/rfc/rfc1123.txt">RFC1123</a>, 034 * section 2.1. No accomodation is provided for the specialized needs of 035 * other applications; if the domain name has been URL-encoded, for example, 036 * validation will fail even though the equivalent plaintext version of the 037 * same name would have passed. 038 * </p> 039 * 040 * <p> 041 * Validation is also provided for top-level domains (TLDs) as defined and 042 * maintained by the Internet Assigned Numbers Authority (IANA): 043 * </p> 044 * 045 * <ul> 046 * <li>{@link #isValidInfrastructureTld} - validates infrastructure TLDs 047 * (<code>.arpa</code>, etc.)</li> 048 * <li>{@link #isValidGenericTld} - validates generic TLDs 049 * (<code>.com, .org</code>, etc.)</li> 050 * <li>{@link #isValidCountryCodeTld} - validates country code TLDs 051 * (<code>.us, .uk, .cn</code>, etc.)</li> 052 * </ul> 053 * 054 * <p> 055 * (<b>NOTE</b>: This class does not provide IP address lookup for domain names or 056 * methods to ensure that a given domain name matches a specific IP; see 057 * {@link java.net.InetAddress} for that functionality.) 058 * </p> 059 * 060 * @version $Revision: 1227719 $ $Date: 2012-01-05 12:45:51 -0500 (Thu, 05 Jan 2012) $ 061 * @since Validator 1.4 062 */ 063public class DomainValidator implements Serializable { 064 065 private static final long serialVersionUID = -4407125112880174009L; 066 067 // Regular expression strings for hostnames (derived from RFC2396 and RFC 1123) 068 private static final String DOMAIN_LABEL_REGEX = "\\p{Alnum}(?>[\\p{Alnum}-]*\\p{Alnum})*"; 069 private static final String TOP_LABEL_REGEX = "\\p{Alpha}{2,}"; 070 private static final String DOMAIN_NAME_REGEX = 071 "^(?:" + DOMAIN_LABEL_REGEX + "\\.)+" + "(" + TOP_LABEL_REGEX + ")$"; 072 073 private final boolean allowLocal; 074 075 /** 076 * Singleton instance of this validator, which 077 * doesn't consider local addresses as valid. 078 */ 079 private static final DomainValidator DOMAIN_VALIDATOR = new DomainValidator(false); 080 081 /** 082 * Singleton instance of this validator, which does 083 * consider local addresses valid. 084 */ 085 private static final DomainValidator DOMAIN_VALIDATOR_WITH_LOCAL = new DomainValidator(true); 086 087 /** 088 * RegexValidator for matching domains. 089 */ 090 private final RegexValidator domainRegex = 091 new RegexValidator(DOMAIN_NAME_REGEX); 092 /** 093 * RegexValidator for matching the a local hostname 094 */ 095 private final RegexValidator hostnameRegex = 096 new RegexValidator(DOMAIN_LABEL_REGEX); 097 098 /** 099 * Returns the singleton instance of this validator. It 100 * will not consider local addresses as valid. 101 * @return the singleton instance of this validator 102 */ 103 public static DomainValidator getInstance() { 104 return DOMAIN_VALIDATOR; 105 } 106 107 /** 108 * Returns the singleton instance of this validator, 109 * with local validation as required. 110 * @param allowLocal Should local addresses be considered valid? 111 * @return the singleton instance of this validator 112 */ 113 public static DomainValidator getInstance(boolean allowLocal) { 114 if(allowLocal) { 115 return DOMAIN_VALIDATOR_WITH_LOCAL; 116 } 117 return DOMAIN_VALIDATOR; 118 } 119 120 /** Private constructor. */ 121 private DomainValidator(boolean allowLocal) { 122 this.allowLocal = allowLocal; 123 } 124 125 /** 126 * Returns true if the specified <code>String</code> parses 127 * as a valid domain name with a recognized top-level domain. 128 * The parsing is case-sensitive. 129 * @param domain the parameter to check for domain name syntax 130 * @return true if the parameter is a valid domain name 131 */ 132 public boolean isValid(String domain) { 133 String[] groups = domainRegex.match(domain); 134 if (groups != null && groups.length > 0) { 135 return isValidTld(groups[0]); 136 } else if(allowLocal) { 137 if (hostnameRegex.isValid(domain)) { 138 return true; 139 } 140 } 141 return false; 142 } 143 144 /** 145 * Returns true if the specified <code>String</code> matches any 146 * IANA-defined top-level domain. Leading dots are ignored if present. 147 * The search is case-sensitive. 148 * @param tld the parameter to check for TLD status 149 * @return true if the parameter is a TLD 150 */ 151 public boolean isValidTld(String tld) { 152 if(allowLocal && isValidLocalTld(tld)) { 153 return true; 154 } 155 return isValidInfrastructureTld(tld) 156 || isValidGenericTld(tld) 157 || isValidCountryCodeTld(tld); 158 } 159 160 /** 161 * Returns true if the specified <code>String</code> matches any 162 * IANA-defined infrastructure top-level domain. Leading dots are 163 * ignored if present. The search is case-sensitive. 164 * @param iTld the parameter to check for infrastructure TLD status 165 * @return true if the parameter is an infrastructure TLD 166 */ 167 public boolean isValidInfrastructureTld(String iTld) { 168 return INFRASTRUCTURE_TLD_LIST.contains(chompLeadingDot(iTld.toLowerCase())); 169 } 170 171 /** 172 * Returns true if the specified <code>String</code> matches any 173 * IANA-defined generic top-level domain. Leading dots are ignored 174 * if present. The search is case-sensitive. 175 * @param gTld the parameter to check for generic TLD status 176 * @return true if the parameter is a generic TLD 177 */ 178 public boolean isValidGenericTld(String gTld) { 179 return GENERIC_TLD_LIST.contains(chompLeadingDot(gTld.toLowerCase())); 180 } 181 182 /** 183 * Returns true if the specified <code>String</code> matches any 184 * IANA-defined country code top-level domain. Leading dots are 185 * ignored if present. The search is case-sensitive. 186 * @param ccTld the parameter to check for country code TLD status 187 * @return true if the parameter is a country code TLD 188 */ 189 public boolean isValidCountryCodeTld(String ccTld) { 190 return COUNTRY_CODE_TLD_LIST.contains(chompLeadingDot(ccTld.toLowerCase())); 191 } 192 193 /** 194 * Returns true if the specified <code>String</code> matches any 195 * widely used "local" domains (localhost or localdomain). Leading dots are 196 * ignored if present. The search is case-sensitive. 197 * @param iTld the parameter to check for local TLD status 198 * @return true if the parameter is an local TLD 199 */ 200 public boolean isValidLocalTld(String iTld) { 201 return LOCAL_TLD_LIST.contains(chompLeadingDot(iTld.toLowerCase())); 202 } 203 204 private String chompLeadingDot(String str) { 205 if (str.startsWith(".")) { 206 return str.substring(1); 207 } else { 208 return str; 209 } 210 } 211 212 // --------------------------------------------- 213 // ----- TLDs defined by IANA 214 // ----- Authoritative and comprehensive list at: 215 // ----- http://data.iana.org/TLD/tlds-alpha-by-domain.txt 216 217 private static final String[] INFRASTRUCTURE_TLDS = new String[] { 218 "arpa", // internet infrastructure 219 "root" // diagnostic marker for non-truncated root zone 220 }; 221 222 private static final String[] GENERIC_TLDS = new String[] { 223 "aero", // air transport industry 224 "asia", // Pan-Asia/Asia Pacific 225 "biz", // businesses 226 "cat", // Catalan linguistic/cultural community 227 "com", // commercial enterprises 228 "coop", // cooperative associations 229 "info", // informational sites 230 "jobs", // Human Resource managers 231 "mobi", // mobile products and services 232 "museum", // museums, surprisingly enough 233 "name", // individuals' sites 234 "net", // internet support infrastructure/business 235 "org", // noncommercial organizations 236 "pro", // credentialed professionals and entities 237 "tel", // contact data for businesses and individuals 238 "travel", // entities in the travel industry 239 "gov", // United States Government 240 "edu", // accredited postsecondary US education entities 241 "mil", // United States Military 242 "int" // organizations established by international treaty 243 }; 244 245 private static final String[] COUNTRY_CODE_TLDS = new String[] { 246 "ac", // Ascension Island 247 "ad", // Andorra 248 "ae", // United Arab Emirates 249 "af", // Afghanistan 250 "ag", // Antigua and Barbuda 251 "ai", // Anguilla 252 "al", // Albania 253 "am", // Armenia 254 "an", // Netherlands Antilles 255 "ao", // Angola 256 "aq", // Antarctica 257 "ar", // Argentina 258 "as", // American Samoa 259 "at", // Austria 260 "au", // Australia (includes Ashmore and Cartier Islands and Coral Sea Islands) 261 "aw", // Aruba 262 "ax", // à land 263 "az", // Azerbaijan 264 "ba", // Bosnia and Herzegovina 265 "bb", // Barbados 266 "bd", // Bangladesh 267 "be", // Belgium 268 "bf", // Burkina Faso 269 "bg", // Bulgaria 270 "bh", // Bahrain 271 "bi", // Burundi 272 "bj", // Benin 273 "bm", // Bermuda 274 "bn", // Brunei Darussalam 275 "bo", // Bolivia 276 "br", // Brazil 277 "bs", // Bahamas 278 "bt", // Bhutan 279 "bv", // Bouvet Island 280 "bw", // Botswana 281 "by", // Belarus 282 "bz", // Belize 283 "ca", // Canada 284 "cc", // Cocos (Keeling) Islands 285 "cd", // Democratic Republic of the Congo (formerly Zaire) 286 "cf", // Central African Republic 287 "cg", // Republic of the Congo 288 "ch", // Switzerland 289 "ci", // Côte d'Ivoire 290 "ck", // Cook Islands 291 "cl", // Chile 292 "cm", // Cameroon 293 "cn", // China, mainland 294 "co", // Colombia 295 "cr", // Costa Rica 296 "cu", // Cuba 297 "cv", // Cape Verde 298 "cx", // Christmas Island 299 "cy", // Cyprus 300 "cz", // Czech Republic 301 "de", // Germany 302 "dj", // Djibouti 303 "dk", // Denmark 304 "dm", // Dominica 305 "do", // Dominican Republic 306 "dz", // Algeria 307 "ec", // Ecuador 308 "ee", // Estonia 309 "eg", // Egypt 310 "er", // Eritrea 311 "es", // Spain 312 "et", // Ethiopia 313 "eu", // European Union 314 "fi", // Finland 315 "fj", // Fiji 316 "fk", // Falkland Islands 317 "fm", // Federated States of Micronesia 318 "fo", // Faroe Islands 319 "fr", // France 320 "ga", // Gabon 321 "gb", // Great Britain (United Kingdom) 322 "gd", // Grenada 323 "ge", // Georgia 324 "gf", // French Guiana 325 "gg", // Guernsey 326 "gh", // Ghana 327 "gi", // Gibraltar 328 "gl", // Greenland 329 "gm", // The Gambia 330 "gn", // Guinea 331 "gp", // Guadeloupe 332 "gq", // Equatorial Guinea 333 "gr", // Greece 334 "gs", // South Georgia and the South Sandwich Islands 335 "gt", // Guatemala 336 "gu", // Guam 337 "gw", // Guinea-Bissau 338 "gy", // Guyana 339 "hk", // Hong Kong 340 "hm", // Heard Island and McDonald Islands 341 "hn", // Honduras 342 "hr", // Croatia (Hrvatska) 343 "ht", // Haiti 344 "hu", // Hungary 345 "id", // Indonesia 346 "ie", // Ireland (Ãire) 347 "il", // Israel 348 "im", // Isle of Man 349 "in", // India 350 "io", // British Indian Ocean Territory 351 "iq", // Iraq 352 "ir", // Iran 353 "is", // Iceland 354 "it", // Italy 355 "je", // Jersey 356 "jm", // Jamaica 357 "jo", // Jordan 358 "jp", // Japan 359 "ke", // Kenya 360 "kg", // Kyrgyzstan 361 "kh", // Cambodia (Khmer) 362 "ki", // Kiribati 363 "km", // Comoros 364 "kn", // Saint Kitts and Nevis 365 "kp", // North Korea 366 "kr", // South Korea 367 "kw", // Kuwait 368 "ky", // Cayman Islands 369 "kz", // Kazakhstan 370 "la", // Laos (currently being marketed as the official domain for Los Angeles) 371 "lb", // Lebanon 372 "lc", // Saint Lucia 373 "li", // Liechtenstein 374 "lk", // Sri Lanka 375 "lr", // Liberia 376 "ls", // Lesotho 377 "lt", // Lithuania 378 "lu", // Luxembourg 379 "lv", // Latvia 380 "ly", // Libya 381 "ma", // Morocco 382 "mc", // Monaco 383 "md", // Moldova 384 "me", // Montenegro 385 "mg", // Madagascar 386 "mh", // Marshall Islands 387 "mk", // Republic of Macedonia 388 "ml", // Mali 389 "mm", // Myanmar 390 "mn", // Mongolia 391 "mo", // Macau 392 "mp", // Northern Mariana Islands 393 "mq", // Martinique 394 "mr", // Mauritania 395 "ms", // Montserrat 396 "mt", // Malta 397 "mu", // Mauritius 398 "mv", // Maldives 399 "mw", // Malawi 400 "mx", // Mexico 401 "my", // Malaysia 402 "mz", // Mozambique 403 "na", // Namibia 404 "nc", // New Caledonia 405 "ne", // Niger 406 "nf", // Norfolk Island 407 "ng", // Nigeria 408 "ni", // Nicaragua 409 "nl", // Netherlands 410 "no", // Norway 411 "np", // Nepal 412 "nr", // Nauru 413 "nu", // Niue 414 "nz", // New Zealand 415 "om", // Oman 416 "pa", // Panama 417 "pe", // Peru 418 "pf", // French Polynesia With Clipperton Island 419 "pg", // Papua New Guinea 420 "ph", // Philippines 421 "pk", // Pakistan 422 "pl", // Poland 423 "pm", // Saint-Pierre and Miquelon 424 "pn", // Pitcairn Islands 425 "pr", // Puerto Rico 426 "ps", // Palestinian territories (PA-controlled West Bank and Gaza Strip) 427 "pt", // Portugal 428 "pw", // Palau 429 "py", // Paraguay 430 "qa", // Qatar 431 "re", // Réunion 432 "ro", // Romania 433 "rs", // Serbia 434 "ru", // Russia 435 "rw", // Rwanda 436 "sa", // Saudi Arabia 437 "sb", // Solomon Islands 438 "sc", // Seychelles 439 "sd", // Sudan 440 "se", // Sweden 441 "sg", // Singapore 442 "sh", // Saint Helena 443 "si", // Slovenia 444 "sj", // Svalbard and Jan Mayen Islands Not in use (Norwegian dependencies; see .no) 445 "sk", // Slovakia 446 "sl", // Sierra Leone 447 "sm", // San Marino 448 "sn", // Senegal 449 "so", // Somalia 450 "sr", // Suriname 451 "st", // São Tomé and PrÃncipe 452 "su", // Soviet Union (deprecated) 453 "sv", // El Salvador 454 "sy", // Syria 455 "sz", // Swaziland 456 "tc", // Turks and Caicos Islands 457 "td", // Chad 458 "tf", // French Southern and Antarctic Lands 459 "tg", // Togo 460 "th", // Thailand 461 "tj", // Tajikistan 462 "tk", // Tokelau 463 "tl", // East Timor (deprecated old code) 464 "tm", // Turkmenistan 465 "tn", // Tunisia 466 "to", // Tonga 467 "tp", // East Timor 468 "tr", // Turkey 469 "tt", // Trinidad and Tobago 470 "tv", // Tuvalu 471 "tw", // Taiwan, Republic of China 472 "tz", // Tanzania 473 "ua", // Ukraine 474 "ug", // Uganda 475 "uk", // United Kingdom 476 "um", // United States Minor Outlying Islands 477 "us", // United States of America 478 "uy", // Uruguay 479 "uz", // Uzbekistan 480 "va", // Vatican City State 481 "vc", // Saint Vincent and the Grenadines 482 "ve", // Venezuela 483 "vg", // British Virgin Islands 484 "vi", // U.S. Virgin Islands 485 "vn", // Vietnam 486 "vu", // Vanuatu 487 "wf", // Wallis and Futuna 488 "ws", // Samoa (formerly Western Samoa) 489 "ye", // Yemen 490 "yt", // Mayotte 491 "yu", // Serbia and Montenegro (originally Yugoslavia) 492 "za", // South Africa 493 "zm", // Zambia 494 "zw", // Zimbabwe 495 }; 496 497 private static final String[] LOCAL_TLDS = new String[] { 498 "localhost", // RFC2606 defined 499 "localdomain" // Also widely used as localhost.localdomain 500 }; 501 502 private static final List INFRASTRUCTURE_TLD_LIST = Arrays.asList(INFRASTRUCTURE_TLDS); 503 private static final List GENERIC_TLD_LIST = Arrays.asList(GENERIC_TLDS); 504 private static final List COUNTRY_CODE_TLD_LIST = Arrays.asList(COUNTRY_CODE_TLDS); 505 private static final List LOCAL_TLD_LIST = Arrays.asList(LOCAL_TLDS); 506}