1 /*
2 * Licensed to the Apache Software Foundation (ASF) under one or more
3 * contributor license agreements. See the NOTICE file distributed with
4 * this work for additional information regarding copyright ownership.
5 * The ASF licenses this file to You under the Apache License, Version 2.0
6 * (the "License"); you may not use this file except in compliance with
7 * the License. You may obtain a copy of the License at
8 *
9 * http://www.apache.org/licenses/LICENSE-2.0
10 *
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
16 */
17 package org.apache.commons.validator.routines;
18
import java.io.Serializable;
import java.util.Arrays;
import java.util.List;
import java.util.Locale;
22
23 /**
24 * <p><b>Domain name</b> validation routines.</p>
25 *
26 * <p>
27 * This validator provides methods for validating Internet domain names
28 * and top-level domains.
29 * </p>
30 *
31 * <p>Domain names are evaluated according
32 * to the standards <a href="http://www.ietf.org/rfc/rfc1034.txt">RFC1034</a>,
33 * section 3, and <a href="http://www.ietf.org/rfc/rfc1123.txt">RFC1123</a>,
34 * section 2.1. No accomodation is provided for the specialized needs of
35 * other applications; if the domain name has been URL-encoded, for example,
36 * validation will fail even though the equivalent plaintext version of the
37 * same name would have passed.
38 * </p>
39 *
40 * <p>
41 * Validation is also provided for top-level domains (TLDs) as defined and
42 * maintained by the Internet Assigned Numbers Authority (IANA):
43 * </p>
44 *
45 * <ul>
46 * <li>{@link #isValidInfrastructureTld} - validates infrastructure TLDs
47 * (<code>.arpa</code>, etc.)</li>
48 * <li>{@link #isValidGenericTld} - validates generic TLDs
49 * (<code>.com, .org</code>, etc.)</li>
50 * <li>{@link #isValidCountryCodeTld} - validates country code TLDs
51 * (<code>.us, .uk, .cn</code>, etc.)</li>
52 * </ul>
53 *
54 * <p>
55 * (<b>NOTE</b>: This class does not provide IP address lookup for domain names or
56 * methods to ensure that a given domain name matches a specific IP; see
57 * {@link java.net.InetAddress} for that functionality.)
58 * </p>
59 *
60 * @version $Revision: 1227719 $ $Date: 2012-01-05 12:45:51 -0500 (Thu, 05 Jan 2012) $
61 * @since Validator 1.4
62 */
63 public class DomainValidator implements Serializable {
64
65 private static final long serialVersionUID = -4407125112880174009L;
66
67 // Regular expression strings for hostnames (derived from RFC2396 and RFC 1123)
68 private static final String DOMAIN_LABEL_REGEX = "\\p{Alnum}(?>[\\p{Alnum}-]*\\p{Alnum})*";
69 private static final String TOP_LABEL_REGEX = "\\p{Alpha}{2,}";
70 private static final String DOMAIN_NAME_REGEX =
71 "^(?:" + DOMAIN_LABEL_REGEX + "\\.)+" + "(" + TOP_LABEL_REGEX + ")$";
72
73 private final boolean allowLocal;
74
75 /**
76 * Singleton instance of this validator, which
77 * doesn't consider local addresses as valid.
78 */
79 private static final DomainValidator DOMAIN_VALIDATOR = new DomainValidator(false);
80
81 /**
82 * Singleton instance of this validator, which does
83 * consider local addresses valid.
84 */
85 private static final DomainValidator DOMAIN_VALIDATOR_WITH_LOCAL = new DomainValidator(true);
86
87 /**
88 * RegexValidator for matching domains.
89 */
90 private final RegexValidator domainRegex =
91 new RegexValidator(DOMAIN_NAME_REGEX);
92 /**
93 * RegexValidator for matching the a local hostname
94 */
95 private final RegexValidator hostnameRegex =
96 new RegexValidator(DOMAIN_LABEL_REGEX);
97
98 /**
99 * Returns the singleton instance of this validator. It
100 * will not consider local addresses as valid.
101 * @return the singleton instance of this validator
102 */
103 public static DomainValidator getInstance() {
104 return DOMAIN_VALIDATOR;
105 }
106
107 /**
108 * Returns the singleton instance of this validator,
109 * with local validation as required.
110 * @param allowLocal Should local addresses be considered valid?
111 * @return the singleton instance of this validator
112 */
113 public static DomainValidator getInstance(boolean allowLocal) {
114 if(allowLocal) {
115 return DOMAIN_VALIDATOR_WITH_LOCAL;
116 }
117 return DOMAIN_VALIDATOR;
118 }
119
120 /** Private constructor. */
121 private DomainValidator(boolean allowLocal) {
122 this.allowLocal = allowLocal;
123 }
124
125 /**
126 * Returns true if the specified <code>String</code> parses
127 * as a valid domain name with a recognized top-level domain.
128 * The parsing is case-sensitive.
129 * @param domain the parameter to check for domain name syntax
130 * @return true if the parameter is a valid domain name
131 */
132 public boolean isValid(String domain) {
133 String[] groups = domainRegex.match(domain);
134 if (groups != null && groups.length > 0) {
135 return isValidTld(groups[0]);
136 } else if(allowLocal) {
137 if (hostnameRegex.isValid(domain)) {
138 return true;
139 }
140 }
141 return false;
142 }
143
144 /**
145 * Returns true if the specified <code>String</code> matches any
146 * IANA-defined top-level domain. Leading dots are ignored if present.
147 * The search is case-sensitive.
148 * @param tld the parameter to check for TLD status
149 * @return true if the parameter is a TLD
150 */
151 public boolean isValidTld(String tld) {
152 if(allowLocal && isValidLocalTld(tld)) {
153 return true;
154 }
155 return isValidInfrastructureTld(tld)
156 || isValidGenericTld(tld)
157 || isValidCountryCodeTld(tld);
158 }
159
160 /**
161 * Returns true if the specified <code>String</code> matches any
162 * IANA-defined infrastructure top-level domain. Leading dots are
163 * ignored if present. The search is case-sensitive.
164 * @param iTld the parameter to check for infrastructure TLD status
165 * @return true if the parameter is an infrastructure TLD
166 */
167 public boolean isValidInfrastructureTld(String iTld) {
168 return INFRASTRUCTURE_TLD_LIST.contains(chompLeadingDot(iTld.toLowerCase()));
169 }
170
171 /**
172 * Returns true if the specified <code>String</code> matches any
173 * IANA-defined generic top-level domain. Leading dots are ignored
174 * if present. The search is case-sensitive.
175 * @param gTld the parameter to check for generic TLD status
176 * @return true if the parameter is a generic TLD
177 */
178 public boolean isValidGenericTld(String gTld) {
179 return GENERIC_TLD_LIST.contains(chompLeadingDot(gTld.toLowerCase()));
180 }
181
182 /**
183 * Returns true if the specified <code>String</code> matches any
184 * IANA-defined country code top-level domain. Leading dots are
185 * ignored if present. The search is case-sensitive.
186 * @param ccTld the parameter to check for country code TLD status
187 * @return true if the parameter is a country code TLD
188 */
189 public boolean isValidCountryCodeTld(String ccTld) {
190 return COUNTRY_CODE_TLD_LIST.contains(chompLeadingDot(ccTld.toLowerCase()));
191 }
192
193 /**
194 * Returns true if the specified <code>String</code> matches any
195 * widely used "local" domains (localhost or localdomain). Leading dots are
196 * ignored if present. The search is case-sensitive.
197 * @param iTld the parameter to check for local TLD status
198 * @return true if the parameter is an local TLD
199 */
200 public boolean isValidLocalTld(String iTld) {
201 return LOCAL_TLD_LIST.contains(chompLeadingDot(iTld.toLowerCase()));
202 }
203
204 private String chompLeadingDot(String str) {
205 if (str.startsWith(".")) {
206 return str.substring(1);
207 } else {
208 return str;
209 }
210 }
211
212 // ---------------------------------------------
213 // ----- TLDs defined by IANA
214 // ----- Authoritative and comprehensive list at:
215 // ----- http://data.iana.org/TLD/tlds-alpha-by-domain.txt
216
217 private static final String[] INFRASTRUCTURE_TLDS = new String[] {
218 "arpa", // internet infrastructure
219 "root" // diagnostic marker for non-truncated root zone
220 };
221
222 private static final String[] GENERIC_TLDS = new String[] {
223 "aero", // air transport industry
224 "asia", // Pan-Asia/Asia Pacific
225 "biz", // businesses
226 "cat", // Catalan linguistic/cultural community
227 "com", // commercial enterprises
228 "coop", // cooperative associations
229 "info", // informational sites
230 "jobs", // Human Resource managers
231 "mobi", // mobile products and services
232 "museum", // museums, surprisingly enough
233 "name", // individuals' sites
234 "net", // internet support infrastructure/business
235 "org", // noncommercial organizations
236 "pro", // credentialed professionals and entities
237 "tel", // contact data for businesses and individuals
238 "travel", // entities in the travel industry
239 "gov", // United States Government
240 "edu", // accredited postsecondary US education entities
241 "mil", // United States Military
242 "int" // organizations established by international treaty
243 };
244
245 private static final String[] COUNTRY_CODE_TLDS = new String[] {
246 "ac", // Ascension Island
247 "ad", // Andorra
248 "ae", // United Arab Emirates
249 "af", // Afghanistan
250 "ag", // Antigua and Barbuda
251 "ai", // Anguilla
252 "al", // Albania
253 "am", // Armenia
254 "an", // Netherlands Antilles
255 "ao", // Angola
256 "aq", // Antarctica
257 "ar", // Argentina
258 "as", // American Samoa
259 "at", // Austria
260 "au", // Australia (includes Ashmore and Cartier Islands and Coral Sea Islands)
261 "aw", // Aruba
262 "ax", // Åland
263 "az", // Azerbaijan
264 "ba", // Bosnia and Herzegovina
265 "bb", // Barbados
266 "bd", // Bangladesh
267 "be", // Belgium
268 "bf", // Burkina Faso
269 "bg", // Bulgaria
270 "bh", // Bahrain
271 "bi", // Burundi
272 "bj", // Benin
273 "bm", // Bermuda
274 "bn", // Brunei Darussalam
275 "bo", // Bolivia
276 "br", // Brazil
277 "bs", // Bahamas
278 "bt", // Bhutan
279 "bv", // Bouvet Island
280 "bw", // Botswana
281 "by", // Belarus
282 "bz", // Belize
283 "ca", // Canada
284 "cc", // Cocos (Keeling) Islands
285 "cd", // Democratic Republic of the Congo (formerly Zaire)
286 "cf", // Central African Republic
287 "cg", // Republic of the Congo
288 "ch", // Switzerland
289 "ci", // Côte d'Ivoire
290 "ck", // Cook Islands
291 "cl", // Chile
292 "cm", // Cameroon
293 "cn", // China, mainland
294 "co", // Colombia
295 "cr", // Costa Rica
296 "cu", // Cuba
297 "cv", // Cape Verde
298 "cx", // Christmas Island
299 "cy", // Cyprus
300 "cz", // Czech Republic
301 "de", // Germany
302 "dj", // Djibouti
303 "dk", // Denmark
304 "dm", // Dominica
305 "do", // Dominican Republic
306 "dz", // Algeria
307 "ec", // Ecuador
308 "ee", // Estonia
309 "eg", // Egypt
310 "er", // Eritrea
311 "es", // Spain
312 "et", // Ethiopia
313 "eu", // European Union
314 "fi", // Finland
315 "fj", // Fiji
316 "fk", // Falkland Islands
317 "fm", // Federated States of Micronesia
318 "fo", // Faroe Islands
319 "fr", // France
320 "ga", // Gabon
321 "gb", // Great Britain (United Kingdom)
322 "gd", // Grenada
323 "ge", // Georgia
324 "gf", // French Guiana
325 "gg", // Guernsey
326 "gh", // Ghana
327 "gi", // Gibraltar
328 "gl", // Greenland
329 "gm", // The Gambia
330 "gn", // Guinea
331 "gp", // Guadeloupe
332 "gq", // Equatorial Guinea
333 "gr", // Greece
334 "gs", // South Georgia and the South Sandwich Islands
335 "gt", // Guatemala
336 "gu", // Guam
337 "gw", // Guinea-Bissau
338 "gy", // Guyana
339 "hk", // Hong Kong
340 "hm", // Heard Island and McDonald Islands
341 "hn", // Honduras
342 "hr", // Croatia (Hrvatska)
343 "ht", // Haiti
344 "hu", // Hungary
345 "id", // Indonesia
346 "ie", // Ireland (Éire)
347 "il", // Israel
348 "im", // Isle of Man
349 "in", // India
350 "io", // British Indian Ocean Territory
351 "iq", // Iraq
352 "ir", // Iran
353 "is", // Iceland
354 "it", // Italy
355 "je", // Jersey
356 "jm", // Jamaica
357 "jo", // Jordan
358 "jp", // Japan
359 "ke", // Kenya
360 "kg", // Kyrgyzstan
361 "kh", // Cambodia (Khmer)
362 "ki", // Kiribati
363 "km", // Comoros
364 "kn", // Saint Kitts and Nevis
365 "kp", // North Korea
366 "kr", // South Korea
367 "kw", // Kuwait
368 "ky", // Cayman Islands
369 "kz", // Kazakhstan
370 "la", // Laos (currently being marketed as the official domain for Los Angeles)
371 "lb", // Lebanon
372 "lc", // Saint Lucia
373 "li", // Liechtenstein
374 "lk", // Sri Lanka
375 "lr", // Liberia
376 "ls", // Lesotho
377 "lt", // Lithuania
378 "lu", // Luxembourg
379 "lv", // Latvia
380 "ly", // Libya
381 "ma", // Morocco
382 "mc", // Monaco
383 "md", // Moldova
384 "me", // Montenegro
385 "mg", // Madagascar
386 "mh", // Marshall Islands
387 "mk", // Republic of Macedonia
388 "ml", // Mali
389 "mm", // Myanmar
390 "mn", // Mongolia
391 "mo", // Macau
392 "mp", // Northern Mariana Islands
393 "mq", // Martinique
394 "mr", // Mauritania
395 "ms", // Montserrat
396 "mt", // Malta
397 "mu", // Mauritius
398 "mv", // Maldives
399 "mw", // Malawi
400 "mx", // Mexico
401 "my", // Malaysia
402 "mz", // Mozambique
403 "na", // Namibia
404 "nc", // New Caledonia
405 "ne", // Niger
406 "nf", // Norfolk Island
407 "ng", // Nigeria
408 "ni", // Nicaragua
409 "nl", // Netherlands
410 "no", // Norway
411 "np", // Nepal
412 "nr", // Nauru
413 "nu", // Niue
414 "nz", // New Zealand
415 "om", // Oman
416 "pa", // Panama
417 "pe", // Peru
418 "pf", // French Polynesia With Clipperton Island
419 "pg", // Papua New Guinea
420 "ph", // Philippines
421 "pk", // Pakistan
422 "pl", // Poland
423 "pm", // Saint-Pierre and Miquelon
424 "pn", // Pitcairn Islands
425 "pr", // Puerto Rico
426 "ps", // Palestinian territories (PA-controlled West Bank and Gaza Strip)
427 "pt", // Portugal
428 "pw", // Palau
429 "py", // Paraguay
430 "qa", // Qatar
431 "re", // Réunion
432 "ro", // Romania
433 "rs", // Serbia
434 "ru", // Russia
435 "rw", // Rwanda
436 "sa", // Saudi Arabia
437 "sb", // Solomon Islands
438 "sc", // Seychelles
439 "sd", // Sudan
440 "se", // Sweden
441 "sg", // Singapore
442 "sh", // Saint Helena
443 "si", // Slovenia
444 "sj", // Svalbard and Jan Mayen Islands Not in use (Norwegian dependencies; see .no)
445 "sk", // Slovakia
446 "sl", // Sierra Leone
447 "sm", // San Marino
448 "sn", // Senegal
449 "so", // Somalia
450 "sr", // Suriname
451 "st", // São Tomé and Príncipe
452 "su", // Soviet Union (deprecated)
453 "sv", // El Salvador
454 "sy", // Syria
455 "sz", // Swaziland
456 "tc", // Turks and Caicos Islands
457 "td", // Chad
458 "tf", // French Southern and Antarctic Lands
459 "tg", // Togo
460 "th", // Thailand
461 "tj", // Tajikistan
462 "tk", // Tokelau
463 "tl", // East Timor (deprecated old code)
464 "tm", // Turkmenistan
465 "tn", // Tunisia
466 "to", // Tonga
467 "tp", // East Timor
468 "tr", // Turkey
469 "tt", // Trinidad and Tobago
470 "tv", // Tuvalu
471 "tw", // Taiwan, Republic of China
472 "tz", // Tanzania
473 "ua", // Ukraine
474 "ug", // Uganda
475 "uk", // United Kingdom
476 "um", // United States Minor Outlying Islands
477 "us", // United States of America
478 "uy", // Uruguay
479 "uz", // Uzbekistan
480 "va", // Vatican City State
481 "vc", // Saint Vincent and the Grenadines
482 "ve", // Venezuela
483 "vg", // British Virgin Islands
484 "vi", // U.S. Virgin Islands
485 "vn", // Vietnam
486 "vu", // Vanuatu
487 "wf", // Wallis and Futuna
488 "ws", // Samoa (formerly Western Samoa)
489 "ye", // Yemen
490 "yt", // Mayotte
491 "yu", // Serbia and Montenegro (originally Yugoslavia)
492 "za", // South Africa
493 "zm", // Zambia
494 "zw", // Zimbabwe
495 };
496
497 private static final String[] LOCAL_TLDS = new String[] {
498 "localhost", // RFC2606 defined
499 "localdomain" // Also widely used as localhost.localdomain
500 };
501
502 private static final List INFRASTRUCTURE_TLD_LIST = Arrays.asList(INFRASTRUCTURE_TLDS);
503 private static final List GENERIC_TLD_LIST = Arrays.asList(GENERIC_TLDS);
504 private static final List COUNTRY_CODE_TLD_LIST = Arrays.asList(COUNTRY_CODE_TLDS);
505 private static final List LOCAL_TLD_LIST = Arrays.asList(LOCAL_TLDS);
506 }