blob: 5a74cef87097b3bb3795e0dd02f42bb222109e9a [file] [log] [blame]
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.commons.validator.routines;
import java.io.BufferedReader;
import java.io.Closeable;
import java.io.File;
import java.io.FileOutputStream;
import java.io.FileReader;
import java.io.IOException;
import java.io.InputStream;
import java.lang.reflect.Field;
import java.lang.reflect.Modifier;
import java.net.HttpURLConnection;
import java.net.IDN;
import java.net.URL;
import java.text.SimpleDateFormat;
import java.util.Date;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.Locale;
import java.util.Map;
import java.util.Set;
import java.util.TimeZone;
import java.util.TreeMap;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.apache.commons.validator.routines.DomainValidator.ArrayType;

import junit.framework.TestCase;
/**
 * Tests for the DomainValidator.
 *
 * @version $Revision$
 */
// Test case for DomainValidator; extends the JUnit TestCase base class.
public class DomainValidatorTest extends TestCase {
// Validator instance exercised by the test methods; assigned in setUp().
private DomainValidator validator;
public void setUp() {
validator = DomainValidator.getInstance();
public void testValidDomains() {
assertTrue(" should validate", validator.isValid(""));
assertTrue(" should validate", validator.isValid(""));
assertTrue(" should validate", validator.isValid(""));
assertTrue(" should validate", validator.isValid(""));
assertTrue(" should validate", validator.isValid(""));
assertTrue("two-letter domain label should validate", validator.isValid(""));
assertTrue("case-insensitive ApAchE.Org should validate", validator.isValid("ApAchE.Org"));
assertTrue("single-character domain label should validate", validator.isValid(""));
assertTrue(" should validate", validator.isValid(""));
public void testInvalidDomains() {
assertFalse("bare TLD .org shouldn't validate", validator.isValid(".org"));
assertFalse("domain name with spaces shouldn't validate", validator.isValid(" "));
assertFalse("domain name containing spaces shouldn't validate", validator.isValid("apa"));
assertFalse("domain name starting with dash shouldn't validate", validator.isValid(""));
assertFalse("domain name ending with dash shouldn't validate", validator.isValid(""));
assertFalse("domain name starting with multiple dashes shouldn't validate", validator.isValid(""));
assertFalse("domain name ending with multiple dashes shouldn't validate", validator.isValid(""));
assertFalse("domain name with invalid TLD shouldn't validate", validator.isValid("apache.rog"));
assertFalse("URL shouldn't validate", validator.isValid(""));
assertFalse("Empty string shouldn't validate as domain name", validator.isValid(" "));
assertFalse("Null shouldn't validate as domain name", validator.isValid(null));
public void testTopLevelDomains() {
// infrastructure TLDs
assertTrue(".arpa should validate as iTLD", validator.isValidInfrastructureTld(".arpa"));
assertFalse(".com shouldn't validate as iTLD", validator.isValidInfrastructureTld(".com"));
// generic TLDs
assertTrue(".name should validate as gTLD", validator.isValidGenericTld(".name"));
assertFalse(".us shouldn't validate as gTLD", validator.isValidGenericTld(".us"));
// country code TLDs
assertTrue(".uk should validate as ccTLD", validator.isValidCountryCodeTld(".uk"));
assertFalse(".org shouldn't validate as ccTLD", validator.isValidCountryCodeTld(".org"));
// case-insensitive
assertTrue(".COM should validate as TLD", validator.isValidTld(".COM"));
assertTrue(".BiZ should validate as TLD", validator.isValidTld(".BiZ"));
// corner cases
assertFalse("invalid TLD shouldn't validate", validator.isValid(".nope")); // TODO this is not guaranteed invalid forever
assertFalse("empty string shouldn't validate as TLD", validator.isValid(""));
assertFalse("null shouldn't validate as TLD", validator.isValid(null));
public void testAllowLocal() {
DomainValidator noLocal = DomainValidator.getInstance(false);
DomainValidator allowLocal = DomainValidator.getInstance(true);
// Default is false, and should use singletons
assertEquals(noLocal, validator);
// Default won't allow local
assertFalse("localhost.localdomain should validate", noLocal.isValid("localhost.localdomain"));
assertFalse("localhost should validate", noLocal.isValid("localhost"));
// But it may be requested
assertTrue("localhost.localdomain should validate", allowLocal.isValid("localhost.localdomain"));
assertTrue("localhost should validate", allowLocal.isValid("localhost"));
assertTrue("hostname should validate", allowLocal.isValid("hostname"));
assertTrue("machinename should validate", allowLocal.isValid("machinename"));
// Check the localhost one with a few others
assertTrue(" should validate", allowLocal.isValid(""));
assertFalse("domain name with spaces shouldn't validate", allowLocal.isValid(" "));
public void testIDN() {
assertTrue("b\ in IDN should validate", validator.isValid(""));
public void testIDNJava6OrLater() {
String version = System.getProperty("java.version");
if (version.compareTo("1.6") < 0) {
System.out.println("Cannot run Unicode IDN tests");
return; // Cannot run the test
} // xn--d1abbgf6aiiy.xn--p1ai http://президент.рф
assertTrue("b\ should validate", validator.isValid("www.b\"));
assertTrue("xn--d1abbgf6aiiy.xn--p1ai should validate", validator.isValid("xn--d1abbgf6aiiy.xn--p1ai"));
assertTrue("президент.рф should validate", validator.isValid("президент.рф"));
assertFalse("www.\ FFFD should fail", validator.isValid("www.\"));
// RFC2396: domainlabel = alphanum | alphanum *( alphanum | "-" ) alphanum
public void testRFC2396domainlabel() { // use fixed valid TLD
assertTrue(" should validate", validator.isValid(""));
assertTrue(" should validate", validator.isValid(""));
assertTrue(" should validate", validator.isValid(""));
assertTrue(" should validate", validator.isValid(""));
assertTrue(" should validate", validator.isValid(""));
assertFalse(" should not validate", validator.isValid(""));
assertFalse(" should not validate", validator.isValid(""));
// RFC2396 toplabel = alpha | alpha *( alphanum | "-" ) alphanum
public void testRFC2396toplabel() {
// These tests use non-existent TLDs so currently need to use a package protected method
assertTrue("a.c (alpha) should validate", validator.isValidDomainSyntax("a.c"));
assertTrue(" (alpha alpha) should validate", validator.isValidDomainSyntax(""));
assertTrue("a.c9 (alpha alphanum) should validate", validator.isValidDomainSyntax("a.c9"));
assertTrue("a.c-9 (alpha - alphanum) should validate", validator.isValidDomainSyntax("a.c-9"));
assertTrue("a.c-z (alpha - alpha) should validate", validator.isValidDomainSyntax("a.c-z"));
assertFalse("a.9c (alphanum alpha) should fail", validator.isValidDomainSyntax("a.9c"));
assertFalse("a.c- (alpha -) should fail", validator.isValidDomainSyntax("a.c-"));
assertFalse("a.- (-) should fail", validator.isValidDomainSyntax("a.-"));
assertFalse("a.-9 (- alphanum) should fail", validator.isValidDomainSyntax("a.-9"));
public void testDomainNoDots() {// rfc1123
assertTrue("a (alpha) should validate", validator.isValidDomainSyntax("a"));
assertTrue("9 (alphanum) should validate", validator.isValidDomainSyntax("9"));
assertTrue("c-z (alpha - alpha) should validate", validator.isValidDomainSyntax("c-z"));
assertFalse("c- (alpha -) should fail", validator.isValidDomainSyntax("c-"));
assertFalse("-c (- alpha) should fail", validator.isValidDomainSyntax("-c"));
assertFalse("- (-) should fail", validator.isValidDomainSyntax("-"));
public void testValidator297() {
assertTrue("xn--d1abbgf6aiiy.xn--p1ai should validate", validator.isValid("xn--d1abbgf6aiiy.xn--p1ai")); // This uses a valid TLD
// labels are a max of 63 chars and domains 253
public void testValidator306() {
final String longString = "abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyz0123456789A";
assertEquals(63, longString.length()); // 26 * 2 + 11
assertTrue("63 chars label should validate", validator.isValidDomainSyntax(longString+".com"));
assertFalse("64 chars label should fail", validator.isValidDomainSyntax(longString+""));
assertTrue("63 chars TLD should validate", validator.isValidDomainSyntax("test."+longString));
assertFalse("64 chars TLD should fail", validator.isValidDomainSyntax("test.x"+longString));
final String longDomain =
+ "." + longString
+ "." + longString
+ "." + longString.substring(0,61)
assertEquals(253, longDomain.length());
assertTrue("253 chars domain should validate", validator.isValidDomainSyntax(longDomain));
assertFalse("254 chars domain should fail", validator.isValidDomainSyntax(longDomain+"x"));
// Check that IDN.toASCII behaves as it should (when wrapped by DomainValidator.unicodeToASCII)
// Tests show that method incorrectly trims a trailing "." character
public void testUnicodeToASCII() {
String[] asciidots = {
".", // fails IDN.toASCII, but should pass wrapped version
"a.", // ditto
for(String s : asciidots) {
// RFC3490 3.1. 1)
// Whenever dots are used as label separators, the following
// characters MUST be recognized as dots: U+002E (full stop), U+3002
// (ideographic full stop), U+FF0E (fullwidth full stop), U+FF61
// (halfwidth ideographic full stop).
final String otherDots[][] = {
{"b\u3002", "b.",},
{"b\uFF0E", "b.",},
{"b\uFF61", "b.",},
{"\u3002", ".",},
{"\uFF0E", ".",},
{"\uFF61", ".",},
for(String s[] : otherDots) {
// Check if IDN.toASCII is broken or not
public void testIsIDNtoASCIIBroken() {
final String input = ".";
final boolean ok = input.equals(IDN.toASCII(input));
System.out.println("IDN.toASCII is " + (ok? "OK" : "BROKEN"));
String props[] = {
"java.version", // Java Runtime Environment version
"java.vendor", // Java Runtime Environment vendor
"java.vm.specification.version", // Java Virtual Machine specification version
"java.vm.specification.vendor", // Java Virtual Machine specification vendor
"", // Java Virtual Machine specification name
"java.vm.version", // Java Virtual Machine implementation version
"java.vm.vendor", // Java Virtual Machine implementation vendor
"", // Java Virtual Machine implementation name
"java.specification.version", // Java Runtime Environment specification version
"java.specification.vendor", // Java Runtime Environment specification vendor
"", // Java Runtime Environment specification name
"java.class.version", // Java class format version number
for(String t : props) {
System.out.println(t + "=" + System.getProperty(t));
assertTrue(true); // dummy assertion to satisfy lint
// Check array is sorted and is lower-case
public void test_INFRASTRUCTURE_TLDS_sortedAndLowerCase() throws Exception {
final boolean sorted = isSortedLowerCase("INFRASTRUCTURE_TLDS");
// Check array is sorted and is lower-case
public void test_COUNTRY_CODE_TLDS_sortedAndLowerCase() throws Exception {
final boolean sorted = isSortedLowerCase("COUNTRY_CODE_TLDS");
// Check array is sorted and is lower-case
public void test_GENERIC_TLDS_sortedAndLowerCase() throws Exception {
final boolean sorted = isSortedLowerCase("GENERIC_TLDS");
// Check array is sorted and is lower-case
public void test_LOCAL_TLDS_sortedAndLowerCase() throws Exception {
final boolean sorted = isSortedLowerCase("LOCAL_TLDS");
public void testEnumIsPublic() {
// NOTE(review): no statements of this test are visible - its body (and closing brace) appear to
// have been lost. Presumably it exercised an array accessor keyed by ArrayType; restore from the
// original file rather than guessing.
public void testGetArray() {
// Download and process local copy of
// Check if the internal TLD table is up to date
// Check if the internal TLD tables have any spurious entries
public static void main(String a[]) throws Exception {
// Check the arrays first as this affects later checks
// Doing this here makes it easier when updating the lists
boolean OK = true;
OK &= isSortedLowerCase(list);
if (!OK) {
System.out.println("Fix arrays before retrying; cannot continue");
Set<String> ianaTlds = new HashSet<>(); // keep for comparison with array contents
DomainValidator dv = DomainValidator.getInstance();
File txtFile = new File("target/tlds-alpha-by-domain.txt");
long timestamp = download(txtFile, "", 0L);
final File htmlFile = new File("target/tlds-alpha-by-domain.html");
// N.B. sometimes the html file may be updated a day or so after the txt file
// if the txt file contains entries not found in the html file, try again in a day or two
download(htmlFile,"", timestamp);
BufferedReader br = new BufferedReader(new FileReader(txtFile));
String line;
final String header;
line = br.readLine(); // header
if (line.startsWith("# Version ")) {
header = line.substring(2);
} else {
throw new IOException("File does not have expected Version header");
final boolean generateUnicodeTlds = false; // Change this to generate Unicode TLDs as well
// Parse html page to get entries
Map<String, String[]> htmlInfo = getHtmlInfo(htmlFile);
Map<String, String> missingTLD = new TreeMap<>(); // stores entry and comments as String[]
Map<String, String> missingCC = new TreeMap<>();
while((line = br.readLine()) != null) {
if (!line.startsWith("#")) {
final String unicodeTld; // only different from asciiTld if that was punycode
final String asciiTld = line.toLowerCase(Locale.ENGLISH);
if (line.startsWith("XN--")) {
unicodeTld = IDN.toUnicode(line);
} else {
unicodeTld = asciiTld;
if (!dv.isValidTld(asciiTld)) {
String [] info = htmlInfo.get(asciiTld);
if (info != null) {
String type = info[0];
String comment = info[1];
if ("country-code".equals(type)) { // Which list to use?
missingCC.put(asciiTld, unicodeTld + " " + comment);
if (generateUnicodeTlds) {
missingCC.put(unicodeTld, asciiTld + " " + comment);
} else {
missingTLD.put(asciiTld, unicodeTld + " " + comment);
if (generateUnicodeTlds) {
missingTLD.put(unicodeTld, asciiTld + " " + comment);
} else {
System.err.println("Expected to find HTML info for "+ asciiTld);
// Don't merge these conditions; generateUnicodeTlds is final so needs to be separate to avoid a warning
if (generateUnicodeTlds && !unicodeTld.equals(asciiTld)) {
// List html entries not in TLD text list
for(String key : (new TreeMap<>(htmlInfo)).keySet()) {
if (!ianaTlds.contains(key)) {
if (isNotInRootZone(key)) {
System.out.println("INFO: HTML entry not yet in root zone: "+key);
} else {
System.err.println("WARN: Expected to find text entry for html: "+key);
if (!missingTLD.isEmpty()) {
printMap(header, missingTLD, "TLD");
if (!missingCC.isEmpty()) {
printMap(header, missingCC, "CC");
// Check if internal tables contain any additional entries
isInIanaList("INFRASTRUCTURE_TLDS", ianaTlds);
isInIanaList("COUNTRY_CODE_TLDS", ianaTlds);
isInIanaList("GENERIC_TLDS", ianaTlds);
// Don't check local TLDS isInIanaList("LOCAL_TLDS", ianaTlds);
System.out.println("Finished checks");
private static void printMap(final String header, Map<String, String> map, String string) {
System.out.println("Entries missing from "+ string +" List\n");
if (header != null) {
System.out.println(" // Taken from " + header);
Iterator<Map.Entry<String, String>> it = map.entrySet().iterator();
Map.Entry<String, String> me =;
System.out.println(" \"" + me.getKey() + "\", // " + me.getValue());
private static Map<String, String[]> getHtmlInfo(final File f) throws IOException {
final Map<String, String[]> info = new HashMap<>();
// <td><span class="domain tld"><a href="/domains/root/db/ax.html">.ax</a></span></td>
final Pattern domain = Pattern.compile(".*<a href=\"/domains/root/db/([^.]+)\\.html");
// <td>country-code</td>
final Pattern type = Pattern.compile("\\s+<td>([^<]+)</td>");
// <!-- <td>Åland Islands<br/><span class="tld-table-so">Ålands landskapsregering</span></td> </td> -->
// <td>Ålands landskapsregering</td>
final Pattern comment = Pattern.compile("\\s+<td>([^<]+)</td>");
final BufferedReader br = new BufferedReader(new FileReader(f));
String line;
Matcher m = domain.matcher(line);
if (m.lookingAt()) {
String dom =;
String typ = "??";
String com = "??";
line = br.readLine();
while (line.matches("^\\s*$")) { // extra blank lines introduced
line = br.readLine();
Matcher t = type.matcher(line);
if (t.lookingAt()) {
typ =;
line = br.readLine();
if (line.matches("\\s+<!--.*")) {
line = br.readLine();
line = br.readLine();
// Should have comment; is it wrapped?
line += " " +br.readLine();
Matcher n = comment.matcher(line);
if (n.lookingAt()) {
com =;
// Don't save unused entries
if (com.contains("Not assigned") || com.contains("Retired") || typ.equals("test")) {
// System.out.println("Ignored: " + typ + " " + dom + " " +com);
} else {
info.put(dom.toLowerCase(Locale.ENGLISH), new String[]{typ, com});
// System.out.println("Storing: " + typ + " " + dom + " " +com);
} else {
System.err.println("Unexpected type: " + line);
return info;
* Download a file if it is more recent than our cached copy.
* Unfortunately the server does not seem to honor If-Modified-Since for the
* Html page, so we check if it is newer than the txt file and skip download if so
private static long download(File f, String tldurl, long timestamp) throws IOException {
final int HOUR = 60*60*1000; // an hour in ms
final long modTime;
// For testing purposes, don't download files more than once an hour
if (f.canRead()) {
modTime = f.lastModified();
if (modTime > System.currentTimeMillis()-HOUR) {
System.out.println("Skipping download - found recent " + f);
return modTime;
} else {
modTime = 0;
HttpURLConnection hc = (HttpURLConnection) new URL(tldurl).openConnection();
if (modTime > 0) {
SimpleDateFormat sdf = new SimpleDateFormat("EEE, dd MMM yyyy HH:mm:ss z");//Sun, 06 Nov 1994 08:49:37 GMT
String since = sdf.format(new Date(modTime));
hc.addRequestProperty("If-Modified-Since", since);
System.out.println("Found " + f + " with date " + since);
if (hc.getResponseCode() == 304) {
System.out.println("Already have most recent " + tldurl);
} else {
System.out.println("Downloading " + tldurl);
byte buff[] = new byte[1024];
InputStream is = hc.getInputStream();
FileOutputStream fos = new FileOutputStream(f);
int len;
while(( != -1) {
fos.write(buff, 0, len);
return f.lastModified();
* Check whether the domain is in the root zone currently.
* Reads the URL*domain*.html
* (using a local disk cache)
* and checks for the string "This domain is not present in the root zone at this time."
* @param domain the domain to check
* @return true if the string is found
private static boolean isNotInRootZone(String domain) {
String tldurl = "" + domain + ".html";
File rootCheck = new File("target","tld_" + domain + ".html");
BufferedReader in = null;
try {
download(rootCheck, tldurl, 0L);
in = new BufferedReader(new FileReader(rootCheck));
String inputLine;
while ((inputLine = in.readLine()) != null) {
if (inputLine.contains("This domain is not present in the root zone at this time.")) {
return true;
} catch (IOException e) {
} finally {
return false;
private static void closeQuietly(Closeable in) {
if (in != null) {
try {
} catch (IOException e) {
// isInIanaList and isSorted are split into two methods.
// If/when access to the arrays is possible without reflection, the intermediate
// methods can be dropped
private static boolean isInIanaList(String arrayName, Set<String> ianaTlds) throws Exception {
Field f = DomainValidator.class.getDeclaredField(arrayName);
final boolean isPrivate = Modifier.isPrivate(f.getModifiers());
if (isPrivate) {
String[] array = (String[]) f.get(null);
try {
return isInIanaList(arrayName, array, ianaTlds);
} finally {
if (isPrivate) {
private static boolean isInIanaList(String name, String [] array, Set<String> ianaTlds) {
for (String element : array) {
if (!ianaTlds.contains(element)) {
System.out.println(name + " contains unexpected value: " + element);
return true;
private static boolean isSortedLowerCase(String arrayName) throws Exception {
Field f = DomainValidator.class.getDeclaredField(arrayName);
final boolean isPrivate = Modifier.isPrivate(f.getModifiers());
if (isPrivate) {
String[] array = (String[]) f.get(null);
try {
return isSortedLowerCase(arrayName, array);
} finally {
if (isPrivate) {
private static boolean isLowerCase(String string) {
return string.equals(string.toLowerCase(Locale.ENGLISH));
// Check if an array is strictly sorted - and lowerCase
private static boolean isSortedLowerCase(String name, String [] array) {
boolean sorted = true;
boolean strictlySorted = true;
final int length = array.length;
boolean lowerCase = isLowerCase(array[length-1]); // Check the last entry
for(int i = 0; i < length-1; i++) { // compare all but last entry with next
final String entry = array[i];
final String nextEntry = array[i+1];
final int cmp = entry.compareTo(nextEntry);
if (cmp > 0) { // out of order
System.out.println("Out of order entry: " + entry + " < " + nextEntry + " in " + name);
sorted = false;
} else if (cmp == 0) {
strictlySorted = false;
System.out.println("Duplicated entry: " + entry + " in " + name);
if (!isLowerCase(entry)) {
System.out.println("Non lowerCase entry: " + entry + " in " + name);
lowerCase = false;
return sorted && strictlySorted && lowerCase;