Update the host validation to permit host names and the components of domain names (other than the top-level domain) to start with a digit, and to ensure that top-level domains are fully alphabetic.

git-svn-id: https://svn.apache.org/repos/asf/tomcat/tc8.0.x/trunk@1830255 13f79535-47bb-0310-9956-ffa450edef68
diff --git a/java/org/apache/tomcat/util/http/parser/HttpParser.java b/java/org/apache/tomcat/util/http/parser/HttpParser.java
index d9fe9df..4ca107f 100644
--- a/java/org/apache/tomcat/util/http/parser/HttpParser.java
+++ b/java/org/apache/tomcat/util/http/parser/HttpParser.java
@@ -513,6 +513,9 @@
         int c;
         int pos = 0;
 
+        // readAheadLimit doesn't matter as all the readers passed to this
+        // method buffer the entire content.
+        reader.mark(1);
         do {
             c = reader.read();
             if (c == '.') {
@@ -520,9 +523,14 @@
                     // Valid
                     octetCount++;
                     octet = -1;
-                } else {
+                } else if (inIPv6 || octet == -1) {
                     throw new IllegalArgumentException(
                             sm.getString("http.invalidOctet", Integer.toString(octet)));
+                } else {
+                    // Might not be an IPv4 address. Could be a host / FQDN with
+                    // a fully numeric component.
+                    reader.reset();
+                    return readHostDomainName(reader);
                 }
             } else if (isNumeric(c)) {
                 if (octet == -1) {
@@ -546,6 +554,10 @@
                 } else {
                     throw new IllegalArgumentException(sm.getString("http.closingBracket"));
                 }
+            } else if (!inIPv6 && (isAlpha(c) || c == '-')) {
+                // Go back to the start and parse as a host / FQDN
+                reader.reset();
+                return readHostDomainName(reader);
             } else {
                 throw new IllegalArgumentException(sm.getString(
                         "http.illegalCharacterIpv4", Character.toString((char) c)));
@@ -554,8 +566,11 @@
         } while (true);
 
         if (octetCount != 4) {
-            throw new IllegalArgumentException(
-                    sm.getString("http.wrongOctetCount", Integer.toString(octetCount)));
+            // Might not be an IPv4 address. Could be a host name or a FQDN with
+            // fully numeric components. Go back to the start and parse as a
+            // host / FQDN.
+            reader.reset();
+            return readHostDomainName(reader);
         }
         if (octet < 0 || octet > 255) {
             throw new IllegalArgumentException(
@@ -671,9 +686,13 @@
     static int readHostDomainName(Reader reader) throws IOException {
         DomainParseState state = DomainParseState.NEW;
         int pos = 0;
+        int segmentIndex = 0;
 
         while (state.mayContinue()) {
-            state = state.next(reader.read());
+            state = state.next(reader.read(), segmentIndex);
+            if (DomainParseState.PERIOD == state) {
+                segmentIndex++;
+            }
             pos++;
         }
 
@@ -701,28 +720,32 @@
         }
     }
 
+    private enum AllowsEnd {
+        NEVER,
+        FIRST,
+        ALWAYS
+    }
 
     private enum DomainParseState {
-        NEW(     true, false, false, false, false, false, " at the start of"),
-        ALPHA(   true,  true,  true,  true,  true,  true, " after a letter in"),
-        NUMERIC( true,  true,  true,  true,  true,  true, " after a number in"),
-        PERIOD(  true, false, false, false,  true,  true, " after a period in"),
-        HYPHEN(  true,  true,  true, false, false, false, " after a hypen in"),
-        COLON(  false, false, false, false, false, false, " after a colon in"),
-        END(    false, false, false, false, false, false, " at the end of");
+        NEW(       true, false, false,  AllowsEnd.NEVER,  AllowsEnd.NEVER, " at the start of"),
+        ALL_ALPHA( true,  true,  true, AllowsEnd.ALWAYS, AllowsEnd.ALWAYS, " after a letter in"),
+        ALPHA(     true,  true,  true,  AllowsEnd.FIRST,  AllowsEnd.FIRST, " after a letter in"),
+        NUMERIC(   true,  true,  true,  AllowsEnd.FIRST,  AllowsEnd.FIRST, " after a number in"),
+        PERIOD(    true, false, false,  AllowsEnd.NEVER,  AllowsEnd.NEVER, " after a period in"),
+        HYPHEN(    true,  true, false,  AllowsEnd.NEVER,  AllowsEnd.NEVER, " after a hypen in"),
+        COLON(    false, false, false,  AllowsEnd.NEVER,  AllowsEnd.NEVER, " after a colon in"),
+        END(      false, false, false,  AllowsEnd.NEVER,  AllowsEnd.NEVER, " at the end of");
 
         private final boolean mayContinue;
-        private final boolean allowsNumeric;
         private final boolean allowsHyphen;
         private final boolean allowsPeriod;
-        private final boolean allowsColon;
-        private final boolean allowsEnd;
+        private final AllowsEnd allowsColon;
+        private final AllowsEnd allowsEnd;
         private final String errorLocation;
 
-        private DomainParseState(boolean mayContinue, boolean allowsNumeric, boolean allowsHyphen,
-                boolean allowsPeriod, boolean allowsColon, boolean allowsEnd, String errorLocation) {
+        private DomainParseState(boolean mayContinue, boolean allowsHyphen, boolean allowsPeriod,
+                AllowsEnd allowsColon, AllowsEnd allowsEnd, String errorLocation) {
             this.mayContinue = mayContinue;
-            this.allowsNumeric = allowsNumeric;
             this.allowsHyphen = allowsHyphen;
             this.allowsPeriod = allowsPeriod;
             this.allowsColon = allowsColon;
@@ -734,16 +757,15 @@
             return mayContinue;
         }
 
-        public DomainParseState next(int c) {
+        public DomainParseState next(int c, int segmentIndex) {
             if (HttpParser.isAlpha(c)) {
-                return ALPHA;
-            } else if (HttpParser.isNumeric(c)) {
-                if (allowsNumeric) {
-                    return NUMERIC;
+                if (ALL_ALPHA == this || NEW == this || PERIOD == this) {
+                    return ALL_ALPHA;
                 } else {
-                    throw new IllegalArgumentException(sm.getString("http.invalidCharacterDomain",
-                            Character.toString((char) c), errorLocation));
+                    return ALPHA;
                 }
+            } else if (HttpParser.isNumeric(c)) {
+                return NUMERIC;
             } else if (c == '.') {
                 if (allowsPeriod) {
                     return PERIOD;
@@ -752,14 +774,16 @@
                             Character.toString((char) c), errorLocation));
                 }
             } else if (c == ':') {
-                if (allowsColon) {
+                if (allowsColon == AllowsEnd.ALWAYS ||
+                        allowsColon == AllowsEnd.FIRST && segmentIndex == 0) {
                     return COLON;
                 } else {
                     throw new IllegalArgumentException(sm.getString("http.invalidCharacterDomain",
                             Character.toString((char) c), errorLocation));
                 }
             } else if (c == -1) {
-                if (allowsEnd) {
+                if (allowsEnd == AllowsEnd.ALWAYS ||
+                        allowsEnd == AllowsEnd.FIRST && segmentIndex == 0) {
                     return END;
                 } else {
                     throw new IllegalArgumentException(sm.getString("http.invalidCharacterDomain",
diff --git a/test/org/apache/tomcat/util/http/parser/TestHttpParserHost.java b/test/org/apache/tomcat/util/http/parser/TestHttpParserHost.java
index a6f98ba..0284834 100644
--- a/test/org/apache/tomcat/util/http/parser/TestHttpParserHost.java
+++ b/test/org/apache/tomcat/util/http/parser/TestHttpParserHost.java
@@ -46,7 +46,7 @@
     public Class<? extends Exception> expectedException;
 
 
-    @Parameters
+    @Parameters(name="{index}: host {1}")
     public static Collection<Object[]> inputs() {
         List<Object[]> result = new ArrayList<>();
         // IPv4 - valid
@@ -54,8 +54,8 @@
         result.add(new Object[] { TestType.IPv4, "127.0.0.1:8080", Integer.valueOf(9), null} );
         result.add(new Object[] { TestType.IPv4, "0.0.0.0", Integer.valueOf(-1), null} );
         result.add(new Object[] { TestType.IPv4, "0.0.0.0:8080", Integer.valueOf(7), null} );
+        result.add(new Object[] { TestType.IPv4, "0", Integer.valueOf(-1), null} );
         // IPv4 - invalid
-        result.add(new Object[] { TestType.IPv4, "0", Integer.valueOf(-1), IAE} );
         result.add(new Object[] { TestType.IPv4, "0.0", Integer.valueOf(-1), IAE} );
         result.add(new Object[] { TestType.IPv4, "0.0.0", Integer.valueOf(-1), IAE} );
         result.add(new Object[] { TestType.IPv4, ".0.0.0", Integer.valueOf(-1), IAE} );
@@ -68,23 +68,48 @@
         result.add(new Object[] { TestType.IPv4, "0..0.0", Integer.valueOf(-1), IAE} );
         result.add(new Object[] { TestType.IPv4, "0]", Integer.valueOf(-1), IAE} );
         // Domain Name - valid
-        result.add(new Object[] { TestType.DOMAIN_NAME, "localhost", Integer.valueOf(-1), null} );
-        result.add(new Object[] { TestType.DOMAIN_NAME, "localhost:8080", Integer.valueOf(9), null} );
-        result.add(new Object[] { TestType.DOMAIN_NAME, "tomcat.apache.org", Integer.valueOf(-1), null} );
-        result.add(new Object[] { TestType.DOMAIN_NAME, "tomcat.apache.org:8080", Integer.valueOf(17), null} );
+        result.add(new Object[] { TestType.IPv4, "localhost", Integer.valueOf(-1), null} );
+        result.add(new Object[] { TestType.IPv4, "localhost:8080", Integer.valueOf(9), null} );
+        result.add(new Object[] { TestType.IPv4, "tomcat.apache.org", Integer.valueOf(-1), null} );
+        result.add(new Object[] { TestType.IPv4, "tomcat.apache.org:8080", Integer.valueOf(17), null} );
+        result.add(new Object[] { TestType.IPv4, "0.0.0.com", Integer.valueOf(-1), null} );
+        result.add(new Object[] { TestType.IPv4, "0.0.0.com:8080", Integer.valueOf(9), null} );
+        result.add(new Object[] { TestType.IPv4, "0.0.0.0.com", Integer.valueOf(-1), null} );
+        result.add(new Object[] { TestType.IPv4, "0.0.0.0.com:8080", Integer.valueOf(11), null} );
+        result.add(new Object[] { TestType.IPv4, "foo.0.0.com", Integer.valueOf(-1), null} );
+        result.add(new Object[] { TestType.IPv4, "foo.0.0.com:8080", Integer.valueOf(11), null} );
+        result.add(new Object[] { TestType.IPv4, "1foo.0.0.com", Integer.valueOf(-1), null} );
+        result.add(new Object[] { TestType.IPv4, "1foo.0.0.com:8080", Integer.valueOf(12), null} );
+        result.add(new Object[] { TestType.IPv4, "1-foo.0.0.com", Integer.valueOf(-1), null} );
+        result.add(new Object[] { TestType.IPv4, "1-foo.0.0.com:8080", Integer.valueOf(13), null} );
+        result.add(new Object[] { TestType.IPv4, "1--foo.0.0.com", Integer.valueOf(-1), null} );
+        result.add(new Object[] { TestType.IPv4, "1--foo.0.0.com:8080", Integer.valueOf(14), null} );
+        result.add(new Object[] { TestType.IPv4, "com", Integer.valueOf(-1), null} );
+        result.add(new Object[] { TestType.IPv4, "com:8080", Integer.valueOf(3), null} );
+        result.add(new Object[] { TestType.IPv4, "0com", Integer.valueOf(-1), null} );
+        result.add(new Object[] { TestType.IPv4, "0com:8080", Integer.valueOf(4), null} );
+        result.add(new Object[] { TestType.IPv4, "123", Integer.valueOf(-1), null} );
+        result.add(new Object[] { TestType.IPv4, "123:8080", Integer.valueOf(3), null} );
         // Domain Name - invalid
-        result.add(new Object[] { TestType.DOMAIN_NAME, ".foo.bar", Integer.valueOf(-1), IAE} );
-        result.add(new Object[] { TestType.DOMAIN_NAME, "2foo.bar", Integer.valueOf(-1), IAE} );
-        result.add(new Object[] { TestType.DOMAIN_NAME, "-foo.bar", Integer.valueOf(-1), IAE} );
-        result.add(new Object[] { TestType.DOMAIN_NAME, "^foo.bar", Integer.valueOf(-1), IAE} );
-        result.add(new Object[] { TestType.DOMAIN_NAME, "foo-.bar", Integer.valueOf(-1), IAE} );
-        result.add(new Object[] { TestType.DOMAIN_NAME, "f*oo.bar", Integer.valueOf(-1), IAE} );
-        result.add(new Object[] { TestType.DOMAIN_NAME, "foo..bar", Integer.valueOf(-1), IAE} );
-        result.add(new Object[] { TestType.DOMAIN_NAME, "foo.2bar", Integer.valueOf(-1), IAE} );
-        result.add(new Object[] { TestType.DOMAIN_NAME, "foo.-bar", Integer.valueOf(-1), IAE} );
-        result.add(new Object[] { TestType.DOMAIN_NAME, "foo.^bar", Integer.valueOf(-1), IAE} );
-        result.add(new Object[] { TestType.DOMAIN_NAME, "foo.bar-", Integer.valueOf(-1), IAE} );
-        result.add(new Object[] { TestType.DOMAIN_NAME, "foo.b*ar", Integer.valueOf(-1), IAE} );
+        result.add(new Object[] { TestType.IPv4, ".", Integer.valueOf(-1), IAE} );
+        result.add(new Object[] { TestType.IPv4, ".:8080", Integer.valueOf(-1), IAE} );
+        result.add(new Object[] { TestType.IPv4, ".foo.bar", Integer.valueOf(-1), IAE} );
+        result.add(new Object[] { TestType.IPv4, "-foo.bar", Integer.valueOf(-1), IAE} );
+        result.add(new Object[] { TestType.IPv4, "foo.bar.", Integer.valueOf(-1), IAE} );
+        result.add(new Object[] { TestType.IPv4, "foo.bar-", Integer.valueOf(-1), IAE} );
+        result.add(new Object[] { TestType.IPv4, "foo.bar.:8080", Integer.valueOf(-1), IAE} );
+        result.add(new Object[] { TestType.IPv4, "foo.bar-:8080", Integer.valueOf(-1), IAE} );
+        result.add(new Object[] { TestType.IPv4, "^foo.bar", Integer.valueOf(-1), IAE} );
+        result.add(new Object[] { TestType.IPv4, "foo-.bar", Integer.valueOf(-1), IAE} );
+        result.add(new Object[] { TestType.IPv4, "f*oo.bar", Integer.valueOf(-1), IAE} );
+        result.add(new Object[] { TestType.IPv4, "foo..bar", Integer.valueOf(-1), IAE} );
+        result.add(new Object[] { TestType.IPv4, "foo.-bar", Integer.valueOf(-1), IAE} );
+        result.add(new Object[] { TestType.IPv4, "foo.^bar", Integer.valueOf(-1), IAE} );
+        result.add(new Object[] { TestType.IPv4, "foo.b*ar", Integer.valueOf(-1), IAE} );
+        result.add(new Object[] { TestType.IPv4, "0.0.0com", Integer.valueOf(-1), IAE} );
+        result.add(new Object[] { TestType.IPv4, "0.0.0.0com", Integer.valueOf(-1), IAE} );
+        result.add(new Object[] { TestType.IPv4, "foo.bar.0com", Integer.valueOf(-1), IAE} );
+        result.add(new Object[] { TestType.IPv4, "foo.bar.0com:8080", Integer.valueOf(-1), IAE} );
         // IPv6 - valid
         result.add(new Object[] { TestType.IPv6, "[::1]", Integer.valueOf(-1), null} );
         result.add(new Object[] { TestType.IPv6, "[::1]:8080", Integer.valueOf(5), null} );
@@ -138,6 +163,12 @@
         result.add(new Object[] { TestType.IPv6, "[0::0::127.0.0.1]", Integer.valueOf(-1), IAE} );
         result.add(new Object[] { TestType.IPv6, "[0:0:G:0:0:0:127.0.0.1]", Integer.valueOf(-1), IAE} );
         result.add(new Object[] { TestType.IPv6, "[00000:0:0:0:0:0:127.0.0.1]", Integer.valueOf(-1), IAE} );
+        result.add(new Object[] { TestType.IPv6, "[1::127..0.1]", Integer.valueOf(-1), IAE} );
+        result.add(new Object[] { TestType.IPv6, "[1::127..0.1]:8080", Integer.valueOf(-1), IAE} );
+        result.add(new Object[] { TestType.IPv6, "[1::127.a.0.1]", Integer.valueOf(-1), IAE} );
+        result.add(new Object[] { TestType.IPv6, "[1::127.a.0.1]:8080", Integer.valueOf(-1), IAE} );
+        result.add(new Object[] { TestType.IPv6, "[1::127.-.0.1]", Integer.valueOf(-1), IAE} );
+        result.add(new Object[] { TestType.IPv6, "[1::127.-.0.1]:8080", Integer.valueOf(-1), IAE} );
         result.add(new Object[] { TestType.IPv6, "[::1]'", Integer.valueOf(-1), IAE} );
         result.add(new Object[] { TestType.IPv6, "[:2222:3333:4444:5555:6666:7777:8888]",
                 Integer.valueOf(-1), IAE} );
@@ -167,9 +198,6 @@
                 case IPv6:
                     result = HttpParser.readHostIPv6(sr);
                     break;
-                case DOMAIN_NAME:
-                    result = HttpParser.readHostDomainName(sr);
-                    break;
 
             }
         } catch (Exception e) {
@@ -187,7 +215,6 @@
 
     private static enum TestType {
         IPv4,
-        IPv6,
-        DOMAIN_NAME
+        IPv6
     }
 }