java/org/apache/tomcat/util/http/parser/HttpParser.java - tomcat80 - Git at Google

 /*
  * Licensed to the Apache Software Foundation (ASF) under one or more
  * contributor license agreements.  See the NOTICE file distributed with
  * this work for additional information regarding copyright ownership.
  * The ASF licenses this file to You under the Apache License, Version 2.0
  * (the "License"); you may not use this file except in compliance with
  * the License.  You may obtain a copy of the License at
  *
  *      http://www.apache.org/licenses/LICENSE-2.0
  *
  * Unless required by applicable law or agreed to in writing, software
  * distributed under the License is distributed on an "AS IS" BASIS,
  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
 package org.apache.tomcat.util.http.parser;

 import java.io.IOException;
 import java.io.StringReader;

 /**
  * HTTP header value parser implementation. Parsing HTTP headers as per RFC2616
  * is not always as simple as it first appears. For headers that only use tokens
  * the simple approach will normally be sufficient. However, for the other
  * headers, while simple code meets 99.9% of cases, there are often some edge
  * cases that make things far more complicated.
  *
  * The purpose of this parser is to let the parser worry about the edge cases.
  * It provides tolerant (where safe to do so) parsing of HTTP header values
  * assuming that wrapped header lines have already been unwrapped. (The Tomcat
  * header processing code does the unwrapping.)
  *
  */
 public class HttpParser {

     // Size of the lookup tables: one flag per 7-bit US-ASCII code point
     private static final int ARRAY_SIZE = 128;

     // RFC 2616 separators. None of these may appear in a token.
     private static final String SEPARATORS = "()<>@,;:\\\"/[]?={} \t";

     // Lookup tables used by isToken(), isHex()
     private static final boolean[] IS_TOKEN = new boolean[ARRAY_SIZE];
     private static final boolean[] IS_HEX = new boolean[ARRAY_SIZE];

     static {
         for (int i = 0; i < ARRAY_SIZE; i++) {
             // CTL is 0-31 plus DEL (127). Neither CTLs nor separators are
             // valid token characters.
             boolean control = i < 32 || i == 127;
             IS_TOKEN[i] = !control && SEPARATORS.indexOf(i) < 0;

             IS_HEX[i] = (i >= '0' && i <= '9') || (i >= 'A' && i <= 'F') ||
                     (i >= 'a' && i <= 'f');
         }
     }

     /**
      * Removes the surrounding double quotes from a quoted string and resolves
      * backslash escapes (the backslash is dropped and the character that
      * follows it is kept as is).
      *
      * @param input the value to unquote
      *
      * @return the unquoted value. The input is returned unchanged if it is
      *         <code>null</code>, shorter than two characters or does not
      *         start with a double quote. The final character is dropped
      *         without checking that it is a closing quote.
      */
     public static String unquote(String input) {
         if (input == null || input.length() < 2 || input.charAt(0) != '"') {
             return input;
         }

         int end = input.length() - 1;
         StringBuilder result = new StringBuilder(end);
         for (int i = 1; i < end; i++) {
             char c = input.charAt(i);
             if (c == '\\') {
                 // i < end, so i + 1 is always a valid index
                 c = input.charAt(++i);
             }
             result.append(c);
         }
         return result.toString();
     }

     /**
      * @param c the character (or -1 for EOF) to test
      *
      * @return <code>true</code> if c is a US-ASCII character that is neither
      *         a control character nor a separator
      */
     static boolean isToken(int c) {
         return c >= 0 && c < ARRAY_SIZE && IS_TOKEN[c];
     }

     /**
      * @param c the character (or -1 for EOF) to test
      *
      * @return <code>true</code> if c is one of 0-9, A-F or a-f
      */
     static boolean isHex(int c) {
         return c >= 0 && c < ARRAY_SIZE && IS_HEX[c];
     }

     /**
      * Skips any LWS (space, tab, CR, LF).
      *
      * @param input     the reader to consume from
      * @param withReset if <code>true</code> the first non-LWS character is
      *                  left unread (this uses the reader's mark); if
      *                  <code>false</code> it is consumed
      *
      * @return the first non-LWS character or -1 if the end of the input was
      *         reached
      */
     static int skipLws(StringReader input, boolean withReset) throws IOException {
         int c;
         do {
             if (withReset) {
                 // Remember the position of the character about to be read
                 input.mark(1);
             }
             c = input.read();
         } while (c == ' ' || c == '\t' || c == '\n' || c == '\r');

         if (withReset) {
             input.reset();
         }
         return c;
     }

     /**
      * Skips any LWS and then the given constant.
      *
      * @param input    the reader to consume from
      * @param constant the text that is expected next
      *
      * @return {@link SkipResult#FOUND} if the constant was read,
      *         {@link SkipResult#EOF} if there was no data after the LWS or
      *         {@link SkipResult#NOT_FOUND} if something else was found, in
      *         which case the reader is positioned at the first non-LWS
      *         character
      */
     static SkipResult skipConstant(StringReader input, String constant) throws IOException {
         int len = constant.length();

         skipLws(input, true);
         // Use mark()/reset() rather than skip(-n). StringReader.skip() is a
         // NO-OP once the end of the input has been reached.
         input.mark(len);

         for (int i = 0; i < len; i++) {
             int c = input.read();
             if (i == 0 && c == -1) {
                 return SkipResult.EOF;
             }
             if (c != constant.charAt(i)) {
                 input.reset();
                 return SkipResult.NOT_FOUND;
             }
         }
         return SkipResult.FOUND;
     }

     /**
      * Reads a token, skipping any leading LWS. The character that terminates
      * the token is left unread.
      *
      * @return  the token if one was found, the empty string if no data was
      *          available to read or <code>null</code> if data other than a
      *          token was found
      */
     static String readToken(StringReader input) throws IOException {
         StringBuilder result = new StringBuilder();

         skipLws(input, true);
         input.mark(1);
         int c = input.read();

         while (isToken(c)) {
             result.append((char) c);
             input.mark(1);
             c = input.read();
         }
         // Go back so the non-token character is available for the next read.
         // skip(-1) can not be used as it is a NO-OP when the non-token
         // character is the last character of the input.
         input.reset();

         if (c != -1 && result.length() == 0) {
             return null;
         }
         return result.toString();
     }

     /**
      * Reads a quoted string, skipping any leading LWS.
      *
      * @param input        the reader to consume from
      * @param returnQuoted if <code>true</code> the surrounding quotes and the
      *                     escape characters are retained in the result
      *
      * @return the quoted string if one was found, null if data other than a
      *         quoted string was found or null if the end of data was reached
      *         before the quoted string was terminated
      */
     static String readQuotedString(StringReader input, boolean returnQuoted) throws IOException {

         int c = skipLws(input, false);
         if (c != '"') {
             return null;
         }

         StringBuilder result = new StringBuilder();
         if (returnQuoted) {
             result.append('"');
         }

         c = input.read();
         while (c != '"') {
             if (c == -1) {
                 return null;
             }
             if (c == '\\') {
                 c = input.read();
                 if (c == -1) {
                     // Escape character with nothing to escape
                     return null;
                 }
                 if (returnQuoted) {
                     result.append('\\');
                 }
             }
             // The cast matters: append(int) would add the decimal digits of
             // the character code rather than the character
             result.append((char) c);
             c = input.read();
         }

         if (returnQuoted) {
             result.append('"');
         }
         return result.toString();
     }

     /**
      * Reads a quoted string if the next non-LWS character is a double quote,
      * otherwise reads a token.
      *
      * @return the result of {@link #readQuotedString(StringReader, boolean)}
      *         or {@link #readToken(StringReader)} as appropriate
      */
     static String readTokenOrQuotedString(StringReader input, boolean returnQuoted)
             throws IOException {

         // Peek only. The first non-LWS character must be read again below.
         int c = skipLws(input, true);

         return c == '"' ? readQuotedString(input, returnQuoted) : readToken(input);
     }

     /**
      * Token can be read unambiguously with or without surrounding quotes so
      * this parsing method for token permits optional surrounding double quotes.
      * This is not defined in any RFC. It is a special case to handle data from
      * buggy clients (known buggy clients for DIGEST auth include Microsoft IE 8
      * &amp; 9, Apple Safari for OSX and iOS) that add quotes to values that
      * should be tokens.
      *
      * @return the token if one was found, null if data other than a token or
      *         quoted token was found or null if the end of data was reached
      *         before a quoted token was terminated
      */
     static String readQuotedToken(StringReader input) throws IOException {
         return readOptionallyQuoted(input, false);
     }

     /**
      * LHEX can be read unambiguously with or without surrounding quotes so this
      * parsing method for LHEX permits optional surrounding double quotes. Some
      * buggy clients (libwww-perl for DIGEST auth) are known to send quoted LHEX
      * when the specification requires just LHEX.
      *
      * <p>
      * LHEX are, literally, lower-case hexadecimal digits. This implementation
      * allows for upper-case digits as well, converting the returned value to
      * lower-case.
      *
      * @return  the sequence of LHEX (minus any surrounding quotes) if any was
      *          found, or <code>null</code> if data other than LHEX was found
      */
     static String readLhex(StringReader input) throws IOException {
         return readOptionallyQuoted(input, true);
     }

     // Shared implementation of readQuotedToken() (hex == false) and
     // readLhex() (hex == true): reads a run of valid characters that may be
     // wrapped in double quotes.
     private static String readOptionallyQuoted(StringReader input, boolean hex)
             throws IOException {

         StringBuilder result = new StringBuilder();
         boolean quoted = false;

         int c = skipLws(input, false);
         if (c == '"') {
             quoted = true;
         } else if (!isValid(c, hex)) {
             return null;
         } else {
             result.append(normalise(c, hex));
         }

         input.mark(1);
         c = input.read();
         while (isValid(c, hex)) {
             result.append(normalise(c, hex));
             input.mark(1);
             c = input.read();
         }

         if (quoted) {
             if (c != '"') {
                 return null;
             }
         } else {
             // Go back so the terminating character is available for the
             // next read. skip(-1) is a NO-OP at the end of the input.
             input.reset();
         }

         // Only possible for an empty pair of quotes
         if (result.length() == 0) {
             return null;
         }
         return result.toString();
     }

     // Is c acceptable for the kind of value being read?
     private static boolean isValid(int c, boolean hex) {
         return hex ? isHex(c) : isToken(c);
     }

     // Converts upper-case hex digits to lower-case; tokens are left as is.
     private static char normalise(int c, boolean hex) {
         if (hex && c >= 'A' && c <= 'F') {
             return (char) (c + ('a' - 'A'));
         }
         return (char) c;
     }

     /**
      * Reads an optional quality value of the form <code>q=0.123</code>. On
      * return the reader is positioned after the delimiter that ended the
      * value or at the end of the input.
      *
      * @param input     the reader to consume from
      * @param delimiter the character that separates entries in the header
      *
      * @return 1 if no quality was present, 0 if the quality was malformed or
      *         greater than 1, otherwise the quality using at most three
      *         decimal places
      */
     static double readWeight(StringReader input, char delimiter) throws IOException {
         int c = skipLws(input, false);
         if (c == -1 || c == delimiter) {
             // No q value just whitespace
             return 1;
         } else if (c != 'q') {
             // Malformed. Use quality of zero so it is dropped.
             skipUntil(input, c, delimiter);
             return 0;
         }
         // RFC 7231 does not allow whitespace here but be tolerant
         c = skipLws(input, false);
         if (c != '=') {
             // Malformed. Use quality of zero so it is dropped.
             skipUntil(input, c, delimiter);
             return 0;
         }

         // RFC 7231 does not allow whitespace here but be tolerant
         c = skipLws(input, false);
         if (c != '0' && c != '1') {
             // Malformed. Use quality of zero so it is dropped and skip until
             // EOF or the next delimiter
             skipUntil(input, c, delimiter);
             return 0;
         }

         // Should be no more than 3 decimal places
         StringBuilder value = new StringBuilder(5);
         value.append((char) c);
         c = input.read();

         // -1 until the decimal point has been seen
         int decimalPlacesRead = -1;
         while (true) {
             if (decimalPlacesRead == -1 && c == '.') {
                 value.append('.');
                 decimalPlacesRead = 0;
             } else if (decimalPlacesRead > -1 && c >= '0' && c <= '9') {
                 // Additional decimal places are silently dropped
                 if (decimalPlacesRead < 3) {
                     value.append((char) c);
                     decimalPlacesRead++;
                 }
             } else {
                 // c still has to be checked below, do not read past it
                 break;
             }
             c = input.read();
         }

         if (c == ' ' || c == '\t') {
             c = skipLws(input, false);
         }

         // Must be at delimiter or EOF
         if (c != delimiter && c != -1) {
             // Malformed. Use quality of zero so it is dropped and skip until
             // EOF or the next delimiter
             skipUntil(input, c, delimiter);
             return 0;
         }

         double result = Double.parseDouble(value.toString());
         return result > 1 ? 0 : result;
     }


     /**
      * Skips all characters until EOF or the specified target is found. Normally
      * used to skip invalid input until the next separator.
      *
      * @param input  the reader to consume from
      * @param c      the character most recently read from input
      * @param target the character to stop at (it is consumed)
      *
      * @return {@link SkipResult#EOF} if the end of the input was reached,
      *         otherwise {@link SkipResult#FOUND}
      */
     static SkipResult skipUntil(StringReader input, int c, char target) throws IOException {
         while (c != -1 && c != target) {
             c = input.read();
         }
         return c == -1 ? SkipResult.EOF : SkipResult.FOUND;
     }
 }
	/*
	* Licensed to the Apache Software Foundation (ASF) under one or more
	* contributor license agreements. See the NOTICE file distributed with
	* this work for additional information regarding copyright ownership.
	* The ASF licenses this file to You under the Apache License, Version 2.0
	* (the "License"); you may not use this file except in compliance with
	* the License. You may obtain a copy of the License at
	*
	* http://www.apache.org/licenses/LICENSE-2.0
	*
	* Unless required by applicable law or agreed to in writing, software
	* distributed under the License is distributed on an "AS IS" BASIS,
	* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
	* See the License for the specific language governing permissions and
	* limitations under the License.
	*/
	package org.apache.tomcat.util.http.parser;

	import java.io.IOException;
	import java.io.StringReader;

	/**
	* HTTP header value parser implementation. Parsing HTTP headers as per RFC2616
	* is not always as simple as it first appears. For headers that only use tokens
	* the simple approach will normally be sufficient. However, for the other
	* headers, while simple code meets 99.9% of cases, there are often some edge
	* cases that make things far more complicated.
	*
	* The purpose of this parser is to let the parser worry about the edge cases.
	* It provides tolerant (where safe to do so) parsing of HTTP header values
	* assuming that wrapped header lines have already been unwrapped. (The Tomcat
	* header processing code does the unwrapping.)
	*
	*/
	public class HttpParser {

	// Arrays used by isToken(), isHex()
	private static final boolean isToken[] = new boolean[128];
	private static final boolean isHex[] = new boolean[128];

	static {
	// Setup the flag arrays
	for (int i = 0; i < 128; i++) {
	if (i < 32) {
	isToken[i] = false;
	} else if (i == '(' \|\| i == ')' \|\| i == '<' \|\| i == '>' \|\| i == '@' \|\|
	i == ',' \|\| i == ';' \|\| i == ':' \|\| i == '\\' \|\| i == '\"' \|\|
	i == '/' \|\| i == '[' \|\| i == ']' \|\| i == '?' \|\| i == '=' \|\|
	i == '{' \|\| i == '}' \|\| i == ' ' \|\| i == '\t') {
	isToken[i] = false;
	} else {
	isToken[i] = true;
	}

	if (i >= '0' && i <= '9' \|\| i >= 'A' && i <= 'F' \|\|
	i >= 'a' && i <= 'f') {
	isHex[i] = true;
	} else {
	isHex[i] = false;
	}
	}
	}

	public static String unquote(String input) {
	if (input == null \|\| input.length() < 2 \|\| input.charAt(0) != '"') {
	return input;
	}

	StringBuilder result = new StringBuilder();
	for (int i = 1 ; i < (input.length() - 1); i++) {
	char c = input.charAt(i);
	if (input.charAt(i) == '\\') {
	i++;
	result.append(input.charAt(i));
	} else {
	result.append(c);
	}
	}
	return result.toString();
	}

	static boolean isToken(int c) {
	// Fast for correct values, slower for incorrect ones
	try {
	return isToken[c];
	} catch (ArrayIndexOutOfBoundsException ex) {
	return false;
	}
	}

	static boolean isHex(int c) {
	// Fast for correct values, slower for incorrect ones
	try {
	return isHex[c];
	} catch (ArrayIndexOutOfBoundsException ex) {
	return false;
	}
	}

	// Skip any LWS and return the next char
	static int skipLws(StringReader input, boolean withReset) throws IOException {

	if (withReset) {
	input.mark(1);
	}
	int c = input.read();

	while (c == 32 \|\| c == 9 \|\| c == 10 \|\| c == 13) {
	if (withReset) {
	input.mark(1);
	}
	c = input.read();
	}

	if (withReset) {
	input.reset();
	}
	return c;
	}

	static SkipResult skipConstant(StringReader input, String constant) throws IOException {
	int len = constant.length();

	int c = skipLws(input, false);

	for (int i = 0; i < len; i++) {
	if (i == 0 && c == -1) {
	return SkipResult.EOF;
	}
	if (c != constant.charAt(i)) {
	input.skip(-(i + 1));
	return SkipResult.NOT_FOUND;
	}
	if (i != (len - 1)) {
	c = input.read();
	}
	}
	return SkipResult.FOUND;
	}

	/**
	* @return the token if one was found, the empty string if no data was
	* available to read or <code>null</code> if data other than a
	* token was found
	*/
	static String readToken(StringReader input) throws IOException {
	StringBuilder result = new StringBuilder();

	int c = skipLws(input, false);

	while (c != -1 && isToken(c)) {
	result.append((char) c);
	c = input.read();
	}
	// Skip back so non-token character is available for next read
	input.skip(-1);

	if (c != -1 && result.length() == 0) {
	return null;
	} else {
	return result.toString();
	}
	}

	/**
	* @return the quoted string if one was found, null if data other than a
	* quoted string was found or null if the end of data was reached
	* before the quoted string was terminated
	*/
	static String readQuotedString(StringReader input, boolean returnQuoted) throws IOException {

	int c = skipLws(input, false);

	if (c != '"') {
	return null;
	}

	StringBuilder result = new StringBuilder();
	if (returnQuoted) {
	result.append('\"');
	}
	c = input.read();

	while (c != '"') {
	if (c == -1) {
	return null;
	} else if (c == '\\') {
	c = input.read();
	if (returnQuoted) {
	result.append('\\');
	}
	result.append(c);
	} else {
	result.append((char) c);
	}
	c = input.read();
	}
	if (returnQuoted) {
	result.append('\"');
	}

	return result.toString();
	}

	static String readTokenOrQuotedString(StringReader input, boolean returnQuoted)
	throws IOException {

	// Go back so first non-LWS character is available to be read again
	int c = skipLws(input, true);

	if (c == '"') {
	return readQuotedString(input, returnQuoted);
	} else {
	return readToken(input);
	}
	}

	/**
	* Token can be read unambiguously with or without surrounding quotes so
	* this parsing method for token permits optional surrounding double quotes.
	* This is not defined in any RFC. It is a special case to handle data from
	* buggy clients (known buggy clients for DIGEST auth include Microsoft IE 8
	* & 9, Apple Safari for OSX and iOS) that add quotes to values that
	* should be tokens.
	*
	* @return the token if one was found, null if data other than a token or
	* quoted token was found or null if the end of data was reached
	* before a quoted token was terminated
	*/
	static String readQuotedToken(StringReader input) throws IOException {

	StringBuilder result = new StringBuilder();
	boolean quoted = false;

	int c = skipLws(input, false);

	if (c == '"') {
	quoted = true;
	} else if (c == -1 \|\| !isToken(c)) {
	return null;
	} else {
	result.append((char) c);
	}
	c = input.read();

	while (c != -1 && isToken(c)) {
	result.append((char) c);
	c = input.read();
	}

	if (quoted) {
	if (c != '"') {
	return null;
	}
	} else {
	// Skip back so non-token character is available for next read
	input.skip(-1);
	}

	if (c != -1 && result.length() == 0) {
	return null;
	} else {
	return result.toString();
	}
	}

	/**
	* LHEX can be read unambiguously with or without surrounding quotes so this
	* parsing method for LHEX permits optional surrounding double quotes. Some
	* buggy clients (libwww-perl for DIGEST auth) are known to send quoted LHEX
	* when the specification requires just LHEX.
	*
	* <p>
	* LHEX are, literally, lower-case hexadecimal digits. This implementation
	* allows for upper-case digits as well, converting the returned value to
	* lower-case.
	*
	* @return the sequence of LHEX (minus any surrounding quotes) if any was
	* found, or <code>null</code> if data other LHEX was found
	*/
	static String readLhex(StringReader input) throws IOException {

	StringBuilder result = new StringBuilder();
	boolean quoted = false;

	int c = skipLws(input, false);

	if (c == '"') {
	quoted = true;
	} else if (c == -1 \|\| !isHex(c)) {
	return null;
	} else {
	if ('A' <= c && c <= 'F') {
	c -= ('A' - 'a');
	}
	result.append((char) c);
	}
	c = input.read();

	while (c != -1 && isHex(c)) {
	if ('A' <= c && c <= 'F') {
	c -= ('A' - 'a');
	}
	result.append((char) c);
	c = input.read();
	}

	if (quoted) {
	if (c != '"') {
	return null;
	}
	} else {
	// Skip back so non-hex character is available for next read
	input.skip(-1);
	}

	if (c != -1 && result.length() == 0) {
	return null;
	} else {
	return result.toString();
	}
	}

	static double readWeight(StringReader input, char delimiter) throws IOException {
	int c = skipLws(input, false);
	if (c == -1 \|\| c == delimiter) {
	// No q value just whitespace
	return 1;
	} else if (c != 'q') {
	// Malformed. Use quality of zero so it is dropped.
	skipUntil(input, c, delimiter);
	return 0;
	}
	// RFC 7231 does not allow whitespace here but be tolerant
	c = skipLws(input, false);
	if (c != '=') {
	// Malformed. Use quality of zero so it is dropped.
	skipUntil(input, c, delimiter);
	return 0;
	}

	// RFC 7231 does not allow whitespace here but be tolerant
	c = skipLws(input, false);

	// Should be no more than 3 decimal places
	StringBuilder value = new StringBuilder(5);
	int decimalPlacesRead = 0;
	if (c == '0' \|\| c == '1') {
	value.append((char) c);
	c = input.read();
	if (c == '.') {
	value.append('.');
	} else if (c < '0' \|\| c > '9') {
	decimalPlacesRead = 3;
	}
	while (true) {
	c = input.read();
	if (c >= '0' && c <= '9') {
	if (decimalPlacesRead < 3) {
	value.append((char) c);
	decimalPlacesRead++;
	}
	} else if (c == delimiter \|\| c == 9 \|\| c == 32 \|\| c == -1) {
	break;
	} else {
	// Malformed. Use quality of zero so it is dropped and skip until
	// EOF or the next delimiter
	skipUntil(input, c, delimiter);
	return 0;
	}
	}
	} else {
	// Malformed. Use quality of zero so it is dropped and skip until
	// EOF or the next delimiter
	skipUntil(input, c, delimiter);
	return 0;
	}

	double result = Double.parseDouble(value.toString());
	if (result > 1) {
	return 0;
	}
	return result;
	}


	/**
	* Skips all characters until EOF or the specified target is found. Normally
	* used to skip invalid input until the next separator.
	*/
	static SkipResult skipUntil(StringReader input, int c, char target) throws IOException {
	while (c != -1 && c != target) {
	c = input.read();
	}
	if (c == -1) {
	return SkipResult.EOF;
	} else {
	return SkipResult.FOUND;
	}
	}
	}