/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
| package org.apache.tomcat.util.http.parser; |
| |
| import java.io.IOException; |
| import java.io.StringReader; |
| |
| /** |
| * HTTP header value parser implementation. Parsing HTTP headers as per RFC2616 |
| * is not always as simple as it first appears. For headers that only use tokens |
| * the simple approach will normally be sufficient. However, for the other |
| * headers, while simple code meets 99.9% of cases, there are often some edge |
| * cases that make things far more complicated. |
| * |
| * The purpose of this parser is to let the parser worry about the edge cases. |
| * It provides tolerant (where safe to do so) parsing of HTTP header values |
| * assuming that wrapped header lines have already been unwrapped. (The Tomcat |
| * header processing code does the unwrapping.) |
| * |
| */ |
| public class HttpParser { |
| |
| // Arrays used by isToken(), isHex() |
| private static final boolean isToken[] = new boolean[128]; |
| private static final boolean isHex[] = new boolean[128]; |
| |
| static { |
| // Setup the flag arrays |
| for (int i = 0; i < 128; i++) { |
| if (i < 32) { |
| isToken[i] = false; |
| } else if (i == '(' || i == ')' || i == '<' || i == '>' || i == '@' || |
| i == ',' || i == ';' || i == ':' || i == '\\' || i == '\"' || |
| i == '/' || i == '[' || i == ']' || i == '?' || i == '=' || |
| i == '{' || i == '}' || i == ' ' || i == '\t') { |
| isToken[i] = false; |
| } else { |
| isToken[i] = true; |
| } |
| |
| if (i >= '0' && i <= '9' || i >= 'A' && i <= 'F' || |
| i >= 'a' && i <= 'f') { |
| isHex[i] = true; |
| } else { |
| isHex[i] = false; |
| } |
| } |
| } |
| |
| public static String unquote(String input) { |
| if (input == null || input.length() < 2 || input.charAt(0) != '"') { |
| return input; |
| } |
| |
| StringBuilder result = new StringBuilder(); |
| for (int i = 1 ; i < (input.length() - 1); i++) { |
| char c = input.charAt(i); |
| if (input.charAt(i) == '\\') { |
| i++; |
| result.append(input.charAt(i)); |
| } else { |
| result.append(c); |
| } |
| } |
| return result.toString(); |
| } |
| |
| static boolean isToken(int c) { |
| // Fast for correct values, slower for incorrect ones |
| try { |
| return isToken[c]; |
| } catch (ArrayIndexOutOfBoundsException ex) { |
| return false; |
| } |
| } |
| |
| static boolean isHex(int c) { |
| // Fast for correct values, slower for incorrect ones |
| try { |
| return isHex[c]; |
| } catch (ArrayIndexOutOfBoundsException ex) { |
| return false; |
| } |
| } |
| |
| // Skip any LWS and return the next char |
| static int skipLws(StringReader input, boolean withReset) throws IOException { |
| |
| if (withReset) { |
| input.mark(1); |
| } |
| int c = input.read(); |
| |
| while (c == 32 || c == 9 || c == 10 || c == 13) { |
| if (withReset) { |
| input.mark(1); |
| } |
| c = input.read(); |
| } |
| |
| if (withReset) { |
| input.reset(); |
| } |
| return c; |
| } |
| |
| static SkipResult skipConstant(StringReader input, String constant) throws IOException { |
| int len = constant.length(); |
| |
| int c = skipLws(input, false); |
| |
| for (int i = 0; i < len; i++) { |
| if (i == 0 && c == -1) { |
| return SkipResult.EOF; |
| } |
| if (c != constant.charAt(i)) { |
| input.skip(-(i + 1)); |
| return SkipResult.NOT_FOUND; |
| } |
| if (i != (len - 1)) { |
| c = input.read(); |
| } |
| } |
| return SkipResult.FOUND; |
| } |
| |
| /** |
| * @return the token if one was found, the empty string if no data was |
| * available to read or <code>null</code> if data other than a |
| * token was found |
| */ |
| static String readToken(StringReader input) throws IOException { |
| StringBuilder result = new StringBuilder(); |
| |
| int c = skipLws(input, false); |
| |
| while (c != -1 && isToken(c)) { |
| result.append((char) c); |
| c = input.read(); |
| } |
| // Skip back so non-token character is available for next read |
| input.skip(-1); |
| |
| if (c != -1 && result.length() == 0) { |
| return null; |
| } else { |
| return result.toString(); |
| } |
| } |
| |
| /** |
| * @return the quoted string if one was found, null if data other than a |
| * quoted string was found or null if the end of data was reached |
| * before the quoted string was terminated |
| */ |
| static String readQuotedString(StringReader input, boolean returnQuoted) throws IOException { |
| |
| int c = skipLws(input, false); |
| |
| if (c != '"') { |
| return null; |
| } |
| |
| StringBuilder result = new StringBuilder(); |
| if (returnQuoted) { |
| result.append('\"'); |
| } |
| c = input.read(); |
| |
| while (c != '"') { |
| if (c == -1) { |
| return null; |
| } else if (c == '\\') { |
| c = input.read(); |
| if (returnQuoted) { |
| result.append('\\'); |
| } |
| result.append(c); |
| } else { |
| result.append((char) c); |
| } |
| c = input.read(); |
| } |
| if (returnQuoted) { |
| result.append('\"'); |
| } |
| |
| return result.toString(); |
| } |
| |
| static String readTokenOrQuotedString(StringReader input, boolean returnQuoted) |
| throws IOException { |
| |
| // Go back so first non-LWS character is available to be read again |
| int c = skipLws(input, true); |
| |
| if (c == '"') { |
| return readQuotedString(input, returnQuoted); |
| } else { |
| return readToken(input); |
| } |
| } |
| |
| /** |
| * Token can be read unambiguously with or without surrounding quotes so |
| * this parsing method for token permits optional surrounding double quotes. |
| * This is not defined in any RFC. It is a special case to handle data from |
| * buggy clients (known buggy clients for DIGEST auth include Microsoft IE 8 |
| * & 9, Apple Safari for OSX and iOS) that add quotes to values that |
| * should be tokens. |
| * |
| * @return the token if one was found, null if data other than a token or |
| * quoted token was found or null if the end of data was reached |
| * before a quoted token was terminated |
| */ |
| static String readQuotedToken(StringReader input) throws IOException { |
| |
| StringBuilder result = new StringBuilder(); |
| boolean quoted = false; |
| |
| int c = skipLws(input, false); |
| |
| if (c == '"') { |
| quoted = true; |
| } else if (c == -1 || !isToken(c)) { |
| return null; |
| } else { |
| result.append((char) c); |
| } |
| c = input.read(); |
| |
| while (c != -1 && isToken(c)) { |
| result.append((char) c); |
| c = input.read(); |
| } |
| |
| if (quoted) { |
| if (c != '"') { |
| return null; |
| } |
| } else { |
| // Skip back so non-token character is available for next read |
| input.skip(-1); |
| } |
| |
| if (c != -1 && result.length() == 0) { |
| return null; |
| } else { |
| return result.toString(); |
| } |
| } |
| |
| /** |
| * LHEX can be read unambiguously with or without surrounding quotes so this |
| * parsing method for LHEX permits optional surrounding double quotes. Some |
| * buggy clients (libwww-perl for DIGEST auth) are known to send quoted LHEX |
| * when the specification requires just LHEX. |
| * |
| * <p> |
| * LHEX are, literally, lower-case hexadecimal digits. This implementation |
| * allows for upper-case digits as well, converting the returned value to |
| * lower-case. |
| * |
| * @return the sequence of LHEX (minus any surrounding quotes) if any was |
| * found, or <code>null</code> if data other LHEX was found |
| */ |
| static String readLhex(StringReader input) throws IOException { |
| |
| StringBuilder result = new StringBuilder(); |
| boolean quoted = false; |
| |
| int c = skipLws(input, false); |
| |
| if (c == '"') { |
| quoted = true; |
| } else if (c == -1 || !isHex(c)) { |
| return null; |
| } else { |
| if ('A' <= c && c <= 'F') { |
| c -= ('A' - 'a'); |
| } |
| result.append((char) c); |
| } |
| c = input.read(); |
| |
| while (c != -1 && isHex(c)) { |
| if ('A' <= c && c <= 'F') { |
| c -= ('A' - 'a'); |
| } |
| result.append((char) c); |
| c = input.read(); |
| } |
| |
| if (quoted) { |
| if (c != '"') { |
| return null; |
| } |
| } else { |
| // Skip back so non-hex character is available for next read |
| input.skip(-1); |
| } |
| |
| if (c != -1 && result.length() == 0) { |
| return null; |
| } else { |
| return result.toString(); |
| } |
| } |
| |
| static double readWeight(StringReader input, char delimiter) throws IOException { |
| int c = skipLws(input, false); |
| if (c == -1 || c == delimiter) { |
| // No q value just whitespace |
| return 1; |
| } else if (c != 'q') { |
| // Malformed. Use quality of zero so it is dropped. |
| skipUntil(input, c, delimiter); |
| return 0; |
| } |
| // RFC 7231 does not allow whitespace here but be tolerant |
| c = skipLws(input, false); |
| if (c != '=') { |
| // Malformed. Use quality of zero so it is dropped. |
| skipUntil(input, c, delimiter); |
| return 0; |
| } |
| |
| // RFC 7231 does not allow whitespace here but be tolerant |
| c = skipLws(input, false); |
| |
| // Should be no more than 3 decimal places |
| StringBuilder value = new StringBuilder(5); |
| int decimalPlacesRead = 0; |
| if (c == '0' || c == '1') { |
| value.append((char) c); |
| c = input.read(); |
| if (c == '.') { |
| value.append('.'); |
| } else if (c < '0' || c > '9') { |
| decimalPlacesRead = 3; |
| } |
| while (true) { |
| c = input.read(); |
| if (c >= '0' && c <= '9') { |
| if (decimalPlacesRead < 3) { |
| value.append((char) c); |
| decimalPlacesRead++; |
| } |
| } else if (c == delimiter || c == 9 || c == 32 || c == -1) { |
| break; |
| } else { |
| // Malformed. Use quality of zero so it is dropped and skip until |
| // EOF or the next delimiter |
| skipUntil(input, c, delimiter); |
| return 0; |
| } |
| } |
| } else { |
| // Malformed. Use quality of zero so it is dropped and skip until |
| // EOF or the next delimiter |
| skipUntil(input, c, delimiter); |
| return 0; |
| } |
| |
| double result = Double.parseDouble(value.toString()); |
| if (result > 1) { |
| return 0; |
| } |
| return result; |
| } |
| |
| |
| /** |
| * Skips all characters until EOF or the specified target is found. Normally |
| * used to skip invalid input until the next separator. |
| */ |
| static SkipResult skipUntil(StringReader input, int c, char target) throws IOException { |
| while (c != -1 && c != target) { |
| c = input.read(); |
| } |
| if (c == -1) { |
| return SkipResult.EOF; |
| } else { |
| return SkipResult.FOUND; |
| } |
| } |
| } |