/** @file
Catch-based unit tests for URL
@section license License
Licensed to the Apache Software Foundation (ASF) under one or more contributor license agreements.
See the NOTICE file distributed with this work for additional information regarding copyright
ownership. The ASF licenses this file to you under the Apache License, Version 2.0 (the
"License"); you may not use this file except in compliance with the License. You may obtain a
copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software distributed under the License
is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
or implied. See the License for the specific language governing permissions and limitations under
the License.
*/
#include <cstdio>
#include <cstring>
#include <string>
#include <string_view>
#include <vector>
#include "catch.hpp"
#include "URL.h"
TEST_CASE("ValidateURL", "[proxy][validurl]")
{
  // Candidate host names paired with the verdict validate_host_name() is
  // expected to return for each of them.
  static const struct {
    const char *const text;
    bool valid;
  } host_name_cases[] = {
    {"yahoo", true},
    {"yahoo.com", true},
    {"yahoo.wow.com", true},
    {"yahoo.wow.much.amaze.com", true},
    {"209.131.52.50", true},
    {"192.168.0.1", true},
    {"localhost", true},
    {"3ffe:1900:4545:3:200:f8ff:fe21:67cf", true},
    {"fe80:0:0:0:200:f8ff:fe21:67cf", true},
    {"fe80::200:f8ff:fe21:67cf", true},
    {"<svg onload=alert(1)>", false}, // Sample host header XSS attack
    {"jlads;f8-9349*(D&F*D(234jD*(FSD*(VKLJ#(*$@()#$)))))", false},
    {"\"\t\n", false},
    {"!@#$%^ &*(*&^%$#@#$%^&*(*&^%$#))", false},
    {":):(:O!!!!!!", false},
  };

  for (const auto &entry : host_name_cases) {
    const char *const host = entry.text;
    if (validate_host_name({host}) == entry.valid) {
      continue; // Verdict matches the expectation; nothing to report.
    }
    // Name the offending input before recording the failure so the log shows
    // which table entry misbehaved.
    std::printf("Validation of FQDN (host) header: \"%s\", expected %s, but not\n", host, (entry.valid ? "true" : "false"));
    CHECK(false);
  }
}
TEST_CASE("Validate Scheme", "[proxy][validscheme]")
{
  // Candidate scheme strings paired with whether validate_scheme() is expected
  // to accept them.
  static const struct {
    std::string_view text;
    bool valid;
  } scheme_test_cases[] = {{"http", true},      {"https", true},      {"example", true},    {"example.", true},
                           {"example++", true}, {"example--.", true}, {"++example", false}, {"--example", false},
                           {".example", false}, {"example://", false}};

  for (const auto &test_case : scheme_test_cases) {
    // A bare CHECK(validate_scheme(text) == valid) does not reveal which entry
    // failed, so the offending input is printed before recording the failure.
    std::string_view text = test_case.text;
    if (validate_scheme(text) != test_case.valid) {
      // A std::string_view is not guaranteed to be NUL-terminated, so print it
      // with an explicit length instead of handing data() to "%s".
      std::printf("Validation of scheme: \"%.*s\", expected %s, but not\n", static_cast<int>(text.size()), text.data(),
                  (test_case.valid ? "true" : "false"));
      CHECK(false);
    }
  }
}
// Forward declarations of the URL compliance checkers exercised by the
// "ParseRulesStrictURI" and "ParseRulesMostlyStrictURI" test cases. Both take
// a [start, end) character range and return whether it is compliant.
// NOTE(review): presumably these are defined in the URL implementation and are
// not declared in URL.h, hence the local declarations -- confirm.
namespace UrlImpl
{
bool url_is_strictly_compliant(const char *start, const char *end);
bool url_is_mostly_compliant(const char *start, const char *end);
} // namespace UrlImpl
// Let the tests below call the checkers without qualification.
using namespace UrlImpl;
TEST_CASE("ParseRulesStrictURI", "[proxy][parseuri]")
{
  // URI strings paired with whether url_is_strictly_compliant() is expected to
  // accept them.
  const struct {
    const char *const uri;
    bool valid;
  } http_strict_uri_parsing_test_case[] = {{"//index.html", true},
                                           {"/home", true},
                                           {"/path/data?key=value#id", true},
                                           {"/ABCDEFGHIJKLMNOPQRSTUVWXYZ", true},
                                           {"/abcdefghijklmnopqrstuvwxyz", true},
                                           {"/abcde fghijklmnopqrstuvwxyz", false},
                                           {"/abcde\tfghijklmnopqrstuvwxyz", false},
                                           // This entry used to be textually identical to the accepted lowercase
                                           // path above while expecting rejection, so both could never pass.
                                           // Presumably an unprintable character was lost from the literal; spell
                                           // a control character (form feed) out as an escape so it stays visible.
                                           // The literal is split so the escape is not misread as "\ff".
                                           {"/abcde\f"
                                            "fghijklmnopqrstuvwxyz",
                                            false},
                                           {"/0123456789", true},
                                           {":/?#[]@", true},
                                           {"!$&'()*+,;=", true},
                                           {"-._~", true},
                                           {"%", true},
                                           {"\n", false},
                                           {"\"", false},
                                           {"<", false},
                                           {">", false},
                                           {"\\", false},
                                           {"^", false},
                                           {"`", false},
                                           {"{", false},
                                           {"|", false},
                                           {"}", false},
                                           {"é", false}}; // Non-ASCII character

  for (const auto &test_case : http_strict_uri_parsing_test_case) {
    const char *const uri = test_case.uri;
    if (url_is_strictly_compliant(uri, uri + std::strlen(uri)) != test_case.valid) {
      // Name the offending input before recording the failure so the log shows
      // which table entry misbehaved.
      std::printf("Strictly parse URI: \"%s\", expected %s, but not\n", uri, (test_case.valid ? "true" : "false"));
      CHECK(false);
    }
  }
}
TEST_CASE("ParseRulesMostlyStrictURI", "[proxy][parseuri]")
{
  // URI strings paired with whether url_is_mostly_compliant() is expected to
  // accept them. Compared with the strict table, the single-character entries
  // from "\"" through "}" are expected to be accepted here.
  const struct {
    const char *const uri;
    bool valid;
  } http_mostly_strict_uri_parsing_test_case[] = {{"//index.html", true},
                                                  {"/home", true},
                                                  {"/path/data?key=value#id", true},
                                                  {"/ABCDEFGHIJKLMNOPQRSTUVWXYZ", true},
                                                  {"/abcdefghijklmnopqrstuvwxyz", true},
                                                  {"/abcde fghijklmnopqrstuvwxyz", false},
                                                  {"/abcde\tfghijklmnopqrstuvwxyz", false},
                                                  // This entry used to be textually identical to the accepted
                                                  // lowercase path above while expecting rejection, so both could
                                                  // never pass. Presumably an unprintable character was lost from
                                                  // the literal; spell a control character (form feed) out as an
                                                  // escape so it stays visible. The literal is split so the escape
                                                  // is not misread as "\ff".
                                                  {"/abcde\f"
                                                   "fghijklmnopqrstuvwxyz",
                                                   false},
                                                  {"/0123456789", true},
                                                  {":/?#[]@", true},
                                                  {"!$&'()*+,;=", true},
                                                  {"-._~", true},
                                                  {"%", true},
                                                  {"\n", false},
                                                  {"\"", true},
                                                  {"<", true},
                                                  {">", true},
                                                  {"\\", true},
                                                  {"^", true},
                                                  {"`", true},
                                                  {"{", true},
                                                  {"|", true},
                                                  {"}", true},
                                                  {"é", false}}; // Non-ASCII character

  for (const auto &test_case : http_mostly_strict_uri_parsing_test_case) {
    const char *const uri = test_case.uri;
    if (url_is_mostly_compliant(uri, uri + std::strlen(uri)) != test_case.valid) {
      // Name the offending input before recording the failure so the log shows
      // which table entry misbehaved.
      std::printf("Mostly strictly parse URI: \"%s\", expected %s, but not\n", uri, (test_case.valid ? "true" : "false"));
      CHECK(false);
    }
  }
}
/** One scenario for test_parse(): an input URI together with the expectations
 * for both the regular parser and the regex parser.
 */
struct url_parse_test_case {
// The text handed to the parser under test.
const std::string input_uri;
// What URL::print should emit after a successful parse via the non-regex path.
const std::string expected_printed_url;
// When true the non-regex path calls URL::parse; otherwise it calls
// URL::parse_no_host_check.
const bool verify_host_characters;
// What URL::print should emit after a successful URL::parse_regex.
const std::string expected_printed_url_regex;
// Whether the non-regex path is expected to report the input as valid.
const bool is_valid;
// Whether URL::parse_regex is expected to report the input as valid.
const bool is_valid_regex;
};
// Readable names for the boolean fields of url_parse_test_case; the table
// below negates them (e.g. !IS_VALID) to express the opposite expectation.
constexpr bool IS_VALID = true;
constexpr bool VERIFY_HOST_CHARACTERS = true;
// clang-format off
// Table of URL parsing scenarios run by the "UrlParse" test case. Each entry
// lists, in order: input_uri, expected_printed_url, verify_host_characters,
// expected_printed_url_regex, is_valid, is_valid_regex.
std::vector<url_parse_test_case> url_parse_test_cases = {
{
"/index.html",
"/index.html",
VERIFY_HOST_CHARACTERS,
"/index.html",
IS_VALID,
IS_VALID
},
{
"//index.html",
"//index.html",
VERIFY_HOST_CHARACTERS,
"//index.html",
IS_VALID,
IS_VALID
},
{
// The following scheme-only URI is technically valid per the spec, but we
// have historically returned this as invalid and I'm not comfortable
// changing it in case something depends upon this behavior. Besides, a
// scheme-only URI is probably not helpful to us nor something likely
// Traffic Server will see.
"http://",
"",
VERIFY_HOST_CHARACTERS,
"",
!IS_VALID,
!IS_VALID
},
{
"https:///",
"https:///",
VERIFY_HOST_CHARACTERS,
"https:///",
IS_VALID,
IS_VALID
},
{
// RFC 3986 section-3: When authority is not present, the path cannot begin
// with two slash characters ("//"). We have historically allowed this,
// however, and will continue to do so.
"https:////",
"https:////",
VERIFY_HOST_CHARACTERS,
"https:////",
IS_VALID,
IS_VALID
},
{
// By convention, our url_print() function adds a path of '/' at the end of
// URLs that have no path, query, or fragment after the authority.
"mailto:Test.User@example.com",
"mailto:Test.User@example.com/",
VERIFY_HOST_CHARACTERS,
"mailto:Test.User@example.com/",
IS_VALID,
IS_VALID
},
{
"mailto:Test.User@example.com:25",
"mailto:Test.User@example.com:25/",
VERIFY_HOST_CHARACTERS,
"mailto:Test.User@example.com:25/",
IS_VALID,
IS_VALID
},
{
"https://www.example.com",
"https://www.example.com/",
VERIFY_HOST_CHARACTERS,
"https://www.example.com/",
IS_VALID,
IS_VALID
},
{
"https://www.example.com/",
"https://www.example.com/",
VERIFY_HOST_CHARACTERS,
"https://www.example.com/",
IS_VALID,
IS_VALID
},
{
"https://www.example.com//",
"https://www.example.com//",
VERIFY_HOST_CHARACTERS,
"https://www.example.com//",
IS_VALID,
IS_VALID
},
{
"https://127.0.0.1",
"https://127.0.0.1/",
VERIFY_HOST_CHARACTERS,
"https://127.0.0.1/",
IS_VALID,
IS_VALID
},
{
"https://[::1]",
"https://[::1]/",
VERIFY_HOST_CHARACTERS,
"https://[::1]/",
IS_VALID,
IS_VALID
},
{
"https://127.0.0.1/",
"https://127.0.0.1/",
VERIFY_HOST_CHARACTERS,
"https://127.0.0.1/",
IS_VALID,
IS_VALID
},
{
"https://www.example.com:8888",
"https://www.example.com:8888/",
VERIFY_HOST_CHARACTERS,
"https://www.example.com:8888/",
IS_VALID,
IS_VALID
},
{
"https://www.example.com:8888/",
"https://www.example.com:8888/",
VERIFY_HOST_CHARACTERS,
"https://www.example.com:8888/",
IS_VALID,
IS_VALID
},
{
"https://www.example.com/a/path",
"https://www.example.com/a/path",
VERIFY_HOST_CHARACTERS,
"https://www.example.com/a/path",
IS_VALID,
IS_VALID
},
{
"https://www.example.com//a/path",
"https://www.example.com//a/path",
VERIFY_HOST_CHARACTERS,
"https://www.example.com//a/path",
IS_VALID,
IS_VALID
},
// Technically a trailing '?' with an empty query string is valid, but we
// drop the '?'. The parse_regex, however, makes no distinction between
// query, fragment, and path components so it does not cut it out.
{
"https://www.example.com/a/path?",
"https://www.example.com/a/path",
VERIFY_HOST_CHARACTERS,
"https://www.example.com/a/path?",
IS_VALID,
IS_VALID},
{
"https://www.example.com/a/path?name=value",
"https://www.example.com/a/path?name=value",
VERIFY_HOST_CHARACTERS,
"https://www.example.com/a/path?name=value",
IS_VALID,
IS_VALID
},
{
"https://www.example.com/a/path?name=/a/path/value",
"https://www.example.com/a/path?name=/a/path/value",
VERIFY_HOST_CHARACTERS,
"https://www.example.com/a/path?name=/a/path/value",
IS_VALID,
IS_VALID
},
{
"https://www.example.com/a/path?name=/a/path/value;some=other_value",
"https://www.example.com/a/path?name=/a/path/value;some=other_value",
VERIFY_HOST_CHARACTERS,
"https://www.example.com/a/path?name=/a/path/value;some=other_value",
IS_VALID,
IS_VALID
},
{
"https://www.example.com/a/path?name=/a/path/value;some=other_value/",
"https://www.example.com/a/path?name=/a/path/value;some=other_value/",
VERIFY_HOST_CHARACTERS,
"https://www.example.com/a/path?name=/a/path/value;some=other_value/",
IS_VALID,
IS_VALID
},
// Again, URL::parse drops a final '?'.
{
"https://www.example.com?",
"https://www.example.com",
VERIFY_HOST_CHARACTERS,
"https://www.example.com?/",
IS_VALID,
IS_VALID
},
{
"https://www.example.com?name=value",
"https://www.example.com?name=value",
VERIFY_HOST_CHARACTERS,
"https://www.example.com?name=value/",
IS_VALID,
IS_VALID
},
{
"https://www.example.com?name=value/",
"https://www.example.com?name=value/",
VERIFY_HOST_CHARACTERS,
"https://www.example.com?name=value/",
IS_VALID,
IS_VALID
},
// URL::parse also drops the final '#'.
{
"https://www.example.com#",
"https://www.example.com",
VERIFY_HOST_CHARACTERS,
"https://www.example.com#/",
IS_VALID,
IS_VALID
},
{
"https://www.example.com#some=value",
"https://www.example.com#some=value",
VERIFY_HOST_CHARACTERS,
"https://www.example.com#some=value/",
IS_VALID,
IS_VALID},
{
"https://www.example.com/a/path#",
"https://www.example.com/a/path",
VERIFY_HOST_CHARACTERS,
"https://www.example.com/a/path#",
IS_VALID,
IS_VALID
},
{
"https://www.example.com/a/path#some=value",
"https://www.example.com/a/path#some=value",
VERIFY_HOST_CHARACTERS,
"https://www.example.com/a/path#some=value",
IS_VALID,
IS_VALID
},
{
// Note that this final '?' is not for a query parameter but is a part of
// the fragment.
"https://www.example.com/a/path#some=value?",
"https://www.example.com/a/path#some=value?",
VERIFY_HOST_CHARACTERS,
"https://www.example.com/a/path#some=value?",
IS_VALID,
IS_VALID
},
{
"https://www.example.com/a/path#some=value?with_question",
"https://www.example.com/a/path#some=value?with_question",
VERIFY_HOST_CHARACTERS,
"https://www.example.com/a/path#some=value?with_question",
IS_VALID,
IS_VALID
},
{
"https://www.example.com/a/path?name=value?_with_question#some=value?with_question/",
"https://www.example.com/a/path?name=value?_with_question#some=value?with_question/",
VERIFY_HOST_CHARACTERS,
"https://www.example.com/a/path?name=value?_with_question#some=value?with_question/",
IS_VALID,
IS_VALID
},
// The following are some examples of strings we expect from regex_map in
// remap.config. The "From" portions, which are regular expressions, are
// often not parsable by URL::parse but are by URL::parse_regex, which is the
// purpose of its existence.
{
R"(http://(.*)?reactivate\.mail\.yahoo\.com/)",
"",
VERIFY_HOST_CHARACTERS,
R"(http://(.*)?reactivate\.mail\.yahoo\.com/)",
!IS_VALID,
IS_VALID
},
{
// The following is an example of a "To" URL in a regex_map line. We'll
// first verify that the '$' is flagged as invalid for a host in this case.
"http://$1reactivate.real.mail.yahoo.com/",
"http://$1reactivate.real.mail.yahoo.com/",
VERIFY_HOST_CHARACTERS,
"http://$1reactivate.real.mail.yahoo.com/",
!IS_VALID,
IS_VALID
},
{
// Same as above, but this time we pass in !VERIFY_HOST_CHARACTERS. This is
// how RemapConfig will call this parse() function.
"http://$1reactivate.real.mail.yahoo.com/",
"http://$1reactivate.real.mail.yahoo.com/",
!VERIFY_HOST_CHARACTERS,
"http://$1reactivate.real.mail.yahoo.com/",
IS_VALID,
IS_VALID
}
};
// clang-format on
// Selector values for the parse_function argument of test_parse(): URL_PARSE
// exercises the regular parser, URL_PARSE_REGEX exercises URL::parse_regex.
constexpr bool URL_PARSE = true;
constexpr bool URL_PARSE_REGEX = false;
/** Run one parsing scenario through the selected URL parser and verify it.
 *
 * The input is parsed into a freshly created URL. The parse result is compared
 * against the validity the test case expects for the selected parser, and, if
 * parsing succeeded, the printed form of the URL is compared against the
 * expected text for that parser.
 *
 * @param[in] test_case The scenario: input URI plus expectations.
 *
 * @param[in] parse_function URL_PARSE to exercise parse() (or
 * parse_no_host_check() when the test case disables host character
 * verification), URL_PARSE_REGEX to exercise parse_regex().
 */
void
test_parse(url_parse_test_case const &test_case, bool parse_function)
{
  const bool use_regex = (parse_function == URL_PARSE_REGEX);

  URL url;
  HdrHeap *heap = new_HdrHeap();
  url.create(heap);

  // Dispatch to the parser under test.
  ParseResult result = PARSE_RESULT_OK;
  if (use_regex) {
    result = url.parse_regex(test_case.input_uri);
  } else if (test_case.verify_host_characters) {
    result = url.parse(test_case.input_uri);
  } else {
    result = url.parse_no_host_check(test_case.input_uri);
  }

  // Each parser has its own validity expectation.
  const bool expected_is_valid = use_regex ? test_case.is_valid_regex : test_case.is_valid;

  if (expected_is_valid) {
    if (result != PARSE_RESULT_DONE) {
      std::printf("Parse URI: \"%s\", expected it to be valid but it was parsed invalid (%d)\n", test_case.input_uri.c_str(), result);
      CHECK(false);
    }
  } else if (result != PARSE_RESULT_ERROR) {
    std::printf("Parse URI: \"%s\", expected it to be invalid but it was parsed valid (%d)\n", test_case.input_uri.c_str(), result);
    CHECK(false);
  }

  // Only a successfully parsed URL has a printed form worth comparing.
  if (result == PARSE_RESULT_DONE) {
    char buf[1024];
    int index  = 0;
    int offset = 0;
    url.print(buf, sizeof(buf), &index, &offset);
    // index holds the number of characters written into buf by print().
    std::string printed_url{buf, static_cast<size_t>(index)};

    if (use_regex) {
      CHECK(test_case.expected_printed_url_regex == printed_url);
      CHECK(test_case.expected_printed_url_regex.size() == printed_url.size());
    } else {
      CHECK(test_case.expected_printed_url == printed_url);
      CHECK(test_case.expected_printed_url.size() == printed_url.size());
    }
  }

  heap->destroy();
}
TEST_CASE("UrlParse", "[proxy][parseurl]")
{
  // Every table entry is run through both parser selectors, the regular
  // parser first and the regex parser second.
  for (auto const &test_case : url_parse_test_cases) {
    for (const bool parse_function : {URL_PARSE, URL_PARSE_REGEX}) {
      test_parse(test_case, parse_function);
    }
  }
}