blob: fc0c787d0a9fba3454a84a943f54e670920ece44 [file] [log] [blame]
/*
* Copyright 2012 Google Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
// Author: matterbury@google.com (Matt Atterbury)
//
// Unit tests for ScanFilter.
#include "net/instaweb/rewriter/public/scan_filter.h"
#include "net/instaweb/rewriter/public/rewrite_driver.h"
#include "net/instaweb/rewriter/public/rewrite_test_base.h"
#include "pagespeed/kernel/base/charset_util.h"
#include "pagespeed/kernel/base/gtest.h"
#include "pagespeed/kernel/base/string_util.h"
#include "pagespeed/kernel/html/html_parse_test_base.h"
#include "pagespeed/kernel/http/google_url.h"
#include "pagespeed/kernel/http/response_headers.h"
namespace net_instaweb {
// Test fixture for ScanFilter unit tests.
class ScanFilterTest : public RewriteTestBase {
};
TEST_F(ScanFilterTest, EmptyPage) {
// By default the base is the URL, which is set by ValidateNoChanges.
const char kTestName[] = "empty_page";
ValidateNoChanges(kTestName, "<head></head>");
EXPECT_STREQ(StrCat(kTestDomain, kTestName, ".html"),
rewrite_driver()->base_url().Spec());
EXPECT_FALSE(rewrite_driver()->refs_before_base());
}
TEST_F(ScanFilterTest, SetBase) {
// The default base (the URL) is overridden by a base tag.
const char kTestName[] = "set_base";
const char kNewBase[] = "http://example.com/index.html";
ValidateNoChanges(kTestName,
StrCat("<head>"
"<base href=\"", kNewBase, "\">"
"</head>"));
EXPECT_STREQ(kNewBase, rewrite_driver()->base_url().Spec());
EXPECT_FALSE(rewrite_driver()->refs_before_base());
}
TEST_F(ScanFilterTest, RefsAfterBase) {
// Check that we don't flag refs after the base tag.
const char kTestName[] = "refs_after_base";
const char kNewBase[] = "http://example.com/index.html";
ValidateNoChanges(kTestName,
StrCat("<head profile='no problem'>"
"<base href=\"", kNewBase, "\">"
"<a href=\"help.html\">link</a>"
"</head>"));
EXPECT_STREQ(kNewBase, rewrite_driver()->base_url().Spec());
EXPECT_FALSE(rewrite_driver()->refs_before_base());
}
TEST_F(ScanFilterTest, RefsBeforeBase) {
// Check that we do flag refs before the base tag.
const char kTestName[] = "refs_after_base";
const char kNewBase[] = "http://example.com/index.html";
ValidateNoChanges(kTestName,
StrCat("<head>"
"<a href=\"help.html\">link</a>"
"<base href=\"", kNewBase, "\">"
"</head>"));
EXPECT_STREQ(kNewBase, rewrite_driver()->base_url().Spec());
EXPECT_TRUE(rewrite_driver()->refs_before_base());
}
TEST_F(ScanFilterTest, NoCharset) {
// Check that the charset is empty if we don't set it in any way.
const char kTestName[] = "no_charset";
ValidateNoChanges(kTestName, "<head></head>");
EXPECT_TRUE(rewrite_driver()->containing_charset().empty());
}
TEST_F(ScanFilterTest, CharsetFromResponseHeaders) {
// Check that the charset is taken from the response headers.
const char kTestName[] = "charset_from_response_headers";
ResponseHeaders headers;
headers.MergeContentType("text/html; charset=iso-8859-1");
rewrite_driver()->set_response_headers_ptr(&headers);
ValidateNoChanges(kTestName, "<head></head>");
EXPECT_STREQ("iso-8859-1", rewrite_driver()->containing_charset());
}
TEST_F(ScanFilterTest, CharsetFromBomDoesntOverride) {
// Check that a BOM does not override the charset from the headers.
const char kTestName[] = "charset_from_bom_doesnt_override";
ResponseHeaders headers;
headers.MergeContentType("text/html; charset=iso-8859-1");
rewrite_driver()->set_response_headers_ptr(&headers);
SetDoctype(kUtf8Bom);
ValidateNoChanges(kTestName, "<head></head>");
EXPECT_STREQ("iso-8859-1", rewrite_driver()->containing_charset());
}
TEST_F(ScanFilterTest, CharsetFromBom) {
// Check that a BOM sets the charset.
const char kTestName[] = "charset_from_bom";
SetDoctype(kUtf8Bom);
ValidateNoChanges(kTestName, "<head></head>");
EXPECT_STREQ(kUtf8Charset, rewrite_driver()->containing_charset());
}
TEST_F(ScanFilterTest, CharsetFromMetaTagDoesntOverrideHeaders) {
// Check that a meta tag does not override the charset from the headers.
const char kTestName[] = "charset_from_meta_tag_doesnt_override_headers";
ResponseHeaders headers;
headers.MergeContentType("text/html; charset=iso-8859-1");
rewrite_driver()->set_response_headers_ptr(&headers);
ValidateNoChanges(kTestName,
"<head>"
"<meta charset=\"UTF-8\">"
"</head>");
EXPECT_STREQ("iso-8859-1", rewrite_driver()->containing_charset());
}
TEST_F(ScanFilterTest, CharsetFromMetaTagDoesntOverrideBom) {
// Check that a meta tag does not override the charset from a BOM.
const char kTestName[] = "charset_from_meta_tag_doesnt_override_bom";
SetDoctype(kUtf8Bom);
ValidateNoChanges(kTestName,
"<head>"
"<meta charset=\"us-ascii\">"
"</head>");
EXPECT_STREQ(kUtf8Charset, rewrite_driver()->containing_charset());
}
TEST_F(ScanFilterTest, CharsetFromMetaTag) {
// Check that a meta tag sets the charset.
const char kTestName[] = "charset_from_meta_tag";
ValidateNoChanges(kTestName,
"<head>"
"<meta charset=\"UTF-8\">"
"</head>");
EXPECT_STREQ("UTF-8", rewrite_driver()->containing_charset());
}
TEST_F(ScanFilterTest, CharsetFromFirstMetaTag) {
// Check that the first meta tag is used.
const char kTestName[] = "charset_from_first_meta_tag";
ValidateNoChanges(kTestName,
"<head>"
"<meta http-equiv=\"Content-Type\" "
"content=\"text/xml; charset=us-ascii\">"
"<meta charset=\"UTF-8\">"
"</head>");
EXPECT_STREQ("us-ascii", rewrite_driver()->containing_charset());
}
TEST_F(ScanFilterTest, CharsetFromFirstMetaTagWithCharset) {
// Check that the first meta tag is used.
const char kTestName[] = "charset_from_first_meta_tag_with_charset";
ValidateNoChanges(kTestName,
"<head>"
"<meta http-equiv=\"Content-Type\">"
"<meta charset=\"UTF-8\">"
"</head>");
EXPECT_STREQ("UTF-8", rewrite_driver()->containing_charset());
}
TEST_F(ScanFilterTest, CharsetFromMetaTagMissingQuotes) {
// Check that the first meta tag is used even if it's missing the quotes.
const char kTestName[] = "charset_from_meta_tag_missing_quotes";
ValidateNoChanges(kTestName,
"<head>"
"<meta http-equiv=Content-Type "
"content=text/html; charset=us-ascii>"
"<meta charset=\"UTF-8\">"
"</head>");
EXPECT_STREQ("us-ascii", rewrite_driver()->containing_charset());
}
} // namespace net_instaweb