/*
 * Copyright 2012 Google Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

// Author: matterbury@google.com (Matt Atterbury)
//
// Unit tests for ScanFilter.

#include "net/instaweb/rewriter/public/scan_filter.h"

#include "net/instaweb/rewriter/public/rewrite_driver.h"
#include "net/instaweb/rewriter/public/rewrite_test_base.h"
#include "pagespeed/kernel/base/charset_util.h"
#include "pagespeed/kernel/base/gtest.h"
#include "pagespeed/kernel/base/string_util.h"
#include "pagespeed/kernel/html/html_parse_test_base.h"
#include "pagespeed/kernel/http/google_url.h"
#include "pagespeed/kernel/http/response_headers.h"

namespace net_instaweb {

// Fixture shared by all ScanFilter tests in this file. It declares nothing of
// its own; everything the tests call is inherited from RewriteTestBase.
class ScanFilterTest : public RewriteTestBase {};

TEST_F(ScanFilterTest, EmptyPage) {
  // With no <base> tag in the document, the driver's base URL is expected to
  // be the page URL itself, i.e. kTestDomain + case id + ".html".
  const char kCaseId[] = "empty_page";
  const char kHtml[] = "<head></head>";
  ValidateNoChanges(kCaseId, kHtml);
  EXPECT_STREQ(StrCat(kTestDomain, kCaseId, ".html"),
               rewrite_driver()->base_url().Spec());
  // There is no base tag at all, so no reference may be flagged as preceding
  // one.
  EXPECT_FALSE(rewrite_driver()->refs_before_base());
}

TEST_F(ScanFilterTest, SetBase) {
  // A <base href> in the document replaces the page URL as the base URL.
  const char kBaseHref[] = "http://example.com/index.html";
  const char kCaseId[] = "set_base";
  ValidateNoChanges(
      kCaseId, StrCat("<head><base href=\"", kBaseHref, "\"></head>"));
  EXPECT_STREQ(kBaseHref, rewrite_driver()->base_url().Spec());
  // The base tag is the only URL-bearing element, so nothing precedes it.
  EXPECT_FALSE(rewrite_driver()->refs_before_base());
}

TEST_F(ScanFilterTest, RefsAfterBase) {
  // A link that follows the <base> tag must not set refs_before_base().
  // The profile attribute on <head> precedes the base tag in the markup, and
  // the expectation below requires that it is not flagged either.
  const char kBaseHref[] = "http://example.com/index.html";
  const char kCaseId[] = "refs_after_base";
  ValidateNoChanges(
      kCaseId,
      StrCat("<head profile='no problem'><base href=\"", kBaseHref,
             "\"><a href=\"help.html\">link</a></head>"));
  EXPECT_STREQ(kBaseHref, rewrite_driver()->base_url().Spec());
  EXPECT_FALSE(rewrite_driver()->refs_before_base());
}

TEST_F(ScanFilterTest, RefsBeforeBase) {
  // Check that we do flag refs before the base tag: the <a href> below comes
  // ahead of <base href>, so refs_before_base() must report true. The base
  // URL itself is still expected to come from the base tag.
  //
  // The case id matches this test's name so that its page URL and any failure
  // output are distinguishable from those of RefsAfterBase.
  const char kTestName[] = "refs_before_base";
  const char kNewBase[] = "http://example.com/index.html";
  ValidateNoChanges(kTestName,
                    StrCat("<head>"
                           "<a href=\"help.html\">link</a>"
                           "<base href=\"", kNewBase, "\">"
                           "</head>"));
  EXPECT_STREQ(kNewBase, rewrite_driver()->base_url().Spec());
  EXPECT_TRUE(rewrite_driver()->refs_before_base());
}

TEST_F(ScanFilterTest, NoCharset) {
  // Neither response headers, a BOM nor a meta tag is supplied here, so the
  // driver should report an empty containing charset.
  const char kCaseId[] = "no_charset";
  const char kHtml[] = "<head></head>";
  ValidateNoChanges(kCaseId, kHtml);
  EXPECT_TRUE(rewrite_driver()->containing_charset().empty());
}

TEST_F(ScanFilterTest, CharsetFromResponseHeaders) {
  // The charset named in the response headers' content type becomes the
  // driver's containing charset.
  ResponseHeaders response_headers;
  response_headers.MergeContentType("text/html; charset=iso-8859-1");
  rewrite_driver()->set_response_headers_ptr(&response_headers);

  const char kCaseId[] = "charset_from_response_headers";
  const char kHtml[] = "<head></head>";
  ValidateNoChanges(kCaseId, kHtml);
  EXPECT_STREQ("iso-8859-1", rewrite_driver()->containing_charset());
}

TEST_F(ScanFilterTest, CharsetFromBomDoesntOverride) {
  // When both are present, the charset from the response headers wins over
  // the one implied by a UTF-8 BOM.
  const char kCaseId[] = "charset_from_bom_doesnt_override";

  // Supply the UTF-8 BOM through the fixture's doctype setting.
  SetDoctype(kUtf8Bom);

  // Headers declare a different charset than the BOM implies.
  ResponseHeaders response_headers;
  response_headers.MergeContentType("text/html; charset=iso-8859-1");
  rewrite_driver()->set_response_headers_ptr(&response_headers);

  const char kHtml[] = "<head></head>";
  ValidateNoChanges(kCaseId, kHtml);
  EXPECT_STREQ("iso-8859-1", rewrite_driver()->containing_charset());
}

TEST_F(ScanFilterTest, CharsetFromBom) {
  // With no response headers, a UTF-8 BOM alone determines the charset.
  const char kCaseId[] = "charset_from_bom";
  const char kHtml[] = "<head></head>";
  // Supply the BOM through the fixture's doctype setting.
  SetDoctype(kUtf8Bom);
  ValidateNoChanges(kCaseId, kHtml);
  EXPECT_STREQ(kUtf8Charset, rewrite_driver()->containing_charset());
}

TEST_F(ScanFilterTest, CharsetFromMetaTagDoesntOverrideHeaders) {
  // The charset from the response headers takes precedence over a
  // conflicting <meta charset> tag in the document.
  ResponseHeaders response_headers;
  response_headers.MergeContentType("text/html; charset=iso-8859-1");
  rewrite_driver()->set_response_headers_ptr(&response_headers);

  const char kCaseId[] = "charset_from_meta_tag_doesnt_override_headers";
  const char kHtml[] = "<head><meta charset=\"UTF-8\"></head>";
  ValidateNoChanges(kCaseId, kHtml);
  EXPECT_STREQ("iso-8859-1", rewrite_driver()->containing_charset());
}

TEST_F(ScanFilterTest, CharsetFromMetaTagDoesntOverrideBom) {
  // The charset implied by a UTF-8 BOM takes precedence over a conflicting
  // <meta charset> tag in the document.
  const char kCaseId[] = "charset_from_meta_tag_doesnt_override_bom";
  const char kHtml[] = "<head><meta charset=\"us-ascii\"></head>";
  // Supply the BOM through the fixture's doctype setting.
  SetDoctype(kUtf8Bom);
  ValidateNoChanges(kCaseId, kHtml);
  EXPECT_STREQ(kUtf8Charset, rewrite_driver()->containing_charset());
}

TEST_F(ScanFilterTest, CharsetFromMetaTag) {
  // With no headers and no BOM, a <meta charset> tag determines the charset.
  const char kCaseId[] = "charset_from_meta_tag";
  const char kHtml[] = "<head><meta charset=\"UTF-8\"></head>";
  ValidateNoChanges(kCaseId, kHtml);
  EXPECT_STREQ("UTF-8", rewrite_driver()->containing_charset());
}

TEST_F(ScanFilterTest, CharsetFromFirstMetaTag) {
  // Two meta tags each declare a charset; the earlier one (the http-equiv
  // Content-Type tag) must win over the later <meta charset> tag.
  const char kCaseId[] = "charset_from_first_meta_tag";
  const char kHtml[] =
      "<head>"
      "<meta http-equiv=\"Content-Type\""
      " content=\"text/xml; charset=us-ascii\">"
      "<meta charset=\"UTF-8\">"
      "</head>";
  ValidateNoChanges(kCaseId, kHtml);
  EXPECT_STREQ("us-ascii", rewrite_driver()->containing_charset());
}

TEST_F(ScanFilterTest, CharsetFromFirstMetaTagWithCharset) {
  // The first meta tag here is an http-equiv Content-Type tag with no content
  // attribute, so it names no charset. The charset must then come from the
  // first meta tag that does declare one, the following <meta charset> tag.
  const char kCaseId[] = "charset_from_first_meta_tag_with_charset";
  const char kHtml[] =
      "<head><meta http-equiv=\"Content-Type\"><meta charset=\"UTF-8\"></head>";
  ValidateNoChanges(kCaseId, kHtml);
  EXPECT_STREQ("UTF-8", rewrite_driver()->containing_charset());
}

TEST_F(ScanFilterTest, CharsetFromMetaTagMissingQuotes) {
  // The http-equiv meta tag is written without quotes around its attribute
  // values. It must still supply the charset, and as the first meta tag it
  // must still win over the later <meta charset> tag.
  const char kCaseId[] = "charset_from_meta_tag_missing_quotes";
  const char kHtml[] =
      "<head>"
      "<meta http-equiv=Content-Type"
      " content=text/html; charset=us-ascii>"
      "<meta charset=\"UTF-8\">"
      "</head>";
  ValidateNoChanges(kCaseId, kHtml);
  EXPECT_STREQ("us-ascii", rewrite_driver()->containing_charset());
}

}  // namespace net_instaweb
