// blob: 53a9a73fd9c8a5271e8d96177705a58c30878e68 [file] [log] [blame]
/*
* Copyright 2010 Google Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
// Author: mdsteele@google.com (Matthew D. Steele)
#include "net/instaweb/rewriter/public/script_tag_scanner.h"
#include <vector>
#include "base/logging.h"
#include "pagespeed/kernel/base/basictypes.h"
#include "pagespeed/kernel/base/gtest.h"
#include "pagespeed/kernel/base/string.h"
#include "pagespeed/kernel/html/empty_html_filter.h"
#include "pagespeed/kernel/html/html_element.h"
#include "pagespeed/kernel/html/html_parse.h"
#include "pagespeed/kernel/html/html_parse_test_base.h"
namespace net_instaweb {
class ScriptTagScannerTest : public HtmlParseTestBase {
protected:
ScriptTagScannerTest() : collector_(&html_parse_) {
html_parse_.AddFilter(&collector_);
}
virtual bool AddBody() const { return true; }
// Helper class to collect script information (language,
// and attributes)
class ScriptCollector : public EmptyHtmlFilter {
public:
explicit ScriptCollector(HtmlParse* html_parse)
: script_tag_scanner_(html_parse) {
}
virtual void StartElement(HtmlElement* element) {
HtmlElement::Attribute* src;
ScriptInfo info;
info.classification =
script_tag_scanner_.ParseScriptElement(element, &src);
if (info.classification != ScriptTagScanner::kNonScript) {
if (src) {
info.url = src->DecodedValueOrNull();
}
info.flags = script_tag_scanner_.ExecutionMode(element);
scripts_.push_back(info);
}
}
int Size() const { return static_cast<int>(scripts_.size()); }
const GoogleString& UrlAt(int pos) const {
return scripts_[pos].url;
}
ScriptTagScanner::ScriptClassification ClassificationAt(int pos) {
return scripts_[pos].classification;
}
int FlagsAt(int pos) {
return scripts_[pos].flags;
}
virtual const char* Name() const { return "ScriptCollector"; }
private:
struct ScriptInfo {
GoogleString url;
ScriptTagScanner::ScriptClassification classification;
int flags;
};
std::vector<ScriptInfo> scripts_;
ScriptTagScanner script_tag_scanner_;
DISALLOW_COPY_AND_ASSIGN(ScriptCollector);
};
struct TestSpec {
const char* attributes;
int expected_flags;
};
// Checks to make sure each of attributes inside <script> produces
// the appropriate flags. The array is expected to be 0-terminated
void TestFlags(const TestSpec* test_spec) {
GoogleString html;
int test;
for (test = 0; test_spec[test].attributes; ++test) {
html += "<script " + GoogleString(test_spec[test].attributes) +
"></script>";
}
ValidateNoChanges("from_test_spec", html);
ASSERT_EQ(test, collector_.Size());
for (test = 0; test_spec[test].attributes; ++test) {
LOG(INFO) << test_spec[test].attributes;
EXPECT_EQ(test_spec[test].expected_flags, collector_.FlagsAt(test));
}
}
GoogleString ScriptWithType(const GoogleString& type) {
return "<script type=\"" + type + "\"></script>";
}
GoogleString ScriptWithLang(const GoogleString& type) {
return "<script language=\"" + type + "\"></script>";
}
ScriptCollector collector_;
private:
DISALLOW_COPY_AND_ASSIGN(ScriptTagScannerTest);
};
// Note: kNonScript needs no explicit classification check. Such elements are
// never recorded by the collector, so the Size() assertions cover them.
// A <noscript> element is not a script, so nothing may be recorded for it.
TEST_F(ScriptTagScannerTest, NotFoundScriptTag) {
  const char kHtml[] = "<noscript>";
  ValidateNoChanges("noscript", kHtml);
  ASSERT_EQ(0, collector_.Size());
}
// A plain <script src=...> is recorded once, as JavaScript, with its URL.
TEST_F(ScriptTagScannerTest, FindNoScriptTag) {
  const GoogleString kExpectedUrl("myscript.js");
  ValidateNoChanges("simple_script", "<script src=\"myscript.js\"></script>");
  ASSERT_EQ(1, collector_.Size());
  EXPECT_EQ(ScriptTagScanner::kJavaScript, collector_.ClassificationAt(0));
  EXPECT_EQ(kExpectedUrl, collector_.UrlAt(0));
}
// A type attribute that carries no value at all is treated as JavaScript.
TEST_F(ScriptTagScannerTest, TypeNoVal) {
  const GoogleString kNoUrl;
  ValidateNoChanges("simple_script", "<script type></script>");
  ASSERT_EQ(1, collector_.Size());
  EXPECT_EQ(ScriptTagScanner::kJavaScript, collector_.ClassificationAt(0));
  EXPECT_EQ(kNoUrl, collector_.UrlAt(0));
}
// An explicitly empty type attribute (type="") is treated as JavaScript.
TEST_F(ScriptTagScannerTest, TypeEmpty) {
  // The inner quotes must be escaped.  An unescaped "" ends one string literal
  // and starts the next, so the parser would see <script type=></script>
  // instead of the empty-valued attribute this test is about.
  ValidateNoChanges("simple_script", "<script type=\"\"></script>");
  ASSERT_EQ(1, collector_.Size());
  EXPECT_EQ(GoogleString(), collector_.UrlAt(0));
  EXPECT_EQ(ScriptTagScanner::kJavaScript, collector_.ClassificationAt(0));
}
// type is present but has no value, so the language attribute decides; tcl is
// not JavaScript, hence the script is classified as unknown.
TEST_F(ScriptTagScannerTest, TypeNoValHaveLang) {
  const GoogleString kNoUrl;
  ValidateNoChanges("simple_script", "<script type language=tcl></script>");
  ASSERT_EQ(1, collector_.Size());
  EXPECT_EQ(ScriptTagScanner::kUnknownScript,
            collector_.ClassificationAt(0));
  EXPECT_EQ(kNoUrl, collector_.UrlAt(0));
}
// No type attribute and a valueless language attribute: treated as JavaScript.
TEST_F(ScriptTagScannerTest, TypeEmptyLang) {
  const GoogleString kNoUrl;
  ValidateNoChanges("empty_lang", "<script language></script>");
  ASSERT_EQ(1, collector_.Size());
  EXPECT_EQ(ScriptTagScanner::kJavaScript, collector_.ClassificationAt(0));
  EXPECT_EQ(kNoUrl, collector_.UrlAt(0));
}
// No type attribute and a language value that cannot be decoded: the script
// is classified as unknown.
TEST_F(ScriptTagScannerTest, LangNotDecodable) {
  const GoogleString kNoUrl;
  ValidateNoChanges("lang_non_decodable", "<script language=muñecos></script>");
  ASSERT_EQ(1, collector_.Size());
  EXPECT_EQ(ScriptTagScanner::kUnknownScript,
            collector_.ClassificationAt(0));
  EXPECT_EQ(kNoUrl, collector_.UrlAt(0));
}
// A type value that cannot be decoded, with no language attribute to fall
// back on: the script is classified as unknown.
TEST_F(ScriptTagScannerTest, TypeNonDecodable) {
  const GoogleString kNoUrl;
  ValidateNoChanges("non_decodable", "<script type=muñecos></script>");
  ASSERT_EQ(1, collector_.Size());
  EXPECT_EQ(ScriptTagScanner::kUnknownScript,
            collector_.ClassificationAt(0));
  EXPECT_EQ(kNoUrl, collector_.UrlAt(0));
}
// When both attributes are given, type wins over language: a JavaScript type
// combined with language=tcl is still JavaScript.
TEST_F(ScriptTagScannerTest, TypeLangSubordinate) {
  const GoogleString kNoUrl;
  const char kHtml[] =
      "<script type=\"text/ecmascript\" language=tcl></script>";
  ValidateNoChanges("simple_script", kHtml);
  ASSERT_EQ(1, collector_.Size());
  EXPECT_EQ(ScriptTagScanner::kJavaScript, collector_.ClassificationAt(0));
  EXPECT_EQ(kNoUrl, collector_.UrlAt(0));
}
// A language attribute that carries no value is treated as JavaScript.
TEST_F(ScriptTagScannerTest, LangNoVal) {
  const GoogleString kNoUrl;
  ValidateNoChanges("simple_script", "<script language></script>");
  ASSERT_EQ(1, collector_.Size());
  EXPECT_EQ(ScriptTagScanner::kJavaScript, collector_.ClassificationAt(0));
  EXPECT_EQ(kNoUrl, collector_.UrlAt(0));
}
// An explicitly empty language attribute (language="") is treated as
// JavaScript.
TEST_F(ScriptTagScannerTest, LangEmpty) {
  // The inner quotes must be escaped.  An unescaped "" ends one string literal
  // and starts the next, so the parser would see <script language=></script>
  // instead of the empty-valued attribute this test is about.
  ValidateNoChanges("simple_script", "<script language=\"\"></script>");
  ASSERT_EQ(1, collector_.Size());
  EXPECT_EQ(GoogleString(), collector_.UrlAt(0));
  EXPECT_EQ(ScriptTagScanner::kJavaScript, collector_.ClassificationAt(0));
}
// Runs a table of type values through the scanner with no normalization
// involved.  The JavaScript entries come from the HTML5 list; the trailing
// entries are values that must not be accepted.
TEST_F(ScriptTagScannerTest, TypeScripts) {
  static const char* const kTypes[] = {
    // Entries 0-15: recognized as JavaScript.
    "application/ecmascript",
    "application/javascript",
    "application/x-ecmascript",
    "application/x-javascript",
    "text/ecmascript",
    "text/javascript",
    "text/javascript1.0",
    "text/javascript1.1",
    "text/javascript1.2",
    "text/javascript1.3",
    "text/javascript1.4",
    "text/javascript1.5",
    "text/jscript",
    "text/livescript",
    "text/x-ecmascript",
    "text/x-javascript",
    // Entries 16-19: not JavaScript.
    "text/tcl",
    "text/ecmascript4",
    "text/javascript2.0",
    " ",
  };
  const int kNumTypes = static_cast<int>(sizeof(kTypes) / sizeof(kTypes[0]));
  const int kNumJavaScript = 16;

  GoogleString html;
  for (int i = 0; i < kNumTypes; ++i) {
    html += ScriptWithType(kTypes[i]);
  }
  ValidateNoChanges("script types", html);

  ASSERT_EQ(kNumTypes, collector_.Size());
  for (int i = 0; i < kNumTypes; ++i) {
    const ScriptTagScanner::ScriptClassification expected =
        (i < kNumJavaScript) ? ScriptTagScanner::kJavaScript
                             : ScriptTagScanner::kUnknownScript;
    EXPECT_EQ(GoogleString(), collector_.UrlAt(i));
    EXPECT_EQ(expected, collector_.ClassificationAt(i));
  }
}
// Same table as TypeScripts, but with mixed case and leading/trailing
// whitespace: type values must be trimmed and case-folded before matching.
TEST_F(ScriptTagScannerTest, TypeScriptsNormalize) {
  static const char* const kTypes[] = {
    // Entries 0-15: still JavaScript once normalized.
    " application/ecmascRipt",
    " applicAtion/javascript ",
    "application/x-ecmaScript ",
    " applicAtion/x-javascript",
    "text/Ecmascript",
    " text/jaVasCript ",
    " TEXt/javascript1.0\t",
    " text/javascript1.1",
    " teXt/javascripT1.2",
    "\ttExt/javascRipt1.3 ",
    " text/javascRipT1.4 ",
    " Text/javAscript1.5 ",
    " Text/jscrIpt",
    " text/lIvescript",
    "teXt/x-ecmasCript ",
    "tExt/x-jaVascript ",
    // Entries 16-19: not JavaScript, normalized or not.
    "Text/Tcl ",
    " text/Ecmascript4",
    "tExt/javascript2.0",
    "text/javasc ript",
  };
  const int kNumTypes = static_cast<int>(sizeof(kTypes) / sizeof(kTypes[0]));
  const int kNumJavaScript = 16;

  GoogleString html;
  for (int i = 0; i < kNumTypes; ++i) {
    html += ScriptWithType(kTypes[i]);
  }
  ValidateNoChanges("script types", html);

  ASSERT_EQ(kNumTypes, collector_.Size());
  for (int i = 0; i < kNumTypes; ++i) {
    const ScriptTagScanner::ScriptClassification expected =
        (i < kNumJavaScript) ? ScriptTagScanner::kJavaScript
                             : ScriptTagScanner::kUnknownScript;
    EXPECT_EQ(GoogleString(), collector_.UrlAt(i));
    EXPECT_EQ(expected, collector_.ClassificationAt(i));
  }
}
// For the language attribute, "text/" + value is what gets checked against
// the list of valid JavaScript mimetypes.
TEST_F(ScriptTagScannerTest, LangScripts) {
  static const char* const kLangs[] = {
    // Entries 0-11: recognized as JavaScript.
    "ecmascript",
    "javascript",
    "javascript1.0",
    "javascript1.1",
    "javascript1.2",
    "javascript1.3",
    "javascript1.4",
    "javascript1.5",
    "jscript",
    "livescript",
    "x-ecmascript",
    "x-javascript",
    // Entries 12-14: not JavaScript.
    "tcl",
    "ecmascript4",
    "javascript2.0",
  };
  const int kNumLangs = static_cast<int>(sizeof(kLangs) / sizeof(kLangs[0]));
  const int kNumJavaScript = 12;

  GoogleString html;
  for (int i = 0; i < kNumLangs; ++i) {
    html += ScriptWithLang(kLangs[i]);
  }
  ValidateNoChanges("script langs", html);

  ASSERT_EQ(kNumLangs, collector_.Size());
  for (int i = 0; i < kNumLangs; ++i) {
    const ScriptTagScanner::ScriptClassification expected =
        (i < kNumJavaScript) ? ScriptTagScanner::kJavaScript
                             : ScriptTagScanner::kUnknownScript;
    EXPECT_EQ(GoogleString(), collector_.UrlAt(i));
    EXPECT_EQ(expected, collector_.ClassificationAt(i));
  }
}
// Case folding applies to the language attribute just as it does to type.
TEST_F(ScriptTagScannerTest, LangScriptsNormalizeCase) {
  static const char* const kLangs[] = {
    // Entries 0-11: JavaScript once case is folded.
    "ecmasCript",
    "javAscript",
    "javascript1.0",
    "javascRipt1.1",
    "javascripT1.2",
    "javaScrIpt1.3",
    "jaVasCript1.4",
    "javaScriPt1.5",
    "jscRiPt",
    "livEscript",
    "x-ecmaScript",
    "x-jaVascript",
    // Entries 12-14: not JavaScript regardless of case.
    "tCl",
    "ecmasCript4",
    "jaVascript2.0",
  };
  const int kNumLangs = static_cast<int>(sizeof(kLangs) / sizeof(kLangs[0]));
  const int kNumJavaScript = 12;

  GoogleString html;
  for (int i = 0; i < kNumLangs; ++i) {
    html += ScriptWithLang(kLangs[i]);
  }
  ValidateNoChanges("script langs", html);

  ASSERT_EQ(kNumLangs, collector_.Size());
  for (int i = 0; i < kNumLangs; ++i) {
    const ScriptTagScanner::ScriptClassification expected =
        (i < kNumJavaScript) ? ScriptTagScanner::kJavaScript
                             : ScriptTagScanner::kUnknownScript;
    EXPECT_EQ(GoogleString(), collector_.UrlAt(i));
    EXPECT_EQ(expected, collector_.ClassificationAt(i));
  }
}
// Unlike type, language values are not whitespace-trimmed, so every padded
// value below must be classified as unknown.
TEST_F(ScriptTagScannerTest, LangScriptsNormalizeWhitespace) {
  static const char* const kLangs[] = {
    " ecmascript",
    "javascript\t",
    " javascript1.0 ",
    " javascript1.1",
    "javascript1.2 ",
    " javascript1.3",
    "javascript1.4 ",
    " javascript1.5",
    "jscript ",
    "livescript ",
    " x-ecmascript",
    "x-javascript\t",
    " tcl ",
    "ecmascript4 ",
    " javascript2.0",
  };
  const int kNumLangs = static_cast<int>(sizeof(kLangs) / sizeof(kLangs[0]));

  GoogleString html;
  for (int i = 0; i < kNumLangs; ++i) {
    html += ScriptWithLang(kLangs[i]);
  }
  ValidateNoChanges("script langs", html);

  ASSERT_EQ(kNumLangs, collector_.Size());
  for (int i = 0; i < kNumLangs; ++i) {
    EXPECT_EQ(GoogleString(), collector_.UrlAt(i));
    EXPECT_EQ(ScriptTagScanner::kUnknownScript,
              collector_.ClassificationAt(i));
  }
}
// Execution-mode flags for scripts carrying for= / event= attributes.
TEST_F(ScriptTagScannerTest, ForEvent) {
  const int kSync = ScriptTagScanner::kExecuteSync;
  const int kAsync = ScriptTagScanner::kExecuteAsync;
  const int kForEvent = ScriptTagScanner::kExecuteForEvent;

  const TestSpec kSpecs[] = {
    // Both attributes present, even if valueless or empty: for-event script.
    { "for event", kForEvent },
    { "for=\"\" event=\"\"", kForEvent },
    // Only one of the two attributes: ordinary synchronous script.
    { "for", kSync },
    { "event", kSync },
    // Arbitrary for/event pairs, optionally combined with async.
    { "for=\"a\" event=\"b\"", kForEvent },
    { "for=\"window\" event=\"b\"", kForEvent },
    { "for=\"window\" event=\"b\" async", kForEvent | kAsync },
    // window + onload (with or without "()", any case, padded with
    // whitespace) is expected not to set the for-event flag.
    { "for=\"window\" event=\"onload\"", kSync },
    { "for=\"window\" event=onload async", kAsync },
    { "for=\"window\" event=\"onload()\"", kSync },
    { "for=\"wiNdow \" event=\" onLoad \"", kSync },
    { "for=\" windOw\" event=\"OnloAd() \"", kSync },
    // Terminator.
    { 0, kSync }
  };
  TestFlags(kSpecs);
}
// Execution-mode flags for the async and defer attributes, each checked both
// without and with a src attribute.
TEST_F(ScriptTagScannerTest, AsyncDefer) {
  const int kSync = ScriptTagScanner::kExecuteSync;
  const int kAsync = ScriptTagScanner::kExecuteAsync;
  const int kDefer = ScriptTagScanner::kExecuteDefer;

  const TestSpec kSpecs[] = {
    // Inline scripts.
    { "language=tcl async", kAsync },
    { "async=\"irrelevant\"", kAsync },
    { "defer", kDefer },
    { "defer async", kDefer | kAsync },
    // The same attribute combinations on external scripts.
    { "language=tcl async src=a", kAsync },
    { "async=\"irrelevant\" src=a", kAsync },
    { "defer src=a", kDefer },
    { "defer async src=a", kDefer | kAsync },
    // Terminator.
    { 0, kSync }
  };
  TestFlags(kSpecs);
}
} // namespace net_instaweb