// blob: cce5bf1ed314d0123a9d180c7630c3ba2799e4f5 [file] [log] [blame]
/**
* Copyright 2010 Google Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
// Author: jmaessen@google.com (Jan Maessen)
#include "net/instaweb/rewriter/public/javascript_filter.h"
#include <ctype.h>
#include "base/scoped_ptr.h"
#include "net/instaweb/htmlparse/public/html_element.h"
#include "net/instaweb/htmlparse/public/html_node.h"
#include "net/instaweb/htmlparse/public/html_parse.h"
#include "net/instaweb/rewriter/public/javascript_code_block.h"
#include "net/instaweb/rewriter/public/javascript_library_identification.h"
#include "net/instaweb/rewriter/public/resource.h"
#include "net/instaweb/rewriter/public/output_resource.h"
#include "net/instaweb/rewriter/public/resource_manager.h"
#include "net/instaweb/util/public/atom.h"
#include "net/instaweb/util/public/content_type.h"
#include "net/instaweb/util/public/google_url.h"
#include "net/instaweb/util/public/message_handler.h"
#include "net/instaweb/util/public/meta_data.h"
#include <string>
#include "net/instaweb/util/public/string_util.h"
#include "net/instaweb/util/public/url_async_fetcher.h"
#include "net/instaweb/util/public/url_escaper.h"
#include "net/instaweb/util/public/writer.h"
namespace net_instaweb {
namespace {
// Status code that RewriteExternalScript() writes (with an empty body) to the
// output resource when a script was processed but rewriting it was not
// profitable.  It is an alias for HttpStatus::kNotModified.
const HttpStatus::Code kNotOptimizable = HttpStatus::kNotModified;
} // namespace
// Builds a filter attached to |driver|.
//
// |driver| and |path_prefix| are forwarded to the RewriteFilter base class.
// The driver's HTML parser and resource manager are cached in members, the
// rewrite configuration is built from the resource manager's statistics, and
// the filter starts with no script in progress and no missing scripts seen.
// The names "script", "src" and "type" are interned once here so that later
// tag/attribute lookups can use the interned values.
//
// NOTE(review): the Intern() calls below read the html_parse_ member, so this
// relies on html_parse_ being declared before s_script_/s_src_/s_type_ in the
// class definition -- confirm against the header.
JavascriptFilter::JavascriptFilter(RewriteDriver* driver,
const StringPiece& path_prefix)
: RewriteFilter(driver, path_prefix),
html_parse_(driver->html_parse()),
script_in_progress_(NULL),
script_src_(NULL),
resource_manager_(driver->resource_manager()),
some_missing_scripts_(false),
config_(driver->resource_manager()->statistics()),
s_script_(html_parse_->Intern("script")),
s_src_(html_parse_->Intern("src")),
s_type_(html_parse_->Intern("type")) { }
// The destructor body is intentionally empty; it performs no explicit cleanup.
JavascriptFilter::~JavascriptFilter() { }
// Forwards |statistics| to JavascriptRewriteConfig::Initialize(); this filter
// does no other setup of its own here.
void JavascriptFilter::Initialize(Statistics* statistics) {
JavascriptRewriteConfig::Initialize(statistics);
}
// Called for each opening <script> element.
//
// The element is treated as JavaScript when it has no type attribute at all,
// or when the type attribute is exactly one of the strings listed below.  In
// that case the element becomes the script in progress and its src attribute
// (if present) is remembered in script_src_.  Any other type is logged and
// the filter state is left untouched.
void JavascriptFilter::StartScriptElement(HtmlElement* element) {
  // Type attribute values accepted as JavaScript (exact, whole-string match).
  static const char* const kScriptTypes[] = {
    "text/javascript",
    "text/ecmascript",
    "application/javascript",
    "application/ecmascript",
  };
  static const size_t kNumScriptTypes =
      sizeof(kScriptTypes) / sizeof(kScriptTypes[0]);

  const char* script_type = element->AttributeValue(s_type_);

  // A missing type attribute counts as JavaScript.
  bool is_javascript = (script_type == NULL);
  for (size_t t = 0; !is_javascript && t < kNumScriptTypes; ++t) {
    is_javascript = (strcmp(kScriptTypes[t], script_type) == 0);
  }

  if (!is_javascript) {
    html_parse_->InfoHere("Unrecognized script type='%s'", script_type);
    return;
  }

  script_in_progress_ = element;
  script_src_ = element->FindAttribute(s_src_);
  if (script_src_ != NULL) {
    html_parse_->InfoHere("Found script with src %s", script_src_->value());
  }
}
// Element-open hook.  No element may open while a script is still being
// collected (enforced by the CHECK); only <script> tags get further handling.
void JavascriptFilter::StartElementImpl(HtmlElement* element) {
  CHECK(script_in_progress_ == NULL);
  if (element->tag() != s_script_) {
    return;
  }
  StartScriptElement(element);
}
// Character-data hook.  Text seen while a script is in progress is remembered
// for later processing; text outside of a script is ignored.
void JavascriptFilter::Characters(HtmlCharactersNode* characters) {
  if (script_in_progress_ == NULL) {
    return;
  }
  // Only the node pointer is stored; the pieces are joined lazily at the end
  // of the script.  Usually there is just one HtmlCharactersNode, in which
  // case the string never has to be copied at all.
  buffer_.push_back(characters);
}
// Produces a single contiguous view of the script text held in buffer_.
//
// With exactly one buffered node the returned StringPiece refers directly to
// that node's contents and |script_buffer| is not touched.  Otherwise the
// contents of every buffered node are appended to |script_buffer| in order
// and the returned StringPiece refers to |script_buffer|.
const StringPiece JavascriptFilter::FlattenBuffer(std::string* script_buffer) {
  if (buffer_.size() == 1) {
    return StringPiece(buffer_[0]->contents());
  }
  for (size_t node = 0; node < buffer_.size(); ++node) {
    script_buffer->append(buffer_[node]->contents());
  }
  return StringPiece(*script_buffer);
}
void JavascriptFilter::RewriteInlineScript() {
const int buffer_size = buffer_.size();
if (buffer_size > 0) {
// First buffer up script data and minify it.
std::string script_buffer;
const StringPiece script = FlattenBuffer(&script_buffer);
MessageHandler* message_handler = html_parse_->message_handler();
JavascriptCodeBlock code_block(script, &config_, message_handler);
JavascriptLibraryId library = code_block.ComputeJavascriptLibrary();
if (library.recognized()) {
html_parse_->InfoHere("Script is %s %s",
library.name(), library.version());
}
if (code_block.ProfitableToRewrite()) {
// Now replace all CharactersNodes with a single CharactersNode containing
// the minified script.
HtmlCharactersNode* new_script =
html_parse_->NewCharactersNode(buffer_[0]->parent(),
code_block.Rewritten());
html_parse_->ReplaceNode(buffer_[0], new_script);
for (int i = 1; i < buffer_size; i++) {
html_parse_->DeleteElement(buffer_[i]);
}
}
}
}
// Asks the resource manager for the input resource at |script_url|, resolved
// against the current base URL, reading it only if it is already cached.
// The result of that call is returned unchanged; callers in this file treat
// NULL as "script not available" and do not report the failure themselves.
Resource* JavascriptFilter::ScriptAtUrl(const StringPiece& script_url) {
  return resource_manager_->CreateInputResourceAndReadIfCached(
      base_gurl(), script_url, html_parse_->message_handler());
}
// Writes |script_out|, the rewritten form of |script_resource|, into
// |script_dest| with status kOK, using the cache expiration time reported by
// |script_resource|.
// Returns whether the write succeeded; a successful rewrite is logged here.
bool JavascriptFilter::WriteExternalScriptTo(
    const Resource* script_resource,
    const StringPiece& script_out, OutputResource* script_dest) {
  MessageHandler* message_handler = html_parse_->message_handler();
  int64 origin_expire_time_ms = script_resource->CacheExpirationTimeMs();
  const bool written =
      resource_manager_->Write(HttpStatus::kOK, script_out, script_dest,
                               origin_expire_time_ms, message_handler);
  if (written) {
    html_parse_->InfoHere("Rewrite script %s to %s",
                          script_resource->url().c_str(),
                          script_dest->url().c_str());
  }
  return written;
}
// External script; minify and replace with rewritten version (also external).
//
// Steps:
//  1. Create the output resource that corresponds to the script's src URL.
//  2. If that output resource can already be fetched, use it only when its
//     recorded status is kOK.  Otherwise load the input script (if cached),
//     rewrite it, and write the result to the output resource.  A script
//     whose rewrite is not profitable is recorded with kNotOptimizable; a
//     script that cannot be loaded sets some_missing_scripts_.
//  3. On success, point the src attribute at the output resource's URL.
//  4. Independently of the above, delete the element's body text if it
//     consists solely of whitespace / NUL characters.
void JavascriptFilter::RewriteExternalScript() {
  const StringPiece script_url(script_src_->value());
  MessageHandler* message_handler = html_parse_->message_handler();
  scoped_ptr<OutputResource> script_dest(
      resource_manager_->CreateOutputResourceForRewrittenUrl(
          base_gurl(), filter_prefix_, script_url,
          &kContentTypeJavascript, resource_manager_->url_escaper(),
          message_handler));
  if (script_dest != NULL) {
    bool ok = false;
    if (resource_manager_->FetchOutputResource(script_dest.get(), NULL, NULL,
                                               message_handler)) {
      // Only rewrite URL if we have usable rewritten data.
      ok = script_dest->metadata()->status_code() == HttpStatus::kOK;
    } else {
      scoped_ptr<Resource> script_input(ScriptAtUrl(script_url));
      ok = script_input != NULL;
      if (ok) {
        StringPiece script = script_input->contents();
        JavascriptCodeBlock code_block(script, &config_, message_handler);
        JavascriptLibraryId library = code_block.ComputeJavascriptLibrary();
        if (library.recognized()) {
          html_parse_->InfoHere("Script %s is %s %s",
                                script_input->url().c_str(),
                                library.name(), library.version());
        }
        ok = code_block.ProfitableToRewrite();
        if (ok) {
          ok = WriteExternalScriptTo(script_input.get(),
                                     code_block.Rewritten(),
                                     script_dest.get());
        } else {
          // Rewriting happened but wasn't useful; remember this for later
          // so we don't attempt to rewrite twice.
          html_parse_->InfoHere("Script %s didn't shrink",
                                script_input->url().c_str());
          int64 origin_expire_time_ms = script_input->CacheExpirationTimeMs();
          // TODO(jmarantz): currently this will not work, because HTTPCache
          // will not report a 'hit' on any status other than OK.  This should
          // be fixed by either:
          //   1. adding a few other codes that HTTPCache will return hits for
          //   2. using a special header to indicate failed-to-optimize.
          resource_manager_->Write(
              kNotOptimizable, "",
              script_dest.get(), origin_expire_time_ms, message_handler);
        }
      } else {
        some_missing_scripts_ = true;
      }
    }
    if (ok) {
      script_src_->SetValue(script_dest->url());
    }
  }
  // Finally, note that the script might contain body data.
  // We erase this if it is just whitespace; otherwise we leave it alone.
  // The script body is ignored by all browsers we know of.
  // However, various sources have encouraged using the body of an
  // external script element to store a post-load callback.
  // As this technique is preferable to storing callbacks in, say, html
  // comments, we support it for now.
  bool all_spaces = true;
  for (size_t i = 0; all_spaces && i < buffer_.size(); ++i) {
    const std::string& contents = buffer_[i]->contents();
    for (size_t j = 0; all_spaces && j < contents.size(); ++j) {
      // isspace() has undefined behavior for negative arguments other than
      // EOF, so bytes >= 0x80 must be passed as unsigned char values.
      const unsigned char c = static_cast<unsigned char>(contents[j]);
      if (!isspace(c) && c != 0) {
        html_parse_->WarningHere("Retaining contents of script tag"
                                 " even though script is external.");
        all_spaces = false;
      }
    }
  }
  for (size_t i = 0; all_spaces && i < buffer_.size(); ++i) {
    html_parse_->DeleteElement(buffer_[i]);
  }
}
// Returns the filter to its "no script in progress" state: forgets the
// current script element and its src attribute, and drops all buffered
// character nodes.
void JavascriptFilter::CompleteScriptInProgress() {
  script_src_ = NULL;
  script_in_progress_ = NULL;
  buffer_.clear();
}
// Element-close hook.  When a script is in progress and both it and the
// closing element are still rewritable, the closing </script> triggers the
// inline or external rewrite (chosen by whether a src attribute was seen)
// and then resets the per-script state.
void JavascriptFilter::EndElementImpl(HtmlElement* element) {
  if (script_in_progress_ == NULL ||
      !html_parse_->IsRewritable(script_in_progress_) ||
      !html_parse_->IsRewritable(element)) {
    return;
  }

  if (element->tag() != s_script_) {
    // Should not happen by construction (parser should not have tags here).
    // Note that if we get here, this test *Will* fail; it is written
    // out longhand to make diagnosis easier.
    CHECK(script_in_progress_ == NULL);
    return;
  }

  if (element->close_style() == HtmlElement::BRIEF_CLOSE) {
    html_parse_->ErrorHere("Brief close of script tag (non-portable)");
  }

  if (script_src_ != NULL) {
    RewriteExternalScript();
  } else {
    RewriteInlineScript();
  }
  CompleteScriptInProgress();
}
// Flush hook.  A flush that lands in the middle of a script abandons that
// script: it is left unmodified, the per-script state is reset, and the
// filter records that some scripts were missed.
void JavascriptFilter::Flush() {
  // TODO(jmaessen): We can be smarter here if it turns out to be necessary (eg
  // by buffering an in-progress script across the flush boundary).
  if (script_in_progress_ == NULL) {
    return;
  }
  // Not actually an error!
  html_parse_->InfoHere("Flush in mid-script; leaving script untouched.");
  CompleteScriptInProgress();
  some_missing_scripts_ = true;
}
// IE-directive hook.  |directive| itself is not inspected.  A directive must
// not appear while a script is in progress (enforced by the CHECK).  Seeing
// one marks the page as having scripts this filter did not examine.
void JavascriptFilter::IEDirective(HtmlIEDirectiveNode* directive) {
CHECK(script_in_progress_ == NULL);
// We presume an IE directive is concealing some js code.
some_missing_scripts_ = true;
}
// Serves a request for a rewritten script.
//
// The input resource is derived from |output_resource|.  If it can be read
// from cache and its contents are valid, the script is run through a
// JavascriptCodeBlock, the rewritten text is written to |output_resource|,
// and on success the output resource's contents are sent to |writer|.
// |callback|->Done() is then invoked with the overall success flag.
//
// Returns true when the request was handled here (and |callback| has been
// called); returns false, without calling |callback|, when the input
// resource could not be created, was not cached, or was invalid.
// |request_header|, |response_headers| and |fetcher| are not used by this
// implementation.
bool JavascriptFilter::Fetch(OutputResource* output_resource,
                             Writer* writer,
                             const MetaData& request_header,
                             MetaData* response_headers,
                             UrlAsyncFetcher* fetcher,
                             MessageHandler* message_handler,
                             UrlAsyncFetcher::Callback* callback) {
  bool queued = false;
  scoped_ptr<Resource> script_input(
      resource_manager_->CreateInputResourceFromOutputResource(
          resource_manager_->url_escaper(), output_resource,
          message_handler));
  if (script_input != NULL &&
      resource_manager_->ReadIfCached(script_input.get(), message_handler) &&
      script_input->ContentsValid()) {
    StringPiece script = script_input->contents();
    JavascriptCodeBlock code_block(script, &config_, message_handler);
    bool ok = WriteExternalScriptTo(script_input.get(),
                                    code_block.Rewritten(), output_resource);
    if (ok) {
      ok = writer->Write(output_resource->contents(), message_handler);
    }
    callback->Done(ok);
    queued = true;
  }
  return queued;
}
} // namespace net_instaweb