// blob: ddcfb9740de55fb969af8a6f3c40b957090bfaae [file] [log] [blame]
/*
* Copyright 2013 Google Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
// Author: morlovich@google.com (Maksim Orlovich)
#include "net/instaweb/rewriter/public/css_summarizer_base.h"
#include <cstddef>
#include <memory>
#include "base/logging.h"
#include "net/instaweb/rewriter/cached_result.pb.h"
#include "net/instaweb/rewriter/public/common_filter.h"
#include "net/instaweb/rewriter/public/css_inline_filter.h"
#include "net/instaweb/rewriter/public/css_tag_scanner.h"
#include "net/instaweb/rewriter/public/data_url_input_resource.h"
#include "net/instaweb/rewriter/public/inline_resource_slot.h"
#include "net/instaweb/rewriter/public/output_resource_kind.h"
#include "net/instaweb/rewriter/public/resource.h"
#include "net/instaweb/rewriter/public/resource_slot.h"
#include "net/instaweb/rewriter/public/rewrite_driver.h"
#include "net/instaweb/rewriter/public/rewrite_result.h"
#include "net/instaweb/rewriter/public/server_context.h"
#include "net/instaweb/rewriter/public/single_rewrite_context.h"
#include "pagespeed/kernel/base/abstract_mutex.h"
#include "pagespeed/kernel/base/basictypes.h"
#include "pagespeed/kernel/base/charset_util.h"
#include "pagespeed/kernel/base/scoped_ptr.h"
#include "pagespeed/kernel/base/statistics.h"
#include "pagespeed/kernel/base/string.h"
#include "pagespeed/kernel/base/string_util.h"
#include "pagespeed/kernel/base/thread_system.h"
#include "pagespeed/kernel/html/html_element.h"
#include "pagespeed/kernel/html/html_keywords.h"
#include "pagespeed/kernel/html/html_name.h"
#include "pagespeed/kernel/html/html_node.h"
#include "pagespeed/kernel/http/content_type.h"
#include "pagespeed/kernel/http/data_url.h"
#include "webutil/css/parser.h"
namespace net_instaweb {
// Forward declaration: this file only handles UrlSegmentEncoder through a
// pointer (see Context::encoder()).
class UrlSegmentEncoder;
// Rewrite context for CssSummarizerBase --- it invokes the filter's
// summarization functions on parsed CSS ASTs when available, and synchronizes
// them with the summaries_ table in the CssSummarizerBase.
//
// One Context is created per stylesheet for which the filter starts a rewrite
// (see CssSummarizerBase::CreateContextAndSummaryInfo). The virtual methods
// below are presumably overrides of SingleRewriteContext hooks --- confirm
// against that class's header.
class CssSummarizerBase::Context : public SingleRewriteContext {
public:
// pos denotes our position in the filter's summaries_ vector. filter and
// driver are stored/forwarded as raw pointers; this class never deletes them.
Context(int pos, CssSummarizerBase* filter, RewriteDriver* driver);
virtual ~Context();
// Calls to finish initialization for given rewrite type; should be called
// soon after construction.
// SetupInlineRewrite is for a <style> block: it records the element and its
// text node. SetupExternalRewrite is for a <link>: it records the element and
// leaves the text node NULL.
void SetupInlineRewrite(HtmlElement* element, HtmlCharactersNode* text);
void SetupExternalRewrite(HtmlElement* element);
protected:
// Copies the cached summarization result (if any) into the filter's
// summaries_ entry, calls the filter's RenderSummary or WillNotRenderSummary
// accordingly, and then reports completion via ReportDone().
virtual void Render();
// Calls the filter's WillNotRenderSummary for this stylesheet. Unlike
// Render(), this does not update the summary state or call ReportDone().
virtual void WillNotRender();
// Records pos_ in the filter's canceled_summaries_ list (under
// progress_lock_); the list is processed later by
// CssSummarizerBase::RenderDone().
virtual void Cancel();
// Creates a single partition for the one slot, with a NULL output resource.
// Returns false if there is not exactly one slot, or if an external
// resource is not safe to rewrite.
virtual bool Partition(OutputPartitions* partitions,
OutputResourceVector* outputs);
// Parses the input as CSS and stores the filter's summary in the partition's
// inlined_data; always finishes with kRewriteFailed since no output resource
// is ever produced.
virtual void RewriteSingle(const ResourcePtr& input,
const OutputResourcePtr& output);
// The following accessors delegate to the owning filter, except kind(),
// which is fixed.
virtual const char* id() const { return filter_->id(); }
virtual OutputResourceKind kind() const { return kRewrittenResource; }
virtual GoogleString CacheKeySuffix() const;
virtual const UrlSegmentEncoder* encoder() const {
return filter_->encoder();
}
private:
// Reports completion of one summary (including failures).
// Decrements the filter's outstanding_rewrites_ under progress_lock_ and, if
// the end of the document was already seen and nothing is outstanding, calls
// the filter's ReportSummariesDone() after releasing the lock.
void ReportDone();
int pos_; // our position in the list of all styles in the page.
// The summarizer filter that created this context.
CssSummarizerBase* filter_;
// The <style> or <link> element being summarized; NULL until a Setup* call.
HtmlElement* element_;
// Text node of the <style> block; NULL for external (<link>) rewrites.
HtmlCharactersNode* text_;
// True if we're rewriting a <style> block, false if it's a <link>
bool rewrite_inline_;
DISALLOW_COPY_AND_ASSIGN(Context);
};
// Creates a context for the stylesheet stored at index |pos| of |filter|'s
// summaries_ table.  The element/text pointers and the inline flag start out
// as placeholders; SetupInlineRewrite() or SetupExternalRewrite() fills them
// in afterwards.
CssSummarizerBase::Context::Context(int pos, CssSummarizerBase* filter,
                                    RewriteDriver* driver)
    : SingleRewriteContext(driver,
                           NULL,   // parent
                           NULL),  // resource_context
      pos_(pos),
      filter_(filter),
      element_(NULL),
      text_(NULL),
      rewrite_inline_(false) {}
// The destructor has no work of its own: none of the pointer members are
// deleted by this class.
CssSummarizerBase::Context::~Context() {}
// Configures this context for summarizing an inline <style> block: remembers
// the |element| and its |text| node and marks the rewrite as inline.
void CssSummarizerBase::Context::SetupInlineRewrite(HtmlElement* element,
                                                    HtmlCharactersNode* text) {
  element_ = element;
  text_ = text;
  rewrite_inline_ = true;
}
// Configures this context for summarizing an external stylesheet: remembers
// the <link> |element|, clears the text node (external sheets have none) and
// marks the rewrite as not inline.
void CssSummarizerBase::Context::SetupExternalRewrite(HtmlElement* element) {
  element_ = element;
  text_ = NULL;
  rewrite_inline_ = false;
}
// Marks one summary as finished.  The outstanding counter is decremented
// under the filter's progress_lock_; if the document end was already seen and
// nothing remains outstanding, the filter's ReportSummariesDone() is called
// once the lock has been released.
void CssSummarizerBase::Context::ReportDone() {
  bool all_summaries_finished = false;
  {
    ScopedMutex lock(filter_->progress_lock_.get());
    --filter_->outstanding_rewrites_;
    all_summaries_finished = filter_->saw_end_of_document_ &&
                             (filter_->outstanding_rewrites_ == 0);
  }
  // The lock is no longer held at this point.
  if (!all_summaries_finished) {
    return;
  }
  filter_->ReportSummariesDone();
}
void CssSummarizerBase::Context::Render() {
DCHECK_LE(0, pos_);
DCHECK_LT(static_cast<size_t>(pos_), filter_->summaries_.size());
SummaryInfo& summary_info = filter_->summaries_[pos_];
bool is_element_deleted = false;
if (num_output_partitions() == 0) {
// Failed at partition -> resource fetch failed or uncacheable.
summary_info.state = kSummaryInputUnavailable;
filter_->WillNotRenderSummary(pos_, element_, text_, &is_element_deleted);
} else {
const CachedResult& result = *output_partition(0);
// Transfer the summarization result from the metadata cache (where it was
// stored by RewriteSingle) to the summary table; we have to do it here
// so it's available on a cache hit. Conveniently this will also never race
// with the HTML thread, so the summary accessors will be safe to access
// off parser events.
if (result.has_inlined_data()) {
summary_info.state = kSummaryOk;
summary_info.data = result.inlined_data();
// For external resources, fix up base to refer to the current URL in
// the slot, as it may have been changed by an earlier filter.
if (summary_info.is_external) {
summary_info.base = slot(0)->resource()->url();
}
// TODO(sligocki): text_ could easily be out of date. We should use the
// ResourceSlot to render the result.
filter_->RenderSummary(pos_, element_, text_, &is_element_deleted);
} else {
summary_info.state = kSummaryCssParseError;
filter_->WillNotRenderSummary(pos_, element_, text_, &is_element_deleted);
}
}
if (is_element_deleted) {
slot(0)->set_disable_further_processing(true);
}
ReportDone();
}
// Tells the filter that no summary will be rendered for this stylesheet.  If
// the filter reports that it deleted the element, further processing of the
// slot is disabled.  Neither the summary state nor the outstanding counter is
// touched here.
void CssSummarizerBase::Context::WillNotRender() {
  bool element_removed = false;
  filter_->WillNotRenderSummary(pos_, element_, text_, &element_removed);
  if (!element_removed) {
    return;
  }
  slot(0)->set_disable_further_processing(true);
}
void CssSummarizerBase::Context::Cancel() {
ScopedMutex hold(filter_->progress_lock_.get());
filter_->canceled_summaries_.push_back(pos_);
}
void CssSummarizerBase::Context::RewriteSingle(
const ResourcePtr& input_resource,
const OutputResourcePtr& output_resource) {
StringPiece input_contents = input_resource->ExtractUncompressedContents();
// TODO(morlovich): Should we keep track of this so it can be restored?
StripUtf8Bom(&input_contents);
// Load stylesheet w/o expanding background attributes and preserving as
// much content as possible from the original document.
Css::Parser parser(input_contents);
parser.set_preservation_mode(true);
// We avoid quirks-mode so that we do not "fix" something we shouldn't have.
parser.set_quirks_mode(false);
scoped_ptr<Css::Stylesheet> stylesheet(parser.ParseRawStylesheet());
CachedResult* result = output_partition(0);
if (stylesheet.get() == NULL ||
parser.errors_seen_mask() != Css::Parser::kNoError) {
// TODO(morlovich): do we want a stat here?
result->clear_inlined_data();
} else {
filter_->Summarize(stylesheet.get(), result->mutable_inlined_data());
}
if (CssInlineFilter::HasClosingStyleTag(result->inlined_data())) {
result->clear_inlined_data();
}
// We never produce output --- just write to the CachedResult; so we
// technically fail.
RewriteDone(kRewriteFailed, 0);
}
// Builds the single partition for this context.  Returns false when there is
// not exactly one slot, or when an external resource is not safe to rewrite.
// On success one partition carrying the input info is added and a NULL output
// resource is appended to |outputs|.
bool CssSummarizerBase::Context::Partition(OutputPartitions* partitions,
                                           OutputResourceVector* outputs) {
  if (num_slots() != 1) {
    return false;
  }

  ResourcePtr input(slot(0)->resource());
  const bool is_external = !rewrite_inline_;
  if (is_external && !input->IsSafeToRewrite(rewrite_uncacheable())) {
    // TODO(anupama): Shouldn't we check the closing style tag portion of
    // ShouldInline(resource) here?
    return false;
  }

  // No output resource is wanted, yet the partition must be non-trivial.
  // kOmitInputHash is used because this content is going to be inlined.
  CachedResult* new_partition = partitions->add_partition();
  input->AddInputInfoToPartition(Resource::kOmitInputHash, 0, new_partition);
  outputs->push_back(OutputResourcePtr(NULL));
  return true;
}
// The cache key suffix is whatever the owning filter supplies.
GoogleString CssSummarizerBase::Context::CacheKeySuffix() const {
  GoogleString suffix = filter_->CacheKeySuffix();
  return suffix;
}
// Names of the statistics variables registered by InitStats() and bumped in
// ReportSummariesDone(): the first counts summaries that ended in state
// kSummaryOk, the second counts all other summaries.
const char CssSummarizerBase::kNumCssUsedForCriticalCssComputation[] =
"num_css_used_for_critical_css_computation";
const char CssSummarizerBase::kNumCssNotUsedForCriticalCssComputation[] =
"num_css_not_used_for_critical_css_computation";
// Creates the progress mutex from the driver's thread system, looks up the
// two statistics variables by name, and resets all per-document state.
CssSummarizerBase::CssSummarizerBase(RewriteDriver* driver)
    : RewriteFilter(driver),
      progress_lock_(driver->server_context()->thread_system()->NewMutex()) {
  Statistics* stat_registry = server_context()->statistics();
  num_css_used_for_critical_css_computation_ =
      stat_registry->GetVariable(kNumCssUsedForCriticalCssComputation);
  num_css_not_used_for_critical_css_computation_ =
      stat_registry->GetVariable(kNumCssNotUsedForCriticalCssComputation);
  Clear();
}
// Resets the per-document state (summaries, counters, pending cancellations)
// on destruction.
CssSummarizerBase::~CssSummarizerBase() { Clear(); }
// Registers the statistics variables used by this filter with |statistics|,
// in the same order as they are declared.
void CssSummarizerBase::InitStats(Statistics* statistics) {
  const char* const variable_names[] = {
      kNumCssUsedForCriticalCssComputation,
      kNumCssNotUsedForCriticalCssComputation,
  };
  const size_t num_variables =
      sizeof(variable_names) / sizeof(variable_names[0]);
  for (size_t idx = 0; idx < num_variables; ++idx) {
    statistics->AddVariable(variable_names[idx]);
  }
}
// Default cache key suffix: the empty string.
GoogleString CssSummarizerBase::CacheKeySuffix() const {
  GoogleString no_suffix;
  return no_suffix;
}
// Called at the end of ReportSummariesDone().  The base implementation
// deliberately does nothing.
void CssSummarizerBase::SummariesDone() {
  // Intentionally empty.
}
// Called from Context::Render() once the summary at |pos| is in state
// kSummaryOk.  The base implementation does nothing and leaves
// |*is_element_deleted| untouched.
void CssSummarizerBase::RenderSummary(int pos, HtmlElement* element,
                                      HtmlCharactersNode* char_node,
                                      bool* is_element_deleted) {
  // Intentionally empty.
}
// Called when no summary will be rendered for the stylesheet at |pos|.  The
// base implementation does nothing and leaves |*is_element_deleted| untouched.
void CssSummarizerBase::WillNotRenderSummary(int pos, HtmlElement* element,
                                             HtmlCharactersNode* char_node,
                                             bool* is_element_deleted) {
  // Intentionally empty.
}
// Resets all per-document state: the collected summaries, the queued
// cancellations, the current <style> element, and the progress bookkeeping.
void CssSummarizerBase::Clear() {
  summaries_.clear();
  canceled_summaries_.clear();
  style_element_ = NULL;
  saw_end_of_document_ = false;
  outstanding_rewrites_ = 0;
}
// Begins a new document by discarding state left over from the previous one.
// Debug builds assert that no cancellations are still queued.
void CssSummarizerBase::StartDocumentImpl() {
  DCHECK(canceled_summaries_.empty());
  // TODO(morlovich): we hold on to the summaries_ memory too long; refine this
  // once the data type is refined.
  Clear();
}
// Records that the end of the document has been reached.  If no rewrites are
// outstanding at that moment, completion is reported immediately; the report
// is made after progress_lock_ has been released.
void CssSummarizerBase::EndDocument() {
  bool nothing_pending = false;
  {
    ScopedMutex lock(progress_lock_.get());
    saw_end_of_document_ = true;
    // True when everything finished before the document end was seen.
    nothing_pending = (outstanding_rewrites_ == 0);
  }
  if (nothing_pending) {
    ReportSummariesDone();
  }
}
// Remembers a non-scoped <style> element so that Characters() can summarize
// its contents.  <link> elements are handled in EndElementImpl.
//
// Scoped style elements are ignored: they are already inlined, can't safely
// be moved, and take precedence in cascade order regardless of their position
// relative to non-scoped CSS.
void CssSummarizerBase::StartElementImpl(HtmlElement* element) {
  // HtmlParse should not pass us elements inside a style element.
  CHECK(style_element_ == NULL);
  const bool is_unscoped_style =
      (element->keyword() == HtmlName::kStyle) &&
      (element->FindAttribute(HtmlName::kScoped) == NULL);
  if (is_unscoped_style) {
    style_element_ = element;
  }
}
// Forwards the event to CommonFilter and, when inside a tracked <style>
// element that must be summarized, starts an inline rewrite for its text.
void CssSummarizerBase::Characters(HtmlCharactersNode* characters_node) {
  CommonFilter::Characters(characters_node);
  if (style_element_ == NULL) {
    return;
  }
  // Note: HtmlParse should guarantee that we only get one CharactersNode
  // per <style> block even if it is split by a flush.
  if (!MustSummarize(style_element_)) {
    return;
  }
  StartInlineRewrite(style_element_, characters_node);
}
// Closes a tracked inline <style> element, or starts an external rewrite for
// a <link> whose rel marks it as a stylesheet (or alternate stylesheet), that
// has an href attribute, and that MustSummarize() accepts.
void CssSummarizerBase::EndElementImpl(HtmlElement* element) {
  if (style_element_ != NULL) {
    // End of an inline style.
    CHECK_EQ(style_element_, element);  // HtmlParse should not pass unmatching.
    style_element_ = NULL;
    return;
  }

  if (element->keyword() != HtmlName::kLink) {
    return;
  }

  // Candidate external stylesheet.
  StringPiece rel = element->AttributeValue(HtmlName::kRel);
  if (!CssTagScanner::IsStylesheetOrAlternate(rel)) {
    return;
  }

  HtmlElement::Attribute* href = element->FindAttribute(HtmlName::kHref);
  if (href == NULL) {
    return;  // No href= attribute, so there is nothing to fetch.
  }

  if (MustSummarize(element)) {
    StartExternalRewrite(element, href, rel);
  }
}
// Applies queued cancellations: every position in canceled_summaries_ is
// marked kSummarySlotRemoved and deducted from the outstanding count.  If
// that brings the count to zero after the document end was seen, completion
// is reported once progress_lock_ has been released.
void CssSummarizerBase::RenderDone() {
  bool all_summaries_finished = false;
  {
    ScopedMutex lock(progress_lock_.get());
    if (!canceled_summaries_.empty()) {
      // Transfer from canceled_summaries_ to summaries_.
      for (size_t idx = 0; idx < canceled_summaries_.size(); ++idx) {
        summaries_[canceled_summaries_[idx]].state = kSummarySlotRemoved;
      }
      outstanding_rewrites_ -= canceled_summaries_.size();
      if (outstanding_rewrites_ == 0) {
        all_summaries_finished = saw_end_of_document_;
      }
    }
    canceled_summaries_.clear();
  }
  if (all_summaries_finished) {
    ReportSummariesDone();
  }
}
void CssSummarizerBase::ReportSummariesDone() {
if (DebugMode()) {
GoogleString comment = "Summary computation status for ";
StrAppend(&comment, Name(), "\n");
for (int i = 0, n = summaries_.size(); i < n; ++i) {
StrAppend(&comment, "Resource ", IntegerToString(i),
" ", summaries_[i].location, ": ");
switch (summaries_[i].state) {
case kSummaryOk:
StrAppend(&comment, "Computed OK\n");
break;
case kSummaryStillPending:
StrAppend(&comment, "Computation still pending\n");
break;
case kSummaryCssParseError:
StrAppend(&comment, "Unrecoverable CSS parse error or resource "
"contains closing style tag\n");
break;
case kSummaryResourceCreationFailed:
StrAppend(&comment, kCreateResourceFailedDebugMsg, "\n");
break;
case kSummaryInputUnavailable:
StrAppend(&comment,
"Fetch failed or resource not publicly cacheable\n");
break;
case kSummarySlotRemoved:
StrAppend(&comment,
"Resource removed by another filter\n");
break;
}
}
GoogleString escaped;
HtmlKeywords::Escape(comment, &escaped);
InsertNodeAtBodyEnd(driver()->NewCommentNode(NULL, escaped));
}
for (int i = 0, n = summaries_.size(); i < n; ++i) {
if (summaries_[i].state == kSummaryOk) {
num_css_used_for_critical_css_computation_->Add(1);
} else {
num_css_not_used_for_critical_css_computation_->Add(1);
}
}
SummariesDone();
}
// Starts summarization of the inline <style> element |style| whose contents
// are held by |text|: wraps the text in a slot, creates the summary entry and
// its Context, and hands the rewrite to the driver.
void CssSummarizerBase::StartInlineRewrite(
    HtmlElement* style, HtmlCharactersNode* text) {
  ResourceSlotPtr inline_slot(MakeSlotForInlineCss(text));
  const GoogleString location = inline_slot->LocationString();
  Context* rewrite = CreateContextAndSummaryInfo(
      style,
      false,  // not external
      inline_slot,
      location,
      driver()->decoded_base(),
      StringPiece());  // rel, none since inline
  rewrite->SetupInlineRewrite(style, text);
  driver()->InitiateRewrite(rewrite);
}
// Starts summarization of the external stylesheet referenced by |link|.
//
// |src| is the link's href attribute and |rel| the value of its rel attribute.
// If no input resource can be created for the URL, a summary entry in state
// kSummaryResourceCreationFailed is appended (so the subclass knows of the
// failure), WillNotRenderSummary() is invoked for it, and no rewrite is
// started.  Otherwise a slot, summary entry and Context are created and the
// rewrite is handed to the driver.
void CssSummarizerBase::StartExternalRewrite(
    HtmlElement* link, HtmlElement::Attribute* src, StringPiece rel) {
  // Decode the href once; the result may be NULL (see the checks below).
  const char* url = src->DecodedValueOrNull();

  // Create the input resource for the slot.
  // is_authorized is an out-parameter that is read further down.  It is
  // initialized here so that it never holds an indeterminate value should the
  // callee return without writing it; defaulting to true means we then err on
  // the side of emitting the debug comment.
  bool is_authorized = true;
  ResourcePtr input_resource(CreateInputResource(url, &is_authorized));
  if (input_resource.get() == NULL) {
    // Record a failure, so the subclass knows of it.
    summaries_.push_back(SummaryInfo());
    summaries_.back().state = kSummaryResourceCreationFailed;
    summaries_.back().location = (url != NULL ? url : driver()->UrlLine());
    bool is_element_deleted = false;  // unused after call because no slot here
    WillNotRenderSummary(summaries_.size() - 1, link, NULL /* char_node */,
                         &is_element_deleted);
    // TODO(morlovich): Stat?
    if (DebugMode()) {
      if (is_authorized || url == NULL) {
        driver()->InsertComment(StrCat(
            Name(), ": ", kCreateResourceFailedDebugMsg));
      } else {
        // Do not write a debug message in this case because that has already
        // been done by the CSS rewriting filter.
      }
    }
    return;
  }

  ResourceSlotPtr slot(driver()->GetSlot(input_resource, link, src));
  Context* context = CreateContextAndSummaryInfo(
      link, true /* external*/, slot, input_resource->url() /* location*/,
      input_resource->url() /* base */, rel);
  context->SetupExternalRewrite(link);
  driver()->InitiateRewrite(context);
}
// Wraps the contents of |char_node| in a CSS data: URL, creates an input
// resource from that URL, and returns the driver's inline slot tying the
// resource to the node.
ResourceSlotPtr CssSummarizerBase::MakeSlotForInlineCss(
    HtmlCharactersNode* char_node) {
  // TODO(morlovich): This does a lot of useless conversions and
  // copying. Get rid of them.
  GoogleString inline_css_url;
  DataUrl(kContentTypeCss, PLAIN, char_node->contents(), &inline_css_url);

  // Create the input resource for the slot.
  ResourcePtr css_resource(
      DataUrlInputResource::Make(inline_css_url, driver()));
  ResourceSlotPtr inline_slot(
      driver()->GetInlineSlot(css_resource, char_node));
  return inline_slot;
}
// Appends a new SummaryInfo describing the stylesheet of |element| to
// summaries_, counts it as an outstanding rewrite, and returns a newly
// allocated Context (with |slot| already added) whose position is the index
// of that entry.
//
// |location| and |base_for_resources| are copied into the entry, as are |rel|
// and the element's media attribute when it has a decodable value.
CssSummarizerBase::Context* CssSummarizerBase::CreateContextAndSummaryInfo(
    const HtmlElement* element, bool external, const ResourceSlotPtr& slot,
    const GoogleString& location, StringPiece base_for_resources,
    StringPiece rel) {
  // The new entry goes at the end, so its index is the current size.
  const int position = summaries_.size();

  SummaryInfo info;
  info.location = location;
  base_for_resources.CopyToString(&info.base);
  rel.CopyToString(&info.rel);
  info.is_external = external;
  info.is_inside_noscript = (noscript_element() != NULL);

  const HtmlElement::Attribute* media_attribute =
      element->FindAttribute(HtmlName::kMedia);
  if (media_attribute != NULL) {
    const char* media = media_attribute->DecodedValueOrNull();
    if (media != NULL) {
      info.media_from_html = media;
    }
  }

  summaries_.push_back(info);
  ++outstanding_rewrites_;

  Context* rewrite = new Context(position, this, driver());
  rewrite->AddSlot(slot);
  return rewrite;
}
// Summarizer filters are not expected to answer fetches, so they should never
// be registered under their id as a rewrite filter.  Reaching this function
// is logged at DFATAL severity and NULL is returned.
RewriteContext* CssSummarizerBase::MakeRewriteContext() {
  LOG(DFATAL) << "CssSummarizerBase subclasses should not be registered "
                 "as handling fetches";
  RewriteContext* const no_context = NULL;
  return no_context;
}
} // namespace net_instaweb