// blob: 7cdafb179064e276e3868fbd8bb783e1c4e02d4f [file] [log] [blame]
// Copyright 2010 Google Inc. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// Author: mdsteele@google.com (Matthew D. Steele)
#include "net/instaweb/rewriter/public/css_inline_filter.h"
#include "base/logging.h"
#include "net/instaweb/rewriter/cached_result.pb.h"
#include "net/instaweb/rewriter/public/css_tag_scanner.h"
#include "net/instaweb/rewriter/public/css_util.h"
#include "net/instaweb/rewriter/public/inline_rewrite_context.h"
#include "net/instaweb/rewriter/public/local_storage_cache_filter.h"
#include "net/instaweb/rewriter/public/resource.h"
#include "net/instaweb/rewriter/public/rewrite_driver.h"
#include "net/instaweb/rewriter/public/rewrite_filter.h"
#include "net/instaweb/rewriter/public/rewrite_options.h"
#include "net/instaweb/rewriter/public/server_context.h"
#include "pagespeed/kernel/base/charset_util.h"
#include "pagespeed/kernel/base/statistics.h"
#include "pagespeed/kernel/base/string_writer.h"
#include "pagespeed/kernel/html/html_element.h"
#include "pagespeed/kernel/html/html_name.h"
#include "pagespeed/kernel/html/html_node.h"
#include "pagespeed/kernel/http/google_url.h"
#include "pagespeed/kernel/util/gzip_inflater.h"
namespace net_instaweb {
// Forward declaration: this file only handles MessageHandler through a
// pointer (see RenderInline).
class MessageHandler;
// Name of the statistics variable counting inlined stylesheets. It is
// registered in InitStats(), looked up in the constructor, and bumped by one
// at the end of every RenderInline() call that completes.
const char CssInlineFilter::kNumCssInlined[] = "num_css_inlined";
// Inline-rewrite context for a single stylesheet element. It remembers the
// base URL in effect when the element was seen plus the element's charset
// attribute (if any), and forwards every policy/rendering decision back to
// the owning CssInlineFilter.
class CssInlineFilter::Context : public InlineRewriteContext {
 public:
  // |filter| is the owning filter; |base_url| is copied into this context;
  // |element| and |src| are handed through to InlineRewriteContext.
  Context(CssInlineFilter* filter, const GoogleUrl& base_url,
          HtmlElement* element, HtmlElement::Attribute* src)
      : InlineRewriteContext(filter, element, src),
        filter_(filter) {
    base_url_.Reset(base_url);
    // Keep the charset attribute value, when present; otherwise
    // attrs_charset_ stays empty.
    const char* charset_attr = element->AttributeValue(HtmlName::kCharset);
    if (charset_attr != NULL) {
      attrs_charset_ = GoogleString(charset_attr);
    }
  }

  // Defers the inlining decision to the filter, supplying the charset that
  // was captured from the element's attributes.
  virtual bool ShouldInline(const ResourcePtr& resource,
                            GoogleString* reason) const {
    return filter_->ShouldInline(resource, attrs_charset_, reason);
  }

  // Strips the LSC attributes when nothing was inlined (they are pointless in
  // that case), then runs the base-class rendering.
  virtual void Render() {
    const bool has_inlined_output =
        num_output_partitions() >= 1 &&
        output_partition(0)->has_inlined_data();
    if (!has_inlined_output) {
      LocalStorageCacheFilter::RemoveLscAttributes(get_element(),
                                                   filter_->driver());
    }
    InlineRewriteContext::Render();
  }

  // Hands the actual DOM rewrite to the filter, passing along the first
  // output partition and the base URL saved at construction.
  virtual void RenderInline(
      const ResourcePtr& resource, const StringPiece& text,
      HtmlElement* element) {
    filter_->RenderInline(resource, *(output_partition(0)),
                          base_url_, text, element);
  }

  // Input resources are created through the filter.
  virtual ResourcePtr CreateResource(const char* url, bool* is_authorized) {
    return filter_->CreateResource(url, is_authorized);
  }

  // Reports the owning filter's id.
  virtual const char* id() const { return filter_->id_; }

 private:
  CssInlineFilter* filter_;     // Owning filter; all work is delegated to it.
  GoogleUrl base_url_;          // Copy of the base URL given at construction.
  GoogleString attrs_charset_;  // Element's charset attribute, or empty.

  DISALLOW_COPY_AND_ASSIGN(Context);
};
// Builds the filter for |driver|: picks up the CSS-inline filter id, reads
// the maximum inlinable size from the driver's options, and looks up the
// statistics variable used to count inlined stylesheets.
CssInlineFilter::CssInlineFilter(RewriteDriver* driver)
    : CommonFilter(driver),
      id_(RewriteOptions::kCssInlineId),
      size_threshold_bytes_(driver->options()->css_inline_max_bytes()) {
  num_css_inlined_ =
      server_context()->statistics()->GetVariable(kNumCssInlined);
}
// Registers the kNumCssInlined variable with |statistics|. The constructor
// later retrieves the same variable by name via GetVariable().
void CssInlineFilter::InitStats(Statistics* statistics) {
statistics->AddVariable(kNumCssInlined);
}
// Intentionally empty: this filter does no per-document setup here.
void CssInlineFilter::StartDocumentImpl() {
}
// Empty destructor body: nothing is released explicitly here.
CssInlineFilter::~CssInlineFilter() {}
// Called at the end of each element. If |element| is a CSS element that is
// eligible for inlining, starts an inline rewrite for it, coordinating with
// the local-storage-cache (LSC) filter along the way.
void CssInlineFilter::EndElementImpl(HtmlElement* element) {
  // Don't inline if the CSS element is under <noscript>.
  if (noscript_element() != NULL) {
    return;
  }

  // Only CSS elements are of interest.
  HtmlElement::Attribute* href = NULL;
  const char* media = NULL;
  if (!CssTagScanner::ParseCssElement(element, &href, &media)) {
    return;
  }
  // Skip elements that have children in the current flush window.
  if (driver()->HasChildrenInFlushWindow(element)) {
    return;
  }

  // Only inline if the media type affects "screen". We don't inline other
  // types since they're very unlikely to change the initial page view, and
  // inlining them would actually slow down the 99% case of "screen".
  if (!css_util::CanMediaAffectScreen(media)) {
    driver()->InsertDebugComment(
        "CSS not inlined because media does not match screen", element);
    return;
  }

  // Ask the LSC filter to work out how to handle this element. A return
  // value of true means we don't have to rewrite it, so we are done. The
  // state is carried forward to the second call below since we might still
  // have to modify the element after rewriting has been initiated.
  LocalStorageCacheFilter::InlineState lsc_state;
  const bool handled_by_lsc = LocalStorageCacheFilter::AddStorableResource(
      href->DecodedValueOrNull(), driver(), false /* check cookie */,
      element, &lsc_state);
  if (handled_by_lsc) {
    return;
  }

  // StartInlining() transfers possession of the context to RewriteDriver or
  // deletes it on failure, so it must not be deleted here.
  Context* context = new Context(this, base_url(), element, href);
  if (context->StartInlining()) {
    // If we're rewriting we need the LSC filter to add the URL as an
    // attribute so that it knows to insert the LSC specific javascript.
    LocalStorageCacheFilter::AddStorableResource(href->DecodedValueOrNull(),
                                                 driver(),
                                                 true /* ignore cookie */,
                                                 element, &lsc_state);
  }
}
// Creates the input resource for |url| by forwarding to
// CreateInputResource(); |is_authorized| is passed straight through to it.
// Context::CreateResource calls this so that resource creation goes through
// the filter.
ResourcePtr CssInlineFilter::CreateResource(const char* url,
bool* is_authorized) {
return CreateInputResource(url, is_authorized);
}
// Returns true if |contents| contains the text "</style" anywhere, compared
// case-insensitively. ShouldInline() uses this to refuse stylesheets that
// would end an enclosing <style> element early.
bool CssInlineFilter::HasClosingStyleTag(StringPiece contents) {
  const bool no_closing_tag =
      (FindIgnoreCase(contents, "</style") == StringPiece::npos);
  return !no_closing_tag;
}
bool CssInlineFilter::ShouldInline(const ResourcePtr& resource,
const StringPiece& attrs_charset,
GoogleString* reason) const {
// If the contents are bigger than our threshold, don't inline.
StringPiece contents(resource->ExtractUncompressedContents());
if (contents.size() > size_threshold_bytes_) {
*reason = StrCat("CSS not inlined since it's bigger than ",
Integer64ToString(size_threshold_bytes_),
" bytes");
return false;
}
// Also don't inline if it looks gzipped.
if (GzipInflater::HasGzipMagicBytes(contents)) {
*reason = "CSS not inlined because it appears to be gzip-encoded";
return false;
}
// And also not if the contents contain "</style>" anywhere. If we inline an
// external stylesheet containing a "</style>", the <style> tag will be ended
// early.
if (HasClosingStyleTag(contents)) {
*reason = "CSS not inlined since it contains style closing tag";
return false;
}
// If the charset is incompatible with the HTML's, we may not be able to
// inline.
StringPiece htmls_charset(driver()->containing_charset());
GoogleString css_charset = RewriteFilter::GetCharsetForStylesheet(
resource.get(), attrs_charset, htmls_charset);
if (!StringCaseEqual(htmls_charset, css_charset)) {
// Check if everything is in <= 127 range, we may still be able to
// inline if it keeps to the ASCII subset (also potentially dropping the
// BOM, since we'll strip it anyway).
StringPiece contents = resource->ExtractUncompressedContents();
StringPiece clean_contents(contents);
StripUtf8Bom(&clean_contents);
bool has_non_ascii = false;
for (int i = 0, n = clean_contents.size(); i < n; ++i) {
if (static_cast<unsigned char>(clean_contents[i]) >= 0x80) {
has_non_ascii = true;
break;
}
}
if (has_non_ascii) {
*reason = StrCat(
"CSS not inlined due to apparent charset incompatibility;"
" we think the HTML is ", htmls_charset,
" while the CSS is ", css_charset);
return false;
}
}
return true;
}
void CssInlineFilter::RenderInline(const ResourcePtr& resource,
const CachedResult& cached,
const GoogleUrl& base_url,
const StringPiece& contents,
HtmlElement* element) {
MessageHandler* message_handler = driver()->message_handler();
// Absolutify the URLs in the CSS -- relative URLs will break otherwise.
// Note that we have to do this at rendering stage, since the same stylesheet
// may be included from HTML in different directories.
// TODO(jmarantz): fix bug 295: domain-rewrite & shard here.
StringPiece clean_contents(contents);
StripUtf8Bom(&clean_contents);
GoogleString rewritten_contents;
StringWriter writer(&rewritten_contents);
GoogleUrl resource_url(resource->url());
bool resolved_ok = true;
switch (driver()->ResolveCssUrls(
resource_url, base_url.Spec(), clean_contents,
&writer, message_handler)) {
case RewriteDriver::kNoResolutionNeeded:
// We don't need to absolutify URLs if input directory is same as base.
if (!writer.Write(clean_contents, message_handler)) {
resolved_ok = false;
}
break;
case RewriteDriver::kWriteFailed:
resolved_ok = false;
break;
case RewriteDriver::kSuccess:
break;
}
if (!resolved_ok) {
// Remove any LSC attributes as they're now pointless.
LocalStorageCacheFilter::RemoveLscAttributes(element, driver());
return;
}
// Inline the CSS.
HtmlElement* style_element =
driver()->NewElement(element->parent(), HtmlName::kStyle);
if (!driver()->ReplaceNode(element, style_element)) {
DCHECK(false) << "!driver()->ReplaceNode(element, style_element)";
return;
}
driver()->AppendChild(style_element,
driver()->NewCharactersNode(element,
rewritten_contents));
// Copy over most attributes from the original link, discarding those that
// we convert (href, rel), and dropping those that are irrelevant (type).
bool has_pagespeed_lsc_url = false;
bool has_pagespeed_lsc_hash = false;
const HtmlElement::AttributeList& attrs = element->attributes();
for (HtmlElement::AttributeConstIterator i(attrs.begin()), e(attrs.end());
i != e; ++i) {
const HtmlElement::Attribute& attr = *i;
switch (attr.keyword()) {
case HtmlName::kHref:
case HtmlName::kRel:
case HtmlName::kType:
break;
case HtmlName::kDataPagespeedLscHash:
// If we have a hash, we /must/ have an url as well, so the fallthrough
// will be a no-op (so, the hash case must come before the url case).
has_pagespeed_lsc_hash = true;
FALLTHROUGH_INTENDED;
case HtmlName::kDataPagespeedLscUrl:
has_pagespeed_lsc_url = true;
FALLTHROUGH_INTENDED;
default:
style_element->AddAttribute(attr);
break;
}
}
if (driver()->options()->Enabled(RewriteOptions::kComputeCriticalCss)) {
// If compute_critical_css is enabled, add 'href' attribute to the style
// node.
// Computing critical css needs this url to store the critical
// css in the map.
driver()->AddAttribute(style_element, HtmlName::kDataPagespeedHref,
resource_url.Spec());
}
// If we don't already have a data-pagespeed-lsc-url then EndElementImpl must
// not have called AddStorableResource or LSC is disabled; in either case
// there is no point in trying to add the LSC attributes. OTOH, if have an url
// and a hash then we've already got all the attributes we need.
if (has_pagespeed_lsc_url && !has_pagespeed_lsc_hash) {
LocalStorageCacheFilter::AddLscAttributes(resource_url.Spec(), cached,
driver(), style_element);
}
num_css_inlined_->Add(1);
}
} // namespace net_instaweb