blob: 47f9e5d638dd8a93c89671db612dc0d6faea584d [file] [log] [blame]
/*
* Copyright 2013 Google Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
// Author: slamm@google.com (Stephen Lamm),
// morlovich@google.com (Maksim Orlovich)
// See the header for overview.
#include "net/instaweb/rewriter/public/critical_selector_filter.h"
#include <algorithm>
#include <cstddef>
#include <set>
#include "base/logging.h"
#include "net/instaweb/http/public/log_record.h"
#include "net/instaweb/rewriter/flush_early.pb.h"
#include "net/instaweb/rewriter/public/critical_selector_finder.h"
#include "net/instaweb/rewriter/public/css_minify.h"
#include "net/instaweb/rewriter/public/css_tag_scanner.h"
#include "net/instaweb/rewriter/public/css_util.h"
#include "net/instaweb/rewriter/public/request_properties.h"
#include "net/instaweb/rewriter/public/rewrite_driver.h"
#include "net/instaweb/rewriter/public/rewrite_options.h"
#include "net/instaweb/rewriter/public/server_context.h"
#include "net/instaweb/rewriter/public/static_asset_manager.h"
#include "pagespeed/kernel/base/basictypes.h"
#include "pagespeed/kernel/base/hasher.h"
#include "pagespeed/kernel/base/null_message_handler.h"
#include "pagespeed/kernel/base/stl_util.h"
#include "pagespeed/kernel/base/string.h"
#include "pagespeed/kernel/base/string_util.h"
#include "pagespeed/kernel/base/string_writer.h"
#include "pagespeed/kernel/html/html_element.h"
#include "pagespeed/kernel/html/html_keywords.h"
#include "pagespeed/kernel/html/html_name.h"
#include "pagespeed/kernel/html/html_node.h"
#include "pagespeed/kernel/html/html_parse.h"
#include "pagespeed/kernel/http/google_url.h"
#include "pagespeed/opt/logging/enums.pb.h"
#include "webutil/css/media.h"
#include "webutil/css/parser.h"
#include "webutil/css/selector.h"
namespace net_instaweb {
namespace {
// Squeezes the NULL entries out of a std::vector-like container of pointers.
// The surviving entries keep their original relative order and the container
// shrinks to the number of non-NULL entries. Nothing is deleted through the
// stored pointers; callers are expected to have disposed of the pointees
// before nulling their slots.
template<typename VectorType> void Compact(VectorType* cl) {
  typedef typename VectorType::value_type Entry;
  typedef typename VectorType::iterator Iterator;
  const Entry hole = static_cast<Entry>(NULL);
  // Slide every live entry down over the holes that precede it.
  Iterator write = cl->begin();
  for (Iterator read = cl->begin(), end = cl->end(); read != end; ++read) {
    if (*read != hole) {
      *write = *read;
      ++write;
    }
  }
  // Whatever sits at or after |write| is leftover and gets chopped off.
  cl->erase(write, cl->end());
}
} // namespace
// When flush early filter is enabled, critical css rules are flushed early
// as innerHTML of a script element. When the CSS element appears in the
// document, find the previously flushed style data and copy it to the style
// element so it can be applied. This script is used for that.
//
// The script defines applyFlushedCriticalCss(script_id, mediaString), which:
//  - looks up the element whose id is script_id and returns without doing
//    anything if there is none;
//  - reads that element's text (innerHTML, then textContent, then data,
//    falling back to the empty string);
//  - creates a <style type="text/css"> holding that text, assigning through
//    styleSheet.cssText when the new element exposes a styleSheet property
//    and through an appended text node otherwise;
//  - sets the media attribute on the <style> when mediaString is truthy;
//  - inserts the <style> immediately before the last <script> currently in
//    the document (presumably the script making the call, which assumes the
//    invoking script runs synchronously while it is being parsed).
const char CriticalSelectorFilter::kApplyFlushEarlyCss[] =
"var applyFlushedCriticalCss = function(script_id, mediaString) {"
" var scripts = document.getElementsByTagName('script');"
" var styleScript = document.getElementById(script_id);"
" if (styleScript == null) {"
" return;"
" }"
" var cssText = styleScript.innerHTML || styleScript.textContent || "
" styleScript.data || \"\";"
" var styleElem = document.createElement('style');"
" styleElem.type = 'text/css';"
" if (styleElem.styleSheet) {"
" styleElem.styleSheet.cssText = cssText;"
" } else {"
" styleElem.appendChild(document.createTextNode(cssText));"
" }"
" if (mediaString) {"
" styleElem.setAttribute(\"media\", mediaString);"
" }"
" var currentScript = scripts[scripts.length-1];"
" currentScript.parentNode.insertBefore(styleElem, currentScript);"
"};";
// printf-style template for the inline script that calls
// applyFlushedCriticalCss. ApplyCssFlushedEarly() fills the two %s slots with
// the style id and the media string, in that order.
const char CriticalSelectorFilter::kInvokeFlushEarlyCssTemplate[] =
"applyFlushedCriticalCss(\"%s\", \"%s\");";
// Value of the id attribute that ApplyCssFlushedEarly() puts on the <script>
// carrying the applyFlushedCriticalCss definition.
const char CriticalSelectorFilter::kMoveScriptId[] = "psa_flush_style_early";
// Value of the class attribute that RenderDone() puts on the <noscript>
// wrappers holding CSS that was not originally inside a <noscript>.
const char CriticalSelectorFilter::kNoscriptStylesClass[] = "psa_add_styles";
// TODO(morlovich): Check charset like CssInlineFilter::ShouldInline().
// Wrap CSS elements to move them later in the document.
// A simple list of elements is insufficient because link tags and style tags
// are inserted differently.
// Holds a clone of one CSS-bearing element together with a flag telling
// whether the original sat inside a <noscript>. The clone is made at
// construction time and can later be attached under a new parent with
// AppendTo().
class CriticalSelectorFilter::CssElement {
 public:
  // Clones |original| through |parser| immediately; the wrapper only ever
  // refers to the clone afterwards.
  CssElement(HtmlParse* parser, HtmlElement* original, bool inside_noscript)
      : html_parse_(parser),
        element_(parser->CloneElement(original)),
        inside_noscript_(inside_noscript) {
  }

  // Nothing to release: HtmlParse deletes the element (regardless of whether
  // it is inserted).
  virtual ~CssElement() {
  }

  // Attaches the cloned element as the last child of |parent|.
  virtual void AppendTo(HtmlElement* parent) const {
    html_parse_->AppendChild(parent, element_);
  }

  // True when the element this was cloned from was inside a <noscript>.
  bool inside_noscript() const {
    return inside_noscript_;
  }

 protected:
  HtmlParse* html_parse_;   // Used to clone, create and attach nodes.
  HtmlElement* element_;    // The clone; not deleted by this class.
  bool inside_noscript_;

 private:
  DISALLOW_COPY_AND_ASSIGN(CssElement);
};
// CssElement flavour for inline style blocks: besides the cloned element it
// also carries copies of the block's characters nodes, which are re-attached
// underneath the clone when it is emitted.
class CriticalSelectorFilter::CssStyleElement
    : public CriticalSelectorFilter::CssElement {
 public:
  CssStyleElement(HtmlParse* parser, HtmlElement* original,
                  bool inside_noscript)
      : CssElement(parser, original, inside_noscript) {
  }

  virtual ~CssStyleElement() {
  }

  // Stores a fresh, parentless characters node with the same contents as
  // |characters_node|. Call this before AppendTo(); only nodes recorded by
  // then are attached.
  void AppendCharactersNode(HtmlCharactersNode* characters_node) {
    HtmlCharactersNode* copy =
        html_parse_->NewCharactersNode(NULL, characters_node->contents());
    characters_nodes_.push_back(copy);
  }

  // Attaches the cloned element under |parent| and then hangs every recorded
  // characters node off the clone, in the order they were recorded.
  virtual void AppendTo(HtmlElement* parent) const {
    CssElement::AppendTo(parent);
    for (size_t i = 0, n = characters_nodes_.size(); i < n; ++i) {
      html_parse_->AppendChild(element_, characters_nodes_[i]);
    }
  }

 protected:
  typedef std::vector<HtmlCharactersNode*> CharactersNodeVector;
  CharactersNodeVector characters_nodes_;

 private:
  DISALLOW_COPY_AND_ASSIGN(CssStyleElement);
};
// Wrap CSS related elements so they can be moved later in the document.
// Builds the filter on top of CssSummarizerBase for |driver|. The three
// per-document flags start out false; StartDocumentImpl() resets them to the
// same values at the start of each document.
CriticalSelectorFilter::CriticalSelectorFilter(RewriteDriver* driver)
: CssSummarizerBase(driver),
saw_end_document_(false),
any_rendered_(false),
is_flush_script_added_(false) {
}
// Intentionally empty. Note that the saved CssElement wrappers in
// css_elements_ are released by RenderDone() (via STLDeleteElements), not
// here.
CriticalSelectorFilter::~CriticalSelectorFilter() {
}
// Prunes |stylesheet| in place down to the rules that matter for the critical
// selectors, then writes the minified result to |out|.
//
// For every parsed ruleset:
//  - media queries that cannot affect the screen are deleted;
//  - selectors whose JS-detectable portion is non-empty and is not among
//    critical_selectors_ are deleted;
//  - the whole ruleset is deleted unless some media applies (or it had no
//    media queries) and some selector applies (or it had no selectors).
// Unparsed regions are passed through untouched.
void CriticalSelectorFilter::Summarize(Css::Stylesheet* stylesheet,
                                       GoogleString* out) const {
  const int ruleset_count = stylesheet->rulesets().size();
  for (int i = 0; i < ruleset_count; ++i) {
    Css::Ruleset* ruleset = stylesheet->mutable_rulesets().at(i);
    if (ruleset->type() == Css::Ruleset::UNPARSED_REGION) {
      // Could not be parsed as a rule; keep it exactly as it is and hope it
      // is not too big.
      continue;
    }

    // TODO(morlovich): This does a lot of repeated work as the same media
    // entries are repeated for tons of rulesets.
    // TODO(morlovich): It's silly to serialize this, we should work directly
    // off AST once we have decision procedure on that.
    // A ruleset without any media queries counts as applicable.
    bool media_ok = ruleset->media_queries().empty();
    const int media_count = ruleset->media_queries().size();
    for (int m = 0; m < media_count; ++m) {
      Css::MediaQuery* query = ruleset->mutable_media_queries().at(m);
      if (!css_util::CanMediaAffectScreen(query->ToString())) {
        // Leave a hole behind; holes are squeezed out further down.
        delete query;
        ruleset->mutable_media_queries()[m] = NULL;
        continue;
      }
      media_ok = true;
    }

    // Selectors are only examined when the media check passed; otherwise the
    // ruleset is going away regardless.
    bool selectors_ok = false;
    if (media_ok) {
      // Some partial parse errors leave a ruleset with zero selectors; such
      // rulesets are retained to stay on the conservative side.
      selectors_ok = ruleset->selectors().empty();
      const int selector_count = ruleset->selectors().size();
      for (int s = 0; s < selector_count; ++s) {
        Css::Selector* selector = ruleset->mutable_selectors().at(s);
        const GoogleString detectable =
            css_util::JsDetectableSelector(*selector);
        const bool keep =
            detectable.empty() ||
            critical_selectors_.find(detectable) != critical_selectors_.end();
        if (keep) {
          selectors_ok = true;
        } else {
          delete selector;
          ruleset->mutable_selectors()[s] = NULL;
        }
      }
    }

    if (!media_ok || !selectors_ok) {
      // Nothing in this production is relevant: drop all of it.
      delete ruleset;
      stylesheet->mutable_rulesets()[i] = NULL;
      continue;
    }

    // Only the irrelevant selectors and media go; the ruleset itself stays.
    Compact(&ruleset->mutable_selectors());
    Compact(&ruleset->mutable_media_queries());
  }
  Compact(&stylesheet->mutable_rulesets());

  // Serialize what is left.
  StringWriter writer(out);
  NullMessageHandler handler;
  CssMinify::Stylesheet(*stylesheet, &writer, &handler);
}
void CriticalSelectorFilter::RenderSummary(
int pos, HtmlElement* element, HtmlCharactersNode* char_node,
bool* is_element_deleted) {
RememberFullCss(pos, element, char_node);
const SummaryInfo& summary = GetSummaryForStyle(pos);
DCHECK_EQ(kSummaryOk, summary.state);
// If we're inlining an external CSS file, make sure to adjust the URLs
// inside to the new base.
const GoogleString* css_to_use = &summary.data;
GoogleString resolved_css;
if (summary.is_external) {
StringWriter writer(&resolved_css);
GoogleUrl input_css_base(summary.base);
if (driver()->ResolveCssUrls(
input_css_base, driver()->base_url().Spec(), summary.data,
&writer, driver()->message_handler()) == RewriteDriver::kSuccess) {
css_to_use = &resolved_css;
}
}
// Update the DOM --- either an existing style element, or replace link
// with style.
if (char_node != NULL) {
// Note: This depends upon all previous filters also mutating the contents
// of the original Characters Node. If any previous filters replaces the
// Characters Node with another one or makes some other change, this node
// will be out of date and the update will not do anything.
// TODO(sligocki): We should use a non-trivial ResourceSlot to update this
// instead so that it is not so delicate.
*char_node->mutable_contents() = *css_to_use;
} else {
HtmlElement* style_element = driver()->NewElement(NULL, HtmlName::kStyle);
driver()->InsertNodeBeforeNode(element, style_element);
HtmlCharactersNode* content =
driver()->NewCharactersNode(style_element, *css_to_use);
driver()->AppendChild(style_element, content);
*is_element_deleted = driver()->DeleteNode(element);
element = style_element;
}
// Update the media attribute to just the media that's relevant to screen.
StringVector all_media;
css_util::VectorizeMediaAttribute(summary.media_from_html, &all_media);
element->DeleteAttribute(HtmlName::kMedia);
bool drop_entire_element = false;
if (css_to_use->empty()) {
// Don't keep empty blocks around.
drop_entire_element = true;
} else if (summary.is_inside_noscript) {
// Optimize summary version for scriptable environment, since noscript
// environment will eagerly load the whole CSS anyway at the foot of the
// page.
drop_entire_element = true;
} else if (summary.is_external &&
CssTagScanner::IsAlternateStylesheet(summary.rel)) {
// Likewise drop alternate stylesheets, they're non-critical.
drop_entire_element = true;
} else if (!all_media.empty()) {
StringVector relevant_media;
for (int i = 0, n = all_media.size(); i < n; ++i) {
const GoogleString& medium = all_media[i];
if (css_util::CanMediaAffectScreen(medium)) {
relevant_media.push_back(medium);
}
}
if (!relevant_media.empty()) {
driver()->AddAttribute(element, HtmlName::kMedia,
css_util::StringifyMediaVector(relevant_media));
} else {
// None of the media applied to the screen, so remove the entire element.
drop_entire_element = true;
}
}
if (drop_entire_element) {
driver()->DeleteNode(element);
} else if (char_node == NULL) {
const GoogleString& url = summary.location;
if (IsCssFlushedEarly(url)) {
ApplyCssFlushedEarly(element,
driver()->server_context()->hasher()->Hash(url),
element->AttributeValue(HtmlName::kMedia));
} else if (driver()->flushing_early()) {
// Add an attribute so the flush early filter can flush these
// elements early.
driver()->AddAttribute(element, HtmlName::kDataPagespeedFlushStyle,
driver()->server_context()->hasher()->Hash(url));
}
}
// We've altered the CSS, so we should generate code to load the entire thing.
// TODO(morlovich): Check if we actually dropped something?
any_rendered_ = true;
}
// Counterpart of RenderSummary() for CSS at |pos| whose summary is not going
// to be rendered (presumably invoked by CssSummarizerBase -- confirm against
// the base class). The DOM is not modified and |is_element_deleted| is never
// written.
void CriticalSelectorFilter::WillNotRenderSummary(
int pos, HtmlElement* element, HtmlCharactersNode* char_node,
bool* is_element_deleted) {
// Still save the full CSS so that RenderDone() can emit it at the end of the
// document alongside the entries that were summarized.
RememberFullCss(pos, element, char_node);
}
// Returns the suffix computed by StartDocumentImpl(): a hash of the
// comma-joined critical selectors for the current document.
GoogleString CriticalSelectorFilter::CacheKeySuffix() const {
return cache_key_suffix_;
}
void CriticalSelectorFilter::StartDocumentImpl() {
CssSummarizerBase::StartDocumentImpl();
ServerContext* context = driver()->server_context();
// Read critical selector info from pcache.
critical_selectors_ =
context->critical_selector_finder()->GetCriticalSelectors(driver());
// Compute corresponding cache key suffix
GoogleString all_selectors = JoinCollection(critical_selectors_, ",");
cache_key_suffix_ = context->lock_hasher()->Hash(all_selectors);
// Clear state between re-uses / check to make sure we wrapped up properly.
DCHECK(css_elements_.empty());
saw_end_document_ = false;
any_rendered_ = false;
is_flush_script_added_ = false;
}
// Forwards to the base class and then records that the end of the document
// has been seen.
void CriticalSelectorFilter::EndDocument() {
CssSummarizerBase::EndDocument();
// RenderDone() only emits the full CSS and releases css_elements_ once this
// flag is set, i.e. on the last flush window.
saw_end_document_ = true;
}
void CriticalSelectorFilter::RenderDone() {
CssSummarizerBase::RenderDone();
// Only do this on very last flush window.
if (!saw_end_document_) {
return;
}
if (!css_elements_.empty() && any_rendered_ && !driver()->flushing_early()) {
HtmlElement* noscript_element = NULL;
Compact(&css_elements_);
for (int i = 0, n = css_elements_.size(); i < n; ++i) {
// Insert the full CSS, but hide all the style, link tags inside noscript
// blocks so that look-ahead parser cannot find them; and mark the
// portions that were visible to scripting-aware browser with
// class = psa_add_styles.
//
// If the browser has scripting off, it will therefore read everything,
// including portions of original CSS that were in noscript block.
//
// If the browser has scripting on, the parser will not do anything, but
// we will add a loader script which will load things with
// class = psa_add_styles (thus skipping over things that were originally
// inside noscript).
if (i == 0 || (css_elements_[i]->inside_noscript() !=
css_elements_[i - 1]->inside_noscript())) {
noscript_element = driver()->NewElement(NULL, HtmlName::kNoscript);
if (!css_elements_[i]->inside_noscript()) {
driver()->AddAttribute(noscript_element, HtmlName::kClass,
kNoscriptStylesClass);
}
InsertNodeAtBodyEnd(noscript_element);
}
css_elements_[i]->AppendTo(noscript_element);
}
HtmlElement* script = driver()->NewElement(NULL, HtmlName::kScript);
driver()->AddAttribute(script, HtmlName::kDataPagespeedNoDefer, NULL);
InsertNodeAtBodyEnd(script);
GoogleString js =
driver()->server_context()->static_asset_manager()->GetAsset(
StaticAssetEnum::CRITICAL_CSS_LOADER_JS, driver()->options());
if (!driver()->options()
->test_only_prioritize_critical_css_dont_apply_original_css()) {
StrAppend(&js, "pagespeed.CriticalCssLoader.Run();");
}
AddJsToElement(js, script);
}
STLDeleteElements(&css_elements_);
}
// Enables the filter only when the user agent supports critical CSS and
// critical selector information is available. The outcome is logged in the
// log record, and when the filter is disabled |*disabled_reason| is set to a
// short explanation.
void CriticalSelectorFilter::DetermineEnabled(GoogleString* disabled_reason) {
  // Without critical selector information in the property cache there is
  // nothing to do. We also cannot run safely for IE, since IE conditional
  // comments are not understood well enough to replicate their behavior in
  // the load-everything section.
  const StringSet& selectors = driver()->server_context()
      ->critical_selector_finder()->GetCriticalSelectors(driver());
  const bool have_selectors = !selectors.empty();
  const bool ua_supported =
      driver()->request_properties()->SupportsCriticalCss();

  driver()->log_record()->LogRewriterHtmlStatus(
      RewriteOptions::FilterId(RewriteOptions::kPrioritizeCriticalCss),
      (!ua_supported
           ? RewriterHtmlApplication::USER_AGENT_NOT_SUPPORTED
           : (have_selectors
                  ? RewriterHtmlApplication::ACTIVE
                  : RewriterHtmlApplication::PROPERTY_CACHE_MISS)));

  // An unsupported user agent takes precedence over missing selectors when
  // reporting why the filter is off.
  if (!ua_supported) {
    *disabled_reason = "User agent not supported";
  } else if (!have_selectors) {
    *disabled_reason = "No critical selector info in cache";
  }
  set_is_enabled(ua_supported && have_selectors);
}
void CriticalSelectorFilter::RememberFullCss(
int pos, HtmlElement* element, HtmlCharactersNode* char_node) {
// Deep copy[1] into the css_elements_ array the CSS as optimized by all the
// filters that ran before us and rendered their results, so that we can
// emit it accurately at end, as a lazy-load sequence.
// [1] We need a deep copy since some of the DOM data will get freed up at the
// end of each flush window.
if (static_cast<size_t>(pos) >= css_elements_.size()) {
css_elements_.resize(pos + 1);
}
bool noscript = GetSummaryForStyle(pos).is_inside_noscript;
CssElement* save = NULL;
if (char_node != NULL) {
CssStyleElement* save_inline =
new CssStyleElement(driver(), element, noscript);
save_inline->AppendCharactersNode(char_node);
save = save_inline;
} else {
save = new CssElement(driver(), element, noscript);
}
css_elements_[pos] = save;
}
bool CriticalSelectorFilter::IsCssFlushedEarly(const GoogleString& url) const {
if (!driver()->flushed_early() ||
!driver()->options()->enable_flush_early_critical_css() ||
driver()->flush_early_info() == NULL) {
return false;
}
// If the url is present in the DOM cohort, it is guaranteed to have
// been flushed early.
GoogleString escaped_url;
HtmlKeywords::Escape(url, &escaped_url);
// TODO(slamm): Replace with cheaper and more robust solution.
return (driver()->flush_early_info()->resource_html().find(
StrCat("\"", escaped_url, "\"")) != GoogleString::npos);
}
// Handles CSS whose rules were already sent to the client as part of flushing
// early: |element| is replaced by a small script that calls
// applyFlushedCriticalCss(style_id, media), which builds a style element from
// the flushed data at this position in the document. The first call for a
// document also inserts, just before |element|, the script defining that
// function. |media| may be NULL, in which case an empty media string is
// passed to the function.
//
// NOTE(review): |style_id| and |media| are placed between double quotes in
// the generated script without any escaping -- confirm that neither can
// contain quotes, backslashes or "</script>".
void CriticalSelectorFilter::ApplyCssFlushedEarly(
    HtmlElement* element, const GoogleString& style_id, const char* media) {
  // Emit the function definition once per document.
  if (!is_flush_script_added_) {
    HtmlElement* definition =
        driver()->NewElement(element->parent(), HtmlName::kScript);
    // TODO(slamm): Remove this attribute and update webdriver test as needed.
    driver()->AddAttribute(definition, HtmlName::kId, kMoveScriptId);
    driver()->AddAttribute(definition, HtmlName::kDataPagespeedNoDefer, NULL);
    driver()->InsertNodeBeforeNode(element, definition);
    AddJsToElement(kApplyFlushEarlyCss, definition);
    is_flush_script_added_ = true;
  }

  // The invoking script takes the place of |element|.
  HtmlElement* invocation =
      driver()->NewElement(element->parent(), HtmlName::kScript);
  driver()->AddAttribute(invocation, HtmlName::kDataPagespeedNoDefer, NULL);
  driver()->ReplaceNode(element, invocation);

  const char* media_text = "";
  if (media != NULL) {
    media_text = media;
  }
  GoogleString invocation_js = StringPrintf(kInvokeFlushEarlyCssTemplate,
                                            style_id.c_str(),
                                            media_text);
  AddJsToElement(invocation_js, invocation);
}
} // namespace net_instaweb