blob: 1c8ddc7b922b08187d1e60d1466e0f93416b6dab [file] [log] [blame]
/*
* Copyright 2012 Google Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
// Author: nikhilmadan@google.com (Nikhil Madan)
#include "net/instaweb/rewriter/public/collect_flush_early_content_filter.h"
#include <memory>
#include "base/logging.h"
#include "net/instaweb/rewriter/cached_result.pb.h"
#include "net/instaweb/rewriter/flush_early.pb.h"
#include "net/instaweb/rewriter/public/critical_selector_filter.h"
#include "net/instaweb/rewriter/public/flush_early_info_finder.h"
#include "net/instaweb/rewriter/public/output_resource_kind.h"
#include "net/instaweb/rewriter/public/resource.h"
#include "net/instaweb/rewriter/public/resource_slot.h"
#include "net/instaweb/rewriter/public/resource_tag_scanner.h"
#include "net/instaweb/rewriter/public/rewrite_driver.h"
#include "net/instaweb/rewriter/public/rewrite_result.h"
#include "net/instaweb/rewriter/public/server_context.h"
#include "net/instaweb/rewriter/public/single_rewrite_context.h"
#include "pagespeed/kernel/base/string_util.h"
#include "pagespeed/kernel/html/html_element.h"
#include "pagespeed/kernel/html/html_keywords.h"
#include "pagespeed/kernel/html/html_name.h"
#include "pagespeed/kernel/http/data_url.h"
#include "pagespeed/kernel/http/google_url.h"
#include "pagespeed/kernel/http/semantic_type.h"
namespace net_instaweb {
// Rewrite context created by CollectFlushEarlyContentFilter while the driver is
// flushing early. It stores the uncompressed size of a single resource in the
// cached result and, when rendered, publishes that size on the originating
// element through the data-pagespeed-size attribute.
class CollectFlushEarlyContentFilter::Context : public SingleRewriteContext {
 public:
  explicit Context(RewriteDriver* driver)
      : SingleRewriteContext(driver, NULL, NULL) {}

 protected:
  // Records the size of the slot's resource in output partition 0. |input| and
  // |output| are not consulted; the resource is read from the slot instead.
  virtual void RewriteSingle(const ResourcePtr& input,
                             const OutputResourcePtr& output) {
    // Resources which are inlined or combined are not recorded: only a lone
    // slot whose element is not scheduled for deletion gets a size.
    const bool record_size =
        num_slots() == 1 && !slot(0)->should_delete_element();
    if (record_size) {
      // Update the cache with the resource size.
      ResourcePtr resource = slot(0)->resource();
      CachedResult* cached = output_partition(0);
      cached->set_size(resource->UncompressedContentsSize());
    }
    // The rewrite is reported as failed whether or not a size was recorded.
    RewriteDone(kRewriteFailed, 0);
  }

  // Copies the cached size (if any) onto the element as an attribute.
  virtual void Render() {
    const bool has_cached_size =
        num_output_partitions() > 0 && output_partition(0)->has_size();
    if (!has_cached_size) {
      return;
    }
    HtmlResourceSlot* html_slot =
        static_cast<HtmlResourceSlot*>(slot(0).get());
    HtmlElement* element = html_slot->element();
    // TODO(pulkitg): Can IsRewritable be false here (see comment to
    // Propagate in rewrite_context.h)?
    if (!Driver()->IsRewritable(element)) {
      return;
    }
    Driver()->AddAttribute(element, HtmlName::kDataPagespeedSize,
                           Integer64ToString(output_partition(0)->size()));
  }

  virtual OutputResourceKind kind() const { return kOnTheFlyResource; }

  // Short identifier for this rewrite context.
  virtual const char* id() const { return "rscc"; }

 private:
  DISALLOW_COPY_AND_ASSIGN(Context);
};
// Builds the filter on top of |driver| and puts the per-document state
// (collected html and the two flags) into its initial, empty condition.
CollectFlushEarlyContentFilter::CollectFlushEarlyContentFilter(
    RewriteDriver* driver)
    : RewriteFilter(driver) {
  Clear();
}
void CollectFlushEarlyContentFilter::StartDocumentImpl() {
Clear();
FlushEarlyInfoFinder* finder =
driver()->server_context()->flush_early_info_finder();
if (finder != NULL && finder->IsMeaningful(driver())) {
finder->UpdateFlushEarlyInfoInDriver(driver());
}
}
// Publishes the collected resource html into the driver's flush early info.
// Nothing is published while the driver is flushing early, or when no
// resource was collected from the document.
void CollectFlushEarlyContentFilter::EndDocument() {
  if (driver()->flushing_early()) {
    return;
  }
  if (!found_resource_) {
    // Only structural markup (e.g. body tags) was gathered; discard it.
    resource_html_.clear();
    return;
  }
  if (resource_html_.empty()) {
    return;
  }
  driver()->flush_early_info()->set_resource_html(resource_html_);
}
// Handles an opening tag in one of two modes:
//  * While the driver is flushing early (and the option to flush more
//    resources early is on), starts a Context rewrite for the element's single
//    flushable resource so that its size gets recorded.
//  * Otherwise, appends script-src and link-href stylesheet resources to
//    resource_html_ for a later flush.
//
// Link stylesheet tags inside a noscript element are collected only if they
// were added by the Critical CSS filter. The link tags collected that way are
// parsed by a subsequent run of the Critical CSS filter in the flush early
// phase, where it replaces them with style elements that have the critical
// CSS rules inlined and a special attribute (kDataPagespeedFlushStyle) added.
// The flush early content writer filter in turn looks for that attribute on
// the style tag and flushes the content early as inlined CSS link tags.
// Note that this may cause the order of CSS elements stored in resource html
// to differ from the order in which elements are parsed in HTML, which can
// cause downloads to happen in a different order too.
//
// FlushEarlyContentWriterFilter depends on us not flushing multiple resources
// for the same element for two reasons:
//  - The data-pagespeed-size attribute doesn't specify which url-valued
//    attribute it refers to.
//  - If there are multiple such attributes at least one is unlikely to be
//    used and so not worth flushing.
void CollectFlushEarlyContentFilter::StartElementImpl(HtmlElement* element) {
  if (element == noscript_element()) {
    // Entering a noscript: decide whether its contents should be collected.
    if (driver()->options()->enable_flush_early_critical_css()) {
      const char* class_value =
          noscript_element()->AttributeValue(HtmlName::kClass);
      if (class_value != NULL &&
          StringCaseEqual(class_value,
                          CriticalSelectorFilter::kNoscriptStylesClass)) {
        should_collect_critical_css_ = true;
      }
    }
    return;
  }

  if (noscript_element() != NULL && !should_collect_critical_css_) {
    // Inside a noscript we were not asked to collect from: do nothing.
    return;
  }

  if (element->keyword() == HtmlName::kBody) {
    StrAppend(&resource_html_, "<body>");
    return;
  }

  const bool record_sizes =
      driver()->flushing_early() &&
      driver()->options()->flush_more_resources_early_if_time_permits();
  if (record_sizes) {
    resource_tag_scanner::UrlCategoryVector attributes;
    resource_tag_scanner::ScanElement(element, driver()->options(),
                                      &attributes);
    // We only want to flush early if there is a single flushable resource.
    HtmlElement::Attribute* flushable_url = NULL;
    for (int i = 0, n = attributes.size(); i < n; ++i) {
      const bool is_flushable =
          attributes[i].category == semantic_type::kStylesheet ||
          attributes[i].category == semantic_type::kScript ||
          attributes[i].category == semantic_type::kImage;
      if (!is_flushable) {
        continue;
      }
      if (flushable_url != NULL) {
        // This should never happen. When StartElementImpl is called with
        // driver()->flushing_early() being true we're parsing the content
        // which we want to flush early. That content was already filtered to
        // contain only elements with single resources to be flushed early.
        DCHECK(false);
        return;
      }
      flushable_url = attributes[i].url;
    }
    if (flushable_url == NULL) {
      return;
    }

    // We found a single resource to flush early.
    StringPiece url(flushable_url->DecodedValueOrNull());
    if (url.empty() || IsDataUrl(url)) {
      return;
    }
    ResourcePtr resource(
        CreateInputResourceOrInsertDebugComment(url, element));
    if (resource.get() == NULL) {
      return;
    }
    ResourceSlotPtr slot(driver()->GetSlot(resource, element, flushable_url));
    Context* context = new Context(driver());
    context->AddSlot(slot);
    // NOTE(review): the raw pointer is handed to the driver here; presumably
    // the driver takes ownership -- confirm against RewriteDriver.
    driver()->InitiateRewrite(context);
    return;
  }

  // Find javascript elements in the head, and css elements in the entire
  // page. Only look at standard link-href/script-src tags because those are
  // the only ones we can handle with AppendToHtml() and because we're only
  // able to flush one resource early per element.
  const bool is_script = element->keyword() == HtmlName::kScript;
  const bool is_link = element->keyword() == HtmlName::kLink;
  if (!is_script && !is_link) {
    return;
  }
  HtmlElement::Attribute* resource_url =
      element->FindAttribute(is_script ? HtmlName::kSrc : HtmlName::kHref);
  // NOTE(review): resource_url may be NULL when the attribute is absent; the
  // code below relies on CategorizeAttribute not reporting kScript/kStylesheet
  // in that case -- confirm.
  semantic_type::Category category = resource_tag_scanner::CategorizeAttribute(
      element, resource_url, driver()->options());
  const semantic_type::Category expected_category =
      is_script ? semantic_type::kScript : semantic_type::kStylesheet;
  if (category != expected_category) {
    return;
  }

  StringPiece url(resource_url->DecodedValueOrNull());
  if (url.empty() || IsDataUrl(url)) {
    return;
  }
  ResourcePtr resource(CreateInputResourceOrInsertDebugComment(url, element));
  if (resource.get() == NULL) {
    return;
  }

  // We need to always use the absolutified urls while flushing, else we
  // might end up flushing wrong resources. Use the absolutified url that is
  // computed in CreateInputResource call.
  GoogleUrl gurl(resource->url());
  if (!gurl.IsWebValid()) {
    return;
  }

  // Decode the url if it is encoded; otherwise use it as is.
  StringVector decoded_urls;
  if (!driver()->DecodeUrl(gurl, &decoded_urls)) {
    AppendToHtml(gurl.Spec(), category, element);
    return;
  }
  // TODO(pulkitg): Detect cases where rewritten resources are already
  // present in the original html.
  if (decoded_urls.size() == 1) {
    // There will be only 1 url as combiners are off and this should be
    // modified once they are enabled.
    AppendToHtml(decoded_urls.front(), category, element);
  }
}
// Appends markup for |url| to resource_html_ and notes that a resource was
// found. Stylesheets become a <link> carrying the element's type and rel
// attributes; scripts become a <script> carrying the element's type attribute.
// Any other category appends nothing but still marks a resource as found.
void CollectFlushEarlyContentFilter::AppendToHtml(
    StringPiece url, semantic_type::Category category, HtmlElement* element) {
  found_resource_ = true;

  GoogleString escaped_url;
  HtmlKeywords::Escape(url, &escaped_url);

  switch (category) {
    case semantic_type::kStylesheet:
      StrAppend(&resource_html_, "<link ");
      AppendAttribute(HtmlName::kType, element);
      AppendAttribute(HtmlName::kRel, element);
      StrAppend(&resource_html_, "href=\"", escaped_url, "\"/>");
      break;
    case semantic_type::kScript:
      StrAppend(&resource_html_, "<script ");
      AppendAttribute(HtmlName::kType, element);
      StrAppend(&resource_html_, "src=\"", escaped_url, "\"></script>");
      break;
    default:
      break;
  }
}
// Appends `name="escaped-value" ` (note the trailing space) to resource_html_
// for the attribute |keyword| of |element|. Nothing is appended when the
// attribute is missing or its decoded value is empty.
void CollectFlushEarlyContentFilter::AppendAttribute(
    HtmlName::Keyword keyword, HtmlElement* element) {
  HtmlElement::Attribute* attribute = element->FindAttribute(keyword);
  if (attribute == NULL) {
    return;
  }
  StringPiece raw_value(attribute->DecodedValueOrNull());
  if (raw_value.empty()) {
    return;
  }
  GoogleString escaped;
  HtmlKeywords::Escape(raw_value, &escaped);
  StrAppend(&resource_html_, attribute->name_str(), "=\"", escaped, "\" ");
}
// Handles a closing tag. Outside of any noscript element a closing body tag
// is mirrored into resource_html_; closing the tracked noscript element stops
// critical CSS collection.
void CollectFlushEarlyContentFilter::EndElementImpl(HtmlElement* element) {
  if (noscript_element() == NULL) {
    if (element->keyword() == HtmlName::kBody) {
      StrAppend(&resource_html_, "</body>");
    }
    return;
  }
  // Still inside a noscript: only its own closing tag matters.
  if (element == noscript_element()) {
    should_collect_critical_css_ = false;
  }
}
// Returns the filter to its initial state: no collected html, no resource
// found, and critical CSS collection switched off.
void CollectFlushEarlyContentFilter::Clear() {
  should_collect_critical_css_ = false;
  found_resource_ = false;
  resource_html_.clear();
}
} // namespace net_instaweb