blob: 94a3aad7b965b92d625c96581b3c2e677de21af0 [file] [log] [blame]
/*
* Copyright 2012 Google Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
// Author: mmohabey@google.com (Megha Mohabey)
#include "net/instaweb/rewriter/public/suppress_prehead_filter.h"
#include "base/logging.h"
#include "net/instaweb/http/public/log_record.h"
#include "net/instaweb/http/public/logging_proto_impl.h"
#include "net/instaweb/http/public/request_context.h"
#include "net/instaweb/rewriter/flush_early.pb.h"
#include "net/instaweb/rewriter/public/flush_early_info_finder.h"
#include "net/instaweb/rewriter/public/meta_tag_filter.h"
#include "net/instaweb/rewriter/public/rewrite_driver.h"
#include "net/instaweb/rewriter/public/server_context.h"
#include "pagespeed/kernel/base/abstract_mutex.h"
#include "pagespeed/kernel/base/ref_counted_ptr.h"
#include "pagespeed/kernel/base/string_util.h"
#include "pagespeed/kernel/html/html_element.h"
#include "pagespeed/kernel/html/html_name.h"
#include "pagespeed/kernel/html/html_node.h"
#include "pagespeed/kernel/http/http_names.h"
#include "pagespeed/opt/logging/request_timing_info.h"
namespace {
const char kCookieJs[] =
"(function(){"
"var data = %s;"
"for (var i = 0; i < data.length; i++) {"
"document.cookie = data[i];"
"}})()";
const char kFetchLatencySeparator[] = ",";
const char kNumFetchLatencyEntries = 10;
} // namespace
namespace net_instaweb {
SuppressPreheadFilter::SuppressPreheadFilter(RewriteDriver* driver)
: HtmlWriterFilter(driver),
driver_(driver),
pre_head_writer_(&pre_head_) {
Clear();
}
void SuppressPreheadFilter::StartDocument() {
Clear();
original_writer_ = driver_->writer();
// If the request was flushed early then do not flush the pre head again.
if (driver_->flushed_early()) {
// Change the writer to suppress the bytes from being written to the
// response. Also for storing the new pre head information in property
// cache.
set_writer(&pre_head_writer_);
} else {
// We have not flushed early so both store the pre_head and allow it to be
// written to the response.
pre_head_and_response_writer_.reset(new SplitWriter(
original_writer_, &pre_head_writer_));
set_writer(pre_head_and_response_writer_.get());
}
// Setting the charset in response headers related initialization.
response_headers_.reset(new ResponseHeaders(*driver_->response_headers()));
charset_ = response_headers_->DetermineCharset();
has_charset_ = !charset_.empty();
}
void SuppressPreheadFilter::PreHeadDone(HtmlElement* element) {
seen_first_head_ = true;
set_writer(original_writer_);
if (driver_->flushed_early()) {
SendCookies(element);
}
}
// TODO(mmohabey): AddHead filter will not add a head in the following case:
// <html><noscript><head></head></noscript></html>. This will break the page if
// FlushSubresources filter is applied.
void SuppressPreheadFilter::StartElement(HtmlElement* element) {
if (noscript_element_ == NULL && element->keyword() == HtmlName::kNoscript) {
noscript_element_ = element; // Record top-level <noscript>
}
if (!seen_first_head_ && noscript_element_ == NULL) {
if (element->keyword() == HtmlName::kHtml) {
seen_start_html_ = true;
} else if (element->keyword() == HtmlName::kHead) {
HtmlWriterFilter::StartElement(element);
// If the element is Head, flush the node and set seen_first_head_.
// If HtmlWriterFilter is holding off any bytes due to
// HtmlElement::BRIEF_CLOSE then emit that.
HtmlWriterFilter::TerminateLazyCloseElement();
PreHeadDone(element);
return;
} else if (seen_start_html_ && element->keyword() != HtmlName::kHtml) {
// If the element is other than HTML/HEAD, do not flush it. According to
// http://www.whatwg.org/specs/web-apps/current-work/multipage/tree-construction.html#the-before-head-insertion-mode,
// such nodes are part of head.
PreHeadDone(element);
}
}
HtmlWriterFilter::StartElement(element);
}
void SuppressPreheadFilter::EndElement(HtmlElement* element) {
HtmlWriterFilter::EndElement(element);
if (noscript_element_ == NULL &&
element->keyword() == HtmlName::kMeta) {
if (!has_charset_) {
has_charset_ = MetaTagFilter::ExtractAndUpdateMetaTagDetails(
element, response_headers_.get());
}
if (!has_x_ua_compatible_) {
has_x_ua_compatible_ = ExtractAndUpdateXUACompatible(element);
}
}
if (element == noscript_element_) {
noscript_element_ = NULL; // We are exitting the top-level <noscript>
}
}
void SuppressPreheadFilter::Clear() {
seen_start_html_ = false;
seen_first_head_ = false;
has_charset_ = false;
has_x_ua_compatible_ = false;
noscript_element_ = NULL;
pre_head_.clear();
charset_.clear();
pre_head_and_response_writer_.reset(NULL);
response_headers_.reset();
HtmlWriterFilter::Clear();
}
void SuppressPreheadFilter::EndDocument() {
int64 header_fetch_ms = -1;
{
bool is_cacheable_html = false;
{
AbstractLogRecord* log_record = driver_->log_record();
ScopedMutex lock(log_record->mutex());
// It is assumed that default value of is_original_resource_cacheable is
// true. This field will be set only if original resource is not
// cacheable.
is_cacheable_html =
(!log_record->logging_info()->has_is_original_resource_cacheable() ||
log_record->logging_info()->is_original_resource_cacheable());
} // Release lock before calling GetFetchHeaderMs as it takes the same lock
// TODO(gee): Fix this.
// If the html is cacheable, then any resource other than the critical
// resources may block the html download as html might get served from
// cache. Thus header_fetch_ms is not populated in that case.
if (!driver_->flushing_early() && !is_cacheable_html) {
driver_->request_context()->timing_info().GetFetchHeaderLatencyMs(
&header_fetch_ms);
}
}
FlushEarlyInfo* flush_early_info = driver_->flush_early_info();
if (header_fetch_ms >= 0) {
UpdateFetchLatencyInFlushEarlyProto(header_fetch_ms, driver_);
} else {
flush_early_info->clear_average_fetch_latency_ms();
flush_early_info->clear_last_n_fetch_latencies();
}
flush_early_info->set_pre_head(pre_head_);
// See the description of the HttpOnly cookie in
// http://tools.ietf.org/html/rfc6265#section-4.1.2.6
flush_early_info->set_http_only_cookie_present(
flush_early_info->http_only_cookie_present() ||
response_headers_->HasAnyCookiesWithAttribute("HttpOnly", NULL));
if (!has_charset_) {
FlushEarlyInfoFinder* finder =
driver_->server_context()->flush_early_info_finder();
if (finder != NULL && finder->IsMeaningful(driver_)) {
finder->UpdateFlushEarlyInfoInDriver(driver_);
charset_ = finder->GetCharset(driver_);
if (!charset_.empty()) {
GoogleString type = StrCat(";charset=", charset_);
response_headers_->MergeContentType(type);
}
}
}
driver_->SaveOriginalHeaders(*response_headers_);
}
// TODO(marq): Make this a regular method instead of a static method, and have
// it inspect driver_ instead of passing it as a parameter.
void SuppressPreheadFilter::UpdateFetchLatencyInFlushEarlyProto(
int64 latency, RewriteDriver* driver) {
double average_fetch_latency = latency;
GoogleString last_n_fetch_latency;
FlushEarlyInfo* flush_early_info = driver->flush_early_info();
if (flush_early_info->has_last_n_fetch_latencies() &&
flush_early_info->has_average_fetch_latency_ms()) {
last_n_fetch_latency = flush_early_info->last_n_fetch_latencies();
average_fetch_latency = flush_early_info->average_fetch_latency_ms();
StringPieceVector fetch_latency_vector;
SplitStringPieceToVector(
last_n_fetch_latency, kFetchLatencySeparator,
&fetch_latency_vector, true);
int num_fetch_latency = fetch_latency_vector.size();
if (num_fetch_latency > kNumFetchLatencyEntries) {
LOG(WARNING) << "Number of fetch latencies in flush early proto is more "
<< "than " << kNumFetchLatencyEntries << " for url: "
<< driver->url();
average_fetch_latency = 0;
last_n_fetch_latency = "";
} else if (num_fetch_latency == kNumFetchLatencyEntries) {
// If last_n_fetch_latency contains 'n' entries, then remove the entry
// from the end and add new entry at the front. Also update the average
// latency.
int64 nth_latency;
if (StringToInt64(
fetch_latency_vector[kNumFetchLatencyEntries - 1].as_string(),
&nth_latency)) {
average_fetch_latency = ((average_fetch_latency * num_fetch_latency) -
nth_latency + latency) / num_fetch_latency;
last_n_fetch_latency = StrCat(
Integer64ToString(latency), kFetchLatencySeparator,
last_n_fetch_latency.substr(
0, last_n_fetch_latency.find_last_of(kFetchLatencySeparator)));
}
} else {
// last_n_fetch_latency does not contains 'n' entries. Add a new entry at
// the front and update average.
average_fetch_latency =
(average_fetch_latency * (num_fetch_latency) + latency) /
(num_fetch_latency + 1);
last_n_fetch_latency = StrCat(
Integer64ToString(latency), ",",
flush_early_info->last_n_fetch_latencies());
}
} else {
// Add entry in the proto if no information is present.
last_n_fetch_latency = Integer64ToString(latency);
}
flush_early_info->set_average_fetch_latency_ms(average_fetch_latency);
flush_early_info->set_last_n_fetch_latencies(last_n_fetch_latency);
}
bool SuppressPreheadFilter::ExtractAndUpdateXUACompatible(
HtmlElement* element) {
const HtmlElement::Attribute* equiv =
element->FindAttribute(HtmlName::kHttpEquiv);
const HtmlElement::Attribute* value =
element->FindAttribute(HtmlName::kContent);
if (equiv != NULL && value!= NULL) {
StringPiece attribute = equiv->DecodedValueOrNull();
StringPiece value_str = value->DecodedValueOrNull();
if (!value_str.empty() && !attribute.empty()) {
TrimWhitespace(&attribute);
// http-equiv must equal "Content-Type" and content must not be blank.
if (StringCaseEqual(attribute, HttpAttributes::kXUACompatible)) {
if (!response_headers_->HasValue(attribute, value_str)) {
response_headers_->Add(attribute, value_str);
return true;
}
}
}
}
return false;
}
void SuppressPreheadFilter::SendCookies(HtmlElement* element) {
GoogleString cookie_str;
const ResponseHeaders* response_headers = driver_->response_headers();
if (response_headers->GetCookieString(&cookie_str)) {
HtmlElement* script = driver_->NewElement(element, HtmlName::kScript);
driver_->AddAttribute(script, HtmlName::kType, "text/javascript");
driver_->AddAttribute(script, HtmlName::kDataPagespeedNoDefer, NULL);
HtmlCharactersNode* script_code = driver_->NewCharactersNode(script,
StringPrintf(kCookieJs, cookie_str.c_str()));
driver_->PrependChild(element, script);
driver_->AppendChild(script, script_code);
}
}
} // namespace net_instaweb