src/net/instaweb/rewriter/suppress_prehead_filter.cc - incubator-pagespeed-debian - Git at Google

 /*
  * Copyright 2012 Google Inc.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
  * You may obtain a copy of the License at
  *
  *      http://www.apache.org/licenses/LICENSE-2.0
  *
  * Unless required by applicable law or agreed to in writing, software
  * distributed under the License is distributed on an "AS IS" BASIS,
  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
 // Author: mmohabey@google.com (Megha Mohabey)

 #include "net/instaweb/rewriter/public/suppress_prehead_filter.h"

 #include "base/logging.h"
 #include "net/instaweb/http/public/log_record.h"
 #include "net/instaweb/http/public/logging_proto_impl.h"
 #include "net/instaweb/http/public/request_context.h"
 #include "net/instaweb/rewriter/flush_early.pb.h"
 #include "net/instaweb/rewriter/public/flush_early_info_finder.h"
 #include "net/instaweb/rewriter/public/meta_tag_filter.h"
 #include "net/instaweb/rewriter/public/rewrite_driver.h"
 #include "net/instaweb/rewriter/public/server_context.h"
 #include "pagespeed/kernel/base/abstract_mutex.h"
 #include "pagespeed/kernel/base/ref_counted_ptr.h"
 #include "pagespeed/kernel/base/string_util.h"
 #include "pagespeed/kernel/html/html_element.h"
 #include "pagespeed/kernel/html/html_name.h"
 #include "pagespeed/kernel/html/html_node.h"
 #include "pagespeed/kernel/http/http_names.h"
 #include "pagespeed/opt/logging/request_timing_info.h"


 namespace {

 // JavaScript template used by SendCookies. The %s placeholder is replaced
 // (via StringPrintf) with the cookie data, which the script walks as an array,
 // assigning each entry to document.cookie.
 const char kCookieJs[] =
     "(function(){"
     "var data = %s;"
     "for (var i = 0; i < data.length; i++) {"
     "document.cookie = data[i];"
     "}})()";

 // Separator between the entries of the last_n_fetch_latencies string.
 const char kFetchLatencySeparator[] = ",";

 // Maximum number of entries kept in last_n_fetch_latencies. This is a count,
 // so it is an int: declared as a char, streaming it into a log message would
 // emit the character with code 10 (a newline) instead of the number.
 const int kNumFetchLatencyEntries = 10;

 }  // namespace

 namespace net_instaweb {

 // Constructs the filter for |driver|. The HtmlWriterFilter base receives the
 // same driver, pre_head_writer_ is constructed with the address of pre_head_,
 // and Clear() puts the remaining members into their initial state.
 SuppressPreheadFilter::SuppressPreheadFilter(RewriteDriver* driver)
     : HtmlWriterFilter(driver),
       driver_(driver),
       pre_head_writer_(&pre_head_) {
   Clear();
 }

 // Resets the per-document state and picks the writer used for the bytes that
 // precede the first <head>. A private copy of the driver's response headers
 // is taken as well, and the charset it determines is recorded.
 void SuppressPreheadFilter::StartDocument() {
   Clear();
   original_writer_ = driver_->writer();
   if (!driver_->flushed_early()) {
     // Nothing was flushed early: the pre head must reach the response and is
     // also stored, so write to both destinations.
     pre_head_and_response_writer_.reset(
         new SplitWriter(original_writer_, &pre_head_writer_));
     set_writer(pre_head_and_response_writer_.get());
   } else {
     // The request was flushed early, so the pre head must not be written to
     // the response again. It is still routed to pre_head_writer_ so the new
     // pre head information can be stored in the property cache.
     set_writer(&pre_head_writer_);
   }

   // Initialization for setting the charset in the response headers.
   response_headers_.reset(new ResponseHeaders(*driver_->response_headers()));
   charset_ = response_headers_->DetermineCharset();
   has_charset_ = !charset_.empty();
 }

 // Marks the end of the pre-head section: records that the first head has been
 // seen and restores the writer saved in StartDocument. If the driver reports
 // that the response was flushed early, SendCookies(element) is invoked too.
 void SuppressPreheadFilter::PreHeadDone(HtmlElement* element) {
   seen_first_head_ = true;
   set_writer(original_writer_);
   if (!driver_->flushed_early()) {
     return;
   }
   SendCookies(element);
 }

 // TODO(mmohabey): AddHead filter will not add a head in the following case:
 // <html><noscript><head></head></noscript></html>. This will break the page if
 // FlushSubresources filter is applied.
 //
 // Watches opening tags to find where the pre-head section ends. Elements
 // nested in a top-level <noscript> are ignored for that purpose. Every
 // element is forwarded to HtmlWriterFilter::StartElement exactly once.
 void SuppressPreheadFilter::StartElement(HtmlElement* element) {
   // Remember the top-level <noscript> so that its contents can be skipped.
   if (noscript_element_ == NULL && element->keyword() == HtmlName::kNoscript) {
     noscript_element_ = element;
   }

   const bool in_pre_head = !seen_first_head_ && noscript_element_ == NULL;
   if (in_pre_head) {
     switch (element->keyword()) {
       case HtmlName::kHtml:
         seen_start_html_ = true;
         break;
       case HtmlName::kHead:
         // Write the <head> tag itself before switching writers. Any bytes
         // HtmlWriterFilter is holding back due to HtmlElement::BRIEF_CLOSE
         // are emitted as well.
         HtmlWriterFilter::StartElement(element);
         HtmlWriterFilter::TerminateLazyCloseElement();
         PreHeadDone(element);
         return;
       default:
         // An element other than HTML/HEAD that follows <html> is not flushed
         // as pre head. According to
         // http://www.whatwg.org/specs/web-apps/current-work/multipage/tree-construction.html#the-before-head-insertion-mode,
         // such nodes are part of head.
         if (seen_start_html_) {
           PreHeadDone(element);
         }
         break;
     }
   }
   HtmlWriterFilter::StartElement(element);
 }

 // Forwards the closing tag to HtmlWriterFilter, then inspects <meta> elements
 // that are outside any top-level <noscript>. Until a charset has been found
 // the element is offered to MetaTagFilter, and until an X-UA-Compatible value
 // has been recorded it is offered to ExtractAndUpdateXUACompatible.
 void SuppressPreheadFilter::EndElement(HtmlElement* element) {
   HtmlWriterFilter::EndElement(element);

   const bool is_usable_meta =
       noscript_element_ == NULL && element->keyword() == HtmlName::kMeta;
   if (is_usable_meta) {
     if (!has_charset_) {
       has_charset_ = MetaTagFilter::ExtractAndUpdateMetaTagDetails(
           element, response_headers_.get());
     }
     if (!has_x_ua_compatible_) {
       has_x_ua_compatible_ = ExtractAndUpdateXUACompatible(element);
     }
   }

   // Leaving the top-level <noscript>: stop ignoring elements.
   if (noscript_element_ == element) {
     noscript_element_ = NULL;
   }
 }

 // Puts the filter back into its initial per-document state and then clears
 // the HtmlWriterFilter base as well.
 void SuppressPreheadFilter::Clear() {
   // Parse-progress flags.
   seen_start_html_ = seen_first_head_ = false;
   has_charset_ = has_x_ua_compatible_ = false;
   noscript_element_ = NULL;

   // Collected data and owned helper objects.
   pre_head_.clear();
   charset_.clear();
   pre_head_and_response_writer_.reset(NULL);
   response_headers_.reset();

   HtmlWriterFilter::Clear();
 }

 void SuppressPreheadFilter::EndDocument() {
   int64 header_fetch_ms = -1;
   {
     bool is_cacheable_html = false;
     {
       AbstractLogRecord* log_record = driver_->log_record();
       ScopedMutex lock(log_record->mutex());
       // It is assumed that default value of is_original_resource_cacheable is
       // true. This field will be set only if original resource is not
       // cacheable.
       is_cacheable_html =
           (!log_record->logging_info()->has_is_original_resource_cacheable() ||
            log_record->logging_info()->is_original_resource_cacheable());
     }  // Release lock before calling GetFetchHeaderMs as it takes the same lock
     // TODO(gee): Fix this.

     // If the html is cacheable, then any resource other than the critical
     // resources may block the html download as html might get served from
     // cache. Thus header_fetch_ms is not populated in that case.
     if (!driver_->flushing_early() && !is_cacheable_html) {
       driver_->request_context()->timing_info().GetFetchHeaderLatencyMs(
           &header_fetch_ms);
     }
   }

   FlushEarlyInfo* flush_early_info = driver_->flush_early_info();

   if (header_fetch_ms >= 0) {
     UpdateFetchLatencyInFlushEarlyProto(header_fetch_ms, driver_);
   } else {
     flush_early_info->clear_average_fetch_latency_ms();
     flush_early_info->clear_last_n_fetch_latencies();
   }

   flush_early_info->set_pre_head(pre_head_);
   // See the description of the HttpOnly cookie in
   // http://tools.ietf.org/html/rfc6265#section-4.1.2.6
   flush_early_info->set_http_only_cookie_present(
       flush_early_info->http_only_cookie_present() ||
       response_headers_->HasAnyCookiesWithAttribute("HttpOnly", NULL));
   if (!has_charset_) {
     FlushEarlyInfoFinder* finder =
         driver_->server_context()->flush_early_info_finder();
     if (finder != NULL && finder->IsMeaningful(driver_)) {
       finder->UpdateFlushEarlyInfoInDriver(driver_);
       charset_ = finder->GetCharset(driver_);
       if (!charset_.empty()) {
         GoogleString type = StrCat(";charset=", charset_);
         response_headers_->MergeContentType(type);
       }
     }
   }
   driver_->SaveOriginalHeaders(*response_headers_);
 }

 // TODO(marq): Make this a regular method instead of a static method, and have
 // it inspect driver_ instead of passing it as a parameter.
 //
 // Folds |latency| into the fetch latency history kept in the FlushEarlyInfo of
 // |driver|. The history is a separator-joined list with the newest entry
 // first, accompanied by a running average:
 //   - no history stored: the list becomes just |latency|, the average too;
 //   - fewer than kNumFetchLatencyEntries entries: |latency| is put in front;
 //   - exactly kNumFetchLatencyEntries entries: the last entry is dropped and
 //     |latency| is put in front; if the last entry cannot be parsed the
 //     stored values are written back unchanged;
 //   - more entries than that: a warning is logged and the history is reset
 //     to an empty list with average 0.
 // The new list is rebuilt from the parsed entries rather than by string
 // surgery, so a stored value that is empty or carries a stray separator does
 // not produce a dangling separator (which previously let the list grow past
 // the limit and trigger a reset).
 void SuppressPreheadFilter::UpdateFetchLatencyInFlushEarlyProto(
     int64 latency, RewriteDriver* driver) {
   FlushEarlyInfo* flush_early_info = driver->flush_early_info();
   // These defaults are the result when the proto holds no history yet.
   double average_fetch_latency = latency;
   GoogleString last_n_fetch_latency = Integer64ToString(latency);
   if (flush_early_info->has_last_n_fetch_latencies() &&
       flush_early_info->has_average_fetch_latency_ms()) {
     // Keep a copy: the pieces in fetch_latency_vector refer into this string.
     const GoogleString stored_latencies =
         flush_early_info->last_n_fetch_latencies();
     const double stored_average = flush_early_info->average_fetch_latency_ms();
     StringPieceVector fetch_latency_vector;
     // NOTE(review): the trailing `true` is assumed to make the split skip
     // empty pieces -- confirm against string_util.h.
     SplitStringPieceToVector(
         stored_latencies, kFetchLatencySeparator,
         &fetch_latency_vector, true);
     const int num_fetch_latency = fetch_latency_vector.size();
     // Number of stored entries to append behind the new latency; a negative
     // value means the list is not rebuilt.
     int num_entries_to_keep = -1;
     if (num_fetch_latency > kNumFetchLatencyEntries) {
       // The cast keeps the count printed as a number whatever integral type
       // the constant is declared with.
       LOG(WARNING) << "Number of fetch latencies in flush early proto is more "
                    << "than " << static_cast<int>(kNumFetchLatencyEntries)
                    << " for url: " << driver->url();
       average_fetch_latency = 0;
       last_n_fetch_latency = "";
     } else if (num_fetch_latency == kNumFetchLatencyEntries) {
       // The list is full: evict the last entry, put the new one in front and
       // adjust the average accordingly.
       int64 nth_latency;
       if (StringToInt64(
               fetch_latency_vector[num_fetch_latency - 1].as_string(),
               &nth_latency)) {
         average_fetch_latency = ((stored_average * num_fetch_latency) -
             nth_latency + latency) / num_fetch_latency;
         num_entries_to_keep = num_fetch_latency - 1;
       } else {
         // Unparseable last entry: leave the stored values as they are.
         average_fetch_latency = stored_average;
         last_n_fetch_latency = stored_latencies;
       }
     } else {
       // The list is not full yet: put the new entry in front and update the
       // average.
       average_fetch_latency =
           (stored_average * (num_fetch_latency) + latency) /
           (num_fetch_latency + 1);
       num_entries_to_keep = num_fetch_latency;
     }
     for (int i = 0; i < num_entries_to_keep; ++i) {
       last_n_fetch_latency = StrCat(
           last_n_fetch_latency, kFetchLatencySeparator,
           fetch_latency_vector[i]);
     }
   }
   flush_early_info->set_average_fetch_latency_ms(average_fetch_latency);
   flush_early_info->set_last_n_fetch_latencies(last_n_fetch_latency);
 }

 // Looks for <meta http-equiv="X-UA-Compatible" content="..."> in |element|.
 // When found and the filter's response headers do not already carry that
 // value, the header is added and true is returned. In every other case
 // (attribute missing or empty, different http-equiv, value already present)
 // the headers are left alone and false is returned.
 bool SuppressPreheadFilter::ExtractAndUpdateXUACompatible(
     HtmlElement* element) {
   const HtmlElement::Attribute* equiv =
       element->FindAttribute(HtmlName::kHttpEquiv);
   const HtmlElement::Attribute* value =
       element->FindAttribute(HtmlName::kContent);
   if (equiv == NULL || value == NULL) {
     return false;
   }

   StringPiece attribute = equiv->DecodedValueOrNull();
   StringPiece value_str = value->DecodedValueOrNull();
   if (value_str.empty() || attribute.empty()) {
     return false;
   }

   // http-equiv must equal "X-UA-Compatible" (ignoring case and surrounding
   // whitespace); content was already checked to be non-blank.
   TrimWhitespace(&attribute);
   if (!StringCaseEqual(attribute, HttpAttributes::kXUACompatible)) {
     return false;
   }
   if (response_headers_->HasValue(attribute, value_str)) {
     return false;
   }
   response_headers_->Add(attribute, value_str);
   return true;
 }

 // If the driver's response headers yield a cookie string, inserts a <script>
 // as the first child of |element| whose body is kCookieJs with that string
 // substituted in. Does nothing when GetCookieString reports no cookies.
 void SuppressPreheadFilter::SendCookies(HtmlElement* element) {
   GoogleString cookie_str;
   const ResponseHeaders* response_headers = driver_->response_headers();
   if (!response_headers->GetCookieString(&cookie_str)) {
     return;
   }

   HtmlElement* cookie_script = driver_->NewElement(element, HtmlName::kScript);
   driver_->AddAttribute(cookie_script, HtmlName::kType, "text/javascript");
   driver_->AddAttribute(cookie_script, HtmlName::kDataPagespeedNoDefer, NULL);
   HtmlCharactersNode* cookie_js = driver_->NewCharactersNode(
       cookie_script, StringPrintf(kCookieJs, cookie_str.c_str()));
   driver_->PrependChild(element, cookie_script);
   driver_->AppendChild(cookie_script, cookie_js);
 }

 }  // namespace net_instaweb
	/*
	* Copyright 2012 Google Inc.
	*
	* Licensed under the Apache License, Version 2.0 (the "License");
	* you may not use this file except in compliance with the License.
	* You may obtain a copy of the License at
	*
	* http://www.apache.org/licenses/LICENSE-2.0
	*
	* Unless required by applicable law or agreed to in writing, software
	* distributed under the License is distributed on an "AS IS" BASIS,
	* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
	* See the License for the specific language governing permissions and
	* limitations under the License.
	*/
	// Author: mmohabey@google.com (Megha Mohabey)

	#include "net/instaweb/rewriter/public/suppress_prehead_filter.h"

	#include "base/logging.h"
	#include "net/instaweb/http/public/log_record.h"
	#include "net/instaweb/http/public/logging_proto_impl.h"
	#include "net/instaweb/http/public/request_context.h"
	#include "net/instaweb/rewriter/flush_early.pb.h"
	#include "net/instaweb/rewriter/public/flush_early_info_finder.h"
	#include "net/instaweb/rewriter/public/meta_tag_filter.h"
	#include "net/instaweb/rewriter/public/rewrite_driver.h"
	#include "net/instaweb/rewriter/public/server_context.h"
	#include "pagespeed/kernel/base/abstract_mutex.h"
	#include "pagespeed/kernel/base/ref_counted_ptr.h"
	#include "pagespeed/kernel/base/string_util.h"
	#include "pagespeed/kernel/html/html_element.h"
	#include "pagespeed/kernel/html/html_name.h"
	#include "pagespeed/kernel/html/html_node.h"
	#include "pagespeed/kernel/http/http_names.h"
	#include "pagespeed/opt/logging/request_timing_info.h"


	namespace {

	// JavaScript template used by SendCookies. The %s placeholder is replaced
	// (via StringPrintf) with the cookie data, which the script walks as an
	// array, assigning each entry to document.cookie.
	const char kCookieJs[] =
	    "(function(){"
	    "var data = %s;"
	    "for (var i = 0; i < data.length; i++) {"
	    "document.cookie = data[i];"
	    "}})()";

	// Separator between the entries of the last_n_fetch_latencies string.
	const char kFetchLatencySeparator[] = ",";

	// Maximum number of entries kept in last_n_fetch_latencies. This is a count,
	// so it is an int: declared as a char, streaming it into a log message would
	// emit the character with code 10 (a newline) instead of the number.
	const int kNumFetchLatencyEntries = 10;

	}  // namespace

	namespace net_instaweb {

	// Constructs the filter for |driver|. The HtmlWriterFilter base receives the
	// same driver, pre_head_writer_ is constructed with the address of pre_head_,
	// and Clear() puts the remaining members into their initial state.
	SuppressPreheadFilter::SuppressPreheadFilter(RewriteDriver* driver)
	: HtmlWriterFilter(driver),
	driver_(driver),
	pre_head_writer_(&pre_head_) {
	Clear();
	}

	// Resets the per-document state and picks the writer used for the bytes that
	// precede the first <head>. A private copy of the driver's response headers
	// is taken as well, and the charset it determines is recorded.
	void SuppressPreheadFilter::StartDocument() {
	  Clear();
	  original_writer_ = driver_->writer();
	  if (!driver_->flushed_early()) {
	    // Nothing was flushed early: the pre head must reach the response and is
	    // also stored, so write to both destinations.
	    pre_head_and_response_writer_.reset(
	        new SplitWriter(original_writer_, &pre_head_writer_));
	    set_writer(pre_head_and_response_writer_.get());
	  } else {
	    // The request was flushed early, so the pre head must not be written to
	    // the response again. It is still routed to pre_head_writer_ so the new
	    // pre head information can be stored in the property cache.
	    set_writer(&pre_head_writer_);
	  }

	  // Initialization for setting the charset in the response headers.
	  response_headers_.reset(new ResponseHeaders(*driver_->response_headers()));
	  charset_ = response_headers_->DetermineCharset();
	  has_charset_ = !charset_.empty();
	}

	// Marks the end of the pre-head section: records that the first head has been
	// seen and restores the writer saved in StartDocument. If the driver reports
	// that the response was flushed early, SendCookies(element) is invoked too.
	void SuppressPreheadFilter::PreHeadDone(HtmlElement* element) {
	  seen_first_head_ = true;
	  set_writer(original_writer_);
	  if (!driver_->flushed_early()) {
	    return;
	  }
	  SendCookies(element);
	}

	// TODO(mmohabey): AddHead filter will not add a head in the following case:
	// <html><noscript><head></head></noscript></html>. This will break the page if
	// FlushSubresources filter is applied.
	//
	// Watches opening tags to find where the pre-head section ends. Elements
	// nested in a top-level <noscript> are ignored for that purpose. Every
	// element is forwarded to HtmlWriterFilter::StartElement exactly once.
	void SuppressPreheadFilter::StartElement(HtmlElement* element) {
	  // Remember the top-level <noscript> so that its contents can be skipped.
	  if (noscript_element_ == NULL && element->keyword() == HtmlName::kNoscript) {
	    noscript_element_ = element;
	  }

	  const bool in_pre_head = !seen_first_head_ && noscript_element_ == NULL;
	  if (in_pre_head) {
	    switch (element->keyword()) {
	      case HtmlName::kHtml:
	        seen_start_html_ = true;
	        break;
	      case HtmlName::kHead:
	        // Write the <head> tag itself before switching writers. Any bytes
	        // HtmlWriterFilter is holding back due to HtmlElement::BRIEF_CLOSE
	        // are emitted as well.
	        HtmlWriterFilter::StartElement(element);
	        HtmlWriterFilter::TerminateLazyCloseElement();
	        PreHeadDone(element);
	        return;
	      default:
	        // An element other than HTML/HEAD that follows <html> is not flushed
	        // as pre head. According to
	        // http://www.whatwg.org/specs/web-apps/current-work/multipage/tree-construction.html#the-before-head-insertion-mode,
	        // such nodes are part of head.
	        if (seen_start_html_) {
	          PreHeadDone(element);
	        }
	        break;
	    }
	  }
	  HtmlWriterFilter::StartElement(element);
	}

	// Forwards the closing tag to HtmlWriterFilter, then inspects <meta> elements
	// that are outside any top-level <noscript>. Until a charset has been found
	// the element is offered to MetaTagFilter, and until an X-UA-Compatible value
	// has been recorded it is offered to ExtractAndUpdateXUACompatible.
	void SuppressPreheadFilter::EndElement(HtmlElement* element) {
	  HtmlWriterFilter::EndElement(element);

	  const bool is_usable_meta =
	      noscript_element_ == NULL && element->keyword() == HtmlName::kMeta;
	  if (is_usable_meta) {
	    if (!has_charset_) {
	      has_charset_ = MetaTagFilter::ExtractAndUpdateMetaTagDetails(
	          element, response_headers_.get());
	    }
	    if (!has_x_ua_compatible_) {
	      has_x_ua_compatible_ = ExtractAndUpdateXUACompatible(element);
	    }
	  }

	  // Leaving the top-level <noscript>: stop ignoring elements.
	  if (noscript_element_ == element) {
	    noscript_element_ = NULL;
	  }
	}

	// Puts the filter back into its initial per-document state and then clears
	// the HtmlWriterFilter base as well.
	void SuppressPreheadFilter::Clear() {
	  // Parse-progress flags.
	  seen_start_html_ = seen_first_head_ = false;
	  has_charset_ = has_x_ua_compatible_ = false;
	  noscript_element_ = NULL;

	  // Collected data and owned helper objects.
	  pre_head_.clear();
	  charset_.clear();
	  pre_head_and_response_writer_.reset(NULL);
	  response_headers_.reset();

	  HtmlWriterFilter::Clear();
	}

	void SuppressPreheadFilter::EndDocument() {
	int64 header_fetch_ms = -1;
	{
	bool is_cacheable_html = false;
	{
	AbstractLogRecord* log_record = driver_->log_record();
	ScopedMutex lock(log_record->mutex());
	// It is assumed that default value of is_original_resource_cacheable is
	// true. This field will be set only if original resource is not
	// cacheable.
	is_cacheable_html =
	(!log_record->logging_info()->has_is_original_resource_cacheable() \|\|
	log_record->logging_info()->is_original_resource_cacheable());
	} // Release lock before calling GetFetchHeaderMs as it takes the same lock
	// TODO(gee): Fix this.

	// If the html is cacheable, then any resource other than the critical
	// resources may block the html download as html might get served from
	// cache. Thus header_fetch_ms is not populated in that case.
	if (!driver_->flushing_early() && !is_cacheable_html) {
	driver_->request_context()->timing_info().GetFetchHeaderLatencyMs(
	&header_fetch_ms);
	}
	}

	FlushEarlyInfo* flush_early_info = driver_->flush_early_info();

	if (header_fetch_ms >= 0) {
	UpdateFetchLatencyInFlushEarlyProto(header_fetch_ms, driver_);
	} else {
	flush_early_info->clear_average_fetch_latency_ms();
	flush_early_info->clear_last_n_fetch_latencies();
	}

	flush_early_info->set_pre_head(pre_head_);
	// See the description of the HttpOnly cookie in
	// http://tools.ietf.org/html/rfc6265#section-4.1.2.6
	flush_early_info->set_http_only_cookie_present(
	flush_early_info->http_only_cookie_present() \|\|
	response_headers_->HasAnyCookiesWithAttribute("HttpOnly", NULL));
	if (!has_charset_) {
	FlushEarlyInfoFinder* finder =
	driver_->server_context()->flush_early_info_finder();
	if (finder != NULL && finder->IsMeaningful(driver_)) {
	finder->UpdateFlushEarlyInfoInDriver(driver_);
	charset_ = finder->GetCharset(driver_);
	if (!charset_.empty()) {
	GoogleString type = StrCat(";charset=", charset_);
	response_headers_->MergeContentType(type);
	}
	}
	}
	driver_->SaveOriginalHeaders(*response_headers_);
	}

	// TODO(marq): Make this a regular method instead of a static method, and have
	// it inspect driver_ instead of passing it as a parameter.
	//
	// Folds |latency| into the fetch latency history kept in the FlushEarlyInfo of
	// |driver|. The history is a separator-joined list with the newest entry
	// first, accompanied by a running average:
	//   - no history stored: the list becomes just |latency|, the average too;
	//   - fewer than kNumFetchLatencyEntries entries: |latency| is put in front;
	//   - exactly kNumFetchLatencyEntries entries: the last entry is dropped and
	//     |latency| is put in front; if the last entry cannot be parsed the
	//     stored values are written back unchanged;
	//   - more entries than that: a warning is logged and the history is reset
	//     to an empty list with average 0.
	// The new list is rebuilt from the parsed entries rather than by string
	// surgery, so a stored value that is empty or carries a stray separator does
	// not produce a dangling separator (which previously let the list grow past
	// the limit and trigger a reset).
	void SuppressPreheadFilter::UpdateFetchLatencyInFlushEarlyProto(
	    int64 latency, RewriteDriver* driver) {
	  FlushEarlyInfo* flush_early_info = driver->flush_early_info();
	  // These defaults are the result when the proto holds no history yet.
	  double average_fetch_latency = latency;
	  GoogleString last_n_fetch_latency = Integer64ToString(latency);
	  if (flush_early_info->has_last_n_fetch_latencies() &&
	      flush_early_info->has_average_fetch_latency_ms()) {
	    // Keep a copy: the pieces in fetch_latency_vector refer into this string.
	    const GoogleString stored_latencies =
	        flush_early_info->last_n_fetch_latencies();
	    const double stored_average = flush_early_info->average_fetch_latency_ms();
	    StringPieceVector fetch_latency_vector;
	    // NOTE(review): the trailing `true` is assumed to make the split skip
	    // empty pieces -- confirm against string_util.h.
	    SplitStringPieceToVector(
	        stored_latencies, kFetchLatencySeparator,
	        &fetch_latency_vector, true);
	    const int num_fetch_latency = fetch_latency_vector.size();
	    // Number of stored entries to append behind the new latency; a negative
	    // value means the list is not rebuilt.
	    int num_entries_to_keep = -1;
	    if (num_fetch_latency > kNumFetchLatencyEntries) {
	      // The cast keeps the count printed as a number whatever integral type
	      // the constant is declared with.
	      LOG(WARNING) << "Number of fetch latencies in flush early proto is more "
	                   << "than " << static_cast<int>(kNumFetchLatencyEntries)
	                   << " for url: " << driver->url();
	      average_fetch_latency = 0;
	      last_n_fetch_latency = "";
	    } else if (num_fetch_latency == kNumFetchLatencyEntries) {
	      // The list is full: evict the last entry, put the new one in front and
	      // adjust the average accordingly.
	      int64 nth_latency;
	      if (StringToInt64(
	              fetch_latency_vector[num_fetch_latency - 1].as_string(),
	              &nth_latency)) {
	        average_fetch_latency = ((stored_average * num_fetch_latency) -
	            nth_latency + latency) / num_fetch_latency;
	        num_entries_to_keep = num_fetch_latency - 1;
	      } else {
	        // Unparseable last entry: leave the stored values as they are.
	        average_fetch_latency = stored_average;
	        last_n_fetch_latency = stored_latencies;
	      }
	    } else {
	      // The list is not full yet: put the new entry in front and update the
	      // average.
	      average_fetch_latency =
	          (stored_average * (num_fetch_latency) + latency) /
	          (num_fetch_latency + 1);
	      num_entries_to_keep = num_fetch_latency;
	    }
	    for (int i = 0; i < num_entries_to_keep; ++i) {
	      last_n_fetch_latency = StrCat(
	          last_n_fetch_latency, kFetchLatencySeparator,
	          fetch_latency_vector[i]);
	    }
	  }
	  flush_early_info->set_average_fetch_latency_ms(average_fetch_latency);
	  flush_early_info->set_last_n_fetch_latencies(last_n_fetch_latency);
	}

	// Looks for <meta http-equiv="X-UA-Compatible" content="..."> in |element|.
	// When found and the filter's response headers do not already carry that
	// value, the header is added and true is returned. In every other case
	// (attribute missing or empty, different http-equiv, value already present)
	// the headers are left alone and false is returned.
	bool SuppressPreheadFilter::ExtractAndUpdateXUACompatible(
	    HtmlElement* element) {
	  const HtmlElement::Attribute* equiv =
	      element->FindAttribute(HtmlName::kHttpEquiv);
	  const HtmlElement::Attribute* value =
	      element->FindAttribute(HtmlName::kContent);
	  if (equiv == NULL || value == NULL) {
	    return false;
	  }

	  StringPiece attribute = equiv->DecodedValueOrNull();
	  StringPiece value_str = value->DecodedValueOrNull();
	  if (value_str.empty() || attribute.empty()) {
	    return false;
	  }

	  // http-equiv must equal "X-UA-Compatible" (ignoring case and surrounding
	  // whitespace); content was already checked to be non-blank.
	  TrimWhitespace(&attribute);
	  if (!StringCaseEqual(attribute, HttpAttributes::kXUACompatible)) {
	    return false;
	  }
	  if (response_headers_->HasValue(attribute, value_str)) {
	    return false;
	  }
	  response_headers_->Add(attribute, value_str);
	  return true;
	}

	// If the driver's response headers yield a cookie string, inserts a <script>
	// as the first child of |element| whose body is kCookieJs with that string
	// substituted in. Does nothing when GetCookieString reports no cookies.
	void SuppressPreheadFilter::SendCookies(HtmlElement* element) {
	  GoogleString cookie_str;
	  const ResponseHeaders* response_headers = driver_->response_headers();
	  if (!response_headers->GetCookieString(&cookie_str)) {
	    return;
	  }

	  HtmlElement* cookie_script = driver_->NewElement(element, HtmlName::kScript);
	  driver_->AddAttribute(cookie_script, HtmlName::kType, "text/javascript");
	  driver_->AddAttribute(cookie_script, HtmlName::kDataPagespeedNoDefer, NULL);
	  HtmlCharactersNode* cookie_js = driver_->NewCharactersNode(
	      cookie_script, StringPrintf(kCookieJs, cookie_str.c_str()));
	  driver_->PrependChild(element, cookie_script);
	  driver_->AppendChild(cookie_script, cookie_js);
	}

	} // namespace net_instaweb