blob: 8ae2f7fba828c635a27c4cd645634cdf810e629f [file] [log] [blame]
/*
* Copyright 2013 Google Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
// Authors: mmohabey@google.com (Megha Mohabey)
// pulkitg@google.com (Pulkit Goyal)
#include "pagespeed/automatic/cache_html_flow.h"
#include "base/logging.h"
#include "net/instaweb/http/public/async_fetch.h"
#include "net/instaweb/http/public/http_value.h"
#include "net/instaweb/http/public/log_record.h"
#include "net/instaweb/http/public/logging_proto_impl.h"
#include "net/instaweb/http/public/request_context.h"
#include "net/instaweb/public/global_constants.h"
#include "net/instaweb/rewriter/cache_html_info.pb.h"
#include "net/instaweb/rewriter/public/blink_util.h"
#include "net/instaweb/rewriter/public/cache_html_info_finder.h"
#include "net/instaweb/rewriter/public/critical_css_finder.h"
#include "net/instaweb/rewriter/public/critical_images_finder.h"
#include "net/instaweb/rewriter/public/critical_selector_finder.h"
#include "net/instaweb/rewriter/public/experiment_matcher.h"
#include "net/instaweb/rewriter/public/property_cache_util.h"
#include "net/instaweb/rewriter/public/request_properties.h"
#include "net/instaweb/rewriter/public/rewrite_driver.h"
#include "net/instaweb/rewriter/public/rewrite_driver_factory.h"
#include "net/instaweb/rewriter/public/rewrite_options.h"
#include "net/instaweb/rewriter/public/server_context.h"
#include "net/instaweb/rewriter/public/static_asset_manager.h"
// TODO(pulkitg): Change the function GetHtmlCriticalImages to take
// AbstractPropertyPage so that fallback_property_page.h dependency can be
// removed.
#include "net/instaweb/util/public/fallback_property_page.h"
#include "net/instaweb/util/public/property_cache.h"
#include "pagespeed/automatic/html_detector.h"
#include "pagespeed/automatic/proxy_fetch.h"
#include "pagespeed/kernel/base/abstract_mutex.h"
#include "pagespeed/kernel/base/basictypes.h"
#include "pagespeed/kernel/base/function.h"
#include "pagespeed/kernel/base/hasher.h"
#include "pagespeed/kernel/base/null_mutex.h"
#include "pagespeed/kernel/base/proto_util.h"
#include "pagespeed/kernel/base/scoped_ptr.h"
#include "pagespeed/kernel/base/statistics.h"
#include "pagespeed/kernel/base/string_util.h"
#include "pagespeed/kernel/base/thread_system.h"
#include "pagespeed/kernel/base/timer.h"
#include "pagespeed/kernel/http/http_names.h"
#include "pagespeed/kernel/http/request_headers.h"
#include "pagespeed/kernel/http/response_headers.h"
#include "pagespeed/kernel/http/http.pb.h"
#include "pagespeed/kernel/thread/thread_synchronizer.h"
#include "pagespeed/opt/logging/enums.pb.h"
namespace net_instaweb {
// printf-style template for the <script> tag that pulls in the blink
// javascript asset; the single %s receives the asset url.
const char kBlinkJsString[] =
    "<script type=\"text/javascript\" "
    "src=\"%s\"></script>";

// Inline script written after the cached html (when split-html js was not
// flushed) that invokes pagespeed.panelLoaderInit().
const char kCacheHtmlSuffixJsString[] =
    "<script type=\"text/javascript\">pagespeed.panelLoaderInit();"
    "</script>\n";

// Inline script written for debug clients when the blink debug dashboard is
// enabled; it invokes pagespeed.panelLoader.setRequestFromInternalIp().
const char kCacheHtmlSetInternalIp[] =
    "<script type=\"text/javascript\">"
    "pagespeed.panelLoader.setRequestFromInternalIp();</script>\n";
// Key signalled on the server context's ThreadSynchronizer from
// ~CacheHtmlComputationFetch, i.e. once a background computation object has
// been destroyed.
const char CacheHtmlFlow::kBackgroundComputationDone[] =
"BackgroundComputation:Done";
// Names of the timed statistics variables registered by InitStats().
// Hits / misses are bumped in CacheHtmlHit() / CacheHtmlMiss().
const char CacheHtmlFlow::kNumCacheHtmlHits[] =
"num_cache_html_hits";
const char CacheHtmlFlow::kNumCacheHtmlMisses[] =
"num_cache_html_misses";
// Bumped when the freshly computed content hash equals / differs from the
// hash stored in the cached CacheHtmlInfo.
const char CacheHtmlFlow::kNumCacheHtmlMatches[] =
"num_cache_html_matches";
const char CacheHtmlFlow::kNumCacheHtmlMismatches[] =
"num_cache_html_mismatches";
// Bumped when a detected mismatch leads to the cached entry being deleted
// from the property cache.
const char CacheHtmlFlow::kNumCacheHtmlMismatchesCacheDeletes[] =
"num_cache_html_mismatch_cache_deletes";
// Same as matches / mismatches above, but for the "smart diff" hash.
const char CacheHtmlFlow::kNumCacheHtmlSmartdiffMatches[] =
"num_cache_html_smart_diff_matches";
const char CacheHtmlFlow::kNumCacheHtmlSmartdiffMismatches[] =
"num_cache_html_smart_diff_mismatches";
// Mirrors every logging call onto two log records (the cache html flow record
// and the main request record). The records are borrowed: this helper never
// takes ownership of them.
class CacheHtmlFlow::LogHelper {
 public:
  LogHelper(AbstractLogRecord* log_record1, AbstractLogRecord* log_record2) {
    records_[0] = log_record1;
    records_[1] = log_record2;
  }

  // Stores the given request-flow value on both records, first record first.
  void SetCacheHtmlRequestFlow(int32 cache_html_request_flow) {
    for (int i = 0; i < kNumRecords; ++i) {
      records_[i]->SetCacheHtmlRequestFlow(cache_html_request_flow);
    }
  }

  // Marks the rewriter named by filter_id as APPLIED_OK on both records,
  // first record first.
  void LogAppliedRewriter(const char* filter_id) {
    for (int i = 0; i < kNumRecords; ++i) {
      records_[i]->SetRewriterLoggingStatus(filter_id,
                                            RewriterApplication::APPLIED_OK);
    }
  }

 private:
  enum { kNumRecords = 2 };

  // Not owned.
  AbstractLogRecord* records_[kNumRecords];

  DISALLOW_COPY_AND_ASSIGN(LogHelper);
};
namespace {
// Primes |cache_html_driver| with everything it needs from the property cache
// |page|. The page is attached to the driver only for the duration of this
// call and detached (set to NULL) before returning, so that nothing writes to
// the property cache while the cached html is being rewritten.
// TODO(mmohabey): Move the logic of copying properties in rewrite_driver when
// it is cloned.
void InitDriverWithPropertyCacheValues(
    RewriteDriver* cache_html_driver, FallbackPropertyPage* page) {
  // TODO(pulkitg): Change the function GetHtmlCriticalImages to take
  // AbstractPropertyPage as a parameter so that
  // set_unowned_fallback_property_page function call can be removed. Also make
  // the function take AbstractPropertyPage instead of FallbackPropertyPage.
  cache_html_driver->set_unowned_fallback_property_page(page);

  // TODO(mmohabey): Critical line info should be populated here.
  ServerContext* context = cache_html_driver->server_context();

  // The page is detached again below, so the CriticalImageFinder state kept in
  // the driver has to be refreshed now. There is no public interface for
  // that, hence this throwaway query whose side effect is the refresh.
  // TODO(jud): Remove this when the CriticalImageFinder is held in the
  // RewriteDriver, instead of ServerContext.
  context->critical_images_finder()->GetHtmlCriticalImages(cache_html_driver);

  // The remaining finders are optional; each is consulted only if present.
  if (CriticalSelectorFinder* selectors =
          context->critical_selector_finder()) {
    selectors->GetCriticalSelectors(cache_html_driver);
  }
  if (CriticalCssFinder* critical_css = context->critical_css_finder()) {
    critical_css->UpdateCriticalCssInfoInDriver(cache_html_driver);
  }
  if (CacheHtmlInfoFinder* info_finder = context->cache_html_info_finder()) {
    info_finder->UpdateSplitInfoInDriver(cache_html_driver);
  }

  cache_html_driver->set_unowned_fallback_property_page(NULL);
}
class CacheHtmlComputationFetch : public AsyncFetch {
public:
CacheHtmlComputationFetch(const GoogleString& url,
RewriteDriver* rewrite_driver,
CacheHtmlInfo* cache_html_info,
AbstractLogRecord* cache_html_log_record,
CacheHtmlFlow::LogHelper* cache_html_log_helper)
: AsyncFetch(rewrite_driver->request_context()),
url_(url),
server_context_(rewrite_driver->server_context()),
options_(rewrite_driver->options()),
rewrite_driver_(rewrite_driver),
cache_html_log_record_(cache_html_log_record),
cache_html_log_helper_(cache_html_log_helper),
cache_html_info_(cache_html_info),
claims_html_(false),
probable_html_(false),
content_length_over_threshold_(false),
non_ok_status_code_(false),
cache_html_change_mutex_(
server_context_->thread_system()->NewMutex()),
finish_(false) {
// Makes rewrite_driver live longer as ProxyFetch may called Cleanup()
// on the rewrite_driver even if ComputeCacheHtmlInfo() has not yet
// been triggered.
rewrite_driver_->IncrementAsyncEventsCount();
Statistics* stats = server_context_->statistics();
num_cache_html_misses_ = stats->GetTimedVariable(
CacheHtmlFlow::kNumCacheHtmlMisses);
num_cache_html_matches_ = stats->GetTimedVariable(
CacheHtmlFlow::kNumCacheHtmlMatches);
num_cache_html_mismatches_ = stats->GetTimedVariable(
CacheHtmlFlow::kNumCacheHtmlMismatches);
num_cache_html_mismatches_cache_deletes_ = stats->GetTimedVariable(
CacheHtmlFlow::kNumCacheHtmlMismatchesCacheDeletes);
num_cache_html_smart_diff_matches_ = stats->GetTimedVariable(
CacheHtmlFlow::kNumCacheHtmlSmartdiffMatches);
num_cache_html_smart_diff_mismatches_ = stats->GetTimedVariable(
CacheHtmlFlow::kNumCacheHtmlSmartdiffMismatches);
}
virtual ~CacheHtmlComputationFetch() {
cache_html_log_record_->SetCacheHtmlLoggingInfo("");
if (!cache_html_log_record_->WriteLog()) {
LOG(WARNING) << "Cache html flow GWS Logging failed for " << url_;
}
rewrite_driver_->DecrementAsyncEventsCount();
ThreadSynchronizer* sync = server_context_->thread_synchronizer();
sync->Signal(CacheHtmlFlow::kBackgroundComputationDone);
}
virtual void HandleHeadersComplete() {
if (response_headers()->status_code() == HttpStatus::kOK) {
claims_html_ = response_headers()->IsHtmlLike();
int64 content_length;
bool content_length_found = response_headers()->FindContentLength(
&content_length);
if (content_length_found && content_length >
options_->blink_max_html_size_rewritable()) {
content_length_over_threshold_ = true;
}
} else {
non_ok_status_code_ = true;
VLOG(1) << "Non 200 response code for: " << url_;
}
}
virtual bool HandleWrite(const StringPiece& content,
MessageHandler* handler) {
if (!claims_html_ || content_length_over_threshold_) {
return true;
}
if (!html_detector_.already_decided() &&
html_detector_.ConsiderInput(content)) {
if (html_detector_.probable_html()) {
probable_html_ = true;
html_detector_.ReleaseBuffered(&buffer_);
}
}
// TODO(poojatandon): share this logic of finding the length and setting a
// limit with http_cache code.
if (probable_html_) {
if (unsigned(buffer_.size() + content.size()) >
options_->blink_max_html_size_rewritable()) {
content_length_over_threshold_ = true;
buffer_.clear();
} else {
content.AppendToString(&buffer_);
}
}
return true;
}
virtual bool HandleFlush(MessageHandler* handler) {
return true;
// No operation.
}
virtual void HandleDone(bool success) {
if (non_ok_status_code_ || !success || !claims_html_ || !probable_html_ ||
content_length_over_threshold_) {
if (cache_html_info_->has_cached_html()) {
// This means it is a cache hit case. Currently it also means diff is
// enabled (possibly in logging mode), since CacheHtmlComputationFetch
// is attached in cache hit case only when diff is enabled.
// Calling Finish since the deletion of this object needs to be
// synchronized with HandleDone call in AsyncFetchWithHeadersInhibited,
// since that class refers to this object.
Finish();
} else {
if (content_length_over_threshold_) {
cache_html_log_helper_->SetCacheHtmlRequestFlow(
CacheHtmlLoggingInfo::FOUND_CONTENT_LENGTH_OVER_THRESHOLD);
} else if (non_ok_status_code_ || !success) {
cache_html_log_helper_->SetCacheHtmlRequestFlow(
CacheHtmlLoggingInfo::CACHE_HTML_MISS_FETCH_NON_OK);
} else if (!claims_html_ || !probable_html_) {
cache_html_log_helper_->SetCacheHtmlRequestFlow(
CacheHtmlLoggingInfo::CACHE_HTML_MISS_FOUND_RESOURCE);
}
delete this;
}
return;
}
if (!cache_html_info_->has_cached_html()) {
cache_html_log_helper_->SetCacheHtmlRequestFlow(
CacheHtmlLoggingInfo::CACHE_HTML_MISS_TRIGGERED_REWRITE);
}
if ((rewrite_driver_->options()->enable_blink_html_change_detection() ||
rewrite_driver_->options()->
enable_blink_html_change_detection_logging()) &&
server_context_->cache_html_info_finder() != NULL) {
// We do diff mismatch detection in cache miss case also so that we can
// update the content hash and smart text hash in CacheHtmlInfo in pcache.
CreateHtmlChangeDetectionDriverAndRewrite();
} else {
CreateCacheHtmlComputationDriverAndRewrite();
}
}
void CreateHtmlChangeDetectionDriverAndRewrite() {
RewriteOptions* options = rewrite_driver_->options()->Clone();
options->ClearFilters();
options->ForceEnableFilter(RewriteOptions::kRemoveComments);
options->ForceEnableFilter(RewriteOptions::kStripNonCacheable);
options->ForceEnableFilter(RewriteOptions::kComputeVisibleText);
server_context_->ComputeSignature(options);
html_change_detection_driver_ =
server_context_->NewCustomRewriteDriver(options, request_context());
value_.Clear();
html_change_detection_driver_->SetWriter(&value_);
html_change_detection_driver_->set_response_headers_ptr(response_headers());
complete_finish_parse_html_change_driver_fn_ = MakeFunction(
this,
&CacheHtmlComputationFetch::CompleteFinishParseForHtmlChangeDriver);
html_change_detection_driver_->AddLowPriorityRewriteTask(
MakeFunction(
this, &CacheHtmlComputationFetch::Parse,
&CacheHtmlComputationFetch::CancelParseForHtmlChangeDriver,
html_change_detection_driver_,
complete_finish_parse_html_change_driver_fn_));
}
void CreateCacheHtmlComputationDriverAndRewrite() {
RewriteOptions* options = rewrite_driver_->options()->Clone();
options->ClearFilters();
options->ForceEnableFilter(RewriteOptions::kStripNonCacheable);
cache_html_computation_driver_ =
server_context_->NewCustomRewriteDriver(options, request_context());
value_.Clear();
cache_html_computation_driver_->SetWriter(&value_);
cache_html_computation_driver_->set_response_headers_ptr(
response_headers());
complete_finish_parse_cache_html_driver_fn_ = MakeFunction(
this, &CacheHtmlComputationFetch::
CompleteFinishParseForCacheHtmlComputationDriver);
cache_html_computation_driver_->AddLowPriorityRewriteTask(
MakeFunction(
this, &CacheHtmlComputationFetch::Parse,
&CacheHtmlComputationFetch::
CancelParseForCacheHtmlComputationDriver,
cache_html_computation_driver_,
complete_finish_parse_cache_html_driver_fn_));
}
void Parse(RewriteDriver* driver, Function* task) {
driver->StartParse(url_);
driver->ParseText(buffer_);
driver->FinishParseAsync(task);
}
void CancelParseForCacheHtmlComputationDriver(RewriteDriver* driver,
Function* task) {
LOG(WARNING) << "Cache Html computation dropped due to load"
<< " for url: " << url_;
complete_finish_parse_cache_html_driver_fn_->CallCancel();
cache_html_computation_driver_->Cleanup();
delete this;
}
void CancelParseForHtmlChangeDriver(RewriteDriver* driver, Function* task) {
LOG(WARNING) << "Html change diff dropped due to load"
<< " for url: " << url_;
complete_finish_parse_html_change_driver_fn_->CallCancel();
html_change_detection_driver_->Cleanup();
Finish();
}
void CompleteFinishParseForCacheHtmlComputationDriver() {
StringPiece rewritten_content;
value_.ExtractContents(&rewritten_content);
cache_html_info_->set_cached_html(rewritten_content.data(),
rewritten_content.size());
cache_html_info_->set_last_cached_html_computation_timestamp_ms(
server_context_->timer()->NowMs());
if (!cache_html_info_->cached_html().empty() &&
!content_length_over_threshold_) {
UpdatePropertyCacheWithCacheHtmlInfo();
}
delete this;
}
void CompleteFinishParseForHtmlChangeDriver() {
StringPiece output;
value_.ExtractContents(&output);
StringPieceVector result;
net_instaweb::SplitStringUsingSubstr(
output, BlinkUtil::kComputeVisibleTextFilterOutputEndMarker,
&result);
if (result.size() == 2) {
computed_hash_smart_diff_ = server_context_->hasher()->Hash(result[0]);
computed_hash_ = server_context_->hasher()->Hash(result[1]);
}
if (!cache_html_info_->has_cached_html()) {
CreateCacheHtmlComputationDriverAndRewrite();
return;
}
{
ScopedMutex lock(cache_html_log_record_->mutex());
CacheHtmlLoggingInfo* cache_html_logging_info =
cache_html_log_record_->
logging_info()->mutable_cache_html_logging_info();
if (computed_hash_ != cache_html_info_->hash()) {
num_cache_html_mismatches_->IncBy(1);
cache_html_logging_info->set_html_match(false);
} else {
num_cache_html_matches_->IncBy(1);
cache_html_logging_info->set_html_match(true);
}
if (computed_hash_smart_diff_ !=
cache_html_info_->hash_smart_diff()) {
num_cache_html_smart_diff_mismatches_->IncBy(1);
cache_html_logging_info->set_html_smart_diff_match(false);
} else {
num_cache_html_smart_diff_matches_->IncBy(1);
cache_html_logging_info->set_html_smart_diff_match(true);
}
}
Finish();
}
// This function should only be called if change detection is enabled and
// this is a cache hit case. In such cases, the content may need to be deleted
// from the property cache if a change was detected. This deletion should wait
// for AsyncFetchWithHeadersInhibited to complete (HandleDone called) to
// ensure that we do not delete entry from cache while it is still being used
// to process the request.
//
// This method achieves this goal using a mutex protected
// variable finish_. Both CacheHtmlComputationFetch and
// AsyncFetchWithHeadersInhibited call this method once their processing is
// done. The first call sets the value of finish_ to true and returns.
// The second call to this method actually calls ProcessDiffResult.
void Finish() {
{
ScopedMutex lock(cache_html_change_mutex_.get());
if (!finish_) {
finish_ = true;
return;
}
}
ProcessDiffResult();
}
// This method processes the result of html change detection. If a mismatch
// is found, we delete the entry from the cache and trigger a cache html info
// computation.
void ProcessDiffResult() {
if (computed_hash_.empty()) {
delete this;
return;
}
bool compute_cache_html_info = false;
if (options_->use_smart_diff_in_blink()) {
compute_cache_html_info =
(computed_hash_smart_diff_ !=
cache_html_info_->hash_smart_diff());
} else {
compute_cache_html_info =
(computed_hash_ !=
cache_html_info_->hash());
}
int64 now_ms = server_context_->timer()->NowMs();
PropertyPage* page = rewrite_driver_->property_page();
const PropertyCache::Cohort* cohort = server_context_->blink_cohort();
bool diff_info_updated =
server_context_->cache_html_info_finder()->UpdateDiffInfo(
compute_cache_html_info, now_ms, cache_html_log_record_.get(),
rewrite_driver_, server_context_->factory());
if (options_->enable_blink_html_change_detection() &&
compute_cache_html_info) {
num_cache_html_mismatches_cache_deletes_->IncBy(1);
// TODO(mmohabey): Do not call delete here as we will be subsequently
// updating the new value in property cache using
// CreateCacheHtmlComputationDriverAndRewrite.
server_context_->cache_html_info_finder()->PropagateCacheDeletes(
url_,
rewrite_driver_->options()->experiment_id(),
rewrite_driver_->device_type());
page->DeleteProperty(
cohort, BlinkUtil::kCacheHtmlRewriterInfo);
page->WriteCohort(cohort);
CreateCacheHtmlComputationDriverAndRewrite();
} else if (options_->enable_blink_html_change_detection() ||
computed_hash_ != cache_html_info_->hash() ||
computed_hash_smart_diff_ !=
cache_html_info_->hash_smart_diff()) {
UpdatePropertyCacheWithCacheHtmlInfo();
delete this;
} else {
if (diff_info_updated) {
page->WriteCohort(cohort);
}
delete this;
}
}
void UpdatePropertyCacheWithCacheHtmlInfo() {
cache_html_info_->set_charset(response_headers()->DetermineCharset());
cache_html_info_->set_hash(computed_hash_);
cache_html_info_->set_hash_smart_diff(computed_hash_smart_diff_);
UpdateInPropertyCache(*cache_html_info_, rewrite_driver_,
server_context_->blink_cohort(),
BlinkUtil::kCacheHtmlRewriterInfo,
true /* write_cohort*/);
}
private:
GoogleString url_;
ServerContext* server_context_;
const RewriteOptions* options_;
GoogleString buffer_;
HTTPValue value_;
HtmlDetector html_detector_;
GoogleString computed_hash_;
GoogleString computed_hash_smart_diff_;
HttpResponseHeaders http_response_headers_;
// RewriteDriver passed to ProxyFetch to serve user-facing request.
RewriteDriver* rewrite_driver_;
// RewriteDriver used to parse the buffered html content.
RewriteDriver* cache_html_computation_driver_;
RewriteDriver* html_change_detection_driver_;
scoped_ptr<AbstractLogRecord> cache_html_log_record_;
scoped_ptr<CacheHtmlFlow::LogHelper> cache_html_log_helper_;
scoped_ptr<CacheHtmlInfo> cache_html_info_;
Function* complete_finish_parse_cache_html_driver_fn_;
Function* complete_finish_parse_html_change_driver_fn_;
bool claims_html_;
bool probable_html_;
bool content_length_over_threshold_;
bool non_ok_status_code_;
// Variables to manage change detection processing.
// Mutex
scoped_ptr<AbstractMutex> cache_html_change_mutex_;
bool finish_; // protected by cache_html_change_mutex_
TimedVariable* num_cache_html_misses_;
TimedVariable* num_cache_html_matches_;
TimedVariable* num_cache_html_mismatches_;
TimedVariable* num_cache_html_mismatches_cache_deletes_;
TimedVariable* num_cache_html_smart_diff_matches_;
TimedVariable* num_cache_html_smart_diff_mismatches_;
DISALLOW_COPY_AND_ASSIGN(CacheHtmlComputationFetch);
};
// AsyncFetch that doesn't call HeadersComplete() on the base fetch. Note that
// this class only links the request headers from the base fetch and does not
// link the response headers.
// This is used as a wrapper around the base fetch when CacheHtmlInfo is
// found in cache. This is done because the response headers and the
// cached html have been already been flushed out in the base fetch
// and we don't want to call HeadersComplete() twice on the base fetch.
// This class deletes itself when HandleDone() is called.
class AsyncFetchWithHeadersInhibited : public AsyncFetchUsingWriter {
public:
AsyncFetchWithHeadersInhibited(
AsyncFetch* fetch,
CacheHtmlComputationFetch* cache_html_computation_fetch)
: AsyncFetchUsingWriter(fetch->request_context(), fetch),
base_fetch_(fetch),
cache_html_computation_fetch_(cache_html_computation_fetch) {
set_request_headers(fetch->request_headers());
}
private:
virtual ~AsyncFetchWithHeadersInhibited() {
}
virtual void HandleHeadersComplete() {}
virtual void HandleDone(bool success) {
base_fetch_->Done(success);
if (cache_html_computation_fetch_ != NULL) {
cache_html_computation_fetch_->Finish();
}
delete this;
}
AsyncFetch* base_fetch_;
CacheHtmlComputationFetch* cache_html_computation_fetch_;
DISALLOW_COPY_AND_ASSIGN(AsyncFetchWithHeadersInhibited);
};
} // namespace
void CacheHtmlFlow::Start(
const GoogleString& url,
AsyncFetch* base_fetch,
RewriteDriver* driver,
ProxyFetchFactory* factory,
ProxyFetchPropertyCallbackCollector* property_cache_callback) {
CacheHtmlFlow* flow = new CacheHtmlFlow(
url, base_fetch, driver, factory, property_cache_callback);
Function* func = MakeFunction(flow, &CacheHtmlFlow::CacheHtmlLookupDone,
&CacheHtmlFlow::Cancel);
property_cache_callback->AddPostLookupTask(func);
// Not doing any config lookup until pcache completes.
property_cache_callback->RequestHeadersComplete();
}
// Registers every timed variable used by the cache html flow with |stats|,
// all in the ServerContext statistics group.
void CacheHtmlFlow::InitStats(Statistics* stats) {
  // Registration order is kept as listed.
  const char* const kTimedVariableNames[] = {
    kNumCacheHtmlHits,
    kNumCacheHtmlMisses,
    kNumCacheHtmlMatches,
    kNumCacheHtmlMismatches,
    kNumCacheHtmlMismatchesCacheDeletes,
    kNumCacheHtmlSmartdiffMatches,
    kNumCacheHtmlSmartdiffMismatches,
  };
  const size_t num_names =
      sizeof(kTimedVariableNames) / sizeof(kTimedVariableNames[0]);
  for (size_t i = 0; i < num_names; ++i) {
    stats->AddTimedVariable(kTimedVariableNames[i],
                            ServerContext::kStatisticsGroup);
  }
}
// Captures the collaborators of the flow, creates the subordinate log record
// plus the helper that logs to both it and the main request record, looks up
// the hit/miss statistics and seeds the cache html logging info with the url
// and, when the request carries one, the request event id.
CacheHtmlFlow::CacheHtmlFlow(
    const GoogleString& url,
    AsyncFetch* base_fetch,
    RewriteDriver* driver,
    ProxyFetchFactory* factory,
    ProxyFetchPropertyCallbackCollector* property_cache_callback)
    : url_(url),
      google_url_(url),
      base_fetch_(base_fetch),
      cache_html_log_record_(
          base_fetch_->request_context()->NewSubordinateLogRecord(
              new NullMutex)),
      rewrite_driver_(driver),
      options_(driver->options()),
      factory_(factory),
      server_context_(driver->server_context()),
      property_cache_callback_(property_cache_callback),
      handler_(rewrite_driver_->server_context()->message_handler()),
      cache_html_log_helper_(new LogHelper(
          cache_html_log_record_.get(),
          base_fetch_->request_context()->log_record())) {
  Statistics* statistics = server_context_->statistics();
  num_cache_html_misses_ = statistics->GetTimedVariable(kNumCacheHtmlMisses);
  num_cache_html_hits_ = statistics->GetTimedVariable(kNumCacheHtmlHits);

  // NULL when the request has no such header.
  const char* event_id = base_fetch_->request_headers()->Lookup1(
      HttpAttributes::kXGoogleRequestEventId);

  // The logging info is only touched with the record's mutex held; the lock
  // lives until the end of the constructor body.
  ScopedMutex lock(cache_html_log_record_->mutex());
  CacheHtmlLoggingInfo* logging_info =
      cache_html_log_record_->logging_info()->
      mutable_cache_html_logging_info();
  logging_info->set_url(url_);
  if (event_id != NULL) {
    logging_info->set_request_event_id_time_usec(event_id);
  }
}
// Empty destructor body: no explicit cleanup is performed here.
CacheHtmlFlow::~CacheHtmlFlow() {
}
void CacheHtmlFlow::CacheHtmlLookupDone() {
FallbackPropertyPage* fallback_page =
property_cache_callback_->fallback_property_page();
PopulateCacheHtmlInfo(fallback_page->actual_property_page());
// TODO(mmohabey): Add CSI timings.
if (cache_html_info_.has_cached_html()) {
CacheHtmlHit(fallback_page);
} else {
CacheHtmlMiss();
}
}
// Miss path: counts the miss and goes straight to the proxy fetch.
// TriggerProxyFetch() ends with "delete this", so no member may be touched
// after it returns.
void CacheHtmlFlow::CacheHtmlMiss() {
num_cache_html_misses_->IncBy(1);
TriggerProxyFetch();
}
// Hit path: completes the response headers on the base fetch and streams the
// cached html through a clone of the rewrite driver. CacheHtmlRewriteDone()
// is invoked once that rewrite has finished.
void CacheHtmlFlow::CacheHtmlHit(FallbackPropertyPage* page) {
  num_cache_html_hits_->IncBy(1);

  // TODO(mmohabey): Handle malformed html case.
  cache_html_log_helper_->SetCacheHtmlRequestFlow(
      CacheHtmlLoggingInfo::CACHE_HTML_HIT);
  cache_html_log_helper_->LogAppliedRewriter(
      RewriteOptions::FilterId(RewriteOptions::kCachePartialHtml));

  // Build the response headers for the cached html.
  ResponseHeaders* headers = base_fetch_->response_headers();
  headers->SetStatusAndReason(HttpStatus::kOK);
  // TODO(pulkitg): Store content type in pcache.
  // TODO(mmohabey): Handle Meta tags.
  GoogleString content_type("text/html");
  if (cache_html_info_.has_charset()) {
    content_type += StrCat("; charset=", cache_html_info_.charset());
  }
  headers->Add(HttpAttributes::kContentType, content_type);
  {
    ScopedMutex lock(cache_html_log_record_->mutex());
    headers->Add(kPsaRewriterHeader,
                 cache_html_log_record_->AppliedRewritersString());
  }
  headers->ComputeCaching();
  headers->SetDateAndCaching(server_context_->timer()->NowMs(), 0,
                             ", private, no-cache");

  // If relevant, add the Set-Cookie header for experiments.
  const bool store_experiment_cookie =
      options_->need_to_store_experiment_data() &&
      options_->running_experiment();
  if (store_experiment_cookie) {
    int experiment_id = options_->experiment_id();
    server_context_->experiment_matcher()->StoreExperimentData(
        experiment_id, url_,
        server_context_->timer()->NowMs() +
            options_->experiment_cookie_duration_ms(),
        headers);
  }
  base_fetch_->HeadersComplete();

  // Clone the RewriteDriver which is used to rewrite the HTML that we are
  // trying to flush early.
  RewriteDriver* flush_driver = rewrite_driver_->Clone();
  flush_driver->set_response_headers_ptr(base_fetch_->response_headers());
  flush_driver->set_flushing_cached_html(true);
  flush_driver->SetWriter(base_fetch_);
  flush_driver->StartParse(url_);
  InitDriverWithPropertyCacheValues(flush_driver, page);

  // True when split-html is enabled and the requesting client supports it.
  bool flushed_split_js = false;
  if (flush_driver->options()->Enabled(RewriteOptions::kSplitHtml)) {
    flushed_split_js = flush_driver->request_properties()->SupportsSplitHtml(
        flush_driver->options()->enable_aggressive_rewriters_for_mobile());
  }

  StringPiece html_to_flush = cache_html_info_.cached_html();
  flush_driver->ParseText(html_to_flush);
  flush_driver->FinishParseAsync(
      MakeFunction(this, &CacheHtmlFlow::CacheHtmlRewriteDone,
                   flushed_split_js));
}
void CacheHtmlFlow::CacheHtmlRewriteDone(bool flushed_split_js) {
rewrite_driver_->set_flushed_cached_html(true);
StaticAssetManager* static_asset_manager =
server_context_->static_asset_manager();
if (!flushed_split_js) {
base_fetch_->Write(StringPrintf(kBlinkJsString,
static_asset_manager->GetAssetUrl(
StaticAssetEnum::BLINK_JS, options_).c_str()), handler_);
base_fetch_->Write(kCacheHtmlSuffixJsString, handler_);
}
const char* user_ip = base_fetch_->request_headers()->Lookup1(
HttpAttributes::kXForwardedFor);
if (user_ip != NULL && server_context_->factory()->IsDebugClient(user_ip) &&
options_->enable_blink_debug_dashboard()) {
base_fetch_->Write(kCacheHtmlSetInternalIp, handler_);
}
base_fetch_->Flush(handler_);
TriggerProxyFetch();
}
// Final step of the flow: starts the ProxyFetch for the origin content and
// destroys this object.
//
// A CacheHtmlComputationFetch is attached when cached html was not flushed
// (cache miss) or when html change detection / its logging is enabled. It
// takes over the log record and log helper and is then responsible for
// writing the log. Otherwise the log record is written here.
//
// When cached html was already flushed, the base fetch is wrapped in an
// AsyncFetchWithHeadersInhibited so that HeadersComplete() is not delivered
// to it a second time.
void CacheHtmlFlow::TriggerProxyFetch() {
  bool flushed_cached_html = rewrite_driver_->flushed_cached_html();
  AsyncFetch* fetch = NULL;
  CacheHtmlComputationFetch* cache_html_computation_fetch = NULL;

  // Remove any headers that can lead to a 304, since CacheHtmlFlow can't
  // handle 304s.
  base_fetch_->request_headers()->RemoveAll(HttpAttributes::kIfNoneMatch);
  base_fetch_->request_headers()->RemoveAll(HttpAttributes::kIfModifiedSince);

  if (!flushed_cached_html || options_->enable_blink_html_change_detection() ||
      options_->enable_blink_html_change_detection_logging()) {
    CacheHtmlInfo* cache_html_info = new CacheHtmlInfo();
    cache_html_info->CopyFrom(cache_html_info_);
    // Ownership of the info copy, the log record and the log helper moves to
    // the computation fetch.
    cache_html_computation_fetch = new CacheHtmlComputationFetch(
        url_, rewrite_driver_, cache_html_info,
        cache_html_log_record_.release(), cache_html_log_helper_.release());
    // TODO(mmohabey) : Set a fixed user agent for fetching content from the
    // origin server if options->use_fixed_user_agent_for_blink_cache_misses()
    // is enabled.
  }

  if (flushed_cached_html) {
    // TODO(mmohabey): Disable LazyloadImages filter for the driver sending non
    // cacheables.
    fetch = new AsyncFetchWithHeadersInhibited(base_fetch_,
                                               cache_html_computation_fetch);
  } else {
    // PassThrough case.
    // This flow has side effect that DeferJs is applied in passthrough case
    // even when it is not explicitly enabled since it is added in
    // RewriteDriver::AddPostRenderFilters() if
    // RewriteOptions::kCachePartialHtml is enabled.
    fetch = base_fetch_;
  }

  if (cache_html_computation_fetch == NULL) {
    // Lookup1() yields NULL when the request has no User-Agent header; never
    // hand that NULL on, log an empty user agent instead.
    const char* user_agent =
        fetch->request_headers()->Lookup1(HttpAttributes::kUserAgent);
    cache_html_log_record_->SetCacheHtmlLoggingInfo(
        user_agent != NULL ? user_agent : "");
    if (!cache_html_log_record_->WriteLog()) {
      LOG(ERROR) << "Cache html flow GWS Logging failed for " << url_;
    }
  }  // else, logging will be done by cache_html_computation_fetch.

  factory_->StartNewProxyFetch(
      url_, fetch, rewrite_driver_, property_cache_callback_,
      cache_html_computation_fetch);
  delete this;
}
// TODO(mmohabey): Disable conflicting filters for cache html flow.
// Cancel half of the post-lookup task registered in Start(): the flow is
// abandoned, so the self-owned object simply destroys itself.
void CacheHtmlFlow::Cancel() {
delete this;
}
// Fills cache_html_info_ from the kCacheHtmlRewriterInfo property of the
// blink cohort of |page|. cache_html_info_ is left untouched when the page or
// the cohort is missing or the property has no value, and is cleared when the
// stored bytes cannot be parsed.
void CacheHtmlFlow::PopulateCacheHtmlInfo(PropertyPage* page) {
  const PropertyCache::Cohort* blink_cohort =
      server_context_->page_property_cache()->GetCohort(
          BlinkUtil::kBlinkCohort);
  if (page != NULL && blink_cohort != NULL) {
    PropertyValue* stored_info = page->GetProperty(
        blink_cohort, BlinkUtil::kCacheHtmlRewriterInfo);
    if (stored_info->has_value()) {
      ArrayInputStream stored_bytes(stored_info->value().data(),
                                    stored_info->value().size());
      if (!cache_html_info_.ParseFromZeroCopyStream(&stored_bytes)) {
        LOG(DFATAL) << "Parsing value from cache into CacheHtmlInfo failed.";
        cache_html_info_.Clear();
      }
    }
  }
}
} // namespace net_instaweb