| /* |
| * Copyright 2010 Google Inc. |
| * |
| * Licensed under the Apache License, Version 2.0 (the "License"); |
| * you may not use this file except in compliance with the License. |
| * You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
| |
| // Author: jmarantz@google.com (Joshua Marantz) |
| |
| #include "net/instaweb/rewriter/public/rewrite_driver.h" |
| |
| #include <algorithm> |
| #include <cstdarg> |
| #include <cstdio> |
| #include <list> |
| #include <map> |
| #include <set> |
| #include <utility> // for std::pair |
| #include <vector> |
| |
| #include "base/logging.h" |
| #include "net/instaweb/config/rewrite_options_manager.h" |
| #include "net/instaweb/http/public/async_fetch.h" |
| #include "net/instaweb/http/public/cache_url_async_fetcher.h" |
| #include "net/instaweb/http/public/http_cache.h" |
| #include "net/instaweb/http/public/http_value.h" |
| #include "net/instaweb/http/public/log_record.h" |
| #include "net/instaweb/http/public/logging_proto_impl.h" |
| #include "net/instaweb/http/public/request_context.h" |
| #include "net/instaweb/http/public/url_async_fetcher.h" |
| #include "net/instaweb/rewriter/cached_result.pb.h" |
| #include "net/instaweb/rewriter/critical_css.pb.h" |
| #include "net/instaweb/rewriter/critical_keys.pb.h" |
| #include "net/instaweb/rewriter/critical_line_info.pb.h" |
| #include "net/instaweb/rewriter/flush_early.pb.h" |
| #include "net/instaweb/rewriter/public/add_head_filter.h" |
| #include "net/instaweb/rewriter/public/add_ids_filter.h" |
| #include "net/instaweb/rewriter/public/add_instrumentation_filter.h" |
| #include "net/instaweb/rewriter/public/base_tag_filter.h" |
| #include "net/instaweb/rewriter/public/cache_extender.h" |
| #include "net/instaweb/rewriter/public/cache_html_filter.h" |
| #include "net/instaweb/rewriter/public/collect_flush_early_content_filter.h" |
| #include "net/instaweb/rewriter/public/compute_visible_text_filter.h" |
| #include "net/instaweb/rewriter/public/critical_css_beacon_filter.h" |
| #include "net/instaweb/rewriter/public/critical_css_filter.h" |
| #include "net/instaweb/rewriter/public/critical_images_beacon_filter.h" |
| #include "net/instaweb/rewriter/public/critical_selector_filter.h" |
| #include "net/instaweb/rewriter/public/critical_selector_finder.h" |
| #include "net/instaweb/rewriter/public/css_combine_filter.h" |
| #include "net/instaweb/rewriter/public/css_filter.h" |
| #include "net/instaweb/rewriter/public/css_inline_filter.h" |
| #include "net/instaweb/rewriter/public/css_inline_import_to_link_filter.h" |
| #include "net/instaweb/rewriter/public/css_move_to_head_filter.h" |
| #include "net/instaweb/rewriter/public/css_outline_filter.h" |
| #include "net/instaweb/rewriter/public/css_summarizer_base.h" |
| #include "net/instaweb/rewriter/public/css_tag_scanner.h" |
| #include "net/instaweb/rewriter/public/data_url_input_resource.h" |
| #include "net/instaweb/rewriter/public/debug_filter.h" |
| #include "net/instaweb/rewriter/public/decode_rewritten_urls_filter.h" |
| #include "net/instaweb/rewriter/public/dedup_inlined_images_filter.h" |
| #include "net/instaweb/rewriter/public/defer_iframe_filter.h" |
| #include "net/instaweb/rewriter/public/delay_images_filter.h" |
| #include "net/instaweb/rewriter/public/deterministic_js_filter.h" |
| #include "net/instaweb/rewriter/public/dom_stats_filter.h" |
| #include "net/instaweb/rewriter/public/domain_lawyer.h" |
| #include "net/instaweb/rewriter/public/domain_rewrite_filter.h" |
| #include "net/instaweb/rewriter/public/downstream_cache_purger.h" |
| #include "net/instaweb/rewriter/public/file_input_resource.h" |
| #include "net/instaweb/rewriter/public/file_load_policy.h" |
| #include "net/instaweb/rewriter/public/fix_reflow_filter.h" |
| #include "net/instaweb/rewriter/public/flush_early_content_writer_filter.h" |
| #include "net/instaweb/rewriter/public/flush_html_filter.h" |
| #include "net/instaweb/rewriter/public/google_analytics_filter.h" |
| #include "net/instaweb/rewriter/public/google_font_css_inline_filter.h" |
| #include "net/instaweb/rewriter/public/handle_noscript_redirect_filter.h" |
| #include "net/instaweb/rewriter/public/iframe_fetcher.h" |
| #include "net/instaweb/rewriter/public/image_combine_filter.h" |
| #include "net/instaweb/rewriter/public/image_rewrite_filter.h" |
| #include "net/instaweb/rewriter/public/in_place_rewrite_context.h" |
| #include "net/instaweb/rewriter/public/insert_dns_prefetch_filter.h" |
| #include "net/instaweb/rewriter/public/insert_ga_filter.h" |
| #include "net/instaweb/rewriter/public/javascript_filter.h" |
| #include "net/instaweb/rewriter/public/js_combine_filter.h" |
| #include "net/instaweb/rewriter/public/js_defer_disabled_filter.h" |
| #include "net/instaweb/rewriter/public/js_disable_filter.h" |
| #include "net/instaweb/rewriter/public/js_inline_filter.h" |
| #include "net/instaweb/rewriter/public/js_outline_filter.h" |
| #include "net/instaweb/rewriter/public/lazyload_images_filter.h" |
| #include "net/instaweb/rewriter/public/local_storage_cache_filter.h" |
| #include "net/instaweb/rewriter/public/make_show_ads_async_filter.h" |
| #include "net/instaweb/rewriter/public/meta_tag_filter.h" |
| #include "net/instaweb/rewriter/public/mobilize_label_filter.h" |
| #include "net/instaweb/rewriter/public/mobilize_menu_filter.h" |
| #include "net/instaweb/rewriter/public/mobilize_menu_render_filter.h" |
| #include "net/instaweb/rewriter/public/mobilize_rewrite_filter.h" |
| #include "net/instaweb/rewriter/public/output_resource.h" |
| #include "net/instaweb/rewriter/public/output_resource_kind.h" |
| #include "net/instaweb/rewriter/public/pedantic_filter.h" |
| #include "net/instaweb/rewriter/public/property_cache_util.h" |
| #include "net/instaweb/rewriter/public/redirect_on_size_limit_filter.h" |
| #include "net/instaweb/rewriter/public/request_properties.h" |
| #include "net/instaweb/rewriter/public/resource.h" |
| #include "net/instaweb/rewriter/public/resource_namer.h" |
| #include "net/instaweb/rewriter/public/resource_slot.h" |
| #include "net/instaweb/rewriter/public/responsive_image_filter.h" |
| #include "net/instaweb/rewriter/public/rewrite_context.h" |
| #include "net/instaweb/rewriter/public/rewrite_driver_factory.h" |
| #include "net/instaweb/rewriter/public/rewrite_filter.h" |
| #include "net/instaweb/rewriter/public/rewrite_options.h" |
| #include "net/instaweb/rewriter/public/rewrite_query.h" |
| #include "net/instaweb/rewriter/public/rewrite_stats.h" |
| #include "net/instaweb/rewriter/public/rewritten_content_scanning_filter.h" |
| #include "net/instaweb/rewriter/public/scan_filter.h" |
| #include "net/instaweb/rewriter/public/server_context.h" |
| #include "net/instaweb/rewriter/public/split_html_beacon_filter.h" |
| #include "net/instaweb/rewriter/public/split_html_config.h" |
| #include "net/instaweb/rewriter/public/split_html_filter.h" |
| #include "net/instaweb/rewriter/public/split_html_helper_filter.h" |
| #include "net/instaweb/rewriter/public/strip_non_cacheable_filter.h" |
| #include "net/instaweb/rewriter/public/strip_scripts_filter.h" |
| #include "net/instaweb/rewriter/public/strip_subresource_hints_filter.h" |
| #include "net/instaweb/rewriter/public/support_noscript_filter.h" |
| #include "net/instaweb/rewriter/public/suppress_prehead_filter.h" |
| #include "net/instaweb/rewriter/public/url_input_resource.h" |
| #include "net/instaweb/rewriter/public/url_left_trim_filter.h" |
| #include "net/instaweb/rewriter/public/url_namer.h" |
| #include "net/instaweb/util/public/fallback_property_page.h" |
| #include "pagespeed/kernel/base/basictypes.h" |
| #include "pagespeed/kernel/base/callback.h" |
| #include "pagespeed/kernel/base/file_system.h" |
| #include "pagespeed/kernel/base/function.h" |
| #include "pagespeed/kernel/base/hasher.h" |
| #include "pagespeed/kernel/base/message_handler.h" |
| #include "pagespeed/kernel/base/request_trace.h" |
| #include "pagespeed/kernel/base/scoped_ptr.h" |
| #include "pagespeed/kernel/base/sha1_signature.h" |
| #include "pagespeed/kernel/base/statistics.h" |
| #include "pagespeed/kernel/base/stl_util.h" |
| #include "pagespeed/kernel/base/string.h" |
| #include "pagespeed/kernel/base/string_util.h" |
| #include "pagespeed/kernel/base/timer.h" |
| #include "pagespeed/kernel/base/writer.h" |
| #include "pagespeed/kernel/cache/cache_interface.h" |
| #include "pagespeed/kernel/html/collapse_whitespace_filter.h" |
| #include "pagespeed/kernel/html/elide_attributes_filter.h" |
| #include "pagespeed/kernel/html/html_attribute_quote_removal.h" |
| #include "pagespeed/kernel/html/html_element.h" |
| #include "pagespeed/kernel/html/html_filter.h" |
| #include "pagespeed/kernel/html/html_keywords.h" |
| #include "pagespeed/kernel/html/html_node.h" |
| #include "pagespeed/kernel/html/html_parse.h" |
| #include "pagespeed/kernel/html/html_writer_filter.h" |
| #include "pagespeed/kernel/html/remove_comments_filter.h" |
| #include "pagespeed/kernel/http/content_type.h" |
| #include "pagespeed/kernel/http/google_url.h" |
| #include "pagespeed/kernel/http/http_names.h" |
| #include "pagespeed/kernel/http/request_headers.h" |
| #include "pagespeed/kernel/thread/scheduler.h" |
| #include "pagespeed/kernel/util/statistics_logger.h" |
| |
| namespace net_instaweb { |
| |
| class CriticalCssFinder; |
| class RewriteDriverPool; |
| |
| namespace { |
| |
// Interval, in milliseconds, between completion re-checks when a wait has no
// deadline or its deadline has already passed (see TryCheckForCompletion).
const int kTestTimeoutMs = 10 * 1000;

// Debug comment attached to a slot whose rewrite missed the flush deadline
// when no filter is registered under the rewrite context's id.
const char kDeadlineExceeded[] = "deadline_exceeded";
| |
| // Implementation of RemoveCommentsFilter::OptionsInterface that wraps |
| // a RewriteOptions instance. |
| class RemoveCommentsFilterOptions |
| : public RemoveCommentsFilter::OptionsInterface { |
| public: |
| explicit RemoveCommentsFilterOptions(const RewriteOptions* options) |
| : options_(options) { |
| } |
| |
| virtual bool IsRetainedComment(const StringPiece& comment) const { |
| return options_->IsRetainedComment(comment); |
| } |
| |
| private: |
| const RewriteOptions* options_; |
| |
| DISALLOW_COPY_AND_ASSIGN(RemoveCommentsFilterOptions); |
| }; |
| |
| // Provides hook to CacheUrlAsyncFetcher to protect the lifetime of the |
| // RewriteDriver which owns fetcher, otherwise, fetcher may be deleted |
| // by the time background fetch completes. |
| class RewriteDriverCacheUrlAsyncFetcherAsyncOpHooks |
| : public CacheUrlAsyncFetcher::AsyncOpHooks { |
| public: |
| explicit RewriteDriverCacheUrlAsyncFetcherAsyncOpHooks( |
| RewriteDriver* rewrite_driver) : rewrite_driver_(rewrite_driver) { |
| } |
| |
| virtual ~RewriteDriverCacheUrlAsyncFetcherAsyncOpHooks() { |
| } |
| |
| // TODO(pulkitg): Remove session fetchers, so that fetcher can live as long |
| // server is alive and there is no need of |
| // {Increment/Decrement}AsyncEventsCount(). |
| virtual void StartAsyncOp() { |
| // Increment async_events_counts so that driver will be alive as long as |
| // background fetch happens in CacheUrlAsyncFetcher. |
| rewrite_driver_->IncrementAsyncEventsCount(); |
| } |
| |
| virtual void FinishAsyncOp() { |
| rewrite_driver_->DecrementAsyncEventsCount(); |
| } |
| |
| private: |
| RewriteDriver* rewrite_driver_; |
| |
| DISALLOW_COPY_AND_ASSIGN(RewriteDriverCacheUrlAsyncFetcherAsyncOpHooks); |
| }; |
| |
| } // namespace |
| |
| const char RewriteDriver::kDomCohort[] = "dom"; |
| const char RewriteDriver::kBeaconCohort[] = "beacon_cohort"; |
| const char RewriteDriver::kSubresourcesPropertyName[] = "subresources"; |
| const char RewriteDriver::kStatusCodePropertyName[] = "status_code"; |
| |
| const char RewriteDriver::kLastRequestTimestamp[] = "last_request_timestamp"; |
| const char RewriteDriver::kParseSizeLimitExceeded[] = |
| "parse_size_limit_exceeded"; |
| |
| int RewriteDriver::initialized_count_ = 0; |
| |
| RewriteDriver::RewriteDriver(MessageHandler* message_handler, |
| FileSystem* file_system, |
| UrlAsyncFetcher* url_async_fetcher) |
| : HtmlParse(message_handler), |
| base_was_set_(false), |
| refs_before_base_(false), |
| filters_added_(false), |
| externally_managed_(false), |
| ref_counts_(this), |
| release_driver_(false), |
| waiting_(kNoWait), |
| waiting_deadline_reached_(false), |
| fully_rewrite_on_flush_(false), |
| fast_blocking_rewrite_(true), |
| flush_requested_(false), |
| flush_occurred_(false), |
| flushed_cached_html_(false), |
| flushing_cached_html_(false), |
| flushed_early_(false), |
| flushing_early_(false), |
| is_lazyload_script_flushed_(false), |
| write_property_cache_dom_cohort_(false), |
| should_skip_parsing_(kNotSet), |
| response_headers_(NULL), |
| status_code_(HttpStatus::kUnknownStatusCode), |
| max_page_processing_delay_ms_(-1), |
| num_initiated_rewrites_(0), |
| num_detached_rewrites_(0), |
| possibly_quick_rewrites_(0), |
| file_system_(file_system), |
| server_context_(NULL), |
| scheduler_(NULL), |
| default_url_async_fetcher_(url_async_fetcher), |
| url_async_fetcher_(default_url_async_fetcher_), |
| distributed_async_fetcher_(NULL), |
| dom_stats_filter_(NULL), |
| scan_filter_(this), |
| controlling_pool_(NULL), |
| cache_url_async_fetcher_async_op_hooks_( |
| new RewriteDriverCacheUrlAsyncFetcherAsyncOpHooks(this)), |
| html_worker_(NULL), |
| rewrite_worker_(NULL), |
| low_priority_rewrite_worker_(NULL), |
| writer_(NULL), |
| fallback_property_page_(NULL), |
| owns_property_page_(false), |
| device_type_(UserAgentMatcher::kDesktop), |
| xhtml_mimetype_computed_(false), |
| xhtml_status_(kXhtmlUnknown), |
| num_inline_preview_images_(0), |
| num_flushed_early_pagespeed_resources_(0), |
| num_bytes_in_(0), |
| debug_filter_(NULL), |
| can_rewrite_resources_(true), |
| is_nested_(false), |
| request_context_(NULL), |
| start_time_ms_(0), |
| tried_to_distribute_fetch_(false), |
| defer_instrumentation_script_(false), |
| downstream_cache_purger_(this) |
| // NOTE: Be sure to clear per-request member variables in Clear() |
| { // NOLINT -- I want the initializer-list to end with that comment. |
| // The Scan filter always goes first so it can find base-tags. |
| early_pre_render_filters_.push_back(&scan_filter_); |
| } |
| |
| void RewriteDriver::PopulateRequestContext() { |
| if ((request_context_.get() != NULL && (request_headers_ != NULL))) { |
| request_context_->SetAcceptsWebp( |
| request_properties_->SupportsWebpRewrittenUrls()); |
| request_context_->SetAcceptsGzip(request_properties_->AcceptsGzip()); |
| request_context_->Freeze(); |
| } |
| } |
| |
| void RewriteDriver::SetRequestHeaders(const RequestHeaders& headers) { |
| DCHECK(request_headers_.get() == NULL); |
| RequestHeaders* new_request_headers = new RequestHeaders(); |
| new_request_headers->CopyFrom(headers); |
| new_request_headers->PopulateLazyCaches(); |
| request_headers_.reset(new_request_headers); |
| ClearRequestProperties(); |
| |
| const char* user_agent = request_headers_->Lookup1( |
| HttpAttributes::kUserAgent); |
| if (user_agent != NULL) { |
| user_agent_ = user_agent; |
| request_properties_->SetUserAgent(user_agent_); |
| } |
| |
| request_properties_->ParseRequestHeaders(*request_headers_); |
| PopulateRequestContext(); |
| } |
| |
| void RewriteDriver::set_request_context(const RequestContextPtr& x) { |
| // Ideally, we would have a CHECK(x.get() != NULL) here, since all "real" |
| // RewriteDrivers should have a valid request context. |
| // |
| // However, one use-case currently prevent this -- |
| // ServerContext::InitWorkersAndDecodingDriver() creates a new driver |
| // to decode options. This creation, via NewUnmanagedRewriteDriver(), invokes |
| // this method with the provided request context, which really should be NULL |
| // because it is not associated with a request. |
| // |
| // In lieu of the significant refactor required to move option decoding out |
| // of RewriteDriver or synthesizing a context, we allow NULL here, and opt |
| // to instead CHECK aggressively on code paths that really should have a |
| // request context; i.e., those necessarily associated with page serving |
| // rather than option decoding. |
| request_context_.reset(x); |
| if (request_context_.get() != NULL) { |
| request_context_->log_record()->SetRewriterInfoMaxSize( |
| options()->max_rewrite_info_log_size()); |
| request_context_->log_record()->SetAllowLoggingUrls( |
| options()->allow_logging_urls_in_log_record()); |
| request_context_->log_record()->SetLogUrlIndices( |
| options()->log_url_indices()); |
| PopulateRequestContext(); |
| } |
| } |
| |
| AbstractLogRecord* RewriteDriver::log_record() { |
| CHECK(request_context_.get() != NULL); |
| return request_context_->log_record(); |
| } |
| |
| RewriteDriver::~RewriteDriver() { |
| if (rewrite_worker_ != NULL) { |
| scheduler_->UnregisterWorker(rewrite_worker_); |
| server_context_->rewrite_workers()->FreeSequence(rewrite_worker_); |
| } |
| if (html_worker_ != NULL) { |
| scheduler_->UnregisterWorker(html_worker_); |
| server_context_->html_workers()->FreeSequence(html_worker_); |
| } |
| if (low_priority_rewrite_worker_ != NULL) { |
| scheduler_->UnregisterWorker(low_priority_rewrite_worker_); |
| server_context_->low_priority_rewrite_workers()->FreeSequence( |
| low_priority_rewrite_worker_); |
| } |
| Clear(); |
| STLDeleteElements(&filters_to_delete_); |
| STLDeleteElements(&resource_claimants_); |
| } |
| |
| RewriteDriver* RewriteDriver::Clone() { |
| RewriteDriver* result; |
| RewriteDriverPool* pool = controlling_pool(); |
| if (pool == NULL) { |
| // TODO(jmarantz): when used with SetParent, it should not be |
| // necessary to clone the options here. Once we set the child's |
| // parent to this, the child will reference this->options() and |
| // ignores its self_options_. To exploit that, we'd need to |
| // make a different entry-point for CloneAndSetParent. |
| RewriteOptions* options_copy = options()->Clone(); |
| options_copy->ComputeSignature(); |
| result = |
| server_context_->NewCustomRewriteDriver(options_copy, request_context_); |
| } else { |
| result = server_context_->NewRewriteDriverFromPool(pool, request_context_); |
| } |
| result->is_nested_ = true; |
| |
| // Remove any Via headers for the nested driver. This is intended for |
| // removing "Via:1.1 google", so that nested drivers don't wind up |
| // adding cc:public into intermediate cached results. |
| // |
| // Note that we *do* want to propagate http/2 detection to nested drivers. |
| // This is OK because it gets captured in the RequestContext, which is |
| // shared, and is not reconstructed from request-headers. |
| RequestHeaders headers; |
| headers.CopyFrom(*request_headers_); |
| headers.RemoveAll(HttpAttributes::kVia); |
| result->SetRequestHeaders(headers); |
| |
| return result; |
| } |
| |
| void RewriteDriver::Clear() NO_THREAD_SAFETY_ANALYSIS { |
| HtmlParse::Clear(); |
| |
| // If this was a fetch, fetch_rewrites_ may still hold a reference to a |
| // RewriteContext. |
| STLDeleteElements(&fetch_rewrites_); |
| |
| DCHECK(!flush_requested_); |
| release_driver_ = false; |
| downstream_cache_purger_.Clear(); |
| write_property_cache_dom_cohort_ = false; |
| base_url_.Clear(); |
| DCHECK(!base_url_.IsAnyValid()); |
| decoded_base_url_.Clear(); |
| fetch_url_.clear(); |
| |
| if (!server_context_->shutting_down()) { |
| if (!externally_managed_) { |
| ref_counts_.DCheckAllCountsZero(); |
| } |
| DCHECK(primary_rewrite_context_map_.empty()); |
| DCHECK(initiated_rewrites_.empty()); |
| DCHECK(detached_rewrites_.empty()); |
| DCHECK(rewrites_.empty()); |
| DCHECK_EQ(0, possibly_quick_rewrites_); |
| } |
| xhtml_mimetype_computed_ = false; |
| xhtml_status_ = kXhtmlUnknown; |
| |
| should_skip_parsing_ = kNotSet; |
| max_page_processing_delay_ms_ = -1; |
| request_headers_.reset(NULL); |
| response_headers_ = NULL; |
| status_code_ = 0; |
| flush_requested_ = false; |
| flush_occurred_ = false; |
| flushed_cached_html_ = false; |
| flushing_cached_html_ = false; |
| flushed_early_ = false; |
| flushing_early_ = false; |
| tried_to_distribute_fetch_ = false; |
| defer_instrumentation_script_ = false; |
| is_lazyload_script_flushed_ = false; |
| base_was_set_ = false; |
| refs_before_base_ = false; |
| containing_charset_.clear(); |
| fully_rewrite_on_flush_ = false; |
| fast_blocking_rewrite_ = true; |
| num_inline_preview_images_ = 0; |
| num_flushed_early_pagespeed_resources_ = 0; |
| num_bytes_in_ = 0; |
| flush_early_info_.reset(NULL); |
| flush_early_render_info_.reset(NULL); |
| can_rewrite_resources_ = true; |
| is_nested_ = false; |
| num_initiated_rewrites_ = 0; |
| num_detached_rewrites_ = 0; |
| if (request_context_.get() != NULL) { |
| request_context_->WriteBackgroundRewriteLog(); |
| request_context_.reset(NULL); |
| } |
| start_time_ms_ = 0; |
| |
| critical_css_result_.reset(NULL); |
| critical_images_info_.reset(NULL); |
| critical_line_info_.reset(NULL); |
| beacon_critical_line_info_.reset(NULL); |
| critical_selector_info_.reset(NULL); |
| |
| if (owns_property_page_) { |
| delete fallback_property_page_; |
| } |
| fallback_property_page_ = NULL; |
| origin_property_page_.reset(); |
| owns_property_page_ = false; |
| device_type_ = UserAgentMatcher::kDesktop; |
| pagespeed_query_params_.clear(); |
| pagespeed_option_cookies_.clear(); |
| |
| // Reset to the default fetcher from any session fetcher |
| // (as the request is over). |
| url_async_fetcher_ = default_url_async_fetcher_; |
| STLDeleteElements(&owned_url_async_fetchers_); |
| ClearRequestProperties(); |
| user_agent_.clear(); |
| } |
| |
| // Must be called with rewrite_mutex() held. |
| bool RewriteDriver::RewritesComplete() const { |
| // 3 kinds of rewrites triggered from HTML: |
| bool no_pending_rewrites = |
| (ref_counts_.QueryCountMutexHeld(kRefPendingRewrites) == 0); |
| bool no_deleting_rewrites = |
| (ref_counts_.QueryCountMutexHeld(kRefDeletingRewrites) == 0); |
| bool no_detached_rewrites = detached_rewrites_.empty(); |
| DCHECK_EQ(static_cast<int>(detached_rewrites_.size()), |
| ref_counts_.QueryCountMutexHeld(kRefDetachedRewrites)); |
| |
| // And also user-facing fetches. Note that background fetches are handled |
| // by IsDone separately. |
| bool no_user_facing_fetch = |
| (ref_counts_.QueryCountMutexHeld(kRefFetchUserFacing) == 0); |
| |
| return no_pending_rewrites && no_deleting_rewrites && no_detached_rewrites && |
| no_user_facing_fetch; |
| } |
| |
| void RewriteDriver::WaitForCompletion() { |
| BoundedWaitFor(kWaitForCompletion, -1); |
| } |
| |
| void RewriteDriver::WaitForShutDown() { |
| BoundedWaitFor(kWaitForShutDown, -1); |
| } |
| |
| void RewriteDriver::BoundedWaitFor(WaitMode mode, int64 timeout_ms) { |
| SchedulerBlockingFunction wait(scheduler_); |
| |
| { |
| ScopedMutex lock(rewrite_mutex()); |
| CheckForCompletionAsync(mode, timeout_ms, &wait); |
| } |
| wait.Block(); |
| #ifndef NDEBUG |
| { |
| ScopedMutex lock(rewrite_mutex()); |
| CHECK_EQ(waiting_, kNoWait); |
| } |
| #endif |
| } |
| |
| void RewriteDriver::CheckForCompletionAsync(WaitMode wait_mode, |
| int64 timeout_ms, |
| Function* done) { |
| scheduler_->DCheckLocked(); |
| DCHECK_NE(kNoWait, wait_mode); |
| DCHECK_EQ(kNoWait, waiting_); |
| waiting_ = wait_mode; |
| waiting_deadline_reached_ = false; |
| |
| int64 end_time_ms; |
| if (timeout_ms <= 0) { |
| end_time_ms = -1; // Encodes unlimited |
| } else { |
| end_time_ms = server_context()->timer()->NowMs() + timeout_ms; |
| } |
| |
| TryCheckForCompletion(wait_mode, end_time_ms, done); |
| } |
| |
| void RewriteDriver::TryCheckForCompletion(WaitMode wait_mode, int64 end_time_ms, |
| Function* done) |
| NO_THREAD_SAFETY_ANALYSIS { |
| scheduler_->DCheckLocked(); |
| int64 now_ms = server_context_->timer()->NowMs(); |
| int64 sleep_ms; |
| if (end_time_ms < 0) { |
| waiting_deadline_reached_ = false; // Unlimited wait.. |
| sleep_ms = kTestTimeoutMs; |
| } else { |
| waiting_deadline_reached_ = (now_ms >= end_time_ms); |
| if (waiting_deadline_reached_) { |
| // If deadline is already reached if we keep going we will want to use |
| // long sleeps since we expect to be woken up based on conditions. |
| sleep_ms = kTestTimeoutMs; |
| } else { |
| sleep_ms = end_time_ms - now_ms; |
| } |
| } |
| |
| // Note that we may end up going past the deadline in order to make sure |
| // that at least the metadata cache lookups have a chance to come in. |
| if (!IsDone(wait_mode, waiting_deadline_reached_)) { |
| scheduler_->TimedWaitMs( |
| sleep_ms, |
| MakeFunction(this, &RewriteDriver::TryCheckForCompletion, |
| wait_mode, end_time_ms, done)); |
| } else { |
| // Done. Note that we may get deleted by our callback, so we have to |
| // make sure to save the mutex pointer. The thread annotation can't deal |
| // with this aliasing, hence the need for NO_THREAD_SAFETY_ANALYSIS above. |
| AbstractMutex* mutex = rewrite_mutex(); |
| waiting_ = kNoWait; |
| mutex->Unlock(); |
| done->CallRun(); |
| mutex->Lock(); |
| } |
| } |
| |
| bool RewriteDriver::IsDone(WaitMode wait_mode, bool deadline_reached) { |
| int async_events = ref_counts_.QueryCountMutexHeld(kRefAsyncEvents); |
| if (async_events > 0 && WaitForPendingAsyncEvents(wait_mode)) { |
| return false; |
| } |
| |
| int render_blocking_async_events = |
| ref_counts_.QueryCountMutexHeld(kRefRenderBlockingAsyncEvents); |
| if (render_blocking_async_events > 0) { |
| return false; |
| } |
| |
| // Before deadline, we're happy only if we're 100% done. |
| if (!deadline_reached) { |
| bool have_background_fetch = |
| (ref_counts_.QueryCountMutexHeld(kRefFetchBackground) != 0); |
| return RewritesComplete() && |
| !((wait_mode == kWaitForShutDown) && have_background_fetch); |
| } else { |
| // When we've reached the deadline, if we're Render()'ing |
| // we also give the jobs we can serve from cache a chance to finish |
| // (so they always render). |
| // We do not have to worry about possibly_quick_rewrites_ not being |
| // incremented yet as jobs are only initiated from the HTML parse thread. |
| if (wait_mode == kWaitForCachedRender) { |
| return (possibly_quick_rewrites_ == 0); |
| } else { |
| return true; |
| } |
| } |
| } |
| |
| void RewriteDriver::ExecuteFlushIfRequested() { |
| if (flush_requested_) { |
| Flush(); |
| } |
| } |
| |
| void RewriteDriver::ExecuteFlushIfRequestedAsync(Function* callback) { |
| if (flush_requested_) { |
| FlushAsync(callback); |
| } else { |
| callback->CallRun(); |
| } |
| } |
| |
| void RewriteDriver::Flush() { |
| SchedulerBlockingFunction wait(scheduler_); |
| FlushAsync(&wait); |
| wait.Block(); |
| flush_requested_ = false; |
| } |
| |
| void RewriteDriver::FlushAsync(Function* callback) { |
| DCHECK(request_context_.get() != NULL); |
| TraceLiteral("RewriteDriver::FlushAsync()"); |
| if (debug_filter_ != NULL) { |
| debug_filter_->StartRender(); |
| } |
| flush_requested_ = false; |
| |
| // Figure out which filters should be enabled and whether any enabled filter |
| // can modify urls. |
| DetermineFiltersBehavior(); |
| |
| for (FilterList::iterator it = early_pre_render_filters_.begin(); |
| it != early_pre_render_filters_.end(); ++it) { |
| HtmlFilter* filter = *it; |
| if (filter->is_enabled()) { |
| ApplyFilter(filter); |
| } |
| } |
| for (FilterList::iterator it = pre_render_filters_.begin(); |
| it != pre_render_filters_.end(); ++it) { |
| HtmlFilter* filter = *it; |
| if (filter->is_enabled()) { |
| ApplyFilter(filter); |
| } |
| } |
| |
| int num_rewrites = rewrites_.size(); |
| |
| // Copy all of the RewriteContext* into the initiated_rewrites_ set |
| // *before* initiating them, as we are doing this before we lock. |
| // The RewriteThread can start mutating the initiated_rewrites_ |
| // set as soon as one is initiated. |
| { |
| // If not locked, this WRITE to initiated_rewrites_ can race with |
| // locked READs of initiated_rewrites_ in RewriteComplete which |
| // runs in the Rewrite thread. Note that the DCHECK above, of |
| // initiated_rewrites_.empty(), is a READ and it's OK to have |
| // concurrent READs. |
| ScopedMutex lock(rewrite_mutex()); |
| |
| // Note that no actual resource Rewriting can occur until this point |
| // is reached, where we initiate all the RewriteContexts. |
| DCHECK(initiated_rewrites_.empty()); |
| |
| DCHECK_EQ(ref_counts_.QueryCountMutexHeld(kRefPendingRewrites), |
| num_rewrites); |
| initiated_rewrites_.insert(rewrites_.begin(), rewrites_.end()); |
| num_initiated_rewrites_ += num_rewrites; |
| |
| // We must also start tasks while holding the lock, as otherwise a |
| // successor task may complete and delete itself before we see if we |
| // are the ones to start it. |
| for (int i = 0; i < num_rewrites; ++i) { |
| RewriteContext* rewrite_context = rewrites_[i]; |
| if (!rewrite_context->chained()) { |
| rewrite_context->Initiate(); |
| } |
| } |
| } |
| rewrites_.clear(); |
| |
| { |
| ScopedMutex lock(rewrite_mutex()); |
| DCHECK_EQ(0, ref_counts_.QueryCountMutexHeld(kRefFetchUserFacing)); |
| DCHECK_EQ(0, ref_counts_.QueryCountMutexHeld(kRefFetchBackground)); |
| Function* flush_async_done = |
| MakeFunction(this, &RewriteDriver::QueueFlushAsyncDone, |
| num_rewrites, callback); |
| if (fully_rewrite_on_flush_) { |
| CheckForCompletionAsync(kWaitForCompletion, -1, flush_async_done); |
| } else { |
| int64 deadline = ComputeCurrentFlushWindowRewriteDelayMs(); |
| CheckForCompletionAsync(kWaitForCachedRender, deadline, flush_async_done); |
| } |
| } |
| } |
| |
| int64 RewriteDriver::ComputeCurrentFlushWindowRewriteDelayMs() { |
| int64 deadline = rewrite_deadline_ms(); |
| // If we've configured a max processing delay for the entire page, enforce |
| // that limit here. |
| if (max_page_processing_delay_ms_ > 0) { |
| int64 ms_since_start = |
| server_context_->timer()->NowMs() - start_time_ms_; |
| int64 ms_remaining = max_page_processing_delay_ms_ - ms_since_start; |
| // If the deadline for the current flush window (deadline) is less |
| // than the overall time remaining (ms_remaining), we enforce the |
| // per-flush window deadline. Otherwise, we wait for the overall |
| // page deadline. |
| // |
| // In any case, we require a minimum value of 1 millisecond since |
| // a value <= 0 implies an unlimited wait. |
| deadline = |
| std::max(std::min(ms_remaining, deadline), static_cast<int64>(1)); |
| } |
| return deadline; |
| } |
| |
| void RewriteDriver::QueueFlushAsyncDone(int num_rewrites, Function* callback) { |
| html_worker_->Add(MakeFunction(this, &RewriteDriver::FlushAsyncDone, |
| num_rewrites, callback)); |
| } |
| |
| void RewriteDriver::FlushAsyncDone(int num_rewrites, Function* callback) { |
| DCHECK(request_context_.get() != NULL); |
| TraceLiteral("RewriteDriver::FlushAsyncDone()"); |
| |
| { |
| ScopedMutex lock(rewrite_mutex()); |
| DCHECK_EQ(0, possibly_quick_rewrites_); |
| int still_pending_rewrites = |
| ref_counts_.QueryCountMutexHeld(kRefPendingRewrites); |
| int completed_rewrites = num_rewrites - still_pending_rewrites; |
| |
| // If the output cache lookup came as a HIT in after the deadline, that |
| // means that (a) we can't use the result and (b) we don't need |
| // to re-initiate the rewrite since it was in fact in cache. Hopefully |
| // the cache system will respond to HIT by making the next HIT faster |
| // so it meets our deadline. In either case we will track with stats. |
| // |
| RewriteStats* stats = server_context_->rewrite_stats(); |
| stats->cached_output_hits()->Add(completed_rewrites); |
| stats->cached_output_missed_deadline()->Add(still_pending_rewrites); |
| { |
| // Add completed_rewrites (from this flush window) to the logged value. |
| ScopedMutex lock(log_record()->mutex()); |
| MetadataCacheInfo* metadata_log_info = |
| log_record()->logging_info()->mutable_metadata_cache_info(); |
| metadata_log_info->set_num_rewrites_completed( |
| metadata_log_info->num_rewrites_completed() + completed_rewrites); |
| } |
| |
| // Detach all rewrites that are still outstanding, by moving them from |
| // initiated_rewrites_ to detached_rewrites_; also notify them that they |
| // will not be rendered. |
| for (RewriteContextSet::iterator p = initiated_rewrites_.begin(), |
| e = initiated_rewrites_.end(); p != e; ++p) { |
| RewriteContext* rewrite_context = *p; |
| |
| // If debugging is enabled, annotate that we have missed our rewrite |
| // deadline. |
| if (options()->Enabled(RewriteOptions::kDebug)) { |
| for (int i = 0, n = rewrite_context->num_slots(); i < n; ++i) { |
| ResourceSlotPtr slot = rewrite_context->slot(i); |
| GoogleString suffix; |
| const char* id = rewrite_context->id(); |
| StringFilterMap::const_iterator p = resource_filter_map_.find(id); |
| if (p != resource_filter_map_.end()) { |
| RewriteFilter* filter = p->second; |
| InsertDebugComment(DeadlineExceededMessage(filter->Name()), |
| slot->element()); |
| } else { |
| InsertDebugComment(kDeadlineExceeded, slot->element()); |
| } |
| } |
| } |
| rewrite_context->WillNotRender(); |
| detached_rewrites_.insert(rewrite_context); |
| ++num_detached_rewrites_; |
| ref_counts_.AddRefMutexHeld(kRefDetachedRewrites); |
| ref_counts_.ReleaseRefMutexHeld(kRefPendingRewrites); |
| } |
| DCHECK_EQ(0, ref_counts_.QueryCountMutexHeld(kRefPendingRewrites)); |
| initiated_rewrites_.clear(); |
| |
| slots_.clear(); |
| inline_slots_.clear(); |
| inline_attribute_slots_.clear(); |
| } |
| |
| // Notify all enabled pre-render filters that rendering is done. |
| if (debug_filter_ != NULL) { |
| debug_filter_->RenderDone(); |
| } |
| |
| for (FilterList::iterator it = early_pre_render_filters_.begin(); |
| it != early_pre_render_filters_.end(); ++it) { |
| HtmlFilter* filter = *it; |
| if (filter->is_enabled()) { |
| filter->RenderDone(); |
| } |
| } |
| for (FilterList::iterator it = pre_render_filters_.begin(); |
| it != pre_render_filters_.end(); ++it) { |
| HtmlFilter* filter = *it; |
| if (filter->is_enabled()) { |
| filter->RenderDone(); |
| } |
| } |
| |
| // Run all the post-render filters, and clear the event queue. |
| HtmlParse::Flush(); |
| flush_occurred_ = true; |
| callback->CallRun(); |
| } |
| |
| GoogleString RewriteDriver::DeadlineExceededMessage(StringPiece filter_name) { |
| return StrCat(kDeadlineExceeded, " for filter ", filter_name); |
| } |
| |
| void RewriteDriver::Initialize() { |
| ++initialized_count_; |
| if (initialized_count_ == 1) { |
| RewriteOptions::Initialize(); |
| ImageRewriteFilter::Initialize(); |
| CssFilter::Initialize(); |
| SplitHtmlConfig::Initialize(); |
| } |
| } |
| |
| void RewriteDriver::InitStats(Statistics* statistics) { |
| AddInstrumentationFilter::InitStats(statistics); |
| CacheExtender::InitStats(statistics); |
| CriticalCssBeaconFilter::InitStats(statistics); |
| CriticalImagesBeaconFilter::InitStats(statistics); |
| CssCombineFilter::InitStats(statistics); |
| CssFilter::InitStats(statistics); |
| CssInlineFilter::InitStats(statistics); |
| CssInlineImportToLinkFilter::InitStats(statistics); |
| CssMoveToHeadFilter::InitStats(statistics); |
| CssSummarizerBase::InitStats(statistics); |
| DedupInlinedImagesFilter::InitStats(statistics); |
| DomainRewriteFilter::InitStats(statistics); |
| GoogleAnalyticsFilter::InitStats(statistics); |
| GoogleFontCssInlineFilter::InitStats(statistics); |
| ImageCombineFilter::InitStats(statistics); |
| ImageRewriteFilter::InitStats(statistics); |
| InPlaceRewriteContext::InitStats(statistics); |
| InsertGAFilter::InitStats(statistics); |
| JavascriptFilter::InitStats(statistics); |
| JsCombineFilter::InitStats(statistics); |
| JsInlineFilter::InitStats(statistics); |
| LocalStorageCacheFilter::InitStats(statistics); |
| MakeShowAdsAsyncFilter::InitStats(statistics); |
| MetaTagFilter::InitStats(statistics); |
| MobilizeLabelFilter::InitStats(statistics); |
| MobilizeMenuFilter::InitStats(statistics); |
| MobilizeMenuRenderFilter::InitStats(statistics); |
| MobilizeRewriteFilter::InitStats(statistics); |
| SplitHtmlBeaconFilter::InitStats(statistics); |
| RewriteContext::InitStats(statistics); |
| UrlInputResource::InitStats(statistics); |
| UrlLeftTrimFilter::InitStats(statistics); |
| } |
| |
| void RewriteDriver::Terminate() { |
| // Clean up statics. |
| --initialized_count_; |
| if (initialized_count_ == 0) { |
| CssFilter::Terminate(); |
| ImageRewriteFilter::Terminate(); |
| RewriteOptions::Terminate(); |
| SplitHtmlConfig::Terminate(); |
| } |
| } |
| |
| void RewriteDriver::SetServerContext(ServerContext* server_context) |
| NO_THREAD_SAFETY_ANALYSIS { |
| DCHECK(server_context_ == NULL); |
| server_context_ = server_context; |
| scheduler_ = server_context_->scheduler(); |
| ref_counts_.set_mutex(rewrite_mutex()); |
| set_timer(server_context->timer()); |
| rewrite_worker_ = server_context_->rewrite_workers()->NewSequence(); |
| html_worker_ = server_context_->html_workers()->NewSequence(); |
| low_priority_rewrite_worker_ = |
| server_context_->low_priority_rewrite_workers()->NewSequence(); |
| scheduler_->RegisterWorker(rewrite_worker_); |
| scheduler_->RegisterWorker(html_worker_); |
| scheduler_->RegisterWorker(low_priority_rewrite_worker_); |
| |
| DCHECK(resource_filter_map_.empty()); |
| |
| // Add the rewriting filters to the map unconditionally -- we may |
| // need them to process resource requests due to a query-specific |
| // 'rewriters' specification. We still use the passed-in options |
| // to determine whether they get added to the html parse filter chain. |
| // Note: RegisterRewriteFilter takes ownership of these filters. |
| CacheExtender* cache_extender = new CacheExtender(this); |
| ImageCombineFilter* image_combiner = new ImageCombineFilter(this); |
| ImageRewriteFilter* image_rewriter = new ImageRewriteFilter(this); |
| |
| RegisterRewriteFilter(new CssCombineFilter(this)); |
| RegisterRewriteFilter( |
| new CssFilter(this, cache_extender, image_rewriter, image_combiner)); |
| RegisterRewriteFilter(new JavascriptFilter(this)); |
| RegisterRewriteFilter(new JsCombineFilter(this)); |
| RegisterRewriteFilter(image_rewriter); |
| RegisterRewriteFilter(cache_extender); |
| RegisterRewriteFilter(image_combiner); |
| RegisterRewriteFilter(new LocalStorageCacheFilter(this)); |
| RegisterRewriteFilter(new JavascriptSourceMapFilter(this)); |
| |
| // These filters are needed to rewrite and trim urls in modified CSS files. |
| domain_rewriter_.reset(new DomainRewriteFilter(this, statistics())); |
| url_trim_filter_.reset(new UrlLeftTrimFilter(this, statistics())); |
| } |
| |
| RequestTrace* RewriteDriver::trace_context() { |
| return request_context_.get() == NULL ? NULL : |
| request_context_->root_trace_context(); |
| } |
| |
| void RewriteDriver::TracePrintf(const char* fmt, ...) { |
| if (trace_context() == NULL || !trace_context()->tracing_enabled()) { |
| return; |
| } |
| va_list argp; |
| va_start(argp, fmt); |
| trace_context()->TraceVPrintf(fmt, argp); |
| va_end(argp); |
| } |
| |
| void RewriteDriver::TraceLiteral(const char* literal) { |
| if (trace_context() == NULL || !trace_context()->tracing_enabled()) { |
| return; |
| } |
| trace_context()->TraceLiteral(literal); |
| } |
| |
| void RewriteDriver::TraceString(const GoogleString& s) { |
| if (trace_context() == NULL || !trace_context()->tracing_enabled()) { |
| return; |
| } |
| trace_context()->TraceString(s); |
| } |
| |
| void RewriteDriver::AddFilters() { |
| CHECK(html_writer_filter_ == NULL); |
| CHECK(!filters_added_); |
| server_context_->ComputeSignature(options_.get()); |
| filters_added_ = true; |
| |
| AddPreRenderFilters(); |
| AddPostRenderFilters(); |
| } |
| |
| void RewriteDriver::AddPreRenderFilters() { |
| // This function defines the order that filters are run. We document |
| // in pagespeed.conf.template that the order specified in the conf |
| // file does not matter, but we give the filters there in the order |
| // they are actually applied, for the benefit of the understanding |
| // of the site owner. So if you change that here, change it in |
| // install/common/pagespeed.conf.template as well. |
| // |
| // Also be sure to update the doc in net/instaweb/doc/docs/config_filters.ezt. |
| // |
| // Now process boolean options, which may include propagating non-boolean |
| // and boolean parameter settings to filters. |
| const RewriteOptions* rewrite_options = options(); |
| |
| if (rewrite_options->flush_html()) { |
| // Note that this does not get hooked into the normal html-parse |
| // filter-chain as it gets run immediately after every call to |
| // ParseText, possibly inducing the system to trigger a Flush |
| // based on the content it sees. |
| add_event_listener(new FlushHtmlFilter(this)); |
| } |
| |
| if (rewrite_options->Enabled(RewriteOptions::kComputeStatistics)) { |
| dom_stats_filter_ = new DomStatsFilter(this); |
| AddOwnedEarlyPreRenderFilter(dom_stats_filter_); |
| } |
| if (!rewrite_options->preserve_subresource_hints()) { |
| AddOwnedEarlyPreRenderFilter(new StripSubresourceHintsFilter(this)); |
| } |
| if (rewrite_options->Enabled(RewriteOptions::kDecodeRewrittenUrls)) { |
| AddOwnedEarlyPreRenderFilter(new DecodeRewrittenUrlsFilter(this)); |
| } |
| if (rewrite_options->Enabled(RewriteOptions::kSplitHtmlHelper)) { |
| AddOwnedEarlyPreRenderFilter(new SplitHtmlHelperFilter(this)); |
| } |
| |
| if (rewrite_options->Enabled(RewriteOptions::kResponsiveImages) && |
| rewrite_options->Enabled(RewriteOptions::kResizeImages)) { |
| ResponsiveImageFirstFilter* resp_filter1 = |
| new ResponsiveImageFirstFilter(this); |
| AddOwnedEarlyPreRenderFilter(resp_filter1); |
| |
| ResponsiveImageSecondFilter* resp_filter2 = |
| new ResponsiveImageSecondFilter(this, resp_filter1); |
| AddOwnedPostRenderFilter(resp_filter2); |
| } |
| |
| // We disable combine_css and combine_javascript when flush_subresources is |
| // enabled, since the way CSS and JS is combined is not deterministic. |
| // However, we do not disable combine_javascript when defer_javascript is |
| // enabled since in this case, flush_subresources does not flush JS resources. |
| bool flush_subresources_enabled = rewrite_options->Enabled( |
| RewriteOptions::kFlushSubresources); |
| |
| if (rewrite_options->Enabled(RewriteOptions::kAddBaseTag) || |
| rewrite_options->Enabled(RewriteOptions::kAddHead) || |
| rewrite_options->Enabled(RewriteOptions::kAddInstrumentation) || |
| rewrite_options->Enabled(RewriteOptions::kCombineHeads) || |
| rewrite_options->Enabled(RewriteOptions::kDeterministicJs) || |
| rewrite_options->Enabled(RewriteOptions::kHandleNoscriptRedirect) || |
| rewrite_options->Enabled(RewriteOptions::kMakeGoogleAnalyticsAsync) || |
| rewrite_options->Enabled(RewriteOptions::kMobilize) || |
| rewrite_options->Enabled(RewriteOptions::kMoveCssAboveScripts) || |
| rewrite_options->Enabled(RewriteOptions::kMoveCssToHead) || |
| flush_subresources_enabled) { |
| // Adds a filter that adds a 'head' section to html documents if |
| // none found prior to the body. |
| AddOwnedEarlyPreRenderFilter(new AddHeadFilter( |
| this, rewrite_options->Enabled(RewriteOptions::kCombineHeads))); |
| } |
| if (rewrite_options->Enabled(RewriteOptions::kAddBaseTag)) { |
| AddOwnedEarlyPreRenderFilter(new BaseTagFilter(this)); |
| } |
| if (rewrite_options->Enabled(RewriteOptions::kAddIds) || |
| rewrite_options->MobUseLabelFilter()) { |
| AddOwnedEarlyPreRenderFilter(new AddIdsFilter(this)); |
| } |
| if (rewrite_options->Enabled(RewriteOptions::kStripScripts)) { |
| // Experimental filter that blindly strips all scripts from a page. |
| AppendOwnedPreRenderFilter(new StripScriptsFilter(this)); |
| } |
| if (is_critical_images_beacon_enabled()) { |
| // This filter should be enabled early, at least before image rewriting, |
| // because it depends on seeing the original image URLs. |
| AppendOwnedPreRenderFilter(new CriticalImagesBeaconFilter(this)); |
| } |
| if (rewrite_options->Enabled(RewriteOptions::kMakeShowAdsAsync)) { |
| // We want this filter early in case we ever inline the loader JS. |
| AppendOwnedPreRenderFilter(new MakeShowAdsAsyncFilter(this)); |
| } |
| if (rewrite_options->Enabled(RewriteOptions::kSplitHtml) && |
| server_context()->factory()->UseBeaconResultsInFilters()) { |
| AppendOwnedPreRenderFilter(new SplitHtmlBeaconFilter(this)); |
| } |
| if (rewrite_options->Enabled(RewriteOptions::kInlineImportToLink) || |
| (!rewrite_options->Forbidden(RewriteOptions::kInlineImportToLink) && |
| (CriticalSelectorsEnabled() || |
| rewrite_options->Enabled(RewriteOptions::kComputeCriticalCss)))) { |
| // If we're converting simple embedded CSS @imports into a href link |
| // then we need to do that before any other CSS processing. |
| AppendOwnedPreRenderFilter(new CssInlineImportToLinkFilter(this, |
| statistics())); |
| } |
| if (rewrite_options->Enabled(RewriteOptions::kPrioritizeCriticalCss)) { |
| // If we're inlining styles that resolved initially, skip outlining |
| // css since that works against this. |
| // TODO(slamm): Figure out if move_css_to_head needs to be disabled. |
| CriticalCssFinder* finder = server_context()->critical_css_finder(); |
| if (finder != NULL && !CriticalSelectorsEnabled()) { |
| AppendOwnedPreRenderFilter(new CriticalCssFilter(this, finder)); |
| } |
| } else if (rewrite_options->Enabled(RewriteOptions::kOutlineCss)) { |
| // Cut out inlined styles and make them into external resources. |
| // This can only be called once and requires a server_context_ to be set. |
| CHECK(server_context_ != NULL); |
| AppendOwnedPreRenderFilter(new CssOutlineFilter(this)); |
| } |
| if (rewrite_options->Enabled(RewriteOptions::kInlineGoogleFontCss)) { |
| // Inline small Google Font Service CSS files. |
| // Do this before MoveCssToHead / MoveCssAboveScripts. |
| AppendOwnedPreRenderFilter(new GoogleFontCssInlineFilter(this)); |
| } |
| if (rewrite_options->Enabled(RewriteOptions::kMoveCssToHead) || |
| rewrite_options->Enabled(RewriteOptions::kMoveCssAboveScripts)) { |
| // It's good to move CSS links to the head prior to running CSS combine, |
| // which only combines CSS links that are already in the head. |
| AppendOwnedPreRenderFilter(new CssMoveToHeadFilter(this)); |
| } |
| if (!flush_subresources_enabled && |
| rewrite_options->Enabled(RewriteOptions::kCombineCss)) { |
| // Combine external CSS resources after we've outlined them. |
| // CSS files in html document. This can only be called |
| // once and requires a server_context_ to be set. |
| EnableRewriteFilter(RewriteOptions::kCssCombinerId); |
| } |
| if (rewrite_options->Enabled(RewriteOptions::kRewriteCss) || |
| (!rewrite_options->Forbidden(RewriteOptions::kRewriteCss) && |
| FlattenCssImportsEnabled())) { |
| // Since AddFilters only applies to the HTML rewrite path, we check here |
| // if IPRO preemptive rewrites are disabled and skip the filter if so. |
| if (!rewrite_options->css_preserve_urls() || |
| rewrite_options->in_place_preemptive_rewrite_css()) { |
| EnableRewriteFilter(RewriteOptions::kCssFilterId); |
| } |
| } |
| if ((rewrite_options->Enabled(RewriteOptions::kPrioritizeCriticalCss) && |
| server_context()->factory()->UseBeaconResultsInFilters()) || |
| (rewrite_options->Enabled(RewriteOptions::kComputeCriticalCss) && |
| rewrite_options->use_selectors_for_critical_css())) { |
| // Add the critical selector instrumentation before the rewriting filter. |
| AppendOwnedPreRenderFilter(new CriticalCssBeaconFilter(this)); |
| } |
| if (CriticalSelectorsEnabled()) { |
| AppendOwnedPreRenderFilter(new CriticalSelectorFilter(this)); |
| } |
| if (rewrite_options->Enabled(RewriteOptions::kInlineCss)) { |
| // Inline small CSS files. Give CSS minification and flattening a chance to |
| // run before we decide what counts as "small". |
| CHECK(server_context_ != NULL); |
| AppendOwnedPreRenderFilter(new CssInlineFilter(this)); |
| } |
| if (rewrite_options->Enabled(RewriteOptions::kOutlineJavascript)) { |
| // Cut out inlined scripts and make them into external resources. |
| // This can only be called once and requires a server_context_ to be set. |
| CHECK(server_context_ != NULL); |
| AppendOwnedPreRenderFilter(new JsOutlineFilter(this)); |
| } |
| if (rewrite_options->Enabled(RewriteOptions::kMakeGoogleAnalyticsAsync)) { |
| // Converts sync loads of Google Analytics javascript to async loads. |
| // This needs to be listed before rewrite_javascript because it injects |
| // javascript that has comments and extra whitespace. |
| AppendOwnedPreRenderFilter(new GoogleAnalyticsFilter(this, statistics())); |
| } |
| if ((rewrite_options->Enabled(RewriteOptions::kInsertGA) || |
| rewrite_options->running_experiment()) && |
| rewrite_options->ga_id() != "") { |
| // Like MakeGoogleAnalyticsAsync, InsertGA should be before js rewriting. |
| AppendOwnedPreRenderFilter(new InsertGAFilter(this)); |
| } |
| if (!flush_subresources_enabled && |
| rewrite_options->Enabled(RewriteOptions::kCombineJavascript)) { |
| // Combine external JS resources. Done after minification and analytics |
| // detection, as it converts script sources into string literals, making |
| // them opaque to analysis. |
| EnableRewriteFilter(RewriteOptions::kJavascriptCombinerId); |
| } |
| if (rewrite_options->Enabled(RewriteOptions::kRewriteJavascriptExternal) || |
| rewrite_options->Enabled(RewriteOptions::kRewriteJavascriptInline) || |
| rewrite_options->Enabled( |
| RewriteOptions::kCanonicalizeJavascriptLibraries)) { |
| // Since AddFilters only applies to the HTML rewrite path, we check here |
| // if IPRO preemptive rewrites are disabled and skip the filter if so. |
| // |
| // Note that we minify before we inline, so if you enable |
| // rewrite_javascript_inline but not rewrite_javascript_external, we |
| // will only minify the already-inlined JavaScript, and we will not |
| // minify external JS that we decided later to inline. It seems unlikely |
| // that someone would want to enable inline_javascript and not enable |
| // rewrite_javascript_external though. |
| if (!rewrite_options->js_preserve_urls() || |
| rewrite_options->in_place_preemptive_rewrite_javascript() || |
| rewrite_options->Enabled(RewriteOptions::kRewriteJavascriptInline)) { |
| // Rewrite (minify etc.) JavaScript code to reduce time to first |
| // interaction. |
| EnableRewriteFilter(RewriteOptions::kJavascriptMinId); |
| } |
| } |
| |
| // Disable incomplete and unsupported moblization filters. |
| // |
| // Mobilize after JS-rewrite and before JS-inline. We don't |
| // want to use the PageSpeed minifier to re-optimize the |
| // closure-compiled mobilization code because (a) it won't |
| // do much good (b) it would rename the URL to something we won't |
| // find on /mod_pagespeed_static and (c) we certainly don't want |
| // source-maps for the compiled code. However, we do want |
| // the inliner to work on the small compiled xhr.js. |
| // if (rewrite_options->Enabled(RewriteOptions::kMobilize)) { |
| // if (rewrite_options->MobUseLabelFilter()) { |
| // AppendOwnedPreRenderFilter( |
| // new MobilizeLabelFilter(false /* not menu request */, this)); |
| // } |
| // if (rewrite_options->MobRenderServerSideMenus()) { |
| // AppendOwnedPreRenderFilter(new MobilizeMenuRenderFilter(this)); |
| // } |
| // AppendOwnedPreRenderFilter(new MobilizeRewriteFilter(this)); |
| // } |
| |
| if (rewrite_options->Enabled(RewriteOptions::kInlineJavascript)) { |
| // Inline small Javascript files. Give JS minification a chance to run |
| // before we decide what counts as "small". |
| CHECK(server_context_ != NULL); |
| AppendOwnedPreRenderFilter(new JsInlineFilter(this)); |
| } |
| if (rewrite_options->Enabled(RewriteOptions::kConvertJpegToProgressive) || |
| rewrite_options->ImageOptimizationEnabled() || |
| rewrite_options->Enabled(RewriteOptions::kResizeImages) || |
| rewrite_options->Enabled( |
| RewriteOptions::kResizeToRenderedImageDimensions) || |
| rewrite_options->Enabled(RewriteOptions::kInlineImages) || |
| rewrite_options->Enabled(RewriteOptions::kInsertImageDimensions) || |
| rewrite_options->Enabled(RewriteOptions::kJpegSubsampling) || |
| rewrite_options->Enabled(RewriteOptions::kStripImageColorProfile) || |
| rewrite_options->Enabled(RewriteOptions::kStripImageMetaData) || |
| rewrite_options->Enabled(RewriteOptions::kDelayImages)) { |
| // Since AddFilters only applies to the HTML rewrite path, we check here |
| // if IPRO preemptive rewrites are disabled and skip the filter if so. |
| if (!rewrite_options->image_preserve_urls() || |
| rewrite_options->in_place_preemptive_rewrite_images()) { |
| EnableRewriteFilter(RewriteOptions::kImageCompressionId); |
| } |
| } |
| if (rewrite_options->Enabled(RewriteOptions::kRemoveComments)) { |
| AppendOwnedPreRenderFilter(new RemoveCommentsFilter( |
| this, new RemoveCommentsFilterOptions(rewrite_options))); |
| } |
| if (rewrite_options->Enabled(RewriteOptions::kElideAttributes)) { |
| // Remove HTML element attribute values where |
| // http://www.w3.org/TR/html4/loose.dtd says that the name is all |
| // that's necessary |
| AppendOwnedPreRenderFilter(new ElideAttributesFilter(this)); |
| } |
| if (rewrite_options->Enabled(RewriteOptions::kExtendCacheCss) || |
| rewrite_options->Enabled(RewriteOptions::kExtendCacheImages) || |
| rewrite_options->Enabled(RewriteOptions::kExtendCachePdfs) || |
| rewrite_options->Enabled(RewriteOptions::kExtendCacheScripts)) { |
| // Extend the cache lifetime of resources. |
| EnableRewriteFilter(RewriteOptions::kCacheExtenderId); |
| } |
| if (rewrite_options->Enabled(RewriteOptions::kSpriteImages)) { |
| EnableRewriteFilter(RewriteOptions::kImageCombineId); |
| } |
| if (rewrite_options->Enabled(RewriteOptions::kLocalStorageCache)) { |
| EnableRewriteFilter(RewriteOptions::kLocalStorageCacheId); |
| } |
| // Enable Flush subresources early filter to extract the subresources from |
| // head. This should be the last prerender filter. |
| if (flush_subresources_enabled) { |
| AppendOwnedPreRenderFilter(new |
| CollectFlushEarlyContentFilter(this)); |
| } |
| } |
| |
| void RewriteDriver::AddPostRenderFilters() { |
| const RewriteOptions* rewrite_options = options(); |
| if (rewrite_options->domain_lawyer()->can_rewrite_domains() && |
| (rewrite_options->Enabled(RewriteOptions::kRewriteDomains) || |
| rewrite_options->mob_iframe())) { |
| // Rewrite mapped domains and shard any resources not otherwise rewritten. |
| // We want do do this after all the content-changing rewrites, because they |
| // will map & shard as part of their execution. |
| // |
| // TODO(jmarantz): Consider removing all the domain-mapping functionality |
| // from other rewrites and do it exclusively in this filter. Before we |
| // do that we'll need to validate this filter so we can turn it on by |
| // default. |
| // |
| // Note that the "domain_lawyer" filter controls whether we rewrite |
| // domains for resources in HTML files. However, when we cache-extend |
| // CSS files, we rewrite the domains in them whether this filter is |
| // specified or not. |
| AddUnownedPostRenderFilter(domain_rewriter_.get()); |
| } |
| if (rewrite_options->Enabled(RewriteOptions::kLeftTrimUrls)) { |
| // Trim extraneous prefixes from urls in attribute values. |
| // Happens before RemoveQuotes but after everything else. Note: |
| // we Must left trim urls BEFORE quote removal. |
| AddUnownedPostRenderFilter(url_trim_filter_.get()); |
| } |
| if (rewrite_options->Enabled(RewriteOptions::kFlushSubresources) && |
| !options()->pre_connect_url().empty()) { |
| AddOwnedPostRenderFilter(new RewrittenContentScanningFilter(this)); |
| } |
| if (rewrite_options->Enabled(RewriteOptions::kInsertDnsPrefetch)) { |
| InsertDnsPrefetchFilter* insert_dns_prefetch_filter = |
| new InsertDnsPrefetchFilter(this); |
| AddOwnedPostRenderFilter(insert_dns_prefetch_filter); |
| } |
| if (rewrite_options->Enabled(RewriteOptions::kAddInstrumentation)) { |
| // Inject javascript to instrument loading-time. This should run before |
| // defer js so that its onload handler can fire before JS starts executing. |
| AddOwnedPostRenderFilter(new AddInstrumentationFilter(this)); |
| } |
| if (rewrite_options->Enabled(RewriteOptions::kSplitHtml)) { |
| AddOwnedPostRenderFilter(new DeferIframeFilter(this)); |
| AddOwnedPostRenderFilter(new JsDisableFilter(this)); |
| } else if (rewrite_options->Enabled(RewriteOptions::kDeferJavascript) || |
| rewrite_options->Enabled(RewriteOptions::kCachePartialHtml)) { |
| // Defers javascript download and execution to post onload. This filter |
| // should be applied before JsDisableFilter and JsDeferFilter. |
| // kDeferIframe filter should never be turned on when either defer_js |
| // or disable_js is enabled. |
| AddOwnedPostRenderFilter(new DeferIframeFilter(this)); |
| AddOwnedPostRenderFilter(new JsDisableFilter(this)); |
| // Though we are adding JsDeferDisabledFilter here, if we are flushing |
| // cached html or we have flushed cached html, this filter will disable |
| // itself. |
| AddOwnedPostRenderFilter(new JsDeferDisabledFilter(this)); |
| } |
| if (rewrite_options->Enabled(RewriteOptions::kFixReflows)) { |
| AddOwnedPostRenderFilter(new FixReflowFilter(this)); |
| } |
| if (rewrite_options->Enabled(RewriteOptions::kDeterministicJs)) { |
| AddOwnedPostRenderFilter(new DeterministicJsFilter(this)); |
| } |
| if (rewrite_options->Enabled(RewriteOptions::kConvertMetaTags)) { |
| AddOwnedPostRenderFilter(new MetaTagFilter(this)); |
| } |
| if (rewrite_options->Enabled(RewriteOptions::kDisableJavascript)) { |
| // kDeferIframe filter should never be turned on when either defer_js |
| // or disable_js is enabled. |
| AddOwnedPostRenderFilter(new DeferIframeFilter(this)); |
| AddOwnedPostRenderFilter(new JsDisableFilter(this)); |
| } |
| if (rewrite_options->Enabled(RewriteOptions::kDelayImages)) { |
| // kInsertImageDimensions should be enabled to avoid drastic reflows. |
| AddOwnedPostRenderFilter(new DelayImagesFilter(this)); |
| } |
| if (rewrite_options->Enabled(RewriteOptions::kDedupInlinedImages)) { |
| AddOwnedPostRenderFilter(new DedupInlinedImagesFilter(this)); |
| } |
| // TODO(nikhilmadan): Should we disable this for bots? |
| // LazyLoadImagesFilter should be applied after DelayImagesFilter. |
| if (rewrite_options->Enabled(RewriteOptions::kLazyloadImages)) { |
| AddOwnedPostRenderFilter(new LazyloadImagesFilter(this)); |
| } |
| if (rewrite_options->support_noscript_enabled()) { |
| AddOwnedPostRenderFilter(new SupportNoscriptFilter(this)); |
| } |
| |
| if (rewrite_options->Enabled(RewriteOptions::kHandleNoscriptRedirect)) { |
| AddOwnedPostRenderFilter(new HandleNoscriptRedirectFilter(this)); |
| } |
| |
| if (rewrite_options->max_html_parse_bytes() > 0) { |
| AddOwnedPostRenderFilter(new RedirectOnSizeLimitFilter(this)); |
| set_size_limit(rewrite_options->max_html_parse_bytes()); |
| } |
| |
| if (rewrite_options->Enabled(RewriteOptions::kStripNonCacheable)) { |
| StripNonCacheableFilter* filter = new StripNonCacheableFilter(this); |
| AddOwnedPostRenderFilter(filter); |
| } |
| |
| if (rewrite_options->Enabled(RewriteOptions::kComputeVisibleText)) { |
| ComputeVisibleTextFilter* filter = new ComputeVisibleTextFilter(this); |
| AddOwnedPostRenderFilter(filter); |
| } |
| |
| if (rewrite_options->Enabled(RewriteOptions::kPedantic)) { |
| // Add HTML type attributes where HTML4 says that it's necessary. |
| PedanticFilter* filter = new PedanticFilter(this); |
| AddOwnedPostRenderFilter(filter); |
| } |
| |
| // Remove quotes and collapse whitespace at the very end for maximum effect. |
| if (rewrite_options->Enabled(RewriteOptions::kRemoveQuotes)) { |
| // Remove extraneous quotes from html attributes. |
| AddOwnedPostRenderFilter(new HtmlAttributeQuoteRemoval(this)); |
| } |
| if (rewrite_options->Enabled(RewriteOptions::kCollapseWhitespace)) { |
| // Remove excess whitespace in HTML. |
| AddOwnedPostRenderFilter(new CollapseWhitespaceFilter(this)); |
| } |
| |
| if (DebugMode()) { |
| debug_filter_ = new DebugFilter(this); |
| AddOwnedPostRenderFilter(debug_filter_); |
| } |
| |
| // NOTE(abliss): Adding a new filter? Does it export any statistics? If it |
| // doesn't, it probably should. If it does, be sure to add it to the |
| // InitStats() function above or it will break under Apache! |
| } |
| |
| void RewriteDriver::AddOwnedEarlyPreRenderFilter(HtmlFilter* filter) { |
| filters_to_delete_.push_back(filter); |
| early_pre_render_filters_.push_back(filter); |
| } |
| |
| void RewriteDriver::PrependOwnedPreRenderFilter(HtmlFilter* filter) { |
| filters_to_delete_.push_back(filter); |
| pre_render_filters_.push_front(filter); |
| } |
| |
| void RewriteDriver::AppendOwnedPreRenderFilter(HtmlFilter* filter) { |
| filters_to_delete_.push_back(filter); |
| pre_render_filters_.push_back(filter); |
| } |
| |
| void RewriteDriver::AppendUnownedPreRenderFilter(HtmlFilter* filter) { |
| pre_render_filters_.push_back(filter); |
| } |
| |
| void RewriteDriver::AddOwnedPostRenderFilter(HtmlFilter* filter) { |
| filters_to_delete_.push_back(filter); |
| AddUnownedPostRenderFilter(filter); |
| } |
| |
| void RewriteDriver::AddUnownedPostRenderFilter(HtmlFilter* filter) { |
| HtmlParse::AddFilter(filter); |
| } |
| |
| void RewriteDriver::AppendRewriteFilter(RewriteFilter* filter) { |
| CHECK(filter != NULL); |
| RegisterRewriteFilter(filter); |
| pre_render_filters_.push_back(filter); |
| } |
| |
| void RewriteDriver::PrependRewriteFilter(RewriteFilter* filter) { |
| CHECK(filter != NULL); |
| RegisterRewriteFilter(filter); |
| pre_render_filters_.push_front(filter); |
| } |
| |
| void RewriteDriver::AddResourceUrlClaimant(ResourceUrlClaimant* claimant) { |
| CHECK(claimant != NULL); |
| resource_claimants_.push_back(claimant); |
| } |
| |
| void RewriteDriver::EnableRewriteFilter(const char* id) { |
| RewriteFilter* filter = resource_filter_map_[id]; |
| CHECK(filter != NULL); |
| pre_render_filters_.push_back(filter); |
| } |
| |
| void RewriteDriver::RegisterRewriteFilter(RewriteFilter* filter) { |
| // Track resource_fetches if we care about statistics. Note that |
| // the statistics are owned by the server context, which generally |
| // should be set up prior to the rewrite_driver. |
| // |
| // TODO(sligocki): It'd be nice to get this into the constructor. |
| resource_filter_map_[filter->id()] = filter; |
| filters_to_delete_.push_back(filter); |
| } |
| |
| void RewriteDriver::SetWriter(Writer* writer) { |
| writer_ = writer; |
| if (html_writer_filter_ == NULL) { |
| if (options()->Enabled(RewriteOptions::kCachePartialHtml) && |
| flushed_cached_html_) { |
| html_writer_filter_.reset(new CacheHtmlFilter(this)); |
| } else if (options()->Enabled(RewriteOptions::kFlushSubresources) && |
| flushing_early_) { |
| // If we are flushing early using this RewriteDriver object, we use the |
| // FlushEarlyContentWriterFilter. |
| html_writer_filter_.reset(new FlushEarlyContentWriterFilter(this)); |
| } else if (options()->Enabled(RewriteOptions::kSplitHtml)) { |
| html_writer_filter_.reset(new SplitHtmlFilter(this)); |
| } else if (options()->Enabled(RewriteOptions::kFlushSubresources)) { |
| html_writer_filter_.reset(new SuppressPreheadFilter(this)); |
| } else { |
| html_writer_filter_.reset(new HtmlWriterFilter(this)); |
| } |
| html_writer_filter_->set_case_fold(options()->lowercase_html_names()); |
| if (options()->Enabled(RewriteOptions::kHtmlWriterFilter)) { |
| HtmlParse::AddFilter(html_writer_filter_.get()); |
| } |
| } |
| |
| html_writer_filter_->set_writer(writer); |
| } |
| |
| Statistics* RewriteDriver::statistics() const { |
| return (server_context_ == NULL) ? NULL : server_context_->statistics(); |
| } |
| |
| void RewriteDriver::SetSessionFetcher(UrlAsyncFetcher* f) { |
| url_async_fetcher_ = f; |
| owned_url_async_fetchers_.push_back(f); |
| } |
| |
| CacheUrlAsyncFetcher* RewriteDriver::CreateCustomCacheFetcher( |
| UrlAsyncFetcher* base_fetcher) { |
| return server_context()->CreateCustomCacheFetcher( |
| options(), CacheFragment(), cache_url_async_fetcher_async_op_hooks_.get(), |
| base_fetcher); |
| } |
| |
| CacheUrlAsyncFetcher* RewriteDriver::CreateCacheFetcher() { |
| if (options()->mob_iframe() && !options()->mob_config()) { |
| IframeFetcher* ifetcher = new IframeFetcher( |
| options(), server_context_->user_agent_matcher(), |
| url_async_fetcher_); |
| CacheUrlAsyncFetcher* cache_fetcher = CreateCustomCacheFetcher(ifetcher); |
| cache_fetcher->set_own_fetcher(true); |
| return cache_fetcher; |
| } |
| return CreateCustomCacheFetcher(url_async_fetcher_); |
| } |
| |
| CacheUrlAsyncFetcher* RewriteDriver::CreateCacheOnlyFetcher() { |
| return CreateCustomCacheFetcher(NULL); |
| } |
| |
| bool RewriteDriver::Decode(StringPiece leaf, |
| ResourceNamer* resource_namer) const { |
| return resource_namer->Decode( |
| leaf, server_context()->hasher()->HashSizeInChars(), SignatureLength()); |
| } |
| |
| int RewriteDriver::SignatureLength() const { |
| return options()->url_signing_key().empty() |
| ? 0 |
| : options()->sha1signature()->SignatureSizeInChars(); |
| } |
| |
| bool RewriteDriver::DecodeOutputResourceNameHelper( |
| const GoogleUrl& gurl, |
| const RewriteOptions* options_to_use, |
| const UrlNamer* url_namer, |
| ResourceNamer* namer_out, |
| OutputResourceKind* kind_out, |
| RewriteFilter** filter_out, |
| GoogleString* url_base, |
| StringVector* urls) const { |
| // In forward proxy in preserve-URLs mode we want to fetch .pagespeed. |
| // resource, i.e. do not decode and and do not fetch original (especially |
| // that encoded one will never be cached internally). |
| if (options_to_use != NULL && options_to_use->oblivious_pagespeed_urls()) { |
| return false; |
| } |
| |
| // First, we can't handle anything that's not a valid URL nor is named |
| // properly as our resource. |
| if (!gurl.IsWebValid()) { |
| return false; |
| } |
| |
| StringPiece name = gurl.LeafSansQuery(); |
| if (!Decode(name, namer_out)) { |
| return false; |
| } |
| |
| // URLs without any hash are rejected as well, as they do not produce |
| // OutputResources with a computable URL. (We do accept 'wrong' hashes since |
| // they could come up legitimately under some asynchrony scenarios) |
| if (namer_out->hash().empty()) { |
| return false; |
| } |
| |
| GoogleString decoded_url; |
| // If we are running in proxy mode we need to ignore URLs where the leaf is |
| // encoded but the URL as a whole isn't proxy encoded, since that can happen |
| // when proxying from a server using mod_pagespeed. |
| // |
| // This is also important for XSS avoidance when running in proxy mode with |
| // a relaxed lawyer, as it ensures that resources will only ever go under |
| // the low-privilege proxy domain and not the trusted site domain. |
| // |
| // If we are running in proxy mode and the URL is in the proxy domain, we |
| // also need to ensure that the URL decodes correctly as otherwise we end |
| // up with an invalid decoded base URL, which ultimately leads to inability |
| // to rewrite the URL. |
| if (url_namer->ProxyMode()) { |
| if (!url_namer->IsProxyEncoded(gurl)) { |
| message_handler()->Message(kInfo, |
| "Decoding of resource name %s failed because " |
| "it is not proxy encoded.", |
| gurl.spec_c_str()); |
| return false; |
| } else if (!url_namer->Decode(gurl, options_to_use, NULL, &decoded_url)) { |
| message_handler()->Message(kInfo, |
| "Decoding of resource name %s failed because " |
| " the URL namer cannot decode it.", |
| gurl.spec_c_str()); |
| return false; |
| } |
| GoogleUrl decoded_gurl(decoded_url); |
| if (decoded_gurl.IsWebValid()) { |
| *url_base = (decoded_gurl.AllExceptLeaf()).as_string(); |
| } else { |
| return false; |
| } |
| } else { |
| *url_base = (gurl.AllExceptLeaf()).as_string(); |
| } |
| |
| // Now let's reject as mal-formed if the id string is not |
| // in the rewrite drivers. Also figure out the filter's preferred |
| // resource kind. |
| StringPiece id = namer_out->id(); |
| GoogleString id_str(id.data(), id.size()); |
| *kind_out = kRewrittenResource; |
| StringFilterMap::const_iterator p = resource_filter_map_.find(id_str); |
| if (p != resource_filter_map_.end()) { |
| *filter_out = p->second; |
| if ((*filter_out)->ComputeOnTheFly()) { |
| *kind_out = kOnTheFlyResource; |
| } |
| } else if ((id == CssOutlineFilter::kFilterId) || |
| (id == JsOutlineFilter::kFilterId)) { |
| // OutlineFilter is special because it's not a RewriteFilter -- it's |
| // just an HtmlFilter, but it does encode rewritten resources that |
| // must be served from the cache. |
| // |
| // TODO(jmarantz): figure out a better way to refactor this. |
| // TODO(jmarantz): add a unit-test to show serving outline-filter resources. |
| *kind_out = kOutlinedResource; |
| *filter_out = NULL; |
| } else { |
| message_handler()->Message(kInfo, |
| "Decoding of resource name %s failed because " |
| " there is no filter with id %s.", |
| gurl.spec_c_str(), id_str.c_str()); |
| return false; |
| } |
| |
| // Check if filter-specific decoding works as well. |
| // TODO(morlovich): This is doing some redundant work. |
| if (*filter_out != NULL) { |
| ResourceContext resource_context; |
| if (!(*filter_out)->encoder()->Decode( |
| namer_out->name(), urls, &resource_context, message_handler())) { |
| message_handler()->Message(kInfo, |
| "Decoding of resource name %s failed because " |
| " filter %s cannot decode the URL.", |
| gurl.spec_c_str(), (*filter_out)->Name()); |
| return false; |
| } |
| } |
| |
| // Check if the id string's filter is forbidden and reject the URL if so. |
| if (options_to_use->Forbidden(id_str)) { |
| message_handler()->Message(kInfo, |
| "Decoding of resource name %s failed because " |
| " filter_id %s is forbidden.", |
| gurl.spec_c_str(), id_str.c_str()); |
| return false; |
| } |
| |
| return true; |
| } |
| |
| bool RewriteDriver::DecodeOutputResourceName( |
| const GoogleUrl& gurl, |
| const RewriteOptions* options_to_use, |
| const UrlNamer* url_namer, |
| ResourceNamer* namer_out, |
| OutputResourceKind* kind_out, |
| RewriteFilter** filter_out) const { |
| StringVector urls; |
| GoogleString url_base; |
| return DecodeOutputResourceNameHelper( |
| gurl, options_to_use, url_namer, namer_out, kind_out, |
| filter_out, &url_base, &urls); |
| } |
| |
| bool RewriteDriver::DecodeUrl(const GoogleUrl& url, |
| StringVector* decoded_urls) const { |
| return DecodeUrlGivenOptions(url, options(), |
| server_context()->url_namer(), decoded_urls); |
| } |
| |
| bool RewriteDriver::DecodeUrlGivenOptions( |
| const GoogleUrl& url, |
| const RewriteOptions* options, |
| const UrlNamer* url_namer, |
| StringVector* decoded_urls) const { |
| ResourceNamer namer; |
| OutputResourceKind kind; |
| RewriteFilter* filter = NULL; |
| GoogleString url_base; |
| bool is_decoded = DecodeOutputResourceNameHelper( |
| url, options, url_namer, &namer, &kind, &filter, &url_base, decoded_urls); |
| if (is_decoded) { |
| GoogleUrl gurl_base(url_base); |
| for (int i = 0, n = decoded_urls->size(); i < n; ++i) { |
| GoogleUrl full_url(gurl_base, (*decoded_urls)[i]); |
| (*decoded_urls)[i] = full_url.Spec().as_string(); |
| } |
| } |
| return is_decoded; |
| } |
| |
| OutputResourcePtr RewriteDriver::DecodeOutputResource( |
| const GoogleUrl& gurl, |
| RewriteFilter** filter) const { |
| ResourceNamer namer; |
| OutputResourceKind kind; |
| if (!DecodeOutputResourceName(gurl, options(), server_context()->url_namer(), |
| &namer, &kind, filter)) { |
| return OutputResourcePtr(); |
| } |
| |
| StringPiece base = gurl.AllExceptLeaf(); |
| OutputResourcePtr output_resource( |
| new OutputResource(this, base, base, base, namer, kind)); |
| if (!output_resource.get()->CheckSignature()) { |
| output_resource.clear(); |
| } |
| return output_resource; |
| } |
| |
| namespace { |
| |
| class FilterFetch : public SharedAsyncFetch { |
| public: |
| FilterFetch(RewriteDriver* driver, AsyncFetch* async_fetch) |
| : SharedAsyncFetch(async_fetch), |
| driver_(driver) { |
| } |
| virtual ~FilterFetch() {} |
| |
| static bool Start(RewriteFilter* filter, |
| const OutputResourcePtr& output_resource, |
| AsyncFetch* async_fetch, |
| MessageHandler* handler) { |
| RewriteDriver* driver = filter->driver(); |
| FilterFetch* filter_fetch = new FilterFetch(driver, async_fetch); |
| |
| bool queued = false; |
| RewriteContext* context = filter->MakeRewriteContext(); |
| DCHECK(context != NULL); |
| if (context != NULL) { |
| queued = context->Fetch(output_resource, filter_fetch, handler); |
| } |
| if (!queued) { |
| RewriteStats* stats = driver->server_context()->rewrite_stats(); |
| stats->failed_filter_resource_fetches()->Add(1); |
| async_fetch->Done(false); |
| driver->FetchComplete(); |
| delete filter_fetch; |
| } |
| return queued; |
| } |
| |
| protected: |
| virtual void HandleDone(bool success) { |
| RewriteStats* stats = driver_->server_context()->rewrite_stats(); |
| if (success) { |
| stats->succeeded_filter_resource_fetches()->Add(1); |
| } else { |
| stats->failed_filter_resource_fetches()->Add(1); |
| } |
| SharedAsyncFetch::HandleDone(success); |
| driver_->FetchComplete(); |
| delete this; |
| } |
| |
| private: |
| RewriteDriver* driver_; |
| }; |
| |
| class CacheCallback : public OptionsAwareHTTPCacheCallback { |
| public: |
| CacheCallback(RewriteDriver* driver, |
| RewriteFilter* filter, |
| const OutputResourcePtr& output_resource, |
| AsyncFetch* async_fetch, |
| MessageHandler* handler) |
| : OptionsAwareHTTPCacheCallback(driver->options(), |
| async_fetch->request_context()), |
| driver_(driver), |
| filter_(filter), |
| output_resource_(output_resource), |
| async_fetch_(async_fetch), |
| handler_(handler) { |
| // Canonicalize the URL before looking it up. Applies |
| // rewrite-domain mappings, and reverses any sharding. E.g. |
| // if you have |
| // ModPagespeedMapRewriteDomain master alias |
| // ModPagespeedShardDomain master shard1,shard2 |
| // then this will convert: |
| // http://alias/foo --> http://master/foo |
| // http://shard1/foo --> http://master/foo |
| // http://shard2/foo --> http://master/foo |
| // http://master/foo --> http://master/foo |
| canonical_url_ = output_resource_->HttpCacheKey(); |
| } |
| |
| virtual ~CacheCallback() {} |
| |
| void Find() { |
| ServerContext* server_context = driver_->server_context(); |
| HTTPCache* http_cache = server_context->http_cache(); |
| http_cache->Find(canonical_url_, driver_->CacheFragment(), handler_, this); |
| } |
| |
| bool IsCacheValid(const GoogleString& key, const ResponseHeaders& headers) { |
| // If the user cares, don't try to send a rewritten .pagespeed. webp |
| // resources to a browser that can't handle it. |
| if (!driver_->options()->serve_rewritten_webp_urls_to_any_agent() && |
| (headers.DetermineContentType() == &kContentTypeWebp) && |
| !async_fetch_->request_context()->accepts_webp()) { |
| return false; |
| } |
| return OptionsAwareHTTPCacheCallback::IsCacheValid(key, headers); |
| } |
| |
| virtual void Done(HTTPCache::FindResult find_result) { |
| StringPiece content; |
| ResponseHeaders* response_headers = async_fetch_->response_headers(); |
| if (find_result.status == HTTPCache::kFound) { |
| RewriteStats* stats = driver_->server_context()->rewrite_stats(); |
| stats->cached_resource_fetches()->Add(1); |
| |
| HTTPValue* value = http_value(); |
| bool success = (value->ExtractContents(&content) && |
| value->ExtractHeaders(response_headers, handler_)); |
| if (success) { |
| output_resource_->Link(value, handler_); |
| output_resource_->SetWritten(true); |
| async_fetch_->set_content_length(content.size()); |
| async_fetch_->FixCacheControlForGoogleCache(); |
| async_fetch_->HeadersComplete(); |
| success = async_fetch_->Write(content, handler_); |
| } |
| async_fetch_->Done(success); |
| driver_->FetchComplete(); |
| delete this; |
| } else { |
| if (output_resource_->IsWritten()) { |
| // OutputResources can also be loaded while not in cache if |
| // FetchOutputResource() somehow got called on an already written |
| // resource object (while the cache somehow decided not to store it). |
| content = output_resource_->ExtractUncompressedContents(); |
| response_headers->CopyFrom(*output_resource_->response_headers()); |
| ServerContext* server_context = driver_->server_context(); |
| HTTPCache* http_cache = server_context->http_cache(); |
| http_cache->Put(canonical_url_, driver_->CacheFragment(), |
| RequestHeaders::Properties(), |
| (ResponseHeaders::GetVaryOption( |
| driver_->options()->respect_vary())), |
| response_headers, content, handler_); |
| async_fetch_->Done(async_fetch_->Write(content, handler_)); |
| driver_->FetchComplete(); |
| } else { |
| // Use the filter to reconstruct. |
| if (filter_ != NULL) { |
| FilterFetch::Start(filter_, output_resource_, async_fetch_, handler_); |
| } else { |
| response_headers->SetStatusAndReason(HttpStatus::kNotFound); |
| async_fetch_->Done(false); |
| driver_->FetchComplete(); |
| } |
| } |
| delete this; |
| } |
| } |
| |
| private: |
| RewriteDriver* driver_; |
| RewriteFilter* filter_; |
| OutputResourcePtr output_resource_; |
| AsyncFetch* async_fetch_; |
| MessageHandler* handler_; |
| GoogleString canonical_url_; |
| }; |
| |
| // A fetch that writes back to the base fetch, takes care of a few stats, |
| // and can recover from an early (before HeadersComplete) fetcher error by |
| // ignoring subsequent writes and calling FetchResources() on the driver once |
| // Done is called. |
| class DistributedFetchResourceFetch : public SharedAsyncFetch { |
| public: |
| // Increments the driver's async_events_count to ensure that it survives |
| // as long as the fetch does. |
| explicit DistributedFetchResourceFetch(AsyncFetch* base_fetch, |
| RewriteDriver* driver) |
| : SharedAsyncFetch(base_fetch), |
| driver_(driver), |
| early_failure_(false), |
| driver_fetch_(base_fetch), |
| url_(driver->fetch_url().as_string()) { |
| RequestHeaders* new_req_headers = new RequestHeaders(); |
| new_req_headers->CopyFrom(*driver_->request_headers()); |
| SetRequestHeadersTakingOwnership(new_req_headers); |
| } |
| |
| virtual ~DistributedFetchResourceFetch() {} |
| |
| // Subclasses should override HandleHeadersComplete and set early_failure_ |
| // to true if a recoverable failure is detected. |
| virtual void HandleHeadersComplete() { |
| if (response_headers()->status_code() >= |
| HttpStatus::kProxyPublisherFailure) { |
| // Was it an instaweb failure? If so, we'll make note of that and try |
| // again locally. |
| early_failure_ = true; |
| } else { |
| SharedAsyncFetch::HandleHeadersComplete(); |
| } |
| } |
| |
| virtual void HandleDone(bool success) { |
| // Bump the stats. |
| if (success) { |
| driver_->statistics() |
| ->GetVariable(RewriteContext::kNumDistributedRewriteSuccesses) |
| ->Add(1); |
| } else { |
| driver_->statistics() |
| ->GetVariable(RewriteContext::kNumDistributedRewriteFailures)->Add(1); |
| } |
| |
| if (early_failure_) { |
| // Perhaps an RPC error? We can recover from this state since we haven't |
| // written anything to the base fetch yet. Tell the driver to try again |
| // but this time don't distribute the request because |
| // tried_to_distribute_fetch_ is true. |
| driver_->FetchResource(url_, driver_fetch_); |
| } else { |
| SharedAsyncFetch::HandleDone(success); |
| } |
| driver_->DecrementAsyncEventsCount(); |
| delete this; |
| } |
| |
| virtual bool HandleWrite(const StringPiece& content, |
| MessageHandler* handler) { |
| if (early_failure_) { |
| return true; |
| } else { |
| return SharedAsyncFetch::HandleWrite(content, handler); |
| } |
| } |
| |
| void DispatchFetch() { |
| StringPiece distributed_key = driver_->options()->distributed_rewrite_key(); |
| request_headers()->Add(HttpAttributes::kXPsaDistributedRewriteFetch, |
| distributed_key); |
| // Nested driver fetches are not supposed to use deadlines, so block the |
| // distributed rewrite. |
| if (driver_->is_nested()) { |
| request_headers()->Add(HttpAttributes::kXPsaDistributedRewriteBlock, |
| distributed_key); |
| } |
| |
| RewriteOptionsManager* rewrite_options_manager = |
| driver_->server_context()->rewrite_options_manager(); |
| GoogleString url = driver_->fetch_url().as_string(); |
| driver_->IncrementAsyncEventsCount(); |
| rewrite_options_manager->PrepareRequest( |
| driver_->options(), driver_->request_context(), &url, request_headers(), |
| NewCallback(this, &DistributedFetchResourceFetch::StartFetch)); |
| } |
| |
| void StartFetch(bool success) { |
| if (success) { |
| driver_->distributed_fetcher()->Fetch(driver_->fetch_url().as_string(), |
| driver_->message_handler(), this); |
| } else { |
| // We failed. Try fetching again, but this time we won't distribute |
| // because tried_to_distribute_fetch_ is true. |
| driver_->FetchResource(driver_->fetch_url(), driver_fetch_); |
| driver_->DecrementAsyncEventsCount(); |
| delete this; |
| } |
| } |
| |
| private: |
| // This class increments the asynchronous event count on the RewriteDriver to |
| // ensure that it stays alive as long as the fetch does. |
| RewriteDriver* driver_; |
| bool early_failure_; |
| AsyncFetch* driver_fetch_; // This is owned externally. |
| GoogleString url_; |
| DISALLOW_COPY_AND_ASSIGN(DistributedFetchResourceFetch); |
| }; |
| |
| } // namespace |
| |
| bool RewriteDriver::ShouldDistributeFetch(const StringPiece& filter_id) { |
| // TODO(jkarlin): There is also a RewriteContext::ShouldDistributeFetch |
| // intended for the HTML-path but not the fetch paths. Consolidate the code if |
| // reasonable. |
| if (distributed_fetcher() == NULL || |
| !options()->Distributable(filter_id) || |
| !options()->distribute_fetches() || |
| tried_to_distribute_fetch_ || |
| options()->distributed_rewrite_key().empty() || |
| options()->distributed_rewrite_servers().empty()) { |
| return false; |
| } |
| |
| // Don't redistribute an already distributed rewrite. Note: We don't verify |
| // the distributed rewrite key because we want to be conservative about loop |
| // detection. |
| DCHECK(request_headers() != NULL); |
| if (request_headers() != NULL) { |
| if (request_headers()->Has(HttpAttributes::kXPsaDistributedRewriteFetch) || |
| request_headers()->Has( |
| HttpAttributes::kXPsaDistributedRewriteForHtml)) { |
| return false; |
| } |
| } |
| return true; |
| } |
| |
| bool RewriteDriver::DistributeFetch(const StringPiece& url, |
| const StringPiece& filter_id, |
| AsyncFetch* async_fetch) { |
| if (!ShouldDistributeFetch(filter_id)) { |
| return false; |
| } |
| DistributedFetchResourceFetch* dist_fetch = |
| new DistributedFetchResourceFetch(async_fetch, this); |
| tried_to_distribute_fetch_ = true; |
| // The following line might delete 'this' and clean up the RewriteDriver if it |
| // finishes fast enough so don't touch those things afterwards. |
| dist_fetch->DispatchFetch(); |
| return true; |
| } |
| |
| bool RewriteDriver::FetchResource(const StringPiece& url, |
| AsyncFetch* async_fetch) { |
| DCHECK_EQ(0, ref_counts_.QueryCountMutexHeld(kRefFetchUserFacing)); |
| DCHECK_EQ(0, ref_counts_.QueryCountMutexHeld(kRefFetchBackground)); |
| DCHECK_EQ(0, ref_counts_.QueryCountMutexHeld(kRefParsing)); |
| bool handled = false; |
| |
| fetch_url_ = url.as_string(); |
| |
| // Set the request headers if they haven't been yet. |
| if (request_headers_ == NULL && async_fetch->request_headers() != NULL) { |
| SetRequestHeaders(*async_fetch->request_headers()); |
| } |
| |
| // Note that this does permission checking and parsing of the url, but doesn't |
| // actually fetch any data until we specifically ask it to. |
| RewriteFilter* filter = NULL; |
| GoogleUrl gurl(url); |
| OutputResourcePtr output_resource(DecodeOutputResource(gurl, &filter)); |
| |
| if (output_resource.get() != NULL) { |
| handled = true; |
| if (filter != NULL) { |
| // TODO(marq): This is a gross generalization. Remove this and properly |
| // log the application of each rewrite filter. |
| filter->LogFilterModifiedContent(); |
| } |
| FetchOutputResource(output_resource, filter, async_fetch); |
| } else if (options()->in_place_rewriting_enabled()) { |
| // TODO(jcrowell): Make URLs with signatures take this path so they will 403 |
| // instead of 404. |
| // This is an ajax resource. |
| handled = true; |
| // TODO(sligocki): Get rid of this fallback and make all callers call |
| // FetchInPlaceResource when that is what they want. |
| FetchInPlaceResource(gurl, true /* proxy_mode */, async_fetch); |
| } |
| |
| // Note: "this" may have been deleted by this point. It is not safe to |
| // reference data members. |
| |
| return handled; |
| } |
| |
| void RewriteDriver::FetchInPlaceResource(const GoogleUrl& gurl, |
| bool proxy_mode, |
| AsyncFetch* async_fetch) { |
| CHECK(gurl.IsWebValid()) << "Invalid URL " << gurl.spec_c_str(); |
| CHECK(request_headers_.get() != NULL); |
| gurl.Spec().CopyToString(&fetch_url_); |
| StringPiece base = gurl.AllExceptLeaf(); |
| ResourceNamer namer; |
| OutputResourcePtr output_resource( |
| new OutputResource(this, base, base, base, namer, kRewrittenResource)); |
| SetBaseUrlForFetch(gurl.Spec()); |
| // Set the request headers if they haven't been yet. |
| if (request_headers_ == NULL && async_fetch->request_headers() != NULL) { |
| SetRequestHeaders(*async_fetch->request_headers()); |
| } |
| |
| if (DistributeFetch(fetch_url_, RewriteOptions::kInPlaceRewriteId, |
| async_fetch)) { |
| return; |
| } |
| |
| ref_counts_.AddRef(kRefFetchUserFacing); |
| InPlaceRewriteContext* context = new InPlaceRewriteContext(this, gurl.Spec()); |
| context->set_proxy_mode(proxy_mode); |
| |
| // Save pointer to stats_logger before "this" is deleted. |
| StatisticsLogger* stats_logger = |
| server_context_->statistics()->console_logger(); |
| |
| if (!context->Fetch(output_resource, async_fetch, message_handler())) { |
| // RewriteContext::Fetch can fail if the input URLs are undecodeable |
| // or unfetchable. There is no decoding in this case, but unfetchability |
| // is possible if we're given an https URL but have a fetcher that |
| // can't do it. In that case, the only thing we can do is fail |
| // and cleanup. |
| async_fetch->Done(false); |
| FetchComplete(); |
| } |
| |
| // Note: "this" may have been deleted by this point. It is not safe to |
| // reference data members. |
| |
| // Update statistics log. |
| if (stats_logger != NULL) { |
| stats_logger->UpdateAndDumpIfRequired(); |
| } |
| } |
| |
| bool RewriteDriver::FetchOutputResource( |
| const OutputResourcePtr& output_resource, |
| RewriteFilter* filter, |
| AsyncFetch* async_fetch) { |
| |
| if (DistributeFetch(output_resource->url(), output_resource->filter_prefix(), |
| async_fetch)) { |
| // TODO(jkarlin): This doesn't fill in the output_resource with the result |
| // of the fetch. Right now I believe the only thing expecting data to be in |
| // the output_resource is a nested_driver fetch in |
| // RewriteContext::FetchInputs (which calls FetchResource) but it currently |
| // copies from the fetch into the OutputResource anyway so nothing is broken |
| // yet. One option is to change the first parameter of FetchOutputResource |
| // to a URL instead of an OutputResourcePtr. |
| return true; |
| } |
| |
| // None of our resources ever change -- the hash of the content is embedded |
| // in the filename. This is why we serve them with very long cache |
| // lifetimes. However, when the user presses Reload, the browser may |
| // attempt to validate that the cached copy is still fresh by sending a GET |
| // with an If-Modified-Since header. If this header is present, we should |
| // return a 304 Not Modified, since any representation of the resource |
| // that's in the browser's cache must be correct. |
| bool queued = false; |
| ConstStringStarVector values; |
| // Save pointer to stats_logger before "this" is deleted. |
| StatisticsLogger* stats_logger = |
| server_context_->statistics()->console_logger(); |
| if (async_fetch->request_headers()->Lookup(HttpAttributes::kIfModifiedSince, |
| &values)) { |
| async_fetch->response_headers()->SetStatusAndReason( |
| HttpStatus::kNotModified); |
| async_fetch->HeadersComplete(); |
| async_fetch->Done(true); |
| queued = false; |
| } else { |
| SetBaseUrlForFetch(output_resource->url()); |
| ref_counts_.AddRef(kRefFetchUserFacing); |
| if (output_resource->kind() == kOnTheFlyResource || |
| MetadataRequested(*async_fetch->request_headers())) { |
| // Don't bother to look up the resource in the cache: ask the filter. If |
| // metadata is requested we need to skip the initial http cache lookup |
| // because we can't return until we've done a metadata lookup first. |
| if (filter != NULL) { |
| queued = FilterFetch::Start(filter, output_resource, async_fetch, |
| message_handler()); |
| } |
| } else { |
| CacheCallback* cache_callback = new CacheCallback( |
| this, filter, output_resource, async_fetch, message_handler()); |
| cache_callback->Find(); |
| queued = true; |
| } |
| } |
| |
| // Update statistics log. |
| if (stats_logger != NULL) { |
| stats_logger->UpdateAndDumpIfRequired(); |
| } |
| |
| return queued; |
| } |
| |
| void RewriteDriver::FetchComplete() { |
| DropReference(kRefFetchUserFacing); |
| } |
| |
| void RewriteDriver::DetachFetch() { |
| ScopedMutex lock(rewrite_mutex()); |
| CHECK_EQ(1, ref_counts_.QueryCountMutexHeld(kRefFetchUserFacing)); |
| CHECK_EQ(0, ref_counts_.QueryCountMutexHeld(kRefFetchBackground)); |
| ref_counts_.AddRefMutexHeld(kRefFetchBackground); |
| } |
| |
| void RewriteDriver::DetachedFetchComplete() { |
| DropReference(kRefFetchBackground); |
| } |
| |
| bool RewriteDriver::MayRewriteUrl( |
| const GoogleUrl& domain_url, |
| const GoogleUrl& input_url, |
| InlineAuthorizationPolicy inline_authorization_policy, |
| IntendedFor intended_for, |
| bool* is_authorized_domain) const { |
| *is_authorized_domain = false; |
| if (domain_url.IsWebValid()) { |
| if (options()->IsAllowed(input_url.Spec()) || |
| (intended_for == kIntendedForInlining && |
| options()->IsAllowedWhenInlining(input_url.Spec()))) { |
| *is_authorized_domain = options()->domain_lawyer()->IsDomainAuthorized( |
| domain_url, input_url); |
| if (!*is_authorized_domain && |
| inline_authorization_policy == kInlineUnauthorizedResources) { |
| // We decide that this URL can be rewritten (true) but |
| // is_authorized_domain will be retained as false to allow creation of |
| // the Resource object in the correct cache key space. |
| return true; |
| } |
| } |
| } |
| return *is_authorized_domain; |
| } |
| |
| bool RewriteDriver::MatchesBaseUrl(const GoogleUrl& input_url) const { |
| return (decoded_base_url_.IsWebValid() && |
| options()->IsAllowed(input_url.Spec()) && |
| decoded_base_url_.Origin() == input_url.Origin()); |
| } |
| |
| ResourcePtr RewriteDriver::CreateInputResource(const GoogleUrl& input_url, |
| bool* is_authorized) { |
| return CreateInputResource( |
| input_url, kInlineOnlyAuthorizedResources, kIntendedForGeneral, |
| is_authorized); |
| } |
| |
| ResourcePtr RewriteDriver::CreateInputResource( |
| const GoogleUrl& input_url, |
| InlineAuthorizationPolicy inline_authorization_policy, |
| IntendedFor intended_for, |
| bool* is_authorized) { |
| *is_authorized = true; // Must be false iff we fail b/c of authorization. |
| ResourcePtr resource; |
| bool may_rewrite = false; |
| if (input_url.SchemeIs("data")) { |
| // Skip and silently ignore; don't log a failure. |
| // For the moment we assume data: urls are small enough to not be worth |
| // optimizing. We have optimized them in the past, but that code is likely |
| // to have bit-rotted since it was disabled. |
| return resource; |
| } else if (decoded_base_url_.IsAnyValid()) { |
| may_rewrite = MayRewriteUrl(decoded_base_url_, input_url, |
| inline_authorization_policy, |
| intended_for, |
| is_authorized); |
| // In the case where we are proxying and we have resources that have been |
| // rewritten multiple times, input_url will still have the encoded domain, |
| // and we can rewrite that, so test again but against the encoded base url. |
| if (!may_rewrite) { |
| UrlNamer* namer = server_context()->url_namer(); |
| GoogleString decoded_input; |
| if (namer->Decode(input_url, options(), NULL, &decoded_input)) { |
| GoogleUrl decoded_url(decoded_input); |
| may_rewrite = MayRewriteUrl(decoded_base_url_, decoded_url, |
| inline_authorization_policy, |
| intended_for, |
| is_authorized); |
| } |
| } |
| } else { |
| // Shouldn't happen? |
| message_handler()->Message( |
| kFatal, "invalid decoded_base_url_ for '%s'", input_url.spec_c_str()); |
| LOG(DFATAL); |
| } |
| RewriteStats* stats = server_context_->rewrite_stats(); |
| if (may_rewrite) { |
| // *is_authorized may be true or false (if inlining an unauth'd URL). |
| resource = CreateInputResourceUnchecked(input_url, *is_authorized); |
| stats->resource_url_domain_acceptances()->Add(1); |
| } else { |
| DCHECK(!*is_authorized); |
| message_handler()->Message(kInfo, "No permission to rewrite '%s'", |
| input_url.spec_c_str()); |
| stats->resource_url_domain_rejections()->Add(1); |
| } |
| return resource; |
| } |
| |
| ResourcePtr RewriteDriver::CreateInputResourceAbsoluteUncheckedForTestsOnly( |
| const StringPiece& absolute_url) { |
| GoogleUrl url(absolute_url); |
| if (!url.IsWebOrDataValid()) { |
| // Note: Bad user-content can leave us here. But it's really hard |
| // to concatenate a valid protocol and domain onto an arbitrary string |
| // and end up with an invalid GURL. |
| message_handler()->Message(kInfo, "Invalid resource url '%s'", |
| url.spec_c_str()); |
| return ResourcePtr(); |
| } |
| return CreateInputResourceUnchecked(url, true); |
| } |
| |
| ResourcePtr RewriteDriver::CreateInputResourceUnchecked( |
| const GoogleUrl& url, |
| bool is_authorized_domain) { |
| StringPiece url_string = url.Spec(); |
| ResourcePtr resource; |
| |
| if (IsResourceUrlClaimed(url)) { |
| return resource; |
| } |
| |
| if (url.SchemeIs("data")) { |
| resource = DataUrlInputResource::Make(url_string, this); |
| if (resource.get() == NULL) { |
| // Note: Bad user-content can leave us here. |
| message_handler()->Message(kWarning, "Badly formatted data url '%s'", |
| url.spec_c_str()); |
| } |
| } else if (url.SchemeIs("http") || url.SchemeIs("https")) { |
| // Note: type may be NULL if url has an unexpected or malformed extension. |
| const ContentType* type = NameExtensionToContentType(url.LeafSansQuery()); |
| GoogleString filename; |
| if (options()->file_load_policy()->ShouldLoadFromFile(url, &filename)) { |
| resource.reset( |
| new FileInputResource(this, type, url_string, filename)); |
| } else { |
| // If the scheme is https and the fetcher doesn't support https, map |
| // the URL to what will ultimately be fetched to see if that will be |
| // http, in which case the fetcher will be able to handle it. |
| GoogleString mapped_url; |
| GoogleString host_header; |
| bool is_proxy = false; |
| options()->domain_lawyer()->MapOriginUrl(url, &mapped_url, |
| &host_header, &is_proxy); |
| GoogleUrl mapped_gurl(mapped_url); |
| if (mapped_gurl.SchemeIs("http") || |
| (mapped_gurl.SchemeIs("https") && |
| url_async_fetcher_->SupportsHttps())) { |
| resource.reset(new UrlInputResource(this, type, url_string, |
| is_authorized_domain)); |
| } else { |
| message_handler()->Message( |
| kInfo, "Cannot fetch url '%s': as %s is not supported", |
| url.spec_c_str(), mapped_gurl.Scheme().as_string().c_str()); |
| } |
| } |
| } else { |
| // Note: Valid user-content can leave us here. |
| // Specifically, any URLs with scheme other than data: or http: or https:. |
| // TODO(sligocki): Is this true? Or will such URLs not make it this far? |
| message_handler()->Message(kWarning, "Unsupported scheme '%s' for url '%s'", |
| url.Scheme().as_string().c_str(), |
| url.spec_c_str()); |
| } |
| return resource; |
| } |
| |
| bool RewriteDriver::IsResourceUrlClaimed(const GoogleUrl& url) const { |
| for (int i = 0, n = resource_claimants_.size(); i < n; ++i) { |
| bool claims = false; |
| resource_claimants_[i]->Run(url, &claims); |
| if (claims) { |
| return true; |
| } |
| } |
| return false; |
| } |
| |
| bool RewriteDriver::StartParseId(const StringPiece& url, const StringPiece& id, |
| const ContentType& content_type) { |
| if (response_headers_ != NULL) { |
| status_code_ = response_headers_->status_code(); |
| } |
| start_time_ms_ = server_context_->timer()->NowMs(); |
| set_log_rewrite_timing(options()->log_rewrite_timing()); |
| |
| if (debug_filter_ != NULL) { |
| debug_filter_->InitParse(); |
| } |
| |
| bool ret = HtmlParse::StartParseId(url, id, content_type); |
| if (ret) { |
| ScopedMutex lock(rewrite_mutex()); |
| DCHECK_EQ(0, ref_counts_.QueryCountMutexHeld(kRefParsing)); |
| ref_counts_.AddRefMutexHeld(kRefParsing); |
| } |
| |
| if (ret) { |
| base_was_set_ = false; |
| if (is_url_valid()) { |
| base_url_.Reset(google_url()); |
| SetDecodedUrlFromBase(); |
| } |
| } |
| |
| can_rewrite_resources_ = server_context_->metadata_cache()->IsHealthy(); |
| return ret; |
| } |
| |
| void RewriteDriver::ParseTextInternal(const char* content, int size) { |
| num_bytes_in_ += size; |
| if (ShouldSkipParsing()) { |
| writer()->Write(content, message_handler()); |
| } else if (debug_filter_ != NULL) { |
| debug_filter_->StartParse(); |
| HtmlParse::ParseTextInternal(content, size); |
| debug_filter_->EndParse(); |
| } else { |
| HtmlParse::ParseTextInternal(content, size); |
| } |
| } |
| |
| void RewriteDriver::SetDecodedUrlFromBase() { |
| UrlNamer* namer = server_context()->url_namer(); |
| GoogleString decoded_base; |
| if (namer->Decode(base_url_, options(), NULL, &decoded_base)) { |
| decoded_base_url_.Reset(decoded_base); |
| } else { |
| decoded_base_url_.Reset(base_url_); |
| } |
| DCHECK(decoded_base_url_.IsAnyValid()); |
| } |
| |
| bool RewriteDriver::ShouldSkipParsing() { |
| if (should_skip_parsing_ == kNotSet) { |
| bool should_skip = false; |
| PropertyPage* page = property_page(); |
| if (page != NULL) { |
| PropertyCache* pcache = server_context_->page_property_cache(); |
| const PropertyCache::Cohort* dom_cohort = pcache->GetCohort(kDomCohort); |
| if (dom_cohort != NULL) { |
| PropertyValue* property_value = property_page()->GetProperty( |
| dom_cohort, kParseSizeLimitExceeded); |
| should_skip = property_value->has_value() && |
| StringCaseEqual(property_value->value(), "1"); |
| } |
| } |
| should_skip_parsing_ = should_skip ? kTrue : kFalse; |
| } |
| return (should_skip_parsing_ == kTrue); |
| } |
| |
| bool RewriteDriver::PrepareShouldSignal() { |
| // Basically, we just save IsDone() from before state changes. |
| return IsDone(waiting_, waiting_deadline_reached_); |
| } |
| |
| void RewriteDriver::SignalIfRequired(bool result_of_prepare_should_signal) { |
| // If we were already done before, or no one is waiting, no need to signal |
| if (result_of_prepare_should_signal || waiting_ == kNoWait) { |
| return; |
| } |
| |
| if (IsDone(waiting_, waiting_deadline_reached_)) { |
| // If someone is waiting, refcount shouldn't be 0! |
| DCHECK(!release_driver_); |
| scheduler_->Signal(); |
| } |
| } |
| |
| void RewriteDriver::RewriteComplete(RewriteContext* rewrite_context, |
| bool permit_render) { |
| { |
| ScopedMutex lock(rewrite_mutex()); |
| DCHECK_EQ(0, ref_counts_.QueryCountMutexHeld(kRefFetchUserFacing)); |
| bool signal_cookie = PrepareShouldSignal(); |
| bool attached = false; |
| |
| // Rewrite transitions either pending -> deleting or detached -> deleting |
| ref_counts_.AddRefMutexHeld(kRefDeletingRewrites); |
| RewriteContextSet::iterator p = initiated_rewrites_.find(rewrite_context); |
| if (p != initiated_rewrites_.end()) { |
| if (rewrite_context->is_metadata_cache_miss()) { |
| // If the rewrite completed within the deadline and it actually involved |
| // and fetch rewrite (not a metadata hit or successful revalidate) then |
| // bump up the corresponding counter in log record. |
| ScopedMutex lock(log_record()->mutex()); |
| MetadataCacheInfo* metadata_log_info = |
| log_record()->logging_info()->mutable_metadata_cache_info(); |
| metadata_log_info->set_num_successful_rewrites_on_miss( |
| metadata_log_info->num_successful_rewrites_on_miss() + 1); |
| } |
| initiated_rewrites_.erase(p); |
| attached = true; |
| |
| ref_counts_.ReleaseRefMutexHeld(kRefPendingRewrites); |
| if (!rewrite_context->slow()) { |
| --possibly_quick_rewrites_; |
| } |
| } else { |
| int erased = detached_rewrites_.erase(rewrite_context); |
| CHECK_EQ(1, erased) << " rewrite_context " << rewrite_context |
| << " not in either detached_rewrites or " |
| << "initiated_rewrites_"; |
| ref_counts_.ReleaseRefMutexHeld(kRefDetachedRewrites); |
| } |
| // release_driver_ should be false since we moved a count between |
| // categories, and didn't change the total. |
| DCHECK(!release_driver_) << ref_counts_.DebugStringMutexHeld(); |
| rewrite_context->Propagate(attached && permit_render); |
| SignalIfRequired(signal_cookie); |
| } |
| } |
| |
| void RewriteDriver::ReportSlowRewrites(int num) { |
| ScopedMutex lock(rewrite_mutex()); |
| bool signal_cookie = PrepareShouldSignal(); |
| possibly_quick_rewrites_ -= num; |
| CHECK_LE(0, possibly_quick_rewrites_) << base_url_.Spec(); |
| SignalIfRequired(signal_cookie); |
| } |
| |
| void RewriteDriver::DeleteRewriteContext(RewriteContext* rewrite_context) { |
| delete rewrite_context; |
| DropReference(kRefDeletingRewrites); |
| } |
| |
| |
| void RewriteDriver::PossiblyPurgeCachedResponseAndReleaseDriver() { |
| DCHECK(!externally_managed_); |
| // We might temporarily (due to purging) revive the object here, so |
| // better clear the "we were told it's dead!" bit. |
| release_driver_ = false; |
| if (downstream_cache_purger_.MaybeIssuePurge(google_url())) { |
| return; |
| } |
| server_context_->ReleaseRewriteDriver(this); |
| } |
| |
| RewriteContext* RewriteDriver::RegisterForPartitionKey( |
| const GoogleString& partition_key, RewriteContext* candidate) { |
| std::pair<PrimaryRewriteContextMap::iterator, bool> insert_result = |
| primary_rewrite_context_map_.insert( |
| std::make_pair(partition_key, candidate)); |
| if (insert_result.second) { |
| // Our value is new, so just return NULL. |
| return NULL; |
| } else { |
| // Insert failed, return the old value. |
| return insert_result.first->second; |
| } |
| } |
| |
| void RewriteDriver::DeregisterForPartitionKey(const GoogleString& partition_key, |
| RewriteContext* rewrite_context) { |
| // If the context being deleted is the primary for some cache key, |
| // deregister it. |
| PrimaryRewriteContextMap::iterator i = |
| primary_rewrite_context_map_.find(partition_key); |
| if ((i != primary_rewrite_context_map_.end()) && |
| (i->second == rewrite_context)) { |
| primary_rewrite_context_map_.erase(i); |
| } |
| } |
| |
| void RewriteDriver::WriteDomCohortIntoPropertyCache() { |
| // Only update the property cache if there is a filter or option enabled that |
| // actually makes use of it. |
| if (!(write_property_cache_dom_cohort_ || |
| options()->max_html_parse_bytes() > 0)) { |
| return; |
| } |
| |
| PropertyPage* page = property_page(); |
| // Dont update property cache value if we are flushing early. |
| // TODO(jud): Is this the best place to check for shutting down? It might |
| // make more sense for this check to be done at the property cache or |
| // lower level. |
| if (server_context_->shutting_down() || |
| page == NULL || |
| !owns_property_page_) { |
| return; |
| } |
| // Update the timestamp of the last request in both actual property page |
| // and property page with fallback values. |
| UpdatePropertyValueInDomCohort( |
| fallback_property_page(), |
| kLastRequestTimestamp, |
| Integer64ToString(server_context()->timer()->NowMs())); |
| // Update the status code of the last request. |
| if (status_code_ != HttpStatus::kUnknownStatusCode) { |
| UpdatePropertyValueInDomCohort( |
| fallback_property_page(), |
| kStatusCodePropertyName, IntegerToString(status_code_)); |
| } |
| if (options()->max_html_parse_bytes() > 0) { |
| // Update whether the page exceeded the html parse size limit. |
| UpdatePropertyValueInDomCohort( |
| page, kParseSizeLimitExceeded, |
| num_bytes_in_ > options()->max_html_parse_bytes() ? "1" : "0"); |
| } |
| if (flush_early_info_.get() != NULL) { |
| GoogleString value; |
| flush_early_info_->SerializeToString(&value); |
| UpdatePropertyValueInDomCohort( |
| fallback_property_page(), kSubresourcesPropertyName, value); |
| } |
| // Write dom cohort for both actual property page and property page with |
| // fallback values. |
| fallback_property_page()->WriteCohort(server_context()->dom_cohort()); |
| } |
| |
| void RewriteDriver::UpdatePropertyValueInDomCohort( |
| AbstractPropertyPage* page, |
| StringPiece property_name, |
| StringPiece property_value) { |
| if (page == NULL || !owns_property_page_) { |
| return; |
| } |
| page->UpdateValue( |
| server_context()->dom_cohort(), property_name, property_value); |
| } |
| |
| void RewriteDriver::Cleanup() { |
| { |
| // TODO(morlovich): Clean this up, it's a rather inappropriate place to |
| // do this. |
| ScopedMutex lock(log_record()->mutex()); |
| if (!log_record()->logging_info()->has_experiment_id()) { |
| log_record()->logging_info()->set_experiment_id( |
| options()->experiment_id()); |
| } |
| } |
| DropReference(kRefUser); |
| } |
| |
| void RewriteDriver::AddUserReference() { |
| ref_counts_.AddRef(kRefUser); |
| } |
| |
| namespace { |
| |
| void AppendBool(GoogleString* out, const char* name, bool val) { |
| StrAppend(out, name, ": ", val ? "true\n": "false\n"); |
| } |
| |
| } // namespace |
| |
| GoogleString RewriteDriver::ToString(bool show_detached_contexts) const { |
| GoogleString out; |
| { |
| ScopedMutex lock(rewrite_mutex()); |
| StrAppend(&out, "URL: ", google_url().Spec(), "\n"); |
| StrAppend(&out, "decoded_base: ", decoded_base_url().Spec(), "\n"); |
| AppendBool(&out, "base_was_set", base_was_set_); |
| StrAppend(&out, "containing_charset: ", containing_charset_, "\n"); |
| AppendBool(&out, "filters_added", filters_added_); |
| AppendBool(&out, "externally_managed", externally_managed_); |
| switch (waiting_) { |
| case kNoWait: |
| StrAppend(&out, "waiting: kNoWait\n"); |
| break; |
| case kWaitForCompletion: |
| StrAppend(&out, "waiting: kWaitForCompletion\n"); |
| break; |
| case kWaitForCachedRender: |
| StrAppend(&out, "waiting: kWaitForCachedRender\n"); |
| break; |
| case kWaitForShutDown: |
| StrAppend(&out, "waiting: kWaitForShutDown\n"); |
| break; |
| default: |
| StrAppend(&out, "waiting: ", IntegerToString(waiting_)); |
| break; |
| } |
| AppendBool(&out, "waiting_deadline_reached", waiting_deadline_reached_); |
| StrAppend(&out, "detached_rewrites_.size(): ", |
| IntegerToString(detached_rewrites_.size()), "\n"); |
| |
| if (show_detached_contexts) { |
| for (RewriteContextSet::iterator p = detached_rewrites_.begin(), |
| e = detached_rewrites_.end(); p != e; ++p) { |
| RewriteContext* detached_rewrite = *p; |
| StrAppend(&out, " Detached Rewrite:\n", |
| detached_rewrite->ToStringWithPrefix(" ")); |
| } |
| } |
| AppendBool(&out, "RewritesComplete()", RewritesComplete()); |
| AppendBool(&out, "fully_rewrite_on_flush", fully_rewrite_on_flush_); |
| AppendBool(&out, "fast_blocking_rewrite", fast_blocking_rewrite_); |
| AppendBool(&out, "flush_requested", flush_requested_); |
| AppendBool(&out, "flush_occurred", flush_occurred_); |
| AppendBool(&out, "flushed_early", flushed_early_); |
| AppendBool(&out, "flushing_early", flushing_early_); |
| AppendBool(&out, "is_lazyload_script_flushed", is_lazyload_script_flushed_); |
| AppendBool(&out, "release_driver", release_driver_); |
| AppendBool(&out, "write_property_cache_dom_cohort", |
| write_property_cache_dom_cohort_); |
| AppendBool(&out, "using_spdy", using_spdy()); |
| AppendBool(&out, "owns_property_page", owns_property_page_); |
| AppendBool(&out, "xhtml_mimetype_computed", xhtml_mimetype_computed_); |
| AppendBool(&out, "can_rewrite_resources", can_rewrite_resources_); |
| AppendBool(&out, "is_nested", is_nested()); |
| StrAppend(&out, "ref counts:\n", ref_counts_.DebugStringMutexHeld()); |
| } |
| return out; |
| } |
| |
| void RewriteDriver::PrintState(bool show_detached_contexts) { |
| fputs(ToString(show_detached_contexts).c_str(), stderr); |
| fputc('\n', stderr); |
| } |
| |
| void RewriteDriver::PrintStateToErrorLog(bool show_detached_contexts) { |
| message_handler()->MessageS(kError, ToString(show_detached_contexts)); |
| } |
| |
| void RewriteDriver::LogStats() { |
| if (dom_stats_filter_ != NULL && log_record() != NULL) { |
| log_record()->SetImageStats(dom_stats_filter_->num_img_tags(), |
| dom_stats_filter_->num_inlined_img_tags(), |
| dom_stats_filter_->num_critical_images_used()); |
| log_record()->SetResourceCounts(dom_stats_filter_->num_external_css(), |
| dom_stats_filter_->num_scripts()); |
| } |
| request_properties_->LogDeviceInfo( |
| log_record(), options()->enable_aggressive_rewriters_for_mobile()); |
| bool is_xhr = request_headers() != NULL && |
| request_headers()->IsXmlHttpRequest(); |
| log_record()->LogIsXhr(is_xhr); |
| } |
| |
| void RewriteDriver::FinishParse() { |
| SchedulerBlockingFunction wait(scheduler_); |
| FinishParseAsync(&wait); |
| wait.Block(); |
| } |
| |
| void RewriteDriver::FinishParseAsync(Function* callback) { |
| HtmlParse::BeginFinishParse(); |
| FlushAsync( |
| MakeFunction(this, &RewriteDriver::QueueFinishParseAfterFlush, callback)); |
| } |
| |
| void RewriteDriver::QueueFinishParseAfterFlush(Function* user_callback) { |
| Function* finish_parse = MakeFunction(this, |
| &RewriteDriver::FinishParseAfterFlush, |
| user_callback); |
| html_worker_->Add(finish_parse); |
| } |
| |
| void RewriteDriver::FinishParseAfterFlush(Function* user_callback) { |
| DCHECK_EQ(0U, GetEventQueueSize()); |
| HtmlParse::EndFinishParse(); |
| LogStats(); |
| WriteDomCohortIntoPropertyCache(); |
| |
| // Update stats. |
| RewriteStats* stats = server_context_->rewrite_stats(); |
| stats->rewrite_latency_histogram()->Add( |
| server_context_->timer()->NowMs() - start_time_ms_); |
| stats->total_rewrite_count()->IncBy(1); |
| |
| // Update statistics log. |
| StatisticsLogger* stats_logger = |
| server_context_->statistics()->console_logger(); |
| if (stats_logger != NULL) { |
| stats_logger->UpdateAndDumpIfRequired(); |
| } |
| |
| DropReference(kRefParsing); |
| Cleanup(); |
| if (user_callback != NULL) { |
| user_callback->CallRun(); |
| } |
| } |
| |
| void RewriteDriver::InfoAt(const RewriteContext* context, |
| const char* msg, ...) { |
| va_list args; |
| va_start(args, msg); |
| |
| if ((context == NULL) || (context->num_slots() == 0)) { |
| InfoHereV(msg, args); |
| } else { |
| GoogleString new_msg; |
| for (int c = 0; c < context->num_slots(); ++c) { |
| StrAppend(&new_msg, context->slot(c)->LocationString(), |
| ((c == context->num_slots() - 1) ? ": " : " ")); |
| } |
| StringAppendV(&new_msg, msg, args); |
| message_handler()->MessageS(kInfo, new_msg); |
| } |
| |
| va_end(args); |
| } |
| |
| // Constructs name and URL for the specified input resource and encoder. |
| bool RewriteDriver::GenerateOutputResourceNameAndUrl( |
| const UrlSegmentEncoder* encoder, |
| const ResourceContext* data, |
| const ResourcePtr& input_resource, |
| GoogleString* name, |
| GoogleUrl* mapped_gurl, |
| GoogleString* failure_reason) { |
| if (input_resource.get() == NULL) { |
| *failure_reason = "No input resource."; |
| return false; |
| } |
| |
| // TODO(jmarantz): It would be more efficient to pass in the base |
| // document GURL or save that in the input resource. |
| GoogleUrl unmapped_gurl(input_resource->url()); |
| GoogleString mapped_domain; // Unused. TODO(sligocki): Stop setting this? |
| // Get the domain and URL after any domain lawyer rewriting. |
| if (!options()->IsAllowed(unmapped_gurl.Spec())) { |
| *failure_reason = StrCat("Rewriting disallowed for ", unmapped_gurl.Spec()); |
| return false; |
| } |
| |
| if (!options()->domain_lawyer()->MapRequestToDomain( |
| unmapped_gurl, unmapped_gurl.Spec(), &mapped_domain, mapped_gurl, |
| server_context_->message_handler())) { |
| *failure_reason = StrCat("Domain not authorized for ", |
| unmapped_gurl.Spec()); |
| return false; |
| } |
| |
| StringVector v; |
| v.push_back(mapped_gurl->LeafWithQuery().as_string()); |
| encoder->Encode(v, data, name); |
| return true; |
| } |
| |
| // Constructs an output resource corresponding to the specified input resource |
| // and encoded using the provided encoder. |
| OutputResourcePtr RewriteDriver::CreateOutputResourceFromResource( |
| const char* filter_id, |
| const UrlSegmentEncoder* encoder, |
| const ResourceContext* data, |
| const ResourcePtr& input_resource, |
| OutputResourceKind kind, |
| GoogleString* failure_reason) { |
| OutputResourcePtr result; |
| GoogleString name; |
| GoogleUrl mapped_gurl; |
| if (!GenerateOutputResourceNameAndUrl(encoder, data, input_resource, &name, |
| &mapped_gurl, failure_reason)) { |
| return result; |
| } |
| |
| // TODO(jmarantz): It would be more efficient to pass in the base |
| // document GURL or save that in the input resource. |
| GoogleUrl unmapped_gurl(input_resource->url()); |
| |
| result.reset(CreateOutputResourceWithMappedPath( |
| mapped_gurl.AllExceptLeaf(), unmapped_gurl.AllExceptLeaf(), |
| filter_id, name, kind, failure_reason)); |
| |
| CHECK(input_resource->is_authorized_domain()); |
| return result; |
| } |
| |
| void RewriteDriver::PopulateResourceNamer( |
| const StringPiece& filter_id, |
| const StringPiece& name, |
| ResourceNamer* full_name) { |
| full_name->set_id(filter_id); |
| full_name->set_name(name); |
| full_name->set_experiment(options()->GetExperimentStateStr()); |
| |
| // Note that we never populate ResourceNamer::options for in place resource |
| // rewrites. |
| if (filter_id != RewriteOptions::kInPlaceRewriteId && |
| !full_name->has_experiment() && options()->add_options_to_urls()) { |
| GoogleString resource_option = RewriteQuery::GenerateResourceOption( |
| filter_id, this); |
| full_name->set_options(resource_option); |
| } else { |
| full_name->set_options(""); |
| } |
| } |
| |
| OutputResourcePtr RewriteDriver::CreateOutputResourceWithPath( |
| const StringPiece& mapped_path, |
| const StringPiece& unmapped_path, |
| const StringPiece& base_url, |
| const StringPiece& filter_id, |
| const StringPiece& name, |
| OutputResourceKind kind, |
| GoogleString* failure_reason) { |
| ResourceNamer full_name; |
| PopulateResourceNamer(filter_id, name, &full_name); |
| OutputResourcePtr resource; |
| int max_leaf_size = |
| full_name.EventualSize(*server_context_->hasher(), SignatureLength()) + |
| ContentType::MaxProducedExtensionLength(); |
| if (max_leaf_size > options()->max_url_segment_size()) { |
| *failure_reason = "Rewritten URL segment too long."; |
| return resource; |
| } |
| |
| bool no_hash = false; |
| int extra_len = 0; |
| Hasher* hasher = server_context()->hasher(); |
| if (full_name.hash().empty()) { |
| // Content and content type are not present. So set some nonzero hash and |
| // assume largest possible extension. |
| no_hash = true; |
| full_name.set_hash(GoogleString(hasher->HashSizeInChars(), '#')); |
| extra_len = ContentType::MaxProducedExtensionLength(); |
| } |
| resource.reset(new OutputResource( |
| this, mapped_path, unmapped_path, base_url, full_name, kind)); |
| |
| if (options()->max_url_size() < |
| (static_cast<int>(resource->url().size()) + extra_len)) { |
| *failure_reason = StrCat("Rewritten URL too long: ", resource->url()); |
| resource.clear(); |
| return resource; |
| } |
| if (no_hash) { |
| resource->clear_hash(); |
| } |
| return resource; |
| } |
| |
| OutputResourcePtr RewriteDriver::CreateOutputResourceWithUnmappedUrl( |
| const GoogleUrl& unmapped_gurl, const StringPiece& filter_id, |
| const StringPiece& name, OutputResourceKind kind, |
| GoogleString* failure_reason) { |
| OutputResourcePtr resource; |
| GoogleString mapped_domain; // Unused. TODO(sligocki): Stop setting this? |
| GoogleUrl mapped_gurl; |
| // Get the domain and URL after any domain lawyer rewriting. |
| if (!options()->IsAllowed(unmapped_gurl.Spec())) { |
| *failure_reason = StrCat("Rewriting disallowed for ", unmapped_gurl.Spec()); |
| return resource; |
| } |
| if (!options()->domain_lawyer()->MapRequestToDomain( |
| unmapped_gurl, unmapped_gurl.Spec(), &mapped_domain, &mapped_gurl, |
| server_context_->message_handler())) { |
| *failure_reason = StrCat("Domain not authorized for ", |
| unmapped_gurl.Spec()); |
| return resource; |
| } |
| |
| resource.reset(CreateOutputResourceWithMappedPath( |
| mapped_gurl.AllExceptLeaf(), unmapped_gurl.AllExceptLeaf(), |
| filter_id, name, kind, failure_reason)); |
| return resource; |
| } |
| |
| void RewriteDriver::SetBaseUrlIfUnset(const StringPiece& new_base) { |
| // Base url is relative to the document URL in HTML5, but not in |
| // HTML4.01. FF3.x does it HTML4.01 way, Chrome, Opera 11 and FF4 |
| // betas do it according to HTML5, as is our implementation here. |
| GoogleUrl new_base_url(base_url_, new_base); |
| if (new_base_url.IsAnyValid()) { |
| if (base_was_set_) { |
| if (new_base_url.Spec() != base_url_.Spec()) { |
| InfoHere("Conflicting base tags: %s and %s", |
| new_base_url.spec_c_str(), |
| base_url_.spec_c_str()); |
| } |
| } else { |
| base_was_set_ = true; |
| base_url_.Swap(&new_base_url); |
| SetDecodedUrlFromBase(); |
| } |
| } else { |
| InfoHere("Invalid base tag %s relative to %s", |
| new_base.as_string().c_str(), |
| base_url_.spec_c_str()); |
| } |
| } |
| |
| void RewriteDriver::SetBaseUrlForFetch(const StringPiece& url) { |
| // Set the base url for the resource fetch. This corresponds to where the |
| // fetched resource resides (which might or might not be where the original |
| // resource lived). |
| |
| // TODO(jmaessen): we're re-constructing a GoogleUrl after having already |
| // done so (repeatedly over several calls) in DecodeOutputResource! Gah! |
| // We at least assume that base_url_ is valid since it was checked when |
| // output_resource was created. |
| base_url_.Reset(url); |
| DCHECK(base_url_.IsAnyValid()); |
| SetDecodedUrlFromBase(); |
| base_was_set_ = false; |
| } |
| |
| RewriteFilter* RewriteDriver::FindFilter(const StringPiece& id) const { |
| RewriteFilter* filter = NULL; |
| StringFilterMap::const_iterator p = resource_filter_map_.find(id.as_string()); |
| if (p != resource_filter_map_.end()) { |
| filter = p->second; |
| } |
| return filter; |
| } |
| |
| HtmlResourceSlotPtr RewriteDriver::GetSlot( |
| const ResourcePtr& resource, HtmlElement* elt, |
| HtmlElement::Attribute* attr) { |
| HtmlResourceSlotPtr slot(new HtmlResourceSlot(resource, elt, attr, this)); |
| std::pair<HtmlResourceSlotSet::iterator, bool> iter_inserted = |
| slots_.insert(slot); |
| if (!iter_inserted.second) { |
| // The slot was already in the set. Release the one we just |
| // allocated and use the one already in. |
| HtmlResourceSlotSet::iterator iter = iter_inserted.first; |
| slot.reset(*iter); |
| } |
| return slot; |
| } |
| |
| InlineResourceSlotPtr RewriteDriver::GetInlineSlot( |
| const ResourcePtr& resource, HtmlCharactersNode* char_node) { |
| InlineResourceSlotPtr slot( |
| new InlineResourceSlot(resource, char_node, UrlLine())); |
| std::pair<InlineResourceSlotSet::iterator, bool> iter_inserted = |
| inline_slots_.insert(slot); |
| if (!iter_inserted.second) { |
| // The slot was already in the set. Release the one we just |
| // allocated and use the one already in. |
| InlineResourceSlotSet::iterator iter = iter_inserted.first; |
| slot.reset(*iter); |
| } |
| return slot; |
| } |
| |
| InlineAttributeSlotPtr RewriteDriver::GetInlineAttributeSlot( |
| const ResourcePtr& resource, HtmlElement* element, |
| HtmlElement::Attribute* attribute) { |
| InlineAttributeSlotPtr slot( |
| new InlineAttributeSlot(resource, element, attribute, UrlLine())); |
| std::pair<InlineAttributeSlotSet::iterator, bool> iter_inserted = |
| inline_attribute_slots_.insert(slot); |
| if (!iter_inserted.second) { |
| // The slot was already in the set. Release the one we just |
| // allocated and use the one already in. |
| InlineAttributeSlotSet::iterator iter = iter_inserted.first; |
| slot.reset(*iter); |
| } |
| return slot; |
| } |
| |
| bool RewriteDriver::InitiateRewrite(RewriteContext* rewrite_context) { |
| #ifndef NDEBUG |
| { |
| ScopedMutex lock(rewrite_mutex()); |
| DCHECK_EQ(0, ref_counts_.QueryCountMutexHeld(kRefFetchUserFacing)); |
| } |
| #endif |
| |
| // Drop all rewrites if metadata_cache is unhealthy. This has |
| // got to be done 100% or not at all, otherwise we can wind up with |
| // a broken slot-context graph. |
| // |
| // Note that we strobe cache health at the beginning of request |
| // (StartParseId), so that we don't decide in the middle of an HTML |
| // rewrite that we won't be able to initialize the resource, thus leaving |
| // us with a partially constructed slot-graph. |
| if (!can_rewrite_resources_) { |
| if (rewrites_.empty()) { |
| rewrite_context->DetachSlots(); |
| delete rewrite_context; |
| return false; |
| } else { |
| // A programming error has allowed a RewriteContext to be added |
| // despite not being able to rewrite resources. Log a fatal for |
| // debug builds, and otherwise fall through to keep the context-slot |
| // graph coherent. |
| LOG(DFATAL) |
| << "Unexpected queued RewriteContext when cannot rewrite resources"; |
| } |
| } |
| rewrites_.push_back(rewrite_context); |
| { |
| ScopedMutex lock(rewrite_mutex()); |
| ref_counts_.AddRefMutexHeld(kRefPendingRewrites); |
| ++possibly_quick_rewrites_; |
| } |
| return true; |
| } |
| |
| void RewriteDriver::InitiateFetch(RewriteContext* rewrite_context) { |
| // TODO(jmarantz): consider setting a bit in the RewriteContext |
| // based on server_context_->metadata_cache()->IsHealthy() to tell |
| // the system not to perform any optimization on single resources, |
| // since the results would not wind up cached. Instead, just serve |
| // the origin resource as it's fetched. For combined resources, of |
| // course, we'll have to run the combiner logic on the fetched data |
| // after we collect it all in memory. |
| DCHECK_EQ(0, ref_counts_.QueryCountMutexHeld(kRefParsing)); |
| DCHECK_EQ(1, ref_counts_.QueryCountMutexHeld(kRefFetchUserFacing)); |
| fetch_rewrites_.push_back(rewrite_context); |
| } |
| |
| bool RewriteDriver::MayCacheExtendCss() const { |
| return options()->Enabled(RewriteOptions::kExtendCacheCss); |
| } |
| |
| bool RewriteDriver::MayCacheExtendImages() const { |
| return options()->Enabled(RewriteOptions::kExtendCacheImages); |
| } |
| |
| bool RewriteDriver::MayCacheExtendPdfs() const { |
| return options()->Enabled(RewriteOptions::kExtendCachePdfs); |
| } |
| |
| bool RewriteDriver::MayCacheExtendScripts() const { |
| return options()->Enabled(RewriteOptions::kExtendCacheScripts); |
| } |
| |
| void RewriteDriver::AddRewriteTask(Function* task) { |
| rewrite_worker_->Add(task); |
| } |
| |
| void RewriteDriver::AddLowPriorityRewriteTask(Function* task) { |
| low_priority_rewrite_worker_->Add(task); |
| } |
| |
| OptionsAwareHTTPCacheCallback::OptionsAwareHTTPCacheCallback( |
| const RewriteOptions* rewrite_options, const RequestContextPtr& request_ctx) |
| : HTTPCache::Callback(request_ctx, RequestHeaders::Properties()), |
| rewrite_options_(rewrite_options) { |
| // We initialize the callback with a blank RequestHeaders::Properties, |
| // rather than extracing the actual request properties from |
| // request_ctx->request_headers(). This is because, with our domain |
| // mapping, we don't know for sure whether cookies should apply |
| // to Vary:Cacheable resources. So we pessimistically assume there |
| // are cookies by initializing a blank one. |
| |
| response_headers()->set_implicit_cache_ttl_ms( |
| rewrite_options->implicit_cache_ttl_ms()); |
| response_headers()->set_min_cache_ttl_ms(rewrite_options->min_cache_ttl_ms()); |
| } |
| |
| OptionsAwareHTTPCacheCallback::~OptionsAwareHTTPCacheCallback() {} |
| |
| bool OptionsAwareHTTPCacheCallback::IsCacheValid( |
| const GoogleString& key, const ResponseHeaders& headers) { |
| return IsCacheValid(key, *rewrite_options_, request_context(), headers); |
| } |
| |
| ResponseHeaders::VaryOption |
| OptionsAwareHTTPCacheCallback::RespectVaryOnResources() const { |
| return ResponseHeaders::GetVaryOption(rewrite_options_->respect_vary()); |
| } |
| |
| // static |
| bool OptionsAwareHTTPCacheCallback::IsCacheValid( |
| const GoogleString& url, |
| const RewriteOptions& rewrite_options, |
| const RequestContextPtr& request_ctx, |
| const ResponseHeaders& headers) { |
| if ((headers.DetermineContentType() == &kContentTypeWebp) && |
| !request_ctx->accepts_webp() && |
| headers.HasValue(HttpAttributes::kVary, HttpAttributes::kAccept)) { |
| return false; |
| } |
| |
| return (headers.has_date_ms() && |
| rewrite_options.IsUrlCacheValid(url, headers.date_ms(), |
| true /* search_wildcards */)); |
| } |
| |
| int64 OptionsAwareHTTPCacheCallback::OverrideCacheTtlMs( |
| const GoogleString& key) { |
| if (rewrite_options_->IsCacheTtlOverridden(key)) { |
| return rewrite_options_->override_caching_ttl_ms(); |
| } |
| return -1; |
| } |
| |
| RewriteDriver::CssResolutionStatus RewriteDriver::ResolveCssUrls( |
| const GoogleUrl& input_css_base, |
| const StringPiece& output_css_base, |
| const StringPiece& contents, |
| Writer* writer, |
| MessageHandler* handler) { |
| GoogleUrl output_base(output_css_base); |
| bool proxy_mode; |
| if (ShouldAbsolutifyUrl(input_css_base, output_base, &proxy_mode)) { |
| RewriteDomainTransformer transformer(&input_css_base, &output_base, |
| server_context(), options(), |
| message_handler()); |
| if (proxy_mode) { |
| // If URLs are being rewritten to a proxy domain, then trimming |
| // them based purely on the domain-lawyer mappings is going to |
| // relativize them so that they cannot be resolved properly in |
| // their intended context. |
| // |
| // TODO(jmarantz): Consider merging the url_namer with DomainLawyer |
| // so that DomainLawyer::WillDomainChange will be accurate. |
| transformer.set_trim_urls(false); |
| } |
| if (CssTagScanner::TransformUrls(contents, writer, &transformer, handler)) { |
| return kSuccess; |
| } else { |
| return kWriteFailed; |
| } |
| } |
| return kNoResolutionNeeded; |
| } |
| |
| bool RewriteDriver::ShouldAbsolutifyUrl(const GoogleUrl& input_base, |
| const GoogleUrl& output_base, |
| bool* proxy_mode) const { |
| bool result = false; |
| const UrlNamer* url_namer = server_context_->url_namer(); |
| bool proxying = url_namer->ProxyMode(); |
| |
| if (proxying) { |
| result = true; |
| } else if (input_base.AllExceptLeaf() != output_base.AllExceptLeaf()) { |
| result = true; |
| } else { |
| const DomainLawyer* domain_lawyer = options()->domain_lawyer(); |
| result = domain_lawyer->WillDomainChange(input_base); |
| } |
| |
| if (proxy_mode != NULL) { |
| *proxy_mode = proxying; |
| } |
| |
| return result; |
| } |
| |
| PropertyPage* RewriteDriver::property_page() const { |
| return fallback_property_page_ == NULL ? |
| NULL : fallback_property_page_->actual_property_page(); |
| } |
| |
| PropertyPage* RewriteDriver::origin_property_page() const { |
| return origin_property_page_.get(); |
| } |
| |
| // This is in the .cc rather than the header to avoid the need to |
| // include property_cache.h in the header. |
| void RewriteDriver::set_property_page(PropertyPage* page) { |
| if (page == NULL) { |
| set_fallback_property_page(NULL); |
| return; |
| } |
| FallbackPropertyPage* fallback_page = new FallbackPropertyPage(page, NULL); |
| set_fallback_property_page(fallback_page); |
| } |
| |
| void RewriteDriver::set_fallback_property_page(FallbackPropertyPage* page) { |
| if (owns_property_page_) { |
| delete fallback_property_page_; |
| } |
| fallback_property_page_ = page; |
| owns_property_page_ = true; |
| } |
| |
| void RewriteDriver::set_unowned_fallback_property_page( |
| FallbackPropertyPage* page) { |
| if (owns_property_page_) { |
| delete fallback_property_page_; |
| } |
| fallback_property_page_ = page; |
| owns_property_page_ = false; |
| } |
| |
| void RewriteDriver::set_origin_property_page(PropertyPage* page) { |
| origin_property_page_.reset(page); |
| } |
| |
| bool RewriteDriver::CriticalSelectorsEnabled() const { |
| return (options()->Enabled(RewriteOptions::kPrioritizeCriticalCss) && |
| (server_context()->factory()->UseBeaconResultsInFilters() || |
| options()->use_selectors_for_critical_css())); |
| } |
| |
| void RewriteDriver::increment_num_inline_preview_images() { |
| ++num_inline_preview_images_; |
| } |
| |
| StringPiece RewriteDriver::RefCategoryName(RefCategory cat) { |
| switch (cat) { |
| case kRefUser: |
| return "User references"; |
| case kRefParsing: |
| return "Parsing"; |
| case kRefPendingRewrites: |
| return "Pending rewrites"; |
| case kRefDetachedRewrites: |
| return "Detached rewrites"; |
| case kRefDeletingRewrites: |
| return "Deleting rewrites"; |
| case kRefFetchUserFacing: |
| return "User-facing fetch rewrite"; |
| case kRefFetchBackground: |
| return "Background fetch rewrite"; |
| case kRefAsyncEvents: |
| return "Misc async event"; |
| case kRefRenderBlockingAsyncEvents: |
| return "Misc async event that's render-blocking"; |
| case kNumRefCategories: |
| break; |
| } |
| LOG(DFATAL) << "Invalid argument to RefCategoryName" << cat; |
| return ""; |
| } |
| |
| void RewriteDriver::LastRefRemoved() { |
| if (!externally_managed_) { |
| release_driver_ = true; |
| } else { |
| ref_counts_.DCheckAllCountsZeroMutexHeld(); |
| |
| // In externally managed mode, we always keep at least one "user" |
| // reference to the driver for our bookkeeping purposes. |
| ref_counts_.AddRefMutexHeld(kRefUser); |
| } |
| } |
| |
| void RewriteDriver::DropReference(RefCategory ref_cat) { |
| bool should_release = false; |
| { |
| ScopedMutex lock(rewrite_mutex()); |
| bool signal_cookie = PrepareShouldSignal(); |
| ref_counts_.ReleaseRefMutexHeld(ref_cat); |
| should_release = release_driver_; |
| SignalIfRequired(signal_cookie); |
| } |
| if (should_release) { |
| PossiblyPurgeCachedResponseAndReleaseDriver(); |
| } |
| } |
| |
| void RewriteDriver::IncrementAsyncEventsCount() { |
| ref_counts_.AddRef(kRefAsyncEvents); |
| } |
| |
| void RewriteDriver::DecrementAsyncEventsCount() { |
| DropReference(kRefAsyncEvents); |
| } |
| |
| void RewriteDriver::IncrementRenderBlockingAsyncEventsCount() { |
| ref_counts_.AddRef(kRefRenderBlockingAsyncEvents); |
| } |
| |
| void RewriteDriver::DecrementRenderBlockingAsyncEventsCount() { |
| DropReference(kRefRenderBlockingAsyncEvents); |
| } |
| |
| void RewriteDriver::EnableBlockingRewrite(RequestHeaders* request_headers) { |
| if (!options()->blocking_rewrite_key().empty()) { |
| const char* blocking_rewrite_key = request_headers->Lookup1( |
| HttpAttributes::kXPsaBlockingRewrite); |
| if (blocking_rewrite_key != NULL) { |
| if (options()->blocking_rewrite_key() == blocking_rewrite_key) { |
| set_fully_rewrite_on_flush(true); |
| } |
| // TODO(bharathbhushan): Allow for multiple PSAs on the request path by |
| // interpreting the value as a comma separated list of keys and avoid |
| // removing this header unconditionally. |
| request_headers->RemoveAll(HttpAttributes::kXPsaBlockingRewrite); |
| } |
| } |
| if (!fully_rewrite_on_flush() && |
| options()->IsBlockingRewriteRefererUrlPatternPresent()) { |
| const char* referer = request_headers->Lookup1( |
| HttpAttributes::kReferer); |
| if (referer != NULL && |
| options()->IsBlockingRewriteEnabledForReferer(referer)) { |
| set_fully_rewrite_on_flush(true); |
| } |
| } |
| if (fully_rewrite_on_flush()) { |
| const char* blocking_rewrite_mode(request_headers->Lookup1( |
| HttpAttributes::kXPsaBlockingRewriteMode)); |
| if (blocking_rewrite_mode != NULL) { |
| StringPiece mode(HttpAttributes::kXPsaBlockingRewriteModeSlow); |
| if (blocking_rewrite_mode == mode) { |
| // Don't wait for async events. |
| set_fast_blocking_rewrite(false); |
| } |
| request_headers->RemoveAll(HttpAttributes::kXPsaBlockingRewriteMode); |
| } |
| } |
| } |
| |
| RewriteDriver::XhtmlStatus RewriteDriver::MimeTypeXhtmlStatus() { |
| if (!xhtml_mimetype_computed_ && |
| server_context_->response_headers_finalized() && |
| (response_headers_ != NULL)) { |
| xhtml_mimetype_computed_ = true; |
| const ContentType* content_type = response_headers_->DetermineContentType(); |
| if (content_type != NULL) { |
| if (content_type->IsXmlLike()) { |
| xhtml_status_ = kIsXhtml; |
| } else { |
| xhtml_status_ = kIsNotXhtml; |
| } |
| } |
| } |
| return xhtml_status_; |
| } |
| |
| FlushEarlyInfo* RewriteDriver::flush_early_info() { |
| if (flush_early_info_.get() == NULL) { |
| PropertyCacheDecodeResult status; |
| flush_early_info_.reset(DecodeFromPropertyCache<FlushEarlyInfo>( |
| server_context()->page_property_cache(), |
| fallback_property_page(), |
| server_context()->dom_cohort(), |
| kSubresourcesPropertyName, |
| -1 /* no ttl checking*/, |
| &status)); |
| if (status != kPropertyCacheDecodeOk) { |
| flush_early_info_.reset(new FlushEarlyInfo); |
| } |
| } |
| return flush_early_info_.get(); |
| } |
| |
| void RewriteDriver::InsertDebugComment(StringPiece unescaped, |
| HtmlNode* node) { |
| if (DebugMode() && node != NULL && IsRewritable(node)) { |
| GoogleString escaped; |
| HtmlKeywords::Escape(unescaped, &escaped); |
| |
| HtmlNode* comment_node = NewCommentNode(node->parent(), escaped); |
| InsertNodeAfterNode(node, comment_node); |
| } |
| } |
| |
| void RewriteDriver::InsertDebugComments( |
| const protobuf::RepeatedPtrField<GoogleString>& unescaped_messages, |
| HtmlElement* element) { |
| if (DebugMode() && element != NULL && IsRewritable(element)) { |
| HtmlNode* preceding_node = element; |
| for (protobuf::RepeatedPtrField<GoogleString>::const_iterator unescaped = |
| unescaped_messages.begin(); |
| unescaped != unescaped_messages.end(); ++unescaped) { |
| GoogleString escaped; |
| HtmlKeywords::Escape(*unescaped, &escaped); |
| |
| HtmlNode* comment_node = |
| NewCommentNode(preceding_node->parent(), escaped); |
| InsertNodeAfterNode(preceding_node, comment_node); |
| preceding_node = comment_node; |
| } |
| } |
| } |
| |
| void RewriteDriver::InsertUnauthorizedDomainDebugComment(StringPiece url, |
| HtmlElement* element) { |
| if (DebugMode() && element != NULL && IsRewritable(element)) { |
| GoogleUrl gurl(url); |
| InsertNodeAfterNode( |
| element, NewCommentNode(element->parent(), |
| GenerateUnauthorizedDomainDebugComment(gurl))); |
| } |
| } |
| |
| GoogleString RewriteDriver::GenerateUnauthorizedDomainDebugComment( |
| const GoogleUrl& gurl) { |
| GoogleString comment("The preceding resource was not rewritten because "); |
| // Note: this is all being defensive - at the time of writing I believe |
| // url will always be a valid URL. |
| if (gurl.IsWebValid()) { |
| StrAppend(&comment, "its domain (", gurl.Host(), ") is not authorized"); |
| } else if (gurl.IsWebOrDataValid()) { |
| StrAppend(&comment, "it is a data URI"); |
| } else { |
| StrAppend(&comment, "it is not authorized"); |
| } |
| GoogleString escaped; |
| HtmlKeywords::Escape(comment, &escaped); |
| return escaped; |
| } |
| |
| void RewriteDriver::SaveOriginalHeaders(const ResponseHeaders& headers) { |
| headers.GetSanitizedProto(flush_early_info()->mutable_response_headers()); |
| } |
| |
| const CriticalLineInfo* RewriteDriver::critical_line_info() const { |
| return critical_line_info_.get(); |
| } |
| |
| void RewriteDriver::set_critical_line_info( |
| CriticalLineInfo* critical_line_info) { |
| critical_line_info_.reset(critical_line_info); |
| } |
| |
| CriticalKeys* RewriteDriver::beacon_critical_line_info() const { |
| return beacon_critical_line_info_.get(); |
| } |
| |
| void RewriteDriver::set_beacon_critical_line_info( |
| CriticalKeys* beacon_critical_line_info) { |
| beacon_critical_line_info_.reset(beacon_critical_line_info); |
| } |
| |
| // The split html config is lazily constructed on first access. Since the |
| // split-html-filter and the split-html-helper-filter access this from the html |
| // parsing thread, the lazy construction does not need mutex protection. |
| const SplitHtmlConfig* RewriteDriver::split_html_config() { |
| if (split_html_config_ == NULL) { |
| split_html_config_.reset(new SplitHtmlConfig(this)); |
| } |
| return split_html_config_.get(); |
| } |
| |
| CriticalCssResult* RewriteDriver::critical_css_result() const { |
| return critical_css_result_.get(); |
| } |
| |
| void RewriteDriver::set_critical_css_result( |
| CriticalCssResult* critical_css_rules) { |
| critical_css_result_.reset(critical_css_rules); |
| } |
| |
| bool RewriteDriver::is_critical_images_beacon_enabled() { |
| return (options()->Enabled(RewriteOptions::kLazyloadImages) || |
| options()->Enabled(RewriteOptions::kInlineImages) || |
| options()->Enabled(RewriteOptions::kDelayImages) || |
| options()->Enabled( |
| RewriteOptions::kResizeToRenderedImageDimensions)) && |
| options()->critical_images_beacon_enabled() && |
| server_context_->factory()->UseBeaconResultsInFilters() && |
| server_context_->page_property_cache()->enabled(); |
| } |
| |
| FlushEarlyRenderInfo* RewriteDriver::flush_early_render_info() const { |
| return flush_early_render_info_.get(); |
| } |
| |
| void RewriteDriver::set_flush_early_render_info( |
| FlushEarlyRenderInfo* flush_early_render_info) { |
| flush_early_render_info_.reset(flush_early_render_info); |
| } |
| |
| bool RewriteDriver::Write(const ResourceVector& inputs, |
| const StringPiece& contents, |
| const ContentType* type, |
| StringPiece charset, |
| OutputResource* output) { |
| output->SetType(type); |
| output->set_charset(charset); |
| ResponseHeaders* meta_data = output->response_headers(); |
| bool clear_last_modified = false; |
| |
| // Transfer Last-Modified from the input for single-input on-the-fly |
| // resources. |
| if ((inputs.size() == 1) && (output->kind() == kOnTheFlyResource)) { |
| const ResponseHeaders* input_headers = inputs[0]->response_headers(); |
| const char* last_modified = input_headers->Lookup1( |
| HttpAttributes::kLastModified); |
| if (last_modified == NULL) { |
| clear_last_modified = true; |
| } else { |
| meta_data->Add(HttpAttributes::kLastModified, last_modified); |
| } |
| } |
| |
| server_context_->SetDefaultLongCacheHeaders( |
| type, charset, output->cache_control_suffix(), meta_data); |
| if (clear_last_modified) { |
| meta_data->RemoveAll(HttpAttributes::kLastModified); |
| } |
| meta_data->SetStatusAndReason(HttpStatus::kOK); |
| server_context_->ApplyInputCacheControl(inputs, meta_data); |
| server_context_->AddOriginalContentLengthHeader(inputs, meta_data); |
| |
| // The URL for any resource we will write includes the hash of contents, |
| // so it can can live, essentially, forever. So compute this hash, |
| // and cache the output using meta_data's default headers which are to cache |
| // forever. |
| MessageHandler* handler = message_handler(); |
| Writer* writer = output->BeginWrite(handler); |
| bool ret = (writer != NULL); |
| if (ret) { |
| ret = writer->Write(contents, handler); |
| output->EndWrite(handler); |
| |
| HTTPCache* http_cache = server_context_->http_cache(); |
| if (output->kind() != kOnTheFlyResource && |
| output->kind() != kInlineResource && |
| (http_cache->force_caching() || meta_data->IsProxyCacheable())) { |
| // This URL should already be mapped to the canonical rewrite domain, |
| // But we should store its unsharded form in the cache. |
| http_cache->Put(output->HttpCacheKey(), CacheFragment(), |
| RequestHeaders::Properties(), |
| options()->ComputeHttpOptions(), |
| &output->value_, handler); |
| } |
| |
| // If we're asked to, also save a debug dump |
| if (server_context_->store_outputs_in_file_system()) { |
| output->DumpToDisk(handler); |
| } |
| |
| // If our URL is derived from some pre-existing URL (and not invented by |
| // us due to something like outlining), cache the mapping from original URL |
| // to the constructed one. |
| if (output->kind() == kRewrittenResource || |
| output->kind() == kOnTheFlyResource) { |
| CachedResult* cached = output->EnsureCachedResultCreated(); |
| cached->set_optimizable(true); |
| cached->set_url(output->url()); // Note: output->url() will be sharded. |
| } |
| } else { |
| // Note that we've already gotten a "could not open file" message; |
| // this just serves to explain why and suggest a remedy. |
| handler->Message(kInfo, "Could not create output resource" |
| " (bad filename prefix '%s'?)", |
| server_context_->filename_prefix().as_string().c_str()); |
| } |
| return ret; |
| } |
| |
| void RewriteDriver::DetermineFiltersBehaviorImpl() { |
| DetermineFilterListBehavior(early_pre_render_filters_); |
| DetermineFilterListBehavior(pre_render_filters_); |
| |
| // Call parent to set up post render filters. |
| HtmlParse::DetermineFiltersBehaviorImpl(); |
| } |
| |
| void RewriteDriver::ClearRequestProperties() { |
| request_properties_.reset(new RequestProperties( |
| server_context_->user_agent_matcher())); |
| } |
| |
| bool RewriteDriver::MetadataRequested( |
| const RequestHeaders& request_headers) const { |
| StringPiece expected_key = options_->distributed_rewrite_key(); |
| // Empty keys don't count. |
| if (expected_key.empty()) { |
| return false; |
| } |
| return request_headers.HasValue(HttpAttributes::kXPsaRequestMetadata, |
| expected_key); |
| } |
| |
| const GoogleString& RewriteDriver::CacheFragment() const { |
| CHECK(options_ != NULL); |
| const GoogleString& fragment = options_->cache_fragment(); |
| if (!fragment.empty()) { |
| return fragment; |
| } |
| CHECK(request_context_.get() != NULL) << "NULL request context in " |
| << "RewriteDriver::CacheFragment"; |
| return request_context_->minimal_private_suffix(); |
| } |
| |
| bool RewriteDriver::SetOrClearPageSpeedOptionCookies( |
| const GoogleUrl& gurl, ResponseHeaders* response_headers) { |
| StringPiece required_token(options_->sticky_query_parameters()); |
| StringPiece provided_token(request_context_->sticky_query_parameters_token()); |
| // These are mutually exclusive but provide a way of specifying "do nothing". |
| bool set_cookies = false; |
| bool clear_cookies = false; |
| |
| if (options_->allow_options_to_be_set_by_cookies() && |
| !required_token.empty() && |
| required_token == provided_token) { |
| // Make the current options sticky if we allow options to be set by |
| // cookies (otherwise why bother?), there is a token specified in the |
| // configuration, and the token specified in the request matches the |
| // one in the configuration. |
| set_cookies = true; |
| } else if (!pagespeed_option_cookies_.empty() && |
| !required_token.empty() && !provided_token.empty() && |
| required_token != provided_token) { |
| // Clear the current option cookies if there are any, there is a token |
| // specified in the configuration, there is a token in the request, and |
| // the token specified in the request does NOT match the one in the |
| // configuration - treat that as a specific request to clear the cookies. |
| clear_cookies = true; |
| } else if (!pagespeed_option_cookies_.empty() && |
| !options_->allow_options_to_be_set_by_cookies()) { |
| // Clear the current option cookies if there any but we no longer allow |
| // options to be set by cookies. |
| clear_cookies = true; |
| } |
| |
| if (!set_cookies && !clear_cookies) { |
| return false; |
| } |
| |
| // We need to not set cookies for the option that triggered this. |
| const GoogleString old_option_name( |
| StrCat(RewriteQuery::kPageSpeed, |
| RewriteOptions::kStickyQueryParameters)); |
| const GoogleString new_option_name( |
| StrCat(RewriteQuery::kModPagespeed, |
| RewriteOptions::kStickyQueryParameters)); |
| StringPieceVector exclusions; |
| exclusions.push_back(old_option_name); |
| exclusions.push_back(new_option_name); |
| bool result = false; |
| if (set_cookies) { |
| int64 expiration_time_ms = (server_context()->timer()->NowMs() + |
| options_->option_cookies_duration_ms()); |
| result = response_headers->SetQueryParamsAsCookies(gurl, |
| pagespeed_query_params_, |
| exclusions, |
| expiration_time_ms); |
| } else /* ASSERT: clear_cookies == true */ { |
| result = response_headers->ClearOptionCookies(gurl, |
| pagespeed_option_cookies_, |
| exclusions); |
| } |
| if (result) { |
| response_headers->ComputeCaching(); |
| } |
| |
| return result; |
| } |
| |
| bool RewriteDriver::LookupMetadataForOutputResource( |
| StringPiece url, GoogleString* error_out, |
| RewriteContext::CacheLookupResultCallback* callback) { |
| RewriteFilter* filter = NULL; |
| GoogleUrl gurl(url); |
| |
| if (!gurl.IsWebValid()) { |
| *error_out = "Unable to parse URL."; |
| return false; |
| } |
| |
| // The setup is different depending on if url is .pagespeed. resource or an |
| // in-place rewritten one. |
| bool is_pagespeed_resource = server_context_->IsPagespeedResource(gurl); |
| |
| SetBaseUrlForFetch(gurl.Spec()); |
| OutputResourcePtr output_resource; |
| |
| if (is_pagespeed_resource) { |
| output_resource.reset(DecodeOutputResource(gurl, &filter)); |
| } else { |
| StringPiece base = gurl.AllExceptLeaf(); |
| ResourceNamer namer; |
| output_resource.reset( |
| new OutputResource(this, base, base, base, namer, kRewrittenResource)); |
| } |
| |
| if (output_resource.get() == NULL || |
| (filter == NULL && is_pagespeed_resource)) { |
| *error_out = "Unable to decode resource."; |
| return false; |
| } |
| |
| scoped_ptr<RewriteContext> context; |
| if (is_pagespeed_resource) { |
| context.reset(filter->MakeRewriteContext()); |
| } else { |
| context.reset(new InPlaceRewriteContext(this, gurl.Spec())); |
| } |
| |
| return RewriteContext::LookupMetadataForOutputResourceImpl( |
| output_resource, gurl, context.release(), |
| this, error_out, callback); |
| } |
| |
| } // namespace net_instaweb |