blob: 0ebbc2cbaf3c8fc211c336cf2510764570c20b07 [file] [log] [blame]
/**
* Copyright 2010 Google Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
// Author: sligocki@google.com (Shawn Ligocki)
#include "net/instaweb/rewriter/public/rewrite_driver_factory.h"
#include "base/logging.h"
#include "net/instaweb/rewriter/public/resource_manager.h"
#include "net/instaweb/rewriter/public/rewrite_driver.h"
#include "net/instaweb/util/public/abstract_mutex.h"
#include "net/instaweb/util/public/cache_url_async_fetcher.h"
#include "net/instaweb/util/public/cache_url_fetcher.h"
#include "net/instaweb/util/public/fake_url_async_fetcher.h"
#include "net/instaweb/util/public/filename_encoder.h"
#include "net/instaweb/util/public/file_system.h"
#include "net/instaweb/util/public/hasher.h"
#include "net/instaweb/util/public/http_cache.h"
#include "net/instaweb/util/public/http_dump_url_fetcher.h"
#include "net/instaweb/util/public/http_dump_url_writer.h"
#include "net/instaweb/util/public/message_handler.h"
#include "net/instaweb/util/public/statistics.h"
#include "net/instaweb/util/public/stl_util.h"
#include "net/instaweb/util/public/timer.h"
namespace net_instaweb {
const char kInstawebResource404Count[] = "resource_404_count";
const char kInstawebSlurp404Count[] = "slurp_404_count";
RewriteDriverFactory::RewriteDriverFactory()
: url_fetcher_(NULL),
url_async_fetcher_(NULL),
html_parse_(NULL),
filename_prefix_(""),
url_prefix_(""),
force_caching_(false),
slurp_read_only_(false),
resource_404_count_(NULL),
slurp_404_count_(NULL) {
}
RewriteDriverFactory::~RewriteDriverFactory() {
ShutDown();
}
bool RewriteDriverFactory::AddEnabledFilters(const StringPiece& filter_names) {
return options_.EnableFiltersByCommaSeparatedList(
filter_names, message_handler());
}
bool RewriteDriverFactory::AddDisabledFilters(const StringPiece& filter_names) {
return options_.DisableFiltersByCommaSeparatedList(
filter_names, message_handler());
}
void RewriteDriverFactory::SetRewriteLevel(RewriteOptions::RewriteLevel level) {
options_.SetRewriteLevel(level);
}
void RewriteDriverFactory::set_html_parse_message_handler(
MessageHandler* message_handler) {
html_parse_message_handler_.reset(message_handler);
}
void RewriteDriverFactory::set_message_handler(
MessageHandler* message_handler) {
message_handler_.reset(message_handler);
}
bool RewriteDriverFactory::FetchersComputed() const {
return (url_fetcher_ != NULL) || (url_async_fetcher_ != NULL);
}
void RewriteDriverFactory::set_slurp_directory(const StringPiece& dir) {
CHECK(!FetchersComputed())
<< "Cannot call set_slurp_directory "
<< " after ComputeUrl*Fetcher has been called";
dir.CopyToString(&slurp_directory_);
}
void RewriteDriverFactory::set_slurp_read_only(bool read_only) {
CHECK(!FetchersComputed())
<< "Cannot call set_slurp_read_only "
<< " after ComputeUrl*Fetcher has been called";
slurp_read_only_ = read_only;
}
void RewriteDriverFactory::set_file_system(FileSystem* file_system) {
file_system_.reset(file_system);
}
// TODO(jmarantz): Change this to set_base_url_fetcher
void RewriteDriverFactory::set_base_url_fetcher(UrlFetcher* url_fetcher) {
CHECK(!FetchersComputed())
<< "Cannot call set_base_url_fetcher "
<< " after ComputeUrl*Fetcher has been called";
CHECK(base_url_async_fetcher_.get() == NULL)
<< "Only call one of set_base_url_fetcher and set_base_url_async_fetcher";
base_url_fetcher_.reset(url_fetcher);
}
void RewriteDriverFactory::set_base_url_async_fetcher(
UrlAsyncFetcher* url_async_fetcher) {
CHECK(!FetchersComputed())
<< "Cannot call set_base_url_fetcher "
<< " after ComputeUrl*Fetcher has been called";
CHECK(base_url_fetcher_.get() == NULL)
<< "Only call one of set_base_url_fetcher and set_base_url_async_fetcher";
base_url_async_fetcher_.reset(url_async_fetcher);
}
void RewriteDriverFactory::set_hasher(Hasher* hasher) {
hasher_.reset(hasher);
}
void RewriteDriverFactory::set_timer(Timer* timer) {
timer_.reset(timer);
}
void RewriteDriverFactory::set_filename_encoder(FilenameEncoder* e) {
filename_encoder_.reset(e);
}
MessageHandler* RewriteDriverFactory::html_parse_message_handler() {
if (html_parse_message_handler_ == NULL) {
html_parse_message_handler_.reset(DefaultHtmlParseMessageHandler());
}
return html_parse_message_handler_.get();
}
MessageHandler* RewriteDriverFactory::message_handler() {
if (message_handler_ == NULL) {
message_handler_.reset(DefaultMessageHandler());
}
return message_handler_.get();
}
FileSystem* RewriteDriverFactory::file_system() {
if (file_system_ == NULL) {
file_system_.reset(DefaultFileSystem());
}
return file_system_.get();
}
Timer* RewriteDriverFactory::timer() {
if (timer_ == NULL) {
timer_.reset(DefaultTimer());
}
return timer_.get();
}
Hasher* RewriteDriverFactory::hasher() {
if (hasher_ == NULL) {
hasher_.reset(NewHasher());
}
return hasher_.get();
}
FilenameEncoder* RewriteDriverFactory::filename_encoder() {
if (filename_encoder_ == NULL) {
filename_encoder_.reset(new FilenameEncoder);
}
return filename_encoder_.get();
}
StringPiece RewriteDriverFactory::filename_prefix() {
return filename_prefix_;
}
StringPiece RewriteDriverFactory::url_prefix() {
// Check this lazily, so an application can look at the default value from
// the factory before deciding whether to update it. It's checked before
// use in ComputeResourceManager() below.
return url_prefix_;
}
HTTPCache* RewriteDriverFactory::http_cache() {
if (http_cache_ == NULL) {
CacheInterface* cache = DefaultCacheInterface();
http_cache_.reset(new HTTPCache(cache, timer()));
http_cache_->set_force_caching(force_caching_);
}
return http_cache_.get();
}
ResourceManager* RewriteDriverFactory::ComputeResourceManager() {
if (resource_manager_ == NULL) {
CHECK(!filename_prefix_.empty())
<< "Must specify --filename_prefix or call "
<< "RewriteDriverFactory::set_filename_prefix.";
CHECK(!url_prefix_.empty())
<< "Must specify --url_prefix or call "
<< "RewriteDriverFactory::set_url_prefix.";
resource_manager_.reset(new ResourceManager(
filename_prefix_, url_prefix_, num_shards(),
file_system(), filename_encoder(), ComputeUrlAsyncFetcher(), hasher(),
http_cache(), &domain_lawyer_));
resource_manager_->set_store_outputs_in_file_system(
ShouldWriteResourcesToFileSystem());
}
return resource_manager_.get();
}
RewriteDriver* RewriteDriverFactory::NewCustomRewriteDriver(
const RewriteOptions& options) {
RewriteDriver* rewrite_driver = new RewriteDriver(
message_handler(), file_system(), ComputeUrlAsyncFetcher());
rewrite_driver->SetResourceManager(ComputeResourceManager());
AddPlatformSpecificRewritePasses(rewrite_driver);
rewrite_driver->AddFilters(options);
return rewrite_driver;
}
RewriteDriver* RewriteDriverFactory::NewRewriteDriver() {
RewriteDriver* rewrite_driver = NewCustomRewriteDriver(options_);
{
ScopedMutex lock(rewrite_drivers_mutex());
rewrite_drivers_.push_back(rewrite_driver);
}
return rewrite_driver;
}
void RewriteDriverFactory::AddPlatformSpecificRewritePasses(
RewriteDriver* driver) {
}
UrlFetcher* RewriteDriverFactory::ComputeUrlFetcher() {
if (url_fetcher_ == NULL) {
// Run any hooks like setting up slurp directory.
FetcherSetupHooks();
if (slurp_directory_.empty()) {
if (base_url_fetcher_.get() == NULL) {
url_fetcher_ = DefaultUrlFetcher();
} else {
url_fetcher_ = base_url_fetcher_.get();
}
} else {
SetupSlurpDirectories();
}
}
return url_fetcher_;
}
UrlAsyncFetcher* RewriteDriverFactory::ComputeUrlAsyncFetcher() {
if (url_async_fetcher_ == NULL) {
// Run any hooks like setting up slurp directory.
FetcherSetupHooks();
if (slurp_directory_.empty()) {
if (base_url_async_fetcher_.get() == NULL) {
url_async_fetcher_ = DefaultAsyncUrlFetcher();
} else {
url_async_fetcher_ = base_url_async_fetcher_.get();
}
} else {
SetupSlurpDirectories();
}
}
return url_async_fetcher_;
}
void RewriteDriverFactory::SetupSlurpDirectories() {
CHECK(!FetchersComputed());
if (slurp_read_only_) {
CHECK(!FetchersComputed());
url_fetcher_ = new HttpDumpUrlFetcher(
slurp_directory_, file_system(), timer());
} else {
// Check to see if the factory already had set_base_url_fetcher
// called on it. If so, then we'll want to use that fetcher
// as the mechanism for the dump-writer to retrieve missing
// content from the internet so it can be saved in the slurp
// directory.
url_fetcher_ = base_url_fetcher_.get();
if (url_fetcher_ == NULL) {
url_fetcher_ = DefaultUrlFetcher();
}
url_fetcher_ = new HttpDumpUrlWriter(slurp_directory_, url_fetcher_,
file_system(), timer());
}
// We do not use real async fetches when slurping.
url_async_fetcher_ = new FakeUrlAsyncFetcher(url_fetcher_);
}
void RewriteDriverFactory::FetcherSetupHooks() {
}
void RewriteDriverFactory::ShutDown() {
// Avoid double-destructing the url fetchers if they were not overridden
// programmatically
if ((url_async_fetcher_ != NULL) &&
(url_async_fetcher_ != base_url_async_fetcher_.get())) {
delete url_async_fetcher_;
}
url_async_fetcher_ = NULL;
if ((url_fetcher_ != NULL) && (url_fetcher_ != base_url_fetcher_.get())) {
delete url_fetcher_;
}
url_fetcher_ = NULL;
STLDeleteContainerPointers(rewrite_drivers_.begin(), rewrite_drivers_.end());
rewrite_drivers_.clear();
file_system_.reset(NULL);
hasher_.reset(NULL);
filename_encoder_.reset(NULL);
timer_.reset(NULL);
resource_manager_.reset(NULL);
html_parse_message_handler_.reset(NULL);
http_cache_.reset(NULL);
cache_fetcher_.reset(NULL);
cache_async_fetcher_.reset(NULL);
}
void RewriteDriverFactory::Initialize(Statistics* statistics) {
if (statistics) {
RewriteDriver::Initialize(statistics);
statistics->AddVariable(kInstawebResource404Count);
statistics->AddVariable(kInstawebSlurp404Count);
}
}
void RewriteDriverFactory::Increment404Count() {
Statistics* statistics = resource_manager_->statistics();
if (statistics != NULL) {
if (resource_404_count_ == NULL) {
resource_404_count_ = statistics->GetVariable(kInstawebResource404Count);
}
resource_404_count_->Add(1);
}
}
void RewriteDriverFactory::IncrementSlurpCount() {
Statistics* statistics = resource_manager_->statistics();
if (statistics != NULL) {
if (slurp_404_count_ == NULL) {
slurp_404_count_ = statistics->GetVariable(kInstawebSlurp404Count);
}
slurp_404_count_->Add(1);
}
}
} // namespace net_instaweb