blob: 71dee441fbaeb1c8cccde3a2c995391700c69a3b [file] [log] [blame]
/**
* Copyright 2010 Google Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
// Author: jmarantz@google.com (Joshua Marantz)
// and sligocki@google.com (Shawn Ligocki)
#ifndef NET_INSTAWEB_REWRITER_PUBLIC_RESOURCE_MANAGER_H_
#define NET_INSTAWEB_REWRITER_PUBLIC_RESOURCE_MANAGER_H_
#include <map>
#include <vector>
#include "base/basictypes.h"
#include "base/scoped_ptr.h"
#include "net/instaweb/util/public/http_cache.h"
#include "net/instaweb/util/public/meta_data.h"
#include "net/instaweb/rewriter/public/resource.h"
#include <string>
#include "net/instaweb/util/public/string_util.h"
#include "net/instaweb/util/public/url_async_fetcher.h"
#include "net/instaweb/util/public/url_segment_encoder.h"
class GURL;
namespace net_instaweb {
// Forward declarations of net_instaweb types, most of which appear in the
// ResourceManager interface below.
// NOTE(review): Timer (returned by ResourceManager::timer()) is not declared
// here; presumably it arrives via one of the includes above -- confirm.
class ContentType;
class DomainLawyer;
class FileSystem;
class FilenameEncoder;
class HTTPCache;
class HTTPValue;
class Hasher;
class MessageHandler;
class MetaData;
class OutputResource;
class ResourceNamer;
class Statistics;
class UrlAsyncFetcher;
class UrlEscaper;
class Variable;
class Writer;
class ResourceManager {
public:
static const int kNotSharded;
ResourceManager(const StringPiece& file_prefix,
const StringPiece& url_prefix_pattern,
const int num_shards,
FileSystem* file_system,
FilenameEncoder* filename_encoder,
UrlAsyncFetcher* url_async_fetcher,
Hasher* hasher,
HTTPCache* http_cache,
DomainLawyer* domain_lawyer);
~ResourceManager();
// Initialize statistics gathering.
static void Initialize(Statistics* statistics);
// Created resources are managed by ResourceManager and eventually deleted by
// ResourceManager's destructor. Every time a Create...Resource... method is
// called, a fresh Resource object is generated (or the creation fails and
// NULL is returned). All content_type arguments can be NULL if the content
// type isn't known or isn't covered by the ContentType library. Where
// necessary, the extension is used to infer a content type if one is needed
// and none is provided. It is faster and more reliable to provide one
// explicitly when it is known.
// Constructs an output resource corresponding to the specified input resource
// and encoded using the provided encoder. Assumes permissions checking
// occurred when the input resource was constructed, and does not do it again.
// To avoid if-chains, tolerates a NULL input_resource (by returning NULL).
// TODO(jmaessen, jmarantz): Do we want to permit NULL input_resources here?
// jmarantz has evinced a distaste.
OutputResource* CreateOutputResourceFromResource(
const StringPiece& filter_prefix,
const ContentType* content_type,
UrlSegmentEncoder* encoder,
Resource* input_resource,
MessageHandler* handler);
// Constructs and permissions-checks an output resource for the specified url,
// which occurs in the context of document_gurl. Returns NULL on failure.
// The content_type argument cannot be NULL. The resource name will be
// encoded using the provided encoder.
OutputResource* CreateOutputResourceForRewrittenUrl(
const GURL& document_gurl,
const StringPiece& filter_prefix,
const StringPiece& resource_url,
const ContentType* content_type,
UrlSegmentEncoder* encoder,
MessageHandler* handler);
// Creates an output resource where the name is provided by the rewriter.
// The intent is to be able to derive the content from the name, for example,
// by encoding URLs and metadata.
//
// This method is not dependent on shared persistent storage, and always
// succeeds.
//
// This name is prepended with path for writing hrefs, and the resulting url
// is encoded and stored at file_prefix when working with the file system. So
// hrefs are:
// $(PATH)/$(FILTER_PREFIX).$(HASH).$(NAME).$(CONTENT_TYPE_EXT)
//
// 'type' arg can be null if it's not known, or is not in our ContentType
// library.
OutputResource* CreateOutputResourceWithPath(
const StringPiece& path, const StringPiece& filter_prefix,
const StringPiece& name, const ContentType* type,
MessageHandler* handler);
// Creates a resource based on a URL. This is used for serving rewritten
// resources. No permission checks are performed on the url, though it
// is parsed to see if it looks like the url of a generated resource (which
// should mean checking the hash to ensure we generated it ourselves).
// TODO(jmaessen): add url hash & check thereof.
OutputResource* CreateOutputResourceForFetch(
const StringPiece& url,
MessageHandler* handler);
// Creates an input resource with the url evaluated based on input_url
// which may need to be absolutified relative to base_url. Returns NULL if
// the input resource url isn't valid, or can't legally be rewritten in the
// context of this page.
Resource* CreateInputResource(const GURL& base_url,
const StringPiece& input_url,
MessageHandler* handler);
// Create input resource from input_url, if it is legal in the context of
// base_gurl, and if the resource can be read from cache. If it's not in
// cache, initiate an asynchronous fetch so it will be on next access. This
// is a common case for filters.
Resource* CreateInputResourceAndReadIfCached(const GURL& base_gurl,
const StringPiece& input_url,
MessageHandler* handler);
// Create an input resource by decoding output_resource using the given
// encoder. Assures legality by checking hash signatures, rather than
// explicitly permission-checking the result.
Resource* CreateInputResourceFromOutputResource(
UrlSegmentEncoder* encoder,
OutputResource* output_resource,
MessageHandler* handler);
// Creates an input resource from the given absolute url. Requires that the
// provided url has been checked, and can legally be rewritten in the current
// page context. If you have a GURL, prefer CreateInputResourceUnchecked,
// otherwise use this.
Resource* CreateInputResourceAbsolute(const StringPiece& absolute_url,
MessageHandler* handler);
// Creates an input resource with the given gurl, already absolute and valid.
// Use only for resource fetches that lack a page context, or in places where
// permission checking has been done explicitly on the caller side (for
// example css_combine_filter, which constructs its own url_partnership).
Resource* CreateInputResourceUnchecked(const GURL& gurl,
MessageHandler* handler);
// Set up a basic header for a given content_type.
// If content_type is null, the Content-Type is omitted.
// This method may only be called once on a header.
void SetDefaultHeaders(const ContentType* content_type,
MetaData* header) const;
// Changes the content type of a pre-initialized header.
void SetContentType(const ContentType* content_type, MetaData* header);
StringPiece filename_prefix() const { return file_prefix_; }
// Sets the URL prefix pattern. The pattern must have exactly one %d
// in it, if num_shards is not 0. If num shards is 0, then it should
// not have any % characters in it.
void SetUrlPrefixPattern(const StringPiece& url_prefix_pattern);
void set_filename_prefix(const StringPiece& file_prefix);
Statistics* statistics() const { return statistics_; }
void set_statistics(Statistics* s) {
statistics_ = s;
resource_url_domain_rejections_ = NULL; // Lazily initialized.
}
void set_relative_path(bool x) { relative_path_ = x; }
bool FetchOutputResource(
OutputResource* output_resource,
Writer* writer, MetaData* response_headers,
MessageHandler* handler) const;
// Writes the specified contents into the output resource, retaining
// both a name->filename map and the filename->contents map.
//
// TODO(jmarantz): add last_modified arg.
bool Write(HttpStatus::Code status_code,
const StringPiece& contents, OutputResource* output,
int64 origin_expire_time_ms, MessageHandler* handler);
// Read resource contents & headers, returning false if the resource
// is not already cached, in which case an async request is queued.
// The Resource remains owned by the caller.
bool ReadIfCached(Resource* resource, MessageHandler* message_handler) const;
// Read contents of resource asynchronously, calling callback when
// done. If the resource contents is cached, the callback will
// be called directly, rather than asynchronously. The Resource
// will be passed to the callback, which will be responsible for
// ultimately freeing the resource. The Resource will have its
// contents and headers filled in.
//
// The resource can be deleted only after the callback is called.
void ReadAsync(Resource* resource, Resource::AsyncCallback* callback,
MessageHandler* message_handler);
// TODO(jmarantz): check thread safety in Apache.
Hasher* hasher() { return hasher_; }
FileSystem* file_system() { return file_system_; }
FilenameEncoder* filename_encoder() const { return filename_encoder_; }
UrlAsyncFetcher* url_async_fetcher() { return url_async_fetcher_; }
Timer* timer() { return http_cache_->timer(); }
HTTPCache* http_cache() { return http_cache_; }
UrlEscaper* url_escaper() { return url_escaper_.get(); }
int num_shards() const { return num_shards_; }
// Given a ResourceNamer, generates the prefix (everything but the file name)
// for the corresponding URL.
std::string UrlPrefixFor(const ResourceNamer& namer) const;
// Whether or not resources should hit the filesystem.
bool store_outputs_in_file_system() { return store_outputs_in_file_system_; }
void set_store_outputs_in_file_system(bool store) {
store_outputs_in_file_system_ = store;
}
DomainLawyer* domain_lawyer() { return domain_lawyer_; }
const DomainLawyer* domain_lawyer() const { return domain_lawyer_; }
private:
inline void IncrementResourceUrlDomainRejections();
std::string ConstructNameKey(const OutputResource& output) const;
void ValidateShardsAgainstUrlPrefixPattern();
std::string CanonicalizeBase(const StringPiece& base, int* shard) const;
std::string file_prefix_;
std::string url_prefix_pattern_;
const int num_shards_;
int resource_id_; // Sequential ids for temporary Resource filenames.
FileSystem* file_system_;
FilenameEncoder* filename_encoder_;
UrlAsyncFetcher* url_async_fetcher_;
Hasher* hasher_;
Statistics* statistics_;
Variable* resource_url_domain_rejections_;
HTTPCache* http_cache_;
scoped_ptr<UrlEscaper> url_escaper_;
bool relative_path_;
bool store_outputs_in_file_system_;
DomainLawyer* domain_lawyer_;
DISALLOW_COPY_AND_ASSIGN(ResourceManager);
};
} // namespace net_instaweb
#endif // NET_INSTAWEB_REWRITER_PUBLIC_RESOURCE_MANAGER_H_