// src/net/instaweb/rewriter/public/resource_manager.h - incubator-pagespeed-mod - Git at Google

 /**
  * Copyright 2010 Google Inc.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
  * You may obtain a copy of the License at
  *
  *      http://www.apache.org/licenses/LICENSE-2.0
  *
  * Unless required by applicable law or agreed to in writing, software
  * distributed under the License is distributed on an "AS IS" BASIS,
  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */

 // Author: jmarantz@google.com (Joshua Marantz)
 //     and sligocki@google.com (Shawn Ligocki)

 #ifndef NET_INSTAWEB_REWRITER_PUBLIC_RESOURCE_MANAGER_H_
 #define NET_INSTAWEB_REWRITER_PUBLIC_RESOURCE_MANAGER_H_

 #include <map>
 #include <vector>
 #include "base/basictypes.h"
 #include "base/scoped_ptr.h"
 #include "net/instaweb/util/public/http_cache.h"
 #include "net/instaweb/util/public/meta_data.h"
 #include "net/instaweb/rewriter/public/resource.h"
 #include <string>
 #include "net/instaweb/util/public/string_util.h"
 #include "net/instaweb/util/public/url_async_fetcher.h"
 #include "net/instaweb/util/public/url_segment_encoder.h"

 // GURL is declared in the global namespace (outside net_instaweb).  This
 // header only passes it by const reference, so a forward declaration is
 // sufficient here.
 class GURL;

 namespace net_instaweb {

 // Forward declarations for net_instaweb types named by ResourceManager's
 // interface.  Most of them appear in this header only as pointers or
 // references (parameters, return values, members).
 //
 // NOTE(review): HTTPCache is dereferenced by the inline timer() accessor, so
 // its full definition must come from an include; HTTPCache, MetaData and
 // UrlAsyncFetcher look like they are also provided by the headers included
 // above, and HTTPValue is not otherwise referenced in this header -- confirm
 // whether those forward declarations are still needed.
 class ContentType;
 class DomainLawyer;
 class FileSystem;
 class FilenameEncoder;
 class HTTPCache;
 class HTTPValue;
 class Hasher;
 class MessageHandler;
 class MetaData;
 class OutputResource;
 class ResourceNamer;
 class Statistics;
 class UrlAsyncFetcher;
 class UrlEscaper;
 class Variable;
 class Writer;

 class ResourceManager {
  public:
   static const int kNotSharded;

   ResourceManager(const StringPiece& file_prefix,
                   const StringPiece& url_prefix_pattern,
                   const int num_shards,
                   FileSystem* file_system,
                   FilenameEncoder* filename_encoder,
                   UrlAsyncFetcher* url_async_fetcher,
                   Hasher* hasher,
                   HTTPCache* http_cache,
                   DomainLawyer* domain_lawyer);
   ~ResourceManager();

   // Initialize statistics gathering.
   static void Initialize(Statistics* statistics);

   // Created resources are managed by ResourceManager and eventually deleted by
   // ResourceManager's destructor.  Every time a Create...Resource... method is
   // called, a fresh Resource object is generated (or the creation fails and
   // NULL is returned).  All content_type arguments can be NULL if the content
   // type isn't known or isn't covered by the ContentType library.  Where
   // necessary, the extension is used to infer a content type if one is needed
   // and none is provided.  It is faster and more reliable to provide one
   // explicitly when it is known.

   // Constructs an output resource corresponding to the specified input resource
   // and encoded using the provided encoder.  Assumes permissions checking
   // occurred when the input resource was constructed, and does not do it again.
   // To avoid if-chains, tolerates a NULL input_resource (by returning NULL).
   // TODO(jmaessen, jmarantz): Do we want to permit NULL input_resources here?
   // jmarantz has evinced a distaste.
   OutputResource* CreateOutputResourceFromResource(
       const StringPiece& filter_prefix,
       const ContentType* content_type,
       UrlSegmentEncoder* encoder,
       Resource* input_resource,
       MessageHandler* handler);

   // Constructs and permissions-checks an output resource for the specified url,
   // which occurs in the context of document_gurl.  Returns NULL on failure.
   // The content_type argument cannot be NULL.  The resource name will be
   // encoded using the provided encoder.
   OutputResource* CreateOutputResourceForRewrittenUrl(
       const GURL& document_gurl,
       const StringPiece& filter_prefix,
       const StringPiece& resource_url,
       const ContentType* content_type,
       UrlSegmentEncoder* encoder,
       MessageHandler* handler);

   // Creates an output resource where the name is provided by the rewriter.
   // The intent is to be able to derive the content from the name, for example,
   // by encoding URLs and metadata.
   //
   // This method is not dependent on shared persistent storage, and always
   // succeeds.
   //
   // This name is prepended with path for writing hrefs, and the resulting url
   // is encoded and stored at file_prefix when working with the file system.  So
   // hrefs are:
   //    $(PATH)/$(FILTER_PREFIX).$(HASH).$(NAME).$(CONTENT_TYPE_EXT)
   //
   // 'type' arg can be null if it's not known, or is not in our ContentType
   // library.
   OutputResource* CreateOutputResourceWithPath(
       const StringPiece& path, const StringPiece& filter_prefix,
       const StringPiece& name,  const ContentType* type,
       MessageHandler* handler);

   // Creates a resource based on a URL.  This is used for serving rewritten
   // resources.  No permission checks are performed on the url, though it
   // is parsed to see if it looks like the url of a generated resource (which
   // should mean checking the hash to ensure we generated it ourselves).
   // TODO(jmaessen): add url hash & check thereof.
   OutputResource* CreateOutputResourceForFetch(
       const StringPiece& url,
       MessageHandler* handler);

   // Creates an input resource with the url evaluated based on input_url
   // which may need to be absolutified relative to base_url.  Returns NULL if
   // the input resource url isn't valid, or can't legally be rewritten in the
   // context of this page.
   Resource* CreateInputResource(const GURL& base_url,
                                 const StringPiece& input_url,
                                 MessageHandler* handler);

   // Create input resource from input_url, if it is legal in the context of
   // base_gurl, and if the resource can be read from cache.  If it's not in
   // cache, initiate an asynchronous fetch so it will be on next access.  This
   // is a common case for filters.
   Resource* CreateInputResourceAndReadIfCached(const GURL& base_gurl,
                                                const StringPiece& input_url,
                                                MessageHandler* handler);

   // Create an input resource by decoding output_resource using the given
   // encoder.  Assures legality by checking hash signatures, rather than
   // explicitly permission-checking the result.
   Resource* CreateInputResourceFromOutputResource(
     UrlSegmentEncoder* encoder,
     OutputResource* output_resource,
     MessageHandler* handler);

   // Creates an input resource from the given absolute url.  Requires that the
   // provided url has been checked, and can legally be rewritten in the current
   // page context.  If you have a GURL, prefer CreateInputResourceUnchecked,
   // otherwise use this.
   Resource* CreateInputResourceAbsolute(const StringPiece& absolute_url,
                                         MessageHandler* handler);

   // Creates an input resource with the given gurl, already absolute and valid.
   // Use only for resource fetches that lack a page context, or in places where
   // permission checking has been done explicitly on the caller side (for
   // example css_combine_filter, which constructs its own url_partnership).
   Resource* CreateInputResourceUnchecked(const GURL& gurl,
                                          MessageHandler* handler);

   // Set up a basic header for a given content_type.
   // If content_type is null, the Content-Type is omitted.
   // This method may only be called once on a header.
   void SetDefaultHeaders(const ContentType* content_type,
                          MetaData* header) const;

   // Changes the content type of a pre-initialized header.
   void SetContentType(const ContentType* content_type, MetaData* header);

   StringPiece filename_prefix() const { return file_prefix_; }

   // Sets the URL prefix pattern.  The pattern must have exactly one %d
   // in it, if num_shards is not 0.  If num shards is 0, then it should
   // not have any % characters in it.
   void SetUrlPrefixPattern(const StringPiece& url_prefix_pattern);

   void set_filename_prefix(const StringPiece& file_prefix);
   Statistics* statistics() const { return statistics_; }
   void set_statistics(Statistics* s) {
     statistics_ = s;
     resource_url_domain_rejections_ = NULL;  // Lazily initialized.
   }
   void set_relative_path(bool x) { relative_path_ = x; }

   bool FetchOutputResource(
     OutputResource* output_resource,
     Writer* writer, MetaData* response_headers,
     MessageHandler* handler) const;

   // Writes the specified contents into the output resource, retaining
   // both a name->filename map and the filename->contents map.
   //
   // TODO(jmarantz): add last_modified arg.
   bool Write(HttpStatus::Code status_code,
              const StringPiece& contents, OutputResource* output,
              int64 origin_expire_time_ms, MessageHandler* handler);

   // Read resource contents & headers, returning false if the resource
   // is not already cached, in which case an async request is queued.
   // The Resource remains owned by the caller.
   bool ReadIfCached(Resource* resource, MessageHandler* message_handler) const;

   // Read contents of resource asynchronously, calling callback when
   // done.  If the resource contents is cached, the callback will
   // be called directly, rather than asynchronously.  The Resource
   // will be passed to the callback, which will be responsible for
   // ultimately freeing the resource.  The Resource will have its
   // contents and headers filled in.
   //
   // The resource can be deleted only after the callback is called.
   void ReadAsync(Resource* resource, Resource::AsyncCallback* callback,
                  MessageHandler* message_handler);

   // TODO(jmarantz): check thread safety in Apache.
   Hasher* hasher() { return hasher_; }
   FileSystem* file_system() { return file_system_; }
   FilenameEncoder* filename_encoder() const { return filename_encoder_; }
   UrlAsyncFetcher* url_async_fetcher() { return url_async_fetcher_; }
   Timer* timer() { return http_cache_->timer(); }
   HTTPCache* http_cache() { return http_cache_; }
   UrlEscaper* url_escaper() { return url_escaper_.get(); }
   int num_shards() const { return num_shards_; }

   // Given a ResourceNamer, generates the prefix (everything but the file name)
   // for the corresponding URL.
   std::string UrlPrefixFor(const ResourceNamer& namer) const;

   // Whether or not resources should hit the filesystem.
   bool store_outputs_in_file_system() { return store_outputs_in_file_system_; }
   void set_store_outputs_in_file_system(bool store) {
     store_outputs_in_file_system_ = store;
   }

   DomainLawyer* domain_lawyer() { return domain_lawyer_; }
   const DomainLawyer* domain_lawyer() const { return domain_lawyer_; }

  private:
   inline void IncrementResourceUrlDomainRejections();
   std::string ConstructNameKey(const OutputResource& output) const;
   void ValidateShardsAgainstUrlPrefixPattern();
   std::string CanonicalizeBase(const StringPiece& base, int* shard) const;

   std::string file_prefix_;
   std::string url_prefix_pattern_;
   const int num_shards_;
   int resource_id_;  // Sequential ids for temporary Resource filenames.
   FileSystem* file_system_;
   FilenameEncoder* filename_encoder_;
   UrlAsyncFetcher* url_async_fetcher_;
   Hasher* hasher_;
   Statistics* statistics_;
   Variable* resource_url_domain_rejections_;
   HTTPCache* http_cache_;
   scoped_ptr<UrlEscaper> url_escaper_;
   bool relative_path_;
   bool store_outputs_in_file_system_;
   DomainLawyer* domain_lawyer_;

   DISALLOW_COPY_AND_ASSIGN(ResourceManager);
 };

 }  // namespace net_instaweb

 #endif  // NET_INSTAWEB_REWRITER_PUBLIC_RESOURCE_MANAGER_H_
	/**
	* Copyright 2010 Google Inc.
	*
	* Licensed under the Apache License, Version 2.0 (the "License");
	* you may not use this file except in compliance with the License.
	* You may obtain a copy of the License at
	*
	* http://www.apache.org/licenses/LICENSE-2.0
	*
	* Unless required by applicable law or agreed to in writing, software
	* distributed under the License is distributed on an "AS IS" BASIS,
	* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
	* See the License for the specific language governing permissions and
	* limitations under the License.
	*/

	// Author: jmarantz@google.com (Joshua Marantz)
	// and sligocki@google.com (Shawn Ligocki)

	#ifndef NET_INSTAWEB_REWRITER_PUBLIC_RESOURCE_MANAGER_H_
	#define NET_INSTAWEB_REWRITER_PUBLIC_RESOURCE_MANAGER_H_

	#include <map>
	#include <vector>
	#include "base/basictypes.h"
	#include "base/scoped_ptr.h"
	#include "net/instaweb/util/public/http_cache.h"
	#include "net/instaweb/util/public/meta_data.h"
	#include "net/instaweb/rewriter/public/resource.h"
	#include <string>
	#include "net/instaweb/util/public/string_util.h"
	#include "net/instaweb/util/public/url_async_fetcher.h"
	#include "net/instaweb/util/public/url_segment_encoder.h"

	// GURL is declared in the global namespace (outside net_instaweb). This
	// header only passes it by const reference, so a forward declaration is
	// sufficient here.
	class GURL;

	namespace net_instaweb {

	// Forward declarations for net_instaweb types named by ResourceManager's
	// interface. Most of them appear in this header only as pointers or
	// references (parameters, return values, members).
	//
	// NOTE(review): HTTPCache is dereferenced by the inline timer() accessor, so
	// its full definition must come from an include; HTTPCache, MetaData and
	// UrlAsyncFetcher look like they are also provided by the headers included
	// above, and HTTPValue is not otherwise referenced in this header -- confirm
	// whether those forward declarations are still needed.
	class ContentType;
	class DomainLawyer;
	class FileSystem;
	class FilenameEncoder;
	class HTTPCache;
	class HTTPValue;
	class Hasher;
	class MessageHandler;
	class MetaData;
	class OutputResource;
	class ResourceNamer;
	class Statistics;
	class UrlAsyncFetcher;
	class UrlEscaper;
	class Variable;
	class Writer;

	// Central factory and I/O entry point for rewriter resources. It creates
	// input Resource and OutputResource objects from URLs, reads and writes their
	// contents, sets up response headers, and exposes accessors for the
	// collaborators (file system, hasher, HTTP cache, fetcher, domain lawyer, ...)
	// it is constructed with.
	//
	// NOTE(review): apart from url_escaper_, which is held in a scoped_ptr, the
	// collaborators are stored as raw pointers; presumably they are owned by the
	// caller and must outlive this object -- confirm against the implementation.
	class ResourceManager {
	public:
	// Sentinel shard value; its numeric value is defined outside this header.
	// NOTE(review): presumably denotes a URL that carries no shard number --
	// confirm against the implementation.
	static const int kNotSharded;

	// file_prefix: prefix for files written through file_system.
	// url_prefix_pattern: see SetUrlPrefixPattern() for the required format,
	// which depends on num_shards.
	// num_shards: number of shards; fixed for the object's lifetime (stored in a
	// const member).
	// The remaining arguments are the collaborators returned by the accessors
	// of the same names below.
	ResourceManager(const StringPiece& file_prefix,
	const StringPiece& url_prefix_pattern,
	const int num_shards,
	FileSystem* file_system,
	FilenameEncoder* filename_encoder,
	UrlAsyncFetcher* url_async_fetcher,
	Hasher* hasher,
	HTTPCache* http_cache,
	DomainLawyer* domain_lawyer);
	~ResourceManager();

	// Initialize statistics gathering.
	static void Initialize(Statistics* statistics);

	// Created resources are managed by ResourceManager and eventually deleted by
	// ResourceManager's destructor. Every time a Create...Resource... method is
	// called, a fresh Resource object is generated (or the creation fails and
	// NULL is returned). All content_type arguments can be NULL if the content
	// type isn't known or isn't covered by the ContentType library. Where
	// necessary, the extension is used to infer a content type if one is needed
	// and none is provided. It is faster and more reliable to provide one
	// explicitly when it is known.
	//
	// NOTE(review): the ownership statement above appears to differ from the
	// comments on ReadIfCached() ("remains owned by the caller") and ReadAsync()
	// (callback frees the resource) -- confirm which one is current.

	// Constructs an output resource corresponding to the specified input resource
	// and encoded using the provided encoder. Assumes permissions checking
	// occurred when the input resource was constructed, and does not do it again.
	// To avoid if-chains, tolerates a NULL input_resource (by returning NULL).
	// TODO(jmaessen, jmarantz): Do we want to permit NULL input_resources here?
	// jmarantz has evinced a distaste.
	OutputResource* CreateOutputResourceFromResource(
	const StringPiece& filter_prefix,
	const ContentType* content_type,
	UrlSegmentEncoder* encoder,
	Resource* input_resource,
	MessageHandler* handler);

	// Constructs and permissions-checks an output resource for the specified url,
	// which occurs in the context of document_gurl. Returns NULL on failure.
	// The content_type argument cannot be NULL. The resource name will be
	// encoded using the provided encoder.
	OutputResource* CreateOutputResourceForRewrittenUrl(
	const GURL& document_gurl,
	const StringPiece& filter_prefix,
	const StringPiece& resource_url,
	const ContentType* content_type,
	UrlSegmentEncoder* encoder,
	MessageHandler* handler);

	// Creates an output resource where the name is provided by the rewriter.
	// The intent is to be able to derive the content from the name, for example,
	// by encoding URLs and metadata.
	//
	// This method is not dependent on shared persistent storage, and always
	// succeeds.
	//
	// This name is prepended with path for writing hrefs, and the resulting url
	// is encoded and stored at file_prefix when working with the file system. So
	// hrefs are:
	// $(PATH)/$(FILTER_PREFIX).$(HASH).$(NAME).$(CONTENT_TYPE_EXT)
	//
	// 'type' arg can be null if it's not known, or is not in our ContentType
	// library.
	OutputResource* CreateOutputResourceWithPath(
	const StringPiece& path, const StringPiece& filter_prefix,
	const StringPiece& name, const ContentType* type,
	MessageHandler* handler);

	// Creates a resource based on a URL. This is used for serving rewritten
	// resources. No permission checks are performed on the url, though it
	// is parsed to see if it looks like the url of a generated resource (which
	// should mean checking the hash to ensure we generated it ourselves).
	// TODO(jmaessen): add url hash & check thereof.
	OutputResource* CreateOutputResourceForFetch(
	const StringPiece& url,
	MessageHandler* handler);

	// Creates an input resource with the url evaluated based on input_url
	// which may need to be absolutified relative to base_url. Returns NULL if
	// the input resource url isn't valid, or can't legally be rewritten in the
	// context of this page.
	Resource* CreateInputResource(const GURL& base_url,
	const StringPiece& input_url,
	MessageHandler* handler);

	// Create input resource from input_url, if it is legal in the context of
	// base_gurl, and if the resource can be read from cache. If it's not in
	// cache, initiate an asynchronous fetch so it will be on next access. This
	// is a common case for filters.
	Resource* CreateInputResourceAndReadIfCached(const GURL& base_gurl,
	const StringPiece& input_url,
	MessageHandler* handler);

	// Create an input resource by decoding output_resource using the given
	// encoder. Assures legality by checking hash signatures, rather than
	// explicitly permission-checking the result.
	Resource* CreateInputResourceFromOutputResource(
	UrlSegmentEncoder* encoder,
	OutputResource* output_resource,
	MessageHandler* handler);

	// Creates an input resource from the given absolute url. Requires that the
	// provided url has been checked, and can legally be rewritten in the current
	// page context. If you have a GURL, prefer CreateInputResourceUnchecked,
	// otherwise use this.
	Resource* CreateInputResourceAbsolute(const StringPiece& absolute_url,
	MessageHandler* handler);

	// Creates an input resource with the given gurl, already absolute and valid.
	// Use only for resource fetches that lack a page context, or in places where
	// permission checking has been done explicitly on the caller side (for
	// example css_combine_filter, which constructs its own url_partnership).
	Resource* CreateInputResourceUnchecked(const GURL& gurl,
	MessageHandler* handler);

	// Set up a basic header for a given content_type.
	// If content_type is null, the Content-Type is omitted.
	// This method may only be called once on a header.
	void SetDefaultHeaders(const ContentType* content_type,
	MetaData* header) const;

	// Changes the content type of a pre-initialized header.
	void SetContentType(const ContentType* content_type, MetaData* header);

	// Returns the current file prefix (see set_filename_prefix()).
	StringPiece filename_prefix() const { return file_prefix_; }

	// Sets the URL prefix pattern. The pattern must have exactly one %d
	// in it, if num_shards is not 0. If num shards is 0, then it should
	// not have any % characters in it.
	void SetUrlPrefixPattern(const StringPiece& url_prefix_pattern);

	void set_filename_prefix(const StringPiece& file_prefix);
	Statistics* statistics() const { return statistics_; }

	// Replaces the Statistics object and clears the cached rejection counter,
	// which is looked up again lazily.
	void set_statistics(Statistics* s) {
	statistics_ = s;
	resource_url_domain_rejections_ = NULL; // Lazily initialized.
	}

	// Stores the relative-path flag.
	// NOTE(review): presumably selects relative rather than absolute URLs for
	// generated resources -- confirm against the implementation.
	void set_relative_path(bool x) { relative_path_ = x; }

	// Fetches the given output resource.
	// NOTE(review): presumably streams the contents to writer, fills in
	// response_headers and returns whether that succeeded -- confirm against
	// the implementation.
	bool FetchOutputResource(
	OutputResource* output_resource,
	Writer* writer, MetaData* response_headers,
	MessageHandler* handler) const;

	// Writes the specified contents into the output resource, retaining
	// both a name->filename map and the filename->contents map.
	//
	// TODO(jmarantz): add last_modified arg.
	bool Write(HttpStatus::Code status_code,
	const StringPiece& contents, OutputResource* output,
	int64 origin_expire_time_ms, MessageHandler* handler);

	// Read resource contents & headers, returning false if the resource
	// is not already cached, in which case an async request is queued.
	// The Resource remains owned by the caller.
	bool ReadIfCached(Resource* resource, MessageHandler* message_handler) const;

	// Read contents of resource asynchronously, calling callback when
	// done. If the resource contents is cached, the callback will
	// be called directly, rather than asynchronously. The Resource
	// will be passed to the callback, which will be responsible for
	// ultimately freeing the resource. The Resource will have its
	// contents and headers filled in.
	//
	// The resource can be deleted only after the callback is called.
	void ReadAsync(Resource* resource, Resource::AsyncCallback* callback,
	MessageHandler* message_handler);

	// Accessors for the collaborators. The ones returning a mutable pointer
	// are deliberately left non-const, mirroring the const/non-const pair of
	// domain_lawyer() accessors below.
	// TODO(jmarantz): check thread safety in Apache.
	Hasher* hasher() { return hasher_; }
	FileSystem* file_system() { return file_system_; }
	FilenameEncoder* filename_encoder() const { return filename_encoder_; }
	UrlAsyncFetcher* url_async_fetcher() { return url_async_fetcher_; }
	// The timer is obtained from the HTTP cache rather than stored separately.
	Timer* timer() { return http_cache_->timer(); }
	HTTPCache* http_cache() { return http_cache_; }
	UrlEscaper* url_escaper() { return url_escaper_.get(); }
	int num_shards() const { return num_shards_; }

	// Given a ResourceNamer, generates the prefix (everything but the file name)
	// for the corresponding URL.
	std::string UrlPrefixFor(const ResourceNamer& namer) const;

	// Whether or not resources should hit the filesystem. The getter only
	// reads a bool, so it is const and usable through a const ResourceManager.
	bool store_outputs_in_file_system() const {
	return store_outputs_in_file_system_;
	}
	void set_store_outputs_in_file_system(bool store) {
	store_outputs_in_file_system_ = store;
	}

	DomainLawyer* domain_lawyer() { return domain_lawyer_; }
	const DomainLawyer* domain_lawyer() const { return domain_lawyer_; }

	private:
	// Bumps the resource_url_domain_rejections_ statistic.
	// NOTE(review): presumably also performs the lazy lookup of that variable
	// mentioned in set_statistics() -- confirm against the implementation.
	inline void IncrementResourceUrlDomainRejections();
	// Builds the key string associated with an output resource's name.
	std::string ConstructNameKey(const OutputResource& output) const;
	// Checks num_shards_ against url_prefix_pattern_ (see SetUrlPrefixPattern()
	// for the rules).
	void ValidateShardsAgainstUrlPrefixPattern();
	// Returns a canonical form of base and reports a shard through *shard.
	// NOTE(review): exact canonicalization rules are not visible here.
	std::string CanonicalizeBase(const StringPiece& base, int* shard) const;

	std::string file_prefix_;
	std::string url_prefix_pattern_;
	const int num_shards_;
	int resource_id_; // Sequential ids for temporary Resource filenames.
	FileSystem* file_system_;
	FilenameEncoder* filename_encoder_;
	UrlAsyncFetcher* url_async_fetcher_;
	Hasher* hasher_;
	Statistics* statistics_;
	Variable* resource_url_domain_rejections_; // NULL until lazily initialized.
	HTTPCache* http_cache_;
	scoped_ptr<UrlEscaper> url_escaper_;
	bool relative_path_;
	bool store_outputs_in_file_system_;
	DomainLawyer* domain_lawyer_;

	DISALLOW_COPY_AND_ASSIGN(ResourceManager);
	};

	} // namespace net_instaweb

	#endif // NET_INSTAWEB_REWRITER_PUBLIC_RESOURCE_MANAGER_H_