blob: fff133e7d723d5512a49dc5cf8a2c1a01d4fc775 [file] [log] [blame]
/*
* Copyright 2010 Google Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
// Author: sligocki@google.com (Shawn Ligocki)
//
// Output resources are created by a RewriteDriver. They must be able to
// write contents and return their url (so that it can be href'd on a page).
#ifndef NET_INSTAWEB_REWRITER_PUBLIC_OUTPUT_RESOURCE_H_
#define NET_INSTAWEB_REWRITER_PUBLIC_OUTPUT_RESOURCE_H_
#include "base/logging.h"
#include "net/instaweb/http/public/request_context.h"
#include "net/instaweb/rewriter/public/output_resource_kind.h"
#include "net/instaweb/rewriter/public/resource.h"
#include "net/instaweb/rewriter/public/resource_namer.h"
#include "pagespeed/kernel/base/basictypes.h"
#include "pagespeed/kernel/base/ref_counted_ptr.h"
#include "pagespeed/kernel/base/string.h"
#include "pagespeed/kernel/base/string_util.h"
namespace net_instaweb {
class CachedResult;
class MessageHandler;
class RewriteDriver;
class RewriteOptions;
class Writer;
struct ContentType;
class OutputResource : public Resource {
public:
// Construct an OutputResource. For the moment, we pass in type redundantly
// even though full_name embeds an extension. This reflects current code
// structure rather than a principled stand on anything.
// TODO(jmaessen): remove redundancy.
//
// The 'options' argument can be NULL. This is done in the Fetch path because
// that field is only used for domain sharding, and during the fetch, further
// domain makes no sense.
OutputResource(const RewriteDriver* driver,
StringPiece resolved_base,
StringPiece unmapped_base, /* aka source domain */
StringPiece original_base, /* aka cnamed domain */
const ResourceNamer& resource_id,
OutputResourceKind kind);
virtual void LoadAndCallback(NotCacheablePolicy not_cacheable_policy,
const RequestContextPtr& request_context,
AsyncCallback* callback);
// NOTE: url() will crash if resource has does not have a hash set yet.
// Specifically, this will occur if the resource has not been completely
// written yet. Before that point, the final URL cannot be known.
//
// Note: the OutputResource will never have a query string, even when
// ModPagespeedAddOptionsToUrls is on.
virtual GoogleString url() const;
// Returns the same as url(), but with a spoofed hash in case no hash
// was set yet. Use this for error reporting, etc. where you do not
// know whether the output resource has a valid hash yet.
GoogleString UrlEvenIfHashNotSet();
// Save resource contents to disk, for testing and debugging purposes.
// Precondition: the resource contents must be fully set.
// The resource will be saved under the resource manager's filename_prefix()
// using with URL escaped using its filename_encoder().
void DumpToDisk(MessageHandler* handler);
// Update the passed in CachedResult from the CachedResult in this
// OutputResource.
void UpdateCachedResultPreservingInputInfo(CachedResult* to_update) const;
// The NameKey describes the source url and rewriter used, without hash and
// content type information. This is used to find previously-computed filter
// results whose output hash and content type is unknown. The full name of a
// resource is of the form
// path/prefix.encoded_resource_name.hash.extension
// we know prefix and name, but not the hash, and we don't always even have
// the extension, which might have changes as the result of, for example image
// optimization (e.g. gif->png). But We can "remember" the hash/extension for
// as long as the origin URL was cacheable. So we construct this as a key:
// path/prefix.encoded_resource_name
// and use that to map to the hash-code and extension. If we know the
// hash-code then we may also be able to look up the contents in the same
// cache.
virtual GoogleString name_key() const;
// Builds a canonical URL in a form for use with the HTTP cache.
// The DomainLawyer from options is used to find the proper domain in case
// there is a fetch for the unsharded form, or the wrong shard.
//
// For example, if you have a resource styles.css
// ModPagespeedMapRewriteDomain master alias
// ModPagespeedShardDomain master shard1,shard2
// then all HTTP cache puts/gets will use the key "http://master/style.css",
// which can be obtained from an output resource using this method.
GoogleString HttpCacheKey() const;
// output-specific
const GoogleString& resolved_base() const { return resolved_base_; }
const GoogleString& unmapped_base() const { return unmapped_base_; }
const GoogleString& original_base() const { return original_base_; }
const ResourceNamer& full_name() const { return full_name_; }
ResourceNamer* mutable_full_name() { return &full_name_; }
StringPiece name() const { return full_name_.name(); }
StringPiece experiment() const { return full_name_.experiment(); }
StringPiece suffix() const;
StringPiece filter_prefix() const { return full_name_.id(); }
StringPiece hash() const { return full_name_.hash(); }
StringPiece signature() const { return full_name_.signature(); }
bool has_hash() const { return !hash().empty(); }
void clear_hash() {
full_name_.ClearHash();
computed_url_.clear();
}
// Some output resources have mangled names derived from input resource(s),
// such as when combining CSS files. When we need to regenerate the output
// resource given just its URL we need to convert the URL back to its
// constituent input resource URLs. Our url() method can return a modified
// version of the input resources' host and path if our resource manager
// has a non-standard url_namer(), so when trying to regenerate the input
// resources' URL we need to reverse that modification. Note that the
// default UrlNamer class doesn't do any modification, and that the decoding
// of the leaf names is done separetly by the UrlMultipartEncoder class.
GoogleString decoded_base() const;
// In a scalable installation where the sprites must be kept in a
// database, we cannot serve HTML that references new resources
// that have not been committed yet, and committing to a database
// may take too long to block on the HTML rewrite. So we will want
// to refactor this to check to see whether the desired resource is
// already known. For now we'll assume we can commit to serving the
// resource during the HTML rewriter.
bool IsWritten() const { return writing_complete_; }
// Sets the type of the output resource, and thus also its suffix.
virtual void SetType(const ContentType* type);
// Whenever output resources are created via RewriteDriver
// (except DecodeOutputResource) it looks up cached
// information on any previous creation of that resource, including
// the full filename and any filter-specific metadata. If such
// information is available, this method will return non-NULL.
//
// Note: cached_result() will also be non-NULL if you explicitly
// create the result from a filter by calling EnsureCachedResultCreated()
//
// The output is const because we do not check that the CachedResult has not
// been written. If you want to modify the CachedResult, use
// EnsureCachedResultCreated instead.
const CachedResult* cached_result() const { return cached_result_; }
// If there is no cached output information, creates an empty one,
// without any information filled in (so no url(), or timestamps).
//
// The primary use of this method is to let filters store any metadata they
// want before calling RewriteDriver::Write.
// This never returns null.
// We will DCHECK that the cached result has not been written.
CachedResult* EnsureCachedResultCreated();
void clear_cached_result();
// Sets the cached-result to an already-existing, externally owned
// buffer. We need to make sure not to free it on destruction.
void set_cached_result(CachedResult* cached_result) {
clear_cached_result();
cached_result_ = cached_result;
}
OutputResourceKind kind() const { return kind_; }
// This is called by CacheCallback::Done in rewrite_driver.cc.
void SetWritten(bool written) { writing_complete_ = true; }
virtual const RewriteOptions* rewrite_options() const {
return rewrite_options_;
}
// Interface for directly setting the value of the resource.
// It must not have been set otherwise! The return value of
// BeginWrite is owned by this OutputResource.
Writer* BeginWrite(MessageHandler* message_handler);
void EndWrite(MessageHandler* message_handler);
virtual bool UseHttpCache() const { return true; }
// Extra suffix to be added to Cache-Control in the response headers
// when serving the response. E.g. a filter might want to set
// no-transform on its output.
const GoogleString& cache_control_suffix() const {
return cache_control_suffix_;
}
void set_cache_control_suffix(const GoogleString& x) {
DCHECK(cache_control_suffix_.empty());
cache_control_suffix_ = x;
}
protected:
virtual ~OutputResource();
REFCOUNT_FRIEND_DECLARATION(OutputResource);
private:
friend class RewriteDriver;
friend class ServerContext;
friend class ServerContextTest;
void SetHash(StringPiece hash);
StringPiece extension() const { return full_name_.ext(); }
GoogleString ComputeSignature();
bool CheckSignature();
// Name of the file used by DumpToDisk.
GoogleString DumpFileName() const;
bool writing_complete_;
// TODO(jmarantz): We have a complicated semantic for CachedResult
// ownership as we transition from rewriting inline while html parsing
// to rewriting asynchronously. In the asynchronous world, the
// CachedResult object will be owned at a higher level. So it is not
// safe to call cached_result_.release() or .reset() directly. Instead,
// go through the clear_cached_result() method.
bool cached_result_owned_;
CachedResult* cached_result_;
// The resolved_base_ is the domain as reported by UrlPartnership.
// It takes into account domain-mapping via ModPagespeedMapRewriteDomain.
// However, the resolved base is not affected by sharding. Shard-selection
// is done when url() is called, relying on the content hash.
// The unmapped_base_ is the same domain as resolved_base_ but before domain
// mapping was applied; it is also known as the source domain since it is
// the domain of the resource's link.
// The original_base_ is the domain of the page that contains the resource
// link; it is also known as the CNAMEd domain since the page's URL is
// one that we manage and is one that we are rwriting.
// For example, given an HTML page with URL http://www.example.com/index.html
// containing elements "<base href='http://static.example.com/'>" and
// "<link rel='stylesheet' href='styles.css'>", and also a rule rewriting
// static.example.com -> cdn.com/example/static, then the OutputResource for
// the link element's href will have:
// resolved_base_ == http://cdn.com/example/static/
// unmapped_base_ == http://static.example.com/
// original_base_ == http://www.example.com/
GoogleString resolved_base_;
GoogleString unmapped_base_;
GoogleString original_base_;
GoogleString cache_control_suffix_;
ResourceNamer full_name_;
// Lazily evaluated and cached result of the url() method, which is const.
mutable GoogleString computed_url_;
const RewriteOptions* rewrite_options_;
// Output resource have a 'kind' associated with them that controls the kind
// of caching we would like to be performed on them when written out.
OutputResourceKind kind_;
DISALLOW_COPY_AND_ASSIGN(OutputResource);
};
} // namespace net_instaweb
#endif // NET_INSTAWEB_REWRITER_PUBLIC_OUTPUT_RESOURCE_H_