// blob: fa7e4d627681aa975c71e22d2dc0ffeb1bf417eb [file] [log] [blame]
/*
* Copyright 2010 Google Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
// Author: sligocki@google.com (Shawn Ligocki)
#ifndef NET_INSTAWEB_REWRITER_PUBLIC_CSS_FILTER_H_
#define NET_INSTAWEB_REWRITER_PUBLIC_CSS_FILTER_H_
#include "net/instaweb/rewriter/cached_result.pb.h"
#include "net/instaweb/rewriter/public/css_hierarchy.h"
#include "net/instaweb/rewriter/public/css_resource_slot.h"
#include "net/instaweb/rewriter/public/css_url_encoder.h"
#include "net/instaweb/rewriter/public/output_resource_kind.h"
#include "net/instaweb/rewriter/public/resource.h"
#include "net/instaweb/rewriter/public/resource_slot.h"
#include "net/instaweb/rewriter/public/rewrite_context.h"
#include "net/instaweb/rewriter/public/rewrite_driver.h"
#include "net/instaweb/rewriter/public/rewrite_filter.h"
#include "net/instaweb/rewriter/public/rewrite_options.h"
#include "net/instaweb/rewriter/public/server_context.h"
#include "net/instaweb/rewriter/public/single_rewrite_context.h"
#include "pagespeed/kernel/base/basictypes.h"
#include "pagespeed/kernel/base/scoped_ptr.h"
#include "pagespeed/kernel/base/string.h"
#include "pagespeed/kernel/base/string_util.h"
#include "pagespeed/kernel/html/html_element.h"
#include "pagespeed/kernel/html/html_node.h"
#include "pagespeed/kernel/http/google_url.h"
#include "pagespeed/kernel/util/url_segment_encoder.h"
// Forward declaration only: this header refers to Css::Stylesheet solely
// through pointers, so the full definition is not needed here.
namespace Css {
class Stylesheet;
} // namespace Css
namespace net_instaweb {
// Forward declarations (kept in alphabetical order). This header only uses
// these types via pointers or scoped_ptr members, so complete definitions are
// not required here.
class AssociationTransformer;
class AsyncFetch;
class CacheExtender;
class CssImageRewriter;
class ImageCombineFilter;
class ImageRewriteFilter;
class MessageHandler;
class RewriteDomainTransformer;
class Statistics;
class UpDownCounter;
class Variable;
// Find and parse all CSS in the page and apply transformations including:
// minification, combining, refactoring, and optimizing sub-resources.
//
// Currently only does basic minification.
//
// Note that CssCombineFilter currently does combining (although there is a bug)
// but CssFilter will eventually replace this.
//
// Deals with inline <style> tags and external <link> resources. Rewriting of
// style= attributes on arbitrary elements is started by StartAttributeRewrite().
class CssFilter : public RewriteFilter {
public:
// Rewrite context used for each CSS rewrite; its definition follows this
// class.
class Context;
// Constructs the filter for the given driver. The three filter arguments are
// the filters this one delegates to (see cache_extender_,
// image_rewrite_filter_ and image_combiner_ below).
// NOTE(review): ownership of these pointers is not stated in this header --
// confirm against the constructor definition.
CssFilter(RewriteDriver* driver,
// TODO(sligocki): Temporary pattern until we figure out a better
// way to do this without passing all filters around everywhere.
CacheExtender* cache_extender,
ImageRewriteFilter* image_rewriter,
ImageCombineFilter* image_combiner);
virtual ~CssFilter();
// May be called multiple times, in case there are multiple statistics
// objects.
static void InitStats(Statistics* statistics);
// Initialize & Terminate must be paired.
static void Initialize();
static void Terminate();
// Add this filter's related options to the given vector.
static void AddRelatedOptions(StringPieceVector* target);
// Note: AtExitManager needs to be initialized or you get a nasty error:
// Check failed: false. Tried to RegisterCallback without an AtExitManager.
// This is called by Initialize.
static void InitializeAtExitManager();
// HTML event hooks.
// NOTE(review): presumably these override hooks of RewriteFilter or one of
// its bases; the base class is not visible from this header.
virtual void StartDocumentImpl();
virtual void StartElementImpl(HtmlElement* element);
virtual void Characters(HtmlCharactersNode* characters);
virtual void EndElementImpl(HtmlElement* element);
// Human-readable filter name.
virtual const char* Name() const { return "CssFilter"; }
// Filter id, taken from RewriteOptions::kCssFilterId.
virtual const char* id() const { return RewriteOptions::kCssFilterId; }
virtual void EncodeUserAgentIntoResourceContext(
ResourceContext* context) const;
// String constants defined in the implementation file.
// NOTE(review): presumably the names under which the statistics members
// below are registered in InitStats() -- confirm in the .cc file.
static const char kBlocksRewritten[];
static const char kParseFailures[];
static const char kFallbackRewrites[];
static const char kFallbackFailures[];
static const char kRewritesDropped[];
static const char kTotalBytesSaved[];
static const char kTotalOriginalBytes[];
static const char kUses[];
static const char kCharsetMismatch[];
static const char kInvalidUrl[];
static const char kLimitExceeded[];
static const char kMinifyFailed[];
static const char kRecursion[];
static const char kComplexQueries[];
// NOTE(review): judging by the name, creates a nested rewrite context for
// @import flattening of 'resource' in a freshly created slot, with 'parent'
// as the parent context; the exact contract (including ownership of the
// returned context) is not visible here -- confirm against the definition.
RewriteContext* MakeNestedFlatteningContextInNewSlot(
const ResourcePtr& resource, const GoogleString& location,
CssFilter::Context* rewriter, RewriteContext* parent,
CssHierarchy* hierarchy);
// Returns the static array of filters related to this one and stores its
// length in '*num_filters'.
virtual const RewriteOptions::Filter* RelatedFilters(int* num_filters) const {
*num_filters = merged_filters_size_;
return merged_filters_;
}
// Returns the static vector of options related to this filter.
virtual const StringPieceVector* RelatedOptions() const {
return related_options_;
}
protected:
virtual RewriteContext* MakeRewriteContext();
virtual const UrlSegmentEncoder* encoder() const;
virtual RewriteContext* MakeNestedRewriteContext(
RewriteContext* parent, const ResourceSlotPtr& slot);
private:
friend class Context;
friend class CssFlattenImportsContext; // for statistics
friend class CssHierarchy; // for statistics
// The kind of inline CSS being rewritten: the contents of a <style> tag, or
// a style= attribute in which URLs have or have not been noticed.
enum InlineCssKind {
kInsideStyleTag,
kAttributeWithoutUrls,
kAttributeWithUrls
};
// Creates a Context for the given driver and parent context.
// NOTE(review): ownership of the returned Context is not documented here.
Context* MakeContext(RewriteDriver* driver,
RewriteContext* parent);
// Starts the asynchronous rewrite process for inline CSS 'text'.
void StartInlineRewrite(HtmlCharactersNode* text);
// Starts the asynchronous rewrite process for inline CSS inside the given
// element's given style attribute.
void StartAttributeRewrite(HtmlElement* element,
HtmlElement::Attribute* style,
InlineCssKind inline_css_kind);
// Starts the asynchronous rewrite process for external CSS referenced by
// attribute 'src' of 'link'.
void StartExternalRewrite(HtmlElement* link, HtmlElement::Attribute* src);
// NOTE(review): presumably wraps 'content' (inline CSS text) in a resource
// so it can be rewritten like an external one -- confirm in the .cc file.
ResourcePtr MakeInlineResource(StringPiece content);
// NOTE(review): presumably creates the Context that rewrites 'slot' and
// returns it; what is returned on failure is not visible here.
CssFilter::Context* StartRewriting(const ResourceSlotPtr& slot);
// Get the charset of the HTML being parsed which can be specified in the
// driver's headers, defaulting to ISO-8859-1 if it isn't. Then, if a charset
// is specified in the given element, check that they agree, and if not
// return false and set the failure reason, otherwise return true and assign
// the first charset to '*charset'.
bool GetApplicableCharset(const HtmlElement* element,
GoogleString* charset,
GoogleString* failure_reason) const;
// Get the media specified in the given element, if any. Returns true if
// media were found, false if not.
bool GetApplicableMedia(const HtmlElement* element,
StringVector* media) const;
bool in_style_element_; // Are we in a style element?
// This is meaningless if in_style_element_ is false:
HtmlElement* style_element_; // The element we are in.
// The charset extracted from a meta tag, if any.
GoogleString meta_tag_charset_;
// Filters we delegate to.
CacheExtender* cache_extender_;
ImageRewriteFilter* image_rewrite_filter_;
ImageCombineFilter* image_combiner_;
// Statistics
// # of CSS blocks (CSS files, <style> blocks or style= attributes)
// successfully rewritten.
Variable* num_blocks_rewritten_;
// # of CSS blocks that rewriter failed to parse.
Variable* num_parse_failures_;
// # of CSS blocks that failed to be parsed, but were rewritten in the
// fallback path.
Variable* num_fallback_rewrites_;
// # of CSS blocks that failed to be rewritten in the fallback path.
Variable* num_fallback_failures_;
// # of CSS rewrites which were not applied because they made the CSS larger
// and did not rewrite any images in it/flatten any other CSS files into it.
Variable* num_rewrites_dropped_;
// # of bytes saved from rewriting CSS (including minification and the
// increase of bytes from longer image URLs and the increase of bytes
// from @import flattening).
// TODO(sligocki): This should consider the input size to be the input sizes
// of all CSS files flattened into this one. Currently it does not.
UpDownCounter* total_bytes_saved_;
// Sum of original bytes of all successfully rewritten CSS blocks.
// total_bytes_saved_ / total_original_bytes_ should be the
// average percentage reduction of CSS block size.
Variable* total_original_bytes_;
// # of uses of rewritten CSS (updating <link> href= attributes,
// <style> contents or style= attributes).
Variable* num_uses_;
// # of times CSS was not flattened because of a charset mismatch.
Variable* num_flatten_imports_charset_mismatch_;
// # of times CSS was not flattened because of an invalid @import URL.
Variable* num_flatten_imports_invalid_url_;
// # of times CSS was not flattened because the resulting CSS was too big.
Variable* num_flatten_imports_limit_exceeded_;
// # of times CSS was not flattened because minification failed.
Variable* num_flatten_imports_minify_failed_;
// # of times CSS was not flattened because of recursive imports.
Variable* num_flatten_imports_recursion_;
// # of times CSS was not flattened because it had complex media queries.
Variable* num_flatten_imports_complex_queries_;
// URL segment encoder instance owned by this filter.
// NOTE(review): presumably what encoder() returns -- confirm in the .cc file.
CssUrlEncoder encoder_;
// The filters related to this filter.
static const RewriteOptions::Filter* merged_filters_;
static int merged_filters_size_;
// The options related to this filter.
static StringPieceVector* related_options_;
DISALLOW_COPY_AND_ASSIGN(CssFilter);
};
// Context used by CssFilter under async flow.
class CssFilter::Context : public SingleRewriteContext {
public:
// Constructs a context on behalf of 'filter'.
// NOTE(review): how the remaining arguments are used (and who owns
// 'context') is not visible in this header -- confirm against the
// constructor definition.
Context(CssFilter* filter, RewriteDriver* driver,
RewriteContext* parent,
CacheExtender* cache_extender,
ImageRewriteFilter* image_rewriter,
ImageCombineFilter* image_combiner,
ResourceContext* context);
virtual ~Context();
// Setup rewriting for inline, attribute, or external CSS.
void SetupInlineRewrite(HtmlElement* style_element, HtmlCharactersNode* text);
void SetupAttributeRewrite(HtmlElement* element,
HtmlElement::Attribute* src,
InlineCssKind inline_css_kind);
void SetupExternalRewrite(HtmlElement* element,
const GoogleUrl& base_gurl,
const GoogleUrl& trim_gurl);
// Starts nested rewrite jobs for any imports or images contained in the CSS.
// Marked public, so that it's accessible from CssHierarchy.
void RewriteCssFromNested(RewriteContext* parent, CssHierarchy* hierarchy);
// Specialization to absolutify URLs in input resource in case of rewrite
// fail or deadline exceeded.
virtual bool SendFallbackResponse(StringPiece output_url_base,
StringPiece input_contents,
AsyncFetch* async_fetch,
MessageHandler* handler);
// Accessors returning pointers to members owned by this context.
CssResourceSlotFactory* slot_factory() { return &slot_factory_; }
CssHierarchy* mutable_hierarchy() { return &hierarchy_; }
protected:
// Rewrite-context hooks.
// NOTE(review): presumably overrides of SingleRewriteContext/RewriteContext
// virtuals; the base classes are not visible from this header.
virtual void Render();
virtual void Harvest();
virtual bool Partition(OutputPartitions* partitions,
OutputResourceVector* outputs);
virtual void RewriteSingle(const ResourcePtr& input,
const OutputResourcePtr& output);
// Reports the id of the filter this context was created for.
virtual const char* id() const { return filter_->id(); }
// Output of this context is always a rewritten resource.
virtual OutputResourceKind kind() const { return kRewrittenResource; }
virtual GoogleString CacheKeySuffix() const;
virtual const UrlSegmentEncoder* encoder() const;
// Implements UserAgentCacheKey method of RewriteContext.
virtual GoogleString UserAgentCacheKey(
const ResourceContext* resource_context) const;
private:
// Computes the base URL to use for the CSS in 'input_resource' and stores it
// in '*css_base_gurl_to_use'.
void GetCssBaseUrlToUse(const ResourcePtr& input_resource,
GoogleUrl* css_base_gurl_to_use);
// Compute the trim URL to use, from either an output URL base or an output
// resource.
// NOTE(review): the output parameter of both overloads is named
// 'css_base_gurl_to_use' although these compute the trim URL -- looks like a
// copy/paste leftover; consider renaming.
void GetCssTrimUrlToUse(const ResourcePtr& input_resource,
const StringPiece& output_url_base,
GoogleUrl* css_base_gurl_to_use);
void GetCssTrimUrlToUse(const ResourcePtr& input_resource,
const OutputResourcePtr& output_resource,
GoogleUrl* css_base_gurl_to_use);
// Rewrites the CSS in 'in_text'. 'text_is_declarations' indicates that the
// text is a bare list of declarations (as in a style= attribute) rather than
// a full stylesheet.
// NOTE(review): the meaning of the return value is not documented here --
// presumably whether the rewrite could be started; confirm in the .cc file.
bool RewriteCssText(const GoogleUrl& css_base_gurl,
const GoogleUrl& css_trim_gurl,
const StringPiece& in_text,
int64 in_text_size,
bool text_is_declarations,
MessageHandler* handler);
// Starts nested rewrite jobs for any imports or images contained in the CSS.
void RewriteCssFromRoot(const GoogleUrl& css_base_gurl,
const GoogleUrl& css_trim_gurl,
const StringPiece& in_text, int64 in_text_size,
bool has_unparseables, Css::Stylesheet* stylesheet);
// Fall back to using CssTagScanner to find the URLs and rewrite them
// that way. Like RewriteCssFromRoot, output is written into output
// resource in Harvest(). Called if CSS Parser fails to parse doc.
// Returns whether or not fallback rewriting succeeds. Fallback can fail
// if URLs in CSS are not parseable.
bool FallbackRewriteUrls(const GoogleUrl& css_base_gurl,
const GoogleUrl& css_trim_gurl,
const StringPiece& in_text);
// Tries to write out a (potentially edited) stylesheet out to out_text,
// and returns whether we should consider the result as an improvement.
bool SerializeCss(int64 in_text_size,
const Css::Stylesheet* stylesheet,
const GoogleUrl& css_base_gurl,
const GoogleUrl& css_trim_gurl,
bool previously_optimized,
bool stylesheet_is_declarations,
bool add_utf8_bom,
GoogleString* out_text,
MessageHandler* handler);
// Used by the asynchronous rewrite callbacks (RewriteSingle + Harvest) to
// determine if what is being rewritten is a style attribute or a stylesheet,
// since an attribute comprises only declarations, unlike a stylesheet.
bool IsInlineAttribute() const {
return (rewrite_inline_attribute_ != NULL);
}
// Determine the appropriate image inlining threshold based upon whether we're
// in an html file (<style> tag or style= attribute) or in an external css
// file.
int64 ImageInlineMaxBytes() const;
// The filter this context works for; id() is forwarded to it.
CssFilter* filter_;
// NOTE(review): presumably handles rewriting of images referenced from the
// CSS -- confirm in the .cc file.
scoped_ptr<CssImageRewriter> css_image_rewriter_;
ImageRewriteFilter* image_rewrite_filter_;
// Exposed to callers through slot_factory().
CssResourceSlotFactory slot_factory_;
// Exposed to callers through mutable_hierarchy().
CssHierarchy hierarchy_;
bool css_rewritten_;
bool has_utf8_bom_;
// Are we performing a fallback rewrite?
bool fallback_mode_;
// Transformer used by CssTagScanner to rewrite URLs if we failed to
// parse CSS. This will only be defined if CSS parsing failed.
scoped_ptr<AssociationTransformer> fallback_transformer_;
// Backup transformer for AssociationTransformer. Absolutifies URLs and
// rewrites their domains as necessary if they can't be cache extended.
scoped_ptr<RewriteDomainTransformer> absolutifier_;
// The element containing the CSS being rewritten, either a style element
// (inline), a link element (external), or anything with a style attribute.
HtmlElement* rewrite_element_;
// Style element containing inline CSS (see StartInlineRewrite) -or-
// any element with a style attribute (see StartAttributeRewrite), or
// NULL if we're rewriting external stuff.
HtmlElement* rewrite_inline_element_;
// Node with inline CSS to rewrite, or NULL if we're rewriting external stuff.
HtmlCharactersNode* rewrite_inline_char_node_;
// The style attribute associated with rewrite_inline_element_. Mutually
// exclusive with rewrite_inline_char_node_ since style elements cannot
// have style attributes.
HtmlElement::Attribute* rewrite_inline_attribute_;
// Indicates the kind of inline CSS we are rewriting (<style> vs. style=,
// and whether we've noticed any URLs). Only valid if the other
// rewrite_inline_ fields reflect us doing inline rewriting.
InlineCssKind rewrite_inline_css_kind_;
// Information needed for nested rewrites or finishing up serialization.
int64 in_text_size_;
GoogleUrl initial_css_base_gurl_;
GoogleUrl initial_css_trim_gurl_;
scoped_ptr<GoogleUrl> base_gurl_for_fallback_;
scoped_ptr<GoogleUrl> trim_gurl_for_fallback_;
ResourcePtr input_resource_;
OutputResourcePtr output_resource_;
DISALLOW_COPY_AND_ASSIGN(Context);
};
} // namespace net_instaweb
#endif // NET_INSTAWEB_REWRITER_PUBLIC_CSS_FILTER_H_