// blob: 29b7d15a97e2226c1e51f1b621de92e9ca6d1983 [file] [log] [blame]
/*
 * Copyright 2011 Google Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
// Author: (Matt Atterbury)
#include <vector>
#include "net/instaweb/rewriter/public/resource.h"
#include "pagespeed/kernel/base/basictypes.h"
#include "pagespeed/kernel/base/scoped_ptr.h"
#include "pagespeed/kernel/base/string.h"
#include "pagespeed/kernel/base/string_util.h"
#include "pagespeed/kernel/http/data_url.h"
#include "pagespeed/kernel/http/google_url.h"
// Forward declaration: this header refers to Css::Stylesheet only by pointer
// (raw or scoped_ptr), so the full definition is not required here.
namespace Css {
class Stylesheet;
} // namespace Css
namespace net_instaweb {
// Forward declarations: only pointers to these types appear in this header.
class CssFilter;
class MessageHandler;
// Representation of a CSS with all the information required for import
// flattening, image rewriting, and minifying. A flattened CSS has had all
// of its @import's replaced with the contents of the @import'd file (and
// each of those have had their @import's replaced, and so on recursively).
// Lifecycle:
//   Processing:
//     Construct + InitializeRoot
//     if (ExpandChildren)  <------------------+
//       for each child                        |
//         InitializeNested                    |
//         set_input_contents                  |
//         if (Parse)                          |
//           if (CheckCharsetOk)               |
//             Kick off recursion from here  --+
//   Harvesting (when all the children of a node have completed):
//     if you need the rolled-up text form:
//       RollUpContents
//       Use minified_contents
//     if you need the rolled-up parsed form:
//       RollUpStylesheets
//       Use stylesheet
class CssHierarchy {
static const char kFailureReasonPrefix[];
// Initialized in an empty state, which is considered successful since it
// can be flattened into nothing.
explicit CssHierarchy(CssFilter* filter);
// Initialize the top-level hierarchy's state from the given values.
// A StringPiece reference to input_contents is made so it must remain
// valid for the life of this object.
void InitializeRoot(const GoogleUrl& css_base_url,
const GoogleUrl& css_trim_url,
const StringPiece input_contents,
bool has_unparseables,
int64 flattened_result_limit,
Css::Stylesheet* stylesheet,
MessageHandler* message_handler);
// A hierarchy needs rewriting only if it has an import to read and expand.
bool NeedsRewriting() const {
return flattening_succeeded_ && !url_.empty();
const StringPiece url() const { return url_; }
const StringPiece url_for_humans() const {
return (url_.empty() ? "inline"
: IsDataUrl(url_) ? "data URL"
: url_);
const GoogleUrl& css_base_url() const { return css_base_url_; }
const GoogleUrl& css_trim_url() const { return css_trim_url_; }
// If the input contents have been resolved return css_trim_url_ because
// that's what the contents have been resolved against, otherwise return
// css_base_url_ because that's what the paths in the CSS are relative to.
const GoogleUrl& css_resolution_base() {
return (input_contents_resolved_ ? css_trim_url_ : css_base_url_);
const Css::Stylesheet* stylesheet() const { return stylesheet_.get(); }
Css::Stylesheet* mutable_stylesheet() { return stylesheet_.get(); }
void set_stylesheet(Css::Stylesheet* stylesheet);
const StringPiece input_contents() const { return input_contents_; }
// A StringPiece reference to input_contents is made so it must remain
// valid for the life of this object.
void set_input_contents(const StringPiece input_contents) {
input_contents_ = input_contents;
GoogleString* input_contents_backing_store() {
return &input_contents_backing_store_;
void set_input_contents_to_backing_store() {
input_contents_ = input_contents_backing_store_;
const GoogleString& minified_contents() const { return minified_contents_; }
void set_minified_contents(const StringPiece minified_contents);
const GoogleString& charset() const { return charset_; }
GoogleString* mutable_charset() { return &charset_; }
const GoogleString& charset_source() const { return charset_source_; }
const StringVector& media() const { return media_; }
StringVector* mutable_media() { return &media_; }
// Intended for access to children; add new children using ExpandChildren.
const std::vector<CssHierarchy*>& children() const { return children_; }
std::vector<CssHierarchy*>& children() { return children_; }
bool input_contents_resolved() const { return input_contents_resolved_; }
void set_input_contents_resolved(bool x) { input_contents_resolved_ = x; }
bool flattening_succeeded() const { return flattening_succeeded_; }
void set_flattening_succeeded(bool ok) { flattening_succeeded_ = ok; }
const GoogleString& flattening_failure_reason() const {
return flattening_failure_reason_;
// Do nothing if given an empty reason, otherwise if we don't have a failure
// reason yet, set it to the given one prepended with "Flattening failed: ",
// otherwise append the given one to what we have now, separated by " AND ".
// We also strip any leading "Flattening failed: " from the given reason,
// which can happen when rolling up hierarchies.
void AddFlatteningFailureReason(const GoogleString& reason);
bool unparseable_detected() const { return unparseable_detected_; }
void set_unparseable_detected(bool ok) { unparseable_detected_ = ok; }
int64 flattened_result_limit() const { return flattened_result_limit_; }
void set_flattened_result_limit(int64 x) { flattened_result_limit_ = x; }
// If we haven't already, determine the charset of this CSS, then check if
// it is compatible with the charset of its parent; currently they are
// compatible if they're exactly the same (ignoring case). The charset of
// this CSS is taken from resource's headers if specified, else from the
// @charset rule in the parsed CSS, if any, else from the owning document
// (our parent). Returns true if the charsets are compatible, otherwise
// returns false and sets the failure reason. The charset is always
// determined and set regardless of the return value.
// TODO(matterbury): A potential future enhancement is to allow 'compatible'
// charsets, like a US-ASCII child in a UTF-8 parent, since US-ASCII is a
// subset of UTF-8.
bool CheckCharsetOk(const ResourcePtr& resource,
GoogleString* failure_reason);
// Parse the input contents into a stylesheet iff it doesn't have one yet,
// and apply the media applicable to the whole CSS to each ruleset in the
// stylesheet and delete any rulesets that end up with no applicable media.
// Returns true if the input contents are successfully parsed, false if not.
// 'this' will be unchanged if false is returned.
bool Parse();
// Expand the imports in our stylesheet, creating the next level of the
// hierarchy tree by creating a child hierarchy for each import. The
// expansion of a child can fail because of problems with the imported URL
// or because of import recursion, in which case the flattening_succeeded
// flag for that child is set to false. An expanded child might be empty
// because of disjoint media rules, in which case the child is un-initialized
// [for example, if a.css is imported with a media rule of 'print' and it
// imports b.css with a media rule of 'screen' there is no point in expanding
// b.css because none of it can apply to the 'print' medium]. Returns true
// if any children were expanded and need rewriting, which can be tested
// using NeedsRewriting() [it tests both that the child was expanded and
// that the expansion succeeded].
bool ExpandChildren();
// Recursively roll up this CSS's textual form such that minified_contents()
// returns the flattened version of this CSS with @import's replaced with the
// contents of the imported file, all @charset rules removed, and the entire
// result minified. Intended for use by nested hierarchies that need to
// produce their flattened+minimized CSS for their parent to incorporate
// into their own flattened+minimized CSS. If anything goes wrong with the
// rolling up then the minified contents are set to the original contents.
// If the textual form hasn't yet been parsed this method will do so by
// invoking Parse, since the parsed form is required for minification.
// If rolling up succeeds, any charset and imports are removed from the
// parsed stylesheet, to match the flattened+minimized CSS for the input
// contents (without charset/imports), and to help speed up the ultimate
// call to RollUpStylesheets().
void RollUpContents();
// Recursively roll up this CSS's parsed form such that stylesheet() returns
// the flattened version of it, with child CSSs' rulesets merged into this
// one's and all imports and charsets removed. It is a pre-requisite that
// any *children* have had RollUpContents() invoked on them; it is *not*
// required that it has been invoked on 'this' but it is OK if it has. It is
// also a pre-requisite that if the CSS has not yet been parsed then it must
// not contain any @import rules, rather it must be the already-flattened
// CSS text, because we use the existence of @import rules to tell that we
// have already tried and failed to parse and flatten the CSS. This method
// is intended to be invoked only on the root CSS since there is no need to
// roll up intermediate/nested stylesheets; only their contents need to be
// rolled up. Returns false if the CSS was not already parsed and the call
// to Parse() failed, in which case rolling up has not been performed and
// 'this' is unchanged.
bool RollUpStylesheets();
friend class CssHierarchyTest;
// Initialize state from the given values; for use by nested levels that
// are initialized from their parent's state. A StringPiece reference to
// import_url is made so it must remain valid for the life of this object.
void InitializeNested(const CssHierarchy& parent,
const GoogleUrl& import_url);
// Resize to the specified number of children.
void ResizeChildren(int n);
// Determine whether this CSS is a recusrive import by checking if any CSS
// in the hierarchy is handling our url already. This is to cater for things
// like a.css @import'ing itself.
bool IsRecursive() const;
// Determine the media applicable to this CSS as the intersection of the
// set of media applicable to the containing CSS and the set of media
// applicable to this CSS as a whole, and save that intersection in this
// CSS's media attribute. If the resulting media is empty then this CSS
// doesn't have to be processed at all so return false, otherwise true.
bool DetermineImportMedia(const StringVector& containing_media,
const StringVector& import_media);
// Determine the media applicable to a ruleset as the intersection of the
// set of media that apply just to the ruleset and the set of media that
// apply to this CSS (as determined by DetermineImportMedia above), and
// edits ruleset_media in place. If the intersection is empty, false is
// returned and the ruleset doesn't have to be processed at all (it can
// be omitted), else true is returned.
bool DetermineRulesetMedia(StringVector* ruleset_media);
// The filter that owns us, used for recording statistics.
CssFilter* filter_;
// The URL of the stylesheet being represented; in the case of inline CSS
// this will be a data URL.
StringPiece url_;
// The base for any relative URLs in the input CSS.
GoogleUrl css_base_url_;
// The base of the output URL which is used to trim absolutified URLs back
// to relative URLs in the output CSS.
GoogleUrl css_trim_url_;
// A pointer to the representation of the parent CSS that imports this CSS;
// for the top-level CSS only this will be NULL.
const CssHierarchy* parent_;
// An array of pointers to the child representations of the CSS's that this
// CSS imports, one array element per import, in the order they are imported;
// for leaf CSS's this will be empty.
std::vector<CssHierarchy*> children_;
// The text form of the input CSS.
StringPiece input_contents_;
// Backing store for the input contents. As input_contents_ is a StringPiece,
// the referenced string has to survive as long as we do; normally that is the
// inlined_contents() of the input resource, which is guaranteed to survive
// us, however if CssFlattenImportsContext::RewriteSingle() has to resolve
// URLs in that text it needs somewhere to save the result, and this is it.
GoogleString input_contents_backing_store_;
// The text form of the output (flattened) CSS.
GoogleString minified_contents_;
// The parsed form of the CSS, in various states of transformation. Created
// from the input text form by Parse, mutated by RollUpContents and
// RollUpStylesheets - see their description for details.
scoped_ptr<Css::Stylesheet> stylesheet_;
// The charset for this CSS as specified by HTTP headers, or a charset
// attribute, or an @charset rule, or inherited from the parent.
GoogleString charset_;
// The source of the charset for this CSS (headers, attribute, etc).
GoogleString charset_source_;
// The collection of media for which this CSS applies; an empty collection
// means all media. CSS in or linked from HTML can specify this using a media
// attribute, @import'd CSS can specify it on the @import rule. Note that
// this is NOT media from @media rules, it is only media that applies to the
// *whole* CSS document. Note that media expressions (CSS3) are NOT handled.
StringVector media_;
// An indication of whether the input contents have been resolved. If not,
// we use css_base_url_ to resolve @import's, but if so we use css_trim_url_
// because that's what the contents have been resolved against. Resolution
// is performed by CssFlattenImportsContext::RewriteSingle().
bool input_contents_resolved_;
// An indication of the success or failure of the flattening process, which
// can fail for various reasons, and any failure propagates up the hierarchy
// to the root CSS and eventually stops the process.
bool flattening_succeeded_;
// If flattening failed, a user-oriented description of why, for injection
// into the HTML if the debug filter is enabled.
GoogleString flattening_failure_reason_;
// An indication of whether anything unparseable was detected in this CSS.
bool unparseable_detected_;
// The limit to the size of the result of flattening (0 means no limit).
// If the flattened result would be this much or more, flattening will be
// aborted. TODO(matterbury): Investigate whether we can, or ought to,
// flatten nested @imports that do fit within the limit [eg. a.css imports
// b.css then has a load of CSS; b.css imports then some CSS; say the
// flattened version of b.css fits in the limit, but the flattened version
// of a.css does not; we could flatten b.css then change the @import in
// a.css to import the flattened version, saving the fetch of c.css].
int64 flattened_result_limit_;
// For logging messages.
MessageHandler* message_handler_;
} // namespace net_instaweb