/*
 * Copyright 2011 Google Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

// Author: matterbury@google.com (Matt Atterbury)
| |
| #ifndef NET_INSTAWEB_REWRITER_PUBLIC_CSS_HIERARCHY_H_ |
| #define NET_INSTAWEB_REWRITER_PUBLIC_CSS_HIERARCHY_H_ |
| |
| #include <vector> |
| |
| #include "net/instaweb/rewriter/public/resource.h" |
| #include "pagespeed/kernel/base/basictypes.h" |
| #include "pagespeed/kernel/base/scoped_ptr.h" |
| #include "pagespeed/kernel/base/string.h" |
| #include "pagespeed/kernel/base/string_util.h" |
| #include "pagespeed/kernel/http/data_url.h" |
| #include "pagespeed/kernel/http/google_url.h" |
| |
| namespace Css { |
| class Stylesheet; |
| } // namespace Css |
| |
| namespace net_instaweb { |
| |
| class CssFilter; |
| class MessageHandler; |
| |
| // Representation of a CSS with all the information required for import |
| // flattening, image rewriting, and minifying. A flattened CSS has had all |
| // of its @import's replaced with the contents of the @import'd file (and |
| // each of those have had their @import's replaced, and so on recursively). |
| // |
| // Lifecycle: |
| // Processing: |
| // Construct + InitializeRoot |
| // if (ExpandChildren) <------------------+ |
| // for each child | |
| // InitializeNested | |
| // set_input_contents | |
| // if (Parse) | |
| // if (CheckCharsetOk) | |
| // Kick off recursion from here --+ |
| // Harvesting (when all the children of a node have completed): |
| // if you need the rolled-up text form: |
| // RollUpContents |
| // Use minified_contents |
| // if you need the rolled-up parsed form: |
| // RollUpStylesheet |
| // Use stylesheet |
| // |
| class CssHierarchy { |
| public: |
| static const char kFailureReasonPrefix[]; |
| |
| // Initialized in an empty state, which is considered successful since it |
| // can be flattened into nothing. |
| explicit CssHierarchy(CssFilter* filter); |
| ~CssHierarchy(); |
| |
| // Initialize the top-level hierarchy's state from the given values. |
| // A StringPiece reference to input_contents is made so it must remain |
| // valid for the life of this object. |
| void InitializeRoot(const GoogleUrl& css_base_url, |
| const GoogleUrl& css_trim_url, |
| const StringPiece input_contents, |
| bool has_unparseables, |
| int64 flattened_result_limit, |
| Css::Stylesheet* stylesheet, |
| MessageHandler* message_handler); |
| |
| // A hierarchy needs rewriting only if it has an import to read and expand. |
| bool NeedsRewriting() const { |
| return flattening_succeeded_ && !url_.empty(); |
| } |
| |
| const StringPiece url() const { return url_; } |
| const StringPiece url_for_humans() const { |
| return (url_.empty() ? "inline" |
| : IsDataUrl(url_) ? "data URL" |
| : url_); |
| } |
| |
| const GoogleUrl& css_base_url() const { return css_base_url_; } |
| const GoogleUrl& css_trim_url() const { return css_trim_url_; } |
| |
| // If the input contents have been resolved return css_trim_url_ because |
| // that's what the contents have been resolved against, otherwise return |
| // css_base_url_ because that's what the paths in the CSS are relative to. |
| const GoogleUrl& css_resolution_base() { |
| return (input_contents_resolved_ ? css_trim_url_ : css_base_url_); |
| } |
| |
| const Css::Stylesheet* stylesheet() const { return stylesheet_.get(); } |
| Css::Stylesheet* mutable_stylesheet() { return stylesheet_.get(); } |
| void set_stylesheet(Css::Stylesheet* stylesheet); |
| |
| const StringPiece input_contents() const { return input_contents_; } |
| // A StringPiece reference to input_contents is made so it must remain |
| // valid for the life of this object. |
| void set_input_contents(const StringPiece input_contents) { |
| input_contents_ = input_contents; |
| } |
| |
| GoogleString* input_contents_backing_store() { |
| return &input_contents_backing_store_; |
| } |
| void set_input_contents_to_backing_store() { |
| input_contents_ = input_contents_backing_store_; |
| } |
| |
| const GoogleString& minified_contents() const { return minified_contents_; } |
| void set_minified_contents(const StringPiece minified_contents); |
| |
| const GoogleString& charset() const { return charset_; } |
| GoogleString* mutable_charset() { return &charset_; } |
| |
| const GoogleString& charset_source() const { return charset_source_; } |
| |
| const StringVector& media() const { return media_; } |
| StringVector* mutable_media() { return &media_; } |
| |
| // Intended for access to children; add new children using ExpandChildren. |
| const std::vector<CssHierarchy*>& children() const { return children_; } |
| std::vector<CssHierarchy*>& children() { return children_; } |
| |
| bool input_contents_resolved() const { return input_contents_resolved_; } |
| void set_input_contents_resolved(bool x) { input_contents_resolved_ = x; } |
| |
| bool flattening_succeeded() const { return flattening_succeeded_; } |
| void set_flattening_succeeded(bool ok) { flattening_succeeded_ = ok; } |
| |
| const GoogleString& flattening_failure_reason() const { |
| return flattening_failure_reason_; |
| } |
| // Do nothing if given an empty reason, otherwise if we don't have a failure |
| // reason yet, set it to the given one prepended with "Flattening failed: ", |
| // otherwise append the given one to what we have now, separated by " AND ". |
| // We also strip any leading "Flattening failed: " from the given reason, |
| // which can happen when rolling up hierarchies. |
| void AddFlatteningFailureReason(const GoogleString& reason); |
| |
| bool unparseable_detected() const { return unparseable_detected_; } |
| void set_unparseable_detected(bool ok) { unparseable_detected_ = ok; } |
| |
| int64 flattened_result_limit() const { return flattened_result_limit_; } |
| void set_flattened_result_limit(int64 x) { flattened_result_limit_ = x; } |
| |
| // If we haven't already, determine the charset of this CSS, then check if |
| // it is compatible with the charset of its parent; currently they are |
| // compatible if they're exactly the same (ignoring case). The charset of |
| // this CSS is taken from resource's headers if specified, else from the |
| // @charset rule in the parsed CSS, if any, else from the owning document |
| // (our parent). Returns true if the charsets are compatible, otherwise |
| // returns false and sets the failure reason. The charset is always |
| // determined and set regardless of the return value. |
| // |
| // TODO(matterbury): A potential future enhancement is to allow 'compatible' |
| // charsets, like a US-ASCII child in a UTF-8 parent, since US-ASCII is a |
| // subset of UTF-8. |
| bool CheckCharsetOk(const ResourcePtr& resource, |
| GoogleString* failure_reason); |
| |
| // Parse the input contents into a stylesheet iff it doesn't have one yet, |
| // and apply the media applicable to the whole CSS to each ruleset in the |
| // stylesheet and delete any rulesets that end up with no applicable media. |
| // Returns true if the input contents are successfully parsed, false if not. |
| // 'this' will be unchanged if false is returned. |
| bool Parse(); |
| |
| // Expand the imports in our stylesheet, creating the next level of the |
| // hierarchy tree by creating a child hierarchy for each import. The |
| // expansion of a child can fail because of problems with the imported URL |
| // or because of import recursion, in which case the flattening_succeeded |
| // flag for that child is set to false. An expanded child might be empty |
| // because of disjoint media rules, in which case the child is un-initialized |
| // [for example, if a.css is imported with a media rule of 'print' and it |
| // imports b.css with a media rule of 'screen' there is no point in expanding |
| // b.css because none of it can apply to the 'print' medium]. Returns true |
| // if any children were expanded and need rewriting, which can be tested |
| // using NeedsRewriting() [it tests both that the child was expanded and |
| // that the expansion succeeded]. |
| bool ExpandChildren(); |
| |
| // Recursively roll up this CSS's textual form such that minified_contents() |
| // returns the flattened version of this CSS with @import's replaced with the |
| // contents of the imported file, all @charset rules removed, and the entire |
| // result minified. Intended for use by nested hierarchies that need to |
| // produce their flattened+minimized CSS for their parent to incorporate |
| // into their own flattened+minimized CSS. If anything goes wrong with the |
| // rolling up then the minified contents are set to the original contents. |
| // If the textual form hasn't yet been parsed this method will do so by |
| // invoking Parse, since the parsed form is required for minification. |
| // If rolling up succeeds, any charset and imports are removed from the |
| // parsed stylesheet, to match the flattened+minimized CSS for the input |
| // contents (without charset/imports), and to help speed up the ultimate |
| // call to RollUpStylesheets(). |
| void RollUpContents(); |
| |
| // Recursively roll up this CSS's parsed form such that stylesheet() returns |
| // the flattened version of it, with child CSSs' rulesets merged into this |
| // one's and all imports and charsets removed. It is a pre-requisite that |
| // any *children* have had RollUpContents() invoked on them; it is *not* |
| // required that it has been invoked on 'this' but it is OK if it has. It is |
| // also a pre-requisite that if the CSS has not yet been parsed then it must |
| // not contain any @import rules, rather it must be the already-flattened |
| // CSS text, because we use the existence of @import rules to tell that we |
| // have already tried and failed to parse and flatten the CSS. This method |
| // is intended to be invoked only on the root CSS since there is no need to |
| // roll up intermediate/nested stylesheets; only their contents need to be |
| // rolled up. Returns false if the CSS was not already parsed and the call |
| // to Parse() failed, in which case rolling up has not been performed and |
| // 'this' is unchanged. |
| bool RollUpStylesheets(); |
| |
| private: |
| friend class CssHierarchyTest; |
| |
| // Initialize state from the given values; for use by nested levels that |
| // are initialized from their parent's state. A StringPiece reference to |
| // import_url is made so it must remain valid for the life of this object. |
| void InitializeNested(const CssHierarchy& parent, |
| const GoogleUrl& import_url); |
| |
| // Resize to the specified number of children. |
| void ResizeChildren(int n); |
| |
| // Determine whether this CSS is a recusrive import by checking if any CSS |
| // in the hierarchy is handling our url already. This is to cater for things |
| // like a.css @import'ing itself. |
| bool IsRecursive() const; |
| |
| // Determine the media applicable to this CSS as the intersection of the |
| // set of media applicable to the containing CSS and the set of media |
| // applicable to this CSS as a whole, and save that intersection in this |
| // CSS's media attribute. If the resulting media is empty then this CSS |
| // doesn't have to be processed at all so return false, otherwise true. |
| bool DetermineImportMedia(const StringVector& containing_media, |
| const StringVector& import_media); |
| |
| // Determine the media applicable to a ruleset as the intersection of the |
| // set of media that apply just to the ruleset and the set of media that |
| // apply to this CSS (as determined by DetermineImportMedia above), and |
| // edits ruleset_media in place. If the intersection is empty, false is |
| // returned and the ruleset doesn't have to be processed at all (it can |
| // be omitted), else true is returned. |
| bool DetermineRulesetMedia(StringVector* ruleset_media); |
| |
| // The filter that owns us, used for recording statistics. |
| CssFilter* filter_; |
| |
| // The URL of the stylesheet being represented; in the case of inline CSS |
| // this will be a data URL. |
| StringPiece url_; |
| |
| // The base for any relative URLs in the input CSS. |
| GoogleUrl css_base_url_; |
| |
| // The base of the output URL which is used to trim absolutified URLs back |
| // to relative URLs in the output CSS. |
| GoogleUrl css_trim_url_; |
| |
| // A pointer to the representation of the parent CSS that imports this CSS; |
| // for the top-level CSS only this will be NULL. |
| const CssHierarchy* parent_; |
| |
| // An array of pointers to the child representations of the CSS's that this |
| // CSS imports, one array element per import, in the order they are imported; |
| // for leaf CSS's this will be empty. |
| std::vector<CssHierarchy*> children_; |
| |
| // The text form of the input CSS. |
| StringPiece input_contents_; |
| |
| // Backing store for the input contents. As input_contents_ is a StringPiece, |
| // the referenced string has to survive as long as we do; normally that is the |
| // inlined_contents() of the input resource, which is guaranteed to survive |
| // us, however if CssFlattenImportsContext::RewriteSingle() has to resolve |
| // URLs in that text it needs somewhere to save the result, and this is it. |
| GoogleString input_contents_backing_store_; |
| |
| // The text form of the output (flattened) CSS. |
| GoogleString minified_contents_; |
| |
| // The parsed form of the CSS, in various states of transformation. Created |
| // from the input text form by Parse, mutated by RollUpContents and |
| // RollUpStylesheets - see their description for details. |
| scoped_ptr<Css::Stylesheet> stylesheet_; |
| |
| // The charset for this CSS as specified by HTTP headers, or a charset |
| // attribute, or an @charset rule, or inherited from the parent. |
| GoogleString charset_; |
| |
| // The source of the charset for this CSS (headers, attribute, etc). |
| GoogleString charset_source_; |
| |
| // The collection of media for which this CSS applies; an empty collection |
| // means all media. CSS in or linked from HTML can specify this using a media |
| // attribute, @import'd CSS can specify it on the @import rule. Note that |
| // this is NOT media from @media rules, it is only media that applies to the |
| // *whole* CSS document. Note that media expressions (CSS3) are NOT handled. |
| StringVector media_; |
| |
| // An indication of whether the input contents have been resolved. If not, |
| // we use css_base_url_ to resolve @import's, but if so we use css_trim_url_ |
| // because that's what the contents have been resolved against. Resolution |
| // is performed by CssFlattenImportsContext::RewriteSingle(). |
| bool input_contents_resolved_; |
| |
| // An indication of the success or failure of the flattening process, which |
| // can fail for various reasons, and any failure propagates up the hierarchy |
| // to the root CSS and eventually stops the process. |
| bool flattening_succeeded_; |
| |
| // If flattening failed, a user-oriented description of why, for injection |
| // into the HTML if the debug filter is enabled. |
| GoogleString flattening_failure_reason_; |
| |
| // An indication of whether anything unparseable was detected in this CSS. |
| bool unparseable_detected_; |
| |
| // The limit to the size of the result of flattening (0 means no limit). |
| // If the flattened result would be this much or more, flattening will be |
| // aborted. TODO(matterbury): Investigate whether we can, or ought to, |
| // flatten nested @imports that do fit within the limit [eg. a.css imports |
| // b.css then has a load of CSS; b.css imports c.ss then some CSS; say the |
| // flattened version of b.css fits in the limit, but the flattened version |
| // of a.css does not; we could flatten b.css then change the @import in |
| // a.css to import the flattened version, saving the fetch of c.css]. |
| int64 flattened_result_limit_; |
| |
| // For logging messages. |
| MessageHandler* message_handler_; |
| |
| DISALLOW_COPY_AND_ASSIGN(CssHierarchy); |
| }; |
| |
| } // namespace net_instaweb |
| |
| #endif // NET_INSTAWEB_REWRITER_PUBLIC_CSS_HIERARCHY_H_ |