// src/third_party/css_parser/src/webutil/css/parser.h - incubator-pagespeed-debian - Git at Google

 /**
  * Copyright 2010 Google Inc.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
  * You may obtain a copy of the License at
  *
  *      http://www.apache.org/licenses/LICENSE-2.0
  *
  * Unless required by applicable law or agreed to in writing, software
  * distributed under the License is distributed on an "AS IS" BASIS,
  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */

 // Copyright 2006 Google Inc. All Rights Reserved.
 // Author: dpeng@google.com (Daniel Peng)

 #ifndef WEBUTIL_CSS_PARSER_H__
 #define WEBUTIL_CSS_PARSER_H__

 #include <memory>
 #include "base/scoped_ptr.h"
 #include <string>
 #include <vector>
 #include "base/scoped_ptr.h"
 #include "strings/stringpiece.h"
 #include "testing/production_stub/public/gunit_prod.h"
 #include "util/utf8/public/unicodetext.h"
 #include "webutil/css/media.h"
 #include "webutil/css/property.h"  // while these CSS includes can be
 #include "webutil/css/selector.h"  // forward-declared, who is really
 #include "webutil/css/string.h"
 #include "webutil/css/value.h"    // going to want parser.h but not values.h?
 #include "webutil/html/htmlcolor.h"

 namespace Css {

 // These are defined below Parser.
 class Declaration;
 class Declarations;
 class Import;
 class FontFace;
 class Stylesheet;
 class Ruleset;

 // Recursive descent parser for CSS.
 //   Based on: http://www.w3.org/TR/CSS21/syndata.html
 //             http://www.w3.org/TR/CSS21/grammar.html
 //
 // Say you want to parse a fragment of CSS.  Then construct a new
 // Parser object (this is very lightweight) and pass in the fragment to parse.
 // Then, call the top-level ParseXXX() function for what you want to parse.
 // This parses the fragment and returns a pointer to the abstract syntax tree.
 // You own this pointer and must delete it when you're done.
 //
 // The data structures comprising the abstract syntax tree are described in
 // cssvalue.h, cssparser-inl.h, csscondition.h, and cssproperty.h.
 //
 // Essentially, each stylesheet is a collection of rulesets.

 // Each ruleset has some selectors to describe what HTML elements it
 // applies to and some declarations to describe how the HTML elements
 // should be styled.
 // The ruleset may apply to multiple comma-separated selectors, which
 // means they apply to an element matching any of the selectors.
 // Ex: h1, h2 > p, h3 { color: red; }
 //
 // Each selector consists of a chain of simple selectors, separated by
 // combinators.
 // Ex: h2 > p selects a P element that is a child of an H2 element.
 //
 // Each simple selector may have conditions which impose further
 // restrictions, such as [foo], #id, .class, or :hover.  We represent
 // these as a list, which are semantically AND-ed together.
 //
 // Each declaration consists of a property and a list of values.
 //
 // Say, for example, you wish to parse a CSS declaration:
 //    Parser a("background: white; color: #333; line-height: 1.3;
 //                 text-align: justify; font-family: \"Gill Sans MT\",
 //                 \"Gill Sans\", GillSans, Arial, Helvetica, sans-serif");
 //    scoped_ptr<Declarations> t(a.ParseDeclarations());
 // See the 'declarations' unit test case for more details.
 //
 // We've made most of the recursive-descent ParseXXX functions private
 // to shrink the size of the public interface.  We expose
 // ParseStylesheet() and ParseDeclarations() because those are the
 // top-level functions necessary to parse stylesheets in HTML
 // documents. And ParseSelectors is exposed to parse selectors.  If it's
 // useful to expose more of the functions, please just send a CL for approval,
 // so we know what people depend on.
 //
 // The CSS parser runs in either quirks mode (the default) or standard
 // compliant mode. The latter is stricter in many aspects. Currently, it
 // affects color parsing (see below for details). Please refer to:
 //   http://developer.mozilla.org/en/docs/Mozilla_Quirks_Mode_Behavior
 // for the difference in Mozilla browsers.
 class Parser {
  public:
   // NOTE(review): the parser keeps raw pointers into the input (begin_, in_,
   // end_ below), so presumably the buffer must outlive the Parser and is not
   // copied -- confirm against parser.cc.
   Parser(const char* utf8text, const char* textend);
   explicit Parser(const char* utf8text);
   explicit Parser(StringPiece s);

   // ParseRawStylesheet and ParseStylesheet consume the entire document and
   // return a Stylesheet* containing all the imports and rulesets that it
   // found.  You must delete the return pointer.

   // ParseRawStylesheet simply parses the document into an abstract syntax tree.
   Stylesheet* ParseRawStylesheet();
   // ParseStylesheet also runs a second pass to convert shorthand
   // declarations such as background, font and font-family into sets of
   // declarations that they represent.
   Stylesheet* ParseStylesheet();

   // ParseRawDeclarations and ParseDeclarations parse declarations like
   // "background: white; color: #333; line-height: 1.3;", consuming until
   // (but not including) the closing '}' or EOF.  You must delete the return
   // pointer.

   // ParseRawDeclarations simply parses the declarations into an AST.
   Declarations* ParseRawDeclarations();

   // ParseDeclarations also runs a second pass to convert *some* syntactic
   // sugar declarations such as background, font and font-family.
   // Currently, both the expanded properties (such as background-color) and the
   // original property (background) are stored because the impl. is incomplete.
   // For details, see parser.cc.
   Declarations* ParseDeclarations();

   // Starting at whitespace or the first media query, ParseMediaQueries
   // parses a media query list and returns it. Never returns NULL. Returns
   // all MediaQueries it can successfully parse.
   MediaQueries* ParseMediaQueries();

   // Expand the values of shorthand declarations. Currently expands background
   // and font. Clears (but does not delete) input orig_declarations in the
   // process. orig_declarations should be a std::vector of NULLs on exit.
   Declarations* ExpandDeclarations(Declarations* orig_declarations);

   // Starting at the first simple selector or whitespace, ParseSelectors parses
   // a sequence of selectors. Return NULL if the parsing fails. The parser would
   // consume anything up to the declaration starting '{' or the end of document.
   Selectors* ParseSelectors();

   // Parse the next @import statement from the document. If it's not an @import
   // or if there's a syntax error, NULL is returned. Added for mod_pagespeed's
   // conversion to a link of @imports inside a style element.
   // If the next statement is not an @import rule, in_ is left at the
   // beginning of that statement.
   Import* ParseNextImport();

   // Parse the document as a single @import statement. If it's not exactly
   // one of these, or there's a syntax error, NULL is returned. Added for
   // mod_pagespeed's conversion to a link of this inside a style element.
   Import* ParseAsSingleImport();

   // Extract the leading @charset from the document. The return value is
   // valid iff it is not empty -and- errors_seen_mask() is zero. Added so
   // that mod_pagespeed can determine the charset of a CSS file without
   // duplicating a ton of our code.
   UnicodeText ExtractCharset();

   // current position in the parse.
   const char* getpos() const { return in_; }

   // Current position in document (bytes from beginning).
   // The pointer difference is explicitly narrowed to int to match the
   // declared return type.
   int CurrentOffset() const { return static_cast<int>(in_ - begin_); }

   // Done with the parse?
   bool Done() const {
     DCHECK(in_ <= end_) << "in_ is out of bounds, buffer overflow.";
     return in_ >= end_;
   }

   // Whether quirks mode (the default) is used in parsing. Standard compliant
   // (non-quirks) mode is stricter in color parsing, where a form of "rrgbbb"
   // without a leading # is not allowed.
   bool quirks_mode() const { return quirks_mode_; }
   void set_quirks_mode(bool quirks_mode) { quirks_mode_ = quirks_mode; }

   // In preservation mode (default off) we attempt to parse and store as much
   // info as possible from the stylesheet. We avoid value validation and allow
   // all parseable values. In addition for some constructs that cannot be
   // parsed, we store verbatim bytes which can be re-serialized back out.
   bool preservation_mode() const { return preservation_mode_; }
   void set_preservation_mode(bool x) { preservation_mode_ = x; }

   // Maximum recursive function depth. How deeply should the parser parse
   // functions inside of functions. It is important to limit this to avoid
   // unbounded stack-frame depth on untrusted input. See b/17628553
   int max_function_depth() const { return max_function_depth_; }
   void set_max_function_depth(int x) { max_function_depth_ = x; }
   static const int kDefaultMaxFunctionDepth = 10;

   // This is a bitmask of errors seen during the parse.  This is decidedly
   // incomplete --- there are definitely many errors that are not reported here.
   static const uint64 kNoError           = 0;
   static const uint64 kUtf8Error         = 1ULL << 0;   // 1
   static const uint64 kDeclarationError  = 1ULL << 1;   // 2
   static const uint64 kSelectorError     = 1ULL << 2;   // 4
   static const uint64 kFunctionError     = 1ULL << 3;   // 8
   static const uint64 kMediaError        = 1ULL << 4;   // 16
   static const uint64 kCounterError      = 1ULL << 5;   // 32
   static const uint64 kHtmlCommentError  = 1ULL << 6;   // 64
   static const uint64 kValueError        = 1ULL << 7;   // 128
   static const uint64 kRulesetError      = 1ULL << 8;   // 256
   static const uint64 kSkippedTokenError = 1ULL << 9;   // 512
   static const uint64 kCharsetError      = 1ULL << 10;  // 1024
   static const uint64 kBlockError        = 1ULL << 11;  // 2048
   static const uint64 kNumberError       = 1ULL << 12;  // 4096
   static const uint64 kImportError       = 1ULL << 13;  // 8192
   static const uint64 kAtRuleError       = 1ULL << 14;  // 16384
   static const uint64 kCssCommentError   = 1ULL << 15;  // 32768
   uint64 errors_seen_mask() const { return errors_seen_mask_; }
   uint64 unparseable_sections_seen_mask() const {
     return unparseable_sections_seen_mask_;
   }

   static const int kMaxErrorsRemembered = 16;
   struct ErrorInfo {
     int error_num;
     int byte_offset;
     string message;
   };
   // A vector of first kMaxErrorsRemembered errors seen.
   // Returned by const reference so that calling this accessor does not copy
   // the vector (and every message string in it). The reference refers to a
   // member of this Parser, so it is only usable while the Parser is alive;
   // copy the result if it must outlive the Parser.
   const std::vector<ErrorInfo>& errors_seen() const { return errors_seen_; }

   // Returns the error number based on the error flag.
   //   Ex: ErrorNumber(kUtf8Error) == 0,
   //       ErrorNumber(kDeclarationError) == 1, etc.
   static int ErrorNumber(uint64 error_flag);

  private:
   //
   // Syntactic methods
   //

   // SkipSpace() skips whitespace ([ \t\r\n\f]) and comments
   // (/* .... */) until we reach a non-whitespace, non-comment
   // character, or the end of the document.
   void SkipSpace();

   // Starting at /*, SkipComment() skips past the matching */ or to
   // the end of the document.
   void SkipComment();

   // Helper method for the other Skip* methods. Skips over the next bit of text.
   // Note: It does not yet lex all tokens, only strings, comments and escape
   // sequences. These are specifically lexed to avoid naively interpreting:
   // "}", /*]*/ or identifier\)foo as closing brackets.
   // Note: We do not use ParseAny() for this to avoid excessive recursion.
   void SkipNextToken();

   // Starting at '{', '[' or '(', SkipMatching consumes to the closing '}',
   // ']' or ')' respecting nested blocks.  We discard the result.
   // Returns true if matching '}' was found, false if EOF was reached first.
   bool SkipMatching();

   // Skips following tokens until delimiter delim or end is seen, delim is
   // consumed if found. Smart enough to skip over matches inside comments,
   // quoted strings or balanced parentheses ()[]{}.
   // For example, if in_ = "foo(a, b), 1, bar"
   //   SkipPastDelimiter(',') will result in in_ = " 1, bar".
   // Returns true if it found delim before end of file.
   bool SkipPastDelimiter(char delim);

   // Skip until next "any" token (value which can be parsed by ParseAny).
   //
   // Skips whitespace, comments, blocks ({..}), and @tokens, and returns true
   // unless we are at the end of the document or the next character is a token
   // ending delimiter ([;}!]).
   bool SkipToNextAny();

   // Skip past the end of the at-rule. Used for at-rules that we do not
   // recognize. Return value is whether or not the at-rule was closed correctly.
   // Returns true if at-rule is correctly closed (by ; or end of block),
   // false if EOF was reached first.
   // Ending ; or {}-block are consumed. However, closing } are not consumed.
   //
   // From http://www.w3.org/TR/CSS2/syndata.html#parsing-errors:
   //
   //   At-rules with unknown at-keywords. User agents must ignore an invalid
   //   at-keyword together with everything following it, up to the end of the
   //   block that contains the invalid at-keyword, or up to and including the
   //   next semicolon (;), or up to and including the next block ({...}),
   //   whichever comes first.
   bool SkipToAtRuleEnd();

   // Skip until the end of a single media query. @media statements may have
   // multiple comma-separated media queries. If one cannot be parsed, the others
   // are still valid, so we need to skip just the one.
   // Does not consume the tokens marking the end of the media query.
   void SkipToMediaQueryEnd();

   // Parse functions.
   //
   // When the comment reads 'starting at foo', it's a dchecked runtime
   // error to call the function if the input does not start with
   // 'foo'.
   //
   // If a ParseXXX method returns a pointer, you own it and must
   // delete it.

   //
   // 'leaves' of the parse tree: strings, urls, identifiers, numbers,
   // etc
   //

   // ParseIdent() consumes the identifier and returns its unescaped
   // representation.  If we are at the end of the document, or if no
   // identifier is found, ParseIdent() returns the empty string.
   //
   // In CSS2, identifiers (including element names, classes, and IDs in
   // selectors) can contain only the characters [A-Za-z0-9] and ISO
   // 10646 characters 161 and higher, plus the hyphen (-); they cannot
   // start with a hyphen or a digit. They can also contain escaped
   // characters and any ISO 10646 character as a numeric code (see next
   // item). For instance, the identifier "B&W?" may be written as
   // "B\&W\?" or "B\26 W\3F".
   // http://www.w3.org/TR/REC-CSS2/syndata.html#value-def-identifier
   //
   // We're a little more forgiving than the standard and permit hyphens
   // and digits to start identifiers.
   // This method does not skip spaces like most other methods do, because it
   // may be used to identify things like "import" in "@import", which is
   // different from "@ import".
   UnicodeText ParseIdent();

   // Starting at \, parse the escape and return the corresponding
   // unicode codepoint.  If the \ is the last character in the
   // document, we return '\'; there is no other malformed input.  This
   // implements the second and third types of character escapes at
   // http://www.w3.org/TR/REC-CSS2/syndata.html#escaped-characters
   //
   // 2) It cancels the meaning of special CSS characters. Any
   // character (except a hexadecimal digit) can be escaped with a
   // backslash to remove its special meaning.  For example,
   // ParseEscape() returns 0x6240 for \所 and 71 for \G (but \C is a
   // hex escape, treated below:)
   //
   // 3) Backslash escapes allow authors to refer to characters
   // they can't easily put in a document. In this case, the backslash
   // is followed by at most six hexadecimal digits (0..9A..Fa..f), which
   // stand for the ISO 10646 ([ISO10646]) character with that
   // number. If a digit or letter follows the hexadecimal number, the
   // end of the number needs to be made clear. There are two ways to
   // do that:
   //    1. with a space (or other whitespace character): "\26 B" ("&B")
   //    2. by providing exactly 6 hexadecimal digits: "\000026B" ("&B")
   //
   // So, if the escape sequence is a hex escape and the character following
   // the last hex digit is a space, then ParseEscape() consumes it.
   //
   // Only interchange valid Unicode characters will be returned.
   // all other characters will be replaced with space (" ") and
   // a kUtf8Error will be recorded in errors_seen_mask_.
   char32 ParseEscape();  // return the codepoint for the current escape \12a76f

   // Starting at delim, ParseString<char delim>() consumes the string,
   // including the matching end-delim, and returns its unescaped
   // representation, without the delimiters.  If we fail to find the
   // matching delimiter, we consume the rest of the document and
   // return it.
   //
   // Strings can either be written with double quotes or with single
   // quotes. Double quotes cannot occur inside double quotes, unless
   // escaped (as '\"' or as '\22'). Analogously for single quotes
   // ("\'" or "\27").  A string cannot directly contain a newline,
   // unless hex-escaped as "\A".
   //
   // It is possible to break strings over several lines, for aesthetic
   // or other reasons, but in such a case the newline itself has to be
   // escaped with a backslash (\). For instance, the following two
   // selectors are exactly the same:
   // http://www.w3.org/TR/REC-CSS2/syndata.html#strings
   template<char delim> UnicodeText ParseString();

   // If the current character is a string-delimiter (' or "),
   // ParseStringOrIdent() parses a string and returns the contents.
   // Otherwise, it tries to parse an identifier.  We must not be at
   // the end of the document.
   UnicodeText ParseStringOrIdent();

   // Same as ParseString, but returns a Value object containing that string,
   // which has bytes_in_original_buffer set.
   template<char delim> Value* ParseStringValue();

   // ParseNumber parses a number and an optional unit, consuming to
   // the end of the number or unit and returning a Value*.
   // Real numbers and integers are specified in decimal notation
   // only. An <integer> consists of one or more digits "0" to "9". A
   // <number> can either be an <integer>, or it can be zero or more
   // digits followed by a dot (.) followed by one or more digits. Both
   // integers and real numbers may be preceded by a "-" or "+" to
   // indicate the sign.
   //
   // If no number is found, ParseNumber returns NULL.
   Value* ParseNumber();

   // ParseColor parses several different representations of colors:
   // 1) rgb
   // 2) #rgb
   // 3) rrggbb
   // 4) #rrggbb
   // 5) The 16 HTML4 color names (aqua, black, blue,
   //    fuchsia, gray, green, lime, maroon, navy, olive, purple, red,
   //    silver, teal, white, and yellow), with or without quotes (' or ").
   // It's designed to handle all the ill-formed CSS color values out there.
   // It consumes the color if it finds a valid color.  Otherwise, it returns
   // an undefined HtmlColor (HtmlColor::IsDefined()) and does not consume
   // anything.
   //
   // However, if quirks_mode_ is false (standard compliant mode), forms 1 and 3
   // (without #) would not be accepted.
   HtmlColor ParseColor();                          // parse a hex or named
                                                    // color like #fff, #bcdefa
                                                    // or black

   //
   // FUNCTIONS and FUNCTION-like objects: rgb(), url(), rect()
   //

   // Parse a generic list of function parameters.
   //
   // Specifically, starting after the opening '(', repeatedly ParseAny() as
   // values either comma or space separated until we reach the closing ')'.
   //
   // ParseFunction() does not consume closing ')' and returns a vector of
   // values if successful, and NULL if the contents were mal-formed.
   //
   // We limit the max depth of nested functions to avoid unbounded stack depth.
   // See b/17628553
   FunctionParameters* ParseFunction(int max_function_depth);

   // Converts a Value number or percentage to an RGB value.
   static unsigned char ValueToRGB(Value* v);

   // ParseRgbColor parses the part between the parentheses of rgb( )
   // according to http://www.w3.org/TR/REC-CSS2/syndata.html#color-units .
   //
   //  The format of an RGB value in the functional notation is 'rgb('
   //  followed by a comma-separated list of three numerical values
   //  (either three integer values or three percentage values)
   //  followed by ')'. The integer value 255 corresponds to 100%, and
   //  to F or FF in the hexadecimal notation: rgb(255,255,255) =
   //  rgb(100%,100%,100%) = #FFF. Whitespace characters are allowed
   //  around the numerical values.
   //
   // Starting just past 'rgb(', ParseRgbColor() consumes up to (but not
   // including) the closing ) and returns the color it finds.
   // Returns NULL if mal-formed.
   Value* ParseRgbColor();   // parse an rgbcolor like 125, 25, 12
                             // or 12%, 57%, 89%

   // ParseUrl parses the part between the parentheses of url( )
   // according to http://www.w3.org/TR/REC-CSS2/syndata.html#uri .
   //
   //  The format of a URI value is 'url(' followed by optional
   //  whitespace followed by an optional single quote (') or double
   //  quote (") character followed by the URI itself, followed by an
   //  optional single quote (') or double quote (") character followed
   //  by optional whitespace followed by ')'. The two quote characters
   //  must be the same.
   //
   // Starting just past 'url(', ParseUrl() consumes the url as well as
   // the optional whitespace.  If the url is well-formed, the next
   // character must be ')'.
   // Returns NULL for mal-formed URLs.
   Value* ParseUrl();      // parse a url like yellow.png or 'blah.png'

   //
   // Value and Values
   //

   // Parses a value which is expected to be color values. It can be
   // different from ParseAny, for example, for black or ccddff, both
   // are translated into color values here but are returned as idents
   // in the latter case.  We call this instead of ParseAny() after
   // color, background-color, and background properties to accommodate bad CSS.
   // If no value is found, ParseAnyExpectingColor returns NULL.
   Value* ParseAnyExpectingColor();

   // ParseAny() parses a css value and consumes it.  It does not skip
   // leading or trailing whitespace.
   // If no value is found, ParseAny returns NULL and make sure at least one
   // character is consumed (to make progress).
   Value* ParseAny();
   // Helper function which limits the levels of recursion.
   Value* ParseAnyWithFunctionDepth(int max_function_depth);

   // Parse a list of values for the given property.
   // We parse until we see a !, ;, or } delimiter. However, if there are any
   // malformed values, stop parsing and return NULL immediately.
   // For special shortcut properties, use the following specialized methods
   // instead.
   Values* ParseValues(Property::Prop prop);

   // Expand a background property into all the sub-properties (background-color,
   // background-image, etc.). Return false on malformed original_declaration.
   static bool ExpandBackground(const Declaration& original_declaration,
                                Declarations* new_declarations);

   // Parses FONT. Returns NULL if malformed. Otherwise, the output is a tuple
   // in the following order
   //   "font-style font-variant font-weight font-size line-height font-family+"
   Values* ParseFont();

   // Parses FONT-FAMILY and the trailing part in FONT and appends the results in
   // values. Returns false if there are any malformed values.
   // This interface is different from the others because it is also used by
   // ParseFont(), where family names are appended to other CSS values.
   bool ParseFontFamily(Values* values);

   //
   // Selectors and Rulesets
   //

   // ParseAttributeSelector() starts at [ and parses an attribute
   // selector like [ foo ~= bar], consuming the final ].  Returns NULL
   // on error but still consumes to the matching ].
   // This method does not skip spaces like most other methods do.
   // Whitespace is syntactically significant here, because a sequence of simple
   // selectors contains no whitespace.  'div[align=center]' is a sequence of
   // simple selectors, but 'div [align=center]' is a syntax error (though we
   // will parse it as a selector, i.e., two simple selector sequences separated
   // by a whitespace combinator).
   SimpleSelector* ParseAttributeSelector();

   // ParseSimpleSelector() parses one simple selector.  Starts from
   // anything and returns NULL if no simple selector found or parse error.
   // This method does not skip spaces like most other methods do.
   // See comment above.
   SimpleSelector* ParseSimpleSelector();

   // Checks if the parser stops at a character (or characters) that will
   // legally terminate a SimpleSelectors. The checked characters are not eaten.
   // Valid terminators are whitespaces, comments, combinators ('>', '+'), ','
   // and '{'. A stop at the end is also considered valid.
   bool AtValidSimpleSelectorsTerminator() const;

   // Starting at whitespace, a combinator, or the first simple
   // selector, ParseSimpleSelectors parses a sequence of simple
   // selectors, i.e., a chain of simple selectors that are not
   // separated by a combinator.  The chain itself may be preceded by
   // a combinator, in which case you should pass true for
   // expecting_combinator, and we will parse the combinator.
   // Typically, when you're parsing a selector (i.e., a chain of
   // sequences of simple selectors separated by combinators), you pass
   // false on the first simple selector and true on the subsequent
   // ones.
   SimpleSelectors* ParseSimpleSelectors(bool expecting_combinator);

   // Parse an at-rule or ruleset.
   //
   // This may be nested inside of an @media rule if media_queries != NULL.
   // If media_queries == NULL, this is not nested.
   //
   // Although @media rules are allowed to be nested inside other @media rules
   // in CSS3, we do not parse such nested rules, and therefore avoid unbounded
   // recursive depth.
   void ParseStatement(const MediaQueries* media_queries,
                       Stylesheet* stylesheet);

   // ParseRuleset() starts from the first character of the first
   // selector (note: it does not skip whitespace) and consumes the
   // ruleset, including the closing '}'. Return NULL if the parsing fails.
   // However, the parser would consume anything up to the closing '}', if any,
   // even if it fails somehow in the middle, per CSS spec.
   //
   // Note: In preservation mode, a ruleset may be returned even if selectors
   // could not be parsed. If this happens the selectors.is_dummy() will be true.
   Ruleset* ParseRuleset();

   //
   // Miscellaneous
   //

   // Starting at whitespace or the start of a media query, parses and returns
   // the entire query. Returns NULL if the media query is invalid.
   MediaQuery* ParseMediaQuery();

   // ParseImport starts just after @import and consumes the import
   // declaration, but not the closing ;.  It returns a Import*
   // containing the imported name and the media.
   Import* ParseImport();

   // Parse the charset after an @charset rule.
   UnicodeText ParseCharset();

   // Parse an @font-face statement.
   FontFace* ParseFontFace();

   static const int kErrorContext;

   // error_flag should be one of the static const k*Error's above.
   void ReportParsingError(uint64 error_flag, const StringPiece& message);

   const char *begin_;  // The beginning of the doc (used to report offset).
   const char *in_;     // The current point in the parse.
   const char *end_;    // The end of the document to parse.

   bool quirks_mode_;  // Whether we are in quirks mode.
   // In preservation mode, we attempt to save all information from the
   // stylesheet (including unparseable constructs such as proprietary CSS
   // and CSS hacks) so that they can be re-serialized precisely.
   bool preservation_mode_;
   int max_function_depth_;

   // errors_seen_mask_ is non-zero iff we failed to parse part of the CSS
   // and could not recover and so we have lost information.
   uint64 errors_seen_mask_;
   // Only set in preservation_mode_. unparseable_sections_seen_mask_ is non-zero
   // iff we failed to parse a section of CSS, but saved the text verbatim or
   // in some other way preserved the information from the original document.
   uint64 unparseable_sections_seen_mask_;
   // Vector of all errors { error_type_number, location, message }.
   std::vector<ErrorInfo> errors_seen_;

   friend class Tracer;
   friend class ParserTest;  // we need to unit test private Parse functions.
   FRIEND_TEST(ParserTest, color);
   FRIEND_TEST(ParserTest, url);
   FRIEND_TEST(ParserTest, rect);
   FRIEND_TEST(ParserTest, background);
   FRIEND_TEST(ParserTest, font_family);
   FRIEND_TEST(ParserTest, ParseBlock);
   FRIEND_TEST(ParserTest, font);
   FRIEND_TEST(ParserTest, numbers);
   FRIEND_TEST(ParserTest, values);
   FRIEND_TEST(ParserTest, declarations);
   FRIEND_TEST(ParserTest, universalselector);
   FRIEND_TEST(ParserTest, universalselectorcondition);
   FRIEND_TEST(ParserTest, comment_breaking_descendant_combinator);
   FRIEND_TEST(ParserTest, comment_breaking_child_combinator);
   FRIEND_TEST(ParserTest, simple_selectors);
   FRIEND_TEST(ParserTest, bad_simple_selectors);
   FRIEND_TEST(ParserTest, rulesets);
   FRIEND_TEST(ParserTest, ruleset_starts_with_combinator);
   FRIEND_TEST(ParserTest, atrules);
   FRIEND_TEST(ParserTest, percentage_colors);
   FRIEND_TEST(ParserTest, SkipCornerCases);
   FRIEND_TEST(ParserTest, SkipMatching);
   FRIEND_TEST(ParserTest, SkippedTokenError);
   FRIEND_TEST(ParserTest, ValueError);
   FRIEND_TEST(ParserTest, ParseAnyParens);
   friend void ParseFontFamily(Parser* parser);
   friend class MediaAppliesToScreenTest;

   DISALLOW_COPY_AND_ASSIGN(Parser);
 };

 // Definitions of various data structures returned by the parser.
 // More in selector.h and value.h.

 // A single declaration such as font: 12pt Arial.
 // A declaration consists of a property name (Property) and a list
 // of values (Values*).
 // It could also be important (font: 12pt Arial !important).
 class Declaration {
  public:
   // constructor.  We take ownership of v.
   Declaration(Property p, Values* v, bool important)
       : property_(p), values_(v), important_(important) {}
   // constructor with a single Value. We make a copy of the value.
   Declaration(Property p, const Value& v, bool important)
       : property_(p), values_(new Values), important_(important) {
     values_->push_back(new Value(v));
   }
   // Constructor for dummy declaration used to pass through unparseable
   // declaration text.
   explicit Declaration(const StringPiece& bytes_in_original_buffer)
       : property_(Property::UNPARSEABLE), important_(false),
         bytes_in_original_buffer_(bytes_in_original_buffer.data(),
                                   bytes_in_original_buffer.length()) {}

   // accessors
   Property property() const { return property_; }
   const Values* values() const { return values_.get(); }
   bool IsImportant() const { return important_; }

   // Note: May be invalid UTF8.
   StringPiece bytes_in_original_buffer() const {
     return bytes_in_original_buffer_;
   }
   void set_bytes_in_original_buffer(const StringPiece& new_bytes) {
     bytes_in_original_buffer_ = string(new_bytes.data(), new_bytes.length());
   }

   // convenience accessors
   Property::Prop prop() const { return property_.prop(); }
   string prop_text() const { return property_.prop_text(); }

   Values* mutable_values() { return values_.get(); }

   void set_property(Property property) { property_ = property; }
   // Takes ownership of values.
   void set_values(Values* values) { values_.reset(values); }
   void set_important(bool important) { important_ = important; }

   string ToString() const;

  private:
   Property property_;
   scoped_ptr<Values> values_;
   bool important_;  // Whether !important is declared on this declaration.

   // Verbatim bytes parsed for the declaration. Currently this is only stored
   // for unparseable declarations (stored with property_ == UNPARSEABLE).
   // TODO(sligocki): We may want to store verbatim text for all declarations
   // to preserve the details of the original text.
   string bytes_in_original_buffer_;

   DISALLOW_COPY_AND_ASSIGN(Declaration);
 };

 // Declarations is a vector of Declaration*, which we own and
 // will delete upon destruction.  If you remove elements from
 // Declarations, you are responsible for deleting them.
 // Also, be careful --- there's no virtual destructor, so this must be
 // deleted as a Declarations.
 class Declarations : public std::vector<Declaration*> {
  public:
   Declarations() {}
   ~Declarations();

   // Syntactic sugar for element access:
   // declarations->get(i) reads better than (*declarations)[i].
   // Like operator[], performs no bounds checking.
   const Declaration* get(int i) const { return this->operator[](i); }

   string ToString() const;

  private:
   DISALLOW_COPY_AND_ASSIGN(Declarations);
 };

 // Unparsed sections of a CSS file. For example, unexpected @-rules cannot be
 // parsed, so we simply collect the verbatim bytes from start to finish and
 // store them in an UnparsedRegion so that they can be re-emitted in
 // preservation mode.
 class UnparsedRegion {
  public:
   explicit UnparsedRegion(const StringPiece& bytes_in_original_buffer)
       : bytes_in_original_buffer_(bytes_in_original_buffer.data(),
                                   bytes_in_original_buffer.size()) {}

   StringPiece bytes_in_original_buffer() const {
     return bytes_in_original_buffer_;
   }

   void set_bytes_in_original_buffer(const StringPiece& bytes) {
     bytes.CopyToString(&bytes_in_original_buffer_);
   }

   string ToString() const;

  private:
   string bytes_in_original_buffer_;

   DISALLOW_COPY_AND_ASSIGN(UnparsedRegion);
 };

 // A ruleset consists of a list of selectors followed by a declaration block.
 // It can also optionally include a list of media queries.
 //
 // Unparsed regions between Rulesets can also be stored here in preservation
 // mode. For example, at-rules can be interspersed with Rulesets; those that
 // we don't parse are stored in dummy Rulesets.
 class Ruleset {
  public:
   // Tells a real ruleset apart from a placeholder for unparsed text.
   // TODO(sligocki): Allow other parsed at-rules, like @page.
   enum Type {
     RULESET,
     UNPARSED_REGION
   };

   // Empty ruleset: allocates empty media queries, selectors and declarations.
   Ruleset()
       : type_(RULESET),
         media_queries_(new MediaQueries),
         selectors_(new Selectors),
         declarations_(new Declarations) {}

   // Ruleset built from existing parts.
   // Takes ownership of selectors, media_queries and declarations.
   Ruleset(Selectors* selectors, MediaQueries* media_queries,
           Declarations* declarations)
       : type_(RULESET),
         media_queries_(media_queries),
         selectors_(selectors),
         declarations_(declarations) {}

   // Dummy Ruleset. Used for unparsed statements, for example unknown at-rules.
   // Allocates empty media queries; selectors and declarations are not
   // allocated for this type.
   explicit Ruleset(UnparsedRegion* unparsed_region)
       : type_(UNPARSED_REGION),
         media_queries_(new MediaQueries),
         unparsed_region_(unparsed_region) {}

   ~Ruleset() {}

   // Is this actually a Ruleset or some sort of at-rule? For historical reasons
   // at-rules are also stored as Rulesets.
   Type type() const { return type_; }

   // ---- Media queries: available for every type(). ----
   const MediaQueries& media_queries() const { return *media_queries_; }
   MediaQueries& mutable_media_queries() { return *media_queries_; }
   const MediaQuery& media_query(int i) const {
     return *media_queries_->at(i);
   }
   // Takes ownership of parameter.
   void set_media_queries(MediaQueries* media_queries) {
     media_queries_.reset(media_queries);
   }

   // ---- Selectors and declarations: type() must be RULESET. ----
   // Each of these CHECKs the type before touching the data.
   // type() always == RULESET if Css::Parser::preservation_mode() is false,
   // so all of them are valid when preservation mode is off (default).
   const Selectors& selectors() const {
     CHECK_EQ(RULESET, type());
     return *selectors_;
   }
   Selectors& mutable_selectors() {
     CHECK_EQ(RULESET, type());
     return *selectors_;
   }
   const Selector& selector(int i) const {
     CHECK_EQ(RULESET, type());
     return *selectors_->at(i);
   }
   // Takes ownership of selectors.
   void set_selectors(Selectors* selectors) {
     CHECK_EQ(RULESET, type());
     selectors_.reset(selectors);
   }

   const Declarations& declarations() const {
     CHECK_EQ(RULESET, type());
     return *declarations_;
   }
   Declarations& mutable_declarations() {
     CHECK_EQ(RULESET, type());
     return *declarations_;
   }
   const Declaration& declaration(int i) const {
     CHECK_EQ(RULESET, type());
     return *declarations_->at(i);
   }
   // Takes ownership of decls.
   void set_declarations(Declarations* decls) {
     CHECK_EQ(RULESET, type());
     declarations_.reset(decls);
   }

   // ---- Unparsed region: type() must be UNPARSED_REGION. ----
   // Both accessors CHECK the type and hand back the stored region.
   const UnparsedRegion* unparsed_region() const {
     CHECK_EQ(UNPARSED_REGION, type());
     return unparsed_region_.get();
   }
   UnparsedRegion* mutable_unparsed_region() {
     CHECK_EQ(UNPARSED_REGION, type());
     return unparsed_region_.get();
   }

   string ToString() const;

  private:
   Type type_;

   // All types have media_queries_.
   scoped_ptr<MediaQueries> media_queries_;

   // Only defined for type_ == RULESET.
   scoped_ptr<Selectors> selectors_;
   scoped_ptr<Declarations> declarations_;

   // Only defined for type_ == UNPARSED_REGION.
   scoped_ptr<UnparsedRegion> unparsed_region_;

   DISALLOW_COPY_AND_ASSIGN(Ruleset);
 };

 // A vector of Ruleset pointers with a custom destructor (defined elsewhere).
 // NOTE(review): unlike Declarations, copying is not disabled here. If the
 // destructor deletes the elements (presumably, as Declarations does --
 // confirm in the .cc file), copying a Rulesets would be unsafe.
 class Rulesets : public std::vector<Css::Ruleset*> {
  public:
   Rulesets() {}
   ~Rulesets();
 };

 // A vector of UnicodeText entries held by value; Stylesheet stores one and
 // exposes it through charsets() / charset(i).
 // NOTE(review): presumably each entry is the name given in an @charset
 // rule -- confirm against the parser implementation.
 class Charsets : public std::vector<UnicodeText> {
  public:
   // Defined out of line.
   ~Charsets();

   // Returns a string rendering of the list (implementation not in this file).
   string ToString() const;
 };

 class Import {
  public:
   Import() {}
   ~Import() {}

   const MediaQueries& media_queries() const { return *media_queries_; }
   const UnicodeText& link() const { return link_; }

   // Takes ownership of media_queries.
   void set_media_queries(MediaQueries* media_queries) {
     media_queries_.reset(media_queries);
   }
   void set_link(const UnicodeText& link) { link_ = link; }

   string ToString() const;

  private:
   scoped_ptr<MediaQueries> media_queries_;
   UnicodeText link_;

   DISALLOW_COPY_AND_ASSIGN(Import);
 };

 // A vector of Import pointers with a custom destructor (defined elsewhere).
 // NOTE(review): copying is not disabled here; if the destructor deletes the
 // elements (presumably -- confirm in the .cc file), copies would be unsafe.
 class Imports : public std::vector<Css::Import*> {
  public:
   Imports() {}
   ~Imports();
 };

 class FontFace {
  public:
   FontFace() {}
   ~FontFace() {}

   const MediaQueries& media_queries() const { return *media_queries_; }
   // Stores all font-face properties as Declarations.
   // TODO(sligocki): Provide accessors for individual properties, like src?
   const Declarations& declarations() const { return *declarations_; }

   void set_media_queries(MediaQueries* media_queries) {
     media_queries_.reset(media_queries);
   }
   void set_declarations(Declarations* declarations) {
     declarations_.reset(declarations);
   }

   MediaQueries& mutable_media_queries() { return *media_queries_; }
   Declarations& mutable_declarations() { return *declarations_; }

   string ToString() const;
  private:
   scoped_ptr<MediaQueries> media_queries_;
   scoped_ptr<Declarations> declarations_;

   DISALLOW_COPY_AND_ASSIGN(FontFace);
 };

 // A vector of FontFace pointers with a custom destructor (defined elsewhere).
 // NOTE(review): copying is not disabled here; if the destructor deletes the
 // elements (presumably -- confirm in the .cc file), copies would be unsafe.
 class FontFaces : public std::vector<Css::FontFace*> {
  public:
   FontFaces() {}
   ~FontFaces();
 };

 // A stylesheet consists of lists of charsets, imports, font faces and
 // rulesets.
 class Stylesheet {
  public:
   // USER is currently unused.
   enum StylesheetType { AUTHOR, USER, SYSTEM };

   // A new stylesheet is of type AUTHOR and all of its lists are empty.
   Stylesheet() : type_(AUTHOR) {}

   StylesheetType type() const { return type_; }
   void set_type(StylesheetType type) { type_ = type; }

   // Read-only views of the whole lists.
   const Charsets& charsets() const { return charsets_; }
   const Imports& imports() const { return imports_; }
   const FontFaces& font_faces() const { return font_faces_; }
   const Rulesets& rulesets() const { return rulesets_; }

   // Read-only access to the i-th element of each list. The index is not
   // bounds-checked, and the pointer stored at that position is dereferenced
   // for imports, font faces and rulesets.
   const UnicodeText& charset(int i) const { return charsets_[i]; }
   const Import& import(int i) const { return *imports_[i]; }
   const FontFace& font_face(int i) const { return *font_faces_[i]; }
   const Ruleset& ruleset(int i) const { return *rulesets_[i]; }

   // Writable views of the whole lists.
   // TODO(sligocki): Return pointer instead of ref as per Google-style for
   // non-const return values.
   Charsets& mutable_charsets() { return charsets_; }
   Imports& mutable_imports() { return imports_; }
   FontFaces& mutable_font_faces() { return font_faces_; }
   Rulesets& mutable_rulesets() { return rulesets_; }

   string ToString() const;

  private:
   StylesheetType type_;
   Charsets charsets_;
   Imports imports_;
   FontFaces font_faces_;

   // Note: CSS spec specifies that a stylesheet is a list of statements each
   // of which is either a ruleset or at-rule. Since we want to support the
   // legacy rulesets() interface and most at-rules are not parsed, unparsed
   // at-rules are currently being stored as dummy rulesets.
   Rulesets rulesets_;

   DISALLOW_COPY_AND_ASSIGN(Stylesheet);
 };

 }  // namespace Css

 #endif  // WEBUTIL_CSS_PARSER_H__