| /** |
| * Copyright 2010 Google Inc. |
| * |
| * Licensed under the Apache License, Version 2.0 (the "License"); |
| * you may not use this file except in compliance with the License. |
| * You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
| |
| // Copyright 2006 Google Inc. All Rights Reserved. |
| // Author: dpeng@google.com (Daniel Peng) |
| |
| #include "webutil/css/parser.h" |
| |
| #include <ctype.h> // isascii |
| |
| #include <algorithm> // std::min |
| #include <memory> |
| #include "base/scoped_ptr.h" |
| #include <string> |
| #include <vector> |
| |
| #include "base/logging.h" |
| #include "base/macros.h" |
| #include "strings/strutil.h" |
| #include "third_party/utf/utf.h" |
| #include "util/gtl/stl_util.h" |
| #include "util/utf8/public/unicodetext.h" |
| #include "util/utf8/public/unilib.h" |
| #include "webutil/css/fallthrough_intended.h" // Needed in open source |
| #include "webutil/css/string_util.h" |
| #include "webutil/css/util.h" |
| #include "webutil/css/value.h" |
| |
| |
| namespace Css { |
| |
| const uint64 Parser::kNoError; |
| const uint64 Parser::kUtf8Error; |
| const uint64 Parser::kDeclarationError; |
| const uint64 Parser::kSelectorError; |
| const uint64 Parser::kFunctionError; |
| const uint64 Parser::kMediaError; |
| const uint64 Parser::kCounterError; |
| const uint64 Parser::kHtmlCommentError; |
| const uint64 Parser::kValueError; |
| const uint64 Parser::kRulesetError; |
| const uint64 Parser::kSkippedTokenError; |
| const uint64 Parser::kCharsetError; |
| const uint64 Parser::kBlockError; |
| const uint64 Parser::kNumberError; |
| const uint64 Parser::kImportError; |
| const uint64 Parser::kAtRuleError; |
| const uint64 Parser::kCssCommentError; |
| |
| const int Parser::kMaxErrorsRemembered; |
| const int Parser::kDefaultMaxFunctionDepth; |
| |
| class Tracer { // in opt mode, do nothing. |
| public: |
| Tracer(const char* name, const Parser* parser) { } |
| ~Tracer() { } |
| }; |
| |
| |
| // **************** |
| // constructors |
| // **************** |
| |
| Parser::Parser(const char* utf8text, const char* textend) |
| : begin_(utf8text), |
| in_(begin_), |
| end_(textend), |
| quirks_mode_(true), |
| preservation_mode_(false), |
| max_function_depth_(kDefaultMaxFunctionDepth), |
| errors_seen_mask_(kNoError), |
| unparseable_sections_seen_mask_(kNoError) { |
| } |
| |
| Parser::Parser(const char* utf8text) |
| : begin_(utf8text), |
| in_(begin_), |
| end_(utf8text + strlen(utf8text)), |
| quirks_mode_(true), |
| preservation_mode_(false), |
| max_function_depth_(kDefaultMaxFunctionDepth), |
| errors_seen_mask_(kNoError), |
| unparseable_sections_seen_mask_(kNoError) { |
| } |
| |
| Parser::Parser(StringPiece s) |
| : begin_(s.begin()), |
| in_(begin_), |
| end_(s.end()), |
| quirks_mode_(true), |
| preservation_mode_(false), |
| max_function_depth_(kDefaultMaxFunctionDepth), |
| errors_seen_mask_(kNoError), |
| unparseable_sections_seen_mask_(kNoError) { |
| } |
| |
| int Parser::ErrorNumber(uint64 error_flag) { |
| for (int i = 0; i < 64; ++i) { |
| if (error_flag & (1ULL << i)) { |
| return i; |
| } |
| } |
| LOG(DFATAL) << "Invalid error flag."; |
| return -1; |
| } |
| |
| const int Parser::kErrorContext = 20; |
| void Parser::ReportParsingError(uint64 error_flag, |
| const StringPiece& message) { |
| errors_seen_mask_ |= error_flag; |
| // Make sure we don't print outside of the range in_ begin_ to end_. |
| const char* context_begin = in_ - std::min(static_cast<int64>(kErrorContext), |
| static_cast<int64>(in_ - begin_)); |
| const char* context_end = in_ + std::min(static_cast<int64>(kErrorContext), |
| static_cast<int64>(end_ - in_)); |
| CHECK_LE(begin_, context_begin); |
| CHECK_LE(context_begin, context_end); |
| CHECK_LE(context_end, end_); |
| string context(context_begin, context_end - context_begin); |
| string full_message = StringPrintf( |
| "%s at byte %d \"...%s...\"", |
| message.as_string().c_str(), CurrentOffset(), context.c_str()); |
| VLOG(1) << full_message; |
| if (errors_seen_.size() < kMaxErrorsRemembered) { |
| ErrorInfo info = {ErrorNumber(error_flag), CurrentOffset(), full_message}; |
| errors_seen_.push_back(info); |
| } |
| } |
| |
| // **************** |
| // Helper functions |
| // **************** |
| |
| // is c a space? Only the characters "space" (Unicode code 32), "tab" |
| // (9), "line feed" (10), "carriage return" (13), and "form feed" (12) |
| // can occur in whitespace. Other space-like characters, such as |
| // "em-space" (8195) and "ideographic space" (12288), are never part |
| // of whitespace. |
| // http://www.w3.org/TR/REC-CSS2/syndata.html#whitespace |
| static bool IsSpace(char c) { |
| switch (c) { |
| case ' ': case '\t': case '\r': case '\n': case '\f': |
| return true; |
| default: |
| return false; |
| } |
| } |
| |
| // If the character c is a hex digit, DeHex returns the number it |
| // represents ('0' => 0, 'A' => 10, 'F' => 15). Otherwise, DeHex |
| // returns -1. |
| static int DeHex(char c) { |
| if (c >= '0' && c <= '9') { |
| return c - '0'; |
| } else if (c >= 'A' && c <= 'F') { |
| return (c - 'A') + 10; |
| } else if (c >= 'a' && c <= 'f') { |
| return (c - 'a') + 10; |
| } else { |
| return -1; |
| } |
| } |
| |
| // **************** |
| // Recursive-descent functions. |
| // |
| // The best documentation for these is in cssparser.h. |
| // |
| // **************** |
| |
| // consume whitespace and comments. |
| void Parser::SkipSpace() { |
| Tracer trace(__func__, this); |
| while (in_ < end_) { |
| if (IsSpace(*in_)) |
| in_++; |
| else if (in_ + 1 < end_ && in_[0] == '/' && in_[1] == '*') |
| SkipComment(); |
| else |
| return; |
| } |
| } |
| |
| // consume comment /* aoeuaoe */ |
| void Parser::SkipComment() { |
| DCHECK(in_ + 2 <= end_ && in_[0] == '/' && in_[1] == '*'); |
| in_ += 2; // skip the /* |
| while (in_ + 1 < end_) { |
| if (in_[0] == '*' && in_[1] == '/') { |
| in_ += 2; |
| return; |
| } else { |
| in_++; |
| } |
| } |
| ReportParsingError(kCssCommentError, "Unexpected EOF in CSS comment."); |
| in_ = end_; |
| } |
| |
| // This is very basic right now and only skips full strings, comments and |
| // escapes. |
| // TODO(sligocki): Improve to parse all tokens in CSS lexing grammar. |
| // Note: We intentionally do not consume the ( in a FUNCTION token so that |
| // SkipNextToken can be used by SkipMatching, etc. and still preserve nesting. |
| void Parser::SkipNextToken() { |
| Tracer trace(__func__, this); |
| |
| SkipSpace(); |
| if (Done()) return; |
| |
| switch (*in_) { |
| case '\'': |
| ParseString<'\''>(); // Ignore result. |
| break; |
| case '"': |
| ParseString<'"'>(); // Ignore result. |
| break; |
| case '\\': |
| ParseEscape(); // Ignore result. |
| break; |
| default: |
| in_++; |
| break; |
| } |
| } |
| |
| // Starting with {, [ or ( at in_, skip ahead to the matching closing char. |
| // Returns true if end was found, false if EOF was reached first. |
| bool Parser::SkipMatching() { |
| Tracer trace(__func__, this); |
| DCHECK(*in_ == '{' || *in_ == '[' || *in_ == '('); |
| |
| ReportParsingError(kBlockError, "Ignoring {}, [] or () block."); |
| |
| // Stack of closing delims to look for. |
| string delim_stack; |
| |
| switch (*in_) { |
| case '(': |
| ++in_; |
| delim_stack.push_back(')'); |
| break; |
| case '[': |
| ++in_; |
| delim_stack.push_back(']'); |
| break; |
| case '{': |
| ++in_; |
| delim_stack.push_back('}'); |
| break; |
| default: |
| return false; |
| } |
| |
| SkipSpace(); |
| while (in_ < end_) { |
| if (*in_ == delim_stack[delim_stack.size() - 1]) { |
| ++in_; |
| delim_stack.erase(delim_stack.size() - 1); |
| if (delim_stack.empty()) { |
| // Found outermost closing delimiter. |
| return true; |
| } |
| } else { |
| switch (*in_) { |
| case '(': |
| ++in_; |
| delim_stack.push_back(')'); |
| break; |
| case '[': |
| ++in_; |
| delim_stack.push_back(']'); |
| break; |
| case '{': |
| ++in_; |
| delim_stack.push_back('}'); |
| break; |
| default: |
| // Ignore whatever there is to parse. |
| SkipNextToken(); |
| break; |
| } |
| } |
| SkipSpace(); // Skips comments too. |
| } |
| |
| // Reached EOF before block was closed. |
| return false; |
| } |
| |
| // Skips until delim is seen or EOF. |
| // Returns true if delim was found, false if EOF was reached first. |
| bool Parser::SkipPastDelimiter(char delim) { |
| Tracer trace(__func__, this); |
| |
| SkipSpace(); |
| while (in_ < end_) { |
| if (*in_ == delim) { |
| ++in_; |
| return true; |
| } else { |
| switch (*in_) { |
| // Properly match and skip over nested {}, [] and (). |
| case '{': |
| case '[': |
| case '(': |
| // Ignore result. |
| SkipMatching(); |
| break; |
| // Skip over all other tokens. |
| default: |
| // Ignore whatever there is to parse. |
| SkipNextToken(); |
| break; |
| } |
| } |
| SkipSpace(); |
| } |
| |
| // Reached EOF before delimiter reached. |
| return false; |
| } |
| |
| // Returns true if an "any" token was found, false if EOF was reached first. |
| bool Parser::SkipToNextAny() { |
| Tracer trace(__func__, this); |
| |
| SkipSpace(); |
| while (in_ < end_) { |
| switch (*in_) { |
| case '{': |
| ReportParsingError(kSkippedTokenError, |
| "Ignoring block between tokens."); |
| SkipMatching(); // ignore |
| break; |
| case '@': |
| ReportParsingError(kSkippedTokenError, |
| "Ignoring @ident between tokens."); |
| in_++; |
| // Note: CSS spec seems to say that when unexpected at-keywords are |
| // encountered you should skip ahead to the end of the at-rule (which |
| // would skip everything till the first ;, {} block or closing }) |
| // but browsers do not seem to do this, instead they seem to just |
| // skip to the end of the keyword and then invalidate that declaration. |
| ParseIdent(); // ignore |
| break; |
| case ';': case '}': |
| case '!': |
| return false; |
| default: |
| return true; |
| } |
| SkipSpace(); |
| } |
| |
| // Reached EOF before an "any" value. |
| return false; |
| } |
| |
| // From http://www.w3.org/TR/CSS2/syndata.html#parsing-errors: |
| // |
| // At-rules with unknown at-keywords. User agents must ignore an invalid |
| // at-keyword together with everything following it, up to the end of the |
| // block that contains the invalid at-keyword, or up to and including the |
| // next semicolon (;), or up to and including the next block ({...}), |
| // whichever comes first. |
| bool Parser::SkipToAtRuleEnd() { |
| Tracer trace(__func__, this); |
| |
| SkipSpace(); |
| while (in_ < end_) { |
| switch (*in_) { |
| // "up to the end of the block that contains the invalid at-keyword" |
| case '}': |
| // Note: Do not advance in_, so that caller will see closing '}'. |
| return true; |
| // "up to and including the next semicolon (;)" |
| case ';': |
| ++in_; |
| return true; |
| // "up to and including the next block ({...})" |
| case '{': |
| return SkipMatching(); |
| |
| // Properly match nested [] and (). |
| case '[': |
| case '(': |
| // Ignore result. |
| SkipMatching(); |
| break; |
| |
| // Skip over all other tokens. |
| default: |
| // Ignore whatever there is to parse. |
| SkipNextToken(); |
| break; |
| } |
| SkipSpace(); |
| } |
| |
| // Reached EOF before syntactically closing @-rule. |
| return false; |
| } |
| |
| void Parser::SkipToMediaQueryEnd() { |
| Tracer trace(__func__, this); |
| |
| SkipSpace(); |
| while (in_ < end_) { |
| switch (*in_) { |
| // We expect a media query to end with either , (if there are more |
| // media queries) or { (if this is the last media query). ; and } can |
| // also prematurely terminate any at-rule, so we must respect them. |
| case ',': |
| case '{': |
| case ';': |
| case '}': |
| return; |
| |
| // Properly match nested [] and (). |
| case '[': |
| case '(': |
| // Ignore result. |
| SkipMatching(); |
| break; |
| |
| // Skip over all other tokens. |
| default: |
| // Ignore whatever there is to parse. |
| scoped_ptr<Value> v(ParseAny()); |
| break; |
| } |
| SkipSpace(); |
| } |
| |
| // Reached EOF before syntactically closing media query. |
| return; |
| } |
| |
| // In CSS2, identifiers (including element names, classes, and IDs in |
| // selectors) can contain only the characters [A-Za-z0-9] and ISO |
| // 10646 characters 161 and higher, plus the hyphen (-); they cannot |
| // start with a hyphen or a digit. They can also contain escaped |
| // characters and any ISO 10646 character as a numeric code (see next |
| // item). For instance, the identifier "B&W?" may be written as |
| // "B\&W\?" or "B\26 W\3F". |
| // |
| // We're a little more forgiving than the standard and permit hyphens |
| // and digits to start identifiers. |
| // |
| // FIXME(yian): actually, IE is more forgiving than Firefox in using a class |
| // selector starting with digits. |
| // |
| // http://www.w3.org/TR/REC-CSS2/syndata.html#value-def-identifier |
| static bool StartsIdent(char c) { |
| return ((c >= 'A' && c <= 'Z') |
| || (c >= 'a' && c <= 'z') |
| || (c >= '0' && c <= '9') |
| || c == '-' || c == '_' |
| || !IsAscii(c)); |
| } |
| |
| UnicodeText Parser::ParseIdent() { |
| Tracer trace(__func__, this); |
| UnicodeText s; |
| while (in_ < end_) { |
| if ((*in_ >= 'A' && *in_ <= 'Z') |
| || (*in_ >= 'a' && *in_ <= 'z') |
| || (*in_ >= '0' && *in_ <= '9') |
| || *in_ == '-' || *in_ == '_') { |
| s.push_back(*in_); |
| in_++; |
| } else if (!IsAscii(*in_)) { |
| Rune rune; |
| int len = charntorune(&rune, in_, end_-in_); |
| if (len && rune != Runeerror) { |
| if (rune >= 161) { |
| s.push_back(rune); |
| in_ += len; |
| } else { // characters 128-160 can't be in identifiers. |
| return s; |
| } |
| } else { // Encoding error. Be a little forgiving. |
| ReportParsingError(kUtf8Error, "UTF8 parsing error in identifier"); |
| in_++; |
| } |
| } else if (*in_ == '\\') { |
| s.push_back(ParseEscape()); |
| } else { |
| return s; |
| } |
| } |
| return s; |
| } |
| |
| // Returns the codepoint for the current escape. |
| // \abcdef => codepoint 0xabcdef. also consumes whitespace afterwards. |
| // \(UTF8-encoded unicode character) => codepoint for that character |
| char32 Parser::ParseEscape() { |
| Tracer trace(__func__, this); |
| SkipSpace(); |
| DCHECK_LT(in_, end_); |
| DCHECK_EQ(*in_, '\\'); |
| in_++; |
| if (Done()) return static_cast<char32>('\\'); |
| |
| char32 codepoint = 0; |
| |
| int dehexed = DeHex(*in_); |
| if (dehexed == -1) { |
| Rune rune; |
| int len = charntorune(&rune, in_, end_-in_); |
| if (len && rune != Runeerror) { |
| in_ += len; |
| } else { |
| ReportParsingError(kUtf8Error, "UTF8 parsing error"); |
| in_++; |
| } |
| codepoint = rune; |
| } else { |
| for (int count = 0; count < 6 && in_ < end_; count++) { |
| dehexed = DeHex(*in_); |
| if (dehexed == -1) |
| break; |
| in_++; |
| codepoint = codepoint << 4 | dehexed; |
| } |
| if (end_ - in_ >= 2 && memcmp(in_, "\r\n", 2) == 0) |
| in_ += 2; |
| else if (in_ < end_ && IsSpace(*in_)) |
| in_++; |
| } |
| |
| if (!UniLib::IsInterchangeValid(codepoint)) { |
| // From http://www.w3.org/TR/CSS2/syndata.html#escaped-characters: |
| // It is undefined in CSS 2.1 what happens if a style sheet does |
| // contain a character with Unicode codepoint zero. |
| // We replace them (and all other improper escapes with a space |
| // and log an error. |
| ReportParsingError(kUtf8Error, StringPrintf( |
| "Invalid CSS-escaped Unicode value: 0x%lX", |
| static_cast<unsigned long int>(codepoint))); |
| codepoint = ' '; |
| } |
| return codepoint; |
| } |
| |
| // Starts at delim. |
| template<char delim> |
| UnicodeText Parser::ParseString() { |
| Tracer trace(__func__, this); |
| |
| SkipSpace(); |
| DCHECK_LT(in_, end_); |
| DCHECK_EQ(*in_, delim); |
| in_++; |
| if (Done()) return UnicodeText(); |
| |
| UnicodeText s; |
| while (in_ < end_) { |
| switch (*in_) { |
| case delim: |
| in_++; |
| return s; |
| case '\n': |
| return s; |
| case '\\': |
| if (in_ + 1 < end_ && in_[1] == '\n') { |
| in_ += 2; |
| } else { |
| s.push_back(ParseEscape()); |
| } |
| break; |
| default: |
| if (!IsAscii(*in_)) { |
| Rune rune; |
| int len = charntorune(&rune, in_, end_-in_); |
| if (len && rune != Runeerror) { |
| s.push_back(rune); |
| in_ += len; |
| } else { |
| ReportParsingError(kUtf8Error, "UTF8 parsing error in string"); |
| in_++; |
| } |
| } else { |
| s.push_back(*in_); |
| in_++; |
| } |
| break; |
| } |
| } |
| return s; |
| } |
| |
| // parse ident or 'string' |
| UnicodeText Parser::ParseStringOrIdent() { |
| Tracer trace(__func__, this); |
| |
| SkipSpace(); |
| if (Done()) return UnicodeText(); |
| DCHECK_LT(in_, end_); |
| |
| if (*in_ == '\'') { |
| return ParseString<'\''>(); |
| } else if (*in_ == '"') { |
| return ParseString<'"'>(); |
| } else { |
| return ParseIdent(); |
| } |
| } |
| |
| template <char delim> |
| Value* Parser::ParseStringValue() { |
| Tracer trace(__func__, this); |
| |
| const char* oldin = in_; |
| UnicodeText string_contents = ParseString<delim>(); |
| StringPiece verbatim_bytes(oldin, in_ - oldin); |
| Value* value = new Value(Value::STRING, string_contents); |
| if (preservation_mode_) { |
| value->set_bytes_in_original_buffer(verbatim_bytes); |
| } |
| |
| return value; |
| } |
| |
| // Parse a CSS number, including unit or percent sign. |
| Value* Parser::ParseNumber() { |
| Tracer trace(__func__, this); |
| |
| SkipSpace(); |
| if (Done()) return NULL; |
| DCHECK_LT(in_, end_); |
| |
| const char* begin = in_; |
| if (!Done() && (*in_ == '-' || *in_ == '+')) // sign |
| in_++; |
| while (!Done() && isdigit(*in_)) { |
| in_++; |
| } |
| // CSS Spec tokenizes numbers as: |
| // num [0-9]+|[0-9]*\.[0-9]+ |
| // Therefore we must have at least one digit after the dot. |
| // If there isn't, then dot is not part of the number. |
| if (in_ + 1 < end_ && in_[0] == '.' && isdigit(in_[1])) { |
| in_++; |
| |
| while (!Done() && isdigit(*in_)) { |
| in_++; |
| } |
| } |
| double num = 0; |
| if (in_ == begin || !ParseDouble(begin, in_ - begin, &num)) { |
| ReportParsingError(kNumberError, StringPrintf( |
| "Failed to parse number %s", string(begin, in_ - begin).c_str())); |
| return NULL; |
| } |
| |
| // Set the verbatim_bytes for the number before we parse the unit below |
| // (before the in_ pointer moves). |
| StringPiece verbatim_bytes(begin, in_ - begin); |
| Value* value; |
| if (Done()) { |
| value = new Value(num, Value::NO_UNIT); |
| } else if (*in_ == '%') { |
| in_++; |
| value = new Value(num, Value::PERCENT); |
| } else if (StartsIdent(*in_)) { |
| value = new Value(num, ParseIdent()); |
| } else { |
| value = new Value(num, Value::NO_UNIT); |
| } |
| |
| if (preservation_mode_) { |
| // Store verbatim bytes so that we can reconstruct this with exactly the |
| // same precision. |
| value->set_bytes_in_original_buffer(verbatim_bytes); |
| } |
| |
| return value; |
| } |
| |
| HtmlColor Parser::ParseColor() { |
| Tracer trace(__func__, this); |
| |
| SkipSpace(); |
| if (Done()) return HtmlColor("", 0); |
| DCHECK_LT(in_, end_); |
| |
| unsigned char hexdigits[6] = {0}; |
| int dehexed; |
| int i = 0; |
| |
| const char* oldin = in_; |
| |
| // To further mess things up, IE also accepts string values happily. |
| if (*in_ == '"' || *in_ == '\'') { |
| in_++; |
| if (Done()) return HtmlColor("", 0); |
| } |
| |
| bool rgb_valid = quirks_mode_ || *in_ == '#'; |
| |
| if (*in_ == '#') in_++; |
| |
| while (in_ < end_ && i < 6 && (dehexed = DeHex(*in_)) != -1) { |
| hexdigits[i] = static_cast<unsigned char>(dehexed); |
| i++; |
| in_++; |
| } |
| |
| // close strings. Assume a named color if there are trailing characters |
| if (*oldin == '"' || *oldin == '\'') { |
| if (Done() || *in_ != *oldin) // no need to touch in_, will redo anyway. |
| i = 0; |
| else |
| in_++; |
| } |
| |
| // Normally, ParseXXX() routines stop wherever it cannot be consumed and |
| // doesn't check whether the next character is valid. which should be caught |
| // by the next ParseXXX() routine. But ParseColor may be called to test |
| // whether a numerical value can be used as color, and fail over to a normal |
| // ParseAny(). We need to do an immediate check here to guarantine a valid |
| // non-color number (such as 100%) will not be accepted as a color. |
| // |
| // We also do not want rrggbb (without #) to be accepted in non-quirks mode, |
| // but HtmlColor will happily accept it anyway. Do a sanity check here. |
| if (i == 3 || i == 6) { |
| if (!Done() && (*in_ == '%' || StartsIdent(*in_))) { |
| return HtmlColor("", 0); |
| } else { |
| if (!rgb_valid) { |
| if (preservation_mode_) { |
| // In preservation mode, we want to preserve quirks-mode colors |
| // (even if we are not parsing in quirks-mode). By reporting an |
| // error, we make sure that preservation-mode will preserve the |
| // original bytes and pass them through verbatim. |
| ReportParsingError(kValueError, "Quirks-mode color encountered"); |
| } |
| return HtmlColor("", 0); |
| } |
| } |
| } |
| |
| if (i == 3) { |
| return HtmlColor(hexdigits[0] | hexdigits[0] << 4, |
| hexdigits[1] | hexdigits[1] << 4, |
| hexdigits[2] | hexdigits[2] << 4); |
| } else if (i == 6) { |
| return HtmlColor(hexdigits[1] | hexdigits[0] << 4, |
| hexdigits[3] | hexdigits[2] << 4, |
| hexdigits[5] | hexdigits[4] << 4); |
| } else { |
| in_ = oldin; |
| |
| // A named color must not begin with #, but we need to parse it anyway and |
| // report failure later. |
| bool name_valid = true; |
| DCHECK(!Done()); |
| if (*in_ == '#') { |
| in_++; |
| name_valid = false; |
| } |
| |
| string ident = UnicodeTextToUTF8(ParseStringOrIdent()); |
| HtmlColor val("", 0); |
| if (name_valid) { |
| val.SetValueFromName(ident); |
| if (!val.IsDefined() && !preservation_mode_) |
| Util::GetSystemColor(ident, &val); |
| } |
| return val; |
| } |
| } |
| |
| // Parse body of generic function foo(a, "b" 3, d(e, #fff)) without |
| // consuming final right-paren. |
| // |
| // Both commas and spaces are allowed as separators and are remembered. |
| FunctionParameters* Parser::ParseFunction(int max_function_depth) { |
| Tracer trace(__func__, this); |
| scoped_ptr<FunctionParameters> params(new FunctionParameters); |
| |
| SkipSpace(); |
| // Separator before next value. Initial value doesn't matter. |
| FunctionParameters::Separator separator = FunctionParameters::SPACE_SEPARATED; |
| while (!Done()) { |
| DCHECK_LT(in_, end_); |
| switch (*in_) { |
| case ')': |
| // End of function. |
| return params.release(); |
| break; |
| case ',': |
| // Note that next value is comma-separated. |
| separator = FunctionParameters::COMMA_SEPARATED; |
| in_++; |
| break; |
| case ' ': |
| // The only purpose of spaces between identifiers is as a separator. |
| // Note: separator defaults to SPACE_SEPARATED. |
| in_++; |
| break; |
| default: { |
| scoped_ptr<Value> val( |
| ParseAnyWithFunctionDepth(max_function_depth)); |
| if (!val.get()) { |
| ReportParsingError(kFunctionError, |
| "Cannot parse parameter in function"); |
| return NULL; |
| } |
| if (!Done() && *in_ != ' ' && *in_ != ',' && *in_ != ')') { |
| ReportParsingError(kFunctionError, StringPrintf( |
| "Function parameter contains unexpected char '%c'", *in_)); |
| return NULL; |
| } |
| params->AddSepValue(separator, val.release()); |
| // Unless otherwise indicated, next item is space-separated. |
| separator = FunctionParameters::SPACE_SEPARATED; |
| break; |
| } |
| } |
| SkipSpace(); |
| } |
| |
| return NULL; |
| } |
| |
| // Returns the 0-255 RGB value corresponding to Value v. Only |
| // unusual thing is percentages are interpreted as percentages of |
| // 255.0. |
| unsigned char Parser::ValueToRGB(Value* v) { |
| int toret = 0; |
| if (v == NULL) { |
| toret = 0; |
| } else if (v->GetLexicalUnitType() == Value::NUMBER) { |
| if (v->GetDimension() == Value::PERCENT) { |
| toret = static_cast<int>(v->GetFloatValue()/100.0 * 255.0); |
| } else { |
| toret = v->GetIntegerValue(); |
| } |
| } else { |
| toret = 0; |
| } |
| |
| // RGB values outside the device gamut should be clipped according to spec. |
| if (toret > 255) |
| toret = 255; |
| if (toret < 0) |
| toret = 0; |
| return static_cast<unsigned char>(toret); |
| } |
| |
| // parse RGB color 25, 32, 12 or 25%, 1%, 7%. |
| // stops without consuming final right-paren |
| Value* Parser::ParseRgbColor() { |
| Tracer trace(__func__, this); |
| |
| SkipSpace(); |
| if (Done()) return NULL; |
| DCHECK_LT(in_, end_); |
| |
| unsigned char rgb[3]; |
| |
| for (int i = 0; i < 3; i++) { |
| scoped_ptr<Value> val(ParseNumber()); |
| if (!val.get() || val->GetLexicalUnitType() != Value::NUMBER || |
| (val->GetDimension() != Value::PERCENT && |
| val->GetDimension() != Value::NO_UNIT)) |
| break; |
| rgb[i] = ValueToRGB(val.get()); |
| SkipSpace(); |
| // Make sure the correct syntax is followed. |
| if (Done() || (*in_ != ',' && *in_ != ')') || (*in_ == ')' && i != 2)) |
| break; |
| |
| if (*in_ == ')') |
| return new Value(HtmlColor(rgb[0], rgb[1], rgb[2])); |
| |
| DCHECK_EQ(',', *in_); |
| in_++; |
| } |
| |
| return NULL; |
| } |
| |
| // parse url yellow.png or 'yellow.png' |
| // (doesn't consume subsequent right-paren). |
| Value* Parser::ParseUrl() { |
| Tracer trace(__func__, this); |
| |
| SkipSpace(); |
| if (Done()) return NULL; |
| DCHECK_LT(in_, end_); |
| |
| UnicodeText s; |
| if (*in_ == '\'') { |
| s = ParseString<'\''>(); |
| } else if (*in_ == '"') { |
| s = ParseString<'"'>(); |
| } else { |
| while (in_ < end_) { |
| if (IsSpace(*in_) || *in_ == ')') { |
| break; |
| } else if (*in_ == '\\') { |
| s.push_back(ParseEscape()); |
| } else if (!IsAscii(*in_)) { |
| Rune rune; |
| int len = charntorune(&rune, in_, end_-in_); |
| if (len && rune != Runeerror) { |
| s.push_back(rune); |
| in_ += len; |
| } else { |
| ReportParsingError(kUtf8Error, "UTF8 parsing error in URL"); |
| in_++; |
| } |
| } else { |
| s.push_back(*in_); |
| in_++; |
| } |
| } |
| } |
| SkipSpace(); |
| if (!Done() && *in_ == ')') |
| return new Value(Value::URI, s); |
| |
| return NULL; |
| } |
| |
| Value* Parser::ParseAnyExpectingColor() { |
| Tracer trace(__func__, this); |
| Value* toret = NULL; |
| |
| SkipSpace(); |
| if (Done()) return NULL; |
| DCHECK_LT(in_, end_); |
| |
| const char* oldin = in_; |
| HtmlColor c = ParseColor(); |
| if (c.IsDefined()) { |
| toret = new Value(c); |
| } else { |
| in_ = oldin; // no valid color. rollback. |
| toret = ParseAny(); |
| } |
| return toret; |
| } |
| |
| // Parses a CSS value. Could be just about anything. |
| Value* Parser::ParseAny() { |
| return ParseAnyWithFunctionDepth(max_function_depth_); |
| } |
| |
| Value* Parser::ParseAnyWithFunctionDepth(int max_function_depth) { |
| Tracer trace(__func__, this); |
| Value* toret = NULL; |
| |
| SkipSpace(); |
| if (Done()) return NULL; |
| DCHECK_LT(in_, end_); |
| |
| const char* oldin = in_; |
| switch (*in_) { |
| case '0': |
| case '1': |
| case '2': |
| case '3': |
| case '4': |
| case '5': |
| case '6': |
| case '7': |
| case '8': |
| case '9': |
| case '.': |
| toret = ParseNumber(); |
| break; |
| case '(': case '[': { |
| ReportParsingError(kValueError, StringPrintf( |
| "Unsupported value starting with %c", *in_)); |
| char delim = *in_ == '(' ? ')' : ']'; |
| // Move past this delimiter so that we don't double count it. |
| in_++; |
| SkipPastDelimiter(delim); |
| toret = NULL; // we don't understand this construct. |
| break; |
| } |
| case '"': |
| toret = ParseStringValue<'"'>(); |
| break; |
| case '\'': |
| toret = ParseStringValue<'\''>(); |
| break; |
| case '#': { |
| HtmlColor color = ParseColor(); |
| if (color.IsDefined()) |
| toret = new Value(color); |
| else |
| toret = NULL; |
| break; |
| } |
| case ',': |
| // TODO(sligocki): Add other possible value tokens like DELIM. |
| toret = new Value(Value::COMMA); |
| in_++; |
| break; |
| case '+': |
| toret = ParseNumber(); |
| break; |
| case '-': |
| // ambiguity between a negative number and an identifier starting with -. |
| if (in_ < end_ - 1 && |
| ((*(in_ + 1) >= '0' && *(in_ + 1) <= '9') || *(in_ + 1) == '.')) { |
| toret = ParseNumber(); |
| break; |
| } |
| FALLTHROUGH_INTENDED; |
| default: { |
| UnicodeText id = ParseIdent(); |
| if (id.empty()) { |
| toret = NULL; |
| } else if (!Done() && *in_ == '(') { |
| in_++; |
| if (max_function_depth > 0) { |
| if (StringCaseEquals(id, "url")) { |
| toret = ParseUrl(); |
| } else if (StringCaseEquals(id, "rgb")) { |
| toret = ParseRgbColor(); |
| } else if (StringCaseEquals(id, "rect")) { |
| scoped_ptr<FunctionParameters> params( |
| ParseFunction(max_function_depth - 1)); |
| if (params.get() != NULL && params->size() == 4) { |
| toret = new Value(Value::RECT, params.release()); |
| } else { |
| ReportParsingError(kFunctionError, "Could not parse parameters " |
| "for function rect"); |
| } |
| } else { |
| scoped_ptr<FunctionParameters> params( |
| ParseFunction(max_function_depth - 1)); |
| if (params.get() != NULL) { |
| toret = new Value(id, params.release()); |
| } else { |
| ReportParsingError(kFunctionError, StringPrintf( |
| "Could not parse function parameters for function %s", |
| UnicodeTextToUTF8(id).c_str())); |
| } |
| } |
| SkipSpace(); |
| if (!Done() && *in_ != ')') { |
| ReportParsingError(kFunctionError, |
| "Ignored chars at end of function."); |
| } |
| } else { |
| ReportParsingError(kFunctionError, "Functions nested too deeply."); |
| } |
| SkipPastDelimiter(')'); |
| } else { |
| toret = new Value(Identifier(id)); |
| } |
| break; |
| } |
| } |
| // Deadlock prevention: always make progress even if nothing can be parsed. |
| if (toret == NULL && in_ == oldin) { |
| ReportParsingError(kValueError, "Ignoring chars in value."); |
| ++in_; |
| } |
| return toret; |
| } |
| |
| static bool IsPropExpectingColor(Property::Prop prop) { |
| switch (prop) { |
| case Property::BORDER_COLOR: |
| case Property::BORDER_TOP_COLOR: |
| case Property::BORDER_RIGHT_COLOR: |
| case Property::BORDER_BOTTOM_COLOR: |
| case Property::BORDER_LEFT_COLOR: |
| case Property::BORDER: |
| case Property::BORDER_TOP: |
| case Property::BORDER_RIGHT: |
| case Property::BORDER_BOTTOM: |
| case Property::BORDER_LEFT: |
| case Property::BACKGROUND_COLOR: |
| case Property::BACKGROUND: |
| case Property::COLOR: |
| case Property::OUTLINE_COLOR: |
| case Property::OUTLINE: |
| return true; |
| default: |
| return false; |
| } |
| } |
| |
| // Parse values like "12pt Arial" |
| // If you make any change to this function, please also update |
| // ParseBackground, ParseFont and ParseFontFamily accordingly. |
| Values* Parser::ParseValues(Property::Prop prop) { |
| Tracer trace(__func__, this); |
| |
| SkipSpace(); |
| if (Done()) return new Values(); |
| DCHECK_LT(in_, end_); |
| |
| // If expecting_color is true, color values are expected. |
| bool expecting_color = IsPropExpectingColor(prop); |
| |
| scoped_ptr<Values> values(new Values); |
| // Note: We skip over all blocks and at-keywords and only parse "any"s. |
| // value : [ any | block | ATKEYWORD S* ]+; |
| // TODO(sligocki): According to the spec, if we cannot parse one of the |
| // values, we must ignore the whole declaration. |
| while (SkipToNextAny()) { |
| scoped_ptr<Value> v(expecting_color ? ParseAnyExpectingColor() |
| : ParseAny()); |
| |
| if (v.get()) { |
| values->push_back(v.release()); |
| } else { |
| return NULL; |
| } |
| } |
| if (values->size() > 0) { |
| return values.release(); |
| } else { |
| return NULL; |
| } |
| } |
| |
| // Parse background. It is a shortcut property for individual background |
| // properties. |
| // |
| // The output is a tuple in the following order: |
| // "background-color background-image background-repeat background-attachment |
| // background-position-x background-position-y" |
| // or NULL if invalid |
| // |
| // The x-y position parsing is somewhat complicated. The following spec is from |
| // CSS 2.1. |
| // http://www.w3.org/TR/CSS21/colors.html#propdef-background-position |
| // |
| // "If a background image has been specified, this property specifies its |
| // initial position. If only one value is specified, the second value is |
| // assumed to be 'center'. If at least one value is not a keyword, then the |
| // first value represents the horizontal position and the second represents the |
| // vertical position. Negative <percentage> and <length> values are allowed. |
| // <percentage> ... |
| // <length> ... |
| // top ... |
| // right ... |
| // bottom ... |
| // left ... |
| // center ..." |
| // |
| // In addition, we have some IE specific behavior: |
| // 1) you can specifiy more than two values, but once both x and y have |
| // specified values, further values will be discarded. |
| // 2) if y is not specified and x has seen two or more values, the last value |
| // counts. The same for y. |
| // 3) [length, left/right] is valid and the length becomes a value for y. |
| // [top/bottom, length] is also valid and the length becomes a value for x. |
| // If you make any change to this function, please also update ParseValues, |
| // ParseFont and ParseFontFamily if applicable. |
| bool Parser::ExpandBackground(const Declaration& original_declaration, |
| Declarations* new_declarations) { |
| const Values* vals = original_declaration.values(); |
| bool important = original_declaration.IsImportant(); |
| DCHECK(vals != NULL); |
| |
| Value background_color(Identifier::TRANSPARENT); |
| Value background_image(Identifier::NONE); |
| Value background_repeat(Identifier::REPEAT); |
| Value background_attachment(Identifier::SCROLL); |
| scoped_ptr<Value> background_position_x; |
| scoped_ptr<Value> background_position_y; |
| |
| bool is_first = true; |
| |
| // The following flag is used to implement IE quirks #3. When the first |
| // positional value is a length or CENTER, it is stored in |
| // background-position-x, but the value may actually be used as |
| // background-position-y if a keyword LEFT or RIGHT appears later. |
| bool first_is_ambiguous = false; // Value::NUMBER or Identifier::CENTER |
| |
| for (Values::const_iterator iter = vals->begin(); iter != vals->end(); |
| ++iter) { |
| const Value* val = *iter; |
| |
| // Firefox allows only one value to be set per property, IE need not. |
| switch (val->GetLexicalUnitType()) { |
| case Value::COLOR: |
| // background_color, etc. take ownership of val. We will clear vals |
| // at the end to make sure we don't have double ownership. |
| background_color = *val; |
| break; |
| case Value::URI: |
| background_image = *val; |
| break; |
| case Value::NUMBER: |
| if (!background_position_x.get()) { |
| background_position_x.reset(new Value(*val)); |
| first_is_ambiguous = true; |
| } else if (!background_position_y.get()) { |
| background_position_y.reset(new Value(*val)); |
| } |
| break; |
| case Value::IDENT: |
| switch (val->GetIdentifier().ident()) { |
| case Identifier::CENTER: |
| if (!background_position_x.get()) { |
| background_position_x.reset(new Value(*val)); |
| first_is_ambiguous = true; |
| } else if (!background_position_y.get()) { |
| background_position_y.reset(new Value(*val)); |
| } |
| break; |
| case Identifier::LEFT: |
| case Identifier::RIGHT: |
| // This is IE-specific behavior. |
| if (!background_position_x.get() || !background_position_y.get()) { |
| if (background_position_x.get() && first_is_ambiguous) |
| background_position_y.reset(background_position_x.release()); |
| background_position_x.reset(new Value(*val)); |
| first_is_ambiguous = false; |
| } |
| break; |
| case Identifier::TOP: |
| case Identifier::BOTTOM: |
| if (!background_position_x.get() || !background_position_y.get()) |
| background_position_y.reset(new Value(*val)); |
| break; |
| case Identifier::REPEAT: |
| case Identifier::REPEAT_X: |
| case Identifier::REPEAT_Y: |
| case Identifier::NO_REPEAT: |
| background_repeat = *val; |
| break; |
| case Identifier::SCROLL: |
| case Identifier::FIXED: |
| background_attachment = *val; |
| break; |
| case Identifier::TRANSPARENT: |
| background_color = *val; |
| break; |
| case Identifier::NONE: |
| background_image = *val; |
| break; |
| case Identifier::INHERIT: |
| // Inherit must be the one and only value. |
| if (!(iter == vals->begin() && vals->size() == 1)) |
| return false; |
| // We copy the inherit value into each background_* value. |
| background_color = *val; |
| background_image = *val; |
| background_repeat = *val; |
| background_attachment = *val; |
| background_position_x.reset(new Value(*val)); |
| background_position_y.reset(new Value(*val)); |
| break; |
| default: |
| return false; |
| } |
| break; |
| default: |
| return false; |
| } |
| is_first = false; |
| } |
| if (is_first) return false; |
| |
| new_declarations->push_back(new Declaration(Property::BACKGROUND_COLOR, |
| background_color, |
| important)); |
| new_declarations->push_back(new Declaration(Property::BACKGROUND_IMAGE, |
| background_image, |
| important)); |
| new_declarations->push_back(new Declaration(Property::BACKGROUND_REPEAT, |
| background_repeat, |
| important)); |
| new_declarations->push_back(new Declaration(Property::BACKGROUND_ATTACHMENT, |
| background_attachment, |
| important)); |
| |
| // Fix up x and y position. |
| if (!background_position_x.get() && !background_position_y.get()) { |
| background_position_x.reset(new Value(0, Value::PERCENT)); |
| background_position_y.reset(new Value(0, Value::PERCENT)); |
| } else if (!background_position_x.get()) { |
| background_position_x.reset(new Value(50, Value::PERCENT)); |
| } else if (!background_position_y.get()) { |
| background_position_y.reset(new Value(50, Value::PERCENT)); |
| } |
| new_declarations->push_back(new Declaration(Property::BACKGROUND_POSITION_X, |
| *background_position_x, |
| important)); |
| new_declarations->push_back(new Declaration(Property::BACKGROUND_POSITION_Y, |
| *background_position_y, |
| important)); |
| |
| return true; |
| } |
| |
| // Parses font-family. It is special in that it uses commas as delimiters. It |
| // also concatenates adjacent idents into one name. Strings can be also used. |
| // They must also be separated from each other with commas. |
| // From http://www.w3.org/TR/CSS2/fonts.html#propdef-font-family: |
| // 'font-family' |
| // Value: [[ <family-name> | <generic-family> ] |
| // [, <family-name>| <generic-family>]* ] | inherit |
| // |
| // E.g, Courier New, Sans -> "Courier New", "Sans" |
| // Arial, "MS Times", monospace -> "Arial", "MS Times", "monospace". |
| // Arial "MS Times" monospace -> Parse error. |
| // If you make any change to this function, please also update ParseValues, |
| // ParseBackground and ParseFont if applicable. |
| bool Parser::ParseFontFamily(Values* values) { |
| Tracer trace(__func__, this); |
| |
| SkipSpace(); |
| if (Done()) return true; |
| DCHECK_LT(in_, end_); |
| |
| while (true) { |
| const char* oldin = in_; |
| scoped_ptr<Value> v(ParseAny()); |
| if (v.get() == NULL) { |
| ReportParsingError(kValueError, "Unexpected token in font-family."); |
| in_ = oldin; // We did not use token, so unconsume it. |
| return false; |
| } |
| // Font families can be either strings or space separated identifiers. |
| switch (v->GetLexicalUnitType()) { |
| case Value::STRING: |
| // For example: "Times New Roman" |
| // Font name is just the string value. |
| values->push_back(v.release()); |
| break; |
| case Value::IDENT: { |
| // For example: Times New Roman |
| // Font name is the string made from combining all identifiers with |
| // a single space separator between each. |
| UnicodeText family; |
| family.append(v->GetIdentifierText()); |
| while (SkipToNextAny() && !Done() && *in_ != ',') { |
| const char* oldin = in_; |
| v.reset(ParseAny()); |
| if (v.get() == NULL || v->GetLexicalUnitType() != Value::IDENT) { |
| ReportParsingError(kValueError, "Unexpected token after " |
| "identifier in font-family."); |
| in_ = oldin; // We did not use token, so unconsume it. |
| return false; |
| } |
| family.push_back(static_cast<char32>(' ')); |
| family.append(v->GetIdentifierText()); |
| } |
| values->push_back(new Value(Identifier(family))); |
| break; |
| } |
| default: |
| ReportParsingError(kValueError, "Unexpected token in font-family."); |
| return false; |
| } |
| SkipSpace(); |
| if (!Done() && *in_ == ',') { |
| ++in_; |
| } else { |
| return true; |
| } |
| } |
| } |
| |
| // Parse font. It is special in that it uses a special format (see spec): |
| // [ [ <'font-style'> || <'font-variant'> || <'font-weight'> ]? |
| // <'font-size'> [ / <'line-height'> ]? <'font-family'> ] |
| // | caption | icon | menu | message-box | small-caption | status-bar | inherit |
| // |
| // The output is a tuple in the following order: |
| // "font-style font-variant font-weight font-size line-height font-family*" |
| // or NULL if invalid |
| // IE pecularity: font-family is optional (hence the *). |
| // If you make any change to this function, please also update ParseValues, |
| // ParseBackground and ParseFontFamily if applicable. |
| Values* Parser::ParseFont() { |
| Tracer trace(__func__, this); |
| |
| SkipSpace(); |
| if (Done()) return NULL; |
| DCHECK_LT(in_, end_); |
| |
| scoped_ptr<Values> values(new Values); |
| |
| if (!SkipToNextAny()) |
| return NULL; |
| |
| scoped_ptr<Value> v(ParseAny()); |
| if (!v.get()) return NULL; |
| |
| // For special one-valued font: notations, just return with that one value. |
| // Note: these can be expanded by ExpandShorthandProperties |
| if (v->GetLexicalUnitType() == Value::IDENT) { |
| switch (v->GetIdentifier().ident()) { |
| case Identifier::CAPTION: |
| case Identifier::ICON: |
| case Identifier::MENU: |
| case Identifier::MESSAGE_BOX: |
| case Identifier::SMALL_CAPTION: |
| case Identifier::STATUS_BAR: |
| case Identifier::INHERIT: |
| // These special identifiers must be the only one in a declaration. |
| // Fail if there are others. |
| if (SkipToNextAny()) { |
| ReportParsingError(kValueError, "Font has incorrect values."); |
| return NULL; |
| } |
| // If everything is good, push these out. |
| values->push_back(v.release()); |
| return values.release(); |
| default: |
| break; |
| } |
| } |
| |
| scoped_ptr<Value> font_style(new Value(Identifier::NORMAL)); |
| scoped_ptr<Value> font_variant(new Value(Identifier::NORMAL)); |
| scoped_ptr<Value> font_weight(new Value(Identifier::NORMAL)); |
| scoped_ptr<Value> font_size(new Value(Identifier::MEDIUM)); |
| scoped_ptr<Value> line_height(new Value(Identifier::NORMAL)); |
| scoped_ptr<Value> font_family; |
| |
| // parse style, variant and weight |
| while (true) { |
| // Firefox allows only one value to be set per property, IE need not. |
| if (v->GetLexicalUnitType() == Value::IDENT) { |
| switch (v->GetIdentifier().ident()) { |
| case Identifier::NORMAL: |
| // no-op |
| break; |
| case Identifier::ITALIC: |
| case Identifier::OBLIQUE: |
| font_style.reset(v.release()); |
| break; |
| case Identifier::SMALL_CAPS: |
| font_variant.reset(v.release()); |
| break; |
| case Identifier::BOLD: |
| case Identifier::BOLDER: |
| case Identifier::LIGHTER: |
| font_weight.reset(v.release()); |
| break; |
| default: |
| goto check_fontsize; |
| } |
| } else if (v->GetLexicalUnitType() == Value::NUMBER && |
| v->GetDimension() == Value::NO_UNIT) { |
| switch (v->GetIntegerValue()) { |
| // In standards-mode, font-sizes must have units (or be 0) and thus |
| // unitless numbers 100-900 must be font-weights. |
| // |
| // However, in quirks-mode, different browsers handle this quite |
| // differently. But there is at least a test that is consistent |
| // between IE and firefox: try <span style="font:120 serif"> and |
| // <span style="font:100 serif">, the first one treats 120 as |
| // font-size, and the second does not. |
| case 100: case 200: case 300: case 400: |
| case 500: case 600: case 700: case 800: |
| case 900: |
| font_weight.reset(v.release()); |
| break; |
| default: |
| goto check_fontsize; |
| } |
| } else { |
| goto check_fontsize; |
| } |
| if (!SkipToNextAny()) |
| return NULL; |
| v.reset(ParseAny()); |
| if (!v.get()) return NULL; |
| } |
| |
| check_fontsize: |
| // parse font-size |
| switch (v->GetLexicalUnitType()) { |
| case Value::IDENT: |
| switch (v->GetIdentifier().ident()) { |
| case Identifier::XX_SMALL: |
| case Identifier::X_SMALL: |
| case Identifier::SMALL: |
| case Identifier::MEDIUM: |
| case Identifier::LARGE: |
| case Identifier::X_LARGE: |
| case Identifier::XX_LARGE: |
| case Identifier::LARGER: |
| case Identifier::SMALLER: |
| font_size.reset(v.release()); |
| break; |
| default: |
| return NULL; |
| } |
| break; |
| case Value::NUMBER: |
| font_size.reset(v.release()); |
| break; |
| default: |
| return NULL; |
| } |
| |
| // parse line-height if '/' is seen, or use the default line-height |
| if (SkipToNextAny() && *in_ == '/') { |
| in_++; |
| if (!SkipToNextAny()) return NULL; |
| v.reset(ParseAny()); |
| if (!v.get()) return NULL; |
| |
| switch (v->GetLexicalUnitType()) { |
| case Value::IDENT: |
| if (v->GetIdentifier().ident() == Identifier::NORMAL) |
| break; |
| else |
| return NULL; |
| case Value::NUMBER: |
| line_height.reset(v.release()); |
| break; |
| default: |
| return NULL; |
| } |
| } |
| |
| values->push_back(font_style.release()); |
| values->push_back(font_variant.release()); |
| values->push_back(font_weight.release()); |
| values->push_back(font_size.release()); |
| values->push_back(line_height.release()); |
| |
| if (!ParseFontFamily(values.get())) // empty is okay. |
| return NULL; |
| return values.release(); |
| } |
| |
| static void ExpandShorthandProperties(Declarations* declarations, |
| const Declaration& declaration) { |
| Property prop = declaration.property(); |
| const Values* vals = declaration.values(); |
| bool important = declaration.IsImportant(); |
| |
| // Buffer to build up values used instead of vals above. |
| scoped_ptr<Values> edit_vals; |
| switch (prop.prop()) { |
| case Property::FONT: { |
| // Expand the value vector for special font: values. |
| if (vals->size() == 1) { |
| const Value* val = vals->at(0); |
| switch (val->GetIdentifier().ident()) { |
| case Identifier::CAPTION: |
| case Identifier::ICON: |
| case Identifier::MENU: |
| case Identifier::MESSAGE_BOX: |
| case Identifier::SMALL_CAPTION: |
| case Identifier::STATUS_BAR: |
| edit_vals.reset(new Values()); |
| // Reasonable defaults to use for special font: declarations. |
| edit_vals->push_back(new Value(Identifier::NORMAL)); // font-style |
| edit_vals->push_back(new Value(Identifier::NORMAL)); // font-variant |
| edit_vals->push_back(new Value(Identifier::NORMAL)); // font-weight |
| // In this case, the actual font size will depend on browser, |
| // this is a common value found in IE and Firefox: |
| edit_vals->push_back(new Value(32.0/3, Value::PX)); // font-size |
| edit_vals->push_back(new Value(Identifier::NORMAL)); // line-height |
| // We store the special font type as font-family: |
| edit_vals->push_back(new Value(*val)); // font-family |
| vals = edit_vals.get(); // Move pointer to new, built-up values. |
| break; |
| case Identifier::INHERIT: |
| edit_vals.reset(new Values()); |
| // font: inherit means all properties inherit. |
| edit_vals->push_back(new Value(*val)); // font-style |
| edit_vals->push_back(new Value(*val)); // font-variant |
| edit_vals->push_back(new Value(*val)); // font-weight |
| edit_vals->push_back(new Value(*val)); // font-size |
| edit_vals->push_back(new Value(*val)); // line-height |
| edit_vals->push_back(new Value(*val)); // font-family |
| vals = edit_vals.get(); // Move pointer to new, built-up values. |
| break; |
| default: |
| break; |
| } |
| } |
| // Only expand valid font: declarations (ones created by ParseFont, which |
| // requires at least 5 values in a specific order). |
| if (vals->size() < 5) { |
| LOG(ERROR) << "font: values are not in the correct format.\n" << vals; |
| break; |
| } |
| declarations->push_back( |
| new Declaration(Property::FONT_STYLE, *vals->get(0), important)); |
| declarations->push_back( |
| new Declaration(Property::FONT_VARIANT, *vals->get(1), important)); |
| declarations->push_back( |
| new Declaration(Property::FONT_WEIGHT, *vals->get(2), important)); |
| declarations->push_back( |
| new Declaration(Property::FONT_SIZE, *vals->get(3), important)); |
| declarations->push_back( |
| new Declaration(Property::LINE_HEIGHT, *vals->get(4), important)); |
| if (vals->size() > 5) { |
| Values* family_vals = new Values; |
| for (int i = 5, n = vals->size(); i < n; ++i) |
| family_vals->push_back(new Value(*vals->get(i))); |
| declarations->push_back( |
| new Declaration(Property::FONT_FAMILY, family_vals, important)); |
| } |
| } |
| break; |
| default: |
| // TODO(yian): other shorthand properties: |
| // background-position |
| // border-color border-style border-width |
| // border-top border-right border-bottom border-left |
| // border |
| // margin padding |
| // outline |
| break; |
| } |
| } |
| |
| // Parse declarations like "background: white; color: #333; line-height: 1.3;" |
| Declarations* Parser::ParseRawDeclarations() { |
| Tracer trace(__func__, this); |
| |
| SkipSpace(); |
| if (Done()) return new Declarations(); |
| DCHECK_LT(in_, end_); |
| |
| Declarations* declarations = new Declarations(); |
| while (in_ < end_) { |
| // decl_start is saved so that we may pass through verbatim text |
| // in case declaration could not be parsed correctly. |
| const char* decl_start = in_; |
| const uint64 start_errors_seen_mask = errors_seen_mask_; |
| bool ignore_this_decl = false; |
| switch (*in_) { |
| case ';': |
| // Note: We check below that all declarations end with ';' or '}'. |
| in_++; |
| break; |
| case '}': |
| return declarations; |
| default: { |
| UnicodeText id = ParseIdent(); |
| if (id.empty()) { |
| ReportParsingError(kDeclarationError, "Ignoring empty property"); |
| ignore_this_decl = true; |
| break; |
| } |
| Property prop(id); |
| SkipSpace(); |
| if (Done() || *in_ != ':') { |
| ReportParsingError(kDeclarationError, |
| StringPrintf("Ignoring property with no values %s", |
| prop.prop_text().c_str())); |
| ignore_this_decl = true; |
| break; |
| } |
| DCHECK_EQ(':', *in_); |
| in_++; |
| |
| scoped_ptr<Values> vals; |
| switch (prop.prop()) { |
| // TODO(sligocki): stop special-casing. |
| case Property::FONT: |
| vals.reset(ParseFont()); |
| break; |
| case Property::FONT_FAMILY: |
| vals.reset(new Values()); |
| if (!ParseFontFamily(vals.get()) || vals->empty()) { |
| vals.reset(NULL); |
| } |
| break; |
| default: |
| vals.reset(ParseValues(prop.prop())); |
| break; |
| } |
| |
| if (vals.get() == NULL) { |
| ReportParsingError(kDeclarationError, StringPrintf( |
| "Failed to parse values for property %s", |
| prop.prop_text().c_str())); |
| ignore_this_decl = true; |
| break; |
| } |
| |
| // If an error has occurred while parsing vals, some content may have |
| // been lost (invalid Unicode chars, etc.). Thus, in preservation-mode |
| // we just want to drop this malformed declaration and pass it through |
| // verbatim below. |
| // |
| // Note: This will not preserve values if an error occurred which was |
| // already in start_errors_seen_mask. But the goal of preservation |
| // mode is to have errors_seen_mask_ held at 0, because any higher |
| // than that and we cannot trust the output to be fully preserved. |
| // So, we are not worried about failing to preserve values when |
| // errors_seen_mask_ is already non-0. |
| if (preservation_mode_ && errors_seen_mask_ != start_errors_seen_mask) { |
| ReportParsingError(kDeclarationError, StringPrintf( |
| "Error while parsing values for property %s", |
| prop.prop_text().c_str())); |
| ignore_this_decl = true; |
| break; |
| } |
| |
| bool important = false; |
| if (in_ < end_ && *in_ == '!') { |
| in_++; |
| SkipSpace(); |
| UnicodeText ident = ParseIdent(); |
| if (StringCaseEquals(ident, "important")) { |
| important = true; |
| } else { |
| ReportParsingError(kDeclarationError, StringPrintf( |
| "Unexpected !-identifier: !%s", |
| UnicodeTextToUTF8(ident).c_str())); |
| ignore_this_decl = true; |
| break; |
| } |
| } |
| SkipSpace(); |
| // Don't add Declaration if it is not ended with a ';' or '}'. |
| // For example: "foo: bar !important really;" is not valid. |
| if (Done() || *in_ == ';' || *in_ == '}') { |
| declarations->push_back( |
| new Declaration(prop, vals.release(), important)); |
| } else { |
| ReportParsingError(kDeclarationError, StringPrintf( |
| "Unexpected char %c at end of declaration", *in_)); |
| ignore_this_decl = true; |
| break; |
| } |
| } |
| } |
| SkipSpace(); |
| if (ignore_this_decl) { // on bad syntax, we skip till the next declaration |
| errors_seen_mask_ |= kDeclarationError; |
| while (in_ < end_ && *in_ != ';' && *in_ != '}') { |
| // IE (and IE only) ignores {} blocks in quirks mode. |
| if (*in_ == '{' && !quirks_mode_) { |
| // Move past this delimiter so that we don't double count it. |
| in_++; |
| SkipPastDelimiter('}'); |
| } else { |
| in_++; |
| SkipSpace(); |
| } |
| } |
| if (preservation_mode_) { |
| // Add pseudo-declaration of verbatim text because we failed to parse |
| // this declaration correctly. This is saved so that it can be |
| // serialized back out in case it was actually meaningful even though |
| // we could not understand it. |
| StringPiece bytes_in_original_buffer(decl_start, in_ - decl_start); |
| declarations->push_back(new Declaration(bytes_in_original_buffer)); |
| // All errors that occurred sinse we started this declaration are |
| // demoted to unparseable sections now that we've saved the dummy |
| // element. |
| unparseable_sections_seen_mask_ |= errors_seen_mask_; |
| errors_seen_mask_ = start_errors_seen_mask; |
| } |
| } |
| } |
| return declarations; |
| } |
| |
| Declarations* Parser::ExpandDeclarations(Declarations* orig_declarations) { |
| scoped_ptr<Declarations> new_declarations(new Declarations); |
| for (int j = 0; j < orig_declarations->size(); ++j) { |
| // new_declarations takes ownership of declaration. |
| Declaration* declaration = orig_declarations->at(j); |
| orig_declarations->at(j) = NULL; |
| // TODO(yian): We currently store both expanded properties and the original |
| // property because only limited expansion is supported. In future, we |
| // should discard the original property after expansion. |
| new_declarations->push_back(declaration); |
| ExpandShorthandProperties(new_declarations.get(), *declaration); |
| // TODO(sligocki): Get ExpandBackground back into ExpandShorthandProperties. |
| switch (declaration->property().prop()) { |
| case Css::Property::BACKGROUND: { |
| ExpandBackground(*declaration, new_declarations.get()); |
| break; |
| } |
| default: |
| break; |
| } |
| } |
| return new_declarations.release(); |
| } |
| |
| Declarations* Parser::ParseDeclarations() { |
| scoped_ptr<Declarations> orig_declarations(ParseRawDeclarations()); |
| return ExpandDeclarations(orig_declarations.get()); |
| } |
| |
| // Starts from [ and parses to the closing ] |
| // in [ foo ~= bar ]. |
| // Whitespace is not skipped at beginning or the end. |
| SimpleSelector* Parser::ParseAttributeSelector() { |
| Tracer trace(__func__, this); |
| |
| DCHECK_LT(in_, end_); |
| DCHECK_EQ('[', *in_); |
| in_++; |
| SkipSpace(); |
| |
| UnicodeText attr = ParseIdent(); |
| SkipSpace(); |
| scoped_ptr<SimpleSelector> newcond; |
| if (!attr.empty() && in_ < end_) { |
| char oper = *in_; |
| switch (*in_) { |
| case '~': |
| case '|': |
| case '^': |
| case '$': |
| case '*': |
| in_++; |
| if (Done() || *in_ != '=') |
| break; |
| FALLTHROUGH_INTENDED; |
| case '=': { |
| in_++; |
| UnicodeText value = ParseStringOrIdent(); |
| if (!value.empty()) |
| newcond.reset(SimpleSelector::NewBinaryAttribute( |
| SimpleSelector::AttributeTypeFromOperator(oper), |
| attr, |
| value)); |
| break; |
| } |
| default: |
| newcond.reset(SimpleSelector::NewExistAttribute(attr)); |
| break; |
| } |
| } |
| SkipSpace(); |
| if (!Done() && *in_ != ']') { |
| ReportParsingError(kSelectorError, "Ignoring chars in attribute selector."); |
| } |
| if (SkipPastDelimiter(']')) |
| return newcond.release(); |
| else |
| return NULL; |
| } |
| |
| SimpleSelector* Parser::ParseSimpleSelector() { |
| Tracer trace(__func__, this); |
| |
| if (Done()) return NULL; |
| DCHECK_LT(in_, end_); |
| |
| switch (*in_) { |
| case '#': { |
| in_++; |
| UnicodeText id = ParseIdent(); |
| if (!id.empty()) |
| return SimpleSelector::NewId(id); |
| break; |
| } |
| case '.': { |
| in_++; |
| UnicodeText classname = ParseIdent(); |
| if (!classname.empty()) |
| return SimpleSelector::NewClass(classname); |
| break; |
| } |
| case ':': { |
| UnicodeText sep; |
| in_++; |
| // CSS3 requires all pseudo-elements to use :: to distinguish them from |
| // pseudo-classes. We save which separator was used in the Pseudoclass |
| // object, so that the original value can be reconstructed. |
| // |
| // http://www.w3.org/TR/css3-selectors/#pseudo-elements |
| if (!Done() && *in_ == ':') { |
| in_++; |
| sep.CopyUTF8("::", 2); |
| } else { |
| sep.CopyUTF8(":", 1); |
| } |
| UnicodeText pseudoclass = ParseIdent(); |
| // FIXME(yian): skip constructs "(en)" in lang(en) for now. |
| if (!Done() && *in_ == '(') { |
| ReportParsingError(kSelectorError, |
| "Cannot parse parameters for pseudoclass."); |
| in_++; |
| if (!SkipPastDelimiter(')')) |
| break; |
| } |
| if (!pseudoclass.empty()) |
| return SimpleSelector::NewPseudoclass(pseudoclass, sep); |
| break; |
| } |
| case '[': { |
| SimpleSelector* newcond = ParseAttributeSelector(); |
| if (newcond) |
| return newcond; |
| break; |
| } |
| case '*': |
| in_++; |
| return SimpleSelector::NewUniversal(); |
| break; |
| default: { |
| UnicodeText ident = ParseIdent(); |
| if (!ident.empty()) |
| return SimpleSelector::NewElementType(ident); |
| break; |
| } |
| } |
| // We did not parse anything or we parsed something incorrectly. |
| return NULL; |
| } |
| |
| bool Parser::AtValidSimpleSelectorsTerminator() const { |
| if (Done()) return true; |
| switch (*in_) { |
| case ' ': case '\t': case '\r': case '\n': case '\f': |
| case ',': case '{': case '>': case '+': |
| return true; |
| case '/': |
| if (in_ + 1 < end_ && *(in_ + 1) == '*') |
| return true; |
| break; |
| } |
| return false; |
| } |
| |
| SimpleSelectors* Parser::ParseSimpleSelectors(bool expecting_combinator) { |
| Tracer trace(__func__, this); |
| |
| SkipSpace(); |
| if (Done()) return NULL; |
| DCHECK_LT(in_, end_); |
| |
| SimpleSelectors::Combinator combinator; |
| if (!expecting_combinator) |
| combinator = SimpleSelectors::NONE; |
| else |
| switch (*in_) { |
| case '>': |
| in_++; |
| combinator = SimpleSelectors::CHILD; |
| break; |
| case '+': |
| in_++; |
| combinator = SimpleSelectors::SIBLING; |
| break; |
| default: |
| combinator = SimpleSelectors::DESCENDANT; |
| break; |
| } |
| |
| scoped_ptr<SimpleSelectors> selectors(new SimpleSelectors(combinator)); |
| |
| SkipSpace(); |
| if (Done()) return NULL; |
| |
| const char* oldin = in_; |
| while (SimpleSelector* simpleselector = ParseSimpleSelector()) { |
| selectors->push_back(simpleselector); |
| oldin = in_; |
| } |
| |
| if (selectors->size() > 0 && // at least one simple selector stored |
| in_ == oldin && // the last NULL does not make progress |
| AtValidSimpleSelectorsTerminator()) // stop at a valid terminator |
| return selectors.release(); |
| |
| return NULL; |
| } |
| |
| Selectors* Parser::ParseSelectors() { |
| Tracer trace(__func__, this); |
| |
| SkipSpace(); |
| if (Done()) return NULL; |
| DCHECK_LT(in_, end_); |
| |
| // Remember whether anything goes wrong, but continue parsing until the |
| // declaration starts or the position comes to the end. Then discard the |
| // selectors. |
| bool success = true; |
| |
| scoped_ptr<Selectors> selectors(new Selectors()); |
| Selector* selector = new Selector(); |
| selectors->push_back(selector); |
| |
| // The first simple selector sequence in a chain of simple selector |
| // sequences does not have a combinator. ParseSimpleSelectors needs |
| // to know this, so we set this to false here and after ',', and |
| // true after we see a simple selector sequence. |
| bool expecting_combinator = false; |
| while (in_ < end_ && *in_ != '{') { |
| switch (*in_) { |
| case ',': |
| if (selector->size() == 0) { |
| success = false; |
| ReportParsingError(kSelectorError, |
| "Could not parse ruleset: unexpected ,"); |
| } else { |
| selector = new Selector(); |
| selectors->push_back(selector); |
| } |
| in_++; |
| expecting_combinator = false; |
| break; |
| default: { |
| const char* oldin = in_; |
| SimpleSelectors* simple_selectors |
| = ParseSimpleSelectors(expecting_combinator); |
| if (!simple_selectors) { |
| success = false; |
| if (in_ == oldin) { |
| DCHECK(!Done()); |
| ReportParsingError(kSelectorError, StringPrintf( |
| "Could not parse selector: illegal char %c", *in_)); |
| in_++; |
| } |
| } else { |
| selector->push_back(simple_selectors); |
| } |
| |
| expecting_combinator = true; |
| break; |
| } |
| } |
| SkipSpace(); |
| } |
| |
| if (selector->size() == 0) |
| success = false; |
| |
| if (success) |
| return selectors.release(); |
| else |
| return NULL; |
| } |
| |
| Import* Parser::ParseNextImport() { |
| Tracer trace(__func__, this); |
| |
| SkipSpace(); |
| if (Done()) return NULL; |
| |
| const char* oldin = in_; |
| |
| DCHECK_LT(in_, end_); |
| if (*in_ != '@') return NULL; |
| ++in_; |
| |
| UnicodeText ident = ParseIdent(); |
| |
| // @import string|uri medium-list ? ; |
| if (!StringCaseEquals(ident, "import")) { |
| // Rewind to beginning of at-rule, since it wasn't an @import and we want |
| // to leave the parser in a consistent state. |
| in_ = oldin; |
| return NULL; |
| } |
| |
| Import* import = ParseImport(); |
| SkipToAtRuleEnd(); |
| SkipSpace(); |
| |
| return import; |
| } |
| |
| Import* Parser::ParseAsSingleImport() { |
| Tracer trace(__func__, this); |
| |
| Import* import = ParseNextImport(); |
| if (import == NULL || Done()) return import; |
| |
| // There's something after the @import, which is expressly disallowed. |
| delete import; |
| return NULL; |
| } |
| |
| UnicodeText Parser::ExtractCharset() { |
| Tracer trace(__func__, this); |
| |
| UnicodeText result; |
| if (!Done() && *in_ == '@') { |
| ++in_; |
| UnicodeText ident = ParseIdent(); |
| if (StringCaseEquals(ident, "charset")) { |
| result = ParseCharset(); |
| SkipSpace(); |
| if (Done() || *in_ != ';') { |
| ReportParsingError(kCharsetError, "@charset not closed properly."); |
| result.clear(); |
| } |
| } |
| } |
| return result; |
| } |
| |
| UnicodeText Parser::ParseCharset() { |
| Tracer trace(__func__, this); |
| |
| UnicodeText result; |
| SkipSpace(); |
| |
| if (Done()) { |
| ReportParsingError(kCharsetError, "Unexpected EOF parsing @charset."); |
| return result; |
| } |
| |
| switch (*in_) { |
| case '\'': { |
| result = ParseString<'\''>(); |
| break; |
| } |
| case '"': { |
| result = ParseString<'"'>(); |
| break; |
| } |
| default: { |
| ReportParsingError(kCharsetError, "@charset lacks string."); |
| break; |
| } |
| } |
| return result; |
| } |
| |
| Ruleset* Parser::ParseRuleset() { |
| Tracer trace(__func__, this); |
| |
| SkipSpace(); |
| if (Done()) return NULL; |
| DCHECK_LT(in_, end_); |
| |
| // Remember whether anything goes wrong, but continue parsing until the |
| // closing }. Then discard the whole ruleset if necessary. This allows the |
| // parser to make progress anyway. |
| bool success = true; |
| const char* start_pos = in_; |
| const uint64 start_errors_seen_mask = errors_seen_mask_; |
| |
| scoped_ptr<Ruleset> ruleset(new Ruleset()); |
| scoped_ptr<Selectors> selectors(ParseSelectors()); |
| |
| if (Done()) { |
| ReportParsingError(kSelectorError, |
| "Selectors without declarations at end of doc."); |
| return NULL; |
| } |
| |
| // In preservation_mode_ we want to use verbatim text whenever we got a |
| // parsing error during selector parsing, so clear the partial parse here. |
| if (preservation_mode_ && (start_errors_seen_mask != errors_seen_mask_)) { |
| selectors.reset(NULL); |
| } |
| |
| if (selectors.get() == NULL) { |
| ReportParsingError(kSelectorError, "Failed to parse selector"); |
| if (preservation_mode_) { |
| selectors.reset(new Selectors(StringPiece(start_pos, in_ - start_pos))); |
| ruleset->set_selectors(selectors.release()); |
| // All errors that occurred sinse we started this declaration are |
| // demoted to unparseable sections now that we've saved the dummy |
| // element. |
| unparseable_sections_seen_mask_ |= errors_seen_mask_; |
| errors_seen_mask_ = start_errors_seen_mask; |
| } else { |
| // http://www.w3.org/TR/CSS21/syndata.html#rule-sets |
| // When a user agent can't parse the selector (i.e., it is not |
| // valid CSS 2.1), it must ignore the declaration block as |
| // well. |
| success = false; |
| } |
| } else { |
| ruleset->set_selectors(selectors.release()); |
| } |
| |
| DCHECK(!Done()); |
| DCHECK_EQ('{', *in_); |
| in_++; |
| ruleset->set_declarations(ParseRawDeclarations()); |
| |
| SkipSpace(); |
| if (Done() || *in_ != '}') { |
| // TODO(sligocki): Can this ever be hit? Add a test that does. |
| ReportParsingError(kRulesetError, "Ignored chars at end of ruleset."); |
| } |
| SkipPastDelimiter('}'); |
| |
| if (success) |
| return ruleset.release(); |
| else |
| return NULL; |
| } |
| |
| MediaQueries* Parser::ParseMediaQueries() { |
| Tracer trace(__func__, this); |
| |
| scoped_ptr<MediaQueries> media_queries(new MediaQueries); |
| |
| SkipSpace(); |
| if (Done() || (*in_ == ';' || *in_ == '{')) { |
| // Empty media queries. |
| return media_queries.release(); |
| } |
| |
| while (in_ < end_) { |
| scoped_ptr<MediaQuery> query(ParseMediaQuery()); |
| if (query.get() == NULL) { |
| // According to http://www.w3.org/TR/css3-mediaqueries/#error-handling, |
| // All malformed media queries should be represented as "not all". |
| // Note: This is not exactly the same as just ignoring this media query. |
| // For example, if there is only one media query and it's invalid, |
| // then the contents don't apply, whereas if there were 0 queries, |
| // the contents would apply. |
| query.reset(new MediaQuery); |
| query->set_qualifier(MediaQuery::NOT); |
| query->set_media_type(UTF8ToUnicodeText("all")); |
| } |
| media_queries->push_back(query.release()); |
| |
| SkipSpace(); |
| if (Done()) { |
| return media_queries.release(); |
| } |
| switch (*in_) { |
| case ';': |
| case '{': |
| return media_queries.release(); |
| case ',': |
| in_++; |
| break; |
| default: |
| ReportParsingError(kMediaError, |
| "Unexpected char while parsing media query."); |
| return media_queries.release(); |
| } |
| } |
| |
| return media_queries.release(); |
| } |
| |
| // Note: This function returns NULL if any part of the media query has a |
| // syntax error. From http://www.w3.org/TR/css3-mediaqueries/#error-handling: |
| // User agents are to represent a media query as "not all" when one |
| // of the specified media features is not known. |
| MediaQuery* Parser::ParseMediaQuery() { |
| Tracer trace(__func__, this); |
| SkipSpace(); |
| |
| scoped_ptr<MediaQuery> query(new MediaQuery); |
| UnicodeText id = ParseIdent(); |
| SkipSpace(); |
| |
| // Check for optional qualifiers "not" or "only". |
| if (StringCaseEquals(id, "not")) { |
| query->set_qualifier(MediaQuery::NOT); |
| id = ParseIdent(); |
| } else if (StringCaseEquals(id, "only")) { |
| query->set_qualifier(MediaQuery::ONLY); |
| id = ParseIdent(); |
| } |
| |
| // Do we need to find an 'and' before the next media expression? This is |
| // always true unless there was no explicit media type, ex: "@media (color)". |
| bool need_and = false; |
| // Have we seen an 'and' token since last media expression or media type. |
| bool found_and = false; |
| |
| // Set media type (optional). |
| if (!id.empty()) { |
| query->set_media_type(id); |
| need_and = true; |
| } |
| |
| bool done = false; |
| SkipSpace(); |
| while (!Done() && !done) { |
| switch (*in_) { |
| case ';': |
| case '{': |
| case ',': |
| done = true; |
| break; |
| case '(': { // CSS3 media expression. Ex: (max-width:290px) |
| if (need_and != found_and) { |
| ReportParsingError(kMediaError, |
| "Missing or extra 'and' in media query"); |
| SkipToMediaQueryEnd(); |
| return NULL; |
| } |
| // Reset |
| need_and = true; |
| found_and = false; |
| in_++; |
| SkipSpace(); |
| UnicodeText name = ParseIdent(); |
| SkipSpace(); |
| if (Done()) { |
| ReportParsingError(kMediaError, "Unexpected EOF in media query."); |
| return NULL; |
| } |
| switch (*in_) { |
| case ')': |
| in_++; |
| // Expression with no value. Ex: (color) |
| query->add_expression(new MediaExpression(name)); |
| break; |
| case ':': { |
| in_++; |
| SkipSpace(); |
| if (Done()) { |
| ReportParsingError(kMediaError, "Unexpected EOF in media query."); |
| return query.release(); |
| } |
| const char* begin = in_; |
| // TODO(sligocki): Actually parse value? |
| if (SkipPastDelimiter(')')) { |
| const char* end = in_ - 1; |
| UnicodeText value; |
| // Note: If SkipPastDelimiter() returns true, then |
| // it has always run ++in_ at the end. So this is safe. |
| CHECK_LE(begin, end); |
| value.CopyUTF8(begin, end - begin); |
| query->add_expression(new MediaExpression(name, value)); |
| } else { |
| ReportParsingError(kMediaError, "Unclosed media query."); |
| SkipToMediaQueryEnd(); |
| return NULL; |
| } |
| break; |
| } |
| default: |
| ReportParsingError(kMediaError, |
| "Failed to parse media expression."); |
| SkipPastDelimiter(')'); |
| SkipToMediaQueryEnd(); |
| return NULL; |
| } |
| break; |
| } |
| default: { |
| // Expect "and" between media expressions. All other things are errors. |
| UnicodeText ident = ParseIdent(); |
| if (StringCaseEquals(ident, "and")) { |
| if (found_and) { |
| ReportParsingError(kMediaError, "Multiple 'and' tokens in a row."); |
| SkipToMediaQueryEnd(); |
| return NULL; |
| } else if (!Done() && *in_ == '(') { |
| // TODO(sligocki): Instead of special-casing "and(" let's lex the |
| // content first in general (say with a NextToken() function). |
| |
| // This @media query is technically invalid because CSS is |
| // defined to be lexed context-free first and defines the |
| // flex primitive: |
| // FUNCTION {ident}\( |
| // Thus "and(color)" will be parsed as a function instead of an |
| // identifier followed by a media expression. |
| // See: b/7694757 and |
| // http://lists.w3.org/Archives/Public/www-style/2012Dec/0263.html |
| ReportParsingError(kMediaError, |
| "Space required between 'and' and '(' tokens."); |
| SkipToMediaQueryEnd(); |
| return NULL; |
| } else { |
| found_and = true; |
| } |
| } else { |
| if (in_ >= end_) { |
| ReportParsingError(kMediaError, "Unexpected EOF"); |
| } else if (ident.empty()) { |
| ReportParsingError(kMediaError, StringPrintf( |
| "Unexpected char in media query: %c", *in_)); |
| } else { |
| ReportParsingError(kMediaError, StringPrintf( |
| "Unexpected identifier separating media queries: %s", |
| UnicodeTextToUTF8(ident).c_str())); |
| } |
| SkipToMediaQueryEnd(); |
| return NULL; |
| } |
| break; |
| } |
| } |
| SkipSpace(); |
| } |
| |
| if (found_and) { |
| ReportParsingError(kMediaError, "Unexpected trailing 'and' token."); |
| SkipToMediaQueryEnd(); |
| return NULL; |
| } |
| |
| // Media queries cannot be empty, that is an error. |
| if (query->media_type().empty() && query->expressions().empty()) { |
| ReportParsingError(kMediaError, "Unexpected empty media query."); |
| query.reset(NULL); |
| } |
| |
| return query.release(); |
| } |
| |
| // Start after @import is parsed. |
| Import* Parser::ParseImport() { |
| Tracer trace(__func__, this); |
| |
| SkipSpace(); |
| if (Done()) return NULL; |
| DCHECK_LT(in_, end_); |
| |
| scoped_ptr<Value> v(ParseAny()); |
| if (!v.get() || (v->GetLexicalUnitType() != Value::STRING && |
| v->GetLexicalUnitType() != Value::URI)) { |
| ReportParsingError(kImportError, "Unexpected token while parsing @import"); |
| return NULL; |
| } |
| |
| scoped_ptr<Import> import(new Import()); |
| import->set_link(v->GetStringValue()); |
| SkipSpace(); |
| if (Done() || *in_ == ';') { |
| // Set empty media queries. |
| import->set_media_queries(new MediaQueries); |
| } else { |
| const uint64 start_errors_seen_mask = errors_seen_mask_; |
| scoped_ptr<MediaQueries> media(ParseMediaQueries()); |
| if (preservation_mode_ && (errors_seen_mask_ != start_errors_seen_mask)) { |
| ReportParsingError(kImportError, "Error parsing media for @import."); |
| return NULL; |
| } else { |
| import->set_media_queries(media.release()); |
| } |
| } |
| return import.release(); |
| } |
| |
| FontFace* Parser::ParseFontFace() { |
| Tracer trace(__func__, this); |
| |
| scoped_ptr<FontFace> font_face(new FontFace()); |
| SkipSpace(); |
| if (Done()) { |
| ReportParsingError(kAtRuleError, "Unexpected EOF in @font-face."); |
| return NULL; |
| } |
| |
| if ('{' != *in_) { |
| ReportParsingError(kAtRuleError, "Expected '{' after @font-face."); |
| return NULL; |
| } |
| in_++; |
| |
| font_face->set_declarations(ParseRawDeclarations()); |
| |
| SkipSpace(); |
| if (Done() || *in_ != '}') { |
| ReportParsingError(kAtRuleError, "Ignored chars at end of @font-face."); |
| } |
| SkipPastDelimiter('}'); |
| |
| return font_face.release(); |
| } |
| |
| void Parser::ParseStatement(const MediaQueries* media_queries, |
| Stylesheet* stylesheet) { |
| Tracer trace(__func__, this); |
| |
| SkipSpace(); |
| if (Done()) return; |
| DCHECK_LT(in_, end_); |
| // The starting point is saved so that we may pass through verbatim text |
| // in case the @-rule cannot be parsed correctly. |
| const char* oldin = in_; |
| const uint64 start_errors_seen_mask = errors_seen_mask_; |
| |
| if (*in_ == '@') { |
| bool correctly_terminated = true; |
| in_++; |
| UnicodeText ident = ParseIdent(); |
| |
| // @import string|uri medium-list ? ; |
| if (StringCaseEquals(ident, "import")) { |
| if (media_queries != NULL) { |
| ReportParsingError(kImportError, "@import found inside @media"); |
| correctly_terminated = SkipToAtRuleEnd(); |
| } else if (!stylesheet->rulesets().empty() || |
| !stylesheet->font_faces().empty()) { |
| ReportParsingError(kImportError, "@import found after rulesets."); |
| correctly_terminated = SkipToAtRuleEnd(); |
| } else { |
| scoped_ptr<Import> import(ParseImport()); |
| SkipSpace(); |
| if (import.get()) { |
| if (Done()) { |
| ReportParsingError(kImportError, |
| "Unexpected EOF in @import statement."); |
| correctly_terminated = false; |
| // @import was not closed with a ; and so we must preserve an error |
| // message, but we still need to save this import. |
| stylesheet->mutable_imports().push_back(import.release()); |
| } else if (*in_ == ';') { |
| in_++; |
| stylesheet->mutable_imports().push_back(import.release()); |
| } else { |
| ReportParsingError(kImportError, |
| "Ignoring chars at end of @import."); |
| correctly_terminated = SkipToAtRuleEnd(); |
| } |
| } else { |
| ReportParsingError(kImportError, "Failed to parse @import."); |
| correctly_terminated = SkipToAtRuleEnd(); |
| } |
| } |
| |
| // @charset string ; |
| } else if (StringCaseEquals(ident, "charset")) { |
| if (media_queries != NULL) { |
| ReportParsingError(kCharsetError, "@charset found inside @media"); |
| correctly_terminated = SkipToAtRuleEnd(); |
| } else if (!stylesheet->rulesets().empty() || |
| !stylesheet->imports().empty() || |
| !stylesheet->font_faces().empty()) { |
| ReportParsingError(kCharsetError, "@charset found after other rules."); |
| correctly_terminated = SkipToAtRuleEnd(); |
| } else { |
| UnicodeText s = ParseCharset(); |
| SkipSpace(); |
| if (preservation_mode_ && |
| (errors_seen_mask_ != start_errors_seen_mask)) { |
| ReportParsingError(kCharsetError, "Failed to parse @charset."); |
| correctly_terminated = SkipToAtRuleEnd(); |
| } else if (Done()) { |
| ReportParsingError(kCharsetError, |
| "Unexpected EOF in @charset statement."); |
| correctly_terminated = false; |
| stylesheet->mutable_charsets().push_back(s); |
| } else { |
| if (*in_ == ';') { |
| in_++; |
| stylesheet->mutable_charsets().push_back(s); |
| } else { |
| ReportParsingError(kCharsetError, |
| "Ignoring chars at end of @charset."); |
| correctly_terminated = SkipToAtRuleEnd(); |
| } |
| } |
| } |
| |
| // @media medium-list { ruleset-list } |
| } else if (StringCaseEquals(ident, "media")) { |
| if (media_queries != NULL) { |
| // Note: We do not parse nested @media rules although they are |
| // technically allowed in CSS3. Among other things, this makes our |
| // lives easier by avoiding unbounded recursive depth. |
| ReportParsingError(kMediaError, "@media found inside @media"); |
| correctly_terminated = SkipToAtRuleEnd(); |
| } else { |
| scoped_ptr<MediaQueries> media_queries(ParseMediaQueries()); |
| if (preservation_mode_ && errors_seen_mask_ != start_errors_seen_mask) { |
| ReportParsingError(kMediaError, |
| "Error parsing media queries, ignoring block."); |
| correctly_terminated = SkipToAtRuleEnd(); |
| } else if (Done()) { |
| ReportParsingError(kMediaError, "Unexpected EOF in @media statement"); |
| correctly_terminated = false; |
| } else if (*in_ == ';') { |
| // @media tags ending in ';' are no-ops, we simply ignore them. |
| // Skip over ending ';' |
| in_++; |
| return; |
| } else if (*in_ != '{') { |
| ReportParsingError(kMediaError, "Malformed @media statement."); |
| correctly_terminated = SkipToAtRuleEnd(); |
| } else { |
| DCHECK(!Done()); |
| DCHECK_EQ('{', *in_); |
| in_++; |
| SkipSpace(); |
| while (in_ < end_ && *in_ != '}') { |
| const char* oldin = in_; |
| // Parse either a ruleset or at-rule. |
| ParseStatement(media_queries.get(), stylesheet); |
| if (in_ == oldin) { |
| ReportParsingError(kSelectorError, StringPrintf( |
| "Could not parse ruleset: illegal char %c", *in_)); |
| in_++; |
| } |
| SkipSpace(); |
| } |
| if (in_ < end_) { |
| DCHECK_EQ('}', *in_); |
| in_++; |
| } else { |
| ReportParsingError(kMediaError, |
| "Unexpected EOF in @media statement."); |
| correctly_terminated = false; |
| } |
| } |
| } |
| |
| } else if (StringCaseEquals(ident, "font-face")) { |
| scoped_ptr<FontFace> font_face(ParseFontFace()); |
| if ((preservation_mode_ && (errors_seen_mask_ != start_errors_seen_mask)) |
| || font_face.get() == NULL) { |
| ReportParsingError(kAtRuleError, "Could not parse @font-face rule."); |
| correctly_terminated = SkipToAtRuleEnd(); |
| } else { |
| if (media_queries != NULL) { |
| font_face->set_media_queries(media_queries->DeepCopy()); |
| } else { |
| // Blank media queries. |
| font_face->set_media_queries(new MediaQueries); |
| } |
| stylesheet->mutable_font_faces().push_back(font_face.release()); |
| } |
| |
| // Unexpected @-rule. |
| } else { |
| string ident_string(ident.utf8_data(), ident.utf8_length()); |
| ReportParsingError(kAtRuleError, StringPrintf( |
| "Cannot parse unknown @-statement: %s", ident_string.c_str())); |
| correctly_terminated = SkipToAtRuleEnd(); |
| } |
| |
| // We can only preserve the @-rule if it is correctly terminated. If it |
| // is not (because we reach EOF before it terminates) we must preserve |
| // the error. |
| if (errors_seen_mask_ != start_errors_seen_mask && |
| correctly_terminated && preservation_mode_) { |
| // Add a place-holder with verbatim text because we failed to parse |
| // this @-rule correctly. This is saved so that it can be |
| // serialized back out in case it was actually meaningful even though |
| // we could not understand it. |
| StringPiece bytes_in_original_buffer(oldin, in_ - oldin); |
| |
| Ruleset* ruleset = |
| new Ruleset(new UnparsedRegion(bytes_in_original_buffer)); |
| if (media_queries != NULL) { |
| ruleset->set_media_queries(media_queries->DeepCopy()); |
| } |
| stylesheet->mutable_rulesets().push_back(ruleset); |
| |
| // All errors that occurred sinse we started this declaration are |
| // demoted to unparseable sections now that we've saved the dummy |
| // element. |
| unparseable_sections_seen_mask_ |= errors_seen_mask_; |
| errors_seen_mask_ = start_errors_seen_mask; |
| } |
| } else { |
| scoped_ptr<Ruleset> ruleset(ParseRuleset()); |
| if (ruleset.get() == NULL && oldin == in_) { |
| ReportParsingError(kSelectorError, StringPrintf( |
| "Could not parse ruleset: illegal char %c", *in_)); |
| in_++; |
| } |
| if (ruleset.get() != NULL) { |
| if (media_queries != NULL) { |
| ruleset->set_media_queries(media_queries->DeepCopy()); |
| } |
| stylesheet->mutable_rulesets().push_back(ruleset.release()); |
| } |
| } |
| } |
| |
| Stylesheet* Parser::ParseRawStylesheet() { |
| Tracer trace(__func__, this); |
| |
| SkipSpace(); |
| if (Done()) return new Stylesheet(); |
| DCHECK_LT(in_, end_); |
| |
| Stylesheet* stylesheet = new Stylesheet(); |
| while (in_ < end_) { |
| switch (*in_) { |
| // HTML-style comments are not allowed in CSS. |
| // In fact, "<!--" and "-->" are ignored when parsing CSS. |
| // Probably a legacy from when browsers didn't support <style> tags. |
| case '<': |
| in_++; |
| if (end_ - in_ >= 3 && memcmp(in_, "!--", 3) == 0) { |
| in_ += 3; |
| } else { |
| ReportParsingError(kHtmlCommentError, "< without following !--"); |
| } |
| break; |
| case '-': |
| in_++; |
| if (end_ - in_ >= 2 && memcmp(in_, "->", 2) == 0) { |
| in_ += 2; |
| } else { |
| ReportParsingError(kHtmlCommentError, "- without following ->"); |
| } |
| break; |
| default: |
| ParseStatement(NULL, stylesheet); |
| break; |
| } |
| SkipSpace(); |
| } |
| |
| DCHECK(Done()) << "Finished parsing before end of document."; |
| |
| return stylesheet; |
| } |
| |
| Stylesheet* Parser::ParseStylesheet() { |
| Tracer trace(__func__, this); |
| |
| Stylesheet* stylesheet = ParseRawStylesheet(); |
| |
| Rulesets& rulesets = stylesheet->mutable_rulesets(); |
| for (int i = 0; i < rulesets.size(); ++i) { |
| if (rulesets[i]->type() == Css::Ruleset::RULESET) { |
| Declarations& orig_declarations = rulesets[i]->mutable_declarations(); |
| rulesets[i]->set_declarations(ExpandDeclarations(&orig_declarations)); |
| } |
| } |
| |
| return stylesheet; |
| } |
| |
| // |
| // Some destructors that need STLDeleteElements() from stl_util.h |
| // |
| |
| Declarations::~Declarations() { STLDeleteElements(this); } |
| Rulesets::~Rulesets() { STLDeleteElements(this); } |
| Charsets::~Charsets() {} |
| Imports::~Imports() { STLDeleteElements(this); } |
| FontFaces::~FontFaces() { STLDeleteElements(this); } |
| |
| } // namespace Css |