| /* |
| * Copyright 2011 Google Inc. |
| * |
| * Licensed under the Apache License, Version 2.0 (the "License"); |
| * you may not use this file except in compliance with the License. |
| * You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
| |
| // Author: matterbury@google.com (Matt Atterbury) |
| |
| #include "net/instaweb/rewriter/public/css_inline_import_to_link_filter.h" |
| |
| #include <algorithm> |
| #include <memory> |
| #include <vector> |
| |
| #include "base/logging.h" |
| #include "net/instaweb/rewriter/public/css_tag_scanner.h" |
| #include "net/instaweb/rewriter/public/css_util.h" |
| #include "net/instaweb/rewriter/public/rewrite_driver.h" |
| #include "pagespeed/kernel/base/statistics.h" |
| #include "pagespeed/kernel/base/string.h" |
| #include "pagespeed/kernel/base/string_util.h" |
| #include "pagespeed/kernel/html/html_element.h" |
| #include "pagespeed/kernel/html/html_name.h" |
| #include "pagespeed/kernel/html/html_node.h" |
| #include "pagespeed/kernel/http/content_type.h" |
| #include "util/utf8/public/unicodetext.h" |
| #include "webutil/css/media.h" |
| #include "webutil/css/parser.h" |
| |
| namespace net_instaweb { |
| |
| namespace { |
| |
| // names for Statistics variables. |
| const char kCssImportsToLinks[] = "css_imports_to_links"; |
| |
| } // namespace |
| |
| CssInlineImportToLinkFilter::CssInlineImportToLinkFilter(RewriteDriver* driver, |
| Statistics* statistics) |
| : driver_(driver), |
| counter_(statistics->GetVariable(kCssImportsToLinks)) { |
| ResetState(); |
| } |
| |
| CssInlineImportToLinkFilter::~CssInlineImportToLinkFilter() {} |
| |
| void CssInlineImportToLinkFilter::InitStats(Statistics* statistics) { |
| statistics->AddVariable(kCssImportsToLinks); |
| } |
| |
| void CssInlineImportToLinkFilter::StartDocument() { |
| ResetState(); |
| } |
| |
| void CssInlineImportToLinkFilter::EndDocument() { |
| ResetState(); |
| } |
| |
| void CssInlineImportToLinkFilter::StartElement(HtmlElement* element) { |
| DCHECK(style_element_ == NULL); // HTML Parser guarantees this. |
| if (style_element_ == NULL && element->keyword() == HtmlName::kStyle) { |
| // The contents are ok to rewrite iff its type is text/css or it has none. |
| // See http://www.w3.org/TR/html5/semantics.html#the-style-element |
| const char* type = element->AttributeValue(HtmlName::kType); |
| if (type == NULL || strcmp(type, kContentTypeCss.mime_type()) == 0) { |
| style_element_ = element; |
| style_characters_ = NULL; |
| } |
| } |
| } |
| |
| void CssInlineImportToLinkFilter::EndElement(HtmlElement* element) { |
| if (style_element_ == element) { |
| InlineImportToLinkStyle(); |
| ResetState(); |
| } |
| } |
| |
| void CssInlineImportToLinkFilter::Characters(HtmlCharactersNode* characters) { |
| if (style_element_ != NULL) { |
| DCHECK(style_characters_ == NULL); // HTML Parser guarantees this. |
| style_characters_ = characters; |
| } |
| } |
| |
| void CssInlineImportToLinkFilter::Flush() { |
| // If we were flushed in a style element, we cannot rewrite it. |
| if (style_element_ != NULL) { |
| ResetState(); |
| } |
| } |
| |
| void CssInlineImportToLinkFilter::ResetState() { |
| style_element_ = NULL; |
| style_characters_ = NULL; |
| } |
| |
| namespace { |
| |
| // Extract the given style's media attribute, if any. Fail if can't decode it. |
| bool ExtractMediaFromStyle(const HtmlElement* style_element, |
| GoogleString* media_attribute) { |
| const HtmlElement::Attribute* styles_media = |
| style_element->FindAttribute(HtmlName::kMedia); |
| if (styles_media!= NULL) { |
| const char* decoded_value = styles_media->DecodedValueOrNull(); |
| if (decoded_value == NULL) { |
| return false; |
| } else { |
| media_attribute->assign(decoded_value); |
| } |
| } |
| return true; |
| } |
| |
| // Determine if the import has a single simple media that matches the style's. |
| bool MediaMatch(const GoogleString& media_attribute, |
| const Css::Import* import) { |
| bool result = false; |
| if (media_attribute.empty()) { |
| // The style doesn't have a media attribute to match against. |
| } else if (import->media_queries().size() != 1) { |
| // The import doesn't have a single media. |
| } else if (css_util::IsComplexMediaQuery(*import->media_queries()[0])) { |
| // The import doesn't have a simple media. |
| } else { |
| // TODO(jmarantz): this code would feel a bit better if |
| // attribute-decoding supported UTF8. |
| const StringPiece import_media( |
| import->media_queries()[0]->media_type().utf8_data(), |
| import->media_queries()[0]->media_type().utf8_length()); |
| result = media_attribute == import_media; |
| } |
| return result; |
| } |
| |
| // Check if the given import can be converted to a link elements. |
| // media_attribute is the original style's media attribute; link_media is set |
| // to the import's media iff it has one and the style doesn't; style_media is |
| // used to store the vectorized version of media_attribute and is lazily |
| // initialized by this function when it is first required; |
| // style_media_is_determined is the flag that records that style_media is set. |
| bool CheckConversionOfImportToLink(const Css::Import* import, |
| const GoogleString& media_attribute, |
| GoogleString* link_media, |
| bool* style_media_is_determined, |
| StringVector* style_media) { |
| if (import->link().utf8_length() == 0) { |
| // Empty URLs are problematic so we give up if we hit any. |
| return false; |
| } else if (import->media_queries().empty()) { |
| // No media queries is easy - just copy any media into the link. |
| } else if (MediaMatch(media_attribute, import)) { |
| // A 'simple' media query that matches the style's is also good. |
| } else { |
| // If the style has media then the @import may specify no media or the |
| // same media; if the style has no media use the @import's, if any. |
| StringVector import_media; |
| if (css_util::ConvertMediaQueriesToStringVector( |
| import->media_queries(), &import_media)) { |
| if (!media_attribute.empty()) { |
| if (!*style_media_is_determined) { |
| css_util::VectorizeMediaAttribute(media_attribute, style_media); |
| std::sort(style_media->begin(), style_media->end()); |
| *style_media_is_determined = true; |
| } |
| // VectorizeMediaAttribute returns an empty vector if any medium |
| // is "all", so be careful to do the same to import_media. |
| css_util::ClearVectorIfContainsMediaAll(&import_media); |
| std::sort(import_media.begin(), import_media.end()); |
| // We have sorted both the vectors because the order of media is not |
| // significant as they're additive: screen,print == print,screen. |
| return (*style_media == import_media); |
| } else { |
| // Note the import's media to copy it to the corresponding link. |
| *link_media = css_util::StringifyMediaVector(import_media); |
| } |
| } else { |
| // If we can't parse the media query then it's too complex for us. |
| return false; |
| } |
| } |
| return true; |
| } |
| |
| } // namespace |
| |
| // Pull out each @import from a <style> element into <link> elements. |
| void CssInlineImportToLinkFilter::InlineImportToLinkStyle() { |
| // Conditions for rewriting @imports from within a style element: |
| // * The element isn't empty. |
| // * The element is rewritable. |
| // * It doesn't already have an href or rel attribute, since we add these. |
| // * It doesn't have a scoped attribute, since scoped styles can't be |
| // done with a <link> |
| // * It begins with one or more valid @import statement. |
| // * Each @import actually imports something (the url isn't empty). |
| // * Each @import's media, if any, are the same as style's, if any. |
| if (style_characters_ != NULL && |
| driver_->IsRewritable(style_element_) && |
| style_element_->FindAttribute(HtmlName::kHref) == NULL && |
| style_element_->FindAttribute(HtmlName::kRel) == NULL && |
| style_element_->FindAttribute(HtmlName::kScoped) == NULL) { |
| // Parse imports until we hit the end of them; if there's anything else |
| // in the CSS we leave that in the inline style. |
| Css::Parser parser(style_characters_->contents()); |
| Css::Imports imports; |
| Css::Import* import; |
| StringVector media; |
| |
| // Extract the style's media attribute, if any. Fail if we can't decode it. |
| GoogleString media_attribute; |
| bool ok = ExtractMediaFromStyle(style_element_, &media_attribute); |
| |
| // The style's media converted to a vector of media types. This is parsed |
| // and set on first use but it's actually a loop invariant that could be |
| // set before the loop, but we don't in case we never end up needing it. |
| StringVector style_media; |
| bool style_media_is_determined = false; |
| |
| // Check each import in turn, failing if any of them have a problem. |
| while (ok && (import = parser.ParseNextImport()) != NULL) { |
| imports.push_back(import); |
| // Default the media for the link to the style's media attribute; |
| // CheckConversion... overrides that if the @import has its own media. |
| media.push_back(media_attribute); |
| ok = CheckConversionOfImportToLink(import, media_attribute, &media.back(), |
| &style_media_is_determined, |
| &style_media); |
| } |
| |
| if (ok && (imports.size() > 0) && |
| (parser.errors_seen_mask() == Css::Parser::kNoError)) { |
| for (int i = 0, n = imports.size(); i < n; ++i) { |
| Css::Import* import = imports[i]; |
| StringPiece url(import->link().utf8_data(), |
| import->link().utf8_length()); |
| // Create new link element to replace the @import. |
| HtmlElement* link_element = |
| driver_->NewElement(style_element_->parent(), HtmlName::kLink); |
| if (driver_->MimeTypeXhtmlStatus() != RewriteDriver::kIsNotXhtml) { |
| link_element->set_style(HtmlElement::BRIEF_CLOSE); |
| } |
| driver_->AddAttribute(link_element, HtmlName::kRel, |
| CssTagScanner::kStylesheet); |
| driver_->AddAttribute(link_element, HtmlName::kHref, url); |
| // Add all of the style attributes to the link. |
| const HtmlElement::AttributeList& attrs(style_element_->attributes()); |
| for (HtmlElement::AttributeConstIterator j(attrs.begin()); |
| j != attrs.end(); ++j) { |
| const HtmlElement::Attribute& attr = *j; |
| // If there's a media attribute, forget our remembered one so that we |
| // copy over the import's rather than the style's; although they're |
| // equivalent it's best to keep the "original". |
| if (attr.name().keyword() == HtmlName::kMedia) { |
| media[i].clear(); |
| } |
| link_element->AddAttribute(attr); |
| } |
| if (!media[i].empty()) { |
| driver_->AddAttribute(link_element, HtmlName::kMedia, media[i]); |
| } |
| // Add the link to the DOM. |
| driver_->InsertNodeBeforeNode(style_element_, link_element); |
| } |
| |
| if (parser.Done()) { |
| // <style> contained only @imports, so remove it now. |
| if (!driver_->DeleteNode(style_element_)) { |
| driver_->ErrorHere("Failed to delete inline style element"); |
| } |
| } else { |
| // Erase parsed @imports from contents, but leave rest of CSS. |
| int parser_offset = parser.CurrentOffset(); |
| style_characters_->mutable_contents()->erase(0, parser_offset); |
| // Note: parser cannot be used after this point. It contains invalid |
| // pointers into the old style_characters_->contents(). |
| } |
| |
| counter_->Add(1); |
| } |
| } |
| } |
| |
| } // namespace net_instaweb |