| /** |
| * Copyright 2010 Google Inc. |
| * |
| * Licensed under the Apache License, Version 2.0 (the "License"); |
| * you may not use this file except in compliance with the License. |
| * You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
| |
| // Author: sligocki@google.com (Shawn Ligocki) |
| |
| #include "net/instaweb/rewriter/public/css_filter.h" |
| |
| #include "base/at_exit.h" |
| |
| #include "base/scoped_ptr.h" |
| #include "net/instaweb/htmlparse/public/html_parse.h" |
| #include "net/instaweb/rewriter/public/css_minify.h" |
| #include "net/instaweb/rewriter/public/output_resource.h" |
| #include "net/instaweb/rewriter/public/resource.h" |
| #include "net/instaweb/rewriter/public/resource_manager.h" |
| #include "net/instaweb/util/public/content_type.h" |
| #include "net/instaweb/util/public/google_url.h" |
| #include "net/instaweb/util/public/message_handler.h" |
| #include "net/instaweb/util/public/statistics.h" |
| #include "net/instaweb/util/public/string_writer.h" |
| #include "net/instaweb/util/public/url_escaper.h" |
| #include "net/instaweb/util/public/writer.h" |
| #include "webutil/css/parser.h" |
| |
| namespace { |
| |
| base::AtExitManager* at_exit_manager = NULL; |
| |
| } // namespace |
| |
| namespace net_instaweb { |
| |
| namespace { |
| |
| const char kStylesheet[] = "stylesheet"; |
| |
| } // namespace |
| |
| // Statistics variable names. |
| const char CssFilter::kFilesMinified[] = "css_filter_files_minified"; |
| const char CssFilter::kMinifiedBytesSaved[] = "css_filter_minified_bytes_saved"; |
| const char CssFilter::kParseFailures[] = "css_filter_parse_failures"; |
| |
| CssFilter::CssFilter(RewriteDriver* driver, const StringPiece& path_prefix) |
| : RewriteFilter(driver, path_prefix), |
| html_parse_(driver->html_parse()), |
| resource_manager_(driver->resource_manager()), |
| in_style_element_(false), |
| s_style_(html_parse_->Intern("style")), |
| s_link_(html_parse_->Intern("link")), |
| s_rel_(html_parse_->Intern("rel")), |
| s_href_(html_parse_->Intern("href")), |
| num_files_minified_(NULL), |
| minified_bytes_saved_(NULL), |
| num_parse_failures_(NULL) { |
| Statistics* stats = resource_manager_->statistics(); |
| if (stats != NULL) { |
| num_files_minified_ = stats->GetVariable(CssFilter::kFilesMinified); |
| minified_bytes_saved_ = stats->GetVariable(CssFilter::kMinifiedBytesSaved); |
| num_parse_failures_ = stats->GetVariable(CssFilter::kParseFailures); |
| } |
| } |
| |
| void CssFilter::Initialize(Statistics* statistics) { |
| statistics->AddVariable(CssFilter::kFilesMinified); |
| statistics->AddVariable(CssFilter::kMinifiedBytesSaved); |
| statistics->AddVariable(CssFilter::kParseFailures); |
| |
| // Note: This is not thread-safe, but I don't believe we need it to be. |
| if (at_exit_manager == NULL) { |
| at_exit_manager = new base::AtExitManager; |
| } |
| } |
| |
| void CssFilter::StartDocumentImpl() { |
| in_style_element_ = false; |
| } |
| |
| void CssFilter::StartElementImpl(HtmlElement* element) { |
| // HtmlParse should not pass us elements inside a style element. |
| CHECK(!in_style_element_); |
| if (element->tag() == s_style_) { |
| in_style_element_ = true; |
| style_element_ = element; |
| style_char_node_ = NULL; |
| } |
| // We deal with <link> elements in EndElement. |
| } |
| |
| void CssFilter::Characters(HtmlCharactersNode* characters_node) { |
| if (in_style_element_) { |
| if (style_char_node_ == NULL) { |
| style_char_node_ = characters_node; |
| } else { |
| html_parse_->ErrorHere("Multiple character nodes in style."); |
| in_style_element_ = false; |
| } |
| } |
| } |
| |
| void CssFilter::EndElementImpl(HtmlElement* element) { |
| // Rewrite an inline style. |
| if (in_style_element_) { |
| CHECK(style_element_ == element); // HtmlParse should not pass unmatching. |
| |
| if (html_parse_->IsRewritable(element) && style_char_node_ != NULL) { |
| CHECK(element == style_char_node_->parent()); // Sanity check. |
| std::string new_content; |
| if (RewriteCssText(style_char_node_->contents(), &new_content, |
| html_parse_->message_handler())) { |
| // Note: Copy of new_content here. |
| HtmlCharactersNode* new_style_char_node = |
| html_parse_->NewCharactersNode(element, new_content); |
| html_parse_->ReplaceNode(style_char_node_, new_style_char_node); |
| } |
| } |
| in_style_element_ = false; |
| |
| // Rewrite an external style. |
| } else if (element->tag() == s_link_ && html_parse_->IsRewritable(element)) { |
| StringPiece relation(element->AttributeValue(s_rel_)); |
| if (relation == kStylesheet) { |
| HtmlElement::Attribute* element_href = element->FindAttribute(s_href_); |
| if (element_href != NULL) { |
| // If it has a href= attribute |
| std::string new_url; |
| if (RewriteExternalCss(element_href->value(), &new_url)) { |
| element_href->SetValue(new_url); // Update the href= attribute. |
| } |
| } else { |
| html_parse_->ErrorHere("Link element with no href."); |
| } |
| } |
| } |
| } |
| |
| // Return value answers the question: May we rewrite? |
| // If return false, out_text is undefined. |
| bool CssFilter::RewriteCssText(const StringPiece& in_text, |
| std::string* out_text, |
| MessageHandler* handler) { |
| // Load stylesheet w/o expanding background attributes. |
| Css::Parser parser(in_text); |
| scoped_ptr<Css::Stylesheet> stylesheet(parser.ParseRawStylesheet()); |
| |
| bool ret = false; |
| if (parser.errors_seen_mask() != Css::Parser::kNoError) { |
| if (num_parse_failures_ != NULL) { |
| num_parse_failures_->Add(1); |
| } |
| } else { |
| // TODO(sligocki): Edit stylesheet. |
| |
| // Re-serialize stylesheet. |
| StringWriter writer(out_text); |
| CssMinify::Stylesheet(*stylesheet, &writer, handler); |
| |
| // Get signed versions so that we can subtract them. |
| int64 out_text_size = static_cast<int64>(out_text->size()); |
| int64 in_text_size = static_cast<int64>(in_text.size()); |
| |
| // Don't rewrite if we don't make it smaller. |
| ret = (out_text_size < in_text_size); |
| |
| // Don't rewrite if we blanked the CSS file! (This is a parse error) |
| if (out_text_size == 0) { |
| ret = false; |
| num_parse_failures_->Add(1); |
| } |
| |
| // Statistics |
| if (ret && num_files_minified_ != NULL) { |
| num_files_minified_->Add(1); |
| minified_bytes_saved_->Add(in_text_size - out_text_size); |
| } |
| // TODO(sligocki): Do we want to save the AST 'stylesheet' somewhere? |
| // It currently, deletes itself at the end of the function. |
| } |
| |
| return ret; |
| } |
| |
| |
| // Combine all 'original_stylesheets' (and all their sub stylescripts) into a |
| // single returned stylesheet which has no @imports or returns NULL if we fail |
| // to load some sub-resources. |
| // |
| // Note: we must cannibalize input stylesheets or we will have ownership |
| // problems or a lot of deep-copying. |
| Css::Stylesheet* CssFilter::CombineStylesheets( |
| std::vector<Css::Stylesheet*>* original_stylesheets) { |
| // Load all sub-stylesheets to assure that we can do the combination. |
| std::vector<Css::Stylesheet*> stylesheets; |
| std::vector<Css::Stylesheet*>::const_iterator iter; |
| for (iter = original_stylesheets->begin(); |
| iter < original_stylesheets->end(); ++iter) { |
| Css::Stylesheet* stylesheet = *iter; |
| if (!LoadAllSubStylesheets(stylesheet, &stylesheets)) { |
| return NULL; |
| } |
| } |
| |
| // Once all sub-stylesheets are loaded in memory, combine them. |
| Css::Stylesheet* combination = new Css::Stylesheet; |
| // TODO(sligocki): combination->rulesets().reserve(...); |
| for (std::vector<Css::Stylesheet*>::const_iterator iter = stylesheets.begin(); |
| iter < stylesheets.end(); ++iter) { |
| Css::Stylesheet* stylesheet = *iter; |
| // Append all rulesets from 'stylesheet' to 'combination' ... |
| combination->mutable_rulesets().insert( |
| combination->mutable_rulesets().end(), |
| stylesheet->rulesets().begin(), |
| stylesheet->rulesets().end()); |
| // ... and then clear rules from 'stylesheet' to avoid double ownership. |
| stylesheet->mutable_rulesets().clear(); |
| } |
| return combination; |
| } |
| |
| // Collect a list of all stylesheets @imported by base_stylesheet directly or |
| // indirectly in the order that they will be dealt with by a CSS parser and |
| // append them to vector 'all_stylesheets'. |
| bool CssFilter::LoadAllSubStylesheets( |
| Css::Stylesheet* base_stylesheet, |
| std::vector<Css::Stylesheet*>* all_stylesheets) { |
| const Css::Imports& imports = base_stylesheet->imports(); |
| for (Css::Imports::const_iterator iter = imports.begin(); |
| iter < imports.end(); ++iter) { |
| Css::Import* import = *iter; |
| StringPiece url(import->link.utf8_data(), import->link.utf8_length()); |
| |
| // Fetch external stylesheet from url ... |
| Css::Stylesheet* sub_stylesheet = LoadStylesheet(url); |
| if (sub_stylesheet == NULL) { |
| html_parse_->ErrorHere("Failed to load sub-resource %s", |
| url.as_string().c_str()); |
| return false; |
| } |
| |
| // ... and recursively add all its sub-stylesheets (and it) to vector. |
| if (!LoadAllSubStylesheets(sub_stylesheet, all_stylesheets)) { |
| return false; |
| } |
| } |
| // Add base stylesheet after all imports have been added. |
| all_stylesheets->push_back(base_stylesheet); |
| return true; |
| } |
| |
| |
| // Read an external CSS file, rewrite it and write a new external CSS file. |
| bool CssFilter::RewriteExternalCss(const StringPiece& in_url, |
| std::string* out_url) { |
| bool ret = false; |
| scoped_ptr<Resource> input_resource( |
| resource_manager_->CreateInputResource( |
| base_gurl(), in_url, html_parse_->message_handler())); |
| scoped_ptr<OutputResource> output_resource( |
| resource_manager_->CreateOutputResourceFromResource( |
| filter_prefix_, &kContentTypeCss, resource_manager_->url_escaper(), |
| input_resource.get(), html_parse_->message_handler())); |
| if (output_resource.get() != NULL && |
| RewriteExternalCssToResource(input_resource.get(), |
| output_resource.get())) { |
| ret = true; |
| *out_url = output_resource->url(); |
| } |
| return ret; |
| } |
| |
| bool CssFilter::RewriteExternalCssToResource(Resource* input_resource, |
| OutputResource* output_resource) { |
| // If this OutputResource has not already been created, create it. |
| if (!output_resource->IsWritten()) { |
| // Load input stylesheet. |
| MessageHandler* handler = html_parse_->message_handler(); |
| if (input_resource == NULL || |
| !resource_manager_->ReadIfCached(input_resource, handler) || |
| !input_resource->ContentsValid()) { |
| // TODO(sligocki): Should these really be HtmlParse errors? |
| html_parse_->ErrorHere("Failed to load resource %s", |
| input_resource->url().c_str()); |
| return false; |
| } |
| |
| // Rewrite stylesheet. |
| StringPiece in_contents = input_resource->contents(); |
| std::string out_contents; |
| if (!RewriteCssText(in_contents, &out_contents, |
| html_parse_->message_handler())) { |
| return false; |
| } |
| |
| // Write new stylesheet. |
| // TODO(sligocki): Set expire time. |
| if (!resource_manager_->Write(HttpStatus::kOK, out_contents, |
| output_resource, -1, handler)) { |
| return false; |
| } |
| } |
| |
| return output_resource->IsWritten(); |
| } |
| |
| bool CssFilter::Fetch(OutputResource* output_resource, |
| Writer* writer, |
| const MetaData& request_header, |
| MetaData* response_headers, |
| UrlAsyncFetcher* fetcher, |
| MessageHandler* message_handler, |
| UrlAsyncFetcher::Callback* callback) { |
| // TODO(sligocki): We do not use writer, *_headers or fetcher ... should we? |
| // It looks like nobody is using the fetcher, I'll let someone else get this |
| // right first. |
| // TODO(sligocki): If this doesn't work, we need to wait for it to finish |
| // fetching and then rewrite. |
| scoped_ptr<Resource> input_resource( |
| resource_manager_->CreateInputResourceFromOutputResource( |
| resource_manager_->url_escaper(), output_resource, message_handler)); |
| bool ret = RewriteExternalCssToResource(input_resource.get(), |
| output_resource); |
| // For some reason we only call the callback if we succeed. |
| if (ret) { |
| callback->Done(ret); |
| } |
| return ret; |
| } |
| |
| } // namespace net_instaweb |