/*
 * Copyright 2013 Google Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

// Author: slamm@google.com (Stephen Lamm),
//         morlovich@google.com (Maksim Orlovich)
// See the header for overview.

#include "net/instaweb/rewriter/public/critical_selector_filter.h"

#include <algorithm>
#include <cstddef>
#include <set>

#include "base/logging.h"
#include "net/instaweb/http/public/log_record.h"
#include "net/instaweb/rewriter/flush_early.pb.h"
#include "net/instaweb/rewriter/public/critical_selector_finder.h"
#include "net/instaweb/rewriter/public/css_minify.h"
#include "net/instaweb/rewriter/public/css_tag_scanner.h"
#include "net/instaweb/rewriter/public/css_util.h"
#include "net/instaweb/rewriter/public/request_properties.h"
#include "net/instaweb/rewriter/public/rewrite_driver.h"
#include "net/instaweb/rewriter/public/rewrite_options.h"
#include "net/instaweb/rewriter/public/server_context.h"
#include "net/instaweb/rewriter/public/static_asset_manager.h"
#include "pagespeed/kernel/base/basictypes.h"
#include "pagespeed/kernel/base/hasher.h"
#include "pagespeed/kernel/base/null_message_handler.h"
#include "pagespeed/kernel/base/stl_util.h"
#include "pagespeed/kernel/base/string.h"
#include "pagespeed/kernel/base/string_util.h"
#include "pagespeed/kernel/base/string_writer.h"
#include "pagespeed/kernel/html/html_element.h"
#include "pagespeed/kernel/html/html_keywords.h"
#include "pagespeed/kernel/html/html_name.h"
#include "pagespeed/kernel/html/html_node.h"
#include "pagespeed/kernel/html/html_parse.h"
#include "pagespeed/kernel/http/google_url.h"
#include "pagespeed/opt/logging/enums.pb.h"
#include "webutil/css/media.h"
#include "webutil/css/parser.h"
#include "webutil/css/selector.h"

namespace net_instaweb {

namespace {

// Removes every NULL entry from a std::vector-like container of pointers,
// preserving the relative order of the remaining entries.  The pointees are
// not touched; callers are expected to have released whatever the NULLed
// slots used to refer to.
template<typename VectorType> void Compact(VectorType* cl) {
  typedef typename VectorType::value_type Element;
  const Element hole = static_cast<Element>(NULL);
  // Classic erase/remove: shift survivors forward, then chop off the tail.
  cl->erase(std::remove(cl->begin(), cl->end(), hole), cl->end());
}

}  // namespace

// When flush early filter is enabled, critical css rules are flushed early
// as innerHTML of a script element. When the CSS element appears in the
// document, find the previously flushed style data and copy it to the style
// element so it can be applied. This script is used for that.
//
// The JavaScript below defines applyFlushedCriticalCss(script_id, mediaString):
//   * looks up the element whose id is script_id and returns silently if it
//     does not exist;
//   * reads that element's text (innerHTML, textContent or data, whichever is
//     non-empty first);
//   * builds a new <style type="text/css"> element holding that text, using
//     styleSheet.cssText when the property exists and a text node otherwise;
//   * sets the "media" attribute when mediaString is truthy;
//   * inserts the new <style> immediately before the last <script> element in
//     the document at call time (presumably the script that is currently
//     executing).
// The adjacent string literals carry no newlines, so the whole script is a
// single line; every statement therefore has to be ';'-terminated.
const char CriticalSelectorFilter::kApplyFlushEarlyCss[] =
    "var applyFlushedCriticalCss = function(script_id, mediaString) {"
    "  var scripts = document.getElementsByTagName('script');"
    "  var styleScript = document.getElementById(script_id);"
    "  if (styleScript == null) {"
    "    return;"
    "  }"
    "  var cssText = styleScript.innerHTML || styleScript.textContent || "
    "                styleScript.data || \"\";"
    "  var styleElem = document.createElement('style');"
    "  styleElem.type = 'text/css';"
    "  if (styleElem.styleSheet) {"
    "    styleElem.styleSheet.cssText = cssText;"
    "  } else {"
    "    styleElem.appendChild(document.createTextNode(cssText));"
    "  }"
    "  if (mediaString) {"
    "    styleElem.setAttribute(\"media\", mediaString);"
    "  }"
    "  var currentScript = scripts[scripts.length-1];"
    "  currentScript.parentNode.insertBefore(styleElem, currentScript);"
    "};";

// printf-style template for the call to applyFlushedCriticalCss().  The two
// %s slots are, in order, the id of the early-flushed script element and the
// media string; ApplyCssFlushedEarly() fills them in with StringPrintf.
const char CriticalSelectorFilter::kInvokeFlushEarlyCssTemplate[] =
    "applyFlushedCriticalCss(\"%s\", \"%s\");";

// id attribute put on the <script> that defines applyFlushedCriticalCss()
// (see ApplyCssFlushedEarly()).
const char CriticalSelectorFilter::kMoveScriptId[] = "psa_flush_style_early";
// class attribute put by RenderDone() on the <noscript> wrappers whose
// contents were NOT inside a noscript block in the original document.
const char CriticalSelectorFilter::kNoscriptStylesClass[] = "psa_add_styles";

// TODO(morlovich): Check charset like CssInlineFilter::ShouldInline().

// Holds a clone of a CSS-bearing element so that it can be re-emitted later
// in the document.
// A simple list of elements is insufficient because link tags and style tags
// are inserted differently; subclasses override AppendTo() to account for
// that.
class CriticalSelectorFilter::CssElement {
 public:
  // Clones |source| via |parser| and records whether the original element sat
  // inside a noscript block.
  CssElement(HtmlParse* parser, HtmlElement* source, bool inside_noscript)
      : html_parse_(parser),
        element_(parser->CloneElement(source)),
        inside_noscript_(inside_noscript) {
  }

  // Nothing to release here: HtmlParse deletes the element (regardless of
  // whether it is inserted).
  virtual ~CssElement() {
  }

  // Appends the cloned element as the last child of |parent|.
  virtual void AppendTo(HtmlElement* parent) const {
    html_parse_->AppendChild(parent, element_);
  }

  // True when the original element was found inside a noscript block.
  bool inside_noscript() const {
    return inside_noscript_;
  }

 protected:
  HtmlParse* html_parse_;   // Used to clone and to append nodes.
  HtmlElement* element_;    // The clone made in the constructor.
  bool inside_noscript_;

 private:
  DISALLOW_COPY_AND_ASSIGN(CssElement);
};

// CssElement specialization for inline <style> blocks: besides the cloned
// element it also keeps copies of the text nodes that make up the block's
// contents, and re-attaches them when the element is emitted.
class CriticalSelectorFilter::CssStyleElement
    : public CriticalSelectorFilter::CssElement {
 public:
  CssStyleElement(HtmlParse* parser, HtmlElement* source,
                  bool inside_noscript)
      : CssElement(parser, source, inside_noscript) {
  }

  virtual ~CssStyleElement() {
  }

  // Saves a fresh, parentless characters node holding the same contents as
  // |characters_node|.  Call before AppendTo().
  void AppendCharactersNode(HtmlCharactersNode* characters_node) {
    HtmlCharactersNode* copy =
        html_parse_->NewCharactersNode(NULL, characters_node->contents());
    characters_nodes_.push_back(copy);
  }

  // Appends the cloned <style> element to |parent|, then hangs every saved
  // characters node underneath it, in the order they were recorded.
  virtual void AppendTo(HtmlElement* parent) const {
    CssElement::AppendTo(parent);
    for (size_t i = 0; i < characters_nodes_.size(); ++i) {
      html_parse_->AppendChild(element_, characters_nodes_[i]);
    }
  }

 protected:
  typedef std::vector<HtmlCharactersNode*> CharactersNodeVector;
  CharactersNodeVector characters_nodes_;

 private:
  DISALLOW_COPY_AND_ASSIGN(CssStyleElement);
};

// Constructs the filter on top of CssSummarizerBase for the given driver.
// All per-document flags start out false; StartDocumentImpl() resets them
// again at the beginning of every document.
CriticalSelectorFilter::CriticalSelectorFilter(RewriteDriver* driver)
    : CssSummarizerBase(driver),
      saw_end_document_(false),
      any_rendered_(false),
      is_flush_script_added_(false) {
}

// Frees any CssElement wrappers still held in css_elements_.  RenderDone()
// normally releases them after the last flush window, but if the filter is
// torn down before that point (RenderDone() returns early until EndDocument()
// has been seen) the wrappers would otherwise leak.  STLDeleteElements is a
// no-op on an already-emptied container, so the normal path is unaffected.
CriticalSelectorFilter::~CriticalSelectorFilter() {
  STLDeleteElements(&css_elements_);
}

// Prunes |stylesheet| in place down to the rules that matter for the critical
// selectors, then writes the minified result to |out|.
//
// For every parsed ruleset:
//   * media queries that cannot affect the screen are deleted;
//   * if some media still applies (or there were none to begin with),
//     selectors that are not in critical_selectors_ are deleted;
//   * if nothing relevant is left the entire ruleset is deleted.
// Unparseable regions are passed through untouched.
void CriticalSelectorFilter::Summarize(Css::Stylesheet* stylesheet,
                                       GoogleString* out) const {
  const int ruleset_count = stylesheet->rulesets().size();
  for (int i = 0; i < ruleset_count; ++i) {
    Css::Ruleset* ruleset = stylesheet->mutable_rulesets().at(i);
    if (ruleset->type() == Css::Ruleset::UNPARSED_REGION) {
      // Couldn't parse this as a rule, leave unaltered. Hopefully it's not
      // too big..
      continue;
    }

    // TODO(morlovich): This does a lot of repeated work as the same media
    // entries are repeated for tons of rulesets.
    // TODO(morlovich): It's silly to serialize this, we should work directly
    // off AST once we have decision procedure on that.

    // Step 1: drop media queries that cannot affect the screen.  A ruleset
    // without any media query applies everywhere.
    const int query_count = ruleset->media_queries().size();
    bool media_relevant = (query_count == 0);
    for (int q = 0; q < query_count; ++q) {
      Css::MediaQuery* query = ruleset->mutable_media_queries().at(q);
      if (css_util::CanMediaAffectScreen(query->ToString())) {
        media_relevant = true;
        continue;
      }
      delete query;
      ruleset->mutable_media_queries()[q] = NULL;
    }

    // Step 2: drop selectors that are not critical.  Some partial parse
    // errors leave a ruleset with zero selectors; those are retained to be
    // conservative.
    bool selectors_relevant = false;
    if (media_relevant) {
      const int selector_count = ruleset->selectors().size();
      selectors_relevant = (selector_count == 0);
      for (int s = 0; s < selector_count; ++s) {
        Css::Selector* selector = ruleset->mutable_selectors().at(s);
        const GoogleString detectable =
            css_util::JsDetectableSelector(*selector);
        const bool is_critical =
            detectable.empty() ||
            (critical_selectors_.find(detectable) !=
             critical_selectors_.end());
        if (is_critical) {
          selectors_relevant = true;
          continue;
        }
        delete selector;
        ruleset->mutable_selectors()[s] = NULL;
      }
    }

    // Step 3: either discard the whole production, or squeeze out the holes
    // left by the deletions above.
    if (!(media_relevant && selectors_relevant)) {
      delete ruleset;
      stylesheet->mutable_rulesets()[i] = NULL;
      continue;
    }
    Compact(&ruleset->mutable_selectors());
    Compact(&ruleset->mutable_media_queries());
  }
  Compact(&stylesheet->mutable_rulesets());

  // Serialize out the remaining subset.
  StringWriter writer(out);
  NullMessageHandler handler;
  CssMinify::Stylesheet(*stylesheet, &writer, &handler);
}

// Replaces the CSS at position |pos| in the document with its critical
// subset.
//
//   pos       - index of this CSS block, passed to GetSummaryForStyle() and
//               RememberFullCss().
//   element   - the DOM element carrying the CSS.
//   char_node - non-NULL for the inline case (its contents are overwritten in
//               place); NULL otherwise, in which case |element| is deleted
//               and a newly created <style> element is inserted in its place.
//   is_element_deleted - out-parameter; written only in the char_node == NULL
//               branch, with the result of deleting the original |element|.
//
// The full CSS is first saved via RememberFullCss() so that RenderDone() can
// re-emit it at the end of the body.  Sets any_rendered_ unconditionally.
void CriticalSelectorFilter::RenderSummary(
    int pos, HtmlElement* element, HtmlCharactersNode* char_node,
    bool* is_element_deleted) {
  // Must happen before any mutation below: the copy is taken from the
  // still-unmodified element / characters node.
  RememberFullCss(pos, element, char_node);

  const SummaryInfo& summary = GetSummaryForStyle(pos);
  DCHECK_EQ(kSummaryOk, summary.state);

  // If we're inlining an external CSS file, make sure to adjust the URLs
  // inside to the new base.
  // If resolution does not report kSuccess, the unresolved summary text is
  // used as-is.
  const GoogleString* css_to_use = &summary.data;
  GoogleString resolved_css;
  if (summary.is_external) {
    StringWriter writer(&resolved_css);
    GoogleUrl input_css_base(summary.base);
    if (driver()->ResolveCssUrls(
            input_css_base, driver()->base_url().Spec(), summary.data,
            &writer, driver()->message_handler()) == RewriteDriver::kSuccess) {
      css_to_use = &resolved_css;
    }
  }

  // Update the DOM --- either an existing style element, or replace link
  // with style.
  if (char_node != NULL) {
    // Note: This depends upon all previous filters also mutating the contents
    // of the original Characters Node. If any previous filters replaces the
    // Characters Node with another one or makes some other change, this node
    // will be out of date and the update will not do anything.
    // TODO(sligocki): We should use a non-trivial ResourceSlot to update this
    // instead so that it is not so delicate.
    *char_node->mutable_contents() = *css_to_use;
  } else {
    HtmlElement* style_element = driver()->NewElement(NULL, HtmlName::kStyle);
    driver()->InsertNodeBeforeNode(element, style_element);

    HtmlCharactersNode* content =
        driver()->NewCharactersNode(style_element, *css_to_use);
    driver()->AppendChild(style_element, content);
    *is_element_deleted = driver()->DeleteNode(element);
    // From here on |element| refers to the replacement <style>, not to the
    // element the caller passed in.
    element = style_element;
  }

  // Update the media attribute to just the media that's relevant to screen.
  StringVector all_media;
  css_util::VectorizeMediaAttribute(summary.media_from_html, &all_media);

  // The attribute is re-added below only if some screen-relevant media remain.
  element->DeleteAttribute(HtmlName::kMedia);
  bool drop_entire_element = false;
  if (css_to_use->empty()) {
    // Don't keep empty blocks around.
    drop_entire_element = true;
  } else if (summary.is_inside_noscript) {
    // Optimize summary version for scriptable environment, since noscript
    // environment will eagerly load the whole CSS anyway at the foot of the
    // page.
    drop_entire_element = true;
  } else if (summary.is_external &&
             CssTagScanner::IsAlternateStylesheet(summary.rel)) {
    // Likewise drop alternate stylesheets, they're non-critical.
    drop_entire_element = true;
  } else if (!all_media.empty()) {
    StringVector relevant_media;
    for (int i = 0, n = all_media.size(); i < n; ++i) {
      const GoogleString& medium = all_media[i];
      if (css_util::CanMediaAffectScreen(medium)) {
        relevant_media.push_back(medium);
      }
    }

    if (!relevant_media.empty()) {
      driver()->AddAttribute(element, HtmlName::kMedia,
                             css_util::StringifyMediaVector(relevant_media));
    } else {
      // None of the media applied to the screen, so remove the entire element.
      drop_entire_element = true;
    }
  }

  if (drop_entire_element) {
    // NOTE(review): this deletion does not update *is_element_deleted, so in
    // the inline (char_node != NULL) case the caller is never told that
    // |element| went away.  Confirm against the CssSummarizerBase contract
    // whether that is intended.
    driver()->DeleteNode(element);
  } else if (char_node == NULL) {
    // Flush-early integration applies only in the char_node == NULL case,
    // where |element| is now the replacement <style>.
    const GoogleString& url = summary.location;
    if (IsCssFlushedEarly(url)) {
      ApplyCssFlushedEarly(element,
                           driver()->server_context()->hasher()->Hash(url),
                           element->AttributeValue(HtmlName::kMedia));
    } else if (driver()->flushing_early()) {
      // Add an attribute so the flush early filter can flush these
      // elements early.
      driver()->AddAttribute(element, HtmlName::kDataPagespeedFlushStyle,
                             driver()->server_context()->hasher()->Hash(url));
    }
  }

  // We've altered the CSS, so we should generate code to load the entire thing.
  // TODO(morlovich): Check if we actually dropped something?
  any_rendered_ = true;
}

// Counterpart of RenderSummary() for CSS whose summary will not be rendered.
// The DOM is left alone, but the full CSS is still recorded so that
// RenderDone() can re-emit it together with the blocks that were rewritten.
// |is_element_deleted| is never written here.
void CriticalSelectorFilter::WillNotRenderSummary(
    int pos, HtmlElement* element, HtmlCharactersNode* char_node,
    bool* is_element_deleted) {
  RememberFullCss(pos, element, char_node);
}

// Returns the suffix computed in StartDocumentImpl(): a hash of the
// comma-joined critical selector set, so that the suffix changes whenever
// the set of critical selectors does.
GoogleString CriticalSelectorFilter::CacheKeySuffix() const {
  return cache_key_suffix_;
}

void CriticalSelectorFilter::StartDocumentImpl() {
  CssSummarizerBase::StartDocumentImpl();
  ServerContext* context = driver()->server_context();

  // Read critical selector info from pcache.
  critical_selectors_ =
      context->critical_selector_finder()->GetCriticalSelectors(driver());

  // Compute corresponding cache key suffix
  GoogleString all_selectors = JoinCollection(critical_selectors_, ",");
  cache_key_suffix_ = context->lock_hasher()->Hash(all_selectors);

  // Clear state between re-uses / check to make sure we wrapped up properly.
  DCHECK(css_elements_.empty());
  saw_end_document_ = false;
  any_rendered_ = false;
  is_flush_script_added_ = false;
}

// Marks that the end of the document has been reached.  RenderDone() checks
// saw_end_document_ so that the full-CSS section is emitted only in the very
// last flush window.
void CriticalSelectorFilter::EndDocument() {
  CssSummarizerBase::EndDocument();

  saw_end_document_ = true;
}

void CriticalSelectorFilter::RenderDone() {
  CssSummarizerBase::RenderDone();

  // Only do this on very last flush window.
  if (!saw_end_document_) {
    return;
  }

  if (!css_elements_.empty() && any_rendered_ && !driver()->flushing_early()) {
    HtmlElement* noscript_element = NULL;
    Compact(&css_elements_);
    for (int i = 0, n = css_elements_.size(); i < n; ++i) {
      // Insert the full CSS, but hide all the style, link tags inside noscript
      // blocks so that look-ahead parser cannot find them; and mark the
      // portions that were visible to scripting-aware browser with
      // class = psa_add_styles.
      //
      // If the browser has scripting off, it will therefore read everything,
      // including portions of original CSS that were in noscript block.
      //
      // If the browser has scripting on, the parser will not do anything, but
      // we will add a loader script which will load things with
      // class = psa_add_styles (thus skipping over things that were originally
      // inside noscript).
      if (i == 0 || (css_elements_[i]->inside_noscript() !=
                     css_elements_[i - 1]->inside_noscript())) {
        noscript_element = driver()->NewElement(NULL, HtmlName::kNoscript);
        if (!css_elements_[i]->inside_noscript()) {
          driver()->AddAttribute(noscript_element, HtmlName::kClass,
                                 kNoscriptStylesClass);
        }
        InsertNodeAtBodyEnd(noscript_element);
      }
      css_elements_[i]->AppendTo(noscript_element);
    }

    HtmlElement* script = driver()->NewElement(NULL, HtmlName::kScript);
    driver()->AddAttribute(script, HtmlName::kDataPagespeedNoDefer, NULL);
    InsertNodeAtBodyEnd(script);
    GoogleString js =
        driver()->server_context()->static_asset_manager()->GetAsset(
            StaticAssetEnum::CRITICAL_CSS_LOADER_JS, driver()->options());
    if (!driver()->options()
             ->test_only_prioritize_critical_css_dont_apply_original_css()) {
      StrAppend(&js, "pagespeed.CriticalCssLoader.Run();");
    }
    AddJsToElement(js, script);
  }

  STLDeleteElements(&css_elements_);
}

// Decides whether the filter may run for this request, logs the outcome, and
// fills in |disabled_reason| when it may not.
//
// We shouldn't do anything if there is no information on critical selectors
// in the property cache. Unfortunately, we also cannot run safely in case of
// IE, since we do not understand IE conditional comments well enough to
// replicate their behavior in the load-everything section.
void CriticalSelectorFilter::DetermineEnabled(GoogleString* disabled_reason) {
  const bool have_selectors =
      !driver()->server_context()->critical_selector_finder()
           ->GetCriticalSelectors(driver()).empty();
  const bool ua_supports_critical_css =
      driver()->request_properties()->SupportsCriticalCss();
  const bool can_run = ua_supports_critical_css && have_selectors;

  // An unsupported user agent takes precedence over a property cache miss
  // when both hold.
  driver()->log_record()->LogRewriterHtmlStatus(
      RewriteOptions::FilterId(RewriteOptions::kPrioritizeCriticalCss),
      (!ua_supports_critical_css
           ? RewriterHtmlApplication::USER_AGENT_NOT_SUPPORTED
           : (can_run ? RewriterHtmlApplication::ACTIVE
                      : RewriterHtmlApplication::PROPERTY_CACHE_MISS)));

  if (!ua_supports_critical_css) {
    *disabled_reason = "User agent not supported";
  } else if (!have_selectors) {
    *disabled_reason = "No critical selector info in cache";
  }

  set_is_enabled(can_run);
}

void CriticalSelectorFilter::RememberFullCss(
    int pos, HtmlElement* element, HtmlCharactersNode* char_node) {
  // Deep copy[1] into the css_elements_ array the CSS as optimized by all the
  // filters that ran before us and rendered their results, so that we can
  // emit it accurately at end, as a lazy-load sequence.
  // [1] We need a deep copy since some of the DOM data will get freed up at the
  //     end of each flush window.
  if (static_cast<size_t>(pos) >= css_elements_.size()) {
    css_elements_.resize(pos + 1);
  }
  bool noscript = GetSummaryForStyle(pos).is_inside_noscript;
  CssElement* save = NULL;
  if (char_node != NULL) {
    CssStyleElement* save_inline =
        new CssStyleElement(driver(), element, noscript);
    save_inline->AppendCharactersNode(char_node);
    save = save_inline;
  } else {
    save = new CssElement(driver(), element, noscript);
  }
  css_elements_[pos] = save;
}

bool CriticalSelectorFilter::IsCssFlushedEarly(const GoogleString& url) const {
  if (!driver()->flushed_early() ||
      !driver()->options()->enable_flush_early_critical_css() ||
      driver()->flush_early_info() == NULL) {
    return false;
  }

  // If the url is present in the DOM cohort, it is guaranteed to have
  // been flushed early.
  GoogleString escaped_url;
  HtmlKeywords::Escape(url, &escaped_url);
  // TODO(slamm): Replace with cheaper and more robust solution.
  return (driver()->flush_early_info()->resource_html().find(
      StrCat("\"", escaped_url, "\"")) != GoogleString::npos);
}

void CriticalSelectorFilter::ApplyCssFlushedEarly(
    HtmlElement* element, const GoogleString& style_id, const char* media) {
  // In this case we have already added the CSS rules to the head as
  // part of flushing early. Now, find the rule, remove the disabled tag
  // and move it here.

  // Add the JS function definition that moves and applies the flushed early
  // CSS rules, if it has not already been added.
  if (!is_flush_script_added_) {
    is_flush_script_added_ = true;
    HtmlElement* script =
        driver()->NewElement(element->parent(), HtmlName::kScript);
    // TODO(slamm): Remove this attribute and update webdriver test as needed.
    driver()->AddAttribute(script, HtmlName::kId, kMoveScriptId);
    driver()->AddAttribute(script, HtmlName::kDataPagespeedNoDefer, NULL);
    driver()->InsertNodeBeforeNode(element, script);
    AddJsToElement(kApplyFlushEarlyCss, script);
  }

  HtmlElement* script_element =
      driver()->NewElement(element->parent(), HtmlName::kScript);
  driver()->AddAttribute(script_element, HtmlName::kDataPagespeedNoDefer, NULL);
  driver()->ReplaceNode(element, script_element);

  GoogleString js_data = StringPrintf(kInvokeFlushEarlyCssTemplate,
                                      style_id.c_str(),
                                      (media != NULL ? media : ""));
  AddJsToElement(js_data, script_element);
}

}  // namespace net_instaweb
