/*
 * Copyright 2013 Google Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

// Author: morlovich@google.com (Maksim Orlovich)

#include "net/instaweb/rewriter/public/css_summarizer_base.h"

#include <cstddef>
#include <memory>

#include "base/logging.h"
#include "net/instaweb/rewriter/cached_result.pb.h"
#include "net/instaweb/rewriter/public/common_filter.h"
#include "net/instaweb/rewriter/public/css_inline_filter.h"
#include "net/instaweb/rewriter/public/css_tag_scanner.h"
#include "net/instaweb/rewriter/public/data_url_input_resource.h"
#include "net/instaweb/rewriter/public/inline_resource_slot.h"
#include "net/instaweb/rewriter/public/output_resource_kind.h"
#include "net/instaweb/rewriter/public/resource.h"
#include "net/instaweb/rewriter/public/resource_slot.h"
#include "net/instaweb/rewriter/public/rewrite_driver.h"
#include "net/instaweb/rewriter/public/rewrite_result.h"
#include "net/instaweb/rewriter/public/server_context.h"
#include "net/instaweb/rewriter/public/single_rewrite_context.h"
#include "pagespeed/kernel/base/abstract_mutex.h"
#include "pagespeed/kernel/base/basictypes.h"
#include "pagespeed/kernel/base/charset_util.h"
#include "pagespeed/kernel/base/scoped_ptr.h"
#include "pagespeed/kernel/base/statistics.h"
#include "pagespeed/kernel/base/string.h"
#include "pagespeed/kernel/base/string_util.h"
#include "pagespeed/kernel/base/thread_system.h"
#include "pagespeed/kernel/html/html_element.h"
#include "pagespeed/kernel/html/html_keywords.h"
#include "pagespeed/kernel/html/html_name.h"
#include "pagespeed/kernel/html/html_node.h"
#include "pagespeed/kernel/http/content_type.h"
#include "pagespeed/kernel/http/data_url.h"
#include "webutil/css/parser.h"

namespace net_instaweb {

class UrlSegmentEncoder;

// Rewrite context for CssSummarizerBase --- it invokes the filter's
// summarization functions on parsed CSS ASTs when available, and synchronizes
// them with the summaries_ table in the CssSummarizerBase.
class CssSummarizerBase::Context : public SingleRewriteContext {
 public:
  // pos denotes our position in the filters' summaries_ vector.
  // A freshly constructed context has no element or text node attached and
  // is flagged as a non-inline rewrite until one of the Setup* calls is made.
  Context(int pos, CssSummarizerBase* filter, RewriteDriver* driver);
  virtual ~Context();

  // Calls to finish initialization for given rewrite type; should be called
  // soon after construction.
  // SetupInlineRewrite stores both the <style> element and its characters
  // node; SetupExternalRewrite stores the element only and resets the text
  // node to NULL.
  void SetupInlineRewrite(HtmlElement* element, HtmlCharactersNode* text);
  void SetupExternalRewrite(HtmlElement* element);

 protected:
  // Publishes the outcome of the rewrite into filter_->summaries_[pos_],
  // notifies the filter via RenderSummary/WillNotRenderSummary and then
  // reports completion of this summary.
  virtual void Render();
  // Notifies the filter via WillNotRenderSummary. Unlike Render(), it neither
  // updates the summary state nor reports completion.
  virtual void WillNotRender();
  // Records pos_ in the filter's canceled_summaries_ list (under the filter's
  // progress_lock_); the filter processes that list in RenderDone().
  virtual void Cancel();
  // Creates the single partition for this context. Fails if there is not
  // exactly one slot, or if an external resource is not safe to rewrite.
  virtual bool Partition(OutputPartitions* partitions,
                         OutputResourceVector* outputs);
  // Parses the input as CSS and stores the filter's summary in the
  // partition's inlined_data field. No output resource is written.
  virtual void RewriteSingle(const ResourcePtr& input,
                             const OutputResourcePtr& output);
  // The id, cache-key suffix and URL encoder are all delegated to the filter.
  virtual const char* id() const { return filter_->id(); }
  virtual OutputResourceKind kind() const { return kRewrittenResource; }
  virtual GoogleString CacheKeySuffix() const;
  virtual const UrlSegmentEncoder* encoder() const {
    return filter_->encoder();
  }

 private:
  // Reports completion of one summary (including failures).
  void ReportDone();

  int pos_;  // our position in the list of all styles in the page.
  CssSummarizerBase* filter_;

  // The <style> or <link> element being summarized, and (inline rewrites
  // only) the characters node holding the CSS text. text_ is NULL for
  // external rewrites.
  HtmlElement* element_;
  HtmlCharactersNode* text_;

  // True if we're rewriting a <style> block, false if it's a <link>
  bool rewrite_inline_;

  DISALLOW_COPY_AND_ASSIGN(Context);
};

// Creates a top-level (parentless) context with no resource context.
// |pos| is the index of this context's entry in |filter|'s summaries_ table.
// The element/text pointers start out NULL and the rewrite is flagged as
// non-inline; SetupInlineRewrite or SetupExternalRewrite fills them in.
CssSummarizerBase::Context::Context(int pos,
                                    CssSummarizerBase* filter,
                                    RewriteDriver* driver)
    : SingleRewriteContext(driver, NULL /*parent*/, NULL /* resource_context*/),
      pos_(pos),
      filter_(filter),
      element_(NULL),
      text_(NULL),
      rewrite_inline_(false) {
}

// Releases nothing: filter_, element_ and text_ are left untouched here
// (presumably they are owned elsewhere -- confirm against the header).
CssSummarizerBase::Context::~Context() {
}

// Configures this context for an inline <style> block: remembers the element
// together with the characters node that holds its CSS, and marks the rewrite
// as inline.
void CssSummarizerBase::Context::SetupInlineRewrite(HtmlElement* element,
                                                    HtmlCharactersNode* text) {
  element_ = element;
  text_ = text;
  rewrite_inline_ = true;
}

// Configures this context for an external stylesheet (<link>): remembers the
// element, clears the text node (there is none) and marks the rewrite as
// non-inline.
void CssSummarizerBase::Context::SetupExternalRewrite(HtmlElement* element) {
  element_ = element;
  text_ = NULL;
  rewrite_inline_ = false;
}

// Marks one summary as finished. If that was the last outstanding summary and
// the filter has already seen the end of the document, tells the filter that
// all summaries are done.
void CssSummarizerBase::Context::ReportDone() {
  bool all_summaries_finished;
  {
    // The counter and the end-of-document flag are only touched under the
    // filter's progress lock.
    ScopedMutex lock(filter_->progress_lock_.get());
    --filter_->outstanding_rewrites_;
    all_summaries_finished = filter_->saw_end_of_document_ &&
                             (filter_->outstanding_rewrites_ == 0);
  }
  // The notification is issued after the lock has been released.
  if (all_summaries_finished) {
    filter_->ReportSummariesDone();
  }
}

void CssSummarizerBase::Context::Render() {
  DCHECK_LE(0, pos_);
  DCHECK_LT(static_cast<size_t>(pos_), filter_->summaries_.size());
  SummaryInfo& summary_info = filter_->summaries_[pos_];
  bool is_element_deleted = false;
  if (num_output_partitions() == 0) {
    // Failed at partition -> resource fetch failed or uncacheable.
    summary_info.state = kSummaryInputUnavailable;
    filter_->WillNotRenderSummary(pos_, element_, text_, &is_element_deleted);
  } else {
    const CachedResult& result = *output_partition(0);
    // Transfer the summarization result from the metadata cache (where it was
    // stored by RewriteSingle) to the summary table;  we have to do it here
    // so it's available on a cache hit. Conveniently this will also never race
    // with the HTML thread, so the summary accessors will be safe to access
    // off parser events.
    if (result.has_inlined_data()) {
      summary_info.state = kSummaryOk;
      summary_info.data = result.inlined_data();
      // For external resources, fix up base to refer to the current URL in
      // the slot, as it may have been changed by an earlier filter.
      if (summary_info.is_external) {
        summary_info.base = slot(0)->resource()->url();
      }
      // TODO(sligocki): text_ could easily be out of date. We should use the
      // ResourceSlot to render the result.
      filter_->RenderSummary(pos_, element_, text_, &is_element_deleted);
    } else {
      summary_info.state = kSummaryCssParseError;
      filter_->WillNotRenderSummary(pos_, element_, text_, &is_element_deleted);
    }
  }
  if (is_element_deleted) {
    slot(0)->set_disable_further_processing(true);
  }
  ReportDone();
}

// Tells the filter that this summary will not be rendered. If the filter
// reports that it deleted the element, further processing on the slot is
// disabled. The summary state is not modified and completion is not reported.
void CssSummarizerBase::Context::WillNotRender() {
  bool element_was_deleted = false;
  filter_->WillNotRenderSummary(pos_, element_, text_, &element_was_deleted);
  if (!element_was_deleted) {
    return;
  }
  slot(0)->set_disable_further_processing(true);
}

void CssSummarizerBase::Context::Cancel() {
  ScopedMutex hold(filter_->progress_lock_.get());
  filter_->canceled_summaries_.push_back(pos_);
}

void CssSummarizerBase::Context::RewriteSingle(
    const ResourcePtr& input_resource,
    const OutputResourcePtr& output_resource) {
  StringPiece input_contents = input_resource->ExtractUncompressedContents();

  // TODO(morlovich): Should we keep track of this so it can be restored?
  StripUtf8Bom(&input_contents);

  // Load stylesheet w/o expanding background attributes and preserving as
  // much content as possible from the original document.
  Css::Parser parser(input_contents);
  parser.set_preservation_mode(true);

  // We avoid quirks-mode so that we do not "fix" something we shouldn't have.
  parser.set_quirks_mode(false);

  scoped_ptr<Css::Stylesheet> stylesheet(parser.ParseRawStylesheet());
  CachedResult* result = output_partition(0);
  if (stylesheet.get() == NULL ||
      parser.errors_seen_mask() != Css::Parser::kNoError) {
    // TODO(morlovich): do we want a stat here?
    result->clear_inlined_data();
  } else {
    filter_->Summarize(stylesheet.get(), result->mutable_inlined_data());
  }
  if (CssInlineFilter::HasClosingStyleTag(result->inlined_data())) {
    result->clear_inlined_data();
  }

  // We never produce output --- just write to the CachedResult; so we
  // technically fail.
  RewriteDone(kRewriteFailed, 0);
}

// Builds the single partition used by this context.
//
// Returns false when the context does not have exactly one slot, or when an
// external resource is not safe to rewrite. Otherwise adds one partition that
// records the input (without its hash) and pushes a NULL output resource,
// since no output is ever produced.
bool CssSummarizerBase::Context::Partition(OutputPartitions* partitions,
                                           OutputResourceVector* outputs) {
  if (num_slots() != 1) {
    return false;
  }
  ResourcePtr input(slot(0)->resource());
  const bool is_external = !rewrite_inline_;
  if (is_external && !input->IsSafeToRewrite(rewrite_uncacheable())) {
    // TODO(anupama): Shouldn't we check the closing style tag portion of
    // ShouldInline(resource) here?
    return false;
  }
  // A non-trivial partition is still wanted even though there is no output
  // resource. kOmitInputHash is used as this is for content that will be
  // inlined.
  CachedResult* only_partition = partitions->add_partition();
  input->AddInputInfoToPartition(Resource::kOmitInputHash, 0, only_partition);
  outputs->push_back(OutputResourcePtr(NULL));
  return true;
}

// The cache key suffix is whatever the owning filter supplies.
GoogleString CssSummarizerBase::Context::CacheKeySuffix() const {
  GoogleString suffix(filter_->CacheKeySuffix());
  return suffix;
}

// Names of the two statistics variables used by this filter. They are
// registered in InitStats(), looked up in the constructor, and incremented in
// ReportSummariesDone(): the first for every summary whose state is
// kSummaryOk, the second for every other summary.
const char CssSummarizerBase::kNumCssUsedForCriticalCssComputation[] =
    "num_css_used_for_critical_css_computation";
const char CssSummarizerBase::kNumCssNotUsedForCriticalCssComputation[] =
    "num_css_not_used_for_critical_css_computation";

// Creates the progress mutex from the driver's thread system, looks up the
// two statistics variables registered by InitStats(), and resets all
// per-document bookkeeping.
CssSummarizerBase::CssSummarizerBase(RewriteDriver* driver)
    : RewriteFilter(driver),
      progress_lock_(driver->server_context()->thread_system()->NewMutex()) {
  Statistics* statistics = server_context()->statistics();
  num_css_used_for_critical_css_computation_ =
      statistics->GetVariable(kNumCssUsedForCriticalCssComputation);
  num_css_not_used_for_critical_css_computation_ =
      statistics->GetVariable(kNumCssNotUsedForCriticalCssComputation);

  Clear();
}

// Resets the per-document bookkeeping (summaries, counters, flags) via
// Clear() before the object goes away.
CssSummarizerBase::~CssSummarizerBase() {
  Clear();
}

// Registers the statistics variables this filter uses with |statistics|.
void CssSummarizerBase::InitStats(Statistics* statistics) {
  const char* const variable_names[] = {
    kNumCssUsedForCriticalCssComputation,
    kNumCssNotUsedForCriticalCssComputation,
  };
  const size_t num_variables =
      sizeof(variable_names) / sizeof(variable_names[0]);
  for (size_t i = 0; i < num_variables; ++i) {
    statistics->AddVariable(variable_names[i]);
  }
}

// Default cache key suffix: the empty string. Context::CacheKeySuffix()
// forwards to this method.
GoogleString CssSummarizerBase::CacheKeySuffix() const {
  GoogleString empty_suffix;
  return empty_suffix;
}

// Hook invoked as the last step of ReportSummariesDone(). The default
// implementation does nothing.
void CssSummarizerBase::SummariesDone() {
}

// Hook invoked by Context::Render() after a summary has been stored in
// summaries_[pos] with state kSummaryOk. |element| and |char_node| are the
// pointers the context was set up with (|char_node| is NULL for external
// stylesheets). If *is_element_deleted is set to true, the calling context
// disables further processing on its slot. The default implementation does
// nothing.
void CssSummarizerBase::RenderSummary(
    int pos, HtmlElement* element, HtmlCharactersNode* char_node,
    bool* is_element_deleted) {
}

// Hook invoked when the summary at |pos| will not be rendered. It is called
// from Context::Render() when the input was unavailable or could not be
// summarized, from Context::WillNotRender(), and from StartExternalRewrite()
// when the input resource cannot be created. In that last case |char_node| is
// NULL and the value written to *is_element_deleted is ignored; the contexts
// disable further processing on their slot when it is set to true. The
// default implementation does nothing.
void CssSummarizerBase::WillNotRenderSummary(
    int pos, HtmlElement* element, HtmlCharactersNode* char_node,
    bool* is_element_deleted) {
}

// Resets all per-document state: drops every summary and pending
// cancellation, forgets the current <style> element, and zeroes the progress
// bookkeeping.
void CssSummarizerBase::Clear() {
  summaries_.clear();
  canceled_summaries_.clear();
  style_element_ = NULL;
  saw_end_of_document_ = false;
  outstanding_rewrites_ = 0;
}

// Starts a new document by resetting all per-document state via Clear().
// DCHECKs that no canceled summaries are left over from earlier processing.
void CssSummarizerBase::StartDocumentImpl() {
  // TODO(morlovich): we hold on to the summaries_ memory too long; refine this
  // once the data type is refined.
  DCHECK(canceled_summaries_.empty());
  Clear();
}

// Records that the end of the document has been reached. If no rewrites are
// outstanding at that point, reports that all summaries are done; otherwise
// the report is issued later by whichever path finishes the last one.
void CssSummarizerBase::EndDocument() {
  bool nothing_pending;
  {
    ScopedMutex lock(progress_lock_.get());
    saw_end_of_document_ = true;
    // True when everything finished before the document even ended.
    nothing_pending = (outstanding_rewrites_ == 0);
  }

  // The report is made after the lock has been released.
  if (nothing_pending) {
    ReportSummariesDone();
  }
}

// Remembers a non-scoped <style> element so that Characters() can summarize
// its contents. <link> elements are handled in EndElementImpl().
void CssSummarizerBase::StartElementImpl(HtmlElement* element) {
  // HtmlParse should not pass us elements inside a style element.
  CHECK(style_element_ == NULL);
  if (element->keyword() != HtmlName::kStyle) {
    return;
  }
  // Scoped style elements are ignored: they are already inlined, can't safely
  // be moved, and take precedence in cascade order regardless of their
  // position relative to non-scoped CSS.
  if (element->FindAttribute(HtmlName::kScoped) != NULL) {
    return;
  }
  style_element_ = element;
}

// Forwards the node to CommonFilter and, when it is inside a remembered
// <style> element that must be summarized, starts an inline rewrite for it.
void CssSummarizerBase::Characters(HtmlCharactersNode* characters_node) {
  CommonFilter::Characters(characters_node);
  if (style_element_ == NULL) {
    return;
  }
  // Note: HtmlParse should guarantee that we only get one CharactersNode
  // per <style> block even if it is split by a flush.
  if (MustSummarize(style_element_)) {
    StartInlineRewrite(style_element_, characters_node);
  }
}

// Closes off a remembered inline <style> element, or starts an external
// rewrite for a <link> whose rel marks it as a stylesheet (or alternate
// stylesheet), which carries an href attribute, and which must be summarized.
void CssSummarizerBase::EndElementImpl(HtmlElement* element) {
  if (style_element_ != NULL) {
    // End of an inline style.
    CHECK_EQ(style_element_, element);  // HtmlParse should not pass unmatching.
    style_element_ = NULL;
    return;
  }

  if (element->keyword() != HtmlName::kLink) {
    return;
  }
  StringPiece rel = element->AttributeValue(HtmlName::kRel);
  if (!CssTagScanner::IsStylesheetOrAlternate(rel)) {
    return;
  }
  HtmlElement::Attribute* href = element->FindAttribute(HtmlName::kHref);
  if (href == NULL) {
    // No href= attribute, so there is nothing to fetch.
    return;
  }
  if (MustSummarize(element)) {
    // Rewrite an external style.
    StartExternalRewrite(element, href, rel);
  }
}

// Processes the cancellations queued by Context::Cancel(): each canceled
// summary is marked kSummarySlotRemoved and subtracted from the outstanding
// count. If that brings the count to zero after the end of the document has
// been seen, reports that all summaries are done.
void CssSummarizerBase::RenderDone() {
  bool all_summaries_finished = false;

  {
    ScopedMutex lock(progress_lock_.get());
    const int num_canceled = canceled_summaries_.size();
    // Transfer from canceled_summaries_ to summaries_.
    for (int k = 0; k < num_canceled; ++k) {
      const int canceled_pos = canceled_summaries_[k];
      summaries_[canceled_pos].state = kSummarySlotRemoved;
    }

    if (!canceled_summaries_.empty()) {
      outstanding_rewrites_ -= canceled_summaries_.size();
      all_summaries_finished =
          (outstanding_rewrites_ == 0) && saw_end_of_document_;
    }
    canceled_summaries_.clear();
  }

  // The report is made after the lock has been released.
  if (all_summaries_finished) {
    ReportSummariesDone();
  }
}

void CssSummarizerBase::ReportSummariesDone() {
  if (DebugMode()) {
    GoogleString comment = "Summary computation status for ";
    StrAppend(&comment, Name(), "\n");
    for (int i = 0, n = summaries_.size(); i < n; ++i) {
      StrAppend(&comment, "Resource ", IntegerToString(i),
                " ", summaries_[i].location, ": ");
      switch (summaries_[i].state) {
        case kSummaryOk:
          StrAppend(&comment, "Computed OK\n");
          break;
        case kSummaryStillPending:
          StrAppend(&comment, "Computation still pending\n");
          break;
        case kSummaryCssParseError:
          StrAppend(&comment, "Unrecoverable CSS parse error or resource "
                              "contains closing style tag\n");
          break;
        case kSummaryResourceCreationFailed:
          StrAppend(&comment, kCreateResourceFailedDebugMsg, "\n");
          break;
        case kSummaryInputUnavailable:
          StrAppend(&comment,
                    "Fetch failed or resource not publicly cacheable\n");
          break;
        case kSummarySlotRemoved:
          StrAppend(&comment,
                    "Resource removed by another filter\n");
          break;
      }
    }
    GoogleString escaped;
    HtmlKeywords::Escape(comment, &escaped);
    InsertNodeAtBodyEnd(driver()->NewCommentNode(NULL, escaped));
  }
  for (int i = 0, n = summaries_.size(); i < n; ++i) {
    if (summaries_[i].state == kSummaryOk) {
      num_css_used_for_critical_css_computation_->Add(1);
    } else {
      num_css_not_used_for_critical_css_computation_->Add(1);
    }
  }
  SummariesDone();
}

// Starts summarization of an inline <style> block: wraps the CSS text in an
// inline slot, registers a new summary entry (with the driver's decoded base
// as the base and no rel value), and hands the resulting context to the
// driver.
void CssSummarizerBase::StartInlineRewrite(
    HtmlElement* style, HtmlCharactersNode* text) {
  ResourceSlotPtr inline_slot(MakeSlotForInlineCss(text));
  const GoogleString location = inline_slot->LocationString();
  Context* inline_context = CreateContextAndSummaryInfo(
      style, false /* not external */, inline_slot, location,
      driver()->decoded_base(), StringPiece() /* rel, none since inline */);
  inline_context->SetupInlineRewrite(style, text);
  driver()->InitiateRewrite(inline_context);
}

// Starts summarization of an external stylesheet referenced by |link|.
//
// |src| is the link's href attribute and |rel| its rel value. If an input
// resource cannot be created for the URL, a summary entry in state
// kSummaryResourceCreationFailed is still recorded (so that positions in
// summaries_ keep matching the stylesheets seen), WillNotRenderSummary() is
// invoked for it, and no rewrite is initiated. Otherwise a slot and a context
// are created and the rewrite is handed to the driver.
void CssSummarizerBase::StartExternalRewrite(
    HtmlElement* link, HtmlElement::Attribute* src, StringPiece rel) {
  // Fetched once and reused below; may be NULL.
  const char* url = src->DecodedValueOrNull();

  // Create the input resource for the slot.
  // is_authorized is read in the failure path below, so it is given a defined
  // value up front rather than relying on CreateInputResource() to write it
  // on every path. The default of true means that, should it ever be left
  // untouched, a creation failure is still reported in debug mode.
  bool is_authorized = true;
  ResourcePtr input_resource(CreateInputResource(url, &is_authorized));
  if (input_resource.get() == NULL) {
    // Record a failure, so the subclass knows of it.
    summaries_.push_back(SummaryInfo());
    SummaryInfo& failed_summary = summaries_.back();
    failed_summary.state = kSummaryResourceCreationFailed;
    failed_summary.location = (url != NULL ? url : driver()->UrlLine());

    bool is_element_deleted = false;  // unused after call because no slot here
    WillNotRenderSummary(summaries_.size() - 1, link, NULL /* char_node */,
                         &is_element_deleted);

    // TODO(morlovich): Stat?
    // No debug message is written for an unauthorized URL because that has
    // already been done by the CSS rewriting filter.
    if (DebugMode() && (is_authorized || url == NULL)) {
      driver()->InsertComment(StrCat(
          Name(), ": ", kCreateResourceFailedDebugMsg));
    }
    return;
  }

  ResourceSlotPtr slot(driver()->GetSlot(input_resource, link, src));
  Context* context = CreateContextAndSummaryInfo(
      link, true /* external*/, slot, input_resource->url() /* location*/,
      input_resource->url() /* base */, rel);
  context->SetupExternalRewrite(link);
  driver()->InitiateRewrite(context);
}

// Wraps the text of an inline <style> block in a data: URL input resource and
// returns an inline slot that ties that resource to |char_node|.
ResourceSlotPtr CssSummarizerBase::MakeSlotForInlineCss(
    HtmlCharactersNode* char_node) {
  // TODO(morlovich): This does a lot of useless conversions and
  // copying. Get rid of them.
  GoogleString encoded_css;
  DataUrl(kContentTypeCss, PLAIN, char_node->contents(), &encoded_css);

  // Create the input resource for the slot.
  ResourcePtr css_resource(DataUrlInputResource::Make(encoded_css, driver()));
  ResourceSlotPtr inline_slot(
      driver()->GetInlineSlot(css_resource, char_node));
  return inline_slot;
}

// Appends a new entry to summaries_ describing the stylesheet found at
// |element|, bumps the number of outstanding rewrites, and returns a newly
// allocated Context (with |slot| attached) whose position is the index of
// that entry.
//
// |external| tells whether this is a <link> rather than a <style>;
// |location|, |base_for_resources| and |rel| are copied into the entry. The
// media attribute is copied as well when it is present and has a decodable
// value.
CssSummarizerBase::Context* CssSummarizerBase::CreateContextAndSummaryInfo(
    const HtmlElement* element, bool external, const ResourceSlotPtr& slot,
    const GoogleString& location, StringPiece base_for_resources,
    StringPiece rel) {
  const int index = summaries_.size();
  summaries_.push_back(SummaryInfo());
  SummaryInfo& info = summaries_.back();

  info.is_external = external;
  info.is_inside_noscript = (noscript_element() != NULL);
  info.location = location;
  base_for_resources.CopyToString(&info.base);
  rel.CopyToString(&info.rel);

  const HtmlElement::Attribute* media =
      element->FindAttribute(HtmlName::kMedia);
  if (media != NULL) {
    const char* media_value = media->DecodedValueOrNull();
    if (media_value != NULL) {
      info.media_from_html = media_value;
    }
  }

  // NOTE(review): this increment is made without holding progress_lock_,
  // whereas ReportDone(), EndDocument() and RenderDone() touch the counter
  // under it -- confirm no other thread can access the counter while
  // elements are being parsed.
  ++outstanding_rewrites_;

  Context* new_context = new Context(index, this, driver());
  new_context->AddSlot(slot);
  return new_context;
}

// Not expected to be called: logs a DFATAL message and returns NULL, since
// subclasses are not meant to be registered as handlers for fetches.
RewriteContext* CssSummarizerBase::MakeRewriteContext() {
  // We should not be registered under our id as a rewrite filter, since we
  // don't expect to answer fetches.
  LOG(DFATAL) << "CssSummarizerBase subclasses should not be registered "
                 "as handling fetches";
  return NULL;
}

}  // namespace net_instaweb
