/*
 * Copyright 2010 Google Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

// Author: jmarantz@google.com (Joshua Marantz)

#include "net/instaweb/rewriter/public/cache_extender.h"

#include <memory>

#include "base/logging.h"
#include "net/instaweb/http/public/http_cache.h"
#include "net/instaweb/http/public/log_record.h"
#include "net/instaweb/rewriter/cached_result.pb.h"
#include "net/instaweb/rewriter/public/domain_lawyer.h"
#include "net/instaweb/rewriter/public/javascript_code_block.h"
#include "net/instaweb/rewriter/public/output_resource.h"
#include "net/instaweb/rewriter/public/output_resource_kind.h"
#include "net/instaweb/rewriter/public/resource.h"
#include "net/instaweb/rewriter/public/resource_slot.h"
#include "net/instaweb/rewriter/public/resource_tag_scanner.h"
#include "net/instaweb/rewriter/public/rewrite_driver.h"
#include "net/instaweb/rewriter/public/rewrite_options.h"
#include "net/instaweb/rewriter/public/server_context.h"
#include "net/instaweb/rewriter/public/single_rewrite_context.h"
#include "net/instaweb/rewriter/public/url_namer.h"
#include "pagespeed/kernel/base/basictypes.h"
#include "pagespeed/kernel/base/statistics.h"
#include "pagespeed/kernel/base/string.h"
#include "pagespeed/kernel/base/string_util.h"
#include "pagespeed/kernel/base/string_writer.h"
#include "pagespeed/kernel/base/timer.h"
#include "pagespeed/kernel/html/html_element.h"
#include "pagespeed/kernel/http/content_type.h"
#include "pagespeed/kernel/http/google_url.h"
#include "pagespeed/kernel/http/response_headers.h"
#include "pagespeed/kernel/http/semantic_type.h"
#include "pagespeed/opt/logging/enums.pb.h"

namespace net_instaweb {
// Forward declarations for types that this file refers to only through
// pointers.
class MessageHandler;
class RewriteContext;

// Names of the Statistics variables used by this filter.  They are
// registered in InitStats() and looked up in the CacheExtender constructor.
const char CacheExtender::kCacheExtensions[] = "cache_extensions";
const char CacheExtender::kNotCacheable[] = "not_cacheable";

// We do not want to bother to extend the cache lifetime for any resource
// that is already cacheable for at least a month.  ShouldRewriteResource()
// compares the remaining lifetime (expiration time minus the current time)
// against this threshold.
const int64 kMinThresholdMs = Timer::kMonthMs;

// Rewrite context that handles a single resource on behalf of a
// CacheExtender.  The rewrite itself is delegated back to the filter via
// CacheExtender::RewriteLoadedResource (see RewriteSingle).
class CacheExtender::Context : public SingleRewriteContext {
 public:
  // 'extender' is the filter this context calls back into.  'driver' and
  // 'parent' are forwarded unchanged to SingleRewriteContext, and no
  // resource context is supplied.
  Context(CacheExtender* extender, RewriteDriver* driver,
          RewriteContext* parent)
      : SingleRewriteContext(driver, parent, NULL /* no resource context */),
        extender_(extender) {
  }
  virtual ~Context() {
  }

  // Reports the same id as the filter that created this context.
  virtual const char* id() const {
    return extender_->id();
  }

  // Cache-extended outputs are always on-the-fly resources.
  virtual OutputResourceKind kind() const {
    return kOnTheFlyResource;
  }

  virtual void RewriteSingle(const ResourcePtr& input,
                             const OutputResourcePtr& output);
  virtual void Render();

 private:
  // The filter that created this context; the destructor does not delete it.
  CacheExtender* extender_;

  DISALLOW_COPY_AND_ASSIGN(Context);
};

// Fetches the two statistics variables (under the names registered by
// InitStats) from the server context's Statistics object.
CacheExtender::CacheExtender(RewriteDriver* driver)
    : RewriteFilter(driver) {
  Statistics* statistics = server_context()->statistics();
  extension_count_ = statistics->GetVariable(kCacheExtensions);
  not_cacheable_count_ = statistics->GetVariable(kNotCacheable);
}

// The destructor body is empty; nothing is released explicitly here.
CacheExtender::~CacheExtender() {}

// Adds the statistics variables used by this filter (kCacheExtensions and
// kNotCacheable) to 'statistics'.
void CacheExtender::InitStats(Statistics* statistics) {
  statistics->AddVariable(kCacheExtensions);
  statistics->AddVariable(kNotCacheable);
}

bool CacheExtender::ShouldRewriteResource(
    const ResponseHeaders* headers, int64 now_ms,
    const ResourcePtr& input_resource, const StringPiece& url,
    CachedResult* result) const {
  const ContentType* input_resource_type = input_resource->type();
  if (input_resource_type == NULL) {
    return false;
  }
  if (input_resource_type->type() == ContentType::kJavascript &&
      driver()->options()->avoid_renaming_introspective_javascript() &&
      JavascriptCodeBlock::UnsafeToRename(
          input_resource->ExtractUncompressedContents())) {
    CHECK(result != NULL);
    result->add_debug_message(JavascriptCodeBlock::kIntrospectionComment);
    return false;
  }
  if ((headers->CacheExpirationTimeMs() - now_ms) < kMinThresholdMs) {
    // This also includes the case where a previous filter rewrote this.
    return true;
  }
  UrlNamer* url_namer = driver()->server_context()->url_namer();
  GoogleUrl origin_gurl(url);

  // We won't initiate a CacheExtender::Context with a pagespeed
  // resource URL.  However, an upstream filter might have rewritten
  // the resource after we queued the request, but before our
  // context is asked to rewrite it.  So we have to check again now
  // that the resource URL is finalized.
  if (server_context()->IsPagespeedResource(origin_gurl)) {
    return false;
  }

  if (url_namer->ProxyMode()) {
    return !url_namer->IsProxyEncoded(origin_gurl);
  }
  const DomainLawyer* lawyer = driver()->options()->domain_lawyer();

  // We return true for IsProxyMapped because when reconstructing
  // MAPPED_DOMAIN/file.pagespeed.ce.HASH.ext we won't be changing
  // the domain (WillDomainChange==false) but we want this function
  // to return true so that we can reconstruct the cache-extension and
  // serve the result with long public caching.  Without IsProxyMapped,
  // we'd serve the result with cache-control:private,max-age=300.
  return (lawyer->IsProxyMapped(origin_gurl) ||
          lawyer->WillDomainChange(origin_gurl));
}

// Scans 'element' for URL-bearing attributes and starts a cache-extension
// rewrite for each one whose category is enabled on the driver.  Attributes
// of any other category are considered only when PDF extension is enabled
// and the resolved URL's leaf ends in the PDF file extension.
void CacheExtender::StartElementImpl(HtmlElement* element) {
  resource_tag_scanner::UrlCategoryVector attributes;
  resource_tag_scanner::ScanElement(element, driver()->options(), &attributes);
  const int num_attributes = attributes.size();
  for (int idx = 0; idx < num_attributes; ++idx) {
    bool may_extend = false;
    switch (attributes[idx].category) {
      case semantic_type::kStylesheet:
        may_extend = driver()->MayCacheExtendCss();
        break;
      case semantic_type::kImage:
        may_extend = driver()->MayCacheExtendImages();
        break;
      case semantic_type::kScript:
        may_extend = driver()->MayCacheExtendScripts();
        break;
      default:
        // Does the url in the attribute end in .pdf, ignoring query params?
        if (attributes[idx].url->DecodedValueOrNull() != NULL &&
            driver()->MayCacheExtendPdfs()) {
          GoogleUrl resolved_url(driver()->base_url(),
                                 attributes[idx].url->DecodedValueOrNull());
          may_extend =
              resolved_url.IsWebValid() &&
              StringCaseEndsWith(resolved_url.LeafSansQuery(),
                                 kContentTypePdf.file_extension());
        }
        break;
    }

    // TODO(jmarantz): We ought to be able to domain-shard even if the
    // resources are non-cacheable or privately cacheable.
    if (!may_extend || !driver()->IsRewritable(element)) {
      continue;
    }

    ResourcePtr input_resource(CreateInputResourceOrInsertDebugComment(
        attributes[idx].url->DecodedValueOrNull(), element));
    if (input_resource.get() == NULL) {
      continue;
    }

    // Resources that already have a pagespeed URL are not extended again.
    GoogleUrl input_gurl(input_resource->url());
    if (server_context()->IsPagespeedResource(input_gurl)) {
      continue;
    }

    ResourceSlotPtr slot(
        driver()->GetSlot(input_resource, element, attributes[idx].url));
    Context* context = new Context(this, driver(), NULL /* not nested */);
    context->AddSlot(slot);
    driver()->InitiateRewrite(context);
  }
}

// Always returns true.  This is consistent with Context::kind(), which
// reports kOnTheFlyResource.
bool CacheExtender::ComputeOnTheFly() const {
  return true;
}

void CacheExtender::Context::RewriteSingle(
    const ResourcePtr& input_resource,
    const OutputResourcePtr& output_resource) {
  // We only add link: rel = canonical to images and PDF; people don't normally
  // use search engines to look for .css and .js files, so adding it
  // there would just be a waste of bytes.
  if (input_resource->type() != NULL &&
      (input_resource->type()->IsImage() ||
       input_resource->type()->type() == ContentType::kPdf)) {
    AddLinkRelCanonical(input_resource, output_resource);
  }
  RewriteDone(
      extender_->RewriteLoadedResource(
          input_resource, output_resource, output_partition(0)), 0);
}

void CacheExtender::Context::Render() {
  if (num_output_partitions() == 1 && output_partition(0)->optimizable()) {
    extender_->extension_count_->Add(1);
    // Log applied rewriter id. Here, we care only about non-nested
    // cache extensions, and that too, those occurring in synchronous
    // flows only.
    if (Driver() != NULL) {
      ResourceSlotPtr the_slot = slot(0);
      if (the_slot->resource().get() != NULL &&
          the_slot->resource()->type() != NULL) {
        const char* filter_id = id();
        const ContentType* type = the_slot->resource()->type();
        if (type->type() == ContentType::kCss) {
          filter_id = RewriteOptions::FilterId(
              RewriteOptions::kExtendCacheCss);
        } else if (type->type() == ContentType::kJavascript) {
          filter_id = RewriteOptions::FilterId(
              RewriteOptions::kExtendCacheScripts);
        } else if (type->IsImage()) {
          filter_id = RewriteOptions::FilterId(
              RewriteOptions::kExtendCacheImages);
        }
        // TODO(anupama): Log cache extension for pdfs etc.
        Driver()->log_record()->SetRewriterLoggingStatus(
            filter_id,
            the_slot->resource()->url(),
            RewriterApplication::APPLIED_OK);
      }
    }
  }
}

// Writes a cache-extended copy of the already-loaded 'input_resource' into
// 'output_resource'.
//
// Returns kRewriteOk when the output was written.  Returns kRewriteFailed
// when the input is not proxy-cacheable (and caching is not forced), when
// ShouldRewriteResource declines it, when its Content-Type is not on the
// approved list, when resolving CSS URLs fails to write, or when the final
// write fails.  'result' is passed through to ShouldRewriteResource.
RewriteResult CacheExtender::RewriteLoadedResource(
    const ResourcePtr& input_resource,
    const OutputResourcePtr& output_resource,
    // TODO(jmaessen): does this belong in CacheExtender::Context?  It is
    // currently passed to this method and on to ShouldRewriteResource.
    CachedResult* result) {
  CHECK(input_resource->loaded());

  MessageHandler* handler = driver()->message_handler();
  const ResponseHeaders* response_headers =
      input_resource->response_headers();
  GoogleString input_url = input_resource->url();
  int64 now_ms = server_context()->timer()->NowMs();

  // See if the resource is cacheable; and if so whether there is any need
  // to cache extend it.
  const bool cacheable = server_context()->http_cache()->force_caching() ||
                         response_headers->IsProxyCacheable();
  if (!cacheable) {
    // Note: RewriteContextTest.PreserveNoCacheWithFailedRewrites
    // relies on CacheExtender failing rewrites in this case.
    // If you change this behavior that test MUST be updated as it covers
    // security.
    not_cacheable_count_->Add(1);
    return kRewriteFailed;
  }
  if (!ShouldRewriteResource(response_headers, now_ms, input_resource,
                             input_url, result)) {
    return kRewriteFailed;
  }

  // We must be careful what Content-Types we allow to be cache extended.
  // Specifically, we do not want to cache extend any Content-Types that
  // could execute scripts when loaded in a browser because that could
  // open XSS vectors in case of system misconfiguration.
  //
  // We whitelist a set of safe Content-Types here.
  //
  // TODO(sligocki): Should we whitelist more Content-Types as well?
  // We would also have to find and rewrite the URLs to these resources
  // if we want to cache extend them.
  const ContentType* input_type = input_resource->type();
  const bool approved_type =
      input_type->IsImage() ||  // images get sniffed only to other images
      (input_type->type() == ContentType::kPdf &&
       driver()->MayCacheExtendPdfs()) ||  // Don't accept PDFs by default.
      input_type->type() == ContentType::kCss ||  // CSS + JS left as-is.
      input_type->type() == ContentType::kJavascript;
  if (!approved_type) {
    // Fail to cache extend a file that isn't an approved type.
    //
    // If we decide not to fail to cache extend unapproved types, we
    // should convert their Content-Type to text/plain because as per
    // http://mimesniff.spec.whatwg.org/ it will never get turned into
    // anything dangerous.
    return kRewriteFailed;
  }
  const ContentType* output_type = input_type;

  StringPiece contents(input_resource->ExtractUncompressedContents());
  GoogleString resolved_css;
  StringWriter css_writer(&resolved_css);
  GoogleUrl input_gurl(input_resource->url());
  if (output_type->type() == ContentType::kCss) {
    // The CSS now lives at a different URL, so URLs inside it may need to be
    // resolved against the new base.
    switch (driver()->ResolveCssUrls(input_gurl,
                                     output_resource->resolved_base(),
                                     contents, &css_writer, handler)) {
      case RewriteDriver::kWriteFailed:
        return kRewriteFailed;
      case RewriteDriver::kSuccess:
        // TODO(jmarantz): find a mechanism to write this directly into
        // the HTTPValue so we can reduce the number of times that we
        // copy entire resources.
        contents = resolved_css;
        break;
      case RewriteDriver::kNoResolutionNeeded:
        break;
    }
  }

  server_context()->MergeNonCachingResponseHeaders(
      input_resource, output_resource);
  const bool written = driver()->Write(ResourceVector(1, input_resource),
                                       contents,
                                       output_type,
                                       input_resource->charset(),
                                       output_resource.get());
  return written ? kRewriteOk : kRewriteFailed;
}

// Creates a new top-level (non-nested) Context bound to this filter and its
// driver.  The context is allocated with new and returned as a raw pointer.
RewriteContext* CacheExtender::MakeRewriteContext() {
  Context* top_level_context =
      new Context(this, driver(), NULL /* not nested */);
  return top_level_context;
}

// Creates a Context nested under 'parent' and attaches 'slot' to it.  The
// context is constructed with a NULL driver and the given parent, allocated
// with new, and returned as a raw pointer.
RewriteContext* CacheExtender::MakeNestedContext(
    RewriteContext* parent, const ResourceSlotPtr& slot) {
  Context* nested_context = new Context(this, NULL /* driver */, parent);
  nested_context->AddSlot(slot);
  return nested_context;
}

}  // namespace net_instaweb
