/*
 * Copyright 2011 Google Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

// Author: sligocki@google.com (Shawn Ligocki)

#include "pagespeed/automatic/proxy_interface.h"

#include "base/callback.h"
#include "base/logging.h"
#include "net/instaweb/config/rewrite_options_manager.h"
#include "net/instaweb/http/public/async_fetch.h"
#include "net/instaweb/http/public/log_record.h"
#include "net/instaweb/http/public/logging_proto_impl.h"
#include "net/instaweb/http/public/request_context.h"
#include "net/instaweb/http/public/request_timing_info.h"
#include "net/instaweb/rewriter/public/blink_util.h"
#include "net/instaweb/rewriter/public/experiment_matcher.h"
#include "net/instaweb/rewriter/public/resource_fetch.h"
#include "net/instaweb/rewriter/public/rewrite_driver.h"
#include "net/instaweb/rewriter/public/rewrite_options.h"
#include "net/instaweb/rewriter/public/rewrite_query.h"
#include "net/instaweb/rewriter/public/server_context.h"
#include "pagespeed/automatic/cache_html_flow.h"
#include "pagespeed/automatic/flush_early_flow.h"
#include "pagespeed/automatic/proxy_fetch.h"
#include "pagespeed/kernel/base/abstract_mutex.h"
#include "pagespeed/kernel/base/hasher.h"
#include "pagespeed/kernel/base/hostname_util.h"
#include "pagespeed/kernel/base/ref_counted_ptr.h"
#include "pagespeed/kernel/base/scoped_ptr.h"
#include "pagespeed/kernel/base/statistics.h"
#include "pagespeed/kernel/base/string.h"
#include "pagespeed/kernel/base/string_util.h"
#include "pagespeed/kernel/http/content_type.h"
#include "pagespeed/kernel/http/google_url.h"
#include "pagespeed/kernel/http/http_names.h"
#include "pagespeed/kernel/http/query_params.h"
#include "pagespeed/kernel/http/request_headers.h"
#include "pagespeed/kernel/http/response_headers.h"

namespace net_instaweb {

class MessageHandler;

// Name of the timed statistics variable that is incremented each time a
// request is handed to CacheHtmlFlow (see cache_html_flow_requests_).
const char ProxyInterface::kCacheHtmlRequestCount[] =
    "cache-html-requests";
namespace {

// Names of the timed Statistics variables owned by ProxyInterface.
const char kTotalRequestCount[] = "all-requests";
const char kPagespeedRequestCount[] = "pagespeed-requests";
const char kRejectedRequestCount[] = "publisher-rejected-requests";
const char kNoDomainConfigRequestCount[] = "without-domain-config-requests";
const char kNoDomainConfigResourceRequestCount[] =
    "without-domain-config-resource-requests";

// Response body written to the client when the rewrite options decline a
// request.  This is message text, not a statistics variable name.
const char kRejectedRequestHtmlResponse[] =
    "Unable to serve content as the content is blocked by the "
    "administrator of the domain.";

}  // namespace

// Per-request state handed from ProxyRequest() to GetRewriteOptionsDone()
// through the GetRewriteOptions() callback.  An instance is allocated with
// new in ProxyRequest() and deleted by GetRewriteOptionsDone().
struct ProxyInterface::RequestData {
  // True when Fetch() classified the request as a .pagespeed. resource.
  bool is_resource_fetch;
  // Private copy of the requested URL; owned (and freed) by this struct.
  scoped_ptr<GoogleUrl> request_url;
  // Fetch through which the response is delivered; not deleted by this struct.
  AsyncFetch* async_fetch;
  // Handler supplied by the caller of Fetch(); not deleted by this struct.
  MessageHandler* handler;
};

// Constructs a proxy interface identified by |hostname| and |port| (both are
// consulted by UrlAndPortMatchThisServer()).  Every timed statistics variable
// is looked up by name from |stats|, and a ProxyFetchFactory bound to
// |server_context| is created.
// NOTE(review): presumably InitStats() must have been called on |stats|
// beforehand so the lookups succeed -- confirm against Statistics.
ProxyInterface::ProxyInterface(const StringPiece& hostname, int port,
                               ServerContext* server_context,
                               Statistics* stats)
    : server_context_(server_context),
      hostname_(hostname.as_string()),
      port_(port),
      all_requests_(stats->GetTimedVariable(kTotalRequestCount)),
      pagespeed_requests_(stats->GetTimedVariable(kPagespeedRequestCount)),
      cache_html_flow_requests_(
          stats->GetTimedVariable(kCacheHtmlRequestCount)),
      rejected_requests_(stats->GetTimedVariable(kRejectedRequestCount)),
      requests_without_domain_config_(
          stats->GetTimedVariable(kNoDomainConfigRequestCount)),
      resource_requests_without_domain_config_(
          stats->GetTimedVariable(kNoDomainConfigResourceRequestCount)) {
  // The factory is used later to start ProxyFetch / CacheHtmlFlow /
  // FlushEarlyFlow for HTML requests.
  proxy_fetch_factory_.reset(new ProxyFetchFactory(server_context));
}

// No explicit teardown is performed here.
ProxyInterface::~ProxyInterface() {}

// Registers with |statistics| every timed variable that ProxyInterface looks
// up in its constructor, then lets CacheHtmlFlow and FlushEarlyFlow register
// their own statistics.  Each variable is added exactly once, in
// ServerContext::kStatisticsGroup.
void ProxyInterface::InitStats(Statistics* statistics) {
  statistics->AddTimedVariable(kTotalRequestCount,
                               ServerContext::kStatisticsGroup);
  statistics->AddTimedVariable(kPagespeedRequestCount,
                               ServerContext::kStatisticsGroup);
  statistics->AddTimedVariable(kCacheHtmlRequestCount,
                               ServerContext::kStatisticsGroup);
  statistics->AddTimedVariable(kRejectedRequestCount,
                               ServerContext::kStatisticsGroup);
  statistics->AddTimedVariable(kNoDomainConfigRequestCount,
                               ServerContext::kStatisticsGroup);
  statistics->AddTimedVariable(kNoDomainConfigResourceRequestCount,
                               ServerContext::kStatisticsGroup);
  CacheHtmlFlow::InitStats(statistics);
  FlushEarlyFlow::InitStats(statistics);
}

bool ProxyInterface::IsWellFormedUrl(const GoogleUrl& url) {
  bool ret = false;
  if (url.IsWebValid()) {
    if (url.has_path()) {
      StringPiece path = url.PathAndLeaf();
      GoogleString filename = url.ExtractFileName();
      int path_len = path.size() - filename.size();
      if (path_len >= 0) {
        ret = true;
      }
    } else {
      LOG(ERROR) << "URL has no path: " << url.Spec();
    }
  }
  return ret;
}

bool ProxyInterface::UrlAndPortMatchThisServer(const GoogleUrl& url) {
  bool ret = false;
  if (url.IsWebValid() && (url.EffectiveIntPort() == port_)) {
    // TODO(atulvasu): This should support matching the actual host this
    // machine can receive requests from. Ideally some flag control would
    // help. For example this server could be running multiple virtual
    // servers, and we would like to know what server we are catering to for
    // pagespeed only queries.
    //
    // Allow for exact hostname matches, as well as a URL typed into the
    // browser window like "exeda.cam", which should match
    // "exeda.cam.corp.google.com".
    StringPiece host = url.Host();
    if (IsLocalhost(host, hostname_) ||
        StringPiece(hostname_).starts_with(StrCat(host, "."))) {
      ret = true;
    }
  }
  return ret;
}

void ProxyInterface::Fetch(const GoogleString& requested_url_string,
                           MessageHandler* handler,
                           AsyncFetch* async_fetch) {
  GoogleUrl requested_url(requested_url_string);
  const bool is_get_or_head =
      (async_fetch->request_headers()->method() == RequestHeaders::kGet) ||
      (async_fetch->request_headers()->method() == RequestHeaders::kHead);

  all_requests_->IncBy(1);
  if (!(requested_url.IsWebValid() && IsWellFormedUrl(requested_url))) {
    LOG(WARNING) << "Bad URL, failing request: " << requested_url_string;
    async_fetch->response_headers()->SetStatusAndReason(HttpStatus::kNotFound);
    async_fetch->Done(false);
  } else {
    // Try to handle this as a .pagespeed. resource.
    if (is_get_or_head && server_context_->IsPagespeedResource(requested_url)) {
      pagespeed_requests_->IncBy(1);
      LOG(INFO) << "Serving URL as pagespeed resource: "
                << requested_url.Spec();
      ProxyRequest(true, requested_url, async_fetch, handler);
    } else if (UrlAndPortMatchThisServer(requested_url)) {
      // Just respond with a 404 for now.
      async_fetch->response_headers()->SetStatusAndReason(
          HttpStatus::kNotFound);
      LOG(INFO) << "Returning 404 for URL: " << requested_url.Spec();
      async_fetch->Done(false);
    } else {
      // Otherwise we proxy it (rewriting if it is HTML).
      LOG(INFO) << "Proxying URL normally: " << requested_url.Spec();
      ProxyRequest(false, requested_url, async_fetch, handler);
    }
  }
}

void ProxyInterface::ProxyRequest(bool is_resource_fetch,
                                  const GoogleUrl& request_url,
                                  AsyncFetch* async_fetch,
                                  MessageHandler* handler) {
  RequestData* request_data = new RequestData;
  request_data->is_resource_fetch = is_resource_fetch;
  request_data->request_url.reset(new GoogleUrl);
  request_data->request_url->Reset(request_url);
  request_data->async_fetch = async_fetch;
  request_data->handler = handler;

  server_context_->rewrite_options_manager()->GetRewriteOptions(
      request_url,
      *async_fetch->request_headers(),
      NewCallback(this, &ProxyInterface::GetRewriteOptionsDone, request_data));
}

// Forwards to ProxyFetchFactory::InitiatePropertyCacheLookup(), supplying
// this interface's server_context_, and returns the collector it produces.
ProxyFetchPropertyCallbackCollector*
ProxyInterface::InitiatePropertyCacheLookup(bool is_resource_fetch,
                                            const GoogleUrl& request_url,
                                            RewriteOptions* options,
                                            AsyncFetch* async_fetch,
                                            const bool requires_blink_cohort) {
  ProxyFetchPropertyCallbackCollector* collector =
      ProxyFetchFactory::InitiatePropertyCacheLookup(
          is_resource_fetch, request_url, server_context_, options,
          async_fetch, requires_blink_cohort);
  return collector;
}

// Callback run once the rewrite-options lookup started by ProxyRequest()
// completes.  Takes ownership of |request_data| (always deleted on return)
// and of |domain_options|, which may be NULL when no per-domain configuration
// was found.
//
// Processing steps:
//   1. Count requests that arrived without a domain config.
//   2. Parse PageSpeed query params, request headers and cookies; on failure
//      respond with kMethodNotAllowed and stop.
//   3. Combine domain and query options via GetCustomOptions(); if the result
//      declines the request, respond with kProxyDeclinedRequest and stop.
//   4. Strip Accept-Encoding, mark processing as started and record in the
//      log whether this is a .pagespeed. resource request.
//   5. Resource requests go to ResourceFetch::Start().  All other requests
//      get a RewriteDriver and are handed to CacheHtmlFlow or to a ProxyFetch
//      (optionally accompanied by FlushEarlyFlow).
void ProxyInterface::GetRewriteOptionsDone(RequestData* request_data,
                                           RewriteOptions* domain_options) {
  scoped_ptr<RequestData> request_data_deleter(request_data);
  scoped_ptr<RewriteOptions> scoped_domain_options(domain_options);
  bool is_resource_fetch = request_data->is_resource_fetch;
  GoogleUrl* request_url = request_data->request_url.get();
  AsyncFetch* async_fetch = request_data->async_fetch;
  MessageHandler* handler = request_data->handler;

  if (domain_options == NULL) {
    requests_without_domain_config_->IncBy(1);
    if (is_resource_fetch) {
      resource_requests_without_domain_config_->IncBy(1);
    }
  }

  // Parse the query options, headers, and cookies.
  RewriteQuery query;
  if (!server_context_->GetQueryOptions(async_fetch->request_context(),
                                        domain_options, request_url,
                                        async_fetch->request_headers(),
                                        NULL /* response_headers */, &query)) {
    // scoped_domain_options still owns |domain_options| here and frees it.
    async_fetch->response_headers()->SetStatusAndReason(
        HttpStatus::kMethodNotAllowed);
    async_fetch->Write("Invalid PageSpeed query-params/request headers",
                       handler);
    async_fetch->Done(false);
    return;
  }

  // From here on |options| (possibly NULL) is the only handle on the
  // per-request options; scoped_domain_options has released its pointer.
  RewriteOptions* options = server_context_->GetCustomOptions(
      async_fetch->request_headers(), scoped_domain_options.release(),
      query.ReleaseOptions());
  GoogleString url_string;
  request_url->Spec().CopyToString(&url_string);
  RequestHeaders* request_headers = async_fetch->request_headers();
  if (options != NULL &&
      options->IsRequestDeclined(url_string, request_headers)) {
    rejected_requests_->IncBy(1);
    ResponseHeaders* response_headers = async_fetch->response_headers();
    response_headers->SetStatusAndReason(HttpStatus::kProxyDeclinedRequest);
    response_headers->Replace(HttpAttributes::kContentType,
                              kContentTypeText.mime_type());
    response_headers->Replace(HttpAttributes::kCacheControl,
                              "private, max-age=0");
    async_fetch->Write(kRejectedRequestHtmlResponse, handler);
    async_fetch->Done(false);
    // Nothing downstream took ownership of |options|, so free it here.
    delete options;
    return;
  }
  // ScanSplitHtmlRequest may rewrite |url_string|; when it reports a change,
  // keep |request_url| in sync with it.
  if (ServerContext::ScanSplitHtmlRequest(
      async_fetch->request_context(), options, &url_string)) {
    request_url->Reset(url_string);
  }

  // Update request_headers.
  // We deal with encodings. So strip the users Accept-Encoding headers.
  async_fetch->request_headers()->RemoveAll(HttpAttributes::kAcceptEncoding);
  // Note: We preserve the User-Agent and Cookies so that the origin servers
  // send us the correct HTML. We will need to consider this for caching HTML.

  async_fetch->request_context()->mutable_timing_info()->ProcessingStarted();

  AbstractLogRecord* log_record = async_fetch->request_context()->log_record();
  {
    ScopedMutex lock(log_record->mutex());
    log_record->logging_info()->set_is_pagespeed_resource(is_resource_fetch);
  }

  // Start fetch and rewrite.  If GetCustomOptions found options for us,
  // ownership of them is passed on below (to ResourceFetch::Start or to the
  // RewriteDriver created by NewCustomRewriteDriver).
  if (is_resource_fetch) {
    // TODO(sligocki): Set using_spdy appropriately.
    bool using_spdy = false;
    // TODO(pulkitg): Set is_original_resource_cacheable to false if pagespeed
    // resource is not cacheable.
    const RewriteOptions* these_options =
        (options == NULL ? server_context_->global_options() : options);
    // TODO(sligocki): Should we be setting default options and then overriding
    // here? It seems like it would be better to only set once, but that
    // involves a lot of complicated code changes.
    async_fetch->request_context()->ResetOptions(
        these_options->ComputeHttpOptions());
    ResourceFetch::Start(*request_url, options, using_spdy,
                         server_context_, async_fetch);
  } else {
    // TODO(nforman): If we are not running an experiment, remove the
    // experiment cookie.
    // If we don't already have custom options, and the global options say we're
    // running an experiment, then clone them into custom_options so we can
    // manipulate custom options without affecting the global options.
    if (options == NULL) {
      RewriteOptions* global_options = server_context_->global_options();
      if (global_options->running_experiment()) {
        options = global_options->Clone();
      }
    }
    // TODO(anupama): Adapt the experiment logic below for the FlushEarlyFlow as
    // well.
    bool need_to_store_experiment_data = false;
    if (options != NULL && options->running_experiment()) {
      need_to_store_experiment_data =
          server_context_->experiment_matcher()->ClassifyIntoExperiment(
              *async_fetch->request_headers(),
              *server_context_->user_agent_matcher(), options);
      options->set_need_to_store_experiment_data(need_to_store_experiment_data);
    }
    const char* user_agent = async_fetch->request_headers()->Lookup1(
        HttpAttributes::kUserAgent);

    // Whether it's a cache html request should not change despite the fact
    // a new driver is created later on.
    const bool is_cache_html_request = BlinkUtil::IsBlinkRequest(
        *request_url, async_fetch, options,
        user_agent, server_context_,
        RewriteOptions::kCachePartialHtml);

    ProxyFetchPropertyCallbackCollector* property_callback = NULL;

    // Only start a property cache lookup when rewriting can apply: either
    // there are no custom options, or they are enabled and allow this URL.
    if (options == NULL ||
        (options->enabled() && options->IsAllowed(request_url->Spec()))) {
      // Ownership of "property_callback" is eventually assumed by either
      // CacheHtmlFlow or ProxyFetch.
      property_callback = InitiatePropertyCacheLookup(is_resource_fetch,
                                                      *request_url,
                                                      options,
                                                      async_fetch,
                                                      is_cache_html_request);
    }

    if (options != NULL) {
      // Record a hash of the options signature in the request log.
      server_context_->ComputeSignature(options);
      {
        ScopedMutex lock(log_record->mutex());
        log_record->logging_info()->set_options_signature_hash(
            server_context_->contents_hasher()->HashToUint64(
                options->signature()));
      }
    }

    RewriteDriver* driver = NULL;
    RequestContextPtr request_ctx = async_fetch->request_context();
    DCHECK(request_ctx.get() != NULL)
        << "Async fetch must have a request context but does not.";
    if (options == NULL) {
      driver = server_context_->NewRewriteDriver(request_ctx);
    } else {
      // NewCustomRewriteDriver takes ownership of options.
      driver = server_context_->NewCustomRewriteDriver(options, request_ctx);
    }
    // TODO(sligocki): Should we be setting default options and then overriding
    // here? It seems like it would be better to only set once, but that
    // involves a lot of complicated code changes.
    request_ctx->ResetOptions(driver->options()->ComputeHttpOptions());
    driver->SetRequestHeaders(*async_fetch->request_headers());
    // TODO(mmohabey): Factor out the below checks so that they are not
    // repeated in BlinkUtil::IsBlinkRequest().

    // Copy over any PageSpeed query parameters so we can re-add them if we
    // receive a redirection response to our fetch request.
    driver->set_pagespeed_query_params(
        query.pagespeed_query_params().ToEscapedString());
    // Copy over any PageSpeed cookies so we know which ones to clear in
    // ProxyFetch::HandleHeadersComplete().
    driver->set_pagespeed_option_cookies(
        query.pagespeed_option_cookies().ToEscapedString());

    if (driver->options() != NULL && driver->options()->enabled() &&
        property_callback != NULL &&
        driver->options()->IsAllowed(url_string)) {
      if (is_cache_html_request) {
        cache_html_flow_requests_->IncBy(1);
        CacheHtmlFlow::Start(url_string,
                             async_fetch,
                             driver,
                             proxy_fetch_factory_.get(),
                             // Takes ownership of property_callback.
                             property_callback);

        return;
      }
      // NOTE: The FlushEarly flow will run in parallel with the ProxyFetch,
      // but will not begin (FlushEarlyFlow::FlushEarly) until the
      // PropertyCache lookup has completed.
      // Also it does NOT take ownership of property_callback.
      // FlushEarlyFlow might not start if the request is not GET or if the
      // useragent is unsupported etc.
      // |async_fetch| is passed by address, so TryStart may replace it; the
      // ProxyFetch below uses whatever value it leaves behind.
      FlushEarlyFlow::TryStart(url_string, &async_fetch, driver,
                               proxy_fetch_factory_.get(),
                               property_callback);
    }
    // Takes ownership of property_callback.
    proxy_fetch_factory_->StartNewProxyFetch(
        url_string, async_fetch, driver, property_callback, NULL);
  }
}

}  // namespace net_instaweb
