/*
 * Copyright 2010 Google Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

// Author: jmarantz@google.com (Joshua Marantz)

#ifndef NET_INSTAWEB_HTTP_PUBLIC_HTTP_CACHE_H_
#define NET_INSTAWEB_HTTP_PUBLIC_HTTP_CACHE_H_

#include "base/logging.h"
#include "net/instaweb/http/public/http_cache_failure.h"
#include "net/instaweb/http/public/http_value.h"
#include "net/instaweb/http/public/request_context.h"
#include "pagespeed/kernel/base/atomic_bool.h"
#include "pagespeed/kernel/base/basictypes.h"
#include "pagespeed/kernel/base/gtest_prod.h"
#include "pagespeed/kernel/base/ref_counted_ptr.h"
#include "pagespeed/kernel/base/string.h"
#include "pagespeed/kernel/base/string_util.h"
#include "pagespeed/kernel/cache/cache_interface.h"
#include "pagespeed/kernel/http/http_names.h"
#include "pagespeed/kernel/http/http_options.h"
#include "pagespeed/kernel/http/request_headers.h"
#include "pagespeed/kernel/http/response_headers.h"

namespace net_instaweb {

class Hasher;
class MessageHandler;
class Statistics;
class Timer;
class Variable;

// Implements HTTP caching semantics, including cache expiration and
// retention of the originally served cache headers.
class HTTPCache {
 public:
  // Names of statistics variables: exported for tests.
  static const char kCacheTimeUs[];
  static const char kCacheHits[];
  static const char kCacheMisses[];
  static const char kCacheBackendHits[];
  static const char kCacheBackendMisses[];
  static const char kCacheFallbacks[];
  static const char kCacheExpirations[];
  static const char kCacheInserts[];
  static const char kCacheDeletes[];

  // The prefix used for Etags.
  static const char kEtagPrefix[];

  // Function to format etags.
  static GoogleString FormatEtag(StringPiece hash);

  // Does not take ownership of any inputs.
  HTTPCache(CacheInterface* cache, Timer* timer, Hasher* hasher,
            Statistics* stats);
  ~HTTPCache();

  enum FindResultClassification {
    kFound,
    kNotFound,
    kRecentFailure,
  };

  // When a lookup is done in the HTTP Cache, it returns one of these values.
  struct FindResult {
    FindResult()
        : status(kNotFound), failure_details(kFetchStatusNotSet) {}

    FindResult(FindResultClassification in_status,
               FetchResponseStatus in_failure_details)
        : status(in_status), failure_details(in_failure_details) {}

    bool operator==(const FindResult& other) const {
      return status == other.status &&
             failure_details == other.failure_details;
    }

    bool operator!=(const FindResult& other) const {
      return !(*this == other);
    }

    FindResultClassification status;

    // This should be kFetchStatusNotSet if status is kNotFound.
    // This should be OK if status is kFound.
    // This should be one of the other values of FetchResponseStatus if
    // status is kRecentFailure, describing exactly the kind of failure that
    // got remembered.
    FetchResponseStatus failure_details;
  };

  void set_hasher(Hasher* hasher) { hasher_ = hasher; }

  // Class to handle an asynchronous cache lookup response.
  //
  // TODO(jmarantz): consider inheriting from AsyncFetch with an implementation
  // of Write/Flush/HeadersComplete -- we'd have to make Done take true/false so
  // this would impact callers.
  class Callback {
   public:
    // The 1-arg constructor does not learn anything about the
    // request, and thus pessimistically will assume it has cookies,
    // invalidating any response that has vary:cookie.  However it
    // will optimistically assume there is no authorization
    // requirement.  So a request-aware call to
    // ResponseHeaders::IsProxyCacheable (e.g. via the 2-arg Callback
    // constructor) must be applied when needed.
    explicit Callback(const RequestContextPtr& request_ctx)
        : response_headers_(NULL),
          owns_response_headers_(false),
          request_ctx_(request_ctx),
          cache_level_(0),
          is_background_(false) {
    }

    // The 2-arg constructor can be used in situations where we are confident
    // that the cookies and authorization in the request-headers are valid.
    Callback(const RequestContextPtr& request_ctx,
             RequestHeaders::Properties req_properties)
        : response_headers_(NULL),
          req_properties_(req_properties),
          owns_response_headers_(false),
          request_ctx_(request_ctx),
          cache_level_(0),
          is_background_(false) {
    }

    virtual ~Callback();
    virtual void Done(FindResult find_result) = 0;
    // A method that allows client Callbacks to apply invalidation checks.  We
    // first (in http_cache.cc) check whether the entry is expired using normal
    // http semantics, and if it is not expired, then this check is called --
    // thus callbacks can apply any further invalidation semantics it wants on
    // otherwise valid entries. But there's no way for a callback to override
    // when the HTTP semantics say the entry is expired.
    //
    // See also OptionsAwareHTTPCacheCallback in rewrite_driver.h for an
    // implementation you probably want to use.
    virtual bool IsCacheValid(const GoogleString& key,
                              const ResponseHeaders& headers) {
      return true;
    }

    // A method that allows client Callbacks to check if the response in cache
    // is fresh enough, in addition to it being valid.  This is used while
    // freshening  resources to check that the response in cache is not only
    // valid, but is also not going to expire anytime soon.
    // Note that if the response in cache is valid but not fresh, the HTTPCache
    // calls Callback::Done with find_result = kNotFound and fills in
    // fallback_http_value() with the cached response.
    virtual bool IsFresh(const ResponseHeaders& headers) { return true; }

    // Overrides the cache ttl of the cached response with the given value. Note
    // that this has no effect if the returned value is negative or less than
    // the cache ttl of the stored value.
    virtual int64 OverrideCacheTtlMs(const GoogleString& key) { return -1; }

    // Called upon completion of a cache lookup trigged by HTTPCache::Find by
    // the HTTPCache code with the latency in milliseconds.  Will invoke
    // ReportLatencyMsImpl for non-background fetches in order for system
    // implementations, like RequestTimingInfo, to record the cache
    // latency. Can be called multiple times for various levels of cache.
    void ReportLatencyMs(int64 latency_ms);

    // Determines whether this Get request was made in the context where
    // arbitrary Vary headers should be respected.
    //
    // Note that Vary:Accept-Encoding is ignored at this level independent
    // of this setting, and Vary:Cookie is always respected independent of
    // this setting.  Vary:Cookie prevents cacheing resources.  For HTML,
    // however, we can cache Vary:Cookie responses as long as there is
    // no cookie in the request.
    virtual ResponseHeaders::VaryOption RespectVaryOnResources() const = 0;

    // TODO(jmarantz): specify the dataflow between http_value and
    // response_headers.
    HTTPValue* http_value() { return &http_value_; }
    ResponseHeaders* response_headers() {
      if (response_headers_ == NULL) {
        response_headers_ = new ResponseHeaders(request_ctx_->options());
        owns_response_headers_ = true;
      }
      return response_headers_;
    }
    const ResponseHeaders* response_headers() const {
      return const_cast<Callback*>(this)->response_headers();
    }
    void set_response_headers(ResponseHeaders* headers) {
      DCHECK(!owns_response_headers_);
      if (owns_response_headers_) {
        delete response_headers_;
      }
      response_headers_ = headers;
      owns_response_headers_ = false;
    }
    HTTPValue* fallback_http_value() { return &fallback_http_value_; }

    const RequestContextPtr& request_context() { return request_ctx_; }
    void set_is_background(bool is_background) {
      is_background_ = is_background;
    }

    RequestHeaders::Properties req_properties() const {
      return req_properties_;
    }

   private:
    HTTPValue http_value_;
    // Stale value that can be used in case a fetch fails. Note that Find()
    // may fill in a stale value here but it will still return kNotFound.
    HTTPValue fallback_http_value_;
    ResponseHeaders* response_headers_;
    RequestHeaders::Properties req_properties_;
    bool owns_response_headers_;
    RequestContextPtr request_ctx_;
    int cache_level_;
    bool is_background_;

    DISALLOW_COPY_AND_ASSIGN(Callback);
  };

  // Makes the cache ignore put requests that do not record successes.
  void SetIgnoreFailurePuts();

  // Non-blocking Find.  Calls callback when done.  'handler' must all
  // stay valid until callback->Done() is called.
  void Find(const GoogleString& key,
                    const GoogleString& fragment,
                    MessageHandler* handler,
                    Callback* callback);

  // Note that Put takes a non-const pointer for HTTPValue so it can
  // bump the reference count.
  void Put(const GoogleString& key,
           const GoogleString& fragment,
           RequestHeaders::Properties req_properties,
           const HttpOptions& http_options,
           HTTPValue* value,
           MessageHandler* handler);

  // Note that Put takes a non-const pointer for ResponseHeaders* so it
  // can update the caching fields prior to storing.
  // If you call this method, you must be certain that the outgoing
  // request was not sent with Authorization:.
  void Put(const GoogleString& key,
           const GoogleString& fragment,
           RequestHeaders::Properties req_properties,
           // TODO(sligocki): Remove this arg and use headers->http_options().
           ResponseHeaders::VaryOption respect_vary_on_resources,
           ResponseHeaders* headers,
           const StringPiece& content, MessageHandler* handler);

  // Deletes an element in the cache.
  void Delete(const GoogleString& key, const GoogleString& fragment);

  void set_force_caching(bool force) { force_caching_ = force; }
  bool force_caching() const { return force_caching_; }
  void set_disable_html_caching_on_https(bool x) {
    disable_html_caching_on_https_ = x;
  }
  Timer* timer() const { return timer_; }
  CacheInterface* cache() { return cache_; }

  // Tell the HTTP Cache to remember that a fetch for particular key
  // failed for some reason (such an error, or being uncacheable, or
  // load shedding, etc). This will be cached according to
  // remember_failure_policy(). This can save work for our backends and us.
  void RememberFailure(const GoogleString& key,
                       const GoogleString& fragment,
                       FetchResponseStatus the_failure,
                       MessageHandler* handler);

  // Indicates if the response is within the cacheable size limit. Clients of
  // HTTPCache must check if they will be eventually able to cache their entries
  // before buffering them in memory. If the content length header is not found
  // then consider it as cacheable. This could be a chunked response.
  bool IsCacheableContentLength(ResponseHeaders* headers) const;
  // Indicates if the response body is within the cacheable size limit. If the
  // response headers do not have content length header, then the clients of
  // HTTPCache must check if the received response body is of cacheable size
  // before buffering them in memory.
  bool IsCacheableBodySize(int64 body_size) const;

  // Initialize statistics variables for the cache
  static void InitStats(Statistics* statistics);

  // Returns true if the resource is already at the point of expiration
  // and would never be used if inserted into the cache. Otherwise, returns
  // false.
  //
  // Note that this does not check for general cacheability, only for
  // expiration.  You must call ResponseHeaders::IsProxyCacheable() if
  // you want to also determine cacheability.
  bool IsExpired(const ResponseHeaders& headers);
  bool IsExpired(const ResponseHeaders& headers, int64 now_ms);

  // Stats for the HTTP cache.
  Variable* cache_time_us()     { return cache_time_us_; }
  Variable* cache_hits()        { return cache_hits_; }
  Variable* cache_misses()      { return cache_misses_; }
  Variable* cache_fallbacks()   { return cache_fallbacks_; }
  Variable* cache_expirations() { return cache_expirations_; }
  Variable* cache_inserts()     { return cache_inserts_; }
  Variable* cache_deletes()     { return cache_deletes_; }

  int failure_caching_ttl_sec(FetchResponseStatus kind) const {
    return remember_failure_policy_.ttl_sec_for_status[kind];
  }

  void set_failure_caching_ttl_sec(FetchResponseStatus kind, int ttl_sec) {
    remember_failure_policy_.ttl_sec_for_status[kind] = ttl_sec;
  }

  int max_cacheable_response_content_length() {
    return max_cacheable_response_content_length_;
  }

  void set_max_cacheable_response_content_length(int64 value);

  // Sets how many levels the cache has. Affects reporting of statistics ---
  // we don't want them for lower levels of multi-level setups.
  void set_cache_levels(int levels) { cache_levels_ = levels; }
  int cache_levels() const { return cache_levels_; }

  // Sets the compression level of HTTP Cache. 9 being the most compression, -1
  // being the gzip default (6), and 0 being off.
  void SetCompressionLevel(int level) {
    if (level >= -1 && level <= 9) {
      compression_level_ = level;
    } else {
      LOG(INFO) << "Invalid compression level specified, defaulting to -1";
      compression_level_ = -1;
    }
  }
  int compression_level() const { return compression_level_; }

  GoogleString Name() const { return FormatName(cache_->Name()); }
  static GoogleString FormatName(StringPiece cache);

  GoogleString CompositeKey(StringPiece key, StringPiece fragment) const {
    DCHECK(fragment.find("/") == StringPiece::npos);

    // Return "version/fragment/key" if there's a fragment, otherwise just
    // return "version/key".
    return StrCat(version_prefix_, fragment, fragment.empty() ? "" : "/", key);
  }

 private:
  friend class HTTPCacheCallback;
  FRIEND_TEST(HTTPCacheTest, UpdateVersion);

  // If headers is passed as NULL, the response headers will be extracted from
  // the HTTPValue. Otherwise, the headers passed in will be used.
  void PutInternal(bool preserve_response_headers,
                   const GoogleString& key,
                   const GoogleString& fragment,
                   int64 start_us,
                   HTTPValue* value,
                   ResponseHeaders* headers,
                   MessageHandler* handler);
  void DeleteInternal(const GoogleString& key_fragment);

  // Used by constructor and tests.
  void SetVersion(int version_number);
  void set_version_prefix(StringPiece version_prefix) {
    version_prefix.CopyToString(&version_prefix_);
  }

  bool MayCacheUrl(const GoogleString& url, const ResponseHeaders& headers);
  // Requires either content or value to be non-NULL.
  // Applies changes to headers. If the headers are actually changed or if value
  // is NULL then it builds and returns a new HTTPValue. If content is NULL
  // then content is extracted from value.
  HTTPValue* ApplyHeaderChangesForPut(
      int64 start_us, const StringPiece* content, ResponseHeaders* headers,
      HTTPValue* value, MessageHandler* handler);
  void UpdateStats(const GoogleString& key, const GoogleString& fragment,
                   CacheInterface::KeyState backend_state, FindResult result,
                   bool has_fallback, bool is_expired, MessageHandler* handler);
  void RememberFetchFailedOrNotCacheableHelper(
      const GoogleString& key, const GoogleString& fragment,
      MessageHandler* handler, HttpStatus::Code code, int64 ttl_sec);

  CacheInterface* cache_;  // Owned by the caller.
  Timer* timer_;
  Hasher* hasher_;
  bool force_caching_;
  // Whether to disable caching of HTML content fetched via https.
  bool disable_html_caching_on_https_;

  int cache_levels_;
  int compression_level_;

  // Total cumulative time spent accessing backend cache.
  Variable* cache_time_us_;
  // # of Find() requests which are found in cache and are still valid.
  Variable* cache_hits_;
  // # of other Find() requests that fail or are expired.
  Variable* cache_misses_;
  // # of Find() requests which are found in backend cache (whether or not
  // they are valid).
  Variable* cache_backend_hits_;
  // # of Find() requests not found in backend cache.
  Variable* cache_backend_misses_;
  Variable* cache_fallbacks_;
  Variable* cache_expirations_;
  Variable* cache_inserts_;
  Variable* cache_deletes_;

  GoogleString name_;
  HttpCacheFailurePolicy remember_failure_policy_;
  int64 max_cacheable_response_content_length_;
  AtomicBool ignore_failure_puts_;

  GoogleString version_prefix_;

  DISALLOW_COPY_AND_ASSIGN(HTTPCache);
};

}  // namespace net_instaweb

#endif  // NET_INSTAWEB_HTTP_PUBLIC_HTTP_CACHE_H_
