/* modules/cache/cache_storage.c - httpd - Git at Google */

 /* Licensed to the Apache Software Foundation (ASF) under one or more
  * contributor license agreements.  See the NOTICE file distributed with
  * this work for additional information regarding copyright ownership.
  * The ASF licenses this file to You under the Apache License, Version 2.0
  * (the "License"); you may not use this file except in compliance with
  * the License.  You may obtain a copy of the License at
  *
  *     http://www.apache.org/licenses/LICENSE-2.0
  *
  * Unless required by applicable law or agreed to in writing, software
  * distributed under the License is distributed on an "AS IS" BASIS,
  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */

 #include "mod_cache.h"

 #include "cache_storage.h"
 #include "cache_util.h"

 APLOG_USE_MODULE(cache);

 extern APR_OPTIONAL_FN_TYPE(ap_cache_generate_key) *cache_generate_key;

 extern module AP_MODULE_DECLARE_DATA cache_module;

 /* -------------------------------------------------------------- */

 /*
  * Remove the entity belonging to this request from every configured cache
  * provider.
  *
  * The stale handle is used when one has been set aside; otherwise the
  * regular handle is used. If neither is present there is nothing to remove.
  *
  * Always returns OK; the values returned by the providers' remove_url
  * hooks are not inspected.
  */
 int cache_remove_url(cache_request_rec *cache, request_rec *r)
 {
     cache_provider_list *entry;
     cache_handle_t *target = cache->stale_handle;

     /* fall back to the non-stale handle when no stale entry was kept */
     if (!target) {
         target = cache->handle;
     }
     if (target == NULL) {
         return OK;
     }

     ap_log_rerror(APLOG_MARK, APLOG_DEBUG, 0, r, APLOGNO(00691)
                   "cache: Removing url %s from the cache",
                   target->cache_obj->key);

     /* ask each provider in turn to drop the URL */
     for (entry = cache->providers; entry != NULL; entry = entry->next) {
         entry->provider->remove_url(target, r);
     }

     return OK;
 }


 /*
  * Create a new URL entity in the cache.
  *
  * More than one entity may be stored per URL. This function always asks
  * for a new entity to be created, regardless of whether other entities
  * already exist for the same URL.
  *
  * The size of the entity is handed to the providers so that each one can
  * decide whether it wants to cache this particular entity. If the size is
  * unknown, a size of -1 should be passed.
  *
  * Providers are tried in list order. The first one returning OK becomes the
  * provider for this request and OK is returned. A provider returning
  * DECLINED is skipped. Any other status ends the search and is returned
  * as is. DECLINED is returned when every provider declined. APR_EGENERAL
  * is returned when no cache request record was supplied, and a key
  * generation failure is returned unchanged.
  */
 int cache_create_entity(cache_request_rec *cache, request_rec *r,
                         apr_off_t size, apr_bucket_brigade *in)
 {
     cache_handle_t *handle = apr_pcalloc(r->pool, sizeof(*handle));
     cache_provider_list *entry;
     apr_status_t status;

     if (cache == NULL) {
         /* This should never happen */
         ap_log_rerror(APLOG_MARK, APLOG_ERR, APR_EGENERAL, r, APLOGNO(00692)
                 "cache: No cache request information available for key"
                 " generation");
         return APR_EGENERAL;
     }

     /* generate the key lazily, once per request */
     if (cache->key == NULL) {
         status = cache_generate_key(r, r->pool, &cache->key);
         if (status != APR_SUCCESS) {
             return status;
         }
     }

     /* offer the new entity to each specified cache type */
     for (entry = cache->providers; entry != NULL; entry = entry->next) {
         status = entry->provider->create_entity(handle, r, cache->key, size,
                 in);
         if (status == DECLINED) {
             /* this provider is not interested, try the next one */
             continue;
         }
         if (status == OK) {
             cache->handle = handle;
             cache->provider = entry->provider;
             cache->provider_name = entry->provider_name;
         }
         /* either success or a hard error: stop looking */
         return status;
     }

     return DECLINED;
 }

 /*
  * apr_table_do() callback: copy a header into the table passed as v,
  * except for Warning headers whose value starts with '1'.
  *
  * Stored Warning headers with a 1xx warn-code (see RFC2616 section 14.46)
  * MUST be deleted from the cache entry and the forwarded response, so they
  * are simply not copied.
  *
  * Always returns 1 so that the iteration continues.
  */
 static int filter_header_do(void *v, const char *key, const char *val)
 {
     /* cheap first-character test before the full case-insensitive compare */
     int is_warning = (*key == 'W' || *key == 'w')
             && !ap_cstr_casecmp(key, "Warning");

     if (!(is_warning && *val == '1')) {
         apr_table_addn(v, key, val);
     }
     return 1;
 }
 /*
  * apr_table_do() callback: remove every header named key from the table
  * passed as v, unless the header is a Warning header.
  *
  * Warning headers already present in the table are retained so that new
  * warnings are added to them rather than replacing them.
  *
  * Always returns 1 so that the iteration continues.
  */
 static int remove_header_do(void *v, const char *key, const char *val)
 {
     if ((*key != 'W' && *key != 'w') || ap_cstr_casecmp(key, "Warning")) {
         /* not a Warning header: drop all existing values for it */
         apr_table_unset(v, key);
     }
     return 1;
 }
 static int add_header_do(void *v, const char *key, const char *val)
 {
     apr_table_addn(v, key, val);
     return 1;
 }

 /**
  * Merge two sets of headers and install the result as r->headers_out.
  *
  * The "bottom" table forms the base and the "top" table is layered over it:
  * every header present in top first has all of its occurrences removed from
  * the base, and is then added with the values from top. Warning headers are
  * the exception; they are never removed from the base, so warnings from top
  * are added to those already there. When revalidation is set, the base is
  * built from bottom with Warning headers whose value starts with '1' left
  * out.
  *
  * Afterwards the Content-Type and Last-Modified headers of the merged
  * result are re-parsed and applied to the request.
  *
  * The handle h is accepted for interface reasons but is not used here.
  */
 void cache_accept_headers(cache_handle_t *h, request_rec *r, apr_table_t *top,
         apr_table_t *bottom, int revalidation)
 {
     const char *content_type;
     const char *last_modified;

     if (!revalidation) {
         /* work on a private copy unless bottom already is r->headers_out */
         if (bottom != r->headers_out) {
             r->headers_out = apr_table_copy(r->pool, bottom);
         }
     }
     else {
         apr_table_t *merged = apr_table_make(r->pool, 10);

         apr_table_do(filter_header_do, merged, bottom, NULL);
         r->headers_out = merged;
     }

     /* first clear out what top overrides, then add everything from top */
     apr_table_do(remove_header_do, r->headers_out, top, NULL);
     apr_table_do(add_header_do, r->headers_out, top, NULL);

     content_type = apr_table_get(r->headers_out, "Content-Type");
     if (content_type != NULL) {
         ap_set_content_type(r, content_type);
         /*
          * Also unset possible Content-Type headers in r->headers_out and
          * r->err_headers_out as they may differ from what we received from
          * the cache.
          * They are not needed: r->content_type as set by
          * ap_set_content_type above is used by the store_headers functions
          * of the storage providers as a fallback, and the HTTP_HEADER
          * filter overwrites the Content-Type header with r->content_type
          * anyway.
          */
         apr_table_unset(r->headers_out, "Content-Type");
         apr_table_unset(r->err_headers_out, "Content-Type");
     }

     /* A Last-Modified header from the cache can't be passed on blindly
      * because of restrictions on future values.
      */
     last_modified = apr_table_get(r->headers_out, "Last-Modified");
     if (last_modified != NULL) {
         ap_update_mtime(r, apr_date_parse_http(last_modified));
         ap_set_last_modified(r);
     }
 }

 /*
  * select a specific URL entity in the cache
  *
  * It is possible to store more than one entity per URL. Content
  * negotiation is used to select an entity. Once an entity is
  * selected, details of it are stored in the per request
  * config to save time when serving the request later.
  *
  * Each provider in cache->providers is asked in turn to open the entity
  * stored under cache->key (the key is generated first if it is not set
  * yet). A provider's entity is skipped in favour of the next provider when
  * its headers cannot be recalled, when a header named by the cached Vary
  * header differs between this request and the cached request, or when the
  * entity needs revalidation but the client sent only-if-cached.
  *
  * Return values:
  *   OK                    - a usable entity was found; cache->handle,
  *                           cache->provider and cache->provider_name are
  *                           set and the cached response headers have been
  *                           merged into r->headers_out.
  *   DECLINED              - no cached entity fits the bill, or the entity
  *                           found must be revalidated. In the latter case
  *                           cache->stale_handle and cache->stale_headers
  *                           are set and r->headers_in may have been
  *                           rewritten with our own conditionals.
  *   HTTP_GATEWAY_TIME_OUT - no provider produced a usable entity and the
  *                           request carried Cache-Control: only-if-cached.
  *   any other value       - the status a provider's open_entity returned.
  */
 int cache_select(cache_request_rec *cache, request_rec *r)
 {
     cache_provider_list *list;
     apr_status_t rv;
     cache_handle_t *h;

     if (!cache) {
         /* This should never happen */
         ap_log_rerror(APLOG_MARK, APLOG_ERR, APR_EGENERAL, r, APLOGNO(00693)
                 "cache: No cache request information available for key"
                 " generation");
         return DECLINED;
     }

     /* if no-cache, we can't serve from the cache, but we may store to the
      * cache.
      */
     if (!ap_cache_check_no_cache(cache, r)) {
         return DECLINED;
     }

     /* a key generation failure is treated like a cache miss */
     if (!cache->key) {
         rv = cache_generate_key(r, r->pool, &cache->key);
         if (rv != APR_SUCCESS) {
             return DECLINED;
         }
     }

     /* go through the cache types till we get a match */
     /* NOTE(review): the handle is not zero-initialised and the same handle
      * is handed to every provider in turn; presumably open_entity() and
      * recall_headers() populate every field read below - confirm.
      */
     h = apr_palloc(r->pool, sizeof(cache_handle_t));

     list = cache->providers;

     /* every "continue" inside the switch below applies to this loop */
     while (list) {
         switch ((rv = list->provider->open_entity(h, r, cache->key))) {
         case OK: {
             char *vary = NULL;
             int mismatch = 0;
             char *last = NULL;

             if (list->provider->recall_headers(h, r) != APR_SUCCESS) {
                 /* try again with next cache type */
                 list = list->next;
                 continue;
             }

             /*
              * Check Content-Negotiation - Vary
              *
              * At this point we need to make sure that the object we found in
              * the cache is the same object that would be delivered to the
              * client, when the effects of content negotiation are taken into
              * effect.
              *
              * In plain english, we want to make sure that a language-negotiated
              * document in one language is not given to a client asking for a
              * language negotiated document in a different language by mistake.
              *
              * This code makes the assumption that the storage manager will
              * cache the req_hdrs if the response contains a Vary
              * header.
              *
              * RFC2616 13.6 and 14.44 describe the Vary mechanism.
              */
             /* tokenise a pool copy of the cached Vary value; presumably
              * cache_strqtok() modifies the string it is given - confirm
              */
             vary = cache_strqtok(
                     apr_pstrdup(r->pool,
                             cache_table_getm(r->pool, h->resp_hdrs, "Vary")),
                     CACHE_SEPARATOR, &last);
             while (vary) {
                 const char *h1, *h2;

                 /*
                  * is this header in the request and the header in the cached
                  * request identical? If not, we give up and do a straight get
                  */
                 h1 = cache_table_getm(r->pool, r->headers_in, vary);
                 h2 = cache_table_getm(r->pool, h->req_hdrs, vary);
                 if (h1 == h2) {
                     /* both headers NULL, so a match - do nothing */
                 }
                 else if (h1 && h2 && !strcmp(h1, h2)) {
                     /* both headers exist and are equal - do nothing */
                 }
                 else {
                     /* headers do not match, so Vary failed */
                     ap_log_rerror(APLOG_MARK, APLOG_DEBUG, APR_SUCCESS,
                             r, APLOGNO(00694) "cache_select(): Vary header mismatch.");
                     mismatch = 1;
                     break;
                 }
                 vary = cache_strqtok(NULL, CACHE_SEPARATOR, &last);
             }

             /* no vary match, try next provider */
             if (mismatch) {
                 /* try again with next cache type */
                 list = list->next;
                 continue;
             }

             /* the provider is recorded before the freshness check, so these
              * fields stay set on the revalidation path below as well
              */
             cache->provider = list->provider;
             cache->provider_name = list->provider_name;

             /*
              * RFC2616 13.3.4 Rules for When to Use Entity Tags and Last-Modified
              * Dates: An HTTP/1.1 caching proxy, upon receiving a conditional request
              * that includes both a Last-Modified date and one or more entity tags as
              * cache validators, MUST NOT return a locally cached response to the
              * client unless that cached response is consistent with all of the
              * conditional header fields in the request.
              */
             /* from here on "mismatch" means: a conditional supplied by the
              * client does not match the cached response headers
              */
             if (ap_condition_if_match(r, h->resp_hdrs) == AP_CONDITION_NOMATCH
                     || ap_condition_if_unmodified_since(r, h->resp_hdrs)
                             == AP_CONDITION_NOMATCH
                     || ap_condition_if_none_match(r, h->resp_hdrs)
                             == AP_CONDITION_NOMATCH
                     || ap_condition_if_modified_since(r, h->resp_hdrs)
                             == AP_CONDITION_NOMATCH
                     || ap_condition_if_range(r, h->resp_hdrs) == AP_CONDITION_NOMATCH) {
                 mismatch = 1;
             }

             /* Is our cached response fresh enough? */
             if (mismatch || !cache_check_freshness(h, cache, r)) {
                 const char *etag, *lastmod;

                 /* Cache-Control: only-if-cached and revalidation required, try
                  * the next provider
                  */
                 if (cache->control_in.only_if_cached) {
                     /* try again with next cache type */
                     list = list->next;
                     continue;
                 }

                 /* set aside the stale entry for accessing later */
                 /* the request headers are copied before they are rewritten
                  * below, so stale_headers holds the client's originals
                  */
                 cache->stale_headers = apr_table_copy(r->pool,
                         r->headers_in);
                 cache->stale_handle = h;

                 /* if no existing conditionals, use conditionals of our own */
                 if (!mismatch) {

                     ap_log_rerror(
                             APLOG_MARK, APLOG_DEBUG, APR_SUCCESS, r, APLOGNO(00695) "Cached response for %s isn't fresh. Adding "
                             "conditional request headers.", r->uri);

                     /* Remove existing conditionals that might conflict with ours */
                     apr_table_unset(r->headers_in, "If-Match");
                     apr_table_unset(r->headers_in, "If-Modified-Since");
                     apr_table_unset(r->headers_in, "If-None-Match");
                     apr_table_unset(r->headers_in, "If-Range");
                     apr_table_unset(r->headers_in, "If-Unmodified-Since");

                     etag = apr_table_get(h->resp_hdrs, "ETag");
                     lastmod = apr_table_get(h->resp_hdrs, "Last-Modified");

                     if (etag || lastmod) {
                         /* If we have a cached etag and/or Last-Modified add in
                          * our own conditionals.
                          */

                         if (etag) {
                             apr_table_set(r->headers_in, "If-None-Match", etag);
                         }

                         if (lastmod) {
                             apr_table_set(r->headers_in, "If-Modified-Since",
                                     lastmod);
                         }

                         /*
                          * Do not do Range requests with our own conditionals: If
                          * we get 304 the Range does not matter and otherwise the
                          * entity changed and we want to have the complete entity
                          */
                         apr_table_unset(r->headers_in, "Range");

                     }

                 }

                 /* ready to revalidate, pretend we were never here */
                 return DECLINED;
             }

             /* Okay, this response looks okay.  Merge in our stuff and go. */
             cache_accept_headers(h, r, h->resp_hdrs, r->headers_out, 0);

             cache->handle = h;
             return OK;
         }
         case DECLINED: {
             /* try again with next cache type */
             list = list->next;
             continue;
         }
         default: {
             /* oo-er! an error */
             return rv;
         }
         }
     }

     /* if Cache-Control: only-if-cached, and not cached, return 504 */
     if (cache->control_in.only_if_cached) {
         ap_log_rerror(APLOG_MARK, APLOG_DEBUG, APR_SUCCESS, r, APLOGNO(00696)
                 "cache: 'only-if-cached' requested and no cached entity, "
                 "returning 504 Gateway Timeout for: %s", r->uri);
         return HTTP_GATEWAY_TIME_OUT;
     }

     return DECLINED;
 }

 /*
  * Build the canonical cache key for a URL.
  *
  * The key is the concatenation
  *
  *     scheme "://" hostname port_str path "?" [querystring]
  *
  * where port_str is either empty or starts with ':' when it is generated
  * here, and the query string is left out when the ignorequerystring
  * setting is on. Session identifiers listed in the ignore_session_id
  * setting are cut out of the path and the query string first.
  *
  * r          - the request; supplies the server config, r->proxyreq and
  *              the server name, scheme and port
  * p          - pool used for every allocation, including the key
  * uri        - used verbatim as the path component of the key (before
  *              session identifier removal), so it is expected to be a path
  * parsed_uri - supplies the query string, and for forward proxy requests
  *              the hostname, scheme and port string
  * key        - in/out: when *key is already set it is left untouched,
  *              otherwise it receives the new key. Callers must therefore
  *              initialise *key to NULL to get a key generated.
  *
  * Always returns APR_SUCCESS.
  */
 static apr_status_t cache_canonicalise_key(request_rec *r, apr_pool_t* p,
         const char *uri, apr_uri_t *parsed_uri, const char **key)
 {
     cache_server_conf *conf;
     char *port_str, *hn, *lcs;
     const char *hostname, *scheme;
     int i;
     const char *path;
     char *querystring;

     if (*key) {
         /*
          * We have been here before during the processing of this request.
          */
         return APR_SUCCESS;
     }

     /*
      * Get the module configuration. We need this for the CacheIgnoreQueryString
      * option below.
      */
     conf = (cache_server_conf *) ap_get_module_config(r->server->module_config,
             &cache_module);

     /*
      * Use the canonical name to improve cache hit rate, but only if this is
      * not a proxy request or if this is a reverse proxy request.
      * We need to handle both cases in the same manner as for the reverse proxy
      * case we have the following situation:
      *
      * If a cached entry is looked up by mod_cache's quick handler r->proxyreq
      * is still unset in the reverse proxy case as it only gets set in the
      * translate name hook (either by ProxyPass or mod_rewrite) which is run
      * after the quick handler hook. This is different to the forward proxy
      * case where it gets set before the quick handler is run (in the
      * post_read_request hook).
      * If a cache entry is created by the CACHE_SAVE filter we always have
      * r->proxyreq set correctly.
      * So we must ensure that in the reverse proxy case we use the same code
      * path and using the canonical name seems to be the right thing to do
      * in the reverse proxy case.
      */
     if (!r->proxyreq || (r->proxyreq == PROXYREQ_REVERSE)) {
         /* a configured base URI hostname overrides the server name */
         if (conf->base_uri && conf->base_uri->hostname) {
             hostname = conf->base_uri->hostname;
         }
         else {
             /* Use _default_ as the hostname if none present, as in mod_vhost */
             hostname = ap_get_server_name(r);
             if (!hostname) {
                 hostname = "_default_";
             }
         }
     }
     else if (parsed_uri->hostname) {
         /* Copy the parsed uri hostname */
         hn = apr_pstrdup(p, parsed_uri->hostname);
         ap_str_tolower(hn);
         /* const work-around */
         hostname = hn;
     }
     else {
         /* We are a proxied request, with no hostname. Unlikely
          * to get very far - but just in case */
         hostname = "_default_";
     }

     /*
      * Copy the scheme, ensuring that it is lower case. If the parsed uri
      * contains no string or if this is not a proxy request get the http
      * scheme for this request. As r->parsed_uri.scheme is not set if this
      * is a reverse proxy request, it is ensured that the cases
      * "no proxy request" and "reverse proxy request" are handled in the same
      * manner (see above why this is needed).
      */
     if (r->proxyreq && parsed_uri->scheme) {
         /* Copy the scheme and lower-case it */
         lcs = apr_pstrdup(p, parsed_uri->scheme);
         ap_str_tolower(lcs);
         /* const work-around */
         scheme = lcs;
     }
     else {
         /* a configured base URI scheme overrides the request's scheme */
         if (conf->base_uri && conf->base_uri->scheme) {
             scheme = conf->base_uri->scheme;
         }
         else {
             scheme = ap_http_scheme(r);
         }
     }

     /*
      * If this is a proxy request, but not a reverse proxy request (see comment
      * above why these cases must be handled in the same manner), copy the
      * URI's port-string (which may be a service name). If the URI contains
      * no port-string, use apr-util's notion of the default port for that
      * scheme - if available. Otherwise use the port-number of the current
      * server.
      */
     if (r->proxyreq && (r->proxyreq != PROXYREQ_REVERSE)) {
         if (parsed_uri->port_str) {
             /* room for ':' + the port string + NUL; the zero-filled
              * allocation provides the terminator
              */
             port_str = apr_pcalloc(p, strlen(parsed_uri->port_str) + 2);
             port_str[0] = ':';
             for (i = 0; parsed_uri->port_str[i]; i++) {
                 port_str[i + 1] = apr_tolower(parsed_uri->port_str[i]);
             }
         }
         else if (apr_uri_port_of_scheme(scheme)) {
             port_str = apr_psprintf(p, ":%u", apr_uri_port_of_scheme(scheme));
         }
         else {
             /* No port string given in the AbsoluteUri, and we have no
              * idea what the default port for the scheme is. Leave it
              * blank and live with the inefficiency of some extra cached
              * entities.
              */
             port_str = "";
         }
     }
     else {
         /* NOTE(review): the base URI port string is used as is; presumably
          * it already carries its leading ':' - confirm where base_uri is
          * configured.
          */
         if (conf->base_uri && conf->base_uri->port_str) {
             port_str = conf->base_uri->port_str;
         }
         else if (conf->base_uri && conf->base_uri->hostname) {
             /* base URI names a host but no port: leave the port out */
             port_str = "";
         }
         else {
             /* Use the server port */
             port_str = apr_psprintf(p, ":%u", ap_get_server_port(r));
         }
     }

     /*
      * Check if we need to ignore session identifiers in the URL and do so
      * if needed.
      */
     path = uri;
     querystring = parsed_uri->query;
     if (conf->ignore_session_id->nelts) {
         /* note: this i shadows the function-level i declared above */
         int i;
         char **identifier;

         identifier = (char **) conf->ignore_session_id->elts;
         for (i = 0; i < conf->ignore_session_id->nelts; i++, identifier++) {
             int len;
             const char *param;

             len = strlen(*identifier);
             /*
              * Check that we have a parameter separator in the last segment
              * of the path and that the parameter matches our identifier
              */
             if ((param = ap_strrchr_c(path, ';'))
                     && !strncmp(param + 1, *identifier, len)
                     && (*(param + len + 1) == '=')
                     && !ap_strchr_c(param + len + 2, '/')) {
                 /* keep only what precedes the ';' and skip the query string
                  * check for this identifier
                  */
                 path = apr_pstrmemdup(p, path, param - path);
                 continue;
             }
             /*
              * Check if the identifier is in the querystring and cut it out.
              */
             if (querystring) {
                 /*
                  * First check if the identifier is at the beginning of the
                  * querystring and followed by a '='
                  */
                 if (!strncmp(querystring, *identifier, len)
                         && (*(querystring + len) == '=')) {
                     param = querystring;
                 }
                 else {
                     char *complete;

                     /*
                      * In order to avoid subkey matching (PR 48401) prepend
                      * identifier with a '&' and append a '='
                      */
                     complete = apr_pstrcat(p, "&", *identifier, "=", NULL);
                     param = strstr(querystring, complete);
                     /* If we found something we are sitting on the '&' */
                     if (param) {
                         param++;
                     }
                 }
                 /* param, when set, points at the first character of the
                  * identifier inside the current querystring
                  */
                 if (param) {
                     const char *amp;

                     /* keep the part in front of the identifier (including
                      * its trailing '&'); param still points into the old
                      * string afterwards
                      */
                     if (querystring != param) {
                         querystring = apr_pstrndup(p, querystring,
                                 param - querystring);
                     }
                     else {
                         querystring = "";
                     }

                     /* append whatever follows the removed parameter */
                     if ((amp = ap_strchr_c(param + len + 1, '&'))) {
                         querystring = apr_pstrcat(p, querystring, amp + 1,
                                 NULL);
                     }
                     else {
                         /*
                          * If querystring is not "", then we have the case
                          * that the identifier parameter we removed was the
                          * last one in the original querystring. Hence we have
                          * a trailing '&' which needs to be removed.
                          */
                         if (*querystring) {
                             querystring[strlen(querystring) - 1] = '\0';
                         }
                     }
                 }
             }
         }
     }

     /* Key format is a URI, optionally without the query-string */
     /* the '?' is always part of the key; a NULL querystring simply ends
      * the apr_pstrcat() argument list early
      */
     if (conf->ignorequerystring) {
         *key = apr_pstrcat(p, scheme, "://", hostname, port_str, path, "?",
                 NULL);
     }
     else {
         *key = apr_pstrcat(p, scheme, "://", hostname, port_str, path, "?",
                 querystring, NULL);
     }

     /*
      * Store the key in the request_config for the cache as r->parsed_uri
      * might have changed in the time from our first visit here triggered by the
      * quick handler and our possible second visit triggered by the CACHE_SAVE
      * filter (e.g. r->parsed_uri got unescaped). In this case we would save the
      * resource in the cache under a key where it is never found by the quick
      * handler during following requests.
      */
     /* the log shows the unmodified input (uri and original query string)
      * next to the resulting key
      */
     ap_log_rerror(
             APLOG_MARK, APLOG_DEBUG, APR_SUCCESS, r, APLOGNO(00698) "cache: Key for entity %s?%s is %s", uri, parsed_uri->query, *key);

     return APR_SUCCESS;
 }

 /*
  * Default key generator: canonicalise the URI of the request itself, using
  * r->uri as the path and r->parsed_uri for the remaining components. The
  * result of cache_canonicalise_key() is passed straight through.
  */
 apr_status_t cache_generate_key_default(request_rec *r, apr_pool_t* p,
         const char **key)
 {
     apr_uri_t *request_uri = &r->parsed_uri;

     return cache_canonicalise_key(r, p, r->uri, request_uri, key);
 }

 /*
  * Work out the cache key of the URL named by a Location or Content-Location
  * response header, for use by cache_invalidate().
  *
  * value is the raw header value, or NULL when the header was not sent.
  *
  * Returns the key, or NULL when there is nothing to invalidate: the header
  * is absent, it cannot be parsed as a URI, it does not name the same host
  * as the request URI, or no key could be generated.
  */
 static const char *cache_invalidate_target_key(request_rec *r,
         const char *value)
 {
     apr_uri_t target;
     const char *key = NULL;

     if (!value) {
         return NULL;
     }
     if (apr_uri_parse(r->pool, value, &target) != APR_SUCCESS) {
         return NULL;
     }

     /* only entities on the same host as the request may be invalidated */
     if (!r->parsed_uri.hostname || !target.hostname
             || strcmp(r->parsed_uri.hostname, target.hostname)) {
         return NULL;
     }

     /*
      * cache_canonicalise_key() uses its uri argument verbatim as the path
      * part of the key and takes the query string from the parsed URI.
      * Handing it the whole header value (an absolute URL) would produce a
      * key that embeds the URL a second time and never matches a stored
      * entity, so pass the parsed path instead. A URL without a path refers
      * to the root, "/".
      */
     if (cache_canonicalise_key(r, r->pool, target.path ? target.path : "/",
             &target, &key) != APR_SUCCESS) {
         return NULL;
     }

     return key;
 }

 /*
  * Invalidate a specific URL entity in all caches
  *
  * All cached entities for this URL are invalidated, usually in
  * response to a POST/PUT or DELETE. In addition to the request URL itself,
  * the URLs named by the Location and Content-Location response headers are
  * invalidated when they refer to the same host as the request.
  *
  * This function returns OK if at least one entity was found in a provider
  * (the result of the provider's invalidate_entity hook is only logged), and
  * DECLINED if no cached entity was found, no cache request record was
  * supplied, or the key for the request could not be generated.
  */
 int cache_invalidate(cache_request_rec *cache, request_rec *r)
 {
     cache_provider_list *list;
     apr_status_t rv, status = DECLINED;
     cache_handle_t *h;
     const char *location_key;
     const char *content_location_key;

     if (!cache) {
         /* This should never happen */
         ap_log_rerror(
                 APLOG_MARK, APLOG_ERR, APR_EGENERAL, r, APLOGNO(00697) "cache: No cache request information available for key"
                 " generation");
         return DECLINED;
     }

     if (!cache->key) {
         rv = cache_generate_key(r, r->pool, &cache->key);
         if (rv != APR_SUCCESS) {
             return DECLINED;
         }
     }

     location_key = cache_invalidate_target_key(r,
             apr_table_get(r->headers_out, "Location"));
     content_location_key = cache_invalidate_target_key(r,
             apr_table_get(r->headers_out, "Content-Location"));

     /* go through the cache types */
     h = apr_palloc(r->pool, sizeof(cache_handle_t));

     list = cache->providers;

     while (list) {

         /* invalidate the request uri */
         rv = list->provider->open_entity(h, r, cache->key);
         if (OK == rv) {
             rv = list->provider->invalidate_entity(h, r);
             status = OK;
         }
         ap_log_rerror(
                 APLOG_MARK, APLOG_DEBUG, rv, r, APLOGNO(02468) "cache: Attempted to invalidate cached entity with key: %s", cache->key);

         /* invalidate the Location */
         if (location_key) {
             rv = list->provider->open_entity(h, r, location_key);
             if (OK == rv) {
                 rv = list->provider->invalidate_entity(h, r);
                 status = OK;
             }
             ap_log_rerror(
                     APLOG_MARK, APLOG_DEBUG, rv, r, APLOGNO(02469) "cache: Attempted to invalidate cached entity with key: %s", location_key);
         }

         /* invalidate the Content-Location */
         if (content_location_key) {
             rv = list->provider->open_entity(h, r, content_location_key);
             if (OK == rv) {
                 rv = list->provider->invalidate_entity(h, r);
                 status = OK;
             }
             ap_log_rerror(
                     APLOG_MARK, APLOG_DEBUG, rv, r, APLOGNO(02470) "cache: Attempted to invalidate cached entity with key: %s", content_location_key);
         }

         list = list->next;
     }

     return status;
 }