blob: b27c7fbb2ace3aaa766ef3a5244562a3c5afa54e [file] [log] [blame]
/* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "mod_cache.h"
module AP_MODULE_DECLARE_DATA cache_module;
APR_OPTIONAL_FN_TYPE(ap_cache_generate_key) *cache_generate_key;
/* -------------------------------------------------------------- */
/* Handles for cache filters, resolved at startup to eliminate
* a name-to-function mapping on each request
*/
static ap_filter_rec_t *cache_save_filter_handle;
static ap_filter_rec_t *cache_save_subreq_filter_handle;
static ap_filter_rec_t *cache_out_filter_handle;
static ap_filter_rec_t *cache_out_subreq_filter_handle;
static ap_filter_rec_t *cache_remove_url_filter_handle;
/*
* CACHE handler
* -------------
*
* Can we deliver this request from the cache?
* If yes:
* deliver the content by installing the CACHE_OUT filter.
* If no:
* check whether we're allowed to try cache it
* If yes:
* add CACHE_SAVE filter
* If No:
* oh well.
*/
static int cache_url_handler(request_rec *r, int lookup)
{
apr_status_t rv;
const char *auth;
cache_provider_list *providers;
cache_request_rec *cache;
cache_server_conf *conf;
apr_bucket_brigade *out;
ap_filter_t *next;
ap_filter_rec_t *cache_out_handle;
/* Delay initialization until we know we are handling a GET */
if (r->method_number != M_GET) {
return DECLINED;
}
conf = (cache_server_conf *) ap_get_module_config(r->server->module_config,
&cache_module);
/*
* Which cache module (if any) should handle this request?
*/
if (!(providers = ap_cache_get_providers(r, conf, r->parsed_uri))) {
return DECLINED;
}
/* make space for the per request config */
cache = (cache_request_rec *) ap_get_module_config(r->request_config,
&cache_module);
if (!cache) {
cache = apr_pcalloc(r->pool, sizeof(cache_request_rec));
ap_set_module_config(r->request_config, &cache_module, cache);
}
/* save away the possible providers */
cache->providers = providers;
/*
* Are we allowed to serve cached info at all?
*/
/* find certain cache controlling headers */
auth = apr_table_get(r->headers_in, "Authorization");
/* First things first - does the request allow us to return
* cached information at all? If not, just decline the request.
*/
if (auth) {
return DECLINED;
}
/*
* Try to serve this request from the cache.
*
* If no existing cache file (DECLINED)
* add cache_save filter
* If cached file (OK)
* clear filter stack
* add cache_out filter
* return OK
*/
rv = cache_select(r);
if (rv != OK) {
if (rv == DECLINED) {
if (!lookup) {
/*
* Add cache_save filter to cache this request. Choose
* the correct filter by checking if we are a subrequest
* or not.
*/
if (r->main) {
ap_log_error(APLOG_MARK, APLOG_DEBUG, APR_SUCCESS,
r->server,
"Adding CACHE_SAVE_SUBREQ filter for %s",
r->uri);
ap_add_output_filter_handle(cache_save_subreq_filter_handle,
NULL, r, r->connection);
}
else {
ap_log_error(APLOG_MARK, APLOG_DEBUG, APR_SUCCESS,
r->server, "Adding CACHE_SAVE filter for %s",
r->uri);
ap_add_output_filter_handle(cache_save_filter_handle,
NULL, r, r->connection);
}
ap_log_error(APLOG_MARK, APLOG_DEBUG, APR_SUCCESS, r->server,
"Adding CACHE_REMOVE_URL filter for %s",
r->uri);
/* Add cache_remove_url filter to this request to remove a
* stale cache entry if needed. Also put the current cache
* request rec in the filter context, as the request that
* is available later during running the filter maybe
* different due to an internal redirect.
*/
cache->remove_url_filter =
ap_add_output_filter_handle(cache_remove_url_filter_handle,
cache, r, r->connection);
}
else {
if (cache->stale_headers) {
ap_log_error(APLOG_MARK, APLOG_DEBUG, APR_SUCCESS,
r->server, "Restoring request headers for %s",
r->uri);
r->headers_in = cache->stale_headers;
}
/* Delete our per-request configuration. */
ap_set_module_config(r->request_config, &cache_module, NULL);
}
}
else {
/* error */
ap_log_error(APLOG_MARK, APLOG_ERR, rv, r->server,
"cache: error returned while checking for cached "
"file by %s cache", cache->provider_name);
}
return DECLINED;
}
/* if we are a lookup, we are exiting soon one way or another; Restore
* the headers. */
if (lookup) {
if (cache->stale_headers) {
ap_log_error(APLOG_MARK, APLOG_DEBUG, APR_SUCCESS, r->server,
"Restoring request headers.");
r->headers_in = cache->stale_headers;
}
/* Delete our per-request configuration. */
ap_set_module_config(r->request_config, &cache_module, NULL);
}
rv = ap_meets_conditions(r);
if (rv != OK) {
/* If we are a lookup, we have to return DECLINED as we have no
* way of knowing if we will be able to serve the content.
*/
if (lookup) {
return DECLINED;
}
/* Return cached status. */
return rv;
}
/* If we're a lookup, we can exit now instead of serving the content. */
if (lookup) {
return OK;
}
/* Serve up the content */
/* We are in the quick handler hook, which means that no output
* filters have been set. So lets run the insert_filter hook.
*/
ap_run_insert_filter(r);
/*
* Add cache_out filter to serve this request. Choose
* the correct filter by checking if we are a subrequest
* or not.
*/
if (r->main) {
cache_out_handle = cache_out_subreq_filter_handle;
}
else {
cache_out_handle = cache_out_filter_handle;
}
ap_add_output_filter_handle(cache_out_handle, NULL, r, r->connection);
/*
* Remove all filters that are before the cache_out filter. This ensures
* that we kick off the filter stack with our cache_out filter being the
* first in the chain. This make sense because we want to restore things
* in the same manner as we saved them.
* There may be filters before our cache_out filter, because
*
* 1. We call ap_set_content_type during cache_select. This causes
* Content-Type specific filters to be added.
* 2. We call the insert_filter hook. This causes filters e.g. like
* the ones set with SetOutputFilter to be added.
*/
next = r->output_filters;
while (next && (next->frec != cache_out_handle)) {
ap_remove_output_filter(next);
next = next->next;
}
/* kick off the filter stack */
out = apr_brigade_create(r->pool, r->connection->bucket_alloc);
rv = ap_pass_brigade(r->output_filters, out);
if (rv != APR_SUCCESS) {
if (rv != AP_FILTER_ERROR) {
ap_log_error(APLOG_MARK, APLOG_ERR, rv, r->server,
"cache: error returned while trying to return %s "
"cached data",
cache->provider_name);
}
return rv;
}
return OK;
}
/*
* CACHE_OUT filter
* ----------------
*
* Deliver cached content (headers and body) up the stack.
*/
static int cache_out_filter(ap_filter_t *f, apr_bucket_brigade *bb)
{
request_rec *r = f->r;
cache_request_rec *cache;
cache = (cache_request_rec *) ap_get_module_config(r->request_config,
&cache_module);
if (!cache) {
/* user likely configured CACHE_OUT manually; they should use mod_cache
* configuration to do that */
ap_log_error(APLOG_MARK, APLOG_ERR, 0, r->server,
"CACHE_OUT enabled unexpectedly");
ap_remove_output_filter(f);
return ap_pass_brigade(f->next, bb);
}
ap_log_error(APLOG_MARK, APLOG_DEBUG, APR_SUCCESS, r->server,
"cache: running CACHE_OUT filter");
/* restore status of cached response */
/* XXX: This exposes a bug in mem_cache, since it does not
* restore the status into it's handle. */
r->status = cache->handle->cache_obj->info.status;
/* recall_headers() was called in cache_select() */
cache->provider->recall_body(cache->handle, r->pool, bb);
/* This filter is done once it has served up its content */
ap_remove_output_filter(f);
ap_log_error(APLOG_MARK, APLOG_DEBUG, APR_SUCCESS, r->server,
"cache: serving %s", r->uri);
return ap_pass_brigade(f->next, bb);
}
/*
* CACHE_SAVE filter
* ---------------
*
* Decide whether or not this content should be cached.
* If we decide no it should not:
* remove the filter from the chain
* If we decide yes it should:
* Have we already started saving the response?
* If we have started, pass the data to the storage manager via store_body
* Otherwise:
* Check to see if we *can* save this particular response.
* If we can, call cache_create_entity() and save the headers and body
* Finally, pass the data to the next filter (the network or whatever)
*/
static int cache_save_filter(ap_filter_t *f, apr_bucket_brigade *in)
{
int rv = !OK;
request_rec *r = f->r;
cache_request_rec *cache;
cache_server_conf *conf;
const char *cc_out, *cl;
const char *exps, *lastmods, *dates, *etag;
apr_time_t exp, date, lastmod, now;
apr_off_t size;
cache_info *info = NULL;
char *reason;
apr_pool_t *p;
conf = (cache_server_conf *) ap_get_module_config(r->server->module_config,
&cache_module);
/* Setup cache_request_rec */
cache = (cache_request_rec *) ap_get_module_config(r->request_config,
&cache_module);
if (!cache) {
/* user likely configured CACHE_SAVE manually; they should really use
* mod_cache configuration to do that
*/
cache = apr_pcalloc(r->pool, sizeof(cache_request_rec));
ap_set_module_config(r->request_config, &cache_module, cache);
}
reason = NULL;
p = r->pool;
/*
* Pass Data to Cache
* ------------------
* This section passes the brigades into the cache modules, but only
* if the setup section (see below) is complete.
*/
if (cache->block_response) {
/* We've already sent down the response and EOS. So, ignore
* whatever comes now.
*/
return APR_SUCCESS;
}
/* have we already run the cachability check and set up the
* cached file handle?
*/
if (cache->in_checked) {
/* pass the brigades into the cache, then pass them
* up the filter stack
*/
rv = cache->provider->store_body(cache->handle, r, in);
if (rv != APR_SUCCESS) {
ap_log_error(APLOG_MARK, APLOG_DEBUG, rv, r->server,
"cache: Cache provider's store_body failed!");
ap_remove_output_filter(f);
}
return ap_pass_brigade(f->next, in);
}
/*
* Setup Data in Cache
* -------------------
* This section opens the cache entity and sets various caching
* parameters, and decides whether this URL should be cached at
* all. This section is* run before the above section.
*/
/* read expiry date; if a bad date, then leave it so the client can
* read it
*/
exps = apr_table_get(r->err_headers_out, "Expires");
if (exps == NULL) {
exps = apr_table_get(r->headers_out, "Expires");
}
if (exps != NULL) {
if (APR_DATE_BAD == (exp = apr_date_parse_http(exps))) {
exps = NULL;
}
}
else {
exp = APR_DATE_BAD;
}
/* read the last-modified date; if the date is bad, then delete it */
lastmods = apr_table_get(r->err_headers_out, "Last-Modified");
if (lastmods == NULL) {
lastmods = apr_table_get(r->headers_out, "Last-Modified");
}
if (lastmods != NULL) {
lastmod = apr_date_parse_http(lastmods);
if (lastmod == APR_DATE_BAD) {
lastmods = NULL;
}
}
else {
lastmod = APR_DATE_BAD;
}
/* read the etag and cache-control from the entity */
etag = apr_table_get(r->err_headers_out, "Etag");
if (etag == NULL) {
etag = apr_table_get(r->headers_out, "Etag");
}
cc_out = apr_table_get(r->err_headers_out, "Cache-Control");
if (cc_out == NULL) {
cc_out = apr_table_get(r->headers_out, "Cache-Control");
}
/*
* what responses should we not cache?
*
* At this point we decide based on the response headers whether it
* is appropriate _NOT_ to cache the data from the server. There are
* a whole lot of conditions that prevent us from caching this data.
* They are tested here one by one to be clear and unambiguous.
*/
if (r->status != HTTP_OK && r->status != HTTP_NON_AUTHORITATIVE
&& r->status != HTTP_MULTIPLE_CHOICES
&& r->status != HTTP_MOVED_PERMANENTLY
&& r->status != HTTP_NOT_MODIFIED) {
/* RFC2616 13.4 we are allowed to cache 200, 203, 206, 300, 301 or 410
* We don't cache 206, because we don't (yet) cache partial responses.
* We include 304 Not Modified here too as this is the origin server
* telling us to serve the cached copy.
*/
if (exps != NULL || cc_out != NULL) {
/* We are also allowed to cache any response given that it has a
* valid Expires or Cache Control header. If we find a either of
* those here, we pass request through the rest of the tests. From
* the RFC:
*
* A response received with any other status code (e.g. status
* codes 302 and 307) MUST NOT be returned in a reply to a
* subsequent request unless there are cache-control directives or
* another header(s) that explicitly allow it. For example, these
* include the following: an Expires header (section 14.21); a
* "max-age", "s-maxage", "must-revalidate", "proxy-revalidate",
* "public" or "private" cache-control directive (section 14.9).
*/
}
else {
reason = apr_psprintf(p, "Response status %d", r->status);
}
}
if (reason) {
/* noop */
}
else if (exps != NULL && exp == APR_DATE_BAD) {
/* if a broken Expires header is present, don't cache it */
reason = apr_pstrcat(p, "Broken expires header: ", exps, NULL);
}
else if (exp != APR_DATE_BAD && exp < r->request_time)
{
/* if a Expires header is in the past, don't cache it */
reason = "Expires header already expired, not cacheable";
}
else if (!conf->ignorequerystring && r->parsed_uri.query && exps == NULL &&
!ap_cache_liststr(NULL, cc_out, "max-age", NULL)) {
/* if a query string is present but no explicit expiration time,
* don't cache it (RFC 2616/13.9 & 13.2.1)
*/
reason = "Query string present but no explicit expiration time";
}
else if (r->status == HTTP_NOT_MODIFIED &&
!cache->handle && !cache->stale_handle) {
/* if the server said 304 Not Modified but we have no cache
* file - pass this untouched to the user agent, it's not for us.
*/
reason = "HTTP Status 304 Not Modified";
}
else if (r->status == HTTP_OK && lastmods == NULL && etag == NULL
&& (exps == NULL) && (conf->no_last_mod_ignore ==0)) {
/* 200 OK response from HTTP/1.0 and up without Last-Modified,
* Etag, or Expires headers.
*/
/* Note: mod-include clears last_modified/expires/etags - this
* is why we have an optional function for a key-gen ;-)
*/
reason = "No Last-Modified, Etag, or Expires headers";
}
else if (r->header_only && !cache->stale_handle) {
/* Forbid HEAD requests unless we have it cached already */
reason = "HTTP HEAD request";
}
else if (!conf->store_nostore &&
ap_cache_liststr(NULL, cc_out, "no-store", NULL)) {
/* RFC2616 14.9.2 Cache-Control: no-store response
* indicating do not cache, or stop now if you are
* trying to cache it.
*/
/* FIXME: The Cache-Control: no-store could have come in on a 304,
* FIXME: while the original request wasn't conditional. IOW, we
* FIXME: made the the request conditional earlier to revalidate
* FIXME: our cached response.
*/
reason = "Cache-Control: no-store present";
}
else if (!conf->store_private &&
ap_cache_liststr(NULL, cc_out, "private", NULL)) {
/* RFC2616 14.9.1 Cache-Control: private response
* this object is marked for this user's eyes only. Behave
* as a tunnel.
*/
/* FIXME: See above (no-store) */
reason = "Cache-Control: private present";
}
else if (apr_table_get(r->headers_in, "Authorization") != NULL
&& !(ap_cache_liststr(NULL, cc_out, "s-maxage", NULL)
|| ap_cache_liststr(NULL, cc_out, "must-revalidate", NULL)
|| ap_cache_liststr(NULL, cc_out, "public", NULL))) {
/* RFC2616 14.8 Authorisation:
* if authorisation is included in the request, we don't cache,
* but we can cache if the following exceptions are true:
* 1) If Cache-Control: s-maxage is included
* 2) If Cache-Control: must-revalidate is included
* 3) If Cache-Control: public is included
*/
reason = "Authorization required";
}
else if (ap_cache_liststr(NULL,
apr_table_get(r->headers_out, "Vary"),
"*", NULL)) {
reason = "Vary header contains '*'";
}
else if (apr_table_get(r->subprocess_env, "no-cache") != NULL) {
reason = "environment variable 'no-cache' is set";
}
else if (r->no_cache) {
/* or we've been asked not to cache it above */
reason = "r->no_cache present";
}
if (reason) {
ap_log_error(APLOG_MARK, APLOG_DEBUG, 0, r->server,
"cache: %s not cached. Reason: %s", r->unparsed_uri,
reason);
/* remove this filter from the chain */
ap_remove_output_filter(f);
/* ship the data up the stack */
return ap_pass_brigade(f->next, in);
}
/* Make it so that we don't execute this path again. */
cache->in_checked = 1;
/* Set the content length if known.
*/
cl = apr_table_get(r->err_headers_out, "Content-Length");
if (cl == NULL) {
cl = apr_table_get(r->headers_out, "Content-Length");
}
if (cl) {
char *errp;
if (apr_strtoff(&size, cl, &errp, 10) || *errp || size < 0) {
cl = NULL; /* parse error, see next 'if' block */
}
}
if (!cl) {
/* if we don't get the content-length, see if we have all the
* buckets and use their length to calculate the size
*/
apr_bucket *e;
int all_buckets_here=0;
int unresolved_length = 0;
size=0;
for (e = APR_BRIGADE_FIRST(in);
e != APR_BRIGADE_SENTINEL(in);
e = APR_BUCKET_NEXT(e))
{
if (APR_BUCKET_IS_EOS(e)) {
all_buckets_here=1;
break;
}
if (APR_BUCKET_IS_FLUSH(e)) {
unresolved_length = 1;
continue;
}
if (e->length == (apr_size_t)-1) {
break;
}
size += e->length;
}
if (!all_buckets_here) {
size = -1;
}
}
/* It's safe to cache the response.
*
* There are two possiblities at this point:
* - cache->handle == NULL. In this case there is no previously
* cached entity anywhere on the system. We must create a brand
* new entity and store the response in it.
* - cache->stale_handle != NULL. In this case there is a stale
* entity in the system which needs to be replaced by new
* content (unless the result was 304 Not Modified, which means
* the cached entity is actually fresh, and we should update
* the headers).
*/
/* Did we have a stale cache entry that really is stale?
*
* Note that for HEAD requests, we won't get the body, so for a stale
* HEAD request, we don't remove the entity - instead we let the
* CACHE_REMOVE_URL filter remove the stale item from the cache.
*/
if (cache->stale_handle) {
if (r->status == HTTP_NOT_MODIFIED) {
/* Oh, hey. It isn't that stale! Yay! */
cache->handle = cache->stale_handle;
info = &cache->handle->cache_obj->info;
rv = OK;
}
else if (!r->header_only) {
/* Oh, well. Toss it. */
cache->provider->remove_entity(cache->stale_handle);
/* Treat the request as if it wasn't conditional. */
cache->stale_handle = NULL;
/*
* Restore the original request headers as they may be needed
* by further output filters like the byterange filter to make
* the correct decisions.
*/
r->headers_in = cache->stale_headers;
}
}
/* no cache handle, create a new entity only for non-HEAD requests */
if (!cache->handle && !r->header_only) {
rv = cache_create_entity(r, size);
info = apr_pcalloc(r->pool, sizeof(cache_info));
/* We only set info->status upon the initial creation. */
info->status = r->status;
}
if (rv != OK) {
/* Caching layer declined the opportunity to cache the response */
ap_remove_output_filter(f);
return ap_pass_brigade(f->next, in);
}
ap_log_error(APLOG_MARK, APLOG_DEBUG, 0, r->server,
"cache: Caching url: %s", r->unparsed_uri);
/* We are actually caching this response. So it does not
* make sense to remove this entity any more.
*/
ap_log_error(APLOG_MARK, APLOG_DEBUG, 0, r->server,
"cache: Removing CACHE_REMOVE_URL filter.");
ap_remove_output_filter(cache->remove_url_filter);
/*
* We now want to update the cache file header information with
* the new date, last modified, expire and content length and write
* it away to our cache file. First, we determine these values from
* the response, using heuristics if appropriate.
*
* In addition, we make HTTP/1.1 age calculations and write them away
* too.
*/
/* Read the date. Generate one if one is not supplied */
dates = apr_table_get(r->err_headers_out, "Date");
if (dates == NULL) {
dates = apr_table_get(r->headers_out, "Date");
}
if (dates != NULL) {
info->date = apr_date_parse_http(dates);
}
else {
info->date = APR_DATE_BAD;
}
now = apr_time_now();
if (info->date == APR_DATE_BAD) { /* No, or bad date */
/* no date header (or bad header)! */
info->date = now;
}
date = info->date;
/* set response_time for HTTP/1.1 age calculations */
info->response_time = now;
/* get the request time */
info->request_time = r->request_time;
/* check last-modified date */
if (lastmod != APR_DATE_BAD && lastmod > date) {
/* if it's in the future, then replace by date */
lastmod = date;
lastmods = dates;
ap_log_error(APLOG_MARK, APLOG_DEBUG, 0,
r->server,
"cache: Last modified is in the future, "
"replacing with now");
}
/* if no expiry date then
* if Cache-Control: max-age
* expiry date = date + max-age
* else if lastmod
* expiry date = date + min((date - lastmod) * factor, maxexpire)
* else
* expire date = date + defaultexpire
*/
if (exp == APR_DATE_BAD) {
char *max_age_val;
if (ap_cache_liststr(r->pool, cc_out, "max-age", &max_age_val) &&
max_age_val != NULL) {
apr_int64_t x;
errno = 0;
x = apr_atoi64(max_age_val);
if (errno) {
x = conf->defex;
}
else {
x = x * MSEC_ONE_SEC;
}
if (x < conf->minex) {
x = conf->minex;
}
if (x > conf->maxex) {
x = conf->maxex;
}
exp = date + x;
}
else if ((lastmod != APR_DATE_BAD) && (lastmod < date)) {
/* if lastmod == date then you get 0*conf->factor which results in
* an expiration time of now. This causes some problems with
* freshness calculations, so we choose the else path...
*/
apr_time_t x = (apr_time_t) ((date - lastmod) * conf->factor);
if (x < conf->minex) {
x = conf->minex;
}
if (x > conf->maxex) {
x = conf->maxex;
}
exp = date + x;
}
else {
exp = date + conf->defex;
}
}
info->expire = exp;
/* We found a stale entry which wasn't really stale. */
if (cache->stale_handle) {
/* Load in the saved status and clear the status line. */
r->status = info->status;
r->status_line = NULL;
/* RFC 2616 10.3.5 states that entity headers are not supposed
* to be in the 304 response. Therefore, we need to combine the
* response headers with the cached headers *before* we update
* the cached headers.
*
* However, before doing that, we need to first merge in
* err_headers_out and we also need to strip any hop-by-hop
* headers that might have snuck in.
*/
r->headers_out = ap_cache_cacheable_headers_out(r);
/* Merge in our cached headers. However, keep any updated values. */
ap_cache_accept_headers(cache->handle, r, 1);
}
/* Write away header information to cache. It is possible that we are
* trying to update headers for an entity which has already been cached.
*
* This may fail, due to an unwritable cache area. E.g. filesystem full,
* permissions problems or a read-only (re)mount. This must be handled
* later.
*/
rv = cache->provider->store_headers(cache->handle, r, info);
/* Did we just update the cached headers on a revalidated response?
*
* If so, we can now decide what to serve to the client. This is done in
* the same way as with a regular response, but conditions are now checked
* against the cached or merged response headers.
*/
if (cache->stale_handle) {
apr_bucket_brigade *bb;
apr_bucket *bkt;
int status;
bb = apr_brigade_create(r->pool, r->connection->bucket_alloc);
/* Restore the original request headers and see if we need to
* return anything else than the cached response (ie. the original
* request was conditional).
*/
r->headers_in = cache->stale_headers;
status = ap_meets_conditions(r);
if (status != OK) {
r->status = status;
bkt = apr_bucket_flush_create(bb->bucket_alloc);
APR_BRIGADE_INSERT_TAIL(bb, bkt);
}
else {
cache->provider->recall_body(cache->handle, r->pool, bb);
}
cache->block_response = 1;
/* Before returning we need to handle the possible case of an
* unwritable cache. Rather than leaving the entity in the cache
* and having it constantly re-validated, now that we have recalled
* the body it is safe to try and remove the url from the cache.
*/
if (rv != APR_SUCCESS) {
ap_log_error(APLOG_MARK, APLOG_DEBUG, rv, r->server,
"cache: updating headers with store_headers failed. "
"Removing cached url.");
rv = cache->provider->remove_url(cache->stale_handle, r->pool);
if (rv != OK) {
/* Probably a mod_disk_cache cache area has been (re)mounted
* read-only, or that there is a permissions problem.
*/
ap_log_error(APLOG_MARK, APLOG_DEBUG, rv, r->server,
"cache: attempt to remove url from cache unsuccessful.");
}
}
return ap_pass_brigade(f->next, bb);
}
if(rv != APR_SUCCESS) {
ap_log_error(APLOG_MARK, APLOG_DEBUG, rv, r->server,
"cache: store_headers failed");
ap_remove_output_filter(f);
return ap_pass_brigade(f->next, in);
}
rv = cache->provider->store_body(cache->handle, r, in);
if (rv != APR_SUCCESS) {
ap_log_error(APLOG_MARK, APLOG_DEBUG, rv, r->server,
"cache: store_body failed");
ap_remove_output_filter(f);
}
return ap_pass_brigade(f->next, in);
}
/*
* CACHE_REMOVE_URL filter
* ---------------
*
* This filter gets added in the quick handler every time the CACHE_SAVE filter
* gets inserted. Its purpose is to remove a confirmed stale cache entry from
* the cache.
*
* CACHE_REMOVE_URL has to be a protocol filter to ensure that is run even if
* the response is a canned error message, which removes the content filters
* and thus the CACHE_SAVE filter from the chain.
*
* CACHE_REMOVE_URL expects cache request rec within its context because the
* request this filter runs on can be different from the one whose cache entry
* should be removed, due to internal redirects.
*
* Note that CACHE_SAVE_URL (as a content-set filter, hence run before the
* protocol filters) will remove this filter if it decides to cache the file.
* Therefore, if this filter is left in, it must mean we need to toss any
* existing files.
*/
static int cache_remove_url_filter(ap_filter_t *f, apr_bucket_brigade *in)
{
request_rec *r = f->r;
cache_request_rec *cache;
/* Setup cache_request_rec */
cache = (cache_request_rec *) f->ctx;
if (!cache) {
/* user likely configured CACHE_REMOVE_URL manually; they should really
* use mod_cache configuration to do that. So:
* 1. Remove ourselves
* 2. Do nothing and bail out
*/
ap_log_error(APLOG_MARK, APLOG_DEBUG, 0, r->server,
"cache: CACHE_REMOVE_URL enabled unexpectedly");
ap_remove_output_filter(f);
return ap_pass_brigade(f->next, in);
}
/* Now remove this cache entry from the cache */
cache_remove_url(cache, r->pool);
/* remove ourselves */
ap_remove_output_filter(f);
return ap_pass_brigade(f->next, in);
}
/* -------------------------------------------------------------- */
/* Setup configurable data */
static void * create_cache_config(apr_pool_t *p, server_rec *s)
{
cache_server_conf *ps = apr_pcalloc(p, sizeof(cache_server_conf));
/* array of URL prefixes for which caching is enabled */
ps->cacheenable = apr_array_make(p, 10, sizeof(struct cache_enable));
/* array of URL prefixes for which caching is disabled */
ps->cachedisable = apr_array_make(p, 10, sizeof(struct cache_disable));
/* maximum time to cache a document */
ps->maxex = DEFAULT_CACHE_MAXEXPIRE;
ps->maxex_set = 0;
ps->minex = DEFAULT_CACHE_MINEXPIRE;
ps->minex_set = 0;
/* default time to cache a document */
ps->defex = DEFAULT_CACHE_EXPIRE;
ps->defex_set = 0;
/* factor used to estimate Expires date from LastModified date */
ps->factor = DEFAULT_CACHE_LMFACTOR;
ps->factor_set = 0;
ps->no_last_mod_ignore_set = 0;
ps->no_last_mod_ignore = 0;
ps->ignorecachecontrol = 0;
ps->ignorecachecontrol_set = 0;
ps->store_private = 0;
ps->store_private_set = 0;
ps->store_nostore = 0;
ps->store_nostore_set = 0;
/* array of headers that should not be stored in cache */
ps->ignore_headers = apr_array_make(p, 10, sizeof(char *));
ps->ignore_headers_set = CACHE_IGNORE_HEADERS_UNSET;
/* flag indicating that query-string should be ignored when caching */
ps->ignorequerystring = 0;
ps->ignorequerystring_set = 0;
return ps;
}
static void * merge_cache_config(apr_pool_t *p, void *basev, void *overridesv)
{
cache_server_conf *ps = apr_pcalloc(p, sizeof(cache_server_conf));
cache_server_conf *base = (cache_server_conf *) basev;
cache_server_conf *overrides = (cache_server_conf *) overridesv;
/* array of URL prefixes for which caching is disabled */
ps->cachedisable = apr_array_append(p,
base->cachedisable,
overrides->cachedisable);
/* array of URL prefixes for which caching is enabled */
ps->cacheenable = apr_array_append(p,
base->cacheenable,
overrides->cacheenable);
/* maximum time to cache a document */
ps->maxex = (overrides->maxex_set == 0) ? base->maxex : overrides->maxex;
ps->minex = (overrides->minex_set == 0) ? base->minex : overrides->minex;
/* default time to cache a document */
ps->defex = (overrides->defex_set == 0) ? base->defex : overrides->defex;
/* factor used to estimate Expires date from LastModified date */
ps->factor =
(overrides->factor_set == 0) ? base->factor : overrides->factor;
ps->no_last_mod_ignore =
(overrides->no_last_mod_ignore_set == 0)
? base->no_last_mod_ignore
: overrides->no_last_mod_ignore;
ps->ignorecachecontrol =
(overrides->ignorecachecontrol_set == 0)
? base->ignorecachecontrol
: overrides->ignorecachecontrol;
ps->store_private =
(overrides->store_private_set == 0)
? base->store_private
: overrides->store_private;
ps->store_nostore =
(overrides->store_nostore_set == 0)
? base->store_nostore
: overrides->store_nostore;
ps->ignore_headers =
(overrides->ignore_headers_set == CACHE_IGNORE_HEADERS_UNSET)
? base->ignore_headers
: overrides->ignore_headers;
ps->ignorequerystring =
(overrides->ignorequerystring_set == 0)
? base->ignorequerystring
: overrides->ignorequerystring;
return ps;
}
static const char *set_cache_ignore_no_last_mod(cmd_parms *parms, void *dummy,
int flag)
{
cache_server_conf *conf;
conf =
(cache_server_conf *)ap_get_module_config(parms->server->module_config,
&cache_module);
conf->no_last_mod_ignore = flag;
conf->no_last_mod_ignore_set = 1;
return NULL;
}
static const char *set_cache_ignore_cachecontrol(cmd_parms *parms,
void *dummy, int flag)
{
cache_server_conf *conf;
conf =
(cache_server_conf *)ap_get_module_config(parms->server->module_config,
&cache_module);
conf->ignorecachecontrol = flag;
conf->ignorecachecontrol_set = 1;
return NULL;
}
static const char *set_cache_store_private(cmd_parms *parms, void *dummy,
int flag)
{
cache_server_conf *conf;
conf =
(cache_server_conf *)ap_get_module_config(parms->server->module_config,
&cache_module);
conf->store_private = flag;
conf->store_private_set = 1;
return NULL;
}
static const char *set_cache_store_nostore(cmd_parms *parms, void *dummy,
int flag)
{
cache_server_conf *conf;
conf =
(cache_server_conf *)ap_get_module_config(parms->server->module_config,
&cache_module);
conf->store_nostore = flag;
conf->store_nostore_set = 1;
return NULL;
}
static const char *add_ignore_header(cmd_parms *parms, void *dummy,
const char *header)
{
cache_server_conf *conf;
char **new;
conf =
(cache_server_conf *)ap_get_module_config(parms->server->module_config,
&cache_module);
if (!strncasecmp(header, "None", 4)) {
/* if header None is listed clear array */
conf->ignore_headers->nelts = 0;
}
else {
if ((conf->ignore_headers_set == CACHE_IGNORE_HEADERS_UNSET) ||
(conf->ignore_headers->nelts)) {
/* Only add header if no "None" has been found in header list
* so far.
* (When 'None' is passed, IGNORE_HEADERS_SET && nelts == 0.)
*/
new = (char **)apr_array_push(conf->ignore_headers);
(*new) = (char *)header;
}
}
conf->ignore_headers_set = CACHE_IGNORE_HEADERS_SET;
return NULL;
}
static const char *add_cache_enable(cmd_parms *parms, void *dummy,
const char *type,
const char *url)
{
cache_server_conf *conf;
struct cache_enable *new;
if (*type == '/') {
return apr_psprintf(parms->pool,
"provider (%s) starts with a '/'. Are url and provider switched?",
type);
}
conf =
(cache_server_conf *)ap_get_module_config(parms->server->module_config,
&cache_module);
new = apr_array_push(conf->cacheenable);
new->type = type;
if (apr_uri_parse(parms->pool, url, &(new->url))) {
return NULL;
}
if (new->url.path) {
new->pathlen = strlen(new->url.path);
} else {
new->pathlen = 1;
new->url.path = "/";
}
return NULL;
}
static const char *add_cache_disable(cmd_parms *parms, void *dummy,
const char *url)
{
cache_server_conf *conf;
struct cache_disable *new;
conf =
(cache_server_conf *)ap_get_module_config(parms->server->module_config,
&cache_module);
new = apr_array_push(conf->cachedisable);
if (apr_uri_parse(parms->pool, url, &(new->url))) {
return NULL;
}
if (new->url.path) {
new->pathlen = strlen(new->url.path);
} else {
new->pathlen = 1;
new->url.path = "/";
}
return NULL;
}
static const char *set_cache_maxex(cmd_parms *parms, void *dummy,
const char *arg)
{
cache_server_conf *conf;
conf =
(cache_server_conf *)ap_get_module_config(parms->server->module_config,
&cache_module);
conf->maxex = (apr_time_t) (atol(arg) * MSEC_ONE_SEC);
conf->maxex_set = 1;
return NULL;
}
static const char *set_cache_minex(cmd_parms *parms, void *dummy,
const char *arg)
{
cache_server_conf *conf;
conf =
(cache_server_conf *)ap_get_module_config(parms->server->module_config,
&cache_module);
conf->minex = (apr_time_t) (atol(arg) * MSEC_ONE_SEC);
conf->minex_set = 1;
return NULL;
}
static const char *set_cache_defex(cmd_parms *parms, void *dummy,
const char *arg)
{
cache_server_conf *conf;
conf =
(cache_server_conf *)ap_get_module_config(parms->server->module_config,
&cache_module);
conf->defex = (apr_time_t) (atol(arg) * MSEC_ONE_SEC);
conf->defex_set = 1;
return NULL;
}
static const char *set_cache_factor(cmd_parms *parms, void *dummy,
const char *arg)
{
cache_server_conf *conf;
double val;
conf =
(cache_server_conf *)ap_get_module_config(parms->server->module_config,
&cache_module);
if (sscanf(arg, "%lg", &val) != 1) {
return "CacheLastModifiedFactor value must be a float";
}
conf->factor = val;
conf->factor_set = 1;
return NULL;
}
static const char *set_cache_ignore_querystring(cmd_parms *parms, void *dummy,
int flag)
{
cache_server_conf *conf;
conf =
(cache_server_conf *)ap_get_module_config(parms->server->module_config,
&cache_module);
conf->ignorequerystring = flag;
conf->ignorequerystring_set = 1;
return NULL;
}
static int cache_post_config(apr_pool_t *p, apr_pool_t *plog,
apr_pool_t *ptemp, server_rec *s)
{
/* This is the means by which unusual (non-unix) os's may find alternate
* means to run a given command (e.g. shebang/registry parsing on Win32)
*/
cache_generate_key = APR_RETRIEVE_OPTIONAL_FN(ap_cache_generate_key);
if (!cache_generate_key) {
cache_generate_key = cache_generate_key_default;
}
return OK;
}
static const command_rec cache_cmds[] =
{
/* XXX
* Consider a new config directive that enables loading specific cache
* implememtations (like mod_cache_mem, mod_cache_file, etc.).
* Rather than using a LoadModule directive, admin would use something
* like CacheModule mem_cache_module | file_cache_module, etc,
* which would cause the approprpriate cache module to be loaded.
* This is more intuitive that requiring a LoadModule directive.
*/
AP_INIT_TAKE2("CacheEnable", add_cache_enable, NULL, RSRC_CONF,
"A cache type and partial URL prefix below which "
"caching is enabled"),
AP_INIT_TAKE1("CacheDisable", add_cache_disable, NULL, RSRC_CONF,
"A partial URL prefix below which caching is disabled"),
AP_INIT_TAKE1("CacheMaxExpire", set_cache_maxex, NULL, RSRC_CONF,
"The maximum time in seconds to cache a document"),
AP_INIT_TAKE1("CacheMinExpire", set_cache_minex, NULL, RSRC_CONF,
"The minimum time in seconds to cache a document"),
AP_INIT_TAKE1("CacheDefaultExpire", set_cache_defex, NULL, RSRC_CONF,
"The default time in seconds to cache a document"),
AP_INIT_FLAG("CacheIgnoreNoLastMod", set_cache_ignore_no_last_mod, NULL,
RSRC_CONF,
"Ignore Responses where there is no Last Modified Header"),
AP_INIT_FLAG("CacheIgnoreCacheControl", set_cache_ignore_cachecontrol,
NULL, RSRC_CONF,
"Ignore requests from the client for uncached content"),
AP_INIT_FLAG("CacheStorePrivate", set_cache_store_private,
NULL, RSRC_CONF,
"Ignore 'Cache-Control: private' and store private content"),
AP_INIT_FLAG("CacheStoreNoStore", set_cache_store_nostore,
NULL, RSRC_CONF,
"Ignore 'Cache-Control: no-store' and store sensitive content"),
AP_INIT_ITERATE("CacheIgnoreHeaders", add_ignore_header, NULL, RSRC_CONF,
"A space separated list of headers that should not be "
"stored by the cache"),
AP_INIT_FLAG("CacheIgnoreQueryString", set_cache_ignore_querystring,
NULL, RSRC_CONF,
"Ignore query-string when caching"),
AP_INIT_TAKE1("CacheLastModifiedFactor", set_cache_factor, NULL, RSRC_CONF,
"The factor used to estimate Expires date from "
"LastModified date"),
{NULL}
};
static void register_hooks(apr_pool_t *p)
{
/* cache initializer */
/* cache handler */
ap_hook_quick_handler(cache_url_handler, NULL, NULL, APR_HOOK_FIRST);
/* cache filters
* XXX The cache filters need to run right after the handlers and before
* any other filters. Consider creating AP_FTYPE_CACHE for this purpose.
*
* Depending on the type of request (subrequest / main request) they
* need to be run before AP_FTYPE_CONTENT_SET / after AP_FTYPE_CONTENT_SET
* filters. Thus create two filter handles for each type:
* cache_save_filter_handle / cache_out_filter_handle to be used by
* main requests and
* cache_save_subreq_filter_handle / cache_out_subreq_filter_handle
* to be run by subrequest
*/
/*
* CACHE_SAVE must go into the filter chain after a possible DEFLATE
* filter to ensure that the compressed content is stored.
* Incrementing filter type by 1 ensures his happens.
*/
cache_save_filter_handle =
ap_register_output_filter("CACHE_SAVE",
cache_save_filter,
NULL,
AP_FTYPE_CONTENT_SET+1);
/*
* CACHE_SAVE_SUBREQ must go into the filter chain before SUBREQ_CORE to
* handle subrequsts. Decrementing filter type by 1 ensures this
* happens.
*/
cache_save_subreq_filter_handle =
ap_register_output_filter("CACHE_SAVE_SUBREQ",
cache_save_filter,
NULL,
AP_FTYPE_CONTENT_SET-1);
/*
* CACHE_OUT must go into the filter chain after a possible DEFLATE
* filter to ensure that already compressed cache objects do not
* get compressed again. Incrementing filter type by 1 ensures
* his happens.
*/
cache_out_filter_handle =
ap_register_output_filter("CACHE_OUT",
cache_out_filter,
NULL,
AP_FTYPE_CONTENT_SET+1);
/*
* CACHE_OUT_SUBREQ must go into the filter chain before SUBREQ_CORE to
* handle subrequsts. Decrementing filter type by 1 ensures this
* happens.
*/
cache_out_subreq_filter_handle =
ap_register_output_filter("CACHE_OUT_SUBREQ",
cache_out_filter,
NULL,
AP_FTYPE_CONTENT_SET-1);
/* CACHE_REMOVE_URL has to be a protocol filter to ensure that is
* run even if the response is a canned error message, which
* removes the content filters.
*/
cache_remove_url_filter_handle =
ap_register_output_filter("CACHE_REMOVE_URL",
cache_remove_url_filter,
NULL,
AP_FTYPE_PROTOCOL);
ap_hook_post_config(cache_post_config, NULL, NULL, APR_HOOK_REALLY_FIRST);
}
module AP_MODULE_DECLARE_DATA cache_module =
{
STANDARD20_MODULE_STUFF,
NULL, /* create per-directory config structure */
NULL, /* merge per-directory config structures */
create_cache_config, /* create per-server config structure */
merge_cache_config, /* merge per-server config structures */
cache_cmds, /* command apr_table_t */
register_hooks
};