blob: b366cb3250ea69e01ab20e694dbd956e9836605b [file] [log] [blame]
/* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* mod_substitute.c: Perform content rewriting on the fly
*/
#include "httpd.h"
#include "http_config.h"
#include "http_core.h"
#include "http_log.h"
#include "apr_general.h"
#include "apr_strings.h"
#include "apr_strmatch.h"
#include "apr_lib.h"
#include "util_filter.h"
#include "util_varbuf.h"
#include "apr_buckets.h"
#include "http_request.h"
#define APR_WANT_STRFUNC
#include "apr_want.h"
/*
* We want to limit the memory usage in a way that is predictable.
* Therefore we limit the resulting length of the line.
* This is the default value.
*/
#define AP_SUBST_MAX_LINE_LENGTH (1024*1024)
static const char substitute_filter_name[] = "SUBSTITUTE";
module AP_MODULE_DECLARE_DATA substitute_module;
typedef struct subst_pattern_t {
const apr_strmatch_pattern *pattern;
const ap_regex_t *regexp;
const char *replacement;
apr_size_t replen;
apr_size_t patlen;
int flatten;
} subst_pattern_t;
typedef struct {
apr_array_header_t *patterns;
apr_size_t max_line_length;
int max_line_length_set;
int inherit_before;
} subst_dir_conf;
typedef struct {
apr_bucket_brigade *linebb;
apr_bucket_brigade *linesbb;
apr_bucket_brigade *passbb;
apr_bucket_brigade *pattbb;
apr_pool_t *tpool;
} substitute_module_ctx;
static void *create_substitute_dcfg(apr_pool_t *p, char *d)
{
subst_dir_conf *dcfg =
(subst_dir_conf *) apr_palloc(p, sizeof(subst_dir_conf));
dcfg->patterns = apr_array_make(p, 10, sizeof(subst_pattern_t));
dcfg->max_line_length = AP_SUBST_MAX_LINE_LENGTH;
dcfg->max_line_length_set = 0;
dcfg->inherit_before = -1;
return dcfg;
}
static void *merge_substitute_dcfg(apr_pool_t *p, void *basev, void *overv)
{
subst_dir_conf *a =
(subst_dir_conf *) apr_palloc(p, sizeof(subst_dir_conf));
subst_dir_conf *base = (subst_dir_conf *) basev;
subst_dir_conf *over = (subst_dir_conf *) overv;
a->inherit_before = (over->inherit_before != -1)
? over->inherit_before
: base->inherit_before;
/* SubstituteInheritBefore wasn't the default behavior until 2.5.x,
* and may be re-disabled as desired; the original default behavior
* was to apply inherited subst patterns after locally scoped patterns.
* In later 2.2 and 2.4 versions, SubstituteInheritBefore may be toggled
* 'on' to follow the corrected/expected behavior, without violating POLS.
*/
if (a->inherit_before) {
a->patterns = apr_array_append(p, base->patterns,
over->patterns);
}
else {
a->patterns = apr_array_append(p, over->patterns,
base->patterns);
}
a->max_line_length = over->max_line_length_set ?
over->max_line_length : base->max_line_length;
a->max_line_length_set = over->max_line_length_set
| base->max_line_length_set;
return a;
}
#define AP_MAX_BUCKETS 1000
#define SEDRMPATBCKT(b, offset, tmp_b, patlen) do { \
apr_bucket_split(b, offset); \
tmp_b = APR_BUCKET_NEXT(b); \
apr_bucket_split(tmp_b, patlen); \
b = APR_BUCKET_NEXT(tmp_b); \
apr_bucket_delete(tmp_b); \
} while (0)
static apr_status_t do_pattmatch(ap_filter_t *f, apr_bucket *inb,
apr_bucket_brigade *mybb,
apr_pool_t *pool)
{
int i;
int force_quick = 0;
ap_regmatch_t regm[AP_MAX_REG_MATCH];
apr_size_t bytes;
apr_size_t len;
const char *buff;
struct ap_varbuf vb;
apr_bucket *b;
apr_bucket *tmp_b;
subst_dir_conf *cfg =
(subst_dir_conf *) ap_get_module_config(f->r->per_dir_config,
&substitute_module);
subst_pattern_t *script;
APR_BRIGADE_INSERT_TAIL(mybb, inb);
ap_varbuf_init(pool, &vb, 0);
script = (subst_pattern_t *) cfg->patterns->elts;
/*
* Simple optimization. If we only have one pattern, then
* we can safely avoid the overhead of flattening
*/
if (cfg->patterns->nelts == 1) {
force_quick = 1;
}
for (i = 0; i < cfg->patterns->nelts; i++) {
for (b = APR_BRIGADE_FIRST(mybb);
b != APR_BRIGADE_SENTINEL(mybb);
b = APR_BUCKET_NEXT(b)) {
if (APR_BUCKET_IS_METADATA(b)) {
/*
* we should NEVER see this, because we should never
* be passed any, but "handle" it just in case.
*/
continue;
}
if (apr_bucket_read(b, &buff, &bytes, APR_BLOCK_READ)
== APR_SUCCESS) {
int have_match = 0;
vb.strlen = 0;
if (script->pattern) {
const char *repl;
/*
* space_left counts how many bytes we have left until the
* line length reaches max_line_length.
*/
apr_size_t space_left = cfg->max_line_length;
apr_size_t repl_len = strlen(script->replacement);
while ((repl = apr_strmatch(script->pattern, buff, bytes)))
{
have_match = 1;
/* get offset into buff for pattern */
len = (apr_size_t) (repl - buff);
if (script->flatten && !force_quick) {
/*
* We are flattening the buckets here, meaning
* that we don't do the fast bucket splits.
* Instead we copy over what the buckets would
* contain and use them. This is slow, since we
* are constanting allocing space and copying
* strings.
*/
if (vb.strlen + len + repl_len > cfg->max_line_length)
return APR_ENOMEM;
ap_varbuf_strmemcat(&vb, buff, len);
ap_varbuf_strmemcat(&vb, script->replacement, repl_len);
}
else {
/*
* The string before the match but after the
* previous match (if any) has length 'len'.
* Check if we still have space for this string and
* the replacement string.
*/
if (space_left < len + repl_len)
return APR_ENOMEM;
space_left -= len + repl_len;
/*
* We now split off the string before the match
* as its own bucket, then isolate the matched
* string and delete it.
*/
SEDRMPATBCKT(b, len, tmp_b, script->patlen);
/*
* Finally, we create a bucket that contains the
* replacement...
*/
tmp_b = apr_bucket_transient_create(script->replacement,
script->replen,
f->r->connection->bucket_alloc);
/* ... and insert it */
APR_BUCKET_INSERT_BEFORE(b, tmp_b);
}
/* now we need to adjust buff for all these changes */
len += script->patlen;
bytes -= len;
buff += len;
}
if (have_match) {
if (script->flatten && !force_quick) {
/* XXX: we should check for AP_MAX_BUCKETS here and
* XXX: call ap_pass_brigade accordingly
*/
char *copy = ap_varbuf_pdup(pool, &vb, NULL, 0,
buff, bytes, &len);
tmp_b = apr_bucket_pool_create(copy, len, pool,
f->r->connection->bucket_alloc);
APR_BUCKET_INSERT_BEFORE(b, tmp_b);
apr_bucket_delete(b);
b = tmp_b;
}
else {
/*
* We want the behaviour to be predictable.
* Therefore we try to always error out if the
* line length is larger than the limit,
* regardless of the content of the line. So,
* let's check if the remaining non-matching
* string does not exceed the limit.
*/
if (space_left < b->length)
return APR_ENOMEM;
}
}
}
else if (script->regexp) {
int left = bytes;
const char *pos = buff;
char *repl;
apr_size_t space_left = cfg->max_line_length;
while (!ap_regexec_len(script->regexp, pos, left,
AP_MAX_REG_MATCH, regm, 0)) {
apr_status_t rv;
have_match = 1;
if (script->flatten && !force_quick) {
/* check remaining buffer size */
/* Note that the last param in ap_varbuf_regsub below
* must stay positive. If it gets 0, it would mean
* unlimited space available. */
if (vb.strlen + regm[0].rm_so >= cfg->max_line_length)
return APR_ENOMEM;
/* copy bytes before the match */
if (regm[0].rm_so > 0)
ap_varbuf_strmemcat(&vb, pos, regm[0].rm_so);
/* add replacement string, last argument is unsigned! */
rv = ap_varbuf_regsub(&vb, script->replacement, pos,
AP_MAX_REG_MATCH, regm,
cfg->max_line_length - vb.strlen);
if (rv != APR_SUCCESS)
return rv;
}
else {
apr_size_t repl_len;
/* acount for string before the match */
if (space_left <= regm[0].rm_so)
return APR_ENOMEM;
space_left -= regm[0].rm_so;
rv = ap_pregsub_ex(pool, &repl,
script->replacement, pos,
AP_MAX_REG_MATCH, regm,
space_left);
if (rv != APR_SUCCESS)
return rv;
repl_len = strlen(repl);
space_left -= repl_len;
len = (apr_size_t) (regm[0].rm_eo - regm[0].rm_so);
SEDRMPATBCKT(b, regm[0].rm_so, tmp_b, len);
tmp_b = apr_bucket_transient_create(repl, repl_len,
f->r->connection->bucket_alloc);
APR_BUCKET_INSERT_BEFORE(b, tmp_b);
}
/*
* reset to past what we just did. pos now maps to b
* again
*/
pos += regm[0].rm_eo;
left -= regm[0].rm_eo;
}
if (have_match && script->flatten && !force_quick) {
char *copy;
/* Copy result plus the part after the last match into
* a bucket.
*/
copy = ap_varbuf_pdup(pool, &vb, NULL, 0, pos, left,
&len);
tmp_b = apr_bucket_pool_create(copy, len, pool,
f->r->connection->bucket_alloc);
APR_BUCKET_INSERT_BEFORE(b, tmp_b);
apr_bucket_delete(b);
b = tmp_b;
}
}
else {
ap_assert(0);
continue;
}
}
}
script++;
}
ap_varbuf_free(&vb);
return APR_SUCCESS;
}
static apr_status_t substitute_filter(ap_filter_t *f, apr_bucket_brigade *bb)
{
apr_size_t bytes;
apr_size_t len;
apr_size_t fbytes;
const char *buff;
const char *nl = NULL;
char *bflat;
apr_bucket *b;
apr_bucket *tmp_b;
apr_bucket_brigade *tmp_bb = NULL;
apr_status_t rv;
subst_dir_conf *cfg =
(subst_dir_conf *) ap_get_module_config(f->r->per_dir_config,
&substitute_module);
substitute_module_ctx *ctx = f->ctx;
/*
* First time around? Create the saved bb that we used for each pass
* through. Note that we can also get here when we explicitly clear ctx,
* for error handling
*/
if (!ctx) {
f->ctx = ctx = apr_pcalloc(f->r->pool, sizeof(*ctx));
/*
* Create all the temporary brigades we need and reuse them to avoid
* creating them over and over again from r->pool which would cost a
* lot of memory in some cases.
*/
ctx->linebb = apr_brigade_create(f->r->pool, f->c->bucket_alloc);
ctx->linesbb = apr_brigade_create(f->r->pool, f->c->bucket_alloc);
ctx->pattbb = apr_brigade_create(f->r->pool, f->c->bucket_alloc);
/*
* Everything to be passed to the next filter goes in
* here, our pass brigade.
*/
ctx->passbb = apr_brigade_create(f->r->pool, f->c->bucket_alloc);
/* Create our temporary pool only once */
apr_pool_create(&(ctx->tpool), f->r->pool);
apr_table_unset(f->r->headers_out, "Content-Length");
}
/*
* Shortcircuit processing
*/
if (APR_BRIGADE_EMPTY(bb))
return APR_SUCCESS;
/*
* Here's the concept:
* Read in the data and look for newlines. Once we
* find a full "line", add it to our working brigade.
* If we've finished reading the brigade and we have
* any left over data (not a "full" line), store that
* for the next pass.
*
* Note: anything stored in ctx->linebb for sure does not have
* a newline char, so we don't concat that bb with the
* new bb, since we would spending time searching for the newline
* in data we know it doesn't exist. So instead, we simply scan
* our current bb and, if we see a newline, prepend ctx->linebb
* to the front of it. This makes the code much less straight-
* forward (otherwise we could APR_BRIGADE_CONCAT(ctx->linebb, bb)
* and just scan for newlines and not bother with needing to know
* when ctx->linebb needs to be reset) but also faster. We'll take
* the speed.
*
* Note: apr_brigade_split_line would be nice here, but we
* really can't use it since we need more control and we want
* to re-use already read bucket data.
*
* See mod_include if still confused :)
*/
while ((b = APR_BRIGADE_FIRST(bb)) && (b != APR_BRIGADE_SENTINEL(bb))) {
if (APR_BUCKET_IS_EOS(b)) {
/*
* if we see the EOS, then we need to pass along everything we
* have. But if the ctx->linebb isn't empty, then we need to add
* that to the end of what we'll be passing.
*/
if (!APR_BRIGADE_EMPTY(ctx->linebb)) {
rv = apr_brigade_pflatten(ctx->linebb, &bflat,
&fbytes, ctx->tpool);
if (rv != APR_SUCCESS)
goto err;
if (fbytes > cfg->max_line_length) {
rv = APR_ENOMEM;
goto err;
}
tmp_b = apr_bucket_transient_create(bflat, fbytes,
f->r->connection->bucket_alloc);
rv = do_pattmatch(f, tmp_b, ctx->pattbb, ctx->tpool);
if (rv != APR_SUCCESS)
goto err;
APR_BRIGADE_CONCAT(ctx->passbb, ctx->pattbb);
apr_brigade_cleanup(ctx->linebb);
}
APR_BUCKET_REMOVE(b);
APR_BRIGADE_INSERT_TAIL(ctx->passbb, b);
ap_remove_output_filter(f);
}
/*
* No need to handle FLUSH buckets separately as we call
* ap_pass_brigade anyway at the end of the loop.
*/
else if (APR_BUCKET_IS_METADATA(b)) {
APR_BUCKET_REMOVE(b);
APR_BRIGADE_INSERT_TAIL(ctx->passbb, b);
}
else {
/*
* We have actual "data" so read in as much as we can and start
* scanning and splitting from our read buffer
*/
rv = apr_bucket_read(b, &buff, &bytes, APR_BLOCK_READ);
if (rv != APR_SUCCESS || bytes == 0) {
apr_bucket_delete(b);
}
else {
int num = 0;
while (bytes > 0) {
nl = memchr(buff, APR_ASCII_LF, bytes);
if (nl) {
len = (apr_size_t) (nl - buff) + 1;
/* split *after* the newline */
apr_bucket_split(b, len);
/*
* We've likely read more data, so bypass rereading
* bucket data and continue scanning through this
* buffer
*/
bytes -= len;
buff += len;
/*
* we need b to be updated for future potential
* splitting
*/
tmp_b = APR_BUCKET_NEXT(b);
APR_BUCKET_REMOVE(b);
/*
* Hey, we found a newline! Don't forget the old
* stuff that needs to be added to the front. So we
* add the split bucket to the end, flatten the whole
* bb, morph the whole shebang into a bucket which is
* then added to the tail of the newline bb.
*/
if (!APR_BRIGADE_EMPTY(ctx->linebb)) {
APR_BRIGADE_INSERT_TAIL(ctx->linebb, b);
rv = apr_brigade_pflatten(ctx->linebb, &bflat,
&fbytes, ctx->tpool);
if (rv != APR_SUCCESS)
goto err;
if (fbytes > cfg->max_line_length) {
/* Avoid pflattening further lines, we will
* abort later on anyway.
*/
rv = APR_ENOMEM;
goto err;
}
b = apr_bucket_transient_create(bflat, fbytes,
f->r->connection->bucket_alloc);
apr_brigade_cleanup(ctx->linebb);
}
rv = do_pattmatch(f, b, ctx->pattbb, ctx->tpool);
if (rv != APR_SUCCESS)
goto err;
/*
* Count how many buckets we have in ctx->passbb
* so far. Yes, this is correct we count ctx->passbb
* and not ctx->pattbb as we do not reset num on every
* iteration.
*/
for (b = APR_BRIGADE_FIRST(ctx->pattbb);
b != APR_BRIGADE_SENTINEL(ctx->pattbb);
b = APR_BUCKET_NEXT(b)) {
num++;
}
APR_BRIGADE_CONCAT(ctx->passbb, ctx->pattbb);
/*
* If the number of buckets in ctx->passbb reaches an
* "insane" level, we consume much memory for all the
* buckets as such. So lets flush them down the chain
* in this case and thus clear ctx->passbb. This frees
* the buckets memory for further processing.
* Usually this condition should not become true, but
* it is a safety measure for edge cases.
*/
if (num > AP_MAX_BUCKETS) {
b = apr_bucket_flush_create(
f->r->connection->bucket_alloc);
APR_BRIGADE_INSERT_TAIL(ctx->passbb, b);
rv = ap_pass_brigade(f->next, ctx->passbb);
apr_brigade_cleanup(ctx->passbb);
num = 0;
apr_pool_clear(ctx->tpool);
if (rv != APR_SUCCESS)
goto err;
}
b = tmp_b;
}
else {
/*
* no newline in whatever is left of this buffer so
* tuck data away and get next bucket
*/
APR_BUCKET_REMOVE(b);
APR_BRIGADE_INSERT_TAIL(ctx->linebb, b);
bytes = 0;
}
}
}
}
if (!APR_BRIGADE_EMPTY(ctx->passbb)) {
rv = ap_pass_brigade(f->next, ctx->passbb);
apr_brigade_cleanup(ctx->passbb);
if (rv != APR_SUCCESS)
goto err;
}
apr_pool_clear(ctx->tpool);
}
/* Anything left we want to save/setaside for the next go-around */
if (!APR_BRIGADE_EMPTY(ctx->linebb)) {
/*
* Provide ap_save_brigade with an existing empty brigade
* (ctx->linesbb) to avoid creating a new one.
*/
ap_save_brigade(f, &(ctx->linesbb), &(ctx->linebb), f->r->pool);
tmp_bb = ctx->linebb;
ctx->linebb = ctx->linesbb;
ctx->linesbb = tmp_bb;
}
return APR_SUCCESS;
err:
if (rv == APR_ENOMEM)
ap_log_rerror(APLOG_MARK, APLOG_ERR, 0, f->r, APLOGNO(01328) "Line too long, URI %s",
f->r->uri);
apr_pool_clear(ctx->tpool);
return rv;
}
static const char *set_pattern(cmd_parms *cmd, void *cfg, const char *line)
{
char *from = NULL;
char *to = NULL;
char *flags = NULL;
char *ourline;
char delim;
subst_pattern_t *nscript;
int is_pattern = 0;
int ignore_case = 0;
int flatten = 1;
ap_regex_t *r = NULL;
if (apr_tolower(*line) != 's') {
return "Bad Substitute format, must be an s/// pattern";
}
ourline = apr_pstrdup(cmd->pool, line);
delim = *++ourline;
if (delim)
from = ++ourline;
if (from) {
if (*ourline != delim) {
while (*++ourline && *ourline != delim);
}
if (*ourline) {
*ourline = '\0';
to = ++ourline;
}
}
if (to) {
if (*ourline != delim) {
while (*++ourline && *ourline != delim);
}
if (*ourline) {
*ourline = '\0';
flags = ++ourline;
}
}
if (!delim || !from || !*from || !to) {
return "Bad Substitute format, must be a complete s/// pattern";
}
if (flags) {
while (*flags) {
delim = apr_tolower(*flags); /* re-use */
if (delim == 'i')
ignore_case = 1;
else if (delim == 'n')
is_pattern = 1;
else if (delim == 'f')
flatten = 1;
else if (delim == 'q')
flatten = 0;
else
return "Bad Substitute flag, only s///[infq] are supported";
flags++;
}
}
/* first see if we can compile the regex */
if (!is_pattern) {
r = ap_pregcomp(cmd->pool, from, AP_REG_EXTENDED |
(ignore_case ? AP_REG_ICASE : 0));
if (!r)
return "Substitute could not compile regex";
}
nscript = apr_array_push(((subst_dir_conf *) cfg)->patterns);
/* init the new entries */
nscript->pattern = NULL;
nscript->regexp = NULL;
nscript->replacement = NULL;
nscript->patlen = 0;
if (is_pattern) {
nscript->patlen = strlen(from);
nscript->pattern = apr_strmatch_precompile(cmd->pool, from,
!ignore_case);
}
else {
nscript->regexp = r;
}
nscript->replacement = to;
nscript->replen = strlen(to);
nscript->flatten = flatten;
return NULL;
}
#define KBYTE 1024
#define MBYTE 1048576
#define GBYTE 1073741824
static const char *set_max_line_length(cmd_parms *cmd, void *cfg, const char *arg)
{
subst_dir_conf *dcfg = (subst_dir_conf *)cfg;
apr_off_t max;
char *end;
apr_status_t rv;
rv = apr_strtoff(&max, arg, &end, 10);
if (rv == APR_SUCCESS) {
if ((*end == 'K' || *end == 'k') && !end[1]) {
max *= KBYTE;
}
else if ((*end == 'M' || *end == 'm') && !end[1]) {
max *= MBYTE;
}
else if ((*end == 'G' || *end == 'g') && !end[1]) {
max *= GBYTE;
}
else if (*end && /* neither empty nor [Bb] */
((*end != 'B' && *end != 'b') || end[1])) {
rv = APR_EGENERAL;
}
}
if (rv != APR_SUCCESS || max < 0)
{
return "SubstituteMaxLineLength must be a non-negative integer optionally "
"suffixed with 'b', 'k', 'm' or 'g'.";
}
dcfg->max_line_length = (apr_size_t)max;
dcfg->max_line_length_set = 1;
return NULL;
}
#define PROTO_FLAGS AP_FILTER_PROTO_CHANGE|AP_FILTER_PROTO_CHANGE_LENGTH
static void register_hooks(apr_pool_t *pool)
{
ap_register_output_filter(substitute_filter_name, substitute_filter,
NULL, AP_FTYPE_RESOURCE);
}
static const command_rec substitute_cmds[] = {
AP_INIT_TAKE1("Substitute", set_pattern, NULL, OR_FILEINFO,
"Pattern to filter the response content (s/foo/bar/[inf])"),
AP_INIT_TAKE1("SubstituteMaxLineLength", set_max_line_length, NULL, OR_FILEINFO,
"Maximum line length"),
AP_INIT_FLAG("SubstituteInheritBefore", ap_set_flag_slot,
(void *)APR_OFFSETOF(subst_dir_conf, inherit_before), OR_FILEINFO,
"Apply inherited patterns before those of the current context"),
{NULL}
};
AP_DECLARE_MODULE(substitute) = {
STANDARD20_MODULE_STUFF,
create_substitute_dcfg, /* dir config creater */
merge_substitute_dcfg, /* dir merger --- default is to override */
NULL, /* server config */
NULL, /* merge server config */
substitute_cmds, /* command table */
register_hooks /* register hooks */
};