/* blob: c154c8ee4af695ddbdc35b9b4e77086b71c67c20 [file] [log] [blame] */
/* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* simple hokey charset recoding configuration module
*
* See mod_ebcdic and mod_charset for more thought-out examples. This
* one is just so Jeff can learn how a module works and experiment with
* basic character set recoding configuration.
*
* !!!This is an extremely cheap ripoff of mod_charset.c from Russian Apache!!!
*/
#include "httpd.h"
#include "http_config.h"
#include "http_core.h"
#include "http_log.h"
#include "http_main.h"
#include "http_protocol.h"
#include "http_request.h"
#include "util_charset.h"
#include "apr_buckets.h"
#include "util_filter.h"
#include "apr_strings.h"
#include "apr_lib.h"
#include "apr_xlate.h"
#define APR_WANT_STRFUNC
#include "apr_want.h"
/* Tunables for the translation buffers used by the two filters below. */
#define OUTPUT_XLATE_BUF_SIZE (16*1024) /* size of translation buffer used on output */
#define INPUT_XLATE_BUF_SIZE (8*1024) /* size of translation buffer used on input */
#define XLATE_MIN_BUFF_LEFT 128 /* flush (output) or stop converting (input)
* once fewer than this many bytes are
* left in the translation buffer
*/
#define FATTEST_CHAR 8 /* size of the buffer used to reassemble a character
* that straddles two buckets; wider characters are
* not handled
*/
/* extended error status codes; this is used in addition to an apr_status_t to
* track errors in the translation filter; log_xlate_error() turns the value
* stored in the filter context into a log message
*/
typedef enum {
EES_INIT = 0, /* no error info yet; value must be 0 for easy init */
EES_LIMIT, /* built-in restriction encountered */
EES_INCOMPLETE_CHAR, /* incomplete multi-byte char at end of content */
EES_BUCKET_READ, /* apr_bucket_read() on an input bucket failed */
EES_DOWNSTREAM, /* something bad happened in a filter below xlate */
EES_BAD_INPUT /* input data invalid */
} ees_t;
/* registered name of the output translation filter (see
* charset_register_hooks()); also used to recognize our own instances
* when walking a filter chain
*/
#define XLATEOUT_FILTER_NAME "XLATEOUT"
/* registered name of input translation filter */
#define XLATEIN_FILTER_NAME "XLATEIN"
/* Per-directory configuration. A zero-filled instance (NULL charsets,
* IA_INIT, FX_INIT) means "nothing configured here"; the merge function
* relies on that to decide which container's value wins.
*/
typedef struct charset_dir_t {
const char *charset_source; /* source encoding (CharsetSourceEnc) */
const char *charset_default; /* how to ship on wire (CharsetDefault) */
/** module does ap_add_*_filter()? IA_INIT = not set in this container */
enum {IA_INIT, IA_IMPADD, IA_NOIMPADD} implicit_add;
/** treat all mimetypes as text? FX_INIT = not set in this container */
enum {FX_INIT, FX_FORCE, FX_NOFORCE} force_xlate;
} charset_dir_t;
/* charset_filter_ctx_t is created for each filter instance; because the same
* filter code is used for translating in both directions, we need this context
* data to tell the filter which translation handle to use; it also can hold a
* character which was split between buckets
*/
typedef struct charset_filter_ctx_t {
apr_xlate_t *xlate; /* translation handle; NULL until opened */
int is_sb; /* single-byte translation? */
charset_dir_t *dc; /* directory config this instance was set up with */
ees_t ees; /* extended error status */
apr_size_t saved; /* number of bytes of a partial char held in buf */
char buf[FATTEST_CHAR]; /* we want to be able to build a complete char here */
int ran; /* has filter instance run before? */
int noop; /* should we pass brigades through unchanged? */
char *tmp; /* buffer for input filtering */
apr_bucket_brigade *bb; /* input buckets we couldn't finish translating */
apr_bucket_brigade *tmpbb; /* used for passing downstream */
} charset_filter_ctx_t;
/* charset_req_t is available via r->request_config if any translation is
* being performed; find_code_page() allocates it together with the output
* filter context
*/
typedef struct charset_req_t {
charset_dir_t *dc; /* directory config in effect for the request */
/* preallocated filter contexts; a filter added via the SetOutputFilter /
* SetInputFilter path claims its context from here and resets the pointer
* to NULL so a second instance cannot share it
*/
charset_filter_ctx_t *output_ctx, *input_ctx;
} charset_req_t;
/* forward declaration; the module record is defined at the end of the file
* but is needed earlier as the key for ap_get_module_config() and
* ap_set_module_config()
*/
module AP_MODULE_DECLARE_DATA charset_lite_module;
/* Per-directory config constructor: returns a zero-filled charset_dir_t
 * allocated from p, i.e. both charsets NULL and both option enums in their
 * *_INIT ("not set") state. The second argument is unused.
 */
static void *create_charset_dir_conf(apr_pool_t *p,char *dummy)
{
    return apr_pcalloc(p, sizeof(charset_dir_t));
}
/* Per-directory config merger.
 *
 * Builds a new charset_dir_t from the enclosing container's settings (basev)
 * and the current container's settings (overridesv). Each field is taken
 * from the current container when it was set there, otherwise it is
 * inherited from the enclosing container.
 */
static void *merge_charset_dir_conf(apr_pool_t *p, void *basev, void *overridesv)
{
    const charset_dir_t *parent = basev;
    const charset_dir_t *child = overridesv;
    charset_dir_t *merged = apr_pcalloc(p, sizeof(*merged));

    /* start from what the enclosing container says ... */
    merged->charset_default = parent->charset_default;
    merged->charset_source = parent->charset_source;
    merged->implicit_add = parent->implicit_add;
    merged->force_xlate = parent->force_xlate;

    /* ... then overlay everything the current container defines */
    if (child->charset_default) {
        merged->charset_default = child->charset_default;
    }
    if (child->charset_source) {
        merged->charset_source = child->charset_source;
    }
    if (child->implicit_add != IA_INIT) {
        merged->implicit_add = child->implicit_add;
    }
    if (child->force_xlate != FX_INIT) {
        merged->force_xlate = child->force_xlate;
    }

    return merged;
}
/* CharsetSourceEnc charset
 *
 * Directive handler: records the given charset name as the source encoding
 * in the per-directory config. Always succeeds (returns NULL).
 */
static const char *add_charset_source(cmd_parms *cmd, void *in_dc,
                                      const char *name)
{
    ((charset_dir_t *)in_dc)->charset_source = name;
    return NULL;
}
/* CharsetDefault charset
 *
 * Directive handler: records the given charset name as the on-the-wire
 * encoding in the per-directory config. Always succeeds (returns NULL).
 */
static const char *add_charset_default(cmd_parms *cmd, void *in_dc,
                                       const char *name)
{
    ((charset_dir_t *)in_dc)->charset_default = name;
    return NULL;
}
/* CharsetOptions optionflag...
 *
 * Directive handler, called once per flag. Flag names are matched without
 * regard to case. Returns NULL on success, or an error string allocated from
 * cmd->temp_pool when the flag is not recognized.
 */
static const char *add_charset_options(cmd_parms *cmd, void *in_dc,
                                       const char *flag)
{
    charset_dir_t *conf = in_dc;

    if (strcasecmp(flag, "ImplicitAdd") == 0) {
        conf->implicit_add = IA_IMPADD;
        return NULL;
    }
    if (strcasecmp(flag, "NoImplicitAdd") == 0) {
        conf->implicit_add = IA_NOIMPADD;
        return NULL;
    }
    if (strcasecmp(flag, "TranslateAllMimeTypes") == 0) {
        conf->force_xlate = FX_FORCE;
        return NULL;
    }
    if (strcasecmp(flag, "NoTranslateAllMimeTypes") == 0) {
        conf->force_xlate = FX_NOFORCE;
        return NULL;
    }

    return apr_pstrcat(cmd->temp_pool,
                       "Invalid CharsetOptions option: ",
                       flag,
                       NULL);
}
/* find_code_page() is a fixup hook that checks if the module is
 * configured and the input or output potentially need to be translated.
 * If so, context is initialized for the filters.
 *
 * Returns DECLINED in every case except when the input translation cannot
 * be opened for a PUT/POST request, which yields HTTP_INTERNAL_SERVER_ERROR.
 */
static int find_code_page(request_rec *r)
{
    charset_dir_t *dc = ap_get_module_config(r->per_dir_config,
                                             &charset_lite_module);
    const char *fname = r->filename;
    charset_req_t *reqinfo;
    charset_filter_ctx_t *out_ctx;
    charset_filter_ctx_t *in_ctx;
    apr_status_t status;

    ap_log_rerror(APLOG_MARK, APLOG_TRACE3, 0, r,
                  "uri: %s file: %s method: %d "
                  "imt: %s flags: %s%s%s %s->%s",
                  r->uri,
                  fname ? fname : "(none)",
                  r->method_number,
                  r->content_type ? r->content_type : "(unknown)",
                  r->main ? "S" : "",     /* S if subrequest */
                  r->prev ? "R" : "",     /* R if redirect */
                  r->proxyreq ? "P" : "", /* P if proxy */
                  dc->charset_source, dc->charset_default);

    /* Both charsets must be configured, or there is nothing to do. */
    if (!dc->charset_source || !dc->charset_default) {
        ap_log_rerror(APLOG_MARK, APLOG_DEBUG, 0, r, APLOGNO(01448)
                      "incomplete configuration: src %s, dst %s",
                      dc->charset_source ? dc->charset_source : "unspecified",
                      dc->charset_default ? dc->charset_default : "unspecified");
        return DECLINED;
    }

    /* proxy requests are left alone */
    if (r->proxyreq) {
        return DECLINED;
    }

    /* so are the pseudo-filenames mod_rewrite uses as indicators */
    if (fname
        && (strncmp(fname, "redirect:", 9) == 0
            || strncmp(fname, "gone:", 5) == 0
            || strncmp(fname, "passthrough:", 12) == 0
            || strncmp(fname, "forbidden:", 10) == 0)) {
        return DECLINED;
    }

    /* identical server and network charsets: no translation needed */
    if (strcasecmp(dc->charset_source, dc->charset_default) == 0) {
        return DECLINED;
    }

    /* One allocation holds the request data immediately followed by the
     * output filter context. The input filter context is rarely needed,
     * so it is allocated separately further down.
     */
    reqinfo = apr_pcalloc(r->pool, sizeof(*reqinfo) + sizeof(*out_ctx));
    out_ctx = (charset_filter_ctx_t *)(reqinfo + 1);

    reqinfo->dc = dc;
    out_ctx->dc = dc;
    out_ctx->tmpbb = apr_brigade_create(r->pool,
                                        r->connection->bucket_alloc);
    ap_set_module_config(r->request_config, &charset_lite_module, reqinfo);
    reqinfo->output_ctx = out_ctx;

    /* Input translation is only set up for PUT and POST. Note: A request
     * body can be included with the OPTIONS method, but for now we don't
     * set up translation of it.
     */
    if (r->method_number != M_PUT && r->method_number != M_POST) {
        return DECLINED;
    }

    in_ctx = apr_pcalloc(r->pool, sizeof(*in_ctx));
    in_ctx->bb = apr_brigade_create(r->pool,
                                    r->connection->bucket_alloc);
    in_ctx->tmp = apr_palloc(r->pool, INPUT_XLATE_BUF_SIZE);
    in_ctx->dc = dc;
    reqinfo->input_ctx = in_ctx;

    status = apr_xlate_open(&in_ctx->xlate, dc->charset_source,
                            dc->charset_default, r->pool);
    if (status != APR_SUCCESS) {
        ap_log_rerror(APLOG_MARK, APLOG_ERR, status, r, APLOGNO(01449)
                      "can't open translation %s->%s",
                      dc->charset_default, dc->charset_source);
        return HTTP_INTERNAL_SERVER_ERROR;
    }

    /* if we can't find out, assume it is not a single-byte translation */
    if (apr_xlate_sb_get(in_ctx->xlate, &in_ctx->is_sb) != APR_SUCCESS) {
        in_ctx->is_sb = 0;
    }

    return DECLINED;
}
/* Returns 1 if a filter registered under filter_name (compared without
 * regard to case) appears anywhere in filter_list, 0 otherwise.
 * The request argument is unused.
 */
static int configured_in_list(request_rec *r, const char *filter_name,
                              struct ap_filter_t *filter_list)
{
    struct ap_filter_t *cur;

    for (cur = filter_list; cur != NULL; cur = cur->next) {
        if (strcasecmp(filter_name, cur->frec->name) == 0) {
            return 1;
        }
    }
    return 0;
}
/* Is a filter with this name already in the request's input filter chain? */
static int configured_on_input(request_rec *r, const char *filter_name)
{
    struct ap_filter_t *chain = r->input_filters;

    return configured_in_list(r, filter_name, chain);
}
/* Is a filter with this name already in the request's output filter chain? */
static int configured_on_output(request_rec *r, const char *filter_name)
{
    struct ap_filter_t *chain = r->output_filters;

    return configured_in_list(r, filter_name, chain);
}
/* xlate_insert_filter() is a filter hook which decides whether or not
 * to insert a translation filter for the current request.
 *
 * Nothing is added when CharsetOptions NoImplicitAdd is in effect or when
 * find_code_page() did not set up any request info. Otherwise the output
 * and input filters are each added if a context was prepared for them and
 * a filter of that name is not already in the chain; when a filter is NOT
 * added, the reason is traced.
 */
static void xlate_insert_filter(request_rec *r)
{
    /* Hey... don't be so quick to use reqinfo->dc here; reqinfo may be NULL */
    charset_req_t *reqinfo = ap_get_module_config(r->request_config,
                                                  &charset_lite_module);
    charset_dir_t *dc = ap_get_module_config(r->per_dir_config,
                                             &charset_lite_module);

    if (dc && (dc->implicit_add == IA_NOIMPADD)) {
        ap_log_rerror(APLOG_MARK, APLOG_TRACE6, 0, r,
                      "xlate output filter not added implicitly because "
                      "CharsetOptions included 'NoImplicitAdd'");
        return;
    }

    if (!reqinfo) {
        return;
    }

    if (reqinfo->output_ctx && !configured_on_output(r, XLATEOUT_FILTER_NAME)) {
        ap_add_output_filter(XLATEOUT_FILTER_NAME, reqinfo->output_ctx, r,
                             r->connection);
    }
    else {
        /* only report "not added" when the filter really was not added */
        ap_log_rerror(APLOG_MARK, APLOG_TRACE3, 0, r,
                      "xlate output filter not added implicitly because %s",
                      !reqinfo->output_ctx ?
                      "no output configuration available" :
                      "another module added the filter");
    }

    if (reqinfo->input_ctx && !configured_on_input(r, XLATEIN_FILTER_NAME)) {
        ap_add_input_filter(XLATEIN_FILTER_NAME, reqinfo->input_ctx, r,
                            r->connection);
    }
    else {
        /* only report "not added" when the filter really was not added */
        ap_log_rerror(APLOG_MARK, APLOG_TRACE3, 0, r,
                      "xlate input filter not added implicitly because %s",
                      !reqinfo->input_ctx ?
                      "no input configuration available" :
                      "another module added the filter");
    }
}
/* stuff that sucks that I know of:
*
* bucket handling:
* why create an eos bucket when we see it come down the stream? just send the one
* passed as input... news flash: this will be fixed when xlate_out_filter() starts
* using the more generic xlate_brigade()
*
* translation mechanics:
* we don't handle characters that straddle more than two buckets; an error
* will be generated
*/
/* Pass a single bucket to the next output filter.
 *
 * The bucket is appended to the context's scratch brigade (tmpbb), the
 * brigade is handed to f->next, and the scratch brigade is cleaned up
 * afterwards regardless of the outcome. A failure from the next filter is
 * recorded as EES_DOWNSTREAM and its status is returned.
 */
static apr_status_t send_bucket_downstream(ap_filter_t *f, apr_bucket *b)
{
    charset_filter_ctx_t *ctx = f->ctx;
    apr_bucket_brigade *scratch = ctx->tmpbb;
    apr_status_t status;

    APR_BRIGADE_INSERT_TAIL(scratch, b);

    status = ap_pass_brigade(f->next, scratch);
    if (status != APR_SUCCESS) {
        ctx->ees = EES_DOWNSTREAM;
    }

    apr_brigade_cleanup(scratch);
    return status;
}
/* send_downstream() is passed the translated data; it wraps the len bytes
 * at tmp in a transient bucket and passes that bucket to the next filter
 * via send_bucket_downstream()
 */
static apr_status_t send_downstream(ap_filter_t *f, const char *tmp, apr_size_t len)
{
    apr_bucket *data = apr_bucket_transient_create(tmp, len,
                                                   f->r->connection->bucket_alloc);

    return send_bucket_downstream(f, data);
}
/* Send a freshly created EOS bucket, in its own new brigade, to the next
 * output filter. A failure from the next filter is recorded as
 * EES_DOWNSTREAM and its status is returned.
 */
static apr_status_t send_eos(ap_filter_t *f)
{
    charset_filter_ctx_t *ctx = f->ctx;
    conn_rec *conn = f->r->connection;
    apr_bucket_brigade *eos_bb = apr_brigade_create(f->r->pool,
                                                    conn->bucket_alloc);
    apr_bucket *eos = apr_bucket_eos_create(conn->bucket_alloc);
    apr_status_t status;

    APR_BRIGADE_INSERT_TAIL(eos_bb, eos);

    status = ap_pass_brigade(f->next, eos_bb);
    if (status != APR_SUCCESS) {
        ctx->ees = EES_DOWNSTREAM;
    }
    return status;
}
/* Remember the leading bytes of a character that was cut off at the end of
 * a bucket, so that it can be completed from the next bucket.
 *
 * The bytes are copied into ctx->buf and ctx->saved is set to their count.
 * Only fragments shorter than the buffer are accepted, which leaves room
 * for at least one more byte; otherwise EES_LIMIT is recorded and
 * APR_INCOMPLETE is returned.
 */
static apr_status_t set_aside_partial_char(charset_filter_ctx_t *ctx,
                                           const char *partial,
                                           apr_size_t partial_len)
{
    if (partial_len >= sizeof(ctx->buf)) {
        /* we don't handle chars this wide which straddle buckets */
        ctx->ees = EES_LIMIT;
        return APR_INCOMPLETE;
    }

    ctx->saved = partial_len;
    memcpy(ctx->buf, partial, partial_len);
    return APR_SUCCESS;
}
/* Complete a character whose first ctx->saved bytes were set aside from a
 * previous bucket.
 *
 * Bytes are moved one at a time from the input (*cur_str / *cur_len, both
 * advanced as bytes are consumed) to the end of ctx->buf, and after each
 * byte a conversion of the whole of ctx->buf into *out_str is attempted.
 * *out_len is updated by the converter; *out_str itself is NOT advanced, so
 * the caller must account for the output produced.
 *
 * The caller must pass at least one input byte (*cur_len > 0).
 *
 * Returns APR_SUCCESS once the character converted (ctx->saved is reset to
 * 0). Any other result records EES_LIMIT in ctx->ees; this includes running
 * out of input before the character is complete and running out of room in
 * ctx->buf.
 */
static apr_status_t finish_partial_char(charset_filter_ctx_t *ctx,
                                        /* input buffer: */
                                        const char **cur_str,
                                        apr_size_t *cur_len,
                                        /* output buffer: */
                                        char **out_str,
                                        apr_size_t *out_len)
{
    apr_status_t rv;
    apr_size_t tmp_input_len;

    /* Never write past the end of ctx->buf: if it is already full there is
     * no way to finish the character.
     */
    if (ctx->saved >= sizeof(ctx->buf)) {
        ctx->ees = EES_LIMIT;
        return APR_INCOMPLETE;
    }

    /* Keep adding bytes from the input string to the saved string until we
     * 1) finish the input char
     * 2) get an error
     * 3) run out of bytes to add
     * or 4) run out of room in the saved-character buffer
     */
    do {
        ctx->buf[ctx->saved] = **cur_str;
        ++ctx->saved;
        ++*cur_str;
        --*cur_len;
        tmp_input_len = ctx->saved;
        rv = apr_xlate_conv_buffer(ctx->xlate,
                                   ctx->buf,
                                   &tmp_input_len,
                                   *out_str,
                                   out_len);
    } while (rv == APR_INCOMPLETE
             && *cur_len
             && ctx->saved < sizeof(ctx->buf));

    if (rv == APR_SUCCESS) {
        ctx->saved = 0;
    }
    else {
        ctx->ees = EES_LIMIT; /* code isn't smart enough to handle chars
                               * straddling more than two buckets, or
                               * chars wider than FATTEST_CHAR
                               */
    }

    return rv;
}
/* Log a translation failure at APLOG_ERR, choosing the message from the
 * extended error status stored in the filter context.
 *
 * For the statuses that fully describe the problem on their own the
 * apr_status_t is replaced by 0 before logging; for EES_DOWNSTREAM and for
 * any other value the caller's rv is logged along with the message. For
 * EES_INCOMPLETE_CHAR the saved bytes of the unfinished character are
 * appended to the message in hex.
 */
static void log_xlate_error(ap_filter_t *f, apr_status_t rv)
{
    charset_filter_ctx_t *ctx = f->ctx;
    const char *msg;
    char msgbuf[100];
    apr_size_t prefix_len;
    apr_size_t max_bytes;
    apr_size_t nbytes;

    if (ctx->ees == EES_LIMIT) {
        rv = 0;
        msg = APLOGNO(02193) "xlate filter - a built-in restriction was encountered";
    }
    else if (ctx->ees == EES_BAD_INPUT) {
        rv = 0;
        msg = APLOGNO(02194) "xlate filter - an input character was invalid";
    }
    else if (ctx->ees == EES_BUCKET_READ) {
        rv = 0;
        msg = APLOGNO(02195) "xlate filter - bucket read routine failed";
    }
    else if (ctx->ees == EES_INCOMPLETE_CHAR) {
        rv = 0;
        strcpy(msgbuf, APLOGNO(02196) "xlate filter - incomplete char at end of input - ");
        prefix_len = strlen(msgbuf);

        /* Each byte takes two hex digits; never dump more bytes than fit in
         * what is left of msgbuf, including the terminating NUL.
         */
        max_bytes = (sizeof(msgbuf) - prefix_len - 1) / 2;
        nbytes = ctx->saved;
        if (nbytes > max_bytes) {
            nbytes = max_bytes;
        }
        ap_bin2hex(ctx->buf, nbytes, msgbuf + prefix_len);
        msg = msgbuf;
    }
    else if (ctx->ees == EES_DOWNSTREAM) {
        msg = APLOGNO(02197) "xlate filter - an error occurred in a lower filter";
    }
    else {
        msg = APLOGNO(02198) "xlate filter - returning error";
    }

    ap_log_rerror(APLOG_MARK, APLOG_ERR, rv, f->r, APLOGNO(02997) "%s", msg);
}
/* chk_filter_chain() is called once per filter instance; it tries to
* determine if the current filter instance should be disabled because
* its translation is incompatible with the translation of an existing
* instance of the translate filter
*
* Example bad scenario:
*
* configured filter chain for the request:
* INCLUDES XLATEOUT(8859-1->UTF-16)
* configured filter chain for the subrequest:
* XLATEOUT(8859-1->UTF-16)
*
* When the subrequest is processed, the filter chain will be
* XLATEOUT(8859-1->UTF-16) XLATEOUT(8859-1->UTF-16)
* This makes no sense, so the instance of XLATEOUT added for the
* subrequest will be noop-ed.
*
* Example good scenario:
*
* configured filter chain for the request:
* INCLUDES XLATEOUT(8859-1->UTF-16)
* configured filter chain for the subrequest:
* XLATEOUT(IBM-1047->8859-1)
*
* When the subrequest is processed, the filter chain will be
* XLATEOUT(IBM-1047->8859-1) XLATEOUT(8859-1->UTF-16)
* This makes sense, so the instance of XLATEOUT added for the
* subrequest will be left alone and it will translate from
* IBM-1047->8859-1.
*/
static void chk_filter_chain(ap_filter_t *f)
{
    charset_filter_ctx_t *self = f->ctx;
    charset_filter_ctx_t *first = NULL; /* first same-named instance in chain */
    charset_filter_ctx_t *other;
    ap_filter_t *cur;
    int output = !strcasecmp(f->frec->name, XLATEOUT_FILTER_NAME);
    const char *symbol = output ? "->" : "<-";

    if (self->noop) {
        return;
    }

    /* walk the filter chain; see if it makes sense for our filter to
     * do any translation
     */
    for (cur = output ? f->r->output_filters : f->r->input_filters;
         cur != NULL;
         cur = cur->next) {

        /* only instances of our own filter that carry a context matter */
        if (strcasecmp(cur->frec->name, f->frec->name) != 0 || !cur->ctx) {
            continue;
        }
        other = (charset_filter_ctx_t *)cur->ctx;

        if (!first) {
            first = other;
            continue;
        }

        /* compatible when the first instance's output charset is what this
         * later instance expects as its input
         */
        if (strcmp(first->dc->charset_default,
                   other->dc->charset_source) == 0) {
            continue;
        }

        /* incompatible translation
         * if our filter instance is incompatible with an instance
         * already in place, noop our instance
         * Notes:
         * . We are only willing to noop our own instance.
         * . It is possible to noop another instance which has not
         *   yet run, but this is not currently implemented.
         *   Hopefully it will not be needed.
         * . It is not possible to noop an instance which has
         *   already run.
         */
        if (first == self) {
            first->noop = 1;
            if (APLOGrtrace1(f->r)) {
                ap_log_rerror(APLOG_MARK, APLOG_DEBUG,
                              0, f->r, APLOGNO(01451)
                              "%s %s - disabling "
                              "translation %s%s%s; existing "
                              "translation %s%s%s",
                              f->r->uri ? "uri" : "file",
                              f->r->uri ? f->r->uri : f->r->filename,
                              first->dc->charset_source,
                              symbol,
                              first->dc->charset_default,
                              other->dc->charset_source,
                              symbol,
                              other->dc->charset_default);
            }
        }
        else {
            ap_log_rerror(APLOG_MARK, APLOG_ERR,
                          0, f->r, APLOGNO(01452)
                          "chk_filter_chain() - can't disable "
                          "translation %s%s%s; existing "
                          "translation %s%s%s",
                          first->dc->charset_source,
                          symbol,
                          first->dc->charset_default,
                          other->dc->charset_source,
                          symbol,
                          other->dc->charset_default);
        }

        /* the first incompatibility ends the walk */
        break;
    }
}
/* xlate_brigade() is used to filter request and response bodies
*
* we'll stop when one of the following occurs:
* . we run out of buckets
* . we run out of space in the output buffer
* . we hit an error or metadata
*
* inputs:
* bb: brigade to process
* buffer: storage to hold the translated characters
* buffer_avail: size of buffer
* (and a few more uninteresting parms)
*
* outputs:
* return value: APR_SUCCESS or some error code
* bb: we've removed any buckets representing the
* translated characters; the eos bucket, if
* present, will be left in the brigade
* buffer: filled in with translated characters
* buffer_avail: updated with the bytes remaining
* hit_eos: did we hit an EOS bucket?
*/
static apr_status_t xlate_brigade(charset_filter_ctx_t *ctx,
apr_bucket_brigade *bb,
char *buffer,
apr_size_t *buffer_avail,
int *hit_eos)
{
apr_bucket *b = NULL; /* set to NULL only to quiet some gcc */
apr_bucket *consumed_bucket;
const char *bucket; /* read position within the current bucket's data */
apr_size_t bytes_in_bucket; /* total bytes read from current bucket */
apr_size_t bucket_avail; /* bytes left in current bucket */
apr_status_t rv = APR_SUCCESS;
*hit_eos = 0;
bucket_avail = 0;
consumed_bucket = NULL;
while (1) {
if (!bucket_avail) { /* no bytes left to process in the current bucket... */
/* the previous bucket has been fully converted; drop it before
* looking at the new head of the brigade
*/
if (consumed_bucket) {
apr_bucket_delete(consumed_bucket);
consumed_bucket = NULL;
}
b = APR_BRIGADE_FIRST(bb);
/* stop at the end of the brigade or at any metadata bucket (which is
* left in the brigade for the caller)
*/
if (b == APR_BRIGADE_SENTINEL(bb) ||
APR_BUCKET_IS_METADATA(b)) {
break;
}
rv = apr_bucket_read(b, &bucket, &bytes_in_bucket, APR_BLOCK_READ);
if (rv != APR_SUCCESS) {
ctx->ees = EES_BUCKET_READ;
break;
}
bucket_avail = bytes_in_bucket;
consumed_bucket = b; /* for axing when we're done reading it */
}
if (bucket_avail) {
/* We've got data, so translate it. */
if (ctx->saved) {
/* Rats... we need to finish a partial character from the previous
* bucket.
*
* Strangely, finish_partial_char() increments the input buffer
* pointer but does not increment the output buffer pointer.
*/
apr_size_t old_buffer_avail = *buffer_avail;
rv = finish_partial_char(ctx,
&bucket, &bucket_avail,
&buffer, buffer_avail);
/* advance the output pointer ourselves by what was produced */
buffer += old_buffer_avail - *buffer_avail;
}
else {
apr_size_t old_buffer_avail = *buffer_avail;
apr_size_t old_bucket_avail = bucket_avail;
rv = apr_xlate_conv_buffer(ctx->xlate,
bucket, &bucket_avail,
buffer,
buffer_avail);
/* the converter only updates the remaining lengths; move both
* pointers by the amounts consumed and produced
*/
buffer += old_buffer_avail - *buffer_avail;
bucket += old_bucket_avail - bucket_avail;
if (rv == APR_INCOMPLETE) { /* partial character at end of input */
/* We need to save the final byte(s) for next time; we can't
* convert it until we look at the next bucket.
*/
rv = set_aside_partial_char(ctx, bucket, bucket_avail);
bucket_avail = 0;
}
}
if (rv != APR_SUCCESS) {
/* bad input byte or partial char too big to store */
/* NOTE(review): the current bucket is not deleted on this path, so
* it is still at the head of bb when we return the error
*/
break;
}
if (*buffer_avail < XLATE_MIN_BUFF_LEFT) {
/* if any data remains in the current bucket, split there */
if (bucket_avail) {
apr_bucket_split(b, bytes_in_bucket - bucket_avail);
}
/* b now covers only converted data; remove it and stop, since the
* output buffer is (nearly) full
*/
apr_bucket_delete(b);
break;
}
}
}
/* Whatever made us stop, report an EOS bucket sitting at the head of the
* brigade.
*/
if (!APR_BRIGADE_EMPTY(bb)) {
b = APR_BRIGADE_FIRST(bb);
if (APR_BUCKET_IS_EOS(b)) {
/* Leave the eos bucket in the brigade for reporting to
* subsequent filters.
*/
*hit_eos = 1;
if (ctx->saved) {
/* Oops... we have a partial char from the previous bucket
* that won't be completed because there's no more data.
*/
rv = APR_INCOMPLETE;
ctx->ees = EES_INCOMPLETE_CHAR;
}
}
}
return rv;
}
/* xlate_out_filter() handles (almost) arbitrary conversions from one charset
* to another...
* translation is determined in the fixup hook (find_code_page), which is
* where the filter's context data is set up... the context data gives us
* the translation handle
*/
/* Output filter entry point.
*
* f: this filter instance; f->ctx is the charset_filter_ctx_t, or NULL the
* first time through when the filter was not added by this module
* bb: brigade of response data to translate
*
* Data buckets are converted into a stack buffer which is passed downstream
* whenever it is nearly full and once more at the end; metadata buckets are
* passed downstream individually. Returns the status of the last
* conversion / downstream call; failures are logged via log_xlate_error().
*/
static apr_status_t xlate_out_filter(ap_filter_t *f, apr_bucket_brigade *bb)
{
charset_req_t *reqinfo = ap_get_module_config(f->r->request_config,
&charset_lite_module);
charset_dir_t *dc = ap_get_module_config(f->r->per_dir_config,
&charset_lite_module);
charset_filter_ctx_t *ctx = f->ctx;
apr_bucket *dptr, *consumed_bucket;
const char *cur_str;
apr_size_t cur_len, cur_avail;
char tmp[OUTPUT_XLATE_BUF_SIZE];
apr_size_t space_avail;
int done;
apr_status_t rv = APR_SUCCESS;
if (!ctx) {
/* this is SetOutputFilter path; grab the preallocated context,
* if any; note that if we decided not to do anything in an earlier
* handler, we won't even have a reqinfo
*/
if (reqinfo) {
ctx = f->ctx = reqinfo->output_ctx;
reqinfo->output_ctx = NULL; /* prevent SNAFU if user coded us twice
* in the filter chain; we can't have two
* instances using the same context
*/
}
if (!ctx) { /* no idea how to translate; don't do anything */
ctx = f->ctx = apr_pcalloc(f->r->pool, sizeof(charset_filter_ctx_t));
ctx->dc = dc;
ctx->noop = 1;
}
}
/* Check the mime type to see if translation should be performed.
* This runs until a translation handle has been opened or the instance
* has been turned into a no-op.
*/
if (!ctx->noop && ctx->xlate == NULL) {
const char *mime_type = f->r->content_type;
if (mime_type && (ap_cstr_casecmpn(mime_type, "text/", 5) == 0 ||
#if APR_CHARSET_EBCDIC
/* On an EBCDIC machine, be willing to translate mod_autoindex-
* generated output. Otherwise, it doesn't look too cool.
*
* XXX This isn't a perfect fix because this doesn't trigger us
* to convert from the charset of the source code to ASCII. The
* general solution seems to be to allow a generator to set an
* indicator in the r specifying that the body is coded in the
* implementation character set (i.e., the charset of the source
* code). This would get several different types of documents
* translated properly: mod_autoindex output, mod_status output,
* mod_info output, hard-coded error documents, etc.
*/
strcmp(mime_type, DIR_MAGIC_TYPE) == 0 ||
#endif
ap_cstr_casecmpn(mime_type, "message/", 8) == 0 ||
dc->force_xlate == FX_FORCE)) {
/* output direction: charset_source is converted to charset_default */
rv = apr_xlate_open(&ctx->xlate,
dc->charset_default, dc->charset_source, f->r->pool);
if (rv != APR_SUCCESS) {
ap_log_rerror(APLOG_MARK, APLOG_ERR, rv, f->r, APLOGNO(01453)
"can't open translation %s->%s",
dc->charset_source, dc->charset_default);
ctx->noop = 1;
}
else {
if (apr_xlate_sb_get(ctx->xlate, &ctx->is_sb) != APR_SUCCESS) {
ctx->is_sb = 0;
}
}
}
else {
ctx->noop = 1;
if (mime_type) {
ap_log_rerror(APLOG_MARK, APLOG_TRACE6, 0, f->r,
"mime type is %s; no translation selected",
mime_type);
}
}
}
ap_log_rerror(APLOG_MARK, APLOG_TRACE6, 0, f->r,
"xlate_out_filter() - "
"charset_source: %s charset_default: %s",
dc && dc->charset_source ? dc->charset_source : "(none)",
dc && dc->charset_default ? dc->charset_default : "(none)");
if (!ctx->ran) { /* filter never ran before */
chk_filter_chain(f);
ctx->ran = 1;
if (!ctx->noop && !ctx->is_sb) {
/* We're not converting between two single-byte charsets, so unset
* Content-Length since it is unlikely to remain the same.
*/
apr_table_unset(f->r->headers_out, "Content-Length");
}
}
if (ctx->noop) {
return ap_pass_brigade(f->next, bb);
}
/* State for the conversion loop:
* dptr next bucket to look at
* cur_str/cur_len unconverted remainder of the bucket being processed
* space_avail free bytes at the end of tmp
* consumed_bucket bucket currently being converted; deleted once
* cur_len drops to 0
*/
dptr = APR_BRIGADE_FIRST(bb);
done = 0;
cur_len = 0;
space_avail = sizeof(tmp);
consumed_bucket = NULL;
while (!done) {
if (!cur_len) { /* no bytes left to process in the current bucket... */
if (consumed_bucket) {
apr_bucket_delete(consumed_bucket);
consumed_bucket = NULL;
}
if (dptr == APR_BRIGADE_SENTINEL(bb)) {
break;
}
if (APR_BUCKET_IS_EOS(dptr)) {
cur_len = -1; /* XXX yuck, but that tells us to send
* eos down; when we minimize our bb construction
* we'll fix this crap */
if (ctx->saved) {
/* Oops... we have a partial char from the previous bucket
* that won't be completed because there's no more data.
*/
rv = APR_INCOMPLETE;
ctx->ees = EES_INCOMPLETE_CHAR;
}
break;
}
if (APR_BUCKET_IS_METADATA(dptr)) {
/* NOTE(review): the metadata bucket is passed downstream right away,
* while translated data that precedes it may still be waiting in tmp
* (it is only flushed when tmp is nearly full or after the loop), so
* the two can apparently reach the next filter out of order - confirm
*/
apr_bucket *metadata_bucket;
metadata_bucket = dptr;
dptr = APR_BUCKET_NEXT(dptr);
APR_BUCKET_REMOVE(metadata_bucket);
rv = send_bucket_downstream(f, metadata_bucket);
if (rv != APR_SUCCESS) {
done = 1;
}
continue;
}
rv = apr_bucket_read(dptr, &cur_str, &cur_len, APR_BLOCK_READ);
if (rv != APR_SUCCESS) {
ctx->ees = EES_BUCKET_READ;
break;
}
consumed_bucket = dptr; /* for axing when we're done reading it */
dptr = APR_BUCKET_NEXT(dptr); /* get ready for when we access the
* next bucket */
}
/* Try to fill up our tmp buffer with translated data. */
cur_avail = cur_len;
if (cur_len) { /* maybe we just hit the end of a pipe (len = 0) ? */
if (ctx->saved) {
/* Rats... we need to finish a partial character from the previous
* bucket.
*/
char *tmp_tmp;
/* first free byte of tmp */
tmp_tmp = tmp + sizeof(tmp) - space_avail;
rv = finish_partial_char(ctx,
&cur_str, &cur_len,
&tmp_tmp, &space_avail);
}
else {
rv = apr_xlate_conv_buffer(ctx->xlate,
cur_str, &cur_avail,
tmp + sizeof(tmp) - space_avail, &space_avail);
/* Update input ptr and len after consuming some bytes */
cur_str += cur_len - cur_avail;
cur_len = cur_avail;
if (rv == APR_INCOMPLETE) { /* partial character at end of input */
/* We need to save the final byte(s) for next time; we can't
* convert it until we look at the next bucket.
*/
rv = set_aside_partial_char(ctx, cur_str, cur_len);
cur_len = 0;
}
}
}
if (rv != APR_SUCCESS) {
/* bad input byte or partial char too big to store */
done = 1;
}
if (space_avail < XLATE_MIN_BUFF_LEFT) {
/* It is time to flush, as there is not enough space left in the
* current output buffer to bother with converting more data.
*/
/* NOTE(review): rv is overwritten here, so a conversion failure
* detected just above is replaced by the result of the flush
*/
rv = send_downstream(f, tmp, sizeof(tmp) - space_avail);
if (rv != APR_SUCCESS) {
done = 1;
}
/* tmp is now empty */
space_avail = sizeof(tmp);
}
}
if (rv == APR_SUCCESS) {
if (space_avail < sizeof(tmp)) { /* gotta write out what we converted */
rv = send_downstream(f, tmp, sizeof(tmp) - space_avail);
}
}
if (rv == APR_SUCCESS) {
/* cur_len was set to -1 above as the "EOS seen" marker */
if (cur_len == -1) {
rv = send_eos(f);
}
}
else {
log_xlate_error(f, rv);
}
return rv;
}
/* Input filter entry point.
*
* Only AP_MODE_READBYTES reads are translated; every other mode, and every
* read on a no-op instance, is passed straight to the next filter. Otherwise
* data is taken from leftovers of a previous call or fetched from the next
* filter, converted by xlate_brigade() into ctx->tmp, and returned in bb as
* a single heap bucket placed ahead of whatever is left in bb.
* Returns the status of the upstream read or of the conversion; conversion
* failures are logged via log_xlate_error().
*/
static apr_status_t xlate_in_filter(ap_filter_t *f, apr_bucket_brigade *bb,
ap_input_mode_t mode, apr_read_type_e block,
apr_off_t readbytes)
{
apr_status_t rv;
charset_req_t *reqinfo = ap_get_module_config(f->r->request_config,
&charset_lite_module);
charset_dir_t *dc = ap_get_module_config(f->r->per_dir_config,
&charset_lite_module);
charset_filter_ctx_t *ctx = f->ctx;
apr_size_t buffer_size;
int hit_eos;
/* just get out of the way of things we don't want. */
if (mode != AP_MODE_READBYTES) {
return ap_get_brigade(f->next, bb, mode, block, readbytes);
}
if (!ctx) {
/* this is SetInputFilter path; grab the preallocated context,
* if any; note that if we decided not to do anything in an earlier
* handler, we won't even have a reqinfo
*/
if (reqinfo) {
ctx = f->ctx = reqinfo->input_ctx;
reqinfo->input_ctx = NULL; /* prevent SNAFU if user coded us twice
* in the filter chain; we can't have two
* instances using the same context
*/
}
if (!ctx) { /* no idea how to translate; don't do anything */
ctx = f->ctx = apr_pcalloc(f->r->pool, sizeof(charset_filter_ctx_t));
ctx->dc = dc;
ctx->noop = 1;
}
}
ap_log_rerror(APLOG_MARK, APLOG_TRACE6, 0, f->r,
"xlate_in_filter() - "
"charset_source: %s charset_default: %s",
dc && dc->charset_source ? dc->charset_source : "(none)",
dc && dc->charset_default ? dc->charset_default : "(none)");
if (!ctx->ran) { /* filter never ran before */
chk_filter_chain(f);
ctx->ran = 1;
if (!ctx->noop && !ctx->is_sb
&& apr_table_get(f->r->headers_in, "Content-Length")) {
/* A Content-Length header is present, but it won't be valid after
* conversion because we're not converting between two single-byte
* charsets. This will affect most CGI scripts and may affect
* some modules.
* Content-Length can't be unset here because that would break
* being able to read the request body.
* Processing of chunked request bodies is not impacted by this
* filter since the length was not declared anyway.
*/
ap_log_rerror(APLOG_MARK, APLOG_TRACE1, 0, f->r,
"Request body length may change, resulting in "
"misprocessing by some modules or scripts");
}
}
if (ctx->noop) {
return ap_get_brigade(f->next, bb, mode, block, readbytes);
}
/* only go to the next filter for data when nothing is left over from an
* earlier call
*/
if (APR_BRIGADE_EMPTY(ctx->bb)) {
if ((rv = ap_get_brigade(f->next, bb, mode, block,
readbytes)) != APR_SUCCESS) {
return rv;
}
}
else {
APR_BRIGADE_PREPEND(bb, ctx->bb); /* first use the leftovers */
}
/* buffer_size goes in as the capacity of ctx->tmp and comes back as the
* number of bytes still unused
*/
buffer_size = INPUT_XLATE_BUF_SIZE;
rv = xlate_brigade(ctx, bb, ctx->tmp, &buffer_size, &hit_eos);
if (rv == APR_SUCCESS) {
if (!hit_eos) {
/* move anything leftover into our context for next time;
* we don't currently "set aside" since the data came from
* down below, but I suspect that for long-term we need to
* do that
*/
APR_BRIGADE_CONCAT(ctx->bb, bb);
}
if (buffer_size < INPUT_XLATE_BUF_SIZE) { /* do we have output? */
apr_bucket *e;
e = apr_bucket_heap_create(ctx->tmp,
INPUT_XLATE_BUF_SIZE - buffer_size,
NULL, f->r->connection->bucket_alloc);
/* make sure we insert at the head, because there may be
* an eos bucket already there, and the eos bucket should
* come after the data
*/
APR_BRIGADE_INSERT_HEAD(bb, e);
}
else {
/* XXX need to get some more data... what if the last brigade
* we got had only the first byte of a multibyte char? we need
* to grab more data from the network instead of returning an
* empty brigade
*/
}
/* If we have any metadata at the head of ctx->bb, go ahead and move it
* onto the end of bb to be returned to our caller.
*/
if (!APR_BRIGADE_EMPTY(ctx->bb)) {
apr_bucket *b = APR_BRIGADE_FIRST(ctx->bb);
while (b != APR_BRIGADE_SENTINEL(ctx->bb)
&& APR_BUCKET_IS_METADATA(b)) {
APR_BUCKET_REMOVE(b);
APR_BRIGADE_INSERT_TAIL(bb, b);
b = APR_BRIGADE_FIRST(ctx->bb);
}
}
}
else {
log_xlate_error(f, rv);
}
return rv;
}
/* Configuration directives provided by this module. All three are declared
* with OR_FILEINFO; the array is terminated by the {NULL} entry.
*/
static const command_rec cmds[] =
{
/* CharsetSourceEnc <charset>: one argument, stored as charset_source */
AP_INIT_TAKE1("CharsetSourceEnc",
add_charset_source,
NULL,
OR_FILEINFO,
"source (html,cgi,ssi) file charset"),
/* CharsetDefault <charset>: one argument, stored as charset_default */
AP_INIT_TAKE1("CharsetDefault",
add_charset_default,
NULL,
OR_FILEINFO,
"name of default charset"),
/* CharsetOptions <flag>...: handler is invoked once per flag */
AP_INIT_ITERATE("CharsetOptions",
add_charset_options,
NULL,
OR_FILEINFO,
"valid options: ImplicitAdd, NoImplicitAdd, TranslateAllMimeTypes, "
"NoTranslateAllMimeTypes"),
{NULL}
};
/* Register this module's hooks and filters with the server core:
 * find_code_page() as a fixup hook, xlate_insert_filter() as a very late
 * insert-filter hook, and the two translation filters under their
 * registered names. On EBCDIC builds the filters are registered one step
 * after AP_FTYPE_RESOURCE instead of at AP_FTYPE_RESOURCE itself.
 */
static void charset_register_hooks(apr_pool_t *p)
{
#if APR_CHARSET_EBCDIC
    const int xlate_ftype = AP_FTYPE_RESOURCE+1;
#else
    const int xlate_ftype = AP_FTYPE_RESOURCE;
#endif

    ap_hook_fixups(find_code_page, NULL, NULL, APR_HOOK_MIDDLE);
    ap_hook_insert_filter(xlate_insert_filter, NULL, NULL, APR_HOOK_REALLY_LAST);

    ap_register_output_filter(XLATEOUT_FILTER_NAME, xlate_out_filter, NULL,
                              xlate_ftype);
    ap_register_input_filter(XLATEIN_FILTER_NAME, xlate_in_filter, NULL,
                             xlate_ftype);
}
/* Module record. Only per-directory configuration is used; the two NULL
* slots are the per-server config create/merge callbacks (slot order as in
* the httpd module structure).
*/
AP_DECLARE_MODULE(charset_lite) =
{
STANDARD20_MODULE_STUFF,
create_charset_dir_conf, /* create per-directory config */
merge_charset_dir_conf, /* merge per-directory configs */
NULL, /* no per-server config */
NULL, /* no per-server config merge */
cmds, /* directive table */
charset_register_hooks /* hook and filter registration */
};