/* modules/experimental/mod_charset_lite.c - httpd - Git at Google */

 /* Copyright 2000-2004 The Apache Software Foundation
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
  * You may obtain a copy of the License at
  *
  *     http://www.apache.org/licenses/LICENSE-2.0
  *
  * Unless required by applicable law or agreed to in writing, software
  * distributed under the License is distributed on an "AS IS" BASIS,
  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */

 /*
  * simple hokey charset recoding configuration module
  *
  * See mod_ebcdic and mod_charset for more thought-out examples.  This
  * one is just so Jeff can learn how a module works and experiment with
  * basic character set recoding configuration.
  *
  * !!!This is an extremely cheap ripoff of mod_charset.c from Russian Apache!!!
  */

 #include "httpd.h"
 #include "http_config.h"
 #define CORE_PRIVATE
 #include "http_core.h"
 #include "http_log.h"
 #include "http_main.h"
 #include "http_protocol.h"
 #include "http_request.h"
 #include "util_charset.h"
 #include "apr_buckets.h"
 #include "util_filter.h"
 #include "apr_strings.h"
 #include "apr_lib.h"
 #include "apr_xlate.h"
 #define APR_WANT_STRFUNC
 #include "apr_want.h"

 #define OUTPUT_XLATE_BUF_SIZE (16*1024) /* size in bytes of translation buffer used on output */
 #define INPUT_XLATE_BUF_SIZE  (8*1024)  /* size in bytes of translation buffer used on input */

 #define XLATE_MIN_BUFF_LEFT 128  /* xlate_brigade() stops filling the translation
                                   * buffer once fewer than this many bytes of
                                   * space are left in it, so the caller can flush
                                   */

 #define FATTEST_CHAR  8          /* size of the buffer used to reassemble a char
                                   * that straddles two buckets; we don't handle
                                   * partial chars that do not fit in it
                                   */

 /* extended error status codes; this is used in addition to an apr_status_t to
  * track errors in the translation filter; log_xlate_error() selects its
  * message based on this value
  */
 typedef enum {
     EES_INIT = 0,   /* no error info yet; value must be 0 for easy init */
     EES_LIMIT,      /* built-in restriction encountered */
     EES_INCOMPLETE_CHAR, /* incomplete multi-byte char at end of content */
     EES_BUCKET_READ, /* apr_bucket_read() failed on an input bucket */
     EES_DOWNSTREAM, /* something bad happened in a filter below xlate */
     EES_BAD_INPUT   /* input data invalid */
 } ees_t;

 /* registered name of the output translation filter; also used to look for
  * an existing instance in a request's output filter chain
  */
 #define XLATEOUT_FILTER_NAME "XLATEOUT"
 /* registered name of input translation filter; also used to look for an
  * existing instance in a request's input filter chain
  */
 #define XLATEIN_FILTER_NAME  "XLATEIN"

 /* per-directory configuration; allocated zero-filled by
  * create_charset_dir_conf() and combined by merge_charset_dir_conf()
  */
 typedef struct charset_dir_t {
     /** debug level; -1 means uninitialized, 0 means no debug */
     int debug;
     const char *charset_source; /* source encoding; NULL if not configured */
     const char *charset_default; /* how to ship on wire; NULL if not configured */
     /** module does ap_add_*_filter()?  IA_INIT means the option was not set
      *  in this container (the merge then takes the enclosing value)
      */
     enum {IA_INIT, IA_IMPADD, IA_NOIMPADD} implicit_add;
 } charset_dir_t;

 /* charset_filter_ctx_t is created for each filter instance; because the same
  * filter code is used for translating in both directions, we need this context
  * data to tell the filter which translation handle to use; it also can hold a
  * character which was split between buckets
  */
 typedef struct charset_filter_ctx_t {
     apr_xlate_t *xlate;     /* translation handle; NULL until it is opened */
     charset_dir_t *dc;      /* directory configuration this instance uses */
     ees_t ees;              /* extended error status */
     apr_size_t saved;       /* number of bytes of a partial char held in buf */
     char buf[FATTEST_CHAR]; /* we want to be able to build a complete char here */
     int ran;                /* has filter instance run before? */
     int noop;               /* should we pass brigades through unchanged? */
     char *tmp;              /* buffer for input filtering */
     apr_bucket_brigade *bb; /* input buckets we couldn't finish translating */
 } charset_filter_ctx_t;

 /* charset_req_t is available via r->request_config if any translation is
  * being performed; it is set up by find_code_page()
  */
 typedef struct charset_req_t {
     charset_dir_t *dc;      /* directory configuration in effect for the request */
     /* preallocated filter contexts; input_ctx stays NULL unless
      * find_code_page() sets up input translation (PUT and POST requests)
      */
     charset_filter_ctx_t *output_ctx, *input_ctx;
 } charset_req_t;

 /* debug level definitions; a message is logged when the configured
  * DebugLevel is greater than or equal to the level of the message
  */
 #define DBGLVL_GORY           9 /* gory details */
 #define DBGLVL_FLOW           4 /* enough messages to see what happens on
                                  * each request */
 #define DBGLVL_PMC            2 /* messages about possible misconfiguration */

 /* forward declaration; needed by the ap_get_module_config() and
  * ap_set_module_config() calls below
  */
 module AP_MODULE_DECLARE_DATA charset_lite_module;

 /* create_charset_dir_conf() allocates a zero-filled directory configuration
  * from pool p and marks the debug level as not configured.
  *
  * The dummy argument is not used.
  */
 static void *create_charset_dir_conf(apr_pool_t *p,char *dummy)
 {
     charset_dir_t *conf = apr_pcalloc(p, sizeof(*conf));

     /* -1 lets the merge distinguish "not set" from an explicit level of 0 */
     conf->debug = -1;

     return conf;
 }

 /* merge_charset_dir_conf() combines the configuration of an enclosing
  * container (basev) with that of the current container (overridesv).
  *
  * Each setting is taken from the current container when it was defined
  * there, and from the enclosing container otherwise.  The result is a new
  * record allocated from pool p; neither input is modified.
  */
 static void *merge_charset_dir_conf(apr_pool_t *p, void *basev, void *overridesv)
 {
     charset_dir_t *parent = basev;
     charset_dir_t *child = overridesv;
     charset_dir_t *merged = apr_pcalloc(p, sizeof(*merged));

     /* -1 means the debug level was never set in the current container */
     if (child->debug != -1) {
         merged->debug = child->debug;
     }
     else {
         merged->debug = parent->debug;
     }

     if (child->charset_default) {
         merged->charset_default = child->charset_default;
     }
     else {
         merged->charset_default = parent->charset_default;
     }

     if (child->charset_source) {
         merged->charset_source = child->charset_source;
     }
     else {
         merged->charset_source = parent->charset_source;
     }

     /* IA_INIT means neither ImplicitAdd nor NoImplicitAdd was specified */
     if (child->implicit_add != IA_INIT) {
         merged->implicit_add = child->implicit_add;
     }
     else {
         merged->implicit_add = parent->implicit_add;
     }

     return merged;
 }

 /* CharsetSourceEnc charset
  *
  * Records the given charset name as the source encoding in the directory
  * configuration passed as in_dc.  The name is stored by reference, not
  * copied.  Always returns NULL (no error).
  */
 static const char *add_charset_source(cmd_parms *cmd, void *in_dc,
                                       const char *name)
 {
     ((charset_dir_t *)in_dc)->charset_source = name;

     return NULL;
 }

 /* CharsetDefault charset
  *
  * Records the given charset name as the on-the-wire encoding in the
  * directory configuration passed as in_dc.  The name is stored by
  * reference, not copied.  Always returns NULL (no error).
  */
 static const char *add_charset_default(cmd_parms *cmd, void *in_dc,
                                        const char *name)
 {
     ((charset_dir_t *)in_dc)->charset_default = name;

     return NULL;
 }

 /* CharsetOptions optionflag...
  *
  * Handles one option flag.  Recognized flags (matched without regard to
  * case):
  *   ImplicitAdd     - sets implicit_add to IA_IMPADD
  *   NoImplicitAdd   - sets implicit_add to IA_NOIMPADD
  *   DebugLevel=n    - sets the debug level to atoi(n)
  *
  * Returns NULL on success, or an error message allocated from
  * cmd->temp_pool when the flag is not recognized.
  */
 static const char *add_charset_options(cmd_parms *cmd, void *in_dc,
                                        const char *flag)
 {
     static const char debug_prefix[] = "DebugLevel=";
     const apr_size_t debug_prefix_len = sizeof(debug_prefix) - 1;
     charset_dir_t *conf = in_dc;

     if (strcasecmp(flag, "ImplicitAdd") == 0) {
         conf->implicit_add = IA_IMPADD;
         return NULL;
     }

     if (strcasecmp(flag, "NoImplicitAdd") == 0) {
         conf->implicit_add = IA_NOIMPADD;
         return NULL;
     }

     if (strncasecmp(flag, debug_prefix, debug_prefix_len) == 0) {
         /* everything after the '=' is the numeric level */
         conf->debug = atoi(flag + debug_prefix_len);
         return NULL;
     }

     return apr_pstrcat(cmd->temp_pool,
                        "Invalid CharsetOptions option: ",
                        flag,
                        NULL);
 }

 /* find_code_page() is a fixup hook that decides if translation should be
  * enabled; if so, it sets up request data for use by the filter registration
  * hook so that it knows what to do
  *
  * Nothing is set up when the directory configuration lacks a source or a
  * default charset, when the request is a proxy request, or when r->filename
  * carries one of the mod_rewrite indicators.
  *
  * Otherwise a charset_req_t with an output filter context is stored in
  * r->request_config.  For PUT and POST requests an input filter context is
  * prepared as well, and its translation handle is opened right away.
  *
  * Returns DECLINED in every case except one: HTTP_INTERNAL_SERVER_ERROR is
  * returned when the input translation cannot be opened.
  */
 static int find_code_page(request_rec *r)
 {
     charset_dir_t *dc = ap_get_module_config(r->per_dir_config,
                                              &charset_lite_module);
     charset_req_t *reqinfo;
     charset_filter_ctx_t *input_ctx, *output_ctx;
     apr_status_t rv;
     const char *mime_type;

     if (dc->debug >= DBGLVL_FLOW) {
         ap_log_rerror(APLOG_MARK,APLOG_DEBUG, 0, r,
                       "uri: %s file: %s method: %d "
                       "imt: %s flags: %s%s%s %s->%s",
                       r->uri, r->filename, r->method_number,
                       r->content_type ? r->content_type : "(unknown)",
                       r->main     ? "S" : "",    /* S if subrequest */
                       r->prev     ? "R" : "",    /* R if redirect */
                       r->proxyreq ? "P" : "",    /* P if proxy */
                       dc->charset_source, dc->charset_default);
     }

     /* If we don't have a full directory configuration, bail out.
      */
     if (!dc->charset_source || !dc->charset_default) {
         if (dc->debug >= DBGLVL_PMC) {
             ap_log_rerror(APLOG_MARK, APLOG_DEBUG, 0, r,
                           "incomplete configuration: src %s, dst %s",
                           dc->charset_source ? dc->charset_source : "unspecified",
                           dc->charset_default ? dc->charset_default : "unspecified");
         }
         return DECLINED;
     }

     /* catch proxy requests */
     if (r->proxyreq) return DECLINED;
     /* mod_rewrite indicators; r->filename is checked first because it must
      * not be handed to strncmp() when no filename has been set
      */
     if (r->filename
         && (!strncmp(r->filename, "redirect:", 9)
             || !strncmp(r->filename, "gone:", 5)
             || !strncmp(r->filename, "passthrough:", 12)
             || !strncmp(r->filename, "forbidden:", 10))) {
         return DECLINED;
     }

     mime_type = r->content_type ? r->content_type : ap_default_type(r);

     /* If mime type isn't text or message, log that fact (but see below).
      */

 /* XXX When we handle translation of the request body, watch out here as
  *     1.3 allowed additional mime types: multipart and
  *     application/x-www-form-urlencoded
  */

     if (strncasecmp(mime_type, "text/", 5) &&
 #if APR_CHARSET_EBCDIC || AP_WANT_DIR_TRANSLATION
         /* On an EBCDIC machine, be willing to translate mod_autoindex-
          * generated output.  Otherwise, it doesn't look too cool.
          *
          * XXX This isn't a perfect fix because this doesn't trigger us
          * to convert from the charset of the source code to ASCII.  The
          * general solution seems to be to allow a generator to set an
          * indicator in the r specifying that the body is coded in the
          * implementation character set (i.e., the charset of the source
          * code).  This would get several different types of documents
          * translated properly: mod_autoindex output, mod_status output,
          * mod_info output, hard-coded error documents, etc.
          */
         strcmp(mime_type, DIR_MAGIC_TYPE) &&
 #endif
         strncasecmp(mime_type, "message/", 8)) {
         if (dc->debug >= DBGLVL_GORY) {
             ap_log_rerror(APLOG_MARK, APLOG_DEBUG, 0, r,
                           "mime type is %s; no translation selected",
                           mime_type);
         }
         /* We must not bail out here (i.e., the MIME test must be in the filter
          * itself, not in the fixup, because only then is the final MIME type known.
          * Examples for late changes to the MIME type include CGI handling (MIME
          * type is set in the Content-Type header produced by the CGI script), or
          * PHP (until PHP runs, the MIME type is set to application/x-httpd-php)
          */
     }

     /* Both charsets are known to be non-NULL here; see the check above. */
     if (dc->debug >= DBGLVL_GORY) {
         ap_log_rerror(APLOG_MARK, APLOG_DEBUG, 0, r,
                       "charset_source: %s charset_default: %s",
                       dc->charset_source, dc->charset_default);
     }

     /* Get storage for the request data and the output filter context.
      * We rarely need the input filter context, so allocate that separately.
      */
     reqinfo = (charset_req_t *)apr_pcalloc(r->pool,
                                            sizeof(charset_req_t) +
                                            sizeof(charset_filter_ctx_t));
     output_ctx = (charset_filter_ctx_t *)(reqinfo + 1);

     reqinfo->dc = dc;
     output_ctx->dc = dc;
     ap_set_module_config(r->request_config, &charset_lite_module, reqinfo);

     reqinfo->output_ctx = output_ctx;

     /* We must not open the xlation table here yet, because the final MIME
      * type is not known until we are actually called in the output filter.
      * With POST or PUT request, the case is different, because their MIME
      * type is set in the request headers, and their data are prerequisites
      * for actually calling, e.g., the CGI handler later on.
      */
     output_ctx->xlate = NULL;

     switch (r->method_number) {
     case M_PUT:
     case M_POST:
         /* Set up input translation.  Note: A request body can be included
          * with the OPTIONS method, but for now we don't set up translation
          * of it.
          */
         input_ctx = apr_pcalloc(r->pool, sizeof(charset_filter_ctx_t));
         input_ctx->bb = apr_brigade_create(r->pool,
                                            r->connection->bucket_alloc);
         input_ctx->tmp = apr_palloc(r->pool, INPUT_XLATE_BUF_SIZE);
         input_ctx->dc = dc;
         reqinfo->input_ctx = input_ctx;
         /* input is translated from the wire charset to the source charset */
         rv = apr_xlate_open(&input_ctx->xlate, dc->charset_source,
                             dc->charset_default, r->pool);
         if (rv != APR_SUCCESS) {
             ap_log_rerror(APLOG_MARK, APLOG_ERR, rv, r,
                           "can't open translation %s->%s",
                           dc->charset_default, dc->charset_source);
             return HTTP_INTERNAL_SERVER_ERROR;
         }
         break;
     default:
         /* no request body translation for other methods */
         break;
     }

     return DECLINED;
 }

 /* configured_in_list() reports whether a filter with the given name is
  * present in filter_list.  Names are compared without regard to case.
  *
  * Returns 1 if a filter with that name is found, 0 otherwise.  The request
  * argument is not used.
  */
 static int configured_in_list(request_rec *r, const char *filter_name,
                               struct ap_filter_t *filter_list)
 {
     struct ap_filter_t *cur;

     for (cur = filter_list; cur != NULL; cur = cur->next) {
         if (strcasecmp(filter_name, cur->frec->name) == 0) {
             return 1;
         }
     }

     return 0;
 }

 /* configured_on_input() reports whether a filter named filter_name is
  * already present in the input filter chain of request r (1 if so, else 0).
  */
 static int configured_on_input(request_rec *r, const char *filter_name)
 {
     struct ap_filter_t *chain = r->input_filters;

     return configured_in_list(r, filter_name, chain);
 }

 /* configured_on_output() reports whether a filter named filter_name is
  * already present in the output filter chain of request r (1 if so, else 0).
  */
 static int configured_on_output(request_rec *r, const char *filter_name)
 {
     struct ap_filter_t *chain = r->output_filters;

     return configured_in_list(r, filter_name, chain);
 }

 /* xlate_insert_filter() is a filter hook which decides whether or not
  * to insert a translation filter for the current request.
  *
  * Nothing is inserted when
  * . the directory configuration specifies CharsetOptions NoImplicitAdd, or
  * . find_code_page() did not set up any request data.
  *
  * Otherwise the output and/or input translation filter is added, using the
  * context prepared by find_code_page(), unless no such context exists or a
  * filter with the same name is already in the respective chain.
  */
 static void xlate_insert_filter(request_rec *r)
 {
     /* Hey... don't be so quick to use reqinfo->dc here; reqinfo may be NULL */
     charset_req_t *reqinfo = ap_get_module_config(r->request_config,
                                                   &charset_lite_module);
     charset_dir_t *dc = ap_get_module_config(r->per_dir_config,
                                              &charset_lite_module);

     /* Honor NoImplicitAdd: the administrator wants to place the filters
      * explicitly, so this hook must not add them.
      */
     if (dc && dc->implicit_add == IA_NOIMPADD) {
         if (dc->debug >= DBGLVL_FLOW) {
             ap_log_rerror(APLOG_MARK, APLOG_DEBUG, 0, r,
                           "xlate filters not added implicitly because "
                           "CharsetOptions included 'NoImplicitAdd'");
         }
         return;
     }

     if (!reqinfo) {
         /* find_code_page() decided not to do anything for this request */
         return;
     }

     if (reqinfo->output_ctx && !configured_on_output(r, XLATEOUT_FILTER_NAME)) {
         ap_add_output_filter(XLATEOUT_FILTER_NAME, reqinfo->output_ctx, r,
                              r->connection);
     }
     else if (dc->debug >= DBGLVL_FLOW) {
         ap_log_rerror(APLOG_MARK, APLOG_DEBUG, 0, r,
                       "xlate output filter not added implicitly because %s",
                       !reqinfo->output_ctx ?
                       "no output configuration available" :
                       "another module added the filter");
     }

     if (reqinfo->input_ctx && !configured_on_input(r, XLATEIN_FILTER_NAME)) {
         ap_add_input_filter(XLATEIN_FILTER_NAME, reqinfo->input_ctx, r,
                             r->connection);
     }
     else if (dc->debug >= DBGLVL_FLOW) {
         ap_log_rerror(APLOG_MARK, APLOG_DEBUG, 0, r,
                       "xlate input filter not added implicitly because %s",
                       !reqinfo->input_ctx ?
                       "no input configuration available" :
                       "another module added the filter");
     }
 }

 /* stuff that sucks that I know of:
  *
  * bucket handling:
  *  why create an eos bucket when we see it come down the stream?  just send the one
  *  passed as input...  news flash: this will be fixed when xlate_out_filter() starts
  *  using the more generic xlate_brigade()
  *
  * translation mechanics:
  *   we don't handle characters that straddle more than two buckets; an error
  *   will be generated
  */

 /* send_downstream() is passed the translated data; it puts it in a single-
  * bucket brigade and passes the brigade to the next filter
  *
  * The data is wrapped in a transient bucket, i.e., it is referenced rather
  * than copied when the bucket is created.
  *
  * Returns the status of ap_pass_brigade(); on failure the extended error
  * status of the filter context is set to EES_DOWNSTREAM.
  */
 static apr_status_t send_downstream(ap_filter_t *f, const char *tmp, apr_size_t len)
 {
     charset_filter_ctx_t *filter_ctx = f->ctx;
     conn_rec *conn = f->r->connection;
     apr_bucket_brigade *out;
     apr_bucket *data;
     apr_status_t status;

     out = apr_brigade_create(f->r->pool, conn->bucket_alloc);
     data = apr_bucket_transient_create(tmp, len, conn->bucket_alloc);
     APR_BRIGADE_INSERT_TAIL(out, data);

     status = ap_pass_brigade(f->next, out);
     if (status == APR_SUCCESS) {
         return status;
     }

     filter_ctx->ees = EES_DOWNSTREAM;
     return status;
 }

 /* send_eos() passes a brigade holding nothing but a newly created EOS
  * bucket to the next filter.
  *
  * Returns the status of ap_pass_brigade(); on failure the extended error
  * status of the filter context is set to EES_DOWNSTREAM.
  */
 static apr_status_t send_eos(ap_filter_t *f)
 {
     charset_filter_ctx_t *filter_ctx = f->ctx;
     conn_rec *conn = f->r->connection;
     apr_bucket_brigade *out;
     apr_bucket *eos;
     apr_status_t status;

     out = apr_brigade_create(f->r->pool, conn->bucket_alloc);
     eos = apr_bucket_eos_create(conn->bucket_alloc);
     APR_BRIGADE_INSERT_TAIL(out, eos);

     status = ap_pass_brigade(f->next, out);
     if (status == APR_SUCCESS) {
         return status;
     }

     filter_ctx->ees = EES_DOWNSTREAM;
     return status;
 }

 /* set_aside_partial_char() saves the leading bytes of a character that was
  * cut off at the end of a bucket so that it can be completed later.
  *
  * partial/partial_len describe the bytes to save.  They are copied to
  * ctx->buf and ctx->saved is set to their count.
  *
  * Returns APR_SUCCESS if the bytes were saved.  If they do not fit (the
  * count must be smaller than the size of ctx->buf), nothing is copied,
  * ctx->ees is set to EES_LIMIT, and APR_INCOMPLETE is returned.
  */
 static apr_status_t set_aside_partial_char(charset_filter_ctx_t *ctx,
                                            const char *partial,
                                            apr_size_t partial_len)
 {
     if (partial_len >= sizeof(ctx->buf)) {
         /* we don't handle chars this wide which straddle buckets */
         ctx->ees = EES_LIMIT;
         return APR_INCOMPLETE;
     }

     memcpy(ctx->buf, partial, partial_len);
     ctx->saved = partial_len;
     return APR_SUCCESS;
 }

 /* finish_partial_char() tries to complete and translate the partial
  * character saved in ctx->buf by appending bytes from the input string.
  *
  * inputs:
  *   cur_str, cur_len: input bytes and their count
  *   out_str, out_len: output buffer and the space available in it
  *
  * outputs:
  *   return value:     APR_SUCCESS if the character was translated; otherwise
  *                     the status of the last conversion attempt, or
  *                     APR_INCOMPLETE if no attempt could be made
  *   cur_str, cur_len: advanced past/reduced by the input bytes consumed
  *   out_len:          updated by apr_xlate_conv_buffer()
  *   (*out_str is NOT advanced; the caller has to do that)
  *
  * On success ctx->saved is reset to 0; on any failure ctx->ees is set to
  * EES_LIMIT.
  */
 static apr_status_t finish_partial_char(charset_filter_ctx_t *ctx,
                                         /* input buffer: */
                                         const char **cur_str,
                                         apr_size_t *cur_len,
                                         /* output buffer: */
                                         char **out_str,
                                         apr_size_t *out_len)
 {
     apr_status_t rv = APR_INCOMPLETE;
     apr_size_t tmp_input_len;

     /* Keep adding bytes from the input string to the saved string until we
      *    1) finish the input char
      *    2) get an error
      *    3) run out of bytes to add
      * or 4) run out of room in ctx->buf (never write past its end)
      *
      * The input length is tested before the first byte is read, so an empty
      * input string is reported as APR_INCOMPLETE instead of being read.
      */
     while (rv == APR_INCOMPLETE
            && *cur_len > 0
            && ctx->saved < sizeof(ctx->buf)) {
         ctx->buf[ctx->saved] = **cur_str;
         ++ctx->saved;
         ++*cur_str;
         --*cur_len;
         tmp_input_len = ctx->saved;
         rv = apr_xlate_conv_buffer(ctx->xlate,
                                    ctx->buf,
                                    &tmp_input_len,
                                    *out_str,
                                    out_len);
     }

     if (rv == APR_SUCCESS) {
         ctx->saved = 0;
     }
     else {
         ctx->ees = EES_LIMIT; /* code isn't smart enough to handle chars
                                * straddling more than two buckets, or chars
                                * too wide for ctx->buf
                                */
     }

     return rv;
 }

 /* log_xlate_error() writes an error log entry describing why the
  * translation filter failed.
  *
  * The message is selected by the extended error status in the filter
  * context.  rv is logged along with the message only for EES_DOWNSTREAM and
  * for unrecognized states; for the other states it is replaced by 0.
  * For EES_INCOMPLETE_CHAR the saved bytes of the partial character are
  * appended to the message in hex.
  */
 static void log_xlate_error(ap_filter_t *f, apr_status_t rv)
 {
     charset_filter_ctx_t *ctx = f->ctx;
     const char *msg;
     char msgbuf[100];
     apr_size_t cur;

     switch(ctx->ees) {
     case EES_LIMIT:
         rv = 0;
         msg = "xlate filter - a built-in restriction was encountered";
         break;
     case EES_BAD_INPUT:
         rv = 0;
         msg = "xlate filter - an input character was invalid";
         break;
     case EES_BUCKET_READ:
         rv = 0;
         msg = "xlate filter - bucket read routine failed";
         break;
     case EES_INCOMPLETE_CHAR:
         rv = 0;
         strcpy(msgbuf, "xlate filter - incomplete char at end of input - ");
         for (cur = 0; cur < ctx->saved; ++cur) {
             /* Go through unsigned char so that bytes >= 0x80 are not
              * sign-extended (and printed as FFFFFFxx) where char is signed.
              */
             apr_snprintf(msgbuf + strlen(msgbuf), sizeof(msgbuf) - strlen(msgbuf),
                          "%02X", (unsigned)(unsigned char)ctx->buf[cur]);
         }
         msg = msgbuf;
         break;
     case EES_DOWNSTREAM:
         msg = "xlate filter - an error occurred in a lower filter";
         break;
     default:
         msg = "xlate filter - returning error";
         break;
     }
     ap_log_rerror(APLOG_MARK, APLOG_ERR, rv, f->r,
                   "%s", msg);
 }

 /* chk_filter_chain() is called once per filter instance; it tries to
  * determine if the current filter instance should be disabled because
  * its translation is incompatible with the translation of an existing
  * instance of the translate filter
  *
  * Example bad scenario:
  *
  *   configured filter chain for the request:
  *     INCLUDES XLATEOUT(8859-1->UTF-16)
  *   configured filter chain for the subrequest:
  *     XLATEOUT(8859-1->UTF-16)
  *
  *   When the subrequest is processed, the filter chain will be
  *     XLATEOUT(8859-1->UTF-16) XLATEOUT(8859-1->UTF-16)
  *   This makes no sense, so the instance of XLATEOUT added for the
  *   subrequest will be noop-ed.
  *
  * Example good scenario:
  *
  *   configured filter chain for the request:
  *     INCLUDES XLATEOUT(8859-1->UTF-16)
  *   configured filter chain for the subrequest:
  *     XLATEOUT(IBM-1047->8859-1)
  *
  *   When the subrequest is processed, the filter chain will be
  *     XLATEOUT(IBM-1047->8859-1) XLATEOUT(8859-1->UTF-16)
  *   This makes sense, so the instance of XLATEOUT added for the
  *   subrequest will be left alone and it will translate from
  *   IBM-1047->8859-1.
  *
  * The only state changed here is the noop flag of our own filter context.
  */
 static void chk_filter_chain(ap_filter_t *f)
 {
     ap_filter_t *curf;
     charset_filter_ctx_t *curctx, *last_xlate_ctx = NULL,
         *ctx = f->ctx;
     int debug = ctx->dc->debug;
     /* the filter name tells us which direction (and which chain) we are in */
     int output = !strcasecmp(f->frec->name, XLATEOUT_FILTER_NAME);

     /* already disabled; nothing left to decide */
     if (ctx->noop) {
         return;
     }

     /* walk the filter chain; see if it makes sense for our filter to
      * do any translation
      */
     curf = output ? f->r->output_filters : f->r->input_filters;
     while (curf) {
         /* only instances of our own filter which have a context matter */
         if (!strcasecmp(curf->frec->name, f->frec->name) &&
             curf->ctx) {
             curctx = (charset_filter_ctx_t *)curf->ctx;
             if (!last_xlate_ctx) {
                 /* first instance found in the chain; every later instance
                  * is compared against this one (it is never replaced)
                  */
                 last_xlate_ctx = curctx;
             }
             else {
                 /* the charset produced by the first instance must be the
                  * charset consumed by this later instance
                  */
                 if (strcmp(last_xlate_ctx->dc->charset_default,
                            curctx->dc->charset_source)) {
                     /* incompatible translation
                      * if our filter instance is incompatible with an instance
                      * already in place, noop our instance
                      * Notes:
                      * . We are only willing to noop our own instance.
                      * . It is possible to noop another instance which has not
                      *   yet run, but this is not currently implemented.
                      *   Hopefully it will not be needed.
                      * . It is not possible to noop an instance which has
                      *   already run.
                      */
                     if (last_xlate_ctx == f->ctx) {
                         last_xlate_ctx->noop = 1;
                         if (debug >= DBGLVL_PMC) {
                             const char *symbol = output ? "->" : "<-";

                             ap_log_rerror(APLOG_MARK, APLOG_DEBUG,
                                           0, f->r,
                                           "%s %s - disabling "
                                           "translation %s%s%s; existing "
                                           "translation %s%s%s",
                                           f->r->uri ? "uri" : "file",
                                           f->r->uri ? f->r->uri : f->r->filename,
                                           last_xlate_ctx->dc->charset_source,
                                           symbol,
                                           last_xlate_ctx->dc->charset_default,
                                           curctx->dc->charset_source,
                                           symbol,
                                           curctx->dc->charset_default);
                         }
                     }
                     else {
                         /* the first instance is not ours, so we can only
                          * report the problem (regardless of debug level)
                          */
                         const char *symbol = output ? "->" : "<-";

                         ap_log_rerror(APLOG_MARK, APLOG_ERR,
                                       0, f->r,
                                       "chk_filter_chain() - can't disable "
                                       "translation %s%s%s; existing "
                                       "translation %s%s%s",
                                       last_xlate_ctx->dc->charset_source,
                                       symbol,
                                       last_xlate_ctx->dc->charset_default,
                                       curctx->dc->charset_source,
                                       symbol,
                                       curctx->dc->charset_default);
                     }
                     /* stop at the first incompatibility found */
                     break;
                 }
             }
         }
         curf = curf->next;
     }
 }

 /* xlate_brigade() is used to filter request and response bodies
  *
  * we'll stop when one of the following occurs:
  * . we run out of buckets (or reach an EOS bucket)
  * . we run out of space in the output buffer (fewer than
  *   XLATE_MIN_BUFF_LEFT bytes remain)
  * . we hit an error
  *
  * inputs:
  *   ctx:              filter context (translation handle, partial char)
  *   bb:               brigade to process
  *   buffer:           storage to hold the translated characters
  *   buffer_avail:     number of bytes available in buffer
  *
  * outputs:
  *   return value:     APR_SUCCESS or some error code; on error, ctx->ees
  *                     may hold more information
  *   bb:               we've removed any buckets representing the
  *                     translated characters; the eos bucket, if
  *                     present, will be left in the brigade
  *   buffer:           filled in with translated characters
  *   buffer_avail:     updated with the bytes remaining
  *   hit_eos:          did we hit an EOS bucket?
  */
 static apr_status_t xlate_brigade(charset_filter_ctx_t *ctx,
                                   apr_bucket_brigade *bb,
                                   char *buffer,
                                   apr_size_t *buffer_avail,
                                   int *hit_eos)
 {
     apr_bucket *b = NULL; /* set to NULL only to quiet some gcc */
     apr_bucket *consumed_bucket;
     const char *bucket;
     apr_size_t bytes_in_bucket; /* total bytes read from current bucket */
     apr_size_t bucket_avail;    /* bytes left in current bucket */
     apr_status_t rv = APR_SUCCESS;

     *hit_eos = 0;
     bucket_avail = 0;
     consumed_bucket = NULL;
     while (1) {
         if (!bucket_avail) { /* no bytes left to process in the current bucket... */
             if (consumed_bucket) {
                 /* the previous bucket has been translated completely */
                 apr_bucket_delete(consumed_bucket);
                 consumed_bucket = NULL;
             }
             b = APR_BRIGADE_FIRST(bb);
             if (b == APR_BRIGADE_SENTINEL(bb) ||
                 APR_BUCKET_IS_EOS(b)) {
                 /* brigade is empty or only EOS (and what follows) is left */
                 break;
             }
             rv = apr_bucket_read(b, &bucket, &bytes_in_bucket, APR_BLOCK_READ);
             if (rv != APR_SUCCESS) {
                 ctx->ees = EES_BUCKET_READ;
                 break;
             }
             bucket_avail = bytes_in_bucket;
             consumed_bucket = b;   /* for axing when we're done reading it */
         }
         if (bucket_avail) {
             /* We've got data, so translate it. */
             if (ctx->saved) {
                 /* Rats... we need to finish a partial character from the previous
                  * bucket.
                  *
                  * Strangely, finish_partial_char() increments the input buffer
                  * pointer but does not increment the output buffer pointer.
                  */
                 apr_size_t old_buffer_avail = *buffer_avail;
                 rv = finish_partial_char(ctx,
                                          &bucket, &bucket_avail,
                                          &buffer, buffer_avail);
                 /* advance the output pointer by the bytes just produced */
                 buffer += old_buffer_avail - *buffer_avail;
             }
             else {
                 apr_size_t old_buffer_avail = *buffer_avail;
                 apr_size_t old_bucket_avail = bucket_avail;
                 rv = apr_xlate_conv_buffer(ctx->xlate,
                                            bucket, &bucket_avail,
                                            buffer,
                                            buffer_avail);
                 /* advance both pointers by what the conversion used up */
                 buffer  += old_buffer_avail - *buffer_avail;
                 bucket  += old_bucket_avail - bucket_avail;

                 if (rv == APR_INCOMPLETE) { /* partial character at end of input */
                     /* We need to save the final byte(s) for next time; we can't
                      * convert it until we look at the next bucket.
                      */
                     rv = set_aside_partial_char(ctx, bucket, bucket_avail);
                     /* nothing is left to process in this bucket */
                     bucket_avail = 0;
                 }
             }
             if (rv != APR_SUCCESS) {
                 /* bad input byte or partial char too big to store */
                 break;
             }
             if (*buffer_avail < XLATE_MIN_BUFF_LEFT) {
                 /* if any data remains in the current bucket, split there */
                 if (bucket_avail) {
                     apr_bucket_split(b, bytes_in_bucket - bucket_avail);
                 }
                 /* drop the bucket (or the part of it) already translated */
                 apr_bucket_delete(b);
                 break;
             }
         }
     }

     if (!APR_BRIGADE_EMPTY(bb)) {
         b = APR_BRIGADE_FIRST(bb);
         if (APR_BUCKET_IS_EOS(b)) {
             /* Leave the eos bucket in the brigade for reporting to
              * subsequent filters.
              */
             *hit_eos = 1;
             if (ctx->saved) {
                 /* Oops... we have a partial char from the previous bucket
                  * that won't be completed because there's no more data.
                  */
                 rv = APR_INCOMPLETE;
                 ctx->ees = EES_INCOMPLETE_CHAR;
             }
         }
     }

     return rv;
 }

 /* xlate_out_filter() handles (almost) arbitrary conversions from one charset
  * to another on the response body.
  *
  * The filter context is either attached to the filter already or picked up
  * from reqinfo->output_ctx (the SetOutputFilter path); if neither exists the
  * filter turns itself into a pass-through.  The translation handle is
  * opened lazily, on the first call, so that the content type examined is
  * the one in effect when the body is actually produced.
  *
  * Each bucket of bb is read, converted into a buffer on the stack, and sent
  * to the next filter with send_downstream() whenever the buffer is nearly
  * full and once more at the end.  Buckets are deleted once they have been
  * consumed.  An EOS bucket in bb results in a send_eos() call after all the
  * converted data has been sent.
  *
  * Returns APR_SUCCESS or the first error encountered while reading,
  * converting, or sending; such errors are reported via log_xlate_error().
  */
 static apr_status_t xlate_out_filter(ap_filter_t *f, apr_bucket_brigade *bb)
 {
     charset_req_t *reqinfo = ap_get_module_config(f->r->request_config,
                                                   &charset_lite_module);
     charset_dir_t *dc = ap_get_module_config(f->r->per_dir_config,
                                              &charset_lite_module);
     charset_filter_ctx_t *ctx = f->ctx;
     apr_bucket *dptr, *consumed_bucket;
     const char *cur_str;
     apr_size_t cur_len, cur_avail;
     char tmp[OUTPUT_XLATE_BUF_SIZE];
     apr_size_t space_avail;
     int done;
     int eos_seen = 0;           /* set once the EOS bucket has been reached */
     apr_status_t rv = APR_SUCCESS;

     if (!ctx) {
         /* this is SetOutputFilter path; grab the preallocated context,
          * if any; note that if we decided not to do anything in an earlier
          * handler, we won't even have a reqinfo
          */
         if (reqinfo) {
             ctx = f->ctx = reqinfo->output_ctx;
             reqinfo->output_ctx = NULL; /* prevent SNAFU if user coded us twice
                                          * in the filter chain; we can't have two
                                          * instances using the same context
                                          */
         }
         if (!ctx) {                   /* no idea how to translate; don't do anything */
             ctx = f->ctx = apr_pcalloc(f->r->pool, sizeof(charset_filter_ctx_t));
             ctx->dc = dc;
             ctx->noop = 1;
         }
     }

     /* catch proxy requests: we never translate those, but the data still
      * has to reach the next filter.  (Returning DECLINED here would hand a
      * bogus status to our caller and drop the brigade on the floor.)
      */
     if (f->r->proxyreq) {
         return ap_pass_brigade(f->next, bb);
     }

     /* Opening the output translation (this used to be done in the fixup hook,
      * but that was too early: a subsequent type modification, e.g., by a
      * CGI script, would go unnoticed. Now we do it in the filter itself.)
      */
     if (!ctx->noop && ctx->xlate == NULL) {
         const char *mime_type = f->r->content_type ? f->r->content_type
                                                    : ap_default_type(f->r);

         /* XXX When we handle translation of the request body, watch out here as
          *     1.3 allowed additional mime types: multipart and
          *     application/x-www-form-urlencoded
          */
         if (strncasecmp(mime_type, "text/", 5) == 0 ||
 #if APR_CHARSET_EBCDIC
         /* On an EBCDIC machine, be willing to translate mod_autoindex-
          * generated output.  Otherwise, it doesn't look too cool.
          *
          * XXX This isn't a perfect fix because this doesn't trigger us
          * to convert from the charset of the source code to ASCII.  The
          * general solution seems to be to allow a generator to set an
          * indicator in the r specifying that the body is coded in the
          * implementation character set (i.e., the charset of the source
          * code).  This would get several different types of documents
          * translated properly: mod_autoindex output, mod_status output,
          * mod_info output, hard-coded error documents, etc.
          */
             strcmp(mime_type, DIR_MAGIC_TYPE) == 0 ||
 #endif
             strncasecmp(mime_type, "message/", 8) == 0) {

             rv = apr_xlate_open(&ctx->xlate,
                                 dc->charset_default, dc->charset_source,
                                 f->r->pool);
             if (rv != APR_SUCCESS) {
                 /* can't translate; degrade to a pass-through filter */
                 ap_log_rerror(APLOG_MARK, APLOG_ERR, rv, f->r,
                               "can't open translation %s->%s",
                               dc->charset_source, dc->charset_default);
                 ctx->noop = 1;
             }
         }
         else {
             ctx->noop = 1;
             if (dc->debug >= DBGLVL_GORY) {
                 ap_log_rerror(APLOG_MARK, APLOG_DEBUG, 0, f->r,
                               "mime type is %s; no translation selected",
                               mime_type);
             }
         }
     }

     if (dc->debug >= DBGLVL_GORY) {
         ap_log_rerror(APLOG_MARK, APLOG_DEBUG, 0, f->r,
                      "xlate_out_filter() - "
                      "charset_source: %s charset_default: %s",
                      dc->charset_source ? dc->charset_source : "(none)",
                      dc->charset_default ? dc->charset_default : "(none)");
     }

     if (!ctx->ran) {  /* filter never ran before */
         chk_filter_chain(f);
         ctx->ran = 1;
     }

     if (ctx->noop) {
         return ap_pass_brigade(f->next, bb);
     }

     dptr = APR_BRIGADE_FIRST(bb);
     done = 0;
     cur_len = 0;
     space_avail = sizeof(tmp);
     consumed_bucket = NULL;
     while (!done) {
         if (!cur_len) { /* no bytes left to process in the current bucket... */
             if (consumed_bucket) {
                 apr_bucket_delete(consumed_bucket);
                 consumed_bucket = NULL;
             }
             if (dptr == APR_BRIGADE_SENTINEL(bb)) {
                 break;
             }
             if (APR_BUCKET_IS_EOS(dptr)) {
                 eos_seen = 1;   /* remember to send eos down once we're done */
                 if (ctx->saved) {
                     /* Oops... we have a partial char from the previous bucket
                      * that won't be completed because there's no more data.
                      */
                     rv = APR_INCOMPLETE;
                     ctx->ees = EES_INCOMPLETE_CHAR;
                 }
                 break;
             }
             rv = apr_bucket_read(dptr, &cur_str, &cur_len, APR_BLOCK_READ);
             if (rv != APR_SUCCESS) {
                 ctx->ees = EES_BUCKET_READ;
                 break;
             }
             consumed_bucket = dptr; /* for axing when we're done reading it */
             dptr = APR_BUCKET_NEXT(dptr); /* get ready for when we access the
                                           * next bucket */
         }
         /* Try to fill up our tmp buffer with translated data. */
         cur_avail = cur_len;

         if (cur_len) { /* maybe we just hit the end of a pipe (len = 0) ? */
             if (ctx->saved) {
                 /* Rats... we need to finish a partial character from the previous
                  * bucket.
                  */
                 char *tmp_tmp;

                 tmp_tmp = tmp + sizeof(tmp) - space_avail;
                 rv = finish_partial_char(ctx,
                                          &cur_str, &cur_len,
                                          &tmp_tmp, &space_avail);
             }
             else {
                 rv = apr_xlate_conv_buffer(ctx->xlate,
                                            cur_str, &cur_avail,
                                            tmp + sizeof(tmp) - space_avail, &space_avail);

                 /* Update input ptr and len after consuming some bytes */
                 cur_str += cur_len - cur_avail;
                 cur_len = cur_avail;

                 if (rv == APR_INCOMPLETE) { /* partial character at end of input */
                     /* We need to save the final byte(s) for next time; we can't
                      * convert it until we look at the next bucket.
                      */
                     rv = set_aside_partial_char(ctx, cur_str, cur_len);
                     cur_len = 0;
                 }
             }
         }

         if (rv != APR_SUCCESS) {
             /* bad input byte or partial char too big to store; stop here
              * and do NOT flush, since that would overwrite rv and make the
              * failure look like success to our caller
              */
             done = 1;
         }
         else if (space_avail < XLATE_MIN_BUFF_LEFT) {
             /* It is time to flush, as there is not enough space left in the
              * current output buffer to bother with converting more data.
              */
             rv = send_downstream(f, tmp, sizeof(tmp) - space_avail);
             if (rv != APR_SUCCESS) {
                 done = 1;
             }

             /* tmp is now empty */
             space_avail = sizeof(tmp);
         }
     }

     if (rv == APR_SUCCESS) {
         if (space_avail < sizeof(tmp)) { /* gotta write out what we converted */
             rv = send_downstream(f, tmp, sizeof(tmp) - space_avail);
         }
     }
     if (rv == APR_SUCCESS) {
         if (eos_seen) {
             rv = send_eos(f);
         }
     }
     else {
         log_xlate_error(f, rv);
     }

     return rv;
 }

 /* xlate_in_filter() is the input-side filter: it obtains a brigade from the
  * next filter (or reuses buckets left over from the previous call), runs it
  * through xlate_brigade(), and hands the converted bytes back to the caller
  * as a single heap bucket placed at the head of bb.
  *
  * Returns the status of ap_get_brigade()/xlate_brigade(); translation
  * failures are reported via log_xlate_error().
  */
 static int xlate_in_filter(ap_filter_t *f, apr_bucket_brigade *bb,
                            ap_input_mode_t mode, apr_read_type_e block,
                            apr_off_t readbytes)
 {
     charset_dir_t *dc = ap_get_module_config(f->r->per_dir_config,
                                              &charset_lite_module);
     charset_req_t *reqinfo = ap_get_module_config(f->r->request_config,
                                                   &charset_lite_module);
     charset_filter_ctx_t *ctx = f->ctx;
     apr_size_t space_left;
     apr_status_t status;
     int hit_eos;

     if (ctx == NULL) {
         /* SetInputFilter path: take over the preallocated context, if there
          * is one.  There is no reqinfo at all when an earlier handler
          * decided that nothing should be done.
          */
         if (reqinfo != NULL) {
             ctx = reqinfo->input_ctx;
             /* a second instance of us in the chain must not share it */
             reqinfo->input_ctx = NULL;
         }
         if (ctx == NULL) {
             /* nothing tells us how to translate, so become a pass-through */
             ctx = apr_pcalloc(f->r->pool, sizeof(charset_filter_ctx_t));
             ctx->dc = dc;
             ctx->noop = 1;
         }
         f->ctx = ctx;
     }

     if (dc->debug >= DBGLVL_GORY) {
         const char *src_name = "(none)";
         const char *dflt_name = "(none)";

         if (dc && dc->charset_source) {
             src_name = dc->charset_source;
         }
         if (dc && dc->charset_default) {
             dflt_name = dc->charset_default;
         }
         ap_log_rerror(APLOG_MARK, APLOG_DEBUG, 0, f->r,
                      "xlate_in_filter() - "
                      "charset_source: %s charset_default: %s",
                      src_name, dflt_name);
     }

     if (!ctx->ran) {
         /* first invocation for this filter instance */
         chk_filter_chain(f);
         ctx->ran = 1;
     }

     if (ctx->noop) {
         return ap_get_brigade(f->next, bb, mode, block, readbytes);
     }

     if (!APR_BRIGADE_EMPTY(ctx->bb)) {
         /* leftovers from the previous call get processed first */
         APR_BRIGADE_PREPEND(bb, ctx->bb);
     }
     else {
         status = ap_get_brigade(f->next, bb, mode, block, readbytes);
         if (status != APR_SUCCESS) {
             return status;
         }
     }

     space_left = INPUT_XLATE_BUF_SIZE;
     status = xlate_brigade(ctx, bb, ctx->tmp, &space_left, &hit_eos);
     if (status != APR_SUCCESS) {
         log_xlate_error(f, status);
         return status;
     }

     if (!hit_eos) {
         /* Park whatever was not consumed in our context for the next call.
          * The buckets are not "set aside" at present since the data came
          * from below us, though that probably needs to change long-term.
          */
         APR_BRIGADE_CONCAT(ctx->bb, bb);
     }

     if (space_left < INPUT_XLATE_BUF_SIZE) {
         /* Something was converted.  It goes at the head of the brigade
          * because an eos bucket may already be in there, and eos has to
          * follow the data.
          */
         apr_bucket *converted =
             apr_bucket_heap_create(ctx->tmp,
                                    INPUT_XLATE_BUF_SIZE - space_left,
                                    NULL, f->r->connection->bucket_alloc);

         APR_BRIGADE_INSERT_HEAD(bb, converted);
     }
     /* XXX otherwise we need to get some more data... what if the last
      * brigade we got had only the first byte of a multibyte char?  we need
      * to grab more data from the network instead of returning an empty
      * brigade
      */

     return status;
 }

 /* Configuration directives understood by this module.  Each entry names the
  * directive, the function that handles it, and its help text; all three are
  * registered with OR_FILEINFO.
  */
 static const command_rec cmds[] =
 {
     /* charset the content is stored in; takes exactly one argument */
     AP_INIT_TAKE1("CharsetSourceEnc", add_charset_source, NULL, OR_FILEINFO,
                   "source (html,cgi,ssi) file charset"),

     /* second charset of the translation pair; takes exactly one argument */
     AP_INIT_TAKE1("CharsetDefault", add_charset_default, NULL, OR_FILEINFO,
                   "name of default charset"),

     /* option keywords; the handler is invoked once per argument */
     AP_INIT_ITERATE("CharsetOptions", add_charset_options, NULL, OR_FILEINFO,
                     "valid options: ImplicitAdd, NoImplicitAdd, DebugLevel=n"),

     { NULL }    /* end-of-table marker */
 };

 /* Registers the module's two named filters and its two hooks with the
  * server.  The pool argument is not used.
  */
 static void charset_register_hooks(apr_pool_t *p)
 {
     /* the translation filters, both registered as resource-type filters */
     ap_register_input_filter(XLATEIN_FILTER_NAME, xlate_in_filter, NULL,
                              AP_FTYPE_RESOURCE);
     ap_register_output_filter(XLATEOUT_FILTER_NAME, xlate_out_filter, NULL,
                               AP_FTYPE_RESOURCE);

     /* find_code_page runs as a fixup hook with middle ordering */
     ap_hook_fixups(find_code_page, NULL, NULL, APR_HOOK_MIDDLE);

     /* xlate_insert_filter is ordered after the other insert_filter hooks */
     ap_hook_insert_filter(xlate_insert_filter, NULL, NULL,
                           APR_HOOK_REALLY_LAST);
 }

 /* Module dispatch record: this module keeps per-directory configuration
  * only, so both per-server slots are left empty.
  */
 module AP_MODULE_DECLARE_DATA charset_lite_module =
 {
     STANDARD20_MODULE_STUFF,
     create_charset_dir_conf,    /* create per-directory config */
     merge_charset_dir_conf,     /* merge per-directory configs */
     NULL,                       /* no per-server config creator */
     NULL,                       /* no per-server config merger */
     cmds,                       /* configuration directive table */
     charset_register_hooks      /* hook and filter registration */
 };