| /* Copyright (c) 2003-11, WebThing Ltd |
| * Copyright (c) 2011-, The Apache Software Foundation |
| * |
| * Licensed to the Apache Software Foundation (ASF) under one or more |
| * contributor license agreements. See the NOTICE file distributed with |
| * this work for additional information regarding copyright ownership. |
| * The ASF licenses this file to You under the Apache License, Version 2.0 |
| * (the "License"); you may not use this file except in compliance with |
| * the License. You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
| |
| /* GO_FASTER |
| You can #define GO_FASTER to disable trace logging. |
| */ |
| |
| #ifdef GO_FASTER |
| #define VERBOSE(x) |
| #define VERBOSEB(x) |
| #else |
| #define VERBOSE(x) if (verbose) x |
| #define VERBOSEB(x) if (verbose) {x} |
| #endif |
| |
| /* libxml2 */ |
| #include <libxml/HTMLparser.h> |
| |
| #include "http_protocol.h" |
| #include "http_config.h" |
| #include "http_log.h" |
| #include "apr_strings.h" |
| #include "apr_hash.h" |
| #include "apr_strmatch.h" |
| #include "apr_lib.h" |
| |
| #include "apr_optional.h" |
| #include "mod_xml2enc.h" |
| #include "http_request.h" |
| #include "ap_expr.h" |
| |
| /* globals set once at startup */ |
| static ap_rxplus_t *old_expr; |
| static ap_regex_t *seek_meta; |
| static const apr_strmatch_pattern* seek_content; |
| static apr_status_t (*xml2enc_charset)(request_rec*, xmlCharEncoding*, const char**) = NULL; |
| static apr_status_t (*xml2enc_filter)(request_rec*, const char*, unsigned int) = NULL; |
| |
| module AP_MODULE_DECLARE_DATA proxy_html_module; |
| |
| #define M_HTML 0x01 |
| #define M_EVENTS 0x02 |
| #define M_CDATA 0x04 |
| #define M_REGEX 0x08 |
| #define M_ATSTART 0x10 |
| #define M_ATEND 0x20 |
| #define M_LAST 0x40 |
| #define M_NOTLAST 0x80 |
| #define M_INTERPOLATE_TO 0x100 |
| #define M_INTERPOLATE_FROM 0x200 |
| |
| typedef struct { |
| const char *val; |
| } tattr; |
| typedef struct { |
| unsigned int start; |
| unsigned int end; |
| } meta; |
| typedef struct urlmap { |
| struct urlmap *next; |
| unsigned int flags; |
| unsigned int regflags; |
| union { |
| const char *c; |
| ap_regex_t *r; |
| } from; |
| const char *to; |
| ap_expr_info_t *cond; |
| } urlmap; |
| typedef struct { |
| urlmap *map; |
| const char *doctype; |
| const char *etag; |
| unsigned int flags; |
| size_t bufsz; |
| apr_hash_t *links; |
| apr_array_header_t *events; |
| const char *charset_out; |
| int extfix; |
| int metafix; |
| int strip_comments; |
| int interp; |
| int enabled; |
| } proxy_html_conf; |
| typedef struct { |
| ap_filter_t *f; |
| proxy_html_conf *cfg; |
| htmlParserCtxtPtr parser; |
| apr_bucket_brigade *bb; |
| char *buf; |
| size_t offset; |
| size_t avail; |
| const char *encoding; |
| urlmap *map; |
| char rbuf[4]; |
| apr_size_t rlen; |
| apr_size_t rmin; |
| } saxctxt; |
| |
| |
| #define NORM_LC 0x1 |
| #define NORM_MSSLASH 0x2 |
| #define NORM_RESET 0x4 |
| static htmlSAXHandler sax; |
| |
| typedef enum { ATTR_IGNORE, ATTR_URI, ATTR_EVENT } rewrite_t; |
| |
| static const char *const fpi_html = |
| "<!DOCTYPE html PUBLIC \"-//W3C//DTD HTML 4.01//EN\">\n"; |
| static const char *const fpi_html_legacy = |
| "<!DOCTYPE html PUBLIC \"-//W3C//DTD HTML 4.01 Transitional//EN\">\n"; |
| static const char *const fpi_xhtml = |
| "<!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML 1.0 Strict//EN\" \"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd\">\n"; |
| static const char *const fpi_xhtml_legacy = |
| "<!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML 1.0 Transitional//EN\" \"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd\">\n"; |
| static const char *const fpi_html5 = "<!DOCTYPE html>\n"; |
| static const char *const html_etag = ">"; |
| static const char *const xhtml_etag = " />"; |
| /*#define DEFAULT_DOCTYPE fpi_html */ |
| static const char *const DEFAULT_DOCTYPE = ""; |
| #define DEFAULT_ETAG html_etag |
| |
| static void normalise(unsigned int flags, char *str) |
| { |
| char *p; |
| if (flags & NORM_LC) |
| for (p = str; *p; ++p) |
| if (isupper(*p)) |
| *p = tolower(*p); |
| |
| if (flags & NORM_MSSLASH) |
| for (p = ap_strchr(str, '\\'); p; p = ap_strchr(p+1, '\\')) |
| *p = '/'; |
| |
| } |
| #define consume_buffer(ctx,inbuf,bytes,flag) \ |
| htmlParseChunk(ctx->parser, inbuf, bytes, flag) |
| |
| #define AP_fwrite(ctx,inbuf,bytes,flush) \ |
| ap_fwrite(ctx->f->next, ctx->bb, inbuf, bytes); |
| |
| /* This is always utf-8 on entry. We can convert charset within FLUSH */ |
| #define FLUSH AP_fwrite(ctx, (chars+begin), (i-begin), 0); begin = i+1 |
| static void pcharacters(void *ctxt, const xmlChar *uchars, int length) |
| { |
| const char *chars = (const char*) uchars; |
| saxctxt *ctx = (saxctxt*) ctxt; |
| int i; |
| int begin; |
| for (begin=i=0; i<length; i++) { |
| switch (chars[i]) { |
| case '&' : FLUSH; ap_fputs(ctx->f->next, ctx->bb, "&"); break; |
| case '<' : FLUSH; ap_fputs(ctx->f->next, ctx->bb, "<"); break; |
| case '>' : FLUSH; ap_fputs(ctx->f->next, ctx->bb, ">"); break; |
| case '"' : FLUSH; ap_fputs(ctx->f->next, ctx->bb, """); break; |
| default : break; |
| } |
| } |
| FLUSH; |
| } |
| |
| static void preserve(saxctxt *ctx, const size_t len) |
| { |
| char *newbuf; |
| if (len <= (ctx->avail - ctx->offset)) |
| return; |
| else while (len > (ctx->avail - ctx->offset)) |
| ctx->avail += ctx->cfg->bufsz; |
| |
| newbuf = realloc(ctx->buf, ctx->avail); |
| if (newbuf != ctx->buf) { |
| if (ctx->buf) |
| apr_pool_cleanup_kill(ctx->f->r->pool, ctx->buf, |
| (int(*)(void*))free); |
| apr_pool_cleanup_register(ctx->f->r->pool, newbuf, |
| (int(*)(void*))free, apr_pool_cleanup_null); |
| ctx->buf = newbuf; |
| } |
| } |
| |
| static void pappend(saxctxt *ctx, const char *buf, const size_t len) |
| { |
| preserve(ctx, len); |
| memcpy(ctx->buf+ctx->offset, buf, len); |
| ctx->offset += len; |
| } |
| |
| static void dump_content(saxctxt *ctx) |
| { |
| urlmap *m; |
| char *found; |
| size_t s_from, s_to; |
| size_t match; |
| char c = 0; |
| int nmatch; |
| ap_regmatch_t pmatch[10]; |
| char *subs; |
| size_t len, offs; |
| urlmap *themap = ctx->map; |
| #ifndef GO_FASTER |
| int verbose = APLOGrtrace1(ctx->f->r); |
| #endif |
| |
| pappend(ctx, &c, 1); /* append null byte */ |
| /* parse the text for URLs */ |
| for (m = themap; m; m = m->next) { |
| if (!(m->flags & M_CDATA)) |
| continue; |
| if (m->flags & M_REGEX) { |
| nmatch = 10; |
| offs = 0; |
| while (!ap_regexec(m->from.r, ctx->buf+offs, nmatch, pmatch, 0)) { |
| match = pmatch[0].rm_so; |
| s_from = pmatch[0].rm_eo - match; |
| subs = ap_pregsub(ctx->f->r->pool, m->to, ctx->buf+offs, |
| nmatch, pmatch); |
| s_to = strlen(subs); |
| len = strlen(ctx->buf); |
| offs += match; |
| VERBOSEB( |
| const char *f = apr_pstrndup(ctx->f->r->pool, |
| ctx->buf + offs, s_from); |
| ap_log_rerror(APLOG_MARK, APLOG_TRACE3, 0, ctx->f->r, |
| "C/RX: match at %s, substituting %s", f, subs); |
| ) |
| if (s_to > s_from) { |
| preserve(ctx, s_to - s_from); |
| memmove(ctx->buf+offs+s_to, ctx->buf+offs+s_from, |
| len + 1 - s_from - offs); |
| memcpy(ctx->buf+offs, subs, s_to); |
| } |
| else { |
| memcpy(ctx->buf + offs, subs, s_to); |
| memmove(ctx->buf+offs+s_to, ctx->buf+offs+s_from, |
| len + 1 - s_from - offs); |
| } |
| offs += s_to; |
| } |
| } |
| else { |
| s_from = strlen(m->from.c); |
| s_to = strlen(m->to); |
| for (found = strstr(ctx->buf, m->from.c); found; |
| found = strstr(ctx->buf+match+s_to, m->from.c)) { |
| match = found - ctx->buf; |
| if ((m->flags & M_ATSTART) && (match != 0)) |
| break; |
| len = strlen(ctx->buf); |
| if ((m->flags & M_ATEND) && (match < (len - s_from))) |
| continue; |
| VERBOSE(ap_log_rerror(APLOG_MARK, APLOG_TRACE3, 0, ctx->f->r, |
| "C: matched %s, substituting %s", |
| m->from.c, m->to)); |
| if (s_to > s_from) { |
| preserve(ctx, s_to - s_from); |
| memmove(ctx->buf+match+s_to, ctx->buf+match+s_from, |
| len + 1 - s_from - match); |
| memcpy(ctx->buf+match, m->to, s_to); |
| } |
| else { |
| memcpy(ctx->buf+match, m->to, s_to); |
| memmove(ctx->buf+match+s_to, ctx->buf+match+s_from, |
| len + 1 - s_from - match); |
| } |
| } |
| } |
| } |
| AP_fwrite(ctx, ctx->buf, strlen(ctx->buf), 1); |
| } |
| static void pcdata(void *ctxt, const xmlChar *uchars, int length) |
| { |
| const char *chars = (const char*) uchars; |
| saxctxt *ctx = (saxctxt*) ctxt; |
| if (ctx->cfg->extfix) { |
| pappend(ctx, chars, length); |
| } |
| else { |
| /* not sure if this should force-flush |
| * (i.e. can one cdata section come in multiple calls?) |
| */ |
| AP_fwrite(ctx, chars, length, 0); |
| } |
| } |
| static void pcomment(void *ctxt, const xmlChar *uchars) |
| { |
| const char *chars = (const char*) uchars; |
| saxctxt *ctx = (saxctxt*) ctxt; |
| if (ctx->cfg->strip_comments) |
| return; |
| |
| if (ctx->cfg->extfix) { |
| pappend(ctx, "<!--", 4); |
| pappend(ctx, chars, strlen(chars)); |
| pappend(ctx, "-->", 3); |
| } |
| else { |
| ap_fputs(ctx->f->next, ctx->bb, "<!--"); |
| AP_fwrite(ctx, chars, strlen(chars), 1); |
| ap_fputs(ctx->f->next, ctx->bb, "-->"); |
| dump_content(ctx); |
| } |
| } |
| static void pendElement(void *ctxt, const xmlChar *uname) |
| { |
| saxctxt *ctx = (saxctxt*) ctxt; |
| const char *name = (const char*) uname; |
| const htmlElemDesc* desc = htmlTagLookup(uname); |
| |
| if ((ctx->cfg->doctype == fpi_html) || (ctx->cfg->doctype == fpi_xhtml)) { |
| /* enforce html */ |
| if (!desc || desc->depr) |
| return; |
| |
| } |
| else if ((ctx->cfg->doctype == fpi_html_legacy) |
| || (ctx->cfg->doctype == fpi_xhtml_legacy)) { |
| /* enforce html legacy */ |
| if (!desc) |
| return; |
| } |
| /* TODO - implement HTML "allowed here" using the stack */ |
| /* nah. Keeping the stack is too much overhead */ |
| |
| if (ctx->offset > 0) { |
| dump_content(ctx); |
| ctx->offset = 0; /* having dumped it, we can re-use the memory */ |
| } |
| if (!desc || !desc->empty) { |
| ap_fprintf(ctx->f->next, ctx->bb, "</%s>", name); |
| } |
| } |
| |
| static void pstartElement(void *ctxt, const xmlChar *uname, |
| const xmlChar** uattrs) |
| { |
| int required_attrs; |
| int num_match; |
| size_t offs, len; |
| char *subs; |
| rewrite_t is_uri; |
| const char** a; |
| urlmap *m; |
| size_t s_to, s_from, match; |
| char *found; |
| saxctxt *ctx = (saxctxt*) ctxt; |
| size_t nmatch; |
| ap_regmatch_t pmatch[10]; |
| #ifndef GO_FASTER |
| int verbose = APLOGrtrace1(ctx->f->r); |
| #endif |
| apr_array_header_t *linkattrs; |
| int i; |
| const char *name = (const char*) uname; |
| const char** attrs = (const char**) uattrs; |
| const htmlElemDesc* desc = htmlTagLookup(uname); |
| urlmap *themap = ctx->map; |
| #ifdef HAVE_STACK |
| const void** descp; |
| #endif |
| int enforce = 0; |
| if ((ctx->cfg->doctype == fpi_html) || (ctx->cfg->doctype == fpi_xhtml)) { |
| /* enforce html */ |
| if (!desc || desc->depr) { |
| ap_log_rerror(APLOG_MARK, APLOG_DEBUG, 0, ctx->f->r, APLOGNO(01416) |
| "Bogus HTML element %s dropped", name); |
| return; |
| } |
| enforce = 2; |
| } |
| else if ((ctx->cfg->doctype == fpi_html_legacy) |
| || (ctx->cfg->doctype == fpi_xhtml_legacy)) { |
| /* enforce html legacy */ |
| if (!desc) { |
| ap_log_rerror(APLOG_MARK, APLOG_DEBUG, 0, ctx->f->r, APLOGNO(01417) |
| "Deprecated HTML element %s dropped", name); |
| return; |
| } |
| enforce = 1; |
| } |
| #ifdef HAVE_STACK |
| descp = apr_array_push(ctx->stack); |
| *descp = desc; |
| /* TODO - implement HTML "allowed here" */ |
| #endif |
| |
| ap_fputc(ctx->f->next, ctx->bb, '<'); |
| ap_fputs(ctx->f->next, ctx->bb, name); |
| |
| required_attrs = 0; |
| if ((enforce > 0) && (desc != NULL) && (desc->attrs_req != NULL)) |
| for (a = desc->attrs_req; *a; a++) |
| ++required_attrs; |
| |
| if (attrs) { |
| linkattrs = apr_hash_get(ctx->cfg->links, name, APR_HASH_KEY_STRING); |
| for (a = attrs; *a; a += 2) { |
| if (desc && enforce > 0) { |
| switch (htmlAttrAllowed(desc, (xmlChar*)*a, 2-enforce)) { |
| case HTML_INVALID: |
| ap_log_rerror(APLOG_MARK, APLOG_DEBUG, 0, ctx->f->r, APLOGNO(01418) |
| "Bogus HTML attribute %s of %s dropped", |
| *a, name); |
| continue; |
| case HTML_DEPRECATED: |
| ap_log_rerror(APLOG_MARK, APLOG_DEBUG, 0, ctx->f->r, APLOGNO(01419) |
| "Deprecated HTML attribute %s of %s dropped", |
| *a, name); |
| continue; |
| case HTML_REQUIRED: |
| required_attrs--; /* cross off the number still needed */ |
| /* fallthrough - required implies valid */ |
| default: |
| break; |
| } |
| } |
| ctx->offset = 0; |
| if (a[1]) { |
| pappend(ctx, a[1], strlen(a[1])+1); |
| is_uri = ATTR_IGNORE; |
| if (linkattrs) { |
| tattr *attrs = (tattr*) linkattrs->elts; |
| for (i=0; i < linkattrs->nelts; ++i) { |
| if (!strcmp(*a, attrs[i].val)) { |
| is_uri = ATTR_URI; |
| break; |
| } |
| } |
| } |
| if ((is_uri == ATTR_IGNORE) && ctx->cfg->extfix |
| && (ctx->cfg->events != NULL)) { |
| for (i=0; i < ctx->cfg->events->nelts; ++i) { |
| tattr *attrs = (tattr*) ctx->cfg->events->elts; |
| if (!strcmp(*a, attrs[i].val)) { |
| is_uri = ATTR_EVENT; |
| break; |
| } |
| } |
| } |
| switch (is_uri) { |
| case ATTR_URI: |
| num_match = 0; |
| for (m = themap; m; m = m->next) { |
| if (!(m->flags & M_HTML)) |
| continue; |
| if (m->flags & M_REGEX) { |
| nmatch = 10; |
| if (!ap_regexec(m->from.r, ctx->buf, nmatch, |
| pmatch, 0)) { |
| ++num_match; |
| offs = match = pmatch[0].rm_so; |
| s_from = pmatch[0].rm_eo - match; |
| subs = ap_pregsub(ctx->f->r->pool, m->to, |
| ctx->buf, nmatch, pmatch); |
| VERBOSE({ |
| const char *f; |
| f = apr_pstrndup(ctx->f->r->pool, |
| ctx->buf + offs, s_from); |
| ap_log_rerror(APLOG_MARK, APLOG_TRACE3, 0, |
| ctx->f->r, |
| "H/RX: match at %s, substituting %s", |
| f, subs); |
| }) |
| s_to = strlen(subs); |
| len = strlen(ctx->buf); |
| if (s_to > s_from) { |
| preserve(ctx, s_to - s_from); |
| memmove(ctx->buf+offs+s_to, |
| ctx->buf+offs+s_from, |
| len + 1 - s_from - offs); |
| memcpy(ctx->buf+offs, subs, s_to); |
| } |
| else { |
| memcpy(ctx->buf + offs, subs, s_to); |
| memmove(ctx->buf+offs+s_to, |
| ctx->buf+offs+s_from, |
| len + 1 - s_from - offs); |
| } |
| } |
| } else { |
| s_from = strlen(m->from.c); |
| if (!strncasecmp(ctx->buf, m->from.c, s_from)) { |
| ++num_match; |
| s_to = strlen(m->to); |
| len = strlen(ctx->buf); |
| VERBOSE(ap_log_rerror(APLOG_MARK, APLOG_TRACE3, |
| 0, ctx->f->r, |
| "H: matched %s, substituting %s", |
| m->from.c, m->to)); |
| if (s_to > s_from) { |
| preserve(ctx, s_to - s_from); |
| memmove(ctx->buf+s_to, ctx->buf+s_from, |
| len + 1 - s_from); |
| memcpy(ctx->buf, m->to, s_to); |
| } |
| else { /* it fits in the existing space */ |
| memcpy(ctx->buf, m->to, s_to); |
| memmove(ctx->buf+s_to, ctx->buf+s_from, |
| len + 1 - s_from); |
| } |
| break; |
| } |
| } |
| /* URIs only want one match unless overridden in the config */ |
| if ((num_match > 0) && !(m->flags & M_NOTLAST)) |
| break; |
| } |
| break; |
| case ATTR_EVENT: |
| for (m = themap; m; m = m->next) { |
| num_match = 0; /* reset here since we're working per-rule */ |
| if (!(m->flags & M_EVENTS)) |
| continue; |
| if (m->flags & M_REGEX) { |
| nmatch = 10; |
| offs = 0; |
| while (!ap_regexec(m->from.r, ctx->buf+offs, |
| nmatch, pmatch, 0)) { |
| match = pmatch[0].rm_so; |
| s_from = pmatch[0].rm_eo - match; |
| subs = ap_pregsub(ctx->f->r->pool, m->to, ctx->buf+offs, |
| nmatch, pmatch); |
| VERBOSE({ |
| const char *f; |
| f = apr_pstrndup(ctx->f->r->pool, |
| ctx->buf + offs, s_from); |
| ap_log_rerror(APLOG_MARK, APLOG_TRACE3, 0, |
| ctx->f->r, |
| "E/RX: match at %s, substituting %s", |
| f, subs); |
| }) |
| s_to = strlen(subs); |
| offs += match; |
| len = strlen(ctx->buf); |
| if (s_to > s_from) { |
| preserve(ctx, s_to - s_from); |
| memmove(ctx->buf+offs+s_to, |
| ctx->buf+offs+s_from, |
| len + 1 - s_from - offs); |
| memcpy(ctx->buf+offs, subs, s_to); |
| } |
| else { |
| memcpy(ctx->buf + offs, subs, s_to); |
| memmove(ctx->buf+offs+s_to, |
| ctx->buf+offs+s_from, |
| len + 1 - s_from - offs); |
| } |
| offs += s_to; |
| ++num_match; |
| } |
| } |
| else { |
| found = strstr(ctx->buf, m->from.c); |
| if ((m->flags & M_ATSTART) && (found != ctx->buf)) |
| continue; |
| while (found) { |
| s_from = strlen(m->from.c); |
| s_to = strlen(m->to); |
| match = found - ctx->buf; |
| if ((s_from < strlen(found)) |
| && (m->flags & M_ATEND)) { |
| found = strstr(ctx->buf+match+s_from, |
| m->from.c); |
| continue; |
| } |
| else { |
| found = strstr(ctx->buf+match+s_to, |
| m->from.c); |
| } |
| VERBOSE(ap_log_rerror(APLOG_MARK, APLOG_TRACE3, |
| 0, ctx->f->r, |
| "E: matched %s, substituting %s", |
| m->from.c, m->to)); |
| len = strlen(ctx->buf); |
| if (s_to > s_from) { |
| preserve(ctx, s_to - s_from); |
| memmove(ctx->buf+match+s_to, |
| ctx->buf+match+s_from, |
| len + 1 - s_from - match); |
| memcpy(ctx->buf+match, m->to, s_to); |
| } |
| else { |
| memcpy(ctx->buf+match, m->to, s_to); |
| memmove(ctx->buf+match+s_to, |
| ctx->buf+match+s_from, |
| len + 1 - s_from - match); |
| } |
| ++num_match; |
| } |
| } |
| if (num_match && (m->flags & M_LAST)) |
| break; |
| } |
| break; |
| case ATTR_IGNORE: |
| break; |
| } |
| } |
| if (!a[1]) |
| ap_fputstrs(ctx->f->next, ctx->bb, " ", a[0], NULL); |
| else { |
| |
| if (ctx->cfg->flags != 0) |
| normalise(ctx->cfg->flags, ctx->buf); |
| |
| /* write the attribute, using pcharacters to html-escape |
| anything that needs it in the value. |
| */ |
| ap_fputstrs(ctx->f->next, ctx->bb, " ", a[0], "=\"", NULL); |
| pcharacters(ctx, (const xmlChar*)ctx->buf, strlen(ctx->buf)); |
| ap_fputc(ctx->f->next, ctx->bb, '"'); |
| } |
| } |
| } |
| ctx->offset = 0; |
| if (desc && desc->empty) |
| ap_fputs(ctx->f->next, ctx->bb, ctx->cfg->etag); |
| else |
| ap_fputc(ctx->f->next, ctx->bb, '>'); |
| |
| if ((enforce > 0) && (required_attrs > 0)) { |
| /* if there are more required attributes than we found then complain */ |
| ap_log_rerror(APLOG_MARK, APLOG_DEBUG, 0, ctx->f->r, APLOGNO(01420) |
| "HTML element %s is missing %d required attributes", |
| name, required_attrs); |
| } |
| } |
| |
| static meta *metafix(request_rec *r, const char *buf, apr_size_t len) |
| { |
| meta *ret = NULL; |
| size_t offs = 0; |
| const char *p; |
| const char *q; |
| char *header; |
| char *content; |
| ap_regmatch_t pmatch[2]; |
| char delim; |
| |
| while (offs < len && |
| !ap_regexec_len(seek_meta, buf + offs, len - offs, 2, pmatch, 0)) { |
| header = NULL; |
| content = NULL; |
| p = buf+offs+pmatch[1].rm_eo; |
| while (!apr_isalpha(*++p)); |
| for (q = p; apr_isalnum(*q) || (*q == '-'); ++q); |
| header = apr_pstrmemdup(r->pool, p, q-p); |
| if (strncasecmp(header, "Content-", 8)) { |
| /* find content=... string */ |
| p = apr_strmatch(seek_content, buf+offs+pmatch[0].rm_so, |
| pmatch[0].rm_eo - pmatch[0].rm_so); |
| /* if it doesn't contain "content", ignore, don't crash! */ |
| if (p != NULL) { |
| while (*p) { |
| p += 7; |
| while (apr_isspace(*p)) |
| ++p; |
| /* XXX Should we search for another content= pattern? */ |
| if (*p != '=') |
| break; |
| while (*p && apr_isspace(*++p)); |
| if ((*p == '\'') || (*p == '"')) { |
| delim = *p++; |
| for (q = p; *q && *q != delim; ++q); |
| /* No terminating delimiter found? Skip the boggus directive */ |
| if (*q != delim) |
| break; |
| } else { |
| for (q = p; *q && !apr_isspace(*q) && (*q != '>'); ++q); |
| } |
| content = apr_pstrmemdup(r->pool, p, q-p); |
| break; |
| } |
| } |
| } |
| else if (!strncasecmp(header, "Content-Type", 12)) { |
| ret = apr_palloc(r->pool, sizeof(meta)); |
| ret->start = offs+pmatch[0].rm_so; |
| ret->end = offs+pmatch[0].rm_eo; |
| } |
| if (header && content) { |
| #ifndef GO_FASTER |
| ap_log_rerror(APLOG_MARK, APLOG_TRACE2, 0, r, |
| "Adding header [%s: %s] from HTML META", |
| header, content); |
| #endif |
| apr_table_setn(r->headers_out, header, content); |
| } |
| offs += pmatch[0].rm_eo; |
| } |
| return ret; |
| } |
| |
| static const char *interpolate_vars(request_rec *r, const char *str) |
| { |
| const char *start; |
| const char *end; |
| const char *delim; |
| const char *before; |
| const char *after; |
| const char *replacement; |
| const char *var; |
| for (;;) { |
| if ((start = ap_strstr_c(str, "${")) == NULL) |
| break; |
| |
| if ((end = ap_strchr_c(start+2, '}')) == NULL) |
| break; |
| |
| delim = ap_strchr_c(start+2, '|'); |
| |
| /* Restrict delim to ${...} */ |
| if (delim && delim >= end) { |
| delim = NULL; |
| } |
| |
| before = apr_pstrmemdup(r->pool, str, start-str); |
| after = end+1; |
| if (delim) { |
| var = apr_pstrmemdup(r->pool, start+2, delim-start-2); |
| } |
| else { |
| var = apr_pstrmemdup(r->pool, start+2, end-start-2); |
| } |
| replacement = apr_table_get(r->subprocess_env, var); |
| if (!replacement) { |
| if (delim) |
| replacement = apr_pstrmemdup(r->pool, delim+1, end-delim-1); |
| else |
| replacement = ""; |
| } |
| str = apr_pstrcat(r->pool, before, replacement, after, NULL); |
| ap_log_rerror(APLOG_MARK, APLOG_TRACE1, 0, r, |
| "Interpolating %s => %s", var, replacement); |
| } |
| return str; |
| } |
| static void fixup_rules(saxctxt *ctx) |
| { |
| urlmap *newp; |
| urlmap *p; |
| urlmap *prev = NULL; |
| request_rec *r = ctx->f->r; |
| |
| for (p = ctx->cfg->map; p; p = p->next) { |
| if (p->cond != NULL) { |
| const char *err; |
| int ok = ap_expr_exec(r, p->cond, &err); |
| if (err) { |
| ap_log_rerror(APLOG_MARK, APLOG_ERR, 0, r, APLOGNO(01421) |
| "Error evaluating expr: %s", err); |
| } |
| if (ok == 0) { |
| continue; /* condition is unsatisfied */ |
| } |
| } |
| |
| newp = apr_pmemdup(r->pool, p, sizeof(urlmap)); |
| |
| if (newp->flags & M_INTERPOLATE_FROM) { |
| newp->from.c = interpolate_vars(r, newp->from.c); |
| if (!newp->from.c || !*newp->from.c) |
| continue; /* don't use empty from-pattern */ |
| if (newp->flags & M_REGEX) { |
| newp->from.r = ap_pregcomp(r->pool, newp->from.c, |
| newp->regflags); |
| } |
| } |
| if (newp->flags & M_INTERPOLATE_TO) { |
| newp->to = interpolate_vars(r, newp->to); |
| } |
| /* evaluate p->cond; continue if unsatisfied */ |
| /* create new urlmap with memcpy and append to map */ |
| /* interpolate from if flagged to do so */ |
| /* interpolate to if flagged to do so */ |
| |
| if (prev != NULL) |
| prev->next = newp; |
| else |
| ctx->map = newp; |
| prev = newp; |
| } |
| |
| if (prev) |
| prev->next = NULL; |
| } |
| |
| static saxctxt *check_filter_init (ap_filter_t *f) |
| { |
| saxctxt *fctx; |
| if (!f->ctx) { |
| proxy_html_conf *cfg; |
| const char *force; |
| const char *errmsg = NULL; |
| cfg = ap_get_module_config(f->r->per_dir_config, &proxy_html_module); |
| force = apr_table_get(f->r->subprocess_env, "PROXY_HTML_FORCE"); |
| |
| if (!force) { |
| if (!f->r->proxyreq) { |
| errmsg = "Non-proxy request; not inserting proxy-html filter"; |
| } |
| else if (!f->r->content_type) { |
| errmsg = "No content-type; bailing out of proxy-html filter"; |
| } |
| else if (strncasecmp(f->r->content_type, "text/html", 9) && |
| strncasecmp(f->r->content_type, |
| "application/xhtml+xml", 21)) { |
| errmsg = "Non-HTML content; not inserting proxy-html filter"; |
| } |
| } |
| if (!cfg->links) { |
| errmsg = "No links configured: nothing for proxy-html filter to do"; |
| } |
| |
| if (errmsg) { |
| #ifndef GO_FASTER |
| ap_log_rerror(APLOG_MARK, APLOG_TRACE1, 0, f->r, "%s", errmsg); |
| #endif |
| ap_remove_output_filter(f); |
| return NULL; |
| } |
| |
| fctx = f->ctx = apr_pcalloc(f->r->pool, sizeof(saxctxt)); |
| fctx->f = f; |
| fctx->bb = apr_brigade_create(f->r->pool, |
| f->r->connection->bucket_alloc); |
| fctx->cfg = cfg; |
| apr_table_unset(f->r->headers_out, "Content-Length"); |
| |
| if (cfg->interp) |
| fixup_rules(fctx); |
| else |
| fctx->map = cfg->map; |
| /* defer dealing with charset_out until after sniffing charset_in |
| * so we can support setting one to t'other. |
| */ |
| } |
| return f->ctx; |
| } |
| |
| static void prepend_rbuf(saxctxt *ctxt, apr_bucket_brigade *bb) |
| { |
| if (ctxt->rlen) { |
| apr_bucket *b = apr_bucket_transient_create(ctxt->rbuf, |
| ctxt->rlen, |
| bb->bucket_alloc); |
| APR_BRIGADE_INSERT_HEAD(bb, b); |
| ctxt->rlen = 0; |
| } |
| } |
| |
| static apr_status_t proxy_html_filter(ap_filter_t *f, apr_bucket_brigade *bb) |
| { |
| apr_bucket* b; |
| meta *m = NULL; |
| xmlCharEncoding enc; |
| const char *buf = 0; |
| apr_size_t bytes = 0; |
| #ifndef USE_OLD_LIBXML2 |
| int xmlopts = XML_PARSE_RECOVER | XML_PARSE_NONET | |
| XML_PARSE_NOBLANKS | XML_PARSE_NOERROR | XML_PARSE_NOWARNING; |
| #endif |
| |
| saxctxt *ctxt = check_filter_init(f); |
| if (!ctxt) |
| return ap_pass_brigade(f->next, bb); |
| for (b = APR_BRIGADE_FIRST(bb); |
| b != APR_BRIGADE_SENTINEL(bb); |
| b = APR_BUCKET_NEXT(b)) { |
| if (APR_BUCKET_IS_METADATA(b)) { |
| if (APR_BUCKET_IS_EOS(b)) { |
| if (ctxt->parser != NULL) { |
| consume_buffer(ctxt, "", 0, 1); |
| } |
| else { |
| prepend_rbuf(ctxt, ctxt->bb); |
| } |
| APR_BRIGADE_INSERT_TAIL(ctxt->bb, |
| apr_bucket_eos_create(ctxt->bb->bucket_alloc)); |
| ap_pass_brigade(ctxt->f->next, ctxt->bb); |
| apr_brigade_cleanup(ctxt->bb); |
| } |
| else if (APR_BUCKET_IS_FLUSH(b)) { |
| /* pass on flush, except at start where it would cause |
| * headers to be sent before doc sniffing |
| */ |
| if (ctxt->parser != NULL) { |
| ap_fflush(ctxt->f->next, ctxt->bb); |
| } |
| } |
| } |
| else if (apr_bucket_read(b, &buf, &bytes, APR_BLOCK_READ) |
| == APR_SUCCESS) { |
| if (ctxt->parser == NULL) { |
| const char *cenc; |
| |
| /* For documents smaller than four bytes, there is no reason to do |
| * HTML rewriting. The URL schema (i.e. 'http') needs four bytes alone. |
| * And the HTML parser needs at least four bytes to initialise correctly. |
| */ |
| ctxt->rmin += bytes; |
| if (ctxt->rmin < sizeof(ctxt->rbuf)) { |
| memcpy(ctxt->rbuf + ctxt->rlen, buf, bytes); |
| ctxt->rlen += bytes; |
| continue; |
| } |
| if (ctxt->rlen && ctxt->rlen < sizeof(ctxt->rbuf)) { |
| apr_size_t rem = sizeof(ctxt->rbuf) - ctxt->rlen; |
| memcpy(ctxt->rbuf + ctxt->rlen, buf, rem); |
| ctxt->rlen += rem; |
| buf += rem; |
| bytes -= rem; |
| } |
| |
| if (!xml2enc_charset || |
| (xml2enc_charset(f->r, &enc, &cenc) != APR_SUCCESS)) { |
| if (!xml2enc_charset) |
| ap_log_rerror(APLOG_MARK, APLOG_WARNING, 0, f->r, APLOGNO(01422) |
| "No i18n support found. Install mod_xml2enc if required"); |
| enc = XML_CHAR_ENCODING_NONE; |
| ap_set_content_type(f->r, "text/html;charset=utf-8"); |
| } |
| else { |
| /* if we wanted a non-default charset_out, insert the |
| * xml2enc filter now that we've sniffed it |
| */ |
| if (ctxt->cfg->charset_out && xml2enc_filter) { |
| if (*ctxt->cfg->charset_out != '*') |
| cenc = ctxt->cfg->charset_out; |
| xml2enc_filter(f->r, cenc, ENCIO_OUTPUT); |
| ap_set_content_type(f->r, |
| apr_pstrcat(f->r->pool, |
| "text/html;charset=", |
| cenc, NULL)); |
| } |
| else /* Normal case, everything worked, utf-8 output */ |
| ap_set_content_type(f->r, "text/html;charset=utf-8"); |
| } |
| |
| ap_fputs(f->next, ctxt->bb, ctxt->cfg->doctype); |
| |
| if (ctxt->rlen) { |
| ctxt->parser = htmlCreatePushParserCtxt(&sax, ctxt, |
| ctxt->rbuf, |
| ctxt->rlen, |
| NULL, enc); |
| } |
| else { |
| ctxt->parser = htmlCreatePushParserCtxt(&sax, ctxt, buf, 4, |
| NULL, enc); |
| buf += 4; |
| bytes -= 4; |
| } |
| if (ctxt->parser == NULL) { |
| prepend_rbuf(ctxt, bb); |
| ap_remove_output_filter(f); |
| return ap_pass_brigade(f->next, bb); |
| } |
| ctxt->rlen = 0; |
| apr_pool_cleanup_register(f->r->pool, ctxt->parser, |
| (int(*)(void*))htmlFreeParserCtxt, |
| apr_pool_cleanup_null); |
| #ifndef USE_OLD_LIBXML2 |
| if (xmlopts = xmlCtxtUseOptions(ctxt->parser, xmlopts), xmlopts) |
| ap_log_rerror(APLOG_MARK, APLOG_WARNING, 0, f->r, APLOGNO(01423) |
| "Unsupported parser opts %x", xmlopts); |
| #endif |
| if (ctxt->cfg->metafix) |
| m = metafix(f->r, buf, bytes); |
| if (m) { |
| consume_buffer(ctxt, buf, m->start, 0); |
| consume_buffer(ctxt, buf+m->end, bytes-m->end, 0); |
| } |
| else { |
| consume_buffer(ctxt, buf, bytes, 0); |
| } |
| } |
| else { |
| consume_buffer(ctxt, buf, bytes, 0); |
| } |
| } |
| else { |
| ap_log_rerror(APLOG_MARK, APLOG_ERR, 0, f->r, APLOGNO(01424) |
| "Error in bucket read"); |
| } |
| } |
| /*ap_fflush(ctxt->f->next, ctxt->bb); // uncomment for debug */ |
| apr_brigade_cleanup(bb); |
| return APR_SUCCESS; |
| } |
| |
| static void *proxy_html_config(apr_pool_t *pool, char *x) |
| { |
| proxy_html_conf *ret = apr_pcalloc(pool, sizeof(proxy_html_conf)); |
| ret->doctype = DEFAULT_DOCTYPE; |
| ret->etag = DEFAULT_ETAG; |
| ret->bufsz = 8192; |
| /* ret->interp = 1; */ |
| /* don't initialise links and events until they get set/used */ |
| return ret; |
| } |
| |
| static void *proxy_html_merge(apr_pool_t *pool, void *BASE, void *ADD) |
| { |
| proxy_html_conf *base = (proxy_html_conf *) BASE; |
| proxy_html_conf *add = (proxy_html_conf *) ADD; |
| proxy_html_conf *conf = apr_palloc(pool, sizeof(proxy_html_conf)); |
| |
| /* don't merge declarations - just use the most specific */ |
| conf->links = (add->links == NULL) ? base->links : add->links; |
| conf->events = (add->events == NULL) ? base->events : add->events; |
| |
| conf->charset_out = (add->charset_out == NULL) |
| ? base->charset_out : add->charset_out; |
| |
| if (add->map && base->map) { |
| urlmap *a; |
| conf->map = NULL; |
| for (a = base->map; a; a = a->next) { |
| urlmap *save = conf->map; |
| conf->map = apr_pmemdup(pool, a, sizeof(urlmap)); |
| conf->map->next = save; |
| } |
| for (a = add->map; a; a = a->next) { |
| urlmap *save = conf->map; |
| conf->map = apr_pmemdup(pool, a, sizeof(urlmap)); |
| conf->map->next = save; |
| } |
| } |
| else |
| conf->map = add->map ? add->map : base->map; |
| |
| conf->doctype = (add->doctype == DEFAULT_DOCTYPE) |
| ? base->doctype : add->doctype; |
| conf->etag = (add->etag == DEFAULT_ETAG) ? base->etag : add->etag; |
| conf->bufsz = add->bufsz; |
| if (add->flags & NORM_RESET) { |
| conf->flags = add->flags ^ NORM_RESET; |
| conf->metafix = add->metafix; |
| conf->extfix = add->extfix; |
| conf->interp = add->interp; |
| conf->strip_comments = add->strip_comments; |
| conf->enabled = add->enabled; |
| } |
| else { |
| conf->flags = base->flags | add->flags; |
| conf->metafix = base->metafix | add->metafix; |
| conf->extfix = base->extfix | add->extfix; |
| conf->interp = base->interp | add->interp; |
| conf->strip_comments = base->strip_comments | add->strip_comments; |
| conf->enabled = add->enabled | base->enabled; |
| } |
| return conf; |
| } |
| #define REGFLAG(n,s,c) ((s&&(ap_strchr_c((s),(c))!=NULL)) ? (n) : 0) |
| #define XREGFLAG(n,s,c) ((!s||(ap_strchr_c((s),(c))==NULL)) ? (n) : 0) |
| static const char *comp_urlmap(cmd_parms *cmd, urlmap *newmap, |
| const char *from, const char *to, |
| const char *flags, const char *cond) |
| { |
| const char *err = NULL; |
| newmap->flags |
| = XREGFLAG(M_HTML,flags,'h') |
| | XREGFLAG(M_EVENTS,flags,'e') |
| | XREGFLAG(M_CDATA,flags,'c') |
| | REGFLAG(M_ATSTART,flags,'^') |
| | REGFLAG(M_ATEND,flags,'$') |
| | REGFLAG(M_REGEX,flags,'R') |
| | REGFLAG(M_LAST,flags,'L') |
| | REGFLAG(M_NOTLAST,flags,'l') |
| | REGFLAG(M_INTERPOLATE_TO,flags,'V') |
| | REGFLAG(M_INTERPOLATE_FROM,flags,'v'); |
| |
| if ((newmap->flags & M_INTERPOLATE_FROM) || !(newmap->flags & M_REGEX)) { |
| newmap->from.c = from; |
| newmap->to = to; |
| } |
| else { |
| newmap->regflags |
| = REGFLAG(AP_REG_EXTENDED,flags,'x') |
| | REGFLAG(AP_REG_ICASE,flags,'i') |
| | REGFLAG(AP_REG_NOSUB,flags,'n') |
| | REGFLAG(AP_REG_NEWLINE,flags,'s'); |
| newmap->from.r = ap_pregcomp(cmd->pool, from, newmap->regflags); |
| newmap->to = to; |
| } |
| if (cond != NULL) { |
| /* back-compatibility: support old-style ENV expressions |
| * by converting to ap_expr syntax. |
| * |
| * 1. var --> env(var) |
| * 2. var=val --> env(var)=val |
| * 3. !var --> !env(var) |
| * 4. !var=val --> env(var)!=val |
| */ |
| char *newcond = NULL; |
| if (ap_rxplus_exec(cmd->temp_pool, old_expr, cond, &newcond)) { |
| /* we got a substitution. Check for the case (3) above |
| * that the regexp gets wrong: a negation without a comparison. |
| */ |
| if ((cond[0] == '!') && !ap_strchr_c(cond, '=')) { |
| memmove(newcond+1, newcond, strlen(newcond)-1); |
| newcond[0] = '!'; |
| } |
| cond = newcond; |
| } |
| newmap->cond = ap_expr_parse_cmd(cmd, cond, 0, &err, NULL); |
| } |
| else { |
| newmap->cond = NULL; |
| } |
| return err; |
| } |
| |
| static const char *set_urlmap(cmd_parms *cmd, void *CFG, const char *args) |
| { |
| proxy_html_conf *cfg = (proxy_html_conf *)CFG; |
| urlmap *map; |
| apr_pool_t *pool = cmd->pool; |
| urlmap *newmap; |
| const char *usage = |
| "Usage: ProxyHTMLURLMap from-pattern to-pattern [flags] [cond]"; |
| const char *from; |
| const char *to; |
| const char *flags; |
| const char *cond = NULL; |
| |
| if (from = ap_getword_conf(cmd->pool, &args), !from) |
| return usage; |
| if (to = ap_getword_conf(cmd->pool, &args), !to) |
| return usage; |
| flags = ap_getword_conf(cmd->pool, &args); |
| if (flags && *flags) |
| cond = ap_getword_conf(cmd->pool, &args); |
| if (cond && !*cond) |
| cond = NULL; |
| |
| /* the args look OK, so let's use them */ |
| newmap = apr_palloc(pool, sizeof(urlmap)); |
| newmap->next = NULL; |
| if (cfg->map) { |
| for (map = cfg->map; map->next; map = map->next); |
| map->next = newmap; |
| } |
| else |
| cfg->map = newmap; |
| |
| return comp_urlmap(cmd, newmap, from, to, flags, cond); |
| } |
| |
| static const char *set_doctype(cmd_parms *cmd, void *CFG, |
| const char *t, const char *l) |
| { |
| proxy_html_conf *cfg = (proxy_html_conf *)CFG; |
| if (!strcasecmp(t, "xhtml")) { |
| cfg->etag = xhtml_etag; |
| if (l && !strcasecmp(l, "legacy")) |
| cfg->doctype = fpi_xhtml_legacy; |
| else |
| cfg->doctype = fpi_xhtml; |
| } |
| else if (!strcasecmp(t, "html")) { |
| cfg->etag = html_etag; |
| if (l && !strcasecmp(l, "legacy")) |
| cfg->doctype = fpi_html_legacy; |
| else |
| cfg->doctype = fpi_html; |
| } |
| else if (!strcasecmp(t, "html5")) { |
| cfg->etag = html_etag; |
| cfg->doctype = fpi_html5; |
| } |
| else { |
| cfg->doctype = t; |
| if (l && ((l[0] == 'x') || (l[0] == 'X'))) |
| cfg->etag = xhtml_etag; |
| else |
| cfg->etag = html_etag; |
| } |
| return NULL; |
| } |
| |
| static const char *set_flags(cmd_parms *cmd, void *CFG, const char *arg) |
| { |
| proxy_html_conf *cfg = CFG; |
| if (arg && *arg) { |
| if (!strcasecmp(arg, "lowercase")) |
| cfg->flags |= NORM_LC; |
| else if (!strcasecmp(arg, "dospath")) |
| cfg->flags |= NORM_MSSLASH; |
| else if (!strcasecmp(arg, "reset")) |
| cfg->flags |= NORM_RESET; |
| } |
| return NULL; |
| } |
| |
| static const char *set_events(cmd_parms *cmd, void *CFG, const char *arg) |
| { |
| tattr *attr; |
| proxy_html_conf *cfg = CFG; |
| if (cfg->events == NULL) |
| cfg->events = apr_array_make(cmd->pool, 20, sizeof(tattr)); |
| attr = apr_array_push(cfg->events); |
| attr->val = arg; |
| return NULL; |
| } |
| |
| static const char *set_links(cmd_parms *cmd, void *CFG, |
| const char *elt, const char *att) |
| { |
| apr_array_header_t *attrs; |
| tattr *attr; |
| proxy_html_conf *cfg = CFG; |
| |
| if (cfg->links == NULL) |
| cfg->links = apr_hash_make(cmd->pool); |
| |
| attrs = apr_hash_get(cfg->links, elt, APR_HASH_KEY_STRING); |
| if (!attrs) { |
| attrs = apr_array_make(cmd->pool, 2, sizeof(tattr*)); |
| apr_hash_set(cfg->links, elt, APR_HASH_KEY_STRING, attrs); |
| } |
| attr = apr_array_push(attrs); |
| attr->val = att; |
| return NULL; |
| } |
| static const command_rec proxy_html_cmds[] = { |
| AP_INIT_ITERATE("ProxyHTMLEvents", set_events, NULL, |
| RSRC_CONF|ACCESS_CONF, |
| "Strings to be treated as scripting events"), |
| AP_INIT_ITERATE2("ProxyHTMLLinks", set_links, NULL, |
| RSRC_CONF|ACCESS_CONF, "Declare HTML Attributes"), |
| AP_INIT_RAW_ARGS("ProxyHTMLURLMap", set_urlmap, NULL, |
| RSRC_CONF|ACCESS_CONF, "Map URL From To"), |
| AP_INIT_TAKE12("ProxyHTMLDoctype", set_doctype, NULL, |
| RSRC_CONF|ACCESS_CONF, "(HTML|XHTML) [Legacy]"), |
| AP_INIT_ITERATE("ProxyHTMLFixups", set_flags, NULL, |
| RSRC_CONF|ACCESS_CONF, "Options are lowercase, dospath"), |
| AP_INIT_FLAG("ProxyHTMLMeta", ap_set_flag_slot, |
| (void*)APR_OFFSETOF(proxy_html_conf, metafix), |
| RSRC_CONF|ACCESS_CONF, "Fix META http-equiv elements"), |
| AP_INIT_FLAG("ProxyHTMLInterp", ap_set_flag_slot, |
| (void*)APR_OFFSETOF(proxy_html_conf, interp), |
| RSRC_CONF|ACCESS_CONF, |
| "Support interpolation and conditions in URLMaps"), |
| AP_INIT_FLAG("ProxyHTMLExtended", ap_set_flag_slot, |
| (void*)APR_OFFSETOF(proxy_html_conf, extfix), |
| RSRC_CONF|ACCESS_CONF, "Map URLs in Javascript and CSS"), |
| AP_INIT_FLAG("ProxyHTMLStripComments", ap_set_flag_slot, |
| (void*)APR_OFFSETOF(proxy_html_conf, strip_comments), |
| RSRC_CONF|ACCESS_CONF, "Strip out comments"), |
| AP_INIT_TAKE1("ProxyHTMLBufSize", ap_set_int_slot, |
| (void*)APR_OFFSETOF(proxy_html_conf, bufsz), |
| RSRC_CONF|ACCESS_CONF, "Buffer size"), |
| AP_INIT_TAKE1("ProxyHTMLCharsetOut", ap_set_string_slot, |
| (void*)APR_OFFSETOF(proxy_html_conf, charset_out), |
| RSRC_CONF|ACCESS_CONF, "Usage: ProxyHTMLCharsetOut charset"), |
| AP_INIT_FLAG("ProxyHTMLEnable", ap_set_flag_slot, |
| (void*)APR_OFFSETOF(proxy_html_conf, enabled), |
| RSRC_CONF|ACCESS_CONF, |
| "Enable proxy-html and xml2enc filters"), |
| { NULL } |
| }; |
| static int mod_proxy_html(apr_pool_t *p, apr_pool_t *p1, apr_pool_t *p2) |
| { |
| seek_meta = ap_pregcomp(p, "<meta[^>]*(http-equiv)[^>]*>", |
| AP_REG_EXTENDED|AP_REG_ICASE); |
| seek_content = apr_strmatch_precompile(p, "content", 0); |
| memset(&sax, 0, sizeof(htmlSAXHandler)); |
| sax.startElement = pstartElement; |
| sax.endElement = pendElement; |
| sax.characters = pcharacters; |
| sax.comment = pcomment; |
| sax.cdataBlock = pcdata; |
| xml2enc_charset = APR_RETRIEVE_OPTIONAL_FN(xml2enc_charset); |
| xml2enc_filter = APR_RETRIEVE_OPTIONAL_FN(xml2enc_filter); |
| if (!xml2enc_charset) { |
| ap_log_perror(APLOG_MARK, APLOG_NOTICE, 0, p2, APLOGNO(01425) |
| "I18n support in mod_proxy_html requires mod_xml2enc. " |
| "Without it, non-ASCII characters in proxied pages are " |
| "likely to display incorrectly."); |
| } |
| |
| /* old_expr only needs to last the life of the config phase */ |
| old_expr = ap_rxplus_compile(p1, "s/^(!)?(\\w+)((=)(.+))?$/reqenv('$2')$1$4'$5'/"); |
| return OK; |
| } |
| static void proxy_html_insert(request_rec *r) |
| { |
| proxy_html_conf *cfg; |
| cfg = ap_get_module_config(r->per_dir_config, &proxy_html_module); |
| if (cfg->enabled) { |
| if (xml2enc_filter) |
| xml2enc_filter(r, NULL, ENCIO_INPUT_CHECKS); |
| ap_add_output_filter("proxy-html", NULL, r, r->connection); |
| } |
| } |
| static void proxy_html_hooks(apr_pool_t *p) |
| { |
| static const char *aszSucc[] = { "mod_filter.c", NULL }; |
| ap_register_output_filter_protocol("proxy-html", proxy_html_filter, |
| NULL, AP_FTYPE_RESOURCE, |
| AP_FILTER_PROTO_CHANGE|AP_FILTER_PROTO_CHANGE_LENGTH); |
| /* move this to pre_config so old_expr is available to interpret |
| * old-style conditions on URL maps. |
| */ |
| ap_hook_pre_config(mod_proxy_html, NULL, NULL, APR_HOOK_MIDDLE); |
| ap_hook_insert_filter(proxy_html_insert, NULL, aszSucc, APR_HOOK_MIDDLE); |
| } |
| |
| AP_DECLARE_MODULE(proxy_html) = { |
| STANDARD20_MODULE_STUFF, |
| proxy_html_config, |
| proxy_html_merge, |
| NULL, |
| NULL, |
| proxy_html_cmds, |
| proxy_html_hooks |
| }; |