/* connectors/jk/native/common/jk_url.c - tomcat55 - Git at Google */

 /*
  *  Licensed to the Apache Software Foundation (ASF) under one or more
  *  contributor license agreements.  See the NOTICE file distributed with
  *  this work for additional information regarding copyright ownership.
  *  The ASF licenses this file to You under the Apache License, Version 2.0
  *  (the "License"); you may not use this file except in compliance with
  *  the License.  You may obtain a copy of the License at
  *
  *      http://www.apache.org/licenses/LICENSE-2.0
  *
  *  Unless required by applicable law or agreed to in writing, software
  *  distributed under the License is distributed on an "AS IS" BASIS,
  *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  *  See the License for the specific language governing permissions and
  *  limitations under the License.
  */

 /***************************************************************************
  * Description: URL manipulation subroutines. (ported from mod_proxy).     *
  * Version:     $Revision: 531816 $                                        *
  ***************************************************************************/

 #include "jk_global.h"
 #include "jk_url.h"

 /*
  * Character classification helpers.
  * Without APR the <ctype.h> functions are used directly; the argument is
  * narrowed to unsigned char first so that a negative char value is never
  * handed to them.  With APR the apr_is*() wrappers are used instead.
  */
 #ifndef HAVE_APR
 #define JK_ISXDIGIT(x) isxdigit((int)(unsigned char)(x))
 #define JK_ISDIGIT(x)  isdigit((int)(unsigned char)(x))
 #define JK_ISUPPER(x)  isupper((int)(unsigned char)(x))
 #define JK_ISALNUM(x)  isalnum((int)(unsigned char)(x))
 #else
 #define JK_ISXDIGIT(x) apr_isxdigit(x)
 #define JK_ISDIGIT(x)  apr_isdigit(x)
 #define JK_ISUPPER(x)  apr_isupper(x)
 #define JK_ISALNUM(x)  apr_isalnum(x)
 #endif

 /*
  * Convert two hexadecimal digits into the character they encode.
  *
  * x points at the two digits: x[0] is the high nibble, x[1] the low one.
  * The caller must already have verified that both characters are hex
  * digits; no validation is done here.
  *
  * On non-EBCDIC builds the decoded byte value (0..255) is returned.
  * On CHARSET_EBCDIC builds the decoded value is treated as an ASCII code
  * and passed through jk_xlate_from_ascii(); 0 is returned when sscanf()
  * cannot parse the digits.
  */
 static  int jk_hex2c(const char *x)
 {
 #if !CHARSET_EBCDIC
     int i, ch;

     /* high nibble */
     ch = x[0];
     if (JK_ISDIGIT(ch)) {
         i = ch - '0';
     }
     else if (JK_ISUPPER(ch)) {
         i = ch - ('A' - 10);
     }
     else {
         i = ch - ('a' - 10);
     }
     i <<= 4;

     /* low nibble */
     ch = x[1];
     if (JK_ISDIGIT(ch)) {
         i += ch - '0';
     }
     else if (JK_ISUPPER(ch)) {
         i += ch - ('A' - 10);
     }
     else {
         i += ch - ('a' - 10);
     }
     return i;
 #else /*CHARSET_EBCDIC*/
     /*
      * we assume that the hex value refers to an ASCII character
      * so convert to EBCDIC so that it makes sense locally;
      *
      * example:
      *
      * client specifies %20 in URL to refer to a space char;
      * at this point we're called with EBCDIC "20"; after turning
      * EBCDIC "20" into binary 0x20, we then need to assume that 0x20
      * represents an ASCII char and convert 0x20 to EBCDIC, yielding
      * 0x40
      */
     unsigned int val;   /* the "%x" conversion stores through an unsigned int pointer */
     char buf[1];

     if (1 == sscanf(x, "%2x", &val)) {
         buf[0] = val & 0xFF;
         jk_xlate_from_ascii(buf, 1);
         return buf[0];
     }
     else {
         return 0;
     }
 #endif /*CHARSET_EBCDIC*/
 }

 /*
  * Write the three-character escape "%XX" (upper-case hex) for the low
  * eight bits of ch into x[0..2].
  *
  * On non-EBCDIC builds nothing beyond x[2] is written.  On CHARSET_EBCDIC
  * builds the byte is first passed through jk_xlate_to_ascii() and x[3] is
  * additionally set to '\0'.
  */
 static void jk_c2hex(int ch, char *x)
 {
     static const char hexdigits[] = "0123456789ABCDEF";
 #if CHARSET_EBCDIC
     char ascii[1];

     ascii[0] = ch & 0xFF;
     jk_xlate_to_ascii(ascii, 1);

     x[0] = '%';
     x[1] = hexdigits[(ascii[0] >> 4) & 0x0F];
     x[2] = hexdigits[ascii[0] & 0x0F];
     x[3] = '\0';
 #else /* !CHARSET_EBCDIC */
     int hi = (ch & 0xF0) >> 4;
     int lo = ch & 0x0F;

     x[0] = '%';
     x[1] = hexdigits[hi];
     x[2] = hexdigits[lo];
 #endif /* CHARSET_EBCDIC */
 }

 /*
  * canonicalise a URL-encoded string
  */

 /*
  * Convert a URL-encoded string to canonical form.
  * It decodes characters which need not be encoded,
  * and encodes those which must be encoded, and does not touch
  * those which must not be touched.
  *
  * y        - output buffer; every input byte can expand to a three
  *            character %XX escape, so it needs room for 3 * len + 1 bytes
  * x        - input; exactly len bytes are examined
  * len      - number of bytes of x to process
  * t        - which URL component x is; selects the allowed/reserved sets
  * forcedec - non-zero: always decode %XX escapes found in x
  * proxyreq - escapes are also decoded when this is non-zero and not
  *            JK_PROXYREQ_REVERSE
  *
  * Returns y (NUL terminated) on success, or NULL when a '%' that has to be
  * decoded is not followed by two hex digits inside the len bytes of x.
  * On a NULL return the contents of y are unspecified.
  */
 char * jk_canonenc(char *y, const char *x, int len,
                                        enum enctype t, int forcedec,
                                        int proxyreq)
 {
     int i, j, ch;
     const char *allowed;  /* characters which should not be encoded */
     const char *reserved; /* characters which must not be en/de-coded */

 /*
  * N.B. in addition to :@&=, this allows ';' in an http path
  * and '?' in an ftp path -- this may be revised
  *
  * Also, it makes a '+' character in a search string reserved, as
  * it may be form-encoded. (Although RFC 1738 doesn't allow this -
  * it only permits ; / ? : @ = & as reserved chars.)
  */
     if (t == enc_path) {
         allowed = "~$-_.+!*'(),;:@&=";
     }
     else if (t == enc_search) {
         allowed = "$-_.!*'(),;:@&=";
     }
     else if (t == enc_user) {
         allowed = "$-_.+!*'(),;@&=";
     }
     else if (t == enc_fpath) {
         allowed = "$-_.+!*'(),?:@&=";
     }
     else {            /* if (t == enc_parm) */
         allowed = "$-_.+!*'(),?/:@&=";
     }

     if (t == enc_path) {
         reserved = "/";
     }
     else if (t == enc_search) {
         reserved = "+";
     }
     else {
         reserved = "";
     }

     /* y = apr_palloc(p, 3 * len + 1); */

     for (i = 0, j = 0; i < len; i++, j++) {
 /*
  * always handle reserved characters first.  strchr() also matches the
  * terminating NUL of its first argument, so a NUL byte must be excluded
  * explicitly or it would be copied through and truncate the result.
  */
         ch = x[i];
         if (ch != 0 && strchr(reserved, ch)) {
             y[j] = ch;
             continue;
         }
 /*
  * decode it if not already done. do not decode reverse proxied URLs
  * unless specifically forced
  */
         if ((forcedec || (proxyreq && proxyreq != JK_PROXYREQ_REVERSE)) && ch == '%') {
             /* both hex digits must lie inside the len bytes we were given */
             if (len - i < 3
                 || !JK_ISXDIGIT(x[i + 1]) || !JK_ISXDIGIT(x[i + 2])) {
                 return NULL;
             }
             ch = jk_hex2c(&x[i + 1]);
             i += 2;
             if (ch != 0 && strchr(reserved, ch)) {  /* keep it encoded */
                 jk_c2hex(ch, &y[j]);
                 j += 2;
                 continue;
             }
         }
 /*
  * recode it, if necessary.  A NUL (raw, or decoded from %00) is always
  * written as an escape; strchr(allowed, 0) would otherwise report it as
  * an allowed character.
  */
         if (!JK_ISALNUM(ch) && (ch == 0 || !strchr(allowed, ch))) {
             jk_c2hex(ch, &y[j]);
             j += 2;
         }
         else {
             y[j] = ch;
         }
     }
     y[j] = '\0';
     return y;
 }
	/*
	* Licensed to the Apache Software Foundation (ASF) under one or more
	* contributor license agreements. See the NOTICE file distributed with
	* this work for additional information regarding copyright ownership.
	* The ASF licenses this file to You under the Apache License, Version 2.0
	* (the "License"); you may not use this file except in compliance with
	* the License. You may obtain a copy of the License at
	*
	* http://www.apache.org/licenses/LICENSE-2.0
	*
	* Unless required by applicable law or agreed to in writing, software
	* distributed under the License is distributed on an "AS IS" BASIS,
	* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
	* See the License for the specific language governing permissions and
	* limitations under the License.
	*/

	/***************************************************************************
	* Description: URL manipulation subroutines. (ported from mod_proxy). *
	* Version: $Revision: 531816 $ *
	***************************************************************************/

	#include "jk_global.h"
	#include "jk_url.h"

	/*
	 * Character classification helpers.
	 * Without APR the <ctype.h> functions are used directly; the argument is
	 * narrowed to unsigned char first so that a negative char value is never
	 * handed to them.  With APR the apr_is*() wrappers are used instead.
	 */
	#ifndef HAVE_APR
	#define JK_ISXDIGIT(x) isxdigit((int)(unsigned char)(x))
	#define JK_ISDIGIT(x) isdigit((int)(unsigned char)(x))
	#define JK_ISUPPER(x) isupper((int)(unsigned char)(x))
	#define JK_ISALNUM(x) isalnum((int)(unsigned char)(x))
	#else
	#define JK_ISXDIGIT(x) apr_isxdigit(x)
	#define JK_ISDIGIT(x) apr_isdigit(x)
	#define JK_ISUPPER(x) apr_isupper(x)
	#define JK_ISALNUM(x) apr_isalnum(x)
	#endif

	/*
	 * Convert two hexadecimal digits into the character they encode.
	 *
	 * x points at the two digits: x[0] is the high nibble, x[1] the low one.
	 * The caller must already have verified that both characters are hex
	 * digits; no validation is done here.
	 *
	 * On non-EBCDIC builds the decoded byte value (0..255) is returned.
	 * On CHARSET_EBCDIC builds the decoded value is treated as an ASCII code
	 * and passed through jk_xlate_from_ascii(); 0 is returned when sscanf()
	 * cannot parse the digits.
	 */
	static int jk_hex2c(const char *x)
	{
	#if !CHARSET_EBCDIC
	    int i, ch;

	    /* high nibble */
	    ch = x[0];
	    if (JK_ISDIGIT(ch)) {
	        i = ch - '0';
	    }
	    else if (JK_ISUPPER(ch)) {
	        i = ch - ('A' - 10);
	    }
	    else {
	        i = ch - ('a' - 10);
	    }
	    i <<= 4;

	    /* low nibble */
	    ch = x[1];
	    if (JK_ISDIGIT(ch)) {
	        i += ch - '0';
	    }
	    else if (JK_ISUPPER(ch)) {
	        i += ch - ('A' - 10);
	    }
	    else {
	        i += ch - ('a' - 10);
	    }
	    return i;
	#else /*CHARSET_EBCDIC*/
	    /*
	     * we assume that the hex value refers to an ASCII character
	     * so convert to EBCDIC so that it makes sense locally;
	     *
	     * example:
	     *
	     * client specifies %20 in URL to refer to a space char;
	     * at this point we're called with EBCDIC "20"; after turning
	     * EBCDIC "20" into binary 0x20, we then need to assume that 0x20
	     * represents an ASCII char and convert 0x20 to EBCDIC, yielding
	     * 0x40
	     */
	    unsigned int val;   /* the "%x" conversion stores through an unsigned int pointer */
	    char buf[1];

	    if (1 == sscanf(x, "%2x", &val)) {
	        buf[0] = val & 0xFF;
	        jk_xlate_from_ascii(buf, 1);
	        return buf[0];
	    }
	    else {
	        return 0;
	    }
	#endif /*CHARSET_EBCDIC*/
	}

	/*
	 * Write the three-character escape "%XX" (upper-case hex) for the low
	 * eight bits of ch into x[0..2].
	 *
	 * On non-EBCDIC builds nothing beyond x[2] is written.  On CHARSET_EBCDIC
	 * builds the byte is first passed through jk_xlate_to_ascii() and x[3] is
	 * additionally set to '\0'.
	 */
	static void jk_c2hex(int ch, char *x)
	{
	#if !CHARSET_EBCDIC
	    int i;

	    x[0] = '%';

	    /* high nibble */
	    i = (ch & 0xF0) >> 4;
	    if (i >= 10) {
	        x[1] = ('A' - 10) + i;
	    }
	    else {
	        x[1] = '0' + i;
	    }

	    /* low nibble */
	    i = ch & 0x0F;
	    if (i >= 10) {
	        x[2] = ('A' - 10) + i;
	    }
	    else {
	        x[2] = '0' + i;
	    }
	#else /*CHARSET_EBCDIC*/
	    static const char ntoa[] = { "0123456789ABCDEF" };
	    char buf[1];

	    ch &= 0xFF;

	    buf[0] = ch;
	    jk_xlate_to_ascii(buf, 1);

	    x[0] = '%';
	    x[1] = ntoa[(buf[0] >> 4) & 0x0F];
	    x[2] = ntoa[buf[0] & 0x0F];
	    x[3] = '\0';
	#endif /*CHARSET_EBCDIC*/
	}

	/*
	* canonicalise a URL-encoded string
	*/

	/*
	* Convert a URL-encoded string to canonical form.
	* It decodes characters which need not be encoded,
	* and encodes those which must be encoded, and does not touch
	* those which must not be touched.
	*/
	char * jk_canonenc(char y, const char x, int len,
	enum enctype t, int forcedec,
	int proxyreq)
	{
	int i, j, ch;
	char allowed; / characters which should not be encoded */
	char reserved; / characters which much not be en/de-coded */

	/*
	* N.B. in addition to :@&=, this allows ';' in an http path
	* and '?' in an ftp path -- this may be revised
	*
	* Also, it makes a '+' character in a search string reserved, as
	* it may be form-encoded. (Although RFC 1738 doesn't allow this -
	* it only permits ; / ? : @ = & as reserved chars.)
	*/
	if (t == enc_path) {
	allowed = "~$-_.+!*'(),;:@&=";
	}
	else if (t == enc_search) {
	allowed = "$-_.!*'(),;:@&=";
	}
	else if (t == enc_user) {
	allowed = "$-_.+!*'(),;@&=";
	}
	else if (t == enc_fpath) {
	allowed = "$-_.+!*'(),?:@&=";
	}
	else { /* if (t == enc_parm) */
	allowed = "$-_.+!*'(),?/:@&=";
	}

	if (t == enc_path) {
	reserved = "/";
	}
	else if (t == enc_search) {
	reserved = "+";
	}
	else {
	reserved = "";
	}

	/* y = apr_palloc(p, 3 * len + 1); */

	for (i = 0, j = 0; i < len; i++, j++) {
	/* always handle '/' first */
	ch = x[i];
	if (strchr(reserved, ch)) {
	y[j] = ch;
	continue;
	}
	/*
	* decode it if not already done. do not decode reverse proxied URLs
	* unless specifically forced
	*/
	if ((forcedec \|\| (proxyreq && proxyreq != JK_PROXYREQ_REVERSE)) && ch == '%') {
	if (!JK_ISXDIGIT(x[i + 1]) \|\| !JK_ISXDIGIT(x[i + 2])) {
	return NULL;
	}
	ch = jk_hex2c(&x[i + 1]);
	i += 2;
	if (ch != 0 && strchr(reserved, ch)) { /* keep it encoded */
	jk_c2hex(ch, &y[j]);
	j += 2;
	continue;
	}
	}
	/* recode it, if necessary */
	if (!JK_ISALNUM(ch) && !strchr(allowed, ch)) {
	jk_c2hex(ch, &y[j]);
	j += 2;
	}
	else {
	y[j] = ch;
	}
	}
	y[j] = '\0';
	return y;
	}