| /* Licensed to the Apache Software Foundation (ASF) under one or more |
| * contributor license agreements. See the NOTICE file distributed with |
| * this work for additional information regarding copyright ownership. |
| * The ASF licenses this file to You under the Apache License, Version 2.0 |
| * (the "License"); you may not use this file except in compliance with |
| * the License. You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
| |
| #ifdef CROSS_COMPILE |
| |
| #include <ctype.h> |
| #define apr_isalnum(c) (isalnum(((unsigned char)(c)))) |
| #define apr_isalpha(c) (isalpha(((unsigned char)(c)))) |
| #define apr_iscntrl(c) (iscntrl(((unsigned char)(c)))) |
| #define apr_isprint(c) (isprint(((unsigned char)(c)))) |
| #define APR_HAVE_STDIO_H 1 |
| #define APR_HAVE_STRING_H 1 |
| |
| #else |
| |
| #include "apr.h" |
| #include "apr_lib.h" |
| |
| #endif |
| |
| #if defined(WIN32) || defined(OS2) |
| #define NEED_ENHANCED_ESCAPES |
| #endif |
| |
| #if APR_HAVE_STDIO_H |
| #include <stdio.h> |
| #endif |
| #if APR_HAVE_STRING_H |
| #include <string.h> |
| #endif |
| |
/* Several functions in util.c scan strings for particular classes of
 * characters.  To make those scans cheap, every octet gets one entry in a
 * lookup table, and each entry is a bitwise OR of the flags below.  The
 * rules that decide which flags an octet receives are in main() below.
 */
#define T_ESCAPE_SHELL_CMD    (1 << 0)  /* shell metacharacter */
#define T_ESCAPE_PATH_SEGMENT (1 << 1)  /* not allowed bare in a path segment */
#define T_OS_ESCAPE_PATH      (1 << 2)  /* as above, but ';' and '/' are allowed */
#define T_HTTP_TOKEN_STOP     (1 << 3)  /* NUL or any non-token character */
#define T_ESCAPE_LOGITEM      (1 << 4)  /* must be escaped in a log item */
#define T_ESCAPE_FORENSIC     (1 << 5)  /* must be escaped in forensic logs */
#define T_ESCAPE_URLENCODED   (1 << 6)  /* must be escaped when urlencoding */
#define T_HTTP_CTRLS          (1 << 7)  /* NUL or a control character except HT */
#define T_VCHAR_OBSTEXT       (1 << 8)  /* not NUL, not a control, not SP */
#define T_URI_UNRESERVED      (1 << 9)  /* RFC 3986 "unreserved" character */
| |
| int main(int argc, char *argv[]) |
| { |
| unsigned c; |
| unsigned short flags; |
| |
| printf("/* this file is automatically generated by gen_test_char, " |
| "do not edit */\n" |
| "#define T_ESCAPE_SHELL_CMD (%u)\n" |
| "#define T_ESCAPE_PATH_SEGMENT (%u)\n" |
| "#define T_OS_ESCAPE_PATH (%u)\n" |
| "#define T_HTTP_TOKEN_STOP (%u)\n" |
| "#define T_ESCAPE_LOGITEM (%u)\n" |
| "#define T_ESCAPE_FORENSIC (%u)\n" |
| "#define T_ESCAPE_URLENCODED (%u)\n" |
| "#define T_HTTP_CTRLS (%u)\n" |
| "#define T_VCHAR_OBSTEXT (%u)\n" |
| "#define T_URI_UNRESERVED (%u)\n" |
| "\n" |
| "static const unsigned short test_char_table[256] = {", |
| T_ESCAPE_SHELL_CMD, |
| T_ESCAPE_PATH_SEGMENT, |
| T_OS_ESCAPE_PATH, |
| T_HTTP_TOKEN_STOP, |
| T_ESCAPE_LOGITEM, |
| T_ESCAPE_FORENSIC, |
| T_ESCAPE_URLENCODED, |
| T_HTTP_CTRLS, |
| T_VCHAR_OBSTEXT, |
| T_URI_UNRESERVED |
| ); |
| |
| for (c = 0; c < 256; ++c) { |
| flags = 0; |
| if (c % 8 == 0) |
| printf("\n "); |
| |
| /* escape_shell_cmd */ |
| #ifdef NEED_ENHANCED_ESCAPES |
| /* Win32/OS2 have many of the same vulnerable characters |
| * as Unix sh, plus the carriage return and percent char. |
| * The proper escaping of these characters varies from unix |
| * since Win32/OS2 use carets or doubled-double quotes, |
| * and neither lf nor cr can be escaped. We escape unix |
| * specific as well, to assure that cross-compiled unix |
| * applications behave similarly when invoked on win32/os2. |
| * |
| * Rem please keep in-sync with apr's list in win32/filesys.c |
| */ |
| if (c && strchr("&;`'\"|*?~<>^()[]{}$\\\n\r%", c)) { |
| flags |= T_ESCAPE_SHELL_CMD; |
| } |
| #else |
| if (c && strchr("&;`'\"|*?~<>^()[]{}$\\\n", c)) { |
| flags |= T_ESCAPE_SHELL_CMD; |
| } |
| #endif |
| |
| if (!apr_isalnum(c) && !strchr("$-_.+!*'(),:@&=~", c)) { |
| flags |= T_ESCAPE_PATH_SEGMENT; |
| } |
| |
| if (!apr_isalnum(c) && !strchr("$-_.+!*'(),:;@&=/~", c)) { |
| flags |= T_OS_ESCAPE_PATH; |
| } |
| |
| if (!apr_isalnum(c) && !strchr(".-*_ ", c)) { |
| flags |= T_ESCAPE_URLENCODED; |
| } |
| |
| /* Stop for any non-'token' character, including ctrls, obs-text, |
| * and "tspecials" (RFC2068) a.k.a. "separators" (RFC2616), which |
| * is easier to express as characters remaining in the ASCII token set |
| */ |
| if (!c || !(apr_isalnum(c) || strchr("!#$%&'*+-.^_`|~", c))) { |
| flags |= T_HTTP_TOKEN_STOP; |
| } |
| |
| /* Catch CTRLs other than VCHAR, HT and SP, and obs-text (RFC7230 3.2) |
| * This includes only the C0 plane, not C1 (which is obs-text itself.) |
| * XXX: We should verify that all ASCII C0 ctrls/DEL corresponding to |
| * the current EBCDIC translation are captured, and ASCII C1 ctrls |
| * corresponding are all permitted (as they fall under obs-text rule) |
| */ |
| if (!c || (apr_iscntrl(c) && c != '\t')) { |
| flags |= T_HTTP_CTRLS; |
| } |
| |
| /* From RFC3986, the specific sets of gen-delims, sub-delims (2.2), |
| * and unreserved (2.3) that are possible somewhere within a URI. |
| * Spec requires all others to be %XX encoded, including obs-text. |
| */ |
| if (c && !apr_iscntrl(c) && c != ' ') { |
| flags |= T_VCHAR_OBSTEXT; |
| } |
| |
| /* For logging, escape all control characters, |
| * double quotes (because they delimit the request in the log file) |
| * backslashes (because we use backslash for escaping) |
| * and 8-bit chars with the high bit set |
| */ |
| if (c && (!apr_isprint(c) || c == '"' || c == '\\' || apr_iscntrl(c))) { |
| flags |= T_ESCAPE_LOGITEM; |
| } |
| |
| /* For forensic logging, escape all control characters, top bit set, |
| * :, | (used as delimiters) and % (used for escaping). |
| */ |
| if (!apr_isprint(c) || c == ':' || c == '|' || c == '%' |
| || apr_iscntrl(c) || !c) { |
| flags |= T_ESCAPE_FORENSIC; |
| } |
| |
| /* Characters in the RFC 3986 "unreserved" set. |
| * https://datatracker.ietf.org/doc/html/rfc3986#section-2.3 */ |
| if (c && (apr_isalnum(c) || strchr("-._~", c))) { |
| flags |= T_URI_UNRESERVED; |
| } |
| |
| printf("0x%03x%c", flags, (c < 255) ? ',' : ' '); |
| } |
| |
| printf("\n};\n\n"); |
| |
| printf( |
| "/* we assume the folks using this ensure 0 <= c < 256... which means\n" |
| " * you need a cast to (unsigned char) first, you can't just plug a\n" |
| " * char in here and get it to work, because if char is signed then it\n" |
| " * will first be sign extended.\n" |
| " */\n" |
| "#define TEST_CHAR(c, f) (test_char_table[(unsigned char)(c)] & (f))\n" |
| ); |
| |
| return 0; |
| } |