| /* |
| * test_escape.c Test escape functions |
| * |
| * Copyright (c) 2022-2025, PostgreSQL Global Development Group |
| * |
| * IDENTIFICATION |
| * src/test/modules/test_escape/test_escape.c |
| */ |
| |
| #include "postgres_fe.h" |
| |
| #include <string.h> |
| #include <stdio.h> |
| |
| #include "fe_utils/psqlscan.h" |
| #include "fe_utils/string_utils.h" |
| #include "getopt_long.h" |
| #include "libpq-fe.h" |
| #include "mb/pg_wchar.h" |
| |
| |
| typedef struct pe_test_config |
| { |
| int verbosity; |
| bool force_unsupported; |
| const char *conninfo; |
| PGconn *conn; |
| |
| int test_count; |
| int failure_count; |
| } pe_test_config; |
| |
| |
| /* |
| * An escape function to be tested by this test. |
| */ |
| typedef struct pe_test_escape_func |
| { |
| const char *name; |
| |
| /* |
| * Can the escape method report errors? If so, we validate that it does in |
| * case of various invalid inputs. |
| */ |
| bool reports_errors; |
| |
| /* |
| * Is the escape method known to not handle invalidly encoded input? If |
| * so, we don't run the test unless --force-unsupported is used. |
| */ |
| bool supports_only_valid; |
| |
| /* |
| * Is the escape method known to only handle encodings where no byte in a |
| * multi-byte characters are valid ascii. |
| */ |
| bool supports_only_ascii_overlap; |
| |
| bool (*escape) (PGconn *conn, PQExpBuffer target, |
| const char *unescaped, size_t unescaped_len, |
| PQExpBuffer escape_err); |
| } pe_test_escape_func; |
| |
/*
 * One input string that is fed to every escape function.
 */
typedef struct pe_test_vector
{
	/* client encoding to select before escaping */
	const char *client_encoding;
	/* number of bytes in "escape"; explicit so inputs may embed NUL bytes */
	size_t		escape_len;
	/* the raw bytes to be escaped */
	const char *escape;
} pe_test_vector;
| |
| |
| /* |
| * Callback functions from flex lexer. Not currently used by the test. |
| */ |
| static const PsqlScanCallbacks test_scan_callbacks = { |
| NULL |
| }; |
| |
| |
| static bool |
| escape_literal(PGconn *conn, PQExpBuffer target, |
| const char *unescaped, size_t unescaped_len, |
| PQExpBuffer escape_err) |
| { |
| char *escaped; |
| |
| escaped = PQescapeLiteral(conn, unescaped, unescaped_len); |
| if (!escaped) |
| { |
| appendPQExpBuffer(escape_err, "%s", |
| PQerrorMessage(conn)); |
| escape_err->data[escape_err->len - 1] = 0; |
| escape_err->len--; |
| return false; |
| } |
| else |
| { |
| appendPQExpBufferStr(target, escaped); |
| PQfreemem(escaped); |
| return true; |
| } |
| } |
| |
| static bool |
| escape_identifier(PGconn *conn, PQExpBuffer target, |
| const char *unescaped, size_t unescaped_len, |
| PQExpBuffer escape_err) |
| { |
| char *escaped; |
| |
| escaped = PQescapeIdentifier(conn, unescaped, unescaped_len); |
| if (!escaped) |
| { |
| appendPQExpBuffer(escape_err, "%s", |
| PQerrorMessage(conn)); |
| escape_err->data[escape_err->len - 1] = 0; |
| escape_err->len--; |
| return false; |
| } |
| else |
| { |
| appendPQExpBufferStr(target, escaped); |
| PQfreemem(escaped); |
| return true; |
| } |
| } |
| |
| static bool |
| escape_string_conn(PGconn *conn, PQExpBuffer target, |
| const char *unescaped, size_t unescaped_len, |
| PQExpBuffer escape_err) |
| { |
| int error; |
| size_t sz; |
| |
| appendPQExpBufferChar(target, '\''); |
| enlargePQExpBuffer(target, unescaped_len * 2 + 1); |
| sz = PQescapeStringConn(conn, target->data + target->len, |
| unescaped, unescaped_len, |
| &error); |
| |
| target->len += sz; |
| appendPQExpBufferChar(target, '\''); |
| |
| if (error) |
| { |
| appendPQExpBuffer(escape_err, "%s", |
| PQerrorMessage(conn)); |
| escape_err->data[escape_err->len - 1] = 0; |
| escape_err->len--; |
| return false; |
| } |
| else |
| { |
| return true; |
| } |
| } |
| |
| static bool |
| escape_string(PGconn *conn, PQExpBuffer target, |
| const char *unescaped, size_t unescaped_len, |
| PQExpBuffer escape_err) |
| { |
| size_t sz; |
| |
| appendPQExpBufferChar(target, '\''); |
| enlargePQExpBuffer(target, unescaped_len * 2 + 1); |
| sz = PQescapeString(target->data + target->len, |
| unescaped, unescaped_len); |
| target->len += sz; |
| appendPQExpBufferChar(target, '\''); |
| |
| |
| return true; |
| } |
| |
| /* |
| * Escape via s/'/''/. Non-core drivers invariably wrap libpq or use this |
| * method. It suffices iff the input passes encoding validation, so it's |
| * marked as supports_only_valid. |
| */ |
| static bool |
| escape_replace(PGconn *conn, PQExpBuffer target, |
| const char *unescaped, size_t unescaped_len, |
| PQExpBuffer escape_err) |
| { |
| const char *s = unescaped; |
| |
| appendPQExpBufferChar(target, '\''); |
| |
| for (int i = 0; i < unescaped_len; i++) |
| { |
| char c = *s; |
| |
| if (c == '\'') |
| { |
| appendPQExpBufferStr(target, "''"); |
| } |
| else |
| appendPQExpBufferChar(target, c); |
| s++; |
| } |
| appendPQExpBufferChar(target, '\''); |
| |
| return true; |
| } |
| |
| static bool |
| escape_append_literal(PGconn *conn, PQExpBuffer target, |
| const char *unescaped, size_t unescaped_len, |
| PQExpBuffer escape_err) |
| { |
| appendStringLiteral(target, unescaped, PQclientEncoding(conn), 1); |
| |
| return true; |
| } |
| |
| static bool |
| escape_fmt_id(PGconn *conn, PQExpBuffer target, |
| const char *unescaped, size_t unescaped_len, |
| PQExpBuffer escape_err) |
| { |
| setFmtEncoding(PQclientEncoding(conn)); |
| appendPQExpBufferStr(target, fmtId(unescaped)); |
| |
| return true; |
| } |
| |
| static pe_test_escape_func pe_test_escape_funcs[] = |
| { |
| { |
| .name = "PQescapeLiteral", |
| .reports_errors = true, |
| .escape = escape_literal, |
| }, |
| { |
| .name = "PQescapeIdentifier", |
| .reports_errors = true, |
| .escape = escape_identifier |
| }, |
| { |
| .name = "PQescapeStringConn", |
| .reports_errors = true, |
| .escape = escape_string_conn |
| }, |
| { |
| .name = "PQescapeString", |
| .reports_errors = false, |
| .escape = escape_string |
| }, |
| { |
| .name = "replace", |
| .reports_errors = false, |
| .supports_only_valid = true, |
| .supports_only_ascii_overlap = true, |
| .escape = escape_replace |
| }, |
| { |
| .name = "appendStringLiteral", |
| .reports_errors = false, |
| .escape = escape_append_literal |
| }, |
| { |
| .name = "fmtId", |
| .reports_errors = false, |
| .escape = escape_fmt_id |
| }, |
| }; |
| |
| |
| #define TV(enc, string) {.client_encoding = (enc), .escape=string, .escape_len=sizeof(string) - 1, } |
| static pe_test_vector pe_test_vectors[] = |
| { |
| /* expected to work sanity checks */ |
| TV("UTF-8", "1"), |
| TV("UTF-8", "'"), |
| TV("UTF-8", "\""), |
| |
| TV("UTF-8", "\'"), |
| TV("UTF-8", "\""), |
| |
| TV("UTF-8", "\\"), |
| |
| TV("UTF-8", "\\'"), |
| TV("UTF-8", "\\\""), |
| |
| /* trailing multi-byte character, paddable in available space */ |
| TV("UTF-8", "1\xC0"), |
| TV("UTF-8", "1\xE0 "), |
| TV("UTF-8", "1\xF0 "), |
| TV("UTF-8", "1\xF0 "), |
| TV("UTF-8", "1\xF0 "), |
| |
| /* trailing multi-byte character, not enough space to pad */ |
| TV("UTF-8", "1\xE0"), |
| TV("UTF-8", "1\xF0"), |
| TV("UTF-8", "\xF0"), |
| |
| /* try to smuggle in something in invalid characters */ |
| TV("UTF-8", "1\xE0'"), |
| TV("UTF-8", "1\xE0\""), |
| TV("UTF-8", "1\xF0'"), |
| TV("UTF-8", "1\xF0\""), |
| TV("UTF-8", "1\xF0'; "), |
| TV("UTF-8", "1\xF0\"; "), |
| TV("UTF-8", "1\xF0';;;;"), |
| TV("UTF-8", "1\xF0 ';;;;"), |
| TV("UTF-8", "1\xF0 \";;;;"), |
| TV("UTF-8", "1\xE0'; \\l ; "), |
| TV("UTF-8", "1\xE0\"; \\l ; "), |
| |
| /* null byte handling */ |
| TV("UTF-8", "some\0thing"), |
| TV("UTF-8", "some\0"), |
| TV("UTF-8", "some\xF0'\0"), |
| TV("UTF-8", "some\xF0'\0'"), |
| TV("UTF-8", "some\xF0" "ab\0'"), |
| |
| /* GB18030's 4 byte encoding requires a 2nd byte limited values */ |
| TV("GB18030", "\x90\x31"), |
| TV("GB18030", "\\\x81\x5c'"), |
| TV("GB18030", "\\\x81\x5c\""), |
| TV("GB18030", "\\\x81\x5c\0'"), |
| |
| /* |
| * \x81 indicates a 2 byte char. ' and " are not a valid second byte, but |
| * that requires encoding verification to know. E.g. replace_string() |
| * doesn't cope. |
| */ |
| TV("GB18030", "\\\x81';"), |
| TV("GB18030", "\\\x81\";"), |
| |
| /* |
| * \x81 indicates a 2 byte char. \ is a valid second character. |
| */ |
| TV("GB18030", "\\\x81\\';"), |
| TV("GB18030", "\\\x81\\\";"), |
| TV("GB18030", "\\\x81\0;"), |
| TV("GB18030", "\\\x81\0'"), |
| TV("GB18030", "\\\x81'\0"), |
| |
| TV("SJIS", "\xF0\x40;"), |
| |
| TV("SJIS", "\xF0';"), |
| TV("SJIS", "\xF0\";"), |
| TV("SJIS", "\xF0\0'"), |
| TV("SJIS", "\\\xF0\\';"), |
| TV("SJIS", "\\\xF0\\\";"), |
| |
| TV("gbk", "\x80';"), |
| TV("gbk", "\x80"), |
| TV("gbk", "\x80'"), |
| TV("gbk", "\x80\""), |
| TV("gbk", "\x80\\"), |
| |
| TV("mule_internal", "\\\x9c';\0;"), |
| |
| TV("sql_ascii", "1\xC0'"), |
| }; |
| |
| |
| /* |
| * Print the string into buf, making characters outside of plain ascii |
| * somewhat easier to recognize. |
| * |
| * The output format could stand to be improved significantly, it's not at all |
| * unambiguous. |
| */ |
| static void |
| escapify(PQExpBuffer buf, const char *str, size_t len) |
| { |
| for (size_t i = 0; i < len; i++) |
| { |
| char c = *str; |
| |
| if (c == '\n') |
| appendPQExpBufferStr(buf, "\\n"); |
| else if (c == '\0') |
| appendPQExpBufferStr(buf, "\\0"); |
| else if (c < ' ' || c > '~') |
| appendPQExpBuffer(buf, "\\x%2x", (uint8_t) c); |
| else |
| appendPQExpBufferChar(buf, c); |
| str++; |
| } |
| } |
| |
| static void |
| report_result(pe_test_config *tc, |
| bool success, |
| PQExpBuffer testname, |
| PQExpBuffer details, |
| const char *subname, |
| const char *resultdesc) |
| { |
| int test_id = ++tc->test_count; |
| bool print_details = true; |
| bool print_result = true; |
| |
| if (success) |
| { |
| if (tc->verbosity <= 0) |
| print_details = false; |
| if (tc->verbosity < 0) |
| print_result = false; |
| } |
| else |
| tc->failure_count++; |
| |
| if (print_details) |
| printf("%s", details->data); |
| |
| if (print_result) |
| printf("%s %d - %s: %s: %s\n", |
| success ? "ok" : "not ok", |
| test_id, testname->data, |
| subname, |
| resultdesc); |
| } |
| |
| /* |
| * Return true for encodings in which bytes in a multi-byte character look |
| * like valid ascii characters. |
| */ |
| static bool |
| encoding_conflicts_ascii(int encoding) |
| { |
| /* |
| * We don't store this property directly anywhere, but whether an encoding |
| * is a client-only encoding is a good proxy. |
| */ |
| if (encoding > PG_ENCODING_BE_LAST) |
| return true; |
| return false; |
| } |
| |
| static const char * |
| scan_res_s(PsqlScanResult res) |
| { |
| #define TOSTR_CASE(sym) case sym: return #sym |
| |
| switch (res) |
| { |
| TOSTR_CASE(PSCAN_SEMICOLON); |
| TOSTR_CASE(PSCAN_BACKSLASH); |
| TOSTR_CASE(PSCAN_INCOMPLETE); |
| TOSTR_CASE(PSCAN_EOL); |
| } |
| |
| pg_unreachable(); |
| return ""; /* silence compiler */ |
| } |
| |
| /* |
| * Verify that psql parses the input as a single statement. If this property |
| * is violated, the escape function does not effectively protect against |
| * smuggling in a second statement. |
| */ |
| static void |
| test_psql_parse(pe_test_config *tc, PQExpBuffer testname, |
| PQExpBuffer input_buf, PQExpBuffer details) |
| { |
| PsqlScanState scan_state; |
| PsqlScanResult scan_result; |
| PQExpBuffer query_buf; |
| promptStatus_t prompt_status = PROMPT_READY; |
| int matches = 0; |
| bool test_fails; |
| const char *resdesc; |
| |
| query_buf = createPQExpBuffer(); |
| |
| scan_state = psql_scan_create(&test_scan_callbacks); |
| |
| /* |
| * TODO: This hardcodes standard conforming strings, it would be useful to |
| * test without as well. |
| */ |
| psql_scan_setup(scan_state, input_buf->data, input_buf->len, |
| PQclientEncoding(tc->conn), 1); |
| |
| do |
| { |
| resetPQExpBuffer(query_buf); |
| |
| scan_result = psql_scan(scan_state, query_buf, |
| &prompt_status); |
| |
| appendPQExpBuffer(details, |
| "#\t\t %d: scan_result: %s prompt: %u, query_buf: ", |
| matches, scan_res_s(scan_result), prompt_status); |
| escapify(details, query_buf->data, query_buf->len); |
| appendPQExpBuffer(details, "\n"); |
| |
| matches++; |
| } |
| while (scan_result != PSCAN_INCOMPLETE && scan_result != PSCAN_EOL); |
| |
| psql_scan_destroy(scan_state); |
| destroyPQExpBuffer(query_buf); |
| |
| test_fails = matches > 1 || scan_result != PSCAN_EOL; |
| |
| if (matches > 1) |
| resdesc = "more than one match"; |
| else if (scan_result != PSCAN_EOL) |
| resdesc = "unexpected end state"; |
| else |
| resdesc = "ok"; |
| |
| report_result(tc, !test_fails, testname, details, |
| "psql parse", |
| resdesc); |
| } |
| |
| static void |
| test_one_vector_escape(pe_test_config *tc, const pe_test_vector *tv, const pe_test_escape_func *ef) |
| { |
| PQExpBuffer testname; |
| PQExpBuffer details; |
| PQExpBuffer escape_buf; |
| PQExpBuffer escape_err; |
| size_t input_encoding_validlen; |
| bool input_encoding_valid; |
| size_t input_encoding0_validlen; |
| bool input_encoding0_valid; |
| bool escape_success; |
| size_t escape_encoding_length; |
| bool escape_encoding_valid; |
| |
| escape_err = createPQExpBuffer(); |
| testname = createPQExpBuffer(); |
| details = createPQExpBuffer(); |
| escape_buf = createPQExpBuffer(); |
| |
| if (ef->supports_only_ascii_overlap && |
| encoding_conflicts_ascii(PQclientEncoding(tc->conn))) |
| { |
| goto out; |
| } |
| |
| /* name to describe the test */ |
| appendPQExpBuffer(testname, ">"); |
| escapify(testname, tv->escape, tv->escape_len); |
| appendPQExpBuffer(testname, "< - %s - %s", |
| tv->client_encoding, ef->name); |
| |
| /* details to describe the test, to allow for debugging */ |
| appendPQExpBuffer(details, "#\t input: %zd bytes: ", |
| tv->escape_len); |
| escapify(details, tv->escape, tv->escape_len); |
| appendPQExpBufferStr(details, "\n"); |
| appendPQExpBuffer(details, "#\t encoding: %s\n", |
| tv->client_encoding); |
| |
| |
| /* check encoding of input, to compare with after the test */ |
| input_encoding_validlen = pg_encoding_verifymbstr(PQclientEncoding(tc->conn), |
| tv->escape, |
| tv->escape_len); |
| input_encoding_valid = input_encoding_validlen == tv->escape_len; |
| appendPQExpBuffer(details, "#\t input encoding valid: %d\n", |
| input_encoding_valid); |
| |
| input_encoding0_validlen = pg_encoding_verifymbstr(PQclientEncoding(tc->conn), |
| tv->escape, |
| strlen(tv->escape)); |
| input_encoding0_valid = input_encoding0_validlen == strlen(tv->escape); |
| appendPQExpBuffer(details, "#\t input encoding valid till 0: %d\n", |
| input_encoding0_valid); |
| |
| appendPQExpBuffer(details, "#\t escape func: %s\n", |
| ef->name); |
| |
| if (!input_encoding_valid && ef->supports_only_valid |
| && !tc->force_unsupported) |
| goto out; |
| |
| |
| /* call the to-be-tested escape function */ |
| escape_success = ef->escape(tc->conn, escape_buf, |
| tv->escape, tv->escape_len, |
| escape_err); |
| if (!escape_success) |
| { |
| appendPQExpBuffer(details, "#\t escape error: %s\n", |
| escape_err->data); |
| } |
| |
| if (escape_buf->len > 0) |
| { |
| appendPQExpBuffer(details, "#\t escaped string: %zd bytes: ", escape_buf->len); |
| escapify(details, escape_buf->data, escape_buf->len); |
| appendPQExpBufferChar(details, '\n'); |
| |
| escape_encoding_length = pg_encoding_verifymbstr(PQclientEncoding(tc->conn), |
| escape_buf->data, |
| escape_buf->len); |
| escape_encoding_valid = escape_encoding_length == escape_buf->len; |
| |
| appendPQExpBuffer(details, "#\t escape encoding valid: %d\n", |
| escape_encoding_valid); |
| } |
| else |
| { |
| escape_encoding_length = 0; |
| escape_encoding_valid = 1; |
| } |
| |
| /* |
| * If the test reports errors, and the input was invalidly encoded, |
| * escaping should fail. One edge-case that we accept for now is that the |
| * input could have an embedded null byte, which the escape functions will |
| * just treat as a shorter string. If the encoding error is after the zero |
| * byte, the output thus won't contain it. |
| */ |
| if (ef->reports_errors) |
| { |
| bool ok = true; |
| const char *resdesc = "ok"; |
| |
| if (escape_success) |
| { |
| if (!input_encoding0_valid) |
| { |
| ok = false; |
| resdesc = "invalid input escaped successfully"; |
| } |
| else if (!input_encoding_valid) |
| resdesc = "invalid input escaped successfully, due to zero byte"; |
| } |
| else |
| { |
| if (input_encoding0_valid) |
| { |
| ok = false; |
| resdesc = "valid input failed to escape"; |
| } |
| else if (input_encoding_valid) |
| resdesc = "valid input failed to escape, due to zero byte"; |
| } |
| |
| report_result(tc, ok, testname, details, |
| "input validity vs escape success", |
| resdesc); |
| } |
| |
| /* |
| * If the input is invalidly encoded, the output should also be invalidly |
| * encoded. We accept the same zero-byte edge case as above. |
| */ |
| { |
| bool ok = true; |
| const char *resdesc = "ok"; |
| |
| if (input_encoding0_valid && !input_encoding_valid && escape_encoding_valid) |
| { |
| resdesc = "invalid input produced valid output, due to zero byte"; |
| } |
| else if (input_encoding0_valid && !escape_encoding_valid) |
| { |
| ok = false; |
| resdesc = "valid input produced invalid output"; |
| } |
| else if (!input_encoding0_valid && |
| (!ef->reports_errors || escape_success) && |
| escape_encoding_valid) |
| { |
| ok = false; |
| resdesc = "invalid input produced valid output"; |
| } |
| |
| report_result(tc, ok, testname, details, |
| "input and escaped encoding validity", |
| resdesc); |
| } |
| |
| /* |
| * Test psql parsing whenever we get any string back, even if the escape |
| * function returned a failure. |
| */ |
| if (escape_buf->len > 0) |
| { |
| test_psql_parse(tc, testname, |
| escape_buf, details); |
| } |
| |
| out: |
| destroyPQExpBuffer(escape_err); |
| destroyPQExpBuffer(details); |
| destroyPQExpBuffer(testname); |
| destroyPQExpBuffer(escape_buf); |
| } |
| |
| static void |
| test_one_vector(pe_test_config *tc, const pe_test_vector *tv) |
| { |
| if (PQsetClientEncoding(tc->conn, tv->client_encoding)) |
| { |
| fprintf(stderr, "failed to set encoding to %s:\n%s\n", |
| tv->client_encoding, PQerrorMessage(tc->conn)); |
| exit(1); |
| } |
| |
| for (int escoff = 0; escoff < lengthof(pe_test_escape_funcs); escoff++) |
| { |
| const pe_test_escape_func *ef = &pe_test_escape_funcs[escoff]; |
| |
| test_one_vector_escape(tc, tv, ef); |
| } |
| } |
| |
/*
 * Print the help text to stdout.
 *
 * If "hint" is not NULL it is first printed to stderr as an error, and the
 * program exits with status 1 once the help text has been shown.  With a
 * NULL hint the function simply returns.
 */
static void
usage(const char *hint)
{
	static const char help_text[] =
		"PostgreSQL escape function test\n"
		"\n"
		"Usage:\n"
		" test_escape --conninfo=CONNINFO [OPTIONS]\n"
		"\n"
		"Options:\n"
		" -h, --help show this help\n"
		" -c, --conninfo=CONNINFO connection information to use\n"
		" -v, --verbose show test details even for successes\n"
		" -q, --quiet only show failures\n"
		" --force-unsupported test invalid input even if unsupported\n";

	if (hint != NULL)
		fprintf(stderr, "Error: %s\n\n", hint);

	printf("%s", help_text);

	if (hint != NULL)
		exit(1);
}
| |
| int |
| main(int argc, char *argv[]) |
| { |
| pe_test_config tc = {0}; |
| int c; |
| int option_index; |
| |
| static const struct option long_options[] = { |
| {"help", no_argument, NULL, 'h'}, |
| {"conninfo", required_argument, NULL, 'c'}, |
| {"verbose", no_argument, NULL, 'v'}, |
| {"quiet", no_argument, NULL, 'q'}, |
| {"force-unsupported", no_argument, NULL, 'f'}, |
| {NULL, 0, NULL, 0}, |
| }; |
| |
| while ((c = getopt_long(argc, argv, "vqh", long_options, &option_index)) != -1) |
| { |
| switch (c) |
| { |
| case 'h': |
| usage(NULL); |
| exit(0); |
| break; |
| case 'c': |
| tc.conninfo = optarg; |
| break; |
| case 'v': |
| tc.verbosity++; |
| break; |
| case 'q': |
| tc.verbosity--; |
| break; |
| case 'f': |
| tc.force_unsupported = true; |
| break; |
| } |
| } |
| |
| if (argc - optind >= 1) |
| usage("unused option(s) specified"); |
| |
| if (tc.conninfo == NULL) |
| usage("--conninfo needs to be specified"); |
| |
| tc.conn = PQconnectdb(tc.conninfo); |
| |
| if (!tc.conn || PQstatus(tc.conn) != CONNECTION_OK) |
| { |
| fprintf(stderr, "could not connect: %s\n", |
| PQerrorMessage(tc.conn)); |
| exit(1); |
| } |
| |
| for (int i = 0; i < lengthof(pe_test_vectors); i++) |
| { |
| test_one_vector(&tc, &pe_test_vectors[i]); |
| } |
| |
| PQfinish(tc.conn); |
| |
| printf("# %d failures\n", tc.failure_count); |
| printf("1..%d\n", tc.test_count); |
| return tc.failure_count > 0; |
| } |