blob: 3ed70436155ab710e616032b0742feb63436ce43 [file] [log] [blame]
/*
* test_escape.c Test escape functions
*
* Copyright (c) 2022-2025, PostgreSQL Global Development Group
*
* IDENTIFICATION
* src/test/modules/test_escape/test_escape.c
*/
#include "postgres_fe.h"
#include <string.h>
#include <stdio.h>
#include "fe_utils/psqlscan.h"
#include "fe_utils/string_utils.h"
#include "getopt_long.h"
#include "libpq-fe.h"
#include "mb/pg_wchar.h"
/*
 * State shared by the whole test run: command-line settings, the database
 * connection, and the running result counters.
 */
typedef struct pe_test_config
{
/*
 * Output level for passing tests: > 0 also prints the per-test details,
 * 0 prints only the result line, < 0 prints nothing.  Failing tests are
 * always printed in full.
 */
int verbosity;
/*
 * Run invalidly encoded inputs even against escape functions marked
 * supports_only_valid.
 */
bool force_unsupported;
/* Connection string handed to PQconnectdb(). */
const char *conninfo;
/* Connection used for all escape calls and client-encoding switches. */
PGconn *conn;
/* Number of results reported so far; printed as the plan ("1..N"). */
int test_count;
/* Number of reported results that failed; nonzero makes main() return 1. */
int failure_count;
} pe_test_config;
/*
 * An escape function to be tested by this test.
 *
 * Each entry pairs a display name with a wrapper callback and with flags
 * describing which inputs the function is expected to cope with.
 */
typedef struct pe_test_escape_func
{
/* Name used when building test names and the per-test details. */
const char *name;
/*
 * Can the escape method report errors? If so, we validate that it does in
 * case of various invalid inputs.
 */
bool reports_errors;
/*
 * Is the escape method known to not handle invalidly encoded input? If
 * so, we don't run the test unless --force-unsupported is used.
 */
bool supports_only_valid;
/*
 * Is the escape method known to only handle encodings in which no byte of
 * a multi-byte character is a valid ASCII byte? If so, the method is not
 * tested under client encodings for which encoding_conflicts_ascii()
 * returns true.
 */
bool supports_only_ascii_overlap;
/*
 * Wrapper that invokes the function under test.  It receives the raw input
 * (unescaped, unescaped_len bytes) and appends the escaped form to target.
 * Returns true on success; on failure it returns false and may append a
 * message to escape_err.
 */
bool (*escape) (PGconn *conn, PQExpBuffer target,
const char *unescaped, size_t unescaped_len,
PQExpBuffer escape_err);
} pe_test_escape_func;
/*
 * A single test input for this test.
 */
typedef struct pe_test_vector
{
/* Encoding name passed to PQsetClientEncoding() before running the input. */
const char *client_encoding;
/*
 * Number of bytes in escape.  This is tracked separately from the string
 * because inputs may contain embedded NUL bytes.
 */
size_t escape_len;
/* The raw bytes handed to each escape function. */
const char *escape;
} pe_test_vector;
/*
 * Callback functions from flex lexer. Not currently used by the test, so
 * every callback is left unset (NULL).  The table is only needed because
 * psql_scan_create() takes a pointer to one.
 */
static const PsqlScanCallbacks test_scan_callbacks = {
NULL
};
/*
 * Escape wrapper for PQescapeLiteral().
 *
 * On success, the string returned by PQescapeLiteral() is appended to target
 * and true is returned.  On failure, the connection's error message (without
 * its trailing newline) is appended to escape_err, target is left untouched,
 * and false is returned.
 */
static bool
escape_literal(PGconn *conn, PQExpBuffer target,
			   const char *unescaped, size_t unescaped_len,
			   PQExpBuffer escape_err)
{
	char	   *escaped = PQescapeLiteral(conn, unescaped, unescaped_len);

	if (escaped == NULL)
	{
		appendPQExpBufferStr(escape_err, PQerrorMessage(conn));

		/*
		 * Strip the newline libpq puts at the end of its messages.  Only do
		 * so if there really is one: blindly dropping the last byte would
		 * index before the start of an empty buffer.
		 */
		if (escape_err->len > 0 &&
			escape_err->data[escape_err->len - 1] == '\n')
		{
			escape_err->len--;
			escape_err->data[escape_err->len] = '\0';
		}
		return false;
	}

	appendPQExpBufferStr(target, escaped);
	PQfreemem(escaped);
	return true;
}
/*
 * Escape wrapper for PQescapeIdentifier().
 *
 * On success, the string returned by PQescapeIdentifier() is appended to
 * target and true is returned.  On failure, the connection's error message
 * (without its trailing newline) is appended to escape_err, target is left
 * untouched, and false is returned.
 */
static bool
escape_identifier(PGconn *conn, PQExpBuffer target,
				  const char *unescaped, size_t unescaped_len,
				  PQExpBuffer escape_err)
{
	char	   *escaped = PQescapeIdentifier(conn, unescaped, unescaped_len);

	if (escaped == NULL)
	{
		appendPQExpBufferStr(escape_err, PQerrorMessage(conn));

		/*
		 * Strip the newline libpq puts at the end of its messages.  Only do
		 * so if there really is one: blindly dropping the last byte would
		 * index before the start of an empty buffer.
		 */
		if (escape_err->len > 0 &&
			escape_err->data[escape_err->len - 1] == '\n')
		{
			escape_err->len--;
			escape_err->data[escape_err->len] = '\0';
		}
		return false;
	}

	appendPQExpBufferStr(target, escaped);
	PQfreemem(escaped);
	return true;
}
/*
 * Escape wrapper for PQescapeStringConn().
 *
 * PQescapeStringConn() writes into a caller-supplied buffer and does not add
 * the surrounding quotes, so this wrapper appends the opening quote, lets
 * libpq write directly into target's spare space, and then appends the
 * closing quote.
 *
 * Returns true on success.  If libpq flags an error, the connection's error
 * message (without its trailing newline) is appended to escape_err and false
 * is returned; whatever libpq wrote is still left in target so the caller
 * can inspect it.  If target cannot be enlarged, false is returned with an
 * out-of-memory note in escape_err.
 */
static bool
escape_string_conn(PGconn *conn, PQExpBuffer target,
				   const char *unescaped, size_t unescaped_len,
				   PQExpBuffer escape_err)
{
	int			error = 0;
	size_t		sz;

	appendPQExpBufferChar(target, '\'');

	/*
	 * Reserve the space the call below may write to.  If this fails we must
	 * not hand target->data to libpq as an output buffer.
	 */
	if (!enlargePQExpBuffer(target, unescaped_len * 2 + 1))
	{
		appendPQExpBufferStr(escape_err, "out of memory");
		return false;
	}

	sz = PQescapeStringConn(conn, target->data + target->len,
							unescaped, unescaped_len,
							&error);

	/* account for the bytes libpq wrote behind the buffer's back */
	target->len += sz;
	appendPQExpBufferChar(target, '\'');

	if (error)
	{
		appendPQExpBufferStr(escape_err, PQerrorMessage(conn));

		/*
		 * Strip the newline libpq puts at the end of its messages, but only
		 * if one is actually present.
		 */
		if (escape_err->len > 0 &&
			escape_err->data[escape_err->len - 1] == '\n')
		{
			escape_err->len--;
			escape_err->data[escape_err->len] = '\0';
		}
		return false;
	}

	return true;
}
/*
 * Escape wrapper for PQescapeString().
 *
 * PQescapeString() takes no connection and has no error output, so conn is
 * unused.  The wrapper appends the opening quote, lets libpq write directly
 * into target's spare space, and then appends the closing quote.
 *
 * Returns true, except when target cannot be enlarged; in that case an
 * out-of-memory note is appended to escape_err and false is returned.
 */
static bool
escape_string(PGconn *conn, PQExpBuffer target,
			  const char *unescaped, size_t unescaped_len,
			  PQExpBuffer escape_err)
{
	size_t		sz;

	appendPQExpBufferChar(target, '\'');

	/*
	 * Reserve the space the call below may write to.  If this fails we must
	 * not hand target->data to libpq as an output buffer.
	 */
	if (!enlargePQExpBuffer(target, unescaped_len * 2 + 1))
	{
		appendPQExpBufferStr(escape_err, "out of memory");
		return false;
	}

	sz = PQescapeString(target->data + target->len,
						unescaped, unescaped_len);

	/* account for the bytes libpq wrote behind the buffer's back */
	target->len += sz;
	appendPQExpBufferChar(target, '\'');

	return true;
}
/*
 * Escape via s/'/''/. Non-core drivers invariably wrap libpq or use this
 * method. It suffices iff the input passes encoding validation, so it's
 * marked as supports_only_valid.
 *
 * The input is processed byte by byte for exactly unescaped_len bytes, so
 * embedded NUL bytes are copied through.  conn and escape_err are unused;
 * the function always returns true.
 */
static bool
escape_replace(PGconn *conn, PQExpBuffer target,
			   const char *unescaped, size_t unescaped_len,
			   PQExpBuffer escape_err)
{
	appendPQExpBufferChar(target, '\'');

	/* size_t index: matches the type of the length it is compared against */
	for (size_t i = 0; i < unescaped_len; i++)
	{
		char		c = unescaped[i];

		if (c == '\'')
			appendPQExpBufferStr(target, "''");
		else
			appendPQExpBufferChar(target, c);
	}

	appendPQExpBufferChar(target, '\'');

	return true;
}
/*
 * Escape wrapper for appendStringLiteral().
 *
 * The connection's current client encoding is passed along, and standard
 * conforming strings are requested.  unescaped_len is not forwarded, so the
 * callee only gets the pointer to the input.  Always returns true.
 */
static bool
escape_append_literal(PGconn *conn, PQExpBuffer target,
					  const char *unescaped, size_t unescaped_len,
					  PQExpBuffer escape_err)
{
	const int	client_enc = PQclientEncoding(conn);
	const bool	std_strings = true;

	appendStringLiteral(target, unescaped, client_enc, std_strings);

	return true;
}
/*
 * Escape wrapper for fmtId().
 *
 * fmtId() has no encoding parameter, so the connection's client encoding is
 * installed with setFmtEncoding() first.  The formatted identifier is then
 * appended to target.  unescaped_len is not forwarded.  Always returns true.
 */
static bool
escape_fmt_id(PGconn *conn, PQExpBuffer target,
			  const char *unescaped, size_t unescaped_len,
			  PQExpBuffer escape_err)
{
	const char *formatted;

	setFmtEncoding(PQclientEncoding(conn));
	formatted = fmtId(unescaped);
	appendPQExpBufferStr(target, formatted);

	return true;
}
static pe_test_escape_func pe_test_escape_funcs[] =
{
{
.name = "PQescapeLiteral",
.reports_errors = true,
.escape = escape_literal,
},
{
.name = "PQescapeIdentifier",
.reports_errors = true,
.escape = escape_identifier
},
{
.name = "PQescapeStringConn",
.reports_errors = true,
.escape = escape_string_conn
},
{
.name = "PQescapeString",
.reports_errors = false,
.escape = escape_string
},
{
.name = "replace",
.reports_errors = false,
.supports_only_valid = true,
.supports_only_ascii_overlap = true,
.escape = escape_replace
},
{
.name = "appendStringLiteral",
.reports_errors = false,
.escape = escape_append_literal
},
{
.name = "fmtId",
.reports_errors = false,
.escape = escape_fmt_id
},
};
/*
 * Build one test-vector initializer from an encoding name and a string
 * literal.  The length is taken with sizeof, so NUL bytes embedded in the
 * literal are counted; only the implicit terminator is excluded.  "string"
 * must therefore be a literal (or array), not a pointer.
 */
#define TV(enc, string) \
	{ \
		.client_encoding = (enc), \
		.escape = string, \
		.escape_len = sizeof(string) - 1, \
	}
/*
 * Inputs run against the escape functions in pe_test_escape_funcs (subject to
 * the skip rules in test_one_vector_escape()).  Each entry names the client
 * encoding to switch to and the raw bytes to escape.  Lengths are taken from
 * the literal by TV(), so embedded NUL bytes are part of the input.
 */
static pe_test_vector pe_test_vectors[] =
{
/* expected to work sanity checks */
TV("UTF-8", "1"),
TV("UTF-8", "'"),
TV("UTF-8", "\""),
/*
 * In C, "\'" is the same one-byte string as "'", so the next two entries
 * repeat the two above.
 */
TV("UTF-8", "\'"),
TV("UTF-8", "\""),
TV("UTF-8", "\\"),
TV("UTF-8", "\\'"),
TV("UTF-8", "\\\""),
/* trailing multi-byte character, paddable in available space */
TV("UTF-8", "1\xC0"),
TV("UTF-8", "1\xE0 "),
/*
 * NOTE(review): the next three entries are identical as written here;
 * presumably they were meant to differ in the number of trailing bytes
 * after \xF0 -- confirm against the upstream file.
 */
TV("UTF-8", "1\xF0 "),
TV("UTF-8", "1\xF0 "),
TV("UTF-8", "1\xF0 "),
/* trailing multi-byte character, not enough space to pad */
TV("UTF-8", "1\xE0"),
TV("UTF-8", "1\xF0"),
TV("UTF-8", "\xF0"),
/* try to smuggle in something in invalid characters */
TV("UTF-8", "1\xE0'"),
TV("UTF-8", "1\xE0\""),
TV("UTF-8", "1\xF0'"),
TV("UTF-8", "1\xF0\""),
TV("UTF-8", "1\xF0'; "),
TV("UTF-8", "1\xF0\"; "),
TV("UTF-8", "1\xF0';;;;"),
TV("UTF-8", "1\xF0 ';;;;"),
TV("UTF-8", "1\xF0 \";;;;"),
TV("UTF-8", "1\xE0'; \\l ; "),
TV("UTF-8", "1\xE0\"; \\l ; "),
/* null byte handling */
TV("UTF-8", "some\0thing"),
TV("UTF-8", "some\0"),
TV("UTF-8", "some\xF0'\0"),
TV("UTF-8", "some\xF0'\0'"),
TV("UTF-8", "some\xF0" "ab\0'"),
/* GB18030's 4-byte encoding restricts the 2nd byte to a limited set of values */
TV("GB18030", "\x90\x31"),
TV("GB18030", "\\\x81\x5c'"),
TV("GB18030", "\\\x81\x5c\""),
TV("GB18030", "\\\x81\x5c\0'"),
/*
 * \x81 indicates a 2 byte char. ' and " are not a valid second byte, but
 * that requires encoding verification to know. E.g. escape_replace()
 * doesn't cope.
 */
TV("GB18030", "\\\x81';"),
TV("GB18030", "\\\x81\";"),
/*
 * \x81 indicates a 2 byte char. \ is a valid second character.
 */
TV("GB18030", "\\\x81\\';"),
TV("GB18030", "\\\x81\\\";"),
TV("GB18030", "\\\x81\0;"),
TV("GB18030", "\\\x81\0'"),
TV("GB18030", "\\\x81'\0"),
/*
 * High-bit bytes, alone or followed by quote, backslash or NUL bytes, in
 * further client encodings.
 */
TV("SJIS", "\xF0\x40;"),
TV("SJIS", "\xF0';"),
TV("SJIS", "\xF0\";"),
TV("SJIS", "\xF0\0'"),
TV("SJIS", "\\\xF0\\';"),
TV("SJIS", "\\\xF0\\\";"),
TV("gbk", "\x80';"),
TV("gbk", "\x80"),
TV("gbk", "\x80'"),
TV("gbk", "\x80\""),
TV("gbk", "\x80\\"),
TV("mule_internal", "\\\x9c';\0;"),
TV("sql_ascii", "1\xC0'"),
};
/*
 * Print the string into buf, making characters outside of plain ascii
 * somewhat easier to recognize.
 *
 * Exactly len bytes of str are processed, so embedded NUL bytes are shown
 * (as \0).  Newlines become \n.  Any other byte below ' ' or above '~' is
 * written as \x followed by two hex digits.  Printable ASCII is copied
 * unchanged.
 *
 * The output format could stand to be improved significantly, it's not at all
 * unambiguous (e.g. a literal backslash in the input is not escaped).
 */
static void
escapify(PQExpBuffer buf, const char *str, size_t len)
{
	for (size_t i = 0; i < len; i++)
	{
		/* work on the unsigned value so the range checks don't depend on
		 * whether plain char is signed */
		unsigned char c = (unsigned char) str[i];

		if (c == '\n')
			appendPQExpBufferStr(buf, "\\n");
		else if (c == '\0')
			appendPQExpBufferStr(buf, "\\0");
		else if (c < ' ' || c > '~')
		{
			/* zero-pad: "%2x" would render small values as "\x 9" */
			appendPQExpBuffer(buf, "\\x%02x", (unsigned int) c);
		}
		else
			appendPQExpBufferChar(buf, (char) c);
	}
}
/*
 * Record and print the outcome of one check.
 *
 * Every call consumes a test number.  Failures are counted and always
 * printed in full.  For successes the output depends on tc->verbosity: the
 * details are shown only when it is positive, and the result line is
 * suppressed when it is negative.
 *
 * testname/subname/resultdesc make up the result line; details is the
 * accumulated diagnostic text for the test.
 */
static void
report_result(pe_test_config *tc,
			  bool success,
			  PQExpBuffer testname,
			  PQExpBuffer details,
			  const char *subname,
			  const char *resultdesc)
{
	const int	test_id = ++tc->test_count;
	const bool	show_details = !success || tc->verbosity > 0;
	const bool	show_result = !success || tc->verbosity >= 0;

	if (!success)
		tc->failure_count++;

	if (show_details)
		printf("%s", details->data);

	if (show_result)
	{
		const char *status = success ? "ok" : "not ok";

		printf("%s %d - %s: %s: %s\n",
			   status, test_id, testname->data, subname, resultdesc);
	}
}
/*
 * Return true for encodings in which bytes in a multi-byte character look
 * like valid ascii characters.
 *
 * This property isn't stored directly anywhere.  Whether an encoding is a
 * client-only encoding (its id lies beyond PG_ENCODING_BE_LAST) is used as
 * a proxy for it.
 */
static bool
encoding_conflicts_ascii(int encoding)
{
	return encoding > PG_ENCODING_BE_LAST;
}
/*
 * Return the symbolic name of a psql scan result, for diagnostics.
 */
static const char *
scan_res_s(PsqlScanResult res)
{
	switch (res)
	{
		case PSCAN_SEMICOLON:
			return "PSCAN_SEMICOLON";
		case PSCAN_BACKSLASH:
			return "PSCAN_BACKSLASH";
		case PSCAN_INCOMPLETE:
			return "PSCAN_INCOMPLETE";
		case PSCAN_EOL:
			return "PSCAN_EOL";
	}

	pg_unreachable();
	return "";					/* silence compiler */
}
/*
 * Verify that psql parses the input as a single statement. If this property
 * is violated, the escape function does not effectively protect against
 * smuggling in a second statement.
 *
 * input_buf is run through psql's lexer until it reports either end of line
 * or an incomplete statement.  Each lexer return is logged into details.
 * The check passes only if the lexer returned exactly once and that return
 * was PSCAN_EOL.  The outcome is reported under the "psql parse" sub-test.
 */
static void
test_psql_parse(pe_test_config *tc, PQExpBuffer testname,
				PQExpBuffer input_buf, PQExpBuffer details)
{
	PQExpBuffer stmt_buf = createPQExpBuffer();
	PsqlScanState lexer = psql_scan_create(&test_scan_callbacks);
	PsqlScanResult last_res;
	promptStatus_t prompt = PROMPT_READY;
	int			nreturns = 0;
	bool		passed;
	const char *outcome;

	/*
	 * TODO: This hardcodes standard conforming strings, it would be useful to
	 * test without as well.
	 */
	psql_scan_setup(lexer, input_buf->data, input_buf->len,
					PQclientEncoding(tc->conn), 1);

	for (;;)
	{
		resetPQExpBuffer(stmt_buf);
		last_res = psql_scan(lexer, stmt_buf, &prompt);

		appendPQExpBuffer(details,
						  "#\t\t %d: scan_result: %s prompt: %u, query_buf: ",
						  nreturns, scan_res_s(last_res), prompt);
		escapify(details, stmt_buf->data, stmt_buf->len);
		appendPQExpBufferStr(details, "\n");

		nreturns++;

		/* both of these end states mean there is nothing left to scan */
		if (last_res == PSCAN_INCOMPLETE || last_res == PSCAN_EOL)
			break;
	}

	psql_scan_destroy(lexer);
	destroyPQExpBuffer(stmt_buf);

	if (nreturns > 1)
	{
		passed = false;
		outcome = "more than one match";
	}
	else if (last_res != PSCAN_EOL)
	{
		passed = false;
		outcome = "unexpected end state";
	}
	else
	{
		passed = true;
		outcome = "ok";
	}

	report_result(tc, passed, testname, details, "psql parse", outcome);
}
/*
 * Run one test vector through one escape function and report the results.
 *
 * The connection's client encoding must already match tv->client_encoding.
 * The combination is skipped silently (nothing is reported) when the escape
 * function is marked as unable to handle the current encoding, or when it is
 * marked as handling only valid input, the input is invalid, and
 * --force-unsupported was not given.
 *
 * Otherwise up to three results are reported:
 *  - "input validity vs escape success", only for functions that can report
 *    errors;
 *  - "input and escaped encoding validity";
 *  - "psql parse", only if the escape function produced any output.
 */
static void
test_one_vector_escape(pe_test_config *tc, const pe_test_vector *tv, const pe_test_escape_func *ef)
{
	PQExpBuffer testname;
	PQExpBuffer details;
	PQExpBuffer escape_buf;
	PQExpBuffer escape_err;
	size_t		input_encoding_validlen;
	bool		input_encoding_valid;
	size_t		input_len0;
	size_t		input_encoding0_validlen;
	bool		input_encoding0_valid;
	bool		escape_success;
	size_t		escape_encoding_length;
	bool		escape_encoding_valid;

	escape_err = createPQExpBuffer();
	testname = createPQExpBuffer();
	details = createPQExpBuffer();
	escape_buf = createPQExpBuffer();

	if (ef->supports_only_ascii_overlap &&
		encoding_conflicts_ascii(PQclientEncoding(tc->conn)))
	{
		goto out;
	}

	/* name to describe the test */
	appendPQExpBuffer(testname, ">");
	escapify(testname, tv->escape, tv->escape_len);
	appendPQExpBuffer(testname, "< - %s - %s",
					  tv->client_encoding, ef->name);

	/* details to describe the test, to allow for debugging */
	appendPQExpBuffer(details, "#\t input: %zu bytes: ",
					  tv->escape_len);
	escapify(details, tv->escape, tv->escape_len);
	appendPQExpBufferStr(details, "\n");
	appendPQExpBuffer(details, "#\t encoding: %s\n",
					  tv->client_encoding);

	/* check encoding of input, to compare with after the test */
	input_encoding_validlen = pg_encoding_verifymbstr(PQclientEncoding(tc->conn),
													  tv->escape,
													  tv->escape_len);
	input_encoding_valid = input_encoding_validlen == tv->escape_len;
	appendPQExpBuffer(details, "#\t input encoding valid: %d\n",
					  input_encoding_valid);

	/* same check, but only up to the first NUL byte of the input */
	input_len0 = strlen(tv->escape);
	input_encoding0_validlen = pg_encoding_verifymbstr(PQclientEncoding(tc->conn),
													   tv->escape,
													   input_len0);
	input_encoding0_valid = input_encoding0_validlen == input_len0;
	appendPQExpBuffer(details, "#\t input encoding valid till 0: %d\n",
					  input_encoding0_valid);

	appendPQExpBuffer(details, "#\t escape func: %s\n",
					  ef->name);

	if (!input_encoding_valid && ef->supports_only_valid
		&& !tc->force_unsupported)
		goto out;

	/* call the to-be-tested escape function */
	escape_success = ef->escape(tc->conn, escape_buf,
								tv->escape, tv->escape_len,
								escape_err);
	if (!escape_success)
	{
		appendPQExpBuffer(details, "#\t escape error: %s\n",
						  escape_err->data);
	}

	if (escape_buf->len > 0)
	{
		appendPQExpBuffer(details, "#\t escaped string: %zu bytes: ", escape_buf->len);
		escapify(details, escape_buf->data, escape_buf->len);
		appendPQExpBufferChar(details, '\n');

		escape_encoding_length = pg_encoding_verifymbstr(PQclientEncoding(tc->conn),
														 escape_buf->data,
														 escape_buf->len);
		escape_encoding_valid = escape_encoding_length == escape_buf->len;
		appendPQExpBuffer(details, "#\t escape encoding valid: %d\n",
						  escape_encoding_valid);
	}
	else
	{
		/* no output at all counts as validly encoded */
		escape_encoding_length = 0;
		escape_encoding_valid = true;
	}

	/*
	 * If the test reports errors, and the input was invalidly encoded,
	 * escaping should fail. One edge-case that we accept for now is that the
	 * input could have an embedded null byte, which the escape functions will
	 * just treat as a shorter string. If the encoding error is after the zero
	 * byte, the output thus won't contain it.
	 */
	if (ef->reports_errors)
	{
		bool		ok = true;
		const char *resdesc = "ok";

		if (escape_success)
		{
			if (!input_encoding0_valid)
			{
				ok = false;
				resdesc = "invalid input escaped successfully";
			}
			else if (!input_encoding_valid)
				resdesc = "invalid input escaped successfully, due to zero byte";
		}
		else
		{
			if (input_encoding0_valid)
			{
				ok = false;
				resdesc = "valid input failed to escape";
			}
			else if (input_encoding_valid)
				resdesc = "valid input failed to escape, due to zero byte";
		}

		report_result(tc, ok, testname, details,
					  "input validity vs escape success",
					  resdesc);
	}

	/*
	 * If the input is invalidly encoded, the output should also be invalidly
	 * encoded. We accept the same zero-byte edge case as above.
	 */
	{
		bool		ok = true;
		const char *resdesc = "ok";

		if (input_encoding0_valid && !input_encoding_valid && escape_encoding_valid)
		{
			resdesc = "invalid input produced valid output, due to zero byte";
		}
		else if (input_encoding0_valid && !escape_encoding_valid)
		{
			ok = false;
			resdesc = "valid input produced invalid output";
		}
		else if (!input_encoding0_valid &&
				 (!ef->reports_errors || escape_success) &&
				 escape_encoding_valid)
		{
			ok = false;
			resdesc = "invalid input produced valid output";
		}

		report_result(tc, ok, testname, details,
					  "input and escaped encoding validity",
					  resdesc);
	}

	/*
	 * Test psql parsing whenever we get any string back, even if the escape
	 * function returned a failure.
	 */
	if (escape_buf->len > 0)
	{
		test_psql_parse(tc, testname,
						escape_buf, details);
	}

out:
	destroyPQExpBuffer(escape_err);
	destroyPQExpBuffer(details);
	destroyPQExpBuffer(testname);
	destroyPQExpBuffer(escape_buf);
}
/*
 * Run a single test vector against every escape function.
 *
 * The connection is first switched to the vector's client encoding.  If that
 * fails, the whole program is aborted with exit status 1, since none of the
 * following checks would be meaningful.
 */
static void
test_one_vector(pe_test_config *tc, const pe_test_vector *tv)
{
	const pe_test_escape_func *const funcs_end =
		pe_test_escape_funcs + lengthof(pe_test_escape_funcs);

	if (PQsetClientEncoding(tc->conn, tv->client_encoding) != 0)
	{
		fprintf(stderr, "failed to set encoding to %s:\n%s\n",
				tv->client_encoding, PQerrorMessage(tc->conn));
		exit(1);
	}

	for (const pe_test_escape_func *ef = pe_test_escape_funcs;
		 ef < funcs_end;
		 ef++)
	{
		test_one_vector_escape(tc, tv, ef);
	}
}
/*
 * Print the help text to stdout.
 *
 * If hint is not NULL, it is first reported on stderr as an error, and the
 * program exits with status 1 after the help text has been printed.  With a
 * NULL hint the function simply returns.
 */
static void
usage(const char *hint)
{
	static const char help_text[] =
		"PostgreSQL escape function test\n"
		"\n"
		"Usage:\n"
		" test_escape --conninfo=CONNINFO [OPTIONS]\n"
		"\n"
		"Options:\n"
		" -h, --help show this help\n"
		" -c, --conninfo=CONNINFO connection information to use\n"
		" -v, --verbose show test details even for successes\n"
		" -q, --quiet only show failures\n"
		" --force-unsupported test invalid input even if unsupported\n";

	if (hint != NULL)
		fprintf(stderr, "Error: %s\n\n", hint);

	fputs(help_text, stdout);

	if (hint != NULL)
		exit(1);
}
/*
 * Entry point: parse options, connect, run every test vector, and print the
 * summary.
 *
 * Results are printed as they are produced; the failure count and the plan
 * line ("1..N") follow at the end.  The exit status is 0 if no check failed
 * and 1 otherwise.  Usage errors and connection failures also exit with 1.
 */
int
main(int argc, char *argv[])
{
	pe_test_config tc = {0};
	int			c;
	int			option_index;

	static const struct option long_options[] = {
		{"help", no_argument, NULL, 'h'},
		{"conninfo", required_argument, NULL, 'c'},
		{"verbose", no_argument, NULL, 'v'},
		{"quiet", no_argument, NULL, 'q'},
		{"force-unsupported", no_argument, NULL, 'f'},
		{NULL, 0, NULL, 0},
	};

	/*
	 * The short-option string must list every short option the help text
	 * advertises; "c:" makes -c take its argument like --conninfo does.
	 */
	while ((c = getopt_long(argc, argv, "c:hqv", long_options, &option_index)) != -1)
	{
		switch (c)
		{
			case 'h':
				usage(NULL);
				exit(0);
				break;
			case 'c':
				tc.conninfo = optarg;
				break;
			case 'v':
				tc.verbosity++;
				break;
			case 'q':
				tc.verbosity--;
				break;
			case 'f':
				tc.force_unsupported = true;
				break;
			default:
				/* getopt_long() has already complained on stderr */
				usage("invalid option");
				break;
		}
	}

	if (argc - optind >= 1)
		usage("unused option(s) specified");

	if (tc.conninfo == NULL)
		usage("--conninfo needs to be specified");

	tc.conn = PQconnectdb(tc.conninfo);

	if (!tc.conn || PQstatus(tc.conn) != CONNECTION_OK)
	{
		fprintf(stderr, "could not connect: %s\n",
				PQerrorMessage(tc.conn));
		exit(1);
	}

	for (size_t i = 0; i < lengthof(pe_test_vectors); i++)
	{
		test_one_vector(&tc, &pe_test_vectors[i]);
	}

	PQfinish(tc.conn);

	printf("# %d failures\n", tc.failure_count);
	printf("1..%d\n", tc.test_count);

	return tc.failure_count > 0;
}