| /*------------------------------------------------------------------------- |
| * |
| * String-processing utility routines for frontend code |
| * |
| * Assorted utility functions that are useful in constructing SQL queries |
| * and interpreting backend output. |
| * |
| * |
| * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group |
| * Portions Copyright (c) 1994, Regents of the University of California |
| * |
| * src/fe_utils/string_utils.c |
| * |
| *------------------------------------------------------------------------- |
| */ |
| #include "postgres_fe.h" |
| |
| #include <ctype.h> |
| |
| #include "common/keywords.h" |
| #include "fe_utils/string_utils.h" |
| #include "mb/pg_wchar.h" |
| |
| static PQExpBuffer defaultGetLocalPQExpBuffer(void); |
| |
| /* Globals exported by this file */ |
| int quote_all_identifiers = 0; |
| PQExpBuffer (*getLocalPQExpBuffer) (void) = defaultGetLocalPQExpBuffer; |
| |
| static int fmtIdEncoding = -1; |
| |
| |
| /* |
| * Returns a temporary PQExpBuffer, valid until the next call to the function. |
| * This is used by fmtId and fmtQualifiedId. |
| * |
| * Non-reentrant and non-thread-safe but reduces memory leakage. You can |
| * replace this with a custom version by setting the getLocalPQExpBuffer |
| * function pointer. |
| */ |
| static PQExpBuffer |
| defaultGetLocalPQExpBuffer(void) |
| { |
| static PQExpBuffer id_return = NULL; |
| |
| if (id_return) /* first time through? */ |
| { |
| /* same buffer, just wipe contents */ |
| resetPQExpBuffer(id_return); |
| } |
| else |
| { |
| /* new buffer */ |
| id_return = createPQExpBuffer(); |
| } |
| |
| return id_return; |
| } |
| |
| /* |
| * Set the encoding that fmtId() and fmtQualifiedId() use. |
| * |
| * This is not safe against multiple connections having different encodings, |
| * but there is no real other way to address the need to know the encoding for |
| * fmtId()/fmtQualifiedId() input for safe escaping. Eventually we should get |
| * rid of fmtId(). |
| */ |
| void |
| setFmtEncoding(int encoding) |
| { |
| fmtIdEncoding = encoding; |
| } |
| |
| /* |
| * Return the currently configured encoding for fmtId() and fmtQualifiedId(). |
| */ |
| static int |
| getFmtEncoding(void) |
| { |
| if (fmtIdEncoding != -1) |
| return fmtIdEncoding; |
| |
| /* |
| * In assertion builds it seems best to fail hard if the encoding was not |
| * set, to make it easier to find places with missing calls. But in |
| * production builds that seems like a bad idea, thus we instead just |
| * default to UTF-8. |
| */ |
| Assert(fmtIdEncoding != -1); |
| |
| return PG_UTF8; |
| } |
| |
| /* |
| * Quotes input string if it's not a legitimate SQL identifier as-is. |
| * |
| * Note that the returned string must be used before calling fmtIdEnc again, |
| * since we re-use the same return buffer each time. |
| */ |
| const char * |
| fmtIdEnc(const char *rawid, int encoding) |
| { |
| PQExpBuffer id_return = getLocalPQExpBuffer(); |
| |
| const char *cp; |
| bool need_quotes = false; |
| size_t remaining = strlen(rawid); |
| |
| /* |
| * These checks need to match the identifier production in scan.l. Don't |
| * use islower() etc. |
| */ |
| if (quote_all_identifiers) |
| need_quotes = true; |
| /* slightly different rules for first character */ |
| else if (!((rawid[0] >= 'a' && rawid[0] <= 'z') || rawid[0] == '_')) |
| need_quotes = true; |
| else |
| { |
| /* otherwise check the entire string */ |
| cp = rawid; |
| for (size_t i = 0; i < remaining; i++, cp++) |
| { |
| if (!((*cp >= 'a' && *cp <= 'z') |
| || (*cp >= '0' && *cp <= '9') |
| || (*cp == '_'))) |
| { |
| need_quotes = true; |
| break; |
| } |
| } |
| } |
| |
| if (!need_quotes) |
| { |
| /* |
| * Check for keyword. We quote keywords except for unreserved ones. |
| * (In some cases we could avoid quoting a col_name or type_func_name |
| * keyword, but it seems much harder than it's worth to tell that.) |
| * |
| * Note: ScanKeywordLookup() does case-insensitive comparison, but |
| * that's fine, since we already know we have all-lower-case. |
| */ |
| int kwnum = ScanKeywordLookup(rawid, &ScanKeywords); |
| |
| if (kwnum >= 0 && ScanKeywordCategories[kwnum] != UNRESERVED_KEYWORD) |
| need_quotes = true; |
| } |
| |
| if (!need_quotes) |
| { |
| /* no quoting needed */ |
| appendPQExpBufferStr(id_return, rawid); |
| } |
| else |
| { |
| appendPQExpBufferChar(id_return, '"'); |
| |
| cp = &rawid[0]; |
| while (remaining > 0) |
| { |
| int charlen; |
| |
| /* Fast path for plain ASCII */ |
| if (!IS_HIGHBIT_SET(*cp)) |
| { |
| /* |
| * Did we find a double-quote in the string? Then make this a |
| * double double-quote per SQL99. Before, we put in a |
| * backslash/double-quote pair. - thomas 2000-08-05 |
| */ |
| if (*cp == '"') |
| appendPQExpBufferChar(id_return, '"'); |
| appendPQExpBufferChar(id_return, *cp); |
| remaining--; |
| cp++; |
| continue; |
| } |
| |
| /* Slow path for possible multibyte characters */ |
| charlen = pg_encoding_mblen(encoding, cp); |
| |
| if (remaining < charlen) |
| { |
| /* |
| * If the character is longer than the available input, |
| * replace the string with an invalid sequence. The invalid |
| * sequence ensures that the escaped string will trigger an |
| * error on the server-side, even if we can't directly report |
| * an error here. |
| */ |
| enlargePQExpBuffer(id_return, 2); |
| pg_encoding_set_invalid(encoding, |
| id_return->data + id_return->len); |
| id_return->len += 2; |
| id_return->data[id_return->len] = '\0'; |
| |
| /* there's no more input data, so we can stop */ |
| break; |
| } |
| else if (pg_encoding_verifymbchar(encoding, cp, charlen) == -1) |
| { |
| /* |
| * Multibyte character is invalid. It's important to verify |
| * that as invalid multi-byte characters could e.g. be used to |
| * "skip" over quote characters, e.g. when parsing |
| * character-by-character. |
| * |
| * Replace the bytes corresponding to the invalid character |
| * with an invalid sequence, for the same reason as above. |
| * |
| * It would be a bit faster to verify the whole string the |
| * first time we encounter a set highbit, but this way we can |
| * replace just the invalid characters, which probably makes |
| * it easier for users to find the invalidly encoded portion |
| * of a larger string. |
| */ |
| enlargePQExpBuffer(id_return, 2); |
| pg_encoding_set_invalid(encoding, |
| id_return->data + id_return->len); |
| id_return->len += 2; |
| id_return->data[id_return->len] = '\0'; |
| |
| /* |
| * Copy the rest of the string after the invalid multi-byte |
| * character. |
| */ |
| remaining -= charlen; |
| cp += charlen; |
| } |
| else |
| { |
| for (int i = 0; i < charlen; i++) |
| { |
| appendPQExpBufferChar(id_return, *cp); |
| remaining--; |
| cp++; |
| } |
| } |
| } |
| |
| appendPQExpBufferChar(id_return, '"'); |
| } |
| |
| return id_return->data; |
| } |
| |
/*
 * Quote the input string unless it is already a legitimate SQL identifier.
 *
 * The result lives in the shared return buffer, so it must be consumed
 * before fmtId is called again.
 *
 * NB: the encoding of rawid is taken from a prior setFmtEncoding() call.
 * Using fmtIdEnc() with an explicit encoding is preferable.
 */
const char *
fmtId(const char *rawid)
{
    int         encoding = getFmtEncoding();

    return fmtIdEnc(rawid, encoding);
}
| |
| /* |
| * fmtQualifiedIdEnc - construct a schema-qualified name, with quoting as |
| * needed. |
| * |
| * Like fmtId, use the result before calling again. |
| * |
| * Since we call fmtId and it also uses getLocalPQExpBuffer() we cannot |
| * use that buffer until we're finished with calling fmtId(). |
| */ |
| const char * |
| fmtQualifiedIdEnc(const char *schema, const char *id, int encoding) |
| { |
| PQExpBuffer id_return; |
| PQExpBuffer lcl_pqexp = createPQExpBuffer(); |
| |
| /* Some callers might fail to provide a schema name */ |
| if (schema && *schema) |
| { |
| appendPQExpBuffer(lcl_pqexp, "%s.", fmtIdEnc(schema, encoding)); |
| } |
| appendPQExpBufferStr(lcl_pqexp, fmtIdEnc(id, encoding)); |
| |
| id_return = getLocalPQExpBuffer(); |
| |
| appendPQExpBufferStr(id_return, lcl_pqexp->data); |
| destroyPQExpBuffer(lcl_pqexp); |
| |
| return id_return->data; |
| } |
| |
/*
 * fmtQualifiedId - build a schema-qualified name, quoting each part as
 * needed.
 *
 * As with fmtId, the result must be used before the next call.
 *
 * NB: the encoding of schema/id is taken from a prior setFmtEncoding() call.
 * Using fmtQualifiedIdEnc() with an explicit encoding is preferable.
 */
const char *
fmtQualifiedId(const char *schema, const char *id)
{
    int         encoding = getFmtEncoding();

    return fmtQualifiedIdEnc(schema, id, encoding);
}
| |
| |
/*
 * Render a Postgres version number (the PG_VERSION_NUM integer format
 * returned by PQserverVersion()) as text.  The point is to keep knowledge
 * about two-part vs. three-part version numbers in one place.
 *
 * version_number: integer version, e.g. 150004 or 90603.
 * include_minor:  whether to append the minor-release part.
 * buf, buflen:    caller-supplied output buffer (which keeps this function
 *                 reentrant); 32 bytes is the recommended size.
 *
 * Returns 'buf', as a notational convenience.
 */
char *
formatPGVersionNumber(int version_number, bool include_minor,
                      char *buf, size_t buflen)
{
    const int   leading = version_number / 10000;

    if (version_number < 100000)
    {
        /* Old three-part style: the first two parts are always printed */
        const int   middle = (version_number / 100) % 100;

        if (!include_minor)
            snprintf(buf, buflen, "%d.%d", leading, middle);
        else
            snprintf(buf, buflen, "%d.%d.%d", leading, middle,
                     version_number % 100);
    }
    else if (include_minor)
    {
        /* New two-part style, with minor */
        snprintf(buf, buflen, "%d.%d", leading, version_number % 10000);
    }
    else
    {
        /* New two-part style, major only */
        snprintf(buf, buflen, "%d", leading);
    }

    return buf;
}
| |
| |
| /* |
| * Convert a string value to an SQL string literal and append it to |
| * the given buffer. We assume the specified client_encoding and |
| * standard_conforming_strings settings. |
| * |
| * This is essentially equivalent to libpq's PQescapeStringInternal, |
| * except for the output buffer structure. We need it in situations |
| * where we do not have a PGconn available. Where we do, |
| * appendStringLiteralConn is a better choice. |
| */ |
| void |
| appendStringLiteral(PQExpBuffer buf, const char *str, |
| int encoding, bool std_strings) |
| { |
| size_t length = strlen(str); |
| const char *source = str; |
| char *target; |
| size_t remaining = length; |
| |
| if (!enlargePQExpBuffer(buf, 2 * length + 2)) |
| return; |
| |
| target = buf->data + buf->len; |
| *target++ = '\''; |
| |
| while (remaining > 0) |
| { |
| char c = *source; |
| int charlen; |
| int i; |
| |
| /* Fast path for plain ASCII */ |
| if (!IS_HIGHBIT_SET(c)) |
| { |
| /* Apply quoting if needed */ |
| if (SQL_STR_DOUBLE(c, !std_strings)) |
| *target++ = c; |
| /* Copy the character */ |
| *target++ = c; |
| source++; |
| remaining--; |
| continue; |
| } |
| |
| /* Slow path for possible multibyte characters */ |
| charlen = PQmblen(source, encoding); |
| |
| if (remaining < charlen) |
| { |
| /* |
| * If the character is longer than the available input, replace |
| * the string with an invalid sequence. The invalid sequence |
| * ensures that the escaped string will trigger an error on the |
| * server-side, even if we can't directly report an error here. |
| * |
| * We know there's enough space for the invalid sequence because |
| * the "target" buffer is 2 * length + 2 long, and at worst we're |
| * replacing a single input byte with two invalid bytes. |
| */ |
| pg_encoding_set_invalid(encoding, target); |
| target += 2; |
| |
| /* there's no more valid input data, so we can stop */ |
| break; |
| } |
| else if (pg_encoding_verifymbchar(encoding, source, charlen) == -1) |
| { |
| /* |
| * Multibyte character is invalid. It's important to verify that |
| * as invalid multi-byte characters could e.g. be used to "skip" |
| * over quote characters, e.g. when parsing |
| * character-by-character. |
| * |
| * Replace the bytes corresponding to the invalid character with |
| * an invalid sequence, for the same reason as above. |
| * |
| * It would be a bit faster to verify the whole string the first |
| * time we encounter a set highbit, but this way we can replace |
| * just the invalid characters, which probably makes it easier for |
| * users to find the invalidly encoded portion of a larger string. |
| */ |
| pg_encoding_set_invalid(encoding, target); |
| target += 2; |
| remaining -= charlen; |
| |
| /* |
| * Copy the rest of the string after the invalid multi-byte |
| * character. |
| */ |
| source += charlen; |
| } |
| else |
| { |
| /* Copy the character */ |
| for (i = 0; i < charlen; i++) |
| { |
| *target++ = *source++; |
| remaining--; |
| } |
| } |
| } |
| |
| /* Write the terminating quote and NUL character. */ |
| *target++ = '\''; |
| *target = '\0'; |
| |
| buf->len = target - buf->data; |
| } |
| |
| |
| /* |
| * Convert a string value to an SQL string literal and append it to |
| * the given buffer. Encoding and string syntax rules are as indicated |
| * by current settings of the PGconn. |
| */ |
| void |
| appendStringLiteralConn(PQExpBuffer buf, const char *str, PGconn *conn) |
| { |
| size_t length = strlen(str); |
| |
| /* |
| * XXX This is a kluge to silence escape_string_warning in our utility |
| * programs. It should go away someday. |
| */ |
| if (strchr(str, '\\') != NULL && PQserverVersion(conn) >= 80100) |
| { |
| /* ensure we are not adjacent to an identifier */ |
| if (buf->len > 0 && buf->data[buf->len - 1] != ' ') |
| appendPQExpBufferChar(buf, ' '); |
| appendPQExpBufferChar(buf, ESCAPE_STRING_SYNTAX); |
| appendStringLiteral(buf, str, PQclientEncoding(conn), false); |
| return; |
| } |
| /* XXX end kluge */ |
| |
| if (!enlargePQExpBuffer(buf, 2 * length + 2)) |
| return; |
| appendPQExpBufferChar(buf, '\''); |
| buf->len += PQescapeStringConn(conn, buf->data + buf->len, |
| str, length, NULL); |
| appendPQExpBufferChar(buf, '\''); |
| } |
| |
| |
| /* |
| * Convert a string value to a dollar quoted literal and append it to |
| * the given buffer. If the dqprefix parameter is not NULL then the |
| * dollar quote delimiter will begin with that (after the opening $). |
| * |
| * No escaping is done at all on str, in compliance with the rules |
| * for parsing dollar quoted strings. Also, we need not worry about |
| * encoding issues. |
| */ |
| void |
| appendStringLiteralDQ(PQExpBuffer buf, const char *str, const char *dqprefix) |
| { |
| static const char suffixes[] = "_XXXXXXX"; |
| int nextchar = 0; |
| PQExpBuffer delimBuf = createPQExpBuffer(); |
| |
| /* start with $ + dqprefix if not NULL */ |
| appendPQExpBufferChar(delimBuf, '$'); |
| if (dqprefix) |
| appendPQExpBufferStr(delimBuf, dqprefix); |
| |
| /* |
| * Make sure we choose a delimiter which (without the trailing $) is not |
| * present in the string being quoted. We don't check with the trailing $ |
| * because a string ending in $foo must not be quoted with $foo$. |
| */ |
| while (strstr(str, delimBuf->data) != NULL) |
| { |
| appendPQExpBufferChar(delimBuf, suffixes[nextchar++]); |
| nextchar %= sizeof(suffixes) - 1; |
| } |
| |
| /* add trailing $ */ |
| appendPQExpBufferChar(delimBuf, '$'); |
| |
| /* quote it and we are all done */ |
| appendPQExpBufferStr(buf, delimBuf->data); |
| appendPQExpBufferStr(buf, str); |
| appendPQExpBufferStr(buf, delimBuf->data); |
| |
| destroyPQExpBuffer(delimBuf); |
| } |
| |
| |
| /* |
| * Convert a bytea value (presented as raw bytes) to an SQL string literal |
| * and append it to the given buffer. We assume the specified |
| * standard_conforming_strings setting. |
| * |
| * This is needed in situations where we do not have a PGconn available. |
| * Where we do, PQescapeByteaConn is a better choice. |
| */ |
| void |
| appendByteaLiteral(PQExpBuffer buf, const unsigned char *str, size_t length, |
| bool std_strings) |
| { |
| const unsigned char *source = str; |
| char *target; |
| |
| static const char hextbl[] = "0123456789abcdef"; |
| |
| /* |
| * This implementation is hard-wired to produce hex-format output. We do |
| * not know the server version the output will be loaded into, so making |
| * an intelligent format choice is impossible. It might be better to |
| * always use the old escaped format. |
| */ |
| if (!enlargePQExpBuffer(buf, 2 * length + 5)) |
| return; |
| |
| target = buf->data + buf->len; |
| *target++ = '\''; |
| if (!std_strings) |
| *target++ = '\\'; |
| *target++ = '\\'; |
| *target++ = 'x'; |
| |
| while (length-- > 0) |
| { |
| unsigned char c = *source++; |
| |
| *target++ = hextbl[(c >> 4) & 0xF]; |
| *target++ = hextbl[c & 0xF]; |
| } |
| |
| /* Write the terminating quote and NUL character. */ |
| *target++ = '\''; |
| *target = '\0'; |
| |
| buf->len = target - buf->data; |
| } |
| |
| |
| /* |
| * Append the given string to the shell command being built in the buffer, |
| * with shell-style quoting as needed to create exactly one argument. |
| * |
| * Forbid LF or CR characters, which have scant practical use beyond designing |
| * security breaches. The Windows command shell is unusable as a conduit for |
| * arguments containing LF or CR characters. A future major release should |
| * reject those characters in CREATE ROLE and CREATE DATABASE, because use |
| * there eventually leads to errors here. |
| * |
| * appendShellString() simply prints an error and dies if LF or CR appears. |
| * appendShellStringNoError() omits those characters from the result, and |
| * returns false if there were any. |
| */ |
| void |
| appendShellString(PQExpBuffer buf, const char *str) |
| { |
| if (!appendShellStringNoError(buf, str)) |
| { |
| fprintf(stderr, |
| _("shell command argument contains a newline or carriage return: \"%s\"\n"), |
| str); |
| exit(EXIT_FAILURE); |
| } |
| } |
| |
| bool |
| appendShellStringNoError(PQExpBuffer buf, const char *str) |
| { |
| #ifdef WIN32 |
| int backslash_run_length = 0; |
| #endif |
| bool ok = true; |
| const char *p; |
| |
| /* |
| * Don't bother with adding quotes if the string is nonempty and clearly |
| * contains only safe characters. |
| */ |
| if (*str != '\0' && |
| strspn(str, "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-_./:") == strlen(str)) |
| { |
| appendPQExpBufferStr(buf, str); |
| return ok; |
| } |
| |
| #ifndef WIN32 |
| appendPQExpBufferChar(buf, '\''); |
| for (p = str; *p; p++) |
| { |
| if (*p == '\n' || *p == '\r') |
| { |
| ok = false; |
| continue; |
| } |
| |
| if (*p == '\'') |
| appendPQExpBufferStr(buf, "'\"'\"'"); |
| else |
| appendPQExpBufferChar(buf, *p); |
| } |
| appendPQExpBufferChar(buf, '\''); |
| #else /* WIN32 */ |
| |
| /* |
| * A Windows system() argument experiences two layers of interpretation. |
| * First, cmd.exe interprets the string. Its behavior is undocumented, |
| * but a caret escapes any byte except LF or CR that would otherwise have |
| * special meaning. Handling of a caret before LF or CR differs between |
| * "cmd.exe /c" and other modes, and it is unusable here. |
| * |
| * Second, the new process parses its command line to construct argv (see |
| * https://msdn.microsoft.com/en-us/library/17w5ykft.aspx). This treats |
| * backslash-double quote sequences specially. |
| */ |
| appendPQExpBufferStr(buf, "^\""); |
| for (p = str; *p; p++) |
| { |
| if (*p == '\n' || *p == '\r') |
| { |
| ok = false; |
| continue; |
| } |
| |
| /* Change N backslashes before a double quote to 2N+1 backslashes. */ |
| if (*p == '"') |
| { |
| while (backslash_run_length) |
| { |
| appendPQExpBufferStr(buf, "^\\"); |
| backslash_run_length--; |
| } |
| appendPQExpBufferStr(buf, "^\\"); |
| } |
| else if (*p == '\\') |
| backslash_run_length++; |
| else |
| backslash_run_length = 0; |
| |
| /* |
| * Decline to caret-escape the most mundane characters, to ease |
| * debugging and lest we approach the command length limit. |
| */ |
| if (!((*p >= 'a' && *p <= 'z') || |
| (*p >= 'A' && *p <= 'Z') || |
| (*p >= '0' && *p <= '9'))) |
| appendPQExpBufferChar(buf, '^'); |
| appendPQExpBufferChar(buf, *p); |
| } |
| |
| /* |
| * Change N backslashes at end of argument to 2N backslashes, because they |
| * precede the double quote that terminates the argument. |
| */ |
| while (backslash_run_length) |
| { |
| appendPQExpBufferStr(buf, "^\\"); |
| backslash_run_length--; |
| } |
| appendPQExpBufferStr(buf, "^\""); |
| #endif /* WIN32 */ |
| |
| return ok; |
| } |
| |
| |
| /* |
| * Append the given string to the buffer, with suitable quoting for passing |
| * the string as a value in a keyword/value pair in a libpq connection string. |
| */ |
| void |
| appendConnStrVal(PQExpBuffer buf, const char *str) |
| { |
| const char *s; |
| bool needquotes; |
| |
| /* |
| * If the string is one or more plain ASCII characters, no need to quote |
| * it. This is quite conservative, but better safe than sorry. |
| */ |
| needquotes = true; |
| for (s = str; *s; s++) |
| { |
| if (!((*s >= 'a' && *s <= 'z') || (*s >= 'A' && *s <= 'Z') || |
| (*s >= '0' && *s <= '9') || *s == '_' || *s == '.')) |
| { |
| needquotes = true; |
| break; |
| } |
| needquotes = false; |
| } |
| |
| if (needquotes) |
| { |
| appendPQExpBufferChar(buf, '\''); |
| while (*str) |
| { |
| /* ' and \ must be escaped by to \' and \\ */ |
| if (*str == '\'' || *str == '\\') |
| appendPQExpBufferChar(buf, '\\'); |
| |
| appendPQExpBufferChar(buf, *str); |
| str++; |
| } |
| appendPQExpBufferChar(buf, '\''); |
| } |
| else |
| appendPQExpBufferStr(buf, str); |
| } |
| |
| |
| /* |
| * Append a psql meta-command that connects to the given database with the |
| * then-current connection's user, host and port. |
| */ |
| void |
| appendPsqlMetaConnect(PQExpBuffer buf, const char *dbname) |
| { |
| const char *s; |
| bool complex; |
| |
| /* |
| * If the name is plain ASCII characters, emit a trivial "\connect "foo"". |
| * For other names, even many not technically requiring it, skip to the |
| * general case. No database has a zero-length name. |
| */ |
| complex = false; |
| |
| for (s = dbname; *s; s++) |
| { |
| if (*s == '\n' || *s == '\r') |
| { |
| fprintf(stderr, |
| _("database name contains a newline or carriage return: \"%s\"\n"), |
| dbname); |
| exit(EXIT_FAILURE); |
| } |
| |
| if (!((*s >= 'a' && *s <= 'z') || (*s >= 'A' && *s <= 'Z') || |
| (*s >= '0' && *s <= '9') || *s == '_' || *s == '.')) |
| { |
| complex = true; |
| } |
| } |
| |
| if (complex) |
| { |
| PQExpBufferData connstr; |
| |
| initPQExpBuffer(&connstr); |
| |
| /* |
| * Force the target psql's encoding to SQL_ASCII. We don't really |
| * know the encoding of the database name, and it doesn't matter as |
| * long as psql will forward it to the server unchanged. |
| */ |
| appendPQExpBufferStr(buf, "\\encoding SQL_ASCII\n"); |
| appendPQExpBufferStr(buf, "\\connect -reuse-previous=on "); |
| |
| appendPQExpBufferStr(&connstr, "dbname="); |
| appendConnStrVal(&connstr, dbname); |
| |
| /* |
| * As long as the name does not contain a newline, SQL identifier |
| * quoting satisfies the psql meta-command parser. Prefer not to |
| * involve psql-interpreted single quotes, which behaved differently |
| * before PostgreSQL 9.2. |
| */ |
| appendPQExpBufferStr(buf, fmtIdEnc(connstr.data, PG_SQL_ASCII)); |
| |
| termPQExpBuffer(&connstr); |
| } |
| else |
| { |
| appendPQExpBufferStr(buf, "\\connect "); |
| appendPQExpBufferStr(buf, fmtIdEnc(dbname, PG_SQL_ASCII)); |
| } |
| appendPQExpBufferChar(buf, '\n'); |
| } |
| |
| |
/*
 * Split the text representation of a 1-dimensional Postgres array into its
 * individual items.
 *
 * On success, returns true and sets *itemarray and *nitems to describe an
 * array of individual strings.  On parse failure, returns false; in that
 * case *nitems is zero and *itemarray may exist or be NULL.
 *
 * NOTE: free'ing itemarray is sufficient to deallocate the working storage.
 */
bool
parsePGArray(const char *atext, char ***itemarray, int *nitems)
{
    int         inputlen;
    char      **items;
    char       *out;
    const char *in;
    int         count = 0;

    /*
     * The expected input looks like "{item,item,item}", where each item is
     * either raw data or is wrapped in double quotes (in which case embedded
     * characters including backslashes and quotes are backslashed).
     *
     * The result is an array of pointers followed by the actual string
     * data, all in one malloc block for convenience of deallocation.  The
     * worst-case storage need is not more than one pointer and one character
     * for each input character (consider "{,,,,,,,,,,}").
     */
    *itemarray = NULL;
    *nitems = 0;

    inputlen = strlen(atext);
    if (inputlen < 2 || atext[0] != '{' || atext[inputlen - 1] != '}')
        return false;           /* bad input */

    items = (char **) malloc(inputlen * (sizeof(char *) + sizeof(char)));
    if (items == NULL)
        return false;           /* out of memory */
    *itemarray = items;

    /* string data is stored right behind the pointer array */
    out = (char *) (items + inputlen);

    /* start just past the initial '{' */
    for (in = atext + 1; *in != '}';)
    {
        if (*in == '\0')
            return false;       /* premature end of string */

        items[count++] = out;

        /* collect one item, up to the next ',' or '}' */
        while (*in != '}' && *in != ',')
        {
            if (*in == '\0')
                return false;   /* premature end of string */

            if (*in == '"')
            {
                /* quoted substring: copy up to the closing quote */
                for (in++; *in != '"'; in++)
                {
                    if (*in == '\0')
                        return false;   /* premature end of string */
                    if (*in == '\\')
                    {
                        /* a backslash makes the next character literal */
                        in++;
                        if (*in == '\0')
                            return false;   /* premature end of string */
                    }
                    *out++ = *in;
                }
                in++;           /* step over the closing quote */
            }
            else
                *out++ = *in++; /* unquoted data */
        }

        *out++ = '\0';
        if (*in == ',')
            in++;
    }

    /* the '}' we stopped at must be the final character */
    if (in[1] != '\0')
        return false;           /* bogus syntax (embedded '}') */

    *nitems = count;
    return true;
}
| |
| |
| /* |
| * Append one element to the text representation of a 1-dimensional Postgres |
| * array. |
| * |
| * The caller must provide the initial '{' and closing '}' of the array. |
| * This function handles all else, including insertion of commas and |
| * quoting of values. |
| * |
| * We assume that typdelim is ','. |
| */ |
| void |
| appendPGArray(PQExpBuffer buffer, const char *value) |
| { |
| bool needquote; |
| const char *tmp; |
| |
| if (buffer->data[buffer->len - 1] != '{') |
| appendPQExpBufferChar(buffer, ','); |
| |
| /* Decide if we need quotes; this should match array_out()'s choices. */ |
| if (value[0] == '\0') |
| needquote = true; /* force quotes for empty string */ |
| else if (pg_strcasecmp(value, "NULL") == 0) |
| needquote = true; /* force quotes for literal NULL */ |
| else |
| needquote = false; |
| |
| if (!needquote) |
| { |
| for (tmp = value; *tmp; tmp++) |
| { |
| char ch = *tmp; |
| |
| if (ch == '"' || ch == '\\' || |
| ch == '{' || ch == '}' || ch == ',' || |
| /* these match array_isspace(): */ |
| ch == ' ' || ch == '\t' || ch == '\n' || |
| ch == '\r' || ch == '\v' || ch == '\f') |
| { |
| needquote = true; |
| break; |
| } |
| } |
| } |
| |
| if (needquote) |
| { |
| appendPQExpBufferChar(buffer, '"'); |
| for (tmp = value; *tmp; tmp++) |
| { |
| char ch = *tmp; |
| |
| if (ch == '"' || ch == '\\') |
| appendPQExpBufferChar(buffer, '\\'); |
| appendPQExpBufferChar(buffer, ch); |
| } |
| appendPQExpBufferChar(buffer, '"'); |
| } |
| else |
| appendPQExpBufferStr(buffer, value); |
| } |
| |
| |
| /* |
| * Format a reloptions array and append it to the given buffer. |
| * |
| * "prefix" is prepended to the option names; typically it's "" or "toast.". |
| * |
| * Returns false if the reloptions array could not be parsed (in which case |
| * nothing will have been appended to the buffer), or true on success. |
| * |
| * Note: this logic should generally match the backend's flatten_reloptions() |
| * (in adt/ruleutils.c). |
| */ |
| bool |
| appendReloptionsArray(PQExpBuffer buffer, const char *reloptions, |
| const char *prefix, int encoding, bool std_strings) |
| { |
| char **options; |
| int noptions; |
| int i; |
| |
| if (!parsePGArray(reloptions, &options, &noptions)) |
| { |
| free(options); |
| return false; |
| } |
| |
| for (i = 0; i < noptions; i++) |
| { |
| char *option = options[i]; |
| char *name; |
| char *separator; |
| char *value; |
| |
| /* |
| * Each array element should have the form name=value. If the "=" is |
| * missing for some reason, treat it like an empty value. |
| */ |
| name = option; |
| separator = strchr(option, '='); |
| if (separator) |
| { |
| *separator = '\0'; |
| value = separator + 1; |
| } |
| else |
| value = ""; |
| |
| if (i > 0) |
| appendPQExpBufferStr(buffer, ", "); |
| appendPQExpBuffer(buffer, "%s%s=", prefix, fmtId(name)); |
| |
| /* |
| * In general we need to quote the value; but to avoid unnecessary |
| * clutter, do not quote if it is an identifier that would not need |
| * quoting. (We could also allow numbers, but that is a bit trickier |
| * than it looks --- for example, are leading zeroes significant? We |
| * don't want to assume very much here about what custom reloptions |
| * might mean.) |
| */ |
| if (strcmp(fmtId(value), value) == 0) |
| appendPQExpBufferStr(buffer, value); |
| else |
| appendStringLiteral(buffer, value, encoding, std_strings); |
| } |
| |
| free(options); |
| |
| return true; |
| } |
| |
| |
| /* |
| * processSQLNamePattern |
| * |
| * Scan a wildcard-pattern string and generate appropriate WHERE clauses |
| * to limit the set of objects returned. The WHERE clauses are appended |
| * to the already-partially-constructed query in buf. Returns whether |
| * any clause was added. |
| * |
| * conn: connection query will be sent to (consulted for escaping rules). |
| * buf: output parameter. |
| * pattern: user-specified pattern option, or NULL if none ("*" is implied). |
| * have_where: true if caller already emitted "WHERE" (clauses will be ANDed |
| * onto the existing WHERE clause). |
| * force_escape: always quote regexp special characters, even outside |
| * double quotes (else they are quoted only between double quotes). |
| * schemavar: name of query variable to match against a schema-name pattern. |
| * Can be NULL if no schema. |
| * namevar: name of query variable to match against an object-name pattern. |
| * altnamevar: NULL, or name of an alternative variable to match against name. |
| * visibilityrule: clause to use if we want to restrict to visible objects |
| * (for example, "pg_catalog.pg_table_is_visible(p.oid)"). Can be NULL. |
| * dbnamebuf: output parameter receiving the database name portion of the |
| * pattern, if any. Can be NULL. |
| * dotcnt: how many separators were parsed from the pattern, by reference. |
| * |
| * Formatting note: the text already present in buf should end with a newline. |
| * The appended text, if any, will end with one too. |
| */ |
| bool |
| processSQLNamePattern(PGconn *conn, PQExpBuffer buf, const char *pattern, |
| bool have_where, bool force_escape, |
| const char *schemavar, const char *namevar, |
| const char *altnamevar, const char *visibilityrule, |
| PQExpBuffer dbnamebuf, int *dotcnt) |
| { |
| PQExpBufferData schemabuf; |
| PQExpBufferData namebuf; |
| bool added_clause = false; |
| int dcnt; |
| |
| #define WHEREAND() \ |
| (appendPQExpBufferStr(buf, have_where ? " AND " : "WHERE "), \ |
| have_where = true, added_clause = true) |
| |
| if (dotcnt == NULL) |
| dotcnt = &dcnt; |
| *dotcnt = 0; |
| if (pattern == NULL) |
| { |
| /* Default: select all visible objects */ |
| if (visibilityrule) |
| { |
| WHEREAND(); |
| appendPQExpBuffer(buf, "%s\n", visibilityrule); |
| } |
| return added_clause; |
| } |
| |
| initPQExpBuffer(&schemabuf); |
| initPQExpBuffer(&namebuf); |
| |
| /* |
| * Convert shell-style 'pattern' into the regular expression(s) we want to |
| * execute. Quoting/escaping into SQL literal format will be done below |
| * using appendStringLiteralConn(). |
| * |
| * If the caller provided a schemavar, we want to split the pattern on |
| * ".", otherwise not. |
| */ |
| patternToSQLRegex(PQclientEncoding(conn), |
| (schemavar ? dbnamebuf : NULL), |
| (schemavar ? &schemabuf : NULL), |
| &namebuf, |
| pattern, force_escape, true, dotcnt); |
| |
| /* |
| * Now decide what we need to emit. We may run under a hostile |
| * search_path, so qualify EVERY name. Note there will be a leading "^(" |
| * in the patterns in any case. |
| * |
| * We want the regex matches to use the database's default collation where |
| * collation-sensitive behavior is required (for example, which characters |
| * match '\w'). That happened by default before PG v12, but if the server |
| * is >= v12 then we need to force it through explicit COLLATE clauses, |
| * otherwise the "C" collation attached to "name" catalog columns wins. |
| */ |
| if (namevar && namebuf.len > 2) |
| { |
| /* We have a name pattern, so constrain the namevar(s) */ |
| |
| /* Optimize away a "*" pattern */ |
| if (strcmp(namebuf.data, "^(.*)$") != 0) |
| { |
| WHEREAND(); |
| if (altnamevar) |
| { |
| appendPQExpBuffer(buf, |
| "(%s OPERATOR(pg_catalog.~) ", namevar); |
| appendStringLiteralConn(buf, namebuf.data, conn); |
| if (PQserverVersion(conn) >= 120000) |
| appendPQExpBufferStr(buf, " COLLATE pg_catalog.default"); |
| appendPQExpBuffer(buf, |
| "\n OR %s OPERATOR(pg_catalog.~) ", |
| altnamevar); |
| appendStringLiteralConn(buf, namebuf.data, conn); |
| if (PQserverVersion(conn) >= 120000) |
| appendPQExpBufferStr(buf, " COLLATE pg_catalog.default"); |
| appendPQExpBufferStr(buf, ")\n"); |
| } |
| else |
| { |
| appendPQExpBuffer(buf, "%s OPERATOR(pg_catalog.~) ", namevar); |
| appendStringLiteralConn(buf, namebuf.data, conn); |
| if (PQserverVersion(conn) >= 120000) |
| appendPQExpBufferStr(buf, " COLLATE pg_catalog.default"); |
| appendPQExpBufferChar(buf, '\n'); |
| } |
| } |
| } |
| |
| if (schemavar && schemabuf.len > 2) |
| { |
| /* We have a schema pattern, so constrain the schemavar */ |
| |
| /* Optimize away a "*" pattern */ |
| if (strcmp(schemabuf.data, "^(.*)$") != 0 && schemavar) |
| { |
| WHEREAND(); |
| appendPQExpBuffer(buf, "%s OPERATOR(pg_catalog.~) ", schemavar); |
| appendStringLiteralConn(buf, schemabuf.data, conn); |
| if (PQserverVersion(conn) >= 120000) |
| appendPQExpBufferStr(buf, " COLLATE pg_catalog.default"); |
| appendPQExpBufferChar(buf, '\n'); |
| } |
| } |
| else |
| { |
| /* No schema pattern given, so select only visible objects */ |
| if (visibilityrule) |
| { |
| WHEREAND(); |
| appendPQExpBuffer(buf, "%s\n", visibilityrule); |
| } |
| } |
| |
| termPQExpBuffer(&schemabuf); |
| termPQExpBuffer(&namebuf); |
| |
| return added_clause; |
| #undef WHEREAND |
| } |
| |
| /* |
| * Transform a possibly qualified shell-style object name pattern into up to |
| * three SQL-style regular expressions, converting quotes, lower-casing |
| * unquoted letters, and adjusting shell-style wildcard characters into regexp |
| * notation. |
| * |
| * If the dbnamebuf and schemabuf arguments are non-NULL, and the pattern |
| * contains two or more dbname/schema/name separators, we parse the portions of |
| * the pattern prior to the first and second separators into dbnamebuf and |
| * schemabuf, and the rest into namebuf. |
| * |
| * If dbnamebuf is NULL and schemabuf is non-NULL, and the pattern contains at |
| * least one separator, we parse the first portion into schemabuf and the rest |
| * into namebuf. |
| * |
| * Otherwise, we parse all the pattern into namebuf. |
| * |
| * If the pattern contains more dotted parts than buffers to parse into, the |
| * extra dots will be treated as literal characters and written into the |
| * namebuf, though they will be counted. Callers should always check the value |
| * returned by reference in dotcnt and handle this error case appropriately. |
| * |
| * We surround the regexps with "^(...)$" to force them to match whole strings, |
| * as per SQL practice. We have to have parens in case strings contain "|", |
| * else the "^" and "$" will be bound into the first and last alternatives |
| * which is not what we want. Whether this is done for dbnamebuf is controlled |
| * by the want_literal_dbname parameter. |
| * |
| * The regexps we parse into the buffers are appended to the data (if any) |
| * already present. If we parse fewer fields than the number of buffers we |
| * were given, the extra buffers are unaltered. |
| * |
| * encoding: the character encoding for the given pattern |
| * dbnamebuf: output parameter receiving the database name portion of the |
| * pattern, if any. Can be NULL. |
| * schemabuf: output parameter receiving the schema name portion of the |
| * pattern, if any. Can be NULL. |
| * namebuf: output parameter receiving the object name portion of the |
| * pattern. Must not be NULL. |
| * pattern: user-specified pattern to parse. Must not be NULL. |
| * force_escape: always quote regexp special characters, even outside |
| * double quotes (else they are quoted only between double quotes). |
| * want_literal_dbname: if true, regexp special characters within the database |
| * name portion of the pattern will not be escaped, nor will the dbname be |
| * converted into a regular expression. |
| * dotcnt: output parameter receiving the number of separators parsed from the |
| * pattern. |
| */ |
| void |
| patternToSQLRegex(int encoding, PQExpBuffer dbnamebuf, PQExpBuffer schemabuf, |
| PQExpBuffer namebuf, const char *pattern, bool force_escape, |
| bool want_literal_dbname, int *dotcnt) |
| { |
| PQExpBufferData buf[3]; |
| PQExpBufferData left_literal; |
| PQExpBuffer curbuf; |
| PQExpBuffer maxbuf; |
| int i; |
| bool inquotes; |
| bool left; |
| const char *cp; |
| |
| Assert(pattern != NULL); |
| Assert(namebuf != NULL); |
| |
| /* callers should never expect "dbname.relname" format */ |
| Assert(dbnamebuf == NULL || schemabuf != NULL); |
| Assert(dotcnt != NULL); |
| |
| *dotcnt = 0; |
| inquotes = false; |
| cp = pattern; |
| |
| if (dbnamebuf != NULL) |
| maxbuf = &buf[2]; |
| else if (schemabuf != NULL) |
| maxbuf = &buf[1]; |
| else |
| maxbuf = &buf[0]; |
| |
| curbuf = &buf[0]; |
| if (want_literal_dbname) |
| { |
| left = true; |
| initPQExpBuffer(&left_literal); |
| } |
| else |
| left = false; |
| initPQExpBuffer(curbuf); |
| appendPQExpBufferStr(curbuf, "^("); |
| while (*cp) |
| { |
| char ch = *cp; |
| |
| if (ch == '"') |
| { |
| if (inquotes && cp[1] == '"') |
| { |
| /* emit one quote, stay in inquotes mode */ |
| appendPQExpBufferChar(curbuf, '"'); |
| if (left) |
| appendPQExpBufferChar(&left_literal, '"'); |
| cp++; |
| } |
| else |
| inquotes = !inquotes; |
| cp++; |
| } |
| else if (!inquotes && isupper((unsigned char) ch)) |
| { |
| appendPQExpBufferChar(curbuf, |
| pg_tolower((unsigned char) ch)); |
| if (left) |
| appendPQExpBufferChar(&left_literal, |
| pg_tolower((unsigned char) ch)); |
| cp++; |
| } |
| else if (!inquotes && ch == '*') |
| { |
| appendPQExpBufferStr(curbuf, ".*"); |
| if (left) |
| appendPQExpBufferChar(&left_literal, '*'); |
| cp++; |
| } |
| else if (!inquotes && ch == '?') |
| { |
| appendPQExpBufferChar(curbuf, '.'); |
| if (left) |
| appendPQExpBufferChar(&left_literal, '?'); |
| cp++; |
| } |
| else if (!inquotes && ch == '.') |
| { |
| left = false; |
| if (dotcnt) |
| (*dotcnt)++; |
| if (curbuf < maxbuf) |
| { |
| appendPQExpBufferStr(curbuf, ")$"); |
| curbuf++; |
| initPQExpBuffer(curbuf); |
| appendPQExpBufferStr(curbuf, "^("); |
| cp++; |
| } |
| else |
| appendPQExpBufferChar(curbuf, *cp++); |
| } |
| else if (ch == '$') |
| { |
| /* |
| * Dollar is always quoted, whether inside quotes or not. The |
| * reason is that it's allowed in SQL identifiers, so there's a |
| * significant use-case for treating it literally, while because |
| * we anchor the pattern automatically there is no use-case for |
| * having it possess its regexp meaning. |
| */ |
| appendPQExpBufferStr(curbuf, "\\$"); |
| if (left) |
| appendPQExpBufferChar(&left_literal, '$'); |
| cp++; |
| } |
| else |
| { |
| /* |
| * Ordinary data character, transfer to pattern |
| * |
| * Inside double quotes, or at all times if force_escape is true, |
| * quote regexp special characters with a backslash to avoid |
| * regexp errors. Outside quotes, however, let them pass through |
| * as-is; this lets knowledgeable users build regexp expressions |
| * that are more powerful than shell-style patterns. |
| * |
| * As an exception to that, though, always quote "[]", as that's |
| * much more likely to be an attempt to write an array type name |
| * than it is to be the start of a regexp bracket expression. |
| */ |
| if ((inquotes || force_escape) && |
| strchr("|*+?()[]{}.^$\\", ch)) |
| appendPQExpBufferChar(curbuf, '\\'); |
| else if (ch == '[' && cp[1] == ']') |
| appendPQExpBufferChar(curbuf, '\\'); |
| i = PQmblenBounded(cp, encoding); |
| while (i--) |
| { |
| if (left) |
| appendPQExpBufferChar(&left_literal, *cp); |
| appendPQExpBufferChar(curbuf, *cp++); |
| } |
| } |
| } |
| appendPQExpBufferStr(curbuf, ")$"); |
| |
| if (namebuf) |
| { |
| appendPQExpBufferStr(namebuf, curbuf->data); |
| termPQExpBuffer(curbuf); |
| curbuf--; |
| } |
| |
| if (schemabuf && curbuf >= buf) |
| { |
| appendPQExpBufferStr(schemabuf, curbuf->data); |
| termPQExpBuffer(curbuf); |
| curbuf--; |
| } |
| |
| if (dbnamebuf && curbuf >= buf) |
| { |
| if (want_literal_dbname) |
| appendPQExpBufferStr(dbnamebuf, left_literal.data); |
| else |
| appendPQExpBufferStr(dbnamebuf, curbuf->data); |
| termPQExpBuffer(curbuf); |
| } |
| |
| if (want_literal_dbname) |
| termPQExpBuffer(&left_literal); |
| } |