src/fe_utils/string_utils.c - cloudberry - Git at Google

 /*-------------------------------------------------------------------------
  *
  * String-processing utility routines for frontend code
  *
  * Assorted utility functions that are useful in constructing SQL queries
  * and interpreting backend output.
  *
  *
  * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
  * src/fe_utils/string_utils.c
  *
  *-------------------------------------------------------------------------
  */
 #include "postgres_fe.h"

 #include <ctype.h>

 #include "common/keywords.h"
 #include "fe_utils/string_utils.h"
 #include "mb/pg_wchar.h"

 static PQExpBuffer defaultGetLocalPQExpBuffer(void);

 /* Globals exported by this file */
 int			quote_all_identifiers = 0;
 PQExpBuffer (*getLocalPQExpBuffer) (void) = defaultGetLocalPQExpBuffer;

 static int	fmtIdEncoding = -1;


 /*
  * Returns a temporary PQExpBuffer, valid until the next call to the function.
  * This is used by fmtId and fmtQualifiedId.
  *
  * Non-reentrant and non-thread-safe but reduces memory leakage. You can
  * replace this with a custom version by setting the getLocalPQExpBuffer
  * function pointer.
  */
 static PQExpBuffer
 defaultGetLocalPQExpBuffer(void)
 {
 	static PQExpBuffer id_return = NULL;

 	if (id_return)				/* first time through? */
 	{
 		/* same buffer, just wipe contents */
 		resetPQExpBuffer(id_return);
 	}
 	else
 	{
 		/* new buffer */
 		id_return = createPQExpBuffer();
 	}

 	return id_return;
 }

 /*
  * Set the encoding that fmtId() and fmtQualifiedId() use.
  *
  * This is not safe against multiple connections having different encodings,
  * but there is no real other way to address the need to know the encoding for
  * fmtId()/fmtQualifiedId() input for safe escaping. Eventually we should get
  * rid of fmtId().
  */
 void
 setFmtEncoding(int encoding)
 {
 	fmtIdEncoding = encoding;
 }

 /*
  * Return the currently configured encoding for fmtId() and fmtQualifiedId().
  */
 static int
 getFmtEncoding(void)
 {
 	if (fmtIdEncoding != -1)
 		return fmtIdEncoding;

 	/*
 	 * In assertion builds it seems best to fail hard if the encoding was not
 	 * set, to make it easier to find places with missing calls. But in
 	 * production builds that seems like a bad idea, thus we instead just
 	 * default to UTF-8.
 	 */
 	Assert(fmtIdEncoding != -1);

 	return PG_UTF8;
 }

 /*
  *	Quotes input string if it's not a legitimate SQL identifier as-is.
  *
  *	Note that the returned string must be used before calling fmtIdEnc again,
  *	since we re-use the same return buffer each time.
  */
 const char *
 fmtIdEnc(const char *rawid, int encoding)
 {
 	PQExpBuffer id_return = getLocalPQExpBuffer();

 	const char *cp;
 	bool		need_quotes = false;
 	size_t		remaining = strlen(rawid);

 	/*
 	 * These checks need to match the identifier production in scan.l. Don't
 	 * use islower() etc.
 	 */
 	if (quote_all_identifiers)
 		need_quotes = true;
 	/* slightly different rules for first character */
 	else if (!((rawid[0] >= 'a' && rawid[0] <= 'z') || rawid[0] == '_'))
 		need_quotes = true;
 	else
 	{
 		/* otherwise check the entire string */
 		cp = rawid;
 		for (size_t i = 0; i < remaining; i++, cp++)
 		{
 			if (!((*cp >= 'a' && *cp <= 'z')
 				  || (*cp >= '0' && *cp <= '9')
 				  || (*cp == '_')))
 			{
 				need_quotes = true;
 				break;
 			}
 		}
 	}

 	if (!need_quotes)
 	{
 		/*
 		 * Check for keyword.  We quote keywords except for unreserved ones.
 		 * (In some cases we could avoid quoting a col_name or type_func_name
 		 * keyword, but it seems much harder than it's worth to tell that.)
 		 *
 		 * Note: ScanKeywordLookup() does case-insensitive comparison, but
 		 * that's fine, since we already know we have all-lower-case.
 		 */
 		int			kwnum = ScanKeywordLookup(rawid, &ScanKeywords);

 		if (kwnum >= 0 && ScanKeywordCategories[kwnum] != UNRESERVED_KEYWORD)
 			need_quotes = true;
 	}

 	if (!need_quotes)
 	{
 		/* no quoting needed */
 		appendPQExpBufferStr(id_return, rawid);
 	}
 	else
 	{
 		appendPQExpBufferChar(id_return, '"');

 		cp = &rawid[0];
 		while (remaining > 0)
 		{
 			int			charlen;

 			/* Fast path for plain ASCII */
 			if (!IS_HIGHBIT_SET(*cp))
 			{
 				/*
 				 * Did we find a double-quote in the string? Then make this a
 				 * double double-quote per SQL99. Before, we put in a
 				 * backslash/double-quote pair. - thomas 2000-08-05
 				 */
 				if (*cp == '"')
 					appendPQExpBufferChar(id_return, '"');
 				appendPQExpBufferChar(id_return, *cp);
 				remaining--;
 				cp++;
 				continue;
 			}

 			/* Slow path for possible multibyte characters */
 			charlen = pg_encoding_mblen(encoding, cp);

 			if (remaining < charlen)
 			{
 				/*
 				 * If the character is longer than the available input,
 				 * replace the string with an invalid sequence. The invalid
 				 * sequence ensures that the escaped string will trigger an
 				 * error on the server-side, even if we can't directly report
 				 * an error here.
 				 */
 				enlargePQExpBuffer(id_return, 2);
 				pg_encoding_set_invalid(encoding,
 										id_return->data + id_return->len);
 				id_return->len += 2;
 				id_return->data[id_return->len] = '\0';

 				/* there's no more input data, so we can stop */
 				break;
 			}
 			else if (pg_encoding_verifymbchar(encoding, cp, charlen) == -1)
 			{
 				/*
 				 * Multibyte character is invalid.  It's important to verify
 				 * that as invalid multi-byte characters could e.g. be used to
 				 * "skip" over quote characters, e.g. when parsing
 				 * character-by-character.
 				 *
 				 * Replace the bytes corresponding to the invalid character
 				 * with an invalid sequence, for the same reason as above.
 				 *
 				 * It would be a bit faster to verify the whole string the
 				 * first time we encounter a set highbit, but this way we can
 				 * replace just the invalid characters, which probably makes
 				 * it easier for users to find the invalidly encoded portion
 				 * of a larger string.
 				 */
 				enlargePQExpBuffer(id_return, 2);
 				pg_encoding_set_invalid(encoding,
 										id_return->data + id_return->len);
 				id_return->len += 2;
 				id_return->data[id_return->len] = '\0';

 				/*
 				 * Copy the rest of the string after the invalid multi-byte
 				 * character.
 				 */
 				remaining -= charlen;
 				cp += charlen;
 			}
 			else
 			{
 				for (int i = 0; i < charlen; i++)
 				{
 					appendPQExpBufferChar(id_return, *cp);
 					remaining--;
 					cp++;
 				}
 			}
 		}

 		appendPQExpBufferChar(id_return, '"');
 	}

 	return id_return->data;
 }

 /*
  *	Quotes input string if it's not a legitimate SQL identifier as-is.
  *
  *	Note that the returned string must be used before calling fmtId again,
  *	since we re-use the same return buffer each time.
  *
  *  NB: This assumes setFmtEncoding() previously has been called to configure
  *  the encoding of rawid. It is preferable to use fmtIdEnc() with an
  *  explicit encoding.
  */
 const char *
 fmtId(const char *rawid)
 {
 	return fmtIdEnc(rawid, getFmtEncoding());
 }

 /*
  * fmtQualifiedIdEnc - construct a schema-qualified name, with quoting as
  * needed.
  *
  * Like fmtId, use the result before calling again.
  *
  * Since we call fmtId and it also uses getLocalPQExpBuffer() we cannot
  * use that buffer until we're finished with calling fmtId().
  */
 const char *
 fmtQualifiedIdEnc(const char *schema, const char *id, int encoding)
 {
 	PQExpBuffer id_return;
 	PQExpBuffer lcl_pqexp = createPQExpBuffer();

 	/* Some callers might fail to provide a schema name */
 	if (schema && *schema)
 	{
 		appendPQExpBuffer(lcl_pqexp, "%s.", fmtIdEnc(schema, encoding));
 	}
 	appendPQExpBufferStr(lcl_pqexp, fmtIdEnc(id, encoding));

 	id_return = getLocalPQExpBuffer();

 	appendPQExpBufferStr(id_return, lcl_pqexp->data);
 	destroyPQExpBuffer(lcl_pqexp);

 	return id_return->data;
 }

 /*
  * fmtQualifiedId - construct a schema-qualified name, with quoting as needed.
  *
  * Like fmtId, use the result before calling again.
  *
  * Since we call fmtId and it also uses getLocalPQExpBuffer() we cannot
  * use that buffer until we're finished with calling fmtId().
  *
  * NB: This assumes setFmtEncoding() previously has been called to configure
  * the encoding of schema/id. It is preferable to use fmtQualifiedIdEnc()
  * with an explicit encoding.
  */
 const char *
 fmtQualifiedId(const char *schema, const char *id)
 {
 	return fmtQualifiedIdEnc(schema, id, getFmtEncoding());
 }


 /*
  * Format a Postgres version number (in the PG_VERSION_NUM integer format
  * returned by PQserverVersion()) as a string.  This exists mainly to
  * encapsulate knowledge about two-part vs. three-part version numbers.
  *
  * For reentrancy, caller must supply the buffer the string is put in.
  * Recommended size of the buffer is 32 bytes.
  *
  * Returns address of 'buf', as a notational convenience.
  */
 char *
 formatPGVersionNumber(int version_number, bool include_minor,
 					  char *buf, size_t buflen)
 {
 	if (version_number >= 100000)
 	{
 		/* New two-part style */
 		if (include_minor)
 			snprintf(buf, buflen, "%d.%d", version_number / 10000,
 					 version_number % 10000);
 		else
 			snprintf(buf, buflen, "%d", version_number / 10000);
 	}
 	else
 	{
 		/* Old three-part style */
 		if (include_minor)
 			snprintf(buf, buflen, "%d.%d.%d", version_number / 10000,
 					 (version_number / 100) % 100,
 					 version_number % 100);
 		else
 			snprintf(buf, buflen, "%d.%d", version_number / 10000,
 					 (version_number / 100) % 100);
 	}
 	return buf;
 }


 /*
  * Convert a string value to an SQL string literal and append it to
  * the given buffer.  We assume the specified client_encoding and
  * standard_conforming_strings settings.
  *
  * This is essentially equivalent to libpq's PQescapeStringInternal,
  * except for the output buffer structure.  We need it in situations
  * where we do not have a PGconn available.  Where we do,
  * appendStringLiteralConn is a better choice.
  */
 void
 appendStringLiteral(PQExpBuffer buf, const char *str,
 					int encoding, bool std_strings)
 {
 	size_t		length = strlen(str);
 	const char *source = str;
 	char	   *target;
 	size_t		remaining = length;

 	if (!enlargePQExpBuffer(buf, 2 * length + 2))
 		return;

 	target = buf->data + buf->len;
 	*target++ = '\'';

 	while (remaining > 0)
 	{
 		char		c = *source;
 		int			charlen;
 		int			i;

 		/* Fast path for plain ASCII */
 		if (!IS_HIGHBIT_SET(c))
 		{
 			/* Apply quoting if needed */
 			if (SQL_STR_DOUBLE(c, !std_strings))
 				*target++ = c;
 			/* Copy the character */
 			*target++ = c;
 			source++;
 			remaining--;
 			continue;
 		}

 		/* Slow path for possible multibyte characters */
 		charlen = PQmblen(source, encoding);

 		if (remaining < charlen)
 		{
 			/*
 			 * If the character is longer than the available input, replace
 			 * the string with an invalid sequence. The invalid sequence
 			 * ensures that the escaped string will trigger an error on the
 			 * server-side, even if we can't directly report an error here.
 			 *
 			 * We know there's enough space for the invalid sequence because
 			 * the "target" buffer is 2 * length + 2 long, and at worst we're
 			 * replacing a single input byte with two invalid bytes.
 			 */
 			pg_encoding_set_invalid(encoding, target);
 			target += 2;

 			/* there's no more valid input data, so we can stop */
 			break;
 		}
 		else if (pg_encoding_verifymbchar(encoding, source, charlen) == -1)
 		{
 			/*
 			 * Multibyte character is invalid.  It's important to verify that
 			 * as invalid multi-byte characters could e.g. be used to "skip"
 			 * over quote characters, e.g. when parsing
 			 * character-by-character.
 			 *
 			 * Replace the bytes corresponding to the invalid character with
 			 * an invalid sequence, for the same reason as above.
 			 *
 			 * It would be a bit faster to verify the whole string the first
 			 * time we encounter a set highbit, but this way we can replace
 			 * just the invalid characters, which probably makes it easier for
 			 * users to find the invalidly encoded portion of a larger string.
 			 */
 			pg_encoding_set_invalid(encoding, target);
 			target += 2;
 			remaining -= charlen;

 			/*
 			 * Copy the rest of the string after the invalid multi-byte
 			 * character.
 			 */
 			source += charlen;
 		}
 		else
 		{
 			/* Copy the character */
 			for (i = 0; i < charlen; i++)
 			{
 				*target++ = *source++;
 				remaining--;
 			}
 		}
 	}

 	/* Write the terminating quote and NUL character. */
 	*target++ = '\'';
 	*target = '\0';

 	buf->len = target - buf->data;
 }


 /*
  * Convert a string value to an SQL string literal and append it to
  * the given buffer.  Encoding and string syntax rules are as indicated
  * by current settings of the PGconn.
  */
 void
 appendStringLiteralConn(PQExpBuffer buf, const char *str, PGconn *conn)
 {
 	size_t		length = strlen(str);

 	/*
 	 * XXX This is a kluge to silence escape_string_warning in our utility
 	 * programs.  It should go away someday.
 	 */
 	if (strchr(str, '\\') != NULL && PQserverVersion(conn) >= 80100)
 	{
 		/* ensure we are not adjacent to an identifier */
 		if (buf->len > 0 && buf->data[buf->len - 1] != ' ')
 			appendPQExpBufferChar(buf, ' ');
 		appendPQExpBufferChar(buf, ESCAPE_STRING_SYNTAX);
 		appendStringLiteral(buf, str, PQclientEncoding(conn), false);
 		return;
 	}
 	/* XXX end kluge */

 	if (!enlargePQExpBuffer(buf, 2 * length + 2))
 		return;
 	appendPQExpBufferChar(buf, '\'');
 	buf->len += PQescapeStringConn(conn, buf->data + buf->len,
 								   str, length, NULL);
 	appendPQExpBufferChar(buf, '\'');
 }


 /*
  * Convert a string value to a dollar quoted literal and append it to
  * the given buffer. If the dqprefix parameter is not NULL then the
  * dollar quote delimiter will begin with that (after the opening $).
  *
  * No escaping is done at all on str, in compliance with the rules
  * for parsing dollar quoted strings.  Also, we need not worry about
  * encoding issues.
  */
 void
 appendStringLiteralDQ(PQExpBuffer buf, const char *str, const char *dqprefix)
 {
 	static const char suffixes[] = "_XXXXXXX";
 	int			nextchar = 0;
 	PQExpBuffer delimBuf = createPQExpBuffer();

 	/* start with $ + dqprefix if not NULL */
 	appendPQExpBufferChar(delimBuf, '$');
 	if (dqprefix)
 		appendPQExpBufferStr(delimBuf, dqprefix);

 	/*
 	 * Make sure we choose a delimiter which (without the trailing $) is not
 	 * present in the string being quoted. We don't check with the trailing $
 	 * because a string ending in $foo must not be quoted with $foo$.
 	 */
 	while (strstr(str, delimBuf->data) != NULL)
 	{
 		appendPQExpBufferChar(delimBuf, suffixes[nextchar++]);
 		nextchar %= sizeof(suffixes) - 1;
 	}

 	/* add trailing $ */
 	appendPQExpBufferChar(delimBuf, '$');

 	/* quote it and we are all done */
 	appendPQExpBufferStr(buf, delimBuf->data);
 	appendPQExpBufferStr(buf, str);
 	appendPQExpBufferStr(buf, delimBuf->data);

 	destroyPQExpBuffer(delimBuf);
 }


 /*
  * Convert a bytea value (presented as raw bytes) to an SQL string literal
  * and append it to the given buffer.  We assume the specified
  * standard_conforming_strings setting.
  *
  * This is needed in situations where we do not have a PGconn available.
  * Where we do, PQescapeByteaConn is a better choice.
  */
 void
 appendByteaLiteral(PQExpBuffer buf, const unsigned char *str, size_t length,
 				   bool std_strings)
 {
 	const unsigned char *source = str;
 	char	   *target;

 	static const char hextbl[] = "0123456789abcdef";

 	/*
 	 * This implementation is hard-wired to produce hex-format output. We do
 	 * not know the server version the output will be loaded into, so making
 	 * an intelligent format choice is impossible.  It might be better to
 	 * always use the old escaped format.
 	 */
 	if (!enlargePQExpBuffer(buf, 2 * length + 5))
 		return;

 	target = buf->data + buf->len;
 	*target++ = '\'';
 	if (!std_strings)
 		*target++ = '\\';
 	*target++ = '\\';
 	*target++ = 'x';

 	while (length-- > 0)
 	{
 		unsigned char c = *source++;

 		*target++ = hextbl[(c >> 4) & 0xF];
 		*target++ = hextbl[c & 0xF];
 	}

 	/* Write the terminating quote and NUL character. */
 	*target++ = '\'';
 	*target = '\0';

 	buf->len = target - buf->data;
 }


 /*
  * Append the given string to the shell command being built in the buffer,
  * with shell-style quoting as needed to create exactly one argument.
  *
  * Forbid LF or CR characters, which have scant practical use beyond designing
  * security breaches.  The Windows command shell is unusable as a conduit for
  * arguments containing LF or CR characters.  A future major release should
  * reject those characters in CREATE ROLE and CREATE DATABASE, because use
  * there eventually leads to errors here.
  *
  * appendShellString() simply prints an error and dies if LF or CR appears.
  * appendShellStringNoError() omits those characters from the result, and
  * returns false if there were any.
  */
 void
 appendShellString(PQExpBuffer buf, const char *str)
 {
 	if (!appendShellStringNoError(buf, str))
 	{
 		fprintf(stderr,
 				_("shell command argument contains a newline or carriage return: \"%s\"\n"),
 				str);
 		exit(EXIT_FAILURE);
 	}
 }

 bool
 appendShellStringNoError(PQExpBuffer buf, const char *str)
 {
 #ifdef WIN32
 	int			backslash_run_length = 0;
 #endif
 	bool		ok = true;
 	const char *p;

 	/*
 	 * Don't bother with adding quotes if the string is nonempty and clearly
 	 * contains only safe characters.
 	 */
 	if (*str != '\0' &&
 		strspn(str, "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-_./:") == strlen(str))
 	{
 		appendPQExpBufferStr(buf, str);
 		return ok;
 	}

 #ifndef WIN32
 	appendPQExpBufferChar(buf, '\'');
 	for (p = str; *p; p++)
 	{
 		if (*p == '\n' || *p == '\r')
 		{
 			ok = false;
 			continue;
 		}

 		if (*p == '\'')
 			appendPQExpBufferStr(buf, "'\"'\"'");
 		else
 			appendPQExpBufferChar(buf, *p);
 	}
 	appendPQExpBufferChar(buf, '\'');
 #else							/* WIN32 */

 	/*
 	 * A Windows system() argument experiences two layers of interpretation.
 	 * First, cmd.exe interprets the string.  Its behavior is undocumented,
 	 * but a caret escapes any byte except LF or CR that would otherwise have
 	 * special meaning.  Handling of a caret before LF or CR differs between
 	 * "cmd.exe /c" and other modes, and it is unusable here.
 	 *
 	 * Second, the new process parses its command line to construct argv (see
 	 * https://msdn.microsoft.com/en-us/library/17w5ykft.aspx).  This treats
 	 * backslash-double quote sequences specially.
 	 */
 	appendPQExpBufferStr(buf, "^\"");
 	for (p = str; *p; p++)
 	{
 		if (*p == '\n' || *p == '\r')
 		{
 			ok = false;
 			continue;
 		}

 		/* Change N backslashes before a double quote to 2N+1 backslashes. */
 		if (*p == '"')
 		{
 			while (backslash_run_length)
 			{
 				appendPQExpBufferStr(buf, "^\\");
 				backslash_run_length--;
 			}
 			appendPQExpBufferStr(buf, "^\\");
 		}
 		else if (*p == '\\')
 			backslash_run_length++;
 		else
 			backslash_run_length = 0;

 		/*
 		 * Decline to caret-escape the most mundane characters, to ease
 		 * debugging and lest we approach the command length limit.
 		 */
 		if (!((*p >= 'a' && *p <= 'z') ||
 			  (*p >= 'A' && *p <= 'Z') ||
 			  (*p >= '0' && *p <= '9')))
 			appendPQExpBufferChar(buf, '^');
 		appendPQExpBufferChar(buf, *p);
 	}

 	/*
 	 * Change N backslashes at end of argument to 2N backslashes, because they
 	 * precede the double quote that terminates the argument.
 	 */
 	while (backslash_run_length)
 	{
 		appendPQExpBufferStr(buf, "^\\");
 		backslash_run_length--;
 	}
 	appendPQExpBufferStr(buf, "^\"");
 #endif							/* WIN32 */

 	return ok;
 }


 /*
  * Append the given string to the buffer, with suitable quoting for passing
  * the string as a value in a keyword/value pair in a libpq connection string.
  */
 void
 appendConnStrVal(PQExpBuffer buf, const char *str)
 {
 	const char *s;
 	bool		needquotes;

 	/*
 	 * If the string is one or more plain ASCII characters, no need to quote
 	 * it. This is quite conservative, but better safe than sorry.
 	 */
 	needquotes = true;
 	for (s = str; *s; s++)
 	{
 		if (!((*s >= 'a' && *s <= 'z') || (*s >= 'A' && *s <= 'Z') ||
 			  (*s >= '0' && *s <= '9') || *s == '_' || *s == '.'))
 		{
 			needquotes = true;
 			break;
 		}
 		needquotes = false;
 	}

 	if (needquotes)
 	{
 		appendPQExpBufferChar(buf, '\'');
 		while (*str)
 		{
 			/* ' and \ must be escaped by to \' and \\ */
 			if (*str == '\'' || *str == '\\')
 				appendPQExpBufferChar(buf, '\\');

 			appendPQExpBufferChar(buf, *str);
 			str++;
 		}
 		appendPQExpBufferChar(buf, '\'');
 	}
 	else
 		appendPQExpBufferStr(buf, str);
 }


 /*
  * Append a psql meta-command that connects to the given database with the
  * then-current connection's user, host and port.
  */
 void
 appendPsqlMetaConnect(PQExpBuffer buf, const char *dbname)
 {
 	const char *s;
 	bool complex;

 	/*
 	 * If the name is plain ASCII characters, emit a trivial "\connect "foo"".
 	 * For other names, even many not technically requiring it, skip to the
 	 * general case.  No database has a zero-length name.
 	 */
 	complex = false;

 	for (s = dbname; *s; s++)
 	{
 		if (*s == '\n' || *s == '\r')
 		{
 			fprintf(stderr,
 					_("database name contains a newline or carriage return: \"%s\"\n"),
 					dbname);
 			exit(EXIT_FAILURE);
 		}

 		if (!((*s >= 'a' && *s <= 'z') || (*s >= 'A' && *s <= 'Z') ||
 			  (*s >= '0' && *s <= '9') || *s == '_' || *s == '.'))
 		{
 			complex = true;
 		}
 	}

 	if (complex)
 	{
 		PQExpBufferData connstr;

 		initPQExpBuffer(&connstr);

 		/*
 		 * Force the target psql's encoding to SQL_ASCII.  We don't really
 		 * know the encoding of the database name, and it doesn't matter as
 		 * long as psql will forward it to the server unchanged.
 		 */
 		appendPQExpBufferStr(buf, "\\encoding SQL_ASCII\n");
 		appendPQExpBufferStr(buf, "\\connect -reuse-previous=on ");

 		appendPQExpBufferStr(&connstr, "dbname=");
 		appendConnStrVal(&connstr, dbname);

 		/*
 		 * As long as the name does not contain a newline, SQL identifier
 		 * quoting satisfies the psql meta-command parser.  Prefer not to
 		 * involve psql-interpreted single quotes, which behaved differently
 		 * before PostgreSQL 9.2.
 		 */
 		appendPQExpBufferStr(buf, fmtIdEnc(connstr.data, PG_SQL_ASCII));

 		termPQExpBuffer(&connstr);
 	}
 	else
 	{
 		appendPQExpBufferStr(buf, "\\connect ");
 		appendPQExpBufferStr(buf, fmtIdEnc(dbname, PG_SQL_ASCII));
 	}
 	appendPQExpBufferChar(buf, '\n');
 }


 /*
  * Deconstruct the text representation of a 1-dimensional Postgres array
  * into individual items.
  *
  * On success, returns true and sets *itemarray and *nitems to describe
  * an array of individual strings.  On parse failure, returns false;
  * *itemarray may exist or be NULL.
  *
  * NOTE: free'ing itemarray is sufficient to deallocate the working storage.
  */
 bool
 parsePGArray(const char *atext, char ***itemarray, int *nitems)
 {
 	int			inputlen;
 	char	  **items;
 	char	   *strings;
 	int			curitem;

 	/*
 	 * We expect input in the form of "{item,item,item}" where any item is
 	 * either raw data, or surrounded by double quotes (in which case embedded
 	 * characters including backslashes and quotes are backslashed).
 	 *
 	 * We build the result as an array of pointers followed by the actual
 	 * string data, all in one malloc block for convenience of deallocation.
 	 * The worst-case storage need is not more than one pointer and one
 	 * character for each input character (consider "{,,,,,,,,,,}").
 	 */
 	*itemarray = NULL;
 	*nitems = 0;
 	inputlen = strlen(atext);
 	if (inputlen < 2 || atext[0] != '{' || atext[inputlen - 1] != '}')
 		return false;			/* bad input */
 	items = (char **) malloc(inputlen * (sizeof(char *) + sizeof(char)));
 	if (items == NULL)
 		return false;			/* out of memory */
 	*itemarray = items;
 	strings = (char *) (items + inputlen);

 	atext++;					/* advance over initial '{' */
 	curitem = 0;
 	while (*atext != '}')
 	{
 		if (*atext == '\0')
 			return false;		/* premature end of string */
 		items[curitem] = strings;
 		while (*atext != '}' && *atext != ',')
 		{
 			if (*atext == '\0')
 				return false;	/* premature end of string */
 			if (*atext != '"')
 				*strings++ = *atext++;	/* copy unquoted data */
 			else
 			{
 				/* process quoted substring */
 				atext++;
 				while (*atext != '"')
 				{
 					if (*atext == '\0')
 						return false;	/* premature end of string */
 					if (*atext == '\\')
 					{
 						atext++;
 						if (*atext == '\0')
 							return false;	/* premature end of string */
 					}
 					*strings++ = *atext++;	/* copy quoted data */
 				}
 				atext++;
 			}
 		}
 		*strings++ = '\0';
 		if (*atext == ',')
 			atext++;
 		curitem++;
 	}
 	if (atext[1] != '\0')
 		return false;			/* bogus syntax (embedded '}') */
 	*nitems = curitem;
 	return true;
 }


 /*
  * Append one element to the text representation of a 1-dimensional Postgres
  * array.
  *
  * The caller must provide the initial '{' and closing '}' of the array.
  * This function handles all else, including insertion of commas and
  * quoting of values.
  *
  * We assume that typdelim is ','.
  */
 void
 appendPGArray(PQExpBuffer buffer, const char *value)
 {
 	bool		needquote;
 	const char *tmp;

 	if (buffer->data[buffer->len - 1] != '{')
 		appendPQExpBufferChar(buffer, ',');

 	/* Decide if we need quotes; this should match array_out()'s choices. */
 	if (value[0] == '\0')
 		needquote = true;		/* force quotes for empty string */
 	else if (pg_strcasecmp(value, "NULL") == 0)
 		needquote = true;		/* force quotes for literal NULL */
 	else
 		needquote = false;

 	if (!needquote)
 	{
 		for (tmp = value; *tmp; tmp++)
 		{
 			char		ch = *tmp;

 			if (ch == '"' || ch == '\\' ||
 				ch == '{' || ch == '}' || ch == ',' ||
 			/* these match array_isspace(): */
 				ch == ' ' || ch == '\t' || ch == '\n' ||
 				ch == '\r' || ch == '\v' || ch == '\f')
 			{
 				needquote = true;
 				break;
 			}
 		}
 	}

 	if (needquote)
 	{
 		appendPQExpBufferChar(buffer, '"');
 		for (tmp = value; *tmp; tmp++)
 		{
 			char		ch = *tmp;

 			if (ch == '"' || ch == '\\')
 				appendPQExpBufferChar(buffer, '\\');
 			appendPQExpBufferChar(buffer, ch);
 		}
 		appendPQExpBufferChar(buffer, '"');
 	}
 	else
 		appendPQExpBufferStr(buffer, value);
 }


 /*
  * Format a reloptions array and append it to the given buffer.
  *
  * "prefix" is prepended to the option names; typically it's "" or "toast.".
  *
  * Returns false if the reloptions array could not be parsed (in which case
  * nothing will have been appended to the buffer), or true on success.
  *
  * Note: this logic should generally match the backend's flatten_reloptions()
  * (in adt/ruleutils.c).
  */
 bool
 appendReloptionsArray(PQExpBuffer buffer, const char *reloptions,
 					  const char *prefix, int encoding, bool std_strings)
 {
 	char	  **options;
 	int			noptions;
 	int			i;

 	if (!parsePGArray(reloptions, &options, &noptions))
 	{
 		free(options);
 		return false;
 	}

 	for (i = 0; i < noptions; i++)
 	{
 		char	   *option = options[i];
 		char	   *name;
 		char	   *separator;
 		char	   *value;

 		/*
 		 * Each array element should have the form name=value.  If the "=" is
 		 * missing for some reason, treat it like an empty value.
 		 */
 		name = option;
 		separator = strchr(option, '=');
 		if (separator)
 		{
 			*separator = '\0';
 			value = separator + 1;
 		}
 		else
 			value = "";

 		if (i > 0)
 			appendPQExpBufferStr(buffer, ", ");
 		appendPQExpBuffer(buffer, "%s%s=", prefix, fmtId(name));

 		/*
 		 * In general we need to quote the value; but to avoid unnecessary
 		 * clutter, do not quote if it is an identifier that would not need
 		 * quoting.  (We could also allow numbers, but that is a bit trickier
 		 * than it looks --- for example, are leading zeroes significant?  We
 		 * don't want to assume very much here about what custom reloptions
 		 * might mean.)
 		 */
 		if (strcmp(fmtId(value), value) == 0)
 			appendPQExpBufferStr(buffer, value);
 		else
 			appendStringLiteral(buffer, value, encoding, std_strings);
 	}

 	free(options);

 	return true;
 }


 /*
  * processSQLNamePattern
  *
  * Scan a wildcard-pattern string and generate appropriate WHERE clauses
  * to limit the set of objects returned.  The WHERE clauses are appended
  * to the already-partially-constructed query in buf.  Returns whether
  * any clause was added.
  *
  * conn: connection query will be sent to (consulted for escaping rules).
  * buf: output parameter.
  * pattern: user-specified pattern option, or NULL if none ("*" is implied).
  * have_where: true if caller already emitted "WHERE" (clauses will be ANDed
  * onto the existing WHERE clause).
  * force_escape: always quote regexp special characters, even outside
  * double quotes (else they are quoted only between double quotes).
  * schemavar: name of query variable to match against a schema-name pattern.
  * Can be NULL if no schema.
  * namevar: name of query variable to match against an object-name pattern.
  * altnamevar: NULL, or name of an alternative variable to match against name.
  * visibilityrule: clause to use if we want to restrict to visible objects
  * (for example, "pg_catalog.pg_table_is_visible(p.oid)").  Can be NULL.
  * dbnamebuf: output parameter receiving the database name portion of the
  * pattern, if any.  Can be NULL.
  * dotcnt: how many separators were parsed from the pattern, by reference.
  *
  * Formatting note: the text already present in buf should end with a newline.
  * The appended text, if any, will end with one too.
  */
 bool
 processSQLNamePattern(PGconn *conn, PQExpBuffer buf, const char *pattern,
 					  bool have_where, bool force_escape,
 					  const char *schemavar, const char *namevar,
 					  const char *altnamevar, const char *visibilityrule,
 					  PQExpBuffer dbnamebuf, int *dotcnt)
 {
 	PQExpBufferData schemabuf;
 	PQExpBufferData namebuf;
 	bool		added_clause = false;
 	int			dcnt;

 #define WHEREAND() \
 	(appendPQExpBufferStr(buf, have_where ? "  AND " : "WHERE "), \
 	 have_where = true, added_clause = true)

 	if (dotcnt == NULL)
 		dotcnt = &dcnt;
 	*dotcnt = 0;
 	if (pattern == NULL)
 	{
 		/* Default: select all visible objects */
 		if (visibilityrule)
 		{
 			WHEREAND();
 			appendPQExpBuffer(buf, "%s\n", visibilityrule);
 		}
 		return added_clause;
 	}

 	initPQExpBuffer(&schemabuf);
 	initPQExpBuffer(&namebuf);

 	/*
 	 * Convert shell-style 'pattern' into the regular expression(s) we want to
 	 * execute.  Quoting/escaping into SQL literal format will be done below
 	 * using appendStringLiteralConn().
 	 *
 	 * If the caller provided a schemavar, we want to split the pattern on
 	 * ".", otherwise not.
 	 */
 	patternToSQLRegex(PQclientEncoding(conn),
 					  (schemavar ? dbnamebuf : NULL),
 					  (schemavar ? &schemabuf : NULL),
 					  &namebuf,
 					  pattern, force_escape, true, dotcnt);

 	/*
 	 * Now decide what we need to emit.  We may run under a hostile
 	 * search_path, so qualify EVERY name.  Note there will be a leading "^("
 	 * in the patterns in any case.
 	 *
 	 * We want the regex matches to use the database's default collation where
 	 * collation-sensitive behavior is required (for example, which characters
 	 * match '\w').  That happened by default before PG v12, but if the server
 	 * is >= v12 then we need to force it through explicit COLLATE clauses,
 	 * otherwise the "C" collation attached to "name" catalog columns wins.
 	 */
 	if (namevar && namebuf.len > 2)
 	{
 		/* We have a name pattern, so constrain the namevar(s) */

 		/* Optimize away a "*" pattern */
 		if (strcmp(namebuf.data, "^(.*)$") != 0)
 		{
 			WHEREAND();
 			if (altnamevar)
 			{
 				appendPQExpBuffer(buf,
 								  "(%s OPERATOR(pg_catalog.~) ", namevar);
 				appendStringLiteralConn(buf, namebuf.data, conn);
 				if (PQserverVersion(conn) >= 120000)
 					appendPQExpBufferStr(buf, " COLLATE pg_catalog.default");
 				appendPQExpBuffer(buf,
 								  "\n        OR %s OPERATOR(pg_catalog.~) ",
 								  altnamevar);
 				appendStringLiteralConn(buf, namebuf.data, conn);
 				if (PQserverVersion(conn) >= 120000)
 					appendPQExpBufferStr(buf, " COLLATE pg_catalog.default");
 				appendPQExpBufferStr(buf, ")\n");
 			}
 			else
 			{
 				appendPQExpBuffer(buf, "%s OPERATOR(pg_catalog.~) ", namevar);
 				appendStringLiteralConn(buf, namebuf.data, conn);
 				if (PQserverVersion(conn) >= 120000)
 					appendPQExpBufferStr(buf, " COLLATE pg_catalog.default");
 				appendPQExpBufferChar(buf, '\n');
 			}
 		}
 	}

 	if (schemavar && schemabuf.len > 2)
 	{
 		/* We have a schema pattern, so constrain the schemavar */

 		/* Optimize away a "*" pattern */
 		if (strcmp(schemabuf.data, "^(.*)$") != 0 && schemavar)
 		{
 			WHEREAND();
 			appendPQExpBuffer(buf, "%s OPERATOR(pg_catalog.~) ", schemavar);
 			appendStringLiteralConn(buf, schemabuf.data, conn);
 			if (PQserverVersion(conn) >= 120000)
 				appendPQExpBufferStr(buf, " COLLATE pg_catalog.default");
 			appendPQExpBufferChar(buf, '\n');
 		}
 	}
 	else
 	{
 		/* No schema pattern given, so select only visible objects */
 		if (visibilityrule)
 		{
 			WHEREAND();
 			appendPQExpBuffer(buf, "%s\n", visibilityrule);
 		}
 	}

 	termPQExpBuffer(&schemabuf);
 	termPQExpBuffer(&namebuf);

 	return added_clause;
 #undef WHEREAND
 }

 /*
  * Transform a possibly qualified shell-style object name pattern into up to
  * three SQL-style regular expressions, converting quotes, lower-casing
  * unquoted letters, and adjusting shell-style wildcard characters into regexp
  * notation.
  *
  * If the dbnamebuf and schemabuf arguments are non-NULL, and the pattern
  * contains two or more dbname/schema/name separators, we parse the portions of
  * the pattern prior to the first and second separators into dbnamebuf and
  * schemabuf, and the rest into namebuf.
  *
  * If dbnamebuf is NULL and schemabuf is non-NULL, and the pattern contains at
  * least one separator, we parse the first portion into schemabuf and the rest
  * into namebuf.
  *
  * Otherwise, we parse all the pattern into namebuf.
  *
  * If the pattern contains more dotted parts than buffers to parse into, the
  * extra dots will be treated as literal characters and written into the
  * namebuf, though they will be counted.  Callers should always check the value
  * returned by reference in dotcnt and handle this error case appropriately.
  *
  * We surround the regexps with "^(...)$" to force them to match whole strings,
  * as per SQL practice.  We have to have parens in case strings contain "|",
  * else the "^" and "$" will be bound into the first and last alternatives
  * which is not what we want.  Whether this is done for dbnamebuf is controlled
  * by the want_literal_dbname parameter.
  *
  * The regexps we parse into the buffers are appended to the data (if any)
  * already present.  If we parse fewer fields than the number of buffers we
  * were given, the extra buffers are unaltered.
  *
  * encoding: the character encoding for the given pattern
  * dbnamebuf: output parameter receiving the database name portion of the
  * pattern, if any.  Can be NULL.
  * schemabuf: output parameter receiving the schema name portion of the
  * pattern, if any.  Can be NULL.
  * namebuf: output parameter receiving the database name portion of the
  * pattern, if any.  Can be NULL.
  * pattern: user-specified pattern option, or NULL if none ("*" is implied).
  * force_escape: always quote regexp special characters, even outside
  * double quotes (else they are quoted only between double quotes).
  * want_literal_dbname: if true, regexp special characters within the database
  * name portion of the pattern will not be escaped, nor will the dbname be
  * converted into a regular expression.
  * dotcnt: output parameter receiving the number of separators parsed from the
  * pattern.
  */
 void
 patternToSQLRegex(int encoding, PQExpBuffer dbnamebuf, PQExpBuffer schemabuf,
 				  PQExpBuffer namebuf, const char *pattern, bool force_escape,
 				  bool want_literal_dbname, int *dotcnt)
 {
 	PQExpBufferData buf[3];
 	PQExpBufferData left_literal;
 	PQExpBuffer curbuf;
 	PQExpBuffer maxbuf;
 	int			i;
 	bool		inquotes;
 	bool		left;
 	const char *cp;

 	Assert(pattern != NULL);
 	Assert(namebuf != NULL);

 	/* callers should never expect "dbname.relname" format */
 	Assert(dbnamebuf == NULL || schemabuf != NULL);
 	Assert(dotcnt != NULL);

 	*dotcnt = 0;
 	inquotes = false;
 	cp = pattern;

 	if (dbnamebuf != NULL)
 		maxbuf = &buf[2];
 	else if (schemabuf != NULL)
 		maxbuf = &buf[1];
 	else
 		maxbuf = &buf[0];

 	curbuf = &buf[0];
 	if (want_literal_dbname)
 	{
 		left = true;
 		initPQExpBuffer(&left_literal);
 	}
 	else
 		left = false;
 	initPQExpBuffer(curbuf);
 	appendPQExpBufferStr(curbuf, "^(");
 	while (*cp)
 	{
 		char		ch = *cp;

 		if (ch == '"')
 		{
 			if (inquotes && cp[1] == '"')
 			{
 				/* emit one quote, stay in inquotes mode */
 				appendPQExpBufferChar(curbuf, '"');
 				if (left)
 					appendPQExpBufferChar(&left_literal, '"');
 				cp++;
 			}
 			else
 				inquotes = !inquotes;
 			cp++;
 		}
 		else if (!inquotes && isupper((unsigned char) ch))
 		{
 			appendPQExpBufferChar(curbuf,
 								  pg_tolower((unsigned char) ch));
 			if (left)
 				appendPQExpBufferChar(&left_literal,
 									  pg_tolower((unsigned char) ch));
 			cp++;
 		}
 		else if (!inquotes && ch == '*')
 		{
 			appendPQExpBufferStr(curbuf, ".*");
 			if (left)
 				appendPQExpBufferChar(&left_literal, '*');
 			cp++;
 		}
 		else if (!inquotes && ch == '?')
 		{
 			appendPQExpBufferChar(curbuf, '.');
 			if (left)
 				appendPQExpBufferChar(&left_literal, '?');
 			cp++;
 		}
 		else if (!inquotes && ch == '.')
 		{
 			left = false;
 			if (dotcnt)
 				(*dotcnt)++;
 			if (curbuf < maxbuf)
 			{
 				appendPQExpBufferStr(curbuf, ")$");
 				curbuf++;
 				initPQExpBuffer(curbuf);
 				appendPQExpBufferStr(curbuf, "^(");
 				cp++;
 			}
 			else
 				appendPQExpBufferChar(curbuf, *cp++);
 		}
 		else if (ch == '$')
 		{
 			/*
 			 * Dollar is always quoted, whether inside quotes or not. The
 			 * reason is that it's allowed in SQL identifiers, so there's a
 			 * significant use-case for treating it literally, while because
 			 * we anchor the pattern automatically there is no use-case for
 			 * having it possess its regexp meaning.
 			 */
 			appendPQExpBufferStr(curbuf, "\\$");
 			if (left)
 				appendPQExpBufferChar(&left_literal, '$');
 			cp++;
 		}
 		else
 		{
 			/*
 			 * Ordinary data character, transfer to pattern
 			 *
 			 * Inside double quotes, or at all times if force_escape is true,
 			 * quote regexp special characters with a backslash to avoid
 			 * regexp errors.  Outside quotes, however, let them pass through
 			 * as-is; this lets knowledgeable users build regexp expressions
 			 * that are more powerful than shell-style patterns.
 			 *
 			 * As an exception to that, though, always quote "[]", as that's
 			 * much more likely to be an attempt to write an array type name
 			 * than it is to be the start of a regexp bracket expression.
 			 */
 			if ((inquotes || force_escape) &&
 				strchr("|*+?()[]{}.^$\\", ch))
 				appendPQExpBufferChar(curbuf, '\\');
 			else if (ch == '[' && cp[1] == ']')
 				appendPQExpBufferChar(curbuf, '\\');
 			i = PQmblenBounded(cp, encoding);
 			while (i--)
 			{
 				if (left)
 					appendPQExpBufferChar(&left_literal, *cp);
 				appendPQExpBufferChar(curbuf, *cp++);
 			}
 		}
 	}
 	appendPQExpBufferStr(curbuf, ")$");

 	if (namebuf)
 	{
 		appendPQExpBufferStr(namebuf, curbuf->data);
 		termPQExpBuffer(curbuf);
 		curbuf--;
 	}

 	if (schemabuf && curbuf >= buf)
 	{
 		appendPQExpBufferStr(schemabuf, curbuf->data);
 		termPQExpBuffer(curbuf);
 		curbuf--;
 	}

 	if (dbnamebuf && curbuf >= buf)
 	{
 		if (want_literal_dbname)
 			appendPQExpBufferStr(dbnamebuf, left_literal.data);
 		else
 			appendPQExpBufferStr(dbnamebuf, curbuf->data);
 		termPQExpBuffer(curbuf);
 	}

 	if (want_literal_dbname)
 		termPQExpBuffer(&left_literal);
 }