src/bin/psql/psqlscan.l - hawq - Git at Google

 %{
 /*-------------------------------------------------------------------------
  *
  * psqlscan.l
  *	  lexical scanner for psql
  *
  * This code is mainly needed to determine where the end of a SQL statement
  * is: we are looking for semicolons that are not within quotes, comments,
  * or parentheses.  The most reliable way to handle this is to borrow the
  * backend's flex lexer rules, lock, stock, and barrel.  The rules below
  * are (except for a few) the same as the backend's, but their actions are
  * just ECHO whereas the backend's actions generally do other things.
  *
  * XXX The rules in this file must be kept in sync with the backend lexer!!!
  *
  * XXX Avoid creating backtracking cases --- see the backend lexer for info.
  *
  * The most difficult aspect of this code is that we need to work in multibyte
  * encodings that are not ASCII-safe.  A "safe" encoding is one in which each
  * byte of a multibyte character has the high bit set (it's >= 0x80).  Since
  * all our lexing rules treat all high-bit-set characters alike, we don't
  * really need to care whether such a byte is part of a sequence or not.
  * In an "unsafe" encoding, we still expect the first byte of a multibyte
  * sequence to be >= 0x80, but later bytes might not be.  If we scan such
  * a sequence as-is, the lexing rules could easily be fooled into matching
  * such bytes to ordinary ASCII characters.  Our solution for this is to
  * substitute 0xFF for each non-first byte within the data presented to flex.
  * The flex rules will then pass the FF's through unmolested.  The emit()
  * subroutine is responsible for looking back to the original string and
  * replacing FF's with the corresponding original bytes.
  *
  * Portions Copyright (c) 1996-2010, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
  * IDENTIFICATION
  *	  src/bin/psql/psqlscan.l
  *
  *-------------------------------------------------------------------------
  */
 #include "postgres_fe.h"

 #include "psqlscan.h"

 #include <ctype.h>

 #include "common.h"
 #include "settings.h"
 #include "variables.h"

 /*
  * Fold a three-part version number into a single integer so that two
  * versions can be compared with the ordinary relational operators.
  *
  * Every argument is parenthesized so that an expression passed as an
  * argument cannot be regrouped by the shifts and additions around it.
  */
 #define unify_version(a,b,c) (((a) << 16) + ((b) << 8) + (c))
 #if unify_version(YY_FLEX_MAJOR_VERSION,YY_FLEX_MINOR_VERSION,YY_FLEX_SUBMINOR_VERSION) < unify_version(2,5,35)
 /*
  * Prototypes for the flex accessor functions, supplied by hand when the
  * scanner is generated by a flex older than 2.5.35.
  * NOTE(review): presumably those flex releases emit the definitions without
  * prior declarations -- confirm against the flex version in use.
  */
 int yyget_lineno  (void);
 FILE *yyget_in  (void);
 FILE *yyget_out  (void);
 int yyget_leng  (void);
 char *yyget_text  (void);
 void yyset_lineno (int  line_number );
 void yyset_in (FILE *  in_str );
 void yyset_out (FILE *  out_str );
 int yyget_debug  (void);
 void yyset_debug (int  bdebug );
 int yylex_destroy  (void);
 #endif


 /*
  * We use a stack of flex buffers to handle substitution of psql variables.
  * Each stacked buffer contains the as-yet-unread text from one psql variable.
  * When we pop the stack all the way, we resume reading from the outer buffer
  * identified by scanbufhandle.
  *
  * The <<EOF>> rule uses origstring as the reference text for emit() when it
  * is set, and falls back to bufstring when origstring is NULL.
  */
 typedef struct StackElem
 {
 	YY_BUFFER_STATE buf;		/* flex input control structure */
 	char	   *bufstring;		/* data actually being scanned by flex */
 	char	   *origstring;		/* copy of original data, if needed */
 	char	   *varname;		/* name of variable providing data, or NULL */
 	struct StackElem *next;		/* link to the rest of the stack */
 } StackElem;

 /*
  * All working state of the lexer must be stored in PsqlScanStateData
  * between calls.  This allows us to have multiple open lexer operations,
  * which is needed for nested include files.  The lexer itself is not
  * recursive, but it must be re-entrant.
  */
 typedef struct PsqlScanStateData
 {
 	StackElem  *buffer_stack;	/* stack of variable expansion buffers */
 	/*
 	 * These variables always refer to the outer buffer, never to any
 	 * stacked variable-expansion buffer.  They are filled in by
 	 * psql_scan_setup; scanline is the caller's pointer, not a copy.
 	 */
 	YY_BUFFER_STATE scanbufhandle;
 	char	   *scanbuf;		/* start of outer-level input buffer */
 	const char *scanline;		/* current input line at outer level */

 	/* safe_encoding, curline, refline are used by emit() to replace FFs */
 	int			encoding;		/* encoding being used now */
 	bool		safe_encoding;	/* is current encoding "safe"? */
 	const char *curline;		/* actual flex input string for cur buf */
 	const char *refline;		/* original data for cur buffer */

 	/*
 	 * All this state lives across successive input lines, until explicitly
 	 * reset by psql_scan_reset.
 	 *
 	 * start_state is handed to BEGIN() before each yylex() call and
 	 * refreshed from YY_START afterwards.  paren_depth is maintained by the
 	 * "(" and ")" rules and consulted by the ";" rule.  xcdepth counts
 	 * comment openers seen while already inside a comment.  dolqstart is a
 	 * pg_strdup'd copy of the opening $foo$ delimiter, freed and set to NULL
 	 * when the matching closing delimiter is seen.
 	 */
 	int			start_state;	/* saved YY_START */
 	int			paren_depth;	/* depth of nesting in parentheses */
 	int			xcdepth;		/* depth of nesting in slash-star comments */
 	char	   *dolqstart;		/* current $foo$ quote start string */
 } PsqlScanStateData;

 /*
  * File-level variables through which the rule actions reach the caller's
  * data.  cur_state is assigned by psql_scan_setup and psql_scan; output_buf
  * is assigned by psql_scan before it calls yylex().
  */
 static PsqlScanState cur_state;	/* current state while active */

 static PQExpBuffer output_buf;	/* current output buffer */

 /*
  * these variables do not need to be saved across calls
  *
  * They are read and written by the <xslasharg> rules.
  * NOTE(review): presumably they are set up by the backslash-option entry
  * point before it calls yylex() -- that code is outside this section.
  */
 static enum slash_option_type option_type;
 static char *option_quote;


 /*
  * Return values from yylex()
  *
  * LEXRES_OK is returned by the rules of every backslash-command state
  * (command name, argument, and command end), not only by argument rules.
  */
 #define LEXRES_EOL			0	/* end of input */
 #define LEXRES_SEMI			1	/* command-terminating semicolon found */
 #define LEXRES_BACKSLASH	2	/* backslash command start */
 #define LEXRES_OK			3	/* OK completion of backslash argument */


 int	yylex(void);

 static void push_new_buffer(const char *newstr, const char *varname);
 static void pop_buffer_stack(PsqlScanState state);
 static bool var_is_current_source(PsqlScanState state, const char *varname);
 static YY_BUFFER_STATE prepare_buffer(const char *txt, int len,
 									  char **txtcopy);
 static void emit(const char *txt, int len);
 static void escape_variable(bool as_ident);

 /*
  * Replace any existing ECHO macro so that every ECHO in the rules below
  * passes the matched text to emit().
  */
 #ifdef ECHO
 #undef ECHO
 #endif
 #define ECHO emit(yytext, yyleng)

 %}

 %option 8bit
 %option never-interactive
 %option nodefault
 %option noinput
 %option nounput
 %option noyywrap

 /*
  * All of the following definitions and rules should exactly match
  * src/backend/parser/scan.l so far as the flex patterns are concerned.
  * The rule bodies are just ECHO as opposed to what the backend does,
  * however.  (But be sure to duplicate code that affects the lexing process,
  * such as BEGIN().)  Also, psqlscan uses a single <<EOF>> rule whereas
  * scan.l has a separate one for each exclusive state.
  */

 /*
  * OK, here is a short description of lex/flex rules behavior.
  * The longest pattern which matches an input string is always chosen.
  * For equal-length patterns, the first occurring in the rules list is chosen.
  * INITIAL is the starting state, to which all non-conditional rules apply.
  * Exclusive states change parsing rules while the state is active.  When in
  * an exclusive state, only those rules defined for that state apply.
  *
  * We use exclusive states for quoted strings, extended comments,
  * and to eliminate parsing troubles for numeric strings.
  * Exclusive states:
  *  <xb> bit string literal
  *  <xc> extended C-style comments
  *  <xd> delimited identifiers (double-quoted identifiers)
  *  <xh> hexadecimal numeric string
  *  <xq> standard quoted strings
  *  <xe> extended quoted strings (support backslash escape sequences)
  *  <xdolq> $foo$ quoted strings
  *  <xui> quoted identifier with Unicode escapes
  *  <xus> quoted string with Unicode escapes
  *
  * Note: we intentionally don't mimic the backend's <xeu> state; we have
  * no need to distinguish it from <xe> state, and no good way to get out
  * of it in error cases.  The backend just throws yyerror() in those
  * cases, but that's not an option here.
  */

 %x xb
 %x xc
 %x xd
 %x xh
 %x xe
 %x xq
 %x xdolq
 %x xui
 %x xus
 /* Additional exclusive states for psql only: lex backslash commands */
 %x xslashcmd
 %x xslasharg
 %x xslashquote
 %x xslashbackquote
 %x xslashdefaultarg
 %x xslashquotedarg
 %x xslashwholeline
 %x xslashend

 /*
  * In order to make the world safe for Windows and Mac clients as well as
  * Unix ones, we accept either \n or \r as a newline.  A DOS-style \r\n
  * sequence will be seen as two successive newlines, but that doesn't cause
  * any problems.  Comments that start with -- and extend to the next
  * newline are treated as equivalent to a single whitespace character.
  *
  * NOTE a fine point: if there is no newline following --, we will absorb
  * everything to the end of the input as a comment.  This is correct.  Older
  * versions of Postgres failed to recognize -- as a comment if the input
  * did not end with a newline.
  *
  * XXX perhaps \f (formfeed) should be treated as a newline as well?
  *
  * XXX if you change the set of whitespace characters, fix scanner_isspace()
  * to agree, and see also the plpgsql lexer.
  */

 space			[ \t\n\r\f]
 horiz_space		[ \t\f]
 newline			[\n\r]
 non_newline		[^\n\r]

 comment			("--"{non_newline}*)

 whitespace		({space}+|{comment})

 /*
  * SQL requires at least one newline in the whitespace separating
  * string literals that are to be concatenated.  Silly, but who are we
  * to argue?  Note that {whitespace_with_newline} should not have * after
  * it, whereas {whitespace} should generally have a * after it...
  */

 special_whitespace		({space}+|{comment}{newline})
 horiz_whitespace		({horiz_space}|{comment})
 whitespace_with_newline	({horiz_whitespace}*{newline}{special_whitespace}*)

 /*
  * To ensure that {quotecontinue} can be scanned without having to back up
  * if the full pattern isn't matched, we include trailing whitespace in
  * {quotestop}.  This matches all cases where {quotecontinue} fails to match,
  * except for {quote} followed by whitespace and just one "-" (not two,
  * which would start a {comment}).  To cover that we have {quotefail}.
  * The actions for {quotestop} and {quotefail} must throw back characters
  * beyond the quote proper.
  */
 quote			'
 quotestop		{quote}{whitespace}*
 quotecontinue	{quote}{whitespace_with_newline}{quote}
 quotefail		{quote}{whitespace}*"-"

 /* Bit string
  * It is tempting to scan the string for only those characters
  * which are allowed. However, this leads to silently swallowed
  * characters if illegal characters are included in the string.
  * For example, if xbinside is [01] then B'ABCD' is interpreted
  * as a zero-length string, and the ABCD' is lost!
  * Better to pass the string forward and let the input routines
  * validate the contents.
  */
 xbstart			[bB]{quote}
 xbinside		[^']*

 /* Hexadecimal number */
 xhstart			[xX]{quote}
 xhinside		[^']*

 /* National character */
 xnstart			[nN]{quote}

 /* Quoted string that allows backslash escapes */
 xestart			[eE]{quote}
 xeinside		[^\\']+
 xeescape		[\\][^0-7]
 xeoctesc		[\\][0-7]{1,3}
 xehexesc		[\\]x[0-9A-Fa-f]{1,2}
 xeunicode		[\\](u[0-9A-Fa-f]{4}|U[0-9A-Fa-f]{8})
 xeunicodefail	[\\](u[0-9A-Fa-f]{0,3}|U[0-9A-Fa-f]{0,7})

 /* Extended quote
  * xqdouble implements embedded quote, ''''
  */
 xqstart			{quote}
 xqdouble		{quote}{quote}
 xqinside		[^']+

 /* $foo$ style quotes ("dollar quoting")
  * The quoted string starts with $foo$ where "foo" is an optional string
  * in the form of an identifier, except that it may not contain "$",
  * and extends to the first occurrence of an identical string.
  * There is *no* processing of the quoted text.
  *
  * {dolqfailed} is an error rule to avoid scanner backup when {dolqdelim}
  * fails to match its trailing "$".
  */
 dolq_start		[A-Za-z\200-\377_]
 dolq_cont		[A-Za-z\200-\377_0-9]
 dolqdelim		\$({dolq_start}{dolq_cont}*)?\$
 dolqfailed		\${dolq_start}{dolq_cont}*
 dolqinside		[^$]+

 /* Double quote
  * Allows embedded spaces and other special characters into identifiers.
  */
 dquote			\"
 xdstart			{dquote}
 xdstop			{dquote}
 xddouble		{dquote}{dquote}
 xdinside		[^"]+

 /* Unicode escapes */
 uescape			[uU][eE][sS][cC][aA][pP][eE]{whitespace}*{quote}[^']{quote}
 /* error rule to avoid backup */
 uescapefail		("-"|[uU][eE][sS][cC][aA][pP][eE]{whitespace}*"-"|[uU][eE][sS][cC][aA][pP][eE]{whitespace}*{quote}[^']|[uU][eE][sS][cC][aA][pP][eE]{whitespace}*{quote}|[uU][eE][sS][cC][aA][pP][eE]{whitespace}*|[uU][eE][sS][cC][aA][pP]|[uU][eE][sS][cC][aA]|[uU][eE][sS][cC]|[uU][eE][sS]|[uU][eE]|[uU])

 /* Quoted identifier with Unicode escapes */
 xuistart		[uU]&{dquote}
 xuistop1		{dquote}{whitespace}*{uescapefail}?
 xuistop2		{dquote}{whitespace}*{uescape}

 /* Quoted string with Unicode escapes */
 xusstart		[uU]&{quote}
 xusstop1		{quote}{whitespace}*{uescapefail}?
 xusstop2		{quote}{whitespace}*{uescape}

 /* error rule to avoid backup */
 xufailed		[uU]&


 /* C-style comments
  *
  * The "extended comment" syntax closely resembles allowable operator syntax.
  * The tricky part here is to get lex to recognize a string starting with
  * slash-star as a comment, when interpreting it as an operator would produce
  * a longer match --- remember lex will prefer a longer match!  Also, if we
  * have something like plus-slash-star, lex will think this is a 3-character
  * operator whereas we want to see it as a + operator and a comment start.
  * The solution is two-fold:
  * 1. append {op_chars}* to xcstart so that it matches as much text as
  *    {operator} would. Then the tie-breaker (first matching rule of same
  *    length) ensures xcstart wins.  We put back the extra stuff with yyless()
  *    in case it contains a star-slash that should terminate the comment.
  * 2. In the operator rule, check for slash-star within the operator, and
  *    if found throw it back with yyless().  This handles the plus-slash-star
  *    problem.
  * Dash-dash comments have similar interactions with the operator rule.
  */
 xcstart			\/\*{op_chars}*
 xcstop			\*+\/
 xcinside		[^*/]+

 digit			[0-9]
 ident_start		[A-Za-z\200-\377_]
 ident_cont		[A-Za-z\200-\377_0-9\$]

 identifier		{ident_start}{ident_cont}*

 typecast		"::"
 dot_dot			\.\.
 colon_equals	":="

 /*
  * "self" is the set of chars that should be returned as single-character
  * tokens.  "op_chars" is the set of chars that can make up "Op" tokens,
  * which can be one or more characters long (but if a single-char token
  * appears in the "self" set, it is not to be returned as an Op).  Note
  * that the sets overlap, but each has some chars that are not in the other.
  *
  * If you change either set, adjust the character lists appearing in the
  * rule for "operator"!
  */
 self			[,()\[\].;\:\+\-\*\/\%\^\<\>\=]
 op_chars		[\~\!\@\#\^\&\|\`\?\+\-\*\/\%\<\>\=]
 operator		{op_chars}+

 /* We no longer allow unary minus in numbers.
  * Instead we pass it separately to the parser, where it gets
  * coerced via doNegate() -- Leon, Aug 20 1999
  *
  * {decimalfail} is used because we would like "1..10" to lex as 1, dot_dot, 10.
  *
  * {realfail1} and {realfail2} are added to prevent the need for scanner
  * backup when the {real} rule fails to match completely.
  */

 integer			{digit}+
 decimal			(({digit}*\.{digit}+)|({digit}+\.{digit}*))
 decimalfail		{digit}+\.\.
 real			({integer}|{decimal})[Ee][-+]?{digit}+
 realfail1		({integer}|{decimal})[Ee]
 realfail2		({integer}|{decimal})[Ee][-+]

 param			\${integer}

 other			.

 /*
  * Dollar quoted strings are totally opaque, and no escaping is done on them.
  * Other quoted strings must allow some special characters such as single-quote
  *  and newline.
  * Embedded single-quotes are implemented both in the SQL standard
  *  style of two adjacent single quotes "''" and in the Postgres/Java style
  *  of escaped-quote "\'".
  * Other embedded escaped characters are matched explicitly and the leading
  *  backslash is dropped from the string.
  * Note that xcstart must appear before operator, as explained above!
  *  Also whitespace (comment) must appear before operator.
  */

 %%

 {whitespace}	{
 					/*
 					 * Note that the whitespace rule includes both true
 					 * whitespace and single-line ("--" style) comments.
 					 * We suppress whitespace at the start of the query
 					 * buffer.  We also suppress all single-line comments,
 					 * which is pretty dubious but is the historical
 					 * behavior.
 					 *
 					 * output_buf->len == 0 means nothing has been collected
 					 * yet.  A match beginning with '-' can only be the
 					 * comment alternative, since {space} never matches '-'.
 					 */
 					if (!(output_buf->len == 0 || yytext[0] == '-'))
 						ECHO;
 				}

 {xcstart}		{
 					/* Outermost comment opener: no nested level is open yet */
 					cur_state->xcdepth = 0;
 					BEGIN(xc);
 					/* Put back any characters past slash-star; see above */
 					yyless(2);
 					ECHO;
 				}

 <xc>{xcstart}	{
 					/* Opener inside a comment: one more level to close */
 					cur_state->xcdepth++;
 					/* Put back any characters past slash-star; see above */
 					yyless(2);
 					ECHO;
 				}

 <xc>{xcstop}	{
 					/*
 					 * Star-slash closes one level.  We leave the comment
 					 * state only when no nested level remains open;
 					 * otherwise we just drop one level and stay in <xc>.
 					 */
 					if (cur_state->xcdepth <= 0)
 					{
 						BEGIN(INITIAL);
 					}
 					else
 						cur_state->xcdepth--;
 					ECHO;
 				}

 <xc>{xcinside}	{
 					/* Run of characters containing neither star nor slash */
 					ECHO;
 				}

 <xc>{op_chars}	{
 					/*
 					 * A single operator character.  This is the rule that
 					 * consumes a lone slash, which {xcinside} cannot match.
 					 */
 					ECHO;
 				}

 <xc>\*+			{
 					/* Run of stars that is not followed by a slash */
 					ECHO;
 				}

 {xbstart}		{
 					/* b or B followed by a quote: start of a bit string */
 					BEGIN(xb);
 					ECHO;
 				}
 <xb>{quotestop}	|
 <xb>{quotefail} {
 					/*
 					 * Closing quote.  The pattern also matched trailing
 					 * whitespace (and, for {quotefail}, a single "-"), so
 					 * keep only the quote and return the rest to the input.
 					 */
 					yyless(1);
 					BEGIN(INITIAL);
 					ECHO;
 				}
 <xh>{xhinside}	|
 <xb>{xbinside}	{
 					/* Literal body: anything up to the next quote */
 					ECHO;
 				}
 <xh>{quotecontinue}	|
 <xb>{quotecontinue}	{
 					/*
 					 * Quote, whitespace containing a newline, quote: the
 					 * literal continues, so stay in the current state.
 					 */
 					ECHO;
 				}

 {xhstart}		{
 					/* Hexadecimal bit type.
 					 * At some point we should simply pass the string
 					 * forward to the parser and label it there.
 					 * In the meantime, place a leading "x" on the string
 					 * to mark it for the input routine as a hex string.
 					 *
 					 * (The text above describes the backend lexer; the
 					 * action here only switches state and echoes the
 					 * matched text.)
 					 */
 					BEGIN(xh);
 					ECHO;
 				}
 <xh>{quotestop}	|
 <xh>{quotefail} {
 					/* Same handling as for <xb>: keep only the quote */
 					yyless(1);
 					BEGIN(INITIAL);
 					ECHO;
 				}

 {xnstart}		{
 					/*
 					 * National character literal: consume just the n/N
 					 * here and leave the quote to be rescanned, so that
 					 * the string itself is handled by {xqstart}.
 					 */
 					yyless(1);				/* eat only 'n' this time */
 					ECHO;
 				}

 {xqstart}		{
 					/*
 					 * Plain quote: standard_strings() decides whether
 					 * backslashes inside the literal are ordinary
 					 * characters (xq) or introduce escapes (xe).
 					 */
 					if (standard_strings())
 						BEGIN(xq);
 					else
 						BEGIN(xe);
 					ECHO;
 				}
 {xestart}		{
 					/* e or E followed by a quote always allows escapes */
 					BEGIN(xe);
 					ECHO;
 				}
 {xusstart}		{
 					/* u& or U& followed by a quote: Unicode-escape string */
 					BEGIN(xus);
 					ECHO;
 				}
 <xq,xe>{quotestop}	|
 <xq,xe>{quotefail} {
 					/*
 					 * Closing quote; throw back the whitespace (and "-")
 					 * that the pattern matched beyond the quote.
 					 */
 					yyless(1);
 					BEGIN(INITIAL);
 					ECHO;
 				}
 <xus>{xusstop1} {
 					/*
 					 * Closing quote with no complete UESCAPE clause after
 					 * it: keep only the quote, rescan whatever followed.
 					 */
 					yyless(1);
 					BEGIN(INITIAL);
 					ECHO;
 				}
 <xus>{xusstop2} {
 					/*
 					 * Closing quote followed by a complete UESCAPE clause:
 					 * the whole match is echoed as part of the literal.
 					 */
 					BEGIN(INITIAL);
 					ECHO;
 				}
 <xq,xe,xus>{xqdouble} {
 					/* Doubled quote is an embedded quote, not a terminator */
 					ECHO;
 				}
 <xq,xus>{xqinside}  {
 					ECHO;
 				}
 <xe>{xeinside}  {
 					/* Run containing neither quote nor backslash */
 					ECHO;
 				}
 <xe>{xeunicode} {
 					ECHO;
 				}
 <xe>{xeunicodefail}	{
 					/* Incomplete \u or \U escape; echoed unchanged */
 					ECHO;
 				}
 <xe>{xeescape}  {
 					/*
 					 * Backslash plus one non-octal character.  Matching
 					 * backslash-quote here is what keeps an escaped quote
 					 * from ending the literal.
 					 */
 					ECHO;
 				}
 <xe>{xeoctesc}  {
 					ECHO;
 				}
 <xe>{xehexesc}  {
 					ECHO;
 				}
 <xq,xe,xus>{quotecontinue} {
 					/* Literal continued after whitespace with a newline */
 					ECHO;
 				}
 <xe>.			{
 					/* This is only needed for \ just before EOF */
 					ECHO;
 				}

 {dolqdelim}		{
 					/*
 					 * Opening $foo$ delimiter.  Keep a private copy so the
 					 * closing delimiter can be compared against it, even
 					 * if that only arrives on a later input line.
 					 */
 					cur_state->dolqstart = pg_strdup(yytext);
 					BEGIN(xdolq);
 					ECHO;
 				}
 {dolqfailed}	{
 					/* throw back all but the initial "$" */
 					yyless(1);
 					ECHO;
 				}
 <xdolq>{dolqdelim} {
 					/*
 					 * Something shaped like a delimiter.  It ends the
 					 * quoted text only if it is identical to the opener.
 					 */
 					if (strcmp(yytext, cur_state->dolqstart) == 0)
 					{
 						free(cur_state->dolqstart);
 						cur_state->dolqstart = NULL;
 						BEGIN(INITIAL);
 					}
 					else
 					{
 						/*
 						 * When we fail to match $...$ to dolqstart, transfer
 						 * the $... part to the output, but put back the final
 						 * $ for rescanning.  Consider $delim$...$junk$delim$
 						 */
 						yyless(yyleng-1);
 					}
 					ECHO;
 				}
 <xdolq>{dolqinside} {
 					/* Run of characters containing no "$" */
 					ECHO;
 				}
 <xdolq>{dolqfailed} {
 					/* "$" plus identifier characters, but no closing "$" */
 					ECHO;
 				}
 <xdolq>.		{
 					/* This is only needed for $ inside the quoted text */
 					ECHO;
 				}

 {xdstart}		{
 					/* Opening double quote of a delimited identifier */
 					BEGIN(xd);
 					ECHO;
 				}
 {xuistart}		{
 					/* u& or U& plus double quote: Unicode-escape identifier */
 					BEGIN(xui);
 					ECHO;
 				}
 <xd>{xdstop}	{
 					BEGIN(INITIAL);
 					ECHO;
 				}
 <xui>{xuistop1}	{
 					/*
 					 * Closing double quote with no complete UESCAPE clause
 					 * after it: keep only the quote, rescan the rest.
 					 */
 					yyless(1);
 					BEGIN(INITIAL);
 					ECHO;
 				}
 <xui>{xuistop2}	{
 					/* Closing double quote plus a complete UESCAPE clause */
 					BEGIN(INITIAL);
 					ECHO;
 				}
 <xd,xui>{xddouble}	{
 					/* Doubled double quote is an embedded quote character */
 					ECHO;
 				}
 <xd,xui>{xdinside}	{
 					ECHO;
 				}

 {xufailed}	{
 					/* throw back all but the initial u/U */
 					yyless(1);
 					ECHO;
 				}

 {typecast}		{
 					/*
 					 * Taking "::" as one token means its second colon is
 					 * never seen as the start of a :name variable reference.
 					 */
 					ECHO;
 				}

 {dot_dot}		{
 					ECHO;
 				}

 {colon_equals}	{
 					ECHO;
 				}

 	/*
 	 * These rules are specific to psql --- they implement parenthesis
 	 * counting and detection of command-ending semicolon.  These must
 	 * appear before the {self} rule so that they take precedence over it.
 	 */

 "("				{
 					cur_state->paren_depth++;
 					ECHO;
 				}

 ")"				{
 					/* An unmatched ")" leaves the depth at zero */
 					if (cur_state->paren_depth > 0)
 						cur_state->paren_depth--;
 					ECHO;
 				}

 ";"				{
 					/*
 					 * The semicolon is always copied to the output.  It
 					 * ends the command only outside parentheses; inside
 					 * them, lexing simply continues.
 					 */
 					ECHO;
 					if (cur_state->paren_depth == 0)
 					{
 						/* Terminate lexing temporarily */
 						return LEXRES_SEMI;
 					}
 				}

 	/*
 	 * psql-specific rules to handle backslash commands and variable
 	 * substitution.  We want these before {self}, also.
 	 */

 "\\"[;:]		{
 					/* Force a semicolon or colon into the query buffer */
 					/* yytext + 1 skips the backslash, which is dropped */
 					emit(yytext + 1, 1);
 				}

 "\\"			{
 					/* Terminate lexing temporarily */
 					/* The backslash itself is not copied to the output */
 					return LEXRES_BACKSLASH;
 				}

 :[A-Za-z0-9_]+	{
 					/* Possible psql variable substitution */
 					/* yytext + 1 skips the leading colon */
 					const char *varname = yytext + 1;
 					const char *value;

 					value = GetVariable(pset.vars, varname);

 					if (value)
 					{
 						/* It is a variable, check for recursion */
 						if (var_is_current_source(cur_state, varname))
 						{
 							/* Recursive expansion --- don't go there */
 							psql_error("skipping recursive expansion of variable \"%s\"\n",
 									   varname);
 							/* Instead copy the string as is */
 							ECHO;
 						}
 						else
 						{
 							/* OK, perform substitution */
 							/*
 							 * The value is not copied to the output here;
 							 * it becomes lexer input and is scanned by
 							 * these same rules.
 							 */
 							push_new_buffer(value, varname);
 							/* yy_scan_string already made buffer active */
 						}
 					}
 					else
 					{
 						/*
 						 * if the variable doesn't exist we'll copy the
 						 * string as is
 						 */
 						ECHO;
 					}
 				}

 :'[A-Za-z0-9_]+'	{
 					/*
 					 * Quoted variable reference with as_ident = false.
 					 * NOTE(review): presumably emits the value quoted as
 					 * a literal -- escape_variable is defined elsewhere.
 					 */
 					escape_variable(false);
 				}

 :\"[A-Za-z0-9_]+\"	{
 					/*
 					 * Quoted variable reference with as_ident = true.
 					 * NOTE(review): presumably emits the value quoted as
 					 * an identifier -- escape_variable is defined elsewhere.
 					 */
 					escape_variable(true);
 				}

 	/*
 	 * Back to backend-compatible rules.
 	 */

 {self}			{
 					ECHO;
 				}

 {operator}		{
 					/*
 					 * Check for embedded slash-star or dash-dash; those
 					 * are comment starts, so operator must stop there.
 					 * Note that slash-star or dash-dash at the first
 					 * character will match a prior rule, not this one.
 					 */
 					int		nchars = yyleng;
 					char   *slashstar = strstr(yytext, "/*");
 					char   *dashdash = strstr(yytext, "--");

 					/*
 					 * From here on, slashstar points at whichever comment
 					 * start comes first in the token, or is NULL if the
 					 * token contains neither.
 					 */
 					if (slashstar && dashdash)
 					{
 						/* if both appear, take the first one */
 						if (slashstar > dashdash)
 							slashstar = dashdash;
 					}
 					else if (!slashstar)
 						slashstar = dashdash;
 					if (slashstar)
 						nchars = slashstar - yytext;

 					/*
 					 * For SQL compatibility, '+' and '-' cannot be the
 					 * last char of a multi-char operator unless the operator
 					 * contains chars that are not in SQL operators.
 					 * The idea is to lex '=-' as two operators, but not
 					 * to forbid operator names like '?-' that could not be
 					 * sequences of SQL operators.
 					 */
 					while (nchars > 1 &&
 						   (yytext[nchars-1] == '+' ||
 							yytext[nchars-1] == '-'))
 					{
 						int		ic;

 						/*
 						 * Search the characters before the trailing +/-
 						 * for one that cannot occur in an SQL operator.
 						 */
 						for (ic = nchars-2; ic >= 0; ic--)
 						{
 							if (strchr("~!@#^&|`?%", yytext[ic]))
 								break;
 						}
 						if (ic >= 0)
 							break; /* found a char that makes it OK */
 						nchars--; /* else remove the +/-, and check again */
 					}

 					if (nchars < yyleng)
 					{
 						/* Strip the unwanted chars from the token */
 						yyless(nchars);
 					}
 					ECHO;
 				}

 {param}			{
 					/* "$" followed by digits: a positional parameter */
 					ECHO;
 				}

 {integer}		{
 					ECHO;
 				}
 {decimal}		{
 					ECHO;
 				}
 {decimalfail}	{
 					/* throw back the .., and treat as integer */
 					yyless(yyleng-2);
 					ECHO;
 				}
 {real}			{
 					ECHO;
 				}
 {realfail1}		{
 					/*
 					 * throw back the [Ee], and treat as {decimal}.  Note
 					 * that it is possible the input is actually {integer},
 					 * but since this case will almost certainly lead to a
 					 * syntax error anyway, we don't bother to distinguish.
 					 */
 					yyless(yyleng-1);
 					ECHO;
 				}
 {realfail2}		{
 					/* throw back the [Ee][+-], and proceed as above */
 					yyless(yyleng-2);
 					ECHO;
 				}


 {identifier}	{
 					ECHO;
 				}

 {other}			{
 					/* Any single character no earlier rule matched */
 					ECHO;
 				}


 	/*
 	 * Everything from here down is psql-specific.
 	 */

 <<EOF>>			{
 					/*
 					 * End of the buffer being scanned.  This single rule
 					 * serves every start state.  If no variable expansion
 					 * is in progress, the input line itself is exhausted.
 					 */
 					StackElem  *stackelem = cur_state->buffer_stack;

 					if (stackelem == NULL)
 						return LEXRES_EOL; /* end of input reached */

 					/*
 					 * We were expanding a variable, so pop the inclusion
 					 * stack and keep lexing
 					 */
 					pop_buffer_stack(cur_state);

 					/*
 					 * Resume with whatever is now on top: an outer
 					 * variable expansion, or else the original input line.
 					 * curline and refline are reset to match.
 					 */
 					stackelem = cur_state->buffer_stack;
 					if (stackelem != NULL)
 					{
 						yy_switch_to_buffer(stackelem->buf);
 						cur_state->curline = stackelem->bufstring;
 						cur_state->refline = stackelem->origstring ? stackelem->origstring : stackelem->bufstring;
 					}
 					else
 					{
 						yy_switch_to_buffer(cur_state->scanbufhandle);
 						cur_state->curline = cur_state->scanbuf;
 						cur_state->refline = cur_state->scanline;
 					}
 				}

 	/*
 	 * Exclusive lexer states to handle backslash command lexing
 	 */

 <xslashcmd>{
 	/* command name ends at whitespace or backslash; eat all else */

 {space}|"\\"	{
 					/* Leave the terminating character in the input */
 					yyless(0);
 					return LEXRES_OK;
 				}

 	/* GPDB: This rule removes the need for a space after the "!" command */
 "!"             {
 					/* The "!" is copied, and the name ends right after it */
 					ECHO;
 					return LEXRES_OK;
 				}

 {other}			{ ECHO; }

 }

 <xslasharg>{
 	/* eat any whitespace, then decide what to do at first nonblank */

 {space}+		{ }

 "\\"			{
 					/*
 					 * backslash is end of command or next command, do not eat
 					 *
 					 * XXX this means we can't conveniently accept options
 					 * that start with a backslash; therefore, option
 					 * processing that encourages use of backslashes is rather
 					 * broken.
 					 */
 					yyless(0);
 					return LEXRES_OK;
 				}

 {quote}			{
 					/*
 					 * Record the quote type for the caller; the quote
 					 * character itself is not copied to the output.
 					 */
 					*option_quote = '\'';
 					BEGIN(xslashquote);
 				}

 "`"				{
 					if (option_type == OT_VERBATIM)
 					{
 						/* in verbatim mode, backquote is not special */
 						ECHO;
 						BEGIN(xslashdefaultarg);
 					}
 					else
 					{
 						*option_quote = '`';
 						BEGIN(xslashbackquote);
 					}
 				}

 :[A-Za-z0-9_]*	{
 					/* Possible psql variable substitution */
 					/*
 					 * The name part may be empty here, so a lone colon
 					 * also matches; GetVariable is then called with an
 					 * empty name.
 					 */
 					if (option_type == OT_VERBATIM)
 						ECHO;
 					else
 					{
 						const char *value;

 						value = GetVariable(pset.vars, yytext + 1);

 						/*
 						 * The variable value is just emitted without any
 						 * further examination.  This is consistent with the
 						 * pre-8.0 code behavior, if not with the way that
 						 * variables are handled outside backslash commands.
 						 * Note that we needn't guard against recursion here.
 						 */
 						/* An unset variable contributes nothing */
 						if (value)
 							appendPQExpBufferStr(output_buf, value);
 					}

 					*option_quote = ':';

 					return LEXRES_OK;
 				}

 :'[A-Za-z0-9_]+'	{
 					/*
 					 * In verbatim mode the text is copied as is and lexing
 					 * continues in this same state; otherwise the escaped
 					 * value forms the whole argument.
 					 */
 					if (option_type == OT_VERBATIM)
 						ECHO;
 					else
 					{
 						escape_variable(false);
 						return LEXRES_OK;
 					}
 				}


 :\"[A-Za-z0-9_]+\"	{
 					/* As above, but with as_ident = true */
 					if (option_type == OT_VERBATIM)
 						ECHO;
 					else
 					{
 						escape_variable(true);
 						return LEXRES_OK;
 					}
 				}

 "|"				{
 					/* The "|" itself is part of the argument either way */
 					ECHO;
 					if (option_type == OT_FILEPIPE)
 					{
 						/* treat like whole-string case */
 						BEGIN(xslashwholeline);
 					}
 					else
 					{
 						/* treat like default case */
 						BEGIN(xslashdefaultarg);
 					}
 				}

 {dquote}		{
 					/* The double quote is kept in the output at this stage */
 					*option_quote = '"';
 					ECHO;
 					BEGIN(xslashquotedarg);
 				}

 {other}			{
 					/* First character of an ordinary unquoted argument */
 					ECHO;
 					BEGIN(xslashdefaultarg);
 				}

 }

 <xslashquote>{
 	/*
 	 * single-quoted text: copy literally except for '' and backslash
 	 * sequences
 	 */

 	/* closing quote ends the argument and is not copied */
 {quote}			{ return LEXRES_OK; }

 	/* a doubled quote yields one quote character */
 {xqdouble}		{ appendPQExpBufferChar(output_buf, '\''); }

 	/* named escapes are replaced by the character they stand for */
 "\\n"			{ appendPQExpBufferChar(output_buf, '\n'); }
 "\\t"			{ appendPQExpBufferChar(output_buf, '\t'); }
 "\\b"			{ appendPQExpBufferChar(output_buf, '\b'); }
 "\\r"			{ appendPQExpBufferChar(output_buf, '\r'); }
 "\\f"			{ appendPQExpBufferChar(output_buf, '\f'); }

 {xeoctesc}		{
 					/* octal case */
 					/* yytext + 1 skips the backslash */
 					appendPQExpBufferChar(output_buf,
 										  (char) strtol(yytext + 1, NULL, 8));
 				}

 {xehexesc}		{
 					/* hex case */
 					/* yytext + 2 skips the backslash and the "x" */
 					appendPQExpBufferChar(output_buf,
 										  (char) strtol(yytext + 2, NULL, 16));
 				}

 	/* any other backslash pair: drop the backslash, keep the character */
 "\\".			{ emit(yytext + 1, 1); }

 {other}|\n		{ ECHO; }

 }

 <xslashbackquote>{
 	/*
 	 * backticked text: copy everything until next backquote or end of line.
 	 * Invocation of the command will happen in psql_scan_slash_option.
 	 */

 	/* closing backquote ends the argument and is not copied */
 "`"				{ return LEXRES_OK; }

 {other}|\n		{ ECHO; }

 }

 <xslashdefaultarg>{
 	/*
 	 * Copy everything until unquoted whitespace or end of line.  Quotes
 	 * do not get stripped yet.
 	 */

 {space}			{
 					/* Whitespace ends the argument; leave it in the input */
 					yyless(0);
 					return LEXRES_OK;
 				}

 "\\"			{
 					/*
 					 * unquoted backslash is end of command or next command,
 					 * do not eat
 					 *
 					 * (this was not the behavior pre-8.0, but it seems
 					 * consistent)
 					 */
 					yyless(0);
 					return LEXRES_OK;
 				}

 {dquote}		{
 					/* Quoted section begins; the quote stays in the output */
 					*option_quote = '"';
 					ECHO;
 					BEGIN(xslashquotedarg);
 				}

 {other}			{ ECHO; }

 }

 <xslashquotedarg>{
 	/* double-quoted text within a default-type argument: copy */

 {dquote}		{
 					/* Closing quote is copied too; resume unquoted scanning */
 					ECHO;
 					BEGIN(xslashdefaultarg);
 				}

 	/* whitespace, including newline, does not end the argument here */
 {other}|\n		{ ECHO; }

 }

 <xslashwholeline>{
 	/* copy everything until end of input line */
 	/* but suppress leading whitespace */

 {space}+		{
 					/* Whitespace is leading only while nothing is collected */
 					if (output_buf->len > 0)
 						ECHO;
 				}

 {other}			{ ECHO; }

 }

 <xslashend>{
 	/* at end of command, eat a double backslash, but not anything else */

 "\\\\"			{ return LEXRES_OK; }

 {other}|\n		{
 					/* Anything else is left in the input untouched */
 					yyless(0);
 					return LEXRES_OK;
 				}

 }

 %%

 /*
  * Allocate a new lexer working state.
  *
  * The struct is obtained zero-filled from pg_malloc_zero() and then passed
  * through psql_scan_reset() before being returned to the caller.
  */
 PsqlScanState
 psql_scan_create(void)
 {
 	PsqlScanState new_state = (PsqlScanStateData *) pg_malloc_zero(sizeof(PsqlScanStateData));

 	psql_scan_reset(new_state);
 	return new_state;
 }

 /*
  * Tear down a lexer working state created by psql_scan_create.
  *
  * The state pointer is invalid once this returns.
  */
 void
 psql_scan_destroy(PsqlScanState state)
 {
 	/* close out any scan that is still set up */
 	psql_scan_finish(state);

 	/* clear the state that is carried across input lines */
 	psql_scan_reset(state);

 	/* finally give back the struct itself */
 	free(state);
 }

 /*
  * Prepare the lexer to scan one input line.
  *
  * Later calls to the psql_scan routines will read the line_len bytes that
  * start at *line.  Call psql_scan_finish once scanning of the line is done.
  *
  * Only a pointer to the caller's storage is kept, so the string at *line
  * must stay valid and unmodified until psql_scan_finish has been called.
  */
 void
 psql_scan_setup(PsqlScanState state,
 				const char *line, int line_len)
 {
 	/* A scan that is already set up must be finished before a new one */
 	psql_assert(state->scanbufhandle == NULL);
 	psql_assert(state->buffer_stack == NULL);

 	/* Capture the encoding and whether it needs byte substitution */
 	state->encoding = pset.encoding;
 	state->safe_encoding = pg_valid_server_encoding_id(state->encoding);

 	/* prepare_buffer needs cur_state to be set */
 	cur_state = state;

 	/* Build the flex input buffer, translated and padded as required */
 	state->scanbufhandle = prepare_buffer(line, line_len, &state->scanbuf);
 	state->scanline = line;

 	/* Lookaside pointers, used if an unsafe encoding has to be mapped back */
 	state->curline = state->scanbuf;
 	state->refline = line;
 }

 /*
  * Lexically scan SQL command text.
  *
  * Scanning continues in the text that was handed to psql_scan_setup, and
  * whatever is consumed is appended (possibly transformed) to query_buf.
  *
  * The result code tells why scanning stopped:
  *
  * PSCAN_SEMICOLON: a command-ending semicolon was found (and has been
  * copied into query_buf).  The caller should execute the command held in
  * query_buf, clear query_buf, and call again to scan the rest of the line.
  *
  * PSCAN_BACKSLASH: a backslash starting a psql special command was found.
  * Any text preceding it on the line is already in query_buf.  Typically the
  * caller proceeds with psql_scan_slash_command(), possibly
  * psql_scan_slash_option(), and psql_scan_slash_command_end().
  *
  * PSCAN_INCOMPLETE: end of line was reached while the SQL command is still
  * incomplete.
  *
  * PSCAN_EOL: end of line was reached and nothing lexical marks the command
  * as incomplete.  The caller may or may not choose to send it.
  *
  * After PSCAN_INCOMPLETE or PSCAN_EOL, psql_scan_finish() should be called
  * next; the cycle can then be repeated with a fresh input line.
  *
  * In every case *prompt is set to the prompt type code appropriate for the
  * next line-input operation.
  */
 PsqlScanResult
 psql_scan(PsqlScanState state,
 		  PQExpBuffer query_buf,
 		  promptStatus_t *prompt)
 {
 	PsqlScanResult result;
 	int			lexresult;

 	/* A scan must be in progress */
 	psql_assert(state->scanbufhandle);

 	/* Publish the per-call context through the statics yylex relies on */
 	cur_state = state;
 	output_buf = query_buf;

 	/* Resume in the innermost pending buffer, else the outer scan buffer */
 	yy_switch_to_buffer(state->buffer_stack != NULL ?
 						state->buffer_stack->buf :
 						state->scanbufhandle);

 	BEGIN(state->start_state);

 	/* Run the lexer */
 	lexresult = yylex();

 	/* Save the lexer's start condition back into the state struct */
 	state->start_state = YY_START;

 	/* The two "found a terminator" outcomes need no further inspection */
 	if (lexresult == LEXRES_SEMI)
 	{
 		*prompt = PROMPT_READY;
 		return PSCAN_SEMICOLON;
 	}
 	if (lexresult == LEXRES_BACKSLASH)
 	{
 		*prompt = PROMPT_READY;
 		return PSCAN_BACKSLASH;
 	}
 	if (lexresult != LEXRES_EOL)
 	{
 		/* can't get here */
 		fprintf(stderr, "invalid yylex result\n");
 		exit(1);
 	}

 	/*
 	 * End of input.  Every start condition other than a clean INITIAL means
 	 * the command is incomplete; what differs is the prompt to show.  This
 	 * switch must cover all non-slash-command states.
 	 */
 	result = PSCAN_INCOMPLETE;
 	switch (state->start_state)
 	{
 		case INITIAL:
 			if (state->paren_depth > 0)
 				*prompt = PROMPT_PAREN;
 			else if (query_buf->len > 0)
 			{
 				result = PSCAN_EOL;
 				*prompt = PROMPT_CONTINUE;
 			}
 			else
 			{
 				/* never bother to send an empty buffer */
 				*prompt = PROMPT_READY;
 			}
 			break;
 		case xb:
 		case xh:
 		case xe:
 		case xq:
 		case xus:
 			*prompt = PROMPT_SINGLEQUOTE;
 			break;
 		case xd:
 		case xui:
 			*prompt = PROMPT_DOUBLEQUOTE;
 			break;
 		case xc:
 			*prompt = PROMPT_COMMENT;
 			break;
 		case xdolq:
 			*prompt = PROMPT_DOLLARQUOTE;
 			break;
 		default:
 			/* can't get here */
 			fprintf(stderr, "invalid YY_START\n");
 			exit(1);
 	}

 	return result;
 }

 /*
  * Finish scanning a string: throw away any input not yet read and release
  * the associated resources.  The PsqlScanState itself is not freed, and the
  * lexer scan state is not reset; resetting is the job of psql_scan_reset(),
  * which is an independent operation.
  *
  * Calling this when no scan is in progress is allowed, which simplifies
  * error recovery.
  */
 void
 psql_scan_finish(PsqlScanState state)
 {
 	/* Discard every variable-expansion buffer still on the stack */
 	while (state->buffer_stack != NULL)
 		pop_buffer_stack(state);

 	/* Release the flex handle for the outer scan buffer, if there is one */
 	if (state->scanbufhandle != NULL)
 	{
 		yy_delete_buffer(state->scanbufhandle);
 		state->scanbufhandle = NULL;
 	}

 	/* ... and the text copy behind it (free(NULL) is a no-op) */
 	free(state->scanbuf);
 	state->scanbuf = NULL;
 }

 /*
  * Put the lexer scanning state back to its starting conditions.  Do this
  * for \r, or whenever the accumulated contents of query_buf are discarded.
  * It is not needed when the buffer is executed and cleared after a
  * PSCAN_SEMICOLON or PSCAN_EOL result, since the scan state must already be
  * INITIAL when those results are returned.
  *
  * Flushing unread input is a separate matter, handled by
  * psql_scan_finish().
  */
 void
 psql_scan_reset(PsqlScanState state)
 {
 	/* Forget any pending dollar-quote start tag; free(NULL) is harmless */
 	free(state->dolqstart);
 	state->dolqstart = NULL;

 	state->xcdepth = 0;			/* not really necessary */
 	state->paren_depth = 0;
 	state->start_state = INITIAL;
 }

 /*
  * Report whether the lexer is currently in an "inside quotes" state, which
  * here means any saved start condition other than INITIAL.
  *
  * This is rather ugly, but mainloop.c needs it to keep its old behavior of
  * dropping blank lines that are not inside quotes without echoing them.
  */
 bool
 psql_scan_in_quote(PsqlScanState state)
 {
 	if (state->start_state == INITIAL)
 		return false;

 	return true;
 }

 /*
  * Scan the command name of a psql backslash command.  Call this after
  * psql_scan() has returned PSCAN_BACKSLASH; the input is assumed to have
  * been consumed through the leading backslash.
  *
  * Returns a malloc'd copy of the command name as parsed from the input.
  */
 char *
 psql_scan_slash_command(PsqlScanState state)
 {
 	PQExpBufferData cmdbuf;

 	/* A scan must be in progress */
 	psql_assert(state->scanbufhandle);

 	/* Collect the name in a local buffer whose storage we hand back */
 	initPQExpBuffer(&cmdbuf);

 	/* Publish the per-call context through the statics yylex relies on */
 	cur_state = state;
 	output_buf = &cmdbuf;

 	/* Resume in the innermost pending buffer, else the outer scan buffer */
 	yy_switch_to_buffer(state->buffer_stack != NULL ?
 						state->buffer_stack->buf :
 						state->scanbufhandle);

 	BEGIN(xslashcmd);

 	/* Run the lexer; there are no possible errors in this lex state */
 	(void) yylex();

 	return cmdbuf.data;
 }

 /*
  * Parse off the next argument for a backslash command, and return it as a
  * malloc'd string.  If there are no more arguments, returns NULL.
  *
  * type tells what processing, if any, to perform on the option string;
  * for example, if it's a SQL identifier, we want to downcase any unquoted
  * letters.
  *
  * if quote is not NULL, *quote is set to 0 if no quoting was found, else
  * the quote symbol.
  *
  * if semicolon is true, unquoted trailing semicolon(s) that would otherwise
  * be taken as part of the option string will be stripped.
  *
  * NOTE: the only possible syntax errors for backslash options are unmatched
  * quotes, which are detected when we run out of input.  Therefore, on a
  * syntax error we just report it, throw away the string and return NULL;
  * there is no need to worry about flushing remaining input.
  */
 char *
 psql_scan_slash_option(PsqlScanState state,
 					   enum slash_option_type type,
 					   char *quote,
 					   bool semicolon)
 {
 	PQExpBufferData mybuf;
 	int			lexresult;
 	char		local_quote;
 	bool		badarg;

 	/* Must be scanning already */
 	psql_assert(state->scanbufhandle);

 	/* Give the lexer somewhere to record the quote symbol in any case */
 	if (quote == NULL)
 		quote = &local_quote;
 	*quote = 0;

 	/* Build a local buffer that we'll return the data of */
 	initPQExpBuffer(&mybuf);

 	/* Set up static variables that will be used by yylex */
 	cur_state = state;
 	output_buf = &mybuf;
 	option_type = type;
 	option_quote = quote;

 	if (state->buffer_stack != NULL)
 		yy_switch_to_buffer(state->buffer_stack->buf);
 	else
 		yy_switch_to_buffer(state->scanbufhandle);

 	if (type == OT_WHOLE_LINE)
 		BEGIN(xslashwholeline);
 	else
 		BEGIN(xslasharg);

 	/* And lex. */
 	lexresult = yylex();

 	/*
 	 * Check the lex result: we should have gotten back either LEXRES_OK
 	 * or LEXRES_EOL (the latter indicating end of string).  If we were inside
 	 * a quoted string, as indicated by YY_START, EOL is an error.
 	 */
 	psql_assert(lexresult == LEXRES_EOL || lexresult == LEXRES_OK);
 	badarg = false;
 	switch (YY_START)
 	{
 		case xslasharg:
 			/* empty arg, or possibly a psql variable substitution */
 			break;
 		case xslashquote:
 			if (lexresult != LEXRES_OK)
 				badarg = true;		/* hit EOL not ending quote */
 			break;
 		case xslashbackquote:
 			if (lexresult != LEXRES_OK)
 				badarg = true;		/* hit EOL not ending quote */
 			else
 			{
 				/*
 				 * Perform evaluation of backticked command: run the text
 				 * collected in mybuf through popen() and replace mybuf's
 				 * contents with the command's output.
 				 */
 				char	   *cmd = mybuf.data;
 				FILE	   *fd;
 				bool		error = false;
 				PQExpBufferData output;
 				char		buf[512];
 				size_t		result;

 				fd = popen(cmd, PG_BINARY_R);
 				if (!fd)
 				{
 					psql_error("%s: %s\n", cmd, strerror(errno));
 					error = true;
 				}

 				initPQExpBuffer(&output);

 				if (!error)
 				{
 					/* Read everything the command writes, a chunk at a time */
 					do
 					{
 						result = fread(buf, 1, sizeof(buf), fd);
 						if (ferror(fd))
 						{
 							psql_error("%s: %s\n", cmd, strerror(errno));
 							error = true;
 							break;
 						}
 						appendBinaryPQExpBuffer(&output, buf, result);
 					} while (!feof(fd));
 				}

 				/* Close the pipe whenever it was opened, even after errors */
 				if (fd && pclose(fd) == -1)
 				{
 					psql_error("%s: %s\n", cmd, strerror(errno));
 					error = true;
 				}

 				if (PQExpBufferBroken(&output))
 				{
 					psql_error("%s: out of memory\n", cmd);
 					error = true;
 				}

 				/* Now done with cmd, transfer result to mybuf */
 				resetPQExpBuffer(&mybuf);

 				/* On error mybuf is simply left empty */
 				if (!error)
 				{
 					/* strip any trailing newline */
 					if (output.len > 0 &&
 						output.data[output.len - 1] == '\n')
 						output.len--;
 					appendBinaryPQExpBuffer(&mybuf, output.data, output.len);
 				}

 				termPQExpBuffer(&output);
 			}
 			break;
 		case xslashdefaultarg:
 			/* Strip any trailing semi-colons if requested */
 			if (semicolon)
 			{
 				while (mybuf.len > 0 &&
 					   mybuf.data[mybuf.len - 1] == ';')
 				{
 					mybuf.data[--mybuf.len] = '\0';
 				}
 			}

 			/*
 			 * If SQL identifier processing was requested, then we strip out
 			 * excess double quotes and downcase unquoted letters.
 			 * Doubled double-quotes become output double-quotes, per spec.
 			 *
 			 * Note that a string like FOO"BAR"BAZ will be converted to
 			 * fooBARbaz; this is somewhat inconsistent with the SQL spec,
 			 * which would have us parse it as several identifiers.  But
 			 * for psql's purposes, we want a string like "foo"."bar" to
 			 * be treated as one option, so there's little choice.
 			 */
 			if (type == OT_SQLID || type == OT_SQLIDHACK)
 			{
 				bool		inquotes = false;
 				char	   *cp = mybuf.data;

 				while (*cp)
 				{
 					if (*cp == '"')
 					{
 						if (inquotes && cp[1] == '"')
 						{
 							/*
 							 * Doubled quote within a quoted section: keep
 							 * the first quote as data, remove the second.
 							 * This does not end the quoted section, so the
 							 * quoting state must not be toggled here.
 							 */
 							cp++;
 						}
 						else
 							inquotes = !inquotes;
 						/*
 						 * Collapse out quote at *cp.  strlen(cp) bytes
 						 * starting at cp + 1 include the terminating NUL.
 						 */
 						memmove(cp, cp + 1, strlen(cp));
 						mybuf.len--;
 						/* do not advance cp */
 					}
 					else
 					{
 						/* OT_SQLIDHACK dequotes but never downcases */
 						if (!inquotes && type == OT_SQLID)
 							*cp = pg_tolower((unsigned char) *cp);
 						/* step over the whole (possibly multibyte) character */
 						cp += PQmblen(cp, pset.encoding);
 					}
 				}
 			}
 			break;
 		case xslashquotedarg:
 			/* must have hit EOL inside double quotes */
 			badarg = true;
 			break;
 		case xslashwholeline:
 			/* always okay */
 			break;
 		default:
 			/* can't get here */
 			fprintf(stderr, "invalid YY_START\n");
 			exit(1);
 	}

 	if (badarg)
 	{
 		psql_error("unterminated quoted string\n");
 		termPQExpBuffer(&mybuf);
 		return NULL;
 	}

 	/*
 	 * An unquoted empty argument isn't possible unless we are at end of
 	 * command.  Return NULL instead.
 	 */
 	if (mybuf.len == 0 && *quote == 0)
 	{
 		termPQExpBuffer(&mybuf);
 		return NULL;
 	}

 	/* Else return the completed string. */
 	return mybuf.data;
 }

 /*
  * Complete a backslash command by eating up any unused \\.
  */
 void
 psql_scan_slash_command_end(PsqlScanState state)
 {
 	/* A scan must be in progress */
 	psql_assert(state->scanbufhandle);

 	/* Publish the per-call context; no output buffer is supplied here */
 	cur_state = state;
 	output_buf = NULL;

 	/* Resume in the innermost pending buffer, else the outer scan buffer */
 	yy_switch_to_buffer(state->buffer_stack != NULL ?
 						state->buffer_stack->buf :
 						state->scanbufhandle);

 	BEGIN(xslashend);

 	/* Run the lexer; there are no possible errors in this lex state */
 	(void) yylex();
 }


 /*
  * Push the given string onto the stack of stuff to scan.
  *
  * cur_state must point to the active PsqlScanState.
  *
  * NOTE SIDE EFFECT: the new buffer is made the active flex input buffer.
  */
 static void
 push_new_buffer(const char *newstr, const char *varname)
 {
 	StackElem  *stackelem;

 	stackelem = (StackElem *) pg_malloc(sizeof(StackElem));

 	/*
 	 * In current usage, the passed varname points at the current flex
 	 * input buffer; we must copy it before calling prepare_buffer()
 	 * because that will change the buffer state.
 	 */
 	stackelem->varname = varname ? pg_strdup(varname) : NULL;

 	stackelem->buf = prepare_buffer(newstr, strlen(newstr),
 									&stackelem->bufstring);
 	cur_state->curline = stackelem->bufstring;
 	if (cur_state->safe_encoding)
 	{
 		stackelem->origstring = NULL;
 		cur_state->refline = stackelem->bufstring;
 	}
 	else
 	{
 		stackelem->origstring = pg_strdup(newstr);
 		cur_state->refline = stackelem->origstring;
 	}
 	stackelem->next = cur_state->buffer_stack;
 	cur_state->buffer_stack = stackelem;
 }

 /*
  * Remove and free the top entry of the buffer stack.  The stack must not
  * be empty.
  *
  * NB: the flex input state is unspecified afterwards; the caller has to
  * switch to an appropriate buffer before lexing continues.
  */
 static void
 pop_buffer_stack(PsqlScanState state)
 {
 	StackElem  *top = state->buffer_stack;

 	/* Unlink the element before tearing it down */
 	state->buffer_stack = top->next;

 	yy_delete_buffer(top->buf);
 	free(top->bufstring);
 	/* origstring and varname may be NULL; free(NULL) is a no-op */
 	free(top->origstring);
 	free(top->varname);
 	free(top);
 }

 /*
  * Tell whether the named variable is the source of any string currently
  * on the scan buffer stack.
  */
 static bool
 var_is_current_source(PsqlScanState state, const char *varname)
 {
 	StackElem  *elem = state->buffer_stack;

 	while (elem != NULL)
 	{
 		/* Entries without a recorded variable name never match */
 		if (elem->varname != NULL && strcmp(elem->varname, varname) == 0)
 			return true;
 		elem = elem->next;
 	}

 	return false;
 }

 /*
  * Set up a flex input buffer to scan the given data.  We always make a
  * copy of the data.  If working in an unsafe encoding, the copy has every
  * non-first byte of each multibyte character replaced by 0xFF, to avoid
  * fooling the lexer rules.
  *
  * txt/len give the data to scan.  *txtcopy receives the pg_malloc'd copy,
  * which holds len data bytes followed by the two terminator bytes flex
  * requires.
  *
  * cur_state must point to the active PsqlScanState.
  *
  * NOTE SIDE EFFECT: the new buffer is made the active flex input buffer.
  */
 static YY_BUFFER_STATE
 prepare_buffer(const char *txt, int len, char **txtcopy)
 {
 	char	   *newtxt;

 	/* Flex wants two \0 characters after the actual data */
 	newtxt = pg_malloc(len + 2);
 	*txtcopy = newtxt;
 	newtxt[len] = newtxt[len + 1] = YY_END_OF_BUFFER_CHAR;

 	if (cur_state->safe_encoding)
 		memcpy(newtxt, txt, len);
 	else
 	{
 		/* Gotta do it the hard way */
 		int		i = 0;

 		while (i < len)
 		{
 			int		thislen = PQmblen(txt + i, cur_state->encoding);

 			/* first byte should always be okay... */
 			newtxt[i] = txt[i];
 			i++;

 			/*
 			 * Mask the remaining bytes of this character.  Stop at the end
 			 * of the data even if the reported character length runs past
 			 * it (e.g. a truncated multibyte character at end of input);
 			 * otherwise we would clobber the terminator bytes and could
 			 * write beyond the allocation.
 			 */
 			while (--thislen > 0 && i < len)
 				newtxt[i++] = (char) 0xFF;
 		}
 	}

 	return yy_scan_buffer(newtxt, len + 2);
 }

 /*
  * emit() --- body for ECHO macro
  *
  * NB: use this for ALL text copied from the flex input data, and ONLY for
  * such text.  Passing anything that is not part of the yytext string is a
  * mistake.  Internally generated text can be appended to output_buf
  * directly.
  */
 static void
 emit(const char *txt, int len)
 {
 	const char *orig;

 	/* In a safe encoding the scanned text is the real text */
 	if (cur_state->safe_encoding)
 	{
 		appendBinaryPQExpBuffer(output_buf, txt, len);
 		return;
 	}

 	/*
 	 * Unsafe encoding: locate the same offset in the reference string, then
 	 * copy byte by byte, taking each 0xFF placeholder from the reference
 	 * text instead.
 	 */
 	orig = cur_state->refline + (txt - cur_state->curline);

 	while (len-- > 0)
 	{
 		char		ch = *txt;

 		if (ch == (char) 0xFF)
 			ch = *orig;
 		appendPQExpBufferChar(output_buf, ch);
 		txt++;
 		orig++;
 	}
 }

 /*
  * Substitute the escaped value of the psql variable named in yytext.
  *
  * The variable name is taken from yytext with its first two characters and
  * its last character skipped.  The value is escaped as an identifier when
  * as_ident is true, else as a literal, and appended to output_buf.  If the
  * variable is unset, there is no connection, or escaping fails, the
  * original text is emitted unchanged instead.
  */
 static void
 escape_variable(bool as_ident)
 {
 	char	   *last = &yytext[yyleng - 1];
 	char		held = *last;
 	const char *value;
 	char	   *escaped = NULL;

 	/* Temporarily cut off the final character to isolate the name */
 	*last = '\0';
 	value = GetVariable(pset.vars, yytext + 2);

 	/* Try to produce the escaped form */
 	if (value != NULL)
 	{
 		if (pset.db == NULL)
 			psql_error("can't escape without active connection\n");
 		else
 		{
 			size_t		vlen = strlen(value);

 			escaped = as_ident ?
 				PQescapeIdentifier(pset.db, value, vlen) :
 				PQescapeLiteral(pset.db, value, vlen);

 			if (escaped == NULL)
 				psql_error("%s", PQerrorMessage(pset.db));
 		}
 	}

 	/* Success: output the escaped value and we're done */
 	if (escaped != NULL)
 	{
 		appendPQExpBufferStr(output_buf, escaped);
 		PQfreemem(escaped);
 		return;
 	}

 	/*
 	 * Some kind of error occurred.  Put the final character back and pass
 	 * the original text through to the output buffer.
 	 */
 	*last = held;
 	emit(yytext, yyleng);
 }