src/backend/parser/parser.c - hawq - Git at Google

 /*-------------------------------------------------------------------------
  *
  * parser.c
  *		Main entry point/driver for PostgreSQL grammar
  *
  * Note that the grammar is not allowed to perform any table access
  * (since we need to be able to do basic parsing even while inside an
  * aborted transaction).  Therefore, the data structures returned by
  * the grammar are "raw" parsetrees that still need to be analyzed by
  * analyze.c and related files.
  *
  *
  * Portions Copyright (c) 1996-2009, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
  * IDENTIFICATION
  *	  $PostgreSQL: pgsql/src/backend/parser/parser.c,v 1.78 2009/06/11 14:49:00 momjian Exp $
  *
  *-------------------------------------------------------------------------
  */

 #include "postgres.h"

 #include "parser/gramparse.h"	/* required before parser/gram.h! */
 #include "parser/gram.h"
 #include "parser/parser.h"


 List	   *parsetree;			/* result of parsing is left here */

 static bool have_lookahead;		/* is lookahead info valid? */
 static int	lookahead_token;	/* one-token lookahead */
 static YYSTYPE lookahead_yylval;	/* yylval for lookahead token */
 static YYLTYPE lookahead_yylloc;	/* yylloc for lookahead token */


 /*
  * raw_parser
  *		Given a query in string form, do lexical and grammatical analysis.
  *
  * Returns a list of raw (un-analyzed) parse trees.
  */
 List *
 raw_parser(const char *str)
 {
 	int			yyresult;

 	parsetree = NIL;			/* in case grammar forgets to set it */
 	have_lookahead = false;

 	scanner_init(str);
 	parser_init();

 	yyresult = base_yyparse();

 	scanner_finish();

 	if (yyresult)				/* error */
 		return NIL;

 	return parsetree;
 }


 /*
  * pg_parse_string_token - get the value represented by a string literal
  *
  * Given the textual form of a SQL string literal, produce the represented
  * value as a palloc'd string.  It is caller's responsibility that the
  * passed string does represent one single string literal.
  *
  * We export this function to avoid having plpgsql depend on internal details
  * of the core grammar (such as the token code assigned to SCONST).  Note
  * that since the scanner isn't presently re-entrant, this cannot be used
  * during use of the main parser/scanner.
  */
 char *
 pg_parse_string_token(const char *token)
 {
 	int			ctoken;

 	scanner_init(token);

 	ctoken = base_yylex();

 	if (ctoken != SCONST)		/* caller error */
 		elog(ERROR, "expected string constant, got token code %d", ctoken);

 	scanner_finish();

 	return base_yylval.str;
 }


 /*
  * Intermediate filter between parser and base lexer (base_yylex in scan.l).
  *
  * The filter is needed because in some cases the standard SQL grammar
  * requires more than one token lookahead.	We reduce these cases to one-token
  * lookahead by combining tokens here, in order to keep the grammar LALR(1).
  *
  * Using a filter is simpler than trying to recognize multiword tokens
  * directly in scan.l, because we'd have to allow for comments between the
  * words.  Furthermore it's not clear how to do it without re-introducing
  * scanner backtrack, which would cost more performance than this filter
  * layer does.
  */
 int
 filtered_base_yylex(void)
 {
 	int			cur_token;
 	int			next_token;
 	YYSTYPE		cur_yylval;
 	YYLTYPE		cur_yylloc;

 	/* Get next token --- we might already have it */
 	if (have_lookahead)
 	{
 		cur_token = lookahead_token;
 		base_yylval = lookahead_yylval;
 		base_yylloc = lookahead_yylloc;
 		have_lookahead = false;
 	}
 	else
 		cur_token = base_yylex();

 	/* Do we need to look ahead for a possible multiword token? */
 	switch (cur_token)
 	{
 		case NULLS_P:

 			/*
 			 * NULLS FIRST and NULLS LAST must be reduced to one token
 			 */
 			cur_yylval = base_yylval;
 			cur_yylloc = base_yylloc;
 			next_token = base_yylex();
 			switch (next_token)
 			{
 				case FIRST_P:
 					cur_token = NULLS_FIRST;
 					break;
 				case LAST_P:
 					cur_token = NULLS_LAST;
 					break;
 				default:
 					/* save the lookahead token for next time */
 					lookahead_token = next_token;
 					lookahead_yylval = base_yylval;
 					lookahead_yylloc = base_yylloc;
 					have_lookahead = true;
 					/* and back up the output info to cur_token */
 					base_yylval = cur_yylval;
 					base_yylloc = cur_yylloc;
 					break;
 			}
 			break;

 		case WITH:

 			/*
 			 * WITH TIME, CASCADED, LOCAL, or CHECK must be reduced to one token
 			 *
 			 * XXX an alternative way is to recognize just WITH_TIME and put
 			 * the ugliness into the datetime datatype productions instead of
 			 * WITH CHECK OPTION.  However that requires promoting WITH to a
 			 * fully reserved word.  If we ever have to do that anyway
 			 * (perhaps for SQL99 recursive queries), come back and simplify
 			 * this code.
 			 */
 			cur_yylval = base_yylval;
 			cur_yylloc = base_yylloc;
 			next_token = base_yylex();
 			switch (next_token)
 			{
 				case TIME:
 					cur_token = WITH_TIME;
 					break;
 				case CASCADED:
 					cur_token = WITH_CASCADED;
 					break;
 				case LOCAL:
 					cur_token = WITH_LOCAL;
 					break;
 				case CHECK:
 					cur_token = WITH_CHECK;
 					break;
 				default:
 					/* save the lookahead token for next time */
 					lookahead_token = next_token;
 					lookahead_yylval = base_yylval;
 					lookahead_yylloc = base_yylloc;
 					have_lookahead = true;
 					/* and back up the output info to cur_token */
 					base_yylval = cur_yylval;
 					base_yylloc = cur_yylloc;
 					break;
 			}
 			break;

 		default:
 			break;
 	}

 	return cur_token;
 }
	/*-------------------------------------------------------------------------
	*
	* parser.c
	* Main entry point/driver for PostgreSQL grammar
	*
	* Note that the grammar is not allowed to perform any table access
	* (since we need to be able to do basic parsing even while inside an
	* aborted transaction). Therefore, the data structures returned by
	* the grammar are "raw" parsetrees that still need to be analyzed by
	* analyze.c and related files.
	*
	*
	* Portions Copyright (c) 1996-2009, PostgreSQL Global Development Group
	* Portions Copyright (c) 1994, Regents of the University of California
	*
	* IDENTIFICATION
	* $PostgreSQL: pgsql/src/backend/parser/parser.c,v 1.78 2009/06/11 14:49:00 momjian Exp $
	*
	*-------------------------------------------------------------------------
	*/

	#include "postgres.h"

	#include "parser/gramparse.h" /* required before parser/gram.h! */
	#include "parser/gram.h"
	#include "parser/parser.h"


	List parsetree; / result of parsing is left here */

	static bool have_lookahead; /* is lookahead info valid? */
	static int lookahead_token; /* one-token lookahead */
	static YYSTYPE lookahead_yylval; /* yylval for lookahead token */
	static YYLTYPE lookahead_yylloc; /* yylloc for lookahead token */


	/*
	* raw_parser
	* Given a query in string form, do lexical and grammatical analysis.
	*
	* Returns a list of raw (un-analyzed) parse trees.
	*/
	List *
	raw_parser(const char *str)
	{
	int yyresult;

	parsetree = NIL; /* in case grammar forgets to set it */
	have_lookahead = false;

	scanner_init(str);
	parser_init();

	yyresult = base_yyparse();

	scanner_finish();

	if (yyresult) /* error */
	return NIL;

	return parsetree;
	}


	/*
	* pg_parse_string_token - get the value represented by a string literal
	*
	* Given the textual form of a SQL string literal, produce the represented
	* value as a palloc'd string. It is caller's responsibility that the
	* passed string does represent one single string literal.
	*
	* We export this function to avoid having plpgsql depend on internal details
	* of the core grammar (such as the token code assigned to SCONST). Note
	* that since the scanner isn't presently re-entrant, this cannot be used
	* during use of the main parser/scanner.
	*/
	char *
	pg_parse_string_token(const char *token)
	{
	int ctoken;

	scanner_init(token);

	ctoken = base_yylex();

	if (ctoken != SCONST) /* caller error */
	elog(ERROR, "expected string constant, got token code %d", ctoken);

	scanner_finish();

	return base_yylval.str;
	}


	/*
	* Intermediate filter between parser and base lexer (base_yylex in scan.l).
	*
	* The filter is needed because in some cases the standard SQL grammar
	* requires more than one token lookahead. We reduce these cases to one-token
	* lookahead by combining tokens here, in order to keep the grammar LALR(1).
	*
	* Using a filter is simpler than trying to recognize multiword tokens
	* directly in scan.l, because we'd have to allow for comments between the
	* words. Furthermore it's not clear how to do it without re-introducing
	* scanner backtrack, which would cost more performance than this filter
	* layer does.
	*/
	int
	filtered_base_yylex(void)
	{
	int cur_token;
	int next_token;
	YYSTYPE cur_yylval;
	YYLTYPE cur_yylloc;

	/* Get next token --- we might already have it */
	if (have_lookahead)
	{
	cur_token = lookahead_token;
	base_yylval = lookahead_yylval;
	base_yylloc = lookahead_yylloc;
	have_lookahead = false;
	}
	else
	cur_token = base_yylex();

	/* Do we need to look ahead for a possible multiword token? */
	switch (cur_token)
	{
	case NULLS_P:

	/*
	* NULLS FIRST and NULLS LAST must be reduced to one token
	*/
	cur_yylval = base_yylval;
	cur_yylloc = base_yylloc;
	next_token = base_yylex();
	switch (next_token)
	{
	case FIRST_P:
	cur_token = NULLS_FIRST;
	break;
	case LAST_P:
	cur_token = NULLS_LAST;
	break;
	default:
	/* save the lookahead token for next time */
	lookahead_token = next_token;
	lookahead_yylval = base_yylval;
	lookahead_yylloc = base_yylloc;
	have_lookahead = true;
	/* and back up the output info to cur_token */
	base_yylval = cur_yylval;
	base_yylloc = cur_yylloc;
	break;
	}
	break;

	case WITH:

	/*
	* WITH TIME, CASCADED, LOCAL, or CHECK must be reduced to one token
	*
	* XXX an alternative way is to recognize just WITH_TIME and put
	* the ugliness into the datetime datatype productions instead of
	* WITH CHECK OPTION. However that requires promoting WITH to a
	* fully reserved word. If we ever have to do that anyway
	* (perhaps for SQL99 recursive queries), come back and simplify
	* this code.
	*/
	cur_yylval = base_yylval;
	cur_yylloc = base_yylloc;
	next_token = base_yylex();
	switch (next_token)
	{
	case TIME:
	cur_token = WITH_TIME;
	break;
	case CASCADED:
	cur_token = WITH_CASCADED;
	break;
	case LOCAL:
	cur_token = WITH_LOCAL;
	break;
	case CHECK:
	cur_token = WITH_CHECK;
	break;
	default:
	/* save the lookahead token for next time */
	lookahead_token = next_token;
	lookahead_yylval = base_yylval;
	lookahead_yylloc = base_yylloc;
	have_lookahead = true;
	/* and back up the output info to cur_token */
	base_yylval = cur_yylval;
	base_yylloc = cur_yylloc;
	break;
	}
	break;

	default:
	break;
	}

	return cur_token;
	}