doc/src/sql.ll - madlib - Git at Google

 /* -----------------------------------------------------------------------------
  * sql.ll
  *
  * A simple flex file for use in companion with sql.yy. Use case: Documenting
  * .sql files with tools like Doxygen.
  *
  * Revision History:
  * 0.2: Florian Schoppmann, 16 Jan 2011, Converted to C++
  * 0.1:          "        , 10 Jan 2011, Initial version.
  * -----------------------------------------------------------------------------
  */

 /* Definitions */

 /* Use C++ */
 %option c++

 /* instructs flex to generate a batch scanner, the opposite of interactive
  * scanners */
 %option batch

 /* change the name of the scanner class. results in "SQLFlexLexer" */
 %option prefix="SQL"

 /* Generate a "case-insensitive" scanner. The case of letters given in the
  * `flex' input patterns will be ignored, and tokens in the input will be
  * matched regardless of case */
 %option case-insensitive

 /* makes the scanner not call `yywrap()' upon an end-of-file, but simply assume
  * that there are no more files to scan */
 %option noyywrap

 /* We really use yymore, but only in more(). We need to provide this option
  * because flex will otherwise complain:
  * "error: 'yymore_used_but_not_detected' was not declared in this scope" */
 %option yymore

 /* instructs flex to generate a scanner which never considers its input
  * interactive. Normally, on each new input file the scanner calls isatty() in
  * an attempt to determine whether the scanner's input source is interactive and
  * thus should be read a character at a time. When this option is used, however,
  * then no such call is made.
  * We declare this option because otherwise flex will generate a redundant
  * declaration of isatty(), which may lead to compile errors. */
 %option never-interactive


 /* C++ Code */
 %{
     #if defined(__GNUC__)
         #pragma GCC diagnostic ignored "-Wconversion"
         #pragma GCC diagnostic ignored "-Wtype-limits"
     #endif

     /* We define COMPILING_SCANNER in order to know in sql.parser.hh whether we are compiling
      * the parser or the scanner */
     #define COMPILING_SCANNER 1

     #include "sql.parser.hh"

     #include <string>

     /* import the parser's token type into a local typedef */
     typedef bison::SQLParser::token    token;

     /* YY_USER_ACTION is called from the lex() function, which has the signature
      * and name as defined by macro YY_DECL. yylval, yylloc, and driver are
      * arguments. */
     #define YY_USER_ACTION preScannerAction(yylval, yylloc, driver);
 %}

 /* Definitions */
 /* Statement openers that put the scanner into declaration mode */
 CREATE_FUNCTION "CREATE"{SPACE}("OR"{SPACE}"REPLACE"{SPACE})?"FUNCTION"
 CREATE_AGGREGATE "CREATE"{SPACE}.*"AGGREGATE"
 /* SQL line comment, including its optional line terminator */
 COMMENT "--"[^\n\r]*(\n|\r\n?)?
 BEGIN_CCOMMENT "/*"
 /* Remainder of a C comment, up to and including the first terminator. Runs of
  * stars are consumed as a unit: a run followed by anything but a slash belongs
  * to the comment body, a run followed by a slash ends the comment. This also
  * covers comments that are closed by an even number of stars and a slash. */
 END_CCOMMENT ([^*]|\*+[^*/])*\*+"/"
 IDENTIFIER [[:alpha:]_][[:alnum:]_]*
 QUOTED_IDENTIFIER "\""{IDENTIFIER}"\""
 INTEGER [[:digit:]]+
 SPACE [[:space:]]+
 /* Dollar-quote delimiter: two dollar signs with an optional tag in between */
 DOLLARQUOTE "$$"|"$"{IDENTIFIER}"$"
 BEGIN_SPECIAL_COMMENT "/*+"
 END_SPECIAL_COMMENT "*/"
 /* Single-quoted string; a doubled quote or a backslash-quote does not end it */
 STRING_LITERAL "'"([^']|''|\\')*"'"
 EXPONENT "e"("+"|"-")?[[:digit:]]+
 /* Decimal notation, exponent notation, or a quoted NaN/Infinity */
 FLOATING_POINT_LITERAL ([[:digit:]]+"."[[:digit:]]*|"."[[:digit:]]+){EXPONENT}?|[[:digit:]]+{EXPONENT}|"'"("+"|"-")?("NaN"|"Infinity")"'"


 /* State definitions */

 /* Inclusive start conditions: rules without an explicit start condition stay
  * active in these states. One DECL/ARGLIST/OPTIONS triple exists for function
  * declarations and one for aggregate declarations. */
 %s sFUNC_DECL
 %s sFUNC_ARGLIST
 %s sFUNC_OPTIONS
 %s sAGG_DECL
 %s sAGG_ARGLIST
 %s sAGG_OPTIONS
 /* Exclusive start conditions: only rules tagged with the state are active.
  * Used while inside a dollar-quoted string and inside a C comment. */
 %x sDOLLAR_STRING_LITERAL
 %x sCCOMMENT


 %%    /* Rules */

     /* Ignore spaces */
 {SPACE}

 {COMMENT} {
     /* Ordinary line comments are dropped. A comment whose third character is
      * an exclamation mark is forwarded, with the two leading dashes replaced
      * by slashes. */
     const bool isDoxygenComment = (yytext[2] == '!');
     if (isDoxygenComment) {
         yytext[0] = '/';
         yytext[1] = '/';
         yylval->str = strdup(yytext);
         return token::COMMENT;
     }
 }

     /* Since not all of Greenplum and PostgreSQL allow the following
      * - labeling arguments of aggregate functions,
      * - default arguments
      * we will simply uncomment C style comments in argument lists when they
      * begin with BEGIN_SPECIAL_COMMENT. */
 <sFUNC_ARGLIST,sAGG_ARGLIST>{
     {BEGIN_SPECIAL_COMMENT} {
         /* opening marker of a comment whose content is to be scanned */
         return token::BEGIN_SPECIAL;
     }
     {END_SPECIAL_COMMENT} {
         /* matching closing marker */
         return token::END_SPECIAL;
     }
 }

     /* A C comment is split up into two parts. The reason is that flex tries to
      * match the longest rule and we want to give "normal" C comments a low
      * precedence according to this rule. */
 {BEGIN_CCOMMENT} {
     /* Enter the comment state and keep the opener in yytext, so that the rule
      * matching the rest of the comment sees the complete text. */
     yy_push_state(sCCOMMENT);
     more();
 }

 <sCCOMMENT>{END_CCOMMENT} {
     /* yytext still starts with the comment opener that was preserved by
      * more(). The character right after the opener tells Doxygen comments
      * from ordinary ones; the latter are ignored. */
     const char marker = yytext[2];

     yy_pop_state();
     if (marker == '*' || marker == '!') {
         yylval->str = strdup(yytext);
         return token::COMMENT;
     }
 }

 <sDOLLAR_STRING_LITERAL>{
     {DOLLARQUOTE} {
         /* The literal ends only at a delimiter that equals the opening one.
          * Both are compared without their leading dollar sign.
          * NOTE(review): a delimiter that does not match is consumed as a
          * whole, including its final dollar sign, which could also have been
          * the start of the real closing delimiter -- confirm whether that
          * case matters. */
         if (strncmp(yytext + 1, stringLiteralQuotation, yyleng - 1) == 0) {
             /* The body of the literal is not reported. The placeholder is
              * duplicated like every other token string in this file, so that
              * all values of yylval->str are heap-allocated. */
             yylval->str = strdup("\"<omitted by lexer>\"");
             yy_pop_state();
             free(stringLiteralQuotation);
             stringLiteralQuotation = NULL;
             return token::STRING_LITERAL;
         }
     }
     /* Speed up the lexer by matching large chunks of text if possible */
     [^$]*
     "$"
 }

 {CREATE_FUNCTION} {
     /* a function declaration starts here */
     BEGIN(sFUNC_DECL);
     return token::CREATE_FUNCTION;
 }

 {CREATE_AGGREGATE} {
     /* an aggregate declaration starts here */
     BEGIN(sAGG_DECL);
     return token::CREATE_AGGREGATE;
 }

 <sFUNC_DECL,sAGG_DECL>"(" {
     /* The argument list opens: move from the declaration state to the
      * argument-list state of the same kind (function or aggregate). */
     BEGIN((YY_START == sFUNC_DECL) ? sFUNC_ARGLIST : sAGG_ARGLIST);
     return '(';
 }
 <sFUNC_ARGLIST,sAGG_ARGLIST>")" {
     /* The argument list closes: move on to the options state of the same
      * kind (function or aggregate). */
     BEGIN((YY_START == sFUNC_ARGLIST) ? sFUNC_OPTIONS : sAGG_OPTIONS);
     return ')';
 }

     /* We disallow using the following keywords as argument names */
 <sFUNC_ARGLIST,sFUNC_OPTIONS,sAGG_ARGLIST,sAGG_OPTIONS>{
     /* Keywords recognized in argument lists and option lists. They are listed
      * before the generic identifier rule, so they are never reported as
      * identifiers in these states. */

     /* argument modes */
     "IN" return token::IN;
     "OUT" return token::OUT;
     "INOUT" return token::INOUT;

     /* words that occur in type names */
     "BIT" return token::BIT;
     "CHARACTER" return token::CHARACTER;
     "DOUBLE" return token::DOUBLE;
     "PRECISION" return token::PRECISION;
     "TIME" return token::TIME;
     "WITH" return token::WITH;
     "WITHOUT" return token::WITHOUT;
     "VOID" return token::VOID;
     "VARYING" return token::VARYING;
     "ZONE" return token::ZONE;

     /* return type clause */
     "RETURNS" return token::RETURNS;
     "SETOF" return token::SETOF;

     /* function options */
     "AS" return token::AS;
     "LANGUAGE" return token::LANGUAGE;
     "IMMUTABLE" return token::IMMUTABLE;
     "STABLE" return token::STABLE;
     "VOLATILE" return token::VOLATILE;
     "CALLED"{SPACE}"ON"{SPACE}"NULL"{SPACE}"INPUT" return token::CALLED_ON_NULL_INPUT;
     /* the long form and STRICT yield the same token */
     "RETURNS"{SPACE}"NULL"{SPACE}"ON"{SPACE}"NULL"{SPACE}"INPUT"|"STRICT" {
         return token::RETURNS_NULL_ON_NULL_INPUT; }
     /* the leading EXTERNAL is optional */
     ("EXTERNAL"{SPACE})?"SECURITY"{SPACE}"INVOKER" return token::SECURITY_INVOKER;
     ("EXTERNAL"{SPACE})?"SECURITY"{SPACE}"DEFINER" return token::SECURITY_DEFINER;

     "DEFAULT" return token::DEFAULT;
 }

     /* We disallow using the following keywords as argument names */
 <sAGG_ARGLIST,sAGG_OPTIONS>{
     /* Keywords that are only recognized while scanning an aggregate
      * declaration (argument list or options). */
     "SFUNC" return token::SFUNC;
     "PREFUNC" return token::PREFUNC;
     "FINALFUNC" return token::FINALFUNC;
     "STYPE" return token::STYPE;
     "INITCOND" return token::INITCOND;
     "SORTOP" return token::SORTOP;
 }

 <sFUNC_DECL,sFUNC_ARGLIST,sFUNC_OPTIONS,sAGG_DECL,sAGG_ARGLIST,sAGG_OPTIONS>{
     "NULL" {
         yylval->str = strdup("NULL");
         return token::NULL_KEYWORD;
     }

     {QUOTED_IDENTIFIER} {
         /* strip the surrounding double quotes; the spelling is kept as it is */
         yytext[yyleng - 1] = 0;
         yylval->str = strdup(yytext + 1);
         return token::IDENTIFIER;
     }
     /* unquoted identifiers are reported in lower case */
     {IDENTIFIER} { yylval->str = strlowerdup(yytext); return token::IDENTIFIER; }

     {INTEGER} {
         yylval->str = strdup(yytext);
         return token::INTEGER_LITERAL;
     }
     {FLOATING_POINT_LITERAL} {
         yylval->str = strdup(yytext);
         return token::FLOAT_LITERAL;
     }
     {STRING_LITERAL} {
         /* String literals in single quotes are reported in double quotes */
         yytext[0] = yytext[yyleng - 1] = '"';
         yylval->str = strdup(yytext);
         return token::STRING_LITERAL;
     }
     {DOLLARQUOTE} {
         /* String literals in dollar quotes, see
         http://www.postgresql.org/docs/current/static/sql-syntax-lexical.html#SQL-SYNTAX-DOLLAR-QUOTING */
         /* Remember the opening delimiter without its leading dollar sign,
          * i.e. the tag followed by a dollar sign. strdup() yields a
          * NUL-terminated copy, so comparing it against a longer candidate for
          * the closing delimiter cannot run past the end of the buffer. */
         stringLiteralQuotation = strdup(yytext + 1);
         yy_push_state(sDOLLAR_STRING_LITERAL);
     }

     /* any other character except the semicolon is a token of its own */
     [^;] { return yytext[0]; }
 }

 ";" { BEGIN(INITIAL); return ';'; }

     /* Default action if nothing else applies: consume next character and do nothing */
 .|\n { BEGIN(INITIAL); }

 %%

 /* C++ code */

 namespace bison {

 /* The class declaration of SQLScanner is in sql.yy (because bison generates
  * the header file). */

 /* Creates a scanner that reads from arg_yyin and writes to arg_yyout. No
  * dollar-quote delimiter is pending and no text is preserved initially. */
 SQLScanner::SQLScanner(std::istream *arg_yyin, std::ostream *arg_yyout)
   : SQLFlexLexer(arg_yyin, arg_yyout),
     stringLiteralQuotation(NULL),
     oldLength(0)
 {
     /* Request debug output; this only has an effect if %option debug or
      * flex -d is used */
     set_debug(1);
 }

 /* Releases the delimiter of a dollar-quoted literal that was still open when
  * scanning stopped. The pointer is NULL whenever no such literal is open, and
  * free(NULL) does nothing. */
 SQLScanner::~SQLScanner() {
     free(stringLiteralQuotation);
     stringLiteralQuotation = NULL;
 }

 /* Returns a newly allocated lower-case copy of inString, or NULL if the copy
  * could not be allocated. The copy comes from strdup(), so it has to be
  * released with free(). */
 char *SQLScanner::strlowerdup(const char *inString) {
     char *returnStr = strdup(inString);
     if (returnStr == NULL)
         return NULL;

     for (char *cursor = returnStr; *cursor != '\0'; ++cursor) {
         // tolower() is only defined for values representable as unsigned char
         // (and EOF), so a plain char must not be passed directly
         *cursor = static_cast<char>(tolower(static_cast<unsigned char>(*cursor)));
     }
     return returnStr;
 }

 /* Runs before every rule action (see YY_USER_ACTION) and advances the location
  * over the newly matched text: line breaks advance the line, every other
  * character advances the column. */
 void SQLScanner::preScannerAction(SQLParser::semantic_type * /* yylval */,
     SQLParser::location_type *yylloc, SQLDriver * /* driver */) {

     // The new token begins where the previous one ended
     yylloc->step();

     // Text preserved by more() has been counted before, so skip over it
     const size_t tokenLength = static_cast<size_t>(yyleng);
     size_t pos = oldLength;
     while (pos < tokenLength) {
         const char current = yytext[pos];
         const bool isLineBreak = (current == '\r' || current == '\n');

         if (!isLineBreak) {
             yylloc->columns(1);
         } else {
             // "\r\n" is one line break: consume the '\n' together with the '\r'
             if (current == '\r' && pos + 1 < tokenLength && yytext[pos + 1] == '\n')
                 ++pos;
             yylloc->lines(1);
         }
         ++pos;
     }

     // more() has to be called again if the next yytext is to be preserved, too
     oldLength = 0;
 }

 /* Keeps the current yytext as a prefix of the next token and records its
  * length, so that preScannerAction() does not count that text twice. */
 void SQLScanner::more() {
     yymore();
     oldLength = yyleng;
 }

 } // namespace bison
	/* -----------------------------------------------------------------------------
	* sql.ll
	*
	* A simple flex file for use in companion with sql.yy. Use case: Documenting
	* .sql files with tools like Doxygen.
	*
	* Revision History:
	* 0.2: Florian Schoppmann, 16 Jan 2011, Converted to C++
	* 0.1: " , 10 Jan 2011, Initial version.
	* -----------------------------------------------------------------------------
	*/

	/* Definitions */

	/* Use C++ */
	%option c++

	/* instructs flex to generate a batch scanner, the opposite of interactive
	* scanners */
	%option batch

	/* change the name of the scanner class. results in "SQLFlexLexer" */
	%option prefix="SQL"

	/* Generate a "case-insensitive" scanner. The case of letters given in the
	* `flex' input patterns will be ignored, and tokens in the input will be
	* matched regardless of case */
	%option case-insensitive

	/* makes the scanner not call `yywrap()' upon an end-of-file, but simply assume
	* that there are no more files to scan */
	%option noyywrap

	/* We really use yymore, but only in more(). We need to provide this option
	* because flex will otherwise complain:
	* "error: 'yymore_used_but_not_detected' was not declared in this scope" */
	%option yymore

	/* instructs flex to generate a scanner which never considers its input
	* interactive. Normally, on each new input file the scanner calls isatty() in
	* an attempt to determine whether the scanner's input source is interactive and
	* thus should be read a character at a time. When this option is used, however,
	* then no such call is made.
	* We declare this option because otherwise flex will generate a redundant
	* declaration of isatty(), which may lead to compile errors. */
	%option never-interactive


	/* C++ Code */
	%{
	#if defined(__GNUC__)
	#pragma GCC diagnostic ignored "-Wconversion"
	#pragma GCC diagnostic ignored "-Wtype-limits"
	#endif

	/* We define COMPILING_SCANNER in order to know in sql.parser.hh whether we are compiling
	* the parser or the scanner */
	#define COMPILING_SCANNER 1

	#include "sql.parser.hh"

	#include <string>

	/* import the parser's token type into a local typedef */
	typedef bison::SQLParser::token token;

	/* YY_USER_ACTION is called from the lex() function, which has the signature
	* and name as defined by macro YY_DECL. yylval, yylloc, and driver are
	* arguments. */
	#define YY_USER_ACTION preScannerAction(yylval, yylloc, driver);
	%}

	/* Definitions */
	/* Statement openers that put the scanner into declaration mode */
	CREATE_FUNCTION "CREATE"{SPACE}("OR"{SPACE}"REPLACE"{SPACE})?"FUNCTION"
	CREATE_AGGREGATE "CREATE"{SPACE}.*"AGGREGATE"
	/* SQL line comment, including its optional line terminator */
	COMMENT "--"[^\n\r]*(\n|\r\n?)?
	BEGIN_CCOMMENT "/*"
	/* Remainder of a C comment, up to and including the first terminator. Runs of
	 * stars are consumed as a unit: a run followed by anything but a slash belongs
	 * to the comment body, a run followed by a slash ends the comment. */
	END_CCOMMENT ([^*]|\*+[^*/])*\*+"/"
	IDENTIFIER [[:alpha:]_][[:alnum:]_]*
	QUOTED_IDENTIFIER "\""{IDENTIFIER}"\""
	INTEGER [[:digit:]]+
	SPACE [[:space:]]+
	/* Dollar-quote delimiter: two dollar signs with an optional tag in between */
	DOLLARQUOTE "$$"|"$"{IDENTIFIER}"$"
	BEGIN_SPECIAL_COMMENT "/*+"
	END_SPECIAL_COMMENT "*/"
	/* Single-quoted string; a doubled quote or a backslash-quote does not end it */
	STRING_LITERAL "'"([^']|''|\\')*"'"
	EXPONENT "e"("+"|"-")?[[:digit:]]+
	/* Decimal notation, exponent notation, or a quoted NaN/Infinity */
	FLOATING_POINT_LITERAL ([[:digit:]]+"."[[:digit:]]*|"."[[:digit:]]+){EXPONENT}?|[[:digit:]]+{EXPONENT}|"'"("+"|"-")?("NaN"|"Infinity")"'"


	/* State definitions */

	/* Inclusive start conditions: rules without an explicit start condition stay
	* active in these states. */
	%s sFUNC_DECL
	%s sFUNC_ARGLIST
	%s sFUNC_OPTIONS
	%s sAGG_DECL
	%s sAGG_ARGLIST
	%s sAGG_OPTIONS
	/* Exclusive start conditions: only rules tagged with the state are active. */
	%x sDOLLAR_STRING_LITERAL
	%x sCCOMMENT



	%% /* Rules */

	/* Ignore spaces */
	{SPACE}

	{COMMENT} {
	    /* Ordinary line comments are dropped. A comment whose third character is
	     * an exclamation mark is forwarded, with the two leading dashes replaced
	     * by slashes. */
	    const bool isDoxygenComment = (yytext[2] == '!');
	    if (isDoxygenComment) {
	        yytext[0] = '/';
	        yytext[1] = '/';
	        yylval->str = strdup(yytext);
	        return token::COMMENT;
	    }
	}

	/* Since not all of Greenplum and PostgreSQL allow the following
	* - labeling arguments of aggregate functions,
	* - default arguments
	* we will simply uncomment C style comments in argument lists when they
	* begin with BEGIN_SPECIAL_COMMENT. */
	<sFUNC_ARGLIST,sAGG_ARGLIST>{
	    {BEGIN_SPECIAL_COMMENT} {
	        /* opening marker of a comment whose content is to be scanned */
	        return token::BEGIN_SPECIAL;
	    }
	    {END_SPECIAL_COMMENT} {
	        /* matching closing marker */
	        return token::END_SPECIAL;
	    }
	}

	/* A C comment is split up into two parts. The reason is that flex tries to
	* match the longest rule and we want to give "normal" C comments a low
	* precedence according to this rule. */
	{BEGIN_CCOMMENT} {
	    /* Enter the comment state and keep the opener in yytext, so that the rule
	     * matching the rest of the comment sees the complete text. */
	    yy_push_state(sCCOMMENT);
	    more();
	}

	<sCCOMMENT>{END_CCOMMENT} {
	    yy_pop_state();

	    /* yytext still starts with the comment opener that was preserved by
	     * more(). Only return the comment as a token if it is a Doxygen
	     * comment. Otherwise, ignore it. */
	    if (yytext[2] == '*' || yytext[2] == '!') {
	        yylval->str = strdup(yytext);
	        return token::COMMENT;
	    }
	}

	<sDOLLAR_STRING_LITERAL>{
	    {DOLLARQUOTE} {
	        /* The literal ends only at a delimiter that equals the opening one.
	         * Both are compared without their leading dollar sign. */
	        if (strncmp(yytext + 1, stringLiteralQuotation, yyleng - 1) == 0) {
	            /* The placeholder is duplicated like every other token string in
	             * this file, so that all values of yylval->str are
	             * heap-allocated. */
	            yylval->str = strdup("\"<omitted by lexer>\"");
	            yy_pop_state();
	            free(stringLiteralQuotation);
	            stringLiteralQuotation = NULL;
	            return token::STRING_LITERAL;
	        }
	    }
	    /* Speed up the lexer by matching large chunks of text if possible */
	    [^$]*
	    "$"
	}

	{CREATE_FUNCTION} {
	    /* a function declaration starts here */
	    BEGIN(sFUNC_DECL);
	    return token::CREATE_FUNCTION;
	}

	{CREATE_AGGREGATE} {
	    /* an aggregate declaration starts here */
	    BEGIN(sAGG_DECL);
	    return token::CREATE_AGGREGATE;
	}

	<sFUNC_DECL,sAGG_DECL>"(" {
	    /* The argument list opens: move from the declaration state to the
	     * argument-list state of the same kind (function or aggregate). */
	    BEGIN((YY_START == sFUNC_DECL) ? sFUNC_ARGLIST : sAGG_ARGLIST);
	    return '(';
	}
	<sFUNC_ARGLIST,sAGG_ARGLIST>")" {
	    /* The argument list closes: move on to the options state of the same
	     * kind (function or aggregate). */
	    BEGIN((YY_START == sFUNC_ARGLIST) ? sFUNC_OPTIONS : sAGG_OPTIONS);
	    return ')';
	}

	/* We disallow using the following keywords as argument names */
	<sFUNC_ARGLIST,sFUNC_OPTIONS,sAGG_ARGLIST,sAGG_OPTIONS>{
	    /* Keywords recognized in argument lists and option lists. They are listed
	     * before the generic identifier rule, so they are never reported as
	     * identifiers in these states. */
	    "IN" return token::IN;
	    "OUT" return token::OUT;
	    "INOUT" return token::INOUT;

	    "BIT" return token::BIT;
	    "CHARACTER" return token::CHARACTER;
	    "DOUBLE" return token::DOUBLE;
	    "PRECISION" return token::PRECISION;
	    "TIME" return token::TIME;
	    "WITH" return token::WITH;
	    "WITHOUT" return token::WITHOUT;
	    "VOID" return token::VOID;
	    "VARYING" return token::VARYING;
	    "ZONE" return token::ZONE;

	    "RETURNS" return token::RETURNS;
	    "SETOF" return token::SETOF;

	    "AS" return token::AS;
	    "LANGUAGE" return token::LANGUAGE;
	    "IMMUTABLE" return token::IMMUTABLE;
	    "STABLE" return token::STABLE;
	    "VOLATILE" return token::VOLATILE;
	    "CALLED"{SPACE}"ON"{SPACE}"NULL"{SPACE}"INPUT" return token::CALLED_ON_NULL_INPUT;
	    /* the long form and STRICT are alternatives that yield the same token */
	    "RETURNS"{SPACE}"NULL"{SPACE}"ON"{SPACE}"NULL"{SPACE}"INPUT"|"STRICT" {
	        return token::RETURNS_NULL_ON_NULL_INPUT; }
	    /* the leading EXTERNAL is optional */
	    ("EXTERNAL"{SPACE})?"SECURITY"{SPACE}"INVOKER" return token::SECURITY_INVOKER;
	    ("EXTERNAL"{SPACE})?"SECURITY"{SPACE}"DEFINER" return token::SECURITY_DEFINER;

	    "DEFAULT" return token::DEFAULT;
	}

	/* We disallow using the following keywords as argument names */
	<sAGG_ARGLIST,sAGG_OPTIONS>{
	/* Keywords that are only recognized while scanning an aggregate
	* declaration (argument list or options). */
	"SFUNC" return token::SFUNC;
	"PREFUNC" return token::PREFUNC;
	"FINALFUNC" return token::FINALFUNC;
	"STYPE" return token::STYPE;
	"INITCOND" return token::INITCOND;
	"SORTOP" return token::SORTOP;
	}

	<sFUNC_DECL,sFUNC_ARGLIST,sFUNC_OPTIONS,sAGG_DECL,sAGG_ARGLIST,sAGG_OPTIONS>{
	    "NULL" {
	        yylval->str = strdup("NULL");
	        return token::NULL_KEYWORD;
	    }

	    {QUOTED_IDENTIFIER} {
	        /* strip the surrounding double quotes; the spelling is kept as it is */
	        yytext[yyleng - 1] = 0;
	        yylval->str = strdup(yytext + 1);
	        return token::IDENTIFIER;
	    }
	    /* unquoted identifiers are reported in lower case */
	    {IDENTIFIER} { yylval->str = strlowerdup(yytext); return token::IDENTIFIER; }

	    {INTEGER} {
	        yylval->str = strdup(yytext);
	        return token::INTEGER_LITERAL;
	    }
	    {FLOATING_POINT_LITERAL} {
	        yylval->str = strdup(yytext);
	        return token::FLOAT_LITERAL;
	    }
	    {STRING_LITERAL} {
	        /* String literals in single quotes are reported in double quotes */
	        yytext[0] = yytext[yyleng - 1] = '"';
	        yylval->str = strdup(yytext);
	        return token::STRING_LITERAL;
	    }
	    {DOLLARQUOTE} {
	        /* String literals in dollar quotes, see
	        http://www.postgresql.org/docs/current/static/sql-syntax-lexical.html#SQL-SYNTAX-DOLLAR-QUOTING */
	        /* Remember the opening delimiter without its leading dollar sign.
	         * strdup() yields a NUL-terminated copy, so comparing it against a
	         * longer candidate for the closing delimiter cannot run past the end
	         * of the buffer. */
	        stringLiteralQuotation = strdup(yytext + 1);
	        yy_push_state(sDOLLAR_STRING_LITERAL);
	    }

	    /* any other character except the semicolon is a token of its own */
	    [^;] { return yytext[0]; }
	}

	";" { BEGIN(INITIAL); return ';'; }

	/* Default action if nothing else applies: consume next character and do nothing */
	.\|\n { BEGIN(INITIAL); }

	%%

	/* C++ code */

	namespace bison {

	/* The class declaration of SQLScanner is in sql.yy (because bison generates
	* the header file). */

	/* Creates a scanner that reads from arg_yyin and writes to arg_yyout. The
	 * streams are passed by pointer and handed on to the base class; stream
	 * objects cannot be passed by value. */
	SQLScanner::SQLScanner(std::istream *arg_yyin, std::ostream *arg_yyout) :
	    SQLFlexLexer(arg_yyin, arg_yyout), stringLiteralQuotation(NULL), oldLength(0) {
	    /* only has an effect if %option debug or flex -d is used */
	    set_debug(1);
	}

	/* Releases the delimiter of a dollar-quoted literal that was still open when
	 * scanning stopped. The pointer is NULL whenever no such literal is open, and
	 * free(NULL) does nothing. */
	SQLScanner::~SQLScanner() {
	    free(stringLiteralQuotation);
	    stringLiteralQuotation = NULL;
	}

	/* Returns a newly allocated lower-case copy of inString, or NULL if the copy
	 * could not be allocated. The copy comes from strdup(), so it has to be
	 * released with free(). */
	char *SQLScanner::strlowerdup(const char *inString) {
	    char *returnStr = strdup(inString);
	    if (returnStr == NULL)
	        return NULL;

	    for (char *cursor = returnStr; *cursor != '\0'; ++cursor) {
	        // tolower() is only defined for values representable as unsigned char
	        // (and EOF), so a plain char must not be passed directly
	        *cursor = static_cast<char>(tolower(static_cast<unsigned char>(*cursor)));
	    }
	    return returnStr;
	}

	void SQLScanner::preScannerAction(SQLParser::semantic_type * /* yylval */,
	SQLParser::location_type yylloc, SQLDriver /* driver */) {

	yylloc->step();

	// Start at oldLength: We don't want to count preserved text more than once
	for (size_t i = oldLength; i < yyleng; i++) {
	if (yytext[i] == '\r' &&
	static_cast<size_t>(i + 1) < yyleng &&
	yytext[i + 1] == '\n') {
	i++; yylloc->lines(1);
	} else if (yytext[i] == '\r' \|\| yytext[i] == '\n') {
	yylloc->lines(1);
	} else {
	yylloc->columns(1);
	}
	}

	// Reset oldLength. more() needs to be called if yytext is to be preserved
	// again
	oldLength = 0;
	}

	/* Keeps the current yytext as a prefix of the next token and records its
	 * length, so that preScannerAction() does not count that text twice. */
	void SQLScanner::more() {
	    yymore();
	    oldLength = yyleng;
	}

	} // namespace bison