| /* ----------------------------------------------------------------------------- |
| * sql.ll |
| * |
| * A simple flex file for use in companion with sql.yy. Use case: Documenting |
| * .sql files with tools like Doxygen. |
| * |
| * Revision History: |
| * 0.2: Florian Schoppmann, 16 Jan 2011, Converted to C++ |
| * 0.1: " , 10 Jan 2011, Initial version. |
| * ----------------------------------------------------------------------------- |
| */ |
| |
| /* Definitions */ |
| |
| /* Use C++ */ |
| %option c++ |
| |
| /* instructs flex to generate a batch scanner, the opposite of interactive |
| * scanners */ |
| %option batch |
| |
| /* change the name of the scanner class. results in "SQLFlexLexer" */ |
| %option prefix="SQL" |
| |
| /* Generate a "case-insensitive" scanner. The case of letters given in the |
| * `flex' input patterns will be ignored, and tokens in the input will be |
| * matched regardless of case */ |
| %option case-insensitive |
| |
| /* makes the scanner not call `yywrap()' upon an end-of-file, but simply assume |
| * that there are no more files to scan */ |
| %option noyywrap |
| |
/* We really use yymore, but only in more(). We need to provide this option
 * because flex will otherwise complain:
 * "error: 'yymore_used_but_not_detected' was not declared in this scope" */
| %option yymore |
| |
| /* instructs flex to generate a scanner which never considers its input |
| * interactive. Normally, on each new input file the scanner calls isatty() in |
| * an attempt to determine whether the scanner's input source is interactive and |
| * thus should be read a character at a time. When this option is used, however, |
| * then no such call is made. |
| * We declare this option because otherwise flex will generate a redundant |
| * declaration of isatty(), which may lead to compile errors. */ |
| %option never-interactive |
| |
| |
| /* C++ Code */ |
| %{ |
| #if defined(__GNUC__) |
| #pragma GCC diagnostic ignored "-Wconversion" |
| #pragma GCC diagnostic ignored "-Wtype-limits" |
| #endif |
| |
| /* We define COMPILING_SCANNER in order to know in sql.parser.hh whether we are compiling |
| * the parser or the scanner */ |
| #define COMPILING_SCANNER 1 |
| |
| #include "sql.parser.hh" |
| |
| #include <string> |
| |
| /* import the parser's token type into a local typedef */ |
| typedef bison::SQLParser::token token; |
| |
| /* YY_USER_ACTION is called from the lex() function, which has the signature |
| * and name as defined by macro YY_DECL. yylval, yylloc, and driver are |
| * arguments. */ |
| #define YY_USER_ACTION preScannerAction(yylval, yylloc, driver); |
| %} |
| |
/* Named pattern definitions. They are referenced as {NAME} by other
 * definitions and by the rules below. */
CREATE_FUNCTION "CREATE"{SPACE}("OR"{SPACE}"REPLACE"{SPACE})?"FUNCTION"
/* Anything except a newline may stand between CREATE and AGGREGATE. */
CREATE_AGGREGATE "CREATE"{SPACE}.*"AGGREGATE"
/* A "--" comment up to and including its (optional) line terminator. */
COMMENT "--"[^\n\r]*(\n|\r\n?)?
BEGIN_CCOMMENT "/*"
/* Remainder of a C comment, up to and including the FIRST terminator.
 * Runs of stars are consumed as a unit so that a comment ending in several
 * stars followed by a slash is closed right there. The previous form,
 * ([^\*]|\*[^/])*, paired up two stars and then skipped the slash, which made
 * such comments run on until some later terminator. */
END_CCOMMENT ([^*]|\*+[^*/])*\*+"/"
IDENTIFIER [[:alpha:]_][[:alnum:]_]*
QUOTED_IDENTIFIER "\""{IDENTIFIER}"\""
INTEGER [[:digit:]]+
SPACE [[:space:]]+
/* Dollar-quote delimiter, with or without a tag between the dollar signs. */
DOLLARQUOTE "$$"|"$"{IDENTIFIER}"$"
BEGIN_SPECIAL_COMMENT "/*+"
END_SPECIAL_COMMENT "*/"
STRING_LITERAL "'"([^']|''|\\')*"'"
EXPONENT "e"("+"|"-")?[[:digit:]]+
/* Decimal notation with optional exponent, integer with exponent, or the
 * quoted special values NaN / Infinity with an optional sign. */
FLOATING_POINT_LITERAL ([[:digit:]]+"."[[:digit:]]*|"."[[:digit:]]+){EXPONENT}?|[[:digit:]]+{EXPONENT}|"'"("+"|"-")?("NaN"|"Infinity")"'"
| |
| |
| /* State definitions */ |
| |
/* Inclusive start conditions: rules without a start condition stay active. */
%s sFUNC_DECL sFUNC_ARGLIST sFUNC_OPTIONS
%s sAGG_DECL sAGG_ARGLIST sAGG_OPTIONS
/* Exclusive start conditions: only rules that name them are active. */
%x sDOLLAR_STRING_LITERAL sCCOMMENT
| |
| |
| |
| %% /* Rules */ |
| |
| /* Ignore spaces */ |
{SPACE} { /* nothing to do: whitespace is dropped */ }
| |
{COMMENT} {
    /* A "--" comment is forwarded to the parser only when its third
     * character is '!'; its leading "--" is then rewritten to "//". Any
     * other "--" comment falls through and is dropped. */
    const bool isDoxygenComment = (yytext[2] == '!');
    if (isDoxygenComment) {
        yytext[0] = '/';
        yytext[1] = '/';
        yylval->str = strdup(yytext);
        return token::COMMENT;
    }
}
| |
| /* Since not all of Greenplum and PostgreSQL allow the following |
| * - labeling arguments of aggregate functions, |
| * - default arguments |
| * we will simply uncomment C style comments in argument lists when they |
| * begin with BEGIN_SPECIAL_COMMENT. */ |
<sFUNC_ARGLIST,sAGG_ARGLIST>{
    /* Inside argument lists the special-comment markers are handed to the
     * parser as tokens of their own. */
    {BEGIN_SPECIAL_COMMENT} {
        return token::BEGIN_SPECIAL;
    }
    {END_SPECIAL_COMMENT} {
        return token::END_SPECIAL;
    }
}
| |
| /* A C comment is split up into two parts. The reason is that flex tries to |
| * match the longest rule and we want to give "normal" C comments a low |
| * precedence according to this rule. */ |
{BEGIN_CCOMMENT} {
    /* Scan the remainder of the comment in its own exclusive state ... */
    yy_push_state(sCCOMMENT);
    /* ... and keep the opener in yytext so that the next match is appended
     * to it. */
    more();
}
| |
<sCCOMMENT>{END_CCOMMENT} {
    /* Comment is complete: return to the state that was active before it. */
    yy_pop_state();

    /* yytext still starts with the two-character opener (preserved through
     * more()), so index 2 is the first character after it. Only comments
     * whose first character is '*' or '!' are returned; all others are
     * dropped. */
    switch (yytext[2]) {
    case '*':
    case '!':
        yylval->str = strdup(yytext);
        return token::COMMENT;
    default:
        break;
    }
}
| |
<sDOLLAR_STRING_LITERAL>{
    {DOLLARQUOTE} {
        /* stringLiteralQuotation holds the opening delimiter without its
         * leading '$' (the tag followed by the trailing '$'); compare the
         * candidate the same way. */
        if (strncmp(yytext + 1, stringLiteralQuotation, yyleng - 1) == 0) {
            /* Matching delimiter: the literal ends here. Its content is
             * not preserved; a placeholder is returned instead. */
            yy_pop_state();
            free(stringLiteralQuotation);
            stringLiteralQuotation = NULL;
            /* Duplicate the placeholder so that, like for every other
             * token, yylval->str points to a malloc'ed, writable buffer. */
            yylval->str = strdup("\"<omitted by lexer>\"");
            return token::STRING_LITERAL;
        } else {
            /* Not our delimiter. Its final '$' may nevertheless be the
             * first character of the real closing delimiter, so put it
             * back for rescanning. */
            yyless(yyleng - 1);
            /* preScannerAction() has already counted the returned '$';
             * undo that so it is not counted twice. */
            --yylloc->end.column;
        }
    }
    /* Speed up the lexer by matching large chunks of text if possible */
    [^$]*
    "$"
}
| |
{CREATE_FUNCTION} {
    /* Start of a function declaration */
    BEGIN(sFUNC_DECL);
    return token::CREATE_FUNCTION;
}

{CREATE_AGGREGATE} {
    /* Start of an aggregate declaration */
    BEGIN(sAGG_DECL);
    return token::CREATE_AGGREGATE;
}
| |
<sFUNC_DECL,sAGG_DECL>"(" {
    /* The opening parenthesis starts the argument list of whichever kind of
     * declaration is being scanned. */
    BEGIN(YY_START == sFUNC_DECL ? sFUNC_ARGLIST : sAGG_ARGLIST);
    return '(';
}
<sFUNC_ARGLIST,sAGG_ARGLIST>")" {
    /* A closing parenthesis ends the argument list; what follows are the
     * options of the function or aggregate. */
    BEGIN(YY_START == sFUNC_ARGLIST ? sFUNC_OPTIONS : sAGG_OPTIONS);
    return ')';
}
| |
| /* We disallow using the following keywords as argument names */ |
<sFUNC_ARGLIST,sFUNC_OPTIONS,sAGG_ARGLIST,sAGG_OPTIONS>{
    /* Argument modes */
    "IN"        { return token::IN; }
    "OUT"       { return token::OUT; }
    "INOUT"     { return token::INOUT; }

    /* Words that occur in type names */
    "BIT"       { return token::BIT; }
    "CHARACTER" { return token::CHARACTER; }
    "DOUBLE"    { return token::DOUBLE; }
    "PRECISION" { return token::PRECISION; }
    "TIME"      { return token::TIME; }
    "WITH"      { return token::WITH; }
    "WITHOUT"   { return token::WITHOUT; }
    "VOID"      { return token::VOID; }
    "VARYING"   { return token::VARYING; }
    "ZONE"      { return token::ZONE; }

    /* Return type */
    "RETURNS"   { return token::RETURNS; }
    "SETOF"     { return token::SETOF; }

    /* Function options */
    "AS"        { return token::AS; }
    "LANGUAGE"  { return token::LANGUAGE; }
    "IMMUTABLE" { return token::IMMUTABLE; }
    "STABLE"    { return token::STABLE; }
    "VOLATILE"  { return token::VOLATILE; }
    "CALLED"{SPACE}"ON"{SPACE}"NULL"{SPACE}"INPUT" { return token::CALLED_ON_NULL_INPUT; }
    "RETURNS"{SPACE}"NULL"{SPACE}"ON"{SPACE}"NULL"{SPACE}"INPUT"|"STRICT" { return token::RETURNS_NULL_ON_NULL_INPUT; }
    ("EXTERNAL"{SPACE})?"SECURITY"{SPACE}"INVOKER" { return token::SECURITY_INVOKER; }
    ("EXTERNAL"{SPACE})?"SECURITY"{SPACE}"DEFINER" { return token::SECURITY_DEFINER; }

    /* Default argument values */
    "DEFAULT"   { return token::DEFAULT; }
}
| |
| /* We disallow using the following keywords as argument names */ |
<sAGG_ARGLIST,sAGG_OPTIONS>{
    /* Keywords that are recognized in aggregate declarations only */
    "SFUNC"     { return token::SFUNC; }
    "PREFUNC"   { return token::PREFUNC; }
    "FINALFUNC" { return token::FINALFUNC; }
    "STYPE"     { return token::STYPE; }
    "INITCOND"  { return token::INITCOND; }
    "SORTOP"    { return token::SORTOP; }
}
| |
<sFUNC_DECL,sFUNC_ARGLIST,sFUNC_OPTIONS,sAGG_DECL,sAGG_ARGLIST,sAGG_OPTIONS>{
    "NULL" {
        yylval->str = strdup("NULL");
        return token::NULL_KEYWORD;
    }

    {QUOTED_IDENTIFIER} {
        /* Hand over the name without its surrounding double quotes:
         * overwrite the closing quote with the terminator and skip the
         * opening one. The spelling is kept as written. */
        char *closingQuote = yytext + (yyleng - 1);
        *closingQuote = 0;
        yylval->str = strdup(&yytext[1]);
        return token::IDENTIFIER;
    }
    {IDENTIFIER} {
        /* Unquoted names are converted to lower case */
        yylval->str = strlowerdup(yytext);
        return token::IDENTIFIER;
    }

    {INTEGER} {
        yylval->str = strdup(yytext);
        return token::INTEGER_LITERAL;
    }
    {FLOATING_POINT_LITERAL} {
        yylval->str = strdup(yytext);
        return token::FLOAT_LITERAL;
    }
    {STRING_LITERAL} {
        /* String literals in single quotes: the two enclosing single
         * quotes are replaced by double quotes. */
        yytext[yyleng - 1] = '"';
        yytext[0] = '"';
        yylval->str = strdup(yytext);
        return token::STRING_LITERAL;
    }
    {DOLLARQUOTE} {
        /* String literals in dollar quotes, see
           http://www.postgresql.org/docs/current/static/sql-syntax-lexical.html#SQL-SYNTAX-DOLLAR-QUOTING
           Remember the delimiter without its leading '$' (no terminating
           zero is stored) and skip ahead to the matching delimiter in a
           state of its own. */
        const size_t storedLength = yyleng - 1;
        stringLiteralQuotation = static_cast<char *>( malloc(storedLength) );
        strncpy(stringLiteralQuotation, yytext + 1, storedLength);
        yy_push_state(sDOLLAR_STRING_LITERAL);
    }

    /* Every other character except ';' is its own token */
    [^;] {
        return yytext[0];
    }
}
| |
";" {
    /* A semicolon ends the current statement */
    BEGIN(INITIAL);
    return ';';
}

    /* Default action if nothing else applies: consume the next character,
     * return no token, and fall back to the initial state */
.|\n {
    BEGIN(INITIAL);
}
| |
| %% |
| |
| /* C++ code */ |
| |
| namespace bison { |
| |
| /* The class declaration of SQLScanner is in sql.yy (because bison generates |
| * the header file). */ |
| |
| SQLScanner::SQLScanner(std::istream *arg_yyin, std::ostream *arg_yyout) : |
| SQLFlexLexer(arg_yyin, arg_yyout), stringLiteralQuotation(NULL), oldLength(0) { |
| /* only has an effect if %option debug or flex -d is used */ |
| set_debug(1); |
| } |
| |
| SQLScanner::~SQLScanner() { |
| } |
| |
| char *SQLScanner::strlowerdup(const char *inString) { |
| char *returnStr = strdup(inString); |
| for (int i = 0; returnStr[i]; i++) |
| returnStr[i] = tolower(returnStr[i]); |
| return returnStr; |
| } |
| |
| void SQLScanner::preScannerAction(SQLParser::semantic_type * /* yylval */, |
| SQLParser::location_type *yylloc, SQLDriver * /* driver */) { |
| |
| yylloc->step(); |
| |
| // Start at oldLength: We don't want to count preserved text more than once |
| for (size_t i = oldLength; i < yyleng; i++) { |
| if (yytext[i] == '\r' && |
| static_cast<size_t>(i + 1) < yyleng && |
| yytext[i + 1] == '\n') { |
| i++; yylloc->lines(1); |
| } else if (yytext[i] == '\r' || yytext[i] == '\n') { |
| yylloc->lines(1); |
| } else { |
| yylloc->columns(1); |
| } |
| } |
| |
| // Reset oldLength. more() needs to be called if yytext is to be preserved |
| // again |
| oldLength = 0; |
| } |
| |
| void SQLScanner::more() { |
| oldLength = yyleng; |
| yymore(); |
| } |
| |
| } // namespace bison |