/* -----------------------------------------------------------------------------
 * sql.ll
 *
 * A simple flex file for use in companion with sql.yy. Use case: Documenting
 * .sql files with tools like Doxygen.
 *
 * Revision History:
 * 0.2: Florian Schoppmann, 16 Jan 2011, Converted to C++
 * 0.1:          "        , 10 Jan 2011, Initial version.
 * -----------------------------------------------------------------------------
 */

/* Definitions */

/* Use C++ */
%option c++

/* instructs flex to generate a batch scanner, the opposite of interactive
 * scanners */
%option batch

/* change the name of the scanner class. results in "SQLFlexLexer" */
%option prefix="SQL"

/* Generate a "case-insensitive" scanner. The case of letters given in the
 * `flex' input patterns will be ignored, and tokens in the input will be
 * matched regardless of case */
%option case-insensitive

/* makes the scanner not call `yywrap()' upon an end-of-file, but simply assume
 * that there are no more files to scan */
%option noyywrap

/* We really use yymore, but only in more(). We need to provide this option
 * because flex will otherwise complain:
 * "error: 'yymore_used_but_not_detected' was not declared in this scope" */
%option yymore

/* Enable the start condition stack. The rules for C style comments and for
 * dollar-quoted strings call yy_push_state() and yy_pop_state(); flex only
 * provides these functions when this option is given. */
%option stack

/* instructs flex to generate a scanner which never considers its input
 * interactive. Normally, on each new input file the scanner calls isatty() in
 * an attempt to determine whether the scanner's input source is interactive and
 * thus should be read a character at a time. When this option is used, however,
 * then no such call is made.
 * We declare this option because otherwise flex will generate a redundant
 * declaration of isatty(), which may lead to compile errors. */
%option never-interactive


/* C++ Code */
%{
    #if defined(__GNUC__)
        #pragma GCC diagnostic ignored "-Wconversion"
        #pragma GCC diagnostic ignored "-Wtype-limits"
    #endif

    /* We define COMPILING_SCANNER in order to know in sql.parser.hh whether we are compiling
     * the parser or the scanner */
    #define COMPILING_SCANNER 1
    
    #include "sql.parser.hh"

    #include <string>

    /* import the parser's token type into a local typedef */
    typedef bison::SQLParser::token    token;
    
    /* YY_USER_ACTION is called from the lex() function, which has the signature
     * and name as defined by macro YY_DECL. yylval, yylloc, and driver are
     * arguments. */
    #define YY_USER_ACTION preScannerAction(yylval, yylloc, driver);
%}

/* Definitions */
/* Statement heads that put the scanner into declaration mode. Note that the
 * ".*" in CREATE_AGGREGATE accepts arbitrary text on the same line between
 * CREATE and AGGREGATE. */
CREATE_FUNCTION "CREATE"{SPACE}("OR"{SPACE}"REPLACE"{SPACE})?"FUNCTION"
CREATE_AGGREGATE "CREATE"{SPACE}.*"AGGREGATE"
/* SQL line comment: two dashes up to and including the line terminator, if
 * there is one. */
COMMENT "--"[^\n\r]*(\n|\r\n?)?
/* A C style comment is matched in two steps: first the opener, then (in an
 * exclusive start condition) the remainder up to and including the first
 * closing delimiter. END_CCOMMENT consumes every run of stars as a unit. A run
 * that is followed by a slash closes the comment, regardless of how many stars
 * it contains; any other run is comment content. Matching the stars pairwise
 * with the following character instead would miss a closing delimiter that is
 * preceded by an odd number of additional stars, and the comment would then
 * extend to the end of the next comment. */
BEGIN_CCOMMENT "/*"
END_CCOMMENT ([^*]|\*+[^*/])*\*+"/"
/* Identifiers and integers. A quoted identifier may only contain the
 * characters that are also valid in an unquoted one. */
IDENTIFIER [[:alpha:]_][[:alnum:]_]*
QUOTED_IDENTIFIER "\""{IDENTIFIER}"\""
INTEGER [[:digit:]]+
SPACE [[:space:]]+
/* Delimiter of a dollar-quoted string: two dollar signs with an optional tag
 * in between. */
DOLLARQUOTE "$$"|"$"{IDENTIFIER}"$"
/* Delimiters of the "special" comments that are scanned like ordinary text
 * inside argument lists. */
BEGIN_SPECIAL_COMMENT "/*+"
END_SPECIAL_COMMENT "*/"
/* Single-quoted string; a quote inside the string is written as two quotes or
 * as a backslash followed by a quote. */
STRING_LITERAL "'"([^']|''|\\')*"'"
/* Floating point numbers, including the quoted special values NaN and
 * Infinity with an optional sign. */
EXPONENT "e"("+"|"-")?[[:digit:]]+
FLOATING_POINT_LITERAL ([[:digit:]]+"."[[:digit:]]*|"."[[:digit:]]+){EXPONENT}?|[[:digit:]]+{EXPONENT}|"'"("+"|"-")?("NaN"|"Infinity")"'"


/* State definitions */

/* Inclusive start conditions (%s): rules without an explicit start condition
 * remain active in them. They track the position inside a function or
 * aggregate declaration: after the statement head (DECL), inside the
 * parenthesized argument list (ARGLIST), and after the argument list
 * (OPTIONS). */
%s sFUNC_DECL
%s sFUNC_ARGLIST
%s sFUNC_OPTIONS
%s sAGG_DECL
%s sAGG_ARGLIST
%s sAGG_OPTIONS
/* Exclusive start conditions (%x): only rules explicitly tagged with them are
 * active. They are entered with yy_push_state() while scanning the body of a
 * dollar-quoted string or of a C style comment. */
%x sDOLLAR_STRING_LITERAL
%x sCCOMMENT



%%    /* Rules */

    /* Ignore whitespace: the rule has no action, so the matched text is simply
     * discarded. Positions are still tracked because YY_USER_ACTION runs for
     * every match. */
{SPACE}

{COMMENT} {
    /* A line comment is only handed to the parser if it is a Doxygen comment,
     * i.e. if a '!' follows the two dashes. In that case the dashes are
     * overwritten with slashes so that the token text starts with "//!".
     * Any other line comment is dropped. */
    const bool isDoxygenComment = (yytext[2] == '!');

    if (isDoxygenComment) {
        yytext[0] = '/';
        yytext[1] = '/';
        yylval->str = static_cast<char *>( strdup(yytext) );
        return token::COMMENT;
    }
}

    /* Not all versions of Greenplum and PostgreSQL support
     * - labeling arguments of aggregate functions,
     * - default arguments.
     * Such information can be placed in a C style comment that starts with
     * BEGIN_SPECIAL_COMMENT. Inside argument lists, only the delimiters of
     * such a comment are turned into tokens; its content is therefore scanned
     * like ordinary argument-list text instead of being skipped. */
<sFUNC_ARGLIST,sAGG_ARGLIST>{
    {BEGIN_SPECIAL_COMMENT} { return token::BEGIN_SPECIAL; }
    {END_SPECIAL_COMMENT} { return token::END_SPECIAL; }
}

    /* A C comment is split up into two parts. The reason is that flex prefers
     * the rule with the longest match, and we want "normal" C comments to
     * have a low precedence under that policy: matching only the two-character
     * opener here lets any longer match at the same position win. */
{BEGIN_CCOMMENT} {
    /* Keep the opener in yytext so that the rest of the comment is appended to
     * it by the next match */
    more();
    /* Scan the remainder in the exclusive comment state; the current start
     * condition is saved on the stack and restored at the end of the comment */
    yy_push_state(sCCOMMENT);
}

<sCCOMMENT>{END_CCOMMENT} {
    /* Because the opener was preserved with more(), yytext holds the complete
     * comment here, and the character at index 2 is the one that directly
     * follows the opener. */
    const char firstAfterOpener = yytext[2];
    const bool isDoxygenComment =
        (firstAfterOpener == '*' || firstAfterOpener == '!');

    /* Return to the start condition that was active before the comment */
    yy_pop_state();

    /* Only Doxygen comments become tokens; all other comments are dropped */
    if (isDoxygenComment) {
        yylval->str = strdup(yytext);
        return token::COMMENT;
    }
}

<sDOLLAR_STRING_LITERAL>{
    {DOLLARQUOTE} {
        /* Inside a dollar-quoted string, a delimiter ends the string only if
         * it equals the one that opened it. Both sides are compared without
         * the leading '$' but including the trailing one, so a tag that is
         * merely a prefix of the other tag does not match. A delimiter that
         * does not match is ordinary string content and is skipped. */
        if (strncmp(yytext + 1, stringLiteralQuotation, yyleng - 1) == 0) {
            /* The string body is not preserved; the parser receives a
             * placeholder. Like every other token text it is heap-allocated,
             * so all str values can be treated uniformly by the consumer
             * (a string literal must be neither modified nor freed, and
             * binding one to a non-const char pointer is not valid in
             * C++11 and later). */
            yylval->str = strdup("\"<omitted by lexer>\"");
            yy_pop_state();
            free(stringLiteralQuotation);
            stringLiteralQuotation = NULL;
            return token::STRING_LITERAL;
        }
    }
    /* Speed up the lexer by matching large chunks of text if possible */
    [^$]*
    "$"
}

{CREATE_FUNCTION} {
    /* Statement head of a function definition: switch to the start condition
     * for function declarations and report the head as a single token */
    BEGIN(sFUNC_DECL);
    return token::CREATE_FUNCTION;
}

{CREATE_AGGREGATE} {
    /* Statement head of an aggregate definition: switch to the start
     * condition for aggregate declarations and report the head as a single
     * token */
    BEGIN(sAGG_DECL);
    return token::CREATE_AGGREGATE;
}

<sFUNC_DECL,sAGG_DECL>"(" {
    /* The opening parenthesis after the statement head starts the argument
     * list of the kind of declaration that is currently being scanned. */
    const bool inFunctionDecl = (YY_START == sFUNC_DECL);
    BEGIN(inFunctionDecl ? sFUNC_ARGLIST : sAGG_ARGLIST);

    return '(';
}
<sFUNC_ARGLIST,sAGG_ARGLIST>")" {
    /* The closing parenthesis ends the argument list; what follows are the
     * options of the function or aggregate, respectively. */
    const bool inFunctionArgs = (YY_START == sFUNC_ARGLIST);
    BEGIN(inFunctionArgs ? sFUNC_OPTIONS : sAGG_OPTIONS);

    return ')';
}

    /* Keywords recognized inside the argument list and the options part of
     * function and aggregate declarations. These rules precede the IDENTIFIER
     * rule, and flex prefers the earlier rule when two rules match the same
     * text, so none of these words can be used as an unquoted argument name.
     * Matching is case-insensitive (%option case-insensitive). The multi-word
     * entries accept any amount of whitespace between their words and are
     * returned as a single token each. */
<sFUNC_ARGLIST,sFUNC_OPTIONS,sAGG_ARGLIST,sAGG_OPTIONS>{
    "IN" return token::IN;
    "OUT" return token::OUT;
    "INOUT" return token::INOUT;
    
    "BIT" return token::BIT;
    "CHARACTER" return token::CHARACTER;
    "DOUBLE" return token::DOUBLE;
    "PRECISION" return token::PRECISION;
    "TIME" return token::TIME;
    "WITH" return token::WITH;
    "WITHOUT" return token::WITHOUT;
    "VOID" return token::VOID;
    "VARYING" return token::VARYING;
    "ZONE" return token::ZONE;

    "RETURNS" return token::RETURNS;
    "SETOF" return token::SETOF;
    
    "AS" return token::AS;
    "LANGUAGE" return token::LANGUAGE;
    "IMMUTABLE" return token::IMMUTABLE;
    "STABLE" return token::STABLE;
    "VOLATILE" return token::VOLATILE;
    "CALLED"{SPACE}"ON"{SPACE}"NULL"{SPACE}"INPUT" return token::CALLED_ON_NULL_INPUT;
    "RETURNS"{SPACE}"NULL"{SPACE}"ON"{SPACE}"NULL"{SPACE}"INPUT"|"STRICT" {
        return token::RETURNS_NULL_ON_NULL_INPUT; }
    ("EXTERNAL"{SPACE})?"SECURITY"{SPACE}"INVOKER" return token::SECURITY_INVOKER;
    ("EXTERNAL"{SPACE})?"SECURITY"{SPACE}"DEFINER" return token::SECURITY_DEFINER;

    "DEFAULT" return token::DEFAULT;
}

    /* Additional keywords that are only recognized in aggregate declarations
     * (argument list and options part). As with the keywords above, they take
     * precedence over the IDENTIFIER rule and can therefore not be used as
     * unquoted argument names of an aggregate. */
<sAGG_ARGLIST,sAGG_OPTIONS>{
    "SFUNC" return token::SFUNC;
    "PREFUNC" return token::PREFUNC;
    "FINALFUNC" return token::FINALFUNC;
    "STYPE" return token::STYPE;
    "INITCOND" return token::INITCOND;
    "SORTOP" return token::SORTOP;
}

<sFUNC_DECL,sFUNC_ARGLIST,sFUNC_OPTIONS,sAGG_DECL,sAGG_ARGLIST,sAGG_OPTIONS>{
    "NULL" {
        /* NULL has its own token type; its text is always reported in upper
         * case */
        yylval->str = strdup("NULL");
        return token::NULL_KEYWORD;
    }

    {QUOTED_IDENTIFIER} {
        /* Strip the surrounding double quotes; the spelling is preserved */
        yytext[yyleng - 1] = 0;
        yylval->str = strdup(yytext + 1);
        return token::IDENTIFIER;
    }
    {IDENTIFIER} {
        /* Unquoted identifiers are reported in lower case */
        yylval->str = strlowerdup(yytext);
        return token::IDENTIFIER;
    }

    {INTEGER} {
        yylval->str = strdup(yytext);
        return token::INTEGER_LITERAL;
    }
    {FLOATING_POINT_LITERAL} {
        yylval->str = strdup(yytext);
        return token::FLOAT_LITERAL;
    }
    {STRING_LITERAL} {
        /* String literals in single quotes. The delimiting single quotes are
         * replaced by double quotes; the content is left untouched. */
        yytext[0] = yytext[yyleng - 1] = '"';
        yylval->str = strdup(yytext);
        return token::STRING_LITERAL;
    }
    {DOLLARQUOTE} {
        /* String literals in dollar quotes, see
        http://www.postgresql.org/docs/current/static/sql-syntax-lexical.html#SQL-SYNTAX-DOLLAR-QUOTING */
        /* Remember the opening delimiter without its leading '$' but with its
         * trailing one (yyleng - 1 characters), so that the matching closing
         * delimiter can be recognized. The copy is NUL-terminated so that it
         * is a proper C string and can never be read past its end. */
        stringLiteralQuotation = static_cast<char *>( malloc(yyleng) );
        strncpy(stringLiteralQuotation, yytext + 1, yyleng - 1);
        stringLiteralQuotation[yyleng - 1] = '\0';
        yy_push_state(sDOLLAR_STRING_LITERAL);
    }

    [^;] {
        /* Any other character is its own token. The conversion to unsigned
         * char keeps the token code positive for bytes outside the ASCII
         * range even where plain char is signed; a zero or negative code
         * would be taken for the end of the input by the parser. */
        return static_cast<unsigned char>(yytext[0]);
    }
}

";" {
    /* A semicolon ends the statement: leave any declaration start condition
     * and report the semicolon to the parser */
    BEGIN(INITIAL);
    return ';';
}

    /* Fallback if no other rule applies: consume the next character without
     * producing a token and make sure the scanner is in the INITIAL start
     * condition. */
.|\n { BEGIN(INITIAL); }

%%

/* C++ code */

namespace bison {

/* The class declaration of SQLScanner is in sql.yy (because bison generates
 * the header file). */

/* Creates a scanner that reads from arg_yyin and writes to arg_yyout. Both
 * streams are passed on to the flex-generated base class. Initially, no
 * dollar-quote delimiter is stored and no text is marked as preserved. */
SQLScanner::SQLScanner(std::istream *arg_yyin, std::ostream *arg_yyout)
  : SQLFlexLexer(arg_yyin, arg_yyout),
    stringLiteralQuotation(NULL),
    oldLength(0)
{
    /* Request debug output. This only has an effect if %option debug or
     * flex -d is used. */
    const int debugFlag = 1;
    set_debug(debugFlag);
}

/* Destroys the scanner.
 *
 * The delimiter of a dollar-quoted string is allocated when the string is
 * opened and released when the matching closing delimiter is found. If the
 * input ends before that happens, the delimiter is still allocated here and
 * has to be released. free() accepts a null pointer, so no check is needed
 * for the regular case. */
SQLScanner::~SQLScanner() {
    free(stringLiteralQuotation);
    stringLiteralQuotation = NULL;
}

/* Returns a newly allocated lower-case copy of inString.
 *
 * inString: NUL-terminated string to copy; it is not modified.
 *
 * Returns a copy allocated with strdup(), which the caller has to release
 * with free(), or NULL if the allocation failed. */
char *SQLScanner::strlowerdup(const char *inString) {
    char *returnStr = strdup(inString);
    if (returnStr == NULL)
        return NULL;

    /* tolower() is only defined for values that are representable as
     * unsigned char (and EOF). Where plain char is signed, a byte outside the
     * ASCII range would otherwise be passed as a negative value. */
    for (char *current = returnStr; *current; ++current)
        *current = static_cast<char>(
            tolower(static_cast<unsigned char>(*current)));

    return returnStr;
}

/* Updates the location of the current match. Invoked through YY_USER_ACTION
 * for every match, before the action of the matching rule is executed.
 *
 * yylloc: location that is advanced over the newly matched text.
 * The other two arguments are not used. */
void SQLScanner::preScannerAction(SQLParser::semantic_type * /* yylval */,
    SQLParser::location_type *yylloc, SQLDriver * /* driver */) {

    yylloc->step();

    // Text in front of oldLength was preserved by more() and has already been
    // accounted for by an earlier call, so only the remainder is processed.
    size_t pos = oldLength;
    while (pos < yyleng) {
        const char current = yytext[pos];
        const bool isLineBreak = (current == '\r' || current == '\n');

        if (!isLineBreak) {
            yylloc->columns(1);
            ++pos;
            continue;
        }

        // A carriage return directly followed by a line feed counts as a
        // single line break
        const bool isCRLF = current == '\r' &&
                static_cast<size_t>(pos + 1) < yyleng &&
                yytext[pos + 1] == '\n';

        yylloc->lines(1);
        pos += isCRLF ? 2 : 1;
    }

    // Nothing is preserved for the next match unless more() is called again
    oldLength = 0;
}

/* Wrapper around yymore() for use in rule actions: asks flex to keep the
 * current yytext as the prefix of the next match, and records the current
 * length so that preScannerAction() does not count the kept text a second
 * time. */
void SQLScanner::more() {
    yymore();
    oldLength = yyleng;
}

} // namespace bison
