/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 **/

%option prefix="quickstep_yy"
%option warn nodefault
%option noyywrap noinput nounput
%option never-interactive nounistd
%option bison-bridge reentrant
%option yylineno
%option bison-locations

%{
#include <cstdio>
#include <string>
#include <vector>

#include "parser/ParseJoinedTableReference.hpp"
#include "parser/ParseLiteralValue.hpp"
#include "utility/PtrList.hpp"
#include "utility/PtrVector.hpp"

namespace quickstep {

class BinaryOperation;
class Comparison;
class ParseAssignment;
class ParseAttribute;
class ParseAttributeDefinition;
class ParseBlockProperties;
class ParseColumnConstraint;
class ParseCommand;
class ParseDataType;
class ParseExpression;
struct ParseFrameInfo;
class ParseFunctionCall;
class ParseGroupBy;
class ParseHaving;
class ParseKeyBoolValue;
class ParseKeyIntegerValue;
class ParseKeyStringValue;
class ParseKeyStringList;
class ParseKeyValue;
class ParseLimit;
class ParseOrderBy;
class ParseOrderByItem;
class ParsePartitionClause;
class ParsePredicate;
class ParsePriority;
class ParseSample;
class ParseScalarLiteral;
class ParseSearchedWhenClause;
class ParseSelect;
class ParseSelectionClause;
class ParseSelectionItem;
class ParseSelectionItemScalar;
class ParseSelectionList;
class ParseSetOperation;
class ParseSimpleTableReference;
class ParseSimpleWhenClause;
class ParseStringKeyLiteralValues;
class ParseStatement;
class ParseStatementCopy;
class ParseStatementCreateTable;
class ParseStatementDelete;
class ParseStatementDropTable;
class ParseStatementInsert;
class ParseStatementInsertTuple;
class ParseStatementInsertSelection;
class ParseStatementSetOperation;
class ParseStatementQuit;
class ParseStatementUpdate;
class ParseSubqueryExpression;
class ParseSubqueryTableReference;
class ParseTableReference;
class ParseTableReferenceSignature;
class ParseWindow;
class Type;
class UnaryOperation;

}  // namespace quickstep

#include "parser/ParseString.hpp"
#include "parser/SqlParser_gen.hpp"
#include "parser/ParserUtil.hpp"

/**
 * Per-token location bookkeeping. Flex expands YY_USER_ACTION ahead of the
 * action of every matched rule.
 *
 * The token's starting position is stamped into yylloc from the current line
 * number and the running column counter, and only afterwards is the counter
 * advanced past the matched text, so first_column refers to where the token
 * begins. Only the first_line and first_column fields are filled in here.
 *
 * yycolumn is never reset by this macro; the newline rules of the command and
 * SQL conditions set it back to zero themselves.
 **/
#define YY_USER_ACTION                                    \
  {                                                       \
    yylloc->first_line = yylineno;                        \
    yylloc->first_column = yycolumn;                      \
    yycolumn += yyleng;                                   \
  }

%}

%e 1024

/* FIXME(chasseur, qzeng): Add support for hexadecimal literals. */

/**
 * These patterns are based on the SQL-2011 standard for syntax of numeric
 * literals (Part 2, Section 5.3 of the standard).
 *
 * Only unsigned_numeric_literal is used directly by a rule. None of these
 * patterns accepts a sign in front of the literal as a whole; the '-' and '+'
 * characters are returned to the parser as separate single-character tokens.
 **/
/* A single plus or minus character. */
sign [\-\+]
/* One or more decimal digits. */
unsigned_integer [0-9]+
/* Digits with an optional leading sign; used for the exponent part below. */
signed_integer {sign}?{unsigned_integer}
/* Fixed-point forms such as 12, 12., 12.5 and .5 (a lone dot does not match). */
exact_numeric_literal ({unsigned_integer}(\.{unsigned_integer}?)?)|(\.{unsigned_integer})
/* An exact literal followed by e or E and an exponent, such as 1e10 or 2.5E-3. */
approximate_numeric_literal {exact_numeric_literal}[eE]{signed_integer}
/* Either of the two literal forms above. */
unsigned_numeric_literal {exact_numeric_literal}|{approximate_numeric_literal}

/**
 * Start conditions. All of them are declared exclusive, so while one is
 * active only the rules explicitly tagged with it (or with <*>) can match.
 *
 * The scanner begins in INITIAL, whose rules only decide whether the input
 * is a backslash command (CONDITION_COMMAND) or SQL text (CONDITION_SQL).
 * The three string conditions are entered from CONDITION_SQL when an opening
 * quote is seen and switch back to CONDITION_SQL at the closing quote.
 **/
%x CONDITION_SQL
%x CONDITION_COMMAND
%x CONDITION_STRING_SINGLE_QUOTED
%x CONDITION_STRING_SINGLE_QUOTED_ESCAPED
%x CONDITION_STRING_DOUBLE_QUOTED

%%

<INITIAL>{
   /**
    * Dispatch on the very first character: a backslash followed by letters
    * starts a system command, anything else is handed to the SQL rules.
    **/
  \\[a-zA-Z]+ {
    /* A backslash character introduces a system command. The token text is
     * the whole match, including the leading backslash. */
    BEGIN(CONDITION_COMMAND);
    yylval->string_value_ = new quickstep::ParseString(
        yylloc->first_line, yylloc->first_column, std::string(yytext, yyleng));
    return TOKEN_COMMAND;
  }

  [^\\] {
    /* This is a SQL command. Place the char back and process normally.
     * YY_USER_ACTION has already advanced yycolumn by the one character
     * matched here, so that advance is undone because the character will be
     * scanned again by the SQL rules. */
    yyless(0);
    yycolumn--;
    BEGIN(CONDITION_SQL);
  }
}

<CONDITION_COMMAND>{
   /**
    * Rules for the rest of a backslash command line: arguments separated by
    * blanks, terminated by a line break.
    **/
  [a-zA-Z0-9_\-\.:]+ {
    /* This is a command argument. It is reported with the same token type as
     * the command name itself; the text is a run of letters, digits and the
     * characters '_', '-', '.' and ':'. */
    yylval->string_value_ = new quickstep::ParseString(
        yylloc->first_line, yylloc->first_column, std::string(yytext, yyleng));
    return TOKEN_COMMAND;
  }

  [ \t]+ {
    /* Ignore whitespace. */
  }

  [\n\r]+ {
    /* Newline reverts the lexer to the initial state. A whole run of
     * consecutive line-break characters is reported as one '\n' token, and
     * the column counter restarts from zero. */
    yycolumn = 0;
    BEGIN(INITIAL);
    return '\n';
  }
}

<CONDITION_SQL>{
   /**
    * Reserved words. Each one maps directly to a parser token and carries no
    * semantic value. Several spellings share a token (asc/ascending,
    * char/character, desc/descending, int/integer).
    *
    * These rules come before the identifier rule of this condition, so a
    * word that matches both with the same length is reported as the keyword.
    *
    * NOTE(review): the patterns are written in lowercase. Whether other
    * capitalizations match depends on how flex is invoked (no
    * case-insensitivity option is set in this file) -- confirm in the build.
    **/
  "add"              return TOKEN_ADD;
  "all"              return TOKEN_ALL;
  "alter"            return TOKEN_ALTER;
  "and"              return TOKEN_AND;
  "as"               return TOKEN_AS;
  "asc"              return TOKEN_ASC;
  "ascending"        return TOKEN_ASC;
  "between"          return TOKEN_BETWEEN;
  "bigint"           return TOKEN_BIGINT;
  "bit"              return TOKEN_BIT;
  "bitweaving"       return TOKEN_BITWEAVING;
  "blockproperties"  return TOKEN_BLOCKPROPERTIES;
  "blocksample"      return TOKEN_BLOCKSAMPLE;
  "bloomfilter"      return TOKEN_BLOOM_FILTER;
  "case"             return TOKEN_CASE;
  "csbtree"          return TOKEN_CSB_TREE;
  "by"               return TOKEN_BY;
  "char"             return TOKEN_CHARACTER;
  "character"        return TOKEN_CHARACTER;
  "check"            return TOKEN_CHECK;
  "column"           return TOKEN_COLUMN;
  "constraint"       return TOKEN_CONSTRAINT;
  "copy"             return TOKEN_COPY;
  "create"           return TOKEN_CREATE;
  "current"          return TOKEN_CURRENT;
  "date"             return TOKEN_DATE;
  "datetime"         return TOKEN_DATETIME;
  "day"              return TOKEN_DAY;
  "decimal"          return TOKEN_DECIMAL;
  "default"          return TOKEN_DEFAULT;
  "delete"           return TOKEN_DELETE;
  "desc"             return TOKEN_DESC;
  "descending"       return TOKEN_DESC;
  "distinct"         return TOKEN_DISTINCT;
  "double"           return TOKEN_DOUBLE;
  "drop"             return TOKEN_DROP;
  "else"             return TOKEN_ELSE;
  "end"              return TOKEN_END;
  "exists"           return TOKEN_EXISTS;
  "extract"          return TOKEN_EXTRACT;
  "false"            return TOKEN_FALSE;
  "first"            return TOKEN_FIRST;
  "float"            return TOKEN_FLOAT;
  "following"        return TOKEN_FOLLOWING;
  "for"              return TOKEN_FOR;
  "foreign"          return TOKEN_FOREIGN;
  "from"             return TOKEN_FROM;
  "full"             return TOKEN_FULL;
  "group"            return TOKEN_GROUP;
  "hash"             return TOKEN_HASH;
  "having"           return TOKEN_HAVING;
  "hour"             return TOKEN_HOUR;
  "in"               return TOKEN_IN;
  "index"            return TOKEN_INDEX;
  "inner"            return TOKEN_INNER;
  "insert"           return TOKEN_INSERT;
  "int"              return TOKEN_INTEGER;
  "integer"          return TOKEN_INTEGER;
  "intersect"        return TOKEN_INTERSECT;
  "interval"         return TOKEN_INTERVAL;
  "into"             return TOKEN_INTO;
  "is"               return TOKEN_IS;
  "join"             return TOKEN_JOIN;
  "key"              return TOKEN_KEY;
  "last"             return TOKEN_LAST;
  "left"             return TOKEN_LEFT;
  "like"             return TOKEN_LIKE;
  "limit"            return TOKEN_LIMIT;
  "long"             return TOKEN_LONG;
  "minute"           return TOKEN_MINUTE;
  "month"            return TOKEN_MONTH;
  "not"              return TOKEN_NOT;
  "null"             return TOKEN_NULL;
  "nulls"            return TOKEN_NULLS;
  "off"              return TOKEN_OFF;
  "on"               return TOKEN_ON;
  "or"               return TOKEN_OR;
  "order"            return TOKEN_ORDER;
  "outer"            return TOKEN_OUTER;
  "over"             return TOKEN_OVER;
  "partition"        return TOKEN_PARTITION;
  "partitions"       return TOKEN_PARTITIONS;
  "percent"          return TOKEN_PERCENT;
  "preceding"        return TOKEN_PRECEDING;
  "primary"          return TOKEN_PRIMARY;
  "priority"         return TOKEN_PRIORITY;
  "quit"             return TOKEN_QUIT;
  "range"            return TOKEN_RANGE;
  "real"             return TOKEN_REAL;
  "references"       return TOKEN_REFERENCES;
  "regexp"           return TOKEN_REGEXP;
  "right"            return TOKEN_RIGHT;
  "row"              return TOKEN_ROW;
  "row_delimiter"    return TOKEN_ROW_DELIMITER;
  "rows"             return TOKEN_ROWS;
  "second"           return TOKEN_SECOND;
  "select"           return TOKEN_SELECT;
  "set"              return TOKEN_SET;
  "sma"              return TOKEN_SMA;
  "smallint"         return TOKEN_SMALLINT;
  "stderr"           return TOKEN_STDERR;
  "stdout"           return TOKEN_STDOUT;
  "substring"        return TOKEN_SUBSTRING;
  "table"            return TOKEN_TABLE;
  "then"             return TOKEN_THEN;
  "time"             return TOKEN_TIME;
  "timestamp"        return TOKEN_TIMESTAMP;
  "to"               return TOKEN_TO;
  "true"             return TOKEN_TRUE;
  "tuplesample"      return TOKEN_TUPLESAMPLE;
  "unbounded"        return TOKEN_UNBOUNDED;
  "union"            return TOKEN_UNION;
  "unique"           return TOKEN_UNIQUE;
  "update"           return TOKEN_UPDATE;
  "using"            return TOKEN_USING;
  "values"           return TOKEN_VALUES;
  "varchar"          return TOKEN_VARCHAR;
  "when"             return TOKEN_WHEN;
  "where"            return TOKEN_WHERE;
  "window"           return TOKEN_WINDOW;
  "with"             return TOKEN_WITH;
  "year"             return TOKEN_YEAR;
  "yearmonth"        return TOKEN_YEARMONTH;

   /* Comparison operators. Both "!=" and "<>" are reported as TOKEN_NEQ. */
  "="                return TOKEN_EQ;
  "!="               return TOKEN_NEQ;
  "<>"               return TOKEN_NEQ;
  "<"                return TOKEN_LT;
  ">"                return TOKEN_GT;
  "<="               return TOKEN_LEQ;
  ">="               return TOKEN_GEQ;

   /* Single-character punctuation is passed through as its own character code. */
  [-+*/%(),.;]       return yytext[0];
  [\[\]]             return yytext[0];

   /**
    * Quoted strings. Prefacing a string with an 'e' or 'E' causes escape
    * sequences to be processed (as in PostgreSQL).
    *
    * Each opening rule below only allocates an empty literal positioned at
    * the start of the match and switches condition; no token is returned
    * until the matching string condition sees the closing quote.
    **/
  [eE]\' {
    yylval->string_value_ = new quickstep::ParseString(yylloc->first_line, yylloc->first_column);
    BEGIN(CONDITION_STRING_SINGLE_QUOTED_ESCAPED);
  }

  \' {
    yylval->string_value_ = new quickstep::ParseString(yylloc->first_line, yylloc->first_column);
    BEGIN(CONDITION_STRING_SINGLE_QUOTED);
  }

  \" {
    yylval->string_value_ = new quickstep::ParseString(yylloc->first_line, yylloc->first_column);
    BEGIN(CONDITION_STRING_DOUBLE_QUOTED);
  }

} /* CONDITION_SQL */

 /*
  * Catch an unterminated string: the input ended while the scanner was still
  * inside one of the three quoted-string conditions.
  */
<CONDITION_STRING_SINGLE_QUOTED,CONDITION_STRING_SINGLE_QUOTED_ESCAPED,CONDITION_STRING_DOUBLE_QUOTED>{
  <<EOF>> {
    /* Free the partially built literal (no string token will be returned for
     * it), go back to the initial condition and report the error. */
    delete yylval->string_value_;
    BEGIN(INITIAL);
    quickstep_yyerror(NULL, yyscanner, NULL, "unterminated string");
    return TOKEN_LEX_ERROR;
  }
}

 /*
  * Backslash escapes inside an E'...' literal. Each rule either appends the
  * byte denoted by the escape to the literal being built, or discards the
  * literal and reports a lexer error.
  */
<CONDITION_STRING_SINGLE_QUOTED_ESCAPED>{
  \\[0-7]{1,3} {
    /* One to three octal digits naming a byte value. */
    unsigned int octal_value;
    std::sscanf(yytext + 1, "%o", &octal_value);
    if (octal_value <= 0xff) {
      yylval->string_value_->push_back(octal_value);
    } else {
      /* Three octal digits can reach 0777, which does not fit in one byte. */
      delete yylval->string_value_;
      BEGIN(INITIAL);
      quickstep_yyerror(NULL, yyscanner, NULL, "octal escape sequence out of 1-byte range");
      return TOKEN_LEX_ERROR;
    }
  }
  \\x[0-9a-fA-F]{1,2} {
    /* An 'x' and one or two hex digits; the value cannot exceed 0xff. */
    unsigned int hex_value;
    std::sscanf(yytext + 2, "%x", &hex_value);
    yylval->string_value_->push_back(hex_value);
  }
  \\[0-9]+ {
    /* A run of digits that is not a one-to-three digit octal code: it either
     * contains an 8 or 9, or is longer than three digits. */
    delete yylval->string_value_;
    BEGIN(INITIAL);
    quickstep_yyerror(NULL, yyscanner, NULL, "bad numeric escape sequence (must be octal or hex)");
    return TOKEN_LEX_ERROR;
  }
  \\b {
    /* \b stands for the backspace character. */
    yylval->string_value_->push_back('\b');
  }
  \\f {
    /* \f stands for the form-feed character. */
    yylval->string_value_->push_back('\f');
  }
  \\n {
    /* \n stands for the newline character. */
    yylval->string_value_->push_back('\n');
  }
  \\r {
    /* \r stands for the carriage-return character. */
    yylval->string_value_->push_back('\r');
  }
  \\t {
    /* \t stands for the horizontal tab character. */
    yylval->string_value_->push_back('\t');
  }
  \\(.|\n|\r) {
    /* Every remaining character, a literal line break included, simply
     * stands for itself once the backslash is dropped. */
    const char escaped_char = yytext[1];
    yylval->string_value_->push_back(escaped_char);
  }
  \\ {
    /* A backslash with nothing after it; since the rule above accepts any
     * following character, this is expected only at the end of the input. */
    delete yylval->string_value_;
    BEGIN(INITIAL);
    quickstep_yyerror(NULL, yyscanner, NULL, "unfinished escape sequence");
    return TOKEN_LEX_ERROR;
  }
}

<CONDITION_STRING_SINGLE_QUOTED,CONDITION_STRING_SINGLE_QUOTED_ESCAPED>{
  \'\' {
    /* Two quotes in a row become a single quote (this is specified by the SQL standard).
     * Being the longer match, a doubled quote is always taken by this rule
     * rather than being read as the end of the string. */
    yylval->string_value_->push_back('\'');
  }
  \' {
    /* End string: resume SQL scanning and hand the literal accumulated in
     * yylval->string_value_ to the parser. */
    BEGIN(CONDITION_SQL);
    return TOKEN_STRING_SINGLE_QUOTED;
  }
}

<CONDITION_STRING_DOUBLE_QUOTED>{
  \"\" {
    /* Two quotes in a row become a single quote (this is specified by the SQL standard).
     * Being the longer match, a doubled quote is always taken by this rule
     * rather than being read as the end of the string. */
    yylval->string_value_->push_back('"');
  }
  \" {
    /* End string: resume SQL scanning and hand the literal accumulated in
     * yylval->string_value_ to the parser. */
    BEGIN(CONDITION_SQL);
    return TOKEN_STRING_DOUBLE_QUOTED;
  }
}

<CONDITION_STRING_SINGLE_QUOTED>[^\']+ {
  /* Scan up to a quote. Everything else, backslashes and line breaks
   * included, is copied into the literal unchanged. */
  yylval->string_value_->append(yytext, yyleng);
}

<CONDITION_STRING_SINGLE_QUOTED_ESCAPED>[^\\\']+ {
  /* Scan up to a quote or escape sequence. The run stops at a backslash so
   * that the escape rules of this condition get to interpret it. */
  yylval->string_value_->append(yytext, yyleng);
}

<CONDITION_STRING_DOUBLE_QUOTED>[^\"]+ {
  /* Scan up to a quote. Everything else, backslashes and line breaks
   * included, is copied into the literal unchanged. */
  yylval->string_value_->append(yytext, yyleng);
}

<CONDITION_SQL>{
  [A-Za-z][A-Za-z0-9_]* {
    /* An identifier: a letter followed by letters, digits or underscores.
     * The keyword rules of this condition appear earlier in the file, so a
     * word that is exactly a keyword is reported as that keyword instead;
     * a longer word that merely starts with a keyword ends up here. */
    yylval->string_value_ = new quickstep::ParseString(
        yylloc->first_line, yylloc->first_column, std::string(yytext, yyleng));
    return TOKEN_NAME;
  }

  {unsigned_numeric_literal} {
    /* An unsigned number; the matched text is handed over unparsed. */
    yylval->numeric_literal_value_ = new quickstep::NumericParseLiteralValue(
        yylloc->first_line, yylloc->first_column, yytext);
    return TOKEN_UNSIGNED_NUMVAL;
  }

   /* A double dash starts a comment running to the end of the line; the line
    * break itself is left for the newline rule below. */
  "--".* /* comment */

   /* A line break produces no token; it only restarts the column counter. */
  [\n] { yycolumn = 0; }

  [ \r\t]+ ; /* ignore white space */

} /* CONDITION_SQL */

<INITIAL,CONDITION_COMMAND,CONDITION_SQL><<EOF>> {
  /* End of input in every condition except the multi-state string extracting
   * conditions, which report an unterminated string instead. The scanner is
   * put back into the initial condition before TOKEN_EOF is returned. */
  BEGIN(INITIAL);
  return TOKEN_EOF;
}

<*>. {
  /* Fallback for all conditions: a single character (other than a newline,
   * which '.' does not match) that no other rule accepted. The scanner is
   * reset to the initial condition and a lexer error is reported. */
  BEGIN(INITIAL);
  quickstep_yyerror(NULL, yyscanner, NULL, "illegal character");
  return TOKEN_LEX_ERROR;
}

%%
