| /** |
| * Licensed to the Apache Software Foundation (ASF) under one |
| * or more contributor license agreements. See the NOTICE file |
| * distributed with this work for additional information |
| * regarding copyright ownership. The ASF licenses this file |
| * to you under the Apache License, Version 2.0 (the |
| * "License"); you may not use this file except in compliance |
| * with the License. You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, |
| * software distributed under the License is distributed on an |
| * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
| * KIND, either express or implied. See the License for the |
| * specific language governing permissions and limitations |
| * under the License. |
| **/ |
| |
| /** |
| * Scanner generation options. |
| * - prefix: generated scanner symbols are named quickstep_yy* instead of yy*. |
| * - warn, nodefault: report questionable rules at generation time and do not |
| * generate flex's default echo rule, so the rules below must match every |
| * input character themselves (see the <*>. catch-all rule). |
| * - noyywrap, noinput, nounput: scanning stops at the first end-of-file and the |
| * unused input()/unput() helpers are not generated. |
| * - never-interactive, nounistd: the input is never treated as a terminal and |
| * <unistd.h> is not included. |
| * - bison-bridge, bison-locations, reentrant: the scanner keeps its state in a |
| * yyscanner handle and receives yylval and yylloc as pointers from the parser. |
| * - yylineno: flex maintains the current line number in yylineno. |
| **/ |
| %option prefix="quickstep_yy" |
| %option warn nodefault |
| %option noyywrap noinput nounput |
| %option never-interactive nounistd |
| %option bison-bridge reentrant |
| %option yylineno |
| %option bison-locations |
| |
| %{ |
| #include <cstdio> |
| #include <string> |
| #include <vector> |
| |
| #include "parser/ParseJoinedTableReference.hpp" |
| #include "parser/ParseLiteralValue.hpp" |
| #include "utility/PtrList.hpp" |
| #include "utility/PtrVector.hpp" |
| |
| // Forward declarations of quickstep classes and structs. They are placed before |
| // the include of parser/SqlParser_gen.hpp below, presumably because that header |
| // refers to these types by pointer -- TODO confirm against the generated header. |
| namespace quickstep { |
| |
| class BinaryOperation; |
| class Comparison; |
| class ParseAssignment; |
| class ParseAttribute; |
| class ParseAttributeDefinition; |
| class ParseBlockProperties; |
| class ParseColumnConstraint; |
| class ParseCommand; |
| struct ParseCopyFromParams; |
| class ParseDataType; |
| class ParseExpression; |
| struct ParseFrameInfo; |
| class ParseFunctionCall; |
| class ParseGroupBy; |
| class ParseHaving; |
| class ParseKeyIntegerValue; |
| class ParseKeyStringValue; |
| class ParseKeyStringList; |
| class ParseKeyValue; |
| class ParseLimit; |
| class ParseOrderBy; |
| class ParseOrderByItem; |
| class ParsePartitionClause; |
| class ParsePredicate; |
| class ParsePriority; |
| class ParseSample; |
| class ParseScalarLiteral; |
| class ParseSearchedWhenClause; |
| class ParseSelect; |
| class ParseSelectionClause; |
| class ParseSelectionItem; |
| class ParseSelectionItemScalar; |
| class ParseSelectionList; |
| class ParseSimpleTableReference; |
| class ParseSimpleWhenClause; |
| class ParseStringKeyLiteralValues; |
| class ParseStatement; |
| class ParseStatementCopyFrom; |
| class ParseStatementCreateTable; |
| class ParseStatementDelete; |
| class ParseStatementDropTable; |
| class ParseStatementInsert; |
| class ParseStatementInsertTuple; |
| class ParseStatementInsertSelection; |
| class ParseStatementSelect; |
| class ParseStatementQuit; |
| class ParseStatementUpdate; |
| class ParseSubqueryExpression; |
| class ParseSubqueryTableReference; |
| class ParseTableReference; |
| class ParseTableReferenceSignature; |
| class ParseWindow; |
| class Type; |
| class UnaryOperation; |
| |
| } // namespace quickstep |
| |
| #include "parser/ParseString.hpp" |
| #include "parser/SqlParser_gen.hpp" |
| #include "parser/ParserUtil.hpp" |
| |
| // Hook that flex expands before the action of every matched rule. It stamps the |
| // token's starting line and column into yylloc, then advances yycolumn past the |
| // matched text. Only first_line and first_column are written here. The newline |
| // rules reset yycolumn to 0 themselves, and the yyless(0) rule in INITIAL undoes |
| // the advance for the character it pushes back. |
| #define YY_USER_ACTION \ |
| { \ |
| yylloc->first_line = yylineno; \ |
| yylloc->first_column = yycolumn; \ |
| yycolumn += yyleng; \ |
| } |
| |
| %} |
| |
| %e 1024 |
| |
| /* FIXME(chasseur, qzeng): Add support for hexadecimal literals. */ |
| |
| /** |
| * These patterns are based on the SQL-2011 standard for syntax of numeric |
| * literals (Part 2, Section 5.3 of the standard). |
| * |
| * - sign: a single '-' or '+'. |
| * - unsigned_integer: one or more decimal digits. |
| * - signed_integer: an unsigned_integer with an optional leading sign; used only |
| * for the exponent of an approximate literal. |
| * - exact_numeric_literal: digits with an optional fractional part ("12", "12.", |
| * "12.5"), or a fraction with no integer part (".5"). |
| * - approximate_numeric_literal: an exact literal followed by 'e' or 'E' and a |
| * signed exponent ("1.5e-3"). |
| * - unsigned_numeric_literal: either of the two forms above; this is the pattern |
| * the CONDITION_SQL rules turn into TOKEN_UNSIGNED_NUMVAL. |
| **/ |
| sign [\-\+] |
| unsigned_integer [0-9]+ |
| signed_integer {sign}?{unsigned_integer} |
| exact_numeric_literal ({unsigned_integer}(\.{unsigned_integer}?)?)|(\.{unsigned_integer}) |
| approximate_numeric_literal {exact_numeric_literal}[eE]{signed_integer} |
| unsigned_numeric_literal {exact_numeric_literal}|{approximate_numeric_literal} |
| |
| /** |
| * Exclusive start conditions (only rules tagged with the active condition apply). |
| * - CONDITION_SQL: regular SQL text; entered from INITIAL when the next input |
| * character is not a backslash. |
| * - CONDITION_COMMAND: arguments following a backslash command, up to the end of |
| * the line. |
| * - CONDITION_STRING_*: inside a quoted string opened in CONDITION_SQL; the |
| * _ESCAPED variant is used for single-quoted strings prefixed with e or E. |
| **/ |
| %x CONDITION_SQL |
| %x CONDITION_COMMAND |
| %x CONDITION_STRING_SINGLE_QUOTED |
| %x CONDITION_STRING_SINGLE_QUOTED_ESCAPED |
| %x CONDITION_STRING_DOUBLE_QUOTED |
| |
| %% |
| |
| <INITIAL>{ |
| \\[a-zA-Z]+ { |
| /* A backslash followed by one or more letters introduces a system command. |
| * The token text keeps the leading backslash; the rest of the line is then |
| * scanned in CONDITION_COMMAND. */ |
| BEGIN(CONDITION_COMMAND); |
| yylval->string_value_ = new quickstep::ParseString( |
| yylloc->first_line, yylloc->first_column, std::string(yytext, yyleng)); |
| return TOKEN_COMMAND; |
| } |
| |
| [^\\] { |
| /* Anything other than a backslash starts a SQL command. Push the character |
| * back with yyless(0), undo the column advance YY_USER_ACTION already made |
| * for it, and rescan it in CONDITION_SQL. */ |
| yyless(0); |
| yycolumn--; |
| BEGIN(CONDITION_SQL); |
| } |
| } |
| |
| <CONDITION_COMMAND>{ |
| [a-zA-Z0-9\-\.:]+ { |
| /* This is a command argument: a run of letters, digits, '-', '.' and ':'. |
| * It is returned with the same TOKEN_COMMAND type as the command name. */ |
| yylval->string_value_ = new quickstep::ParseString( |
| yylloc->first_line, yylloc->first_column, std::string(yytext, yyleng)); |
| return TOKEN_COMMAND; |
| } |
| |
| [ \t]+ { |
| /* Ignore whitespace. */ |
| } |
| |
| [\n\r]+ { |
| /* Newline reverts the lexer to the initial state. A run of consecutive |
| * newline and carriage-return characters produces a single '\n' token, and |
| * column counting restarts at 0. */ |
| yycolumn = 0; |
| BEGIN(INITIAL); |
| return '\n'; |
| } |
| } |
| |
| <CONDITION_SQL>{ |
| /** |
| * Keywords. Several spellings share a token: asc/ascending, char/character, |
| * desc/descending and int/integer. These rules are listed before the identifier |
| * rule further down so that a keyword wins when both match the same text; a |
| * longer identifier such as "address" still wins over "add" by longest match. |
| * NOTE(review): the patterns are lowercase only; case-insensitive matching |
| * would have to come from the flex invocation, which is not visible here. |
| **/ |
| "add" return TOKEN_ADD; |
| "all" return TOKEN_ALL; |
| "alter" return TOKEN_ALTER; |
| "and" return TOKEN_AND; |
| "as" return TOKEN_AS; |
| "asc" return TOKEN_ASC; |
| "ascending" return TOKEN_ASC; |
| "between" return TOKEN_BETWEEN; |
| "bigint" return TOKEN_BIGINT; |
| "bit" return TOKEN_BIT; |
| "bitweaving" return TOKEN_BITWEAVING; |
| "blockproperties" return TOKEN_BLOCKPROPERTIES; |
| "blocksample" return TOKEN_BLOCKSAMPLE; |
| "bloomfilter" return TOKEN_BLOOM_FILTER; |
| "case" return TOKEN_CASE; |
| "csbtree" return TOKEN_CSB_TREE; |
| "by" return TOKEN_BY; |
| "char" return TOKEN_CHARACTER; |
| "character" return TOKEN_CHARACTER; |
| "check" return TOKEN_CHECK; |
| "column" return TOKEN_COLUMN; |
| "constraint" return TOKEN_CONSTRAINT; |
| "copy" return TOKEN_COPY; |
| "create" return TOKEN_CREATE; |
| "current" return TOKEN_CURRENT; |
| "date" return TOKEN_DATE; |
| "datetime" return TOKEN_DATETIME; |
| "day" return TOKEN_DAY; |
| "decimal" return TOKEN_DECIMAL; |
| "default" return TOKEN_DEFAULT; |
| "delete" return TOKEN_DELETE; |
| "delimiter" return TOKEN_DELIMITER; |
| "desc" return TOKEN_DESC; |
| "descending" return TOKEN_DESC; |
| "distinct" return TOKEN_DISTINCT; |
| "double" return TOKEN_DOUBLE; |
| "drop" return TOKEN_DROP; |
| "else" return TOKEN_ELSE; |
| "end" return TOKEN_END; |
| "escape_strings" return TOKEN_ESCAPE_STRINGS; |
| "exists" return TOKEN_EXISTS; |
| "extract" return TOKEN_EXTRACT; |
| "false" return TOKEN_FALSE; |
| "first" return TOKEN_FIRST; |
| "float" return TOKEN_FLOAT; |
| "following" return TOKEN_FOLLOWING; |
| "for" return TOKEN_FOR; |
| "foreign" return TOKEN_FOREIGN; |
| "from" return TOKEN_FROM; |
| "full" return TOKEN_FULL; |
| "group" return TOKEN_GROUP; |
| "hash" return TOKEN_HASH; |
| "having" return TOKEN_HAVING; |
| "hour" return TOKEN_HOUR; |
| "in" return TOKEN_IN; |
| "index" return TOKEN_INDEX; |
| "inner" return TOKEN_INNER; |
| "insert" return TOKEN_INSERT; |
| "int" return TOKEN_INTEGER; |
| "integer" return TOKEN_INTEGER; |
| "interval" return TOKEN_INTERVAL; |
| "into" return TOKEN_INTO; |
| "is" return TOKEN_IS; |
| "join" return TOKEN_JOIN; |
| "key" return TOKEN_KEY; |
| "last" return TOKEN_LAST; |
| "left" return TOKEN_LEFT; |
| "like" return TOKEN_LIKE; |
| "limit" return TOKEN_LIMIT; |
| "long" return TOKEN_LONG; |
| "minute" return TOKEN_MINUTE; |
| "month" return TOKEN_MONTH; |
| "not" return TOKEN_NOT; |
| "null" return TOKEN_NULL; |
| "nulls" return TOKEN_NULLS; |
| "off" return TOKEN_OFF; |
| "on" return TOKEN_ON; |
| "or" return TOKEN_OR; |
| "order" return TOKEN_ORDER; |
| "outer" return TOKEN_OUTER; |
| "over" return TOKEN_OVER; |
| "partition" return TOKEN_PARTITION; |
| "partitions" return TOKEN_PARTITIONS; |
| "percent" return TOKEN_PERCENT; |
| "preceding" return TOKEN_PRECEDING; |
| "primary" return TOKEN_PRIMARY; |
| "priority" return TOKEN_PRIORITY; |
| "quit" return TOKEN_QUIT; |
| "range" return TOKEN_RANGE; |
| "real" return TOKEN_REAL; |
| "references" return TOKEN_REFERENCES; |
| "regexp" return TOKEN_REGEXP; |
| "right" return TOKEN_RIGHT; |
| "row" return TOKEN_ROW; |
| "row_delimiter" return TOKEN_ROW_DELIMITER; |
| "rows" return TOKEN_ROWS; |
| "second" return TOKEN_SECOND; |
| "select" return TOKEN_SELECT; |
| "set" return TOKEN_SET; |
| "sma" return TOKEN_SMA; |
| "smallint" return TOKEN_SMALLINT; |
| "substring" return TOKEN_SUBSTRING; |
| "table" return TOKEN_TABLE; |
| "then" return TOKEN_THEN; |
| "time" return TOKEN_TIME; |
| "timestamp" return TOKEN_TIMESTAMP; |
| "true" return TOKEN_TRUE; |
| "tuplesample" return TOKEN_TUPLESAMPLE; |
| "unbounded" return TOKEN_UNBOUNDED; |
| "unique" return TOKEN_UNIQUE; |
| "update" return TOKEN_UPDATE; |
| "using" return TOKEN_USING; |
| "values" return TOKEN_VALUES; |
| "varchar" return TOKEN_VARCHAR; |
| "when" return TOKEN_WHEN; |
| "where" return TOKEN_WHERE; |
| "window" return TOKEN_WINDOW; |
| "with" return TOKEN_WITH; |
| "year" return TOKEN_YEAR; |
| "yearmonth" return TOKEN_YEARMONTH; |
| |
| /* Comparison operators; "!=" and "<>" both produce TOKEN_NEQ. */ |
| "=" return TOKEN_EQ; |
| "!=" return TOKEN_NEQ; |
| "<>" return TOKEN_NEQ; |
| "<" return TOKEN_LT; |
| ">" return TOKEN_GT; |
| "<=" return TOKEN_LEQ; |
| ">=" return TOKEN_GEQ; |
| |
| /* Single-character punctuation is returned as its own character code. */ |
| [-+*/%(),.;] return yytext[0]; |
| [\[\]] return yytext[0]; |
| |
| /** |
| * Quoted strings. Prefacing a string with an 'e' or 'E' causes escape |
| * sequences to be processed (as in PostgreSQL). |
| * |
| * Each opening-quote rule allocates an empty ParseString in yylval, positioned |
| * at the opening quote, and switches to the matching string condition. No |
| * token is returned here; the closing-quote rules return it once the string |
| * has been accumulated. |
| **/ |
| [eE]\' { |
| yylval->string_value_ = new quickstep::ParseString(yylloc->first_line, yylloc->first_column); |
| BEGIN(CONDITION_STRING_SINGLE_QUOTED_ESCAPED); |
| } |
| |
| \' { |
| yylval->string_value_ = new quickstep::ParseString(yylloc->first_line, yylloc->first_column); |
| BEGIN(CONDITION_STRING_SINGLE_QUOTED); |
| } |
| |
| \" { |
| yylval->string_value_ = new quickstep::ParseString(yylloc->first_line, yylloc->first_column); |
| BEGIN(CONDITION_STRING_DOUBLE_QUOTED); |
| } |
| |
| } /* CONDITION_SQL */ |
| |
| /* Catch an unterminated string: end of input inside any string condition frees |
| * the partially built ParseString, returns the lexer to INITIAL, reports the |
| * error and yields TOKEN_LEX_ERROR. */ |
| <CONDITION_STRING_SINGLE_QUOTED,CONDITION_STRING_SINGLE_QUOTED_ESCAPED,CONDITION_STRING_DOUBLE_QUOTED>{ |
| <<EOF>> { |
| delete yylval->string_value_; |
| BEGIN(INITIAL); |
| quickstep_yyerror(NULL, yyscanner, NULL, "unterminated string"); |
| return TOKEN_LEX_ERROR; |
| } |
| } |
| |
| /* Process escape sequences. These rules only apply inside e'...' strings. Every |
| * error path frees the partially built ParseString before returning. */ |
| <CONDITION_STRING_SINGLE_QUOTED_ESCAPED>{ |
| \\[0-7]{1,3} { |
| /* Octal code: one to three octal digits after the backslash. Three digits can |
| * encode values up to 0777, so anything above 0xff is rejected below. */ |
| unsigned int code; |
| std::sscanf(yytext + 1, "%o", &code); |
| if (code > 0xff) { |
| delete yylval->string_value_; |
| BEGIN(INITIAL); |
| quickstep_yyerror(NULL, yyscanner, NULL, "octal escape sequence out of 1-byte range"); |
| return TOKEN_LEX_ERROR; |
| } |
| yylval->string_value_->push_back(code); |
| } |
| \\x[0-9a-fA-F]{1,2} { |
| /* Hexadecimal code: one or two hex digits after the 'x', so the value always |
| * fits in one byte and needs no range check. */ |
| unsigned int code; |
| std::sscanf(yytext + 2, "%x", &code); |
| yylval->string_value_->push_back(code); |
| } |
| \\[0-9]+ { |
| /* A numeric escape sequence that isn't correctly specified. Flex prefers the |
| * longest match, so this rule fires when the digit run contains an 8 or 9 or |
| * is longer than three digits; shorter all-octal runs tie with the octal rule |
| * above, which is listed first and therefore wins. */ |
| delete yylval->string_value_; |
| BEGIN(INITIAL); |
| quickstep_yyerror(NULL, yyscanner, NULL, "bad numeric escape sequence (must be octal or hex)"); |
| return TOKEN_LEX_ERROR; |
| } |
| \\b { |
| /* Backspace */ |
| yylval->string_value_->push_back('\b'); |
| } |
| \\f { |
| /* Form-feed */ |
| yylval->string_value_->push_back('\f'); |
| } |
| \\n { |
| /* Newline */ |
| yylval->string_value_->push_back('\n'); |
| } |
| \\r { |
| /* Carriage-return */ |
| yylval->string_value_->push_back('\r'); |
| } |
| \\t { |
| /* Horizontal Tab */ |
| yylval->string_value_->push_back('\t'); |
| } |
| \\(.|\n|\r) { |
| /* Any other character (including actual newline or carriage return) is taken |
| * literally, e.g. an escaped quote or an escaped backslash. */ |
| yylval->string_value_->push_back(yytext[1]); |
| } |
| \\ { |
| /* This should only be encountered right before an EOF, because the rule |
| * above accepts a backslash followed by any character. */ |
| delete yylval->string_value_; |
| BEGIN(INITIAL); |
| quickstep_yyerror(NULL, yyscanner, NULL, "unfinished escape sequence"); |
| return TOKEN_LEX_ERROR; |
| } |
| } |
| |
| <CONDITION_STRING_SINGLE_QUOTED,CONDITION_STRING_SINGLE_QUOTED_ESCAPED>{ |
| \'\' { |
| /* Two quotes in a row become a single quote (this is specified by the SQL standard). |
| * Being the longer match, this rule takes precedence over the closing-quote rule. */ |
| yylval->string_value_->push_back('\''); |
| } |
| \' { |
| /* End string: the ParseString accumulated in yylval is returned to the parser |
| * and SQL scanning resumes. Plain and escaped strings share this token type. */ |
| BEGIN(CONDITION_SQL); |
| return TOKEN_STRING_SINGLE_QUOTED; |
| } |
| } |
| |
| <CONDITION_STRING_DOUBLE_QUOTED>{ |
| \"\" { |
| /* Two double quotes in a row become a single double quote (this is specified by the SQL standard). |
| * Being the longer match, this rule takes precedence over the closing-quote rule. */ |
| yylval->string_value_->push_back('"'); |
| } |
| \" { |
| /* End string: the ParseString accumulated in yylval is returned to the parser |
| * and SQL scanning resumes. */ |
| BEGIN(CONDITION_SQL); |
| return TOKEN_STRING_DOUBLE_QUOTED; |
| } |
| } |
| |
| <CONDITION_STRING_SINGLE_QUOTED>[^\']+ { |
| /* Scan up to a quote and append the whole run to the string being built. |
| * NOTE(review): this pattern (like the two below) also matches newlines, and |
| * yycolumn is not reset here, so column numbers after a multi-line string keep |
| * counting from the line where the string began -- confirm whether that |
| * matters for error reporting. */ |
| yylval->string_value_->append(yytext, yyleng); |
| } |
| |
| <CONDITION_STRING_SINGLE_QUOTED_ESCAPED>[^\\\']+ { |
| /* Scan up to a quote or escape sequence; backslashes are left for the escape rules. */ |
| yylval->string_value_->append(yytext, yyleng); |
| } |
| |
| <CONDITION_STRING_DOUBLE_QUOTED>[^\"]+ { |
| /* Scan up to a double quote. */ |
| yylval->string_value_->append(yytext, yyleng); |
| } |
| |
| <CONDITION_SQL>{ |
| [A-Za-z][A-Za-z0-9_]* { |
| /* Identifier: a letter followed by letters, digits or underscores. Text that |
| * is exactly a keyword never reaches this rule, because the keyword rules are |
| * listed earlier and win ties. */ |
| yylval->string_value_ = new quickstep::ParseString( |
| yylloc->first_line, yylloc->first_column, std::string(yytext, yyleng)); |
| return TOKEN_NAME; |
| } |
| |
| {unsigned_numeric_literal} { |
| /* Numeric literal without a sign; the matched text is passed to |
| * NumericParseLiteralValue as-is. */ |
| yylval->numeric_literal_value_ = new quickstep::NumericParseLiteralValue( |
| yylloc->first_line, yylloc->first_column, yytext); |
| return TOKEN_UNSIGNED_NUMVAL; |
| } |
| |
| "--".* /* comment: skipped from the double dash up to, but not including, the newline */ |
| |
| [\n] { /* a newline restarts column counting */ yycolumn = 0; } |
| |
| [ \r\t]+ ; /* ignore white space */ |
| |
| } /* CONDITION_SQL */ |
| |
| <INITIAL,CONDITION_COMMAND,CONDITION_SQL><<EOF>> { |
| /* All conditions except for the multi-state string extracting conditions, which |
| * treat end of input as an unterminated string. Resets the lexer to INITIAL |
| * and reports a normal end of input. */ |
| BEGIN(INITIAL); |
| return TOKEN_EOF; |
| } |
| |
| <*>. { |
| /* Catch-all for any single character (other than a newline) that no other |
| * rule in the active condition accepts. With %option nodefault there is no |
| * implicit fallback rule, so the character is reported as a lexical error and |
| * the lexer returns to INITIAL. */ |
| BEGIN(INITIAL); |
| quickstep_yyerror(NULL, yyscanner, NULL, "illegal character"); |
| return TOKEN_LEX_ERROR; |
| } |
| |
| %% |