blob: c6402e0fa49e23f57aad85fca1d9c591d0123e0c [file] [log] [blame]
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
**/
/*
 * Scanner generation options.
 *   prefix="quickstep_yy"   Replace the default "yy" prefix on the generated
 *                           scanner's externally visible names.
 *   warn nodefault          Emit generator warnings and suppress flex's
 *                           built-in "echo unmatched text" rule; the "<*>."
 *                           rule at the end of this file is the catch-all.
 *   noyywrap                At end of input, do not call yywrap().
 *   noinput nounput         Do not generate the input()/unput() helpers.
 *   never-interactive       Never treat the input as interactive.
 *   nounistd                Do not include <unistd.h> in the generated code.
 *   bison-bridge reentrant  yylex() receives a yylval pointer and keeps all
 *                           scanner state in the yyscanner handle.
 *   yylineno                Have flex maintain the current line number.
 *   bison-locations         yylex() also receives a yylloc pointer.
 */
%option prefix="quickstep_yy"
%option warn nodefault
%option noyywrap noinput nounput
%option never-interactive nounistd
%option bison-bridge reentrant
%option yylineno
%option bison-locations
%{
#include <cstdio>
#include <string>
#include <vector>
#include "parser/ParseJoinedTableReference.hpp"
#include "parser/ParseLiteralValue.hpp"
#include "utility/PtrList.hpp"
#include "utility/PtrVector.hpp"
namespace quickstep {

// Forward declarations only; no definitions are required by the scanner
// itself. Kept in alphabetical order so additions are easy to place.
// NOTE(review): presumably these are the types named by the semantic-value
// union in SqlParser_gen.hpp, which is included right after this namespace --
// confirm against the parser before removing any entry.

class BinaryOperation;
class Comparison;
class ParseAssignment;
class ParseAttribute;
class ParseAttributeDefinition;
class ParseBlockProperties;
class ParseColumnConstraint;
class ParseCommand;
struct ParseCopyFromParams;
class ParseDataType;
class ParseExpression;
struct ParseFrameInfo;
class ParseFunctionCall;
class ParseGroupBy;
class ParseHaving;
class ParseKeyIntegerValue;
class ParseKeyStringList;
class ParseKeyStringValue;
class ParseKeyValue;
class ParseLimit;
class ParseOrderBy;
class ParseOrderByItem;
class ParsePartitionClause;
class ParsePredicate;
class ParsePriority;
class ParseSample;
class ParseScalarLiteral;
class ParseSearchedWhenClause;
class ParseSelect;
class ParseSelectionClause;
class ParseSelectionItem;
class ParseSelectionItemScalar;
class ParseSelectionList;
class ParseSetOperation;
class ParseSimpleTableReference;
class ParseSimpleWhenClause;
class ParseStatement;
class ParseStatementCopyFrom;
class ParseStatementCreateTable;
class ParseStatementDelete;
class ParseStatementDropTable;
class ParseStatementInsert;
class ParseStatementInsertSelection;
class ParseStatementInsertTuple;
class ParseStatementQuit;
class ParseStatementSetOperation;
class ParseStatementUpdate;
class ParseStringKeyLiteralValues;
class ParseSubqueryExpression;
class ParseSubqueryTableReference;
class ParseTableReference;
class ParseTableReferenceSignature;
class ParseWindow;
class Type;
class UnaryOperation;

}  // namespace quickstep
#include "parser/ParseString.hpp"
#include "parser/SqlParser_gen.hpp"
#include "parser/ParserUtil.hpp"
/*
 * Location bookkeeping hook. flex runs YY_USER_ACTION just before the action
 * of every matched rule. It stamps the start of the match into the bison
 * location (first_line / first_column) and then advances the running column
 * counter by the length of the matched text.
 *
 * Rules that consume a line break reset yycolumn themselves, and the INITIAL
 * push-back rule undoes the advance after yyless(0).
 *
 * NOTE(review): last_line / last_column are never written here, and a match
 * that contains a newline inside a quoted string keeps adding to yycolumn
 * instead of restarting it -- confirm that callers rely only on
 * first_line / first_column.
 */
#define YY_USER_ACTION \
{ \
yylloc->first_line = yylineno; \
yylloc->first_column = yycolumn; \
yycolumn += yyleng; \
}
%}
%e 1024
/* FIXME(chasseur, qzeng): Add support for hexadecimal literals. */
/**
* These patterns are based on the SQL-2011 standard for syntax of numeric
* literals (Part 2, Section 5.3 of the standard).
**/
/* A single '+' or '-'. Only reachable through signed_integer, i.e. exponents. */
sign [\-\+]
/* One or more decimal digits. */
unsigned_integer [0-9]+
/* Digits with an optional leading sign; used for the exponent part. */
signed_integer {sign}?{unsigned_integer}
/* Fixed-point forms: "12", "12.", "12.5" or ".5". */
exact_numeric_literal ({unsigned_integer}(\.{unsigned_integer}?)?)|(\.{unsigned_integer})
/* An exact literal followed by 'e' or 'E' and a signed exponent, e.g. "1.5e-3". */
approximate_numeric_literal {exact_numeric_literal}[eE]{signed_integer}
/*
 * Any numeric literal without a leading sign. A '+' or '-' in front of a
 * number is scanned separately as a single-character operator token.
 */
unsigned_numeric_literal {exact_numeric_literal}|{approximate_numeric_literal}
/*
 * Exclusive start conditions: while one is active, only rules explicitly
 * tagged with it (or with <*>) can match.
 *   CONDITION_SQL                           Ordinary SQL text.
 *   CONDITION_COMMAND                       Arguments of a backslash command,
 *                                           up to the end of the line.
 *   CONDITION_STRING_SINGLE_QUOTED          Body of a '...' string.
 *   CONDITION_STRING_SINGLE_QUOTED_ESCAPED  Body of an e'...' string, where
 *                                           backslash escapes are processed.
 *   CONDITION_STRING_DOUBLE_QUOTED          Body of a "..." string.
 */
%x CONDITION_SQL
%x CONDITION_COMMAND
%x CONDITION_STRING_SINGLE_QUOTED
%x CONDITION_STRING_SINGLE_QUOTED_ESCAPED
%x CONDITION_STRING_DOUBLE_QUOTED
%%
<INITIAL>{
  \\[a-zA-Z]+ {
    /*
     * A backslash followed by letters introduces a system command. The whole
     * match, backslash included, becomes the token text; what follows on the
     * line is scanned as command arguments.
     */
    yylval->string_value_ = new quickstep::ParseString(
        yylloc->first_line,
        yylloc->first_column,
        std::string(yytext, yyleng));
    BEGIN(CONDITION_COMMAND);
    return TOKEN_COMMAND;
  }
  [^\\] {
    /*
     * Any other first character means SQL text. Nothing is consumed: switch
     * to the SQL rules, hand the character back to the input, and take back
     * the one column that YY_USER_ACTION already counted for it.
     */
    BEGIN(CONDITION_SQL);
    yyless(0);
    --yycolumn;
  }
}
<CONDITION_COMMAND>{
  [a-zA-Z0-9_\-\.:]+ {
    /* One argument of the system command, returned with its start position. */
    yylval->string_value_ = new quickstep::ParseString(
        yylloc->first_line,
        yylloc->first_column,
        std::string(yytext, yyleng));
    return TOKEN_COMMAND;
  }
  [ \t]+ {
    /* Blanks and tabs only separate arguments; nothing to emit. */
  }
  [\n\r]+ {
    /*
     * A run of line breaks ends the command: go back to the initial state,
     * restart column counting, and report a single '\n' token.
     */
    BEGIN(INITIAL);
    yycolumn = 0;
    return '\n';
  }
}
<CONDITION_SQL>{
  /*
   * Reserved words. Each literal maps to its parser token. These rules come
   * before the generic identifier rule later in this file, so when a keyword
   * and the identifier pattern match the same text the keyword wins; longer
   * text such as "selected" still falls through to the identifier rule
   * because flex prefers the longest match.
   *
   * Several spellings share one token: asc/ascending, char/character,
   * desc/descending and int/integer.
   *
   * NOTE(review): the literals are lower case and no case-insensitive option
   * is visible in this file; presumably the build passes -i to flex --
   * confirm, otherwise upper-case keywords scan as identifiers.
   */
"add" return TOKEN_ADD;
"all" return TOKEN_ALL;
"alter" return TOKEN_ALTER;
"and" return TOKEN_AND;
"as" return TOKEN_AS;
"asc" return TOKEN_ASC;
"ascending" return TOKEN_ASC;
"between" return TOKEN_BETWEEN;
"bigint" return TOKEN_BIGINT;
"bit" return TOKEN_BIT;
"bitweaving" return TOKEN_BITWEAVING;
"blockproperties" return TOKEN_BLOCKPROPERTIES;
"blocksample" return TOKEN_BLOCKSAMPLE;
"bloomfilter" return TOKEN_BLOOM_FILTER;
"case" return TOKEN_CASE;
"csbtree" return TOKEN_CSB_TREE;
"by" return TOKEN_BY;
"char" return TOKEN_CHARACTER;
"character" return TOKEN_CHARACTER;
"check" return TOKEN_CHECK;
"column" return TOKEN_COLUMN;
"constraint" return TOKEN_CONSTRAINT;
"copy" return TOKEN_COPY;
"create" return TOKEN_CREATE;
"current" return TOKEN_CURRENT;
"date" return TOKEN_DATE;
"datetime" return TOKEN_DATETIME;
"day" return TOKEN_DAY;
"decimal" return TOKEN_DECIMAL;
"default" return TOKEN_DEFAULT;
"delete" return TOKEN_DELETE;
"delimiter" return TOKEN_DELIMITER;
"desc" return TOKEN_DESC;
"descending" return TOKEN_DESC;
"distinct" return TOKEN_DISTINCT;
"double" return TOKEN_DOUBLE;
"drop" return TOKEN_DROP;
"else" return TOKEN_ELSE;
"end" return TOKEN_END;
"escape_strings" return TOKEN_ESCAPE_STRINGS;
"exists" return TOKEN_EXISTS;
"extract" return TOKEN_EXTRACT;
"false" return TOKEN_FALSE;
"first" return TOKEN_FIRST;
"float" return TOKEN_FLOAT;
"following" return TOKEN_FOLLOWING;
"for" return TOKEN_FOR;
"foreign" return TOKEN_FOREIGN;
"from" return TOKEN_FROM;
"full" return TOKEN_FULL;
"group" return TOKEN_GROUP;
"hash" return TOKEN_HASH;
"having" return TOKEN_HAVING;
"hour" return TOKEN_HOUR;
"in" return TOKEN_IN;
"index" return TOKEN_INDEX;
"inner" return TOKEN_INNER;
"insert" return TOKEN_INSERT;
"int" return TOKEN_INTEGER;
"integer" return TOKEN_INTEGER;
"intersect" return TOKEN_INTERSECT;
"interval" return TOKEN_INTERVAL;
"into" return TOKEN_INTO;
"is" return TOKEN_IS;
"join" return TOKEN_JOIN;
"key" return TOKEN_KEY;
"last" return TOKEN_LAST;
"left" return TOKEN_LEFT;
"like" return TOKEN_LIKE;
"limit" return TOKEN_LIMIT;
"long" return TOKEN_LONG;
"minute" return TOKEN_MINUTE;
"month" return TOKEN_MONTH;
"not" return TOKEN_NOT;
"null" return TOKEN_NULL;
"nulls" return TOKEN_NULLS;
"off" return TOKEN_OFF;
"on" return TOKEN_ON;
"or" return TOKEN_OR;
"order" return TOKEN_ORDER;
"outer" return TOKEN_OUTER;
"over" return TOKEN_OVER;
"partition" return TOKEN_PARTITION;
"partitions" return TOKEN_PARTITIONS;
"percent" return TOKEN_PERCENT;
"preceding" return TOKEN_PRECEDING;
"primary" return TOKEN_PRIMARY;
"priority" return TOKEN_PRIORITY;
"quit" return TOKEN_QUIT;
"range" return TOKEN_RANGE;
"real" return TOKEN_REAL;
"references" return TOKEN_REFERENCES;
"regexp" return TOKEN_REGEXP;
"right" return TOKEN_RIGHT;
"row" return TOKEN_ROW;
"row_delimiter" return TOKEN_ROW_DELIMITER;
"rows" return TOKEN_ROWS;
"second" return TOKEN_SECOND;
"select" return TOKEN_SELECT;
"set" return TOKEN_SET;
"sma" return TOKEN_SMA;
"smallint" return TOKEN_SMALLINT;
"substring" return TOKEN_SUBSTRING;
"table" return TOKEN_TABLE;
"then" return TOKEN_THEN;
"time" return TOKEN_TIME;
"timestamp" return TOKEN_TIMESTAMP;
"true" return TOKEN_TRUE;
"tuplesample" return TOKEN_TUPLESAMPLE;
"unbounded" return TOKEN_UNBOUNDED;
"union" return TOKEN_UNION;
"unique" return TOKEN_UNIQUE;
"update" return TOKEN_UPDATE;
"using" return TOKEN_USING;
"values" return TOKEN_VALUES;
"varchar" return TOKEN_VARCHAR;
"when" return TOKEN_WHEN;
"where" return TOKEN_WHERE;
"window" return TOKEN_WINDOW;
"with" return TOKEN_WITH;
"year" return TOKEN_YEAR;
"yearmonth" return TOKEN_YEARMONTH;
  /*
   * Comparison operators. "!=" and "<>" are two spellings of the same
   * not-equal token; the two-character operators beat "<" and ">" by
   * longest match.
   */
"=" return TOKEN_EQ;
"!=" return TOKEN_NEQ;
"<>" return TOKEN_NEQ;
"<" return TOKEN_LT;
">" return TOKEN_GT;
"<=" return TOKEN_LEQ;
">=" return TOKEN_GEQ;
  /* Single-character punctuation is returned as its own character code. */
[-+*/%(),.;] return yytext[0];
[\[\]] return yytext[0];
  /**
   * Quoted strings. Prefacing a string with an 'e' or 'E' causes escape
   * sequences to be processed (as in PostgreSQL).
   *
   * An opening quote returns no token. It creates an empty ParseString
   * stamped with the position of the opening quote and switches to the
   * matching string start condition; the rules for that condition fill the
   * string and return the token once the closing quote is seen.
   **/
[eE]\' {
/* e'...' or E'...': single-quoted string with backslash escapes. */
yylval->string_value_ = new quickstep::ParseString(yylloc->first_line, yylloc->first_column);
BEGIN(CONDITION_STRING_SINGLE_QUOTED_ESCAPED);
}
\' {
/* '...': single-quoted string, backslashes are ordinary characters. */
yylval->string_value_ = new quickstep::ParseString(yylloc->first_line, yylloc->first_column);
BEGIN(CONDITION_STRING_SINGLE_QUOTED);
}
\" {
/* "...": double-quoted string. */
yylval->string_value_ = new quickstep::ParseString(yylloc->first_line, yylloc->first_column);
BEGIN(CONDITION_STRING_DOUBLE_QUOTED);
}
} /* CONDITION_SQL */
/* Catch an unterminated string. */
<CONDITION_STRING_SINGLE_QUOTED,CONDITION_STRING_SINGLE_QUOTED_ESCAPED,CONDITION_STRING_DOUBLE_QUOTED>{
  <<EOF>> {
    /*
     * The input ended before the closing quote. Reset the scanner to its
     * initial state, discard the partially built string (it is never handed
     * to the parser), then report the error.
     */
    BEGIN(INITIAL);
    delete yylval->string_value_;
    quickstep_yyerror(NULL, yyscanner, NULL, "unterminated string");
    return TOKEN_LEX_ERROR;
  }
}
/* Process escape sequences. */
<CONDITION_STRING_SINGLE_QUOTED_ESCAPED>{
  \\[0-7]{1,3} {
    /* Octal escape: the one to three digits after the backslash give a byte. */
    unsigned int byte_value;
    std::sscanf(yytext + 1, "%o", &byte_value);
    if (byte_value <= 0xff) {
      yylval->string_value_->push_back(byte_value);
    } else {
      /* Three octal digits can exceed 0xff; such a value is rejected. */
      delete yylval->string_value_;
      BEGIN(INITIAL);
      quickstep_yyerror(NULL, yyscanner, NULL, "octal escape sequence out of 1-byte range");
      return TOKEN_LEX_ERROR;
    }
  }
  \\x[0-9a-fA-F]{1,2} {
    /* Hexadecimal escape: one or two hex digits after "\x" give a byte. */
    unsigned int byte_value;
    std::sscanf(yytext + 2, "%x", &byte_value);
    yylval->string_value_->push_back(byte_value);
  }
  \\[0-9]+ {
    /*
     * Digits after a backslash that the octal rule did not claim (a digit
     * 8 or 9, or more than three digits). Drop the string and fail.
     */
    delete yylval->string_value_;
    BEGIN(INITIAL);
    quickstep_yyerror(NULL, yyscanner, NULL, "bad numeric escape sequence (must be octal or hex)");
    return TOKEN_LEX_ERROR;
  }
  \\b {
    /* "\b" -> backspace */
    yylval->string_value_->push_back('\b');
  }
  \\f {
    /* "\f" -> form feed */
    yylval->string_value_->push_back('\f');
  }
  \\n {
    /* "\n" -> newline */
    yylval->string_value_->push_back('\n');
  }
  \\r {
    /* "\r" -> carriage return */
    yylval->string_value_->push_back('\r');
  }
  \\t {
    /* "\t" -> horizontal tab */
    yylval->string_value_->push_back('\t');
  }
  \\(.|\n|\r) {
    /*
     * A backslash before any other character, a real line break included,
     * stands for that character itself.
     */
    yylval->string_value_->push_back(yytext[1]);
  }
  \\ {
    /*
     * A backslash with nothing after it. Every other following character is
     * covered above, so this can only happen at the very end of the input.
     */
    delete yylval->string_value_;
    BEGIN(INITIAL);
    quickstep_yyerror(NULL, yyscanner, NULL, "unfinished escape sequence");
    return TOKEN_LEX_ERROR;
  }
}
<CONDITION_STRING_SINGLE_QUOTED,CONDITION_STRING_SINGLE_QUOTED_ESCAPED>{
\'\' {
/*
 * Two single quotes in a row stand for one literal single quote (this is
 * specified by the SQL standard). Being the longer match, this rule is
 * preferred over the closing-quote rule below.
 */
yylval->string_value_->push_back('\'');
}
\' {
/*
 * A lone single quote closes the string: resume SQL scanning and return the
 * token. yylval->string_value_ holds the accumulated contents.
 */
BEGIN(CONDITION_SQL);
return TOKEN_STRING_SINGLE_QUOTED;
}
}
<CONDITION_STRING_DOUBLE_QUOTED>{
\"\" {
/*
 * Two double quotes in a row stand for one literal double-quote character
 * (this is specified by the SQL standard). Being the longer match, this rule
 * is preferred over the closing-quote rule below.
 */
yylval->string_value_->push_back('"');
}
\" {
/*
 * A lone double quote closes the string: resume SQL scanning and return the
 * token. yylval->string_value_ holds the accumulated contents.
 */
BEGIN(CONDITION_SQL);
return TOKEN_STRING_DOUBLE_QUOTED;
}
}
<CONDITION_STRING_SINGLE_QUOTED>[^\']+ {
/*
 * Plain string text: take everything up to the next single quote in one go
 * and append it. The negated class also matches line breaks and backslashes,
 * so both are copied into the string unchanged.
 */
yylval->string_value_->append(yytext, yyleng);
}
<CONDITION_STRING_SINGLE_QUOTED_ESCAPED>[^\\\']+ {
/*
 * Plain text inside an e'...' string: take everything up to the next single
 * quote or backslash, leaving those to the quote and escape rules.
 */
yylval->string_value_->append(yytext, yyleng);
}
<CONDITION_STRING_DOUBLE_QUOTED>[^\"]+ {
/*
 * Plain text inside a "..." string: take everything up to the next double
 * quote (line breaks included) and append it.
 */
yylval->string_value_->append(yytext, yyleng);
}
<CONDITION_SQL>{
[A-Za-z][A-Za-z0-9_]* {
/*
 * Identifier: a letter followed by letters, digits or underscores. Text that
 * is exactly a reserved word is claimed by the keyword rules earlier in this
 * file and never reaches this action.
 */
yylval->string_value_ = new quickstep::ParseString(
yylloc->first_line, yylloc->first_column, std::string(yytext, yyleng));
return TOKEN_NAME;
}
{unsigned_numeric_literal} {
/*
 * Numeric literal without a leading sign. The matched text is handed over
 * as-is together with its start position.
 */
yylval->numeric_literal_value_ = new quickstep::NumericParseLiteralValue(
yylloc->first_line, yylloc->first_column, yytext);
return TOKEN_UNSIGNED_NUMVAL;
}
  /* "--" starts a comment that is skipped up to, but not including, the line break. */
"--".* /* comment */
  /* A line feed restarts column counting; flex keeps the line count itself. */
[\n] { yycolumn = 0; }
  /* Blanks, tabs and carriage returns between tokens are skipped. */
[ \r\t]+ ; /* ignore white space */
} /* CONDITION_SQL */
<INITIAL,CONDITION_COMMAND,CONDITION_SQL><<EOF>> {
/*
 * End of input in every condition except the multi-state string-extracting
 * conditions, which report an unterminated string instead. Reset to the
 * initial state and return the end-of-input token.
 */
BEGIN(INITIAL);
return TOKEN_EOF;
}
<*>. {
/*
 * Catch-all for any start condition: a single character (other than a line
 * feed) that no other rule accepts. Because this is the last rule, it only
 * wins when nothing else matches. Reset to the initial state and report the
 * error.
 */
BEGIN(INITIAL);
quickstep_yyerror(NULL, yyscanner, NULL, "illegal character");
return TOKEN_LEX_ERROR;
}
%%