// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
package org.apache.impala.analysis;
import java_cup.runtime.Symbol;
import java.lang.Integer;
import java.math.BigDecimal;
import java.util.HashMap;
import java.util.LinkedHashMap;
import java.util.Map;
import java.util.Set;
import java.util.Iterator;
import java.util.Arrays;
import java.util.HashSet;
import com.google.common.base.Preconditions;
import org.apache.impala.analysis.SqlParserSymbols;
import org.apache.impala.catalog.BuiltinsDb;
import org.apache.impala.service.BackendConfig;
import org.apache.impala.thrift.TReservedWordsVersion;
%%
%class SqlScanner
%cup
%public
%final
%eofval{
return newToken(SqlParserSymbols.EOF, null);
%eofval}
%unicode
%line
%column
%{
// Map from keyword string to token id.
// We use a linked hash map because the insertion order is important:
// for example, we want "and" to come after "&&" to make sure error reporting
// uses "AND" as the display name and not "&&".
// Please keep the puts sorted alphabetically by keyword (where the order
// does not affect the desired error reporting).
static Map<String, Integer> keywordMap;
// Reserved words are words that cannot be used as identifiers. The set is a superset
// of the keywords.
static Set<String> reservedWords;
// map from token id to token description
static HashMap<Integer, String> tokenIdMap;
public static void init(TReservedWordsVersion reservedWordsVersion) {
// Initialize keywords.
keywordMap = new LinkedHashMap<>();
keywordMap.put("&&", SqlParserSymbols.KW_AND);
keywordMap.put("add", SqlParserSymbols.KW_ADD);
keywordMap.put("aggregate", SqlParserSymbols.KW_AGGREGATE);
keywordMap.put("all", SqlParserSymbols.KW_ALL);
keywordMap.put("alter", SqlParserSymbols.KW_ALTER);
keywordMap.put("analytic", SqlParserSymbols.KW_ANALYTIC);
keywordMap.put("and", SqlParserSymbols.KW_AND);
keywordMap.put("anti", SqlParserSymbols.KW_ANTI);
keywordMap.put("api_version", SqlParserSymbols.KW_API_VERSION);
keywordMap.put("array", SqlParserSymbols.KW_ARRAY);
keywordMap.put("as", SqlParserSymbols.KW_AS);
keywordMap.put("asc", SqlParserSymbols.KW_ASC);
keywordMap.put("authorization", SqlParserSymbols.KW_AUTHORIZATION);
keywordMap.put("avro", SqlParserSymbols.KW_AVRO);
keywordMap.put("between", SqlParserSymbols.KW_BETWEEN);
keywordMap.put("bigint", SqlParserSymbols.KW_BIGINT);
keywordMap.put("binary", SqlParserSymbols.KW_BINARY);
keywordMap.put("block_size", SqlParserSymbols.KW_BLOCKSIZE);
keywordMap.put("boolean", SqlParserSymbols.KW_BOOLEAN);
keywordMap.put("by", SqlParserSymbols.KW_BY);
keywordMap.put("cached", SqlParserSymbols.KW_CACHED);
keywordMap.put("case", SqlParserSymbols.KW_CASE);
keywordMap.put("cascade", SqlParserSymbols.KW_CASCADE);
keywordMap.put("cast", SqlParserSymbols.KW_CAST);
keywordMap.put("change", SqlParserSymbols.KW_CHANGE);
keywordMap.put("char", SqlParserSymbols.KW_CHAR);
keywordMap.put("class", SqlParserSymbols.KW_CLASS);
keywordMap.put("close_fn", SqlParserSymbols.KW_CLOSE_FN);
keywordMap.put("column", SqlParserSymbols.KW_COLUMN);
keywordMap.put("columns", SqlParserSymbols.KW_COLUMNS);
keywordMap.put("comment", SqlParserSymbols.KW_COMMENT);
keywordMap.put("compression", SqlParserSymbols.KW_COMPRESSION);
keywordMap.put("compute", SqlParserSymbols.KW_COMPUTE);
keywordMap.put("constraint", SqlParserSymbols.KW_CONSTRAINT);
keywordMap.put("copy", SqlParserSymbols.KW_COPY);
keywordMap.put("create", SqlParserSymbols.KW_CREATE);
keywordMap.put("cross", SqlParserSymbols.KW_CROSS);
keywordMap.put("current", SqlParserSymbols.KW_CURRENT);
keywordMap.put("data", SqlParserSymbols.KW_DATA);
keywordMap.put("database", SqlParserSymbols.KW_DATABASE);
keywordMap.put("databases", SqlParserSymbols.KW_DATABASES);
keywordMap.put("date", SqlParserSymbols.KW_DATE);
keywordMap.put("datetime", SqlParserSymbols.KW_DATETIME);
keywordMap.put("decimal", SqlParserSymbols.KW_DECIMAL);
keywordMap.put("default", SqlParserSymbols.KW_DEFAULT);
keywordMap.put("delete", SqlParserSymbols.KW_DELETE);
keywordMap.put("delimited", SqlParserSymbols.KW_DELIMITED);
keywordMap.put("desc", SqlParserSymbols.KW_DESC);
keywordMap.put("describe", SqlParserSymbols.KW_DESCRIBE);
keywordMap.put("disable", SqlParserSymbols.KW_DISABLE);
keywordMap.put("distinct", SqlParserSymbols.KW_DISTINCT);
keywordMap.put("div", SqlParserSymbols.KW_DIV);
keywordMap.put("double", SqlParserSymbols.KW_DOUBLE);
keywordMap.put("drop", SqlParserSymbols.KW_DROP);
keywordMap.put("else", SqlParserSymbols.KW_ELSE);
keywordMap.put("enable", SqlParserSymbols.KW_ENABLE);
keywordMap.put("encoding", SqlParserSymbols.KW_ENCODING);
keywordMap.put("end", SqlParserSymbols.KW_END);
keywordMap.put("escaped", SqlParserSymbols.KW_ESCAPED);
keywordMap.put("exists", SqlParserSymbols.KW_EXISTS);
keywordMap.put("explain", SqlParserSymbols.KW_EXPLAIN);
keywordMap.put("extended", SqlParserSymbols.KW_EXTENDED);
keywordMap.put("external", SqlParserSymbols.KW_EXTERNAL);
keywordMap.put("false", SqlParserSymbols.KW_FALSE);
keywordMap.put("fields", SqlParserSymbols.KW_FIELDS);
keywordMap.put("fileformat", SqlParserSymbols.KW_FILEFORMAT);
keywordMap.put("files", SqlParserSymbols.KW_FILES);
keywordMap.put("finalize_fn", SqlParserSymbols.KW_FINALIZE_FN);
keywordMap.put("first", SqlParserSymbols.KW_FIRST);
keywordMap.put("float", SqlParserSymbols.KW_FLOAT);
keywordMap.put("following", SqlParserSymbols.KW_FOLLOWING);
keywordMap.put("for", SqlParserSymbols.KW_FOR);
keywordMap.put("foreign", SqlParserSymbols.KW_FOREIGN);
keywordMap.put("format", SqlParserSymbols.KW_FORMAT);
keywordMap.put("formatted", SqlParserSymbols.KW_FORMATTED);
keywordMap.put("from", SqlParserSymbols.KW_FROM);
keywordMap.put("full", SqlParserSymbols.KW_FULL);
keywordMap.put("function", SqlParserSymbols.KW_FUNCTION);
keywordMap.put("functions", SqlParserSymbols.KW_FUNCTIONS);
keywordMap.put("grant", SqlParserSymbols.KW_GRANT);
keywordMap.put("group", SqlParserSymbols.KW_GROUP);
keywordMap.put("hash", SqlParserSymbols.KW_HASH);
keywordMap.put("having", SqlParserSymbols.KW_HAVING);
keywordMap.put("if", SqlParserSymbols.KW_IF);
keywordMap.put("ilike", SqlParserSymbols.KW_ILIKE);
keywordMap.put("ignore", SqlParserSymbols.KW_IGNORE);
keywordMap.put("in", SqlParserSymbols.KW_IN);
keywordMap.put("incremental", SqlParserSymbols.KW_INCREMENTAL);
keywordMap.put("init_fn", SqlParserSymbols.KW_INIT_FN);
keywordMap.put("inner", SqlParserSymbols.KW_INNER);
keywordMap.put("inpath", SqlParserSymbols.KW_INPATH);
keywordMap.put("insert", SqlParserSymbols.KW_INSERT);
keywordMap.put("int", SqlParserSymbols.KW_INT);
keywordMap.put("integer", SqlParserSymbols.KW_INT);
keywordMap.put("intermediate", SqlParserSymbols.KW_INTERMEDIATE);
keywordMap.put("interval", SqlParserSymbols.KW_INTERVAL);
keywordMap.put("into", SqlParserSymbols.KW_INTO);
keywordMap.put("invalidate", SqlParserSymbols.KW_INVALIDATE);
keywordMap.put("iregexp", SqlParserSymbols.KW_IREGEXP);
keywordMap.put("is", SqlParserSymbols.KW_IS);
keywordMap.put("join", SqlParserSymbols.KW_JOIN);
keywordMap.put("kudu", SqlParserSymbols.KW_KUDU);
keywordMap.put("last", SqlParserSymbols.KW_LAST);
keywordMap.put("left", SqlParserSymbols.KW_LEFT);
keywordMap.put("lexical", SqlParserSymbols.KW_LEXICAL);
keywordMap.put("like", SqlParserSymbols.KW_LIKE);
keywordMap.put("limit", SqlParserSymbols.KW_LIMIT);
keywordMap.put("lines", SqlParserSymbols.KW_LINES);
keywordMap.put("load", SqlParserSymbols.KW_LOAD);
keywordMap.put("location", SqlParserSymbols.KW_LOCATION);
keywordMap.put("map", SqlParserSymbols.KW_MAP);
keywordMap.put("merge_fn", SqlParserSymbols.KW_MERGE_FN);
keywordMap.put("metadata", SqlParserSymbols.KW_METADATA);
keywordMap.put("norely", SqlParserSymbols.KW_NORELY);
keywordMap.put("not", SqlParserSymbols.KW_NOT);
keywordMap.put("novalidate", SqlParserSymbols.KW_NOVALIDATE);
keywordMap.put("null", SqlParserSymbols.KW_NULL);
keywordMap.put("nulls", SqlParserSymbols.KW_NULLS);
keywordMap.put("offset", SqlParserSymbols.KW_OFFSET);
keywordMap.put("on", SqlParserSymbols.KW_ON);
keywordMap.put("||", SqlParserSymbols.KW_OR);
keywordMap.put("or", SqlParserSymbols.KW_OR);
keywordMap.put("orc", SqlParserSymbols.KW_ORC);
keywordMap.put("order", SqlParserSymbols.KW_ORDER);
keywordMap.put("outer", SqlParserSymbols.KW_OUTER);
keywordMap.put("over", SqlParserSymbols.KW_OVER);
keywordMap.put("overwrite", SqlParserSymbols.KW_OVERWRITE);
keywordMap.put("parquet", SqlParserSymbols.KW_PARQUET);
keywordMap.put("parquetfile", SqlParserSymbols.KW_PARQUETFILE);
keywordMap.put("partition", SqlParserSymbols.KW_PARTITION);
keywordMap.put("partitioned", SqlParserSymbols.KW_PARTITIONED);
keywordMap.put("partitions", SqlParserSymbols.KW_PARTITIONS);
keywordMap.put("preceding", SqlParserSymbols.KW_PRECEDING);
keywordMap.put("prepare_fn", SqlParserSymbols.KW_PREPARE_FN);
keywordMap.put("primary", SqlParserSymbols.KW_PRIMARY);
keywordMap.put("produced", SqlParserSymbols.KW_PRODUCED);
keywordMap.put("purge", SqlParserSymbols.KW_PURGE);
keywordMap.put("range", SqlParserSymbols.KW_RANGE);
keywordMap.put("rcfile", SqlParserSymbols.KW_RCFILE);
keywordMap.put("real", SqlParserSymbols.KW_DOUBLE);
keywordMap.put("recover", SqlParserSymbols.KW_RECOVER);
keywordMap.put("references", SqlParserSymbols.KW_REFERENCES);
keywordMap.put("refresh", SqlParserSymbols.KW_REFRESH);
keywordMap.put("regexp", SqlParserSymbols.KW_REGEXP);
keywordMap.put("rely", SqlParserSymbols.KW_RELY);
keywordMap.put("rename", SqlParserSymbols.KW_RENAME);
keywordMap.put("repeatable", SqlParserSymbols.KW_REPEATABLE);
keywordMap.put("replace", SqlParserSymbols.KW_REPLACE);
keywordMap.put("replication", SqlParserSymbols.KW_REPLICATION);
keywordMap.put("restrict", SqlParserSymbols.KW_RESTRICT);
keywordMap.put("returns", SqlParserSymbols.KW_RETURNS);
keywordMap.put("revoke", SqlParserSymbols.KW_REVOKE);
keywordMap.put("right", SqlParserSymbols.KW_RIGHT);
keywordMap.put("rlike", SqlParserSymbols.KW_RLIKE);
keywordMap.put("role", SqlParserSymbols.KW_ROLE);
keywordMap.put("roles", SqlParserSymbols.KW_ROLES);
keywordMap.put("row", SqlParserSymbols.KW_ROW);
keywordMap.put("rows", SqlParserSymbols.KW_ROWS);
keywordMap.put("schema", SqlParserSymbols.KW_SCHEMA);
keywordMap.put("schemas", SqlParserSymbols.KW_SCHEMAS);
keywordMap.put("select", SqlParserSymbols.KW_SELECT);
keywordMap.put("semi", SqlParserSymbols.KW_SEMI);
keywordMap.put("sequencefile", SqlParserSymbols.KW_SEQUENCEFILE);
keywordMap.put("serdeproperties", SqlParserSymbols.KW_SERDEPROPERTIES);
keywordMap.put("serialize_fn", SqlParserSymbols.KW_SERIALIZE_FN);
keywordMap.put("set", SqlParserSymbols.KW_SET);
keywordMap.put("show", SqlParserSymbols.KW_SHOW);
keywordMap.put("smallint", SqlParserSymbols.KW_SMALLINT);
keywordMap.put("sort", SqlParserSymbols.KW_SORT);
keywordMap.put("stats", SqlParserSymbols.KW_STATS);
keywordMap.put("stored", SqlParserSymbols.KW_STORED);
keywordMap.put("straight_join", SqlParserSymbols.KW_STRAIGHT_JOIN);
keywordMap.put("string", SqlParserSymbols.KW_STRING);
keywordMap.put("struct", SqlParserSymbols.KW_STRUCT);
keywordMap.put("symbol", SqlParserSymbols.KW_SYMBOL);
keywordMap.put("table", SqlParserSymbols.KW_TABLE);
keywordMap.put("tables", SqlParserSymbols.KW_TABLES);
keywordMap.put("tablesample", SqlParserSymbols.KW_TABLESAMPLE);
keywordMap.put("tblproperties", SqlParserSymbols.KW_TBLPROPERTIES);
keywordMap.put("terminated", SqlParserSymbols.KW_TERMINATED);
keywordMap.put("textfile", SqlParserSymbols.KW_TEXTFILE);
keywordMap.put("then", SqlParserSymbols.KW_THEN);
keywordMap.put("timestamp", SqlParserSymbols.KW_TIMESTAMP);
keywordMap.put("tinyint", SqlParserSymbols.KW_TINYINT);
keywordMap.put("to", SqlParserSymbols.KW_TO);
keywordMap.put("true", SqlParserSymbols.KW_TRUE);
keywordMap.put("truncate", SqlParserSymbols.KW_TRUNCATE);
keywordMap.put("unbounded", SqlParserSymbols.KW_UNBOUNDED);
keywordMap.put("uncached", SqlParserSymbols.KW_UNCACHED);
keywordMap.put("union", SqlParserSymbols.KW_UNION);
keywordMap.put("unknown", SqlParserSymbols.KW_UNKNOWN);
keywordMap.put("update", SqlParserSymbols.KW_UPDATE);
keywordMap.put("update_fn", SqlParserSymbols.KW_UPDATE_FN);
keywordMap.put("upsert", SqlParserSymbols.KW_UPSERT);
keywordMap.put("use", SqlParserSymbols.KW_USE);
keywordMap.put("using", SqlParserSymbols.KW_USING);
keywordMap.put("validate", SqlParserSymbols.KW_VALIDATE);
keywordMap.put("values", SqlParserSymbols.KW_VALUES);
keywordMap.put("varchar", SqlParserSymbols.KW_VARCHAR);
keywordMap.put("view", SqlParserSymbols.KW_VIEW);
keywordMap.put("when", SqlParserSymbols.KW_WHEN);
keywordMap.put("where", SqlParserSymbols.KW_WHERE);
keywordMap.put("with", SqlParserSymbols.KW_WITH);
keywordMap.put("zorder", SqlParserSymbols.KW_ZORDER);
// Initialize tokenIdMap for error reporting.
tokenIdMap = new HashMap<>();
for (Map.Entry<String, Integer> entry : keywordMap.entrySet()) {
tokenIdMap.put(entry.getValue(), entry.getKey().toUpperCase());
}
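// Because keywordMap is iterated in insertion order, a later put for the same token id
// overwrites an earlier one here; e.g., "and" was inserted after "&&", so KW_AND is
// displayed as "AND" rather than "&&" in error messages.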
// Add non-keyword tokens. Please keep these in the same order as they are used in
// this file.
tokenIdMap.put(SqlParserSymbols.EOF, "EOF");
tokenIdMap.put(SqlParserSymbols.DOTDOTDOT, "...");
tokenIdMap.put(SqlParserSymbols.COLON, ":");
tokenIdMap.put(SqlParserSymbols.SEMICOLON, ";");
tokenIdMap.put(SqlParserSymbols.COMMA, "COMMA");
tokenIdMap.put(SqlParserSymbols.DOT, ".");
tokenIdMap.put(SqlParserSymbols.STAR, "*");
tokenIdMap.put(SqlParserSymbols.LPAREN, "(");
tokenIdMap.put(SqlParserSymbols.RPAREN, ")");
tokenIdMap.put(SqlParserSymbols.LBRACKET, "[");
tokenIdMap.put(SqlParserSymbols.RBRACKET, "]");
tokenIdMap.put(SqlParserSymbols.DIVIDE, "/");
tokenIdMap.put(SqlParserSymbols.MOD, "%");
tokenIdMap.put(SqlParserSymbols.ADD, "+");
tokenIdMap.put(SqlParserSymbols.SUBTRACT, "-");
tokenIdMap.put(SqlParserSymbols.BITAND, "&");
tokenIdMap.put(SqlParserSymbols.BITOR, "|");
tokenIdMap.put(SqlParserSymbols.BITXOR, "^");
tokenIdMap.put(SqlParserSymbols.BITNOT, "~");
tokenIdMap.put(SqlParserSymbols.EQUAL, "=");
tokenIdMap.put(SqlParserSymbols.NOT, "!");
tokenIdMap.put(SqlParserSymbols.LESSTHAN, "<");
tokenIdMap.put(SqlParserSymbols.GREATERTHAN, ">");
tokenIdMap.put(SqlParserSymbols.UNMATCHED_STRING_LITERAL, "UNMATCHED STRING LITERAL");
tokenIdMap.put(SqlParserSymbols.NOTEQUAL, "!=");
tokenIdMap.put(SqlParserSymbols.INTEGER_LITERAL, "INTEGER LITERAL");
tokenIdMap.put(SqlParserSymbols.NUMERIC_OVERFLOW, "NUMERIC OVERFLOW");
tokenIdMap.put(SqlParserSymbols.DECIMAL_LITERAL, "DECIMAL LITERAL");
tokenIdMap.put(SqlParserSymbols.EMPTY_IDENT, "EMPTY IDENTIFIER");
tokenIdMap.put(SqlParserSymbols.IDENT, "IDENTIFIER");
tokenIdMap.put(SqlParserSymbols.STRING_LITERAL, "STRING LITERAL");
tokenIdMap.put(SqlParserSymbols.COMMENTED_PLAN_HINT_START,
"COMMENTED_PLAN_HINT_START");
tokenIdMap.put(SqlParserSymbols.COMMENTED_PLAN_HINT_END, "COMMENTED_PLAN_HINT_END");
tokenIdMap.put(SqlParserSymbols.UNEXPECTED_CHAR, "Unexpected character");
// There are 4 symbols not in the tokenIdMap:
// - UNUSED_RESERVED_WORD. It is handled separately in sql-parser.cup
// - FACTORIAL and UNARYSIGN. These are placeholders to work around precedence.
// - error. It's a symbol defined by cup.
Preconditions.checkState(tokenIdMap.size() + 4 ==
SqlParserSymbols.class.getFields().length, "The sizes of tokenIdMap and " +
"SqlParserSymbols don't match. sql-scanner.flex should be updated.");
// Initialize reservedWords. For Impala 2.11, reserved words = keywords.
if (reservedWordsVersion == TReservedWordsVersion.IMPALA_2_11) {
reservedWords = keywordMap.keySet();
return;
}
// For Impala 3.0, reserved words = keywords + SQL:2016 reserved words
// - builtin functions - whitelist.
// Unused reserved words = reserved words - keywords. These words are reserved for
// forward compatibility purposes.
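// Illustrative examples (not from the original comments): "select" is a keyword and so
// always reserved; "procedure" appears in the SQL:2016 list below and, assuming it is
// neither a builtin function name nor whitelisted, ends up as an unused reserved word,
// for which the scanner returns UNUSED_RESERVED_WORD (handled in sql-parser.cup);
// a whitelisted word such as "year" remains usable as a plain identifier.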
reservedWords = new HashSet<>(keywordMap.keySet());
// Add SQL:2016 reserved words
reservedWords.addAll(Arrays.asList(new String[] {
"abs", "acos", "allocate", "any", "are", "array_agg", "array_max_cardinality",
"asensitive", "asin", "asymmetric", "at", "atan", "atomic", "avg", "begin",
"begin_frame", "begin_partition", "blob", "both", "call", "called", "cardinality",
"cascaded", "ceil", "ceiling", "char_length", "character", "character_length",
"check", "classifier", "clob", "close", "coalesce", "collate", "collect",
"commit", "condition", "connect", "constraint", "contains", "convert", "copy",
"corr", "corresponding", "cos", "cosh", "count", "covar_pop", "covar_samp",
"cube", "cume_dist", "current_catalog", "current_date",
"current_default_transform_group", "current_path", "current_path", "current_role",
"current_role", "current_row", "current_schema", "current_time",
"current_timestamp", "current_transform_group_for_type", "current_user", "cursor",
"cycle", "day", "deallocate", "dec", "decfloat", "declare", "define",
"dense_rank", "deref", "deterministic", "disconnect", "dynamic", "each",
"element", "empty", "end-exec", "end_frame", "end_partition", "equals", "escape",
"every", "except", "exec", "execute", "exp", "extract", "fetch", "filter",
"first_value", "floor", "foreign", "frame_row", "free", "fusion", "get", "global",
"grouping", "groups", "hold", "hour", "identity", "indicator", "initial", "inout",
"insensitive", "integer", "intersect", "intersection", "json_array",
"json_arrayagg", "json_exists", "json_object", "json_objectagg", "json_query",
"json_table", "json_table_primitive", "json_value", "lag", "language", "large",
"last_value", "lateral", "lead", "leading", "like_regex", "listagg", "ln",
"local", "localtime", "localtimestamp", "log", "log10 ", "lower", "match",
"match_number", "match_recognize", "matches", "max", "member", "merge", "method",
"min", "minute", "mod", "modifies", "module", "month", "multiset", "national",
"natural", "nchar", "nclob", "new", "no", "none", "normalize", "nth_value",
"ntile", "nullif", "numeric", "occurrences_regex", "octet_length", "of", "old",
"omit", "one", "only", "open", "out", "overlaps", "overlay", "parameter",
"pattern", "per", "percent", "percent_rank", "percentile_cont", "percentile_disc",
"period", "portion", "position", "position_regex", "power", "precedes",
"precision", "prepare", "procedure", "ptf", "rank", "reads", "real", "recursive",
"ref", "references", "referencing", "regr_avgx", "regr_avgy", "regr_count",
"regr_intercept", "regr_r2", "regr_slope", "regr_sxx", "regr_sxy", "regr_syy",
"release", "result", "return", "rollback", "rollup", "row_number", "running",
"savepoint", "scope", "scroll", "search", "second", "seek", "sensitive",
"session_user", "similar", "sin", "sinh", "skip", "some", "specific",
"specifictype", "sql", "sqlexception", "sqlstate", "sqlwarning", "sqrt", "start",
"static", "stddev_pop", "stddev_samp", "submultiset", "subset", "substring",
"substring_regex", "succeeds", "sum", "symmetric", "system", "system_time",
"system_user", "tan", "tanh", "time", "timezone_hour", "timezone_minute",
"trailing", "translate", "translate_regex", "translation", "treat", "trigger",
"trim", "trim_array", "uescape", "unique", "unknown", "unnest", "update ",
"upper", "user", "value", "value_of", "var_pop", "var_samp", "varbinary",
"varying", "versioning", "whenever", "width_bucket", "window", "within",
"without", "year"}));
// Remove Impala builtin function names.
reservedWords.removeAll(BuiltinsDb.getInstance().getAllFunctions().keySet());
// Remove whitelisted words. These words might be heavily used in production, and
// Impala is unlikely to implement SQL features around these words in the near future.
reservedWords.removeAll(Arrays.asList(new String[] {
// time units
"year", "month", "day", "hour", "minute", "second",
"begin", "call", "check", "classifier", "close", "identity", "language",
"localtime", "member", "module", "new", "nullif", "old", "open", "parameter",
"period", "result", "return", "sql", "start", "system", "time", "user", "value"
}));
}
static {
// Default-initialize the static members for FE tests. Outside of FE tests, init() is
// called again in BackendConfig.create() once the backend configuration is passed to
// the FE, overwriting this initialization.
init(TReservedWordsVersion.IMPALA_3_0);
}
static boolean isReserved(String token) {
return token != null && reservedWords.contains(token.toLowerCase());
}
static boolean isKeyword(Integer tokenId) {
String token = tokenIdMap.get(tokenId);
return token != null && keywordMap.containsKey(token.toLowerCase());
}
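// Wraps a token in a cup Symbol. yyline/yycolumn are 0-based, so report 1-based
// line and column positions.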
private Symbol newToken(int id, Object value) {
return new Symbol(id, yyline+1, yycolumn+1, value);
}
%}
LineTerminator = \r|\n|\r\n
Whitespace = {LineTerminator} | [ \t\f]
// Order of rules to resolve ambiguity:
// The rule for recognizing integer literals must come before the rule for
// decimal literals so that, e.g., "1234" is recognized as an integer literal.
// The rule for recognizing decimal literals must come before the rule for
// identifiers so that, e.g., "1e6" is recognized as a decimal literal.
IntegerLiteral = [:digit:][:digit:]*
FLit1 = [0-9]+ \. [0-9]*
FLit2 = \. [0-9]+
FLit3 = [0-9]+
Exponent = [eE] [+-]? [0-9]+
DecimalLiteral = ({FLit1}|{FLit2}|{FLit3}) {Exponent}?
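// For example: "1234" matches both {IntegerLiteral} and {DecimalLiteral}; since the
// integer rule is listed first in the rules section, it yields INTEGER_LITERAL.
// Inputs like "1.5", ".5" and "1e6" yield DECIMAL_LITERAL ("1e6" would also match
// {Identifier}, but the decimal rule comes first).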
Identifier = [:digit:]*[:jletter:][:jletterdigit:]*
// Without the "\. {Identifier}" alternative, a dot followed by an identifier that
// starts with digits would always be lexed as FLit2 (a decimal literal).
IdentifierOrKw = {Identifier} | \. {Identifier} | "&&" | "||"
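// Illustrative example: for the input "t.1field", after IDENT("t") the remainder
// ".1field" matches this macro (longest match beats the ".1" decimal literal); the
// corresponding action pushes "1field" back into the input and returns DOT, so the
// sequence becomes IDENT("t"), DOT, IDENT("1field").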
QuotedIdentifier = \`(\\.|[^\\\`])*\`
SingleQuoteStringLiteral = \'(\\.|[^\\\'])*\'
DoubleQuoteStringLiteral = \"(\\.|[^\\\"])*\"
EolHintBegin = "--" " "* "+"
CommentedHintBegin = "/*" " "* "+"
CommentedHintEnd = "*/"
// Both types of plan hints must appear within a single line.
HintContent = " "* "+" [^\r\n]*
Comment = {TraditionalComment} | {EndOfLineComment}
// Match anything that has a comment end (*/) in it.
ContainsCommentEnd = [^]* "*/" [^]*
// Match anything that has a line terminator in it.
ContainsLineTerminator = [^]* {LineTerminator} [^]*
// A traditional comment is anything that starts and ends like a comment and contains
// neither a plan hint nor a comment end (*/) in its body.
TraditionalComment = "/*" !({HintContent}|{ContainsCommentEnd}) "*/"
// Similarly for an end-of-line comment.
EndOfLineComment = "--" !({HintContent}|{ContainsLineTerminator}) {LineTerminator}?
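// For example, "/* just a comment */" matches {TraditionalComment} and is ignored,
// whereas "/* +shuffle */" (hint name illustrative) starts with {HintContent} and is
// instead lexed by the plan-hint rules as COMMENTED_PLAN_HINT_START, IDENT("shuffle"),
// COMMENTED_PLAN_HINT_END.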
// This additional state is needed because newlines signal the end of an end-of-line
// hint if one has been started earlier. Hence we need to distinguish newlines within
// end-of-line hints from newlines outside of them.
%state EOLHINT
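// For example, "-- +shuffle" followed by a newline (hint name illustrative) yields
// COMMENTED_PLAN_HINT_START for "-- +", IDENT("shuffle"), and then the <EOLHINT> rule
// turns the newline into COMMENTED_PLAN_HINT_END instead of ignoring it as whitespace.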
%%
// Put '...' before '.'
"..." { return newToken(SqlParserSymbols.DOTDOTDOT, null); }
// single-character tokens
":" { return newToken(SqlParserSymbols.COLON, null); }
";" { return newToken(SqlParserSymbols.SEMICOLON, null); }
"," { return newToken(SqlParserSymbols.COMMA, null); }
"." { return newToken(SqlParserSymbols.DOT, null); }
"*" { return newToken(SqlParserSymbols.STAR, null); }
"(" { return newToken(SqlParserSymbols.LPAREN, null); }
")" { return newToken(SqlParserSymbols.RPAREN, null); }
"[" { return newToken(SqlParserSymbols.LBRACKET, null); }
"]" { return newToken(SqlParserSymbols.RBRACKET, null); }
"/" { return newToken(SqlParserSymbols.DIVIDE, null); }
"%" { return newToken(SqlParserSymbols.MOD, null); }
"+" { return newToken(SqlParserSymbols.ADD, null); }
"-" { return newToken(SqlParserSymbols.SUBTRACT, null); }
"&" { return newToken(SqlParserSymbols.BITAND, null); }
"|" { return newToken(SqlParserSymbols.BITOR, null); }
"^" { return newToken(SqlParserSymbols.BITXOR, null); }
"~" { return newToken(SqlParserSymbols.BITNOT, null); }
"=" { return newToken(SqlParserSymbols.EQUAL, null); }
"!" { return newToken(SqlParserSymbols.NOT, null); }
"<" { return newToken(SqlParserSymbols.LESSTHAN, null); }
">" { return newToken(SqlParserSymbols.GREATERTHAN, null); }
"\"" { return newToken(SqlParserSymbols.UNMATCHED_STRING_LITERAL, null); }
"'" { return newToken(SqlParserSymbols.UNMATCHED_STRING_LITERAL, null); }
"`" { return newToken(SqlParserSymbols.UNMATCHED_STRING_LITERAL, null); }
// double-character tokens
"!=" { return newToken(SqlParserSymbols.NOTEQUAL, null); }
// The actions for IntegerLiteral and DecimalLiteral are the same, but it is useful
// to return distinct tokens, e.g., so the parser can use integer literals without
// further analysis.
{IntegerLiteral} {
try {
return newToken(SqlParserSymbols.INTEGER_LITERAL, new BigDecimal(yytext()));
} catch (NumberFormatException e) {
return newToken(SqlParserSymbols.NUMERIC_OVERFLOW, yytext());
}
}
{DecimalLiteral} {
try {
return newToken(SqlParserSymbols.DECIMAL_LITERAL, new BigDecimal(yytext()));
} catch (NumberFormatException e) {
return newToken(SqlParserSymbols.NUMERIC_OVERFLOW, yytext());
}
}
{QuotedIdentifier} {
// Remove the quotes and trim whitespace.
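// For example, `  my col  ` yields IDENT("my col"); a backquoted identifier that is
// empty or contains only whitespace yields EMPTY_IDENT.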
String trimmedIdent = yytext().substring(1, yytext().length() - 1).trim();
if (trimmedIdent.isEmpty()) {
return newToken(SqlParserSymbols.EMPTY_IDENT, yytext());
}
return newToken(SqlParserSymbols.IDENT, trimmedIdent);
}
{IdentifierOrKw} {
String text = yytext();
if (text.startsWith(".")) {
// If we see an identifier that starts with a dot, we push everything after the
// dot back into the input stream and return just the dot as a DOT token.
yypushback(text.length() - 1);
return newToken(SqlParserSymbols.DOT, yytext());
}
Integer kw_id = keywordMap.get(text.toLowerCase());
if (kw_id != null) {
return newToken(kw_id, text);
} else if (isReserved(text)) {
return newToken(SqlParserSymbols.UNUSED_RESERVED_WORD, text);
} else {
return newToken(SqlParserSymbols.IDENT, text);
}
}
{SingleQuoteStringLiteral} {
return newToken(SqlParserSymbols.STRING_LITERAL, yytext().substring(1, yytext().length()-1));
}
{DoubleQuoteStringLiteral} {
return newToken(SqlParserSymbols.STRING_LITERAL, yytext().substring(1, yytext().length()-1));
}
{CommentedHintBegin} {
return newToken(SqlParserSymbols.COMMENTED_PLAN_HINT_START, null);
}
{CommentedHintEnd} {
return newToken(SqlParserSymbols.COMMENTED_PLAN_HINT_END, null);
}
{EolHintBegin} {
yybegin(EOLHINT);
return newToken(SqlParserSymbols.COMMENTED_PLAN_HINT_START, null);
}
<EOLHINT> {LineTerminator} {
yybegin(YYINITIAL);
return newToken(SqlParserSymbols.COMMENTED_PLAN_HINT_END, null);
}
{Comment} { /* ignore */ }
{Whitespace} { /* ignore */ }
// Provide a default error token when nothing matches, otherwise the user sees
// "Error: could not match input" which is confusing.
[^] { return newToken(SqlParserSymbols.UNEXPECTED_CHAR, yytext()); }