#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
#
# Copied from Calcite 1.40.0 babel and modified for Pinot syntax. Update this file when upgrading Calcite version.
data: {
default: tdd("../default_config.fmpp")
# Data declarations for this parser.
#
# Default declarations are in default_config.fmpp; if you do not include a
# declaration ('imports' or 'nonReservedKeywordsToAdd', for example) in this file,
# FMPP will use the declaration from default_config.fmpp.
parser: {
# Package and simple name of the generated parser implementation class.
# Together they name org.apache.pinot.sql.parsers.parser.SqlParserImpl.
package: "org.apache.pinot.sql.parsers.parser",
class: "SqlParserImpl",
# List of additional classes and packages to import.
# Each entry is either a fully qualified class name or a package wildcard
# ending in ".*".
# Example: "org.apache.calcite.sql.*", "java.util.List".
imports: [
"org.apache.calcite.sql.babel.postgres.SqlSetOptions",
"org.apache.pinot.sql.parsers.parser.*"
]
# List of new keywords. Example: "DATABASES", "TABLES". If the keyword is
# not a reserved keyword, also add it to the 'nonReservedKeywordsToAdd' section.
keywords: [
"FILE"
"ARCHIVE"
# Pinot type names allowed in the CAST function, and the SQL type each
# one stands for:
# LONG - for BIGINT
# BIG_DECIMAL - for DECIMAL
# STRING - for VARCHAR
# BYTES - for VARBINARY
"LONG"
"BIG_DECIMAL"
"STRING"
"BYTES"
]
# List of non-reserved keywords to add;
# items in this list become non-reserved.
nonReservedKeywordsToAdd: [
"FILE"
"ARCHIVE"
# Pinot allows using DEFAULT as the catalog name
# NOTE(review): DEFAULT_ (with trailing underscore) is presumably the parser's
# token name for the DEFAULT keyword - confirm in Parser.jj.
"DEFAULT_"
# Pinot allows using LONG/BIG_DECIMAL/STRING/BYTES/DATETIME/VARIANT/UUID as column names
"LONG"
"BIG_DECIMAL"
"STRING"
"BYTES"
"DATETIME"
"VARIANT"
"UUID"
# The following keywords are reserved in core Calcite,
# are reserved in some version of SQL,
# but are not reserved in Babel.
#
# Words that are commented out (e.g. "ANY") are still reserved.
# These are the most important reserved words, and SQL cannot be
# unambiguously parsed if they are not reserved. For example, if
# "INNER" is not reserved then in the query
#
# select * from emp inner join dept using (deptno)
#
# "inner" could be a table alias for "emp".
#
# TODO: remove unused/untested BABEL non-reserved keywords since
# we are only testing a small subset.
# @see: CalciteSqlCompilerTest:testReservedKeywords
"ABS"
"ABSOLUTE"
"ACTION"
"ADD"
"AFTER"
"ALL"
"ALLOCATE"
"ALLOW"
"ALTER"
"AND"
# "ANY"
"ARE"
"ARRAY"
# "ARRAY_AGG" # not a keyword in Calcite
"ARRAY_MAX_CARDINALITY"
"AS"
"ASC"
"ASENSITIVE"
"ASSERTION"
"ASYMMETRIC"
"AT"
"ATOMIC"
"AUTHORIZATION"
"AVG"
"BEFORE"
"BEGIN"
"BEGIN_FRAME"
"BEGIN_PARTITION"
"BETWEEN"
"BIGINT"
"BINARY"
"BIT"
# "BIT_LENGTH" # not a keyword in Calcite
"BLOB"
"BOOLEAN"
"BOTH"
"BREADTH"
"BY"
# "CALL"
"CALLED"
"CARDINALITY"
"CASCADE"
"CASCADED"
# "CASE"
"CAST"
"CATALOG"
"CEIL"
"CEILING"
"CHAR"
"CHARACTER"
"CHARACTER_LENGTH"
"CHAR_LENGTH"
"CHECK"
"CLASSIFIER"
"CLOB"
"CLOSE"
"COALESCE"
"COLLATE"
"COLLATION"
"COLLECT"
"COLUMN"
"COMMIT"
"CONDITION"
"CONNECT"
"CONNECTION"
"CONSTRAINT"
"CONSTRAINTS"
"CONSTRUCTOR"
"CONTAINS"
"CONTAINS_SUBSTR"
"CONTINUE"
"CONVERT"
"CORR"
"CORRESPONDING"
"COUNT"
"COVAR_POP"
"COVAR_SAMP"
# "CREATE"
# "CROSS"
"CUBE"
"CUME_DIST"
# "CURRENT"
"CURRENT_CATALOG"
"CURRENT_DATE"
"CURRENT_DEFAULT_TRANSFORM_GROUP"
"CURRENT_PATH"
"CURRENT_ROLE"
"CURRENT_ROW"
"CURRENT_SCHEMA"
"CURRENT_TIME"
"CURRENT_TIMESTAMP"
"CURRENT_TRANSFORM_GROUP_FOR_TYPE"
"CURRENT_USER"
# "CURSOR"
"CYCLE"
"DATA"
# "DATE"
"DATETIME_DIFF"
"DAY"
"DEALLOCATE"
"DEC"
"DECIMAL"
"DECLARE"
# "DEFAULT"
"DEFERRABLE"
"DEFERRED"
# "DEFINE"
# "DELETE"
"DENSE_RANK"
"DEPTH"
"DEREF"
"DESC"
# "DESCRIBE" # must be reserved
"DESCRIPTOR"
"DETERMINISTIC"
"DIAGNOSTICS"
"DISALLOW"
"DISCONNECT"
# "DISTINCT"
# "DO" # not a keyword in Calcite
"DOMAIN"
"DOUBLE"
# "DROP" # probably must be reserved
"DYNAMIC"
"EACH"
"ELEMENT"
"ELSE"
# "ELSEIF" # not a keyword in Calcite
"EMPTY"
"END"
# "END-EXEC" # not a keyword in Calcite, and contains '-'
"END_FRAME"
"END_PARTITION"
"EQUALS"
"ESCAPE"
"EVERY"
# "EXCEPT" # must be reserved
"EXCEPTION"
"EXEC"
"EXECUTE"
"EXISTS"
# "EXIT" # not a keyword in Calcite
"EXP"
# "EXPLAIN" # must be reserved
"EXTEND"
"EXTERNAL"
"EXTRACT"
"FALSE"
# "FETCH"
"FILTER"
"FIRST"
"FIRST_VALUE"
"FLOAT"
"FLOOR"
"FOR"
"FOREIGN"
# "FOREVER" # not a keyword in Calcite
"FOUND"
"FRAME_ROW"
"FREE"
# "FROM" # must be reserved
# "FULL" # must be reserved
"FUNCTION"
"FUSION"
"GENERAL"
"GET"
"GLOBAL"
"GO"
"GOTO"
# "GRANT"
# "GROUP"
# "GROUPING"
"GROUPS"
# "HANDLER" # not a keyword in Calcite
# "HAVING"
"HOLD"
"HOUR"
"IDENTITY"
"ILIKE"
"IMMEDIATE"
"IMMEDIATELY"
"IMPORT"
# "IN"
"INDICATOR"
"INITIAL"
"INITIALLY"
# "INNER"
"INOUT"
"INPUT"
"INSENSITIVE"
# "INSERT"
"INT"
"INTEGER"
# "INTERSECT"
"INTERSECTION"
# "INTERVAL"
# "INTO"
"IS"
"ISOLATION"
# "ITERATE" # not a keyword in Calcite
# "JOIN"
"JSON_ARRAY"
"JSON_ARRAYAGG"
"JSON_EXISTS"
"JSON_OBJECT"
"JSON_OBJECTAGG"
"JSON_QUERY"
"JSON_VALUE"
# "KEEP" # not a keyword in Calcite
"KEY"
"LAG"
"LANGUAGE"
"LARGE"
"LAST"
"LAST_VALUE"
# "LATERAL"
"LEAD"
"LEADING"
# "LEAVE" # not a keyword in Calcite
# "LEFT"
"LEVEL"
"LIKE"
"LIKE_REGEX"
# "LIMIT"
"LN"
"LOCAL"
"LOCALTIME"
"LOCALTIMESTAMP"
"LOCATOR"
# "LOOP" # not a keyword in Calcite
"LOWER"
"MAP"
"MATCH"
"MATCHES"
"MATCH_NUMBER"
# "MATCH_RECOGNIZE"
"MAX"
# "MAX_CARDINALITY" # not a keyword in Calcite
"MEASURES"
"MEMBER"
# "MERGE"
"METHOD"
"MIN"
# "MINUS"
"MINUTE"
"MOD"
"MODIFIES"
"MODULE"
"MONTH"
"MULTISET"
"NAMES"
"NATIONAL"
# "NATURAL"
"NCHAR"
"NCLOB"
# "NEW"
# "NEXT"
"NO"
"NONE"
"NORMALIZE"
"NOT"
"NTH_VALUE"
"NTILE"
# "NULL"
"NULLIF"
"NUMERIC"
"OBJECT"
"OCCURRENCES_REGEX"
"OCTET_LENGTH"
"OF"
# "OFFSET"
"OLD"
"OMIT"
# "ON"
"ONE"
"ONLY"
"OPEN"
"OPTION"
"OR"
# "ORDER"
"ORDINALITY"
"OUT"
# "OUTER"
"OUTPUT"
# "OVER"
"OVERLAPS"
"OVERLAY"
"PAD"
"PARAMETER"
"PARTIAL"
# "PARTITION"
"PATH"
# "PATTERN"
"PER"
"PERCENT"
"PERCENTILE_CONT"
"PERCENTILE_DISC"
"PERCENT_RANK"
"PERIOD"
"PERMUTE"
"PORTION"
"POSITION"
"POSITION_REGEX"
"POWER"
"PRECEDES"
"PRECISION"
"PREPARE"
"PRESERVE"
"PREV"
"PRIMARY"
"PRIOR"
"PRIVILEGES"
"PROCEDURE"
"PUBLIC"
# "RANGE"
"RANK"
"READ"
"READS"
"REAL"
"RECURSIVE"
"REF"
"REFERENCES"
"REFERENCING"
"REGR_AVGX"
"REGR_AVGY"
"REGR_COUNT"
"REGR_INTERCEPT"
"REGR_R2"
"REGR_SLOPE"
"REGR_SXX"
"REGR_SXY"
"REGR_SYY"
"RELATIVE"
"RELEASE"
# "REPEAT" # not a keyword in Calcite
"RESET"
# "RESIGNAL" # not a keyword in Calcite
"RESTRICT"
"RESULT"
"RETURN"
"RETURNS"
"REVOKE"
# "RIGHT"
"RLIKE"
"ROLE"
"ROLLBACK"
# "ROLLUP"
"ROUTINE"
# "ROW"
# "ROWS"
"ROW_NUMBER"
"RUNNING"
"SAVEPOINT"
"SCHEMA"
"SCOPE"
"SCROLL"
"SEARCH"
"SECOND"
"SECTION"
"SEEK"
# "SELECT"
"SENSITIVE"
"SESSION"
"SESSION_USER"
# "SET"
# "SETS"
"SHOW"
# "SIGNAL" # not a keyword in Calcite
"SIMILAR"
"SIZE"
# "SKIP" # messes with JavaCC's <SKIP> token
"SMALLINT"
# "SOME"
"SPACE"
"SPECIFIC"
"SPECIFICTYPE"
"SQL"
# "SQLCODE" # not a keyword in Calcite
# "SQLERROR" # not a keyword in Calcite
"SQLEXCEPTION"
"SQLSTATE"
"SQLWARNING"
"SQRT"
"START"
"STATE"
"STATIC"
"STDDEV_POP"
"STDDEV_SAMP"
# "STREAM"
"SUBMULTISET"
"SUBSET"
"SUBSTRING"
"SUBSTRING_REGEX"
"SUCCEEDS"
"SUM"
"SYMMETRIC"
"SYSTEM"
"SYSTEM_TIME"
"SYSTEM_USER"
# "TABLE"
# "TABLESAMPLE"
"TEMPORARY"
# "THEN"
# "TIME"
"TIME_DIFF"
"TIME_TRUNC"
# "TIMESTAMP"
"TIMESTAMP_DIFF"
"TIMESTAMP_TRUNC"
"TIMEZONE_HOUR"
"TIMEZONE_MINUTE"
"TINYINT"
"TO"
"TRAILING"
"TRANSACTION"
"TRANSLATE"
"TRANSLATE_REGEX"
"TRANSLATION"
"TREAT"
"TRIGGER"
"TRIM"
"TRIM_ARRAY"
"TRUE"
"TRUNCATE"
"UESCAPE"
"UNDER"
# "UNDO" # not a keyword in Calcite
# "UNION"
"UNIQUE"
"UNKNOWN"
# "UNNEST"
# "UNTIL" # not a keyword in Calcite
# "UPDATE"
"UPPER"
"UPSERT"
"USAGE"
"USER"
# "USING"
"VALUE"
# "VALUES"
"VALUE_OF"
"VARBINARY"
"VARCHAR"
"VARYING"
"VAR_POP"
"VAR_SAMP"
"VERSION"
"VERSIONING"
# "VERSIONS" # not a keyword in Calcite
"VIEW"
# "WHEN"
"WHENEVER"
# "WHERE"
# "WHILE" # not a keyword in Calcite
"WIDTH_BUCKET"
# "WINDOW"
# "WITH"
"WITHIN"
"WITHOUT"
"WORK"
"WRITE"
"YEAR"
"ZONE"
]
# List of non-reserved keywords to remove;
# items in this list become reserved.
# Left empty in this file.
nonReservedKeywordsToRemove: [
]
# List of additional join types. Each is a method with no arguments.
# Example: "LeftSemiJoin".
# Left empty in this file.
joinTypes: [
]
# List of methods for parsing custom SQL statements.
# Return type of method implementation should be 'SqlNode'.
# Example: "SqlShowDatabases()", "SqlShowTables()".
# The method implementations are expected in the files listed under
# 'implementationFiles'.
statementParserMethods: [
"SqlInsertFromFile()"
"SqlPhysicalExplain()"
]
# The extension points from here through 'binaryOperatorsTokens' are all
# declared as empty lists in this file; their comments describe what an
# entry would have to look like.
#
# List of methods for parsing custom literals.
# Return type of method implementation should be "SqlNode".
# Example: "ParseJsonLiteral()".
literalParserMethods: [
]
# List of methods for parsing custom data types.
# Return type of method implementation should be "SqlTypeNameSpec".
# Example: "SqlParseTimeStampZ()".
dataTypeParserMethods: [
]
# List of methods for parsing builtin function calls.
# Return type of method implementation should be "SqlNode".
# Example: "DateTimeConstructorCall()".
builtinFunctionCallMethods: [
]
# List of methods for parsing extensions to "ALTER <scope>" calls.
# Each must accept arguments "(SqlParserPos pos, String scope)".
# Example: "SqlAlterTable".
alterStatementParserMethods: [
]
# List of methods for parsing extensions to "CREATE [OR REPLACE]" calls.
# Each must accept arguments "(SqlParserPos pos, boolean replace)".
# Example: "SqlCreateForeignSchema".
createStatementParserMethods: [
]
# List of methods for parsing extensions to "DROP" calls.
# Each must accept arguments "(SqlParserPos pos)".
# Example: "SqlDropSchema".
dropStatementParserMethods: [
]
# List of methods for parsing extensions to "TRUNCATE" calls.
# Each must accept arguments "(SqlParserPos pos)".
# Example: "SqlTruncate".
truncateStatementParserMethods: [
]
# Binary operator tokens.
# Example: "< INFIX_CAST: \"::\" >".
binaryOperatorsTokens: [
]
# Binary operator initialization.
# Example: "InfixCast".
# NOTE(review): "SqlAtTimeZone" is presumably a parser method implemented in
# parserImpls.ftl - confirm.
extraBinaryExpressions: [
"SqlAtTimeZone"
]
# List of files in the @includes directory that have parser method
# implementations for parsing custom SQL statements, literals or types
# given as part of "statementParserMethods", "literalParserMethods" or
# "dataTypeParserMethods".
# Example: "parserImpls.ftl".
implementationFiles: [
"parserImpls.ftl"
]
# Custom identifier token.
#
# PostgreSQL allows letters with diacritical marks and non-Latin letters
# at the beginning of an identifier, and additionally a dollar sign in the
# rest of the identifier.
# Letters with diacritical marks and non-Latin letters
# are represented by character codes 128 to 255 (or in octal \200 to \377).
# See https://learn.microsoft.com/en-gb/office/vba/language/reference/user-interface-help/character-set-128255
# See https://github.com/postgres/postgres/blob/master/src/backend/parser/scan.l
#
# MySQL allows a digit at the beginning of an identifier.
#
# The token below therefore accepts a first character that is a <LETTER>,
# a <DIGIT> or a character in the range \200-\377, followed by zero or more
# characters from the same set plus <DOLLAR>.
customIdentifierToken: "< IDENTIFIER: (<LETTER>|<DIGIT>|[\"\\200\"-\"\\377\"]) (<LETTER>|<DIGIT>|<DOLLAR>|[\"\\200\"-\"\\377\"])* >"
# NOTE(review): judging by its name, this flag presumably lets the generated
# parser accept a string literal where an array literal is expected - confirm
# against default_config.fmpp and the parser template.
includeParsingStringLiteralAsArrayLiteral: true
}
}
# FMPP FreeMarker links: binds the link name "includes" to the includes/
# path, i.e. the "@includes" directory that 'implementationFiles' refers to.
freemarkerLinks: {
includes: includes/
}