blob: 7fe232862af376be9e6c8be43dc3334e42b44170 [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.spark.sql.catalyst
import scala.language.implicitConversions
import scala.util.matching.Regex
import org.apache.spark.sql.execution.command._
import org.apache.carbondata.common.exceptions.sql.MalformedCarbonCommandException
import org.apache.carbondata.common.logging.LogServiceFactory
/**
* TODO: remove the duplicated code and move the common methods into a common class.
* Parser for all Carbon DDL cases.
*/
abstract class CarbonDDLSqlParser extends AbstractCarbonSparkSQLParser {
// Logger obtained from LogServiceFactory under the canonical name of the runtime class.
val LOGGER = LogServiceFactory.getLogService(getClass.getCanonicalName)
// ---------------------------------------------------------------------------
// Keyword tokens used by the Carbon DDL grammar.
//
// Every entry built with carbonKeyWord is a case-insensitive Regex; the implicit
// regexToParser conversion of this class turns such a Regex into a parser that
// accepts an identifier token matched by it. SET and WHERE are the exceptions:
// they are Keyword instances, which makes them the only entries declared in this
// list that newReservedWords picks up by reflection.
//
// The member names are part of the protected interface of this class; do not
// rename them without checking the subclasses.
// ---------------------------------------------------------------------------
protected val AGGREGATE = carbonKeyWord("AGGREGATE")
protected val AS = carbonKeyWord("AS")
protected val AGGREGATION = carbonKeyWord("AGGREGATION")
protected val ALL = carbonKeyWord("ALL")
// NOTE: the member name differs from the literal it matches (NO_DICTIONARY).
protected val HIGH_CARDINALITY_DIMS = carbonKeyWord("NO_DICTIONARY")
protected val BEFORE = carbonKeyWord("BEFORE")
protected val BY = carbonKeyWord("BY")
protected val CASCADE = carbonKeyWord("CASCADE")
protected val CLASS = carbonKeyWord("CLASS")
protected val CLEAN = carbonKeyWord("CLEAN")
protected val COLS = carbonKeyWord("COLS")
protected val COLUMNS = carbonKeyWord("COLUMNS")
protected val COMPACT = carbonKeyWord("COMPACT")
protected val FINISH = carbonKeyWord("FINISH")
protected val STREAMING = carbonKeyWord("STREAMING")
protected val CREATE = carbonKeyWord("CREATE")
protected val CUBE = carbonKeyWord("CUBE")
protected val CUBES = carbonKeyWord("CUBES")
protected val DATA = carbonKeyWord("DATA")
protected val DATABASE = carbonKeyWord("DATABASE")
protected val DATABASES = carbonKeyWord("DATABASES")
protected val DELETE = carbonKeyWord("DELETE")
protected val DELIMITER = carbonKeyWord("DELIMITER")
protected val DESCRIBE = carbonKeyWord("DESCRIBE")
protected val DESC = carbonKeyWord("DESC")
protected val DETAIL = carbonKeyWord("DETAIL")
protected val DIMENSIONS = carbonKeyWord("DIMENSIONS")
protected val DIMFOLDERPATH = carbonKeyWord("DIMFOLDERPATH")
protected val DROP = carbonKeyWord("DROP")
protected val ESCAPECHAR = carbonKeyWord("ESCAPECHAR")
protected val EXCLUDE = carbonKeyWord("EXCLUDE")
protected val EXPLAIN = carbonKeyWord("EXPLAIN")
protected val EXTENDED = carbonKeyWord("EXTENDED")
protected val FORMATTED = carbonKeyWord("FORMATTED")
protected val FACT = carbonKeyWord("FACT")
protected val FIELDS = carbonKeyWord("FIELDS")
protected val FILEHEADER = carbonKeyWord("FILEHEADER")
protected val SERIALIZATION_NULL_FORMAT = carbonKeyWord("SERIALIZATION_NULL_FORMAT")
protected val BAD_RECORDS_LOGGER_ENABLE = carbonKeyWord("BAD_RECORDS_LOGGER_ENABLE")
protected val BAD_RECORDS_ACTION = carbonKeyWord("BAD_RECORDS_ACTION")
protected val IS_EMPTY_DATA_BAD_RECORD = carbonKeyWord("IS_EMPTY_DATA_BAD_RECORD")
// NOTE: the member name differs from the literal it matches (IS_NULL_DATA_BAD_RECORD).
protected val IS_EMPTY_COMMA_DATA_BAD_RECORD = carbonKeyWord("IS_NULL_DATA_BAD_RECORD")
protected val SKIP_EMPTY_LINE = carbonKeyWord("SKIP_EMPTY_LINE")
protected val FILES = carbonKeyWord("FILES")
protected val FROM = carbonKeyWord("FROM")
protected val HIERARCHIES = carbonKeyWord("HIERARCHIES")
protected val IN = carbonKeyWord("IN")
protected val INCLUDE = carbonKeyWord("INCLUDE")
protected val INPATH = carbonKeyWord("INPATH")
protected val INTO = carbonKeyWord("INTO")
protected val LEVELS = carbonKeyWord("LEVELS")
protected val LIKE = carbonKeyWord("LIKE")
protected val LOAD = carbonKeyWord("LOAD")
protected val LOCAL = carbonKeyWord("LOCAL")
protected val MAPPED = carbonKeyWord("MAPPED")
protected val MEASURES = carbonKeyWord("MEASURES")
protected val MERGE = carbonKeyWord("MERGE")
protected val MULTILINE = carbonKeyWord("MULTILINE")
protected val COMPLEX_DELIMITER_LEVEL_1 = carbonKeyWord("COMPLEX_DELIMITER_LEVEL_1")
protected val COMPLEX_DELIMITER_LEVEL_2 = carbonKeyWord("COMPLEX_DELIMITER_LEVEL_2")
protected val COMPLEX_DELIMITER_LEVEL_3 = carbonKeyWord("COMPLEX_DELIMITER_LEVEL_3")
protected val OPTIONS = carbonKeyWord("OPTIONS")
protected val OUTPATH = carbonKeyWord("OUTPATH")
protected val OVERWRITE = carbonKeyWord("OVERWRITE")
protected val PARTITION = carbonKeyWord("PARTITION")
protected val PARTITION_COUNT = carbonKeyWord("PARTITION_COUNT")
protected val PARTITIONDATA = carbonKeyWord("PARTITIONDATA")
protected val PARTITIONER = carbonKeyWord("PARTITIONER")
protected val PARTITIONS = carbonKeyWord("PARTITIONS")
protected val QUOTECHAR = carbonKeyWord("QUOTECHAR")
protected val RELATION = carbonKeyWord("RELATION")
protected val SCHEMA = carbonKeyWord("SCHEMA")
protected val SCHEMAS = carbonKeyWord("SCHEMAS")
// Keyword (not Regex): collected by newReservedWords.
protected val SET = Keyword("SET")
protected val SHOW = carbonKeyWord("SHOW")
protected val SPLIT = carbonKeyWord("SPLIT")
protected val TABLES = carbonKeyWord("TABLES")
protected val TABLE = carbonKeyWord("TABLE")
protected val TERMINATED = carbonKeyWord("TERMINATED")
protected val TYPE = carbonKeyWord("TYPE")
protected val UPDATE = carbonKeyWord("UPDATE")
protected val USE = carbonKeyWord("USE")
// Keyword (not Regex): collected by newReservedWords.
protected val WHERE = Keyword("WHERE")
protected val WITH = carbonKeyWord("WITH")
protected val AGGREGATETABLE = carbonKeyWord("AGGREGATETABLE")
// NOTE: the literal is lower-case; carbonKeyWord prepends (?i), so case does not matter.
protected val ABS = carbonKeyWord("abs")
protected val EXECUTOR = carbonKeyWord("EXECUTOR")
protected val FOR = carbonKeyWord("FOR")
protected val SCRIPTS = carbonKeyWord("SCRIPTS")
protected val USING = carbonKeyWord("USING")
protected val LIMIT = carbonKeyWord("LIMIT")
protected val DEFAULTS = carbonKeyWord("DEFAULTS")
protected val ALTER = carbonKeyWord("ALTER")
protected val ADD = carbonKeyWord("ADD")
protected val IF = carbonKeyWord("IF")
protected val NOT = carbonKeyWord("NOT")
protected val EXISTS = carbonKeyWord("EXISTS")
protected val DIMENSION = carbonKeyWord("DIMENSION")
protected val STARTTIME = carbonKeyWord("STARTTIME")
protected val HISTORY = carbonKeyWord("HISTORY")
protected val SEGMENTS = carbonKeyWord("SEGMENTS")
protected val SEGMENT = carbonKeyWord("SEGMENT")
protected val METACACHE = carbonKeyWord("METACACHE")
// Data type keywords; most of them are consumed by the type parsers of this class
// (primitiveTypes, miscType, charType, decimalType and the nested field types).
protected val STRING = carbonKeyWord("STRING")
protected val INTEGER = carbonKeyWord("INTEGER")
protected val TIMESTAMP = carbonKeyWord("TIMESTAMP")
protected val DATE = carbonKeyWord("DATE")
protected val CHAR = carbonKeyWord("CHAR")
protected val VARCHAR = carbonKeyWord("VARCHAR")
protected val NUMERIC = carbonKeyWord("NUMERIC")
protected val DECIMAL = carbonKeyWord("DECIMAL")
protected val DOUBLE = carbonKeyWord("DOUBLE")
protected val FLOAT = carbonKeyWord("FLOAT")
protected val SHORT = carbonKeyWord("SHORT")
protected val INT = carbonKeyWord("INT")
protected val BOOLEAN = carbonKeyWord("BOOLEAN")
protected val LONG = carbonKeyWord("LONG")
protected val BIGINT = carbonKeyWord("BIGINT")
protected val BINARY = carbonKeyWord("BINARY")
protected val ARRAY = carbonKeyWord("ARRAY")
protected val STRUCT = carbonKeyWord("STRUCT")
protected val MAP = carbonKeyWord("MAP")
protected val SMALLINT = carbonKeyWord("SMALLINT")
protected val CHANGE = carbonKeyWord("CHANGE")
protected val TBLPROPERTIES = carbonKeyWord("TBLPROPERTIES")
protected val ID = carbonKeyWord("ID")
protected val DATAMAP = carbonKeyWord("DATAMAP")
protected val ON = carbonKeyWord("ON")
protected val DMPROPERTIES = carbonKeyWord("DMPROPERTIES")
protected val SELECT = carbonKeyWord("SELECT")
protected val REBUILD = carbonKeyWord("REBUILD")
protected val DEFERRED = carbonKeyWord("DEFERRED")
protected val STREAM = carbonKeyWord("STREAM")
protected val STREAMS = carbonKeyWord("STREAMS")
protected val STMPROPERTIES = carbonKeyWord("STMPROPERTIES")
protected val CARBONCLI = carbonKeyWord("CARBONCLI")
protected val PATH = carbonKeyWord("PATH")
protected val INSERT = carbonKeyWord("INSERT")
protected val STAGE = carbonKeyWord("STAGE")
protected val INDEX = carbonKeyWord("INDEX")
protected val INDEXES = carbonKeyWord("INDEXES")
protected val REGISTER = carbonKeyWord("REGISTER")
/**
 * Reserved words passed to the lexer.
 *
 * Collected reflectively: every public method of the runtime class whose declared
 * return type is exactly Keyword is invoked on this instance and its `str` is kept.
 * Members whose type is Regex (those built with carbonKeyWord) are not included.
 */
protected val newReservedWords =
  for {
    method <- this.getClass.getMethods
    if method.getReturnType == classOf[Keyword]
  } yield method.invoke(this).asInstanceOf[Keyword].str
/**
 * Lexer used by this parser: a SqlLexical initialised with newReservedWords.
 */
override val lexical = {
  val lexer = new SqlLexical()
  lexer.initialize(newReservedWords)
  lexer
}
import lexical.Identifier
/**
 * Implicitly lifts a Regex into a token parser.
 *
 * The resulting parser accepts an Identifier token whose text is matched by `regex`
 * (checked through `unapplySeq`) and yields that text unchanged.
 *
 * @param regex pattern the identifier text has to match
 * @return parser producing the text of the matched identifier
 */
implicit def regexToParser(regex: Regex): Parser[String] = {
  val expected = s"identifier matching regex ${ regex }"
  acceptMatch(expected, {
    case Identifier(text) if regex.unapplySeq(text).nonEmpty => text
  })
}
/**
 * Builds a case-insensitive regular expression for a keyword.
 *
 * @param keys keyword text; it is placed into the pattern as-is (it is not quoted)
 * @return Regex made of the `(?i)` flag followed by `keys`
 */
def carbonKeyWord(keys: String): Regex = {
  new Regex(s"(?i)$keys")
}
/**
 * Parses an optionally database-qualified table name: `[db.]table`.
 *
 * Yields Seq(db, table) when the qualifier is present and Seq(table) otherwise.
 */
protected lazy val dbTableIdentifier: Parser[Seq[String]] =
  (ident <~ ".").? ~ ident ^^ {
    case Some(database) ~ table => Seq(database, table)
    case None ~ table => Seq(table)
  }
/**
 * Parses one option pair written as two string literals separated by `=`.
 *
 * The key is trimmed and lower-cased; the value is returned untouched.
 * Lower-casing uses Locale.ROOT so that keys are normalised identically whatever
 * the JVM default locale is (with a Turkish default locale a plain toLowerCase()
 * would turn "I" into a dotless i and the key would no longer match).
 *
 * The parser result is always a `~` pair, so no fallback case is needed.
 */
protected lazy val options: Parser[(String, String)] =
  (stringLit <~ "=") ~ stringLit ^^ {
    case key ~ value => (key.trim.toLowerCase(_root_.java.util.Locale.ROOT), value)
  }
/**
 * Parses a single string literal and yields its text unchanged.
 *
 * The former result mapping was an identity whose second case could never be
 * reached (the first case was a bare variable pattern), so the literal parser is
 * used directly.
 */
protected lazy val commandOptions: Parser[String] = stringLit
/**
 * Parses a partition entry: an identifier, an optional `=`, and an optional
 * string literal value (the `=` and the value are optional independently).
 *
 * Yields the trimmed identifier together with the optional value.
 * The parser result is always a `~` pair, so no fallback case is needed.
 */
protected lazy val partitions: Parser[(String, Option[String])] =
  (ident <~ "=".?) ~ stringLit.? ^^ {
    case name ~ value => (name.trim, value)
  }
/**
 * Parses two numeric literals separated by a comma and yields them as Ints.
 *
 * `toInt` throws NumberFormatException when a literal is not a valid Int
 * (for example when it is out of range).
 * The parser result is always a `~` pair, so no fallback case is needed.
 */
protected lazy val valueOptions: Parser[(Int, Int)] =
  (numericLit <~ ",") ~ numericLit ^^ {
    case first ~ second => (first.toInt, second.toInt)
  }
/**
 * Parses two string literals separated by a comma and yields them as a pair.
 */
protected lazy val columnOptions: Parser[(String, String)] =
  (stringLit <~ ",") ~ stringLit ^^ {
    case first ~ second => (first, second)
    // Fallback for any result that does not match the pair pattern above.
    case _ =>
      throw new MalformedCarbonCommandException("value cannot be empty")
  }
// Column definition parser; a plain alias that delegates to anyFieldDef.
protected lazy val dimCol: Parser[Field] = anyFieldDef
/**
 * Parses a primitive data type and yields its type name as a String.
 *
 * Alternatives are tried in the order listed. LONG and BIGINT both yield "bigint",
 * SHORT and SMALLINT both yield "smallint".
 */
protected lazy val primitiveTypes =
  STRING ^^^ "string" |
  BOOLEAN ^^^ "boolean" |
  INTEGER ^^^ "integer" |
  TIMESTAMP ^^^ "timestamp" |
  NUMERIC ^^^ "numeric" |
  (LONG | BIGINT) ^^^ "bigint" |
  (SHORT | SMALLINT) ^^^ "smallint" |
  INT ^^^ "int" |
  DOUBLE ^^^ "double" |
  // FLOAT yields "double", the same name as DOUBLE.
  FLOAT ^^^ "double" |
  decimalType |
  DATE ^^^ "date" |
  charType
// Matches the BINARY keyword and yields the type name "binary".
protected lazy val miscType = BINARY ^^^ "binary"
/**
 * Parses CHAR or VARCHAR, optionally followed by a parenthesised numeric length.
 *
 * Only the keyword text as it appeared in the input is returned; the length,
 * when present, is consumed and discarded.
 */
private lazy val charType =
  (CHAR | VARCHAR) ~ ("(" ~> numericLit <~ ")").? ^^ {
    case keyword ~ _ => s"${ keyword }"
  }
/**
 * Parses DECIMAL with an optional `(precision, scale)` suffix.
 *
 * Yields "decimal(precision, scale)" when the suffix is given and the default
 * "decimal(10,0)" when it is omitted.
 */
private lazy val decimalType =
  DECIMAL ~ (("(" ~> numericLit <~ ",") ~ (numericLit <~ ")")).? ^^ {
    case _ ~ Some(precision ~ scale) => s"decimal($precision, $scale)"
    case _ ~ None => "decimal(10,0)"
  }
/**
 * Parses any field type: struct, array, map, primitive or misc.
 * Alternatives are tried in the order listed.
 */
protected lazy val nestedType: Parser[Field] =
  structFieldType |
  arrayFieldType |
  mapFieldType |
  primitiveFieldType |
  miscFieldType
/**
 * Parses a named field definition: a name (identifier or string literal), an
 * optional `:`, the field type, and an optional `IN <identifier or string literal>`.
 *
 * The resulting Field takes its data type and children from the parsed type and
 * uses the parsed name for both its first and third argument; the fifth argument
 * is passed as null and the optional IN value as the sixth.
 */
lazy val anyFieldDef: Parser[Field] =
  (ident | stringLit) ~ (":".? ~> nestedType) ~ (IN ~> (ident | stringLit)).? ^^ {
    case fieldName ~ fieldType ~ inValue =>
      Field(fieldName, fieldType.dataType, Some(fieldName), fieldType.children, null, inValue)
  }
/**
 * Wraps a parsed primitive type name into a placeholder Field named "unknown"
 * whose children argument is Some(null).
 */
protected lazy val primitiveFieldType: Parser[Field] =
  primitiveTypes ^^ { typeName =>
    Field("unknown", Some(typeName), Some("unknown"), Some(null))
  }
/**
 * Wraps a parsed misc type name (see miscType) into a placeholder Field named
 * "unknown" whose children argument is Some(null).
 */
protected lazy val miscFieldType: Parser[Field] =
  miscType ^^ { typeName =>
    Field("unknown", Some(typeName), Some("unknown"), Some(null))
  }
/**
 * Parses `ARRAY<type>`.
 *
 * Yields a placeholder Field of type "array" with exactly one child named "val"
 * that carries the data type and children of the parsed element type.
 */
protected lazy val arrayFieldType: Parser[Field] =
  ((ARRAY ^^^ "array") ~> "<" ~> nestedType <~ ">") ^^ { element =>
    val elementField = Field("val", element.dataType, Some("val"), element.children)
    Field("unknown", Some("array"), Some("unknown"), Some(List(elementField)))
  }
/**
 * Parses `STRUCT<field, field, ...>` where every member is an anyFieldDef.
 *
 * Yields a placeholder Field of type "struct" whose children are the parsed members.
 */
protected lazy val structFieldType: Parser[Field] =
  ((STRUCT ^^^ "struct") ~> "<" ~> repsep(anyFieldDef, ",") <~ ">") ^^ { members =>
    Field("unknown", Some("struct"), Some("unknown"), Some(members))
  }
/**
 * Parses `MAP<keyType, valueType>`; the key must be a primitive type, the value
 * may be any nested type.
 *
 * Map<Key,Value> is represented as Map<Struct<Key,Value>>: the resulting "map"
 * Field has a single "val" child of type "struct" whose children are the "key"
 * and "value" fields.
 */
protected lazy val mapFieldType: Parser[Field] =
  (MAP ^^^ "map") ~> "<" ~> primitiveFieldType ~ ("," ~> nestedType) <~ ">" ^^ {
    case keyType ~ valueType =>
      val keyField = Field("key", keyType.dataType, Some("key"), keyType.children)
      val valueField = Field("value", valueType.dataType, Some("value"), valueType.children)
      val entry =
        Field("val", Some("struct"), Some("unknown"), Some(List(keyField, valueField)))
      Field("unknown", Some("map"), Some("unknown"), Some(List(entry)))
  }
/**
 * Parses a measure column: a name (identifier or string literal), an optional
 * numeric data type, an optional `AS <name>` and an optional `IN <name>`.
 *
 * The Field is built from the name, the optional data type and the optional AS
 * value. The IN clause is accepted by the grammar but its value is not used.
 */
protected lazy val measureCol: Parser[Field] =
  (ident | stringLit) ~
  (INTEGER ^^^ "integer" | NUMERIC ^^^ "numeric" | SHORT ^^^ "smallint" |
    BIGINT ^^^ "bigint" | DECIMAL ^^^ "decimal").? ~
  (AS ~> (ident | stringLit)).? ~
  (IN ~> (ident | stringLit)).? ^^ {
    case column ~ dataType ~ alias ~ _ => Field(column, dataType, alias, Some(null))
  }
/**
 * Parses a segment id and yields its text.
 *
 * Accepts either a numeric literal or a single token whose class has the simple
 * name "FloatLit" or "DecimalLit"; for the latter the token's `chars` is returned.
 */
protected lazy val segmentId: Parser[String] =
  numericLit ^^ { id => id } |
  elem("decimal", token => {
    token.getClass.getSimpleName match {
      case "FloatLit" | "DecimalLit" => true
      case _ => false
    }
  }) ^^ (_.chars)
}