| /* |
| * Licensed to the Apache Software Foundation (ASF) under one or more |
| * contributor license agreements. See the NOTICE file distributed with |
| * this work for additional information regarding copyright ownership. |
| * The ASF licenses this file to You under the Apache License, Version 2.0 |
| * (the "License"); you may not use this file except in compliance with |
| * the License. You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
| |
| package org.apache.spark.sql.catalyst |
| |
| import scala.language.implicitConversions |
| import scala.util.parsing.combinator.lexical.StdLexical |
| import scala.util.parsing.combinator.syntactical.StandardTokenParsers |
| import scala.util.parsing.combinator.PackratParsers |
| import scala.util.parsing.input.CharArrayReader.EofCh |
| |
| import org.apache.spark.sql.catalyst.plans.logical._ |
| import org.apache.spark.sql.util.CarbonException |
| |
/**
 * Base class for Carbon SQL parsers, built on Scala's token-based parser combinators
 * with packrat parsing mixed in.
 *
 * Sub classes declare their reserved words as members of type `Keyword` and supply the
 * grammar entry point through `start`. The reserved words are discovered reflectively
 * and registered with the lexer the first time `parse` is called.
 */
private[sql] abstract class AbstractCarbonSparkSQLParser
  extends StandardTokenParsers with PackratParsers {

  /**
   * Parses the given SQL text with the `start` rule, requiring the whole input to be
   * consumed (`phrase`).
   *
   * The method is `synchronized` on this parser instance, so concurrent callers are
   * serialized.
   *
   * @param input the SQL text to parse
   * @return the logical plan produced by `start` on success; on a failure or error the
   *         textual form of the parse result is handed to
   *         `CarbonException.analysisException` (presumably raising an analysis
   *         exception -- confirm against that helper)
   */
  def parse(input: String): LogicalPlan = synchronized {
    // Force the one-time registration of the reserved words with the lexer.
    initLexical
    phrase(start)(new lexical.Scanner(input)) match {
      case Success(plan, _) => plan
      case failureOrError => CarbonException.analysisException(failureOrError.toString)
    }
  }

  /*
   * One-time initialization of the lexer. Being a lazy val, the body runs only on first
   * access, which avoids re-initializing the lexer on every call to parse.
   */
  protected lazy val initLexical: Unit = lexical.initialize(reservedWords)

  /**
   * A reserved word of the grammar.
   *
   * @param str the keyword as written in the grammar definition
   */
  protected case class Keyword(str: String) {
    /** The keyword in the normalized form used by the lexer. */
    def normalize: String = lexical.normalizeKeyword(str)

    /** A parser accepting this keyword (via the implicit keyword parser for strings). */
    def parser: Parser[String] = normalize
  }

  /** Lets a `Keyword` be used directly wherever a `Parser[String]` is expected. */
  protected implicit def asParser(k: Keyword): Parser[String] = k.parser

  // By default, use reflection to find the reserved words defined in the sub class.
  // NOTICE: since the Keyword properties are defined by the sub class, this must not be
  // evaluated during the parent class instantiation, because the sub class instance
  // isn't fully created yet. That is why this is a lazy val.
  protected lazy val reservedWords: Seq[String] =
    this
      .getClass
      .getMethods
      // Only zero-argument members can be read with `invoke(this)`; a method that
      // returns a Keyword but takes parameters would otherwise make `invoke` throw
      // IllegalArgumentException and break every call to parse.
      .filter(m => m.getReturnType == classOf[Keyword] && m.getParameterTypes.isEmpty)
      .map(_.invoke(this).asInstanceOf[Keyword].normalize)

  // The lexer starts without any reserved words; they are installed lazily through
  // initLexical on the first call to parse.
  override val lexical = new SqlLexical

  /** Entry rule of the grammar; supplied by the concrete parser. */
  protected def start: Parser[LogicalPlan]

  // Returns the whole input string and consumes all of it.
  protected lazy val wholeInput: Parser[String] = new Parser[String] {
    def apply(in: Input): ParseResult[String] =
      Success(in.source.toString, in.drop(in.source.length()))
  }

  // Returns the part of the input string that has not been parsed yet and consumes it.
  protected lazy val restInput: Parser[String] = new Parser[String] {
    def apply(in: Input): ParseResult[String] =
      Success(
        in.source.subSequence(in.offset, in.source.length()).toString,
        in.drop(in.source.length()))
  }
}
| |
/**
 * Lexer for the SQL dialect: extends the standard lexer with case-insensitive keywords,
 * floating point literals, back-quoted identifiers and SQL style comments.
 */
class SqlLexical extends StdLexical {
  /** Token for a numeric literal that contains a decimal point. */
  case class FloatLit(chars: String) extends Token {
    override def toString: String = chars
  }

  /*
   * Replaces the set of reserved words. This is a workaround that lets the parser
   * install its keywords lazily, after the lexer has already been constructed.
   */
  def initialize(keywords: Seq[String]): Unit = {
    reserved.clear()
    reserved ++= keywords
  }

  /*
   * Normalizes a keyword string to lower case.
   *
   * Locale.ROOT is used so the result does not depend on the JVM default locale: with a
   * Turkish default locale, for example, "INSERT".toLowerCase yields a dotless i and the
   * word would no longer match its reserved-word entry.
   */
  def normalizeKeyword(str: String): String = str.toLowerCase(java.util.Locale.ROOT)

  delimiters += (
    "@", "*", "+", "-", "<", "=", "<>", "!=", "<=", ">=", ">", "/", "(", ")",
    ",", ";", "%", "{", "}", ":", "[", "]", ".", "&", "|", "^", "~", "<=>"
  )

  // A word is a keyword when its normalized form is reserved; otherwise it is an
  // identifier that keeps its original spelling.
  protected override def processIdent(name: String) = {
    val token = normalizeKeyword(name)
    if (reserved contains token) Keyword(token) else Identifier(name)
  }

  // The alternatives are tried in the order written.
  override lazy val token: Parser[Token] =
    ( // word starting with an identifier character
      identChar ~ (identChar | digit).* ^^
      { case first ~ rest => processIdent((first :: rest).mkString) }
      // word starting with digits but containing an identifier character; tried before
      // the numeric literal alternative below
    | digit.* ~ identChar ~ (identChar | digit).* ^^
      { case first ~ middle ~ rest => processIdent((first ++ (middle :: rest)).mkString) }
      // integer literal, or float literal when a '.' follows the leading digits
    | rep1(digit) ~ ('.' ~> digit.*).? ^^ {
        case i ~ None => NumericLit(i.mkString)
        case i ~ Some(d) => FloatLit(i.mkString + "." + d.mkString)
      }
      // single-quoted string literal (no embedded quote or newline)
    | '\'' ~> chrExcept('\'', '\n', EofCh).* <~ '\'' ^^
      { case chars => StringLit(chars mkString "") }
      // double-quoted string literal (no embedded quote or newline)
    | '"' ~> chrExcept('"', '\n', EofCh).* <~ '"' ^^
      { case chars => StringLit(chars mkString "") }
      // back-quoted identifier; it bypasses processIdent, so it is never a keyword
    | '`' ~> chrExcept('`', '\n', EofCh).* <~ '`' ^^
      { case chars => Identifier(chars mkString "") }
    | EofCh ^^^ EOF
      // an opening quote that reaches this point had no matching closing quote
    | '\'' ~> failure("unclosed string literal")
    | '"' ~> failure("unclosed string literal")
    | delim
    | failure("illegal character")
    )

  // Identifiers are made of letters and underscores (digits are handled in `token`).
  override def identChar: Parser[Elem] = letter | elem('_')

  // Whitespace also covers comments: /* ... */ blocks and //, # and -- line comments.
  override def whitespace: Parser[Any] =
    ( whitespaceChar
    | '/' ~ '*' ~ comment
    | '/' ~ '/' ~ chrExcept(EofCh, '\n').*
    | '#' ~ chrExcept(EofCh, '\n').*
    | '-' ~ '-' ~ chrExcept(EofCh, '\n').*
    | '/' ~ '*' ~ failure("unclosed comment")
    ).*
}
| |