integration/spark/src/main/scala/org/apache/spark/sql/catalyst/AbstractCarbonSparkSQLParser.scala - carbondata - Git at Google

 /*
  * Licensed to the Apache Software Foundation (ASF) under one or more
  * contributor license agreements.  See the NOTICE file distributed with
  * this work for additional information regarding copyright ownership.
  * The ASF licenses this file to You under the Apache License, Version 2.0
  * (the "License"); you may not use this file except in compliance with
  * the License.  You may obtain a copy of the License at
  *
  *    http://www.apache.org/licenses/LICENSE-2.0
  *
  * Unless required by applicable law or agreed to in writing, software
  * distributed under the License is distributed on an "AS IS" BASIS,
  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */

 package org.apache.spark.sql.catalyst

 import scala.language.implicitConversions
 import scala.util.parsing.combinator.lexical.StdLexical
 import scala.util.parsing.combinator.syntactical.StandardTokenParsers
 import scala.util.parsing.combinator.PackratParsers
 import scala.util.parsing.input.CharArrayReader.EofCh

 import org.apache.spark.sql.catalyst.plans.logical._
 import org.apache.spark.sql.util.CarbonException

 /**
  * Base class for SQL parsers built on Scala parser combinators.
  *
  * Sub classes declare their keywords as `Keyword` members and implement `start`.
  * The keywords are discovered by reflection (see `reservedWords`) and registered with
  * the lexer the first time `parse` is called.
  */
 private[sql] abstract class AbstractCarbonSparkSQLParser
   extends StandardTokenParsers with PackratParsers {

   /**
    * Parses `input` with the `start` rule wrapped in `phrase`.
    *
    * The call is `synchronized` on this parser instance.
    *
    * @param input the SQL text to parse
    * @return the plan carried by a successful parse result; any other parse result is
    *         handed, as a string, to `CarbonException.analysisException`
    */
   def parse(input: String): LogicalPlan = synchronized {
     // Initialize the Keywords.
     initLexical
     phrase(start)(new lexical.Scanner(input)) match {
       case Success(plan, _) => plan
       case failureOrError => CarbonException.analysisException(failureOrError.toString)
     }
   }

   /*
    * One-time initialization of the lexer. Being a lazy val, the reserved words are
    * registered on first access only, so `parse` does not re-initialize the lexer on
    * every call.
    */
   protected lazy val initLexical: Unit = lexical.initialize(reservedWords)

   /** A keyword of the grammar; `str` is the keyword text as declared by the sub class. */
   protected case class Keyword(str: String) {
     /** The keyword text after the lexer's normalization. */
     def normalize: String = lexical.normalizeKeyword(str)

     /** A parser built from the normalized keyword text. */
     def parser: Parser[String] = normalize
   }

   /** Lets a `Keyword` be used directly wherever a `Parser[String]` is expected. */
   protected implicit def asParser(k: Keyword): Parser[String] = k.parser

   // By default, use Reflection to find the reserved words defined in the sub class.
   // NOTICE, Since the Keyword properties defined by sub class, we couldn't call this
   // method during the parent class instantiation, because the sub class instance
   // isn't created yet.
   //
   // Only zero-argument methods are considered: they are invoked without arguments, so a
   // method that returns Keyword but takes parameters would make `invoke` throw
   // IllegalArgumentException instead of contributing a reserved word.
   protected lazy val reservedWords: Seq[String] =
     this
       .getClass
       .getMethods
       .filter(m => m.getParameterTypes.length == 0 && m.getReturnType == classOf[Keyword])
       .map(_.invoke(this).asInstanceOf[Keyword].normalize)

   // Set the keywords as empty by default, will change that later.
   override val lexical = new SqlLexical

   /** Entry rule of the grammar, supplied by the sub class. */
   protected def start: Parser[LogicalPlan]

   // Returns the whole input string and advances the reader by the length of the source.
   protected lazy val wholeInput: Parser[String] = new Parser[String] {
     def apply(in: Input): ParseResult[String] =
       Success(in.source.toString, in.drop(in.source.length()))
   }

   // Returns the part of the input that has not been parsed yet (from the current offset
   // to the end of the source) and advances the reader by the length of the source.
   protected lazy val restInput: Parser[String] = new Parser[String] {
     def apply(in: Input): ParseResult[String] =
       Success(
         in.source.subSequence(in.offset, in.source.length()).toString,
         in.drop(in.source.length()))
   }
 }

 /**
  * Lexer used by the SQL parser.
  *
  * Extends `StdLexical` with a floating point literal token, case-insensitive keywords,
  * back-quoted identifiers, identifiers that start with digits, and additional comment
  * styles (`//`, `#` and `--` up to the end of the line).
  */
 class SqlLexical extends StdLexical {
   /** Token for a numeric literal that contains a decimal point. */
   case class FloatLit(chars: String) extends Token {
     override def toString: String = chars
   }

   /* This is a workaround to support the lazy setting: replaces the reserved words. */
   def initialize(keywords: Seq[String]): Unit = {
     reserved.clear()
     reserved ++= keywords
   }

   /*
    * Normalize the keyword string.
    * Locale.ROOT keeps the result independent of the JVM default locale; with a Turkish
    * default locale a plain toLowerCase maps "I" to a dotless "ı", so keywords such as
    * INSERT would no longer match their reserved (lower case) form.
    */
   def normalizeKeyword(str: String): String = str.toLowerCase(java.util.Locale.ROOT)

   delimiters += (
     "@", "*", "+", "-", "<", "=", "<>", "!=", "<=", ">=", ">", "/", "(", ")",
     ",", ";", "%", "{", "}", ":", "[", "]", ".", "&", "|", "^", "~", "<=>"
     )

   /** Yields a `Keyword` when the normalized name is reserved, else an `Identifier`. */
   protected override def processIdent(name: String) = {
     val token = normalizeKeyword(name)
     if (reserved contains token) Keyword(token) else Identifier(name)
   }

   // Alternatives are tried in order:
   //  1. identifier/keyword starting with an identifier character
   //  2. identifier/keyword starting with digits followed by an identifier character
   //  3. digits, optionally followed by '.' and more digits (NumericLit / FloatLit)
   //  4. single- or double-quoted string literal on one line
   //  5. back-quoted identifier on one line
   //  6. end of input, unclosed string literals, delimiters, and finally an error
   override lazy val token: Parser[Token] =
     ( identChar ~ (identChar | digit).* ^^
       { case first ~ rest => processIdent((first :: rest).mkString) }
       | digit.* ~ identChar ~ (identChar | digit).* ^^
         { case first ~ middle ~ rest => processIdent((first ++ (middle :: rest)).mkString) }
       | rep1(digit) ~ ('.' ~> digit.*).? ^^ {
       case i ~ None => NumericLit(i.mkString)
       case i ~ Some(d) => FloatLit(i.mkString + "." + d.mkString)
     }
       | '\'' ~> chrExcept('\'', '\n', EofCh).* <~ '\'' ^^
         { case chars => StringLit(chars mkString "") }
       | '"' ~> chrExcept('"', '\n', EofCh).* <~ '"' ^^
         { case chars => StringLit(chars mkString "") }
       | '`' ~> chrExcept('`', '\n', EofCh).* <~ '`' ^^
         { case chars => Identifier(chars mkString "") }
       | EofCh ^^^ EOF
       | '\'' ~> failure("unclosed string literal")
       | '"' ~> failure("unclosed string literal")
       | delim
       | failure("illegal character")
       )

   /** An identifier character is a letter or an underscore. */
   override def identChar: Parser[Elem] = letter | elem('_')

   // Whitespace is any run of whitespace characters, block comments, and line comments
   // introduced by "//", "#" or "--".
   override def whitespace: Parser[Any] =
     ( whitespaceChar
       | '/' ~ '*' ~ comment
       | '/' ~ '/' ~ chrExcept(EofCh, '\n').*
       | '#' ~ chrExcept(EofCh, '\n').*
       | '-' ~ '-' ~ chrExcept(EofCh, '\n').*
       | '/' ~ '*' ~ failure("unclosed comment")
       ).*
 }
	/*
	* Licensed to the Apache Software Foundation (ASF) under one or more
	* contributor license agreements. See the NOTICE file distributed with
	* this work for additional information regarding copyright ownership.
	* The ASF licenses this file to You under the Apache License, Version 2.0
	* (the "License"); you may not use this file except in compliance with
	* the License. You may obtain a copy of the License at
	*
	* http://www.apache.org/licenses/LICENSE-2.0
	*
	* Unless required by applicable law or agreed to in writing, software
	* distributed under the License is distributed on an "AS IS" BASIS,
	* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
	* See the License for the specific language governing permissions and
	* limitations under the License.
	*/

	package org.apache.spark.sql.catalyst

	import scala.language.implicitConversions
	import scala.util.parsing.combinator.lexical.StdLexical
	import scala.util.parsing.combinator.syntactical.StandardTokenParsers
	import scala.util.parsing.combinator.PackratParsers
	import scala.util.parsing.input.CharArrayReader.EofCh

	import org.apache.spark.sql.catalyst.plans.logical._
	import org.apache.spark.sql.util.CarbonException

	/**
	 * Base class for SQL parsers built on Scala parser combinators.
	 *
	 * Sub classes declare their keywords as `Keyword` members and implement `start`.
	 * The keywords are discovered by reflection (see `reservedWords`) and registered with
	 * the lexer the first time `parse` is called.
	 */
	private[sql] abstract class AbstractCarbonSparkSQLParser
	  extends StandardTokenParsers with PackratParsers {

	  /**
	   * Parses `input` with the `start` rule wrapped in `phrase`.
	   *
	   * The call is `synchronized` on this parser instance.
	   *
	   * @param input the SQL text to parse
	   * @return the plan carried by a successful parse result; any other parse result is
	   *         handed, as a string, to `CarbonException.analysisException`
	   */
	  def parse(input: String): LogicalPlan = synchronized {
	    // Initialize the Keywords.
	    initLexical
	    phrase(start)(new lexical.Scanner(input)) match {
	      case Success(plan, _) => plan
	      case failureOrError => CarbonException.analysisException(failureOrError.toString)
	    }
	  }

	  /*
	   * One-time initialization of the lexer. Being a lazy val, the reserved words are
	   * registered on first access only, so `parse` does not re-initialize the lexer on
	   * every call.
	   */
	  protected lazy val initLexical: Unit = lexical.initialize(reservedWords)

	  /** A keyword of the grammar; `str` is the keyword text as declared by the sub class. */
	  protected case class Keyword(str: String) {
	    /** The keyword text after the lexer's normalization. */
	    def normalize: String = lexical.normalizeKeyword(str)

	    /** A parser built from the normalized keyword text. */
	    def parser: Parser[String] = normalize
	  }

	  /** Lets a `Keyword` be used directly wherever a `Parser[String]` is expected. */
	  protected implicit def asParser(k: Keyword): Parser[String] = k.parser

	  // By default, use Reflection to find the reserved words defined in the sub class.
	  // NOTICE, Since the Keyword properties defined by sub class, we couldn't call this
	  // method during the parent class instantiation, because the sub class instance
	  // isn't created yet.
	  //
	  // Only zero-argument methods are considered: they are invoked without arguments, so a
	  // method that returns Keyword but takes parameters would make `invoke` throw
	  // IllegalArgumentException instead of contributing a reserved word.
	  protected lazy val reservedWords: Seq[String] =
	    this
	      .getClass
	      .getMethods
	      .filter(m => m.getParameterTypes.length == 0 && m.getReturnType == classOf[Keyword])
	      .map(_.invoke(this).asInstanceOf[Keyword].normalize)

	  // Set the keywords as empty by default, will change that later.
	  override val lexical = new SqlLexical

	  /** Entry rule of the grammar, supplied by the sub class. */
	  protected def start: Parser[LogicalPlan]

	  // Returns the whole input string and advances the reader by the length of the source.
	  protected lazy val wholeInput: Parser[String] = new Parser[String] {
	    def apply(in: Input): ParseResult[String] =
	      Success(in.source.toString, in.drop(in.source.length()))
	  }

	  // Returns the part of the input that has not been parsed yet (from the current offset
	  // to the end of the source) and advances the reader by the length of the source.
	  protected lazy val restInput: Parser[String] = new Parser[String] {
	    def apply(in: Input): ParseResult[String] =
	      Success(
	        in.source.subSequence(in.offset, in.source.length()).toString,
	        in.drop(in.source.length()))
	  }
	}

	/**
	 * Lexer used by the SQL parser.
	 *
	 * Extends `StdLexical` with a floating point literal token, case-insensitive keywords,
	 * back-quoted identifiers, identifiers that start with digits, and additional comment
	 * styles (`//`, `#` and `--` up to the end of the line).
	 */
	class SqlLexical extends StdLexical {
	  /** Token for a numeric literal that contains a decimal point. */
	  case class FloatLit(chars: String) extends Token {
	    override def toString: String = chars
	  }

	  /* This is a workaround to support the lazy setting: replaces the reserved words. */
	  def initialize(keywords: Seq[String]): Unit = {
	    reserved.clear()
	    reserved ++= keywords
	  }

	  /*
	   * Normalize the keyword string.
	   * Locale.ROOT keeps the result independent of the JVM default locale; with a Turkish
	   * default locale a plain toLowerCase maps "I" to a dotless "ı", so keywords such as
	   * INSERT would no longer match their reserved (lower case) form.
	   */
	  def normalizeKeyword(str: String): String = str.toLowerCase(java.util.Locale.ROOT)

	  delimiters += (
	    "@", "*", "+", "-", "<", "=", "<>", "!=", "<=", ">=", ">", "/", "(", ")",
	    ",", ";", "%", "{", "}", ":", "[", "]", ".", "&", "|", "^", "~", "<=>"
	    )

	  /** Yields a `Keyword` when the normalized name is reserved, else an `Identifier`. */
	  protected override def processIdent(name: String) = {
	    val token = normalizeKeyword(name)
	    if (reserved contains token) Keyword(token) else Identifier(name)
	  }

	  // Alternatives are tried in order:
	  //  1. identifier/keyword starting with an identifier character
	  //  2. identifier/keyword starting with digits followed by an identifier character
	  //  3. digits, optionally followed by '.' and more digits (NumericLit / FloatLit)
	  //  4. single- or double-quoted string literal on one line
	  //  5. back-quoted identifier on one line
	  //  6. end of input, unclosed string literals, delimiters, and finally an error
	  override lazy val token: Parser[Token] =
	    ( identChar ~ (identChar | digit).* ^^
	      { case first ~ rest => processIdent((first :: rest).mkString) }
	      | digit.* ~ identChar ~ (identChar | digit).* ^^
	        { case first ~ middle ~ rest => processIdent((first ++ (middle :: rest)).mkString) }
	      | rep1(digit) ~ ('.' ~> digit.*).? ^^ {
	      case i ~ None => NumericLit(i.mkString)
	      case i ~ Some(d) => FloatLit(i.mkString + "." + d.mkString)
	    }
	      | '\'' ~> chrExcept('\'', '\n', EofCh).* <~ '\'' ^^
	        { case chars => StringLit(chars mkString "") }
	      | '"' ~> chrExcept('"', '\n', EofCh).* <~ '"' ^^
	        { case chars => StringLit(chars mkString "") }
	      | '`' ~> chrExcept('`', '\n', EofCh).* <~ '`' ^^
	        { case chars => Identifier(chars mkString "") }
	      | EofCh ^^^ EOF
	      | '\'' ~> failure("unclosed string literal")
	      | '"' ~> failure("unclosed string literal")
	      | delim
	      | failure("illegal character")
	      )

	  /** An identifier character is a letter or an underscore. */
	  override def identChar: Parser[Elem] = letter | elem('_')

	  // Whitespace is any run of whitespace characters, block comments, and line comments
	  // introduced by "//", "#" or "--".
	  override def whitespace: Parser[Any] =
	    ( whitespaceChar
	      | '/' ~ '*' ~ comment
	      | '/' ~ '/' ~ chrExcept(EofCh, '\n').*
	      | '#' ~ chrExcept(EofCh, '\n').*
	      | '-' ~ '-' ~ chrExcept(EofCh, '\n').*
	      | '/' ~ '*' ~ failure("unclosed comment")
	      ).*
	}