/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.spark.sql.catalyst

import scala.language.implicitConversions
import scala.util.parsing.combinator.lexical.StdLexical
import scala.util.parsing.combinator.syntactical.StandardTokenParsers
import scala.util.parsing.combinator.{PackratParsers, RegexParsers}
import scala.util.parsing.input.CharArrayReader.EofCh

import org.apache.spark.sql.catalyst.plans.logical._
private[sql] abstract class AbstractSparkSQLParser
  extends StandardTokenParsers with PackratParsers {

  def apply(input: String): LogicalPlan = phrase(start)(new lexical.Scanner(input)) match {
    case Success(plan, _) => plan
    case failureOrError => sys.error(failureOrError.toString)
  }

  protected case class Keyword(str: String)

  protected def start: Parser[LogicalPlan]

  // Returns the whole input string
  protected lazy val wholeInput: Parser[String] = new Parser[String] {
    def apply(in: Input): ParseResult[String] =
      Success(in.source.toString, in.drop(in.source.length()))
  }
  // Returns the part of the input string that has not been parsed yet
  protected lazy val restInput: Parser[String] = new Parser[String] {
    def apply(in: Input): ParseResult[String] =
      Success(
        in.source.subSequence(in.offset, in.source.length()).toString,
        in.drop(in.source.length()))
  }
}
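
// Usage sketch (illustrative only, not part of the original file): a concrete subclass
// supplies `start` (and typically its own `lexical`), and the resulting parser is applied
// as a function; `someParser` below is a hypothetical instance of such a subclass.
//
//   val plan: LogicalPlan = someParser("CACHE TABLE t")
//
// On a parse failure, `apply` does not return a ParseResult; it aborts via sys.error with
// the failure message. `wholeInput` consumes everything, while `restInput` hands back the
// still-unparsed tail of the source string (used below to delegate to a fallback parser).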
class SqlLexical(val keywords: Seq[String]) extends StdLexical {
  case class FloatLit(chars: String) extends Token {
    override def toString = chars
  }

  reserved ++= keywords.flatMap(w => allCaseVersions(w))

  delimiters += (
    "@", "*", "+", "-", "<", "=", "<>", "!=", "<=", ">=", ">", "/", "(", ")",
    ",", ";", "%", "{", "}", ":", "[", "]", ".", "&", "|", "^", "~", "<=>"
  )

  override lazy val token: Parser[Token] =
    ( identChar ~ (identChar | digit).* ^^
      { case first ~ rest => processIdent((first :: rest).mkString) }
    | rep1(digit) ~ ('.' ~> digit.*).? ^^ {
        case i ~ None => NumericLit(i.mkString)
        case i ~ Some(d) => FloatLit(i.mkString + "." + d.mkString)
      }
    | '\'' ~> chrExcept('\'', '\n', EofCh).* <~ '\'' ^^
      { case chars => StringLit(chars mkString "") }
    | '"' ~> chrExcept('"', '\n', EofCh).* <~ '"' ^^
      { case chars => StringLit(chars mkString "") }
    | '`' ~> chrExcept('`', '\n', EofCh).* <~ '`' ^^
      { case chars => Identifier(chars mkString "") }
    | EofCh ^^^ EOF
    | '\'' ~> failure("unclosed string literal")
    | '"' ~> failure("unclosed string literal")
    | delim
    | failure("illegal character")
    )

  override def identChar = letter | elem('_')

  override def whitespace: Parser[Any] =
    ( whitespaceChar
    | '/' ~ '*' ~ comment
    | '/' ~ '/' ~ chrExcept(EofCh, '\n').*
    | '#' ~ chrExcept(EofCh, '\n').*
    | '-' ~ '-' ~ chrExcept(EofCh, '\n').*
    | '/' ~ '*' ~ failure("unclosed comment")
    ).*

  /** Generate all variations of upper and lower case of a given string */
  def allCaseVersions(s: String, prefix: String = ""): Stream[String] = {
    if (s.isEmpty) {
      Stream(prefix)
    } else {
      allCaseVersions(s.tail, prefix + s.head.toLower) #:::
        allCaseVersions(s.tail, prefix + s.head.toUpper)
    }
  }
}
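
// Illustration (not part of the original file): `allCaseVersions` enumerates every
// upper/lower-case spelling of a keyword, which is how the lexer reserves SQL keywords
// case-insensitively on top of StdLexical's case-sensitive `reserved` set, e.g.
//
//   new SqlLexical(Nil).allCaseVersions("as").toList   // => List("as", "aS", "As", "AS")
//
// Every spelling of each keyword passed to the constructor is added to `reserved`, so
// "CACHE", "cache", and "Cache" all lex to the same Keyword token.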
/**
 * The top level Spark SQL parser. This parser recognizes syntax that is available in all SQL
 * dialects supported by Spark SQL, and delegates all other syntax to the `fallback` parser.
 *
 * @param fallback A function that parses an input string into a logical plan.
 */
private[sql] class SparkSQLParser(fallback: String => LogicalPlan) extends AbstractSparkSQLParser {

  // A parser for the key-value part of the "SET [key [= value]]" syntax
  private object SetCommandParser extends RegexParsers {
    private val key: Parser[String] = "(?m)[^=]+".r
    private val value: Parser[String] = "(?m).*$".r
    private val pair: Parser[LogicalPlan] =
      (key ~ ("=".r ~> value).?).? ^^ {
        case None => SetCommand(None)
        case Some(k ~ v) => SetCommand(Some(k.trim -> v.map(_.trim)))
      }

    def apply(input: String): LogicalPlan = parseAll(pair, input) match {
      case Success(plan, _) => plan
      case x => sys.error(x.toString)
    }
  }
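
  // Behaviour sketch (derived from the grammar above; `spark.some.key` is only a
  // placeholder name, not a specific Spark configuration):
  //
  //   SetCommandParser("")                     // => SetCommand(None)                  ("SET" alone)
  //   SetCommandParser("spark.some.key")       // => SetCommand(Some(("spark.some.key", None)))
  //   SetCommandParser("spark.some.key = 10")  // => SetCommand(Some(("spark.some.key", Some("10"))))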
  protected val AS = Keyword("AS")
  protected val CACHE = Keyword("CACHE")
  protected val LAZY = Keyword("LAZY")
  protected val SET = Keyword("SET")
  protected val TABLE = Keyword("TABLE")
  protected val UNCACHE = Keyword("UNCACHE")

  protected implicit def asParser(k: Keyword): Parser[String] =
    lexical.allCaseVersions(k.str).map(x => x : Parser[String]).reduce(_ | _)

  private val reservedWords: Seq[String] =
    this
      .getClass
      .getMethods
      .filter(_.getReturnType == classOf[Keyword])
      .map(_.invoke(this).asInstanceOf[Keyword].str)

  override val lexical = new SqlLexical(reservedWords)

  override protected lazy val start: Parser[LogicalPlan] = cache | uncache | set | others

  private lazy val cache: Parser[LogicalPlan] =
    CACHE ~> LAZY.? ~ (TABLE ~> ident) ~ (AS ~> restInput).? ^^ {
      case isLazy ~ tableName ~ plan =>
        CacheTableCommand(tableName, plan.map(fallback), isLazy.isDefined)
    }

  private lazy val uncache: Parser[LogicalPlan] =
    UNCACHE ~ TABLE ~> ident ^^ {
      case tableName => UncacheTableCommand(tableName)
    }

  private lazy val set: Parser[LogicalPlan] =
    SET ~> restInput ^^ {
      case input => SetCommandParser(input)
    }

  private lazy val others: Parser[LogicalPlan] =
    wholeInput ^^ {
      case input => fallback(input)
    }
}
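
// Usage sketch (illustrative only; `dialectParser` is a hypothetical fallback that turns a
// SQL string into a LogicalPlan, e.g. a dialect-specific parser):
//
//   val parser = new SparkSQLParser(dialectParser)
//   parser("CACHE LAZY TABLE t AS SELECT * FROM src")  // CacheTableCommand("t", Some(<fallback plan>), isLazy = true)
//   parser("UNCACHE TABLE t")                          // UncacheTableCommand("t")
//   parser("SET spark.some.key=10")                    // SetCommand(Some(("spark.some.key", Some("10"))))
//   parser("SELECT 1")                                 // no rule matches, so `others` hands the whole string to dialectParser
//
// Only CACHE/UNCACHE/SET are handled here; everything else reaches the fallback untouched
// via `wholeInput`, and the cached query text after AS is forwarded via `restInput`.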