| // Licensed to the Apache Software Foundation (ASF) under one |
| // or more contributor license agreements. See the NOTICE file |
| // distributed with this work for additional information |
| // regarding copyright ownership. The ASF licenses this file |
| // to you under the Apache License, Version 2.0 (the |
| // "License"); you may not use this file except in compliance |
| // with the License. You may obtain a copy of the License at |
| // |
| // http://www.apache.org/licenses/LICENSE-2.0 |
| // |
| // Unless required by applicable law or agreed to in writing, |
| // software distributed under the License is distributed on an |
| // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
| // KIND, either express or implied. See the License for the |
| // specific language governing permissions and limitations |
| // under the License. |
| |
| //! [`DFParser`]: DataFusion SQL Parser based on [`sqlparser`] |
| //! |
| //! This parser implements DataFusion specific statements such as |
| //! `CREATE EXTERNAL TABLE` |
| |
| use datafusion_common::DataFusionError; |
| use datafusion_common::config::SqlParserOptions; |
| use datafusion_common::{Diagnostic, Span, sql_err}; |
| use sqlparser::ast::{ExprWithAlias, Ident, OrderByOptions}; |
| use sqlparser::tokenizer::TokenWithSpan; |
| use sqlparser::{ |
| ast::{ |
| ColumnDef, ColumnOptionDef, ObjectName, OrderByExpr, Query, |
| Statement as SQLStatement, TableConstraint, Value, |
| }, |
| dialect::{Dialect, GenericDialect, keywords::Keyword}, |
| parser::{Parser, ParserError}, |
| tokenizer::{Token, Tokenizer, Word}, |
| }; |
| use std::collections::VecDeque; |
| use std::fmt; |
| |
| // Use `Parser::expected` instead, if possible |
| macro_rules! parser_err { |
| ($MSG:expr $(; diagnostic = $DIAG:expr)?) => {{ |
| |
| let err = DataFusionError::from(ParserError::ParserError($MSG.to_string())); |
| $( |
| let err = err.with_diagnostic($DIAG); |
| )? |
| Err(err) |
| }}; |
| } |
| |
| fn parse_file_type(s: &str) -> Result<String, DataFusionError> { |
| Ok(s.to_uppercase()) |
| } |
| |
| /// DataFusion specific `EXPLAIN` |
| /// |
| /// Syntax: |
| /// ```sql |
| /// EXPLAIN <ANALYZE> <VERBOSE> [FORMAT format] statement |
| /// ``` |
| #[derive(Debug, Clone, PartialEq, Eq)] |
| pub struct ExplainStatement { |
| /// `EXPLAIN ANALYZE ..` |
| pub analyze: bool, |
| /// `EXPLAIN .. VERBOSE ..` |
| pub verbose: bool, |
| /// `EXPLAIN .. FORMAT ` |
| pub format: Option<String>, |
| /// The statement to analyze. Note this is a DataFusion [`Statement`] (not a |
| /// [`sqlparser::ast::Statement`] so that we can use `EXPLAIN`, `COPY`, and other |
| /// DataFusion specific statements |
| pub statement: Box<Statement>, |
| } |
| |
| impl fmt::Display for ExplainStatement { |
| fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { |
| let Self { |
| analyze, |
| verbose, |
| format, |
| statement, |
| } = self; |
| |
| write!(f, "EXPLAIN ")?; |
| if *analyze { |
| write!(f, "ANALYZE ")?; |
| } |
| if *verbose { |
| write!(f, "VERBOSE ")?; |
| } |
| if let Some(format) = format.as_ref() { |
| write!(f, "FORMAT {format} ")?; |
| } |
| |
| write!(f, "{statement}") |
| } |
| } |
| |
| /// DataFusion extension DDL for `COPY` |
| /// |
| /// # Syntax: |
| /// |
| /// ```text |
| /// COPY <table_name | (<query>)> |
| /// TO |
| /// <destination_url> |
| /// (key_value_list) |
| /// ``` |
| /// |
| /// # Examples |
| /// |
| /// ```sql |
| /// COPY lineitem TO 'lineitem' |
| /// STORED AS PARQUET ( |
| /// partitions 16, |
| /// row_group_limit_rows 100000, |
| /// row_group_limit_bytes 200000 |
| /// ) |
| /// |
| /// COPY (SELECT l_orderkey from lineitem) to 'lineitem.parquet'; |
| /// ``` |
| #[derive(Debug, Clone, PartialEq, Eq)] |
| pub struct CopyToStatement { |
| /// From where the data comes from |
| pub source: CopyToSource, |
| /// The URL to where the data is heading |
| pub target: String, |
| /// Partition keys |
| pub partitioned_by: Vec<String>, |
| /// File type (Parquet, NDJSON, CSV etc.) |
| pub stored_as: Option<String>, |
| /// Target specific options |
| pub options: Vec<(String, Value)>, |
| } |
| |
| impl fmt::Display for CopyToStatement { |
| fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { |
| let Self { |
| source, |
| target, |
| partitioned_by, |
| stored_as, |
| options, |
| .. |
| } = self; |
| |
| write!(f, "COPY {source} TO {target}")?; |
| if let Some(file_type) = stored_as { |
| write!(f, " STORED AS {file_type}")?; |
| } |
| if !partitioned_by.is_empty() { |
| write!(f, " PARTITIONED BY ({})", partitioned_by.join(", "))?; |
| } |
| |
| if !options.is_empty() { |
| let opts: Vec<_> = |
| options.iter().map(|(k, v)| format!("'{k}' {v}")).collect(); |
| write!(f, " OPTIONS ({})", opts.join(", "))?; |
| } |
| |
| Ok(()) |
| } |
| } |
| |
| #[derive(Debug, Clone, PartialEq, Eq)] |
| pub enum CopyToSource { |
| /// `COPY <table> TO ...` |
| Relation(ObjectName), |
| /// COPY (...query...) TO ... |
| Query(Box<Query>), |
| } |
| |
| impl fmt::Display for CopyToSource { |
| fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { |
| match self { |
| CopyToSource::Relation(r) => write!(f, "{r}"), |
| CopyToSource::Query(q) => write!(f, "({q})"), |
| } |
| } |
| } |
| |
| /// This type defines a lexicographical ordering. |
| pub(crate) type LexOrdering = Vec<OrderByExpr>; |
| |
| /// DataFusion extension DDL for `CREATE EXTERNAL TABLE` |
| /// |
| /// Syntax: |
| /// |
| /// ```text |
| /// CREATE |
| /// [ OR REPLACE ] |
| /// EXTERNAL TABLE |
| /// [ IF NOT EXISTS ] |
| /// <TABLE_NAME>[ (<column_definition>) ] |
| /// STORED AS <file_type> |
| /// [ PARTITIONED BY (<column_definition list> | <column list>) ] |
| /// [ WITH ORDER (<ordered column list>) |
| /// [ OPTIONS (<key_value_list>) ] |
| /// LOCATION <literal> |
| /// |
| /// <column_definition> := (<column_name> <data_type>, ...) |
| /// |
| /// <column_list> := (<column_name>, ...) |
| /// |
| /// <ordered_column_list> := (<column_name> <sort_clause>, ...) |
| /// |
| /// <key_value_list> := (<literal> <literal, <literal> <literal>, ...) |
| /// ``` |
| #[derive(Debug, Clone, PartialEq, Eq)] |
| pub struct CreateExternalTable { |
| /// Table name |
| pub name: ObjectName, |
| /// Optional schema |
| pub columns: Vec<ColumnDef>, |
| /// File type (Parquet, NDJSON, CSV, etc) |
| pub file_type: String, |
| /// Path to file |
| pub location: String, |
| /// Partition Columns |
| pub table_partition_cols: Vec<String>, |
| /// Ordered expressions |
| pub order_exprs: Vec<LexOrdering>, |
| /// Option to not error if table already exists |
| pub if_not_exists: bool, |
| /// Option to replace table content if table already exists |
| pub or_replace: bool, |
| /// Whether the table is a temporary table |
| pub temporary: bool, |
| /// Infinite streams? |
| pub unbounded: bool, |
| /// Table(provider) specific options |
| pub options: Vec<(String, Value)>, |
| /// A table-level constraint |
| pub constraints: Vec<TableConstraint>, |
| } |
| |
| impl fmt::Display for CreateExternalTable { |
| fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { |
| write!(f, "CREATE EXTERNAL TABLE ")?; |
| if self.if_not_exists { |
| write!(f, "IF NOT EXISTS ")?; |
| } |
| write!(f, "{} ", self.name)?; |
| write!(f, "STORED AS {} ", self.file_type)?; |
| if !self.order_exprs.is_empty() { |
| write!(f, "WITH ORDER (")?; |
| let mut first = true; |
| for expr in self.order_exprs.iter().flatten() { |
| if !first { |
| write!(f, ", ")?; |
| } |
| write!(f, "{expr}")?; |
| first = false; |
| } |
| write!(f, ") ")?; |
| } |
| write!(f, "LOCATION {}", self.location) |
| } |
| } |
| |
| /// DataFusion extension for `RESET` |
| #[derive(Debug, Clone, PartialEq, Eq)] |
| pub enum ResetStatement { |
| /// Reset a single configuration variable (stored as provided) |
| Variable(ObjectName), |
| } |
| |
| impl fmt::Display for ResetStatement { |
| fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { |
| match self { |
| ResetStatement::Variable(name) => write!(f, "RESET {name}"), |
| } |
| } |
| } |
| |
| /// DataFusion SQL Statement. |
| /// |
| /// This can either be a [`Statement`] from [`sqlparser`] from a |
| /// standard SQL dialect, or a DataFusion extension such as `CREATE |
| /// EXTERNAL TABLE`. See [`DFParser`] for more information. |
| /// |
| /// [`Statement`]: sqlparser::ast::Statement |
| #[derive(Debug, Clone, PartialEq, Eq)] |
| pub enum Statement { |
| /// ANSI SQL AST node (from sqlparser-rs) |
| Statement(Box<SQLStatement>), |
| /// Extension: `CREATE EXTERNAL TABLE` |
| CreateExternalTable(CreateExternalTable), |
| /// Extension: `COPY TO` |
| CopyTo(CopyToStatement), |
| /// EXPLAIN for extensions |
| Explain(ExplainStatement), |
| /// Extension: `RESET` |
| Reset(ResetStatement), |
| } |
| |
| impl fmt::Display for Statement { |
| fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { |
| match self { |
| Statement::Statement(stmt) => write!(f, "{stmt}"), |
| Statement::CreateExternalTable(stmt) => write!(f, "{stmt}"), |
| Statement::CopyTo(stmt) => write!(f, "{stmt}"), |
| Statement::Explain(stmt) => write!(f, "{stmt}"), |
| Statement::Reset(stmt) => write!(f, "{stmt}"), |
| } |
| } |
| } |
| |
| fn ensure_not_set<T>(field: &Option<T>, name: &str) -> Result<(), DataFusionError> { |
| if field.is_some() { |
| parser_err!(format!("{name} specified more than once",))? |
| } |
| Ok(()) |
| } |
| |
| /// DataFusion SQL Parser based on [`sqlparser`] |
| /// |
| /// Parses DataFusion's SQL dialect, often delegating to [`sqlparser`]'s [`Parser`]. |
| /// |
| /// DataFusion mostly follows existing SQL dialects via |
| /// `sqlparser`. However, certain statements such as `COPY` and |
| /// `CREATE EXTERNAL TABLE` have special syntax in DataFusion. See |
| /// [`Statement`] for a list of this special syntax |
| pub struct DFParser<'a> { |
| pub parser: Parser<'a>, |
| options: SqlParserOptions, |
| } |
| |
| /// Same as `sqlparser` |
| const DEFAULT_RECURSION_LIMIT: usize = 50; |
| const DEFAULT_DIALECT: GenericDialect = GenericDialect {}; |
| |
| /// Builder for [`DFParser`] |
| /// |
| /// # Example: Create and Parse SQL statements |
| /// ``` |
| /// # use datafusion_sql::parser::DFParserBuilder; |
| /// # use datafusion_common::Result; |
| /// # fn test() -> Result<()> { |
| /// let mut parser = DFParserBuilder::new("SELECT * FROM foo; SELECT 1 + 2").build()?; |
| /// // parse the SQL into DFStatements |
| /// let statements = parser.parse_statements()?; |
| /// assert_eq!(statements.len(), 2); |
| /// # Ok(()) |
| /// # } |
| /// ``` |
| /// |
| /// # Example: Create and Parse expression with a different dialect |
| /// ``` |
| /// # use datafusion_sql::parser::DFParserBuilder; |
| /// # use datafusion_common::Result; |
| /// # use datafusion_sql::sqlparser::dialect::MySqlDialect; |
| /// # use datafusion_sql::sqlparser::ast::Expr; |
| /// # fn test() -> Result<()> { |
| /// let dialect = MySqlDialect {}; // Parse using MySQL dialect |
| /// let mut parser = DFParserBuilder::new("1 + 2") |
| /// .with_dialect(&dialect) |
| /// .build()?; |
| /// // parse 1+2 into an sqlparser::ast::Expr |
| /// let res = parser.parse_expr()?; |
| /// assert!(matches!(res.expr, Expr::BinaryOp { .. })); |
| /// # Ok(()) |
| /// # } |
| /// ``` |
| pub struct DFParserBuilder<'a> { |
| /// The SQL string to parse |
| sql: &'a str, |
| /// The Dialect to use (defaults to [`GenericDialect`] |
| dialect: &'a dyn Dialect, |
| /// The recursion limit while parsing |
| recursion_limit: usize, |
| } |
| |
| impl<'a> DFParserBuilder<'a> { |
| /// Create a new parser builder for the specified tokens using the |
| /// [`GenericDialect`]. |
| pub fn new(sql: &'a str) -> Self { |
| Self { |
| sql, |
| dialect: &DEFAULT_DIALECT, |
| recursion_limit: DEFAULT_RECURSION_LIMIT, |
| } |
| } |
| |
| /// Adjust the parser builder's dialect. Defaults to [`GenericDialect`] |
| pub fn with_dialect(mut self, dialect: &'a dyn Dialect) -> Self { |
| self.dialect = dialect; |
| self |
| } |
| |
| /// Adjust the recursion limit of sql parsing. Defaults to 50 |
| pub fn with_recursion_limit(mut self, recursion_limit: usize) -> Self { |
| self.recursion_limit = recursion_limit; |
| self |
| } |
| |
| pub fn build(self) -> Result<DFParser<'a>, DataFusionError> { |
| let mut tokenizer = Tokenizer::new(self.dialect, self.sql); |
| // Convert TokenizerError -> ParserError |
| let tokens = tokenizer |
| .tokenize_with_location() |
| .map_err(ParserError::from)?; |
| |
| Ok(DFParser { |
| parser: Parser::new(self.dialect) |
| .with_tokens_with_locations(tokens) |
| .with_recursion_limit(self.recursion_limit), |
| options: SqlParserOptions { |
| recursion_limit: self.recursion_limit, |
| ..Default::default() |
| }, |
| }) |
| } |
| } |
| |
| impl<'a> DFParser<'a> { |
| #[deprecated(since = "46.0.0", note = "DFParserBuilder")] |
| pub fn new(sql: &'a str) -> Result<Self, DataFusionError> { |
| DFParserBuilder::new(sql).build() |
| } |
| |
| #[deprecated(since = "46.0.0", note = "DFParserBuilder")] |
| pub fn new_with_dialect( |
| sql: &'a str, |
| dialect: &'a dyn Dialect, |
| ) -> Result<Self, DataFusionError> { |
| DFParserBuilder::new(sql).with_dialect(dialect).build() |
| } |
| |
| /// Parse a sql string into one or [`Statement`]s using the |
| /// [`GenericDialect`]. |
| pub fn parse_sql(sql: &'a str) -> Result<VecDeque<Statement>, DataFusionError> { |
| let mut parser = DFParserBuilder::new(sql).build()?; |
| |
| parser.parse_statements() |
| } |
| |
| /// Parse a SQL string and produce one or more [`Statement`]s with |
| /// with the specified dialect. |
| pub fn parse_sql_with_dialect( |
| sql: &str, |
| dialect: &dyn Dialect, |
| ) -> Result<VecDeque<Statement>, DataFusionError> { |
| let mut parser = DFParserBuilder::new(sql).with_dialect(dialect).build()?; |
| parser.parse_statements() |
| } |
| |
| pub fn parse_sql_into_expr(sql: &str) -> Result<ExprWithAlias, DataFusionError> { |
| DFParserBuilder::new(sql).build()?.parse_into_expr() |
| } |
| |
| pub fn parse_sql_into_expr_with_dialect( |
| sql: &str, |
| dialect: &dyn Dialect, |
| ) -> Result<ExprWithAlias, DataFusionError> { |
| DFParserBuilder::new(sql) |
| .with_dialect(dialect) |
| .build()? |
| .parse_into_expr() |
| } |
| |
| /// Parse a sql string into one or [`Statement`]s |
| pub fn parse_statements(&mut self) -> Result<VecDeque<Statement>, DataFusionError> { |
| let mut stmts = VecDeque::new(); |
| let mut expecting_statement_delimiter = false; |
| loop { |
| // ignore empty statements (between successive statement delimiters) |
| while self.parser.consume_token(&Token::SemiColon) { |
| expecting_statement_delimiter = false; |
| } |
| |
| if self.parser.peek_token() == Token::EOF { |
| break; |
| } |
| if expecting_statement_delimiter { |
| return self.expected("end of statement", &self.parser.peek_token()); |
| } |
| |
| let statement = self.parse_statement()?; |
| stmts.push_back(statement); |
| expecting_statement_delimiter = true; |
| } |
| Ok(stmts) |
| } |
| |
| /// Report an unexpected token |
| fn expected<T>( |
| &self, |
| expected: &str, |
| found: &TokenWithSpan, |
| ) -> Result<T, DataFusionError> { |
| let sql_parser_span = found.span; |
| let span = Span::try_from_sqlparser_span(sql_parser_span); |
| let diagnostic = Diagnostic::new_error( |
| format!("Expected: {expected}, found: {found}{}", found.span.start), |
| span, |
| ); |
| parser_err!( |
| format!("Expected: {expected}, found: {found}{}", found.span.start); |
| diagnostic= |
| diagnostic |
| ) |
| } |
| |
| fn expect_token( |
| &mut self, |
| expected: &str, |
| token: &Token, |
| ) -> Result<(), DataFusionError> { |
| let next_token = self.parser.peek_token_ref(); |
| if next_token.token != *token { |
| self.expected(expected, next_token) |
| } else { |
| Ok(()) |
| } |
| } |
| |
| /// Parse a new expression |
| pub fn parse_statement(&mut self) -> Result<Statement, DataFusionError> { |
| match self.parser.peek_token().token { |
| Token::Word(w) => { |
| match w.keyword { |
| Keyword::CREATE => { |
| self.parser.next_token(); // CREATE |
| self.parse_create() |
| } |
| Keyword::COPY => { |
| if let Token::Word(w) = self.parser.peek_nth_token(1).token { |
| // use native parser for COPY INTO |
| if w.keyword == Keyword::INTO { |
| return self.parse_and_handle_statement(); |
| } |
| } |
| self.parser.next_token(); // COPY |
| self.parse_copy() |
| } |
| Keyword::EXPLAIN => { |
| self.parser.next_token(); // EXPLAIN |
| self.parse_explain() |
| } |
| Keyword::RESET => { |
| self.parser.next_token(); // RESET |
| self.parse_reset() |
| } |
| _ => { |
| // use sqlparser-rs parser |
| self.parse_and_handle_statement() |
| } |
| } |
| } |
| _ => { |
| // use the native parser |
| self.parse_and_handle_statement() |
| } |
| } |
| } |
| |
| pub fn parse_expr(&mut self) -> Result<ExprWithAlias, DataFusionError> { |
| if let Token::Word(w) = self.parser.peek_token().token { |
| match w.keyword { |
| Keyword::CREATE | Keyword::COPY | Keyword::EXPLAIN => { |
| return parser_err!("Unsupported command in expression")?; |
| } |
| _ => {} |
| } |
| } |
| |
| Ok(self.parser.parse_expr_with_alias()?) |
| } |
| |
| /// Parses the entire SQL string into an expression. |
| /// |
| /// In contrast to [`DFParser::parse_expr`], this function will report an error if the input |
| /// contains any trailing, unparsed tokens. |
| pub fn parse_into_expr(&mut self) -> Result<ExprWithAlias, DataFusionError> { |
| let expr = self.parse_expr()?; |
| self.expect_token("end of expression", &Token::EOF)?; |
| Ok(expr) |
| } |
| |
| /// Helper method to parse a statement and handle errors consistently, especially for recursion limits |
| fn parse_and_handle_statement(&mut self) -> Result<Statement, DataFusionError> { |
| self.parser |
| .parse_statement() |
| .map(|stmt| Statement::Statement(Box::from(stmt))) |
| .map_err(|e| match e { |
| ParserError::RecursionLimitExceeded => DataFusionError::SQL( |
| Box::new(ParserError::RecursionLimitExceeded), |
| Some(format!( |
| " (current limit: {})", |
| self.options.recursion_limit |
| )), |
| ), |
| other => DataFusionError::SQL(Box::new(other), None), |
| }) |
| } |
| |
| /// Parse a SQL `COPY TO` statement |
| pub fn parse_copy(&mut self) -> Result<Statement, DataFusionError> { |
| // parse as a query |
| let source = if self.parser.consume_token(&Token::LParen) { |
| let query = self.parser.parse_query()?; |
| self.parser.expect_token(&Token::RParen)?; |
| CopyToSource::Query(query) |
| } else { |
| // parse as table reference |
| let table_name = self.parser.parse_object_name(true)?; |
| CopyToSource::Relation(table_name) |
| }; |
| |
| #[derive(Default)] |
| struct Builder { |
| stored_as: Option<String>, |
| target: Option<String>, |
| partitioned_by: Option<Vec<String>>, |
| options: Option<Vec<(String, Value)>>, |
| } |
| |
| let mut builder = Builder::default(); |
| |
| loop { |
| if let Some(keyword) = self.parser.parse_one_of_keywords(&[ |
| Keyword::STORED, |
| Keyword::TO, |
| Keyword::PARTITIONED, |
| Keyword::OPTIONS, |
| Keyword::WITH, |
| ]) { |
| match keyword { |
| Keyword::STORED => { |
| self.parser.expect_keyword(Keyword::AS)?; |
| ensure_not_set(&builder.stored_as, "STORED AS")?; |
| builder.stored_as = Some(self.parse_file_format()?); |
| } |
| Keyword::TO => { |
| ensure_not_set(&builder.target, "TO")?; |
| builder.target = Some(self.parser.parse_literal_string()?); |
| } |
| Keyword::WITH => { |
| self.parser.expect_keyword(Keyword::HEADER)?; |
| self.parser.expect_keyword(Keyword::ROW)?; |
| return parser_err!( |
| "WITH HEADER ROW clause is no longer in use. Please use the OPTIONS clause with 'format.has_header' set appropriately, e.g., OPTIONS ('format.has_header' 'true')" |
| )?; |
| } |
| Keyword::PARTITIONED => { |
| self.parser.expect_keyword(Keyword::BY)?; |
| ensure_not_set(&builder.partitioned_by, "PARTITIONED BY")?; |
| builder.partitioned_by = Some(self.parse_partitions()?); |
| } |
| Keyword::OPTIONS => { |
| ensure_not_set(&builder.options, "OPTIONS")?; |
| builder.options = Some(self.parse_value_options()?); |
| } |
| _ => { |
| unreachable!() |
| } |
| } |
| } else { |
| let token = self.parser.peek_token(); |
| if token == Token::EOF || token == Token::SemiColon { |
| break; |
| } else { |
| return self.expected("end of statement or ;", &token)?; |
| } |
| } |
| } |
| |
| let Some(target) = builder.target else { |
| return parser_err!("Missing TO clause in COPY statement")?; |
| }; |
| |
| Ok(Statement::CopyTo(CopyToStatement { |
| source, |
| target, |
| partitioned_by: builder.partitioned_by.unwrap_or(vec![]), |
| stored_as: builder.stored_as, |
| options: builder.options.unwrap_or(vec![]), |
| })) |
| } |
| |
| /// Parse the next token as a key name for an option list |
| /// |
| /// Note this is different than [`parse_literal_string`] |
| /// because it allows keywords as well as other non words |
| /// |
| /// [`parse_literal_string`]: sqlparser::parser::Parser::parse_literal_string |
| pub fn parse_option_key(&mut self) -> Result<String, DataFusionError> { |
| let next_token = self.parser.next_token(); |
| match next_token.token { |
| Token::Word(Word { value, .. }) => { |
| let mut parts = vec![value]; |
| while self.parser.consume_token(&Token::Period) { |
| let next_token = self.parser.next_token(); |
| if let Token::Word(Word { value, .. }) = next_token.token { |
| parts.push(value); |
| } else { |
| // Unquoted namespaced keys have to conform to the syntax |
| // "<WORD>[\.<WORD>]*". If we have a key that breaks this |
| // pattern, error out: |
| return self.expected("key name", &next_token); |
| } |
| } |
| Ok(parts.join(".")) |
| } |
| Token::SingleQuotedString(s) => Ok(s), |
| Token::DoubleQuotedString(s) => Ok(s), |
| Token::EscapedStringLiteral(s) => Ok(s), |
| _ => self.expected("key name", &next_token), |
| } |
| } |
| |
| /// Parse the next token as a value for an option list |
| /// |
| /// Note this is different than [`parse_value`] as it allows any |
| /// word or keyword in this location. |
| /// |
| /// [`parse_value`]: sqlparser::parser::Parser::parse_value |
| pub fn parse_option_value(&mut self) -> Result<Value, DataFusionError> { |
| let next_token = self.parser.next_token(); |
| match next_token.token { |
| // e.g. things like "snappy" or "gzip" that may be keywords |
| Token::Word(word) => Ok(Value::SingleQuotedString(word.value)), |
| Token::SingleQuotedString(s) => Ok(Value::SingleQuotedString(s)), |
| Token::DoubleQuotedString(s) => Ok(Value::DoubleQuotedString(s)), |
| Token::EscapedStringLiteral(s) => Ok(Value::EscapedStringLiteral(s)), |
| Token::Number(n, l) => Ok(Value::Number(n, l)), |
| _ => self.expected("string or numeric value", &next_token), |
| } |
| } |
| |
| /// Parse a SQL `EXPLAIN` |
| pub fn parse_explain(&mut self) -> Result<Statement, DataFusionError> { |
| let analyze = self.parser.parse_keyword(Keyword::ANALYZE); |
| let verbose = self.parser.parse_keyword(Keyword::VERBOSE); |
| let format = self.parse_explain_format()?; |
| |
| let statement = self.parse_statement()?; |
| |
| Ok(Statement::Explain(ExplainStatement { |
| statement: Box::new(statement), |
| analyze, |
| verbose, |
| format, |
| })) |
| } |
| |
| /// Parse a SQL `RESET` |
| pub fn parse_reset(&mut self) -> Result<Statement, DataFusionError> { |
| let mut parts: Vec<String> = Vec::new(); |
| let mut expecting_segment = true; |
| |
| loop { |
| let next_token = self.parser.peek_token(); |
| match &next_token.token { |
| Token::Word(word) => { |
| self.parser.next_token(); |
| parts.push(word.value.clone()); |
| expecting_segment = false; |
| } |
| Token::SingleQuotedString(s) |
| | Token::DoubleQuotedString(s) |
| | Token::EscapedStringLiteral(s) => { |
| self.parser.next_token(); |
| parts.push(s.clone()); |
| expecting_segment = false; |
| } |
| Token::Period => { |
| self.parser.next_token(); |
| if expecting_segment || parts.is_empty() { |
| return self.expected("configuration parameter", &next_token); |
| } |
| expecting_segment = true; |
| } |
| Token::EOF | Token::SemiColon => break, |
| _ => return self.expected("configuration parameter", &next_token), |
| } |
| } |
| |
| if parts.is_empty() || expecting_segment { |
| return self.expected("configuration parameter", &self.parser.peek_token()); |
| } |
| |
| let idents: Vec<Ident> = parts.into_iter().map(Ident::new).collect(); |
| let variable = ObjectName::from(idents); |
| Ok(Statement::Reset(ResetStatement::Variable(variable))) |
| } |
| |
| pub fn parse_explain_format(&mut self) -> Result<Option<String>, DataFusionError> { |
| if !self.parser.parse_keyword(Keyword::FORMAT) { |
| return Ok(None); |
| } |
| |
| let next_token = self.parser.next_token(); |
| let format = match next_token.token { |
| Token::Word(w) => Ok(w.value), |
| Token::SingleQuotedString(w) => Ok(w), |
| Token::DoubleQuotedString(w) => Ok(w), |
| _ => self.expected("an explain format such as TREE", &next_token), |
| }?; |
| Ok(Some(format)) |
| } |
| |
| /// Parse a SQL `CREATE` statement handling `CREATE EXTERNAL TABLE` |
| pub fn parse_create(&mut self) -> Result<Statement, DataFusionError> { |
| // TODO: Change sql parser to take in `or_replace: bool` inside parse_create() |
| if self |
| .parser |
| .parse_keywords(&[Keyword::OR, Keyword::REPLACE, Keyword::EXTERNAL]) |
| { |
| self.parse_create_external_table(false, true) |
| } else if self.parser.parse_keywords(&[ |
| Keyword::OR, |
| Keyword::REPLACE, |
| Keyword::UNBOUNDED, |
| Keyword::EXTERNAL, |
| ]) { |
| self.parse_create_external_table(true, true) |
| } else if self.parser.parse_keyword(Keyword::EXTERNAL) { |
| self.parse_create_external_table(false, false) |
| } else if self |
| .parser |
| .parse_keywords(&[Keyword::UNBOUNDED, Keyword::EXTERNAL]) |
| { |
| self.parse_create_external_table(true, false) |
| } else { |
| Ok(Statement::Statement(Box::from(self.parser.parse_create()?))) |
| } |
| } |
| |
| fn parse_partitions(&mut self) -> Result<Vec<String>, DataFusionError> { |
| let mut partitions: Vec<String> = vec![]; |
| if !self.parser.consume_token(&Token::LParen) |
| || self.parser.consume_token(&Token::RParen) |
| { |
| return Ok(partitions); |
| } |
| |
| loop { |
| if let Token::Word(_) = self.parser.peek_token().token { |
| let identifier = self.parser.parse_identifier()?; |
| partitions.push(identifier.to_string()); |
| } else { |
| return self.expected("partition name", &self.parser.peek_token()); |
| } |
| let comma = self.parser.consume_token(&Token::Comma); |
| if self.parser.consume_token(&Token::RParen) { |
| // allow a trailing comma, even though it's not in standard |
| break; |
| } else if !comma { |
| return self.expected( |
| "',' or ')' after partition definition", |
| &self.parser.peek_token(), |
| ); |
| } |
| } |
| Ok(partitions) |
| } |
| |
| /// Parse the ordering clause of a `CREATE EXTERNAL TABLE` SQL statement |
| pub fn parse_order_by_exprs(&mut self) -> Result<Vec<OrderByExpr>, DataFusionError> { |
| let mut values = vec![]; |
| self.parser.expect_token(&Token::LParen)?; |
| loop { |
| values.push(self.parse_order_by_expr()?); |
| if !self.parser.consume_token(&Token::Comma) { |
| self.parser.expect_token(&Token::RParen)?; |
| return Ok(values); |
| } |
| } |
| } |
| |
| /// Parse an ORDER BY sub-expression optionally followed by ASC or DESC. |
| pub fn parse_order_by_expr(&mut self) -> Result<OrderByExpr, DataFusionError> { |
| let expr = self.parser.parse_expr()?; |
| |
| let asc = if self.parser.parse_keyword(Keyword::ASC) { |
| Some(true) |
| } else if self.parser.parse_keyword(Keyword::DESC) { |
| Some(false) |
| } else { |
| None |
| }; |
| |
| let nulls_first = if self |
| .parser |
| .parse_keywords(&[Keyword::NULLS, Keyword::FIRST]) |
| { |
| Some(true) |
| } else if self.parser.parse_keywords(&[Keyword::NULLS, Keyword::LAST]) { |
| Some(false) |
| } else { |
| None |
| }; |
| |
| Ok(OrderByExpr { |
| expr, |
| options: OrderByOptions { asc, nulls_first }, |
| with_fill: None, |
| }) |
| } |
| |
| // This is a copy of the equivalent implementation in sqlparser. |
| fn parse_columns( |
| &mut self, |
| ) -> Result<(Vec<ColumnDef>, Vec<TableConstraint>), DataFusionError> { |
| let mut columns = vec![]; |
| let mut constraints = vec![]; |
| if !self.parser.consume_token(&Token::LParen) |
| || self.parser.consume_token(&Token::RParen) |
| { |
| return Ok((columns, constraints)); |
| } |
| |
| loop { |
| if let Some(constraint) = self.parser.parse_optional_table_constraint()? { |
| constraints.push(constraint); |
| } else if let Token::Word(_) = self.parser.peek_token().token { |
| let column_def = self.parse_column_def()?; |
| columns.push(column_def); |
| } else { |
| return self.expected( |
| "column name or constraint definition", |
| &self.parser.peek_token(), |
| ); |
| } |
| let comma = self.parser.consume_token(&Token::Comma); |
| if self.parser.consume_token(&Token::RParen) { |
| // allow a trailing comma, even though it's not in standard |
| break; |
| } else if !comma { |
| return self.expected( |
| "',' or ')' after column definition", |
| &self.parser.peek_token(), |
| ); |
| } |
| } |
| |
| Ok((columns, constraints)) |
| } |
| |
| fn parse_column_def(&mut self) -> Result<ColumnDef, DataFusionError> { |
| let name = self.parser.parse_identifier()?; |
| let data_type = self.parser.parse_data_type()?; |
| let mut options = vec![]; |
| loop { |
| if self.parser.parse_keyword(Keyword::CONSTRAINT) { |
| let name = Some(self.parser.parse_identifier()?); |
| if let Some(option) = self.parser.parse_optional_column_option()? { |
| options.push(ColumnOptionDef { name, option }); |
| } else { |
| return self.expected( |
| "constraint details after CONSTRAINT <name>", |
| &self.parser.peek_token(), |
| ); |
| } |
| } else if let Some(option) = self.parser.parse_optional_column_option()? { |
| options.push(ColumnOptionDef { name: None, option }); |
| } else { |
| break; |
| }; |
| } |
| Ok(ColumnDef { |
| name, |
| data_type, |
| options, |
| }) |
| } |
| |
| fn parse_create_external_table( |
| &mut self, |
| unbounded: bool, |
| or_replace: bool, |
| ) -> Result<Statement, DataFusionError> { |
| let temporary = self |
| .parser |
| .parse_one_of_keywords(&[Keyword::TEMP, Keyword::TEMPORARY]) |
| .is_some(); |
| |
| self.parser.expect_keyword(Keyword::TABLE)?; |
| let if_not_exists = |
| self.parser |
| .parse_keywords(&[Keyword::IF, Keyword::NOT, Keyword::EXISTS]); |
| |
| if if_not_exists && or_replace { |
| return parser_err!("'IF NOT EXISTS' cannot coexist with 'REPLACE'"); |
| } |
| |
| let table_name = self.parser.parse_object_name(true)?; |
| let (mut columns, constraints) = self.parse_columns()?; |
| |
| #[derive(Default)] |
| struct Builder { |
| file_type: Option<String>, |
| location: Option<String>, |
| table_partition_cols: Option<Vec<String>>, |
| order_exprs: Vec<LexOrdering>, |
| options: Option<Vec<(String, Value)>>, |
| } |
| let mut builder = Builder::default(); |
| |
| loop { |
| if let Some(keyword) = self.parser.parse_one_of_keywords(&[ |
| Keyword::STORED, |
| Keyword::LOCATION, |
| Keyword::WITH, |
| Keyword::DELIMITER, |
| Keyword::COMPRESSION, |
| Keyword::PARTITIONED, |
| Keyword::OPTIONS, |
| ]) { |
| match keyword { |
| Keyword::STORED => { |
| self.parser.expect_keyword(Keyword::AS)?; |
| ensure_not_set(&builder.file_type, "STORED AS")?; |
| builder.file_type = Some(self.parse_file_format()?); |
| } |
| Keyword::LOCATION => { |
| ensure_not_set(&builder.location, "LOCATION")?; |
| builder.location = Some(self.parser.parse_literal_string()?); |
| } |
| Keyword::WITH => { |
| if self.parser.parse_keyword(Keyword::ORDER) { |
| builder.order_exprs.push(self.parse_order_by_exprs()?); |
| } else { |
| self.parser.expect_keyword(Keyword::HEADER)?; |
| self.parser.expect_keyword(Keyword::ROW)?; |
| return parser_err!( |
| "WITH HEADER ROW clause is no longer in use. Please use the OPTIONS clause with 'format.has_header' set appropriately, e.g., OPTIONS (format.has_header true)" |
| )?; |
| } |
| } |
| Keyword::DELIMITER => { |
| return parser_err!( |
| "DELIMITER clause is no longer in use. Please use the OPTIONS clause with 'format.delimiter' set appropriately, e.g., OPTIONS (format.delimiter ',')" |
| )?; |
| } |
| Keyword::COMPRESSION => { |
| self.parser.expect_keyword(Keyword::TYPE)?; |
| return parser_err!( |
| "COMPRESSION TYPE clause is no longer in use. Please use the OPTIONS clause with 'format.compression' set appropriately, e.g., OPTIONS (format.compression gzip)" |
| )?; |
| } |
| Keyword::PARTITIONED => { |
| self.parser.expect_keyword(Keyword::BY)?; |
| ensure_not_set(&builder.table_partition_cols, "PARTITIONED BY")?; |
| // Expects either list of column names (col_name [, col_name]*) |
| // or list of column definitions (col_name datatype [, col_name datatype]* ) |
| // use the token after the name to decide which parsing rule to use |
| // Note that mixing both names and definitions is not allowed |
| let peeked = self.parser.peek_nth_token(2); |
| if peeked == Token::Comma || peeked == Token::RParen { |
| // List of column names |
| builder.table_partition_cols = Some(self.parse_partitions()?) |
| } else { |
| // List of column defs |
| let (cols, cons) = self.parse_columns()?; |
| builder.table_partition_cols = Some( |
| cols.iter().map(|col| col.name.to_string()).collect(), |
| ); |
| |
| columns.extend(cols); |
| |
| if !cons.is_empty() { |
| return sql_err!(ParserError::ParserError( |
| "Constraints on Partition Columns are not supported" |
| .to_string(), |
| )); |
| } |
| } |
| } |
| Keyword::OPTIONS => { |
| ensure_not_set(&builder.options, "OPTIONS")?; |
| builder.options = Some(self.parse_value_options()?); |
| } |
| _ => { |
| unreachable!() |
| } |
| } |
| } else { |
| let token = self.parser.peek_token(); |
| if token == Token::EOF || token == Token::SemiColon { |
| break; |
| } else { |
| return self.expected("end of statement or ;", &token)?; |
| } |
| } |
| } |
| |
| // Validations: location and file_type are required |
| if builder.file_type.is_none() { |
| return sql_err!(ParserError::ParserError( |
| "Missing STORED AS clause in CREATE EXTERNAL TABLE statement".into(), |
| )); |
| } |
| if builder.location.is_none() { |
| return sql_err!(ParserError::ParserError( |
| "Missing LOCATION clause in CREATE EXTERNAL TABLE statement".into(), |
| )); |
| } |
| |
| let create = CreateExternalTable { |
| name: table_name, |
| columns, |
| file_type: builder.file_type.unwrap(), |
| location: builder.location.unwrap(), |
| table_partition_cols: builder.table_partition_cols.unwrap_or(vec![]), |
| order_exprs: builder.order_exprs, |
| if_not_exists, |
| or_replace, |
| temporary, |
| unbounded, |
| options: builder.options.unwrap_or(Vec::new()), |
| constraints, |
| }; |
| Ok(Statement::CreateExternalTable(create)) |
| } |
| |
| /// Parses the set of valid formats |
| fn parse_file_format(&mut self) -> Result<String, DataFusionError> { |
| let token = self.parser.next_token(); |
| match &token.token { |
| Token::Word(w) => parse_file_type(&w.value), |
| _ => self.expected("one of ARROW, PARQUET, NDJSON, or CSV", &token), |
| } |
| } |
| |
| /// Parses (key value) style options into a map of String --> [`Value`]. |
| /// |
| /// This method supports keywords as key names as well as multiple |
| /// value types such as Numbers as well as Strings. |
| fn parse_value_options(&mut self) -> Result<Vec<(String, Value)>, DataFusionError> { |
| let mut options = vec![]; |
| self.parser.expect_token(&Token::LParen)?; |
| |
| loop { |
| let key = self.parse_option_key()?; |
| let value = self.parse_option_value()?; |
| options.push((key, value)); |
| let comma = self.parser.consume_token(&Token::Comma); |
| if self.parser.consume_token(&Token::RParen) { |
| // Allow a trailing comma, even though it's not in standard |
| break; |
| } else if !comma { |
| return self.expected( |
| "',' or ')' after option definition", |
| &self.parser.peek_token(), |
| ); |
| } |
| } |
| Ok(options) |
| } |
| } |
| |
| #[cfg(test)] |
| mod tests { |
| use super::*; |
| use datafusion_common::assert_contains; |
| use sqlparser::ast::Expr::Identifier; |
| use sqlparser::ast::{ |
| BinaryOperator, DataType, ExactNumberInfo, Expr, Ident, ValueWithSpan, |
| }; |
| use sqlparser::dialect::SnowflakeDialect; |
| use sqlparser::tokenizer::Span; |
| |
| fn expect_parse_ok(sql: &str, expected: Statement) -> Result<(), DataFusionError> { |
| let statements = DFParser::parse_sql(sql)?; |
| assert_eq!( |
| statements.len(), |
| 1, |
| "Expected to parse exactly one statement" |
| ); |
| assert_eq!(statements[0], expected, "actual:\n{:#?}", statements[0]); |
| Ok(()) |
| } |
| |
| /// Parses sql and asserts that the expected error message was found |
| fn expect_parse_error(sql: &str, expected_error: &str) { |
| match DFParser::parse_sql(sql) { |
| Ok(statements) => { |
| panic!( |
| "Expected parse error for '{sql}', but was successful: {statements:?}" |
| ); |
| } |
| Err(e) => { |
| let error_message = e.to_string(); |
| assert!( |
| error_message.contains(expected_error), |
| "Expected error '{expected_error}' not found in actual error '{error_message}'" |
| ); |
| } |
| } |
| } |
| |
| fn make_column_def(name: impl Into<String>, data_type: DataType) -> ColumnDef { |
| ColumnDef { |
| name: Ident { |
| value: name.into(), |
| quote_style: None, |
| span: Span::empty(), |
| }, |
| data_type, |
| options: vec![], |
| } |
| } |
| |
| #[test] |
| fn create_external_table() -> Result<(), DataFusionError> { |
| // positive case |
| let sql = "CREATE EXTERNAL TABLE t(c1 int) STORED AS CSV LOCATION 'foo.csv'"; |
| let display = None; |
| let name = ObjectName::from(vec![Ident::from("t")]); |
| let expected = Statement::CreateExternalTable(CreateExternalTable { |
| name: name.clone(), |
| columns: vec![make_column_def("c1", DataType::Int(display))], |
| file_type: "CSV".to_string(), |
| location: "foo.csv".into(), |
| table_partition_cols: vec![], |
| order_exprs: vec![], |
| if_not_exists: false, |
| or_replace: false, |
| temporary: false, |
| unbounded: false, |
| options: vec![], |
| constraints: vec![], |
| }); |
| expect_parse_ok(sql, expected)?; |
| |
| // positive case: leading space |
| let sql = "CREATE EXTERNAL TABLE t(c1 int) STORED AS CSV LOCATION 'foo.csv' "; |
| let expected = Statement::CreateExternalTable(CreateExternalTable { |
| name: name.clone(), |
| columns: vec![make_column_def("c1", DataType::Int(None))], |
| file_type: "CSV".to_string(), |
| location: "foo.csv".into(), |
| table_partition_cols: vec![], |
| order_exprs: vec![], |
| if_not_exists: false, |
| or_replace: false, |
| temporary: false, |
| unbounded: false, |
| options: vec![], |
| constraints: vec![], |
| }); |
| expect_parse_ok(sql, expected)?; |
| |
| // positive case: leading space + semicolon |
| let sql = |
| "CREATE EXTERNAL TABLE t(c1 int) STORED AS CSV LOCATION 'foo.csv' ;"; |
| let expected = Statement::CreateExternalTable(CreateExternalTable { |
| name: name.clone(), |
| columns: vec![make_column_def("c1", DataType::Int(None))], |
| file_type: "CSV".to_string(), |
| location: "foo.csv".into(), |
| table_partition_cols: vec![], |
| order_exprs: vec![], |
| if_not_exists: false, |
| or_replace: false, |
| temporary: false, |
| unbounded: false, |
| options: vec![], |
| constraints: vec![], |
| }); |
| expect_parse_ok(sql, expected)?; |
| |
| // positive case with delimiter |
| let sql = "CREATE EXTERNAL TABLE t(c1 int) STORED AS CSV LOCATION 'foo.csv' OPTIONS (format.delimiter '|')"; |
| let display = None; |
| let expected = Statement::CreateExternalTable(CreateExternalTable { |
| name: name.clone(), |
| columns: vec![make_column_def("c1", DataType::Int(display))], |
| file_type: "CSV".to_string(), |
| location: "foo.csv".into(), |
| table_partition_cols: vec![], |
| order_exprs: vec![], |
| if_not_exists: false, |
| or_replace: false, |
| temporary: false, |
| unbounded: false, |
| options: vec![( |
| "format.delimiter".into(), |
| Value::SingleQuotedString("|".into()), |
| )], |
| constraints: vec![], |
| }); |
| expect_parse_ok(sql, expected)?; |
| |
| // positive case: partitioned by |
| let sql = "CREATE EXTERNAL TABLE t(c1 int) STORED AS CSV PARTITIONED BY (p1, p2) LOCATION 'foo.csv'"; |
| let display = None; |
| let expected = Statement::CreateExternalTable(CreateExternalTable { |
| name: name.clone(), |
| columns: vec![make_column_def("c1", DataType::Int(display))], |
| file_type: "CSV".to_string(), |
| location: "foo.csv".into(), |
| table_partition_cols: vec!["p1".to_string(), "p2".to_string()], |
| order_exprs: vec![], |
| if_not_exists: false, |
| or_replace: false, |
| temporary: false, |
| unbounded: false, |
| options: vec![], |
| constraints: vec![], |
| }); |
| expect_parse_ok(sql, expected)?; |
| |
| // positive case: it is ok for sql stmt with `COMPRESSION TYPE GZIP` tokens |
| let sqls = |
| vec![ |
| ("CREATE EXTERNAL TABLE t(c1 int) STORED AS CSV LOCATION 'foo.csv' OPTIONS |
| ('format.compression' 'GZIP')", "GZIP"), |
| ("CREATE EXTERNAL TABLE t(c1 int) STORED AS CSV LOCATION 'foo.csv' OPTIONS |
| ('format.compression' 'BZIP2')", "BZIP2"), |
| ("CREATE EXTERNAL TABLE t(c1 int) STORED AS CSV LOCATION 'foo.csv' OPTIONS |
| ('format.compression' 'XZ')", "XZ"), |
| ("CREATE EXTERNAL TABLE t(c1 int) STORED AS CSV LOCATION 'foo.csv' OPTIONS |
| ('format.compression' 'ZSTD')", "ZSTD"), |
| ]; |
| for (sql, compression) in sqls { |
| let expected = Statement::CreateExternalTable(CreateExternalTable { |
| name: name.clone(), |
| columns: vec![make_column_def("c1", DataType::Int(display))], |
| file_type: "CSV".to_string(), |
| location: "foo.csv".into(), |
| table_partition_cols: vec![], |
| order_exprs: vec![], |
| if_not_exists: false, |
| or_replace: false, |
| temporary: false, |
| unbounded: false, |
| options: vec![( |
| "format.compression".into(), |
| Value::SingleQuotedString(compression.into()), |
| )], |
| constraints: vec![], |
| }); |
| expect_parse_ok(sql, expected)?; |
| } |
| |
| // positive case: it is ok for parquet files not to have columns specified |
| let sql = "CREATE EXTERNAL TABLE t STORED AS PARQUET LOCATION 'foo.parquet'"; |
| let expected = Statement::CreateExternalTable(CreateExternalTable { |
| name: name.clone(), |
| columns: vec![], |
| file_type: "PARQUET".to_string(), |
| location: "foo.parquet".into(), |
| table_partition_cols: vec![], |
| order_exprs: vec![], |
| if_not_exists: false, |
| or_replace: false, |
| temporary: false, |
| unbounded: false, |
| options: vec![], |
| constraints: vec![], |
| }); |
| expect_parse_ok(sql, expected)?; |
| |
| // positive case: it is ok for parquet files to be other than upper case |
| let sql = "CREATE EXTERNAL TABLE t STORED AS parqueT LOCATION 'foo.parquet'"; |
| let expected = Statement::CreateExternalTable(CreateExternalTable { |
| name: name.clone(), |
| columns: vec![], |
| file_type: "PARQUET".to_string(), |
| location: "foo.parquet".into(), |
| table_partition_cols: vec![], |
| order_exprs: vec![], |
| if_not_exists: false, |
| or_replace: false, |
| temporary: false, |
| unbounded: false, |
| options: vec![], |
| constraints: vec![], |
| }); |
| expect_parse_ok(sql, expected)?; |
| |
| // positive case: it is ok for avro files not to have columns specified |
| let sql = "CREATE EXTERNAL TABLE t STORED AS AVRO LOCATION 'foo.avro'"; |
| let expected = Statement::CreateExternalTable(CreateExternalTable { |
| name: name.clone(), |
| columns: vec![], |
| file_type: "AVRO".to_string(), |
| location: "foo.avro".into(), |
| table_partition_cols: vec![], |
| order_exprs: vec![], |
| if_not_exists: false, |
| or_replace: false, |
| temporary: false, |
| unbounded: false, |
| options: vec![], |
| constraints: vec![], |
| }); |
| expect_parse_ok(sql, expected)?; |
| |
| // positive case: it is ok for avro files not to have columns specified |
| let sql = "CREATE EXTERNAL TABLE IF NOT EXISTS t STORED AS PARQUET LOCATION 'foo.parquet'"; |
| let expected = Statement::CreateExternalTable(CreateExternalTable { |
| name: name.clone(), |
| columns: vec![], |
| file_type: "PARQUET".to_string(), |
| location: "foo.parquet".into(), |
| table_partition_cols: vec![], |
| order_exprs: vec![], |
| if_not_exists: true, |
| or_replace: false, |
| temporary: false, |
| unbounded: false, |
| options: vec![], |
| constraints: vec![], |
| }); |
| expect_parse_ok(sql, expected)?; |
| |
| // positive case: or replace |
| let sql = |
| "CREATE OR REPLACE EXTERNAL TABLE t STORED AS PARQUET LOCATION 'foo.parquet'"; |
| let expected = Statement::CreateExternalTable(CreateExternalTable { |
| name: name.clone(), |
| columns: vec![], |
| file_type: "PARQUET".to_string(), |
| location: "foo.parquet".into(), |
| table_partition_cols: vec![], |
| order_exprs: vec![], |
| if_not_exists: false, |
| or_replace: true, |
| temporary: false, |
| unbounded: false, |
| options: vec![], |
| constraints: vec![], |
| }); |
| expect_parse_ok(sql, expected)?; |
| |
| // positive case: column definition allowed in 'partition by' clause |
| let sql = "CREATE EXTERNAL TABLE t(c1 int) STORED AS CSV PARTITIONED BY (p1 int) LOCATION 'foo.csv'"; |
| let expected = Statement::CreateExternalTable(CreateExternalTable { |
| name: name.clone(), |
| columns: vec![ |
| make_column_def("c1", DataType::Int(None)), |
| make_column_def("p1", DataType::Int(None)), |
| ], |
| file_type: "CSV".to_string(), |
| location: "foo.csv".into(), |
| table_partition_cols: vec!["p1".to_string()], |
| order_exprs: vec![], |
| if_not_exists: false, |
| or_replace: false, |
| temporary: false, |
| unbounded: false, |
| options: vec![], |
| constraints: vec![], |
| }); |
| expect_parse_ok(sql, expected)?; |
| |
| // negative case: mixed column defs and column names in `PARTITIONED BY` clause |
| let sql = "CREATE EXTERNAL TABLE t(c1 int) STORED AS CSV PARTITIONED BY (p1 int, c1) LOCATION 'foo.csv'"; |
| expect_parse_error( |
| sql, |
| "SQL error: ParserError(\"Expected: a data type name, found: ) at Line: 1, Column: 73\")", |
| ); |
| |
| // negative case: mixed column defs and column names in `PARTITIONED BY` clause |
| let sql = "CREATE EXTERNAL TABLE t(c1 int) STORED AS CSV PARTITIONED BY (c1, p1 int) LOCATION 'foo.csv'"; |
| expect_parse_error( |
| sql, |
| "SQL error: ParserError(\"Expected: ',' or ')' after partition definition, found: int at Line: 1, Column: 70\")", |
| ); |
| |
| // positive case: additional options (one entry) can be specified |
| let sql = |
| "CREATE EXTERNAL TABLE t STORED AS x OPTIONS ('k1' 'v1') LOCATION 'blahblah'"; |
| let expected = Statement::CreateExternalTable(CreateExternalTable { |
| name: name.clone(), |
| columns: vec![], |
| file_type: "X".to_string(), |
| location: "blahblah".into(), |
| table_partition_cols: vec![], |
| order_exprs: vec![], |
| if_not_exists: false, |
| or_replace: false, |
| temporary: false, |
| unbounded: false, |
| options: vec![("k1".into(), Value::SingleQuotedString("v1".into()))], |
| constraints: vec![], |
| }); |
| expect_parse_ok(sql, expected)?; |
| |
| // positive case: additional options (multiple entries) can be specified |
| let sql = "CREATE EXTERNAL TABLE t STORED AS x OPTIONS ('k1' 'v1', k2 v2) LOCATION 'blahblah'"; |
| let expected = Statement::CreateExternalTable(CreateExternalTable { |
| name: name.clone(), |
| columns: vec![], |
| file_type: "X".to_string(), |
| location: "blahblah".into(), |
| table_partition_cols: vec![], |
| order_exprs: vec![], |
| if_not_exists: false, |
| or_replace: false, |
| temporary: false, |
| unbounded: false, |
| options: vec![ |
| ("k1".into(), Value::SingleQuotedString("v1".into())), |
| ("k2".into(), Value::SingleQuotedString("v2".into())), |
| ], |
| constraints: vec![], |
| }); |
| expect_parse_ok(sql, expected)?; |
| |
| // Ordered Col |
| let sqls = [ |
| "CREATE EXTERNAL TABLE t(c1 int) STORED AS CSV WITH ORDER (c1) LOCATION 'foo.csv'", |
| "CREATE EXTERNAL TABLE t(c1 int) STORED AS CSV WITH ORDER (c1 NULLS FIRST) LOCATION 'foo.csv'", |
| "CREATE EXTERNAL TABLE t(c1 int) STORED AS CSV WITH ORDER (c1 NULLS LAST) LOCATION 'foo.csv'", |
| "CREATE EXTERNAL TABLE t(c1 int) STORED AS CSV WITH ORDER (c1 ASC) LOCATION 'foo.csv'", |
| "CREATE EXTERNAL TABLE t(c1 int) STORED AS CSV WITH ORDER (c1 DESC) LOCATION 'foo.csv'", |
| "CREATE EXTERNAL TABLE t(c1 int) STORED AS CSV WITH ORDER (c1 DESC NULLS FIRST) LOCATION 'foo.csv'", |
| "CREATE EXTERNAL TABLE t(c1 int) STORED AS CSV WITH ORDER (c1 DESC NULLS LAST) LOCATION 'foo.csv'", |
| "CREATE EXTERNAL TABLE t(c1 int) STORED AS CSV WITH ORDER (c1 ASC NULLS FIRST) LOCATION 'foo.csv'", |
| "CREATE EXTERNAL TABLE t(c1 int) STORED AS CSV WITH ORDER (c1 ASC NULLS LAST) LOCATION 'foo.csv'", |
| ]; |
| let expected = vec![ |
| (None, None), |
| (None, Some(true)), |
| (None, Some(false)), |
| (Some(true), None), |
| (Some(false), None), |
| (Some(false), Some(true)), |
| (Some(false), Some(false)), |
| (Some(true), Some(true)), |
| (Some(true), Some(false)), |
| ]; |
| for (sql, (asc, nulls_first)) in sqls.iter().zip(expected.into_iter()) { |
| let expected = Statement::CreateExternalTable(CreateExternalTable { |
| name: name.clone(), |
| columns: vec![make_column_def("c1", DataType::Int(None))], |
| file_type: "CSV".to_string(), |
| location: "foo.csv".into(), |
| table_partition_cols: vec![], |
| order_exprs: vec![vec![OrderByExpr { |
| expr: Identifier(Ident { |
| value: "c1".to_owned(), |
| quote_style: None, |
| span: Span::empty(), |
| }), |
| options: OrderByOptions { asc, nulls_first }, |
| with_fill: None, |
| }]], |
| if_not_exists: false, |
| or_replace: false, |
| temporary: false, |
| unbounded: false, |
| options: vec![], |
| constraints: vec![], |
| }); |
| expect_parse_ok(sql, expected)?; |
| } |
| |
| // Ordered Col |
| let sql = "CREATE EXTERNAL TABLE t(c1 int, c2 int) STORED AS CSV WITH ORDER (c1 ASC, c2 DESC NULLS FIRST) LOCATION 'foo.csv'"; |
| let display = None; |
| let expected = Statement::CreateExternalTable(CreateExternalTable { |
| name: name.clone(), |
| columns: vec![ |
| make_column_def("c1", DataType::Int(display)), |
| make_column_def("c2", DataType::Int(display)), |
| ], |
| file_type: "CSV".to_string(), |
| location: "foo.csv".into(), |
| table_partition_cols: vec![], |
| order_exprs: vec![vec![ |
| OrderByExpr { |
| expr: Identifier(Ident { |
| value: "c1".to_owned(), |
| quote_style: None, |
| span: Span::empty(), |
| }), |
| options: OrderByOptions { |
| asc: Some(true), |
| nulls_first: None, |
| }, |
| with_fill: None, |
| }, |
| OrderByExpr { |
| expr: Identifier(Ident { |
| value: "c2".to_owned(), |
| quote_style: None, |
| span: Span::empty(), |
| }), |
| options: OrderByOptions { |
| asc: Some(false), |
| nulls_first: Some(true), |
| }, |
| with_fill: None, |
| }, |
| ]], |
| if_not_exists: false, |
| or_replace: false, |
| temporary: false, |
| unbounded: false, |
| options: vec![], |
| constraints: vec![], |
| }); |
| expect_parse_ok(sql, expected)?; |
| |
| // Ordered Binary op |
| let sql = "CREATE EXTERNAL TABLE t(c1 int, c2 int) STORED AS CSV WITH ORDER (c1 - c2 ASC) LOCATION 'foo.csv'"; |
| let display = None; |
| let expected = Statement::CreateExternalTable(CreateExternalTable { |
| name: name.clone(), |
| columns: vec![ |
| make_column_def("c1", DataType::Int(display)), |
| make_column_def("c2", DataType::Int(display)), |
| ], |
| file_type: "CSV".to_string(), |
| location: "foo.csv".into(), |
| table_partition_cols: vec![], |
| order_exprs: vec![vec![OrderByExpr { |
| expr: Expr::BinaryOp { |
| left: Box::new(Identifier(Ident { |
| value: "c1".to_owned(), |
| quote_style: None, |
| span: Span::empty(), |
| })), |
| op: BinaryOperator::Minus, |
| right: Box::new(Identifier(Ident { |
| value: "c2".to_owned(), |
| quote_style: None, |
| span: Span::empty(), |
| })), |
| }, |
| options: OrderByOptions { |
| asc: Some(true), |
| nulls_first: None, |
| }, |
| with_fill: None, |
| }]], |
| if_not_exists: false, |
| or_replace: false, |
| temporary: false, |
| unbounded: false, |
| options: vec![], |
| constraints: vec![], |
| }); |
| expect_parse_ok(sql, expected)?; |
| |
| // Most complete CREATE EXTERNAL TABLE statement possible (using IF NOT EXISTS) |
| let sql = " |
| CREATE UNBOUNDED EXTERNAL TABLE IF NOT EXISTS t (c1 int, c2 float) |
| STORED AS PARQUET |
| WITH ORDER (c1 - c2 ASC) |
| PARTITIONED BY (c1) |
| LOCATION 'foo.parquet' |
| OPTIONS ('format.compression' 'zstd', |
| 'format.delimiter' '*', |
| 'ROW_GROUP_SIZE' '1024', |
| 'TRUNCATE' 'NO', |
| 'format.has_header' 'true')"; |
| let expected = Statement::CreateExternalTable(CreateExternalTable { |
| name: name.clone(), |
| columns: vec![ |
| make_column_def("c1", DataType::Int(None)), |
| make_column_def("c2", DataType::Float(ExactNumberInfo::None)), |
| ], |
| file_type: "PARQUET".to_string(), |
| location: "foo.parquet".into(), |
| table_partition_cols: vec!["c1".into()], |
| order_exprs: vec![vec![OrderByExpr { |
| expr: Expr::BinaryOp { |
| left: Box::new(Identifier(Ident { |
| value: "c1".to_owned(), |
| quote_style: None, |
| span: Span::empty(), |
| })), |
| op: BinaryOperator::Minus, |
| right: Box::new(Identifier(Ident { |
| value: "c2".to_owned(), |
| quote_style: None, |
| span: Span::empty(), |
| })), |
| }, |
| options: OrderByOptions { |
| asc: Some(true), |
| nulls_first: None, |
| }, |
| with_fill: None, |
| }]], |
| if_not_exists: true, |
| or_replace: false, |
| temporary: false, |
| unbounded: true, |
| options: vec![ |
| ( |
| "format.compression".into(), |
| Value::SingleQuotedString("zstd".into()), |
| ), |
| ( |
| "format.delimiter".into(), |
| Value::SingleQuotedString("*".into()), |
| ), |
| ( |
| "ROW_GROUP_SIZE".into(), |
| Value::SingleQuotedString("1024".into()), |
| ), |
| ("TRUNCATE".into(), Value::SingleQuotedString("NO".into())), |
| ( |
| "format.has_header".into(), |
| Value::SingleQuotedString("true".into()), |
| ), |
| ], |
| constraints: vec![], |
| }); |
| expect_parse_ok(sql, expected)?; |
| |
| // Most complete CREATE EXTERNAL TABLE statement possible (using OR REPLACE) |
| let sql = " |
| CREATE OR REPLACE UNBOUNDED EXTERNAL TABLE t (c1 int, c2 float) |
| STORED AS PARQUET |
| WITH ORDER (c1 - c2 ASC) |
| PARTITIONED BY (c1) |
| LOCATION 'foo.parquet' |
| OPTIONS ('format.compression' 'zstd', |
| 'format.delimiter' '*', |
| 'ROW_GROUP_SIZE' '1024', |
| 'TRUNCATE' 'NO', |
| 'format.has_header' 'true')"; |
| let expected = Statement::CreateExternalTable(CreateExternalTable { |
| name: name.clone(), |
| columns: vec![ |
| make_column_def("c1", DataType::Int(None)), |
| make_column_def("c2", DataType::Float(ExactNumberInfo::None)), |
| ], |
| file_type: "PARQUET".to_string(), |
| location: "foo.parquet".into(), |
| table_partition_cols: vec!["c1".into()], |
| order_exprs: vec![vec![OrderByExpr { |
| expr: Expr::BinaryOp { |
| left: Box::new(Identifier(Ident { |
| value: "c1".to_owned(), |
| quote_style: None, |
| span: Span::empty(), |
| })), |
| op: BinaryOperator::Minus, |
| right: Box::new(Identifier(Ident { |
| value: "c2".to_owned(), |
| quote_style: None, |
| span: Span::empty(), |
| })), |
| }, |
| options: OrderByOptions { |
| asc: Some(true), |
| nulls_first: None, |
| }, |
| with_fill: None, |
| }]], |
| if_not_exists: false, |
| or_replace: true, |
| temporary: false, |
| unbounded: true, |
| options: vec![ |
| ( |
| "format.compression".into(), |
| Value::SingleQuotedString("zstd".into()), |
| ), |
| ( |
| "format.delimiter".into(), |
| Value::SingleQuotedString("*".into()), |
| ), |
| ( |
| "ROW_GROUP_SIZE".into(), |
| Value::SingleQuotedString("1024".into()), |
| ), |
| ("TRUNCATE".into(), Value::SingleQuotedString("NO".into())), |
| ( |
| "format.has_header".into(), |
| Value::SingleQuotedString("true".into()), |
| ), |
| ], |
| constraints: vec![], |
| }); |
| expect_parse_ok(sql, expected)?; |
| |
| // For error cases, see: `create_external_table.slt` |
| |
| Ok(()) |
| } |
| |
| #[test] |
| fn copy_to_table_to_table() -> Result<(), DataFusionError> { |
| // positive case |
| let sql = "COPY foo TO bar STORED AS CSV"; |
| let expected = Statement::CopyTo(CopyToStatement { |
| source: object_name("foo"), |
| target: "bar".to_string(), |
| partitioned_by: vec![], |
| stored_as: Some("CSV".to_owned()), |
| options: vec![], |
| }); |
| |
| assert_eq!(verified_stmt(sql), expected); |
| Ok(()) |
| } |
| |
| #[test] |
| fn skip_copy_into_snowflake() -> Result<(), DataFusionError> { |
| let sql = "COPY INTO foo FROM @~/staged FILE_FORMAT = (FORMAT_NAME = 'mycsv');"; |
| let dialect = Box::new(SnowflakeDialect); |
| let statements = DFParser::parse_sql_with_dialect(sql, dialect.as_ref())?; |
| |
| assert_eq!( |
| statements.len(), |
| 1, |
| "Expected to parse exactly one statement" |
| ); |
| if let Statement::CopyTo(_) = &statements[0] { |
| panic!("Expected non COPY TO statement, but was successful: {statements:?}"); |
| } |
| Ok(()) |
| } |
| |
| #[test] |
| fn explain_copy_to_table_to_table() -> Result<(), DataFusionError> { |
| let cases = vec![ |
| ("EXPLAIN COPY foo TO bar STORED AS PARQUET", false, false), |
| ( |
| "EXPLAIN ANALYZE COPY foo TO bar STORED AS PARQUET", |
| true, |
| false, |
| ), |
| ( |
| "EXPLAIN VERBOSE COPY foo TO bar STORED AS PARQUET", |
| false, |
| true, |
| ), |
| ( |
| "EXPLAIN ANALYZE VERBOSE COPY foo TO bar STORED AS PARQUET", |
| true, |
| true, |
| ), |
| ]; |
| for (sql, analyze, verbose) in cases { |
| println!("sql: {sql}, analyze: {analyze}, verbose: {verbose}"); |
| |
| let expected_copy = Statement::CopyTo(CopyToStatement { |
| source: object_name("foo"), |
| target: "bar".to_string(), |
| partitioned_by: vec![], |
| stored_as: Some("PARQUET".to_owned()), |
| options: vec![], |
| }); |
| let expected = Statement::Explain(ExplainStatement { |
| analyze, |
| verbose, |
| format: None, |
| statement: Box::new(expected_copy), |
| }); |
| assert_eq!(verified_stmt(sql), expected); |
| } |
| Ok(()) |
| } |
| |
| #[test] |
| fn copy_to_query_to_table() -> Result<(), DataFusionError> { |
| let statement = verified_stmt("SELECT 1"); |
| |
| // unwrap the various layers |
| let statement = if let Statement::Statement(statement) = statement { |
| *statement |
| } else { |
| panic!("Expected statement, got {statement:?}"); |
| }; |
| |
| let query = if let SQLStatement::Query(query) = statement { |
| query |
| } else { |
| panic!("Expected query, got {statement:?}"); |
| }; |
| |
| let sql = |
| "COPY (SELECT 1) TO bar STORED AS CSV OPTIONS ('format.has_header' 'true')"; |
| let expected = Statement::CopyTo(CopyToStatement { |
| source: CopyToSource::Query(query), |
| target: "bar".to_string(), |
| partitioned_by: vec![], |
| stored_as: Some("CSV".to_owned()), |
| options: vec![( |
| "format.has_header".into(), |
| Value::SingleQuotedString("true".into()), |
| )], |
| }); |
| assert_eq!(verified_stmt(sql), expected); |
| Ok(()) |
| } |
| |
| #[test] |
| fn copy_to_options() -> Result<(), DataFusionError> { |
| let sql = "COPY foo TO bar STORED AS CSV OPTIONS ('row_group_size' '55')"; |
| let expected = Statement::CopyTo(CopyToStatement { |
| source: object_name("foo"), |
| target: "bar".to_string(), |
| partitioned_by: vec![], |
| stored_as: Some("CSV".to_owned()), |
| options: vec![( |
| "row_group_size".to_string(), |
| Value::SingleQuotedString("55".to_string()), |
| )], |
| }); |
| assert_eq!(verified_stmt(sql), expected); |
| Ok(()) |
| } |
| |
| #[test] |
| fn copy_to_partitioned_by() -> Result<(), DataFusionError> { |
| let sql = "COPY foo TO bar STORED AS CSV PARTITIONED BY (a) OPTIONS ('row_group_size' '55')"; |
| let expected = Statement::CopyTo(CopyToStatement { |
| source: object_name("foo"), |
| target: "bar".to_string(), |
| partitioned_by: vec!["a".to_string()], |
| stored_as: Some("CSV".to_owned()), |
| options: vec![( |
| "row_group_size".to_string(), |
| Value::SingleQuotedString("55".to_string()), |
| )], |
| }); |
| assert_eq!(verified_stmt(sql), expected); |
| Ok(()) |
| } |
| |
| #[test] |
| fn copy_to_multi_options() -> Result<(), DataFusionError> { |
| // order of options is preserved |
| let sql = "COPY foo TO bar STORED AS parquet OPTIONS ('format.row_group_size' 55, 'format.compression' snappy, 'execution.keep_partition_by_columns' true)"; |
| |
| let expected_options = vec![ |
| ( |
| "format.row_group_size".to_string(), |
| Value::Number("55".to_string(), false), |
| ), |
| ( |
| "format.compression".to_string(), |
| Value::SingleQuotedString("snappy".to_string()), |
| ), |
| ( |
| "execution.keep_partition_by_columns".to_string(), |
| Value::SingleQuotedString("true".to_string()), |
| ), |
| ]; |
| |
| let mut statements = DFParser::parse_sql(sql).unwrap(); |
| assert_eq!(statements.len(), 1); |
| let only_statement = statements.pop_front().unwrap(); |
| |
| let options = if let Statement::CopyTo(copy_to) = only_statement { |
| copy_to.options |
| } else { |
| panic!("Expected copy"); |
| }; |
| |
| assert_eq!(options, expected_options); |
| |
| Ok(()) |
| } |
| |
| // For error cases, see: `copy.slt` |
| |
| fn object_name(name: &str) -> CopyToSource { |
| CopyToSource::Relation(ObjectName::from(vec![Ident::new(name)])) |
| } |
| |
| // Based on sqlparser-rs |
| // https://github.com/sqlparser-rs/sqlparser-rs/blob/ae3b5844c839072c235965fe0d1bddc473dced87/src/test_utils.rs#L104-L116 |
| |
| /// Ensures that `sql` parses as a single [Statement] |
| /// |
| /// If `canonical` is non empty,this function additionally asserts |
| /// that: |
| /// |
| /// 1. parsing `sql` results in the same [`Statement`] as parsing |
| /// `canonical`. |
| /// |
| /// 2. re-serializing the result of parsing `sql` produces the same |
| /// `canonical` sql string |
| fn one_statement_parses_to(sql: &str, canonical: &str) -> Statement { |
| let mut statements = DFParser::parse_sql(sql).unwrap(); |
| assert_eq!(statements.len(), 1); |
| |
| if sql != canonical { |
| assert_eq!(DFParser::parse_sql(canonical).unwrap(), statements); |
| } |
| |
| let only_statement = statements.pop_front().unwrap(); |
| assert_eq!( |
| canonical.to_uppercase(), |
| only_statement.to_string().to_uppercase() |
| ); |
| only_statement |
| } |
| |
| /// Ensures that `sql` parses as a single [Statement], and that |
| /// re-serializing the parse result produces the same `sql` |
| /// string (is not modified after a serialization round-trip). |
| fn verified_stmt(sql: &str) -> Statement { |
| one_statement_parses_to(sql, sql) |
| } |
| |
| #[test] |
| /// Checks the recursion limit works for sql queries |
| /// Recursion can happen easily with binary exprs (i.e, AND or OR) |
| fn test_recursion_limit() { |
| let sql = "SELECT 1 OR 2"; |
| |
| // Expect parse to succeed |
| DFParserBuilder::new(sql) |
| .build() |
| .unwrap() |
| .parse_statements() |
| .unwrap(); |
| |
| let err = DFParserBuilder::new(sql) |
| .with_recursion_limit(1) |
| .build() |
| .unwrap() |
| .parse_statements() |
| .unwrap_err(); |
| |
| assert_contains!( |
| err.to_string(), |
| "SQL error: RecursionLimitExceeded (current limit: 1)" |
| ); |
| } |
| |
| #[test] |
| fn test_multistatement() { |
| let sql = "COPY foo TO bar STORED AS CSV; \ |
| CREATE EXTERNAL TABLE t(c1 int) STORED AS CSV LOCATION 'foo.csv'; \ |
| RESET var;"; |
| let statements = DFParser::parse_sql(sql).unwrap(); |
| assert_eq!( |
| statements, |
| vec![ |
| Statement::CopyTo(CopyToStatement { |
| source: object_name("foo"), |
| target: "bar".to_string(), |
| partitioned_by: vec![], |
| stored_as: Some("CSV".to_owned()), |
| options: vec![], |
| }), |
| { |
| let name = ObjectName::from(vec![Ident::from("t")]); |
| let display = None; |
| Statement::CreateExternalTable(CreateExternalTable { |
| name: name.clone(), |
| columns: vec![make_column_def("c1", DataType::Int(display))], |
| file_type: "CSV".to_string(), |
| location: "foo.csv".into(), |
| table_partition_cols: vec![], |
| order_exprs: vec![], |
| if_not_exists: false, |
| or_replace: false, |
| temporary: false, |
| unbounded: false, |
| options: vec![], |
| constraints: vec![], |
| }) |
| }, |
| { |
| let name = ObjectName::from(vec![Ident::from("var")]); |
| Statement::Reset(ResetStatement::Variable(name)) |
| } |
| ] |
| ); |
| } |
| |
| fn expect_parse_expr_ok(sql: &str, expected: ExprWithAlias) { |
| let expr = DFParser::parse_sql_into_expr(sql).unwrap(); |
| assert_eq!(expr, expected, "actual:\n{expr:#?}"); |
| } |
| |
| /// Parses sql and asserts that the expected error message was found |
| fn expect_parse_expr_error(sql: &str, expected_error: &str) { |
| match DFParser::parse_sql_into_expr(sql) { |
| Ok(expr) => { |
| panic!("Expected parse error for '{sql}', but was successful: {expr:#?}"); |
| } |
| Err(e) => { |
| let error_message = e.to_string(); |
| assert!( |
| error_message.contains(expected_error), |
| "Expected error '{expected_error}' not found in actual error '{error_message}'" |
| ); |
| } |
| } |
| } |
| |
| #[test] |
| fn literal() { |
| expect_parse_expr_ok( |
| "1234", |
| ExprWithAlias { |
| expr: Expr::Value(ValueWithSpan::from(Value::Number( |
| "1234".to_string(), |
| false, |
| ))), |
| alias: None, |
| }, |
| ) |
| } |
| |
| #[test] |
| fn literal_with_alias() { |
| expect_parse_expr_ok( |
| "1234 as foo", |
| ExprWithAlias { |
| expr: Expr::Value(ValueWithSpan::from(Value::Number( |
| "1234".to_string(), |
| false, |
| ))), |
| alias: Some(Ident::from("foo")), |
| }, |
| ) |
| } |
| |
| #[test] |
| fn literal_with_alias_and_trailing_tokens() { |
| expect_parse_expr_error( |
| "1234 as foo.bar", |
| "Expected: end of expression, found: .", |
| ) |
| } |
| |
| #[test] |
| fn literal_with_alias_and_trailing_whitespace() { |
| expect_parse_expr_ok( |
| "1234 as foo ", |
| ExprWithAlias { |
| expr: Expr::Value(ValueWithSpan::from(Value::Number( |
| "1234".to_string(), |
| false, |
| ))), |
| alias: Some(Ident::from("foo")), |
| }, |
| ) |
| } |
| |
| #[test] |
| fn literal_with_alias_and_trailing_whitespace_and_token() { |
| expect_parse_expr_error( |
| "1234 as foo bar", |
| "Expected: end of expression, found: bar", |
| ) |
| } |
| } |