| // Licensed to the Apache Software Foundation (ASF) under one |
| // or more contributor license agreements. See the NOTICE file |
| // distributed with this work for additional information |
| // regarding copyright ownership. The ASF licenses this file |
| // to you under the Apache License, Version 2.0 (the |
| // "License"); you may not use this file except in compliance |
| // with the License. You may obtain a copy of the License at |
| // |
| // http://www.apache.org/licenses/LICENSE-2.0 |
| // |
| // Unless required by applicable law or agreed to in writing, |
| // software distributed under the License is distributed on an |
| // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
| // KIND, either express or implied. See the License for the |
| // specific language governing permissions and limitations |
| // under the License. |
| |
| #[cfg(not(feature = "std"))] |
| use crate::alloc::string::ToString; |
| use crate::ast::helpers::key_value_options::{KeyValueOption, KeyValueOptionType, KeyValueOptions}; |
| use crate::ast::helpers::stmt_create_table::CreateTableBuilder; |
| use crate::ast::helpers::stmt_data_loading::{ |
| FileStagingCommand, StageLoadSelectItem, StageLoadSelectItemKind, StageParamsObject, |
| }; |
| use crate::ast::{ |
| ColumnOption, ColumnPolicy, ColumnPolicyProperty, CopyIntoSnowflakeKind, Ident, |
| IdentityParameters, IdentityProperty, IdentityPropertyFormatKind, IdentityPropertyKind, |
| IdentityPropertyOrder, ObjectName, RowAccessPolicy, ShowObjects, SqlOption, Statement, |
| TagsColumnOption, WrappedCollection, |
| }; |
| use crate::dialect::{Dialect, Precedence}; |
| use crate::keywords::Keyword; |
| use crate::parser::{IsOptional, Parser, ParserError}; |
| use crate::tokenizer::{Token, Word}; |
| #[cfg(not(feature = "std"))] |
| use alloc::boxed::Box; |
| #[cfg(not(feature = "std"))] |
| use alloc::string::String; |
| #[cfg(not(feature = "std"))] |
| use alloc::vec::Vec; |
| #[cfg(not(feature = "std"))] |
| use alloc::{format, vec}; |
| |
| use super::keywords::RESERVED_FOR_IDENTIFIER; |
| use sqlparser::ast::StorageSerializationPolicy; |
| |
/// Keywords the Snowflake dialect reserves as select-item operators; this is
/// the slice handed out by `get_reserved_keywords_for_select_item_operator`.
const RESERVED_KEYWORDS_FOR_SELECT_ITEM_OPERATOR: [Keyword; 1] = [Keyword::CONNECT_BY_ROOT];
/// A [`Dialect`] for [Snowflake](https://www.snowflake.com/)
#[derive(Debug, Default)]
pub struct SnowflakeDialect;
| |
| impl Dialect for SnowflakeDialect { |
| // see https://docs.snowflake.com/en/sql-reference/identifiers-syntax.html |
| fn is_identifier_start(&self, ch: char) -> bool { |
| ch.is_ascii_lowercase() || ch.is_ascii_uppercase() || ch == '_' |
| } |
| |
| fn supports_projection_trailing_commas(&self) -> bool { |
| true |
| } |
| |
| fn supports_from_trailing_commas(&self) -> bool { |
| true |
| } |
| |
| // Snowflake supports double-dot notation when the schema name is not specified |
| // In this case the default PUBLIC schema is used |
| // |
| // see https://docs.snowflake.com/en/sql-reference/name-resolution#resolution-when-schema-omitted-double-dot-notation |
| fn supports_object_name_double_dot_notation(&self) -> bool { |
| true |
| } |
| |
| fn is_identifier_part(&self, ch: char) -> bool { |
| ch.is_ascii_lowercase() |
| || ch.is_ascii_uppercase() |
| || ch.is_ascii_digit() |
| || ch == '$' |
| || ch == '_' |
| } |
| |
| // See https://cloud.google.com/bigquery/docs/reference/standard-sql/lexical#escape_sequences |
| fn supports_string_literal_backslash_escape(&self) -> bool { |
| true |
| } |
| |
| fn supports_within_after_array_aggregation(&self) -> bool { |
| true |
| } |
| |
| /// See <https://docs.snowflake.com/en/sql-reference/constructs/where#joins-in-the-where-clause> |
| fn supports_outer_join_operator(&self) -> bool { |
| true |
| } |
| |
| fn supports_connect_by(&self) -> bool { |
| true |
| } |
| |
| /// See <https://docs.snowflake.com/en/sql-reference/sql/execute-immediate> |
| fn supports_execute_immediate(&self) -> bool { |
| true |
| } |
| |
| fn supports_match_recognize(&self) -> bool { |
| true |
| } |
| |
| // Snowflake uses this syntax for "object constants" (the values of which |
| // are not actually required to be constants). |
| // |
| // https://docs.snowflake.com/en/sql-reference/data-types-semistructured#label-object-constant |
| fn supports_dictionary_syntax(&self) -> bool { |
| true |
| } |
| |
| // Snowflake doesn't document this but `FIRST_VALUE(arg, { IGNORE | RESPECT } NULLS)` |
| // works (i.e. inside the argument list instead of after). |
| fn supports_window_function_null_treatment_arg(&self) -> bool { |
| true |
| } |
| |
| /// See [doc](https://docs.snowflake.com/en/sql-reference/sql/set#syntax) |
| fn supports_parenthesized_set_variables(&self) -> bool { |
| true |
| } |
| |
| /// See [doc](https://docs.snowflake.com/en/sql-reference/sql/comment) |
| fn supports_comment_on(&self) -> bool { |
| true |
| } |
| |
| fn parse_statement(&self, parser: &mut Parser) -> Option<Result<Statement, ParserError>> { |
| if parser.parse_keywords(&[Keyword::ALTER, Keyword::SESSION]) { |
| // ALTER SESSION |
| let set = match parser.parse_one_of_keywords(&[Keyword::SET, Keyword::UNSET]) { |
| Some(Keyword::SET) => true, |
| Some(Keyword::UNSET) => false, |
| _ => return Some(parser.expected("SET or UNSET", parser.peek_token())), |
| }; |
| return Some(parse_alter_session(parser, set)); |
| } |
| |
| if parser.parse_keyword(Keyword::CREATE) { |
| // possibly CREATE STAGE |
| //[ OR REPLACE ] |
| let or_replace = parser.parse_keywords(&[Keyword::OR, Keyword::REPLACE]); |
| // LOCAL | GLOBAL |
| let global = match parser.parse_one_of_keywords(&[Keyword::LOCAL, Keyword::GLOBAL]) { |
| Some(Keyword::LOCAL) => Some(false), |
| Some(Keyword::GLOBAL) => Some(true), |
| _ => None, |
| }; |
| |
| let mut temporary = false; |
| let mut volatile = false; |
| let mut transient = false; |
| let mut iceberg = false; |
| |
| match parser.parse_one_of_keywords(&[ |
| Keyword::TEMP, |
| Keyword::TEMPORARY, |
| Keyword::VOLATILE, |
| Keyword::TRANSIENT, |
| Keyword::ICEBERG, |
| ]) { |
| Some(Keyword::TEMP | Keyword::TEMPORARY) => temporary = true, |
| Some(Keyword::VOLATILE) => volatile = true, |
| Some(Keyword::TRANSIENT) => transient = true, |
| Some(Keyword::ICEBERG) => iceberg = true, |
| _ => {} |
| } |
| |
| if parser.parse_keyword(Keyword::STAGE) { |
| // OK - this is CREATE STAGE statement |
| return Some(parse_create_stage(or_replace, temporary, parser)); |
| } else if parser.parse_keyword(Keyword::TABLE) { |
| return Some(parse_create_table( |
| or_replace, global, temporary, volatile, transient, iceberg, parser, |
| )); |
| } else { |
| // need to go back with the cursor |
| let mut back = 1; |
| if or_replace { |
| back += 2 |
| } |
| if temporary { |
| back += 1 |
| } |
| for _i in 0..back { |
| parser.prev_token(); |
| } |
| } |
| } |
| if parser.parse_keywords(&[Keyword::COPY, Keyword::INTO]) { |
| // COPY INTO |
| return Some(parse_copy_into(parser)); |
| } |
| |
| if let Some(kw) = parser.parse_one_of_keywords(&[ |
| Keyword::LIST, |
| Keyword::LS, |
| Keyword::REMOVE, |
| Keyword::RM, |
| ]) { |
| return Some(parse_file_staging_command(kw, parser)); |
| } |
| |
| if parser.parse_keyword(Keyword::SHOW) { |
| let terse = parser.parse_keyword(Keyword::TERSE); |
| if parser.parse_keyword(Keyword::OBJECTS) { |
| return Some(parse_show_objects(terse, parser)); |
| } |
| //Give back Keyword::TERSE |
| if terse { |
| parser.prev_token(); |
| } |
| //Give back Keyword::SHOW |
| parser.prev_token(); |
| } |
| |
| None |
| } |
| |
| fn parse_column_option( |
| &self, |
| parser: &mut Parser, |
| ) -> Result<Option<Result<Option<ColumnOption>, ParserError>>, ParserError> { |
| parser.maybe_parse(|parser| { |
| let with = parser.parse_keyword(Keyword::WITH); |
| |
| if parser.parse_keyword(Keyword::IDENTITY) { |
| Ok(parse_identity_property(parser) |
| .map(|p| Some(ColumnOption::Identity(IdentityPropertyKind::Identity(p))))) |
| } else if parser.parse_keyword(Keyword::AUTOINCREMENT) { |
| Ok(parse_identity_property(parser).map(|p| { |
| Some(ColumnOption::Identity(IdentityPropertyKind::Autoincrement( |
| p, |
| ))) |
| })) |
| } else if parser.parse_keywords(&[Keyword::MASKING, Keyword::POLICY]) { |
| Ok(parse_column_policy_property(parser, with) |
| .map(|p| Some(ColumnOption::Policy(ColumnPolicy::MaskingPolicy(p))))) |
| } else if parser.parse_keywords(&[Keyword::PROJECTION, Keyword::POLICY]) { |
| Ok(parse_column_policy_property(parser, with) |
| .map(|p| Some(ColumnOption::Policy(ColumnPolicy::ProjectionPolicy(p))))) |
| } else if parser.parse_keywords(&[Keyword::TAG]) { |
| Ok(parse_column_tags(parser, with).map(|p| Some(ColumnOption::Tags(p)))) |
| } else { |
| Err(ParserError::ParserError("not found match".to_string())) |
| } |
| }) |
| } |
| |
| fn get_next_precedence(&self, parser: &Parser) -> Option<Result<u8, ParserError>> { |
| let token = parser.peek_token(); |
| // Snowflake supports the `:` cast operator unlike other dialects |
| match token.token { |
| Token::Colon => Some(Ok(self.prec_value(Precedence::DoubleColon))), |
| _ => None, |
| } |
| } |
| |
| fn describe_requires_table_keyword(&self) -> bool { |
| true |
| } |
| |
| fn allow_extract_custom(&self) -> bool { |
| true |
| } |
| |
| fn allow_extract_single_quotes(&self) -> bool { |
| true |
| } |
| |
| /// Snowflake expects the `LIKE` option before the `IN` option, |
| /// for example: <https://docs.snowflake.com/en/sql-reference/sql/show-views#syntax> |
| fn supports_show_like_before_in(&self) -> bool { |
| true |
| } |
| |
| fn is_reserved_for_identifier(&self, kw: Keyword) -> bool { |
| // Unreserve some keywords that Snowflake accepts as identifiers |
| // See: https://docs.snowflake.com/en/sql-reference/reserved-keywords |
| if matches!(kw, Keyword::INTERVAL) { |
| false |
| } else { |
| RESERVED_FOR_IDENTIFIER.contains(&kw) |
| } |
| } |
| |
| fn supports_partiql(&self) -> bool { |
| true |
| } |
| |
| fn is_select_item_alias(&self, explicit: bool, kw: &Keyword, parser: &mut Parser) -> bool { |
| explicit |
| || match kw { |
| // The following keywords can be considered an alias as long as |
| // they are not followed by other tokens that may change their meaning |
| // e.g. `SELECT * EXCEPT (col1) FROM tbl` |
| Keyword::EXCEPT |
| // e.g. `SELECT 1 LIMIT 5` |
| | Keyword::LIMIT |
| // e.g. `SELECT 1 OFFSET 5 ROWS` |
| | Keyword::OFFSET |
| // e.g. `INSERT INTO t SELECT 1 RETURNING *` |
| | Keyword::RETURNING if !matches!(parser.peek_token_ref().token, Token::Comma | Token::EOF) => |
| { |
| false |
| } |
| |
| // `FETCH` can be considered an alias as long as it's not followed by `FIRST`` or `NEXT` |
| // which would give it a different meanings, for example: `SELECT 1 FETCH FIRST 10 ROWS` - not an alias |
| Keyword::FETCH |
| if parser.peek_keyword(Keyword::FIRST) || parser.peek_keyword(Keyword::NEXT) => |
| { |
| false |
| } |
| |
| // Reserved keywords by the Snowflake dialect, which seem to be less strictive |
| // than what is listed in `keywords::RESERVED_FOR_COLUMN_ALIAS`. The following |
| // keywords were tested with the this statement: `SELECT 1 <KW>`. |
| Keyword::FROM |
| | Keyword::GROUP |
| | Keyword::HAVING |
| | Keyword::INTERSECT |
| | Keyword::INTO |
| | Keyword::MINUS |
| | Keyword::ORDER |
| | Keyword::SELECT |
| | Keyword::UNION |
| | Keyword::WHERE |
| | Keyword::WITH => false, |
| |
| // Any other word is considered an alias |
| _ => true, |
| } |
| } |
| |
| /// See: <https://docs.snowflake.com/en/sql-reference/constructs/at-before> |
| fn supports_timestamp_versioning(&self) -> bool { |
| true |
| } |
| |
| /// See: <https://docs.snowflake.com/en/sql-reference/constructs/group-by> |
| fn supports_group_by_expr(&self) -> bool { |
| true |
| } |
| |
| /// See: <https://docs.snowflake.com/en/sql-reference/constructs/connect-by> |
| fn get_reserved_keywords_for_select_item_operator(&self) -> &[Keyword] { |
| &RESERVED_KEYWORDS_FOR_SELECT_ITEM_OPERATOR |
| } |
| } |
| |
| fn parse_file_staging_command(kw: Keyword, parser: &mut Parser) -> Result<Statement, ParserError> { |
| let stage = parse_snowflake_stage_name(parser)?; |
| let pattern = if parser.parse_keyword(Keyword::PATTERN) { |
| parser.expect_token(&Token::Eq)?; |
| Some(parser.parse_literal_string()?) |
| } else { |
| None |
| }; |
| |
| match kw { |
| Keyword::LIST | Keyword::LS => Ok(Statement::List(FileStagingCommand { stage, pattern })), |
| Keyword::REMOVE | Keyword::RM => { |
| Ok(Statement::Remove(FileStagingCommand { stage, pattern })) |
| } |
| _ => Err(ParserError::ParserError( |
| "unexpected stage command, expecting LIST, LS, REMOVE or RM".to_string(), |
| )), |
| } |
| } |
| |
| /// Parse snowflake alter session. |
| /// <https://docs.snowflake.com/en/sql-reference/sql/alter-session> |
| fn parse_alter_session(parser: &mut Parser, set: bool) -> Result<Statement, ParserError> { |
| let session_options = parse_session_options(parser, set)?; |
| Ok(Statement::AlterSession { |
| set, |
| session_params: KeyValueOptions { |
| options: session_options, |
| }, |
| }) |
| } |
| |
| /// Parse snowflake create table statement. |
| /// <https://docs.snowflake.com/en/sql-reference/sql/create-table> |
| /// <https://docs.snowflake.com/en/sql-reference/sql/create-iceberg-table> |
| pub fn parse_create_table( |
| or_replace: bool, |
| global: Option<bool>, |
| temporary: bool, |
| volatile: bool, |
| transient: bool, |
| iceberg: bool, |
| parser: &mut Parser, |
| ) -> Result<Statement, ParserError> { |
| let if_not_exists = parser.parse_keywords(&[Keyword::IF, Keyword::NOT, Keyword::EXISTS]); |
| let table_name = parser.parse_object_name(false)?; |
| |
| let mut builder = CreateTableBuilder::new(table_name) |
| .or_replace(or_replace) |
| .if_not_exists(if_not_exists) |
| .temporary(temporary) |
| .transient(transient) |
| .volatile(volatile) |
| .iceberg(iceberg) |
| .global(global) |
| .hive_formats(Some(Default::default())); |
| |
| // Snowflake does not enforce order of the parameters in the statement. The parser needs to |
| // parse the statement in a loop. |
| // |
| // "CREATE TABLE x COPY GRANTS (c INT)" and "CREATE TABLE x (c INT) COPY GRANTS" are both |
| // accepted by Snowflake |
| |
| let mut plain_options = vec![]; |
| |
| loop { |
| let next_token = parser.next_token(); |
| match &next_token.token { |
| Token::Word(word) => match word.keyword { |
| Keyword::COPY => { |
| parser.expect_keyword_is(Keyword::GRANTS)?; |
| builder = builder.copy_grants(true); |
| } |
| Keyword::COMMENT => { |
| // Rewind the COMMENT keyword |
| parser.prev_token(); |
| if let Some(comment_def) = parser.parse_optional_inline_comment()? { |
| plain_options.push(SqlOption::Comment(comment_def)) |
| } |
| } |
| Keyword::AS => { |
| let query = parser.parse_query()?; |
| builder = builder.query(Some(query)); |
| break; |
| } |
| Keyword::CLONE => { |
| let clone = parser.parse_object_name(false).ok(); |
| builder = builder.clone_clause(clone); |
| break; |
| } |
| Keyword::LIKE => { |
| let like = parser.parse_object_name(false).ok(); |
| builder = builder.like(like); |
| break; |
| } |
| Keyword::CLUSTER => { |
| parser.expect_keyword_is(Keyword::BY)?; |
| parser.expect_token(&Token::LParen)?; |
| let cluster_by = Some(WrappedCollection::Parentheses( |
| parser.parse_comma_separated(|p| p.parse_expr())?, |
| )); |
| parser.expect_token(&Token::RParen)?; |
| |
| builder = builder.cluster_by(cluster_by) |
| } |
| Keyword::ENABLE_SCHEMA_EVOLUTION => { |
| parser.expect_token(&Token::Eq)?; |
| let enable_schema_evolution = |
| match parser.parse_one_of_keywords(&[Keyword::TRUE, Keyword::FALSE]) { |
| Some(Keyword::TRUE) => true, |
| Some(Keyword::FALSE) => false, |
| _ => { |
| return parser.expected("TRUE or FALSE", next_token); |
| } |
| }; |
| |
| builder = builder.enable_schema_evolution(Some(enable_schema_evolution)); |
| } |
| Keyword::CHANGE_TRACKING => { |
| parser.expect_token(&Token::Eq)?; |
| let change_tracking = |
| match parser.parse_one_of_keywords(&[Keyword::TRUE, Keyword::FALSE]) { |
| Some(Keyword::TRUE) => true, |
| Some(Keyword::FALSE) => false, |
| _ => { |
| return parser.expected("TRUE or FALSE", next_token); |
| } |
| }; |
| |
| builder = builder.change_tracking(Some(change_tracking)); |
| } |
| Keyword::DATA_RETENTION_TIME_IN_DAYS => { |
| parser.expect_token(&Token::Eq)?; |
| let data_retention_time_in_days = parser.parse_literal_uint()?; |
| builder = |
| builder.data_retention_time_in_days(Some(data_retention_time_in_days)); |
| } |
| Keyword::MAX_DATA_EXTENSION_TIME_IN_DAYS => { |
| parser.expect_token(&Token::Eq)?; |
| let max_data_extension_time_in_days = parser.parse_literal_uint()?; |
| builder = builder |
| .max_data_extension_time_in_days(Some(max_data_extension_time_in_days)); |
| } |
| Keyword::DEFAULT_DDL_COLLATION => { |
| parser.expect_token(&Token::Eq)?; |
| let default_ddl_collation = parser.parse_literal_string()?; |
| builder = builder.default_ddl_collation(Some(default_ddl_collation)); |
| } |
| // WITH is optional, we just verify that next token is one of the expected ones and |
| // fallback to the default match statement |
| Keyword::WITH => { |
| parser.expect_one_of_keywords(&[ |
| Keyword::AGGREGATION, |
| Keyword::TAG, |
| Keyword::ROW, |
| ])?; |
| parser.prev_token(); |
| } |
| Keyword::AGGREGATION => { |
| parser.expect_keyword_is(Keyword::POLICY)?; |
| let aggregation_policy = parser.parse_object_name(false)?; |
| builder = builder.with_aggregation_policy(Some(aggregation_policy)); |
| } |
| Keyword::ROW => { |
| parser.expect_keywords(&[Keyword::ACCESS, Keyword::POLICY])?; |
| let policy = parser.parse_object_name(false)?; |
| parser.expect_keyword_is(Keyword::ON)?; |
| parser.expect_token(&Token::LParen)?; |
| let columns = parser.parse_comma_separated(|p| p.parse_identifier())?; |
| parser.expect_token(&Token::RParen)?; |
| |
| builder = |
| builder.with_row_access_policy(Some(RowAccessPolicy::new(policy, columns))) |
| } |
| Keyword::TAG => { |
| parser.expect_token(&Token::LParen)?; |
| let tags = parser.parse_comma_separated(Parser::parse_tag)?; |
| parser.expect_token(&Token::RParen)?; |
| builder = builder.with_tags(Some(tags)); |
| } |
| Keyword::ON if parser.parse_keyword(Keyword::COMMIT) => { |
| let on_commit = Some(parser.parse_create_table_on_commit()?); |
| builder = builder.on_commit(on_commit); |
| } |
| Keyword::EXTERNAL_VOLUME => { |
| parser.expect_token(&Token::Eq)?; |
| builder.external_volume = Some(parser.parse_literal_string()?); |
| } |
| Keyword::CATALOG => { |
| parser.expect_token(&Token::Eq)?; |
| builder.catalog = Some(parser.parse_literal_string()?); |
| } |
| Keyword::BASE_LOCATION => { |
| parser.expect_token(&Token::Eq)?; |
| builder.base_location = Some(parser.parse_literal_string()?); |
| } |
| Keyword::CATALOG_SYNC => { |
| parser.expect_token(&Token::Eq)?; |
| builder.catalog_sync = Some(parser.parse_literal_string()?); |
| } |
| Keyword::STORAGE_SERIALIZATION_POLICY => { |
| parser.expect_token(&Token::Eq)?; |
| |
| builder.storage_serialization_policy = |
| Some(parse_storage_serialization_policy(parser)?); |
| } |
| Keyword::IF if parser.parse_keywords(&[Keyword::NOT, Keyword::EXISTS]) => { |
| builder = builder.if_not_exists(true); |
| } |
| _ => { |
| return parser.expected("end of statement", next_token); |
| } |
| }, |
| Token::LParen => { |
| parser.prev_token(); |
| let (columns, constraints) = parser.parse_columns()?; |
| builder = builder.columns(columns).constraints(constraints); |
| } |
| Token::EOF => { |
| if builder.columns.is_empty() { |
| return Err(ParserError::ParserError( |
| "unexpected end of input".to_string(), |
| )); |
| } |
| |
| break; |
| } |
| Token::SemiColon => { |
| if builder.columns.is_empty() { |
| return Err(ParserError::ParserError( |
| "unexpected end of input".to_string(), |
| )); |
| } |
| |
| parser.prev_token(); |
| break; |
| } |
| _ => { |
| return parser.expected("end of statement", next_token); |
| } |
| } |
| } |
| let table_options = if !plain_options.is_empty() { |
| crate::ast::CreateTableOptions::Plain(plain_options) |
| } else { |
| crate::ast::CreateTableOptions::None |
| }; |
| |
| builder = builder.table_options(table_options); |
| |
| if iceberg && builder.base_location.is_none() { |
| return Err(ParserError::ParserError( |
| "BASE_LOCATION is required for ICEBERG tables".to_string(), |
| )); |
| } |
| |
| Ok(builder.build()) |
| } |
| |
| pub fn parse_storage_serialization_policy( |
| parser: &mut Parser, |
| ) -> Result<StorageSerializationPolicy, ParserError> { |
| let next_token = parser.next_token(); |
| match &next_token.token { |
| Token::Word(w) => match w.keyword { |
| Keyword::COMPATIBLE => Ok(StorageSerializationPolicy::Compatible), |
| Keyword::OPTIMIZED => Ok(StorageSerializationPolicy::Optimized), |
| _ => parser.expected("storage_serialization_policy", next_token), |
| }, |
| _ => parser.expected("storage_serialization_policy", next_token), |
| } |
| } |
| |
| pub fn parse_create_stage( |
| or_replace: bool, |
| temporary: bool, |
| parser: &mut Parser, |
| ) -> Result<Statement, ParserError> { |
| //[ IF NOT EXISTS ] |
| let if_not_exists = parser.parse_keywords(&[Keyword::IF, Keyword::NOT, Keyword::EXISTS]); |
| let name = parser.parse_object_name(false)?; |
| let mut directory_table_params = Vec::new(); |
| let mut file_format = Vec::new(); |
| let mut copy_options = Vec::new(); |
| let mut comment = None; |
| |
| // [ internalStageParams | externalStageParams ] |
| let stage_params = parse_stage_params(parser)?; |
| |
| // [ directoryTableParams ] |
| if parser.parse_keyword(Keyword::DIRECTORY) { |
| parser.expect_token(&Token::Eq)?; |
| directory_table_params = parse_parentheses_options(parser)?; |
| } |
| |
| // [ file_format] |
| if parser.parse_keyword(Keyword::FILE_FORMAT) { |
| parser.expect_token(&Token::Eq)?; |
| file_format = parse_parentheses_options(parser)?; |
| } |
| |
| // [ copy_options ] |
| if parser.parse_keyword(Keyword::COPY_OPTIONS) { |
| parser.expect_token(&Token::Eq)?; |
| copy_options = parse_parentheses_options(parser)?; |
| } |
| |
| // [ comment ] |
| if parser.parse_keyword(Keyword::COMMENT) { |
| parser.expect_token(&Token::Eq)?; |
| comment = Some(parser.parse_comment_value()?); |
| } |
| |
| Ok(Statement::CreateStage { |
| or_replace, |
| temporary, |
| if_not_exists, |
| name, |
| stage_params, |
| directory_table_params: KeyValueOptions { |
| options: directory_table_params, |
| }, |
| file_format: KeyValueOptions { |
| options: file_format, |
| }, |
| copy_options: KeyValueOptions { |
| options: copy_options, |
| }, |
| comment, |
| }) |
| } |
| |
| pub fn parse_stage_name_identifier(parser: &mut Parser) -> Result<Ident, ParserError> { |
| let mut ident = String::new(); |
| while let Some(next_token) = parser.next_token_no_skip() { |
| match &next_token.token { |
| Token::Whitespace(_) | Token::SemiColon => break, |
| Token::Period => { |
| parser.prev_token(); |
| break; |
| } |
| Token::RParen => { |
| parser.prev_token(); |
| break; |
| } |
| Token::AtSign => ident.push('@'), |
| Token::Tilde => ident.push('~'), |
| Token::Mod => ident.push('%'), |
| Token::Div => ident.push('/'), |
| Token::Word(w) => ident.push_str(&w.to_string()), |
| _ => return parser.expected("stage name identifier", parser.peek_token()), |
| } |
| } |
| Ok(Ident::new(ident)) |
| } |
| |
| pub fn parse_snowflake_stage_name(parser: &mut Parser) -> Result<ObjectName, ParserError> { |
| match parser.next_token().token { |
| Token::AtSign => { |
| parser.prev_token(); |
| let mut idents = vec![]; |
| loop { |
| idents.push(parse_stage_name_identifier(parser)?); |
| if !parser.consume_token(&Token::Period) { |
| break; |
| } |
| } |
| Ok(ObjectName::from(idents)) |
| } |
| _ => { |
| parser.prev_token(); |
| Ok(parser.parse_object_name(false)?) |
| } |
| } |
| } |
| |
| /// Parses a `COPY INTO` statement. Snowflake has two variants, `COPY INTO <table>` |
| /// and `COPY INTO <location>` which have different syntax. |
| pub fn parse_copy_into(parser: &mut Parser) -> Result<Statement, ParserError> { |
| let kind = match parser.peek_token().token { |
| // Indicates an internal stage |
| Token::AtSign => CopyIntoSnowflakeKind::Location, |
| // Indicates an external stage, i.e. s3://, gcs:// or azure:// |
| Token::SingleQuotedString(s) if s.contains("://") => CopyIntoSnowflakeKind::Location, |
| _ => CopyIntoSnowflakeKind::Table, |
| }; |
| |
| let mut files: Vec<String> = vec![]; |
| let mut from_transformations: Option<Vec<StageLoadSelectItemKind>> = None; |
| let mut from_stage_alias = None; |
| let mut from_stage = None; |
| let mut stage_params = StageParamsObject { |
| url: None, |
| encryption: KeyValueOptions { options: vec![] }, |
| endpoint: None, |
| storage_integration: None, |
| credentials: KeyValueOptions { options: vec![] }, |
| }; |
| let mut from_query = None; |
| let mut partition = None; |
| let mut file_format = Vec::new(); |
| let mut pattern = None; |
| let mut validation_mode = None; |
| let mut copy_options = Vec::new(); |
| |
| let into: ObjectName = parse_snowflake_stage_name(parser)?; |
| if kind == CopyIntoSnowflakeKind::Location { |
| stage_params = parse_stage_params(parser)?; |
| } |
| |
| let into_columns = match &parser.peek_token().token { |
| Token::LParen => Some(parser.parse_parenthesized_column_list(IsOptional::Optional, true)?), |
| _ => None, |
| }; |
| |
| parser.expect_keyword_is(Keyword::FROM)?; |
| match parser.next_token().token { |
| Token::LParen if kind == CopyIntoSnowflakeKind::Table => { |
| // Data load with transformations |
| parser.expect_keyword_is(Keyword::SELECT)?; |
| from_transformations = parse_select_items_for_data_load(parser)?; |
| |
| parser.expect_keyword_is(Keyword::FROM)?; |
| from_stage = Some(parse_snowflake_stage_name(parser)?); |
| stage_params = parse_stage_params(parser)?; |
| |
| // Parse an optional alias |
| from_stage_alias = parser |
| .maybe_parse_table_alias()? |
| .map(|table_alias| table_alias.name); |
| parser.expect_token(&Token::RParen)?; |
| } |
| Token::LParen if kind == CopyIntoSnowflakeKind::Location => { |
| // Data unload with a query |
| from_query = Some(parser.parse_query()?); |
| parser.expect_token(&Token::RParen)?; |
| } |
| _ => { |
| parser.prev_token(); |
| from_stage = Some(parse_snowflake_stage_name(parser)?); |
| stage_params = parse_stage_params(parser)?; |
| |
| // as |
| from_stage_alias = if parser.parse_keyword(Keyword::AS) { |
| Some(match parser.next_token().token { |
| Token::Word(w) => Ok(Ident::new(w.value)), |
| _ => parser.expected("stage alias", parser.peek_token()), |
| }?) |
| } else { |
| None |
| }; |
| } |
| } |
| |
| loop { |
| // FILE_FORMAT |
| if parser.parse_keyword(Keyword::FILE_FORMAT) { |
| parser.expect_token(&Token::Eq)?; |
| file_format = parse_parentheses_options(parser)?; |
| // PARTITION BY |
| } else if parser.parse_keywords(&[Keyword::PARTITION, Keyword::BY]) { |
| partition = Some(Box::new(parser.parse_expr()?)) |
| // FILES |
| } else if parser.parse_keyword(Keyword::FILES) { |
| parser.expect_token(&Token::Eq)?; |
| parser.expect_token(&Token::LParen)?; |
| let mut continue_loop = true; |
| while continue_loop { |
| continue_loop = false; |
| let next_token = parser.next_token(); |
| match next_token.token { |
| Token::SingleQuotedString(s) => files.push(s), |
| _ => parser.expected("file token", next_token)?, |
| }; |
| if parser.next_token().token.eq(&Token::Comma) { |
| continue_loop = true; |
| } else { |
| parser.prev_token(); // not a comma, need to go back |
| } |
| } |
| parser.expect_token(&Token::RParen)?; |
| // PATTERN |
| } else if parser.parse_keyword(Keyword::PATTERN) { |
| parser.expect_token(&Token::Eq)?; |
| let next_token = parser.next_token(); |
| pattern = Some(match next_token.token { |
| Token::SingleQuotedString(s) => s, |
| _ => parser.expected("pattern", next_token)?, |
| }); |
| // VALIDATION MODE |
| } else if parser.parse_keyword(Keyword::VALIDATION_MODE) { |
| parser.expect_token(&Token::Eq)?; |
| validation_mode = Some(parser.next_token().token.to_string()); |
| // COPY OPTIONS |
| } else if parser.parse_keyword(Keyword::COPY_OPTIONS) { |
| parser.expect_token(&Token::Eq)?; |
| copy_options = parse_parentheses_options(parser)?; |
| } else { |
| match parser.next_token().token { |
| Token::SemiColon | Token::EOF => break, |
| Token::Comma => continue, |
| // In `COPY INTO <location>` the copy options do not have a shared key |
| // like in `COPY INTO <table>` |
| Token::Word(key) => copy_options.push(parse_option(parser, key)?), |
| _ => return parser.expected("another copy option, ; or EOF'", parser.peek_token()), |
| } |
| } |
| } |
| |
| Ok(Statement::CopyIntoSnowflake { |
| kind, |
| into, |
| into_columns, |
| from_obj: from_stage, |
| from_obj_alias: from_stage_alias, |
| stage_params, |
| from_transformations, |
| from_query, |
| files: if files.is_empty() { None } else { Some(files) }, |
| pattern, |
| file_format: KeyValueOptions { |
| options: file_format, |
| }, |
| copy_options: KeyValueOptions { |
| options: copy_options, |
| }, |
| validation_mode, |
| partition, |
| }) |
| } |
| |
| fn parse_select_items_for_data_load( |
| parser: &mut Parser, |
| ) -> Result<Option<Vec<StageLoadSelectItemKind>>, ParserError> { |
| let mut select_items: Vec<StageLoadSelectItemKind> = vec![]; |
| loop { |
| match parser.maybe_parse(parse_select_item_for_data_load)? { |
| // [<alias>.]$<file_col_num>[.<element>] [ , [<alias>.]$<file_col_num>[.<element>] ... ] |
| Some(item) => select_items.push(StageLoadSelectItemKind::StageLoadSelectItem(item)), |
| // Fallback, try to parse a standard SQL select item |
| None => select_items.push(StageLoadSelectItemKind::SelectItem( |
| parser.parse_select_item()?, |
| )), |
| } |
| if matches!(parser.peek_token_ref().token, Token::Comma) { |
| parser.advance_token(); |
| } else { |
| break; |
| } |
| } |
| Ok(Some(select_items)) |
| } |
| |
| fn parse_select_item_for_data_load( |
| parser: &mut Parser, |
| ) -> Result<StageLoadSelectItem, ParserError> { |
| let mut alias: Option<Ident> = None; |
| let mut file_col_num: i32 = 0; |
| let mut element: Option<Ident> = None; |
| let mut item_as: Option<Ident> = None; |
| |
| let next_token = parser.next_token(); |
| match next_token.token { |
| Token::Placeholder(w) => { |
| file_col_num = w.to_string().split_off(1).parse::<i32>().map_err(|e| { |
| ParserError::ParserError(format!("Could not parse '{w}' as i32: {e}")) |
| })?; |
| Ok(()) |
| } |
| Token::Word(w) => { |
| alias = Some(Ident::new(w.value)); |
| Ok(()) |
| } |
| _ => parser.expected("alias or file_col_num", next_token), |
| }?; |
| |
| if alias.is_some() { |
| parser.expect_token(&Token::Period)?; |
| // now we get col_num token |
| let col_num_token = parser.next_token(); |
| match col_num_token.token { |
| Token::Placeholder(w) => { |
| file_col_num = w.to_string().split_off(1).parse::<i32>().map_err(|e| { |
| ParserError::ParserError(format!("Could not parse '{w}' as i32: {e}")) |
| })?; |
| Ok(()) |
| } |
| _ => parser.expected("file_col_num", col_num_token), |
| }?; |
| } |
| |
| // try extracting optional element |
| match parser.next_token().token { |
| Token::Colon => { |
| // parse element |
| element = Some(Ident::new(match parser.next_token().token { |
| Token::Word(w) => Ok(w.value), |
| _ => parser.expected("file_col_num", parser.peek_token()), |
| }?)); |
| } |
| _ => { |
| // element not present move back |
| parser.prev_token(); |
| } |
| } |
| |
| // as |
| if parser.parse_keyword(Keyword::AS) { |
| item_as = Some(match parser.next_token().token { |
| Token::Word(w) => Ok(Ident::new(w.value)), |
| _ => parser.expected("column item alias", parser.peek_token()), |
| }?); |
| } |
| |
| Ok(StageLoadSelectItem { |
| alias, |
| file_col_num, |
| element, |
| item_as, |
| }) |
| } |
| |
| fn parse_stage_params(parser: &mut Parser) -> Result<StageParamsObject, ParserError> { |
| let (mut url, mut storage_integration, mut endpoint) = (None, None, None); |
| let mut encryption: KeyValueOptions = KeyValueOptions { options: vec![] }; |
| let mut credentials: KeyValueOptions = KeyValueOptions { options: vec![] }; |
| |
| // URL |
| if parser.parse_keyword(Keyword::URL) { |
| parser.expect_token(&Token::Eq)?; |
| url = Some(match parser.next_token().token { |
| Token::SingleQuotedString(word) => Ok(word), |
| _ => parser.expected("a URL statement", parser.peek_token()), |
| }?) |
| } |
| |
| // STORAGE INTEGRATION |
| if parser.parse_keyword(Keyword::STORAGE_INTEGRATION) { |
| parser.expect_token(&Token::Eq)?; |
| storage_integration = Some(parser.next_token().token.to_string()); |
| } |
| |
| // ENDPOINT |
| if parser.parse_keyword(Keyword::ENDPOINT) { |
| parser.expect_token(&Token::Eq)?; |
| endpoint = Some(match parser.next_token().token { |
| Token::SingleQuotedString(word) => Ok(word), |
| _ => parser.expected("an endpoint statement", parser.peek_token()), |
| }?) |
| } |
| |
| // CREDENTIALS |
| if parser.parse_keyword(Keyword::CREDENTIALS) { |
| parser.expect_token(&Token::Eq)?; |
| credentials = KeyValueOptions { |
| options: parse_parentheses_options(parser)?, |
| }; |
| } |
| |
| // ENCRYPTION |
| if parser.parse_keyword(Keyword::ENCRYPTION) { |
| parser.expect_token(&Token::Eq)?; |
| encryption = KeyValueOptions { |
| options: parse_parentheses_options(parser)?, |
| }; |
| } |
| |
| Ok(StageParamsObject { |
| url, |
| encryption, |
| endpoint, |
| storage_integration, |
| credentials, |
| }) |
| } |
| |
| /// Parses options separated by blank spaces, commas, or new lines like: |
| /// ABORT_DETACHED_QUERY = { TRUE | FALSE } |
| /// [ ACTIVE_PYTHON_PROFILER = { 'LINE' | 'MEMORY' } ] |
| /// [ BINARY_INPUT_FORMAT = '\<string\>' ] |
| fn parse_session_options( |
| parser: &mut Parser, |
| set: bool, |
| ) -> Result<Vec<KeyValueOption>, ParserError> { |
| let mut options: Vec<KeyValueOption> = Vec::new(); |
| let empty = String::new; |
| loop { |
| let next_token = parser.peek_token(); |
| match next_token.token { |
| Token::SemiColon | Token::EOF => break, |
| Token::Comma => { |
| parser.advance_token(); |
| continue; |
| } |
| Token::Word(key) => { |
| parser.advance_token(); |
| if set { |
| let option = parse_option(parser, key)?; |
| options.push(option); |
| } else { |
| options.push(KeyValueOption { |
| option_name: key.value, |
| option_type: KeyValueOptionType::STRING, |
| value: empty(), |
| }); |
| } |
| } |
| _ => { |
| return parser.expected("another option or end of statement", next_token); |
| } |
| } |
| } |
| if options.is_empty() { |
| Err(ParserError::ParserError( |
| "expected at least one option".to_string(), |
| )) |
| } else { |
| Ok(options) |
| } |
| } |
| |
| /// Parses options provided within parentheses like: |
| /// ( ENABLE = { TRUE | FALSE } |
| /// [ AUTO_REFRESH = { TRUE | FALSE } ] |
| /// [ REFRESH_ON_CREATE = { TRUE | FALSE } ] |
| /// [ NOTIFICATION_INTEGRATION = '<notification_integration_name>' ] ) |
| /// |
| fn parse_parentheses_options(parser: &mut Parser) -> Result<Vec<KeyValueOption>, ParserError> { |
| let mut options: Vec<KeyValueOption> = Vec::new(); |
| parser.expect_token(&Token::LParen)?; |
| loop { |
| match parser.next_token().token { |
| Token::RParen => break, |
| Token::Comma => continue, |
| Token::Word(key) => options.push(parse_option(parser, key)?), |
| _ => return parser.expected("another option or ')'", parser.peek_token()), |
| }; |
| } |
| Ok(options) |
| } |
| |
| /// Parses a `KEY = VALUE` construct based on the specified key |
| fn parse_option(parser: &mut Parser, key: Word) -> Result<KeyValueOption, ParserError> { |
| parser.expect_token(&Token::Eq)?; |
| if parser.parse_keyword(Keyword::TRUE) { |
| Ok(KeyValueOption { |
| option_name: key.value, |
| option_type: KeyValueOptionType::BOOLEAN, |
| value: "TRUE".to_string(), |
| }) |
| } else if parser.parse_keyword(Keyword::FALSE) { |
| Ok(KeyValueOption { |
| option_name: key.value, |
| option_type: KeyValueOptionType::BOOLEAN, |
| value: "FALSE".to_string(), |
| }) |
| } else { |
| match parser.next_token().token { |
| Token::SingleQuotedString(value) => Ok(KeyValueOption { |
| option_name: key.value, |
| option_type: KeyValueOptionType::STRING, |
| value, |
| }), |
| Token::Word(word) => Ok(KeyValueOption { |
| option_name: key.value, |
| option_type: KeyValueOptionType::ENUM, |
| value: word.value, |
| }), |
| Token::Number(n, _) => Ok(KeyValueOption { |
| option_name: key.value, |
| option_type: KeyValueOptionType::NUMBER, |
| value: n, |
| }), |
| _ => parser.expected("expected option value", parser.peek_token()), |
| } |
| } |
| } |
| |
| /// Parsing a property of identity or autoincrement column option |
| /// Syntax: |
| /// ```sql |
| /// [ (seed , increment) | START num INCREMENT num ] [ ORDER | NOORDER ] |
| /// ``` |
| /// [Snowflake]: https://docs.snowflake.com/en/sql-reference/sql/create-table |
| fn parse_identity_property(parser: &mut Parser) -> Result<IdentityProperty, ParserError> { |
| let parameters = if parser.consume_token(&Token::LParen) { |
| let seed = parser.parse_number()?; |
| parser.expect_token(&Token::Comma)?; |
| let increment = parser.parse_number()?; |
| parser.expect_token(&Token::RParen)?; |
| |
| Some(IdentityPropertyFormatKind::FunctionCall( |
| IdentityParameters { seed, increment }, |
| )) |
| } else if parser.parse_keyword(Keyword::START) { |
| let seed = parser.parse_number()?; |
| parser.expect_keyword_is(Keyword::INCREMENT)?; |
| let increment = parser.parse_number()?; |
| |
| Some(IdentityPropertyFormatKind::StartAndIncrement( |
| IdentityParameters { seed, increment }, |
| )) |
| } else { |
| None |
| }; |
| let order = match parser.parse_one_of_keywords(&[Keyword::ORDER, Keyword::NOORDER]) { |
| Some(Keyword::ORDER) => Some(IdentityPropertyOrder::Order), |
| Some(Keyword::NOORDER) => Some(IdentityPropertyOrder::NoOrder), |
| _ => None, |
| }; |
| Ok(IdentityProperty { parameters, order }) |
| } |
| |
| /// Parsing a policy property of column option |
| /// Syntax: |
| /// ```sql |
| /// <policy_name> [ USING ( <col_name> , <cond_col1> , ... ) |
| /// ``` |
| /// [Snowflake]: https://docs.snowflake.com/en/sql-reference/sql/create-table |
| fn parse_column_policy_property( |
| parser: &mut Parser, |
| with: bool, |
| ) -> Result<ColumnPolicyProperty, ParserError> { |
| let policy_name = parser.parse_identifier()?; |
| let using_columns = if parser.parse_keyword(Keyword::USING) { |
| parser.expect_token(&Token::LParen)?; |
| let columns = parser.parse_comma_separated(|p| p.parse_identifier())?; |
| parser.expect_token(&Token::RParen)?; |
| Some(columns) |
| } else { |
| None |
| }; |
| |
| Ok(ColumnPolicyProperty { |
| with, |
| policy_name, |
| using_columns, |
| }) |
| } |
| |
| /// Parsing tags list of column |
| /// Syntax: |
| /// ```sql |
| /// ( <tag_name> = '<tag_value>' [ , <tag_name> = '<tag_value>' , ... ] ) |
| /// ``` |
| /// [Snowflake]: https://docs.snowflake.com/en/sql-reference/sql/create-table |
| fn parse_column_tags(parser: &mut Parser, with: bool) -> Result<TagsColumnOption, ParserError> { |
| parser.expect_token(&Token::LParen)?; |
| let tags = parser.parse_comma_separated(Parser::parse_tag)?; |
| parser.expect_token(&Token::RParen)?; |
| |
| Ok(TagsColumnOption { with, tags }) |
| } |
| |
| /// Parse snowflake show objects. |
| /// <https://docs.snowflake.com/en/sql-reference/sql/show-objects> |
| fn parse_show_objects(terse: bool, parser: &mut Parser) -> Result<Statement, ParserError> { |
| let show_options = parser.parse_show_stmt_options()?; |
| Ok(Statement::ShowObjects(ShowObjects { |
| terse, |
| show_options, |
| })) |
| } |