| // Licensed to the Apache Software Foundation (ASF) under one |
| // or more contributor license agreements. See the NOTICE file |
| // distributed with this work for additional information |
| // regarding copyright ownership. The ASF licenses this file |
| // to you under the Apache License, Version 2.0 (the |
| // "License"); you may not use this file except in compliance |
| // with the License. You may obtain a copy of the License at |
| // |
| // http://www.apache.org/licenses/LICENSE-2.0 |
| // |
| // Unless required by applicable law or agreed to in writing, |
| // software distributed under the License is distributed on an |
| // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
| // KIND, either express or implied. See the License for the |
| // specific language governing permissions and limitations |
| // under the License. |
| |
| #![warn(clippy::all)] |
| |
| //! Test SQL syntax specific to Hive. The parser based on the generic dialect |
| //! is also tested (on the inputs it can handle). |
| |
| use sqlparser::ast::{ |
| ClusteredBy, CommentDef, CreateFunctionBody, CreateFunctionUsing, CreateTable, Expr, Function, |
| FunctionArgumentList, FunctionArguments, Ident, ObjectName, OneOrManyWithParens, OrderByExpr, |
| SelectItem, Statement, TableFactor, UnaryOperator, Use, Value, |
| }; |
| use sqlparser::dialect::{GenericDialect, HiveDialect, MsSqlDialect}; |
| use sqlparser::parser::ParserError; |
| use sqlparser::test_utils::*; |
| |
| #[test] |
| fn parse_table_create() { |
| let sql = r#"CREATE TABLE IF NOT EXISTS db.table (a BIGINT, b STRING, c TIMESTAMP) PARTITIONED BY (d STRING, e TIMESTAMP) STORED AS ORC LOCATION 's3://...' TBLPROPERTIES ("prop" = "2", "asdf" = '1234', 'asdf' = "1234", "asdf" = 2)"#; |
| let iof = r#"CREATE TABLE IF NOT EXISTS db.table (a BIGINT, b STRING, c TIMESTAMP) PARTITIONED BY (d STRING, e TIMESTAMP) STORED AS INPUTFORMAT 'org.apache.hadoop.hive.ql.io.orc.OrcInputFormat' OUTPUTFORMAT 'org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat' LOCATION 's3://...'"#; |
| let serdeproperties = r#"CREATE EXTERNAL TABLE IF NOT EXISTS db.table (a STRING, b STRING, c STRING) PARTITIONED BY (d STRING, e STRING) ROW FORMAT SERDE 'org.apache.hadoop.hive.serde.config' WITH SERDEPROPERTIES ('prop_a' = 'a', 'prop_b' = 'b') STORED AS TEXTFILE LOCATION 's3://...' TBLPROPERTIES ('prop_c' = 'c')"#; |
| |
| hive().verified_stmt(sql); |
| hive().verified_stmt(iof); |
| hive().verified_stmt(serdeproperties); |
| } |
| |
| #[test] |
| fn parse_describe() { |
| hive_and_generic().verified_stmt(r#"DESCRIBE namespace.`table`"#); |
| hive_and_generic().verified_stmt(r#"DESCRIBE namespace.table"#); |
| hive_and_generic().verified_stmt(r#"DESCRIBE table"#); |
| } |
| |
| #[test] |
| fn explain_describe_formatted() { |
| hive().verified_stmt("DESCRIBE FORMATTED test.table"); |
| } |
| |
| #[test] |
| fn explain_describe_extended() { |
| hive().verified_stmt("DESCRIBE EXTENDED test.table"); |
| } |
| |
| #[test] |
| fn parse_insert_overwrite() { |
| let insert_partitions = r#"INSERT OVERWRITE TABLE db.new_table PARTITION (a = '1', b) SELECT a, b, c FROM db.table"#; |
| hive().verified_stmt(insert_partitions); |
| } |
| |
| #[test] |
| fn test_truncate() { |
| let truncate = r#"TRUNCATE TABLE db.table"#; |
| hive().verified_stmt(truncate); |
| } |
| |
| #[test] |
| fn parse_analyze() { |
| let analyze = r#"ANALYZE TABLE db.table_name PARTITION (a = '1234', b) COMPUTE STATISTICS NOSCAN CACHE METADATA"#; |
| hive().verified_stmt(analyze); |
| } |
| |
| #[test] |
| fn parse_analyze_for_columns() { |
| let analyze = |
| r#"ANALYZE TABLE db.table_name PARTITION (a = '1234', b) COMPUTE STATISTICS FOR COLUMNS"#; |
| hive().verified_stmt(analyze); |
| } |
| |
| #[test] |
| fn parse_msck() { |
| let msck = r#"MSCK REPAIR TABLE db.table_name ADD PARTITIONS"#; |
| let msck2 = r#"MSCK REPAIR TABLE db.table_name"#; |
| hive().verified_stmt(msck); |
| hive().verified_stmt(msck2); |
| } |
| |
| #[test] |
| fn parse_set() { |
| let set = "SET HIVEVAR:name = a, b, c_d"; |
| hive().verified_stmt(set); |
| } |
| |
| #[test] |
| fn test_spaceship() { |
| let spaceship = "SELECT * FROM db.table WHERE a <=> b"; |
| hive().verified_stmt(spaceship); |
| } |
| |
| #[test] |
| fn parse_with_cte() { |
| let with = "WITH a AS (SELECT * FROM b) INSERT INTO TABLE db.table_table PARTITION (a) SELECT * FROM b"; |
| hive().verified_stmt(with); |
| } |
| |
| #[test] |
| fn drop_table_purge() { |
| let purge = "DROP TABLE db.table_name PURGE"; |
| hive().verified_stmt(purge); |
| } |
| |
| #[test] |
| fn create_table_like() { |
| let like = "CREATE TABLE db.table_name LIKE db.other_table"; |
| hive().verified_stmt(like); |
| } |
| |
| #[test] |
| fn create_table_with_comment() { |
| let sql = concat!( |
| "CREATE TABLE db.table_name (a INT, b STRING)", |
| " COMMENT 'table comment'", |
| " PARTITIONED BY (a INT, b STRING)", |
| " CLUSTERED BY (a, b) SORTED BY (a ASC, b DESC)", |
| " INTO 4 BUCKETS" |
| ); |
| match hive().verified_stmt(sql) { |
| Statement::CreateTable(CreateTable { comment, .. }) => { |
| assert_eq!( |
| comment, |
| Some(CommentDef::AfterColumnDefsWithoutEq( |
| "table comment".to_string() |
| )) |
| ) |
| } |
| _ => unreachable!(), |
| } |
| |
| // negative test case |
| let invalid_sql = concat!( |
| "CREATE TABLE db.table_name (a INT, b STRING)", |
| " PARTITIONED BY (a INT, b STRING)", |
| " COMMENT 'table comment'", |
| ); |
| assert_eq!( |
| hive().parse_sql_statements(invalid_sql).unwrap_err(), |
| ParserError::ParserError("Expected: end of statement, found: COMMENT".to_string()) |
| ); |
| } |
| |
| #[test] |
| fn create_table_with_clustered_by() { |
| let sql = concat!( |
| "CREATE TABLE db.table_name (a INT, b STRING)", |
| " PARTITIONED BY (a INT, b STRING)", |
| " CLUSTERED BY (a, b) SORTED BY (a ASC, b DESC)", |
| " INTO 4 BUCKETS" |
| ); |
| match hive_and_generic().verified_stmt(sql) { |
| Statement::CreateTable(CreateTable { clustered_by, .. }) => { |
| assert_eq!( |
| clustered_by.unwrap(), |
| ClusteredBy { |
| columns: vec![Ident::new("a"), Ident::new("b")], |
| sorted_by: Some(vec![ |
| OrderByExpr { |
| expr: Expr::Identifier(Ident::new("a")), |
| asc: Some(true), |
| nulls_first: None, |
| with_fill: None, |
| }, |
| OrderByExpr { |
| expr: Expr::Identifier(Ident::new("b")), |
| asc: Some(false), |
| nulls_first: None, |
| with_fill: None, |
| }, |
| ]), |
| num_buckets: Value::Number("4".parse().unwrap(), false), |
| } |
| ) |
| } |
| _ => unreachable!(), |
| } |
| |
| // SORTED BY is optional |
| hive_and_generic().verified_stmt("CREATE TABLE db.table_name (a INT, b STRING) PARTITIONED BY (a INT, b STRING) CLUSTERED BY (a, b) INTO 4 BUCKETS"); |
| |
| // missing INTO BUCKETS |
| assert_eq!( |
| hive_and_generic().parse_sql_statements( |
| "CREATE TABLE db.table_name (a INT, b STRING) PARTITIONED BY (a INT, b STRING) CLUSTERED BY (a, b)" |
| ).unwrap_err(), |
| ParserError::ParserError("Expected: INTO, found: EOF".to_string()) |
| ); |
| // missing CLUSTER BY columns |
| assert_eq!( |
| hive_and_generic().parse_sql_statements( |
| "CREATE TABLE db.table_name (a INT, b STRING) PARTITIONED BY (a INT, b STRING) CLUSTERED BY () INTO 4 BUCKETS" |
| ).unwrap_err(), |
| ParserError::ParserError("Expected: identifier, found: )".to_string()) |
| ); |
| // missing SORT BY columns |
| assert_eq!( |
| hive_and_generic().parse_sql_statements( |
| "CREATE TABLE db.table_name (a INT, b STRING) PARTITIONED BY (a INT, b STRING) CLUSTERED BY (a, b) SORTED BY INTO 4 BUCKETS" |
| ).unwrap_err(), |
| ParserError::ParserError("Expected: (, found: INTO".to_string()) |
| ); |
| // missing number BUCKETS |
| assert_eq!( |
| hive_and_generic().parse_sql_statements( |
| "CREATE TABLE db.table_name (a INT, b STRING) PARTITIONED BY (a INT, b STRING) CLUSTERED BY (a, b) SORTED BY (a ASC, b DESC) INTO" |
| ).unwrap_err(), |
| ParserError::ParserError("Expected: a value, found: EOF".to_string()) |
| ); |
| } |
| |
| // Turning off this test until we can parse identifiers starting with numbers :( |
| #[test] |
| fn test_identifier() { |
| let between = "SELECT a AS 3_barrr_asdf FROM db.table_name"; |
| hive().verified_stmt(between); |
| } |
| |
| #[test] |
| fn test_alter_partition() { |
| let alter = "ALTER TABLE db.table PARTITION (a = 2) RENAME TO PARTITION (a = 1)"; |
| hive().verified_stmt(alter); |
| } |
| |
| #[test] |
| fn test_alter_with_location() { |
| let alter = |
| "ALTER TABLE db.table PARTITION (a = 2) RENAME TO PARTITION (a = 1) LOCATION 's3://...'"; |
| hive().verified_stmt(alter); |
| } |
| |
| #[test] |
| fn test_alter_with_set_location() { |
| let alter = "ALTER TABLE db.table PARTITION (a = 2) RENAME TO PARTITION (a = 1) SET LOCATION 's3://...'"; |
| hive().verified_stmt(alter); |
| } |
| |
| #[test] |
| fn test_add_partition() { |
| let add = "ALTER TABLE db.table ADD IF NOT EXISTS PARTITION (a = 'asdf', b = 2)"; |
| hive().verified_stmt(add); |
| } |
| |
| #[test] |
| fn test_add_multiple_partitions() { |
| let add = "ALTER TABLE db.table ADD IF NOT EXISTS PARTITION (`a` = 'asdf', `b` = 2) PARTITION (`a` = 'asdh', `b` = 3)"; |
| hive().verified_stmt(add); |
| } |
| |
| #[test] |
| fn test_drop_partition() { |
| let drop = "ALTER TABLE db.table DROP PARTITION (a = 1)"; |
| hive().verified_stmt(drop); |
| } |
| |
| #[test] |
| fn test_drop_if_exists() { |
| let drop = "ALTER TABLE db.table DROP IF EXISTS PARTITION (a = 'b', c = 'd')"; |
| hive().verified_stmt(drop); |
| } |
| |
| #[test] |
| fn test_cluster_by() { |
| let cluster = "SELECT a FROM db.table CLUSTER BY a, b"; |
| hive().verified_stmt(cluster); |
| } |
| |
| #[test] |
| fn test_distribute_by() { |
| let cluster = "SELECT a FROM db.table DISTRIBUTE BY a, b"; |
| hive().verified_stmt(cluster); |
| } |
| |
| #[test] |
| fn no_join_condition() { |
| let join = "SELECT a, b FROM db.table_name JOIN a"; |
| hive().verified_stmt(join); |
| } |
| |
| #[test] |
| fn columns_after_partition() { |
| let query = "INSERT INTO db.table_name PARTITION (a, b) (c, d) SELECT a, b, c, d FROM db.table"; |
| hive().verified_stmt(query); |
| } |
| |
| #[test] |
| fn long_numerics() { |
| let query = r#"SELECT MIN(MIN(10, 5), 1L) AS a"#; |
| hive().verified_stmt(query); |
| } |
| |
| #[test] |
| fn decimal_precision() { |
| let query = "SELECT CAST(a AS DECIMAL(18,2)) FROM db.table"; |
| hive().verified_stmt(query); |
| } |
| |
| #[test] |
| fn create_temp_table() { |
| let query = "CREATE TEMPORARY TABLE db.table (a INT NOT NULL)"; |
| let query2 = "CREATE TEMP TABLE db.table (a INT NOT NULL)"; |
| |
| hive().verified_stmt(query); |
| hive().one_statement_parses_to(query2, query); |
| } |
| |
| #[test] |
| fn create_delimited_table() { |
| let query = "CREATE TABLE tab (cola STRING, colb BIGINT) ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t' ESCAPED BY '\"' MAP KEYS TERMINATED BY '\"'"; |
| hive().verified_stmt(query); |
| } |
| |
| #[test] |
| fn create_local_directory() { |
| let query = |
| "INSERT OVERWRITE LOCAL DIRECTORY '/home/blah' STORED AS TEXTFILE SELECT * FROM db.table"; |
| hive().verified_stmt(query); |
| } |
| |
| #[test] |
| fn lateral_view() { |
| let view = "SELECT a FROM db.table LATERAL VIEW explode(a) t AS j, P LATERAL VIEW OUTER explode(a) t AS a, b WHERE a = 1"; |
| hive().verified_stmt(view); |
| } |
| |
| #[test] |
| fn sort_by() { |
| let sort_by = "SELECT * FROM db.table SORT BY a"; |
| hive().verified_stmt(sort_by); |
| } |
| |
| #[test] |
| fn rename_table() { |
| let rename = "ALTER TABLE db.table_name RENAME TO db.table_2"; |
| hive().verified_stmt(rename); |
| } |
| |
| #[test] |
| fn map_access() { |
| let rename = r#"SELECT a.b["asdf"] FROM db.table WHERE a = 2"#; |
| hive().verified_stmt(rename); |
| } |
| |
| #[test] |
| fn from_cte() { |
| let rename = |
| "WITH cte AS (SELECT * FROM a.b) FROM cte INSERT INTO TABLE a.b PARTITION (a) SELECT *"; |
| println!("{}", hive().verified_stmt(rename)); |
| } |
| |
| #[test] |
| fn set_statement_with_minus() { |
| assert_eq!( |
| hive().verified_stmt("SET hive.tez.java.opts = -Xmx4g"), |
| Statement::SetVariable { |
| local: false, |
| hivevar: false, |
| variables: OneOrManyWithParens::One(ObjectName(vec![ |
| Ident::new("hive"), |
| Ident::new("tez"), |
| Ident::new("java"), |
| Ident::new("opts") |
| ])), |
| value: vec![Expr::UnaryOp { |
| op: UnaryOperator::Minus, |
| expr: Box::new(Expr::Identifier(Ident::new("Xmx4g"))) |
| }], |
| } |
| ); |
| |
| assert_eq!( |
| hive().parse_sql_statements("SET hive.tez.java.opts = -"), |
| Err(ParserError::ParserError( |
| "Expected: variable value, found: EOF".to_string() |
| )) |
| ) |
| } |
| |
| #[test] |
| fn parse_create_function() { |
| let sql = "CREATE TEMPORARY FUNCTION mydb.myfunc AS 'org.random.class.Name' USING JAR 'hdfs://somewhere.com:8020/very/far'"; |
| match hive().verified_stmt(sql) { |
| Statement::CreateFunction { |
| temporary, |
| name, |
| function_body, |
| using, |
| .. |
| } => { |
| assert!(temporary); |
| assert_eq!(name.to_string(), "mydb.myfunc"); |
| assert_eq!( |
| function_body, |
| Some(CreateFunctionBody::AsBeforeOptions(Expr::Value( |
| Value::SingleQuotedString("org.random.class.Name".to_string()) |
| ))) |
| ); |
| assert_eq!( |
| using, |
| Some(CreateFunctionUsing::Jar( |
| "hdfs://somewhere.com:8020/very/far".to_string() |
| )), |
| ) |
| } |
| _ => unreachable!(), |
| } |
| |
| // Test error in dialect that doesn't support parsing CREATE FUNCTION |
| let unsupported_dialects = TestedDialects { |
| dialects: vec![Box::new(MsSqlDialect {})], |
| options: None, |
| }; |
| |
| assert_eq!( |
| unsupported_dialects.parse_sql_statements(sql).unwrap_err(), |
| ParserError::ParserError( |
| "Expected: an object type after CREATE, found: FUNCTION".to_string() |
| ) |
| ); |
| |
| let sql = "CREATE TEMPORARY FUNCTION mydb.myfunc AS 'org.random.class.Name' USING JAR"; |
| assert_eq!( |
| hive().parse_sql_statements(sql).unwrap_err(), |
| ParserError::ParserError("Expected: literal string, found: EOF".to_string()), |
| ); |
| } |
| |
| #[test] |
| fn filter_as_alias() { |
| let sql = "SELECT name filter FROM region"; |
| let expected = "SELECT name AS filter FROM region"; |
| println!("{}", hive().one_statement_parses_to(sql, expected)); |
| } |
| |
| #[test] |
| fn parse_delimited_identifiers() { |
| // check that quoted identifiers in any position remain quoted after serialization |
| let select = hive().verified_only_select( |
| r#"SELECT "alias"."bar baz", "myfun"(), "simple id" AS "column alias" FROM "a table" AS "alias""#, |
| ); |
| // check FROM |
| match only(select.from).relation { |
| TableFactor::Table { |
| name, |
| alias, |
| args, |
| with_hints, |
| version, |
| with_ordinality: _, |
| partitions: _, |
| } => { |
| assert_eq!(vec![Ident::with_quote('"', "a table")], name.0); |
| assert_eq!(Ident::with_quote('"', "alias"), alias.unwrap().name); |
| assert!(args.is_none()); |
| assert!(with_hints.is_empty()); |
| assert!(version.is_none()); |
| } |
| _ => panic!("Expecting TableFactor::Table"), |
| } |
| // check SELECT |
| assert_eq!(3, select.projection.len()); |
| assert_eq!( |
| &Expr::CompoundIdentifier(vec![ |
| Ident::with_quote('"', "alias"), |
| Ident::with_quote('"', "bar baz"), |
| ]), |
| expr_from_projection(&select.projection[0]), |
| ); |
| assert_eq!( |
| &Expr::Function(Function { |
| name: ObjectName(vec![Ident::with_quote('"', "myfun")]), |
| parameters: FunctionArguments::None, |
| args: FunctionArguments::List(FunctionArgumentList { |
| duplicate_treatment: None, |
| args: vec![], |
| clauses: vec![], |
| }), |
| null_treatment: None, |
| filter: None, |
| over: None, |
| within_group: vec![], |
| }), |
| expr_from_projection(&select.projection[1]), |
| ); |
| match &select.projection[2] { |
| SelectItem::ExprWithAlias { expr, alias } => { |
| assert_eq!(&Expr::Identifier(Ident::with_quote('"', "simple id")), expr); |
| assert_eq!(&Ident::with_quote('"', "column alias"), alias); |
| } |
| _ => panic!("Expected: ExprWithAlias"), |
| } |
| |
| hive().verified_stmt(r#"CREATE TABLE "foo" ("bar" "int")"#); |
| hive().verified_stmt(r#"ALTER TABLE foo ADD CONSTRAINT "bar" PRIMARY KEY (baz)"#); |
| //TODO verified_stmt(r#"UPDATE foo SET "bar" = 5"#); |
| } |
| |
| #[test] |
| fn parse_use() { |
| let valid_object_names = ["mydb", "SCHEMA", "DATABASE", "CATALOG", "WAREHOUSE"]; |
| let quote_styles = ['\'', '"', '`']; |
| for object_name in &valid_object_names { |
| // Test single identifier without quotes |
| assert_eq!( |
| hive().verified_stmt(&format!("USE {}", object_name)), |
| Statement::Use(Use::Object(ObjectName(vec![Ident::new( |
| object_name.to_string() |
| )]))) |
| ); |
| for "e in "e_styles { |
| // Test single identifier with different type of quotes |
| assert_eq!( |
| hive().verified_stmt(&format!("USE {}{}{}", quote, object_name, quote)), |
| Statement::Use(Use::Object(ObjectName(vec![Ident::with_quote( |
| quote, |
| object_name.to_string(), |
| )]))) |
| ); |
| } |
| } |
| // Test DEFAULT keyword that is special case in Hive |
| assert_eq!( |
| hive().verified_stmt("USE DEFAULT"), |
| Statement::Use(Use::Default) |
| ); |
| } |
| |
| fn hive() -> TestedDialects { |
| TestedDialects { |
| dialects: vec![Box::new(HiveDialect {})], |
| options: None, |
| } |
| } |
| |
| fn hive_and_generic() -> TestedDialects { |
| TestedDialects { |
| dialects: vec![Box::new(HiveDialect {}), Box::new(GenericDialect {})], |
| options: None, |
| } |
| } |