Add support for the ENUM8 and ENUM16 data types in the ClickHouse dialect (#1574)
diff --git a/src/ast/data_type.rs b/src/ast/data_type.rs
index ccca7f4..5b0239e 100644
--- a/src/ast/data_type.rs
+++ b/src/ast/data_type.rs
@@ -25,10 +25,21 @@
#[cfg(feature = "visitor")]
use sqlparser_derive::{Visit, VisitMut};
-use crate::ast::{display_comma_separated, ObjectName, StructField, UnionField};
+use crate::ast::{display_comma_separated, Expr, ObjectName, StructField, UnionField};
use super::{value::escape_single_quote_string, ColumnDef};
+/// A single member of an `ENUM` data type.
+///
+/// A member is either a bare name or a name paired with an explicit value.
+#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)]
+#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
+#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))]
+pub enum EnumMember {
+ /// A member given by name only.
+ Name(String),
+ /// A member name with an explicit value; ClickHouse allows specifying an integer value for each enum member.
+ ///
+ /// [clickhouse](https://clickhouse.com/docs/en/sql-reference/data-types/enum)
+ NamedValue(String, Expr),
+}
+
/// SQL data types
#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
@@ -334,7 +345,7 @@
/// [clickhouse]: https://clickhouse.com/docs/en/sql-reference/data-types/nested-data-structures/nested
Nested(Vec<ColumnDef>),
/// Enums
- Enum(Vec<String>),
+ Enum(Vec<EnumMember>, Option<u8>),
/// Set
Set(Vec<String>),
/// Struct
@@ -546,13 +557,24 @@
write!(f, "{}({})", ty, modifiers.join(", "))
}
}
- DataType::Enum(vals) => {
- write!(f, "ENUM(")?;
+ DataType::Enum(vals, bits) => {
+ match bits {
+ Some(bits) => write!(f, "ENUM{}", bits),
+ None => write!(f, "ENUM"),
+ }?;
+ write!(f, "(")?;
for (i, v) in vals.iter().enumerate() {
if i != 0 {
write!(f, ", ")?;
}
- write!(f, "'{}'", escape_single_quote_string(v))?;
+ match v {
+ EnumMember::Name(name) => {
+ write!(f, "'{}'", escape_single_quote_string(name))?
+ }
+ EnumMember::NamedValue(name, value) => {
+ write!(f, "'{}' = {}", escape_single_quote_string(name), value)?
+ }
+ }
}
write!(f, ")")
}
diff --git a/src/ast/mod.rs b/src/ast/mod.rs
index 326375b..f782b36 100644
--- a/src/ast/mod.rs
+++ b/src/ast/mod.rs
@@ -40,7 +40,7 @@
use crate::tokenizer::Span;
pub use self::data_type::{
- ArrayElemTypeDef, CharLengthUnits, CharacterLength, DataType, ExactNumberInfo,
+ ArrayElemTypeDef, CharLengthUnits, CharacterLength, DataType, EnumMember, ExactNumberInfo,
StructBracketKind, TimezoneInfo,
};
pub use self::dcl::{AlterRoleOperation, ResetConfig, RoleOption, SetConfigValue, Use};
diff --git a/src/keywords.rs b/src/keywords.rs
index e00e26a..be3910f 100644
--- a/src/keywords.rs
+++ b/src/keywords.rs
@@ -286,6 +286,8 @@
ENFORCED,
ENGINE,
ENUM,
+ ENUM16,
+ ENUM8,
EPHEMERAL,
EPOCH,
EQUALS,
diff --git a/src/parser/mod.rs b/src/parser/mod.rs
index 7b175f1..04a103c 100644
--- a/src/parser/mod.rs
+++ b/src/parser/mod.rs
@@ -1049,18 +1049,18 @@
| Keyword::CURRENT_USER
| Keyword::SESSION_USER
| Keyword::USER
- if dialect_of!(self is PostgreSqlDialect | GenericDialect) =>
- {
- Ok(Some(Expr::Function(Function {
- name: ObjectName(vec![w.to_ident(w_span)]),
- parameters: FunctionArguments::None,
- args: FunctionArguments::None,
- null_treatment: None,
- filter: None,
- over: None,
- within_group: vec![],
- })))
- }
+ if dialect_of!(self is PostgreSqlDialect | GenericDialect) =>
+ {
+ Ok(Some(Expr::Function(Function {
+ name: ObjectName(vec![w.to_ident(w_span)]),
+ parameters: FunctionArguments::None,
+ args: FunctionArguments::None,
+ null_treatment: None,
+ filter: None,
+ over: None,
+ within_group: vec![],
+ })))
+ }
Keyword::CURRENT_TIMESTAMP
| Keyword::CURRENT_TIME
| Keyword::CURRENT_DATE
@@ -1075,18 +1075,18 @@
Keyword::TRY_CAST => Ok(Some(self.parse_cast_expr(CastKind::TryCast)?)),
Keyword::SAFE_CAST => Ok(Some(self.parse_cast_expr(CastKind::SafeCast)?)),
Keyword::EXISTS
- // Support parsing Databricks has a function named `exists`.
- if !dialect_of!(self is DatabricksDialect)
- || matches!(
+ // Databricks has a function named `exists`, so for that dialect this arm only applies when a `SELECT` or `WITH` keyword is peeked.
+ if !dialect_of!(self is DatabricksDialect)
+ || matches!(
self.peek_nth_token(1).token,
Token::Word(Word {
keyword: Keyword::SELECT | Keyword::WITH,
..
})
) =>
- {
- Ok(Some(self.parse_exists_expr(false)?))
- }
+ {
+ Ok(Some(self.parse_exists_expr(false)?))
+ }
Keyword::EXTRACT => Ok(Some(self.parse_extract_expr()?)),
Keyword::CEIL => Ok(Some(self.parse_ceil_floor_expr(true)?)),
Keyword::FLOOR => Ok(Some(self.parse_ceil_floor_expr(false)?)),
@@ -1103,22 +1103,22 @@
Ok(Some(self.parse_array_expr(true)?))
}
Keyword::ARRAY
- if self.peek_token() == Token::LParen
- && !dialect_of!(self is ClickHouseDialect | DatabricksDialect) =>
- {
- self.expect_token(&Token::LParen)?;
- let query = self.parse_query()?;
- self.expect_token(&Token::RParen)?;
- Ok(Some(Expr::Function(Function {
- name: ObjectName(vec![w.to_ident(w_span)]),
- parameters: FunctionArguments::None,
- args: FunctionArguments::Subquery(query),
- filter: None,
- null_treatment: None,
- over: None,
- within_group: vec![],
- })))
- }
+ if self.peek_token() == Token::LParen
+ && !dialect_of!(self is ClickHouseDialect | DatabricksDialect) =>
+ {
+ self.expect_token(&Token::LParen)?;
+ let query = self.parse_query()?;
+ self.expect_token(&Token::RParen)?;
+ Ok(Some(Expr::Function(Function {
+ name: ObjectName(vec![w.to_ident(w_span)]),
+ parameters: FunctionArguments::None,
+ args: FunctionArguments::Subquery(query),
+ filter: None,
+ null_treatment: None,
+ over: None,
+ within_group: vec![],
+ })))
+ }
Keyword::NOT => Ok(Some(self.parse_not()?)),
Keyword::MATCH if dialect_of!(self is MySqlDialect | GenericDialect) => {
Ok(Some(self.parse_match_against()?))
@@ -5023,7 +5023,7 @@
return Err(ParserError::ParserError(format!("Expected: CURRENT_USER, CURRENT_ROLE, SESSION_USER or identifier after OWNER TO. {e}")))
}
}
- },
+ }
};
Ok(owner)
}
@@ -7997,6 +7997,23 @@
}
}
+ /// Parse a parenthesized, comma-separated list of enum members.
+ ///
+ /// Each member is a string literal, optionally followed by `=` and a
+ /// number, e.g. `('a', 'b')` or `('a' = 1, 'b' = 2)`.
+ pub fn parse_enum_values(&mut self) -> Result<Vec<EnumMember>, ParserError> {
+     self.expect_token(&Token::LParen)?;
+     let members = self.parse_comma_separated(|p| {
+         let label = p.parse_literal_string()?;
+         if !p.consume_token(&Token::Eq) {
+             // No `= value` part follows: the member is just a name.
+             return Ok(EnumMember::Name(label));
+         }
+         let number = p.parse_number()?;
+         Ok(EnumMember::NamedValue(label, number))
+     })?;
+     self.expect_token(&Token::RParen)?;
+
+     Ok(members)
+ }
+
/// Parse a SQL datatype (in the context of a CREATE TABLE statement for example)
pub fn parse_data_type(&mut self) -> Result<DataType, ParserError> {
let (ty, trailing_bracket) = self.parse_data_type_helper()?;
@@ -8235,7 +8252,9 @@
Keyword::BIGDECIMAL => Ok(DataType::BigDecimal(
self.parse_exact_number_optional_precision_scale()?,
)),
- Keyword::ENUM => Ok(DataType::Enum(self.parse_string_values()?)),
+ Keyword::ENUM => Ok(DataType::Enum(self.parse_enum_values()?, None)),
+ Keyword::ENUM8 => Ok(DataType::Enum(self.parse_enum_values()?, Some(8))),
+ Keyword::ENUM16 => Ok(DataType::Enum(self.parse_enum_values()?, Some(16))),
Keyword::SET => Ok(DataType::Set(self.parse_string_values()?)),
Keyword::ARRAY => {
if dialect_of!(self is SnowflakeDialect) {
diff --git a/tests/sqlparser_common.rs b/tests/sqlparser_common.rs
index e802238..61c742d 100644
--- a/tests/sqlparser_common.rs
+++ b/tests/sqlparser_common.rs
@@ -51,6 +51,7 @@
use pretty_assertions::assert_eq;
use sqlparser::ast::ColumnOption::Comment;
use sqlparser::ast::Expr::{Identifier, UnaryOp};
+use sqlparser::ast::Value::Number;
use sqlparser::test_utils::all_dialects_except;
#[test]
@@ -9250,7 +9251,7 @@
format!(
"CACHE {table_flag} TABLE '{cache_table_name}' OPTIONS('K1' = 'V1', 'K2' = 0.88) {sql}",
)
- .as_str()
+ .as_str()
),
Statement::Cache {
table_flag: Some(ObjectName(vec![Ident::new(table_flag)])),
@@ -9275,7 +9276,7 @@
format!(
"CACHE {table_flag} TABLE '{cache_table_name}' OPTIONS('K1' = 'V1', 'K2' = 0.88) AS {sql}",
)
- .as_str()
+ .as_str()
),
Statement::Cache {
table_flag: Some(ObjectName(vec![Ident::new(table_flag)])),
@@ -11459,7 +11460,7 @@
}),
},
];
- run_explain_analyze (
+ run_explain_analyze(
all_dialects_where(|d| d.supports_explain_with_utility_options()),
"EXPLAIN (ANALYZE, VERBOSE true, WAL OFF, FORMAT YAML, USER_DEF_NUM -100.1) SELECT sqrt(id) FROM foo",
false,
@@ -12459,3 +12460,83 @@
_ => unreachable!(),
}
}
+
+#[test]
+fn parse_create_table_with_enum_types() {
+    // Builds an enum member with an explicit numeric value, e.g. `'a' = 1`.
+    let named_value = |label: &str, number: &str| {
+        EnumMember::NamedValue(
+            label.to_string(),
+            Expr::Value(Number(number.parse().unwrap(), false)),
+        )
+    };
+    // Builds a column definition with no collation and no options.
+    let column = |column_name: &str, data_type: DataType| ColumnDef {
+        name: Ident::new(column_name),
+        data_type,
+        collation: None,
+        options: vec![],
+    };
+
+    let expected_columns = vec![
+        column(
+            "foo",
+            DataType::Enum(
+                vec![named_value("a", "1"), named_value("b", "2")],
+                Some(8),
+            ),
+        ),
+        column(
+            "bar",
+            DataType::Enum(
+                vec![named_value("a", "1"), named_value("b", "2")],
+                Some(16),
+            ),
+        ),
+        column(
+            "baz",
+            DataType::Enum(
+                vec![
+                    EnumMember::Name("a".to_string()),
+                    EnumMember::Name("b".to_string()),
+                ],
+                None,
+            ),
+        ),
+    ];
+
+    let sql = "CREATE TABLE t0 (foo ENUM8('a' = 1, 'b' = 2), bar ENUM16('a' = 1, 'b' = 2), baz ENUM('a', 'b'))";
+    match all_dialects().verified_stmt(sql) {
+        Statement::CreateTable(CreateTable { name, columns, .. }) => {
+            assert_eq!(name.to_string(), "t0");
+            assert_eq!(expected_columns, columns);
+        }
+        _ => unreachable!(),
+    }
+
+    // Statements that must be rejected, paired with the expected error text.
+    let invalid_cases = [
+        // invalid case missing value for enum pair
+        (
+            "CREATE TABLE t0 (foo ENUM8('a' = 1, 'b' = ))",
+            "Expected: a value, found: )",
+        ),
+        // invalid case that name is not a string
+        (
+            "CREATE TABLE t0 (foo ENUM8('a' = 1, 2))",
+            "Expected: literal string, found: 2",
+        ),
+    ];
+    for (invalid_sql, expected_message) in invalid_cases {
+        assert_eq!(
+            all_dialects()
+                .parse_sql_statements(invalid_sql)
+                .unwrap_err(),
+            ParserError::ParserError(expected_message.to_string())
+        );
+    }
+}
diff --git a/tests/sqlparser_mysql.rs b/tests/sqlparser_mysql.rs
index f7a21f9..cac1af8 100644
--- a/tests/sqlparser_mysql.rs
+++ b/tests/sqlparser_mysql.rs
@@ -685,7 +685,7 @@
#[test]
fn parse_create_table_primary_and_unique_key() {
let sqls = ["UNIQUE KEY", "PRIMARY KEY"]
- .map(|key_ty|format!("CREATE TABLE foo (id INT PRIMARY KEY AUTO_INCREMENT, bar INT NOT NULL, CONSTRAINT bar_key {key_ty} (bar))"));
+ .map(|key_ty| format!("CREATE TABLE foo (id INT PRIMARY KEY AUTO_INCREMENT, bar INT NOT NULL, CONSTRAINT bar_key {key_ty} (bar))"));
let index_type_display = [Some(KeyOrIndexDisplay::Key), None];
@@ -753,7 +753,7 @@
#[test]
fn parse_create_table_primary_and_unique_key_with_index_options() {
let sqls = ["UNIQUE INDEX", "PRIMARY KEY"]
- .map(|key_ty|format!("CREATE TABLE foo (bar INT, var INT, CONSTRAINT constr {key_ty} index_name (bar, var) USING HASH COMMENT 'yes, ' USING BTREE COMMENT 'MySQL allows')"));
+ .map(|key_ty| format!("CREATE TABLE foo (bar INT, var INT, CONSTRAINT constr {key_ty} index_name (bar, var) USING HASH COMMENT 'yes, ' USING BTREE COMMENT 'MySQL allows')"));
let index_type_display = [Some(KeyOrIndexDisplay::Index), None];
@@ -827,7 +827,7 @@
#[test]
fn parse_create_table_primary_and_unique_key_characteristic_test() {
let sqls = ["UNIQUE INDEX", "PRIMARY KEY"]
- .map(|key_ty|format!("CREATE TABLE x (y INT, CONSTRAINT constr {key_ty} (y) NOT DEFERRABLE INITIALLY IMMEDIATE)"));
+ .map(|key_ty| format!("CREATE TABLE x (y INT, CONSTRAINT constr {key_ty} (y) NOT DEFERRABLE INITIALLY IMMEDIATE)"));
for sql in &sqls {
mysql_and_generic().verified_stmt(sql);
}
@@ -890,7 +890,13 @@
},
ColumnDef {
name: Ident::new("baz"),
- data_type: DataType::Enum(vec!["a".to_string(), "b".to_string()]),
+ data_type: DataType::Enum(
+ vec![
+ EnumMember::Name("a".to_string()),
+ EnumMember::Name("b".to_string())
+ ],
+ None
+ ),
collation: None,
options: vec![],
}