| // Licensed to the Apache Software Foundation (ASF) under one |
| // or more contributor license agreements. See the NOTICE file |
| // distributed with this work for additional information |
| // regarding copyright ownership. The ASF licenses this file |
| // to you under the Apache License, Version 2.0 (the |
| // "License"); you may not use this file except in compliance |
| // with the License. You may obtain a copy of the License at |
| // |
| // http://www.apache.org/licenses/LICENSE-2.0 |
| // |
| // Unless required by applicable law or agreed to in writing, |
| // software distributed under the License is distributed on an |
| // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
| // KIND, either express or implied. See the License for the |
| // specific language governing permissions and limitations |
| // under the License. |
| |
| //! Logical Expressions: [`Expr`] |
| |
| use std::cmp::Ordering; |
| use std::collections::HashSet; |
| use std::fmt::{self, Display, Formatter, Write}; |
| use std::hash::{Hash, Hasher}; |
| use std::mem; |
| use std::sync::Arc; |
| |
| use crate::expr_fn::binary_expr; |
| use crate::function::WindowFunctionSimplification; |
| use crate::logical_plan::Subquery; |
| use crate::{AggregateUDF, Volatility}; |
| use crate::{ExprSchemable, Operator, Signature, WindowFrame, WindowUDF}; |
| |
| use arrow::datatypes::{DataType, Field, FieldRef}; |
| use datafusion_common::cse::{HashNode, NormalizeEq, Normalizeable}; |
| use datafusion_common::tree_node::{ |
| Transformed, TransformedResult, TreeNode, TreeNodeContainer, TreeNodeRecursion, |
| }; |
| use datafusion_common::{ |
| Column, DFSchema, HashMap, Result, ScalarValue, Spans, TableReference, |
| }; |
| use datafusion_functions_window_common::field::WindowUDFFieldArgs; |
| #[cfg(feature = "sql")] |
| use sqlparser::ast::{ |
| ExceptSelectItem, ExcludeSelectItem, IlikeSelectItem, RenameSelectItem, |
| ReplaceSelectElement, |
| }; |
| |
| // Moved in 51.0.0 to datafusion_common |
| pub use datafusion_common::metadata::FieldMetadata; |
| use datafusion_common::metadata::ScalarAndMetadata; |
| |
// Crate-local counterpart of `sqlparser::ast::NullTreatment`. A separate
// definition is needed so the type still exists when the `sql` feature is
// disabled.
#[derive(Debug, Clone, Copy, Eq, PartialEq, Hash, Ord, PartialOrd)]
pub enum NullTreatment {
    IgnoreNulls,
    RespectNulls,
}

impl Display for NullTreatment {
    /// Writes the SQL keyword form: `IGNORE NULLS` or `RESPECT NULLS`.
    fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
        let keyword = match self {
            Self::IgnoreNulls => "IGNORE NULLS",
            Self::RespectNulls => "RESPECT NULLS",
        };
        f.write_str(keyword)
    }
}
| |
#[cfg(feature = "sql")]
impl From<sqlparser::ast::NullTreatment> for NullTreatment {
    /// Maps each parser variant onto the identically named local variant.
    fn from(value: sqlparser::ast::NullTreatment) -> Self {
        use sqlparser::ast::NullTreatment as SqlNullTreatment;

        match value {
            SqlNullTreatment::IgnoreNulls => NullTreatment::IgnoreNulls,
            SqlNullTreatment::RespectNulls => NullTreatment::RespectNulls,
        }
    }
}
| |
| /// Represents logical expressions such as `A + 1`, or `CAST(c1 AS int)`. |
| /// |
| /// For example the expression `A + 1` will be represented as |
| /// |
| ///```text |
| /// BinaryExpr { |
| /// left: Expr::Column("A"), |
| /// op: Operator::Plus, |
| /// right: Expr::Literal(ScalarValue::Int32(Some(1)), None) |
| /// } |
| /// ``` |
| /// |
| /// # Creating Expressions |
| /// |
| /// `Expr`s can be created directly, but it is often easier and less verbose to |
| /// use the fluent APIs in [`crate::expr_fn`] such as [`col`] and [`lit`], or |
| /// methods such as [`Expr::alias`], [`Expr::cast_to`], and [`Expr::Like`]. |
| /// |
| /// See also [`ExprFunctionExt`] for creating aggregate and window functions. |
| /// |
| /// [`ExprFunctionExt`]: crate::expr_fn::ExprFunctionExt |
| /// |
| /// # Printing Expressions |
| /// |
| /// You can print `Expr`s using the `Debug` trait, `Display` trait, or |
| /// [`Self::human_display`]. See the [examples](#examples-displaying-exprs) below. |
| /// |
| /// If you need SQL to pass to other systems, consider using [`Unparser`]. |
| /// |
| /// [`Unparser`]: https://docs.rs/datafusion/latest/datafusion/sql/unparser/struct.Unparser.html |
| /// |
| /// # Schema Access |
| /// |
| /// See [`ExprSchemable::get_type`] to access the [`DataType`] and nullability |
| /// of an `Expr`. |
| /// |
| /// # Visiting and Rewriting `Expr`s |
| /// |
| /// The `Expr` struct implements the [`TreeNode`] trait for walking and |
| /// rewriting expressions. For example [`TreeNode::apply`] recursively visits an |
| /// `Expr` and [`TreeNode::transform`] can be used to rewrite an expression. See |
| /// the examples below and [`TreeNode`] for more information. |
| /// |
| /// # Examples: Creating and Using `Expr`s |
| /// |
| /// ## Column References and Literals |
| /// |
| /// [`Expr::Column`] refer to the values of columns and are often created with |
| /// the [`col`] function. For example to create an expression `c1` referring to |
| /// column named "c1": |
| /// |
| /// [`col`]: crate::expr_fn::col |
| /// |
| /// ``` |
| /// # use datafusion_common::Column; |
| /// # use datafusion_expr::{lit, col, Expr}; |
| /// let expr = col("c1"); |
| /// assert_eq!(expr, Expr::Column(Column::from_name("c1"))); |
| /// ``` |
| /// |
| /// [`Expr::Literal`] refer to literal, or constant, values. These are created |
| /// with the [`lit`] function. For example to create an expression `42`: |
| /// |
| /// [`lit`]: crate::lit |
| /// |
| /// ``` |
| /// # use datafusion_common::{Column, ScalarValue}; |
| /// # use datafusion_expr::{lit, col, Expr}; |
| /// // All literals are strongly typed in DataFusion. To make an `i64` 42: |
| /// let expr = lit(42i64); |
| /// assert_eq!(expr, Expr::Literal(ScalarValue::Int64(Some(42)), None)); |
| /// // To make a (typed) NULL: |
| /// let expr = Expr::Literal(ScalarValue::Int64(None), None); |
| /// // to make an (untyped) NULL (the optimizer will coerce this to the correct type): |
| /// let expr = lit(ScalarValue::Null); |
| /// ``` |
| /// |
| /// ## Binary Expressions |
| /// |
| /// Exprs implement traits that allow easy to understand construction of more |
| /// complex expressions. For example, to create `c1 + c2` to add columns "c1" and |
| /// "c2" together |
| /// |
| /// ``` |
| /// # use datafusion_expr::{lit, col, Operator, Expr}; |
| /// // Use the `+` operator to add two columns together |
| /// let expr = col("c1") + col("c2"); |
| /// assert!(matches!(expr, Expr::BinaryExpr { .. })); |
| /// if let Expr::BinaryExpr(binary_expr) = expr { |
| /// assert_eq!(*binary_expr.left, col("c1")); |
| /// assert_eq!(*binary_expr.right, col("c2")); |
| /// assert_eq!(binary_expr.op, Operator::Plus); |
| /// } |
| /// ``` |
| /// |
| /// The expression `c1 = 42` compares the value in column "c1" to the |
| /// literal value `42`: |
| /// |
| /// ``` |
| /// # use datafusion_common::ScalarValue; |
| /// # use datafusion_expr::{lit, col, Operator, Expr}; |
| /// let expr = col("c1").eq(lit(42_i32)); |
| /// assert!(matches!(expr, Expr::BinaryExpr { .. })); |
| /// if let Expr::BinaryExpr(binary_expr) = expr { |
| /// assert_eq!(*binary_expr.left, col("c1")); |
| /// let scalar = ScalarValue::Int32(Some(42)); |
| /// assert_eq!(*binary_expr.right, Expr::Literal(scalar, None)); |
| /// assert_eq!(binary_expr.op, Operator::Eq); |
| /// } |
| /// ``` |
| /// |
| /// Here is how to implement the equivalent of `SELECT *` to select all |
| /// [`Expr::Column`] from a [`DFSchema`]'s columns: |
| /// |
| /// ``` |
| /// # use arrow::datatypes::{DataType, Field, Schema}; |
| /// # use datafusion_common::{DFSchema, Column}; |
| /// # use datafusion_expr::Expr; |
| /// // Create a schema c1(int, c2 float) |
| /// let arrow_schema = Schema::new(vec![ |
| /// Field::new("c1", DataType::Int32, false), |
| /// Field::new("c2", DataType::Float64, false), |
| /// ]); |
| /// // DFSchema is an Arrow schema with optional relation name |
| /// let df_schema = DFSchema::try_from_qualified_schema("t1", &arrow_schema).unwrap(); |
| /// |
| /// // Form Vec<Expr> with an expression for each column in the schema |
| /// let exprs: Vec<_> = df_schema.iter().map(Expr::from).collect(); |
| /// |
| /// assert_eq!( |
| /// exprs, |
| /// vec![ |
| /// Expr::from(Column::from_qualified_name("t1.c1")), |
| /// Expr::from(Column::from_qualified_name("t1.c2")), |
| /// ] |
| /// ); |
| /// ``` |
| /// |
| /// # Examples: Displaying `Exprs` |
| /// |
| /// There are three ways to print an `Expr` depending on the use case. |
| /// |
| /// ## Use `Debug` trait |
| /// |
| /// Following Rust conventions, the `Debug` implementation prints out the |
| /// internal structure of the expression, which is useful for debugging. |
| /// |
| /// ``` |
| /// # use datafusion_expr::{lit, col}; |
| /// let expr = col("c1") + lit(42); |
| /// assert_eq!(format!("{expr:?}"), "BinaryExpr(BinaryExpr { left: Column(Column { relation: None, name: \"c1\" }), op: Plus, right: Literal(Int32(42), None) })"); |
| /// ``` |
| /// |
| /// ## Use the `Display` trait (detailed expression) |
| /// |
| /// The `Display` implementation prints out the expression in a SQL-like form, |
| /// but has additional details such as the data type of literals. This is useful |
| /// for understanding the expression in more detail and is used for the low level |
| /// [`ExplainFormat::Indent`] explain plan format. |
| /// |
| /// [`ExplainFormat::Indent`]: crate::logical_plan::ExplainFormat::Indent |
| /// |
| /// ``` |
| /// # use datafusion_expr::{lit, col}; |
| /// let expr = col("c1") + lit(42); |
| /// assert_eq!(format!("{expr}"), "c1 + Int32(42)"); |
| /// ``` |
| /// |
| /// ## Use [`Self::human_display`] (human readable) |
| /// |
| /// [`Self::human_display`] prints out the expression in a SQL-like form, optimized |
| /// for human consumption by end users. It is used for the |
| /// [`ExplainFormat::Tree`] explain plan format. |
| /// |
| /// [`ExplainFormat::Tree`]: crate::logical_plan::ExplainFormat::Tree |
| /// |
| ///``` |
| /// # use datafusion_expr::{lit, col}; |
| /// let expr = col("c1") + lit(42); |
| /// assert_eq!(format!("{}", expr.human_display()), "c1 + 42"); |
| /// ``` |
| /// |
| /// # Examples: Visiting and Rewriting `Expr`s |
| /// |
| /// Here is an example that finds all literals in an `Expr` tree: |
| /// ``` |
| /// # use std::collections::{HashSet}; |
| /// use datafusion_common::ScalarValue; |
| /// # use datafusion_expr::{col, Expr, lit}; |
| /// use datafusion_common::tree_node::{TreeNode, TreeNodeRecursion}; |
| /// // Expression a = 5 AND b = 6 |
| /// let expr = col("a").eq(lit(5)) & col("b").eq(lit(6)); |
| /// // find all literals and collect them in a HashSet |
| /// let mut scalars = HashSet::new(); |
| /// // apply recursively visits all nodes in the expression tree |
| /// expr.apply(|e| { |
| /// if let Expr::Literal(scalar, _) = e { |
| /// scalars.insert(scalar); |
| /// } |
| /// // The return value controls whether to continue visiting the tree |
| /// Ok(TreeNodeRecursion::Continue) |
| /// }) |
| /// .unwrap(); |
| /// // All subtrees have been visited and literals found |
| /// assert_eq!(scalars.len(), 2); |
| /// assert!(scalars.contains(&ScalarValue::Int32(Some(5)))); |
| /// assert!(scalars.contains(&ScalarValue::Int32(Some(6)))); |
| /// ``` |
| /// |
| /// Rewrite an expression, replacing references to column "a" with |
| /// the literal `42`: |
| /// |
| /// ``` |
| /// # use datafusion_common::tree_node::{Transformed, TreeNode}; |
| /// # use datafusion_expr::{col, Expr, lit}; |
| /// // expression a = 5 AND b = 6 |
| /// let expr = col("a").eq(lit(5)).and(col("b").eq(lit(6))); |
| /// // rewrite all references to column "a" to the literal 42 |
| /// let rewritten = expr.transform(|e| { |
| /// if let Expr::Column(c) = &e { |
| /// if &c.name == "a" { |
| /// // return Transformed::yes to indicate the node was changed |
| /// return Ok(Transformed::yes(lit(42))) |
| /// } |
| /// } |
| /// // return Transformed::no to indicate the node was not changed |
| /// Ok(Transformed::no(e)) |
| /// }).unwrap(); |
| /// // The expression has been rewritten |
| /// assert!(rewritten.transformed); |
| /// // to 42 = 5 AND b = 6 |
| /// assert_eq!(rewritten.data, lit(42).eq(lit(5)).and(col("b").eq(lit(6)))); |
| /// ``` |
| #[derive(Clone, PartialEq, PartialOrd, Eq, Debug, Hash)] |
| pub enum Expr { |
| /// An expression with a specific name (see [`Alias`]). |
| Alias(Alias), |
| /// A named reference to a qualified field in a schema. |
| Column(Column), |
| /// A named reference to a variable in a registry. |
| ScalarVariable(FieldRef, Vec<String>), |
| /// A constant value along with associated [`FieldMetadata`]. |
| Literal(ScalarValue, Option<FieldMetadata>), |
| /// A binary expression such as "age > 21" |
| BinaryExpr(BinaryExpr), |
| /// LIKE expression |
| Like(Like), |
| /// `SIMILAR TO` expression: a LIKE-style match that uses regular expressions |
| SimilarTo(Like), |
| /// Negation of an expression. The expression's type must be a boolean to make sense. |
| Not(Box<Expr>), |
| /// True if argument is not NULL, false otherwise. This expression itself is never NULL. |
| IsNotNull(Box<Expr>), |
| /// True if argument is NULL, false otherwise. This expression itself is never NULL. |
| IsNull(Box<Expr>), |
| /// True if argument is true, false otherwise. This expression itself is never NULL. |
| IsTrue(Box<Expr>), |
| /// True if argument is false, false otherwise. This expression itself is never NULL. |
| IsFalse(Box<Expr>), |
| /// True if argument is NULL (unknown), false otherwise. This expression itself is never NULL. |
| IsUnknown(Box<Expr>), |
| /// True if argument is FALSE or NULL, false otherwise. This expression itself is never NULL. |
| IsNotTrue(Box<Expr>), |
| /// True if argument is TRUE or NULL, false otherwise. This expression itself is never NULL. |
| IsNotFalse(Box<Expr>), |
| /// True if argument is TRUE or FALSE, false otherwise. This expression itself is never NULL. |
| IsNotUnknown(Box<Expr>), |
| /// Arithmetic negation of an expression. The operand must be of a signed numeric data type. |
| Negative(Box<Expr>), |
| /// Whether an expression is between a given range. |
| Between(Between), |
| /// A CASE expression (see docs on [`Case`]) |
| Case(Case), |
| /// Casts the expression to a given type and will return a runtime error if the expression cannot be cast. |
| /// This expression is guaranteed to have a fixed type. |
| Cast(Cast), |
| /// Casts the expression to a given type and will return a null value if the expression cannot be cast. |
| /// This expression is guaranteed to have a fixed type. |
| TryCast(TryCast), |
| /// Call a scalar function with a set of arguments. |
| ScalarFunction(ScalarFunction), |
| /// Calls an aggregate function with arguments, and optional |
| /// `ORDER BY`, `FILTER`, `DISTINCT` and `NULL TREATMENT`. |
| /// |
| /// See also [`ExprFunctionExt`] to set these fields. |
| /// |
| /// [`ExprFunctionExt`]: crate::expr_fn::ExprFunctionExt |
| AggregateFunction(AggregateFunction), |
| /// Call a window function with a set of arguments (boxed; see [`WindowFunction`]). |
| WindowFunction(Box<WindowFunction>), |
| /// Returns whether the list contains the expr value. |
| InList(InList), |
| /// EXISTS subquery |
| Exists(Exists), |
| /// IN subquery |
| InSubquery(InSubquery), |
| /// Set comparison subquery (e.g. `= ANY`, `> ALL`) |
| SetComparison(SetComparison), |
| /// Scalar subquery |
| ScalarSubquery(Subquery), |
| /// Represents a reference to all available fields in a specific schema, |
| /// with an optional (schema) qualifier. |
| /// |
| /// This expr has to be resolved to a list of columns before translating logical |
| /// plan into physical plan. |
| #[deprecated( |
| since = "46.0.0", |
| note = "A wildcard needs to be resolved to concrete expressions when constructing the logical plan. See https://github.com/apache/datafusion/issues/7765" |
| )] |
| Wildcard { |
| qualifier: Option<TableReference>, |
| options: Box<WildcardOptions>, |
| }, |
| /// List of grouping set expressions. Only valid in the context of an aggregate |
| /// GROUP BY expression list |
| GroupingSet(GroupingSet), |
| /// A placeholder for parameters in a prepared statement |
| /// (e.g. `$foo` or `$1`) |
| Placeholder(Placeholder), |
| /// A placeholder which holds a reference to a qualified field |
| /// in the outer query, used for correlated subqueries. |
| OuterReferenceColumn(FieldRef, Column), |
| /// Unnest expression (see [`Unnest`]) |
| Unnest(Unnest), |
| } |
| |
| impl Default for Expr { |
| fn default() -> Self { |
| Expr::Literal(ScalarValue::Null, None) |
| } |
| } |
| |
| impl AsRef<Expr> for Expr { |
| fn as_ref(&self) -> &Expr { |
| self |
| } |
| } |
| |
| /// Create an [`Expr`] from a [`Column`] |
| impl From<Column> for Expr { |
| fn from(value: Column) -> Self { |
| Expr::Column(value) |
| } |
| } |
| |
| /// Create an [`Expr`] from a [`WindowFunction`] |
| impl From<WindowFunction> for Expr { |
| fn from(value: WindowFunction) -> Self { |
| Expr::WindowFunction(Box::new(value)) |
| } |
| } |
| |
| /// Create an [`Expr`] from an [`ScalarAndMetadata`] |
| impl From<ScalarAndMetadata> for Expr { |
| fn from(value: ScalarAndMetadata) -> Self { |
| let (value, metadata) = value.into_inner(); |
| Expr::Literal(value, metadata) |
| } |
| } |
| |
| /// Create an [`Expr`] from an optional qualifier and a [`FieldRef`]. This is |
| /// useful for creating [`Expr`] from a [`DFSchema`]. |
| /// |
| /// See example on [`Expr`] |
| impl<'a> From<(Option<&'a TableReference>, &'a FieldRef)> for Expr { |
| fn from(value: (Option<&'a TableReference>, &'a FieldRef)) -> Self { |
| Expr::from(Column::from(value)) |
| } |
| } |
| |
| impl<'a> TreeNodeContainer<'a, Self> for Expr { |
| fn apply_elements<F: FnMut(&'a Self) -> Result<TreeNodeRecursion>>( |
| &'a self, |
| mut f: F, |
| ) -> Result<TreeNodeRecursion> { |
| f(self) |
| } |
| |
| fn map_elements<F: FnMut(Self) -> Result<Transformed<Self>>>( |
| self, |
| mut f: F, |
| ) -> Result<Transformed<Self>> { |
| f(self) |
| } |
| } |
| |
/// Key-value metadata as stored in [`Field::metadata`].
///
/// This is the metadata associated with an Arrow [`Field`], expressed as a map
/// of string keys to string values.
///
/// # Common Use Cases
///
/// Field metadata is commonly used to store:
/// - Default values for columns when data is missing
/// - Column descriptions or documentation
/// - Data lineage information
/// - Custom application-specific annotations
/// - Encoding hints or display formatting preferences
///
/// # Example: Storing Default Values
///
/// A practical use of field metadata is recording default values for columns
/// that may be absent from the physical data but present in the logical schema.
/// See the [default_column_values.rs] example implementation.
///
/// [default_column_values.rs]: https://github.com/apache/datafusion/blob/main/datafusion-examples/examples/custom_data_source/default_column_values.rs
pub type SchemaFieldMetadata = std::collections::HashMap<String, String>;

/// Intersects multiple metadata instances for UNION operations.
///
/// Only entries whose key is present in ALL inputs, with an identical value in
/// each of them, survive in the result.
///
/// # Union Metadata Behavior
///
/// - A key missing from any input is dropped
/// - A key whose value differs between inputs is dropped
/// - If any input is empty, the result is empty
/// - If no inputs are supplied at all, the result is empty
///
/// # Arguments
///
/// * `metadatas` - An iterator of `SchemaFieldMetadata` instances to intersect
///
/// # Returns
///
/// A new `SchemaFieldMetadata` containing only the intersected metadata
pub fn intersect_metadata_for_union<'a>(
    metadatas: impl IntoIterator<Item = &'a SchemaFieldMetadata>,
) -> SchemaFieldMetadata {
    let mut remaining = metadatas.into_iter();
    match remaining.next() {
        // No inputs: nothing to intersect.
        None => SchemaFieldMetadata::default(),
        // Start from a copy of the first input and prune it against the rest.
        Some(first) => remaining.fold(first.clone(), |mut common, other| {
            common.retain(|key, value| other.get(key) == Some(&*value));
            common
        }),
    }
}
| |
| /// UNNEST expression. |
| #[derive(Clone, PartialEq, Eq, PartialOrd, Hash, Debug)] |
| pub struct Unnest { |
| pub expr: Box<Expr>, |
| } |
| |
| impl Unnest { |
| /// Create a new Unnest expression. |
| pub fn new(expr: Expr) -> Self { |
| Self { |
| expr: Box::new(expr), |
| } |
| } |
| |
| /// Create a new Unnest expression. |
| pub fn new_boxed(boxed: Box<Expr>) -> Self { |
| Self { expr: boxed } |
| } |
| } |
| |
| /// Alias expression |
| #[derive(Clone, PartialEq, Eq, Debug)] |
| pub struct Alias { |
| pub expr: Box<Expr>, |
| pub relation: Option<TableReference>, |
| pub name: String, |
| pub metadata: Option<FieldMetadata>, |
| } |
| |
| impl Hash for Alias { |
| fn hash<H: Hasher>(&self, state: &mut H) { |
| self.expr.hash(state); |
| self.relation.hash(state); |
| self.name.hash(state); |
| } |
| } |
| |
| impl PartialOrd for Alias { |
| fn partial_cmp(&self, other: &Self) -> Option<Ordering> { |
| let cmp = self.expr.partial_cmp(&other.expr); |
| let Some(Ordering::Equal) = cmp else { |
| return cmp; |
| }; |
| let cmp = self.relation.partial_cmp(&other.relation); |
| let Some(Ordering::Equal) = cmp else { |
| return cmp; |
| }; |
| self.name |
| .partial_cmp(&other.name) |
| // TODO (https://github.com/apache/datafusion/issues/17477) avoid recomparing all fields |
| .filter(|cmp| *cmp != Ordering::Equal || self == other) |
| } |
| } |
| |
| impl Alias { |
| /// Create an alias with an optional schema/field qualifier. |
| pub fn new( |
| expr: Expr, |
| relation: Option<impl Into<TableReference>>, |
| name: impl Into<String>, |
| ) -> Self { |
| Self { |
| expr: Box::new(expr), |
| relation: relation.map(|r| r.into()), |
| name: name.into(), |
| metadata: None, |
| } |
| } |
| |
| pub fn with_metadata(mut self, metadata: Option<FieldMetadata>) -> Self { |
| self.metadata = metadata; |
| self |
| } |
| } |
| |
| /// Binary expression |
| #[derive(Clone, PartialEq, Eq, PartialOrd, Hash, Debug)] |
| pub struct BinaryExpr { |
| /// Left-hand side of the expression |
| pub left: Box<Expr>, |
| /// The comparison operator |
| pub op: Operator, |
| /// Right-hand side of the expression |
| pub right: Box<Expr>, |
| } |
| |
| impl BinaryExpr { |
| /// Create a new binary expression |
| pub fn new(left: Box<Expr>, op: Operator, right: Box<Expr>) -> Self { |
| Self { left, op, right } |
| } |
| } |
| |
| impl Display for BinaryExpr { |
| fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { |
| // Put parentheses around child binary expressions so that we can see the difference |
| // between `(a OR b) AND c` and `a OR (b AND c)`. We only insert parentheses when needed, |
| // based on operator precedence. For example, `(a AND b) OR c` and `a AND b OR c` are |
| // equivalent and the parentheses are not necessary. |
| |
| fn write_child( |
| f: &mut Formatter<'_>, |
| expr: &Expr, |
| precedence: u8, |
| ) -> fmt::Result { |
| match expr { |
| Expr::BinaryExpr(child) => { |
| let p = child.op.precedence(); |
| if p == 0 || p < precedence { |
| write!(f, "({child})")?; |
| } else { |
| write!(f, "{child}")?; |
| } |
| } |
| _ => write!(f, "{expr}")?, |
| } |
| Ok(()) |
| } |
| |
| let precedence = self.op.precedence(); |
| write_child(f, self.left.as_ref(), precedence)?; |
| write!(f, " {} ", self.op)?; |
| write_child(f, self.right.as_ref(), precedence) |
| } |
| } |
| |
| /// CASE expression |
| /// |
| /// The CASE expression is similar to a series of nested if/else and there are two forms that |
| /// can be used. The first form consists of a series of boolean "when" expressions with |
| /// corresponding "then" expressions, and an optional "else" expression. |
| /// |
| /// ```text |
| /// CASE WHEN condition THEN result |
| /// [WHEN ...] |
| /// [ELSE result] |
| /// END |
| /// ``` |
| /// |
| /// The second form uses a base expression and then a series of "when" clauses that match on a |
| /// literal value. |
| /// |
| /// ```text |
| /// CASE expression |
| /// WHEN value THEN result |
| /// [WHEN ...] |
| /// [ELSE result] |
| /// END |
| /// ``` |
| #[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Hash)] |
| pub struct Case { |
| /// Optional base expression that can be compared to literal values in the "when" expressions |
| pub expr: Option<Box<Expr>>, |
| /// One or more when/then expressions |
| pub when_then_expr: Vec<(Box<Expr>, Box<Expr>)>, |
| /// Optional "else" expression |
| pub else_expr: Option<Box<Expr>>, |
| } |
| |
| impl Case { |
| /// Create a new Case expression |
| pub fn new( |
| expr: Option<Box<Expr>>, |
| when_then_expr: Vec<(Box<Expr>, Box<Expr>)>, |
| else_expr: Option<Box<Expr>>, |
| ) -> Self { |
| Self { |
| expr, |
| when_then_expr, |
| else_expr, |
| } |
| } |
| } |
| |
| /// LIKE expression |
| #[derive(Clone, PartialEq, Eq, PartialOrd, Hash, Debug)] |
| pub struct Like { |
| pub negated: bool, |
| pub expr: Box<Expr>, |
| pub pattern: Box<Expr>, |
| pub escape_char: Option<char>, |
| /// Whether to ignore case on comparing |
| pub case_insensitive: bool, |
| } |
| |
| impl Like { |
| /// Create a new Like expression |
| pub fn new( |
| negated: bool, |
| expr: Box<Expr>, |
| pattern: Box<Expr>, |
| escape_char: Option<char>, |
| case_insensitive: bool, |
| ) -> Self { |
| Self { |
| negated, |
| expr, |
| pattern, |
| escape_char, |
| case_insensitive, |
| } |
| } |
| } |
| |
| /// BETWEEN expression |
| #[derive(Clone, PartialEq, Eq, PartialOrd, Hash, Debug)] |
| pub struct Between { |
| /// The value to compare |
| pub expr: Box<Expr>, |
| /// Whether the expression is negated |
| pub negated: bool, |
| /// The low end of the range |
| pub low: Box<Expr>, |
| /// The high end of the range |
| pub high: Box<Expr>, |
| } |
| |
| impl Between { |
| /// Create a new Between expression |
| pub fn new(expr: Box<Expr>, negated: bool, low: Box<Expr>, high: Box<Expr>) -> Self { |
| Self { |
| expr, |
| negated, |
| low, |
| high, |
| } |
| } |
| } |
| |
| /// Invoke a [`ScalarUDF`] with a set of arguments |
| /// |
| /// [`ScalarUDF`]: crate::ScalarUDF |
| #[derive(Clone, PartialEq, Eq, PartialOrd, Hash, Debug)] |
| pub struct ScalarFunction { |
| /// The function |
| pub func: Arc<crate::ScalarUDF>, |
| /// List of expressions to feed to the functions as arguments |
| pub args: Vec<Expr>, |
| } |
| |
| impl ScalarFunction { |
| // return the Function's name |
| pub fn name(&self) -> &str { |
| self.func.name() |
| } |
| } |
| |
| impl ScalarFunction { |
| /// Create a new `ScalarFunction` from a [`ScalarUDF`] |
| /// |
| /// [`ScalarUDF`]: crate::ScalarUDF |
| pub fn new_udf(udf: Arc<crate::ScalarUDF>, args: Vec<Expr>) -> Self { |
| Self { func: udf, args } |
| } |
| } |
| |
| /// Describes how a sub field of a nested type, such as a struct or a list, is accessed |
| #[derive(Clone, PartialEq, Eq, Hash, Debug)] |
| pub enum GetFieldAccess { |
| /// Access to a named struct field, for example `struct["name"]` |
| NamedStructField { name: ScalarValue }, |
| /// Access to a single list element by index, for example: `list[i]` |
| ListIndex { key: Box<Expr> }, |
| /// Access to a list range with a stride, for example `list[i:j:k]` |
| ListRange { |
| start: Box<Expr>, |
| stop: Box<Expr>, |
| stride: Box<Expr>, |
| }, |
| } |
| |
| /// Cast expression |
| #[derive(Clone, PartialEq, Eq, PartialOrd, Hash, Debug)] |
| pub struct Cast { |
| /// The expression being cast |
| pub expr: Box<Expr>, |
| /// The `DataType` the expression will yield |
| pub data_type: DataType, |
| } |
| |
| impl Cast { |
| /// Create a new Cast expression |
| pub fn new(expr: Box<Expr>, data_type: DataType) -> Self { |
| Self { expr, data_type } |
| } |
| } |
| |
| /// TryCast Expression |
| #[derive(Clone, PartialEq, Eq, PartialOrd, Hash, Debug)] |
| pub struct TryCast { |
| /// The expression being cast |
| pub expr: Box<Expr>, |
| /// The `DataType` the expression will yield |
| pub data_type: DataType, |
| } |
| |
| impl TryCast { |
| /// Create a new TryCast expression |
| pub fn new(expr: Box<Expr>, data_type: DataType) -> Self { |
| Self { expr, data_type } |
| } |
| } |
| |
| /// SORT expression |
| #[derive(Clone, PartialEq, Eq, PartialOrd, Hash, Debug)] |
| pub struct Sort { |
| /// The expression to sort on |
| pub expr: Expr, |
| /// The direction of the sort |
| pub asc: bool, |
| /// Whether to put Nulls before all other data values |
| pub nulls_first: bool, |
| } |
| |
| impl Sort { |
| /// Create a new Sort expression |
| pub fn new(expr: Expr, asc: bool, nulls_first: bool) -> Self { |
| Self { |
| expr, |
| asc, |
| nulls_first, |
| } |
| } |
| |
| /// Create a new Sort expression with the opposite sort direction |
| pub fn reverse(&self) -> Self { |
| Self { |
| expr: self.expr.clone(), |
| asc: !self.asc, |
| nulls_first: !self.nulls_first, |
| } |
| } |
| |
| /// Replaces the Sort expressions with `expr` |
| pub fn with_expr(&self, expr: Expr) -> Self { |
| Self { |
| expr, |
| asc: self.asc, |
| nulls_first: self.nulls_first, |
| } |
| } |
| } |
| |
| impl Display for Sort { |
| fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { |
| write!(f, "{}", self.expr)?; |
| if self.asc { |
| write!(f, " ASC")?; |
| } else { |
| write!(f, " DESC")?; |
| } |
| if self.nulls_first { |
| write!(f, " NULLS FIRST")?; |
| } else { |
| write!(f, " NULLS LAST")?; |
| } |
| Ok(()) |
| } |
| } |
| |
| impl<'a> TreeNodeContainer<'a, Expr> for Sort { |
| fn apply_elements<F: FnMut(&'a Expr) -> Result<TreeNodeRecursion>>( |
| &'a self, |
| f: F, |
| ) -> Result<TreeNodeRecursion> { |
| self.expr.apply_elements(f) |
| } |
| |
| fn map_elements<F: FnMut(Expr) -> Result<Transformed<Expr>>>( |
| self, |
| f: F, |
| ) -> Result<Transformed<Self>> { |
| self.expr |
| .map_elements(f)? |
| .map_data(|expr| Ok(Self { expr, ..self })) |
| } |
| } |
| |
| /// Aggregate function |
| /// |
| /// See also [`ExprFunctionExt`] to set these fields on `Expr` |
| /// |
| /// [`ExprFunctionExt`]: crate::expr_fn::ExprFunctionExt |
| #[derive(Clone, PartialEq, Eq, PartialOrd, Hash, Debug)] |
| pub struct AggregateFunction { |
| /// Name of the function |
| pub func: Arc<AggregateUDF>, |
| pub params: AggregateFunctionParams, |
| } |
| |
| #[derive(Clone, PartialEq, Eq, PartialOrd, Hash, Debug)] |
| pub struct AggregateFunctionParams { |
| pub args: Vec<Expr>, |
| /// Whether this is a DISTINCT aggregation or not |
| pub distinct: bool, |
| /// Optional filter |
| pub filter: Option<Box<Expr>>, |
| /// Optional ordering |
| pub order_by: Vec<Sort>, |
| pub null_treatment: Option<NullTreatment>, |
| } |
| |
| impl AggregateFunction { |
| /// Create a new AggregateFunction expression with a user-defined function (UDF) |
| pub fn new_udf( |
| func: Arc<AggregateUDF>, |
| args: Vec<Expr>, |
| distinct: bool, |
| filter: Option<Box<Expr>>, |
| order_by: Vec<Sort>, |
| null_treatment: Option<NullTreatment>, |
| ) -> Self { |
| Self { |
| func, |
| params: AggregateFunctionParams { |
| args, |
| distinct, |
| filter, |
| order_by, |
| null_treatment, |
| }, |
| } |
| } |
| } |
| |
| /// A function used as a SQL window function |
| /// |
| /// In SQL, you can use: |
| /// - Actual window functions ([`WindowUDF`]) |
| /// - Normal aggregate functions ([`AggregateUDF`]) |
| #[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Hash)] |
| pub enum WindowFunctionDefinition { |
| /// A user defined aggregate function |
| AggregateUDF(Arc<AggregateUDF>), |
| /// A user defined window function |
| WindowUDF(Arc<WindowUDF>), |
| } |
| |
| impl WindowFunctionDefinition { |
| /// Returns the datatype of the window function |
| pub fn return_field( |
| &self, |
| input_expr_fields: &[FieldRef], |
| display_name: &str, |
| ) -> Result<FieldRef> { |
| match self { |
| WindowFunctionDefinition::AggregateUDF(fun) => { |
| fun.return_field(input_expr_fields) |
| } |
| WindowFunctionDefinition::WindowUDF(fun) => { |
| fun.field(WindowUDFFieldArgs::new(input_expr_fields, display_name)) |
| } |
| } |
| } |
| |
| /// The signatures supported by the function `fun`. |
| pub fn signature(&self) -> Signature { |
| match self { |
| WindowFunctionDefinition::AggregateUDF(fun) => fun.signature().clone(), |
| WindowFunctionDefinition::WindowUDF(fun) => fun.signature().clone(), |
| } |
| } |
| |
| /// Function's name for display |
| pub fn name(&self) -> &str { |
| match self { |
| WindowFunctionDefinition::WindowUDF(fun) => fun.name(), |
| WindowFunctionDefinition::AggregateUDF(fun) => fun.name(), |
| } |
| } |
| |
| /// Return the inner window simplification function, if any |
| /// |
| /// See [`WindowFunctionSimplification`] for more information |
| pub fn simplify(&self) -> Option<WindowFunctionSimplification> { |
| match self { |
| WindowFunctionDefinition::AggregateUDF(_) => None, |
| WindowFunctionDefinition::WindowUDF(udwf) => udwf.simplify(), |
| } |
| } |
| } |
| |
| impl Display for WindowFunctionDefinition { |
| fn fmt(&self, f: &mut Formatter) -> fmt::Result { |
| match self { |
| WindowFunctionDefinition::AggregateUDF(fun) => Display::fmt(fun, f), |
| WindowFunctionDefinition::WindowUDF(fun) => Display::fmt(fun, f), |
| } |
| } |
| } |
| |
| impl From<Arc<AggregateUDF>> for WindowFunctionDefinition { |
| fn from(value: Arc<AggregateUDF>) -> Self { |
| Self::AggregateUDF(value) |
| } |
| } |
| |
| impl From<Arc<WindowUDF>> for WindowFunctionDefinition { |
| fn from(value: Arc<WindowUDF>) -> Self { |
| Self::WindowUDF(value) |
| } |
| } |
| |
/// Window function
///
/// Holds the actual function to call ([`WindowFunctionDefinition`]) as well
/// as its arguments (`args`) and the contents of the `OVER` clause:
///
/// 1. `PARTITION BY`
/// 2. `ORDER BY`
/// 3. Window frame (e.g. `ROWS 1 PRECEDING AND 1 FOLLOWING`)
///
/// See [`ExprFunctionExt`] for examples of how to create a `WindowFunction`.
///
/// [`ExprFunctionExt`]: crate::ExprFunctionExt
#[derive(Clone, PartialEq, Eq, PartialOrd, Hash, Debug)]
pub struct WindowFunction {
    /// The function to call (a window UDF or an aggregate UDF)
    pub fun: WindowFunctionDefinition,
    /// Arguments and `OVER` clause contents, see [`WindowFunctionParams`]
    pub params: WindowFunctionParams,
}
| |
/// Arguments and `OVER` clause contents of a [`WindowFunction`].
#[derive(Clone, PartialEq, Eq, PartialOrd, Hash, Debug)]
pub struct WindowFunctionParams {
    /// List of expressions to feed to the functions as arguments
    pub args: Vec<Expr>,
    /// List of partition by expressions
    pub partition_by: Vec<Expr>,
    /// List of order by expressions
    pub order_by: Vec<Sort>,
    /// Window frame
    pub window_frame: WindowFrame,
    /// Optional filter expression (FILTER (WHERE ...))
    pub filter: Option<Box<Expr>>,
    /// Specifies how NULL value is treated: ignore or respect
    pub null_treatment: Option<NullTreatment>,
    /// Distinct flag
    pub distinct: bool,
}
| |
| impl WindowFunction { |
| /// Create a new Window expression with the specified argument an |
| /// empty `OVER` clause |
| pub fn new(fun: impl Into<WindowFunctionDefinition>, args: Vec<Expr>) -> Self { |
| Self { |
| fun: fun.into(), |
| params: WindowFunctionParams { |
| args, |
| partition_by: Vec::default(), |
| order_by: Vec::default(), |
| window_frame: WindowFrame::new(None), |
| filter: None, |
| null_treatment: None, |
| distinct: false, |
| }, |
| } |
| } |
| |
| /// Return the inner window simplification function, if any |
| /// |
| /// See [`WindowFunctionSimplification`] for more information |
| pub fn simplify(&self) -> Option<WindowFunctionSimplification> { |
| self.fun.simplify() |
| } |
| } |
| |
/// EXISTS expression
///
/// Represents `EXISTS (<subquery>)`, or `NOT EXISTS (<subquery>)` when
/// `negated` is set.
#[derive(Clone, PartialEq, Eq, PartialOrd, Hash, Debug)]
pub struct Exists {
    /// Subquery that will produce a single column of data
    pub subquery: Subquery,
    /// Whether the expression is negated
    pub negated: bool,
}
| |
| impl Exists { |
| // Create a new Exists expression. |
| pub fn new(subquery: Subquery, negated: bool) -> Self { |
| Self { subquery, negated } |
| } |
| } |
| |
/// Whether the set comparison uses `ANY`/`SOME` or `ALL`
#[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Hash, Debug)]
pub enum SetQuantifier {
    /// `ANY` (or `SOME`)
    Any,
    /// `ALL`
    All,
}

impl Display for SetQuantifier {
    /// Writes the SQL keyword of the quantifier (`ANY` or `ALL`).
    fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
        let keyword = match self {
            Self::Any => "ANY",
            Self::All => "ALL",
        };
        f.write_str(keyword)
    }
}
| |
/// Set comparison subquery (e.g. `= ANY`, `> ALL`)
///
/// Represents `<expr> <op> <quantifier> (<subquery>)`.
#[derive(Clone, PartialEq, Eq, PartialOrd, Hash, Debug)]
pub struct SetComparison {
    /// The expression to compare
    pub expr: Box<Expr>,
    /// Subquery that will produce a single column of data to compare against
    pub subquery: Subquery,
    /// Comparison operator (e.g. `=`, `>`, `<`)
    pub op: Operator,
    /// Quantifier (`ANY`/`ALL`), see [`SetQuantifier`]
    pub quantifier: SetQuantifier,
}
| |
| impl SetComparison { |
| /// Create a new set comparison expression |
| pub fn new( |
| expr: Box<Expr>, |
| subquery: Subquery, |
| op: Operator, |
| quantifier: SetQuantifier, |
| ) -> Self { |
| Self { |
| expr, |
| subquery, |
| op, |
| quantifier, |
| } |
| } |
| } |
| |
/// InList expression
///
/// Represents `<expr> IN (<list>)`, or `<expr> NOT IN (<list>)` when
/// `negated` is set.
#[derive(Clone, PartialEq, Eq, PartialOrd, Hash, Debug)]
pub struct InList {
    /// The expression to compare
    pub expr: Box<Expr>,
    /// The list of values to compare against
    pub list: Vec<Expr>,
    /// Whether the expression is negated
    pub negated: bool,
}
| |
| impl InList { |
| /// Create a new InList expression |
| pub fn new(expr: Box<Expr>, list: Vec<Expr>, negated: bool) -> Self { |
| Self { |
| expr, |
| list, |
| negated, |
| } |
| } |
| } |
| |
/// IN subquery
///
/// Represents `<expr> IN (<subquery>)`, or `<expr> NOT IN (<subquery>)` when
/// `negated` is set.
#[derive(Clone, PartialEq, Eq, PartialOrd, Hash, Debug)]
pub struct InSubquery {
    /// The expression to compare
    pub expr: Box<Expr>,
    /// Subquery that will produce a single column of data to compare against
    pub subquery: Subquery,
    /// Whether the expression is negated
    pub negated: bool,
}
| |
| impl InSubquery { |
| /// Create a new InSubquery expression |
| pub fn new(expr: Box<Expr>, subquery: Subquery, negated: bool) -> Self { |
| Self { |
| expr, |
| subquery, |
| negated, |
| } |
| } |
| } |
| |
/// Placeholder, representing bind parameter values such as `$1` or `$name`.
///
/// The type of these parameters is inferred using [`Expr::infer_placeholder_types`]
/// or can be specified directly using `PREPARE` statements.
#[derive(Clone, PartialEq, Eq, PartialOrd, Hash, Debug)]
pub struct Placeholder {
    /// The identifier of the parameter, including the leading `$` (e.g, `"$1"` or `"$foo"`)
    pub id: String,
    /// The field (carrying the type) the parameter will be filled in with;
    /// `None` when it is not known
    pub field: Option<FieldRef>,
}
| |
| impl Placeholder { |
| /// Create a new Placeholder expression |
| #[deprecated(since = "51.0.0", note = "Use new_with_field instead")] |
| pub fn new(id: String, data_type: Option<DataType>) -> Self { |
| Self { |
| id, |
| field: data_type.map(|dt| Arc::new(Field::new("", dt, true))), |
| } |
| } |
| |
| /// Create a new Placeholder expression from a Field |
| pub fn new_with_field(id: String, field: Option<FieldRef>) -> Self { |
| Self { id, field } |
| } |
| } |
| |
/// Grouping sets
///
/// See <https://www.postgresql.org/docs/current/queries-table-expressions.html#QUERIES-GROUPING-SETS>
/// for Postgres definition.
/// See <https://spark.apache.org/docs/latest/sql-ref-syntax-qry-select-groupby.html>
/// for Apache Spark definition.
#[derive(Clone, PartialEq, Eq, PartialOrd, Hash, Debug)]
pub enum GroupingSet {
    /// Rollup grouping sets (`ROLLUP`), holding the list of expressions
    Rollup(Vec<Expr>),
    /// Cube grouping sets (`CUBE`), holding the list of expressions
    Cube(Vec<Expr>),
    /// User-defined grouping sets, one inner list per set
    GroupingSets(Vec<Vec<Expr>>),
}
| |
| impl GroupingSet { |
| /// Return all distinct exprs in the grouping set. For `CUBE` and `ROLLUP` this |
| /// is just the underlying list of exprs. For `GROUPING SET` we need to deduplicate |
| /// the exprs in the underlying sets. |
| pub fn distinct_expr(&self) -> Vec<&Expr> { |
| match self { |
| GroupingSet::Rollup(exprs) | GroupingSet::Cube(exprs) => { |
| exprs.iter().collect() |
| } |
| GroupingSet::GroupingSets(groups) => { |
| let mut exprs: Vec<&Expr> = vec![]; |
| for exp in groups.iter().flatten() { |
| if !exprs.contains(&exp) { |
| exprs.push(exp); |
| } |
| } |
| exprs |
| } |
| } |
| } |
| } |
| |
/// `ILIKE '<pattern>'` wildcard option.
///
/// Only compiled when the `sql` feature is disabled; with the feature enabled
/// the `sqlparser` type of the same name is imported instead.
#[derive(Clone, PartialEq, Eq, PartialOrd, Hash, Debug)]
#[cfg(not(feature = "sql"))]
pub struct IlikeSelectItem {
    /// The pattern, printed between single quotes
    pub pattern: String,
}
#[cfg(not(feature = "sql"))]
impl Display for IlikeSelectItem {
    /// Writes `ILIKE '<pattern>'`.
    fn fmt(&self, f: &mut Formatter) -> fmt::Result {
        let Self { pattern } = self;
        write!(f, "ILIKE '{pattern}'")
    }
}
| #[derive(Clone, PartialEq, Eq, PartialOrd, Hash, Debug)] |
| #[cfg(not(feature = "sql"))] |
| pub enum ExcludeSelectItem { |
| Single(Ident), |
| Multiple(Vec<Ident>), |
| } |
| #[cfg(not(feature = "sql"))] |
| impl Display for ExcludeSelectItem { |
| fn fmt(&self, f: &mut Formatter) -> fmt::Result { |
| write!(f, "EXCLUDE")?; |
| match self { |
| Self::Single(column) => { |
| write!(f, " {column}")?; |
| } |
| Self::Multiple(columns) => { |
| write!(f, " ({})", display_comma_separated(columns))?; |
| } |
| } |
| Ok(()) |
| } |
| } |
| #[derive(Clone, PartialEq, Eq, PartialOrd, Hash, Debug)] |
| #[cfg(not(feature = "sql"))] |
| pub struct ExceptSelectItem { |
| pub first_element: Ident, |
| pub additional_elements: Vec<Ident>, |
| } |
| #[cfg(not(feature = "sql"))] |
| impl Display for ExceptSelectItem { |
| fn fmt(&self, f: &mut Formatter) -> fmt::Result { |
| write!(f, "EXCEPT ")?; |
| if self.additional_elements.is_empty() { |
| write!(f, "({})", self.first_element)?; |
| } else { |
| write!( |
| f, |
| "({}, {})", |
| self.first_element, |
| display_comma_separated(&self.additional_elements) |
| )?; |
| } |
| Ok(()) |
| } |
| } |
| |
/// Joins the `Display` output of every element of `slice` with `", "`.
///
/// Returns an empty string for an empty slice. Each element is written
/// directly into the result, so no temporary `String` is built per element.
///
/// # Panics
///
/// Panics if the `Display` implementation of an element returns an error.
pub fn display_comma_separated<T>(slice: &[T]) -> String
where
    T: Display,
{
    let mut joined = String::new();
    for (position, item) in slice.iter().enumerate() {
        if position > 0 {
            joined.push_str(", ");
        }
        // Writing into a `String` itself never fails; only the element's
        // `Display` implementation can report an error here.
        write!(joined, "{item}").expect("a Display implementation returned an error");
    }
    joined
}
| |
| #[derive(Clone, PartialEq, Eq, PartialOrd, Hash, Debug)] |
| #[cfg(not(feature = "sql"))] |
| pub enum RenameSelectItem { |
| Single(String), |
| Multiple(Vec<String>), |
| } |
| #[cfg(not(feature = "sql"))] |
| impl Display for RenameSelectItem { |
| fn fmt(&self, f: &mut Formatter) -> fmt::Result { |
| write!(f, "RENAME")?; |
| match self { |
| Self::Single(column) => { |
| write!(f, " {column}")?; |
| } |
| Self::Multiple(columns) => { |
| write!(f, " ({})", display_comma_separated(columns))?; |
| } |
| } |
| Ok(()) |
| } |
| } |
| |
/// An identifier.
///
/// Only compiled when the `sql` feature is disabled.
#[derive(Clone, PartialEq, Eq, PartialOrd, Hash, Debug)]
#[cfg(not(feature = "sql"))]
pub struct Ident {
    /// The value of the identifier without quotes.
    pub value: String,
    /// The starting quote if any. Valid quote characters are the single quote,
    /// double quote, backtick, and opening square bracket.
    pub quote_style: Option<char>,
    /// The span of the identifier in the original SQL string.
    pub span: String,
}
#[cfg(not(feature = "sql"))]
impl Display for Ident {
    /// Writes the value between square brackets; `quote_style` and `span`
    /// are not part of the output.
    fn fmt(&self, f: &mut Formatter) -> fmt::Result {
        let Self { value, .. } = self;
        write!(f, "[{value}]")
    }
}
| |
| #[derive(Clone, PartialEq, Eq, PartialOrd, Hash, Debug)] |
| #[cfg(not(feature = "sql"))] |
| pub struct ReplaceSelectElement { |
| pub expr: String, |
| pub column_name: Ident, |
| pub as_keyword: bool, |
| } |
| #[cfg(not(feature = "sql"))] |
| impl Display for ReplaceSelectElement { |
| fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { |
| if self.as_keyword { |
| write!(f, "{} AS {}", self.expr, self.column_name) |
| } else { |
| write!(f, "{} {}", self.expr, self.column_name) |
| } |
| } |
| } |
| |
/// Additional options for wildcards, e.g. Snowflake `EXCLUDE`/`RENAME` and Bigquery `EXCEPT`.
///
/// Every option is `None` in the derived `Default` value.
#[derive(Clone, PartialEq, Eq, PartialOrd, Hash, Debug, Default)]
pub struct WildcardOptions {
    /// `[ILIKE...]`.
    ///  Snowflake syntax: <https://docs.snowflake.com/en/sql-reference/sql/select#parameters>
    pub ilike: Option<IlikeSelectItem>,
    /// `[EXCLUDE...]`.
    ///  Snowflake syntax: <https://docs.snowflake.com/en/sql-reference/sql/select#parameters>
    pub exclude: Option<ExcludeSelectItem>,
    /// `[EXCEPT...]`.
    ///  BigQuery syntax: <https://cloud.google.com/bigquery/docs/reference/standard-sql/query-syntax#select_except>
    ///  Clickhouse syntax: <https://clickhouse.com/docs/en/sql-reference/statements/select#except>
    pub except: Option<ExceptSelectItem>,
    /// `[REPLACE]`
    ///  BigQuery syntax: <https://cloud.google.com/bigquery/docs/reference/standard-sql/query-syntax#select_replace>
    ///  Clickhouse syntax: <https://clickhouse.com/docs/en/sql-reference/statements/select#replace>
    ///  Snowflake syntax: <https://docs.snowflake.com/en/sql-reference/sql/select#parameters>
    pub replace: Option<PlannedReplaceSelectItem>,
    /// `[RENAME ...]`.
    ///  Snowflake syntax: <https://docs.snowflake.com/en/sql-reference/sql/select#parameters>
    pub rename: Option<RenameSelectItem>,
}
| |
| impl WildcardOptions { |
| pub fn with_replace(self, replace: PlannedReplaceSelectItem) -> Self { |
| WildcardOptions { |
| ilike: self.ilike, |
| exclude: self.exclude, |
| except: self.except, |
| replace: Some(replace), |
| rename: self.rename, |
| } |
| } |
| } |
| |
| impl Display for WildcardOptions { |
| fn fmt(&self, f: &mut Formatter) -> fmt::Result { |
| if let Some(ilike) = &self.ilike { |
| write!(f, " {ilike}")?; |
| } |
| if let Some(exclude) = &self.exclude { |
| write!(f, " {exclude}")?; |
| } |
| if let Some(except) = &self.except { |
| write!(f, " {except}")?; |
| } |
| if let Some(replace) = &self.replace { |
| write!(f, " {replace}")?; |
| } |
| if let Some(rename) = &self.rename { |
| write!(f, " {rename}")?; |
| } |
| Ok(()) |
| } |
| } |
| |
/// The planned expressions for `REPLACE`
///
/// Only `items` is used by the `Display` implementation.
#[derive(Clone, PartialEq, Eq, PartialOrd, Hash, Debug, Default)]
pub struct PlannedReplaceSelectItem {
    /// The original ast nodes
    pub items: Vec<ReplaceSelectElement>,
    /// The expression planned from the ast nodes. They will be used when expanding the wildcard.
    // NOTE(review): presumably index-aligned with `items` — confirm against the planner.
    pub planned_expressions: Vec<Expr>,
}
| |
| impl Display for PlannedReplaceSelectItem { |
| fn fmt(&self, f: &mut Formatter) -> fmt::Result { |
| write!(f, "REPLACE")?; |
| write!(f, " ({})", display_comma_separated(&self.items))?; |
| Ok(()) |
| } |
| } |
| |
| impl PlannedReplaceSelectItem { |
| pub fn items(&self) -> &[ReplaceSelectElement] { |
| &self.items |
| } |
| |
| pub fn expressions(&self) -> &[Expr] { |
| &self.planned_expressions |
| } |
| } |
| |
| impl Expr { |
/// The name of the column (field) that this `Expr` will produce.
///
/// For example, for a projection (e.g. `SELECT <expr>`) the resulting arrow
/// [`Schema`] will have a field with this name.
///
/// Note that the resulting string is subtly different from the `Display`
/// representation for certain `Expr`. Some differences:
///
/// 1. [`Expr::Alias`], which shows only the alias itself
/// 2. [`Expr::Cast`] / [`Expr::TryCast`], which only displays the expression
///
/// The returned value borrows `self` and renders the name lazily when it is
/// formatted.
///
/// # Example
/// ```
/// # use datafusion_expr::{col, lit};
/// let expr = col("foo").eq(lit(42));
/// assert_eq!("foo = Int32(42)", expr.schema_name().to_string());
///
/// let expr = col("foo").alias("bar").eq(lit(11));
/// assert_eq!("bar = Int32(11)", expr.schema_name().to_string());
/// ```
///
/// [`Schema`]: arrow::datatypes::Schema
pub fn schema_name(&self) -> impl Display + '_ {
    SchemaDisplay(self)
}
| |
/// Human readable display formatting for this expression.
///
/// This function is primarily used in printing the explain tree output,
/// (e.g. `EXPLAIN FORMAT TREE <query>`), providing a readable format to
/// show how expressions are used in physical and logical plans. See
/// [`Expr`] for other ways to format expressions.
///
/// Note this format is intended for human consumption rather than SQL for
/// other systems. If you need SQL to pass to other systems, consider using
/// [`Unparser`].
///
/// The returned value borrows `self` and renders the text lazily when it is
/// formatted.
///
/// [`Unparser`]: https://docs.rs/datafusion/latest/datafusion/sql/unparser/struct.Unparser.html
///
/// # Example
/// ```
/// # use datafusion_expr::{col, lit};
/// let expr = col("foo") + lit(42);
/// // For EXPLAIN output:
/// // "foo + 42"
/// println!("{}", expr.human_display());
/// ```
pub fn human_display(&self) -> impl Display + '_ {
    SqlDisplay(self)
}
| |
| /// Returns the qualifier and the schema name of this expression. |
| /// |
| /// Used when the expression forms the output field of a certain plan. |
| /// The result is the field's qualifier and field name in the plan's |
| /// output schema. We can use this qualified name to reference the field. |
| pub fn qualified_name(&self) -> (Option<TableReference>, String) { |
| match self { |
| Expr::Column(Column { |
| relation, |
| name, |
| spans: _, |
| }) => (relation.clone(), name.clone()), |
| Expr::Alias(Alias { relation, name, .. }) => (relation.clone(), name.clone()), |
| _ => (None, self.schema_name().to_string()), |
| } |
| } |
| |
| /// Return String representation of the variant represented by `self` |
| /// Useful for non-rust based bindings |
| pub fn variant_name(&self) -> &str { |
| match self { |
| Expr::AggregateFunction { .. } => "AggregateFunction", |
| Expr::Alias(..) => "Alias", |
| Expr::Between { .. } => "Between", |
| Expr::BinaryExpr { .. } => "BinaryExpr", |
| Expr::Case { .. } => "Case", |
| Expr::Cast { .. } => "Cast", |
| Expr::Column(..) => "Column", |
| Expr::OuterReferenceColumn(_, _) => "Outer", |
| Expr::Exists { .. } => "Exists", |
| Expr::GroupingSet(..) => "GroupingSet", |
| Expr::InList { .. } => "InList", |
| Expr::InSubquery(..) => "InSubquery", |
| Expr::SetComparison(..) => "SetComparison", |
| Expr::IsNotNull(..) => "IsNotNull", |
| Expr::IsNull(..) => "IsNull", |
| Expr::Like { .. } => "Like", |
| Expr::SimilarTo { .. } => "RLike", |
| Expr::IsTrue(..) => "IsTrue", |
| Expr::IsFalse(..) => "IsFalse", |
| Expr::IsUnknown(..) => "IsUnknown", |
| Expr::IsNotTrue(..) => "IsNotTrue", |
| Expr::IsNotFalse(..) => "IsNotFalse", |
| Expr::IsNotUnknown(..) => "IsNotUnknown", |
| Expr::Literal(..) => "Literal", |
| Expr::Negative(..) => "Negative", |
| Expr::Not(..) => "Not", |
| Expr::Placeholder(_) => "Placeholder", |
| Expr::ScalarFunction(..) => "ScalarFunction", |
| Expr::ScalarSubquery { .. } => "ScalarSubquery", |
| Expr::ScalarVariable(..) => "ScalarVariable", |
| Expr::TryCast { .. } => "TryCast", |
| Expr::WindowFunction { .. } => "WindowFunction", |
| #[expect(deprecated)] |
| Expr::Wildcard { .. } => "Wildcard", |
| Expr::Unnest { .. } => "Unnest", |
| } |
| } |
| |
/// Return `self == other`
///
/// Builds the comparison with [`binary_expr`] and [`Operator::Eq`].
pub fn eq(self, other: Expr) -> Expr {
    binary_expr(self, Operator::Eq, other)
}
| |
/// Return `self != other`
///
/// Builds the comparison with [`binary_expr`] and [`Operator::NotEq`].
pub fn not_eq(self, other: Expr) -> Expr {
    binary_expr(self, Operator::NotEq, other)
}
| |
/// Return `self > other`
///
/// Builds the comparison with [`binary_expr`] and [`Operator::Gt`].
pub fn gt(self, other: Expr) -> Expr {
    binary_expr(self, Operator::Gt, other)
}
| |
/// Return `self >= other`
///
/// Builds the comparison with [`binary_expr`] and [`Operator::GtEq`].
pub fn gt_eq(self, other: Expr) -> Expr {
    binary_expr(self, Operator::GtEq, other)
}
| |
/// Return `self < other`
///
/// Builds the comparison with [`binary_expr`] and [`Operator::Lt`].
pub fn lt(self, other: Expr) -> Expr {
    binary_expr(self, Operator::Lt, other)
}
| |
/// Return `self <= other`
///
/// Builds the comparison with [`binary_expr`] and [`Operator::LtEq`].
pub fn lt_eq(self, other: Expr) -> Expr {
    binary_expr(self, Operator::LtEq, other)
}
| |
/// Return `self && other` (logical `AND`)
///
/// Builds the expression with [`binary_expr`] and [`Operator::And`].
pub fn and(self, other: Expr) -> Expr {
    binary_expr(self, Operator::And, other)
}
| |
/// Return `self || other` (logical `OR`)
///
/// Builds the expression with [`binary_expr`] and [`Operator::Or`].
pub fn or(self, other: Expr) -> Expr {
    binary_expr(self, Operator::Or, other)
}
| |
| /// Return `self LIKE other` |
| pub fn like(self, other: Expr) -> Expr { |
| Expr::Like(Like::new( |
| false, |
| Box::new(self), |
| Box::new(other), |
| None, |
| false, |
| )) |
| } |
| |
| /// Return `self NOT LIKE other` |
| pub fn not_like(self, other: Expr) -> Expr { |
| Expr::Like(Like::new( |
| true, |
| Box::new(self), |
| Box::new(other), |
| None, |
| false, |
| )) |
| } |
| |
| /// Return `self ILIKE other` |
| pub fn ilike(self, other: Expr) -> Expr { |
| Expr::Like(Like::new( |
| false, |
| Box::new(self), |
| Box::new(other), |
| None, |
| true, |
| )) |
| } |
| |
| /// Return `self NOT ILIKE other` |
| pub fn not_ilike(self, other: Expr) -> Expr { |
| Expr::Like(Like::new(true, Box::new(self), Box::new(other), None, true)) |
| } |
| |
/// Return the name to use for the specific Expr
///
/// This is the string form of [`Self::schema_name`]; the current
/// implementation always returns `Ok`.
pub fn name_for_alias(&self) -> Result<String> {
    Ok(self.schema_name().to_string())
}
| |
| /// Ensure `expr` has the name as `original_name` by adding an |
| /// alias if necessary. |
| pub fn alias_if_changed(self, original_name: String) -> Result<Expr> { |
| let new_name = self.name_for_alias()?; |
| if new_name == original_name { |
| return Ok(self); |
| } |
| |
| Ok(self.alias(original_name)) |
| } |
| |
/// Return `self AS name` alias expression
///
/// The alias is created without a qualifier (relation).
pub fn alias(self, name: impl Into<String>) -> Expr {
    Expr::Alias(Alias::new(self, None::<&str>, name.into()))
}
| |
| /// Return `self AS name` alias expression with metadata |
| /// |
| /// The metadata will be attached to the Arrow Schema field when the expression |
| /// is converted to a field via `Expr.to_field()`. |
| /// |
| /// # Example |
| /// ``` |
| /// # use datafusion_expr::col; |
| /// # use std::collections::HashMap; |
| /// # use datafusion_common::metadata::FieldMetadata; |
| /// let metadata = HashMap::from([("key".to_string(), "value".to_string())]); |
| /// let metadata = FieldMetadata::from(metadata); |
| /// let expr = col("foo").alias_with_metadata("bar", Some(metadata)); |
| /// ``` |
| pub fn alias_with_metadata( |
| self, |
| name: impl Into<String>, |
| metadata: Option<FieldMetadata>, |
| ) -> Expr { |
| Expr::Alias(Alias::new(self, None::<&str>, name.into()).with_metadata(metadata)) |
| } |
| |
/// Return `self AS name` alias expression with a specific qualifier
///
/// Passing `None` as `relation` creates an alias without a qualifier.
pub fn alias_qualified(
    self,
    relation: Option<impl Into<TableReference>>,
    name: impl Into<String>,
) -> Expr {
    Expr::Alias(Alias::new(self, relation, name.into()))
}
| |
| /// Return `self AS name` alias expression with a specific qualifier and metadata |
| /// |
| /// The metadata will be attached to the Arrow Schema field when the expression |
| /// is converted to a field via `Expr.to_field()`. |
| /// |
| /// # Example |
| /// ``` |
| /// # use datafusion_expr::col; |
| /// # use std::collections::HashMap; |
| /// # use datafusion_common::metadata::FieldMetadata; |
| /// let metadata = HashMap::from([("key".to_string(), "value".to_string())]); |
| /// let metadata = FieldMetadata::from(metadata); |
| /// let expr = |
| /// col("foo").alias_qualified_with_metadata(Some("tbl"), "bar", Some(metadata)); |
| /// ``` |
| pub fn alias_qualified_with_metadata( |
| self, |
| relation: Option<impl Into<TableReference>>, |
| name: impl Into<String>, |
| metadata: Option<FieldMetadata>, |
| ) -> Expr { |
| Expr::Alias(Alias::new(self, relation, name.into()).with_metadata(metadata)) |
| } |
| |
| /// Remove an alias from an expression if one exists. |
| /// |
| /// If the expression is not an alias, the expression is returned unchanged. |
| /// This method does not remove aliases from nested expressions. |
| /// |
| /// # Example |
| /// ``` |
| /// # use datafusion_expr::col; |
| /// // `foo as "bar"` is unaliased to `foo` |
| /// let expr = col("foo").alias("bar"); |
| /// assert_eq!(expr.unalias(), col("foo")); |
| /// |
| /// // `foo as "bar" + baz` is not unaliased |
| /// let expr = col("foo").alias("bar") + col("baz"); |
| /// assert_eq!(expr.clone().unalias(), expr); |
| /// |
| /// // `foo as "bar" as "baz" is unaliased to foo as "bar" |
| /// let expr = col("foo").alias("bar").alias("baz"); |
| /// assert_eq!(expr.unalias(), col("foo").alias("bar")); |
| /// ``` |
| pub fn unalias(self) -> Expr { |
| match self { |
| Expr::Alias(alias) => *alias.expr, |
| _ => self, |
| } |
| } |
| |
| /// Recursively removed potentially multiple aliases from an expression. |
| /// |
| /// This method removes nested aliases and returns [`Transformed`] |
| /// to signal if the expression was changed. |
| /// |
| /// # Example |
| /// ``` |
| /// # use datafusion_expr::col; |
| /// // `foo as "bar"` is unaliased to `foo` |
| /// let expr = col("foo").alias("bar"); |
| /// assert_eq!(expr.unalias_nested().data, col("foo")); |
| /// |
| /// // `foo as "bar" + baz` is unaliased |
| /// let expr = col("foo").alias("bar") + col("baz"); |
| /// assert_eq!(expr.clone().unalias_nested().data, col("foo") + col("baz")); |
| /// |
| /// // `foo as "bar" as "baz" is unalaised to foo |
| /// let expr = col("foo").alias("bar").alias("baz"); |
| /// assert_eq!(expr.unalias_nested().data, col("foo")); |
| /// ``` |
| pub fn unalias_nested(self) -> Transformed<Expr> { |
| self.transform_down_up( |
| |expr| { |
| // f_down: skip subqueries. Check in f_down to avoid recursing into them |
| let recursion = if matches!( |
| expr, |
| Expr::Exists { .. } | Expr::ScalarSubquery(_) | Expr::InSubquery(_) |
| ) { |
| // Subqueries could contain aliases so don't recurse into those |
| TreeNodeRecursion::Jump |
| } else { |
| TreeNodeRecursion::Continue |
| }; |
| Ok(Transformed::new(expr, false, recursion)) |
| }, |
| |expr| { |
| // f_up: unalias on up so we can remove nested aliases like |
| // `(x as foo) as bar` |
| if let Expr::Alias(alias) = expr { |
| match alias |
| .metadata |
| .as_ref() |
| .map(|h| h.is_empty()) |
| .unwrap_or(true) |
| { |
| true => Ok(Transformed::yes(*alias.expr)), |
| false => Ok(Transformed::no(Expr::Alias(alias))), |
| } |
| } else { |
| Ok(Transformed::no(expr)) |
| } |
| }, |
| ) |
| // Unreachable code: internal closure doesn't return err |
| .unwrap() |
| } |
| |
/// Return `self IN <list>` if `negated` is false, otherwise
/// return `self NOT IN <list>`.
pub fn in_list(self, list: Vec<Expr>, negated: bool) -> Expr {
    Expr::InList(InList::new(Box::new(self), list, negated))
}
| |
/// Return `IsNull(Box(self))`, i.e. `self` boxed and wrapped in
/// [`Expr::IsNull`]
pub fn is_null(self) -> Expr {
    Expr::IsNull(Box::new(self))
}
| |
/// Return `IsNotNull(Box(self))`, i.e. `self` boxed and wrapped in
/// [`Expr::IsNotNull`]
pub fn is_not_null(self) -> Expr {
    Expr::IsNotNull(Box::new(self))
}
| |
/// Create a sort configuration from an existing expression.
///
/// `asc` and `nulls_first` are passed through to [`Sort::new`] unchanged.
///
/// ```
/// # use datafusion_expr::col;
/// let sort_expr = col("foo").sort(true, true); // SORT ASC NULLS_FIRST
/// ```
pub fn sort(self, asc: bool, nulls_first: bool) -> Sort {
    Sort::new(self, asc, nulls_first)
}
| |
/// Return `IsTrue(Box(self))`, i.e. `self` boxed and wrapped in
/// [`Expr::IsTrue`]
pub fn is_true(self) -> Expr {
    Expr::IsTrue(Box::new(self))
}
| |
/// Return `IsNotTrue(Box(self))`, i.e. `self` boxed and wrapped in
/// [`Expr::IsNotTrue`]
pub fn is_not_true(self) -> Expr {
    Expr::IsNotTrue(Box::new(self))
}
| |
/// Return `IsFalse(Box(self))`, i.e. `self` boxed and wrapped in
/// [`Expr::IsFalse`]
pub fn is_false(self) -> Expr {
    Expr::IsFalse(Box::new(self))
}
| |
/// Return `IsNotFalse(Box(self))`, i.e. `self` boxed and wrapped in
/// [`Expr::IsNotFalse`]
pub fn is_not_false(self) -> Expr {
    Expr::IsNotFalse(Box::new(self))
}
| |
/// Return `IsUnknown(Box(self))`, i.e. `self` boxed and wrapped in
/// [`Expr::IsUnknown`]
pub fn is_unknown(self) -> Expr {
    Expr::IsUnknown(Box::new(self))
}
| |
/// Return `IsNotUnknown(Box(self))`, i.e. `self` boxed and wrapped in
/// [`Expr::IsNotUnknown`]
pub fn is_not_unknown(self) -> Expr {
    Expr::IsNotUnknown(Box::new(self))
}
| |
| /// return `self BETWEEN low AND high` |
| pub fn between(self, low: Expr, high: Expr) -> Expr { |
| Expr::Between(Between::new( |
| Box::new(self), |
| false, |
| Box::new(low), |
| Box::new(high), |
| )) |
| } |
| |
| /// Return `self NOT BETWEEN low AND high` |
| pub fn not_between(self, low: Expr, high: Expr) -> Expr { |
| Expr::Between(Between::new( |
| Box::new(self), |
| true, |
| Box::new(low), |
| Box::new(high), |
| )) |
| } |
| /// Return a reference to the inner `Column` if any |
| /// |
| /// returns `None` if the expression is not a `Column` |
| /// |
| /// Note: None may be returned for expressions that are not `Column` but |
| /// are convertible to `Column` such as `Cast` expressions. |
| /// |
| /// Example |
| /// ``` |
| /// # use datafusion_common::Column; |
| /// use datafusion_expr::{col, Expr}; |
| /// let expr = col("foo"); |
| /// assert_eq!(expr.try_as_col(), Some(&Column::from("foo"))); |
| /// |
| /// let expr = col("foo").alias("bar"); |
| /// assert_eq!(expr.try_as_col(), None); |
| /// ``` |
| pub fn try_as_col(&self) -> Option<&Column> { |
| if let Expr::Column(it) = self { |
| Some(it) |
| } else { |
| None |
| } |
| } |
| |
| /// Returns the inner `Column` if any. This is a specialized version of |
| /// [`Self::try_as_col`] that take Cast expressions into account when the |
| /// expression is as on condition for joins. |
| /// |
| /// Called this method when you are sure that the expression is a `Column` |
| /// or a `Cast` expression that wraps a `Column`. |
| pub fn get_as_join_column(&self) -> Option<&Column> { |
| match self { |
| Expr::Column(c) => Some(c), |
| Expr::Cast(Cast { expr, .. }) => match &**expr { |
| Expr::Column(c) => Some(c), |
| _ => None, |
| }, |
| _ => None, |
| } |
| } |
| |
| /// Return all references to columns in this expression. |
| /// |
| /// # Example |
| /// ``` |
| /// # use std::collections::HashSet; |
| /// # use datafusion_common::Column; |
| /// # use datafusion_expr::col; |
| /// // For an expression `a + (b * a)` |
| /// let expr = col("a") + (col("b") * col("a")); |
| /// let refs = expr.column_refs(); |
| /// // refs contains "a" and "b" |
| /// assert_eq!(refs.len(), 2); |
| /// assert!(refs.contains(&Column::new_unqualified("a"))); |
| /// assert!(refs.contains(&Column::new_unqualified("b"))); |
| /// ``` |
| pub fn column_refs(&self) -> HashSet<&Column> { |
| let mut using_columns = HashSet::new(); |
| self.add_column_refs(&mut using_columns); |
| using_columns |
| } |
| |
| /// Adds references to all columns in this expression to the set |
| /// |
| /// See [`Self::column_refs`] for details |
| pub fn add_column_refs<'a>(&'a self, set: &mut HashSet<&'a Column>) { |
| self.apply(|expr| { |
| if let Expr::Column(col) = expr { |
| set.insert(col); |
| } |
| Ok(TreeNodeRecursion::Continue) |
| }) |
| .expect("traversal is infallible"); |
| } |
| |
| /// Return all references to columns and their occurrence counts in the expression. |
| /// |
| /// # Example |
| /// ``` |
| /// # use std::collections::HashMap; |
| /// # use datafusion_common::Column; |
| /// # use datafusion_expr::col; |
| /// // For an expression `a + (b * a)` |
| /// let expr = col("a") + (col("b") * col("a")); |
| /// let mut refs = expr.column_refs_counts(); |
| /// // refs contains "a" and "b" |
| /// assert_eq!(refs.len(), 2); |
| /// assert_eq!(*refs.get(&Column::new_unqualified("a")).unwrap(), 2); |
| /// assert_eq!(*refs.get(&Column::new_unqualified("b")).unwrap(), 1); |
| /// ``` |
| pub fn column_refs_counts(&self) -> HashMap<&Column, usize> { |
| let mut map = HashMap::new(); |
| self.add_column_ref_counts(&mut map); |
| map |
| } |
| |
| /// Adds references to all columns and their occurrence counts in the expression to |
| /// the map. |
| /// |
| /// See [`Self::column_refs_counts`] for details |
| pub fn add_column_ref_counts<'a>(&'a self, map: &mut HashMap<&'a Column, usize>) { |
| self.apply(|expr| { |
| if let Expr::Column(col) = expr { |
| *map.entry(col).or_default() += 1; |
| } |
| Ok(TreeNodeRecursion::Continue) |
| }) |
| .expect("traversal is infallible"); |
| } |
| |
| /// Returns true if there are any column references in this Expr |
| pub fn any_column_refs(&self) -> bool { |
| self.exists(|expr| Ok(matches!(expr, Expr::Column(_)))) |
| .expect("exists closure is infallible") |
| } |
| |
| /// Return true if the expression contains out reference(correlated) expressions. |
| pub fn contains_outer(&self) -> bool { |
| self.exists(|expr| Ok(matches!(expr, Expr::OuterReferenceColumn { .. }))) |
| .expect("exists closure is infallible") |
| } |
| |
| /// Returns true if the expression node is volatile, i.e. whether it can return |
| /// different results when evaluated multiple times with the same input. |
| /// Note: unlike [`Self::is_volatile`], this function does not consider inputs: |
| /// - `rand()` returns `true`, |
| /// - `a + rand()` returns `false` |
| pub fn is_volatile_node(&self) -> bool { |
| matches!(self, Expr::ScalarFunction(func) if func.func.signature().volatility == Volatility::Volatile) |
| } |
| |
| /// Returns true if the expression is volatile, i.e. whether it can return different |
| /// results when evaluated multiple times with the same input. |
| /// |
| /// For example the function call `RANDOM()` is volatile as each call will |
| /// return a different value. |
| /// |
| /// See [`Volatility`] for more information. |
| pub fn is_volatile(&self) -> bool { |
| self.exists(|expr| Ok(expr.is_volatile_node())) |
| .expect("exists closure is infallible") |
| } |
| |
| /// Recursively find all [`Expr::Placeholder`] expressions, and |
| /// to infer their [`DataType`] from the context of their use. |
| /// |
| /// For example, given an expression like `<int32> = $0` will infer `$0` to |
| /// have type `int32`. |
| /// |
| /// Returns transformed expression and flag that is true if expression contains |
| /// at least one placeholder. |
| pub fn infer_placeholder_types(self, schema: &DFSchema) -> Result<(Expr, bool)> { |
| let mut has_placeholder = false; |
| self.transform(|mut expr| { |
| match &mut expr { |
| // Default to assuming the arguments are the same type |
| Expr::BinaryExpr(BinaryExpr { left, op: _, right }) => { |
| rewrite_placeholder(left.as_mut(), right.as_ref(), schema)?; |
| rewrite_placeholder(right.as_mut(), left.as_ref(), schema)?; |
| } |
| Expr::Between(Between { |
| expr, |
| negated: _, |
| low, |
| high, |
| }) => { |
| rewrite_placeholder(low.as_mut(), expr.as_ref(), schema)?; |
| rewrite_placeholder(high.as_mut(), expr.as_ref(), schema)?; |
| } |
| Expr::InList(InList { |
| expr, |
| list, |
| negated: _, |
| }) => { |
| for item in list.iter_mut() { |
| rewrite_placeholder(item, expr.as_ref(), schema)?; |
| } |
| } |
| Expr::Like(Like { expr, pattern, .. }) |
| | Expr::SimilarTo(Like { expr, pattern, .. }) => { |
| rewrite_placeholder(pattern.as_mut(), expr.as_ref(), schema)?; |
| } |
| Expr::Placeholder(_) => { |
| has_placeholder = true; |
| } |
| _ => {} |
| } |
| Ok(Transformed::yes(expr)) |
| }) |
| .data() |
| .map(|data| (data, has_placeholder)) |
| } |
| |
| /// Returns true if some of this `exprs` subexpressions may not be evaluated |
| /// and thus any side effects (like divide by zero) may not be encountered |
| pub fn short_circuits(&self) -> bool { |
| match self { |
| Expr::ScalarFunction(ScalarFunction { func, .. }) => func.short_circuits(), |
| Expr::BinaryExpr(BinaryExpr { op, .. }) => { |
| matches!(op, Operator::And | Operator::Or) |
| } |
| Expr::Case { .. } => true, |
| // Use explicit pattern match instead of a default |
| // implementation, so that in the future if someone adds |
| // new Expr types, they will check here as well |
| // TODO: remove the next line after `Expr::Wildcard` is removed |
| #[expect(deprecated)] |
| Expr::AggregateFunction(..) |
| | Expr::Alias(..) |
| | Expr::Between(..) |
| | Expr::Cast(..) |
| | Expr::Column(..) |
| | Expr::Exists(..) |
| | Expr::GroupingSet(..) |
| | Expr::InList(..) |
| | Expr::InSubquery(..) |
| | Expr::SetComparison(..) |
| | Expr::IsFalse(..) |
| | Expr::IsNotFalse(..) |
| | Expr::IsNotNull(..) |
| | Expr::IsNotTrue(..) |
| | Expr::IsNotUnknown(..) |
| | Expr::IsNull(..) |
| | Expr::IsTrue(..) |
| | Expr::IsUnknown(..) |
| | Expr::Like(..) |
| | Expr::ScalarSubquery(..) |
| | Expr::ScalarVariable(_, _) |
| | Expr::SimilarTo(..) |
| | Expr::Not(..) |
| | Expr::Negative(..) |
| | Expr::OuterReferenceColumn(_, _) |
| | Expr::TryCast(..) |
| | Expr::Unnest(..) |
| | Expr::Wildcard { .. } |
| | Expr::WindowFunction(..) |
| | Expr::Literal(..) |
| | Expr::Placeholder(..) => false, |
| } |
| } |
| |
| /// Returns a reference to the set of locations in the SQL query where this |
| /// expression appears, if known. [`None`] is returned if the expression |
| /// type doesn't support tracking locations yet. |
| pub fn spans(&self) -> Option<&Spans> { |
| match self { |
| Expr::Column(col) => Some(&col.spans), |
| _ => None, |
| } |
| } |
| |
| /// Check if the Expr is literal and get the literal value if it is. |
| pub fn as_literal(&self) -> Option<&ScalarValue> { |
| if let Expr::Literal(lit, _) = self { |
| Some(lit) |
| } else { |
| None |
| } |
| } |
| } |
| |
| impl Normalizeable for Expr { |
| fn can_normalize(&self) -> bool { |
| #[expect(clippy::match_like_matches_macro)] |
| match self { |
| Expr::BinaryExpr(BinaryExpr { |
| op: |
| _op @ (Operator::Plus |
| | Operator::Multiply |
| | Operator::BitwiseAnd |
| | Operator::BitwiseOr |
| | Operator::BitwiseXor |
| | Operator::Eq |
| | Operator::NotEq), |
| .. |
| }) => true, |
| _ => false, |
| } |
| } |
| } |
| |
| impl NormalizeEq for Expr { |
| fn normalize_eq(&self, other: &Self) -> bool { |
| match (self, other) { |
| ( |
| Expr::BinaryExpr(BinaryExpr { |
| left: self_left, |
| op: self_op, |
| right: self_right, |
| }), |
| Expr::BinaryExpr(BinaryExpr { |
| left: other_left, |
| op: other_op, |
| right: other_right, |
| }), |
| ) => { |
| if self_op != other_op { |
| return false; |
| } |
| |
| if matches!( |
| self_op, |
| Operator::Plus |
| | Operator::Multiply |
| | Operator::BitwiseAnd |
| | Operator::BitwiseOr |
| | Operator::BitwiseXor |
| | Operator::Eq |
| | Operator::NotEq |
| ) { |
| (self_left.normalize_eq(other_left) |
| && self_right.normalize_eq(other_right)) |
| || (self_left.normalize_eq(other_right) |
| && self_right.normalize_eq(other_left)) |
| } else { |
| self_left.normalize_eq(other_left) |
| && self_right.normalize_eq(other_right) |
| } |
| } |
| ( |
| Expr::Alias(Alias { |
| expr: self_expr, |
| relation: self_relation, |
| name: self_name, |
| .. |
| }), |
| Expr::Alias(Alias { |
| expr: other_expr, |
| relation: other_relation, |
| name: other_name, |
| .. |
| }), |
| ) => { |
| self_name == other_name |
| && self_relation == other_relation |
| && self_expr.normalize_eq(other_expr) |
| } |
| ( |
| Expr::Like(Like { |
| negated: self_negated, |
| expr: self_expr, |
| pattern: self_pattern, |
| escape_char: self_escape_char, |
| case_insensitive: self_case_insensitive, |
| }), |
| Expr::Like(Like { |
| negated: other_negated, |
| expr: other_expr, |
| pattern: other_pattern, |
| escape_char: other_escape_char, |
| case_insensitive: other_case_insensitive, |
| }), |
| ) |
| | ( |
| Expr::SimilarTo(Like { |
| negated: self_negated, |
| expr: self_expr, |
| pattern: self_pattern, |
| escape_char: self_escape_char, |
| case_insensitive: self_case_insensitive, |
| }), |
| Expr::SimilarTo(Like { |
| negated: other_negated, |
| expr: other_expr, |
| pattern: other_pattern, |
| escape_char: other_escape_char, |
| case_insensitive: other_case_insensitive, |
| }), |
| ) => { |
| self_negated == other_negated |
| && self_escape_char == other_escape_char |
| && self_case_insensitive == other_case_insensitive |
| && self_expr.normalize_eq(other_expr) |
| && self_pattern.normalize_eq(other_pattern) |
| } |
| (Expr::Not(self_expr), Expr::Not(other_expr)) |
| | (Expr::IsNull(self_expr), Expr::IsNull(other_expr)) |
| | (Expr::IsTrue(self_expr), Expr::IsTrue(other_expr)) |
| | (Expr::IsFalse(self_expr), Expr::IsFalse(other_expr)) |
| | (Expr::IsUnknown(self_expr), Expr::IsUnknown(other_expr)) |
| | (Expr::IsNotNull(self_expr), Expr::IsNotNull(other_expr)) |
| | (Expr::IsNotTrue(self_expr), Expr::IsNotTrue(other_expr)) |
| | (Expr::IsNotFalse(self_expr), Expr::IsNotFalse(other_expr)) |
| | (Expr::IsNotUnknown(self_expr), Expr::IsNotUnknown(other_expr)) |
| | (Expr::Negative(self_expr), Expr::Negative(other_expr)) |
| | ( |
| Expr::Unnest(Unnest { expr: self_expr }), |
| Expr::Unnest(Unnest { expr: other_expr }), |
| ) => self_expr.normalize_eq(other_expr), |
| ( |
| Expr::Between(Between { |
| expr: self_expr, |
| negated: self_negated, |
| low: self_low, |
| high: self_high, |
| }), |
| Expr::Between(Between { |
| expr: other_expr, |
| negated: other_negated, |
| low: other_low, |
| high: other_high, |
| }), |
| ) => { |
| self_negated == other_negated |
| && self_expr.normalize_eq(other_expr) |
| && self_low.normalize_eq(other_low) |
| && self_high.normalize_eq(other_high) |
| } |
| ( |
| Expr::Cast(Cast { |
| expr: self_expr, |
| data_type: self_data_type, |
| }), |
| Expr::Cast(Cast { |
| expr: other_expr, |
| data_type: other_data_type, |
| }), |
| ) |
| | ( |
| Expr::TryCast(TryCast { |
| expr: self_expr, |
| data_type: self_data_type, |
| }), |
| Expr::TryCast(TryCast { |
| expr: other_expr, |
| data_type: other_data_type, |
| }), |
| ) => self_data_type == other_data_type && self_expr.normalize_eq(other_expr), |
| ( |
| Expr::ScalarFunction(ScalarFunction { |
| func: self_func, |
| args: self_args, |
| }), |
| Expr::ScalarFunction(ScalarFunction { |
| func: other_func, |
| args: other_args, |
| }), |
| ) => { |
| self_func.name() == other_func.name() |
| && self_args.len() == other_args.len() |
| && self_args |
| .iter() |
| .zip(other_args.iter()) |
| .all(|(a, b)| a.normalize_eq(b)) |
| } |
| ( |
| Expr::AggregateFunction(AggregateFunction { |
| func: self_func, |
| params: |
| AggregateFunctionParams { |
| args: self_args, |
| distinct: self_distinct, |
| filter: self_filter, |
| order_by: self_order_by, |
| null_treatment: self_null_treatment, |
| }, |
| }), |
| Expr::AggregateFunction(AggregateFunction { |
| func: other_func, |
| params: |
| AggregateFunctionParams { |
| args: other_args, |
| distinct: other_distinct, |
| filter: other_filter, |
| order_by: other_order_by, |
| null_treatment: other_null_treatment, |
| }, |
| }), |
| ) => { |
| self_func.name() == other_func.name() |
| && self_distinct == other_distinct |
| && self_null_treatment == other_null_treatment |
| && self_args.len() == other_args.len() |
| && self_args |
| .iter() |
| .zip(other_args.iter()) |
| .all(|(a, b)| a.normalize_eq(b)) |
| && match (self_filter, other_filter) { |
| (Some(self_filter), Some(other_filter)) => { |
| self_filter.normalize_eq(other_filter) |
| } |
| (None, None) => true, |
| _ => false, |
| } |
| && self_order_by |
| .iter() |
| .zip(other_order_by.iter()) |
| .all(|(a, b)| { |
| a.asc == b.asc |
| && a.nulls_first == b.nulls_first |
| && a.expr.normalize_eq(&b.expr) |
| }) |
| && self_order_by.len() == other_order_by.len() |
| } |
| (Expr::WindowFunction(left), Expr::WindowFunction(other)) => { |
| let WindowFunction { |
| fun: self_fun, |
| params: |
| WindowFunctionParams { |
| args: self_args, |
| window_frame: self_window_frame, |
| partition_by: self_partition_by, |
| order_by: self_order_by, |
| filter: self_filter, |
| null_treatment: self_null_treatment, |
| distinct: self_distinct, |
| }, |
| } = left.as_ref(); |
| let WindowFunction { |
| fun: other_fun, |
| params: |
| WindowFunctionParams { |
| args: other_args, |
| window_frame: other_window_frame, |
| partition_by: other_partition_by, |
| order_by: other_order_by, |
| filter: other_filter, |
| null_treatment: other_null_treatment, |
| distinct: other_distinct, |
| }, |
| } = other.as_ref(); |
| |
| self_fun.name() == other_fun.name() |
| && self_window_frame == other_window_frame |
| && match (self_filter, other_filter) { |
| (Some(a), Some(b)) => a.normalize_eq(b), |
| (None, None) => true, |
| _ => false, |
| } |
| && self_null_treatment == other_null_treatment |
| && self_args.len() == other_args.len() |
| && self_args |
| .iter() |
| .zip(other_args.iter()) |
| .all(|(a, b)| a.normalize_eq(b)) |
| && self_partition_by |
| .iter() |
| .zip(other_partition_by.iter()) |
| .all(|(a, b)| a.normalize_eq(b)) |
| && self_order_by |
| .iter() |
| .zip(other_order_by.iter()) |
| .all(|(a, b)| { |
| a.asc == b.asc |
| && a.nulls_first == b.nulls_first |
| && a.expr.normalize_eq(&b.expr) |
| }) |
| && self_distinct == other_distinct |
| } |
| ( |
| Expr::Exists(Exists { |
| subquery: self_subquery, |
| negated: self_negated, |
| }), |
| Expr::Exists(Exists { |
| subquery: other_subquery, |
| negated: other_negated, |
| }), |
| ) => { |
| self_negated == other_negated |
| && self_subquery.normalize_eq(other_subquery) |
| } |
| ( |
| Expr::InSubquery(InSubquery { |
| expr: self_expr, |
| subquery: self_subquery, |
| negated: self_negated, |
| }), |
| Expr::InSubquery(InSubquery { |
| expr: other_expr, |
| subquery: other_subquery, |
| negated: other_negated, |
| }), |
| ) => { |
| self_negated == other_negated |
| && self_expr.normalize_eq(other_expr) |
| && self_subquery.normalize_eq(other_subquery) |
| } |
| ( |
| Expr::ScalarSubquery(self_subquery), |
| Expr::ScalarSubquery(other_subquery), |
| ) => self_subquery.normalize_eq(other_subquery), |
| ( |
| Expr::GroupingSet(GroupingSet::Rollup(self_exprs)), |
| Expr::GroupingSet(GroupingSet::Rollup(other_exprs)), |
| ) |
| | ( |
| Expr::GroupingSet(GroupingSet::Cube(self_exprs)), |
| Expr::GroupingSet(GroupingSet::Cube(other_exprs)), |
| ) => { |
| self_exprs.len() == other_exprs.len() |
| && self_exprs |
| .iter() |
| .zip(other_exprs.iter()) |
| .all(|(a, b)| a.normalize_eq(b)) |
| } |
| ( |
| Expr::GroupingSet(GroupingSet::GroupingSets(self_exprs)), |
| Expr::GroupingSet(GroupingSet::GroupingSets(other_exprs)), |
| ) => { |
| self_exprs.len() == other_exprs.len() |
| && self_exprs.iter().zip(other_exprs.iter()).all(|(a, b)| { |
| a.len() == b.len() |
| && a.iter().zip(b.iter()).all(|(x, y)| x.normalize_eq(y)) |
| }) |
| } |
| ( |
| Expr::InList(InList { |
| expr: self_expr, |
| list: self_list, |
| negated: self_negated, |
| }), |
| Expr::InList(InList { |
| expr: other_expr, |
| list: other_list, |
| negated: other_negated, |
| }), |
| ) => { |
| // TODO: normalize_eq for lists, for example `a IN (c1 + c3, c3)` is equal to `a IN (c3, c1 + c3)` |
| self_negated == other_negated |
| && self_expr.normalize_eq(other_expr) |
| && self_list.len() == other_list.len() |
| && self_list |
| .iter() |
| .zip(other_list.iter()) |
| .all(|(a, b)| a.normalize_eq(b)) |
| } |
| ( |
| Expr::Case(Case { |
| expr: self_expr, |
| when_then_expr: self_when_then_expr, |
| else_expr: self_else_expr, |
| }), |
| Expr::Case(Case { |
| expr: other_expr, |
| when_then_expr: other_when_then_expr, |
| else_expr: other_else_expr, |
| }), |
| ) => { |
| // TODO: normalize_eq for when_then_expr |
| // for example `CASE a WHEN 1 THEN 2 WHEN 3 THEN 4 ELSE 5 END` is equal to `CASE a WHEN 3 THEN 4 WHEN 1 THEN 2 ELSE 5 END` |
| self_when_then_expr.len() == other_when_then_expr.len() |
| && self_when_then_expr |
| .iter() |
| .zip(other_when_then_expr.iter()) |
| .all(|((self_when, self_then), (other_when, other_then))| { |
| self_when.normalize_eq(other_when) |
| && self_then.normalize_eq(other_then) |
| }) |
| && match (self_expr, other_expr) { |
| (Some(self_expr), Some(other_expr)) => { |
| self_expr.normalize_eq(other_expr) |
| } |
| (None, None) => true, |
| (_, _) => false, |
| } |
| && match (self_else_expr, other_else_expr) { |
| (Some(self_else_expr), Some(other_else_expr)) => { |
| self_else_expr.normalize_eq(other_else_expr) |
| } |
| (None, None) => true, |
| (_, _) => false, |
| } |
| } |
| (_, _) => self == other, |
| } |
| } |
| } |
| |
| impl HashNode for Expr { |
| /// As it is pretty easy to forget changing this method when `Expr` changes the |
| /// implementation doesn't use wildcard patterns (`..`, `_`) to catch changes |
| /// compile time. |
| fn hash_node<H: Hasher>(&self, state: &mut H) { |
| mem::discriminant(self).hash(state); |
| match self { |
| Expr::Alias(Alias { |
| expr: _expr, |
| relation, |
| name, |
| .. |
| }) => { |
| relation.hash(state); |
| name.hash(state); |
| } |
| Expr::Column(column) => { |
| column.hash(state); |
| } |
| Expr::ScalarVariable(field, name) => { |
| field.hash(state); |
| name.hash(state); |
| } |
| Expr::Literal(scalar_value, _) => { |
| scalar_value.hash(state); |
| } |
| Expr::BinaryExpr(BinaryExpr { |
| left: _left, |
| op, |
| right: _right, |
| }) => { |
| op.hash(state); |
| } |
| Expr::Like(Like { |
| negated, |
| expr: _expr, |
| pattern: _pattern, |
| escape_char, |
| case_insensitive, |
| }) |
| | Expr::SimilarTo(Like { |
| negated, |
| expr: _expr, |
| pattern: _pattern, |
| escape_char, |
| case_insensitive, |
| }) => { |
| negated.hash(state); |
| escape_char.hash(state); |
| case_insensitive.hash(state); |
| } |
| Expr::Not(_expr) |
| | Expr::IsNotNull(_expr) |
| | Expr::IsNull(_expr) |
| | Expr::IsTrue(_expr) |
| | Expr::IsFalse(_expr) |
| | Expr::IsUnknown(_expr) |
| | Expr::IsNotTrue(_expr) |
| | Expr::IsNotFalse(_expr) |
| | Expr::IsNotUnknown(_expr) |
| | Expr::Negative(_expr) => {} |
| Expr::Between(Between { |
| expr: _expr, |
| negated, |
| low: _low, |
| high: _high, |
| }) => { |
| negated.hash(state); |
| } |
| Expr::Case(Case { |
| expr: _expr, |
| when_then_expr: _when_then_expr, |
| else_expr: _else_expr, |
| }) => {} |
| Expr::Cast(Cast { |
| expr: _expr, |
| data_type, |
| }) |
| | Expr::TryCast(TryCast { |
| expr: _expr, |
| data_type, |
| }) => { |
| data_type.hash(state); |
| } |
| Expr::ScalarFunction(ScalarFunction { func, args: _args }) => { |
| func.hash(state); |
| } |
| Expr::AggregateFunction(AggregateFunction { |
| func, |
| params: |
| AggregateFunctionParams { |
| args: _args, |
| distinct, |
| filter: _, |
| order_by: _, |
| null_treatment, |
| }, |
| }) => { |
| func.hash(state); |
| distinct.hash(state); |
| null_treatment.hash(state); |
| } |
| Expr::WindowFunction(window_fun) => { |
| let WindowFunction { |
| fun, |
| params: |
| WindowFunctionParams { |
| args: _args, |
| partition_by: _, |
| order_by: _, |
| window_frame, |
| filter, |
| null_treatment, |
| distinct, |
| }, |
| } = window_fun.as_ref(); |
| fun.hash(state); |
| window_frame.hash(state); |
| filter.hash(state); |
| null_treatment.hash(state); |
| distinct.hash(state); |
| } |
| Expr::InList(InList { |
| expr: _expr, |
| list: _list, |
| negated, |
| }) => { |
| negated.hash(state); |
| } |
| Expr::Exists(Exists { subquery, negated }) => { |
| subquery.hash(state); |
| negated.hash(state); |
| } |
| Expr::InSubquery(InSubquery { |
| expr: _expr, |
| subquery, |
| negated, |
| }) => { |
| subquery.hash(state); |
| negated.hash(state); |
| } |
| Expr::SetComparison(SetComparison { |
| expr: _, |
| subquery, |
| op, |
| quantifier, |
| }) => { |
| subquery.hash(state); |
| op.hash(state); |
| quantifier.hash(state); |
| } |
| Expr::ScalarSubquery(subquery) => { |
| subquery.hash(state); |
| } |
| #[expect(deprecated)] |
| Expr::Wildcard { qualifier, options } => { |
| qualifier.hash(state); |
| options.hash(state); |
| } |
| Expr::GroupingSet(grouping_set) => { |
| mem::discriminant(grouping_set).hash(state); |
| match grouping_set { |
| GroupingSet::Rollup(_exprs) | GroupingSet::Cube(_exprs) => {} |
| GroupingSet::GroupingSets(_exprs) => {} |
| } |
| } |
| Expr::Placeholder(place_holder) => { |
| place_holder.hash(state); |
| } |
| Expr::OuterReferenceColumn(field, column) => { |
| field.hash(state); |
| column.hash(state); |
| } |
| Expr::Unnest(Unnest { expr: _expr }) => {} |
| }; |
| } |
| } |
| |
| // Modifies expr to match the DataType, metadata, and nullability of other if it is |
| // a placeholder with previously unspecified type information (i.e., most placeholders) |
| fn rewrite_placeholder(expr: &mut Expr, other: &Expr, schema: &DFSchema) -> Result<()> { |
| if let Expr::Placeholder(Placeholder { id: _, field }) = expr |
| && field.is_none() |
| { |
| let other_field = other.to_field(schema); |
| match other_field { |
| Err(e) => { |
| Err(e.context(format!( |
| "Can not find type of {other} needed to infer type of {expr}" |
| )))?; |
| } |
| Ok((_, other_field)) => { |
| // We can't infer the nullability of the future parameter that might |
| // be bound, so ensure this is set to true |
| *field = Some(other_field.as_ref().clone().with_nullable(true).into()); |
| } |
| } |
| }; |
| Ok(()) |
| } |
| |
// Renders every element of `$ARRAY` with its `Display` implementation and joins
// the results with ", ".
#[macro_export]
macro_rules! expr_vec_fmt {
    ( $ARRAY:expr ) => {{
        let rendered: Vec<String> =
            $ARRAY.iter().map(|item| item.to_string()).collect();
        rendered.join(", ")
    }};
}
| |
| struct SchemaDisplay<'a>(&'a Expr); |
| impl Display for SchemaDisplay<'_> { |
| fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { |
| match self.0 { |
| // The same as Display |
| // TODO: remove the next line after `Expr::Wildcard` is removed |
| #[expect(deprecated)] |
| Expr::Column(_) |
| | Expr::Literal(_, _) |
| | Expr::ScalarVariable(..) |
| | Expr::OuterReferenceColumn(..) |
| | Expr::Placeholder(_) |
| | Expr::Wildcard { .. } => write!(f, "{}", self.0), |
| Expr::AggregateFunction(AggregateFunction { func, params }) => { |
| match func.schema_name(params) { |
| Ok(name) => { |
| write!(f, "{name}") |
| } |
| Err(e) => { |
| write!(f, "got error from schema_name {e}") |
| } |
| } |
| } |
| // Expr is not shown since it is aliased |
| Expr::Alias(Alias { |
| name, |
| relation: Some(relation), |
| .. |
| }) => write!(f, "{relation}.{name}"), |
| Expr::Alias(Alias { name, .. }) => write!(f, "{name}"), |
| Expr::Between(Between { |
| expr, |
| negated, |
| low, |
| high, |
| }) => { |
| if *negated { |
| write!( |
| f, |
| "{} NOT BETWEEN {} AND {}", |
| SchemaDisplay(expr), |
| SchemaDisplay(low), |
| SchemaDisplay(high), |
| ) |
| } else { |
| write!( |
| f, |
| "{} BETWEEN {} AND {}", |
| SchemaDisplay(expr), |
| SchemaDisplay(low), |
| SchemaDisplay(high), |
| ) |
| } |
| } |
| Expr::BinaryExpr(BinaryExpr { left, op, right }) => { |
| write!(f, "{} {op} {}", SchemaDisplay(left), SchemaDisplay(right),) |
| } |
| Expr::Case(Case { |
| expr, |
| when_then_expr, |
| else_expr, |
| }) => { |
| write!(f, "CASE ")?; |
| |
| if let Some(e) = expr { |
| write!(f, "{} ", SchemaDisplay(e))?; |
| } |
| |
| for (when, then) in when_then_expr { |
| write!( |
| f, |
| "WHEN {} THEN {} ", |
| SchemaDisplay(when), |
| SchemaDisplay(then), |
| )?; |
| } |
| |
| if let Some(e) = else_expr { |
| write!(f, "ELSE {} ", SchemaDisplay(e))?; |
| } |
| |
| write!(f, "END") |
| } |
| // Cast expr is not shown to be consistent with Postgres and Spark <https://github.com/apache/datafusion/pull/3222> |
| Expr::Cast(Cast { expr, .. }) | Expr::TryCast(TryCast { expr, .. }) => { |
| write!(f, "{}", SchemaDisplay(expr)) |
| } |
| Expr::InList(InList { |
| expr, |
| list, |
| negated, |
| }) => { |
| let inlist_name = schema_name_from_exprs(list)?; |
| |
| if *negated { |
| write!(f, "{} NOT IN {}", SchemaDisplay(expr), inlist_name) |
| } else { |
| write!(f, "{} IN {}", SchemaDisplay(expr), inlist_name) |
| } |
| } |
| Expr::Exists(Exists { negated: true, .. }) => write!(f, "NOT EXISTS"), |
| Expr::Exists(Exists { negated: false, .. }) => write!(f, "EXISTS"), |
| Expr::GroupingSet(GroupingSet::Cube(exprs)) => { |
| write!(f, "ROLLUP ({})", schema_name_from_exprs(exprs)?) |
| } |
| Expr::GroupingSet(GroupingSet::GroupingSets(lists_of_exprs)) => { |
| write!(f, "GROUPING SETS (")?; |
| for exprs in lists_of_exprs.iter() { |
| write!(f, "({})", schema_name_from_exprs(exprs)?)?; |
| } |
| write!(f, ")") |
| } |
| Expr::GroupingSet(GroupingSet::Rollup(exprs)) => { |
| write!(f, "ROLLUP ({})", schema_name_from_exprs(exprs)?) |
| } |
| Expr::IsNull(expr) => write!(f, "{} IS NULL", SchemaDisplay(expr)), |
| Expr::IsNotNull(expr) => { |
| write!(f, "{} IS NOT NULL", SchemaDisplay(expr)) |
| } |
| Expr::IsUnknown(expr) => { |
| write!(f, "{} IS UNKNOWN", SchemaDisplay(expr)) |
| } |
| Expr::IsNotUnknown(expr) => { |
| write!(f, "{} IS NOT UNKNOWN", SchemaDisplay(expr)) |
| } |
| Expr::InSubquery(InSubquery { negated: true, .. }) => { |
| write!(f, "NOT IN") |
| } |
| Expr::InSubquery(InSubquery { negated: false, .. }) => write!(f, "IN"), |
| Expr::SetComparison(SetComparison { |
| expr, |
| op, |
| quantifier, |
| .. |
| }) => write!(f, "{} {op} {quantifier}", SchemaDisplay(expr.as_ref())), |
| Expr::IsTrue(expr) => write!(f, "{} IS TRUE", SchemaDisplay(expr)), |
| Expr::IsFalse(expr) => write!(f, "{} IS FALSE", SchemaDisplay(expr)), |
| Expr::IsNotTrue(expr) => { |
| write!(f, "{} IS NOT TRUE", SchemaDisplay(expr)) |
| } |
| Expr::IsNotFalse(expr) => { |
| write!(f, "{} IS NOT FALSE", SchemaDisplay(expr)) |
| } |
| Expr::Like(Like { |
| negated, |
| expr, |
| pattern, |
| escape_char, |
| case_insensitive, |
| }) => { |
| write!( |
| f, |
| "{} {}{} {}", |
| SchemaDisplay(expr), |
| if *negated { "NOT " } else { "" }, |
| if *case_insensitive { "ILIKE" } else { "LIKE" }, |
| SchemaDisplay(pattern), |
| )?; |
| |
| if let Some(char) = escape_char { |
| write!(f, " CHAR '{char}'")?; |
| } |
| |
| Ok(()) |
| } |
| Expr::Negative(expr) => write!(f, "(- {})", SchemaDisplay(expr)), |
| Expr::Not(expr) => write!(f, "NOT {}", SchemaDisplay(expr)), |
| Expr::Unnest(Unnest { expr }) => { |
| write!(f, "UNNEST({})", SchemaDisplay(expr)) |
| } |
| Expr::ScalarFunction(ScalarFunction { func, args }) => { |
| match func.schema_name(args) { |
| Ok(name) => { |
| write!(f, "{name}") |
| } |
| Err(e) => { |
| write!(f, "got error from schema_name {e}") |
| } |
| } |
| } |
| Expr::ScalarSubquery(Subquery { subquery, .. }) => { |
| write!(f, "{}", subquery.schema().field(0).name()) |
| } |
| Expr::SimilarTo(Like { |
| negated, |
| expr, |
| pattern, |
| escape_char, |
| .. |
| }) => { |
| write!( |
| f, |
| "{} {} {}", |
| SchemaDisplay(expr), |
| if *negated { |
| "NOT SIMILAR TO" |
| } else { |
| "SIMILAR TO" |
| }, |
| SchemaDisplay(pattern), |
| )?; |
| if let Some(char) = escape_char { |
| write!(f, " CHAR '{char}'")?; |
| } |
| |
| Ok(()) |
| } |
| Expr::WindowFunction(window_fun) => { |
| let WindowFunction { fun, params } = window_fun.as_ref(); |
| match fun { |
| WindowFunctionDefinition::AggregateUDF(fun) => { |
| match fun.window_function_schema_name(params) { |
| Ok(name) => { |
| write!(f, "{name}") |
| } |
| Err(e) => { |
| write!( |
| f, |
| "got error from window_function_schema_name {e}" |
| ) |
| } |
| } |
| } |
| _ => { |
| let WindowFunctionParams { |
| args, |
| partition_by, |
| order_by, |
| window_frame, |
| filter, |
| null_treatment, |
| distinct, |
| } = params; |
| |
| // Write function name and open parenthesis |
| write!(f, "{fun}(")?; |
| |
| // If DISTINCT, emit the keyword |
| if *distinct { |
| write!(f, "DISTINCT ")?; |
| } |
| |
| // Write the comma‑separated argument list |
| write!( |
| f, |
| "{}", |
| schema_name_from_exprs_comma_separated_without_space(args)? |
| )?; |
| |
| // **Close the argument parenthesis** |
| write!(f, ")")?; |
| |
| if let Some(null_treatment) = null_treatment { |
| write!(f, " {null_treatment}")?; |
| } |
| |
| if let Some(filter) = filter { |
| write!(f, " FILTER (WHERE {filter})")?; |
| } |
| |
| if !partition_by.is_empty() { |
| write!( |
| f, |
| " PARTITION BY [{}]", |
| schema_name_from_exprs(partition_by)? |
| )?; |
| } |
| |
| if !order_by.is_empty() { |
| write!( |
| f, |
| " ORDER BY [{}]", |
| schema_name_from_sorts(order_by)? |
| )?; |
| }; |
| |
| write!(f, " {window_frame}") |
| } |
| } |
| } |
| } |
| } |
| } |
| |
| /// A helper struct for displaying an `Expr` as an SQL-like string. |
| struct SqlDisplay<'a>(&'a Expr); |
| |
| impl Display for SqlDisplay<'_> { |
| fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { |
| match self.0 { |
| Expr::Literal(scalar, _) => scalar.fmt(f), |
| Expr::Alias(Alias { name, .. }) => write!(f, "{name}"), |
| Expr::Between(Between { |
| expr, |
| negated, |
| low, |
| high, |
| }) => { |
| if *negated { |
| write!( |
| f, |
| "{} NOT BETWEEN {} AND {}", |
| SqlDisplay(expr), |
| SqlDisplay(low), |
| SqlDisplay(high), |
| ) |
| } else { |
| write!( |
| f, |
| "{} BETWEEN {} AND {}", |
| SqlDisplay(expr), |
| SqlDisplay(low), |
| SqlDisplay(high), |
| ) |
| } |
| } |
| Expr::BinaryExpr(BinaryExpr { left, op, right }) => { |
| write!(f, "{} {op} {}", SqlDisplay(left), SqlDisplay(right),) |
| } |
| Expr::Case(Case { |
| expr, |
| when_then_expr, |
| else_expr, |
| }) => { |
| write!(f, "CASE ")?; |
| |
| if let Some(e) = expr { |
| write!(f, "{} ", SqlDisplay(e))?; |
| } |
| |
| for (when, then) in when_then_expr { |
| write!(f, "WHEN {} THEN {} ", SqlDisplay(when), SqlDisplay(then),)?; |
| } |
| |
| if let Some(e) = else_expr { |
| write!(f, "ELSE {} ", SqlDisplay(e))?; |
| } |
| |
| write!(f, "END") |
| } |
| Expr::Cast(Cast { expr, .. }) | Expr::TryCast(TryCast { expr, .. }) => { |
| write!(f, "{}", SqlDisplay(expr)) |
| } |
| Expr::InList(InList { |
| expr, |
| list, |
| negated, |
| }) => { |
| write!( |
| f, |
| "{}{} IN {}", |
| SqlDisplay(expr), |
| if *negated { " NOT" } else { "" }, |
| ExprListDisplay::comma_separated(list.as_slice()) |
| ) |
| } |
| Expr::GroupingSet(GroupingSet::Cube(exprs)) => { |
| write!( |
| f, |
| "ROLLUP ({})", |
| ExprListDisplay::comma_separated(exprs.as_slice()) |
| ) |
| } |
| Expr::GroupingSet(GroupingSet::GroupingSets(lists_of_exprs)) => { |
| write!(f, "GROUPING SETS (")?; |
| for exprs in lists_of_exprs.iter() { |
| write!( |
| f, |
| "({})", |
| ExprListDisplay::comma_separated(exprs.as_slice()) |
| )?; |
| } |
| write!(f, ")") |
| } |
| Expr::GroupingSet(GroupingSet::Rollup(exprs)) => { |
| write!( |
| f, |
| "ROLLUP ({})", |
| ExprListDisplay::comma_separated(exprs.as_slice()) |
| ) |
| } |
| Expr::IsNull(expr) => write!(f, "{} IS NULL", SqlDisplay(expr)), |
| Expr::IsNotNull(expr) => { |
| write!(f, "{} IS NOT NULL", SqlDisplay(expr)) |
| } |
| Expr::IsUnknown(expr) => { |
| write!(f, "{} IS UNKNOWN", SqlDisplay(expr)) |
| } |
| Expr::IsNotUnknown(expr) => { |
| write!(f, "{} IS NOT UNKNOWN", SqlDisplay(expr)) |
| } |
| Expr::IsTrue(expr) => write!(f, "{} IS TRUE", SqlDisplay(expr)), |
| Expr::IsFalse(expr) => write!(f, "{} IS FALSE", SqlDisplay(expr)), |
| Expr::IsNotTrue(expr) => { |
| write!(f, "{} IS NOT TRUE", SqlDisplay(expr)) |
| } |
| Expr::IsNotFalse(expr) => { |
| write!(f, "{} IS NOT FALSE", SqlDisplay(expr)) |
| } |
| Expr::Like(Like { |
| negated, |
| expr, |
| pattern, |
| escape_char, |
| case_insensitive, |
| }) => { |
| write!( |
| f, |
| "{} {}{} {}", |
| SqlDisplay(expr), |
| if *negated { "NOT " } else { "" }, |
| if *case_insensitive { "ILIKE" } else { "LIKE" }, |
| SqlDisplay(pattern), |
| )?; |
| |
| if let Some(char) = escape_char { |
| write!(f, " CHAR '{char}'")?; |
| } |
| |
| Ok(()) |
| } |
| Expr::Negative(expr) => write!(f, "(- {})", SqlDisplay(expr)), |
| Expr::Not(expr) => write!(f, "NOT {}", SqlDisplay(expr)), |
| Expr::Unnest(Unnest { expr }) => { |
| write!(f, "UNNEST({})", SqlDisplay(expr)) |
| } |
| Expr::SimilarTo(Like { |
| negated, |
| expr, |
| pattern, |
| escape_char, |
| .. |
| }) => { |
| write!( |
| f, |
| "{} {} {}", |
| SqlDisplay(expr), |
| if *negated { |
| "NOT SIMILAR TO" |
| } else { |
| "SIMILAR TO" |
| }, |
| SqlDisplay(pattern), |
| )?; |
| if let Some(char) = escape_char { |
| write!(f, " CHAR '{char}'")?; |
| } |
| |
| Ok(()) |
| } |
| Expr::AggregateFunction(AggregateFunction { func, params }) => { |
| match func.human_display(params) { |
| Ok(name) => { |
| write!(f, "{name}") |
| } |
| Err(e) => { |
| write!(f, "got error from schema_name {e}") |
| } |
| } |
| } |
| _ => write!(f, "{}", self.0), |
| } |
| } |
| } |
| |
| /// Get schema_name for Vector of expressions |
| /// |
| /// Internal usage. Please call `schema_name_from_exprs` instead |
| // TODO: Use ", " to standardize the formatting of Vec<Expr>, |
| // <https://github.com/apache/datafusion/issues/10364> |
| pub(crate) fn schema_name_from_exprs_comma_separated_without_space( |
| exprs: &[Expr], |
| ) -> Result<String, fmt::Error> { |
| schema_name_from_exprs_inner(exprs, ",") |
| } |
| |
| /// Formats a list of `&Expr` with a custom separator using SQL display format |
| pub struct ExprListDisplay<'a> { |
| exprs: &'a [Expr], |
| sep: &'a str, |
| } |
| |
| impl<'a> ExprListDisplay<'a> { |
| /// Create a new display struct with the given expressions and separator |
| pub fn new(exprs: &'a [Expr], sep: &'a str) -> Self { |
| Self { exprs, sep } |
| } |
| |
| /// Create a new display struct with comma-space separator |
| pub fn comma_separated(exprs: &'a [Expr]) -> Self { |
| Self::new(exprs, ", ") |
| } |
| } |
| |
| impl Display for ExprListDisplay<'_> { |
| fn fmt(&self, f: &mut Formatter) -> fmt::Result { |
| let mut first = true; |
| for expr in self.exprs { |
| if !first { |
| write!(f, "{}", self.sep)?; |
| } |
| write!(f, "{}", SqlDisplay(expr))?; |
| first = false; |
| } |
| Ok(()) |
| } |
| } |
| |
| /// Get schema_name for Vector of expressions |
| pub fn schema_name_from_exprs(exprs: &[Expr]) -> Result<String, fmt::Error> { |
| schema_name_from_exprs_inner(exprs, ", ") |
| } |
| |
| fn schema_name_from_exprs_inner(exprs: &[Expr], sep: &str) -> Result<String, fmt::Error> { |
| let mut s = String::new(); |
| for (i, e) in exprs.iter().enumerate() { |
| if i > 0 { |
| write!(&mut s, "{sep}")?; |
| } |
| write!(&mut s, "{}", SchemaDisplay(e))?; |
| } |
| |
| Ok(s) |
| } |
| |
| pub fn schema_name_from_sorts(sorts: &[Sort]) -> Result<String, fmt::Error> { |
| let mut s = String::new(); |
| for (i, e) in sorts.iter().enumerate() { |
| if i > 0 { |
| write!(&mut s, ", ")?; |
| } |
| let ordering = if e.asc { "ASC" } else { "DESC" }; |
| let nulls_ordering = if e.nulls_first { |
| "NULLS FIRST" |
| } else { |
| "NULLS LAST" |
| }; |
| write!(&mut s, "{} {} {}", e.expr, ordering, nulls_ordering)?; |
| } |
| |
| Ok(s) |
| } |
| |
| pub const OUTER_REFERENCE_COLUMN_PREFIX: &str = "outer_ref"; |
| pub const UNNEST_COLUMN_PREFIX: &str = "UNNEST"; |
| |
| /// Format expressions for display as part of a logical plan. In many cases, this will produce |
| /// similar output to `Expr.name()` except that column names will be prefixed with '#'. |
| impl Display for Expr { |
| fn fmt(&self, f: &mut Formatter) -> fmt::Result { |
| match self { |
| Expr::Alias(Alias { expr, name, .. }) => write!(f, "{expr} AS {name}"), |
| Expr::Column(c) => write!(f, "{c}"), |
| Expr::OuterReferenceColumn(_, c) => { |
| write!(f, "{OUTER_REFERENCE_COLUMN_PREFIX}({c})") |
| } |
| Expr::ScalarVariable(_, var_names) => write!(f, "{}", var_names.join(".")), |
| Expr::Literal(v, metadata) => { |
| match metadata.as_ref().map(|m| m.is_empty()).unwrap_or(true) { |
| false => write!(f, "{v:?} {:?}", metadata.as_ref().unwrap()), |
| true => write!(f, "{v:?}"), |
| } |
| } |
| Expr::Case(case) => { |
| write!(f, "CASE ")?; |
| if let Some(e) = &case.expr { |
| write!(f, "{e} ")?; |
| } |
| for (w, t) in &case.when_then_expr { |
| write!(f, "WHEN {w} THEN {t} ")?; |
| } |
| if let Some(e) = &case.else_expr { |
| write!(f, "ELSE {e} ")?; |
| } |
| write!(f, "END") |
| } |
| Expr::Cast(Cast { expr, data_type }) => { |
| write!(f, "CAST({expr} AS {data_type})") |
| } |
| Expr::TryCast(TryCast { expr, data_type }) => { |
| write!(f, "TRY_CAST({expr} AS {data_type})") |
| } |
| Expr::Not(expr) => write!(f, "NOT {expr}"), |
| Expr::Negative(expr) => write!(f, "(- {expr})"), |
| Expr::IsNull(expr) => write!(f, "{expr} IS NULL"), |
| Expr::IsNotNull(expr) => write!(f, "{expr} IS NOT NULL"), |
| Expr::IsTrue(expr) => write!(f, "{expr} IS TRUE"), |
| Expr::IsFalse(expr) => write!(f, "{expr} IS FALSE"), |
| Expr::IsUnknown(expr) => write!(f, "{expr} IS UNKNOWN"), |
| Expr::IsNotTrue(expr) => write!(f, "{expr} IS NOT TRUE"), |
| Expr::IsNotFalse(expr) => write!(f, "{expr} IS NOT FALSE"), |
| Expr::IsNotUnknown(expr) => write!(f, "{expr} IS NOT UNKNOWN"), |
| Expr::Exists(Exists { |
| subquery, |
| negated: true, |
| }) => write!(f, "NOT EXISTS ({subquery:?})"), |
| Expr::Exists(Exists { |
| subquery, |
| negated: false, |
| }) => write!(f, "EXISTS ({subquery:?})"), |
| Expr::InSubquery(InSubquery { |
| expr, |
| subquery, |
| negated: true, |
| }) => write!(f, "{expr} NOT IN ({subquery:?})"), |
| Expr::InSubquery(InSubquery { |
| expr, |
| subquery, |
| negated: false, |
| }) => write!(f, "{expr} IN ({subquery:?})"), |
| Expr::SetComparison(SetComparison { |
| expr, |
| subquery, |
| op, |
| quantifier, |
| }) => write!(f, "{expr} {op} {quantifier} ({subquery:?})"), |
| Expr::ScalarSubquery(subquery) => write!(f, "({subquery:?})"), |
| Expr::BinaryExpr(expr) => write!(f, "{expr}"), |
| Expr::ScalarFunction(fun) => { |
| fmt_function(f, fun.name(), false, &fun.args, true) |
| } |
| Expr::WindowFunction(window_fun) => { |
| let WindowFunction { fun, params } = window_fun.as_ref(); |
| match fun { |
| WindowFunctionDefinition::AggregateUDF(fun) => { |
| match fun.window_function_display_name(params) { |
| Ok(name) => { |
| write!(f, "{name}") |
| } |
| Err(e) => { |
| write!( |
| f, |
| "got error from window_function_display_name {e}" |
| ) |
| } |
| } |
| } |
| WindowFunctionDefinition::WindowUDF(fun) => { |
| let WindowFunctionParams { |
| args, |
| partition_by, |
| order_by, |
| window_frame, |
| filter, |
| null_treatment, |
| distinct, |
| } = params; |
| |
| fmt_function(f, &fun.to_string(), *distinct, args, true)?; |
| |
| if let Some(nt) = null_treatment { |
| write!(f, "{nt}")?; |
| } |
| |
| if let Some(fe) = filter { |
| write!(f, " FILTER (WHERE {fe})")?; |
| } |
| |
| if !partition_by.is_empty() { |
| write!(f, " PARTITION BY [{}]", expr_vec_fmt!(partition_by))?; |
| } |
| if !order_by.is_empty() { |
| write!(f, " ORDER BY [{}]", expr_vec_fmt!(order_by))?; |
| } |
| write!( |
| f, |
| " {} BETWEEN {} AND {}", |
| window_frame.units, |
| window_frame.start_bound, |
| window_frame.end_bound |
| ) |
| } |
| } |
| } |
| Expr::AggregateFunction(AggregateFunction { func, params }) => { |
| match func.display_name(params) { |
| Ok(name) => { |
| write!(f, "{name}") |
| } |
| Err(e) => { |
| write!(f, "got error from display_name {e}") |
| } |
| } |
| } |
| Expr::Between(Between { |
| expr, |
| negated, |
| low, |
| high, |
| }) => { |
| if *negated { |
| write!(f, "{expr} NOT BETWEEN {low} AND {high}") |
| } else { |
| write!(f, "{expr} BETWEEN {low} AND {high}") |
| } |
| } |
| Expr::Like(Like { |
| negated, |
| expr, |
| pattern, |
| escape_char, |
| case_insensitive, |
| }) => { |
| write!(f, "{expr}")?; |
| let op_name = if *case_insensitive { "ILIKE" } else { "LIKE" }; |
| if *negated { |
| write!(f, " NOT")?; |
| } |
| if let Some(char) = escape_char { |
| write!(f, " {op_name} {pattern} ESCAPE '{char}'") |
| } else { |
| write!(f, " {op_name} {pattern}") |
| } |
| } |
| Expr::SimilarTo(Like { |
| negated, |
| expr, |
| pattern, |
| escape_char, |
| case_insensitive: _, |
| }) => { |
| write!(f, "{expr}")?; |
| if *negated { |
| write!(f, " NOT")?; |
| } |
| if let Some(char) = escape_char { |
| write!(f, " SIMILAR TO {pattern} ESCAPE '{char}'") |
| } else { |
| write!(f, " SIMILAR TO {pattern}") |
| } |
| } |
| Expr::InList(InList { |
| expr, |
| list, |
| negated, |
| }) => { |
| if *negated { |
| write!(f, "{expr} NOT IN ([{}])", expr_vec_fmt!(list)) |
| } else { |
| write!(f, "{expr} IN ([{}])", expr_vec_fmt!(list)) |
| } |
| } |
| #[expect(deprecated)] |
| Expr::Wildcard { qualifier, options } => match qualifier { |
| Some(qualifier) => write!(f, "{qualifier}.*{options}"), |
| None => write!(f, "*{options}"), |
| }, |
| Expr::GroupingSet(grouping_sets) => match grouping_sets { |
| GroupingSet::Rollup(exprs) => { |
| // ROLLUP (c0, c1, c2) |
| write!(f, "ROLLUP ({})", expr_vec_fmt!(exprs)) |
| } |
| GroupingSet::Cube(exprs) => { |
| // CUBE (c0, c1, c2) |
| write!(f, "CUBE ({})", expr_vec_fmt!(exprs)) |
| } |
| GroupingSet::GroupingSets(lists_of_exprs) => { |
| // GROUPING SETS ((c0), (c1, c2), (c3, c4)) |
| write!( |
| f, |
| "GROUPING SETS ({})", |
| lists_of_exprs |
| .iter() |
| .map(|exprs| format!("({})", expr_vec_fmt!(exprs))) |
| .collect::<Vec<String>>() |
| .join(", ") |
| ) |
| } |
| }, |
| Expr::Placeholder(Placeholder { id, .. }) => write!(f, "{id}"), |
| Expr::Unnest(Unnest { expr }) => { |
| write!(f, "{UNNEST_COLUMN_PREFIX}({expr})") |
| } |
| } |
| } |
| } |
| |
| fn fmt_function( |
| f: &mut Formatter, |
| fun: &str, |
| distinct: bool, |
| args: &[Expr], |
| display: bool, |
| ) -> fmt::Result { |
| let args: Vec<String> = match display { |
| true => args.iter().map(|arg| format!("{arg}")).collect(), |
| false => args.iter().map(|arg| format!("{arg:?}")).collect(), |
| }; |
| |
| let distinct_str = match distinct { |
| true => "DISTINCT ", |
| false => "", |
| }; |
| write!(f, "{}({}{})", fun, distinct_str, args.join(", ")) |
| } |
| |
| /// The name of the column (field) that this `Expr` will produce in the physical plan. |
| /// The difference from [Expr::schema_name] is that top-level columns are unqualified. |
| pub fn physical_name(expr: &Expr) -> Result<String> { |
| match expr { |
| Expr::Column(col) => Ok(col.name.clone()), |
| Expr::Alias(alias) => Ok(alias.name.clone()), |
| _ => Ok(expr.schema_name().to_string()), |
| } |
| } |
| |
| #[cfg(test)] |
| mod test { |
| use crate::expr_fn::col; |
| use crate::{ |
| ColumnarValue, ScalarFunctionArgs, ScalarUDF, ScalarUDFImpl, Volatility, case, |
| lit, placeholder, qualified_wildcard, wildcard, wildcard_with_options, |
| }; |
| use arrow::datatypes::{Field, Schema}; |
| use sqlparser::ast; |
| use sqlparser::ast::{Ident, IdentWithAlias}; |
| use std::any::Any; |
| |
#[test]
fn infer_placeholder_in_clause() {
    // SELECT * FROM employees WHERE department_id IN ($1, $2, $3);
    let placeholders: Vec<Expr> = ["$1", "$2", "$3"]
        .into_iter()
        .map(|id| {
            Expr::Placeholder(Placeholder {
                id: id.to_string(),
                field: None,
            })
        })
        .collect();
    let in_list_expr = Expr::InList(InList {
        expr: Box::new(col("department_id")),
        list: placeholders,
        negated: false,
    });

    let arrow_schema = Arc::new(Schema::new(vec![
        Field::new("name", DataType::Utf8, true),
        Field::new("department_id", DataType::Int32, true),
    ]));
    let df_schema = DFSchema::try_from(arrow_schema).unwrap();

    let (inferred, has_placeholder) =
        in_list_expr.infer_placeholder_types(&df_schema).unwrap();
    assert!(has_placeholder);

    // Every placeholder in the list must take the type of `department_id`.
    let Expr::InList(inferred_list) = inferred else {
        panic!("Expected InList expression");
    };
    for item in inferred_list.list {
        let Expr::Placeholder(placeholder) = item else {
            panic!("Expected Placeholder expression");
        };
        assert_eq!(
            placeholder.field.unwrap().data_type(),
            &DataType::Int32,
            "Placeholder {} should infer Int32",
            placeholder.id
        );
    }
}
| |
#[test]
fn infer_placeholder_like_and_similar_to() {
    let arrow_schema =
        Arc::new(Schema::new(vec![Field::new("name", DataType::Utf8, true)]));
    let df_schema = DFSchema::try_from(arrow_schema).unwrap();

    // Shared `name <op> $1` payload used for both LIKE and SIMILAR TO.
    let template = Like {
        expr: Box::new(col("name")),
        pattern: Box::new(Expr::Placeholder(Placeholder {
            id: "$1".to_string(),
            field: None,
        })),
        negated: false,
        case_insensitive: false,
        escape_char: None,
    };

    // name LIKE $1
    let like_expr = Expr::Like(template.clone());
    let (inferred, _) = like_expr.infer_placeholder_types(&df_schema).unwrap();
    let Expr::Like(inferred_like) = inferred else {
        panic!("Expected Like");
    };
    let Expr::Placeholder(like_placeholder) = *inferred_like.pattern else {
        panic!("Expected Placeholder");
    };
    assert_eq!(like_placeholder.field.unwrap().data_type(), &DataType::Utf8);

    // name SIMILAR TO $1
    let similar_expr = Expr::SimilarTo(template);
    let (inferred, _) = similar_expr.infer_placeholder_types(&df_schema).unwrap();
    let Expr::SimilarTo(inferred_similar) = inferred else {
        panic!("Expected SimilarTo expression");
    };
    let Expr::Placeholder(similar_placeholder) = *inferred_similar.pattern else {
        panic!("Expected Placeholder expression");
    };
    assert_eq!(
        similar_placeholder.field.unwrap().data_type(),
        &DataType::Utf8,
        "Placeholder {} should infer Utf8",
        similar_placeholder.id
    );
}
| |
#[test]
fn infer_placeholder_with_metadata() {
    // name == $1, where name is a non-nullable string
    let name_field = Field::new("name", DataType::Utf8, false)
        .with_metadata([("some_key".to_string(), "some_value".to_string())].into());
    let df_schema = DFSchema::try_from(Arc::new(Schema::new(vec![name_field]))).unwrap();

    let comparison = binary_expr(col("name"), Operator::Eq, placeholder("$1"));
    let (inferred, _) = comparison.infer_placeholder_types(&df_schema).unwrap();

    let Expr::BinaryExpr(BinaryExpr { right, .. }) = inferred else {
        panic!("Expected BinaryExpr");
    };
    let Expr::Placeholder(inferred_placeholder) = *right else {
        panic!("Expected Placeholder");
    };

    let inferred_field = inferred_placeholder.field.as_ref().unwrap();
    assert_eq!(inferred_field.data_type(), &DataType::Utf8);
    assert_eq!(inferred_field.metadata(), df_schema.field(0).metadata());
    // Inferred placeholder should still be nullable
    assert!(inferred_field.is_nullable());
}
| |
#[test]
fn format_case_when() -> Result<()> {
    let case_expr = case(col("a"))
        .when(lit(1), lit(true))
        .when(lit(0), lit(false))
        .otherwise(lit(ScalarValue::Null))?;

    assert_eq!(
        "CASE a WHEN Int32(1) THEN Boolean(true) WHEN Int32(0) THEN Boolean(false) ELSE NULL END",
        case_expr.to_string()
    );
    Ok(())
}
| |
#[test]
fn format_cast() -> Result<()> {
    let literal = Expr::Literal(ScalarValue::Float32(Some(1.23)), None);
    let cast_expr = Expr::Cast(Cast {
        expr: Box::new(literal),
        data_type: DataType::Utf8,
    });

    assert_eq!("CAST(Float32(1.23) AS Utf8)", cast_expr.to_string());
    // Note that CAST intentionally has a name that is different from its `Display`
    // representation. CAST does not change the name of expressions.
    assert_eq!("Float32(1.23)", cast_expr.schema_name().to_string());
    Ok(())
}
| |
#[test]
fn test_partial_ord() {
    // Only checks that a partial order is defined for `Expr`; this is not an
    // exhaustive test of every variant combination.
    let a_plus_one = col("a") + lit(1);
    let a_plus_two = col("a") + lit(2);
    let not_a_plus_two = !(col("a") + lit(2));

    assert!(a_plus_one < a_plus_two);
    assert!(not_a_plus_two > a_plus_two);
    assert!(a_plus_one < not_a_plus_two);
}
| |
#[test]
fn test_collect_expr() -> Result<()> {
    // single column
    let cast_of_a = Expr::Cast(Cast::new(Box::new(col("a")), DataType::Float64));
    let single = cast_of_a.column_refs();
    assert_eq!(1, single.len());
    assert!(single.contains(&Column::from_name("a")));

    // multiple columns
    let sum = col("a") + col("b") + lit(1);
    let multiple = sum.column_refs();
    assert_eq!(2, multiple.len());
    for name in ["a", "b"] {
        assert!(multiple.contains(&Column::from_name(name)));
    }

    Ok(())
}
| |
#[test]
fn test_logical_ops() {
    let one = || lit(1u32);
    let two = || lit(2u32);

    // (expression, expected `Display` output)
    let cases = [
        (one().eq(two()), "UInt32(1) = UInt32(2)"),
        (one().not_eq(two()), "UInt32(1) != UInt32(2)"),
        (one().gt(two()), "UInt32(1) > UInt32(2)"),
        (one().gt_eq(two()), "UInt32(1) >= UInt32(2)"),
        (one().lt(two()), "UInt32(1) < UInt32(2)"),
        (one().lt_eq(two()), "UInt32(1) <= UInt32(2)"),
        (one().and(two()), "UInt32(1) AND UInt32(2)"),
        (one().or(two()), "UInt32(1) OR UInt32(2)"),
    ];

    for (expr, expected) in cases {
        assert_eq!(expr.to_string(), expected);
    }
}
| |
#[test]
fn test_is_volatile_scalar_func() {
    // Minimal UDF whose only configurable part is its signature.
    #[derive(Debug, PartialEq, Eq, Hash)]
    struct TestScalarUDF {
        signature: Signature,
    }
    impl ScalarUDFImpl for TestScalarUDF {
        fn as_any(&self) -> &dyn Any {
            self
        }

        fn name(&self) -> &str {
            "TestScalarUDF"
        }

        fn signature(&self) -> &Signature {
            &self.signature
        }

        fn return_type(&self, _arg_types: &[DataType]) -> Result<DataType> {
            Ok(DataType::Utf8)
        }

        fn invoke_with_args(&self, _args: ScalarFunctionArgs) -> Result<ColumnarValue> {
            Ok(ColumnarValue::Scalar(ScalarValue::from("a")))
        }
    }

    // Builds the UDF with a one-argument Float32 signature of the given volatility.
    let udf_with = |volatility: Volatility| {
        Arc::new(ScalarUDF::from(TestScalarUDF {
            signature: Signature::uniform(1, vec![DataType::Float32], volatility),
        }))
    };

    let stable_udf = udf_with(Volatility::Stable);
    assert_ne!(stable_udf.signature().volatility, Volatility::Volatile);

    let volatile_udf = udf_with(Volatility::Volatile);
    assert_eq!(volatile_udf.signature().volatility, Volatility::Volatile);
}
| |
| use super::*; |
| use crate::logical_plan::{EmptyRelation, LogicalPlan}; |
| |
#[test]
fn test_display_wildcard() {
    assert_eq!(wildcard().to_string(), "*");
    assert_eq!(qualified_wildcard("t1").to_string(), "t1.*");

    // ILIKE
    let ilike = wildcard_options(
        Some(IlikeSelectItem {
            pattern: "c1".to_string(),
        }),
        None,
        None,
        None,
        None,
    );
    assert_eq!(wildcard_with_options(ilike).to_string(), "* ILIKE 'c1'");

    // EXCLUDE
    let exclude = wildcard_options(
        None,
        Some(ExcludeSelectItem::Multiple(vec![
            Ident::from("c1"),
            Ident::from("c2"),
        ])),
        None,
        None,
        None,
    );
    assert_eq!(
        wildcard_with_options(exclude).to_string(),
        "* EXCLUDE (c1, c2)"
    );

    // EXCEPT
    let except = wildcard_options(
        None,
        None,
        Some(ExceptSelectItem {
            first_element: Ident::from("c1"),
            additional_elements: vec![Ident::from("c2")],
        }),
        None,
        None,
    );
    assert_eq!(
        wildcard_with_options(except).to_string(),
        "* EXCEPT (c1, c2)"
    );

    // REPLACE
    let replace = wildcard_options(
        None,
        None,
        None,
        Some(PlannedReplaceSelectItem {
            items: vec![ReplaceSelectElement {
                expr: ast::Expr::Identifier(Ident::from("c1")),
                column_name: Ident::from("a1"),
                as_keyword: false,
            }],
            planned_expressions: vec![],
        }),
        None,
    );
    assert_eq!(
        wildcard_with_options(replace).to_string(),
        "* REPLACE (c1 a1)"
    );

    // RENAME
    let rename = wildcard_options(
        None,
        None,
        None,
        None,
        Some(RenameSelectItem::Multiple(vec![IdentWithAlias {
            ident: Ident::from("c1"),
            alias: Ident::from("a1"),
        }])),
    );
    assert_eq!(
        wildcard_with_options(rename).to_string(),
        "* RENAME (c1 AS a1)"
    );
}
| |
#[test]
fn test_display_set_comparison() {
    let empty_plan = LogicalPlan::EmptyRelation(EmptyRelation {
        produce_one_row: false,
        schema: Arc::new(DFSchema::empty()),
    });
    let subquery = Subquery {
        subquery: Arc::new(empty_plan),
        outer_ref_columns: vec![],
        spans: Spans::new(),
    };

    let comparison = Expr::SetComparison(SetComparison::new(
        Box::new(Expr::Column(Column::from_name("a"))),
        subquery,
        Operator::Gt,
        SetQuantifier::Any,
    ));

    // Both the plan display and the human display use the same rendering.
    let expected = "a > ANY (<subquery>)";
    assert_eq!(comparison.to_string(), expected);
    assert_eq!(comparison.human_display().to_string(), expected);
}
| |
#[test]
fn test_schema_display_alias_with_relation() {
    // A qualified alias is displayed as `<relation>.<name>`.
    let aliased = lit(1).alias_qualified("table_name".into(), "column_name");
    assert_eq!(
        SchemaDisplay(&aliased).to_string(),
        "table_name.column_name"
    );
}
| |
#[test]
fn test_schema_display_alias_without_relation() {
    // Without a relation only the alias name is displayed.
    let aliased = lit(1).alias_qualified(None::<&str>, "column_name");
    assert_eq!(SchemaDisplay(&aliased).to_string(), "column_name");
}
| |
| fn wildcard_options( |
| opt_ilike: Option<IlikeSelectItem>, |
| opt_exclude: Option<ExcludeSelectItem>, |
| opt_except: Option<ExceptSelectItem>, |
| opt_replace: Option<PlannedReplaceSelectItem>, |
| opt_rename: Option<RenameSelectItem>, |
| ) -> WildcardOptions { |
| WildcardOptions { |
| ilike: opt_ilike, |
| exclude: opt_exclude, |
| except: opt_except, |
| replace: opt_replace, |
| rename: opt_rename, |
| } |
| } |
| |
#[test]
fn test_size_of_expr() {
    // `Expr` is used all over DataFusion, so its size is pinned here to keep
    // it as small as possible.
    //
    // If this test fails after a change to `Expr`, try `Box`ing the new
    // fields so that `Expr` stays small.
    // See https://github.com/apache/datafusion/issues/16199 for details
    assert_eq!(mem::size_of::<Expr>(), 112);
    assert_eq!(mem::size_of::<ScalarValue>(), 64);
    assert_eq!(mem::size_of::<DataType>(), 24); // 3 ptrs
    assert_eq!(mem::size_of::<Vec<Expr>>(), 24);
    assert_eq!(mem::size_of::<Arc<Expr>>(), 8);
}
| |
#[test]
fn test_accept_exprs() {
    // Compile-time check only: the function body is intentionally empty.
    fn accept_exprs<E>(_: &[E])
    where
        E: AsRef<Expr>,
    {
    }

    // test helpers
    #[derive(Debug, PartialEq, Eq, Hash)]
    struct TestUDF {}
    impl ScalarUDFImpl for TestUDF {
        fn as_any(&self) -> &dyn Any {
            unimplemented!()
        }

        fn name(&self) -> &str {
            unimplemented!()
        }

        fn signature(&self) -> &Signature {
            unimplemented!()
        }

        fn return_type(&self, _arg_types: &[DataType]) -> Result<DataType> {
            unimplemented!()
        }

        fn invoke_with_args(&self, _args: ScalarFunctionArgs) -> Result<ColumnarValue> {
            unimplemented!()
        }
    }

    let make_expr = || -> Expr { lit(1) };

    // Call accept_exprs with owned expressions
    let owned_exprs = vec![make_expr(), make_expr()];
    accept_exprs(&owned_exprs);

    // Call accept_exprs with expressions from expr tree
    let udf = Expr::ScalarFunction(ScalarFunction {
        func: Arc::new(ScalarUDF::new_from_impl(TestUDF {})),
        args: vec![make_expr(), make_expr()],
    });
    let scalar = match &udf {
        Expr::ScalarFunction(scalar) => scalar,
        _ => unreachable!(),
    };
    accept_exprs(&scalar.args);

    // Call accept_exprs with expressions collected from expr tree, without cloning
    let collected_refs: Vec<&Expr> = scalar.args.iter().chain(&owned_exprs).collect();
    accept_exprs(&collected_refs);
}
| } |