blob: e23e5accae3978834f8f7afa6eac726c24273e92 [file] [log] [blame]
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
use arrow::datatypes::{DataType, Field, Schema};
use datafusion::{
assert_batches_eq,
error::Result,
prelude::{ParquetReadOptions, SessionContext},
};
use datafusion_common::DFSchema;
use datafusion_expr::{col, lit};
use datafusion_sql::unparser::Unparser;
/// This example demonstrates the programmatic parsing of SQL expressions using
/// the DataFusion [`SessionContext::parse_sql_expr`] API or the [`DataFrame::parse_sql_expr`] API.
///
///
/// The code in this example shows how to:
///
/// 1. [`simple_session_context_parse_sql_expr_demo`]: Parse a simple SQL text into a logical
/// expression using a schema at [`SessionContext`].
///
/// 2. [`simple_dataframe_parse_sql_expr_demo`]: Parse a simple SQL text into a logical expression
/// using a schema at [`DataFrame`].
///
/// 3. [`query_parquet_demo`]: Query a parquet file using the parsed_sql_expr from a DataFrame.
///
/// 4. [`round_trip_parse_sql_expr_demo`]: Parse a SQL text and convert it back to SQL using [`Unparser`].
#[tokio::main]
async fn main() -> Result<()> {
// See how to evaluate expressions
simple_session_context_parse_sql_expr_demo()?;
simple_dataframe_parse_sql_expr_demo().await?;
query_parquet_demo().await?;
round_trip_parse_sql_expr_demo().await?;
Ok(())
}
/// DataFusion can parse a SQL text to a logical expression against a schema at [`SessionContext`].
fn simple_session_context_parse_sql_expr_demo() -> Result<()> {
let sql = "a < 5 OR a = 8";
let expr = col("a").lt(lit(5_i64)).or(col("a").eq(lit(8_i64)));
// provide type information that `a` is an Int32
let schema = Schema::new(vec![Field::new("a", DataType::Int32, true)]);
let df_schema = DFSchema::try_from(schema).unwrap();
let ctx = SessionContext::new();
let parsed_expr = ctx.parse_sql_expr(sql, &df_schema)?;
assert_eq!(parsed_expr, expr);
Ok(())
}
/// DataFusion can parse a SQL text to an logical expression using schema at [`DataFrame`].
async fn simple_dataframe_parse_sql_expr_demo() -> Result<()> {
let sql = "int_col < 5 OR double_col = 8.0";
let expr = col("int_col")
.lt(lit(5_i64))
.or(col("double_col").eq(lit(8.0_f64)));
let ctx = SessionContext::new();
let testdata = datafusion::test_util::parquet_test_data();
let df = ctx
.read_parquet(
&format!("{testdata}/alltypes_plain.parquet"),
ParquetReadOptions::default(),
)
.await?;
let parsed_expr = df.parse_sql_expr(sql)?;
assert_eq!(parsed_expr, expr);
Ok(())
}
async fn query_parquet_demo() -> Result<()> {
let ctx = SessionContext::new();
let testdata = datafusion::test_util::parquet_test_data();
let df = ctx
.read_parquet(
&format!("{testdata}/alltypes_plain.parquet"),
ParquetReadOptions::default(),
)
.await?;
let df = df
.clone()
.select(vec![
df.parse_sql_expr("int_col")?,
df.parse_sql_expr("double_col")?,
])?
.filter(df.parse_sql_expr("int_col < 5 OR double_col = 8.0")?)?
.aggregate(
vec![df.parse_sql_expr("double_col")?],
vec![df.parse_sql_expr("SUM(int_col) as sum_int_col")?],
)?
// Directly parsing the SQL text into a sort expression is not supported yet, so
// construct it programmatically
.sort(vec![col("double_col").sort(false, false)])?
.limit(0, Some(1))?;
let result = df.collect().await?;
assert_batches_eq!(
&[
"+------------+----------------------+",
"| double_col | sum(?table?.int_col) |",
"+------------+----------------------+",
"| 10.1 | 4 |",
"+------------+----------------------+",
],
&result
);
Ok(())
}
/// DataFusion can parse a SQL text and convert it back to SQL using [`Unparser`].
async fn round_trip_parse_sql_expr_demo() -> Result<()> {
let sql = "((int_col < 5) OR (double_col = 8))";
let ctx = SessionContext::new();
let testdata = datafusion::test_util::parquet_test_data();
let df = ctx
.read_parquet(
&format!("{testdata}/alltypes_plain.parquet"),
ParquetReadOptions::default(),
)
.await?;
let parsed_expr = df.parse_sql_expr(sql)?;
let unparser = Unparser::default();
let round_trip_sql = unparser.expr_to_sql(&parsed_expr)?.to_string();
assert_eq!(sql, round_trip_sql);
// enable pretty-unparsing. This make the output more human-readable
// but can be problematic when passed to other SQL engines due to
// difference in precedence rules between DataFusion and target engines.
let unparser = Unparser::default().with_pretty(true);
let pretty = "int_col < 5 OR double_col = 8";
let pretty_round_trip_sql = unparser.expr_to_sql(&parsed_expr)?.to_string();
assert_eq!(pretty, pretty_round_trip_sql);
Ok(())
}