// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.

//! See `main.rs` for how to run it.

use arrow::datatypes::{DataType, Field, Schema};
use datafusion::common::DFSchema;
use datafusion::common::ScalarValue;
use datafusion::logical_expr::{col, lit};
use datafusion::sql::unparser::Unparser;
use datafusion::{
assert_batches_eq,
error::Result,
prelude::{ParquetReadOptions, SessionContext},
};
use datafusion_examples::utils::{datasets::ExampleDataset, write_csv_to_parquet};

/// This example demonstrates the programmatic parsing of SQL expressions using
/// the DataFusion [`SessionContext::parse_sql_expr`] API or the [`DataFrame::parse_sql_expr`] API.
///
/// The code in this example shows how to:
///
/// 1. [`simple_session_context_parse_sql_expr_demo`]: Parse a simple SQL string into a logical
///    expression against an explicit schema using [`SessionContext`].
///
/// 2. [`simple_dataframe_parse_sql_expr_demo`]: Parse a simple SQL string into a logical expression
///    using the schema of a [`DataFrame`].
///
/// 3. [`query_parquet_demo`]: Query a Parquet file using expressions parsed with
///    [`DataFrame::parse_sql_expr`].
///
/// 4. [`round_trip_parse_sql_expr_demo`]: Parse a SQL string and convert it back to SQL using [`Unparser`].
pub async fn parse_sql_expr() -> Result<()> {
    // Run each demo in turn
    simple_session_context_parse_sql_expr_demo()?;
    simple_dataframe_parse_sql_expr_demo().await?;
    query_parquet_demo().await?;
    round_trip_parse_sql_expr_demo().await?;
    Ok(())
}

/// DataFusion can parse a SQL string into a logical expression against a schema
/// provided at the [`SessionContext`] level.
fn simple_session_context_parse_sql_expr_demo() -> Result<()> {
    let sql = "a < 5 OR a = 8";
    // SQL integer literals parse as Int64, so the expected expression uses `5_i64`
    let expr = col("a").lt(lit(5_i64)).or(col("a").eq(lit(8_i64)));

    // provide type information that `a` is an Int32
    let schema = Schema::new(vec![Field::new("a", DataType::Int32, true)]);
    let df_schema = DFSchema::try_from(schema).unwrap();
    let ctx = SessionContext::new();

    let parsed_expr = ctx.parse_sql_expr(sql, &df_schema)?;

    assert_eq!(parsed_expr, expr);

    Ok(())
}

/// DataFusion can parse a SQL string into a logical expression using the schema
/// of a [`DataFrame`].
async fn simple_dataframe_parse_sql_expr_demo() -> Result<()> {
    let sql = "car = 'red' OR speed > 1.0";
    let expr = col("car")
        .eq(lit(ScalarValue::Utf8(Some("red".to_string()))))
        .or(col("speed").gt(lit(1.0_f64)));

    let ctx = SessionContext::new();
    // Convert the CSV input into a temporary Parquet directory for querying
    let dataset = ExampleDataset::Cars;
    let parquet_temp = write_csv_to_parquet(&ctx, &dataset.path()).await?;
    let df = ctx
        .read_parquet(parquet_temp.path_str()?, ParquetReadOptions::default())
        .await?;

    let parsed_expr = df.parse_sql_expr(sql)?;

    assert_eq!(parsed_expr, expr);

    Ok(())
}
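
/// Build a DataFrame query against a Parquet file, using expressions parsed with
/// [`DataFrame::parse_sql_expr`] for the projection, filter, and aggregation.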
async fn query_parquet_demo() -> Result<()> {
    let ctx = SessionContext::new();
    // Convert the CSV input into a temporary Parquet directory for querying
    let dataset = ExampleDataset::Cars;
    let parquet_temp = write_csv_to_parquet(&ctx, &dataset.path()).await?;
    let df = ctx
        .read_parquet(parquet_temp.path_str()?, ParquetReadOptions::default())
        .await?;

    // `select`, `filter` and `aggregate` consume their receiver, so run the chain on
    // a clone and keep `df` available to parse expressions against its original schema
    let df = df
        .clone()
        .select(vec![df.parse_sql_expr("car")?, df.parse_sql_expr("speed")?])?
        .filter(df.parse_sql_expr("car = 'red' OR speed > 1.0")?)?
        .aggregate(
            vec![df.parse_sql_expr("car")?],
            vec![df.parse_sql_expr("SUM(speed) as sum_speed")?],
        )?
        // Directly parsing the SQL text into a sort expression is not supported yet, so
        // construct it programmatically
        .sort(vec![col("car").sort(false, false)])?
        // keep only the first row of the (descending) sort
        .limit(0, Some(1))?;

    let result = df.collect().await?;

    assert_batches_eq!(
        &[
            "+-----+--------------------+",
            "| car | sum_speed          |",
            "+-----+--------------------+",
            "| red | 162.49999999999997 |",
            "+-----+--------------------+",
        ],
        &result
    );

    Ok(())
}

/// DataFusion can parse a SQL string and convert it back to SQL using [`Unparser`].
async fn round_trip_parse_sql_expr_demo() -> Result<()> {
    let sql = "((car = 'red') OR (speed > 1.0))";

    let ctx = SessionContext::new();
    // Convert the CSV input into a temporary Parquet directory for querying
    let dataset = ExampleDataset::Cars;
    let parquet_temp = write_csv_to_parquet(&ctx, &dataset.path()).await?;
    let df = ctx
        .read_parquet(parquet_temp.path_str()?, ParquetReadOptions::default())
        .await?;

    let parsed_expr = df.parse_sql_expr(sql)?;
    let unparser = Unparser::default();
    let round_trip_sql = unparser.expr_to_sql(&parsed_expr)?.to_string();
    assert_eq!(sql, round_trip_sql);

    // enable pretty-unparsing. This makes the output more human-readable
    // but can be problematic when passed to other SQL engines due to
    // differences in precedence rules between DataFusion and target engines.
    let unparser = Unparser::default().with_pretty(true);
    let pretty = "car = 'red' OR speed > 1.0";
    let pretty_round_trip_sql = unparser.expr_to_sql(&parsed_expr)?.to_string();
    assert_eq!(pretty, pretty_round_trip_sql);

    Ok(())
}
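
// A minimal smoke test for the demos above. This is a sketch: it assumes this file
// is exposed as a library module of the surrounding crate and that `tokio` (with the
// `macros` and `rt` features) is available as a dev-dependency, as is typical for
// the datafusion-examples crate.
#[cfg(test)]
mod tests {
    use super::*;

    #[tokio::test]
    async fn run_parse_sql_expr_demos() -> Result<()> {
        parse_sql_expr().await
    }
}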