In this example some simple processing is performed on the example.csv file.
Cargo.tomlAdd the following to your Cargo.toml file:
datafusion = "11.0" tokio = "1.0"
use datafusion::prelude::*; #[tokio::main] async fn main() -> datafusion::error::Result<()> { // register the table let ctx = SessionContext::new(); ctx.register_csv("example", "tests/data/example.csv", CsvReadOptions::new()).await?; // create a plan to run a SQL query let df = ctx.sql("SELECT a, MIN(b) FROM example WHERE a <= b GROUP BY a LIMIT 100").await?; // execute and print results df.show().await?; Ok(()) }
use datafusion::prelude::*; #[tokio::main] async fn main() -> datafusion::error::Result<()> { // create the dataframe let ctx = SessionContext::new(); let df = ctx.read_csv("tests/data/example.csv", CsvReadOptions::new()).await?; let df = df.filter(col("a").lt_eq(col("b")))? .aggregate(vec![col("a")], vec![min(col("b"))])? .limit(0, Some(100))?; // execute and print results df.show().await?; Ok(()) }
+---+--------+ | a | MIN(b) | +---+--------+ | 1 | 2 | +---+--------+
Please be aware that all identifiers are effectively made lower-case in SQL, so if your csv file has capital letters (ex: Name) you must put your column name in double quotes or the examples won't work.
To illustrate this behavior, consider the capitalized_example.csv file:
use datafusion::prelude::*; #[tokio::main] async fn main() -> datafusion::error::Result<()> { // register the table let ctx = SessionContext::new(); ctx.register_csv("example", "tests/data/capitalized_example.csv", CsvReadOptions::new()).await?; // create a plan to run a SQL query let df = ctx.sql("SELECT \"A\", MIN(b) FROM example WHERE \"A\" <= c GROUP BY \"A\" LIMIT 100").await?; // execute and print results df.show().await?; Ok(()) }
use datafusion::prelude::*; #[tokio::main] async fn main() -> datafusion::error::Result<()> { // create the dataframe let ctx = SessionContext::new(); let df = ctx.read_csv("tests/data/capitalized_example.csv", CsvReadOptions::new()).await?; let df = df.filter(col("A").lt_eq(col("c")))? .aggregate(vec![col("A")], vec![min(col("b"))])? .limit(0, Some(100))?; // execute and print results df.show().await?; Ok(()) }
+---+--------+ | A | MIN(b) | +---+--------+ | 2 | 1 | | 1 | 2 | +---+--------+