| // Licensed to the Apache Software Foundation (ASF) under one |
| // or more contributor license agreements. See the NOTICE file |
| // distributed with this work for additional information |
| // regarding copyright ownership. The ASF licenses this file |
| // to you under the Apache License, Version 2.0 (the |
| // "License"); you may not use this file except in compliance |
| // with the License. You may obtain a copy of the License at |
| // |
| // http://www.apache.org/licenses/LICENSE-2.0 |
| // |
| // Unless required by applicable law or agreed to in writing, |
| // software distributed under the License is distributed on an |
| // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
| // KIND, either express or implied. See the License for the |
| // specific language governing permissions and limitations |
| // under the License. |
| |
| use std::sync::Arc; |
| |
| extern crate arrow; |
| extern crate datafusion; |
| |
| use arrow::array::{BinaryArray, Float64Array}; |
| use arrow::datatypes::{DataType, Field, Schema}; |
| |
| use datafusion::execution::context::ExecutionContext; |
| |
| /// This example demonstrates executing a simple query against an Arrow data source (CSV) and |
| /// fetching results |
| fn main() { |
| // create local execution context |
| let mut ctx = ExecutionContext::new(); |
| |
| // define schema for data source (csv file) |
| let schema = Arc::new(Schema::new(vec![ |
| Field::new("c1", DataType::Utf8, false), |
| Field::new("c2", DataType::UInt32, false), |
| Field::new("c3", DataType::Int8, false), |
| Field::new("c4", DataType::Int16, false), |
| Field::new("c5", DataType::Int32, false), |
| Field::new("c6", DataType::Int64, false), |
| Field::new("c7", DataType::UInt8, false), |
| Field::new("c8", DataType::UInt16, false), |
| Field::new("c9", DataType::UInt32, false), |
| Field::new("c10", DataType::UInt64, false), |
| Field::new("c11", DataType::Float32, false), |
| Field::new("c12", DataType::Float64, false), |
| Field::new("c13", DataType::Utf8, false), |
| ])); |
| |
| let testdata = |
| ::std::env::var("ARROW_TEST_DATA").expect("ARROW_TEST_DATA not defined"); |
| |
| // register csv file with the execution context |
| ctx.register_csv( |
| "aggregate_test_100", |
| &format!("{}/csv/aggregate_test_100.csv", testdata), |
| &schema, |
| true, |
| ); |
| |
| // simple projection and selection |
| let sql = "SELECT c1, MIN(c12), MAX(c12) FROM aggregate_test_100 WHERE c11 > 0.1 AND c11 < 0.9 GROUP BY c1"; |
| |
| // execute the query |
| let relation = ctx.sql(&sql, 1024 * 1024).unwrap(); |
| |
| // display the relation |
| let mut results = relation.borrow_mut(); |
| |
| while let Some(batch) = results.next().unwrap() { |
| println!( |
| "RecordBatch has {} rows and {} columns", |
| batch.num_rows(), |
| batch.num_columns() |
| ); |
| |
| let c1 = batch |
| .column(0) |
| .as_any() |
| .downcast_ref::<BinaryArray>() |
| .unwrap(); |
| |
| let min = batch |
| .column(1) |
| .as_any() |
| .downcast_ref::<Float64Array>() |
| .unwrap(); |
| |
| let max = batch |
| .column(2) |
| .as_any() |
| .downcast_ref::<Float64Array>() |
| .unwrap(); |
| |
| for i in 0..batch.num_rows() { |
| let c1_value: String = String::from_utf8(c1.value(i).to_vec()).unwrap(); |
| |
| println!("{}, Min: {}, Max: {}", c1_value, min.value(i), max.value(i),); |
| } |
| } |
| } |