rust/datafusion/examples/csv_sql.rs - arrow - Git at Google

 // Licensed to the Apache Software Foundation (ASF) under one
 // or more contributor license agreements.  See the NOTICE file
 // distributed with this work for additional information
 // regarding copyright ownership.  The ASF licenses this file
 // to you under the Apache License, Version 2.0 (the
 // "License"); you may not use this file except in compliance
 // with the License.  You may obtain a copy of the License at
 //
 //   http://www.apache.org/licenses/LICENSE-2.0
 //
 // Unless required by applicable law or agreed to in writing,
 // software distributed under the License is distributed on an
 // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 // KIND, either express or implied.  See the License for the
 // specific language governing permissions and limitations
 // under the License.

 use std::sync::Arc;

 extern crate arrow;
 extern crate datafusion;

 use arrow::array::{BinaryArray, Float64Array};
 use arrow::datatypes::{DataType, Field, Schema};

 use datafusion::execution::context::ExecutionContext;

 /// This example demonstrates executing a simple query against an Arrow data source (CSV) and
 /// fetching results
 fn main() {
     // create local execution context
     let mut ctx = ExecutionContext::new();

     // define schema for data source (csv file)
     let schema = Arc::new(Schema::new(vec![
         Field::new("c1", DataType::Utf8, false),
         Field::new("c2", DataType::UInt32, false),
         Field::new("c3", DataType::Int8, false),
         Field::new("c4", DataType::Int16, false),
         Field::new("c5", DataType::Int32, false),
         Field::new("c6", DataType::Int64, false),
         Field::new("c7", DataType::UInt8, false),
         Field::new("c8", DataType::UInt16, false),
         Field::new("c9", DataType::UInt32, false),
         Field::new("c10", DataType::UInt64, false),
         Field::new("c11", DataType::Float32, false),
         Field::new("c12", DataType::Float64, false),
         Field::new("c13", DataType::Utf8, false),
     ]));

     let testdata =
         ::std::env::var("ARROW_TEST_DATA").expect("ARROW_TEST_DATA not defined");

     // register csv file with the execution context
     ctx.register_csv(
         "aggregate_test_100",
         &format!("{}/csv/aggregate_test_100.csv", testdata),
         &schema,
         true,
     );

     // simple projection and selection
     let sql = "SELECT c1, MIN(c12), MAX(c12) FROM aggregate_test_100 WHERE c11 > 0.1 AND c11 < 0.9 GROUP BY c1";

     // execute the query
     let relation = ctx.sql(&sql, 1024 * 1024).unwrap();

     // display the relation
     let mut results = relation.borrow_mut();

     while let Some(batch) = results.next().unwrap() {
         println!(
             "RecordBatch has {} rows and {} columns",
             batch.num_rows(),
             batch.num_columns()
         );

         let c1 = batch
             .column(0)
             .as_any()
             .downcast_ref::<BinaryArray>()
             .unwrap();

         let min = batch
             .column(1)
             .as_any()
             .downcast_ref::<Float64Array>()
             .unwrap();

         let max = batch
             .column(2)
             .as_any()
             .downcast_ref::<Float64Array>()
             .unwrap();

         for i in 0..batch.num_rows() {
             let c1_value: String = String::from_utf8(c1.value(i).to_vec()).unwrap();

             println!("{}, Min: {}, Max: {}", c1_value, min.value(i), max.value(i),);
         }
     }
 }
	// Licensed to the Apache Software Foundation (ASF) under one
	// or more contributor license agreements. See the NOTICE file
	// distributed with this work for additional information
	// regarding copyright ownership. The ASF licenses this file
	// to you under the Apache License, Version 2.0 (the
	// "License"); you may not use this file except in compliance
	// with the License. You may obtain a copy of the License at
	//
	// http://www.apache.org/licenses/LICENSE-2.0
	//
	// Unless required by applicable law or agreed to in writing,
	// software distributed under the License is distributed on an
	// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
	// KIND, either express or implied. See the License for the
	// specific language governing permissions and limitations
	// under the License.

	use std::sync::Arc;

	extern crate arrow;
	extern crate datafusion;

	use arrow::array::{BinaryArray, Float64Array};
	use arrow::datatypes::{DataType, Field, Schema};

	use datafusion::execution::context::ExecutionContext;

	/// This example demonstrates executing a simple query against an Arrow data source (CSV) and
	/// fetching results
	fn main() {
	// create local execution context
	let mut ctx = ExecutionContext::new();

	// define schema for data source (csv file)
	let schema = Arc::new(Schema::new(vec![
	Field::new("c1", DataType::Utf8, false),
	Field::new("c2", DataType::UInt32, false),
	Field::new("c3", DataType::Int8, false),
	Field::new("c4", DataType::Int16, false),
	Field::new("c5", DataType::Int32, false),
	Field::new("c6", DataType::Int64, false),
	Field::new("c7", DataType::UInt8, false),
	Field::new("c8", DataType::UInt16, false),
	Field::new("c9", DataType::UInt32, false),
	Field::new("c10", DataType::UInt64, false),
	Field::new("c11", DataType::Float32, false),
	Field::new("c12", DataType::Float64, false),
	Field::new("c13", DataType::Utf8, false),
	]));

	let testdata =
	::std::env::var("ARROW_TEST_DATA").expect("ARROW_TEST_DATA not defined");

	// register csv file with the execution context
	ctx.register_csv(
	"aggregate_test_100",
	&format!("{}/csv/aggregate_test_100.csv", testdata),
	&schema,
	true,
	);

	// simple projection and selection
	let sql = "SELECT c1, MIN(c12), MAX(c12) FROM aggregate_test_100 WHERE c11 > 0.1 AND c11 < 0.9 GROUP BY c1";

	// execute the query
	let relation = ctx.sql(&sql, 1024 * 1024).unwrap();

	// display the relation
	let mut results = relation.borrow_mut();

	while let Some(batch) = results.next().unwrap() {
	println!(
	"RecordBatch has {} rows and {} columns",
	batch.num_rows(),
	batch.num_columns()
	);

	let c1 = batch
	.column(0)
	.as_any()
	.downcast_ref::<BinaryArray>()
	.unwrap();

	let min = batch
	.column(1)
	.as_any()
	.downcast_ref::<Float64Array>()
	.unwrap();

	let max = batch
	.column(2)
	.as_any()
	.downcast_ref::<Float64Array>()
	.unwrap();

	for i in 0..batch.num_rows() {
	let c1_value: String = String::from_utf8(c1.value(i).to_vec()).unwrap();

	println!("{}, Min: {}, Max: {}", c1_value, min.value(i), max.value(i),);
	}
	}
	}