Skip to content

Instantly share code, notes, and snippets.

@ChengzhiZhao
Last active July 24, 2020 01:49
Show Gist options
  • Save ChengzhiZhao/3088137679ffe5ac920784955894a8e7 to your computer and use it in GitHub Desktop.
Save ChengzhiZhao/3088137679ffe5ac920784955894a8e7 to your computer and use it in GitHub Desktop.
DataFusion First Select Query
use std::sync::Arc;
extern crate arrow;
extern crate datafusion;
use arrow::array::{Int32Array, Float64Array};
use arrow::datatypes::{DataType, Field, Schema};
use std::time::{Duration, Instant};
use datafusion::execution::context::ExecutionContext;
/// Runs a simple SQL query against a CSV file registered as an Arrow data source
/// and prints the rows of every returned record batch.
///
/// The wall-clock time measured from the top of `main` (context creation, CSV
/// registration, query execution and printing) is reported once, after all
/// batches have been consumed, so it is shown even when the query yields no
/// batches.
///
/// # Panics
///
/// Panics if the query call or fetching a batch returns an error, or if a
/// result column does not have the array type declared in the schema below.
fn main() {
    // Start the clock before any setup so the reported time covers the whole run.
    let start = Instant::now();

    // create local execution context
    let mut ctx = ExecutionContext::new();

    // define schema for data source (csv file)
    let schema = Arc::new(Schema::new(vec![
        Field::new("userId", DataType::Int32, false),
        Field::new("movieId", DataType::Int32, false),
        Field::new("rating", DataType::Float64, false),
    ]));

    // register csv file with the execution context
    // NOTE(review): the trailing `true` presumably means "file has a header row" — confirm
    // against the DataFusion version in use.
    ctx.register_csv("ratings", "/datafusion_test/ratings.csv", &schema, true);

    // simple projection and selection
    let sql = "SELECT userId,movieId,rating FROM ratings LIMIT 10";

    // execute the query
    // NOTE(review): the second argument is presumably the batch size — confirm.
    let relation = ctx
        .sql(sql, 1024 * 1024)
        .expect("failed to run SQL query against `ratings`");

    // display the relation
    let mut results = relation.borrow_mut();
    while let Some(batch) = results
        .next()
        .expect("failed to fetch the next record batch")
    {
        println!(
            "RecordBatch has {} rows and {} columns",
            batch.num_rows(),
            batch.num_columns()
        );

        // Downcast each column to the concrete array type declared in the schema;
        // the column order follows the SELECT list.
        let user_ids = batch
            .column(0)
            .as_any()
            .downcast_ref::<Int32Array>()
            .expect("column 0 (userId) is not an Int32Array");
        let movie_ids = batch
            .column(1)
            .as_any()
            .downcast_ref::<Int32Array>()
            .expect("column 1 (movieId) is not an Int32Array");
        let ratings = batch
            .column(2)
            .as_any()
            .downcast_ref::<Float64Array>()
            .expect("column 2 (rating) is not a Float64Array");

        for i in 0..batch.num_rows() {
            println!(
                "{}, {}, {}",
                user_ids.value(i),
                movie_ids.value(i),
                ratings.value(i)
            );
        }
    }

    // Report the total once, after every batch has been consumed (previously this
    // ran inside the batch loop: once per batch, and never for an empty result).
    let duration = start.elapsed();
    println!("Time elapsed in SQL() is: {:?}", duration);
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment