blob: 069ed39d1039384e88f94f68938e88565751710a [file] [log] [blame]
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#[macro_use]
extern crate criterion;
use criterion::{Criterion, Throughput};
extern crate arrow;
extern crate parquet;
use std::sync::Arc;
use arrow::datatypes::*;
use arrow::{record_batch::RecordBatch, util::data_gen::*};
use parquet::{
arrow::ArrowWriter, errors::Result, file::writer::InMemoryWriteableCursor,
};
fn create_primitive_bench_batch(
size: usize,
null_density: f32,
true_density: f32,
) -> Result<RecordBatch> {
let fields = vec![
Field::new("_1", DataType::Int8, true),
Field::new("_2", DataType::Int16, true),
Field::new("_3", DataType::Int32, true),
Field::new("_4", DataType::Int64, true),
Field::new("_5", DataType::UInt8, true),
Field::new("_6", DataType::UInt16, true),
Field::new("_7", DataType::UInt32, true),
Field::new("_8", DataType::UInt64, true),
Field::new("_9", DataType::Float32, true),
Field::new("_10", DataType::Float64, true),
Field::new("_11", DataType::Date32, true),
Field::new("_12", DataType::Date64, true),
Field::new("_13", DataType::Time32(TimeUnit::Second), true),
Field::new("_14", DataType::Time32(TimeUnit::Millisecond), true),
Field::new("_15", DataType::Time64(TimeUnit::Microsecond), true),
Field::new("_16", DataType::Time64(TimeUnit::Nanosecond), true),
Field::new("_17", DataType::Utf8, true),
Field::new("_18", DataType::LargeUtf8, true),
Field::new("_19", DataType::Boolean, true),
];
let schema = Schema::new(fields);
Ok(create_random_batch(
Arc::new(schema),
size,
null_density,
true_density,
)?)
}
fn _create_nested_bench_batch(
size: usize,
null_density: f32,
true_density: f32,
) -> Result<RecordBatch> {
let fields = vec![
Field::new(
"_1",
DataType::Struct(vec![
Field::new("_1", DataType::Int8, true),
Field::new(
"_2",
DataType::Struct(vec![
Field::new("_1", DataType::Int8, true),
Field::new(
"_1",
DataType::Struct(vec![
Field::new("_1", DataType::Int8, true),
Field::new("_2", DataType::Utf8, true),
]),
true,
),
Field::new("_2", DataType::UInt8, true),
]),
true,
),
]),
true,
),
Field::new(
"_2",
DataType::LargeList(Box::new(Field::new(
"item",
DataType::List(Box::new(Field::new(
"item",
DataType::Struct(vec![
Field::new(
"_1",
DataType::Struct(vec![
Field::new("_1", DataType::Int8, true),
Field::new("_2", DataType::Int16, true),
Field::new("_3", DataType::Int32, true),
]),
true,
),
Field::new(
"_2",
DataType::List(Box::new(Field::new(
"",
DataType::FixedSizeBinary(2),
true,
))),
true,
),
]),
true,
))),
true,
))),
true,
),
];
let schema = Schema::new(fields);
Ok(create_random_batch(
Arc::new(schema),
size,
null_density,
true_density,
)?)
}
/// Writes `batch` through a fresh [`ArrowWriter`] backed by an in-memory
/// cursor and closes the writer. The written bytes are discarded; only the
/// work of writing matters to the benchmarks.
///
/// # Errors
///
/// Returns any error raised while creating the writer, writing the batch, or
/// closing the writer.
#[inline]
fn write_batch(batch: &RecordBatch) -> Result<()> {
    // Write batch to an in-memory writer
    let cursor = InMemoryWriteableCursor::default();
    let mut writer = ArrowWriter::try_new(cursor, batch.schema(), None)?;
    // `batch` is already a reference, so pass it straight through instead of
    // borrowing it a second time.
    writer.write(batch)?;
    writer.close()?;
    Ok(())
}
/// Benchmarks `write_batch` on primitive-column batches of 1024 and 4096 rows
/// within the "write_batch primitive" group.
///
/// Each case is generated with a null density of 0.25 and a true density of
/// 0.75, and reports throughput in bytes using the summed in-memory size of
/// the batch's columns.
fn bench_primitive_writer(c: &mut Criterion) {
    let mut group = c.benchmark_group("write_batch primitive");

    // (row count, benchmark label) for each measured case.
    let cases: [(usize, &str); 2] = [(1024, "1024 values"), (4096, "4096 values")];
    for &(rows, label) in cases.iter() {
        let batch = create_primitive_bench_batch(rows, 0.25, 0.75).unwrap();
        let total_bytes: u64 = batch
            .columns()
            .iter()
            .map(|column| column.get_array_memory_size() as u64)
            .sum();
        group.throughput(Throughput::Bytes(total_bytes));
        group.bench_function(label, |b| b.iter(|| write_batch(&batch).unwrap()));
    }

    group.finish();
}
// This bench triggers a write error, so it is not registered in
// `criterion_group!` for now.
/// Benchmarks `write_batch` on nested-column batches of 1024 and 4096 rows
/// within the "write_batch nested" group.
///
/// Each case is generated with a null density of 0.25 and a true density of
/// 0.75, and reports throughput in bytes using the summed in-memory size of
/// the batch's columns.
fn _bench_nested_writer(c: &mut Criterion) {
    let mut group = c.benchmark_group("write_batch nested");

    // (row count, benchmark label) for each measured case.
    let cases: [(usize, &str); 2] = [(1024, "1024 values"), (4096, "4096 values")];
    for &(rows, label) in cases.iter() {
        // Both cases must use the nested generator. The 4096-row case used to
        // build a primitive batch, so it did not measure nested writes at all.
        let batch = _create_nested_bench_batch(rows, 0.25, 0.75).unwrap();
        let total_bytes: u64 = batch
            .columns()
            .iter()
            .map(|column| column.get_array_memory_size() as u64)
            .sum();
        group.throughput(Throughput::Bytes(total_bytes));
        group.bench_function(label, |b| b.iter(|| write_batch(&batch).unwrap()));
    }

    group.finish();
}
// Define the `benches` group. Only `bench_primitive_writer` is listed, so
// `_bench_nested_writer` is compiled but never executed.
criterion_group!(benches, bench_primitive_writer);
// Generate the benchmark binary's entry point, which runs the `benches` group.
criterion_main!(benches);