Add Decimal to CsvWriter and improve debug display (#406)
* Add Decimal to CsvWriter and improve debug display
* Measure CSV writer instead of file and data creation
* Re-use decimal formatting
diff --git a/arrow/benches/csv_writer.rs b/arrow/benches/csv_writer.rs
index 50b94d6..62c5da9 100644
--- a/arrow/benches/csv_writer.rs
+++ b/arrow/benches/csv_writer.rs
@@ -28,14 +28,14 @@
use std::fs::File;
use std::sync::Arc;
-fn record_batches_to_csv() {
+fn criterion_benchmark(c: &mut Criterion) {
#[cfg(feature = "csv")]
{
let schema = Schema::new(vec![
Field::new("c1", DataType::Utf8, false),
Field::new("c2", DataType::Float64, true),
Field::new("c3", DataType::UInt32, false),
- Field::new("c3", DataType::Boolean, true),
+ Field::new("c4", DataType::Boolean, true),
]);
let c1 = StringArray::from(vec![
@@ -59,16 +59,17 @@
let file = File::create("target/bench_write_csv.csv").unwrap();
let mut writer = csv::Writer::new(file);
let batches = vec![&b, &b, &b, &b, &b, &b, &b, &b, &b, &b, &b];
- #[allow(clippy::unit_arg)]
- criterion::black_box(for batch in batches {
- writer.write(batch).unwrap()
+
+ c.bench_function("record_batches_to_csv", |b| {
+ b.iter(|| {
+ #[allow(clippy::unit_arg)]
+ criterion::black_box(for batch in &batches {
+ writer.write(batch).unwrap()
+ });
+ });
});
}
}
-fn criterion_benchmark(c: &mut Criterion) {
- c.bench_function("record_batches_to_csv", |b| b.iter(record_batches_to_csv));
-}
-
criterion_group!(benches, criterion_benchmark);
criterion_main!(benches);
diff --git a/arrow/src/array/array_binary.rs b/arrow/src/array/array_binary.rs
index 0cb4db4..0b374db 100644
--- a/arrow/src/array/array_binary.rs
+++ b/arrow/src/array/array_binary.rs
@@ -666,6 +666,17 @@
self.length * i as i32
}
+    /// Formats the decimal stored at `row` as a string, placing the decimal
+    /// point `self.scale` digits from the right of the unscaled value.
+    ///
+    /// With a scale of 0 the unscaled value is returned unchanged. When the
+    /// unscaled value has no more digits than the scale, the fraction is
+    /// left-padded with zeros and a leading `0` is emitted, e.g. unscaled `5`
+    /// at scale 2 becomes `0.05` and `-5` becomes `-0.05`.
+    ///
+    /// NOTE(review): assumes `self.value(row)` displays as an optional `-`
+    /// followed by ASCII digits, as an `i128` does.
+    #[inline]
+    pub fn value_as_string(&self, row: usize) -> String {
+        let decimal_string = self.value(row).to_string();
+        if self.scale == 0 {
+            return decimal_string;
+        }
+
+        // Separate an optional leading '-' so that only digits are counted
+        // against the scale; otherwise the split could land inside the sign.
+        let (sign, digits) = if decimal_string.starts_with('-') {
+            decimal_string.split_at(1)
+        } else {
+            ("", decimal_string.as_str())
+        };
+
+        if digits.len() > self.scale {
+            // Enough digits: the separator falls inside the digit string.
+            let (whole, fraction) = digits.split_at(digits.len() - self.scale);
+            format!("{}{}.{}", sign, whole, fraction)
+        } else {
+            // Too few digits: `digits.len() - self.scale` would underflow, so
+            // pad the fraction with zeros up to the scale instead.
+            format!("{}0.{:0>width$}", sign, digits, width = self.scale)
+        }
+    }
+
pub fn from_fixed_size_list_array(
v: FixedSizeListArray,
precision: usize,
@@ -729,7 +740,9 @@
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
write!(f, "DecimalArray<{}, {}>\n[\n", self.precision, self.scale)?;
print_long_array(self, f, |array, index, f| {
- fmt::Debug::fmt(&array.value(index), f)
+ let formatted_decimal = array.value_as_string(index);
+
+ write!(f, "{}", formatted_decimal)
})?;
write!(f, "]")
}
@@ -758,7 +771,7 @@
#[cfg(test)]
mod tests {
use crate::{
- array::{LargeListArray, ListArray},
+ array::{DecimalBuilder, LargeListArray, ListArray},
datatypes::Field,
};
@@ -1163,17 +1176,16 @@
#[test]
fn test_decimal_array_fmt_debug() {
- let values: [u8; 32] = [
- 192, 219, 180, 17, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 64, 36, 75, 238, 253,
- 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
- ];
- let array_data = ArrayData::builder(DataType::Decimal(23, 6))
- .len(2)
- .add_buffer(Buffer::from(&values[..]))
- .build();
- let arr = DecimalArray::from(array_data);
+ let values: Vec<i128> = vec![8887000000, -8887000000];
+ let mut decimal_builder = DecimalBuilder::new(3, 23, 6);
+
+ values.iter().for_each(|&value| {
+ decimal_builder.append_value(value).unwrap();
+ });
+ decimal_builder.append_null().unwrap();
+ let arr = decimal_builder.finish();
assert_eq!(
- "DecimalArray<23, 6>\n[\n 8887000000,\n -8887000000,\n]",
+ "DecimalArray<23, 6>\n[\n 8887.000000,\n -8887.000000,\n null,\n]",
format!("{:?}", arr)
);
}
diff --git a/arrow/src/csv/writer.rs b/arrow/src/csv/writer.rs
index aa0ed67..b3b8838 100644
--- a/arrow/src/csv/writer.rs
+++ b/arrow/src/csv/writer.rs
@@ -70,6 +70,7 @@
use crate::datatypes::*;
use crate::error::{ArrowError, Result};
use crate::record_batch::RecordBatch;
+use crate::util::display::make_string_from_decimal;
use crate::{array::*, util::serialization::lexical_to_string};
const DEFAULT_DATE_FORMAT: &str = "%F";
const DEFAULT_TIME_FORMAT: &str = "%T";
@@ -242,6 +243,7 @@
};
format!("{}", datetime.format(&self.timestamp_format))
}
+ DataType::Decimal(..) => make_string_from_decimal(col, row_index)?,
t => {
// List and Struct arrays not supported by the writer, any
// other type needs to be implemented
@@ -566,6 +568,7 @@
Field::new("c4", DataType::Boolean, true),
Field::new("c5", DataType::Timestamp(TimeUnit::Millisecond, None), true),
Field::new("c6", DataType::Time32(TimeUnit::Second), false),
+ Field::new("c7", DataType::Decimal(6, 2), false),
]);
let c1 = StringArray::from(vec![
@@ -585,6 +588,11 @@
None,
);
let c6 = Time32SecondArray::from(vec![1234, 24680, 85563]);
+ let mut c7_builder = DecimalBuilder::new(5, 6, 2);
+ c7_builder.append_value(12345_i128).unwrap();
+ c7_builder.append_value(-12345_i128).unwrap();
+ c7_builder.append_null().unwrap();
+ let c7 = c7_builder.finish();
let batch = RecordBatch::try_new(
Arc::new(schema),
@@ -595,6 +603,7 @@
Arc::new(c4),
Arc::new(c5),
Arc::new(c6),
+ Arc::new(c7),
],
)
.unwrap();
@@ -606,13 +615,13 @@
writer.write(batch).unwrap();
}
- let left = "c1,c2,c3,c4,c5,c6
-Lorem ipsum dolor sit amet,123.564532,3,true,,00:20:34
-consectetur adipiscing elit,,2,false,2019-04-18T10:54:47.378000000,06:51:20
-sed do eiusmod tempor,-556132.25,1,,2019-04-18T02:45:55.555000000,23:46:03
-Lorem ipsum dolor sit amet,123.564532,3,true,,00:20:34
-consectetur adipiscing elit,,2,false,2019-04-18T10:54:47.378000000,06:51:20
-sed do eiusmod tempor,-556132.25,1,,2019-04-18T02:45:55.555000000,23:46:03\n";
+ let left = "c1,c2,c3,c4,c5,c6,c7
+Lorem ipsum dolor sit amet,123.564532,3,true,,00:20:34,123.45
+consectetur adipiscing elit,,2,false,2019-04-18T10:54:47.378000000,06:51:20,-123.45
+sed do eiusmod tempor,-556132.25,1,,2019-04-18T02:45:55.555000000,23:46:03,
+Lorem ipsum dolor sit amet,123.564532,3,true,,00:20:34,123.45
+consectetur adipiscing elit,,2,false,2019-04-18T10:54:47.378000000,06:51:20,-123.45
+sed do eiusmod tempor,-556132.25,1,,2019-04-18T02:45:55.555000000,23:46:03,\n";
let right = writer.writer.into_inner().map(|s| s.to_string());
assert_eq!(Some(left.to_string()), right.ok());
}
diff --git a/arrow/src/util/display.rs b/arrow/src/util/display.rs
index 13d9f19..bb75a3a 100644
--- a/arrow/src/util/display.rs
+++ b/arrow/src/util/display.rs
@@ -19,6 +19,8 @@
//! purposes. See the `pretty` crate for additional functions for
//! record batch pretty printing.
+use std::sync::Arc;
+
use crate::array::Array;
use crate::datatypes::{
ArrowNativeType, ArrowPrimitiveType, DataType, Int16Type, Int32Type, Int64Type,
@@ -192,18 +194,15 @@
}};
}
-macro_rules! make_string_from_decimal {
- ($array_type: ty, $column: ident, $row: ident, $scale: ident) => {{
- let array = $column.as_any().downcast_ref::<$array_type>().unwrap();
- let decimal_string = array.value($row).to_string();
- let formatted_decimal = if *$scale == 0 {
- decimal_string
- } else {
- let splits = decimal_string.split_at(decimal_string.len() - *$scale);
- format!("{}.{}", splits.0, splits.1)
- };
- Ok(formatted_decimal)
- }};
+/// Formats the value at `row` of a decimal `column` as a string.
+///
+/// # Errors
+///
+/// Returns an error if `column` cannot be downcast to a `DecimalArray`,
+/// instead of panicking.
+#[inline]
+pub fn make_string_from_decimal(column: &Arc<dyn Array>, row: usize) -> Result<String> {
+    let array = column
+        .as_any()
+        .downcast_ref::<array::DecimalArray>()
+        .ok_or_else(|| {
+            // Fully qualified so this does not depend on which error names
+            // the surrounding file has imported.
+            crate::error::ArrowError::InvalidArgumentError(format!(
+                "Expected a DecimalArray, got {:?}",
+                column.data_type()
+            ))
+        })?;
+
+    Ok(array.value_as_string(row))
}
/// Get the value at the given row in an array as a String.
@@ -231,9 +230,7 @@
DataType::Float16 => make_string!(array::Float32Array, column, row),
DataType::Float32 => make_string!(array::Float32Array, column, row),
DataType::Float64 => make_string!(array::Float64Array, column, row),
- DataType::Decimal(_, scale) => {
- make_string_from_decimal!(array::DecimalArray, column, row, scale)
- }
+ DataType::Decimal(..) => make_string_from_decimal(column, row),
DataType::Timestamp(unit, _) if *unit == TimeUnit::Second => {
make_string_datetime!(array::TimestampSecondArray, column, row)
}