ARROW-10656: [Rust] Use DataType comparison without values
Compare nested list data types by their child data type and nullability only, instead of requiring every value of the child field (such as its name) to match exactly.
Closes #8715 from ch-sc/ARROW-10656-RecordBatch-requires-exact-data-type-match
Authored-by: Christoph Schulze <christoph.schulze@signavio.com>
Signed-off-by: Neville Dipale <nevilledips@gmail.com>
diff --git a/rust/arrow/examples/builders.rs b/rust/arrow/examples/builders.rs
index 61cce0e..e4f7eca 100644
--- a/rust/arrow/examples/builders.rs
+++ b/rust/arrow/examples/builders.rs
@@ -25,7 +25,9 @@
StringArray, StructArray,
};
use arrow::buffer::Buffer;
-use arrow::datatypes::{DataType, Date64Type, Field, Time64NanosecondType, ToByteSlice};
+use arrow::datatypes::{
+ DataType, Date64Type, Field, NullableDataType, Time64NanosecondType, ToByteSlice,
+};
fn main() {
// Primitive Arrays
@@ -100,7 +102,7 @@
// Construct a list array from the above two
let list_data_type =
- DataType::List(Box::new(Field::new("item", DataType::Int32, false)));
+ DataType::List(Box::new(NullableDataType::new(DataType::Int32, false)));
let list_data = ArrayData::builder(list_data_type)
.len(3)
.add_buffer(value_offsets)
diff --git a/rust/arrow/src/array/array_binary.rs b/rust/arrow/src/array/array_binary.rs
index 15d6ccd..ef1cf8d 100644
--- a/rust/arrow/src/array/array_binary.rs
+++ b/rust/arrow/src/array/array_binary.rs
@@ -596,7 +596,7 @@
#[cfg(test)]
mod tests {
- use crate::datatypes::Field;
+ use crate::datatypes::NullableDataType;
use super::*;
@@ -908,7 +908,7 @@
.build();
let array_data = ArrayData::builder(DataType::FixedSizeList(
- Box::new(Field::new("item", DataType::Binary, false)),
+ Box::new(NullableDataType::new(DataType::Binary, false)),
4,
))
.len(3)
diff --git a/rust/arrow/src/array/array_list.rs b/rust/arrow/src/array/array_list.rs
index 4eb8dc5..00e7d29 100644
--- a/rust/arrow/src/array/array_list.rs
+++ b/rust/arrow/src/array/array_list.rs
@@ -297,15 +297,12 @@
#[cfg(test)]
mod tests {
use crate::{
- array::ArrayData,
- array::Int32Array,
- buffer::Buffer,
- datatypes::{Field, ToByteSlice},
- memory,
- util::bit_util,
+ array::ArrayData, array::Int32Array, buffer::Buffer, datatypes::ToByteSlice,
+ memory, util::bit_util,
};
use super::*;
+ use crate::datatypes::NullableDataType;
#[test]
fn test_list_array() {
@@ -321,7 +318,7 @@
// Construct a list array from the above two
let list_data_type =
- DataType::List(Box::new(Field::new("item", DataType::Int32, false)));
+ DataType::List(Box::new(NullableDataType::new(DataType::Int32, false)));
let list_data = ArrayData::builder(list_data_type.clone())
.len(3)
.add_buffer(value_offsets.clone())
@@ -391,7 +388,7 @@
// Construct a list array from the above two
let list_data_type =
- DataType::LargeList(Box::new(Field::new("item", DataType::Int32, false)));
+ DataType::LargeList(Box::new(NullableDataType::new(DataType::Int32, false)));
let list_data = ArrayData::builder(list_data_type.clone())
.len(3)
.add_buffer(value_offsets.clone())
@@ -457,7 +454,7 @@
// Construct a list array from the above two
let list_data_type = DataType::FixedSizeList(
- Box::new(Field::new("item", DataType::Int32, false)),
+ Box::new(NullableDataType::new(DataType::Int32, false)),
3,
);
let list_data = ArrayData::builder(list_data_type.clone())
@@ -526,7 +523,7 @@
// Construct a list array from the above two
let list_data_type = DataType::FixedSizeList(
- Box::new(Field::new("item", DataType::Int32, false)),
+ Box::new(NullableDataType::new(DataType::Int32, false)),
3,
);
let list_data = ArrayData::builder(list_data_type)
@@ -560,7 +557,7 @@
// Construct a list array from the above two
let list_data_type =
- DataType::List(Box::new(Field::new("item", DataType::Int32, false)));
+ DataType::List(Box::new(NullableDataType::new(DataType::Int32, false)));
let list_data = ArrayData::builder(list_data_type)
.len(9)
.add_buffer(value_offsets)
@@ -625,7 +622,7 @@
// Construct a list array from the above two
let list_data_type =
- DataType::LargeList(Box::new(Field::new("item", DataType::Int32, false)));
+ DataType::LargeList(Box::new(NullableDataType::new(DataType::Int32, false)));
let list_data = ArrayData::builder(list_data_type)
.len(9)
.add_buffer(value_offsets)
@@ -688,7 +685,7 @@
// Construct a fixed size list array from the above two
let list_data_type = DataType::FixedSizeList(
- Box::new(Field::new("item", DataType::Int32, false)),
+ Box::new(NullableDataType::new(DataType::Int32, false)),
2,
);
let list_data = ArrayData::builder(list_data_type)
@@ -739,7 +736,7 @@
.add_buffer(Buffer::from(&[0, 1, 2, 3, 4, 5, 6, 7].to_byte_slice()))
.build();
let list_data_type =
- DataType::List(Box::new(Field::new("item", DataType::Int32, false)));
+ DataType::List(Box::new(NullableDataType::new(DataType::Int32, false)));
let list_data = ArrayData::builder(list_data_type)
.len(3)
.add_child_data(value_data)
@@ -754,7 +751,7 @@
fn test_list_array_invalid_child_array_len() {
let value_offsets = Buffer::from(&[0, 2, 5, 7].to_byte_slice());
let list_data_type =
- DataType::List(Box::new(Field::new("item", DataType::Int32, false)));
+ DataType::List(Box::new(NullableDataType::new(DataType::Int32, false)));
let list_data = ArrayData::builder(list_data_type)
.len(3)
.add_buffer(value_offsets)
@@ -773,7 +770,7 @@
let value_offsets = Buffer::from(&[2, 2, 5, 7].to_byte_slice());
let list_data_type =
- DataType::List(Box::new(Field::new("item", DataType::Int32, false)));
+ DataType::List(Box::new(NullableDataType::new(DataType::Int32, false)));
let list_data = ArrayData::builder(list_data_type)
.len(3)
.add_buffer(value_offsets)
@@ -805,7 +802,7 @@
.build();
let list_data_type =
- DataType::List(Box::new(Field::new("item", DataType::Int32, false)));
+ DataType::List(Box::new(NullableDataType::new(DataType::Int32, false)));
let list_data = ArrayData::builder(list_data_type)
.add_buffer(buf2)
.add_child_data(value_data)
diff --git a/rust/arrow/src/array/builder.rs b/rust/arrow/src/array/builder.rs
index 08fcd64..11d375a 100644
--- a/rust/arrow/src/array/builder.rs
+++ b/rust/arrow/src/array/builder.rs
@@ -764,8 +764,7 @@
///
/// This is used for validating array data types in `append_data`
fn data_type(&self) -> DataType {
- DataType::List(Box::new(Field::new(
- "item",
+ DataType::List(Box::new(NullableDataType::new(
self.values_builder.data_type(),
true,
)))
@@ -834,8 +833,7 @@
let null_bit_buffer = self.bitmap_builder.finish();
let nulls = null_bit_buffer.count_set_bits();
self.offsets_builder.append(0).unwrap();
- let data = ArrayData::builder(DataType::List(Box::new(Field::new(
- "item",
+ let data = ArrayData::builder(DataType::List(Box::new(NullableDataType::new(
values_data.data_type().clone(),
true, // TODO: find a consistent way of getting this
))))
@@ -976,8 +974,7 @@
///
/// This is used for validating array data types in `append_data`
fn data_type(&self) -> DataType {
- DataType::LargeList(Box::new(Field::new(
- "item",
+ DataType::LargeList(Box::new(NullableDataType::new(
self.values_builder.data_type(),
true,
)))
@@ -1046,11 +1043,9 @@
let null_bit_buffer = self.bitmap_builder.finish();
let nulls = null_bit_buffer.count_set_bits();
self.offsets_builder.append(0).unwrap();
- let data = ArrayData::builder(DataType::LargeList(Box::new(Field::new(
- "item",
- values_data.data_type().clone(),
- true,
- ))))
+ let data = ArrayData::builder(DataType::LargeList(Box::new(
+ NullableDataType::new(values_data.data_type().clone(), true),
+ )))
.len(len)
.null_count(len - nulls)
.add_buffer(offset_buffer)
@@ -1158,7 +1153,7 @@
/// This is used for validating array data types in `append_data`
fn data_type(&self) -> DataType {
DataType::FixedSizeList(
- Box::new(Field::new("item", self.values_builder.data_type(), true)),
+ Box::new(NullableDataType::new(self.values_builder.data_type(), true)),
self.list_len,
)
}
@@ -1237,7 +1232,7 @@
let null_bit_buffer = self.bitmap_builder.finish();
let nulls = null_bit_buffer.count_set_bits();
let data = ArrayData::builder(DataType::FixedSizeList(
- Box::new(Field::new("item", values_data.data_type().clone(), true)),
+ Box::new(NullableDataType::new(values_data.data_type().clone(), true)),
self.list_len,
))
.len(len)
@@ -1458,7 +1453,10 @@
)) as ArrayDataRef;
Arc::new(ArrayData::new(
- DataType::List(Box::new(Field::new("item", DataType::UInt8, true))),
+ DataType::List(Box::new(NullableDataType::new(
+ DataType::UInt8,
+ true,
+ ))),
array.len(),
None,
array.null_buffer().cloned(),
@@ -1510,8 +1508,7 @@
)) as ArrayDataRef;
Arc::new(ArrayData::new(
- DataType::LargeList(Box::new(Field::new(
- "item",
+ DataType::LargeList(Box::new(NullableDataType::new(
DataType::UInt8,
true,
))),
@@ -1613,7 +1610,7 @@
)) as ArrayDataRef;
let list_data = Arc::new(ArrayData::new(
DataType::FixedSizeList(
- Box::new(Field::new("item", DataType::UInt8, true)),
+ Box::new(NullableDataType::new(DataType::UInt8, true)),
self.builder.list_len,
),
array.len(),
@@ -1699,7 +1696,7 @@
)) as ArrayDataRef;
let list_data = Arc::new(ArrayData::new(
DataType::FixedSizeList(
- Box::new(Field::new("item", DataType::UInt8, true)),
+ Box::new(NullableDataType::new(DataType::UInt8, true)),
self.builder.list_len,
),
array.len(),
@@ -3820,13 +3817,13 @@
#[test]
#[should_panic(
- expected = "Data type List(Field { name: \"item\", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false }) is not currently supported"
+ expected = "Data type List(NullableDataType { data_type: Int64, nullable: true }) is not currently supported"
)]
fn test_struct_array_builder_from_schema_unsupported_type() {
let mut fields = Vec::new();
fields.push(Field::new("f1", DataType::Int16, false));
let list_type =
- DataType::List(Box::new(Field::new("item", DataType::Int64, true)));
+ DataType::List(Box::new(NullableDataType::new(DataType::Int64, true)));
fields.push(Field::new("f2", list_type, false));
let _ = StructBuilder::from_fields(fields, 5);
@@ -4125,7 +4122,7 @@
let list_value_offsets =
Buffer::from(&[0, 3, 5, 11, 13, 13, 15, 15, 17].to_byte_slice());
let expected_list_data = ArrayData::new(
- DataType::List(Box::new(Field::new("item", DataType::Int64, true))),
+ DataType::List(Box::new(NullableDataType::new(DataType::Int64, true))),
8,
None,
None,
@@ -4211,7 +4208,7 @@
&[0, 3, 5, 5, 13, 15, 15, 15, 19, 19, 19, 19, 23].to_byte_slice(),
);
let expected_list_data = ArrayData::new(
- DataType::List(Box::new(Field::new("item", DataType::Int64, true))),
+ DataType::List(Box::new(NullableDataType::new(DataType::Int64, true))),
12,
None,
None,
@@ -4253,7 +4250,7 @@
]);
let list_value_offsets = Buffer::from(&[0, 2, 3, 6].to_byte_slice());
let list_data = ArrayData::new(
- DataType::List(Box::new(Field::new("item", DataType::Utf8, true))),
+ DataType::List(Box::new(NullableDataType::new(DataType::Utf8, true))),
3,
None,
None,
@@ -4288,7 +4285,7 @@
]);
let list_value_offsets = Buffer::from(&[0, 2, 2, 4, 5, 8, 9, 12].to_byte_slice());
let expected_list_data = ArrayData::new(
- DataType::List(Box::new(Field::new("item", DataType::Utf8, true))),
+ DataType::List(Box::new(NullableDataType::new(DataType::Utf8, true))),
7,
None,
None, // is this correct?
@@ -4377,7 +4374,7 @@
]);
let expected_list_data = ArrayData::new(
DataType::FixedSizeList(
- Box::new(Field::new("item", DataType::UInt16, true)),
+ Box::new(NullableDataType::new(DataType::UInt16, true)),
2,
),
12,
@@ -4450,7 +4447,7 @@
]);
let expected_list_data = ArrayData::new(
DataType::FixedSizeList(
- Box::new(Field::new("item", DataType::UInt8, true)),
+ Box::new(NullableDataType::new(DataType::UInt8, true)),
2,
),
12,
diff --git a/rust/arrow/src/compute/kernels/cast.rs b/rust/arrow/src/compute/kernels/cast.rs
index ef79302..96254c4 100644
--- a/rust/arrow/src/compute/kernels/cast.rs
+++ b/rust/arrow/src/compute/kernels/cast.rs
@@ -1237,7 +1237,7 @@
let array = Arc::new(a) as ArrayRef;
let b = cast(
&array,
- &DataType::List(Box::new(Field::new("item", DataType::Int32, true))),
+ &DataType::List(Box::new(NullableDataType::new(DataType::Int32, true))),
)
.unwrap();
assert_eq!(5, b.len());
@@ -1267,7 +1267,7 @@
let array = Arc::new(a) as ArrayRef;
let b = cast(
&array,
- &DataType::List(Box::new(Field::new("item", DataType::Int32, true))),
+ &DataType::List(Box::new(NullableDataType::new(DataType::Int32, true))),
)
.unwrap();
assert_eq!(5, b.len());
@@ -1300,7 +1300,7 @@
let array = array.slice(2, 4);
let b = cast(
&array,
- &DataType::List(Box::new(Field::new("item", DataType::Float64, true))),
+ &DataType::List(Box::new(NullableDataType::new(DataType::Float64, true))),
)
.unwrap();
assert_eq!(4, b.len());
@@ -1377,7 +1377,7 @@
// Construct a list array from the above two
let list_data_type =
- DataType::List(Box::new(Field::new("item", DataType::Int32, true)));
+ DataType::List(Box::new(NullableDataType::new(DataType::Int32, true)));
let list_data = ArrayData::builder(list_data_type)
.len(3)
.add_buffer(value_offsets)
@@ -1387,7 +1387,7 @@
let cast_array = cast(
&list_array,
- &DataType::List(Box::new(Field::new("item", DataType::UInt16, true))),
+ &DataType::List(Box::new(NullableDataType::new(DataType::UInt16, true))),
)
.unwrap();
// 3 negative values should get lost when casting to unsigned,
@@ -1436,7 +1436,7 @@
// Construct a list array from the above two
let list_data_type =
- DataType::List(Box::new(Field::new("item", DataType::Int32, true)));
+ DataType::List(Box::new(NullableDataType::new(DataType::Int32, true)));
let list_data = ArrayData::builder(list_data_type)
.len(3)
.add_buffer(value_offsets)
@@ -1446,8 +1446,7 @@
cast(
&list_array,
- &DataType::List(Box::new(Field::new(
- "item",
+ &DataType::List(Box::new(NullableDataType::new(
DataType::Timestamp(TimeUnit::Microsecond, None),
true,
))),
@@ -2854,7 +2853,7 @@
// Construct a list array from the above two
let list_data_type =
- DataType::List(Box::new(Field::new("item", DataType::Int32, true)));
+ DataType::List(Box::new(NullableDataType::new(DataType::Int32, true)));
let list_data = ArrayData::builder(list_data_type)
.len(3)
.add_buffer(value_offsets)
@@ -2876,7 +2875,7 @@
// Construct a list array from the above two
let list_data_type =
- DataType::LargeList(Box::new(Field::new("item", DataType::Int32, true)));
+ DataType::LargeList(Box::new(NullableDataType::new(DataType::Int32, true)));
let list_data = ArrayData::builder(list_data_type)
.len(3)
.add_buffer(value_offsets)
@@ -2896,7 +2895,7 @@
// Construct a fixed size list array from the above two
let list_data_type = DataType::FixedSizeList(
- Box::new(Field::new("item", DataType::Int32, true)),
+ Box::new(NullableDataType::new(DataType::Int32, true)),
2,
);
let list_data = ArrayData::builder(list_data_type)
@@ -2989,12 +2988,12 @@
LargeBinary,
Utf8,
LargeUtf8,
- List(Box::new(Field::new("item", DataType::Int8, true))),
- List(Box::new(Field::new("item", DataType::Utf8, true))),
- FixedSizeList(Box::new(Field::new("item", DataType::Int8, true)), 10),
- FixedSizeList(Box::new(Field::new("item", DataType::Utf8, false)), 10),
- LargeList(Box::new(Field::new("item", DataType::Int8, true))),
- LargeList(Box::new(Field::new("item", DataType::Utf8, false))),
+ List(Box::new(NullableDataType::new(DataType::Int8, true))),
+ List(Box::new(NullableDataType::new(DataType::Utf8, true))),
+ FixedSizeList(Box::new(NullableDataType::new(DataType::Int8, true)), 10),
+ FixedSizeList(Box::new(NullableDataType::new(DataType::Utf8, false)), 10),
+ LargeList(Box::new(NullableDataType::new(DataType::Int8, true))),
+ LargeList(Box::new(NullableDataType::new(DataType::Utf8, false))),
Struct(vec![
Field::new("f1", DataType::Int32, false),
Field::new("f2", DataType::Utf8, true),
diff --git a/rust/arrow/src/compute/kernels/comparison.rs b/rust/arrow/src/compute/kernels/comparison.rs
index fd0bc73..0231810 100644
--- a/rust/arrow/src/compute/kernels/comparison.rs
+++ b/rust/arrow/src/compute/kernels/comparison.rs
@@ -776,8 +776,8 @@
#[cfg(test)]
mod tests {
use super::*;
- use crate::datatypes::{Int8Type, ToByteSlice};
- use crate::{array::Int32Array, datatypes::Field};
+ use crate::array::Int32Array;
+ use crate::datatypes::{Int8Type, NullableDataType, ToByteSlice};
#[test]
fn test_primitive_array_eq() {
@@ -1046,7 +1046,7 @@
.data();
let value_offsets = Buffer::from(&[0i64, 3, 6, 6, 9].to_byte_slice());
let list_data_type =
- DataType::LargeList(Box::new(Field::new("item", DataType::Int32, true)));
+ DataType::LargeList(Box::new(NullableDataType::new(DataType::Int32, true)));
let list_data = ArrayData::builder(list_data_type)
.len(4)
.add_buffer(value_offsets)
diff --git a/rust/arrow/src/compute/kernels/filter.rs b/rust/arrow/src/compute/kernels/filter.rs
index e90e493..ba8e1b7 100644
--- a/rust/arrow/src/compute/kernels/filter.rs
+++ b/rust/arrow/src/compute/kernels/filter.rs
@@ -1080,7 +1080,7 @@
let value_offsets = Buffer::from(&[0i64, 3, 6, 8, 8].to_byte_slice());
let list_data_type =
- DataType::LargeList(Box::new(Field::new("item", DataType::Int32, false)));
+ DataType::LargeList(Box::new(NullableDataType::new(DataType::Int32, false)));
let list_data = ArrayData::builder(list_data_type)
.len(4)
.add_buffer(value_offsets)
diff --git a/rust/arrow/src/compute/kernels/limit.rs b/rust/arrow/src/compute/kernels/limit.rs
index 911dbf2..5e182e6 100644
--- a/rust/arrow/src/compute/kernels/limit.rs
+++ b/rust/arrow/src/compute/kernels/limit.rs
@@ -35,7 +35,7 @@
use super::*;
use crate::array::*;
use crate::buffer::Buffer;
- use crate::datatypes::{DataType, Field, ToByteSlice};
+ use crate::datatypes::{DataType, Field, NullableDataType, ToByteSlice};
use crate::util::bit_util;
use std::sync::Arc;
@@ -110,7 +110,7 @@
// Construct a list array from the above two
let list_data_type =
- DataType::List(Box::new(Field::new("item", DataType::Int32, false)));
+ DataType::List(Box::new(NullableDataType::new(DataType::Int32, false)));
let list_data = ArrayData::builder(list_data_type)
.len(9)
.add_buffer(value_offsets)
diff --git a/rust/arrow/src/compute/kernels/take.rs b/rust/arrow/src/compute/kernels/take.rs
index 9b5963a..9b48eb2 100644
--- a/rust/arrow/src/compute/kernels/take.rs
+++ b/rust/arrow/src/compute/kernels/take.rs
@@ -810,11 +810,9 @@
let value_offsets: [$offset_type; 4] = [0, 3, 6, 8];
let value_offsets = Buffer::from(&value_offsets.to_byte_slice());
// Construct a list array from the above two
- let list_data_type = DataType::$list_data_type(Box::new(Field::new(
- "item",
- DataType::Int32,
- false,
- )));
+ let list_data_type = DataType::$list_data_type(Box::new(
+ NullableDataType::new(DataType::Int32, false),
+ ));
let list_data = ArrayData::builder(list_data_type.clone())
.len(3)
.add_buffer(value_offsets)
@@ -883,11 +881,9 @@
let value_offsets: [$offset_type; 5] = [0, 3, 6, 7, 9];
let value_offsets = Buffer::from(&value_offsets.to_byte_slice());
// Construct a list array from the above two
- let list_data_type = DataType::$list_data_type(Box::new(Field::new(
- "item",
- DataType::Int32,
- false,
- )));
+ let list_data_type = DataType::$list_data_type(Box::new(
+ NullableDataType::new(DataType::Int32, false),
+ ));
let list_data = ArrayData::builder(list_data_type.clone())
.len(4)
.add_buffer(value_offsets)
@@ -956,11 +952,9 @@
let value_offsets: [$offset_type; 5] = [0, 3, 6, 6, 8];
let value_offsets = Buffer::from(&value_offsets.to_byte_slice());
// Construct a list array from the above two
- let list_data_type = DataType::$list_data_type(Box::new(Field::new(
- "item",
- DataType::Int32,
- false,
- )));
+ let list_data_type = DataType::$list_data_type(Box::new(
+ NullableDataType::new(DataType::Int32, false),
+ ));
let list_data = ArrayData::builder(list_data_type.clone())
.len(4)
.add_buffer(value_offsets)
@@ -1051,7 +1045,7 @@
let value_offsets = Buffer::from(&[0, 3, 6, 8].to_byte_slice());
// Construct a list array from the above two
let list_data_type =
- DataType::List(Box::new(Field::new("item", DataType::Int32, false)));
+ DataType::List(Box::new(NullableDataType::new(DataType::Int32, false)));
let list_data = ArrayData::builder(list_data_type)
.len(3)
.add_buffer(value_offsets)
diff --git a/rust/arrow/src/compute/util.rs b/rust/arrow/src/compute/util.rs
index ba7de77..0fd0e64 100644
--- a/rust/arrow/src/compute/util.rs
+++ b/rust/arrow/src/compute/util.rs
@@ -321,7 +321,7 @@
#[test]
fn test_take_value_index_from_list() {
let list = build_list(
- DataType::List(Box::new(Field::new("item", DataType::Int32, true))),
+ DataType::List(Box::new(NullableDataType::new(DataType::Int32, true))),
Int32Array::from(vec![0, 1, 2, 3, 4, 5, 6, 7, 8, 9]),
vec![0i32, 2i32, 5i32, 10i32],
);
@@ -337,7 +337,7 @@
#[test]
fn test_take_value_index_from_large_list() {
let list = build_list(
- DataType::LargeList(Box::new(Field::new("item", DataType::Int32, false))),
+ DataType::LargeList(Box::new(NullableDataType::new(DataType::Int32, false))),
Int32Array::from(vec![0, 1, 2, 3, 4, 5, 6, 7, 8, 9]),
vec![0i64, 2i64, 5i64, 10i64],
);
diff --git a/rust/arrow/src/datatypes.rs b/rust/arrow/src/datatypes.rs
index 0a26d2e..dc7ae77 100644
--- a/rust/arrow/src/datatypes.rs
+++ b/rust/arrow/src/datatypes.rs
@@ -125,11 +125,11 @@
/// A variable-length string in Unicode with UFT-8 encoding and 64-bit offsets.
LargeUtf8,
/// A list of some logical data type with variable length.
- List(Box<Field>),
+ List(Box<NullableDataType>),
/// A list of some logical data type with fixed length.
- FixedSizeList(Box<Field>, i32),
+ FixedSizeList(Box<NullableDataType>, i32),
/// A list of some logical data type with variable length and 64-bit offsets.
- LargeList(Box<Field>),
+ LargeList(Box<NullableDataType>),
/// A nested datatype that contains a number of sub-fields.
Struct(Vec<Field>),
/// A nested datatype that can represent slots of differing types.
@@ -149,6 +149,13 @@
Decimal(usize, usize),
}
+/// A data type together with a flag saying whether its values may be null
+#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)]
+pub struct NullableDataType {
+ data_type: DataType,
+ nullable: bool,
+}
+
/// Date is either a 32-bit or 64-bit type representing elapsed time since UNIX
/// epoch (1970-01-01) in days or milliseconds.
#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)]
@@ -189,8 +196,7 @@
#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)]
pub struct Field {
name: String,
- data_type: DataType,
- nullable: bool,
+ data_type: NullableDataType,
dict_id: i64,
dict_is_ordered: bool,
}
@@ -876,7 +882,7 @@
impl DataType {
/// Parse a data type from a JSON representation
pub(crate) fn from(json: &Value) -> Result<DataType> {
- let default_field = Field::new("", DataType::Boolean, true);
+ let default_dt_ctx = NullableDataType::new(DataType::Boolean, true);
match *json {
Value::Object(ref map) => match map.get("name") {
Some(s) if s == "null" => Ok(DataType::Null),
@@ -1010,17 +1016,17 @@
},
Some(s) if s == "list" => {
// return a list with any type as its child isn't defined in the map
- Ok(DataType::List(Box::new(default_field)))
+ Ok(DataType::List(Box::new(default_dt_ctx)))
}
Some(s) if s == "largelist" => {
// return a largelist with any type as its child isn't defined in the map
- Ok(DataType::LargeList(Box::new(default_field)))
+ Ok(DataType::LargeList(Box::new(default_dt_ctx)))
}
Some(s) if s == "fixedsizelist" => {
// return a list with any type as its child isn't defined in the map
if let Some(Value::Number(size)) = map.get("listSize") {
Ok(DataType::FixedSizeList(
- Box::new(default_field),
+ Box::new(default_dt_ctx),
size.as_i64().unwrap() as i32,
))
} else {
@@ -1149,13 +1155,34 @@
}
}
+impl NullableDataType {
+ /// Creates a new data type context
+ pub fn new(data_type: DataType, nullable: bool) -> Self {
+ NullableDataType {
+ data_type,
+ nullable,
+ }
+ }
+
+ /// Returns an immutable reference to the data type
+ #[inline]
+ pub const fn data_type(&self) -> &DataType {
+ &self.data_type
+ }
+
+ /// Indicates whether null values are allowed in this data type context
+ #[inline]
+ pub const fn is_nullable(&self) -> bool {
+ self.nullable
+ }
+}
+
impl Field {
/// Creates a new field
pub fn new(name: &str, data_type: DataType, nullable: bool) -> Self {
Field {
name: name.to_string(),
- data_type,
- nullable,
+ data_type: NullableDataType::new(data_type, nullable),
dict_id: 0,
dict_is_ordered: false,
}
@@ -1171,8 +1198,7 @@
) -> Self {
Field {
name: name.to_string(),
- data_type,
- nullable,
+ data_type: NullableDataType::new(data_type, nullable),
dict_id,
dict_is_ordered,
}
@@ -1187,13 +1213,13 @@
/// Returns an immutable reference to the `Field`'s data-type
#[inline]
pub const fn data_type(&self) -> &DataType {
- &self.data_type
+ self.data_type.data_type()
}
/// Indicates whether this `Field` supports null values
#[inline]
pub const fn is_nullable(&self) -> bool {
- self.nullable
+ self.data_type.nullable
}
/// Returns the dictionary ID
@@ -1247,16 +1273,21 @@
"Field 'children' must have one element for a list data type".to_string(),
));
}
+ let nested_field = Self::from(&values[0])?;
+ let nexted_dt_ctx = NullableDataType::new(
+ nested_field.data_type.data_type,
+ nested_field.data_type.nullable,
+ );
match data_type {
DataType::List(_) => DataType::List(Box::new(
- Self::from(&values[0])?,
+ nexted_dt_ctx,
)),
DataType::LargeList(_) => DataType::LargeList(Box::new(
- Self::from(&values[0])?,
+ nexted_dt_ctx,
)),
DataType::FixedSizeList(_, int) => {
DataType::FixedSizeList(
- Box::new(Self::from(&values[0])?),
+ Box::new(nexted_dt_ctx),
int,
)
}
@@ -1332,8 +1363,7 @@
};
Ok(Field {
name,
- nullable,
- data_type,
+ data_type: NullableDataType::new(data_type, nullable),
dict_id,
dict_is_ordered,
})
@@ -1348,15 +1378,36 @@
pub fn to_json(&self) -> Value {
let children: Vec<Value> = match self.data_type() {
DataType::Struct(fields) => fields.iter().map(|f| f.to_json()).collect(),
- DataType::List(field) => vec![field.to_json()],
- DataType::LargeList(field) => vec![field.to_json()],
- DataType::FixedSizeList(field, _) => vec![field.to_json()],
+ DataType::List(type_ctx) => {
+ let item = Field::new(
+ "item",
+ type_ctx.data_type().clone(),
+ type_ctx.is_nullable(),
+ );
+ vec![item.to_json()]
+ }
+ DataType::LargeList(type_ctx) => {
+ let item = Field::new(
+ "item",
+ type_ctx.data_type().clone(),
+ type_ctx.is_nullable(),
+ );
+ vec![item.to_json()]
+ }
+ DataType::FixedSizeList(type_ctx, _) => {
+ let item = Field::new(
+ "item",
+ type_ctx.data_type().clone(),
+ type_ctx.is_nullable(),
+ );
+ vec![item.to_json()]
+ }
_ => vec![],
};
match self.data_type() {
DataType::Dictionary(ref index_type, ref value_type) => json!({
"name": self.name,
- "nullable": self.nullable,
+ "nullable": self.data_type.nullable,
"type": value_type.to_json(),
"children": children,
"dictionary": {
@@ -1367,8 +1418,8 @@
}),
_ => json!({
"name": self.name,
- "nullable": self.nullable,
- "type": self.data_type.to_json(),
+ "nullable": self.data_type.is_nullable(),
+ "type": self.data_type.data_type().to_json(),
"children": children
}),
}
@@ -1397,8 +1448,8 @@
.to_string(),
));
}
- match &mut self.data_type {
- DataType::Struct(nested_fields) => match &from.data_type {
+ match &mut self.data_type.data_type {
+ DataType::Struct(nested_fields) => match &from.data_type.data_type {
DataType::Struct(from_nested_fields) => {
for from_field in from_nested_fields {
let mut is_new_field = true;
@@ -1421,7 +1472,7 @@
));
}
},
- DataType::Union(nested_fields) => match &from.data_type {
+ DataType::Union(nested_fields) => match &from.data_type.data_type {
DataType::Union(from_nested_fields) => {
for from_field in from_nested_fields {
let mut is_new_field = true;
@@ -1473,7 +1524,7 @@
| DataType::Utf8
| DataType::LargeUtf8
| DataType::Decimal(_, _) => {
- if self.data_type != from.data_type {
+ if self.data_type.data_type != from.data_type.data_type {
return Err(ArrowError::SchemaError(
"Fail to merge schema Field due to conflicting datatype"
.to_string(),
@@ -1481,8 +1532,8 @@
}
}
}
- if from.nullable {
- self.nullable = from.nullable;
+ if from.data_type.nullable {
+ self.data_type.nullable = from.data_type.nullable;
}
Ok(())
@@ -1491,7 +1542,7 @@
impl fmt::Display for Field {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
- write!(f, "{}: {:?}", self.name, self.data_type)
+ write!(f, "{}: {:?}", self.name, self.data_type.data_type)
}
}
@@ -1811,12 +1862,12 @@
assert_eq!(
"{\"Struct\":[\
- {\"name\":\"first_name\",\"data_type\":\"Utf8\",\"nullable\":false,\"dict_id\":0,\"dict_is_ordered\":false},\
- {\"name\":\"last_name\",\"data_type\":\"Utf8\",\"nullable\":false,\"dict_id\":0,\"dict_is_ordered\":false},\
- {\"name\":\"address\",\"data_type\":{\"Struct\":\
- [{\"name\":\"street\",\"data_type\":\"Utf8\",\"nullable\":false,\"dict_id\":0,\"dict_is_ordered\":false},\
- {\"name\":\"zip\",\"data_type\":\"UInt16\",\"nullable\":false,\"dict_id\":0,\"dict_is_ordered\":false}\
- ]},\"nullable\":false,\"dict_id\":0,\"dict_is_ordered\":false}]}",
+ {\"name\":\"first_name\",\"data_type\":{\"data_type\":\"Utf8\",\"nullable\":false},\"dict_id\":0,\"dict_is_ordered\":false},\
+ {\"name\":\"last_name\",\"data_type\":{\"data_type\":\"Utf8\",\"nullable\":false},\"dict_id\":0,\"dict_is_ordered\":false},\
+ {\"name\":\"address\",\"data_type\":{\"data_type\":{\"Struct\":\
+ [{\"name\":\"street\",\"data_type\":{\"data_type\":\"Utf8\",\"nullable\":false},\"dict_id\":0,\"dict_is_ordered\":false},\
+ {\"name\":\"zip\",\"data_type\":{\"data_type\":\"UInt16\",\"nullable\":false},\"dict_id\":0,\"dict_is_ordered\":false}\
+ ]},\"nullable\":false},\"dict_id\":0,\"dict_is_ordered\":false}]}",
serialized
);
@@ -1997,23 +2048,24 @@
Field::new("c20", DataType::Interval(IntervalUnit::YearMonth), false),
Field::new(
"c21",
- DataType::List(Box::new(Field::new("item", DataType::Boolean, true))),
+ DataType::List(Box::new(NullableDataType::new(
+ DataType::Boolean,
+ true,
+ ))),
false,
),
Field::new(
"c22",
DataType::FixedSizeList(
- Box::new(Field::new("bools", DataType::Boolean, false)),
+ Box::new(NullableDataType::new(DataType::Boolean, false)),
5,
),
false,
),
Field::new(
"c23",
- DataType::List(Box::new(Field::new(
- "inner_list",
- DataType::List(Box::new(Field::new(
- "struct",
+ DataType::List(Box::new(NullableDataType::new(
+ DataType::List(Box::new(NullableDataType::new(
DataType::Struct(vec![]),
true,
))),
@@ -2049,10 +2101,8 @@
Field::new("c33", DataType::LargeUtf8, true),
Field::new(
"c34",
- DataType::LargeList(Box::new(Field::new(
- "inner_large_list",
- DataType::LargeList(Box::new(Field::new(
- "struct",
+ DataType::LargeList(Box::new(NullableDataType::new(
+ DataType::LargeList(Box::new(NullableDataType::new(
DataType::Struct(vec![]),
false,
))),
@@ -2280,7 +2330,7 @@
},
"children": [
{
- "name": "bools",
+ "name": "item",
"nullable": false,
"type": {
"name": "bool"
@@ -2297,14 +2347,14 @@
},
"children": [
{
- "name": "inner_list",
+ "name": "item",
"nullable": false,
"type": {
"name": "list"
},
"children": [
{
- "name": "struct",
+ "name": "item",
"nullable": true,
"type": {
"name": "struct"
@@ -2437,14 +2487,14 @@
},
"children": [
{
- "name": "inner_large_list",
+ "name": "item",
"nullable": true,
"type": {
"name": "largelist"
},
"children": [
{
- "name": "struct",
+ "name": "item",
"nullable": false,
"type": {
"name": "struct"
@@ -2511,8 +2561,8 @@
assert_eq!(schema.to_string(), "first_name: Utf8, \
last_name: Utf8, \
address: Struct([\
- Field { name: \"street\", data_type: Utf8, nullable: false, dict_id: 0, dict_is_ordered: false }, \
- Field { name: \"zip\", data_type: UInt16, nullable: false, dict_id: 0, dict_is_ordered: false }])")
+ Field { name: \"street\", data_type: NullableDataType { data_type: Utf8, nullable: false }, dict_id: 0, dict_is_ordered: false }, \
+ Field { name: \"zip\", data_type: NullableDataType { data_type: UInt16, nullable: false }, dict_id: 0, dict_is_ordered: false }])")
}
#[test]
@@ -2746,6 +2796,34 @@
Ok(())
}
+
+ // Equality check for nested data types: two separately constructed
+ // `List` types, each wrapping a nullable `Dictionary` with `Int32` index
+ // type and `Utf8` value type, must compare equal.
+ #[test]
+ fn test_compare_nested_types() {
+ let list_type_a = &DataType::List(Box::new(NullableDataType::new(
+ DataType::Dictionary(Box::new(DataType::Int32), Box::new(DataType::Utf8)),
+ true,
+ )));
+ // Built independently of `list_type_a`, with exactly the same arguments.
+ let list_type_b = &DataType::List(Box::new(NullableDataType::new(
+ DataType::Dictionary(Box::new(DataType::Int32), Box::new(DataType::Utf8)),
+ true,
+ )));
+
+ assert_eq!(list_type_a, list_type_b);
+ }
+
+ // Inequality check for nested data types: two `LargeList` types whose
+ // inner `Dictionary` differs must not compare equal.
+ // Note that the operands differ in two places at once: the dictionary
+ // index type (`Int32` vs `UInt64`) and the nullable flag (`true` vs
+ // `false`), so this test does not isolate either difference on its own.
+ #[test]
+ fn test_compare_mismatching_types() {
+ let list_type_a = &DataType::LargeList(Box::new(NullableDataType::new(
+ DataType::Dictionary(Box::new(DataType::Int32), Box::new(DataType::Utf8)),
+ true,
+ )));
+ let list_type_b = &DataType::LargeList(Box::new(NullableDataType::new(
+ DataType::Dictionary(Box::new(DataType::UInt64), Box::new(DataType::Utf8)),
+ false,
+ )));
+
+ assert_ne!(list_type_a, list_type_b);
+ }
}
#[cfg(all(
diff --git a/rust/arrow/src/ipc/convert.rs b/rust/arrow/src/ipc/convert.rs
index 5c55442..8acfb05 100644
--- a/rust/arrow/src/ipc/convert.rs
+++ b/rust/arrow/src/ipc/convert.rs
@@ -17,7 +17,9 @@
//! Utilities for converting between IPC types and native Arrow types
-use crate::datatypes::{DataType, DateUnit, Field, IntervalUnit, Schema, TimeUnit};
+use crate::datatypes::{
+ DataType, DateUnit, Field, IntervalUnit, NullableDataType, Schema, TimeUnit,
+};
use crate::ipc;
use flatbuffers::{
@@ -125,6 +127,12 @@
/// Get the Arrow data type from the flatbuffer Field table
+///
+/// Thin wrapper over `get_data_type_context`: forwards both arguments
+/// unchanged and returns a clone of the inner `DataType`, dropping the
+/// nullability that the `NullableDataType` result also carries.
pub(crate) fn get_data_type(field: ipc::Field, may_be_dictionary: bool) -> DataType {
+ get_data_type_context(field, may_be_dictionary)
+ .data_type()
+ .clone()
+}
+
+fn get_data_type_context(field: ipc::Field, may_be_dictionary: bool) -> NullableDataType {
if let Some(dictionary) = field.dictionary() {
if may_be_dictionary {
let int = dictionary.indexType().unwrap();
@@ -139,14 +147,16 @@
(64, false) => DataType::UInt64,
_ => panic!("Unexpected bitwidth and signed"),
};
- return DataType::Dictionary(
- Box::new(index_type),
- Box::new(get_data_type(field, false)),
+ let value_type = get_data_type_context(field, false).data_type().clone();
+ return NullableDataType::new(
+ DataType::Dictionary(Box::new(index_type), Box::new(value_type)),
+ // taking nullability from parent field
+ field.nullable(),
);
}
}
- match field.type_type() {
+ let data_type = match field.type_type() {
ipc::Type::Null => DataType::Null,
ipc::Type::Bool => DataType::Boolean,
ipc::Type::Int => {
@@ -243,14 +253,16 @@
if children.len() != 1 {
panic!("expect a list to have one child")
}
- DataType::List(Box::new(children.get(0).into()))
+ let child_field = children.get(0);
+ DataType::List(Box::new(get_data_type_context(child_field, false)))
}
ipc::Type::LargeList => {
let children = field.children().unwrap();
if children.len() != 1 {
panic!("expect a large list to have one child")
}
- DataType::LargeList(Box::new(children.get(0).into()))
+ let child_field = children.get(0);
+ DataType::LargeList(Box::new(get_data_type_context(child_field, false)))
}
ipc::Type::FixedSizeList => {
let children = field.children().unwrap();
@@ -258,7 +270,11 @@
panic!("expect a list to have one child")
}
let fsl = field.type_as_fixed_size_list().unwrap();
- DataType::FixedSizeList(Box::new(children.get(0).into()), fsl.listSize())
+ let child_field = children.get(0);
+ DataType::FixedSizeList(
+ Box::new(get_data_type_context(child_field, false)),
+ fsl.listSize(),
+ )
}
ipc::Type::Struct_ => {
let mut fields = vec![];
@@ -271,7 +287,9 @@
DataType::Struct(fields)
}
t => unimplemented!("Type {:?} not supported", t),
- }
+ };
+
+ NullableDataType::new(data_type, field.nullable())
}
pub(crate) struct FBFieldType<'b> {
@@ -504,24 +522,63 @@
children: Some(fbb.create_vector(&empty_fields[..])),
}
}
- List(ref list_type) => {
- let child = build_field(fbb, list_type);
+ List(ref type_ctx) => {
+ let nested_type =
+ get_fb_field_type(type_ctx.data_type(), type_ctx.is_nullable(), fbb);
+ let child = ipc::Field::create(
+ fbb,
+ &ipc::FieldArgs {
+ name: None,
+ nullable: type_ctx.is_nullable(),
+ type_type: nested_type.type_type,
+ type_: Some(nested_type.type_),
+ children: nested_type.children,
+ dictionary: None,
+ custom_metadata: None,
+ },
+ );
FBFieldType {
type_type: ipc::Type::List,
type_: ipc::ListBuilder::new(fbb).finish().as_union_value(),
children: Some(fbb.create_vector(&[child])),
}
}
- LargeList(ref list_type) => {
- let child = build_field(fbb, list_type);
+ LargeList(ref type_ctx) => {
+ let inner_types =
+ get_fb_field_type(type_ctx.data_type(), type_ctx.is_nullable(), fbb);
+ let child = ipc::Field::create(
+ fbb,
+ &ipc::FieldArgs {
+ name: None,
+ nullable: type_ctx.is_nullable(),
+ type_type: inner_types.type_type,
+ type_: Some(inner_types.type_),
+ dictionary: None,
+ children: inner_types.children,
+ custom_metadata: None,
+ },
+ );
FBFieldType {
type_type: ipc::Type::LargeList,
type_: ipc::LargeListBuilder::new(fbb).finish().as_union_value(),
children: Some(fbb.create_vector(&[child])),
}
}
- FixedSizeList(ref list_type, len) => {
- let child = build_field(fbb, list_type);
+ FixedSizeList(ref type_ctx, len) => {
+ let inner_types =
+ get_fb_field_type(type_ctx.data_type(), type_ctx.is_nullable(), fbb);
+ let child = ipc::Field::create(
+ fbb,
+ &ipc::FieldArgs {
+ name: None,
+ nullable: type_ctx.is_nullable(),
+ type_type: inner_types.type_type,
+ type_: Some(inner_types.type_),
+ dictionary: None,
+ children: inner_types.children,
+ custom_metadata: None,
+ },
+ );
let mut builder = ipc::FixedSizeListBuilder::new(fbb);
builder.add_listSize(*len as i32);
FBFieldType {
@@ -604,7 +661,7 @@
#[cfg(test)]
mod tests {
use super::*;
- use crate::datatypes::{DataType, Field, Schema};
+ use crate::datatypes::{DataType, Field, NullableDataType, Schema};
#[test]
fn convert_schema_round_trip() {
@@ -670,13 +727,15 @@
Field::new("binary", DataType::Binary, false),
Field::new(
"list[u8]",
- DataType::List(Box::new(Field::new("item", DataType::UInt8, false))),
+ DataType::List(Box::new(NullableDataType::new(
+ DataType::UInt8,
+ false,
+ ))),
true,
),
Field::new(
"list[struct<float32, int32, bool>]",
- DataType::List(Box::new(Field::new(
- "struct",
+ DataType::List(Box::new(NullableDataType::new(
DataType::Struct(vec![
Field::new("float32", DataType::UInt8, false),
Field::new("int32", DataType::Int32, true),
@@ -692,8 +751,7 @@
Field::new("int64", DataType::Int64, true),
Field::new(
"list[struct<date32, list[struct<>]>]",
- DataType::List(Box::new(Field::new(
- "struct",
+ DataType::List(Box::new(NullableDataType::new(
DataType::Struct(vec![
Field::new(
"date32",
@@ -702,8 +760,7 @@
),
Field::new(
"list[struct<>]",
- DataType::List(Box::new(Field::new(
- "struct",
+ DataType::List(Box::new(NullableDataType::new(
DataType::Struct(vec![]),
false,
))),
diff --git a/rust/arrow/src/ipc/reader.rs b/rust/arrow/src/ipc/reader.rs
index 76ad6b7..d5a929f 100644
--- a/rust/arrow/src/ipc/reader.rs
+++ b/rust/arrow/src/ipc/reader.rs
@@ -89,7 +89,7 @@
buffer_index += 2;
array
}
- List(ref list_field) | LargeList(ref list_field) => {
+ List(ref type_ctx) | LargeList(ref type_ctx) => {
let list_node = &nodes[node_index];
let list_buffers: Vec<Buffer> = buffers[buffer_index..buffer_index + 2]
.iter()
@@ -99,7 +99,7 @@
buffer_index += 2;
let triple = create_array(
nodes,
- list_field.data_type(),
+ type_ctx.data_type(),
data,
buffers,
dictionaries,
diff --git a/rust/arrow/src/json/reader.rs b/rust/arrow/src/json/reader.rs
index 5543eda..08c4007 100644
--- a/rust/arrow/src/json/reader.rs
+++ b/rust/arrow/src/json/reader.rs
@@ -66,20 +66,16 @@
1 => Ok(dt[0].clone()),
2 => {
// there can be a case where a list and scalar both exist
- if dt.contains(&&DataType::List(Box::new(Field::new(
- "item",
+ if dt.contains(&&DataType::List(Box::new(NullableDataType::new(
DataType::Float64,
true,
- )))) || dt.contains(&&DataType::List(Box::new(Field::new(
- "item",
+ )))) || dt.contains(&&DataType::List(Box::new(NullableDataType::new(
DataType::Int64,
true,
- )))) || dt.contains(&&DataType::List(Box::new(Field::new(
- "item",
+ )))) || dt.contains(&&DataType::List(Box::new(NullableDataType::new(
DataType::Boolean,
true,
- )))) || dt.contains(&&DataType::List(Box::new(Field::new(
- "item",
+ )))) || dt.contains(&&DataType::List(Box::new(NullableDataType::new(
DataType::Utf8,
true,
)))) {
@@ -90,14 +86,12 @@
match (dt[0], dt[1]) {
(t1, DataType::List(e)) if e.data_type() == &DataType::Float64 => {
if t1 == &DataType::Float64 {
- Ok(DataType::List(Box::new(Field::new(
- "item",
+ Ok(DataType::List(Box::new(NullableDataType::new(
DataType::Float64,
true,
))))
} else {
- Ok(DataType::List(Box::new(Field::new(
- "item",
+ Ok(DataType::List(Box::new(NullableDataType::new(
coerce_data_type(vec![t1, &DataType::Float64])?,
true,
))))
@@ -105,14 +99,12 @@
}
(t1, DataType::List(e)) if e.data_type() == &DataType::Int64 => {
if t1 == &DataType::Int64 {
- Ok(DataType::List(Box::new(Field::new(
- "item",
+ Ok(DataType::List(Box::new(NullableDataType::new(
DataType::Int64,
true,
))))
} else {
- Ok(DataType::List(Box::new(Field::new(
- "item",
+ Ok(DataType::List(Box::new(NullableDataType::new(
coerce_data_type(vec![t1, &DataType::Int64])?,
true,
))))
@@ -120,14 +112,12 @@
}
(t1, DataType::List(e)) if e.data_type() == &DataType::Boolean => {
if t1 == &DataType::Boolean {
- Ok(DataType::List(Box::new(Field::new(
- "item",
+ Ok(DataType::List(Box::new(NullableDataType::new(
DataType::Boolean,
true,
))))
} else {
- Ok(DataType::List(Box::new(Field::new(
- "item",
+ Ok(DataType::List(Box::new(NullableDataType::new(
coerce_data_type(vec![t1, &DataType::Boolean])?,
true,
))))
@@ -135,14 +125,12 @@
}
(t1, DataType::List(e)) if e.data_type() == &DataType::Utf8 => {
if t1 == &DataType::Utf8 {
- Ok(DataType::List(Box::new(Field::new(
- "item",
+ Ok(DataType::List(Box::new(NullableDataType::new(
DataType::Utf8,
true,
))))
} else {
- Ok(DataType::List(Box::new(Field::new(
- "item",
+ Ok(DataType::List(Box::new(NullableDataType::new(
coerce_data_type(vec![t1, &DataType::Utf8])?,
true,
))))
@@ -162,8 +150,7 @@
_ => {
// TODO(nevi_me) It's possible to have [float, int, list(float)], which should
// return list(float). Will hash this out later
- Ok(DataType::List(Box::new(Field::new(
- "item",
+ Ok(DataType::List(Box::new(NullableDataType::new(
DataType::Utf8,
true,
))))
@@ -304,13 +291,13 @@
if values.contains_key(k) {
let x = values.get_mut(k).unwrap();
x.insert(DataType::List(Box::new(
- Field::new("item", dt, true),
+ NullableDataType::new(dt, true),
)));
} else {
// create hashset and add value type
let mut hs = HashSet::new();
hs.insert(DataType::List(Box::new(
- Field::new("item", dt, true),
+ NullableDataType::new(dt, true),
)));
values.insert(k.to_string(), hs);
}
@@ -1435,12 +1422,12 @@
assert_eq!(&DataType::Int64, a.1.data_type());
let b = schema.column_with_name("b").unwrap();
assert_eq!(
- &DataType::List(Box::new(Field::new("item", DataType::Float64, true))),
+ &DataType::List(Box::new(NullableDataType::new(DataType::Float64, true))),
b.1.data_type()
);
let c = schema.column_with_name("c").unwrap();
assert_eq!(
- &DataType::List(Box::new(Field::new("item", DataType::Boolean, true))),
+ &DataType::List(Box::new(NullableDataType::new(DataType::Boolean, true))),
c.1.data_type()
);
let d = schema.column_with_name("d").unwrap();
@@ -1493,35 +1480,35 @@
use crate::datatypes::DataType::*;
assert_eq!(
- List(Box::new(Field::new("item", Float64, true))),
+ List(Box::new(NullableDataType::new(Float64, true))),
coerce_data_type(vec![
&Float64,
- &List(Box::new(Field::new("item", Float64, true)))
+ &List(Box::new(NullableDataType::new(Float64, true)))
])
.unwrap()
);
assert_eq!(
- List(Box::new(Field::new("item", Float64, true))),
+ List(Box::new(NullableDataType::new(Float64, true))),
coerce_data_type(vec![
&Float64,
- &List(Box::new(Field::new("item", Int64, true)))
+ &List(Box::new(NullableDataType::new(Int64, true)))
])
.unwrap()
);
assert_eq!(
- List(Box::new(Field::new("item", Int64, true))),
+ List(Box::new(NullableDataType::new(Int64, true))),
coerce_data_type(vec![
&Int64,
- &List(Box::new(Field::new("item", Int64, true)))
+ &List(Box::new(NullableDataType::new(Int64, true)))
])
.unwrap()
);
// boolean and number are incompatible, return utf8
assert_eq!(
- List(Box::new(Field::new("item", Utf8, true))),
+ List(Box::new(NullableDataType::new(Utf8, true))),
coerce_data_type(vec![
&Boolean,
- &List(Box::new(Field::new("item", Float64, true)))
+ &List(Box::new(NullableDataType::new(Float64, true)))
])
.unwrap()
);
@@ -1554,17 +1541,17 @@
assert_eq!(&DataType::Int64, a.1.data_type());
let b = schema.column_with_name("b").unwrap();
assert_eq!(
- &DataType::List(Box::new(Field::new("item", DataType::Float64, true))),
+ &DataType::List(Box::new(NullableDataType::new(DataType::Float64, true))),
b.1.data_type()
);
let c = schema.column_with_name("c").unwrap();
assert_eq!(
- &DataType::List(Box::new(Field::new("item", DataType::Boolean, true))),
+ &DataType::List(Box::new(NullableDataType::new(DataType::Boolean, true))),
c.1.data_type()
);
let d = schema.column_with_name("d").unwrap();
assert_eq!(
- &DataType::List(Box::new(Field::new("item", DataType::Utf8, true))),
+ &DataType::List(Box::new(NullableDataType::new(DataType::Utf8, true))),
d.1.data_type()
);
@@ -1804,8 +1791,7 @@
fn test_list_of_string_dictionary_from_json() {
let schema = Schema::new(vec![Field::new(
"events",
- List(Box::new(Field::new(
- "item",
+ List(Box::new(NullableDataType::new(
Dictionary(Box::new(DataType::UInt64), Box::new(DataType::Utf8)),
true,
))),
@@ -1828,8 +1814,7 @@
let events = schema.column_with_name("events").unwrap();
assert_eq!(
- &List(Box::new(Field::new(
- "item",
+ &List(Box::new(NullableDataType::new(
Dictionary(Box::new(DataType::UInt64), Box::new(DataType::Utf8)),
true
))),
@@ -1863,8 +1848,7 @@
fn test_list_of_string_dictionary_from_json_with_nulls() {
let schema = Schema::new(vec![Field::new(
"events",
- List(Box::new(Field::new(
- "item",
+ List(Box::new(NullableDataType::new(
Dictionary(Box::new(DataType::UInt64), Box::new(DataType::Utf8)),
true,
))),
@@ -1889,8 +1873,7 @@
let events = schema.column_with_name("events").unwrap();
assert_eq!(
- &List(Box::new(Field::new(
- "item",
+ &List(Box::new(NullableDataType::new(
Dictionary(Box::new(DataType::UInt64), Box::new(DataType::Utf8)),
true
))),
@@ -2031,17 +2014,17 @@
Field::new("a", DataType::Int64, true),
Field::new(
"b",
- DataType::List(Box::new(Field::new("item", DataType::Float64, true))),
+ DataType::List(Box::new(NullableDataType::new(DataType::Float64, true))),
true,
),
Field::new(
"c",
- DataType::List(Box::new(Field::new("item", DataType::Boolean, true))),
+ DataType::List(Box::new(NullableDataType::new(DataType::Boolean, true))),
true,
),
Field::new(
"d",
- DataType::List(Box::new(Field::new("item", DataType::Utf8, true))),
+ DataType::List(Box::new(NullableDataType::new(DataType::Utf8, true))),
true,
),
]);
diff --git a/rust/arrow/src/record_batch.rs b/rust/arrow/src/record_batch.rs
index b4aa97d..41cbd6d 100644
--- a/rust/arrow/src/record_batch.rs
+++ b/rust/arrow/src/record_batch.rs
@@ -299,6 +299,42 @@
}
#[test]
+ // Checks that `RecordBatch::try_new` accepts a single list column when the
+ // array's nested data type (`List` of nullable `Int32`) is the same as the
+ // one declared for that column in the schema.
+ fn create_record_batch_with_matching_nested_type() {
+ let schema = Schema::new(vec![Field::new(
+ "list",
+ DataType::List(Box::new(NullableDataType::new(DataType::Int32, true))),
+ false,
+ )]);
+
+ // Six child values, shared by the three lists below.
+ let child_data = Int32Array::from(vec![0, 1, 2, 3, 4, 5]);
+ let child_data_ref = Arc::new(ArrayData::new(
+ DataType::Int32,
+ 6,
+ None,
+ None,
+ 0,
+ vec![child_data.data_ref().buffers()[0].clone()],
+ vec![],
+ ));
+
+ // A `List` array of length 3 needs len + 1 = 4 offsets stored as 32-bit
+ // integers (64-bit offsets belong to `LargeList`). [0, 2, 4, 6] splits
+ // the six child values into three lists of two values each.
+ let offsets = Int32Array::from(vec![0, 2, 4, 6]);
+ let array_data = Arc::new(ArrayData::new(
+ DataType::List(Box::new(NullableDataType::new(DataType::Int32, true))),
+ 3,
+ None,
+ None,
+ 0,
+ vec![offsets.data_ref().buffers()[0].clone()],
+ vec![child_data_ref],
+ ));
+
+ let list_array = Arc::new(ListArray::from(array_data));
+
+ let result = RecordBatch::try_new(Arc::new(schema), vec![list_array]);
+ assert!(result.is_ok());
+ }
+
+ #[test]
fn create_record_batch_from_struct_array() {
let boolean_data = ArrayData::builder(DataType::Boolean)
.len(4)
diff --git a/rust/arrow/src/util/integration_util.rs b/rust/arrow/src/util/integration_util.rs
index 94d0a9b..4e41996 100644
--- a/rust/arrow/src/util/integration_util.rs
+++ b/rust/arrow/src/util/integration_util.rs
@@ -688,11 +688,7 @@
Field::new("c3", DataType::Utf8, true),
Field::new(
"c4",
- DataType::List(Box::new(Field::new(
- "custom_item",
- DataType::Int32,
- false,
- ))),
+ DataType::List(Box::new(NullableDataType::new(DataType::Int32, false))),
true,
),
]);
@@ -762,7 +758,7 @@
Field::new("utf8s", DataType::Utf8, true),
Field::new(
"lists",
- DataType::List(Box::new(Field::new("item", DataType::Int32, true))),
+ DataType::List(Box::new(NullableDataType::new(DataType::Int32, true))),
true,
),
Field::new(
@@ -839,7 +835,7 @@
let value_data = Int32Array::from(vec![None, Some(2), None, None]);
let value_offsets = Buffer::from(&[0, 3, 4, 4].to_byte_slice());
let list_data_type =
- DataType::List(Box::new(Field::new("item", DataType::Int32, true)));
+ DataType::List(Box::new(NullableDataType::new(DataType::Int32, true)));
let list_data = ArrayData::builder(list_data_type)
.len(3)
.add_buffer(value_offsets)
diff --git a/rust/datafusion/src/physical_plan/distinct_expressions.rs b/rust/datafusion/src/physical_plan/distinct_expressions.rs
index bbccc3b..a441c0b 100644
--- a/rust/datafusion/src/physical_plan/distinct_expressions.rs
+++ b/rust/datafusion/src/physical_plan/distinct_expressions.rs
@@ -22,7 +22,7 @@
use std::hash::Hash;
use std::sync::Arc;
-use arrow::datatypes::{DataType, Field};
+use arrow::datatypes::{DataType, Field, NullableDataType};
use ahash::RandomState;
use std::collections::HashSet;
@@ -81,7 +81,10 @@
.map(|data_type| {
Field::new(
&format_state_name(&self.name, "count distinct"),
- DataType::List(Box::new(Field::new("item", data_type.clone(), true))),
+ DataType::List(Box::new(NullableDataType::new(
+ data_type.clone(),
+ true,
+ ))),
false,
)
})
diff --git a/rust/datafusion/src/physical_plan/functions.rs b/rust/datafusion/src/physical_plan/functions.rs
index b954f47..41fd210 100644
--- a/rust/datafusion/src/physical_plan/functions.rs
+++ b/rust/datafusion/src/physical_plan/functions.rs
@@ -39,11 +39,12 @@
use crate::physical_plan::expressions::{nullif_func, SUPPORTED_NULLIF_TYPES};
use crate::physical_plan::math_expressions;
use crate::physical_plan::string_expressions;
+use arrow::datatypes::NullableDataType;
use arrow::{
array::ArrayRef,
compute::kernels::length::length,
datatypes::TimeUnit,
- datatypes::{DataType, Field, Schema},
+ datatypes::{DataType, Schema},
record_batch::RecordBatch,
};
use fmt::{Debug, Formatter};
@@ -207,7 +208,7 @@
Ok(DataType::Timestamp(TimeUnit::Nanosecond, None))
}
BuiltinScalarFunction::Array => Ok(DataType::FixedSizeList(
- Box::new(Field::new("item", arg_types[0].clone(), true)),
+ Box::new(NullableDataType::new(arg_types[0].clone(), true)),
arg_types.len() as i32,
)),
BuiltinScalarFunction::NullIf => {
@@ -484,7 +485,10 @@
assert_eq!(
expr.data_type(&schema)?,
// type equals to a common coercion
- DataType::FixedSizeList(Box::new(Field::new("item", expected_type, true)), 2)
+ DataType::FixedSizeList(
+ Box::new(NullableDataType::new(expected_type, true)),
+ 2
+ )
);
// evaluate works
diff --git a/rust/datafusion/src/physical_plan/planner.rs b/rust/datafusion/src/physical_plan/planner.rs
index b592111..d6082f3 100644
--- a/rust/datafusion/src/physical_plan/planner.rs
+++ b/rust/datafusion/src/physical_plan/planner.rs
@@ -786,7 +786,7 @@
};
let plan = planner.create_physical_plan(&logical_plan, &ctx_state);
- let expected_error = "Extension planner for NoOp created an ExecutionPlan with mismatched schema. LogicalPlan schema: Schema { fields: [Field { name: \"a\", data_type: Int32, nullable: false, dict_id: 0, dict_is_ordered: false }], metadata: {} }, ExecutionPlan schema: Schema { fields: [Field { name: \"b\", data_type: Int32, nullable: false, dict_id: 0, dict_is_ordered: false }], metadata: {} }";
+ let expected_error = "Extension planner for NoOp created an ExecutionPlan with mismatched schema. LogicalPlan schema: Schema { fields: [Field { name: \"a\", data_type: NullableDataType { data_type: Int32, nullable: false }, dict_id: 0, dict_is_ordered: false }], metadata: {} }, ExecutionPlan schema: Schema { fields: [Field { name: \"b\", data_type: NullableDataType { data_type: Int32, nullable: false }, dict_id: 0, dict_is_ordered: false }], metadata: {} }";
match plan {
Ok(_) => assert!(false, "Expected planning failure"),
diff --git a/rust/datafusion/src/scalar.rs b/rust/datafusion/src/scalar.rs
index 06309ab..c64f1a2 100644
--- a/rust/datafusion/src/scalar.rs
+++ b/rust/datafusion/src/scalar.rs
@@ -23,10 +23,7 @@
Int16Builder, Int32Builder, Int64Builder, Int8Builder, ListBuilder, UInt16Builder,
UInt32Builder, UInt64Builder, UInt8Builder,
};
-use arrow::{
- array::ArrayRef,
- datatypes::{DataType, Field},
-};
+use arrow::{array::ArrayRef, datatypes::DataType};
use arrow::{
array::{
Array, BooleanArray, Date32Array, Float32Array, Float64Array, Int16Array,
@@ -37,6 +34,7 @@
};
use crate::error::{DataFusionError, Result};
+use arrow::datatypes::NullableDataType;
/// Represents a dynamically typed, nullable single value.
/// This is the single-valued counter-part of arrow’s `Array`.
@@ -136,7 +134,7 @@
ScalarValue::Utf8(_) => DataType::Utf8,
ScalarValue::LargeUtf8(_) => DataType::LargeUtf8,
ScalarValue::List(_, data_type) => {
- DataType::List(Box::new(Field::new("item", data_type.clone(), true)))
+ DataType::List(Box::new(NullableDataType::new(data_type.clone(), true)))
}
ScalarValue::Date32(_) => DataType::Date32(DateUnit::Day),
}
diff --git a/rust/datafusion/tests/sql.rs b/rust/datafusion/tests/sql.rs
index 7327487..bb503a1 100644
--- a/rust/datafusion/tests/sql.rs
+++ b/rust/datafusion/tests/sql.rs
@@ -25,7 +25,7 @@
use arrow::{array::*, datatypes::TimeUnit};
use arrow::{datatypes::Int32Type, datatypes::Int64Type, record_batch::RecordBatch};
use arrow::{
- datatypes::{DataType, Field, Schema, SchemaRef},
+ datatypes::{DataType, Field, NullableDataType, Schema, SchemaRef},
util::display::array_value_to_string,
};
@@ -142,12 +142,12 @@
let schema = Arc::new(Schema::new(vec![
Field::new(
"int64_list",
- DataType::List(Box::new(Field::new("item", DataType::Int64, true))),
+ DataType::List(Box::new(NullableDataType::new(DataType::Int64, true))),
true,
),
Field::new(
"utf8_list",
- DataType::List(Box::new(Field::new("item", DataType::Utf8, true))),
+ DataType::List(Box::new(NullableDataType::new(DataType::Utf8, true))),
true,
),
]));
diff --git a/rust/integration-testing/src/bin/arrow-json-integration-test.rs b/rust/integration-testing/src/bin/arrow-json-integration-test.rs
index 72a113f..2ce3692 100644
--- a/rust/integration-testing/src/bin/arrow-json-integration-test.rs
+++ b/rust/integration-testing/src/bin/arrow-json-integration-test.rs
@@ -408,9 +408,14 @@
}
Ok(Arc::new(b.finish()))
}
- DataType::List(child_field) => {
+ DataType::List(type_ctx) => {
let null_buf = create_null_buf(&json_col);
let children = json_col.children.clone().unwrap();
+ let child_field = Field::new(
+ "element",
+ type_ctx.data_type().clone(),
+ type_ctx.is_nullable(),
+ );
let child_array = array_from_json(
&child_field,
children.get(0).unwrap().clone(),
@@ -431,9 +436,14 @@
.build();
Ok(Arc::new(ListArray::from(list_data)))
}
- DataType::LargeList(child_field) => {
+ DataType::LargeList(type_ctx) => {
let null_buf = create_null_buf(&json_col);
let children = json_col.children.clone().unwrap();
+ let child_field = Field::new(
+ "element",
+ type_ctx.data_type().clone(),
+ type_ctx.is_nullable(),
+ );
let child_array = array_from_json(
&child_field,
children.get(0).unwrap().clone(),
@@ -458,8 +468,13 @@
.build();
Ok(Arc::new(LargeListArray::from(list_data)))
}
- DataType::FixedSizeList(child_field, _) => {
+ DataType::FixedSizeList(type_ctx, _) => {
let children = json_col.children.clone().unwrap();
+ let child_field = Field::new(
+ "element",
+ type_ctx.data_type().clone(),
+ type_ctx.is_nullable(),
+ );
let child_array = array_from_json(
&child_field,
children.get(0).unwrap().clone(),
@@ -480,8 +495,8 @@
.len(json_col.count)
.null_bit_buffer(null_buf);
- for (field, col) in fields.iter().zip(json_col.children.unwrap()) {
- let array = array_from_json(field, col, dictionaries)?;
+ for (f, col) in fields.iter().zip(json_col.children.unwrap()) {
+ let array = array_from_json(f, col, dictionaries)?;
array_data = array_data.add_child_data(array.data());
}
diff --git a/rust/parquet/src/arrow/array_reader.rs b/rust/parquet/src/arrow/array_reader.rs
index 231b46d..eeb71b0 100644
--- a/rust/parquet/src/arrow/array_reader.rs
+++ b/rust/parquet/src/arrow/array_reader.rs
@@ -40,7 +40,7 @@
DurationSecondType as ArrowDurationSecondType, Field,
Float32Type as ArrowFloat32Type, Float64Type as ArrowFloat64Type,
Int16Type as ArrowInt16Type, Int32Type as ArrowInt32Type,
- Int64Type as ArrowInt64Type, Int8Type as ArrowInt8Type, Schema,
+ Int64Type as ArrowInt64Type, Int8Type as ArrowInt8Type, NullableDataType, Schema,
Time32MillisecondType as ArrowTime32MillisecondType,
Time32SecondType as ArrowTime32SecondType,
Time64MicrosecondType as ArrowTime64MicrosecondType,
@@ -1347,8 +1347,7 @@
.ok()
.map(|f| f.data_type().to_owned())
.unwrap_or_else(|| {
- ArrowType::List(Box::new(Field::new(
- list_type.name(),
+ ArrowType::List(Box::new(NullableDataType::new(
item_reader_type.clone(),
list_type.is_optional(),
)))
@@ -1628,7 +1627,7 @@
};
use arrow::datatypes::{
ArrowPrimitiveType, DataType as ArrowType, Date32Type as ArrowDate32, Field,
- Int32Type as ArrowInt32, Int64Type as ArrowInt64,
+ Int32Type as ArrowInt32, Int64Type as ArrowInt64, NullableDataType,
Time32MillisecondType as ArrowTime32MillisecondArray,
Time64MicrosecondType as ArrowTime64MicrosecondArray,
TimestampMicrosecondType as ArrowTimestampMicrosecondType,
@@ -2311,7 +2310,7 @@
let mut list_array_reader = ListArrayReader::<i32>::new(
Box::new(item_array_reader),
- ArrowType::List(Box::new(Field::new("item", ArrowType::Int32, true))),
+ ArrowType::List(Box::new(NullableDataType::new(ArrowType::Int32, false))),
ArrowType::Int32,
1,
1,
@@ -2365,7 +2364,7 @@
let mut list_array_reader = ListArrayReader::<i64>::new(
Box::new(item_array_reader),
- ArrowType::LargeList(Box::new(Field::new("item", ArrowType::Int32, true))),
+ ArrowType::LargeList(Box::new(NullableDataType::new(ArrowType::Int32, true))),
ArrowType::Int32,
1,
1,
diff --git a/rust/parquet/src/arrow/arrow_writer.rs b/rust/parquet/src/arrow/arrow_writer.rs
index dc9cf70..4dd35a7 100644
--- a/rust/parquet/src/arrow/arrow_writer.rs
+++ b/rust/parquet/src/arrow/arrow_writer.rs
@@ -688,8 +688,8 @@
use std::sync::Arc;
use arrow::array::*;
- use arrow::datatypes::ToByteSlice;
use arrow::datatypes::{DataType, Field, Schema, UInt32Type, UInt8Type};
+ use arrow::datatypes::{NullableDataType, ToByteSlice};
use arrow::record_batch::RecordBatch;
use crate::arrow::{ArrowReader, ParquetFileArrowReader};
@@ -776,7 +776,7 @@
// define schema
let schema = Schema::new(vec![Field::new(
"a",
- DataType::List(Box::new(Field::new("item", DataType::Int32, true))),
+ DataType::List(Box::new(NullableDataType::new(DataType::Int32, true))),
false,
)]);
@@ -789,11 +789,9 @@
arrow::buffer::Buffer::from(&[0, 1, 3, 3, 6, 10].to_byte_slice());
// Construct a list array from the above two
- let a_list_data = ArrayData::builder(DataType::List(Box::new(Field::new(
- "items",
- DataType::Int32,
- true,
- ))))
+ let a_list_data = ArrayData::builder(DataType::List(Box::new(
+ NullableDataType::new(DataType::Int32, true),
+ )))
.len(5)
.add_buffer(a_value_offsets)
.add_child_data(a_values.data())
@@ -876,7 +874,7 @@
let struct_field_f = Field::new("f", DataType::Float32, true);
let struct_field_g = Field::new(
"g",
- DataType::List(Box::new(Field::new("items", DataType::Int16, false))),
+ DataType::List(Box::new(NullableDataType::new(DataType::Int16, false))),
false,
);
let struct_field_e = Field::new(
@@ -1297,11 +1295,9 @@
let a_values = Int32Array::from(vec![1, 2, 3, 4, 5, 6, 7, 8, 9, 10]);
let a_value_offsets =
arrow::buffer::Buffer::from(&[0, 1, 3, 3, 6, 10].to_byte_slice());
- let a_list_data = ArrayData::builder(DataType::List(Box::new(Field::new(
- "item",
- DataType::Int32,
- true,
- ))))
+ let a_list_data = ArrayData::builder(DataType::List(Box::new(
+ NullableDataType::new(DataType::Int32, true),
+ )))
.len(5)
.add_buffer(a_value_offsets)
.add_child_data(a_values.data())
@@ -1322,11 +1318,9 @@
let a_values = Int32Array::from(vec![1, 2, 3, 4, 5, 6, 7, 8, 9, 10]);
let a_value_offsets =
arrow::buffer::Buffer::from(&[0i64, 1, 3, 3, 6, 10].to_byte_slice());
- let a_list_data = ArrayData::builder(DataType::LargeList(Box::new(Field::new(
- "large_item",
- DataType::Int32,
- true,
- ))))
+ let a_list_data = ArrayData::builder(DataType::LargeList(Box::new(
+ NullableDataType::new(DataType::Int32, true),
+ )))
.len(5)
.add_buffer(a_value_offsets)
.add_child_data(a_values.data())
diff --git a/rust/parquet/src/arrow/schema.rs b/rust/parquet/src/arrow/schema.rs
index c93325b..b0c3564 100644
--- a/rust/parquet/src/arrow/schema.rs
+++ b/rust/parquet/src/arrow/schema.rs
@@ -26,7 +26,7 @@
use std::collections::{HashMap, HashSet};
use std::sync::Arc;
-use arrow::datatypes::{DataType, DateUnit, Field, Schema, TimeUnit};
+use arrow::datatypes::{DataType, DateUnit, Field, NullableDataType, Schema, TimeUnit};
use arrow::ipc::writer;
use crate::basic::{LogicalType, Repetition, Type as PhysicalType};
@@ -412,18 +412,25 @@
.with_repetition(repetition)
.build()
}
- DataType::List(f) | DataType::FixedSizeList(f, _) | DataType::LargeList(f) => {
- Type::group_type_builder(name)
- .with_fields(&mut vec![Arc::new(
- Type::group_type_builder("list")
- .with_fields(&mut vec![Arc::new(arrow_to_parquet_type(f)?)])
- .with_repetition(Repetition::REPEATED)
- .build()?,
- )])
- .with_logical_type(LogicalType::LIST)
- .with_repetition(Repetition::REQUIRED)
- .build()
- }
+ DataType::List(type_ctx)
+ | DataType::FixedSizeList(type_ctx, _)
+ | DataType::LargeList(type_ctx) => Type::group_type_builder(name)
+ .with_fields(&mut vec![Arc::new(
+ Type::group_type_builder("list")
+ .with_fields(&mut vec![Arc::new({
+ let list_field = Field::new(
+ "element",
+ type_ctx.data_type().clone(),
+ type_ctx.is_nullable(),
+ );
+ arrow_to_parquet_type(&list_field)?
+ })])
+ .with_repetition(Repetition::REPEATED)
+ .build()?,
+ )])
+ .with_logical_type(LogicalType::LIST)
+ .with_repetition(Repetition::REQUIRED)
+ .build(),
DataType::Struct(fields) => {
if fields.is_empty() {
return Err(ArrowError(
@@ -538,8 +545,7 @@
if self.is_self_included() {
self.to_primitive_type_inner().map(|dt| {
if self.is_repeated() {
- Some(DataType::List(Box::new(Field::new(
- self.schema.name(),
+ Some(DataType::List(Box::new(NullableDataType::new(
dt,
self.is_nullable(),
))))
@@ -638,8 +644,7 @@
if self.is_repeated() {
self.to_struct().map(|opt| {
opt.map(|dt| {
- DataType::List(Box::new(Field::new(
- self.schema.name(),
+ DataType::List(Box::new(NullableDataType::new(
dt,
self.is_nullable(),
)))
@@ -731,8 +736,7 @@
item_type.map(|opt| {
opt.map(|dt| {
- DataType::List(Box::new(Field::new(
- list_item.name(),
+ DataType::List(Box::new(NullableDataType::new(
dt,
list_item.is_optional(),
)))
@@ -752,7 +756,9 @@
use std::{collections::HashMap, convert::TryFrom, sync::Arc};
- use arrow::datatypes::{DataType, DateUnit, Field, IntervalUnit, TimeUnit};
+ use arrow::datatypes::{
+ DataType, DateUnit, Field, IntervalUnit, NullableDataType, TimeUnit,
+ };
use crate::file::{metadata::KeyValue, reader::SerializedFileReader};
use crate::{
@@ -911,7 +917,7 @@
{
arrow_fields.push(Field::new(
"my_list",
- DataType::List(Box::new(Field::new("list", DataType::Utf8, true))),
+ DataType::List(Box::new(NullableDataType::new(DataType::Utf8, true))),
false,
));
}
@@ -925,7 +931,7 @@
{
arrow_fields.push(Field::new(
"my_list",
- DataType::List(Box::new(Field::new("list", DataType::Utf8, true))),
+ DataType::List(Box::new(NullableDataType::new(DataType::Utf8, true))),
true,
));
}
@@ -944,10 +950,10 @@
// }
{
let arrow_inner_list =
- DataType::List(Box::new(Field::new("list", DataType::Int32, true)));
+ DataType::List(Box::new(NullableDataType::new(DataType::Int32, true)));
arrow_fields.push(Field::new(
"array_of_arrays",
- DataType::List(Box::new(Field::new("list", arrow_inner_list, true))),
+ DataType::List(Box::new(NullableDataType::new(arrow_inner_list, true))),
true,
));
}
@@ -961,7 +967,7 @@
{
arrow_fields.push(Field::new(
"my_list",
- DataType::List(Box::new(Field::new("element", DataType::Utf8, true))),
+ DataType::List(Box::new(NullableDataType::new(DataType::Utf8, true))),
true,
));
}
@@ -973,7 +979,7 @@
{
arrow_fields.push(Field::new(
"my_list",
- DataType::List(Box::new(Field::new("element", DataType::Int32, true))),
+ DataType::List(Box::new(NullableDataType::new(DataType::Int32, true))),
true,
));
}
@@ -992,7 +998,7 @@
]);
arrow_fields.push(Field::new(
"my_list",
- DataType::List(Box::new(Field::new("element", arrow_struct, true))),
+ DataType::List(Box::new(NullableDataType::new(arrow_struct, true))),
true,
));
}
@@ -1009,7 +1015,7 @@
DataType::Struct(vec![Field::new("str", DataType::Utf8, false)]);
arrow_fields.push(Field::new(
"my_list",
- DataType::List(Box::new(Field::new("array", arrow_struct, true))),
+ DataType::List(Box::new(NullableDataType::new(arrow_struct, true))),
true,
));
}
@@ -1026,7 +1032,7 @@
DataType::Struct(vec![Field::new("str", DataType::Utf8, false)]);
arrow_fields.push(Field::new(
"my_list",
- DataType::List(Box::new(Field::new("my_list_tuple", arrow_struct, true))),
+ DataType::List(Box::new(NullableDataType::new(arrow_struct, true))),
true,
));
}
@@ -1036,7 +1042,7 @@
{
arrow_fields.push(Field::new(
"name",
- DataType::List(Box::new(Field::new("name", DataType::Int32, true))),
+ DataType::List(Box::new(NullableDataType::new(DataType::Int32, true))),
true,
));
}
@@ -1202,8 +1208,7 @@
let inner_group_list = Field::new(
"innerGroup",
- DataType::List(Box::new(Field::new(
- "innerGroup",
+ DataType::List(Box::new(NullableDataType::new(
DataType::Struct(vec![Field::new("leaf3", DataType::Int32, true)]),
true,
))),
@@ -1212,8 +1217,7 @@
let outer_group_list = Field::new(
"outerGroup",
- DataType::List(Box::new(Field::new(
- "outerGroup",
+ DataType::List(Box::new(NullableDataType::new(
DataType::Struct(vec![
Field::new("leaf2", DataType::Int32, true),
inner_group_list,
@@ -1289,7 +1293,7 @@
Field::new("string", DataType::Utf8, true),
Field::new(
"bools",
- DataType::List(Box::new(Field::new("bools", DataType::Boolean, true))),
+ DataType::List(Box::new(NullableDataType::new(DataType::Boolean, true))),
true,
),
Field::new("date", DataType::Date32(DateUnit::Day), true),
@@ -1359,7 +1363,7 @@
Field::new("string", DataType::Utf8, true),
Field::new(
"bools",
- DataType::List(Box::new(Field::new("element", DataType::Boolean, true))),
+ DataType::List(Box::new(NullableDataType::new(DataType::Boolean, true))),
true,
),
Field::new("date", DataType::Date32(DateUnit::Day), true),
@@ -1382,8 +1386,7 @@
Field::new("uint32", DataType::UInt32, false),
Field::new(
"int32",
- DataType::List(Box::new(Field::new(
- "element",
+ DataType::List(Box::new(NullableDataType::new(
DataType::Int32,
true,
))),
@@ -1496,7 +1499,10 @@
Field::new("c20", DataType::Interval(IntervalUnit::YearMonth), false),
Field::new(
"c21",
- DataType::List(Box::new(Field::new("list", DataType::Boolean, true))),
+ DataType::List(Box::new(NullableDataType::new(
+ DataType::Boolean,
+ true,
+ ))),
false,
),
// Field::new(
@@ -1591,8 +1597,7 @@
vec![
Field::new(
"c21",
- DataType::List(Box::new(Field::new(
- "array",
+ DataType::List(Box::new(NullableDataType::new(
DataType::Boolean,
true,
))),
@@ -1601,17 +1606,15 @@
Field::new(
"c22",
DataType::FixedSizeList(
- Box::new(Field::new("items", DataType::Boolean, false)),
+ Box::new(NullableDataType::new(DataType::Boolean, false)),
5,
),
false,
),
Field::new(
"c23",
- DataType::List(Box::new(Field::new(
- "items",
- DataType::LargeList(Box::new(Field::new(
- "items",
+ DataType::List(Box::new(NullableDataType::new(
+ DataType::LargeList(Box::new(NullableDataType::new(
DataType::Struct(vec![
Field::new("a", DataType::Int16, true),
Field::new("b", DataType::Float64, false),