blob: 083d5bba15b06b541663fb7b5231d69deef82185 [file] [log] [blame]
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
//! Contains the `UnionArray` type.
//!
//! Each slot in a `UnionArray` can have a value chosen from a number of types. Each of the
//! possible types is named like the fields of a [`StructArray`](crate::array::StructArray).
//! A `UnionArray` can have two possible memory layouts, "dense" or "sparse". For more information,
//! please see the [specification](https://arrow.apache.org/docs/format/Columnar.html#union-layout).
//!
//! Builders are provided for `UnionArray`s involving primitive types. `UnionArray`s of nested
//! types are also supported but not via `UnionBuilder`; see the tests for examples.
//!
//! # Example: Dense Memory Layout
//!
//! ```
//! use arrow::array::UnionBuilder;
//! use arrow::datatypes::{Float64Type, Int32Type};
//!
//! # fn main() -> arrow::error::Result<()> {
//! let mut builder = UnionBuilder::new_dense(3);
//! builder.append::<Int32Type>("a", 1).unwrap();
//! builder.append::<Float64Type>("b", 3.0).unwrap();
//! builder.append::<Int32Type>("a", 4).unwrap();
//! let union = builder.build().unwrap();
//!
//! assert_eq!(union.type_id(0), 0_i8);
//! assert_eq!(union.type_id(1), 1_i8);
//! assert_eq!(union.type_id(2), 0_i8);
//!
//! assert_eq!(union.value_offset(0), 0_i32);
//! assert_eq!(union.value_offset(1), 0_i32);
//! assert_eq!(union.value_offset(2), 1_i32);
//!
//! # Ok(())
//! # }
//! ```
//!
//! # Example: Sparse Memory Layout
//! ```
//! use arrow::array::UnionBuilder;
//! use arrow::datatypes::{Float64Type, Int32Type};
//!
//! # fn main() -> arrow::error::Result<()> {
//! let mut builder = UnionBuilder::new_sparse(3);
//! builder.append::<Int32Type>("a", 1).unwrap();
//! builder.append::<Float64Type>("b", 3.0).unwrap();
//! builder.append::<Int32Type>("a", 4).unwrap();
//! let union = builder.build().unwrap();
//!
//! assert_eq!(union.type_id(0), 0_i8);
//! assert_eq!(union.type_id(1), 1_i8);
//! assert_eq!(union.type_id(2), 0_i8);
//!
//! assert_eq!(union.value_offset(0), 0_i32);
//! assert_eq!(union.value_offset(1), 1_i32);
//! assert_eq!(union.value_offset(2), 2_i32);
//!
//! # Ok(())
//! # }
//! ```
use crate::array::{data::count_nulls, make_array, Array, ArrayData, ArrayRef};
use crate::buffer::Buffer;
use crate::datatypes::*;
use crate::error::{ArrowError, Result};
use core::fmt;
use std::any::Any;
use std::mem;
use std::mem::size_of;
/// An Array that can represent slots of varying types.
///
/// Buffer `0` of `data` holds one type id per slot; for the dense layout buffer `1` additionally
/// holds the offsets into the child arrays (see `type_id` and `value_offset`).
pub struct UnionArray {
/// The underlying array data: type id buffer, optional offsets buffer and child data.
data: ArrayData,
/// One array per entry of `data.child_data()`, in the same order; indexed by type id.
boxed_fields: Vec<ArrayRef>,
}
impl UnionArray {
/// Builds a `UnionArray` from its raw components without validating them.
///
/// The caller supplies the type ids, the child arrays and, for dense unions only, the value
/// offsets. Nothing is checked here: the components are assumed to be correct and consistent
/// with one another. Use `try_new` when the inputs should be validated first.
///
/// # Data Consistency
///
/// * `type_ids` is interpreted as a sequence of `i8` values, one per slot. Each value is used
///   as an index into `child_arrays`, so it should be non-negative and less than the number
///   of children. The length of the resulting array is taken from `type_ids.len()`.
/// * `value_offsets` must be `Some` for a dense union and `None` for a sparse union. When
///   present it is interpreted as a sequence of non-negative `i32` values, each locating the
///   slot's value inside the child array selected by the slot's type id.
/// * `bitmap_data`, when present, is installed as the validity (null) bitmap of the array.
///
/// Signed integer types are used for both buffers to stay compatible with other Arrow
/// implementations. `Buffer`s are untyped, so no attempt is made to verify that they really
/// contain `i8` / `i32` data.
pub fn new(
    type_ids: Buffer,
    value_offsets: Option<Buffer>,
    child_arrays: Vec<(Field, ArrayRef)>,
    bitmap_data: Option<Buffer>,
) -> Self {
    let slot_count = type_ids.len();

    // Split the (field, array) pairs into the union's schema and its child data.
    let mut fields = Vec::with_capacity(child_arrays.len());
    let mut children = Vec::with_capacity(child_arrays.len());
    for (field, array) in child_arrays {
        fields.push(field);
        children.push(array.data().clone());
    }

    let mut builder = ArrayData::builder(DataType::Union(fields))
        .add_buffer(type_ids)
        .child_data(children)
        .len(slot_count);
    if let Some(validity) = bitmap_data {
        builder = builder.null_bit_buffer(validity);
    }
    // Only dense unions carry a second (offsets) buffer.
    if let Some(offsets) = value_offsets {
        builder = builder.add_buffer(offsets);
    }

    Self::from(builder.build())
}
/// Attempts to create a new `UnionArray` and validates the inputs provided.
pub fn try_new(
type_ids: Buffer,
value_offsets: Option<Buffer>,
child_arrays: Vec<(Field, ArrayRef)>,
bitmap: Option<Buffer>,
) -> Result<Self> {
if let Some(b) = &value_offsets {
let nulls = count_nulls(bitmap.as_ref(), 0, type_ids.len());
if ((type_ids.len() - nulls) * 4) != b.len() {
return Err(ArrowError::InvalidArgumentError(
"Type Ids and Offsets represent a different number of array slots."
.to_string(),
));
}
}
// Check the type_ids
let type_id_slice: &[i8] = unsafe { type_ids.typed_data() };
let invalid_type_ids = type_id_slice
.iter()
.filter(|i| *i < &0)
.collect::<Vec<&i8>>();
if !invalid_type_ids.is_empty() {
return Err(ArrowError::InvalidArgumentError(format!(
"Type Ids must be positive and cannot be greater than the number of \
child arrays, found:\n{:?}",
invalid_type_ids
)));
}
// Check the value offsets if provided
if let Some(offset_buffer) = &value_offsets {
let max_len = type_ids.len() as i32;
let offsets_slice: &[i32] = unsafe { offset_buffer.typed_data() };
let invalid_offsets = offsets_slice
.iter()
.filter(|i| *i < &0 || *i > &max_len)
.collect::<Vec<&i32>>();
if !invalid_offsets.is_empty() {
return Err(ArrowError::InvalidArgumentError(format!(
"Offsets must be positive and within the length of the Array, \
found:\n{:?}",
invalid_offsets
)));
}
}
Ok(Self::new(type_ids, value_offsets, child_arrays, bitmap))
}
/// Returns a shared handle to the child array registered under `type_id`.
///
/// # Panics
///
/// Panics if `type_id` is negative, or if it is not smaller than the number of child arrays
/// of this union.
pub fn child(&self, type_id: i8) -> ArrayRef {
    assert!(0 <= type_id);
    assert!((type_id as usize) < self.boxed_fields.len());
    let position = type_id as usize;
    let field = &self.boxed_fields[position];
    field.clone()
}
/// Looks up the type id stored for the slot at `index`.
///
/// `index` is an absolute position into the type id buffer: it is used directly to index the
/// buffer, and `value` passes `self.offset() + index` when it calls this method.
///
/// # Panics
///
/// Panics if `index - self.offset()` is not smaller than the length of the array.
pub fn type_id(&self, index: usize) -> i8 {
    assert!(index - self.offset() < self.len());
    let type_ids = self.data().buffers()[0].as_slice();
    // The buffer stores raw bytes; reinterpret the byte as the signed type id.
    type_ids[index] as i8
}
/// Returns the offset into the underlying values array for the array slot at `index`.
///
/// For a dense union the offset is read from the offsets buffer; for a sparse union the
/// offset is `index` itself. `index` is an absolute position: `value` passes
/// `self.offset() + index` when it calls this method.
///
/// # Panics
///
/// Panics if `index - self.offset()` is not smaller than the length of the array, or (dense
/// unions only) if the offsets buffer holds no entry for the requested slot.
pub fn value_offset(&self, index: usize) -> i32 {
    assert!(index - self.offset() < self.len());
    if self.is_dense() {
        // In format v4 unions had their own validity bitmap and offsets are compressed by omitting null values
        // Starting with v5 unions don't have a validity bitmap and it's possible to directly index into the offsets buffer
        let valid_slots = match self.data.null_buffer() {
            Some(b) => b.count_set_bits_offset(0, index),
            None => index,
        };

        // Every offset occupies a full `i32`. Decode all of its bytes: reading only the
        // first byte would silently truncate any offset larger than 255. Native byte order
        // matches how `try_new` views this buffer through `typed_data::<i32>()`.
        let start = valid_slots * size_of::<i32>();
        let end = start + size_of::<i32>();
        let mut raw = [0_u8; size_of::<i32>()];
        raw.copy_from_slice(&self.data().buffers()[1].as_slice()[start..end]);
        i32::from_ne_bytes(raw)
    } else {
        index as i32
    }
}
/// Returns the value stored in the slot at `index` as a single-element array.
///
/// `index` is relative to this array: the array's offset is added before the type id and the
/// value offset are looked up. The result is a length-1 slice of the child array selected by
/// the slot's type id.
///
/// # Panics
///
/// Panics if the lookups performed by `type_id` or `value_offset` panic, or if the slot's
/// type id does not identify one of the child arrays.
pub fn value(&self, index: usize) -> ArrayRef {
    let absolute = self.offset() + index;
    let type_id = self.type_id(absolute);
    let start = self.value_offset(absolute) as usize;
    let child = &self.boxed_fields[type_id as usize];
    child.slice(start, 1)
}
/// Lists the field names of the union's types, in field order.
pub fn type_names(&self) -> Vec<&str> {
    let fields = match self.data.data_type() {
        DataType::Union(fields) => fields,
        _ => unreachable!("Union array's data type is not a union!"),
    };

    let mut names = Vec::with_capacity(fields.len());
    for field in fields.iter() {
        names.push(field.name().as_str());
    }
    names
}
/// Tells the two layouts apart: `true` for a dense union, `false` for a sparse one.
///
/// A union is considered dense when its data carries exactly two buffers (type ids plus
/// offsets).
fn is_dense(&self) -> bool {
    let buffer_count = self.data().buffers().len();
    buffer_count == 2
}
}
impl From<ArrayData> for UnionArray {
    /// Wraps `data` in a `UnionArray`, building one array per entry of its child data.
    fn from(data: ArrayData) -> Self {
        let boxed_fields = data
            .child_data()
            .iter()
            .map(|child| make_array(child.clone()))
            .collect();
        Self { data, boxed_fields }
    }
}
impl Array for UnionArray {
    /// Returns this array as `&dyn Any` so that callers can downcast it to `UnionArray`.
    fn as_any(&self) -> &dyn Any {
        self
    }

    /// Returns a reference to the underlying `ArrayData`.
    fn data(&self) -> &ArrayData {
        &self.data
    }

    /// Returns the total number of bytes of memory occupied by the buffers owned by this [UnionArray].
    ///
    /// This is the buffer memory size of the array's own data plus that of every child array.
    fn get_buffer_memory_size(&self) -> usize {
        let children: usize = self
            .boxed_fields
            .iter()
            .map(|field| field.get_buffer_memory_size())
            .sum();
        self.data.get_buffer_memory_size() + children
    }

    /// Returns the total number of bytes of memory occupied physically by this [UnionArray].
    ///
    /// This is the array memory size of the array's own data, plus the size of the
    /// `UnionArray` value minus the size of its `boxed_fields` vector value, plus the array
    /// memory size of every child array.
    fn get_array_memory_size(&self) -> usize {
        let own = self.data.get_array_memory_size()
            + (mem::size_of_val(self) - mem::size_of_val(&self.boxed_fields));
        let children: usize = self
            .boxed_fields
            .iter()
            .map(|field| field.get_array_memory_size())
            .sum();
        own + children
    }
}
impl fmt::Debug for UnionArray {
    /// Writes the layout kind, the type id buffer, the offsets buffer (dense unions only) and
    /// then every child array together with its name and data type.
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        let dense = self.is_dense();
        let buffers = self.data().buffers();

        let header = match dense {
            true => "UnionArray(Dense)\n[",
            false => "UnionArray(Sparse)\n[",
        };
        writeln!(f, "{}", header)?;

        writeln!(f, "-- type id buffer:")?;
        writeln!(f, "{:?}", buffers[0])?;

        if dense {
            writeln!(f, "-- offsets buffer:")?;
            writeln!(f, "{:?}", buffers[1])?;
        }

        for (child_index, name) in self.type_names().into_iter().enumerate() {
            let column = &self.boxed_fields[child_index];
            writeln!(
                f,
                "-- child {}: \"{}\" ({:?})",
                child_index,
                name,
                column.data_type()
            )?;
            // Delegate directly so the child is printed with the caller's formatter settings.
            fmt::Debug::fmt(column, f)?;
            writeln!(f)?;
        }

        writeln!(f, "]")
    }
}
#[cfg(test)]
mod tests {
use super::*;
use std::sync::Arc;
use crate::array::*;
use crate::buffer::Buffer;
use crate::datatypes::{DataType, Field};
#[test]
fn test_dense_i32() {
    // Seven Int32 values spread over the three fields "a", "b" and "c".
    let appended = [
        ("a", 1_i32),
        ("b", 2),
        ("c", 3),
        ("a", 4),
        ("c", 5),
        ("a", 6),
        ("b", 7),
    ];
    let mut builder = UnionBuilder::new_dense(7);
    for &(field, value) in appended.iter() {
        builder.append::<Int32Type>(field, value).unwrap();
    }
    let union = builder.build().unwrap();

    let type_ids = vec![0_i8, 1, 2, 0, 2, 0, 1];
    let offsets = vec![0_i32, 0, 0, 1, 1, 2, 1];

    // Check type ids
    assert_eq!(union.data().buffers()[0], Buffer::from_slice_ref(&type_ids));
    for (slot, expected) in type_ids.iter().enumerate() {
        assert_eq!(*expected, union.type_id(slot));
    }

    // Check offsets
    assert_eq!(union.data().buffers()[1], Buffer::from_slice_ref(&offsets));
    for (slot, expected) in offsets.iter().enumerate() {
        assert_eq!(union.value_offset(slot), *expected);
    }

    // Check data: each child only stores the values appended to its own field.
    let children = union.data().child_data();
    assert_eq!(
        children[0].buffers()[0],
        Buffer::from_slice_ref(&[1_i32, 4, 6])
    );
    assert_eq!(
        children[1].buffers()[0],
        Buffer::from_slice_ref(&[2_i32, 7])
    );
    assert_eq!(
        children[2].buffers()[0],
        Buffer::from_slice_ref(&[3_i32, 5]),
    );

    // Every slot must read back the value that was appended at that position.
    assert_eq!(appended.len(), union.len());
    for (slot, &(_, expected)) in appended.iter().enumerate() {
        assert!(!union.is_null(slot));
        let value = union.value(slot);
        let value = value.as_any().downcast_ref::<Int32Array>().unwrap();
        assert_eq!(value.len(), 1);
        assert_eq!(expected, value.value(0));
    }
}
#[test]
fn test_dense_mixed() {
    let mut builder = UnionBuilder::new_dense(7);
    builder.append::<Int32Type>("a", 1).unwrap();
    builder.append::<Int64Type>("c", 3).unwrap();
    builder.append::<Int32Type>("a", 4).unwrap();
    builder.append::<Int64Type>("c", 5).unwrap();
    builder.append::<Int32Type>("a", 6).unwrap();
    let union = builder.build().unwrap();

    // Reads slot `i` as a single-element Int32Array and returns its only value.
    let int32_at = |i: usize| -> i32 {
        let slot = union.value(i);
        let slot = slot.as_any().downcast_ref::<Int32Array>().unwrap();
        assert_eq!(slot.len(), 1);
        slot.value(0)
    };
    // Reads slot `i` as a single-element Int64Array and returns its only value.
    let int64_at = |i: usize| -> i64 {
        let slot = union.value(i);
        let slot = slot.as_any().downcast_ref::<Int64Array>().unwrap();
        assert_eq!(slot.len(), 1);
        slot.value(0)
    };

    assert_eq!(5, union.len());
    for i in 0..union.len() {
        assert!(!union.is_null(i));
    }

    assert_eq!(1_i32, int32_at(0));
    assert_eq!(3_i64, int64_at(1));
    assert_eq!(4_i32, int32_at(2));
    assert_eq!(5_i64, int64_at(3));
    assert_eq!(6_i32, int32_at(4));
}
#[test]
fn test_dense_mixed_with_nulls() {
    let mut builder = UnionBuilder::new_dense(7);
    builder.append::<Int32Type>("a", 1).unwrap();
    builder.append::<Int64Type>("c", 3).unwrap();
    builder.append::<Int32Type>("a", 10).unwrap();
    builder.append_null().unwrap();
    builder.append::<Int32Type>("a", 6).unwrap();
    let union = builder.build().unwrap();

    // Reads slot `i` as a non-null, single-element Int32Array and returns its only value.
    let int32_at = |i: usize| -> i32 {
        let slot = union.value(i);
        let slot = slot.as_any().downcast_ref::<Int32Array>().unwrap();
        assert!(!union.is_null(i));
        assert_eq!(slot.len(), 1);
        slot.value(0)
    };
    // Reads slot `i` as a non-null, single-element Int64Array and returns its only value.
    let int64_at = |i: usize| -> i64 {
        let slot = union.value(i);
        let slot = slot.as_any().downcast_ref::<Int64Array>().unwrap();
        assert!(!union.is_null(i));
        assert_eq!(slot.len(), 1);
        slot.value(0)
    };

    assert_eq!(5, union.len());

    assert_eq!(1_i32, int32_at(0));
    assert_eq!(3_i64, int64_at(1));
    assert_eq!(10_i32, int32_at(2));

    // The null slot must still be readable through `value` and must be reported as null.
    let _null_slot = union.value(3);
    assert!(union.is_null(3));

    assert_eq!(6_i32, int32_at(4));
}
#[test]
fn test_dense_mixed_with_nulls_and_offset() {
    let mut builder = UnionBuilder::new_dense(7);
    builder.append::<Int32Type>("a", 1).unwrap();
    builder.append::<Int64Type>("c", 3).unwrap();
    builder.append::<Int32Type>("a", 10).unwrap();
    builder.append_null().unwrap();
    builder.append::<Int32Type>("a", 6).unwrap();
    let union = builder.build().unwrap();

    // Keep only the last three slots: 10, null, 6.
    let slice = union.slice(2, 3);
    let new_union = slice.as_any().downcast_ref::<UnionArray>().unwrap();

    assert_eq!(3, new_union.len());
    for i in 0..new_union.len() {
        let slot = new_union.value(i);
        match i {
            0 => {
                let slot = slot.as_any().downcast_ref::<Int32Array>().unwrap();
                // The nullness must be checked on the slice under test, not on the
                // unsliced source array.
                assert!(!new_union.is_null(i));
                assert_eq!(slot.len(), 1);
                let value = slot.value(0);
                assert_eq!(10_i32, value);
            }
            1 => assert!(new_union.is_null(i)),
            2 => {
                let slot = slot.as_any().downcast_ref::<Int32Array>().unwrap();
                assert!(!new_union.is_null(i));
                assert_eq!(slot.len(), 1);
                let value = slot.value(0);
                assert_eq!(6_i32, value);
            }
            _ => unreachable!(),
        }
    }
}
#[test]
fn test_dense_mixed_with_str() {
    let string_array = StringArray::from(vec!["foo", "bar", "baz"]);
    let int_array = Int32Array::from(vec![5, 6]);
    let float_array = Float64Array::from(vec![10.0]);

    // Type id 0 -> "A" (strings), 1 -> "B" (ints), 2 -> "C" (floats).
    let type_ids = [1_i8, 0, 0, 2, 0, 1];
    let value_offsets = [0_i32, 0, 1, 0, 2, 1];

    let type_id_buffer = Buffer::from_slice_ref(&type_ids);
    let value_offsets_buffer = Buffer::from_slice_ref(&value_offsets);

    let children: Vec<(Field, ArrayRef)> = vec![
        (
            Field::new("A", DataType::Utf8, false),
            Arc::new(string_array) as ArrayRef,
        ),
        (
            Field::new("B", DataType::Int32, false),
            Arc::new(int_array) as ArrayRef,
        ),
        (
            Field::new("C", DataType::Float64, false),
            Arc::new(float_array) as ArrayRef,
        ),
    ];
    let array = UnionArray::try_new(
        type_id_buffer,
        Some(value_offsets_buffer),
        children,
        None,
    )
    .unwrap();

    // Check type ids
    assert_eq!(Buffer::from_slice_ref(&type_ids), array.data().buffers()[0]);
    for (i, id) in type_ids.iter().enumerate() {
        assert_eq!(id, &array.type_id(i));
    }

    // Check offsets
    assert_eq!(
        Buffer::from_slice_ref(&value_offsets),
        array.data().buffers()[1]
    );
    for (i, id) in value_offsets.iter().enumerate() {
        assert_eq!(id, &array.value_offset(i));
    }

    // Check values
    assert_eq!(6, array.len());

    let slot = array.value(0);
    let value = slot.as_any().downcast_ref::<Int32Array>().unwrap().value(0);
    assert_eq!(5, value);

    let slot = array.value(1);
    let value = slot
        .as_any()
        .downcast_ref::<StringArray>()
        .unwrap()
        .value(0);
    assert_eq!("foo", value);

    let slot = array.value(2);
    let value = slot
        .as_any()
        .downcast_ref::<StringArray>()
        .unwrap()
        .value(0);
    assert_eq!("bar", value);

    let slot = array.value(3);
    let value = slot
        .as_any()
        .downcast_ref::<Float64Array>()
        .unwrap()
        .value(0);
    // Compare the absolute difference: a one-sided check would accept any value above 10.
    assert!((10.0 - value).abs() < f64::EPSILON);

    let slot = array.value(4);
    let value = slot
        .as_any()
        .downcast_ref::<StringArray>()
        .unwrap()
        .value(0);
    assert_eq!("baz", value);

    let slot = array.value(5);
    let value = slot.as_any().downcast_ref::<Int32Array>().unwrap().value(0);
    assert_eq!(6, value);
}
#[test]
fn test_sparse_i32() {
    // Seven Int32 values spread over the three fields "a", "b" and "c".
    let appended = [
        ("a", 1_i32),
        ("b", 2),
        ("c", 3),
        ("a", 4),
        ("c", 5),
        ("a", 6),
        ("b", 7),
    ];
    let mut builder = UnionBuilder::new_sparse(7);
    for &(field, value) in appended.iter() {
        builder.append::<Int32Type>(field, value).unwrap();
    }
    let union = builder.build().unwrap();

    let type_ids = vec![0_i8, 1, 2, 0, 2, 0, 1];

    // Check type ids
    assert_eq!(Buffer::from_slice_ref(&type_ids), union.data().buffers()[0]);
    for (slot, expected) in type_ids.iter().enumerate() {
        assert_eq!(*expected, union.type_id(slot));
    }

    // Check offsets, sparse union should only have a single buffer
    assert_eq!(union.data().buffers().len(), 1);

    // Check data: every child is as long as the union, zero-filled where another
    // field holds the value.
    let children = union.data().child_data();
    assert_eq!(
        children[0].buffers()[0],
        Buffer::from_slice_ref(&[1_i32, 0, 0, 4, 0, 6, 0]),
    );
    assert_eq!(
        Buffer::from_slice_ref(&[0_i32, 2_i32, 0, 0, 0, 0, 7]),
        children[1].buffers()[0]
    );
    assert_eq!(
        Buffer::from_slice_ref(&[0_i32, 0, 3_i32, 0, 5, 0, 0]),
        children[2].buffers()[0]
    );

    // Every slot must read back the value that was appended at that position.
    assert_eq!(appended.len(), union.len());
    for (slot, &(_, expected)) in appended.iter().enumerate() {
        assert!(!union.is_null(slot));
        let value = union.value(slot);
        let value = value.as_any().downcast_ref::<Int32Array>().unwrap();
        assert_eq!(value.len(), 1);
        assert_eq!(expected, value.value(0));
    }
}
#[test]
fn test_sparse_mixed() {
    let mut builder = UnionBuilder::new_sparse(5);
    builder.append::<Int32Type>("a", 1).unwrap();
    builder.append::<Float64Type>("c", 3.0).unwrap();
    builder.append::<Int32Type>("a", 4).unwrap();
    builder.append::<Float64Type>("c", 5.0).unwrap();
    builder.append::<Int32Type>("a", 6).unwrap();
    let union = builder.build().unwrap();

    let expected_type_ids = vec![0_i8, 1, 0, 1, 0];

    // Check type ids
    assert_eq!(
        Buffer::from_slice_ref(&expected_type_ids),
        union.data().buffers()[0]
    );
    for (i, id) in expected_type_ids.iter().enumerate() {
        assert_eq!(id, &union.type_id(i));
    }

    // Check offsets, sparse union should only have a single buffer, i.e. no offsets
    assert_eq!(union.data().buffers().len(), 1);

    for i in 0..union.len() {
        let slot = union.value(i);
        assert!(!union.is_null(i));
        match i {
            0 => {
                let slot = slot.as_any().downcast_ref::<Int32Array>().unwrap();
                assert_eq!(slot.len(), 1);
                let value = slot.value(0);
                assert_eq!(1_i32, value);
            }
            1 => {
                let slot = slot.as_any().downcast_ref::<Float64Array>().unwrap();
                assert_eq!(slot.len(), 1);
                let value = slot.value(0);
                // Compare the absolute difference so that values on either side of the
                // expectation are rejected.
                assert!((value - 3_f64).abs() < f64::EPSILON);
            }
            2 => {
                let slot = slot.as_any().downcast_ref::<Int32Array>().unwrap();
                assert_eq!(slot.len(), 1);
                let value = slot.value(0);
                assert_eq!(4_i32, value);
            }
            3 => {
                let slot = slot.as_any().downcast_ref::<Float64Array>().unwrap();
                assert_eq!(slot.len(), 1);
                let value = slot.value(0);
                assert!((5_f64 - value).abs() < f64::EPSILON);
            }
            4 => {
                let slot = slot.as_any().downcast_ref::<Int32Array>().unwrap();
                assert_eq!(slot.len(), 1);
                let value = slot.value(0);
                assert_eq!(6_i32, value);
            }
            _ => unreachable!(),
        }
    }
}
#[test]
fn test_sparse_mixed_with_nulls() {
    let mut builder = UnionBuilder::new_sparse(5);
    builder.append::<Int32Type>("a", 1).unwrap();
    builder.append_null().unwrap();
    builder.append::<Float64Type>("c", 3.0).unwrap();
    builder.append::<Int32Type>("a", 4).unwrap();
    let union = builder.build().unwrap();

    let expected_type_ids = vec![0_i8, 0, 1, 0];

    // Check type ids
    assert_eq!(
        Buffer::from_slice_ref(&expected_type_ids),
        union.data().buffers()[0]
    );
    for (i, id) in expected_type_ids.iter().enumerate() {
        assert_eq!(id, &union.type_id(i));
    }

    // Check offsets, sparse union should only have a single buffer, i.e. no offsets
    assert_eq!(union.data().buffers().len(), 1);

    for i in 0..union.len() {
        let slot = union.value(i);
        match i {
            0 => {
                let slot = slot.as_any().downcast_ref::<Int32Array>().unwrap();
                assert!(!union.is_null(i));
                assert_eq!(slot.len(), 1);
                let value = slot.value(0);
                assert_eq!(1_i32, value);
            }
            1 => assert!(union.is_null(i)),
            2 => {
                let slot = slot.as_any().downcast_ref::<Float64Array>().unwrap();
                assert!(!union.is_null(i));
                assert_eq!(slot.len(), 1);
                let value = slot.value(0);
                // Compare the absolute difference so that values on either side of the
                // expectation are rejected.
                assert!((value - 3_f64).abs() < f64::EPSILON);
            }
            3 => {
                let slot = slot.as_any().downcast_ref::<Int32Array>().unwrap();
                assert!(!union.is_null(i));
                assert_eq!(slot.len(), 1);
                let value = slot.value(0);
                assert_eq!(4_i32, value);
            }
            _ => unreachable!(),
        }
    }
}
#[test]
fn test_sparse_mixed_with_nulls_and_offset() {
    let mut builder = UnionBuilder::new_sparse(5);
    builder.append::<Int32Type>("a", 1).unwrap();
    builder.append_null().unwrap();
    builder.append::<Float64Type>("c", 3.0).unwrap();
    builder.append_null().unwrap();
    builder.append::<Int32Type>("a", 4).unwrap();
    let union = builder.build().unwrap();

    // Drop the first slot: the slice holds null, 3.0, null, 4.
    let slice = union.slice(1, 4);
    let new_union = slice.as_any().downcast_ref::<UnionArray>().unwrap();

    assert_eq!(4, new_union.len());
    for i in 0..new_union.len() {
        let slot = new_union.value(i);
        match i {
            0 => assert!(new_union.is_null(i)),
            1 => {
                let slot = slot.as_any().downcast_ref::<Float64Array>().unwrap();
                assert!(!new_union.is_null(i));
                assert_eq!(slot.len(), 1);
                let value = slot.value(0);
                // Compare the absolute difference so that values on either side of the
                // expectation are rejected.
                assert!((value - 3_f64).abs() < f64::EPSILON);
            }
            2 => assert!(new_union.is_null(i)),
            3 => {
                let slot = slot.as_any().downcast_ref::<Int32Array>().unwrap();
                assert!(!new_union.is_null(i));
                assert_eq!(slot.len(), 1);
                let value = slot.value(0);
                assert_eq!(4_i32, value);
            }
            _ => unreachable!(),
        }
    }
}
}