| // Licensed to the Apache Software Foundation (ASF) under one |
| // or more contributor license agreements. See the NOTICE file |
| // distributed with this work for additional information |
| // regarding copyright ownership. The ASF licenses this file |
| // to you under the Apache License, Version 2.0 (the |
| // "License"); you may not use this file except in compliance |
| // with the License. You may obtain a copy of the License at |
| // |
| // http://www.apache.org/licenses/LICENSE-2.0 |
| // |
| // Unless required by applicable law or agreed to in writing, |
| // software distributed under the License is distributed on an |
| // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
| // KIND, either express or implied. See the License for the |
| // specific language governing permissions and limitations |
| // under the License. |
| |
| use crate::array::{get_offsets, make_array, print_long_array}; |
| use crate::builder::{GenericListBuilder, PrimitiveBuilder}; |
| use crate::{ |
| Array, ArrayAccessor, ArrayRef, ArrowPrimitiveType, FixedSizeListArray, |
| iterator::GenericListArrayIter, new_empty_array, |
| }; |
| use arrow_buffer::{ArrowNativeType, NullBuffer, OffsetBuffer}; |
| use arrow_data::{ArrayData, ArrayDataBuilder}; |
| use arrow_schema::{ArrowError, DataType, FieldRef}; |
| use num_integer::Integer; |
| use std::any::Any; |
| use std::sync::Arc; |
| |
| /// A type that can be used within a variable-size array to encode offset information |
| /// |
| /// See [`ListArray`], [`LargeListArray`], [`BinaryArray`], [`LargeBinaryArray`], |
| /// [`StringArray`] and [`LargeStringArray`] |
| /// |
| /// [`BinaryArray`]: crate::array::BinaryArray |
| /// [`LargeBinaryArray`]: crate::array::LargeBinaryArray |
| /// [`StringArray`]: crate::array::StringArray |
| /// [`LargeStringArray`]: crate::array::LargeStringArray |
| pub trait OffsetSizeTrait: |
| ArrowNativeType + std::ops::AddAssign + Integer + num_traits::CheckedAdd |
| { |
| /// True for 64 bit offset size and false for 32 bit offset size |
| const IS_LARGE: bool; |
| /// Prefix for the offset size |
| const PREFIX: &'static str; |
| /// The max `usize` offset |
| const MAX_OFFSET: usize; |
| } |
| |
| impl OffsetSizeTrait for i32 { |
| const IS_LARGE: bool = false; |
| const PREFIX: &'static str = ""; |
| const MAX_OFFSET: usize = i32::MAX as usize; |
| } |
| |
| impl OffsetSizeTrait for i64 { |
| const IS_LARGE: bool = true; |
| const PREFIX: &'static str = "Large"; |
| const MAX_OFFSET: usize = i64::MAX as usize; |
| } |
| |
| /// An array of [variable length lists], similar to JSON arrays |
| /// (e.g. `["A", "B", "C"]`). This struct specifically represents |
| /// the [list layout]. Refer to [`GenericListViewArray`] for the |
| /// [list-view layout]. |
| /// |
| /// Lists are represented using `offsets` into a `values` child |
| /// array. Offsets are stored in two adjacent entries of an |
| /// [`OffsetBuffer`]. |
| /// |
| /// Arrow defines [`ListArray`] with `i32` offsets and |
| /// [`LargeListArray`] with `i64` offsets. |
| /// |
| /// Use [`GenericListBuilder`] to construct a [`GenericListArray`]. |
| /// |
| /// # Representation |
| /// |
| /// A [`ListArray`] can represent a list of values of any other |
| /// supported Arrow type. Each element of the `ListArray` itself is |
| /// a list which may be empty, may contain NULL and non-null values, |
| /// or may itself be NULL. |
| /// |
| /// For example, the `ListArray` shown in the following diagram stores |
| /// lists of strings. Note that `[]` represents an empty (length |
| /// 0), but non NULL list. |
| /// |
| /// ```text |
| /// ┌─────────────┐ |
| /// │ [A,B,C] │ |
| /// ├─────────────┤ |
| /// │ [] │ |
| /// ├─────────────┤ |
| /// │ NULL │ |
| /// ├─────────────┤ |
| /// │ [D] │ |
| /// ├─────────────┤ |
| /// │ [NULL, F] │ |
| /// └─────────────┘ |
| /// ``` |
| /// |
| /// The `values` are stored in a child [`StringArray`] and the offsets |
| /// are stored in an [`OffsetBuffer`] as shown in the following |
| /// diagram. The logical values and offsets are shown on the left, and |
| /// the actual `ListArray` encoding on the right. |
| /// |
| /// ```text |
| /// ┌ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ |
| /// ┌ ─ ─ ─ ─ ─ ─ ┐ │ |
| /// ┌─────────────┐ ┌───────┐ │ ┌───┐ ┌───┐ ┌───┐ ┌───┐ |
| /// │ [A,B,C] │ │ (0,3) │ │ 1 │ │ 0 │ │ │ 1 │ │ A │ │ 0 │ |
| /// ├─────────────┤ ├───────┤ │ ├───┤ ├───┤ ├───┤ ├───┤ |
| /// │ [] (empty) │ │ (3,3) │ │ 1 │ │ 3 │ │ │ 1 │ │ B │ │ 1 │ |
| /// ├─────────────┤ ├───────┤ │ ├───┤ ├───┤ ├───┤ ├───┤ |
| /// │ NULL │ │ (3,3) │ │ 0 │ │ 3 │ │ │ 1 │ │ C │ │ 2 │ |
| /// ├─────────────┤ ├───────┤ │ ├───┤ ├───┤ ├───┤ ├───┤ |
| /// │ [D] │ │ (3,4) │ │ 1 │ │ 3 │ │ │ 1 │ │ D │ │ 3 │ |
| /// ├─────────────┤ ├───────┤ │ ├───┤ ├───┤ ├───┤ ├───┤ |
| /// │ [NULL, F] │ │ (4,6) │ │ 1 │ │ 4 │ │ │ 0 │ │ ? │ │ 4 │ |
| /// └─────────────┘ └───────┘ │ └───┘ ├───┤ ├───┤ ├───┤ |
| /// │ 6 │ │ │ 1 │ │ F │ │ 5 │ |
| /// │ Validity └───┘ └───┘ └───┘ |
| /// Logical Logical (nulls) Offsets │ Values │ │ |
| /// Values Offsets │ (Array) |
| /// └ ─ ─ ─ ─ ─ ─ ┘ │ |
| /// (offsets[i], │ ListArray |
| /// offsets[i+1]) │ |
| /// └ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ |
| /// ``` |
| /// |
| /// # Slicing |
| /// |
| /// Slicing a `ListArray` creates a new `ListArray` without copying any data, |
| /// but this means the [`Self::values`] and [`Self::offsets`] may have "unused" data |
| /// |
| /// For example, calling `slice(1, 3)` on the `ListArray` in the above example |
| /// would result in the following. Note |
| /// |
| /// 1. `Values` array is unchanged |
| /// 2. `Offsets` do not start at `0`, nor cover all values in the Values array. |
| /// |
| /// ```text |
| /// ┌ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ |
| /// ┌ ─ ─ ─ ─ ─ ─ ┐ │ ╔═══╗ |
| /// │ ╔═══╗ ╔═══╗ ║ ║ Not used |
| /// │ ║ 1 ║ ║ A ║ │ 0 │ ╚═══╝ |
| /// ┌─────────────┐ ┌───────┐ │ ┌───┐ ┌───┐ ╠═══╣ ╠═══╣ |
| /// │ [] (empty) │ │ (3,3) │ │ 1 │ │ 3 │ │ ║ 1 ║ ║ B ║ │ 1 │ |
| /// ├─────────────┤ ├───────┤ │ ├───┤ ├───┤ ╠═══╣ ╠═══╣ |
| /// │ NULL │ │ (3,3) │ │ 0 │ │ 3 │ │ ║ 1 ║ ║ C ║ │ 2 │ |
| /// ├─────────────┤ ├───────┤ │ ├───┤ ├───┤ ╚═══╝ ╚═══╝ |
| /// │ [D] │ │ (3,4) │ │ 1 │ │ 3 │ │ │ 1 │ │ D │ │ 3 │ |
| /// └─────────────┘ └───────┘ │ └───┘ ├───┤ ╔═══╗ ╔═══╗ |
| /// │ 4 │ │ ║ 0 ║ ║ ? ║ │ 4 │ |
| /// │ └───┘ ╠═══╣ ╠═══╣ |
| /// │ ║ 1 ║ ║ F ║ │ 5 │ |
| /// │ Validity ╚═══╝ ╚═══╝ |
| /// Logical Logical (nulls) Offsets │ Values │ │ |
| /// Values Offsets │ (Array) |
| /// └ ─ ─ ─ ─ ─ ─ ┘ │ |
| /// (offsets[i], │ ListArray |
| /// offsets[i+1]) │ |
| /// └ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ |
| /// ``` |
| /// |
| /// [`StringArray`]: crate::array::StringArray |
| /// [`GenericListViewArray`]: crate::array::GenericListViewArray |
| /// [variable length lists]: https://arrow.apache.org/docs/format/Columnar.html#variable-size-list-layout |
| /// [list layout]: https://arrow.apache.org/docs/format/Columnar.html#list-layout |
| /// [list-view layout]: https://arrow.apache.org/docs/format/Columnar.html#listview-layout |
| pub struct GenericListArray<OffsetSize: OffsetSizeTrait> { |
| data_type: DataType, |
| nulls: Option<NullBuffer>, |
| values: ArrayRef, |
| value_offsets: OffsetBuffer<OffsetSize>, |
| } |
| |
| impl<OffsetSize: OffsetSizeTrait> Clone for GenericListArray<OffsetSize> { |
| fn clone(&self) -> Self { |
| Self { |
| data_type: self.data_type.clone(), |
| nulls: self.nulls.clone(), |
| values: self.values.clone(), |
| value_offsets: self.value_offsets.clone(), |
| } |
| } |
| } |
| |
| impl<OffsetSize: OffsetSizeTrait> GenericListArray<OffsetSize> { |
| /// The data type constructor of list array. |
| /// The input is the schema of the child array and |
| /// the output is the [`DataType`], List or LargeList. |
| pub const DATA_TYPE_CONSTRUCTOR: fn(FieldRef) -> DataType = if OffsetSize::IS_LARGE { |
| DataType::LargeList |
| } else { |
| DataType::List |
| }; |
| |
| /// Create a new [`GenericListArray`] from the provided parts |
| /// |
| /// # Errors |
| /// |
| /// Errors if |
| /// |
| /// * `offsets.len() - 1 != nulls.len()` |
| /// * `offsets.last() > values.len()` |
| /// * `!field.is_nullable() && values.is_nullable()` |
| /// * `field.data_type() != values.data_type()` |
| pub fn try_new( |
| field: FieldRef, |
| offsets: OffsetBuffer<OffsetSize>, |
| values: ArrayRef, |
| nulls: Option<NullBuffer>, |
| ) -> Result<Self, ArrowError> { |
| let len = offsets.len() - 1; // Offsets guaranteed to not be empty |
| let end_offset = offsets.last().unwrap().as_usize(); |
| // don't need to check other values of `offsets` because they are checked |
| // during construction of `OffsetBuffer` |
| if end_offset > values.len() { |
| return Err(ArrowError::InvalidArgumentError(format!( |
| "Max offset of {end_offset} exceeds length of values {}", |
| values.len() |
| ))); |
| } |
| |
| if let Some(n) = nulls.as_ref() { |
| if n.len() != len { |
| return Err(ArrowError::InvalidArgumentError(format!( |
| "Incorrect length of null buffer for {}ListArray, expected {len} got {}", |
| OffsetSize::PREFIX, |
| n.len(), |
| ))); |
| } |
| } |
| if !field.is_nullable() && values.is_nullable() { |
| return Err(ArrowError::InvalidArgumentError(format!( |
| "Non-nullable field of {}ListArray {:?} cannot contain nulls", |
| OffsetSize::PREFIX, |
| field.name() |
| ))); |
| } |
| |
| if field.data_type() != values.data_type() { |
| return Err(ArrowError::InvalidArgumentError(format!( |
| "{}ListArray expected data type {} got {} for {:?}", |
| OffsetSize::PREFIX, |
| field.data_type(), |
| values.data_type(), |
| field.name() |
| ))); |
| } |
| |
| Ok(Self { |
| data_type: Self::DATA_TYPE_CONSTRUCTOR(field), |
| nulls, |
| values, |
| value_offsets: offsets, |
| }) |
| } |
| |
| /// Create a new [`GenericListArray`] from the provided parts |
| /// |
| /// # Panics |
| /// |
| /// Panics if [`Self::try_new`] returns an error |
| pub fn new( |
| field: FieldRef, |
| offsets: OffsetBuffer<OffsetSize>, |
| values: ArrayRef, |
| nulls: Option<NullBuffer>, |
| ) -> Self { |
| Self::try_new(field, offsets, values, nulls).unwrap() |
| } |
| |
| /// Create a new [`GenericListArray`] of length `len` where all values are null |
| pub fn new_null(field: FieldRef, len: usize) -> Self { |
| let values = new_empty_array(field.data_type()); |
| Self { |
| data_type: Self::DATA_TYPE_CONSTRUCTOR(field), |
| nulls: Some(NullBuffer::new_null(len)), |
| value_offsets: OffsetBuffer::new_zeroed(len), |
| values, |
| } |
| } |
| |
| /// Deconstruct this array into its constituent parts |
| pub fn into_parts( |
| self, |
| ) -> ( |
| FieldRef, |
| OffsetBuffer<OffsetSize>, |
| ArrayRef, |
| Option<NullBuffer>, |
| ) { |
| let f = match self.data_type { |
| DataType::List(f) | DataType::LargeList(f) => f, |
| _ => unreachable!(), |
| }; |
| (f, self.value_offsets, self.values, self.nulls) |
| } |
| |
| /// Returns a reference to the offsets of this list |
| /// |
| /// Unlike [`Self::value_offsets`] this returns the [`OffsetBuffer`] |
| /// allowing for zero-copy cloning. |
| /// |
| /// Notes: The `offsets` may not start at 0 and may not cover all values in |
| /// [`Self::values`]. This can happen when the list array was sliced via |
| /// [`Self::slice`]. See documentation for [`Self`] for more details. |
| #[inline] |
| pub fn offsets(&self) -> &OffsetBuffer<OffsetSize> { |
| &self.value_offsets |
| } |
| |
| /// Returns a reference to the values of this list |
| /// |
| /// Note: The list array may not refer to all values in the `values` array. |
| /// For example if the list array was sliced via [`Self::slice`] values will |
| /// still contain values both before and after the slice. See documentation |
| /// for [`Self`] for more details. |
| #[inline] |
| pub fn values(&self) -> &ArrayRef { |
| &self.values |
| } |
| |
| /// Returns a clone of the value type of this list. |
| pub fn value_type(&self) -> DataType { |
| self.values.data_type().clone() |
| } |
| |
| /// Returns ith value of this list array. |
| /// |
| /// Note: This method does not check for nulls and the value is arbitrary |
| /// if [`is_null`](Self::is_null) returns true for the index. |
| /// |
| /// # Safety |
| /// Caller must ensure that the index is within the array bounds |
| pub unsafe fn value_unchecked(&self, i: usize) -> ArrayRef { |
| let end = unsafe { self.value_offsets().get_unchecked(i + 1).as_usize() }; |
| let start = unsafe { self.value_offsets().get_unchecked(i).as_usize() }; |
| self.values.slice(start, end - start) |
| } |
| |
| /// Returns ith value of this list array. |
| /// |
| /// Note: This method does not check for nulls and the value is arbitrary |
| /// (but still well-defined) if [`is_null`](Self::is_null) returns true for the index. |
| /// |
| /// # Panics |
| /// Panics if index `i` is out of bounds |
| pub fn value(&self, i: usize) -> ArrayRef { |
| let end = self.value_offsets()[i + 1].as_usize(); |
| let start = self.value_offsets()[i].as_usize(); |
| self.values.slice(start, end - start) |
| } |
| |
| /// Returns the offset values in the offsets buffer. |
| /// |
| /// See [`Self::offsets`] for more details. |
| #[inline] |
| pub fn value_offsets(&self) -> &[OffsetSize] { |
| &self.value_offsets |
| } |
| |
| /// Returns the length for value at index `i`. |
| #[inline] |
| pub fn value_length(&self, i: usize) -> OffsetSize { |
| let offsets = self.value_offsets(); |
| offsets[i + 1] - offsets[i] |
| } |
| |
| /// constructs a new iterator |
| pub fn iter<'a>(&'a self) -> GenericListArrayIter<'a, OffsetSize> { |
| GenericListArrayIter::<'a, OffsetSize>::new(self) |
| } |
| |
| #[inline] |
| fn get_type(data_type: &DataType) -> Option<&DataType> { |
| match (OffsetSize::IS_LARGE, data_type) { |
| (true, DataType::LargeList(child)) | (false, DataType::List(child)) => { |
| Some(child.data_type()) |
| } |
| _ => None, |
| } |
| } |
| |
| /// Returns a zero-copy slice of this array with the indicated offset and length. |
| /// |
| /// Notes: this method does *NOT* slice the underlying values array or modify |
| /// the values in the offsets buffer. See [`Self::values`] and |
| /// [`Self::offsets`] for more information. |
| pub fn slice(&self, offset: usize, length: usize) -> Self { |
| Self { |
| data_type: self.data_type.clone(), |
| nulls: self.nulls.as_ref().map(|n| n.slice(offset, length)), |
| values: self.values.clone(), |
| value_offsets: self.value_offsets.slice(offset, length), |
| } |
| } |
| |
| /// Creates a [`GenericListArray`] from an iterator of primitive values |
| /// # Example |
| /// ``` |
| /// # use arrow_array::ListArray; |
| /// # use arrow_array::types::Int32Type; |
| /// |
| /// let data = vec![ |
| /// Some(vec![Some(0), Some(1), Some(2)]), |
| /// None, |
| /// Some(vec![Some(3), None, Some(5)]), |
| /// Some(vec![Some(6), Some(7)]), |
| /// ]; |
| /// let list_array = ListArray::from_iter_primitive::<Int32Type, _, _>(data); |
| /// println!("{:?}", list_array); |
| /// ``` |
| pub fn from_iter_primitive<T, P, I>(iter: I) -> Self |
| where |
| T: ArrowPrimitiveType, |
| P: IntoIterator<Item = Option<<T as ArrowPrimitiveType>::Native>>, |
| I: IntoIterator<Item = Option<P>>, |
| { |
| let iter = iter.into_iter(); |
| let size_hint = iter.size_hint().0; |
| let mut builder = |
| GenericListBuilder::with_capacity(PrimitiveBuilder::<T>::new(), size_hint); |
| |
| for i in iter { |
| match i { |
| Some(p) => { |
| for t in p { |
| builder.values().append_option(t); |
| } |
| builder.append(true); |
| } |
| None => builder.append(false), |
| } |
| } |
| builder.finish() |
| } |
| } |
| |
| impl<OffsetSize: OffsetSizeTrait> From<ArrayData> for GenericListArray<OffsetSize> { |
| fn from(data: ArrayData) -> Self { |
| Self::try_new_from_array_data(data) |
| .expect("Expected infallible creation of GenericListArray from ArrayDataRef failed") |
| } |
| } |
| |
| impl<OffsetSize: OffsetSizeTrait> From<GenericListArray<OffsetSize>> for ArrayData { |
| fn from(array: GenericListArray<OffsetSize>) -> Self { |
| let len = array.len(); |
| let builder = ArrayDataBuilder::new(array.data_type) |
| .len(len) |
| .nulls(array.nulls) |
| .buffers(vec![array.value_offsets.into_inner().into_inner()]) |
| .child_data(vec![array.values.to_data()]); |
| |
| unsafe { builder.build_unchecked() } |
| } |
| } |
| |
| impl<OffsetSize: OffsetSizeTrait> From<FixedSizeListArray> for GenericListArray<OffsetSize> { |
| fn from(value: FixedSizeListArray) -> Self { |
| let (field, size) = match value.data_type() { |
| DataType::FixedSizeList(f, size) => (f, *size as usize), |
| _ => unreachable!(), |
| }; |
| |
| let offsets = OffsetBuffer::from_repeated_length(size, value.len()); |
| |
| Self { |
| data_type: Self::DATA_TYPE_CONSTRUCTOR(field.clone()), |
| nulls: value.nulls().cloned(), |
| values: value.values().clone(), |
| value_offsets: offsets, |
| } |
| } |
| } |
| |
| impl<OffsetSize: OffsetSizeTrait> GenericListArray<OffsetSize> { |
| fn try_new_from_array_data(data: ArrayData) -> Result<Self, ArrowError> { |
| if data.buffers().len() != 1 { |
| return Err(ArrowError::InvalidArgumentError(format!( |
| "ListArray data should contain a single buffer only (value offsets), had {}", |
| data.buffers().len() |
| ))); |
| } |
| |
| if data.child_data().len() != 1 { |
| return Err(ArrowError::InvalidArgumentError(format!( |
| "ListArray should contain a single child array (values array), had {}", |
| data.child_data().len() |
| ))); |
| } |
| |
| let values = data.child_data()[0].clone(); |
| |
| if let Some(child_data_type) = Self::get_type(data.data_type()) { |
| if values.data_type() != child_data_type { |
| return Err(ArrowError::InvalidArgumentError(format!( |
| "[Large]ListArray's child datatype {:?} does not \ |
| correspond to the List's datatype {:?}", |
| values.data_type(), |
| child_data_type |
| ))); |
| } |
| } else { |
| return Err(ArrowError::InvalidArgumentError(format!( |
| "[Large]ListArray's datatype must be [Large]ListArray(). It is {:?}", |
| data.data_type() |
| ))); |
| } |
| |
| let values = make_array(values); |
| // SAFETY: |
| // ArrayData is valid, and verified type above |
| let value_offsets = unsafe { get_offsets(&data) }; |
| |
| Ok(Self { |
| data_type: data.data_type().clone(), |
| nulls: data.nulls().cloned(), |
| values, |
| value_offsets, |
| }) |
| } |
| } |
| |
| impl<OffsetSize: OffsetSizeTrait> Array for GenericListArray<OffsetSize> { |
| fn as_any(&self) -> &dyn Any { |
| self |
| } |
| |
| fn to_data(&self) -> ArrayData { |
| self.clone().into() |
| } |
| |
| fn into_data(self) -> ArrayData { |
| self.into() |
| } |
| |
| fn data_type(&self) -> &DataType { |
| &self.data_type |
| } |
| |
| fn slice(&self, offset: usize, length: usize) -> ArrayRef { |
| Arc::new(self.slice(offset, length)) |
| } |
| |
| fn len(&self) -> usize { |
| self.value_offsets.len() - 1 |
| } |
| |
| fn is_empty(&self) -> bool { |
| self.value_offsets.len() <= 1 |
| } |
| |
| fn shrink_to_fit(&mut self) { |
| if let Some(nulls) = &mut self.nulls { |
| nulls.shrink_to_fit(); |
| } |
| self.values.shrink_to_fit(); |
| self.value_offsets.shrink_to_fit(); |
| } |
| |
| fn offset(&self) -> usize { |
| 0 |
| } |
| |
| fn nulls(&self) -> Option<&NullBuffer> { |
| self.nulls.as_ref() |
| } |
| |
| fn logical_null_count(&self) -> usize { |
| // More efficient that the default implementation |
| self.null_count() |
| } |
| |
| fn get_buffer_memory_size(&self) -> usize { |
| let mut size = self.values.get_buffer_memory_size(); |
| size += self.value_offsets.inner().inner().capacity(); |
| if let Some(n) = self.nulls.as_ref() { |
| size += n.buffer().capacity(); |
| } |
| size |
| } |
| |
| fn get_array_memory_size(&self) -> usize { |
| let mut size = std::mem::size_of::<Self>() + self.values.get_array_memory_size(); |
| size += self.value_offsets.inner().inner().capacity(); |
| if let Some(n) = self.nulls.as_ref() { |
| size += n.buffer().capacity(); |
| } |
| size |
| } |
| } |
| |
| impl<OffsetSize: OffsetSizeTrait> ArrayAccessor for &GenericListArray<OffsetSize> { |
| type Item = ArrayRef; |
| |
| fn value(&self, index: usize) -> Self::Item { |
| GenericListArray::value(self, index) |
| } |
| |
| unsafe fn value_unchecked(&self, index: usize) -> Self::Item { |
| GenericListArray::value(self, index) |
| } |
| } |
| |
| impl<OffsetSize: OffsetSizeTrait> std::fmt::Debug for GenericListArray<OffsetSize> { |
| fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { |
| let prefix = OffsetSize::PREFIX; |
| |
| write!(f, "{prefix}ListArray\n[\n")?; |
| print_long_array(self, f, |array, index, f| { |
| std::fmt::Debug::fmt(&array.value(index), f) |
| })?; |
| write!(f, "]") |
| } |
| } |
| |
| /// A [`GenericListArray`] of variable size lists, storing offsets as `i32`. |
| /// |
| /// See [`ListBuilder`](crate::builder::ListBuilder) for how to construct a [`ListArray`] |
| pub type ListArray = GenericListArray<i32>; |
| |
| /// A [`GenericListArray`] of variable size lists, storing offsets as `i64`. |
| /// |
| /// See [`LargeListBuilder`](crate::builder::LargeListBuilder) for how to construct a [`LargeListArray`] |
| pub type LargeListArray = GenericListArray<i64>; |
| |
| #[cfg(test)] |
| mod tests { |
| use super::*; |
| use crate::builder::{FixedSizeListBuilder, Int32Builder, ListBuilder, UnionBuilder}; |
| use crate::cast::AsArray; |
| use crate::types::Int32Type; |
| use crate::{Int32Array, Int64Array}; |
| use arrow_buffer::{Buffer, ScalarBuffer, bit_util}; |
| use arrow_schema::Field; |
| |
| fn create_from_buffers() -> ListArray { |
| // [[0, 1, 2], [3, 4, 5], [6, 7]] |
| let values = Int32Array::from(vec![0, 1, 2, 3, 4, 5, 6, 7]); |
| let offsets = OffsetBuffer::new(ScalarBuffer::from(vec![0, 3, 6, 8])); |
| let field = Arc::new(Field::new_list_field(DataType::Int32, true)); |
| ListArray::new(field, offsets, Arc::new(values), None) |
| } |
| |
| #[test] |
| fn test_from_iter_primitive() { |
| let data = vec![ |
| Some(vec![Some(0), Some(1), Some(2)]), |
| Some(vec![Some(3), Some(4), Some(5)]), |
| Some(vec![Some(6), Some(7)]), |
| ]; |
| let list_array = ListArray::from_iter_primitive::<Int32Type, _, _>(data); |
| |
| let another = create_from_buffers(); |
| assert_eq!(list_array, another) |
| } |
| |
| #[test] |
| fn test_empty_list_array() { |
| // Construct an empty value array |
| let value_data = ArrayData::builder(DataType::Int32) |
| .len(0) |
| .add_buffer(Buffer::from([])) |
| .build() |
| .unwrap(); |
| |
| // Construct an empty offset buffer |
| let value_offsets = Buffer::from([]); |
| |
| // Construct a list array from the above two |
| let list_data_type = |
| DataType::List(Arc::new(Field::new_list_field(DataType::Int32, false))); |
| let list_data = ArrayData::builder(list_data_type) |
| .len(0) |
| .add_buffer(value_offsets) |
| .add_child_data(value_data) |
| .build() |
| .unwrap(); |
| |
| let list_array = ListArray::from(list_data); |
| assert_eq!(list_array.len(), 0) |
| } |
| |
| #[test] |
| fn test_list_array() { |
| // Construct a value array |
| let value_data = ArrayData::builder(DataType::Int32) |
| .len(8) |
| .add_buffer(Buffer::from_slice_ref([0, 1, 2, 3, 4, 5, 6, 7])) |
| .build() |
| .unwrap(); |
| |
| // Construct a buffer for value offsets, for the nested array: |
| // [[0, 1, 2], [3, 4, 5], [6, 7]] |
| let value_offsets = Buffer::from_slice_ref([0, 3, 6, 8]); |
| |
| // Construct a list array from the above two |
| let list_data_type = |
| DataType::List(Arc::new(Field::new_list_field(DataType::Int32, false))); |
| let list_data = ArrayData::builder(list_data_type.clone()) |
| .len(3) |
| .add_buffer(value_offsets.clone()) |
| .add_child_data(value_data.clone()) |
| .build() |
| .unwrap(); |
| let list_array = ListArray::from(list_data); |
| |
| let values = list_array.values(); |
| assert_eq!(value_data, values.to_data()); |
| assert_eq!(DataType::Int32, list_array.value_type()); |
| assert_eq!(3, list_array.len()); |
| assert_eq!(0, list_array.null_count()); |
| assert_eq!(6, list_array.value_offsets()[2]); |
| assert_eq!(2, list_array.value_length(2)); |
| assert_eq!(0, list_array.value(0).as_primitive::<Int32Type>().value(0)); |
| assert_eq!( |
| 0, |
| unsafe { list_array.value_unchecked(0) } |
| .as_primitive::<Int32Type>() |
| .value(0) |
| ); |
| for i in 0..3 { |
| assert!(list_array.is_valid(i)); |
| assert!(!list_array.is_null(i)); |
| } |
| |
| // Now test with a non-zero offset (skip first element) |
| // [[3, 4, 5], [6, 7]] |
| let list_data = ArrayData::builder(list_data_type) |
| .len(2) |
| .offset(1) |
| .add_buffer(value_offsets) |
| .add_child_data(value_data.clone()) |
| .build() |
| .unwrap(); |
| let list_array = ListArray::from(list_data); |
| |
| let values = list_array.values(); |
| assert_eq!(value_data, values.to_data()); |
| assert_eq!(DataType::Int32, list_array.value_type()); |
| assert_eq!(2, list_array.len()); |
| assert_eq!(0, list_array.null_count()); |
| assert_eq!(6, list_array.value_offsets()[1]); |
| assert_eq!(2, list_array.value_length(1)); |
| assert_eq!(3, list_array.value(0).as_primitive::<Int32Type>().value(0)); |
| assert_eq!( |
| 3, |
| unsafe { list_array.value_unchecked(0) } |
| .as_primitive::<Int32Type>() |
| .value(0) |
| ); |
| } |
| |
| #[test] |
| fn test_large_list_array() { |
| // Construct a value array |
| let value_data = ArrayData::builder(DataType::Int32) |
| .len(8) |
| .add_buffer(Buffer::from_slice_ref([0, 1, 2, 3, 4, 5, 6, 7])) |
| .build() |
| .unwrap(); |
| |
| // Construct a buffer for value offsets, for the nested array: |
| // [[0, 1, 2], [3, 4, 5], [6, 7]] |
| let value_offsets = Buffer::from_slice_ref([0i64, 3, 6, 8]); |
| |
| // Construct a list array from the above two |
| let list_data_type = DataType::new_large_list(DataType::Int32, false); |
| let list_data = ArrayData::builder(list_data_type.clone()) |
| .len(3) |
| .add_buffer(value_offsets.clone()) |
| .add_child_data(value_data.clone()) |
| .build() |
| .unwrap(); |
| let list_array = LargeListArray::from(list_data); |
| |
| let values = list_array.values(); |
| assert_eq!(value_data, values.to_data()); |
| assert_eq!(DataType::Int32, list_array.value_type()); |
| assert_eq!(3, list_array.len()); |
| assert_eq!(0, list_array.null_count()); |
| assert_eq!(6, list_array.value_offsets()[2]); |
| assert_eq!(2, list_array.value_length(2)); |
| assert_eq!(0, list_array.value(0).as_primitive::<Int32Type>().value(0)); |
| assert_eq!( |
| 0, |
| unsafe { list_array.value_unchecked(0) } |
| .as_primitive::<Int32Type>() |
| .value(0) |
| ); |
| for i in 0..3 { |
| assert!(list_array.is_valid(i)); |
| assert!(!list_array.is_null(i)); |
| } |
| |
| // Now test with a non-zero offset |
| // [[3, 4, 5], [6, 7]] |
| let list_data = ArrayData::builder(list_data_type) |
| .len(2) |
| .offset(1) |
| .add_buffer(value_offsets) |
| .add_child_data(value_data.clone()) |
| .build() |
| .unwrap(); |
| let list_array = LargeListArray::from(list_data); |
| |
| let values = list_array.values(); |
| assert_eq!(value_data, values.to_data()); |
| assert_eq!(DataType::Int32, list_array.value_type()); |
| assert_eq!(2, list_array.len()); |
| assert_eq!(0, list_array.null_count()); |
| assert_eq!(6, list_array.value_offsets()[1]); |
| assert_eq!(2, list_array.value_length(1)); |
| assert_eq!(3, list_array.value(0).as_primitive::<Int32Type>().value(0)); |
| assert_eq!( |
| 3, |
| unsafe { list_array.value_unchecked(0) } |
| .as_primitive::<Int32Type>() |
| .value(0) |
| ); |
| } |
| |
| #[test] |
| fn test_list_array_slice() { |
| // Construct a value array |
| let value_data = ArrayData::builder(DataType::Int32) |
| .len(10) |
| .add_buffer(Buffer::from_slice_ref([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])) |
| .build() |
| .unwrap(); |
| |
| // Construct a buffer for value offsets, for the nested array: |
| // [[0, 1], null, null, [2, 3], [4, 5], null, [6, 7, 8], null, [9]] |
| let value_offsets = Buffer::from_slice_ref([0, 2, 2, 2, 4, 6, 6, 9, 9, 10]); |
| // 01011001 00000001 |
| let mut null_bits: [u8; 2] = [0; 2]; |
| bit_util::set_bit(&mut null_bits, 0); |
| bit_util::set_bit(&mut null_bits, 3); |
| bit_util::set_bit(&mut null_bits, 4); |
| bit_util::set_bit(&mut null_bits, 6); |
| bit_util::set_bit(&mut null_bits, 8); |
| |
| // Construct a list array from the above two |
| let list_data_type = |
| DataType::List(Arc::new(Field::new_list_field(DataType::Int32, false))); |
| let list_data = ArrayData::builder(list_data_type) |
| .len(9) |
| .add_buffer(value_offsets) |
| .add_child_data(value_data.clone()) |
| .null_bit_buffer(Some(Buffer::from(null_bits))) |
| .build() |
| .unwrap(); |
| let list_array = ListArray::from(list_data); |
| |
| let values = list_array.values(); |
| assert_eq!(value_data, values.to_data()); |
| assert_eq!(DataType::Int32, list_array.value_type()); |
| assert_eq!(9, list_array.len()); |
| assert_eq!(4, list_array.null_count()); |
| assert_eq!(2, list_array.value_offsets()[3]); |
| assert_eq!(2, list_array.value_length(3)); |
| |
| let sliced_array = list_array.slice(1, 6); |
| assert_eq!(6, sliced_array.len()); |
| assert_eq!(3, sliced_array.null_count()); |
| |
| for i in 0..sliced_array.len() { |
| if bit_util::get_bit(&null_bits, 1 + i) { |
| assert!(sliced_array.is_valid(i)); |
| } else { |
| assert!(sliced_array.is_null(i)); |
| } |
| } |
| |
| // Check offset and length for each non-null value. |
| let sliced_list_array = sliced_array.as_any().downcast_ref::<ListArray>().unwrap(); |
| assert_eq!(2, sliced_list_array.value_offsets()[2]); |
| assert_eq!(2, sliced_list_array.value_length(2)); |
| assert_eq!(4, sliced_list_array.value_offsets()[3]); |
| assert_eq!(2, sliced_list_array.value_length(3)); |
| assert_eq!(6, sliced_list_array.value_offsets()[5]); |
| assert_eq!(3, sliced_list_array.value_length(5)); |
| } |
| |
| #[test] |
| fn test_large_list_array_slice() { |
| // Construct a value array |
| let value_data = ArrayData::builder(DataType::Int32) |
| .len(10) |
| .add_buffer(Buffer::from_slice_ref([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])) |
| .build() |
| .unwrap(); |
| |
| // Construct a buffer for value offsets, for the nested array: |
| // [[0, 1], null, null, [2, 3], [4, 5], null, [6, 7, 8], null, [9]] |
| let value_offsets = Buffer::from_slice_ref([0i64, 2, 2, 2, 4, 6, 6, 9, 9, 10]); |
| // 01011001 00000001 |
| let mut null_bits: [u8; 2] = [0; 2]; |
| bit_util::set_bit(&mut null_bits, 0); |
| bit_util::set_bit(&mut null_bits, 3); |
| bit_util::set_bit(&mut null_bits, 4); |
| bit_util::set_bit(&mut null_bits, 6); |
| bit_util::set_bit(&mut null_bits, 8); |
| |
| // Construct a list array from the above two |
| let list_data_type = DataType::new_large_list(DataType::Int32, false); |
| let list_data = ArrayData::builder(list_data_type) |
| .len(9) |
| .add_buffer(value_offsets) |
| .add_child_data(value_data.clone()) |
| .null_bit_buffer(Some(Buffer::from(null_bits))) |
| .build() |
| .unwrap(); |
| let list_array = LargeListArray::from(list_data); |
| |
| let values = list_array.values(); |
| assert_eq!(value_data, values.to_data()); |
| assert_eq!(DataType::Int32, list_array.value_type()); |
| assert_eq!(9, list_array.len()); |
| assert_eq!(4, list_array.null_count()); |
| assert_eq!(2, list_array.value_offsets()[3]); |
| assert_eq!(2, list_array.value_length(3)); |
| |
| let sliced_array = list_array.slice(1, 6); |
| assert_eq!(6, sliced_array.len()); |
| assert_eq!(3, sliced_array.null_count()); |
| |
| for i in 0..sliced_array.len() { |
| if bit_util::get_bit(&null_bits, 1 + i) { |
| assert!(sliced_array.is_valid(i)); |
| } else { |
| assert!(sliced_array.is_null(i)); |
| } |
| } |
| |
| // Check offset and length for each non-null value. |
| let sliced_list_array = sliced_array |
| .as_any() |
| .downcast_ref::<LargeListArray>() |
| .unwrap(); |
| assert_eq!(2, sliced_list_array.value_offsets()[2]); |
| assert_eq!(2, sliced_list_array.value_length(2)); |
| assert_eq!(4, sliced_list_array.value_offsets()[3]); |
| assert_eq!(2, sliced_list_array.value_length(3)); |
| assert_eq!(6, sliced_list_array.value_offsets()[5]); |
| assert_eq!(3, sliced_list_array.value_length(5)); |
| } |
| |
| #[test] |
| #[should_panic(expected = "index out of bounds: the len is 10 but the index is 11")] |
| fn test_list_array_index_out_of_bound() { |
| // Construct a value array |
| let value_data = ArrayData::builder(DataType::Int32) |
| .len(10) |
| .add_buffer(Buffer::from_slice_ref([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])) |
| .build() |
| .unwrap(); |
| |
| // Construct a buffer for value offsets, for the nested array: |
| // [[0, 1], null, null, [2, 3], [4, 5], null, [6, 7, 8], null, [9]] |
| let value_offsets = Buffer::from_slice_ref([0i64, 2, 2, 2, 4, 6, 6, 9, 9, 10]); |
| // 01011001 00000001 |
| let mut null_bits: [u8; 2] = [0; 2]; |
| bit_util::set_bit(&mut null_bits, 0); |
| bit_util::set_bit(&mut null_bits, 3); |
| bit_util::set_bit(&mut null_bits, 4); |
| bit_util::set_bit(&mut null_bits, 6); |
| bit_util::set_bit(&mut null_bits, 8); |
| |
| // Construct a list array from the above two |
| let list_data_type = DataType::new_large_list(DataType::Int32, false); |
| let list_data = ArrayData::builder(list_data_type) |
| .len(9) |
| .add_buffer(value_offsets) |
| .add_child_data(value_data) |
| .null_bit_buffer(Some(Buffer::from(null_bits))) |
| .build() |
| .unwrap(); |
| let list_array = LargeListArray::from(list_data); |
| assert_eq!(9, list_array.len()); |
| |
| list_array.value(10); |
| } |
| #[test] |
| #[should_panic(expected = "ListArray data should contain a single buffer only (value offsets)")] |
| // Different error messages, so skip for now |
| // https://github.com/apache/arrow-rs/issues/1545 |
| #[cfg(not(feature = "force_validate"))] |
| fn test_list_array_invalid_buffer_len() { |
| let value_data = unsafe { |
| ArrayData::builder(DataType::Int32) |
| .len(8) |
| .add_buffer(Buffer::from_slice_ref([0, 1, 2, 3, 4, 5, 6, 7])) |
| .build_unchecked() |
| }; |
| let list_data_type = |
| DataType::List(Arc::new(Field::new_list_field(DataType::Int32, false))); |
| let list_data = unsafe { |
| ArrayData::builder(list_data_type) |
| .len(3) |
| .add_child_data(value_data) |
| .build_unchecked() |
| }; |
| drop(ListArray::from(list_data)); |
| } |
| |
| #[test] |
| #[should_panic(expected = "ListArray should contain a single child array (values array)")] |
| // Different error messages, so skip for now |
| // https://github.com/apache/arrow-rs/issues/1545 |
| #[cfg(not(feature = "force_validate"))] |
| fn test_list_array_invalid_child_array_len() { |
| let value_offsets = Buffer::from_slice_ref([0, 2, 5, 7]); |
| let list_data_type = |
| DataType::List(Arc::new(Field::new_list_field(DataType::Int32, false))); |
| let list_data = unsafe { |
| ArrayData::builder(list_data_type) |
| .len(3) |
| .add_buffer(value_offsets) |
| .build_unchecked() |
| }; |
| drop(ListArray::from(list_data)); |
| } |
| |
| #[test] |
| #[should_panic(expected = "[Large]ListArray's datatype must be [Large]ListArray(). It is List")] |
| fn test_from_array_data_validation() { |
| let mut builder = ListBuilder::new(Int32Builder::new()); |
| builder.values().append_value(1); |
| builder.append(true); |
| let array = builder.finish(); |
| let _ = LargeListArray::from(array.into_data()); |
| } |
| |
| #[test] |
| fn test_list_array_offsets_need_not_start_at_zero() { |
| let value_data = ArrayData::builder(DataType::Int32) |
| .len(8) |
| .add_buffer(Buffer::from_slice_ref([0, 1, 2, 3, 4, 5, 6, 7])) |
| .build() |
| .unwrap(); |
| |
| let value_offsets = Buffer::from_slice_ref([2, 2, 5, 7]); |
| |
| let list_data_type = |
| DataType::List(Arc::new(Field::new_list_field(DataType::Int32, false))); |
| let list_data = ArrayData::builder(list_data_type) |
| .len(3) |
| .add_buffer(value_offsets) |
| .add_child_data(value_data) |
| .build() |
| .unwrap(); |
| |
| let list_array = ListArray::from(list_data); |
| assert_eq!(list_array.value_length(0), 0); |
| assert_eq!(list_array.value_length(1), 3); |
| assert_eq!(list_array.value_length(2), 2); |
| } |
| |
| #[test] |
| #[should_panic(expected = "Memory pointer is not aligned with the specified scalar type")] |
| // Different error messages, so skip for now |
| // https://github.com/apache/arrow-rs/issues/1545 |
| #[cfg(not(feature = "force_validate"))] |
| fn test_primitive_array_alignment() { |
| let buf = Buffer::from_slice_ref([0_u64]); |
| let buf2 = buf.slice(1); |
| let array_data = unsafe { |
| ArrayData::builder(DataType::Int32) |
| .add_buffer(buf2) |
| .build_unchecked() |
| }; |
| drop(Int32Array::from(array_data)); |
| } |
| |
| #[test] |
| #[should_panic(expected = "Memory pointer is not aligned with the specified scalar type")] |
| // Different error messages, so skip for now |
| // https://github.com/apache/arrow-rs/issues/1545 |
| #[cfg(not(feature = "force_validate"))] |
| fn test_list_array_alignment() { |
| let buf = Buffer::from_slice_ref([0_u64]); |
| let buf2 = buf.slice(1); |
| |
| let values: [i32; 8] = [0; 8]; |
| let value_data = unsafe { |
| ArrayData::builder(DataType::Int32) |
| .add_buffer(Buffer::from_slice_ref(values)) |
| .build_unchecked() |
| }; |
| |
| let list_data_type = |
| DataType::List(Arc::new(Field::new_list_field(DataType::Int32, false))); |
| let list_data = unsafe { |
| ArrayData::builder(list_data_type) |
| .add_buffer(buf2) |
| .add_child_data(value_data) |
| .build_unchecked() |
| }; |
| drop(ListArray::from(list_data)); |
| } |
| |
| #[test] |
| fn list_array_equality() { |
| // test scaffold |
| fn do_comparison( |
| lhs_data: Vec<Option<Vec<Option<i32>>>>, |
| rhs_data: Vec<Option<Vec<Option<i32>>>>, |
| should_equal: bool, |
| ) { |
| let lhs = ListArray::from_iter_primitive::<Int32Type, _, _>(lhs_data.clone()); |
| let rhs = ListArray::from_iter_primitive::<Int32Type, _, _>(rhs_data.clone()); |
| assert_eq!(lhs == rhs, should_equal); |
| |
| let lhs = LargeListArray::from_iter_primitive::<Int32Type, _, _>(lhs_data); |
| let rhs = LargeListArray::from_iter_primitive::<Int32Type, _, _>(rhs_data); |
| assert_eq!(lhs == rhs, should_equal); |
| } |
| |
| do_comparison( |
| vec![ |
| Some(vec![Some(0), Some(1), Some(2)]), |
| None, |
| Some(vec![Some(3), None, Some(5)]), |
| Some(vec![Some(6), Some(7)]), |
| ], |
| vec![ |
| Some(vec![Some(0), Some(1), Some(2)]), |
| None, |
| Some(vec![Some(3), None, Some(5)]), |
| Some(vec![Some(6), Some(7)]), |
| ], |
| true, |
| ); |
| |
| do_comparison( |
| vec![ |
| None, |
| None, |
| Some(vec![Some(3), None, Some(5)]), |
| Some(vec![Some(6), Some(7)]), |
| ], |
| vec![ |
| Some(vec![Some(0), Some(1), Some(2)]), |
| None, |
| Some(vec![Some(3), None, Some(5)]), |
| Some(vec![Some(6), Some(7)]), |
| ], |
| false, |
| ); |
| |
| do_comparison( |
| vec![ |
| None, |
| None, |
| Some(vec![Some(3), None, Some(5)]), |
| Some(vec![Some(6), Some(7)]), |
| ], |
| vec![ |
| None, |
| None, |
| Some(vec![Some(3), None, Some(5)]), |
| Some(vec![Some(0), Some(0)]), |
| ], |
| false, |
| ); |
| |
| do_comparison( |
| vec![None, None, Some(vec![Some(1)])], |
| vec![None, None, Some(vec![Some(2)])], |
| false, |
| ); |
| } |
| |
| #[test] |
| fn test_empty_offsets() { |
| let f = Arc::new(Field::new("element", DataType::Int32, true)); |
| let string = ListArray::from( |
| ArrayData::builder(DataType::List(f.clone())) |
| .buffers(vec![Buffer::from(&[])]) |
| .add_child_data(ArrayData::new_empty(&DataType::Int32)) |
| .build() |
| .unwrap(), |
| ); |
| assert_eq!(string.value_offsets(), &[0]); |
| let string = LargeListArray::from( |
| ArrayData::builder(DataType::LargeList(f)) |
| .buffers(vec![Buffer::from(&[])]) |
| .add_child_data(ArrayData::new_empty(&DataType::Int32)) |
| .build() |
| .unwrap(), |
| ); |
| assert_eq!(string.len(), 0); |
| assert_eq!(string.value_offsets(), &[0]); |
| } |
| |
| #[test] |
| fn test_try_new() { |
| let offsets = OffsetBuffer::new(vec![0, 1, 4, 5].into()); |
| let values = Int32Array::new(vec![1, 2, 3, 4, 5].into(), None); |
| let values = Arc::new(values) as ArrayRef; |
| |
| let field = Arc::new(Field::new("element", DataType::Int32, false)); |
| ListArray::new(field.clone(), offsets.clone(), values.clone(), None); |
| |
| let nulls = NullBuffer::new_null(3); |
| ListArray::new(field.clone(), offsets, values.clone(), Some(nulls)); |
| |
| let nulls = NullBuffer::new_null(3); |
| let offsets = OffsetBuffer::new(vec![0, 1, 2, 4, 5].into()); |
| let err = LargeListArray::try_new(field, offsets.clone(), values.clone(), Some(nulls)) |
| .unwrap_err(); |
| |
| assert_eq!( |
| err.to_string(), |
| "Invalid argument error: Incorrect length of null buffer for LargeListArray, expected 4 got 3" |
| ); |
| |
| let field = Arc::new(Field::new("element", DataType::Int64, false)); |
| let err = LargeListArray::try_new(field.clone(), offsets.clone(), values.clone(), None) |
| .unwrap_err(); |
| |
| assert_eq!( |
| err.to_string(), |
| "Invalid argument error: LargeListArray expected data type Int64 got Int32 for \"element\"" |
| ); |
| |
| let nulls = NullBuffer::new_null(7); |
| let values = Int64Array::new(vec![0; 7].into(), Some(nulls)); |
| let values = Arc::new(values); |
| |
| let err = |
| LargeListArray::try_new(field, offsets.clone(), values.clone(), None).unwrap_err(); |
| |
| assert_eq!( |
| err.to_string(), |
| "Invalid argument error: Non-nullable field of LargeListArray \"element\" cannot contain nulls" |
| ); |
| |
| let field = Arc::new(Field::new("element", DataType::Int64, true)); |
| LargeListArray::new(field.clone(), offsets.clone(), values, None); |
| |
| let values = Int64Array::new(vec![0; 2].into(), None); |
| let err = LargeListArray::try_new(field, offsets, Arc::new(values), None).unwrap_err(); |
| |
| assert_eq!( |
| err.to_string(), |
| "Invalid argument error: Max offset of 5 exceeds length of values 2" |
| ); |
| } |
| |
| #[test] |
| fn test_from_fixed_size_list() { |
| let mut builder = FixedSizeListBuilder::new(Int32Builder::new(), 3); |
| builder.values().append_slice(&[1, 2, 3]); |
| builder.append(true); |
| builder.values().append_slice(&[0, 0, 0]); |
| builder.append(false); |
| builder.values().append_slice(&[4, 5, 6]); |
| builder.append(true); |
| let list: ListArray = builder.finish().into(); |
| |
| let values: Vec<_> = list |
| .iter() |
| .map(|x| x.map(|x| x.as_primitive::<Int32Type>().values().to_vec())) |
| .collect(); |
| assert_eq!(values, vec![Some(vec![1, 2, 3]), None, Some(vec![4, 5, 6])]) |
| } |
| |
| #[test] |
| fn test_nullable_union() { |
| let offsets = OffsetBuffer::new(vec![0, 1, 4, 5].into()); |
| let mut builder = UnionBuilder::new_dense(); |
| builder.append::<Int32Type>("a", 1).unwrap(); |
| builder.append::<Int32Type>("b", 2).unwrap(); |
| builder.append::<Int32Type>("b", 3).unwrap(); |
| builder.append::<Int32Type>("a", 4).unwrap(); |
| builder.append::<Int32Type>("a", 5).unwrap(); |
| let values = builder.build().unwrap(); |
| let field = Arc::new(Field::new("element", values.data_type().clone(), false)); |
| ListArray::new(field.clone(), offsets, Arc::new(values), None); |
| } |
| |
| #[test] |
| fn test_list_new_null_len() { |
| let field = Arc::new(Field::new_list_field(DataType::Int32, true)); |
| let array = ListArray::new_null(field, 5); |
| assert_eq!(array.len(), 5); |
| } |
| } |