| // Licensed to the Apache Software Foundation (ASF) under one |
| // or more contributor license agreements. See the NOTICE file |
| // distributed with this work for additional information |
| // regarding copyright ownership. The ASF licenses this file |
| // to you under the Apache License, Version 2.0 (the |
| // "License"); you may not use this file except in compliance |
| // with the License. You may obtain a copy of the License at |
| // |
| // http://www.apache.org/licenses/LICENSE-2.0 |
| // |
| // Unless required by applicable law or agreed to in writing, |
| // software distributed under the License is distributed on an |
| // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
| // KIND, either express or implied. See the License for the |
| // specific language governing permissions and limitations |
| // under the License. |
| |
| use crate::{ |
| buffer::MutableBuffer, |
| datatypes::DataType, |
| error::{ArrowError, Result}, |
| util::bit_util, |
| }; |
| use std::mem; |
| |
| use super::{ |
| data::{into_buffers, new_buffers}, |
| ArrayData, ArrayDataBuilder, |
| }; |
| use crate::array::StringOffsetSizeTrait; |
| |
| mod boolean; |
| mod fixed_binary; |
| mod list; |
| mod null; |
| mod primitive; |
| mod structure; |
| mod utils; |
| mod variable_size; |
| |
| type ExtendNullBits<'a> = Box<Fn(&mut _MutableArrayData, usize, usize) + 'a>; |
| // function that extends `[start..start+len]` to the mutable array. |
| // this is dynamic because different data_types influence how buffers and childs are extended. |
| type Extend<'a> = Box<Fn(&mut _MutableArrayData, usize, usize, usize) + 'a>; |
| |
| type ExtendNulls = Box<Fn(&mut _MutableArrayData, usize) -> ()>; |
| |
| /// A mutable [ArrayData] that knows how to freeze itself into an [ArrayData]. |
| /// This is just a data container. |
| #[derive(Debug)] |
| struct _MutableArrayData<'a> { |
| pub data_type: DataType, |
| pub null_count: usize, |
| |
| pub len: usize, |
| pub null_buffer: MutableBuffer, |
| |
| // arrow specification only allows up to 3 buffers (2 ignoring the nulls above). |
| // Thus, we place them in the stack to avoid bound checks and greater data locality. |
| pub buffer1: MutableBuffer, |
| pub buffer2: MutableBuffer, |
| pub child_data: Vec<MutableArrayData<'a>>, |
| } |
| |
| impl<'a> _MutableArrayData<'a> { |
| fn freeze(self, dictionary: Option<ArrayData>) -> ArrayDataBuilder { |
| let buffers = into_buffers(&self.data_type, self.buffer1, self.buffer2); |
| |
| let child_data = match self.data_type { |
| DataType::Dictionary(_, _) => vec![dictionary.unwrap()], |
| _ => { |
| let mut child_data = Vec::with_capacity(self.child_data.len()); |
| for child in self.child_data { |
| child_data.push(child.freeze()); |
| } |
| child_data |
| } |
| }; |
| |
| let mut array_data_builder = ArrayDataBuilder::new(self.data_type) |
| .offset(0) |
| .len(self.len) |
| .null_count(self.null_count) |
| .buffers(buffers) |
| .child_data(child_data); |
| if self.null_count > 0 { |
| array_data_builder = |
| array_data_builder.null_bit_buffer(self.null_buffer.into()); |
| } |
| |
| array_data_builder |
| } |
| } |
| |
| fn build_extend_null_bits(array: &ArrayData, use_nulls: bool) -> ExtendNullBits { |
| if let Some(bitmap) = array.null_bitmap() { |
| let bytes = bitmap.bits.as_slice(); |
| Box::new(move |mutable, start, len| { |
| utils::resize_for_bits(&mut mutable.null_buffer, mutable.len + len); |
| mutable.null_count += utils::set_bits( |
| mutable.null_buffer.as_slice_mut(), |
| bytes, |
| mutable.len, |
| array.offset() + start, |
| len, |
| ); |
| }) |
| } else if use_nulls { |
| Box::new(|mutable, _, len| { |
| utils::resize_for_bits(&mut mutable.null_buffer, mutable.len + len); |
| let write_data = mutable.null_buffer.as_slice_mut(); |
| let offset = mutable.len; |
| (0..len).for_each(|i| { |
| bit_util::set_bit(write_data, offset + i); |
| }); |
| }) |
| } else { |
| Box::new(|_, _, _| {}) |
| } |
| } |
| |
| /// Struct to efficiently and interactively create an [ArrayData] from an existing [ArrayData] by |
| /// copying chunks. |
| /// The main use case of this struct is to perform unary operations to arrays of arbitrary types, such as `filter` and `take`. |
| /// # Example: |
| /// |
| /// ``` |
| /// use arrow::{array::{Int32Array, Array, MutableArrayData}}; |
| /// |
| /// let array = Int32Array::from(vec![1, 2, 3, 4, 5]); |
| /// let array = array.data(); |
| /// // Create a new `MutableArrayData` from an array and with a capacity of 4. |
| /// // Capacity here is equivalent to `Vec::with_capacity` |
| /// let arrays = vec![array]; |
| /// let mut mutable = MutableArrayData::new(arrays, false, 4); |
| /// mutable.extend(0, 1, 3); // extend from the slice [1..3], [2,3] |
| /// mutable.extend(0, 0, 3); // extend from the slice [0..3], [1,2,3] |
| /// // `.freeze()` to convert `MutableArrayData` into a `ArrayData`. |
| /// let new_array = Int32Array::from(mutable.freeze()); |
| /// assert_eq!(Int32Array::from(vec![2, 3, 1, 2, 3]), new_array); |
| /// ``` |
| pub struct MutableArrayData<'a> { |
| arrays: Vec<&'a ArrayData>, |
| // The attributes in [_MutableArrayData] cannot be in [MutableArrayData] due to |
| // mutability invariants (interior mutability): |
| // [MutableArrayData] contains a function that can only mutate [_MutableArrayData], not |
| // [MutableArrayData] itself |
| data: _MutableArrayData<'a>, |
| |
| // the child data of the `Array` in Dictionary arrays. |
| // This is not stored in `MutableArrayData` because these values constant and only needed |
| // at the end, when freezing [_MutableArrayData]. |
| dictionary: Option<ArrayData>, |
| |
| // function used to extend values from arrays. This function's lifetime is bound to the array |
| // because it reads values from it. |
| extend_values: Vec<Extend<'a>>, |
| // function used to extend nulls from arrays. This function's lifetime is bound to the array |
| // because it reads nulls from it. |
| extend_null_bits: Vec<ExtendNullBits<'a>>, |
| |
| // function used to extend nulls. |
| // this is independent of the arrays and therefore has no lifetime. |
| extend_nulls: ExtendNulls, |
| } |
| |
| impl<'a> std::fmt::Debug for MutableArrayData<'a> { |
| fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { |
| // ignores the closures. |
| f.debug_struct("MutableArrayData") |
| .field("data", &self.data) |
| .finish() |
| } |
| } |
| |
| /// Builds an extend that adds `offset` to the source primitive |
| /// Additionally validates that `max` fits into the |
| /// the underlying primitive returning None if not |
| fn build_extend_dictionary( |
| array: &ArrayData, |
| offset: usize, |
| max: usize, |
| ) -> Option<Extend> { |
| use crate::datatypes::*; |
| use std::convert::TryInto; |
| |
| match array.data_type() { |
| DataType::Dictionary(child_data_type, _) => match child_data_type.as_ref() { |
| DataType::UInt8 => { |
| let _: u8 = max.try_into().ok()?; |
| let offset: u8 = offset.try_into().ok()?; |
| Some(primitive::build_extend_with_offset(array, offset)) |
| } |
| DataType::UInt16 => { |
| let _: u16 = max.try_into().ok()?; |
| let offset: u16 = offset.try_into().ok()?; |
| Some(primitive::build_extend_with_offset(array, offset)) |
| } |
| DataType::UInt32 => { |
| let _: u32 = max.try_into().ok()?; |
| let offset: u32 = offset.try_into().ok()?; |
| Some(primitive::build_extend_with_offset(array, offset)) |
| } |
| DataType::UInt64 => { |
| let _: u64 = max.try_into().ok()?; |
| let offset: u64 = offset.try_into().ok()?; |
| Some(primitive::build_extend_with_offset(array, offset)) |
| } |
| DataType::Int8 => { |
| let _: i8 = max.try_into().ok()?; |
| let offset: i8 = offset.try_into().ok()?; |
| Some(primitive::build_extend_with_offset(array, offset)) |
| } |
| DataType::Int16 => { |
| let _: i16 = max.try_into().ok()?; |
| let offset: i16 = offset.try_into().ok()?; |
| Some(primitive::build_extend_with_offset(array, offset)) |
| } |
| DataType::Int32 => { |
| let _: i32 = max.try_into().ok()?; |
| let offset: i32 = offset.try_into().ok()?; |
| Some(primitive::build_extend_with_offset(array, offset)) |
| } |
| DataType::Int64 => { |
| let _: i64 = max.try_into().ok()?; |
| let offset: i64 = offset.try_into().ok()?; |
| Some(primitive::build_extend_with_offset(array, offset)) |
| } |
| _ => unreachable!(), |
| }, |
| _ => None, |
| } |
| } |
| |
| fn build_extend(array: &ArrayData) -> Extend { |
| use crate::datatypes::*; |
| match array.data_type() { |
| DataType::Null => null::build_extend(array), |
| DataType::Boolean => boolean::build_extend(array), |
| DataType::UInt8 => primitive::build_extend::<u8>(array), |
| DataType::UInt16 => primitive::build_extend::<u16>(array), |
| DataType::UInt32 => primitive::build_extend::<u32>(array), |
| DataType::UInt64 => primitive::build_extend::<u64>(array), |
| DataType::Int8 => primitive::build_extend::<i8>(array), |
| DataType::Int16 => primitive::build_extend::<i16>(array), |
| DataType::Int32 => primitive::build_extend::<i32>(array), |
| DataType::Int64 => primitive::build_extend::<i64>(array), |
| DataType::Float32 => primitive::build_extend::<f32>(array), |
| DataType::Float64 => primitive::build_extend::<f64>(array), |
| DataType::Date32 |
| | DataType::Time32(_) |
| | DataType::Interval(IntervalUnit::YearMonth) => { |
| primitive::build_extend::<i32>(array) |
| } |
| DataType::Date64 |
| | DataType::Time64(_) |
| | DataType::Timestamp(_, _) |
| | DataType::Duration(_) |
| | DataType::Interval(IntervalUnit::DayTime) => { |
| primitive::build_extend::<i64>(array) |
| } |
| DataType::Utf8 | DataType::Binary => variable_size::build_extend::<i32>(array), |
| DataType::LargeUtf8 | DataType::LargeBinary => { |
| variable_size::build_extend::<i64>(array) |
| } |
| DataType::List(_) => list::build_extend::<i32>(array), |
| DataType::LargeList(_) => list::build_extend::<i64>(array), |
| DataType::Dictionary(_, _) => unreachable!("should use build_extend_dictionary"), |
| DataType::Struct(_) => structure::build_extend(array), |
| DataType::FixedSizeBinary(_) => fixed_binary::build_extend(array), |
| DataType::Float16 => unreachable!(), |
| /* |
| DataType::FixedSizeList(_, _) => {} |
| DataType::Union(_) => {} |
| */ |
| _ => todo!("Take and filter operations still not supported for this datatype"), |
| } |
| } |
| |
| fn build_extend_nulls(data_type: &DataType) -> ExtendNulls { |
| use crate::datatypes::*; |
| Box::new(match data_type { |
| DataType::Null => null::extend_nulls, |
| DataType::Boolean => boolean::extend_nulls, |
| DataType::UInt8 => primitive::extend_nulls::<u8>, |
| DataType::UInt16 => primitive::extend_nulls::<u16>, |
| DataType::UInt32 => primitive::extend_nulls::<u32>, |
| DataType::UInt64 => primitive::extend_nulls::<u64>, |
| DataType::Int8 => primitive::extend_nulls::<i8>, |
| DataType::Int16 => primitive::extend_nulls::<i16>, |
| DataType::Int32 => primitive::extend_nulls::<i32>, |
| DataType::Int64 => primitive::extend_nulls::<i64>, |
| DataType::Float32 => primitive::extend_nulls::<f32>, |
| DataType::Float64 => primitive::extend_nulls::<f64>, |
| DataType::Date32 |
| | DataType::Time32(_) |
| | DataType::Interval(IntervalUnit::YearMonth) => primitive::extend_nulls::<i32>, |
| DataType::Date64 |
| | DataType::Time64(_) |
| | DataType::Timestamp(_, _) |
| | DataType::Duration(_) |
| | DataType::Interval(IntervalUnit::DayTime) => primitive::extend_nulls::<i64>, |
| DataType::Utf8 | DataType::Binary => variable_size::extend_nulls::<i32>, |
| DataType::LargeUtf8 | DataType::LargeBinary => variable_size::extend_nulls::<i64>, |
| DataType::List(_) => list::extend_nulls::<i32>, |
| DataType::LargeList(_) => list::extend_nulls::<i64>, |
| DataType::Dictionary(child_data_type, _) => match child_data_type.as_ref() { |
| DataType::UInt8 => primitive::extend_nulls::<u8>, |
| DataType::UInt16 => primitive::extend_nulls::<u16>, |
| DataType::UInt32 => primitive::extend_nulls::<u32>, |
| DataType::UInt64 => primitive::extend_nulls::<u64>, |
| DataType::Int8 => primitive::extend_nulls::<i8>, |
| DataType::Int16 => primitive::extend_nulls::<i16>, |
| DataType::Int32 => primitive::extend_nulls::<i32>, |
| DataType::Int64 => primitive::extend_nulls::<i64>, |
| _ => unreachable!(), |
| }, |
| DataType::Struct(_) => structure::extend_nulls, |
| DataType::FixedSizeBinary(_) => fixed_binary::extend_nulls, |
| DataType::Float16 => unreachable!(), |
| /* |
| DataType::FixedSizeList(_, _) => {} |
| DataType::Union(_) => {} |
| */ |
| _ => todo!("Take and filter operations still not supported for this datatype"), |
| }) |
| } |
| |
| fn preallocate_offset_and_binary_buffer<Offset: StringOffsetSizeTrait>( |
| capacity: usize, |
| binary_size: usize, |
| ) -> [MutableBuffer; 2] { |
| // offsets |
| let mut buffer = MutableBuffer::new((1 + capacity) * mem::size_of::<Offset>()); |
| // safety: `unsafe` code assumes that this buffer is initialized with one element |
| if Offset::is_large() { |
| buffer.push(0i64); |
| } else { |
| buffer.push(0i32) |
| } |
| |
| [ |
| buffer, |
| MutableBuffer::new(binary_size * mem::size_of::<u8>()), |
| ] |
| } |
| |
| /// Define capacities of child data or data buffers. |
| #[derive(Debug, Clone)] |
| pub enum Capacities { |
| /// Binary, Utf8 and LargeUtf8 data types |
| /// Define |
| /// * the capacity of the array offsets |
| /// * the capacity of the binary/ str buffer |
| Binary(usize, Option<usize>), |
| /// List and LargeList data types |
| /// Define |
| /// * the capacity of the array offsets |
| /// * the capacity of the child data |
| List(usize, Option<Box<Capacities>>), |
| /// Struct type |
| /// * the capacity of the array |
| /// * the capacities of the fields |
| Struct(usize, Option<Vec<Capacities>>), |
| /// Dictionary type |
| /// * the capacity of the array/keys |
| /// * the capacity of the values |
| Dictionary(usize, Option<Box<Capacities>>), |
| /// Don't preallocate inner buffers and rely on array growth strategy |
| Array(usize), |
| } |
| impl<'a> MutableArrayData<'a> { |
| /// returns a new [MutableArrayData] with capacity to `capacity` slots and specialized to create an |
| /// [ArrayData] from multiple `arrays`. |
| /// |
| /// `use_nulls` is a flag used to optimize insertions. It should be `false` if the only source of nulls |
| /// are the arrays themselves and `true` if the user plans to call [MutableArrayData::extend_nulls]. |
| /// In other words, if `use_nulls` is `false`, calling [MutableArrayData::extend_nulls] should not be used. |
| pub fn new(arrays: Vec<&'a ArrayData>, use_nulls: bool, capacity: usize) -> Self { |
| Self::with_capacities(arrays, use_nulls, Capacities::Array(capacity)) |
| } |
| |
| /// Similar to [MutableArray::new], but lets users define the preallocated capacities of the array. |
| /// See also [MutableArray::new] for more information on the arguments. |
| /// |
| /// # Panic |
| /// This function panics if the given `capacities` don't match the data type of `arrays`. Or when |
| /// a [Capacities] variant is not yet supported. |
| pub fn with_capacities( |
| arrays: Vec<&'a ArrayData>, |
| mut use_nulls: bool, |
| capacities: Capacities, |
| ) -> Self { |
| let data_type = arrays[0].data_type(); |
| use crate::datatypes::*; |
| |
| // if any of the arrays has nulls, insertions from any array requires setting bits |
| // as there is at least one array with nulls. |
| if arrays.iter().any(|array| array.null_count() > 0) { |
| use_nulls = true; |
| }; |
| |
| let mut array_capacity; |
| |
| let [buffer1, buffer2] = match (data_type, &capacities) { |
| (DataType::LargeUtf8, Capacities::Binary(capacity, Some(value_cap))) |
| | (DataType::LargeBinary, Capacities::Binary(capacity, Some(value_cap))) => { |
| array_capacity = *capacity; |
| preallocate_offset_and_binary_buffer::<i64>(*capacity, *value_cap) |
| } |
| (DataType::Utf8, Capacities::Binary(capacity, Some(value_cap))) |
| | (DataType::Binary, Capacities::Binary(capacity, Some(value_cap))) => { |
| array_capacity = *capacity; |
| preallocate_offset_and_binary_buffer::<i32>(*capacity, *value_cap) |
| } |
| (_, Capacities::Array(capacity)) => { |
| array_capacity = *capacity; |
| new_buffers(data_type, *capacity) |
| } |
| _ => panic!("Capacities: {:?} not yet supported", capacities), |
| }; |
| |
| let child_data = match &data_type { |
| DataType::Null |
| | DataType::Boolean |
| | DataType::UInt8 |
| | DataType::UInt16 |
| | DataType::UInt32 |
| | DataType::UInt64 |
| | DataType::Int8 |
| | DataType::Int16 |
| | DataType::Int32 |
| | DataType::Int64 |
| | DataType::Float32 |
| | DataType::Float64 |
| | DataType::Date32 |
| | DataType::Date64 |
| | DataType::Time32(_) |
| | DataType::Time64(_) |
| | DataType::Duration(_) |
| | DataType::Timestamp(_, _) |
| | DataType::Utf8 |
| | DataType::Binary |
| | DataType::LargeUtf8 |
| | DataType::LargeBinary |
| | DataType::Interval(_) |
| | DataType::FixedSizeBinary(_) => vec![], |
| DataType::List(_) | DataType::LargeList(_) => { |
| let childs = arrays |
| .iter() |
| .map(|array| &array.child_data()[0]) |
| .collect::<Vec<_>>(); |
| |
| let capacities = if let Capacities::List(capacity, ref child_capacities) = |
| capacities |
| { |
| array_capacity = capacity; |
| child_capacities |
| .clone() |
| .map(|c| *c) |
| .unwrap_or(Capacities::Array(array_capacity)) |
| } else { |
| Capacities::Array(array_capacity) |
| }; |
| |
| vec![MutableArrayData::with_capacities( |
| childs, use_nulls, capacities, |
| )] |
| } |
| // the dictionary type just appends keys and clones the values. |
| DataType::Dictionary(_, _) => vec![], |
| DataType::Float16 => unreachable!(), |
| DataType::Struct(fields) => match capacities { |
| Capacities::Struct(capacity, Some(ref child_capacities)) => { |
| array_capacity = capacity; |
| (0..fields.len()) |
| .zip(child_capacities) |
| .map(|(i, child_cap)| { |
| let child_arrays = arrays |
| .iter() |
| .map(|array| &array.child_data()[i]) |
| .collect::<Vec<_>>(); |
| MutableArrayData::with_capacities( |
| child_arrays, |
| use_nulls, |
| child_cap.clone(), |
| ) |
| }) |
| .collect::<Vec<_>>() |
| } |
| Capacities::Struct(capacity, None) => { |
| array_capacity = capacity; |
| (0..fields.len()) |
| .map(|i| { |
| let child_arrays = arrays |
| .iter() |
| .map(|array| &array.child_data()[i]) |
| .collect::<Vec<_>>(); |
| MutableArrayData::new(child_arrays, use_nulls, capacity) |
| }) |
| .collect::<Vec<_>>() |
| } |
| _ => (0..fields.len()) |
| .map(|i| { |
| let child_arrays = arrays |
| .iter() |
| .map(|array| &array.child_data()[i]) |
| .collect::<Vec<_>>(); |
| MutableArrayData::new(child_arrays, use_nulls, array_capacity) |
| }) |
| .collect::<Vec<_>>(), |
| }, |
| _ => { |
| todo!("Take and filter operations still not supported for this datatype") |
| } |
| }; |
| |
| let dictionary = match &data_type { |
| DataType::Dictionary(_, _) => match arrays.len() { |
| 0 => unreachable!(), |
| 1 => Some(arrays[0].child_data()[0].clone()), |
| _ => { |
| if let Capacities::Dictionary(_, _) = capacities { |
| panic!("dictionary capacity not yet supported") |
| } |
| // Concat dictionaries together |
| let dictionaries: Vec<_> = |
| arrays.iter().map(|array| &array.child_data()[0]).collect(); |
| let lengths: Vec<_> = dictionaries |
| .iter() |
| .map(|dictionary| dictionary.len()) |
| .collect(); |
| let capacity = lengths.iter().sum(); |
| |
| let mut mutable = |
| MutableArrayData::new(dictionaries, false, capacity); |
| |
| for (i, len) in lengths.iter().enumerate() { |
| mutable.extend(i, 0, *len) |
| } |
| |
| Some(mutable.freeze()) |
| } |
| }, |
| _ => None, |
| }; |
| |
| let extend_nulls = build_extend_nulls(data_type); |
| |
| let extend_null_bits = arrays |
| .iter() |
| .map(|array| build_extend_null_bits(array, use_nulls)) |
| .collect(); |
| |
| let null_buffer = if use_nulls { |
| let null_bytes = bit_util::ceil(array_capacity, 8); |
| MutableBuffer::from_len_zeroed(null_bytes) |
| } else { |
| // create 0 capacity mutable buffer with the intention that it won't be used |
| MutableBuffer::with_capacity(0) |
| }; |
| |
| let extend_values = match &data_type { |
| DataType::Dictionary(_, _) => { |
| let mut next_offset = 0; |
| let extend_values: Result<Vec<_>> = arrays |
| .iter() |
| .map(|array| { |
| let offset = next_offset; |
| next_offset += array.child_data()[0].len(); |
| build_extend_dictionary(array, offset, next_offset) |
| .ok_or(ArrowError::DictionaryKeyOverflowError) |
| }) |
| .collect(); |
| |
| extend_values.expect("MutableArrayData::new is infallible") |
| } |
| _ => arrays.iter().map(|array| build_extend(array)).collect(), |
| }; |
| |
| let data = _MutableArrayData { |
| data_type: data_type.clone(), |
| len: 0, |
| null_count: 0, |
| null_buffer, |
| buffer1, |
| buffer2, |
| child_data, |
| }; |
| Self { |
| arrays, |
| data, |
| dictionary, |
| extend_values, |
| extend_null_bits, |
| extend_nulls, |
| } |
| } |
| |
| /// Extends this [MutableArrayData] with elements from the bounded [ArrayData] at `start` |
| /// and for a size of `len`. |
| /// # Panic |
| /// This function panics if the range is out of bounds, i.e. if `start + len >= array.len()`. |
| pub fn extend(&mut self, index: usize, start: usize, end: usize) { |
| let len = end - start; |
| (self.extend_null_bits[index])(&mut self.data, start, len); |
| (self.extend_values[index])(&mut self.data, index, start, len); |
| self.data.len += len; |
| } |
| |
| /// Extends this [MutableArrayData] with null elements, disregarding the bound arrays |
| pub fn extend_nulls(&mut self, len: usize) { |
| // TODO: null_buffer should probably be extended here as well |
| // otherwise is_valid() could later panic |
| // add test to confirm |
| self.data.null_count += len; |
| (self.extend_nulls)(&mut self.data, len); |
| self.data.len += len; |
| } |
| |
| /// Returns the current length |
| #[inline] |
| pub fn len(&self) -> usize { |
| self.data.len |
| } |
| |
| /// Returns true if len is 0 |
| #[inline] |
| pub fn is_empty(&self) -> bool { |
| self.data.len == 0 |
| } |
| |
| /// Returns the current null count |
| #[inline] |
| pub fn null_count(&self) -> usize { |
| self.data.null_count |
| } |
| |
| /// Creates a [ArrayData] from the pushed regions up to this point, consuming `self`. |
| pub fn freeze(self) -> ArrayData { |
| self.data.freeze(self.dictionary).build() |
| } |
| |
| /// Creates a [ArrayDataBuilder] from the pushed regions up to this point, consuming `self`. |
| /// This is useful for extending the default behavior of MutableArrayData. |
| pub fn into_builder(self) -> ArrayDataBuilder { |
| self.data.freeze(self.dictionary) |
| } |
| } |
| |
| #[cfg(test)] |
| mod tests { |
| use std::{convert::TryFrom, sync::Arc}; |
| |
| use super::*; |
| |
| use crate::{ |
| array::{ |
| Array, ArrayData, ArrayRef, BooleanArray, DictionaryArray, |
| FixedSizeBinaryArray, Int16Array, Int16Type, Int32Array, Int64Array, |
| Int64Builder, ListBuilder, NullArray, PrimitiveBuilder, StringArray, |
| StringDictionaryBuilder, StructArray, UInt8Array, |
| }, |
| buffer::Buffer, |
| datatypes::Field, |
| }; |
| use crate::{ |
| array::{ListArray, StringBuilder}, |
| error::Result, |
| }; |
| |
| /// tests extending from a primitive array w/ offset nor nulls |
| #[test] |
| fn test_primitive() { |
| let b = UInt8Array::from(vec![Some(1), Some(2), Some(3)]); |
| let arrays = vec![b.data()]; |
| let mut a = MutableArrayData::new(arrays, false, 3); |
| a.extend(0, 0, 2); |
| let result = a.freeze(); |
| let array = UInt8Array::from(result); |
| let expected = UInt8Array::from(vec![Some(1), Some(2)]); |
| assert_eq!(array, expected); |
| } |
| |
| /// tests extending from a primitive array with offset w/ nulls |
| #[test] |
| fn test_primitive_offset() { |
| let b = UInt8Array::from(vec![Some(1), Some(2), Some(3)]); |
| let b = b.slice(1, 2); |
| let arrays = vec![b.data()]; |
| let mut a = MutableArrayData::new(arrays, false, 2); |
| a.extend(0, 0, 2); |
| let result = a.freeze(); |
| let array = UInt8Array::from(result); |
| let expected = UInt8Array::from(vec![Some(2), Some(3)]); |
| assert_eq!(array, expected); |
| } |
| |
| /// tests extending from a primitive array with offset and nulls |
| #[test] |
| fn test_primitive_null_offset() { |
| let b = UInt8Array::from(vec![Some(1), None, Some(3)]); |
| let b = b.slice(1, 2); |
| let arrays = vec![b.data()]; |
| let mut a = MutableArrayData::new(arrays, false, 2); |
| a.extend(0, 0, 2); |
| let result = a.freeze(); |
| let array = UInt8Array::from(result); |
| let expected = UInt8Array::from(vec![None, Some(3)]); |
| assert_eq!(array, expected); |
| } |
| |
| #[test] |
| fn test_primitive_null_offset_nulls() { |
| let b = UInt8Array::from(vec![Some(1), Some(2), Some(3)]); |
| let b = b.slice(1, 2); |
| let arrays = vec![b.data()]; |
| let mut a = MutableArrayData::new(arrays, true, 2); |
| a.extend(0, 0, 2); |
| a.extend_nulls(3); |
| a.extend(0, 1, 2); |
| let result = a.freeze(); |
| let array = UInt8Array::from(result); |
| let expected = |
| UInt8Array::from(vec![Some(2), Some(3), None, None, None, Some(3)]); |
| assert_eq!(array, expected); |
| } |
| |
| #[test] |
| fn test_list_null_offset() -> Result<()> { |
| let int_builder = Int64Builder::new(24); |
| let mut builder = ListBuilder::<Int64Builder>::new(int_builder); |
| builder.values().append_slice(&[1, 2, 3])?; |
| builder.append(true)?; |
| builder.values().append_slice(&[4, 5])?; |
| builder.append(true)?; |
| builder.values().append_slice(&[6, 7, 8])?; |
| builder.append(true)?; |
| let array = builder.finish(); |
| let arrays = vec![array.data()]; |
| |
| let mut mutable = MutableArrayData::new(arrays, false, 0); |
| mutable.extend(0, 0, 1); |
| |
| let result = mutable.freeze(); |
| let array = ListArray::from(result); |
| |
| let int_builder = Int64Builder::new(24); |
| let mut builder = ListBuilder::<Int64Builder>::new(int_builder); |
| builder.values().append_slice(&[1, 2, 3])?; |
| builder.append(true)?; |
| let expected = builder.finish(); |
| |
| assert_eq!(array, expected); |
| |
| Ok(()) |
| } |
| |
| /// tests extending from a variable-sized (strings and binary) array w/ offset with nulls |
| #[test] |
| fn test_variable_sized_nulls() { |
| let array = StringArray::from(vec![Some("a"), Some("bc"), None, Some("defh")]); |
| let arrays = vec![array.data()]; |
| |
| let mut mutable = MutableArrayData::new(arrays, false, 0); |
| |
| mutable.extend(0, 1, 3); |
| |
| let result = mutable.freeze(); |
| let result = StringArray::from(result); |
| |
| let expected = StringArray::from(vec![Some("bc"), None]); |
| assert_eq!(result, expected); |
| } |
| |
| /// tests extending from a variable-sized (strings and binary) array |
| /// with an offset and nulls |
| #[test] |
| fn test_variable_sized_offsets() { |
| let array = StringArray::from(vec![Some("a"), Some("bc"), None, Some("defh")]); |
| let array = array.slice(1, 3); |
| |
| let arrays = vec![array.data()]; |
| |
| let mut mutable = MutableArrayData::new(arrays, false, 0); |
| |
| mutable.extend(0, 0, 3); |
| |
| let result = mutable.freeze(); |
| let result = StringArray::from(result); |
| |
| let expected = StringArray::from(vec![Some("bc"), None, Some("defh")]); |
| assert_eq!(result, expected); |
| } |
| |
| #[test] |
| fn test_string_offsets() { |
| let array = StringArray::from(vec![Some("a"), Some("bc"), None, Some("defh")]); |
| let array = array.slice(1, 3); |
| |
| let arrays = vec![array.data()]; |
| |
| let mut mutable = MutableArrayData::new(arrays, false, 0); |
| |
| mutable.extend(0, 0, 3); |
| |
| let result = mutable.freeze(); |
| let result = StringArray::from(result); |
| |
| let expected = StringArray::from(vec![Some("bc"), None, Some("defh")]); |
| assert_eq!(result, expected); |
| } |
| |
| #[test] |
| fn test_multiple_with_nulls() { |
| let array1 = StringArray::from(vec!["hello", "world"]); |
| let array2 = StringArray::from(vec![Some("1"), None]); |
| |
| let arrays = vec![array1.data(), array2.data()]; |
| |
| let mut mutable = MutableArrayData::new(arrays, false, 5); |
| |
| mutable.extend(0, 0, 2); |
| mutable.extend(1, 0, 2); |
| |
| let result = mutable.freeze(); |
| let result = StringArray::from(result); |
| |
| let expected = |
| StringArray::from(vec![Some("hello"), Some("world"), Some("1"), None]); |
| assert_eq!(result, expected); |
| } |
| |
| #[test] |
| fn test_string_null_offset_nulls() { |
| let array = StringArray::from(vec![Some("a"), Some("bc"), None, Some("defh")]); |
| let array = array.slice(1, 3); |
| |
| let arrays = vec![array.data()]; |
| |
| let mut mutable = MutableArrayData::new(arrays, true, 0); |
| |
| mutable.extend(0, 1, 3); |
| mutable.extend_nulls(1); |
| |
| let result = mutable.freeze(); |
| let result = StringArray::from(result); |
| |
| let expected = StringArray::from(vec![None, Some("defh"), None]); |
| assert_eq!(result, expected); |
| } |
| |
| #[test] |
| fn test_bool() { |
| let array = BooleanArray::from(vec![Some(false), Some(true), None, Some(false)]); |
| let arrays = vec![array.data()]; |
| |
| let mut mutable = MutableArrayData::new(arrays, false, 0); |
| |
| mutable.extend(0, 1, 3); |
| |
| let result = mutable.freeze(); |
| let result = BooleanArray::from(result); |
| |
| let expected = BooleanArray::from(vec![Some(true), None]); |
| assert_eq!(result, expected); |
| } |
| |
| #[test] |
| fn test_null() { |
| let array1 = NullArray::new(10); |
| let array2 = NullArray::new(5); |
| let arrays = vec![array1.data(), array2.data()]; |
| |
| let mut mutable = MutableArrayData::new(arrays, false, 0); |
| |
| mutable.extend(0, 1, 3); |
| mutable.extend(1, 0, 1); |
| |
| let result = mutable.freeze(); |
| let result = NullArray::from(result); |
| |
| let expected = NullArray::new(3); |
| assert_eq!(result, expected); |
| } |
| |
| fn create_dictionary_array(values: &[&str], keys: &[Option<&str>]) -> ArrayData { |
| let values = StringArray::from(values.to_vec()); |
| let mut builder = StringDictionaryBuilder::new_with_dictionary( |
| PrimitiveBuilder::<Int16Type>::new(3), |
| &values, |
| ) |
| .unwrap(); |
| for key in keys { |
| if let Some(v) = key { |
| builder.append(v).unwrap(); |
| } else { |
| builder.append_null().unwrap() |
| } |
| } |
| builder.finish().data().clone() |
| } |
| |
| #[test] |
| fn test_dictionary() { |
| // (a, b, c), (0, 1, 0, 2) => (a, b, a, c) |
| let array = create_dictionary_array( |
| &["a", "b", "c"], |
| &[Some("a"), Some("b"), None, Some("c")], |
| ); |
| let arrays = vec![&array]; |
| |
| let mut mutable = MutableArrayData::new(arrays, false, 0); |
| |
| mutable.extend(0, 1, 3); |
| |
| let result = mutable.freeze(); |
| let result = DictionaryArray::from(result); |
| |
| let expected = Int16Array::from(vec![Some(1), None]); |
| assert_eq!(result.keys(), &expected); |
| } |
| |
| #[test] |
| fn test_struct() { |
| let strings: ArrayRef = Arc::new(StringArray::from(vec![ |
| Some("joe"), |
| None, |
| None, |
| Some("mark"), |
| Some("doe"), |
| ])); |
| let ints: ArrayRef = Arc::new(Int32Array::from(vec![ |
| Some(1), |
| Some(2), |
| Some(3), |
| Some(4), |
| Some(5), |
| ])); |
| |
| let array = |
| StructArray::try_from(vec![("f1", strings.clone()), ("f2", ints.clone())]) |
| .unwrap(); |
| let arrays = vec![array.data()]; |
| let mut mutable = MutableArrayData::new(arrays, false, 0); |
| |
| mutable.extend(0, 1, 3); |
| let data = mutable.freeze(); |
| let array = StructArray::from(data); |
| |
| let expected = StructArray::try_from(vec![ |
| ("f1", strings.slice(1, 2)), |
| ("f2", ints.slice(1, 2)), |
| ]) |
| .unwrap(); |
| assert_eq!(array, expected) |
| } |
| |
| #[test] |
| fn test_struct_offset() { |
| let strings: ArrayRef = Arc::new(StringArray::from(vec![ |
| Some("joe"), |
| None, |
| None, |
| Some("mark"), |
| Some("doe"), |
| ])); |
| let ints: ArrayRef = Arc::new(Int32Array::from(vec![ |
| Some(1), |
| Some(2), |
| Some(3), |
| Some(4), |
| Some(5), |
| ])); |
| |
| let array = |
| StructArray::try_from(vec![("f1", strings.clone()), ("f2", ints.clone())]) |
| .unwrap() |
| .slice(1, 3); |
| let arrays = vec![array.data()]; |
| let mut mutable = MutableArrayData::new(arrays, false, 0); |
| |
| mutable.extend(0, 1, 3); |
| let data = mutable.freeze(); |
| let array = StructArray::from(data); |
| |
| let expected_strings: ArrayRef = |
| Arc::new(StringArray::from(vec![None, Some("mark")])); |
| let expected = StructArray::try_from(vec![ |
| ("f1", expected_strings), |
| ("f2", ints.slice(2, 2)), |
| ]) |
| .unwrap(); |
| |
| assert_eq!(array, expected); |
| } |
| |
| #[test] |
| fn test_struct_nulls() { |
| let strings: ArrayRef = Arc::new(StringArray::from(vec![ |
| Some("joe"), |
| None, |
| None, |
| Some("mark"), |
| Some("doe"), |
| ])); |
| let ints: ArrayRef = Arc::new(Int32Array::from(vec![ |
| Some(1), |
| Some(2), |
| None, |
| Some(4), |
| Some(5), |
| ])); |
| |
| let array = |
| StructArray::try_from(vec![("f1", strings.clone()), ("f2", ints.clone())]) |
| .unwrap(); |
| let arrays = vec![array.data()]; |
| |
| let mut mutable = MutableArrayData::new(arrays, false, 0); |
| |
| mutable.extend(0, 1, 3); |
| let data = mutable.freeze(); |
| let array = StructArray::from(data); |
| |
| let expected_string = Arc::new(StringArray::from(vec![None, None])) as ArrayRef; |
| let expected_int = Arc::new(Int32Array::from(vec![Some(2), None])) as ArrayRef; |
| |
| let expected = |
| StructArray::try_from(vec![("f1", expected_string), ("f2", expected_int)]) |
| .unwrap(); |
| assert_eq!(array, expected) |
| } |
| |
| #[test] |
| fn test_struct_many() { |
| let strings: ArrayRef = Arc::new(StringArray::from(vec![ |
| Some("joe"), |
| None, |
| None, |
| Some("mark"), |
| Some("doe"), |
| ])); |
| let ints: ArrayRef = Arc::new(Int32Array::from(vec![ |
| Some(1), |
| Some(2), |
| None, |
| Some(4), |
| Some(5), |
| ])); |
| |
| let array = |
| StructArray::try_from(vec![("f1", strings.clone()), ("f2", ints.clone())]) |
| .unwrap(); |
| let arrays = vec![array.data(), array.data()]; |
| let mut mutable = MutableArrayData::new(arrays, false, 0); |
| |
| mutable.extend(0, 1, 3); |
| mutable.extend(1, 0, 2); |
| let data = mutable.freeze(); |
| let array = StructArray::from(data); |
| |
| let expected_string = |
| Arc::new(StringArray::from(vec![None, None, Some("joe"), None])) as ArrayRef; |
| let expected_int = |
| Arc::new(Int32Array::from(vec![Some(2), None, Some(1), Some(2)])) as ArrayRef; |
| |
| let expected = |
| StructArray::try_from(vec![("f1", expected_string), ("f2", expected_int)]) |
| .unwrap(); |
| assert_eq!(array, expected) |
| } |
| |
| #[test] |
| fn test_binary_fixed_sized_offsets() { |
| let array = FixedSizeBinaryArray::try_from_iter( |
| vec![vec![0, 0], vec![0, 1], vec![0, 2]].into_iter(), |
| ) |
| .expect("Failed to create FixedSizeBinaryArray from iterable"); |
| let array = array.slice(1, 2); |
| // = [[0, 1], [0, 2]] due to the offset = 1 |
| |
| let arrays = vec![array.data()]; |
| |
| let mut mutable = MutableArrayData::new(arrays, false, 0); |
| |
| mutable.extend(0, 1, 2); |
| mutable.extend(0, 0, 1); |
| |
| let result = mutable.freeze(); |
| let result = FixedSizeBinaryArray::from(result); |
| |
| let expected = |
| FixedSizeBinaryArray::try_from_iter(vec![vec![0, 2], vec![0, 1]].into_iter()) |
| .expect("Failed to create FixedSizeBinaryArray from iterable"); |
| assert_eq!(result, expected); |
| } |
| |
| #[test] |
| fn test_list_append() -> Result<()> { |
| let mut builder = ListBuilder::<Int64Builder>::new(Int64Builder::new(24)); |
| builder.values().append_slice(&[1, 2, 3])?; |
| builder.append(true)?; |
| builder.values().append_slice(&[4, 5])?; |
| builder.append(true)?; |
| builder.values().append_slice(&[6, 7, 8])?; |
| builder.values().append_slice(&[9, 10, 11])?; |
| builder.append(true)?; |
| let a = builder.finish(); |
| |
| let a_builder = Int64Builder::new(24); |
| let mut a_builder = ListBuilder::<Int64Builder>::new(a_builder); |
| a_builder.values().append_slice(&[12, 13])?; |
| a_builder.append(true)?; |
| a_builder.append(true)?; |
| a_builder.values().append_slice(&[14, 15])?; |
| a_builder.append(true)?; |
| let b = a_builder.finish(); |
| |
| let c = b.slice(1, 2); |
| |
| let mut mutable = |
| MutableArrayData::new(vec![a.data(), b.data(), c.data()], false, 1); |
| mutable.extend(0, 0, a.len()); |
| mutable.extend(1, 0, b.len()); |
| mutable.extend(2, 0, c.len()); |
| |
| let finished = mutable.freeze(); |
| |
| let expected_int_array = Int64Array::from(vec![ |
| Some(1), |
| Some(2), |
| Some(3), |
| Some(4), |
| Some(5), |
| Some(6), |
| Some(7), |
| Some(8), |
| Some(9), |
| Some(10), |
| Some(11), |
| // append first array |
| Some(12), |
| Some(13), |
| Some(14), |
| Some(15), |
| // append second array |
| Some(14), |
| Some(15), |
| ]); |
| let list_value_offsets = |
| Buffer::from_slice_ref(&[0i32, 3, 5, 11, 13, 13, 15, 15, 17]); |
| let expected_list_data = ArrayData::new( |
| DataType::List(Box::new(Field::new("item", DataType::Int64, true))), |
| 8, |
| None, |
| None, |
| 0, |
| vec![list_value_offsets], |
| vec![expected_int_array.data().clone()], |
| ); |
| assert_eq!(finished, expected_list_data); |
| |
| Ok(()) |
| } |
| |
| #[test] |
| fn test_list_nulls_append() -> Result<()> { |
| let mut builder = ListBuilder::<Int64Builder>::new(Int64Builder::new(32)); |
| builder.values().append_slice(&[1, 2, 3])?; |
| builder.append(true)?; |
| builder.values().append_slice(&[4, 5])?; |
| builder.append(true)?; |
| builder.append(false)?; |
| builder.values().append_slice(&[6, 7, 8])?; |
| builder.values().append_null()?; |
| builder.values().append_null()?; |
| builder.values().append_slice(&[9, 10, 11])?; |
| builder.append(true)?; |
| let a = builder.finish(); |
| let a = a.data(); |
| |
| let mut builder = ListBuilder::<Int64Builder>::new(Int64Builder::new(32)); |
| builder.values().append_slice(&[12, 13])?; |
| builder.append(true)?; |
| builder.append(false)?; |
| builder.append(true)?; |
| builder.values().append_null()?; |
| builder.values().append_null()?; |
| builder.values().append_slice(&[14, 15])?; |
| builder.append(true)?; |
| let b = builder.finish(); |
| let b = b.data(); |
| let c = b.slice(1, 2); |
| let d = b.slice(2, 2); |
| |
| let mut mutable = MutableArrayData::new(vec![a, b, &c, &d], false, 10); |
| |
| mutable.extend(0, 0, a.len()); |
| mutable.extend(1, 0, b.len()); |
| mutable.extend(2, 0, c.len()); |
| mutable.extend(3, 0, d.len()); |
| let result = mutable.freeze(); |
| |
| let expected_int_array = Int64Array::from(vec![ |
| Some(1), |
| Some(2), |
| Some(3), |
| Some(4), |
| Some(5), |
| Some(6), |
| Some(7), |
| Some(8), |
| None, |
| None, |
| Some(9), |
| Some(10), |
| Some(11), |
| // second array |
| Some(12), |
| Some(13), |
| None, |
| None, |
| Some(14), |
| Some(15), |
| // slice(1, 2) results in no values added |
| None, |
| None, |
| Some(14), |
| Some(15), |
| ]); |
| let list_value_offsets = |
| Buffer::from_slice_ref(&[0, 3, 5, 5, 13, 15, 15, 15, 19, 19, 19, 19, 23]); |
| let expected_list_data = ArrayData::new( |
| DataType::List(Box::new(Field::new("item", DataType::Int64, true))), |
| 12, |
| None, |
| Some(Buffer::from(&[0b11011011, 0b1110])), |
| 0, |
| vec![list_value_offsets], |
| vec![expected_int_array.data().clone()], |
| ); |
| assert_eq!(result, expected_list_data); |
| |
| Ok(()) |
| } |
| |
| #[test] |
| fn test_list_of_strings_append() -> Result<()> { |
| // [["alpha", "beta", None]] |
| let mut builder = ListBuilder::new(StringBuilder::new(32)); |
| builder.values().append_value("Hello")?; |
| builder.values().append_value("Arrow")?; |
| builder.values().append_null()?; |
| builder.append(true)?; |
| let a = builder.finish(); |
| |
| // [["alpha", "beta"], [None], ["gamma", "delta", None]] |
| let mut builder = ListBuilder::new(StringBuilder::new(32)); |
| builder.values().append_value("alpha")?; |
| builder.values().append_value("beta")?; |
| builder.append(true)?; |
| builder.values().append_null()?; |
| builder.append(true)?; |
| builder.values().append_value("gamma")?; |
| builder.values().append_value("delta")?; |
| builder.values().append_null()?; |
| builder.append(true)?; |
| let b = builder.finish(); |
| |
| let mut mutable = MutableArrayData::new(vec![a.data(), b.data()], false, 10); |
| |
| mutable.extend(0, 0, a.len()); |
| mutable.extend(1, 0, b.len()); |
| mutable.extend(1, 1, 3); |
| mutable.extend(1, 0, 0); |
| let result = mutable.freeze(); |
| |
| let expected_string_array = StringArray::from(vec![ |
| // extend a[0..a.len()] |
| // a[0] |
| Some("Hello"), |
| Some("Arrow"), |
| None, |
| // extend b[0..b.len()] |
| // b[0] |
| Some("alpha"), |
| Some("beta"), |
| // b[1] |
| None, |
| // b[2] |
| Some("gamma"), |
| Some("delta"), |
| None, |
| // extend b[1..3] |
| // b[1] |
| None, |
| // b[2] |
| Some("gamma"), |
| Some("delta"), |
| None, |
| // extend b[0..0] |
| ]); |
| let list_value_offsets = Buffer::from_slice_ref(&[0, 3, 5, 6, 9, 10, 13]); |
| let expected_list_data = ArrayData::new( |
| DataType::List(Box::new(Field::new("item", DataType::Utf8, true))), |
| 6, |
| None, |
| None, |
| 0, |
| vec![list_value_offsets], |
| vec![expected_string_array.data().clone()], |
| ); |
| assert_eq!(result, expected_list_data); |
| Ok(()) |
| } |
| |
| #[test] |
| fn test_fixed_size_binary_append() { |
| let a = vec![Some(vec![1, 2]), Some(vec![3, 4]), Some(vec![5, 6])]; |
| let a = FixedSizeBinaryArray::try_from_sparse_iter(a.into_iter()) |
| .expect("Failed to create FixedSizeBinaryArray from iterable"); |
| |
| let b = vec![ |
| None, |
| Some(vec![7, 8]), |
| Some(vec![9, 10]), |
| None, |
| Some(vec![13, 14]), |
| None, |
| ]; |
| let b = FixedSizeBinaryArray::try_from_sparse_iter(b.into_iter()) |
| .expect("Failed to create FixedSizeBinaryArray from iterable"); |
| |
| let mut mutable = MutableArrayData::new(vec![a.data(), b.data()], false, 10); |
| |
| mutable.extend(0, 0, a.len()); |
| mutable.extend(1, 0, b.len()); |
| mutable.extend(1, 1, 4); |
| mutable.extend(1, 2, 3); |
| mutable.extend(1, 5, 5); |
| let result = mutable.freeze(); |
| |
| let expected = vec![ |
| // a |
| Some(vec![1, 2]), |
| Some(vec![3, 4]), |
| Some(vec![5, 6]), |
| // b |
| None, |
| Some(vec![7, 8]), |
| Some(vec![9, 10]), |
| None, |
| Some(vec![13, 14]), |
| None, |
| // b[1..4] |
| Some(vec![7, 8]), |
| Some(vec![9, 10]), |
| None, |
| // b[2..3] |
| Some(vec![9, 10]), |
| // b[4..4] |
| ]; |
| let expected = FixedSizeBinaryArray::try_from_sparse_iter(expected.into_iter()) |
| .expect("Failed to create FixedSizeBinaryArray from iterable"); |
| assert_eq!(&result, expected.data()); |
| } |
| |
| /* |
| // this is an old test used on a meanwhile removed dead code |
| // that is still useful when `MutableArrayData` supports fixed-size lists. |
| #[test] |
| fn test_fixed_size_list_append() -> Result<()> { |
| let int_builder = UInt16Builder::new(64); |
| let mut builder = FixedSizeListBuilder::<UInt16Builder>::new(int_builder, 2); |
| builder.values().append_slice(&[1, 2])?; |
| builder.append(true)?; |
| builder.values().append_slice(&[3, 4])?; |
| builder.append(false)?; |
| builder.values().append_slice(&[5, 6])?; |
| builder.append(true)?; |
| |
| let a_builder = UInt16Builder::new(64); |
| let mut a_builder = FixedSizeListBuilder::<UInt16Builder>::new(a_builder, 2); |
| a_builder.values().append_slice(&[7, 8])?; |
| a_builder.append(true)?; |
| a_builder.values().append_slice(&[9, 10])?; |
| a_builder.append(true)?; |
| a_builder.values().append_slice(&[11, 12])?; |
| a_builder.append(false)?; |
| a_builder.values().append_slice(&[13, 14])?; |
| a_builder.append(true)?; |
| a_builder.values().append_null()?; |
| a_builder.values().append_null()?; |
| a_builder.append(true)?; |
| let a = a_builder.finish(); |
| |
| // append array |
| builder.append_data(&[ |
| a.data(), |
| a.slice(1, 3).data(), |
| a.slice(2, 1).data(), |
| a.slice(5, 0).data(), |
| ])?; |
| let finished = builder.finish(); |
| |
| let expected_int_array = UInt16Array::from(vec![ |
| Some(1), |
| Some(2), |
| Some(3), |
| Some(4), |
| Some(5), |
| Some(6), |
| // append first array |
| Some(7), |
| Some(8), |
| Some(9), |
| Some(10), |
| Some(11), |
| Some(12), |
| Some(13), |
| Some(14), |
| None, |
| None, |
| // append slice(1, 3) |
| Some(9), |
| Some(10), |
| Some(11), |
| Some(12), |
| Some(13), |
| Some(14), |
| // append slice(2, 1) |
| Some(11), |
| Some(12), |
| ]); |
| let expected_list_data = ArrayData::new( |
| DataType::FixedSizeList( |
| Box::new(Field::new("item", DataType::UInt16, true)), |
| 2, |
| ), |
| 12, |
| None, |
| None, |
| 0, |
| vec![], |
| vec![expected_int_array.data()], |
| ); |
| let expected_list = |
| FixedSizeListArray::from(Arc::new(expected_list_data) as ArrayData); |
| assert_eq!(&expected_list.values(), &finished.values()); |
| assert_eq!(expected_list.len(), finished.len()); |
| |
| Ok(()) |
| } |
| */ |
| } |