| // Licensed to the Apache Software Foundation (ASF) under one |
| // or more contributor license agreements. See the NOTICE file |
| // distributed with this work for additional information |
| // regarding copyright ownership. The ASF licenses this file |
| // to you under the Apache License, Version 2.0 (the |
| // "License"); you may not use this file except in compliance |
| // with the License. You may obtain a copy of the License at |
| // |
| // http://www.apache.org/licenses/LICENSE-2.0 |
| // |
| // Unless required by applicable law or agreed to in writing, |
| // software distributed under the License is distributed on an |
| // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
| // KIND, either express or implied. See the License for the |
| // specific language governing permissions and limitations |
| // under the License. |
| |
| //! Defines a [`BufferBuilder`](crate::array::BufferBuilder) capable |
| //! of creating a [`Buffer`](crate::buffer::Buffer) which can be used |
| //! as an internal buffer in an [`ArrayData`](crate::array::ArrayData) |
| //! object. |
| |
| use std::any::Any; |
| use std::collections::HashMap; |
| use std::fmt; |
| use std::marker::PhantomData; |
| use std::mem; |
| use std::{convert::TryInto, sync::Arc}; |
| |
| use crate::array::*; |
| use crate::buffer::{Buffer, MutableBuffer}; |
| use crate::datatypes::*; |
| use crate::error::{ArrowError, Result}; |
| use crate::util::bit_util; |
| |
| /// Converts a `MutableBuffer` to a `BufferBuilder<T>`. |
| /// |
| /// `slots` is the number of array slots currently represented in the `MutableBuffer`. |
| pub(crate) fn mutable_buffer_to_builder<T: ArrowPrimitiveType>( |
| mutable_buffer: MutableBuffer, |
| slots: usize, |
| ) -> BufferBuilder<T> { |
| BufferBuilder::<T> { |
| buffer: mutable_buffer, |
| len: slots, |
| _marker: PhantomData, |
| } |
| } |
| |
| /// Converts a `BufferBuilder<T>` into it's underlying `MutableBuffer`. |
| /// |
| /// `From` is not implemented because associated type bounds are unstable. |
| pub(crate) fn builder_to_mutable_buffer<T: ArrowPrimitiveType>( |
| builder: BufferBuilder<T>, |
| ) -> MutableBuffer { |
| builder.buffer |
| } |
| |
| /// Builder for creating a [`Buffer`](crate::buffer::Buffer) object. |
| /// |
| /// This builder is implemented for primitive types and creates a |
| /// buffer with a zero-copy `build()` method. |
| /// |
| /// See trait [`BufferBuilderTrait`](crate::array::BufferBuilderTrait) |
| /// for further documentation and examples. |
| /// |
| /// A [`Buffer`](crate::buffer::Buffer) is the underlying data |
| /// structure of Arrow's [`Arrays`](crate::array::Array). |
| /// |
| /// For all supported types, there are type definitions for the |
| /// generic version of `BufferBuilder<T>`, e.g. `UInt8BufferBuilder`. |
| /// |
| /// # Example: |
| /// |
| /// ``` |
| /// use arrow::array::{UInt8BufferBuilder, BufferBuilderTrait}; |
| /// |
| /// # fn main() -> arrow::error::Result<()> { |
| /// let mut builder = UInt8BufferBuilder::new(100); |
| /// builder.append_slice(&[42, 43, 44]); |
| /// builder.append(45); |
| /// let buffer = builder.finish(); |
| /// |
| /// assert_eq!(unsafe { buffer.typed_data::<u8>() }, &[42, 43, 44, 45]); |
| /// # Ok(()) |
| /// # } |
| /// ``` |
| #[derive(Debug)] |
| pub struct BufferBuilder<T: ArrowPrimitiveType> { |
| buffer: MutableBuffer, |
| len: usize, |
| _marker: PhantomData<T>, |
| } |
| |
| /// Trait for simplifying the construction of [`Buffers`](crate::buffer::Buffer). |
| /// |
| /// This trait is used mainly to offer separate implementations for |
| /// numeric types and boolean types, while still be able to call methods on buffer builder |
| /// with generic primitive type. |
| /// Separate implementations of this trait allow to add implementation-details, |
| /// e.g. the implementation for boolean types uses bit-packing. |
| pub trait BufferBuilderTrait<T: ArrowPrimitiveType> { |
| /// Creates a new builder with initial capacity for _at least_ `capacity` |
| /// elements of type `T`. |
| /// |
| /// The capacity can later be manually adjusted with the |
| /// [`reserve()`](BufferBuilderTrait::reserve) method. |
| /// Also the |
| /// [`append()`](BufferBuilderTrait::append), |
| /// [`append_slice()`](BufferBuilderTrait::append_slice) and |
| /// [`advance()`](BufferBuilderTrait::advance) |
| /// methods automatically increase the capacity if needed. |
| /// |
| /// # Example: |
| /// |
| /// ``` |
| /// use arrow::array::{UInt8BufferBuilder, BufferBuilderTrait}; |
| /// |
| /// let mut builder = UInt8BufferBuilder::new(10); |
| /// |
| /// assert!(builder.capacity() >= 10); |
| /// ``` |
| fn new(capacity: usize) -> Self; |
| |
| /// Returns the current number of array elements in the internal buffer. |
| /// |
| /// # Example: |
| /// |
| /// ``` |
| /// use arrow::array::{UInt8BufferBuilder, BufferBuilderTrait}; |
| /// |
| /// let mut builder = UInt8BufferBuilder::new(10); |
| /// builder.append(42); |
| /// |
| /// assert_eq!(builder.len(), 1); |
| /// ``` |
| fn len(&self) -> usize; |
| |
| /// Returns whether the internal buffer is empty. |
| /// |
| /// # Example: |
| /// |
| /// ``` |
| /// use arrow::array::{UInt8BufferBuilder, BufferBuilderTrait}; |
| /// |
| /// let mut builder = UInt8BufferBuilder::new(10); |
| /// builder.append(42); |
| /// |
| /// assert_eq!(builder.is_empty(), false); |
| /// ``` |
| fn is_empty(&self) -> bool; |
| |
| /// Returns the actual capacity (number of elements) of the internal buffer. |
| /// |
| /// Note: the internal capacity returned by this method might be larger than |
| /// what you'd expect after setting the capacity in the `new()` or `reserve()` |
| /// functions. |
| fn capacity(&self) -> usize; |
| |
| /// Increases the number of elements in the internal buffer by `n` |
| /// and resizes the buffer as needed. |
| /// |
| /// The values of the newly added elements are undefined. |
| /// This method is usually used when appending `NULL` values to the buffer |
| /// as they still require physical memory space. |
| /// |
| /// # Example: |
| /// |
| /// ``` |
| /// use arrow::array::{UInt8BufferBuilder, BufferBuilderTrait}; |
| /// |
| /// let mut builder = UInt8BufferBuilder::new(10); |
| /// builder.advance(2); |
| /// |
| /// assert_eq!(builder.len(), 2); |
| /// ``` |
| fn advance(&mut self, n: usize) -> Result<()>; |
| |
| /// Reserves memory for _at least_ `n` more elements of type `T`. |
| /// |
| /// # Example: |
| /// |
| /// ``` |
| /// use arrow::array::{UInt8BufferBuilder, BufferBuilderTrait}; |
| /// |
| /// let mut builder = UInt8BufferBuilder::new(10); |
| /// builder.reserve(10); |
| /// |
| /// assert!(builder.capacity() >= 20); |
| /// ``` |
| fn reserve(&mut self, n: usize); |
| |
| /// Appends a value of type `T` into the builder, |
| /// growing the internal buffer as needed. |
| /// |
| /// # Example: |
| /// |
| /// ``` |
| /// use arrow::array::{UInt8BufferBuilder, BufferBuilderTrait}; |
| /// |
| /// let mut builder = UInt8BufferBuilder::new(10); |
| /// builder.append(42); |
| /// |
| /// assert_eq!(builder.len(), 1); |
| /// ``` |
| fn append(&mut self, value: T::Native) -> Result<()>; |
| |
| /// Appends a value of type `T` into the builder N times, |
| /// growing the internal buffer as needed. |
| /// |
| /// # Example: |
| /// |
| /// ``` |
| /// use arrow::array::{UInt8BufferBuilder, BufferBuilderTrait}; |
| /// |
| /// let mut builder = UInt8BufferBuilder::new(10); |
| /// builder.append_n(10, 42); |
| /// |
| /// assert_eq!(builder.len(), 10); |
| /// ``` |
| fn append_n(&mut self, n: usize, value: T::Native) -> Result<()>; |
| |
| /// Appends a slice of type `T`, growing the internal buffer as needed. |
| /// |
| /// # Example: |
| /// |
| /// ``` |
| /// use arrow::array::{UInt8BufferBuilder, BufferBuilderTrait}; |
| /// |
| /// let mut builder = UInt8BufferBuilder::new(10); |
| /// builder.append_slice(&[42, 44, 46]); |
| /// |
| /// assert_eq!(builder.len(), 3); |
| /// ``` |
| fn append_slice(&mut self, slice: &[T::Native]) -> Result<()>; |
| |
| /// Resets this builder and returns an immutable [`Buffer`](crate::buffer::Buffer). |
| /// |
| /// # Example: |
| /// |
| /// ``` |
| /// use arrow::array::{UInt8BufferBuilder, BufferBuilderTrait}; |
| /// |
| /// let mut builder = UInt8BufferBuilder::new(10); |
| /// builder.append_slice(&[42, 44, 46]); |
| /// |
| /// let buffer = builder.finish(); |
| /// |
| /// assert_eq!(unsafe { buffer.typed_data::<u8>() }, &[42, 44, 46]); |
| /// ``` |
| fn finish(&mut self) -> Buffer; |
| } |
| |
| impl<T: ArrowPrimitiveType> BufferBuilderTrait<T> for BufferBuilder<T> { |
| #[inline] |
| fn new(capacity: usize) -> Self { |
| let buffer = if T::DATA_TYPE == DataType::Boolean { |
| let byte_capacity = bit_util::ceil(capacity, 8); |
| let actual_capacity = bit_util::round_upto_multiple_of_64(byte_capacity); |
| let mut buffer = MutableBuffer::new(actual_capacity); |
| buffer.set_null_bits(0, actual_capacity); |
| buffer |
| } else { |
| MutableBuffer::new(capacity * mem::size_of::<T::Native>()) |
| }; |
| |
| Self { |
| buffer, |
| len: 0, |
| _marker: PhantomData, |
| } |
| } |
| |
| fn len(&self) -> usize { |
| self.len |
| } |
| |
| fn is_empty(&self) -> bool { |
| self.len == 0 |
| } |
| |
| fn capacity(&self) -> usize { |
| let bit_capacity = self.buffer.capacity() * 8; |
| bit_capacity / T::get_bit_width() |
| } |
| |
| #[inline] |
| fn advance(&mut self, i: usize) -> Result<()> { |
| let new_buffer_len = if T::DATA_TYPE == DataType::Boolean { |
| bit_util::ceil(self.len + i, 8) |
| } else { |
| (self.len + i) * mem::size_of::<T::Native>() |
| }; |
| self.buffer.resize(new_buffer_len); |
| self.len += i; |
| Ok(()) |
| } |
| |
| #[inline] |
| fn reserve(&mut self, n: usize) { |
| let new_capacity = self.len + n; |
| if T::DATA_TYPE == DataType::Boolean { |
| if new_capacity > self.capacity() { |
| let new_byte_capacity = bit_util::ceil(new_capacity, 8); |
| let existing_capacity = self.buffer.capacity(); |
| let new_capacity = self.buffer.reserve(new_byte_capacity); |
| self.buffer |
| .set_null_bits(existing_capacity, new_capacity - existing_capacity); |
| } |
| } else { |
| let byte_capacity = mem::size_of::<T::Native>() * new_capacity; |
| self.buffer.reserve(byte_capacity); |
| } |
| } |
| |
| #[inline] |
| fn append(&mut self, v: T::Native) -> Result<()> { |
| self.reserve(1); |
| if T::DATA_TYPE == DataType::Boolean { |
| if v != T::default_value() { |
| unsafe { |
| bit_util::set_bit_raw(self.buffer.raw_data_mut(), self.len); |
| } |
| } |
| self.len += 1; |
| } else { |
| self.write_bytes(v.to_byte_slice(), 1); |
| } |
| Ok(()) |
| } |
| |
| #[inline] |
| fn append_n(&mut self, n: usize, v: T::Native) -> Result<()> { |
| self.reserve(n); |
| if T::DATA_TYPE == DataType::Boolean { |
| if n != 0 && v != T::default_value() { |
| let data = unsafe { |
| std::slice::from_raw_parts_mut( |
| self.buffer.raw_data_mut(), |
| self.buffer.capacity(), |
| ) |
| }; |
| (self.len..self.len + n).for_each(|i| bit_util::set_bit(data, i)) |
| } |
| self.len += n; |
| } else { |
| for _ in 0..n { |
| self.write_bytes(v.to_byte_slice(), 1); |
| } |
| } |
| Ok(()) |
| } |
| |
| #[inline] |
| fn append_slice(&mut self, slice: &[T::Native]) -> Result<()> { |
| let array_slots = slice.len(); |
| self.reserve(array_slots); |
| |
| if T::DATA_TYPE == DataType::Boolean { |
| for v in slice { |
| if *v != T::default_value() { |
| // For performance the `len` of the buffer is not |
| // updated on each append but is updated in the |
| // `freeze` method instead. |
| unsafe { |
| bit_util::set_bit_raw(self.buffer.raw_data_mut(), self.len); |
| } |
| } |
| self.len += 1; |
| } |
| Ok(()) |
| } else { |
| self.write_bytes(slice.to_byte_slice(), array_slots); |
| Ok(()) |
| } |
| } |
| |
| #[inline] |
| fn finish(&mut self) -> Buffer { |
| if T::DATA_TYPE == DataType::Boolean { |
| // `append` does not update the buffer's `len` so do it before `freeze` is called. |
| let new_buffer_len = bit_util::ceil(self.len, 8); |
| debug_assert!(new_buffer_len >= self.buffer.len()); |
| let mut buf = std::mem::replace(&mut self.buffer, MutableBuffer::new(0)); |
| self.len = 0; |
| buf.resize(new_buffer_len); |
| buf.freeze() |
| } else { |
| let buf = std::mem::replace(&mut self.buffer, MutableBuffer::new(0)); |
| self.len = 0; |
| buf.freeze() |
| } |
| } |
| } |
| |
| impl<T: ArrowPrimitiveType> BufferBuilder<T> { |
| /// Writes a byte slice to the underlying buffer and updates the `len`, i.e. the |
| /// number array elements in the builder. Also, converts the `io::Result` |
| /// required by the `Write` trait to the Arrow `Result` type. |
| fn write_bytes(&mut self, bytes: &[u8], len_added: usize) { |
| self.buffer.extend_from_slice(bytes); |
| self.len += len_added; |
| } |
| } |
| |
| /// Trait for dealing with different array builders at runtime |
| pub trait ArrayBuilder: Any { |
| /// Returns the number of array slots in the builder |
| fn len(&self) -> usize; |
| |
| /// Returns whether number of array slots is zero |
| fn is_empty(&self) -> bool; |
| |
| /// Appends data from other arrays into the builder |
| /// |
| /// This is most useful when concatenating arrays of the same type into a builder. |
| fn append_data(&mut self, data: &[ArrayDataRef]) -> Result<()>; |
| |
| /// Returns the data type of the builder |
| /// |
| /// This is used for validating array data types in `append_data` |
| fn data_type(&self) -> DataType; |
| |
| /// Builds the array |
| fn finish(&mut self) -> ArrayRef; |
| |
| /// Returns the builder as a non-mutable `Any` reference. |
| /// |
| /// This is most useful when one wants to call non-mutable APIs on a specific builder |
| /// type. In this case, one can first cast this into a `Any`, and then use |
| /// `downcast_ref` to get a reference on the specific builder. |
| fn as_any(&self) -> &Any; |
| |
| /// Returns the builder as a mutable `Any` reference. |
| /// |
| /// This is most useful when one wants to call mutable APIs on a specific builder |
| /// type. In this case, one can first cast this into a `Any`, and then use |
| /// `downcast_mut` to get a reference on the specific builder. |
| fn as_any_mut(&mut self) -> &mut Any; |
| |
| /// Returns the boxed builder as a box of `Any`. |
| fn into_box_any(self: Box<Self>) -> Box<Any>; |
| } |
| |
| /// Array builder for fixed-width primitive types |
| #[derive(Debug)] |
| pub struct PrimitiveBuilder<T: ArrowPrimitiveType> { |
| values_builder: BufferBuilder<T>, |
| bitmap_builder: BooleanBufferBuilder, |
| } |
| |
| impl<T: ArrowPrimitiveType> ArrayBuilder for PrimitiveBuilder<T> { |
| /// Returns the builder as a non-mutable `Any` reference. |
| fn as_any(&self) -> &Any { |
| self |
| } |
| |
| /// Returns the builder as a mutable `Any` reference. |
| fn as_any_mut(&mut self) -> &mut Any { |
| self |
| } |
| |
| /// Returns the boxed builder as a box of `Any`. |
| fn into_box_any(self: Box<Self>) -> Box<Any> { |
| self |
| } |
| |
| /// Returns the number of array slots in the builder |
| fn len(&self) -> usize { |
| self.values_builder.len |
| } |
| |
| /// Returns whether the number of array slots is zero |
| fn is_empty(&self) -> bool { |
| self.values_builder.is_empty() |
| } |
| |
| /// Appends data from other arrays into the builder |
| /// |
| /// This is most useful when concatenating arrays of the same type into a builder. |
| fn append_data(&mut self, data: &[ArrayDataRef]) -> Result<()> { |
| // validate arraydata and reserve memory |
| let mut total_len = 0; |
| for array in data { |
| if array.data_type() != &self.data_type() { |
| return Err(ArrowError::InvalidArgumentError( |
| "Cannot append data to builder if data types are different" |
| .to_string(), |
| )); |
| } |
| if array.buffers().len() != 1 { |
| return Err(ArrowError::InvalidArgumentError( |
| "Primitive arrays should have 1 buffer".to_string(), |
| )); |
| } |
| total_len += array.len(); |
| } |
| // reserve memory |
| self.values_builder.reserve(total_len); |
| self.bitmap_builder.reserve(total_len); |
| |
| let mul = T::get_bit_width() / 8; |
| for array in data { |
| let len = array.len(); |
| if len == 0 { |
| continue; |
| } |
| let offset = array.offset(); |
| if array.data_type() == &DataType::Boolean { |
| // booleans are bit-packed, thus we iterate through the array |
| let array = PrimitiveArray::<T>::from(array.clone()); |
| for i in 0..len { |
| self.values_builder.append(array.value(i))?; |
| } |
| } else { |
| let sliced = array.buffers()[0].data(); |
| // slice into data by factoring (offset and length) * byte width |
| self.values_builder |
| .write_bytes(&sliced[(offset * mul)..((len + offset) * mul)], len); |
| } |
| |
| for i in 0..len { |
| // account for offset as `ArrayData` does not |
| self.bitmap_builder.append(array.is_valid(i))?; |
| } |
| } |
| Ok(()) |
| } |
| |
| /// Returns the data type of the builder |
| /// |
| /// This is used for validating array data types in `append_data` |
| fn data_type(&self) -> DataType { |
| T::DATA_TYPE |
| } |
| |
| /// Builds the array and reset this builder. |
| fn finish(&mut self) -> ArrayRef { |
| Arc::new(self.finish()) |
| } |
| } |
| |
| impl<T: ArrowPrimitiveType> PrimitiveBuilder<T> { |
| /// Creates a new primitive array builder |
| pub fn new(capacity: usize) -> Self { |
| Self { |
| values_builder: BufferBuilder::<T>::new(capacity), |
| bitmap_builder: BooleanBufferBuilder::new(capacity), |
| } |
| } |
| |
| /// Returns the capacity of this builder measured in slots of type `T` |
| pub fn capacity(&self) -> usize { |
| self.values_builder.capacity() |
| } |
| |
| /// Appends a value of type `T` into the builder |
| pub fn append_value(&mut self, v: T::Native) -> Result<()> { |
| self.bitmap_builder.append(true)?; |
| self.values_builder.append(v)?; |
| Ok(()) |
| } |
| |
| /// Appends a null slot into the builder |
| pub fn append_null(&mut self) -> Result<()> { |
| self.bitmap_builder.append(false)?; |
| self.values_builder.advance(1)?; |
| Ok(()) |
| } |
| |
| /// Appends an `Option<T>` into the builder |
| pub fn append_option(&mut self, v: Option<T::Native>) -> Result<()> { |
| match v { |
| None => self.append_null()?, |
| Some(v) => self.append_value(v)?, |
| }; |
| Ok(()) |
| } |
| |
| /// Appends a slice of type `T` into the builder |
| pub fn append_slice(&mut self, v: &[T::Native]) -> Result<()> { |
| self.bitmap_builder.append_n(v.len(), true)?; |
| self.values_builder.append_slice(v)?; |
| Ok(()) |
| } |
| |
| /// Appends values from a slice of type `T` and a validity boolean slice |
| pub fn append_values( |
| &mut self, |
| values: &[T::Native], |
| is_valid: &[bool], |
| ) -> Result<()> { |
| if values.len() != is_valid.len() { |
| return Err(ArrowError::InvalidArgumentError( |
| "Value and validity lengths must be equal".to_string(), |
| )); |
| } |
| self.bitmap_builder.append_slice(is_valid)?; |
| self.values_builder.append_slice(values) |
| } |
| |
| /// Builds the `PrimitiveArray` and reset this builder. |
| pub fn finish(&mut self) -> PrimitiveArray<T> { |
| let len = self.len(); |
| let null_bit_buffer = self.bitmap_builder.finish(); |
| let null_count = len - null_bit_buffer.count_set_bits(); |
| let mut builder = ArrayData::builder(T::DATA_TYPE) |
| .len(len) |
| .add_buffer(self.values_builder.finish()); |
| if null_count > 0 { |
| builder = builder |
| .null_count(null_count) |
| .null_bit_buffer(null_bit_buffer); |
| } |
| let data = builder.build(); |
| PrimitiveArray::<T>::from(data) |
| } |
| |
| /// Builds the `DictionaryArray` and reset this builder. |
| pub fn finish_dict(&mut self, values: ArrayRef) -> DictionaryArray<T> { |
| let len = self.len(); |
| let null_bit_buffer = self.bitmap_builder.finish(); |
| let null_count = len - null_bit_buffer.count_set_bits(); |
| let data_type = DataType::Dictionary( |
| Box::new(T::DATA_TYPE), |
| Box::new(values.data_type().clone()), |
| ); |
| let mut builder = ArrayData::builder(data_type) |
| .len(len) |
| .add_buffer(self.values_builder.finish()); |
| if null_count > 0 { |
| builder = builder |
| .null_count(null_count) |
| .null_bit_buffer(null_bit_buffer); |
| } |
| builder = builder.add_child_data(values.data()); |
| DictionaryArray::<T>::from(builder.build()) |
| } |
| } |
| |
| /// Array builder for `ListArray` |
| #[derive(Debug)] |
| pub struct ListBuilder<T: ArrayBuilder> { |
| offsets_builder: Int32BufferBuilder, |
| bitmap_builder: BooleanBufferBuilder, |
| values_builder: T, |
| len: usize, |
| } |
| |
| impl<T: ArrayBuilder> ListBuilder<T> { |
| /// Creates a new `ListArrayBuilder` from a given values array builder |
| pub fn new(values_builder: T) -> Self { |
| let capacity = values_builder.len(); |
| Self::with_capacity(values_builder, capacity) |
| } |
| |
| /// Creates a new `ListArrayBuilder` from a given values array builder |
| /// `capacity` is the number of items to pre-allocate space for in this builder |
| pub fn with_capacity(values_builder: T, capacity: usize) -> Self { |
| let mut offsets_builder = Int32BufferBuilder::new(capacity + 1); |
| offsets_builder.append(0).unwrap(); |
| Self { |
| offsets_builder, |
| bitmap_builder: BooleanBufferBuilder::new(capacity), |
| values_builder, |
| len: 0, |
| } |
| } |
| } |
| |
| impl<T: ArrayBuilder> ArrayBuilder for ListBuilder<T> |
| where |
| T: 'static, |
| { |
| /// Returns the builder as a non-mutable `Any` reference. |
| fn as_any(&self) -> &Any { |
| self |
| } |
| |
| /// Appends data from other arrays into the builder |
| /// |
| /// This is most useful when concatenating arrays of the same type into a builder. |
| fn append_data(&mut self, data: &[ArrayDataRef]) -> Result<()> { |
| // validate arraydata and reserve memory |
| let mut total_len = 0; |
| for array in data { |
| if array.data_type() != &self.data_type() { |
| return Err(ArrowError::InvalidArgumentError( |
| "Cannot append data to builder if data types are different" |
| .to_string(), |
| )); |
| } |
| if array.buffers().len() != 1 { |
| return Err(ArrowError::InvalidArgumentError( |
| "List arrays should have 1 buffer".to_string(), |
| )); |
| } |
| if array.child_data().len() != 1 { |
| return Err(ArrowError::InvalidArgumentError( |
| "List arrays should have 1 child_data element".to_string(), |
| )); |
| } |
| total_len += array.len(); |
| } |
| // reserve memory |
| self.offsets_builder.reserve(total_len); |
| self.bitmap_builder.reserve(total_len); |
| // values_builder is allocated by the relevant builder, and is not allocated here |
| |
| // determine the latest offset on the builder |
| let mut cum_offset = if self.offsets_builder.len() == 0 { |
| 0 |
| } else { |
| // peek into buffer to get last appended offset |
| let buffer = self.offsets_builder.buffer.data(); |
| let len = self.offsets_builder.len(); |
| let (start, end) = ((len - 1) * 4, len * 4); |
| let slice = &buffer[start..end]; |
| i32::from_le_bytes(slice.try_into().unwrap()) |
| }; |
| for array in data { |
| let len = array.len(); |
| if len == 0 { |
| continue; |
| } |
| let offset = array.offset(); |
| |
| // `typed_data` is unsafe, however this call is safe as `ListArray` has i32 offsets |
| let offsets = unsafe { |
| &array.buffers()[0].typed_data::<i32>()[offset..(len + offset) + 1] |
| }; |
| // the offsets of the child array determine its length |
| // this could be obtained by getting the concrete ListArray and getting value_offsets |
| let offset_at_len = offsets[offsets.len() - 1] as usize; |
| let first_offset = offsets[0] as usize; |
| // create the child array and offset it |
| let child_data = &array.child_data()[0]; |
| let child_array = make_array(child_data.clone()); |
| // slice the child array to account for offsets |
| let sliced = child_array.slice(first_offset, offset_at_len - first_offset); |
| self.values().append_data(&[sliced.data()])?; |
| let adjusted_offsets: Vec<i32> = offsets |
| .windows(2) |
| .map(|w| { |
| let curr_offset = w[1] - w[0] + cum_offset; |
| cum_offset = curr_offset; |
| curr_offset |
| }) |
| .collect(); |
| self.offsets_builder |
| .append_slice(adjusted_offsets.as_slice())?; |
| |
| for i in 0..len { |
| self.bitmap_builder.append(array.is_valid(i))?; |
| } |
| } |
| |
| // append array length |
| self.len += total_len; |
| Ok(()) |
| } |
| |
| /// Returns the data type of the builder |
| /// |
| /// This is used for validating array data types in `append_data` |
| fn data_type(&self) -> DataType { |
| DataType::List(Box::new(NullableDataType::new( |
| self.values_builder.data_type(), |
| true, |
| ))) |
| } |
| |
| /// Returns the builder as a mutable `Any` reference. |
| fn as_any_mut(&mut self) -> &mut Any { |
| self |
| } |
| |
| /// Returns the boxed builder as a box of `Any`. |
| fn into_box_any(self: Box<Self>) -> Box<Any> { |
| self |
| } |
| |
| /// Returns the number of array slots in the builder |
| fn len(&self) -> usize { |
| self.len |
| } |
| |
| /// Returns whether the number of array slots is zero |
| fn is_empty(&self) -> bool { |
| self.len == 0 |
| } |
| |
| /// Builds the array and reset this builder. |
| fn finish(&mut self) -> ArrayRef { |
| Arc::new(self.finish()) |
| } |
| } |
| |
| impl<T: ArrayBuilder> ListBuilder<T> |
| where |
| T: 'static, |
| { |
| /// Returns the child array builder as a mutable reference. |
| /// |
| /// This mutable reference can be used to append values into the child array builder, |
| /// but you must call `append` to delimit each distinct list value. |
| pub fn values(&mut self) -> &mut T { |
| &mut self.values_builder |
| } |
| |
| /// Finish the current variable-length list array slot |
| pub fn append(&mut self, is_valid: bool) -> Result<()> { |
| self.offsets_builder |
| .append(self.values_builder.len() as i32)?; |
| self.bitmap_builder.append(is_valid)?; |
| self.len += 1; |
| Ok(()) |
| } |
| |
| /// Builds the `ListArray` and reset this builder. |
| pub fn finish(&mut self) -> ListArray { |
| let len = self.len(); |
| self.len = 0; |
| let values_arr = self |
| .values_builder |
| .as_any_mut() |
| .downcast_mut::<T>() |
| .unwrap() |
| .finish(); |
| let values_data = values_arr.data(); |
| |
| let offset_buffer = self.offsets_builder.finish(); |
| let null_bit_buffer = self.bitmap_builder.finish(); |
| let nulls = null_bit_buffer.count_set_bits(); |
| self.offsets_builder.append(0).unwrap(); |
| let data = ArrayData::builder(DataType::List(Box::new(NullableDataType::new( |
| values_data.data_type().clone(), |
| true, // TODO: find a consistent way of getting this |
| )))) |
| .len(len) |
| .null_count(len - nulls) |
| .add_buffer(offset_buffer) |
| .add_child_data(values_data) |
| .null_bit_buffer(null_bit_buffer) |
| .build(); |
| |
| ListArray::from(data) |
| } |
| } |
| |
| /// Array builder for `ListArray` |
| #[derive(Debug)] |
| pub struct LargeListBuilder<T: ArrayBuilder> { |
| offsets_builder: Int64BufferBuilder, |
| bitmap_builder: BooleanBufferBuilder, |
| values_builder: T, |
| len: usize, |
| } |
| |
| impl<T: ArrayBuilder> LargeListBuilder<T> { |
| /// Creates a new `LargeListArrayBuilder` from a given values array builder |
| pub fn new(values_builder: T) -> Self { |
| let capacity = values_builder.len(); |
| Self::with_capacity(values_builder, capacity) |
| } |
| |
| /// Creates a new `LargeListArrayBuilder` from a given values array builder |
| /// `capacity` is the number of items to pre-allocate space for in this builder |
| pub fn with_capacity(values_builder: T, capacity: usize) -> Self { |
| let mut offsets_builder = Int64BufferBuilder::new(capacity + 1); |
| offsets_builder.append(0).unwrap(); |
| Self { |
| offsets_builder, |
| bitmap_builder: BooleanBufferBuilder::new(capacity), |
| values_builder, |
| len: 0, |
| } |
| } |
| } |
| |
| impl<T: ArrayBuilder> ArrayBuilder for LargeListBuilder<T> |
| where |
| T: 'static, |
| { |
| /// Returns the builder as a non-mutable `Any` reference. |
| fn as_any(&self) -> &Any { |
| self |
| } |
| |
| /// Appends data from other arrays into the builder |
| /// |
| /// This is most useful when concatenating arrays of the same type into a builder. |
| fn append_data(&mut self, data: &[ArrayDataRef]) -> Result<()> { |
| // validate arraydata and reserve memory |
| let mut total_len = 0; |
| for array in data { |
| if array.data_type() != &self.data_type() { |
| return Err(ArrowError::InvalidArgumentError( |
| "Cannot append data to builder if data types are different" |
| .to_string(), |
| )); |
| } |
| if array.buffers().len() != 1 { |
| return Err(ArrowError::InvalidArgumentError( |
| "List arrays should have 1 buffer".to_string(), |
| )); |
| } |
| if array.child_data().len() != 1 { |
| return Err(ArrowError::InvalidArgumentError( |
| "List arrays should have 1 child_data element".to_string(), |
| )); |
| } |
| total_len += array.len(); |
| } |
| // reserve memory |
| self.offsets_builder.reserve(total_len); |
| self.bitmap_builder.reserve(total_len); |
| // values_builder is allocated by the relevant builder, and is not allocated here |
| |
| // determine the latest offset on the builder |
| let mut cum_offset = if self.offsets_builder.len() == 0 { |
| 0 |
| } else { |
| // peek into buffer to get last appended offset |
| let buffer = self.offsets_builder.buffer.data(); |
| let len = self.offsets_builder.len(); |
| let (start, end) = ((len - 1) * 8, len * 8); |
| let slice = &buffer[start..end]; |
| i64::from_le_bytes(slice.try_into().unwrap()) |
| }; |
| for array in data { |
| let len = array.len(); |
| if len == 0 { |
| continue; |
| } |
| let offset = array.offset(); |
| |
| // `typed_data` is unsafe, however this call is safe as `LargeListArray` has i64 offsets |
| let offsets = unsafe { |
| &array.buffers()[0].typed_data::<i64>()[offset..(len + offset) + 1] |
| }; |
| // the offsets of the child array determine its length |
| // this could be obtained by getting the concrete ListArray and getting value_offsets |
| let offset_at_len = offsets[offsets.len() - 1] as usize; |
| let first_offset = offsets[0] as usize; |
| // create the child array and offset it |
| let child_data = &array.child_data()[0]; |
| let child_array = make_array(child_data.clone()); |
| // slice the child array to account for offsets |
| let sliced = child_array.slice(first_offset, offset_at_len - first_offset); |
| self.values().append_data(&[sliced.data()])?; |
| let adjusted_offsets: Vec<i64> = offsets |
| .windows(2) |
| .map(|w| { |
| let curr_offset = w[1] - w[0] + cum_offset; |
| cum_offset = curr_offset; |
| curr_offset |
| }) |
| .collect(); |
| self.offsets_builder |
| .append_slice(adjusted_offsets.as_slice())?; |
| |
| for i in 0..len { |
| self.bitmap_builder.append(array.is_valid(i))?; |
| } |
| } |
| |
| // append array length |
| self.len += total_len; |
| Ok(()) |
| } |
| |
| /// Returns the data type of the builder |
| /// |
| /// This is used for validating array data types in `append_data` |
| fn data_type(&self) -> DataType { |
| DataType::LargeList(Box::new(NullableDataType::new( |
| self.values_builder.data_type(), |
| true, |
| ))) |
| } |
| |
| /// Returns the builder as a mutable `Any` reference. |
| fn as_any_mut(&mut self) -> &mut Any { |
| self |
| } |
| |
| /// Returns the boxed builder as a box of `Any`. |
| fn into_box_any(self: Box<Self>) -> Box<Any> { |
| self |
| } |
| |
| /// Returns the number of array slots in the builder |
| fn len(&self) -> usize { |
| self.len |
| } |
| |
| /// Returns whether the number of array slots is zero |
| fn is_empty(&self) -> bool { |
| self.len == 0 |
| } |
| |
| /// Builds the array and reset this builder. |
| fn finish(&mut self) -> ArrayRef { |
| Arc::new(self.finish()) |
| } |
| } |
| |
| impl<T: ArrayBuilder> LargeListBuilder<T> |
| where |
| T: 'static, |
| { |
| /// Returns the child array builder as a mutable reference. |
| /// |
| /// This mutable reference can be used to append values into the child array builder, |
| /// but you must call `append` to delimit each distinct list value. |
| pub fn values(&mut self) -> &mut T { |
| &mut self.values_builder |
| } |
| |
| /// Finish the current variable-length list array slot |
| pub fn append(&mut self, is_valid: bool) -> Result<()> { |
| self.offsets_builder |
| .append(self.values_builder.len() as i64)?; |
| self.bitmap_builder.append(is_valid)?; |
| self.len += 1; |
| Ok(()) |
| } |
| |
| /// Builds the `LargeListArray` and reset this builder. |
| pub fn finish(&mut self) -> LargeListArray { |
| let len = self.len(); |
| self.len = 0; |
| let values_arr = self |
| .values_builder |
| .as_any_mut() |
| .downcast_mut::<T>() |
| .unwrap() |
| .finish(); |
| let values_data = values_arr.data(); |
| |
| let offset_buffer = self.offsets_builder.finish(); |
| let null_bit_buffer = self.bitmap_builder.finish(); |
| let nulls = null_bit_buffer.count_set_bits(); |
| self.offsets_builder.append(0).unwrap(); |
| let data = ArrayData::builder(DataType::LargeList(Box::new( |
| NullableDataType::new(values_data.data_type().clone(), true), |
| ))) |
| .len(len) |
| .null_count(len - nulls) |
| .add_buffer(offset_buffer) |
| .add_child_data(values_data) |
| .null_bit_buffer(null_bit_buffer) |
| .build(); |
| |
| LargeListArray::from(data) |
| } |
| } |
| |
| /// Array builder for `ListArray` |
| #[derive(Debug)] |
| pub struct FixedSizeListBuilder<T: ArrayBuilder> { |
| bitmap_builder: BooleanBufferBuilder, |
| values_builder: T, |
| len: usize, |
| list_len: i32, |
| } |
| |
| impl<T: ArrayBuilder> FixedSizeListBuilder<T> { |
| /// Creates a new `FixedSizeListBuilder` from a given values array builder |
| /// `length` is the number of values within each array |
| pub fn new(values_builder: T, length: i32) -> Self { |
| let capacity = values_builder.len(); |
| Self::with_capacity(values_builder, length, capacity) |
| } |
| |
| /// Creates a new `FixedSizeListBuilder` from a given values array builder |
| /// `length` is the number of values within each array |
| /// `capacity` is the number of items to pre-allocate space for in this builder |
| pub fn with_capacity(values_builder: T, length: i32, capacity: usize) -> Self { |
| let mut offsets_builder = Int32BufferBuilder::new(capacity + 1); |
| offsets_builder.append(0).unwrap(); |
| Self { |
| bitmap_builder: BooleanBufferBuilder::new(capacity), |
| values_builder, |
| len: 0, |
| list_len: length, |
| } |
| } |
| } |
| |
| impl<T: ArrayBuilder> ArrayBuilder for FixedSizeListBuilder<T> |
| where |
| T: 'static, |
| { |
| /// Returns the builder as a non-mutable `Any` reference. |
| fn as_any(&self) -> &Any { |
| self |
| } |
| |
| /// Appends data from other arrays into the builder |
| /// |
| /// This is most useful when concatenating arrays of the same type into a builder. |
| fn append_data(&mut self, data: &[ArrayDataRef]) -> Result<()> { |
| // validate arraydata and reserve memory |
| let mut total_len = 0; |
| for array in data { |
| if array.data_type() != &self.data_type() { |
| return Err(ArrowError::InvalidArgumentError( |
| "Cannot append data to builder if data types are different" |
| .to_string(), |
| )); |
| } |
| if array.child_data().len() != 1 { |
| return Err(ArrowError::InvalidArgumentError( |
| "FixedSizeList arrays should have 1 child_data element".to_string(), |
| )); |
| } |
| total_len += array.len(); |
| } |
| // reserve memory |
| self.bitmap_builder.reserve(total_len); |
| |
| // determine the latest offset on the builder |
| for array in data { |
| let len = array.len(); |
| if len == 0 { |
| continue; |
| } |
| let offset = array.offset(); |
| |
| // the offsets of the child array determine its length |
| let first_offset = self.list_len as usize * offset; |
| let offset_at_len = first_offset + len * self.list_len as usize; |
| // create the child array and offset it |
| let child_data = &array.child_data()[0]; |
| let child_array = make_array(child_data.clone()); |
| // slice the child array to account for offsets |
| let sliced = child_array.slice(first_offset, offset_at_len - first_offset); |
| self.values().append_data(&[sliced.data()])?; |
| for i in 0..len { |
| self.bitmap_builder.append(array.is_valid(i))?; |
| } |
| } |
| |
| // append array length |
| self.len += total_len; |
| Ok(()) |
| } |
| |
| /// Returns the data type of the builder |
| /// |
| /// This is used for validating array data types in `append_data` |
| fn data_type(&self) -> DataType { |
| DataType::FixedSizeList( |
| Box::new(NullableDataType::new(self.values_builder.data_type(), true)), |
| self.list_len, |
| ) |
| } |
| |
| /// Returns the builder as a mutable `Any` reference. |
| fn as_any_mut(&mut self) -> &mut Any { |
| self |
| } |
| |
| /// Returns the boxed builder as a box of `Any`. |
| fn into_box_any(self: Box<Self>) -> Box<Any> { |
| self |
| } |
| |
| /// Returns the number of array slots in the builder |
| fn len(&self) -> usize { |
| self.len |
| } |
| |
| /// Returns whether the number of array slots is zero |
| fn is_empty(&self) -> bool { |
| self.len == 0 |
| } |
| |
| /// Builds the array and reset this builder. |
| fn finish(&mut self) -> ArrayRef { |
| Arc::new(self.finish()) |
| } |
| } |
| |
| impl<T: ArrayBuilder> FixedSizeListBuilder<T> |
| where |
| T: 'static, |
| { |
| /// Returns the child array builder as a mutable reference. |
| /// |
| /// This mutable reference can be used to append values into the child array builder, |
| /// but you must call `append` to delimit each distinct list value. |
| pub fn values(&mut self) -> &mut T { |
| &mut self.values_builder |
| } |
| |
| pub fn value_length(&self) -> i32 { |
| self.list_len |
| } |
| |
| /// Finish the current variable-length list array slot |
| pub fn append(&mut self, is_valid: bool) -> Result<()> { |
| self.bitmap_builder.append(is_valid)?; |
| self.len += 1; |
| Ok(()) |
| } |
| |
| /// Builds the `FixedSizeListBuilder` and reset this builder. |
| pub fn finish(&mut self) -> FixedSizeListArray { |
| let len = self.len(); |
| self.len = 0; |
| let values_arr = self |
| .values_builder |
| .as_any_mut() |
| .downcast_mut::<T>() |
| .unwrap() |
| .finish(); |
| let values_data = values_arr.data(); |
| |
| // check that values_data length is multiple of len if we have data |
| if len != 0 { |
| assert!( |
| values_data.len() / len == self.list_len as usize, |
| "Values of FixedSizeList must have equal lengths, values have length {} and list has {}", |
| values_data.len() / len, |
| self.list_len |
| ); |
| } |
| |
| let null_bit_buffer = self.bitmap_builder.finish(); |
| let nulls = null_bit_buffer.count_set_bits(); |
| let data = ArrayData::builder(DataType::FixedSizeList( |
| Box::new(NullableDataType::new(values_data.data_type().clone(), true)), |
| self.list_len, |
| )) |
| .len(len) |
| .null_count(len - nulls) |
| .add_child_data(values_data) |
| .null_bit_buffer(null_bit_buffer) |
| .build(); |
| |
| FixedSizeListArray::from(data) |
| } |
| } |
| |
| /// Array builder for `BinaryArray` |
| #[derive(Debug)] |
| pub struct BinaryBuilder { |
| builder: ListBuilder<UInt8Builder>, |
| } |
| |
| #[derive(Debug)] |
| pub struct LargeBinaryBuilder { |
| builder: LargeListBuilder<UInt8Builder>, |
| } |
| |
| #[derive(Debug)] |
| pub struct StringBuilder { |
| builder: ListBuilder<UInt8Builder>, |
| } |
| |
| #[derive(Debug)] |
| pub struct LargeStringBuilder { |
| builder: LargeListBuilder<UInt8Builder>, |
| } |
| |
| #[derive(Debug)] |
| pub struct FixedSizeBinaryBuilder { |
| builder: FixedSizeListBuilder<UInt8Builder>, |
| } |
| |
| #[derive(Debug)] |
| pub struct DecimalBuilder { |
| builder: FixedSizeListBuilder<UInt8Builder>, |
| precision: usize, |
| scale: usize, |
| } |
| |
| impl ArrayBuilder for BinaryBuilder { |
| /// Returns the builder as a non-mutable `Any` reference. |
| fn as_any(&self) -> &Any { |
| self |
| } |
| |
| /// Appends data from other arrays into the builder |
| /// |
| /// This is most useful when concatenating arrays of the same type into a builder. |
| fn append_data(&mut self, data: &[ArrayDataRef]) -> Result<()> { |
| append_binary_data(&mut self.builder, &DataType::Binary, data) |
| } |
| |
| /// Returns the data type of the builder |
| /// |
| /// This is used for validating array data types in `append_data` |
| fn data_type(&self) -> DataType { |
| DataType::Binary |
| } |
| |
| /// Returns the builder as a mutable `Any` reference. |
| fn as_any_mut(&mut self) -> &mut Any { |
| self |
| } |
| |
| /// Returns the boxed builder as a box of `Any`. |
| fn into_box_any(self: Box<Self>) -> Box<Any> { |
| self |
| } |
| |
| /// Returns the number of array slots in the builder |
| fn len(&self) -> usize { |
| self.builder.len() |
| } |
| |
| /// Returns whether the number of array slots is zero |
| fn is_empty(&self) -> bool { |
| self.builder.is_empty() |
| } |
| |
| /// Builds the array and reset this builder. |
| fn finish(&mut self) -> ArrayRef { |
| Arc::new(self.finish()) |
| } |
| } |
| |
| impl ArrayBuilder for LargeBinaryBuilder { |
| /// Returns the builder as a non-mutable `Any` reference. |
| fn as_any(&self) -> &Any { |
| self |
| } |
| |
| /// Returns the builder as a mutable `Any` reference. |
| fn as_any_mut(&mut self) -> &mut Any { |
| self |
| } |
| |
| /// Appends data from other arrays into the builder |
| /// |
| /// This is most useful when concatenating arrays of the same type into a builder. |
| fn append_data(&mut self, data: &[ArrayDataRef]) -> Result<()> { |
| append_large_binary_data(&mut self.builder, &DataType::LargeBinary, data) |
| } |
| |
| /// Returns the data type of the builder |
| /// |
| /// This is used for validating array data types in `append_data` |
| fn data_type(&self) -> DataType { |
| DataType::LargeBinary |
| } |
| |
| /// Returns the boxed builder as a box of `Any`. |
| fn into_box_any(self: Box<Self>) -> Box<Any> { |
| self |
| } |
| |
| /// Returns the number of array slots in the builder |
| fn len(&self) -> usize { |
| self.builder.len() |
| } |
| |
| /// Returns whether the number of array slots is zero |
| fn is_empty(&self) -> bool { |
| self.builder.is_empty() |
| } |
| |
| /// Builds the array and reset this builder. |
| fn finish(&mut self) -> ArrayRef { |
| Arc::new(self.finish()) |
| } |
| } |
| |
| impl ArrayBuilder for StringBuilder { |
| /// Returns the builder as a non-mutable `Any` reference. |
| fn as_any(&self) -> &Any { |
| self |
| } |
| |
| /// Appends data from other arrays into the builder |
| /// |
| /// This is most useful when concatenating arrays of the same type into a builder. |
| fn append_data(&mut self, data: &[ArrayDataRef]) -> Result<()> { |
| append_binary_data(&mut self.builder, &DataType::Utf8, data) |
| } |
| |
| /// Returns the data type of the builder |
| /// |
| /// This is used for validating array data types in `append_data` |
| fn data_type(&self) -> DataType { |
| DataType::Utf8 |
| } |
| |
| /// Returns the builder as a mutable `Any` reference. |
| fn as_any_mut(&mut self) -> &mut Any { |
| self |
| } |
| |
| /// Returns the boxed builder as a box of `Any`. |
| fn into_box_any(self: Box<Self>) -> Box<Any> { |
| self |
| } |
| |
| /// Returns the number of array slots in the builder |
| fn len(&self) -> usize { |
| self.builder.len() |
| } |
| |
| /// Returns whether the number of array slots is zero |
| fn is_empty(&self) -> bool { |
| self.builder.is_empty() |
| } |
| |
| /// Builds the array and reset this builder. |
| fn finish(&mut self) -> ArrayRef { |
| Arc::new(self.finish()) |
| } |
| } |
| |
| // Helper function for appending Binary and Utf8 data |
| fn append_binary_data( |
| builder: &mut ListBuilder<UInt8Builder>, |
| data_type: &DataType, |
| data: &[ArrayDataRef], |
| ) -> Result<()> { |
| // validate arraydata and reserve memory |
| for array in data { |
| if array.data_type() != data_type { |
| return Err(ArrowError::InvalidArgumentError( |
| "Cannot append data to builder if data types are different".to_string(), |
| )); |
| } |
| if array.buffers().len() != 2 { |
| return Err(ArrowError::InvalidArgumentError( |
| "Binary/String arrays should have 2 buffers".to_string(), |
| )); |
| } |
| } |
| |
| builder.append_data( |
| &data |
| .iter() |
| .map(|array| { |
| // convert string to List<u8> to reuse list's cast |
| let int_data = &array.buffers()[1]; |
| let int_data = Arc::new(ArrayData::new( |
| DataType::UInt8, |
| int_data.len(), |
| None, |
| None, |
| 0, |
| vec![int_data.clone()], |
| vec![], |
| )) as ArrayDataRef; |
| |
| Arc::new(ArrayData::new( |
| DataType::List(Box::new(NullableDataType::new( |
| DataType::UInt8, |
| true, |
| ))), |
| array.len(), |
| None, |
| array.null_buffer().cloned(), |
| array.offset(), |
| vec![(&array.buffers()[0]).clone()], |
| vec![int_data], |
| )) |
| }) |
| .collect::<Vec<ArrayDataRef>>(), |
| )?; |
| |
| Ok(()) |
| } |
| |
| // Helper function for appending LargeBinary and LargeUtf8 data |
| fn append_large_binary_data( |
| builder: &mut LargeListBuilder<UInt8Builder>, |
| data_type: &DataType, |
| data: &[ArrayDataRef], |
| ) -> Result<()> { |
| // validate arraydata and reserve memory |
| for array in data { |
| if array.data_type() != data_type { |
| return Err(ArrowError::InvalidArgumentError( |
| "Cannot append data to builder if data types are different".to_string(), |
| )); |
| } |
| if array.buffers().len() != 2 { |
| return Err(ArrowError::InvalidArgumentError( |
| "Binary/String arrays should have 2 buffers".to_string(), |
| )); |
| } |
| } |
| |
| builder.append_data( |
| &data |
| .iter() |
| .map(|array| { |
| // convert string to List<u8> to reuse list's cast |
| let int_data = &array.buffers()[1]; |
| let int_data = Arc::new(ArrayData::new( |
| DataType::UInt8, |
| int_data.len(), |
| None, |
| None, |
| 0, |
| vec![int_data.clone()], |
| vec![], |
| )) as ArrayDataRef; |
| |
| Arc::new(ArrayData::new( |
| DataType::LargeList(Box::new(NullableDataType::new( |
| DataType::UInt8, |
| true, |
| ))), |
| array.len(), |
| None, |
| array.null_buffer().cloned(), |
| array.offset(), |
| vec![(&array.buffers()[0]).clone()], |
| vec![int_data], |
| )) |
| }) |
| .collect::<Vec<ArrayDataRef>>(), |
| )?; |
| |
| Ok(()) |
| } |
| |
| impl ArrayBuilder for LargeStringBuilder { |
| /// Returns the builder as a non-mutable `Any` reference. |
| fn as_any(&self) -> &Any { |
| self |
| } |
| |
| /// Returns the builder as a mutable `Any` reference. |
| fn as_any_mut(&mut self) -> &mut Any { |
| self |
| } |
| |
| /// Appends data from other arrays into the builder |
| /// |
| /// This is most useful when concatenating arrays of the same type into a builder. |
| fn append_data(&mut self, data: &[ArrayDataRef]) -> Result<()> { |
| append_large_binary_data(&mut self.builder, &DataType::LargeUtf8, data) |
| } |
| |
| /// Returns the data type of the builder |
| /// |
| /// This is used for validating array data types in `append_data` |
| fn data_type(&self) -> DataType { |
| DataType::LargeUtf8 |
| } |
| |
| /// Returns the boxed builder as a box of `Any`. |
| fn into_box_any(self: Box<Self>) -> Box<Any> { |
| self |
| } |
| |
| /// Returns the number of array slots in the builder |
| fn len(&self) -> usize { |
| self.builder.len() |
| } |
| |
| /// Returns whether the number of array slots is zero |
| fn is_empty(&self) -> bool { |
| self.builder.is_empty() |
| } |
| |
| /// Builds the array and reset this builder. |
| fn finish(&mut self) -> ArrayRef { |
| Arc::new(self.finish()) |
| } |
| } |
| |
| impl ArrayBuilder for FixedSizeBinaryBuilder { |
| /// Returns the builder as a non-mutable `Any` reference. |
| fn as_any(&self) -> &Any { |
| self |
| } |
| |
| /// Appends data from other arrays into the builder |
| /// |
| /// This is most useful when concatenating arrays of the same type into a builder. |
| fn append_data(&mut self, data: &[ArrayDataRef]) -> Result<()> { |
| // validate arraydata and reserve memory |
| for array in data { |
| if array.data_type() != &self.data_type() { |
| return Err(ArrowError::InvalidArgumentError( |
| "Cannot append data to builder if data types are different" |
| .to_string(), |
| )); |
| } |
| if array.buffers().len() != 1 { |
| return Err(ArrowError::InvalidArgumentError( |
| "FixedSizeBinary arrays should have 1 buffer".to_string(), |
| )); |
| } |
| } |
| for array in data { |
| // convert string to FixedSizeList<u8> to reuse list's append |
| let int_data = &array.buffers()[0]; |
| let int_data = Arc::new(ArrayData::new( |
| DataType::UInt8, |
| int_data.len(), |
| None, |
| None, |
| 0, |
| vec![int_data.clone()], |
| vec![], |
| )) as ArrayDataRef; |
| let list_data = Arc::new(ArrayData::new( |
| DataType::FixedSizeList( |
| Box::new(NullableDataType::new(DataType::UInt8, true)), |
| self.builder.list_len, |
| ), |
| array.len(), |
| None, |
| array.null_buffer().cloned(), |
| array.offset(), |
| vec![], |
| vec![int_data], |
| )); |
| self.builder.append_data(&[list_data])?; |
| } |
| Ok(()) |
| } |
| |
| /// Returns the data type of the builder |
| /// |
| /// This is used for validating array data types in `append_data` |
| fn data_type(&self) -> DataType { |
| DataType::FixedSizeBinary(self.builder.list_len) |
| } |
| |
| /// Returns the builder as a mutable `Any` reference. |
| fn as_any_mut(&mut self) -> &mut Any { |
| self |
| } |
| |
| /// Returns the boxed builder as a box of `Any`. |
| fn into_box_any(self: Box<Self>) -> Box<Any> { |
| self |
| } |
| |
| /// Returns the number of array slots in the builder |
| fn len(&self) -> usize { |
| self.builder.len() |
| } |
| |
| /// Returns whether the number of array slots is zero |
| fn is_empty(&self) -> bool { |
| self.builder.is_empty() |
| } |
| |
| /// Builds the array and reset this builder. |
| fn finish(&mut self) -> ArrayRef { |
| Arc::new(self.finish()) |
| } |
| } |
| |
| impl ArrayBuilder for DecimalBuilder { |
| /// Returns the builder as a non-mutable `Any` reference. |
| fn as_any(&self) -> &Any { |
| self |
| } |
| |
| /// Appends data from other arrays into the builder |
| /// |
| /// This is most useful when concatenating arrays of the same type into a builder. |
| fn append_data(&mut self, data: &[ArrayDataRef]) -> Result<()> { |
| // validate arraydata and reserve memory |
| for array in data { |
| if array.data_type() != &self.data_type() { |
| return Err(ArrowError::InvalidArgumentError( |
| "Cannot append data to builder if data types are different" |
| .to_string(), |
| )); |
| } |
| if array.buffers().len() != 1 { |
| return Err(ArrowError::InvalidArgumentError( |
| "Decimal arrays should have 1 buffer".to_string(), |
| )); |
| } |
| } |
| for array in data { |
| // convert string to FixedSizeList<u8> to reuse list's append |
| let int_data = &array.buffers()[0]; |
| let int_data = Arc::new(ArrayData::new( |
| DataType::UInt8, |
| int_data.len(), |
| None, |
| None, |
| 0, |
| vec![int_data.clone()], |
| vec![], |
| )) as ArrayDataRef; |
| let list_data = Arc::new(ArrayData::new( |
| DataType::FixedSizeList( |
| Box::new(NullableDataType::new(DataType::UInt8, true)), |
| self.builder.list_len, |
| ), |
| array.len(), |
| None, |
| array.null_buffer().cloned(), |
| array.offset(), |
| vec![], |
| vec![int_data], |
| )); |
| self.builder.append_data(&[list_data])?; |
| } |
| Ok(()) |
| } |
| |
| /// Returns the data type of the builder |
| /// |
| /// This is used for validating array data types in `append_data` |
| fn data_type(&self) -> DataType { |
| DataType::Decimal(self.precision, self.scale) |
| } |
| |
| /// Returns the builder as a mutable `Any` reference. |
| fn as_any_mut(&mut self) -> &mut Any { |
| self |
| } |
| |
| /// Returns the boxed builder as a box of `Any`. |
| fn into_box_any(self: Box<Self>) -> Box<Any> { |
| self |
| } |
| |
| /// Returns the number of array slots in the builder |
| fn len(&self) -> usize { |
| self.builder.len() |
| } |
| |
| /// Returns whether the number of array slots is zero |
| fn is_empty(&self) -> bool { |
| self.builder.is_empty() |
| } |
| |
| /// Builds the array and reset this builder. |
| fn finish(&mut self) -> ArrayRef { |
| Arc::new(self.finish()) |
| } |
| } |
| |
| impl BinaryBuilder { |
| /// Creates a new `BinaryBuilder`, `capacity` is the number of bytes in the values |
| /// array |
| pub fn new(capacity: usize) -> Self { |
| let values_builder = UInt8Builder::new(capacity); |
| Self { |
| builder: ListBuilder::new(values_builder), |
| } |
| } |
| |
| /// Appends a single byte value into the builder's values array. |
| /// |
| /// Note, when appending individual byte values you must call `append` to delimit each |
| /// distinct list value. |
| pub fn append_byte(&mut self, value: u8) -> Result<()> { |
| self.builder.values().append_value(value)?; |
| Ok(()) |
| } |
| |
| /// Appends a byte slice into the builder. |
| /// |
| /// Automatically calls the `append` method to delimit the slice appended in as a |
| /// distinct array element. |
| pub fn append_value(&mut self, value: &[u8]) -> Result<()> { |
| self.builder.values().append_slice(value)?; |
| self.builder.append(true)?; |
| Ok(()) |
| } |
| |
| /// Finish the current variable-length list array slot. |
| pub fn append(&mut self, is_valid: bool) -> Result<()> { |
| self.builder.append(is_valid) |
| } |
| |
| /// Append a null value to the array. |
| pub fn append_null(&mut self) -> Result<()> { |
| self.append(false) |
| } |
| |
| /// Builds the `BinaryArray` and reset this builder. |
| pub fn finish(&mut self) -> BinaryArray { |
| BinaryArray::from(self.builder.finish()) |
| } |
| } |
| |
| impl LargeBinaryBuilder { |
| /// Creates a new `LargeBinaryBuilder`, `capacity` is the number of bytes in the values |
| /// array |
| pub fn new(capacity: usize) -> Self { |
| let values_builder = UInt8Builder::new(capacity); |
| Self { |
| builder: LargeListBuilder::new(values_builder), |
| } |
| } |
| |
| /// Appends a single byte value into the builder's values array. |
| /// |
| /// Note, when appending individual byte values you must call `append` to delimit each |
| /// distinct list value. |
| pub fn append_byte(&mut self, value: u8) -> Result<()> { |
| self.builder.values().append_value(value)?; |
| Ok(()) |
| } |
| |
| /// Appends a byte slice into the builder. |
| /// |
| /// Automatically calls the `append` method to delimit the slice appended in as a |
| /// distinct array element. |
| pub fn append_value(&mut self, value: &[u8]) -> Result<()> { |
| self.builder.values().append_slice(value)?; |
| self.builder.append(true)?; |
| Ok(()) |
| } |
| |
| /// Finish the current variable-length list array slot. |
| pub fn append(&mut self, is_valid: bool) -> Result<()> { |
| self.builder.append(is_valid) |
| } |
| |
| /// Append a null value to the array. |
| pub fn append_null(&mut self) -> Result<()> { |
| self.append(false) |
| } |
| |
| /// Builds the `LargeBinaryArray` and reset this builder. |
| pub fn finish(&mut self) -> LargeBinaryArray { |
| LargeBinaryArray::from(self.builder.finish()) |
| } |
| } |
| |
| impl StringBuilder { |
| /// Creates a new `StringBuilder`, |
| /// `capacity` is the number of bytes of string data to pre-allocate space for in this builder |
| pub fn new(capacity: usize) -> Self { |
| let values_builder = UInt8Builder::new(capacity); |
| Self { |
| builder: ListBuilder::new(values_builder), |
| } |
| } |
| |
| /// Creates a new `StringBuilder`, |
| /// `data_capacity` is the number of bytes of string data to pre-allocate space for in this builder |
| /// `item_capacity` is the number of items to pre-allocate space for in this builder |
| pub fn with_capacity(item_capacity: usize, data_capacity: usize) -> Self { |
| let values_builder = UInt8Builder::new(data_capacity); |
| Self { |
| builder: ListBuilder::with_capacity(values_builder, item_capacity), |
| } |
| } |
| |
| /// Appends a string into the builder. |
| /// |
| /// Automatically calls the `append` method to delimit the string appended in as a |
| /// distinct array element. |
| pub fn append_value(&mut self, value: &str) -> Result<()> { |
| self.builder.values().append_slice(value.as_bytes())?; |
| self.builder.append(true)?; |
| Ok(()) |
| } |
| |
| /// Finish the current variable-length list array slot. |
| pub fn append(&mut self, is_valid: bool) -> Result<()> { |
| self.builder.append(is_valid) |
| } |
| |
| /// Append a null value to the array. |
| pub fn append_null(&mut self) -> Result<()> { |
| self.append(false) |
| } |
| |
| /// Builds the `StringArray` and reset this builder. |
| pub fn finish(&mut self) -> StringArray { |
| StringArray::from(self.builder.finish()) |
| } |
| } |
| |
| impl LargeStringBuilder { |
| /// Creates a new `StringBuilder`, |
| /// `capacity` is the number of bytes of string data to pre-allocate space for in this builder |
| pub fn new(capacity: usize) -> Self { |
| let values_builder = UInt8Builder::new(capacity); |
| Self { |
| builder: LargeListBuilder::new(values_builder), |
| } |
| } |
| |
| /// Creates a new `StringBuilder`, |
| /// `data_capacity` is the number of bytes of string data to pre-allocate space for in this builder |
| /// `item_capacity` is the number of items to pre-allocate space for in this builder |
| pub fn with_capacity(item_capacity: usize, data_capacity: usize) -> Self { |
| let values_builder = UInt8Builder::new(data_capacity); |
| Self { |
| builder: LargeListBuilder::with_capacity(values_builder, item_capacity), |
| } |
| } |
| |
| /// Appends a string into the builder. |
| /// |
| /// Automatically calls the `append` method to delimit the string appended in as a |
| /// distinct array element. |
| pub fn append_value(&mut self, value: &str) -> Result<()> { |
| self.builder.values().append_slice(value.as_bytes())?; |
| self.builder.append(true)?; |
| Ok(()) |
| } |
| |
| /// Finish the current variable-length list array slot. |
| pub fn append(&mut self, is_valid: bool) -> Result<()> { |
| self.builder.append(is_valid) |
| } |
| |
| /// Append a null value to the array. |
| pub fn append_null(&mut self) -> Result<()> { |
| self.append(false) |
| } |
| |
| /// Builds the `LargeStringArray` and reset this builder. |
| pub fn finish(&mut self) -> LargeStringArray { |
| LargeStringArray::from(self.builder.finish()) |
| } |
| } |
| |
| impl FixedSizeBinaryBuilder { |
| /// Creates a new `BinaryBuilder`, `capacity` is the number of bytes in the values |
| /// array |
| pub fn new(capacity: usize, byte_width: i32) -> Self { |
| let values_builder = UInt8Builder::new(capacity); |
| Self { |
| builder: FixedSizeListBuilder::new(values_builder, byte_width), |
| } |
| } |
| |
| /// Appends a byte slice into the builder. |
| /// |
| /// Automatically calls the `append` method to delimit the slice appended in as a |
| /// distinct array element. |
| pub fn append_value(&mut self, value: &[u8]) -> Result<()> { |
| if self.builder.value_length() != value.len() as i32 { |
| return Err(ArrowError::InvalidArgumentError( |
| "Byte slice does not have the same length as FixedSizeBinaryBuilder value lengths".to_string() |
| )); |
| } |
| self.builder.values().append_slice(value)?; |
| self.builder.append(true) |
| } |
| |
| /// Append a null value to the array. |
| pub fn append_null(&mut self) -> Result<()> { |
| let length: usize = self.builder.value_length() as usize; |
| self.builder.values().append_slice(&vec![0u8; length][..])?; |
| self.builder.append(false) |
| } |
| |
| /// Builds the `FixedSizeBinaryArray` and reset this builder. |
| pub fn finish(&mut self) -> FixedSizeBinaryArray { |
| FixedSizeBinaryArray::from(self.builder.finish()) |
| } |
| } |
| |
| impl DecimalBuilder { |
| /// Creates a new `BinaryBuilder`, `capacity` is the number of bytes in the values |
| /// array |
| pub fn new(capacity: usize, precision: usize, scale: usize) -> Self { |
| let values_builder = UInt8Builder::new(capacity); |
| let byte_width = DecimalArray::calc_fixed_byte_size(precision); |
| Self { |
| builder: FixedSizeListBuilder::new(values_builder, byte_width), |
| precision, |
| scale, |
| } |
| } |
| |
| /// Appends a byte slice into the builder. |
| /// |
| /// Automatically calls the `append` method to delimit the slice appended in as a |
| /// distinct array element. |
| pub fn append_value(&mut self, value: i128) -> Result<()> { |
| let value_as_bytes = Self::from_i128_to_fixed_size_bytes( |
| value, |
| self.builder.value_length() as usize, |
| )?; |
| if self.builder.value_length() != value_as_bytes.len() as i32 { |
| return Err(ArrowError::InvalidArgumentError( |
| "Byte slice does not have the same length as DecimalBuilder value lengths".to_string() |
| )); |
| } |
| self.builder |
| .values() |
| .append_slice(value_as_bytes.as_slice())?; |
| self.builder.append(true) |
| } |
| |
| fn from_i128_to_fixed_size_bytes(v: i128, size: usize) -> Result<Vec<u8>> { |
| if size > 16 { |
| return Err(ArrowError::InvalidArgumentError( |
| "DecimalBuilder only supports values up to 16 bytes.".to_string(), |
| )); |
| } |
| let res = v.to_be_bytes(); |
| let start_byte = 16 - size; |
| Ok(res[start_byte..16].to_vec()) |
| } |
| |
| /// Append a null value to the array. |
| pub fn append_null(&mut self) -> Result<()> { |
| let length: usize = self.builder.value_length() as usize; |
| self.builder.values().append_slice(&vec![0u8; length][..])?; |
| self.builder.append(false) |
| } |
| |
| /// Builds the `DecimalArray` and reset this builder. |
| pub fn finish(&mut self) -> DecimalArray { |
| DecimalArray::from_fixed_size_list_array( |
| self.builder.finish(), |
| self.precision, |
| self.scale, |
| ) |
| } |
| } |
| |
| /// Array builder for Struct types. |
| /// |
| /// Note that callers should make sure that methods of all the child field builders are |
| /// properly called to maintain the consistency of the data structure. |
| pub struct StructBuilder { |
| fields: Vec<Field>, |
| field_anys: Vec<Box<Any>>, |
| field_builders: Vec<Box<ArrayBuilder>>, |
| bitmap_builder: BooleanBufferBuilder, |
| len: usize, |
| } |
| |
| impl fmt::Debug for StructBuilder { |
| fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { |
| f.debug_struct("StructBuilder") |
| .field("fields", &self.fields) |
| .field("field_anys", &self.field_anys) |
| .field("bitmap_builder", &self.bitmap_builder) |
| .field("len", &self.len) |
| .finish() |
| } |
| } |
| |
| impl ArrayBuilder for StructBuilder { |
| /// Returns the number of array slots in the builder. |
| /// |
| /// Note that this always return the first child field builder's length, and it is |
| /// the caller's responsibility to maintain the consistency that all the child field |
| /// builder should have the equal number of elements. |
| fn len(&self) -> usize { |
| self.len |
| } |
| |
| /// Returns whether the number of array slots is zero |
| fn is_empty(&self) -> bool { |
| self.len == 0 |
| } |
| |
| /// Appends data from other arrays into the builder |
| /// |
| /// This is most useful when concatenating arrays of the same type into a builder. |
| fn append_data(&mut self, data: &[ArrayDataRef]) -> Result<()> { |
| // validate arraydata and reserve memory |
| let mut total_len = 0; |
| for array in data { |
| if array.data_type() != &self.data_type() { |
| return Err(ArrowError::InvalidArgumentError( |
| "Cannot append data to builder if data types are different" |
| .to_string(), |
| )); |
| } |
| if array.child_data().len() != self.num_fields() { |
| return Err(ArrowError::InvalidArgumentError( |
| "Struct should have the same child_data length as fields".to_string(), |
| )); |
| } |
| total_len += array.len(); |
| } |
| self.bitmap_builder.reserve(total_len); |
| |
| for array in data { |
| let len = array.len(); |
| if len == 0 { |
| continue; |
| } |
| let offset = array.offset(); |
| for (builder, child_data) in self |
| .field_builders |
| .iter_mut() |
| .zip(array.child_data().iter()) |
| { |
| // slice child_data to account for offsets |
| let child_array = make_array(child_data.clone()); |
| let sliced = child_array.slice(offset, len); |
| builder.append_data(&[sliced.data()])?; |
| } |
| for i in 0..len { |
| self.bitmap_builder.append(array.is_valid(i))?; |
| } |
| } |
| |
| self.len += total_len; |
| Ok(()) |
| } |
| |
| /// Returns the data type of the builder |
| /// |
| /// This is used for validating array data types in `append_data` |
| fn data_type(&self) -> DataType { |
| DataType::Struct(self.fields.clone()) |
| } |
| |
| /// Builds the array. |
| fn finish(&mut self) -> ArrayRef { |
| Arc::new(self.finish()) |
| } |
| |
| /// Returns the builder as a non-mutable `Any` reference. |
| /// |
| /// This is most useful when one wants to call non-mutable APIs on a specific builder |
| /// type. In this case, one can first cast this into a `Any`, and then use |
| /// `downcast_ref` to get a reference on the specific builder. |
| fn as_any(&self) -> &Any { |
| self |
| } |
| |
| /// Returns the builder as a mutable `Any` reference. |
| /// |
| /// This is most useful when one wants to call mutable APIs on a specific builder |
| /// type. In this case, one can first cast this into a `Any`, and then use |
| /// `downcast_mut` to get a reference on the specific builder. |
| fn as_any_mut(&mut self) -> &mut Any { |
| self |
| } |
| |
| /// Returns the boxed builder as a box of `Any`. |
| fn into_box_any(self: Box<Self>) -> Box<Any> { |
| self |
| } |
| } |
| |
| /// Returns a builder with capacity `capacity` that corresponds to the datatype `DataType` |
| /// This function is useful to construct arrays from an arbitrary vectors with known/expected |
| /// schema. |
| pub fn make_builder(datatype: &DataType, capacity: usize) -> Box<ArrayBuilder> { |
| match datatype { |
| DataType::Null => unimplemented!(), |
| DataType::Boolean => Box::new(BooleanBuilder::new(capacity)), |
| DataType::Int8 => Box::new(Int8Builder::new(capacity)), |
| DataType::Int16 => Box::new(Int16Builder::new(capacity)), |
| DataType::Int32 => Box::new(Int32Builder::new(capacity)), |
| DataType::Int64 => Box::new(Int64Builder::new(capacity)), |
| DataType::UInt8 => Box::new(UInt8Builder::new(capacity)), |
| DataType::UInt16 => Box::new(UInt16Builder::new(capacity)), |
| DataType::UInt32 => Box::new(UInt32Builder::new(capacity)), |
| DataType::UInt64 => Box::new(UInt64Builder::new(capacity)), |
| DataType::Float32 => Box::new(Float32Builder::new(capacity)), |
| DataType::Float64 => Box::new(Float64Builder::new(capacity)), |
| DataType::Binary => Box::new(BinaryBuilder::new(capacity)), |
| DataType::FixedSizeBinary(len) => { |
| Box::new(FixedSizeBinaryBuilder::new(capacity, *len)) |
| } |
| DataType::Decimal(precision, scale) => { |
| Box::new(DecimalBuilder::new(capacity, *precision, *scale)) |
| } |
| DataType::Utf8 => Box::new(StringBuilder::new(capacity)), |
| DataType::Date32(DateUnit::Day) => Box::new(Date32Builder::new(capacity)), |
| DataType::Date64(DateUnit::Millisecond) => Box::new(Date64Builder::new(capacity)), |
| DataType::Time32(TimeUnit::Second) => { |
| Box::new(Time32SecondBuilder::new(capacity)) |
| } |
| DataType::Time32(TimeUnit::Millisecond) => { |
| Box::new(Time32MillisecondBuilder::new(capacity)) |
| } |
| DataType::Time64(TimeUnit::Microsecond) => { |
| Box::new(Time64MicrosecondBuilder::new(capacity)) |
| } |
| DataType::Time64(TimeUnit::Nanosecond) => { |
| Box::new(Time64NanosecondBuilder::new(capacity)) |
| } |
| DataType::Timestamp(TimeUnit::Second, _) => { |
| Box::new(TimestampSecondBuilder::new(capacity)) |
| } |
| DataType::Timestamp(TimeUnit::Millisecond, _) => { |
| Box::new(TimestampMillisecondBuilder::new(capacity)) |
| } |
| DataType::Timestamp(TimeUnit::Microsecond, _) => { |
| Box::new(TimestampMicrosecondBuilder::new(capacity)) |
| } |
| DataType::Timestamp(TimeUnit::Nanosecond, _) => { |
| Box::new(TimestampNanosecondBuilder::new(capacity)) |
| } |
| DataType::Interval(IntervalUnit::YearMonth) => { |
| Box::new(IntervalYearMonthBuilder::new(capacity)) |
| } |
| DataType::Interval(IntervalUnit::DayTime) => { |
| Box::new(IntervalDayTimeBuilder::new(capacity)) |
| } |
| DataType::Duration(TimeUnit::Second) => { |
| Box::new(DurationSecondBuilder::new(capacity)) |
| } |
| DataType::Duration(TimeUnit::Millisecond) => { |
| Box::new(DurationMillisecondBuilder::new(capacity)) |
| } |
| DataType::Duration(TimeUnit::Microsecond) => { |
| Box::new(DurationMicrosecondBuilder::new(capacity)) |
| } |
| DataType::Duration(TimeUnit::Nanosecond) => { |
| Box::new(DurationNanosecondBuilder::new(capacity)) |
| } |
| DataType::Struct(fields) => { |
| Box::new(StructBuilder::from_fields(fields.clone(), capacity)) |
| } |
| t => panic!("Data type {:?} is not currently supported", t), |
| } |
| } |
| |
| impl StructBuilder { |
| pub fn new(fields: Vec<Field>, builders: Vec<Box<ArrayBuilder>>) -> Self { |
| let mut field_anys = Vec::with_capacity(builders.len()); |
| let mut field_builders = Vec::with_capacity(builders.len()); |
| |
| // Create and maintain two references for each of the input builder. We need the |
| // extra `Any` reference because we need to cast the builder to a specific type |
| // in `field_builder()` by calling `downcast_mut`. |
| for f in builders.into_iter() { |
| let raw_f = Box::into_raw(f); |
| let raw_f_copy = raw_f; |
| unsafe { |
| field_anys.push(Box::from_raw(raw_f).into_box_any()); |
| field_builders.push(Box::from_raw(raw_f_copy)); |
| } |
| } |
| |
| Self { |
| fields, |
| field_anys, |
| field_builders, |
| bitmap_builder: BooleanBufferBuilder::new(0), |
| len: 0, |
| } |
| } |
| |
| pub fn from_fields(fields: Vec<Field>, capacity: usize) -> Self { |
| let mut builders = Vec::with_capacity(fields.len()); |
| for field in &fields { |
| builders.push(make_builder(field.data_type(), capacity)); |
| } |
| Self::new(fields, builders) |
| } |
| |
| /// Returns a mutable reference to the child field builder at index `i`. |
| /// Result will be `None` if the input type `T` provided doesn't match the actual |
| /// field builder's type. |
| pub fn field_builder<T: ArrayBuilder>(&mut self, i: usize) -> Option<&mut T> { |
| self.field_anys[i].downcast_mut::<T>() |
| } |
| |
| /// Returns the number of fields for the struct this builder is building. |
| pub fn num_fields(&self) -> usize { |
| self.field_builders.len() |
| } |
| |
| /// Appends an element (either null or non-null) to the struct. The actual elements |
| /// should be appended for each child sub-array in a consistent way. |
| pub fn append(&mut self, is_valid: bool) -> Result<()> { |
| self.bitmap_builder.append(is_valid)?; |
| self.len += 1; |
| Ok(()) |
| } |
| |
| /// Appends a null element to the struct. |
| pub fn append_null(&mut self) -> Result<()> { |
| self.append(false) |
| } |
| |
| /// Builds the `StructArray` and reset this builder. |
| pub fn finish(&mut self) -> StructArray { |
| let mut child_data = Vec::with_capacity(self.field_builders.len()); |
| for f in &mut self.field_builders { |
| let arr = f.finish(); |
| child_data.push(arr.data()); |
| } |
| |
| let null_bit_buffer = self.bitmap_builder.finish(); |
| let null_count = self.len - null_bit_buffer.count_set_bits(); |
| let mut builder = ArrayData::builder(DataType::Struct(self.fields.clone())) |
| .len(self.len) |
| .child_data(child_data); |
| if null_count > 0 { |
| builder = builder |
| .null_count(null_count) |
| .null_bit_buffer(null_bit_buffer); |
| } |
| |
| self.len = 0; |
| |
| StructArray::from(builder.build()) |
| } |
| } |
| |
| impl Drop for StructBuilder { |
| fn drop(&mut self) { |
| // To avoid double drop on the field array builders. |
| let builders = std::mem::replace(&mut self.field_builders, Vec::new()); |
| std::mem::forget(builders); |
| } |
| } |
| |
| /// `FieldData` is a helper struct to track the state of the fields in the `UnionBuilder`. |
| #[derive(Debug)] |
| struct FieldData { |
| /// The type id for this field |
| type_id: i8, |
| /// The Arrow data type represented in the `values_buffer`, which is untyped |
| data_type: DataType, |
| /// A buffer containing the values for this field in raw bytes |
| values_buffer: Option<MutableBuffer>, |
| /// The number of array slots represented by the buffer |
| slots: usize, |
| /// The number of null array slots in this child array |
| null_count: usize, |
| /// A builder for the bitmap if required (for Sparse Unions) |
| bitmap_builder: Option<BooleanBufferBuilder>, |
| } |
| |
| impl FieldData { |
| /// Creates a new `FieldData`. |
| fn new( |
| type_id: i8, |
| data_type: DataType, |
| bitmap_builder: Option<BooleanBufferBuilder>, |
| ) -> Self { |
| Self { |
| type_id, |
| data_type, |
| values_buffer: Some(MutableBuffer::new(1)), |
| slots: 0, |
| null_count: 0, |
| bitmap_builder, |
| } |
| } |
| |
| /// Appends a single value to this `FieldData`'s `values_buffer`. |
| fn append_to_values_buffer<T: ArrowPrimitiveType>( |
| &mut self, |
| v: T::Native, |
| ) -> Result<()> { |
| let values_buffer = self |
| .values_buffer |
| .take() |
| .expect("Values buffer was never created"); |
| let mut builder: BufferBuilder<T> = |
| mutable_buffer_to_builder(values_buffer, self.slots); |
| builder.append(v)?; |
| let mutable_buffer = builder_to_mutable_buffer(builder); |
| self.values_buffer = Some(mutable_buffer); |
| |
| self.slots += 1; |
| if let Some(b) = &mut self.bitmap_builder { |
| b.append(true)? |
| }; |
| Ok(()) |
| } |
| |
| /// Appends a null to this `FieldData`. |
| fn append_null<T: ArrowPrimitiveType>(&mut self) -> Result<()> { |
| if let Some(b) = &mut self.bitmap_builder { |
| let values_buffer = self |
| .values_buffer |
| .take() |
| .expect("Values buffer was never created"); |
| let mut builder: BufferBuilder<T> = |
| mutable_buffer_to_builder(values_buffer, self.slots); |
| builder.advance(1)?; |
| let mutable_buffer = builder_to_mutable_buffer(builder); |
| self.values_buffer = Some(mutable_buffer); |
| self.slots += 1; |
| self.null_count += 1; |
| b.append(false)?; |
| }; |
| Ok(()) |
| } |
| |
| /// Appends a null to this `FieldData` when the type is not known at compile time. |
| /// |
| /// As the main `append` method of `UnionBuilder` is generic, we need a way to append null |
| /// slots to the fields that are not being appended to in the case of sparse unions. This |
| /// method solves this problem by appending dynamically based on `DataType`. |
| /// |
| /// Note, this method does **not** update the length of the `UnionArray` (this is done by the |
| /// main append operation) and assumes that it is called from a method that is generic over `T` |
| /// where `T` satisfies the bound `ArrowPrimitiveType`. |
| fn append_null_dynamic(&mut self) -> Result<()> { |
| match self.data_type { |
| DataType::Null => unimplemented!(), |
| DataType::Boolean => self.append_null::<BooleanType>()?, |
| DataType::Int8 => self.append_null::<Int8Type>()?, |
| DataType::Int16 => self.append_null::<Int16Type>()?, |
| DataType::Int32 |
| | DataType::Date32(_) |
| | DataType::Time32(_) |
| | DataType::Interval(IntervalUnit::YearMonth) => { |
| self.append_null::<Int32Type>()? |
| } |
| DataType::Int64 |
| | DataType::Timestamp(_, _) |
| | DataType::Date64(_) |
| | DataType::Time64(_) |
| | DataType::Interval(IntervalUnit::DayTime) |
| | DataType::Duration(_) => self.append_null::<Int64Type>()?, |
| DataType::UInt8 => self.append_null::<UInt8Type>()?, |
| DataType::UInt16 => self.append_null::<UInt16Type>()?, |
| DataType::UInt32 => self.append_null::<UInt32Type>()?, |
| DataType::UInt64 => self.append_null::<UInt64Type>()?, |
| DataType::Float32 => self.append_null::<Float32Type>()?, |
| DataType::Float64 => self.append_null::<Float64Type>()?, |
| _ => unreachable!("All cases of types that satisfy the trait bounds over T are covered above."), |
| }; |
| Ok(()) |
| } |
| } |
| |
| /// Builder type for creating a new `UnionArray`. |
| #[derive(Debug)] |
| pub struct UnionBuilder { |
| /// The current number of slots in the array |
| len: usize, |
| /// Maps field names to `FieldData` instances which track the builders for that field |
| fields: HashMap<String, FieldData>, |
| /// Builder to keep track of type ids |
| type_id_builder: Int8BufferBuilder, |
| /// Builder to keep track of offsets (`None` for sparse unions) |
| value_offset_builder: Option<Int32BufferBuilder>, |
| /// Optional builder for null slots |
| bitmap_builder: Option<BooleanBufferBuilder>, |
| } |
| |
| impl UnionBuilder { |
| /// Creates a new dense array builder. |
| pub fn new_dense(capacity: usize) -> Self { |
| Self { |
| len: 0, |
| fields: HashMap::default(), |
| type_id_builder: Int8BufferBuilder::new(capacity), |
| value_offset_builder: Some(Int32BufferBuilder::new(capacity)), |
| bitmap_builder: None, |
| } |
| } |
| |
| /// Creates a new sparse array builder. |
| pub fn new_sparse(capacity: usize) -> Self { |
| Self { |
| len: 0, |
| fields: HashMap::default(), |
| type_id_builder: Int8BufferBuilder::new(capacity), |
| value_offset_builder: None, |
| bitmap_builder: None, |
| } |
| } |
| |
| /// Appends a null to this builder. |
| pub fn append_null(&mut self) -> Result<()> { |
| if self.bitmap_builder.is_none() { |
| let mut builder = BooleanBufferBuilder::new(self.len + 1); |
| for _ in 0..self.len { |
| builder.append(true)?; |
| } |
| self.bitmap_builder = Some(builder) |
| } |
| self.bitmap_builder |
| .as_mut() |
| .expect("Cannot be None") |
| .append(false)?; |
| |
| self.type_id_builder.append(i8::default())?; |
| |
| // Handle sparse union |
| if self.value_offset_builder.is_none() { |
| for (_, fd) in self.fields.iter_mut() { |
| fd.append_null_dynamic()?; |
| } |
| } |
| self.len += 1; |
| Ok(()) |
| } |
| |
| /// Appends a value to this builder. |
| pub fn append<T: ArrowPrimitiveType>( |
| &mut self, |
| type_name: &str, |
| v: T::Native, |
| ) -> Result<()> { |
| let type_name = type_name.to_string(); |
| |
| let mut field_data = match self.fields.remove(&type_name) { |
| Some(data) => data, |
| None => match self.value_offset_builder { |
| Some(_) => FieldData::new(self.fields.len() as i8, T::DATA_TYPE, None), |
| None => { |
| let mut fd = FieldData::new( |
| self.fields.len() as i8, |
| T::DATA_TYPE, |
| Some(BooleanBufferBuilder::new(1)), |
| ); |
| for _ in 0..self.len { |
| fd.append_null::<T>()?; |
| } |
| fd |
| } |
| }, |
| }; |
| self.type_id_builder.append(field_data.type_id)?; |
| |
| match &mut self.value_offset_builder { |
| // Dense Union |
| Some(offset_builder) => { |
| offset_builder.append(field_data.slots as i32)?; |
| } |
| // Sparse Union |
| None => { |
| for (name, fd) in self.fields.iter_mut() { |
| if name != &type_name { |
| fd.append_null_dynamic()?; |
| } |
| } |
| } |
| } |
| field_data.append_to_values_buffer::<T>(v)?; |
| self.fields.insert(type_name, field_data); |
| |
| // Update the bitmap builder if it exists |
| if let Some(b) = &mut self.bitmap_builder { |
| b.append(true)?; |
| } |
| self.len += 1; |
| Ok(()) |
| } |
| |
| /// Builds this builder creating a new `UnionArray`. |
| pub fn build(mut self) -> Result<UnionArray> { |
| let type_id_buffer = self.type_id_builder.finish(); |
| let value_offsets_buffer = self.value_offset_builder.map(|mut b| b.finish()); |
| let mut children = Vec::new(); |
| for ( |
| name, |
| FieldData { |
| type_id, |
| data_type, |
| values_buffer, |
| slots, |
| bitmap_builder, |
| null_count, |
| }, |
| ) in self.fields.into_iter() |
| { |
| let buffer = values_buffer |
| .expect("The `values_buffer` should only ever be None inside the `append` method.") |
| .freeze(); |
| let arr_data_builder = ArrayDataBuilder::new(data_type.clone()) |
| .add_buffer(buffer) |
| .null_count(null_count) |
| .len(slots); |
| // .build(); |
| let arr_data_ref = match bitmap_builder { |
| Some(mut bb) => arr_data_builder.null_bit_buffer(bb.finish()).build(), |
| None => arr_data_builder.build(), |
| }; |
| let array_ref = make_array(arr_data_ref); |
| children.push((type_id, (Field::new(&name, data_type, false), array_ref))) |
| } |
| |
| children.sort_by(|a, b| { |
| a.0.partial_cmp(&b.0) |
| .expect("This will never be None as type ids are always i8 values.") |
| }); |
| let children: Vec<_> = children.into_iter().map(|(_, b)| b).collect(); |
| let bitmap = self.bitmap_builder.map(|mut b| b.finish()); |
| |
| UnionArray::try_new(type_id_buffer, value_offsets_buffer, children, bitmap) |
| } |
| } |
| |
| /// Array builder for `DictionaryArray`. For example to map a set of byte indices |
| /// to f32 values. Note that the use of a `HashMap` here will not scale to very large |
| /// arrays or result in an ordered dictionary. |
| #[derive(Debug)] |
| pub struct PrimitiveDictionaryBuilder<K, V> |
| where |
| K: ArrowPrimitiveType, |
| V: ArrowPrimitiveType, |
| { |
| keys_builder: PrimitiveBuilder<K>, |
| values_builder: PrimitiveBuilder<V>, |
| map: HashMap<Box<[u8]>, K::Native>, |
| } |
| |
| impl<K, V> PrimitiveDictionaryBuilder<K, V> |
| where |
| K: ArrowPrimitiveType, |
| V: ArrowPrimitiveType, |
| { |
| /// Creates a new `PrimitiveDictionaryBuilder` from a keys builder and a value builder. |
| pub fn new( |
| keys_builder: PrimitiveBuilder<K>, |
| values_builder: PrimitiveBuilder<V>, |
| ) -> Self { |
| Self { |
| keys_builder, |
| values_builder, |
| map: HashMap::new(), |
| } |
| } |
| } |
| |
| impl<K, V> ArrayBuilder for PrimitiveDictionaryBuilder<K, V> |
| where |
| K: ArrowPrimitiveType, |
| V: ArrowPrimitiveType, |
| { |
| /// Returns the builder as an non-mutable `Any` reference. |
| fn as_any(&self) -> &Any { |
| self |
| } |
| |
| /// Returns the builder as an mutable `Any` reference. |
| fn as_any_mut(&mut self) -> &mut Any { |
| self |
| } |
| |
| /// Returns the boxed builder as a box of `Any`. |
| fn into_box_any(self: Box<Self>) -> Box<Any> { |
| self |
| } |
| |
| /// Returns the number of array slots in the builder |
| fn len(&self) -> usize { |
| self.keys_builder.len() |
| } |
| |
| /// Returns whether the number of array slots is zero |
| fn is_empty(&self) -> bool { |
| self.keys_builder.is_empty() |
| } |
| |
| /// Appends data from other arrays into the builder |
| /// |
| /// This is most useful when concatenating arrays of the same type into a builder. |
| fn append_data(&mut self, _data: &[ArrayDataRef]) -> Result<()> { |
| // TODO: This will require an implementation that doesn't just append keys |
| unimplemented!("Appending data for dictionary arrays not yet implemented") |
| } |
| |
| /// Returns the data type of the builder |
| /// |
| /// This is used for validating array data types in `append_data` |
| fn data_type(&self) -> DataType { |
| DataType::Dictionary(Box::new(K::DATA_TYPE), Box::new(V::DATA_TYPE)) |
| } |
| |
| /// Builds the array and reset this builder. |
| fn finish(&mut self) -> ArrayRef { |
| Arc::new(self.finish()) |
| } |
| } |
| |
| impl<K, V> PrimitiveDictionaryBuilder<K, V> |
| where |
| K: ArrowPrimitiveType, |
| V: ArrowPrimitiveType, |
| { |
| /// Append a primitive value to the array. Return an existing index |
| /// if already present in the values array or a new index if the |
| /// value is appended to the values array. |
| pub fn append(&mut self, value: V::Native) -> Result<K::Native> { |
| if let Some(&key) = self.map.get(value.to_byte_slice()) { |
| // Append existing value. |
| self.keys_builder.append_value(key)?; |
| Ok(key) |
| } else { |
| // Append new value. |
| let key = K::Native::from_usize(self.values_builder.len()) |
| .ok_or(ArrowError::DictionaryKeyOverflowError)?; |
| self.values_builder.append_value(value)?; |
| self.keys_builder.append_value(key as K::Native)?; |
| self.map.insert(value.to_byte_slice().into(), key); |
| Ok(key) |
| } |
| } |
| |
| pub fn append_null(&mut self) -> Result<()> { |
| self.keys_builder.append_null() |
| } |
| |
| /// Builds the `DictionaryArray` and reset this builder. |
| pub fn finish(&mut self) -> DictionaryArray<K> { |
| self.map.clear(); |
| let value_ref: ArrayRef = Arc::new(self.values_builder.finish()); |
| self.keys_builder.finish_dict(value_ref) |
| } |
| } |
| |
| /// Array builder for `DictionaryArray`. For example to map a set of byte indices |
| /// to f32 values. Note that the use of a `HashMap` here will not scale to very large |
| /// arrays or result in an ordered dictionary. |
| #[derive(Debug)] |
| pub struct StringDictionaryBuilder<K> |
| where |
| K: ArrowDictionaryKeyType, |
| { |
| keys_builder: PrimitiveBuilder<K>, |
| values_builder: StringBuilder, |
| map: HashMap<Box<[u8]>, K::Native>, |
| } |
| |
| impl<K> StringDictionaryBuilder<K> |
| where |
| K: ArrowDictionaryKeyType, |
| { |
| /// Creates a new `StringDictionaryBuilder` from a keys builder and a value builder. |
| pub fn new(keys_builder: PrimitiveBuilder<K>, values_builder: StringBuilder) -> Self { |
| Self { |
| keys_builder, |
| values_builder, |
| map: HashMap::new(), |
| } |
| } |
| |
| /// Creates a new `StringDictionaryBuilder` from a keys builder and a dictionary |
| /// which is initialized with the given values. |
| /// The indices of those dictionary values are used as keys. |
| /// |
| /// # Example |
| /// |
| /// ``` |
| /// use arrow::datatypes::Int16Type; |
| /// use arrow::array::{StringArray, StringDictionaryBuilder, PrimitiveBuilder, Int16Array}; |
| /// use std::convert::TryFrom; |
| /// |
| /// let dictionary_values = StringArray::from(vec![None, Some("abc"), Some("def")]); |
| /// |
| /// let mut builder = StringDictionaryBuilder::new_with_dictionary(PrimitiveBuilder::<Int16Type>::new(3), &dictionary_values).unwrap(); |
| /// builder.append("def").unwrap(); |
| /// builder.append_null().unwrap(); |
| /// builder.append("abc").unwrap(); |
| /// |
| /// let dictionary_array = builder.finish(); |
| /// |
| /// let keys = dictionary_array.keys(); |
| /// |
| /// assert_eq!(keys, &Int16Array::from(vec![Some(2), None, Some(1)])); |
| /// ``` |
| pub fn new_with_dictionary( |
| keys_builder: PrimitiveBuilder<K>, |
| dictionary_values: &StringArray, |
| ) -> Result<Self> { |
| let dict_len = dictionary_values.len(); |
| let mut values_builder = |
| StringBuilder::with_capacity(dict_len, dictionary_values.value_data().len()); |
| let mut map: HashMap<Box<[u8]>, K::Native> = HashMap::with_capacity(dict_len); |
| for i in 0..dict_len { |
| if dictionary_values.is_valid(i) { |
| let value = dictionary_values.value(i); |
| map.insert( |
| value.as_bytes().into(), |
| K::Native::from_usize(i) |
| .ok_or(ArrowError::DictionaryKeyOverflowError)?, |
| ); |
| values_builder.append_value(value)?; |
| } else { |
| values_builder.append_null()?; |
| } |
| } |
| Ok(Self { |
| keys_builder, |
| values_builder, |
| map, |
| }) |
| } |
| } |
| |
| impl<K> ArrayBuilder for StringDictionaryBuilder<K> |
| where |
| K: ArrowDictionaryKeyType, |
| { |
| /// Returns the builder as an non-mutable `Any` reference. |
| fn as_any(&self) -> &Any { |
| self |
| } |
| |
| /// Returns the builder as an mutable `Any` reference. |
| fn as_any_mut(&mut self) -> &mut Any { |
| self |
| } |
| |
| /// Returns the boxed builder as a box of `Any`. |
| fn into_box_any(self: Box<Self>) -> Box<Any> { |
| self |
| } |
| |
| /// Returns the number of array slots in the builder |
| fn len(&self) -> usize { |
| self.keys_builder.len() |
| } |
| |
| /// Returns whether the number of array slots is zero |
| fn is_empty(&self) -> bool { |
| self.keys_builder.is_empty() |
| } |
| |
| /// Appends data from other arrays into the builder |
| /// |
| /// This is most useful when concatenating arrays of the same type into a builder. |
| fn append_data(&mut self, _data: &[ArrayDataRef]) -> Result<()> { |
| // TODO: This will require an implementation that doesn't just append keys |
| unimplemented!("Appending data for dictionary arrays not yet implemented") |
| } |
| |
| /// Returns the data type of the builder |
| /// |
| /// This is used for validating array data types in `append_data` |
| fn data_type(&self) -> DataType { |
| DataType::Dictionary(Box::new(K::DATA_TYPE), Box::new(DataType::Utf8)) |
| } |
| |
| /// Builds the array and reset this builder. |
| fn finish(&mut self) -> ArrayRef { |
| Arc::new(self.finish()) |
| } |
| } |
| |
| impl<K> StringDictionaryBuilder<K> |
| where |
| K: ArrowDictionaryKeyType, |
| { |
| /// Append a primitive value to the array. Return an existing index |
| /// if already present in the values array or a new index if the |
| /// value is appended to the values array. |
| pub fn append(&mut self, value: &str) -> Result<K::Native> { |
| if let Some(&key) = self.map.get(value.as_bytes()) { |
| // Append existing value. |
| self.keys_builder.append_value(key)?; |
| Ok(key) |
| } else { |
| // Append new value. |
| let key = K::Native::from_usize(self.values_builder.len()) |
| .ok_or(ArrowError::DictionaryKeyOverflowError)?; |
| self.values_builder.append_value(value)?; |
| self.keys_builder.append_value(key as K::Native)?; |
| self.map.insert(value.as_bytes().into(), key); |
| Ok(key) |
| } |
| } |
| |
| pub fn append_null(&mut self) -> Result<()> { |
| self.keys_builder.append_null() |
| } |
| |
| /// Builds the `DictionaryArray` and reset this builder. |
| pub fn finish(&mut self) -> DictionaryArray<K> { |
| self.map.clear(); |
| let value_ref: ArrayRef = Arc::new(self.values_builder.finish()); |
| self.keys_builder.finish_dict(value_ref) |
| } |
| } |
| |
| #[cfg(test)] |
| mod tests { |
| use super::*; |
| |
| use crate::array::Array; |
| use crate::bitmap::Bitmap; |
| |
| #[test] |
| fn test_builder_i32_empty() { |
| let mut b = Int32BufferBuilder::new(5); |
| assert_eq!(0, b.len()); |
| assert_eq!(16, b.capacity()); |
| let a = b.finish(); |
| assert_eq!(0, a.len()); |
| } |
| |
| #[test] |
| fn test_builder_i32_alloc_zero_bytes() { |
| let mut b = Int32BufferBuilder::new(0); |
| b.append(123).unwrap(); |
| let a = b.finish(); |
| assert_eq!(4, a.len()); |
| } |
| |
| #[test] |
| fn test_builder_i32() { |
| let mut b = Int32BufferBuilder::new(5); |
| for i in 0..5 { |
| b.append(i).unwrap(); |
| } |
| assert_eq!(16, b.capacity()); |
| let a = b.finish(); |
| assert_eq!(20, a.len()); |
| } |
| |
| #[test] |
| fn test_builder_i32_grow_buffer() { |
| let mut b = Int32BufferBuilder::new(2); |
| assert_eq!(16, b.capacity()); |
| for i in 0..20 { |
| b.append(i).unwrap(); |
| } |
| assert_eq!(32, b.capacity()); |
| let a = b.finish(); |
| assert_eq!(80, a.len()); |
| } |
| |
| #[test] |
| fn test_builder_finish() { |
| let mut b = Int32BufferBuilder::new(5); |
| assert_eq!(16, b.capacity()); |
| for i in 0..10 { |
| b.append(i).unwrap(); |
| } |
| let mut a = b.finish(); |
| assert_eq!(40, a.len()); |
| assert_eq!(0, b.len()); |
| assert_eq!(0, b.capacity()); |
| |
| // Try build another buffer after cleaning up. |
| for i in 0..20 { |
| b.append(i).unwrap() |
| } |
| assert_eq!(32, b.capacity()); |
| a = b.finish(); |
| assert_eq!(80, a.len()); |
| } |
| |
| #[test] |
| fn test_reserve() { |
| let mut b = UInt8BufferBuilder::new(2); |
| assert_eq!(64, b.capacity()); |
| b.reserve(64); |
| assert_eq!(64, b.capacity()); |
| b.reserve(65); |
| assert_eq!(128, b.capacity()); |
| |
| let mut b = Int32BufferBuilder::new(2); |
| assert_eq!(16, b.capacity()); |
| b.reserve(16); |
| assert_eq!(16, b.capacity()); |
| b.reserve(17); |
| assert_eq!(32, b.capacity()); |
| } |
| |
| #[test] |
| fn test_append_slice() { |
| let mut b = UInt8BufferBuilder::new(0); |
| b.append_slice(b"Hello, ").unwrap(); |
| b.append_slice(b"World!").unwrap(); |
| let buffer = b.finish(); |
| assert_eq!(13, buffer.len()); |
| |
| let mut b = Int32BufferBuilder::new(0); |
| b.append_slice(&[32, 54]).unwrap(); |
| let buffer = b.finish(); |
| assert_eq!(8, buffer.len()); |
| } |
| |
| #[test] |
| fn test_append_values() -> Result<()> { |
| let mut a = Int8Builder::new(0); |
| a.append_value(1)?; |
| a.append_null()?; |
| a.append_value(-2)?; |
| assert_eq!(a.len(), 3); |
| |
| // append values |
| let values = &[1, 2, 3, 4]; |
| let is_valid = &[true, true, false, true]; |
| a.append_values(values, is_valid)?; |
| |
| assert_eq!(a.len(), 7); |
| let array = a.finish(); |
| assert_eq!(array.value(0), 1); |
| assert_eq!(array.is_null(1), true); |
| assert_eq!(array.value(2), -2); |
| assert_eq!(array.value(3), 1); |
| assert_eq!(array.value(4), 2); |
| assert_eq!(array.is_null(5), true); |
| assert_eq!(array.value(6), 4); |
| |
| Ok(()) |
| } |
| |
| #[test] |
| fn test_write_bytes() { |
| let mut b = BooleanBufferBuilder::new(4); |
| b.append(false).unwrap(); |
| b.append(true).unwrap(); |
| b.append(false).unwrap(); |
| b.append(true).unwrap(); |
| assert_eq!(4, b.len()); |
| assert_eq!(512, b.capacity()); |
| let buffer = b.finish(); |
| assert_eq!(1, buffer.len()); |
| |
| let mut b = BooleanBufferBuilder::new(4); |
| b.append_slice(&[false, true, false, true]).unwrap(); |
| assert_eq!(4, b.len()); |
| assert_eq!(512, b.capacity()); |
| let buffer = b.finish(); |
| assert_eq!(1, buffer.len()); |
| } |
| |
| #[test] |
| fn test_write_bytes_i32() { |
| let mut b = Int32BufferBuilder::new(4); |
| let bytes = [8, 16, 32, 64].to_byte_slice(); |
| b.write_bytes(bytes, 4); |
| assert_eq!(4, b.len()); |
| assert_eq!(16, b.capacity()); |
| let buffer = b.finish(); |
| assert_eq!(16, buffer.len()); |
| } |
| |
| #[test] |
| fn test_boolean_array_builder_append_slice() { |
| let arr1 = |
| BooleanArray::from(vec![Some(true), Some(false), None, None, Some(false)]); |
| |
| let mut builder = BooleanArray::builder(0); |
| builder.append_slice(&[true, false]).unwrap(); |
| builder.append_null().unwrap(); |
| builder.append_null().unwrap(); |
| builder.append_value(false).unwrap(); |
| let arr2 = builder.finish(); |
| |
| assert_eq!(arr1.len(), arr2.len()); |
| assert_eq!(arr1.offset(), arr2.offset()); |
| assert_eq!(arr1.null_count(), arr2.null_count()); |
| for i in 0..5 { |
| assert_eq!(arr1.is_null(i), arr2.is_null(i)); |
| assert_eq!(arr1.is_valid(i), arr2.is_valid(i)); |
| if arr1.is_valid(i) { |
| assert_eq!(arr1.value(i), arr2.value(i)); |
| } |
| } |
| } |
| |
| #[test] |
| fn test_boolean_builder_increases_buffer_len() { |
| // 00000010 01001000 |
| let buf = Buffer::from([72_u8, 2_u8]); |
| let mut builder = BooleanBufferBuilder::new(8); |
| |
| for i in 0..10 { |
| if i == 3 || i == 6 || i == 9 { |
| builder.append(true).unwrap(); |
| } else { |
| builder.append(false).unwrap(); |
| } |
| } |
| let buf2 = builder.finish(); |
| |
| assert_eq!(buf.len(), buf2.len()); |
| assert_eq!(buf.data(), buf2.data()); |
| } |
| |
| #[test] |
| fn test_primitive_array_builder_i32() { |
| let mut builder = Int32Array::builder(5); |
| for i in 0..5 { |
| builder.append_value(i).unwrap(); |
| } |
| let arr = builder.finish(); |
| assert_eq!(5, arr.len()); |
| assert_eq!(0, arr.offset()); |
| assert_eq!(0, arr.null_count()); |
| for i in 0..5 { |
| assert!(!arr.is_null(i)); |
| assert!(arr.is_valid(i)); |
| assert_eq!(i as i32, arr.value(i)); |
| } |
| } |
| |
| #[test] |
| fn test_primitive_array_builder_date32() { |
| let mut builder = Date32Array::builder(5); |
| for i in 0..5 { |
| builder.append_value(i).unwrap(); |
| } |
| let arr = builder.finish(); |
| assert_eq!(5, arr.len()); |
| assert_eq!(0, arr.offset()); |
| assert_eq!(0, arr.null_count()); |
| for i in 0..5 { |
| assert!(!arr.is_null(i)); |
| assert!(arr.is_valid(i)); |
| assert_eq!(i as i32, arr.value(i)); |
| } |
| } |
| |
| #[test] |
| fn test_primitive_array_builder_timestamp_second() { |
| let mut builder = TimestampSecondArray::builder(5); |
| for i in 0..5 { |
| builder.append_value(i).unwrap(); |
| } |
| let arr = builder.finish(); |
| assert_eq!(5, arr.len()); |
| assert_eq!(0, arr.offset()); |
| assert_eq!(0, arr.null_count()); |
| for i in 0..5 { |
| assert!(!arr.is_null(i)); |
| assert!(arr.is_valid(i)); |
| assert_eq!(i as i64, arr.value(i)); |
| } |
| } |
| |
| #[test] |
| fn test_primitive_array_builder_bool() { |
| // 00000010 01001000 |
| let buf = Buffer::from([72_u8, 2_u8]); |
| let mut builder = BooleanArray::builder(10); |
| for i in 0..10 { |
| if i == 3 || i == 6 || i == 9 { |
| builder.append_value(true).unwrap(); |
| } else { |
| builder.append_value(false).unwrap(); |
| } |
| } |
| |
| let arr = builder.finish(); |
| assert_eq!(buf, arr.values()); |
| assert_eq!(10, arr.len()); |
| assert_eq!(0, arr.offset()); |
| assert_eq!(0, arr.null_count()); |
| for i in 0..10 { |
| assert!(!arr.is_null(i)); |
| assert!(arr.is_valid(i)); |
| assert_eq!(i == 3 || i == 6 || i == 9, arr.value(i), "failed at {}", i) |
| } |
| } |
| |
| #[test] |
| fn test_primitive_array_builder_append_option() { |
| let arr1 = Int32Array::from(vec![Some(0), None, Some(2), None, Some(4)]); |
| |
| let mut builder = Int32Array::builder(5); |
| builder.append_option(Some(0)).unwrap(); |
| builder.append_option(None).unwrap(); |
| builder.append_option(Some(2)).unwrap(); |
| builder.append_option(None).unwrap(); |
| builder.append_option(Some(4)).unwrap(); |
| let arr2 = builder.finish(); |
| |
| assert_eq!(arr1.len(), arr2.len()); |
| assert_eq!(arr1.offset(), arr2.offset()); |
| assert_eq!(arr1.null_count(), arr2.null_count()); |
| for i in 0..5 { |
| assert_eq!(arr1.is_null(i), arr2.is_null(i)); |
| assert_eq!(arr1.is_valid(i), arr2.is_valid(i)); |
| if arr1.is_valid(i) { |
| assert_eq!(arr1.value(i), arr2.value(i)); |
| } |
| } |
| } |
| |
| #[test] |
| fn test_primitive_array_builder_append_null() { |
| let arr1 = Int32Array::from(vec![Some(0), Some(2), None, None, Some(4)]); |
| |
| let mut builder = Int32Array::builder(5); |
| builder.append_value(0).unwrap(); |
| builder.append_value(2).unwrap(); |
| builder.append_null().unwrap(); |
| builder.append_null().unwrap(); |
| builder.append_value(4).unwrap(); |
| let arr2 = builder.finish(); |
| |
| assert_eq!(arr1.len(), arr2.len()); |
| assert_eq!(arr1.offset(), arr2.offset()); |
| assert_eq!(arr1.null_count(), arr2.null_count()); |
| for i in 0..5 { |
| assert_eq!(arr1.is_null(i), arr2.is_null(i)); |
| assert_eq!(arr1.is_valid(i), arr2.is_valid(i)); |
| if arr1.is_valid(i) { |
| assert_eq!(arr1.value(i), arr2.value(i)); |
| } |
| } |
| } |
| |
| #[test] |
| fn test_primitive_array_builder_append_slice() { |
| let arr1 = Int32Array::from(vec![Some(0), Some(2), None, None, Some(4)]); |
| |
| let mut builder = Int32Array::builder(5); |
| builder.append_slice(&[0, 2]).unwrap(); |
| builder.append_null().unwrap(); |
| builder.append_null().unwrap(); |
| builder.append_value(4).unwrap(); |
| let arr2 = builder.finish(); |
| |
| assert_eq!(arr1.len(), arr2.len()); |
| assert_eq!(arr1.offset(), arr2.offset()); |
| assert_eq!(arr1.null_count(), arr2.null_count()); |
| for i in 0..5 { |
| assert_eq!(arr1.is_null(i), arr2.is_null(i)); |
| assert_eq!(arr1.is_valid(i), arr2.is_valid(i)); |
| if arr1.is_valid(i) { |
| assert_eq!(arr1.value(i), arr2.value(i)); |
| } |
| } |
| } |
| |
| #[test] |
| fn test_primitive_array_builder_finish() { |
| let mut builder = Int32Builder::new(5); |
| builder.append_slice(&[2, 4, 6, 8]).unwrap(); |
| let mut arr = builder.finish(); |
| assert_eq!(4, arr.len()); |
| assert_eq!(0, builder.len()); |
| |
| builder.append_slice(&[1, 3, 5, 7, 9]).unwrap(); |
| arr = builder.finish(); |
| assert_eq!(5, arr.len()); |
| assert_eq!(0, builder.len()); |
| } |
| |
| #[test] |
| fn test_list_array_builder() { |
| let values_builder = Int32Builder::new(10); |
| let mut builder = ListBuilder::new(values_builder); |
| |
| // [[0, 1, 2], [3, 4, 5], [6, 7]] |
| builder.values().append_value(0).unwrap(); |
| builder.values().append_value(1).unwrap(); |
| builder.values().append_value(2).unwrap(); |
| builder.append(true).unwrap(); |
| builder.values().append_value(3).unwrap(); |
| builder.values().append_value(4).unwrap(); |
| builder.values().append_value(5).unwrap(); |
| builder.append(true).unwrap(); |
| builder.values().append_value(6).unwrap(); |
| builder.values().append_value(7).unwrap(); |
| builder.append(true).unwrap(); |
| let list_array = builder.finish(); |
| |
| let values = list_array.values().data().buffers()[0].clone(); |
| assert_eq!( |
| Buffer::from(&[0, 1, 2, 3, 4, 5, 6, 7].to_byte_slice()), |
| values |
| ); |
| assert_eq!( |
| Buffer::from(&[0, 3, 6, 8].to_byte_slice()), |
| list_array.data().buffers()[0].clone() |
| ); |
| assert_eq!(DataType::Int32, list_array.value_type()); |
| assert_eq!(3, list_array.len()); |
| assert_eq!(0, list_array.null_count()); |
| assert_eq!(6, list_array.value_offset(2)); |
| assert_eq!(2, list_array.value_length(2)); |
| for i in 0..3 { |
| assert!(list_array.is_valid(i)); |
| assert!(!list_array.is_null(i)); |
| } |
| } |
| |
| #[test] |
| fn test_large_list_array_builder() { |
| let values_builder = Int32Builder::new(10); |
| let mut builder = LargeListBuilder::new(values_builder); |
| |
| // [[0, 1, 2], [3, 4, 5], [6, 7]] |
| builder.values().append_value(0).unwrap(); |
| builder.values().append_value(1).unwrap(); |
| builder.values().append_value(2).unwrap(); |
| builder.append(true).unwrap(); |
| builder.values().append_value(3).unwrap(); |
| builder.values().append_value(4).unwrap(); |
| builder.values().append_value(5).unwrap(); |
| builder.append(true).unwrap(); |
| builder.values().append_value(6).unwrap(); |
| builder.values().append_value(7).unwrap(); |
| builder.append(true).unwrap(); |
| let list_array = builder.finish(); |
| |
| let values = list_array.values().data().buffers()[0].clone(); |
| assert_eq!( |
| Buffer::from(&[0, 1, 2, 3, 4, 5, 6, 7].to_byte_slice()), |
| values |
| ); |
| assert_eq!( |
| Buffer::from(&[0i64, 3, 6, 8].to_byte_slice()), |
| list_array.data().buffers()[0].clone() |
| ); |
| assert_eq!(DataType::Int32, list_array.value_type()); |
| assert_eq!(3, list_array.len()); |
| assert_eq!(0, list_array.null_count()); |
| assert_eq!(6, list_array.value_offset(2)); |
| assert_eq!(2, list_array.value_length(2)); |
| for i in 0..3 { |
| assert!(list_array.is_valid(i)); |
| assert!(!list_array.is_null(i)); |
| } |
| } |
| |
| #[test] |
| fn test_list_array_builder_nulls() { |
| let values_builder = Int32Builder::new(10); |
| let mut builder = ListBuilder::new(values_builder); |
| |
| // [[0, 1, 2], null, [3, null, 5], [6, 7]] |
| builder.values().append_value(0).unwrap(); |
| builder.values().append_value(1).unwrap(); |
| builder.values().append_value(2).unwrap(); |
| builder.append(true).unwrap(); |
| builder.append(false).unwrap(); |
| builder.values().append_value(3).unwrap(); |
| builder.values().append_null().unwrap(); |
| builder.values().append_value(5).unwrap(); |
| builder.append(true).unwrap(); |
| builder.values().append_value(6).unwrap(); |
| builder.values().append_value(7).unwrap(); |
| builder.append(true).unwrap(); |
| let list_array = builder.finish(); |
| |
| assert_eq!(DataType::Int32, list_array.value_type()); |
| assert_eq!(4, list_array.len()); |
| assert_eq!(1, list_array.null_count()); |
| assert_eq!(3, list_array.value_offset(2)); |
| assert_eq!(3, list_array.value_length(2)); |
| } |
| |
| #[test] |
| fn test_large_list_array_builder_nulls() { |
| let values_builder = Int32Builder::new(10); |
| let mut builder = LargeListBuilder::new(values_builder); |
| |
| // [[0, 1, 2], null, [3, null, 5], [6, 7]] |
| builder.values().append_value(0).unwrap(); |
| builder.values().append_value(1).unwrap(); |
| builder.values().append_value(2).unwrap(); |
| builder.append(true).unwrap(); |
| builder.append(false).unwrap(); |
| builder.values().append_value(3).unwrap(); |
| builder.values().append_null().unwrap(); |
| builder.values().append_value(5).unwrap(); |
| builder.append(true).unwrap(); |
| builder.values().append_value(6).unwrap(); |
| builder.values().append_value(7).unwrap(); |
| builder.append(true).unwrap(); |
| let list_array = builder.finish(); |
| |
| assert_eq!(DataType::Int32, list_array.value_type()); |
| assert_eq!(4, list_array.len()); |
| assert_eq!(1, list_array.null_count()); |
| assert_eq!(3, list_array.value_offset(2)); |
| assert_eq!(3, list_array.value_length(2)); |
| } |
| |
| #[test] |
| fn test_fixed_size_list_array_builder() { |
| let values_builder = Int32Builder::new(10); |
| let mut builder = FixedSizeListBuilder::new(values_builder, 3); |
| |
| // [[0, 1, 2], null, [3, null, 5], [6, 7, null]] |
| builder.values().append_value(0).unwrap(); |
| builder.values().append_value(1).unwrap(); |
| builder.values().append_value(2).unwrap(); |
| builder.append(true).unwrap(); |
| builder.values().append_null().unwrap(); |
| builder.values().append_null().unwrap(); |
| builder.values().append_null().unwrap(); |
| builder.append(false).unwrap(); |
| builder.values().append_value(3).unwrap(); |
| builder.values().append_null().unwrap(); |
| builder.values().append_value(5).unwrap(); |
| builder.append(true).unwrap(); |
| builder.values().append_value(6).unwrap(); |
| builder.values().append_value(7).unwrap(); |
| builder.values().append_null().unwrap(); |
| builder.append(true).unwrap(); |
| let list_array = builder.finish(); |
| |
| assert_eq!(DataType::Int32, list_array.value_type()); |
| assert_eq!(4, list_array.len()); |
| assert_eq!(1, list_array.null_count()); |
| assert_eq!(6, list_array.value_offset(2)); |
| assert_eq!(3, list_array.value_length()); |
| } |
| |
| #[test] |
| fn test_list_array_builder_finish() { |
| let values_builder = Int32Array::builder(5); |
| let mut builder = ListBuilder::new(values_builder); |
| |
| builder.values().append_slice(&[1, 2, 3]).unwrap(); |
| builder.append(true).unwrap(); |
| builder.values().append_slice(&[4, 5, 6]).unwrap(); |
| builder.append(true).unwrap(); |
| |
| let mut arr = builder.finish(); |
| assert_eq!(2, arr.len()); |
| assert_eq!(0, builder.len()); |
| |
| builder.values().append_slice(&[7, 8, 9]).unwrap(); |
| builder.append(true).unwrap(); |
| arr = builder.finish(); |
| assert_eq!(1, arr.len()); |
| assert_eq!(0, builder.len()); |
| } |
| |
| #[test] |
| fn test_fixed_size_list_array_builder_empty() { |
| let values_builder = Int32Array::builder(5); |
| let mut builder = FixedSizeListBuilder::new(values_builder, 3); |
| |
| let arr = builder.finish(); |
| assert_eq!(0, arr.len()); |
| assert_eq!(0, builder.len()); |
| } |
| |
| #[test] |
| fn test_fixed_size_list_array_builder_finish() { |
| let values_builder = Int32Array::builder(5); |
| let mut builder = FixedSizeListBuilder::new(values_builder, 3); |
| |
| builder.values().append_slice(&[1, 2, 3]).unwrap(); |
| builder.append(true).unwrap(); |
| builder.values().append_slice(&[4, 5, 6]).unwrap(); |
| builder.append(true).unwrap(); |
| |
| let mut arr = builder.finish(); |
| assert_eq!(2, arr.len()); |
| assert_eq!(0, builder.len()); |
| |
| builder.values().append_slice(&[7, 8, 9]).unwrap(); |
| builder.append(true).unwrap(); |
| arr = builder.finish(); |
| assert_eq!(1, arr.len()); |
| assert_eq!(0, builder.len()); |
| } |
| |
| #[test] |
| fn test_list_list_array_builder() { |
| let primitive_builder = Int32Builder::new(10); |
| let values_builder = ListBuilder::new(primitive_builder); |
| let mut builder = ListBuilder::new(values_builder); |
| |
| // [[[1, 2], [3, 4]], [[5, 6, 7], null, [8]], null, [[9, 10]]] |
| builder.values().values().append_value(1).unwrap(); |
| builder.values().values().append_value(2).unwrap(); |
| builder.values().append(true).unwrap(); |
| builder.values().values().append_value(3).unwrap(); |
| builder.values().values().append_value(4).unwrap(); |
| builder.values().append(true).unwrap(); |
| builder.append(true).unwrap(); |
| |
| builder.values().values().append_value(5).unwrap(); |
| builder.values().values().append_value(6).unwrap(); |
| builder.values().values().append_value(7).unwrap(); |
| builder.values().append(true).unwrap(); |
| builder.values().append(false).unwrap(); |
| builder.values().values().append_value(8).unwrap(); |
| builder.values().append(true).unwrap(); |
| builder.append(true).unwrap(); |
| |
| builder.append(false).unwrap(); |
| |
| builder.values().values().append_value(9).unwrap(); |
| builder.values().values().append_value(10).unwrap(); |
| builder.values().append(true).unwrap(); |
| builder.append(true).unwrap(); |
| |
| let list_array = builder.finish(); |
| |
| assert_eq!(4, list_array.len()); |
| assert_eq!(1, list_array.null_count()); |
| assert_eq!( |
| Buffer::from(&[0, 2, 5, 5, 6].to_byte_slice()), |
| list_array.data().buffers()[0].clone() |
| ); |
| |
| assert_eq!(6, list_array.values().data().len()); |
| assert_eq!(1, list_array.values().data().null_count()); |
| assert_eq!( |
| Buffer::from(&[0, 2, 4, 7, 7, 8, 10].to_byte_slice()), |
| list_array.values().data().buffers()[0].clone() |
| ); |
| |
| assert_eq!(10, list_array.values().data().child_data()[0].len()); |
| assert_eq!(0, list_array.values().data().child_data()[0].null_count()); |
| assert_eq!( |
| Buffer::from(&[1, 2, 3, 4, 5, 6, 7, 8, 9, 10].to_byte_slice()), |
| list_array.values().data().child_data()[0].buffers()[0].clone() |
| ); |
| } |
| |
| #[test] |
| fn test_binary_array_builder() { |
| let mut builder = BinaryBuilder::new(20); |
| |
| builder.append_byte(b'h').unwrap(); |
| builder.append_byte(b'e').unwrap(); |
| builder.append_byte(b'l').unwrap(); |
| builder.append_byte(b'l').unwrap(); |
| builder.append_byte(b'o').unwrap(); |
| builder.append(true).unwrap(); |
| builder.append(true).unwrap(); |
| builder.append_byte(b'w').unwrap(); |
| builder.append_byte(b'o').unwrap(); |
| builder.append_byte(b'r').unwrap(); |
| builder.append_byte(b'l').unwrap(); |
| builder.append_byte(b'd').unwrap(); |
| builder.append(true).unwrap(); |
| |
| let binary_array = builder.finish(); |
| |
| assert_eq!(3, binary_array.len()); |
| assert_eq!(0, binary_array.null_count()); |
| assert_eq!([b'h', b'e', b'l', b'l', b'o'], binary_array.value(0)); |
| assert_eq!([] as [u8; 0], binary_array.value(1)); |
| assert_eq!([b'w', b'o', b'r', b'l', b'd'], binary_array.value(2)); |
| assert_eq!(5, binary_array.value_offset(2)); |
| assert_eq!(5, binary_array.value_length(2)); |
| } |
| |
| #[test] |
| fn test_large_binary_array_builder() { |
| let mut builder = LargeBinaryBuilder::new(20); |
| |
| builder.append_byte(b'h').unwrap(); |
| builder.append_byte(b'e').unwrap(); |
| builder.append_byte(b'l').unwrap(); |
| builder.append_byte(b'l').unwrap(); |
| builder.append_byte(b'o').unwrap(); |
| builder.append(true).unwrap(); |
| builder.append(true).unwrap(); |
| builder.append_byte(b'w').unwrap(); |
| builder.append_byte(b'o').unwrap(); |
| builder.append_byte(b'r').unwrap(); |
| builder.append_byte(b'l').unwrap(); |
| builder.append_byte(b'd').unwrap(); |
| builder.append(true).unwrap(); |
| |
| let binary_array = builder.finish(); |
| |
| assert_eq!(3, binary_array.len()); |
| assert_eq!(0, binary_array.null_count()); |
| assert_eq!([b'h', b'e', b'l', b'l', b'o'], binary_array.value(0)); |
| assert_eq!([] as [u8; 0], binary_array.value(1)); |
| assert_eq!([b'w', b'o', b'r', b'l', b'd'], binary_array.value(2)); |
| assert_eq!(5, binary_array.value_offset(2)); |
| assert_eq!(5, binary_array.value_length(2)); |
| } |
| |
| #[test] |
| fn test_string_array_builder() { |
| let mut builder = StringBuilder::new(20); |
| |
| builder.append_value("hello").unwrap(); |
| builder.append(true).unwrap(); |
| builder.append_value("world").unwrap(); |
| |
| let string_array = builder.finish(); |
| |
| assert_eq!(3, string_array.len()); |
| assert_eq!(0, string_array.null_count()); |
| assert_eq!("hello", string_array.value(0)); |
| assert_eq!("", string_array.value(1)); |
| assert_eq!("world", string_array.value(2)); |
| assert_eq!(5, string_array.value_offset(2)); |
| assert_eq!(5, string_array.value_length(2)); |
| } |
| |
| #[test] |
| fn test_fixed_size_binary_builder() { |
| let mut builder = FixedSizeBinaryBuilder::new(15, 5); |
| |
| // [b"hello", null, "arrow"] |
| builder.append_value(b"hello").unwrap(); |
| builder.append_null().unwrap(); |
| builder.append_value(b"arrow").unwrap(); |
| let fixed_size_binary_array: FixedSizeBinaryArray = builder.finish(); |
| |
| assert_eq!( |
| &DataType::FixedSizeBinary(5), |
| fixed_size_binary_array.data_type() |
| ); |
| assert_eq!(3, fixed_size_binary_array.len()); |
| assert_eq!(1, fixed_size_binary_array.null_count()); |
| assert_eq!(10, fixed_size_binary_array.value_offset(2)); |
| assert_eq!(5, fixed_size_binary_array.value_length()); |
| } |
| |
| #[test] |
| fn test_decimal_builder() { |
| let mut builder = DecimalBuilder::new(30, 23, 6); |
| |
| builder.append_value(8_887_000_000).unwrap(); |
| builder.append_null().unwrap(); |
| builder.append_value(-8_887_000_000).unwrap(); |
| let decimal_array: DecimalArray = builder.finish(); |
| |
| assert_eq!(&DataType::Decimal(23, 6), decimal_array.data_type()); |
| assert_eq!(3, decimal_array.len()); |
| assert_eq!(1, decimal_array.null_count()); |
| assert_eq!(20, decimal_array.value_offset(2)); |
| assert_eq!(10, decimal_array.value_length()); |
| } |
| |
| #[test] |
| fn test_string_array_builder_finish() { |
| let mut builder = StringBuilder::new(10); |
| |
| builder.append_value("hello").unwrap(); |
| builder.append_value("world").unwrap(); |
| |
| let mut arr = builder.finish(); |
| assert_eq!(2, arr.len()); |
| assert_eq!(0, builder.len()); |
| |
| builder.append_value("arrow").unwrap(); |
| arr = builder.finish(); |
| assert_eq!(1, arr.len()); |
| assert_eq!(0, builder.len()); |
| } |
| |
| #[test] |
| fn test_string_array_builder_append_string() { |
| let mut builder = StringBuilder::new(20); |
| |
| let var = "hello".to_owned(); |
| builder.append_value(&var).unwrap(); |
| builder.append(true).unwrap(); |
| builder.append_value("world").unwrap(); |
| |
| let string_array = builder.finish(); |
| |
| assert_eq!(3, string_array.len()); |
| assert_eq!(0, string_array.null_count()); |
| assert_eq!("hello", string_array.value(0)); |
| assert_eq!("", string_array.value(1)); |
| assert_eq!("world", string_array.value(2)); |
| assert_eq!(5, string_array.value_offset(2)); |
| assert_eq!(5, string_array.value_length(2)); |
| } |
| |
| #[test] |
| fn test_struct_array_builder() { |
| let string_builder = StringBuilder::new(4); |
| let int_builder = Int32Builder::new(4); |
| |
| let mut fields = Vec::new(); |
| let mut field_builders = Vec::new(); |
| fields.push(Field::new("f1", DataType::Utf8, false)); |
| field_builders.push(Box::new(string_builder) as Box<ArrayBuilder>); |
| fields.push(Field::new("f2", DataType::Int32, false)); |
| field_builders.push(Box::new(int_builder) as Box<ArrayBuilder>); |
| |
| let mut builder = StructBuilder::new(fields, field_builders); |
| assert_eq!(2, builder.num_fields()); |
| |
| let string_builder = builder |
| .field_builder::<StringBuilder>(0) |
| .expect("builder at field 0 should be string builder"); |
| string_builder.append_value("joe").unwrap(); |
| string_builder.append_null().unwrap(); |
| string_builder.append_null().unwrap(); |
| string_builder.append_value("mark").unwrap(); |
| |
| let int_builder = builder |
| .field_builder::<Int32Builder>(1) |
| .expect("builder at field 1 should be int builder"); |
| int_builder.append_value(1).unwrap(); |
| int_builder.append_value(2).unwrap(); |
| int_builder.append_null().unwrap(); |
| int_builder.append_value(4).unwrap(); |
| |
| builder.append(true).unwrap(); |
| builder.append(true).unwrap(); |
| builder.append_null().unwrap(); |
| builder.append(true).unwrap(); |
| |
| let arr = builder.finish(); |
| |
| let struct_data = arr.data(); |
| assert_eq!(4, struct_data.len()); |
| assert_eq!(1, struct_data.null_count()); |
| assert_eq!( |
| &Some(Bitmap::from(Buffer::from(&[11_u8]))), |
| struct_data.null_bitmap() |
| ); |
| |
| let expected_string_data = ArrayData::builder(DataType::Utf8) |
| .len(4) |
| .null_count(2) |
| .null_bit_buffer(Buffer::from(&[9_u8])) |
| .add_buffer(Buffer::from(&[0, 3, 3, 3, 7].to_byte_slice())) |
| .add_buffer(Buffer::from(b"joemark")) |
| .build(); |
| |
| let expected_int_data = ArrayData::builder(DataType::Int32) |
| .len(4) |
| .null_count(1) |
| .null_bit_buffer(Buffer::from(&[11_u8])) |
| .add_buffer(Buffer::from(&[1, 2, 0, 4].to_byte_slice())) |
| .build(); |
| |
| assert_eq!(expected_string_data, arr.column(0).data()); |
| |
| // TODO: implement equality for ArrayData |
| assert_eq!(expected_int_data.len(), arr.column(1).data().len()); |
| assert_eq!( |
| expected_int_data.null_count(), |
| arr.column(1).data().null_count() |
| ); |
| assert_eq!( |
| expected_int_data.null_bitmap(), |
| arr.column(1).data().null_bitmap() |
| ); |
| let expected_value_buf = expected_int_data.buffers()[0].clone(); |
| let actual_value_buf = arr.column(1).data().buffers()[0].clone(); |
| for i in 0..expected_int_data.len() { |
| if !expected_int_data.is_null(i) { |
| assert_eq!( |
| expected_value_buf.data()[i * 4..(i + 1) * 4], |
| actual_value_buf.data()[i * 4..(i + 1) * 4] |
| ); |
| } |
| } |
| } |
| |
| #[test] |
| fn test_struct_array_builder_finish() { |
| let int_builder = Int32Builder::new(10); |
| let bool_builder = BooleanBuilder::new(10); |
| |
| let mut fields = Vec::new(); |
| let mut field_builders = Vec::new(); |
| fields.push(Field::new("f1", DataType::Int32, false)); |
| field_builders.push(Box::new(int_builder) as Box<ArrayBuilder>); |
| fields.push(Field::new("f2", DataType::Boolean, false)); |
| field_builders.push(Box::new(bool_builder) as Box<ArrayBuilder>); |
| |
| let mut builder = StructBuilder::new(fields, field_builders); |
| builder |
| .field_builder::<Int32Builder>(0) |
| .unwrap() |
| .append_slice(&[0, 1, 2, 3, 4, 5, 6, 7, 8, 9]) |
| .unwrap(); |
| builder |
| .field_builder::<BooleanBuilder>(1) |
| .unwrap() |
| .append_slice(&[ |
| false, true, false, true, false, true, false, true, false, true, |
| ]) |
| .unwrap(); |
| |
| // Append slot values - all are valid. |
| for _ in 0..10 { |
| assert!(builder.append(true).is_ok()) |
| } |
| |
| assert_eq!(10, builder.len()); |
| |
| let arr = builder.finish(); |
| |
| assert_eq!(10, arr.len()); |
| assert_eq!(0, builder.len()); |
| |
| builder |
| .field_builder::<Int32Builder>(0) |
| .unwrap() |
| .append_slice(&[1, 3, 5, 7, 9]) |
| .unwrap(); |
| builder |
| .field_builder::<BooleanBuilder>(1) |
| .unwrap() |
| .append_slice(&[false, true, false, true, false]) |
| .unwrap(); |
| |
| // Append slot values - all are valid. |
| for _ in 0..5 { |
| assert!(builder.append(true).is_ok()) |
| } |
| |
| assert_eq!(5, builder.len()); |
| |
| let arr = builder.finish(); |
| |
| assert_eq!(5, arr.len()); |
| assert_eq!(0, builder.len()); |
| } |
| |
| #[test] |
| fn test_struct_array_builder_from_schema() { |
| let mut fields = Vec::new(); |
| fields.push(Field::new("f1", DataType::Float32, false)); |
| fields.push(Field::new("f2", DataType::Utf8, false)); |
| let mut sub_fields = Vec::new(); |
| sub_fields.push(Field::new("g1", DataType::Int32, false)); |
| sub_fields.push(Field::new("g2", DataType::Boolean, false)); |
| let struct_type = DataType::Struct(sub_fields); |
| fields.push(Field::new("f3", struct_type, false)); |
| |
| let mut builder = StructBuilder::from_fields(fields, 5); |
| assert_eq!(3, builder.num_fields()); |
| assert!(builder.field_builder::<Float32Builder>(0).is_some()); |
| assert!(builder.field_builder::<StringBuilder>(1).is_some()); |
| assert!(builder.field_builder::<StructBuilder>(2).is_some()); |
| } |
| |
| #[test] |
| #[should_panic( |
| expected = "Data type List(NullableDataType { data_type: Int64, nullable: true }) is not currently supported" |
| )] |
| fn test_struct_array_builder_from_schema_unsupported_type() { |
| let mut fields = Vec::new(); |
| fields.push(Field::new("f1", DataType::Int16, false)); |
| let list_type = |
| DataType::List(Box::new(NullableDataType::new(DataType::Int64, true))); |
| fields.push(Field::new("f2", list_type, false)); |
| |
| let _ = StructBuilder::from_fields(fields, 5); |
| } |
| |
| #[test] |
| fn test_struct_array_builder_field_builder_type_mismatch() { |
| let int_builder = Int32Builder::new(10); |
| |
| let mut fields = Vec::new(); |
| let mut field_builders = Vec::new(); |
| fields.push(Field::new("f1", DataType::Int32, false)); |
| field_builders.push(Box::new(int_builder) as Box<ArrayBuilder>); |
| |
| let mut builder = StructBuilder::new(fields, field_builders); |
| assert!(builder.field_builder::<BinaryBuilder>(0).is_none()); |
| } |
| |
| #[test] |
| fn test_primitive_dictionary_builder() { |
| let key_builder = PrimitiveBuilder::<UInt8Type>::new(3); |
| let value_builder = PrimitiveBuilder::<UInt32Type>::new(2); |
| let mut builder = PrimitiveDictionaryBuilder::new(key_builder, value_builder); |
| builder.append(12345678).unwrap(); |
| builder.append_null().unwrap(); |
| builder.append(22345678).unwrap(); |
| let array = builder.finish(); |
| |
| assert_eq!( |
| array.keys(), |
| &UInt8Array::from(vec![Some(0), None, Some(1)]) |
| ); |
| |
| // Values are polymorphic and so require a downcast. |
| let av = array.values(); |
| let ava: &UInt32Array = av.as_any().downcast_ref::<UInt32Array>().unwrap(); |
| let avs: &[u32] = ava.value_slice(0, array.values().len()); |
| |
| assert_eq!(array.is_null(0), false); |
| assert_eq!(array.is_null(1), true); |
| assert_eq!(array.is_null(2), false); |
| |
| assert_eq!(avs, &[12345678, 22345678]); |
| } |
| |
| #[test] |
| fn test_string_dictionary_builder() { |
| let key_builder = PrimitiveBuilder::<Int8Type>::new(5); |
| let value_builder = StringBuilder::new(2); |
| let mut builder = StringDictionaryBuilder::new(key_builder, value_builder); |
| builder.append("abc").unwrap(); |
| builder.append_null().unwrap(); |
| builder.append("def").unwrap(); |
| builder.append("def").unwrap(); |
| builder.append("abc").unwrap(); |
| let array = builder.finish(); |
| |
| assert_eq!( |
| array.keys(), |
| &Int8Array::from(vec![Some(0), None, Some(1), Some(1), Some(0)]) |
| ); |
| |
| // Values are polymorphic and so require a downcast. |
| let av = array.values(); |
| let ava: &StringArray = av.as_any().downcast_ref::<StringArray>().unwrap(); |
| |
| assert_eq!(ava.value(0), "abc"); |
| assert_eq!(ava.value(1), "def"); |
| } |
| |
| #[test] |
| fn test_string_dictionary_builder_with_existing_dictionary() { |
| let dictionary = StringArray::from(vec![None, Some("def"), Some("abc")]); |
| |
| let key_builder = PrimitiveBuilder::<Int8Type>::new(6); |
| let mut builder = |
| StringDictionaryBuilder::new_with_dictionary(key_builder, &dictionary) |
| .unwrap(); |
| builder.append("abc").unwrap(); |
| builder.append_null().unwrap(); |
| builder.append("def").unwrap(); |
| builder.append("def").unwrap(); |
| builder.append("abc").unwrap(); |
| builder.append("ghi").unwrap(); |
| let array = builder.finish(); |
| |
| assert_eq!( |
| array.keys(), |
| &Int8Array::from(vec![Some(2), None, Some(1), Some(1), Some(2), Some(3)]) |
| ); |
| |
| // Values are polymorphic and so require a downcast. |
| let av = array.values(); |
| let ava: &StringArray = av.as_any().downcast_ref::<StringArray>().unwrap(); |
| |
| assert_eq!(ava.is_valid(0), false); |
| assert_eq!(ava.value(1), "def"); |
| assert_eq!(ava.value(2), "abc"); |
| assert_eq!(ava.value(3), "ghi"); |
| } |
| |
| #[test] |
| fn test_string_dictionary_builder_with_reserved_null_value() { |
| let dictionary: Vec<Option<&str>> = vec![None]; |
| let dictionary = StringArray::from(dictionary); |
| |
| let key_builder = PrimitiveBuilder::<Int16Type>::new(4); |
| let mut builder = |
| StringDictionaryBuilder::new_with_dictionary(key_builder, &dictionary) |
| .unwrap(); |
| builder.append("abc").unwrap(); |
| builder.append_null().unwrap(); |
| builder.append("def").unwrap(); |
| builder.append("abc").unwrap(); |
| let array = builder.finish(); |
| |
| assert_eq!(array.is_null(1), true); |
| assert_eq!(array.is_valid(1), false); |
| |
| let keys = array.keys_array(); |
| |
| assert_eq!(keys.value(0), 1); |
| assert_eq!(keys.is_null(1), true); |
| // zero initialization is currently guaranteed by Buffer allocation and resizing |
| assert_eq!(keys.value(1), 0); |
| assert_eq!(keys.value(2), 2); |
| assert_eq!(keys.value(3), 1); |
| } |
| |
| #[test] |
| #[should_panic(expected = "DictionaryKeyOverflowError")] |
| fn test_primitive_dictionary_overflow() { |
| let key_builder = PrimitiveBuilder::<UInt8Type>::new(257); |
| let value_builder = PrimitiveBuilder::<UInt32Type>::new(257); |
| let mut builder = PrimitiveDictionaryBuilder::new(key_builder, value_builder); |
| // 256 unique keys. |
| for i in 0..256 { |
| builder.append(i + 1000).unwrap(); |
| } |
| // Special error if the key overflows (256th entry) |
| builder.append(1257).unwrap(); |
| } |
| |
| #[test] |
| fn test_primitive_append() -> Result<()> { |
| let mut builder = Int32Builder::new(2); |
| builder.append_null()?; |
| builder.append_value(1)?; |
| // create an array to append |
| let array = Int32Array::from(vec![None, Some(3), None, None, Some(6), Some(7)]); |
| builder.append_data(&[ |
| array.data(), |
| array.slice(1, 4).data(), |
| array.slice(2, 0).data(), |
| ])?; |
| let finished = builder.finish(); |
| let expected = Int32Array::from(vec![ |
| None, |
| Some(1), |
| None, |
| Some(3), |
| None, |
| None, |
| Some(6), |
| Some(7), |
| // array.data() end |
| Some(3), |
| None, |
| None, |
| Some(6), |
| ]); |
| assert_eq!(finished.len(), expected.len()); |
| assert_eq!(finished.null_count(), expected.null_count()); |
| assert_eq!(finished, expected); |
| |
| let mut builder = Float64Builder::new(64); |
| builder.append_null()?; |
| builder.append_value(1.0)?; |
| // create an array to append |
| let array = |
| Float64Array::from(vec![None, Some(3.0), None, None, Some(6.0), Some(7.0)]); |
| builder.append_data(&[ |
| array.data(), |
| array.slice(1, 5).data(), |
| array.slice(2, 1).data(), |
| ])?; |
| let finished = builder.finish(); |
| let expected = Float64Array::from(vec![ |
| None, |
| Some(1.0), |
| None, |
| Some(3.0), |
| None, |
| None, |
| Some(6.0), |
| Some(7.0), |
| Some(3.0), |
| None, |
| None, |
| Some(6.0), |
| Some(7.0), |
| None, |
| ]); |
| assert_eq!(finished.len(), expected.len()); |
| assert_eq!(finished.null_count(), expected.null_count()); |
| assert_eq!(finished, expected); |
| Ok(()) |
| } |
| |
| #[test] |
| fn test_boolean_append() -> Result<()> { |
| let mut builder = BooleanBuilder::new(2); |
| builder.append_null()?; |
| builder.append_value(true)?; |
| // create an array to append |
| let array = BooleanArray::from(vec![ |
| None, |
| Some(true), |
| None, |
| None, |
| Some(false), |
| Some(true), |
| ]); |
| builder.append_data(&[ |
| array.data(), |
| array.slice(1, 4).data(), |
| array.slice(2, 0).data(), |
| ])?; |
| let finished = builder.finish(); |
| let expected = BooleanArray::from(vec![ |
| None, |
| Some(true), |
| None, |
| Some(true), |
| None, |
| None, |
| Some(false), |
| Some(true), |
| Some(true), |
| None, |
| None, |
| Some(false), |
| ]); |
| assert_eq!(finished.len(), expected.len()); |
| assert_eq!(finished.null_count(), expected.null_count()); |
| assert_eq!(finished, expected); |
| Ok(()) |
| } |
| |
| #[test] |
| fn test_list_append() -> Result<()> { |
| let int_builder = Int64Builder::new(24); |
| let mut builder = ListBuilder::<Int64Builder>::new(int_builder); |
| builder.values().append_slice(&[1, 2, 3])?; |
| builder.append(true)?; |
| builder.values().append_slice(&[4, 5])?; |
| builder.append(true)?; |
| builder.values().append_slice(&[6, 7, 8])?; |
| builder.values().append_slice(&[9, 10, 11])?; |
| builder.append(true)?; |
| |
| let a_builder = Int64Builder::new(24); |
| let mut a_builder = ListBuilder::<Int64Builder>::new(a_builder); |
| a_builder.values().append_slice(&[12, 13])?; |
| a_builder.append(true)?; |
| a_builder.append(true)?; |
| a_builder.values().append_slice(&[14, 15])?; |
| a_builder.append(true)?; |
| let a = a_builder.finish(); |
| |
| // append array |
| builder.append_data(&[a.data(), a.slice(1, 2).data()])?; |
| let finished = builder.finish(); |
| |
| let expected_int_array = Int64Array::from(vec![ |
| Some(1), |
| Some(2), |
| Some(3), |
| Some(4), |
| Some(5), |
| Some(6), |
| Some(7), |
| Some(8), |
| Some(9), |
| Some(10), |
| Some(11), |
| // append first array |
| Some(12), |
| Some(13), |
| Some(14), |
| Some(15), |
| // append second array |
| Some(14), |
| Some(15), |
| ]); |
| let list_value_offsets = |
| Buffer::from(&[0, 3, 5, 11, 13, 13, 15, 15, 17].to_byte_slice()); |
| let expected_list_data = ArrayData::new( |
| DataType::List(Box::new(NullableDataType::new(DataType::Int64, true))), |
| 8, |
| None, |
| None, |
| 0, |
| vec![list_value_offsets], |
| vec![expected_int_array.data()], |
| ); |
| let expected_list = ListArray::from(Arc::new(expected_list_data) as ArrayDataRef); |
| assert_eq!( |
| finished.data().buffers()[0].data(), |
| expected_list.data().buffers()[0].data() |
| ); |
| assert_eq!(&expected_list.values(), &finished.values()); |
| assert_eq!(expected_list.len(), finished.len()); |
| |
| Ok(()) |
| } |
| |
| #[test] |
| fn test_list_nulls_append() -> Result<()> { |
| let int_builder = Int64Builder::new(32); |
| let mut builder = ListBuilder::<Int64Builder>::new(int_builder); |
| builder.values().append_slice(&[1, 2, 3])?; |
| builder.append(true)?; |
| builder.values().append_slice(&[4, 5])?; |
| builder.append(true)?; |
| builder.append(false)?; |
| builder.values().append_slice(&[6, 7, 8])?; |
| builder.values().append_null()?; |
| builder.values().append_null()?; |
| builder.values().append_slice(&[9, 10, 11])?; |
| builder.append(true)?; |
| |
| let a_builder = Int64Builder::new(32); |
| let mut a_builder = ListBuilder::<Int64Builder>::new(a_builder); |
| a_builder.values().append_slice(&[12, 13])?; |
| a_builder.append(true)?; |
| a_builder.append(false)?; |
| a_builder.append(true)?; |
| a_builder.values().append_null()?; |
| a_builder.values().append_null()?; |
| a_builder.values().append_slice(&[14, 15])?; |
| a_builder.append(true)?; |
| let a = a_builder.finish(); |
| |
| // append array |
| builder.append_data(&[ |
| a.data(), |
| a.slice(1, 2).data(), |
| a.slice(2, 2).data(), |
| a.slice(4, 0).data(), |
| ])?; |
| let finished = builder.finish(); |
| |
| let expected_int_array = Int64Array::from(vec![ |
| Some(1), |
| Some(2), |
| Some(3), |
| Some(4), |
| Some(5), |
| Some(6), |
| Some(7), |
| Some(8), |
| None, |
| None, |
| Some(9), |
| Some(10), |
| Some(11), |
| // second array |
| Some(12), |
| Some(13), |
| None, |
| None, |
| Some(14), |
| Some(15), |
| // slice(1, 2) results in no values added |
| None, |
| None, |
| Some(14), |
| Some(15), |
| ]); |
| let list_value_offsets = Buffer::from( |
| &[0, 3, 5, 5, 13, 15, 15, 15, 19, 19, 19, 19, 23].to_byte_slice(), |
| ); |
| let expected_list_data = ArrayData::new( |
| DataType::List(Box::new(NullableDataType::new(DataType::Int64, true))), |
| 12, |
| None, |
| None, |
| 0, |
| vec![list_value_offsets], |
| vec![expected_int_array.data()], |
| ); |
| let expected_list = ListArray::from(Arc::new(expected_list_data) as ArrayDataRef); |
| assert_eq!( |
| finished.data().buffers()[0].data(), |
| expected_list.data().buffers()[0].data() |
| ); |
| assert_eq!( |
| finished.data().child_data()[0].buffers()[0].data(), |
| expected_list.data().child_data()[0].buffers()[0].data() |
| ); |
| assert_eq!(&expected_list.values(), &finished.values()); |
| assert_eq!(expected_list.len(), finished.len()); |
| |
| Ok(()) |
| } |
| |
| #[test] |
| fn test_list_of_strings_append() -> Result<()> { |
| let string_builder = StringBuilder::new(32); |
| let mut builder = ListBuilder::<StringBuilder>::new(string_builder); |
| builder.values().append_value("Hello")?; |
| builder.values().append_value("Arrow")?; |
| builder.append(true)?; |
| builder.append(false)?; |
| |
| let string_array = StringArray::from(vec![ |
| Some("alpha"), |
| Some("beta"), |
| None, |
| Some("gamma"), |
| Some("delta"), |
| None, |
| ]); |
| let list_value_offsets = Buffer::from(&[0, 2, 3, 6].to_byte_slice()); |
| let list_data = ArrayData::new( |
| DataType::List(Box::new(NullableDataType::new(DataType::Utf8, true))), |
| 3, |
| None, |
| None, |
| 0, |
| vec![list_value_offsets], |
| vec![string_array.data()], |
| ); |
| let list_array = ListArray::from(Arc::new(list_data) as ArrayDataRef); |
| builder.append_data(&[ |
| list_array.data(), |
| list_array.slice(1, 2).data(), |
| list_array.slice(0, 0).data(), |
| ])?; |
| let finished = builder.finish(); |
| |
| let expected_string_array = StringArray::from(vec![ |
| Some("Hello"), |
| Some("Arrow"), |
| // list_array |
| Some("alpha"), |
| Some("beta"), |
| None, |
| Some("gamma"), |
| Some("delta"), |
| None, |
| // slice(1, 2) |
| None, |
| Some("gamma"), |
| Some("delta"), |
| None, |
| // slice(0, 0) returns nothing |
| ]); |
| let list_value_offsets = Buffer::from(&[0, 2, 2, 4, 5, 8, 9, 12].to_byte_slice()); |
| let expected_list_data = ArrayData::new( |
| DataType::List(Box::new(NullableDataType::new(DataType::Utf8, true))), |
| 7, |
| None, |
| None, // is this correct? |
| 0, |
| vec![list_value_offsets], |
| vec![expected_string_array.data()], |
| ); |
| let expected_list = ListArray::from(Arc::new(expected_list_data) as ArrayDataRef); |
| assert_eq!( |
| finished.data().buffers()[0].data(), |
| expected_list.data().buffers()[0].data() |
| ); |
| assert_eq!( |
| finished.data().child_data()[0].buffers()[0].data(), |
| expected_list.data().child_data()[0].buffers()[0].data() |
| ); |
| assert_eq!(&expected_list.values(), &finished.values()); |
| assert_eq!(expected_list.len(), finished.len()); |
| |
| Ok(()) |
| } |
| |
| #[test] |
| fn test_fixed_size_list_append() -> Result<()> { |
| let int_builder = UInt16Builder::new(64); |
| let mut builder = FixedSizeListBuilder::<UInt16Builder>::new(int_builder, 2); |
| builder.values().append_slice(&[1, 2])?; |
| builder.append(true)?; |
| builder.values().append_slice(&[3, 4])?; |
| builder.append(false)?; |
| builder.values().append_slice(&[5, 6])?; |
| builder.append(true)?; |
| |
| let a_builder = UInt16Builder::new(64); |
| let mut a_builder = FixedSizeListBuilder::<UInt16Builder>::new(a_builder, 2); |
| a_builder.values().append_slice(&[7, 8])?; |
| a_builder.append(true)?; |
| a_builder.values().append_slice(&[9, 10])?; |
| a_builder.append(true)?; |
| a_builder.values().append_slice(&[11, 12])?; |
| a_builder.append(false)?; |
| a_builder.values().append_slice(&[13, 14])?; |
| a_builder.append(true)?; |
| a_builder.values().append_null()?; |
| a_builder.values().append_null()?; |
| a_builder.append(true)?; |
| let a = a_builder.finish(); |
| |
| // append array |
| builder.append_data(&[ |
| a.data(), |
| a.slice(1, 3).data(), |
| a.slice(2, 1).data(), |
| a.slice(5, 0).data(), |
| ])?; |
| let finished = builder.finish(); |
| |
| let expected_int_array = UInt16Array::from(vec![ |
| Some(1), |
| Some(2), |
| Some(3), |
| Some(4), |
| Some(5), |
| Some(6), |
| // append first array |
| Some(7), |
| Some(8), |
| Some(9), |
| Some(10), |
| Some(11), |
| Some(12), |
| Some(13), |
| Some(14), |
| None, |
| None, |
| // append slice(1, 3) |
| Some(9), |
| Some(10), |
| Some(11), |
| Some(12), |
| Some(13), |
| Some(14), |
| // append slice(2, 1) |
| Some(11), |
| Some(12), |
| ]); |
| let expected_list_data = ArrayData::new( |
| DataType::FixedSizeList( |
| Box::new(NullableDataType::new(DataType::UInt16, true)), |
| 2, |
| ), |
| 12, |
| None, |
| None, |
| 0, |
| vec![], |
| vec![expected_int_array.data()], |
| ); |
| let expected_list = |
| FixedSizeListArray::from(Arc::new(expected_list_data) as ArrayDataRef); |
| assert_eq!(&expected_list.values(), &finished.values()); |
| assert_eq!(expected_list.len(), finished.len()); |
| |
| Ok(()) |
| } |
| |
| #[test] |
| fn test_fixed_size_binary_append() -> Result<()> { |
| let mut builder = FixedSizeBinaryBuilder::new(64, 2); |
| builder.append_value(&[1, 2])?; |
| builder.append_value(&[3, 4])?; |
| builder.append_value(&[5, 6])?; |
| |
| let mut a_builder = FixedSizeBinaryBuilder::new(64, 2); |
| a_builder.append_value(&[7, 8])?; |
| a_builder.append_value(&[9, 10])?; |
| a_builder.append_null()?; |
| a_builder.append_value(&[13, 14])?; |
| a_builder.append_null()?; |
| let a = a_builder.finish(); |
| |
| // append array |
| builder.append_data(&[ |
| a.data(), |
| a.slice(1, 3).data(), |
| a.slice(2, 1).data(), |
| a.slice(5, 0).data(), |
| ])?; |
| let finished = builder.finish(); |
| |
| let expected_int_array = UInt8Array::from(vec![ |
| Some(1), |
| Some(2), |
| Some(3), |
| Some(4), |
| Some(5), |
| Some(6), |
| // append first array |
| Some(7), |
| Some(8), |
| Some(9), |
| Some(10), |
| None, |
| None, |
| Some(13), |
| Some(14), |
| None, |
| None, |
| // append slice(1, 3) |
| Some(9), |
| Some(10), |
| None, |
| None, |
| Some(13), |
| Some(14), |
| // append slice(2, 1) |
| None, |
| None, |
| ]); |
| let expected_list_data = ArrayData::new( |
| DataType::FixedSizeList( |
| Box::new(NullableDataType::new(DataType::UInt8, true)), |
| 2, |
| ), |
| 12, |
| None, |
| None, |
| 0, |
| vec![], |
| vec![expected_int_array.data()], |
| ); |
| let expected_list = |
| FixedSizeListArray::from(Arc::new(expected_list_data) as ArrayDataRef); |
| let expected_list = FixedSizeBinaryArray::from(expected_list); |
| // assert_eq!(expected_list.values(), finished.values()); |
| assert_eq!(expected_list.len(), finished.len()); |
| |
| Ok(()) |
| } |
| |
| #[test] |
| fn test_struct_append() -> Result<()> { |
| let int_builder = Int32Builder::new(64); |
| let bool_builder = BooleanBuilder::new(64); |
| |
| let field1 = Field::new("f1", DataType::Int32, false); |
| let field2 = Field::new("f2", DataType::Boolean, false); |
| let mut fields = Vec::new(); |
| let mut field_builders = Vec::new(); |
| fields.push(field1.clone()); |
| field_builders.push(Box::new(int_builder) as Box<ArrayBuilder>); |
| fields.push(field2.clone()); |
| field_builders.push(Box::new(bool_builder) as Box<ArrayBuilder>); |
| |
| let mut builder = StructBuilder::new(fields, field_builders); |
| builder |
| .field_builder::<Int32Builder>(0) |
| .unwrap() |
| .append_slice(&[0, 1, 2, 3, 4])?; |
| builder |
| .field_builder::<BooleanBuilder>(1) |
| .unwrap() |
| .append_slice(&[false, true, false, true, false])?; |
| |
| // Append slot values - all are valid. |
| for _ in 0..5 { |
| assert!(builder.append(true).is_ok()) |
| } |
| |
| let arr = builder.finish(); |
| |
| assert_eq!(5, arr.len()); |
| assert_eq!(0, builder.len()); |
| |
| builder |
| .field_builder::<Int32Builder>(0) |
| .unwrap() |
| .append_slice(&[1, 3, 5, 7, 9]) |
| .unwrap(); |
| builder |
| .field_builder::<BooleanBuilder>(1) |
| .unwrap() |
| .append_slice(&[true, true, true, false, true]) |
| .unwrap(); |
| |
| // Append slot values - all are valid. |
| for _ in 0..5 { |
| assert!(builder.append(true).is_ok()) |
| } |
| |
| assert_eq!(5, builder.len()); |
| |
| // append array to builder |
| builder.append_data(&[ |
| arr.data(), |
| arr.slice(1, 4).data(), |
| arr.slice(4, 0).data(), |
| ])?; |
| // finish builder |
| let arr2 = builder.finish(); |
| |
| let f1 = Arc::new(Int32Array::from(vec![ |
| 1, 3, 5, 7, 9, 0, 1, 2, 3, 4, 1, 2, 3, 4, |
| ])) as ArrayRef; |
| let f2 = Arc::new(BooleanArray::from(vec![ |
| true, true, true, false, true, false, true, false, true, false, true, false, |
| true, false, |
| ])) as ArrayRef; |
| let expected = StructArray::from(vec![(field1, f1), (field2, f2)]); |
| assert_eq!(arr2.data().child_data()[0], expected.data().child_data()[0]); |
| assert_eq!(arr2.data().child_data()[1], expected.data().child_data()[1]); |
| assert_eq!(arr2, expected); |
| |
| Ok(()) |
| } |
| } |