| // Licensed to the Apache Software Foundation (ASF) under one |
| // or more contributor license agreements. See the NOTICE file |
| // distributed with this work for additional information |
| // regarding copyright ownership. The ASF licenses this file |
| // to you under the Apache License, Version 2.0 (the |
| // "License"); you may not use this file except in compliance |
| // with the License. You may obtain a copy of the License at |
| // |
| // http://www.apache.org/licenses/LICENSE-2.0 |
| // |
| // Unless required by applicable law or agreed to in writing, |
| // software distributed under the License is distributed on an |
| // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
| // KIND, either express or implied. See the License for the |
| // specific language governing permissions and limitations |
| // under the License. |
| |
| use std::any::Any; |
| use std::convert::{From, TryFrom}; |
| use std::fmt; |
| use std::io::Write; |
| use std::mem; |
| use std::sync::Arc; |
| |
| use chrono::prelude::*; |
| |
| use super::*; |
| use crate::array::equal::JsonEqual; |
| use crate::buffer::{Buffer, MutableBuffer}; |
| use crate::datatypes::DataType::Struct; |
| use crate::datatypes::*; |
| use crate::error::{ArrowError, Result}; |
| use crate::memory; |
| use crate::util::bit_util; |
| |
/// Seconds per day (24 hours of 60 minutes of 60 seconds).
const SECONDS_IN_DAY: i64 = 24 * 60 * 60;
/// Milliseconds per second.
const MILLISECONDS: i64 = 1_000;
/// Microseconds per second.
const MICROSECONDS: i64 = 1_000 * MILLISECONDS;
/// Nanoseconds per second.
const NANOSECONDS: i64 = 1_000 * MICROSECONDS;
| |
| /// Trait for dealing with different types of array at runtime when the type of the |
| /// array is not known in advance |
| pub trait Array: fmt::Debug + Send + Sync + ArrayEqual + JsonEqual { |
| /// Returns the array as `Any` so that it can be downcast to a specific implementation |
| fn as_any(&self) -> &Any; |
| |
| /// Returns a reference-counted pointer to the data of this array |
| fn data(&self) -> ArrayDataRef; |
| |
| /// Returns a borrowed & reference-counted pointer to the data of this array |
| fn data_ref(&self) -> &ArrayDataRef; |
| |
| /// Returns a reference to the data type of this array |
| fn data_type(&self) -> &DataType { |
| self.data_ref().data_type() |
| } |
| |
| /// Returns a zero-copy slice of this array with the indicated offset and length. |
| fn slice(&self, offset: usize, length: usize) -> ArrayRef { |
| make_array(slice_data(self.data(), offset, length)) |
| } |
| |
| /// Returns the length (i.e., number of elements) of this array |
| fn len(&self) -> usize { |
| self.data().len() |
| } |
| |
| /// Returns the offset of this array |
| fn offset(&self) -> usize { |
| self.data().offset() |
| } |
| |
| /// Returns whether the element at index `i` is null |
| fn is_null(&self, i: usize) -> bool { |
| self.data().is_null(self.data().offset() + i) |
| } |
| |
| /// Returns whether the element at index `i` is not null |
| fn is_valid(&self, i: usize) -> bool { |
| self.data().is_valid(self.data().offset() + i) |
| } |
| |
| /// Returns the total number of nulls in this array |
| fn null_count(&self) -> usize { |
| self.data().null_count() |
| } |
| } |
| |
| pub type ArrayRef = Arc<Array>; |
| |
| /// Constructs an array using the input `data`. Returns a reference-counted `Array` |
| /// instance. |
| pub fn make_array(data: ArrayDataRef) -> ArrayRef { |
| // TODO: here data_type() needs to clone the type - maybe add a type tag enum to |
| // avoid the cloning. |
| match data.data_type().clone() { |
| DataType::Boolean => Arc::new(BooleanArray::from(data)) as ArrayRef, |
| DataType::Int8 => Arc::new(Int8Array::from(data)) as ArrayRef, |
| DataType::Int16 => Arc::new(Int16Array::from(data)) as ArrayRef, |
| DataType::Int32 => Arc::new(Int32Array::from(data)) as ArrayRef, |
| DataType::Int64 => Arc::new(Int64Array::from(data)) as ArrayRef, |
| DataType::UInt8 => Arc::new(UInt8Array::from(data)) as ArrayRef, |
| DataType::UInt16 => Arc::new(UInt16Array::from(data)) as ArrayRef, |
| DataType::UInt32 => Arc::new(UInt32Array::from(data)) as ArrayRef, |
| DataType::UInt64 => Arc::new(UInt64Array::from(data)) as ArrayRef, |
| DataType::Float32 => Arc::new(Float32Array::from(data)) as ArrayRef, |
| DataType::Float64 => Arc::new(Float64Array::from(data)) as ArrayRef, |
| DataType::Date32(DateUnit::Day) => Arc::new(Date32Array::from(data)) as ArrayRef, |
| DataType::Date64(DateUnit::Millisecond) => { |
| Arc::new(Date64Array::from(data)) as ArrayRef |
| } |
| DataType::Time32(TimeUnit::Second) => { |
| Arc::new(Time32SecondArray::from(data)) as ArrayRef |
| } |
| DataType::Time32(TimeUnit::Millisecond) => { |
| Arc::new(Time32MillisecondArray::from(data)) as ArrayRef |
| } |
| DataType::Time64(TimeUnit::Microsecond) => { |
| Arc::new(Time64MicrosecondArray::from(data)) as ArrayRef |
| } |
| DataType::Time64(TimeUnit::Nanosecond) => { |
| Arc::new(Time64NanosecondArray::from(data)) as ArrayRef |
| } |
| DataType::Timestamp(TimeUnit::Second) => { |
| Arc::new(TimestampSecondArray::from(data)) as ArrayRef |
| } |
| DataType::Timestamp(TimeUnit::Millisecond) => { |
| Arc::new(TimestampMillisecondArray::from(data)) as ArrayRef |
| } |
| DataType::Timestamp(TimeUnit::Microsecond) => { |
| Arc::new(TimestampMicrosecondArray::from(data)) as ArrayRef |
| } |
| DataType::Timestamp(TimeUnit::Nanosecond) => { |
| Arc::new(TimestampNanosecondArray::from(data)) as ArrayRef |
| } |
| DataType::Utf8 => Arc::new(BinaryArray::from(data)) as ArrayRef, |
| DataType::List(_) => Arc::new(ListArray::from(data)) as ArrayRef, |
| DataType::Struct(_) => Arc::new(StructArray::from(data)) as ArrayRef, |
| dt => panic!("Unexpected data type {:?}", dt), |
| } |
| } |
| |
| fn slice_data(data: ArrayDataRef, mut offset: usize, length: usize) -> ArrayDataRef { |
| assert!((offset + length) <= data.len()); |
| |
| let mut new_data = data.as_ref().clone(); |
| let len = ::std::cmp::min(new_data.len - offset, length); |
| |
| offset += data.offset; |
| new_data.len = len; |
| new_data.offset = offset; |
| |
| // Calculate the new null count based on the offset |
| new_data.null_count = if let Some(bitmap) = new_data.null_bitmap() { |
| let valid_bits = bitmap.bits.data(); |
| len.checked_sub(bit_util::count_set_bits_offset(valid_bits, offset, length)) |
| .unwrap() |
| } else { |
| 0 |
| }; |
| |
| Arc::new(new_data) |
| } |
| |
// ----------------------------------------------------------------------------
// Implementations of different array types

/// Thin wrapper around a raw `*const T` that is declared `Send` and `Sync` so it can
/// be stored inside array types.
struct RawPtrBox<T> {
    inner: *const T,
}

impl<T> RawPtrBox<T> {
    /// Wraps the raw pointer `ptr` without dereferencing it.
    fn new(ptr: *const T) -> Self {
        RawPtrBox { inner: ptr }
    }

    /// Returns the wrapped raw pointer.
    fn get(&self) -> *const T {
        self.inner
    }
}

// The wrapper only stores and hands back the pointer; it never dereferences it.
unsafe impl<T> Send for RawPtrBox<T> {}
unsafe impl<T> Sync for RawPtrBox<T> {}
| |
| /// Array whose elements are of primitive types. |
| pub struct PrimitiveArray<T: ArrowPrimitiveType> { |
| data: ArrayDataRef, |
| /// Pointer to the value array. The lifetime of this must be <= to the value buffer |
| /// stored in `data`, so it's safe to store. |
| /// Also note that boolean arrays are bit-packed, so although the underlying pointer |
| /// is of type bool it should be cast back to u8 before being used. |
| /// i.e. `self.raw_values.get() as *const u8` |
| raw_values: RawPtrBox<T::Native>, |
| } |
| |
| /// Common operations for primitive types, including numeric types and boolean type. |
| pub trait PrimitiveArrayOps<T: ArrowPrimitiveType> { |
| fn values(&self) -> Buffer; |
| fn value(&self, i: usize) -> T::Native; |
| } |
| |
| // This is necessary when caller wants to access `PrimitiveArrayOps`'s methods with |
| // `ArrowPrimitiveType`. It doesn't have any implementation as the actual implementations |
| // are delegated to that of `ArrowNumericType` and `BooleanType`. |
| impl<T: ArrowPrimitiveType> PrimitiveArrayOps<T> for PrimitiveArray<T> { |
| default fn values(&self) -> Buffer { |
| unimplemented!() |
| } |
| |
| default fn value(&self, _: usize) -> T::Native { |
| unimplemented!() |
| } |
| } |
| |
| impl<T: ArrowNumericType> PrimitiveArrayOps<T> for PrimitiveArray<T> { |
| fn values(&self) -> Buffer { |
| self.values() |
| } |
| |
| fn value(&self, i: usize) -> T::Native { |
| self.value(i) |
| } |
| } |
| |
| impl PrimitiveArrayOps<BooleanType> for BooleanArray { |
| fn values(&self) -> Buffer { |
| self.values() |
| } |
| |
| fn value(&self, i: usize) -> bool { |
| self.value(i) |
| } |
| } |
| |
| impl<T: ArrowPrimitiveType> Array for PrimitiveArray<T> { |
| fn as_any(&self) -> &Any { |
| self |
| } |
| |
| fn data(&self) -> ArrayDataRef { |
| self.data.clone() |
| } |
| |
| fn data_ref(&self) -> &ArrayDataRef { |
| &self.data |
| } |
| } |
| |
| /// Implementation for primitive arrays with numeric types. |
| /// Boolean arrays are bit-packed and so implemented separately. |
| impl<T: ArrowNumericType> PrimitiveArray<T> { |
| pub fn new(length: usize, values: Buffer, null_count: usize, offset: usize) -> Self { |
| let array_data = ArrayData::builder(T::get_data_type()) |
| .len(length) |
| .add_buffer(values) |
| .null_count(null_count) |
| .offset(offset) |
| .build(); |
| PrimitiveArray::from(array_data) |
| } |
| |
| /// Returns a `Buffer` holds all the values of this array. |
| /// |
| /// Note this doesn't take account into the offset of this array. |
| pub fn values(&self) -> Buffer { |
| self.data.buffers()[0].clone() |
| } |
| |
| /// Returns the length of this array |
| pub fn len(&self) -> usize { |
| self.data.len() |
| } |
| |
| /// Returns a raw pointer to the values of this array. |
| pub fn raw_values(&self) -> *const T::Native { |
| unsafe { |
| mem::transmute(self.raw_values.get().offset(self.data.offset() as isize)) |
| } |
| } |
| |
| /// Returns the primitive value at index `i`. |
| /// |
| /// Note this doesn't do any bound checking, for performance reason. |
| pub fn value(&self, i: usize) -> T::Native { |
| unsafe { *(self.raw_values().offset(i as isize)) } |
| } |
| |
| /// Returns a slice for the given offset and length |
| /// |
| /// Note this doesn't do any bound checking, for performance reason. |
| pub fn value_slice(&self, offset: usize, len: usize) -> &[T::Native] { |
| let raw = unsafe { |
| std::slice::from_raw_parts(self.raw_values().offset(offset as isize), len) |
| }; |
| &raw[..] |
| } |
| |
| // Returns a new primitive array builder |
| pub fn builder(capacity: usize) -> PrimitiveBuilder<T> { |
| PrimitiveBuilder::<T>::new(capacity) |
| } |
| } |
| |
| impl<T: ArrowTemporalType + ArrowNumericType> PrimitiveArray<T> |
| where |
| i64: std::convert::From<T::Native>, |
| { |
| /// Returns value as a chrono `NaiveDateTime`, handling time resolution |
| /// |
| /// If a data type cannot be converted to `NaiveDateTime`, a `None` is returned. |
| /// A valid value is expected, thus the user should first check for validity. |
| pub fn value_as_datetime(&self, i: usize) -> Option<NaiveDateTime> { |
| let v = i64::from(self.value(i)); |
| match self.data_type() { |
| DataType::Date32(_) => { |
| // convert days into seconds |
| Some(NaiveDateTime::from_timestamp(v as i64 * SECONDS_IN_DAY, 0)) |
| } |
| DataType::Date64(_) => Some(NaiveDateTime::from_timestamp( |
| // extract seconds from milliseconds |
| v / MILLISECONDS, |
| // discard extracted seconds and convert milliseconds to nanoseconds |
| (v % MILLISECONDS * MICROSECONDS) as u32, |
| )), |
| DataType::Time32(_) | DataType::Time64(_) => None, |
| DataType::Timestamp(unit) => match unit { |
| TimeUnit::Second => Some(NaiveDateTime::from_timestamp(v, 0)), |
| TimeUnit::Millisecond => Some(NaiveDateTime::from_timestamp( |
| // extract seconds from milliseconds |
| v / MILLISECONDS, |
| // discard extracted seconds and convert milliseconds to nanoseconds |
| (v % MILLISECONDS * MICROSECONDS) as u32, |
| )), |
| TimeUnit::Microsecond => Some(NaiveDateTime::from_timestamp( |
| // extract seconds from microseconds |
| v / MICROSECONDS, |
| // discard extracted seconds and convert microseconds to nanoseconds |
| (v % MICROSECONDS * MILLISECONDS) as u32, |
| )), |
| TimeUnit::Nanosecond => Some(NaiveDateTime::from_timestamp( |
| // extract seconds from nanoseconds |
| v / NANOSECONDS, |
| // discard extracted seconds |
| (v % NANOSECONDS) as u32, |
| )), |
| }, |
| // interval is not yet fully documented [ARROW-3097] |
| DataType::Interval(_) => None, |
| _ => None, |
| } |
| } |
| |
| /// Returns value as a chrono `NaiveDate` by using `Self::datetime()` |
| /// |
| /// If a data type cannot be converted to `NaiveDate`, a `None` is returned |
| pub fn value_as_date(&self, i: usize) -> Option<NaiveDate> { |
| match self.value_as_datetime(i) { |
| Some(datetime) => Some(datetime.date()), |
| None => None, |
| } |
| } |
| |
| /// Returns a value as a chrono `NaiveTime` |
| /// |
| /// `Date32` and `Date64` return UTC midnight as they do not have time resolution |
| pub fn value_as_time(&self, i: usize) -> Option<NaiveTime> { |
| match self.data_type() { |
| DataType::Time32(unit) => { |
| // safe to immediately cast to u32 as `self.value(i)` is positive i32 |
| let v = i64::from(self.value(i)) as u32; |
| match unit { |
| TimeUnit::Second => { |
| Some(NaiveTime::from_num_seconds_from_midnight(v, 0)) |
| } |
| TimeUnit::Millisecond => { |
| Some(NaiveTime::from_num_seconds_from_midnight( |
| // extract seconds from milliseconds |
| v / MILLISECONDS as u32, |
| // discard extracted seconds and convert milliseconds to |
| // nanoseconds |
| v % MILLISECONDS as u32 * MICROSECONDS as u32, |
| )) |
| } |
| _ => None, |
| } |
| } |
| DataType::Time64(unit) => { |
| let v = i64::from(self.value(i)); |
| match unit { |
| TimeUnit::Microsecond => { |
| Some(NaiveTime::from_num_seconds_from_midnight( |
| // extract seconds from microseconds |
| (v / MICROSECONDS) as u32, |
| // discard extracted seconds and convert microseconds to |
| // nanoseconds |
| (v % MICROSECONDS * MILLISECONDS) as u32, |
| )) |
| } |
| TimeUnit::Nanosecond => { |
| Some(NaiveTime::from_num_seconds_from_midnight( |
| // extract seconds from nanoseconds |
| (v / NANOSECONDS) as u32, |
| // discard extracted seconds |
| (v % NANOSECONDS) as u32, |
| )) |
| } |
| _ => None, |
| } |
| } |
| DataType::Timestamp(_) => match self.value_as_datetime(i) { |
| Some(datetime) => Some(datetime.time()), |
| None => None, |
| }, |
| DataType::Date32(_) | DataType::Date64(_) => { |
| Some(NaiveTime::from_hms(0, 0, 0)) |
| } |
| DataType::Interval(_) => None, |
| _ => None, |
| } |
| } |
| } |
| |
| impl<T: ArrowPrimitiveType> fmt::Debug for PrimitiveArray<T> { |
| default fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { |
| write!(f, "PrimitiveArray<{:?}>\n[\n", T::get_data_type())?; |
| print_long_array(self, f, |array, index, f| { |
| fmt::Debug::fmt(&array.value(index), f) |
| })?; |
| write!(f, "]") |
| } |
| } |
| |
| impl<T: ArrowNumericType> fmt::Debug for PrimitiveArray<T> { |
| default fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { |
| write!(f, "PrimitiveArray<{:?}>\n[\n", T::get_data_type())?; |
| print_long_array(self, f, |array, index, f| { |
| fmt::Debug::fmt(&array.value(index), f) |
| })?; |
| write!(f, "]") |
| } |
| } |
| |
| impl<T: ArrowNumericType + ArrowTemporalType> fmt::Debug for PrimitiveArray<T> |
| where |
| i64: std::convert::From<T::Native>, |
| { |
| fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { |
| write!(f, "PrimitiveArray<{:?}>\n[\n", T::get_data_type())?; |
| print_long_array(self, f, |array, index, f| match T::get_data_type() { |
| DataType::Date32(_) | DataType::Date64(_) => { |
| match array.value_as_date(index) { |
| Some(date) => write!(f, "{:?}", date), |
| None => write!(f, "null"), |
| } |
| } |
| DataType::Time32(_) | DataType::Time64(_) => { |
| match array.value_as_time(index) { |
| Some(time) => write!(f, "{:?}", time), |
| None => write!(f, "null"), |
| } |
| } |
| DataType::Timestamp(_) => match array.value_as_datetime(index) { |
| Some(datetime) => write!(f, "{:?}", datetime), |
| None => write!(f, "null"), |
| }, |
| _ => write!(f, "null"), |
| })?; |
| write!(f, "]") |
| } |
| } |
| |
| /// Specific implementation for Boolean arrays due to bit-packing |
| impl PrimitiveArray<BooleanType> { |
| pub fn new(length: usize, values: Buffer, null_count: usize, offset: usize) -> Self { |
| let array_data = ArrayData::builder(DataType::Boolean) |
| .len(length) |
| .add_buffer(values) |
| .null_count(null_count) |
| .offset(offset) |
| .build(); |
| BooleanArray::from(array_data) |
| } |
| |
| /// Returns a `Buffer` holds all the values of this array. |
| /// |
| /// Note this doesn't take account into the offset of this array. |
| pub fn values(&self) -> Buffer { |
| self.data.buffers()[0].clone() |
| } |
| |
| /// Returns the boolean value at index `i`. |
| pub fn value(&self, i: usize) -> bool { |
| assert!(i < self.data.len()); |
| let offset = i + self.offset(); |
| unsafe { bit_util::get_bit_raw(self.raw_values.get() as *const u8, offset) } |
| } |
| |
| // Returns a new primitive array builder |
| pub fn builder(capacity: usize) -> BooleanBuilder { |
| BooleanBuilder::new(capacity) |
| } |
| } |
| |
| impl fmt::Debug for PrimitiveArray<BooleanType> { |
| fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { |
| write!(f, "PrimitiveArray<{:?}>\n[\n", BooleanType::get_data_type())?; |
| print_long_array(self, f, |array, index, f| { |
| fmt::Debug::fmt(&array.value(index), f) |
| })?; |
| write!(f, "]") |
| } |
| } |
| |
// TODO: the macro is needed here because we'd get "conflicting implementations" error
// otherwise with both `From<Vec<T::Native>>` and `From<Vec<Option<T::Native>>>`.
// We should revisit this in future.
//
// For an Arrow type `$ty` with native type `$native_ty` and data type expression
// `$ty_id`, generates conversions into `PrimitiveArray<$ty>` from a vector of values
// and from a vector of optional values.
macro_rules! def_numeric_from_vec {
    ( $ty:ident, $native_ty:ident, $ty_id:expr ) => {
        impl From<Vec<$native_ty>> for PrimitiveArray<$ty> {
            fn from(data: Vec<$native_ty>) -> Self {
                let values = Buffer::from(data.to_byte_slice());
                let array_data = ArrayData::builder($ty_id)
                    .len(data.len())
                    .add_buffer(values)
                    .build();
                PrimitiveArray::from(array_data)
            }
        }

        // Constructs a primitive array from a vector. Should only be used for testing.
        impl From<Vec<Option<$native_ty>>> for PrimitiveArray<$ty> {
            fn from(data: Vec<Option<$native_ty>>) -> Self {
                let value_width = mem::size_of::<$native_ty>();
                let data_len = data.len();
                let bitmap_bytes = bit_util::ceil(data_len, 8);
                let mut null_buf =
                    MutableBuffer::new(bitmap_bytes).with_bitset(bitmap_bytes, false);
                let mut val_buf = MutableBuffer::new(data_len * value_width);

                {
                    // Placeholder bytes written into the value slot of a null entry.
                    let zeroed = vec![0; value_width];
                    let validity = null_buf.data_mut();
                    for (i, item) in data.iter().enumerate() {
                        match item {
                            Some(n) => {
                                bit_util::set_bit(validity, i);
                                // unwrap() in the following should be safe here since
                                // we've made sure enough space is allocated for the
                                // values.
                                val_buf.write(&n.to_byte_slice()).unwrap();
                            }
                            None => {
                                val_buf.write(&zeroed).unwrap();
                            }
                        }
                    }
                }

                let array_data = ArrayData::builder($ty_id)
                    .len(data_len)
                    .add_buffer(val_buf.freeze())
                    .null_bit_buffer(null_buf.freeze())
                    .build();
                PrimitiveArray::from(array_data)
            }
        }
    };
}
| |
| def_numeric_from_vec!(Int8Type, i8, DataType::Int8); |
| def_numeric_from_vec!(Int16Type, i16, DataType::Int16); |
| def_numeric_from_vec!(Int32Type, i32, DataType::Int32); |
| def_numeric_from_vec!(Int64Type, i64, DataType::Int64); |
| def_numeric_from_vec!(UInt8Type, u8, DataType::UInt8); |
| def_numeric_from_vec!(UInt16Type, u16, DataType::UInt16); |
| def_numeric_from_vec!(UInt32Type, u32, DataType::UInt32); |
| def_numeric_from_vec!(UInt64Type, u64, DataType::UInt64); |
| def_numeric_from_vec!(Float32Type, f32, DataType::Float32); |
| def_numeric_from_vec!(Float64Type, f64, DataType::Float64); |
| // TODO: add temporal arrays |
| |
| def_numeric_from_vec!( |
| TimestampSecondType, |
| i64, |
| DataType::Timestamp(TimeUnit::Second) |
| ); |
| def_numeric_from_vec!( |
| TimestampMillisecondType, |
| i64, |
| DataType::Timestamp(TimeUnit::Millisecond) |
| ); |
| def_numeric_from_vec!( |
| TimestampMicrosecondType, |
| i64, |
| DataType::Timestamp(TimeUnit::Microsecond) |
| ); |
| def_numeric_from_vec!( |
| TimestampNanosecondType, |
| i64, |
| DataType::Timestamp(TimeUnit::Nanosecond) |
| ); |
| def_numeric_from_vec!(Date32Type, i32, DataType::Date32(DateUnit::Day)); |
| def_numeric_from_vec!(Date64Type, i64, DataType::Date64(DateUnit::Millisecond)); |
| def_numeric_from_vec!(Time32SecondType, i32, DataType::Time32(TimeUnit::Second)); |
| def_numeric_from_vec!( |
| Time32MillisecondType, |
| i32, |
| DataType::Time32(TimeUnit::Millisecond) |
| ); |
| def_numeric_from_vec!( |
| Time64MicrosecondType, |
| i64, |
| DataType::Time64(TimeUnit::Microsecond) |
| ); |
| def_numeric_from_vec!( |
| Time64NanosecondType, |
| i64, |
| DataType::Time64(TimeUnit::Nanosecond) |
| ); |
| |
| /// Constructs a boolean array from a vector. Should only be used for testing. |
| impl From<Vec<bool>> for BooleanArray { |
| fn from(data: Vec<bool>) -> Self { |
| let num_byte = bit_util::ceil(data.len(), 8); |
| let mut mut_buf = MutableBuffer::new(num_byte).with_bitset(num_byte, false); |
| { |
| let mut_slice = mut_buf.data_mut(); |
| for (i, b) in data.iter().enumerate() { |
| if *b { |
| bit_util::set_bit(mut_slice, i); |
| } |
| } |
| } |
| let array_data = ArrayData::builder(DataType::Boolean) |
| .len(data.len()) |
| .add_buffer(mut_buf.freeze()) |
| .build(); |
| BooleanArray::from(array_data) |
| } |
| } |
| |
| impl From<Vec<Option<bool>>> for BooleanArray { |
| fn from(data: Vec<Option<bool>>) -> Self { |
| let data_len = data.len(); |
| let num_byte = bit_util::ceil(data_len, 8); |
| let mut null_buf = MutableBuffer::new(num_byte).with_bitset(num_byte, false); |
| let mut val_buf = MutableBuffer::new(num_byte).with_bitset(num_byte, false); |
| |
| { |
| let null_slice = null_buf.data_mut(); |
| let val_slice = val_buf.data_mut(); |
| |
| for (i, v) in data.iter().enumerate() { |
| if let Some(b) = v { |
| bit_util::set_bit(null_slice, i); |
| if *b { |
| bit_util::set_bit(val_slice, i); |
| } |
| } |
| } |
| } |
| |
| let array_data = ArrayData::builder(DataType::Boolean) |
| .len(data_len) |
| .add_buffer(val_buf.freeze()) |
| .null_bit_buffer(null_buf.freeze()) |
| .build(); |
| BooleanArray::from(array_data) |
| } |
| } |
| |
| /// Constructs a `PrimitiveArray` from an array data reference. |
| impl<T: ArrowPrimitiveType> From<ArrayDataRef> for PrimitiveArray<T> { |
| default fn from(data: ArrayDataRef) -> Self { |
| assert_eq!( |
| data.buffers().len(), |
| 1, |
| "PrimitiveArray data should contain a single buffer only (values buffer)" |
| ); |
| let raw_values = data.buffers()[0].raw_data(); |
| assert!( |
| memory::is_aligned::<u8>(raw_values, mem::align_of::<T::Native>()), |
| "memory is not aligned" |
| ); |
| Self { |
| data, |
| raw_values: RawPtrBox::new(raw_values as *const T::Native), |
| } |
| } |
| } |
| |
/// Common operations for List types, currently `ListArray` and `BinaryArray`.
pub trait ListArrayOps {
    /// Returns the entry at position `i` of the value offsets buffer.
    fn value_offset_at(&self, i: usize) -> i32;
}
| |
| impl ListArrayOps for ListArray { |
| fn value_offset_at(&self, i: usize) -> i32 { |
| self.value_offset_at(i) |
| } |
| } |
| |
| impl ListArrayOps for BinaryArray { |
| fn value_offset_at(&self, i: usize) -> i32 { |
| self.value_offset_at(i) |
| } |
| } |
| |
| /// A list array where each element is a variable-sized sequence of values with the same |
| /// type. |
| pub struct ListArray { |
| data: ArrayDataRef, |
| values: ArrayRef, |
| value_offsets: RawPtrBox<i32>, |
| } |
| |
| impl ListArray { |
| /// Returns an reference to the values of this list. |
| pub fn values(&self) -> ArrayRef { |
| self.values.clone() |
| } |
| |
| /// Returns a clone of the value type of this list. |
| pub fn value_type(&self) -> DataType { |
| self.values.data().data_type().clone() |
| } |
| |
| /// Returns ith value of this list array. |
| pub fn value(&self, i: usize) -> ArrayRef { |
| self.values |
| .slice(self.value_offset(i) as usize, self.value_length(i) as usize) |
| } |
| |
| /// Returns the offset for value at index `i`. |
| /// |
| /// Note this doesn't do any bound checking, for performance reason. |
| #[inline] |
| pub fn value_offset(&self, i: usize) -> i32 { |
| self.value_offset_at(self.data.offset() + i) |
| } |
| |
| /// Returns the length for value at index `i`. |
| /// |
| /// Note this doesn't do any bound checking, for performance reason. |
| #[inline] |
| pub fn value_length(&self, mut i: usize) -> i32 { |
| i += self.data.offset(); |
| self.value_offset_at(i + 1) - self.value_offset_at(i) |
| } |
| |
| #[inline] |
| fn value_offset_at(&self, i: usize) -> i32 { |
| unsafe { *self.value_offsets.get().offset(i as isize) } |
| } |
| } |
| |
| /// Constructs a `ListArray` from an array data reference. |
| impl From<ArrayDataRef> for ListArray { |
| fn from(data: ArrayDataRef) -> Self { |
| assert_eq!( |
| data.buffers().len(), |
| 1, |
| "ListArray data should contain a single buffer only (value offsets)" |
| ); |
| assert_eq!( |
| data.child_data().len(), |
| 1, |
| "ListArray should contain a single child array (values array)" |
| ); |
| let values = make_array(data.child_data()[0].clone()); |
| let raw_value_offsets = data.buffers()[0].raw_data(); |
| assert!( |
| memory::is_aligned(raw_value_offsets, mem::align_of::<i32>()), |
| "memory is not aligned" |
| ); |
| let value_offsets = raw_value_offsets as *const i32; |
| unsafe { |
| assert_eq!(*value_offsets.offset(0), 0, "offsets do not start at zero"); |
| } |
| Self { |
| data: data.clone(), |
| values, |
| value_offsets: RawPtrBox::new(value_offsets), |
| } |
| } |
| } |
| |
| impl Array for ListArray { |
| fn as_any(&self) -> &Any { |
| self |
| } |
| |
| fn data(&self) -> ArrayDataRef { |
| self.data.clone() |
| } |
| |
| fn data_ref(&self) -> &ArrayDataRef { |
| &self.data |
| } |
| } |
| |
| // Helper function for printing potentially long arrays. |
| fn print_long_array<A, F>(array: &A, f: &mut fmt::Formatter, print_item: F) -> fmt::Result |
| where |
| A: Array, |
| F: Fn(&A, usize, &mut fmt::Formatter) -> fmt::Result, |
| { |
| for i in 0..std::cmp::min(10, array.len()) { |
| if array.is_null(i) { |
| write!(f, " null,\n")?; |
| } else { |
| write!(f, " ")?; |
| print_item(&array, i, f)?; |
| write!(f, ",\n")?; |
| } |
| } |
| if array.len() > 10 { |
| if array.len() > 20 { |
| write!(f, " ...{} elements...,\n", array.len() - 20)?; |
| } |
| for i in array.len() - 10..array.len() { |
| if array.is_null(i) { |
| write!(f, " null,\n")?; |
| } else { |
| write!(f, " ")?; |
| print_item(&array, i, f)?; |
| write!(f, ",\n")?; |
| } |
| } |
| } |
| Ok(()) |
| } |
| |
| impl fmt::Debug for ListArray { |
| fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { |
| write!(f, "ListArray\n[\n")?; |
| print_long_array(self, f, |array, index, f| { |
| fmt::Debug::fmt(&array.value(index), f) |
| })?; |
| write!(f, "]") |
| } |
| } |
| |
| /// A special type of `ListArray` whose elements are binaries. |
| pub struct BinaryArray { |
| data: ArrayDataRef, |
| value_offsets: RawPtrBox<i32>, |
| value_data: RawPtrBox<u8>, |
| } |
| |
| impl BinaryArray { |
| /// Returns the element at index `i` as a byte slice. |
| pub fn value(&self, i: usize) -> &[u8] { |
| assert!(i < self.data.len(), "BinaryArray out of bounds access"); |
| let offset = i.checked_add(self.data.offset()).unwrap(); |
| unsafe { |
| let pos = self.value_offset_at(offset); |
| ::std::slice::from_raw_parts( |
| self.value_data.get().offset(pos as isize), |
| (self.value_offset_at(offset + 1) - pos) as usize, |
| ) |
| } |
| } |
| |
| /// Returns the element at index `i` as a string. |
| /// |
| /// Note this doesn't do any bound checking, for performance reason. |
| pub fn get_string(&self, i: usize) -> String { |
| let slice = self.value(i); |
| unsafe { String::from_utf8_unchecked(Vec::from(slice)) } |
| } |
| |
| /// Returns the offset for the element at index `i`. |
| /// |
| /// Note this doesn't do any bound checking, for performance reason. |
| #[inline] |
| pub fn value_offset(&self, i: usize) -> i32 { |
| self.value_offset_at(self.data.offset() + i) |
| } |
| |
| /// Returns the length for the element at index `i`. |
| /// |
| /// Note this doesn't do any bound checking, for performance reason. |
| #[inline] |
| pub fn value_length(&self, mut i: usize) -> i32 { |
| i += self.data.offset(); |
| self.value_offset_at(i + 1) - self.value_offset_at(i) |
| } |
| |
| /// Returns a clone of the value offset buffer |
| pub fn value_offsets(&self) -> Buffer { |
| self.data.buffers()[0].clone() |
| } |
| |
| /// Returns a clone of the value data buffer |
| pub fn value_data(&self) -> Buffer { |
| self.data.buffers()[1].clone() |
| } |
| |
| #[inline] |
| fn value_offset_at(&self, i: usize) -> i32 { |
| unsafe { *self.value_offsets.get().offset(i as isize) } |
| } |
| } |
| |
| impl From<ArrayDataRef> for BinaryArray { |
| fn from(data: ArrayDataRef) -> Self { |
| assert_eq!( |
| data.buffers().len(), |
| 2, |
| "BinaryArray data should contain 2 buffers only (offsets and values)" |
| ); |
| let raw_value_offsets = data.buffers()[0].raw_data(); |
| assert!( |
| memory::is_aligned(raw_value_offsets, mem::align_of::<i32>()), |
| "memory is not aligned" |
| ); |
| let value_data = data.buffers()[1].raw_data(); |
| Self { |
| data: data.clone(), |
| value_offsets: RawPtrBox::new(raw_value_offsets as *const i32), |
| value_data: RawPtrBox::new(value_data), |
| } |
| } |
| } |
| |
| impl<'a> From<Vec<&'a str>> for BinaryArray { |
| fn from(v: Vec<&'a str>) -> Self { |
| let mut offsets = Vec::with_capacity(v.len() + 1); |
| let mut values = Vec::new(); |
| let mut length_so_far = 0; |
| offsets.push(length_so_far); |
| for s in &v { |
| length_so_far += s.len() as i32; |
| offsets.push(length_so_far as i32); |
| values.extend_from_slice(s.as_bytes()); |
| } |
| let array_data = ArrayData::builder(DataType::Utf8) |
| .len(v.len()) |
| .add_buffer(Buffer::from(offsets.to_byte_slice())) |
| .add_buffer(Buffer::from(&values[..])) |
| .build(); |
| BinaryArray::from(array_data) |
| } |
| } |
| |
| impl From<Vec<&[u8]>> for BinaryArray { |
| fn from(v: Vec<&[u8]>) -> Self { |
| let mut offsets = Vec::with_capacity(v.len() + 1); |
| let mut values = Vec::new(); |
| let mut length_so_far = 0; |
| offsets.push(length_so_far); |
| for s in &v { |
| length_so_far += s.len() as i32; |
| offsets.push(length_so_far as i32); |
| values.extend_from_slice(s); |
| } |
| let array_data = ArrayData::builder(DataType::Utf8) |
| .len(v.len()) |
| .add_buffer(Buffer::from(offsets.to_byte_slice())) |
| .add_buffer(Buffer::from(&values[..])) |
| .build(); |
| BinaryArray::from(array_data) |
| } |
| } |
| |
| impl<'a> TryFrom<Vec<Option<&'a str>>> for BinaryArray { |
| type Error = ArrowError; |
| |
| fn try_from(v: Vec<Option<&'a str>>) -> Result<Self> { |
| let mut builder = BinaryBuilder::new(v.len()); |
| for val in v { |
| if let Some(s) = val { |
| builder.append_string(s)?; |
| } else { |
| builder.append(false)?; |
| } |
| } |
| Ok(builder.finish()) |
| } |
| } |
| |
| /// Creates a `BinaryArray` from `List<u8>` array |
| impl From<ListArray> for BinaryArray { |
| fn from(v: ListArray) -> Self { |
| assert_eq!( |
| v.data().child_data()[0].child_data().len(), |
| 0, |
| "BinaryArray can only be created from list array of u8 values \ |
| (i.e. List<PrimitiveArray<u8>>)." |
| ); |
| assert_eq!( |
| v.data().child_data()[0].data_type(), |
| &DataType::UInt8, |
| "BinaryArray can only be created from List<u8> arrays, mismatched data types." |
| ); |
| |
| let mut builder = ArrayData::builder(DataType::Utf8) |
| .len(v.len()) |
| .add_buffer(v.data().buffers()[0].clone()) |
| .add_buffer(v.data().child_data()[0].buffers()[0].clone()); |
| if let Some(bitmap) = v.data().null_bitmap() { |
| builder = builder |
| .null_count(v.data().null_count()) |
| .null_bit_buffer(bitmap.bits.clone()) |
| } |
| |
| let data = builder.build(); |
| Self::from(data) |
| } |
| } |
| |
| impl fmt::Debug for BinaryArray { |
| fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { |
| write!(f, "BinaryArray\n[\n")?; |
| print_long_array(self, f, |array, index, f| { |
| fmt::Debug::fmt(&array.value(index), f) |
| })?; |
| write!(f, "]") |
| } |
| } |
| |
| impl Array for BinaryArray { |
| fn as_any(&self) -> &Any { |
| self |
| } |
| |
| fn data(&self) -> ArrayDataRef { |
| self.data.clone() |
| } |
| |
| fn data_ref(&self) -> &ArrayDataRef { |
| &self.data |
| } |
| } |
| |
| /// A nested array type where each child (called *field*) is represented by a separate |
| /// array. |
| pub struct StructArray { |
| data: ArrayDataRef, |
| pub(crate) boxed_fields: Vec<ArrayRef>, |
| } |
| |
| impl StructArray { |
| /// Returns the field at `pos`. |
| pub fn column(&self, pos: usize) -> &ArrayRef { |
| &self.boxed_fields[pos] |
| } |
| |
| /// Return the number of fields in this struct array |
| pub fn num_columns(&self) -> usize { |
| self.boxed_fields.len() |
| } |
| |
| /// Returns the fields of the struct array |
| pub fn columns(&self) -> Vec<&ArrayRef> { |
| self.boxed_fields.iter().collect() |
| } |
| |
| /// Returns child array refs of the struct array |
| pub fn columns_ref(&self) -> Vec<ArrayRef> { |
| self.boxed_fields.clone() |
| } |
| |
| /// Return field names in this struct array |
| pub fn column_names(&self) -> Vec<&str> { |
| match self.data.data_type() { |
| Struct(fields) => fields |
| .iter() |
| .map(|f| f.name().as_str()) |
| .collect::<Vec<&str>>(), |
| _ => unreachable!("Struct array's data type is not struct!"), |
| } |
| } |
| |
| /// Return child array whose field name equals to column_name |
| pub fn column_by_name(&self, column_name: &str) -> Option<&ArrayRef> { |
| self.column_names() |
| .iter() |
| .position(|c| c == &column_name) |
| .map(|pos| self.column(pos)) |
| } |
| } |
| |
| impl From<ArrayDataRef> for StructArray { |
| fn from(data: ArrayDataRef) -> Self { |
| let mut boxed_fields = vec![]; |
| for cd in data.child_data() { |
| let child_data = if data.offset != 0 || data.len != cd.len { |
| slice_data(cd.clone(), data.offset, data.len) |
| } else { |
| cd.clone() |
| }; |
| boxed_fields.push(make_array(child_data)); |
| } |
| Self { data, boxed_fields } |
| } |
| } |
| |
| impl Array for StructArray { |
| fn as_any(&self) -> &Any { |
| self |
| } |
| |
| fn data(&self) -> ArrayDataRef { |
| self.data.clone() |
| } |
| |
| fn data_ref(&self) -> &ArrayDataRef { |
| &self.data |
| } |
| |
| /// Returns the length (i.e., number of elements) of this array |
| fn len(&self) -> usize { |
| self.data().len() |
| } |
| } |
| |
| impl From<Vec<(Field, ArrayRef)>> for StructArray { |
| fn from(v: Vec<(Field, ArrayRef)>) -> Self { |
| let (field_types, field_values): (Vec<_>, Vec<_>) = v.into_iter().unzip(); |
| |
| // Check the length of the child arrays |
| let length = field_values[0].len(); |
| for i in 1..field_values.len() { |
| assert_eq!( |
| length, |
| field_values[i].len(), |
| "all child arrays of a StructArray must have the same length" |
| ); |
| assert_eq!( |
| field_types[i].data_type(), |
| field_values[i].data().data_type(), |
| "the field data types must match the array data in a StructArray" |
| ) |
| } |
| |
| let data = ArrayData::builder(DataType::Struct(field_types)) |
| .child_data(field_values.into_iter().map(|a| a.data()).collect()) |
| .len(length) |
| .build(); |
| Self::from(data) |
| } |
| } |
| |
| impl fmt::Debug for StructArray { |
| fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { |
| write!(f, "StructArray\n[\n")?; |
| for (child_index, name) in self.column_names().iter().enumerate() { |
| let column = self.column(child_index); |
| write!( |
| f, |
| "-- child {}: \"{}\" ({:?})\n", |
| child_index, |
| name, |
| column.data_type() |
| )?; |
| fmt::Debug::fmt(column, f)?; |
| write!(f, "\n")?; |
| } |
| write!(f, "]") |
| } |
| } |
| |
| #[cfg(test)] |
| mod tests { |
| use super::*; |
| |
| use std::sync::Arc; |
| use std::thread; |
| |
| use crate::buffer::Buffer; |
| use crate::datatypes::{DataType, Field}; |
| use crate::memory; |
| |
/// Builds an `Int32Array` directly over a buffer and checks its values, length,
/// offset and the validity of every slot.
#[test]
fn test_primitive_array_from_vec() {
    let source = Buffer::from(&[0, 1, 2, 3, 4].to_byte_slice());
    let expected_values = source.clone();
    let array = Int32Array::new(5, source, 0, 0);

    // The values buffer must compare equal to the one the array was built from.
    assert_eq!(expected_values, array.values());

    // View the same five values through the raw pointer as well.
    let raw = unsafe { ::std::slice::from_raw_parts(array.raw_values(), 5) };
    assert_eq!(&[0, 1, 2, 3, 4], raw);

    assert_eq!(5, array.len());
    assert_eq!(0, array.offset());
    assert_eq!(0, array.null_count());
    for idx in 0..5 {
        assert!(!array.is_null(idx));
        assert!(array.is_valid(idx));
        assert_eq!(idx as i32, array.value(idx));
    }
}
| |
/// Builds an `Int32Array` from optional values and checks null bookkeeping.
#[test]
fn test_primitive_array_from_vec_option() {
    // Even positions hold their own index, odd positions are null.
    let array = Int32Array::from(vec![Some(0), None, Some(2), None, Some(4)]);
    assert_eq!(5, array.len());
    assert_eq!(0, array.offset());
    assert_eq!(2, array.null_count());
    for idx in 0..5 {
        let expect_null = idx % 2 != 0;
        if expect_null {
            assert!(array.is_null(idx));
            assert!(!array.is_valid(idx));
        } else {
            assert!(!array.is_null(idx));
            assert!(array.is_valid(idx));
            assert_eq!(idx as i32, array.value(idx));
        }
    }
}
| |
/// Builds a `Date64` array with one null and round-trips the valid values through
/// `value_as_datetime`.
#[test]
fn test_date64_array_from_vec_option() {
    // The timestamp does not fit in 32 bits, so the native values are 64-bit integers.
    let millis: i64 = 1550902545147;
    let array: PrimitiveArray<Date64Type> = vec![Some(millis), None, Some(millis)].into();
    assert_eq!(3, array.len());
    assert_eq!(0, array.offset());
    assert_eq!(1, array.null_count());
    for idx in 0..3 {
        if idx % 2 != 0 {
            // Only the middle slot is null.
            assert!(array.is_null(idx));
            assert!(!array.is_valid(idx));
            continue;
        }
        assert!(!array.is_null(idx));
        assert!(array.is_valid(idx));
        assert_eq!(millis, array.value(idx));
        // Converting to a datetime and back must give the same milliseconds.
        let datetime = array.value_as_datetime(idx).unwrap();
        assert_eq!(millis, datetime.timestamp_millis());
    }
}
| |
/// Checks that `Time32(Millisecond)` values convert to times but not to dates or
/// datetimes.
#[test]
fn test_time32_millisecond_array_from_vec() {
    // Input values and the wall-clock times they stand for:
    //        1 -> 00:00:00.001
    // 37800005 -> 10:30:00.005
    // 86399210 -> 23:59:59.210
    let array: PrimitiveArray<Time32MillisecondType> =
        vec![1, 37_800_005, 86_399_210].into();
    let expected_text = vec!["00:00:00.001", "10:30:00.005", "23:59:59.210"];

    assert_eq!(3, array.len());
    assert_eq!(0, array.offset());
    assert_eq!(0, array.null_count());
    for (idx, expected) in expected_text.iter().enumerate() {
        // A time-of-day value must not be convertible to a date or datetime.
        assert_eq!(None, array.value_as_datetime(idx));
        assert_eq!(None, array.value_as_date(idx));
        let time_of_day = array.value_as_time(idx).unwrap();
        assert_eq!(*expected, time_of_day.format("%H:%M:%S%.3f").to_string());
    }
}
| |
/// Checks that `Time64(Nanosecond)` values convert to times but not to dates or
/// datetimes.
#[test]
fn test_time64_nanosecond_array_from_vec() {
    // Test building a primitive array from plain (non-null) values.
    // We use Int32 and Int64 as a backing array, so all Int32 and Int64 conventions
    // work.

    // 1e6: 00:00:00.001
    // 37800005e6: 10:30:00.005
    // 86399210e6: 23:59:59.210
    let arr: PrimitiveArray<Time64NanosecondType> =
        vec![1_000_000, 37_800_005_000_000, 86_399_210_000_000].into();
    assert_eq!(3, arr.len());
    assert_eq!(0, arr.offset());
    assert_eq!(0, arr.null_count());
    let formatted = vec!["00:00:00.001", "10:30:00.005", "23:59:59.210"];
    for i in 0..3 {
        // check that we can't create dates or datetimes from time instances
        assert_eq!(None, arr.value_as_datetime(i));
        assert_eq!(None, arr.value_as_date(i));
        let time = arr.value_as_time(i).unwrap();
        assert_eq!(formatted[i], time.format("%H:%M:%S%.3f").to_string());
    }
}
| |
/// Slices an `Int32Array` twice and checks length, offset, null count and values of
/// each slice.
#[test]
fn test_primitive_array_slice() {
    let full = Int32Array::from(vec![
        Some(0),
        None,
        Some(2),
        None,
        Some(4),
        Some(5),
        Some(6),
        None,
        None,
    ]);
    assert_eq!(9, full.len());
    assert_eq!(0, full.offset());
    assert_eq!(4, full.null_count());

    // First slice covers [2, null, 4, 5, 6] of the original input.
    let first = full.slice(2, 5);
    assert_eq!(5, first.len());
    assert_eq!(2, first.offset());
    assert_eq!(1, first.null_count());

    for idx in 0..first.len() {
        let is_null_slot = idx == 1;
        assert_eq!(is_null_slot, first.is_null(idx));
        assert_eq!(!is_null_slot, first.is_valid(idx));
    }

    // Second slice is taken from the first one and covers [4, 5, 6].
    let second = first.slice(2, 3);
    assert_eq!(3, second.len());
    assert_eq!(4, second.offset());
    assert_eq!(0, second.null_count());

    let ints = second.as_any().downcast_ref::<Int32Array>().unwrap();
    for (idx, expected) in [4, 5, 6].iter().enumerate() {
        assert_eq!(*expected, ints.value(idx));
    }
}
| |
/// Requests a four-element value slice from a three-element array; the test passes
/// as long as the call itself does not panic.
#[test]
fn test_value_slice_no_bounds_check() {
    let three_values = Int32Array::from(vec![2, 3, 4]);
    let _oversized = three_values.value_slice(0, 4);
}
| |
/// Checks the `Debug` rendering of an `Int32Array` without nulls.
#[test]
fn test_int32_fmt_debug() {
    let values = Buffer::from(&[0, 1, 2, 3, 4].to_byte_slice());
    let array = Int32Array::new(5, values, 0, 0);
    let rendered = format!("{:?}", array);
    assert_eq!(
        "PrimitiveArray<Int32>\n[\n 0,\n 1,\n 2,\n 3,\n 4,\n]",
        rendered
    );
}
| |
/// Checks the `Debug` rendering of an `Int32Array` that contains a null.
#[test]
fn test_int32_with_null_fmt_debug() {
    let mut int_builder = Int32Array::builder(3);
    int_builder.append_slice(&[0, 1]).unwrap();
    int_builder.append_null().unwrap();
    int_builder.append_slice(&[3, 4]).unwrap();
    let array = int_builder.finish();

    let rendered = format!("{:?}", array);
    assert_eq!(
        "PrimitiveArray<Int32>\n[\n 0,\n 1,\n null,\n 3,\n 4,\n]",
        rendered
    );
}
| |
/// Checks the `Debug` rendering of a `BooleanArray` without nulls.
#[test]
fn test_boolean_fmt_debug() {
    let values = Buffer::from(&[true, false, false].to_byte_slice());
    let array = BooleanArray::new(3, values, 0, 0);
    let rendered = format!("{:?}", array);
    assert_eq!(
        "PrimitiveArray<Boolean>\n[\n true,\n false,\n false,\n]",
        rendered
    );
}
| |
/// Checks the `Debug` rendering of a `BooleanArray` that contains a null.
#[test]
fn test_boolean_with_null_fmt_debug() {
    let mut bool_builder = BooleanArray::builder(3);
    bool_builder.append_value(true).unwrap();
    bool_builder.append_null().unwrap();
    bool_builder.append_value(false).unwrap();
    let array = bool_builder.finish();

    let rendered = format!("{:?}", array);
    assert_eq!(
        "PrimitiveArray<Boolean>\n[\n true,\n null,\n false,\n]",
        rendered
    );
}
| |
/// Checks the `Debug` rendering of a millisecond timestamp array.
#[test]
fn test_timestamp_fmt_debug() {
    let array: PrimitiveArray<TimestampMillisecondType> =
        vec![1546214400000, 1546214400000].into();
    let rendered = format!("{:?}", array);
    assert_eq!(
        "PrimitiveArray<Timestamp(Millisecond)>\n[\n 2018-12-31T00:00:00,\n 2018-12-31T00:00:00,\n]",
        rendered
    );
}
| |
/// Checks the `Debug` rendering of a `Date32` array.
#[test]
fn test_date32_fmt_debug() {
    let array: PrimitiveArray<Date32Type> = vec![12356, 13548].into();
    let rendered = format!("{:?}", array);
    assert_eq!(
        "PrimitiveArray<Date32(Day)>\n[\n 2003-10-31,\n 2007-02-04,\n]",
        rendered
    );
}
| |
/// Checks the `Debug` rendering of a `Time32(Second)` array.
#[test]
fn test_time32second_fmt_debug() {
    let array: PrimitiveArray<Time32SecondType> = vec![7201, 60054].into();
    let rendered = format!("{:?}", array);
    assert_eq!(
        "PrimitiveArray<Time32(Second)>\n[\n 02:00:01,\n 16:40:54,\n]",
        rendered
    );
}
| |
/// Builds an `Int32Array` from `ArrayData` that carries an offset of 2 and checks
/// that values are read relative to that offset.
#[test]
fn test_primitive_array_builder() {
    let source = Buffer::from(&[0, 1, 2, 3, 4].to_byte_slice());
    let expected_values = source.clone();
    let array_data = ArrayData::builder(DataType::Int32)
        .len(5)
        .offset(2)
        .add_buffer(source)
        .build();
    let array = Int32Array::from(array_data);

    assert_eq!(expected_values, array.values());
    assert_eq!(5, array.len());
    assert_eq!(0, array.null_count());
    // With an offset of 2, logical positions 0, 1, 2 map to the values 2, 3, 4.
    for (idx, expected) in [2, 3, 4].iter().enumerate() {
        assert_eq!(*expected, array.value(idx));
    }
}
| |
/// Converting `ArrayData` without any buffer into an `Int32Array` must panic.
#[test]
#[should_panic(expected = "PrimitiveArray data should contain a single buffer only \
                           (values buffer)")]
fn test_primitive_array_invalid_buffer_len() {
    let without_buffers = ArrayData::builder(DataType::Int32).len(5).build();
    let _ = Int32Array::from(without_buffers);
}
| |
/// Builds a `BooleanArray` over two raw bytes and checks every one of its ten slots.
#[test]
fn test_boolean_array_new() {
    // Bytes 72 and 2 in binary: 01001000 00000010
    let bits = Buffer::from([72_u8, 2_u8]);
    let expected_values = bits.clone();
    let array = BooleanArray::new(10, bits, 0, 0);

    assert_eq!(expected_values, array.values());
    assert_eq!(10, array.len());
    assert_eq!(0, array.offset());
    assert_eq!(0, array.null_count());
    for idx in 0..10 {
        assert!(!array.is_null(idx));
        assert!(array.is_valid(idx));
        // Only positions 3, 6 and 9 are expected to be true.
        let expected = idx == 3 || idx == 6 || idx == 9;
        assert_eq!(expected, array.value(idx), "failed at {}", idx)
    }
}
| |
/// Builds a `BooleanArray` from a `Vec<bool>` and checks the packed values buffer
/// and each slot.
#[test]
fn test_boolean_array_from_vec() {
    let array = BooleanArray::from(vec![false, true, false, true]);
    let expected_bits = Buffer::from([10_u8]);

    assert_eq!(expected_bits, array.values());
    assert_eq!(4, array.len());
    assert_eq!(0, array.offset());
    assert_eq!(0, array.null_count());
    for idx in 0..4 {
        assert!(!array.is_null(idx));
        assert!(array.is_valid(idx));
        let expected = idx == 1 || idx == 3;
        assert_eq!(expected, array.value(idx), "failed at {}", idx)
    }
}
| |
/// Builds a `BooleanArray` from optional values and checks null bookkeeping.
#[test]
fn test_boolean_array_from_vec_option() {
    let array = BooleanArray::from(vec![Some(false), Some(true), None, Some(true)]);
    let expected_bits = Buffer::from([10_u8]);

    assert_eq!(expected_bits, array.values());
    assert_eq!(4, array.len());
    assert_eq!(0, array.offset());
    assert_eq!(1, array.null_count());
    for idx in 0..4 {
        if idx != 2 {
            assert!(!array.is_null(idx));
            assert!(array.is_valid(idx));
            let expected = idx == 1 || idx == 3;
            assert_eq!(expected, array.value(idx), "failed at {}", idx)
        } else {
            // Position 2 is the only null slot.
            assert!(array.is_null(idx));
            assert!(!array.is_valid(idx));
        }
    }
}
| |
/// Builds a `BooleanArray` from `ArrayData` that carries an offset of 2 and checks
/// that values are read relative to that offset.
#[test]
fn test_boolean_array_builder() {
    // Byte 27 in binary: 00011011
    let bits = Buffer::from([27_u8]);
    let expected_values = bits.clone();
    let array_data = ArrayData::builder(DataType::Boolean)
        .len(5)
        .offset(2)
        .add_buffer(bits)
        .build();
    let array = BooleanArray::from(array_data);

    assert_eq!(expected_values, array.values());
    assert_eq!(5, array.len());
    assert_eq!(2, array.offset());
    assert_eq!(0, array.null_count());
    for idx in 0..3 {
        let expected = idx != 0;
        assert_eq!(expected, array.value(idx), "failed at {}", idx);
    }
}
| |
/// Converting `ArrayData` without any buffer into a `BooleanArray` must panic.
#[test]
#[should_panic(expected = "PrimitiveArray data should contain a single buffer only \
                           (values buffer)")]
fn test_boolean_array_invalid_buffer_len() {
    let without_buffers = ArrayData::builder(DataType::Boolean).len(5).build();
    let _ = BooleanArray::from(without_buffers);
}
| |
/// Builds a `ListArray` of `Int32` lists, once without and once with an offset, and
/// checks values, type, length, null count and per-element offsets/lengths.
#[test]
fn test_list_array() {
    // Child values shared by both list arrays.
    let child_data = ArrayData::builder(DataType::Int32)
        .len(8)
        .add_buffer(Buffer::from(&[0, 1, 2, 3, 4, 5, 6, 7].to_byte_slice()))
        .build();

    // Offsets describing the nested array [[0, 1, 2], [3, 4, 5], [6, 7]].
    let offsets = Buffer::from(&[0, 3, 6, 8].to_byte_slice());
    let list_type = DataType::List(Box::new(DataType::Int32));

    // Case 1: no offset on the list array.
    let plain_data = ArrayData::builder(list_type.clone())
        .len(3)
        .add_buffer(offsets.clone())
        .add_child_data(child_data.clone())
        .build();
    let plain = ListArray::from(plain_data);

    let plain_values = plain.values();
    assert_eq!(child_data, plain_values.data());
    assert_eq!(DataType::Int32, plain.value_type());
    assert_eq!(3, plain.len());
    assert_eq!(0, plain.null_count());
    assert_eq!(6, plain.value_offset(2));
    assert_eq!(2, plain.value_length(2));
    for idx in 0..3 {
        assert!(plain.is_valid(idx));
        assert!(!plain.is_null(idx));
    }

    // Case 2: the same buffers, but with a list-level offset of 1.
    let shifted_data = ArrayData::builder(list_type)
        .len(3)
        .offset(1)
        .add_buffer(offsets)
        .add_child_data(child_data.clone())
        .build();
    let shifted = ListArray::from(shifted_data);

    let shifted_values = shifted.values();
    assert_eq!(child_data, shifted_values.data());
    assert_eq!(DataType::Int32, shifted.value_type());
    assert_eq!(3, shifted.len());
    assert_eq!(0, shifted.null_count());
    assert_eq!(6, shifted.value_offset(1));
    assert_eq!(2, shifted.value_length(1));
}
| |
/// Slices a `ListArray` that contains nulls and checks validity, offsets and lengths
/// of the sliced view.
#[test]
fn test_list_array_slice() {
    // Child values.
    let child_data = ArrayData::builder(DataType::Int32)
        .len(10)
        .add_buffer(Buffer::from(
            &[0, 1, 2, 3, 4, 5, 6, 7, 8, 9].to_byte_slice(),
        ))
        .build();

    // Offsets describing the nested array
    // [[0, 1], null, null, [2, 3], [4, 5], null, [6, 7, 8], null, [9]]
    let offsets = Buffer::from(&[0, 2, 2, 2, 4, 6, 6, 9, 9, 10].to_byte_slice());

    // Validity bitmap 01011001 00000001: bits 0, 3, 4, 6 and 8 are set.
    let mut null_bits: [u8; 2] = [0; 2];
    for &bit in [0, 3, 4, 6, 8].iter() {
        bit_util::set_bit(&mut null_bits, bit);
    }

    // Assemble the list array from offsets, child values and validity bitmap.
    let list_type = DataType::List(Box::new(DataType::Int32));
    let list_data = ArrayData::builder(list_type.clone())
        .len(9)
        .add_buffer(offsets.clone())
        .add_child_data(child_data.clone())
        .null_bit_buffer(Buffer::from(null_bits))
        .build();
    let list_array = ListArray::from(list_data);

    let list_values = list_array.values();
    assert_eq!(child_data, list_values.data());
    assert_eq!(DataType::Int32, list_array.value_type());
    assert_eq!(9, list_array.len());
    assert_eq!(4, list_array.null_count());
    assert_eq!(2, list_array.value_offset(3));
    assert_eq!(2, list_array.value_length(3));

    let sliced = list_array.slice(1, 6);
    assert_eq!(6, sliced.len());
    assert_eq!(1, sliced.offset());
    assert_eq!(3, sliced.null_count());

    // Validity of each sliced slot must agree with the original bitmap.
    for idx in 0..sliced.len() {
        let bit_is_set = bit_util::get_bit(&null_bits, sliced.offset() + idx);
        if bit_is_set {
            assert!(sliced.is_valid(idx));
        } else {
            assert!(sliced.is_null(idx));
        }
    }

    // Check offset and length for each non-null value.
    let sliced_list = sliced.as_any().downcast_ref::<ListArray>().unwrap();
    assert_eq!(2, sliced_list.value_offset(2));
    assert_eq!(2, sliced_list.value_length(2));
    assert_eq!(4, sliced_list.value_offset(3));
    assert_eq!(2, sliced_list.value_length(3));
    assert_eq!(6, sliced_list.value_offset(5));
    assert_eq!(3, sliced_list.value_length(5));
}
| |
/// Converting list `ArrayData` without an offsets buffer into a `ListArray` must
/// panic.
#[test]
#[should_panic(
    expected = "ListArray data should contain a single buffer only (value offsets)"
)]
fn test_list_array_invalid_buffer_len() {
    let child_data = ArrayData::builder(DataType::Int32)
        .len(8)
        .add_buffer(Buffer::from(&[0, 1, 2, 3, 4, 5, 6, 7].to_byte_slice()))
        .build();
    let list_type = DataType::List(Box::new(DataType::Int32));
    // Note: no `add_buffer` call here, so the offsets buffer is missing.
    let without_offsets = ArrayData::builder(list_type)
        .len(3)
        .add_child_data(child_data)
        .build();
    let _ = ListArray::from(without_offsets);
}
| |
/// Converting list `ArrayData` without child data into a `ListArray` must panic.
#[test]
#[should_panic(
    expected = "ListArray should contain a single child array (values array)"
)]
fn test_list_array_invalid_child_array_len() {
    let offsets = Buffer::from(&[0, 2, 5, 7].to_byte_slice());
    let list_type = DataType::List(Box::new(DataType::Int32));
    // Note: no `add_child_data` call here, so the values array is missing.
    let without_child = ArrayData::builder(list_type)
        .len(3)
        .add_buffer(offsets)
        .build();
    let _ = ListArray::from(without_child);
}
| |
/// Converting list `ArrayData` whose offsets start at 2 instead of 0 must panic.
#[test]
#[should_panic(expected = "offsets do not start at zero")]
fn test_list_array_invalid_value_offset_start() {
    let child_data = ArrayData::builder(DataType::Int32)
        .len(8)
        .add_buffer(Buffer::from(&[0, 1, 2, 3, 4, 5, 6, 7].to_byte_slice()))
        .build();

    // The first offset is 2, which is what the conversion must reject.
    let bad_offsets = Buffer::from(&[2, 2, 5, 7].to_byte_slice());

    let list_type = DataType::List(Box::new(DataType::Int32));
    let list_data = ArrayData::builder(list_type)
        .len(3)
        .add_buffer(bad_offsets)
        .add_child_data(child_data)
        .build();
    let _ = ListArray::from(list_data);
}
| |
/// Builds a `BinaryArray` from raw offsets and bytes, without and with an offset, and
/// checks values, strings, offsets and lengths.
#[test]
fn test_binary_array() {
    // Bytes of "hello" followed by "parquet".
    let bytes: [u8; 12] = *b"helloparquet";
    let value_offsets: [i32; 4] = [0, 5, 5, 12];

    // Array data: ["hello", "", "parquet"]
    let plain_data = ArrayData::builder(DataType::Utf8)
        .len(3)
        .add_buffer(Buffer::from(value_offsets.to_byte_slice()))
        .add_buffer(Buffer::from(&bytes[..]))
        .build();
    let plain = BinaryArray::from(plain_data);
    assert_eq!(3, plain.len());
    assert_eq!(0, plain.null_count());

    assert_eq!(*b"hello", plain.value(0));
    assert_eq!("hello", plain.get_string(0));

    assert_eq!([] as [u8; 0], plain.value(1));
    assert_eq!("", plain.get_string(1));

    assert_eq!(*b"parquet", plain.value(2));
    assert_eq!("parquet", plain.get_string(2));
    assert_eq!(5, plain.value_offset(2));
    assert_eq!(7, plain.value_length(2));

    for idx in 0..3 {
        assert!(plain.is_valid(idx));
        assert!(!plain.is_null(idx));
    }

    // Same buffers again, this time with an array-level offset of 1.
    let shifted_data = ArrayData::builder(DataType::Utf8)
        .len(4)
        .offset(1)
        .add_buffer(Buffer::from(value_offsets.to_byte_slice()))
        .add_buffer(Buffer::from(&bytes[..]))
        .build();
    let shifted = BinaryArray::from(shifted_data);
    assert_eq!(*b"parquet", shifted.value(1));
    assert_eq!("parquet", shifted.get_string(1));
    assert_eq!(5, shifted.value_offset(0));
    assert_eq!(0, shifted.value_length(0));
    assert_eq!(5, shifted.value_offset(1));
    assert_eq!(7, shifted.value_length(1));
}
| |
/// Builds the same binary content directly and via a `ListArray` of `u8`, then checks
/// that both `BinaryArray`s agree element by element.
#[test]
fn test_binary_array_from_list_array() {
    // Bytes of "hello" followed by "parquet".
    let bytes: [u8; 12] = *b"helloparquet";
    let value_offsets: [i32; 4] = [0, 5, 5, 12];
    let child_data = ArrayData::builder(DataType::UInt8)
        .len(12)
        .add_buffer(Buffer::from(&bytes[..]))
        .build();

    // Reference array built directly: ["hello", "", "parquet"]
    let direct_data = ArrayData::builder(DataType::Utf8)
        .len(3)
        .add_buffer(Buffer::from(value_offsets.to_byte_slice()))
        .add_buffer(Buffer::from(&bytes[..]))
        .build();
    let direct = BinaryArray::from(direct_data);

    // Same content expressed as a list array, then converted.
    let list_data = ArrayData::builder(DataType::Utf8)
        .len(3)
        .add_buffer(Buffer::from(value_offsets.to_byte_slice()))
        .add_child_data(child_data)
        .build();
    let converted = BinaryArray::from(ListArray::from(list_data));

    // The converted array owns two buffers and no child data.
    assert_eq!(2, converted.data().buffers().len());
    assert_eq!(0, converted.data().child_data().len());

    assert_eq!(direct.len(), converted.len());
    assert_eq!(direct.null_count(), converted.null_count());
    for idx in 0..direct.len() {
        assert_eq!(direct.value(idx), converted.value(idx));
        assert_eq!(direct.get_string(idx), converted.get_string(idx));
        assert_eq!(direct.value_offset(idx), converted.value_offset(idx));
        assert_eq!(direct.value_length(idx), converted.value_length(idx));
    }
}
| |
/// Builds a `BinaryArray` from a vector of byte slices and checks values, strings,
/// offsets, lengths and validity.
#[test]
fn test_binary_array_from_u8_slice() {
    // Array data: ["hello", "", "parquet"]
    let input: Vec<&[u8]> = vec![&b"hello"[..], &b""[..], &b"parquet"[..]];
    let array = BinaryArray::from(input);

    assert_eq!(3, array.len());
    assert_eq!(0, array.null_count());

    assert_eq!(*b"hello", array.value(0));
    assert_eq!("hello", array.get_string(0));

    assert_eq!([] as [u8; 0], array.value(1));
    assert_eq!("", array.get_string(1));

    assert_eq!(*b"parquet", array.value(2));
    assert_eq!("parquet", array.get_string(2));
    assert_eq!(5, array.value_offset(2));
    assert_eq!(7, array.value_length(2));

    for idx in 0..3 {
        assert!(array.is_valid(idx));
        assert!(!array.is_null(idx));
    }
}
| |
/// Converting a list array whose child type is `UInt32` into a `BinaryArray` must
/// panic with the data-type mismatch message.
#[test]
#[should_panic(
    expected = "BinaryArray can only be created from List<u8> arrays, mismatched \
                data types."
)]
fn test_binary_array_from_incorrect_list_array_type() {
    let wide_values: [u32; 12] = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11];
    let value_offsets: [i32; 4] = [0, 5, 5, 12];

    // The child is UInt32 rather than UInt8.
    let child_data = ArrayData::builder(DataType::UInt32)
        .len(12)
        .add_buffer(Buffer::from(wide_values[..].to_byte_slice()))
        .build();

    let list_data = ArrayData::builder(DataType::Utf8)
        .len(3)
        .add_buffer(Buffer::from(value_offsets.to_byte_slice()))
        .add_child_data(child_data)
        .build();
    let list_array = ListArray::from(list_data);
    let _ = BinaryArray::from(list_array);
}
| |
/// Converting a list array whose child itself has child data into a `BinaryArray`
/// must panic with the "list array of u8 values" message.
#[test]
#[should_panic(
    expected = "BinaryArray can only be created from list array of u8 values \
                (i.e. List<PrimitiveArray<u8>>)."
)]
fn test_binary_array_from_incorrect_list_array() {
    let wide_values: [u32; 12] = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11];
    let value_offsets: [i32; 4] = [0, 5, 5, 12];

    // The child carries a nested child of its own, which the conversion rejects.
    let nested_child = ArrayData::builder(DataType::Boolean).build();
    let child_data = ArrayData::builder(DataType::UInt32)
        .len(12)
        .add_buffer(Buffer::from(wide_values[..].to_byte_slice()))
        .add_child_data(nested_child)
        .build();

    let list_data = ArrayData::builder(DataType::Utf8)
        .len(3)
        .add_buffer(Buffer::from(value_offsets.to_byte_slice()))
        .add_child_data(child_data)
        .build();
    let list_array = ListArray::from(list_data);
    let _ = BinaryArray::from(list_array);
}
| |
/// Reading index 4 of a three-element `BinaryArray` must panic.
#[test]
#[should_panic(expected = "BinaryArray out of bounds access")]
fn test_binary_array_get_value_index_out_of_bound() {
    // Bytes of "hello" followed by "parquet".
    let bytes: [u8; 12] = *b"helloparquet";
    let value_offsets: [i32; 4] = [0, 5, 5, 12];
    let data = ArrayData::builder(DataType::Utf8)
        .len(3)
        .add_buffer(Buffer::from(value_offsets.to_byte_slice()))
        .add_buffer(Buffer::from(&bytes[..]))
        .build();
    let array = BinaryArray::from(data);
    let _ = array.value(4);
}
| |
/// Builds a `StructArray` from `ArrayData` with two children and checks that each
/// column exposes the child data it was built from.
#[test]
fn test_struct_array_builder() {
    let flag_data = ArrayData::builder(DataType::Boolean)
        .len(4)
        .add_buffer(Buffer::from([false, false, true, true].to_byte_slice()))
        .build();
    let number_data = ArrayData::builder(DataType::Int64)
        .len(4)
        .add_buffer(Buffer::from([42, 28, 19, 31].to_byte_slice()))
        .build();

    let fields = vec![
        Field::new("a", DataType::Boolean, false),
        Field::new("b", DataType::Int64, false),
    ];
    let struct_data = ArrayData::builder(DataType::Struct(fields))
        .len(4)
        .add_child_data(flag_data.clone())
        .add_child_data(number_data.clone())
        .build();
    let struct_array = StructArray::from(struct_data);

    assert_eq!(flag_data, struct_array.column(0).data());
    assert_eq!(number_data, struct_array.column(1).data());
}
| |
/// Builds a `StructArray` from `(Field, ArrayRef)` pairs and compares its columns
/// with independently constructed `ArrayData`.
#[test]
fn test_struct_array_from() {
    // Child arrays handed to the struct array.
    let flags: Arc<Array> = Arc::new(BooleanArray::from(vec![false, false, true, true]));
    let numbers: Arc<Array> = Arc::new(Int32Array::from(vec![42, 28, 19, 31]));
    let struct_array = StructArray::from(vec![
        (Field::new("b", DataType::Boolean, false), flags),
        (Field::new("c", DataType::Int32, false), numbers),
    ]);

    // Expected data for each column, built by hand.
    let expected_flags = ArrayData::builder(DataType::Boolean)
        .len(4)
        .add_buffer(Buffer::from([12_u8]))
        .build();
    let expected_numbers = ArrayData::builder(DataType::Int32)
        .len(4)
        .add_buffer(Buffer::from([42, 28, 19, 31].to_byte_slice()))
        .build();

    assert_eq!(expected_flags, struct_array.column(0).data());
    assert_eq!(expected_numbers, struct_array.column(1).data());
    assert_eq!(4, struct_array.len());
    assert_eq!(0, struct_array.null_count());
    assert_eq!(0, struct_array.offset());
}
| |
/// Building a `StructArray` whose field types disagree with the arrays must panic.
#[test]
#[should_panic(
    expected = "the field data types must match the array data in a StructArray"
)]
fn test_struct_array_from_mismatched_types() {
    // Both fields declare a type that differs from the array paired with them.
    let flags: Arc<Array> = Arc::new(BooleanArray::from(vec![false, false, true, true]));
    let numbers: Arc<Array> = Arc::new(Int32Array::from(vec![42, 28, 19, 31]));
    let _ = StructArray::from(vec![
        (Field::new("b", DataType::Int16, false), flags),
        (Field::new("c", DataType::Utf8, false), numbers),
    ]);
}
| |
/// Builds a `StructArray` with nulls at both the struct and the child level, then
/// checks the full array and a slice of it, including each child column.
#[test]
fn test_struct_array_slice() {
    // Child "a": booleans with their own validity bitmap.
    let flag_data = ArrayData::builder(DataType::Boolean)
        .len(5)
        .add_buffer(Buffer::from([0b00010000]))
        .null_bit_buffer(Buffer::from([0b00010001]))
        .build();
    // Child "b": 32-bit integers with their own validity bitmap.
    let number_data = ArrayData::builder(DataType::Int32)
        .len(5)
        .add_buffer(Buffer::from([0, 28, 42, 0, 0].to_byte_slice()))
        .null_bit_buffer(Buffer::from([0b00000110]))
        .build();

    let fields = vec![
        Field::new("a", DataType::Boolean, false),
        Field::new("b", DataType::Int32, false),
    ];
    let struct_data = ArrayData::builder(DataType::Struct(fields))
        .len(5)
        .add_child_data(flag_data.clone())
        .add_child_data(number_data.clone())
        .null_bit_buffer(Buffer::from([0b00010111]))
        .build();
    let struct_array = StructArray::from(struct_data);

    // Struct-level view: only slot 3 is null.
    assert_eq!(5, struct_array.len());
    assert_eq!(1, struct_array.null_count());
    assert!(struct_array.is_valid(0));
    assert!(struct_array.is_valid(1));
    assert!(struct_array.is_valid(2));
    assert!(struct_array.is_null(3));
    assert!(struct_array.is_valid(4));
    assert_eq!(flag_data, struct_array.column(0).data());
    assert_eq!(number_data, struct_array.column(1).data());

    // Column 0 of the full array.
    let flags = struct_array.column(0);
    let flags = flags.as_any().downcast_ref::<BooleanArray>().unwrap();
    assert_eq!(5, flags.len());
    assert_eq!(3, flags.null_count());
    assert!(flags.is_valid(0));
    assert_eq!(false, flags.value(0));
    assert!(flags.is_null(1));
    assert!(flags.is_null(2));
    assert!(flags.is_null(3));
    assert!(flags.is_valid(4));
    assert_eq!(true, flags.value(4));

    // Column 1 of the full array.
    let numbers = struct_array.column(1);
    let numbers = numbers.as_any().downcast_ref::<Int32Array>().unwrap();
    assert_eq!(5, numbers.len());
    assert_eq!(3, numbers.null_count());
    assert!(numbers.is_null(0));
    assert!(numbers.is_valid(1));
    assert_eq!(28, numbers.value(1));
    assert!(numbers.is_valid(2));
    assert_eq!(42, numbers.value(2));
    assert!(numbers.is_null(3));
    assert!(numbers.is_null(4));

    // Slice covering the last three slots of the struct array.
    let sliced = struct_array.slice(2, 3);
    let sliced = sliced.as_any().downcast_ref::<StructArray>().unwrap();
    assert_eq!(3, sliced.len());
    assert_eq!(2, sliced.offset());
    assert_eq!(1, sliced.null_count());
    assert!(sliced.is_valid(0));
    assert!(sliced.is_null(1));
    assert!(sliced.is_valid(2));

    // Column 0 of the slice.
    let sliced_flags = sliced.column(0);
    let sliced_flags = sliced_flags
        .as_any()
        .downcast_ref::<BooleanArray>()
        .unwrap();
    assert_eq!(3, sliced_flags.len());
    assert_eq!(2, sliced_flags.offset());
    assert!(sliced_flags.is_null(0));
    assert!(sliced_flags.is_null(1));
    assert!(sliced_flags.is_valid(2));
    assert_eq!(true, sliced_flags.value(2));

    // Column 1 of the slice.
    let sliced_numbers = sliced.column(1);
    let sliced_numbers = sliced_numbers
        .as_any()
        .downcast_ref::<Int32Array>()
        .unwrap();
    assert_eq!(3, sliced_numbers.len());
    assert_eq!(2, sliced_numbers.offset());
    assert!(sliced_numbers.is_valid(0));
    assert_eq!(42, sliced_numbers.value(0));
    assert!(sliced_numbers.is_null(1));
    assert!(sliced_numbers.is_null(2));
}
| |
/// Building a `StructArray` from child arrays of different lengths must panic.
#[test]
#[should_panic(
    expected = "all child arrays of a StructArray must have the same length"
)]
fn test_invalid_struct_child_array_lengths() {
    // One element versus two elements.
    let short: Arc<Array> = Arc::new(Float32Array::from(vec![1.1]));
    let long: Arc<Array> = Arc::new(Float64Array::from(vec![2.2, 3.3]));
    let _ = StructArray::from(vec![
        (Field::new("b", DataType::Float32, false), short),
        (Field::new("c", DataType::Float64, false), long),
    ]);
}
| |
/// Building an `Int32Array` over a buffer sliced at byte 1 must panic as misaligned.
#[test]
#[should_panic(expected = "memory is not aligned")]
fn test_primitive_array_alignment() {
    let raw = memory::allocate_aligned(8);
    let aligned = Buffer::from_raw_parts(raw, 8);
    // Skipping a single byte breaks the alignment of the aligned allocation.
    let misaligned = aligned.slice(1);
    let data = ArrayData::builder(DataType::Int32)
        .add_buffer(misaligned)
        .build();
    let _ = Int32Array::from(data);
}
| |
/// Building a `ListArray` whose offsets buffer is sliced at byte 1 must panic as
/// misaligned.
#[test]
#[should_panic(expected = "memory is not aligned")]
fn test_list_array_alignment() {
    let raw = memory::allocate_aligned(8);
    let aligned = Buffer::from_raw_parts(raw, 8);
    // Skipping a single byte breaks the alignment of the aligned allocation.
    let misaligned = aligned.slice(1);

    let zeros: [i32; 8] = [0; 8];
    let child_data = ArrayData::builder(DataType::Int32)
        .add_buffer(Buffer::from(zeros.to_byte_slice()))
        .build();

    let list_type = DataType::List(Box::new(DataType::Int32));
    let list_data = ArrayData::builder(list_type)
        .add_buffer(misaligned)
        .add_child_data(child_data)
        .build();
    let _ = ListArray::from(list_data);
}
| |
/// Building a `BinaryArray` whose offsets buffer is sliced at byte 1 must panic as
/// misaligned.
#[test]
#[should_panic(expected = "memory is not aligned")]
fn test_binary_array_alignment() {
    let raw = memory::allocate_aligned(8);
    let aligned = Buffer::from_raw_parts(raw, 8);
    // Skipping a single byte breaks the alignment of the aligned allocation.
    let misaligned = aligned.slice(1);

    let zeros: [u8; 12] = [0; 12];

    let data = ArrayData::builder(DataType::Utf8)
        .add_buffer(misaligned)
        .add_buffer(Buffer::from(&zeros[..]))
        .build();
    let _ = BinaryArray::from(data);
}
| |
/// Moves an `Int32Array` into another thread and reads one value there.
#[test]
fn test_access_array_concurrently() {
    let array = Int32Array::from(vec![5, 6, 7, 8, 9]);
    let handle = thread::spawn(move || array.value(3));
    let outcome = handle.join();

    assert!(outcome.is_ok());
    assert_eq!(8, outcome.ok().unwrap());
}
| } |