blob: 9101865d6631a407fcf816f03305844097f0476f [file] [log] [blame]
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
use std::any::Any;
use std::borrow::Borrow;
use std::convert::From;
use std::fmt;
use std::iter::{FromIterator, IntoIterator};
use std::mem;
use chrono::{prelude::*, Duration};
use super::array::print_long_array;
use super::raw_pointer::RawPtrBox;
use super::*;
use crate::temporal_conversions;
use crate::util::bit_util;
use crate::{
buffer::{Buffer, MutableBuffer},
util::trusted_len_unzip,
};
// NOTE(review): none of the four constants below is referenced in the visible
// part of this file; confirm they are still used before keeping them.
/// Number of seconds in a day
const SECONDS_IN_DAY: i64 = 86_400;
/// Number of milliseconds in a second
const MILLISECONDS: i64 = 1_000;
/// Number of microseconds in a second
const MICROSECONDS: i64 = 1_000_000;
/// Number of nanoseconds in a second
const NANOSECONDS: i64 = 1_000_000_000;
/// Array whose elements are of primitive types.
///
/// Stores the backing [`ArrayData`] together with a cached raw pointer to the
/// start of its single values buffer.
pub struct PrimitiveArray<T: ArrowPrimitiveType> {
/// Underlying ArrayData
/// # Safety
/// must have exactly one buffer, aligned to type `T::Native`
data: ArrayData,
/// Pointer to the value array. The lifetime of this must be <= to the value buffer
/// stored in `data`, so it's safe to store.
/// # Safety
/// raw_values must have a value equivalent to `data.buffers()[0].as_ptr()`
/// raw_values must have alignment for type `T::Native`
raw_values: RawPtrBox<T::Native>,
}
impl<T: ArrowPrimitiveType> PrimitiveArray<T> {
    /// Returns the length of this array.
    #[inline]
    pub fn len(&self) -> usize {
        self.data.len()
    }

    /// Returns whether this array is empty.
    pub fn is_empty(&self) -> bool {
        self.data.is_empty()
    }

    /// Returns a slice of the values of this array, starting at the array's offset.
    #[inline]
    pub fn values(&self) -> &[T::Native] {
        // SAFETY:
        // `raw_values` alignment & location is ensured by `From<ArrayData>`;
        // buffer bounds/offset are expected to be ensured by the `ArrayData` instance.
        unsafe {
            std::slice::from_raw_parts(
                self.raw_values.as_ptr().add(self.data.offset()),
                self.len(),
            )
        }
    }

    /// Returns a new primitive array builder with the given initial `capacity`.
    pub fn builder(capacity: usize) -> PrimitiveBuilder<T> {
        PrimitiveBuilder::<T>::new(capacity)
    }

    /// Returns the primitive value at index `i` without bounds checking.
    ///
    /// # Safety
    ///
    /// caller must ensure that the passed in offset is less than the array len()
    #[inline]
    pub unsafe fn value_unchecked(&self, i: usize) -> T::Native {
        let offset = i + self.offset();
        *self.raw_values.as_ptr().add(offset)
    }

    /// Returns the primitive value at index `i`.
    ///
    /// # Panics
    ///
    /// Panics if `i >= self.len()`.
    #[inline]
    pub fn value(&self, i: usize) -> T::Native {
        // This is a safe function, so the bound must be enforced in every build
        // profile: a `debug_assert!` would leave release builds performing an
        // unchecked pointer read for an out-of-range `i`.
        assert!(i < self.len());
        // SAFETY: `i < self.len()` was checked just above.
        unsafe { self.value_unchecked(i) }
    }

    /// Creates a PrimitiveArray based on an iterator of values without nulls
    pub fn from_iter_values<I: IntoIterator<Item = T::Native>>(iter: I) -> Self {
        let val_buf: Buffer = iter.into_iter().collect();
        // The element count is derived from the byte length of the collected buffer.
        let len = val_buf.len() / mem::size_of::<<T as ArrowPrimitiveType>::Native>();
        let data = ArrayData::new(T::DATA_TYPE, len, None, None, 0, vec![val_buf], vec![]);
        PrimitiveArray::from(data)
    }

    /// Creates a PrimitiveArray based on a constant value with `count` elements
    pub fn from_value(value: T::Native, count: usize) -> Self {
        // SAFETY: the mapped range reports an exact length of `count`.
        let val_buf = unsafe { Buffer::from_trusted_len_iter((0..count).map(|_| value)) };
        let len = val_buf.len() / mem::size_of::<<T as ArrowPrimitiveType>::Native>();
        let data = ArrayData::new(T::DATA_TYPE, len, None, None, 0, vec![val_buf], vec![]);
        PrimitiveArray::from(data)
    }
}
impl<T: ArrowPrimitiveType> Array for PrimitiveArray<T> {
    /// Returns this array as `&dyn Any` so callers can downcast to the concrete type.
    fn as_any(&self) -> &dyn Any {
        self
    }

    /// Returns a reference to the underlying [`ArrayData`].
    fn data(&self) -> &ArrayData {
        &self.data
    }

    /// Returns the total number of bytes of memory occupied by the buffers owned by this [PrimitiveArray].
    fn get_buffer_memory_size(&self) -> usize {
        self.data.get_buffer_memory_size()
    }

    /// Returns the total number of bytes of memory occupied physically by this [PrimitiveArray].
    fn get_array_memory_size(&self) -> usize {
        // The array's own footprint is its data plus the cached raw pointer field.
        self.data.get_array_memory_size() + mem::size_of::<RawPtrBox<T::Native>>()
    }
}
/// Converts the raw value `v` to a `NaiveDateTime` according to `T::DATA_TYPE`.
///
/// Only `Date32`, `Date64` and `Timestamp` types convert; every other type
/// (including time and interval types) yields `None`.
fn as_datetime<T: ArrowPrimitiveType>(v: i64) -> Option<NaiveDateTime> {
    let datetime = match T::DATA_TYPE {
        DataType::Date32 => temporal_conversions::date32_to_datetime(v as i32),
        DataType::Date64 => temporal_conversions::date64_to_datetime(v),
        DataType::Timestamp(TimeUnit::Second, _) => {
            temporal_conversions::timestamp_s_to_datetime(v)
        }
        DataType::Timestamp(TimeUnit::Millisecond, _) => {
            temporal_conversions::timestamp_ms_to_datetime(v)
        }
        DataType::Timestamp(TimeUnit::Microsecond, _) => {
            temporal_conversions::timestamp_us_to_datetime(v)
        }
        DataType::Timestamp(TimeUnit::Nanosecond, _) => {
            temporal_conversions::timestamp_ns_to_datetime(v)
        }
        // Time32/Time64 carry no date; interval is not yet fully documented [ARROW-3097].
        _ => return None,
    };
    Some(datetime)
}
/// Converts the raw value `v` to a `NaiveDate` by taking the date part of
/// `as_datetime`; returns `None` when no datetime conversion exists.
fn as_date<T: ArrowPrimitiveType>(v: i64) -> Option<NaiveDate> {
    let datetime = as_datetime::<T>(v)?;
    Some(datetime.date())
}
/// Converts the raw value `v` to a `NaiveTime` according to `T::DATA_TYPE`.
///
/// `Date32`/`Date64` map to midnight, timestamps to their time-of-day, and
/// unsupported type/unit combinations to `None`.
fn as_time<T: ArrowPrimitiveType>(v: i64) -> Option<NaiveTime> {
    match T::DATA_TYPE {
        // Time32 values are stored as i32; keep only the low 32 bits of `v`.
        DataType::Time32(TimeUnit::Second) => {
            Some(temporal_conversions::time32s_to_time(v as i32))
        }
        DataType::Time32(TimeUnit::Millisecond) => {
            Some(temporal_conversions::time32ms_to_time(v as i32))
        }
        DataType::Time64(TimeUnit::Microsecond) => {
            Some(temporal_conversions::time64us_to_time(v))
        }
        DataType::Time64(TimeUnit::Nanosecond) => {
            Some(temporal_conversions::time64ns_to_time(v))
        }
        DataType::Timestamp(_, _) => {
            let datetime = as_datetime::<T>(v)?;
            Some(datetime.time())
        }
        DataType::Date32 | DataType::Date64 => Some(NaiveTime::from_hms(0, 0, 0)),
        // Remaining Time32/Time64 units, intervals and all non-temporal types.
        _ => None,
    }
}
/// Converts the raw value `v` to a chrono `Duration` when `T::DATA_TYPE` is a
/// `Duration` type; returns `None` for every other data type.
fn as_duration<T: ArrowPrimitiveType>(v: i64) -> Option<Duration> {
    let duration = match T::DATA_TYPE {
        DataType::Duration(TimeUnit::Second) => {
            temporal_conversions::duration_s_to_duration(v)
        }
        DataType::Duration(TimeUnit::Millisecond) => {
            temporal_conversions::duration_ms_to_duration(v)
        }
        DataType::Duration(TimeUnit::Microsecond) => {
            temporal_conversions::duration_us_to_duration(v)
        }
        DataType::Duration(TimeUnit::Nanosecond) => {
            temporal_conversions::duration_ns_to_duration(v)
        }
        _ => return None,
    };
    Some(duration)
}
impl<T: ArrowTemporalType + ArrowNumericType> PrimitiveArray<T>
where
    i64: std::convert::From<T::Native>,
{
    /// Returns value as a chrono `NaiveDateTime`, handling time resolution
    ///
    /// If a data type cannot be converted to `NaiveDateTime`, a `None` is returned.
    /// A valid value is expected, thus the user should first check for validity.
    pub fn value_as_datetime(&self, i: usize) -> Option<NaiveDateTime> {
        let raw = i64::from(self.value(i));
        as_datetime::<T>(raw)
    }

    /// Returns value as a chrono `NaiveDate`, derived from `Self::value_as_datetime()`
    ///
    /// If a data type cannot be converted to `NaiveDate`, a `None` is returned
    pub fn value_as_date(&self, i: usize) -> Option<NaiveDate> {
        let datetime = self.value_as_datetime(i)?;
        Some(datetime.date())
    }

    /// Returns a value as a chrono `NaiveTime`
    ///
    /// `Date32` and `Date64` return UTC midnight as they do not have time resolution
    pub fn value_as_time(&self, i: usize) -> Option<NaiveTime> {
        let raw = i64::from(self.value(i));
        as_time::<T>(raw)
    }

    /// Returns a value as a chrono `Duration`
    ///
    /// If a data type cannot be converted to `Duration`, a `None` is returned
    pub fn value_as_duration(&self, i: usize) -> Option<Duration> {
        let raw = i64::from(self.value(i));
        as_duration::<T>(raw)
    }
}
impl<T: ArrowPrimitiveType> fmt::Debug for PrimitiveArray<T> {
    /// Writes a header naming the data type followed by the elements via
    /// `print_long_array`. Date, time and timestamp types are rendered through
    /// chrono; a failed conversion is printed as `null`.
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        write!(f, "PrimitiveArray<{:?}>\n[\n", T::DATA_TYPE)?;
        print_long_array(self, f, |array, index, f| {
            // Evaluated lazily so the integer conversion only happens for the
            // temporal arms below, never for the fallback arm.
            let raw = || array.value(index).to_isize().unwrap() as i64;
            match T::DATA_TYPE {
                DataType::Date32 | DataType::Date64 => match as_date::<T>(raw()) {
                    Some(date) => write!(f, "{:?}", date),
                    None => write!(f, "null"),
                },
                DataType::Time32(_) | DataType::Time64(_) => match as_time::<T>(raw()) {
                    Some(time) => write!(f, "{:?}", time),
                    None => write!(f, "null"),
                },
                DataType::Timestamp(_, _) => match as_datetime::<T>(raw()) {
                    Some(datetime) => write!(f, "{:?}", datetime),
                    None => write!(f, "null"),
                },
                _ => fmt::Debug::fmt(&array.value(index), f),
            }
        })?;
        write!(f, "]")
    }
}
/// Iterating a borrowed `PrimitiveArray` yields one `Option<T::Native>` per
/// slot through a `PrimitiveIter`.
impl<'a, T: ArrowPrimitiveType> IntoIterator for &'a PrimitiveArray<T> {
    type Item = Option<<T as ArrowPrimitiveType>::Native>;
    type IntoIter = PrimitiveIter<'a, T>;

    fn into_iter(self) -> Self::IntoIter {
        PrimitiveIter::new(self)
    }
}
impl<'a, T: ArrowPrimitiveType> PrimitiveArray<T> {
    /// Constructs a new `PrimitiveIter` borrowing this array.
    pub fn iter(&'a self) -> PrimitiveIter<'a, T> {
        PrimitiveIter::new(self)
    }
}
/// Collects an iterator of optional native values (owned or borrowed) into a
/// `PrimitiveArray`, recording `None` items as nulls.
impl<T: ArrowPrimitiveType, Ptr: Borrow<Option<<T as ArrowPrimitiveType>::Native>>>
    FromIterator<Ptr> for PrimitiveArray<T>
{
    fn from_iter<I: IntoIterator<Item = Ptr>>(iter: I) -> Self {
        let source = iter.into_iter();
        // Only the lower bound is used as a capacity hint; the final length
        // comes from the number of items actually appended below.
        let (min_len, _) = source.size_hint();
        let mut validity = BooleanBufferBuilder::new(min_len);

        let values: Buffer = source
            .map(|item| match item.borrow() {
                Some(value) => {
                    validity.append(true);
                    *value
                }
                None => {
                    validity.append(false);
                    // this ensures that null items on the buffer are not arbitrary.
                    // This is important because fallible operations can use null values
                    // (e.g. a vectorized "add") which may panic (e.g. overflow if the
                    // number on the slots happen to be very large).
                    T::Native::default()
                }
            })
            .collect();

        let len = validity.len();
        let data = ArrayData::new(
            T::DATA_TYPE,
            len,
            None,
            Some(validity.into()),
            0,
            vec![values],
            vec![],
        );
        PrimitiveArray::from(data)
    }
}
impl<T: ArrowPrimitiveType> PrimitiveArray<T> {
    /// Creates a [`PrimitiveArray`] from an iterator of trusted length.
    ///
    /// The array length is taken from the iterator's upper size bound.
    ///
    /// # Safety
    /// The iterator must be [`TrustedLen`](https://doc.rust-lang.org/std/iter/trait.TrustedLen.html).
    /// I.e. that `size_hint().1` correctly reports its length.
    ///
    /// # Panics
    /// Panics if the iterator reports no upper size bound.
    #[inline]
    pub unsafe fn from_trusted_len_iter<I, P>(iter: I) -> Self
    where
        P: std::borrow::Borrow<Option<<T as ArrowPrimitiveType>::Native>>,
        I: IntoIterator<Item = P>,
    {
        let items = iter.into_iter();
        let len = items
            .size_hint()
            .1
            .expect("trusted_len_unzip requires an upper limit");
        let (validity, values) = trusted_len_unzip(items);
        let data = ArrayData::new(
            T::DATA_TYPE,
            len,
            None,
            Some(validity),
            0,
            vec![values],
            vec![],
        );
        PrimitiveArray::from(data)
    }
}
// TODO: a macro is used because generic impls of both `From<Vec<T::Native>>` and
// `From<Vec<Option<T::Native>>>` would be rejected as "conflicting implementations".
// We should revisit this in future.
macro_rules! def_numeric_from_vec {
    ( $ty:ident ) => {
        // Builds a null-free array by copying the vector's values into a buffer.
        impl From<Vec<<$ty as ArrowPrimitiveType>::Native>> for PrimitiveArray<$ty> {
            fn from(values: Vec<<$ty as ArrowPrimitiveType>::Native>) -> Self {
                let builder = ArrayData::builder($ty::DATA_TYPE)
                    .len(values.len())
                    .add_buffer(Buffer::from_slice_ref(&values));
                PrimitiveArray::from(builder.build())
            }
        }

        // Constructs a primitive array from a vector. Should only be used for testing.
        impl From<Vec<Option<<$ty as ArrowPrimitiveType>::Native>>>
            for PrimitiveArray<$ty>
        {
            fn from(values: Vec<Option<<$ty as ArrowPrimitiveType>::Native>>) -> Self {
                values.iter().collect()
            }
        }
    };
}
// Generate the `From<Vec<Native>>` and `From<Vec<Option<Native>>>` impls for
// each primitive type below: integers, floats, then the date, time, interval,
// duration and timestamp types.
def_numeric_from_vec!(Int8Type);
def_numeric_from_vec!(Int16Type);
def_numeric_from_vec!(Int32Type);
def_numeric_from_vec!(Int64Type);
def_numeric_from_vec!(UInt8Type);
def_numeric_from_vec!(UInt16Type);
def_numeric_from_vec!(UInt32Type);
def_numeric_from_vec!(UInt64Type);
def_numeric_from_vec!(Float32Type);
def_numeric_from_vec!(Float64Type);
def_numeric_from_vec!(Date32Type);
def_numeric_from_vec!(Date64Type);
def_numeric_from_vec!(Time32SecondType);
def_numeric_from_vec!(Time32MillisecondType);
def_numeric_from_vec!(Time64MicrosecondType);
def_numeric_from_vec!(Time64NanosecondType);
def_numeric_from_vec!(IntervalYearMonthType);
def_numeric_from_vec!(IntervalDayTimeType);
def_numeric_from_vec!(DurationSecondType);
def_numeric_from_vec!(DurationMillisecondType);
def_numeric_from_vec!(DurationMicrosecondType);
def_numeric_from_vec!(DurationNanosecondType);
def_numeric_from_vec!(TimestampSecondType);
def_numeric_from_vec!(TimestampMillisecondType);
def_numeric_from_vec!(TimestampMicrosecondType);
def_numeric_from_vec!(TimestampNanosecondType);
impl<T: ArrowTimestampType> PrimitiveArray<T> {
    /// Construct a timestamp array from a vec of i64 values and an optional timezone
    ///
    /// The time unit is taken from `T`; the resulting array has no null buffer.
    pub fn from_vec(data: Vec<i64>, timezone: Option<String>) -> Self {
        let data_type = DataType::Timestamp(T::get_time_unit(), timezone);
        let values = Buffer::from_slice_ref(&data);
        let array_data = ArrayData::builder(data_type)
            .len(data.len())
            .add_buffer(values)
            .build();
        PrimitiveArray::from(array_data)
    }
}
impl<T: ArrowTimestampType> PrimitiveArray<T> {
    /// Construct a timestamp array from a vec of Option<i64> values and an optional timezone
    ///
    /// `None` entries become nulls; their value slot is filled with `0`.
    pub fn from_opt_vec(data: Vec<Option<i64>>, timezone: Option<String>) -> Self {
        // TODO: duplicated from def_numeric_from_vec! macro, it looks possible to convert to generic
        let len = data.len();
        let mut validity = MutableBuffer::new_null(len);
        let mut values = MutableBuffer::new(len * mem::size_of::<i64>());
        {
            let validity_bits = validity.as_slice_mut();
            for (idx, item) in data.iter().enumerate() {
                match item {
                    Some(timestamp) => {
                        // Mark the slot valid and store its value.
                        bit_util::set_bit(validity_bits, idx);
                        values.push(*timestamp);
                    }
                    // Null slot: validity bit is left unset, value is a zero placeholder.
                    None => values.push(0i64),
                }
            }
        }

        let data_type = DataType::Timestamp(T::get_time_unit(), timezone);
        let array_data = ArrayData::builder(data_type)
            .len(len)
            .add_buffer(values.into())
            .null_bit_buffer(validity.into())
            .build();
        PrimitiveArray::from(array_data)
    }
}
/// Constructs a `PrimitiveArray` from an `ArrayData`, taking ownership of it.
///
/// # Panics
///
/// Panics if `data` does not contain exactly one buffer.
impl<T: ArrowPrimitiveType> From<ArrayData> for PrimitiveArray<T> {
    fn from(data: ArrayData) -> Self {
        let buffer_count = data.buffers().len();
        assert_eq!(
            buffer_count, 1,
            "PrimitiveArray data should contain a single buffer only (values buffer)"
        );

        // Cache a pointer to the start of the values buffer; `data` is stored
        // alongside it in the returned struct.
        let values_ptr = data.buffers()[0].as_ptr();
        // NOTE(review): soundness depends on the contract of `RawPtrBox::new`,
        // which is not visible in this file — confirm alignment is checked there.
        let raw_values = unsafe { RawPtrBox::new(values_ptr) };
        Self { data, raw_values }
    }
}
#[cfg(test)]
mod tests {
use super::*;
use std::thread;
use crate::buffer::Buffer;
use crate::datatypes::DataType;
#[test]
fn test_primitive_array_from_vec() {
    // A null-free Int32 array built from a Vec must expose the same buffer contents.
    let expected_values = Buffer::from_slice_ref(&[0, 1, 2, 3, 4]);
    let array = Int32Array::from(vec![0, 1, 2, 3, 4]);

    assert_eq!(expected_values, array.data.buffers()[0]);
    assert_eq!(5, array.len());
    assert_eq!(0, array.offset());
    assert_eq!(0, array.null_count());

    for idx in 0..5usize {
        assert!(!array.is_null(idx));
        assert!(array.is_valid(idx));
        assert_eq!(idx as i32, array.value(idx));
    }

    assert_eq!(64, array.get_buffer_memory_size());
    assert_eq!(136, array.get_array_memory_size());
}
#[test]
fn test_primitive_array_from_vec_option() {
    // Test building a primitive array with null values
    let array = Int32Array::from(vec![Some(0), None, Some(2), None, Some(4)]);
    assert_eq!(5, array.len());
    assert_eq!(0, array.offset());
    assert_eq!(2, array.null_count());

    for idx in 0..5usize {
        // Odd positions were built from `None`.
        let expect_null = idx % 2 != 0;
        assert_eq!(expect_null, array.is_null(idx));
        assert_eq!(!expect_null, array.is_valid(idx));
        if !expect_null {
            assert_eq!(idx as i32, array.value(idx));
        }
    }

    assert_eq!(128, array.get_buffer_memory_size());
    assert_eq!(216, array.get_array_memory_size());
}
#[test]
fn test_date64_array_from_vec_option() {
    // Test building a primitive array with null values
    // we use Int32 and Int64 as a backing array, so all Int32 and Int64 conventions
    // work
    let array: PrimitiveArray<Date64Type> =
        vec![Some(1550902545147), None, Some(1550902545147)].into();
    assert_eq!(3, array.len());
    assert_eq!(0, array.offset());
    assert_eq!(1, array.null_count());

    for idx in 0..3 {
        if idx % 2 != 0 {
            // The middle slot was built from `None`.
            assert!(array.is_null(idx));
            assert!(!array.is_valid(idx));
            continue;
        }
        assert!(!array.is_null(idx));
        assert!(array.is_valid(idx));
        assert_eq!(1550902545147, array.value(idx));
        // roundtrip to and from datetime
        let datetime = array.value_as_datetime(idx).unwrap();
        assert_eq!(1550902545147, datetime.timestamp_millis());
    }
}
#[test]
fn test_time32_millisecond_array_from_vec() {
    // 1: 00:00:00.001
    // 37800005: 10:30:00.005
    // 86399210: 23:59:59.210
    let array: PrimitiveArray<Time32MillisecondType> =
        vec![1, 37_800_005, 86_399_210].into();
    assert_eq!(3, array.len());
    assert_eq!(0, array.offset());
    assert_eq!(0, array.null_count());

    let expected = ["00:00:00.001", "10:30:00.005", "23:59:59.210"];
    for (idx, want) in expected.iter().enumerate() {
        // check that we can't create dates or datetimes from time instances
        assert_eq!(None, array.value_as_datetime(idx));
        assert_eq!(None, array.value_as_date(idx));
        let rendered = array
            .value_as_time(idx)
            .unwrap()
            .format("%H:%M:%S%.3f")
            .to_string();
        assert_eq!(*want, rendered);
    }
}
#[test]
fn test_time64_nanosecond_array_from_vec() {
    // Test building a primitive array with null values
    // we use Int32 and Int64 as a backing array, so all Int32 and Int64 conventions
    // work
    // 1e6: 00:00:00.001
    // 37800005e6: 10:30:00.005
    // 86399210e6: 23:59:59.210
    let array: PrimitiveArray<Time64NanosecondType> =
        vec![1_000_000, 37_800_005_000_000, 86_399_210_000_000].into();
    assert_eq!(3, array.len());
    assert_eq!(0, array.offset());
    assert_eq!(0, array.null_count());

    let expected = ["00:00:00.001", "10:30:00.005", "23:59:59.210"];
    for (idx, want) in expected.iter().enumerate() {
        // check that we can't create dates or datetimes from time instances
        assert_eq!(None, array.value_as_datetime(idx));
        assert_eq!(None, array.value_as_date(idx));
        let rendered = array
            .value_as_time(idx)
            .unwrap()
            .format("%H:%M:%S%.3f")
            .to_string();
        assert_eq!(*want, rendered);
    }
}
#[test]
fn test_interval_array_from_vec() {
    // intervals are currently not treated specially, but are Int32 and Int64 arrays.
    // a day_time interval contains days and milliseconds, but we do not yet have
    // accessors for the values
    macro_rules! check_interval_array {
        ($array:ty) => {{
            let arr = <$array>::from(vec![Some(1), None, Some(-5)]);
            assert_eq!(3, arr.len());
            assert_eq!(0, arr.offset());
            assert_eq!(1, arr.null_count());
            assert_eq!(1, arr.value(0));
            assert_eq!(1, arr.values()[0]);
            assert!(arr.is_null(1));
            assert_eq!(-5, arr.value(2));
            assert_eq!(-5, arr.values()[2]);
        }};
    }

    check_interval_array!(IntervalYearMonthArray);
    check_interval_array!(IntervalDayTimeArray);
}
#[test]
fn test_duration_array_from_vec() {
    // The same checks apply to every duration resolution.
    macro_rules! check_duration_array {
        ($array:ty) => {{
            let arr = <$array>::from(vec![Some(1), None, Some(-5)]);
            assert_eq!(3, arr.len());
            assert_eq!(0, arr.offset());
            assert_eq!(1, arr.null_count());
            assert_eq!(1, arr.value(0));
            assert_eq!(1, arr.values()[0]);
            assert!(arr.is_null(1));
            assert_eq!(-5, arr.value(2));
            assert_eq!(-5, arr.values()[2]);
        }};
    }

    check_duration_array!(DurationSecondArray);
    check_duration_array!(DurationMillisecondArray);
    check_duration_array!(DurationMicrosecondArray);
    check_duration_array!(DurationNanosecondArray);
}
#[test]
fn test_timestamp_array_from_vec() {
    // The same checks apply to every timestamp resolution.
    macro_rules! check_timestamp_array {
        ($array:ty) => {{
            let arr = <$array>::from_vec(vec![1, -5], None);
            assert_eq!(2, arr.len());
            assert_eq!(0, arr.offset());
            assert_eq!(0, arr.null_count());
            assert_eq!(1, arr.value(0));
            assert_eq!(-5, arr.value(1));
            assert_eq!(&[1, -5], arr.values());
        }};
    }

    check_timestamp_array!(TimestampSecondArray);
    check_timestamp_array!(TimestampMillisecondArray);
    check_timestamp_array!(TimestampMicrosecondArray);
    check_timestamp_array!(TimestampNanosecondArray);
}
#[test]
fn test_primitive_array_slice() {
    let source = vec![
        Some(0),
        None,
        Some(2),
        None,
        Some(4),
        Some(5),
        Some(6),
        None,
        None,
    ];
    let full = Int32Array::from(source);
    assert_eq!(9, full.len());
    assert_eq!(0, full.offset());
    assert_eq!(4, full.null_count());

    // First slice: elements 2..7 of the original array.
    let first = full.slice(2, 5);
    assert_eq!(5, first.len());
    assert_eq!(2, first.offset());
    assert_eq!(1, first.null_count());
    for idx in 0..first.len() {
        // Only position 1 of the slice is null.
        let expect_null = idx == 1;
        assert_eq!(expect_null, first.is_null(idx));
        assert_eq!(!expect_null, first.is_valid(idx));
    }
    let first_typed = first.as_any().downcast_ref::<Int32Array>().unwrap();
    assert_eq!(2, first_typed.values()[0]);
    assert_eq!(&[4, 5, 6], &first_typed.values()[2..5]);

    // Slice of a slice: offsets accumulate.
    let second = first.slice(2, 3);
    assert_eq!(3, second.len());
    assert_eq!(4, second.offset());
    assert_eq!(0, second.null_count());
    let second_typed = second.as_any().downcast_ref::<Int32Array>().unwrap();
    assert_eq!(&[4, 5, 6], second_typed.values());
    assert_eq!(4, second_typed.value(0));
    assert_eq!(5, second_typed.value(1));
    assert_eq!(6, second_typed.value(2));
}
#[test]
fn test_boolean_array_slice() {
    let source = vec![
        Some(true),
        None,
        Some(false),
        None,
        Some(true),
        Some(false),
        Some(true),
        Some(false),
        None,
        Some(true),
    ];
    let full = BooleanArray::from(source);
    assert_eq!(10, full.len());
    assert_eq!(0, full.offset());
    assert_eq!(3, full.null_count());

    let sliced = full.slice(3, 5);
    assert_eq!(5, sliced.len());
    assert_eq!(3, sliced.offset());
    assert_eq!(1, sliced.null_count());

    let bools = sliced.as_any().downcast_ref::<BooleanArray>().unwrap();
    // The slice starts on a null slot.
    assert!(!bools.is_valid(0));
    // The remaining four slots are valid and alternate true/false.
    assert!(bools.is_valid(1));
    assert!(bools.value(1));
    assert!(bools.is_valid(2));
    assert!(!bools.value(2));
    assert!(bools.is_valid(3));
    assert!(bools.value(3));
    assert!(bools.is_valid(4));
    assert!(!bools.value(4));
}
#[test]
fn test_int32_fmt_debug() {
    // Debug output lists the type header followed by one value per line.
    let array = Int32Array::from(vec![0, 1, 2, 3, 4]);
    let rendered = format!("{:?}", array);
    assert_eq!(
        "PrimitiveArray<Int32>\n[\n 0,\n 1,\n 2,\n 3,\n 4,\n]",
        rendered
    );
}
#[test]
fn test_fmt_debug_up_to_20_elements() {
    // Every length from 1 to 20 must print all of its elements.
    for len in 1..=20 {
        let values: Vec<i16> = (0..len).collect();
        let lines: Vec<String> = values.iter().map(|v| format!(" {},", v)).collect();
        let expected = format!("PrimitiveArray<Int16>\n[\n{}\n]", lines.join("\n"));

        let array = Int16Array::from(values);
        assert_eq!(expected, format!("{:?}", array));
    }
}
#[test]
fn test_int32_with_null_fmt_debug() {
    // A null slot appended through the builder must be printed as `null`.
    let mut builder = Int32Array::builder(3);
    builder.append_slice(&[0, 1]).unwrap();
    builder.append_null().unwrap();
    builder.append_slice(&[3, 4]).unwrap();
    let array = builder.finish();

    let rendered = format!("{:?}", array);
    assert_eq!(
        "PrimitiveArray<Int32>\n[\n 0,\n 1,\n null,\n 3,\n 4,\n]",
        rendered
    );
}
#[test]
fn test_timestamp_fmt_debug() {
    // Millisecond timestamps, including a negative one, are printed as datetimes.
    let millis = vec![1546214400000, 1546214400000, -1546214400000];
    let array: PrimitiveArray<TimestampMillisecondType> =
        TimestampMillisecondArray::from_vec(millis, None);

    let rendered = format!("{:?}", array);
    assert_eq!(
        "PrimitiveArray<Timestamp(Millisecond, None)>\n[\n 2018-12-31T00:00:00,\n 2018-12-31T00:00:00,\n 1921-01-02T00:00:00,\n]",
        rendered
    );
}
#[test]
fn test_date32_fmt_debug() {
    // Date32 values, including a negative one, are printed as calendar dates.
    let array: PrimitiveArray<Date32Type> = vec![12356, 13548, -365].into();
    let rendered = format!("{:?}", array);
    assert_eq!(
        "PrimitiveArray<Date32>\n[\n 2003-10-31,\n 2007-02-04,\n 1969-01-01,\n]",
        rendered
    );
}
#[test]
fn test_time32second_fmt_debug() {
    // Time32(Second) values are printed as wall-clock times.
    let array: PrimitiveArray<Time32SecondType> = vec![7201, 60054].into();
    let rendered = format!("{:?}", array);
    assert_eq!(
        "PrimitiveArray<Time32(Second)>\n[\n 02:00:01,\n 16:40:54,\n]",
        rendered
    );
}
#[test]
#[should_panic(expected = "invalid time")]
fn test_time32second_invalid_neg() {
    // The panic should come from chrono, not from arrow
    let negative_times: PrimitiveArray<Time32SecondType> = vec![-7201, -60054].into();
    // Formatting is what triggers the time conversion.
    println!("{:?}", negative_times);
}
#[test]
fn test_primitive_array_builder() {
    // Test building a primitive array with ArrayData builder and offset.
    // The buffer must hold `offset + len` = 7 elements so that every logical
    // slot of the array is backed by real memory.
    let buf = Buffer::from_slice_ref(&[0, 1, 2, 3, 4, 5, 6]);
    let buf2 = buf.clone();
    let data = ArrayData::builder(DataType::Int32)
        .len(5)
        .offset(2)
        .add_buffer(buf)
        .build();
    let arr = Int32Array::from(data);
    assert_eq!(buf2, arr.data.buffers()[0]);
    assert_eq!(5, arr.len());
    assert_eq!(2, arr.offset());
    assert_eq!(0, arr.null_count());
    // Logical index `i` maps to physical index `i + offset`.
    for i in 0..arr.len() {
        assert_eq!((i + 2) as i32, arr.value(i));
    }
    assert_eq!(&[2, 3, 4, 5, 6], arr.values());
}
#[test]
fn test_primitive_from_iter_values() {
    // Test building a primitive array with from_iter_values
    let array: PrimitiveArray<Int32Type> = PrimitiveArray::from_iter_values(0..10);
    assert_eq!(10, array.len());
    assert_eq!(0, array.null_count());
    for (idx, expected) in (0..10i32).enumerate() {
        assert_eq!(expected, array.value(idx));
    }
}
#[test]
fn test_primitive_array_from_unbound_iter() {
    // iterator that doesn't declare (upper) size bound
    let unbounded = (0..).scan(0usize, |emitted, i| {
        // actually returns up to 10 values
        if *emitted >= 10 {
            return None;
        }
        *emitted += 1;
        Some(Some(i))
    });
    // limited using take()
    let value_iter = unbounded.take(100);

    // the upper bound, defined by take above, is 100
    let upper_size_bound = value_iter.size_hint().1;
    assert_eq!(upper_size_bound, Some(100));

    let primitive_array: PrimitiveArray<Int32Type> = value_iter.collect();
    // but the actual number of items in the array should be 10
    assert_eq!(primitive_array.len(), 10);
}
#[test]
#[should_panic(
    expected = "PrimitiveArray data should contain a single buffer only (values buffer)"
)]
fn test_primitive_array_invalid_buffer_len() {
    // No values buffer is attached, so the conversion must panic.
    let bufferless = ArrayData::builder(DataType::Int32).len(5).build();
    Int32Array::from(bufferless);
}
#[test]
fn test_access_array_concurrently() {
    // The array is moved into another thread and read there.
    let array = Int32Array::from(vec![5, 6, 7, 8, 9]);
    let worker = thread::spawn(move || array.value(3));
    let outcome = worker.join();
    assert!(outcome.is_ok());
    assert_eq!(8, outcome.ok().unwrap());
}
}