| // Licensed to the Apache Software Foundation (ASF) under one |
| // or more contributor license agreements. See the NOTICE file |
| // distributed with this work for additional information |
| // regarding copyright ownership. The ASF licenses this file |
| // to you under the Apache License, Version 2.0 (the |
| // "License"); you may not use this file except in compliance |
| // with the License. You may obtain a copy of the License at |
| // |
| // http://www.apache.org/licenses/LICENSE-2.0 |
| // |
| // Unless required by applicable law or agreed to in writing, |
| // software distributed under the License is distributed on an |
| // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
| // KIND, either express or implied. See the License for the |
| // specific language governing permissions and limitations |
| // under the License. |
| |
| use crate::array::print_long_array; |
| use crate::builder::{BooleanBufferBuilder, BufferBuilder, PrimitiveBuilder}; |
| use crate::iterator::PrimitiveIter; |
| use crate::temporal_conversions::{ |
| as_date, as_datetime, as_datetime_with_timezone, as_duration, as_time, |
| }; |
| use crate::timezone::Tz; |
| use crate::trusted_len::trusted_len_unzip; |
| use crate::types::*; |
| use crate::{Array, ArrayAccessor, ArrayRef, Scalar}; |
| use arrow_buffer::{i256, ArrowNativeType, Buffer, NullBuffer, ScalarBuffer}; |
| use arrow_data::bit_iterator::try_for_each_valid_idx; |
| use arrow_data::{ArrayData, ArrayDataBuilder}; |
| use arrow_schema::{ArrowError, DataType}; |
| use chrono::{DateTime, Duration, NaiveDate, NaiveDateTime, NaiveTime}; |
| use half::f16; |
| use std::any::Any; |
| use std::sync::Arc; |
| |
| /// A [`PrimitiveArray`] of `i8` values |
| /// |
| /// # Examples |
| /// |
| /// Construction |
| /// |
| /// ``` |
| /// # use arrow_array::Int8Array; |
| /// // Create from Vec<Option<i8>>, where each `None` becomes a null slot |
| /// let arr = Int8Array::from(vec![Some(1), None, Some(2)]); |
| /// // Create from Vec<i8> |
| /// let arr = Int8Array::from(vec![1, 2, 3]); |
| /// // Create by collecting an iterator of values |
| /// let arr: Int8Array = std::iter::repeat(42).take(10).collect(); |
| /// ``` |
| /// |
| /// See [`PrimitiveArray`] for more information and examples |
| pub type Int8Array = PrimitiveArray<Int8Type>; |
| |
| /// A [`PrimitiveArray`] of `i16` values |
| /// |
| /// # Examples |
| /// |
| /// Construction |
| /// |
| /// ``` |
| /// # use arrow_array::Int16Array; |
| /// // Create from Vec<Option<i16>>, where each `None` becomes a null slot |
| /// let arr = Int16Array::from(vec![Some(1), None, Some(2)]); |
| /// // Create from Vec<i16> |
| /// let arr = Int16Array::from(vec![1, 2, 3]); |
| /// // Create by collecting an iterator of values |
| /// let arr: Int16Array = std::iter::repeat(42).take(10).collect(); |
| /// ``` |
| /// |
| /// See [`PrimitiveArray`] for more information and examples |
| pub type Int16Array = PrimitiveArray<Int16Type>; |
| |
| /// A [`PrimitiveArray`] of `i32` values |
| /// |
| /// # Examples |
| /// |
| /// Construction |
| /// |
| /// ``` |
| /// # use arrow_array::Int32Array; |
| /// // Create from Vec<Option<i32>>, where each `None` becomes a null slot |
| /// let arr = Int32Array::from(vec![Some(1), None, Some(2)]); |
| /// // Create from Vec<i32> |
| /// let arr = Int32Array::from(vec![1, 2, 3]); |
| /// // Create by collecting an iterator of values |
| /// let arr: Int32Array = std::iter::repeat(42).take(10).collect(); |
| /// ``` |
| /// |
| /// See [`PrimitiveArray`] for more information and examples |
| pub type Int32Array = PrimitiveArray<Int32Type>; |
| |
| /// A [`PrimitiveArray`] of `i64` values |
| /// |
| /// # Examples |
| /// |
| /// Construction |
| /// |
| /// ``` |
| /// # use arrow_array::Int64Array; |
| /// // Create from Vec<Option<i64>>, where each `None` becomes a null slot |
| /// let arr = Int64Array::from(vec![Some(1), None, Some(2)]); |
| /// // Create from Vec<i64> |
| /// let arr = Int64Array::from(vec![1, 2, 3]); |
| /// // Create by collecting an iterator of values |
| /// let arr: Int64Array = std::iter::repeat(42).take(10).collect(); |
| /// ``` |
| /// |
| /// See [`PrimitiveArray`] for more information and examples |
| pub type Int64Array = PrimitiveArray<Int64Type>; |
| |
| /// A [`PrimitiveArray`] of `u8` values |
| /// |
| /// # Examples |
| /// |
| /// Construction |
| /// |
| /// ``` |
| /// # use arrow_array::UInt8Array; |
| /// // Create from Vec<Option<u8>>, where each `None` becomes a null slot |
| /// let arr = UInt8Array::from(vec![Some(1), None, Some(2)]); |
| /// // Create from Vec<u8> |
| /// let arr = UInt8Array::from(vec![1, 2, 3]); |
| /// // Create by collecting an iterator of values |
| /// let arr: UInt8Array = std::iter::repeat(42).take(10).collect(); |
| /// ``` |
| /// |
| /// See [`PrimitiveArray`] for more information and examples |
| pub type UInt8Array = PrimitiveArray<UInt8Type>; |
| |
| /// A [`PrimitiveArray`] of `u16` values |
| /// |
| /// # Examples |
| /// |
| /// Construction |
| /// |
| /// ``` |
| /// # use arrow_array::UInt16Array; |
| /// // Create from Vec<Option<u16>>, where each `None` becomes a null slot |
| /// let arr = UInt16Array::from(vec![Some(1), None, Some(2)]); |
| /// // Create from Vec<u16> |
| /// let arr = UInt16Array::from(vec![1, 2, 3]); |
| /// // Create by collecting an iterator of values |
| /// let arr: UInt16Array = std::iter::repeat(42).take(10).collect(); |
| /// ``` |
| /// |
| /// See [`PrimitiveArray`] for more information and examples |
| pub type UInt16Array = PrimitiveArray<UInt16Type>; |
| |
| /// A [`PrimitiveArray`] of `u32` values |
| /// |
| /// # Examples |
| /// |
| /// Construction |
| /// |
| /// ``` |
| /// # use arrow_array::UInt32Array; |
| /// // Create from Vec<Option<u32>>, where each `None` becomes a null slot |
| /// let arr = UInt32Array::from(vec![Some(1), None, Some(2)]); |
| /// // Create from Vec<u32> |
| /// let arr = UInt32Array::from(vec![1, 2, 3]); |
| /// // Create by collecting an iterator of values |
| /// let arr: UInt32Array = std::iter::repeat(42).take(10).collect(); |
| /// ``` |
| /// |
| /// See [`PrimitiveArray`] for more information and examples |
| pub type UInt32Array = PrimitiveArray<UInt32Type>; |
| |
| /// A [`PrimitiveArray`] of `u64` values |
| /// |
| /// # Examples |
| /// |
| /// Construction |
| /// |
| /// ``` |
| /// # use arrow_array::UInt64Array; |
| /// // Create from Vec<Option<u64>>, where each `None` becomes a null slot |
| /// let arr = UInt64Array::from(vec![Some(1), None, Some(2)]); |
| /// // Create from Vec<u64> |
| /// let arr = UInt64Array::from(vec![1, 2, 3]); |
| /// // Create by collecting an iterator of values |
| /// let arr: UInt64Array = std::iter::repeat(42).take(10).collect(); |
| /// ``` |
| /// |
| /// See [`PrimitiveArray`] for more information and examples |
| pub type UInt64Array = PrimitiveArray<UInt64Type>; |
| |
| /// A [`PrimitiveArray`] of `f16` values |
| /// |
| /// # Examples |
| /// |
| /// Construction |
| /// |
| /// ``` |
| /// # use arrow_array::Float16Array; |
| /// use half::f16; |
| /// // Create from Vec<Option<f16>> |
| /// let arr = Float16Array::from(vec![Some(f16::from_f64(1.0)), Some(f16::from_f64(2.0))]); |
| /// // Create from Vec<f16> |
| /// let arr = Float16Array::from(vec![f16::from_f64(1.0), f16::from_f64(2.0), f16::from_f64(3.0)]); |
| /// // Create by collecting an iterator of values |
| /// let arr: Float16Array = std::iter::repeat(f16::from_f64(1.0)).take(10).collect(); |
| /// ``` |
| /// |
| /// # Example: Using `collect` on an iterator of `Option<f16>` |
| /// ``` |
| /// # use arrow_array::Float16Array; |
| /// use half::f16; |
| /// let arr : Float16Array = [Some(f16::from_f64(1.0)), Some(f16::from_f64(2.0))].into_iter().collect(); |
| /// ``` |
| /// |
| /// See [`PrimitiveArray`] for more information and examples |
| pub type Float16Array = PrimitiveArray<Float16Type>; |
| |
| /// A [`PrimitiveArray`] of `f32` values |
| /// |
| /// # Examples |
| /// |
| /// Construction |
| /// |
| /// ``` |
| /// # use arrow_array::Float32Array; |
| /// // Create from Vec<Option<f32>>, where each `None` becomes a null slot |
| /// let arr = Float32Array::from(vec![Some(1.0), None, Some(2.0)]); |
| /// // Create from Vec<f32> |
| /// let arr = Float32Array::from(vec![1.0, 2.0, 3.0]); |
| /// // Create by collecting an iterator of values |
| /// let arr: Float32Array = std::iter::repeat(42.0).take(10).collect(); |
| /// ``` |
| /// |
| /// See [`PrimitiveArray`] for more information and examples |
| pub type Float32Array = PrimitiveArray<Float32Type>; |
| |
| /// A [`PrimitiveArray`] of `f64` values |
| /// |
| /// # Examples |
| /// |
| /// Construction |
| /// |
| /// ``` |
| /// # use arrow_array::Float64Array; |
| /// // Create from Vec<Option<f64>>, where each `None` becomes a null slot |
| /// let arr = Float64Array::from(vec![Some(1.0), None, Some(2.0)]); |
| /// // Create from Vec<f64> |
| /// let arr = Float64Array::from(vec![1.0, 2.0, 3.0]); |
| /// // Create by collecting an iterator of values |
| /// let arr: Float64Array = std::iter::repeat(42.0).take(10).collect(); |
| /// ``` |
| /// |
| /// See [`PrimitiveArray`] for more information and examples |
| pub type Float64Array = PrimitiveArray<Float64Type>; |
| |
| /// A [`PrimitiveArray`] of seconds since UNIX epoch stored as `i64` |
| /// |
| /// This type is similar to the [`chrono::DateTime`] type and can hold |
| /// values such as `1970-05-09 14:25:11 +01:00` |
| /// |
| /// See also [`Timestamp`](arrow_schema::DataType::Timestamp). |
| /// |
| /// # Example: UTC timestamps post epoch |
| /// ``` |
| /// # use arrow_array::TimestampSecondArray; |
| /// use arrow_array::timezone::Tz; |
| /// // Corresponds to single element array with entry 1970-05-09T14:25:11+00:00 |
| /// let arr = TimestampSecondArray::from(vec![11111111]); |
| /// // OR, from optional values |
| /// let arr = TimestampSecondArray::from(vec![Some(11111111)]); |
| /// let utc_tz: Tz = "+00:00".parse().unwrap(); |
| /// |
| /// assert_eq!(arr.value_as_datetime_with_tz(0, utc_tz).map(|v| v.to_string()).unwrap(), "1970-05-09 14:25:11 +00:00") |
| /// ``` |
| /// |
| /// # Example: UTC timestamps pre epoch |
| /// ``` |
| /// # use arrow_array::TimestampSecondArray; |
| /// use arrow_array::timezone::Tz; |
| /// // Corresponds to single element array with entry 1969-08-25T09:34:49+00:00 |
| /// let arr = TimestampSecondArray::from(vec![-11111111]); |
| /// // OR, from optional values |
| /// let arr = TimestampSecondArray::from(vec![Some(-11111111)]); |
| /// let utc_tz: Tz = "+00:00".parse().unwrap(); |
| /// |
| /// assert_eq!(arr.value_as_datetime_with_tz(0, utc_tz).map(|v| v.to_string()).unwrap(), "1969-08-25 09:34:49 +00:00") |
| /// ``` |
| /// |
| /// # Example: With timezone specified |
| /// ``` |
| /// # use arrow_array::TimestampSecondArray; |
| /// use arrow_array::timezone::Tz; |
| /// // Corresponds to single element array with entry 1970-05-10T00:25:11+10:00 |
| /// let arr = TimestampSecondArray::from(vec![11111111]).with_timezone("+10:00".to_string()); |
| /// // OR, from optional values |
| /// let arr = TimestampSecondArray::from(vec![Some(11111111)]).with_timezone("+10:00".to_string()); |
| /// let sydney_tz: Tz = "+10:00".parse().unwrap(); |
| /// |
| /// assert_eq!(arr.value_as_datetime_with_tz(0, sydney_tz).map(|v| v.to_string()).unwrap(), "1970-05-10 00:25:11 +10:00") |
| /// ``` |
| /// |
| /// See [`PrimitiveArray`] for more information and examples |
| pub type TimestampSecondArray = PrimitiveArray<TimestampSecondType>; |
| |
| /// A [`PrimitiveArray`] of milliseconds since UNIX epoch stored as `i64` |
| /// |
| /// See [`TimestampSecondArray`] for usage examples |
| pub type TimestampMillisecondArray = PrimitiveArray<TimestampMillisecondType>; |
| |
| /// A [`PrimitiveArray`] of microseconds since UNIX epoch stored as `i64` |
| /// |
| /// See [`TimestampSecondArray`] for usage examples |
| pub type TimestampMicrosecondArray = PrimitiveArray<TimestampMicrosecondType>; |
| |
| /// A [`PrimitiveArray`] of nanoseconds since UNIX epoch stored as `i64` |
| /// |
| /// See [`TimestampSecondArray`] for usage examples |
| pub type TimestampNanosecondArray = PrimitiveArray<TimestampNanosecondType>; |
| |
| /// A [`PrimitiveArray`] of days since UNIX epoch stored as `i32` |
| /// |
| /// This type is similar to the [`chrono::NaiveDate`] type and can hold |
| /// calendar dates such as `2018-11-13` |
| pub type Date32Array = PrimitiveArray<Date32Type>; |
| |
| /// A [`PrimitiveArray`] of milliseconds since UNIX epoch stored as `i64` |
| /// |
| /// This type is similar to the [`chrono::NaiveDate`] type and can hold |
| /// calendar dates such as `2018-11-13` |
| pub type Date64Array = PrimitiveArray<Date64Type>; |
| |
| /// A [`PrimitiveArray`] of seconds since midnight stored as `i32` |
| /// |
| /// This type is similar to the [`chrono::NaiveTime`] type and can |
| /// hold times of day such as `00:02:00` |
| pub type Time32SecondArray = PrimitiveArray<Time32SecondType>; |
| |
| /// A [`PrimitiveArray`] of milliseconds since midnight stored as `i32` |
| /// |
| /// This type is similar to the [`chrono::NaiveTime`] type and can |
| /// hold times of day such as `00:02:00.123` |
| pub type Time32MillisecondArray = PrimitiveArray<Time32MillisecondType>; |
| |
| /// A [`PrimitiveArray`] of microseconds since midnight stored as `i64` |
| /// |
| /// This type is similar to the [`chrono::NaiveTime`] type and can |
| /// hold times of day such as `00:02:00.123456` |
| pub type Time64MicrosecondArray = PrimitiveArray<Time64MicrosecondType>; |
| |
| /// A [`PrimitiveArray`] of nanoseconds since midnight stored as `i64` |
| /// |
| /// This type is similar to the [`chrono::NaiveTime`] type and can |
| /// hold times of day such as `00:02:00.123456789` |
| pub type Time64NanosecondArray = PrimitiveArray<Time64NanosecondType>; |
| |
| /// A [`PrimitiveArray`] of “calendar” intervals measured in whole months |
| /// |
| /// See [`IntervalYearMonthType`] for details on representation and caveats. |
| /// |
| /// # Example |
| /// ``` |
| /// # use arrow_array::IntervalYearMonthArray; |
| /// let array = IntervalYearMonthArray::from(vec![ |
| /// 2, // 2 months |
| /// 25, // 25 months, i.e. 2 years and 1 month |
| /// -1 // -1 month |
| /// ]); |
| /// ``` |
| pub type IntervalYearMonthArray = PrimitiveArray<IntervalYearMonthType>; |
| |
| /// A [`PrimitiveArray`] of “calendar” intervals in days and milliseconds |
| /// |
| /// See [`IntervalDayTime`] for details on representation and caveats. |
| /// |
| /// # Example |
| /// ``` |
| /// # use arrow_array::IntervalDayTimeArray; |
| /// use arrow_array::types::IntervalDayTime; |
| /// let array = IntervalDayTimeArray::from(vec![ |
| /// IntervalDayTime::new(1, 1000), // 1 day, 1000 milliseconds |
| /// IntervalDayTime::new(33, 0), // 33 days, 0 milliseconds |
| /// IntervalDayTime::new(0, 12 * 60 * 60 * 1000), // 0 days, 12 hours expressed in milliseconds |
| /// ]); |
| /// ``` |
| pub type IntervalDayTimeArray = PrimitiveArray<IntervalDayTimeType>; |
| |
| /// A [`PrimitiveArray`] of “calendar” intervals in months, days, and nanoseconds. |
| /// |
| /// See [`IntervalMonthDayNano`] for details on representation and caveats. |
| /// |
| /// # Example |
| /// ``` |
| /// # use arrow_array::IntervalMonthDayNanoArray; |
| /// use arrow_array::types::IntervalMonthDayNano; |
| /// let array = IntervalMonthDayNanoArray::from(vec![ |
| /// IntervalMonthDayNano::new(1, 2, 1000), // 1 month, 2 days, 1000 nanoseconds |
| /// IntervalMonthDayNano::new(12, 1, 0), // 12 months, 1 day, 0 nanoseconds |
| /// IntervalMonthDayNano::new(0, 0, 12 * 1000 * 1000), // 0 months, 0 days, 12 milliseconds |
| /// ]); |
| /// ``` |
| pub type IntervalMonthDayNanoArray = PrimitiveArray<IntervalMonthDayNanoType>; |
| |
| /// A [`PrimitiveArray`] of elapsed time durations measured in seconds |
| pub type DurationSecondArray = PrimitiveArray<DurationSecondType>; |
| |
| /// A [`PrimitiveArray`] of elapsed time durations measured in milliseconds |
| pub type DurationMillisecondArray = PrimitiveArray<DurationMillisecondType>; |
| |
| /// A [`PrimitiveArray`] of elapsed time durations measured in microseconds |
| pub type DurationMicrosecondArray = PrimitiveArray<DurationMicrosecondType>; |
| |
| /// A [`PrimitiveArray`] of elapsed time durations measured in nanoseconds |
| pub type DurationNanosecondArray = PrimitiveArray<DurationNanosecondType>; |
| |
| /// A [`PrimitiveArray`] of 128-bit fixed point decimals |
| /// |
| /// # Examples |
| /// |
| /// Construction |
| /// |
| /// ``` |
| /// # use arrow_array::Decimal128Array; |
| /// // Create from Vec<Option<i128>> |
| /// let arr = Decimal128Array::from(vec![Some(1), None, Some(2)]); |
| /// // Create from Vec<i128> |
| /// let arr = Decimal128Array::from(vec![1, 2, 3]); |
| /// // Create by collecting an iterator of values |
| /// let arr: Decimal128Array = std::iter::repeat(42).take(10).collect(); |
| /// ``` |
| /// |
| /// See [`PrimitiveArray`] for more information and examples |
| pub type Decimal128Array = PrimitiveArray<Decimal128Type>; |
| |
| /// A [`PrimitiveArray`] of 256-bit fixed point decimals |
| /// |
| /// # Examples |
| /// |
| /// Construction |
| /// |
| /// ``` |
| /// # use arrow_array::Decimal256Array; |
| /// use arrow_buffer::i256; |
| /// // Create from Vec<Option<i256>>, where each `None` becomes a null slot |
| /// let arr = Decimal256Array::from(vec![Some(i256::from(1)), None, Some(i256::from(2))]); |
| /// // Create from Vec<i256> |
| /// let arr = Decimal256Array::from(vec![i256::from(1), i256::from(2), i256::from(3)]); |
| /// // Create by collecting an iterator of values |
| /// let arr: Decimal256Array = std::iter::repeat(i256::from(42)).take(10).collect(); |
| /// ``` |
| /// |
| /// See [`PrimitiveArray`] for more information and examples |
| pub type Decimal256Array = PrimitiveArray<Decimal256Type>; |
| |
| pub use crate::types::ArrowPrimitiveType; |
| |
| /// An array of primitive values, of type [`ArrowPrimitiveType`] |
| /// |
| /// # Example: From a Vec |
| /// |
| /// ``` |
| /// # use arrow_array::{Array, PrimitiveArray, types::Int32Type}; |
| /// let arr: PrimitiveArray<Int32Type> = vec![1, 2, 3, 4].into(); |
| /// assert_eq!(4, arr.len()); |
| /// assert_eq!(0, arr.null_count()); |
| /// assert_eq!(arr.values(), &[1, 2, 3, 4]) |
| /// ``` |
| /// |
| /// # Example: From an optional Vec |
| /// |
| /// ``` |
| /// # use arrow_array::{Array, PrimitiveArray, types::Int32Type}; |
| /// let arr: PrimitiveArray<Int32Type> = vec![Some(1), None, Some(3), None].into(); |
| /// assert_eq!(4, arr.len()); |
| /// assert_eq!(2, arr.null_count()); |
| /// // Note: values for null indexes are arbitrary |
| /// assert_eq!(arr.values(), &[1, 0, 3, 0]) |
| /// ``` |
| /// |
| /// # Example: From an iterator of values |
| /// |
| /// ``` |
| /// # use arrow_array::{Array, PrimitiveArray, types::Int32Type}; |
| /// let arr: PrimitiveArray<Int32Type> = (0..10).map(|x| x + 1).collect(); |
| /// assert_eq!(10, arr.len()); |
| /// assert_eq!(0, arr.null_count()); |
| /// for i in 0..10i32 { |
| /// assert_eq!(i + 1, arr.value(i as usize)); |
| /// } |
| /// ``` |
| /// |
| /// # Example: From an iterator of option |
| /// |
| /// ``` |
| /// # use arrow_array::{Array, PrimitiveArray, types::Int32Type}; |
| /// let arr: PrimitiveArray<Int32Type> = (0..10).map(|x| (x % 2 == 0).then_some(x)).collect(); |
| /// assert_eq!(10, arr.len()); |
| /// assert_eq!(5, arr.null_count()); |
| /// // Note: values for null indexes are arbitrary |
| /// assert_eq!(arr.values(), &[0, 0, 2, 0, 4, 0, 6, 0, 8, 0]) |
| /// ``` |
| /// |
| /// # Example: Using Builder |
| /// |
| /// ``` |
| /// # use arrow_array::Array; |
| /// # use arrow_array::builder::PrimitiveBuilder; |
| /// # use arrow_array::types::Int32Type; |
| /// let mut builder = PrimitiveBuilder::<Int32Type>::new(); |
| /// builder.append_value(1); |
| /// builder.append_null(); |
| /// builder.append_value(2); |
| /// let array = builder.finish(); |
| /// // Note: values for null indexes are arbitrary |
| /// assert_eq!(array.values(), &[1, 0, 2]); |
| /// assert!(array.is_null(1)); |
| /// ``` |
| /// |
| /// # Example: Get a `PrimitiveArray` from an [`ArrayRef`] |
| /// ``` |
| /// # use std::sync::Arc; |
| /// # use arrow_array::{Array, cast::AsArray, ArrayRef, Float32Array, PrimitiveArray}; |
| /// # use arrow_array::types::{Float32Type}; |
| /// # use arrow_schema::DataType; |
| /// # let array: ArrayRef = Arc::new(Float32Array::from(vec![1.2, 2.3])); |
| /// // `as_primitive` below will panic if the array is not a Float32Array |
| /// assert_eq!(&DataType::Float32, array.data_type()); |
| /// let f32_array: Float32Array = array.as_primitive().clone(); |
| /// assert_eq!(f32_array, Float32Array::from(vec![1.2, 2.3])); |
| /// ``` |
| pub struct PrimitiveArray<T: ArrowPrimitiveType> { |
| data_type: DataType, |
| /// Buffer of native values, one per slot; values at null slots are arbitrary |
| values: ScalarBuffer<T::Native>, |
| nulls: Option<NullBuffer>, |
| } |
| |
| // Implemented by hand, cloning each of the three fields in turn. |
| impl<T: ArrowPrimitiveType> Clone for PrimitiveArray<T> { |
| fn clone(&self) -> Self { |
| let Self { |
| data_type, |
| values, |
| nulls, |
| } = self; |
| Self { |
| data_type: data_type.clone(), |
| values: values.clone(), |
| nulls: nulls.clone(), |
| } |
| } |
| } |
| |
| impl<T: ArrowPrimitiveType> PrimitiveArray<T> { |
| /// Builds a [`PrimitiveArray`] from a values buffer and an optional null buffer |
| /// |
| /// This is the panicking counterpart of [`Self::try_new`]. |
| /// |
| /// # Panics |
| /// |
| /// Panics if [`Self::try_new`] returns an error |
| /// |
| /// # Example |
| /// |
| /// Building a [`PrimitiveArray`] directly from a [`ScalarBuffer`] and [`NullBuffer`] with |
| /// this constructor is the most performant approach, as it avoids any additional allocations |
| /// |
| /// ``` |
| /// # use arrow_array::Int32Array; |
| /// # use arrow_array::types::Int32Type; |
| /// # use arrow_buffer::NullBuffer; |
| /// // [1, 2, 3, 4] |
| /// let array = Int32Array::new(vec![1, 2, 3, 4].into(), None); |
| /// // [1, null, 3, 4] |
| /// let nulls = NullBuffer::from(vec![true, false, true, true]); |
| /// let array = Int32Array::new(vec![1, 2, 3, 4].into(), Some(nulls)); |
| /// ``` |
| pub fn new(values: ScalarBuffer<T::Native>, nulls: Option<NullBuffer>) -> Self { |
| let checked = Self::try_new(values, nulls); |
| checked.unwrap() |
| } |
| |
| /// Builds a [`PrimitiveArray`] with `length` slots, all of which are null |
| /// |
| /// Every slot of the values buffer is filled with `T::Native::usize_as(0)`. |
| pub fn new_null(length: usize) -> Self { |
| let filler = T::Native::usize_as(0); |
| let values: ScalarBuffer<T::Native> = vec![filler; length].into(); |
| let nulls = NullBuffer::new_null(length); |
| Self { |
| data_type: T::DATA_TYPE, |
| values, |
| nulls: Some(nulls), |
| } |
| } |
| |
| /// Fallible constructor for a [`PrimitiveArray`] from values and optional nulls |
| /// |
| /// The resulting array uses `T::DATA_TYPE` as its data type. |
| /// |
| /// # Errors |
| /// |
| /// Returns [`ArrowError::InvalidArgumentError`] if `nulls` is provided and |
| /// `values.len() != nulls.len()` |
| pub fn try_new( |
| values: ScalarBuffer<T::Native>, |
| nulls: Option<NullBuffer>, |
| ) -> Result<Self, ArrowError> { |
| let expected_len = values.len(); |
| // Length of the null buffer, kept only when it disagrees with the values |
| let mismatched_len = nulls |
| .as_ref() |
| .map(|n| n.len()) |
| .filter(|null_len| *null_len != expected_len); |
| |
| if let Some(null_len) = mismatched_len { |
| return Err(ArrowError::InvalidArgumentError(format!( |
| "Incorrect length of null buffer for PrimitiveArray, expected {} got {}", |
| expected_len, null_len, |
| ))); |
| } |
| |
| Ok(Self { |
| data_type: T::DATA_TYPE, |
| values, |
| nulls, |
| }) |
| } |
| |
| /// Wraps `value` in a [`Scalar`] backed by a single-element, non-null array |
| pub fn new_scalar(value: T::Native) -> Scalar<Self> { |
| let single = Self { |
| data_type: T::DATA_TYPE, |
| values: vec![value].into(), |
| nulls: None, |
| }; |
| Scalar::new(single) |
| } |
| |
| /// Consumes this array, returning its data type, values buffer and null buffer |
| pub fn into_parts(self) -> (DataType, ScalarBuffer<T::Native>, Option<NullBuffer>) { |
| let Self { |
| data_type, |
| values, |
| nulls, |
| } = self; |
| (data_type, values, nulls) |
| } |
| |
| /// Replaces the [`DataType`] of this [`PrimitiveArray`], keeping values and nulls |
| /// |
| /// Prefer using [`Self::with_timezone`] or [`Self::with_precision_and_scale`] where |
| /// the primitive type is suitably constrained, as these cannot panic |
| /// |
| /// # Panics |
| /// |
| /// Panics if `data_type` is not accepted by [`Self::is_compatible`] |
| pub fn with_data_type(self, data_type: DataType) -> Self { |
| Self::assert_compatible(&data_type); |
| let Self { values, nulls, .. } = self; |
| Self { |
| data_type, |
| values, |
| nulls, |
| } |
| } |
| |
| /// Panics unless `data_type` is compatible with `Self` per [`Self::is_compatible`] |
| fn assert_compatible(data_type: &DataType) { |
| if !Self::is_compatible(data_type) { |
| panic!( |
| "PrimitiveArray expected data type {} got {}", |
| T::DATA_TYPE, |
| data_type |
| ); |
| } |
| } |
| |
| /// Returns the number of elements in this array, i.e. the length of its values buffer. |
| #[inline] |
| pub fn len(&self) -> usize { |
| self.values().len() |
| } |
| |
| /// Returns `true` if this array contains no elements. |
| pub fn is_empty(&self) -> bool { |
| self.len() == 0 |
| } |
| |
| /// Borrows the buffer holding the values of this array |
| #[inline] |
| pub fn values(&self) -> &ScalarBuffer<T::Native> { |
| let Self { values, .. } = self; |
| values |
| } |
| |
| /// Creates a [`PrimitiveBuilder`] for this array type via |
| /// [`PrimitiveBuilder::with_capacity`] |
| pub fn builder(capacity: usize) -> PrimitiveBuilder<T> { |
| PrimitiveBuilder::with_capacity(capacity) |
| } |
| |
| /// Returns `true` if this [`PrimitiveArray`] can carry the provided [`DataType`] |
| /// |
| /// This is equivalent to `data_type == T::DATA_TYPE`, except that timestamp |
| /// timezones and decimal precision and scale are ignored |
| pub fn is_compatible(data_type: &DataType) -> bool { |
| match (&T::DATA_TYPE, data_type) { |
| // Timestamps only need to agree on the time unit |
| (DataType::Timestamp(expected, _), DataType::Timestamp(actual, _)) => expected == actual, |
| (DataType::Timestamp(_, _), _) => false, |
| // Decimals only need to agree on the width |
| (DataType::Decimal128(_, _), other) => matches!(other, DataType::Decimal128(_, _)), |
| (DataType::Decimal256(_, _), other) => matches!(other, DataType::Decimal256(_, _)), |
| (expected, actual) => expected == actual, |
| } |
| } |
| |
| /// Returns the primitive value at index `i` without bounds checking. |
| /// |
| /// # Safety |
| /// |
| /// The caller must ensure that `i` is less than the array `len()` |
| #[inline] |
| pub unsafe fn value_unchecked(&self, i: usize) -> T::Native { |
| let slot = self.values.get_unchecked(i); |
| *slot |
| } |
| |
| /// Returns the primitive value stored at index `i`. |
| /// |
| /// Only the index is checked here; the null buffer is not consulted. |
| /// |
| /// # Panics |
| /// |
| /// Panics if index `i` is out of bounds |
| #[inline] |
| pub fn value(&self, i: usize) -> T::Native { |
| let len = self.len(); |
| if i >= len { |
| panic!( |
| "Trying to access an element at index {} from a PrimitiveArray of length {}", |
| i, len |
| ); |
| } |
| // SAFETY: `i < len` was verified just above |
| unsafe { self.value_unchecked(i) } |
| } |
| |
| /// Collects an iterator of native values into a [`PrimitiveArray`] with no null buffer |
| pub fn from_iter_values<I: IntoIterator<Item = T::Native>>(iter: I) -> Self { |
| let bytes = iter.into_iter().collect::<Buffer>(); |
| // `Buffer::len` is in bytes, so convert to a number of elements |
| let elem_size = std::mem::size_of::<T::Native>(); |
| let elem_count = bytes.len() / elem_size; |
| let values = ScalarBuffer::new(bytes, 0, elem_count); |
| Self { |
| data_type: T::DATA_TYPE, |
| values, |
| nulls: None, |
| } |
| } |
| |
| /// Creates a [`PrimitiveArray`] containing `count` copies of `value` and no nulls |
| pub fn from_value(value: T::Native, count: usize) -> Self { |
| // A plain `Vec` converts directly into a `ScalarBuffer` (as `new_null` does), |
| // so no `unsafe` trusted-length iterator is needed to fill the buffer. |
| let values = vec![value; count]; |
| Self::new(values.into(), None) |
| } |
| |
| /// Maps an iterator of optional indexes to the values at those indexes |
| /// |
| /// Each `Some(i)` yields `Some(self.value(i))`, and each `None` yields `None`. |
| /// As with [`Self::value`], an out-of-bounds index panics when it is reached. |
| pub fn take_iter<'a>( |
| &'a self, |
| indexes: impl Iterator<Item = Option<usize>> + 'a, |
| ) -> impl Iterator<Item = Option<T::Native>> + 'a { |
| indexes.map(|opt_index| Some(self.value(opt_index?))) |
| } |
| |
| /// Maps an iterator of optional indexes to the values at those indexes, without bounds checks |
| /// |
| /// Each `Some(i)` yields `Some(self.value_unchecked(i))`, and each `None` yields `None`. |
| /// |
| /// # Safety |
| /// |
| /// The caller must ensure that every index in the iterator is less than the array `len()` |
| pub unsafe fn take_iter_unchecked<'a>( |
| &'a self, |
| indexes: impl Iterator<Item = Option<usize>> + 'a, |
| ) -> impl Iterator<Item = Option<T::Native>> + 'a { |
| indexes.map(|opt_index| Some(self.value_unchecked(opt_index?))) |
| } |
| |
| /// Returns a zero-copy slice of this array covering `length` elements from `offset`. |
| /// |
| /// The values buffer and, if present, the null buffer are sliced identically. |
| pub fn slice(&self, offset: usize, length: usize) -> Self { |
| let values = self.values.slice(offset, length); |
| let nulls = self.nulls.as_ref().map(|n| n.slice(offset, length)); |
| Self { |
| data_type: self.data_type.clone(), |
| values, |
| nulls, |
| } |
| } |
| |
| /// Reinterprets the contents of this array as a different data type without copying |
| /// |
| /// This gives an efficient conversion between primitive arrays that share the |
| /// same underlying native representation. The result uses `K::DATA_TYPE`. |
| /// |
| /// Note: the underlying values are not modified, so the semantic values of the |
| /// array may change, e.g. 100 milliseconds in a [`TimestampNanosecondArray`] |
| /// will become 100 seconds in a [`TimestampSecondArray`]. |
| /// |
| /// For casts that preserve the semantic value, check out the |
| /// [compute kernels](https://docs.rs/arrow/latest/arrow/compute/kernels/cast/index.html). |
| /// |
| /// ``` |
| /// # use arrow_array::{Int64Array, TimestampNanosecondArray}; |
| /// let a = Int64Array::from_iter_values([1, 2, 3, 4]); |
| /// let b: TimestampNanosecondArray = a.reinterpret_cast(); |
| /// ``` |
| pub fn reinterpret_cast<K>(&self) -> PrimitiveArray<K> |
| where |
| K: ArrowPrimitiveType<Native = T::Native>, |
| { |
| let retyped = self.to_data().into_builder().data_type(K::DATA_TYPE); |
| |
| // SAFETY: |
| // Native type is the same |
| let data = unsafe { retyped.build_unchecked() }; |
| PrimitiveArray::from(data) |
| } |
| |
| /// Applies an infallible unary function to every value of a primitive array, |
| /// producing a new array of potentially different type. |
| /// |
| /// This is the fastest way to perform an operation on a primitive array |
| /// when the benefits of a vectorized operation outweigh the cost of |
| /// branching nulls and non-nulls. |
| /// |
| /// See also |
| /// * [`Self::unary_mut`] for in place modification. |
| /// * [`Self::try_unary`] for fallible operations. |
| /// * [`arrow::compute::binary`] for binary operations |
| /// |
| /// [`arrow::compute::binary`]: https://docs.rs/arrow/latest/arrow/compute/fn.binary.html |
| /// |
| /// # Null Handling |
| /// |
| /// The function is applied to all values, including those in null slots, and |
| /// the null buffer is carried over unchanged. This will often allow the compiler |
| /// to generate faster vectorized code, but requires that the operation be |
| /// infallible (not error/panic) for any value of the corresponding type, |
| /// otherwise this function may panic. |
| /// |
| /// # Example |
| /// ```rust |
| /// # use arrow_array::{Int32Array, Float32Array, types::Int32Type}; |
| /// # fn main() { |
| /// let array = Int32Array::from(vec![Some(5), Some(7), None]); |
| /// // Create a new array with the value of applying sqrt |
| /// let c = array.unary(|x| f32::sqrt(x as f32)); |
| /// assert_eq!(c, Float32Array::from(vec![Some(2.236068), Some(2.6457512), None])); |
| /// # } |
| /// ``` |
| pub fn unary<F, O>(&self, op: F) -> PrimitiveArray<O> |
| where |
| O: ArrowPrimitiveType, |
| F: Fn(T::Native) -> O::Native, |
| { |
| let nulls = self.nulls().cloned(); |
| let mapped = self.values().iter().copied().map(op); |
| // JUSTIFICATION |
| // Benefit |
| // ~60% speedup |
| // SAFETY: |
| // `mapped` is an iterator with a known size because arrays are sized. |
| let buffer = unsafe { Buffer::from_trusted_len_iter(mapped) }; |
| PrimitiveArray::new(buffer.into(), nulls) |
| } |
| |
| /// Applies a unary and infallible function to the array in place if possible. |
| /// |
| /// # Buffer Reuse |
| /// |
| /// If the underlying buffers are not shared with other arrays, mutates the |
| /// underlying buffer in place, without allocating. |
| /// |
| /// If the underlying buffer is shared, returns `Err(self)` |
| /// |
| /// # Null Handling |
| /// |
| /// See [`Self::unary`] for more information on null handling. |
| /// |
| /// # Example |
| /// |
| /// ```rust |
| /// # use arrow_array::{Int32Array, types::Int32Type}; |
| /// let array = Int32Array::from(vec![Some(5), Some(7), None]); |
| /// // Apply x*2+1 to the data in place, no allocations |
| /// let c = array.unary_mut(|x| x * 2 + 1).unwrap(); |
| /// assert_eq!(c, Int32Array::from(vec![Some(11), Some(15), None])); |
| /// ``` |
| /// |
| /// # Example: modify [`ArrayRef`] in place, if not shared |
| /// |
| /// It is also possible to modify an [`ArrayRef`] if there are no other |
| /// references to the underlying buffer. |
| /// |
| /// ```rust |
| /// # use std::sync::Arc; |
| /// # use arrow_array::{Array, cast::AsArray, ArrayRef, Int32Array, PrimitiveArray, types::Int32Type}; |
| /// # let array: ArrayRef = Arc::new(Int32Array::from(vec![Some(5), Some(7), None])); |
| /// // Convert to Int32Array (panics if array.data_type is not Int32) |
| /// let a = array.as_primitive::<Int32Type>().clone(); |
| /// // Try to apply x*2+1 to the data in place, fails because array is still shared |
| /// a.unary_mut(|x| x * 2 + 1).unwrap_err(); |
| /// // Try again, this time dropping the last remaining reference |
| /// let a = array.as_primitive::<Int32Type>().clone(); |
| /// drop(array); |
| /// // Now we can apply the operation in place |
| /// let c = a.unary_mut(|x| x * 2 + 1).unwrap(); |
| /// assert_eq!(c, Int32Array::from(vec![Some(11), Some(15), None])); |
| /// ``` |
| pub fn unary_mut<F>(self, op: F) -> Result<PrimitiveArray<T>, PrimitiveArray<T>> |
| where |
| F: Fn(T::Native) -> T::Native, |
| { |
| // `into_builder` hands the array back as the error if it cannot be converted |
| let mut builder = self.into_builder()?; |
| // Every slot is rewritten, including those that are null |
| for v in builder.values_slice_mut() { |
| *v = op(*v); |
| } |
| Ok(builder.finish()) |
| } |
| |
| /// Applies a unary fallible function to all valid values in a primitive |
| /// array, producing a new array of potentially different type. |
| /// |
| /// Applies `op` to only rows that are valid, which is often significantly |
| /// slower than [`Self::unary`], which should be preferred if `op` is |
| /// fallible. |
| /// |
| /// Note: LLVM is currently unable to effectively vectorize fallible operations |
| pub fn try_unary<F, O, E>(&self, op: F) -> Result<PrimitiveArray<O>, E> |
| where |
| O: ArrowPrimitiveType, |
| F: Fn(T::Native) -> Result<O::Native, E>, |
| { |
| let len = self.len(); |
| |
| let nulls = self.nulls().cloned(); |
| let mut buffer = BufferBuilder::<O::Native>::new(len); |
| buffer.append_n_zeroed(len); |
| let slice = buffer.as_slice_mut(); |
| |
| let f = |idx| { |
| unsafe { *slice.get_unchecked_mut(idx) = op(self.value_unchecked(idx))? }; |
| Ok::<_, E>(()) |
| }; |
| |
| match &nulls { |
| Some(nulls) => nulls.try_for_each_valid_idx(f)?, |
| None => (0..len).try_for_each(f)?, |
| } |
| |
| let values = buffer.finish().into(); |
| Ok(PrimitiveArray::new(values, nulls)) |
| } |
| |
| /// Applies a unary fallible function to all valid values in a mutable |
| /// primitive array. |
| /// |
| /// # Null Handling |
| /// |
| /// See [`Self::try_unary`] for more information on null handling. |
| /// |
| /// # Buffer Reuse |
| /// |
| /// See [`Self::unary_mut`] for more information on buffer reuse. |
| /// |
| /// This returns an `Err` when the input array is shared buffer with other |
| /// array. In the case, returned `Err` wraps input array. If the function |
| /// encounters an error during applying on values. In the case, this returns an `Err` within |
| /// an `Ok` which wraps the actual error. |
| /// |
| /// Note: LLVM is currently unable to effectively vectorize fallible operations |
| pub fn try_unary_mut<F, E>( |
| self, |
| op: F, |
| ) -> Result<Result<PrimitiveArray<T>, E>, PrimitiveArray<T>> |
| where |
| F: Fn(T::Native) -> Result<T::Native, E>, |
| { |
| let len = self.len(); |
| let null_count = self.null_count(); |
| let mut builder = self.into_builder()?; |
| |
| let (slice, null_buffer) = builder.slices_mut(); |
| |
| let r = try_for_each_valid_idx(len, 0, null_count, null_buffer.as_deref(), |idx| { |
| unsafe { *slice.get_unchecked_mut(idx) = op(*slice.get_unchecked(idx))? }; |
| Ok::<_, E>(()) |
| }); |
| |
| if let Err(err) = r { |
| return Ok(Err(err)); |
| } |
| |
| Ok(Ok(builder.finish())) |
| } |
| |
| /// Applies a unary and nullable function to all valid values in a primitive array |
| /// |
| /// Applies `op` to only rows that are valid, which is often significantly |
| /// slower than [`Self::unary`], which should be preferred if `op` is |
| /// fallible. |
| /// |
| /// Note: LLVM is currently unable to effectively vectorize fallible operations |
| pub fn unary_opt<F, O>(&self, op: F) -> PrimitiveArray<O> |
| where |
| O: ArrowPrimitiveType, |
| F: Fn(T::Native) -> Option<O::Native>, |
| { |
| let len = self.len(); |
| let (nulls, null_count, offset) = match self.nulls() { |
| Some(n) => (Some(n.validity()), n.null_count(), n.offset()), |
| None => (None, 0, 0), |
| }; |
| |
| let mut null_builder = BooleanBufferBuilder::new(len); |
| match nulls { |
| Some(b) => null_builder.append_packed_range(offset..offset + len, b), |
| None => null_builder.append_n(len, true), |
| } |
| |
| let mut buffer = BufferBuilder::<O::Native>::new(len); |
| buffer.append_n_zeroed(len); |
| let slice = buffer.as_slice_mut(); |
| |
| let mut out_null_count = null_count; |
| |
| let _ = try_for_each_valid_idx(len, offset, null_count, nulls, |idx| { |
| match op(unsafe { self.value_unchecked(idx) }) { |
| Some(v) => unsafe { *slice.get_unchecked_mut(idx) = v }, |
| None => { |
| out_null_count += 1; |
| null_builder.set_bit(idx, false); |
| } |
| } |
| Ok::<_, ()>(()) |
| }); |
| |
| let nulls = null_builder.finish(); |
| let values = buffer.finish().into(); |
| let nulls = unsafe { NullBuffer::new_unchecked(nulls, out_null_count) }; |
| PrimitiveArray::new(values, Some(nulls)) |
| } |
| |
| /// Returns a `PrimitiveBuilder` for this array, suitable for mutating values |
| /// in place. |
| /// |
| /// # Buffer Reuse |
| /// |
| /// If the underlying data buffer has no other outstanding references, the |
| /// buffer is used without copying. |
| /// |
| /// If the underlying data buffer does have outstanding references, returns |
| /// `Err(self)` |
| pub fn into_builder(self) -> Result<PrimitiveBuilder<T>, Self> { |
| let len = self.len(); |
| let data = self.into_data(); |
| let null_bit_buffer = data.nulls().map(|b| b.inner().sliced()); |
| |
| let element_len = std::mem::size_of::<T::Native>(); |
| let buffer = |
| data.buffers()[0].slice_with_length(data.offset() * element_len, len * element_len); |
| |
| drop(data); |
| |
| let try_mutable_null_buffer = match null_bit_buffer { |
| None => Ok(None), |
| Some(null_buffer) => { |
| // Null buffer exists, tries to make it mutable |
| null_buffer.into_mutable().map(Some) |
| } |
| }; |
| |
| let try_mutable_buffers = match try_mutable_null_buffer { |
| Ok(mutable_null_buffer) => { |
| // Got mutable null buffer, tries to get mutable value buffer |
| let try_mutable_buffer = buffer.into_mutable(); |
| |
| // try_mutable_buffer.map(...).map_err(...) doesn't work as the compiler complains |
| // mutable_null_buffer is moved into map closure. |
| match try_mutable_buffer { |
| Ok(mutable_buffer) => Ok(PrimitiveBuilder::<T>::new_from_buffer( |
| mutable_buffer, |
| mutable_null_buffer, |
| )), |
| Err(buffer) => Err((buffer, mutable_null_buffer.map(|b| b.into()))), |
| } |
| } |
| Err(mutable_null_buffer) => { |
| // Unable to get mutable null buffer |
| Err((buffer, Some(mutable_null_buffer))) |
| } |
| }; |
| |
| match try_mutable_buffers { |
| Ok(builder) => Ok(builder), |
| Err((buffer, null_bit_buffer)) => { |
| let builder = ArrayData::builder(T::DATA_TYPE) |
| .len(len) |
| .add_buffer(buffer) |
| .null_bit_buffer(null_bit_buffer); |
| |
| let array_data = unsafe { builder.build_unchecked() }; |
| let array = PrimitiveArray::<T>::from(array_data); |
| |
| Err(array) |
| } |
| } |
| } |
| } |
| |
| impl<T: ArrowPrimitiveType> From<PrimitiveArray<T>> for ArrayData { |
| fn from(array: PrimitiveArray<T>) -> Self { |
| let builder = ArrayDataBuilder::new(array.data_type) |
| .len(array.values.len()) |
| .nulls(array.nulls) |
| .buffers(vec![array.values.into_inner()]); |
| |
| unsafe { builder.build_unchecked() } |
| } |
| } |
| |
| impl<T: ArrowPrimitiveType> Array for PrimitiveArray<T> { |
| fn as_any(&self) -> &dyn Any { |
| self |
| } |
| |
| fn to_data(&self) -> ArrayData { |
| self.clone().into() |
| } |
| |
| fn into_data(self) -> ArrayData { |
| self.into() |
| } |
| |
| fn data_type(&self) -> &DataType { |
| &self.data_type |
| } |
| |
| fn slice(&self, offset: usize, length: usize) -> ArrayRef { |
| Arc::new(self.slice(offset, length)) |
| } |
| |
| fn len(&self) -> usize { |
| self.values.len() |
| } |
| |
| fn is_empty(&self) -> bool { |
| self.values.is_empty() |
| } |
| |
| fn offset(&self) -> usize { |
| 0 |
| } |
| |
| fn nulls(&self) -> Option<&NullBuffer> { |
| self.nulls.as_ref() |
| } |
| |
| fn get_buffer_memory_size(&self) -> usize { |
| let mut size = self.values.inner().capacity(); |
| if let Some(n) = self.nulls.as_ref() { |
| size += n.buffer().capacity(); |
| } |
| size |
| } |
| |
| fn get_array_memory_size(&self) -> usize { |
| std::mem::size_of::<Self>() + self.get_buffer_memory_size() |
| } |
| } |
| |
| impl<'a, T: ArrowPrimitiveType> ArrayAccessor for &'a PrimitiveArray<T> { |
| type Item = T::Native; |
| |
| fn value(&self, index: usize) -> Self::Item { |
| PrimitiveArray::value(self, index) |
| } |
| |
| #[inline] |
| unsafe fn value_unchecked(&self, index: usize) -> Self::Item { |
| PrimitiveArray::value_unchecked(self, index) |
| } |
| } |
| |
| impl<T: ArrowTemporalType> PrimitiveArray<T> |
| where |
| i64: From<T::Native>, |
| { |
| /// Returns value as a chrono `NaiveDateTime`, handling time resolution |
| /// |
| /// If a data type cannot be converted to `NaiveDateTime`, a `None` is returned. |
| /// A valid value is expected, thus the user should first check for validity. |
| pub fn value_as_datetime(&self, i: usize) -> Option<NaiveDateTime> { |
| as_datetime::<T>(i64::from(self.value(i))) |
| } |
| |
| /// Returns value as a chrono `NaiveDateTime`, handling time resolution with the provided tz |
| /// |
| /// functionally it is same as `value_as_datetime`, however it adds |
| /// the passed tz to the to-be-returned NaiveDateTime |
| pub fn value_as_datetime_with_tz(&self, i: usize, tz: Tz) -> Option<DateTime<Tz>> { |
| as_datetime_with_timezone::<T>(i64::from(self.value(i)), tz) |
| } |
| |
| /// Returns value as a chrono `NaiveDate` by using `Self::datetime()` |
| /// |
| /// If a data type cannot be converted to `NaiveDate`, a `None` is returned |
| pub fn value_as_date(&self, i: usize) -> Option<NaiveDate> { |
| self.value_as_datetime(i).map(|datetime| datetime.date()) |
| } |
| |
| /// Returns a value as a chrono `NaiveTime` |
| /// |
| /// `Date32` and `Date64` return UTC midnight as they do not have time resolution |
| pub fn value_as_time(&self, i: usize) -> Option<NaiveTime> { |
| as_time::<T>(i64::from(self.value(i))) |
| } |
| |
| /// Returns a value as a chrono `Duration` |
| /// |
| /// If a data type cannot be converted to `Duration`, a `None` is returned |
| pub fn value_as_duration(&self, i: usize) -> Option<Duration> { |
| as_duration::<T>(i64::from(self.value(i))) |
| } |
| } |
| |
| impl<T: ArrowPrimitiveType> std::fmt::Debug for PrimitiveArray<T> { |
| fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { |
| let data_type = self.data_type(); |
| |
| write!(f, "PrimitiveArray<{data_type:?}>\n[\n")?; |
| print_long_array(self, f, |array, index, f| match data_type { |
| DataType::Date32 | DataType::Date64 => { |
| let v = self.value(index).to_i64().unwrap(); |
| match as_date::<T>(v) { |
| Some(date) => write!(f, "{date:?}"), |
| None => { |
| write!( |
| f, |
| "Cast error: Failed to convert {v} to temporal for {data_type:?}" |
| ) |
| } |
| } |
| } |
| DataType::Time32(_) | DataType::Time64(_) => { |
| let v = self.value(index).to_i64().unwrap(); |
| match as_time::<T>(v) { |
| Some(time) => write!(f, "{time:?}"), |
| None => { |
| write!( |
| f, |
| "Cast error: Failed to convert {v} to temporal for {data_type:?}" |
| ) |
| } |
| } |
| } |
| DataType::Timestamp(_, tz_string_opt) => { |
| let v = self.value(index).to_i64().unwrap(); |
| match tz_string_opt { |
| // for Timestamp with TimeZone |
| Some(tz_string) => { |
| match tz_string.parse::<Tz>() { |
| // if the time zone is valid, construct a DateTime<Tz> and format it as rfc3339 |
| Ok(tz) => match as_datetime_with_timezone::<T>(v, tz) { |
| Some(datetime) => write!(f, "{}", datetime.to_rfc3339()), |
| None => write!(f, "null"), |
| }, |
| // if the time zone is invalid, shows NaiveDateTime with an error message |
| Err(_) => match as_datetime::<T>(v) { |
| Some(datetime) => { |
| write!(f, "{datetime:?} (Unknown Time Zone '{tz_string}')") |
| } |
| None => write!(f, "null"), |
| }, |
| } |
| } |
| // for Timestamp without TimeZone |
| None => match as_datetime::<T>(v) { |
| Some(datetime) => write!(f, "{datetime:?}"), |
| None => write!(f, "null"), |
| }, |
| } |
| } |
| _ => std::fmt::Debug::fmt(&array.value(index), f), |
| })?; |
| write!(f, "]") |
| } |
| } |
| |
| impl<'a, T: ArrowPrimitiveType> IntoIterator for &'a PrimitiveArray<T> { |
| type Item = Option<<T as ArrowPrimitiveType>::Native>; |
| type IntoIter = PrimitiveIter<'a, T>; |
| |
| fn into_iter(self) -> Self::IntoIter { |
| PrimitiveIter::<'a, T>::new(self) |
| } |
| } |
| |
| impl<'a, T: ArrowPrimitiveType> PrimitiveArray<T> { |
| /// constructs a new iterator |
| pub fn iter(&'a self) -> PrimitiveIter<'a, T> { |
| PrimitiveIter::<'a, T>::new(self) |
| } |
| } |
| |
/// An optional primitive value
///
/// This struct is used as an adapter when creating a `PrimitiveArray` from an iterator.
/// The `FromIterator` implementation for `PrimitiveArray` accepts any iterator whose
/// items can be converted `into` this struct. Implementing `From` (or `Into`) for a
/// type is therefore enough to make an iterator of that type collectable into a
/// `PrimitiveArray`.
#[derive(Debug)]
pub struct NativeAdapter<T: ArrowPrimitiveType> {
    /// The native Rust value, or `None` for a null element
    pub native: Option<T::Native>,
}
| |
| macro_rules! def_from_for_primitive { |
| ( $ty:ident, $tt:tt) => { |
| impl From<$tt> for NativeAdapter<$ty> { |
| fn from(value: $tt) -> Self { |
| NativeAdapter { |
| native: Some(value), |
| } |
| } |
| } |
| }; |
| } |
| |
| def_from_for_primitive!(Int8Type, i8); |
| def_from_for_primitive!(Int16Type, i16); |
| def_from_for_primitive!(Int32Type, i32); |
| def_from_for_primitive!(Int64Type, i64); |
| def_from_for_primitive!(UInt8Type, u8); |
| def_from_for_primitive!(UInt16Type, u16); |
| def_from_for_primitive!(UInt32Type, u32); |
| def_from_for_primitive!(UInt64Type, u64); |
| def_from_for_primitive!(Float16Type, f16); |
| def_from_for_primitive!(Float32Type, f32); |
| def_from_for_primitive!(Float64Type, f64); |
| def_from_for_primitive!(Decimal128Type, i128); |
| def_from_for_primitive!(Decimal256Type, i256); |
| |
| impl<T: ArrowPrimitiveType> From<Option<<T as ArrowPrimitiveType>::Native>> for NativeAdapter<T> { |
| fn from(value: Option<<T as ArrowPrimitiveType>::Native>) -> Self { |
| NativeAdapter { native: value } |
| } |
| } |
| |
| impl<T: ArrowPrimitiveType> From<&Option<<T as ArrowPrimitiveType>::Native>> for NativeAdapter<T> { |
| fn from(value: &Option<<T as ArrowPrimitiveType>::Native>) -> Self { |
| NativeAdapter { native: *value } |
| } |
| } |
| |
| impl<T: ArrowPrimitiveType, Ptr: Into<NativeAdapter<T>>> FromIterator<Ptr> for PrimitiveArray<T> { |
| fn from_iter<I: IntoIterator<Item = Ptr>>(iter: I) -> Self { |
| let iter = iter.into_iter(); |
| let (lower, _) = iter.size_hint(); |
| |
| let mut null_builder = BooleanBufferBuilder::new(lower); |
| |
| let buffer: Buffer = iter |
| .map(|item| { |
| if let Some(a) = item.into().native { |
| null_builder.append(true); |
| a |
| } else { |
| null_builder.append(false); |
| // this ensures that null items on the buffer are not arbitrary. |
| // This is important because fallible operations can use null values (e.g. a vectorized "add") |
| // which may panic (e.g. overflow if the number on the slots happen to be very large). |
| T::Native::default() |
| } |
| }) |
| .collect(); |
| |
| let len = null_builder.len(); |
| |
| let data = unsafe { |
| ArrayData::new_unchecked( |
| T::DATA_TYPE, |
| len, |
| None, |
| Some(null_builder.into()), |
| 0, |
| vec![buffer], |
| vec![], |
| ) |
| }; |
| PrimitiveArray::from(data) |
| } |
| } |
| |
| impl<T: ArrowPrimitiveType> PrimitiveArray<T> { |
| /// Creates a [`PrimitiveArray`] from an iterator of trusted length. |
| /// # Safety |
| /// The iterator must be [`TrustedLen`](https://doc.rust-lang.org/std/iter/trait.TrustedLen.html). |
| /// I.e. that `size_hint().1` correctly reports its length. |
| #[inline] |
| pub unsafe fn from_trusted_len_iter<I, P>(iter: I) -> Self |
| where |
| P: std::borrow::Borrow<Option<<T as ArrowPrimitiveType>::Native>>, |
| I: IntoIterator<Item = P>, |
| { |
| let iterator = iter.into_iter(); |
| let (_, upper) = iterator.size_hint(); |
| let len = upper.expect("trusted_len_unzip requires an upper limit"); |
| |
| let (null, buffer) = trusted_len_unzip(iterator); |
| |
| let data = |
| ArrayData::new_unchecked(T::DATA_TYPE, len, None, Some(null), 0, vec![buffer], vec![]); |
| PrimitiveArray::from(data) |
| } |
| } |
| |
| // TODO: the macro is needed here because we'd get "conflicting implementations" error |
| // otherwise with both `From<Vec<T::Native>>` and `From<Vec<Option<T::Native>>>`. |
| // We should revisit this in future. |
| macro_rules! def_numeric_from_vec { |
| ( $ty:ident ) => { |
| impl From<Vec<<$ty as ArrowPrimitiveType>::Native>> for PrimitiveArray<$ty> { |
| fn from(data: Vec<<$ty as ArrowPrimitiveType>::Native>) -> Self { |
| let array_data = ArrayData::builder($ty::DATA_TYPE) |
| .len(data.len()) |
| .add_buffer(Buffer::from_vec(data)); |
| let array_data = unsafe { array_data.build_unchecked() }; |
| PrimitiveArray::from(array_data) |
| } |
| } |
| |
| // Constructs a primitive array from a vector. Should only be used for testing. |
| impl From<Vec<Option<<$ty as ArrowPrimitiveType>::Native>>> for PrimitiveArray<$ty> { |
| fn from(data: Vec<Option<<$ty as ArrowPrimitiveType>::Native>>) -> Self { |
| PrimitiveArray::from_iter(data.iter()) |
| } |
| } |
| }; |
| } |
| |
| def_numeric_from_vec!(Int8Type); |
| def_numeric_from_vec!(Int16Type); |
| def_numeric_from_vec!(Int32Type); |
| def_numeric_from_vec!(Int64Type); |
| def_numeric_from_vec!(UInt8Type); |
| def_numeric_from_vec!(UInt16Type); |
| def_numeric_from_vec!(UInt32Type); |
| def_numeric_from_vec!(UInt64Type); |
| def_numeric_from_vec!(Float16Type); |
| def_numeric_from_vec!(Float32Type); |
| def_numeric_from_vec!(Float64Type); |
| def_numeric_from_vec!(Decimal128Type); |
| def_numeric_from_vec!(Decimal256Type); |
| |
| def_numeric_from_vec!(Date32Type); |
| def_numeric_from_vec!(Date64Type); |
| def_numeric_from_vec!(Time32SecondType); |
| def_numeric_from_vec!(Time32MillisecondType); |
| def_numeric_from_vec!(Time64MicrosecondType); |
| def_numeric_from_vec!(Time64NanosecondType); |
| def_numeric_from_vec!(IntervalYearMonthType); |
| def_numeric_from_vec!(IntervalDayTimeType); |
| def_numeric_from_vec!(IntervalMonthDayNanoType); |
| def_numeric_from_vec!(DurationSecondType); |
| def_numeric_from_vec!(DurationMillisecondType); |
| def_numeric_from_vec!(DurationMicrosecondType); |
| def_numeric_from_vec!(DurationNanosecondType); |
| def_numeric_from_vec!(TimestampSecondType); |
| def_numeric_from_vec!(TimestampMillisecondType); |
| def_numeric_from_vec!(TimestampMicrosecondType); |
| def_numeric_from_vec!(TimestampNanosecondType); |
| |
| impl<T: ArrowTimestampType> PrimitiveArray<T> { |
| /// Construct a timestamp array from a vec of i64 values and an optional timezone |
| #[deprecated(note = "Use with_timezone_opt instead")] |
| pub fn from_vec(data: Vec<i64>, timezone: Option<String>) -> Self |
| where |
| Self: From<Vec<i64>>, |
| { |
| Self::from(data).with_timezone_opt(timezone) |
| } |
| |
| /// Construct a timestamp array from a vec of `Option<i64>` values and an optional timezone |
| #[deprecated(note = "Use with_timezone_opt instead")] |
| pub fn from_opt_vec(data: Vec<Option<i64>>, timezone: Option<String>) -> Self |
| where |
| Self: From<Vec<Option<i64>>>, |
| { |
| Self::from(data).with_timezone_opt(timezone) |
| } |
| |
| /// Returns the timezone of this array if any |
| pub fn timezone(&self) -> Option<&str> { |
| match self.data_type() { |
| DataType::Timestamp(_, tz) => tz.as_deref(), |
| _ => unreachable!(), |
| } |
| } |
| |
| /// Construct a timestamp array with new timezone |
| pub fn with_timezone(self, timezone: impl Into<Arc<str>>) -> Self { |
| self.with_timezone_opt(Some(timezone.into())) |
| } |
| |
| /// Construct a timestamp array with UTC |
| pub fn with_timezone_utc(self) -> Self { |
| self.with_timezone("+00:00") |
| } |
| |
| /// Construct a timestamp array with an optional timezone |
| pub fn with_timezone_opt<S: Into<Arc<str>>>(self, timezone: Option<S>) -> Self { |
| Self { |
| data_type: DataType::Timestamp(T::UNIT, timezone.map(Into::into)), |
| ..self |
| } |
| } |
| } |
| |
| /// Constructs a `PrimitiveArray` from an array data reference. |
| impl<T: ArrowPrimitiveType> From<ArrayData> for PrimitiveArray<T> { |
| fn from(data: ArrayData) -> Self { |
| Self::assert_compatible(data.data_type()); |
| assert_eq!( |
| data.buffers().len(), |
| 1, |
| "PrimitiveArray data should contain a single buffer only (values buffer)" |
| ); |
| |
| let values = ScalarBuffer::new(data.buffers()[0].clone(), data.offset(), data.len()); |
| Self { |
| data_type: data.data_type().clone(), |
| values, |
| nulls: data.nulls().cloned(), |
| } |
| } |
| } |
| |
| impl<T: DecimalType + ArrowPrimitiveType> PrimitiveArray<T> { |
| /// Returns a Decimal array with the same data as self, with the |
| /// specified precision and scale. |
| /// |
| /// See [`validate_decimal_precision_and_scale`] |
| pub fn with_precision_and_scale(self, precision: u8, scale: i8) -> Result<Self, ArrowError> { |
| validate_decimal_precision_and_scale::<T>(precision, scale)?; |
| Ok(Self { |
| data_type: T::TYPE_CONSTRUCTOR(precision, scale), |
| ..self |
| }) |
| } |
| |
| /// Validates values in this array can be properly interpreted |
| /// with the specified precision. |
| pub fn validate_decimal_precision(&self, precision: u8) -> Result<(), ArrowError> { |
| (0..self.len()).try_for_each(|idx| { |
| if self.is_valid(idx) { |
| let decimal = unsafe { self.value_unchecked(idx) }; |
| T::validate_decimal_precision(decimal, precision) |
| } else { |
| Ok(()) |
| } |
| }) |
| } |
| |
| /// Validates the Decimal Array, if the value of slot is overflow for the specified precision, and |
| /// will be casted to Null |
| pub fn null_if_overflow_precision(&self, precision: u8) -> Self { |
| self.unary_opt::<_, T>(|v| { |
| (T::validate_decimal_precision(v, precision).is_ok()).then_some(v) |
| }) |
| } |
| |
| /// Returns [`Self::value`] formatted as a string |
| pub fn value_as_string(&self, row: usize) -> String { |
| T::format_decimal(self.value(row), self.precision(), self.scale()) |
| } |
| |
| /// Returns the decimal precision of this array |
| pub fn precision(&self) -> u8 { |
| match T::BYTE_LENGTH { |
| 16 => { |
| if let DataType::Decimal128(p, _) = self.data_type() { |
| *p |
| } else { |
| unreachable!( |
| "Decimal128Array datatype is not DataType::Decimal128 but {}", |
| self.data_type() |
| ) |
| } |
| } |
| 32 => { |
| if let DataType::Decimal256(p, _) = self.data_type() { |
| *p |
| } else { |
| unreachable!( |
| "Decimal256Array datatype is not DataType::Decimal256 but {}", |
| self.data_type() |
| ) |
| } |
| } |
| other => unreachable!("Unsupported byte length for decimal array {}", other), |
| } |
| } |
| |
| /// Returns the decimal scale of this array |
| pub fn scale(&self) -> i8 { |
| match T::BYTE_LENGTH { |
| 16 => { |
| if let DataType::Decimal128(_, s) = self.data_type() { |
| *s |
| } else { |
| unreachable!( |
| "Decimal128Array datatype is not DataType::Decimal128 but {}", |
| self.data_type() |
| ) |
| } |
| } |
| 32 => { |
| if let DataType::Decimal256(_, s) = self.data_type() { |
| *s |
| } else { |
| unreachable!( |
| "Decimal256Array datatype is not DataType::Decimal256 but {}", |
| self.data_type() |
| ) |
| } |
| } |
| other => unreachable!("Unsupported byte length for decimal array {}", other), |
| } |
| } |
| } |
| |
| #[cfg(test)] |
| mod tests { |
| use super::*; |
| use crate::builder::{Decimal128Builder, Decimal256Builder}; |
| use crate::cast::downcast_array; |
| use crate::BooleanArray; |
| use arrow_buffer::{IntervalDayTime, IntervalMonthDayNano}; |
| use arrow_schema::TimeUnit; |
| |
#[test]
fn test_primitive_array_from_vec() {
    // An array built from a plain `Vec` has no nulls and holds the values
    // in order.
    let expected = Buffer::from_slice_ref([0, 1, 2, 3, 4]);
    let array = Int32Array::from(vec![0, 1, 2, 3, 4]);

    assert_eq!(&expected, array.values.inner());
    assert_eq!(5, array.len());
    assert_eq!(0, array.offset());
    assert_eq!(0, array.null_count());

    for (idx, want) in (0..5_i32).enumerate() {
        assert!(!array.is_null(idx));
        assert!(array.is_valid(idx));
        assert_eq!(want, array.value(idx));
    }
}
| |
#[test]
fn test_primitive_array_from_vec_option() {
    // Odd positions are null, even positions hold their own index.
    let array = Int32Array::from(vec![Some(0), None, Some(2), None, Some(4)]);
    assert_eq!(5, array.len());
    assert_eq!(0, array.offset());
    assert_eq!(2, array.null_count());

    for idx in 0..5 {
        let is_odd = idx % 2 != 0;
        if is_odd {
            assert!(array.is_null(idx));
            assert!(!array.is_valid(idx));
            continue;
        }
        assert!(!array.is_null(idx));
        assert!(array.is_valid(idx));
        assert_eq!(idx as i32, array.value(idx));
    }
}
| |
#[test]
fn test_date64_array_from_vec_option() {
    // Builds a Date64 array with a null in the middle and checks that the
    // valid values survive a round trip through `NaiveDateTime`.
    let millis = 1550902545147;
    let array: PrimitiveArray<Date64Type> = vec![Some(millis), None, Some(millis)].into();
    assert_eq!(3, array.len());
    assert_eq!(0, array.offset());
    assert_eq!(1, array.null_count());

    for idx in 0..3 {
        if idx % 2 != 0 {
            assert!(array.is_null(idx));
            assert!(!array.is_valid(idx));
            continue;
        }
        assert!(!array.is_null(idx));
        assert!(array.is_valid(idx));
        assert_eq!(1550902545147, array.value(idx));

        // roundtrip to and from datetime
        let datetime = array.value_as_datetime(idx).unwrap();
        assert_eq!(1550902545147, datetime.and_utc().timestamp_millis());
    }
}
| |
#[test]
fn test_time32_millisecond_array_from_vec() {
    // 1: 00:00:00.001
    // 37800005: 10:30:00.005
    // 86399210: 23:59:59.210
    let array: PrimitiveArray<Time32MillisecondType> = vec![1, 37_800_005, 86_399_210].into();
    assert_eq!(3, array.len());
    assert_eq!(0, array.offset());
    assert_eq!(0, array.null_count());

    let expected = ["00:00:00.001", "10:30:00.005", "23:59:59.210"];
    for (idx, want) in expected.iter().enumerate() {
        // check that we can't create dates or datetimes from time instances
        assert_eq!(None, array.value_as_datetime(idx));
        assert_eq!(None, array.value_as_date(idx));

        let time = array.value_as_time(idx).unwrap();
        assert_eq!(*want, time.format("%H:%M:%S%.3f").to_string());
    }
}
| |
#[test]
fn test_time64_nanosecond_array_from_vec() {
    // Builds a Time64 (nanosecond) array from plain values.

    // 1e6: 00:00:00.001
    // 37800005e6: 10:30:00.005
    // 86399210e6: 23:59:59.210
    let array: PrimitiveArray<Time64NanosecondType> =
        vec![1_000_000, 37_800_005_000_000, 86_399_210_000_000].into();
    assert_eq!(3, array.len());
    assert_eq!(0, array.offset());
    assert_eq!(0, array.null_count());

    let expected = ["00:00:00.001", "10:30:00.005", "23:59:59.210"];
    for (idx, want) in expected.iter().enumerate() {
        // check that we can't create dates or datetimes from time instances
        assert_eq!(None, array.value_as_datetime(idx));
        assert_eq!(None, array.value_as_date(idx));

        let time = array.value_as_time(idx).unwrap();
        assert_eq!(*want, time.format("%H:%M:%S%.3f").to_string());
    }
}
| |
#[test]
fn test_interval_array_from_vec() {
    // Year-month intervals
    let year_month = IntervalYearMonthArray::from(vec![Some(1), None, Some(-5)]);
    assert_eq!(3, year_month.len());
    assert_eq!(0, year_month.offset());
    assert_eq!(1, year_month.null_count());
    assert_eq!(1, year_month.value(0));
    assert_eq!(1, year_month.values()[0]);
    assert!(year_month.is_null(1));
    assert_eq!(-5, year_month.value(2));
    assert_eq!(-5, year_month.values()[2]);

    // Day-time intervals
    let first = IntervalDayTime {
        days: 34,
        milliseconds: 1,
    };
    let last = IntervalDayTime {
        days: -2,
        milliseconds: -5,
    };
    let day_time = IntervalDayTimeArray::from(vec![Some(first), None, Some(last)]);
    assert_eq!(3, day_time.len());
    assert_eq!(0, day_time.offset());
    assert_eq!(1, day_time.null_count());
    assert_eq!(first, day_time.value(0));
    assert_eq!(first, day_time.values()[0]);
    assert!(day_time.is_null(1));
    assert_eq!(last, day_time.value(2));
    assert_eq!(last, day_time.values()[2]);

    // Month-day-nano intervals
    let first = IntervalMonthDayNano {
        months: 2,
        days: 34,
        nanoseconds: -1,
    };
    let last = IntervalMonthDayNano {
        months: -3,
        days: -2,
        nanoseconds: 4,
    };
    let month_day_nano = IntervalMonthDayNanoArray::from(vec![Some(first), None, Some(last)]);
    assert_eq!(3, month_day_nano.len());
    assert_eq!(0, month_day_nano.offset());
    assert_eq!(1, month_day_nano.null_count());
    assert_eq!(first, month_day_nano.value(0));
    assert_eq!(first, month_day_nano.values()[0]);
    assert!(month_day_nano.is_null(1));
    assert_eq!(last, month_day_nano.value(2));
    assert_eq!(last, month_day_nano.values()[2]);
}
| |
#[test]
fn test_duration_array_from_vec() {
    // The same checks are run for every duration unit.
    macro_rules! check_duration_array {
        ($array:ty) => {{
            let arr = <$array>::from(vec![Some(1), None, Some(-5)]);
            assert_eq!(3, arr.len());
            assert_eq!(0, arr.offset());
            assert_eq!(1, arr.null_count());
            assert_eq!(1, arr.value(0));
            assert_eq!(1, arr.values()[0]);
            assert!(arr.is_null(1));
            assert_eq!(-5, arr.value(2));
            assert_eq!(-5, arr.values()[2]);
        }};
    }

    check_duration_array!(DurationSecondArray);
    check_duration_array!(DurationMillisecondArray);
    check_duration_array!(DurationMicrosecondArray);
    check_duration_array!(DurationNanosecondArray);
}
| |
#[test]
fn test_timestamp_array_from_vec() {
    // The same checks are run for every timestamp unit.
    macro_rules! check_timestamp_array {
        ($array:ty) => {{
            let arr = <$array>::from(vec![1, -5]);
            assert_eq!(2, arr.len());
            assert_eq!(0, arr.offset());
            assert_eq!(0, arr.null_count());
            assert_eq!(1, arr.value(0));
            assert_eq!(-5, arr.value(1));
            assert_eq!(&[1, -5], arr.values());
        }};
    }

    check_timestamp_array!(TimestampSecondArray);
    check_timestamp_array!(TimestampMillisecondArray);
    check_timestamp_array!(TimestampMicrosecondArray);
    check_timestamp_array!(TimestampNanosecondArray);
}
| |
#[test]
fn test_primitive_array_slice() {
    let source = Int32Array::from(vec![
        Some(0),
        None,
        Some(2),
        None,
        Some(4),
        Some(5),
        Some(6),
        None,
        None,
    ]);
    assert_eq!(9, source.len());
    assert_eq!(0, source.offset());
    assert_eq!(4, source.null_count());

    // First slice: rows 2..7 of the source, of which only row 1 is null.
    let first = source.slice(2, 5);
    assert_eq!(5, first.len());
    assert_eq!(1, first.null_count());

    for idx in 0..first.len() {
        let expect_null = idx == 1;
        assert_eq!(expect_null, first.is_null(idx));
        assert_eq!(!expect_null, first.is_valid(idx));
    }
    let first_ints = first.as_any().downcast_ref::<Int32Array>().unwrap();
    assert_eq!(2, first_ints.values()[0]);
    assert_eq!(&[4, 5, 6], &first_ints.values()[2..5]);

    // Slice of the slice: contains no nulls.
    let second = first.slice(2, 3);
    assert_eq!(3, second.len());
    assert_eq!(0, second.null_count());

    let second_ints = second.as_any().downcast_ref::<Int32Array>().unwrap();
    assert_eq!(&[4, 5, 6], second_ints.values());
    assert_eq!(4, second_ints.value(0));
    assert_eq!(5, second_ints.value(1));
    assert_eq!(6, second_ints.value(2));
}
| |
/// Slicing a nullable `BooleanArray` must report the slice offset and expose
/// the validity and values of the selected window.
#[test]
fn test_boolean_array_slice() {
    let source = BooleanArray::from(vec![
        Some(true),
        None,
        Some(false),
        None,
        Some(true),
        Some(false),
        Some(true),
        Some(false),
        None,
        Some(true),
    ]);

    assert_eq!(10, source.len());
    assert_eq!(0, source.offset());
    assert_eq!(3, source.null_count());

    let window = source.slice(3, 5);
    assert_eq!(5, window.len());
    assert_eq!(3, window.offset());
    assert_eq!(1, window.null_count());

    let view = window.as_any().downcast_ref::<BooleanArray>().unwrap();

    // Expected content of the window, slot by slot (`None` marks a null).
    let expected = [None, Some(true), Some(false), Some(true), Some(false)];
    for (idx, want) in expected.iter().enumerate() {
        match *want {
            None => assert!(!view.is_valid(idx)),
            Some(value) => {
                assert!(view.is_valid(idx));
                assert_eq!(value, view.value(idx));
            }
        }
    }
}
| |
/// `Debug` output of a small non-null `Int32Array` lists every element.
#[test]
fn test_int32_fmt_debug() {
    let expected = "PrimitiveArray<Int32>\n[\n 0,\n 1,\n 2,\n 3,\n 4,\n]";
    let array = Int32Array::from(vec![0, 1, 2, 3, 4]);
    assert_eq!(expected, format!("{array:?}"));
}
| |
/// For every length from 1 to 20, `Debug` output prints all elements, one
/// per line.
#[test]
fn test_fmt_debug_up_to_20_elements() {
    for len in 1..=20 {
        let values: Vec<i16> = (0..len).collect();
        // One line per element, in order.
        let rows = values
            .iter()
            .map(|v| format!(" {v},"))
            .collect::<Vec<String>>()
            .join("\n");
        let expected = format!("PrimitiveArray<Int16>\n[\n{rows}\n]");

        let array = Int16Array::from(values);
        assert_eq!(expected, format!("{array:?}"));
    }
}
| |
/// `Debug` output renders a null slot as `null`.
#[test]
fn test_int32_with_null_fmt_debug() {
    let expected = "PrimitiveArray<Int32>\n[\n 0,\n 1,\n null,\n 3,\n 4,\n]";

    // Two values, one null, two values.
    let mut values = Int32Array::builder(3);
    values.append_slice(&[0, 1]);
    values.append_null();
    values.append_slice(&[3, 4]);
    let array = values.finish();

    assert_eq!(expected, format!("{array:?}"));
}
| |
/// `Debug` output of a timezone-less millisecond timestamp array prints
/// date-times without an offset, including a negative (pre-epoch) value.
#[test]
fn test_timestamp_fmt_debug() {
    let expected = "PrimitiveArray<Timestamp(Millisecond, None)>\n[\n 2018-12-31T00:00:00,\n 2018-12-31T00:00:00,\n 1921-01-02T00:00:00,\n]";
    let array: PrimitiveArray<TimestampMillisecondType> =
        TimestampMillisecondArray::from(vec![1546214400000, 1546214400000, -1546214400000]);
    assert_eq!(expected, format!("{array:?}"));
}
| |
/// After `with_timezone_utc`, `Debug` output carries the `+00:00` offset in
/// both the data type and every element.
#[test]
fn test_timestamp_utc_fmt_debug() {
    let expected = "PrimitiveArray<Timestamp(Millisecond, Some(\"+00:00\"))>\n[\n 2018-12-31T00:00:00+00:00,\n 2018-12-31T00:00:00+00:00,\n 1921-01-02T00:00:00+00:00,\n]";
    let array: PrimitiveArray<TimestampMillisecondType> =
        TimestampMillisecondArray::from(vec![1546214400000, 1546214400000, -1546214400000])
            .with_timezone_utc();
    assert_eq!(expected, format!("{array:?}"));
}
| |
/// With the `chrono-tz` feature, a named time zone is resolved and elements
/// are printed in that zone's local time with its offset.
#[test]
#[cfg(feature = "chrono-tz")]
fn test_timestamp_with_named_tz_fmt_debug() {
    let expected = "PrimitiveArray<Timestamp(Millisecond, Some(\"Asia/Taipei\"))>\n[\n 2018-12-31T08:00:00+08:00,\n 2018-12-31T08:00:00+08:00,\n 1921-01-02T08:00:00+08:00,\n]";
    let array: PrimitiveArray<TimestampMillisecondType> =
        TimestampMillisecondArray::from(vec![1546214400000, 1546214400000, -1546214400000])
            .with_timezone("Asia/Taipei".to_string());
    assert_eq!(expected, format!("{array:?}"));
}
| |
/// Without the `chrono-tz` feature, a named time zone cannot be resolved:
/// each element is printed without an offset and tagged as an unknown zone.
#[test]
#[cfg(not(feature = "chrono-tz"))]
fn test_timestamp_with_named_tz_fmt_debug() {
    let arr: PrimitiveArray<TimestampMillisecondType> =
        TimestampMillisecondArray::from(vec![1546214400000, 1546214400000, -1546214400000])
            .with_timezone("Asia/Taipei".to_string());

    // The leftover debugging `println!` was removed; `assert_eq!` already
    // reports both strings when the comparison fails.
    assert_eq!(
        "PrimitiveArray<Timestamp(Millisecond, Some(\"Asia/Taipei\"))>\n[\n 2018-12-31T00:00:00 (Unknown Time Zone 'Asia/Taipei'),\n 2018-12-31T00:00:00 (Unknown Time Zone 'Asia/Taipei'),\n 1921-01-02T00:00:00 (Unknown Time Zone 'Asia/Taipei'),\n]",
        format!("{arr:?}")
    );
}
| |
/// A fixed-offset time zone string (`+08:00`) shifts the printed local time
/// and is echoed as the offset of every element.
#[test]
fn test_timestamp_with_fixed_offset_tz_fmt_debug() {
    let expected = "PrimitiveArray<Timestamp(Millisecond, Some(\"+08:00\"))>\n[\n 2018-12-31T08:00:00+08:00,\n 2018-12-31T08:00:00+08:00,\n 1921-01-02T08:00:00+08:00,\n]";
    let array: PrimitiveArray<TimestampMillisecondType> =
        TimestampMillisecondArray::from(vec![1546214400000, 1546214400000, -1546214400000])
            .with_timezone("+08:00".to_string());
    assert_eq!(expected, format!("{array:?}"));
}
| |
/// An unparseable time zone string does not break `Debug`: each element is
/// printed without an offset and tagged as an unknown zone.
#[test]
fn test_timestamp_with_incorrect_tz_fmt_debug() {
    let expected = "PrimitiveArray<Timestamp(Millisecond, Some(\"xxx\"))>\n[\n 2018-12-31T00:00:00 (Unknown Time Zone 'xxx'),\n 2018-12-31T00:00:00 (Unknown Time Zone 'xxx'),\n 1921-01-02T00:00:00 (Unknown Time Zone 'xxx'),\n]";
    let array: PrimitiveArray<TimestampMillisecondType> =
        TimestampMillisecondArray::from(vec![1546214400000, 1546214400000, -1546214400000])
            .with_timezone("xxx".to_string());
    assert_eq!(expected, format!("{array:?}"));
}
| |
/// With `chrono-tz`, a zone with daylight saving prints the offset that is in
/// effect at each instant (the expected output mixes `-07:00` and `-06:00`).
#[test]
#[cfg(feature = "chrono-tz")]
fn test_timestamp_with_tz_with_daylight_saving_fmt_debug() {
    let expected = "PrimitiveArray<Timestamp(Millisecond, Some(\"America/Denver\"))>\n[\n 2022-03-13T01:59:59-07:00,\n 2022-03-13T03:00:00-06:00,\n 2022-11-06T00:59:59-06:00,\n 2022-11-06T01:00:00-06:00,\n]";
    // Two pairs of instants that are exactly one second apart.
    let instants = vec![1647161999000, 1647162000000, 1667717999000, 1667718000000];
    let array: PrimitiveArray<TimestampMillisecondType> =
        TimestampMillisecondArray::from(instants).with_timezone("America/Denver".to_string());
    assert_eq!(expected, format!("{array:?}"));
}
| |
/// `Debug` output of a `Date32` array prints calendar dates, including a
/// negative (pre-epoch) day count.
#[test]
fn test_date32_fmt_debug() {
    let expected = "PrimitiveArray<Date32>\n[\n 2003-10-31,\n 2007-02-04,\n 1969-01-01,\n]";
    let days: PrimitiveArray<Date32Type> = vec![12356, 13548, -365].into();
    assert_eq!(expected, format!("{days:?}"));
}
| |
/// `Debug` output of a `Time32(Second)` array prints `HH:MM:SS` times of day.
#[test]
fn test_time32second_fmt_debug() {
    let expected = "PrimitiveArray<Time32(Second)>\n[\n 02:00:01,\n 16:40:54,\n]";
    let times: PrimitiveArray<Time32SecondType> = vec![7201, 60054].into();
    assert_eq!(expected, format!("{times:?}"));
}
| |
/// Negative second counts cannot be converted to a time of day; `Debug`
/// reports a cast error for each such element instead of panicking.
#[test]
fn test_time32second_invalid_neg() {
    let expected = "PrimitiveArray<Time32(Second)>\n[\n Cast error: Failed to convert -7201 to temporal for Time32(Second),\n Cast error: Failed to convert -60054 to temporal for Time32(Second),\n]";
    let times: PrimitiveArray<Time32SecondType> = vec![-7201, -60054].into();
    assert_eq!(expected, format!("{times:?}"));
}
| |
/// A microsecond timestamp too large to be rendered as a date-time is printed
/// as `null` by `Debug`.
///
/// Reproduces https://github.com/apache/arrow-datafusion/issues/3832
#[test]
fn test_timestamp_micros_out_of_range() {
    let expected = "PrimitiveArray<Timestamp(Microsecond, None)>\n[\n null,\n]";
    let array: PrimitiveArray<TimestampMicrosecondType> = vec![9065525203050843594].into();
    assert_eq!(expected, format!("{array:?}"));
}
| |
/// Building an `Int32Array` from `ArrayData` with a non-zero offset must
/// expose exactly the offset window of the underlying buffer.
#[test]
fn test_primitive_array_builder() {
    // Seven i32 values; the array below views five of them starting at index 2.
    let buf = Buffer::from_slice_ref([0i32, 1, 2, 3, 4, 5, 6]);
    // Same window expressed in bytes: skip 2 * 4 bytes, keep 5 * 4 bytes.
    let buf2 = buf.slice_with_length(8, 20);
    let data = ArrayData::builder(DataType::Int32)
        .len(5)
        .offset(2)
        .add_buffer(buf)
        .build()
        .unwrap();
    let arr = Int32Array::from(data);
    assert_eq!(&buf2, arr.values.inner());
    assert_eq!(5, arr.len());
    assert_eq!(0, arr.null_count());
    // Check every element of the window; the earlier loop stopped after the
    // first three and left the last two unchecked.
    for i in 0..arr.len() {
        assert_eq!((i + 2) as i32, arr.value(i));
    }
}
| |
/// `from_iter_values` builds a non-null array that preserves the iteration
/// order of its input.
#[test]
fn test_primitive_from_iter_values() {
    let array: PrimitiveArray<Int32Type> = PrimitiveArray::from_iter_values(0..10);
    assert_eq!(10, array.len());
    assert_eq!(0, array.null_count());
    for (idx, expected) in (0..10i32).enumerate() {
        assert_eq!(expected, array.value(idx));
    }
}
| |
/// Collecting from an iterator whose `size_hint` upper bound over-estimates
/// the real item count must yield an array of the real length.
#[test]
fn test_primitive_array_from_unbound_iter() {
    // The source range is unbounded; `scan` stops after ten items and `take`
    // only caps the advertised upper bound at 100.
    let value_iter = (0..)
        .scan(0usize, |emitted, i| {
            if *emitted >= 10 {
                return None;
            }
            *emitted += 1;
            Some(Some(i))
        })
        .take(100);

    // The advertised upper bound comes from `take`, not from the real count.
    let (_, upper_size_bound) = value_iter.size_hint();
    assert_eq!(upper_size_bound, Some(100));

    // The collected array holds only the ten items actually produced.
    let collected: PrimitiveArray<Int32Type> = value_iter.collect();
    assert_eq!(collected.len(), 10);
}
| |
/// Collecting an iterator of only `Some` values produces an array without a
/// null buffer.
#[test]
fn test_primitive_array_from_non_null_iter() {
    let all_present = (0..10_i32).map(Some);
    let array = PrimitiveArray::<Int32Type>::from_iter(all_present);
    assert_eq!(array.len(), 10);
    assert_eq!(array.null_count(), 0);
    assert!(array.nulls().is_none());
    assert_eq!(array.values(), &[0, 1, 2, 3, 4, 5, 6, 7, 8, 9]);
}
| |
/// Converting `ArrayData` that carries two buffers into a `PrimitiveArray`
/// must panic.
#[test]
#[should_panic(
    expected = "PrimitiveArray data should contain a single buffer only (values buffer)"
)]
// Not compiled with `force_validate`: the error message differs there, see
// https://github.com/apache/arrow-rs/issues/1545
#[cfg(not(feature = "force_validate"))]
fn test_primitive_array_invalid_buffer_len() {
    let values = Buffer::from_slice_ref([0i32, 1, 2, 3, 4]);
    // SAFETY: the data is deliberately malformed (two buffers) and is handed
    // only to the conversion below, which this test expects to panic.
    let malformed = unsafe {
        ArrayData::builder(DataType::Int32)
            .add_buffer(values.clone())
            .add_buffer(values)
            .len(5)
            .build_unchecked()
    };

    drop(Int32Array::from(malformed));
}
| |
/// An array can be moved into another thread and read there.
#[test]
fn test_access_array_concurrently() {
    let array = Int32Array::from(vec![5, 6, 7, 8, 9]);
    let worker = std::thread::spawn(move || array.value(3));
    let outcome = worker.join();

    assert!(outcome.is_ok());
    assert_eq!(8, outcome.unwrap());
}
| |
/// Collecting plain values and collecting the same values wrapped in `Some`
/// produce equal arrays.
#[test]
fn test_primitive_array_creation() {
    let from_values: Int8Array = [10_i8, 11, 12, 13, 14].into_iter().collect();
    let from_options: Int8Array = [10_i8, 11, 12, 13, 14].into_iter().map(Some).collect();

    assert_eq!(from_values, from_options);
}
| |
/// `value` panics with a descriptive message when the index is past the end
/// of the array.
#[test]
#[should_panic(
    expected = "Trying to access an element at index 4 from a PrimitiveArray of length 3"
)]
fn test_string_array_get_value_index_out_of_bound() {
    let three_items: Int8Array = [10_i8, 11, 12].into_iter().collect();

    let _ = three_items.value(4);
}
| |
/// Converting `ArrayData` of one primitive type into an array of a different
/// primitive type must panic.
#[test]
#[should_panic(expected = "PrimitiveArray expected data type Int64 got Int32")]
fn test_from_array_data_validation() {
    let int32_data = PrimitiveArray::<Int32Type>::from_iter([1, 2, 3]).into_data();
    let _ = PrimitiveArray::<Int64Type>::from(int32_data);
}
| |
/// Every construction path for a `Decimal128` array preserves the raw `i128`
/// values, including the extremes.
#[test]
fn test_decimal128() {
    let values: Vec<_> = vec![0, 1, -1, i128::MIN, i128::MAX];

    // From an iterator.
    let collected: PrimitiveArray<Decimal128Type> =
        PrimitiveArray::from_iter(values.iter().copied());
    assert_eq!(collected.values(), &values);

    // From an iterator of values.
    let from_values: PrimitiveArray<Decimal128Type> =
        PrimitiveArray::from_iter_values(values.iter().copied());
    assert_eq!(from_values.values(), &values);

    // From an owned `Vec`.
    let from_vec = PrimitiveArray::<Decimal128Type>::from(values.clone());
    assert_eq!(from_vec.values(), &values);

    // Round trip through `ArrayData`.
    let round_trip = PrimitiveArray::<Decimal128Type>::from(from_vec.to_data());
    assert_eq!(round_trip.values(), &values);
}
| |
/// Every construction path for a `Decimal256` array preserves the raw `i256`
/// values, including the extremes.
#[test]
fn test_decimal256() {
    let values: Vec<_> = vec![i256::ZERO, i256::ONE, i256::MINUS_ONE, i256::MIN, i256::MAX];

    // From an iterator.
    let collected: PrimitiveArray<Decimal256Type> =
        PrimitiveArray::from_iter(values.iter().copied());
    assert_eq!(collected.values(), &values);

    // From an iterator of values.
    let from_values: PrimitiveArray<Decimal256Type> =
        PrimitiveArray::from_iter_values(values.iter().copied());
    assert_eq!(from_values.values(), &values);

    // From an owned `Vec`.
    let from_vec = PrimitiveArray::<Decimal256Type>::from(values.clone());
    assert_eq!(from_vec.values(), &values);

    // Round trip through `ArrayData`.
    let round_trip = PrimitiveArray::<Decimal256Type>::from(from_vec.to_data());
    assert_eq!(round_trip.values(), &values);
}
| |
/// A `Decimal128Array` built from a raw byte buffer must decode each 16-byte
/// slot as one `i128`.
#[test]
fn test_decimal_array() {
    // 16-byte little-endian two's-complement encodings of +/-8_887_000_000.
    // The previous commented-out sketch of the negative value had 255 in
    // byte 4; the correct byte is 253.
    let val_8887: [u8; 16] = [192, 219, 180, 17, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0];
    let val_neg_8887: [u8; 16] = [
        64, 36, 75, 238, 253, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
    ];
    // Guard the hand-written literals against typos.
    assert_eq!(val_8887, 8_887_000_000_i128.to_le_bytes());
    assert_eq!(val_neg_8887, (-8_887_000_000_i128).to_le_bytes());

    // Both encodings back to back form the 32-byte values buffer.
    let values = [val_8887, val_neg_8887].concat();
    let array_data = ArrayData::builder(DataType::Decimal128(38, 6))
        .len(2)
        .add_buffer(Buffer::from(&values[..]))
        .build()
        .unwrap();
    let decimal_array = Decimal128Array::from(array_data);
    assert_eq!(8_887_000_000_i128, decimal_array.value(0));
    assert_eq!(-8_887_000_000_i128, decimal_array.value(1));
}
| |
/// `with_precision_and_scale` accepts values that exceed the precision;
/// the overflow is only reported by an explicit `validate_decimal_precision`.
#[test]
fn test_decimal_append_error_value() {
    // Case 1: precision 5, first value has six digits.
    let mut builder = Decimal128Builder::with_capacity(10);
    builder.append_value(123456);
    builder.append_value(12345);
    let outcome = builder.finish().with_precision_and_scale(5, 3);
    assert!(outcome.is_ok());
    let array = outcome.unwrap();
    assert_eq!("12.345", array.value_as_string(1));

    // Explicit validation reports the oversized value.
    let error = array.validate_decimal_precision(5).unwrap_err();
    assert_eq!(
        "Invalid argument error: 123456 is too large to store in a Decimal128 of precision 5. Max is 99999",
        error.to_string()
    );

    // Case 2: precision 2, values on both sides of the limit and both signs.
    let mut builder = Decimal128Builder::new();
    for value in [100, 99, -100, -99] {
        builder.append_value(value);
    }
    let outcome = builder.finish().with_precision_and_scale(2, 1);
    assert!(outcome.is_ok());
    let array = outcome.unwrap();
    assert_eq!("9.9", array.value_as_string(1));
    assert_eq!("-9.9", array.value_as_string(3));

    // Explicit validation reports the first oversized value.
    let error = array.validate_decimal_precision(2).unwrap_err();
    assert_eq!(
        "Invalid argument error: 100 is too large to store in a Decimal128 of precision 2. Max is 99",
        error.to_string()
    );
}
| |
/// `from_iter_values` builds a non-null `Decimal128Array` with data type
/// `Decimal128(38, 10)`.
#[test]
fn test_decimal_from_iter_values() {
    let decimals = Decimal128Array::from_iter_values(vec![-100, 0, 101]);
    assert_eq!(decimals.len(), 3);
    assert_eq!(decimals.data_type(), &DataType::Decimal128(38, 10));

    assert_eq!(-100_i128, decimals.value(0));
    assert!(!decimals.is_null(0));

    assert_eq!(0_i128, decimals.value(1));
    assert!(!decimals.is_null(1));

    assert_eq!(101_i128, decimals.value(2));
    assert!(!decimals.is_null(2));
}
| |
/// Collecting `Option<i128>` items builds a `Decimal128Array` with data type
/// `Decimal128(38, 10)` and keeps the nulls.
#[test]
fn test_decimal_from_iter() {
    let decimals: Decimal128Array = vec![Some(-100), None, Some(101)].into_iter().collect();
    assert_eq!(decimals.len(), 3);
    assert_eq!(decimals.data_type(), &DataType::Decimal128(38, 10));

    assert_eq!(-100_i128, decimals.value(0));
    assert!(!decimals.is_null(0));

    assert!(decimals.is_null(1));

    assert_eq!(101_i128, decimals.value(2));
    assert!(!decimals.is_null(2));
}
| |
/// The array iterator reports an exact `size_hint` that shrinks as items are
/// consumed and stays at zero after exhaustion.
#[test]
fn test_decimal_iter_sized() {
    let array: Decimal128Array = vec![Some(-100), None, Some(101)].into_iter().collect();
    let mut remaining = array.into_iter();

    // The array itself holds three slots.
    assert_eq!(array.len(), 3);

    // The hint tracks the number of items still to be yielded.
    assert_eq!(remaining.size_hint(), (3, Some(3)));
    remaining.next().unwrap();
    assert_eq!(remaining.size_hint(), (2, Some(2)));
    remaining.next().unwrap();
    remaining.next().unwrap();
    assert_eq!(remaining.size_hint(), (0, Some(0)));

    // Exhausted: no more items, and the hint does not change.
    assert!(remaining.next().is_none());
    assert_eq!(remaining.size_hint(), (0, Some(0)));
}
| |
/// `value_as_string` places the decimal point according to the scale and
/// keeps the sign and leading zero for small magnitudes.
#[test]
fn test_decimal_array_value_as_string() {
    let decimals = [123450, -123450, 100, -100, 10, -10, 0]
        .into_iter()
        .map(Some)
        .collect::<Decimal128Array>()
        .with_precision_and_scale(6, 3)
        .unwrap();

    // Expected rendering of each input above, in the same order.
    let expected = [
        "123.450", "-123.450", "0.100", "-0.100", "0.010", "-0.010", "0.000",
    ];
    for (idx, want) in expected.iter().enumerate() {
        assert_eq!(*want, decimals.value_as_string(idx));
    }
}
| |
/// `with_precision_and_scale` updates the data type, precision and scale, and
/// the new scale is reflected in the string rendering.
#[test]
fn test_decimal_array_with_precision_and_scale() {
    let decimals = Decimal128Array::from_iter_values([12345, 456, 7890, -123223423432432])
        .with_precision_and_scale(20, 2)
        .unwrap();

    assert_eq!(decimals.data_type(), &DataType::Decimal128(20, 2));
    assert_eq!(decimals.precision(), 20);
    assert_eq!(decimals.scale(), 2);

    let expected = vec!["123.45", "4.56", "78.90", "-1232234234324.32"];
    let actual: Vec<_> = (0..decimals.len())
        .map(|idx| decimals.value_as_string(idx))
        .collect();

    assert_eq!(actual, expected);
}
| |
/// Validating against a precision too small for a stored value fails, and
/// unwrapping that failure panics with the offending value in the message.
#[test]
#[should_panic(
    expected = "-123223423432432 is too small to store in a Decimal128 of precision 5. Min is -99999"
)]
fn test_decimal_array_with_precision_and_scale_out_of_range() {
    // Precision 5 cannot hold the last value.
    let decimals = Decimal128Array::from_iter_values([12345, 456, 7890, -123223423432432])
        .with_precision_and_scale(5, 2)
        .unwrap();
    let validation = decimals.validate_decimal_precision(5);
    validation.unwrap();
}
| |
/// A precision of zero is rejected.
#[test]
#[should_panic(expected = "precision cannot be 0, has to be between [1, 38]")]
fn test_decimal_array_with_precision_zero() {
    let outcome = Decimal128Array::from_iter_values([12345, 456]).with_precision_and_scale(0, 2);
    outcome.unwrap();
}
| |
/// A precision above the `Decimal128` maximum of 38 is rejected.
#[test]
#[should_panic(expected = "precision 40 is greater than max 38")]
fn test_decimal_array_with_precision_and_scale_invalid_precision() {
    let outcome = Decimal128Array::from_iter_values([12345, 456]).with_precision_and_scale(40, 2);
    outcome.unwrap();
}
| |
/// A scale above the `Decimal128` maximum of 38 is rejected.
#[test]
#[should_panic(expected = "scale 40 is greater than max 38")]
fn test_decimal_array_with_precision_and_scale_invalid_scale() {
    let outcome = Decimal128Array::from_iter_values([12345, 456]).with_precision_and_scale(20, 40);
    outcome.unwrap();
}
| |
/// A scale larger than the precision is rejected.
#[test]
#[should_panic(expected = "scale 10 is greater than precision 4")]
fn test_decimal_array_with_precision_and_scale_invalid_precision_and_scale() {
    let outcome = Decimal128Array::from_iter_values([12345, 456]).with_precision_and_scale(4, 10);
    outcome.unwrap();
}
| |
/// `null_if_overflow_precision` replaces values that do not fit the given
/// precision with nulls and leaves the rest (including existing nulls) alone.
#[test]
fn test_decimal_array_set_null_if_overflow_with_precision() {
    let input = Decimal128Array::from(vec![Some(123456), Some(123), None, Some(123456)]);
    let expected = Decimal128Array::from(vec![None, Some(123), None, None]);

    let actual = input.null_if_overflow_precision(5);
    assert_eq!(actual, expected);
}
| |
/// Iterating a `Decimal256Array` yields `Some(value)` for valid slots and
/// `None` for nulls, in order.
#[test]
fn test_decimal256_iter() {
    let first = i256::from_i128(12345);
    let second = i256::from_i128(56789);

    // value, null, value
    let mut builder = Decimal256Builder::with_capacity(30);
    builder.append_value(first);
    builder.append_null();
    builder.append_value(second);

    let array: Decimal256Array = builder.finish().with_precision_and_scale(76, 6).unwrap();

    let yielded: Vec<_> = array.iter().collect();
    assert_eq!(vec![Some(first), None, Some(second)], yielded);
}
| |
/// A `Decimal256Array` collected from options keeps values and nulls, and
/// takes on the precision and scale applied afterwards.
#[test]
fn test_from_iter_decimal256array() {
    let first = i256::from_i128(12345);
    let last = i256::from_i128(56789);

    let collected: Decimal256Array = vec![Some(first), None, Some(last)].into_iter().collect();
    let array = collected.with_precision_and_scale(76, 10).unwrap();

    assert_eq!(array.len(), 3);
    assert_eq!(array.data_type(), &DataType::Decimal256(76, 10));

    assert_eq!(first, array.value(0));
    assert!(!array.is_null(0));

    assert!(array.is_null(1));

    assert_eq!(last, array.value(2));
    assert!(!array.is_null(2));
}
| |
/// A `Decimal128Array` collected from options keeps values and nulls, and
/// takes on the precision and scale applied afterwards.
#[test]
fn test_from_iter_decimal128array() {
    let collected: Decimal128Array = vec![Some(-100), None, Some(101)].into_iter().collect();
    let array = collected.with_precision_and_scale(38, 10).unwrap();

    assert_eq!(array.len(), 3);
    assert_eq!(array.data_type(), &DataType::Decimal128(38, 10));

    assert_eq!(-100_i128, array.value(0));
    assert!(!array.is_null(0));

    assert!(array.is_null(1));

    assert_eq!(101_i128, array.value(2));
    assert!(!array.is_null(2));
}
| |
/// `unary_opt` turns elements into nulls when the closure returns `None`,
/// and existing nulls stay null on a second pass.
#[test]
fn test_unary_opt() {
    // First pass: keep odd values only.
    let input = Int32Array::from(vec![1, 2, 3, 4, 5, 6, 7]);
    let odds = input.unary_opt::<_, Int32Type>(|x| if x % 2 != 0 { Some(x) } else { None });

    let odds_expected =
        Int32Array::from(vec![Some(1), None, Some(3), None, Some(5), None, Some(7)]);
    assert_eq!(odds, odds_expected);

    // Second pass on the nullable result: additionally drop multiples of 3.
    let filtered =
        odds_expected.unary_opt::<_, Int32Type>(|x| if x % 3 != 0 { Some(x) } else { None });
    let filtered_expected =
        Int32Array::from(vec![Some(1), None, None, None, Some(5), None, Some(7)]);
    assert_eq!(filtered, filtered_expected);
}
| |
/// `value` on a `Decimal128Array` panics with a descriptive message when the
/// index is past the end of the array.
#[test]
#[should_panic(
    expected = "Trying to access an element at index 4 from a PrimitiveArray of length 3"
)]
fn test_fixed_size_binary_array_get_value_index_out_of_bound() {
    let three_items = Decimal128Array::from(vec![-100, 0, 101]);
    let _ = three_items.value(4);
}
| |
/// An array can be turned back into a builder, its values edited in place,
/// and a new array finished from it.
#[test]
fn test_into_builder() {
    let original: Int32Array = vec![1, 2, 3].into_iter().map(Some).collect();

    // Downcast out of the `ArrayRef`, then drop the `ArrayRef` before asking
    // for a builder.
    let boxed: ArrayRef = Arc::new(original);
    let col: Int32Array = downcast_array(&boxed);
    drop(boxed);

    let mut builder = col.into_builder().unwrap();

    let values = builder.values_slice_mut();
    assert_eq!(values, &[1, 2, 3]);

    // Overwrite all three slots.
    values.copy_from_slice(&[4, 2, 1]);

    let expected: Int32Array = vec![Some(4), Some(2), Some(1)].into_iter().collect();

    let rebuilt = builder.finish();
    assert_eq!(expected, rebuilt);
}
| |
/// `into_builder` must fail, handing the array back unchanged, when the
/// array was created from `to_data()` of an `ArrayRef` that is still alive.
#[test]
fn test_into_builder_cloned_array() {
    let original: Int32Array = vec![1, 2, 3].into_iter().map(Some).collect();

    let boxed: ArrayRef = Arc::new(original);

    // `boxed` stays alive for the rest of the test.
    let col: Int32Array = PrimitiveArray::<Int32Type>::from(boxed.to_data());

    if let Err(returned) = col.into_builder() {
        let expected: Int32Array = vec![1, 2, 3].into_iter().map(Some).collect();
        assert_eq!(expected, returned)
    } else {
        panic!("Should not get builder from cloned array")
    }
}
| |
/// `into_builder` must fail for an array obtained from a slice.
#[test]
fn test_into_builder_on_sliced_array() {
    let original: Int32Array = vec![1, 2, 3].into_iter().map(Some).collect();
    let window = original.slice(1, 2);
    let col: Int32Array = downcast_array(&window);

    drop(window);

    let outcome = col.into_builder();
    outcome.expect_err("Should not build builder from sliced array");
}
| |
/// `unary_mut` applies the closure to every value and preserves nulls.
#[test]
fn test_unary_mut() {
    // Without nulls.
    let input: Int32Array = vec![1, 2, 3].into_iter().map(Some).collect();
    let mapped = input.unary_mut(|x| x * 2 + 1).unwrap();
    let expected: Int32Array = vec![3, 5, 7].into_iter().map(Some).collect();
    assert_eq!(expected, mapped);

    // With a trailing null.
    let input: Int32Array = Int32Array::from(vec![Some(5), Some(7), None]);
    let mapped = input.unary_mut(|x| x * 2 + 1).unwrap();
    assert_eq!(mapped, Int32Array::from(vec![Some(11), Some(15), None]));
}
| |
/// Converting day-time interval data into a month-day-nano interval array
/// must panic.
#[test]
#[should_panic(
    expected = "PrimitiveArray expected data type Interval(MonthDayNano) got Interval(DayTime)"
)]
fn test_invalid_interval_type() {
    let day_time_data = IntervalDayTimeArray::from(vec![IntervalDayTime::ZERO]).into_data();
    let _ = IntervalMonthDayNanoArray::from(day_time_data);
}
| |
/// `timezone()` is `None` by default and returns the zone set through
/// `with_timezone`.
#[test]
fn test_timezone() {
    let without_tz = TimestampNanosecondArray::from_iter_values([1, 2]);
    assert_eq!(without_tz.timezone(), None);

    let with_tz = without_tz.with_timezone("+02:00");
    assert_eq!(with_tz.timezone(), Some("+02:00"));
}
| |
/// `new` accepts matching inputs; `try_new` reports a null buffer whose
/// length differs from the number of values.
#[test]
fn test_try_new() {
    // Valid: no nulls, and a null buffer of matching length.
    let _ = Int32Array::new(vec![1, 2, 3, 4].into(), None);
    let _ = Int32Array::new(vec![1, 2, 3, 4].into(), Some(NullBuffer::new_null(4)));

    // Invalid: four values but a null buffer of length three.
    let short_nulls = Some(NullBuffer::new_null(3));
    let err = Int32Array::try_new(vec![1, 2, 3, 4].into(), short_nulls).unwrap_err();

    assert_eq!(
        err.to_string(),
        "Invalid argument error: Incorrect length of null buffer for PrimitiveArray, expected 4 got 3"
    );

    // Attaching a timezone through the data type must not panic.
    let tz_type = DataType::Timestamp(TimeUnit::Nanosecond, Some("03:00".into()));
    let _ = TimestampNanosecondArray::new(vec![1, 2, 3, 4].into(), None).with_data_type(tz_type);
}
| |
/// `with_data_type` panics when the requested type does not match the
/// array's primitive type.
#[test]
#[should_panic(expected = "PrimitiveArray expected data type Int32 got Date32")]
fn test_with_data_type() {
    let ints = Int32Array::new(vec![1, 2, 3, 4].into(), None);
    let _ = ints.with_data_type(DataType::Date32);
}
| |
/// `Debug` output for `Time32(Second)`: values inside a day print as times,
/// values outside it print a cast error, and nulls print as `null`.
#[test]
fn test_time_32second_output() {
    let expected = "PrimitiveArray<Time32(Second)>\n[\n Cast error: Failed to convert -1 to temporal for Time32(Second),\n 00:00:00,\n 23:59:59,\n Cast error: Failed to convert 86400 to temporal for Time32(Second),\n Cast error: Failed to convert 86401 to temporal for Time32(Second),\n null,\n]";
    let times: Time32SecondArray = vec![
        Some(-1),
        Some(0),
        Some(86_399),
        Some(86_400),
        Some(86_401),
        None,
    ]
    .into();
    let rendered = format!("{times:?}");
    assert_eq!(expected, rendered);
}
| |
/// `Debug` output for `Time32(Millisecond)`: values inside a day print as
/// times, values outside it print a cast error, and nulls print as `null`.
#[test]
fn test_time_32millisecond_debug_output() {
    let expected = "PrimitiveArray<Time32(Millisecond)>\n[\n Cast error: Failed to convert -1 to temporal for Time32(Millisecond),\n 00:00:00,\n 23:59:59,\n Cast error: Failed to convert 86400000 to temporal for Time32(Millisecond),\n Cast error: Failed to convert 86401000 to temporal for Time32(Millisecond),\n null,\n]";
    let times: Time32MillisecondArray = vec![
        Some(-1),
        Some(0),
        Some(86_399_000),
        Some(86_400_000),
        Some(86_401_000),
        None,
    ]
    .into();
    let rendered = format!("{times:?}");
    assert_eq!(expected, rendered);
}
| |
/// `Debug` output for `Time64(Nanosecond)`: values inside a day print as
/// times, values outside it print a cast error, and nulls print as `null`.
#[test]
fn test_time_64nanosecond_debug_output() {
    let expected = "PrimitiveArray<Time64(Nanosecond)>\n[\n Cast error: Failed to convert -1 to temporal for Time64(Nanosecond),\n 00:00:00,\n 23:59:59,\n Cast error: Failed to convert 86400000000000 to temporal for Time64(Nanosecond),\n Cast error: Failed to convert 86401000000000 to temporal for Time64(Nanosecond),\n null,\n]";
    let times: Time64NanosecondArray = vec![
        Some(-1),
        Some(0),
        Some(86_399 * 1_000_000_000),
        Some(86_400 * 1_000_000_000),
        Some(86_401 * 1_000_000_000),
        None,
    ]
    .into();
    let rendered = format!("{times:?}");
    assert_eq!(expected, rendered);
}
| |
/// `Debug` output for `Time64(Microsecond)`: values inside a day print as
/// times, values outside it print a cast error, and nulls print as `null`.
#[test]
fn test_time_64microsecond_debug_output() {
    let expected = "PrimitiveArray<Time64(Microsecond)>\n[\n Cast error: Failed to convert -1 to temporal for Time64(Microsecond),\n 00:00:00,\n 23:59:59,\n Cast error: Failed to convert 86400000000 to temporal for Time64(Microsecond),\n Cast error: Failed to convert 86401000000 to temporal for Time64(Microsecond),\n null,\n]";
    let times: Time64MicrosecondArray = vec![
        Some(-1),
        Some(0),
        Some(86_399 * 1_000_000),
        Some(86_400 * 1_000_000),
        Some(86_401 * 1_000_000),
        None,
    ]
    .into();
    let rendered = format!("{times:?}");
    assert_eq!(expected, rendered);
}
| |
/// Smoke test: `into_builder` on an array that contains nulls must not
/// panic. The result itself is not inspected.
#[test]
fn test_primitive_with_nulls_into_builder() {
    let items = vec![
        Some(1),
        None,
        Some(3),
        Some(4),
        None,
        Some(7),
        None,
        Some(8),
    ];
    let with_nulls: Int32Array = items.into_iter().collect();
    let _ = with_nulls.into_builder();
}
| } |