blob: 8702b558d01f54670fb887850d04c8361a003a7f [file] [log] [blame]
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
//! [`zip`]: Combine values from two arrays based on a boolean mask
use crate::filter::{SlicesIterator, prep_null_mask_filter};
use arrow_array::cast::AsArray;
use arrow_array::types::{
BinaryType, BinaryViewType, ByteArrayType, ByteViewType, LargeBinaryType, LargeUtf8Type,
StringViewType, Utf8Type,
};
use arrow_array::*;
use arrow_buffer::{
BooleanBuffer, Buffer, MutableBuffer, NullBuffer, OffsetBuffer, OffsetBufferBuilder,
ScalarBuffer, ToByteSlice,
};
use arrow_data::transform::MutableArrayData;
use arrow_data::{ArrayData, ByteView};
use arrow_schema::{ArrowError, DataType};
use std::fmt::{Debug, Formatter};
use std::hash::Hash;
use std::marker::PhantomData;
use std::ops::Not;
use std::sync::{Arc, OnceLock};
/// Select, element by element, between two inputs according to a boolean mask.
///
/// - Positions where `mask` is `true` receive the value from `truthy`
/// - Positions where `mask` is `false` or `NULL` receive the value from `falsy`
///
/// Each of `truthy` and `falsy` may be either an array with the same length as
/// `mask`, or a scalar [`Datum`] holding exactly one element that is repeated
/// wherever it is selected.
///
/// # Errors
///
/// Returns [`ArrowError::InvalidArgumentError`] when:
/// - `truthy` and `falsy` have different data types
/// - a scalar input does not contain exactly 1 element
/// - an array input does not have the same length as `mask`
///
/// # Example: `zip` two arrays
/// ```
/// # use std::sync::Arc;
/// # use arrow_array::{ArrayRef, BooleanArray, Int32Array};
/// # use arrow_select::zip::zip;
/// // mask: [true, true, false, NULL, true]
/// let mask = BooleanArray::from(vec![
///   Some(true), Some(true), Some(false), None, Some(true)
/// ]);
/// // truthy array: [1, NULL, 3, 4, 5]
/// let truthy = Int32Array::from(vec![
///   Some(1), None, Some(3), Some(4), Some(5)
/// ]);
/// // falsy array: [10, 20, 30, 40, 50]
/// let falsy = Int32Array::from(vec![
///   Some(10), Some(20), Some(30), Some(40), Some(50)
/// ]);
/// // With this mask, the first, second and last values come from `truthy`,
/// // while the third and fourth values come from `falsy`
/// let result = zip(&mask, &truthy, &falsy).unwrap();
/// // Expected: [1, NULL, 30, 40, 5]
/// let expected: ArrayRef = Arc::new(Int32Array::from(vec![
///   Some(1), None, Some(30), Some(40), Some(5)
/// ]));
/// assert_eq!(&result, &expected);
/// ```
///
/// # Example: `zip` an array with a scalar
///
/// `zip` can replace selected values of an array with a scalar
///
/// ```
/// # use std::sync::Arc;
/// # use arrow_array::{ArrayRef, BooleanArray, Int32Array};
/// # use arrow_select::zip::zip;
/// // mask: [true, true, false, NULL, true]
/// let mask = BooleanArray::from(vec![
///   Some(true), Some(true), Some(false), None, Some(true)
/// ]);
/// // array: [1, NULL, 3, 4, 5]
/// let arr = Int32Array::from(vec![
///   Some(1), None, Some(3), Some(4), Some(5)
/// ]);
/// // scalar: 42
/// let scalar = Int32Array::new_scalar(42);
/// // The first, second and last values are kept from `arr`,
/// // the third and fourth values are filled with the scalar 42
/// let result = zip(&mask, &arr, &scalar).unwrap();
/// // Expected: [1, NULL, 42, 42, 5]
/// let expected: ArrayRef = Arc::new(Int32Array::from(vec![
///   Some(1), None, Some(42), Some(42), Some(5)
/// ]));
/// assert_eq!(&result, &expected);
/// ```
pub fn zip(
    mask: &BooleanArray,
    truthy: &dyn Datum,
    falsy: &dyn Datum,
) -> Result<ArrayRef, ArrowError> {
    let (truthy_array, truthy_is_scalar) = truthy.get();
    let (falsy_array, falsy_is_scalar) = falsy.get();

    // Two scalars: hand over to the dedicated scalar zipper, which validates its own inputs.
    if truthy_is_scalar && falsy_is_scalar {
        return ScalarZipper::try_new(truthy, falsy)?
            .zip_impl
            .create_output(mask);
    }

    if truthy_array.data_type() != falsy_array.data_type() {
        return Err(ArrowError::InvalidArgumentError(
            "arguments need to have the same data type".into(),
        ));
    }

    // A scalar side must hold exactly one element; an array side must line up with the mask.
    let mask_len = mask.len();
    let check_length = |len: usize, is_scalar: bool| -> Result<(), ArrowError> {
        match is_scalar {
            true if len != 1 => Err(ArrowError::InvalidArgumentError(
                "scalar arrays must have 1 element".into(),
            )),
            false if len != mask_len => Err(ArrowError::InvalidArgumentError(
                "all arrays should have the same length".into(),
            )),
            _ => Ok(()),
        }
    };
    check_length(truthy_array.len(), truthy_is_scalar)?;
    check_length(falsy_array.len(), falsy_is_scalar)?;

    let falsy_data = falsy_array.to_data();
    let truthy_data = truthy_array.to_data();
    zip_impl(
        mask,
        &truthy_data,
        truthy_is_scalar,
        &falsy_data,
        falsy_is_scalar,
    )
}
fn zip_impl(
mask: &BooleanArray,
truthy: &ArrayData,
truthy_is_scalar: bool,
falsy: &ArrayData,
falsy_is_scalar: bool,
) -> Result<ArrayRef, ArrowError> {
let mut mutable = MutableArrayData::new(vec![truthy, falsy], false, truthy.len());
// the SlicesIterator slices only the true values. So the gaps left by this iterator we need to
// fill with falsy values
// keep track of how much is filled
let mut filled = 0;
let mask_buffer = maybe_prep_null_mask_filter(mask);
SlicesIterator::from(&mask_buffer).for_each(|(start, end)| {
// the gap needs to be filled with falsy values
if start > filled {
if falsy_is_scalar {
for _ in filled..start {
// Copy the first item from the 'falsy' array into the output buffer.
mutable.extend(1, 0, 1);
}
} else {
mutable.extend(1, filled, start);
}
}
// fill with truthy values
if truthy_is_scalar {
for _ in start..end {
// Copy the first item from the 'truthy' array into the output buffer.
mutable.extend(0, 0, 1);
}
} else {
mutable.extend(0, start, end);
}
filled = end;
});
// the remaining part is falsy
if filled < mask.len() {
if falsy_is_scalar {
for _ in filled..mask.len() {
// Copy the first item from the 'falsy' array into the output buffer.
mutable.extend(1, 0, 1);
}
} else {
mutable.extend(1, filled, mask.len());
}
}
let data = mutable.freeze();
Ok(make_array(data))
}
/// Zipper for 2 scalars
///
/// Built once from a truthy and a falsy scalar, it can then be applied to any number of
/// boolean masks via [`ScalarZipper::zip`].
///
/// Useful for using in `IF <expr> THEN <scalar> ELSE <scalar> END` expressions
///
/// # Example
/// ```
/// # use std::sync::Arc;
/// # use arrow_array::{ArrayRef, BooleanArray, Int32Array, Scalar, cast::AsArray, types::Int32Type};
///
/// # use arrow_select::zip::ScalarZipper;
/// let scalar_truthy = Scalar::new(Int32Array::from_value(42, 1));
/// let scalar_falsy = Scalar::new(Int32Array::from_value(123, 1));
/// let zipper = ScalarZipper::try_new(&scalar_truthy, &scalar_falsy).unwrap();
///
/// // Later when we have a boolean mask
/// let mask = BooleanArray::from(vec![true, false, true, false, true]);
/// let result = zipper.zip(&mask).unwrap();
/// let actual = result.as_primitive::<Int32Type>();
/// let expected = Int32Array::from(vec![Some(42), Some(123), Some(42), Some(123), Some(42)]);
/// assert_eq!(actual, &expected);
/// ```
///
#[derive(Debug, Clone)]
pub struct ScalarZipper {
// Type-specific implementation chosen in `try_new` from the scalars' data type.
zip_impl: Arc<dyn ZipImpl>,
}
impl ScalarZipper {
/// Try to create a new ScalarZipper from two scalar Datum
///
/// # Errors
/// returns error if:
/// - the two Datum have different data types
/// - either Datum is not a scalar (or has more than 1 element)
///
pub fn try_new(truthy: &dyn Datum, falsy: &dyn Datum) -> Result<Self, ArrowError> {
let (truthy, truthy_is_scalar) = truthy.get();
let (falsy, falsy_is_scalar) = falsy.get();
if truthy.data_type() != falsy.data_type() {
return Err(ArrowError::InvalidArgumentError(
"arguments need to have the same data type".into(),
));
}
if !truthy_is_scalar {
return Err(ArrowError::InvalidArgumentError(
"only scalar arrays are supported".into(),
));
}
if !falsy_is_scalar {
return Err(ArrowError::InvalidArgumentError(
"only scalar arrays are supported".into(),
));
}
if truthy.len() != 1 {
return Err(ArrowError::InvalidArgumentError(
"scalar arrays must have 1 element".into(),
));
}
if falsy.len() != 1 {
return Err(ArrowError::InvalidArgumentError(
"scalar arrays must have 1 element".into(),
));
}
macro_rules! primitive_size_helper {
($t:ty) => {
Arc::new(PrimitiveScalarImpl::<$t>::new(truthy, falsy)) as Arc<dyn ZipImpl>
};
}
let zip_impl = downcast_primitive! {
truthy.data_type() => (primitive_size_helper),
DataType::Utf8 => {
Arc::new(BytesScalarImpl::<Utf8Type>::new(truthy, falsy)) as Arc<dyn ZipImpl>
},
DataType::LargeUtf8 => {
Arc::new(BytesScalarImpl::<LargeUtf8Type>::new(truthy, falsy)) as Arc<dyn ZipImpl>
},
DataType::Binary => {
Arc::new(BytesScalarImpl::<BinaryType>::new(truthy, falsy)) as Arc<dyn ZipImpl>
},
DataType::LargeBinary => {
Arc::new(BytesScalarImpl::<LargeBinaryType>::new(truthy, falsy)) as Arc<dyn ZipImpl>
},
DataType::Utf8View => {
Arc::new(ByteViewScalarImpl::<StringViewType>::new(truthy, falsy)) as Arc<dyn ZipImpl>
},
DataType::BinaryView => {
Arc::new(ByteViewScalarImpl::<BinaryViewType>::new(truthy, falsy)) as Arc<dyn ZipImpl>
},
_ => {
Arc::new(FallbackImpl::new(truthy, falsy)) as Arc<dyn ZipImpl>
},
};
Ok(Self { zip_impl })
}
/// Creating output array based on input boolean array and the two scalar values the zipper was created with
/// See struct level documentation for examples.
pub fn zip(&self, mask: &BooleanArray) -> Result<ArrayRef, ArrowError> {
self.zip_impl.create_output(mask)
}
}
/// Impl for creating output array based on a mask
///
/// Each implementor in this file holds the truthy/falsy scalar pair for one family of
/// data types; [`ScalarZipper`] stores one behind an `Arc<dyn ZipImpl>`.
trait ZipImpl: Debug + Send + Sync {
/// Creating output array based on input boolean array
///
/// The returned array has one element per entry of `input`. The implementations in this
/// file treat null entries of `input` as `false`.
fn create_output(&self, input: &BooleanArray) -> Result<ArrayRef, ArrowError>;
}
/// Generic [`ZipImpl`] used for data types without a specialised implementation.
///
/// Keeps both scalars as [`ArrayData`] and delegates to `zip_impl` with both sides
/// flagged as scalar.
#[derive(Debug, PartialEq)]
struct FallbackImpl {
// Data of the scalar used where the mask is true.
truthy: ArrayData,
// Data of the scalar used where the mask is false or null.
falsy: ArrayData,
}
impl FallbackImpl {
    /// Stores the data of `left` as the truthy scalar and the data of `right` as the falsy one.
    fn new(left: &dyn Array, right: &dyn Array) -> Self {
        let truthy = left.to_data();
        let falsy = right.to_data();
        Self { truthy, falsy }
    }
}
impl ZipImpl for FallbackImpl {
    /// Zips the two stored scalars through the generic `zip_impl`, marking both as scalar.
    fn create_output(&self, predicate: &BooleanArray) -> Result<ArrayRef, ArrowError> {
        let Self { truthy, falsy } = self;
        zip_impl(predicate, truthy, true, falsy, true)
    }
}
/// [`ZipImpl`] for a pair of primitive scalars of type `T`.
struct PrimitiveScalarImpl<T: ArrowPrimitiveType> {
// Data type of the truthy scalar; re-applied to the output so that type parameters
// (decimal precision/scale, timestamp timezone) are kept.
data_type: DataType,
// Value used where the mask is true; `None` when the truthy scalar is null.
truthy: Option<T::Native>,
// Value used where the mask is false or null; `None` when the falsy scalar is null.
falsy: Option<T::Native>,
}
impl<T: ArrowPrimitiveType> Debug for PrimitiveScalarImpl<T> {
    /// Formats the data type and both optional scalar values.
    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
        let Self {
            data_type,
            truthy,
            falsy,
        } = self;
        let mut out = f.debug_struct("PrimitiveScalarImpl");
        out.field("data_type", data_type);
        out.field("truthy", truthy);
        out.field("falsy", falsy);
        out.finish()
    }
}
impl<T: ArrowPrimitiveType> PrimitiveScalarImpl<T> {
    /// Captures the data type of `truthy` together with the (optional) value of each scalar.
    fn new(truthy: &dyn Array, falsy: &dyn Array) -> Self {
        let data_type = truthy.data_type().clone();
        let truthy = Self::get_value_from_scalar(truthy);
        let falsy = Self::get_value_from_scalar(falsy);
        Self {
            data_type,
            truthy,
            falsy,
        }
    }

    /// Reads element 0 of `scalar`, yielding `None` when that element is null.
    fn get_value_from_scalar(scalar: &dyn Array) -> Option<T::Native> {
        (!scalar.is_null(0)).then(|| scalar.as_primitive::<T>().value(0))
    }

    /// Builds the parts of an array that holds `value` wherever `predicate` is set and
    /// null everywhere else.
    ///
    /// The values vector is filled with `value` at every position; `predicate` itself is
    /// reused as the validity bitmap.
    fn get_scalar_and_null_buffer_for_single_non_nullable(
        predicate: BooleanBuffer,
        value: T::Native,
    ) -> (Vec<T::Native>, Option<NullBuffer>) {
        let scalars = vec![value; predicate.len()];
        (scalars, Some(NullBuffer::new(predicate)))
    }
}
impl<T: ArrowPrimitiveType> ZipImpl for PrimitiveScalarImpl<T> {
    /// Produces a primitive array with one element per entry of `predicate`.
    ///
    /// A null scalar on either side turns the positions selecting it into nulls.
    fn create_output(&self, predicate: &BooleanArray) -> Result<ArrayRef, ArrowError> {
        let len = predicate.len();
        // Null mask entries count as false.
        let selection = maybe_prep_null_mask_filter(predicate);

        let (values, validity): (Vec<T::Native>, Option<NullBuffer>) =
            match (self.truthy, self.falsy) {
                // Both scalars are null: the output is entirely null.
                (None, None) => (
                    vec![T::default_value(); len],
                    Some(NullBuffer::new_null(len)),
                ),
                // Only truthy has a value: the selection doubles as the validity bitmap
                // (set => valid truthy value, unset => null).
                (Some(truthy_val), None) => {
                    Self::get_scalar_and_null_buffer_for_single_non_nullable(selection, truthy_val)
                }
                // Only falsy has a value: invert the selection so that unset positions
                // become the valid ones and set positions become null.
                (None, Some(falsy_val)) => Self::get_scalar_and_null_buffer_for_single_non_nullable(
                    selection.not(),
                    falsy_val,
                ),
                // Both scalars have values: pick per position, no nulls needed.
                (Some(truthy_val), Some(falsy_val)) => {
                    let mut picked: Vec<T::Native> = Vec::with_capacity(len);
                    picked.extend(selection.iter().map(|take_truthy| {
                        if take_truthy {
                            truthy_val
                        } else {
                            falsy_val
                        }
                    }));
                    (picked, None)
                }
            };

        let array = PrimitiveArray::<T>::try_new(ScalarBuffer::<T::Native>::from(values), validity)?
            // Keep decimal precisions, scales or timestamps timezones
            .with_data_type(self.data_type.clone());
        Ok(Arc::new(array))
    }
}
/// [`ZipImpl`] for a pair of scalars of the byte array type `T`
/// (used for `Utf8`, `LargeUtf8`, `Binary` and `LargeBinary`).
#[derive(PartialEq, Hash)]
struct BytesScalarImpl<T: ByteArrayType> {
// Owned copy of the truthy value's bytes; `None` when the truthy scalar is null.
truthy: Option<Vec<u8>>,
// Owned copy of the falsy value's bytes; `None` when the falsy scalar is null.
falsy: Option<Vec<u8>>,
// Carries the byte array type without storing a value of it.
phantom: PhantomData<T>,
}
impl<T: ByteArrayType> Debug for BytesScalarImpl<T> {
    /// Formats both optional byte values; the phantom marker is left out.
    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
        let mut out = f.debug_struct("BytesScalarImpl");
        out.field("truthy", &self.truthy);
        out.field("falsy", &self.falsy);
        out.finish()
    }
}
impl<T: ByteArrayType> BytesScalarImpl<T> {
    /// Copies the (optional) bytes of both scalars.
    fn new(truthy_value: &dyn Array, falsy_value: &dyn Array) -> Self {
        let truthy = Self::get_value_from_scalar(truthy_value);
        let falsy = Self::get_value_from_scalar(falsy_value);
        Self {
            truthy,
            falsy,
            phantom: PhantomData,
        }
    }

    /// Returns an owned copy of element 0 of `scalar`, or `None` when that element is null.
    fn get_value_from_scalar(scalar: &dyn Array) -> Option<Vec<u8>> {
        if scalar.is_null(0) {
            return None;
        }
        let bytes: &[u8] = scalar.as_bytes::<T>().value(0).as_ref();
        Some(bytes.to_vec())
    }

    /// Builds the parts of an array that holds `value` wherever `predicate` is set and
    /// null everywhere else.
    ///
    /// Null slots get a length of 0, so the data buffer only contains one copy of `value`
    /// per set bit. `predicate` itself is reused as the validity bitmap.
    fn get_scalar_and_null_buffer_for_single_non_nullable(
        predicate: BooleanBuffer,
        value: &[u8],
    ) -> (Buffer, OffsetBuffer<T::Offset>, Option<NullBuffer>) {
        let len = predicate.len();
        let set_count = predicate.count_set_bits();

        // Nothing selected: every slot is null, no bytes and all-zero offsets.
        if set_count == 0 {
            let offsets = OffsetBuffer::<T::Offset>::new_zeroed(len);
            return (Buffer::from(&[]), offsets, Some(NullBuffer::new_null(len)));
        }

        let value_len = value.len();
        let offsets = OffsetBuffer::<T::Offset>::from_lengths(
            predicate
                .iter()
                .map(|is_set| if is_set { value_len } else { 0 }),
        );

        let mut data = MutableBuffer::with_capacity(0);
        data.repeat_slice_n_times(value, set_count);

        // Set bit => valid value, unset bit => null.
        (Buffer::from(data), offsets, Some(NullBuffer::new(predicate)))
    }

    /// Builds a data [`Buffer`] made of `number_of_values` copies of `value`, plus an
    /// [`OffsetBuffer`] describing `number_of_values` entries that each span `value.len()` bytes.
    fn get_bytes_and_offset_for_all_same_value(
        number_of_values: usize,
        value: &[u8],
    ) -> (Buffer, OffsetBuffer<T::Offset>) {
        let offsets =
            OffsetBuffer::<T::Offset>::from_repeated_length(value.len(), number_of_values);

        let mut data = MutableBuffer::with_capacity(0);
        data.repeat_slice_n_times(value, number_of_values);

        (data.into(), offsets)
    }

    /// Builds data and offsets for the case where both scalars are non-null:
    /// `truthy_val` at set positions of `predicate`, `falsy_val` at the others.
    fn create_output_on_non_nulls(
        predicate: &BooleanBuffer,
        truthy_val: &[u8],
        falsy_val: &[u8],
    ) -> (Buffer, OffsetBuffer<<T as ByteArrayType>::Offset>) {
        let len = predicate.len();
        let true_count = predicate.count_set_bits();

        // Uniform outputs are a plain repetition of a single value.
        if true_count == 0 {
            return Self::get_bytes_and_offset_for_all_same_value(len, falsy_val);
        }
        if true_count == len {
            return Self::get_bytes_and_offset_for_all_same_value(len, truthy_val);
        }

        let total_bytes = true_count * truthy_val.len() + (len - true_count) * falsy_val.len();
        let mut data = MutableBuffer::with_capacity(total_bytes);
        let mut offsets = OffsetBufferBuilder::<T::Offset>::new(len);

        // Appends `count` copies of `value` and records one length per copy.
        let mut append_run = |value: &[u8], count: usize| {
            if count == 0 {
                return;
            }
            data.repeat_slice_n_times(value, count);
            (0..count).for_each(|_| offsets.push_length(value.len()));
        };

        // `SlicesIterator` yields only the runs of set bits; the gaps before each run
        // (and after the last one) are falsy.
        let mut filled = 0;
        for (start, end) in SlicesIterator::from(predicate) {
            append_run(falsy_val, start - filled);
            append_run(truthy_val, end - start);
            filled = end;
        }
        append_run(falsy_val, len - filled);

        (data.into(), offsets.finish())
    }
}
impl<T: ByteArrayType> ZipImpl for BytesScalarImpl<T> {
    /// Produces a byte array with one element per entry of `predicate`.
    ///
    /// A null scalar on either side turns the positions selecting it into nulls.
    fn create_output(&self, predicate: &BooleanArray) -> Result<ArrayRef, ArrowError> {
        let len = predicate.len();
        // Null mask entries count as false.
        let selection = maybe_prep_null_mask_filter(predicate);

        let (values, offsets, validity): (Buffer, OffsetBuffer<T::Offset>, Option<NullBuffer>) =
            match (self.truthy.as_deref(), self.falsy.as_deref()) {
                // Both scalars are null: no bytes, all-zero offsets, everything null.
                (None, None) => (
                    Buffer::from(&[]),
                    OffsetBuffer::<T::Offset>::new_zeroed(selection.len()),
                    Some(NullBuffer::new_null(len)),
                ),
                // Only truthy has a value: set positions are valid, the rest is null.
                (Some(truthy_val), None) => {
                    Self::get_scalar_and_null_buffer_for_single_non_nullable(selection, truthy_val)
                }
                // Only falsy has a value: invert the selection so the unset positions
                // become the valid ones and the set positions become null.
                (None, Some(falsy_val)) => Self::get_scalar_and_null_buffer_for_single_non_nullable(
                    selection.not(),
                    falsy_val,
                ),
                (Some(truthy_val), Some(falsy_val)) => {
                    let (values, offsets) =
                        Self::create_output_on_non_nulls(&selection, truthy_val, falsy_val);
                    (values, offsets, None)
                }
            };

        // Safety: the values are based on valid inputs
        // and `try_new` is expensive for strings as it validates that the input is valid utf8
        let array = unsafe { GenericByteArray::<T>::new_unchecked(offsets, values, validity) };
        Ok(Arc::new(array))
    }
}
/// Returns the boolean values of `predicate` with null entries treated as `false`.
///
/// When `predicate` has no nulls its value buffer is cloned as-is; otherwise the mask is
/// first passed through `prep_null_mask_filter` and the resulting values are returned.
fn maybe_prep_null_mask_filter(predicate: &BooleanArray) -> BooleanBuffer {
    match predicate.null_count() {
        0 => predicate.values().clone(),
        _ => prep_null_mask_filter(predicate).into_parts().0,
    }
}
/// [`ZipImpl`] for a pair of scalars of the byte view type `T`
/// (used for `Utf8View` and `BinaryView`).
struct ByteViewScalarImpl<T: ByteViewType> {
// First view of the truthy scalar; `None` when the truthy scalar is null.
truthy_view: Option<u128>,
// Data buffers of the truthy scalar's array (the shared empty slice when it is null).
truthy_buffers: Arc<[Buffer]>,
// First view of the falsy scalar; `None` when the falsy scalar is null.
falsy_view: Option<u128>,
// Data buffers of the falsy scalar's array (the shared empty slice when it is null).
falsy_buffers: Arc<[Buffer]>,
// Carries the byte view type without storing a value of it.
phantom: PhantomData<T>,
}
/// Lazily initialised empty buffer list, shared by every caller of [`empty_arc_buffers`].
static EMPTY_ARC: OnceLock<Arc<[Buffer]>> = OnceLock::new();

/// Returns a handle to the shared empty buffer list, creating it on first use.
fn empty_arc_buffers() -> Arc<[Buffer]> {
    let shared = EMPTY_ARC.get_or_init(|| Arc::new([]));
    Arc::clone(shared)
}
impl<T: ByteViewType> ByteViewScalarImpl<T> {
    /// Captures the view and the data buffers of both scalars.
    fn new(truthy: &dyn Array, falsy: &dyn Array) -> Self {
        let (truthy_view, truthy_buffers) = Self::get_value_from_scalar(truthy);
        let (falsy_view, falsy_buffers) = Self::get_value_from_scalar(falsy);
        Self {
            truthy_view,
            truthy_buffers,
            falsy_view,
            falsy_buffers,
            phantom: PhantomData,
        }
    }

    /// Returns the first view of `scalar` together with the data buffers of its array,
    /// or `None` and the shared empty buffer list when element 0 is null.
    fn get_value_from_scalar(scalar: &dyn Array) -> (Option<u128>, Arc<[Buffer]>) {
        if scalar.is_null(0) {
            (None, empty_arc_buffers())
        } else {
            let (views, buffers, _) = scalar.as_byte_view::<T>().clone().into_parts();
            (views.first().copied(), buffers)
        }
    }

    /// Builds the parts of a view array that holds `value` wherever `predicate` is set
    /// and null everywhere else.
    ///
    /// Every slot receives the same view; `predicate` itself is reused as the validity bitmap.
    fn get_views_for_single_non_nullable(
        predicate: BooleanBuffer,
        value: u128,
        buffers: Arc<[Buffer]>,
    ) -> (ScalarBuffer<u128>, Arc<[Buffer]>, Option<NullBuffer>) {
        let number_of_true = predicate.count_set_bits();
        let number_of_values = predicate.len();

        // Fast path for all nulls: zeroed views, no data buffers needed
        if number_of_true == 0 {
            return (
                vec![0; number_of_values].into(),
                empty_arc_buffers(),
                Some(NullBuffer::new_null(number_of_values)),
            );
        }

        let views = vec![value; number_of_values];

        // Set bit => valid value, unset bit => null
        let nulls = NullBuffer::new(predicate);
        (views.into(), buffers, Some(nulls))
    }

    /// Builds the parts of a view array for the case where both scalars are non-null:
    /// `truthy_view` at set positions of `predicate`, `falsy_view` at the others.
    ///
    /// When both values occur in the output, the truthy buffers come first in the returned
    /// buffer list. The falsy buffers are appended after them only if the falsy value
    /// actually refers to a data buffer; its view is then re-pointed at the shifted index.
    fn get_views_for_non_nullable(
        predicate: BooleanBuffer,
        result_len: usize,
        truthy_view: u128,
        truthy_buffers: Arc<[Buffer]>,
        falsy_view: u128,
        falsy_buffers: Arc<[Buffer]>,
    ) -> (ScalarBuffer<u128>, Arc<[Buffer]>, Option<NullBuffer>) {
        let true_count = predicate.count_set_bits();
        match true_count {
            0 => {
                // all values are falsy
                (vec![falsy_view; result_len].into(), falsy_buffers, None)
            }
            n if n == predicate.len() => {
                // all values are truthy
                (vec![truthy_view; result_len].into(), truthy_buffers, None)
            }
            _ => {
                // Views of at most 12 bytes carry their data inline; for those the bits
                // that `ByteView` exposes as `buffer_index`/`offset` are value bytes.
                const MAX_INLINE_VIEW_LEN: u32 = 12;

                let mut buffers: Vec<Buffer> = truthy_buffers.to_vec();

                // Whether the falsy value is inlined must be decided from the view itself.
                // A scalar taken from an array that also owns data buffers can be inlined
                // while `falsy_buffers` is non-empty, and rewriting the "buffer index" of
                // such a view would overwrite part of the inlined value.
                let byte_view_falsy = ByteView::from(falsy_view);
                let falsy_is_inlined = byte_view_falsy.length <= MAX_INLINE_VIEW_LEN;
                let view_falsy = if falsy_is_inlined || falsy_buffers.is_empty() {
                    falsy_view
                } else {
                    // The falsy buffers are placed after the truthy ones, so shift the index.
                    let new_index_falsy_buffers =
                        buffers.len() as u32 + byte_view_falsy.buffer_index;
                    buffers.extend(falsy_buffers.iter().cloned());
                    byte_view_falsy
                        .with_buffer_index(new_index_falsy_buffers)
                        .as_u128()
                };

                // One 16-byte view per output element.
                let total_number_of_bytes = predicate.len() * std::mem::size_of::<u128>();
                let mut mutable = MutableBuffer::new(total_number_of_bytes);

                // `SlicesIterator` yields only the runs of set bits; the gaps before each
                // run (and after the last one) are falsy.
                let mut filled = 0;
                SlicesIterator::from(&predicate).for_each(|(start, end)| {
                    if start > filled {
                        let false_repeat_count = start - filled;
                        mutable
                            .repeat_slice_n_times(view_falsy.to_byte_slice(), false_repeat_count);
                    }
                    let true_repeat_count = end - start;
                    mutable.repeat_slice_n_times(truthy_view.to_byte_slice(), true_repeat_count);
                    filled = end;
                });
                if filled < predicate.len() {
                    let false_repeat_count = predicate.len() - filled;
                    mutable.repeat_slice_n_times(view_falsy.to_byte_slice(), false_repeat_count);
                }

                let bytes = Buffer::from(mutable);
                (bytes.into(), buffers.into(), None)
            }
        }
    }
}
impl<T: ByteViewType> Debug for ByteViewScalarImpl<T> {
    /// Formats the two optional raw views; buffers and the phantom marker are left out.
    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
        let mut out = f.debug_struct("ByteViewScalarImpl");
        out.field("truthy", &self.truthy_view);
        out.field("falsy", &self.falsy_view);
        out.finish()
    }
}
impl<T: ByteViewType> ZipImpl for ByteViewScalarImpl<T> {
    /// Produces a view array with one element per entry of `predicate`.
    ///
    /// A null scalar on either side turns the positions selecting it into nulls.
    fn create_output(&self, predicate: &BooleanArray) -> Result<ArrayRef, ArrowError> {
        let len = predicate.len();
        // Null mask entries count as false.
        let selection = maybe_prep_null_mask_filter(predicate);

        let (views, buffers, nulls) = match (self.truthy_view, self.falsy_view) {
            // Both scalars are null: zeroed views, no data buffers, everything null.
            (None, None) => (
                vec![0; len].into(),
                empty_arc_buffers(),
                Some(NullBuffer::new_null(len)),
            ),
            // Only truthy has a value: set positions are valid, the rest is null.
            (Some(truthy), None) => Self::get_views_for_single_non_nullable(
                selection,
                truthy,
                Arc::clone(&self.truthy_buffers),
            ),
            // Only falsy has a value: invert the selection so the unset positions
            // become the valid ones and the set positions become null.
            (None, Some(falsy)) => Self::get_views_for_single_non_nullable(
                selection.not(),
                falsy,
                Arc::clone(&self.falsy_buffers),
            ),
            (Some(truthy), Some(falsy)) => Self::get_views_for_non_nullable(
                selection,
                len,
                truthy,
                Arc::clone(&self.truthy_buffers),
                falsy,
                Arc::clone(&self.falsy_buffers),
            ),
        };

        // NOTE(review): no validation happens here; this relies on the views and buffers
        // having been taken from valid scalar inputs — confirm against `new_unchecked`'s contract.
        let array = unsafe { GenericByteViewArray::<T>::new_unchecked(views, buffers, nulls) };
        Ok(Arc::new(array))
    }
}
#[cfg(test)]
mod test {
use super::*;
use arrow_array::types::Int32Type;
#[test]
fn test_zip_kernel_one() {
let a = Int32Array::from(vec![Some(5), None, Some(7), None, Some(1)]);
let b = Int32Array::from(vec![None, Some(3), Some(6), Some(7), Some(3)]);
let mask = BooleanArray::from(vec![true, true, false, false, true]);
let out = zip(&mask, &a, &b).unwrap();
let actual = out.as_any().downcast_ref::<Int32Array>().unwrap();
let expected = Int32Array::from(vec![Some(5), None, Some(6), Some(7), Some(1)]);
assert_eq!(actual, &expected);
}
#[test]
fn test_zip_kernel_two() {
let a = Int32Array::from(vec![Some(5), None, Some(7), None, Some(1)]);
let b = Int32Array::from(vec![None, Some(3), Some(6), Some(7), Some(3)]);
let mask = BooleanArray::from(vec![false, false, true, true, false]);
let out = zip(&mask, &a, &b).unwrap();
let actual = out.as_any().downcast_ref::<Int32Array>().unwrap();
let expected = Int32Array::from(vec![None, Some(3), Some(7), None, Some(3)]);
assert_eq!(actual, &expected);
}
#[test]
fn test_zip_kernel_scalar_falsy_1() {
let a = Int32Array::from(vec![Some(5), None, Some(7), None, Some(1)]);
let fallback = Scalar::new(Int32Array::from_value(42, 1));
let mask = BooleanArray::from(vec![true, true, false, false, true]);
let out = zip(&mask, &a, &fallback).unwrap();
let actual = out.as_any().downcast_ref::<Int32Array>().unwrap();
let expected = Int32Array::from(vec![Some(5), None, Some(42), Some(42), Some(1)]);
assert_eq!(actual, &expected);
}
#[test]
fn test_zip_kernel_scalar_falsy_2() {
let a = Int32Array::from(vec![Some(5), None, Some(7), None, Some(1)]);
let fallback = Scalar::new(Int32Array::from_value(42, 1));
let mask = BooleanArray::from(vec![false, false, true, true, false]);
let out = zip(&mask, &a, &fallback).unwrap();
let actual = out.as_any().downcast_ref::<Int32Array>().unwrap();
let expected = Int32Array::from(vec![Some(42), Some(42), Some(7), None, Some(42)]);
assert_eq!(actual, &expected);
}
#[test]
fn test_zip_kernel_scalar_truthy_1() {
let a = Int32Array::from(vec![Some(5), None, Some(7), None, Some(1)]);
let fallback = Scalar::new(Int32Array::from_value(42, 1));
let mask = BooleanArray::from(vec![true, true, false, false, true]);
let out = zip(&mask, &fallback, &a).unwrap();
let actual = out.as_any().downcast_ref::<Int32Array>().unwrap();
let expected = Int32Array::from(vec![Some(42), Some(42), Some(7), None, Some(42)]);
assert_eq!(actual, &expected);
}
#[test]
fn test_zip_kernel_scalar_truthy_2() {
let a = Int32Array::from(vec![Some(5), None, Some(7), None, Some(1)]);
let fallback = Scalar::new(Int32Array::from_value(42, 1));
let mask = BooleanArray::from(vec![false, false, true, true, false]);
let out = zip(&mask, &fallback, &a).unwrap();
let actual = out.as_any().downcast_ref::<Int32Array>().unwrap();
let expected = Int32Array::from(vec![Some(5), None, Some(42), Some(42), Some(1)]);
assert_eq!(actual, &expected);
}
#[test]
fn test_zip_kernel_scalar_both_mask_ends_with_true() {
let scalar_truthy = Scalar::new(Int32Array::from_value(42, 1));
let scalar_falsy = Scalar::new(Int32Array::from_value(123, 1));
let mask = BooleanArray::from(vec![true, true, false, false, true]);
let out = zip(&mask, &scalar_truthy, &scalar_falsy).unwrap();
let actual = out.as_any().downcast_ref::<Int32Array>().unwrap();
let expected = Int32Array::from(vec![Some(42), Some(42), Some(123), Some(123), Some(42)]);
assert_eq!(actual, &expected);
}
#[test]
fn test_zip_kernel_scalar_both_mask_ends_with_false() {
let scalar_truthy = Scalar::new(Int32Array::from_value(42, 1));
let scalar_falsy = Scalar::new(Int32Array::from_value(123, 1));
let mask = BooleanArray::from(vec![true, true, false, true, false, false]);
let out = zip(&mask, &scalar_truthy, &scalar_falsy).unwrap();
let actual = out.as_any().downcast_ref::<Int32Array>().unwrap();
let expected = Int32Array::from(vec![
Some(42),
Some(42),
Some(123),
Some(42),
Some(123),
Some(123),
]);
assert_eq!(actual, &expected);
}
#[test]
fn test_zip_kernel_primitive_scalar_none_1() {
let scalar_truthy = Scalar::new(Int32Array::from_value(42, 1));
let scalar_falsy = Scalar::new(Int32Array::new_null(1));
let mask = BooleanArray::from(vec![true, true, false, false, true]);
let out = zip(&mask, &scalar_truthy, &scalar_falsy).unwrap();
let actual = out.as_any().downcast_ref::<Int32Array>().unwrap();
let expected = Int32Array::from(vec![Some(42), Some(42), None, None, Some(42)]);
assert_eq!(actual, &expected);
}
#[test]
fn test_zip_kernel_primitive_scalar_none_2() {
let scalar_truthy = Scalar::new(Int32Array::from_value(42, 1));
let scalar_falsy = Scalar::new(Int32Array::new_null(1));
let mask = BooleanArray::from(vec![false, false, true, true, false]);
let out = zip(&mask, &scalar_truthy, &scalar_falsy).unwrap();
let actual = out.as_any().downcast_ref::<Int32Array>().unwrap();
let expected = Int32Array::from(vec![None, None, Some(42), Some(42), None]);
assert_eq!(actual, &expected);
}
#[test]
fn test_zip_kernel_primitive_scalar_both_null() {
let scalar_truthy = Scalar::new(Int32Array::new_null(1));
let scalar_falsy = Scalar::new(Int32Array::new_null(1));
let mask = BooleanArray::from(vec![false, false, true, true, false]);
let out = zip(&mask, &scalar_truthy, &scalar_falsy).unwrap();
let actual = out.as_any().downcast_ref::<Int32Array>().unwrap();
let expected = Int32Array::from(vec![None, None, None, None, None]);
assert_eq!(actual, &expected);
}
#[test]
fn test_zip_primitive_array_with_nulls_is_mask_should_be_treated_as_false() {
let truthy = Int32Array::from_iter_values(vec![1, 2, 3, 4, 5, 6]);
let falsy = Int32Array::from_iter_values(vec![7, 8, 9, 10, 11, 12]);
let mask = {
let booleans = BooleanBuffer::from(vec![true, true, false, true, false, false]);
let nulls = NullBuffer::from(vec![
true, true, true,
false, // null treated as false even though in the original mask it was true
true, true,
]);
BooleanArray::new(booleans, Some(nulls))
};
let out = zip(&mask, &truthy, &falsy).unwrap();
let actual = out.as_any().downcast_ref::<Int32Array>().unwrap();
let expected = Int32Array::from(vec![
Some(1),
Some(2),
Some(9),
Some(10), // true in mask but null
Some(11),
Some(12),
]);
assert_eq!(actual, &expected);
}
#[test]
fn test_zip_kernel_primitive_scalar_with_boolean_array_mask_with_nulls_should_be_treated_as_false()
{
let scalar_truthy = Scalar::new(Int32Array::from_value(42, 1));
let scalar_falsy = Scalar::new(Int32Array::from_value(123, 1));
let mask = {
let booleans = BooleanBuffer::from(vec![true, true, false, true, false, false]);
let nulls = NullBuffer::from(vec![
true, true, true,
false, // null treated as false even though in the original mask it was true
true, true,
]);
BooleanArray::new(booleans, Some(nulls))
};
let out = zip(&mask, &scalar_truthy, &scalar_falsy).unwrap();
let actual = out.as_any().downcast_ref::<Int32Array>().unwrap();
let expected = Int32Array::from(vec![
Some(42),
Some(42),
Some(123),
Some(123), // true in mask but null
Some(123),
Some(123),
]);
assert_eq!(actual, &expected);
}
#[test]
fn test_zip_string_array_with_nulls_is_mask_should_be_treated_as_false() {
let truthy = StringArray::from_iter_values(vec!["1", "2", "3", "4", "5", "6"]);
let falsy = StringArray::from_iter_values(vec!["7", "8", "9", "10", "11", "12"]);
let mask = {
let booleans = BooleanBuffer::from(vec![true, true, false, true, false, false]);
let nulls = NullBuffer::from(vec![
true, true, true,
false, // null treated as false even though in the original mask it was true
true, true,
]);
BooleanArray::new(booleans, Some(nulls))
};
let out = zip(&mask, &truthy, &falsy).unwrap();
let actual = out.as_string::<i32>();
let expected = StringArray::from_iter_values(vec![
"1", "2", "9", "10", // true in mask but null
"11", "12",
]);
assert_eq!(actual, &expected);
}
// LargeUtf8 scalars on both sides: a null mask slot selects the falsy scalar even when
// the underlying boolean value is `true`.
#[test]
fn test_zip_kernel_large_string_scalar_with_boolean_array_mask_with_nulls_should_be_treated_as_false()
{
    let when_true = Scalar::new(LargeStringArray::from_iter_values(["test"]));
    let when_false = Scalar::new(LargeStringArray::from_iter_values(["something else"]));

    // Slot 3 is `true` in the value buffer but invalid in the validity buffer.
    let values = BooleanBuffer::from(vec![true, true, false, true, false, false]);
    let validity = NullBuffer::from(vec![true, true, true, false, true, true]);
    let selection = BooleanArray::new(values, Some(validity));

    let zipped = zip(&selection, &when_true, &when_false).unwrap();
    let got = zipped.as_any().downcast_ref::<LargeStringArray>().unwrap();

    // Two truthy rows followed by four falsy rows (the null slot counts as falsy).
    let want: LargeStringArray = std::iter::repeat_n(Some("test"), 2)
        .chain(std::iter::repeat_n(Some("something else"), 4))
        .collect();
    assert_eq!(got, &want);
}
// Utf8 scalars where the falsy side is null: `false` mask slots become nulls.
#[test]
fn test_zip_kernel_bytes_scalar_none_1() {
    let when_true = Scalar::new(StringArray::from_iter_values(["hello"]));
    let when_false = Scalar::new(StringArray::new_null(1));
    let selection = BooleanArray::from(vec![true, true, false, false, true]);

    let zipped = zip(&selection, &when_true, &when_false).unwrap();
    let got = zipped.as_any().downcast_ref::<StringArray>().unwrap();

    let want: StringArray = vec![Some("hello"), Some("hello"), None, None, Some("hello")]
        .into_iter()
        .collect();
    assert_eq!(got, &want);
}
// Utf8 scalars where the truthy side is null: `true` mask slots become nulls.
#[test]
fn test_zip_kernel_bytes_scalar_none_2() {
    let when_true = Scalar::new(StringArray::new_null(1));
    let when_false = Scalar::new(StringArray::from_iter_values(["hello"]));
    let selection = BooleanArray::from(vec![true, true, false, false, true]);

    let zipped = zip(&selection, &when_true, &when_false).unwrap();
    let got = zipped.as_any().downcast_ref::<StringArray>().unwrap();

    let want: StringArray = vec![None, None, Some("hello"), Some("hello"), None]
        .into_iter()
        .collect();
    assert_eq!(got, &want);
}
// Utf8 scalars, both non-null, with a mask whose last slots are `false`.
#[test]
fn test_zip_kernel_bytes_scalar_both() {
    let when_true = Scalar::new(StringArray::from_iter_values(["test"]));
    let when_false = Scalar::new(StringArray::from_iter_values(["something else"]));

    // mask ends with false
    let selection = BooleanArray::from(vec![true, true, false, true, false, false]);

    let zipped = zip(&selection, &when_true, &when_false).unwrap();
    let got = zipped.as_any().downcast_ref::<StringArray>().unwrap();

    let want: StringArray = vec![
        Some("test"),
        Some("test"),
        Some("something else"),
        Some("test"),
        Some("something else"),
        Some("something else"),
    ]
    .into_iter()
    .collect();
    assert_eq!(got, &want);
}
// Utf8 scalars with masks that select only one side (plus one case where both sides are
// null). Every expected output is a single value repeated `mask_len` times.
#[test]
fn test_zip_scalar_bytes_only_taking_one_side() {
    let mask_len = 5;
    let all_true_mask = BooleanArray::from(vec![true; mask_len]);
    let all_false_mask = BooleanArray::from(vec![false; mask_len]);
    let mixed_mask = BooleanArray::from(vec![true, false, true, false, true]);

    let null_scalar = Scalar::new(StringArray::new_null(1));
    let test_scalar = Scalar::new(StringArray::from_iter_values(["test"]));
    let other_scalar = Scalar::new(StringArray::from_iter_values(["something else"]));

    // (description, mask, truthy, falsy, value every output row must hold)
    let cases = [
        (
            "1. left null, right non-null, mask all true",
            &all_true_mask,
            &null_scalar,
            &test_scalar,
            None,
        ),
        (
            "2. left null, right non-null, mask all false",
            &all_false_mask,
            &null_scalar,
            &test_scalar,
            Some("test"),
        ),
        (
            "3. left non-null, right null, mask all true",
            &all_true_mask,
            &test_scalar,
            &null_scalar,
            Some("test"),
        ),
        (
            "4. left non-null, right null, mask all false",
            &all_false_mask,
            &test_scalar,
            &null_scalar,
            None,
        ),
        (
            "5. both non-null, mask all true",
            &all_true_mask,
            &test_scalar,
            &other_scalar,
            Some("test"),
        ),
        (
            "6. both non-null, mask all false",
            &all_false_mask,
            &test_scalar,
            &other_scalar,
            Some("something else"),
        ),
        (
            "7. both null, mixed mask",
            &mixed_mask,
            &null_scalar,
            &null_scalar,
            None,
        ),
    ];

    for (description, mask, truthy, falsy, repeated) in cases {
        let out = zip(mask, truthy, falsy).unwrap();
        let actual = out.as_string::<i32>();
        let expected = StringArray::from_iter(std::iter::repeat_n(repeated, mask_len));
        assert_eq!(actual, &expected, "case: {description}");
    }
}
// A single `ScalarZipper` built from two Int32 scalars is applied to masks of
// different lengths.
#[test]
fn test_scalar_zipper() {
    let when_true = Scalar::new(Int32Array::from_value(42, 1));
    let when_false = Scalar::new(Int32Array::from_value(123, 1));
    let zipper = ScalarZipper::try_new(&when_true, &when_false).unwrap();

    // (mask values, expected output); the second case uses a different mask length
    let cases = [
        (
            vec![false, false, true, true, false],
            vec![Some(123), Some(123), Some(42), Some(42), Some(123)],
        ),
        (vec![true, false, true], vec![Some(42), Some(123), Some(42)]),
    ];

    for (mask_values, expected_values) in cases {
        let mask = BooleanArray::from(mask_values);
        let out = zipper.zip(&mask).unwrap();
        let actual = out.as_primitive::<Int32Type>();
        let expected = Int32Array::from(expected_values);
        assert_eq!(actual, &expected);
    }
}
// Utf8 scalars with an alternating mask.
#[test]
fn test_zip_kernel_scalar_strings() {
    let (hello, world) = ("hello", "world");
    let when_true = Scalar::new(StringArray::from(vec![hello]));
    let when_false = Scalar::new(StringArray::from(vec![world]));
    let selection = BooleanArray::from(vec![true, false, true, false, true]);

    let zipped = zip(&selection, &when_true, &when_false).unwrap();
    let got = zipped.as_string::<i32>();

    let want = StringArray::from(vec![
        Some(hello),
        Some(world),
        Some(hello),
        Some(world),
        Some(hello),
    ]);
    assert_eq!(got, &want);
}
// Binary scalars with an alternating mask; the truthy value is not valid UTF-8.
#[test]
fn test_zip_kernel_scalar_binary() {
    // Non valid UTF8 bytes
    let on_true: &[u8] = b"\xFF\xFE\xFD";
    let on_false: &[u8] = b"world";

    let when_true = Scalar::new(BinaryArray::from_iter_values(vec![on_true]));
    let when_false = Scalar::new(BinaryArray::from_iter_values(vec![on_false]));
    let selection = BooleanArray::from(vec![true, false, true, false, true]);

    let zipped = zip(&selection, &when_true, &when_false).unwrap();
    let got = zipped.as_binary::<i32>();

    let want = BinaryArray::from(vec![
        Some(on_true),
        Some(on_false),
        Some(on_true),
        Some(on_false),
        Some(on_true),
    ]);
    assert_eq!(got, &want);
}
// LargeBinary scalars with an alternating mask.
#[test]
fn test_zip_kernel_scalar_large_binary() {
    let on_true: &[u8] = b"hey";
    let on_false: &[u8] = b"world";

    let when_true = Scalar::new(LargeBinaryArray::from_iter_values(vec![on_true]));
    let when_false = Scalar::new(LargeBinaryArray::from_iter_values(vec![on_false]));
    let selection = BooleanArray::from(vec![true, false, true, false, true]);

    let zipped = zip(&selection, &when_true, &when_false).unwrap();
    let got = zipped.as_binary::<i64>();

    let want = LargeBinaryArray::from(vec![
        Some(on_true),
        Some(on_false),
        Some(on_true),
        Some(on_false),
        Some(on_true),
    ]);
    assert_eq!(got, &want);
}
// Test to ensure that the precision and scale are kept when zipping Decimal128 data.
// Scalars, a null scalar and two arrays are all sliced from one source array so that
// they share its data type.
#[test]
fn test_zip_decimal_with_custom_precision_and_scale() {
    let decimals = Decimal128Array::from_iter_values([12345, 456, 7890, -123223423432432])
        .with_precision_and_scale(20, 2)
        .unwrap();
    let source: ArrayRef = Arc::new(decimals);

    let null_scalar = Scalar::new(new_null_array(source.data_type(), 1));
    let first_scalar = Scalar::new(source.slice(0, 1));
    let second_scalar = Scalar::new(source.slice(1, 1));
    let first_array: ArrayRef = source.slice(0, 2);
    let second_array: ArrayRef = source.slice(2, 2);

    test_zip_output_data_types_for_input(
        first_scalar,
        second_scalar,
        null_scalar,
        first_array,
        second_array,
    );
}
// Test to ensure that the timezone is kept when zipping TimestampArray data.
// Scalars, a null scalar and two arrays are all sliced from one source array so that
// they share its data type.
#[test]
fn test_zip_timestamp_with_timezone() {
    let timestamps = TimestampSecondArray::from(vec![0, 1000, 2000, 4000])
        .with_timezone("+01:00".to_string());
    let source: ArrayRef = Arc::new(timestamps);

    let null_scalar = Scalar::new(new_null_array(source.data_type(), 1));
    let first_scalar = Scalar::new(source.slice(0, 1));
    let second_scalar = Scalar::new(source.slice(1, 1));
    let first_array: ArrayRef = source.slice(0, 2);
    let second_array: ArrayRef = source.slice(2, 2);

    test_zip_output_data_types_for_input(
        first_scalar,
        second_scalar,
        null_scalar,
        first_array,
        second_array,
    );
}
fn test_zip_output_data_types_for_input(
scalar_1: Scalar<ArrayRef>,
scalar_2: Scalar<ArrayRef>,
null_scalar: Scalar<ArrayRef>,
array_1: ArrayRef,
array_2: ArrayRef,
) {
// non null Scalar vs non null Scalar
test_zip_output_data_type(&scalar_1, &scalar_2, 10);
// null Scalar vs non-null Scalar (and vice versa)
test_zip_output_data_type(&null_scalar, &scalar_1, 10);
test_zip_output_data_type(&scalar_1, &null_scalar, 10);
// non-null Scalar and array (and vice versa)
test_zip_output_data_type(&array_1.as_ref(), &scalar_1, array_1.len());
test_zip_output_data_type(&scalar_1, &array_1.as_ref(), array_1.len());
// Array and null scalar (and vice versa)
test_zip_output_data_type(&array_1.as_ref(), &null_scalar, array_1.len());
test_zip_output_data_type(&null_scalar, &array_1.as_ref(), array_1.len());
// Both arrays
test_zip_output_data_type(&array_1.as_ref(), &array_2.as_ref(), array_1.len());
}
// Asserts that `truthy` and `falsy` share a data type and that `zip` returns that same
// data type for an all-true, an all-false and an alternating mask of `mask_length` rows.
fn test_zip_output_data_type(truthy: &dyn Datum, falsy: &dyn Datum, mask_length: usize) {
    let (truthy_values, _) = truthy.get();
    let (falsy_values, _) = falsy.get();
    let expected_data_type = truthy_values.data_type().clone();
    assert_eq!(&expected_data_type, falsy_values.data_type());

    // Try different masks to test different paths
    let alternating: Vec<bool> = (0..mask_length).map(|i| i % 2 == 0).collect();
    let masks = [
        BooleanArray::from(vec![true; mask_length]),
        BooleanArray::from(vec![false; mask_length]),
        BooleanArray::from(alternating),
    ];

    for mask in &masks {
        let out = zip(mask, truthy, falsy).unwrap();
        assert_eq!(out.data_type(), &expected_data_type);
    }
}
// Zips two single-row List<Int32> scalars and checks that each output row holds the
// list from the side chosen by the mask.
#[test]
fn zip_scalar_fallback_impl() {
    let truthy_item = Some(vec![Some(1), None, Some(3)]);
    let falsy_item = Some(vec![None, Some(2), Some(4)]);

    let truthy_scalar = Scalar::new(ListArray::from_iter_primitive::<Int32Type, _, _>(vec![
        truthy_item.clone(),
    ]));
    let falsy_scalar = Scalar::new(ListArray::from_iter_primitive::<Int32Type, _, _>(vec![
        falsy_item.clone(),
    ]));

    let selection = BooleanArray::from(vec![true, false, true, false, false, true, false]);
    let zipped = zip(&selection, &truthy_scalar, &falsy_scalar).unwrap();
    let got = zipped.as_list::<i32>();

    // One entry per mask slot: `t` where the mask is true, `f` where it is false.
    let (t, f) = (&truthy_item, &falsy_item);
    let want = ListArray::from_iter_primitive::<Int32Type, _, _>(
        vec![t, f, t, f, f, t, f].into_iter().cloned(),
    );
    assert_eq!(got, &want);
}
// Utf8View scalars with an alternating mask.
#[test]
fn test_zip_kernel_scalar_strings_array_view() {
    let (hello, world) = ("hello", "world");
    let when_true = Scalar::new(StringViewArray::from(vec![hello]));
    let when_false = Scalar::new(StringViewArray::from(vec![world]));
    let selection = BooleanArray::from(vec![true, false, true, false]);

    let zipped = zip(&selection, &when_true, &when_false).unwrap();
    let got = zipped.as_string_view();

    let want = StringViewArray::from(vec![Some(hello), Some(world), Some(hello), Some(world)]);
    assert_eq!(got, &want);
}
// BinaryView scalars with a two-row mask: one row from each side.
#[test]
fn test_zip_kernel_scalar_binary_array_view() {
    let when_true = Scalar::new(BinaryViewArray::from_iter_values(vec![b"hello"]));
    let when_false = Scalar::new(BinaryViewArray::from_iter_values(vec![b"world"]));
    let selection = BooleanArray::from(vec![true, false]);

    let zipped = zip(&selection, &when_true, &when_false).unwrap();
    let got = zipped.as_byte_view();

    let want = BinaryViewArray::from_iter_values(vec![b"hello", b"world"]);
    assert_eq!(got, &want);
}
// Utf8View scalars where the falsy side is null: `false` mask slots become nulls.
#[test]
fn test_zip_kernel_scalar_strings_array_view_with_nulls() {
    let when_true = Scalar::new(StringViewArray::from_iter_values(["hello"]));
    let when_false = Scalar::new(StringViewArray::new_null(1));
    let selection = BooleanArray::from(vec![true, true, false, false, true]);

    let zipped = zip(&selection, &when_true, &when_false).unwrap();
    let got = zipped.as_any().downcast_ref::<StringViewArray>().unwrap();

    let want: StringViewArray = vec![Some("hello"), Some("hello"), None, None, Some("hello")]
        .into_iter()
        .collect();
    assert_eq!(got, &want);
}
// Utf8View scalars that are both null, with an all-true mask: every output row is null.
#[test]
fn test_zip_kernel_scalar_strings_array_view_all_true_null() {
    let when_true = Scalar::new(StringViewArray::new_null(1));
    let when_false = Scalar::new(StringViewArray::new_null(1));
    let selection = BooleanArray::from(vec![true, true]);

    let zipped = zip(&selection, &when_true, &when_false).unwrap();
    let got = zipped.as_any().downcast_ref::<StringViewArray>().unwrap();

    let want: StringViewArray = std::iter::repeat_n(None::<String>, 2).collect();
    assert_eq!(got, &want);
}
// Utf8View scalars that are both null, with an all-false mask: every output row is null.
#[test]
fn test_zip_kernel_scalar_strings_array_view_all_false_null() {
    let when_true = Scalar::new(StringViewArray::new_null(1));
    let when_false = Scalar::new(StringViewArray::new_null(1));
    let selection = BooleanArray::from(vec![false, false]);

    let zipped = zip(&selection, &when_true, &when_false).unwrap();
    let got = zipped.as_any().downcast_ref::<StringViewArray>().unwrap();

    let want: StringViewArray = std::iter::repeat_n(None::<String>, 2).collect();
    assert_eq!(got, &want);
}
// Utf8View scalars with an all-true mask: only the truthy value appears.
#[test]
fn test_zip_kernel_scalar_string_array_view_all_true() {
    let when_true = Scalar::new(StringViewArray::from(vec!["hello"]));
    let when_false = Scalar::new(StringViewArray::from(vec!["world"]));
    let selection = BooleanArray::from(vec![true; 2]);

    let zipped = zip(&selection, &when_true, &when_false).unwrap();
    let got = zipped.as_string_view();

    let want = StringViewArray::from(vec![Some("hello"); 2]);
    assert_eq!(got, &want);
}
// Utf8View scalars with an all-false mask: only the falsy value appears.
#[test]
fn test_zip_kernel_scalar_string_array_view_all_false() {
    let when_true = Scalar::new(StringViewArray::from(vec!["hello"]));
    let when_false = Scalar::new(StringViewArray::from(vec!["world"]));
    let selection = BooleanArray::from(vec![false; 2]);

    let zipped = zip(&selection, &when_true, &when_false).unwrap();
    let got = zipped.as_string_view();

    let want = StringViewArray::from(vec![Some("world"); 2]);
    assert_eq!(got, &want);
}
// Utf8View scalars whose values are both longer than 12 bytes; one row from each side.
#[test]
fn test_zip_kernel_scalar_strings_large_strings() {
    let long_true = "longer than 12 bytes";
    let long_false = "another longer than 12 bytes";

    let when_true = Scalar::new(StringViewArray::from(vec![long_true]));
    let when_false = Scalar::new(StringViewArray::from(vec![long_false]));
    let selection = BooleanArray::from(vec![true, false]);

    let zipped = zip(&selection, &when_true, &when_false).unwrap();
    let got = zipped.as_string_view();

    let want = StringViewArray::from(vec![Some(long_true), Some(long_false)]);
    assert_eq!(got, &want);
}
// Utf8View scalars mixing a short truthy value with a falsy value longer than 12 bytes.
#[test]
fn test_zip_kernel_scalar_strings_array_view_large_short_strings() {
    let short = "hello";
    let long = "longer than 12 bytes";

    let when_true = Scalar::new(StringViewArray::from(vec![short]));
    let when_false = Scalar::new(StringViewArray::from(vec![long]));
    let selection = BooleanArray::from(vec![true, false, true, false]);

    let zipped = zip(&selection, &when_true, &when_false).unwrap();
    let got = zipped.as_string_view();

    let want = StringViewArray::from(vec![Some(short), Some(long), Some(short), Some(long)]);
    assert_eq!(got, &want);
}
// Utf8View scalars with values longer than 12 bytes and an all-true mask: only the
// truthy value appears.
#[test]
fn test_zip_kernel_scalar_strings_array_view_large_all_true() {
    let long_true = "longer than 12 bytes";
    let long_false = "another longer than 12 bytes";

    let when_true = Scalar::new(StringViewArray::from(vec![long_true]));
    let when_false = Scalar::new(StringViewArray::from(vec![long_false]));
    let selection = BooleanArray::from(vec![true; 2]);

    let zipped = zip(&selection, &when_true, &when_false).unwrap();
    let got = zipped.as_string_view();

    let want = StringViewArray::from(vec![Some(long_true); 2]);
    assert_eq!(got, &want);
}
// Utf8View scalars with values longer than 12 bytes and an all-false mask: only the
// falsy value appears.
#[test]
fn test_zip_kernel_scalar_strings_array_view_large_all_false() {
    let long_true = "longer than 12 bytes";
    let long_false = "another longer than 12 bytes";

    let when_true = Scalar::new(StringViewArray::from(vec![long_true]));
    let when_false = Scalar::new(StringViewArray::from(vec![long_false]));
    let selection = BooleanArray::from(vec![false; 2]);

    let zipped = zip(&selection, &when_true, &when_false).unwrap();
    let got = zipped.as_string_view();

    let want = StringViewArray::from(vec![Some(long_false); 2]);
    assert_eq!(got, &want);
}
}