blob: 0fd0e64355ade53690cfd7b5252501f888196ec7 [file] [log] [blame]
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
//! Common utilities for computation kernels.
use crate::array::*;
#[cfg(feature = "simd")]
use crate::bitmap::Bitmap;
use crate::buffer::{buffer_bin_and, buffer_bin_or, Buffer};
use crate::datatypes::*;
use crate::error::{ArrowError, Result};
use num::{One, ToPrimitive, Zero};
#[cfg(feature = "simd")]
use std::cmp::min;
use std::ops::Add;
/// Combines the null bitmaps of two arrays using a bitwise `and` operation.
///
/// This function is useful when implementing operations on higher level arrays.
pub(super) fn combine_option_bitmap(
left_data: &ArrayDataRef,
right_data: &ArrayDataRef,
len_in_bits: usize,
) -> Result<Option<Buffer>> {
let left_offset_in_bits = left_data.offset();
let right_offset_in_bits = right_data.offset();
let left = left_data.null_buffer();
let right = right_data.null_buffer();
match left {
None => match right {
None => Ok(None),
Some(r) => Ok(Some(r.bit_slice(right_offset_in_bits, len_in_bits))),
},
Some(l) => match right {
None => Ok(Some(l.bit_slice(left_offset_in_bits, len_in_bits))),
Some(r) => Ok(Some(buffer_bin_and(
&l,
left_offset_in_bits,
&r,
right_offset_in_bits,
len_in_bits,
))),
},
}
}
/// Compares the null bitmaps of two arrays using a bitwise `or` operation.
///
/// This function is useful when implementing operations on higher level arrays.
pub(super) fn compare_option_bitmap(
left_data: &ArrayDataRef,
right_data: &ArrayDataRef,
len_in_bits: usize,
) -> Result<Option<Buffer>> {
let left_offset_in_bits = left_data.offset();
let right_offset_in_bits = right_data.offset();
let left = left_data.null_buffer();
let right = right_data.null_buffer();
match left {
None => match right {
None => Ok(None),
Some(r) => Ok(Some(r.bit_slice(right_offset_in_bits, len_in_bits))),
},
Some(l) => match right {
None => Ok(Some(l.bit_slice(left_offset_in_bits, len_in_bits))),
Some(r) => Ok(Some(buffer_bin_or(
&l,
left_offset_in_bits,
&r,
right_offset_in_bits,
len_in_bits,
))),
},
}
}
/// Takes/filters a list array's inner data using the offsets of the list array.
///
/// Where a list array has indices `[0,2,5,10]`, taking indices of `[2,0]` returns
/// an array of the indices `[5..10, 0..2]` and offsets `[0,5,7]` (5 elements and 2
/// elements)
pub(super) fn take_value_indices_from_list<IndexType, OffsetType>(
values: &ArrayRef,
indices: &PrimitiveArray<IndexType>,
) -> Result<(PrimitiveArray<OffsetType>, Vec<OffsetType::Native>)>
where
IndexType: ArrowNumericType,
IndexType::Native: ToPrimitive,
OffsetType: ArrowNumericType,
OffsetType::Native: OffsetSizeTrait + Add + Zero + One,
PrimitiveArray<OffsetType>: From<Vec<Option<OffsetType::Native>>>,
{
// TODO: benchmark this function, there might be a faster unsafe alternative
// get list array's offsets
let list = values
.as_any()
.downcast_ref::<GenericListArray<OffsetType::Native>>()
.unwrap();
let offsets: Vec<OffsetType::Native> =
(0..=list.len()).map(|i| list.value_offset(i)).collect();
let mut new_offsets = Vec::with_capacity(indices.len());
let mut values = Vec::new();
let mut current_offset = OffsetType::Native::zero();
// add first offset
new_offsets.push(OffsetType::Native::zero());
// compute the value indices, and set offsets accordingly
for i in 0..indices.len() {
if indices.is_valid(i) {
let ix = ToPrimitive::to_usize(&indices.value(i)).ok_or_else(|| {
ArrowError::ComputeError("Cast to usize failed".to_string())
})?;
let start = offsets[ix];
let end = offsets[ix + 1];
current_offset = current_offset + (end - start);
new_offsets.push(current_offset);
let mut curr = start;
// if start == end, this slot is empty
while curr < end {
values.push(Some(curr));
curr = curr + OffsetType::Native::one();
}
} else {
new_offsets.push(current_offset);
}
}
Ok((PrimitiveArray::<OffsetType>::from(values), new_offsets))
}
/// Creates a new SIMD mask, i.e. `packed_simd::m32x16` or similar. that indicates if the
/// corresponding array slots represented by the mask are 'valid'.
///
/// Lanes of the SIMD mask can be set to 'valid' (`true`) if the corresponding array slot is not
/// `NULL`, as indicated by it's `Bitmap`, and is within the length of the array. Lanes outside the
/// length represent padding and are set to 'invalid' (`false`).
#[cfg(all(any(target_arch = "x86", target_arch = "x86_64"), feature = "simd"))]
unsafe fn is_valid<T>(
bitmap: &Option<Bitmap>,
i: usize,
simd_width: usize,
array_len: usize,
) -> T::SimdMask
where
T: ArrowNumericType,
{
let simd_upper_bound = i + simd_width;
let mut validity = T::mask_init(true);
// Validity based on `Bitmap`
if let Some(b) = bitmap {
for j in i..min(array_len, simd_upper_bound) {
if !b.is_set(j) {
validity = T::mask_set(validity, j - i, false);
}
}
}
// Validity based on the length of the Array
for j in array_len..simd_upper_bound {
validity = T::mask_set(validity, j - i, false);
}
validity
}
/// Performs a SIMD load but sets all 'invalid' lanes to a constant value.
///
/// 'invalid' lanes are lanes where the corresponding array slots are either `NULL` or between the
/// length and capacity of the array, i.e. in the padded region.
///
/// Note that `array` below has it's own `Bitmap` separate from the `bitmap` argument. This
/// function is used to prepare `array`'s for binary operations. The `bitmap` argument is the
/// `Bitmap` after the binary operation.
#[cfg(all(any(target_arch = "x86", target_arch = "x86_64"), feature = "simd"))]
pub(super) unsafe fn simd_load_set_invalid<T>(
array: &PrimitiveArray<T>,
bitmap: &Option<Bitmap>,
i: usize,
simd_width: usize,
fill_value: T::Native,
) -> T::Simd
where
T: ArrowNumericType,
T::Native: One,
{
let simd_with_zeros = T::load(array.value_slice(i, simd_width));
T::mask_select(
is_valid::<T>(bitmap, i, simd_width, array.len()),
simd_with_zeros,
T::init(fill_value),
)
}
#[cfg(test)]
mod tests {
use super::*;
use std::sync::Arc;
use crate::array::ArrayData;
use crate::datatypes::{DataType, ToByteSlice};
fn make_data_with_null_bit_buffer(
len: usize,
offset: usize,
null_bit_buffer: Option<Buffer>,
) -> Arc<ArrayData> {
// empty vec for buffers and children is not really correct, but for these tests we only care about the null bitmap
Arc::new(ArrayData::new(
DataType::UInt8,
len,
None,
null_bit_buffer,
offset,
vec![],
vec![],
))
}
#[test]
fn test_combine_option_bitmap() {
let none_bitmap = make_data_with_null_bit_buffer(8, 0, None);
let some_bitmap =
make_data_with_null_bit_buffer(8, 0, Some(Buffer::from([0b01001010])));
let inverse_bitmap =
make_data_with_null_bit_buffer(8, 0, Some(Buffer::from([0b10110101])));
assert_eq!(
None,
combine_option_bitmap(&none_bitmap, &none_bitmap, 8).unwrap()
);
assert_eq!(
Some(Buffer::from([0b01001010])),
combine_option_bitmap(&some_bitmap, &none_bitmap, 8).unwrap()
);
assert_eq!(
Some(Buffer::from([0b01001010])),
combine_option_bitmap(&none_bitmap, &some_bitmap, 8,).unwrap()
);
assert_eq!(
Some(Buffer::from([0b01001010])),
combine_option_bitmap(&some_bitmap, &some_bitmap, 8,).unwrap()
);
assert_eq!(
Some(Buffer::from([0b0])),
combine_option_bitmap(&some_bitmap, &inverse_bitmap, 8,).unwrap()
);
}
#[test]
fn test_compare_option_bitmap() {
let none_bitmap = make_data_with_null_bit_buffer(8, 0, None);
let some_bitmap =
make_data_with_null_bit_buffer(8, 0, Some(Buffer::from([0b01001010])));
let inverse_bitmap =
make_data_with_null_bit_buffer(8, 0, Some(Buffer::from([0b10110101])));
assert_eq!(
None,
compare_option_bitmap(&none_bitmap, &none_bitmap, 8).unwrap()
);
assert_eq!(
Some(Buffer::from([0b01001010])),
compare_option_bitmap(&some_bitmap, &none_bitmap, 8).unwrap()
);
assert_eq!(
Some(Buffer::from([0b01001010])),
compare_option_bitmap(&none_bitmap, &some_bitmap, 8,).unwrap()
);
assert_eq!(
Some(Buffer::from([0b01001010])),
compare_option_bitmap(&some_bitmap, &some_bitmap, 8,).unwrap()
);
assert_eq!(
Some(Buffer::from([0b11111111])),
compare_option_bitmap(&some_bitmap, &inverse_bitmap, 8,).unwrap()
);
}
fn build_list<P, S>(
list_data_type: DataType,
values: PrimitiveArray<P>,
offsets: Vec<S>,
) -> ArrayRef
where
P: ArrowPrimitiveType,
S: OffsetSizeTrait,
{
let value_data = values.data();
let value_offsets = Buffer::from(&offsets[..].to_byte_slice());
let list_data = ArrayData::builder(list_data_type)
.len(offsets.len() - 1)
.add_buffer(value_offsets)
.add_child_data(value_data)
.build();
Arc::new(GenericListArray::<S>::from(list_data))
}
#[test]
fn test_take_value_index_from_list() {
let list = build_list(
DataType::List(Box::new(NullableDataType::new(DataType::Int32, true))),
Int32Array::from(vec![0, 1, 2, 3, 4, 5, 6, 7, 8, 9]),
vec![0i32, 2i32, 5i32, 10i32],
);
let indices = UInt32Array::from(vec![2, 0]);
let (indexed, offsets) =
take_value_indices_from_list::<_, Int32Type>(&list, &indices).unwrap();
assert_eq!(indexed, Int32Array::from(vec![5, 6, 7, 8, 9, 0, 1]));
assert_eq!(offsets, vec![0, 5, 7]);
}
#[test]
fn test_take_value_index_from_large_list() {
let list = build_list(
DataType::LargeList(Box::new(NullableDataType::new(DataType::Int32, false))),
Int32Array::from(vec![0, 1, 2, 3, 4, 5, 6, 7, 8, 9]),
vec![0i64, 2i64, 5i64, 10i64],
);
let indices = UInt32Array::from(vec![2, 0]);
let (indexed, offsets) =
take_value_indices_from_list::<_, Int64Type>(&list, &indices).unwrap();
assert_eq!(indexed, Int64Array::from(vec![5, 6, 7, 8, 9, 0, 1]));
assert_eq!(offsets, vec![0, 5, 7]);
}
#[test]
#[cfg(all(any(target_arch = "x86", target_arch = "x86_64"), feature = "simd"))]
fn test_is_valid() {
let a = Int32Array::from(vec![
Some(15),
None,
None,
Some(1),
None,
None,
Some(5),
None,
None,
Some(4),
]);
let simd_lanes = 16;
let data = a.data();
let bitmap = data.null_bitmap();
let result = unsafe { is_valid::<Int32Type>(&bitmap, 0, simd_lanes, a.len()) };
for i in 0..simd_lanes {
if i % 3 != 0 || i > 9 {
assert_eq!(false, result.extract(i));
} else {
assert_eq!(true, result.extract(i));
}
}
}
#[test]
#[cfg(all(any(target_arch = "x86", target_arch = "x86_64"), feature = "simd"))]
fn test_simd_load_set_invalid() {
let a = Int64Array::from(vec![None, Some(15), Some(5), Some(0)]);
let new_bitmap = &Some(Bitmap::from(Buffer::from([0b00001010])));
let simd_lanes = 8;
let result = unsafe {
simd_load_set_invalid::<Int64Type>(&a, &new_bitmap, 0, simd_lanes, 1)
};
for i in 0..simd_lanes {
if i == 1 {
assert_eq!(15_i64, result.extract(i));
} else if i == 3 {
assert_eq!(0_i64, result.extract(i));
} else {
assert_eq!(1_i64, result.extract(i));
}
}
}
}