blob: d0108d236491ef3c0361c87e433f3562e741cb14 [file] [log] [blame]
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
use crate::array::{data::count_nulls, ArrayData, OffsetSizeTrait};
use crate::bitmap::Bitmap;
use crate::buffer::{Buffer, MutableBuffer};
use crate::datatypes::DataType;
use crate::util::bit_util;
// whether bits along the positions are equal
// `lhs_start`, `rhs_start` and `len` are _measured in bits_.
#[inline]
pub(super) fn equal_bits(
lhs_values: &[u8],
rhs_values: &[u8],
lhs_start: usize,
rhs_start: usize,
len: usize,
) -> bool {
(0..len).all(|i| {
bit_util::get_bit(lhs_values, lhs_start + i)
== bit_util::get_bit(rhs_values, rhs_start + i)
})
}
#[inline]
pub(super) fn equal_nulls(
lhs: &ArrayData,
rhs: &ArrayData,
lhs_nulls: Option<&Buffer>,
rhs_nulls: Option<&Buffer>,
lhs_start: usize,
rhs_start: usize,
len: usize,
) -> bool {
let lhs_null_count = count_nulls(lhs_nulls, lhs_start, len);
let rhs_null_count = count_nulls(rhs_nulls, rhs_start, len);
if lhs_null_count > 0 || rhs_null_count > 0 {
let lhs_values = lhs_nulls.unwrap().as_slice();
let rhs_values = rhs_nulls.unwrap().as_slice();
equal_bits(
lhs_values,
rhs_values,
lhs_start + lhs.offset(),
rhs_start + rhs.offset(),
len,
)
} else {
true
}
}
#[inline]
pub(super) fn base_equal(lhs: &ArrayData, rhs: &ArrayData) -> bool {
lhs.data_type() == rhs.data_type() && lhs.len() == rhs.len()
}
// whether the two memory regions are equal
#[inline]
pub(super) fn equal_len(
lhs_values: &[u8],
rhs_values: &[u8],
lhs_start: usize,
rhs_start: usize,
len: usize,
) -> bool {
lhs_values[lhs_start..(lhs_start + len)] == rhs_values[rhs_start..(rhs_start + len)]
}
/// Computes the logical validity bitmap of the array data using the
/// parent's array data. The parent should be a list or struct, else
/// the logical bitmap of the array is returned unaltered.
///
/// Parent data is passed along with the parent's logical bitmap, as
/// nested arrays could have a logical bitmap different to the physical
/// one on the `ArrayData`.
pub(super) fn child_logical_null_buffer(
parent_data: &ArrayData,
logical_null_buffer: Option<&Buffer>,
child_data: &ArrayData,
) -> Option<Buffer> {
let parent_len = parent_data.len();
let parent_bitmap = logical_null_buffer
.cloned()
.map(Bitmap::from)
.unwrap_or_else(|| {
let ceil = bit_util::ceil(parent_len, 8);
Bitmap::from(Buffer::from(vec![0b11111111; ceil]))
});
let self_null_bitmap = child_data.null_bitmap().clone().unwrap_or_else(|| {
let ceil = bit_util::ceil(child_data.len(), 8);
Bitmap::from(Buffer::from(vec![0b11111111; ceil]))
});
match parent_data.data_type() {
DataType::List(_) => Some(logical_list_bitmap::<i32>(
parent_data,
parent_bitmap,
self_null_bitmap,
)),
DataType::LargeList(_) => Some(logical_list_bitmap::<i64>(
parent_data,
parent_bitmap,
self_null_bitmap,
)),
DataType::FixedSizeList(_, len) => {
let len = *len as usize;
let array_offset = parent_data.offset();
let bitmap_len = bit_util::ceil(parent_len * len, 8);
let mut buffer = MutableBuffer::from_len_zeroed(bitmap_len);
let mut null_slice = buffer.as_slice_mut();
(array_offset..parent_len + array_offset).for_each(|index| {
let start = index * len;
let end = start + len;
let mask = parent_bitmap.is_set(index);
(start..end).for_each(|child_index| {
if mask && self_null_bitmap.is_set(child_index) {
bit_util::set_bit(&mut null_slice, child_index);
}
});
});
Some(buffer.into())
}
DataType::Struct(_) => {
// Arrow implementations are free to pad data, which can result in null buffers not
// having the same length.
// Rust bitwise comparisons will return an error if left AND right is performed on
// buffers of different length.
// This might be a valid case during integration testing, where we read Arrow arrays
// from IPC data, which has padding.
//
// We first perform a bitwise comparison, and if there is an error, we revert to a
// slower method that indexes into the buffers one-by-one.
let result = &parent_bitmap & &self_null_bitmap;
if let Ok(bitmap) = result {
return Some(bitmap.bits);
}
// slow path
let array_offset = parent_data.offset();
let mut buffer = MutableBuffer::new_null(parent_len);
let mut null_slice = buffer.as_slice_mut();
(0..parent_len).for_each(|index| {
if parent_bitmap.is_set(index + array_offset)
&& self_null_bitmap.is_set(index + array_offset)
{
bit_util::set_bit(&mut null_slice, index);
}
});
Some(buffer.into())
}
DataType::Union(_) => {
unimplemented!("Logical equality not yet implemented for union arrays")
}
DataType::Dictionary(_, _) => {
unimplemented!("Logical equality not yet implemented for nested dictionaries")
}
data_type => panic!("Data type {:?} is not a supported nested type", data_type),
}
}
// Calculate a list child's logical bitmap/buffer
#[inline]
fn logical_list_bitmap<OffsetSize: OffsetSizeTrait>(
parent_data: &ArrayData,
parent_bitmap: Bitmap,
child_bitmap: Bitmap,
) -> Buffer {
let offsets = parent_data.buffer::<OffsetSize>(0);
let offset_start = offsets.first().unwrap().to_usize().unwrap();
let offset_len = offsets.get(parent_data.len()).unwrap().to_usize().unwrap();
let mut buffer = MutableBuffer::new_null(offset_len - offset_start);
let mut null_slice = buffer.as_slice_mut();
offsets
.windows(2)
.enumerate()
.take(offset_len - offset_start)
.for_each(|(index, window)| {
let start = window[0].to_usize().unwrap();
let end = window[1].to_usize().unwrap();
let mask = parent_bitmap.is_set(index);
(start..end).for_each(|child_index| {
if mask && child_bitmap.is_set(child_index) {
bit_util::set_bit(&mut null_slice, child_index - offset_start);
}
});
});
buffer.into()
}
#[cfg(test)]
mod tests {
use super::*;
use crate::datatypes::{Field, ToByteSlice};
#[test]
fn test_logical_null_buffer() {
let child_data = ArrayData::builder(DataType::Int32)
.len(11)
.add_buffer(Buffer::from(
vec![1i32, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11].to_byte_slice(),
))
.build();
let data = ArrayData::builder(DataType::List(Box::new(Field::new(
"item",
DataType::Int32,
false,
))))
.len(7)
.add_buffer(Buffer::from(vec![0, 0, 3, 5, 6, 9, 10, 11].to_byte_slice()))
.null_bit_buffer(Buffer::from(vec![0b01011010]))
.add_child_data(child_data.clone())
.build();
// Get the child logical null buffer. The child is non-nullable, but because the list has nulls,
// we expect the child to logically have some nulls, inherited from the parent:
// [1, 2, 3, null, null, 6, 7, 8, 9, null, 11]
let nulls = child_logical_null_buffer(
&data,
data.null_buffer(),
data.child_data().get(0).unwrap(),
);
let expected = Some(Buffer::from(vec![0b11100111, 0b00000101]));
assert_eq!(nulls, expected);
// test with offset
let data = ArrayData::builder(DataType::List(Box::new(Field::new(
"item",
DataType::Int32,
false,
))))
.len(4)
.offset(3)
.add_buffer(Buffer::from(vec![0, 0, 3, 5, 6, 9, 10, 11].to_byte_slice()))
// the null_bit_buffer doesn't have an offset, i.e. cleared the 3 offset bits 0b[---]01011[010]
.null_bit_buffer(Buffer::from(vec![0b00001011]))
.add_child_data(child_data)
.build();
let nulls = child_logical_null_buffer(
&data,
data.null_buffer(),
data.child_data().get(0).unwrap(),
);
let expected = Some(Buffer::from(vec![0b00101111]));
assert_eq!(nulls, expected);
}
}