blob: 408dfbaac90935d268452f04629d272a1bda29ff [file] [log] [blame]
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
//! Contains declarations to bind to the [C Data Interface](https://arrow.apache.org/docs/format/CDataInterface.html).
use crate::bit_mask::set_bits;
use crate::{ArrayData, layout};
use arrow_buffer::buffer::NullBuffer;
use arrow_buffer::{Buffer, MutableBuffer, ScalarBuffer};
use arrow_schema::DataType;
use std::ffi::c_void;
/// ABI-compatible struct for ArrowArray from C Data Interface
/// See <https://arrow.apache.org/docs/format/CDataInterface.html#structure-definitions>
///
/// The struct is `#[repr(C)]`, so the order and types of the fields below are
/// part of the ABI and must not be changed.
///
/// Dropping a value calls its `release` callback if one is still set; a value
/// whose `release` is `None` is considered released.
///
/// ```
/// # use arrow_data::ArrayData;
/// # use arrow_data::ffi::FFI_ArrowArray;
/// fn export_array(array: &ArrayData) -> FFI_ArrowArray {
///     FFI_ArrowArray::new(array)
/// }
/// ```
#[repr(C)]
#[derive(Debug)]
pub struct FFI_ArrowArray {
// Number of elements, reported by `len()`.
length: i64,
// Number of nulls, reported by `null_count()`; `null_count_opt()` maps
// negative values to `None`.
null_count: i64,
// Element offset, reported by `offset()`.
offset: i64,
// Number of entries in the `buffers` table.
n_buffers: i64,
// Number of entries in the `children` table.
n_children: i64,
// Table of `n_buffers` buffer pointers; an entry may itself be null.
buffers: *mut *const c_void,
// Table of `n_children` pointers to child arrays.
children: *mut *mut FFI_ArrowArray,
// Dictionary values, or null when there is no dictionary.
dictionary: *mut FFI_ArrowArray,
// Callback that frees everything owned by this array; `None` once released.
release: Option<unsafe extern "C" fn(arg1: *mut FFI_ArrowArray)>,
// When exported, this MUST contain everything that is owned by this array.
// For example, any buffer pointed to in `buffers` must be here, as well
// as the `buffers` pointer itself.
// In other words, everything in [FFI_ArrowArray] must be owned by
// `private_data` and can assume that it does not outlive `private_data`.
private_data: *mut c_void,
}
impl Drop for FFI_ArrowArray {
    /// Runs the release callback when one is still installed.
    ///
    /// An array whose `release` is `None` has already been released (or was
    /// never populated), so there is nothing to do for it.
    fn drop(&mut self) {
        if let Some(release) = self.release {
            // SAFETY: `self` is a valid, exclusive reference for the duration
            // of the call; the callback is the one stored alongside the data
            // it is responsible for freeing.
            unsafe { release(self) }
        }
    }
}
// `FFI_ArrowArray` contains raw pointers, so `Send` and `Sync` are not derived
// automatically; they are asserted manually here.
// NOTE(review): soundness relies on the pointed-to data being safe to move to
// and share between threads — presumably guaranteed by whoever produced the
// array; confirm against the C Data Interface requirements.
unsafe impl Send for FFI_ArrowArray {}
unsafe impl Sync for FFI_ArrowArray {}
// Release callback installed on every [FFI_ArrowArray] built by
// `FFI_ArrowArray::new`.
//
// Frees the exported children, the exported dictionary (if any) and finally
// the private data that owns the buffers, then marks the array as released by
// clearing its `release` field. A null `array` is ignored.
unsafe extern "C" fn release_array(array: *mut FFI_ArrowArray) {
    // SAFETY: a non-null `array` is expected to point to a live array that was
    // exported by this module.
    let exported = unsafe { array.as_mut() };
    let Some(exported) = exported else {
        return;
    };

    // Reclaim ownership of the private data; it is dropped when this function
    // returns, which in turn drops the buffers it holds.
    // SAFETY: `private_data` was produced by `Box::into_raw` on an
    // `ArrayPrivateData` when the array was created.
    let owned = unsafe { Box::from_raw(exported.private_data as *mut ArrayPrivateData) };

    for &child in owned.children.iter() {
        // SAFETY: every child pointer was produced by `Box::into_raw`.
        drop(unsafe { Box::from_raw(child) });
    }

    if !owned.dictionary.is_null() {
        // SAFETY: a non-null dictionary pointer was produced by `Box::into_raw`.
        drop(unsafe { Box::from_raw(owned.dictionary) });
    }

    exported.release = None;
}
/// Aligns the provided `nulls` to the provided `data_offset`
///
/// Returns `None` when there is no null buffer. Otherwise the returned buffer
/// is one of:
///
/// * the underlying buffer itself, when its offset already equals `data_offset`;
/// * a slice of the bitmap, when `data_offset` is zero;
/// * a freshly built buffer into which the validity bits are copied starting
///   at position `data_offset`.
///
/// This is a temporary measure until offset is removed from ArrayData (#1799)
fn align_nulls(data_offset: usize, nulls: Option<&NullBuffer>) -> Option<Buffer> {
    let mask = nulls?;

    let aligned: Buffer = if data_offset == mask.offset() {
        // Underlying buffer is already aligned
        mask.buffer().clone()
    } else if data_offset == 0 {
        mask.inner().sliced()
    } else {
        // Re-pack the bits so that the first validity bit lands at `data_offset`.
        let mut realigned = MutableBuffer::new_null(data_offset + mask.len());
        set_bits(
            realigned.as_slice_mut(),
            mask.validity(),
            data_offset,
            mask.offset(),
            mask.len(),
        );
        realigned.into()
    };

    Some(aligned)
}
/// Owner of everything an exported [`FFI_ArrowArray`] points to.
///
/// `FFI_ArrowArray::new` boxes one of these and stores it behind
/// `private_data`; `release_array` turns it back into a `Box` and drops it.
struct ArrayPrivateData {
// Never read; held only so that the buffers whose addresses are stored in
// `buffers_ptr` stay alive for as long as this struct does.
#[allow(dead_code)]
buffers: Vec<Option<Buffer>>,
// Table of buffer addresses exposed through `FFI_ArrowArray::buffers`.
buffers_ptr: Box<[*const c_void]>,
// Exported children, each created with `Box::into_raw`; exposed through
// `FFI_ArrowArray::children` and freed one by one in `release_array`.
children: Box<[*mut FFI_ArrowArray]>,
// Exported dictionary created with `Box::into_raw`, or null when the array
// is not a dictionary array; freed in `release_array` when non-null.
dictionary: *mut FFI_ArrowArray,
}
impl FFI_ArrowArray {
/// creates a new `FFI_ArrowArray` from existing data.
pub fn new(data: &ArrayData) -> Self {
let data_layout = layout(data.data_type());
let mut buffers = if data_layout.can_contain_null_mask {
// * insert the null buffer at the start
// * make all others `Option<Buffer>`.
std::iter::once(align_nulls(data.offset(), data.nulls()))
.chain(data.buffers().iter().map(|b| Some(b.clone())))
.collect::<Vec<_>>()
} else {
data.buffers().iter().map(|b| Some(b.clone())).collect()
};
// `n_buffers` is the number of buffers by the spec.
let mut n_buffers = {
data_layout.buffers.len() + {
// If the layout has a null buffer by Arrow spec.
// Note that even the array doesn't have a null buffer because it has
// no null value, we still need to count 1 here to follow the spec.
usize::from(data_layout.can_contain_null_mask)
}
} as i64;
if data_layout.variadic {
// Save the lengths of all variadic buffers into a new buffer.
// The first buffer is `views`, and the rest are variadic.
let mut data_buffers_lengths = Vec::new();
for buffer in data.buffers().iter().skip(1) {
data_buffers_lengths.push(buffer.len() as i64);
n_buffers += 1;
}
buffers.push(Some(ScalarBuffer::from(data_buffers_lengths).into_inner()));
n_buffers += 1;
}
let buffers_ptr = buffers
.iter()
.flat_map(|maybe_buffer| match maybe_buffer {
Some(b) => Some(b.as_ptr() as *const c_void),
// This is for null buffer. We only put a null pointer for
// null buffer if by spec it can contain null mask.
None if data_layout.can_contain_null_mask => Some(std::ptr::null()),
None => None,
})
.collect::<Box<[_]>>();
let empty = vec![];
let (child_data, dictionary) = match data.data_type() {
DataType::Dictionary(_, _) => (
empty.as_slice(),
Box::into_raw(Box::new(FFI_ArrowArray::new(&data.child_data()[0]))),
),
_ => (data.child_data(), std::ptr::null_mut()),
};
let children = child_data
.iter()
.map(|child| Box::into_raw(Box::new(FFI_ArrowArray::new(child))))
.collect::<Box<_>>();
let n_children = children.len() as i64;
// As in the IPC format, emit null_count = length for Null type
let null_count = match data.data_type() {
DataType::Null => data.len(),
_ => data.null_count(),
};
// create the private data owning everything.
// any other data must be added here, e.g. via a struct, to track lifetime.
let mut private_data = Box::new(ArrayPrivateData {
buffers,
buffers_ptr,
children,
dictionary,
});
Self {
length: data.len() as i64,
null_count: null_count as i64,
offset: data.offset() as i64,
n_buffers,
n_children,
buffers: private_data.buffers_ptr.as_mut_ptr(),
children: private_data.children.as_mut_ptr(),
dictionary,
release: Some(release_array),
private_data: Box::into_raw(private_data) as *mut c_void,
}
}
/// Takes ownership of the pointed to [`FFI_ArrowArray`]
///
/// This acts to [move] the data out of `array`, setting the release callback to NULL
///
/// # Safety
///
/// * `array` must be [valid] for reads and writes
/// * `array` must be properly aligned
/// * `array` must point to a properly initialized value of [`FFI_ArrowArray`]
///
/// [move]: https://arrow.apache.org/docs/format/CDataInterface.html#moving-an-array
/// [valid]: https://doc.rust-lang.org/std/ptr/index.html#safety
pub unsafe fn from_raw(array: *mut FFI_ArrowArray) -> Self {
unsafe { std::ptr::replace(array, Self::empty()) }
}
/// create an empty `FFI_ArrowArray`, which can be used to import data into
pub fn empty() -> Self {
Self {
length: 0,
null_count: 0,
offset: 0,
n_buffers: 0,
n_children: 0,
buffers: std::ptr::null_mut(),
children: std::ptr::null_mut(),
dictionary: std::ptr::null_mut(),
release: None,
private_data: std::ptr::null_mut(),
}
}
/// the length of the array
#[inline]
pub fn len(&self) -> usize {
self.length as usize
}
/// whether the array is empty
#[inline]
pub fn is_empty(&self) -> bool {
self.length == 0
}
/// Whether the array has been released
#[inline]
pub fn is_released(&self) -> bool {
self.release.is_none()
}
/// the offset of the array
#[inline]
pub fn offset(&self) -> usize {
self.offset as usize
}
/// the null count of the array
#[inline]
pub fn null_count(&self) -> usize {
self.null_count as usize
}
/// Returns the null count, checking for validity
#[inline]
pub fn null_count_opt(&self) -> Option<usize> {
usize::try_from(self.null_count).ok()
}
/// Set the null count of the array
///
/// # Safety
/// Null count must match that of null buffer
#[inline]
pub unsafe fn set_null_count(&mut self, null_count: i64) {
self.null_count = null_count;
}
/// Returns the buffer at the provided index
///
/// # Panic
/// Panics if index >= self.num_buffers() or the buffer is not correctly aligned
#[inline]
pub fn buffer(&self, index: usize) -> *const u8 {
assert!(!self.buffers.is_null());
assert!(index < self.num_buffers());
// SAFETY:
// If buffers is not null must be valid for reads up to num_buffers
unsafe { std::ptr::read_unaligned((self.buffers as *mut *const u8).add(index)) }
}
/// Returns the number of buffers
#[inline]
pub fn num_buffers(&self) -> usize {
self.n_buffers as _
}
/// Returns the child at the provided index
#[inline]
pub fn child(&self, index: usize) -> &FFI_ArrowArray {
assert!(!self.children.is_null());
assert!(index < self.num_children());
// Safety:
// If children is not null must be valid for reads up to num_children
unsafe {
let child = std::ptr::read_unaligned(self.children.add(index));
child.as_ref().unwrap()
}
}
/// Returns the number of children
#[inline]
pub fn num_children(&self) -> usize {
self.n_children as _
}
/// Returns the dictionary if any
#[inline]
pub fn dictionary(&self) -> Option<&Self> {
// Safety:
// If dictionary is not null should be valid for reads of `Self`
unsafe { self.dictionary.as_ref() }
}
}
#[cfg(test)]
mod tests {
    use super::*;

    // More tests located in top-level arrow crate

    /// A `Null` array has no buffers at all: neither a validity bitmap nor
    /// data buffers, so both the reported count and the pointer table are empty.
    #[test]
    fn null_array_n_buffers() {
        let data = ArrayData::new_null(&DataType::Null, 10);

        let ffi_array = FFI_ArrowArray::new(&data);
        assert_eq!(0, ffi_array.n_buffers);

        // Only borrow the private data. Re-boxing it with `Box::from_raw`
        // would free it a second time if the assertion below panicked, because
        // `ffi_array` releases the same allocation when it is dropped.
        // SAFETY: `private_data` was created by `FFI_ArrowArray::new` from a
        // boxed `ArrayPrivateData` and stays alive until `ffi_array` is dropped.
        let private_data = unsafe { &*(ffi_array.private_data as *const ArrayPrivateData) };
        assert_eq!(0, private_data.buffers_ptr.len());
    }
}