blob: abe510bdabc6e469ca3c802566ef77c0c7baafe0 [file] [log] [blame]
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
//! Buffer builders
mod boolean;
mod null;
mod offset;
pub use boolean::*;
pub use null::*;
pub use offset::*;
use crate::{ArrowNativeType, Buffer, MutableBuffer};
use std::marker::PhantomData;
/// Builder for creating a [Buffer] object.
///
/// A [Buffer] is the underlying data structure of Arrow's Arrays.
///
/// For all supported types, there are type definitions for the
/// generic version of `BufferBuilder<T>`, e.g. `BufferBuilder`.
///
/// # Example:
///
/// ```
/// # use arrow_buffer::builder::BufferBuilder;
///
/// let mut builder = BufferBuilder::<u8>::new(100);
/// builder.append_slice(&[42, 43, 44]);
/// builder.append(45);
/// let buffer = builder.finish();
///
/// assert_eq!(unsafe { buffer.typed_data::<u8>() }, &[42, 43, 44, 45]);
/// ```
#[derive(Debug)]
pub struct BufferBuilder<T: ArrowNativeType> {
buffer: MutableBuffer,
len: usize,
_marker: PhantomData<T>,
}
impl<T: ArrowNativeType> BufferBuilder<T> {
/// Creates a new builder with initial capacity for _at least_ `capacity`
/// elements of type `T`.
///
/// The capacity can later be manually adjusted with the
/// [`reserve()`](BufferBuilder::reserve) method.
/// Also the
/// [`append()`](BufferBuilder::append),
/// [`append_slice()`](BufferBuilder::append_slice) and
/// [`advance()`](BufferBuilder::advance)
/// methods automatically increase the capacity if needed.
///
/// # Example:
///
/// ```
/// # use arrow_buffer::builder::BufferBuilder;
///
/// let mut builder = BufferBuilder::<u8>::new(10);
///
/// assert!(builder.capacity() >= 10);
/// ```
#[inline]
pub fn new(capacity: usize) -> Self {
let buffer = MutableBuffer::new(capacity * std::mem::size_of::<T>());
Self {
buffer,
len: 0,
_marker: PhantomData,
}
}
/// Creates a new builder from a [`MutableBuffer`]
pub fn new_from_buffer(buffer: MutableBuffer) -> Self {
let buffer_len = buffer.len();
Self {
buffer,
len: buffer_len / std::mem::size_of::<T>(),
_marker: PhantomData,
}
}
/// Returns the current number of array elements in the internal buffer.
///
/// # Example:
///
/// ```
/// # use arrow_buffer::builder::BufferBuilder;
///
/// let mut builder = BufferBuilder::<u8>::new(10);
/// builder.append(42);
///
/// assert_eq!(builder.len(), 1);
/// ```
pub fn len(&self) -> usize {
self.len
}
/// Returns whether the internal buffer is empty.
///
/// # Example:
///
/// ```
/// # use arrow_buffer::builder::BufferBuilder;
///
/// let mut builder = BufferBuilder::<u8>::new(10);
/// builder.append(42);
///
/// assert_eq!(builder.is_empty(), false);
/// ```
pub fn is_empty(&self) -> bool {
self.len == 0
}
/// Returns the actual capacity (number of elements) of the internal buffer.
///
/// Note: the internal capacity returned by this method might be larger than
/// what you'd expect after setting the capacity in the `new()` or `reserve()`
/// functions.
pub fn capacity(&self) -> usize {
let byte_capacity = self.buffer.capacity();
byte_capacity / std::mem::size_of::<T>()
}
/// Increases the number of elements in the internal buffer by `n`
/// and resizes the buffer as needed.
///
/// The values of the newly added elements are 0.
/// This method is usually used when appending `NULL` values to the buffer
/// as they still require physical memory space.
///
/// # Example:
///
/// ```
/// # use arrow_buffer::builder::BufferBuilder;
///
/// let mut builder = BufferBuilder::<u8>::new(10);
/// builder.advance(2);
///
/// assert_eq!(builder.len(), 2);
/// ```
#[inline]
pub fn advance(&mut self, i: usize) {
self.buffer.extend_zeros(i * std::mem::size_of::<T>());
self.len += i;
}
/// Reserves memory for _at least_ `n` more elements of type `T`.
///
/// # Example:
///
/// ```
/// # use arrow_buffer::builder::BufferBuilder;
///
/// let mut builder = BufferBuilder::<u8>::new(10);
/// builder.reserve(10);
///
/// assert!(builder.capacity() >= 20);
/// ```
#[inline]
pub fn reserve(&mut self, n: usize) {
self.buffer.reserve(n * std::mem::size_of::<T>());
}
/// Appends a value of type `T` into the builder,
/// growing the internal buffer as needed.
///
/// # Example:
///
/// ```
/// # use arrow_buffer::builder::BufferBuilder;
///
/// let mut builder = BufferBuilder::<u8>::new(10);
/// builder.append(42);
///
/// assert_eq!(builder.len(), 1);
/// ```
#[inline]
pub fn append(&mut self, v: T) {
self.reserve(1);
self.buffer.push(v);
self.len += 1;
}
/// Appends a value of type `T` into the builder N times,
/// growing the internal buffer as needed.
///
/// # Example:
///
/// ```
/// # use arrow_buffer::builder::BufferBuilder;
///
/// let mut builder = BufferBuilder::<u8>::new(10);
/// builder.append_n(10, 42);
///
/// assert_eq!(builder.len(), 10);
/// ```
#[inline]
pub fn append_n(&mut self, n: usize, v: T) {
self.reserve(n);
self.extend(std::iter::repeat_n(v, n))
}
/// Appends `n`, zero-initialized values
///
/// # Example:
///
/// ```
/// # use arrow_buffer::builder::BufferBuilder;
///
/// let mut builder = BufferBuilder::<u32>::new(10);
/// builder.append_n_zeroed(3);
///
/// assert_eq!(builder.len(), 3);
/// assert_eq!(builder.as_slice(), &[0, 0, 0])
#[inline]
pub fn append_n_zeroed(&mut self, n: usize) {
self.buffer.extend_zeros(n * std::mem::size_of::<T>());
self.len += n;
}
/// Appends a slice of type `T`, growing the internal buffer as needed.
///
/// # Example:
///
/// ```
/// # use arrow_buffer::builder::BufferBuilder;
///
/// let mut builder = BufferBuilder::<u8>::new(10);
/// builder.append_slice(&[42, 44, 46]);
///
/// assert_eq!(builder.len(), 3);
/// ```
#[inline]
pub fn append_slice(&mut self, slice: &[T]) {
self.buffer.extend_from_slice(slice);
self.len += slice.len();
}
/// View the contents of this buffer as a slice
///
/// ```
/// # use arrow_buffer::builder::BufferBuilder;
///
/// let mut builder = BufferBuilder::<f64>::new(10);
/// builder.append(1.3);
/// builder.append_n(2, 2.3);
///
/// assert_eq!(builder.as_slice(), &[1.3, 2.3, 2.3]);
/// ```
#[inline]
pub fn as_slice(&self) -> &[T] {
// SAFETY
//
// - MutableBuffer is aligned and initialized for len elements of T
// - MutableBuffer corresponds to a single allocation
// - MutableBuffer does not support modification whilst active immutable borrows
unsafe { std::slice::from_raw_parts(self.buffer.as_ptr() as _, self.len) }
}
/// View the contents of this buffer as a mutable slice
///
/// # Example:
///
/// ```
/// # use arrow_buffer::builder::BufferBuilder;
///
/// let mut builder = BufferBuilder::<f32>::new(10);
///
/// builder.append_slice(&[1., 2., 3.4]);
/// assert_eq!(builder.as_slice(), &[1., 2., 3.4]);
///
/// builder.as_slice_mut()[1] = 4.2;
/// assert_eq!(builder.as_slice(), &[1., 4.2, 3.4]);
/// ```
#[inline]
pub fn as_slice_mut(&mut self) -> &mut [T] {
// SAFETY
//
// - MutableBuffer is aligned and initialized for len elements of T
// - MutableBuffer corresponds to a single allocation
// - MutableBuffer does not support modification whilst active immutable borrows
unsafe { std::slice::from_raw_parts_mut(self.buffer.as_mut_ptr() as _, self.len) }
}
/// Shorten this BufferBuilder to `len` items
///
/// If `len` is greater than the builder's current length, this has no effect
///
/// # Example:
///
/// ```
/// # use arrow_buffer::builder::BufferBuilder;
///
/// let mut builder = BufferBuilder::<u16>::new(10);
///
/// builder.append_slice(&[42, 44, 46]);
/// assert_eq!(builder.as_slice(), &[42, 44, 46]);
///
/// builder.truncate(2);
/// assert_eq!(builder.as_slice(), &[42, 44]);
///
/// builder.append(12);
/// assert_eq!(builder.as_slice(), &[42, 44, 12]);
/// ```
#[inline]
pub fn truncate(&mut self, len: usize) {
self.buffer.truncate(len * std::mem::size_of::<T>());
self.len = len;
}
/// # Safety
/// This requires the iterator be a trusted length. This could instead require
/// the iterator implement `TrustedLen` once that is stabilized.
#[inline]
pub unsafe fn append_trusted_len_iter(&mut self, iter: impl IntoIterator<Item = T>) {
let iter = iter.into_iter();
let len = iter
.size_hint()
.1
.expect("append_trusted_len_iter expects upper bound");
self.reserve(len);
self.extend(iter);
}
/// Resets this builder and returns an immutable [Buffer].
///
/// # Example:
///
/// ```
/// # use arrow_buffer::builder::BufferBuilder;
///
/// let mut builder = BufferBuilder::<u8>::new(10);
/// builder.append_slice(&[42, 44, 46]);
///
/// let buffer = builder.finish();
///
/// assert_eq!(unsafe { buffer.typed_data::<u8>() }, &[42, 44, 46]);
/// ```
#[inline]
pub fn finish(&mut self) -> Buffer {
let buf = std::mem::take(&mut self.buffer);
self.len = 0;
buf.into()
}
}
impl<T: ArrowNativeType> Default for BufferBuilder<T> {
fn default() -> Self {
Self::new(0)
}
}
impl<T: ArrowNativeType> Extend<T> for BufferBuilder<T> {
fn extend<I: IntoIterator<Item = T>>(&mut self, iter: I) {
self.buffer.extend(iter.into_iter().inspect(|_| {
self.len += 1;
}))
}
}
impl<T: ArrowNativeType> From<Vec<T>> for BufferBuilder<T> {
fn from(value: Vec<T>) -> Self {
Self::new_from_buffer(MutableBuffer::from(value))
}
}
impl<T: ArrowNativeType> FromIterator<T> for BufferBuilder<T> {
fn from_iter<I: IntoIterator<Item = T>>(iter: I) -> Self {
let mut builder = Self::default();
builder.extend(iter);
builder
}
}
#[cfg(test)]
mod tests {
use super::*;
use std::mem;
#[test]
fn default() {
let builder = BufferBuilder::<u32>::default();
assert!(builder.is_empty());
assert!(builder.buffer.is_empty());
assert_eq!(builder.buffer.capacity(), 0);
}
#[test]
fn from_iter() {
let input = [1u16, 2, 3, 4];
let builder = input.into_iter().collect::<BufferBuilder<_>>();
assert_eq!(builder.len(), 4);
assert_eq!(builder.buffer.len(), 4 * mem::size_of::<u16>());
}
#[test]
fn extend() {
let input = [1, 2];
let mut builder = input.into_iter().collect::<BufferBuilder<_>>();
assert_eq!(builder.len(), 2);
builder.extend([3, 4]);
assert_eq!(builder.len(), 4);
}
}