blob: e6f426615be5ab521bf2b4c5eb3fdffc9dc7ef80 [file] [log] [blame]
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
use crate::{BooleanBufferBuilder, MutableBuffer, NullBuffer};
/// Builder for creating [`NullBuffer`]
///
/// # Performance
///
/// This builder only materializes the buffer when we append `false`.
/// If you only append `true`s to the builder, what you get will be
/// `None` when calling [`finish`](#method.finish).
///
/// This optimization is **very** important for the performance as it avoids
/// allocating memory for the null buffer when there are no nulls.
///
/// See [`Self::allocated_size`] to get the current memory allocated by the builder.
///
/// # Example
/// ```
/// # use arrow_buffer::NullBufferBuilder;
/// let mut builder = NullBufferBuilder::new(8);
/// builder.append_n_non_nulls(8);
/// // If no non null values are appended, the null buffer is not created
/// let buffer = builder.finish();
/// assert!(buffer.is_none());
/// // however, if a null value is appended, the null buffer is created
/// let mut builder = NullBufferBuilder::new(8);
/// builder.append_n_non_nulls(7);
/// builder.append_null();
/// let buffer = builder.finish().unwrap();
/// assert_eq!(buffer.len(), 8);
/// assert_eq!(buffer.iter().collect::<Vec<_>>(), vec![true, true, true, true, true, true, true, false]);
/// ```
#[derive(Debug)]
pub struct NullBufferBuilder {
/// The bitmap builder to store the null buffer:
/// * `Some` if any nulls have been appended ("materialized")
/// * `None` if no nulls have been appended.
bitmap_builder: Option<BooleanBufferBuilder>,
/// Length of the buffer before materializing.
///
/// if `bitmap_buffer` buffer is `Some`, this value is not used.
len: usize,
/// Initial capacity of the `bitmap_builder`, when it is materialized.
capacity: usize,
}
impl NullBufferBuilder {
/// Creates a new empty builder.
///
/// Note that this method does not allocate any memory, regardless of the
/// `capacity` parameter. If an allocation is required, `capacity` is the
/// size in bits (not bytes) that will be allocated at minimum.
pub fn new(capacity: usize) -> Self {
Self {
bitmap_builder: None,
len: 0,
capacity,
}
}
/// Creates a new builder with given length.
pub fn new_with_len(len: usize) -> Self {
Self {
bitmap_builder: None,
len,
capacity: len,
}
}
/// Creates a new builder from a `MutableBuffer`.
pub fn new_from_buffer(buffer: MutableBuffer, len: usize) -> Self {
let capacity = buffer.len() * 8;
assert!(len <= capacity);
let bitmap_builder = Some(BooleanBufferBuilder::new_from_buffer(buffer, len));
Self {
bitmap_builder,
len,
capacity,
}
}
/// Appends `n` `true`s into the builder
/// to indicate that these `n` items are not nulls.
#[inline]
pub fn append_n_non_nulls(&mut self, n: usize) {
if let Some(buf) = self.bitmap_builder.as_mut() {
buf.append_n(n, true)
} else {
self.len += n;
}
}
/// Appends a `true` into the builder
/// to indicate that this item is not null.
#[inline]
pub fn append_non_null(&mut self) {
if let Some(buf) = self.bitmap_builder.as_mut() {
buf.append(true)
} else {
self.len += 1;
}
}
/// Appends `n` `false`s into the builder
/// to indicate that these `n` items are nulls.
#[inline]
pub fn append_n_nulls(&mut self, n: usize) {
self.materialize_if_needed();
self.bitmap_builder.as_mut().unwrap().append_n(n, false);
}
/// Appends a `false` into the builder
/// to indicate that this item is null.
#[inline]
pub fn append_null(&mut self) {
self.materialize_if_needed();
self.bitmap_builder.as_mut().unwrap().append(false);
}
/// Appends a boolean value into the builder.
#[inline]
pub fn append(&mut self, not_null: bool) {
if not_null {
self.append_non_null()
} else {
self.append_null()
}
}
/// Gets a bit in the buffer at `index`
#[inline]
pub fn is_valid(&self, index: usize) -> bool {
if let Some(ref buf) = self.bitmap_builder {
buf.get_bit(index)
} else {
true
}
}
/// Truncates the builder to the given length
///
/// If `len` is greater than the buffer's current length, this has no effect
#[inline]
pub fn truncate(&mut self, len: usize) {
if let Some(buf) = self.bitmap_builder.as_mut() {
buf.truncate(len);
} else if len <= self.len {
self.len = len
}
}
/// Appends a boolean slice into the builder
/// to indicate the validations of these items.
pub fn append_slice(&mut self, slice: &[bool]) {
if slice.iter().any(|v| !v) {
self.materialize_if_needed()
}
if let Some(buf) = self.bitmap_builder.as_mut() {
buf.append_slice(slice)
} else {
self.len += slice.len();
}
}
/// Append [`NullBuffer`] to this [`NullBufferBuilder`]
///
/// This is useful when you want to concatenate two null buffers.
pub fn append_buffer(&mut self, buffer: &NullBuffer) {
if buffer.null_count() > 0 {
self.materialize_if_needed();
}
if let Some(buf) = self.bitmap_builder.as_mut() {
buf.append_buffer(buffer.inner())
} else {
self.len += buffer.len();
}
}
/// Builds the null buffer and resets the builder.
/// Returns `None` if the builder only contains `true`s.
pub fn finish(&mut self) -> Option<NullBuffer> {
self.len = 0;
Some(NullBuffer::new(self.bitmap_builder.take()?.finish()))
}
/// Builds the [NullBuffer] without resetting the builder.
pub fn finish_cloned(&self) -> Option<NullBuffer> {
let buffer = self.bitmap_builder.as_ref()?.finish_cloned();
Some(NullBuffer::new(buffer))
}
/// Returns the inner bitmap builder as slice
pub fn as_slice(&self) -> Option<&[u8]> {
Some(self.bitmap_builder.as_ref()?.as_slice())
}
fn materialize_if_needed(&mut self) {
if self.bitmap_builder.is_none() {
self.materialize()
}
}
#[cold]
fn materialize(&mut self) {
if self.bitmap_builder.is_none() {
let mut b = BooleanBufferBuilder::new(self.len.max(self.capacity));
b.append_n(self.len, true);
self.bitmap_builder = Some(b);
}
}
/// Return a mutable reference to the inner bitmap slice.
pub fn as_slice_mut(&mut self) -> Option<&mut [u8]> {
self.bitmap_builder.as_mut().map(|b| b.as_slice_mut())
}
/// Return the allocated size of this builder, in bytes, useful for memory accounting.
pub fn allocated_size(&self) -> usize {
self.bitmap_builder
.as_ref()
.map(|b| b.capacity() / 8)
.unwrap_or(0)
}
}
impl NullBufferBuilder {
/// Return the number of bits in the buffer.
pub fn len(&self) -> usize {
self.bitmap_builder.as_ref().map_or(self.len, |b| b.len())
}
/// Check if the builder is empty.
pub fn is_empty(&self) -> bool {
self.len() == 0
}
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_null_buffer_builder() {
let mut builder = NullBufferBuilder::new(0);
builder.append_null();
builder.append_non_null();
builder.append_n_nulls(2);
builder.append_n_non_nulls(2);
assert_eq!(6, builder.len());
assert_eq!(64, builder.allocated_size());
let buf = builder.finish().unwrap();
assert_eq!(&[0b110010_u8], buf.validity());
}
#[test]
fn test_null_buffer_builder_all_nulls() {
let mut builder = NullBufferBuilder::new(0);
builder.append_null();
builder.append_n_nulls(2);
builder.append_slice(&[false, false, false]);
assert_eq!(6, builder.len());
assert_eq!(64, builder.allocated_size());
let buf = builder.finish().unwrap();
assert_eq!(&[0b0_u8], buf.validity());
}
#[test]
fn test_null_buffer_builder_no_null() {
let mut builder = NullBufferBuilder::new(0);
builder.append_non_null();
builder.append_n_non_nulls(2);
builder.append_slice(&[true, true, true]);
assert_eq!(6, builder.len());
assert_eq!(0, builder.allocated_size());
let buf = builder.finish();
assert!(buf.is_none());
}
#[test]
fn test_null_buffer_builder_reset() {
let mut builder = NullBufferBuilder::new(0);
builder.append_slice(&[true, false, true]);
builder.finish();
assert!(builder.is_empty());
builder.append_slice(&[true, true, true]);
assert!(builder.finish().is_none());
assert!(builder.is_empty());
builder.append_slice(&[true, true, false, true]);
let buf = builder.finish().unwrap();
assert_eq!(&[0b1011_u8], buf.validity());
}
#[test]
fn test_null_buffer_builder_is_valid() {
let mut builder = NullBufferBuilder::new(0);
builder.append_n_non_nulls(6);
assert!(builder.is_valid(0));
builder.append_null();
assert!(!builder.is_valid(6));
builder.append_non_null();
assert!(builder.is_valid(7));
}
#[test]
fn test_null_buffer_builder_truncate() {
let mut builder = NullBufferBuilder::new(10);
builder.append_n_non_nulls(16);
assert_eq!(builder.as_slice(), None);
builder.truncate(20);
assert_eq!(builder.as_slice(), None);
assert_eq!(builder.len(), 16);
assert_eq!(builder.allocated_size(), 0);
builder.truncate(14);
assert_eq!(builder.as_slice(), None);
assert_eq!(builder.len(), 14);
builder.append_null();
builder.append_non_null();
assert_eq!(builder.as_slice().unwrap(), &[0xFF, 0b10111111]);
assert_eq!(builder.allocated_size(), 64);
}
#[test]
fn test_null_buffer_builder_truncate_never_materialized() {
let mut builder = NullBufferBuilder::new(0);
assert_eq!(builder.len(), 0);
builder.append_n_nulls(2); // doesn't materialize
assert_eq!(builder.len(), 2);
builder.truncate(1);
assert_eq!(builder.len(), 1);
}
#[test]
fn test_append_buffers() {
let mut builder = NullBufferBuilder::new(0);
let buffer1 = NullBuffer::from(&[true, true]);
let buffer2 = NullBuffer::from(&[true, true, false]);
builder.append_buffer(&buffer1);
builder.append_buffer(&buffer2);
assert_eq!(builder.as_slice().unwrap(), &[0b01111_u8]);
}
#[test]
fn test_append_buffers_with_unaligned_length() {
let mut builder = NullBufferBuilder::new(0);
let buffer = NullBuffer::from(&[true, true, false, true, false]);
builder.append_buffer(&buffer);
assert_eq!(builder.as_slice().unwrap(), &[0b01011_u8]);
let buffer = NullBuffer::from(&[false, false, true, true, true, false, false]);
builder.append_buffer(&buffer);
assert_eq!(builder.as_slice().unwrap(), &[0b10001011_u8, 0b0011_u8]);
}
#[test]
fn test_append_empty_buffer() {
let mut builder = NullBufferBuilder::new(0);
let buffer = NullBuffer::from(&[true, true, false, true]);
builder.append_buffer(&buffer);
assert_eq!(builder.as_slice().unwrap(), &[0b1011_u8]);
let buffer = NullBuffer::from(&[]);
builder.append_buffer(&buffer);
assert_eq!(builder.as_slice().unwrap(), &[0b1011_u8]);
}
#[test]
fn test_should_not_materialize_when_appending_all_valid_buffers() {
let mut builder = NullBufferBuilder::new(0);
let buffer = NullBuffer::from(&[true; 10]);
builder.append_buffer(&buffer);
let buffer = NullBuffer::from(&[true; 2]);
builder.append_buffer(&buffer);
assert_eq!(builder.finish(), None);
}
}