| // Licensed to the Apache Software Foundation (ASF) under one |
| // or more contributor license agreements. See the NOTICE file |
| // distributed with this work for additional information |
| // regarding copyright ownership. The ASF licenses this file |
| // to you under the Apache License, Version 2.0 (the |
| // "License"); you may not use this file except in compliance |
| // with the License. You may obtain a copy of the License at |
| // |
| // http://www.apache.org/licenses/LICENSE-2.0 |
| // |
| // Unless required by applicable law or agreed to in writing, |
| // software distributed under the License is distributed on an |
| // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
| // KIND, either express or implied. See the License for the |
| // specific language governing permissions and limitations |
| // under the License. |
| |
| use crate::bit_util::apply_bitwise_binary_op; |
| use crate::{BooleanBuffer, Buffer, MutableBuffer, bit_util}; |
| use std::ops::Range; |
| |
| /// Builder for [`BooleanBuffer`] |
| /// |
| /// # See Also |
| /// |
| /// * [`NullBuffer`] for building [`BooleanBuffer`]s for representing nulls |
| /// |
| /// [`NullBuffer`]: crate::NullBuffer |
| #[derive(Debug)] |
| pub struct BooleanBufferBuilder { |
| buffer: MutableBuffer, |
| len: usize, |
| } |
| |
| impl BooleanBufferBuilder { |
| /// Creates a new `BooleanBufferBuilder` with sufficient space for |
| /// `capacity` bits (not bytes). |
| /// |
| /// The capacity is rounded up to the nearest multiple of 8 for the |
| /// allocation. |
| #[inline] |
| pub fn new(capacity: usize) -> Self { |
| let byte_capacity = bit_util::ceil(capacity, 8); |
| let buffer = MutableBuffer::new(byte_capacity); |
| Self { buffer, len: 0 } |
| } |
| |
| /// Creates a new `BooleanBufferBuilder` from [`MutableBuffer`] of `len` |
| pub fn new_from_buffer(buffer: MutableBuffer, len: usize) -> Self { |
| assert!(len <= buffer.len() * 8); |
| let mut s = Self { |
| len: buffer.len() * 8, |
| buffer, |
| }; |
| s.truncate(len); |
| s |
| } |
| |
| /// Returns the length of the buffer |
| #[inline] |
| pub fn len(&self) -> usize { |
| self.len |
| } |
| |
| /// Sets a bit in the buffer at `index` |
| #[inline] |
| pub fn set_bit(&mut self, index: usize, v: bool) { |
| if v { |
| bit_util::set_bit(self.buffer.as_mut(), index); |
| } else { |
| bit_util::unset_bit(self.buffer.as_mut(), index); |
| } |
| } |
| |
| /// Gets a bit in the buffer at `index` |
| #[inline] |
| pub fn get_bit(&self, index: usize) -> bool { |
| bit_util::get_bit(self.buffer.as_slice(), index) |
| } |
| |
| /// Returns true if empty |
| #[inline] |
| pub fn is_empty(&self) -> bool { |
| self.len == 0 |
| } |
| |
| /// Returns the capacity of the buffer, in bits (not bytes) |
| /// |
| /// Note this |
| /// |
| /// # Example |
| /// ``` |
| /// # use arrow_buffer::builder::BooleanBufferBuilder; |
| /// // empty requires 0 bytes |
| /// let b = BooleanBufferBuilder::new(0); |
| /// assert_eq!(0, b.capacity()); |
| /// // Creating space for 1 bit results in 64 bytes (space for 512 bits) |
| /// // (64 is the minimum allocation size for 64 bit architectures) |
| /// let mut b = BooleanBufferBuilder::new(1); |
| /// assert_eq!(512, b.capacity()); |
| /// // 1000 bits requires 128 bytes (space for 1024 bits) |
| /// b.append_n(1000, true); |
| /// assert_eq!(1024, b.capacity()); |
| /// ``` |
| #[inline] |
| pub fn capacity(&self) -> usize { |
| self.buffer.capacity() * 8 |
| } |
| |
| /// Advances the buffer by `additional` bits |
| #[inline] |
| pub fn advance(&mut self, additional: usize) { |
| let new_len = self.len + additional; |
| let new_len_bytes = bit_util::ceil(new_len, 8); |
| if new_len_bytes > self.buffer.len() { |
| self.buffer.resize(new_len_bytes, 0); |
| } |
| self.len = new_len; |
| } |
| |
| /// Truncates the builder to the given length |
| /// |
| /// If `len` is greater than the buffer's current length, this has no effect |
| #[inline] |
| pub fn truncate(&mut self, len: usize) { |
| if len > self.len { |
| return; |
| } |
| |
| let new_len_bytes = bit_util::ceil(len, 8); |
| self.buffer.truncate(new_len_bytes); |
| self.len = len; |
| |
| let remainder = self.len % 8; |
| if remainder != 0 { |
| let mask = (1_u8 << remainder).wrapping_sub(1); |
| *self.buffer.as_mut().last_mut().unwrap() &= mask; |
| } |
| } |
| |
| /// Reserve space to at least `additional` new bits. |
| /// Capacity will be `>= self.len() + additional`. |
| /// New bytes are uninitialized and reading them is undefined behavior. |
| #[inline] |
| pub fn reserve(&mut self, additional: usize) { |
| let capacity = self.len + additional; |
| if capacity > self.capacity() { |
| // convert differential to bytes |
| let additional = bit_util::ceil(capacity, 8) - self.buffer.len(); |
| self.buffer.reserve(additional); |
| } |
| } |
| |
| /// Resizes the buffer, either truncating its contents (with no change in capacity), or |
| /// growing it (potentially reallocating it) and writing `false` in the newly available bits. |
| #[inline] |
| pub fn resize(&mut self, len: usize) { |
| match len.checked_sub(self.len) { |
| Some(delta) => self.advance(delta), |
| None => self.truncate(len), |
| } |
| } |
| |
| /// Appends a boolean `v` into the buffer |
| #[inline] |
| pub fn append(&mut self, v: bool) { |
| self.advance(1); |
| if v { |
| unsafe { bit_util::set_bit_raw(self.buffer.as_mut_ptr(), self.len - 1) }; |
| } |
| } |
| |
| /// Appends n `additional` bits of value `v` into the buffer |
| #[inline] |
| pub fn append_n(&mut self, additional: usize, v: bool) { |
| match v { |
| true => { |
| let new_len = self.len + additional; |
| let new_len_bytes = bit_util::ceil(new_len, 8); |
| let cur_remainder = self.len % 8; |
| let new_remainder = new_len % 8; |
| |
| if cur_remainder != 0 { |
| // Pad last byte with 1s |
| *self.buffer.as_slice_mut().last_mut().unwrap() |= !((1 << cur_remainder) - 1) |
| } |
| self.buffer.resize(new_len_bytes, 0xFF); |
| if new_remainder != 0 { |
| // Clear remaining bits |
| *self.buffer.as_slice_mut().last_mut().unwrap() &= (1 << new_remainder) - 1 |
| } |
| self.len = new_len; |
| } |
| false => self.advance(additional), |
| } |
| } |
| |
| /// Appends a slice of booleans into the buffer |
| #[inline] |
| pub fn append_slice(&mut self, slice: &[bool]) { |
| let additional = slice.len(); |
| self.advance(additional); |
| |
| let offset = self.len() - additional; |
| for (i, v) in slice.iter().enumerate() { |
| if *v { |
| unsafe { bit_util::set_bit_raw(self.buffer.as_mut_ptr(), offset + i) } |
| } |
| } |
| } |
| |
| /// Append `range` bits from `to_set` |
| /// |
| /// `to_set` is a slice of bits packed LSB-first into `[u8]` |
| /// |
| /// # Panics |
| /// |
| /// Panics if `to_set` does not contain `ceil(range.end / 8)` bytes |
| pub fn append_packed_range(&mut self, range: Range<usize>, to_set: &[u8]) { |
| let offset_write = self.len; |
| let len = range.end - range.start; |
| // allocate new bits as 0 |
| self.advance(len); |
| // copy bits from to_set into self.buffer a word at a time |
| apply_bitwise_binary_op( |
| self.buffer.as_slice_mut(), |
| offset_write, |
| to_set, |
| range.start, |
| len, |
| |_a, b| b, // copy bits from to_set |
| ); |
| } |
| |
| /// Append [`BooleanBuffer`] to this [`BooleanBufferBuilder`] |
| pub fn append_buffer(&mut self, buffer: &BooleanBuffer) { |
| let range = buffer.offset()..buffer.offset() + buffer.len(); |
| self.append_packed_range(range, buffer.values()) |
| } |
| |
| /// Returns the packed bits |
| pub fn as_slice(&self) -> &[u8] { |
| self.buffer.as_slice() |
| } |
| |
| /// Returns the packed bits |
| pub fn as_slice_mut(&mut self) -> &mut [u8] { |
| self.buffer.as_slice_mut() |
| } |
| |
| /// Creates a [`BooleanBuffer`] |
| #[inline] |
| pub fn finish(&mut self) -> BooleanBuffer { |
| let buf = std::mem::replace(&mut self.buffer, MutableBuffer::new(0)); |
| let len = std::mem::replace(&mut self.len, 0); |
| BooleanBuffer::new(buf.into(), 0, len) |
| } |
| |
| /// Builds the [BooleanBuffer] without resetting the builder. |
| pub fn finish_cloned(&self) -> BooleanBuffer { |
| BooleanBuffer::new(Buffer::from_slice_ref(self.as_slice()), 0, self.len) |
| } |
| } |
| |
| impl From<BooleanBufferBuilder> for Buffer { |
| #[inline] |
| fn from(builder: BooleanBufferBuilder) -> Self { |
| builder.buffer.into() |
| } |
| } |
| |
| impl From<BooleanBufferBuilder> for BooleanBuffer { |
| #[inline] |
| fn from(builder: BooleanBufferBuilder) -> Self { |
| BooleanBuffer::new(builder.buffer.into(), 0, builder.len) |
| } |
| } |
| |
| #[cfg(test)] |
| mod tests { |
| use super::*; |
| |
| #[test] |
| fn test_boolean_buffer_builder_write_bytes() { |
| let mut b = BooleanBufferBuilder::new(4); |
| b.append(false); |
| b.append(true); |
| b.append(false); |
| b.append(true); |
| assert_eq!(4, b.len()); |
| assert_eq!(512, b.capacity()); |
| let buffer = b.finish(); |
| assert_eq!(4, buffer.len()); |
| |
| // Overallocate capacity |
| let mut b = BooleanBufferBuilder::new(8); |
| b.append_slice(&[false, true, false, true]); |
| assert_eq!(4, b.len()); |
| assert_eq!(512, b.capacity()); |
| let buffer = b.finish(); |
| assert_eq!(4, buffer.len()); |
| } |
| |
| #[test] |
| fn test_boolean_buffer_builder_unset_first_bit() { |
| let mut buffer = BooleanBufferBuilder::new(4); |
| buffer.append(true); |
| buffer.append(true); |
| buffer.append(false); |
| buffer.append(true); |
| buffer.set_bit(0, false); |
| assert_eq!(buffer.len(), 4); |
| assert_eq!(buffer.finish().values(), &[0b1010_u8]); |
| } |
| |
| #[test] |
| fn test_boolean_buffer_builder_unset_last_bit() { |
| let mut buffer = BooleanBufferBuilder::new(4); |
| buffer.append(true); |
| buffer.append(true); |
| buffer.append(false); |
| buffer.append(true); |
| buffer.set_bit(3, false); |
| assert_eq!(buffer.len(), 4); |
| assert_eq!(buffer.finish().values(), &[0b0011_u8]); |
| } |
| |
| #[test] |
| fn test_boolean_buffer_builder_unset_an_inner_bit() { |
| let mut buffer = BooleanBufferBuilder::new(5); |
| buffer.append(true); |
| buffer.append(true); |
| buffer.append(false); |
| buffer.append(true); |
| buffer.set_bit(1, false); |
| assert_eq!(buffer.len(), 4); |
| assert_eq!(buffer.finish().values(), &[0b1001_u8]); |
| } |
| |
| #[test] |
| fn test_boolean_buffer_builder_unset_several_bits() { |
| let mut buffer = BooleanBufferBuilder::new(5); |
| buffer.append(true); |
| buffer.append(true); |
| buffer.append(true); |
| buffer.append(false); |
| buffer.append(true); |
| buffer.set_bit(1, false); |
| buffer.set_bit(2, false); |
| assert_eq!(buffer.len(), 5); |
| assert_eq!(buffer.finish().values(), &[0b10001_u8]); |
| } |
| |
| #[test] |
| fn test_boolean_buffer_builder_unset_several_bits_bigger_than_one_byte() { |
| let mut buffer = BooleanBufferBuilder::new(16); |
| buffer.append_n(10, true); |
| buffer.set_bit(0, false); |
| buffer.set_bit(3, false); |
| buffer.set_bit(9, false); |
| assert_eq!(buffer.len(), 10); |
| assert_eq!(buffer.finish().values(), &[0b11110110_u8, 0b01_u8]); |
| } |
| |
| #[test] |
| fn test_boolean_buffer_builder_flip_several_bits_bigger_than_one_byte() { |
| let mut buffer = BooleanBufferBuilder::new(16); |
| buffer.append_n(5, true); |
| buffer.append_n(5, false); |
| buffer.append_n(5, true); |
| buffer.set_bit(0, false); |
| buffer.set_bit(3, false); |
| buffer.set_bit(9, false); |
| buffer.set_bit(6, true); |
| buffer.set_bit(14, true); |
| buffer.set_bit(13, false); |
| assert_eq!(buffer.len(), 15); |
| assert_eq!(buffer.finish().values(), &[0b01010110_u8, 0b1011100_u8]); |
| } |
| |
| #[test] |
| fn test_bool_buffer_builder_get_first_bit() { |
| let mut buffer = BooleanBufferBuilder::new(16); |
| buffer.append_n(8, true); |
| buffer.append_n(8, false); |
| assert!(buffer.get_bit(0)); |
| } |
| |
| #[test] |
| fn test_bool_buffer_builder_get_first_bit_not_requires_mutability() { |
| let buffer = { |
| let mut buffer = BooleanBufferBuilder::new(16); |
| buffer.append_n(8, true); |
| buffer |
| }; |
| |
| assert!(buffer.get_bit(0)); |
| } |
| |
| #[test] |
| fn test_bool_buffer_builder_get_last_bit() { |
| let mut buffer = BooleanBufferBuilder::new(16); |
| buffer.append_n(8, true); |
| buffer.append_n(8, false); |
| assert!(!buffer.get_bit(15)); |
| } |
| |
| #[test] |
| fn test_bool_buffer_builder_get_an_inner_bit() { |
| let mut buffer = BooleanBufferBuilder::new(16); |
| buffer.append_n(4, false); |
| buffer.append_n(8, true); |
| buffer.append_n(4, false); |
| assert!(buffer.get_bit(11)); |
| } |
| |
| #[test] |
| fn test_bool_buffer_fuzz() { |
| use rand::prelude::*; |
| |
| let mut buffer = BooleanBufferBuilder::new(12); |
| let mut all_bools = vec![]; |
| let mut rng = rand::rng(); |
| |
| let src_len = 32; |
| let (src, compacted_src) = { |
| let src: Vec<_> = std::iter::from_fn(|| Some(rng.next_u32() & 1 == 0)) |
| .take(src_len) |
| .collect(); |
| |
| let mut compacted_src = BooleanBufferBuilder::new(src_len); |
| compacted_src.append_slice(&src); |
| (src, compacted_src.finish()) |
| }; |
| |
| for _ in 0..100 { |
| let a = rng.next_u32() as usize % src_len; |
| let b = rng.next_u32() as usize % src_len; |
| |
| let start = a.min(b); |
| let end = a.max(b); |
| |
| buffer.append_packed_range(start..end, compacted_src.values()); |
| all_bools.extend_from_slice(&src[start..end]); |
| } |
| |
| let mut compacted = BooleanBufferBuilder::new(all_bools.len()); |
| compacted.append_slice(&all_bools); |
| |
| assert_eq!(buffer.finish(), compacted.finish()) |
| } |
| |
| #[test] |
| fn test_boolean_array_builder_resize() { |
| let mut builder = BooleanBufferBuilder::new(20); |
| builder.append_n(4, true); |
| builder.append_n(7, false); |
| builder.append_n(2, true); |
| builder.resize(20); |
| |
| assert_eq!(builder.len(), 20); |
| assert_eq!(builder.as_slice(), &[0b00001111, 0b00011000, 0b00000000]); |
| |
| builder.resize(5); |
| assert_eq!(builder.len(), 5); |
| assert_eq!(builder.as_slice(), &[0b00001111]); |
| |
| builder.append_n(4, true); |
| assert_eq!(builder.len(), 9); |
| assert_eq!(builder.as_slice(), &[0b11101111, 0b00000001]); |
| } |
| |
| #[test] |
| fn test_truncate() { |
| let b = MutableBuffer::from_iter([true, true, true, true]); |
| let mut builder = BooleanBufferBuilder::new_from_buffer(b, 2); |
| builder.advance(2); |
| let finished = builder.finish(); |
| assert_eq!(finished.values(), &[0b00000011]); |
| |
| let mut builder = BooleanBufferBuilder::new(10); |
| builder.append_n(5, true); |
| builder.resize(3); |
| builder.advance(2); |
| let finished = builder.finish(); |
| assert_eq!(finished.values(), &[0b00000111]); |
| |
| let mut builder = BooleanBufferBuilder::new(10); |
| builder.append_n(16, true); |
| assert_eq!(builder.as_slice(), &[0xFF, 0xFF]); |
| builder.truncate(20); |
| assert_eq!(builder.as_slice(), &[0xFF, 0xFF]); |
| builder.truncate(14); |
| assert_eq!(builder.as_slice(), &[0xFF, 0b00111111]); |
| builder.append(false); |
| builder.append(true); |
| assert_eq!(builder.as_slice(), &[0xFF, 0b10111111]); |
| builder.append_packed_range(0..3, &[0xFF]); |
| assert_eq!(builder.as_slice(), &[0xFF, 0b10111111, 0b00000111]); |
| builder.truncate(17); |
| assert_eq!(builder.as_slice(), &[0xFF, 0b10111111, 0b00000001]); |
| builder.append_packed_range(0..2, &[2]); |
| assert_eq!(builder.as_slice(), &[0xFF, 0b10111111, 0b0000101]); |
| builder.truncate(8); |
| assert_eq!(builder.as_slice(), &[0xFF]); |
| builder.resize(14); |
| assert_eq!(builder.as_slice(), &[0xFF, 0x00]); |
| builder.truncate(0); |
| assert_eq!(builder.as_slice(), &[]); |
| } |
| |
| #[test] |
| fn test_boolean_builder_increases_buffer_len() { |
| // 00000010 01001000 |
| let buf = Buffer::from([72_u8, 2_u8]); |
| let mut builder = BooleanBufferBuilder::new(8); |
| |
| for i in 0..16 { |
| if i == 3 || i == 6 || i == 9 { |
| builder.append(true); |
| } else { |
| builder.append(false); |
| } |
| } |
| let buf2 = builder.finish(); |
| |
| assert_eq!(buf.len(), buf2.inner().len()); |
| assert_eq!(buf.as_slice(), buf2.values()); |
| } |
| } |