blob: 8b38b5339553f944c45147d712a120ca6780f712 [file] [log] [blame]
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
use std::borrow::Borrow;
use std::convert::From;
use std::iter::{FromIterator, IntoIterator};
use std::mem;
use std::{any::Any, fmt};
use super::*;
use super::{array::print_long_array, raw_pointer::RawPtrBox};
use crate::buffer::{Buffer, MutableBuffer};
use crate::util::bit_util;
/// An array of boolean values in which individual elements may be null.
///
/// The values are read bit by bit from the single values buffer held by the
/// underlying `ArrayData` (see `value_unchecked`).
///
/// # Example
///
/// ```
/// use arrow::array::{Array, BooleanArray};
/// let arr = BooleanArray::from(vec![Some(false), Some(true), None, Some(true)]);
/// assert_eq!(4, arr.len());
/// assert_eq!(1, arr.null_count());
/// assert!(arr.is_valid(0));
/// assert!(!arr.is_null(0));
/// assert_eq!(false, arr.value(0));
/// assert!(arr.is_valid(1));
/// assert!(!arr.is_null(1));
/// assert_eq!(true, arr.value(1));
/// assert!(!arr.is_valid(2));
/// assert!(arr.is_null(2));
/// assert!(arr.is_valid(3));
/// assert!(!arr.is_null(3));
/// assert_eq!(true, arr.value(3));
/// ```
///
pub struct BooleanArray {
/// The underlying array data; its first (and only) buffer holds the values.
data: ArrayData,
/// Pointer to the start of the values buffer. It must not outlive the value
/// buffer stored in `data`; because both are kept together in this struct,
/// it is safe to store.
raw_values: RawPtrBox<u8>,
}
impl fmt::Debug for BooleanArray {
    /// Writes a `BooleanArray` header, then the elements as rendered by
    /// `print_long_array`, then the closing bracket.
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        f.write_str("BooleanArray\n[\n")?;
        print_long_array(self, f, |arr, idx, out| {
            // Delegate to `bool`'s own `Debug` impl so that any formatter
            // flags are honoured exactly as before.
            let element = arr.value(idx);
            fmt::Debug::fmt(&element, out)
        })?;
        f.write_str("]")
    }
}
impl BooleanArray {
    /// Returns the length of this array.
    pub fn len(&self) -> usize {
        self.data.len()
    }

    /// Returns whether this array is empty.
    pub fn is_empty(&self) -> bool {
        self.data.is_empty()
    }

    /// Returns a new boolean array builder created with the given `capacity`.
    pub fn builder(capacity: usize) -> BooleanBuilder {
        BooleanBuilder::new(capacity)
    }

    /// Returns a `Buffer` holding all the values of this array.
    ///
    /// Note this doesn't take the offset of this array into account.
    pub fn values(&self) -> &Buffer {
        &self.data.buffers()[0]
    }

    /// Returns the boolean value at index `i` without any bounds check.
    ///
    /// The array's offset is added to `i` before the bit is read.
    ///
    /// # Safety
    /// This doesn't check bounds, the caller must ensure that index < self.len()
    pub unsafe fn value_unchecked(&self, i: usize) -> bool {
        let offset = i + self.offset();
        bit_util::get_bit_raw(self.raw_values.as_ptr(), offset)
    }

    /// Returns the boolean value at index `i`.
    ///
    /// # Panics
    /// Panics if `i >= self.len()`. The check is performed in every build
    /// profile: a safe function must never read outside the values buffer.
    pub fn value(&self, i: usize) -> bool {
        assert!(
            i < self.len(),
            "Trying to access an element at index {} from a BooleanArray of length {}",
            i,
            self.len()
        );
        // SAFETY: `i < self.len()` was verified just above, which is the
        // contract `value_unchecked` requires.
        unsafe { self.value_unchecked(i) }
    }
}
impl Array for BooleanArray {
    /// Returns this array as a `&dyn Any`, e.g. so it can be downcast back to
    /// a `BooleanArray`.
    fn as_any(&self) -> &dyn Any {
        self
    }

    /// Returns a reference to the underlying `ArrayData`.
    fn data(&self) -> &ArrayData {
        &self.data
    }

    /// Returns the total number of bytes of memory occupied by the buffers owned by this [BooleanArray].
    fn get_buffer_memory_size(&self) -> usize {
        self.data.get_buffer_memory_size()
    }

    /// Returns the total number of bytes of memory occupied physically by this [BooleanArray].
    ///
    /// This is the size reported by the underlying `ArrayData` plus the size
    /// of the `BooleanArray` struct itself.
    fn get_array_memory_size(&self) -> usize {
        self.data.get_array_memory_size() + mem::size_of_val(self)
    }
}
impl From<Vec<bool>> for BooleanArray {
    /// Builds a `BooleanArray` from plain `bool`s; no validity buffer is attached.
    fn from(data: Vec<bool>) -> Self {
        let len = data.len();
        let mut values = MutableBuffer::new_null(len);

        // Only `true` positions are written, so this relies on `new_null`
        // handing back a buffer whose bits are all cleared.
        let bits = values.as_slice_mut();
        data.iter()
            .enumerate()
            .filter(|(_, flag)| **flag)
            .for_each(|(pos, _)| bit_util::set_bit(bits, pos));

        BooleanArray::from(
            ArrayData::builder(DataType::Boolean)
                .len(len)
                .add_buffer(values.into())
                .build(),
        )
    }
}
impl From<Vec<Option<bool>>> for BooleanArray {
fn from(data: Vec<Option<bool>>) -> Self {
BooleanArray::from_iter(data.iter())
}
}
impl From<ArrayData> for BooleanArray {
    /// Wraps `data` in a `BooleanArray`.
    ///
    /// # Panics
    /// Panics unless `data` contains exactly one buffer (the values buffer).
    fn from(data: ArrayData) -> Self {
        assert_eq!(
            data.buffers().len(),
            1,
            "BooleanArray data should contain a single buffer only (values buffer)"
        );
        // The pointer targets the values buffer owned by `data`, and `data` is
        // moved into the same struct below, so the two are kept together.
        // NOTE(review): confirm this satisfies `RawPtrBox::new`'s full contract.
        let raw_values = unsafe { RawPtrBox::new(data.buffers()[0].as_ptr()) };
        Self { data, raw_values }
    }
}
impl<'a> IntoIterator for &'a BooleanArray {
    type Item = Option<bool>;
    type IntoIter = BooleanIter<'a>;

    /// Creates a `BooleanIter` borrowing this array; items are `Option<bool>`.
    fn into_iter(self) -> Self::IntoIter {
        BooleanIter::new(self)
    }
}
impl<'a> BooleanArray {
    /// Returns a `BooleanIter` that borrows this array for `'a`.
    pub fn iter(&'a self) -> BooleanIter<'a> {
        // `self` is already a `&'a BooleanArray`; no extra borrow is needed.
        BooleanIter::new(self)
    }
}
impl<Ptr: Borrow<Option<bool>>> FromIterator<Ptr> for BooleanArray {
    /// Builds a `BooleanArray` from an iterator of optional booleans.
    ///
    /// The upper bound of the iterator's `size_hint` is used only to size the
    /// value and validity bitmaps. The length of the resulting array is the
    /// number of items the iterator actually yields, so adaptors whose upper
    /// bound over-estimates (e.g. `filter`) do not produce spurious trailing
    /// null entries.
    ///
    /// # Panics
    /// Panics if the iterator reports no upper bound, or if it yields more
    /// items than the upper bound it reported.
    fn from_iter<I: IntoIterator<Item = Ptr>>(iter: I) -> Self {
        let iter = iter.into_iter();
        let (_, upper) = iter.size_hint();
        let upper = upper.expect("Iterator must be sized"); // panic if no upper bound.

        let num_bytes = bit_util::ceil(upper, 8);
        let mut null_buf = MutableBuffer::from_len_zeroed(num_bytes);
        let mut val_buf = MutableBuffer::from_len_zeroed(num_bytes);

        let data = val_buf.as_slice_mut();
        let null_slice = null_buf.as_slice_mut();

        // Number of items really produced; this, not the hint, is the length.
        let mut data_len = 0;
        iter.enumerate().for_each(|(i, item)| {
            // Guard the bitmaps (sized from `upper`) against an iterator that
            // misreports its size; `None` items would otherwise slip through
            // without touching the buffers.
            assert!(
                i < upper,
                "Iterator yielded more items than its size_hint upper bound"
            );
            data_len = i + 1;
            if let Some(a) = item.borrow() {
                bit_util::set_bit(null_slice, i);
                if *a {
                    bit_util::set_bit(data, i);
                }
            }
        });

        let data = ArrayData::new(
            DataType::Boolean,
            data_len,
            None,
            Some(null_buf.into()),
            0,
            vec![val_buf.into()],
            vec![],
        );
        BooleanArray::from(data)
    }
}
#[cfg(test)]
mod tests {
    use super::*;
    use crate::buffer::Buffer;
    use crate::datatypes::DataType;

    /// Debug output of an array without nulls.
    #[test]
    fn test_boolean_fmt_debug() {
        let arr = BooleanArray::from(vec![true, false, false]);
        assert_eq!(
            "BooleanArray\n[\n  true,\n  false,\n  false,\n]",
            format!("{:?}", arr)
        );
    }

    /// Debug output of an array built with a null in the middle.
    #[test]
    fn test_boolean_with_null_fmt_debug() {
        let mut builder = BooleanArray::builder(3);
        builder.append_value(true).unwrap();
        builder.append_null().unwrap();
        builder.append_value(false).unwrap();
        let arr = builder.finish();
        assert_eq!(
            "BooleanArray\n[\n  true,\n  null,\n  false,\n]",
            format!("{:?}", arr)
        );
    }

    /// `From<Vec<bool>>` packs values least-significant bit first:
    /// [false, true, false, true] == 0b1010 == 10.
    #[test]
    fn test_boolean_array_from_vec() {
        let buf = Buffer::from([10_u8]);
        let arr = BooleanArray::from(vec![false, true, false, true]);
        assert_eq!(&buf, arr.values());
        assert_eq!(4, arr.len());
        assert_eq!(0, arr.offset());
        assert_eq!(0, arr.null_count());
        for i in 0..4 {
            assert!(!arr.is_null(i));
            assert!(arr.is_valid(i));
            assert_eq!(i == 1 || i == 3, arr.value(i), "failed at {}", i)
        }
    }

    /// `From<Vec<Option<bool>>>` records the `None` entry as a null.
    #[test]
    fn test_boolean_array_from_vec_option() {
        let buf = Buffer::from([10_u8]);
        let arr = BooleanArray::from(vec![Some(false), Some(true), None, Some(true)]);
        assert_eq!(&buf, arr.values());
        assert_eq!(4, arr.len());
        assert_eq!(0, arr.offset());
        assert_eq!(1, arr.null_count());
        for i in 0..4 {
            if i == 2 {
                assert!(arr.is_null(i));
                assert!(!arr.is_valid(i));
            } else {
                assert!(!arr.is_null(i));
                assert!(arr.is_valid(i));
                assert_eq!(i == 1 || i == 3, arr.value(i), "failed at {}", i)
            }
        }
    }

    #[test]
    fn test_boolean_array_builder() {
        // Test building a boolean array with ArrayData builder and offset
        // 27 == 0b00011011; bits are read least-significant first, so with an
        // offset of 2 the five logical values are 0, 1, 1, 0, 0.
        let buf = Buffer::from([27_u8]);
        let buf2 = buf.clone();
        let data = ArrayData::builder(DataType::Boolean)
            .len(5)
            .offset(2)
            .add_buffer(buf)
            .build();
        let arr = BooleanArray::from(data);
        assert_eq!(&buf2, arr.values());
        assert_eq!(5, arr.len());
        assert_eq!(2, arr.offset());
        assert_eq!(0, arr.null_count());
        // Check every logical element, not just a prefix.
        let expected = [false, true, true, false, false];
        for (i, want) in expected.iter().enumerate() {
            assert_eq!(*want, arr.value(i), "failed at {}", i);
        }
    }

    /// Constructing from `ArrayData` without a values buffer must panic.
    #[test]
    #[should_panic(expected = "BooleanArray data should contain a single buffer only \
                               (values buffer)")]
    fn test_boolean_array_invalid_buffer_len() {
        let data = ArrayData::builder(DataType::Boolean).len(5).build();
        BooleanArray::from(data);
    }
}