// blob: 4b4b08572a27893d4ea3e9b20690d1aebe37c215 [file] [log] [blame]
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
//! Defines miscellaneous array kernels.
use crate::array::ArrayRef;
/// Truncates `array` to at most `num_elements` elements, counted from the start.
///
/// This is a convenience wrapper around the array's zero-copy `slice` that:
/// * clamps `num_elements` to the length of the array, so requesting more elements
///   than the array holds yields every element rather than an out-of-bounds slice
/// * always starts the slice at offset 0
pub fn limit(array: &ArrayRef, num_elements: usize) -> ArrayRef {
    let available = array.len();
    // Never ask `slice` for more elements than the array actually contains.
    let count = std::cmp::min(num_elements, available);
    array.slice(0, count)
}
#[cfg(test)]
mod tests {
    use super::*;
    use crate::array::*;
    use crate::buffer::Buffer;
    use crate::datatypes::{DataType, Field};
    use crate::util::bit_util;
    use std::sync::Arc;

    /// A limit smaller than the array length keeps only the leading elements.
    #[test]
    fn test_limit_array() {
        let a: ArrayRef = Arc::new(Int32Array::from(vec![5, 6, 7, 8, 9]));
        let b = limit(&a, 3);
        let c = b.as_ref().as_any().downcast_ref::<Int32Array>().unwrap();
        assert_eq!(3, c.len());
        assert_eq!(5, c.value(0));
        assert_eq!(6, c.value(1));
        assert_eq!(7, c.value(2));
    }

    /// Limiting works for variable-length (string) arrays as well.
    #[test]
    fn test_limit_string_array() {
        let a: ArrayRef = Arc::new(StringArray::from(vec!["hello", " ", "world", "!"]));
        let b = limit(&a, 2);
        let c = b.as_ref().as_any().downcast_ref::<StringArray>().unwrap();
        assert_eq!(2, c.len());
        assert_eq!("hello", c.value(0));
        assert_eq!(" ", c.value(1));
    }

    /// A null in the retained prefix is still reported as null after limiting.
    #[test]
    fn test_limit_array_with_null() {
        let a: ArrayRef = Arc::new(Int32Array::from(vec![None, Some(5)]));
        let b = limit(&a, 1);
        let c = b.as_ref().as_any().downcast_ref::<Int32Array>().unwrap();
        assert_eq!(1, c.len());
        assert!(c.is_null(0));
    }

    /// A limit larger than the array length is clamped: every element is returned.
    #[test]
    fn test_limit_array_with_limit_too_large() {
        let a = Int32Array::from(vec![5, 6, 7, 8, 9]);
        let a_ref: ArrayRef = Arc::new(a);
        let b = limit(&a_ref, 6);
        let c = b.as_ref().as_any().downcast_ref::<Int32Array>().unwrap();
        assert_eq!(5, c.len());
        assert_eq!(5, c.value(0));
        assert_eq!(6, c.value(1));
        assert_eq!(7, c.value(2));
        assert_eq!(8, c.value(3));
        assert_eq!(9, c.value(4));
    }

    /// Limiting a list array keeps the offsets, lengths and null count of the prefix.
    #[test]
    fn test_list_array_limit() {
        // adapted from crate::array::test::test_list_array_slice
        // Construct a value array
        let value_data = ArrayData::builder(DataType::Int32)
            .len(10)
            .add_buffer(Buffer::from_slice_ref(&[0, 1, 2, 3, 4, 5, 6, 7, 8, 9]))
            .build();

        // Construct a buffer for value offsets, for the nested array:
        //  [[0, 1], null, [2, 3], null, [4, 5], null, [6, 7, 8], null, [9]]
        let value_offsets = Buffer::from_slice_ref(&[0, 2, 2, 4, 4, 6, 6, 9, 9, 10]);

        // Validity bitmap with every even index set: 01010101 00000001
        let mut null_bits: [u8; 2] = [0; 2];
        bit_util::set_bit(&mut null_bits, 0);
        bit_util::set_bit(&mut null_bits, 2);
        bit_util::set_bit(&mut null_bits, 4);
        bit_util::set_bit(&mut null_bits, 6);
        bit_util::set_bit(&mut null_bits, 8);

        // Construct a list array from the above two
        let list_data_type =
            DataType::List(Box::new(Field::new("item", DataType::Int32, false)));
        let list_data = ArrayData::builder(list_data_type)
            .len(9)
            .add_buffer(value_offsets)
            .add_child_data(value_data)
            .null_bit_buffer(Buffer::from(null_bits))
            .build();
        let list_array: ArrayRef = Arc::new(ListArray::from(list_data));

        let limit_array = limit(&list_array, 6);
        assert_eq!(6, limit_array.len());
        assert_eq!(0, limit_array.offset());
        // Indices 1, 3 and 5 of the retained prefix are null.
        assert_eq!(3, limit_array.null_count());

        // Check offset and length for each non-null value.
        let limit_array: &ListArray =
            limit_array.as_any().downcast_ref::<ListArray>().unwrap();
        for i in 0..limit_array.len() {
            let offset = limit_array.value_offsets()[i];
            let length = limit_array.value_length(i);
            if i % 2 == 0 {
                // Even slots hold the two-element lists [0, 1], [2, 3] and [4, 5],
                // whose start offsets (0, 2, 4) coincide with the slot index.
                assert_eq!(2, length);
                assert_eq!(i as i32, offset);
            } else {
                // Odd slots are null and span no values.
                assert_eq!(0, length);
            }
        }
    }

    /// Limiting a struct array limits the struct itself and each of its child columns.
    #[test]
    fn test_struct_array_limit() {
        // adapted from crate::array::test::test_struct_array_slice
        let boolean_data = ArrayData::builder(DataType::Boolean)
            .len(5)
            .add_buffer(Buffer::from([0b00010000]))
            .null_bit_buffer(Buffer::from([0b00010001]))
            .build();
        let int_data = ArrayData::builder(DataType::Int32)
            .len(5)
            .add_buffer(Buffer::from_slice_ref(&[0, 28, 42, 0, 0]))
            .null_bit_buffer(Buffer::from([0b00000110]))
            .build();

        let field_types = vec![
            Field::new("a", DataType::Boolean, false),
            Field::new("b", DataType::Int32, false),
        ];
        let struct_array_data = ArrayData::builder(DataType::Struct(field_types))
            .len(5)
            .add_child_data(boolean_data.clone())
            .add_child_data(int_data.clone())
            .null_bit_buffer(Buffer::from([0b00010111]))
            .build();
        let struct_array = StructArray::from(struct_array_data);

        // Sanity-check the unsliced struct: only index 3 is null.
        assert_eq!(5, struct_array.len());
        assert_eq!(1, struct_array.null_count());
        assert_eq!(&boolean_data, struct_array.column(0).data());
        assert_eq!(&int_data, struct_array.column(1).data());

        let array: ArrayRef = Arc::new(struct_array);

        let sliced_array = limit(&array, 3);
        let sliced_array = sliced_array.as_any().downcast_ref::<StructArray>().unwrap();
        // The only null struct slot (index 3) falls outside the first three elements.
        assert_eq!(3, sliced_array.len());
        assert_eq!(0, sliced_array.offset());
        assert_eq!(0, sliced_array.null_count());
        assert!(sliced_array.is_valid(0));
        assert!(sliced_array.is_valid(1));
        assert!(sliced_array.is_valid(2));

        // Boolean column: validity bits 0 and 4 are set, so slots 1 and 2 are null.
        let sliced_c0 = sliced_array.column(0);
        let sliced_c0 = sliced_c0.as_any().downcast_ref::<BooleanArray>().unwrap();
        assert_eq!(3, sliced_c0.len());
        assert_eq!(0, sliced_c0.offset());
        assert_eq!(2, sliced_c0.null_count());
        assert!(sliced_c0.is_valid(0));
        assert!(sliced_c0.is_null(1));
        assert!(sliced_c0.is_null(2));
        assert!(!sliced_c0.value(0));

        // Int32 column: validity bits 1 and 2 are set, so slot 0 is null.
        let sliced_c1 = sliced_array.column(1);
        let sliced_c1 = sliced_c1.as_any().downcast_ref::<Int32Array>().unwrap();
        assert_eq!(3, sliced_c1.len());
        assert_eq!(0, sliced_c1.offset());
        assert_eq!(1, sliced_c1.null_count());
        assert!(sliced_c1.is_null(0));
        assert_eq!(28, sliced_c1.value(1));
        assert_eq!(42, sliced_c1.value(2));
    }
}