blob: 143050ea97f3e0865d968b36c3b6301bb65b2ff4 [file] [log] [blame]
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
//! Comparison kernels for `Array`s.
//!
//! These kernels can leverage SIMD if available on your system. Currently no runtime
//! detection is provided, you should enable the specific SIMD intrinsics using
//! `RUSTFLAGS="-C target-feature=+avx2"` for example. See the documentation
//! [here](https://doc.rust-lang.org/stable/core/arch/) for more information.
//!
use crate::array::*;
use crate::buffer::{buffer_unary_not, Buffer, MutableBuffer};
use crate::compute::util::combine_option_bitmap;
use crate::datatypes::{
ArrowNativeType, ArrowNativeTypeOp, ArrowNumericType, DataType, Date32Type,
Date64Type, Float32Type, Float64Type, Int16Type, Int32Type, Int64Type, Int8Type,
IntervalDayTimeType, IntervalMonthDayNanoType, IntervalUnit, IntervalYearMonthType,
Time32MillisecondType, Time32SecondType, Time64MicrosecondType, Time64NanosecondType,
TimeUnit, TimestampMicrosecondType, TimestampMillisecondType,
TimestampNanosecondType, TimestampSecondType, UInt16Type, UInt32Type, UInt64Type,
UInt8Type,
};
#[allow(unused_imports)]
use crate::downcast_dictionary_array;
use crate::error::{ArrowError, Result};
use crate::util::bit_util;
use regex::Regex;
use std::collections::HashMap;
/// Applies the boolean predicate `op` to each pair of values read from two
/// array accessors and collects the answers into a [`BooleanArray`].
///
/// The validity bitmap of the result is whatever `combine_option_bitmap`
/// produces for the two inputs. This version does not attempt to use SIMD.
///
/// # Errors
///
/// Returns [`ArrowError::ComputeError`] when `left` and `right` have
/// different lengths.
fn compare_op<T: ArrayAccessor, S: ArrayAccessor, F>(
    left: T,
    right: S,
    op: F,
) -> Result<BooleanArray>
where
    F: Fn(T::Item, S::Item) -> bool,
{
    let len = left.len();
    if len != right.len() {
        return Err(ArrowError::ComputeError(
            "Cannot perform comparison operation on arrays of different length"
                .to_string(),
        ));
    }

    let validity = combine_option_bitmap(&[left.data_ref(), right.data_ref()], len)?;

    let values = MutableBuffer::collect_bool(len, |idx| unsafe {
        // SAFETY: idx in range 0..len
        op(left.value_unchecked(idx), right.value_unchecked(idx))
    });

    // The value buffer was collected for exactly `len` entries and the result
    // is declared with offset 0.
    let data = unsafe {
        ArrayData::new_unchecked(
            DataType::Boolean,
            len,
            None,
            validity,
            0,
            vec![Buffer::from(values)],
            vec![],
        )
    };
    Ok(BooleanArray::from(data))
}
/// Applies the boolean predicate `op` to every value read from an array
/// accessor and collects the answers into a [`BooleanArray`].
///
/// The input's null buffer, sliced by the input's offset and length, becomes
/// the validity bitmap of the result. This version does not attempt to use SIMD.
fn compare_op_scalar<T: ArrayAccessor, F>(left: T, op: F) -> Result<BooleanArray>
where
    F: Fn(T::Item) -> bool,
{
    let len = left.len();

    // The output is declared with offset 0, so the validity bits are re-sliced
    // to start at the first element of `left`.
    let validity = left
        .data()
        .null_buffer()
        .map(|bits| bits.bit_slice(left.offset(), len));

    let values = MutableBuffer::collect_bool(len, |idx| unsafe {
        // SAFETY: idx in range 0..len
        op(left.value_unchecked(idx))
    });

    let data = unsafe {
        ArrayData::new_unchecked(
            DataType::Boolean,
            len,
            None,
            validity,
            0,
            vec![Buffer::from(values)],
            vec![],
        )
    };
    Ok(BooleanArray::from(data))
}
/// Compares two [`PrimitiveArray`]s element by element with the caller-supplied
/// predicate `op`, without using SIMD.
///
/// This is a thin, public wrapper that forwards to `compare_op`.
pub fn no_simd_compare_op<T: ArrowNumericType, F: Fn(T::Native, T::Native) -> bool>(
    left: &PrimitiveArray<T>,
    right: &PrimitiveArray<T>,
    op: F,
) -> Result<BooleanArray> {
    compare_op(left, right, op)
}
/// Compares every element of a [`PrimitiveArray`] against the scalar `right`
/// with the caller-supplied predicate `op`, without using SIMD.
///
/// Each array value is passed as the first argument of `op`, and `right` as
/// the second.
pub fn no_simd_compare_op_scalar<T: ArrowNumericType, F: Fn(T::Native, T::Native) -> bool>(
    left: &PrimitiveArray<T>,
    right: T::Native,
    op: F,
) -> Result<BooleanArray> {
    compare_op_scalar(left, |value| op(value, right))
}
/// Returns `true` when `c` is one of the SQL `LIKE` wildcards (`%` or `_`).
fn is_like_pattern(c: char) -> bool {
    matches!(c, '%' | '_')
}
/// Row-wise regex matching of `left` against the LIKE patterns in `right`
/// for [`StringArray`] / [`LargeStringArray`].
///
/// Each pattern is translated with `replace_like_wildcards` and compiled by
/// `op`. Compiled regexes are cached per distinct pattern string, so a
/// repeated pattern is compiled only once.
///
/// If `negate_regex` is true, each match result is inverted (for example,
/// for `not like`).
///
/// # Errors
///
/// Returns [`ArrowError::ComputeError`] when the arrays differ in length, and
/// propagates any error returned by `op` or by the pattern translation.
fn regex_like<OffsetSize, F>(
    left: &GenericStringArray<OffsetSize>,
    right: &GenericStringArray<OffsetSize>,
    negate_regex: bool,
    op: F,
) -> Result<BooleanArray>
where
    OffsetSize: OffsetSizeTrait,
    F: Fn(&str) -> Result<Regex>,
{
    let len = left.len();
    if len != right.len() {
        return Err(ArrowError::ComputeError(
            "Cannot perform comparison operation on arrays of different length"
                .to_string(),
        ));
    }

    let validity = combine_option_bitmap(&[left.data_ref(), right.data_ref()], len)?;

    // Cache of compiled regexes keyed by the raw LIKE pattern.
    let mut compiled: HashMap<&str, Regex> = HashMap::new();
    let mut matches = BooleanBufferBuilder::new(len);

    for row in 0..len {
        let text = left.value(row);
        let like_pattern = right.value(row);

        let regex = match compiled.entry(like_pattern) {
            std::collections::hash_map::Entry::Occupied(cached) => cached.into_mut(),
            std::collections::hash_map::Entry::Vacant(slot) => {
                let translated = replace_like_wildcards(like_pattern)?;
                slot.insert(op(&translated)?)
            }
        };

        // `matched != negate` flips the result exactly when negation is requested.
        matches.append(regex.is_match(text) != negate_regex);
    }

    let data = unsafe {
        ArrayData::new_unchecked(
            DataType::Boolean,
            len,
            None,
            validity,
            0,
            vec![matches.finish()],
            vec![],
        )
    };
    Ok(BooleanArray::from(data))
}
/// Perform SQL `left LIKE right` operation on [`StringArray`] / [`LargeStringArray`].
///
/// Two wildcards are recognised in the pattern:
///
/// 1. `%` - matches zero, one, or multiple characters
/// 2. `_` - matches exactly one character
///
/// The pattern has to match the whole value: the generated regex is anchored
/// with `^` and `$`.
///
/// For example:
/// ```
/// use arrow::array::{StringArray, BooleanArray};
/// use arrow::compute::like_utf8;
///
/// let strings = StringArray::from(vec!["Arrow", "Arrow", "Arrow", "Ar"]);
/// let patterns = StringArray::from(vec!["A%", "B%", "A.", "A_"]);
///
/// let result = like_utf8(&strings, &patterns).unwrap();
/// assert_eq!(result, BooleanArray::from(vec![true, false, false, true]));
/// ```
pub fn like_utf8<OffsetSize: OffsetSizeTrait>(
    left: &GenericStringArray<OffsetSize>,
    right: &GenericStringArray<OffsetSize>,
) -> Result<BooleanArray> {
    let build_regex = |re_pattern: &str| {
        Regex::new(&format!("^{}$", re_pattern)).map_err(|e| {
            ArrowError::ComputeError(format!(
                "Unable to build regex from LIKE pattern: {}",
                e
            ))
        })
    };
    regex_like(left, right, false, build_regex)
}
/// Evaluates a `LIKE` against the single pattern `right` for every value of
/// `left`, passing each raw match result through `op` (identity for `LIKE`,
/// negation for `NOT LIKE`).
///
/// Patterns of the shapes `literal`, `literal%`, `%literal` and `%literal%`
/// are answered with plain string operations. Everything else is translated
/// to an anchored regex.
#[inline]
fn like_scalar_op<'a, F: Fn(bool) -> bool, L: ArrayAccessor<Item = &'a str>>(
    left: L,
    right: &str,
    op: F,
) -> Result<BooleanArray> {
    // No wildcard at all: plain equality.
    if !right.contains(is_like_pattern) {
        return compare_op_scalar(left, |item| op(item == right));
    }

    // `literal%` (the trailing `%` must not be escaped): prefix test.
    let prefix_only = right
        .strip_suffix('%')
        .filter(|head| !right.ends_with("\\%") && !head.contains(is_like_pattern));
    if let Some(prefix) = prefix_only {
        return compare_op_scalar(left, |item| op(item.starts_with(prefix)));
    }

    // `%literal`: suffix test.
    let suffix_only = right
        .strip_prefix('%')
        .filter(|tail| !tail.contains(is_like_pattern));
    if let Some(suffix) = suffix_only {
        return compare_op_scalar(left, |item| op(item.ends_with(suffix)));
    }

    // `%literal%` (the trailing `%` must not be escaped): substring test.
    let infix_only = right
        .strip_prefix('%')
        .and_then(|rest| rest.strip_suffix('%'))
        .filter(|mid| !right.ends_with("\\%") && !mid.contains(is_like_pattern));
    if let Some(needle) = infix_only {
        return compare_op_scalar(left, |item| op(item.contains(needle)));
    }

    // General case: fall back to a regex anchored at both ends.
    let re_pattern = replace_like_wildcards(right)?;
    let re = Regex::new(&format!("^{}$", re_pattern)).map_err(|e| {
        ArrowError::ComputeError(format!(
            "Unable to build regex from LIKE pattern: {}",
            e
        ))
    })?;
    compare_op_scalar(left, |item| op(re.is_match(item)))
}
/// `LIKE` against a scalar pattern: forwards to `like_scalar_op` and keeps
/// each match result unchanged.
#[inline]
fn like_scalar<'a, L: ArrayAccessor<Item = &'a str>>(
    left: L,
    right: &str,
) -> Result<BooleanArray> {
    like_scalar_op(left, right, std::convert::identity)
}
/// Perform SQL `left LIKE right` on a [`StringArray`] / [`LargeStringArray`]
/// where `right` is a single scalar pattern applied to every value.
///
/// See the documentation on [`like_utf8`] for more details.
pub fn like_utf8_scalar<OffsetSize>(
    left: &GenericStringArray<OffsetSize>,
    right: &str,
) -> Result<BooleanArray>
where
    OffsetSize: OffsetSizeTrait,
{
    like_scalar(left, right)
}
/// Perform SQL `left LIKE right` on a [`DictionaryArray`] whose values are a
/// [`StringArray`] / [`LargeStringArray`], with `right` as a scalar pattern.
///
/// See the documentation on [`like_utf8`] for more details.
///
/// # Errors
///
/// Returns [`ArrowError::ComputeError`] when the dictionary values are
/// neither `Utf8` nor `LargeUtf8`.
pub fn like_dict_scalar<K>(left: &DictionaryArray<K>, right: &str) -> Result<BooleanArray>
where
    K: ArrowNumericType,
{
    match left.value_type() {
        DataType::Utf8 => {
            // The value type was matched just above, so the downcast is expected to succeed.
            let utf8_values = left.downcast_dict::<GenericStringArray<i32>>().unwrap();
            like_scalar(utf8_values, right)
        }
        DataType::LargeUtf8 => {
            let large_utf8_values =
                left.downcast_dict::<GenericStringArray<i64>>().unwrap();
            like_scalar(large_utf8_values, right)
        }
        _ => {
            let message =
                "like_dict_scalar only supports DictionaryArray with Utf8 or LargeUtf8 values";
            Err(ArrowError::ComputeError(message.to_string()))
        }
    }
}
/// Translates a SQL LIKE `pattern` into regex source text.
///
/// Character by character:
///
/// 1. A backslash followed by a LIKE wildcard (`\%`, `\_`) yields the wildcard
///    itself as a literal character.
/// 2. Any other backslash yields an escaped backslash (`\\`).
/// 3. Regex meta characters are backslash-escaped so they match literally,
///    for example `.` => `\.`.
/// 4. Unescaped wildcards are replaced by their regex equivalents:
///    `%` => `.*` and `_` => `.`.
/// 5. Every other character is copied through unchanged.
fn replace_like_wildcards(pattern: &str) -> Result<String> {
    let mut regex_src = String::new();
    let mut chars = pattern.chars().peekable();

    while let Some(current) = chars.next() {
        match current {
            '\\' => match chars.peek().copied() {
                Some(escaped) if is_like_pattern(escaped) => {
                    regex_src.push(escaped);
                    // The wildcard has been emitted already, so consume it.
                    chars.next();
                }
                _ => regex_src.push_str("\\\\"),
            },
            meta if regex_syntax::is_meta_character(meta) => {
                regex_src.push('\\');
                regex_src.push(meta);
            }
            '%' => regex_src.push_str(".*"),
            '_' => regex_src.push('.'),
            other => regex_src.push(other),
        }
    }

    Ok(regex_src)
}
/// Perform SQL `left NOT LIKE right` operation on [`StringArray`] /
/// [`LargeStringArray`], pairing each value with the pattern in the same row.
///
/// See the documentation on [`like_utf8`] for more details.
pub fn nlike_utf8<OffsetSize: OffsetSizeTrait>(
    left: &GenericStringArray<OffsetSize>,
    right: &GenericStringArray<OffsetSize>,
) -> Result<BooleanArray> {
    let build_regex = |re_pattern: &str| {
        Regex::new(&format!("^{}$", re_pattern)).map_err(|e| {
            ArrowError::ComputeError(format!(
                "Unable to build regex from LIKE pattern: {}",
                e
            ))
        })
    };
    // `true` asks `regex_like` to invert every match result.
    regex_like(left, right, true, build_regex)
}
/// `NOT LIKE` against a scalar pattern: forwards to `like_scalar_op` and
/// inverts each match result.
#[inline]
fn nlike_scalar<'a, L: ArrayAccessor<Item = &'a str>>(
    left: L,
    right: &str,
) -> Result<BooleanArray> {
    like_scalar_op(left, right, |matched| !matched)
}
/// Perform SQL `left NOT LIKE right` on a [`StringArray`] /
/// [`LargeStringArray`] where `right` is a single scalar pattern.
///
/// See the documentation on [`like_utf8`] for more details.
pub fn nlike_utf8_scalar<OffsetSize>(
    left: &GenericStringArray<OffsetSize>,
    right: &str,
) -> Result<BooleanArray>
where
    OffsetSize: OffsetSizeTrait,
{
    nlike_scalar(left, right)
}
/// Perform SQL `left NOT LIKE right` on a [`DictionaryArray`] whose values are
/// a [`StringArray`] / [`LargeStringArray`], with `right` as a scalar pattern.
///
/// See the documentation on [`like_utf8`] for more details.
///
/// # Errors
///
/// Returns [`ArrowError::ComputeError`] when the dictionary values are
/// neither `Utf8` nor `LargeUtf8`.
pub fn nlike_dict_scalar<K>(left: &DictionaryArray<K>, right: &str) -> Result<BooleanArray>
where
    K: ArrowNumericType,
{
    match left.value_type() {
        DataType::Utf8 => {
            // The value type was matched just above, so the downcast is expected to succeed.
            let utf8_values = left.downcast_dict::<GenericStringArray<i32>>().unwrap();
            nlike_scalar(utf8_values, right)
        }
        DataType::LargeUtf8 => {
            let large_utf8_values =
                left.downcast_dict::<GenericStringArray<i64>>().unwrap();
            nlike_scalar(large_utf8_values, right)
        }
        _ => {
            let message =
                "nlike_dict_scalar only supports DictionaryArray with Utf8 or LargeUtf8 values";
            Err(ArrowError::ComputeError(message.to_string()))
        }
    }
}
/// Perform SQL `left ILIKE right` (case-insensitive `LIKE`) on [`StringArray`] /
/// [`LargeStringArray`], pairing each value with the pattern in the same row.
///
/// See the documentation on [`like_utf8`] for more details.
pub fn ilike_utf8<OffsetSize: OffsetSizeTrait>(
    left: &GenericStringArray<OffsetSize>,
    right: &GenericStringArray<OffsetSize>,
) -> Result<BooleanArray> {
    // `(?i)` switches the anchored regex to case-insensitive matching.
    let build_regex = |re_pattern: &str| {
        Regex::new(&format!("(?i)^{}$", re_pattern)).map_err(|e| {
            ArrowError::ComputeError(format!(
                "Unable to build regex from ILIKE pattern: {}",
                e
            ))
        })
    };
    regex_like(left, right, false, build_regex)
}
/// Evaluates SQL `left ILIKE right` (case-insensitive `LIKE`) for every value
/// of `left` against the single pattern `right`.
///
/// Patterns of the shapes `literal`, `literal%`, `%literal` and `%literal%`
/// are answered by comparing upper-cased strings. Every other pattern is
/// translated with `replace_like_wildcards` and matched with a `(?i)` regex
/// anchored at both ends.
///
/// The result is built with offset 0, so the input's validity bitmap is
/// sliced by the input's offset and length before being attached. Cloning it
/// unsliced would misalign the null bits for sliced inputs.
///
/// # Errors
///
/// Returns [`ArrowError::ComputeError`] when the regex for the general case
/// cannot be built.
#[inline]
fn ilike_scalar<'a, L: ArrayAccessor<Item = &'a str>>(
    left: L,
    right: &str,
) -> Result<BooleanArray> {
    let len = left.len();
    // Re-align the validity bits with element 0 of the (offset-free) output.
    let null_bit_buffer = left
        .data()
        .null_buffer()
        .map(|b| b.bit_slice(left.offset(), len));
    let mut bool_buf = MutableBuffer::from_len_zeroed(bit_util::ceil(len, 8));
    let bool_slice = bool_buf.as_slice_mut();

    if !right.contains(is_like_pattern) {
        // fast path, can use equals
        let right_uppercase = right.to_uppercase();
        for i in 0..len {
            // SAFETY: i in range 0..len
            let value = unsafe { left.value_unchecked(i) };
            if value.to_uppercase() == right_uppercase {
                bit_util::set_bit(bool_slice, i);
            }
        }
    } else if right.ends_with('%')
        && !right.ends_with("\\%")
        && !right[..right.len() - 1].contains(is_like_pattern)
    {
        // fast path, can use starts_with
        let start_str = right[..right.len() - 1].to_uppercase();
        for i in 0..len {
            // SAFETY: i in range 0..len
            let value = unsafe { left.value_unchecked(i) };
            if value.to_uppercase().starts_with(start_str.as_str()) {
                bit_util::set_bit(bool_slice, i);
            }
        }
    } else if right.starts_with('%') && !right[1..].contains(is_like_pattern) {
        // fast path, can use ends_with
        let ends_str = right[1..].to_uppercase();
        for i in 0..len {
            // SAFETY: i in range 0..len
            let value = unsafe { left.value_unchecked(i) };
            if value.to_uppercase().ends_with(ends_str.as_str()) {
                bit_util::set_bit(bool_slice, i);
            }
        }
    } else if right.starts_with('%')
        && right.ends_with('%')
        // An escaped trailing `\%` is a literal percent sign, not a wildcard,
        // so such patterns must go through the regex path (as in `like_scalar_op`).
        && !right.ends_with("\\%")
        && !right[1..right.len() - 1].contains(is_like_pattern)
    {
        // fast path, can use contains
        let contains = right[1..right.len() - 1].to_uppercase();
        for i in 0..len {
            // SAFETY: i in range 0..len
            let value = unsafe { left.value_unchecked(i) };
            if value.to_uppercase().contains(contains.as_str()) {
                bit_util::set_bit(bool_slice, i);
            }
        }
    } else {
        let re_pattern = replace_like_wildcards(right)?;
        let re = Regex::new(&format!("(?i)^{}$", re_pattern)).map_err(|e| {
            ArrowError::ComputeError(format!(
                "Unable to build regex from ILIKE pattern: {}",
                e
            ))
        })?;
        for i in 0..len {
            // SAFETY: i in range 0..len
            let haystack = unsafe { left.value_unchecked(i) };
            if re.is_match(haystack) {
                bit_util::set_bit(bool_slice, i);
            }
        }
    };

    let data = unsafe {
        ArrayData::new_unchecked(
            DataType::Boolean,
            len,
            None,
            null_bit_buffer,
            0,
            vec![bool_buf.into()],
            vec![],
        )
    };
    Ok(BooleanArray::from(data))
}
/// Perform SQL `left ILIKE right` on a [`StringArray`] /
/// [`LargeStringArray`] where `right` is a single scalar pattern.
///
/// See the documentation on [`like_utf8`] for more details.
pub fn ilike_utf8_scalar<OffsetSize>(
    left: &GenericStringArray<OffsetSize>,
    right: &str,
) -> Result<BooleanArray>
where
    OffsetSize: OffsetSizeTrait,
{
    ilike_scalar(left, right)
}
/// Perform SQL `left ILIKE right` on a [`DictionaryArray`] whose values are a
/// [`StringArray`] / [`LargeStringArray`], with `right` as a scalar pattern.
///
/// See the documentation on [`like_utf8`] for more details.
///
/// # Errors
///
/// Returns [`ArrowError::ComputeError`] when the dictionary values are
/// neither `Utf8` nor `LargeUtf8`.
pub fn ilike_dict_scalar<K>(left: &DictionaryArray<K>, right: &str) -> Result<BooleanArray>
where
    K: ArrowNumericType,
{
    match left.value_type() {
        DataType::Utf8 => {
            // The value type was matched just above, so the downcast is expected to succeed.
            let utf8_values = left.downcast_dict::<GenericStringArray<i32>>().unwrap();
            ilike_scalar(utf8_values, right)
        }
        DataType::LargeUtf8 => {
            let large_utf8_values =
                left.downcast_dict::<GenericStringArray<i64>>().unwrap();
            ilike_scalar(large_utf8_values, right)
        }
        _ => {
            let message =
                "ilike_dict_scalar only supports DictionaryArray with Utf8 or LargeUtf8 values";
            Err(ArrowError::ComputeError(message.to_string()))
        }
    }
}
/// Perform SQL `left NOT ILIKE right` (negated case-insensitive `LIKE`) on
/// [`StringArray`] / [`LargeStringArray`], pairing each value with the pattern
/// in the same row.
///
/// See the documentation on [`like_utf8`] for more details.
pub fn nilike_utf8<OffsetSize: OffsetSizeTrait>(
    left: &GenericStringArray<OffsetSize>,
    right: &GenericStringArray<OffsetSize>,
) -> Result<BooleanArray> {
    // `(?i)` switches the anchored regex to case-insensitive matching.
    let build_regex = |re_pattern: &str| {
        Regex::new(&format!("(?i)^{}$", re_pattern)).map_err(|e| {
            ArrowError::ComputeError(format!(
                "Unable to build regex from ILIKE pattern: {}",
                e
            ))
        })
    };
    // `true` asks `regex_like` to invert every match result.
    regex_like(left, right, true, build_regex)
}
/// Evaluates SQL `left NOT ILIKE right` (negated case-insensitive `LIKE`) for
/// every value of `left` against the single pattern `right`.
///
/// Patterns of the shapes `literal`, `literal%`, `%literal` and `%literal%`
/// are answered by comparing upper-cased strings. Every other pattern is
/// translated with `replace_like_wildcards` and matched with a `(?i)` regex
/// anchored at both ends. In every case the bit is set when the value does
/// NOT match.
///
/// The result is built with offset 0, so the input's validity bitmap is
/// sliced by the input's offset and length before being attached. Cloning it
/// unsliced would misalign the null bits for sliced inputs.
///
/// # Errors
///
/// Returns [`ArrowError::ComputeError`] when the regex for the general case
/// cannot be built.
#[inline]
fn nilike_scalar<'a, L: ArrayAccessor<Item = &'a str>>(
    left: L,
    right: &str,
) -> Result<BooleanArray> {
    let len = left.len();
    // Re-align the validity bits with element 0 of the (offset-free) output.
    let null_bit_buffer = left
        .data()
        .null_buffer()
        .map(|b| b.bit_slice(left.offset(), len));
    let mut bool_buf = MutableBuffer::from_len_zeroed(bit_util::ceil(len, 8));
    let bool_slice = bool_buf.as_slice_mut();

    if !right.contains(is_like_pattern) {
        // fast path, can use equals
        let right_uppercase = right.to_uppercase();
        for i in 0..len {
            // SAFETY: i in range 0..len
            let value = unsafe { left.value_unchecked(i) };
            if value.to_uppercase() != right_uppercase {
                bit_util::set_bit(bool_slice, i);
            }
        }
    } else if right.ends_with('%')
        && !right.ends_with("\\%")
        && !right[..right.len() - 1].contains(is_like_pattern)
    {
        // fast path, can use starts_with
        let start_str = right[..right.len() - 1].to_uppercase();
        for i in 0..len {
            // SAFETY: i in range 0..len
            let value = unsafe { left.value_unchecked(i) };
            if !value.to_uppercase().starts_with(start_str.as_str()) {
                bit_util::set_bit(bool_slice, i);
            }
        }
    } else if right.starts_with('%') && !right[1..].contains(is_like_pattern) {
        // fast path, can use ends_with
        let ends_str = right[1..].to_uppercase();
        for i in 0..len {
            // SAFETY: i in range 0..len
            let value = unsafe { left.value_unchecked(i) };
            if !value.to_uppercase().ends_with(ends_str.as_str()) {
                bit_util::set_bit(bool_slice, i);
            }
        }
    } else if right.starts_with('%')
        && right.ends_with('%')
        // An escaped trailing `\%` is a literal percent sign, not a wildcard,
        // so such patterns must go through the regex path (as in `like_scalar_op`).
        && !right.ends_with("\\%")
        && !right[1..right.len() - 1].contains(is_like_pattern)
    {
        // fast path, can use contains
        let contains = right[1..right.len() - 1].to_uppercase();
        for i in 0..len {
            // SAFETY: i in range 0..len
            let value = unsafe { left.value_unchecked(i) };
            if !value.to_uppercase().contains(contains.as_str()) {
                bit_util::set_bit(bool_slice, i);
            }
        }
    } else {
        let re_pattern = replace_like_wildcards(right)?;
        let re = Regex::new(&format!("(?i)^{}$", re_pattern)).map_err(|e| {
            ArrowError::ComputeError(format!(
                "Unable to build regex from ILIKE pattern: {}",
                e
            ))
        })?;
        for i in 0..len {
            // SAFETY: i in range 0..len
            let haystack = unsafe { left.value_unchecked(i) };
            if !re.is_match(haystack) {
                bit_util::set_bit(bool_slice, i);
            }
        }
    };

    let data = unsafe {
        ArrayData::new_unchecked(
            DataType::Boolean,
            len,
            None,
            null_bit_buffer,
            0,
            vec![bool_buf.into()],
            vec![],
        )
    };
    Ok(BooleanArray::from(data))
}
/// Perform SQL `left NOT ILIKE right` on a [`StringArray`] /
/// [`LargeStringArray`] where `right` is a single scalar pattern.
///
/// See the documentation on [`like_utf8`] for more details.
pub fn nilike_utf8_scalar<OffsetSize>(
    left: &GenericStringArray<OffsetSize>,
    right: &str,
) -> Result<BooleanArray>
where
    OffsetSize: OffsetSizeTrait,
{
    nilike_scalar(left, right)
}
/// Perform SQL `left NOT ILIKE right` on a [`DictionaryArray`] whose values
/// are a [`StringArray`] / [`LargeStringArray`], with `right` as a scalar pattern.
///
/// See the documentation on [`like_utf8`] for more details.
///
/// # Errors
///
/// Returns [`ArrowError::ComputeError`] when the dictionary values are
/// neither `Utf8` nor `LargeUtf8`.
pub fn nilike_dict_scalar<K>(left: &DictionaryArray<K>, right: &str) -> Result<BooleanArray>
where
    K: ArrowNumericType,
{
    match left.value_type() {
        DataType::Utf8 => {
            // The value type was matched just above, so the downcast is expected to succeed.
            let utf8_values = left.downcast_dict::<GenericStringArray<i32>>().unwrap();
            nilike_scalar(utf8_values, right)
        }
        DataType::LargeUtf8 => {
            let large_utf8_values =
                left.downcast_dict::<GenericStringArray<i64>>().unwrap();
            nilike_scalar(large_utf8_values, right)
        }
        _ => {
            let message =
                "nilike_dict_scalar only supports DictionaryArray with Utf8 or LargeUtf8 values";
            Err(ArrowError::ComputeError(message.to_string()))
        }
    }
}
/// Perform SQL `array ~ regex_array` operation on [`StringArray`] / [`LargeStringArray`].
/// If a `regex_array` element is the empty string, the corresponding result value is
/// always true.
///
/// `flags_array` is an optional [`StringArray`] / [`LargeStringArray`] of flags that
/// enable special search modes, such as case-insensitive and multi-line matching.
/// A non-null flag is prepended to its row's pattern as `(?flags)`.
/// See the documentation [here](https://docs.rs/regex/1.5.4/regex/#grouping-and-flags)
/// for more information.
///
/// Each distinct complete pattern (flags included) is compiled once and reused.
/// A row where the value or the pattern is null has its value bit set to false.
///
/// # Errors
///
/// Returns [`ArrowError::ComputeError`] when:
/// * `array` and `regex_array` have different lengths,
/// * `flags_array` is shorter than `array`,
/// * a pattern fails to compile.
pub fn regexp_is_match_utf8<OffsetSize: OffsetSizeTrait>(
    array: &GenericStringArray<OffsetSize>,
    regex_array: &GenericStringArray<OffsetSize>,
    flags_array: Option<&GenericStringArray<OffsetSize>>,
) -> Result<BooleanArray> {
    if array.len() != regex_array.len() {
        return Err(ArrowError::ComputeError(
            "Cannot perform comparison operation on arrays of different length"
                .to_string(),
        ));
    }
    if let Some(flags) = flags_array {
        // A shorter flags array would make the `zip` below stop early and leave
        // the value buffer with fewer bits than the length declared for the
        // result. Longer flags arrays keep working as before: the extra
        // entries are simply never read.
        if flags.len() < array.len() {
            return Err(ArrowError::ComputeError(
                "Cannot perform comparison operation on arrays of different length"
                    .to_string(),
            ));
        }
    }
    let null_bit_buffer =
        combine_option_bitmap(&[array.data_ref(), regex_array.data_ref()], array.len())?;
    let mut patterns: HashMap<String, Regex> = HashMap::new();
    let mut result = BooleanBufferBuilder::new(array.len());

    let complete_pattern = match flags_array {
        Some(flags) => Box::new(regex_array.iter().zip(flags.iter()).map(
            |(pattern, flags)| {
                pattern.map(|pattern| match flags {
                    Some(flag) => format!("(?{}){}", flag, pattern),
                    None => pattern.to_string(),
                })
            },
        )) as Box<dyn Iterator<Item = Option<String>>>,
        None => Box::new(
            regex_array
                .iter()
                .map(|pattern| pattern.map(|pattern| pattern.to_string())),
        ),
    };

    for (value, pattern) in array.iter().zip(complete_pattern) {
        match (value, pattern) {
            // Required for Postgres compatibility:
            // SELECT 'foobarbequebaz' ~ ''; = true
            (Some(_), Some(pattern)) if pattern.is_empty() => {
                result.append(true);
            }
            (Some(value), Some(pattern)) => {
                // Compile on first sight and borrow the cached regex afterwards,
                // instead of cloning it for every row.
                let re = match patterns.entry(pattern) {
                    std::collections::hash_map::Entry::Occupied(cached) => {
                        cached.into_mut()
                    }
                    std::collections::hash_map::Entry::Vacant(slot) => {
                        let re = Regex::new(slot.key().as_str()).map_err(|e| {
                            ArrowError::ComputeError(format!(
                                "Regular expression did not compile: {:?}",
                                e
                            ))
                        })?;
                        slot.insert(re)
                    }
                };
                result.append(re.is_match(value));
            }
            _ => result.append(false),
        }
    }

    let data = unsafe {
        ArrayData::new_unchecked(
            DataType::Boolean,
            array.len(),
            None,
            null_bit_buffer,
            0,
            vec![result.finish()],
            vec![],
        )
    };
    Ok(BooleanArray::from(data))
}
/// Perform SQL `array ~ regex` operation on [`StringArray`] /
/// [`LargeStringArray`] with a single scalar `regex` and optional scalar `flag`.
///
/// When `flag` is given, the pattern actually compiled is `(?flag)regex`.
/// If that complete pattern is empty, every value bit is set to true.
///
/// The result is built with offset 0, so the input's validity bitmap is
/// sliced by the input's offset and length before being attached. Cloning it
/// unsliced would misalign the null bits for sliced inputs.
///
/// See the documentation on [`regexp_is_match_utf8`] for more details.
///
/// # Errors
///
/// Returns [`ArrowError::ComputeError`] when the pattern fails to compile.
pub fn regexp_is_match_utf8_scalar<OffsetSize: OffsetSizeTrait>(
    array: &GenericStringArray<OffsetSize>,
    regex: &str,
    flag: Option<&str>,
) -> Result<BooleanArray> {
    let len = array.len();
    // Re-align the validity bits with element 0 of the (offset-free) output.
    let null_bit_buffer = array
        .data()
        .null_buffer()
        .map(|b| b.bit_slice(array.offset(), len));
    let mut result = BooleanBufferBuilder::new(len);

    let pattern = match flag {
        Some(flag) => format!("(?{}){}", flag, regex),
        None => regex.to_string(),
    };
    if pattern.is_empty() {
        result.append_n(len, true);
    } else {
        let re = Regex::new(pattern.as_str()).map_err(|e| {
            ArrowError::ComputeError(format!(
                "Regular expression did not compile: {:?}",
                e
            ))
        })?;
        for i in 0..len {
            result.append(re.is_match(array.value(i)));
        }
    }

    let buffer = result.finish();
    let data = unsafe {
        ArrayData::new_unchecked(
            DataType::Boolean,
            len,
            None,
            null_bit_buffer,
            0,
            vec![buffer],
            vec![],
        )
    };
    Ok(BooleanArray::from(data))
}
/// Element-wise `left == right` for [`StringArray`] / [`LargeStringArray`].
///
/// Delegates to `compare_op`, which rejects inputs of different lengths.
pub fn eq_utf8<OffsetSize>(
    left: &GenericStringArray<OffsetSize>,
    right: &GenericStringArray<OffsetSize>,
) -> Result<BooleanArray>
where
    OffsetSize: OffsetSizeTrait,
{
    compare_op(left, right, |lhs, rhs| lhs == rhs)
}
/// Tests every string of `left` for emptiness by looking only at its offsets.
///
/// With `EQ == true` the value bit is set when the string's start and end
/// offsets are equal (the string is empty). With `EQ == false` the bit is set
/// when the end offset is greater than the start offset (the string is
/// non-empty). The input's null buffer, sliced by offset and length, is
/// carried over as the validity bitmap.
fn utf8_empty<OffsetSize: OffsetSizeTrait, const EQ: bool>(
    left: &GenericStringArray<OffsetSize>,
) -> Result<BooleanArray> {
    let len = left.len();
    let validity = left
        .data()
        .null_buffer()
        .map(|bits| bits.bit_slice(left.offset(), len));

    // Each window of two consecutive offsets is the (start, end) of one string.
    let lengths_checked = left.value_offsets().windows(2).map(|bounds| {
        let start = bounds[0].as_usize();
        let end = bounds[1].as_usize();
        if EQ {
            end == start
        } else {
            end > start
        }
    });
    let values = unsafe { MutableBuffer::from_trusted_len_iter_bool(lengths_checked) };

    let data = unsafe {
        ArrayData::new_unchecked(
            DataType::Boolean,
            len,
            None,
            validity,
            0,
            vec![Buffer::from(values)],
            vec![],
        )
    };
    Ok(BooleanArray::from(data))
}
/// Compares every value of a [`StringArray`] / [`LargeStringArray`] with the
/// scalar `right` for equality.
///
/// An empty `right` is answered by `utf8_empty` instead of a per-value string
/// comparison.
pub fn eq_utf8_scalar<OffsetSize>(
    left: &GenericStringArray<OffsetSize>,
    right: &str,
) -> Result<BooleanArray>
where
    OffsetSize: OffsetSizeTrait,
{
    if right.is_empty() {
        utf8_empty::<_, true>(left)
    } else {
        compare_op_scalar(left, |value| value == right)
    }
}
/// Element-wise `left == right` for two [`BooleanArray`]s.
pub fn eq_bool(left: &BooleanArray, right: &BooleanArray) -> Result<BooleanArray> {
    compare_op(left, right, |lhs, rhs| lhs == rhs)
}
/// Element-wise `left != right` for two [`BooleanArray`]s.
pub fn neq_bool(left: &BooleanArray, right: &BooleanArray) -> Result<BooleanArray> {
    compare_op(left, right, |lhs, rhs| lhs != rhs)
}
/// Element-wise `left < right` for two [`BooleanArray`]s, i.e. true only
/// where `left` is false and `right` is true.
pub fn lt_bool(left: &BooleanArray, right: &BooleanArray) -> Result<BooleanArray> {
    compare_op(left, right, |lhs, rhs| lhs < rhs)
}
/// Element-wise `left <= right` for two [`BooleanArray`]s, i.e. false only
/// where `left` is true and `right` is false.
pub fn lt_eq_bool(left: &BooleanArray, right: &BooleanArray) -> Result<BooleanArray> {
    compare_op(left, right, |lhs, rhs| lhs <= rhs)
}
/// Element-wise `left > right` for two [`BooleanArray`]s, i.e. true only
/// where `left` is true and `right` is false.
pub fn gt_bool(left: &BooleanArray, right: &BooleanArray) -> Result<BooleanArray> {
    compare_op(left, right, |lhs, rhs| lhs > rhs)
}
/// Element-wise `left >= right` for two [`BooleanArray`]s, i.e. false only
/// where `left` is false and `right` is true.
pub fn gt_eq_bool(left: &BooleanArray, right: &BooleanArray) -> Result<BooleanArray> {
    compare_op(left, right, |lhs, rhs| lhs >= rhs)
}
/// Compares every value of a [`BooleanArray`] with the scalar `right` for equality.
///
/// No per-element loop is needed. Comparing with `true` reuses the value bits
/// as they are, and comparing with `false` uses their bitwise negation. In
/// both cases the bits are taken from the array's offset for its length.
pub fn eq_bool_scalar(left: &BooleanArray, right: bool) -> Result<BooleanArray> {
    let len = left.len();
    let left_offset = left.offset();

    let values = match right {
        true => left.values().bit_slice(left_offset, len),
        false => buffer_unary_not(left.values(), left_offset, len),
    };

    // Validity is carried over, re-sliced so it lines up with the offset-0 result.
    let validity = left
        .data_ref()
        .null_bitmap()
        .as_ref()
        .map(|b| b.buffer().bit_slice(left_offset, len));

    let data = unsafe {
        ArrayData::new_unchecked(
            DataType::Boolean,
            len,
            None,
            validity,
            0,
            vec![values],
            vec![],
        )
    };
    Ok(BooleanArray::from(data))
}
/// Compares every value of a [`BooleanArray`] with the scalar `right` using `<`.
pub fn lt_bool_scalar(left: &BooleanArray, right: bool) -> Result<BooleanArray> {
    compare_op_scalar(left, |value: bool| value < right)
}
/// Compares every value of a [`BooleanArray`] with the scalar `right` using `<=`.
pub fn lt_eq_bool_scalar(left: &BooleanArray, right: bool) -> Result<BooleanArray> {
    // For booleans, `value <= right` is false only for `true <= false`.
    compare_op_scalar(left, |value: bool| !value | right)
}
/// Compares every value of a [`BooleanArray`] with the scalar `right` using `>`.
pub fn gt_bool_scalar(left: &BooleanArray, right: bool) -> Result<BooleanArray> {
    compare_op_scalar(left, |value: bool| value > right)
}
/// Compares every value of a [`BooleanArray`] with the scalar `right` using `>=`.
pub fn gt_eq_bool_scalar(left: &BooleanArray, right: bool) -> Result<BooleanArray> {
    // For booleans, `value >= right` is false only for `false >= true`.
    compare_op_scalar(left, |value: bool| value | !right)
}
/// Compares every value of a [`BooleanArray`] with the scalar `right` for inequality.
///
/// For booleans, `value != right` is the same as `value == !right`, so this
/// forwards to [`eq_bool_scalar`] with the scalar flipped.
pub fn neq_bool_scalar(left: &BooleanArray, right: bool) -> Result<BooleanArray> {
    let flipped = !right;
    eq_bool_scalar(left, flipped)
}
/// Element-wise `left == right` for [`BinaryArray`] / [`LargeBinaryArray`].
///
/// Delegates to `compare_op`, which rejects inputs of different lengths.
pub fn eq_binary<OffsetSize>(
    left: &GenericBinaryArray<OffsetSize>,
    right: &GenericBinaryArray<OffsetSize>,
) -> Result<BooleanArray>
where
    OffsetSize: OffsetSizeTrait,
{
    compare_op(left, right, |lhs, rhs| lhs == rhs)
}
/// Compares every value of a [`BinaryArray`] / [`LargeBinaryArray`] with the
/// scalar byte slice `right` for equality.
pub fn eq_binary_scalar<OffsetSize>(
    left: &GenericBinaryArray<OffsetSize>,
    right: &[u8],
) -> Result<BooleanArray>
where
    OffsetSize: OffsetSizeTrait,
{
    compare_op_scalar(left, |value| value == right)
}
/// Element-wise `left != right` for [`BinaryArray`] / [`LargeBinaryArray`].
///
/// Delegates to `compare_op`, which rejects inputs of different lengths.
pub fn neq_binary<OffsetSize>(
    left: &GenericBinaryArray<OffsetSize>,
    right: &GenericBinaryArray<OffsetSize>,
) -> Result<BooleanArray>
where
    OffsetSize: OffsetSizeTrait,
{
    compare_op(left, right, |lhs, rhs| lhs != rhs)
}
/// Compares every value of a [`BinaryArray`] / [`LargeBinaryArray`] with the
/// scalar byte slice `right` for inequality.
pub fn neq_binary_scalar<OffsetSize>(
    left: &GenericBinaryArray<OffsetSize>,
    right: &[u8],
) -> Result<BooleanArray>
where
    OffsetSize: OffsetSizeTrait,
{
    compare_op_scalar(left, |value| value != right)
}
/// Element-wise `left < right` for [`BinaryArray`] / [`LargeBinaryArray`],
/// using the ordering of byte slices.
///
/// Delegates to `compare_op`, which rejects inputs of different lengths.
pub fn lt_binary<OffsetSize>(
    left: &GenericBinaryArray<OffsetSize>,
    right: &GenericBinaryArray<OffsetSize>,
) -> Result<BooleanArray>
where
    OffsetSize: OffsetSizeTrait,
{
    compare_op(left, right, |lhs, rhs| lhs < rhs)
}
/// Compares every value of a [`BinaryArray`] / [`LargeBinaryArray`] with the
/// scalar byte slice `right` using `<`.
pub fn lt_binary_scalar<OffsetSize>(
    left: &GenericBinaryArray<OffsetSize>,
    right: &[u8],
) -> Result<BooleanArray>
where
    OffsetSize: OffsetSizeTrait,
{
    compare_op_scalar(left, |value| value < right)
}
/// Element-wise `left <= right` for [`BinaryArray`] / [`LargeBinaryArray`],
/// using the ordering of byte slices.
///
/// Delegates to `compare_op`, which rejects inputs of different lengths.
pub fn lt_eq_binary<OffsetSize>(
    left: &GenericBinaryArray<OffsetSize>,
    right: &GenericBinaryArray<OffsetSize>,
) -> Result<BooleanArray>
where
    OffsetSize: OffsetSizeTrait,
{
    compare_op(left, right, |lhs, rhs| lhs <= rhs)
}
/// Compares every value of a [`BinaryArray`] / [`LargeBinaryArray`] with the
/// scalar byte slice `right` using `<=`.
pub fn lt_eq_binary_scalar<OffsetSize>(
    left: &GenericBinaryArray<OffsetSize>,
    right: &[u8],
) -> Result<BooleanArray>
where
    OffsetSize: OffsetSizeTrait,
{
    compare_op_scalar(left, |value| value <= right)
}
/// Element-wise `left > right` for [`BinaryArray`] / [`LargeBinaryArray`],
/// using the ordering of byte slices.
///
/// Delegates to `compare_op`, which rejects inputs of different lengths.
pub fn gt_binary<OffsetSize>(
    left: &GenericBinaryArray<OffsetSize>,
    right: &GenericBinaryArray<OffsetSize>,
) -> Result<BooleanArray>
where
    OffsetSize: OffsetSizeTrait,
{
    compare_op(left, right, |lhs, rhs| lhs > rhs)
}
/// Compares every value of a [`BinaryArray`] / [`LargeBinaryArray`] with the
/// scalar byte slice `right` using `>`.
pub fn gt_binary_scalar<OffsetSize>(
    left: &GenericBinaryArray<OffsetSize>,
    right: &[u8],
) -> Result<BooleanArray>
where
    OffsetSize: OffsetSizeTrait,
{
    compare_op_scalar(left, |value| value > right)
}
/// Element-wise `left >= right` for [`BinaryArray`] / [`LargeBinaryArray`],
/// using the ordering of byte slices.
///
/// Delegates to `compare_op`, which rejects inputs of different lengths.
pub fn gt_eq_binary<OffsetSize>(
    left: &GenericBinaryArray<OffsetSize>,
    right: &GenericBinaryArray<OffsetSize>,
) -> Result<BooleanArray>
where
    OffsetSize: OffsetSizeTrait,
{
    compare_op(left, right, |lhs, rhs| lhs >= rhs)
}
/// Compares every value of a [`BinaryArray`] / [`LargeBinaryArray`] with the
/// scalar byte slice `right` using `>=`.
pub fn gt_eq_binary_scalar<OffsetSize>(
    left: &GenericBinaryArray<OffsetSize>,
    right: &[u8],
) -> Result<BooleanArray>
where
    OffsetSize: OffsetSizeTrait,
{
    compare_op_scalar(left, |value| value >= right)
}
/// Element-wise `left != right` for [`StringArray`] / [`LargeStringArray`].
///
/// Delegates to `compare_op`, which rejects inputs of different lengths.
pub fn neq_utf8<OffsetSize>(
    left: &GenericStringArray<OffsetSize>,
    right: &GenericStringArray<OffsetSize>,
) -> Result<BooleanArray>
where
    OffsetSize: OffsetSizeTrait,
{
    compare_op(left, right, |lhs, rhs| lhs != rhs)
}
/// Compares every value of a [`StringArray`] / [`LargeStringArray`] with the
/// scalar `right` for inequality.
///
/// An empty `right` is answered by `utf8_empty` instead of a per-value string
/// comparison.
pub fn neq_utf8_scalar<OffsetSize>(
    left: &GenericStringArray<OffsetSize>,
    right: &str,
) -> Result<BooleanArray>
where
    OffsetSize: OffsetSizeTrait,
{
    if right.is_empty() {
        utf8_empty::<_, false>(left)
    } else {
        compare_op_scalar(left, |value| value != right)
    }
}
/// Element-wise `left < right` for [`StringArray`] / [`LargeStringArray`],
/// using the ordering of `str`.
///
/// Delegates to `compare_op`, which rejects inputs of different lengths.
pub fn lt_utf8<OffsetSize>(
    left: &GenericStringArray<OffsetSize>,
    right: &GenericStringArray<OffsetSize>,
) -> Result<BooleanArray>
where
    OffsetSize: OffsetSizeTrait,
{
    compare_op(left, right, |lhs, rhs| lhs < rhs)
}
/// Compares every value of a [`StringArray`] / [`LargeStringArray`] with the
/// scalar `right` using `<`.
pub fn lt_utf8_scalar<OffsetSize>(
    left: &GenericStringArray<OffsetSize>,
    right: &str,
) -> Result<BooleanArray>
where
    OffsetSize: OffsetSizeTrait,
{
    compare_op_scalar(left, |value| value < right)
}
/// Element-wise `left <= right` for [`StringArray`] / [`LargeStringArray`],
/// using the ordering of `str`.
///
/// Delegates to `compare_op`, which rejects inputs of different lengths.
pub fn lt_eq_utf8<OffsetSize>(
    left: &GenericStringArray<OffsetSize>,
    right: &GenericStringArray<OffsetSize>,
) -> Result<BooleanArray>
where
    OffsetSize: OffsetSizeTrait,
{
    compare_op(left, right, |lhs, rhs| lhs <= rhs)
}
/// Compares every value of a [`StringArray`] / [`LargeStringArray`] with the
/// scalar `right` using `<=`.
pub fn lt_eq_utf8_scalar<OffsetSize>(
    left: &GenericStringArray<OffsetSize>,
    right: &str,
) -> Result<BooleanArray>
where
    OffsetSize: OffsetSizeTrait,
{
    compare_op_scalar(left, |value| value <= right)
}
/// Element-wise `left > right` for [`StringArray`] / [`LargeStringArray`],
/// using the ordering of `str`.
///
/// Delegates to `compare_op`, which rejects inputs of different lengths.
pub fn gt_utf8<OffsetSize>(
    left: &GenericStringArray<OffsetSize>,
    right: &GenericStringArray<OffsetSize>,
) -> Result<BooleanArray>
where
    OffsetSize: OffsetSizeTrait,
{
    compare_op(left, right, |lhs, rhs| lhs > rhs)
}
/// Compares every value of a [`StringArray`] / [`LargeStringArray`] with the
/// scalar `right` using `>`.
pub fn gt_utf8_scalar<OffsetSize>(
    left: &GenericStringArray<OffsetSize>,
    right: &str,
) -> Result<BooleanArray>
where
    OffsetSize: OffsetSizeTrait,
{
    compare_op_scalar(left, |value| value > right)
}
/// Element-wise `left >= right` between two [`StringArray`]s / [`LargeStringArray`]s.
///
/// Both inputs must share the same offset width.
pub fn gt_eq_utf8<OffsetSize: OffsetSizeTrait>(
    left: &GenericStringArray<OffsetSize>,
    right: &GenericStringArray<OffsetSize>,
) -> Result<BooleanArray> {
    compare_op(left, right, |lhs, rhs| lhs >= rhs)
}
/// Element-wise `left >= right` between a [`StringArray`] / [`LargeStringArray`] and a
/// string scalar.
pub fn gt_eq_utf8_scalar<OffsetSize: OffsetSizeTrait>(
    left: &GenericStringArray<OffsetSize>,
    right: &str,
) -> Result<BooleanArray> {
    compare_op_scalar(left, |value| value >= right)
}
// Shared, non-generic-over-macro-arguments helper so that `try_to_type!` does not
// instantiate a fresh closure for each combination of `$RIGHT` and `$TY`.
//
// Turns a failed conversion (`None`) into an `ArrowError::ComputeError` that names the
// source expression (`right`) and the conversion that was attempted (`ty`).
fn try_to_type_result<T>(value: Option<T>, right: &str, ty: &str) -> Result<T> {
    match value {
        Some(converted) => Ok(converted),
        None => Err(ArrowError::ComputeError(format!(
            "Could not convert {} with {}",
            right, ty,
        ))),
    }
}
/// Calls `$RIGHT.$TY()` (e.g. `right.to_i128()`) and converts a `None` result into an
/// error that names both the source expression and the conversion method.
///
/// Type of expression is `Result<.., ArrowError>`
macro_rules! try_to_type {
    ($RIGHT: expr, $TY: ident) => {
        // `$TY` (not `$TYPE`) is the metavariable bound by this rule; stringifying an
        // unbound name would embed the literal text `$TYPE` in the error message.
        try_to_type_result($RIGHT.$TY(), stringify!($RIGHT), stringify!($TY))
    };
}
// Dispatches a scalar comparison kernel on the runtime type of `$LEFT`.
//
// Two rules are provided:
// * 3 arguments `($LEFT, $RIGHT, $OP)`: `$LEFT` is treated as a primitive array. `$RIGHT`
//   is converted to the array's native type through `try_to_type!` (an `Err` is returned
//   early via `?` when the conversion yields `None`), then `$OP::<ArrowType>` is called.
// * 4 arguments `($LEFT, $RIGHT, $KT, $OP)`: `$LEFT` is treated as a dictionary array with
//   key type `$KT`. `$OP` is applied to the dictionary values and the per-value result is
//   expanded to one result per key by `unpack_dict_comparison`.
//
// Both rules use `?`, so the macro must be expanded inside a function returning `Result`.
macro_rules! dyn_compare_scalar {
// Applies `LEFT OP RIGHT` when `LEFT` is a `PrimitiveArray`
($LEFT: expr, $RIGHT: expr, $OP: ident) => {{
match $LEFT.data_type() {
DataType::Int8 => {
let right = try_to_type!($RIGHT, to_i8)?;
let left = as_primitive_array::<Int8Type>($LEFT);
$OP::<Int8Type>(left, right)
}
DataType::Int16 => {
let right = try_to_type!($RIGHT, to_i16)?;
let left = as_primitive_array::<Int16Type>($LEFT);
$OP::<Int16Type>(left, right)
}
DataType::Int32 => {
let right = try_to_type!($RIGHT, to_i32)?;
let left = as_primitive_array::<Int32Type>($LEFT);
$OP::<Int32Type>(left, right)
}
DataType::Int64 => {
let right = try_to_type!($RIGHT, to_i64)?;
let left = as_primitive_array::<Int64Type>($LEFT);
$OP::<Int64Type>(left, right)
}
DataType::UInt8 => {
let right = try_to_type!($RIGHT, to_u8)?;
let left = as_primitive_array::<UInt8Type>($LEFT);
$OP::<UInt8Type>(left, right)
}
DataType::UInt16 => {
let right = try_to_type!($RIGHT, to_u16)?;
let left = as_primitive_array::<UInt16Type>($LEFT);
$OP::<UInt16Type>(left, right)
}
DataType::UInt32 => {
let right = try_to_type!($RIGHT, to_u32)?;
let left = as_primitive_array::<UInt32Type>($LEFT);
$OP::<UInt32Type>(left, right)
}
DataType::UInt64 => {
let right = try_to_type!($RIGHT, to_u64)?;
let left = as_primitive_array::<UInt64Type>($LEFT);
$OP::<UInt64Type>(left, right)
}
DataType::Float32 => {
let right = try_to_type!($RIGHT, to_f32)?;
let left = as_primitive_array::<Float32Type>($LEFT);
$OP::<Float32Type>(left, right)
}
DataType::Float64 => {
let right = try_to_type!($RIGHT, to_f64)?;
let left = as_primitive_array::<Float64Type>($LEFT);
$OP::<Float64Type>(left, right)
}
// Any other data type is reported as an error that includes the data type, the
// kernel name and the `Debug` form of the scalar.
_ => Err(ArrowError::ComputeError(format!(
"Unsupported data type {:?} for comparison {} with {:?}",
$LEFT.data_type(),
stringify!($OP),
$RIGHT
))),
}
}};
// Applies `LEFT OP RIGHT` when `LEFT` is a `DictionaryArray` with keys of type `KT`
($LEFT: expr, $RIGHT: expr, $KT: ident, $OP: ident) => {{
// `$OP` receives `left.values()` directly, so it must accept the dictionary's values
// array (the callers in this file pass the `*_dyn_scalar` function itself, which then
// dispatches on the value type).
match $KT.as_ref() {
DataType::UInt8 => {
let left = as_dictionary_array::<UInt8Type>($LEFT);
unpack_dict_comparison(left, $OP(left.values(), $RIGHT)?)
}
DataType::UInt16 => {
let left = as_dictionary_array::<UInt16Type>($LEFT);
unpack_dict_comparison(left, $OP(left.values(), $RIGHT)?)
}
DataType::UInt32 => {
let left = as_dictionary_array::<UInt32Type>($LEFT);
unpack_dict_comparison(left, $OP(left.values(), $RIGHT)?)
}
DataType::UInt64 => {
let left = as_dictionary_array::<UInt64Type>($LEFT);
unpack_dict_comparison(left, $OP(left.values(), $RIGHT)?)
}
DataType::Int8 => {
let left = as_dictionary_array::<Int8Type>($LEFT);
unpack_dict_comparison(left, $OP(left.values(), $RIGHT)?)
}
DataType::Int16 => {
let left = as_dictionary_array::<Int16Type>($LEFT);
unpack_dict_comparison(left, $OP(left.values(), $RIGHT)?)
}
DataType::Int32 => {
let left = as_dictionary_array::<Int32Type>($LEFT);
unpack_dict_comparison(left, $OP(left.values(), $RIGHT)?)
}
DataType::Int64 => {
let left = as_dictionary_array::<Int64Type>($LEFT);
unpack_dict_comparison(left, $OP(left.values(), $RIGHT)?)
}
_ => Err(ArrowError::ComputeError(format!(
"Unsupported dictionary key type {:?}",
$KT.as_ref()
))),
}
}};
}
// Applies the string scalar kernel `$OP` to a dictionary array `$LEFT` whose key type is
// `$KT`: the kernel runs once over the dictionary values and `unpack_dict_comparison`
// expands the result to one entry per key.
//
// Uses `?`, so the macro must be expanded inside a function returning `Result`.
//
// NOTE(review): the values are always downcast with `as_string_array`, which presumably
// targets `StringArray` (i32 offsets) only. A dictionary with `LargeUtf8` values would
// then not be handled by this macro — confirm against `as_string_array`.
macro_rules! dyn_compare_utf8_scalar {
($LEFT: expr, $RIGHT: expr, $KT: ident, $OP: ident) => {{
match $KT.as_ref() {
DataType::UInt8 => {
let left = as_dictionary_array::<UInt8Type>($LEFT);
let values = as_string_array(left.values());
unpack_dict_comparison(left, $OP(values, $RIGHT)?)
}
DataType::UInt16 => {
let left = as_dictionary_array::<UInt16Type>($LEFT);
let values = as_string_array(left.values());
unpack_dict_comparison(left, $OP(values, $RIGHT)?)
}
DataType::UInt32 => {
let left = as_dictionary_array::<UInt32Type>($LEFT);
let values = as_string_array(left.values());
unpack_dict_comparison(left, $OP(values, $RIGHT)?)
}
DataType::UInt64 => {
let left = as_dictionary_array::<UInt64Type>($LEFT);
let values = as_string_array(left.values());
unpack_dict_comparison(left, $OP(values, $RIGHT)?)
}
DataType::Int8 => {
let left = as_dictionary_array::<Int8Type>($LEFT);
let values = as_string_array(left.values());
unpack_dict_comparison(left, $OP(values, $RIGHT)?)
}
DataType::Int16 => {
let left = as_dictionary_array::<Int16Type>($LEFT);
let values = as_string_array(left.values());
unpack_dict_comparison(left, $OP(values, $RIGHT)?)
}
DataType::Int32 => {
let left = as_dictionary_array::<Int32Type>($LEFT);
let values = as_string_array(left.values());
unpack_dict_comparison(left, $OP(values, $RIGHT)?)
}
DataType::Int64 => {
let left = as_dictionary_array::<Int64Type>($LEFT);
let values = as_string_array(left.values());
unpack_dict_comparison(left, $OP(values, $RIGHT)?)
}
// Any key type other than the eight integer types above is rejected.
_ => Err(ArrowError::ComputeError(String::from("Unknown key type"))),
}
}};
}
/// Compares each element of `left` with the numeric scalar `right` using `==`.
///
/// Accepts PrimitiveArrays as well as DictionaryArrays whose values are primitive. For a
/// dictionary the comparison is evaluated on the values and then expanded per key.
///
/// If `simd` feature flag is not enabled:
/// floating point values (f32 and f64) are ordered according to the totalOrder predicate
/// of the IEEE 754 (2008 revision) floating point standard.
/// Please refer to `f32::total_cmp` and `f64::total_cmp`.
pub fn eq_dyn_scalar<T>(left: &dyn Array, right: T) -> Result<BooleanArray>
where
    T: num::ToPrimitive + std::fmt::Debug,
{
    if let DataType::Dictionary(key_type, _) = left.data_type() {
        dyn_compare_scalar!(left, right, key_type, eq_dyn_scalar)
    } else {
        dyn_compare_scalar!(left, right, eq_scalar)
    }
}
/// Compares each element of `left` with the numeric scalar `right` using `<`.
///
/// Accepts PrimitiveArrays as well as DictionaryArrays whose values are primitive. For a
/// dictionary the comparison is evaluated on the values and then expanded per key.
///
/// If `simd` feature flag is not enabled:
/// floating point values (f32 and f64) are ordered according to the totalOrder predicate
/// of the IEEE 754 (2008 revision) floating point standard.
/// Please refer to `f32::total_cmp` and `f64::total_cmp`.
pub fn lt_dyn_scalar<T>(left: &dyn Array, right: T) -> Result<BooleanArray>
where
    T: num::ToPrimitive + std::fmt::Debug,
{
    if let DataType::Dictionary(key_type, _) = left.data_type() {
        dyn_compare_scalar!(left, right, key_type, lt_dyn_scalar)
    } else {
        dyn_compare_scalar!(left, right, lt_scalar)
    }
}
/// Compares each element of `left` with the numeric scalar `right` using `<=`.
///
/// Accepts PrimitiveArrays as well as DictionaryArrays whose values are primitive. For a
/// dictionary the comparison is evaluated on the values and then expanded per key.
///
/// If `simd` feature flag is not enabled:
/// floating point values (f32 and f64) are ordered according to the totalOrder predicate
/// of the IEEE 754 (2008 revision) floating point standard.
/// Please refer to `f32::total_cmp` and `f64::total_cmp`.
pub fn lt_eq_dyn_scalar<T>(left: &dyn Array, right: T) -> Result<BooleanArray>
where
    T: num::ToPrimitive + std::fmt::Debug,
{
    if let DataType::Dictionary(key_type, _) = left.data_type() {
        dyn_compare_scalar!(left, right, key_type, lt_eq_dyn_scalar)
    } else {
        dyn_compare_scalar!(left, right, lt_eq_scalar)
    }
}
/// Compares each element of `left` with the numeric scalar `right` using `>`.
///
/// Accepts PrimitiveArrays as well as DictionaryArrays whose values are primitive. For a
/// dictionary the comparison is evaluated on the values and then expanded per key.
///
/// If `simd` feature flag is not enabled:
/// floating point values (f32 and f64) are ordered according to the totalOrder predicate
/// of the IEEE 754 (2008 revision) floating point standard.
/// Please refer to `f32::total_cmp` and `f64::total_cmp`.
pub fn gt_dyn_scalar<T>(left: &dyn Array, right: T) -> Result<BooleanArray>
where
    T: num::ToPrimitive + std::fmt::Debug,
{
    if let DataType::Dictionary(key_type, _) = left.data_type() {
        dyn_compare_scalar!(left, right, key_type, gt_dyn_scalar)
    } else {
        dyn_compare_scalar!(left, right, gt_scalar)
    }
}
/// Compares each element of `left` with the numeric scalar `right` using `>=`.
///
/// Accepts PrimitiveArrays as well as DictionaryArrays whose values are primitive. For a
/// dictionary the comparison is evaluated on the values and then expanded per key.
///
/// If `simd` feature flag is not enabled:
/// floating point values (f32 and f64) are ordered according to the totalOrder predicate
/// of the IEEE 754 (2008 revision) floating point standard.
/// Please refer to `f32::total_cmp` and `f64::total_cmp`.
pub fn gt_eq_dyn_scalar<T>(left: &dyn Array, right: T) -> Result<BooleanArray>
where
    T: num::ToPrimitive + std::fmt::Debug,
{
    if let DataType::Dictionary(key_type, _) = left.data_type() {
        dyn_compare_scalar!(left, right, key_type, gt_eq_dyn_scalar)
    } else {
        dyn_compare_scalar!(left, right, gt_eq_scalar)
    }
}
/// Compares each element of `left` with the numeric scalar `right` using `!=`.
///
/// Accepts PrimitiveArrays as well as DictionaryArrays whose values are primitive. For a
/// dictionary the comparison is evaluated on the values and then expanded per key.
///
/// If `simd` feature flag is not enabled:
/// floating point values (f32 and f64) are ordered according to the totalOrder predicate
/// of the IEEE 754 (2008 revision) floating point standard.
/// Please refer to `f32::total_cmp` and `f64::total_cmp`.
pub fn neq_dyn_scalar<T>(left: &dyn Array, right: T) -> Result<BooleanArray>
where
    T: num::ToPrimitive + std::fmt::Debug,
{
    if let DataType::Dictionary(key_type, _) = left.data_type() {
        dyn_compare_scalar!(left, right, key_type, neq_dyn_scalar)
    } else {
        dyn_compare_scalar!(left, right, neq_scalar)
    }
}
/// Compares each element of `left` with the byte-slice scalar `right` using `==`.
///
/// Supports BinaryArray and LargeBinaryArray; any other data type yields a
/// `ComputeError`.
pub fn eq_dyn_binary_scalar(left: &dyn Array, right: &[u8]) -> Result<BooleanArray> {
    match left.data_type() {
        DataType::Binary => eq_binary_scalar(as_generic_binary_array::<i32>(left), right),
        DataType::LargeBinary => {
            eq_binary_scalar(as_generic_binary_array::<i64>(left), right)
        }
        _ => Err(ArrowError::ComputeError(String::from(
            "eq_dyn_binary_scalar only supports Binary or LargeBinary arrays",
        ))),
    }
}
/// Compares each element of `left` with the byte-slice scalar `right` using `!=`.
///
/// Supports BinaryArray and LargeBinaryArray; any other data type yields a
/// `ComputeError`.
pub fn neq_dyn_binary_scalar(left: &dyn Array, right: &[u8]) -> Result<BooleanArray> {
    match left.data_type() {
        DataType::Binary => neq_binary_scalar(as_generic_binary_array::<i32>(left), right),
        DataType::LargeBinary => {
            neq_binary_scalar(as_generic_binary_array::<i64>(left), right)
        }
        _ => Err(ArrowError::ComputeError(String::from(
            "neq_dyn_binary_scalar only supports Binary or LargeBinary arrays",
        ))),
    }
}
/// Compares each element of `left` with the byte-slice scalar `right` using `<`.
///
/// Supports BinaryArray and LargeBinaryArray; any other data type yields a
/// `ComputeError`.
pub fn lt_dyn_binary_scalar(left: &dyn Array, right: &[u8]) -> Result<BooleanArray> {
    match left.data_type() {
        DataType::Binary => lt_binary_scalar(as_generic_binary_array::<i32>(left), right),
        DataType::LargeBinary => {
            lt_binary_scalar(as_generic_binary_array::<i64>(left), right)
        }
        _ => Err(ArrowError::ComputeError(String::from(
            "lt_dyn_binary_scalar only supports Binary or LargeBinary arrays",
        ))),
    }
}
/// Compares each element of `left` with the byte-slice scalar `right` using `<=`.
///
/// Supports BinaryArray and LargeBinaryArray; any other data type yields a
/// `ComputeError`.
pub fn lt_eq_dyn_binary_scalar(left: &dyn Array, right: &[u8]) -> Result<BooleanArray> {
    match left.data_type() {
        DataType::Binary => {
            lt_eq_binary_scalar(as_generic_binary_array::<i32>(left), right)
        }
        DataType::LargeBinary => {
            lt_eq_binary_scalar(as_generic_binary_array::<i64>(left), right)
        }
        _ => Err(ArrowError::ComputeError(String::from(
            "lt_eq_dyn_binary_scalar only supports Binary or LargeBinary arrays",
        ))),
    }
}
/// Compares each element of `left` with the byte-slice scalar `right` using `>`.
///
/// Supports BinaryArray and LargeBinaryArray; any other data type yields a
/// `ComputeError`.
pub fn gt_dyn_binary_scalar(left: &dyn Array, right: &[u8]) -> Result<BooleanArray> {
    match left.data_type() {
        DataType::Binary => gt_binary_scalar(as_generic_binary_array::<i32>(left), right),
        DataType::LargeBinary => {
            gt_binary_scalar(as_generic_binary_array::<i64>(left), right)
        }
        _ => Err(ArrowError::ComputeError(String::from(
            "gt_dyn_binary_scalar only supports Binary or LargeBinary arrays",
        ))),
    }
}
/// Compares each element of `left` with the byte-slice scalar `right` using `>=`.
///
/// Supports BinaryArray and LargeBinaryArray; any other data type yields a
/// `ComputeError`.
pub fn gt_eq_dyn_binary_scalar(left: &dyn Array, right: &[u8]) -> Result<BooleanArray> {
    match left.data_type() {
        DataType::Binary => {
            gt_eq_binary_scalar(as_generic_binary_array::<i32>(left), right)
        }
        DataType::LargeBinary => {
            gt_eq_binary_scalar(as_generic_binary_array::<i64>(left), right)
        }
        _ => Err(ArrowError::ComputeError(String::from(
            "gt_eq_dyn_binary_scalar only supports Binary or LargeBinary arrays",
        ))),
    }
}
/// Perform `left == right` operation on an array and a numeric scalar
/// value. Supports StringArrays, and DictionaryArrays that have string values
pub fn eq_dyn_utf8_scalar(left: &dyn Array, right: &str) -> Result<BooleanArray> {
let result = match left.data_type() {
DataType::Dictionary(key_type, value_type) => match value_type.as_ref() {
DataType::Utf8 | DataType::LargeUtf8 => {
dyn_compare_utf8_scalar!(left, right, key_type, eq_utf8_scalar)
}
_ => Err(ArrowError::ComputeError(
"eq_dyn_utf8_scalar only supports Utf8 or LargeUtf8 arrays or DictionaryArray with Utf8 or LargeUtf8 values".to_string(),
)),
},
DataType::Utf8 => {
let left = as_string_array(left);
eq_utf8_scalar(left, right)
}
DataType::LargeUtf8 => {
let left = as_largestring_array(left);
eq_utf8_scalar(left, right)
}
_ => Err(ArrowError::ComputeError(
"eq_dyn_utf8_scalar only supports Utf8 or LargeUtf8 arrays".to_string(),
)),
};
result
}
/// Perform `left < right` operation on an array and a numeric scalar
/// value. Supports StringArrays, and DictionaryArrays that have string values
pub fn lt_dyn_utf8_scalar(left: &dyn Array, right: &str) -> Result<BooleanArray> {
let result = match left.data_type() {
DataType::Dictionary(key_type, value_type) => match value_type.as_ref() {
DataType::Utf8 | DataType::LargeUtf8 => {
dyn_compare_utf8_scalar!(left, right, key_type, lt_utf8_scalar)
}
_ => Err(ArrowError::ComputeError(
"lt_dyn_utf8_scalar only supports Utf8 or LargeUtf8 arrays or DictionaryArray with Utf8 or LargeUtf8 values".to_string(),
)),
},
DataType::Utf8 => {
let left = as_string_array(left);
lt_utf8_scalar(left, right)
}
DataType::LargeUtf8 => {
let left = as_largestring_array(left);
lt_utf8_scalar(left, right)
}
_ => Err(ArrowError::ComputeError(
"lt_dyn_utf8_scalar only supports Utf8 or LargeUtf8 arrays".to_string(),
)),
};
result
}
/// Perform `left >= right` operation on an array and a numeric scalar
/// value. Supports StringArrays, and DictionaryArrays that have string values
pub fn gt_eq_dyn_utf8_scalar(left: &dyn Array, right: &str) -> Result<BooleanArray> {
let result = match left.data_type() {
DataType::Dictionary(key_type, value_type) => match value_type.as_ref() {
DataType::Utf8 | DataType::LargeUtf8 => {
dyn_compare_utf8_scalar!(left, right, key_type, gt_eq_utf8_scalar)
}
_ => Err(ArrowError::ComputeError(
"gt_eq_dyn_utf8_scalar only supports Utf8 or LargeUtf8 arrays or DictionaryArray with Utf8 or LargeUtf8 values".to_string(),
)),
},
DataType::Utf8 => {
let left = as_string_array(left);
gt_eq_utf8_scalar(left, right)
}
DataType::LargeUtf8 => {
let left = as_largestring_array(left);
gt_eq_utf8_scalar(left, right)
}
_ => Err(ArrowError::ComputeError(
"gt_eq_dyn_utf8_scalar only supports Utf8 or LargeUtf8 arrays".to_string(),
)),
};
result
}
/// Perform `left <= right` operation on an array and a numeric scalar
/// value. Supports StringArrays, and DictionaryArrays that have string values
pub fn lt_eq_dyn_utf8_scalar(left: &dyn Array, right: &str) -> Result<BooleanArray> {
let result = match left.data_type() {
DataType::Dictionary(key_type, value_type) => match value_type.as_ref() {
DataType::Utf8 | DataType::LargeUtf8 => {
dyn_compare_utf8_scalar!(left, right, key_type, lt_eq_utf8_scalar)
}
_ => Err(ArrowError::ComputeError(
"lt_eq_dyn_utf8_scalar only supports Utf8 or LargeUtf8 arrays or DictionaryArray with Utf8 or LargeUtf8 values".to_string(),
)),
},
DataType::Utf8 => {
let left = as_string_array(left);
lt_eq_utf8_scalar(left, right)
}
DataType::LargeUtf8 => {
let left = as_largestring_array(left);
lt_eq_utf8_scalar(left, right)
}
_ => Err(ArrowError::ComputeError(
"lt_eq_dyn_utf8_scalar only supports Utf8 or LargeUtf8 arrays".to_string(),
)),
};
result
}
/// Perform `left > right` operation on an array and a numeric scalar
/// value. Supports StringArrays, and DictionaryArrays that have string values
pub fn gt_dyn_utf8_scalar(left: &dyn Array, right: &str) -> Result<BooleanArray> {
let result = match left.data_type() {
DataType::Dictionary(key_type, value_type) => match value_type.as_ref() {
DataType::Utf8 | DataType::LargeUtf8 => {
dyn_compare_utf8_scalar!(left, right, key_type, gt_utf8_scalar)
}
_ => Err(ArrowError::ComputeError(
"gt_dyn_utf8_scalar only supports Utf8 or LargeUtf8 arrays or DictionaryArray with Utf8 or LargeUtf8 values".to_string(),
)),
},
DataType::Utf8 => {
let left = as_string_array(left);
gt_utf8_scalar(left, right)
}
DataType::LargeUtf8 => {
let left = as_largestring_array(left);
gt_utf8_scalar(left, right)
}
_ => Err(ArrowError::ComputeError(
"gt_dyn_utf8_scalar only supports Utf8 or LargeUtf8 arrays".to_string(),
)),
};
result
}
/// Perform `left != right` operation on an array and a numeric scalar
/// value. Supports StringArrays, and DictionaryArrays that have string values
pub fn neq_dyn_utf8_scalar(left: &dyn Array, right: &str) -> Result<BooleanArray> {
let result = match left.data_type() {
DataType::Dictionary(key_type, value_type) => match value_type.as_ref() {
DataType::Utf8 | DataType::LargeUtf8 => {
dyn_compare_utf8_scalar!(left, right, key_type, neq_utf8_scalar)
}
_ => Err(ArrowError::ComputeError(
"neq_dyn_utf8_scalar only supports Utf8 or LargeUtf8 arrays or DictionaryArray with Utf8 or LargeUtf8 values".to_string(),
)),
},
DataType::Utf8 => {
let left = as_string_array(left);
neq_utf8_scalar(left, right)
}
DataType::LargeUtf8 => {
let left = as_largestring_array(left);
neq_utf8_scalar(left, right)
}
_ => Err(ArrowError::ComputeError(
"neq_dyn_utf8_scalar only supports Utf8 or LargeUtf8 arrays".to_string(),
)),
};
result
}
/// Compares each element of `left` with the boolean scalar `right` using `==`.
///
/// Supports BooleanArrays only; any other data type yields a `ComputeError`.
pub fn eq_dyn_bool_scalar(left: &dyn Array, right: bool) -> Result<BooleanArray> {
    if !matches!(left.data_type(), DataType::Boolean) {
        return Err(ArrowError::ComputeError(String::from(
            "eq_dyn_bool_scalar only supports BooleanArray",
        )));
    }
    eq_bool_scalar(as_boolean_array(left), right)
}
/// Compares each element of `left` with the boolean scalar `right` using `<`.
///
/// Supports BooleanArrays only; any other data type yields a `ComputeError`.
pub fn lt_dyn_bool_scalar(left: &dyn Array, right: bool) -> Result<BooleanArray> {
    if !matches!(left.data_type(), DataType::Boolean) {
        return Err(ArrowError::ComputeError(String::from(
            "lt_dyn_bool_scalar only supports BooleanArray",
        )));
    }
    lt_bool_scalar(as_boolean_array(left), right)
}
/// Compares each element of `left` with the boolean scalar `right` using `>`.
///
/// Supports BooleanArrays only; any other data type yields a `ComputeError`.
pub fn gt_dyn_bool_scalar(left: &dyn Array, right: bool) -> Result<BooleanArray> {
    if !matches!(left.data_type(), DataType::Boolean) {
        return Err(ArrowError::ComputeError(String::from(
            "gt_dyn_bool_scalar only supports BooleanArray",
        )));
    }
    gt_bool_scalar(as_boolean_array(left), right)
}
/// Compares each element of `left` with the boolean scalar `right` using `<=`.
///
/// Supports BooleanArrays only; any other data type yields a `ComputeError`.
pub fn lt_eq_dyn_bool_scalar(left: &dyn Array, right: bool) -> Result<BooleanArray> {
    if !matches!(left.data_type(), DataType::Boolean) {
        return Err(ArrowError::ComputeError(String::from(
            "lt_eq_dyn_bool_scalar only supports BooleanArray",
        )));
    }
    lt_eq_bool_scalar(as_boolean_array(left), right)
}
/// Compares each element of `left` with the boolean scalar `right` using `>=`.
///
/// Supports BooleanArrays only; any other data type yields a `ComputeError`.
pub fn gt_eq_dyn_bool_scalar(left: &dyn Array, right: bool) -> Result<BooleanArray> {
    if !matches!(left.data_type(), DataType::Boolean) {
        return Err(ArrowError::ComputeError(String::from(
            "gt_eq_dyn_bool_scalar only supports BooleanArray",
        )));
    }
    gt_eq_bool_scalar(as_boolean_array(left), right)
}
/// Compares each element of `left` with the boolean scalar `right` using `!=`.
///
/// Supports BooleanArrays only; any other data type yields a `ComputeError`.
pub fn neq_dyn_bool_scalar(left: &dyn Array, right: bool) -> Result<BooleanArray> {
    if !matches!(left.data_type(), DataType::Boolean) {
        return Err(ArrowError::ComputeError(String::from(
            "neq_dyn_bool_scalar only supports BooleanArray",
        )));
    }
    neq_bool_scalar(as_boolean_array(left), right)
}
/// Expands a comparison computed on a dictionary's values into one result per key.
///
/// `dict_comparison` holds one boolean per entry of `dict.values()`. Each non-null key
/// selects the boolean stored at that key's index; a null key yields a null output slot.
/// Only the boolean values of `dict_comparison` are read, not its validity.
///
/// For example, keys `[0, 1, null, 0]` combined with a `dict_comparison` of
/// `[true, false]` produce `[true, false, null, true]`.
///
/// # Panics
/// Panics if `dict_comparison` and `dict.values()` have different lengths.
fn unpack_dict_comparison<K>(
    dict: &DictionaryArray<K>,
    dict_comparison: BooleanArray,
) -> Result<BooleanArray>
where
    K: ArrowNumericType,
{
    assert_eq!(dict_comparison.len(), dict.values().len());

    let lookup = |key: K::Native| -> bool {
        let index = key.as_usize();
        // SAFETY: assumes every non-null key is a valid index into `dict.values()`,
        // whose length equals `dict_comparison.len()` per the assertion above.
        unsafe { dict_comparison.value_unchecked(index) }
    };

    let expanded: BooleanArray = dict
        .keys()
        .iter()
        .map(|slot| slot.map(lookup))
        .collect();
    Ok(expanded)
}
/// Helper function to perform boolean lambda function on values from two arrays using
/// SIMD.
///
/// `simd_op` compares full SIMD vectors and is used for every complete block of 64
/// values; `scalar_op` compares single values and is used for the trailing values that do
/// not fill a block. The comparison results are packed one bit per value into the
/// output buffer, and the validity buffer comes from `combine_option_bitmap` over both
/// inputs.
///
/// # Errors
/// Returns a `ComputeError` if `left` and `right` have different lengths.
#[cfg(feature = "simd")]
fn simd_compare_op<T, SI, SC>(
left: &PrimitiveArray<T>,
right: &PrimitiveArray<T>,
simd_op: SI,
scalar_op: SC,
) -> Result<BooleanArray>
where
T: ArrowNumericType,
SI: Fn(T::Simd, T::Simd) -> T::SimdMask,
SC: Fn(T::Native, T::Native) -> bool,
{
use std::borrow::BorrowMut;
let len = left.len();
if len != right.len() {
return Err(ArrowError::ComputeError(
"Cannot perform comparison operation on arrays of different length"
.to_string(),
));
}
let null_bit_buffer =
combine_option_bitmap(&[left.data_ref(), right.data_ref()], len)?;
// we process the data in chunks so that each iteration results in one u64 of comparison result bits
const CHUNK_SIZE: usize = 64;
let lanes = T::lanes();
// this is currently the case for all our datatypes and allows us to always append full bytes
assert!(
lanes <= CHUNK_SIZE,
"Number of vector lanes must be at most 64"
);
// One result bit per value, rounded up to whole bytes and zero-initialised.
let buffer_size = bit_util::ceil(len, 8);
let mut result = MutableBuffer::new(buffer_size).with_bitset(buffer_size, false);
let mut left_chunks = left.values().chunks_exact(CHUNK_SIZE);
let mut right_chunks = right.values().chunks_exact(CHUNK_SIZE);
let result_chunks = result.typed_data_mut();
// The fold threads the not-yet-written tail of the output through the iterations:
// each full chunk writes 8 bytes and hands the rest of the slice to the next one.
// `borrow_mut` keeps the chunk iterators usable afterwards for `remainder()`.
let result_remainder = left_chunks
.borrow_mut()
.zip(right_chunks.borrow_mut())
.fold(result_chunks, |result_slice, (left_slice, right_slice)| {
let mut i = 0;
let mut bitmask = 0_u64;
while i < CHUNK_SIZE {
let simd_left = T::load(&left_slice[i..]);
let simd_right = T::load(&right_slice[i..]);
let simd_result = simd_op(simd_left, simd_right);
let m = T::mask_to_u64(&simd_result);
// Place this vector's lane results at bit offset `i` within the chunk mask.
bitmask |= m << i;
i += lanes;
}
let bytes = bitmask.to_le_bytes();
result_slice[0..8].copy_from_slice(&bytes);
&mut result_slice[8..]
});
let left_remainder = left_chunks.remainder();
let right_remainder = right_chunks.remainder();
assert_eq!(left_remainder.len(), right_remainder.len());
if !left_remainder.is_empty() {
// Fewer than 64 values remain: compare them one by one, setting bit `i` for the
// i-th remaining value.
let remainder_bitmask = left_remainder
.iter()
.zip(right_remainder.iter())
.enumerate()
.fold(0_u64, |mut mask, (i, (scalar_left, scalar_right))| {
let bit = scalar_op(*scalar_left, *scalar_right) as u64;
mask |= bit << i;
mask
});
// Only the bytes that actually hold remainder bits are copied into the output tail.
let remainder_mask_as_bytes =
&remainder_bitmask.to_le_bytes()[0..bit_util::ceil(left_remainder.len(), 8)];
result_remainder.copy_from_slice(remainder_mask_as_bytes);
}
// NOTE(review): `new_unchecked` performs no validation here; the null count is passed
// as `None` rather than computed in this function.
let data = unsafe {
ArrayData::new_unchecked(
DataType::Boolean,
len,
None,
null_bit_buffer,
0,
vec![result.into()],
vec![],
)
};
Ok(BooleanArray::from(data))
}
/// Helper function to perform boolean lambda function on values from an array and a scalar value using
/// SIMD.
///
/// `right` is broadcast into a SIMD vector once via `T::init`. `simd_op` compares full
/// SIMD vectors for every complete block of 64 values; `scalar_op` handles the trailing
/// values that do not fill a block. Results are packed one bit per value, and the
/// validity buffer is a bit slice of `left`'s null buffer (if any).
#[cfg(feature = "simd")]
fn simd_compare_op_scalar<T, SI, SC>(
left: &PrimitiveArray<T>,
right: T::Native,
simd_op: SI,
scalar_op: SC,
) -> Result<BooleanArray>
where
T: ArrowNumericType,
SI: Fn(T::Simd, T::Simd) -> T::SimdMask,
SC: Fn(T::Native, T::Native) -> bool,
{
use std::borrow::BorrowMut;
let len = left.len();
// we process the data in chunks so that each iteration results in one u64 of comparison result bits
const CHUNK_SIZE: usize = 64;
let lanes = T::lanes();
// this is currently the case for all our datatypes and allows us to always append full bytes
assert!(
lanes <= CHUNK_SIZE,
"Number of vector lanes must be at most 64"
);
// One result bit per value, rounded up to whole bytes and zero-initialised.
let buffer_size = bit_util::ceil(len, 8);
let mut result = MutableBuffer::new(buffer_size).with_bitset(buffer_size, false);
let mut left_chunks = left.values().chunks_exact(CHUNK_SIZE);
let simd_right = T::init(right);
let result_chunks = result.typed_data_mut();
// The fold threads the not-yet-written tail of the output through the iterations:
// each full chunk writes 8 bytes and hands the rest of the slice to the next one.
// `borrow_mut` keeps the chunk iterator usable afterwards for `remainder()`.
let result_remainder =
left_chunks
.borrow_mut()
.fold(result_chunks, |result_slice, left_slice| {
let mut i = 0;
let mut bitmask = 0_u64;
while i < CHUNK_SIZE {
let simd_left = T::load(&left_slice[i..]);
let simd_result = simd_op(simd_left, simd_right);
let m = T::mask_to_u64(&simd_result);
// Place this vector's lane results at bit offset `i` within the chunk mask.
bitmask |= m << i;
i += lanes;
}
let bytes = bitmask.to_le_bytes();
result_slice[0..8].copy_from_slice(&bytes);
&mut result_slice[8..]
});
let left_remainder = left_chunks.remainder();
if !left_remainder.is_empty() {
// Fewer than 64 values remain: compare them one by one against `right`, setting bit
// `i` for the i-th remaining value.
let remainder_bitmask = left_remainder.iter().enumerate().fold(
0_u64,
|mut mask, (i, scalar_left)| {
let bit = scalar_op(*scalar_left, right) as u64;
mask |= bit << i;
mask
},
);
// Only the bytes that actually hold remainder bits are copied into the output tail.
let remainder_mask_as_bytes =
&remainder_bitmask.to_le_bytes()[0..bit_util::ceil(left_remainder.len(), 8)];
result_remainder.copy_from_slice(remainder_mask_as_bytes);
}
// Re-base the validity bits so that they start at bit 0, matching the output offset of 0.
let null_bit_buffer = left
.data_ref()
.null_buffer()
.map(|b| b.bit_slice(left.offset(), left.len()));
// null count is the same as in the input since the right side of the scalar comparison cannot be null
let null_count = left.null_count();
// NOTE(review): `new_unchecked` performs no validation of the buffers passed here.
let data = unsafe {
ArrayData::new_unchecked(
DataType::Boolean,
len,
Some(null_count),
null_bit_buffer,
0,
vec![result.into()],
vec![],
)
};
Ok(BooleanArray::from(data))
}
/// Downcasts both dynamically typed arrays to `PrimitiveArray<T>` and compares them
/// element-wise with `op` through `compare_op`.
fn cmp_primitive_array<T: ArrowNumericType, F>(
    left: &dyn Array,
    right: &dyn Array,
    op: F,
) -> Result<BooleanArray>
where
    F: Fn(T::Native, T::Native) -> bool,
{
    compare_op(
        as_primitive_array::<T>(left),
        as_primitive_array::<T>(right),
        op,
    )
}
// Compares a dictionary array `$LEFT` (key type given at runtime by `$LEFT_KEY_TYPE`)
// with a non-dictionary primitive array `$RIGHT` of Arrow type `$RIGHT_TYPE`.
//
// The macro downcasts `$LEFT` to the `DictionaryArray` matching the key type and
// forwards to `cmp_dict_primitive` with the comparison closure `$OP`.
//
// NOTE(review): `$OP_BOOL` is accepted but never referenced in the expansion.
#[cfg(feature = "dyn_cmp_dict")]
macro_rules! typed_dict_non_dict_cmp {
($LEFT: expr, $RIGHT: expr, $LEFT_KEY_TYPE: expr, $RIGHT_TYPE: tt, $OP_BOOL: expr, $OP: expr) => {{
match $LEFT_KEY_TYPE {
DataType::Int8 => {
let left = as_dictionary_array::<Int8Type>($LEFT);
cmp_dict_primitive::<_, $RIGHT_TYPE, _>(left, $RIGHT, $OP)
}
DataType::Int16 => {
let left = as_dictionary_array::<Int16Type>($LEFT);
cmp_dict_primitive::<_, $RIGHT_TYPE, _>(left, $RIGHT, $OP)
}
DataType::Int32 => {
let left = as_dictionary_array::<Int32Type>($LEFT);
cmp_dict_primitive::<_, $RIGHT_TYPE, _>(left, $RIGHT, $OP)
}
DataType::Int64 => {
let left = as_dictionary_array::<Int64Type>($LEFT);
cmp_dict_primitive::<_, $RIGHT_TYPE, _>(left, $RIGHT, $OP)
}
DataType::UInt8 => {
let left = as_dictionary_array::<UInt8Type>($LEFT);
cmp_dict_primitive::<_, $RIGHT_TYPE, _>(left, $RIGHT, $OP)
}
DataType::UInt16 => {
let left = as_dictionary_array::<UInt16Type>($LEFT);
cmp_dict_primitive::<_, $RIGHT_TYPE, _>(left, $RIGHT, $OP)
}
DataType::UInt32 => {
let left = as_dictionary_array::<UInt32Type>($LEFT);
cmp_dict_primitive::<_, $RIGHT_TYPE, _>(left, $RIGHT, $OP)
}
DataType::UInt64 => {
let left = as_dictionary_array::<UInt64Type>($LEFT);
cmp_dict_primitive::<_, $RIGHT_TYPE, _>(left, $RIGHT, $OP)
}
// Any key type other than the eight integer types above is reported as not implemented.
t => Err(ArrowError::NotYetImplemented(format!(
"Cannot compare dictionary array of key type {}",
t
))),
}
}};
}
// Compares a dictionary array `$LEFT` (key type given at runtime by `$LEFT_KEY_TYPE`)
// with a non-dictionary string array `$RIGHT`.
//
// `$RIGHT_TYPE` is passed as the second generic argument of `cmp_dict_string_array`;
// the callers in this file supply `i32` for `Utf8` and `i64` for `LargeUtf8`.
#[cfg(feature = "dyn_cmp_dict")]
macro_rules! typed_dict_string_array_cmp {
($LEFT: expr, $RIGHT: expr, $LEFT_KEY_TYPE: expr, $RIGHT_TYPE: tt, $OP: expr) => {{
match $LEFT_KEY_TYPE {
DataType::Int8 => {
let left = as_dictionary_array::<Int8Type>($LEFT);
cmp_dict_string_array::<_, $RIGHT_TYPE, _>(left, $RIGHT, $OP)
}
DataType::Int16 => {
let left = as_dictionary_array::<Int16Type>($LEFT);
cmp_dict_string_array::<_, $RIGHT_TYPE, _>(left, $RIGHT, $OP)
}
DataType::Int32 => {
let left = as_dictionary_array::<Int32Type>($LEFT);
cmp_dict_string_array::<_, $RIGHT_TYPE, _>(left, $RIGHT, $OP)
}
DataType::Int64 => {
let left = as_dictionary_array::<Int64Type>($LEFT);
cmp_dict_string_array::<_, $RIGHT_TYPE, _>(left, $RIGHT, $OP)
}
DataType::UInt8 => {
let left = as_dictionary_array::<UInt8Type>($LEFT);
cmp_dict_string_array::<_, $RIGHT_TYPE, _>(left, $RIGHT, $OP)
}
DataType::UInt16 => {
let left = as_dictionary_array::<UInt16Type>($LEFT);
cmp_dict_string_array::<_, $RIGHT_TYPE, _>(left, $RIGHT, $OP)
}
DataType::UInt32 => {
let left = as_dictionary_array::<UInt32Type>($LEFT);
cmp_dict_string_array::<_, $RIGHT_TYPE, _>(left, $RIGHT, $OP)
}
DataType::UInt64 => {
let left = as_dictionary_array::<UInt64Type>($LEFT);
cmp_dict_string_array::<_, $RIGHT_TYPE, _>(left, $RIGHT, $OP)
}
// Any key type other than the eight integer types above is reported as not implemented.
t => Err(ArrowError::NotYetImplemented(format!(
"Cannot compare dictionary array of key type {}",
t
))),
}
}};
}
// Compares a dictionary array `$LEFT` with a non-dictionary array `$RIGHT` when the
// `dyn_cmp_dict` feature is enabled.
//
// Dispatch is on the pair (dictionary value type, right data type):
// * primitive integer pairs use `$OP`, float pairs use `$OP_FLOAT`;
// * boolean, string and binary pairs use `$OP`;
// * equal but unsupported type pairs yield `NotYetImplemented`;
// * differing types yield `CastError`.
//
// `$LEFT` must be a dictionary array: any other left data type hits `unreachable!`.
//
// NOTE(review): the boolean arm passes `$OP`, not `$OP_BOOL`, to `cmp_dict_boolean_array`;
// `$OP_BOOL` is only forwarded to `typed_dict_non_dict_cmp!` — confirm this is intended.
#[cfg(feature = "dyn_cmp_dict")]
macro_rules! typed_cmp_dict_non_dict {
($LEFT: expr, $RIGHT: expr, $OP_BOOL: expr, $OP: expr, $OP_FLOAT: expr) => {{
match ($LEFT.data_type(), $RIGHT.data_type()) {
(DataType::Dictionary(left_key_type, left_value_type), right_type) => {
match (left_value_type.as_ref(), right_type) {
(DataType::Boolean, DataType::Boolean) => {
let left = $LEFT;
downcast_dictionary_array!(
left => {
cmp_dict_boolean_array::<_, _>(left, $RIGHT, $OP)
}
_ => Err(ArrowError::NotYetImplemented(format!(
"Cannot compare dictionary array of key type {}",
left_key_type.as_ref()
))),
)
}
(DataType::Int8, DataType::Int8) => {
typed_dict_non_dict_cmp!($LEFT, $RIGHT, left_key_type.as_ref(), Int8Type, $OP_BOOL, $OP)
}
(DataType::Int16, DataType::Int16) => {
typed_dict_non_dict_cmp!($LEFT, $RIGHT, left_key_type.as_ref(), Int16Type, $OP_BOOL, $OP)
}
(DataType::Int32, DataType::Int32) => {
typed_dict_non_dict_cmp!($LEFT, $RIGHT, left_key_type.as_ref(), Int32Type, $OP_BOOL, $OP)
}
(DataType::Int64, DataType::Int64) => {
typed_dict_non_dict_cmp!($LEFT, $RIGHT, left_key_type.as_ref(), Int64Type, $OP_BOOL, $OP)
}
(DataType::UInt8, DataType::UInt8) => {
typed_dict_non_dict_cmp!($LEFT, $RIGHT, left_key_type.as_ref(), UInt8Type, $OP_BOOL, $OP)
}
(DataType::UInt16, DataType::UInt16) => {
typed_dict_non_dict_cmp!($LEFT, $RIGHT, left_key_type.as_ref(), UInt16Type, $OP_BOOL, $OP)
}
(DataType::UInt32, DataType::UInt32) => {
typed_dict_non_dict_cmp!($LEFT, $RIGHT, left_key_type.as_ref(), UInt32Type, $OP_BOOL, $OP)
}
(DataType::UInt64, DataType::UInt64) => {
typed_dict_non_dict_cmp!($LEFT, $RIGHT, left_key_type.as_ref(), UInt64Type, $OP_BOOL, $OP)
}
(DataType::Float32, DataType::Float32) => {
typed_dict_non_dict_cmp!($LEFT, $RIGHT, left_key_type.as_ref(), Float32Type, $OP_BOOL, $OP_FLOAT)
}
(DataType::Float64, DataType::Float64) => {
typed_dict_non_dict_cmp!($LEFT, $RIGHT, left_key_type.as_ref(), Float64Type, $OP_BOOL, $OP_FLOAT)
}
(DataType::Utf8, DataType::Utf8) => {
typed_dict_string_array_cmp!($LEFT, $RIGHT, left_key_type.as_ref(), i32, $OP)
}
(DataType::LargeUtf8, DataType::LargeUtf8) => {
typed_dict_string_array_cmp!($LEFT, $RIGHT, left_key_type.as_ref(), i64, $OP)
}
(DataType::Binary, DataType::Binary) => {
let left = $LEFT;
downcast_dictionary_array!(
left => {
cmp_dict_binary_array::<_, i32, _>(left, $RIGHT, $OP)
}
_ => Err(ArrowError::NotYetImplemented(format!(
"Cannot compare dictionary array of key type {}",
left_key_type.as_ref()
))),
)
}
(DataType::LargeBinary, DataType::LargeBinary) => {
let left = $LEFT;
downcast_dictionary_array!(
left => {
cmp_dict_binary_array::<_, i64, _>(left, $RIGHT, $OP)
}
_ => Err(ArrowError::NotYetImplemented(format!(
"Cannot compare dictionary array of key type {}",
left_key_type.as_ref()
))),
)
}
// Same type on both sides, but no kernel is wired up for it above.
(t1, t2) if t1 == t2 => Err(ArrowError::NotYetImplemented(format!(
"Comparing dictionary array of type {} with array of type {} is not yet implemented",
t1, t2
))),
// Dictionary value type and right array type differ.
(t1, t2) => Err(ArrowError::CastError(format!(
"Cannot compare dictionary array with array of different value types ({} and {})",
t1, t2
))),
}
}
_ => unreachable!("Should not reach this branch"),
}
}};
}
/// Fallback for builds without the "dyn_cmp_dict" feature: comparing a dictionary
/// array with a non-dictionary array always yields a `CastError` naming both types.
/// The operator arguments are accepted (to keep call sites identical) but never used.
#[cfg(not(feature = "dyn_cmp_dict"))]
macro_rules! typed_cmp_dict_non_dict {
    ($DICT: expr, $OTHER: expr, $OP_BOOL: expr, $OP: expr, $OP_FLOAT: expr) => {{
        let message = format!(
            "Comparing dictionary array of type {} with array of type {} requires \"dyn_cmp_dict\" feature",
            $DICT.data_type(), $OTHER.data_type()
        );
        Err(ArrowError::CastError(message))
    }}
}
/// Compares two non-dictionary arrays `$LEFT` and `$RIGHT` by dispatching on the pair
/// of their data types.
///
/// * `$OP_BOOL` is applied when both arrays are `Boolean`.
/// * `$OP_FLOAT` is applied when both arrays are `Float32` or `Float64`.
/// * `$OP` is applied for every other supported type (integers, strings, binaries,
///   timestamps, dates, times and intervals).
///
/// Evaluates to an `Err(ArrowError::NotYetImplemented)` when both sides share a type
/// that has no arm here, and to an `Err(ArrowError::CastError)` when the types differ.
macro_rules! typed_compares {
($LEFT: expr, $RIGHT: expr, $OP_BOOL: expr, $OP: expr, $OP_FLOAT: expr) => {{
match ($LEFT.data_type(), $RIGHT.data_type()) {
// Booleans use the dedicated boolean operator.
(DataType::Boolean, DataType::Boolean) => {
compare_op(as_boolean_array($LEFT), as_boolean_array($RIGHT), $OP_BOOL)
}
// Signed and unsigned integers use the generic operator.
(DataType::Int8, DataType::Int8) => {
cmp_primitive_array::<Int8Type, _>($LEFT, $RIGHT, $OP)
}
(DataType::Int16, DataType::Int16) => {
cmp_primitive_array::<Int16Type, _>($LEFT, $RIGHT, $OP)
}
(DataType::Int32, DataType::Int32) => {
cmp_primitive_array::<Int32Type, _>($LEFT, $RIGHT, $OP)
}
(DataType::Int64, DataType::Int64) => {
cmp_primitive_array::<Int64Type, _>($LEFT, $RIGHT, $OP)
}
(DataType::UInt8, DataType::UInt8) => {
cmp_primitive_array::<UInt8Type, _>($LEFT, $RIGHT, $OP)
}
(DataType::UInt16, DataType::UInt16) => {
cmp_primitive_array::<UInt16Type, _>($LEFT, $RIGHT, $OP)
}
(DataType::UInt32, DataType::UInt32) => {
cmp_primitive_array::<UInt32Type, _>($LEFT, $RIGHT, $OP)
}
(DataType::UInt64, DataType::UInt64) => {
cmp_primitive_array::<UInt64Type, _>($LEFT, $RIGHT, $OP)
}
// Floats use the dedicated float operator.
(DataType::Float32, DataType::Float32) => {
cmp_primitive_array::<Float32Type, _>($LEFT, $RIGHT, $OP_FLOAT)
}
(DataType::Float64, DataType::Float64) => {
cmp_primitive_array::<Float64Type, _>($LEFT, $RIGHT, $OP_FLOAT)
}
// Strings and binaries (32-bit and 64-bit offsets) use the generic operator.
(DataType::Utf8, DataType::Utf8) => {
compare_op(as_string_array($LEFT), as_string_array($RIGHT), $OP)
}
(DataType::LargeUtf8, DataType::LargeUtf8) => compare_op(
as_largestring_array($LEFT),
as_largestring_array($RIGHT),
$OP,
),
(DataType::Binary, DataType::Binary) => compare_op(
as_generic_binary_array::<i32>($LEFT),
as_generic_binary_array::<i32>($RIGHT),
$OP,
),
(DataType::LargeBinary, DataType::LargeBinary) => compare_op(
as_generic_binary_array::<i64>($LEFT),
as_generic_binary_array::<i64>($RIGHT),
$OP,
),
// Timestamps must agree on the time unit; the second field of `Timestamp` is
// ignored by the patterns (`_`).
(
DataType::Timestamp(TimeUnit::Nanosecond, _),
DataType::Timestamp(TimeUnit::Nanosecond, _),
) => cmp_primitive_array::<TimestampNanosecondType, _>($LEFT, $RIGHT, $OP),
(
DataType::Timestamp(TimeUnit::Microsecond, _),
DataType::Timestamp(TimeUnit::Microsecond, _),
) => cmp_primitive_array::<TimestampMicrosecondType, _>($LEFT, $RIGHT, $OP),
(
DataType::Timestamp(TimeUnit::Millisecond, _),
DataType::Timestamp(TimeUnit::Millisecond, _),
) => cmp_primitive_array::<TimestampMillisecondType, _>($LEFT, $RIGHT, $OP),
(
DataType::Timestamp(TimeUnit::Second, _),
DataType::Timestamp(TimeUnit::Second, _),
) => cmp_primitive_array::<TimestampSecondType, _>($LEFT, $RIGHT, $OP),
// Dates.
(DataType::Date32, DataType::Date32) => {
cmp_primitive_array::<Date32Type, _>($LEFT, $RIGHT, $OP)
}
(DataType::Date64, DataType::Date64) => {
cmp_primitive_array::<Date64Type, _>($LEFT, $RIGHT, $OP)
}
// Times; only the unit combinations listed here are handled.
(DataType::Time32(TimeUnit::Second), DataType::Time32(TimeUnit::Second)) => {
cmp_primitive_array::<Time32SecondType, _>($LEFT, $RIGHT, $OP)
}
(
DataType::Time32(TimeUnit::Millisecond),
DataType::Time32(TimeUnit::Millisecond),
) => cmp_primitive_array::<Time32MillisecondType, _>($LEFT, $RIGHT, $OP),
(
DataType::Time64(TimeUnit::Microsecond),
DataType::Time64(TimeUnit::Microsecond),
) => cmp_primitive_array::<Time64MicrosecondType, _>($LEFT, $RIGHT, $OP),
(
DataType::Time64(TimeUnit::Nanosecond),
DataType::Time64(TimeUnit::Nanosecond),
) => cmp_primitive_array::<Time64NanosecondType, _>($LEFT, $RIGHT, $OP),
// Intervals.
(
DataType::Interval(IntervalUnit::YearMonth),
DataType::Interval(IntervalUnit::YearMonth),
) => cmp_primitive_array::<IntervalYearMonthType, _>($LEFT, $RIGHT, $OP),
(
DataType::Interval(IntervalUnit::DayTime),
DataType::Interval(IntervalUnit::DayTime),
) => cmp_primitive_array::<IntervalDayTimeType, _>($LEFT, $RIGHT, $OP),
(
DataType::Interval(IntervalUnit::MonthDayNano),
DataType::Interval(IntervalUnit::MonthDayNano),
) => cmp_primitive_array::<IntervalMonthDayNanoType, _>($LEFT, $RIGHT, $OP),
// Same type on both sides, but no arm above handles it.
(t1, t2) if t1 == t2 => Err(ArrowError::NotYetImplemented(format!(
"Comparing arrays of type {} is not yet implemented",
t1
))),
// The two arrays have different types.
(t1, t2) => Err(ArrowError::CastError(format!(
"Cannot compare two arrays of different types ({} and {})",
t1, t2
))),
}
}};
}
/// Applies $OP to $LEFT and $RIGHT which are two dictionaries which have (the same) key type $KT
///
/// The kernel is chosen from the pair of dictionary value types:
/// `$OP_BOOL` for `Boolean` values, `$OP_FLOAT` for `Float32`/`Float64` values and
/// `$OP` for every other supported value type. Evaluates to
/// `Err(ArrowError::NotYetImplemented)` for an unsupported shared value type and to
/// `Err(ArrowError::CastError)` when the two value types differ.
#[cfg(feature = "dyn_cmp_dict")]
macro_rules! typed_dict_cmp {
($LEFT: expr, $RIGHT: expr, $OP: expr, $OP_FLOAT: expr, $OP_BOOL: expr, $KT: tt) => {{
match ($LEFT.value_type(), $RIGHT.value_type()) {
// Boolean values use the dedicated boolean operator.
(DataType::Boolean, DataType::Boolean) => {
cmp_dict_bool::<$KT, _>($LEFT, $RIGHT, $OP_BOOL)
}
// Integer values use the generic operator.
(DataType::Int8, DataType::Int8) => {
cmp_dict::<$KT, Int8Type, _>($LEFT, $RIGHT, $OP)
}
(DataType::Int16, DataType::Int16) => {
cmp_dict::<$KT, Int16Type, _>($LEFT, $RIGHT, $OP)
}
(DataType::Int32, DataType::Int32) => {
cmp_dict::<$KT, Int32Type, _>($LEFT, $RIGHT, $OP)
}
(DataType::Int64, DataType::Int64) => {
cmp_dict::<$KT, Int64Type, _>($LEFT, $RIGHT, $OP)
}
(DataType::UInt8, DataType::UInt8) => {
cmp_dict::<$KT, UInt8Type, _>($LEFT, $RIGHT, $OP)
}
(DataType::UInt16, DataType::UInt16) => {
cmp_dict::<$KT, UInt16Type, _>($LEFT, $RIGHT, $OP)
}
(DataType::UInt32, DataType::UInt32) => {
cmp_dict::<$KT, UInt32Type, _>($LEFT, $RIGHT, $OP)
}
(DataType::UInt64, DataType::UInt64) => {
cmp_dict::<$KT, UInt64Type, _>($LEFT, $RIGHT, $OP)
}
// Float values use the dedicated float operator.
(DataType::Float32, DataType::Float32) => {
cmp_dict::<$KT, Float32Type, _>($LEFT, $RIGHT, $OP_FLOAT)
}
(DataType::Float64, DataType::Float64) => {
cmp_dict::<$KT, Float64Type, _>($LEFT, $RIGHT, $OP_FLOAT)
}
// String and binary values; the second type argument selects the offset width.
(DataType::Utf8, DataType::Utf8) => {
cmp_dict_utf8::<$KT, i32, _>($LEFT, $RIGHT, $OP)
}
(DataType::LargeUtf8, DataType::LargeUtf8) => {
cmp_dict_utf8::<$KT, i64, _>($LEFT, $RIGHT, $OP)
}
(DataType::Binary, DataType::Binary) => {
cmp_dict_binary::<$KT, i32, _>($LEFT, $RIGHT, $OP)
}
(DataType::LargeBinary, DataType::LargeBinary) => {
cmp_dict_binary::<$KT, i64, _>($LEFT, $RIGHT, $OP)
}
// Timestamp values must agree on the time unit; the second field of
// `Timestamp` is ignored by the patterns (`_`).
(
DataType::Timestamp(TimeUnit::Nanosecond, _),
DataType::Timestamp(TimeUnit::Nanosecond, _),
) => {
cmp_dict::<$KT, TimestampNanosecondType, _>($LEFT, $RIGHT, $OP)
}
(
DataType::Timestamp(TimeUnit::Microsecond, _),
DataType::Timestamp(TimeUnit::Microsecond, _),
) => {
cmp_dict::<$KT, TimestampMicrosecondType, _>($LEFT, $RIGHT, $OP)
}
(
DataType::Timestamp(TimeUnit::Millisecond, _),
DataType::Timestamp(TimeUnit::Millisecond, _),
) => {
cmp_dict::<$KT, TimestampMillisecondType, _>($LEFT, $RIGHT, $OP)
}
(
DataType::Timestamp(TimeUnit::Second, _),
DataType::Timestamp(TimeUnit::Second, _),
) => {
cmp_dict::<$KT, TimestampSecondType, _>($LEFT, $RIGHT, $OP)
}
// Date values.
(DataType::Date32, DataType::Date32) => {
cmp_dict::<$KT, Date32Type, _>($LEFT, $RIGHT, $OP)
}
(DataType::Date64, DataType::Date64) => {
cmp_dict::<$KT, Date64Type, _>($LEFT, $RIGHT, $OP)
}
// Time values; only the unit combinations listed here are handled.
(
DataType::Time32(TimeUnit::Second),
DataType::Time32(TimeUnit::Second),
) => {
cmp_dict::<$KT, Time32SecondType, _>($LEFT, $RIGHT, $OP)
}
(
DataType::Time32(TimeUnit::Millisecond),
DataType::Time32(TimeUnit::Millisecond),
) => {
cmp_dict::<$KT, Time32MillisecondType, _>($LEFT, $RIGHT, $OP)
}
(
DataType::Time64(TimeUnit::Microsecond),
DataType::Time64(TimeUnit::Microsecond),
) => {
cmp_dict::<$KT, Time64MicrosecondType, _>($LEFT, $RIGHT, $OP)
}
(
DataType::Time64(TimeUnit::Nanosecond),
DataType::Time64(TimeUnit::Nanosecond),
) => {
cmp_dict::<$KT, Time64NanosecondType, _>($LEFT, $RIGHT, $OP)
}
// Interval values.
(
DataType::Interval(IntervalUnit::YearMonth),
DataType::Interval(IntervalUnit::YearMonth),
) => {
cmp_dict::<$KT, IntervalYearMonthType, _>($LEFT, $RIGHT, $OP)
}
(
DataType::Interval(IntervalUnit::DayTime),
DataType::Interval(IntervalUnit::DayTime),
) => {
cmp_dict::<$KT, IntervalDayTimeType, _>($LEFT, $RIGHT, $OP)
}
(
DataType::Interval(IntervalUnit::MonthDayNano),
DataType::Interval(IntervalUnit::MonthDayNano),
) => {
cmp_dict::<$KT, IntervalMonthDayNanoType, _>($LEFT, $RIGHT, $OP)
}
// Same value type on both sides, but no arm above handles it.
(t1, t2) if t1 == t2 => Err(ArrowError::NotYetImplemented(format!(
"Comparing dictionary arrays of value type {} is not yet implemented",
t1
))),
// The two dictionaries have different value types.
(t1, t2) => Err(ArrowError::CastError(format!(
"Cannot compare two dictionary arrays of different value types ({} and {})",
t1, t2
))),
}
}};
}
/// Compares two arrays that are both expected to be `DictionaryArray`s.
///
/// The outer match requires both data types to be `DataType::Dictionary`; the inner
/// match requires both key types to be the same integer type, downcasts both sides to
/// that key type and hands over to `typed_dict_cmp!`, which dispatches on the value
/// type. Mismatched key types, or a non-dictionary operand, evaluate to
/// `Err(ArrowError::CastError)`.
#[cfg(feature = "dyn_cmp_dict")]
macro_rules! typed_dict_compares {
// Applies `LEFT OP RIGHT` when `LEFT` and `RIGHT` both are `DictionaryArray`
($LEFT: expr, $RIGHT: expr, $OP: expr, $OP_FLOAT: expr, $OP_BOOL: expr) => {{
match ($LEFT.data_type(), $RIGHT.data_type()) {
(DataType::Dictionary(left_key_type, _), DataType::Dictionary(right_key_type, _))=> {
// Both sides are dictionaries: select the concrete (shared) key type.
match (left_key_type.as_ref(), right_key_type.as_ref()) {
(DataType::Int8, DataType::Int8) => {
let left = as_dictionary_array::<Int8Type>($LEFT);
let right = as_dictionary_array::<Int8Type>($RIGHT);
typed_dict_cmp!(left, right, $OP, $OP_FLOAT, $OP_BOOL, Int8Type)
}
(DataType::Int16, DataType::Int16) => {
let left = as_dictionary_array::<Int16Type>($LEFT);
let right = as_dictionary_array::<Int16Type>($RIGHT);
typed_dict_cmp!(left, right, $OP, $OP_FLOAT, $OP_BOOL, Int16Type)
}
(DataType::Int32, DataType::Int32) => {
let left = as_dictionary_array::<Int32Type>($LEFT);
let right = as_dictionary_array::<Int32Type>($RIGHT);
typed_dict_cmp!(left, right, $OP, $OP_FLOAT, $OP_BOOL, Int32Type)
}
(DataType::Int64, DataType::Int64) => {
let left = as_dictionary_array::<Int64Type>($LEFT);
let right = as_dictionary_array::<Int64Type>($RIGHT);
typed_dict_cmp!(left, right, $OP, $OP_FLOAT, $OP_BOOL, Int64Type)
}
(DataType::UInt8, DataType::UInt8) => {
let left = as_dictionary_array::<UInt8Type>($LEFT);
let right = as_dictionary_array::<UInt8Type>($RIGHT);
typed_dict_cmp!(left, right, $OP, $OP_FLOAT, $OP_BOOL, UInt8Type)
}
(DataType::UInt16, DataType::UInt16) => {
let left = as_dictionary_array::<UInt16Type>($LEFT);
let right = as_dictionary_array::<UInt16Type>($RIGHT);
typed_dict_cmp!(left, right, $OP, $OP_FLOAT, $OP_BOOL, UInt16Type)
}
(DataType::UInt32, DataType::UInt32) => {
let left = as_dictionary_array::<UInt32Type>($LEFT);
let right = as_dictionary_array::<UInt32Type>($RIGHT);
typed_dict_cmp!(left, right, $OP, $OP_FLOAT, $OP_BOOL, UInt32Type)
}
(DataType::UInt64, DataType::UInt64) => {
let left = as_dictionary_array::<UInt64Type>($LEFT);
let right = as_dictionary_array::<UInt64Type>($RIGHT);
typed_dict_cmp!(left, right, $OP, $OP_FLOAT, $OP_BOOL, UInt64Type)
}
// Same key type on both sides, but not one of the integer types above.
(t1, t2) if t1 == t2 => Err(ArrowError::NotYetImplemented(format!(
"Comparing dictionary arrays of type {} is not yet implemented",
t1
))),
// The two dictionaries use different key types.
(t1, t2) => Err(ArrowError::CastError(format!(
"Cannot compare two dictionary arrays of different key types ({} and {})",
t1, t2
))),
}
}
// At least one operand is not a dictionary array.
(t1, t2) => Err(ArrowError::CastError(format!(
"Cannot compare dictionary array with non-dictionary array ({} and {})",
t1, t2
))),
}
}};
}
/// Fallback for builds without the "dyn_cmp_dict" feature: comparing two dictionary
/// arrays always yields a `CastError` naming both types. The operator arguments are
/// accepted (to keep call sites identical) but never used.
#[cfg(not(feature = "dyn_cmp_dict"))]
macro_rules! typed_dict_compares {
    ($LHS: expr, $RHS: expr, $OP: expr, $OP_FLOAT: expr, $OP_BOOL: expr) => {{
        let message = format!(
            "Comparing array of type {} with array of type {} requires \"dyn_cmp_dict\" feature",
            $LHS.data_type(), $RHS.data_type()
        );
        Err(ArrowError::CastError(message))
    }}
}
/// Compares a `DictionaryArray` against a `PrimitiveArray` using `op`.
///
/// The dictionary's values and `right` are both expected to be `PrimitiveArray<T>`;
/// the function panics if either downcast fails.
#[cfg(feature = "dyn_cmp_dict")]
fn cmp_dict_primitive<K, T, F>(
    left: &DictionaryArray<K>,
    right: &dyn Array,
    op: F,
) -> Result<BooleanArray>
where
    K: ArrowNumericType,
    T: ArrowNumericType + Sync + Send,
    F: Fn(T::Native, T::Native) -> bool,
{
    // View the dictionary through its typed values, then the plain array.
    let dict_side = left.downcast_dict::<PrimitiveArray<T>>().unwrap();
    let plain_side = as_primitive_array::<T>(right);
    compare_op(dict_side, plain_side, op)
}
/// Compares a `DictionaryArray` against a `GenericStringArray` using `op`.
///
/// The dictionary's values and `right` are both expected to be
/// `GenericStringArray<OffsetSize>`; the function panics if either downcast fails.
#[cfg(feature = "dyn_cmp_dict")]
fn cmp_dict_string_array<K, OffsetSize: OffsetSizeTrait, F>(
    left: &DictionaryArray<K>,
    right: &dyn Array,
    op: F,
) -> Result<BooleanArray>
where
    K: ArrowNumericType,
    F: Fn(&str, &str) -> bool,
{
    // View the dictionary through its typed values, then the plain array.
    let dict_side = left
        .downcast_dict::<GenericStringArray<OffsetSize>>()
        .unwrap();
    let plain_side = right
        .as_any()
        .downcast_ref::<GenericStringArray<OffsetSize>>()
        .unwrap();
    compare_op(dict_side, plain_side, op)
}
/// Compares a `DictionaryArray` against a `BooleanArray` using `op`.
///
/// The dictionary's values and `right` are both expected to be `BooleanArray`s;
/// the function panics if either downcast fails.
#[cfg(feature = "dyn_cmp_dict")]
fn cmp_dict_boolean_array<K, F>(
    left: &DictionaryArray<K>,
    right: &dyn Array,
    op: F,
) -> Result<BooleanArray>
where
    K: ArrowNumericType,
    F: Fn(bool, bool) -> bool,
{
    // View the dictionary through its typed values, then the plain array.
    let dict_side = left.downcast_dict::<BooleanArray>().unwrap();
    let plain_side = right.as_any().downcast_ref::<BooleanArray>().unwrap();
    compare_op(dict_side, plain_side, op)
}
/// Compares a `DictionaryArray` against a `GenericBinaryArray` using `op`.
///
/// The dictionary's values and `right` are both expected to be
/// `GenericBinaryArray<OffsetSize>`; the function panics if either downcast fails.
#[cfg(feature = "dyn_cmp_dict")]
fn cmp_dict_binary_array<K, OffsetSize: OffsetSizeTrait, F>(
    left: &DictionaryArray<K>,
    right: &dyn Array,
    op: F,
) -> Result<BooleanArray>
where
    K: ArrowNumericType,
    F: Fn(&[u8], &[u8]) -> bool,
{
    // View the dictionary through its typed values, then the plain array.
    let dict_side = left
        .downcast_dict::<GenericBinaryArray<OffsetSize>>()
        .unwrap();
    let plain_side = right
        .as_any()
        .downcast_ref::<GenericBinaryArray<OffsetSize>>()
        .unwrap();
    compare_op(dict_side, plain_side, op)
}
/// Perform given operation on two `DictionaryArray`s which value type is
/// primitive type.
///
/// # Errors
///
/// Returns an `ArrowError::CastError` if the values of either dictionary are not a
/// `PrimitiveArray<T>` (previously this case panicked, contradicting the documented
/// contract). Any error produced by the underlying comparison kernel is propagated.
#[cfg(feature = "dyn_cmp_dict")]
pub fn cmp_dict<K, T, F>(
    left: &DictionaryArray<K>,
    right: &DictionaryArray<K>,
    op: F,
) -> Result<BooleanArray>
where
    K: ArrowNumericType,
    T: ArrowNumericType + Sync + Send,
    F: Fn(T::Native, T::Native) -> bool,
{
    // Surface a value-type mismatch as an error instead of panicking.
    let left_values = left.downcast_dict::<PrimitiveArray<T>>().ok_or_else(|| {
        ArrowError::CastError(format!(
            "Cannot compare dictionary array: left values of type {} are not of the requested primitive type",
            left.value_type()
        ))
    })?;
    let right_values = right.downcast_dict::<PrimitiveArray<T>>().ok_or_else(|| {
        ArrowError::CastError(format!(
            "Cannot compare dictionary array: right values of type {} are not of the requested primitive type",
            right.value_type()
        ))
    })?;
    compare_op(left_values, right_values, op)
}
/// Perform the given operation on two `DictionaryArray`s which value type is
/// `DataType::Boolean`.
///
/// # Errors
///
/// Returns an `ArrowError::CastError` if the values of either dictionary are not a
/// `BooleanArray` (previously this case panicked). Any error produced by the
/// underlying comparison kernel is propagated.
#[cfg(feature = "dyn_cmp_dict")]
pub fn cmp_dict_bool<K, F>(
    left: &DictionaryArray<K>,
    right: &DictionaryArray<K>,
    op: F,
) -> Result<BooleanArray>
where
    K: ArrowNumericType,
    F: Fn(bool, bool) -> bool,
{
    // Surface a value-type mismatch as an error instead of panicking.
    let left_values = left.downcast_dict::<BooleanArray>().ok_or_else(|| {
        ArrowError::CastError(format!(
            "Cannot compare dictionary array: left values of type {} are not boolean",
            left.value_type()
        ))
    })?;
    let right_values = right.downcast_dict::<BooleanArray>().ok_or_else(|| {
        ArrowError::CastError(format!(
            "Cannot compare dictionary array: right values of type {} are not boolean",
            right.value_type()
        ))
    })?;
    compare_op(left_values, right_values, op)
}
/// Perform the given operation on two `DictionaryArray`s which value type is
/// `DataType::Utf8` or `DataType::LargeUtf8`.
///
/// # Errors
///
/// Returns an `ArrowError::CastError` if the values of either dictionary are not a
/// `GenericStringArray<OffsetSize>` (previously this case panicked). Any error
/// produced by the underlying comparison kernel is propagated.
#[cfg(feature = "dyn_cmp_dict")]
pub fn cmp_dict_utf8<K, OffsetSize: OffsetSizeTrait, F>(
    left: &DictionaryArray<K>,
    right: &DictionaryArray<K>,
    op: F,
) -> Result<BooleanArray>
where
    K: ArrowNumericType,
    F: Fn(&str, &str) -> bool,
{
    // Surface a value-type mismatch as an error instead of panicking.
    let left_values = left
        .downcast_dict::<GenericStringArray<OffsetSize>>()
        .ok_or_else(|| {
            ArrowError::CastError(format!(
                "Cannot compare dictionary array: left values of type {} are not of the requested string type",
                left.value_type()
            ))
        })?;
    let right_values = right
        .downcast_dict::<GenericStringArray<OffsetSize>>()
        .ok_or_else(|| {
            ArrowError::CastError(format!(
                "Cannot compare dictionary array: right values of type {} are not of the requested string type",
                right.value_type()
            ))
        })?;
    compare_op(left_values, right_values, op)
}
/// Perform the given operation on two `DictionaryArray`s which value type is
/// `DataType::Binary` or `DataType::LargeBinary`.
///
/// # Errors
///
/// Returns an `ArrowError::CastError` if the values of either dictionary are not a
/// `GenericBinaryArray<OffsetSize>` (previously this case panicked). Any error
/// produced by the underlying comparison kernel is propagated.
#[cfg(feature = "dyn_cmp_dict")]
pub fn cmp_dict_binary<K, OffsetSize: OffsetSizeTrait, F>(
    left: &DictionaryArray<K>,
    right: &DictionaryArray<K>,
    op: F,
) -> Result<BooleanArray>
where
    K: ArrowNumericType,
    F: Fn(&[u8], &[u8]) -> bool,
{
    // Surface a value-type mismatch as an error instead of panicking.
    let left_values = left
        .downcast_dict::<GenericBinaryArray<OffsetSize>>()
        .ok_or_else(|| {
            ArrowError::CastError(format!(
                "Cannot compare dictionary array: left values of type {} are not of the requested binary type",
                left.value_type()
            ))
        })?;
    let right_values = right
        .downcast_dict::<GenericBinaryArray<OffsetSize>>()
        .ok_or_else(|| {
            ArrowError::CastError(format!(
                "Cannot compare dictionary array: right values of type {} are not of the requested binary type",
                right.value_type()
            ))
        })?;
    compare_op(left_values, right_values, op)
}
/// Element-wise `left == right` on two dynamically typed [`Array`]s.
///
/// The comparison only takes place when the two arrays are of the same type; otherwise
/// a casting error is returned.
///
/// Floating point values (f32 and f64) are compared according to the totalOrder
/// predicate defined in the IEEE 754 (2008 revision) floating point standard.
/// See `f32::total_cmp` and `f64::total_cmp`.
///
/// # Example
/// ```
/// use arrow::array::{StringArray, BooleanArray};
/// use arrow::compute::eq_dyn;
/// let array1 = StringArray::from(vec![Some("foo"), None, Some("bar")]);
/// let array2 = StringArray::from(vec![Some("foo"), None, Some("baz")]);
/// let result = eq_dyn(&array1, &array2).unwrap();
/// assert_eq!(BooleanArray::from(vec![Some(true), None, Some(false)]), result);
/// ```
pub fn eq_dyn(left: &dyn Array, right: &dyn Array) -> Result<BooleanArray> {
    let left_is_dict = matches!(left.data_type(), DataType::Dictionary(_, _));
    let right_is_dict = matches!(right.data_type(), DataType::Dictionary(_, _));
    match (left_is_dict, right_is_dict) {
        // dictionary == dictionary
        (true, true) => typed_dict_compares!(
            left,
            right,
            |a, b| a == b,
            |a, b| a.total_cmp(&b).is_eq(),
            |a, b| a == b
        ),
        // dictionary == plain array
        (true, false) => typed_cmp_dict_non_dict!(
            left,
            right,
            |a, b| a == b,
            |a, b| a == b,
            |a, b| a.total_cmp(&b).is_eq()
        ),
        // plain array == dictionary: the dictionary is passed first
        (false, true) => typed_cmp_dict_non_dict!(
            right,
            left,
            |a, b| a == b,
            |a, b| a == b,
            |a, b| a.total_cmp(&b).is_eq()
        ),
        // plain array == plain array
        (false, false) => typed_compares!(
            left,
            right,
            |a, b| !(a ^ b),
            |a, b| a == b,
            |a, b| a.total_cmp(&b).is_eq()
        ),
    }
}
/// Element-wise `left != right` on two dynamically typed [`Array`]s.
///
/// The comparison only takes place when the two arrays are of the same type; otherwise
/// a casting error is returned.
///
/// Floating point values (f32 and f64) are compared according to the totalOrder
/// predicate defined in the IEEE 754 (2008 revision) floating point standard.
/// See `f32::total_cmp` and `f64::total_cmp`.
///
/// # Example
/// ```
/// use arrow::array::{BinaryArray, BooleanArray};
/// use arrow::compute::neq_dyn;
/// let values1: Vec<Option<&[u8]>> = vec![Some(&[0xfc, 0xa9]), None, Some(&[0x36])];
/// let values2: Vec<Option<&[u8]>> = vec![Some(&[0xfc, 0xa9]), None, Some(&[0x36, 0x00])];
/// let array1 = BinaryArray::from(values1);
/// let array2 = BinaryArray::from(values2);
/// let result = neq_dyn(&array1, &array2).unwrap();
/// assert_eq!(BooleanArray::from(vec![Some(false), None, Some(true)]), result);
/// ```
pub fn neq_dyn(left: &dyn Array, right: &dyn Array) -> Result<BooleanArray> {
    let left_is_dict = matches!(left.data_type(), DataType::Dictionary(_, _));
    let right_is_dict = matches!(right.data_type(), DataType::Dictionary(_, _));
    match (left_is_dict, right_is_dict) {
        // dictionary != dictionary
        (true, true) => typed_dict_compares!(
            left,
            right,
            |a, b| a != b,
            |a, b| a.total_cmp(&b).is_ne(),
            |a, b| a != b
        ),
        // dictionary != plain array
        (true, false) => typed_cmp_dict_non_dict!(
            left,
            right,
            |a, b| a != b,
            |a, b| a != b,
            |a, b| a.total_cmp(&b).is_ne()
        ),
        // plain array != dictionary: the dictionary is passed first
        (false, true) => typed_cmp_dict_non_dict!(
            right,
            left,
            |a, b| a != b,
            |a, b| a != b,
            |a, b| a.total_cmp(&b).is_ne()
        ),
        // plain array != plain array
        (false, false) => typed_compares!(
            left,
            right,
            |a, b| (a ^ b),
            |a, b| a != b,
            |a, b| a.total_cmp(&b).is_ne()
        ),
    }
}
/// Element-wise `left < right` on two dynamically typed [`Array`]s.
///
/// The comparison only takes place when the two arrays are of the same type; otherwise
/// a casting error is returned.
///
/// Floating point values (f32 and f64) are compared according to the totalOrder
/// predicate defined in the IEEE 754 (2008 revision) floating point standard.
/// See `f32::total_cmp` and `f64::total_cmp`.
///
/// # Example
/// ```
/// use arrow::array::{PrimitiveArray, BooleanArray};
/// use arrow::datatypes::Int32Type;
/// use arrow::compute::lt_dyn;
/// let array1: PrimitiveArray<Int32Type> = PrimitiveArray::from(vec![Some(0), Some(1), Some(2)]);
/// let array2: PrimitiveArray<Int32Type> = PrimitiveArray::from(vec![Some(1), Some(1), None]);
/// let result = lt_dyn(&array1, &array2).unwrap();
/// assert_eq!(BooleanArray::from(vec![Some(true), Some(false), None]), result);
/// ```
#[allow(clippy::bool_comparison)]
pub fn lt_dyn(left: &dyn Array, right: &dyn Array) -> Result<BooleanArray> {
    let left_is_dict = matches!(left.data_type(), DataType::Dictionary(_, _));
    let right_is_dict = matches!(right.data_type(), DataType::Dictionary(_, _));
    match (left_is_dict, right_is_dict) {
        // dictionary < dictionary
        (true, true) => typed_dict_compares!(
            left,
            right,
            |a, b| a < b,
            |a, b| a.total_cmp(&b).is_lt(),
            |a, b| a < b
        ),
        // dictionary < plain array
        (true, false) => typed_cmp_dict_non_dict!(
            left,
            right,
            |a, b| a < b,
            |a, b| a < b,
            |a, b| a.total_cmp(&b).is_lt()
        ),
        // plain array < dictionary: the dictionary is passed first, so the
        // operators are mirrored to keep the meaning `left < right`
        (false, true) => typed_cmp_dict_non_dict!(
            right,
            left,
            |a, b| a > b,
            |a, b| a > b,
            |a, b| b.total_cmp(&a).is_lt()
        ),
        // plain array < plain array
        (false, false) => typed_compares!(
            left,
            right,
            |a, b| ((!a) & b),
            |a, b| a < b,
            |a, b| a.total_cmp(&b).is_lt()
        ),
    }
}
/// Element-wise `left <= right` on two dynamically typed [`Array`]s.
///
/// The comparison only takes place when the two arrays are of the same type; otherwise
/// a casting error is returned.
///
/// Floating point values (f32 and f64) are compared according to the totalOrder
/// predicate defined in the IEEE 754 (2008 revision) floating point standard.
/// See `f32::total_cmp` and `f64::total_cmp`.
///
/// # Example
/// ```
/// use arrow::array::{PrimitiveArray, BooleanArray};
/// use arrow::datatypes::Date32Type;
/// use arrow::compute::lt_eq_dyn;
/// let array1: PrimitiveArray<Date32Type> = vec![Some(12356), Some(13548), Some(-365), Some(365)].into();
/// let array2: PrimitiveArray<Date32Type> = vec![Some(12355), Some(13548), Some(-364), None].into();
/// let result = lt_eq_dyn(&array1, &array2).unwrap();
/// assert_eq!(BooleanArray::from(vec![Some(false), Some(true), Some(true), None]), result);
/// ```
pub fn lt_eq_dyn(left: &dyn Array, right: &dyn Array) -> Result<BooleanArray> {
    let left_is_dict = matches!(left.data_type(), DataType::Dictionary(_, _));
    let right_is_dict = matches!(right.data_type(), DataType::Dictionary(_, _));
    match (left_is_dict, right_is_dict) {
        // dictionary <= dictionary
        (true, true) => typed_dict_compares!(
            left,
            right,
            |a, b| a <= b,
            |a, b| a.total_cmp(&b).is_le(),
            |a, b| a <= b
        ),
        // dictionary <= plain array
        (true, false) => typed_cmp_dict_non_dict!(
            left,
            right,
            |a, b| a <= b,
            |a, b| a <= b,
            |a, b| a.total_cmp(&b).is_le()
        ),
        // plain array <= dictionary: the dictionary is passed first, so the
        // operators are mirrored to keep the meaning `left <= right`
        (false, true) => typed_cmp_dict_non_dict!(
            right,
            left,
            |a, b| a >= b,
            |a, b| a >= b,
            |a, b| b.total_cmp(&a).is_le()
        ),
        // plain array <= plain array
        (false, false) => typed_compares!(
            left,
            right,
            |a, b| !(a & (!b)),
            |a, b| a <= b,
            |a, b| a.total_cmp(&b).is_le()
        ),
    }
}
/// Element-wise `left > right` on two dynamically typed [`Array`]s.
///
/// The comparison only takes place when the two arrays are of the same type; otherwise
/// a casting error is returned.
///
/// Floating point values (f32 and f64) are compared according to the totalOrder
/// predicate defined in the IEEE 754 (2008 revision) floating point standard.
/// See `f32::total_cmp` and `f64::total_cmp`.
///
/// # Example
/// ```
/// use arrow::array::BooleanArray;
/// use arrow::compute::gt_dyn;
/// let array1 = BooleanArray::from(vec![Some(true), Some(false), None]);
/// let array2 = BooleanArray::from(vec![Some(false), Some(true), None]);
/// let result = gt_dyn(&array1, &array2).unwrap();
/// assert_eq!(BooleanArray::from(vec![Some(true), Some(false), None]), result);
/// ```
#[allow(clippy::bool_comparison)]
pub fn gt_dyn(left: &dyn Array, right: &dyn Array) -> Result<BooleanArray> {
    let left_is_dict = matches!(left.data_type(), DataType::Dictionary(_, _));
    let right_is_dict = matches!(right.data_type(), DataType::Dictionary(_, _));
    match (left_is_dict, right_is_dict) {
        // dictionary > dictionary
        (true, true) => typed_dict_compares!(
            left,
            right,
            |a, b| a > b,
            |a, b| a.total_cmp(&b).is_gt(),
            |a, b| a > b
        ),
        // dictionary > plain array
        (true, false) => typed_cmp_dict_non_dict!(
            left,
            right,
            |a, b| a > b,
            |a, b| a > b,
            |a, b| a.total_cmp(&b).is_gt()
        ),
        // plain array > dictionary: the dictionary is passed first, so the
        // operators are mirrored to keep the meaning `left > right`
        (false, true) => typed_cmp_dict_non_dict!(
            right,
            left,
            |a, b| a < b,
            |a, b| a < b,
            |a, b| b.total_cmp(&a).is_gt()
        ),
        // plain array > plain array
        (false, false) => typed_compares!(
            left,
            right,
            |a, b| (a & (!b)),
            |a, b| a > b,
            |a, b| a.total_cmp(&b).is_gt()
        ),
    }
}
/// Element-wise `left >= right` on two dynamically typed [`Array`]s.
///
/// The comparison only takes place when the two arrays are of the same type; otherwise
/// a casting error is returned.
///
/// Floating point values (f32 and f64) are compared according to the totalOrder
/// predicate defined in the IEEE 754 (2008 revision) floating point standard.
/// See `f32::total_cmp` and `f64::total_cmp`.
///
/// # Example
/// ```
/// use arrow::array::{BooleanArray, StringArray};
/// use arrow::compute::gt_eq_dyn;
/// let array1 = StringArray::from(vec![Some(""), Some("aaa"), None]);
/// let array2 = StringArray::from(vec![Some(" "), Some("aa"), None]);
/// let result = gt_eq_dyn(&array1, &array2).unwrap();
/// assert_eq!(BooleanArray::from(vec![Some(false), Some(true), None]), result);
/// ```
pub fn gt_eq_dyn(left: &dyn Array, right: &dyn Array) -> Result<BooleanArray> {
    let left_is_dict = matches!(left.data_type(), DataType::Dictionary(_, _));
    let right_is_dict = matches!(right.data_type(), DataType::Dictionary(_, _));
    match (left_is_dict, right_is_dict) {
        // dictionary >= dictionary
        (true, true) => typed_dict_compares!(
            left,
            right,
            |a, b| a >= b,
            |a, b| a.total_cmp(&b).is_ge(),
            |a, b| a >= b
        ),
        // dictionary >= plain array
        (true, false) => typed_cmp_dict_non_dict!(
            left,
            right,
            |a, b| a >= b,
            |a, b| a >= b,
            |a, b| a.total_cmp(&b).is_ge()
        ),
        // plain array >= dictionary: the dictionary is passed first, so the
        // operators are mirrored to keep the meaning `left >= right`
        (false, true) => typed_cmp_dict_non_dict!(
            right,
            left,
            |a, b| a <= b,
            |a, b| a <= b,
            |a, b| b.total_cmp(&a).is_ge()
        ),
        // plain array >= plain array
        (false, false) => typed_compares!(
            left,
            right,
            |a, b| !((!a) & b),
            |a, b| a >= b,
            |a, b| a.total_cmp(&b).is_ge()
        ),
    }
}
/// Element-wise `left == right` on two [`PrimitiveArray`]s.
///
/// Uses the SIMD kernel when the `simd` feature is enabled and the scalar kernel
/// otherwise.
pub fn eq<T>(left: &PrimitiveArray<T>, right: &PrimitiveArray<T>) -> Result<BooleanArray>
where
    T: ArrowNumericType,
{
    #[cfg(feature = "simd")]
    {
        simd_compare_op(left, right, T::eq, |lhs, rhs| lhs == rhs)
    }
    #[cfg(not(feature = "simd"))]
    {
        compare_op(left, right, |lhs, rhs| lhs == rhs)
    }
}
/// Element-wise `left == right` between a [`PrimitiveArray`] and a scalar value.
///
/// If `simd` feature flag is not enabled:
/// floating point values (f32 and f64) are compared according to the totalOrder
/// predicate defined in the IEEE 754 (2008 revision) floating point standard.
/// See `f32::total_cmp` and `f64::total_cmp`.
pub fn eq_scalar<T>(left: &PrimitiveArray<T>, right: T::Native) -> Result<BooleanArray>
where
    T: ArrowNumericType,
    T::Native: ArrowNativeTypeOp,
{
    #[cfg(feature = "simd")]
    {
        simd_compare_op_scalar(left, right, T::eq, |lhs, rhs| lhs == rhs)
    }
    #[cfg(not(feature = "simd"))]
    {
        compare_op_scalar(left, |value| value.is_eq(right))
    }
}
/// Applies an unary and infallible comparison function to a primitive array.
pub fn unary_cmp<T, F>(left: &PrimitiveArray<T>, op: F) -> Result<BooleanArray>
where
T: ArrowNumericType,
F: Fn(T::Native) -> bool,
{
compare_op_scalar(left, op)
}
/// Element-wise `left != right` on two [`PrimitiveArray`]s.
///
/// Uses the SIMD kernel when the `simd` feature is enabled and the scalar kernel
/// otherwise.
pub fn neq<T>(left: &PrimitiveArray<T>, right: &PrimitiveArray<T>) -> Result<BooleanArray>
where
    T: ArrowNumericType,
{
    #[cfg(feature = "simd")]
    {
        simd_compare_op(left, right, T::ne, |lhs, rhs| lhs != rhs)
    }
    #[cfg(not(feature = "simd"))]
    {
        compare_op(left, right, |lhs, rhs| lhs != rhs)
    }
}
/// Element-wise `left != right` between a [`PrimitiveArray`] and a scalar value.
///
/// If `simd` feature flag is not enabled:
/// floating point values (f32 and f64) are compared according to the totalOrder
/// predicate defined in the IEEE 754 (2008 revision) floating point standard.
/// See `f32::total_cmp` and `f64::total_cmp`.
pub fn neq_scalar<T>(left: &PrimitiveArray<T>, right: T::Native) -> Result<BooleanArray>
where
    T: ArrowNumericType,
    T::Native: ArrowNativeTypeOp,
{
    #[cfg(feature = "simd")]
    {
        simd_compare_op_scalar(left, right, T::ne, |lhs, rhs| lhs != rhs)
    }
    #[cfg(not(feature = "simd"))]
    {
        compare_op_scalar(left, |value| value.is_ne(right))
    }
}
/// Element-wise `left < right` on two [`PrimitiveArray`]s.
///
/// Documented contract: null values are less than non-null values. Null handling is
/// performed by the underlying comparison kernel, not by this wrapper
/// (NOTE(review): confirm the stated null ordering against that kernel).
pub fn lt<T>(left: &PrimitiveArray<T>, right: &PrimitiveArray<T>) -> Result<BooleanArray>
where
    T: ArrowNumericType,
{
    #[cfg(feature = "simd")]
    {
        simd_compare_op(left, right, T::lt, |lhs, rhs| lhs < rhs)
    }
    #[cfg(not(feature = "simd"))]
    {
        compare_op(left, right, |lhs, rhs| lhs < rhs)
    }
}
/// Element-wise `left < right` between a [`PrimitiveArray`] and a scalar value.
///
/// Documented contract: null values are less than non-null values. Null handling is
/// performed by the underlying comparison kernel, not by this wrapper
/// (NOTE(review): confirm the stated null ordering against that kernel).
///
/// If `simd` feature flag is not enabled:
/// floating point values (f32 and f64) are compared according to the totalOrder
/// predicate defined in the IEEE 754 (2008 revision) floating point standard.
/// See `f32::total_cmp` and `f64::total_cmp`.
pub fn lt_scalar<T>(left: &PrimitiveArray<T>, right: T::Native) -> Result<BooleanArray>
where
    T: ArrowNumericType,
    T::Native: ArrowNativeTypeOp,
{
    #[cfg(feature = "simd")]
    {
        simd_compare_op_scalar(left, right, T::lt, |lhs, rhs| lhs < rhs)
    }
    #[cfg(not(feature = "simd"))]
    {
        compare_op_scalar(left, |value| value.is_lt(right))
    }
}
/// Element-wise `left <= right` on two [`PrimitiveArray`]s.
///
/// Documented contract: null values are less than non-null values. Null handling is
/// performed by the underlying comparison kernel, not by this wrapper
/// (NOTE(review): confirm the stated null ordering against that kernel).
pub fn lt_eq<T>(
    left: &PrimitiveArray<T>,
    right: &PrimitiveArray<T>,
) -> Result<BooleanArray>
where
    T: ArrowNumericType,
{
    #[cfg(feature = "simd")]
    {
        simd_compare_op(left, right, T::le, |lhs, rhs| lhs <= rhs)
    }
    #[cfg(not(feature = "simd"))]
    {
        compare_op(left, right, |lhs, rhs| lhs <= rhs)
    }
}
/// Element-wise `left <= right` between a [`PrimitiveArray`] and a scalar value.
///
/// Documented contract: null values are less than non-null values. Null handling is
/// performed by the underlying comparison kernel, not by this wrapper
/// (NOTE(review): confirm the stated null ordering against that kernel).
///
/// If `simd` feature flag is not enabled:
/// floating point values (f32 and f64) are compared according to the totalOrder
/// predicate defined in the IEEE 754 (2008 revision) floating point standard.
/// See `f32::total_cmp` and `f64::total_cmp`.
pub fn lt_eq_scalar<T>(left: &PrimitiveArray<T>, right: T::Native) -> Result<BooleanArray>
where
    T: ArrowNumericType,
    T::Native: ArrowNativeTypeOp,
{
    #[cfg(feature = "simd")]
    {
        simd_compare_op_scalar(left, right, T::le, |lhs, rhs| lhs <= rhs)
    }
    #[cfg(not(feature = "simd"))]
    {
        compare_op_scalar(left, |value| value.is_le(right))
    }
}
/// Element-wise `left > right` on two [`PrimitiveArray`]s.
///
/// Documented contract: non-null values are greater than null values. Null handling is
/// performed by the underlying comparison kernel, not by this wrapper
/// (NOTE(review): confirm the stated null ordering against that kernel).
pub fn gt<T>(left: &PrimitiveArray<T>, right: &PrimitiveArray<T>) -> Result<BooleanArray>
where
    T: ArrowNumericType,
{
    #[cfg(feature = "simd")]
    {
        simd_compare_op(left, right, T::gt, |lhs, rhs| lhs > rhs)
    }
    #[cfg(not(feature = "simd"))]
    {
        compare_op(left, right, |lhs, rhs| lhs > rhs)
    }
}
/// Element-wise `left > right` between a [`PrimitiveArray`] and a scalar value.
///
/// Documented contract: non-null values are greater than null values. Null handling is
/// performed by the underlying comparison kernel, not by this wrapper
/// (NOTE(review): confirm the stated null ordering against that kernel).
///
/// If `simd` feature flag is not enabled:
/// floating point values (f32 and f64) are compared according to the totalOrder
/// predicate defined in the IEEE 754 (2008 revision) floating point standard.
/// See `f32::total_cmp` and `f64::total_cmp`.
pub fn gt_scalar<T>(left: &PrimitiveArray<T>, right: T::Native) -> Result<BooleanArray>
where
    T: ArrowNumericType,
    T::Native: ArrowNativeTypeOp,
{
    #[cfg(feature = "simd")]
    {
        simd_compare_op_scalar(left, right, T::gt, |lhs, rhs| lhs > rhs)
    }
    #[cfg(not(feature = "simd"))]
    {
        compare_op_scalar(left, |value| value.is_gt(right))
    }
}
/// Element-wise `left >= right` over two [`PrimitiveArray`]s of the same type.
///
/// A slot that is null in either input is null in the returned [`BooleanArray`].
pub fn gt_eq<T>(
    left: &PrimitiveArray<T>,
    right: &PrimitiveArray<T>,
) -> Result<BooleanArray>
where
    T: ArrowNumericType,
{
    #[cfg(feature = "simd")]
    {
        simd_compare_op(left, right, T::ge, |lhs, rhs| lhs >= rhs)
    }
    #[cfg(not(feature = "simd"))]
    {
        compare_op(left, right, |lhs, rhs| lhs >= rhs)
    }
}
/// Compares every element of a [`PrimitiveArray`] against one scalar with `left >= right`.
///
/// A null slot in `left` yields a null slot in the returned [`BooleanArray`].
///
/// If `simd` feature flag is not enabled:
/// the comparison is delegated to `is_ge` on the native value. For floating values like
/// f32 and f64 this produces an ordering in accordance to the totalOrder predicate as
/// defined in the IEEE 754 (2008 revision) floating point standard.
/// Please refer to `f32::total_cmp` and `f64::total_cmp`.
pub fn gt_eq_scalar<T>(left: &PrimitiveArray<T>, right: T::Native) -> Result<BooleanArray>
where
    T: ArrowNumericType,
    T::Native: ArrowNativeTypeOp,
{
    #[cfg(feature = "simd")]
    {
        simd_compare_op_scalar(left, right, T::ge, |lhs, rhs| lhs >= rhs)
    }
    #[cfg(not(feature = "simd"))]
    {
        compare_op_scalar(left, |value| value.is_ge(right))
    }
}
/// Checks, row by row, whether the list at `right[i]` contains the value `left[i]`.
///
/// The result has one boolean per row and no validity buffer. Row `i` is `true` when the
/// combined validity bitmap of `left` and `right` (see `combine_option_bitmap`) marks the
/// row as valid and at least one non-null element of the list `right[i]` equals `left[i]`.
/// Every other row is `false`; for example `contains(null, [1, 2, null])` and
/// `contains(null, null)` are both `false`.
///
/// # Errors
///
/// Returns [`ArrowError::ComputeError`] when `left` and `right` have different lengths, or
/// when the values of a list cannot be downcast to `PrimitiveArray<T>`.
pub fn contains<T, OffsetSize>(
    left: &PrimitiveArray<T>,
    right: &GenericListArray<OffsetSize>,
) -> Result<BooleanArray>
where
    T: ArrowNumericType,
    OffsetSize: OffsetSizeTrait,
{
    let len = left.len();
    if len != right.len() {
        return Err(ArrowError::ComputeError(
            "Cannot perform comparison operation on arrays of different length"
                .to_string(),
        ));
    }

    let num_bytes = bit_util::ceil(len, 8);

    // When neither input carries a validity buffer every row takes part in the search.
    let validity_buffer =
        match combine_option_bitmap(&[left.data_ref(), right.data_ref()], len)? {
            Some(buffer) => buffer,
            None => new_all_set_buffer(num_bytes),
        };
    let validity = validity_buffer.as_slice();

    // Starts all-false; only rows with a match get their bit set.
    let mut bool_buf = MutableBuffer::from_len_zeroed(num_bytes);
    let bool_slice = bool_buf.as_slice_mut();

    for i in 0..len {
        if !bit_util::get_bit(validity, i) {
            continue;
        }

        let list = right.value(i);
        let candidates = list
            .as_any()
            .downcast_ref::<PrimitiveArray<T>>()
            .ok_or_else(|| {
                ArrowError::ComputeError(
                    "contains: list values do not have the same primitive type as the left array"
                        .to_string(),
                )
            })?;

        let needle = left.value(i);
        // `any` stops at the first match instead of scanning the rest of the list.
        let found = (0..candidates.len())
            .any(|j| candidates.is_valid(j) && needle == candidates.value(j));
        if found {
            bit_util::set_bit(bool_slice, i);
        }
    }

    // SAFETY: `bool_buf` was allocated with `ceil(len / 8)` bytes, i.e. one bit per row,
    // and there is no null buffer, offset or child data that would need validation.
    let data = unsafe {
        ArrayData::new_unchecked(
            DataType::Boolean,
            len,
            None,
            None,
            0,
            vec![bool_buf.into()],
            vec![],
        )
    };
    Ok(BooleanArray::from(data))
}
/// Checks, row by row, whether the string list at `right[i]` contains the string `left[i]`.
///
/// The result has one boolean per row and no validity buffer. Row `i` is `true` when the
/// combined validity bitmap of `left` and `right` (see `combine_option_bitmap`) marks the
/// row as valid and at least one non-null element of the list `right[i]` equals `left[i]`.
/// Every other row is `false`; for example `contains(null, ["ab", "cd", null])` and
/// `contains(null, null)` are both `false`.
///
/// # Errors
///
/// Returns [`ArrowError::ComputeError`] when `left` and `right` have different lengths, or
/// when the values of a list cannot be downcast to `GenericStringArray<OffsetSize>`.
pub fn contains_utf8<OffsetSize>(
    left: &GenericStringArray<OffsetSize>,
    right: &ListArray,
) -> Result<BooleanArray>
where
    OffsetSize: OffsetSizeTrait,
{
    let len = left.len();
    if len != right.len() {
        return Err(ArrowError::ComputeError(
            "Cannot perform comparison operation on arrays of different length"
                .to_string(),
        ));
    }

    let num_bytes = bit_util::ceil(len, 8);

    // When neither input carries a validity buffer every row takes part in the search.
    let validity_buffer =
        match combine_option_bitmap(&[left.data_ref(), right.data_ref()], len)? {
            Some(buffer) => buffer,
            None => new_all_set_buffer(num_bytes),
        };
    let validity = validity_buffer.as_slice();

    // Starts all-false; only rows with a match get their bit set.
    let mut bool_buf = MutableBuffer::from_len_zeroed(num_bytes);
    let bool_slice = bool_buf.as_slice_mut();

    for i in 0..len {
        // contains(null, null) = false
        if !bit_util::get_bit(validity, i) {
            continue;
        }

        let list = right.value(i);
        let candidates = list
            .as_any()
            .downcast_ref::<GenericStringArray<OffsetSize>>()
            .ok_or_else(|| {
                ArrowError::ComputeError(
                    "contains_utf8: list values do not have the same string type as the left array"
                        .to_string(),
                )
            })?;

        let needle = left.value(i);
        // `any` stops at the first match instead of scanning the rest of the list.
        let found = (0..candidates.len())
            .any(|j| candidates.is_valid(j) && needle == candidates.value(j));
        if found {
            bit_util::set_bit(bool_slice, i);
        }
    }

    // SAFETY: `bool_buf` was allocated with `ceil(len / 8)` bytes, i.e. one bit per row,
    // and there is no null buffer, offset or child data that would need validation.
    let data = unsafe {
        ArrayData::new_unchecked(
            DataType::Boolean,
            len,
            None,
            None,
            0,
            vec![bool_buf.into()],
            vec![],
        )
    };
    Ok(BooleanArray::from(data))
}
// Builds a buffer whose bits are all set. `len` is used both as the initial capacity of the
// `MutableBuffer` and as the extent handed to `with_bitset`; callers in this file pass a
// byte count.
#[inline]
fn new_all_set_buffer(len: usize) -> Buffer {
    MutableBuffer::new(len).with_bitset(len, true).into()
}
// disable wrapping inside literal vectors used for test data and assertions
#[rustfmt::skip::macros(vec)]
#[cfg(test)]
mod tests {
use std::sync::Arc;
use super::*;
use crate::datatypes::Int8Type;
use crate::{array::Int32Array, array::Int64Array, datatypes::Field};
/// Evaluate `KERNEL` with two vectors as inputs and assert against the expected output.
/// `DYN_KERNEL` is the dynamically typed counterpart of `KERNEL`; it is checked against the
/// same expectation. `ARRAY` names the array type both input vectors are converted into.
/// `A_VEC` and `B_VEC` can be of type `Vec<T>` or `Vec<Option<T>>` where `T` is the native
/// type of the data type of the Arrow array element.
/// `EXPECTED` can be either `Vec<bool>` or `Vec<Option<bool>>`.
/// Note that `A_VEC`, `B_VEC` and `EXPECTED` are expanded (and therefore evaluated) several times.
/// The main reason for this macro is that inputs and outputs align nicely after `cargo fmt`.
macro_rules! cmp_vec {
($KERNEL:ident, $DYN_KERNEL:ident, $ARRAY:ident, $A_VEC:expr, $B_VEC:expr, $EXPECTED:expr) => {
// 1) statically typed kernel on the inputs as given
let a = $ARRAY::from($A_VEC);
let b = $ARRAY::from($B_VEC);
let c = $KERNEL(&a, &b).unwrap();
assert_eq!(BooleanArray::from($EXPECTED), c);
// slice and test if the dynamic array works
let a = a.slice(0, a.len());
let b = b.slice(0, b.len());
let c = $DYN_KERNEL(a.as_ref(), b.as_ref()).unwrap();
assert_eq!(BooleanArray::from($EXPECTED), c);
// test with a larger version of the same data to ensure we cover the chunked part of the comparison
let mut a = vec![];
let mut b = vec![];
let mut e = vec![];
// inputs and expectation are repeated ten times back to back
for _i in 0..10 {
a.extend($A_VEC);
b.extend($B_VEC);
e.extend($EXPECTED);
}
let a = $ARRAY::from(a);
let b = $ARRAY::from(b);
let c = $KERNEL(&a, &b).unwrap();
assert_eq!(BooleanArray::from(e), c);
};
}
/// Evaluate `KERNEL` with two vectors as inputs and assert against the expected output.
/// This is `cmp_vec!` with the array type fixed to `Int64Array`.
/// `A_VEC` and `B_VEC` can be of type `Vec<i64>` or `Vec<Option<i64>>`.
/// `EXPECTED` can be either `Vec<bool>` or `Vec<Option<bool>>`.
/// The main reason for this macro is that inputs and outputs align nicely after `cargo fmt`.
macro_rules! cmp_i64 {
($KERNEL:ident, $DYN_KERNEL:ident, $A_VEC:expr, $B_VEC:expr, $EXPECTED:expr) => {
cmp_vec!($KERNEL, $DYN_KERNEL, Int64Array, $A_VEC, $B_VEC, $EXPECTED);
};
}
/// Evaluate `KERNEL` with one vector and one scalar as inputs and assert against the expected output.
/// `A_VEC` can be of type `Vec<i64>` or `Vec<Option<i64>>`; `B` is the scalar literal.
/// `EXPECTED` can be either `Vec<bool>` or `Vec<Option<bool>>`.
/// Note that `A_VEC` and `EXPECTED` are expanded (and therefore evaluated) several times.
/// The main reason for this macro is that inputs and outputs align nicely after `cargo fmt`.
macro_rules! cmp_i64_scalar {
($KERNEL:ident, $A_VEC:expr, $B:literal, $EXPECTED:expr) => {
// kernel on the input as given
let a = Int64Array::from($A_VEC);
let c = $KERNEL(&a, $B).unwrap();
assert_eq!(BooleanArray::from($EXPECTED), c);
// test with a larger version of the same data to ensure we cover the chunked part of the comparison
let mut a = vec![];
let mut e = vec![];
// input and expectation are repeated ten times back to back
for _i in 0..10 {
a.extend($A_VEC);
e.extend($EXPECTED);
}
let a = Int64Array::from(a);
let c = $KERNEL(&a, $B).unwrap();
assert_eq!(BooleanArray::from(e), c);
};
}
// `eq` / `eq_dyn` on Int64 and on several timestamp/time array types, all with the same
// data: a constant 8 on the left compared against 6..=10 repeated twice on the right.
#[test]
fn test_primitive_array_eq() {
cmp_i64!(
eq,
eq_dyn,
vec![8, 8, 8, 8, 8, 8, 8, 8, 8, 8],
vec![6, 7, 8, 9, 10, 6, 7, 8, 9, 10],
vec![false, false, true, false, false, false, false, true, false, false]
);
cmp_vec!(
eq,
eq_dyn,
TimestampSecondArray,
vec![8, 8, 8, 8, 8, 8, 8, 8, 8, 8],
vec![6, 7, 8, 9, 10, 6, 7, 8, 9, 10],
vec![false, false, true, false, false, false, false, true, false, false]
);
cmp_vec!(
eq,
eq_dyn,
Time32SecondArray,
vec![8, 8, 8, 8, 8, 8, 8, 8, 8, 8],
vec![6, 7, 8, 9, 10, 6, 7, 8, 9, 10],
vec![false, false, true, false, false, false, false, true, false, false]
);
cmp_vec!(
eq,
eq_dyn,
Time32MillisecondArray,
vec![8, 8, 8, 8, 8, 8, 8, 8, 8, 8],
vec![6, 7, 8, 9, 10, 6, 7, 8, 9, 10],
vec![false, false, true, false, false, false, false, true, false, false]
);
cmp_vec!(
eq,
eq_dyn,
Time64MicrosecondArray,
vec![8, 8, 8, 8, 8, 8, 8, 8, 8, 8],
vec![6, 7, 8, 9, 10, 6, 7, 8, 9, 10],
vec![false, false, true, false, false, false, false, true, false, false]
);
cmp_vec!(
eq,
eq_dyn,
Time64NanosecondArray,
vec![8, 8, 8, 8, 8, 8, 8, 8, 8, 8],
vec![6, 7, 8, 9, 10, 6, 7, 8, 9, 10],
vec![false, false, true, false, false, false, false, true, false, false]
);
}
// `eq_scalar` against the scalar 8: only the positions holding 8 are true.
#[test]
fn test_primitive_array_eq_scalar() {
cmp_i64_scalar!(
eq_scalar,
vec![6, 7, 8, 9, 10, 6, 7, 8, 9, 10],
8,
vec![false, false, true, false, false, false, false, true, false, false]
);
}
// `eq` must honour the offset of a sliced input: the last five values of a ten element
// array are compared against a separate five element array.
#[test]
fn test_primitive_array_eq_with_slice() {
    let reference = Int32Array::from(vec![6, 7, 8, 8, 10]);
    let full = Int32Array::from(vec![1, 2, 3, 4, 5, 6, 7, 8, 9, 10]);
    let tail = full.slice(5, 5);
    let tail = tail.as_any().downcast_ref().unwrap();
    let mask = eq(tail, &reference).unwrap();
    // [6, 7, 8, 9, 10] vs [6, 7, 8, 8, 10]
    assert!(mask.value(0));
    assert!(mask.value(1));
    assert!(mask.value(2));
    assert!(!mask.value(3));
    assert!(mask.value(4));
}
// `eq_scalar` on a sliced array: the slice starts at the null slot, which stays null.
#[test]
fn test_primitive_array_eq_scalar_with_slice() {
    let full = Int32Array::from(vec![Some(1), None, Some(2), Some(3)]);
    let sliced = full.slice(1, 3);
    let sliced: &Int32Array = as_primitive_array(&sliced);
    let expected = BooleanArray::from(vec![None, Some(true), Some(false)]);
    let actual = eq_scalar(sliced, 2).unwrap();
    assert_eq!(actual, expected);
}
// `neq` / `neq_dyn` on Int64 and TimestampMillisecond arrays: a constant 8 on the left,
// 6..=10 repeated twice on the right.
#[test]
fn test_primitive_array_neq() {
cmp_i64!(
neq,
neq_dyn,
vec![8, 8, 8, 8, 8, 8, 8, 8, 8, 8],
vec![6, 7, 8, 9, 10, 6, 7, 8, 9, 10],
vec![true, true, false, true, true, true, true, false, true, true]
);
cmp_vec!(
neq,
neq_dyn,
TimestampMillisecondArray,
vec![8, 8, 8, 8, 8, 8, 8, 8, 8, 8],
vec![6, 7, 8, 9, 10, 6, 7, 8, 9, 10],
vec![true, true, false, true, true, true, true, false, true, true]
);
}
// `neq_scalar` against the scalar 8: only the positions holding 8 are false.
#[test]
fn test_primitive_array_neq_scalar() {
cmp_i64_scalar!(
neq_scalar,
vec![6, 7, 8, 9, 10, 6, 7, 8, 9, 10],
8,
vec![true, true, false, true, true, true, true, false, true, true]
);
}
// `eq_bool` on two nullable boolean arrays; a null on either side gives a null result.
#[test]
fn test_boolean_array_eq() {
    let lhs = BooleanArray::from(vec![Some(true), Some(false), Some(false), Some(true), Some(true), None]);
    let rhs = BooleanArray::from(vec![Some(true), Some(true), Some(false), Some(false), None, Some(false)]);
    let expected = vec![Some(true), Some(false), Some(true), Some(false), None, None];
    let actual: Vec<Option<bool>> = eq_bool(&lhs, &rhs).unwrap().iter().collect();
    assert_eq!(actual, expected);
}
// `neq_bool` on two nullable boolean arrays; a null on either side gives a null result.
#[test]
fn test_boolean_array_neq() {
    let lhs = BooleanArray::from(vec![Some(true), Some(false), Some(false), Some(true), Some(true), None]);
    let rhs = BooleanArray::from(vec![Some(true), Some(true), Some(false), Some(false), None, Some(false)]);
    let expected = vec![Some(false), Some(true), Some(false), Some(true), None, None];
    let actual: Vec<Option<bool>> = neq_bool(&lhs, &rhs).unwrap().iter().collect();
    assert_eq!(actual, expected);
}
// `lt_bool` on two nullable boolean arrays: only `false < true` is true.
#[test]
fn test_boolean_array_lt() {
    let lhs = BooleanArray::from(vec![Some(true), Some(false), Some(false), Some(true), Some(true), None]);
    let rhs = BooleanArray::from(vec![Some(true), Some(true), Some(false), Some(false), None, Some(false)]);
    let expected = vec![Some(false), Some(true), Some(false), Some(false), None, None];
    let actual: Vec<Option<bool>> = lt_bool(&lhs, &rhs).unwrap().iter().collect();
    assert_eq!(actual, expected);
}
// `lt_eq_bool` on two nullable boolean arrays: only `true <= false` is false.
#[test]
fn test_boolean_array_lt_eq() {
    let lhs = BooleanArray::from(vec![Some(true), Some(false), Some(false), Some(true), Some(true), None]);
    let rhs = BooleanArray::from(vec![Some(true), Some(true), Some(false), Some(false), None, Some(false)]);
    let expected = vec![Some(true), Some(true), Some(true), Some(false), None, None];
    let actual: Vec<Option<bool>> = lt_eq_bool(&lhs, &rhs).unwrap().iter().collect();
    assert_eq!(actual, expected);
}
// `gt_bool` on two nullable boolean arrays: only `true > false` is true.
#[test]
fn test_boolean_array_gt() {
    let lhs = BooleanArray::from(vec![Some(true), Some(false), Some(false), Some(true), Some(true), None]);
    let rhs = BooleanArray::from(vec![Some(true), Some(true), Some(false), Some(false), None, Some(false)]);
    let expected = vec![Some(false), Some(false), Some(false), Some(true), None, None];
    let actual: Vec<Option<bool>> = gt_bool(&lhs, &rhs).unwrap().iter().collect();
    assert_eq!(actual, expected);
}
// `gt_eq_bool` on two nullable boolean arrays: only `false >= true` is false.
#[test]
fn test_boolean_array_gt_eq() {
    let lhs = BooleanArray::from(vec![Some(true), Some(false), Some(false), Some(true), Some(true), None]);
    let rhs = BooleanArray::from(vec![Some(true), Some(true), Some(false), Some(false), None, Some(false)]);
    let expected = vec![Some(true), Some(false), Some(true), Some(true), None, None];
    let actual: Vec<Option<bool>> = gt_eq_bool(&lhs, &rhs).unwrap().iter().collect();
    assert_eq!(actual, expected);
}
// `eq_bool_scalar` against both scalar values; the null slot stays null.
#[test]
fn test_boolean_array_eq_scalar() {
    let input = BooleanArray::from(vec![Some(true), Some(false), None]);
    let cases = [
        (false, vec![Some(false), Some(true), None]),
        (true, vec![Some(true), Some(false), None]),
    ];
    for (scalar, expected) in cases {
        let actual: Vec<Option<bool>> =
            eq_bool_scalar(&input, scalar).unwrap().iter().collect();
        assert_eq!(actual, expected);
    }
}
// `neq_bool_scalar` against both scalar values; the null slot stays null.
#[test]
fn test_boolean_array_neq_scalar() {
    let input = BooleanArray::from(vec![Some(true), Some(false), None]);
    let cases = [
        (false, vec![Some(true), Some(false), None]),
        (true, vec![Some(false), Some(true), None]),
    ];
    for (scalar, expected) in cases {
        let actual: Vec<Option<bool>> =
            neq_bool_scalar(&input, scalar).unwrap().iter().collect();
        assert_eq!(actual, expected);
    }
}
// `lt_bool_scalar` against both scalar values; the null slot stays null.
#[test]
fn test_boolean_array_lt_scalar() {
    let input = BooleanArray::from(vec![Some(true), Some(false), None]);
    let cases = [
        (false, vec![Some(false), Some(false), None]),
        (true, vec![Some(false), Some(true), None]),
    ];
    for (scalar, expected) in cases {
        let actual: Vec<Option<bool>> =
            lt_bool_scalar(&input, scalar).unwrap().iter().collect();
        assert_eq!(actual, expected);
    }
}
// `lt_eq_bool_scalar` against both scalar values; the null slot stays null.
#[test]
fn test_boolean_array_lt_eq_scalar() {
    let input = BooleanArray::from(vec![Some(true), Some(false), None]);
    let cases = [
        (false, vec![Some(false), Some(true), None]),
        (true, vec![Some(true), Some(true), None]),
    ];
    for (scalar, expected) in cases {
        let actual: Vec<Option<bool>> =
            lt_eq_bool_scalar(&input, scalar).unwrap().iter().collect();
        assert_eq!(actual, expected);
    }
}
// `gt_bool_scalar` against both scalar values; the null slot stays null.
#[test]
fn test_boolean_array_gt_scalar() {
    let input = BooleanArray::from(vec![Some(true), Some(false), None]);
    let cases = [
        (false, vec![Some(true), Some(false), None]),
        (true, vec![Some(false), Some(false), None]),
    ];
    for (scalar, expected) in cases {
        let actual: Vec<Option<bool>> =
            gt_bool_scalar(&input, scalar).unwrap().iter().collect();
        assert_eq!(actual, expected);
    }
}
// `gt_eq_bool_scalar` against both scalar values; the null slot stays null.
#[test]
fn test_boolean_array_gt_eq_scalar() {
    let input = BooleanArray::from(vec![Some(true), Some(false), None]);
    let cases = [
        (false, vec![Some(true), Some(true), None]),
        (true, vec![Some(true), Some(false), None]),
    ];
    for (scalar, expected) in cases {
        let actual: Vec<Option<bool>> =
            gt_eq_bool_scalar(&input, scalar).unwrap().iter().collect();
        assert_eq!(actual, expected);
    }
}
// `lt` / `lt_dyn` on Int64 and TimestampMillisecond arrays: a constant 8 on the left,
// 6..=10 repeated twice on the right.
#[test]
fn test_primitive_array_lt() {
cmp_i64!(
lt,
lt_dyn,
vec![8, 8, 8, 8, 8, 8, 8, 8, 8, 8],
vec![6, 7, 8, 9, 10, 6, 7, 8, 9, 10],
vec![false, false, false, true, true, false, false, false, true, true]
);
cmp_vec!(
lt,
lt_dyn,
TimestampMillisecondArray,
vec![8, 8, 8, 8, 8, 8, 8, 8, 8, 8],
vec![6, 7, 8, 9, 10, 6, 7, 8, 9, 10],
vec![false, false, false, true, true, false, false, false, true, true]
);
}
// `lt_scalar` against the scalar 8: true only for 6 and 7.
#[test]
fn test_primitive_array_lt_scalar() {
cmp_i64_scalar!(
lt_scalar,
vec![6, 7, 8, 9, 10, 6, 7, 8, 9, 10],
8,
vec![true, true, false, false, false, true, true, false, false, false]
);
}
// `lt` / `lt_dyn` with nulls: the result is null wherever either input is null.
#[test]
fn test_primitive_array_lt_nulls() {
cmp_i64!(
lt,
lt_dyn,
vec![None, None, Some(1), Some(1), None, None, Some(2), Some(2),],
vec![None, Some(1), None, Some(1), None, Some(3), None, Some(3),],
vec![None, None, None, Some(false), None, None, None, Some(true)]
);
cmp_vec!(
lt,
lt_dyn,
TimestampMillisecondArray,
vec![None, None, Some(1), Some(1), None, None, Some(2), Some(2),],
vec![None, Some(1), None, Some(1), None, Some(3), None, Some(3),],
vec![None, None, None, Some(false), None, None, None, Some(true)]
);
}
// `lt_scalar` against the scalar 2 with nulls: null inputs give null results.
#[test]
fn test_primitive_array_lt_scalar_nulls() {
cmp_i64_scalar!(
lt_scalar,
vec![None, Some(1), Some(2), Some(3), None, Some(1), Some(2), Some(3), Some(2), None],
2,
vec![None, Some(true), Some(false), Some(false), None, Some(true), Some(false), Some(false), Some(false), None]
);
}
// `lt_eq` / `lt_eq_dyn` on Int64: a constant 8 on the left, 6..=10 repeated twice on the right.
#[test]
fn test_primitive_array_lt_eq() {
cmp_i64!(
lt_eq,
lt_eq_dyn,
vec![8, 8, 8, 8, 8, 8, 8, 8, 8, 8],
vec![6, 7, 8, 9, 10, 6, 7, 8, 9, 10],
vec![false, false, true, true, true, false, false, true, true, true]
);
}
// `lt_eq_scalar` against the scalar 8: true for 6, 7 and 8.
#[test]
fn test_primitive_array_lt_eq_scalar() {
cmp_i64_scalar!(
lt_eq_scalar,
vec![6, 7, 8, 9, 10, 6, 7, 8, 9, 10],
8,
vec![true, true, true, false, false, true, true, true, false, false]
);
}
// `lt_eq` / `lt_eq_dyn` with nulls: the result is null wherever either input is null.
#[test]
fn test_primitive_array_lt_eq_nulls() {
cmp_i64!(
lt_eq,
lt_eq_dyn,
vec![None, None, Some(1), None, None, Some(1), None, None, Some(1)],
vec![None, Some(1), Some(0), None, Some(1), Some(2), None, None, Some(3)],
vec![None, None, Some(false), None, None, Some(true), None, None, Some(true)]
);
}
// `lt_eq_scalar` against the scalar 1 with nulls: null inputs give null results.
#[test]
fn test_primitive_array_lt_eq_scalar_nulls() {
cmp_i64_scalar!(
lt_eq_scalar,
vec![None, Some(1), Some(2), None, Some(1), Some(2), None, Some(1), Some(2)],
1,
vec![None, Some(true), Some(false), None, Some(true), Some(false), None, Some(true), Some(false)]
);
}
// `gt` / `gt_dyn` on Int64: a constant 8 on the left, 6..=10 repeated twice on the right.
#[test]
fn test_primitive_array_gt() {
cmp_i64!(
gt,
gt_dyn,
vec![8, 8, 8, 8, 8, 8, 8, 8, 8, 8],
vec![6, 7, 8, 9, 10, 6, 7, 8, 9, 10],
vec![true, true, false, false, false, true, true, false, false, false]
);
}
// `gt_scalar` against the scalar 8: true only for 9 and 10.
#[test]
fn test_primitive_array_gt_scalar() {
cmp_i64_scalar!(
gt_scalar,
vec![6, 7, 8, 9, 10, 6, 7, 8, 9, 10],
8,
vec![false, false, false, true, true, false, false, false, true, true]
);
}
// `gt` / `gt_dyn` with nulls: the result is null wherever either input is null.
#[test]
fn test_primitive_array_gt_nulls() {
cmp_i64!(
gt,
gt_dyn,
vec![None, None, Some(1), None, None, Some(2), None, None, Some(3)],
vec![None, Some(1), Some(1), None, Some(1), Some(1), None, Some(1), Some(1)],
vec![None, None, Some(false), None, None, Some(true), None, None, Some(true)]
);
}
// `gt_scalar` against the scalar 1 with nulls: null inputs give null results.
#[test]
fn test_primitive_array_gt_scalar_nulls() {
cmp_i64_scalar!(
gt_scalar,
vec![None, Some(1), Some(2), None, Some(1), Some(2), None, Some(1), Some(2)],
1,
vec![None, Some(false), Some(true), None, Some(false), Some(true), None, Some(false), Some(true)]
);
}
// `gt_eq` / `gt_eq_dyn` on Int64: a constant 8 on the left, 6..=10 repeated twice on the right.
#[test]
fn test_primitive_array_gt_eq() {
cmp_i64!(
gt_eq,
gt_eq_dyn,
vec![8, 8, 8, 8, 8, 8, 8, 8, 8, 8],
vec![6, 7, 8, 9, 10, 6, 7, 8, 9, 10],
vec![true, true, true, false, false, true, true, true, false, false]
);
}
// `gt_eq_scalar` against the scalar 8: true for 8, 9 and 10.
#[test]
fn test_primitive_array_gt_eq_scalar() {
cmp_i64_scalar!(
gt_eq_scalar,
vec![6, 7, 8, 9, 10, 6, 7, 8, 9, 10],
8,
vec![false, false, true, true, true, false, false, true, true, true]
);
}
// `gt_eq` / `gt_eq_dyn` with nulls: the result is null wherever either input is null.
#[test]
fn test_primitive_array_gt_eq_nulls() {
cmp_i64!(
gt_eq,
gt_eq_dyn,
vec![None, None, Some(1), None, Some(1), Some(2), None, None, Some(1)],
vec![None, Some(1), None, None, Some(1), Some(1), None, Some(2), Some(2)],
vec![None, None, None, None, Some(true), Some(true), None, None, Some(false)]
);
}
// `gt_eq_scalar` against the scalar 2 with nulls: null inputs give null results.
#[test]
fn test_primitive_array_gt_eq_scalar_nulls() {
cmp_i64_scalar!(
gt_eq_scalar,
vec![None, Some(1), Some(2), None, Some(2), Some(3), None, Some(3), Some(4)],
2,
vec![None, Some(false), Some(true), None, Some(true), Some(true), None, Some(true), Some(true)]
);
}
// `lt` on two sliced arrays: values 50..100 are all smaller than values 150..200.
#[test]
fn test_primitive_array_compare_slice() {
    let lower = (0..100).map(Some).collect::<Int32Array>().slice(50, 50);
    let lower = lower.as_any().downcast_ref::<Int32Array>().unwrap();

    let upper = (100..200).map(Some).collect::<Int32Array>().slice(50, 50);
    let upper = upper.as_any().downcast_ref::<Int32Array>().unwrap();

    let expected: BooleanArray = std::iter::repeat(Some(true)).take(50).collect();
    let actual = lt(lower, upper).unwrap();
    assert_eq!(expected, actual);
}
// `lt_scalar` on a sliced array: values 50..100 are all smaller than 200.
#[test]
fn test_primitive_array_compare_scalar_slice() {
    let sliced = (0..100).map(Some).collect::<Int32Array>().slice(50, 50);
    let sliced = sliced.as_any().downcast_ref::<Int32Array>().unwrap();

    let expected: BooleanArray = std::iter::repeat(Some(true)).take(50).collect();
    let actual = lt_scalar(sliced, 200).unwrap();
    assert_eq!(expected, actual);
}
// The values buffer of a comparison result must be as long as the values buffer of a
// `BooleanArray` built directly from the same number of items.
#[test]
fn test_length_of_result_buffer() {
    // `item_count` is chosen to not be a multiple of the number of SIMD lanes for this
    // type (`Int8Type`), 64.
    let item_count = 130;

    let reference: BooleanArray = vec![true; item_count].into();
    let lhs: PrimitiveArray<Int8Type> = vec![1; item_count].into();
    let rhs: PrimitiveArray<Int8Type> = vec![2; item_count].into();

    let mask = gt_eq(&lhs, &rhs).unwrap();

    let actual_len = mask.data().buffers()[0].len();
    let expected_len = reference.data().buffers()[0].len();
    assert_eq!(actual_len, expected_len);
}
// Expected behaviour:
// contains(1, [1, 2, null]) = true
// contains(3, [1, 2, null]) = false
// contains(null, [1, 2, null]) = false
// contains(null, null) = false
#[test]
fn test_contains() {
    let child_values = Int32Array::from(vec![
        Some(0),
        Some(1),
        Some(2),
        Some(3),
        Some(4),
        Some(5),
        Some(6),
        None,
        Some(7),
    ])
    .data()
    .clone();
    let offsets = Buffer::from_slice_ref(&[0i64, 3, 6, 6, 9]);
    let list_type =
        DataType::LargeList(Box::new(Field::new("item", DataType::Int32, true)));
    let list_data = ArrayData::builder(list_type)
        .len(4)
        .add_buffer(offsets)
        .add_child_data(child_values)
        .null_bit_buffer(Some(Buffer::from([0b00001011])))
        .build()
        .unwrap();

    // [[0, 1, 2], [3, 4, 5], null, [6, null, 7]]
    let haystacks = LargeListArray::from(list_data);

    // A null needle is never contained.
    let null_needles = Int32Array::from(vec![None, None, None, None]);
    let null_outcome = contains(&null_needles, &haystacks).unwrap();
    assert_eq!(
        &null_outcome,
        &BooleanArray::from(vec![false, false, false, false]),
    );

    // Only the first list holds a 0.
    let zero_needles = Int32Array::from(vec![Some(0), Some(0), Some(0), Some(0)]);
    let zero_outcome = contains(&zero_needles, &haystacks).unwrap();
    assert_eq!(
        &zero_outcome,
        &BooleanArray::from(vec![true, false, false, false]),
    );
}
// Comparison kernels on the three interval array types; in each case the statically typed
// kernel and its `_dyn` counterpart must agree with each other and with the expectation.
#[test]
fn test_interval_array() {
    // day-time intervals: `eq`
    let lhs = IntervalDayTimeArray::from(
        vec![Some(0), Some(6), Some(834), None, Some(3), None],
    );
    let rhs = IntervalDayTimeArray::from(
        vec![Some(70), Some(6), Some(833), Some(6), Some(3), None],
    );
    let typed = eq(&lhs, &rhs).unwrap();
    let dynamic = eq_dyn(&lhs, &rhs).unwrap();
    assert_eq!(typed, dynamic);
    assert_eq!(
        &dynamic,
        &BooleanArray::from(
            vec![Some(false), Some(true), Some(false), None, Some(true), None]
        )
    );

    // month-day-nano intervals: `lt`
    let lhs = IntervalMonthDayNanoArray::from(
        vec![Some(0), Some(6), Some(834), None, Some(3), None],
    );
    let rhs = IntervalMonthDayNanoArray::from(
        vec![Some(86), Some(5), Some(8), Some(6), Some(3), None],
    );
    let typed = lt(&lhs, &rhs).unwrap();
    let dynamic = lt_dyn(&lhs, &rhs).unwrap();
    assert_eq!(typed, dynamic);
    assert_eq!(
        &dynamic,
        &BooleanArray::from(
            vec![Some(true), Some(false), Some(false), None, Some(false), None]
        )
    );

    // year-month intervals: `gt_eq`
    let lhs = IntervalYearMonthArray::from(
        vec![Some(0), Some(623), Some(834), None, Some(3), None],
    );
    let rhs = IntervalYearMonthArray::from(
        vec![Some(86), Some(5), Some(834), Some(6), Some(86), None],
    );
    let typed = gt_eq(&lhs, &rhs).unwrap();
    let dynamic = gt_eq_dyn(&lhs, &rhs).unwrap();
    assert_eq!(typed, dynamic);
    assert_eq!(
        &dynamic,
        &BooleanArray::from(
            vec![Some(false), Some(true), Some(true), None, Some(false), None]
        )
    );
}
/// Generates a `#[test]` named `$test_name` that applies the array/array kernel `$op` to
/// `$left` and `$right` and compares each result value against `$expected`. The check is
/// run twice: once with `BinaryArray` inputs and once with `LargeBinaryArray` inputs.
/// `$left`, `$right` and `$expected` are expanded once per run.
macro_rules! test_binary {
($test_name:ident, $left:expr, $right:expr, $op:expr, $expected:expr) => {
#[test]
fn $test_name() {
// first run: BinaryArray
let left = BinaryArray::from_vec($left);
let right = BinaryArray::from_vec($right);
let res = $op(&left, &right).unwrap();
let expected = $expected;
assert_eq!(expected.len(), res.len());
for i in 0..res.len() {
let v = res.value(i);
assert_eq!(v, expected[i]);
}
// second run: LargeBinaryArray
let left = LargeBinaryArray::from_vec($left);
let right = LargeBinaryArray::from_vec($right);
let res = $op(&left, &right).unwrap();
let expected = $expected;
assert_eq!(expected.len(), res.len());
for i in 0..res.len() {
let v = res.value(i);
assert_eq!(v, expected[i]);
}
}
};
}
// `eq_binary_scalar` on a sliced array: the slice starts at the null slot, which stays null.
#[test]
fn test_binary_eq_scalar_on_slice() {
    let full = BinaryArray::from_opt_vec(
        vec![Some(b"hi"), None, Some(b"hello"), Some(b"world")],
    );
    let sliced = full.slice(1, 3);
    let sliced = as_generic_binary_array::<i32>(&sliced);
    let expected = BooleanArray::from(vec![None, Some(true), Some(false)]);
    let actual = eq_binary_scalar(sliced, b"hello").unwrap();
    assert_eq!(actual, expected);
}
/// Generates a `#[test]` named `$test_name` that applies the array/scalar kernel `$op` to
/// `$left` and the scalar `$right` and compares each result value against `$expected`.
/// The check is run twice: once with a `BinaryArray` input and once with a
/// `LargeBinaryArray` input. `$left`, `$right` and `$expected` are expanded several times.
macro_rules! test_binary_scalar {
($test_name:ident, $left:expr, $right:expr, $op:expr, $expected:expr) => {
#[test]
fn $test_name() {
// first run: BinaryArray
let left = BinaryArray::from_vec($left);
let res = $op(&left, $right).unwrap();
let expected = $expected;
assert_eq!(expected.len(), res.len());
for i in 0..res.len() {
let v = res.value(i);
assert_eq!(
v,
expected[i],
"unexpected result when comparing {:?} at position {} to {:?} ",
left.value(i),
i,
$right
);
}
// second run: LargeBinaryArray
let left = LargeBinaryArray::from_vec($left);
let res = $op(&left, $right).unwrap();
let expected = $expected;
assert_eq!(expected.len(), res.len());
for i in 0..res.len() {
let v = res.value(i);
assert_eq!(
v,
expected[i],
"unexpected result when comparing {:?} at position {} to {:?} ",
left.value(i),
i,
$right
);
}
}
};
}
// Binary comparison kernels, array/array (`test_binary!`) and array/scalar
// (`test_binary_scalar!`). Each case ends with a non-UTF-8 byte sequence (`0xff, 0xf8` and
// variants) in addition to the ASCII words; all scalar cases compare against an ASCII word.
test_binary!(
test_binary_array_eq,
vec![b"arrow", b"arrow", b"arrow", b"arrow", &[0xff, 0xf8]],
vec![b"arrow", b"parquet", b"datafusion", b"flight", &[0xff, 0xf8]],
eq_binary,
vec![true, false, false, false, true]
);
test_binary_scalar!(
test_binary_array_eq_scalar,
vec![b"arrow", b"parquet", b"datafusion", b"flight", &[0xff, 0xf8]],
"arrow".as_bytes(),
eq_binary_scalar,
vec![true, false, false, false, false]
);
test_binary!(
test_binary_array_neq,
vec![b"arrow", b"arrow", b"arrow", b"arrow", &[0xff, 0xf8]],
vec![b"arrow", b"parquet", b"datafusion", b"flight", &[0xff, 0xf9]],
neq_binary,
vec![false, true, true, true, true]
);
test_binary_scalar!(
test_binary_array_neq_scalar,
vec![b"arrow", b"parquet", b"datafusion", b"flight", &[0xff, 0xf8]],
"arrow".as_bytes(),
neq_binary_scalar,
vec![false, true, true, true, true]
);
test_binary!(
test_binary_array_lt,
vec![b"arrow", b"datafusion", b"flight", b"parquet", &[0xff, 0xf8]],
vec![b"flight", b"flight", b"flight", b"flight", &[0xff, 0xf9]],
lt_binary,
vec![true, true, false, false, true]
);
test_binary_scalar!(
test_binary_array_lt_scalar,
vec![b"arrow", b"datafusion", b"flight", b"parquet", &[0xff, 0xf8]],
"flight".as_bytes(),
lt_binary_scalar,
vec![true, true, false, false, false]
);
// the last pair checks that a shorter value is `<=` a longer value it is a prefix of
test_binary!(
test_binary_array_lt_eq,
vec![b"arrow", b"datafusion", b"flight", b"parquet", &[0xff, 0xf8]],
vec![b"flight", b"flight", b"flight", b"flight", &[0xff, 0xf8, 0xf9]],
lt_eq_binary,
vec![true, true, true, false, true]
);
test_binary_scalar!(
test_binary_array_lt_eq_scalar,
vec![b"arrow", b"datafusion", b"flight", b"parquet", &[0xff, 0xf8]],
"flight".as_bytes(),
lt_eq_binary_scalar,
vec![true, true, true, false, false]
);
test_binary!(
test_binary_array_gt,
vec![b"arrow", b"datafusion", b"flight", b"parquet", &[0xff, 0xf9]],
vec![b"flight", b"flight", b"flight", b"flight", &[0xff, 0xf8]],
gt_binary,
vec![false, false, false, true, true]
);
test_binary_scalar!(
test_binary_array_gt_scalar,
vec![b"arrow", b"datafusion", b"flight", b"parquet", &[0xff, 0xf8]],
"flight".as_bytes(),
gt_binary_scalar,
vec![false, false, false, true, true]
);
test_binary!(
test_binary_array_gt_eq,
vec![b"arrow", b"datafusion", b"flight", b"parquet", &[0xff, 0xf8]],
vec![b"flight", b"flight", b"flight", b"flight", &[0xff, 0xf8]],
gt_eq_binary,
vec![false, false, true, true, true]
);
test_binary_scalar!(
test_binary_array_gt_eq_scalar,
vec![b"arrow", b"datafusion", b"flight", b"parquet", &[0xff, 0xf8]],
"flight".as_bytes(),
gt_eq_binary_scalar,
vec![false, false, true, true, true]
);
// Expected behaviour:
// contains("ab", ["ab", "cd", null]) = true
// contains("ef", ["ab", "cd", null]) = false
// contains(null, ["ab", "cd", null]) = false
// contains(null, null) = false
#[test]
fn test_contains_utf8() {
    let mut list_builder = ListBuilder::new(StringBuilder::new());

    // row 0: ["Lorem", "ipsum", null]
    list_builder.values().append_value("Lorem");
    list_builder.values().append_value("ipsum");
    list_builder.values().append_null();
    list_builder.append(true);

    // row 1: ["sit", "amet", "Lorem"]
    list_builder.values().append_value("sit");
    list_builder.values().append_value("amet");
    list_builder.values().append_value("Lorem");
    list_builder.append(true);

    // row 2: null
    list_builder.append(false);

    // row 3: ["ipsum"]
    list_builder.values().append_value("ipsum");
    list_builder.append(true);

    // [["Lorem", "ipsum", null], ["sit", "amet", "Lorem"], null, ["ipsum"]]
    let haystacks = list_builder.finish();

    // A null needle is never contained.
    let null_needles = StringArray::from(vec![None, None, None, None]);
    let null_outcome = contains_utf8(&null_needles, &haystacks).unwrap();
    assert_eq!(
        &null_outcome,
        &BooleanArray::from(vec![false, false, false, false]),
    );

    // "Lorem" occurs in the first two lists only.
    let lorem_needles = StringArray::from(vec![
        Some("Lorem"),
        Some("Lorem"),
        Some("Lorem"),
        Some("Lorem"),
    ]);
    let lorem_outcome = contains_utf8(&lorem_needles, &haystacks).unwrap();
    assert_eq!(
        &lorem_outcome,
        &BooleanArray::from(vec![true, true, false, false]),
    );
}
/// Generates a `#[test]` named `$test_name` that applies the array/array kernel `$op` to two
/// `StringArray`s built from `$left` and `$right` and compares each result value against
/// `$expected`.
macro_rules! test_utf8 {
($test_name:ident, $left:expr, $right:expr, $op:expr, $expected:expr) => {
#[test]
fn $test_name() {
let left = StringArray::from($left);
let right = StringArray::from($right);
let res = $op(&left, &right).unwrap();
let expected = $expected;
// same length first, then value by value
assert_eq!(expected.len(), res.len());
for i in 0..res.len() {
let v = res.value(i);
assert_eq!(v, expected[i]);
}
}
};
}
// `eq_utf8_scalar` on a sliced array, once against a regular string and once against the
// empty string; the slice starts at the null slot, which stays null in both results.
#[test]
fn test_utf8_eq_scalar_on_slice() {
    let full = StringArray::from(
        vec![Some("hi"), None, Some("hello"), Some("world"), Some("")],
    );
    let sliced = full.slice(1, 4);
    let sliced = as_string_array(&sliced);

    let matches_hello = eq_utf8_scalar(sliced, "hello").unwrap();
    let expected_hello =
        BooleanArray::from(vec![None, Some(true), Some(false), Some(false)]);
    assert_eq!(matches_hello, expected_hello);

    let matches_empty = eq_utf8_scalar(sliced, "").unwrap();
    let expected_empty =
        BooleanArray::from(vec![None, Some(false), Some(false), Some(true)]);
    assert_eq!(matches_empty, expected_empty);
}
/// Generates a `#[test]` named `$test_name` that applies the array/scalar kernel `$op` to
/// `$left` and the scalar `$right` and compares each result value against `$expected`.
/// The check is run twice: once with a `StringArray` input and once with a
/// `LargeStringArray` input. `$left`, `$right` and `$expected` are expanded several times.
macro_rules! test_utf8_scalar {
($test_name:ident, $left:expr, $right:expr, $op:expr, $expected:expr) => {
#[test]
fn $test_name() {
// first run: StringArray
let left = StringArray::from($left);
let res = $op(&left, $right).unwrap();
let expected = $expected;
assert_eq!(expected.len(), res.len());
for i in 0..res.len() {
let v = res.value(i);
assert_eq!(
v,
expected[i],
"unexpected result when comparing {} at position {} to {} ",
left.value(i),
i,
$right
);
}
// second run: LargeStringArray
let left = LargeStringArray::from($left);
let res = $op(&left, $right).unwrap();
let expected = $expected;
assert_eq!(expected.len(), res.len());
for i in 0..res.len() {
let v = res.value(i);
assert_eq!(
v,
expected[i],
"unexpected result when comparing {} at position {} to {} ",
left.value(i),
i,
$right
);
}
}
};
}
/// Generates a `#[test]` named `$test_name` for an array/array kernel `$op` that takes an
/// optional third "flag" argument. The five-argument form passes `None` as the flag; the
/// six-argument form builds a `StringArray` from `$flag` and passes a reference to it.
/// In both forms each result value is compared against `$expected`.
macro_rules! test_flag_utf8 {
// without flags
($test_name:ident, $left:expr, $right:expr, $op:expr, $expected:expr) => {
#[test]
fn $test_name() {
let left = StringArray::from($left);
let right = StringArray::from($right);
let res = $op(&left, &right, None).unwrap();
let expected = $expected;
assert_eq!(expected.len(), res.len());
for i in 0..res.len() {
let v = res.value(i);
assert_eq!(v, expected[i]);
}
}
};
// with a flag array
($test_name:ident, $left:expr, $right:expr, $flag:expr, $op:expr, $expected:expr) => {
#[test]
fn $test_name() {
let left = StringArray::from($left);
let right = StringArray::from($right);
let flag = Some(StringArray::from($flag));
let res = $op(&left, &right, flag.as_ref()).unwrap();
let expected = $expected;
assert_eq!(expected.len(), res.len());
for i in 0..res.len() {
let v = res.value(i);
assert_eq!(v, expected[i]);
}
}
};
}
/// Generates a `#[test]` named `$test_name` for an array/scalar kernel `$op` that takes an
/// optional third "flag" argument. The five-argument form passes `None` as the flag; the
/// six-argument form passes `Some($flag)`. In both forms each result value is compared
/// against `$expected`.
macro_rules! test_flag_utf8_scalar {
// without a flag
($test_name:ident, $left:expr, $right:expr, $op:expr, $expected:expr) => {
#[test]
fn $test_name() {
let left = StringArray::from($left);
let res = $op(&left, $right, None).unwrap();
let expected = $expected;
assert_eq!(expected.len(), res.len());
for i in 0..res.len() {
let v = res.value(i);
assert_eq!(
v,
expected[i],
"unexpected result when comparing {} at position {} to {} ",
left.value(i),
i,
$right
);
}
}
};
// with a scalar flag
($test_name:ident, $left:expr, $right:expr, $flag:expr, $op:expr, $expected:expr) => {
#[test]
fn $test_name() {
let left = StringArray::from($left);
let flag = Some($flag);
let res = $op(&left, $right, flag).unwrap();
let expected = $expected;
assert_eq!(expected.len(), res.len());
for i in 0..res.len() {
let v = res.value(i);
assert_eq!(
v,
expected[i],
"unexpected result when comparing {} at position {} to {} ",
left.value(i),
i,
$right
);
}
}
};
}
// ---- LIKE cases -------------------------------------------------------------
// Each invocation lists: test name, left values, pattern(s), kernel, expected.
// The expectations treat `%` as "any run of characters" and `_` as "exactly one
// character" (e.g. "arrow_" matches "arrows" but not "arrow").

// Array-vs-array LIKE: every row has its own pattern. The last row uses ".*",
// which is expected NOT to match "arrow".
test_utf8!(
test_utf8_array_like,
vec!["arrow", "arrow", "arrow", "arrow", "arrow", "arrows", "arrow", "arrow"],
vec!["arrow", "ar%", "%ro%", "foo", "arr", "arrow_", "arrow_", ".*"],
like_utf8,
vec![true, true, true, false, false, true, false, false]
);
// Parentheses in the pattern are expected to match literal parentheses.
test_utf8_scalar!(
test_utf8_array_like_scalar_escape_testing,
vec!["varchar(255)", "int(255)", "varchar", "int"],
"%(%)%",
like_utf8_scalar,
vec![true, true, false, false]
);
// ".*" as a LIKE pattern only matches the literal string ".*".
test_utf8_scalar!(
test_utf8_array_like_scalar_escape_regex,
vec![".*", "a", "*"],
".*",
like_utf8_scalar,
vec![true, false, false]
);
// "." as a LIKE pattern only matches the literal string ".".
test_utf8_scalar!(
test_utf8_array_like_scalar_escape_regex_dot,
vec![".", "a", "*"],
".",
like_utf8_scalar,
vec![true, false, false]
);
// "%ar%": substring match.
test_utf8_scalar!(
test_utf8_array_like_scalar,
vec!["arrow", "parquet", "datafusion", "flight"],
"%ar%",
like_utf8_scalar,
vec![true, true, false, false]
);
// "arrow%": prefix match.
test_utf8_scalar!(
test_utf8_array_like_scalar_start,
vec!["arrow", "parrow", "arrows", "arr"],
"arrow%",
like_utf8_scalar,
vec![true, false, true, false]
);
// "%arrow": suffix match.
test_utf8_scalar!(
test_utf8_array_like_scalar_end,
vec!["arrow", "parrow", "arrows", "arr"],
"%arrow",
like_utf8_scalar,
vec![true, true, false, false]
);
// No wildcard: exact match only.
test_utf8_scalar!(
test_utf8_array_like_scalar_equals,
vec!["arrow", "parrow", "arrows", "arr"],
"arrow",
like_utf8_scalar,
vec![true, false, false, false]
);
// Trailing `_`: exactly one extra character is required.
test_utf8_scalar!(
test_utf8_array_like_scalar_one,
vec!["arrow", "arrows", "parrow", "arr"],
"arrow_",
like_utf8_scalar,
vec![false, true, false, false]
);
// The pattern `a\%` (backslash-escaped `%`) matches "a%" but not `a\x`.
test_utf8_scalar!(
test_utf8_scalar_like_escape,
vec!["a%", "a\\x"],
"a\\%",
like_utf8_scalar,
vec![true, false]
);
// Same escape, preceded by an unescaped leading `%`.
test_utf8_scalar!(
test_utf8_scalar_like_escape_contains,
vec!["ba%", "ba\\x"],
"%a\\%",
like_utf8_scalar,
vec![true, false]
);
// NOTE: despite "scalar" in the name, this case goes through the array-vs-array
// macro and the `ilike_utf8` kernel. The pattern is `\%_\%`.
test_utf8!(
test_utf8_scalar_ilike_regex,
vec!["%%%"],
vec![r#"\%_\%"#],
ilike_utf8,
vec![true]
);
#[test]
fn test_replace_like_wildcards() {
    // The LIKE wildcards `_` and `%` are expected to come back as `.` and `.*`.
    let like_pattern = "_%";
    let rewritten = replace_like_wildcards(like_pattern).unwrap();
    assert_eq!(rewritten, "..*");
}
#[test]
fn test_replace_like_wildcards_leave_like_meta_chars() {
    // Backslash-escaped `%` and `_` are expected to come back as the bare characters.
    let like_pattern = "\\%\\_";
    let rewritten = replace_like_wildcards(like_pattern).unwrap();
    assert_eq!(rewritten, "%_");
}
#[test]
fn test_replace_like_wildcards_with_multiple_escape_chars() {
    // Two backslashes followed by `%` are expected to pass through unchanged.
    let like_pattern = "\\\\%";
    let rewritten = replace_like_wildcards(like_pattern).unwrap();
    assert_eq!(rewritten, "\\\\%");
}
#[test]
fn test_replace_like_wildcards_escape_regex_meta_char() {
    // A bare `.` is expected to come back backslash-escaped.
    let like_pattern = ".";
    let rewritten = replace_like_wildcards(like_pattern).unwrap();
    assert_eq!(rewritten, "\\.");
}
// ---- Equality cases ---------------------------------------------------------
// Array-vs-array: only the first row pairs "arrow" with "arrow".
test_utf8!(
test_utf8_array_eq,
vec!["arrow", "arrow", "arrow", "arrow"],
vec!["arrow", "parquet", "datafusion", "flight"],
eq_utf8,
vec![true, false, false, false]
);
// Array-vs-scalar: every row is compared against the single value "arrow".
test_utf8_scalar!(
test_utf8_array_eq_scalar,
vec!["arrow", "parquet", "datafusion", "flight"],
"arrow",
eq_utf8_scalar,
vec![true, false, false, false]
);
// ---- NOT LIKE cases ---------------------------------------------------------
// Each expectation is the element-wise negation of what the same
// inputs would produce under LIKE.

// Array-vs-array NOT LIKE with a per-row pattern.
test_utf8!(
test_utf8_array_nlike,
vec!["arrow", "arrow", "arrow", "arrow", "arrow", "arrows", "arrow"],
vec!["arrow", "ar%", "%ro%", "foo", "arr", "arrow_", "arrow_"],
nlike_utf8,
vec![false, false, false, true, true, false, true]
);
// Parentheses in the pattern are expected to match literal parentheses.
test_utf8_scalar!(
test_utf8_array_nlike_escape_testing,
vec!["varchar(255)", "int(255)", "varchar", "int"],
"%(%)%",
nlike_utf8_scalar,
vec![false, false, true, true]
);
// ".*" only matches the literal ".*", so only that row is false.
test_utf8_scalar!(
test_utf8_array_nlike_scalar_escape_regex,
vec![".*", "a", "*"],
".*",
nlike_utf8_scalar,
vec![false, true, true]
);
// "." only matches the literal ".", so only that row is false.
test_utf8_scalar!(
test_utf8_array_nlike_scalar_escape_regex_dot,
vec![".", "a", "*"],
".",
nlike_utf8_scalar,
vec![false, true, true]
);
// "%ar%": substring pattern.
test_utf8_scalar!(
test_utf8_array_nlike_scalar,
vec!["arrow", "parquet", "datafusion", "flight"],
"%ar%",
nlike_utf8_scalar,
vec![false, false, true, true]
);
// "arrow%": prefix pattern.
test_utf8_scalar!(
test_utf8_array_nlike_scalar_start,
vec!["arrow", "parrow", "arrows", "arr"],
"arrow%",
nlike_utf8_scalar,
vec![false, true, false, true]
);
// "%arrow": suffix pattern.
test_utf8_scalar!(
test_utf8_array_nlike_scalar_end,
vec!["arrow", "parrow", "arrows", "arr"],
"%arrow",
nlike_utf8_scalar,
vec![false, false, true, true]
);
// No wildcard: only the exact string "arrow" yields false.
test_utf8_scalar!(
test_utf8_array_nlike_scalar_equals,
vec!["arrow", "parrow", "arrows", "arr"],
"arrow",
nlike_utf8_scalar,
vec![false, true, true, true]
);
// Trailing `_`: only "arrows" (one extra character) yields false.
test_utf8_scalar!(
test_utf8_array_nlike_scalar_one,
vec!["arrow", "arrows", "parrow", "arr"],
"arrow_",
nlike_utf8_scalar,
vec![true, false, true, true]
);
// ---- ILIKE cases ------------------------------------------------------------
// Same pattern shapes as the LIKE cases, but inputs and patterns differ in
// letter case and are still expected to match.

// Array-vs-array ILIKE with a per-row pattern.
test_utf8!(
test_utf8_array_ilike,
vec!["arrow", "arrow", "ARROW", "arrow", "ARROW", "ARROWS", "arROw"],
vec!["arrow", "ar%", "%ro%", "foo", "ar%r", "arrow_", "arrow_"],
ilike_utf8,
vec![true, true, true, false, false, true, false]
);
// Parentheses in the pattern are expected to match literal parentheses.
test_utf8_scalar!(
ilike_utf8_scalar_escape_testing,
vec!["varchar(255)", "int(255)", "varchar", "int"],
"%(%)%",
ilike_utf8_scalar,
vec![true, true, false, false]
);
// Upper-case substring pattern against lower-case data.
test_utf8_scalar!(
test_utf8_array_ilike_scalar,
vec!["arrow", "parquet", "datafusion", "flight"],
"%AR%",
ilike_utf8_scalar,
vec![true, true, false, false]
);
// Mixed-case prefix pattern.
test_utf8_scalar!(
test_utf8_array_ilike_scalar_start,
vec!["arrow", "parrow", "arrows", "ARR"],
"aRRow%",
ilike_utf8_scalar,
vec![true, false, true, false]
);
// Lower-case suffix pattern against mixed-case data.
test_utf8_scalar!(
test_utf8_array_ilike_scalar_end,
vec!["ArroW", "parrow", "ARRowS", "arr"],
"%arrow",
ilike_utf8_scalar,
vec![true, true, false, false]
);
// No wildcard: match on the whole string, ignoring case.
test_utf8_scalar!(
test_utf8_array_ilike_scalar_equals,
vec!["arrow", "parrow", "arrows", "arr"],
"Arrow",
ilike_utf8_scalar,
vec![true, false, false, false]
);
// Trailing `_`: exactly one extra character is required.
test_utf8_scalar!(
test_utf8_array_ilike_scalar_one,
vec!["arrow", "arrows", "parrow", "arr"],
"arrow_",
ilike_utf8_scalar,
vec![false, true, false, false]
);
// ---- NOT ILIKE cases --------------------------------------------------------
// Each expectation is the element-wise negation of the corresponding ILIKE case.

// Array-vs-array NOT ILIKE with a per-row pattern.
test_utf8!(
test_utf8_array_nilike,
vec!["arrow", "arrow", "ARROW", "arrow", "ARROW", "ARROWS", "arROw"],
vec!["arrow", "ar%", "%ro%", "foo", "ar%r", "arrow_", "arrow_"],
nilike_utf8,
vec![false, false, false, true, true, false, true]
);
// Parentheses in the pattern are expected to match literal parentheses.
test_utf8_scalar!(
nilike_utf8_scalar_escape_testing,
vec!["varchar(255)", "int(255)", "varchar", "int"],
"%(%)%",
nilike_utf8_scalar,
vec![false, false, true, true]
);
// Upper-case substring pattern against lower-case data.
test_utf8_scalar!(
test_utf8_array_nilike_scalar,
vec!["arrow", "parquet", "datafusion", "flight"],
"%AR%",
nilike_utf8_scalar,
vec![false, false, true, true]
);
// Mixed-case prefix pattern.
test_utf8_scalar!(
test_utf8_array_nilike_scalar_start,
vec!["arrow", "parrow", "arrows", "ARR"],
"aRRow%",
nilike_utf8_scalar,
vec![false, true, false, true]
);
// Lower-case suffix pattern against mixed-case data.
test_utf8_scalar!(
test_utf8_array_nilike_scalar_end,
vec!["ArroW", "parrow", "ARRowS", "arr"],
"%arrow",
nilike_utf8_scalar,
vec![false, false, true, true]
);
// No wildcard: only the case-insensitive whole-string match yields false.
test_utf8_scalar!(
test_utf8_array_nilike_scalar_equals,
vec!["arRow", "parrow", "arrows", "arr"],
"Arrow",
nilike_utf8_scalar,
vec![false, true, true, true]
);
// Trailing `_`: only "arrows" (one extra character) yields false.
test_utf8_scalar!(
test_utf8_array_nilike_scalar_one,
vec!["arrow", "arrows", "parrow", "arr"],
"arrow_",
nilike_utf8_scalar,
vec![true, false, true, true]
);
// ---- Inequality and ordering cases ------------------------------------------
// Each operator is exercised twice: array-vs-array and array-vs-scalar.
// The ordering cases all compare against "flight"; the expectations follow
// byte-wise string order ("arrow" < "datafusion" < "flight" < "parquet").

// neq: array-vs-array.
test_utf8!(
test_utf8_array_neq,
vec!["arrow", "arrow", "arrow", "arrow"],
vec!["arrow", "parquet", "datafusion", "flight"],
neq_utf8,
vec![false, true, true, true]
);
// neq: array-vs-scalar.
test_utf8_scalar!(
test_utf8_array_neq_scalar,
vec!["arrow", "parquet", "datafusion", "flight"],
"arrow",
neq_utf8_scalar,
vec![false, true, true, true]
);
// lt: array-vs-array.
test_utf8!(
test_utf8_array_lt,
vec!["arrow", "datafusion", "flight", "parquet"],
vec!["flight", "flight", "flight", "flight"],
lt_utf8,
vec![true, true, false, false]
);
// lt: array-vs-scalar.
test_utf8_scalar!(
test_utf8_array_lt_scalar,
vec!["arrow", "datafusion", "flight", "parquet"],
"flight",
lt_utf8_scalar,
vec![true, true, false, false]
);
// lt_eq: array-vs-array.
test_utf8!(
test_utf8_array_lt_eq,
vec!["arrow", "datafusion", "flight", "parquet"],
vec!["flight", "flight", "flight", "flight"],
lt_eq_utf8,
vec![true, true, true, false]
);
// lt_eq: array-vs-scalar.
test_utf8_scalar!(
test_utf8_array_lt_eq_scalar,
vec!["arrow", "datafusion", "flight", "parquet"],
"flight",
lt_eq_utf8_scalar,
vec![true, true, true, false]
);
// gt: array-vs-array.
test_utf8!(
test_utf8_array_gt,
vec!["arrow", "datafusion", "flight", "parquet"],
vec!["flight", "flight", "flight", "flight"],
gt_utf8,
vec![false, false, false, true]
);
// gt: array-vs-scalar.
test_utf8_scalar!(
test_utf8_array_gt_scalar,
vec!["arrow", "datafusion", "flight", "parquet"],
"flight",
gt_utf8_scalar,
vec![false, false, false, true]
);
// gt_eq: array-vs-array.
test_utf8!(
test_utf8_array_gt_eq,
vec!["arrow", "datafusion", "flight", "parquet"],
vec!["flight", "flight", "flight", "flight"],
gt_eq_utf8,
vec![false, false, true, true]
);
// gt_eq: array-vs-scalar.
test_utf8_scalar!(
test_utf8_array_gt_eq_scalar,
vec!["arrow", "datafusion", "flight", "parquet"],
"flight",
gt_eq_utf8_scalar,
vec![false, false, true, true]
);
// ---- regexp_is_match cases --------------------------------------------------
// Without a flag argument, matching is case-sensitive ("^AR" and "OW$" do not
// match "arrow"); with the "i" flag the same patterns do match. The empty
// pattern is expected to match every input.

// Array-vs-array, no flags.
test_flag_utf8!(
test_utf8_array_regexp_is_match,
vec!["arrow", "arrow", "arrow", "arrow", "arrow", "arrow"],
vec!["^ar", "^AR", "ow$", "OW$", "foo", ""],
regexp_is_match_utf8,
vec![true, false, true, false, false, true]
);
// Array-vs-array with the "i" flag supplied for each of the six rows.
test_flag_utf8!(
test_utf8_array_regexp_is_match_insensitive,
vec!["arrow", "arrow", "arrow", "arrow", "arrow", "arrow"],
vec!["^ar", "^AR", "ow$", "OW$", "foo", ""],
vec!["i"; 6],
regexp_is_match_utf8,
vec![true, true, true, true, false, true]
);
// Array-vs-scalar, no flag.
test_flag_utf8_scalar!(
test_utf8_array_regexp_is_match_scalar,
vec!["arrow", "ARROW", "parquet", "PARQUET"],
"^ar",
regexp_is_match_utf8_scalar,
vec![true, false, false, false]
);
// Array-vs-scalar with an empty pattern.
test_flag_utf8_scalar!(
test_utf8_array_regexp_is_match_empty_scalar,
vec!["arrow", "ARROW", "parquet", "PARQUET"],
"",
regexp_is_match_utf8_scalar,
vec![true, true, true, true]
);
// Array-vs-scalar with the "i" flag.
test_flag_utf8_scalar!(
test_utf8_array_regexp_is_match_insensitive_scalar,
vec!["arrow", "ARROW", "parquet", "PARQUET"],
"^ar",
"i",
regexp_is_match_utf8_scalar,
vec![true, true, false, false]
);
#[test]
fn test_eq_dyn_scalar() {
    // Only the two 8s equal the scalar.
    let input = Int32Array::from(vec![6, 7, 8, 8, 10]);
    let expected = BooleanArray::from(vec![
        Some(false),
        Some(false),
        Some(true),
        Some(true),
        Some(false),
    ]);
    let actual = eq_dyn_scalar(&input, 8).unwrap();
    assert_eq!(actual, expected);
}
#[test]
fn test_eq_dyn_scalar_with_dict() {
    let mut dict_builder =
        PrimitiveDictionaryBuilder::<Int8Type, Int32Type>::with_capacity(3, 2);
    // Slots are appended in order; `None` becomes a null entry.
    for slot in [Some(123), None, Some(23)] {
        if let Some(value) = slot {
            dict_builder.append(value).unwrap();
        } else {
            dict_builder.append_null();
        }
    }
    let dict = dict_builder.finish();
    let expected = BooleanArray::from(vec![Some(true), None, Some(false)]);
    let actual = eq_dyn_scalar(&dict, 123).unwrap();
    assert_eq!(actual, expected);
}
#[test]
fn test_eq_dyn_scalar_float() {
    let values = vec![6.0, 7.0, 8.0, 8.0, 10.0];
    let float32: Float32Array = values.into_iter().map(Some).collect();
    let want = BooleanArray::from(vec![
        Some(false),
        Some(false),
        Some(true),
        Some(true),
        Some(false),
    ]);
    assert_eq!(eq_dyn_scalar(&float32, 8).unwrap(), want);

    // Same data cast to Float64 must give the same answer.
    let float32: ArrayRef = Arc::new(float32);
    let float64 = crate::compute::cast(&float32, &DataType::Float64).unwrap();
    assert_eq!(eq_dyn_scalar(&float64, 8).unwrap(), want);
}
#[test]
fn test_lt_dyn_scalar() {
    // 6 and 7 are the only values strictly below 8.
    let input = Int32Array::from(vec![6, 7, 8, 8, 10]);
    let expected = BooleanArray::from(vec![
        Some(true),
        Some(true),
        Some(false),
        Some(false),
        Some(false),
    ]);
    let actual = lt_dyn_scalar(&input, 8).unwrap();
    assert_eq!(actual, expected);
}
#[test]
fn test_lt_dyn_scalar_with_dict() {
    let mut dict_builder =
        PrimitiveDictionaryBuilder::<Int8Type, Int32Type>::with_capacity(3, 2);
    // Slots are appended in order; `None` becomes a null entry.
    for slot in [Some(123), None, Some(23)] {
        if let Some(value) = slot {
            dict_builder.append(value).unwrap();
        } else {
            dict_builder.append_null();
        }
    }
    let dict = dict_builder.finish();
    let expected = BooleanArray::from(vec![Some(false), None, Some(true)]);
    let actual = lt_dyn_scalar(&dict, 123).unwrap();
    assert_eq!(actual, expected);
}
#[test]
fn test_lt_dyn_scalar_float() {
    let values = vec![6.0, 7.0, 8.0, 8.0, 10.0];
    let float32: Float32Array = values.into_iter().map(Some).collect();
    let want = BooleanArray::from(vec![
        Some(true),
        Some(true),
        Some(false),
        Some(false),
        Some(false),
    ]);
    assert_eq!(lt_dyn_scalar(&float32, 8).unwrap(), want);

    // Same data cast to Float64 must give the same answer.
    let float32: ArrayRef = Arc::new(float32);
    let float64 = crate::compute::cast(&float32, &DataType::Float64).unwrap();
    assert_eq!(lt_dyn_scalar(&float64, 8).unwrap(), want);
}
#[test]
fn test_lt_eq_dyn_scalar() {
    // Everything except 10 is at or below 8.
    let input = Int32Array::from(vec![6, 7, 8, 8, 10]);
    let expected = BooleanArray::from(vec![
        Some(true),
        Some(true),
        Some(true),
        Some(true),
        Some(false),
    ]);
    let actual = lt_eq_dyn_scalar(&input, 8).unwrap();
    assert_eq!(actual, expected);
}
#[test]
fn test_lt_eq_dyn_scalar_with_dict() {
    let mut dict_builder =
        PrimitiveDictionaryBuilder::<Int8Type, Int32Type>::with_capacity(3, 2);
    // Slots are appended in order; `None` becomes a null entry.
    for slot in [Some(123), None, Some(23)] {
        if let Some(value) = slot {
            dict_builder.append(value).unwrap();
        } else {
            dict_builder.append_null();
        }
    }
    let dict = dict_builder.finish();
    let expected = BooleanArray::from(vec![Some(false), None, Some(true)]);
    let actual = lt_eq_dyn_scalar(&dict, 23).unwrap();
    assert_eq!(actual, expected);
}
#[test]
fn test_lt_eq_dyn_scalar_float() {
    let values = vec![6.0, 7.0, 8.0, 8.0, 10.0];
    let float32: Float32Array = values.into_iter().map(Some).collect();
    let want = BooleanArray::from(vec![
        Some(true),
        Some(true),
        Some(true),
        Some(true),
        Some(false),
    ]);
    assert_eq!(lt_eq_dyn_scalar(&float32, 8).unwrap(), want);

    // Same data cast to Float64 must give the same answer.
    let float32: ArrayRef = Arc::new(float32);
    let float64 = crate::compute::cast(&float32, &DataType::Float64).unwrap();
    assert_eq!(lt_eq_dyn_scalar(&float64, 8).unwrap(), want);
}
#[test]
fn test_gt_dyn_scalar() {
    // Only 10 is strictly above 8.
    let input = Int32Array::from(vec![6, 7, 8, 8, 10]);
    let expected = BooleanArray::from(vec![
        Some(false),
        Some(false),
        Some(false),
        Some(false),
        Some(true),
    ]);
    let actual = gt_dyn_scalar(&input, 8).unwrap();
    assert_eq!(actual, expected);
}
#[test]
fn test_gt_dyn_scalar_with_dict() {
    let mut dict_builder =
        PrimitiveDictionaryBuilder::<Int8Type, Int32Type>::with_capacity(3, 2);
    // Slots are appended in order; `None` becomes a null entry.
    for slot in [Some(123), None, Some(23)] {
        if let Some(value) = slot {
            dict_builder.append(value).unwrap();
        } else {
            dict_builder.append_null();
        }
    }
    let dict = dict_builder.finish();
    let expected = BooleanArray::from(vec![Some(true), None, Some(false)]);
    let actual = gt_dyn_scalar(&dict, 23).unwrap();
    assert_eq!(actual, expected);
}
#[test]
fn test_gt_dyn_scalar_float() {
    let values = vec![6.0, 7.0, 8.0, 8.0, 10.0];
    let float32: Float32Array = values.into_iter().map(Some).collect();
    let want = BooleanArray::from(vec![
        Some(false),
        Some(false),
        Some(false),
        Some(false),
        Some(true),
    ]);
    assert_eq!(gt_dyn_scalar(&float32, 8).unwrap(), want);

    // Same data cast to Float64 must give the same answer.
    let float32: ArrayRef = Arc::new(float32);
    let float64 = crate::compute::cast(&float32, &DataType::Float64).unwrap();
    assert_eq!(gt_dyn_scalar(&float64, 8).unwrap(), want);
}
#[test]
fn test_gt_eq_dyn_scalar() {
    // The two 8s and the 10 are at or above 8.
    let input = Int32Array::from(vec![6, 7, 8, 8, 10]);
    let expected = BooleanArray::from(vec![
        Some(false),
        Some(false),
        Some(true),
        Some(true),
        Some(true),
    ]);
    let actual = gt_eq_dyn_scalar(&input, 8).unwrap();
    assert_eq!(actual, expected);
}
#[test]
fn test_gt_eq_dyn_scalar_with_dict() {
    let mut dict_builder =
        PrimitiveDictionaryBuilder::<Int8Type, Int32Type>::with_capacity(3, 2);
    // Slots are appended in order; `None` becomes a null entry.
    for slot in [Some(22), None, Some(23)] {
        if let Some(value) = slot {
            dict_builder.append(value).unwrap();
        } else {
            dict_builder.append_null();
        }
    }
    let dict = dict_builder.finish();
    let expected = BooleanArray::from(vec![Some(false), None, Some(true)]);
    let actual = gt_eq_dyn_scalar(&dict, 23).unwrap();
    assert_eq!(actual, expected);
}
#[test]
fn test_gt_eq_dyn_scalar_float() {
    let values = vec![6.0, 7.0, 8.0, 8.0, 10.0];
    let float32: Float32Array = values.into_iter().map(Some).collect();
    let want = BooleanArray::from(vec![
        Some(false),
        Some(false),
        Some(true),
        Some(true),
        Some(true),
    ]);
    assert_eq!(gt_eq_dyn_scalar(&float32, 8).unwrap(), want);

    // Same data cast to Float64 must give the same answer.
    let float32: ArrayRef = Arc::new(float32);
    let float64 = crate::compute::cast(&float32, &DataType::Float64).unwrap();
    assert_eq!(gt_eq_dyn_scalar(&float64, 8).unwrap(), want);
}
#[test]
fn test_neq_dyn_scalar() {
    // Everything except the two 8s differs from the scalar.
    let input = Int32Array::from(vec![6, 7, 8, 8, 10]);
    let expected = BooleanArray::from(vec![
        Some(true),
        Some(true),
        Some(false),
        Some(false),
        Some(true),
    ]);
    let actual = neq_dyn_scalar(&input, 8).unwrap();
    assert_eq!(actual, expected);
}
#[test]
fn test_neq_dyn_scalar_with_dict() {
    let mut dict_builder =
        PrimitiveDictionaryBuilder::<Int8Type, Int32Type>::with_capacity(3, 2);
    // Slots are appended in order; `None` becomes a null entry.
    for slot in [Some(22), None, Some(23)] {
        if let Some(value) = slot {
            dict_builder.append(value).unwrap();
        } else {
            dict_builder.append_null();
        }
    }
    let dict = dict_builder.finish();
    let expected = BooleanArray::from(vec![Some(true), None, Some(false)]);
    let actual = neq_dyn_scalar(&dict, 23).unwrap();
    assert_eq!(actual, expected);
}
#[test]
fn test_neq_dyn_scalar_float() {
    let values = vec![6.0, 7.0, 8.0, 8.0, 10.0];
    let float32: Float32Array = values.into_iter().map(Some).collect();
    let want = BooleanArray::from(vec![
        Some(true),
        Some(true),
        Some(false),
        Some(false),
        Some(true),
    ]);
    assert_eq!(neq_dyn_scalar(&float32, 8).unwrap(), want);

    // Same data cast to Float64 must give the same answer.
    let float32: ArrayRef = Arc::new(float32);
    let float64 = crate::compute::cast(&float32, &DataType::Float64).unwrap();
    assert_eq!(neq_dyn_scalar(&float64, 8).unwrap(), want);
}
#[test]
fn test_eq_dyn_binary_scalar() {
    // Four ASCII values, one non-UTF-8 byte pair, and a null.
    let rows: Vec<Option<&[u8]>> = vec![
        Some(b"arrow"),
        Some(b"datafusion"),
        Some(b"flight"),
        Some(b"parquet"),
        Some(&[0xff, 0xf8]),
        None,
    ];
    let small = BinaryArray::from(rows.clone());
    let large = LargeBinaryArray::from(rows);
    let needle: &[u8] = b"flight";
    let want = BooleanArray::from(vec![
        Some(false),
        Some(false),
        Some(true),
        Some(false),
        Some(false),
        None,
    ]);

    let got_small = eq_dyn_binary_scalar(&small, needle).unwrap();
    assert_eq!(got_small, want);
    let got_large = eq_dyn_binary_scalar(&large, needle).unwrap();
    assert_eq!(got_large, want);
}
#[test]
fn test_neq_dyn_binary_scalar() {
    // Four ASCII values, one non-UTF-8 byte pair, and a null.
    let rows: Vec<Option<&[u8]>> = vec![
        Some(b"arrow"),
        Some(b"datafusion"),
        Some(b"flight"),
        Some(b"parquet"),
        Some(&[0xff, 0xf8]),
        None,
    ];
    let small = BinaryArray::from(rows.clone());
    let large = LargeBinaryArray::from(rows);
    let needle: &[u8] = b"flight";
    let want = BooleanArray::from(vec![
        Some(true),
        Some(true),
        Some(false),
        Some(true),
        Some(true),
        None,
    ]);

    let got_small = neq_dyn_binary_scalar(&small, needle).unwrap();
    assert_eq!(got_small, want);
    let got_large = neq_dyn_binary_scalar(&large, needle).unwrap();
    assert_eq!(got_large, want);
}
#[test]
fn test_lt_dyn_binary_scalar() {
    // Four ASCII values, one non-UTF-8 byte pair, and a null.
    let rows: Vec<Option<&[u8]>> = vec![
        Some(b"arrow"),
        Some(b"datafusion"),
        Some(b"flight"),
        Some(b"parquet"),
        Some(&[0xff, 0xf8]),
        None,
    ];
    let small = BinaryArray::from(rows.clone());
    let large = LargeBinaryArray::from(rows);
    let needle: &[u8] = b"flight";
    let want = BooleanArray::from(vec![
        Some(true),
        Some(true),
        Some(false),
        Some(false),
        Some(false),
        None,
    ]);

    let got_small = lt_dyn_binary_scalar(&small, needle).unwrap();
    assert_eq!(got_small, want);
    let got_large = lt_dyn_binary_scalar(&large, needle).unwrap();
    assert_eq!(got_large, want);
}
#[test]
fn test_lt_eq_dyn_binary_scalar() {
    // Four ASCII values, one non-UTF-8 byte pair, and a null.
    let rows: Vec<Option<&[u8]>> = vec![
        Some(b"arrow"),
        Some(b"datafusion"),
        Some(b"flight"),
        Some(b"parquet"),
        Some(&[0xff, 0xf8]),
        None,
    ];
    let small = BinaryArray::from(rows.clone());
    let large = LargeBinaryArray::from(rows);
    let needle: &[u8] = b"flight";
    let want = BooleanArray::from(vec![
        Some(true),
        Some(true),
        Some(true),
        Some(false),
        Some(false),
        None,
    ]);

    let got_small = lt_eq_dyn_binary_scalar(&small, needle).unwrap();
    assert_eq!(got_small, want);
    let got_large = lt_eq_dyn_binary_scalar(&large, needle).unwrap();
    assert_eq!(got_large, want);
}
#[test]
fn test_gt_dyn_binary_scalar() {
    // Four ASCII values, one non-UTF-8 byte pair, and a null.
    let rows: Vec<Option<&[u8]>> = vec![
        Some(b"arrow"),
        Some(b"datafusion"),
        Some(b"flight"),
        Some(b"parquet"),
        Some(&[0xff, 0xf8]),
        None,
    ];
    let small = BinaryArray::from(rows.clone());
    let large = LargeBinaryArray::from(rows);
    let needle: &[u8] = b"flight";
    let want = BooleanArray::from(vec![
        Some(false),
        Some(false),
        Some(false),
        Some(true),
        Some(true),
        None,
    ]);

    let got_small = gt_dyn_binary_scalar(&small, needle).unwrap();
    assert_eq!(got_small, want);
    let got_large = gt_dyn_binary_scalar(&large, needle).unwrap();
    assert_eq!(got_large, want);
}
#[test]
fn test_gt_eq_dyn_binary_scalar() {
    // Four ASCII values, one non-UTF-8 byte pair, and a null.
    let rows: Vec<Option<&[u8]>> = vec![
        Some(b"arrow"),
        Some(b"datafusion"),
        Some(b"flight"),
        Some(b"parquet"),
        Some(&[0xff, 0xf8]),
        None,
    ];
    let small = BinaryArray::from(rows.clone());
    let large = LargeBinaryArray::from(rows);
    // The scalar is the non-UTF-8 pair itself, so only that row compares >=.
    let needle = &[0xff, 0xf8];
    let want = BooleanArray::from(vec![
        Some(false),
        Some(false),
        Some(false),
        Some(false),
        Some(true),
        None,
    ]);

    let got_small = gt_eq_dyn_binary_scalar(&small, needle).unwrap();
    assert_eq!(got_small, want);
    let got_large = gt_eq_dyn_binary_scalar(&large, needle).unwrap();
    assert_eq!(got_large, want);
}
#[test]
fn test_eq_dyn_utf8_scalar() {
    let input = StringArray::from(vec!["abc", "def", "xyz"]);
    let expected = BooleanArray::from(vec![Some(false), Some(false), Some(true)]);
    let actual = eq_dyn_utf8_scalar(&input, "xyz").unwrap();
    assert_eq!(actual, expected);
}
#[test]
fn test_eq_dyn_utf8_scalar_with_dict() {
    let mut dict_builder = StringDictionaryBuilder::<Int8Type>::new();
    // Slots are appended in order; `None` becomes a null entry.
    for slot in [Some("abc"), None, Some("def"), Some("def"), Some("abc")] {
        if let Some(text) = slot {
            dict_builder.append(text).unwrap();
        } else {
            dict_builder.append_null();
        }
    }
    let dict = dict_builder.finish();
    let expected = BooleanArray::from(vec![
        Some(false),
        None,
        Some(true),
        Some(true),
        Some(false),
    ]);
    let actual = eq_dyn_utf8_scalar(&dict, "def").unwrap();
    assert_eq!(actual, expected);
}
#[test]
fn test_lt_dyn_utf8_scalar() {
    let input = StringArray::from(vec!["abc", "def", "xyz"]);
    let expected = BooleanArray::from(vec![Some(true), Some(true), Some(false)]);
    let actual = lt_dyn_utf8_scalar(&input, "xyz").unwrap();
    assert_eq!(actual, expected);
}
#[test]
fn test_lt_dyn_utf8_scalar_with_dict() {
    let mut dict_builder = StringDictionaryBuilder::<Int8Type>::new();
    // Slots are appended in order; `None` becomes a null entry.
    for slot in [Some("abc"), None, Some("def"), Some("def"), Some("abc")] {
        if let Some(text) = slot {
            dict_builder.append(text).unwrap();
        } else {
            dict_builder.append_null();
        }
    }
    let dict = dict_builder.finish();
    let expected = BooleanArray::from(vec![
        Some(true),
        None,
        Some(false),
        Some(false),
        Some(true),
    ]);
    let actual = lt_dyn_utf8_scalar(&dict, "def").unwrap();
    assert_eq!(actual, expected);
}
#[test]
fn test_lt_eq_dyn_utf8_scalar() {
    let input = StringArray::from(vec!["abc", "def", "xyz"]);
    let expected = BooleanArray::from(vec![Some(true), Some(true), Some(false)]);
    let actual = lt_eq_dyn_utf8_scalar(&input, "def").unwrap();
    assert_eq!(actual, expected);
}
#[test]
fn test_lt_eq_dyn_utf8_scalar_with_dict() {
    let mut dict_builder = StringDictionaryBuilder::<Int8Type>::new();
    // Slots are appended in order; `None` becomes a null entry.
    for slot in [Some("abc"), None, Some("def"), Some("def"), Some("xyz")] {
        if let Some(text) = slot {
            dict_builder.append(text).unwrap();
        } else {
            dict_builder.append_null();
        }
    }
    let dict = dict_builder.finish();
    let expected = BooleanArray::from(vec![
        Some(true),
        None,
        Some(true),
        Some(true),
        Some(false),
    ]);
    let actual = lt_eq_dyn_utf8_scalar(&dict, "def").unwrap();
    assert_eq!(actual, expected);
}
#[test]
fn test_gt_eq_dyn_utf8_scalar() {
    let input = StringArray::from(vec!["abc", "def", "xyz"]);
    let expected = BooleanArray::from(vec![Some(false), Some(true), Some(true)]);
    let actual = gt_eq_dyn_utf8_scalar(&input, "def").unwrap();
    assert_eq!(actual, expected);
}
#[test]
fn test_gt_eq_dyn_utf8_scalar_with_dict() {
    let mut dict_builder = StringDictionaryBuilder::<Int8Type>::new();
    // Slots are appended in order; `None` becomes a null entry.
    for slot in [Some("abc"), None, Some("def"), Some("def"), Some("xyz")] {
        if let Some(text) = slot {
            dict_builder.append(text).unwrap();
        } else {
            dict_builder.append_null();
        }
    }
    let dict = dict_builder.finish();
    let expected = BooleanArray::from(vec![
        Some(false),
        None,
        Some(true),
        Some(true),
        Some(true),
    ]);
    let actual = gt_eq_dyn_utf8_scalar(&dict, "def").unwrap();
    assert_eq!(actual, expected);
}
#[test]
fn test_gt_dyn_utf8_scalar() {
    let input = StringArray::from(vec!["abc", "def", "xyz"]);
    let expected = BooleanArray::from(vec![Some(false), Some(false), Some(true)]);
    let actual = gt_dyn_utf8_scalar(&input, "def").unwrap();
    assert_eq!(actual, expected);
}
#[test]
fn test_gt_dyn_utf8_scalar_with_dict() {
    let mut dict_builder = StringDictionaryBuilder::<Int8Type>::new();
    // Slots are appended in order; `None` becomes a null entry.
    for slot in [Some("abc"), None, Some("def"), Some("def"), Some("xyz")] {
        if let Some(text) = slot {
            dict_builder.append(text).unwrap();
        } else {
            dict_builder.append_null();
        }
    }
    let dict = dict_builder.finish();
    let expected = BooleanArray::from(vec![
        Some(false),
        None,
        Some(false),
        Some(false),
        Some(true),
    ]);
    let actual = gt_dyn_utf8_scalar(&dict, "def").unwrap();
    assert_eq!(actual, expected);
}
#[test]
fn test_neq_dyn_utf8_scalar() {
    let input = StringArray::from(vec!["abc", "def", "xyz"]);
    let expected = BooleanArray::from(vec![Some(true), Some(true), Some(false)]);
    let actual = neq_dyn_utf8_scalar(&input, "xyz").unwrap();
    assert_eq!(actual, expected);
}
#[test]
fn test_neq_dyn_utf8_scalar_with_dict() {
    let mut dict_builder = StringDictionaryBuilder::<Int8Type>::new();
    // Slots are appended in order; `None` becomes a null entry.
    for slot in [Some("abc"), None, Some("def"), Some("def"), Some("abc")] {
        if let Some(text) = slot {
            dict_builder.append(text).unwrap();
        } else {
            dict_builder.append_null();
        }
    }
    let dict = dict_builder.finish();
    let expected = BooleanArray::from(vec![
        Some(true),
        None,
        Some(false),
        Some(false),
        Some(true),
    ]);
    let actual = neq_dyn_utf8_scalar(&dict, "def").unwrap();
    assert_eq!(actual, expected);
}
#[test]
fn test_eq_dyn_bool_scalar() {
    let input = BooleanArray::from(vec![true, false, true]);
    let expected = BooleanArray::from(vec![Some(false), Some(true), Some(false)]);
    let actual = eq_dyn_bool_scalar(&input, false).unwrap();
    assert_eq!(actual, expected);
}
#[test]
fn test_lt_dyn_bool_scalar() {
    // Nothing is less than `false`; the null slot stays null.
    let input = BooleanArray::from(vec![Some(true), Some(false), Some(true), None]);
    let expected =
        BooleanArray::from(vec![Some(false), Some(false), Some(false), None]);
    let actual = lt_dyn_bool_scalar(&input, false).unwrap();
    assert_eq!(actual, expected);
}
#[test]
fn test_gt_dyn_bool_scalar() {
    let input = BooleanArray::from(vec![true, false, true]);
    let expected = BooleanArray::from(vec![Some(true), Some(false), Some(true)]);
    let actual = gt_dyn_bool_scalar(&input, false).unwrap();
    assert_eq!(actual, expected);
}
#[test]
fn test_lt_eq_dyn_bool_scalar() {
    let input = BooleanArray::from(vec![true, false, true]);
    let expected = BooleanArray::from(vec![Some(false), Some(true), Some(false)]);
    let actual = lt_eq_dyn_bool_scalar(&input, false).unwrap();
    assert_eq!(actual, expected);
}
#[test]
fn test_gt_eq_dyn_bool_scalar() {
    // Every boolean is >= `false`.
    let input = BooleanArray::from(vec![true, false, true]);
    let expected = BooleanArray::from(vec![Some(true), Some(true), Some(true)]);
    let actual = gt_eq_dyn_bool_scalar(&input, false).unwrap();
    assert_eq!(actual, expected);
}
#[test]
fn test_neq_dyn_bool_scalar() {
    let input = BooleanArray::from(vec![true, false, true]);
    let expected = BooleanArray::from(vec![Some(true), Some(false), Some(true)]);
    let actual = neq_dyn_bool_scalar(&input, false).unwrap();
    assert_eq!(actual, expected);
}
#[test]
#[cfg(feature = "dyn_cmp_dict")]
fn test_eq_dyn_neq_dyn_dictionary_i8_array() {
    // Both dictionaries share one value array and differ only in their keys.
    let values = Int8Array::from_iter_values([10_i8, 11, 12, 13, 14, 15, 16, 17]);
    let lhs_keys = Int8Array::from_iter_values([2_i8, 3, 4]);
    let rhs_keys = Int8Array::from_iter_values([2_i8, 4, 4]);
    let lhs = DictionaryArray::try_new(&lhs_keys, &values).unwrap();
    let rhs = DictionaryArray::try_new(&rhs_keys, &values).unwrap();

    let equal = eq_dyn(&lhs, &rhs).unwrap();
    assert_eq!(equal, BooleanArray::from(vec![true, false, true]));

    let not_equal = neq_dyn(&lhs, &rhs).unwrap();
    assert_eq!(not_equal, BooleanArray::from(vec![false, true, false]));
}
#[test]
#[cfg(feature = "dyn_cmp_dict")]
fn test_eq_dyn_neq_dyn_dictionary_u64_array() {
    // Both dictionaries share one value array and differ only in their keys.
    let values = UInt64Array::from_iter_values([10_u64, 11, 12, 13, 14, 15, 16, 17]);
    let lhs_keys = UInt64Array::from_iter_values([1_u64, 3, 4]);
    let rhs_keys = UInt64Array::from_iter_values([2_u64, 3, 5]);
    let lhs = DictionaryArray::<UInt64Type>::try_new(&lhs_keys, &values).unwrap();
    let rhs = DictionaryArray::<UInt64Type>::try_new(&rhs_keys, &values).unwrap();

    let equal = eq_dyn(&lhs, &rhs).unwrap();
    assert_eq!(equal, BooleanArray::from(vec![false, true, false]));

    let not_equal = neq_dyn(&lhs, &rhs).unwrap();
    assert_eq!(not_equal, BooleanArray::from(vec![true, false, true]));
}
#[test]
#[cfg(feature = "dyn_cmp_dict")]
fn test_eq_dyn_neq_dyn_dictionary_utf8_array() {
    let lhs_words = vec!["a", "a", "b", "c"];
    let rhs_words = vec!["a", "b", "b", "c"];
    // Every "b" is turned into a null entry on both sides.
    let lhs: DictionaryArray<Int8Type> = lhs_words
        .iter()
        .map(|&word| match word {
            "b" => None,
            other => Some(other),
        })
        .collect();
    let rhs: DictionaryArray<Int8Type> = rhs_words
        .iter()
        .map(|&word| match word {
            "b" => None,
            other => Some(other),
        })
        .collect();

    let equal = eq_dyn(&lhs, &rhs).unwrap();
    assert_eq!(
        equal,
        BooleanArray::from(vec![Some(true), None, None, Some(true)])
    );

    let not_equal = neq_dyn(&lhs, &rhs).unwrap();
    assert_eq!(
        not_equal,
        BooleanArray::from(vec![Some(false), None, None, Some(false)])
    );
}
#[test]
#[cfg(feature = "dyn_cmp_dict")]
fn test_eq_dyn_neq_dyn_dictionary_binary_array() {
    // Shared binary values; the right-hand keys swap the last two entries.
    let values: BinaryArray = ["hello", "", "parquet"]
        .into_iter()
        .map(|text| Some(text.as_bytes()))
        .collect();
    let lhs_keys = UInt64Array::from_iter_values([0_u64, 1, 2]);
    let rhs_keys = UInt64Array::from_iter_values([0_u64, 2, 1]);
    let lhs = DictionaryArray::<UInt64Type>::try_new(&lhs_keys, &values).unwrap();
    let rhs = DictionaryArray::<UInt64Type>::try_new(&rhs_keys, &values).unwrap();

    let equal = eq_dyn(&lhs, &rhs).unwrap();
    assert_eq!(equal, BooleanArray::from(vec![true, false, false]));

    let not_equal = neq_dyn(&lhs, &rhs).unwrap();
    assert_eq!(not_equal, BooleanArray::from(vec![false, true, true]));
}
#[test]
#[cfg(feature = "dyn_cmp_dict")]
fn test_eq_dyn_neq_dyn_dictionary_interval_array() {
    // Shared interval values; the key arrays differ only in their first entry.
    let values = IntervalDayTimeArray::from(vec![1, 6, 10, 2, 3, 5]);
    let lhs_keys = UInt64Array::from_iter_values([1_u64, 0, 3]);
    let rhs_keys = UInt64Array::from_iter_values([2_u64, 0, 3]);
    let lhs = DictionaryArray::<UInt64Type>::try_new(&lhs_keys, &values).unwrap();
    let rhs = DictionaryArray::<UInt64Type>::try_new(&rhs_keys, &values).unwrap();

    let equal = eq_dyn(&lhs, &rhs).unwrap();
    assert_eq!(equal, BooleanArray::from(vec![false, true, true]));

    let not_equal = neq_dyn(&lhs, &rhs).unwrap();
    assert_eq!(not_equal, BooleanArray::from(vec![true, false, false]));
}
#[test]
#[cfg(feature = "dyn_cmp_dict")]
fn test_eq_dyn_neq_dyn_dictionary_date_array() {
    // Shared date values; the key arrays differ only in their first entry.
    let values = Date32Array::from(vec![1, 6, 10, 2, 3, 5]);
    let lhs_keys = UInt64Array::from_iter_values([1_u64, 0, 3]);
    let rhs_keys = UInt64Array::from_iter_values([2_u64, 0, 3]);
    let lhs = DictionaryArray::<UInt64Type>::try_new(&lhs_keys, &values).unwrap();
    let rhs = DictionaryArray::<UInt64Type>::try_new(&rhs_keys, &values).unwrap();

    let equal = eq_dyn(&lhs, &rhs).unwrap();
    assert_eq!(equal, BooleanArray::from(vec![false, true, true]));

    let not_equal = neq_dyn(&lhs, &rhs).unwrap();
    assert_eq!(not_equal, BooleanArray::from(vec![true, false, false]));
}
#[test]
#[cfg(feature = "dyn_cmp_dict")]
fn test_eq_dyn_neq_dyn_dictionary_bool_array() {
    // Key 0 maps to `true`, key 1 to `false`.
    let values = BooleanArray::from(vec![true, false]);
    let lhs_keys = UInt64Array::from_iter_values([1_u64, 1, 1]);
    let rhs_keys = UInt64Array::from_iter_values([0_u64, 1, 0]);
    let lhs = DictionaryArray::<UInt64Type>::try_new(&lhs_keys, &values).unwrap();
    let rhs = DictionaryArray::<UInt64Type>::try_new(&rhs_keys, &values).unwrap();

    let equal = eq_dyn(&lhs, &rhs).unwrap();
    assert_eq!(equal, BooleanArray::from(vec![false, true, false]));

    let not_equal = neq_dyn(&lhs, &rhs).unwrap();
    assert_eq!(not_equal, BooleanArray::from(vec![true, false, true]));
}
#[test]
#[cfg(feature = "dyn_cmp_dict")]
fn test_lt_dyn_gt_dyn_dictionary_i8_array() {
    // Shared values; keys resolve to [13, 14, 14] on the left and [14, 13, 14] on the right.
    let values = Int8Array::from_iter_values([10_i8, 11, 12, 13, 14, 15, 16, 17]);
    let lhs_keys = Int8Array::from_iter_values([3_i8, 4, 4]);
    let rhs_keys = Int8Array::from_iter_values([4_i8, 3, 4]);
    let lhs = DictionaryArray::try_new(&lhs_keys, &values).unwrap();
    let rhs = DictionaryArray::try_new(&rhs_keys, &values).unwrap();

    let less = lt_dyn(&lhs, &rhs).unwrap();
    assert_eq!(less, BooleanArray::from(vec![true, false, false]));

    let less_or_equal = lt_eq_dyn(&lhs, &rhs).unwrap();
    assert_eq!(less_or_equal, BooleanArray::from(vec![true, false, true]));

    let greater = gt_dyn(&lhs, &rhs).unwrap();
    assert_eq!(greater, BooleanArray::from(vec![false, true, false]));

    let greater_or_equal = gt_eq_dyn(&lhs, &rhs).unwrap();
    assert_eq!(greater_or_equal, BooleanArray::from(vec![false, true, true]));
}
#[test]
#[cfg(feature = "dyn_cmp_dict")]
fn test_lt_dyn_gt_dyn_dictionary_bool_array() {
    // Key 0 maps to `true`, key 1 to `false`.
    let values = BooleanArray::from(vec![true, false]);
    let lhs_keys = UInt64Array::from_iter_values([1_u64, 1, 0]);
    let rhs_keys = UInt64Array::from_iter_values([0_u64, 1, 1]);
    let lhs = DictionaryArray::<UInt64Type>::try_new(&lhs_keys, &values).unwrap();
    let rhs = DictionaryArray::<UInt64Type>::try_new(&rhs_keys, &values).unwrap();

    let less = lt_dyn(&lhs, &rhs).unwrap();
    assert_eq!(less, BooleanArray::from(vec![true, false, false]));

    let less_or_equal = lt_eq_dyn(&lhs, &rhs).unwrap();
    assert_eq!(less_or_equal, BooleanArray::from(vec![true, true, false]));

    let greater = gt_dyn(&lhs, &rhs).unwrap();
    assert_eq!(greater, BooleanArray::from(vec![false, false, true]));

    let greater_or_equal = gt_eq_dyn(&lhs, &rhs).unwrap();
    assert_eq!(greater_or_equal, BooleanArray::from(vec![false, true, true]));
}
#[test]
fn test_unary_cmp() {
    // The predicate is a membership check; the null slot must stay null.
    let input = Int32Array::from(vec![Some(1), None, Some(2), Some(3)]);
    let allowed = vec![1_i32, 3];
    let expected = BooleanArray::from(vec![Some(true), None, Some(false), Some(true)]);
    let actual = unary_cmp(&input, |item| allowed.contains(&item)).unwrap();
    assert_eq!(actual, expected);
}
/// Equality kernels between an `Int8` dictionary array and a plain `Int8Array`,
/// checked with the operands in both orders.
#[test]
#[cfg(feature = "dyn_cmp_dict")]
fn test_eq_dyn_neq_dyn_dictionary_i8_i8_array() {
    let dictionary = Int8Array::from_iter_values([10_i8, 11, 12, 13, 14, 15, 16, 17]);
    let dict_keys = Int8Array::from_iter_values([2_i8, 3, 4]);
    // Decoded contents: dict = [12, 13, 14], plain = [12, null, 14].
    let dict = DictionaryArray::try_new(&dict_keys, &dictionary).unwrap();
    let plain = Int8Array::from_iter([Some(12_i8), None, Some(14)]);

    // Equality does not depend on operand order, so each expectation is shared.
    let expected_eq = BooleanArray::from(vec![Some(true), None, Some(true)]);
    assert_eq!(eq_dyn(&dict, &plain).unwrap(), expected_eq);
    assert_eq!(eq_dyn(&plain, &dict).unwrap(), expected_eq);

    let expected_neq = BooleanArray::from(vec![Some(false), None, Some(false)]);
    assert_eq!(neq_dyn(&dict, &plain).unwrap(), expected_neq);
    assert_eq!(neq_dyn(&plain, &dict).unwrap(), expected_neq);
}
/// Ordering kernels between an `Int8` dictionary array and a plain `Int8Array`,
/// checked with the operands in both orders.
#[test]
#[cfg(feature = "dyn_cmp_dict")]
fn test_lt_dyn_lt_eq_dyn_gt_dyn_gt_eq_dyn_dictionary_i8_i8_array() {
    let dictionary = Int8Array::from_iter_values([10_i8, 11, 12, 13, 14, 15, 16, 17]);
    let dict_keys = Int8Array::from_iter_values([2_i8, 3, 4]);
    // Decoded contents: dict = [12, 13, 14], plain = [12, null, 11].
    let dict = DictionaryArray::try_new(&dict_keys, &dictionary).unwrap();
    let plain = Int8Array::from_iter([Some(12_i8), None, Some(11)]);

    // Strictly less than.
    assert_eq!(
        lt_dyn(&dict, &plain).unwrap(),
        BooleanArray::from(vec![Some(false), None, Some(false)])
    );
    assert_eq!(
        lt_dyn(&plain, &dict).unwrap(),
        BooleanArray::from(vec![Some(false), None, Some(true)])
    );

    // Less than or equal.
    assert_eq!(
        lt_eq_dyn(&dict, &plain).unwrap(),
        BooleanArray::from(vec![Some(true), None, Some(false)])
    );
    assert_eq!(
        lt_eq_dyn(&plain, &dict).unwrap(),
        BooleanArray::from(vec![Some(true), None, Some(true)])
    );

    // Strictly greater than.
    assert_eq!(
        gt_dyn(&dict, &plain).unwrap(),
        BooleanArray::from(vec![Some(false), None, Some(true)])
    );
    assert_eq!(
        gt_dyn(&plain, &dict).unwrap(),
        BooleanArray::from(vec![Some(false), None, Some(false)])
    );

    // Greater than or equal.
    assert_eq!(
        gt_eq_dyn(&dict, &plain).unwrap(),
        BooleanArray::from(vec![Some(true), None, Some(true)])
    );
    assert_eq!(
        gt_eq_dyn(&plain, &dict).unwrap(),
        BooleanArray::from(vec![Some(true), None, Some(false)])
    );
}
/// `eq_dyn` / `neq_dyn` on float arrays containing NaN. The expectations treat
/// NaN as equal to NaN and different from every other value.
#[test]
fn test_eq_dyn_neq_dyn_float_nan() {
    // Both float widths share the same expected masks.
    let expected_eq = BooleanArray::from(vec![
        Some(true),
        Some(false),
        Some(true),
        Some(true),
        Some(true),
    ]);
    let expected_neq = BooleanArray::from(vec![
        Some(false),
        Some(true),
        Some(false),
        Some(false),
        Some(false),
    ]);

    // 32-bit floats.
    {
        let lhs = vec![f32::NAN, 7.0, 8.0, 8.0, 10.0]
            .into_iter()
            .map(Some)
            .collect::<Float32Array>();
        let rhs = vec![f32::NAN, f32::NAN, 8.0, 8.0, 10.0]
            .into_iter()
            .map(Some)
            .collect::<Float32Array>();
        assert_eq!(eq_dyn(&lhs, &rhs).unwrap(), expected_eq);
        assert_eq!(neq_dyn(&lhs, &rhs).unwrap(), expected_neq);
    }

    // 64-bit floats.
    {
        let lhs = vec![f64::NAN, 7.0, 8.0, 8.0, 10.0]
            .into_iter()
            .map(Some)
            .collect::<Float64Array>();
        let rhs = vec![f64::NAN, f64::NAN, 8.0, 8.0, 10.0]
            .into_iter()
            .map(Some)
            .collect::<Float64Array>();
        assert_eq!(eq_dyn(&lhs, &rhs).unwrap(), expected_eq);
        assert_eq!(neq_dyn(&lhs, &rhs).unwrap(), expected_neq);
    }
}
/// `lt_dyn` / `lt_eq_dyn` on float arrays containing NaN. The expectations
/// order NaN above every other value and equal to itself.
#[test]
fn test_lt_dyn_lt_eq_dyn_float_nan() {
    // Both float widths share the same expected masks.
    let expected_lt = BooleanArray::from(vec![
        Some(false),
        Some(true),
        Some(false),
        Some(true),
        Some(false),
        Some(false),
    ]);
    let expected_lt_eq = BooleanArray::from(vec![
        Some(true),
        Some(true),
        Some(true),
        Some(true),
        Some(false),
        Some(false),
    ]);

    // 32-bit floats.
    {
        let lhs = vec![f32::NAN, 7.0, 8.0, 8.0, 11.0, f32::NAN]
            .into_iter()
            .map(Some)
            .collect::<Float32Array>();
        let rhs = vec![f32::NAN, f32::NAN, 8.0, 9.0, 10.0, 1.0]
            .into_iter()
            .map(Some)
            .collect::<Float32Array>();
        assert_eq!(lt_dyn(&lhs, &rhs).unwrap(), expected_lt);
        assert_eq!(lt_eq_dyn(&lhs, &rhs).unwrap(), expected_lt_eq);
    }

    // 64-bit floats.
    {
        let lhs = vec![f64::NAN, 7.0, 8.0, 8.0, 11.0, f64::NAN]
            .into_iter()
            .map(Some)
            .collect::<Float64Array>();
        let rhs = vec![f64::NAN, f64::NAN, 8.0, 9.0, 10.0, 1.0]
            .into_iter()
            .map(Some)
            .collect::<Float64Array>();
        assert_eq!(lt_dyn(&lhs, &rhs).unwrap(), expected_lt);
        assert_eq!(lt_eq_dyn(&lhs, &rhs).unwrap(), expected_lt_eq);
    }
}
/// `gt_dyn` / `gt_eq_dyn` on float arrays containing NaN. The expectations
/// order NaN above every other value and equal to itself.
#[test]
fn test_gt_dyn_gt_eq_dyn_float_nan() {
    // Both float widths share the same expected masks.
    let expected_gt = BooleanArray::from(vec![
        Some(false),
        Some(false),
        Some(false),
        Some(false),
        Some(true),
        Some(true),
    ]);
    let expected_gt_eq = BooleanArray::from(vec![
        Some(true),
        Some(false),
        Some(true),
        Some(false),
        Some(true),
        Some(true),
    ]);

    // 32-bit floats.
    {
        let lhs = vec![f32::NAN, 7.0, 8.0, 8.0, 11.0, f32::NAN]
            .into_iter()
            .map(Some)
            .collect::<Float32Array>();
        let rhs = vec![f32::NAN, f32::NAN, 8.0, 9.0, 10.0, 1.0]
            .into_iter()
            .map(Some)
            .collect::<Float32Array>();
        assert_eq!(gt_dyn(&lhs, &rhs).unwrap(), expected_gt);
        assert_eq!(gt_eq_dyn(&lhs, &rhs).unwrap(), expected_gt_eq);
    }

    // 64-bit floats.
    {
        let lhs = vec![f64::NAN, 7.0, 8.0, 8.0, 11.0, f64::NAN]
            .into_iter()
            .map(Some)
            .collect::<Float64Array>();
        let rhs = vec![f64::NAN, f64::NAN, 8.0, 9.0, 10.0, 1.0]
            .into_iter()
            .map(Some)
            .collect::<Float64Array>();
        assert_eq!(gt_dyn(&lhs, &rhs).unwrap(), expected_gt);
        assert_eq!(gt_eq_dyn(&lhs, &rhs).unwrap(), expected_gt_eq);
    }
}
/// `eq_dyn_scalar` / `neq_dyn_scalar` against a NaN scalar. The expected masks
/// depend on whether the `simd` feature is enabled.
#[test]
fn test_eq_dyn_scalar_neq_dyn_scalar_float_nan() {
    let simd = cfg!(feature = "simd");

    // With `simd` nothing is reported equal to NaN; without it only the NaN
    // slot is.
    let expected_eq = || {
        BooleanArray::from(if simd {
            vec![Some(false); 5]
        } else {
            vec![Some(true), Some(false), Some(false), Some(false), Some(false)]
        })
    };
    let expected_neq = || {
        BooleanArray::from(if simd {
            vec![Some(true); 5]
        } else {
            vec![Some(false), Some(true), Some(true), Some(true), Some(true)]
        })
    };

    // 32-bit floats.
    let input = vec![f32::NAN, 7.0, 8.0, 8.0, 10.0]
        .into_iter()
        .map(Some)
        .collect::<Float32Array>();
    assert_eq!(eq_dyn_scalar(&input, f32::NAN).unwrap(), expected_eq());
    assert_eq!(neq_dyn_scalar(&input, f32::NAN).unwrap(), expected_neq());

    // 64-bit floats.
    let input = vec![f64::NAN, 7.0, 8.0, 8.0, 10.0]
        .into_iter()
        .map(Some)
        .collect::<Float64Array>();
    assert_eq!(eq_dyn_scalar(&input, f64::NAN).unwrap(), expected_eq());
    assert_eq!(neq_dyn_scalar(&input, f64::NAN).unwrap(), expected_neq());
}
/// `lt_dyn_scalar` / `lt_eq_dyn_scalar` against a NaN scalar. The expected
/// masks depend on whether the `simd` feature is enabled.
#[test]
fn test_lt_dyn_scalar_lt_eq_dyn_scalar_float_nan() {
    let simd = cfg!(feature = "simd");

    // With `simd` every comparison against NaN is false; without it NaN is
    // expected to sort above the ordinary values and equal to itself.
    let expected_lt = || {
        BooleanArray::from(if simd {
            vec![Some(false); 5]
        } else {
            vec![Some(false), Some(true), Some(true), Some(true), Some(true)]
        })
    };
    let expected_lt_eq = || {
        BooleanArray::from(if simd {
            vec![Some(false); 5]
        } else {
            vec![Some(true); 5]
        })
    };

    // 32-bit floats.
    let input = vec![f32::NAN, 7.0, 8.0, 8.0, 10.0]
        .into_iter()
        .map(Some)
        .collect::<Float32Array>();
    assert_eq!(lt_dyn_scalar(&input, f32::NAN).unwrap(), expected_lt());
    assert_eq!(lt_eq_dyn_scalar(&input, f32::NAN).unwrap(), expected_lt_eq());

    // 64-bit floats.
    let input = vec![f64::NAN, 7.0, 8.0, 8.0, 10.0]
        .into_iter()
        .map(Some)
        .collect::<Float64Array>();
    assert_eq!(lt_dyn_scalar(&input, f64::NAN).unwrap(), expected_lt());
    assert_eq!(lt_eq_dyn_scalar(&input, f64::NAN).unwrap(), expected_lt_eq());
}
/// `gt_dyn_scalar` / `gt_eq_dyn_scalar` against a NaN scalar. Only the
/// `gt_eq` expectation depends on the `simd` feature.
#[test]
fn test_gt_dyn_scalar_gt_eq_dyn_scalar_float_nan() {
    let simd = cfg!(feature = "simd");

    // Nothing is expected to be strictly greater than NaN in either build.
    let expected_gt = || BooleanArray::from(vec![Some(false); 5]);
    // Without `simd` the NaN slot counts as equal to the NaN scalar.
    let expected_gt_eq = || {
        BooleanArray::from(if simd {
            vec![Some(false); 5]
        } else {
            vec![Some(true), Some(false), Some(false), Some(false), Some(false)]
        })
    };

    // 32-bit floats.
    let input = vec![f32::NAN, 7.0, 8.0, 8.0, 10.0]
        .into_iter()
        .map(Some)
        .collect::<Float32Array>();
    assert_eq!(gt_dyn_scalar(&input, f32::NAN).unwrap(), expected_gt());
    assert_eq!(gt_eq_dyn_scalar(&input, f32::NAN).unwrap(), expected_gt_eq());

    // 64-bit floats.
    let input = vec![f64::NAN, 7.0, 8.0, 8.0, 10.0]
        .into_iter()
        .map(Some)
        .collect::<Float64Array>();
    assert_eq!(gt_dyn_scalar(&input, f64::NAN).unwrap(), expected_gt());
    assert_eq!(gt_eq_dyn_scalar(&input, f64::NAN).unwrap(), expected_gt_eq());
}
/// Table-driven check of `like_dict_scalar` on a string dictionary array that
/// contains one null entry.
#[test]
fn test_dict_like_kernels() {
    const T: Option<bool> = Some(true);
    const F: Option<bool> = Some(false);

    let dict_array = vec![
        Some("Earth"),
        Some("Fire"),
        Some("Water"),
        Some("Air"),
        None,
        Some("Air"),
    ]
    .into_iter()
    .collect::<DictionaryArray<Int8Type>>();

    // (pattern, expected mask); the null input slot stays null in every case.
    let cases = vec![
        ("Air", vec![F, F, F, T, None, T]),
        ("Wa%", vec![F, F, T, F, None, F]),
        ("%r", vec![F, F, T, T, None, T]),
        ("%i%", vec![F, T, F, T, None, T]),
        ("%a%r%", vec![T, F, T, F, None, F]),
    ];

    for (pattern, expected) in cases {
        assert_eq!(
            like_dict_scalar(&dict_array, pattern).unwrap(),
            BooleanArray::from(expected),
        );
    }
}
/// Equality kernels between a string dictionary array and a plain
/// `StringArray`, checked with the operands in both orders.
#[test]
#[cfg(feature = "dyn_cmp_dict")]
fn test_eq_dyn_neq_dyn_dictionary_to_utf8_array() {
    // The literal "b" is used as a stand-in for a null slot.
    fn null_if_b(s: &str) -> Option<&str> {
        if s == "b" {
            None
        } else {
            Some(s)
        }
    }

    // Contents: dict = ["a", "a", null, "c"], plain = ["a", null, null, "d"].
    let dict = ["a", "a", "b", "c"]
        .iter()
        .copied()
        .map(null_if_b)
        .collect::<DictionaryArray<Int8Type>>();
    let plain = ["a", "b", "b", "d"]
        .iter()
        .copied()
        .map(null_if_b)
        .collect::<StringArray>();

    let expected_eq = BooleanArray::from(vec![Some(true), None, None, Some(false)]);
    assert_eq!(eq_dyn(&dict, &plain).unwrap(), expected_eq);
    assert_eq!(eq_dyn(&plain, &dict).unwrap(), expected_eq);

    let expected_neq = BooleanArray::from(vec![Some(false), None, None, Some(true)]);
    assert_eq!(neq_dyn(&dict, &plain).unwrap(), expected_neq);
    assert_eq!(neq_dyn(&plain, &dict).unwrap(), expected_neq);
}
/// Ordering kernels between a string dictionary array and a plain
/// `StringArray`, checked with the operands in both orders.
#[test]
#[cfg(feature = "dyn_cmp_dict")]
fn test_lt_dyn_lt_eq_dyn_gt_dyn_gt_eq_dyn_dictionary_to_utf8_array() {
    // The literal "b" is used as a stand-in for a null slot.
    fn null_if_b(s: &str) -> Option<&str> {
        if s == "b" {
            None
        } else {
            Some(s)
        }
    }

    // Contents: dict = ["abc", "abc", null, "cde"],
    //           plain = ["abc", null, null, "def"].
    let dict = ["abc", "abc", "b", "cde"]
        .iter()
        .copied()
        .map(null_if_b)
        .collect::<DictionaryArray<Int8Type>>();
    let plain = ["abc", "b", "b", "def"]
        .iter()
        .copied()
        .map(null_if_b)
        .collect::<StringArray>();

    // Strictly less than.
    assert_eq!(
        lt_dyn(&dict, &plain).unwrap(),
        BooleanArray::from(vec![Some(false), None, None, Some(true)])
    );
    assert_eq!(
        lt_dyn(&plain, &dict).unwrap(),
        BooleanArray::from(vec![Some(false), None, None, Some(false)])
    );

    // Less than or equal.
    assert_eq!(
        lt_eq_dyn(&dict, &plain).unwrap(),
        BooleanArray::from(vec![Some(true), None, None, Some(true)])
    );
    assert_eq!(
        lt_eq_dyn(&plain, &dict).unwrap(),
        BooleanArray::from(vec![Some(true), None, None, Some(false)])
    );

    // Strictly greater than.
    assert_eq!(
        gt_dyn(&dict, &plain).unwrap(),
        BooleanArray::from(vec![Some(false), None, None, Some(false)])
    );
    assert_eq!(
        gt_dyn(&plain, &dict).unwrap(),
        BooleanArray::from(vec![Some(false), None, None, Some(true)])
    );

    // Greater than or equal.
    assert_eq!(
        gt_eq_dyn(&dict, &plain).unwrap(),
        BooleanArray::from(vec![Some(true), None, None, Some(false)])
    );
    assert_eq!(
        gt_eq_dyn(&plain, &dict).unwrap(),
        BooleanArray::from(vec![Some(true), None, None, Some(true)])
    );
}
/// Equality kernels between a `UInt64`-keyed binary dictionary array and a
/// plain `BinaryArray`, checked with the operands in both orders.
#[test]
#[cfg(feature = "dyn_cmp_dict")]
fn test_eq_dyn_neq_dyn_dictionary_to_binary_array() {
    let dictionary = vec!["hello", "", "parquet"]
        .into_iter()
        .map(|s| Some(s.as_bytes()))
        .collect::<BinaryArray>();
    let dict_keys = UInt64Array::from(vec![Some(0_u64), None, Some(2), Some(2)]);
    // Decoded contents: dict = ["hello", null, "parquet", "parquet"].
    let dict = DictionaryArray::<UInt64Type>::try_new(&dict_keys, &dictionary).unwrap();

    let plain = vec!["hello", "", "parquet", "test"]
        .into_iter()
        .map(|s| Some(s.as_bytes()))
        .collect::<BinaryArray>();

    let expected_eq = BooleanArray::from(vec![Some(true), None, Some(true), Some(false)]);
    assert_eq!(eq_dyn(&dict, &plain).unwrap(), expected_eq);
    assert_eq!(eq_dyn(&plain, &dict).unwrap(), expected_eq);

    let expected_neq =
        BooleanArray::from(vec![Some(false), None, Some(false), Some(true)]);
    assert_eq!(neq_dyn(&dict, &plain).unwrap(), expected_neq);
    assert_eq!(neq_dyn(&plain, &dict).unwrap(), expected_neq);
}
/// Ordering kernels between a `UInt64`-keyed binary dictionary array and a
/// plain `BinaryArray`, checked with the operands in both orders.
#[test]
#[cfg(feature = "dyn_cmp_dict")]
fn test_lt_dyn_lt_eq_dyn_gt_dyn_gt_eq_dyn_dictionary_to_binary_array() {
    let dictionary = vec!["hello", "", "parquet"]
        .into_iter()
        .map(|s| Some(s.as_bytes()))
        .collect::<BinaryArray>();
    let dict_keys = UInt64Array::from(vec![Some(0_u64), None, Some(2), Some(2)]);
    // Decoded contents: dict = ["hello", null, "parquet", "parquet"].
    let dict = DictionaryArray::<UInt64Type>::try_new(&dict_keys, &dictionary).unwrap();

    let plain = vec!["hello", "", "parquet", "test"]
        .into_iter()
        .map(|s| Some(s.as_bytes()))
        .collect::<BinaryArray>();

    // Strictly less than.
    assert_eq!(
        lt_dyn(&dict, &plain).unwrap(),
        BooleanArray::from(vec![Some(false), None, Some(false), Some(true)])
    );
    assert_eq!(
        lt_dyn(&plain, &dict).unwrap(),
        BooleanArray::from(vec![Some(false), None, Some(false), Some(false)])
    );

    // Less than or equal.
    assert_eq!(
        lt_eq_dyn(&dict, &plain).unwrap(),
        BooleanArray::from(vec![Some(true), None, Some(true), Some(true)])
    );
    assert_eq!(
        lt_eq_dyn(&plain, &dict).unwrap(),
        BooleanArray::from(vec![Some(true), None, Some(true), Some(false)])
    );

    // Strictly greater than.
    assert_eq!(
        gt_dyn(&dict, &plain).unwrap(),
        BooleanArray::from(vec![Some(false), None, Some(false), Some(false)])
    );
    assert_eq!(
        gt_dyn(&plain, &dict).unwrap(),
        BooleanArray::from(vec![Some(false), None, Some(false), Some(true)])
    );

    // Greater than or equal.
    assert_eq!(
        gt_eq_dyn(&dict, &plain).unwrap(),
        BooleanArray::from(vec![Some(true), None, Some(true), Some(false)])
    );
    assert_eq!(
        gt_eq_dyn(&plain, &dict).unwrap(),
        BooleanArray::from(vec![Some(true), None, Some(true), Some(true)])
    );
}
/// Table-driven check of `nlike_dict_scalar` on a string dictionary array that
/// contains one null entry.
#[test]
fn test_dict_nlike_kernels() {
    const T: Option<bool> = Some(true);
    const F: Option<bool> = Some(false);

    let dict_array = vec![
        Some("Earth"),
        Some("Fire"),
        Some("Water"),
        Some("Air"),
        None,
        Some("Air"),
    ]
    .into_iter()
    .collect::<DictionaryArray<Int8Type>>();

    // (pattern, expected mask); the null input slot stays null in every case.
    let cases = vec![
        ("Air", vec![T, T, T, F, None, F]),
        ("Wa%", vec![T, T, F, T, None, T]),
        ("%r", vec![T, T, F, F, None, F]),
        ("%i%", vec![T, F, T, F, None, F]),
        ("%a%r%", vec![F, T, F, T, None, T]),
    ];

    for (pattern, expected) in cases {
        assert_eq!(
            nlike_dict_scalar(&dict_array, pattern).unwrap(),
            BooleanArray::from(expected),
        );
    }
}
/// Table-driven check of `ilike_dict_scalar` on a string dictionary array that
/// contains one null entry. The patterns deliberately differ in letter case
/// from the stored strings.
#[test]
fn test_dict_ilike_kernels() {
    const T: Option<bool> = Some(true);
    const F: Option<bool> = Some(false);

    let dict_array = vec![
        Some("Earth"),
        Some("Fire"),
        Some("Water"),
        Some("Air"),
        None,
        Some("Air"),
    ]
    .into_iter()
    .collect::<DictionaryArray<Int8Type>>();

    // (pattern, expected mask); the null input slot stays null in every case.
    let cases = vec![
        ("air", vec![F, F, F, T, None, T]),
        ("wa%", vec![F, F, T, F, None, F]),
        ("%R", vec![F, F, T, T, None, T]),
        ("%I%", vec![F, T, F, T, None, T]),
        ("%A%r%", vec![T, F, T, T, None, T]),
    ];

    for (pattern, expected) in cases {
        assert_eq!(
            ilike_dict_scalar(&dict_array, pattern).unwrap(),
            BooleanArray::from(expected),
        );
    }
}
/// Table-driven check of `nilike_dict_scalar` on a string dictionary array
/// that contains one null entry. The patterns deliberately differ in letter
/// case from the stored strings.
#[test]
fn test_dict_nilike_kernels() {
    const T: Option<bool> = Some(true);
    const F: Option<bool> = Some(false);

    let dict_array = vec![
        Some("Earth"),
        Some("Fire"),
        Some("Water"),
        Some("Air"),
        None,
        Some("Air"),
    ]
    .into_iter()
    .collect::<DictionaryArray<Int8Type>>();

    // (pattern, expected mask); the null input slot stays null in every case.
    let cases = vec![
        ("air", vec![T, T, T, F, None, F]),
        ("wa%", vec![T, T, F, T, None, T]),
        ("%R", vec![T, T, F, F, None, F]),
        ("%I%", vec![T, F, T, F, None, F]),
        ("%A%r%", vec![F, T, F, F, None, F]),
    ];

    for (pattern, expected) in cases {
        assert_eq!(
            nilike_dict_scalar(&dict_array, pattern).unwrap(),
            BooleanArray::from(expected),
        );
    }
}
/// `eq_dyn` / `neq_dyn` between a plain float array and a float dictionary
/// array, both containing NaN. The expectations treat NaN as equal to NaN.
#[test]
#[cfg(feature = "dyn_cmp_dict")]
fn test_eq_dyn_neq_dyn_dict_non_dict_float_nan() {
    // Both float widths share the same expected masks.
    let expected_eq = BooleanArray::from(vec![
        Some(true),
        Some(false),
        Some(true),
        Some(true),
        Some(true),
    ]);
    let expected_neq = BooleanArray::from(vec![
        Some(false),
        Some(true),
        Some(false),
        Some(false),
        Some(false),
    ]);

    // 32-bit floats; the dictionary decodes to [NaN, NaN, 8, 8, 10].
    {
        let plain = vec![f32::NAN, 7.0, 8.0, 8.0, 10.0]
            .into_iter()
            .map(Some)
            .collect::<Float32Array>();
        let dictionary = Float32Array::from(vec![f32::NAN, 8.0, 10.0]);
        let dict_keys = Int8Array::from_iter_values([0_i8, 0, 1, 1, 2]);
        let dict = DictionaryArray::try_new(&dict_keys, &dictionary).unwrap();
        assert_eq!(eq_dyn(&plain, &dict).unwrap(), expected_eq);
        assert_eq!(neq_dyn(&plain, &dict).unwrap(), expected_neq);
    }

    // 64-bit floats; same layout as above.
    {
        let plain = vec![f64::NAN, 7.0, 8.0, 8.0, 10.0]
            .into_iter()
            .map(Some)
            .collect::<Float64Array>();
        let dictionary = Float64Array::from(vec![f64::NAN, 8.0, 10.0]);
        let dict_keys = Int8Array::from_iter_values([0_i8, 0, 1, 1, 2]);
        let dict = DictionaryArray::try_new(&dict_keys, &dictionary).unwrap();
        assert_eq!(eq_dyn(&plain, &dict).unwrap(), expected_eq);
        assert_eq!(neq_dyn(&plain, &dict).unwrap(), expected_neq);
    }
}
/// `lt_dyn` / `lt_eq_dyn` between a plain float array and a float dictionary
/// array, both containing NaN. The expectations order NaN above every other
/// value and equal to itself.
#[test]
#[cfg(feature = "dyn_cmp_dict")]
fn test_lt_dyn_lt_eq_dyn_dict_non_dict_float_nan() {
    // Both float widths share the same expected masks.
    let expected_lt = BooleanArray::from(vec![
        Some(false),
        Some(true),
        Some(false),
        Some(true),
        Some(false),
        Some(false),
    ]);
    let expected_lt_eq = BooleanArray::from(vec![
        Some(true),
        Some(true),
        Some(true),
        Some(true),
        Some(false),
        Some(false),
    ]);

    // 32-bit floats; the dictionary decodes to [NaN, NaN, 8, 9, 10, 1].
    {
        let plain = vec![f32::NAN, 7.0, 8.0, 8.0, 11.0, f32::NAN]
            .into_iter()
            .map(Some)
            .collect::<Float32Array>();
        let dictionary = Float32Array::from(vec![f32::NAN, 8.0, 9.0, 10.0, 1.0]);
        let dict_keys = Int8Array::from_iter_values([0_i8, 0, 1, 2, 3, 4]);
        let dict = DictionaryArray::try_new(&dict_keys, &dictionary).unwrap();
        assert_eq!(lt_dyn(&plain, &dict).unwrap(), expected_lt);
        assert_eq!(lt_eq_dyn(&plain, &dict).unwrap(), expected_lt_eq);
    }

    // 64-bit floats; same layout as above.
    {
        let plain = vec![f64::NAN, 7.0, 8.0, 8.0, 11.0, f64::NAN]
            .into_iter()
            .map(Some)
            .collect::<Float64Array>();
        let dictionary = Float64Array::from(vec![f64::NAN, 8.0, 9.0, 10.0, 1.0]);
        let dict_keys = Int8Array::from_iter_values([0_i8, 0, 1, 2, 3, 4]);
        let dict = DictionaryArray::try_new(&dict_keys, &dictionary).unwrap();
        assert_eq!(lt_dyn(&plain, &dict).unwrap(), expected_lt);
        assert_eq!(lt_eq_dyn(&plain, &dict).unwrap(), expected_lt_eq);
    }
}
/// `gt_dyn` / `gt_eq_dyn` between a plain float array and a float dictionary
/// array, both containing NaN. The expectations order NaN above every other
/// value and equal to itself.
#[test]
#[cfg(feature = "dyn_cmp_dict")]
fn test_gt_dyn_gt_eq_dyn_dict_non_dict_float_nan() {
    // Both float widths share the same expected masks.
    let expected_gt = BooleanArray::from(vec![
        Some(false),
        Some(false),
        Some(false),
        Some(false),
        Some(true),
        Some(true),
    ]);
    let expected_gt_eq = BooleanArray::from(vec![
        Some(true),
        Some(false),
        Some(true),
        Some(false),
        Some(true),
        Some(true),
    ]);

    // 32-bit floats; the dictionary decodes to [NaN, NaN, 8, 9, 10, 1].
    {
        let plain = vec![f32::NAN, 7.0, 8.0, 8.0, 11.0, f32::NAN]
            .into_iter()
            .map(Some)
            .collect::<Float32Array>();
        let dictionary = Float32Array::from(vec![f32::NAN, 8.0, 9.0, 10.0, 1.0]);
        let dict_keys = Int8Array::from_iter_values([0_i8, 0, 1, 2, 3, 4]);
        let dict = DictionaryArray::try_new(&dict_keys, &dictionary).unwrap();
        assert_eq!(gt_dyn(&plain, &dict).unwrap(), expected_gt);
        assert_eq!(gt_eq_dyn(&plain, &dict).unwrap(), expected_gt_eq);
    }

    // 64-bit floats; same layout as above.
    {
        let plain = vec![f64::NAN, 7.0, 8.0, 8.0, 11.0, f64::NAN]
            .into_iter()
            .map(Some)
            .collect::<Float64Array>();
        let dictionary = Float64Array::from(vec![f64::NAN, 8.0, 9.0, 10.0, 1.0]);
        let dict_keys = Int8Array::from_iter_values([0_i8, 0, 1, 2, 3, 4]);
        let dict = DictionaryArray::try_new(&dict_keys, &dictionary).unwrap();
        assert_eq!(gt_dyn(&plain, &dict).unwrap(), expected_gt);
        assert_eq!(gt_eq_dyn(&plain, &dict).unwrap(), expected_gt_eq);
    }
}
/// Equality kernels between a boolean dictionary array (whose values array
/// itself contains a null) and a plain `BooleanArray`, in both operand orders.
#[test]
#[cfg(feature = "dyn_cmp_dict")]
fn test_eq_dyn_neq_dyn_dictionary_to_boolean_array() {
    let dictionary = BooleanArray::from(vec![Some(true), None, Some(false)]);
    let dict_keys = Int8Array::from_iter_values([0_i8, 0, 1, 2]);
    // Decoded contents: dict = [true, true, null, false].
    let dict = DictionaryArray::try_new(&dict_keys, &dictionary).unwrap();

    let plain = vec![Some(true), None, None, Some(true)]
        .into_iter()
        .collect::<BooleanArray>();

    let expected_eq = BooleanArray::from(vec![Some(true), None, None, Some(false)]);
    assert_eq!(eq_dyn(&dict, &plain).unwrap(), expected_eq);
    assert_eq!(eq_dyn(&plain, &dict).unwrap(), expected_eq);

    let expected_neq = BooleanArray::from(vec![Some(false), None, None, Some(true)]);
    assert_eq!(neq_dyn(&dict, &plain).unwrap(), expected_neq);
    assert_eq!(neq_dyn(&plain, &dict).unwrap(), expected_neq);
}
/// Ordering kernels between a boolean dictionary array (whose values array
/// itself contains a null) and a plain `BooleanArray`, in both operand orders.
#[test]
#[cfg(feature = "dyn_cmp_dict")]
fn test_lt_dyn_lt_eq_dyn_gt_dyn_gt_eq_dyn_dictionary_to_boolean_array() {
    let dictionary = BooleanArray::from(vec![Some(true), None, Some(false)]);
    let dict_keys = Int8Array::from_iter_values([0_i8, 0, 1, 2]);
    // Decoded contents: dict = [true, true, null, false].
    let dict = DictionaryArray::try_new(&dict_keys, &dictionary).unwrap();

    let plain = vec![Some(true), None, None, Some(true)]
        .into_iter()
        .collect::<BooleanArray>();

    // Strictly less than.
    assert_eq!(
        lt_dyn(&dict, &plain).unwrap(),
        BooleanArray::from(vec![Some(false), None, None, Some(true)])
    );
    assert_eq!(
        lt_dyn(&plain, &dict).unwrap(),
        BooleanArray::from(vec![Some(false), None, None, Some(false)])
    );

    // Less than or equal.
    assert_eq!(
        lt_eq_dyn(&dict, &plain).unwrap(),
        BooleanArray::from(vec![Some(true), None, None, Some(true)])
    );
    assert_eq!(
        lt_eq_dyn(&plain, &dict).unwrap(),
        BooleanArray::from(vec![Some(true), None, None, Some(false)])
    );

    // Strictly greater than.
    assert_eq!(
        gt_dyn(&dict, &plain).unwrap(),
        BooleanArray::from(vec![Some(false), None, None, Some(false)])
    );
    assert_eq!(
        gt_dyn(&plain, &dict).unwrap(),
        BooleanArray::from(vec![Some(false), None, None, Some(true)])
    );

    // Greater than or equal.
    assert_eq!(
        gt_eq_dyn(&dict, &plain).unwrap(),
        BooleanArray::from(vec![Some(true), None, None, Some(false)])
    );
    assert_eq!(
        gt_eq_dyn(&plain, &dict).unwrap(),
        BooleanArray::from(vec![Some(true), None, None, Some(true)])
    );
}
}