Apply non-nested kernel for non-nested types in `array_has` and `inlist` (#12164)
* use non-nested kernel for non-nested types
Signed-off-by: jayzhan211 <jayzhan211@gmail.com>
* check is_nested outside of func
Signed-off-by: jayzhan211 <jayzhan211@gmail.com>
---------
Signed-off-by: jayzhan211 <jayzhan211@gmail.com>
diff --git a/datafusion/functions-nested/src/array_has.rs b/datafusion/functions-nested/src/array_has.rs
index 9b4357d..7f66eba 100644
--- a/datafusion/functions-nested/src/array_has.rs
+++ b/datafusion/functions-nested/src/array_has.rs
@@ -24,9 +24,8 @@
use datafusion_common::cast::as_generic_list_array;
use datafusion_common::utils::string_utils::string_array_to_vec;
use datafusion_common::{exec_err, Result, ScalarValue};
-use datafusion_expr::{ColumnarValue, Operator, ScalarUDFImpl, Signature, Volatility};
-
-use datafusion_physical_expr_common::datum::compare_op_for_nested;
+use datafusion_expr::{ColumnarValue, ScalarUDFImpl, Signature, Volatility};
+use datafusion_physical_expr_common::datum::compare_with_eq;
use itertools::Itertools;
use crate::utils::make_scalar_function;
@@ -180,8 +179,9 @@
continue;
}
let arr = arr.unwrap();
+ let is_nested = arr.data_type().is_nested();
let needle_row = Scalar::new(needle.slice(i, 1));
- let eq_array = compare_op_for_nested(Operator::Eq, &arr, &needle_row)?;
+ let eq_array = compare_with_eq(&arr, &needle_row, is_nested)?;
let is_contained = eq_array.true_count() > 0;
boolean_builder.append_value(is_contained)
}
@@ -195,13 +195,14 @@
) -> Result<ArrayRef> {
let haystack = as_generic_list_array::<O>(haystack)?;
let values = haystack.values();
+ let is_nested = values.data_type().is_nested();
let offsets = haystack.value_offsets();
// If first argument is empty list (second argument is non-null), return false
// i.e. array_has([], non-null element) -> false
if values.len() == 0 {
return Ok(Arc::new(BooleanArray::from(vec![Some(false)])));
}
- let eq_array = compare_op_for_nested(Operator::Eq, values, needle)?;
+ let eq_array = compare_with_eq(values, needle, is_nested)?;
let mut final_contained = vec![None; haystack.len()];
for (i, offset) in offsets.windows(2).enumerate() {
let start = offset[0].to_usize().unwrap();
diff --git a/datafusion/physical-expr-common/src/datum.rs b/datafusion/physical-expr-common/src/datum.rs
index 96c08d0..c47ec9d 100644
--- a/datafusion/physical-expr-common/src/datum.rs
+++ b/datafusion/physical-expr-common/src/datum.rs
@@ -20,7 +20,8 @@
use arrow::buffer::NullBuffer;
use arrow::compute::SortOptions;
use arrow::error::ArrowError;
-use datafusion_common::internal_err;
+use datafusion_common::DataFusionError;
+use datafusion_common::{arrow_datafusion_err, internal_err};
use datafusion_common::{Result, ScalarValue};
use datafusion_expr_common::columnar_value::ColumnarValue;
use datafusion_expr_common::operator::Operator;
@@ -87,6 +88,19 @@
}
}
+/// Compares `lhs` and `rhs` for equality: uses [`compare_op_for_nested`] with `Operator::Eq` when `is_nested` is true, otherwise arrow's `cmp::eq` kernel (arrow errors are converted via `arrow_datafusion_err!`).
+pub fn compare_with_eq(
+ lhs: &dyn Datum,
+ rhs: &dyn Datum,
+ is_nested: bool,
+) -> Result<BooleanArray> {
+ if is_nested {
+ compare_op_for_nested(Operator::Eq, lhs, rhs)
+ } else {
+ arrow::compute::kernels::cmp::eq(lhs, rhs).map_err(|e| arrow_datafusion_err!(e))
+ }
+}
+
/// Compare on nested type List, Struct, and so on
pub fn compare_op_for_nested(
op: Operator,
diff --git a/datafusion/physical-expr/src/expressions/in_list.rs b/datafusion/physical-expr/src/expressions/in_list.rs
index dfc7055..0a3e5fc 100644
--- a/datafusion/physical-expr/src/expressions/in_list.rs
+++ b/datafusion/physical-expr/src/expressions/in_list.rs
@@ -40,8 +40,8 @@
use datafusion_common::{
exec_err, internal_err, not_impl_err, DFSchema, Result, ScalarValue,
};
-use datafusion_expr::{ColumnarValue, Operator};
-use datafusion_physical_expr_common::datum::compare_op_for_nested;
+use datafusion_expr::ColumnarValue;
+use datafusion_physical_expr_common::datum::compare_with_eq;
use ahash::RandomState;
use hashbrown::hash_map::RawEntryMut;
@@ -356,17 +356,16 @@
Some(f) => f.contains(value.into_array(num_rows)?.as_ref(), self.negated)?,
None => {
let value = value.into_array(num_rows)?;
+ let is_nested = value.data_type().is_nested();
let found = self.list.iter().map(|expr| expr.evaluate(batch)).try_fold(
BooleanArray::new(BooleanBuffer::new_unset(num_rows), None),
|result, expr| -> Result<BooleanArray> {
- Ok(or_kleene(
- &result,
- &compare_op_for_nested(
- Operator::Eq,
- &value,
- &expr?.into_array(num_rows)?,
- )?,
- )?)
+ let rhs = compare_with_eq(
+ &value,
+ &expr?.into_array(num_rows)?,
+ is_nested,
+ )?;
+ Ok(or_kleene(&result, &rhs)?)
},
)?;