ARROW-9897: [C++][Gandiva] Added to_date function
signature: date64 to_date(utf8, utf8)
Closes #8095 from projjal/todate and squashes the following commits:
f896d46c6 <Projjal Chanda> added to_date function
Authored-by: Projjal Chanda <iam@pchanda.com>
Signed-off-by: Praveen <praveen@dremio.com>
diff --git a/cpp/src/gandiva/function_registry_datetime.cc b/cpp/src/gandiva/function_registry_datetime.cc
index cd4ae00..0688970 100644
--- a/cpp/src/gandiva/function_registry_datetime.cc
+++ b/cpp/src/gandiva/function_registry_datetime.cc
@@ -16,6 +16,7 @@
// under the License.
#include "gandiva/function_registry_datetime.h"
+
#include "gandiva/function_registry_common.h"
namespace gandiva {
@@ -56,6 +57,12 @@
kResultNullIfNull, "castVARCHAR_timestamp_int64",
NativeFunction::kNeedsContext),
+ NativeFunction("to_date", {}, DataTypeVector{utf8(), utf8()}, date64(),
+ kResultNullInternal, "gdv_fn_to_date_utf8_utf8",
+ NativeFunction::kNeedsContext |
+ NativeFunction::kNeedsFunctionHolder |
+ NativeFunction::kCanReturnErrors),
+
NativeFunction("to_date", {}, DataTypeVector{utf8(), utf8(), int32()}, date64(),
kResultNullInternal, "gdv_fn_to_date_utf8_utf8_int32",
NativeFunction::kNeedsContext |
diff --git a/cpp/src/gandiva/gdv_function_stubs.cc b/cpp/src/gandiva/gdv_function_stubs.cc
index 042f525..ad3036f 100644
--- a/cpp/src/gandiva/gdv_function_stubs.cc
+++ b/cpp/src/gandiva/gdv_function_stubs.cc
@@ -49,6 +49,16 @@
return (*holder)();
}
+int64_t gdv_fn_to_date_utf8_utf8(int64_t context_ptr, int64_t holder_ptr,
+                                 const char* data, int data_len, bool in1_validity,
+                                 const char* pattern, int pattern_len, bool in2_validity,
+                                 bool* out_valid) {
+  // pattern, pattern_len and in2_validity are accepted but not forwarded to the holder.
+  auto* to_date = reinterpret_cast<gandiva::ToDateHolder*>(holder_ptr);
+  auto* exec_ctx = reinterpret_cast<gandiva::ExecutionContext*>(context_ptr);
+  return (*to_date)(exec_ctx, data, data_len, in1_validity, out_valid);
+}
+
int64_t gdv_fn_to_date_utf8_utf8_int32(int64_t context_ptr, int64_t holder_ptr,
const char* data, int data_len, bool in1_validity,
const char* pattern, int pattern_len,
@@ -187,6 +197,21 @@
types->i1_type() /*return_type*/, args,
reinterpret_cast<void*>(gdv_fn_like_utf8_utf8));
+ // gdv_fn_to_date_utf8_utf8
+ args = {types->i64_type(), // int64_t execution_context
+ types->i64_type(), // int64_t holder_ptr
+ types->i8_ptr_type(), // const char* data
+ types->i32_type(), // int data_len
+ types->i1_type(), // bool in1_validity
+ types->i8_ptr_type(), // const char* pattern
+ types->i32_type(), // int pattern_len
+ types->i1_type(), // bool in2_validity
+ types->ptr_type(types->i8_type())}; // bool* out_valid
+
+ engine->AddGlobalMappingForFunc("gdv_fn_to_date_utf8_utf8",
+ types->i64_type() /*return_type*/, args,
+ reinterpret_cast<void*>(gdv_fn_to_date_utf8_utf8));
+
// gdv_fn_to_date_utf8_utf8_int32
args = {types->i64_type(), // int64_t execution_context
types->i64_type(), // int64_t holder_ptr
diff --git a/cpp/src/gandiva/tests/projector_test.cc b/cpp/src/gandiva/tests/projector_test.cc
index b55093e..1ac04cd 100644
--- a/cpp/src/gandiva/tests/projector_test.cc
+++ b/cpp/src/gandiva/tests/projector_test.cc
@@ -15,13 +15,15 @@
// specific language governing permissions and limitations
// under the License.
-#include <cmath>
+#include "gandiva/projector.h"
#include <gtest/gtest.h>
-#include "arrow/memory_pool.h"
+#include <cmath>
-#include "gandiva/projector.h"
+#include "arrow/memory_pool.h"
+#include "gandiva/literal_holder.h"
+#include "gandiva/node.h"
#include "gandiva/tests/test_util.h"
#include "gandiva/tree_expr_builder.h"
@@ -766,4 +768,45 @@
EXPECT_ARROW_ARRAY_EQUALS(exp_sum, outputs.at(0));
}
+TEST_F(TestProjector, TestToDate) {
+  // Exercises to_date(utf8, utf8) -> date64. Schema: a single utf8 input field.
+  auto field0 = field("f0", arrow::utf8());
+  auto field_node = std::make_shared<FieldNode>(field0);
+  auto schema = arrow::schema({field0});
+
+  // output fields
+  auto field_result = field("res", arrow::date64());
+  // Pass std::string explicitly so the holder cannot pick a non-string alternative.
+  auto pattern_node = std::make_shared<LiteralNode>(
+      arrow::utf8(), LiteralHolder(std::string("YYYY-MM-DD")), false);
+
+  // Build expression
+  auto fn_node = TreeExprBuilder::MakeFunction("to_date", {field_node, pattern_node},
+                                               arrow::date64());
+  auto expr = TreeExprBuilder::MakeExpression(fn_node, field_result);
+
+  // Build a projector for the expressions; stop here on failure (projector is unusable).
+  std::shared_ptr<Projector> projector;
+  auto status = Projector::Make(schema, {expr}, TestConfiguration(), &projector);
+  ASSERT_TRUE(status.ok()) << status.message();
+
+  // Create a row-batch with some sample data (the third row is marked null)
+  int num_records = 3;
+  auto array0 =
+      MakeArrowArrayUtf8({"1986-12-01", "2012-12-01", "invalid"}, {true, true, false});
+  // expected output (the null input row is expected to stay null)
+  auto exp = MakeArrowArrayDate64({533779200000, 1354320000000, 0}, {true, true, false});
+
+  // prepare input record batch
+  auto in_batch = arrow::RecordBatch::Make(schema, num_records, {array0});
+
+  // Evaluate expression; stop on failure so outputs.at(0) below is never out of range.
+  arrow::ArrayVector outputs;
+  status = projector->Evaluate(*in_batch, pool_, &outputs);
+  ASSERT_TRUE(status.ok()) << status.message();
+
+  // Validate results
+  EXPECT_ARROW_ARRAY_EQUALS(exp, outputs.at(0));
+}
+
} // namespace gandiva
diff --git a/cpp/src/gandiva/tests/test_util.h b/cpp/src/gandiva/tests/test_util.h
index da2cd69..5427043 100644
--- a/cpp/src/gandiva/tests/test_util.h
+++ b/cpp/src/gandiva/tests/test_util.h
@@ -19,6 +19,7 @@
#include <memory>
#include <utility>
#include <vector>
+
#include "arrow/testing/gtest_util.h"
#include "gandiva/arrow.h"
#include "gandiva/configuration.h"
@@ -76,6 +77,7 @@
#define MakeArrowArrayUint64 MakeArrowArray<arrow::UInt64Type, uint64_t>
#define MakeArrowArrayFloat32 MakeArrowArray<arrow::FloatType, float>
#define MakeArrowArrayFloat64 MakeArrowArray<arrow::DoubleType, double>
+#define MakeArrowArrayDate64 MakeArrowArray<arrow::Date64Type, int64_t>
#define MakeArrowArrayUtf8 MakeArrowArray<arrow::StringType, std::string>
#define MakeArrowArrayBinary MakeArrowArray<arrow::BinaryType, std::string>
#define MakeArrowArrayDecimal MakeArrowArray<arrow::Decimal128Type, arrow::Decimal128>
diff --git a/cpp/src/gandiva/to_date_holder.cc b/cpp/src/gandiva/to_date_holder.cc
index 1a75f57..1b7e286 100644
--- a/cpp/src/gandiva/to_date_holder.cc
+++ b/cpp/src/gandiva/to_date_holder.cc
@@ -15,23 +15,23 @@
// specific language governing permissions and limitations
// under the License.
+#include "gandiva/to_date_holder.h"
+
#include <algorithm>
#include <string>
#include "arrow/util/value_parsing.h"
#include "arrow/vendored/datetime.h"
-
#include "gandiva/date_utils.h"
#include "gandiva/execution_context.h"
#include "gandiva/node.h"
-#include "gandiva/to_date_holder.h"
namespace gandiva {
Status ToDateHolder::Make(const FunctionNode& node,
std::shared_ptr<ToDateHolder>* holder) {
- if (node.children().size() != 3) {
- return Status::Invalid("'to_date' function requires three parameters");
+ if (node.children().size() != 2 && node.children().size() != 3) {
+ return Status::Invalid("'to_date' function requires two or three parameters");
}
auto literal_pattern = dynamic_cast<LiteralNode*>(node.children().at(1).get());
@@ -47,18 +47,25 @@
}
auto pattern = arrow::util::get<std::string>(literal_pattern->holder());
- auto literal_suppress_errors = dynamic_cast<LiteralNode*>(node.children().at(2).get());
- if (literal_pattern == nullptr) {
- return Status::Invalid(
- "'to_date' function requires a int literal as the third parameter");
+  int suppress_errors = 0;  // stays 0 when the optional third argument is omitted
+  if (node.children().size() == 3) {
+    auto literal_suppress_errors =
+        dynamic_cast<LiteralNode*>(node.children().at(2).get());
+    if (literal_suppress_errors == nullptr) {  // third argument is not a literal
+      return Status::Invalid(
+          "The (optional) third parameter to 'to_date' function needs to be an integer "
+          "literal to indicate whether to suppress the error");
+    }
+
+    literal_type = literal_suppress_errors->return_type()->id();
+    if (literal_type != arrow::Type::INT32) {
+      return Status::Invalid(
+          "The (optional) third parameter to 'to_date' function needs to be an integer "
+          "literal to indicate whether to suppress the error");
+    }
+    suppress_errors = arrow::util::get<int>(literal_suppress_errors->holder());
}
- literal_type = literal_suppress_errors->return_type()->id();
- if (literal_type != arrow::Type::INT32) {
- return Status::Invalid(
- "'to_date' function requires a int literal as the third parameter");
- }
- auto suppress_errors = arrow::util::get<int>(literal_suppress_errors->holder());
return Make(pattern, suppress_errors, holder);
}