blob: 1e408f0e43102019325ce776d4ad14a50b266c37 [file] [log] [blame]
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#include <gtest/gtest.h>
#include "arrow/memory_pool.h"
#include "arrow/status.h"
#include "arrow/util/logging_internal.h"
#include "gandiva/projector.h"
#include "gandiva/tests/test_util.h"
#include "gandiva/tree_expr_builder.h"
namespace gandiva {
using arrow::boolean;
using arrow::date64;
using arrow::int32;
using arrow::int64;
using arrow::utf8;
class TestUtf8 : public ::testing::Test {
public:
void SetUp() { pool_ = arrow::default_memory_pool(); }
protected:
arrow::MemoryPool* pool_;
};
TEST_F(TestUtf8, TestSimple) {
// schema for input fields
auto field_a = field("a", utf8());
auto schema = arrow::schema({field_a});
// output fields
auto res_1 = field("res1", int32());
auto res_2 = field("res2", boolean());
auto res_3 = field("res3", int32());
// build expressions.
// octet_length(a)
// octet_length(a) == bit_length(a) / 8
// length(a)
auto expr_a = TreeExprBuilder::MakeExpression("octet_length", {field_a}, res_1);
auto node_a = TreeExprBuilder::MakeField(field_a);
auto octet_length = TreeExprBuilder::MakeFunction("octet_length", {node_a}, int32());
auto literal_8 = TreeExprBuilder::MakeLiteral((int32_t)8);
auto bit_length = TreeExprBuilder::MakeFunction("bit_length", {node_a}, int32());
auto div_8 = TreeExprBuilder::MakeFunction("divide", {bit_length, literal_8}, int32());
auto is_equal =
TreeExprBuilder::MakeFunction("equal", {octet_length, div_8}, boolean());
auto expr_b = TreeExprBuilder::MakeExpression(is_equal, res_2);
auto expr_c = TreeExprBuilder::MakeExpression("length", {field_a}, res_3);
// Build a projector for the expressions.
std::shared_ptr<Projector> projector;
auto status =
Projector::Make(schema, {expr_a, expr_b, expr_c}, TestConfiguration(), &projector);
EXPECT_TRUE(status.ok()) << status.message();
// Create a row-batch with some sample data
int num_records = 5;
auto array_a = MakeArrowArrayUtf8({"foo", "hello", "bye", "hi", "मदन"},
{true, true, false, true, true});
// expected output
auto exp_1 = MakeArrowArrayInt32({3, 5, 0, 2, 9}, {true, true, false, true, true});
auto exp_2 = MakeArrowArrayBool({true, true, false, true, true},
{true, true, false, true, true});
auto exp_3 = MakeArrowArrayInt32({3, 5, 0, 2, 3}, {true, true, false, true, true});
// prepare input record batch
auto in_batch = arrow::RecordBatch::Make(schema, num_records, {array_a});
// Evaluate expression
arrow::ArrayVector outputs;
status = projector->Evaluate(*in_batch, pool_, &outputs);
EXPECT_TRUE(status.ok());
// Validate results
EXPECT_ARROW_ARRAY_EQUALS(exp_1, outputs.at(0));
EXPECT_ARROW_ARRAY_EQUALS(exp_2, outputs.at(1));
EXPECT_ARROW_ARRAY_EQUALS(exp_3, outputs.at(2));
}
TEST_F(TestUtf8, TestLiteral) {
// schema for input fields
auto field_a = field("a", utf8());
auto schema = arrow::schema({field_a});
// output fields
auto res = field("res", boolean());
// build expressions.
// a == literal(s)
auto node_a = TreeExprBuilder::MakeField(field_a);
auto literal_s = TreeExprBuilder::MakeStringLiteral("hello");
auto is_equal = TreeExprBuilder::MakeFunction("equal", {node_a, literal_s}, boolean());
auto expr = TreeExprBuilder::MakeExpression(is_equal, res);
// Build a projector for the expressions.
std::shared_ptr<Projector> projector;
auto status = Projector::Make(schema, {expr}, TestConfiguration(), &projector);
EXPECT_TRUE(status.ok()) << status.message();
// Create a row-batch with some sample data
int num_records = 4;
auto array_a =
MakeArrowArrayUtf8({"foo", "hello", "bye", "hi"}, {true, true, true, false});
// expected output
auto exp = MakeArrowArrayBool({false, true, false, false}, {true, true, true, false});
// prepare input record batch
auto in_batch = arrow::RecordBatch::Make(schema, num_records, {array_a});
// Evaluate expression
arrow::ArrayVector outputs;
status = projector->Evaluate(*in_batch, pool_, &outputs);
EXPECT_TRUE(status.ok());
// Validate results
EXPECT_ARROW_ARRAY_EQUALS(exp, outputs.at(0));
}
TEST_F(TestUtf8, TestNullLiteral) {
// schema for input fields
auto field_a = field("a", utf8());
auto schema = arrow::schema({field_a});
// output fields
auto res = field("res", boolean());
// build expressions.
// a == literal(null)
auto node_a = TreeExprBuilder::MakeField(field_a);
auto literal_null = TreeExprBuilder::MakeNull(arrow::utf8());
auto is_equal =
TreeExprBuilder::MakeFunction("equal", {node_a, literal_null}, boolean());
auto expr = TreeExprBuilder::MakeExpression(is_equal, res);
// Build a projector for the expressions.
std::shared_ptr<Projector> projector;
auto status = Projector::Make(schema, {expr}, TestConfiguration(), &projector);
EXPECT_TRUE(status.ok()) << status.message();
// Create a row-batch with some sample data
int num_records = 4;
auto array_a =
MakeArrowArrayUtf8({"foo", "hello", "bye", "hi"}, {true, true, true, false});
// expected output
auto exp =
MakeArrowArrayBool({false, false, false, false}, {false, false, false, false});
// prepare input record batch
auto in_batch = arrow::RecordBatch::Make(schema, num_records, {array_a});
// Evaluate expression
arrow::ArrayVector outputs;
status = projector->Evaluate(*in_batch, pool_, &outputs);
EXPECT_TRUE(status.ok());
// Validate results
EXPECT_ARROW_ARRAY_EQUALS(exp, outputs.at(0));
}
TEST_F(TestUtf8, TestLike) {
// schema for input fields
auto field_a = field("a", utf8());
auto schema = arrow::schema({field_a});
// output fields
auto res = field("res", boolean());
// build expressions.
// like(literal(s), a)
auto node_a = TreeExprBuilder::MakeField(field_a);
auto literal_s = TreeExprBuilder::MakeStringLiteral("%spark%");
auto is_like = TreeExprBuilder::MakeFunction("like", {node_a, literal_s}, boolean());
auto expr = TreeExprBuilder::MakeExpression(is_like, res);
// Build a projector for the expressions.
std::shared_ptr<Projector> projector;
auto status = Projector::Make(schema, {expr}, TestConfiguration(), &projector);
EXPECT_TRUE(status.ok()) << status.message();
// Create a row-batch with some sample data
int num_records = 4;
auto array_a = MakeArrowArrayUtf8({"park", "sparkle", "bright spark and fire", "spark"},
{true, true, true, true});
// expected output
auto exp = MakeArrowArrayBool({false, true, true, true}, {true, true, true, true});
// prepare input record batch
auto in_batch = arrow::RecordBatch::Make(schema, num_records, {array_a});
// Evaluate expression
arrow::ArrayVector outputs;
status = projector->Evaluate(*in_batch, pool_, &outputs);
EXPECT_TRUE(status.ok()) << status.message();
// Validate results
EXPECT_ARROW_ARRAY_EQUALS(exp, outputs.at(0));
}
TEST_F(TestUtf8, TestIlike) {
// schema for input fields
auto field_a = field("a", utf8());
auto schema = arrow::schema({field_a});
// output fields
auto res = field("res", boolean());
// build expressions.
// ilike(literal(s), a)
auto node_a = TreeExprBuilder::MakeField(field_a);
auto literal_s = TreeExprBuilder::MakeStringLiteral("%sparK%");
auto is_like = TreeExprBuilder::MakeFunction("ilike", {node_a, literal_s}, boolean());
auto expr = TreeExprBuilder::MakeExpression(is_like, res);
// Build a projector for the expressions.
std::shared_ptr<Projector> projector;
auto status = Projector::Make(schema, {expr}, TestConfiguration(), &projector);
EXPECT_TRUE(status.ok()) << status.message();
// Create a row-batch with some sample data
int num_records = 4;
auto array_a = MakeArrowArrayUtf8({"park", "sParKle", "bright spark and fire", "spark"},
{true, true, true, true});
// expected output
auto exp = MakeArrowArrayBool({false, true, true, true}, {true, true, true, true});
// prepare input record batch
auto in_batch = arrow::RecordBatch::Make(schema, num_records, {array_a});
// Evaluate expression
arrow::ArrayVector outputs;
status = projector->Evaluate(*in_batch, pool_, &outputs);
EXPECT_TRUE(status.ok()) << status.message();
// Validate results
EXPECT_ARROW_ARRAY_EQUALS(exp, outputs.at(0));
}
TEST_F(TestUtf8, TestLikeWithEscape) {
// schema for input fields
auto field_a = field("a", utf8());
auto schema = arrow::schema({field_a});
// output fields
auto res = field("res", boolean());
// build expressions.
// like(literal(s), a, '\')
auto node_a = TreeExprBuilder::MakeField(field_a);
auto literal_s = TreeExprBuilder::MakeStringLiteral("%pa\\%rk%");
auto escape_char = TreeExprBuilder::MakeStringLiteral("\\");
auto is_like =
TreeExprBuilder::MakeFunction("like", {node_a, literal_s, escape_char}, boolean());
auto expr = TreeExprBuilder::MakeExpression(is_like, res);
// Build a projector for the expressions.
std::shared_ptr<Projector> projector;
auto status = Projector::Make(schema, {expr}, TestConfiguration(), &projector);
EXPECT_TRUE(status.ok()) << status.message();
// Create a row-batch with some sample data
int num_records = 4;
auto array_a = MakeArrowArrayUtf8(
{"park", "spa%rkle", "bright spa%rk and fire", "spark"}, {true, true, true, true});
// expected output
auto exp = MakeArrowArrayBool({false, true, true, false}, {true, true, true, true});
// prepare input record batch
auto in_batch = arrow::RecordBatch::Make(schema, num_records, {array_a});
// Evaluate expression
arrow::ArrayVector outputs;
status = projector->Evaluate(*in_batch, pool_, &outputs);
EXPECT_TRUE(status.ok()) << status.message();
// Validate results
EXPECT_ARROW_ARRAY_EQUALS(exp, outputs.at(0));
}
TEST_F(TestUtf8, TestBeginsEnds) {
// schema for input fields
auto field_a = field("a", utf8());
auto schema = arrow::schema({field_a});
// output fields
auto res1 = field("res1", boolean());
auto res2 = field("res2", boolean());
// build expressions.
// like(literal("spark%"), a)
// like(literal("%spark"), a)
auto node_a = TreeExprBuilder::MakeField(field_a);
auto literal_begin = TreeExprBuilder::MakeStringLiteral("spark%");
auto is_like1 =
TreeExprBuilder::MakeFunction("like", {node_a, literal_begin}, boolean());
auto expr1 = TreeExprBuilder::MakeExpression(is_like1, res1);
auto literal_end = TreeExprBuilder::MakeStringLiteral("%spark");
auto is_like2 = TreeExprBuilder::MakeFunction("like", {node_a, literal_end}, boolean());
auto expr2 = TreeExprBuilder::MakeExpression(is_like2, res2);
// Build a projector for the expressions.
std::shared_ptr<Projector> projector;
auto status = Projector::Make(schema, {expr1, expr2}, TestConfiguration(), &projector);
EXPECT_TRUE(status.ok()) << status.message();
// Create a row-batch with some sample data
int num_records = 4;
auto array_a =
MakeArrowArrayUtf8({"park", "sparkle", "bright spark and fire", "fiery spark"},
{true, true, true, true});
// expected output
auto exp1 = MakeArrowArrayBool({false, true, false, false}, {true, true, true, true});
auto exp2 = MakeArrowArrayBool({false, false, false, true}, {true, true, true, true});
// prepare input record batch
auto in_batch = arrow::RecordBatch::Make(schema, num_records, {array_a});
// Evaluate expression
arrow::ArrayVector outputs;
status = projector->Evaluate(*in_batch, pool_, &outputs);
EXPECT_TRUE(status.ok()) << status.message();
// Validate results
EXPECT_ARROW_ARRAY_EQUALS(exp1, outputs.at(0));
EXPECT_ARROW_ARRAY_EQUALS(exp2, outputs.at(1));
}
TEST_F(TestUtf8, TestInternalAllocs) {
// schema for input fields
auto field_a = field("a", utf8());
auto schema = arrow::schema({field_a});
// output fields
auto res = field("res", boolean());
// build expressions.
// like(upper(a), literal("%SPARK%"))
auto node_a = TreeExprBuilder::MakeField(field_a);
auto upper_a = TreeExprBuilder::MakeFunction("upper", {node_a}, utf8());
auto literal_spark = TreeExprBuilder::MakeStringLiteral("%SPARK%");
auto is_like =
TreeExprBuilder::MakeFunction("like", {upper_a, literal_spark}, boolean());
auto expr = TreeExprBuilder::MakeExpression(is_like, res);
// Build a projector for the expressions.
std::shared_ptr<Projector> projector;
auto status = Projector::Make(schema, {expr}, TestConfiguration(), &projector);
EXPECT_TRUE(status.ok()) << status.message();
// Create a row-batch with some sample data
int num_records = 5;
auto array_a = MakeArrowArrayUtf8(
{"park", "Sparkle", "bright spark and fire", "fiery SPARK", "मदन"},
{true, true, false, true, true});
// expected output
auto exp = MakeArrowArrayBool({false, true, false, true, false},
{true, true, false, true, true});
// prepare input record batch
auto in_batch = arrow::RecordBatch::Make(schema, num_records, {array_a});
// Evaluate expression
arrow::ArrayVector outputs;
status = projector->Evaluate(*in_batch, pool_, &outputs);
EXPECT_TRUE(status.ok()) << status.message();
// Validate results
EXPECT_ARROW_ARRAY_EQUALS(exp, outputs.at(0));
}
TEST_F(TestUtf8, TestCastDate) {
// schema for input fields
auto field_a = field("a", utf8());
auto schema = arrow::schema({field_a});
// output fields
auto res_1 = field("res1", int64());
// build expressions.
// extractYear(castDATE(a))
auto node_a = TreeExprBuilder::MakeField(field_a);
auto cast_function = TreeExprBuilder::MakeFunction("castDATE", {node_a}, date64());
auto extract_year =
TreeExprBuilder::MakeFunction("extractYear", {cast_function}, int64());
auto expr = TreeExprBuilder::MakeExpression(extract_year, res_1);
// Build a projector for the expressions.
std::shared_ptr<Projector> projector;
auto status = Projector::Make(schema, {expr}, TestConfiguration(), &projector);
EXPECT_TRUE(status.ok()) << status.message();
// Create a row-batch with some sample data
int num_records = 4;
auto array_a = MakeArrowArrayUtf8({"1967-12-1", "67-12-01", "incorrect", "67-45-11"},
{true, true, false, true});
// expected output
auto exp_1 = MakeArrowArrayInt64({1967, 2067, 0, 0}, {true, true, false, false});
// prepare input record batch
auto in_batch = arrow::RecordBatch::Make(schema, num_records, {array_a});
// Evaluate expression
arrow::ArrayVector outputs;
status = projector->Evaluate(*in_batch, pool_, &outputs);
EXPECT_EQ(status.code(), StatusCode::ExecutionError);
std::string expected_error = "Not a valid date value ";
EXPECT_TRUE(status.message().find(expected_error) != std::string::npos);
auto array_a_2 = MakeArrowArrayUtf8({"1967-12-1", "67-12-01", "67-1-1", "91-1-1"},
{true, true, true, true});
auto exp_2 = MakeArrowArrayInt64({1967, 2067, 2067, 1991}, {true, true, true, true});
auto in_batch_2 = arrow::RecordBatch::Make(schema, num_records, {array_a_2});
arrow::ArrayVector outputs2;
status = projector->Evaluate(*in_batch_2, pool_, &outputs2);
EXPECT_TRUE(status.ok()) << status.message();
// Validate results
EXPECT_ARROW_ARRAY_EQUALS(exp_2, outputs2.at(0));
}
TEST_F(TestUtf8, TestToDateNoError) {
// schema for input fields
auto field_a = field("a", utf8());
auto schema = arrow::schema({field_a});
// output fields
auto res_1 = field("res1", int64());
// build expressions.
// extractYear(castDATE(a))
auto node_a = TreeExprBuilder::MakeField(field_a);
auto node_b = TreeExprBuilder::MakeStringLiteral("YYYY-MM-DD");
auto node_c = TreeExprBuilder::MakeLiteral(1);
auto cast_function =
TreeExprBuilder::MakeFunction("to_date", {node_a, node_b, node_c}, date64());
auto extract_year =
TreeExprBuilder::MakeFunction("extractYear", {cast_function}, int64());
auto expr = TreeExprBuilder::MakeExpression(extract_year, res_1);
// Build a projector for the expressions.
std::shared_ptr<Projector> projector;
auto status = Projector::Make(schema, {expr}, TestConfiguration(), &projector);
EXPECT_TRUE(status.ok()) << status.message();
// Create a row-batch with some sample data
int num_records = 4;
auto array_a = MakeArrowArrayUtf8({"1967-12-1", "67-12-01", "incorrect", "67-45-11"},
{true, true, false, true});
// expected output
auto exp_1 = MakeArrowArrayInt64({1967, 67, 0, 0}, {true, true, false, false});
// prepare input record batch
auto in_batch = arrow::RecordBatch::Make(schema, num_records, {array_a});
// Evaluate expression
arrow::ArrayVector outputs;
status = projector->Evaluate(*in_batch, pool_, &outputs);
EXPECT_TRUE(status.ok()) << status.message();
EXPECT_ARROW_ARRAY_EQUALS(exp_1, outputs.at(0));
// Create a row-batch with some sample data
auto array_a_2 = MakeArrowArrayUtf8(
{"1967-12-1", "1967-12-01", "1967-11-11", "1991-11-11"}, {true, true, true, true});
auto exp_2 = MakeArrowArrayInt64({1967, 1967, 1967, 1991}, {true, true, true, true});
auto in_batch_2 = arrow::RecordBatch::Make(schema, num_records, {array_a_2});
arrow::ArrayVector outputs2;
status = projector->Evaluate(*in_batch_2, pool_, &outputs2);
EXPECT_TRUE(status.ok()) << status.message();
// Validate results
EXPECT_ARROW_ARRAY_EQUALS(exp_2, outputs2.at(0));
}
TEST_F(TestUtf8, TestToDateError) {
// schema for input fields
auto field_a = field("a", utf8());
auto schema = arrow::schema({field_a});
// output fields
auto res_1 = field("res1", int64());
// build expressions.
// extractYear(castDATE(a))
auto node_a = TreeExprBuilder::MakeField(field_a);
auto node_b = TreeExprBuilder::MakeStringLiteral("YYYY-MM-DD");
auto node_c = TreeExprBuilder::MakeLiteral(0);
auto cast_function =
TreeExprBuilder::MakeFunction("to_date", {node_a, node_b, node_c}, date64());
auto extract_year =
TreeExprBuilder::MakeFunction("extractYear", {cast_function}, int64());
auto expr = TreeExprBuilder::MakeExpression(extract_year, res_1);
// Build a projector for the expressions.
std::shared_ptr<Projector> projector;
auto status = Projector::Make(schema, {expr}, TestConfiguration(), &projector);
EXPECT_TRUE(status.ok()) << status.message();
// Create a row-batch with some sample data
int num_records = 4;
auto array_a = MakeArrowArrayUtf8({"1967-12-1", "67-12-01", "incorrect", "67-45-11"},
{true, true, false, true});
// expected output
auto exp_1 = MakeArrowArrayInt64({1967, 67, 0, 0}, {true, true, false, false});
// prepare input record batch
auto in_batch = arrow::RecordBatch::Make(schema, num_records, {array_a});
// Evaluate expression
arrow::ArrayVector outputs;
status = projector->Evaluate(*in_batch, pool_, &outputs);
EXPECT_EQ(status.code(), StatusCode::ExecutionError);
std::string expected_error = "Error parsing value 67-45-11 for given format";
EXPECT_TRUE(status.message().find(expected_error) != std::string::npos)
<< status.message();
}
TEST_F(TestUtf8, TestIsNull) {
// schema for input fields
auto field_a = field("a", utf8());
auto schema = arrow::schema({field_a});
// build expressions
auto exprs = std::vector<ExpressionPtr>{
TreeExprBuilder::MakeExpression("isnull", {field_a}, field("is_null", boolean())),
TreeExprBuilder::MakeExpression("isnotnull", {field_a},
field("is_not_null", boolean())),
};
// Build a projector for the expressions.
std::shared_ptr<Projector> projector;
auto status = Projector::Make(schema, exprs, TestConfiguration(), &projector);
DCHECK_OK(status);
// Create a row-batch with some sample data
int num_records = 4;
auto array_a = MakeArrowArrayUtf8({"hello", "world", "incorrect", "universe"},
{true, true, false, true});
// prepare input record batch
auto in_batch = arrow::RecordBatch::Make(schema, num_records, {array_a});
// Evaluate expression
arrow::ArrayVector outputs;
status = projector->Evaluate(*in_batch, pool_, &outputs);
// validate results
EXPECT_ARROW_ARRAY_EQUALS(MakeArrowArrayBool({false, false, true, false}),
outputs[0]); // isnull
EXPECT_ARROW_ARRAY_EQUALS(MakeArrowArrayBool({true, true, false, true}),
outputs[1]); // isnotnull
}
TEST_F(TestUtf8, TestVarlenOutput) {
// schema for input fields
auto field_a = field("a", boolean());
auto schema = arrow::schema({field_a});
// build expressions.
// if (a) literal_hi else literal_bye
auto if_node = TreeExprBuilder::MakeIf(
TreeExprBuilder::MakeField(field_a), TreeExprBuilder::MakeStringLiteral("hi"),
TreeExprBuilder::MakeStringLiteral("bye"), utf8());
auto expr = TreeExprBuilder::MakeExpression(if_node, field("res", utf8()));
// Build a projector for the expressions.
std::shared_ptr<Projector> projector;
// assert that it fails gracefully.
ASSERT_OK(Projector::Make(schema, {expr}, TestConfiguration(), &projector));
// Create a row-batch with some sample data
int num_records = 4;
auto array_in =
MakeArrowArrayBool({true, false, false, false}, {true, true, true, false});
auto in_batch = arrow::RecordBatch::Make(schema, num_records, {array_in});
// Evaluate expression
arrow::ArrayVector outputs;
ASSERT_OK(projector->Evaluate(*in_batch, pool_, &outputs));
// expected output
auto exp = MakeArrowArrayUtf8({"hi", "bye", "bye", "bye"}, {true, true, true, true});
// Validate results
EXPECT_ARROW_ARRAY_EQUALS(exp, outputs.at(0));
}
TEST_F(TestUtf8, TestConvertUtf8) {
// schema for input fields
auto field_a = field("a", arrow::binary());
auto field_c = field("c", utf8());
auto schema = arrow::schema({field_a, field_c});
// output fields
auto res = field("res", boolean());
// build expressions.
auto node_a = TreeExprBuilder::MakeField(field_a);
auto node_c = TreeExprBuilder::MakeField(field_c);
// define char to replace
auto node_b = TreeExprBuilder::MakeStringLiteral("z");
auto convert_replace_utf8 =
TreeExprBuilder::MakeFunction("convert_replaceUTF8", {node_a, node_b}, utf8());
auto equals =
TreeExprBuilder::MakeFunction("equal", {convert_replace_utf8, node_c}, boolean());
auto expr = TreeExprBuilder::MakeExpression(equals, res);
// Build a projector for the expressions.
std::shared_ptr<Projector> projector;
auto status = Projector::Make(schema, {expr}, TestConfiguration(), &projector);
EXPECT_TRUE(status.ok()) << status.message();
// Create a row-batch with some sample data
int num_records = 3;
auto array_a = MakeArrowArrayUtf8({"ok-\xf8\x28"
"-a",
"all-valid", "ok-\xa0\xa1-valid"},
{true, true, true});
auto array_b =
MakeArrowArrayUtf8({"ok-z(-a", "all-valid", "ok-zz-valid"}, {true, true, true});
// prepare input record batch
auto in_batch = arrow::RecordBatch::Make(schema, num_records, {array_a, array_b});
// Evaluate expression
arrow::ArrayVector outputs;
status = projector->Evaluate(*in_batch, pool_, &outputs);
EXPECT_TRUE(status.ok()) << status.message();
auto exp = MakeArrowArrayBool({true, true, true}, {true, true, true});
// Validate results
EXPECT_ARROW_ARRAY_EQUALS(exp, outputs[0]);
}
TEST_F(TestUtf8, TestCastVarChar) {
// schema for input fields
auto field_a = field("a", utf8());
auto field_c = field("c", utf8());
auto schema = arrow::schema({field_a, field_c});
// output fields
auto res = field("res", boolean());
// build expressions.
auto node_a = TreeExprBuilder::MakeField(field_a);
auto node_c = TreeExprBuilder::MakeField(field_c);
// truncates the string to input length
auto node_b = TreeExprBuilder::MakeLiteral(static_cast<int64_t>(10));
auto cast_varchar =
TreeExprBuilder::MakeFunction("castVARCHAR", {node_a, node_b}, utf8());
auto equals = TreeExprBuilder::MakeFunction("equal", {cast_varchar, node_c}, boolean());
auto expr = TreeExprBuilder::MakeExpression(equals, res);
// Build a projector for the expressions.
std::shared_ptr<Projector> projector;
auto status = Projector::Make(schema, {expr}, TestConfiguration(), &projector);
EXPECT_TRUE(status.ok()) << status.message();
// Create a row-batch with some sample data
int num_records = 5;
auto array_a = MakeArrowArrayUtf8(
{"park", "Sparkle", "bright spark and fire", "fiery SPARK", "मदन"},
{true, true, false, true, true});
auto array_b =
MakeArrowArrayUtf8({"park", "Sparkle", "bright spar", "fiery SPAR", "मदन"},
{true, true, true, true, true});
// prepare input record batch
auto in_batch = arrow::RecordBatch::Make(schema, num_records, {array_a, array_b});
// Evaluate expression
arrow::ArrayVector outputs;
status = projector->Evaluate(*in_batch, pool_, &outputs);
EXPECT_TRUE(status.ok()) << status.message();
auto exp = MakeArrowArrayBool({true, true, false, true, true},
{true, true, false, true, true});
// Validate results
EXPECT_ARROW_ARRAY_EQUALS(exp, outputs[0]);
}
TEST_F(TestUtf8, TestAscii) {
// schema for input fields
auto field0 = field("f0", arrow::utf8());
auto schema = arrow::schema({field0});
// output fields
auto field_asc = field("ascii", arrow::int32());
// Build expression
auto asc_expr = TreeExprBuilder::MakeExpression("ascii", {field0}, field_asc);
std::shared_ptr<Projector> projector;
auto status = Projector::Make(schema, {asc_expr}, TestConfiguration(), &projector);
EXPECT_TRUE(status.ok()) << status.message();
// Create a row-batch with some sample data
int num_records = 6;
auto array0 = MakeArrowArrayUtf8({"ABC", "", "abc", "Hello World", "123", "999"},
{true, true, true, true, true, true});
// expected output
auto exp_asc =
MakeArrowArrayInt32({65, 0, 97, 72, 49, 57}, {true, true, true, true, true, true});
// prepare input record batch
auto in_batch = arrow::RecordBatch::Make(schema, num_records, {array0});
// Evaluate expression
arrow::ArrayVector outputs;
status = projector->Evaluate(*in_batch, pool_, &outputs);
EXPECT_TRUE(status.ok()) << status.message();
// Validate results
EXPECT_ARROW_ARRAY_EQUALS(exp_asc, outputs.at(0));
}
TEST_F(TestUtf8, TestSpace) {
// schema for input fields
auto field0 = field("f0", arrow::int64());
auto schema = arrow::schema({field0});
// output fields
auto field_space = field("space", arrow::utf8());
// Build expression
auto space_expr = TreeExprBuilder::MakeExpression("space", {field0}, field_space);
std::shared_ptr<Projector> projector;
auto status = Projector::Make(schema, {space_expr}, TestConfiguration(), &projector);
EXPECT_TRUE(status.ok()) << status.message();
// Create a row-batch with some sample data
int num_records = 4;
auto array0 = MakeArrowArrayInt64({1, 0, -5, 2}, {true, true, true, true});
// expected output
auto exp_space = MakeArrowArrayUtf8({" ", "", "", " "}, {true, true, true, true});
// prepare input record batch
auto in_batch = arrow::RecordBatch::Make(schema, num_records, {array0});
// Evaluate expression
arrow::ArrayVector outputs;
status = projector->Evaluate(*in_batch, pool_, &outputs);
EXPECT_TRUE(status.ok()) << status.message();
// Validate results
EXPECT_ARROW_ARRAY_EQUALS(exp_space, outputs.at(0));
}
} // namespace gandiva