blob: 9d0a4d71afefca4459567ab405e1e407cb43a54b [file] [log] [blame]
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#include <gmock/gmock.h>
#include <gtest/gtest.h>
#include <limits>
#include "gandiva/execution_context.h"
#include "gandiva/precompiled/types.h"
namespace gandiva {
// Checks the ordering reported by mem_compare using two buffers that agree on
// their first four bytes ("abcd") and differ from the fifth byte onwards.
TEST(TestStringOps, TestCompare) {
  const char* const lhs = "abcd789";
  const char* const rhs = "abcd123";
  // Compares the first `lhs_len` bytes of lhs with the first `rhs_len` bytes of rhs.
  auto compare = [&](int lhs_len, int rhs_len) {
    return mem_compare(lhs, lhs_len, rhs, rhs_len);
  };

  // Same bytes and same length: the result is zero.
  EXPECT_EQ(compare(4, 4), 0);

  // One side is a strict prefix of the other: the longer side is the greater one.
  EXPECT_GT(compare(5, 4), 0);
  EXPECT_LT(compare(4, 5), 0);

  // Both sides include the differing fifth byte ('7' vs '1'): the byte value
  // decides the result, whichever side happens to be longer.
  EXPECT_GT(compare(5, 5), 0);
  EXPECT_GT(compare(5, 7), 0);
  EXPECT_GT(compare(7, 5), 0);
}
// ascii_utf8 is expected to report the numeric value of the first byte of its
// input: 0 for an empty input, and a negative number for bytes >= 0x80.
TEST(TestStringOps, TestAscii) {
  struct Case {
    const char* text;
    int text_len;
    int expected;
  };
  const Case cases[] = {
      {"ABC", 3, 65},
      {"abc", 3, 97},
      {"Hello World!", 12, 72},
      {"This is us", 10, 84},
      {"", 0, 0},
      {"123", 3, 49},
      {"999", 3, 57},
      // Bytes with the high bit set are expected as negative values.
      {"\x80", 1, -128},
      {"\xFF", 1, -1},
  };

  for (const Case& c : cases) {
    // Attach the input to any failure so the offending row can be identified.
    SCOPED_TRACE(c.text);
    EXPECT_EQ(ascii_utf8(c.text, c.text_len), c.expected);
  }
}
// Exercises chr_int32 / chr_int64, alternating between the two widths. The
// expectations below treat the result as a single byte whose value is the
// input reduced into the 0..255 range (e.g. 340 -> 'T', -158 -> 'b').
TEST(TestStringOps, TestChrBigInt) {
  gandiva::ExecutionContext ctx;
  uint64_t ctx_ptr = reinterpret_cast<gdv_int64>(&ctx);
  int32_t out_len = 0;

  // Both helpers write the produced length into the shared `out_len` and
  // return the produced bytes as a std::string.
  auto chr32 = [&](gdv_int32 code) -> std::string {
    const char* produced = chr_int32(ctx_ptr, code, &out_len);
    return std::string(produced, out_len);
  };
  auto chr64 = [&](gdv_int64 code) -> std::string {
    const char* produced = chr_int64(ctx_ptr, code, &out_len);
    return std::string(produced, out_len);
  };

  // Printable ASCII.
  EXPECT_EQ(chr32(88), "X");
  EXPECT_EQ(chr64(65), "A");
  EXPECT_EQ(chr32(49), "1");
  EXPECT_EQ(chr64(84), "T");

  // Values above 255 wrap around.
  EXPECT_EQ(chr32(340), "T");
  // 256 is expected to produce a NUL byte, so the raw pointer is compared as a
  // C string (an empty one).
  const char* nul_from_256 = chr_int64(ctx_ptr, 256, &out_len);
  EXPECT_EQ(std::strcmp(nul_from_256, "\0"), 0);

  // Punctuation.
  EXPECT_EQ(chr32(33), "!");
  EXPECT_EQ(chr64(46), ".");
  EXPECT_EQ(chr32(63), "?");

  // Zero is expected to produce a NUL byte as well.
  const char* nul_from_zero = chr_int64(ctx_ptr, 0, &out_len);
  EXPECT_EQ(std::strcmp(nul_from_zero, "\0"), 0);

  // Negative inputs.
  EXPECT_EQ(chr32(-158), "b");
  EXPECT_EQ(chr64(-5), "\xFB");
  EXPECT_EQ(chr32(-340), "\xAC");
  EXPECT_EQ(chr64(-66), "\xBE");

  // Bytes in the 0x80..0xFF range are returned as-is.
  EXPECT_EQ(chr32(128), "\x80");
  EXPECT_EQ(chr64(156), "\x9C");
  EXPECT_EQ(chr32(255), "\xFF");

  // Control characters: backspace, DC3 and ESC.
  EXPECT_EQ(chr64(8), "\b");
  EXPECT_EQ(chr32(19), "\x13");
  EXPECT_EQ(chr64(27), "\x1B");
}
// Table-driven checks for starts_with_utf8_utf8 and ends_with_utf8_utf8.
TEST(TestStringOps, TestBeginsEnds) {
  struct Case {
    const char* text;
    int text_len;
    const char* affix;
    int affix_len;
    bool expected;
  };

  // Prefix matching: "hello" against inputs that start with it, equal it, are
  // shorter than it, or only contain it later on.
  const Case prefix_cases[] = {
      {"hello sir", 9, "hello", 5, true},
      {"hellos", 6, "hello", 5, true},
      {"hello", 5, "hello", 5, true},
      {"hell", 4, "hello", 5, false},
      {"world hello", 11, "hello", 5, false},
  };
  for (const Case& c : prefix_cases) {
    SCOPED_TRACE(c.text);
    const bool matched =
        starts_with_utf8_utf8(c.text, c.text_len, c.affix, c.affix_len);
    EXPECT_EQ(matched, c.expected);
  }

  // Suffix matching: "sir" against inputs that end with it, equal it, are
  // shorter than it, or do not contain it at all.
  const Case suffix_cases[] = {
      {"hello sir", 9, "sir", 3, true},
      {"ssir", 4, "sir", 3, true},
      {"sir", 3, "sir", 3, true},
      {"ir", 2, "sir", 3, false},
      {"hello", 5, "sir", 3, false},
  };
  for (const Case& c : suffix_cases) {
    SCOPED_TRACE(c.text);
    const bool matched = ends_with_utf8_utf8(c.text, c.text_len, c.affix, c.affix_len);
    EXPECT_EQ(matched, c.expected);
  }
}
// space_int32 / space_int64 are expected to return a string made of `n`
// spaces: empty for negative `n`, and capped at 65536 characters for larger
// requests.
//
// Expected values are built with std::string(count, ' ') rather than with
// whitespace-only literals, so each assertion states the intended width
// explicitly and cannot be silently altered by tools that collapse whitespace.
TEST(TestStringOps, TestSpace) {
  gandiva::ExecutionContext ctx;
  gdv_int64 ctx_ptr = reinterpret_cast<gdv_int64>(&ctx);
  int32_t out_len = 0;

  // Largest result the assertions below expect, whatever count was requested.
  const std::string::size_type kMaxSpaces = 65536;

  // Both helpers write the produced length into the shared `out_len` and
  // return the produced bytes as a std::string.
  auto spaces32 = [&](gdv_int32 count) -> std::string {
    const char* produced = space_int32(ctx_ptr, count, &out_len);
    return std::string(produced, out_len);
  };
  auto spaces64 = [&](gdv_int64 count) -> std::string {
    const char* produced = space_int64(ctx_ptr, count, &out_len);
    return std::string(produced, out_len);
  };

  // 32-bit count.
  EXPECT_EQ(spaces32(1), std::string(1, ' '));
  EXPECT_EQ(spaces32(10), std::string(10, ' '));
  EXPECT_EQ(spaces32(5), std::string(5, ' '));
  EXPECT_EQ(spaces32(-5), "");
  EXPECT_EQ(spaces32(65537), std::string(kMaxSpaces, ' '));
  EXPECT_EQ(spaces32(std::numeric_limits<gdv_int32>::max()),
            std::string(kMaxSpaces, ' '));

  // 64-bit count.
  EXPECT_EQ(spaces64(2), std::string(2, ' '));
  EXPECT_EQ(spaces64(9), std::string(9, ' '));
  EXPECT_EQ(spaces64(4), std::string(4, ' '));
  EXPECT_EQ(spaces64(-5), "");
  EXPECT_EQ(spaces64(65536), std::string(kMaxSpaces, ' '));
  EXPECT_EQ(spaces64(std::numeric_limits<gdv_int64>::max()),
            std::string(kMaxSpaces, ' '));
  // A negative count that does not fit in 32 bits must still yield an empty
  // string.
  EXPECT_EQ(spaces64(-2639077559LL), "");
}
// Table-driven checks for is_substr_utf8_utf8(haystack, needle).
TEST(TestStringOps, TestIsSubstr) {
  struct Case {
    const char* haystack;
    int haystack_len;
    const char* needle;
    int needle_len;
    bool expected;
  };
  const Case cases[] = {
      {"hello world", 11, "world", 5, true},
      {"hello world", 11, "lo wo", 5, true},
      {"hello world", 11, "adsed", 5, false},
      // A needle longer than the haystack cannot match.
      {"hel", 3, "hello", 5, false},
      {"hello", 5, "hello", 5, true},
      // The empty needle is expected to be found in any haystack.
      {"hello world", 11, "", 0, true},
  };

  for (const Case& c : cases) {
    SCOPED_TRACE(c.needle);
    const bool found =
        is_substr_utf8_utf8(c.haystack, c.haystack_len, c.needle, c.needle_len);
    EXPECT_EQ(found, c.expected);
  }
}
// utf8_length is expected to count characters (not bytes) of valid UTF-8
// input, and to return 0 and record an error naming the offending byte when
// the input is malformed.
TEST(TestStringOps, TestCharLength) {
  gandiva::ExecutionContext ctx;
  uint64_t ctx_ptr = reinterpret_cast<gdv_int64>(&ctx);

  // Runs utf8_length over the whole byte content of `text`.
  auto char_count = [&](const std::string& text) {
    return utf8_length(ctx_ptr, text.data(), static_cast<int>(text.length()));
  };

  // Well-formed input: ASCII only, then multi-byte characters.
  EXPECT_EQ(char_count("hello sir"), 9);
  EXPECT_EQ(char_count("âpple"), 5);
  EXPECT_EQ(char_count("मदन"), 3);

  // Malformed: the very first byte is rejected.
  EXPECT_EQ(char_count("\xf8\x28"), 0);
  EXPECT_THAT(ctx.get_error(),
              ::testing::HasSubstr(
                  "unexpected byte \\f8 encountered while decoding utf8 string"));
  ctx.Reset();

  // Malformed: a two-byte lead (0xc3) is the last byte of the input.
  EXPECT_EQ(char_count("aa\xc3"), 0);
  EXPECT_THAT(ctx.get_error(),
              ::testing::HasSubstr(
                  "unexpected byte \\c3 encountered while decoding utf8 string"));
  ctx.Reset();

  // Malformed: a lead byte is followed by plain 'a' (0x61). The literal is
  // split so that the 'a' is not absorbed into the hex escape.
  EXPECT_EQ(char_count("a\xc3"
                       "a"),
            0);
  EXPECT_THAT(ctx.get_error(),
              ::testing::HasSubstr(
                  "unexpected byte \\61 encountered while decoding utf8 string"));
  ctx.Reset();

  // Malformed: a lead byte is followed by another lead byte (0xe3).
  EXPECT_EQ(char_count("a\xc3\xe3"
                       "a"),
            0);
  EXPECT_THAT(ctx.get_error(),
              ::testing::HasSubstr(
                  "unexpected byte \\e3 encountered while decoding utf8 string"));
  ctx.Reset();
}
// convert_fromUTF8_binary is expected to hand back the first `len` bytes of
// its input unchanged, without recording an error.
TEST(TestStringOps, TestConvertUtf8) {
  gandiva::ExecutionContext ctx;
  uint64_t ctx_ptr = reinterpret_cast<gdv_int64>(&ctx);

  // Converts the first `len` bytes of `input`. The same variable is used for
  // the input length and for the reported output length.
  auto convert = [&](const std::string& input, int len) -> std::string {
    const char* converted =
        convert_fromUTF8_binary(ctx_ptr, input.data(), len, &len);
    return std::string(converted, len);
  };

  // The very first conversion on a fresh context receives an empty string.
  const std::string empty("");
  EXPECT_EQ(convert(empty, static_cast<int>(empty.length())), "");
  EXPECT_FALSE(ctx.has_error());

  // Follow-up conversions of ordinary strings.
  const std::string abc("abc");
  EXPECT_EQ(convert(abc, static_cast<int>(abc.length())), "abc");
  EXPECT_FALSE(ctx.has_error());

  const std::string hello("hello");
  EXPECT_EQ(convert(hello, static_cast<int>(hello.length())), "hello");
  EXPECT_FALSE(ctx.has_error());

  // A non-empty buffer passed with length 0 converts to the empty string.
  const std::string ignored("zero length");
  EXPECT_EQ(convert(ignored, 0), "");
  EXPECT_FALSE(ctx.has_error());

  // Only the leading `len` bytes of a longer buffer are converted.
  const std::string longer("substring");
  EXPECT_EQ(convert(longer, 3), "sub");
  EXPECT_FALSE(ctx.has_error());
}
// convert_replace_invalid_fromUTF8_binary is expected to substitute every
// invalid byte with the given replacement character, to drop invalid bytes
// when the replacement is empty, and to fail when the replacement is longer
// than one character.
TEST(TestStringOps, TestConvertReplaceInvalidUtf8Char) {
  gandiva::ExecutionContext ctx;
  uint64_t ctx_ptr = reinterpret_cast<gdv_int64>(&ctx);

  // Runs the conversion over all of `input`. The same variable is used for the
  // input length and for the reported output length.
  auto sanitize = [&](const std::string& input, const char* replacement,
                      int replacement_len) -> std::string {
    int len = static_cast<int>(input.length());
    const char* cleaned = convert_replace_invalid_fromUTF8_binary(
        ctx_ptr, input.data(), len, replacement, replacement_len, &len);
    return std::string(cleaned, len);
  };

  // 0xf8 is invalid, while the following 0x28 is a plain '('.
  EXPECT_EQ(sanitize("ok-\xf8\x28"
                     "-a",
                     "a", 1),
            "ok-a(-a");
  EXPECT_FALSE(ctx.has_error());

  // 0xa0 and 0xa1 are both invalid, so each one is replaced.
  EXPECT_EQ(sanitize("ok-\xa0\xa1-valid", "b", 1), "ok-bb-valid");
  EXPECT_FALSE(ctx.has_error());

  // Nothing to replace in a fully valid ASCII string.
  EXPECT_EQ(sanitize("all-valid", "c", 1), "all-valid");
  EXPECT_FALSE(ctx.has_error());

  // Valid multi-byte characters must pass through untouched.
  EXPECT_EQ(sanitize("ok-महसुस-valid-new", "d", 1), "ok-महसुस-valid-new");
  EXPECT_FALSE(ctx.has_error());

  // A two-character replacement is rejected even when the input is valid.
  EXPECT_EQ(sanitize("all-valid", "ee", 2), "");
  EXPECT_TRUE(ctx.has_error());
  ctx.Reset();

  // An empty replacement removes the invalid bytes: in the middle ...
  EXPECT_EQ(sanitize("ok-\xa0\xa1-valid", "", 0), "ok--valid");
  EXPECT_FALSE(ctx.has_error());
  ctx.Reset();

  // ... at the start, in the middle and at the end ...
  EXPECT_EQ(sanitize("\xa0\xa1-ok-\xa0\xa1-valid-\xa0\xa1", "", 0), "-ok--valid-");
  EXPECT_FALSE(ctx.has_error());
  ctx.Reset();

  // ... at the start only ...
  EXPECT_EQ(sanitize("\xa0\xa1-valid", "", 0), "-valid");
  EXPECT_FALSE(ctx.has_error());
  ctx.Reset();

  // ... and in three separate places of a longer input.
  EXPECT_EQ(sanitize("\xa0\xa1-valid-\xa0\xa1-valid-\xa0\xa1", "", 0),
            "-valid--valid-");
  EXPECT_FALSE(ctx.has_error());
  ctx.Reset();
}
// repeat_utf8_int32 is expected to concatenate the input `times` times and to
// report an error for a negative repeat count.
TEST(TestStringOps, TestRepeat) {
  gandiva::ExecutionContext ctx;
  uint64_t ctx_ptr = reinterpret_cast<gdv_int64>(&ctx);
  gdv_int32 out_len = 0;

  // Writes the produced length into the shared `out_len` and returns the
  // produced bytes as a std::string.
  auto repeat = [&](const char* text, gdv_int32 text_len,
                    gdv_int32 times) -> std::string {
    const char* repeated = repeat_utf8_int32(ctx_ptr, text, text_len, times, &out_len);
    return std::string(repeated, out_len);
  };

  EXPECT_EQ(repeat("abc", 3, 2), "abcabc");
  EXPECT_FALSE(ctx.has_error());

  EXPECT_EQ(repeat("a", 1, 5), "aaaaa");
  EXPECT_FALSE(ctx.has_error());

  // Repeating an empty input gives an empty result.
  EXPECT_EQ(repeat("", 0, 10), "");
  EXPECT_FALSE(ctx.has_error());

  // A negative input length is expected to behave like an empty input.
  EXPECT_EQ(repeat("", -20, 10), "");
  EXPECT_FALSE(ctx.has_error());

  // A negative repeat count is an error and produces no output.
  EXPECT_EQ(repeat("a", 1, -10), "");
  EXPECT_THAT(ctx.get_error(), ::testing::HasSubstr("Repeat number can't be negative"));
  ctx.Reset();
}
// castVARCHAR_bool_int64 is expected to render "true"/"false" truncated to the
// requested maximum length, and to report an error for a negative length.
TEST(TestStringOps, TestCastBoolToVarchar) {
  gandiva::ExecutionContext ctx;
  uint64_t ctx_ptr = reinterpret_cast<gdv_int64>(&ctx);
  gdv_int32 out_len = 0;

  // Writes the produced length into the shared `out_len` and returns the
  // produced bytes as a std::string.
  auto to_varchar = [&](bool value, gdv_int64 max_len) -> std::string {
    const char* text = castVARCHAR_bool_int64(ctx_ptr, value, max_len, &out_len);
    return std::string(text, out_len);
  };

  // "true": truncated to two characters, then with room to spare.
  EXPECT_EQ(to_varchar(true, 2), "tr");
  EXPECT_FALSE(ctx.has_error());
  EXPECT_EQ(to_varchar(true, 7), "true");
  EXPECT_FALSE(ctx.has_error());

  // "false": one character short, then an exact fit.
  EXPECT_EQ(to_varchar(false, 4), "fals");
  EXPECT_FALSE(ctx.has_error());
  EXPECT_EQ(to_varchar(false, 5), "false");
  EXPECT_FALSE(ctx.has_error());

  // Negative maximum length: only the recorded error is inspected.
  castVARCHAR_bool_int64(ctx_ptr, true, -3, &out_len);
  EXPECT_THAT(ctx.get_error(),
              ::testing::HasSubstr("Output buffer length can't be negative"));
  ctx.Reset();
}
// castBIT_utf8 is expected to parse "true"/"false" (in any letter case) and
// "1"/"0", ignoring leading and trailing spaces, and to record an error for
// any other text.
//
// Every input is built as a std::string and its length is taken from the
// string itself. The padded inputs spell out their padding with
// std::string(count, ' '), so the length handed to castBIT_utf8 can never
// exceed the buffer it points at, and the intended amount of whitespace stays
// explicit.
TEST(TestStringOps, TestCastVarcharToBool) {
  gandiva::ExecutionContext ctx;
  gdv_int64 ctx_ptr = reinterpret_cast<gdv_int64>(&ctx);

  // Parses the whole byte content of `text`.
  auto cast_bit = [&](const std::string& text) {
    return castBIT_utf8(ctx_ptr, text.data(), static_cast<gdv_int32>(text.length()));
  };
  // Returns `count` space characters.
  auto spaces = [](std::string::size_type count) { return std::string(count, ' '); };

  // "true", optionally padded on both sides, on the right, or on the left.
  EXPECT_EQ(cast_bit("true"), true);
  EXPECT_FALSE(ctx.has_error());
  EXPECT_EQ(cast_bit(spaces(5) + "true" + spaces(5)), true);  // 14 bytes
  EXPECT_FALSE(ctx.has_error());
  EXPECT_EQ(cast_bit("true" + spaces(5)), true);  // 9 bytes
  EXPECT_FALSE(ctx.has_error());
  EXPECT_EQ(cast_bit(spaces(5) + "true"), true);  // 9 bytes
  EXPECT_FALSE(ctx.has_error());

  // Letter case is ignored.
  EXPECT_EQ(cast_bit("TRUE"), true);
  EXPECT_FALSE(ctx.has_error());
  EXPECT_EQ(cast_bit("TrUe"), true);
  EXPECT_FALSE(ctx.has_error());

  // Numeric spelling of true.
  EXPECT_EQ(cast_bit("1"), true);
  EXPECT_FALSE(ctx.has_error());
  EXPECT_EQ(cast_bit(spaces(2) + "1"), true);  // 3 bytes
  EXPECT_FALSE(ctx.has_error());

  // "false", optionally padded on the right or on the left.
  EXPECT_EQ(cast_bit("false"), false);
  EXPECT_FALSE(ctx.has_error());
  EXPECT_EQ(cast_bit("false" + spaces(5)), false);  // 10 bytes
  EXPECT_FALSE(ctx.has_error());
  EXPECT_EQ(cast_bit(spaces(5) + "false"), false);  // 10 bytes
  EXPECT_FALSE(ctx.has_error());

  // Numeric spelling of false.
  EXPECT_EQ(cast_bit("0"), false);
  EXPECT_FALSE(ctx.has_error());
  EXPECT_EQ(cast_bit("0" + spaces(3)), false);  // 4 bytes
  EXPECT_FALSE(ctx.has_error());

  // Letter case is ignored.
  EXPECT_EQ(cast_bit("FALSE"), false);
  EXPECT_FALSE(ctx.has_error());
  EXPECT_EQ(cast_bit("FaLsE"), false);
  EXPECT_FALSE(ctx.has_error());

  // Anything else yields false and records an error.
  EXPECT_EQ(cast_bit("test"), false);
  EXPECT_TRUE(ctx.has_error());
  EXPECT_THAT(ctx.get_error(), ::testing::HasSubstr("Invalid value for boolean"));
  ctx.Reset();
}
// Runs the same scenarios against castVARCHAR_binary_int64 and
// castVARCHAR_utf8_int64. Both are expected to keep at most `max_chars`
// characters (not bytes) of the input, to keep everything when `max_chars` is
// 0, and to report an error for a negative `max_chars` or for malformed UTF-8
// inside the part that has to be decoded.
TEST(TestStringOps, TestCastVarchar) {
  gandiva::ExecutionContext ctx;
  uint64_t ctx_ptr = reinterpret_cast<gdv_int64>(&ctx);
  gdv_int32 out_len = 0;

  // Both helpers write the produced length into the shared `out_len` and
  // return the produced bytes as a std::string.
  auto cast_binary = [&](const char* data, gdv_int32 data_len,
                         gdv_int64 max_chars) -> std::string {
    const char* text =
        castVARCHAR_binary_int64(ctx_ptr, data, data_len, max_chars, &out_len);
    return std::string(text, out_len);
  };
  auto cast_utf8 = [&](const char* data, gdv_int32 data_len,
                       gdv_int64 max_chars) -> std::string {
    const char* text =
        castVARCHAR_utf8_int64(ctx_ptr, data, data_len, max_chars, &out_len);
    return std::string(text, out_len);
  };

  // ---------------------------------------------------------------- BINARY
  // Short ASCII input with limits below, at and above its length.
  EXPECT_EQ(cast_binary("asdf", 4, 1), "a");
  EXPECT_FALSE(ctx.has_error());
  EXPECT_EQ(cast_binary("asdf", 4, 6), "asdf");
  EXPECT_FALSE(ctx.has_error());
  EXPECT_EQ(cast_binary("asdf", 4, 3), "asd");
  EXPECT_FALSE(ctx.has_error());
  EXPECT_EQ(cast_binary("asdf", 4, 4), "asdf");
  EXPECT_FALSE(ctx.has_error());
  EXPECT_EQ(cast_binary("asdf", 4, 5), "asdf");
  EXPECT_FALSE(ctx.has_error());

  // A limit of 0 means "no truncation".
  EXPECT_EQ(cast_binary("asdf", 4, 0), "asdf");
  EXPECT_FALSE(ctx.has_error());

  // Empty input.
  EXPECT_EQ(cast_binary("", 0, 3), "");
  EXPECT_FALSE(ctx.has_error());

  // Four multi-byte characters stored in nine bytes: the limit counts
  // characters, so byte-based limits larger than four keep everything.
  EXPECT_EQ(cast_binary("çåå†", 9, 3), "çåå");
  EXPECT_FALSE(ctx.has_error());
  EXPECT_EQ(cast_binary("çåå†", 9, 4), "çåå†");
  EXPECT_FALSE(ctx.has_error());
  EXPECT_EQ(cast_binary("çåå†", 9, 5), "çåå†");
  EXPECT_FALSE(ctx.has_error());
  EXPECT_EQ(cast_binary("çåå†", 9, 10), "çåå†");
  EXPECT_FALSE(ctx.has_error());
  EXPECT_EQ(cast_binary("çåå†", 9, 6), "çåå†");
  EXPECT_FALSE(ctx.has_error());

  // Negative limit: empty output plus an error.
  EXPECT_EQ(cast_binary("abc", 3, -1), "");
  EXPECT_THAT(ctx.get_error(),
              ::testing::HasSubstr("Output buffer length can't be negative"));
  ctx.Reset();

  // A dangling lead byte beyond the requested two characters is not an error.
  std::string z("aa\xc3");
  EXPECT_EQ(cast_binary(z.data(), static_cast<gdv_int32>(z.length()), 2), "aa");
  EXPECT_FALSE(ctx.has_error());

  // Sixteen ASCII bytes truncated at a range of limits.
  EXPECT_EQ(cast_binary("1234567812341234", 16, 16), "1234567812341234");
  EXPECT_FALSE(ctx.has_error());
  EXPECT_EQ(cast_binary("1234567812341234", 16, 15), "123456781234123");
  EXPECT_FALSE(ctx.has_error());
  EXPECT_EQ(cast_binary("1234567812341234", 16, 12), "123456781234");
  EXPECT_FALSE(ctx.has_error());
  EXPECT_EQ(cast_binary("1234567812341234", 16, 8), "12345678");
  EXPECT_FALSE(ctx.has_error());
  EXPECT_EQ(cast_binary("1234567812341234", 16, 7), "1234567");
  EXPECT_FALSE(ctx.has_error());
  EXPECT_EQ(cast_binary("1234567812341234", 16, 4), "1234");
  EXPECT_FALSE(ctx.has_error());
  EXPECT_EQ(cast_binary("1234567812341234", 16, 3), "123");
  EXPECT_FALSE(ctx.has_error());

  // Multi-byte characters at different positions of a longer ASCII input.
  EXPECT_EQ(cast_binary("1234567812çåå†123456", 25, 16), "1234567812çåå†12");
  EXPECT_FALSE(ctx.has_error());
  EXPECT_EQ(cast_binary("123456781234çåå†1234", 25, 15), "123456781234çåå");
  EXPECT_FALSE(ctx.has_error());
  EXPECT_EQ(cast_binary("12çåå†34567812123456", 25, 16), "12çåå†3456781212");
  EXPECT_FALSE(ctx.has_error());
  EXPECT_EQ(cast_binary("çåå†1234567812123456", 25, 4), "çåå†");
  EXPECT_FALSE(ctx.has_error());
  EXPECT_EQ(cast_binary("çåå†1234567812123456", 25, 3), "çåå");
  EXPECT_FALSE(ctx.has_error());
  EXPECT_EQ(cast_binary("123456781234çåå†", 21, 40), "123456781234çåå†");
  EXPECT_FALSE(ctx.has_error());

  // A dangling lead byte inside the requested sixteen characters is an error.
  std::string f("123456781234çåå\xc3");
  EXPECT_EQ(cast_binary(f.data(), static_cast<gdv_int32>(f.length()), 16), "");
  EXPECT_THAT(ctx.get_error(),
              ::testing::HasSubstr(
                  "unexpected byte \\c3 encountered while decoding utf8 string"));
  ctx.Reset();

  // ------------------------------------------------------------------ UTF8
  // Short ASCII input with limits below, at and above its length.
  EXPECT_EQ(cast_utf8("asdf", 4, 1), "a");
  EXPECT_FALSE(ctx.has_error());
  EXPECT_EQ(cast_utf8("asdf", 4, 6), "asdf");
  EXPECT_FALSE(ctx.has_error());
  EXPECT_EQ(cast_utf8("asdf", 4, 3), "asd");
  EXPECT_FALSE(ctx.has_error());
  EXPECT_EQ(cast_utf8("asdf", 4, 4), "asdf");
  EXPECT_FALSE(ctx.has_error());
  EXPECT_EQ(cast_utf8("asdf", 4, 5), "asdf");
  EXPECT_FALSE(ctx.has_error());

  // A limit of 0 means "no truncation".
  EXPECT_EQ(cast_utf8("asdf", 4, 0), "asdf");
  EXPECT_FALSE(ctx.has_error());

  // Empty input.
  EXPECT_EQ(cast_utf8("", 0, 3), "");
  EXPECT_FALSE(ctx.has_error());

  // Four multi-byte characters stored in nine bytes.
  EXPECT_EQ(cast_utf8("çåå†", 9, 3), "çåå");
  EXPECT_FALSE(ctx.has_error());
  EXPECT_EQ(cast_utf8("çåå†", 9, 4), "çåå†");
  EXPECT_FALSE(ctx.has_error());
  EXPECT_EQ(cast_utf8("çåå†", 9, 5), "çåå†");
  EXPECT_FALSE(ctx.has_error());
  EXPECT_EQ(cast_utf8("çåå†", 9, 10), "çåå†");
  EXPECT_FALSE(ctx.has_error());
  EXPECT_EQ(cast_utf8("çåå†", 9, 6), "çåå†");
  EXPECT_FALSE(ctx.has_error());

  // Negative limit: empty output plus an error.
  EXPECT_EQ(cast_utf8("abc", 3, -1), "");
  EXPECT_THAT(ctx.get_error(),
              ::testing::HasSubstr("Output buffer length can't be negative"));
  ctx.Reset();

  // A dangling lead byte beyond the requested two characters is not an error.
  std::string d("aa\xc3");
  EXPECT_EQ(cast_utf8(d.data(), static_cast<gdv_int32>(d.length()), 2), "aa");
  EXPECT_FALSE(ctx.has_error());

  // Sixteen ASCII bytes truncated at a range of limits.
  EXPECT_EQ(cast_utf8("1234567812341234", 16, 16), "1234567812341234");
  EXPECT_FALSE(ctx.has_error());
  EXPECT_EQ(cast_utf8("1234567812341234", 16, 15), "123456781234123");
  EXPECT_FALSE(ctx.has_error());
  EXPECT_EQ(cast_utf8("1234567812341234", 16, 12), "123456781234");
  EXPECT_FALSE(ctx.has_error());
  EXPECT_EQ(cast_utf8("1234567812341234", 16, 8), "12345678");
  EXPECT_FALSE(ctx.has_error());
  EXPECT_EQ(cast_utf8("1234567812341234", 16, 7), "1234567");
  EXPECT_FALSE(ctx.has_error());
  EXPECT_EQ(cast_utf8("1234567812341234", 16, 4), "1234");
  EXPECT_FALSE(ctx.has_error());
  EXPECT_EQ(cast_utf8("1234567812341234", 16, 3), "123");
  EXPECT_FALSE(ctx.has_error());

  // Multi-byte characters at different positions of a longer ASCII input.
  EXPECT_EQ(cast_utf8("1234567812çåå†123456", 25, 16), "1234567812çåå†12");
  EXPECT_FALSE(ctx.has_error());
  EXPECT_EQ(cast_utf8("123456781234çåå†1234", 25, 15), "123456781234çåå");
  EXPECT_FALSE(ctx.has_error());
  EXPECT_EQ(cast_utf8("12çåå†34567812123456", 25, 16), "12çåå†3456781212");
  EXPECT_FALSE(ctx.has_error());
  EXPECT_EQ(cast_utf8("çåå†1234567812123456", 25, 4), "çåå†");
  EXPECT_FALSE(ctx.has_error());
  EXPECT_EQ(cast_utf8("çåå†1234567812123456", 25, 3), "çåå");
  EXPECT_FALSE(ctx.has_error());
  EXPECT_EQ(cast_utf8("123456781234çåå†", 21, 40), "123456781234çåå†");
  EXPECT_FALSE(ctx.has_error());

  // A dangling lead byte inside the requested sixteen characters is an error.
  std::string y("123456781234çåå\xc3");
  EXPECT_EQ(cast_utf8(y.data(), static_cast<gdv_int32>(y.length()), 16), "");
  EXPECT_THAT(ctx.get_error(),
              ::testing::HasSubstr(
                  "unexpected byte \\c3 encountered while decoding utf8 string"));
  ctx.Reset();
}
// Covers substr_utf8_int64_int64 (offset + length) and substr_utf8_int64
// (offset only). Offsets and lengths are counted in characters; the
// expectations below treat offset 0 like offset 1 and count negative offsets
// from the end of the input.
TEST(TestStringOps, TestSubstring) {
  gandiva::ExecutionContext ctx;
  uint64_t ctx_ptr = reinterpret_cast<gdv_int64>(&ctx);
  gdv_int32 out_len = 0;

  // Both helpers write the produced length into the shared `out_len` and
  // return the produced bytes as a std::string.
  auto substr = [&](const char* text, gdv_int32 text_len, gdv_int64 offset,
                    gdv_int64 length) -> std::string {
    const char* piece =
        substr_utf8_int64_int64(ctx_ptr, text, text_len, offset, length, &out_len);
    return std::string(piece, out_len);
  };
  auto substr_from = [&](const char* text, gdv_int32 text_len,
                         gdv_int64 offset) -> std::string {
    const char* piece = substr_utf8_int64(ctx_ptr, text, text_len, offset, &out_len);
    return std::string(piece, out_len);
  };

  // ASCII input: zero length, in-range length, length past the end.
  EXPECT_EQ(substr("asdf", 4, 1, 0), "");
  EXPECT_FALSE(ctx.has_error());
  EXPECT_EQ(substr("asdf", 4, 1, 2), "as");
  EXPECT_FALSE(ctx.has_error());
  EXPECT_EQ(substr("asdf", 4, 1, 5), "asdf");
  EXPECT_FALSE(ctx.has_error());

  // Offset 0 starts at the first character.
  EXPECT_EQ(substr("asdf", 4, 0, 5), "asdf");
  EXPECT_FALSE(ctx.has_error());

  // Negative offsets: within the input, then beyond its start.
  EXPECT_EQ(substr("asdf", 4, -2, 5), "df");
  EXPECT_FALSE(ctx.has_error());
  EXPECT_EQ(substr("asdf", 4, -5, 5), "");
  EXPECT_FALSE(ctx.has_error());

  // Multi-byte input: 25 bytes of three-byte characters plus one space.
  EXPECT_EQ(substr("अपाचे एरो", 25, 1, 5), "अपाचे");
  EXPECT_FALSE(ctx.has_error());
  EXPECT_EQ(substr("अपाचे एरो", 25, 7, 4), "एरो");
  EXPECT_FALSE(ctx.has_error());

  // Multi-byte input mixing two-byte and three-byte characters.
  EXPECT_EQ(substr("çåå†", 9, 4, 4), "†");
  EXPECT_FALSE(ctx.has_error());
  EXPECT_EQ(substr("çåå†", 9, 2, 2), "åå");
  EXPECT_FALSE(ctx.has_error());
  EXPECT_EQ(substr("çåå†", 9, 0, 2), "çå");
  EXPECT_FALSE(ctx.has_error());

  // Negative length and empty input both give an empty result.
  EXPECT_EQ(substr("afg", 4, 0, -5), "");
  EXPECT_FALSE(ctx.has_error());
  EXPECT_EQ(substr("", 0, 5, 5), "");
  EXPECT_FALSE(ctx.has_error());

  // Offset-only variant: everything from the offset to the end.
  EXPECT_EQ(substr_from("abcd", 4, 2), "bcd");
  EXPECT_FALSE(ctx.has_error());
  EXPECT_EQ(substr_from("abcd", 4, 0), "abcd");
  EXPECT_FALSE(ctx.has_error());
  EXPECT_EQ(substr_from("çåå†", 9, 2), "åå†");
  EXPECT_FALSE(ctx.has_error());
}
// substr_utf8_int64_int64 on malformed UTF-8 is expected to return an empty
// string and record an error, wherever the bad byte sits. An offset far beyond
// the input is expected to return an empty string without an error.
TEST(TestStringOps, TestSubstringInvalidInputs) {
  gandiva::ExecutionContext ctx;
  uint64_t ctx_ptr = reinterpret_cast<gdv_int64>(&ctx);
  gdv_int32 out_len = 0;

  // Writes the produced length into the shared `out_len` and returns the
  // produced bytes as a std::string.
  auto substr = [&](const char* text, gdv_int32 text_len, gdv_int64 offset,
                    gdv_int64 length) -> std::string {
    const char* piece =
        substr_utf8_int64_int64(ctx_ptr, text, text_len, offset, length, &out_len);
    return std::string(piece, out_len);
  };

  // The buffers below are not NUL-terminated; their length is passed explicitly.

  // Stray continuation byte (0xA7) at the start.
  char bad_first[] = {'\xA7', 'a'};
  EXPECT_EQ(substr(bad_first, 2, 1, 1), "");
  EXPECT_TRUE(ctx.has_error());
  ctx.Reset();

  // Stray continuation byte in the middle.
  char bad_middle[] = {'c', '\xA7', 'a'};
  EXPECT_EQ(substr(bad_middle, 3, 1, 1), "");
  EXPECT_TRUE(ctx.has_error());
  ctx.Reset();

  // Lead byte (0xC3) followed by ASCII instead of a continuation byte.
  char lead_then_ascii[] = {'\xC3', 'a', 'a'};
  EXPECT_EQ(substr(lead_then_ascii, 3, 1, 2), "");
  EXPECT_TRUE(ctx.has_error());
  ctx.Reset();

  // Stray continuation byte at the end.
  char bad_last[] = {'a', 'a', '\xA7'};
  EXPECT_EQ(substr(bad_last, 3, 1, 1), "");
  EXPECT_TRUE(ctx.has_error());
  ctx.Reset();

  // Lead byte at the end with nothing after it.
  char truncated_lead[] = {'a', 'a', '\xC3'};
  EXPECT_EQ(substr(truncated_lead, 3, 1, 3), "");
  EXPECT_TRUE(ctx.has_error());
  ctx.Reset();

  // Valid input, but an offset larger than a 32-bit integer can hold.
  EXPECT_EQ(substr("çåå†", 9, 2147483656, 2), "");
  EXPECT_FALSE(ctx.has_error());
}
// castVARBINARY_utf8_int64 is expected to keep at most `max_len` bytes of the
// input (everything when `max_len` is 0) and to report an error for a negative
// `max_len`.
TEST(TestGdvFnStubs, TestCastVarbinaryUtf8) {
  gandiva::ExecutionContext ctx;
  int64_t ctx_ptr = reinterpret_cast<int64_t>(&ctx);
  int32_t out_len = 0;
  const char* input = "abc";

  // Casts the first three bytes of `input`, writes the produced length into
  // the shared `out_len` and returns the produced bytes as a std::string.
  auto cast = [&](gdv_int64 max_len) -> std::string {
    const char* bytes = castVARBINARY_utf8_int64(ctx_ptr, input, 3, max_len, &out_len);
    return std::string(bytes, out_len);
  };

  // No limit.
  EXPECT_EQ(cast(0), input);
  // Limit shorter than the input.
  EXPECT_EQ(cast(1), "a");
  // Limit longer than the input.
  EXPECT_EQ(cast(500), input);

  // Negative limit.
  EXPECT_EQ(cast(-10), "");
  EXPECT_THAT(ctx.get_error(),
              ::testing::HasSubstr("Output buffer length can't be negative"));
  ctx.Reset();
}
// castVARBINARY_binary_int64 is expected to keep at most `max_len` bytes of
// the input (everything when `max_len` is 0) and to report an error for a
// negative `max_len`.
TEST(TestGdvFnStubs, TestCastVarbinaryBinary) {
  gandiva::ExecutionContext ctx;
  int64_t ctx_ptr = reinterpret_cast<int64_t>(&ctx);
  int32_t out_len = 0;
  // Twelve literal characters: three groups of backslash, 'x' and two digits.
  const char* input = "\\x41\\x42\\x43";

  // Writes the produced length into the shared `out_len` and returns the
  // produced bytes as a std::string.
  auto cast = [&](gdv_int32 in_len, gdv_int64 max_len) -> std::string {
    const char* bytes =
        castVARBINARY_binary_int64(ctx_ptr, input, in_len, max_len, &out_len);
    return std::string(bytes, out_len);
  };

  // No limit.
  EXPECT_EQ(cast(12, 0), input);
  // Only the first eight input bytes, with a limit of the same size.
  EXPECT_EQ(cast(8, 8), "\\x41\\x42");
  // Limit longer than the input.
  EXPECT_EQ(cast(12, 500), input);

  // Negative limit.
  EXPECT_EQ(cast(12, -10), "");
  EXPECT_THAT(ctx.get_error(),
              ::testing::HasSubstr("Output buffer length can't be negative"));
  ctx.Reset();
}
// castBINARY_utf8 is expected to return exactly the first `in_len` bytes of
// the input.
TEST(TestGdvFnStubs, TestCastBinaryUtf8) {
  int32_t out_len = 0;
  const char* input = "abc";

  // Writes the produced length into the shared `out_len` and returns the
  // produced bytes as a std::string.
  auto cast = [&](gdv_int32 in_len) -> std::string {
    const char* bytes = castBINARY_utf8(input, in_len, &out_len);
    return std::string(bytes, out_len);
  };

  // Shrink the visible part of the input one byte at a time.
  EXPECT_EQ(cast(3), input);
  EXPECT_EQ(cast(2), "ab");
  EXPECT_EQ(cast(1), "a");
  EXPECT_EQ(cast(0), "");
}
// castBINARY_binary is expected to return exactly the first `in_len` bytes of
// the input.
TEST(TestGdvFnStubs, TestCastBinaryBinary) {
  int32_t out_len = 0;
  // Twelve literal characters: three groups of backslash, 'x' and two digits.
  const char* input = "\\x41\\x42\\x43";

  // Writes the produced length into the shared `out_len` and returns the
  // produced bytes as a std::string.
  auto cast = [&](gdv_int32 in_len) -> std::string {
    const char* bytes = castBINARY_binary(input, in_len, &out_len);
    return std::string(bytes, out_len);
  };

  // Whole input, first two groups only, then nothing.
  EXPECT_EQ(cast(12), input);
  EXPECT_EQ(cast(8), "\\x41\\x42");
  EXPECT_EQ(cast(0), "");
}
// Covers the concat_* and concatOperator_* families for 2 through 10 inputs.
//
// concat_* takes a boolean after every (data, length) pair. The expectations
// below show that inputs flagged `false` do not contribute to the output
// (presumably a validity flag - confirm against the function declarations).
// concatOperator_* takes only (data, length) pairs; every input, including
// empty ones, is joined in order.
//
// No call in this test is expected to record an error on the context.
TEST(TestStringOps, TestConcat) {
gandiva::ExecutionContext ctx;
uint64_t ctx_ptr = reinterpret_cast<gdv_int64>(&ctx);
gdv_int32 out_len = 0;
// --- two inputs ---
// Second input is flagged false, so only the first appears in the result.
const char* out_str =
concat_utf8_utf8(ctx_ptr, "abcd", 4, true, "\npq", 3, false, &out_len);
EXPECT_EQ(std::string(out_str, out_len), "abcd");
EXPECT_FALSE(ctx.has_error());
out_str = concatOperator_utf8_utf8(ctx_ptr, "asdf", 4, "jkl", 3, &out_len);
EXPECT_EQ(std::string(out_str, out_len), "asdfjkl");
EXPECT_FALSE(ctx.has_error());
// Empty right-hand, empty left-hand, and both-empty inputs.
out_str = concatOperator_utf8_utf8(ctx_ptr, "asdf", 4, "", 0, &out_len);
EXPECT_EQ(std::string(out_str, out_len), "asdf");
EXPECT_FALSE(ctx.has_error());
out_str = concatOperator_utf8_utf8(ctx_ptr, "", 0, "jkl", 3, &out_len);
EXPECT_EQ(std::string(out_str, out_len), "jkl");
EXPECT_FALSE(ctx.has_error());
out_str = concatOperator_utf8_utf8(ctx_ptr, "", 0, "", 0, &out_len);
EXPECT_EQ(std::string(out_str, out_len), "");
EXPECT_FALSE(ctx.has_error());
// Embedded newline is preserved.
out_str = concatOperator_utf8_utf8(ctx_ptr, "abcd\n", 5, "a", 1, &out_len);
EXPECT_EQ(std::string(out_str, out_len), "abcd\na");
EXPECT_FALSE(ctx.has_error());
// --- three inputs ---
out_str = concat_utf8_utf8_utf8(ctx_ptr, "abcd", 4, false, "\npq", 3, true, "ard", 3,
true, &out_len);
EXPECT_EQ(std::string(out_str, out_len), "\npqard");
EXPECT_FALSE(ctx.has_error());
out_str =
concatOperator_utf8_utf8_utf8(ctx_ptr, "abcd\n", 5, "a", 1, "bcd", 3, &out_len);
EXPECT_EQ(std::string(out_str, out_len), "abcd\nabcd");
EXPECT_FALSE(ctx.has_error());
out_str = concatOperator_utf8_utf8_utf8(ctx_ptr, "abcd", 4, "a", 1, "", 0, &out_len);
EXPECT_EQ(std::string(out_str, out_len), "abcda");
EXPECT_FALSE(ctx.has_error());
out_str = concatOperator_utf8_utf8_utf8(ctx_ptr, "", 0, "a", 1, "pqrs", 4, &out_len);
EXPECT_EQ(std::string(out_str, out_len), "apqrs");
EXPECT_FALSE(ctx.has_error());
// --- four inputs ---
out_str = concat_utf8_utf8_utf8_utf8(ctx_ptr, "abcd", 4, false, "\npq", 3, true, "ard",
3, true, "uvw", 3, false, &out_len);
EXPECT_EQ(std::string(out_str, out_len), "\npqard");
EXPECT_FALSE(ctx.has_error());
out_str = concatOperator_utf8_utf8_utf8_utf8(ctx_ptr, "pqrs", 4, "", 0, "\nabc", 4, "y",
1, &out_len);
EXPECT_EQ(std::string(out_str, out_len), "pqrs\nabcy");
EXPECT_FALSE(ctx.has_error());
// --- five inputs ---
out_str = concat_utf8_utf8_utf8_utf8_utf8(ctx_ptr, "abcd", 4, false, "\npq", 3, true,
"ard", 3, true, "uvw", 3, false, "abc\n", 4,
true, &out_len);
EXPECT_EQ(std::string(out_str, out_len), "\npqardabc\n");
EXPECT_FALSE(ctx.has_error());
out_str = concatOperator_utf8_utf8_utf8_utf8_utf8(ctx_ptr, "pqrs", 4, "", 0, "\nabc", 4,
"y", 1, "", 0, &out_len);
EXPECT_EQ(std::string(out_str, out_len), "pqrs\nabcy");
EXPECT_FALSE(ctx.has_error());
// --- six inputs ---
out_str = concat_utf8_utf8_utf8_utf8_utf8_utf8(
ctx_ptr, "abcd", 4, false, "\npq", 3, true, "ard", 3, true, "uvw", 3, false,
"abc\n", 4, true, "sdfgs", 5, true, &out_len);
EXPECT_EQ(std::string(out_str, out_len), "\npqardabc\nsdfgs");
EXPECT_FALSE(ctx.has_error());
out_str = concatOperator_utf8_utf8_utf8_utf8_utf8_utf8(
ctx_ptr, "pqrs", 4, "", 0, "\nabc", 4, "y", 1, "", 0, "\nbcd", 4, &out_len);
EXPECT_EQ(std::string(out_str, out_len), "pqrs\nabcy\nbcd");
EXPECT_FALSE(ctx.has_error());
// --- seven inputs ---
out_str = concat_utf8_utf8_utf8_utf8_utf8_utf8_utf8(
ctx_ptr, "abcd", 4, false, "\npq", 3, true, "ard", 3, true, "uvw", 3, false,
"abc\n", 4, true, "sdfgs", 5, true, "wfw", 3, false, &out_len);
EXPECT_EQ(std::string(out_str, out_len), "\npqardabc\nsdfgs");
EXPECT_FALSE(ctx.has_error());
out_str = concatOperator_utf8_utf8_utf8_utf8_utf8_utf8_utf8(
ctx_ptr, "", 0, "pqrs", 4, "abc\n", 4, "y", 1, "", 0, "asdf", 4, "jkl", 3,
&out_len);
EXPECT_EQ(std::string(out_str, out_len), "pqrsabc\nyasdfjkl");
EXPECT_FALSE(ctx.has_error());
// --- eight inputs ---
out_str = concat_utf8_utf8_utf8_utf8_utf8_utf8_utf8_utf8(
ctx_ptr, "abcd", 4, false, "\npq", 3, true, "ard", 3, true, "uvw", 3, false,
"abc\n", 4, true, "sdfgs", 5, true, "wfw", 3, false, "", 0, true, &out_len);
EXPECT_EQ(std::string(out_str, out_len), "\npqardabc\nsdfgs");
EXPECT_FALSE(ctx.has_error());
out_str = concatOperator_utf8_utf8_utf8_utf8_utf8_utf8_utf8_utf8(
ctx_ptr, "", 0, "pqrs", 4, "abc\n", 4, "y", 1, "", 0, "asdf", 4, "jkl", 3, "", 0,
&out_len);
EXPECT_EQ(std::string(out_str, out_len), "pqrsabc\nyasdfjkl");
EXPECT_FALSE(ctx.has_error());
// --- nine inputs ---
out_str = concat_utf8_utf8_utf8_utf8_utf8_utf8_utf8_utf8_utf8(
ctx_ptr, "abcd", 4, false, "\npq", 3, true, "ard", 3, true, "uvw", 3, false,
"abc\n", 4, true, "sdfgs", 5, true, "wfw", 3, false, "", 0, true, "qwert|n", 7,
true, &out_len);
EXPECT_EQ(std::string(out_str, out_len), "\npqardabc\nsdfgsqwert|n");
EXPECT_FALSE(ctx.has_error());
out_str = concatOperator_utf8_utf8_utf8_utf8_utf8_utf8_utf8_utf8_utf8(
ctx_ptr, "", 0, "pqrs", 4, "abc\n", 4, "y", 1, "", 0, "asdf", 4, "jkl", 3, "", 0,
"sfl\n", 4, &out_len);
EXPECT_EQ(std::string(out_str, out_len), "pqrsabc\nyasdfjklsfl\n");
EXPECT_FALSE(ctx.has_error());
// --- ten inputs ---
out_str = concat_utf8_utf8_utf8_utf8_utf8_utf8_utf8_utf8_utf8_utf8(
ctx_ptr, "abcd", 4, false, "\npq", 3, true, "ard", 3, true, "uvw", 3, false,
"abc\n", 4, true, "sdfgs", 5, true, "wfw", 3, false, "", 0, true, "qwert|n", 7,
true, "ewfwe", 5, false, &out_len);
EXPECT_EQ(std::string(out_str, out_len), "\npqardabc\nsdfgsqwert|n");
EXPECT_FALSE(ctx.has_error());
out_str = concatOperator_utf8_utf8_utf8_utf8_utf8_utf8_utf8_utf8_utf8_utf8(
ctx_ptr, "", 0, "pqrs", 4, "abc\n", 4, "y", 1, "", 0, "asdf", 4, "", 0, "jkl", 3,
"sfl\n", 4, "", 0, &out_len);
EXPECT_EQ(std::string(out_str, out_len), "pqrsabc\nyasdfjklsfl\n");
EXPECT_FALSE(ctx.has_error());
}
// Checks reverse_utf8 on ASCII, empty and multi-byte input, and verifies that
// a truncated multi-byte sequence is reported as a decoding error.
TEST(TestStringOps, TestReverse) {
  gandiva::ExecutionContext ctx;
  uint64_t ctx_ptr = reinterpret_cast<gdv_int64>(&ctx);
  gdv_int32 out_len = 0;

  // Reverses `len` bytes of `data` and returns the result as a string.
  auto reversed = [&](const char* data, int len) {
    const char* result = reverse_utf8(ctx_ptr, data, len, &out_len);
    return std::string(result, out_len);
  };

  EXPECT_EQ(reversed("TestString", 10), "gnirtStseT");
  EXPECT_FALSE(ctx.has_error());

  EXPECT_EQ(reversed("", 0), "");
  EXPECT_FALSE(ctx.has_error());

  // Reversal is per character, not per byte: the 9-byte input keeps each
  // multi-byte character intact.
  EXPECT_EQ(reversed("çåå†", 9), "†ååç");
  EXPECT_FALSE(ctx.has_error());

  // 0xc3 opens a two-byte sequence that is cut off by the end of the input.
  std::string truncated("aa\xc3");
  EXPECT_EQ(reversed(truncated.data(), static_cast<int>(truncated.length())), "");
  EXPECT_THAT(ctx.get_error(),
              ::testing::HasSubstr(
                  "unexpected byte \\c3 encountered while decoding utf8 string"));
  ctx.Reset();
}
// Checks levenshtein() edit distances for ordinary, empty and null inputs, and
// verifies that a negative length is rejected with an error.
TEST(TestStringOps, TestLevenshtein) {
  gandiva::ExecutionContext ctx;
  uint64_t ctx_ptr = reinterpret_cast<gdv_int64>(&ctx);

  // Asserts the distance between the two inputs and that no error was recorded.
  auto expect_distance = [&](const char* lhs, int lhs_len, const char* rhs, int rhs_len,
                             int expected) {
    EXPECT_EQ(levenshtein(ctx_ptr, lhs, lhs_len, rhs, rhs_len), expected);
    EXPECT_FALSE(ctx.has_error());
  };

  expect_distance("kitten", 6, "sitting", 7, 3);
  expect_distance("book", 4, "back", 4, 2);
  expect_distance("", 0, "a", 1, 1);
  expect_distance("test", 4, "task", 4, 2);
  expect_distance("cat", 3, "coat", 4, 1);
  expect_distance("coat", 4, "coat", 4, 0);
  // The comparison is case-sensitive.
  expect_distance("AAAA", 4, "aAAa", 4, 2);
  expect_distance("color", 5, "colour", 6, 1);
  expect_distance("Test String1", 12, "Test String2", 12, 1);
  expect_distance("TEST STRING1", 12, "test string2", 12, 11);

  // Empty and null inputs: the distance equals the length of the other string.
  expect_distance("", 0, "Test String2", 12, 12);
  expect_distance(nullptr, 0, "Test String2", 12, 12);
  expect_distance("Test String2", 12, nullptr, 0, 12);
  expect_distance(nullptr, 0, nullptr, 0, 0);

  // A negative length yields 0 and records an error on the context.
  EXPECT_EQ(levenshtein(ctx_ptr, "book", -5, "back", 4), 0);
  EXPECT_TRUE(ctx.has_error());
  EXPECT_THAT(ctx.get_error(),
              ::testing::HasSubstr("String length must be greater than 0"));
  ctx.Reset();
}
// Checks quote_utf8: non-empty input is wrapped in single quotes with embedded
// single quotes backslash-escaped; empty input stays empty.
TEST(TestStringOps, TestQuote) {
  gandiva::ExecutionContext ctx;
  uint64_t ctx_ptr = reinterpret_cast<gdv_int64>(&ctx);
  gdv_int32 out_len = 0;

  // Quotes `len` bytes of `data` and returns the result as a string.
  auto quoted = [&](const char* data, int len) {
    const char* result = quote_utf8(ctx_ptr, data, len, &out_len);
    return std::string(result, out_len);
  };

  EXPECT_EQ(quoted("dont", 4), "\'dont\'");
  EXPECT_FALSE(ctx.has_error());

  EXPECT_EQ(quoted("abc", 3), "\'abc\'");
  EXPECT_FALSE(ctx.has_error());

  // An embedded quote gains a leading backslash.
  EXPECT_EQ(quoted("don't", 5), "\'don\\'t\'");
  EXPECT_FALSE(ctx.has_error());

  // Empty input is not wrapped.
  EXPECT_EQ(quoted("", 0), "");
  EXPECT_FALSE(ctx.has_error());

  EXPECT_EQ(quoted("'", 1), "'\\''");
  EXPECT_FALSE(ctx.has_error());

  EXPECT_EQ(quoted("'''''''''", 9), "'\\'\\'\\'\\'\\'\\'\\'\\'\\''");
  EXPECT_FALSE(ctx.has_error());
}
// Checks ltrim_utf8 (strips leading spaces) and ltrim_utf8_utf8 (strips any
// leading characters contained in the second argument).
//
// Every length argument is the exact byte length of the literal it accompanies;
// the whitespace inside the literals is significant and must stay in sync with
// those lengths, otherwise the function reads past the end of the literal.
TEST(TestStringOps, TestLtrim) {
  gandiva::ExecutionContext ctx;
  uint64_t ctx_ptr = reinterpret_cast<gdv_int64>(&ctx);
  gdv_int32 out_len = 0;
  const char* out_str;

  // Trailing spaces only (10 + 2 bytes): nothing to strip on the left.
  out_str = ltrim_utf8(ctx_ptr, "TestString  ", 12, &out_len);
  EXPECT_EQ(std::string(out_str, out_len), "TestString  ");
  EXPECT_FALSE(ctx.has_error());

  // 6 leading + 10 + 2 trailing bytes: only the leading run is removed.
  out_str = ltrim_utf8(ctx_ptr, "      TestString  ", 18, &out_len);
  EXPECT_EQ(std::string(out_str, out_len), "TestString  ");
  EXPECT_FALSE(ctx.has_error());

  // 1 + 4 + 2 + 9 (multi-byte) + 2 = 18 bytes; inner spaces are untouched.
  out_str = ltrim_utf8(ctx_ptr, " Test  çåå†bD", 18, &out_len);
  EXPECT_EQ(std::string(out_str, out_len), "Test  çåå†bD");
  EXPECT_FALSE(ctx.has_error());

  out_str = ltrim_utf8(ctx_ptr, "", 0, &out_len);
  EXPECT_EQ(std::string(out_str, out_len), "");
  EXPECT_FALSE(ctx.has_error());

  // Input made only of spaces trims down to the empty string.
  out_str = ltrim_utf8(ctx_ptr, "      ", 6, &out_len);
  EXPECT_EQ(std::string(out_str, out_len), "");
  EXPECT_FALSE(ctx.has_error());

  // Two-argument form: empty text, then empty trim set.
  out_str = ltrim_utf8_utf8(ctx_ptr, "", 0, "TestString", 10, &out_len);
  EXPECT_EQ(std::string(out_str, out_len), "");
  EXPECT_FALSE(ctx.has_error());

  out_str = ltrim_utf8_utf8(ctx_ptr, "TestString", 10, "", 0, &out_len);
  EXPECT_EQ(std::string(out_str, out_len), "TestString");
  EXPECT_FALSE(ctx.has_error());

  // The second argument acts as a set of characters; duplicates in it are harmless.
  out_str = ltrim_utf8_utf8(ctx_ptr, "abcbbaccabbcdef", 15, "abc", 3, &out_len);
  EXPECT_EQ(std::string(out_str, out_len), "def");
  EXPECT_FALSE(ctx.has_error());

  out_str = ltrim_utf8_utf8(ctx_ptr, "abcbbaccabbcdef", 15, "ababbac", 7, &out_len);
  EXPECT_EQ(std::string(out_str, out_len), "def");
  EXPECT_FALSE(ctx.has_error());

  // Multi-byte characters in both the text and the trim set.
  out_str = ltrim_utf8_utf8(ctx_ptr, "ååçåå†eç†Dd", 21, "çåå†", 9, &out_len);
  EXPECT_EQ(std::string(out_str, out_len), "eç†Dd");
  EXPECT_FALSE(ctx.has_error());

  out_str = ltrim_utf8_utf8(ctx_ptr, "ç†ååçåå†", 18, "çåå†", 9, &out_len);
  EXPECT_EQ(std::string(out_str, out_len), "");
  EXPECT_FALSE(ctx.has_error());

  // Malformed UTF-8 after the trimmed prefix: trimming stops at the bad byte
  // and no error is recorded.
  std::string d(
      "aa\xc3"
      "bcd");
  out_str =
      ltrim_utf8_utf8(ctx_ptr, d.data(), static_cast<int>(d.length()), "a", 1, &out_len);
  EXPECT_EQ(std::string(out_str, out_len),
            "\xc3"
            "bcd");
  EXPECT_FALSE(ctx.has_error());

  std::string e(
      "åå\xe0\xa0"
      "bcd");
  out_str =
      ltrim_utf8_utf8(ctx_ptr, e.data(), static_cast<int>(e.length()), "å", 2, &out_len);
  EXPECT_EQ(std::string(out_str, out_len),
            "\xE0\xa0"
            "bcd");
  EXPECT_FALSE(ctx.has_error());

  // No character of the text is in the trim set.
  out_str = ltrim_utf8_utf8(ctx_ptr, "TestString", 10, "abcd", 4, &out_len);
  EXPECT_EQ(std::string(out_str, out_len), "TestString");
  EXPECT_FALSE(ctx.has_error());

  // Every character of the text is in the trim set.
  out_str = ltrim_utf8_utf8(ctx_ptr, "acbabbcabb", 10, "abcbd", 5, &out_len);
  EXPECT_EQ(std::string(out_str, out_len), "");
  EXPECT_FALSE(ctx.has_error());
}
// Checks lpad_utf8_int32_utf8 (explicit fill text) and lpad_utf8_int32 (no fill
// text; the expectations below show the pad is made of spaces).
//
// The target length counts characters, not bytes: "абвгд" is 10 bytes but
// 5 characters. Expected strings for the space-padded cases must contain
// exactly (target - character count) leading spaces.
TEST(TestStringOps, TestLpadString) {
  gandiva::ExecutionContext ctx;
  uint64_t ctx_ptr = reinterpret_cast<gdv_int64>(&ctx);
  gdv_int32 out_len = 0;
  const char* out_str;

  // LPAD function tests - with defined fill pad text
  // Target shorter than the text truncates instead of padding.
  out_str = lpad_utf8_int32_utf8(ctx_ptr, "TestString", 10, 4, "fill", 4, &out_len);
  EXPECT_EQ(std::string(out_str, out_len), "Test");

  out_str = lpad_utf8_int32_utf8(ctx_ptr, "TestString", 10, 10, "fill", 4, &out_len);
  EXPECT_EQ(std::string(out_str, out_len), "TestString");

  // Empty text, zero target and negative target all give an empty result.
  out_str = lpad_utf8_int32_utf8(ctx_ptr, "TestString", 0, 10, "fill", 4, &out_len);
  EXPECT_EQ(std::string(out_str, out_len), "");

  out_str = lpad_utf8_int32_utf8(ctx_ptr, "TestString", 10, 0, "fill", 4, &out_len);
  EXPECT_EQ(std::string(out_str, out_len), "");

  out_str = lpad_utf8_int32_utf8(ctx_ptr, "TestString", 10, -500, "fill", 4, &out_len);
  EXPECT_EQ(std::string(out_str, out_len), "");

  // Empty fill text: the input is returned unpadded.
  out_str = lpad_utf8_int32_utf8(ctx_ptr, "TestString", 10, 500, "", 0, &out_len);
  EXPECT_EQ(std::string(out_str, out_len), "TestString");

  // The fill text repeats and is cut mid-way when it does not divide evenly.
  out_str = lpad_utf8_int32_utf8(ctx_ptr, "TestString", 10, 18, "Fill", 4, &out_len);
  EXPECT_EQ(std::string(out_str, out_len), "FillFillTestString");

  out_str = lpad_utf8_int32_utf8(ctx_ptr, "TestString", 10, 15, "Fill", 4, &out_len);
  EXPECT_EQ(std::string(out_str, out_len), "FillFTestString");

  out_str = lpad_utf8_int32_utf8(ctx_ptr, "TestString", 10, 20, "Fill", 4, &out_len);
  EXPECT_EQ(std::string(out_str, out_len), "FillFillFiTestString");

  // Multi-byte text and fill.
  out_str = lpad_utf8_int32_utf8(ctx_ptr, "абвгд", 10, 7, "д", 2, &out_len);
  EXPECT_EQ(std::string(out_str, out_len), "ддабвгд");

  out_str = lpad_utf8_int32_utf8(ctx_ptr, "абвгд", 10, 20, "абвгд", 10, &out_len);
  EXPECT_EQ(std::string(out_str, out_len), "абвгдабвгдабвгдабвгд");

  out_str = lpad_utf8_int32_utf8(ctx_ptr, "hello", 5, 6, "д", 2, &out_len);
  EXPECT_EQ(std::string(out_str, out_len), "дhello");

  // 65536 three-byte characters in total.
  out_str = lpad_utf8_int32_utf8(ctx_ptr, "大学路", 9, 65536, "哈", 3, &out_len);
  EXPECT_EQ(out_len, 65536 * 3);

  // LPAD function tests - with NO pad text
  out_str = lpad_utf8_int32(ctx_ptr, "TestString", 10, 4, &out_len);
  EXPECT_EQ(std::string(out_str, out_len), "Test");

  out_str = lpad_utf8_int32(ctx_ptr, "TestString", 10, 10, &out_len);
  EXPECT_EQ(std::string(out_str, out_len), "TestString");

  out_str = lpad_utf8_int32(ctx_ptr, "TestString", 0, 10, &out_len);
  EXPECT_EQ(std::string(out_str, out_len), "");

  out_str = lpad_utf8_int32(ctx_ptr, "TestString", 10, 0, &out_len);
  EXPECT_EQ(std::string(out_str, out_len), "");

  out_str = lpad_utf8_int32(ctx_ptr, "TestString", 10, -500, &out_len);
  EXPECT_EQ(std::string(out_str, out_len), "");

  // 18 - 10 = 8 leading spaces.
  out_str = lpad_utf8_int32(ctx_ptr, "TestString", 10, 18, &out_len);
  EXPECT_EQ(std::string(out_str, out_len), "        TestString");

  // 15 - 10 = 5 leading spaces.
  out_str = lpad_utf8_int32(ctx_ptr, "TestString", 10, 15, &out_len);
  EXPECT_EQ(std::string(out_str, out_len), "     TestString");

  // 7 - 5 characters = 2 leading spaces.
  out_str = lpad_utf8_int32(ctx_ptr, "абвгд", 10, 7, &out_len);
  EXPECT_EQ(std::string(out_str, out_len), "  абвгд");

  // Requested 65537, expected total is 65536 characters (65526 spaces + 10):
  // presumably the output length is capped - confirm against the implementation.
  out_str = lpad_utf8_int32(ctx_ptr, "TestString", 10, 65537, &out_len);
  EXPECT_EQ(std::string(out_str, out_len), std::string(65526, ' ') + "TestString");

  out_str = lpad_utf8_int32(ctx_ptr, "TestString", 10, -1, &out_len);
  EXPECT_EQ(std::string(out_str, out_len), "");
}
// Checks rpad_utf8_int32_utf8 (explicit fill text) and rpad_utf8_int32 (no fill
// text; the expectations below show the pad is made of spaces).
//
// The target length counts characters, not bytes: "абвгд" is 10 bytes but
// 5 characters. Expected strings for the space-padded cases must contain
// exactly (target - character count) trailing spaces.
TEST(TestStringOps, TestRpadString) {
  gandiva::ExecutionContext ctx;
  uint64_t ctx_ptr = reinterpret_cast<gdv_int64>(&ctx);
  gdv_int32 out_len = 0;
  const char* out_str;

  // RPAD function tests - with defined fill pad text
  // Target shorter than the text truncates instead of padding.
  out_str = rpad_utf8_int32_utf8(ctx_ptr, "TestString", 10, 4, "fill", 4, &out_len);
  EXPECT_EQ(std::string(out_str, out_len), "Test");

  out_str = rpad_utf8_int32_utf8(ctx_ptr, "TestString", 10, 10, "fill", 4, &out_len);
  EXPECT_EQ(std::string(out_str, out_len), "TestString");

  // Empty text, zero target and negative target all give an empty result.
  out_str = rpad_utf8_int32_utf8(ctx_ptr, "TestString", 0, 10, "fill", 4, &out_len);
  EXPECT_EQ(std::string(out_str, out_len), "");

  out_str = rpad_utf8_int32_utf8(ctx_ptr, "TestString", 10, 0, "fill", 4, &out_len);
  EXPECT_EQ(std::string(out_str, out_len), "");

  out_str = rpad_utf8_int32_utf8(ctx_ptr, "TestString", 10, -500, "fill", 4, &out_len);
  EXPECT_EQ(std::string(out_str, out_len), "");

  // Empty fill text: the input is returned unpadded.
  out_str = rpad_utf8_int32_utf8(ctx_ptr, "TestString", 10, 500, "", 0, &out_len);
  EXPECT_EQ(std::string(out_str, out_len), "TestString");

  // The fill text repeats and is cut mid-way when it does not divide evenly.
  out_str = rpad_utf8_int32_utf8(ctx_ptr, "TestString", 10, 18, "Fill", 4, &out_len);
  EXPECT_EQ(std::string(out_str, out_len), "TestStringFillFill");

  out_str = rpad_utf8_int32_utf8(ctx_ptr, "TestString", 10, 15, "Fill", 4, &out_len);
  EXPECT_EQ(std::string(out_str, out_len), "TestStringFillF");

  out_str = rpad_utf8_int32_utf8(ctx_ptr, "TestString", 10, 20, "Fill", 4, &out_len);
  EXPECT_EQ(std::string(out_str, out_len), "TestStringFillFillFi");

  // Multi-byte text and fill.
  out_str = rpad_utf8_int32_utf8(ctx_ptr, "абвгд", 10, 7, "д", 2, &out_len);
  EXPECT_EQ(std::string(out_str, out_len), "абвгддд");

  out_str = rpad_utf8_int32_utf8(ctx_ptr, "абвгд", 10, 20, "абвгд", 10, &out_len);
  EXPECT_EQ(std::string(out_str, out_len), "абвгдабвгдабвгдабвгд");

  out_str = rpad_utf8_int32_utf8(ctx_ptr, "hello", 5, 6, "д", 2, &out_len);
  EXPECT_EQ(std::string(out_str, out_len), "helloд");

  // Requested 655360 characters but 65536 three-byte characters are expected:
  // presumably the output length is capped - confirm against the implementation.
  // Only the first 3 bytes (one character) of the fill literal are passed.
  out_str = rpad_utf8_int32_utf8(ctx_ptr, "大学路", 9, 655360, "哈雷路", 3, &out_len);
  EXPECT_EQ(out_len, 65536 * 3);

  // RPAD function tests - with NO pad text
  out_str = rpad_utf8_int32(ctx_ptr, "TestString", 10, 4, &out_len);
  EXPECT_EQ(std::string(out_str, out_len), "Test");

  out_str = rpad_utf8_int32(ctx_ptr, "TestString", 10, 10, &out_len);
  EXPECT_EQ(std::string(out_str, out_len), "TestString");

  out_str = rpad_utf8_int32(ctx_ptr, "TestString", 0, 10, &out_len);
  EXPECT_EQ(std::string(out_str, out_len), "");

  out_str = rpad_utf8_int32(ctx_ptr, "TestString", 10, 0, &out_len);
  EXPECT_EQ(std::string(out_str, out_len), "");

  out_str = rpad_utf8_int32(ctx_ptr, "TestString", 10, -500, &out_len);
  EXPECT_EQ(std::string(out_str, out_len), "");

  // 18 - 10 = 8 trailing spaces.
  out_str = rpad_utf8_int32(ctx_ptr, "TestString", 10, 18, &out_len);
  EXPECT_EQ(std::string(out_str, out_len), "TestString        ");

  // 15 - 10 = 5 trailing spaces.
  out_str = rpad_utf8_int32(ctx_ptr, "TestString", 10, 15, &out_len);
  EXPECT_EQ(std::string(out_str, out_len), "TestString     ");

  // 7 - 5 characters = 2 trailing spaces.
  out_str = rpad_utf8_int32(ctx_ptr, "абвгд", 10, 7, &out_len);
  EXPECT_EQ(std::string(out_str, out_len), "абвгд  ");

  // Requested 65537, expected total is 65536 characters (10 + 65526 spaces).
  out_str = rpad_utf8_int32(ctx_ptr, "TestString", 10, 65537, &out_len);
  EXPECT_EQ(std::string(out_str, out_len), "TestString" + std::string(65526, ' '));

  out_str = rpad_utf8_int32(ctx_ptr, "TestString", 10, -1, &out_len);
  EXPECT_EQ(std::string(out_str, out_len), "");
}
// Checks rtrim_utf8 (strips trailing spaces) and rtrim_utf8_utf8 (strips any
// trailing characters contained in the second argument).
//
// Every length argument is the exact byte length of the literal it accompanies;
// the whitespace inside the literals is significant and must stay in sync with
// those lengths, otherwise the function reads past the end of the literal.
TEST(TestStringOps, TestRtrim) {
  gandiva::ExecutionContext ctx;
  uint64_t ctx_ptr = reinterpret_cast<gdv_int64>(&ctx);
  gdv_int32 out_len = 0;
  const char* out_str;

  // Leading spaces only (2 + 10 bytes): nothing to strip on the right.
  out_str = rtrim_utf8(ctx_ptr, "  TestString", 12, &out_len);
  EXPECT_EQ(std::string(out_str, out_len), "  TestString");
  EXPECT_FALSE(ctx.has_error());

  // 2 leading + 10 + 6 trailing bytes: only the trailing run is removed.
  out_str = rtrim_utf8(ctx_ptr, "  TestString      ", 18, &out_len);
  EXPECT_EQ(std::string(out_str, out_len), "  TestString");
  EXPECT_FALSE(ctx.has_error());

  // 4 + 2 + 9 (multi-byte) + 2 + 3 trailing = 20 bytes; inner spaces are untouched.
  out_str = rtrim_utf8(ctx_ptr, "Test  çåå†bD   ", 20, &out_len);
  EXPECT_EQ(std::string(out_str, out_len), "Test  çåå†bD");
  EXPECT_FALSE(ctx.has_error());

  out_str = rtrim_utf8(ctx_ptr, "", 0, &out_len);
  EXPECT_EQ(std::string(out_str, out_len), "");
  EXPECT_FALSE(ctx.has_error());

  // Input made only of spaces trims down to the empty string.
  out_str = rtrim_utf8(ctx_ptr, "      ", 6, &out_len);
  EXPECT_EQ(std::string(out_str, out_len), "");
  EXPECT_FALSE(ctx.has_error());

  // Two-argument form: empty text, then empty trim set.
  out_str = rtrim_utf8_utf8(ctx_ptr, "", 0, "TestString", 10, &out_len);
  EXPECT_EQ(std::string(out_str, out_len), "");
  EXPECT_FALSE(ctx.has_error());

  out_str = rtrim_utf8_utf8(ctx_ptr, "TestString", 10, "", 0, &out_len);
  EXPECT_EQ(std::string(out_str, out_len), "TestString");
  EXPECT_FALSE(ctx.has_error());

  // The second argument acts as a set of characters, not a suffix.
  out_str = rtrim_utf8_utf8(ctx_ptr, "TestString", 10, "ring", 4, &out_len);
  EXPECT_EQ(std::string(out_str, out_len), "TestSt");
  EXPECT_FALSE(ctx.has_error());

  out_str = rtrim_utf8_utf8(ctx_ptr, "defabcbbaccabbc", 15, "abc", 3, &out_len);
  EXPECT_EQ(std::string(out_str, out_len), "def");
  EXPECT_FALSE(ctx.has_error());

  out_str = rtrim_utf8_utf8(ctx_ptr, "defabcbbaccabbc", 15, "ababbac", 7, &out_len);
  EXPECT_EQ(std::string(out_str, out_len), "def");
  EXPECT_FALSE(ctx.has_error());

  // Multi-byte characters in both the text and the trim set.
  out_str = rtrim_utf8_utf8(ctx_ptr, "eDdç†ååçåå†", 21, "çåå†", 9, &out_len);
  EXPECT_EQ(std::string(out_str, out_len), "eDd");
  EXPECT_FALSE(ctx.has_error());

  out_str = rtrim_utf8_utf8(ctx_ptr, "ç†ååçåå†", 18, "çåå†", 9, &out_len);
  EXPECT_EQ(std::string(out_str, out_len), "");
  EXPECT_FALSE(ctx.has_error());

  // Malformed UTF-8 ahead of the trimmed suffix: unlike the left trim, the
  // right trim is expected to report an error and return an empty result.
  std::string d(
      "\xc3"
      "aaa");
  out_str =
      rtrim_utf8_utf8(ctx_ptr, d.data(), static_cast<int>(d.length()), "a", 1, &out_len);
  EXPECT_EQ(std::string(out_str, out_len), "");
  EXPECT_TRUE(ctx.has_error());
  ctx.Reset();

  std::string e(
      "\xe0\xa0"
      "åå");
  out_str =
      rtrim_utf8_utf8(ctx_ptr, e.data(), static_cast<int>(e.length()), "å", 2, &out_len);
  EXPECT_EQ(std::string(out_str, out_len), "");
  EXPECT_TRUE(ctx.has_error());
  ctx.Reset();

  // Trimming stops at the first character (from the right) outside the set.
  out_str = rtrim_utf8_utf8(ctx_ptr, "åeçå", 7, "çå", 4, &out_len);
  EXPECT_EQ(std::string(out_str, out_len), "åe");
  EXPECT_FALSE(ctx.has_error());

  // No character of the text is in the trim set.
  out_str = rtrim_utf8_utf8(ctx_ptr, "TestString", 10, "abcd", 4, &out_len);
  EXPECT_EQ(std::string(out_str, out_len), "TestString");
  EXPECT_FALSE(ctx.has_error());

  // Every character of the text is in the trim set.
  out_str = rtrim_utf8_utf8(ctx_ptr, "acbabbcabb", 10, "abcbd", 5, &out_len);
  EXPECT_EQ(std::string(out_str, out_len), "");
  EXPECT_FALSE(ctx.has_error());
}
// Checks btrim_utf8 (strips spaces on both ends) and btrim_utf8_utf8 (strips,
// on both ends, any characters contained in the second argument).
//
// Every length argument is the exact byte length of the literal it accompanies;
// the whitespace inside the literals is significant and must stay in sync with
// those lengths, otherwise the function reads past the end of the literal.
TEST(TestStringOps, TestBtrim) {
  gandiva::ExecutionContext ctx;
  uint64_t ctx_ptr = reinterpret_cast<gdv_int64>(&ctx);
  gdv_int32 out_len = 0;
  const char* out_str;

  out_str = btrim_utf8(ctx_ptr, "TestString", 10, &out_len);
  EXPECT_EQ(std::string(out_str, out_len), "TestString");
  EXPECT_FALSE(ctx.has_error());

  // 6 leading + 10 + 2 trailing bytes.
  out_str = btrim_utf8(ctx_ptr, "      TestString  ", 18, &out_len);
  EXPECT_EQ(std::string(out_str, out_len), "TestString");
  EXPECT_FALSE(ctx.has_error());

  // 1 + 4 + 2 + 9 (multi-byte) + 2 + 3 = 21 bytes; inner spaces are untouched.
  out_str = btrim_utf8(ctx_ptr, " Test  çåå†bD   ", 21, &out_len);
  EXPECT_EQ(std::string(out_str, out_len), "Test  çåå†bD");
  EXPECT_FALSE(ctx.has_error());

  out_str = btrim_utf8(ctx_ptr, "", 0, &out_len);
  EXPECT_EQ(std::string(out_str, out_len), "");
  EXPECT_FALSE(ctx.has_error());

  // Input made only of spaces trims down to the empty string.
  out_str = btrim_utf8(ctx_ptr, "      ", 6, &out_len);
  EXPECT_EQ(std::string(out_str, out_len), "");
  EXPECT_FALSE(ctx.has_error());

  out_str = btrim_utf8_utf8(ctx_ptr, "", 0, "TestString", 10, &out_len);
  EXPECT_EQ(std::string(out_str, out_len), "");
  EXPECT_FALSE(ctx.has_error());

  // The second argument acts as a set of characters, not a prefix or suffix.
  out_str = btrim_utf8_utf8(ctx_ptr, "TestString", 10, "Test", 4, &out_len);
  EXPECT_EQ(std::string(out_str, out_len), "String");
  EXPECT_FALSE(ctx.has_error());

  out_str = btrim_utf8_utf8(ctx_ptr, "TestString", 10, "String", 6, &out_len);
  EXPECT_EQ(std::string(out_str, out_len), "Tes");
  EXPECT_FALSE(ctx.has_error());

  out_str = btrim_utf8_utf8(ctx_ptr, "TestString", 10, "", 0, &out_len);
  EXPECT_EQ(std::string(out_str, out_len), "TestString");
  EXPECT_FALSE(ctx.has_error());

  out_str = btrim_utf8_utf8(ctx_ptr, "abcbbadefccabbc", 15, "abc", 3, &out_len);
  EXPECT_EQ(std::string(out_str, out_len), "def");
  EXPECT_FALSE(ctx.has_error());

  out_str = btrim_utf8_utf8(ctx_ptr, "abcbbadefccabbc", 15, "ababbac", 7, &out_len);
  EXPECT_EQ(std::string(out_str, out_len), "def");
  EXPECT_FALSE(ctx.has_error());

  // Multi-byte characters in both the text and the trim set.
  out_str = btrim_utf8_utf8(ctx_ptr, "ååçåå†Ddeç†", 21, "çåå†", 9, &out_len);
  EXPECT_EQ(std::string(out_str, out_len), "Dde");
  EXPECT_FALSE(ctx.has_error());

  out_str = btrim_utf8_utf8(ctx_ptr, "ç†ååçåå†", 18, "çåå†", 9, &out_len);
  EXPECT_EQ(std::string(out_str, out_len), "");
  EXPECT_FALSE(ctx.has_error());
  ctx.Reset();

  // Malformed UTF-8 immediately before a trimmed suffix: an error is expected
  // and the result is empty.
  std::string d(
      "acd\xc3"
      "aaa");
  out_str =
      btrim_utf8_utf8(ctx_ptr, d.data(), static_cast<int>(d.length()), "a", 1, &out_len);
  EXPECT_EQ(std::string(out_str, out_len), "");
  EXPECT_TRUE(ctx.has_error());
  ctx.Reset();

  std::string e(
      "åbc\xe0\xa0"
      "åå");
  out_str =
      btrim_utf8_utf8(ctx_ptr, e.data(), static_cast<int>(e.length()), "å", 2, &out_len);
  EXPECT_EQ(std::string(out_str, out_len), "");
  EXPECT_TRUE(ctx.has_error());
  ctx.Reset();

  // Malformed UTF-8 right after a trimmed prefix: trimming stops at the bad
  // byte and no error is recorded.
  std::string f(
      "aa\xc3"
      "bcd");
  out_str =
      btrim_utf8_utf8(ctx_ptr, f.data(), static_cast<int>(f.length()), "a", 1, &out_len);
  EXPECT_EQ(std::string(out_str, out_len),
            "\xc3"
            "bcd");
  EXPECT_FALSE(ctx.has_error());

  std::string g(
      "åå\xe0\xa0"
      "bcå");
  out_str =
      btrim_utf8_utf8(ctx_ptr, g.data(), static_cast<int>(g.length()), "å", 2, &out_len);
  EXPECT_EQ(std::string(out_str, out_len),
            "\xe0\xa0"
            "bc");
  // Checked here so a failure points at this case rather than the next one.
  EXPECT_FALSE(ctx.has_error());

  out_str = btrim_utf8_utf8(ctx_ptr, "åe†çå", 10, "çå", 4, &out_len);
  EXPECT_EQ(std::string(out_str, out_len), "e†");
  EXPECT_FALSE(ctx.has_error());

  // No character of the text is in the trim set.
  out_str = btrim_utf8_utf8(ctx_ptr, "TestString", 10, "abcd", 4, &out_len);
  EXPECT_EQ(std::string(out_str, out_len), "TestString");
  EXPECT_FALSE(ctx.has_error());

  // Every character of the text is in the trim set.
  out_str = btrim_utf8_utf8(ctx_ptr, "acbabbcabb", 10, "abcbd", 5, &out_len);
  EXPECT_EQ(std::string(out_str, out_len), "");
  EXPECT_FALSE(ctx.has_error());
}
// Checks locate_utf8_utf8 and locate_utf8_utf8_int32. Arguments are
// (substring, text[, start position]); positions are 1-based character
// positions and 0 is the value expected when nothing is found.
TEST(TestStringOps, TestLocate) {
  gandiva::ExecutionContext ctx;
  uint64_t ctx_ptr = reinterpret_cast<gdv_int64>(&ctx);

  EXPECT_EQ(locate_utf8_utf8(ctx_ptr, "String", 6, "TestString", 10), 5);
  EXPECT_FALSE(ctx.has_error());

  EXPECT_EQ(locate_utf8_utf8_int32(ctx_ptr, "String", 6, "TestString", 10, 1), 5);
  EXPECT_FALSE(ctx.has_error());

  // Starting at position 2 skips the first occurrence.
  EXPECT_EQ(locate_utf8_utf8_int32(ctx_ptr, "abc", 3, "abcabc", 6, 2), 4);
  EXPECT_FALSE(ctx.has_error());

  // Multi-byte text: the result counts characters, not bytes.
  EXPECT_EQ(locate_utf8_utf8(ctx_ptr, "çåå", 6, "s†å†emçåå†d", 21), 7);
  EXPECT_FALSE(ctx.has_error());

  EXPECT_EQ(locate_utf8_utf8_int32(ctx_ptr, "bar", 3, "†barbar", 9, 3), 5);
  EXPECT_FALSE(ctx.has_error());

  // Empty text or empty substring gives 0 without an error.
  EXPECT_EQ(locate_utf8_utf8_int32(ctx_ptr, "sub", 3, "", 0, 1), 0);
  EXPECT_FALSE(ctx.has_error());

  EXPECT_EQ(locate_utf8_utf8_int32(ctx_ptr, "", 0, "str", 3, 1), 0);
  EXPECT_FALSE(ctx.has_error());

  // A start position of 0 is rejected with an error.
  EXPECT_EQ(locate_utf8_utf8_int32(ctx_ptr, "bar", 3, "barbar", 6, 0), 0);
  EXPECT_THAT(ctx.get_error(),
              ::testing::HasSubstr("Start position must be greater than 0"));
  ctx.Reset();

  // A start position past the end of the text gives 0 without an error.
  EXPECT_EQ(locate_utf8_utf8_int32(ctx_ptr, "bar", 3, "barbar", 6, 7), 0);
  EXPECT_FALSE(ctx.has_error());

  // 0xff is not a valid UTF-8 byte: decoding fails and an error is recorded.
  std::string malformed(
      "a\xff"
      "c");
  EXPECT_EQ(locate_utf8_utf8_int32(ctx_ptr, "c", 1, malformed.data(),
                                   static_cast<int>(malformed.length()), 3),
            0);
  EXPECT_THAT(ctx.get_error(),
              ::testing::HasSubstr(
                  "unexpected byte \\ff encountered while decoding utf8 string"));
  ctx.Reset();
}
// Checks byte_substr_binary_int32_int32 on the 10-byte input "TestString" with
// positive, zero and negative offsets and a range of lengths.
TEST(TestStringOps, TestByteSubstr) {
  gandiva::ExecutionContext ctx;
  uint64_t ctx_ptr = reinterpret_cast<gdv_int64>(&ctx);
  gdv_int32 out_len = 0;

  // Extracts from "TestString" and returns the result as a string.
  auto substr = [&](int offset, int length) {
    const char* result = byte_substr_binary_int32_int32(ctx_ptr, "TestString", 10, offset,
                                                        length, &out_len);
    return std::string(result, out_len);
  };

  // Offsets are 1-based; a length past the end is clamped.
  EXPECT_EQ(substr(5, 10), "String");
  EXPECT_FALSE(ctx.has_error());

  // A negative offset counts back from the end.
  EXPECT_EQ(substr(-6, 10), "String");
  EXPECT_FALSE(ctx.has_error());

  // Offset 0 gives an empty result regardless of the length.
  EXPECT_EQ(substr(0, 10), "");
  EXPECT_FALSE(ctx.has_error());

  EXPECT_EQ(substr(0, -500), "");
  EXPECT_FALSE(ctx.has_error());

  EXPECT_EQ(substr(1, 10), "TestString");
  EXPECT_FALSE(ctx.has_error());

  EXPECT_EQ(substr(1, 4), "Test");
  EXPECT_FALSE(ctx.has_error());

  EXPECT_EQ(substr(1, 1000), "TestString");
  EXPECT_FALSE(ctx.has_error());

  EXPECT_EQ(substr(5, 3), "Str");
  EXPECT_FALSE(ctx.has_error());

  EXPECT_EQ(substr(5, 10), "String");
  EXPECT_FALSE(ctx.has_error());

  // A negative offset larger than the input falls back to the start.
  EXPECT_EQ(substr(-100, 10), "TestString");
  EXPECT_FALSE(ctx.has_error());
}
// Checks strpos_utf8_utf8. Arguments are (text, substring); the result is a
// 1-based character position and 0 is the value expected when nothing is found.
TEST(TestStringOps, TestStrPos) {
  gandiva::ExecutionContext ctx;
  uint64_t ctx_ptr = reinterpret_cast<gdv_int64>(&ctx);

  EXPECT_EQ(strpos_utf8_utf8(ctx_ptr, "TestString", 10, "String", 6), 5);
  EXPECT_FALSE(ctx.has_error());

  EXPECT_EQ(strpos_utf8_utf8(ctx_ptr, "TestString", 10, "String", 6), 5);
  EXPECT_FALSE(ctx.has_error());

  // The first occurrence wins.
  EXPECT_EQ(strpos_utf8_utf8(ctx_ptr, "abcabc", 6, "abc", 3), 1);
  EXPECT_FALSE(ctx.has_error());

  // Multi-byte text: the result counts characters, not bytes.
  EXPECT_EQ(strpos_utf8_utf8(ctx_ptr, "s†å†emçåå†d", 21, "çåå", 6), 7);
  EXPECT_FALSE(ctx.has_error());

  EXPECT_EQ(strpos_utf8_utf8(ctx_ptr, "†barbar", 9, "bar", 3), 2);
  EXPECT_FALSE(ctx.has_error());

  // Empty text or empty substring gives 0 without an error.
  EXPECT_EQ(strpos_utf8_utf8(ctx_ptr, "", 0, "sub", 3), 0);
  EXPECT_FALSE(ctx.has_error());

  EXPECT_EQ(strpos_utf8_utf8(ctx_ptr, "str", 3, "", 0), 0);
  EXPECT_FALSE(ctx.has_error());

  // 0xff is not a valid UTF-8 byte. Only the recorded error is checked here;
  // the returned position is deliberately not asserted.
  std::string malformed(
      "a\xff"
      "c");
  static_cast<void>(strpos_utf8_utf8(ctx_ptr, malformed.data(),
                                     static_cast<int>(malformed.length()), "c", 1));
  EXPECT_THAT(ctx.get_error(),
              ::testing::HasSubstr(
                  "unexpected byte \\ff encountered while decoding utf8 string"));
  ctx.Reset();
}
// Checks replace_utf8_utf8_utf8 (text, search, replacement) and the overflow
// reporting of replace_with_max_len_utf8_utf8_utf8.
//
// Every length argument is the exact byte length of the literal it accompanies.
TEST(TestStringOps, TestReplace) {
  gandiva::ExecutionContext ctx;
  uint64_t ctx_ptr = reinterpret_cast<gdv_int64>(&ctx);
  gdv_int32 out_len = 0;
  const char* out_str;

  // Every occurrence is replaced, not just the first.
  out_str = replace_utf8_utf8_utf8(ctx_ptr, "TestString1String2", 18, "String", 6,
                                   "Replace", 7, &out_len);
  EXPECT_EQ(std::string(out_str, out_len), "TestReplace1Replace2");
  EXPECT_FALSE(ctx.has_error());

  // An empty replacement deletes the match.
  out_str =
      replace_utf8_utf8_utf8(ctx_ptr, "TestString1", 11, "String", 6, "", 0, &out_len);
  EXPECT_EQ(std::string(out_str, out_len), "Test1");
  EXPECT_FALSE(ctx.has_error());

  out_str = replace_utf8_utf8_utf8(ctx_ptr, "", 0, "test", 4, "rep", 3, &out_len);
  EXPECT_EQ(std::string(out_str, out_len), "");
  EXPECT_FALSE(ctx.has_error());

  // Multi-byte text: Ç(2) †(3) †(3) ç(2) å(2) å(2) †(3) = 17 bytes. Each of the
  // three daggers is replaced by a single 't'.
  out_str = replace_utf8_utf8_utf8(ctx_ptr, "Ç††çåå†", 17, "†", 3, "t", 1, &out_len);
  EXPECT_EQ(std::string(out_str, out_len), "Çttçååt");
  EXPECT_FALSE(ctx.has_error());

  // An empty search string leaves the text unchanged.
  out_str = replace_utf8_utf8_utf8(ctx_ptr, "TestString", 10, "", 0, "rep", 3, &out_len);
  EXPECT_EQ(std::string(out_str, out_len), "TestString");
  EXPECT_FALSE(ctx.has_error());

  // A search string longer than the text cannot match.
  out_str =
      replace_utf8_utf8_utf8(ctx_ptr, "Test", 4, "TestString", 10, "rep", 3, &out_len);
  EXPECT_EQ(std::string(out_str, out_len), "Test");
  EXPECT_FALSE(ctx.has_error());

  out_str = replace_utf8_utf8_utf8(ctx_ptr, "Test", 4, "Test", 4, "", 0, &out_len);
  EXPECT_EQ(std::string(out_str, out_len), "");
  EXPECT_FALSE(ctx.has_error());

  out_str =
      replace_utf8_utf8_utf8(ctx_ptr, "TestString", 10, "abc", 3, "xyz", 3, &out_len);
  EXPECT_EQ(std::string(out_str, out_len), "TestString");
  EXPECT_FALSE(ctx.has_error());

  // The result ("Hollow", 6 bytes) does not fit in the 5-byte limit.
  replace_with_max_len_utf8_utf8_utf8(ctx_ptr, "Hell", 4, "ell", 3, "ollow", 5, 5,
                                      &out_len);
  EXPECT_THAT(ctx.get_error(), ::testing::HasSubstr("Buffer overflow for output string"));
  ctx.Reset();

  // The result (16 bytes) does not fit in the 14-byte limit.
  replace_with_max_len_utf8_utf8_utf8(ctx_ptr, "eeee", 4, "e", 1, "aaaa", 4, 14,
                                      &out_len);
  EXPECT_THAT(ctx.get_error(), ::testing::HasSubstr("Buffer overflow for output string"));
  ctx.Reset();
}
// Checks left_utf8_int32: a positive count keeps that many leading characters,
// a negative count drops that many trailing characters.
TEST(TestStringOps, TestLeftString) {
  gandiva::ExecutionContext ctx;
  uint64_t ctx_ptr = reinterpret_cast<gdv_int64>(&ctx);
  gdv_int32 out_len = 0;

  // Applies left() to `text_len` bytes of `text` and returns the result as a string.
  auto left = [&](const char* text, int text_len, int count) {
    const char* result = left_utf8_int32(ctx_ptr, text, text_len, count, &out_len);
    return std::string(result, out_len);
  };

  EXPECT_EQ(left("TestString", 10, 10), "TestString");
  EXPECT_EQ(left("", 0, 0), "");
  EXPECT_EQ(left("", 0, 500), "");
  EXPECT_EQ(left("TestString", 10, 3), "Tes");

  // Negative counts: drop from the right; dropping everything (or more) is empty.
  EXPECT_EQ(left("TestString", 10, -3), "TestStr");
  EXPECT_EQ(left("TestString", 10, -10), "");
  EXPECT_EQ(left("TestString", 10, -11), "");

  // the text length for this string is 10 (each utf8 char is represented by two bytes)
  EXPECT_EQ(left("абвгд", 10, 3), "абв");

  // Six characters in eight bytes: dropping six leaves nothing.
  EXPECT_EQ(left("¥¥abdc", 8, -6), "");
}
// Checks right_utf8_int32: a positive count keeps that many trailing
// characters, a negative count drops that many leading characters.
TEST(TestStringOps, TestRightString) {
  gandiva::ExecutionContext ctx;
  uint64_t ctx_ptr = reinterpret_cast<gdv_int64>(&ctx);
  gdv_int32 out_len = 0;

  // Applies right() to `text_len` bytes of `text` and returns the result as a string.
  auto right = [&](const char* text, int text_len, int count) {
    const char* result = right_utf8_int32(ctx_ptr, text, text_len, count, &out_len);
    return std::string(result, out_len);
  };

  EXPECT_EQ(right("TestString", 10, 10), "TestString");
  EXPECT_EQ(right("", 0, 0), "");
  EXPECT_EQ(right("", 0, 500), "");
  EXPECT_EQ(right("TestString", 10, 3), "ing");

  // Negative counts: drop from the left; dropping everything (or more) is empty.
  EXPECT_EQ(right("TestString", 10, -3), "tString");
  EXPECT_EQ(right("TestString", 10, -10), "");
  EXPECT_EQ(right("TestString", 10, -11), "");

  // the text length for this string is 10 (each utf8 char is represented by two bytes)
  EXPECT_EQ(right("абвгд", 10, 3), "вгд");

  // Six characters in eight bytes: dropping six leaves nothing.
  EXPECT_EQ(right("¥¥abdc", 8, -6), "");
}
// Checks binary_string. Plain text is expected back unchanged, while a textual "\xNN"
// sequence (backslash, 'x', two hex digits) is expected to become the byte it names.
TEST(TestStringOps, TestBinaryString) {
  gandiva::ExecutionContext ctx;
  const auto ctx_ptr = reinterpret_cast<gdv_int64>(&ctx);
  gdv_int32 out_len = 0;

  // Calls the function under test and copies its (pointer, length) result.
  auto decode = [&](const char* text, gdv_int32 text_len) {
    const char* result = binary_string(ctx_ptr, text, text_len, &out_len);
    return std::string(result, out_len);
  };

  // inputs without escape sequences
  EXPECT_EQ(decode("TestString", 10), "TestString");
  EXPECT_EQ(decode("", 0), "");
  EXPECT_EQ(decode("T", 1), "T");

  // each escape occupies four input bytes and yields one output byte
  EXPECT_EQ(decode("\\x41\\x42\\x43", 12), "ABC");
  EXPECT_EQ(decode("\\x41", 4), "A");

  // lower- and upper-case hex digits are both accepted
  EXPECT_EQ(decode("\\x6d\\x6D", 8), "mm");
  EXPECT_EQ(decode("\\x6f\\x6d", 8), "om");
  EXPECT_EQ(decode("\\x4f\\x4D", 8), "OM");
}
// Checks split_part, which is expected to return the index-th (1-based) piece of the text
// when it is split on the delimiter. All lengths passed in are byte lengths.
TEST(TestStringOps, TestSplitPart) {
  gandiva::ExecutionContext ctx;
  const auto ctx_ptr = reinterpret_cast<gdv_int64>(&ctx);
  gdv_int32 out_len = 0;

  // Calls the function under test and copies its (pointer, length) result.
  auto part = [&](const char* text, gdv_int32 text_len, const char* delimiter,
                  gdv_int32 delim_len, gdv_int32 index) {
    const char* result =
        split_part(ctx_ptr, text, text_len, delimiter, delim_len, index, &out_len);
    return std::string(result, out_len);
  };

  // two adjacent delimiters enclose an empty piece
  EXPECT_EQ(part("abc::def", 8, ":", 1, 2), "");

  // index 0 is rejected with an error and an empty result
  EXPECT_EQ(part("A,B,C", 5, ",", 1, 0), "");
  EXPECT_THAT(
      ctx.get_error(),
      ::testing::HasSubstr("Index in split_part must be positive, value provided was 0"));
  // Clear the recorded error so that the remaining cases start from a clean context.
  ctx.Reset();

  EXPECT_EQ(part("A,B,C", 5, ",", 1, 1), "A");
  EXPECT_EQ(part("A,B,C", 5, ",", 1, 2), "B");
  EXPECT_EQ(part("A,B,C", 5, ",", 1, 3), "C");

  // multi-byte delimiter
  EXPECT_EQ(part("abc~@~def~@~ghi", 15, "~@~", 3, 1), "abc");
  EXPECT_EQ(part("abc~@~def~@~ghi", 15, "~@~", 3, 2), "def");
  EXPECT_EQ(part("abc~@~def~@~ghi", 15, "~@~", 3, 3), "ghi");

  // Result must be empty when the index is > no of elements
  EXPECT_EQ(part("123|456|789", 11, "|", 1, 4), "");

  // delimiter at the very end / very start of the text
  EXPECT_EQ(part("123|", 4, "|", 1, 1), "123");
  EXPECT_EQ(part("|123", 4, "|", 1, 1), "");

  // utf8 text: "ç" and "å" take two bytes each, "†" takes three
  EXPECT_EQ(part("ç†ååçåå†", 18, "å", 2, 1), "ç†");
  // "†åå" is 3 + 2 + 2 = 7 bytes; passing 6 would cut the last character in half
  EXPECT_EQ(part("ç†ååçåå†", 18, "†åå", 7, 1), "ç");
  EXPECT_EQ(part("ç†ååçåå†", 18, "†", 3, 2), "ååçåå");
}
// Checks the convert_to* family: for every supported type the returned buffer is expected
// to have the size of the value and to hold exactly the value's in-memory bytes.
TEST(TestStringOps, TestConvertTo) {
  gandiva::ExecutionContext ctx;
  const auto ctx_ptr = reinterpret_cast<gdv_int64>(&ctx);
  gdv_int32 out_len = 0;
  // memcmp reports 0 when both buffers are byte-wise equal
  const int32_t kAllBytesMatch = 0;

  {
    int32_t value = std::numeric_limits<int32_t>::max();
    const char* bytes = convert_toINT(ctx_ptr, value, &out_len);
    EXPECT_EQ(out_len, sizeof(value));
    EXPECT_EQ(kAllBytesMatch, memcmp(bytes, &value, out_len));
  }

  {
    int64_t value = std::numeric_limits<int64_t>::max();
    const char* bytes = convert_toBIGINT(ctx_ptr, value, &out_len);
    EXPECT_EQ(out_len, sizeof(value));
    EXPECT_EQ(kAllBytesMatch, memcmp(bytes, &value, out_len));
  }

  {
    float value = std::numeric_limits<float>::max();
    const char* bytes = convert_toFLOAT(ctx_ptr, value, &out_len);
    EXPECT_EQ(out_len, sizeof(value));
    EXPECT_EQ(kAllBytesMatch, memcmp(bytes, &value, out_len));
  }

  {
    double value = std::numeric_limits<double>::max();
    const char* bytes = convert_toDOUBLE(ctx_ptr, value, &out_len);
    EXPECT_EQ(out_len, sizeof(value));
    EXPECT_EQ(kAllBytesMatch, memcmp(bytes, &value, out_len));
  }

  {
    // strings are expected to come back with the same length and content
    const char* text = "test string";
    int32_t text_len = 11;
    const char* bytes = convert_toUTF8(ctx_ptr, text, text_len, &out_len);
    EXPECT_EQ(out_len, text_len);
    EXPECT_EQ(kAllBytesMatch, memcmp(bytes, text, out_len));
  }
}
// Compares the *_be conversions with their native-order counterparts for a 64-bit integer
// and for a double.
TEST(TestStringOps, TestConvertToBigEndian) {
  gandiva::ExecutionContext ctx;
  const auto ctx_ptr = reinterpret_cast<gdv_int64>(&ctx);

  // Asserts that big_endian holds the bytes of native: in reverse order when
  // ARROW_LITTLE_ENDIAN is set, in the same order otherwise.
  auto expect_big_endian_layout = [](const char* native, const char* big_endian,
                                     gdv_int32 len) {
    for (gdv_int32 i = 0; i < len; ++i) {
#if ARROW_LITTLE_ENDIAN
      EXPECT_EQ(native[i], big_endian[len - (i + 1)]);
#else
      EXPECT_EQ(native[i], big_endian[i]);
#endif
    }
  };

  gdv_int32 native_len = 0;
  gdv_int32 big_endian_len = 0;

  // 64-bit integer
  const int64_t big_integer_value = std::numeric_limits<int64_t>::max();
  const char* native = convert_toBIGINT(ctx_ptr, big_integer_value, &native_len);
  const char* big_endian =
      convert_toBIGINT_be(ctx_ptr, big_integer_value, &big_endian_len);
  EXPECT_EQ(big_endian_len, sizeof(big_integer_value));
  EXPECT_EQ(big_endian_len, native_len);
  expect_big_endian_layout(native, big_endian, native_len);

  // double
  const double double_value = std::numeric_limits<double>::max();
  native = convert_toDOUBLE(ctx_ptr, double_value, &native_len);
  big_endian = convert_toDOUBLE_be(ctx_ptr, double_value, &big_endian_len);
  EXPECT_EQ(big_endian_len, sizeof(double_value));
  EXPECT_EQ(big_endian_len, native_len);
  expect_big_endian_layout(native, big_endian, native_len);
}
// Checks the concat_ws_* family (join with separator) for two to five words. The cases
// below expect a null separator to make the result null and null words to be skipped.
TEST(TestStringOps, TestConcatWs) {
  gandiva::ExecutionContext ctx;
  auto ctx_ptr = reinterpret_cast<int64_t>(&ctx);
  int32_t out_len;
  bool out_result;

  // One string argument together with its validity flag.
  struct Arg {
    const char* data;
    int32_t len;
    bool valid;
  };
  auto arg = [](const char* text, bool valid) {
    return Arg{text, static_cast<int32_t>(strlen(text)), valid};
  };

  // Two-word variant; the validity of the result is left in out_result.
  auto join2 = [&](const Arg& sep, const Arg& a, const Arg& b) {
    const char* joined =
        concat_ws_utf8_utf8(ctx_ptr, sep.data, sep.len, sep.valid, a.data, a.len, a.valid,
                            b.data, b.len, b.valid, &out_result, &out_len);
    return std::string(joined, out_len);
  };

  // Three-word variant; the validity of the result is left in out_result.
  auto join3 = [&](const Arg& sep, const Arg& a, const Arg& b, const Arg& c) {
    const char* joined = concat_ws_utf8_utf8_utf8(
        ctx_ptr, sep.data, sep.len, sep.valid, a.data, a.len, a.valid, b.data, b.len,
        b.valid, c.data, c.len, c.valid, &out_result, &out_len);
    return std::string(joined, out_len);
  };

  const char* word1 = "hey";
  const char* word2 = "hello";
  const char* word3 = "wow";
  const char* word4 = "awesome";
  const char* word5 = "super";

  // --- two words, separator "-" ---
  EXPECT_EQ(join2(arg("-", true), arg(word1, true), arg(word2, true)), "hey-hello");
  EXPECT_TRUE(out_result);

  // everything empty
  EXPECT_EQ(join2(arg("", true), arg("", true), arg("", true)), "");
  EXPECT_TRUE(out_result);

  // an empty (but valid) first word still contributes a separator
  EXPECT_EQ(join2(arg("-", true), arg("", true), arg(word2, true)), "-hello");
  EXPECT_TRUE(out_result);

  // null separator
  EXPECT_EQ(join2(arg("-", false), arg(word1, true), arg(word2, true)), "");
  EXPECT_FALSE(out_result);

  // null first word
  EXPECT_EQ(join2(arg("-", true), arg(word1, false), arg(word2, true)), "hello");
  EXPECT_TRUE(out_result);

  // null second word
  EXPECT_EQ(join2(arg("-", true), arg(word1, true), arg(word2, false)), "hey");
  EXPECT_TRUE(out_result);

  // --- three words, separator "#" ---
  EXPECT_EQ(join3(arg("#", true), arg(word1, true), arg(word2, true), arg(word3, true)),
            "hey#hello#wow");
  EXPECT_TRUE(out_result);

  EXPECT_EQ(join3(arg("#", true), arg("", true), arg(word2, false), arg(word3, true)),
            "#wow");
  EXPECT_TRUE(out_result);

  // null separator
  EXPECT_EQ(join3(arg("#", false), arg(word1, true), arg(word2, true), arg(word3, true)),
            "");
  EXPECT_FALSE(out_result);

  EXPECT_EQ(join3(arg("#", true), arg(word1, false), arg(word2, true), arg(word3, true)),
            "hello#wow");
  EXPECT_TRUE(out_result);

  EXPECT_EQ(join3(arg("#", true), arg(word1, true), arg(word2, false), arg(word3, true)),
            "hey#wow");
  EXPECT_TRUE(out_result);

  EXPECT_EQ(join3(arg("#", true), arg(word1, false), arg(word2, false), arg(word3, true)),
            "wow");
  EXPECT_TRUE(out_result);

  // --- four words, separator "=" ---
  {
    const Arg sep = arg("=", true);
    const Arg a = arg(word1, true), b = arg(word2, true), c = arg(word3, true),
              d = arg(word4, true);
    const char* joined = concat_ws_utf8_utf8_utf8_utf8(
        ctx_ptr, sep.data, sep.len, sep.valid, a.data, a.len, a.valid, b.data, b.len,
        b.valid, c.data, c.len, c.valid, d.data, d.len, d.valid, &out_result, &out_len);
    EXPECT_EQ(std::string(joined, out_len), "hey=hello=wow=awesome");
    EXPECT_TRUE(out_result);
  }

  // --- five words, two-character separator "&&" ---
  {
    const Arg sep = arg("&&", true);
    const Arg a = arg(word1, true), b = arg(word2, true), c = arg(word3, true),
              d = arg(word4, true), e = arg(word5, true);
    const char* joined = concat_ws_utf8_utf8_utf8_utf8_utf8(
        ctx_ptr, sep.data, sep.len, sep.valid, a.data, a.len, a.valid, b.data, b.len,
        b.valid, c.data, c.len, c.valid, d.data, d.len, d.valid, e.data, e.len, e.valid,
        &out_result, &out_len);
    EXPECT_EQ(std::string(joined, out_len), "hey&&hello&&wow&&awesome&&super");
    EXPECT_TRUE(out_result);
  }
}
// Checks the elt_* family, which is expected to return the word at the given 1-based
// position, or an empty, invalid result when the position matches no word.
TEST(TestStringOps, TestEltFunction) {
  gdv_int32 out_len = 0;
  bool out_validity = false;

  auto length_of = [](const char* text) { return static_cast<int32_t>(strlen(text)); };

  // Two-word variant with every input valid; result validity is left in out_validity.
  auto pick_of_two = [&](gdv_int32 position, const char* first, const char* second) {
    const char* picked =
        elt_int32_utf8_utf8(position, true, first, length_of(first), true, second,
                            length_of(second), true, &out_validity, &out_len);
    return std::string(picked, out_len);
  };

  EXPECT_EQ("john", pick_of_two(1, "john", ""));
  EXPECT_TRUE(out_validity);

  EXPECT_EQ("world", pick_of_two(2, "hello", "world"));
  EXPECT_TRUE(out_validity);

  // position past the last word
  EXPECT_EQ("", pick_of_two(4, "goodbye", "world"));
  EXPECT_FALSE(out_validity);

  // position 0
  EXPECT_EQ("", pick_of_two(0, "hi", "yeah"));
  EXPECT_FALSE(out_validity);

  // The wider variants each pick their last word.
  const char* word1 = "hi";
  const char* word2 = "yeah";
  const char* word3 = "wow";
  const char* word4 = "awesome";
  const char* word5 = "not-empty";
  const int32_t word1_len = length_of(word1);
  const int32_t word2_len = length_of(word2);
  const int32_t word3_len = length_of(word3);
  const int32_t word4_len = length_of(word4);
  const int32_t word5_len = length_of(word5);

  const char* picked =
      elt_int32_utf8_utf8_utf8(3, true, word1, word1_len, true, word2, word2_len, true,
                               word3, word3_len, true, &out_validity, &out_len);
  EXPECT_EQ("wow", std::string(picked, out_len));
  EXPECT_TRUE(out_validity);

  picked = elt_int32_utf8_utf8_utf8_utf8(
      4, true, word1, word1_len, true, word2, word2_len, true, word3, word3_len, true,
      word4, word4_len, true, &out_validity, &out_len);
  EXPECT_EQ("awesome", std::string(picked, out_len));
  EXPECT_TRUE(out_validity);

  picked = elt_int32_utf8_utf8_utf8_utf8_utf8(
      5, true, word1, word1_len, true, word2, word2_len, true, word3, word3_len, true,
      word4, word4_len, true, word5, word5_len, true, &out_validity, &out_len);
  EXPECT_EQ("not-empty", std::string(picked, out_len));
  EXPECT_TRUE(out_validity);
}
// Checks to_hex_binary: every input byte is expected to be rendered as two upper-case
// hexadecimal digits, so the output is always twice as long as the input.
TEST(TestStringOps, TestToHex) {
  gandiva::ExecutionContext ctx;
  uint64_t ctx_ptr = reinterpret_cast<int64_t>(&ctx);
  int32_t out_len = 0;
  int32_t in_len = 0;
  const char* out_str;

  // "TestString"
  in_len = 10;
  char in_str[] = {0x54, 0x65, 0x73, 0x74, 0x53, 0x74, 0x72, 0x69, 0x6E, 0x67};
  out_str = to_hex_binary(ctx_ptr, in_str, in_len, &out_len);
  std::string output = std::string(out_str, out_len);
  EXPECT_EQ(out_len, 2 * in_len);
  EXPECT_EQ(output, "54657374537472696E67");

  // empty input
  in_len = 0;
  out_str = to_hex_binary(ctx_ptr, "", in_len, &out_len);
  output = std::string(out_str, out_len);
  EXPECT_EQ(out_len, 0);
  EXPECT_EQ(output, "");

  // single byte
  in_len = 1;
  char in_str_one_char[] = {0x54};
  out_str = to_hex_binary(ctx_ptr, in_str_one_char, in_len, &out_len);
  output = std::string(out_str, out_len);
  EXPECT_EQ(out_len, 2 * in_len);
  EXPECT_EQ(output, "54");

  // "Test with spaces"
  in_len = 16;
  char in_str_spaces[] = {0x54, 0x65, 0x73, 0x74, 0x20, 0x77, 0x69, 0x74,
                          0x68, 0x20, 0x73, 0x70, 0x61, 0x63, 0x65, 0x73};
  out_str = to_hex_binary(ctx_ptr, in_str_spaces, in_len, &out_len);
  output = std::string(out_str, out_len);
  // length check added for parity with every other case in this test
  EXPECT_EQ(out_len, 2 * in_len);
  EXPECT_EQ(output, "54657374207769746820737061636573");

  // text containing a line feed (0x0A)
  in_len = 20;
  char in_str_break_line[] = {0x54, 0x65, 0x78, 0x74, 0x20, 0x77, 0x69, 0x74, 0x68, 0x0A,
                              0x62, 0x72, 0x65, 0x61, 0x6B, 0x20, 0x6C, 0x69, 0x6E, 0x65};
  out_str = to_hex_binary(ctx_ptr, in_str_break_line, in_len, &out_len);
  output = std::string(out_str, out_len);
  EXPECT_EQ(out_len, 2 * in_len);
  EXPECT_EQ(output, "5465787420776974680A627265616B206C696E65");

  // "Test with numbers 1 + 1 = 2"
  in_len = 27;
  char in_str_with_num[] = {0x54, 0x65, 0x73, 0x74, 0x20, 0x77, 0x69, 0x74, 0x68,
                            0x20, 0x6E, 0x75, 0x6D, 0x62, 0x65, 0x72, 0x73, 0x20,
                            0x31, 0x20, 0x2B, 0x20, 0x31, 0x20, 0x3D, 0x20, 0x32};
  out_str = to_hex_binary(ctx_ptr, in_str_with_num, in_len, &out_len);
  output = std::string(out_str, out_len);
  EXPECT_EQ(out_len, 2 * in_len);
  EXPECT_EQ(output, "546573742077697468206E756D626572732031202B2031203D2032");

  // tabs (0x09) and line feeds (0x0A) mixed with printable characters
  in_len = 22;
  char in_str_with_tabs[] = {0x09, 0x0A, 0x09, 0x0A, 0x09, 0x0A, 0x09, 0x0A,
                             0x0A, 0x0A, 0x09, 0x20, 0x61, 0x20, 0x6C, 0x65,
                             0x74, 0x74, 0x40, 0x5D, 0x65, 0x72};
  out_str = to_hex_binary(ctx_ptr, in_str_with_tabs, in_len, &out_len);
  output = std::string(out_str, out_len);
  EXPECT_EQ(out_len, 2 * in_len);
  EXPECT_EQ(output, "090A090A090A090A0A0A092061206C657474405D6572");

  // Same bytes as above, supplied through a string literal instead of a char array.
  // Named raw_bytes so that it does not hide the binary_string() function.
  in_len = 22;
  const char* raw_bytes =
      "\x09\x0A\x09\x0A\x09\x0A\x09\x0A\x0A\x0A\x09\x20\x61\x20\x6C\x65\x74\x74\x40\x5D"
      "\x65\x72";
  out_str = to_hex_binary(ctx_ptr, raw_bytes, in_len, &out_len);
  output = std::string(out_str, out_len);
  EXPECT_EQ(out_len, 2 * in_len);
  EXPECT_EQ(output, "090A090A090A090A0A0A092061206C657474405D6572");
}
// Checks to_hex_int64. The expected strings show upper-case digits, no leading zeros, and
// negative values rendered as their 64-bit two's-complement pattern.
TEST(TestStringOps, TestToHexInt64) {
  gandiva::ExecutionContext ctx;
  uint64_t ctx_ptr = reinterpret_cast<int64_t>(&ctx);
  int32_t out_len = 0;
  const char* out_str;

  struct HexCase {
    int64_t value;
    int32_t hex_len;
    const char* hex;
  };
  const HexCase cases[] = {
      {INT64_MAX, 16, "7FFFFFFFFFFFFFFF"},
      {INT64_MIN, 16, "8000000000000000"},
      {0, 1, "0"},
      {-0, 1, "0"},  // "minus zero" is the same integer as zero
      {-1, 16, "FFFFFFFFFFFFFFFF"},
      {1, 1, "1"},
  };

  for (const HexCase& hex_case : cases) {
    out_str = to_hex_int64(ctx_ptr, hex_case.value, &out_len);
    const std::string output(out_str, out_len);
    EXPECT_FALSE(ctx.has_error());
    EXPECT_EQ(out_len, hex_case.hex_len);
    EXPECT_EQ(output, hex_case.hex);
    ctx.Reset();
  }
}
// Checks to_hex_int32. The expected strings show upper-case digits, no leading zeros, and
// negative values rendered as their 32-bit two's-complement pattern.
TEST(TestStringOps, TestToHexInt32) {
  gandiva::ExecutionContext ctx;
  uint64_t ctx_ptr = reinterpret_cast<int64_t>(&ctx);
  int32_t out_len = 0;
  const char* out_str;

  struct HexCase {
    int32_t value;
    int32_t hex_len;
    const char* hex;
  };
  const HexCase cases[] = {
      {INT32_MAX, 8, "7FFFFFFF"},
      {INT32_MIN, 8, "80000000"},
      {0, 1, "0"},
      {-0, 1, "0"},  // "minus zero" is the same integer as zero
      {-1, 8, "FFFFFFFF"},
      {1, 1, "1"},
  };

  for (const HexCase& hex_case : cases) {
    out_str = to_hex_int32(ctx_ptr, hex_case.value, &out_len);
    const std::string output(out_str, out_len);
    EXPECT_FALSE(ctx.has_error());
    EXPECT_EQ(out_len, hex_case.hex_len);
    EXPECT_EQ(output, hex_case.hex);
    ctx.Reset();
  }
}
// Checks from_hex_utf8, which is expected to turn pairs of hex digits into bytes and to
// report an empty, invalid result for null or malformed input.
TEST(TestStringOps, TestFromHex) {
  gandiva::ExecutionContext ctx;
  const auto ctx_ptr = reinterpret_cast<gdv_int64>(&ctx);
  gdv_int32 out_len = 0;
  bool out_valid = false;

  // Calls the function under test; the validity of the result is left in out_valid.
  auto unhex = [&](const char* text, gdv_int32 text_len, bool in_valid) {
    const char* result =
        from_hex_utf8(ctx_ptr, text, text_len, in_valid, &out_valid, &out_len);
    return std::string(result, out_len);
  };

  // well-formed input
  EXPECT_EQ(unhex("414243", 6, true), "ABC");
  EXPECT_TRUE(out_valid);

  EXPECT_EQ(unhex("", 0, true), "");
  EXPECT_TRUE(out_valid);

  EXPECT_EQ(unhex("41", 2, true), "A");
  EXPECT_TRUE(out_valid);

  // lower- and upper-case hex digits are both accepted
  EXPECT_EQ(unhex("6d6D", 4, true), "mm");
  EXPECT_TRUE(out_valid);

  EXPECT_EQ(unhex("6f6d", 4, true), "om");
  EXPECT_TRUE(out_valid);

  EXPECT_EQ(unhex("4f4D", 4, true), "OM");
  EXPECT_TRUE(out_valid);

  // null input
  EXPECT_EQ(unhex("4f4D", 4, false), "");
  EXPECT_FALSE(out_valid);

  // non-hex characters, even length
  EXPECT_EQ(unhex("egular courts above th", 22, true), "");
  EXPECT_FALSE(out_valid);

  // non-hex characters, odd length
  EXPECT_EQ(unhex("lites. fluffily even de", 23, true), "");
  EXPECT_FALSE(out_valid);

  EXPECT_EQ(unhex("T", 1, true), "");
  EXPECT_FALSE(out_valid);

  // textual "\xNN" escapes are not plain hex digits
  EXPECT_EQ(unhex("\\x41\\x42\\x43", 12, true), "");
  EXPECT_FALSE(out_valid);
}
// Checks soundex_utf8. The expectations show that the code is one upper-case letter
// followed by three digits, that non-alphabetic characters are ignored, and that input
// without any letter yields an invalid result.
TEST(TestStringOps, TestSoundex) {
  gandiva::ExecutionContext ctx;
  auto ctx_ptr = reinterpret_cast<int64_t>(&ctx);
  int32_t out_len = 0;
  bool validity = false;

  // Calls the function under test on valid input; result validity is left in validity.
  auto soundex = [&](const char* text, int32_t text_len) {
    const char* code = soundex_utf8(ctx_ptr, text, text_len, true, &validity, &out_len);
    return std::string(code, out_len);
  };

  // no letters at all
  EXPECT_EQ(soundex("123456789", 9), "");
  EXPECT_FALSE(validity);

  // digits around a single letter are ignored
  EXPECT_EQ(soundex("a", 1), "A000");
  EXPECT_TRUE(validity);
  EXPECT_EQ(soundex("123456789a", 10), "A000");
  EXPECT_TRUE(validity);
  EXPECT_EQ(soundex("a123456789", 10), "A000");
  EXPECT_TRUE(validity);

  // case and punctuation do not change the code
  EXPECT_EQ(soundex("robert", 6), "R163");
  EXPECT_TRUE(validity);
  EXPECT_EQ(soundex("r-O-b-E-r-T", 11), "R163");
  EXPECT_TRUE(validity);
  EXPECT_EQ(soundex("Robert", 6), "R163");
  EXPECT_TRUE(validity);
  EXPECT_EQ(soundex("Rupert", 6), "R163");
  EXPECT_TRUE(validity);

  EXPECT_EQ(soundex("Honeyman", 8), "H555");
  EXPECT_TRUE(validity);
  EXPECT_EQ(soundex("Tymczak", 7), "T522");
  EXPECT_TRUE(validity);
  EXPECT_EQ(soundex("Ashcraft", 8), "A226");
  EXPECT_TRUE(validity);
  EXPECT_EQ(soundex("Ashcroft", 8), "A226");
  EXPECT_TRUE(validity);
  EXPECT_EQ(soundex("Jjjice", 6), "J200");
  EXPECT_TRUE(validity);

  // spaces and embedded digits are ignored as well
  EXPECT_EQ(soundex("Luke Garcia", 11), "L226");
  EXPECT_TRUE(validity);
  EXPECT_EQ(soundex("123 321 Luke 987 Gar4cia", 24), "L226");
  EXPECT_TRUE(validity);
  EXPECT_EQ(soundex("Alice Ichabod", 13), "A422");
  EXPECT_TRUE(validity);

  EXPECT_EQ(soundex("Miller", 6), "M460");
  EXPECT_TRUE(validity);
  EXPECT_EQ(soundex("3Miller", 7), "M460");
  EXPECT_TRUE(validity);
  EXPECT_EQ(soundex("Mill3r", 6), "M460");
  EXPECT_TRUE(validity);

  EXPECT_EQ(soundex("abc", 3), "A120");
  EXPECT_TRUE(validity);
  EXPECT_EQ(soundex("123abc", 6), "A120");
  EXPECT_TRUE(validity);
  EXPECT_EQ(soundex("test", 4), "T230");
  EXPECT_TRUE(validity);

  // the empty string gives an empty but valid result
  EXPECT_EQ(soundex("", 0), "");
  EXPECT_TRUE(validity);

  EXPECT_EQ(soundex("Elvis", 5), "E412");
  EXPECT_TRUE(validity);
  EXPECT_EQ(soundex("waterloo", 8), "W364");
  EXPECT_TRUE(validity);
  EXPECT_EQ(soundex("eowolf", 6), "E410");
  EXPECT_TRUE(validity);

  // Similar-sounding names must share a code. Each call gets its own length and its own
  // validity check, so neither result is judged with the other call's outputs.
  int32_t smith_len = 0;
  const char* smith = soundex_utf8(ctx_ptr, "Smith", 5, true, &validity, &smith_len);
  EXPECT_TRUE(validity);
  int32_t smythe_len = 0;
  const char* smythe = soundex_utf8(ctx_ptr, "Smythe", 6, true, &validity, &smythe_len);
  EXPECT_TRUE(validity);
  EXPECT_EQ(std::string(smith, smith_len), std::string(smythe, smythe_len));
}
// Checks instr_utf8, which is expected to return the 1-based position of the first
// occurrence of the needle in the haystack, or 0 when it does not occur.
TEST(TestStringOps, TestInstr) {
  struct InstrCase {
    const char* haystack;
    const char* needle;
    int32_t position;
  };
  const InstrCase cases[] = {
      {"hello world!", "world", 7},
      {"apple banana mango", "apple", 1},
      // empty haystack, non-empty needle
      {"", "mango", 0},
      // an empty needle is reported at position 1, even in an empty haystack
      {"open the door", "", 1},
      {"", "", 1},
      // needle longer than what remains of the haystack
      {"hi john", "johny", 0},
      {"cool", "cooler", 0},
      // exact match and proper prefix
      {"Hello", "Hello", 1},
      {"Hello", "Hell", 1},
      // near misses differing in a single character
      {"Hello", "Hell0", 0},
      {"Hello", "H3ll", 0},
      {"wow", "wou", 0},
      // match at the start, in the middle and at the end
      {"alphabetic", "alpha", 1},
      {"alphabetic", "bet", 6},
      {"kaleidoscope", "scope", 8},
  };

  for (const InstrCase& instr_case : cases) {
    const std::string haystack = instr_case.haystack;
    const std::string needle = instr_case.needle;
    const auto haystack_len = static_cast<int32_t>(haystack.size());
    const auto needle_len = static_cast<int32_t>(needle.size());
    const auto result =
        instr_utf8(haystack.c_str(), haystack_len, needle.c_str(), needle_len);
    EXPECT_EQ(result, instr_case.position);
  }
}
} // namespace gandiva