// cpp/src/gandiva/precompiled/string_ops.cc - arrow - Git at Google

 // Licensed to the Apache Software Foundation (ASF) under one
 // or more contributor license agreements.  See the NOTICE file
 // distributed with this work for additional information
 // regarding copyright ownership.  The ASF licenses this file
 // to you under the Apache License, Version 2.0 (the
 // "License"); you may not use this file except in compliance
 // with the License.  You may obtain a copy of the License at
 //
 //   http://www.apache.org/licenses/LICENSE-2.0
 //
 // Unless required by applicable law or agreed to in writing,
 // software distributed under the License is distributed on an
 // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 // KIND, either express or implied.  See the License for the
 // specific language governing permissions and limitations
 // under the License.

 // String functions
 #include "arrow/util/logging_internal.h"
 #include "arrow/util/value_parsing.h"

 extern "C" {

 #include <algorithm>
 #include <cinttypes>
 #include <climits>
 #include <cstdio>
 #include <cstdlib>
 #include <cstring>

 #include "./types.h"

 // Size of a utf8 value in octets: this is the byte length handed in, the bytes
 // themselves are never inspected.
 FORCE_INLINE
 gdv_int32 octet_length_utf8(const gdv_utf8 input, gdv_int32 length) {
   const gdv_int32 num_octets = length;
   return num_octets;
 }

 // Size of a utf8 value in bits (eight per byte of 'length'); the bytes themselves
 // are never inspected.
 FORCE_INLINE
 gdv_int32 bit_length_utf8(const gdv_utf8 input, gdv_int32 length) {
   const gdv_int32 bits_per_byte = 8;
   return length * bits_per_byte;
 }

 // Size of a binary value in octets: this is the byte length handed in, the bytes
 // themselves are never inspected.
 FORCE_INLINE
 gdv_int32 octet_length_binary(const gdv_binary input, gdv_int32 length) {
   const gdv_int32 num_octets = length;
   return num_octets;
 }

 // Size of a binary value in bits (eight per byte of 'length'); the bytes themselves
 // are never inspected.
 FORCE_INLINE
 gdv_int32 bit_length_binary(const gdv_binary input, gdv_int32 length) {
   const gdv_int32 bits_per_byte = 8;
   return length * bits_per_byte;
 }

 // Looks for the first occurrence of 'delim' (delim_len bytes) inside 'input'
 // (input_len bytes) that starts at or after byte offset 'start_pos'.
 // Returns the offset of the byte just past the match, or -1 if there is none.
 FORCE_INLINE
 int match_string(const char* input, gdv_int32 input_len, gdv_int32 start_pos,
                  const char* delim, gdv_int32 delim_len) {
   int pos = start_pos;
   while (pos < input_len) {
     const int bytes_left = input_len - pos;
     if (bytes_left < delim_len) {
       // the delimiter no longer fits in what remains, so no later offset can match
       break;
     }
     if (memcmp(input + pos, delim, delim_len) == 0) {
       return pos + delim_len;
     }
     ++pos;
   }
   return -1;
 }

 // Three-way byte-wise comparison of two variable-length buffers.
 // The common prefix is compared with memcmp; when it is identical the result is
 // the difference of the lengths, so the shorter buffer orders first.
 // Returns <0, 0 or >0.
 FORCE_INLINE
 gdv_int32 mem_compare(const char* left, gdv_int32 left_len, const char* right,
                       gdv_int32 right_len) {
   const int common_len = (right_len < left_len) ? right_len : left_len;
   const int prefix_cmp = memcmp(left, right, common_len);
   if (prefix_cmp == 0) {
     return left_len - right_len;
   }
   return prefix_cmp;
 }

 // Expand inner macro for all varlen types.
 // INNER is invoked once with TYPE = utf8 and once with TYPE = binary; NAME and OP
 // are forwarded unchanged.
 #define VAR_LEN_OP_TYPES(INNER, NAME, OP) \
   INNER(NAME, utf8, OP)                   \
   INNER(NAME, binary, OP)

 // Relational binary fns : left, right params are same, return is bool.
 // Generates NAME_TYPE_TYPE(left, left_len, right, right_len), which returns
 // `mem_compare(left, left_len, right, right_len) OP 0`.
 #define BINARY_RELATIONAL(NAME, TYPE, OP)                                    \
   FORCE_INLINE                                                               \
   bool NAME##_##TYPE##_##TYPE(const gdv_##TYPE left, gdv_int32 left_len,     \
                               const gdv_##TYPE right, gdv_int32 right_len) { \
     return mem_compare(left, left_len, right, right_len) OP 0;               \
   }

 // Instantiate the six comparisons for both utf8 and binary, e.g. equal_utf8_utf8
 // and less_than_binary_binary.
 VAR_LEN_OP_TYPES(BINARY_RELATIONAL, equal, ==)
 VAR_LEN_OP_TYPES(BINARY_RELATIONAL, not_equal, !=)
 VAR_LEN_OP_TYPES(BINARY_RELATIONAL, less_than, <)
 VAR_LEN_OP_TYPES(BINARY_RELATIONAL, less_than_or_equal_to, <=)
 VAR_LEN_OP_TYPES(BINARY_RELATIONAL, greater_than, >)
 VAR_LEN_OP_TYPES(BINARY_RELATIONAL, greater_than_or_equal_to, >=)

 // The helper macros are only needed for the instantiations above.
 #undef BINARY_RELATIONAL
 #undef VAR_LEN_OP_TYPES

 // Expand inner macro for all varlen types.
 // INNER is invoked once with TYPE = utf8 and once with TYPE = binary; NAME is
 // forwarded unchanged.
 #define VAR_LEN_TYPES(INNER, NAME) \
   INNER(NAME, utf8)                \
   INNER(NAME, binary)

 // Converts one hexadecimal digit character to its numeric value.
 // 'A'-'F' and 'a'-'f' map to 10-15. Any other character is returned as its offset
 // from '0', so only '0'-'9' yield a meaningful value; no validation is performed.
 FORCE_INLINE
 int to_binary_from_hex(char ch) {
   const bool is_upper_hex = (ch >= 'A') && (ch <= 'F');
   if (is_upper_hex) {
     return (ch - 'A') + 10;
   }
   const bool is_lower_hex = (ch >= 'a') && (ch <= 'f');
   if (is_lower_hex) {
     return (ch - 'a') + 10;
   }
   return ch - '0';
 }

 // Returns true when the first prefix_len bytes of 'data' are byte-for-byte equal to
 // 'prefix'. A prefix longer than the data never matches.
 FORCE_INLINE
 bool starts_with_utf8_utf8(const char* data, gdv_int32 data_len, const char* prefix,
                            gdv_int32 prefix_len) {
   if (data_len < prefix_len) {
     return false;
   }
   return memcmp(data, prefix, prefix_len) == 0;
 }

 // Returns true when the last suffix_len bytes of 'data' are byte-for-byte equal to
 // 'suffix'. A suffix longer than the data never matches.
 FORCE_INLINE
 bool ends_with_utf8_utf8(const char* data, gdv_int32 data_len, const char* suffix,
                          gdv_int32 suffix_len) {
   if (data_len < suffix_len) {
     return false;
   }
   return memcmp(data + data_len - suffix_len, suffix, suffix_len) == 0;
 }

 // Returns true when 'substr' (substr_len bytes) occurs anywhere inside 'data'
 // (data_len bytes). Every candidate start offset is tried with memcmp.
 FORCE_INLINE
 bool is_substr_utf8_utf8(const char* data, int32_t data_len, const char* substr,
                          int32_t substr_len) {
   // last offset at which the whole of 'substr' still fits inside 'data'
   const int32_t last_start = data_len - substr_len;
   int32_t offset = 0;
   while (offset <= last_start) {
     if (memcmp(data + offset, substr, substr_len) == 0) {
       return true;
     }
     ++offset;
   }
   return false;
 }

 // Returns the number of bytes in the utf8 character whose head byte is 'c':
 //   0xxxxxxx -> 1, 110xxxxx -> 2, 1110xxxx -> 3, 11110xxx -> 4.
 // Any other byte (including a continuation byte 10xxxxxx) is not a valid head byte
 // and yields 0.
 FORCE_INLINE
 gdv_int32 utf8_char_length(char c) {
   const unsigned char head = static_cast<unsigned char>(c);
   if (head < 0x80) {
     return 1;
   }
   if ((head & 0xE0) == 0xC0) {
     return 2;
   }
   if ((head & 0xF0) == 0xE0) {
     return 3;
   }
   if ((head & 0xF8) == 0xF0) {
     return 4;
   }
   return 0;  // not a valid head byte
 }

 // Records an "unexpected byte" error on the execution context, naming the
 // offending byte 'val' as two hex digits.
 FORCE_INLINE
 void set_error_for_invalid_utf(int64_t execution_context, char val) {
   const char* fmt = "unexpected byte \\%02hhx encountered while decoding utf8 string";
   // The formatted message has a fixed, small size (the only conversion expands to
   // two hex digits), so a stack buffer is enough. This avoids a heap allocation
   // whose result was never checked: a failed malloc would have made snprintf write
   // through a null pointer.
   char error[128];
   snprintf(error, sizeof(error), fmt, (unsigned char)val);
   gdv_fn_context_set_error_msg(execution_context, error);
 }

 // Checks the continuation bytes of the glyph whose head byte is data[char_index].
 // Note that 'data_len' bounds the offsets examined relative to char_index: the
 // bytes data[char_index + 1] .. data[char_index + data_len - 1] are checked.
 // Returns true only when each of them has the form 10xxxxxx.
 FORCE_INLINE
 bool validate_utf8_following_bytes(const char* data, int32_t data_len,
                                    int32_t char_index) {
   int offset = 1;
   while (offset < data_len) {
     const bool is_continuation = (data[char_index + offset] & 0xC0) == 0x80;
     if (!is_continuation) {
       return false;
     }
     ++offset;
   }
   return true;
 }

 // Counts the utf8 characters (glyphs) in data[0 .. data_len).
 // On an invalid head byte, a glyph that runs past the end of the input, or a
 // malformed continuation byte, an error naming the offending byte is set on the
 // context and 0 is returned.
 FORCE_INLINE
 gdv_int32 utf8_length(gdv_int64 context, const char* data, gdv_int32 data_len) {
   int num_glyphs = 0;
   int pos = 0;
   while (pos < data_len) {
     const int glyph_len = utf8_char_length(data[pos]);
     const bool bad_head = (glyph_len == 0);
     if (bad_head || pos + glyph_len > data_len) {  // invalid byte or incomplete glyph
       set_error_for_invalid_utf(context, data[pos]);
       return 0;
     }
     // every byte after the head byte must look like 10xxxxxx
     const int glyph_end = pos + glyph_len;
     for (int k = pos + 1; k < glyph_end; ++k) {
       if ((data[k] & 0xC0) != 0x80) {
         set_error_for_invalid_utf(context, data[k]);
         return 0;
       }
     }
     pos = glyph_end;
     ++num_glyphs;
   }
   return num_glyphs;
 }

 // Counts the utf8 characters in data[0 .. data_len) without ever reporting an
 // error. An invalid head byte or an incomplete glyph is counted as a single
 // 1-byte character. For each following byte that is not a continuation byte
 // (10xxxxxx), the current glyph is widened by one byte.
 FORCE_INLINE
 gdv_int32 utf8_length_ignore_invalid(const char* data, gdv_int32 data_len) {
   int char_len = 0;
   int count = 0;
   for (int i = 0; i < data_len; i += char_len) {
     char_len = utf8_char_length(data[i]);
     if (char_len == 0 || i + char_len > data_len) {  // invalid byte or incomplete glyph
       // if invalid byte or incomplete glyph, ignore it
       char_len = 1;
     }
     // char_len can grow inside this loop, so bounding j by char_len alone would
     // allow data[i + j] to be read past the end of the input. Stopping at data_len
     // does not change the count: once i + j reaches data_len, i + char_len already
     // exceeds data_len and the outer loop ends after this glyph.
     for (int j = 1; j < char_len && i + j < data_len; ++j) {
       if ((data[i + j] & 0xC0) != 0x80) {  // bytes following head-byte of glyph
         char_len += 1;
       }
     }
     ++count;
   }
   return count;
 }

 // Translates a character position into a byte position for a non-empty utf8
 // sequence: skips up to 'char_pos' glyphs from the start of 'str' and returns the
 // byte offset reached (at most str_len).
 // On an invalid head byte or a glyph that runs past str_len, an error is set on the
 // context and -1 is returned. Continuation bytes are not validated here.
 FORCE_INLINE
 gdv_int32 utf8_byte_pos(gdv_int64 context, const char* str, gdv_int32 str_len,
                         gdv_int32 char_pos) {
   int offset = 0;
   gdv_int32 glyphs_skipped = 0;
   while (glyphs_skipped < char_pos && offset < str_len) {
     const int glyph_len = utf8_char_length(str[offset]);
     const bool bad_head = (glyph_len == 0);
     if (bad_head || offset + glyph_len > str_len) {  // invalid byte or incomplete glyph
       set_error_for_invalid_utf(context, str[offset]);
       return -1;
     }
     offset += glyph_len;
     ++glyphs_skipped;
   }
   return offset;
 }

 // Generates NAME_TYPE(context, in, in_len), a thin wrapper that returns
 // utf8_length(context, in, in_len), i.e. the number of utf8 characters in the
 // input (0 with an error set on the context when the input is not valid utf8).
 #define UTF8_LENGTH(NAME, TYPE)                                                 \
   FORCE_INLINE                                                                  \
   gdv_int32 NAME##_##TYPE(gdv_int64 context, gdv_##TYPE in, gdv_int32 in_len) { \
     return utf8_length(context, in, in_len);                                    \
   }

 // Instantiations: char_length_utf8, length_utf8 and lengthUtf8_binary.
 UTF8_LENGTH(char_length, utf8)
 UTF8_LENGTH(length, utf8)
 UTF8_LENGTH(lengthUtf8, binary)

 // Upper and lower bounds on the length of generated strings. They are used to
 // clamp the requested length in space_int32 and space_int64 and, per the original
 // note, in lpad_utf8_int32_utf8 and rpad_utf8_int32_utf8, to avoid exceptions.
 static const gdv_int32 max_str_length = 65536;
 static const gdv_int32 min_str_length = 0;
 // Returns a string of 'n' spaces.
 // 'n' is first clamped into [min_str_length, max_str_length]; a count that is not
 // positive after clamping yields the empty string. The result is allocated from
 // the context arena; if the allocation fails, an error is set on the context and
 // the empty string is returned with *out_len == 0.
 #define SPACE_STR(IN_TYPE)                                                              \
   GANDIVA_EXPORT                                                                        \
   const char* space_##IN_TYPE(gdv_int64 ctx, gdv_##IN_TYPE n, int32_t* out_len) {       \
     const gdv_##IN_TYPE upper_bound = static_cast<gdv_##IN_TYPE>(max_str_length);       \
     const gdv_##IN_TYPE lower_bound = static_cast<gdv_##IN_TYPE>(min_str_length);       \
     if (upper_bound < n) {                                                              \
       n = upper_bound;                                                                  \
     }                                                                                   \
     if (n < lower_bound) {                                                              \
       n = lower_bound;                                                                  \
     }                                                                                   \
     const gdv_int32 num_spaces = static_cast<gdv_int32>(n);                             \
     if (num_spaces <= 0) {                                                              \
       *out_len = 0;                                                                     \
       return "";                                                                        \
     }                                                                                   \
     char* spaces =                                                                      \
         reinterpret_cast<char*>(gdv_fn_context_arena_malloc(ctx, num_spaces));          \
     if (spaces == nullptr) {                                                            \
       gdv_fn_context_set_error_msg(ctx, "Could not allocate memory for output string"); \
       *out_len = 0;                                                                     \
       return "";                                                                        \
     }                                                                                   \
     char* const stop = spaces + num_spaces;                                             \
     for (char* cursor = spaces; cursor != stop; ++cursor) {                             \
       *cursor = ' ';                                                                    \
     }                                                                                   \
     *out_len = num_spaces;                                                              \
     return spaces;                                                                      \
   }

 SPACE_STR(int32)
 SPACE_STR(int64)

 // Reverses a utf8 sequence glyph by glyph: the order of the characters is
 // reversed while the bytes inside each multi-byte character keep their order.
 // The result is allocated from the context arena. On allocation failure or on
 // invalid utf8 input, an error is set on the context and the empty string is
 // returned with *out_len == 0.
 FORCE_INLINE
 const char* reverse_utf8(gdv_int64 context, const char* data, gdv_int32 data_len,
                          int32_t* out_len) {
   if (data_len == 0) {
     *out_len = 0;
     return "";
   }

   char* reversed =
       reinterpret_cast<char*>(gdv_fn_context_arena_malloc(context, data_len));
   if (reversed == nullptr) {
     gdv_fn_context_set_error_msg(context, "Could not allocate memory for output string");
     *out_len = 0;
     return "";
   }

   gdv_int32 src = 0;
   while (src < data_len) {
     const gdv_int32 glyph_len = utf8_char_length(data[src]);
     if (glyph_len == 0 || src + glyph_len > data_len) {
       // invalid byte or incomplete glyph
       set_error_for_invalid_utf(context, data[src]);
       *out_len = 0;
       return "";
     }

     // a glyph that starts 'src' bytes from the front ends 'src' bytes from the back
     const gdv_int32 dst = data_len - src - glyph_len;
     reversed[dst] = data[src];
     for (gdv_int32 k = 1; k < glyph_len; ++k) {
       const char follower = data[src + k];
       if ((follower & 0xC0) != 0x80) {  // bytes following head-byte of glyph
         set_error_for_invalid_utf(context, follower);
         *out_len = 0;
         return "";
       }
       reversed[dst + k] = follower;
     }
     src += glyph_len;
   }

   *out_len = data_len;
   return reversed;
 }

 // Removes leading ' ' characters from the input utf8 sequence.
 // No copy is made: the result points into 'data' and *out_len receives the number
 // of bytes left.
 FORCE_INLINE
 const char* ltrim_utf8(gdv_int64 context, const char* data, gdv_int32 data_len,
                        int32_t* out_len) {
   if (data_len == 0) {
     *out_len = 0;
     return "";
   }

   // count the run of spaces at the front of the input
   gdv_int32 skipped = 0;
   for (; skipped < data_len; ++skipped) {
     if (data[skipped] != ' ') {
       break;
     }
   }

   *out_len = data_len - skipped;
   return data + skipped;
 }

 // Removes trailing ' ' characters from the input utf8 sequence.
 // No copy is made: the result is 'data' itself and *out_len receives the number of
 // bytes kept.
 FORCE_INLINE
 const char* rtrim_utf8(gdv_int64 context, const char* data, gdv_int32 data_len,
                        int32_t* out_len) {
   if (data_len == 0) {
     *out_len = 0;
     return "";
   }

   // 'kept' is the number of bytes that remain once the trailing spaces are dropped
   gdv_int32 kept = data_len;
   while (kept > 0 && data[kept - 1] == ' ') {
     --kept;
   }

   *out_len = kept;
   return data;
 }

 // Removes ' ' characters from both ends of the input utf8 sequence.
 // No copy is made: the result points into 'data' and *out_len receives the number
 // of bytes kept (0 when the input consists of spaces only).
 FORCE_INLINE
 const char* btrim_utf8(gdv_int64 context, const char* data, gdv_int32 data_len,
                        int32_t* out_len) {
   if (data_len == 0) {
     *out_len = 0;
     return "";
   }

   // [first, past_last) is the half-open range of bytes that survive the trim
   gdv_int32 first = 0;
   gdv_int32 past_last = data_len;
   while (first < past_last && data[first] == ' ') {
     ++first;
   }
   while (past_last > first && data[past_last - 1] == ' ') {
     --past_last;
   }

   *out_len = past_last - first;
   return data + first;
 }

 // Removes from the left end of the base text every leading character that also
 // occurs in the trim text. No copy is made: the result points into 'basetext'.
 // On an invalid head byte or an incomplete glyph in the base text, an error is set
 // on the context and the empty string is returned.
 FORCE_INLINE
 const char* ltrim_utf8_utf8(gdv_int64 context, const char* basetext,
                             gdv_int32 basetext_len, const char* trimtext,
                             gdv_int32 trimtext_len, int32_t* out_len) {
   if (basetext_len == 0) {
     *out_len = 0;
     return "";
   }
   if (trimtext_len == 0) {
     // nothing to trim with
     *out_len = basetext_len;
     return basetext;
   }

   // advance glyph by glyph until one is found that is not part of the trim text
   gdv_int32 offset = 0;
   while (offset < basetext_len) {
     const gdv_int32 glyph_len = utf8_char_length(basetext[offset]);
     if (glyph_len == 0 || offset + glyph_len > basetext_len) {
       // invalid byte or incomplete glyph
       set_error_for_invalid_utf(context, basetext[offset]);
       *out_len = 0;
       return "";
     }
     const bool trimmable =
         is_substr_utf8_utf8(trimtext, trimtext_len, basetext + offset, glyph_len);
     if (!trimmable) {
       break;
     }
     offset += glyph_len;
   }

   *out_len = basetext_len - offset;
   return basetext + offset;
 }

 // Trims characters present in the trim text from the right end of the base text.
 // No copy is made: the result is 'basetext' itself with *out_len shortened, except
 // that the empty string literal is returned when the base text is empty, when an
 // error is detected, or when the scan runs off the front of the base text.
 // An error is set on the context when the number of bytes gathered for a glyph
 // does not match the length announced by its head byte.
 FORCE_INLINE
 const char* rtrim_utf8_utf8(gdv_int64 context, const char* basetext,
                             gdv_int32 basetext_len, const char* trimtext,
                             gdv_int32 trimtext_len, int32_t* out_len) {
   if (basetext_len == 0) {
     *out_len = 0;
     return "";
   } else if (trimtext_len == 0) {
     // nothing to trim with: the base text is returned unchanged
     *out_len = basetext_len;
     return basetext;
   }

   // byte_cnt counts the bytes seen so far for the glyph currently being walked
   // backwards (the byte under end_ptr included)
   gdv_int32 char_len, end_ptr, byte_cnt = 1;
   // scan the base text from right to left and decrement the end pointer till
   // there is a character which is not present in the trim text
   for (end_ptr = basetext_len - 1; end_ptr >= 0; --end_ptr) {
     char_len = utf8_char_length(basetext[end_ptr]);
     // utf8_char_length yields 0 for any byte that is not a valid head byte; such a
     // byte is treated here as a trailing byte of a multibyte character
     if (char_len == 0) {  // trailing bytes of multibyte character
       ++byte_cnt;
       continue;
     }
     // this is the first byte of a character, hence check if char_len = byte_cnt
     if (byte_cnt != char_len) {  // invalid byte or incomplete glyph
       set_error_for_invalid_utf(context, basetext[end_ptr]);
       *out_len = 0;
       return "";
     }
     byte_cnt = 1;  // reset the counter
     if (!is_substr_utf8_utf8(trimtext, trimtext_len, basetext + end_ptr, char_len)) {
       break;
     }
   }

   // when all characters in the basetext are part of the trimtext
   if (end_ptr == -1) {
     *out_len = 0;
     return "";
   }

   // end_ptr rests on the head byte of the last character to keep
   end_ptr += utf8_char_length(basetext[end_ptr]);  // point to the next character
   *out_len = end_ptr;
   return basetext;
 }

 // Trims characters present in the trim text from both ends of the base text.
 // No copy is made: the result points into 'basetext', except that the empty string
 // literal is returned when the base text is empty, when an error is detected, or
 // when nothing remains after trimming.
 // An error is set on the context for an invalid head byte or incomplete glyph met
 // during the forward scan, and when the number of bytes gathered for a glyph in
 // the backward scan does not match the length announced by its head byte.
 FORCE_INLINE
 const char* btrim_utf8_utf8(gdv_int64 context, const char* basetext,
                             gdv_int32 basetext_len, const char* trimtext,
                             gdv_int32 trimtext_len, int32_t* out_len) {
   if (basetext_len == 0) {
     *out_len = 0;
     return "";
   } else if (trimtext_len == 0) {
     // nothing to trim with: the base text is returned unchanged
     *out_len = basetext_len;
     return basetext;
   }

   // byte_cnt counts the bytes seen so far for the glyph currently being walked
   // backwards in the second loop (the byte under end_ptr included)
   gdv_int32 start_ptr, end_ptr, char_len, byte_cnt = 1;
   // scan the base text from left to right and increment the start and decrement the
   // end pointers till there are characters which are not present in the trim text
   for (start_ptr = 0; start_ptr < basetext_len; start_ptr += char_len) {
     char_len = utf8_char_length(basetext[start_ptr]);
     if (char_len == 0 || start_ptr + char_len > basetext_len) {
       // invalid byte or incomplete glyph
       set_error_for_invalid_utf(context, basetext[start_ptr]);
       *out_len = 0;
       return "";
     }
     if (!is_substr_utf8_utf8(trimtext, trimtext_len, basetext + start_ptr, char_len)) {
       break;
     }
   }
   // backward scan; it never goes below start_ptr
   for (end_ptr = basetext_len - 1; end_ptr >= start_ptr; --end_ptr) {
     char_len = utf8_char_length(basetext[end_ptr]);
     // utf8_char_length yields 0 for any byte that is not a valid head byte; such a
     // byte is treated here as a trailing byte of a multibyte character
     if (char_len == 0) {  // trailing byte in multibyte character
       ++byte_cnt;
       continue;
     }
     // this is the first byte of a character, hence check if char_len = byte_cnt
     if (byte_cnt != char_len) {  // invalid byte or incomplete glyph
       set_error_for_invalid_utf(context, basetext[end_ptr]);
       *out_len = 0;
       return "";
     }
     byte_cnt = 1;  // reset the counter
     if (!is_substr_utf8_utf8(trimtext, trimtext_len, basetext + end_ptr, char_len)) {
       break;
     }
   }

   // when all characters are trimmed, start_ptr has been incremented to basetext_len and
   // end_ptr still points to basetext_len - 1, hence we need to handle this case
   if (start_ptr > end_ptr) {
     *out_len = 0;
     return "";
   }

   // end_ptr rests on the head byte of the last character to keep
   end_ptr += utf8_char_length(basetext[end_ptr]);  // point to the next character
   *out_len = end_ptr - start_ptr;
   return basetext + start_ptr;
 }

 // Compares 'str' against 'base_str' after lower-casing the ASCII letters 'A'-'Z'
 // of 'str'; 'base_str' is compared as given, so it is expected to be lower case
 // already. Returns true only when the lengths are equal, every byte matches and
 // the strings are non-empty (two empty strings compare as false).
 FORCE_INLINE
 gdv_boolean compare_lower_strings(const char* base_str, gdv_int32 base_str_len,
                                   const char* str, gdv_int32 str_len) {
   if (base_str_len != str_len) {
     return false;
   }
   for (int idx = 0; idx < str_len; ++idx) {
     char lowered = str[idx];
     // 'A' - 'Z' : 0x41 - 0x5a
     // 'a' - 'z' : 0x61 - 0x7a
     const bool is_upper = (lowered >= 0x41) && (lowered <= 0x5a);
     if (is_upper) {
       lowered = static_cast<char>(lowered + 0x20);
     }
     if (lowered != base_str[idx]) {
       return false;
     }
   }
   // all bytes matched; an empty comparison is still reported as a mismatch
   return str_len > 0;
 }

 // Casts a string to a boolean. Leading and trailing ' ' characters are ignored.
 // Accepted values are '1' and 'true' (-> true) and '0' and 'false' (-> false);
 // the words are matched without regard to ASCII letter case.
 // For any other input, including an empty one, an error is set on the context and
 // false is returned.
 FORCE_INLINE
 gdv_boolean castBIT_utf8(gdv_int64 context, const char* data, gdv_int32 data_len) {
   if (data_len <= 0) {
     gdv_fn_context_set_error_msg(context, "Invalid value for boolean.");
     return false;
   }

   // [first, past_last) is the input with the surrounding spaces stripped
   int32_t first = 0;
   int32_t past_last = data_len;
   while (first < past_last && data[first] == ' ') {
     ++first;
   }
   while (past_last > first && data[past_last - 1] == ' ') {
     --past_last;
   }
   const char* token = data + first;
   const int32_t token_len = past_last - first;

   // the length alone decides which of the valid spellings can still match
   switch (token_len) {
     case 1:  // '0' or '1'
       if (token[0] == '1') return true;
       if (token[0] == '0') return false;
       break;
     case 4:  // 'true'
       if (compare_lower_strings("true", 4, token, token_len)) return true;
       break;
     case 5:  // 'false'
       if (compare_lower_strings("false", 5, token, token_len)) return false;
       break;
     default:
       break;
   }

   // if no 'true', 'false', '0' or '1' value is found, set an error
   gdv_fn_context_set_error_msg(context, "Invalid value for boolean.");
   return false;
 }

 // Casts a boolean to the string "true" or "false", truncated to at most 'out_len'
 // bytes. The returned pointer refers to a string literal and *out_length receives
 // the number of bytes to use from it (0 when out_len is 0).
 // A negative 'out_len' sets an error on the context and yields the empty string.
 FORCE_INLINE
 const char* castVARCHAR_bool_int64(gdv_int64 context, gdv_boolean value,
                                    gdv_int64 out_len, gdv_int32* out_length) {
   // Validate and clamp in 64 bits: narrowing out_len to 32 bits first would let
   // large values wrap around to a negative or an unrelated small length.
   if (out_len < 0) {
     gdv_fn_context_set_error_msg(context, "Output buffer length can't be negative");
     *out_length = 0;
     return "";
   }
   // The result is a string literal, so no arena memory is needed. The previous
   // code allocated 5 bytes from the arena and immediately dropped the pointer.
   const char* out = value ? "true" : "false";
   const gdv_int64 full_len = value ? 4 : 5;
   *out_length = static_cast<gdv_int32>((out_len < full_len) ? out_len : full_len);
   return out;
 }

 // Truncates the string to given length
 // Generates castVARCHAR_TYPE_int64(context, data, data_len, out_len, out_length),
 // which keeps at most 'out_len' utf8 characters of the input. No copy is made: the
 // result is 'data' itself and *out_length receives the number of bytes to keep.
 //  - a negative length sets an error on the context and yields the empty string;
 //  - a length of 0, or one that is >= data_len, returns the input unchanged;
 //  - otherwise the first 'len' bytes are scanned for bytes with the high bit set.
 //    If there are none, every character is a single byte and 'len' bytes are kept.
 //    If one is found, utf8_byte_pos locates the byte offset of the remaining
 //    characters (an invalid sequence there yields the empty string).
 // The alignment pre-loop must stop once 'len' bytes have been examined. Otherwise,
 // for a 'len' shorter than the distance to the next 8-byte boundary, it would keep
 // scanning past 'len' (and possibly past data_len) and could report a length
 // larger than the one requested.
 #define CAST_VARCHAR_FROM_VARLEN_TYPE(TYPE)                                            \
   FORCE_INLINE                                                                         \
   const char* castVARCHAR_##TYPE##_int64(gdv_int64 context, const char* data,          \
                                          gdv_int32 data_len, int64_t out_len,          \
                                          int32_t* out_length) {                        \
     int32_t len = static_cast<int32_t>(out_len);                                       \
                                                                                        \
     if (len < 0) {                                                                     \
       gdv_fn_context_set_error_msg(context, "Output buffer length can't be negative"); \
       *out_length = 0;                                                                 \
       return "";                                                                       \
     }                                                                                  \
                                                                                        \
     if (len >= data_len || len == 0) {                                                 \
       *out_length = data_len;                                                          \
       return data;                                                                     \
     }                                                                                  \
                                                                                        \
     int32_t remaining = len;                                                           \
     int32_t index = 0;                                                                 \
     bool is_multibyte = false;                                                         \
     do {                                                                               \
       /* In utf8, MSB of a single byte unicode char is always 0,                       \
        * whereas for a multibyte character the MSB of each byte is 1.                  \
        * So for a single byte char, a bitwise-and with x80 (10000000) will be 0        \
        * and it won't be 0 for bytes of a multibyte char.                              \
        */                                                                              \
       char* data_ptr = const_cast<char*>(data);                                        \
                                                                                        \
       /* advance byte by byte till the 8-byte boundary then advance 8 bytes */         \
       auto num_bytes = reinterpret_cast<uintptr_t>(data_ptr) & 0x07;                   \
       num_bytes = (8 - num_bytes) & 0x07;                                              \
       /* never look beyond the 'len' bytes that were asked for */                      \
       while (num_bytes > 0 && remaining > 0) {                                         \
         uint8_t* ptr = reinterpret_cast<uint8_t*>(data_ptr + index);                   \
         if ((*ptr & 0x80) != 0) {                                                      \
           is_multibyte = true;                                                         \
           break;                                                                       \
         }                                                                              \
         index++;                                                                       \
         remaining--;                                                                   \
         num_bytes--;                                                                   \
       }                                                                                \
       if (is_multibyte) break;                                                         \
       while (remaining >= 8) {                                                         \
         uint64_t* ptr = reinterpret_cast<uint64_t*>(data_ptr + index);                 \
         if ((*ptr & 0x8080808080808080) != 0) {                                        \
           is_multibyte = true;                                                         \
           break;                                                                       \
         }                                                                              \
         index += 8;                                                                    \
         remaining -= 8;                                                                \
       }                                                                                \
       if (is_multibyte) break;                                                         \
       if (remaining >= 4) {                                                            \
         uint32_t* ptr = reinterpret_cast<uint32_t*>(data_ptr + index);                 \
         if ((*ptr & 0x80808080) != 0) break;                                           \
         index += 4;                                                                    \
         remaining -= 4;                                                                \
       }                                                                                \
       while (remaining > 0) {                                                          \
         uint8_t* ptr = reinterpret_cast<uint8_t*>(data_ptr + index);                   \
         if ((*ptr & 0x80) != 0) {                                                      \
           is_multibyte = true;                                                         \
           break;                                                                       \
         }                                                                              \
         index++;                                                                       \
         remaining--;                                                                   \
       }                                                                                \
       if (is_multibyte) break;                                                         \
       /* reached here; all are single byte characters */                               \
       *out_length = len;                                                               \
       return data;                                                                     \
     } while (false);                                                                   \
                                                                                        \
     /* detected multibyte utf8 characters; slow path */                                \
     int32_t byte_pos =                                                                 \
         utf8_byte_pos(context, data + index, data_len - index, len - index);           \
     if (byte_pos < 0) {                                                                \
       *out_length = 0;                                                                 \
       return "";                                                                       \
     }                                                                                  \
                                                                                        \
     *out_length = index + byte_pos;                                                    \
     return data;                                                                       \
   }

 CAST_VARCHAR_FROM_VARLEN_TYPE(utf8)
 CAST_VARCHAR_FROM_VARLEN_TYPE(binary)

 #undef CAST_VARCHAR_FROM_VARLEN_TYPE

 // Add functions for castVARBINARY
 // Generates castVARBINARY_TYPE_int64(context, data, data_len, out_len, out_length),
 // which truncates the input to at most 'out_len' bytes. No copy is made: the
 // result is 'data' itself and *out_length receives the number of bytes to keep.
 // A length of 0, or one that is >= data_len, keeps the whole input; a negative
 // length sets an error on the context and yields the empty string.
 #define CAST_VARBINARY_FROM_STRING_AND_BINARY(TYPE)                                    \
   GANDIVA_EXPORT                                                                       \
   const char* castVARBINARY_##TYPE##_int64(gdv_int64 context, const char* data,        \
                                            gdv_int32 data_len, int64_t out_len,        \
                                            int32_t* out_length) {                      \
     const int32_t limit = static_cast<int32_t>(out_len);                               \
     if (limit < 0) {                                                                   \
       gdv_fn_context_set_error_msg(context, "Output buffer length can't be negative"); \
       *out_length = 0;                                                                 \
       return "";                                                                       \
     }                                                                                  \
                                                                                        \
     const bool keep_all = (limit == 0) || (limit >= data_len);                         \
     *out_length = keep_all ? data_len : limit;                                         \
     return data;                                                                       \
   }

 CAST_VARBINARY_FROM_STRING_AND_BINARY(utf8)
 CAST_VARBINARY_FROM_STRING_AND_BINARY(binary)

 // Generates castBINARY_TYPE(data, data_len, out_length): an identity cast that
 // returns 'data' unchanged and stores data_len in *out_length.
 #define CAST_BINARY_FROM_STRING_AND_BINARY(TYPE)                      \
   GANDIVA_EXPORT                                                      \
   const char* castBINARY_##TYPE(const char* data, gdv_int32 data_len, \
                                 int32_t* out_length) {                \
     *out_length = data_len;                                           \
     return data;                                                      \
   }

 CAST_BINARY_FROM_STRING_AND_BINARY(utf8)
 CAST_BINARY_FROM_STRING_AND_BINARY(binary)

 // NOTE(review): only the VARBINARY generator is undefined here;
 // CAST_BINARY_FROM_STRING_AND_BINARY stays defined - confirm this is intended.
 #undef CAST_VARBINARY_FROM_STRING_AND_BINARY

 // Generates NAME_TYPE(in, len, is_valid), which returns true when the validity
 // flag is false. The value and its length are not inspected.
 #define IS_NULL(NAME, TYPE)                                                \
   FORCE_INLINE                                                             \
   bool NAME##_##TYPE(gdv_##TYPE in, gdv_int32 len, gdv_boolean is_valid) { \
     return !is_valid;                                                      \
   }

 // Instantiations: isnull_utf8 and isnull_binary.
 VAR_LEN_TYPES(IS_NULL, isnull)

 #undef IS_NULL

 // Generates NAME_TYPE(in, len, is_valid), which returns the validity flag as is.
 // The value and its length are not inspected.
 #define IS_NOT_NULL(NAME, TYPE)                                            \
   FORCE_INLINE                                                             \
   bool NAME##_##TYPE(gdv_##TYPE in, gdv_int32 len, gdv_boolean is_valid) { \
     return is_valid;                                                       \
   }

 // Instantiations: isnotnull_utf8 and isnotnull_binary.
 VAR_LEN_TYPES(IS_NOT_NULL, isnotnull)

 #undef IS_NOT_NULL
 #undef VAR_LEN_TYPES

 /*
  Returns the substring of `input` that starts at glyph `position` and spans at most
  `substring_length` glyphs. Positions and lengths count UTF-8 glyphs, not bytes.

  We follow Oracle semantics for offset:
  - If position is positive, then the first glyph in the substring is determined by
  counting that many glyphs forward from the beginning of the input. (i.e., for position ==
  1 the first glyph in the substring will be identical to the first glyph in the input)

  - If position is negative, then the first glyph in the substring is determined by
  counting that many glyphs backward from the end of the input. (i.e., for position == -1
  the first glyph in the substring will be identical to the last glyph in the input)

  - If position is 0 then it is treated as 1.

  The result is an empty string (*out_data_len == 0) when substring_length <= 0, when
  the input is null or empty, when utf8_length reports zero glyphs, or when the start
  position falls outside the input. Otherwise the selected bytes are copied into memory
  obtained from gdv_fn_context_arena_malloc; if that allocation fails, an error is set
  on the context and an empty string is returned.
  */
 FORCE_INLINE
 const char* substr_utf8_int64_int64(gdv_int64 context, const char* input,
                                     gdv_int32 in_data_len, gdv_int64 position,
                                     gdv_int64 substring_length, gdv_int32* out_data_len) {
   if (substring_length <= 0 || input == nullptr || in_data_len <= 0) {
     *out_data_len = 0;
     return "";
   }

   gdv_int64 in_glyphs_count =
       static_cast<gdv_int64>(utf8_length(context, input, in_data_len));

   // in_glyphs_count is zero if input has invalid glyphs
   if (in_glyphs_count == 0) {
     *out_data_len = 0;
     return "";
   }

   gdv_int64 from_glyph;  // from_glyph==0 indicates the first glyph of the input
   if (position > 0) {
     from_glyph = position - 1;
   } else if (position < 0) {
     from_glyph = in_glyphs_count + position;
   } else {
     from_glyph = 0;
   }

   if (from_glyph < 0 || from_glyph >= in_glyphs_count) {
     *out_data_len = 0;
     return "";
   }

   // Clamp the requested length to the glyphs that remain after from_glyph.
   gdv_int64 out_glyphs_count = substring_length;
   if (substring_length > in_glyphs_count - from_glyph) {
     out_glyphs_count = in_glyphs_count - from_glyph;
   }

   gdv_int64 in_data_len64 = static_cast<gdv_int64>(in_data_len);
   gdv_int64 start_pos = 0;
   gdv_int64 end_pos = in_data_len64;

   // Index of the last glyph that belongs to the result. It cannot exceed
   // in_glyphs_count - 1 because out_glyphs_count was clamped above.
   const gdv_int64 last_glyph = from_glyph + out_glyphs_count - 1;

   // Walk the input glyph by glyph to translate the glyph range into byte offsets.
   gdv_int64 current_glyph = 0;
   gdv_int64 pos = 0;
   while (pos < in_data_len64) {
     if (current_glyph == from_glyph) {
       start_pos = pos;
     }
     pos += static_cast<gdv_int64>(utf8_char_length(input[pos]));
     if (current_glyph == last_glyph) {
       end_pos = pos;
       // start_pos was fixed on this or an earlier iteration (last_glyph >= from_glyph),
       // so both boundaries are known and the rest of the input need not be scanned.
       break;
     }
     current_glyph++;
   }

   // A final glyph whose declared length runs past the buffer is cut at the buffer end.
   if (end_pos > in_data_len64 || end_pos > INT_MAX) {
     end_pos = in_data_len64;
   }

   *out_data_len = static_cast<gdv_int32>(end_pos - start_pos);
   char* ret =
       reinterpret_cast<char*>(gdv_fn_context_arena_malloc(context, *out_data_len));
   if (ret == nullptr) {
     gdv_fn_context_set_error_msg(context, "Could not allocate memory for output string");
     *out_data_len = 0;
     return "";
   }
   memcpy(ret, input + start_pos, *out_data_len);
   return ret;
 }

 // Two-argument substr: returns everything from glyph position `offset64` onwards.
 // Delegates to substr_utf8_int64_int64, passing the input's byte length as the
 // requested glyph count (presumably never smaller than the number of glyphs, so the
 // callee's clamping yields "up to the end of the input").
 FORCE_INLINE
 const char* substr_utf8_int64(gdv_int64 context, const char* input, gdv_int32 in_len,
                               gdv_int64 offset64, gdv_int32* out_len) {
   return substr_utf8_int64_int64(context, input, in_len, offset64, in_len, out_len);
 }

 // Returns `in` repeated `repeat_number` times in a newly arena-allocated buffer.
 // - repeat_number == 0 or in_len <= 0: returns an empty string without error.
 // - repeat_number < 0: sets an error on the context and returns an empty string.
 // - The result would not fit in a 32-bit length, or allocation fails: sets an error
 //   on the context and returns an empty string.
 // *out_len always receives the length of the returned string.
 FORCE_INLINE
 const char* repeat_utf8_int32(gdv_int64 context, const char* in, gdv_int32 in_len,
                               gdv_int32 repeat_number, gdv_int32* out_len) {
   // if the repeat number is zero, then return empty string
   if (repeat_number == 0 || in_len <= 0) {
     *out_len = 0;
     return "";
   }
   // if the repeat number is a negative number, an error is set on context
   if (repeat_number < 0) {
     gdv_fn_context_set_error_msg(context, "Repeat number can't be negative");
     *out_len = 0;
     return "";
   }
   // Multiply in 64 bits: repeat_number * in_len can exceed INT_MAX, and overflowing a
   // signed 32-bit multiplication is undefined behaviour.
   const gdv_int64 total_len =
       static_cast<gdv_int64>(repeat_number) * static_cast<gdv_int64>(in_len);
   if (total_len > INT_MAX) {
     gdv_fn_context_set_error_msg(context, "Would overflow maximum output size");
     *out_len = 0;
     return "";
   }
   *out_len = static_cast<gdv_int32>(total_len);
   char* ret = reinterpret_cast<char*>(gdv_fn_context_arena_malloc(context, *out_len));
   if (ret == nullptr) {
     gdv_fn_context_set_error_msg(context, "Could not allocate memory for output string");
     *out_len = 0;
     return "";
   }
   // Append one copy of the input per iteration.
   char* cursor = ret;
   for (gdv_int32 i = 0; i < repeat_number; ++i) {
     memcpy(cursor, in, in_len);
     cursor += in_len;
   }
   return ret;
 }

 // concat(left, right) with explicit validity flags: an argument whose validity flag
 // is false contributes zero bytes. The concatenation itself is performed by
 // concatOperator_utf8_utf8.
 FORCE_INLINE
 const char* concat_utf8_utf8(gdv_int64 context, const char* left, gdv_int32 left_len,
                              bool left_validity, const char* right, gdv_int32 right_len,
                              bool right_validity, gdv_int32* out_len) {
   const gdv_int32 left_bytes = left_validity ? left_len : 0;
   const gdv_int32 right_bytes = right_validity ? right_len : 0;
   return concatOperator_utf8_utf8(context, left, left_bytes, right, right_bytes, out_len);
 }

 // Concatenates `left` and `right` into a newly arena-allocated buffer and stores the
 // combined length in *out_len.
 // Returns an empty string with *out_len == 0 when the combined length is not positive.
 // When the combined length exceeds INT_MAX or the allocation fails, an error is set on
 // the context and an empty string is returned.
 FORCE_INLINE
 const char* concatOperator_utf8_utf8(gdv_int64 context, const char* left,
                                      gdv_int32 left_len, const char* right,
                                      gdv_int32 right_len, gdv_int32* out_len) {
   // Add in 64 bits: the sum of two 32-bit lengths can exceed INT_MAX, and signed
   // 32-bit overflow is undefined behaviour.
   const gdv_int64 total_len = static_cast<gdv_int64>(left_len) + right_len;
   if (total_len <= 0) {
     *out_len = 0;
     return "";
   }
   if (total_len > INT_MAX) {
     gdv_fn_context_set_error_msg(context, "Would overflow maximum output size");
     *out_len = 0;
     return "";
   }
   *out_len = static_cast<gdv_int32>(total_len);
   char* ret = reinterpret_cast<char*>(gdv_fn_context_arena_malloc(context, *out_len));
   if (ret == nullptr) {
     gdv_fn_context_set_error_msg(context, "Could not allocate memory for output string");
     *out_len = 0;
     return "";
   }
   memcpy(ret, left, left_len);
   memcpy(ret + left_len, right, right_len);
   return ret;
 }

 // Three-argument concat with explicit validity flags: an argument whose validity flag
 // is false contributes zero bytes. The concatenation itself is performed by
 // concatOperator_utf8_utf8_utf8.
 FORCE_INLINE
 const char* concat_utf8_utf8_utf8(gdv_int64 context, const char* in1, gdv_int32 in1_len,
                                   bool in1_validity, const char* in2, gdv_int32 in2_len,
                                   bool in2_validity, const char* in3, gdv_int32 in3_len,
                                   bool in3_validity, gdv_int32* out_len) {
   const gdv_int32 len1 = in1_validity ? in1_len : 0;
   const gdv_int32 len2 = in2_validity ? in2_len : 0;
   const gdv_int32 len3 = in3_validity ? in3_len : 0;
   return concatOperator_utf8_utf8_utf8(context, in1, len1, in2, len2, in3, len3,
                                        out_len);
 }

 // Concatenates three strings, in argument order, into a newly arena-allocated buffer
 // and stores the combined length in *out_len.
 // Returns an empty string with *out_len == 0 when the combined length is not positive.
 // When the combined length exceeds INT_MAX or the allocation fails, an error is set on
 // the context and an empty string is returned.
 FORCE_INLINE
 const char* concatOperator_utf8_utf8_utf8(gdv_int64 context, const char* in1,
                                           gdv_int32 in1_len, const char* in2,
                                           gdv_int32 in2_len, const char* in3,
                                           gdv_int32 in3_len, gdv_int32* out_len) {
   const int num_parts = 3;
   const char* parts[num_parts] = {in1, in2, in3};
   const gdv_int32 part_lens[num_parts] = {in1_len, in2_len, in3_len};

   // Accumulate in 64 bits: the sum of several 32-bit lengths can exceed INT_MAX, and
   // signed 32-bit overflow is undefined behaviour.
   gdv_int64 total_len = 0;
   for (int i = 0; i < num_parts; ++i) {
     total_len += part_lens[i];
   }
   if (total_len <= 0) {
     *out_len = 0;
     return "";
   }
   if (total_len > INT_MAX) {
     gdv_fn_context_set_error_msg(context, "Would overflow maximum output size");
     *out_len = 0;
     return "";
   }
   *out_len = static_cast<gdv_int32>(total_len);
   char* ret = reinterpret_cast<char*>(gdv_fn_context_arena_malloc(context, *out_len));
   if (ret == nullptr) {
     gdv_fn_context_set_error_msg(context, "Could not allocate memory for output string");
     *out_len = 0;
     return "";
   }
   // Append each part right after the previous one.
   char* cursor = ret;
   for (int i = 0; i < num_parts; ++i) {
     memcpy(cursor, parts[i], part_lens[i]);
     cursor += part_lens[i];
   }
   return ret;
 }

 // Four-argument concat with explicit validity flags: an argument whose validity flag
 // is false contributes zero bytes. The concatenation itself is performed by
 // concatOperator_utf8_utf8_utf8_utf8.
 FORCE_INLINE
 const char* concat_utf8_utf8_utf8_utf8(gdv_int64 context, const char* in1,
                                        gdv_int32 in1_len, bool in1_validity,
                                        const char* in2, gdv_int32 in2_len,
                                        bool in2_validity, const char* in3,
                                        gdv_int32 in3_len, bool in3_validity,
                                        const char* in4, gdv_int32 in4_len,
                                        bool in4_validity, gdv_int32* out_len) {
   const gdv_int32 len1 = in1_validity ? in1_len : 0;
   const gdv_int32 len2 = in2_validity ? in2_len : 0;
   const gdv_int32 len3 = in3_validity ? in3_len : 0;
   const gdv_int32 len4 = in4_validity ? in4_len : 0;
   return concatOperator_utf8_utf8_utf8_utf8(context, in1, len1, in2, len2, in3, len3,
                                             in4, len4, out_len);
 }

 // Concatenates four strings, in argument order, into a newly arena-allocated buffer
 // and stores the combined length in *out_len.
 // Returns an empty string with *out_len == 0 when the combined length is not positive.
 // When the combined length exceeds INT_MAX or the allocation fails, an error is set on
 // the context and an empty string is returned.
 FORCE_INLINE
 const char* concatOperator_utf8_utf8_utf8_utf8(gdv_int64 context, const char* in1,
                                                gdv_int32 in1_len, const char* in2,
                                                gdv_int32 in2_len, const char* in3,
                                                gdv_int32 in3_len, const char* in4,
                                                gdv_int32 in4_len, gdv_int32* out_len) {
   const int num_parts = 4;
   const char* parts[num_parts] = {in1, in2, in3, in4};
   const gdv_int32 part_lens[num_parts] = {in1_len, in2_len, in3_len, in4_len};

   // Accumulate in 64 bits: the sum of several 32-bit lengths can exceed INT_MAX, and
   // signed 32-bit overflow is undefined behaviour.
   gdv_int64 total_len = 0;
   for (int i = 0; i < num_parts; ++i) {
     total_len += part_lens[i];
   }
   if (total_len <= 0) {
     *out_len = 0;
     return "";
   }
   if (total_len > INT_MAX) {
     gdv_fn_context_set_error_msg(context, "Would overflow maximum output size");
     *out_len = 0;
     return "";
   }
   *out_len = static_cast<gdv_int32>(total_len);
   char* ret = reinterpret_cast<char*>(gdv_fn_context_arena_malloc(context, *out_len));
   if (ret == nullptr) {
     gdv_fn_context_set_error_msg(context, "Could not allocate memory for output string");
     *out_len = 0;
     return "";
   }
   // Append each part right after the previous one.
   char* cursor = ret;
   for (int i = 0; i < num_parts; ++i) {
     memcpy(cursor, parts[i], part_lens[i]);
     cursor += part_lens[i];
   }
   return ret;
 }

 // Five-argument concat with explicit validity flags: an argument whose validity flag
 // is false contributes zero bytes. The concatenation itself is performed by
 // concatOperator_utf8_utf8_utf8_utf8_utf8.
 FORCE_INLINE
 const char* concat_utf8_utf8_utf8_utf8_utf8(
     gdv_int64 context, const char* in1, gdv_int32 in1_len, bool in1_validity,
     const char* in2, gdv_int32 in2_len, bool in2_validity, const char* in3,
     gdv_int32 in3_len, bool in3_validity, const char* in4, gdv_int32 in4_len,
     bool in4_validity, const char* in5, gdv_int32 in5_len, bool in5_validity,
     gdv_int32* out_len) {
   const gdv_int32 len1 = in1_validity ? in1_len : 0;
   const gdv_int32 len2 = in2_validity ? in2_len : 0;
   const gdv_int32 len3 = in3_validity ? in3_len : 0;
   const gdv_int32 len4 = in4_validity ? in4_len : 0;
   const gdv_int32 len5 = in5_validity ? in5_len : 0;
   return concatOperator_utf8_utf8_utf8_utf8_utf8(context, in1, len1, in2, len2, in3,
                                                  len3, in4, len4, in5, len5, out_len);
 }

 // Concatenates five strings, in argument order, into a newly arena-allocated buffer
 // and stores the combined length in *out_len.
 // Returns an empty string with *out_len == 0 when the combined length is not positive.
 // When the combined length exceeds INT_MAX or the allocation fails, an error is set on
 // the context and an empty string is returned.
 FORCE_INLINE
 const char* concatOperator_utf8_utf8_utf8_utf8_utf8(
     gdv_int64 context, const char* in1, gdv_int32 in1_len, const char* in2,
     gdv_int32 in2_len, const char* in3, gdv_int32 in3_len, const char* in4,
     gdv_int32 in4_len, const char* in5, gdv_int32 in5_len, gdv_int32* out_len) {
   const int num_parts = 5;
   const char* parts[num_parts] = {in1, in2, in3, in4, in5};
   const gdv_int32 part_lens[num_parts] = {in1_len, in2_len, in3_len, in4_len, in5_len};

   // Accumulate in 64 bits: the sum of several 32-bit lengths can exceed INT_MAX, and
   // signed 32-bit overflow is undefined behaviour.
   gdv_int64 total_len = 0;
   for (int i = 0; i < num_parts; ++i) {
     total_len += part_lens[i];
   }
   if (total_len <= 0) {
     *out_len = 0;
     return "";
   }
   if (total_len > INT_MAX) {
     gdv_fn_context_set_error_msg(context, "Would overflow maximum output size");
     *out_len = 0;
     return "";
   }
   *out_len = static_cast<gdv_int32>(total_len);
   char* ret = reinterpret_cast<char*>(gdv_fn_context_arena_malloc(context, *out_len));
   if (ret == nullptr) {
     gdv_fn_context_set_error_msg(context, "Could not allocate memory for output string");
     *out_len = 0;
     return "";
   }
   // Append each part right after the previous one.
   char* cursor = ret;
   for (int i = 0; i < num_parts; ++i) {
     memcpy(cursor, parts[i], part_lens[i]);
     cursor += part_lens[i];
   }
   return ret;
 }

 // Six-argument concat with explicit validity flags: an argument whose validity flag
 // is false contributes zero bytes. The concatenation itself is performed by
 // concatOperator_utf8_utf8_utf8_utf8_utf8_utf8.
 FORCE_INLINE
 const char* concat_utf8_utf8_utf8_utf8_utf8_utf8(
     gdv_int64 context, const char* in1, gdv_int32 in1_len, bool in1_validity,
     const char* in2, gdv_int32 in2_len, bool in2_validity, const char* in3,
     gdv_int32 in3_len, bool in3_validity, const char* in4, gdv_int32 in4_len,
     bool in4_validity, const char* in5, gdv_int32 in5_len, bool in5_validity,
     const char* in6, gdv_int32 in6_len, bool in6_validity, gdv_int32* out_len) {
   const gdv_int32 len1 = in1_validity ? in1_len : 0;
   const gdv_int32 len2 = in2_validity ? in2_len : 0;
   const gdv_int32 len3 = in3_validity ? in3_len : 0;
   const gdv_int32 len4 = in4_validity ? in4_len : 0;
   const gdv_int32 len5 = in5_validity ? in5_len : 0;
   const gdv_int32 len6 = in6_validity ? in6_len : 0;
   return concatOperator_utf8_utf8_utf8_utf8_utf8_utf8(
       context, in1, len1, in2, len2, in3, len3, in4, len4, in5, len5, in6, len6, out_len);
 }

 // Concatenates six strings, in argument order, into a newly arena-allocated buffer
 // and stores the combined length in *out_len.
 // Returns an empty string with *out_len == 0 when the combined length is not positive.
 // When the combined length exceeds INT_MAX or the allocation fails, an error is set on
 // the context and an empty string is returned.
 FORCE_INLINE
 const char* concatOperator_utf8_utf8_utf8_utf8_utf8_utf8(
     gdv_int64 context, const char* in1, gdv_int32 in1_len, const char* in2,
     gdv_int32 in2_len, const char* in3, gdv_int32 in3_len, const char* in4,
     gdv_int32 in4_len, const char* in5, gdv_int32 in5_len, const char* in6,
     gdv_int32 in6_len, gdv_int32* out_len) {
   const int num_parts = 6;
   const char* parts[num_parts] = {in1, in2, in3, in4, in5, in6};
   const gdv_int32 part_lens[num_parts] = {in1_len, in2_len, in3_len,
                                           in4_len, in5_len, in6_len};

   // Accumulate in 64 bits: the sum of several 32-bit lengths can exceed INT_MAX, and
   // signed 32-bit overflow is undefined behaviour.
   gdv_int64 total_len = 0;
   for (int i = 0; i < num_parts; ++i) {
     total_len += part_lens[i];
   }
   if (total_len <= 0) {
     *out_len = 0;
     return "";
   }
   if (total_len > INT_MAX) {
     gdv_fn_context_set_error_msg(context, "Would overflow maximum output size");
     *out_len = 0;
     return "";
   }
   *out_len = static_cast<gdv_int32>(total_len);
   char* ret = reinterpret_cast<char*>(gdv_fn_context_arena_malloc(context, *out_len));
   if (ret == nullptr) {
     gdv_fn_context_set_error_msg(context, "Could not allocate memory for output string");
     *out_len = 0;
     return "";
   }
   // Append each part right after the previous one.
   char* cursor = ret;
   for (int i = 0; i < num_parts; ++i) {
     memcpy(cursor, parts[i], part_lens[i]);
     cursor += part_lens[i];
   }
   return ret;
 }

 // Seven-argument concat with explicit validity flags: an argument whose validity flag
 // is false contributes zero bytes. The concatenation itself is performed by
 // concatOperator_utf8_utf8_utf8_utf8_utf8_utf8_utf8.
 FORCE_INLINE
 const char* concat_utf8_utf8_utf8_utf8_utf8_utf8_utf8(
     gdv_int64 context, const char* in1, gdv_int32 in1_len, bool in1_validity,
     const char* in2, gdv_int32 in2_len, bool in2_validity, const char* in3,
     gdv_int32 in3_len, bool in3_validity, const char* in4, gdv_int32 in4_len,
     bool in4_validity, const char* in5, gdv_int32 in5_len, bool in5_validity,
     const char* in6, gdv_int32 in6_len, bool in6_validity, const char* in7,
     gdv_int32 in7_len, bool in7_validity, gdv_int32* out_len) {
   const gdv_int32 len1 = in1_validity ? in1_len : 0;
   const gdv_int32 len2 = in2_validity ? in2_len : 0;
   const gdv_int32 len3 = in3_validity ? in3_len : 0;
   const gdv_int32 len4 = in4_validity ? in4_len : 0;
   const gdv_int32 len5 = in5_validity ? in5_len : 0;
   const gdv_int32 len6 = in6_validity ? in6_len : 0;
   const gdv_int32 len7 = in7_validity ? in7_len : 0;
   return concatOperator_utf8_utf8_utf8_utf8_utf8_utf8_utf8(
       context, in1, len1, in2, len2, in3, len3, in4, len4, in5, len5, in6, len6, in7,
       len7, out_len);
 }

 // Concatenates seven strings, in argument order, into a newly arena-allocated buffer
 // and stores the combined length in *out_len.
 // Returns an empty string with *out_len == 0 when the combined length is not positive.
 // When the combined length exceeds INT_MAX or the allocation fails, an error is set on
 // the context and an empty string is returned.
 FORCE_INLINE
 const char* concatOperator_utf8_utf8_utf8_utf8_utf8_utf8_utf8(
     gdv_int64 context, const char* in1, gdv_int32 in1_len, const char* in2,
     gdv_int32 in2_len, const char* in3, gdv_int32 in3_len, const char* in4,
     gdv_int32 in4_len, const char* in5, gdv_int32 in5_len, const char* in6,
     gdv_int32 in6_len, const char* in7, gdv_int32 in7_len, gdv_int32* out_len) {
   const int num_parts = 7;
   const char* parts[num_parts] = {in1, in2, in3, in4, in5, in6, in7};
   const gdv_int32 part_lens[num_parts] = {in1_len, in2_len, in3_len, in4_len,
                                           in5_len, in6_len, in7_len};

   // Accumulate in 64 bits: the sum of several 32-bit lengths can exceed INT_MAX, and
   // signed 32-bit overflow is undefined behaviour.
   gdv_int64 total_len = 0;
   for (int i = 0; i < num_parts; ++i) {
     total_len += part_lens[i];
   }
   if (total_len <= 0) {
     *out_len = 0;
     return "";
   }
   if (total_len > INT_MAX) {
     gdv_fn_context_set_error_msg(context, "Would overflow maximum output size");
     *out_len = 0;
     return "";
   }
   *out_len = static_cast<gdv_int32>(total_len);
   char* ret = reinterpret_cast<char*>(gdv_fn_context_arena_malloc(context, *out_len));
   if (ret == nullptr) {
     gdv_fn_context_set_error_msg(context, "Could not allocate memory for output string");
     *out_len = 0;
     return "";
   }
   // Append each part right after the previous one.
   char* cursor = ret;
   for (int i = 0; i < num_parts; ++i) {
     memcpy(cursor, parts[i], part_lens[i]);
     cursor += part_lens[i];
   }
   return ret;
 }

 // Eight-argument concat with explicit validity flags: an argument whose validity flag
 // is false contributes zero bytes. The concatenation itself is performed by
 // concatOperator_utf8_utf8_utf8_utf8_utf8_utf8_utf8_utf8.
 FORCE_INLINE
 const char* concat_utf8_utf8_utf8_utf8_utf8_utf8_utf8_utf8(
     gdv_int64 context, const char* in1, gdv_int32 in1_len, bool in1_validity,
     const char* in2, gdv_int32 in2_len, bool in2_validity, const char* in3,
     gdv_int32 in3_len, bool in3_validity, const char* in4, gdv_int32 in4_len,
     bool in4_validity, const char* in5, gdv_int32 in5_len, bool in5_validity,
     const char* in6, gdv_int32 in6_len, bool in6_validity, const char* in7,
     gdv_int32 in7_len, bool in7_validity, const char* in8, gdv_int32 in8_len,
     bool in8_validity, gdv_int32* out_len) {
   const gdv_int32 len1 = in1_validity ? in1_len : 0;
   const gdv_int32 len2 = in2_validity ? in2_len : 0;
   const gdv_int32 len3 = in3_validity ? in3_len : 0;
   const gdv_int32 len4 = in4_validity ? in4_len : 0;
   const gdv_int32 len5 = in5_validity ? in5_len : 0;
   const gdv_int32 len6 = in6_validity ? in6_len : 0;
   const gdv_int32 len7 = in7_validity ? in7_len : 0;
   const gdv_int32 len8 = in8_validity ? in8_len : 0;
   return concatOperator_utf8_utf8_utf8_utf8_utf8_utf8_utf8_utf8(
       context, in1, len1, in2, len2, in3, len3, in4, len4, in5, len5, in6, len6, in7,
       len7, in8, len8, out_len);
 }

 // Concatenates eight strings, in argument order, into a newly arena-allocated buffer
 // and stores the combined length in *out_len.
 // Returns an empty string with *out_len == 0 when the combined length is not positive.
 // When the combined length exceeds INT_MAX or the allocation fails, an error is set on
 // the context and an empty string is returned.
 FORCE_INLINE
 const char* concatOperator_utf8_utf8_utf8_utf8_utf8_utf8_utf8_utf8(
     gdv_int64 context, const char* in1, gdv_int32 in1_len, const char* in2,
     gdv_int32 in2_len, const char* in3, gdv_int32 in3_len, const char* in4,
     gdv_int32 in4_len, const char* in5, gdv_int32 in5_len, const char* in6,
     gdv_int32 in6_len, const char* in7, gdv_int32 in7_len, const char* in8,
     gdv_int32 in8_len, gdv_int32* out_len) {
   const int num_parts = 8;
   const char* parts[num_parts] = {in1, in2, in3, in4, in5, in6, in7, in8};
   const gdv_int32 part_lens[num_parts] = {in1_len, in2_len, in3_len, in4_len,
                                           in5_len, in6_len, in7_len, in8_len};

   // Accumulate in 64 bits: the sum of several 32-bit lengths can exceed INT_MAX, and
   // signed 32-bit overflow is undefined behaviour.
   gdv_int64 total_len = 0;
   for (int i = 0; i < num_parts; ++i) {
     total_len += part_lens[i];
   }
   if (total_len <= 0) {
     *out_len = 0;
     return "";
   }
   if (total_len > INT_MAX) {
     gdv_fn_context_set_error_msg(context, "Would overflow maximum output size");
     *out_len = 0;
     return "";
   }
   *out_len = static_cast<gdv_int32>(total_len);
   char* ret = reinterpret_cast<char*>(gdv_fn_context_arena_malloc(context, *out_len));
   if (ret == nullptr) {
     gdv_fn_context_set_error_msg(context, "Could not allocate memory for output string");
     *out_len = 0;
     return "";
   }
   // Append each part right after the previous one.
   char* cursor = ret;
   for (int i = 0; i < num_parts; ++i) {
     memcpy(cursor, parts[i], part_lens[i]);
     cursor += part_lens[i];
   }
   return ret;
 }

 // Nine-argument concat with explicit validity flags: an argument whose validity flag
 // is false contributes zero bytes. The concatenation itself is performed by
 // concatOperator_utf8_utf8_utf8_utf8_utf8_utf8_utf8_utf8_utf8.
 FORCE_INLINE
 const char* concat_utf8_utf8_utf8_utf8_utf8_utf8_utf8_utf8_utf8(
     gdv_int64 context, const char* in1, gdv_int32 in1_len, bool in1_validity,
     const char* in2, gdv_int32 in2_len, bool in2_validity, const char* in3,
     gdv_int32 in3_len, bool in3_validity, const char* in4, gdv_int32 in4_len,
     bool in4_validity, const char* in5, gdv_int32 in5_len, bool in5_validity,
     const char* in6, gdv_int32 in6_len, bool in6_validity, const char* in7,
     gdv_int32 in7_len, bool in7_validity, const char* in8, gdv_int32 in8_len,
     bool in8_validity, const char* in9, gdv_int32 in9_len, bool in9_validity,
     gdv_int32* out_len) {
   const gdv_int32 len1 = in1_validity ? in1_len : 0;
   const gdv_int32 len2 = in2_validity ? in2_len : 0;
   const gdv_int32 len3 = in3_validity ? in3_len : 0;
   const gdv_int32 len4 = in4_validity ? in4_len : 0;
   const gdv_int32 len5 = in5_validity ? in5_len : 0;
   const gdv_int32 len6 = in6_validity ? in6_len : 0;
   const gdv_int32 len7 = in7_validity ? in7_len : 0;
   const gdv_int32 len8 = in8_validity ? in8_len : 0;
   const gdv_int32 len9 = in9_validity ? in9_len : 0;
   return concatOperator_utf8_utf8_utf8_utf8_utf8_utf8_utf8_utf8_utf8(
       context, in1, len1, in2, len2, in3, len3, in4, len4, in5, len5, in6, len6, in7,
       len7, in8, len8, in9, len9, out_len);
 }

 // Concatenates nine strings, in argument order, into a newly arena-allocated buffer
 // and stores the combined length in *out_len.
 // Returns an empty string with *out_len == 0 when the combined length is not positive.
 // When the combined length exceeds INT_MAX or the allocation fails, an error is set on
 // the context and an empty string is returned.
 FORCE_INLINE
 const char* concatOperator_utf8_utf8_utf8_utf8_utf8_utf8_utf8_utf8_utf8(
     gdv_int64 context, const char* in1, gdv_int32 in1_len, const char* in2,
     gdv_int32 in2_len, const char* in3, gdv_int32 in3_len, const char* in4,
     gdv_int32 in4_len, const char* in5, gdv_int32 in5_len, const char* in6,
     gdv_int32 in6_len, const char* in7, gdv_int32 in7_len, const char* in8,
     gdv_int32 in8_len, const char* in9, gdv_int32 in9_len, gdv_int32* out_len) {
   const int num_parts = 9;
   const char* parts[num_parts] = {in1, in2, in3, in4, in5, in6, in7, in8, in9};
   const gdv_int32 part_lens[num_parts] = {in1_len, in2_len, in3_len, in4_len, in5_len,
                                           in6_len, in7_len, in8_len, in9_len};

   // Accumulate in 64 bits: the sum of several 32-bit lengths can exceed INT_MAX, and
   // signed 32-bit overflow is undefined behaviour.
   gdv_int64 total_len = 0;
   for (int i = 0; i < num_parts; ++i) {
     total_len += part_lens[i];
   }
   if (total_len <= 0) {
     *out_len = 0;
     return "";
   }
   if (total_len > INT_MAX) {
     gdv_fn_context_set_error_msg(context, "Would overflow maximum output size");
     *out_len = 0;
     return "";
   }
   *out_len = static_cast<gdv_int32>(total_len);
   char* ret = reinterpret_cast<char*>(gdv_fn_context_arena_malloc(context, *out_len));
   if (ret == nullptr) {
     gdv_fn_context_set_error_msg(context, "Could not allocate memory for output string");
     *out_len = 0;
     return "";
   }
   // Append each part right after the previous one.
   char* cursor = ret;
   for (int i = 0; i < num_parts; ++i) {
     memcpy(cursor, parts[i], part_lens[i]);
     cursor += part_lens[i];
   }
   return ret;
 }

 // Ten-argument concat with explicit validity flags: an argument whose validity flag
 // is false contributes zero bytes. The concatenation itself is performed by
 // concatOperator_utf8_utf8_utf8_utf8_utf8_utf8_utf8_utf8_utf8_utf8.
 FORCE_INLINE
 const char* concat_utf8_utf8_utf8_utf8_utf8_utf8_utf8_utf8_utf8_utf8(
     gdv_int64 context, const char* in1, gdv_int32 in1_len, bool in1_validity,
     const char* in2, gdv_int32 in2_len, bool in2_validity, const char* in3,
     gdv_int32 in3_len, bool in3_validity, const char* in4, gdv_int32 in4_len,
     bool in4_validity, const char* in5, gdv_int32 in5_len, bool in5_validity,
     const char* in6, gdv_int32 in6_len, bool in6_validity, const char* in7,
     gdv_int32 in7_len, bool in7_validity, const char* in8, gdv_int32 in8_len,
     bool in8_validity, const char* in9, gdv_int32 in9_len, bool in9_validity,
     const char* in10, gdv_int32 in10_len, bool in10_validity, gdv_int32* out_len) {
   const gdv_int32 len1 = in1_validity ? in1_len : 0;
   const gdv_int32 len2 = in2_validity ? in2_len : 0;
   const gdv_int32 len3 = in3_validity ? in3_len : 0;
   const gdv_int32 len4 = in4_validity ? in4_len : 0;
   const gdv_int32 len5 = in5_validity ? in5_len : 0;
   const gdv_int32 len6 = in6_validity ? in6_len : 0;
   const gdv_int32 len7 = in7_validity ? in7_len : 0;
   const gdv_int32 len8 = in8_validity ? in8_len : 0;
   const gdv_int32 len9 = in9_validity ? in9_len : 0;
   const gdv_int32 len10 = in10_validity ? in10_len : 0;
   return concatOperator_utf8_utf8_utf8_utf8_utf8_utf8_utf8_utf8_utf8_utf8(
       context, in1, len1, in2, len2, in3, len3, in4, len4, in5, len5, in6, len6, in7,
       len7, in8, len8, in9, len9, in10, len10, out_len);
 }

 // Concatenates ten strings, in argument order, into a newly arena-allocated buffer
 // and stores the combined length in *out_len.
 // Returns an empty string with *out_len == 0 when the combined length is not positive.
 // When the combined length exceeds INT_MAX or the allocation fails, an error is set on
 // the context and an empty string is returned.
 FORCE_INLINE
 const char* concatOperator_utf8_utf8_utf8_utf8_utf8_utf8_utf8_utf8_utf8_utf8(
     gdv_int64 context, const char* in1, gdv_int32 in1_len, const char* in2,
     gdv_int32 in2_len, const char* in3, gdv_int32 in3_len, const char* in4,
     gdv_int32 in4_len, const char* in5, gdv_int32 in5_len, const char* in6,
     gdv_int32 in6_len, const char* in7, gdv_int32 in7_len, const char* in8,
     gdv_int32 in8_len, const char* in9, gdv_int32 in9_len, const char* in10,
     gdv_int32 in10_len, gdv_int32* out_len) {
   const int num_parts = 10;
   const char* parts[num_parts] = {in1, in2, in3, in4, in5, in6, in7, in8, in9, in10};
   const gdv_int32 part_lens[num_parts] = {in1_len, in2_len, in3_len, in4_len, in5_len,
                                           in6_len, in7_len, in8_len, in9_len, in10_len};

   // Accumulate in 64 bits: the sum of several 32-bit lengths can exceed INT_MAX, and
   // signed 32-bit overflow is undefined behaviour.
   gdv_int64 total_len = 0;
   for (int i = 0; i < num_parts; ++i) {
     total_len += part_lens[i];
   }
   if (total_len <= 0) {
     *out_len = 0;
     return "";
   }
   if (total_len > INT_MAX) {
     gdv_fn_context_set_error_msg(context, "Would overflow maximum output size");
     *out_len = 0;
     return "";
   }
   *out_len = static_cast<gdv_int32>(total_len);
   char* ret = reinterpret_cast<char*>(gdv_fn_context_arena_malloc(context, *out_len));
   if (ret == nullptr) {
     gdv_fn_context_set_error_msg(context, "Could not allocate memory for output string");
     *out_len = 0;
     return "";
   }
   // Append each part right after the previous one.
   char* cursor = ret;
   for (int i = 0; i < num_parts; ++i) {
     memcpy(cursor, parts[i], part_lens[i]);
     cursor += part_lens[i];
   }
   return ret;
 }

 // Returns the numeric value of the first byte of `data`, read as a signed char and
 // widened to 32 bits (so a byte with the high bit set typically comes out negative).
 // A zero-length input yields 0.
 GANDIVA_EXPORT
 gdv_int32 ascii_utf8(const char* data, gdv_int32 data_len) {
   if (data_len != 0) {
     const signed char first_byte = static_cast<signed char>(data[0]);
     return static_cast<gdv_int32>(first_byte);
   }
   return 0;
 }

 // Returns a one-byte, arena-allocated string holding the character whose code is
 // `in % 256`, i.e. chr(A) is equivalent to chr(A % 256).
 // On allocation failure an error is set on the context and an empty string is
 // returned with *out_len == 0.
 FORCE_INLINE
 const char* chr_int32(gdv_int64 context, gdv_int32 in, gdv_int32* out_len) {
   char* ret = reinterpret_cast<char*>(gdv_fn_context_arena_malloc(context, 1));
   if (ret != nullptr) {
     ret[0] = static_cast<char>(in % 256);
     *out_len = 1;
     return ret;
   }

   gdv_fn_context_set_error_msg(context, "Could not allocate memory for output string");
   *out_len = 0;
   return "";
 }

 // 64-bit variant of chr: returns a one-byte, arena-allocated string holding the
 // character whose code is `in % 256`, i.e. chr(A) is equivalent to chr(A % 256).
 // On allocation failure an error is set on the context and an empty string is
 // returned with *out_len == 0.
 FORCE_INLINE
 const char* chr_int64(gdv_int64 context, gdv_int64 in, gdv_int32* out_len) {
   char* ret = reinterpret_cast<char*>(gdv_fn_context_arena_malloc(context, 1));
   if (ret != nullptr) {
     ret[0] = static_cast<char>(in % 256);
     *out_len = 1;
     return ret;
   }

   gdv_fn_context_set_error_msg(context, "Could not allocate memory for output string");
   *out_len = 0;
   return "";
 }

 // Pass-through conversion: reports `len` through out_len and returns the input
 // pointer itself. No bytes are copied or validated, and `context` is unused.
 FORCE_INLINE
 const char* convert_fromUTF8_binary(gdv_int64 context, const char* bin_in, gdv_int32 len,
                                     gdv_int32* out_len) {
   *out_len = len;
   return bin_in;
 }

 // Returns text_in with every byte that does not start a well-formed UTF-8 sequence
 // replaced by char_to_replace[0], or dropped when char_to_replace is empty.
 //
 // - char_to_replace_len > 1 is rejected: an error is set on the context and an empty
 //   string is returned.
 // - A sequence is treated as invalid when utf8_char_length reports 0 for its first
 //   byte, when it would run past text_len, or when validate_utf8_following_bytes
 //   rejects it. Only the offending first byte is consumed; scanning resumes at the
 //   next byte.
 // - When no invalid byte is found, text_in itself is returned (no copy) with
 //   *out_len == text_len. Otherwise the result lives in arena memory and *out_len is
 //   the number of bytes written.
 // - On allocation failure an error is set and an empty string is returned.
 FORCE_INLINE
 const char* convert_replace_invalid_fromUTF8_binary(int64_t context, const char* text_in,
                                                     int32_t text_len,
                                                     const char* char_to_replace,
                                                     int32_t char_to_replace_len,
                                                     int32_t* out_len) {
   if (char_to_replace_len > 1) {
     gdv_fn_context_set_error_msg(context, "Replacement of multiple bytes not supported");
     *out_len = 0;
     return "";
   }
   // actually the convert_replace function replaces invalid chars with an ASCII
   // character so the output length will be the same as the input length
   // (or shorter, when invalid bytes are dropped), so text_len bytes always suffice
   *out_len = text_len;
   char* ret = reinterpret_cast<char*>(gdv_fn_context_arena_malloc(context, *out_len));
   if (ret == nullptr) {
     gdv_fn_context_set_error_msg(context, "Could not allocate memory for output string");
     *out_len = 0;
     return "";
   }
   // valid_bytes_to_cpy: length of the current run of valid bytes not yet copied
   // out_byte_counter:   bytes written to ret so far
   // in_byte_counter:    bytes of text_in already accounted for (copied or replaced)
   int32_t valid_bytes_to_cpy = 0;
   int32_t out_byte_counter = 0;
   int32_t in_byte_counter = 0;
   int32_t char_len;
   // scan the base text from left to right, one character at a time, looking for
   // invalid chars to substitute
   for (int text_index = 0; text_index < text_len; text_index += char_len) {
     char_len = utf8_char_length(text_in[text_index]);
     // only memory copy the bytes when detect invalid char
     if (char_len == 0 || text_index + char_len > text_len ||
         !validate_utf8_following_bytes(text_in, char_len, text_index)) {
       // define char_len = 1 to increase text_index by 1 (as ASCII char fits in 1 byte)
       char_len = 1;
       // first copy the valid bytes until now and then replace the invalid character
       memcpy(ret + out_byte_counter, text_in + in_byte_counter, valid_bytes_to_cpy);
       // if the replacement char is empty, the invalid char should be ignored
       if (char_to_replace_len == 0) {
         out_byte_counter += valid_bytes_to_cpy;
       } else {
         ret[out_byte_counter + valid_bytes_to_cpy] = char_to_replace[0];
         out_byte_counter += valid_bytes_to_cpy + char_len;
       }
       // the invalid byte is consumed from the input in both cases
       in_byte_counter += valid_bytes_to_cpy + char_len;
       valid_bytes_to_cpy = 0;
       continue;
     }
     valid_bytes_to_cpy += char_len;
   }
   // if invalid chars were not found, return the original string
   // (in_byte_counter only advances when an invalid byte is handled)
   if (out_byte_counter == 0 && in_byte_counter == 0) return text_in;
   // if there are still valid bytes to copy, do it
   if (valid_bytes_to_cpy != 0) {
     memcpy(ret + out_byte_counter, text_in + in_byte_counter, valid_bytes_to_cpy);
   }
   // the out length will be the out bytes copied + the missing end bytes copied
   *out_len = valid_bytes_to_cpy + out_byte_counter;
   return ret;
 }

 // Reverses the first `len` bytes of `buf` in place. Buffers with fewer than two
 // bytes (including non-positive lengths) are left untouched.
 static inline void reverse_char_buf(char* buf, int32_t len) {
   if (len < 2) {
     return;
   }

   // Swap the outermost pair and move both ends towards the middle.
   char* head = buf;
   char* tail = buf + (len - 1);
   while (head < tail) {
     const char saved = *head;
     *head++ = *tail;
     *tail-- = saved;
   }
 }

 // Copies the in-memory bytes of a double into a newly arena-allocated buffer and
 // reports sizeof(double) through out_len.
 // On allocation failure an error is set on the context and an empty string is
 // returned with *out_len == 0.
 FORCE_INLINE
 const char* convert_toDOUBLE(int64_t context, double value, int32_t* out_len) {
   const int32_t num_bytes = static_cast<int32_t>(sizeof(value));
   char* buffer = reinterpret_cast<char*>(gdv_fn_context_arena_malloc(context, num_bytes));

   if (buffer != nullptr) {
     memcpy(buffer, &value, num_bytes);
     *out_len = num_bytes;
     return buffer;
   }

   gdv_fn_context_set_error_msg(context,
                                "Could not allocate memory for the output string");
   *out_len = 0;
   return "";
 }

 // Same as convert_toDOUBLE, but the bytes are always returned in big endian order:
 // on little endian builds the buffer produced by convert_toDOUBLE is reversed in place.
 FORCE_INLINE
 const char* convert_toDOUBLE_be(int64_t context, double value, int32_t* out_len) {
   const char* bytes = convert_toDOUBLE(context, value, out_len);

 #if ARROW_LITTLE_ENDIAN
   // With *out_len == 0 (allocation failure) the reversal touches nothing.
   reverse_char_buf(const_cast<char*>(bytes), *out_len);
 #endif

   return bytes;
 }

 // Copies the in-memory bytes of a float into a newly arena-allocated buffer and
 // reports sizeof(float) through out_len.
 // On allocation failure an error is set on the context and an empty string is
 // returned with *out_len == 0.
 FORCE_INLINE
 const char* convert_toFLOAT(int64_t context, float value, int32_t* out_len) {
   const int32_t num_bytes = static_cast<int32_t>(sizeof(value));
   char* buffer = reinterpret_cast<char*>(gdv_fn_context_arena_malloc(context, num_bytes));

   if (buffer != nullptr) {
     memcpy(buffer, &value, num_bytes);
     *out_len = num_bytes;
     return buffer;
   }

   gdv_fn_context_set_error_msg(context,
                                "Could not allocate memory for the output string");
   *out_len = 0;
   return "";
 }

 // Same as convert_toFLOAT, but the bytes are always returned in big endian order:
 // on little endian builds the buffer produced by convert_toFLOAT is reversed in place.
 FORCE_INLINE
 const char* convert_toFLOAT_be(int64_t context, float value, int32_t* out_len) {
   const char* bytes = convert_toFLOAT(context, value, out_len);

 #if ARROW_LITTLE_ENDIAN
   // With *out_len == 0 (allocation failure) the reversal touches nothing.
   reverse_char_buf(const_cast<char*>(bytes), *out_len);
 #endif

   return bytes;
 }

 // Copies the in-memory bytes of a 64-bit integer into a newly arena-allocated buffer
 // and reports sizeof(int64_t) through out_len.
 // On allocation failure an error is set on the context and an empty string is
 // returned with *out_len == 0.
 FORCE_INLINE
 const char* convert_toBIGINT(int64_t context, int64_t value, int32_t* out_len) {
   const int32_t num_bytes = static_cast<int32_t>(sizeof(value));
   char* buffer = reinterpret_cast<char*>(gdv_fn_context_arena_malloc(context, num_bytes));

   if (buffer != nullptr) {
     memcpy(buffer, &value, num_bytes);
     *out_len = num_bytes;
     return buffer;
   }

   gdv_fn_context_set_error_msg(context,
                                "Could not allocate memory for the output string");
   *out_len = 0;
   return "";
 }

 // Same as convert_toBIGINT, but the bytes are always returned in big endian order:
 // on little endian builds the buffer produced by convert_toBIGINT is reversed in place.
 FORCE_INLINE
 const char* convert_toBIGINT_be(int64_t context, int64_t value, int32_t* out_len) {
   const char* bytes = convert_toBIGINT(context, value, out_len);

 #if ARROW_LITTLE_ENDIAN
   // With *out_len == 0 (allocation failure) the reversal touches nothing.
   reverse_char_buf(const_cast<char*>(bytes), *out_len);
 #endif

   return bytes;
 }

 // Copies the in-memory bytes of a 32-bit integer into a newly arena-allocated buffer
 // and reports sizeof(int32_t) through out_len.
 // On allocation failure an error is set on the context and an empty string is
 // returned with *out_len == 0.
 FORCE_INLINE
 const char* convert_toINT(int64_t context, int32_t value, int32_t* out_len) {
   const int32_t num_bytes = static_cast<int32_t>(sizeof(value));
   char* buffer = reinterpret_cast<char*>(gdv_fn_context_arena_malloc(context, num_bytes));

   if (buffer != nullptr) {
     memcpy(buffer, &value, num_bytes);
     *out_len = num_bytes;
     return buffer;
   }

   gdv_fn_context_set_error_msg(context,
                                "Could not allocate memory for the output string");
   *out_len = 0;
   return "";
 }

 // Same as convert_toINT, but the bytes are always returned in big endian order:
 // on little endian builds the buffer produced by convert_toINT is reversed in place.
 FORCE_INLINE
 const char* convert_toINT_be(int64_t context, int32_t value, int32_t* out_len) {
   const char* bytes = convert_toINT(context, value, out_len);

 #if ARROW_LITTLE_ENDIAN
   // With *out_len == 0 (allocation failure) the reversal touches nothing.
   reverse_char_buf(const_cast<char*>(bytes), *out_len);
 #endif

   return bytes;
 }

 // Copies the in-memory bytes of a bool into a newly arena-allocated buffer and
 // reports sizeof(bool) through out_len.
 // On allocation failure an error is set on the context and an empty string is
 // returned with *out_len == 0.
 FORCE_INLINE
 const char* convert_toBOOLEAN(int64_t context, bool value, int32_t* out_len) {
   const int32_t num_bytes = static_cast<int32_t>(sizeof(value));
   char* buffer = reinterpret_cast<char*>(gdv_fn_context_arena_malloc(context, num_bytes));

   if (buffer != nullptr) {
     memcpy(buffer, &value, num_bytes);
     *out_len = num_bytes;
     return buffer;
   }

   gdv_fn_context_set_error_msg(context,
                                "Could not allocate memory for the output string");
   *out_len = 0;
   return "";
 }

 // Converts a time variable to binary.
 // The value is a 32-bit integer, so this simply delegates to convert_toINT; result,
 // out_len and failure behaviour are those of convert_toINT.
 FORCE_INLINE
 const char* convert_toTIME_EPOCH(int64_t context, int32_t value, int32_t* out_len) {
   return convert_toINT(context, value, out_len);
 }

 // Big endian variant of convert_toTIME_EPOCH; delegates to convert_toINT_be.
 FORCE_INLINE
 const char* convert_toTIME_EPOCH_be(int64_t context, int32_t value, int32_t* out_len) {
   // The function behaves as convert_toTIME_EPOCH, but
   // returns the bytes in big endian format
   return convert_toINT_be(context, value, out_len);
 }

 // Converts a timestamp variable to binary
 FORCE_INLINE
 const char* convert_toTIMESTAMP_EPOCH(int64_t context, int64_t timestamp,
                                       int32_t* out_len) {
   // the timestamp arrives as a 64-bit integer, so the BIGINT conversion is reused
   const char* encoded = convert_toBIGINT(context, timestamp, out_len);
   return encoded;
 }

 FORCE_INLINE
 const char* convert_toTIMESTAMP_EPOCH_be(int64_t context, int64_t timestamp,
                                          int32_t* out_len) {
   // Big endian flavour of convert_toTIMESTAMP_EPOCH: the work is delegated to
   // the big endian BIGINT conversion.
   const char* encoded = convert_toBIGINT_be(context, timestamp, out_len);
   return encoded;
 }

 // Converts a date variable to binary
 FORCE_INLINE
 const char* convert_toDATE_EPOCH(int64_t context, int64_t date, int32_t* out_len) {
   // the date arrives as a 64-bit integer, so the BIGINT conversion is reused
   const char* encoded = convert_toBIGINT(context, date, out_len);
   return encoded;
 }

 FORCE_INLINE
 const char* convert_toDATE_EPOCH_be(int64_t context, int64_t date, int32_t* out_len) {
   // Big endian flavour of convert_toDATE_EPOCH: the work is delegated to the
   // big endian BIGINT conversion.
   const char* encoded = convert_toBIGINT_be(context, date, out_len);
   return encoded;
 }

 // Converts a string variable to binary.
 // No copy is made: the input pointer is returned as is and *out_len receives
 // the input length.
 FORCE_INLINE
 const char* convert_toUTF8(int64_t context, const char* value, int32_t value_len,
                            int32_t* out_len) {
   const char* passthrough = value;
   *out_len = value_len;
   return passthrough;
 }

 // Calculate the levenshtein distance between two string values
 //
 // The inputs are compared byte by byte. Only two rows of the dynamic programming
 // matrix are kept at any time, each one sized by the shorter of the two inputs.
 //
 // Returns the edit distance. If either length is negative, or the work buffer
 // cannot be allocated, an error is set on the context and 0 is returned.
 FORCE_INLINE
 gdv_int32 levenshtein(int64_t context, const char* in1, int32_t in1_len, const char* in2,
                       int32_t in2_len) {
   if (in1_len < 0 || in2_len < 0) {
     gdv_fn_context_set_error_msg(context, "String length must be greater than 0");
     return 0;
   }

   // When one input is empty the distance is the length of the other one
   if (in1_len == 0) {
     return in2_len;
   }
   if (in2_len == 0) {
     return in1_len;
   }

   // arr_larger and arr_smaller point at the longer and the shorter input
   const char* arr_larger;
   const char* arr_smaller;
   // len_larger and len_smaller hold the matching lengths
   int len_larger;
   int len_smaller;

   if (in1_len < in2_len) {
     len_larger = in2_len;
     arr_larger = in2;

     len_smaller = in1_len;
     arr_smaller = in1;
   } else {
     len_larger = in1_len;
     arr_larger = in1;

     len_smaller = in2_len;
     arr_smaller = in2;
   }

   // One allocation holds both rows: 2 * (len_smaller + 1) ints
   int* ptr = reinterpret_cast<int*>(
       gdv_fn_context_arena_malloc(context, (len_smaller + 1) * 2 * sizeof(int)));
   if (ptr == nullptr) {
     // report the allocation failure as such, not as an invalid string length
     gdv_fn_context_set_error_msg(context,
                                  "Could not allocate memory for levenshtein buffer");
     return 0;
   }

   // BUFFER LAYOUT
   // v0 -> ptr[0] ... ptr[len_smaller]
   // v1 -> ptr[len_smaller + 1] ... ptr[2 * len_smaller + 1]
   int* v0;
   int* v1;
   int* aux;
   v0 = &ptr[0];
   v1 = &ptr[len_smaller + 1];

   // Initialize v0: turning an empty prefix into the first i chars costs i edits
   for (int i = 0; i <= len_smaller; i++) {
     v0[i] = i;
   }

   // Iterate over the larger input, producing one new row (v1) per character
   for (int i = 0; i < len_larger; i++) {
     // The first element of v1 is i + 1: deleting (i + 1) chars of the larger input
     // is what it takes to match an empty prefix of the smaller one
     v1[0] = i + 1;

     for (int j = 0; j < len_smaller; j++) {
       // Calculate costs to modify
       int deletionCost = v0[j + 1] + 1;
       int insertionCost = v1[j] + 1;
       int substitutionCost = v0[j] + 1;

       // equal bytes need no substitution
       if (arr_larger[i] == arr_smaller[j]) {
         substitutionCost = v0[j];
       }

       // Pick the smallest of the three costs
       int min;
       min = deletionCost;

       if (min > substitutionCost) {
         min = substitutionCost;
       }
       if (min > insertionCost) {
         min = insertionCost;
       }

       // Store the smallest cost in v1
       v1[j + 1] = min;
     }

     // Swapping v0 and v1, so the row just computed becomes the previous row
     aux = v0;
     v0 = v1;
     v1 = aux;
   }
   // After the last swap the final row is in v0; the distance is its last entry
   return v0[len_smaller];
 }

 // Search for a string within another string
 // Same as "locate(substr, str)", except for the reverse order of the arguments.
 FORCE_INLINE
 gdv_int32 strpos_utf8_utf8(gdv_int64 context, const char* str, gdv_int32 str_len,
                            const char* sub_str, gdv_int32 sub_str_len) {
   // the search always begins at the first character (positions are 1-indexed)
   const gdv_int32 first_position = 1;
   return locate_utf8_utf8_int32(context, sub_str, sub_str_len, str, str_len,
                                 first_position);
 }

 // Search for a string within another string
 FORCE_INLINE
 gdv_int32 locate_utf8_utf8(gdv_int64 context, const char* sub_str, gdv_int32 sub_str_len,
                            const char* str, gdv_int32 str_len) {
   // the search always begins at the first character (positions are 1-indexed)
   const gdv_int32 first_position = 1;
   return locate_utf8_utf8_int32(context, sub_str, sub_str_len, str, str_len,
                                 first_position);
 }

 // Search for a string within another string starting at position start-pos (1-indexed)
 //
 // Returns the 1-indexed character position of the first match, or 0 when there is
 // no match, when either string is empty, or when start_pos is past the end of str.
 // A start_pos below 1 sets an error on the context and returns 0.
 FORCE_INLINE
 gdv_int32 locate_utf8_utf8_int32(gdv_int64 context, const char* sub_str,
                                  gdv_int32 sub_str_len, const char* str,
                                  gdv_int32 str_len, gdv_int32 start_pos) {
   if (start_pos < 1) {
     gdv_fn_context_set_error_msg(context, "Start position must be greater than 0");
     return 0;
   }

   if (str_len == 0 || sub_str_len == 0) {
     return 0;
   }

   // byte offset reported by utf8_byte_pos for the (start_pos - 1)-th character
   const gdv_int32 first_byte = utf8_byte_pos(context, str, str_len, start_pos - 1);
   if (first_byte < 0 || first_byte >= str_len) {
     return 0;
   }

   // last byte offset at which sub_str still fits inside str
   const gdv_int32 last_candidate = str_len - sub_str_len;
   gdv_int32 candidate = first_byte;
   while (candidate <= last_candidate) {
     if (memcmp(str + candidate, sub_str, sub_str_len) == 0) {
       // turn the matching byte offset back into a 1-indexed character position
       return utf8_length(context, str, candidate) + 1;
     }
     ++candidate;
   }
   return 0;
 }

 // Replaces every occurrence of from_str inside text with to_str.
 //
 // The output buffer (max_length bytes) is only allocated once the first match is
 // found; when nothing matches, the original text pointer is returned untouched.
 // If the result would not fit into max_length bytes, or the allocation fails, an
 // error is set on the context, *out_len is set to 0 and "" is returned.
 FORCE_INLINE
 const char* replace_with_max_len_utf8_utf8_utf8(gdv_int64 context, const char* text,
                                                 gdv_int32 text_len, const char* from_str,
                                                 gdv_int32 from_str_len,
                                                 const char* to_str, gdv_int32 to_str_len,
                                                 gdv_int32 max_length,
                                                 gdv_int32* out_len) {
   // an empty pattern, or one longer than the text, can never match
   if (from_str_len <= 0 || from_str_len > text_len) {
     *out_len = text_len;
     return text;
   }

   char* out = nullptr;  // stays null until the first match is found
   gdv_int32 written = 0;
   // bytes in [pending_from, scan_pos) are copied lazily, when the next match is found
   gdv_int32 pending_from = 0;
   gdv_int32 scan_pos = 0;
   const gdv_int32 last_scan_pos = text_len - from_str_len;

   while (scan_pos <= last_scan_pos) {
     if (memcmp(text + scan_pos, from_str, from_str_len) != 0) {
       scan_pos++;
       continue;
     }

     if (written + scan_pos - pending_from + to_str_len > max_length) {
       gdv_fn_context_set_error_msg(context, "Buffer overflow for output string");
       *out_len = 0;
       return "";
     }
     if (out == nullptr) {
       // first match: the output buffer is needed from now on
       out = reinterpret_cast<char*>(gdv_fn_context_arena_malloc(context, max_length));
       if (out == nullptr) {
         gdv_fn_context_set_error_msg(context,
                                      "Could not allocate memory for output string");
         *out_len = 0;
         return "";
       }
     }

     // flush the unmatched bytes that precede this match ...
     const gdv_int32 pending_len = scan_pos - pending_from;
     memcpy(out + written, text + pending_from, pending_len);
     written += pending_len;
     // ... followed by the replacement
     memcpy(out + written, to_str, to_str_len);
     written += to_str_len;

     scan_pos += from_str_len;
     pending_from = scan_pos;
   }

   if (out == nullptr) {
     // no match at all
     *out_len = text_len;
     return text;
   }

   if (written + text_len - pending_from > max_length) {
     gdv_fn_context_set_error_msg(context, "Buffer overflow for output string");
     *out_len = 0;
     return "";
   }
   // copy whatever is left after the last match
   const gdv_int32 tail_len = text_len - pending_from;
   memcpy(out + written, text + pending_from, tail_len);
   *out_len = written + tail_len;
   return out;
 }

 // Replace variant without an explicit limit: the output is capped at 65535 bytes.
 FORCE_INLINE
 const char* replace_utf8_utf8_utf8(gdv_int64 context, const char* text,
                                    gdv_int32 text_len, const char* from_str,
                                    gdv_int32 from_str_len, const char* to_str,
                                    gdv_int32 to_str_len, gdv_int32* out_len) {
   const gdv_int32 default_max_length = 65535;
   return replace_with_max_len_utf8_utf8_utf8(context, text, text_len, from_str,
                                              from_str_len, to_str, to_str_len,
                                              default_max_length, out_len);
 }

 // Returns the quoted string (Includes escape character for any single quotes)
 // E.g. DONT  -> 'DONT'
 //      DON'T -> 'DON\'T'
 // An empty input yields an empty string. If the allocation fails an error is set
 // on the context, *out_len is set to 0 and "" is returned.
 FORCE_INLINE
 const char* quote_utf8(gdv_int64 context, const char* in, gdv_int32 in_len,
                        gdv_int32* out_len) {
   if (in_len <= 0) {
     *out_len = 0;
     return "";
   }
   // worst case: every input byte is a quote (2 bytes each) plus the 2 enclosing quotes
   char* out =
       reinterpret_cast<char*>(gdv_fn_context_arena_malloc(context, (in_len * 2) + 2));
   if (out == nullptr) {
     gdv_fn_context_set_error_msg(context, "Could not allocate memory for output string");
     *out_len = 0;
     return "";
   }

   gdv_int32 write_pos = 0;
   // opening quote
   out[write_pos++] = '\'';
   for (gdv_int32 read_pos = 0; read_pos < in_len; ++read_pos) {
     const char current = in[read_pos];
     if (current == '\'') {
       // a quote inside the text is preceded by a backslash
       out[write_pos++] = '\\';
     }
     out[write_pos++] = current;
   }
   // closing quote
   out[write_pos++] = '\'';
   *out_len = write_pos;
   return out;
 }

 // Computes how many bytes a padded string needs.
 //
 // text_len / actual_text_len are the byte and character lengths of the text being
 // padded, return_length is the wanted character length, and fill_text is repeated
 // to provide the missing (return_length - actual_text_len) characters.
 // Returns text_len plus the bytes of the whole repetitions of fill_text plus the
 // bytes of the leading characters of fill_text that make up the remainder.
 FORCE_INLINE
 gdv_int32 evaluate_return_char_length(gdv_int32 text_len, gdv_int32 actual_text_len,
                                       gdv_int32 return_length, const char* fill_text,
                                       gdv_int32 fill_text_len) {
   gdv_int32 fill_actual_text_len = utf8_length_ignore_invalid(fill_text, fill_text_len);
   if (fill_actual_text_len <= 0) {
     // nothing to pad with; also keeps the division below away from zero
     return text_len;
   }
   gdv_int32 repeat_times = (return_length - actual_text_len) / fill_actual_text_len;
   gdv_int32 return_char_length = repeat_times * fill_text_len + text_len;
   gdv_int32 mod = (return_length - actual_text_len) % fill_actual_text_len;
   gdv_int32 char_len = 0;
   gdv_int32 fill_index = 0;
   for (gdv_int32 i = 0; i < mod; i++) {
     char_len = utf8_char_length(fill_text[fill_index]);
     // The padding loops copy an invalid byte as a character of size 1, so it has
     // to be counted as 1 here as well; otherwise the buffer would be too small.
     if (char_len == 0) char_len = 1;
     fill_index += char_len;
     return_char_length += char_len;
   }
   return return_char_length;
 }

 // Pads `text` on the left with `fill_text` until it is `return_length` characters
 // long, or truncates it when it is longer than that.
 //
 // Lengths are counted in UTF-8 characters. An empty text or a non-positive
 // return length yields an empty string. When no padding is needed (or fill_text is
 // empty) the input pointer itself is returned. If the allocation fails an error is
 // set on the context, *out_len is set to 0 and "" is returned.
 FORCE_INLINE
 const char* lpad_utf8_int32_utf8(gdv_int64 context, const char* text, gdv_int32 text_len,
                                  gdv_int32 return_length, const char* fill_text,
                                  gdv_int32 fill_text_len, gdv_int32* out_len) {
   // keep the return length (number of characters to return) between min_str_length
   // and max_str_length; if the text length is 0 or the return length is <=0, then
   // return an empty string.
   return_length = std::min(max_str_length, return_length);
   return_length = std::max(min_str_length, return_length);
   if (text_len == 0 || return_length <= 0) {
     *out_len = 0;
     return "";
   }

   // count the number of utf8 characters on text, ignoring invalid bytes
   int actual_text_len = utf8_length_ignore_invalid(text, text_len);

   if (return_length == actual_text_len ||
       (return_length > actual_text_len && fill_text_len == 0)) {
     // case where the return length is same as the text's length, or if it need to
     // fill into text but "fill_text" is empty, then return text directly.
     *out_len = text_len;
     return text;
   } else if (return_length < actual_text_len) {
     // case where it truncates the result on return length.
     *out_len = utf8_byte_pos(context, text, text_len, return_length);
     return text;
   } else {
     // case (return_length > actual_text_len)
     // case where it needs to copy "fill_text" on the string left. The total number
     // of chars to copy is given by (return_length -  actual_text_len)
     gdv_int32 return_char_length = evaluate_return_char_length(
         text_len, actual_text_len, return_length, fill_text, fill_text_len);
     char* ret = reinterpret_cast<gdv_binary>(
         gdv_fn_context_arena_malloc(context, return_char_length));
     if (ret == nullptr) {
       gdv_fn_context_set_error_msg(context,
                                    "Could not allocate memory for output string");
       *out_len = 0;
       return "";
     }
     // try to fulfill the return string with the "fill_text" continuously
     const int32_t chars_to_fill = return_length - actual_text_len;
     int32_t copied_chars_count = 0;
     int32_t copied_chars_position = 0;
     while (copied_chars_count < chars_to_fill) {
       // walk fill_text one character at a time to find how many of its bytes
       // are taken by this pass
       int32_t fill_index = 0;
       while (fill_index < fill_text_len && copied_chars_count < chars_to_fill) {
         int32_t char_len = utf8_char_length(fill_text[fill_index]);
         // ignore invalid char on the fill text, considering it as size 1
         if (char_len == 0) char_len = 1;
         // a multi-byte character cut short by the end of fill_text must not push
         // the copy past the end of fill_text
         if (char_len > fill_text_len - fill_index) {
           char_len = fill_text_len - fill_index;
         }
         fill_index += char_len;
         copied_chars_count++;
       }
       memcpy(ret + copied_chars_position, fill_text, fill_index);
       copied_chars_position += fill_index;
     }
     // after fulfilling the text, copy the main string
     memcpy(ret + copied_chars_position, text, text_len);
     *out_len = copied_chars_position + text_len;
     return ret;
   }
 }

 // Pads `text` on the right with `fill_text` until it is `return_length` characters
 // long, or truncates it when it is longer than that.
 //
 // Lengths are counted in UTF-8 characters. An empty text or a non-positive
 // return length yields an empty string. When no padding is needed (or fill_text is
 // empty) the input pointer itself is returned. If the allocation fails an error is
 // set on the context, *out_len is set to 0 and "" is returned.
 FORCE_INLINE
 const char* rpad_utf8_int32_utf8(gdv_int64 context, const char* text, gdv_int32 text_len,
                                  gdv_int32 return_length, const char* fill_text,
                                  gdv_int32 fill_text_len, gdv_int32* out_len) {
   // keep the return length (number of characters to return) between min_str_length
   // and max_str_length; if the text length is 0 or the return length is <=0, then
   // return an empty string.
   return_length = std::min(max_str_length, return_length);
   return_length = std::max(min_str_length, return_length);
   if (text_len == 0 || return_length <= 0) {
     *out_len = 0;
     return "";
   }

   // count the number of utf8 characters on text, ignoring invalid bytes
   int actual_text_len = utf8_length_ignore_invalid(text, text_len);

   if (return_length == actual_text_len ||
       (return_length > actual_text_len && fill_text_len == 0)) {
     // case where the return length is same as the text's length, or if it need to
     // fill into text but "fill_text" is empty, then return text directly.
     *out_len = text_len;
     return text;
   } else if (return_length < actual_text_len) {
     // case where it truncates the result on return length.
     *out_len = utf8_byte_pos(context, text, text_len, return_length);
     return text;
   } else {
     // case (return_length > actual_text_len)
     // case where it needs to copy "fill_text" on the string right
     gdv_int32 return_char_length = evaluate_return_char_length(
         text_len, actual_text_len, return_length, fill_text, fill_text_len);
     char* ret = reinterpret_cast<gdv_binary>(
         gdv_fn_context_arena_malloc(context, return_char_length));
     if (ret == nullptr) {
       gdv_fn_context_set_error_msg(context,
                                    "Could not allocate memory for output string");
       *out_len = 0;
       return "";
     }
     // fulfill the initial text copying the main input string
     memcpy(ret, text, text_len);
     // try to fulfill the return string with the "fill_text" continuously
     int32_t copied_chars_count = 0;
     int32_t copied_chars_position = 0;
     while (actual_text_len + copied_chars_count < return_length) {
       // walk fill_text one character at a time to find how many of its bytes
       // are taken by this pass
       int32_t fill_length = 0;
       while (fill_length < fill_text_len &&
              actual_text_len + copied_chars_count < return_length) {
         int32_t char_len = utf8_char_length(fill_text[fill_length]);
         // ignore invalid char on the fill text, considering it as size 1
         if (char_len == 0) char_len = 1;
         // a multi-byte character cut short by the end of fill_text must not push
         // the copy past the end of fill_text
         if (char_len > fill_text_len - fill_length) {
           char_len = fill_text_len - fill_length;
         }
         fill_length += char_len;
         copied_chars_count++;
       }
       memcpy(ret + text_len + copied_chars_position, fill_text, fill_length);
       copied_chars_position += fill_length;
     }
     *out_len = copied_chars_position + text_len;
     return ret;
   }
 }

 // Left pad variant without an explicit fill text: a single space is used.
 FORCE_INLINE
 const char* lpad_utf8_int32(gdv_int64 context, const char* text, gdv_int32 text_len,
                             gdv_int32 return_length, gdv_int32* out_len) {
   const gdv_int32 space_len = 1;
   return lpad_utf8_int32_utf8(context, text, text_len, return_length, " ", space_len,
                               out_len);
 }

 // Right pad variant without an explicit fill text: a single space is used.
 FORCE_INLINE
 const char* rpad_utf8_int32(gdv_int64 context, const char* text, gdv_int32 text_len,
                             gdv_int32 return_length, gdv_int32* out_len) {
   const gdv_int32 space_len = 1;
   return rpad_utf8_int32_utf8(context, text, text_len, return_length, " ", space_len,
                               out_len);
 }

 // Returns the index-th (1-based) piece of text when it is split on delimiter.
 //
 // The returned pointer points inside text; nothing is copied. An index below 1
 // sets an error on the context. An empty string is returned when text has fewer
 // than index pieces. With an empty delimiter or an empty text, text itself is
 // returned.
 FORCE_INLINE
 const char* split_part(gdv_int64 context, const char* text, gdv_int32 text_len,
                        const char* delimiter, gdv_int32 delim_len, gdv_int32 index,
                        gdv_int32* out_len) {
   *out_len = 0;
   if (index < 1) {
     char error_message[100];
     snprintf(error_message, sizeof(error_message),
              "Index in split_part must be positive, value provided was %d", index);
     gdv_fn_context_set_error_msg(context, error_message);
     return "";
   }

   if (delim_len == 0 || text_len == 0) {
     // output will just be text if no delimiter is provided
     *out_len = text_len;
     return text;
   }

   int part_start = 0;
   for (int part_no = 1; part_start < text_len; ++part_no) {
     // position right after the next delimiter, or -1 when there is none left
     const int after_delim = match_string(text, text_len, part_start, delimiter, delim_len);
     const bool delim_found = (after_delim != -1);

     if (part_no == index) {
       // the wanted piece ends where the delimiter starts, or at the end of the
       // text when this is the last piece
       const int part_end = delim_found ? after_delim - delim_len : text_len;
       *out_len = part_end - part_start;
       return text + part_start;
     }

     if (!delim_found) {
       // reached the end before getting to the wanted piece
       return "";
     }

     part_start = after_delim;
   }

   return "";
 }

 // Returns the x leftmost characters of a given string. Cases:
 //     LEFT("TestString", 10) => "TestString"
 //     LEFT("TestString", 3) => "Tes"
 //     LEFT("TestString", -3) => "TestStr"
 // A negative number drops that many characters from the right end instead.
 // The returned pointer is text itself; only *out_len changes.
 FORCE_INLINE
 const char* left_utf8_int32(gdv_int64 context, const char* text, gdv_int32 text_len,
                             gdv_int32 number, gdv_int32* out_len) {
   if (text_len == 0 || number == 0) {
     *out_len = 0;
     return "";
   }

   const int32_t char_count = utf8_length(context, text, text_len);

   // char_count is zero if input has invalid utf8 char; a negative number that
   // drops every character (left('abcdef', -6), left('abcdef', -7)) is empty too
   if (char_count == 0 || (number < 0 && -(number) >= char_count)) {
     *out_len = 0;
     return "";
   }

   // number of characters to keep: `number` itself when positive, otherwise
   // whatever is left after dropping -number characters
   const int32_t wanted_chars = (number > 0) ? number : number + char_count;

   // advance character by character until enough of them were consumed or the
   // text ended (case where left('abc', 5) -> 'abc')
   int consumed_bytes = 0;
   int seen_chars = 0;
   while (consumed_bytes < text_len) {
     consumed_bytes += utf8_char_length(text[consumed_bytes]);
     if (++seen_chars == wanted_chars) {
       break;
     }
   }

   *out_len = consumed_bytes;
   return text;
 }

 // Returns the x rightmost characters of a given string. Cases:
 //     RIGHT("TestString", 10) => "TestString"
 //     RIGHT("TestString", 3) => "ing"
 //     RIGHT("TestString", -3) => "tString"
 // A negative number drops that many characters from the left end instead.
 // The returned pointer points inside text; nothing is copied.
 FORCE_INLINE
 const char* right_utf8_int32(gdv_int64 context, const char* text, gdv_int32 text_len,
                              gdv_int32 number, gdv_int32* out_len) {
   if (text_len == 0 || number == 0) {
     *out_len = 0;
     return "";
   }

   // number of utf8 characters in the text
   const int32_t char_count = utf8_length(context, text, text_len);

   // char_count is zero if input has invalid utf8 char; a negative number that
   // drops every character (right('abcdef', -6), right('abcdef', -7)) is empty too
   if (char_count == 0 || (number < 0 && -(number) >= char_count)) {
     *out_len = 0;
     return "";
   }

   // character position where the result starts (inclusive)
   int32_t start_char_pos = 0;
   if (number < 0) {
     // skip the first -number characters
     start_char_pos = number * -1;
   } else if (char_count > number) {
     // keep only the last `number` characters
     start_char_pos = char_count - number;
   }
   // otherwise right('abc', 5) ==> 'abc', starting at the first character

   // translate the character position into a byte position
   const int32_t start_byte_pos = utf8_byte_pos(context, text, text_len, start_char_pos);

   *out_len = (text_len - start_byte_pos);
   return text + start_byte_pos;
 }

 // Converts a string with hex escapes into its binary form: every "\xHH" (or "\XHH")
 // sequence with two hexadecimal digits becomes the single byte it encodes, all other
 // bytes are copied unchanged.
 //
 // An empty input yields an empty string. If the allocation fails an error is set
 // on the context, *out_len is set to 0 and "" is returned.
 FORCE_INLINE
 const char* binary_string(gdv_int64 context, const char* text, gdv_int32 text_len,
                           gdv_int32* out_len) {
   // handle the empty input before allocating, so that no zero-sized buffer is
   // requested and no allocation error can be raised for it
   if (text_len <= 0) {
     *out_len = 0;
     return "";
   }

   // the output is never longer than the input
   gdv_binary ret =
       reinterpret_cast<gdv_binary>(gdv_fn_context_arena_malloc(context, text_len));

   if (ret == nullptr) {
     gdv_fn_context_set_error_msg(context, "Could not allocate memory for output string");
     *out_len = 0;
     return "";
   }

   // converting hex encoded string to normal string
   int j = 0;
   for (int i = 0; i < text_len; i++, j++) {
     // an escape needs 4 bytes: '\\', 'x', and two digits
     if (text[i] == '\\' && i + 3 < text_len &&
         (text[i + 1] == 'x' || text[i + 1] == 'X')) {
       char hd1 = text[i + 2];
       char hd2 = text[i + 3];
       // isxdigit must not receive a negative char value
       if (isxdigit(static_cast<unsigned char>(hd1)) &&
           isxdigit(static_cast<unsigned char>(hd2))) {
         // [a-fA-F0-9]
         ret[j] = to_binary_from_hex(hd1) * 16 + to_binary_from_hex(hd2);
         // skip the rest of the escape sequence
         i += 3;
       } else {
         ret[j] = text[i];
       }
     } else {
       ret[j] = text[i];
     }
   }
   *out_len = j;
   return ret;
 }

 // Generates cast<TYPE_NAME>_varbinary, which parses a hexadecimal string, with an
 // optional leading '-', into OUT_TYPE. MIN_VALUE must be the smallest value that
 // OUT_TYPE can represent.
 //
 // The value is accumulated as a negative number, so that the smallest value of
 // OUT_TYPE can be parsed, and it is negated at the end for inputs without '-'.
 // Overflow is checked before each step instead of relying on signed wraparound.
 // Any error (empty input, lone '-', invalid character, overflow) is set on the
 // context and -1 is returned.
 #define CAST_INT_BIGINT_VARBINARY(OUT_TYPE, TYPE_NAME, MIN_VALUE)                      \
   FORCE_INLINE                                                                         \
   OUT_TYPE                                                                             \
   cast##TYPE_NAME##_varbinary(gdv_int64 context, const char* in, int32_t in_len) {     \
     if (in_len == 0) {                                                                 \
       gdv_fn_context_set_error_msg(context, "Can't cast an empty string.");            \
       return -1;                                                                       \
     }                                                                                  \
     char sign = in[0];                                                                 \
                                                                                        \
     bool negative = false;                                                             \
     if (sign == '-') {                                                                 \
       negative = true;                                                                 \
       /* Ignores the sign char in the hexadecimal string */                            \
       in++;                                                                            \
       in_len--;                                                                        \
     }                                                                                  \
                                                                                        \
     if (negative && in_len == 0) {                                                     \
       gdv_fn_context_set_error_msg(context,                                            \
                                    "Can't cast hexadecimal with only a minus sign.");  \
       return -1;                                                                       \
     }                                                                                  \
                                                                                        \
     OUT_TYPE result = 0;                                                               \
     int digit;                                                                         \
                                                                                        \
     int read_index = 0;                                                                \
     while (read_index < in_len) {                                                      \
       char c1 = in[read_index];                                                        \
       /* isxdigit must not receive a negative char value */                            \
       if (isxdigit(static_cast<unsigned char>(c1))) {                                  \
         digit = to_binary_from_hex(c1);                                                \
                                                                                        \
         /* result * 16 - digit fits only if result >= (MIN_VALUE + digit) / 16; */     \
         /* the division truncates toward zero, i.e. rounds up for negatives */         \
         if (result < ((MIN_VALUE) + digit) / 16) {                                     \
           gdv_fn_context_set_error_msg(context, "Integer overflow.");                  \
           return -1;                                                                   \
         }                                                                              \
         result = static_cast<OUT_TYPE>(result * 16 - digit);                           \
         read_index++;                                                                  \
       } else {                                                                         \
         gdv_fn_context_set_error_msg(context,                                          \
                                      "The hexadecimal given has invalid characters."); \
         return -1;                                                                     \
       }                                                                                \
     }                                                                                  \
     if (!negative) {                                                                   \
       /* the smallest value has no positive counterpart */                             \
       if (result == (MIN_VALUE)) {                                                     \
         gdv_fn_context_set_error_msg(context, "Integer overflow.");                    \
         return -1;                                                                     \
       }                                                                                \
       result = static_cast<OUT_TYPE>(-result);                                         \
     }                                                                                  \
     return result;                                                                     \
   }

 CAST_INT_BIGINT_VARBINARY(int32_t, INT, INT32_MIN)
 CAST_INT_BIGINT_VARBINARY(int64_t, BIGINT, INT64_MIN)

 #undef CAST_INT_BIGINT_VARBINARY

 // Produces the binary representation of a string y characters long derived by starting
 // at offset 'x' and considering the defined length 'y'. Notice that the offset index
 // may be a negative number (starting from the end of the string), or a positive number
 // starting on index 1. Cases:
 //     BYTE_SUBSTR("TestString", 1, 10) => "TestString"
 //     BYTE_SUBSTR("TestString", 5, 10) => "String"
 //     BYTE_SUBSTR("TestString", -6, 10) => "String"
 //     BYTE_SUBSTR("TestString", -600, 10) => "TestString"
 //     BYTE_SUBSTR("TestString", 600, 10) => ""
 // The selected bytes are copied into a newly allocated buffer. If the allocation
 // fails an error is set on the context, *out_len is set to 0 and "" is returned.
 FORCE_INLINE
 const char* byte_substr_binary_int32_int32(gdv_int64 context, const char* text,
                                            gdv_int32 text_len, gdv_int32 offset,
                                            gdv_int32 length, gdv_int32* out_len) {
   // the first offset position for a string is 1, so not consider offset == 0
   // also, the length should be always a positive number
   if (text_len <= 0 || offset == 0 || length <= 0) {
     *out_len = 0;
     return "";
   }

   // zero-based start position; a negative offset reaching before the start of the
   // text is clamped to the first byte
   int32_t startPos = 0;
   if (offset > 0) {
     startPos = offset - 1;
   } else if (text_len + offset >= 0) {
     startPos = text_len + offset;
   }

   // an offset past the end of the text selects nothing; without this check the
   // length below would be negative
   if (startPos >= text_len) {
     *out_len = 0;
     return "";
   }

   // truncate the length to the bytes available after startPos; comparing against
   // the remaining size avoids overflowing startPos + length
   const int32_t available = text_len - startPos;
   const int32_t copy_len = (length < available) ? length : available;

   char* ret =
       reinterpret_cast<gdv_binary>(gdv_fn_context_arena_malloc(context, copy_len));

   if (ret == nullptr) {
     gdv_fn_context_set_error_msg(context, "Could not allocate memory for output string");
     *out_len = 0;
     return "";
   }

   memcpy(ret, text + startPos, copy_len);
   *out_len = copy_len;
   return ret;
 }

 // Appends one word to out_buf at position *out_idx, preceded by the separator when
 // a valid word was already appended before. Words flagged as not valid are skipped.
 // *out_idx is advanced past the written bytes and *seenAnyValidInput is set once
 // a word is written. The caller provides a buffer that is large enough.
 FORCE_INLINE
 void concat_word(char* out_buf, int* out_idx, const char* in_buf, int in_len,
                  bool in_validity, const char* separator, int separator_len,
                  bool* seenAnyValidInput) {
   if (in_validity) {
     int write_at = *out_idx;
     if (*seenAnyValidInput) {
       // not the first word: the separator goes in front of it
       memcpy(out_buf + write_at, separator, separator_len);
       write_at += separator_len;
     }
     memcpy(out_buf + write_at, in_buf, in_len);
     *seenAnyValidInput = true;
     *out_idx = write_at + in_len;
   }
 }

 // Joins the valid words among word1 and word2, placing the separator between them.
 //
 // A null separator gives a null result (*out_valid = false). Words that are not
 // valid are skipped. When the result is empty, "" is returned with *out_valid =
 // true. If the allocation fails an error is set on the context and the result is
 // flagged as not valid.
 FORCE_INLINE
 const char* concat_ws_utf8_utf8(int64_t context, const char* separator,
                                 int32_t separator_len, bool separator_validity,
                                 const char* word1, int32_t word1_len, bool word1_validity,
                                 const char* word2, int32_t word2_len, bool word2_validity,
                                 bool* out_valid, int32_t* out_len) {
   *out_len = 0;
   // If separator is null, always return null
   if (!separator_validity) {
     *out_valid = false;
     return "";
   }

   // size of the result: the valid words plus one separator between each pair
   int32_t total_len = 0;
   int valid_words = 0;
   if (word1_validity) {
     total_len += word1_len;
     valid_words++;
   }
   if (word2_validity) {
     total_len += word2_len;
     valid_words++;
   }
   if (valid_words > 1) {
     total_len += separator_len * (valid_words - 1);
   }

   if (total_len == 0) {
     *out_valid = true;
     return "";
   }

   char* out = reinterpret_cast<char*>(gdv_fn_context_arena_malloc(context, total_len));
   if (out == nullptr) {
     gdv_fn_context_set_error_msg(context, "Could not allocate memory for output string");
     *out_len = 0;
     *out_valid = false;
     return "";
   }

   int written = 0;
   bool wrote_any_word = false;
   concat_word(out, &written, word1, word1_len, word1_validity, separator, separator_len,
               &wrote_any_word);
   concat_word(out, &written, word2, word2_len, word2_validity, separator, separator_len,
               &wrote_any_word);

   *out_len = written;
   *out_valid = true;
   return out;
 }

 // Joins the valid words among word1, word2 and word3, placing the separator
 // between them.
 //
 // A null separator gives a null result (*out_valid = false). Words that are not
 // valid are skipped. When the result is empty, "" is returned with *out_valid =
 // true. If the allocation fails an error is set on the context and the result is
 // flagged as not valid.
 FORCE_INLINE
 const char* concat_ws_utf8_utf8_utf8(
     int64_t context, const char* separator, int32_t separator_len,
     bool separator_validity, const char* word1, int32_t word1_len, bool word1_validity,
     const char* word2, int32_t word2_len, bool word2_validity, const char* word3,
     int32_t word3_len, bool word3_validity, bool* out_valid, int32_t* out_len) {
   *out_len = 0;
   // If separator is null, always return null
   if (!separator_validity) {
     *out_valid = false;
     return "";
   }

   // size of the result: the valid words plus one separator between each pair
   int32_t total_len = 0;
   int valid_words = 0;
   if (word1_validity) {
     total_len += word1_len;
     valid_words++;
   }
   if (word2_validity) {
     total_len += word2_len;
     valid_words++;
   }
   if (word3_validity) {
     total_len += word3_len;
     valid_words++;
   }
   if (valid_words > 1) {
     total_len += separator_len * (valid_words - 1);
   }

   if (total_len == 0) {
     *out_valid = true;
     return "";
   }

   char* out = reinterpret_cast<char*>(gdv_fn_context_arena_malloc(context, total_len));
   if (out == nullptr) {
     gdv_fn_context_set_error_msg(context, "Could not allocate memory for output string");
     *out_len = 0;
     *out_valid = false;
     return "";
   }

   int written = 0;
   bool wrote_any_word = false;
   concat_word(out, &written, word1, word1_len, word1_validity, separator, separator_len,
               &wrote_any_word);
   concat_word(out, &written, word2, word2_len, word2_validity, separator, separator_len,
               &wrote_any_word);
   concat_word(out, &written, word3, word3_len, word3_validity, separator, separator_len,
               &wrote_any_word);

   *out_len = written;
   *out_valid = true;
   return out;
 }

 // Joins up to four utf8 words into one arena-allocated string, using `separator`.
 //
 // - A null separator yields a null result (*out_valid = false, *out_len = 0).
 // - Only words whose validity flag is set count towards the output size; room for one
 //   separator is reserved between each pair of them.
 // - When the computed size is zero a valid empty string is returned and nothing is
 //   allocated.
 // - On allocation failure an error is recorded on `context` and the result is null.
 //
 // The actual copying is delegated to concat_word (defined earlier in this file), which
 // receives every word together with its validity flag.
 // NOTE(review): concat_word is presumably what skips null words and emits the
 // separator between written ones - confirm against its definition.
 FORCE_INLINE
 const char* concat_ws_utf8_utf8_utf8_utf8(
     int64_t context, const char* separator, int32_t separator_len,
     bool separator_validity, const char* word1, int32_t word1_len, bool word1_validity,
     const char* word2, int32_t word2_len, bool word2_validity, const char* word3,
     int32_t word3_len, bool word3_validity, const char* word4, int32_t word4_len,
     bool word4_validity, bool* out_valid, int32_t* out_len) {
   *out_len = 0;

   // If separator is null, always return null
   if (!separator_validity) {
     *out_valid = false;
     return "";
   }

   const int word_count = 4;
   const char* const words[] = {word1, word2, word3, word4};
   const int32_t lengths[] = {word1_len, word2_len, word3_len, word4_len};
   const bool present[] = {word1_validity, word2_validity, word3_validity,
                           word4_validity};

   // Size the output: every valid word plus one separator between neighbours.
   int32_t total_len = 0;
   int valid_words = 0;
   for (int w = 0; w < word_count; ++w) {
     if (present[w]) {
       total_len += lengths[w];
       ++valid_words;
     }
   }
   if (valid_words > 1) {
     total_len += separator_len * (valid_words - 1);
   }

   if (total_len == 0) {
     *out_valid = true;
     return "";
   }

   char* buffer = reinterpret_cast<char*>(gdv_fn_context_arena_malloc(context, total_len));
   if (buffer == nullptr) {
     gdv_fn_context_set_error_msg(context, "Could not allocate memory for output string");
     *out_valid = false;
     return "";
   }

   int write_pos = 0;
   bool wrote_any = false;
   for (int w = 0; w < word_count; ++w) {
     concat_word(buffer, &write_pos, words[w], lengths[w], present[w], separator,
                 separator_len, &wrote_any);
   }

   *out_len = write_pos;
   *out_valid = true;
   return buffer;
 }

 // Joins up to five utf8 words into one arena-allocated string, using `separator`.
 //
 // - A null separator yields a null result (*out_valid = false, *out_len = 0).
 // - Only words whose validity flag is set count towards the output size; room for one
 //   separator is reserved between each pair of them.
 // - When the computed size is zero a valid empty string is returned and nothing is
 //   allocated.
 // - On allocation failure an error is recorded on `context` and the result is null.
 //
 // The actual copying is delegated to concat_word (defined earlier in this file), which
 // receives every word together with its validity flag.
 // NOTE(review): concat_word is presumably what skips null words and emits the
 // separator between written ones - confirm against its definition.
 FORCE_INLINE
 const char* concat_ws_utf8_utf8_utf8_utf8_utf8(
     int64_t context, const char* separator, int32_t separator_len,
     bool separator_validity, const char* word1, int32_t word1_len, bool word1_validity,
     const char* word2, int32_t word2_len, bool word2_validity, const char* word3,
     int32_t word3_len, bool word3_validity, const char* word4, int32_t word4_len,
     bool word4_validity, const char* word5, int32_t word5_len, bool word5_validity,
     bool* out_valid, int32_t* out_len) {
   *out_len = 0;

   // If separator is null, always return null
   if (!separator_validity) {
     *out_valid = false;
     return "";
   }

   const int word_count = 5;
   const char* const words[] = {word1, word2, word3, word4, word5};
   const int32_t lengths[] = {word1_len, word2_len, word3_len, word4_len, word5_len};
   const bool present[] = {word1_validity, word2_validity, word3_validity,
                           word4_validity, word5_validity};

   // Size the output: every valid word plus one separator between neighbours.
   int32_t total_len = 0;
   int valid_words = 0;
   for (int w = 0; w < word_count; ++w) {
     if (present[w]) {
       total_len += lengths[w];
       ++valid_words;
     }
   }
   if (valid_words > 1) {
     total_len += separator_len * (valid_words - 1);
   }

   if (total_len == 0) {
     *out_valid = true;
     return "";
   }

   char* buffer = reinterpret_cast<char*>(gdv_fn_context_arena_malloc(context, total_len));
   if (buffer == nullptr) {
     gdv_fn_context_set_error_msg(context, "Could not allocate memory for output string");
     *out_valid = false;
     return "";
   }

   int write_pos = 0;
   bool wrote_any = false;
   for (int w = 0; w < word_count; ++w) {
     concat_word(buffer, &write_pos, words[w], lengths[w], present[w], separator,
                 separator_len, &wrote_any);
   }

   *out_len = write_pos;
   *out_valid = true;
   return buffer;
 }

 FORCE_INLINE
 const char* elt_int32_utf8_utf8(int32_t pos, bool pos_validity, const char* word1,
                                 int32_t word1_len, bool in1_validity, const char* word2,
                                 int32_t word2_len, bool in2_validity, bool* out_valid,
                                 int32_t* out_len) {
   // Returns the word selected by the 1-based index `pos` (SQL ELT semantics).
   //
   // The result is null (*out_valid = false, *out_len = 0, nullptr returned) when
   // `pos` is null, when `pos` is outside [1, 2], or when the selected word is itself
   // null. Otherwise the selected word's own pointer is returned (no copy is made)
   // and *out_len is set to its length.
   const int32_t word_count = 2;
   const char* const words[] = {word1, word2};
   const int32_t lengths[] = {word1_len, word2_len};
   const bool validities[] = {in1_validity, in2_validity};

   // The value of a null index is meaningless, so its validity is tested before the
   // value is used; the range test then guards the table lookups below.
   if (!pos_validity || pos < 1 || pos > word_count || !validities[pos - 1]) {
     *out_len = 0;
     *out_valid = false;
     return nullptr;
   }

   *out_valid = true;
   *out_len = lengths[pos - 1];
   return words[pos - 1];
 }

 FORCE_INLINE
 const char* elt_int32_utf8_utf8_utf8(int32_t pos, bool pos_validity, const char* word1,
                                      int32_t word1_len, bool word1_validity,
                                      const char* word2, int32_t word2_len,
                                      bool word2_validity, const char* word3,
                                      int32_t word3_len, bool word3_validity,
                                      bool* out_valid, int32_t* out_len) {
   // Returns the word selected by the 1-based index `pos` (SQL ELT semantics).
   //
   // The result is null (*out_valid = false, *out_len = 0, nullptr returned) when
   // `pos` is null, when `pos` is outside [1, 3], or when the selected word is itself
   // null. Otherwise the selected word's own pointer is returned (no copy is made)
   // and *out_len is set to its length.
   const int32_t word_count = 3;
   const char* const words[] = {word1, word2, word3};
   const int32_t lengths[] = {word1_len, word2_len, word3_len};
   const bool validities[] = {word1_validity, word2_validity, word3_validity};

   // The value of a null index is meaningless, so its validity is tested before the
   // value is used; the range test then guards the table lookups below.
   if (!pos_validity || pos < 1 || pos > word_count || !validities[pos - 1]) {
     *out_len = 0;
     *out_valid = false;
     return nullptr;
   }

   *out_valid = true;
   *out_len = lengths[pos - 1];
   return words[pos - 1];
 }

 FORCE_INLINE
 const char* elt_int32_utf8_utf8_utf8_utf8(
     int32_t pos, bool pos_validity, const char* word1, int32_t word1_len,
     bool word1_validity, const char* word2, int32_t word2_len, bool word2_validity,
     const char* word3, int32_t word3_len, bool word3_validity, const char* word4,
     int32_t word4_len, bool word4_validity, bool* out_valid, int32_t* out_len) {
   // Returns the word selected by the 1-based index `pos` (SQL ELT semantics).
   //
   // The result is null (*out_valid = false, *out_len = 0, nullptr returned) when
   // `pos` is null, when `pos` is outside [1, 4], or when the selected word is itself
   // null. Otherwise the selected word's own pointer is returned (no copy is made)
   // and *out_len is set to its length.
   const int32_t word_count = 4;
   const char* const words[] = {word1, word2, word3, word4};
   const int32_t lengths[] = {word1_len, word2_len, word3_len, word4_len};
   const bool validities[] = {word1_validity, word2_validity, word3_validity,
                              word4_validity};

   // The value of a null index is meaningless, so its validity is tested before the
   // value is used; the range test then guards the table lookups below.
   if (!pos_validity || pos < 1 || pos > word_count || !validities[pos - 1]) {
     *out_len = 0;
     *out_valid = false;
     return nullptr;
   }

   *out_valid = true;
   *out_len = lengths[pos - 1];
   return words[pos - 1];
 }

 FORCE_INLINE
 const char* elt_int32_utf8_utf8_utf8_utf8_utf8(
     int32_t pos, bool pos_validity, const char* word1, int32_t word1_len,
     bool word1_validity, const char* word2, int32_t word2_len, bool word2_validity,
     const char* word3, int32_t word3_len, bool word3_validity, const char* word4,
     int32_t word4_len, bool word4_validity, const char* word5, int32_t word5_len,
     bool word5_validity, bool* out_valid, int32_t* out_len) {
   // Returns the word selected by the 1-based index `pos` (SQL ELT semantics).
   //
   // The result is null (*out_valid = false, *out_len = 0, nullptr returned) when
   // `pos` is null, when `pos` is outside [1, 5], or when the selected word is itself
   // null. Otherwise the selected word's own pointer is returned (no copy is made)
   // and *out_len is set to its length.
   const int32_t word_count = 5;
   const char* const words[] = {word1, word2, word3, word4, word5};
   const int32_t lengths[] = {word1_len, word2_len, word3_len, word4_len, word5_len};
   const bool validities[] = {word1_validity, word2_validity, word3_validity,
                              word4_validity, word5_validity};

   // The value of a null index is meaningless, so its validity is tested before the
   // value is used; the range test then guards the table lookups below.
   if (!pos_validity || pos < 1 || pos > word_count || !validities[pos - 1]) {
     *out_len = 0;
     *out_valid = false;
     return nullptr;
   }

   *out_valid = true;
   *out_len = lengths[pos - 1];
   return words[pos - 1];
 }

 // Gets a binary object and returns its hexadecimal representation. That representation
 // maps each byte in the input to a 2-length string containing an upper-case
 // hexadecimal number.
 // - Examples:
 //     - foo -> 666F6F = 66[f] 6F[o] 6F[o]
 //     - bar -> 626172 = 62[b] 61[a] 72[r]
 //
 // The output is allocated from the context arena and is followed by a NUL byte that is
 // not counted in *out_len. An empty (or negative-length) input yields an empty string.
 // If the output size does not fit in int32_t, or the allocation fails, an error is set
 // on `context` and an empty string is returned.
 FORCE_INLINE
 const char* to_hex_binary(int64_t context, const char* text, int32_t text_len,
                           int32_t* out_len) {
   if (text_len <= 0) {
     *out_len = 0;
     return "";
   }

   // Two digits per input byte plus the trailing NUL must be representable as int32_t,
   // otherwise the size computation below would overflow.
   if (text_len > (INT32_MAX - 1) / 2) {
     gdv_fn_context_set_error_msg(context, "Would overflow maximum output size");
     *out_len = 0;
     return "";
   }

   const int32_t hex_len = text_len * 2;
   auto ret = reinterpret_cast<char*>(gdv_fn_context_arena_malloc(context, hex_len + 1));

   if (ret == nullptr) {
     gdv_fn_context_set_error_msg(context, "Could not allocate memory for output string");
     *out_len = 0;
     return "";
   }

   const char* hex_digits = "0123456789ABCDEF";
   char* cursor = ret;

   for (gdv_int32 i = 0; i < text_len; i++) {
     // Go through unsigned char so bytes >= 0x80 are not sign-extended.
     const unsigned char byte = static_cast<unsigned char>(text[i]);
     *cursor++ = hex_digits[byte >> 4];
     *cursor++ = hex_digits[byte & 0x0F];
   }
   *cursor = '\0';

   *out_len = hex_len;
   return ret;
 }

 // Formats a 64-bit integer as upper-case hexadecimal without leading zeros. The value
 // is printed as its unsigned 64-bit bit pattern, so negative inputs produce 16 digits.
 //
 // The output is allocated from the context arena; *out_len receives the number of
 // digits. On allocation or formatting failure an error is set on `context`, *out_len
 // is set to 0 and an empty string is returned.
 FORCE_INLINE
 const char* to_hex_int64(int64_t context, int64_t data, int32_t* out_len) {
   // Up to 16 hex digits plus the NUL terminator that snprintf always appends; the
   // buffer must hold both, otherwise a 16-digit value writes one byte out of bounds.
   const int32_t buf_size = static_cast<int32_t>(2 * sizeof(int64_t) + 1);
   auto ret = reinterpret_cast<char*>(gdv_fn_context_arena_malloc(context, buf_size));

   if (ret == nullptr) {
     gdv_fn_context_set_error_msg(context, "Could not allocate memory for output string");
     *out_len = 0;
     return "";
   }

   // PRIX64 expects an unsigned 64-bit argument.
   const int written =
       snprintf(ret, buf_size, "%" PRIX64, static_cast<uint64_t>(data));
   if (written < 0) {
     gdv_fn_context_set_error_msg(context, "Could not format value as hexadecimal");
     *out_len = 0;
     return "";
   }

   *out_len = static_cast<int32_t>(written);
   return ret;
 }

 // Formats a 32-bit integer as upper-case hexadecimal without leading zeros. The value
 // is printed as its unsigned 32-bit bit pattern, so negative inputs produce 8 digits.
 //
 // The output is allocated from the context arena; *out_len receives the number of
 // digits. On allocation or formatting failure an error is set on `context`, *out_len
 // is set to 0 and an empty string is returned.
 FORCE_INLINE
 const char* to_hex_int32(int64_t context, int32_t data, int32_t* out_len) {
   // Up to 8 hex digits plus the NUL terminator that snprintf always appends; the
   // buffer must hold both, otherwise an 8-digit value writes one byte out of bounds.
   const int32_t buf_size = static_cast<int32_t>(2 * sizeof(int32_t) + 1);
   auto ret = reinterpret_cast<char*>(gdv_fn_context_arena_malloc(context, buf_size));

   if (ret == nullptr) {
     gdv_fn_context_set_error_msg(context, "Could not allocate memory for output string");
     *out_len = 0;
     return "";
   }

   // PRIX32 expects an unsigned 32-bit argument.
   const int written =
       snprintf(ret, buf_size, "%" PRIX32, static_cast<uint32_t>(data));
   if (written < 0) {
     gdv_fn_context_set_error_msg(context, "Could not format value as hexadecimal");
     *out_len = 0;
     return "";
   }

   *out_len = static_cast<int32_t>(written);
   return ret;
 }

 // Decodes a hexadecimal string into the bytes it represents: every pair of hex
 // digits ([0-9a-fA-F]) in `text` becomes one output byte.
 //
 // - An empty input yields a valid empty string.
 // - A null input, an odd (or negative) length, or any non-hex character yields a null
 //   result (*out_valid = false, *out_len = 0).
 // - On allocation failure an error is set on `context` and the result is null.
 //
 // Digit conversion is delegated to to_binary_from_hex, defined elsewhere in this file.
 // NOTE(review): it presumably maps one hex digit to its 0-15 value - confirm.
 FORCE_INLINE
 const char* from_hex_utf8(int64_t context, const char* text, int32_t text_len,
                           bool text_validity, bool* out_valid, int32_t* out_len) {
   if (text_len == 0) {
     *out_valid = true;
     *out_len = 0;
     return "";
   }

   // the input string should have a length multiple of two and a true validity
   if (text_len < 0 || text_len % 2 != 0 || !text_validity) {
     *out_valid = false;
     *out_len = 0;
     return "";
   }

   char* ret = reinterpret_cast<char*>(gdv_fn_context_arena_malloc(context, text_len / 2));

   if (ret == nullptr) {
     gdv_fn_context_set_error_msg(context, "Could not allocate memory for output string");
     *out_valid = false;
     *out_len = 0;
     return "";
   }

   // converting hex encoded string to normal string
   int32_t j = 0;
   for (int32_t i = 0; i < text_len; i += 2) {
     const char b1 = text[i];
     const char b2 = text[i + 1];
     // The <cctype> classifiers are undefined for negative values, which plain char can
     // hold for bytes >= 0x80, so each byte is converted to unsigned char first.
     if (!isxdigit(static_cast<unsigned char>(b1)) ||
         !isxdigit(static_cast<unsigned char>(b2))) {
       *out_valid = false;
       *out_len = 0;
       return "";
     }
     // [a-fA-F0-9]
     ret[j++] = to_binary_from_hex(b1) * 16 + to_binary_from_hex(b2);
   }
   *out_valid = true;
   *out_len = j;
   return ret;
 }

 // Read-only table that maps each letter of the alphabet to its digit for the soundex
 // algorithm, indexed by (upper-case letter - 'A').
 // ABCDEFGHIJKLMNOPQRSTUVWXYZ -> 01230120022455012623010202
 static const char mappings[] = {'0', '1', '2', '3', '0', '1', '2', '0', '0',
                                 '2', '2', '4', '5', '5', '0', '1', '2', '6',
                                 '2', '3', '0', '1', '0', '2', '0', '2'};

 // Returns `ch` upper-cased when it is an ASCII letter, or 0 for any other byte.
 // Done by hand rather than with isalpha()/toupper(): those are locale dependent and
 // undefined for negative char values, which the bytes of multi-byte UTF-8 sequences
 // are wherever plain char is signed.
 FORCE_INLINE
 char soundex_ascii_upper(char ch) {
   if (ch >= 'a' && ch <= 'z') {
     return static_cast<char>(ch - 'a' + 'A');
   }
   if (ch >= 'A' && ch <= 'Z') {
     return ch;
   }
   return 0;
 }

 // Returns the soundex code for a given string
 //
 // The code is always four characters: the first ASCII letter of the input
 // (upper-cased) followed by three digits. Bytes that are not ASCII letters are
 // ignored. A null input, or an input without any ASCII letter, gives a null result;
 // an empty input gives a valid empty string.
 //
 // The code is built with the following steps:
 //    1. Retain the first letter of the string.
 //    2. Map every following letter to a digit through `mappings`:
 //        b, f, p, v -> 1
 //        c, g, j, k, q, s, x, z -> 2
 //        d, t -> 3
 //        l -> 4
 //        m, n -> 5
 //        r -> 6
 //        a, e, i, o, u, y, h, w -> 0 (the "special" letters)
 //    3. Collapse runs of consecutive letters that map to the same digit into one
 //    digit. Ignored (non-letter) bytes do not interrupt a run. The first digit is also
 //    dropped when it equals the digit of the retained first letter.
 //    4. Drop the 0 digits, keep at most the first three remaining digits, and pad
 //    with zeros if fewer than three are left.
 //
 // The result is allocated from the context arena; on allocation failure an error is
 // set on `context` and the result is null.
 FORCE_INLINE
 const char* soundex_utf8(gdv_int64 context, const char* in, gdv_int32 in_len,
                          bool in_validity, bool* out_valid, int32_t* out_len) {
   if (!in_validity) {
     *out_valid = false;
     *out_len = 0;
     return "";
   }

   if (in_len <= 0) {
     *out_valid = true;
     *out_len = 0;
     return "";
   }

   // Locate the first letter; everything before it is ignored.
   int first_idx = 0;
   char first_letter = 0;
   for (; first_idx < in_len; ++first_idx) {
     first_letter = soundex_ascii_upper(in[first_idx]);
     if (first_letter != 0) {
       break;
     }
   }

   // Without any letter there is nothing to encode
   if (first_letter == 0) {
     *out_valid = false;
     *out_len = 0;
     return "";
   }

   // The soundex code is composed by one letter and three numbers
   const int code_len = 4;
   char* ret = reinterpret_cast<char*>(gdv_fn_context_arena_malloc(context, code_len));
   if (ret == nullptr) {
     gdv_fn_context_set_error_msg(context, "Could not allocate memory for output string");
     *out_valid = false;
     *out_len = 0;
     return "";
   }

   ret[0] = first_letter;
   int ret_len = 1;

   const char first_digit = mappings[first_letter - 'A'];
   // '\0' never appears in `mappings`, so the first mapped digit always starts a run.
   char prev_digit = '\0';
   bool is_first_run = true;

   for (int i = first_idx + 1; i < in_len && ret_len < code_len; ++i) {
     const char letter = soundex_ascii_upper(in[i]);
     if (letter == 0) {
       continue;
     }

     const char digit = mappings[letter - 'A'];
     if (digit == prev_digit) {
       // Same digit as the previous letter: part of the same run.
       continue;
     }
     prev_digit = digit;

     if (is_first_run) {
       is_first_run = false;
       // If the saved letter's digit is the same as the resulting first digit, skip it
       if (digit == first_digit) {
         continue;
       }
     }

     // Special letters only separate runs; they never reach the output.
     if (digit != '0') {
       ret[ret_len++] = digit;
     }
   }

   // If the return have too few numbers, append with zeros until there are three
   while (ret_len < code_len) {
     ret[ret_len++] = '0';
   }

   *out_valid = true;
   *out_len = code_len;
   return ret;
 }

 FORCE_INLINE
 int32_t instr_utf8(const char* string, int32_t string_len, const char* substring,
                    int32_t substring_len) {
   // Returns the 1-based byte position of the first occurrence of `substring` inside
   // `string`, or 0 when it does not occur. An empty substring matches at position 1.
   if (substring_len == 0) {
     return 1;
   }
   if (substring_len > string_len) {
     return 0;
   }

   // Last offset at which the substring still fits inside the string.
   const int32_t last_start = string_len - substring_len;
   int32_t offset = 0;
   while (offset <= last_start) {
     const char* candidate = string + offset;
     // Cheap first-byte test before paying for the full comparison.
     if (*candidate == *substring && memcmp(candidate, substring, substring_len) == 0) {
       return offset + 1;
     }
     ++offset;
   }
   return 0;
 }
 }  // extern "C"