blob: 86884099a56fe952b56c8a34c9372c7aa10292b3 [file]
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#include <cstddef>
#include <cstring>
#include <string>
#include "common/status.h"
#include "core/assert_cast.h"
#include "core/block/block.h"
#include "core/block/column_numbers.h"
#include "core/column/column_const.h"
#include "core/column/column_nullable.h"
#include "core/column/column_string.h"
#include "core/column/column_vector.h"
#include "core/data_type/data_type_nullable.h"
#include "core/data_type/data_type_number.h"
#include "core/data_type/data_type_string.h"
#include "core/string_ref.h"
#include "exec/common/stringop_substring.h"
#include "exec/common/template_helpers.hpp"
#include "exec/common/util.hpp"
#include "exprs/function/function.h"
#include "exprs/function/function_helpers.h"
#include "exprs/function/simple_function_factory.h"
#include "exprs/function_context.h"
#include "util/simd/vstring_function.h"
namespace doris {
#include "common/compile_check_avoid_begin.h"
/// SQL function `strcmp`: compares two string arguments row by row.
///
/// Every output row holds the value returned by StringRef::compare for the
/// corresponding pair of input rows, stored in an Int8 column.
class FunctionStrcmp : public IFunction {
public:
    static constexpr auto name = "strcmp";

    static FunctionPtr create() { return std::make_shared<FunctionStrcmp>(); }

    String get_name() const override { return name; }

    size_t get_number_of_arguments() const override { return 2; }

    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
        return std::make_shared<DataTypeInt8>();
    }

    /// Compares arguments[0] against arguments[1] and stores the Int8 column at `result`.
    ///
    /// Returns an error status when either argument is not a ColumnString (after
    /// unwrapping a constant column). Previously that case fell through and a
    /// result column that had never been written was published with Status::OK().
    Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments,
                        uint32_t result, size_t input_rows_count) const override {
        const auto& [arg0_column, arg0_const] =
                unpack_if_const(block.get_by_position(arguments[0]).column);
        const auto& [arg1_column, arg1_const] =
                unpack_if_const(block.get_by_position(arguments[1]).column);

        const auto* arg0 = check_and_get_column<ColumnString>(arg0_column.get());
        const auto* arg1 = check_and_get_column<ColumnString>(arg1_column.get());
        if (arg0 == nullptr || arg1 == nullptr) {
            // Fail loudly rather than hand back rows nobody filled in.
            return Status::InternalError("function strcmp expects two string column arguments");
        }

        auto result_column = ColumnInt8::create(input_rows_count);
        if (arg0_const) {
            scalar_vector(arg0->get_data_at(0), *arg1, *result_column);
        } else if (arg1_const) {
            vector_scalar(*arg0, arg1->get_data_at(0), *result_column);
        } else {
            vector_vector(*arg0, *arg1, *result_column);
        }

        block.replace_by_position(result, std::move(result_column));
        return Status::OK();
    }

private:
    /// Constant left operand against every row of `vec1`; writes vec1.size() rows.
    static void scalar_vector(const StringRef str, const ColumnString& vec1, ColumnInt8& res) {
        size_t size = vec1.size();
        for (size_t i = 0; i < size; ++i) {
            res.get_data()[i] = str.compare(vec1.get_data_at(i));
        }
    }

    /// Every row of `vec0` against a constant right operand; writes vec0.size() rows.
    static void vector_scalar(const ColumnString& vec0, const StringRef str, ColumnInt8& res) {
        size_t size = vec0.size();
        for (size_t i = 0; i < size; ++i) {
            res.get_data()[i] = vec0.get_data_at(i).compare(str);
        }
    }

    /// Row-wise comparison of two columns; iterates over vec0.size() rows.
    static void vector_vector(const ColumnString& vec0, const ColumnString& vec1, ColumnInt8& res) {
        size_t size = vec0.size();
        for (size_t i = 0; i < size; ++i) {
            res.get_data()[i] = vec0.get_data_at(i).compare(vec1.get_data_at(i));
        }
    }
};
template <typename Impl>
class FunctionSubstring : public IFunction {
public:
static constexpr auto name = SubstringUtil::name;
String get_name() const override { return name; }
static FunctionPtr create() { return std::make_shared<FunctionSubstring<Impl>>(); }
DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
return std::make_shared<DataTypeString>();
}
DataTypes get_variadic_argument_types_impl() const override {
return Impl::get_variadic_argument_types();
}
size_t get_number_of_arguments() const override {
return get_variadic_argument_types_impl().size();
}
Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments,
uint32_t result, size_t input_rows_count) const override {
return Impl::execute_impl(context, block, arguments, result, input_rows_count);
}
};
/// Three-argument substring overload: (String, Int32, Int32).
struct Substr3Impl {
    /// Declares the (String, Int32, Int32) signature.
    static DataTypes get_variadic_argument_types() {
        auto string_type = std::make_shared<DataTypeString>();
        auto first_int_type = std::make_shared<DataTypeInt32>();
        auto second_int_type = std::make_shared<DataTypeInt32>();
        return {string_type, first_int_type, second_int_type};
    }

    /// All three arguments are already present, so the call goes straight to
    /// SubstringUtil::substring_execute and reports success.
    static Status execute_impl(FunctionContext* context, Block& block,
                               const ColumnNumbers& arguments, uint32_t result,
                               size_t input_rows_count) {
        SubstringUtil::substring_execute(block, arguments, result, input_rows_count);
        return Status::OK();
    }
};
/// Two-argument substring overload: (String, Int32).
///
/// The missing third argument is synthesised as a temporary Int32 column named
/// "strlen" holding each row's byte length, then the three-argument
/// SubstringUtil::substring_execute is invoked.
struct Substr2Impl {
    /// Declares the (String, Int32) signature.
    static DataTypes get_variadic_argument_types() {
        return {std::make_shared<DataTypeString>(), std::make_shared<DataTypeInt32>()};
    }

    static Status execute_impl(FunctionContext* context, Block& block,
                               const ColumnNumbers& arguments, uint32_t result,
                               size_t input_rows_count) {
        ColumnPtr source_column;
        bool source_is_const = false;
        std::tie(source_column, source_is_const) =
                unpack_if_const(block.get_by_position(arguments[0]).column);
        const auto& offsets =
                assert_cast<const ColumnString*>(source_column.get())->get_offsets();

        auto length_column = ColumnInt32::create(input_rows_count);
        auto& lengths = length_column->get_data();

        if (!source_is_const) {
            // Row length is the distance between consecutive offsets; row 0 reads
            // offsets[-1], exactly as the surrounding code base does.
            for (int row = 0; row < input_rows_count; ++row) {
                lengths[row] = offsets[row] - offsets[row - 1];
            }
        } else {
            // A constant string has a single row; replicate its length everywhere.
            std::fill(lengths.begin(), lengths.end(), offsets[0] - offsets[-1]);
        }

        // Append the synthesised length column and pass its position as the third argument.
        block.insert({std::move(length_column), std::make_shared<DataTypeInt32>(), "strlen"});
        ColumnNumbers temp_arguments = {arguments[0], arguments[1], block.columns() - 1};

        SubstringUtil::substring_execute(block, temp_arguments, result, input_rows_count);
        return Status::OK();
    }
};
/// SQL function `left(str, len)`: keeps the leading `len` characters of each string.
///
/// Rows whose `len` is <= 0 or whose string is empty produce an empty string.
/// When the whole string column reports is_ascii(), `len` is applied to bytes;
/// otherwise characters are stepped over using the UTF8_BYTE_LENGTH table.
class FunctionLeft : public IFunction {
public:
    static constexpr auto name = "left";

    static FunctionPtr create() { return std::make_shared<FunctionLeft>(); }

    String get_name() const override { return name; }

    size_t get_number_of_arguments() const override { return 2; }

    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
        return std::make_shared<DataTypeString>();
    }

    /// Unwraps constant arguments, then dispatches to the _execute instantiation
    /// matching (is_ascii, str_const, len_const) and stores the result column.
    Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments,
                        uint32_t result, size_t input_rows_count) const override {
        DCHECK_EQ(arguments.size(), 2);
        auto res = ColumnString::create();
        bool col_const[2];
        ColumnPtr argument_columns[2];
        for (int i = 0; i < 2; ++i) {
            std::tie(argument_columns[i], col_const[i]) =
                    unpack_if_const(block.get_by_position(arguments[i]).column);
        }

        const auto& str_col = assert_cast<const ColumnString&>(*argument_columns[0]);
        const auto& len_col = assert_cast<const ColumnInt32&>(*argument_columns[1]);
        const auto is_ascii = str_col.is_ascii();

        // Turn the three runtime flags into compile-time template arguments.
        std::visit(
                [&](auto is_ascii, auto str_const, auto len_const) {
                    _execute<is_ascii, str_const, len_const>(str_col, len_col, *res,
                                                             input_rows_count);
                },
                make_bool_variant(is_ascii), make_bool_variant(col_const[0]),
                make_bool_variant(col_const[1]));

        block.get_by_position(result).column = std::move(res);
        return Status::OK();
    }

    /// Produces `size` output rows in `res`.
    ///
    /// @tparam is_ascii  take the byte-wise path instead of walking UTF-8 characters
    /// @tparam str_const the string column holds one row reused for every output row
    /// @tparam len_const the length column holds one row reused for every output row
    template <bool is_ascii, bool str_const, bool len_const>
    static void _execute(const ColumnString& str_col, const ColumnInt32& len_col, ColumnString& res,
                         size_t size) {
        auto& res_chars = res.get_chars();
        auto& res_offsets = res.get_offsets();
        res_offsets.resize(size);
        const auto& len_data = len_col.get_data();

        // A prefix is never longer than its source, so the source byte count
        // (times the row count for a constant string) is reserved up front.
        if constexpr (str_const) {
            res_chars.reserve(size * (str_col.get_chars().size()));
        } else {
            res_chars.reserve(str_col.get_chars().size());
        }

        for (int i = 0; i < size; ++i) {
            auto str = str_col.get_data_at(index_check_const<str_const>(i));
            int len = len_data[index_check_const<len_const>(i)];
            if (len <= 0 || str.empty()) {
                StringOP::push_empty_string(i, res_chars, res_offsets);
                continue;
            }

            const char* begin = str.begin();
            const char* p = begin;

            if constexpr (is_ascii) {
                p = begin + std::min(len, static_cast<int>(str.size));
            } else {
                const char* end = str.end();
                for (size_t ni = 0, char_size = 0; ni < len && p < end; ++ni, p += char_size) {
                    char_size = UTF8_BYTE_LENGTH[static_cast<uint8_t>(*p)];
                }
                // A lead byte near the end may announce more bytes than remain
                // (truncated sequence); the last step then lands past `end`.
                // Clamp so the copied prefix never reaches beyond this string.
                if (p > end) {
                    p = end;
                }
            }

            StringOP::push_value_string_reserved_and_allow_overflow({begin, p}, i, res_chars,
                                                                    res_offsets);
        }
    }
};
/// SQL function `right(str, n)`, implemented on top of SubstringUtil::substring_execute.
///
/// Two temporary Int32 columns are appended to the block and passed as the
/// second and third substring arguments:
///   "index"  = max(-n, -char_len(str))
///   "strlen" = char_len(str)
class FunctionRight : public IFunction {
public:
    static constexpr auto name = "right";

    static FunctionPtr create() { return std::make_shared<FunctionRight>(); }

    String get_name() const override { return name; }

    size_t get_number_of_arguments() const override { return 2; }

    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
        return std::make_shared<DataTypeString>();
    }

    Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments,
                        uint32_t result, size_t input_rows_count) const override {
        // Position the first temporary column will occupy once inserted.
        const size_t first_temp_position = block.columns();

        // Expand constants so both inputs can be indexed row by row.
        auto full_str_col =
                block.get_by_position(arguments[0]).column->convert_to_full_column_if_const();
        auto full_count_col =
                block.get_by_position(arguments[1]).column->convert_to_full_column_if_const();
        const auto* strings = assert_cast<const ColumnString*>(full_str_col.get());
        const auto& counts = assert_cast<const ColumnInt32*>(full_count_col.get())->get_data();

        auto index_column = ColumnInt32::create(input_rows_count);
        auto char_len_column = ColumnInt32::create(input_rows_count);
        auto& start_index = index_column->get_data();
        auto& char_len = char_len_column->get_data();

        for (int row = 0; row < input_rows_count; ++row) {
            const auto value = strings->get_data_at(row);
            char_len[row] = simd::VStringFunctions::get_char_len(value.data, value.size);
            // index = max(-n, -char_len): the requested count is bounded by the
            // string's character length.
            // NOTE(review): -counts[row] overflows when n == INT32_MIN — confirm
            // whether callers can pass that value.
            start_index[row] = std::max(-counts[row], -char_len[row]);
        }

        const auto int32_type = std::make_shared<DataTypeInt32>();
        block.insert({std::move(index_column), int32_type, "index"});
        block.insert({std::move(char_len_column), int32_type, "strlen"});

        ColumnNumbers substring_arguments(3);
        substring_arguments[0] = arguments[0];
        substring_arguments[1] = first_temp_position;
        substring_arguments[2] = first_temp_position + 1;

        SubstringUtil::substring_execute(block, substring_arguments, result, input_rows_count);
        return Status::OK();
    }
};
/// Shared implementation behind `null_or_empty` and `not_null_or_empty`.
struct NullOrEmptyImpl {
    // NOTE(review): declares a single UInt8 argument although execute() reads the
    // argument as a string column — confirm where this declaration is consumed.
    static DataTypes get_variadic_argument_types() { return {std::make_shared<DataTypeUInt8>()}; }

    /// Writes a UInt8 column to `result` whose rows are non-zero when the input
    /// row is NULL or a zero-length string. With `reverse` set, every row is
    /// logically negated before being stored.
    static Status execute(FunctionContext* context, Block& block, const ColumnNumbers& arguments,
                          uint32_t result, size_t input_rows_count, bool reverse) {
        auto flags_column = ColumnUInt8::create(input_rows_count, 0);
        auto& flags = flags_column->get_data();

        auto input = block.get_by_position(arguments[0]).column;
        if (auto* nullable = check_and_get_column<const ColumnNullable>(*input)) {
            // Seed the flags from the null map, then continue with the nested column.
            input = nullable->get_nested_column_ptr();
            VectorizedUtils::update_null_map(flags, nullable->get_null_map_data());
        }

        const auto* strings = assert_cast<const ColumnString*>(input.get());
        const auto& offsets = strings->get_offsets();

        for (int row = 0; row < input_rows_count; ++row) {
            // Equal neighbouring offsets mean the row holds zero bytes.
            flags[row] |= (offsets[row] == offsets[row - 1]);
            if (reverse) {
                flags[row] = !flags[row];
            }
        }

        block.replace_by_position(result, std::move(flags_column));
        return Status::OK();
    }
};
/// SQL function `null_or_empty(str)`: UInt8 result from NullOrEmptyImpl::execute
/// with `reverse == false`.
class FunctionNullOrEmpty : public IFunction {
public:
    static constexpr auto name = "null_or_empty";

    static FunctionPtr create() { return std::make_shared<FunctionNullOrEmpty>(); }

    String get_name() const override { return name; }

    size_t get_number_of_arguments() const override { return 1; }

    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
        return std::make_shared<DataTypeUInt8>();
    }

    // Nullable input is handled by NullOrEmptyImpl itself, so the default
    // null handling is switched off.
    bool use_default_implementation_for_nulls() const override { return false; }

    Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments,
                        uint32_t result, size_t input_rows_count) const override {
        constexpr bool negate_result = false;
        return NullOrEmptyImpl::execute(context, block, arguments, result, input_rows_count,
                                        negate_result);
    }
};
/// SQL function `not_null_or_empty(str)`: UInt8 result from
/// NullOrEmptyImpl::execute with `reverse == true`.
class FunctionNotNullOrEmpty : public IFunction {
public:
    static constexpr auto name = "not_null_or_empty";

    static FunctionPtr create() { return std::make_shared<FunctionNotNullOrEmpty>(); }

    String get_name() const override { return name; }

    size_t get_number_of_arguments() const override { return 1; }

    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
        return std::make_shared<DataTypeUInt8>();
    }

    // Nullable input is handled by NullOrEmptyImpl itself, so the default
    // null handling is switched off.
    bool use_default_implementation_for_nulls() const override { return false; }

    Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments,
                        uint32_t result, size_t input_rows_count) const override {
        constexpr bool negate_result = true;
        return NullOrEmptyImpl::execute(context, block, arguments, result, input_rows_count,
                                        negate_result);
    }
};
// Registers the basic string functions defined in this file with `factory`,
// followed by their alternative SQL names.
void register_function_string_basic(SimpleFunctionFactory& factory) {
// Both substring overloads: (String, Int32, Int32) and (String, Int32).
factory.register_function<FunctionSubstring<Substr3Impl>>();
factory.register_function<FunctionSubstring<Substr2Impl>>();
factory.register_function<FunctionLeft>();
factory.register_function<FunctionRight>();
factory.register_function<FunctionNullOrEmpty>();
factory.register_function<FunctionNotNullOrEmpty>();
factory.register_function<FunctionStrcmp>();
// Aliases: "strleft" -> left, "strright" -> right, "substr" and "mid" -> SubstringUtil::name.
factory.register_alias(FunctionLeft::name, "strleft");
factory.register_alias(FunctionRight::name, "strright");
factory.register_alias(SubstringUtil::name, "substr");
factory.register_alias(SubstringUtil::name, "mid");
}
#include "common/compile_check_avoid_end.h"
} // namespace doris