blob: 39b9fe1ba1a7a7b74d9fa33c100f30ec85e90e46 [file] [log] [blame]
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#pragma once
#include "vec/columns/column_array.h"
#include "vec/columns/column_const.h"
#include "vec/data_types/data_type_array.h"
#include "vec/data_types/data_type_string.h"
#include "vec/functions/array/function_array_utils.h"
namespace doris::vectorized {
#include "common/compile_check_begin.h"
struct NameArrayJoin {
static constexpr auto name = "array_join";
};
struct ArrayJoinImpl {
public:
using column_type = ColumnArray;
using NullMapType = PaddedPODArray<UInt8>;
static bool _is_variadic() { return true; }
static size_t _get_number_of_arguments() { return 0; }
static DataTypePtr get_return_type(const DataTypes& arguments) {
DCHECK(arguments[0]->get_primitive_type() == TYPE_ARRAY)
<< "first argument for function: array_join should be DataTypeArray"
<< " and arguments[0] is " << arguments[0]->get_name();
DCHECK(is_string_type(arguments[1]->get_primitive_type()))
<< "second argument for function: array_join should be DataTypeString"
<< ", and arguments[1] is " << arguments[1]->get_name();
if (arguments.size() > 2) {
DCHECK(is_string_type(arguments[2]->get_primitive_type()))
<< "third argument for function: array_join should be DataTypeString"
<< ", and arguments[2] is " << arguments[2]->get_name();
}
return std::make_shared<DataTypeString>();
}
static Status execute(Block& block, const ColumnNumbers& arguments, uint32_t result,
const DataTypeArray* data_type_array, const ColumnArray& array) {
ColumnPtr src_column =
block.get_by_position(arguments[0]).column->convert_to_full_column_if_const();
ColumnArrayExecutionData src;
if (!extract_column_array_info(*src_column, src)) {
return Status::RuntimeError(fmt::format(
"execute failed, unsupported types for function {}({})", "array_join",
block.get_by_position(arguments[0]).type->get_name()));
}
ColumnPtr sep_column =
block.get_by_position(arguments[1]).column->convert_to_full_column_if_const();
ColumnPtr null_replace_column =
(arguments.size() > 2 ? block.get_by_position(arguments[2])
.column->convert_to_full_column_if_const()
: nullptr);
std::string sep_str = _get_string_from_column(sep_column);
std::string null_replace_str = _get_string_from_column(null_replace_column);
auto nested_type = data_type_array->get_nested_type();
auto dest_column_ptr = ColumnString::create();
DCHECK(dest_column_ptr);
auto res_val = _execute_string(*src.nested_col, *src.offsets_ptr, src.nested_nullmap_data,
sep_str, null_replace_str, dest_column_ptr.get());
if (!res_val) {
return Status::RuntimeError(fmt::format(
"execute failed or unsupported types for function {}({},{},{})", "array_join",
block.get_by_position(arguments[0]).type->get_name(),
block.get_by_position(arguments[1]).type->get_name(),
(arguments.size() > 2 ? block.get_by_position(arguments[2]).type->get_name()
: "")));
}
block.replace_by_position(result, std::move(dest_column_ptr));
return Status::OK();
}
private:
static std::string _get_string_from_column(const ColumnPtr& column_ptr) {
if (!column_ptr) {
return std::string("");
}
const ColumnString* column_string_ptr = check_and_get_column<ColumnString>(*column_ptr);
StringRef str_ref = column_string_ptr->get_data_at(0);
std::string str(str_ref.data, str_ref.size);
return str;
}
static void _fill_result_string(const std::string& input_str, const std::string& sep_str,
std::string& result_str, bool& is_first_elem) {
if (is_first_elem) {
result_str.append(input_str);
is_first_elem = false;
} else {
result_str.append(sep_str);
result_str.append(input_str);
}
return;
}
static bool _execute_string(const IColumn& src_column,
const ColumnArray::Offsets64& src_offsets,
const UInt8* src_null_map, const std::string& sep_str,
const std::string& null_replace_str,
ColumnString* dest_column_ptr) {
const ColumnString* src_data_concrete = assert_cast<const ColumnString*>(&src_column);
if (!src_data_concrete) {
return false;
}
size_t prev_src_offset = 0;
for (auto curr_src_offset : src_offsets) {
std::string result_str;
bool is_first_elem = true;
for (size_t j = prev_src_offset; j < curr_src_offset; ++j) {
if (src_null_map && src_null_map[j]) {
if (null_replace_str.size() == 0) {
continue;
} else {
_fill_result_string(null_replace_str, sep_str, result_str, is_first_elem);
continue;
}
}
StringRef src_str_ref = src_data_concrete->get_data_at(j);
std::string elem_str(src_str_ref.data, src_str_ref.size);
_fill_result_string(elem_str, sep_str, result_str, is_first_elem);
}
dest_column_ptr->insert_data(result_str.c_str(), result_str.size());
prev_src_offset = curr_src_offset;
}
return true;
}
};
#include "common/compile_check_end.h"
} // namespace doris::vectorized