blob: 2c458c012c520c55eece9b9d2a3c34ccc301ec6c [file] [log] [blame]
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#include "cast_base.h"
#include <cstdint>
#include "util/jsonb_writer.h"
namespace doris::vectorized::CastWrapper {
Status cast_from_generic_to_jsonb(FunctionContext* context, Block& block,
const ColumnNumbers& arguments, uint32_t result,
size_t input_rows_count, const NullMap::value_type* null_map) {
auto data_type_to = block.get_by_position(result).type;
const auto& col_with_type_and_name = block.get_by_position(arguments[0]);
const IDataType& type = *col_with_type_and_name.type;
const IColumn& col_from = *col_with_type_and_name.column;
auto column_string = ColumnString::create();
JsonbWriter writer;
ColumnUInt8::MutablePtr col_null_map_to = ColumnUInt8::create(col_from.size(), 0);
ColumnUInt8::Container* vec_null_map_to = &col_null_map_to->get_data();
DataTypeSerDe::FormatOptions format_options;
format_options.converted_from_string = true;
DataTypeSerDeSPtr from_serde = type.get_serde();
DataTypeSerDeSPtr to_serde = data_type_to->get_serde();
auto col_to = data_type_to->create_column();
auto tmp_col = ColumnString::create();
vectorized::DataTypeSerDe::FormatOptions options;
auto time_zone = cctz::utc_time_zone();
options.timezone =
(context && context->state()) ? &context->state()->timezone_obj() : &time_zone;
options.escape_char = '\\';
for (size_t i = 0; i < input_rows_count; i++) {
// convert to string
tmp_col->clear();
VectorBufferWriter write_buffer(*tmp_col.get());
Status st = from_serde->serialize_column_to_json(col_from, i, i + 1, write_buffer, options);
// if serialized failed, will return null
(*vec_null_map_to)[i] = !st.ok();
if (!st.ok()) {
col_to->insert_default();
continue;
}
write_buffer.commit();
writer.reset();
auto str_ref = tmp_col->get_data_at(0);
Slice data((char*)(str_ref.data), str_ref.size);
// first try to parse string
st = to_serde->deserialize_one_cell_from_json(*col_to, data, format_options);
// if parsing failed, will return null
(*vec_null_map_to)[i] = !st.ok();
if (!st.ok()) {
col_to->insert_default();
}
}
block.replace_by_position(
result, ColumnNullable::create(std::move(col_to), std::move(col_null_map_to)));
return Status::OK();
}
Status cast_from_string_to_generic(FunctionContext* context, Block& block,
const ColumnNumbers& arguments, uint32_t result,
size_t input_rows_count, const NullMap::value_type* null_map) {
const auto& col_with_type_and_name = block.get_by_position(arguments[0]);
const IColumn& col_from = *col_with_type_and_name.column;
// result column must set type
DCHECK(block.get_by_position(result).type != nullptr);
auto data_type_to = block.get_by_position(result).type;
if (const auto* col_from_string = check_and_get_column<ColumnString>(&col_from)) {
auto col_to = data_type_to->create_column();
auto serde = data_type_to->get_serde();
size_t size = col_from.size();
col_to->reserve(size);
ColumnUInt8::MutablePtr col_null_map_to = ColumnUInt8::create(size, 0);
ColumnUInt8::Container* vec_null_map_to = &col_null_map_to->get_data();
const bool is_complex = is_complex_type(data_type_to->get_primitive_type());
DataTypeSerDe::FormatOptions format_options;
format_options.converted_from_string = true;
format_options.escape_char = '\\';
for (size_t i = 0; i < size; ++i) {
const auto& val = col_from_string->get_data_at(i);
// Note: here we should handle the null element
if (val.size == 0) {
col_to->insert_default();
// empty string('') is an invalid format for complex type, set null_map to 1
if (is_complex) {
(*vec_null_map_to)[i] = 1;
}
continue;
}
Slice string_slice(val.data, val.size);
Status st =
serde->deserialize_one_cell_from_json(*col_to, string_slice, format_options);
// if parsing failed, will return null
(*vec_null_map_to)[i] = !st.ok();
if (!st.ok()) {
col_to->insert_default();
}
}
block.get_by_position(result).column =
ColumnNullable::create(std::move(col_to), std::move(col_null_map_to));
} else {
return Status::RuntimeError(
"Illegal column {} of first argument of conversion function from string",
col_from.get_name());
}
return Status::OK();
}
ElementWrappers get_element_wrappers(FunctionContext* context, const DataTypes& from_element_types,
const DataTypes& to_element_types) {
DCHECK(from_element_types.size() == to_element_types.size());
ElementWrappers element_wrappers;
element_wrappers.reserve(from_element_types.size());
for (size_t i = 0; i < from_element_types.size(); ++i) {
const DataTypePtr& from_element_type = from_element_types[i];
const DataTypePtr& to_element_type = to_element_types[i];
element_wrappers.push_back(
prepare_unpack_dictionaries(context, from_element_type, to_element_type));
}
return element_wrappers;
}
WrapperType create_unsupport_wrapper(const String error_msg) {
return [error_msg](FunctionContext* /*context*/, Block& /*block*/,
const ColumnNumbers& /*arguments*/, uint32_t /*result*/,
size_t /*input_rows_count*/, const NullMap::value_type* null_map = nullptr) {
return Status::InvalidArgument(error_msg);
};
}
WrapperType create_unsupport_wrapper(const String from_type_name, const String to_type_name) {
const String error_msg =
fmt::format("Conversion from {} to {} is not supported", from_type_name, to_type_name);
return create_unsupport_wrapper(error_msg);
}
WrapperType create_identity_wrapper(const DataTypePtr&) {
return [](FunctionContext* context, Block& block, const ColumnNumbers& arguments,
uint32_t result, size_t /*input_rows_count*/,
const NullMap::value_type* null_map = nullptr) {
block.get_by_position(result).column = block.get_by_position(arguments.front()).column;
return Status::OK();
};
}
/// the only difference between these two functions is throw error or not when parsing fail.
/// the return columns are both nullable columns.
Status cast_from_string_to_complex_type(FunctionContext* context, Block& block,
const ColumnNumbers& arguments, uint32_t result,
size_t input_rows_count,
const NullMap::value_type* null_map) {
const auto* col_from = check_and_get_column<DataTypeString::ColumnType>(
block.get_by_position(arguments[0]).column.get());
auto to_type = block.get_by_position(result).type;
auto to_serde = remove_nullable(to_type)->get_serde();
// string to complex type is always nullable
MutableColumnPtr to_column = make_nullable(to_type)->create_column();
auto& nullable_col_to = assert_cast<ColumnNullable&>(*to_column);
auto& nested_column = nullable_col_to.get_nested_column();
DataTypeSerDe::FormatOptions options;
options.converted_from_string = true;
options.escape_char = '\\';
options.timezone = &context->state()->timezone_obj();
for (size_t i = 0; i < input_rows_count; ++i) {
if (null_map && null_map[i]) {
nullable_col_to.insert_default();
} else {
auto str = col_from->get_data_at(i);
Status st = to_serde->from_string(str, nested_column, options);
if (st.ok()) {
nullable_col_to.get_null_map_data().push_back(0);
} else {
nullable_col_to.insert_default(); // fill null if fail
}
}
}
block.get_by_position(result).column = std::move(to_column);
return Status::OK();
}
Status cast_from_string_to_complex_type_strict_mode(FunctionContext* context, Block& block,
const ColumnNumbers& arguments, uint32_t result,
size_t input_rows_count,
const NullMap::value_type* null_map) {
const auto* col_from = check_and_get_column<DataTypeString::ColumnType>(
block.get_by_position(arguments[0]).column.get());
auto to_type = block.get_by_position(result).type;
auto to_serde = remove_nullable(to_type)->get_serde();
// string to complex type is always nullable
MutableColumnPtr to_column = make_nullable(to_type)->create_column();
auto& nullable_col_to = assert_cast<ColumnNullable&>(*to_column);
auto& nested_column = nullable_col_to.get_nested_column();
DataTypeSerDe::FormatOptions options;
options.converted_from_string = true;
options.escape_char = '\\';
options.timezone = &context->state()->timezone_obj();
for (size_t i = 0; i < input_rows_count; ++i) {
if (null_map && null_map[i]) {
to_column->insert_default();
} else {
auto str = col_from->get_data_at(i);
RETURN_IF_ERROR(to_serde->from_string_strict_mode(str, nested_column, options));
// fill not null if success
nullable_col_to.get_null_map_data().push_back(0);
}
}
block.get_by_position(result).column = std::move(to_column);
return Status::OK();
}
} // namespace doris::vectorized::CastWrapper