| // Licensed to the Apache Software Foundation (ASF) under one |
| // or more contributor license agreements. See the NOTICE file |
| // distributed with this work for additional information |
| // regarding copyright ownership. The ASF licenses this file |
| // to you under the Apache License, Version 2.0 (the |
| // "License"); you may not use this file except in compliance |
| // with the License. You may obtain a copy of the License at |
| // |
| // http://www.apache.org/licenses/LICENSE-2.0 |
| // |
| // Unless required by applicable law or agreed to in writing, |
| // software distributed under the License is distributed on an |
| // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
| // KIND, either express or implied. See the License for the |
| // specific language governing permissions and limitations |
| // under the License. |
| // This file is copied from |
| // https://github.com/ClickHouse/ClickHouse/blob/master/src/Functions/FunctionsConversion.h |
| // and modified by Doris |
| |
| #pragma once |
| |
| #include <cctz/time_zone.h> |
| #include <fmt/format.h> |
| #include <gen_cpp/FrontendService_types.h> |
| #include <glog/logging.h> |
| |
| #include <algorithm> |
| #include <boost/iterator/iterator_facade.hpp> |
| #include <cmath> |
| #include <cstddef> |
| #include <cstdint> |
| #include <functional> |
| #include <memory> |
| #include <string> |
| #include <type_traits> |
| #include <utility> |
| #include <vector> |
| |
| #include "common/compiler_util.h" // IWYU pragma: keep |
| #include "common/status.h" |
| #include "runtime/runtime_state.h" |
| #include "runtime/type_limit.h" |
| #include "udf/udf.h" |
| #include "util/jsonb_document.h" |
| #include "util/jsonb_stream.h" |
| #include "util/jsonb_writer.h" |
| #include "vec/aggregate_functions/aggregate_function.h" |
| #include "vec/columns/column.h" |
| #include "vec/columns/column_array.h" |
| #include "vec/columns/column_map.h" |
| #include "vec/columns/column_nullable.h" |
| #include "vec/columns/column_string.h" |
| #include "vec/columns/column_struct.h" |
| #include "vec/columns/column_variant.h" |
| #include "vec/columns/column_vector.h" |
| #include "vec/common/assert_cast.h" |
| #include "vec/common/string_buffer.hpp" |
| #include "vec/common/string_ref.h" |
| #include "vec/core/block.h" |
| #include "vec/core/call_on_type_index.h" |
| #include "vec/core/column_numbers.h" |
| #include "vec/core/column_with_type_and_name.h" |
| #include "vec/core/columns_with_type_and_name.h" |
| #include "vec/core/field.h" |
| #include "vec/core/types.h" |
| #include "vec/data_types/data_type.h" |
| #include "vec/data_types/data_type_agg_state.h" |
| #include "vec/data_types/data_type_array.h" |
| #include "vec/data_types/data_type_bitmap.h" |
| #include "vec/data_types/data_type_date.h" |
| #include "vec/data_types/data_type_date_or_datetime_v2.h" |
| #include "vec/data_types/data_type_date_time.h" |
| #include "vec/data_types/data_type_decimal.h" |
| #include "vec/data_types/data_type_hll.h" |
| #include "vec/data_types/data_type_ipv4.h" |
| #include "vec/data_types/data_type_ipv6.h" |
| #include "vec/data_types/data_type_jsonb.h" |
| #include "vec/data_types/data_type_map.h" |
| #include "vec/data_types/data_type_nullable.h" |
| #include "vec/data_types/data_type_number.h" |
| #include "vec/data_types/data_type_string.h" |
| #include "vec/data_types/data_type_struct.h" |
| #include "vec/data_types/data_type_time.h" |
| #include "vec/data_types/data_type_variant.h" |
| #include "vec/data_types/serde/data_type_serde.h" |
| #include "vec/functions/function.h" |
| #include "vec/functions/function_convert_tz.h" |
| #include "vec/functions/function_helpers.h" |
| #include "vec/io/io_helper.h" |
| #include "vec/io/reader_buffer.h" |
| #include "vec/runtime/time_value.h" |
| #include "vec/runtime/vdatetime_value.h" |
| #include "vec/utils/util.hpp" |
| |
| class DateLUTImpl; |
| |
| namespace doris { |
| namespace vectorized { |
| template <PrimitiveType T> |
| class ColumnDecimal; |
| } // namespace vectorized |
| } // namespace doris |
| |
| namespace doris::vectorized { |
| /** Type conversion functions. |
| * toType - conversion in "natural way"; |
| */ |
| inline UInt32 extract_to_decimal_scale(const ColumnWithTypeAndName& named_column) { |
| const auto* arg_type = named_column.type.get(); |
| bool ok = check_and_get_data_type<DataTypeUInt8>(arg_type); |
| if (!ok) { |
| throw doris::Exception(ErrorCode::INVALID_ARGUMENT, "Illegal type of toDecimal() scale {}", |
| named_column.type->get_name()); |
| } |
| |
| Field field; |
| named_column.column->get(0, field); |
| return field.get<UInt32>(); |
| } |
| |
| struct PrecisionScaleArg { |
| UInt32 precision; |
| UInt32 scale; |
| }; |
| |
| /** Conversion of number types to each other, enums to numbers, dates and datetimes to numbers and back: done by straight assignment. |
| * (Date is represented internally as number of days from some day; DateTime - as unix timestamp) |
| */ |
| template <typename FromDataType, typename ToDataType, typename Name> |
| struct ConvertImpl { |
| using FromFieldType = typename FromDataType::FieldType; |
| using ToFieldType = typename ToDataType::FieldType; |
| |
| // `static_cast_set` is introduced to wrap `static_cast` and handle special cases. |
| // Doris uses `uint8` to represent boolean values internally. |
| // Directly `static_cast` to `uint8` can result in non-0/1 values, |
| // To address this, `static_cast_set` performs an additional check: |
| // For `uint8` types, it explicitly uses `static_cast<bool>` to ensure |
| // the result is either 0 or 1. |
| static void static_cast_set(ToFieldType& to, const FromFieldType& from) { |
| // uint8_t now use as boolean in doris |
| if constexpr (std::is_same_v<uint8_t, ToFieldType>) { |
| to = static_cast<bool>(from); |
| } else { |
| to = static_cast<ToFieldType>(from); |
| } |
| } |
| |
| template <typename Additions = void*> |
| static Status execute(FunctionContext* context, Block& block, const ColumnNumbers& arguments, |
| uint32_t result, size_t input_rows_count, |
| Additions additions = Additions()) { |
| const ColumnWithTypeAndName& named_from = block.get_by_position(arguments[0]); |
| |
| using ColVecFrom = std::conditional_t<IsDecimalNumber<FromFieldType>, |
| ColumnDecimal<FromDataType::PType>, |
| ColumnVector<FromDataType::PType>>; |
| using ColVecTo = |
| std::conditional_t<IsDecimalNumber<ToFieldType>, ColumnDecimal<ToDataType::PType>, |
| ColumnVector<ToDataType::PType>>; |
| |
| if constexpr (IsDataTypeDecimal<FromDataType> || IsDataTypeDecimal<ToDataType>) { |
| if constexpr (!(IsDataTypeDecimalOrNumber<FromDataType> || |
| IsDatelikeV1Types<FromDataType> || IsDatelikeV2Types<FromDataType>) || |
| !IsDataTypeDecimalOrNumber<ToDataType>) { |
| return Status::RuntimeError("Illegal column {} of first argument of function {}", |
| named_from.column->get_name(), Name::name); |
| } |
| } |
| |
| if (const ColVecFrom* col_from = |
| check_and_get_column<ColVecFrom>(named_from.column.get())) { |
| typename ColVecTo::MutablePtr col_to = nullptr; |
| UInt32 from_precision = NumberTraits::max_ascii_len<FromFieldType>(); |
| UInt32 from_scale = 0; |
| |
| if constexpr (IsDataTypeDecimal<FromDataType>) { |
| const auto& from_decimal_type = assert_cast<const FromDataType&>(*named_from.type); |
| from_precision = from_decimal_type.get_precision(); |
| from_scale = from_decimal_type.get_scale(); |
| } |
| |
| UInt32 to_max_digits = 0; |
| UInt32 to_precision = 0; |
| UInt32 to_scale = 0; |
| |
| ToFieldType max_result {0}; |
| ToFieldType min_result {0}; |
| if constexpr (IsDataTypeDecimal<ToDataType>) { |
| to_max_digits = NumberTraits::max_ascii_len<typename ToFieldType::NativeType>(); |
| |
| to_precision = ((PrecisionScaleArg)additions).precision; |
| ToDataType::check_type_precision(to_precision); |
| |
| to_scale = ((PrecisionScaleArg)additions).scale; |
| ToDataType::check_type_scale(to_scale); |
| col_to = ColVecTo::create(0, to_scale); |
| |
| max_result = ToDataType::get_max_digits_number(to_precision); |
| min_result = -max_result; |
| } else { |
| col_to = ColVecTo::create(); |
| } |
| if constexpr (IsDataTypeNumber<ToDataType>) { |
| max_result = type_limit<ToFieldType>::max(); |
| min_result = type_limit<ToFieldType>::min(); |
| } |
| if constexpr (std::is_integral_v<ToFieldType>) { |
| to_max_digits = NumberTraits::max_ascii_len<ToFieldType>(); |
| to_precision = to_max_digits; |
| } |
| |
| const auto& vec_from = col_from->get_data(); |
| auto& vec_to = col_to->get_data(); |
| size_t size = vec_from.size(); |
| vec_to.resize(size); |
| |
| if constexpr (IsDataTypeDecimal<FromDataType> || IsDataTypeDecimal<ToDataType>) { |
| // the result is rounded when doing cast, so it may still overflow after rounding |
| // if destination integer digit count is the same as source integer digit count. |
| bool narrow_integral = context->check_overflow_for_decimal() && |
| (to_precision - to_scale) <= (from_precision - from_scale); |
| |
| bool multiply_may_overflow = context->check_overflow_for_decimal(); |
| if (to_scale > from_scale) { |
| multiply_may_overflow &= |
| (from_precision + to_scale - from_scale) >= to_max_digits; |
| } |
| |
| std::visit( |
| [&](auto multiply_may_overflow, auto narrow_integral) { |
| if constexpr (IsDataTypeDecimal<FromDataType> && |
| IsDataTypeDecimal<ToDataType>) { |
| convert_decimal_cols<FromDataType, ToDataType, |
| multiply_may_overflow, narrow_integral>( |
| vec_from.data(), vec_to.data(), from_precision, |
| vec_from.get_scale(), to_precision, vec_to.get_scale(), |
| vec_from.size()); |
| } else if constexpr (IsDataTypeDecimal<FromDataType>) { |
| convert_from_decimal<FromDataType, ToDataType, narrow_integral>( |
| vec_to.data(), vec_from.data(), from_precision, |
| vec_from.get_scale(), min_result, max_result, size); |
| } else { |
| convert_to_decimal<FromDataType, ToDataType, multiply_may_overflow, |
| narrow_integral>( |
| vec_to.data(), vec_from.data(), from_scale, to_precision, |
| to_scale, min_result, max_result, size); |
| } |
| }, |
| make_bool_variant(multiply_may_overflow), |
| make_bool_variant(narrow_integral)); |
| |
| block.replace_by_position(result, std::move(col_to)); |
| |
| return Status::OK(); |
| } else if constexpr (IsDatelikeV1Types<FromDataType>) { |
| for (size_t i = 0; i < size; ++i) { |
| if constexpr (IsDatelikeV1Types<ToDataType>) { |
| vec_to[i] = static_cast<ToFieldType>(vec_from[i]); |
| if constexpr (IsDateTimeType<ToDataType>) { |
| DataTypeDateTime::cast_to_date_time(vec_to[i]); |
| } else { |
| DataTypeDate::cast_to_date(vec_to[i]); |
| } |
| } else if constexpr (IsDateV2Type<ToDataType>) { |
| DataTypeDateV2::cast_from_date(vec_from[i], vec_to[i]); |
| } else if constexpr (IsDateTimeV2Type<ToDataType>) { |
| DataTypeDateTimeV2::cast_from_date(vec_from[i], vec_to[i]); |
| } else { |
| static_cast_set( |
| vec_to[i], |
| reinterpret_cast<const VecDateTimeValue&>(vec_from[i]).to_int64()); |
| } |
| } |
| } else if constexpr (IsDatelikeV2Types<FromDataType>) { |
| for (size_t i = 0; i < size; ++i) { |
| if constexpr (IsDatelikeV2Types<ToDataType>) { |
| if constexpr (IsDateTimeV2Type<ToDataType> && IsDateV2Type<FromDataType>) { |
| DataTypeDateV2::cast_to_date_time_v2(vec_from[i], vec_to[i]); |
| } else if constexpr (IsDateTimeV2Type<FromDataType> && |
| IsDateV2Type<ToDataType>) { |
| DataTypeDateTimeV2::cast_to_date_v2(vec_from[i], vec_to[i]); |
| } else { |
| UInt32 scale = additions; |
| vec_to[i] = ToFieldType(vec_from[i] / std::pow(10, 6 - scale)); |
| } |
| } else if constexpr (IsDatelikeV1Types<ToDataType>) { |
| if constexpr (IsDateTimeType<ToDataType> && IsDateV2Type<FromDataType>) { |
| DataTypeDateV2::cast_to_date_time(vec_from[i], vec_to[i]); |
| } else if constexpr (IsDateType<ToDataType> && IsDateV2Type<FromDataType>) { |
| DataTypeDateV2::cast_to_date(vec_from[i], vec_to[i]); |
| } else if constexpr (IsDateTimeType<ToDataType> && |
| IsDateTimeV2Type<FromDataType>) { |
| DataTypeDateTimeV2::cast_to_date_time(vec_from[i], vec_to[i]); |
| } else if constexpr (IsDateType<ToDataType> && |
| IsDateTimeV2Type<FromDataType>) { |
| DataTypeDateTimeV2::cast_to_date(vec_from[i], vec_to[i]); |
| } else { |
| return Status::InvalidArgument("Wrong cast expression!"); |
| } |
| } else { |
| if constexpr (IsDateTimeV2Type<FromDataType>) { |
| static_cast_set( |
| vec_to[i], |
| reinterpret_cast<const DateV2Value<DateTimeV2ValueType>&>( |
| vec_from[i]) |
| .to_int64()); |
| } else { |
| static_cast_set(vec_to[i], |
| reinterpret_cast<const DateV2Value<DateV2ValueType>&>( |
| vec_from[i]) |
| .to_int64()); |
| } |
| } |
| } |
| } else { |
| if constexpr (IsDataTypeNumber<FromDataType> && |
| std::is_same_v<ToDataType, DataTypeTimeV2>) { |
| // 300 -> 00:03:00 360 will be parse failed , so value maybe null |
| ColumnUInt8::MutablePtr col_null_map_to; |
| ColumnUInt8::Container* vec_null_map_to = nullptr; |
| col_null_map_to = ColumnUInt8::create(size, 0); |
| vec_null_map_to = &col_null_map_to->get_data(); |
| for (size_t i = 0; i < size; ++i) { |
| (*vec_null_map_to)[i] = !TimeValue::try_parse_time(vec_from[i], vec_to[i]); |
| } |
| block.get_by_position(result).column = |
| ColumnNullable::create(std::move(col_to), std::move(col_null_map_to)); |
| return Status::OK(); |
| } else if constexpr ((std::is_same_v<FromDataType, DataTypeIPv4>)&&( |
| std::is_same_v<ToDataType, DataTypeIPv6>)) { |
| for (size_t i = 0; i < size; ++i) { |
| map_ipv4_to_ipv6(vec_from[i], reinterpret_cast<UInt8*>(&vec_to[i])); |
| } |
| } else { |
| for (size_t i = 0; i < size; ++i) { |
| static_cast_set(vec_to[i], vec_from[i]); |
| } |
| } |
| } |
| |
| block.replace_by_position(result, std::move(col_to)); |
| } else { |
| return Status::RuntimeError("Illegal column {} of first argument of function {}", |
| named_from.column->get_name(), Name::name); |
| } |
| return Status::OK(); |
| } |
| }; |
| |
| /** If types are identical, just take reference to column. |
| */ |
| template <typename T, typename Name> |
| requires(!T::is_parametric) |
| struct ConvertImpl<T, T, Name> { |
| static Status execute(FunctionContext* context, Block& block, const ColumnNumbers& arguments, |
| uint32_t result, size_t /*input_rows_count*/) { |
| block.get_by_position(result).column = block.get_by_position(arguments[0]).column; |
| return Status::OK(); |
| } |
| }; |
| |
| // using other type cast to Date/DateTime, unless String |
| // Date/DateTime |
| template <typename FromDataType, typename ToDataType, typename Name> |
| struct ConvertImplToTimeType { |
| using FromFieldType = typename FromDataType::FieldType; |
| using ToFieldType = typename ToDataType::FieldType; |
| |
| static Status execute(FunctionContext* context, Block& block, const ColumnNumbers& arguments, |
| uint32_t result, size_t /*input_rows_count*/) { |
| const ColumnWithTypeAndName& named_from = block.get_by_position(arguments[0]); |
| |
| using ColVecFrom = std::conditional_t<IsDecimalNumber<FromFieldType>, |
| ColumnDecimal<FromDataType::PType>, |
| ColumnVector<FromDataType::PType>>; |
| |
| using DateValueType = std::conditional_t< |
| IsDatelikeV2Types<ToDataType>, |
| std::conditional_t<IsDateV2Type<ToDataType>, DateV2Value<DateV2ValueType>, |
| DateV2Value<DateTimeV2ValueType>>, |
| VecDateTimeValue>; |
| using ColVecTo = ColumnVector<ToDataType::PType>; |
| |
| if (const ColVecFrom* col_from = |
| check_and_get_column<ColVecFrom>(named_from.column.get())) { |
| const auto& vec_from = col_from->get_data(); |
| size_t size = vec_from.size(); |
| |
| // create nested column |
| auto col_to = ColVecTo::create(size); |
| auto& vec_to = col_to->get_data(); |
| |
| // create null column |
| ColumnUInt8::MutablePtr col_null_map_to; |
| col_null_map_to = ColumnUInt8::create(size, 0); |
| auto& vec_null_map_to = col_null_map_to->get_data(); |
| |
| if constexpr (std::is_same_v<FromDataType, DataTypeTimeV2>) { |
| DateValueType current_date_value; |
| current_date_value.from_unixtime(context->state()->timestamp_ms() / 1000, |
| context->state()->timezone_obj()); |
| uint32_t scale = 0; |
| // Only DateTimeV2 has scale |
| if (std::is_same_v<ToDataType, DataTypeDateTimeV2>) { |
| scale = remove_nullable(block.get_by_position(result).type)->get_scale(); |
| } |
| // According to MySQL rules, when casting time type to date/datetime, |
| // the current date is added to the time |
| // So here we need to clear the time part |
| current_date_value.reset_time_part(); |
| for (size_t i = 0; i < size; ++i) { |
| auto& date_value = reinterpret_cast<DateValueType&>(vec_to[i]); |
| date_value = current_date_value; |
| int64_t microsecond = TimeValue::round_time(vec_from[i], scale); |
| // Only TimeV2 type needs microseconds |
| if constexpr (IsDatelikeV2Types<ToDataType>) { |
| vec_null_map_to[i] = !date_value.template date_add_interval<MICROSECOND>( |
| TimeInterval {MICROSECOND, microsecond, false}); |
| } else { |
| vec_null_map_to[i] = |
| !date_value.template date_add_interval<SECOND>(TimeInterval { |
| SECOND, microsecond / TimeValue::ONE_SECOND_MICROSECONDS, |
| false}); |
| } |
| |
| // DateType of VecDateTimeValue should cast to date |
| if constexpr (IsDateType<ToDataType>) { |
| date_value.cast_to_date(); |
| } else if constexpr (IsDateTimeType<ToDataType>) { |
| date_value.to_datetime(); |
| } |
| } |
| } else { |
| for (size_t i = 0; i < size; ++i) { |
| auto& date_value = reinterpret_cast<DateValueType&>(vec_to[i]); |
| vec_null_map_to[i] = !date_value.from_date_int64(int64_t(vec_from[i])); |
| // DateType of VecDateTimeValue should cast to date |
| if constexpr (IsDateType<ToDataType>) { |
| date_value.cast_to_date(); |
| } else if constexpr (IsDateTimeType<ToDataType>) { |
| date_value.to_datetime(); |
| } |
| } |
| } |
| block.get_by_position(result).column = |
| ColumnNullable::create(std::move(col_to), std::move(col_null_map_to)); |
| } else { |
| return Status::RuntimeError("Illegal column {} of first argument of function {}", |
| named_from.column->get_name(), Name::name); |
| } |
| |
| return Status::OK(); |
| } |
| }; |
| |
| // Generic conversion of any type to String. |
| struct ConvertImplGenericToString { |
| static Status execute(Block& block, const ColumnNumbers& arguments, size_t result) { |
| const auto& col_with_type_and_name = block.get_by_position(arguments[0]); |
| const IDataType& type = *col_with_type_and_name.type; |
| const IColumn& col_from = *col_with_type_and_name.column; |
| |
| auto col_to = ColumnString::create(); |
| type.to_string_batch(col_from, *col_to); |
| |
| block.replace_by_position(result, std::move(col_to)); |
| return Status::OK(); |
| } |
| |
| static Status execute2(FunctionContext* /*ctx*/, Block& block, const ColumnNumbers& arguments, |
| const uint32_t result, size_t /*input_rows_count*/) { |
| return execute(block, arguments, result); |
| } |
| }; |
| //this is for data in compound type |
| struct ConvertImplGenericFromString { |
| static Status execute(FunctionContext* context, Block& block, const ColumnNumbers& arguments, |
| const uint32_t result, size_t input_rows_count) { |
| const auto& col_with_type_and_name = block.get_by_position(arguments[0]); |
| const IColumn& col_from = *col_with_type_and_name.column; |
| // result column must set type |
| DCHECK(block.get_by_position(result).type != nullptr); |
| auto data_type_to = block.get_by_position(result).type; |
| if (const ColumnString* col_from_string = check_and_get_column<ColumnString>(&col_from)) { |
| auto col_to = data_type_to->create_column(); |
| auto serde = data_type_to->get_serde(); |
| size_t size = col_from.size(); |
| col_to->reserve(size); |
| |
| ColumnUInt8::MutablePtr col_null_map_to = ColumnUInt8::create(size, 0); |
| ColumnUInt8::Container* vec_null_map_to = &col_null_map_to->get_data(); |
| const bool is_complex = is_complex_type(data_type_to->get_primitive_type()); |
| DataTypeSerDe::FormatOptions format_options; |
| format_options.converted_from_string = true; |
| format_options.escape_char = '\\'; |
| |
| for (size_t i = 0; i < size; ++i) { |
| const auto& val = col_from_string->get_data_at(i); |
| // Note: here we should handle the null element |
| if (val.size == 0) { |
| col_to->insert_default(); |
| // empty string('') is an invalid format for complex type, set null_map to 1 |
| if (is_complex) { |
| (*vec_null_map_to)[i] = 1; |
| } |
| continue; |
| } |
| Slice string_slice(val.data, val.size); |
| Status st = serde->deserialize_one_cell_from_json(*col_to, string_slice, |
| format_options); |
| // if parsing failed, will return null |
| (*vec_null_map_to)[i] = !st.ok(); |
| if (!st.ok()) { |
| col_to->insert_default(); |
| } |
| } |
| block.get_by_position(result).column = |
| ColumnNullable::create(std::move(col_to), std::move(col_null_map_to)); |
| } else { |
| return Status::RuntimeError( |
| "Illegal column {} of first argument of conversion function from string", |
| col_from.get_name()); |
| } |
| return Status::OK(); |
| } |
| }; |
| |
| // Generic conversion of number to jsonb. |
| template <typename ColumnType> |
| struct ConvertImplNumberToJsonb { |
| static Status execute(FunctionContext* context, Block& block, const ColumnNumbers& arguments, |
| const uint32_t result, size_t input_rows_count) { |
| const auto& col_with_type_and_name = block.get_by_position(arguments[0]); |
| |
| auto column_string = ColumnString::create(); |
| JsonbWriter writer; |
| |
| const auto* col = |
| check_and_get_column<const ColumnType>(col_with_type_and_name.column.get()); |
| const auto& data = col->get_data(); |
| |
| for (size_t i = 0; i < input_rows_count; i++) { |
| writer.reset(); |
| if constexpr (std::is_same_v<ColumnUInt8, ColumnType>) { |
| writer.writeBool(data[i]); |
| } else if constexpr (std::is_same_v<ColumnInt8, ColumnType>) { |
| writer.writeInt8(data[i]); |
| } else if constexpr (std::is_same_v<ColumnInt16, ColumnType>) { |
| writer.writeInt16(data[i]); |
| } else if constexpr (std::is_same_v<ColumnInt32, ColumnType>) { |
| writer.writeInt32(data[i]); |
| } else if constexpr (std::is_same_v<ColumnInt64, ColumnType>) { |
| writer.writeInt64(data[i]); |
| } else if constexpr (std::is_same_v<ColumnInt128, ColumnType>) { |
| writer.writeInt128(data[i]); |
| } else if constexpr (std::is_same_v<ColumnFloat64, ColumnType>) { |
| writer.writeDouble(data[i]); |
| } else { |
| static_assert(std::is_same_v<ColumnType, ColumnUInt8> || |
| std::is_same_v<ColumnType, ColumnInt8> || |
| std::is_same_v<ColumnType, ColumnInt16> || |
| std::is_same_v<ColumnType, ColumnInt32> || |
| std::is_same_v<ColumnType, ColumnInt64> || |
| std::is_same_v<ColumnType, ColumnInt128> || |
| std::is_same_v<ColumnType, ColumnFloat64>, |
| "unsupported type"); |
| __builtin_unreachable(); |
| } |
| column_string->insert_data(writer.getOutput()->getBuffer(), |
| writer.getOutput()->getSize()); |
| } |
| |
| block.replace_by_position(result, std::move(column_string)); |
| return Status::OK(); |
| } |
| }; |
| |
| struct ConvertImplStringToJsonbAsJsonbString { |
| static Status execute(FunctionContext* context, Block& block, const ColumnNumbers& arguments, |
| const uint32_t result, size_t input_rows_count) { |
| auto data_type_to = block.get_by_position(result).type; |
| const auto& col_with_type_and_name = block.get_by_position(arguments[0]); |
| const IColumn& col_from = *col_with_type_and_name.column; |
| auto dst = ColumnString::create(); |
| ColumnString* dst_str = assert_cast<ColumnString*>(dst.get()); |
| const auto* from_string = assert_cast<const ColumnString*>(&col_from); |
| JsonbWriter writer; |
| for (size_t i = 0; i < input_rows_count; i++) { |
| auto str_ref = from_string->get_data_at(i); |
| writer.reset(); |
| // write raw string to jsonb |
| writer.writeStartString(); |
| writer.writeString(str_ref.data, str_ref.size); |
| writer.writeEndString(); |
| dst_str->insert_data(writer.getOutput()->getBuffer(), writer.getOutput()->getSize()); |
| } |
| block.replace_by_position(result, std::move(dst)); |
| return Status::OK(); |
| } |
| }; |
| |
| struct ConvertImplGenericFromJsonb { |
| static Status execute(FunctionContext* context, Block& block, const ColumnNumbers& arguments, |
| const uint32_t result, size_t input_rows_count) { |
| auto data_type_to = block.get_by_position(result).type; |
| const auto& col_with_type_and_name = block.get_by_position(arguments[0]); |
| const IColumn& col_from = *col_with_type_and_name.column; |
| if (const ColumnString* col_from_string = check_and_get_column<ColumnString>(&col_from)) { |
| auto col_to = data_type_to->create_column(); |
| |
| size_t size = col_from.size(); |
| col_to->reserve(size); |
| |
| ColumnUInt8::MutablePtr col_null_map_to = ColumnUInt8::create(size, 0); |
| ColumnUInt8::Container* vec_null_map_to = &col_null_map_to->get_data(); |
| const bool is_complex = is_complex_type(data_type_to->get_primitive_type()); |
| const bool is_dst_string = is_string_type(data_type_to->get_primitive_type()); |
| for (size_t i = 0; i < size; ++i) { |
| const auto& val = col_from_string->get_data_at(i); |
| JsonbDocument* doc = JsonbDocument::checkAndCreateDocument(val.data, val.size); |
| if (UNLIKELY(!doc || !doc->getValue())) { |
| (*vec_null_map_to)[i] = 1; |
| col_to->insert_default(); |
| continue; |
| } |
| |
| // value is NOT necessary to be deleted since JsonbValue will not allocate memory |
| JsonbValue* value = doc->getValue(); |
| if (UNLIKELY(!value)) { |
| (*vec_null_map_to)[i] = 1; |
| col_to->insert_default(); |
| continue; |
| } |
| // Note: here we should handle the null element |
| if (val.size == 0) { |
| col_to->insert_default(); |
| // empty string('') is an invalid format for complex type, set null_map to 1 |
| if (is_complex) { |
| (*vec_null_map_to)[i] = 1; |
| } |
| continue; |
| } |
| // add string to string column |
| if (context->jsonb_string_as_string() && is_dst_string && value->isString()) { |
| const auto* blob = static_cast<const JsonbBlobVal*>(value); |
| assert_cast<ColumnString&, TypeCheckOnRelease::DISABLE>(*col_to).insert_data( |
| blob->getBlob(), blob->getBlobLen()); |
| (*vec_null_map_to)[i] = 0; |
| continue; |
| } |
| std::string input_str; |
| if (context->jsonb_string_as_string() && value->isString()) { |
| const auto* blob = static_cast<const JsonbBlobVal*>(value); |
| input_str = std::string(blob->getBlob(), blob->getBlobLen()); |
| } else { |
| input_str = JsonbToJson::jsonb_to_json_string(val.data, val.size); |
| } |
| if (input_str.empty()) { |
| col_to->insert_default(); |
| (*vec_null_map_to)[i] = 1; |
| continue; |
| } |
| ReadBuffer read_buffer((char*)(input_str.data()), input_str.size()); |
| Status st = data_type_to->from_string(read_buffer, col_to.get()); |
| // if parsing failed, will return null |
| (*vec_null_map_to)[i] = !st.ok(); |
| if (!st.ok()) { |
| col_to->insert_default(); |
| } |
| } |
| block.get_by_position(result).column = |
| ColumnNullable::create(std::move(col_to), std::move(col_null_map_to)); |
| } else { |
| return Status::RuntimeError( |
| "Illegal column {} of first argument of conversion function from string", |
| col_from.get_name()); |
| } |
| return Status::OK(); |
| } |
| }; |
| |
| // Generic conversion of any type to jsonb. |
| struct ConvertImplGenericToJsonb { |
| static Status execute(FunctionContext* context, Block& block, const ColumnNumbers& arguments, |
| const uint32_t result, size_t input_rows_count) { |
| auto data_type_to = block.get_by_position(result).type; |
| const auto& col_with_type_and_name = block.get_by_position(arguments[0]); |
| const IDataType& type = *col_with_type_and_name.type; |
| const IColumn& col_from = *col_with_type_and_name.column; |
| |
| auto column_string = ColumnString::create(); |
| JsonbWriter writer; |
| |
| ColumnUInt8::MutablePtr col_null_map_to = ColumnUInt8::create(col_from.size(), 0); |
| ColumnUInt8::Container* vec_null_map_to = &col_null_map_to->get_data(); |
| DataTypeSerDe::FormatOptions format_options; |
| format_options.converted_from_string = true; |
| DataTypeSerDeSPtr from_serde = type.get_serde(); |
| DataTypeSerDeSPtr to_serde = data_type_to->get_serde(); |
| auto col_to = data_type_to->create_column(); |
| |
| auto tmp_col = ColumnString::create(); |
| vectorized::DataTypeSerDe::FormatOptions options; |
| options.escape_char = '\\'; |
| for (size_t i = 0; i < input_rows_count; i++) { |
| // convert to string |
| tmp_col->clear(); |
| VectorBufferWriter write_buffer(*tmp_col.get()); |
| Status st = |
| from_serde->serialize_column_to_json(col_from, i, i + 1, write_buffer, options); |
| // if serialized failed, will return null |
| (*vec_null_map_to)[i] = !st.ok(); |
| if (!st.ok()) { |
| col_to->insert_default(); |
| continue; |
| } |
| write_buffer.commit(); |
| writer.reset(); |
| auto str_ref = tmp_col->get_data_at(0); |
| Slice data((char*)(str_ref.data), str_ref.size); |
| // first try to parse string |
| st = to_serde->deserialize_one_cell_from_json(*col_to, data, format_options); |
| // if parsing failed, will return null |
| (*vec_null_map_to)[i] = !st.ok(); |
| if (!st.ok()) { |
| col_to->insert_default(); |
| } |
| } |
| |
| block.replace_by_position( |
| result, ColumnNullable::create(std::move(col_to), std::move(col_null_map_to))); |
| return Status::OK(); |
| } |
| }; |
| |
| struct ConvertNothingToJsonb { |
| static Status execute(FunctionContext* context, Block& block, const ColumnNumbers& arguments, |
| const uint32_t result, size_t input_rows_count) { |
| const auto& col_with_type_and_name = block.get_by_position(arguments[0]); |
| const IColumn& col_from = *col_with_type_and_name.column; |
| auto data_type_to = block.get_by_position(result).type; |
| size_t size = col_from.size(); |
| auto col_to = data_type_to->create_column_const_with_default_value(size); |
| ColumnUInt8::MutablePtr col_null_map_to = ColumnUInt8::create(size, 1); |
| block.replace_by_position(result, ColumnNullable::create(col_to->assume_mutable(), |
| std::move(col_null_map_to))); |
| return Status::OK(); |
| } |
| }; |
| |
| template <PrimitiveType type, typename ColumnType> |
| struct ConvertImplFromJsonb { |
| static Status execute(FunctionContext* context, Block& block, const ColumnNumbers& arguments, |
| const uint32_t result, size_t input_rows_count) { |
| const auto& col_with_type_and_name = block.get_by_position(arguments[0]); |
| const IColumn& col_from = *col_with_type_and_name.column; |
| // result column must set type |
| DCHECK(block.get_by_position(result).type != nullptr); |
| auto data_type_to = block.get_by_position(result).type; |
| if (const ColumnString* column_string = check_and_get_column<ColumnString>(&col_from)) { |
| auto null_map_col = ColumnUInt8::create(input_rows_count, 0); |
| auto& null_map = null_map_col->get_data(); |
| auto col_to = ColumnType::create(); |
| |
| //IColumn & col_to = *res; |
| // size_t size = col_from.size(); |
| col_to->reserve(input_rows_count); |
| auto& res = col_to->get_data(); |
| res.resize(input_rows_count); |
| |
| for (size_t i = 0; i < input_rows_count; ++i) { |
| const auto& val = column_string->get_data_at(i); |
| // ReadBuffer read_buffer((char*)(val.data), val.size); |
| // RETURN_IF_ERROR(data_type_to->from_string(read_buffer, col_to)); |
| |
| if (val.size == 0) { |
| null_map[i] = 1; |
| res[i] = 0; |
| continue; |
| } |
| |
| // doc is NOT necessary to be deleted since JsonbDocument will not allocate memory |
| JsonbDocument* doc = JsonbDocument::checkAndCreateDocument(val.data, val.size); |
| if (UNLIKELY(!doc || !doc->getValue())) { |
| null_map[i] = 1; |
| res[i] = 0; |
| continue; |
| } |
| |
| // value is NOT necessary to be deleted since JsonbValue will not allocate memory |
| JsonbValue* value = doc->getValue(); |
| if (UNLIKELY(!value)) { |
| null_map[i] = 1; |
| res[i] = 0; |
| continue; |
| } |
| if constexpr (type == PrimitiveType::TYPE_BOOLEAN) { |
| // cast from json value to boolean type |
| if (value->isTrue()) { |
| res[i] = 1; |
| } else if (value->isFalse()) { |
| res[i] = 0; |
| } else if (value->isInt()) { |
| res[i] = ((const JsonbIntVal*)value)->val() == 0 ? 0 : 1; |
| } else if (value->isDouble()) { |
| res[i] = static_cast<ColumnType::value_type>( |
| ((const JsonbDoubleVal*)value)->val()) == 0 |
| ? 0 |
| : 1; |
| } else { |
| null_map[i] = 1; |
| res[i] = 0; |
| } |
| } else if constexpr (type == PrimitiveType::TYPE_TINYINT || |
| type == PrimitiveType::TYPE_SMALLINT || |
| type == PrimitiveType::TYPE_INT || |
| type == PrimitiveType::TYPE_BIGINT || |
| type == PrimitiveType::TYPE_LARGEINT) { |
| // cast from json value to integer types |
| if (value->isInt()) { |
| res[i] = ((const JsonbIntVal*)value)->val(); |
| } else if (value->isDouble()) { |
| res[i] = static_cast<ColumnType::value_type>( |
| ((const JsonbDoubleVal*)value)->val()); |
| } else if (value->isTrue()) { |
| res[i] = 1; |
| } else if (value->isFalse()) { |
| res[i] = 0; |
| } else { |
| null_map[i] = 1; |
| res[i] = 0; |
| } |
| } else if constexpr (type == PrimitiveType::TYPE_FLOAT || |
| type == PrimitiveType::TYPE_DOUBLE) { |
| // cast from json value to floating point types |
| if (value->isDouble()) { |
| res[i] = ((const JsonbDoubleVal*)value)->val(); |
| } else if (value->isFloat()) { |
| res[i] = ((const JsonbFloatVal*)value)->val(); |
| } else if (value->isTrue()) { |
| res[i] = 1; |
| } else if (value->isFalse()) { |
| res[i] = 0; |
| } else if (value->isInt()) { |
| res[i] = ((const JsonbIntVal*)value)->val(); |
| } else { |
| null_map[i] = 1; |
| res[i] = 0; |
| } |
| } else { |
| throw Exception(Status::FatalError("unsupported type")); |
| } |
| } |
| |
| block.replace_by_position( |
| result, ColumnNullable::create(std::move(col_to), std::move(null_map_col))); |
| } else { |
| return Status::RuntimeError( |
| "Illegal column {} of first argument of conversion function from string", |
| col_from.get_name()); |
| } |
| return Status::OK(); |
| } |
| }; |
| |
| template <typename ToDataType, typename Name> |
| struct ConvertImpl<DataTypeString, ToDataType, Name> { |
| template <typename Additions = void*> |
| static Status execute(FunctionContext* context, Block& block, const ColumnNumbers& arguments, |
| uint32_t result, size_t input_rows_count, |
| Additions additions [[maybe_unused]] = Additions()) { |
| return Status::RuntimeError("not support convert from string"); |
| } |
| }; |
| |
| struct NameToString { |
| static constexpr auto name = "to_string"; |
| }; |
| struct NameToDecimal32 { |
| static constexpr auto name = "toDecimal32"; |
| }; |
| struct NameToDecimal64 { |
| static constexpr auto name = "toDecimal64"; |
| }; |
| struct NameToDecimal128 { |
| static constexpr auto name = "toDecimal128"; |
| }; |
| struct NameToDecimal128V3 { |
| static constexpr auto name = "toDecimal128V3"; |
| }; |
| struct NameToDecimal256 { |
| static constexpr auto name = "toDecimal256"; |
| }; |
| struct NameToUInt8 { |
| static constexpr auto name = "toUInt8"; |
| }; |
| struct NameToUInt16 { |
| static constexpr auto name = "toUInt16"; |
| }; |
| struct NameToUInt32 { |
| static constexpr auto name = "toUInt32"; |
| }; |
| struct NameToUInt64 { |
| static constexpr auto name = "toUInt64"; |
| }; |
| struct NameToInt8 { |
| static constexpr auto name = "toInt8"; |
| }; |
| struct NameToInt16 { |
| static constexpr auto name = "toInt16"; |
| }; |
| struct NameToInt32 { |
| static constexpr auto name = "toInt32"; |
| }; |
| struct NameToInt64 { |
| static constexpr auto name = "toInt64"; |
| }; |
| struct NameToInt128 { |
| static constexpr auto name = "toInt128"; |
| }; |
| struct NameToFloat32 { |
| static constexpr auto name = "toFloat32"; |
| }; |
| struct NameToFloat64 { |
| static constexpr auto name = "toFloat64"; |
| }; |
| struct NameToIPv4 { |
| static constexpr auto name = "toIPv4"; |
| }; |
| struct NameToIPv6 { |
| static constexpr auto name = "toIPv6"; |
| }; |
| struct NameToDate { |
| static constexpr auto name = "toDate"; |
| }; |
| struct NameToDateTime { |
| static constexpr auto name = "toDateTime"; |
| }; |
| |
| template <typename DataType, typename FromDataType = void*> |
| bool try_parse_impl(typename DataType::FieldType& x, ReadBuffer& rb, FunctionContext* context, |
| UInt32 scale [[maybe_unused]] = 0) { |
| if constexpr (IsDateTimeType<DataType>) { |
| return try_read_datetime_text(x, rb, context->state()->timezone_obj()); |
| } |
| |
| if constexpr (IsDateType<DataType>) { |
| return try_read_date_text(x, rb, context->state()->timezone_obj()); |
| } |
| |
| if constexpr (IsDateV2Type<DataType>) { |
| return try_read_date_v2_text(x, rb, context->state()->timezone_obj()); |
| } |
| |
| if constexpr (IsDateTimeV2Type<DataType>) { |
| return try_read_datetime_v2_text(x, rb, context->state()->timezone_obj(), scale); |
| } |
| |
| if constexpr (IsIPv4Type<DataType>) { |
| return try_read_ipv4_text(x, rb); |
| } |
| |
| if constexpr (IsIPv6Type<DataType>) { |
| return try_read_ipv6_text(x, rb); |
| } |
| |
| if constexpr (std::is_same_v<DataTypeString, FromDataType> && |
| std::is_same_v<DataTypeTimeV2, DataType>) { |
| // cast from string to time(float64) |
| auto len = rb.count(); |
| auto s = rb.position(); |
| rb.position() = rb.end(); // make is_all_read = true |
| auto ret = TimeValue::try_parse_time(s, len, x, context->state()->timezone_obj()); |
| return ret; |
| } |
| if constexpr (std::is_floating_point_v<typename DataType::FieldType>) { |
| return try_read_float_text(x, rb); |
| } |
| |
| // uint8_t now use as boolean in doris |
| if constexpr (std::is_same_v<typename DataType::FieldType, uint8_t>) { |
| return try_read_bool_text(x, rb); |
| } |
| |
| if constexpr (std::is_integral_v<typename DataType::FieldType>) { |
| return try_read_int_text(x, rb); |
| } |
| } |
| |
| template <typename DataType, typename Additions = void*> |
| StringParser::ParseResult try_parse_decimal_impl(typename DataType::FieldType& x, ReadBuffer& rb, |
| Additions additions |
| [[maybe_unused]] = Additions()) { |
| if constexpr (IsDataTypeDecimalV2<DataType>) { |
| UInt32 scale = ((PrecisionScaleArg)additions).scale; |
| UInt32 precision = ((PrecisionScaleArg)additions).precision; |
| return try_read_decimal_text<TYPE_DECIMALV2>(x, rb, precision, scale); |
| } |
| |
| if constexpr (std::is_same_v<DataTypeDecimal32, DataType>) { |
| UInt32 scale = ((PrecisionScaleArg)additions).scale; |
| UInt32 precision = ((PrecisionScaleArg)additions).precision; |
| return try_read_decimal_text<TYPE_DECIMAL32>(x, rb, precision, scale); |
| } |
| |
| if constexpr (std::is_same_v<DataTypeDecimal64, DataType>) { |
| UInt32 scale = ((PrecisionScaleArg)additions).scale; |
| UInt32 precision = ((PrecisionScaleArg)additions).precision; |
| return try_read_decimal_text<TYPE_DECIMAL64>(x, rb, precision, scale); |
| } |
| |
| if constexpr (IsDataTypeDecimal128V3<DataType>) { |
| UInt32 scale = ((PrecisionScaleArg)additions).scale; |
| UInt32 precision = ((PrecisionScaleArg)additions).precision; |
| return try_read_decimal_text<TYPE_DECIMAL128I>(x, rb, precision, scale); |
| } |
| |
| if constexpr (IsDataTypeDecimal256<DataType>) { |
| UInt32 scale = ((PrecisionScaleArg)additions).scale; |
| UInt32 precision = ((PrecisionScaleArg)additions).precision; |
| return try_read_decimal_text<TYPE_DECIMAL256>(x, rb, precision, scale); |
| } |
| } |
| |
| template <typename ToDataType, typename Name> |
| class FunctionConvert : public IFunction { |
| public: |
| static constexpr auto name = Name::name; |
| |
| static FunctionPtr create() { return std::make_shared<FunctionConvert>(); } |
| |
| String get_name() const override { return name; } |
| |
| bool is_variadic() const override { return true; } |
| size_t get_number_of_arguments() const override { return 0; } |
| |
| // This function should not be called for get DateType Ptr |
| // using the FunctionCast::get_return_type_impl |
| DataTypePtr get_return_type_impl(const ColumnsWithTypeAndName& arguments) const override { |
| return std::make_shared<ToDataType>(); |
| } |
| |
| ColumnNumbers get_arguments_that_are_always_constant() const override { return {1}; } |
| |
| Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments, |
| uint32_t result, size_t input_rows_count) const override { |
| if (!arguments.size()) { |
| return Status::RuntimeError("Function {} expects at least 1 arguments", get_name()); |
| } |
| |
| const IDataType* from_type = block.get_by_position(arguments[0]).type.get(); |
| |
| Status ret_status; |
| /// Generic conversion of any type to String. |
| if constexpr (std::is_same_v<ToDataType, DataTypeString>) { |
| return ConvertImplGenericToString::execute(block, arguments, result); |
| } else { |
| auto call = [&](const auto& types) -> bool { |
| using Types = std::decay_t<decltype(types)>; |
| using LeftDataType = typename Types::LeftType; |
| using RightDataType = typename Types::RightType; |
| |
| // now, cast to decimal do not execute the code |
| if constexpr (IsDataTypeDecimal<RightDataType>) { |
| if (arguments.size() != 2) { |
| ret_status = Status::RuntimeError( |
| "Function {} expects 2 arguments for Decimal.", get_name()); |
| return true; |
| } |
| |
| const ColumnWithTypeAndName& scale_column = block.get_by_position(result); |
| ret_status = ConvertImpl<LeftDataType, RightDataType, Name>::execute( |
| context, block, arguments, result, input_rows_count, |
| scale_column.type->get_scale()); |
| } else if constexpr (IsDataTypeDateTimeV2<RightDataType>) { |
| const ColumnWithTypeAndName& scale_column = block.get_by_position(result); |
| ret_status = ConvertImpl<LeftDataType, RightDataType, Name>::execute( |
| context, block, arguments, result, input_rows_count, |
| scale_column.type->get_scale()); |
| } else { |
| ret_status = ConvertImpl<LeftDataType, RightDataType, Name>::execute( |
| context, block, arguments, result, input_rows_count); |
| } |
| return true; |
| }; |
| |
| bool done = |
| call_on_index_and_data_type<ToDataType>(from_type->get_primitive_type(), call); |
| if (!done) { |
| ret_status = Status::RuntimeError( |
| "Illegal type {} of argument of function {}", |
| block.get_by_position(arguments[0]).type->get_name(), get_name()); |
| } |
| return ret_status; |
| } |
| } |
| }; |
| |
| using FunctionToUInt8 = FunctionConvert<DataTypeUInt8, NameToUInt8>; |
| using FunctionToInt8 = FunctionConvert<DataTypeInt8, NameToInt8>; |
| using FunctionToInt16 = FunctionConvert<DataTypeInt16, NameToInt16>; |
| using FunctionToInt32 = FunctionConvert<DataTypeInt32, NameToInt32>; |
| using FunctionToInt64 = FunctionConvert<DataTypeInt64, NameToInt64>; |
| using FunctionToInt128 = FunctionConvert<DataTypeInt128, NameToInt128>; |
| using FunctionToFloat32 = FunctionConvert<DataTypeFloat32, NameToFloat32>; |
| using FunctionToFloat64 = FunctionConvert<DataTypeFloat64, NameToFloat64>; |
| |
| using FunctionToTimeV2 = FunctionConvert<DataTypeTimeV2, NameToFloat64>; |
| using FunctionToString = FunctionConvert<DataTypeString, NameToString>; |
| using FunctionToDecimal32 = FunctionConvert<DataTypeDecimal32, NameToDecimal32>; |
| using FunctionToDecimal64 = FunctionConvert<DataTypeDecimal64, NameToDecimal64>; |
| using FunctionToDecimal128 = FunctionConvert<DataTypeDecimalV2, NameToDecimal128>; |
| using FunctionToDecimal128V3 = FunctionConvert<DataTypeDecimal128, NameToDecimal128V3>; |
| using FunctionToDecimal256 = FunctionConvert<DataTypeDecimal256, NameToDecimal256>; |
| using FunctionToIPv4 = FunctionConvert<DataTypeIPv4, NameToIPv4>; |
| using FunctionToIPv6 = FunctionConvert<DataTypeIPv6, NameToIPv6>; |
| using FunctionToDate = FunctionConvert<DataTypeDate, NameToDate>; |
| using FunctionToDateTime = FunctionConvert<DataTypeDateTime, NameToDateTime>; |
| using FunctionToDateV2 = FunctionConvert<DataTypeDateV2, NameToDate>; |
| using FunctionToDateTimeV2 = FunctionConvert<DataTypeDateTimeV2, NameToDateTime>; |
| |
| template <typename DataType> |
| struct FunctionTo; |
| |
| template <> |
| struct FunctionTo<DataTypeUInt8> { |
| using Type = FunctionToUInt8; |
| }; |
| template <> |
| struct FunctionTo<DataTypeInt8> { |
| using Type = FunctionToInt8; |
| }; |
| template <> |
| struct FunctionTo<DataTypeInt16> { |
| using Type = FunctionToInt16; |
| }; |
| template <> |
| struct FunctionTo<DataTypeInt32> { |
| using Type = FunctionToInt32; |
| }; |
| template <> |
| struct FunctionTo<DataTypeInt64> { |
| using Type = FunctionToInt64; |
| }; |
| template <> |
| struct FunctionTo<DataTypeInt128> { |
| using Type = FunctionToInt128; |
| }; |
| template <> |
| struct FunctionTo<DataTypeFloat32> { |
| using Type = FunctionToFloat32; |
| }; |
| template <> |
| struct FunctionTo<DataTypeFloat64> { |
| using Type = FunctionToFloat64; |
| }; |
| template <> |
| struct FunctionTo<DataTypeDecimal32> { |
| using Type = FunctionToDecimal32; |
| }; |
| template <> |
| struct FunctionTo<DataTypeDecimal64> { |
| using Type = FunctionToDecimal64; |
| }; |
| template <> |
| struct FunctionTo<DataTypeDecimalV2> { |
| using Type = FunctionToDecimal128; |
| }; |
| template <> |
| struct FunctionTo<DataTypeDecimal128> { |
| using Type = FunctionToDecimal128V3; |
| }; |
| template <> |
| struct FunctionTo<DataTypeDecimal256> { |
| using Type = FunctionToDecimal256; |
| }; |
| template <> |
| struct FunctionTo<DataTypeIPv4> { |
| using Type = FunctionToIPv4; |
| }; |
| template <> |
| struct FunctionTo<DataTypeIPv6> { |
| using Type = FunctionToIPv6; |
| }; |
| template <> |
| struct FunctionTo<DataTypeDate> { |
| using Type = FunctionToDate; |
| }; |
| template <> |
| struct FunctionTo<DataTypeDateTime> { |
| using Type = FunctionToDateTime; |
| }; |
| template <> |
| struct FunctionTo<DataTypeDateV2> { |
| using Type = FunctionToDateV2; |
| }; |
| template <> |
| struct FunctionTo<DataTypeDateTimeV2> { |
| using Type = FunctionToDateTimeV2; |
| }; |
| template <> |
| struct FunctionTo<DataTypeTimeV2> { |
| using Type = FunctionToTimeV2; |
| }; |
| class PreparedFunctionCast : public PreparedFunctionImpl { |
| public: |
| using WrapperType = std::function<Status(FunctionContext* context, Block&, const ColumnNumbers&, |
| size_t, size_t)>; |
| |
| explicit PreparedFunctionCast(WrapperType&& wrapper_function_, const char* name_) |
| : wrapper_function(std::move(wrapper_function_)), name(name_) {} |
| |
| String get_name() const override { return name; } |
| |
| protected: |
| Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments, |
| uint32_t result, size_t input_rows_count) const override { |
| return wrapper_function(context, block, arguments, result, input_rows_count); |
| } |
| |
| bool use_default_implementation_for_nulls() const override { return false; } |
| bool use_default_implementation_for_low_cardinality_columns() const override { return false; } |
| ColumnNumbers get_arguments_that_are_always_constant() const override { return {1}; } |
| |
| private: |
| WrapperType wrapper_function; |
| const char* name; |
| }; |
| |
| // always from DataTypeString |
| template <typename ToDataType, typename Name> |
| struct StringParsing { |
| using ToFieldType = typename ToDataType::FieldType; |
| |
| static bool is_all_read(ReadBuffer& in) { return in.eof(); } |
| |
| template <typename Additions = void*> |
| static Status execute(FunctionContext* context, Block& block, const ColumnNumbers& arguments, |
| uint32_t result, size_t input_rows_count, |
| Additions additions [[maybe_unused]] = Additions()) { |
| using ColVecTo = |
| std::conditional_t<IsDecimalNumber<ToFieldType>, ColumnDecimal<ToDataType::PType>, |
| ColumnVector<ToDataType::PType>>; |
| |
| const IColumn* col_from = block.get_by_position(arguments[0]).column.get(); |
| const auto* col_from_string = check_and_get_column<ColumnString>(col_from); |
| |
| if (!col_from_string) { |
| return Status::RuntimeError("Illegal column {} of first argument of function {}", |
| col_from->get_name(), Name::name); |
| } |
| |
| size_t row = input_rows_count; |
| typename ColVecTo::MutablePtr col_to = nullptr; |
| |
| if constexpr (IsDataTypeDecimal<ToDataType>) { |
| UInt32 scale = ((PrecisionScaleArg)additions).scale; |
| ToDataType::check_type_scale(scale); |
| col_to = ColVecTo::create(row, scale); |
| } else { |
| col_to = ColVecTo::create(row); |
| } |
| |
| typename ColVecTo::Container& vec_to = col_to->get_data(); |
| |
| ColumnUInt8::MutablePtr col_null_map_to; |
| ColumnUInt8::Container* vec_null_map_to [[maybe_unused]] = nullptr; |
| col_null_map_to = ColumnUInt8::create(row, 0); |
| vec_null_map_to = &col_null_map_to->get_data(); |
| |
| const ColumnString::Chars* chars = &col_from_string->get_chars(); |
| const IColumn::Offsets* offsets = &col_from_string->get_offsets(); |
| |
| [[maybe_unused]] UInt32 scale = 0; |
| // TODO: TimeV2 type also need scale |
| if constexpr (IsDataTypeDateTimeV2<ToDataType>) { |
| const auto* type = assert_cast<const DataTypeDateTimeV2*>( |
| block.get_by_position(result).type.get()); |
| scale = type->get_scale(); |
| } |
| |
| size_t current_offset = 0; |
| |
| for (size_t i = 0; i < row; ++i) { |
| size_t next_offset = (*offsets)[i]; |
| size_t string_size = next_offset - current_offset; |
| |
| ReadBuffer read_buffer(&(*chars)[current_offset], string_size); |
| |
| bool parsed; |
| if constexpr (IsDataTypeDecimal<ToDataType>) { |
| ToDataType::check_type_precision((PrecisionScaleArg(additions).precision)); |
| StringParser::ParseResult res = try_parse_decimal_impl<ToDataType>( |
| vec_to[i], read_buffer, PrecisionScaleArg(additions)); |
| parsed = (res == StringParser::PARSE_SUCCESS || |
| res == StringParser::PARSE_OVERFLOW || |
| res == StringParser::PARSE_UNDERFLOW); |
| } else if constexpr (IsDataTypeDateTimeV2<ToDataType>) { |
| parsed = try_parse_impl<ToDataType>(vec_to[i], read_buffer, context, scale); |
| } else { |
| parsed = |
| try_parse_impl<ToDataType, DataTypeString>(vec_to[i], read_buffer, context); |
| } |
| (*vec_null_map_to)[i] = !parsed || !is_all_read(read_buffer); |
| current_offset = next_offset; |
| } |
| |
| block.get_by_position(result).column = |
| ColumnNullable::create(std::move(col_to), std::move(col_null_map_to)); |
| return Status::OK(); |
| } |
| }; |
| |
| template <typename Name> |
| struct ConvertImpl<DataTypeString, DataTypeDecimal32, Name> |
| : StringParsing<DataTypeDecimal32, Name> {}; |
| template <typename Name> |
| struct ConvertImpl<DataTypeString, DataTypeDecimal64, Name> |
| : StringParsing<DataTypeDecimal64, Name> {}; |
| template <typename Name> |
| struct ConvertImpl<DataTypeString, DataTypeDecimalV2, Name> |
| : StringParsing<DataTypeDecimalV2, Name> {}; |
| template <typename Name> |
| struct ConvertImpl<DataTypeString, DataTypeDecimal128, Name> |
| : StringParsing<DataTypeDecimal128, Name> {}; |
| template <typename Name> |
| struct ConvertImpl<DataTypeString, DataTypeDecimal256, Name> |
| : StringParsing<DataTypeDecimal256, Name> {}; |
| template <typename Name> |
| struct ConvertImpl<DataTypeString, DataTypeIPv4, Name> : StringParsing<DataTypeIPv4, Name> {}; |
| template <typename Name> |
| struct ConvertImpl<DataTypeString, DataTypeIPv6, Name> : StringParsing<DataTypeIPv6, Name> {}; |
| |
| struct NameCast { |
| static constexpr auto name = "CAST"; |
| }; |
| |
| template <typename ToDataType, typename Name> |
| class FunctionConvertFromString : public IFunction { |
| public: |
| static constexpr auto name = Name::name; |
| static FunctionPtr create() { return std::make_shared<FunctionConvertFromString>(); } |
| String get_name() const override { return name; } |
| |
| bool is_variadic() const override { return true; } |
| size_t get_number_of_arguments() const override { return 0; } |
| |
| ColumnNumbers get_arguments_that_are_always_constant() const override { return {1}; } |
| |
| // This function should not be called for get DateType Ptr |
| // using the FunctionCast::get_return_type_impl |
| DataTypePtr get_return_type_impl(const ColumnsWithTypeAndName& arguments) const override { |
| DataTypePtr res; |
| if constexpr (IsDataTypeDecimal<ToDataType>) { |
| auto error_type = std::make_shared<ToDataType>(); |
| throw doris::Exception(ErrorCode::INVALID_ARGUMENT, |
| "something wrong type in function {}.", get_name(), |
| error_type->get_name()); |
| } else { |
| res = std::make_shared<ToDataType>(); |
| } |
| |
| return res; |
| } |
| |
| Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments, |
| uint32_t result, size_t input_rows_count) const override { |
| const IDataType* from_type = block.get_by_position(arguments[0]).type.get(); |
| |
| if (check_and_get_data_type<DataTypeString>(from_type)) { |
| return StringParsing<ToDataType, Name>::execute(context, block, arguments, result, |
| input_rows_count); |
| } |
| |
| return Status::RuntimeError( |
| "Illegal type {} of argument of function {} . Only String or FixedString " |
| "argument is accepted for try-conversion function. For other arguments, use " |
| "function without 'orZero' or 'orNull'.", |
| block.get_by_position(arguments[0]).type->get_name(), get_name()); |
| } |
| }; |
| |
| template <typename ToDataType, typename Name> |
| class FunctionConvertToTimeType : public IFunction { |
| public: |
| static constexpr auto name = Name::name; |
| static FunctionPtr create() { return std::make_shared<FunctionConvertToTimeType>(); } |
| |
| String get_name() const override { return name; } |
| |
| bool is_variadic() const override { return true; } |
| size_t get_number_of_arguments() const override { return 0; } |
| |
| ColumnNumbers get_arguments_that_are_always_constant() const override { return {1}; } |
| |
| // This function should not be called for get DateType Ptr |
| // using the FunctionCast::get_return_type_impl |
| DataTypePtr get_return_type_impl(const ColumnsWithTypeAndName& arguments) const override { |
| return std::make_shared<ToDataType>(); |
| } |
| |
| Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments, |
| uint32_t result, size_t input_rows_count) const override { |
| Status ret_status = Status::OK(); |
| const IDataType* from_type = block.get_by_position(arguments[0]).type.get(); |
| auto call = [&](const auto& types) -> bool { |
| using Types = std::decay_t<decltype(types)>; |
| using LeftDataType = typename Types::LeftType; |
| using RightDataType = typename Types::RightType; |
| |
| ret_status = ConvertImplToTimeType<LeftDataType, RightDataType, Name>::execute( |
| context, block, arguments, result, input_rows_count); |
| return true; |
| }; |
| |
| bool done = call_on_index_and_number_data_type<ToDataType>(from_type->get_primitive_type(), |
| call); |
| if (!done) { |
| return Status::RuntimeError("Illegal type {} of argument of function {}", |
| block.get_by_position(arguments[0]).type->get_name(), |
| get_name()); |
| } |
| |
| return ret_status; |
| } |
| }; |
| |
| class FunctionCast final : public IFunctionBase { |
| public: |
| using WrapperType = |
| std::function<Status(FunctionContext*, Block&, const ColumnNumbers&, size_t, size_t)>; |
| using ElementWrappers = std::vector<WrapperType>; |
| |
| FunctionCast(const char* name_, const DataTypes& argument_types_, |
| const DataTypePtr& return_type_) |
| : name(name_), argument_types(argument_types_), return_type(return_type_) {} |
| |
| const DataTypes& get_argument_types() const override { return argument_types; } |
| const DataTypePtr& get_return_type() const override { return return_type; } |
| |
| PreparedFunctionPtr prepare(FunctionContext* context, const Block& /*sample_block*/, |
| const ColumnNumbers& /*arguments*/, |
| uint32_t /*result*/) const override { |
| return std::make_shared<PreparedFunctionCast>( |
| prepare_unpack_dictionaries(context, get_argument_types()[0], get_return_type()), |
| name); |
| } |
| |
| String get_name() const override { return name; } |
| |
| bool is_use_default_implementation_for_constants() const override { return true; } |
| |
| private: |
| const char* name = nullptr; |
| |
| DataTypes argument_types; |
| DataTypePtr return_type; |
| |
| template <typename DataType> |
| WrapperType create_wrapper(const DataTypePtr& from_type, const DataType* const, |
| bool requested_result_is_nullable) const { |
| FunctionPtr function; |
| |
| if (requested_result_is_nullable && |
| check_and_get_data_type<DataTypeString>(from_type.get())) { |
| /// In case when converting to Nullable type, we apply different parsing rule, |
| /// that will not throw an exception but return NULL in case of malformed input. |
| function = FunctionConvertFromString<DataType, NameCast>::create(); |
| } else if (requested_result_is_nullable && |
| (IsDatelikeV1Types<DataType> || IsDatelikeV2Types<DataType>)&&!( |
| check_and_get_data_type<DataTypeDateTime>(from_type.get()) || |
| check_and_get_data_type<DataTypeDate>(from_type.get()) || |
| check_and_get_data_type<DataTypeDateV2>(from_type.get()) || |
| check_and_get_data_type<DataTypeDateTimeV2>(from_type.get()))) { |
| function = FunctionConvertToTimeType<DataType, NameCast>::create(); |
| } else { |
| function = FunctionTo<DataType>::Type::create(); |
| } |
| |
| /// Check conversion using underlying function |
| { function->get_return_type(ColumnsWithTypeAndName(1, {nullptr, from_type, ""})); } |
| |
| return [function](FunctionContext* context, Block& block, const ColumnNumbers& arguments, |
| const uint32_t result, size_t input_rows_count) { |
| return function->execute(context, block, arguments, result, input_rows_count); |
| }; |
| } |
| |
| WrapperType create_string_wrapper(const DataTypePtr& from_type) const { |
| FunctionPtr function = FunctionToString::create(); |
| |
| /// Check conversion using underlying function |
| { function->get_return_type(ColumnsWithTypeAndName(1, {nullptr, from_type, ""})); } |
| |
| return [function](FunctionContext* context, Block& block, const ColumnNumbers& arguments, |
| const uint32_t result, size_t input_rows_count) { |
| return function->execute(context, block, arguments, result, input_rows_count); |
| }; |
| } |
| |
| template <PrimitiveType FieldType> |
| WrapperType create_decimal_wrapper(const DataTypePtr& from_type, |
| const DataTypeDecimal<FieldType>* to_type) const { |
| using ToDataType = DataTypeDecimal<FieldType>; |
| |
| auto type = from_type->get_primitive_type(); |
| UInt32 precision = to_type->get_precision(); |
| UInt32 scale = to_type->get_scale(); |
| |
| bool ok = is_int_or_bool(type) || is_decimal(type) || is_float_or_double(type) || |
| is_date_type(type) || is_string_type(type); |
| if (!ok) { |
| return create_unsupport_wrapper(from_type->get_name(), to_type->get_name()); |
| } |
| |
| return [type, precision, scale](FunctionContext* context, Block& block, |
| const ColumnNumbers& arguments, const uint32_t result, |
| size_t input_rows_count) { |
| auto res = |
| call_on_index_and_data_type<ToDataType>(type, [&](const auto& types) -> bool { |
| using Types = std::decay_t<decltype(types)>; |
| using LeftDataType = typename Types::LeftType; |
| using RightDataType = typename Types::RightType; |
| |
| auto state = ConvertImpl<LeftDataType, RightDataType, NameCast>::execute( |
| context, block, arguments, result, input_rows_count, |
| PrecisionScaleArg {precision, scale}); |
| if (!state) { |
| throw Exception(state.code(), state.to_string()); |
| } |
| return true; |
| }); |
| |
| /// Additionally check if call_on_index_and_data_type wasn't called at all. |
| if (!res) { |
| auto to = DataTypeDecimal<FieldType>(precision, scale); |
| return Status::RuntimeError("Conversion from {} to {} is not supported", |
| type_to_string(type), to.get_name()); |
| } |
| return Status::OK(); |
| }; |
| } |
| |
| WrapperType create_identity_wrapper(const DataTypePtr&) const { |
| return [](FunctionContext* context, Block& block, const ColumnNumbers& arguments, |
| const uint32_t result, size_t /*input_rows_count*/) { |
| block.get_by_position(result).column = block.get_by_position(arguments.front()).column; |
| return Status::OK(); |
| }; |
| } |
| |
| WrapperType create_nothing_wrapper(const IDataType* to_type) const { |
| ColumnPtr res = to_type->create_column_const_with_default_value(1); |
| return [res](FunctionContext* context, Block& block, const ColumnNumbers&, |
| const uint32_t result, size_t input_rows_count) { |
| /// Column of Nothing type is trivially convertible to any other column |
| block.get_by_position(result).column = |
| res->clone_resized(input_rows_count)->convert_to_full_column_if_const(); |
| return Status::OK(); |
| }; |
| } |
| |
| WrapperType create_unsupport_wrapper(const String error_msg) const { |
| return [error_msg](FunctionContext* /*context*/, Block& /*block*/, |
| const ColumnNumbers& /*arguments*/, const size_t /*result*/, |
| size_t /*input_rows_count*/) { |
| return Status::InvalidArgument(error_msg); |
| }; |
| } |
| |
| WrapperType create_unsupport_wrapper(const String from_type_name, |
| const String to_type_name) const { |
| const String error_msg = fmt::format("Conversion from {} to {} is not supported", |
| from_type_name, to_type_name); |
| return create_unsupport_wrapper(error_msg); |
| } |
| |
| WrapperType create_hll_wrapper(FunctionContext* context, const DataTypePtr& from_type_untyped, |
| const DataTypeHLL& to_type) const { |
| /// Conversion from String through parsing. |
| if (check_and_get_data_type<DataTypeString>(from_type_untyped.get())) { |
| return &ConvertImplGenericFromString::execute; |
| } |
| |
| //TODO if from is not string, it must be HLL? |
| const auto* from_type = check_and_get_data_type<DataTypeHLL>(from_type_untyped.get()); |
| |
| if (!from_type) { |
| return create_unsupport_wrapper( |
| "CAST AS HLL can only be performed between HLL, String " |
| "types"); |
| } |
| |
| return nullptr; |
| } |
| |
| WrapperType create_bitmap_wrapper(FunctionContext* context, |
| const DataTypePtr& from_type_untyped, |
| const DataTypeBitMap& to_type) const { |
| /// Conversion from String through parsing. |
| if (check_and_get_data_type<DataTypeString>(from_type_untyped.get())) { |
| return &ConvertImplGenericFromString::execute; |
| } |
| |
| //TODO if from is not string, it must be BITMAP? |
| const auto* from_type = check_and_get_data_type<DataTypeBitMap>(from_type_untyped.get()); |
| |
| if (!from_type) { |
| return create_unsupport_wrapper( |
| "CAST AS BITMAP can only be performed between BITMAP, String " |
| "types"); |
| } |
| |
| return nullptr; |
| } |
| |
| WrapperType create_array_wrapper(FunctionContext* context, const DataTypePtr& from_type_untyped, |
| const DataTypeArray& to_type) const { |
| /// Conversion from String through parsing. |
| if (check_and_get_data_type<DataTypeString>(from_type_untyped.get())) { |
| return &ConvertImplGenericFromString::execute; |
| } |
| |
| const auto* from_type = check_and_get_data_type<DataTypeArray>(from_type_untyped.get()); |
| |
| if (!from_type) { |
| return create_unsupport_wrapper( |
| "CAST AS Array can only be performed between same-dimensional Array, String " |
| "types"); |
| } |
| |
| DataTypePtr from_nested_type = from_type->get_nested_type(); |
| |
| /// In query SELECT CAST([] AS Array(Array(String))) from type is Array(Nothing) |
| bool from_empty_array = from_nested_type->get_primitive_type() == INVALID_TYPE; |
| |
| if (from_type->get_number_of_dimensions() != to_type.get_number_of_dimensions() && |
| !from_empty_array) { |
| return create_unsupport_wrapper( |
| "CAST AS Array can only be performed between same-dimensional array types"); |
| } |
| |
| const DataTypePtr& to_nested_type = to_type.get_nested_type(); |
| |
| /// Prepare nested type conversion |
| const auto nested_function = |
| prepare_unpack_dictionaries(context, from_nested_type, to_nested_type); |
| |
| return [nested_function, from_nested_type, to_nested_type]( |
| FunctionContext* context, Block& block, const ColumnNumbers& arguments, |
| const uint32_t result, size_t /*input_rows_count*/) -> Status { |
| ColumnPtr from_column = block.get_by_position(arguments.front()).column; |
| |
| const ColumnArray* from_col_array = |
| check_and_get_column<ColumnArray>(from_column.get()); |
| |
| if (from_col_array) { |
| /// create columns for converting nested column containing original and result columns |
| ColumnWithTypeAndName from_nested_column {from_col_array->get_data_ptr(), |
| from_nested_type, ""}; |
| |
| /// convert nested column |
| ColumnNumbers new_arguments {block.columns()}; |
| block.insert(from_nested_column); |
| |
| size_t nested_result = block.columns(); |
| block.insert({to_nested_type, ""}); |
| RETURN_IF_ERROR(nested_function(context, block, new_arguments, nested_result, |
| from_col_array->get_data_ptr()->size())); |
| auto nested_result_column = block.get_by_position(nested_result).column; |
| |
| /// set converted nested column to result |
| block.get_by_position(result).column = ColumnArray::create( |
| nested_result_column, from_col_array->get_offsets_ptr()); |
| } else { |
| return Status::RuntimeError("Illegal column {} for function CAST AS Array", |
| from_column->get_name()); |
| } |
| return Status::OK(); |
| }; |
| } |
| |
| // check jsonb value type and get to_type value |
| WrapperType create_jsonb_wrapper(const DataTypeJsonb& from_type, const DataTypePtr& to_type, |
| bool jsonb_string_as_string) const { |
| switch (to_type->get_primitive_type()) { |
| case PrimitiveType::TYPE_BOOLEAN: |
| return &ConvertImplFromJsonb<PrimitiveType::TYPE_BOOLEAN, ColumnUInt8>::execute; |
| case PrimitiveType::TYPE_TINYINT: |
| return &ConvertImplFromJsonb<PrimitiveType::TYPE_TINYINT, ColumnInt8>::execute; |
| case PrimitiveType::TYPE_SMALLINT: |
| return &ConvertImplFromJsonb<PrimitiveType::TYPE_SMALLINT, ColumnInt16>::execute; |
| case PrimitiveType::TYPE_INT: |
| return &ConvertImplFromJsonb<PrimitiveType::TYPE_INT, ColumnInt32>::execute; |
| case PrimitiveType::TYPE_BIGINT: |
| return &ConvertImplFromJsonb<PrimitiveType::TYPE_BIGINT, ColumnInt64>::execute; |
| case PrimitiveType::TYPE_LARGEINT: |
| return &ConvertImplFromJsonb<PrimitiveType::TYPE_LARGEINT, ColumnInt128>::execute; |
| case PrimitiveType::TYPE_DOUBLE: |
| return &ConvertImplFromJsonb<PrimitiveType::TYPE_DOUBLE, ColumnFloat64>::execute; |
| case PrimitiveType::TYPE_STRING: |
| case PrimitiveType::TYPE_CHAR: |
| case PrimitiveType::TYPE_VARCHAR: |
| if (!jsonb_string_as_string) { |
| // Conversion from String through parsing. |
| return &ConvertImplGenericToString::execute2; |
| } else { |
| return ConvertImplGenericFromJsonb::execute; |
| } |
| default: |
| return ConvertImplGenericFromJsonb::execute; |
| } |
| } |
| |
| // create cresponding jsonb value with type to_type |
| // use jsonb writer to create jsonb value |
| WrapperType create_jsonb_wrapper(const DataTypePtr& from_type, const DataTypeJsonb& to_type, |
| bool string_as_jsonb_string) const { |
| switch (from_type->get_primitive_type()) { |
| case PrimitiveType::TYPE_BOOLEAN: |
| return &ConvertImplNumberToJsonb<ColumnUInt8>::execute; |
| case PrimitiveType::TYPE_TINYINT: |
| return &ConvertImplNumberToJsonb<ColumnInt8>::execute; |
| case PrimitiveType::TYPE_SMALLINT: |
| return &ConvertImplNumberToJsonb<ColumnInt16>::execute; |
| case PrimitiveType::TYPE_INT: |
| return &ConvertImplNumberToJsonb<ColumnInt32>::execute; |
| case PrimitiveType::TYPE_BIGINT: |
| return &ConvertImplNumberToJsonb<ColumnInt64>::execute; |
| case PrimitiveType::TYPE_LARGEINT: |
| return &ConvertImplNumberToJsonb<ColumnInt128>::execute; |
| case PrimitiveType::TYPE_DOUBLE: |
| return &ConvertImplNumberToJsonb<ColumnFloat64>::execute; |
| case PrimitiveType::TYPE_STRING: |
| case PrimitiveType::TYPE_CHAR: |
| case PrimitiveType::TYPE_VARCHAR: |
| if (string_as_jsonb_string) { |
| // We convert column string to jsonb type just add a string jsonb field to dst column instead of parse |
| // each line in original string column. |
| return &ConvertImplStringToJsonbAsJsonbString::execute; |
| } else { |
| return &ConvertImplGenericFromString::execute; |
| } |
| case PrimitiveType::INVALID_TYPE: |
| return &ConvertNothingToJsonb::execute; |
| default: |
| return &ConvertImplGenericToJsonb::execute; |
| } |
| } |
| |
| struct ConvertImplGenericFromVariant { |
| static Status execute(const FunctionCast* fn, FunctionContext* context, Block& block, |
| const ColumnNumbers& arguments, const uint32_t result, |
| size_t input_rows_count) { |
| auto& data_type_to = block.get_by_position(result).type; |
| const auto& col_with_type_and_name = block.get_by_position(arguments[0]); |
| auto& col_from = col_with_type_and_name.column; |
| auto& variant = assert_cast<const ColumnVariant&>(*col_from); |
| ColumnPtr col_to = data_type_to->create_column(); |
| if (!variant.is_finalized()) { |
| // ColumnObject should be finalized before parsing, finalize maybe modify original column structure |
| variant.assume_mutable()->finalize(); |
| } |
| // It's important to convert as many elements as possible in this context. For instance, |
| // if the root of this variant column is a number column, converting it to a number column |
| // is acceptable. However, if the destination type is a string and root is none scalar root, then |
| // we should convert the entire tree to a string. |
| bool is_root_valuable = |
| variant.is_scalar_variant() || |
| (!variant.is_null_root() && |
| variant.get_root_type()->get_primitive_type() != INVALID_TYPE && |
| !is_string_type(data_type_to->get_primitive_type()) && |
| data_type_to->get_primitive_type() != TYPE_JSONB); |
| if (is_root_valuable) { |
| ColumnPtr nested = variant.get_root(); |
| auto nested_from_type = variant.get_root_type(); |
| // DCHECK(nested_from_type->is_nullable()); |
| DCHECK(!data_type_to->is_nullable()); |
| auto new_context = context->clone(); |
| new_context->set_jsonb_string_as_string(true); |
| // dst type nullable has been removed, so we should remove the inner nullable of root column |
| auto wrapper = fn->prepare_impl( |
| new_context.get(), remove_nullable(nested_from_type), data_type_to, true); |
| Block tmp_block {{remove_nullable(nested), remove_nullable(nested_from_type), ""}}; |
| tmp_block.insert({nullptr, data_type_to, ""}); |
| /// Perform the requested conversion. |
| Status st = wrapper(new_context.get(), tmp_block, {0}, 1, input_rows_count); |
| if (!st.ok()) { |
| // Fill with default values, which is null |
| col_to->assume_mutable()->insert_many_defaults(input_rows_count); |
| col_to = make_nullable(col_to, true); |
| } else { |
| col_to = tmp_block.get_by_position(1).column; |
| // Note: here we should return the nullable result column |
| col_to = wrap_in_nullable( |
| col_to, |
| Block({{nested, nested_from_type, ""}, {col_to, data_type_to, ""}}), |
| {0}, 1, input_rows_count); |
| } |
| } else { |
| if (variant.empty()) { |
| // TODO not found root cause, a tmp fix |
| col_to->assume_mutable()->insert_many_defaults(input_rows_count); |
| col_to = make_nullable(col_to, true); |
| } else if (is_string_type(data_type_to->get_primitive_type())) { |
| // serialize to string |
| return ConvertImplGenericToString::execute2(context, block, arguments, result, |
| input_rows_count); |
| } else if (data_type_to->get_primitive_type() == TYPE_JSONB) { |
| // serialize to json by parsing |
| return ConvertImplGenericToJsonb::execute(context, block, arguments, result, |
| input_rows_count); |
| } else if (!data_type_to->is_nullable() && |
| !is_string_type(data_type_to->get_primitive_type())) { |
| // other types |
| col_to->assume_mutable()->insert_many_defaults(input_rows_count); |
| col_to = make_nullable(col_to, true); |
| } else { |
| assert_cast<ColumnNullable&>(*col_to->assume_mutable()) |
| .insert_many_defaults(input_rows_count); |
| } |
| } |
| if (col_to->size() != input_rows_count) { |
| return Status::InternalError("Unmatched row count {}, expected {}", col_to->size(), |
| input_rows_count); |
| } |
| |
| block.replace_by_position(result, std::move(col_to)); |
| return Status::OK(); |
| } |
| }; |
| |
| struct ConvertImplGenericToVariant { |
| static Status execute(FunctionContext* context, Block& block, |
| const ColumnNumbers& arguments, const uint32_t result, |
| size_t input_rows_count) { |
| // auto& data_type_to = block.get_by_position(result).type; |
| const auto& col_with_type_and_name = block.get_by_position(arguments[0]); |
| auto& from_type = col_with_type_and_name.type; |
| auto& col_from = col_with_type_and_name.column; |
| // set variant root column/type to from column/type |
| auto variant = ColumnVariant::create(true /*always nullable*/); |
| variant->create_root(from_type, col_from->assume_mutable()); |
| block.replace_by_position(result, std::move(variant)); |
| return Status::OK(); |
| } |
| }; |
| |
| // create cresponding variant value to wrap from_type |
| WrapperType create_variant_wrapper(const DataTypePtr& from_type, |
| const DataTypeVariant& to_type) const { |
| return &ConvertImplGenericToVariant::execute; |
| } |
| |
| // create cresponding type convert from variant |
| WrapperType create_variant_wrapper(const DataTypeVariant& from_type, |
| const DataTypePtr& to_type) const { |
| return [this](FunctionContext* context, Block& block, const ColumnNumbers& arguments, |
| const uint32_t result, size_t input_rows_count) -> Status { |
| return ConvertImplGenericFromVariant::execute(this, context, block, arguments, result, |
| input_rows_count); |
| }; |
| } |
| |
| //TODO(Amory) . Need support more cast for key , value for map |
| WrapperType create_map_wrapper(FunctionContext* context, const DataTypePtr& from_type, |
| const DataTypeMap& to_type) const { |
| if (is_string_type(from_type->get_primitive_type())) { |
| return &ConvertImplGenericFromString::execute; |
| } |
| auto from = check_and_get_data_type<DataTypeMap>(from_type.get()); |
| if (!from) { |
| return create_unsupport_wrapper( |
| fmt::format("CAST AS Map can only be performed between Map types or from " |
| "String. from type: {}, to type: {}", |
| from_type->get_name(), to_type.get_name())); |
| } |
| DataTypes from_kv_types; |
| DataTypes to_kv_types; |
| from_kv_types.reserve(2); |
| to_kv_types.reserve(2); |
| from_kv_types.push_back(from->get_key_type()); |
| from_kv_types.push_back(from->get_value_type()); |
| to_kv_types.push_back(to_type.get_key_type()); |
| to_kv_types.push_back(to_type.get_value_type()); |
| |
| auto kv_wrappers = get_element_wrappers(context, from_kv_types, to_kv_types); |
| return [kv_wrappers, from_kv_types, to_kv_types]( |
| FunctionContext* context, Block& block, const ColumnNumbers& arguments, |
| const uint32_t result, size_t /*input_rows_count*/) -> Status { |
| auto& from_column = block.get_by_position(arguments.front()).column; |
| auto from_col_map = check_and_get_column<ColumnMap>(from_column.get()); |
| if (!from_col_map) { |
| return Status::RuntimeError("Illegal column {} for function CAST AS MAP", |
| from_column->get_name()); |
| } |
| |
| Columns converted_columns(2); |
| ColumnsWithTypeAndName columnsWithTypeAndName(2); |
| columnsWithTypeAndName[0] = {from_col_map->get_keys_ptr(), from_kv_types[0], ""}; |
| columnsWithTypeAndName[1] = {from_col_map->get_values_ptr(), from_kv_types[1], ""}; |
| |
| for (size_t i = 0; i < 2; ++i) { |
| ColumnNumbers element_arguments {block.columns()}; |
| block.insert(columnsWithTypeAndName[i]); |
| size_t element_result = block.columns(); |
| block.insert({to_kv_types[i], ""}); |
| RETURN_IF_ERROR(kv_wrappers[i](context, block, element_arguments, element_result, |
| columnsWithTypeAndName[i].column->size())); |
| converted_columns[i] = block.get_by_position(element_result).column; |
| } |
| |
| block.get_by_position(result).column = ColumnMap::create( |
| converted_columns[0], converted_columns[1], from_col_map->get_offsets_ptr()); |
| return Status::OK(); |
| }; |
| } |
| |
| ElementWrappers get_element_wrappers(FunctionContext* context, |
| const DataTypes& from_element_types, |
| const DataTypes& to_element_types) const { |
| DCHECK(from_element_types.size() == to_element_types.size()); |
| ElementWrappers element_wrappers; |
| element_wrappers.reserve(from_element_types.size()); |
| for (size_t i = 0; i < from_element_types.size(); ++i) { |
| const DataTypePtr& from_element_type = from_element_types[i]; |
| const DataTypePtr& to_element_type = to_element_types[i]; |
| element_wrappers.push_back( |
| prepare_unpack_dictionaries(context, from_element_type, to_element_type)); |
| } |
| return element_wrappers; |
| } |
| |
| // check struct value type and get to_type value |
| // TODO: need handle another type to cast struct |
| WrapperType create_struct_wrapper(FunctionContext* context, const DataTypePtr& from_type, |
| const DataTypeStruct& to_type) const { |
| // support CAST AS Struct from string |
| if (is_string_type(from_type->get_primitive_type())) { |
| return &ConvertImplGenericFromString::execute; |
| } |
| |
| // only support CAST AS Struct from struct or string types |
| auto from = check_and_get_data_type<DataTypeStruct>(from_type.get()); |
| if (!from) { |
| return create_unsupport_wrapper( |
| fmt::format("CAST AS Struct can only be performed between struct types or from " |
| "String. Left type: {}, right type: {}", |
| from_type->get_name(), to_type.get_name())); |
| } |
| |
| const auto& from_element_types = from->get_elements(); |
| const auto& to_element_types = to_type.get_elements(); |
| // only support CAST AS Struct from struct type with same number of elements |
| if (from_element_types.size() != to_element_types.size()) { |
| return create_unsupport_wrapper( |
| fmt::format("CAST AS Struct can only be performed between struct types with " |
| "the same number of elements. Left type: {}, right type: {}", |
| from_type->get_name(), to_type.get_name())); |
| } |
| |
| auto element_wrappers = get_element_wrappers(context, from_element_types, to_element_types); |
| return [element_wrappers, from_element_types, to_element_types]( |
| FunctionContext* context, Block& block, const ColumnNumbers& arguments, |
| const uint32_t result, size_t /*input_rows_count*/) -> Status { |
| auto& from_column = block.get_by_position(arguments.front()).column; |
| auto from_col_struct = check_and_get_column<ColumnStruct>(from_column.get()); |
| if (!from_col_struct) { |
| return Status::RuntimeError("Illegal column {} for function CAST AS Struct", |
| from_column->get_name()); |
| } |
| |
| size_t elements_num = to_element_types.size(); |
| Columns converted_columns(elements_num); |
| for (size_t i = 0; i < elements_num; ++i) { |
| ColumnWithTypeAndName from_element_column {from_col_struct->get_column_ptr(i), |
| from_element_types[i], ""}; |
| ColumnNumbers element_arguments {block.columns()}; |
| block.insert(from_element_column); |
| |
| size_t element_result = block.columns(); |
| block.insert({to_element_types[i], ""}); |
| |
| RETURN_IF_ERROR(element_wrappers[i](context, block, element_arguments, |
| element_result, |
| from_col_struct->get_column(i).size())); |
| converted_columns[i] = block.get_by_position(element_result).column; |
| } |
| |
| block.get_by_position(result).column = ColumnStruct::create(converted_columns); |
| return Status::OK(); |
| }; |
| } |
| |
| WrapperType prepare_unpack_dictionaries(FunctionContext* context, const DataTypePtr& from_type, |
| const DataTypePtr& to_type) const { |
| const auto& from_nested = from_type; |
| const auto& to_nested = to_type; |
| |
| if (from_type->is_null_literal()) { |
| if (!to_nested->is_nullable()) { |
| return create_unsupport_wrapper("Cannot convert NULL to a non-nullable type"); |
| } |
| |
| return [](FunctionContext* context, Block& block, const ColumnNumbers&, |
| const uint32_t result, size_t input_rows_count) { |
| auto& res = block.get_by_position(result); |
| res.column = res.type->create_column_const_with_default_value(input_rows_count) |
| ->convert_to_full_column_if_const(); |
| return Status::OK(); |
| }; |
| } |
| |
| bool skip_not_null_check = false; |
| auto wrapper = |
| prepare_remove_nullable(context, from_nested, to_nested, skip_not_null_check); |
| |
| return wrapper; |
| } |
| |
| static bool need_replace_null_data_to_default(FunctionContext* context, |
| const DataTypePtr& from_type, |
| const DataTypePtr& to_type) { |
| if (from_type->equals(*to_type)) { |
| return false; |
| } |
| |
| auto make_default_wrapper = [&](const auto& types) -> bool { |
| using Types = std::decay_t<decltype(types)>; |
| using ToDataType = typename Types::LeftType; |
| |
| if constexpr (!(IsDataTypeDecimalOrNumber<ToDataType> || |
| IsDatelikeV1Types<ToDataType> || IsDatelikeV2Types<ToDataType> || |
| std::is_same_v<ToDataType, DataTypeTimeV2>)) { |
| return false; |
| } |
| return call_on_index_and_data_type< |
| ToDataType>(from_type->get_primitive_type(), [&](const auto& types2) -> bool { |
| using Types2 = std::decay_t<decltype(types2)>; |
| using FromDataType = typename Types2::LeftType; |
| if constexpr (!(IsDataTypeDecimalOrNumber<FromDataType> || |
| IsDatelikeV1Types<FromDataType> || |
| IsDatelikeV2Types<FromDataType> || |
| std::is_same_v<FromDataType, DataTypeTimeV2>)) { |
| return false; |
| } |
| if constexpr (IsDataTypeDecimal<FromDataType> || IsDataTypeDecimal<ToDataType>) { |
| using FromFieldType = typename FromDataType::FieldType; |
| using ToFieldType = typename ToDataType::FieldType; |
| UInt32 from_precision = NumberTraits::max_ascii_len<FromFieldType>(); |
| UInt32 from_scale = 0; |
| |
| if constexpr (IsDataTypeDecimal<FromDataType>) { |
| const auto* from_decimal_type = |
| check_and_get_data_type<FromDataType>(from_type.get()); |
| from_precision = |
| NumberTraits::max_ascii_len<typename FromFieldType::NativeType>(); |
| from_scale = from_decimal_type->get_scale(); |
| } |
| |
| UInt32 to_max_digits = 0; |
| UInt32 to_precision = 0; |
| UInt32 to_scale = 0; |
| |
| ToFieldType max_result {0}; |
| ToFieldType min_result {0}; |
| if constexpr (IsDataTypeDecimal<ToDataType>) { |
| to_max_digits = |
| NumberTraits::max_ascii_len<typename ToFieldType::NativeType>(); |
| |
| const auto* to_decimal_type = |
| check_and_get_data_type<ToDataType>(to_type.get()); |
| to_precision = to_decimal_type->get_precision(); |
| ToDataType::check_type_precision(to_precision); |
| |
| to_scale = to_decimal_type->get_scale(); |
| ToDataType::check_type_scale(to_scale); |
| |
| max_result = ToDataType::get_max_digits_number(to_precision); |
| min_result = -max_result; |
| } |
| if constexpr (std::is_integral_v<ToFieldType> || |
| std::is_floating_point_v<ToFieldType>) { |
| max_result = type_limit<ToFieldType>::max(); |
| min_result = type_limit<ToFieldType>::min(); |
| to_max_digits = NumberTraits::max_ascii_len<ToFieldType>(); |
| to_precision = to_max_digits; |
| } |
| |
| bool narrow_integral = |
| context->check_overflow_for_decimal() && |
| (to_precision - to_scale) <= (from_precision - from_scale); |
| |
| bool multiply_may_overflow = context->check_overflow_for_decimal(); |
| if (to_scale > from_scale) { |
| multiply_may_overflow &= |
| (from_precision + to_scale - from_scale) >= to_max_digits; |
| } |
| return narrow_integral || multiply_may_overflow; |
| } |
| return false; |
| }); |
| }; |
| |
| return call_on_index_and_data_type<void>(to_type->get_primitive_type(), |
| make_default_wrapper); |
| } |
| |
| WrapperType prepare_remove_nullable(FunctionContext* context, const DataTypePtr& from_type, |
| const DataTypePtr& to_type, |
| bool skip_not_null_check) const { |
| /// Determine whether pre-processing and/or post-processing must take place during conversion. |
| bool result_is_nullable = to_type->is_nullable(); |
| |
| if (result_is_nullable) { |
| return [this, from_type, to_type](FunctionContext* context, Block& block, |
| const ColumnNumbers& arguments, const uint32_t result, |
| size_t input_rows_count) { |
| auto from_type_not_nullable = remove_nullable(from_type); |
| auto to_type_not_nullable = remove_nullable(to_type); |
| |
| bool replace_null_data_to_default = need_replace_null_data_to_default( |
| context, from_type_not_nullable, to_type_not_nullable); |
| |
| auto nested_result_index = block.columns(); |
| block.insert(block.get_by_position(result).get_nested()); |
| auto nested_source_index = block.columns(); |
| block.insert(block.get_by_position(arguments[0]) |
| .get_nested(replace_null_data_to_default)); |
| |
| RETURN_IF_ERROR(prepare_impl(context, from_type_not_nullable, to_type_not_nullable, |
| true)(context, block, {nested_source_index}, |
| nested_result_index, input_rows_count)); |
| |
| block.get_by_position(result).column = |
| wrap_in_nullable(block.get_by_position(nested_result_index).column, block, |
| arguments, result, input_rows_count); |
| |
| block.erase(nested_source_index); |
| block.erase(nested_result_index); |
| return Status::OK(); |
| }; |
| } else { |
| return prepare_impl(context, from_type, to_type, false); |
| } |
| } |
| |
| /// 'from_type' and 'to_type' are nested types in case of Nullable. |
| /// 'requested_result_is_nullable' is true if CAST to Nullable type is requested. |
| WrapperType prepare_impl(FunctionContext* context, const DataTypePtr& origin_from_type, |
| const DataTypePtr& origin_to_type, |
| bool requested_result_is_nullable) const { |
| auto to_type = get_serialized_type(origin_to_type); |
| auto from_type = get_serialized_type(origin_from_type); |
| |
| if (from_type->equals(*to_type)) { |
| return create_identity_wrapper(from_type); |
| } |
| |
| // variant needs to be judged first |
| if (to_type->get_primitive_type() == PrimitiveType::TYPE_VARIANT) { |
| return create_variant_wrapper(from_type, static_cast<const DataTypeVariant&>(*to_type)); |
| } |
| if (from_type->get_primitive_type() == PrimitiveType::TYPE_VARIANT) { |
| return create_variant_wrapper(static_cast<const DataTypeVariant&>(*from_type), to_type); |
| } |
| |
| switch (from_type->get_primitive_type()) { |
| case PrimitiveType::INVALID_TYPE: |
| return create_nothing_wrapper(to_type.get()); |
| case PrimitiveType::TYPE_JSONB: |
| return create_jsonb_wrapper(static_cast<const DataTypeJsonb&>(*from_type), to_type, |
| context ? context->jsonb_string_as_string() : false); |
| default: |
| break; |
| } |
| |
| WrapperType ret; |
| |
| auto make_default_wrapper = [&](const auto& types) -> bool { |
| using Types = std::decay_t<decltype(types)>; |
| using ToDataType = typename Types::LeftType; |
| |
| if constexpr (std::is_same_v<ToDataType, DataTypeUInt8> || |
| std::is_same_v<ToDataType, DataTypeInt8> || |
| std::is_same_v<ToDataType, DataTypeInt16> || |
| std::is_same_v<ToDataType, DataTypeInt32> || |
| std::is_same_v<ToDataType, DataTypeInt64> || |
| std::is_same_v<ToDataType, DataTypeInt128> || |
| std::is_same_v<ToDataType, DataTypeFloat32> || |
| std::is_same_v<ToDataType, DataTypeFloat64> || |
| std::is_same_v<ToDataType, DataTypeDate> || |
| std::is_same_v<ToDataType, DataTypeDateTime> || |
| std::is_same_v<ToDataType, DataTypeDateV2> || |
| std::is_same_v<ToDataType, DataTypeDateTimeV2> || |
| std::is_same_v<ToDataType, DataTypeTimeV2> || |
| std::is_same_v<ToDataType, DataTypeIPv4> || |
| std::is_same_v<ToDataType, DataTypeIPv6>) { |
| ret = create_wrapper(from_type, check_and_get_data_type<ToDataType>(to_type.get()), |
| requested_result_is_nullable); |
| return true; |
| } |
| |
| if constexpr (std::is_same_v<ToDataType, DataTypeDecimal32> || |
| std::is_same_v<ToDataType, DataTypeDecimal64> || |
| std::is_same_v<ToDataType, DataTypeDecimalV2> || |
| std::is_same_v<ToDataType, DataTypeDecimal128> || |
| std::is_same_v<ToDataType, DataTypeDecimal256>) { |
| ret = create_decimal_wrapper(from_type, |
| check_and_get_data_type<ToDataType>(to_type.get())); |
| return true; |
| } |
| |
| return false; |
| }; |
| |
| if (call_on_index_and_data_type<void>(to_type->get_primitive_type(), |
| make_default_wrapper)) { |
| return ret; |
| } |
| |
| switch (to_type->get_primitive_type()) { |
| case PrimitiveType::TYPE_CHAR: |
| case PrimitiveType::TYPE_VARCHAR: |
| case PrimitiveType::TYPE_STRING: |
| return create_string_wrapper(from_type); |
| case PrimitiveType::TYPE_ARRAY: |
| return create_array_wrapper(context, from_type, |
| static_cast<const DataTypeArray&>(*to_type)); |
| case PrimitiveType::TYPE_STRUCT: |
| return create_struct_wrapper(context, from_type, |
| static_cast<const DataTypeStruct&>(*to_type)); |
| case PrimitiveType::TYPE_MAP: |
| return create_map_wrapper(context, from_type, |
| static_cast<const DataTypeMap&>(*to_type)); |
| case PrimitiveType::TYPE_HLL: |
| return create_hll_wrapper(context, from_type, |
| static_cast<const DataTypeHLL&>(*to_type)); |
| case PrimitiveType::TYPE_BITMAP: |
| return create_bitmap_wrapper(context, from_type, |
| static_cast<const DataTypeBitMap&>(*to_type)); |
| case PrimitiveType::TYPE_JSONB: |
| return create_jsonb_wrapper(from_type, static_cast<const DataTypeJsonb&>(*to_type), |
| context ? context->string_as_jsonb_string() : false); |
| default: |
| break; |
| } |
| |
| return create_unsupport_wrapper(from_type->get_name(), to_type->get_name()); |
| } |
| }; |
| |
| class FunctionBuilderCast : public FunctionBuilderImpl { |
| public: |
| static constexpr auto name = "CAST"; |
| static FunctionBuilderPtr create() { return std::make_shared<FunctionBuilderCast>(); } |
| |
| FunctionBuilderCast() = default; |
| |
| String get_name() const override { return name; } |
| |
| size_t get_number_of_arguments() const override { return 2; } |
| |
| ColumnNumbers get_arguments_that_are_always_constant() const override { return {1}; } |
| |
| protected: |
| FunctionBasePtr build_impl(const ColumnsWithTypeAndName& arguments, |
| const DataTypePtr& return_type) const override { |
| DataTypes data_types(arguments.size()); |
| |
| for (size_t i = 0; i < arguments.size(); ++i) data_types[i] = arguments[i].type; |
| |
| return std::make_shared<FunctionCast>(name, data_types, return_type); |
| } |
| |
| DataTypePtr get_return_type_impl(const ColumnsWithTypeAndName& arguments) const override { |
| DataTypePtr type = arguments[1].type; |
| DCHECK(type != nullptr); |
| bool need_to_be_nullable = false; |
| // 1. from_type is nullable |
| need_to_be_nullable |= arguments[0].type->is_nullable(); |
| // 2. from_type is string, to_type is not string |
| need_to_be_nullable |= (is_string_type(arguments[0].type->get_primitive_type())) && |
| (!is_string_type(type->get_primitive_type())); |
| // 3. from_type is not DateTime/Date, to_type is DateTime/Date |
| need_to_be_nullable |= |
| (arguments[0].type->get_primitive_type() != PrimitiveType::TYPE_DATE && |
| arguments[0].type->get_primitive_type() != PrimitiveType::TYPE_DATETIME) && |
| (type->get_primitive_type() == PrimitiveType::TYPE_DATE || |
| type->get_primitive_type() == PrimitiveType::TYPE_DATETIME); |
| // 4. from_type is not DateTimeV2/DateV2, to_type is DateTimeV2/DateV2 |
| need_to_be_nullable |= |
| (arguments[0].type->get_primitive_type() != PrimitiveType::TYPE_DATEV2 && |
| arguments[0].type->get_primitive_type() != PrimitiveType::TYPE_DATETIMEV2) && |
| (type->get_primitive_type() == PrimitiveType::TYPE_DATEV2 || |
| type->get_primitive_type() == PrimitiveType::TYPE_DATETIMEV2); |
| if (need_to_be_nullable && !type->is_nullable()) { |
| return make_nullable(type); |
| } |
| |
| return type; |
| } |
| |
| bool use_default_implementation_for_nulls() const override { return false; } |
| bool use_default_implementation_for_low_cardinality_columns() const override { return false; } |
| }; |
| } // namespace doris::vectorized |