| // Licensed to the Apache Software Foundation (ASF) under one |
| // or more contributor license agreements. See the NOTICE file |
| // distributed with this work for additional information |
| // regarding copyright ownership. The ASF licenses this file |
| // to you under the Apache License, Version 2.0 (the |
| // "License"); you may not use this file except in compliance |
| // with the License. You may obtain a copy of the License at |
| // |
| // http://www.apache.org/licenses/LICENSE-2.0 |
| // |
| // Unless required by applicable law or agreed to in writing, |
| // software distributed under the License is distributed on an |
| // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
| // KIND, either express or implied. See the License for the |
| // specific language governing permissions and limitations |
| // under the License. |
| |
| #include <stdint.h> |
| #include <stdlib.h> |
| |
| #include <boost/iterator/iterator_facade.hpp> |
| // IWYU pragma: no_include <bits/std_abs.h> |
| #include <algorithm> |
| #include <cmath> // IWYU pragma: keep |
| #include <memory> |
| #include <utility> |
| |
| #include "common/status.h" |
| #include "exprs/math_functions.h" |
| #include "util/string_parser.hpp" |
| #include "vec/aggregate_functions/aggregate_function.h" |
| #include "vec/columns/column.h" |
| #include "vec/columns/column_const.h" |
| #include "vec/columns/column_nullable.h" |
| #include "vec/columns/column_string.h" |
| #include "vec/columns/column_vector.h" |
| #include "vec/common/assert_cast.h" |
| #include "vec/common/string_ref.h" |
| #include "vec/core/block.h" |
| #include "vec/core/column_numbers.h" |
| #include "vec/core/column_with_type_and_name.h" |
| #include "vec/core/types.h" |
| #include "vec/data_types/data_type.h" |
| #include "vec/data_types/data_type_nullable.h" |
| #include "vec/data_types/data_type_number.h" |
| #include "vec/data_types/data_type_string.h" |
| #include "vec/functions/function.h" |
| #include "vec/functions/simple_function_factory.h" |
| |
| namespace doris { |
| #include "common/compile_check_begin.h" |
| class FunctionContext; |
| } // namespace doris |
| |
| namespace doris::vectorized { |
| |
| template <typename Impl> |
| class FunctionConv : public IFunction { |
| public: |
| static constexpr auto name = "conv"; |
| String get_name() const override { return name; } |
| static FunctionPtr create() { return std::make_shared<FunctionConv<Impl>>(); } |
| |
| DataTypePtr get_return_type_impl(const DataTypes& arguments) const override { |
| return make_nullable(std::make_shared<DataTypeString>()); |
| } |
| DataTypes get_variadic_argument_types_impl() const override { |
| return {std::make_shared<typename Impl::DataType>(), std::make_shared<DataTypeInt8>(), |
| std::make_shared<DataTypeInt8>()}; |
| } |
| size_t get_number_of_arguments() const override { |
| return get_variadic_argument_types_impl().size(); |
| } |
| |
| Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments, |
| uint32_t result, size_t input_rows_count) const override { |
| auto result_column = ColumnString::create(); |
| auto result_null_map_column = ColumnUInt8::create(input_rows_count, 0); |
| |
| bool col_const[3]; |
| ColumnPtr argument_columns[3]; |
| for (int i = 0; i < 3; ++i) { |
| col_const[i] = is_column_const(*block.get_by_position(arguments[i]).column); |
| } |
| argument_columns[0] = col_const[0] ? static_cast<const ColumnConst&>( |
| *block.get_by_position(arguments[0]).column) |
| .convert_to_full_column() |
| : block.get_by_position(arguments[0]).column; |
| |
| default_preprocess_parameter_columns(argument_columns, col_const, {1, 2}, block, arguments); |
| |
| if (col_const[1] && col_const[2]) { |
| execute_scalar_args( |
| context, |
| assert_cast<const typename Impl::DataType::ColumnType*>( |
| argument_columns[0].get()), |
| assert_cast<const ColumnInt8*>(argument_columns[1].get())->get_element(0), |
| assert_cast<const ColumnInt8*>(argument_columns[2].get())->get_element(0), |
| assert_cast<ColumnString*>(result_column.get()), |
| assert_cast<ColumnUInt8*>(result_null_map_column.get())->get_data(), |
| input_rows_count); |
| } else { |
| execute_straight(context, |
| assert_cast<const typename Impl::DataType::ColumnType*>( |
| argument_columns[0].get()), |
| assert_cast<const ColumnInt8*>(argument_columns[1].get()), |
| assert_cast<const ColumnInt8*>(argument_columns[2].get()), |
| assert_cast<ColumnString*>(result_column.get()), |
| assert_cast<ColumnUInt8*>(result_null_map_column.get())->get_data(), |
| input_rows_count); |
| } |
| |
| block.get_by_position(result).column = |
| ColumnNullable::create(std::move(result_column), std::move(result_null_map_column)); |
| return Status::OK(); |
| } |
| |
| private: |
| // check out of bound. |
| static bool _check_oob(const Int8 src_base, const Int8 dst_base) { |
| return std::abs(src_base) < MathFunctions::MIN_BASE || |
| std::abs(src_base) > MathFunctions::MAX_BASE || |
| std::abs(dst_base) < MathFunctions::MIN_BASE || |
| std::abs(dst_base) > MathFunctions::MAX_BASE; |
| } |
| static void execute_straight(FunctionContext* context, |
| const typename Impl::DataType::ColumnType* data_column, |
| const ColumnInt8* src_base_column, |
| const ColumnInt8* dst_base_column, ColumnString* result_column, |
| NullMap& result_null_map, size_t input_rows_count) { |
| for (size_t i = 0; i < input_rows_count; i++) { |
| if (result_null_map[i]) { |
| result_column->insert_default(); |
| continue; |
| } |
| Int8 src_base = src_base_column->get_element(i); |
| Int8 dst_base = dst_base_column->get_element(i); |
| if (_check_oob(src_base, dst_base)) { |
| result_null_map[i] = true; |
| result_column->insert_default(); |
| } else { |
| Impl::calculate_cell(context, data_column, src_base, dst_base, result_column, |
| result_null_map, i); |
| } |
| } |
| } |
| static void execute_scalar_args(FunctionContext* context, |
| const typename Impl::DataType::ColumnType* data_column, |
| const Int8 src_base, const Int8 dst_base, |
| ColumnString* result_column, NullMap& result_null_map, |
| size_t input_rows_count) { |
| if (_check_oob(src_base, dst_base)) { |
| result_null_map.assign(input_rows_count, UInt8 {true}); |
| result_column->insert_many_defaults(input_rows_count); |
| return; |
| } |
| for (size_t i = 0; i < input_rows_count; i++) { |
| if (result_null_map[i]) { |
| result_column->insert_default(); |
| continue; |
| } |
| Impl::calculate_cell(context, data_column, src_base, dst_base, result_column, |
| result_null_map, i); |
| } |
| } |
| }; |
| |
| struct ConvInt64Impl { |
| using DataType = DataTypeInt64; |
| |
| static void calculate_cell(FunctionContext* context, const DataType::ColumnType* data_column, |
| const Int8 src_base, const Int8 dst_base, |
| ColumnString* result_column, NullMap& result_null_map, |
| size_t index) { |
| Int64 num = data_column->get_element(index); |
| if (src_base < 0 && num >= 0) { |
| result_null_map[index] = true; |
| result_column->insert_default(); |
| return; |
| } |
| |
| int64_t decimal_num = num; |
| if (src_base != 10) { |
| if (!MathFunctions::decimal_in_base_to_decimal(num, src_base, &decimal_num)) { |
| MathFunctions::handle_parse_result(dst_base, &decimal_num, |
| StringParser::PARSE_OVERFLOW); |
| } |
| } |
| StringRef str = MathFunctions::decimal_to_base(context, decimal_num, dst_base); |
| result_column->insert_data(reinterpret_cast<const char*>(str.data), str.size); |
| } |
| }; |
| |
| struct ConvStringImpl { |
| using DataType = DataTypeString; |
| |
| static void calculate_cell(FunctionContext* context, const DataType::ColumnType* data_column, |
| const Int8 src_base, const Int8 dst_base, |
| ColumnString* result_column, NullMap& result_null_map, |
| size_t index) { |
| StringRef str = data_column->get_data_at(index); |
| auto new_size = str.size; |
| // eg: select conv('1.464868',10,2); the result should be return 1. |
| // But StringParser::string_to_int will PARSE_FAILURE and return 0, |
| // so should handle the point part of number firstly if need convert '1.464868' to number 1 |
| if (auto pos = str.to_string_view().find_first_of('.'); pos != std::string::npos) { |
| new_size = pos; |
| } |
| StringParser::ParseResult parse_res; |
| // select conv('ffffffffffffff', 24, 2); |
| // if 'ffffffffffffff' parse as int64_t will be overflow, will be get max value: std::numeric_limits<int64_t>::max() |
| // so change it parse as uint64_t, and return value could still use int64_t, in function decimal_to_base could handle it. |
| // But if the value is still overflow in uint64_t, will get max value of uint64_t |
| int64_t decimal_num = |
| StringParser::string_to_int<uint64_t>(str.data, new_size, src_base, &parse_res); |
| if (src_base < 0 && decimal_num >= 0) { |
| result_null_map[index] = true; |
| result_column->insert_default(); |
| return; |
| } |
| |
| if (!MathFunctions::handle_parse_result(dst_base, &decimal_num, parse_res)) { |
| result_column->insert_data("0", 1); |
| } else { |
| StringRef str_base = MathFunctions::decimal_to_base(context, decimal_num, dst_base); |
| result_column->insert_data(reinterpret_cast<const char*>(str_base.data), str_base.size); |
| } |
| } |
| }; |
| |
| void register_function_conv(SimpleFunctionFactory& factory) { |
| factory.register_function<FunctionConv<ConvInt64Impl>>(); |
| factory.register_function<FunctionConv<ConvStringImpl>>(); |
| } |
| |
| } // namespace doris::vectorized |