blob: 54d5b8bb2cc8e08a1e07e95ba91dbce2acbff5e2 [file] [log] [blame]
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#include <glog/logging.h>
#include <limits.h>
#include <parquet/column_writer.h>
#include <stdint.h>
#include <algorithm>
#include <boost/iterator/iterator_facade.hpp>
#include <cstdint>
#include <cstring>
#include <memory>
#include <string>
#include <tuple>
#include <type_traits>
#include <utility>
#include <vector>
#include "common/status.h"
#include "runtime/decimalv2_value.h"
#include "runtime/define_primitive_type.h"
#include "runtime/runtime_state.h"
#include "runtime/types.h"
#include "udf/udf.h"
#include "util/binary_cast.hpp"
#include "util/datetype_cast.hpp"
#include "util/time.h"
#include "util/time_lut.h"
#include "vec/aggregate_functions/aggregate_function.h"
#include "vec/columns/column.h"
#include "vec/columns/column_const.h"
#include "vec/columns/column_nullable.h"
#include "vec/columns/column_string.h"
#include "vec/columns/column_vector.h"
#include "vec/columns/columns_number.h"
#include "vec/common/assert_cast.h"
#include "vec/common/pod_array_fwd.h"
#include "vec/common/string_ref.h"
#include "vec/core/block.h"
#include "vec/core/column_numbers.h"
#include "vec/core/column_with_type_and_name.h"
#include "vec/core/columns_with_type_and_name.h"
#include "vec/core/types.h"
#include "vec/data_types/data_type.h"
#include "vec/data_types/data_type_date.h"
#include "vec/data_types/data_type_date_or_datetime_v2.h"
#include "vec/data_types/data_type_date_time.h"
#include "vec/data_types/data_type_decimal.h"
#include "vec/data_types/data_type_nullable.h"
#include "vec/data_types/data_type_number.h"
#include "vec/data_types/data_type_string.h"
#include "vec/functions/function.h"
#include "vec/functions/simple_function_factory.h"
#include "vec/runtime/vdatetime_value.h"
#include "vec/utils/util.hpp"
namespace doris::vectorized {
template <typename DateType>
struct StrToDate {
static constexpr auto name = "str_to_date";
static bool is_variadic() { return false; }
static size_t get_number_of_arguments() { return 2; }
static DataTypes get_variadic_argument_types() {
return {std::make_shared<DataTypeString>(), std::make_shared<DataTypeString>()};
}
static DataTypePtr get_return_type_impl(const DataTypes& arguments) {
if constexpr (IsDataTypeDateTimeV2<DateType>) {
// max scale
return make_nullable(std::make_shared<DataTypeDateTimeV2>(6));
}
return make_nullable(std::make_shared<DateType>());
}
static StringRef rewrite_specific_format(const char* raw_str, size_t str_size) {
const static std::string specific_format_strs[3] = {"yyyyMMdd", "yyyy-MM-dd",
"yyyy-MM-dd HH:mm:ss"};
const static std::string specific_format_rewrite[3] = {"%Y%m%d", "%Y-%m-%d",
"%Y-%m-%d %H:%i:%s"};
for (int i = 0; i < 3; i++) {
const StringRef specific_format {specific_format_strs[i].data(),
specific_format_strs[i].size()};
if (specific_format == StringRef {raw_str, str_size}) {
return {specific_format_rewrite[i].data(), specific_format_rewrite[i].size()};
}
}
return {raw_str, str_size};
}
static Status execute(FunctionContext* context, Block& block, const ColumnNumbers& arguments,
uint32_t result, size_t input_rows_count) {
auto null_map = ColumnUInt8::create(input_rows_count, 0);
const auto& col0 = block.get_by_position(arguments[0]).column;
bool col_const[2] = {is_column_const(*col0)};
ColumnPtr argument_columns[2] = {
col_const[0] ? static_cast<const ColumnConst&>(*col0).convert_to_full_column()
: col0};
std::tie(argument_columns[1], col_const[1]) =
unpack_if_const(block.get_by_position(arguments[1]).column);
auto specific_str_column = assert_cast<const ColumnString*>(argument_columns[0].get());
auto specific_char_column = assert_cast<const ColumnString*>(argument_columns[1].get());
auto& ldata = specific_str_column->get_chars();
auto& loffsets = specific_str_column->get_offsets();
auto& rdata = specific_char_column->get_chars();
auto& roffsets = specific_char_column->get_offsets();
// Because of we cant distinguish by return_type when we find function. so the return_type may NOT be same with real return type
// which decided by FE. that's found by which.
ColumnPtr res = nullptr;
switch (block.get_by_position(result).type->get_primitive_type()) {
case PrimitiveType::TYPE_DATETIMEV2: {
res = ColumnDateTimeV2::create();
if (col_const[1]) {
execute_impl_const_right<DataTypeDateTimeV2>(
context, ldata, loffsets, specific_char_column->get_data_at(0),
static_cast<ColumnDateTimeV2*>(res->assume_mutable().get())->get_data(),
null_map->get_data());
} else {
execute_impl<DataTypeDateTimeV2>(
context, ldata, loffsets, rdata, roffsets,
static_cast<ColumnDateTimeV2*>(res->assume_mutable().get())->get_data(),
null_map->get_data());
}
break;
}
case PrimitiveType::TYPE_DATEV2: {
res = ColumnDateV2::create();
if (col_const[1]) {
execute_impl_const_right<DataTypeDateV2>(
context, ldata, loffsets, specific_char_column->get_data_at(0),
static_cast<ColumnDateV2*>(res->assume_mutable().get())->get_data(),
null_map->get_data());
} else {
execute_impl<DataTypeDateV2>(
context, ldata, loffsets, rdata, roffsets,
static_cast<ColumnDateV2*>(res->assume_mutable().get())->get_data(),
null_map->get_data());
}
break;
}
default: {
res = ColumnDateTime::create();
if (col_const[1]) {
execute_impl_const_right<DataTypeDateTime>(
context, ldata, loffsets, specific_char_column->get_data_at(0),
static_cast<ColumnDateTime*>(res->assume_mutable().get())->get_data(),
null_map->get_data());
} else {
execute_impl<DataTypeDateTime>(
context, ldata, loffsets, rdata, roffsets,
static_cast<ColumnDateTime*>(res->assume_mutable().get())->get_data(),
null_map->get_data());
}
}
}
block.get_by_position(result).column = ColumnNullable::create(res, std::move(null_map));
return Status::OK();
}
private:
template <typename ArgDateType,
typename DateValueType = date_cast::TypeToValueTypeV<ArgDateType>,
typename NativeType = date_cast::TypeToColumnV<ArgDateType>>
static void execute_impl(FunctionContext* context, const ColumnString::Chars& ldata,
const ColumnString::Offsets& loffsets,
const ColumnString::Chars& rdata,
const ColumnString::Offsets& roffsets, PaddedPODArray<NativeType>& res,
NullMap& null_map) {
size_t size = loffsets.size();
res.resize(size);
for (size_t i = 0; i < size; ++i) {
const char* l_raw_str = reinterpret_cast<const char*>(&ldata[loffsets[i - 1]]);
size_t l_str_size = loffsets[i] - loffsets[i - 1];
const char* r_raw_str = reinterpret_cast<const char*>(&rdata[roffsets[i - 1]]);
size_t r_str_size = roffsets[i] - roffsets[i - 1];
const StringRef format_str = rewrite_specific_format(r_raw_str, r_str_size);
_execute_inner_loop<DateValueType, NativeType>(l_raw_str, l_str_size, format_str.data,
format_str.size, context, res, null_map,
i);
}
}
template <typename ArgDateType,
typename DateValueType = date_cast::TypeToValueTypeV<ArgDateType>,
typename NativeType = date_cast::TypeToColumnV<ArgDateType>>
static void execute_impl_const_right(FunctionContext* context, const ColumnString::Chars& ldata,
const ColumnString::Offsets& loffsets,
const StringRef& rdata, PaddedPODArray<NativeType>& res,
NullMap& null_map) {
size_t size = loffsets.size();
res.resize(size);
const StringRef format_str = rewrite_specific_format(rdata.data, rdata.size);
for (size_t i = 0; i < size; ++i) {
const char* l_raw_str = reinterpret_cast<const char*>(&ldata[loffsets[i - 1]]);
size_t l_str_size = loffsets[i] - loffsets[i - 1];
_execute_inner_loop<DateValueType, NativeType>(l_raw_str, l_str_size, format_str.data,
format_str.size, context, res, null_map,
i);
}
}
template <typename DateValueType, typename NativeType>
static void _execute_inner_loop(const char* l_raw_str, size_t l_str_size, const char* r_raw_str,
size_t r_str_size, FunctionContext* context,
PaddedPODArray<NativeType>& res, NullMap& null_map,
size_t index) {
auto& ts_val = *reinterpret_cast<DateValueType*>(&res[index]);
if (!ts_val.from_date_format_str(r_raw_str, r_str_size, l_raw_str, l_str_size)) {
null_map[index] = 1;
} else {
if constexpr (std::is_same_v<DateValueType, VecDateTimeValue>) {
if (context->get_return_type()->get_primitive_type() ==
doris::PrimitiveType::TYPE_DATETIME) {
ts_val.to_datetime();
} else {
ts_val.cast_to_date();
}
}
}
}
};
struct MakeDateImpl {
static constexpr auto name = "makedate";
static bool is_variadic() { return false; }
static size_t get_number_of_arguments() { return 2; }
static DataTypes get_variadic_argument_types() { return {}; }
static DataTypePtr get_return_type_impl(const DataTypes& arguments) {
return make_nullable(std::make_shared<DataTypeDateTime>());
}
static Status execute(FunctionContext* context, Block& block, const ColumnNumbers& arguments,
uint32_t result, size_t input_rows_count) {
auto null_map = ColumnUInt8::create(input_rows_count, 0);
DCHECK_EQ(arguments.size(), 2);
const auto& col0 = block.get_by_position(arguments[0]).column;
bool col_const[2] = {is_column_const(*col0)};
ColumnPtr argument_columns[2] = {
col_const[0] ? static_cast<const ColumnConst&>(*col0).convert_to_full_column()
: col0};
std::tie(argument_columns[1], col_const[1]) =
unpack_if_const(block.get_by_position(arguments[1]).column);
ColumnPtr res = nullptr;
switch (block.get_by_position(result).type->get_primitive_type()) {
case PrimitiveType::TYPE_DATEV2: {
res = ColumnDateV2::create();
if (col_const[1]) {
execute_impl_right_const<DataTypeDateV2>(
static_cast<const ColumnVector<Int32>*>(argument_columns[0].get())
->get_data(),
static_cast<const ColumnVector<Int32>*>(argument_columns[1].get())
->get_element(0),
static_cast<ColumnDateV2*>(res->assume_mutable().get())->get_data(),
null_map->get_data());
} else {
execute_impl<DataTypeDateV2>(
static_cast<const ColumnVector<Int32>*>(argument_columns[0].get())
->get_data(),
static_cast<const ColumnVector<Int32>*>(argument_columns[1].get())
->get_data(),
static_cast<ColumnDateV2*>(res->assume_mutable().get())->get_data(),
null_map->get_data());
}
break;
}
case PrimitiveType::TYPE_DATETIMEV2: {
res = ColumnDateTimeV2::create();
if (col_const[1]) {
execute_impl_right_const<DataTypeDateTimeV2>(
static_cast<const ColumnVector<Int32>*>(argument_columns[0].get())
->get_data(),
static_cast<const ColumnVector<Int32>*>(argument_columns[1].get())
->get_element(0),
static_cast<ColumnDateTimeV2*>(res->assume_mutable().get())->get_data(),
null_map->get_data());
} else {
execute_impl<DataTypeDateTimeV2>(
static_cast<const ColumnVector<Int32>*>(argument_columns[0].get())
->get_data(),
static_cast<const ColumnVector<Int32>*>(argument_columns[1].get())
->get_data(),
static_cast<ColumnDateTimeV2*>(res->assume_mutable().get())->get_data(),
null_map->get_data());
}
break;
}
default: {
res = ColumnDateTime::create();
if (col_const[1]) {
execute_impl_right_const<DataTypeDateTime>(
static_cast<const ColumnVector<Int32>*>(argument_columns[0].get())
->get_data(),
static_cast<const ColumnVector<Int32>*>(argument_columns[1].get())
->get_element(0),
static_cast<ColumnDateTime*>(res->assume_mutable().get())->get_data(),
null_map->get_data());
} else {
execute_impl<DataTypeDateTime>(
static_cast<const ColumnVector<Int32>*>(argument_columns[0].get())
->get_data(),
static_cast<const ColumnVector<Int32>*>(argument_columns[1].get())
->get_data(),
static_cast<ColumnDateTime*>(res->assume_mutable().get())->get_data(),
null_map->get_data());
}
}
}
block.get_by_position(result).column = ColumnNullable::create(res, std::move(null_map));
return Status::OK();
}
private:
template <typename DateType, typename DateValueType = date_cast::TypeToValueTypeV<DateType>,
typename ReturnType = date_cast::TypeToColumnV<DateType>>
static void execute_impl(const PaddedPODArray<Int32>& ldata, const PaddedPODArray<Int32>& rdata,
PaddedPODArray<ReturnType>& res, NullMap& null_map) {
auto len = ldata.size();
res.resize(len);
for (size_t i = 0; i < len; ++i) {
const auto& l = ldata[i];
const auto& r = rdata[i];
if (r <= 0 || l < 0 || l > 9999) {
null_map[i] = 1;
continue;
}
_execute_inner_loop<DateValueType, ReturnType>(l, r, res, null_map, i);
}
}
template <typename DateType, typename DateValueType = date_cast::TypeToValueTypeV<DateType>,
typename ReturnType = date_cast::TypeToColumnV<DateType>>
static void execute_impl_right_const(const PaddedPODArray<Int32>& ldata, Int32 rdata,
PaddedPODArray<ReturnType>& res, NullMap& null_map) {
auto len = ldata.size();
res.resize(len);
const auto& r = rdata;
for (size_t i = 0; i < len; ++i) {
const auto& l = ldata[i];
if (r <= 0 || l < 0 || l > 9999) {
null_map[i] = 1;
continue;
}
_execute_inner_loop<DateValueType, ReturnType>(l, r, res, null_map, i);
}
}
template <typename DateValueType, typename ReturnType>
static void _execute_inner_loop(const int& l, const int& r, PaddedPODArray<ReturnType>& res,
NullMap& null_map, size_t index) {
auto& res_val = *reinterpret_cast<DateValueType*>(&res[index]);
// l checked outside
if constexpr (std::is_same_v<DateValueType, VecDateTimeValue>) {
VecDateTimeValue ts_value = VecDateTimeValue();
ts_value.unchecked_set_time(l, 1, 1, 0, 0, 0);
TimeInterval interval(DAY, r - 1, false);
res_val = ts_value;
if (!res_val.template date_add_interval<DAY>(interval)) {
null_map[index] = 1;
return;
}
res_val.cast_to_date();
} else {
res_val.unchecked_set_time(l, 1, 1, 0, 0, 0, 0);
TimeInterval interval(DAY, r - 1, false);
if (!res_val.template date_add_interval<DAY>(interval)) {
null_map[index] = 1;
}
}
}
};
struct DateTruncState {
using Callback_function =
std::function<void(const ColumnPtr&, ColumnPtr& res, NullMap& null_map, size_t)>;
Callback_function callback_function;
};
template <typename DateType, bool DateArgIsFirst>
struct DateTrunc {
static constexpr auto name = "date_trunc";
using ColumnType = date_cast::TypeToColumnV<DateType>;
using DateValueType = date_cast::TypeToValueTypeV<DateType>;
using ArgType = date_cast::ValueTypeOfColumnV<ColumnType>;
static bool is_variadic() { return true; }
static size_t get_number_of_arguments() { return 2; }
static DataTypes get_variadic_argument_types() {
if constexpr (DateArgIsFirst) {
return {std::make_shared<DateType>(), std::make_shared<DataTypeString>()};
} else {
return {std::make_shared<DataTypeString>(), std::make_shared<DateType>()};
}
}
static DataTypePtr get_return_type_impl(const DataTypes& arguments) {
return make_nullable(std::make_shared<DateType>());
}
static Status open(FunctionContext* context, FunctionContext::FunctionStateScope scope) {
if (scope != FunctionContext::THREAD_LOCAL) {
return Status::OK();
}
if (!context->is_col_constant(DateArgIsFirst ? 1 : 0)) {
return Status::InvalidArgument(
"date_trunc function of time unit argument must be constant.");
}
const auto& data_str =
context->get_constant_col(DateArgIsFirst ? 1 : 0)->column_ptr->get_data_at(0);
std::string lower_str(data_str.data, data_str.size);
std::transform(lower_str.begin(), lower_str.end(), lower_str.begin(),
[](unsigned char c) { return std::tolower(c); });
std::shared_ptr<DateTruncState> state = std::make_shared<DateTruncState>();
if (std::strncmp("year", lower_str.data(), 4) == 0) {
state->callback_function = &execute_impl_right_const<TimeUnit::YEAR>;
} else if (std::strncmp("quarter", lower_str.data(), 7) == 0) {
state->callback_function = &execute_impl_right_const<TimeUnit::QUARTER>;
} else if (std::strncmp("month", lower_str.data(), 5) == 0) {
state->callback_function = &execute_impl_right_const<TimeUnit::MONTH>;
} else if (std::strncmp("week", lower_str.data(), 4) == 0) {
state->callback_function = &execute_impl_right_const<TimeUnit::WEEK>;
} else if (std::strncmp("day", lower_str.data(), 3) == 0) {
state->callback_function = &execute_impl_right_const<TimeUnit::DAY>;
} else if (std::strncmp("hour", lower_str.data(), 4) == 0) {
state->callback_function = &execute_impl_right_const<TimeUnit::HOUR>;
} else if (std::strncmp("minute", lower_str.data(), 6) == 0) {
state->callback_function = &execute_impl_right_const<TimeUnit::MINUTE>;
} else if (std::strncmp("second", lower_str.data(), 6) == 0) {
state->callback_function = &execute_impl_right_const<TimeUnit::SECOND>;
} else {
return Status::RuntimeError(
"Illegal second argument column of function date_trunc. now only support "
"[second,minute,hour,day,week,month,quarter,year]");
}
context->set_function_state(scope, state);
return Status::OK();
}
static Status execute(FunctionContext* context, Block& block, const ColumnNumbers& arguments,
uint32_t result, size_t input_rows_count) {
DCHECK_EQ(arguments.size(), 2);
auto null_map = ColumnUInt8::create(input_rows_count, 0);
const auto& datetime_column = block.get_by_position(arguments[DateArgIsFirst ? 0 : 1])
.column->convert_to_full_column_if_const();
ColumnPtr res = ColumnType::create(input_rows_count);
auto* state = reinterpret_cast<DateTruncState*>(
context->get_function_state(FunctionContext::THREAD_LOCAL));
DCHECK(state != nullptr);
state->callback_function(datetime_column, res, null_map->get_data(), input_rows_count);
block.get_by_position(result).column = ColumnNullable::create(res, std::move(null_map));
return Status::OK();
}
private:
template <TimeUnit Unit>
static void execute_impl_right_const(const ColumnPtr& datetime_column, ColumnPtr& result_column,
NullMap& null_map, size_t input_rows_count) {
auto& data = static_cast<const ColumnType*>(datetime_column.get())->get_data();
auto& res = static_cast<ColumnType*>(result_column->assume_mutable().get())->get_data();
for (size_t i = 0; i < input_rows_count; ++i) {
auto dt = binary_cast<ArgType, DateValueType>(data[i]);
null_map[i] = !dt.template datetime_trunc<Unit>();
res[i] = binary_cast<DateValueType, ArgType>(dt);
}
}
};
class FromDays : public IFunction {
public:
static constexpr auto name = "from_days";
static FunctionPtr create() { return std::make_shared<FromDays>(); }
String get_name() const override { return name; }
size_t get_number_of_arguments() const override { return 1; }
DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
return make_nullable(std::make_shared<DataTypeDate>());
}
Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments,
uint32_t result, size_t input_rows_count) const override {
auto null_map = ColumnUInt8::create(input_rows_count, 0);
ColumnPtr& argument_column = block.get_by_position(arguments[0]).column;
auto data_col = assert_cast<const ColumnVector<Int32>*>(argument_column.get());
ColumnPtr res_column;
if (block.get_by_position(result).type->get_primitive_type() == PrimitiveType::TYPE_DATE) {
res_column = ColumnInt64::create(input_rows_count);
execute_straight<VecDateTimeValue, Int64>(
input_rows_count, null_map->get_data(), data_col->get_data(),
static_cast<ColumnDateTime*>(res_column->assume_mutable().get())->get_data());
} else {
res_column = ColumnDateV2::create(input_rows_count);
execute_straight<DateV2Value<DateV2ValueType>, UInt32>(
input_rows_count, null_map->get_data(), data_col->get_data(),
static_cast<ColumnDateV2*>(res_column->assume_mutable().get())->get_data());
}
block.replace_by_position(
result, ColumnNullable::create(std::move(res_column), std::move(null_map)));
return Status::OK();
}
private:
template <typename DateValueType, typename ReturnType>
void execute_straight(size_t input_rows_count, NullMap& null_map,
const PaddedPODArray<Int32>& data_col,
PaddedPODArray<ReturnType>& res_data) const {
for (int i = 0; i < input_rows_count; i++) {
if constexpr (std::is_same_v<DateValueType, VecDateTimeValue>) {
const auto& cur_data = data_col[i];
auto& ts_value = *reinterpret_cast<DateValueType*>(&res_data[i]);
if (!ts_value.from_date_daynr(cur_data)) {
null_map[i] = 1;
continue;
}
} else {
const auto& cur_data = data_col[i];
auto& ts_value = *reinterpret_cast<DateValueType*>(&res_data[i]);
if (!ts_value.get_date_from_daynr(cur_data)) {
null_map[i] = 1;
}
}
}
}
};
struct UnixTimeStampImpl {
static Int32 trim_timestamp(Int64 timestamp) {
if (timestamp < 0 || timestamp > INT_MAX) {
timestamp = 0;
}
return (Int32)timestamp;
}
static std::pair<Int32, Int32> trim_timestamp(std::pair<Int64, Int64> timestamp) {
if (timestamp.first < 0 || timestamp.first > INT_MAX) {
return {0, 0};
}
return std::make_pair((Int32)timestamp.first, (Int32)timestamp.second);
}
static DataTypes get_variadic_argument_types() { return {}; }
static DataTypePtr get_return_type_impl(const ColumnsWithTypeAndName& arguments) {
return std::make_shared<DataTypeInt32>();
}
static Status execute_impl(FunctionContext* context, Block& block,
const ColumnNumbers& arguments, uint32_t result,
size_t input_rows_count) {
auto col_result = ColumnVector<Int32>::create();
col_result->resize(1);
col_result->get_data()[0] = context->state()->timestamp_ms() / 1000;
auto col_const = ColumnConst::create(std::move(col_result), input_rows_count);
block.replace_by_position(result, std::move(col_const));
return Status::OK();
}
};
template <typename DateType>
struct UnixTimeStampDateImpl {
static DataTypes get_variadic_argument_types() { return {std::make_shared<DateType>()}; }
static DataTypePtr get_return_type_impl(const ColumnsWithTypeAndName& arguments) {
if constexpr (std::is_same_v<DateType, DataTypeDateTimeV2>) {
if (arguments[0].type->is_nullable()) {
UInt32 scale = static_cast<const DataTypeNullable*>(arguments[0].type.get())
->get_nested_type()
->get_scale();
return make_nullable(
std::make_shared<DataTypeDecimal<Decimal64>>(10 + scale, scale));
}
UInt32 scale = arguments[0].type->get_scale();
return std::make_shared<DataTypeDecimal<Decimal64>>(10 + scale, scale);
} else {
if (arguments[0].type->is_nullable()) {
return make_nullable(std::make_shared<DataTypeInt32>());
}
return std::make_shared<DataTypeInt32>();
}
}
static Status execute_impl(FunctionContext* context, Block& block,
const ColumnNumbers& arguments, uint32_t result,
size_t input_rows_count) {
const ColumnPtr& col = block.get_by_position(arguments[0]).column;
DCHECK(!col->is_nullable());
if constexpr (std::is_same_v<DateType, DataTypeDate> ||
std::is_same_v<DateType, DataTypeDateTime>) {
const auto* col_source = assert_cast<const ColumnDate*>(col.get());
auto col_result = ColumnVector<Int32>::create();
auto& col_result_data = col_result->get_data();
col_result->resize(input_rows_count);
for (int i = 0; i < input_rows_count; i++) {
StringRef source = col_source->get_data_at(i);
const auto& ts_value = reinterpret_cast<const VecDateTimeValue&>(*source.data);
int64_t timestamp {};
ts_value.unix_timestamp(&timestamp, context->state()->timezone_obj());
col_result_data[i] = UnixTimeStampImpl::trim_timestamp(timestamp);
}
block.replace_by_position(result, std::move(col_result));
} else if constexpr (std::is_same_v<DateType, DataTypeDateV2>) {
const auto* col_source = assert_cast<const ColumnDateV2*>(col.get());
auto col_result = ColumnVector<Int32>::create();
auto& col_result_data = col_result->get_data();
col_result->resize(input_rows_count);
for (int i = 0; i < input_rows_count; i++) {
StringRef source = col_source->get_data_at(i);
const auto& ts_value =
reinterpret_cast<const DateV2Value<DateV2ValueType>&>(*source.data);
int64_t timestamp {};
const auto valid =
ts_value.unix_timestamp(&timestamp, context->state()->timezone_obj());
DCHECK(valid);
col_result_data[i] = UnixTimeStampImpl::trim_timestamp(timestamp);
}
block.replace_by_position(result, std::move(col_result));
} else { // DatetimeV2
const auto* col_source = assert_cast<const ColumnDateTimeV2*>(col.get());
UInt32 scale = block.get_by_position(arguments[0]).type->get_scale();
auto col_result = ColumnDecimal<Decimal64>::create(input_rows_count, scale);
auto& col_result_data = col_result->get_data();
col_result->resize(input_rows_count);
for (int i = 0; i < input_rows_count; i++) {
StringRef source = col_source->get_data_at(i);
const auto& ts_value =
reinterpret_cast<const DateV2Value<DateTimeV2ValueType>&>(*source.data);
std::pair<int64_t, int64_t> timestamp {};
const auto valid =
ts_value.unix_timestamp(&timestamp, context->state()->timezone_obj());
DCHECK(valid);
auto [sec, ms] = UnixTimeStampImpl::trim_timestamp(timestamp);
auto ms_str = std::to_string(ms).substr(0, scale);
if (ms_str.empty()) {
ms_str = "0";
}
col_result_data[i] = Decimal64::from_int_frac(sec, std::stoll(ms_str), scale).value;
}
block.replace_by_position(result, std::move(col_result));
}
return Status::OK();
}
};
template <typename DateType>
struct UnixTimeStampDatetimeImpl : public UnixTimeStampDateImpl<DateType> {
static DataTypes get_variadic_argument_types() { return {std::make_shared<DateType>()}; }
};
// This impl doesn't use default impl to deal null value.
struct UnixTimeStampStrImpl {
static DataTypes get_variadic_argument_types() {
return {std::make_shared<DataTypeString>(), std::make_shared<DataTypeString>()};
}
static DataTypePtr get_return_type_impl(const ColumnsWithTypeAndName& arguments) {
return make_nullable(std::make_shared<DataTypeDecimal<Decimal64>>(16, 6));
}
static Status execute_impl(FunctionContext* context, Block& block,
const ColumnNumbers& arguments, uint32_t result,
size_t input_rows_count) {
ColumnPtr col_left = nullptr, col_right = nullptr;
bool source_const = false, format_const = false;
std::tie(col_left, source_const) =
unpack_if_const(block.get_by_position(arguments[0]).column);
std::tie(col_right, format_const) =
unpack_if_const(block.get_by_position(arguments[1]).column);
auto col_result = ColumnDecimal<Decimal64>::create(input_rows_count, 6);
auto null_map = ColumnVector<UInt8>::create(input_rows_count);
auto& col_result_data = col_result->get_data();
auto& null_map_data = null_map->get_data();
const auto* col_source = assert_cast<const ColumnString*>(col_left.get());
const auto* col_format = assert_cast<const ColumnString*>(col_right.get());
for (int i = 0; i < input_rows_count; i++) {
StringRef source = col_source->get_data_at(index_check_const(i, source_const));
StringRef fmt = col_format->get_data_at(index_check_const(i, format_const));
DateV2Value<DateTimeV2ValueType> ts_value;
if (!ts_value.from_date_format_str(fmt.data, fmt.size, source.data, source.size)) {
null_map_data[i] = true;
continue;
}
std::pair<int64_t, int64_t> timestamp {};
if (!ts_value.unix_timestamp(&timestamp, context->state()->timezone_obj())) {
null_map_data[i] = true; // impossible now
} else {
null_map_data[i] = false;
auto [sec, ms] = UnixTimeStampImpl::trim_timestamp(timestamp);
// trailing ms
auto ms_str = std::to_string(ms).substr(0, 6);
if (ms_str.empty()) {
ms_str = "0";
}
col_result_data[i] = Decimal64::from_int_frac(sec, std::stoll(ms_str), 6).value;
}
}
block.replace_by_position(
result, ColumnNullable::create(std::move(col_result), std::move(null_map)));
return Status::OK();
}
};
template <typename Impl>
class FunctionUnixTimestamp : public IFunction {
public:
static constexpr auto name = "unix_timestamp";
static FunctionPtr create() { return std::make_shared<FunctionUnixTimestamp<Impl>>(); }
String get_name() const override { return name; }
size_t get_number_of_arguments() const override {
return get_variadic_argument_types_impl().size();
}
DataTypePtr get_return_type_impl(const ColumnsWithTypeAndName& arguments) const override {
return Impl::get_return_type_impl(arguments);
}
DataTypes get_variadic_argument_types_impl() const override {
return Impl::get_variadic_argument_types();
}
Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments,
uint32_t result, size_t input_rows_count) const override {
return Impl::execute_impl(context, block, arguments, result, input_rows_count);
}
};
struct MicroSec {
static constexpr auto name = "microsecond_timestamp";
static constexpr Int64 ratio = 1000000;
};
struct MilliSec {
static constexpr auto name = "millisecond_timestamp";
static constexpr Int64 ratio = 1000;
};
struct Sec {
static constexpr auto name = "second_timestamp";
static constexpr Int64 ratio = 1;
};
template <typename Impl>
class DateTimeToTimestamp : public IFunction {
public:
using ReturnType = Int64;
static constexpr Int64 ratio_to_micro = (1000 * 1000) / Impl::ratio;
static constexpr auto name = Impl::name;
static FunctionPtr create() { return std::make_shared<DateTimeToTimestamp<Impl>>(); }
String get_name() const override { return name; }
size_t get_number_of_arguments() const override { return 1; }
DataTypePtr get_return_type_impl(const ColumnsWithTypeAndName& arguments) const override {
if (arguments[0].type->is_nullable()) {
return make_nullable(std::make_shared<DataTypeInt64>());
}
return std::make_shared<DataTypeInt64>();
}
Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments,
uint32_t result, size_t input_rows_count) const override {
const auto& arg_col = block.get_by_position(arguments[0]).column;
const auto& column_data = assert_cast<const ColumnUInt64&>(*arg_col);
auto res_col = ColumnInt64::create();
auto& res_data = res_col->get_data();
res_col->get_data().resize_fill(input_rows_count, 0);
for (int i = 0; i < input_rows_count; i++) {
StringRef source = column_data.get_data_at(i);
const auto& dt =
reinterpret_cast<const DateV2Value<DateTimeV2ValueType>&>(*source.data);
const cctz::time_zone& time_zone = context->state()->timezone_obj();
int64_t timestamp {0};
auto ret = dt.unix_timestamp(&timestamp, time_zone);
// ret must be true
DCHECK(ret);
auto microsecond = dt.microsecond();
timestamp = timestamp * Impl::ratio + microsecond / ratio_to_micro;
res_data[i] = timestamp;
}
block.replace_by_position(result, std::move(res_col));
return Status::OK();
}
};
template <template <typename> class Impl, typename DateType>
class FunctionDateOrDateTimeToDate : public IFunction {
public:
static constexpr auto name = Impl<DateType>::name;
static FunctionPtr create() {
return std::make_shared<FunctionDateOrDateTimeToDate<Impl, DateType>>();
}
String get_name() const override { return name; }
size_t get_number_of_arguments() const override { return 1; }
bool is_variadic() const override { return true; }
// input DateTime and Date, return Date
// input DateTimeV2 and DateV2, return DateV2
DataTypePtr get_return_type_impl(const ColumnsWithTypeAndName& arguments) const override {
bool is_nullable = false;
for (auto it : arguments) {
is_nullable = is_nullable || it.type->is_nullable();
}
if constexpr (date_cast::IsV1<DateType>()) {
return make_nullable(std::make_shared<DataTypeDate>());
} else {
return is_nullable ? make_nullable(std::make_shared<DataTypeDateV2>())
: std::make_shared<DataTypeDateV2>();
}
}
DataTypes get_variadic_argument_types_impl() const override {
return {std::make_shared<DateType>()};
}
Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments,
uint32_t result, size_t input_rows_count) const override {
return Impl<DateType>::execute_impl(context, block, arguments, result, input_rows_count);
}
};
template <typename DateType>
struct LastDayImpl {
static constexpr auto name = "last_day";
using DateValueType = date_cast::TypeToValueTypeV<DateType>;
using ColumnType = date_cast::TypeToColumnV<DateType>;
using NativeType = date_cast::ValueTypeOfColumnV<ColumnType>;
using ResultType =
std::conditional_t<date_cast::IsV1<DateType>(), DataTypeDate, DataTypeDateV2>;
using ResultColumnType = date_cast::TypeToColumnV<ResultType>;
using ResultNativeType = date_cast::ValueTypeOfColumnV<ResultColumnType>;
static Status execute_impl(FunctionContext* context, Block& block,
const ColumnNumbers& arguments, uint32_t result,
size_t input_rows_count) {
const auto is_nullable = block.get_by_position(result).type->is_nullable();
ColumnPtr res_column;
ColumnPtr argument_column = remove_nullable(block.get_by_position(arguments[0]).column);
if (is_nullable) {
auto null_map = ColumnUInt8::create(input_rows_count, 0);
auto data_col = assert_cast<const ColumnType*>(argument_column.get());
res_column = ResultColumnType::create(input_rows_count);
execute_straight(
input_rows_count, null_map->get_data(), data_col->get_data(),
static_cast<ResultColumnType*>(res_column->assume_mutable().get())->get_data());
block.replace_by_position(result,
ColumnNullable::create(res_column, std::move(null_map)));
} else {
if constexpr (date_cast::IsV2<DateType>()) {
auto data_col = assert_cast<const ColumnType*>(argument_column.get());
res_column = ResultColumnType::create(input_rows_count);
execute_straight(input_rows_count, data_col->get_data(),
static_cast<ResultColumnType*>(res_column->assume_mutable().get())
->get_data());
block.replace_by_position(result, std::move(res_column));
}
}
return Status::OK();
}
static void execute_straight(size_t input_rows_count, NullMap& null_map,
const PaddedPODArray<NativeType>& data_col,
PaddedPODArray<ResultNativeType>& res_data) {
for (int i = 0; i < input_rows_count; i++) {
const auto& cur_data = data_col[i];
auto ts_value = binary_cast<NativeType, DateValueType>(cur_data);
if (!ts_value.is_valid_date()) {
null_map[i] = 1;
continue;
}
int day = get_last_month_day(ts_value.year(), ts_value.month());
// day is definitely legal
if constexpr (date_cast::IsV1<DateType>()) {
ts_value.unchecked_set_time(ts_value.year(), ts_value.month(), day, 0, 0, 0);
ts_value.set_type(TIME_DATE);
res_data[i] = binary_cast<VecDateTimeValue, Int64>(ts_value);
} else if constexpr (std::is_same_v<DateType, DataTypeDateV2>) {
ts_value.template unchecked_set_time_unit<TimeUnit::DAY>(day);
res_data[i] = binary_cast<DateValueType, UInt32>(ts_value);
} else {
ts_value.template unchecked_set_time_unit<TimeUnit::DAY>(day);
ts_value.unchecked_set_time(ts_value.year(), ts_value.month(), day, 0, 0, 0, 0);
UInt64 cast_value = binary_cast<DateValueType, UInt64>(ts_value);
DataTypeDateTimeV2::cast_to_date_v2(cast_value, res_data[i]);
}
}
}
static void execute_straight(size_t input_rows_count,
const PaddedPODArray<NativeType>& data_col,
PaddedPODArray<ResultNativeType>& res_data) {
for (int i = 0; i < input_rows_count; i++) {
const auto& cur_data = data_col[i];
auto ts_value = binary_cast<NativeType, DateValueType>(cur_data);
DCHECK(ts_value.is_valid_date());
int day = get_last_month_day(ts_value.year(), ts_value.month());
ts_value.template unchecked_set_time_unit<TimeUnit::DAY>(day);
if constexpr (std::is_same_v<DateType, DataTypeDateV2>) {
res_data[i] = binary_cast<DateValueType, UInt32>(ts_value);
} else if constexpr (std::is_same_v<DateType, DataTypeDateTimeV2>) {
ts_value.unchecked_set_time(ts_value.year(), ts_value.month(), day, 0, 0, 0, 0);
UInt64 cast_value = binary_cast<DateValueType, UInt64>(ts_value);
DataTypeDateTimeV2::cast_to_date_v2(cast_value, res_data[i]);
}
}
}
static int get_last_month_day(int year, int month) {
bool is_leap_year = doris::is_leap(year);
if (month == 2) {
return is_leap_year ? 29 : 28;
} else {
if (month == 1 || month == 3 || month == 5 || month == 7 || month == 8 || month == 10 ||
month == 12) {
return 31;
} else {
return 30;
}
}
}
};
template <typename DateType>
struct MondayImpl {
static constexpr auto name = "to_monday";
using DateValueType = date_cast::TypeToValueTypeV<DateType>;
using ColumnType = date_cast::TypeToColumnV<DateType>;
using NativeType = date_cast::ValueTypeOfColumnV<ColumnType>;
using ResultType =
std::conditional_t<date_cast::IsV1<DateType>(), DataTypeDate, DataTypeDateV2>;
using ResultColumnType = date_cast::TypeToColumnV<ResultType>;
using ResultNativeType = date_cast::ValueTypeOfColumnV<ResultColumnType>;
static Status execute_impl(FunctionContext* context, Block& block,
const ColumnNumbers& arguments, uint32_t result,
size_t input_rows_count) {
const auto is_nullable = block.get_by_position(result).type->is_nullable();
ColumnPtr argument_column = remove_nullable(block.get_by_position(arguments[0]).column);
ColumnPtr res_column;
if (is_nullable) {
auto null_map = ColumnUInt8::create(input_rows_count, 0);
auto data_col = assert_cast<const ColumnType*>(argument_column.get());
res_column = ResultColumnType::create(input_rows_count);
execute_straight(
input_rows_count, null_map->get_data(), data_col->get_data(),
static_cast<ResultColumnType*>(res_column->assume_mutable().get())->get_data());
block.replace_by_position(result,
ColumnNullable::create(res_column, std::move(null_map)));
} else {
if constexpr (date_cast::IsV2<DateType>()) {
auto data_col = assert_cast<const ColumnType*>(argument_column.get());
res_column = ResultColumnType::create(input_rows_count);
execute_straight(input_rows_count, data_col->get_data(),
static_cast<ResultColumnType*>(res_column->assume_mutable().get())
->get_data());
block.replace_by_position(result, std::move(res_column));
}
}
return Status::OK();
}
// v1, maybe makes null value
static void execute_straight(size_t input_rows_count, NullMap& null_map,
const PaddedPODArray<NativeType>& data_col,
PaddedPODArray<ResultNativeType>& res_data) {
for (int i = 0; i < input_rows_count; i++) {
const auto& cur_data = data_col[i];
auto ts_value = binary_cast<NativeType, DateValueType>(cur_data);
if (!ts_value.is_valid_date()) [[unlikely]] {
null_map[i] = 1;
continue;
}
if constexpr (date_cast::IsV1<DateType>()) {
if (is_special_day(ts_value.year(), ts_value.month(), ts_value.day())) {
ts_value.unchecked_set_time(ts_value.year(), ts_value.month(), 1, 0, 0, 0);
ts_value.set_type(TIME_DATE);
res_data[i] = binary_cast<VecDateTimeValue, Int64>(ts_value);
continue;
}
// day_of_week, from 1(Mon) to 7(Sun)
int day_of_week = ts_value.weekday() + 1;
int gap_of_monday = day_of_week - 1;
TimeInterval interval(DAY, gap_of_monday, true);
ts_value.template date_add_interval<DAY>(interval);
ts_value.set_type(TIME_DATE);
res_data[i] = binary_cast<VecDateTimeValue, Int64>(ts_value);
} else if constexpr (std::is_same_v<DateType, DataTypeDateV2>) {
if (is_special_day(ts_value.year(), ts_value.month(), ts_value.day())) {
ts_value.template unchecked_set_time_unit<TimeUnit::DAY>(1);
res_data[i] = binary_cast<DateValueType, UInt32>(ts_value);
continue;
}
// day_of_week, from 1(Mon) to 7(Sun)
int day_of_week = ts_value.weekday() + 1;
int gap_of_monday = day_of_week - 1;
TimeInterval interval(DAY, gap_of_monday, true);
ts_value.template date_add_interval<DAY>(interval);
res_data[i] = binary_cast<DateValueType, UInt32>(ts_value);
} else {
if (is_special_day(ts_value.year(), ts_value.month(), ts_value.day())) {
ts_value.unchecked_set_time(ts_value.year(), ts_value.month(), 1, 0, 0, 0, 0);
UInt64 cast_value = binary_cast<DateValueType, UInt64>(ts_value);
DataTypeDateTimeV2::cast_to_date_v2(cast_value, res_data[i]);
continue;
}
// day_of_week, from 1(Mon) to 7(Sun)
int day_of_week = ts_value.weekday() + 1;
int gap_of_monday = day_of_week - 1;
TimeInterval interval(DAY, gap_of_monday, true);
ts_value.template date_add_interval<DAY>(interval);
ts_value.unchecked_set_time(ts_value.year(), ts_value.month(), ts_value.day(), 0, 0,
0, 0);
UInt64 cast_value = binary_cast<DateValueType, UInt64>(ts_value);
DataTypeDateTimeV2::cast_to_date_v2(cast_value, res_data[i]);
}
}
}
// v2, won't make null value
static void execute_straight(size_t input_rows_count,
const PaddedPODArray<NativeType>& data_col,
PaddedPODArray<ResultNativeType>& res_data) {
for (int i = 0; i < input_rows_count; i++) {
const auto& cur_data = data_col[i];
auto ts_value = binary_cast<NativeType, DateValueType>(cur_data);
DCHECK(ts_value.is_valid_date());
if constexpr (std::is_same_v<DateType, DataTypeDateV2>) {
if (is_special_day(ts_value.year(), ts_value.month(), ts_value.day())) {
ts_value.template unchecked_set_time_unit<TimeUnit::DAY>(1);
res_data[i] = binary_cast<DateValueType, UInt32>(ts_value);
continue;
}
// day_of_week, from 1(Mon) to 7(Sun)
int day_of_week = ts_value.weekday() + 1;
int gap_of_monday = day_of_week - 1;
TimeInterval interval(DAY, gap_of_monday, true);
ts_value.template date_add_interval<DAY>(interval);
res_data[i] = binary_cast<DateValueType, UInt32>(ts_value);
} else if constexpr (std::is_same_v<DateType, DataTypeDateTimeV2>) {
if (is_special_day(ts_value.year(), ts_value.month(), ts_value.day())) {
ts_value.unchecked_set_time(ts_value.year(), ts_value.month(), 1, 0, 0, 0, 0);
UInt64 cast_value = binary_cast<DateValueType, UInt64>(ts_value);
DataTypeDateTimeV2::cast_to_date_v2(cast_value, res_data[i]);
continue;
}
// day_of_week, from 1(Mon) to 7(Sun)
int day_of_week = ts_value.weekday() + 1;
int gap_of_monday = day_of_week - 1;
TimeInterval interval(DAY, gap_of_monday, true);
ts_value.template date_add_interval<DAY>(interval);
ts_value.unchecked_set_time(ts_value.year(), ts_value.month(), ts_value.day(), 0, 0,
0, 0);
UInt64 cast_value = binary_cast<DateValueType, UInt64>(ts_value);
DataTypeDateTimeV2::cast_to_date_v2(cast_value, res_data[i]);
}
}
}
// specially, 1970-01-01, 1970-01-02, 1970-01-03 and 1970-01-04 return 1970-01-01
static bool is_special_day(int year, int month, int day) {
return year == 1970 && month == 1 && day > 0 && day < 5;
}
};
template <typename Impl>
class FunctionOtherTypesToDateType : public IFunction {
public:
static constexpr auto name = Impl::name;
static FunctionPtr create() { return std::make_shared<FunctionOtherTypesToDateType>(); }
String get_name() const override { return name; }
size_t get_number_of_arguments() const override { return Impl::get_number_of_arguments(); }
bool is_variadic() const override { return Impl::is_variadic(); }
DataTypes get_variadic_argument_types_impl() const override {
return Impl::get_variadic_argument_types();
}
DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
return Impl::get_return_type_impl(arguments);
}
Status open(FunctionContext* context, FunctionContext::FunctionStateScope scope) override {
if constexpr (std::is_same_v<Impl, DateTrunc<DataTypeDate, true>> ||
std::is_same_v<Impl, DateTrunc<DataTypeDateV2, true>> ||
std::is_same_v<Impl, DateTrunc<DataTypeDateTime, true>> ||
std::is_same_v<Impl, DateTrunc<DataTypeDateTimeV2, true>> ||
std::is_same_v<Impl, DateTrunc<DataTypeDate, false>> ||
std::is_same_v<Impl, DateTrunc<DataTypeDateV2, false>> ||
std::is_same_v<Impl, DateTrunc<DataTypeDateTime, false>> ||
std::is_same_v<Impl, DateTrunc<DataTypeDateTimeV2, false>>) {
return Impl::open(context, scope);
} else {
return Status::OK();
}
}
//TODO: add function below when we fixed be-ut.
//ColumnNumbers get_arguments_that_are_always_constant() const override { return {1}; }
Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments,
uint32_t result, size_t input_rows_count) const override {
return Impl::execute(context, block, arguments, result, input_rows_count);
}
};
struct FromIso8601DateV2 {
static constexpr auto name = "from_iso8601_date";
static size_t get_number_of_arguments() { return 1; }
static bool is_variadic() { return false; }
static DataTypes get_variadic_argument_types() { return {std::make_shared<DataTypeString>()}; }
static DataTypePtr get_return_type_impl(const DataTypes& arguments) {
return make_nullable(std::make_shared<DataTypeDateV2>());
}
static Status execute(FunctionContext* context, Block& block, const ColumnNumbers& arguments,
uint32_t result, size_t input_rows_count) {
const auto* src_column_ptr = block.get_by_position(arguments[0]).column.get();
auto null_map = ColumnUInt8::create(input_rows_count, 0);
ColumnDateV2::MutablePtr res = ColumnDateV2::create(input_rows_count);
auto& result_data = res->get_data();
static const std::tuple<std::vector<int>, int, std::string> ISO_STRING_FORMAT[] = {
{{
8,
},
1,
"%04d%02d%02d"}, //YYYYMMDD
{{4, -1, 2, -1, 2}, 1, "%04d-%02d-%02d"}, //YYYY-MM-DD
{{4, -1, 2}, 2, "%04d-%02d"}, //YYYY-MM
{
{
4,
},
3,
"%04d",
}, //YYYY
{
{4, -1, 3},
4,
"%04d-%03d",
}, //YYYY-DDD
{
{
7,
},
4,
"%04d%03d",
}, //YYYYDDD
{
{4, -1, -2, 2},
5,
"%04d-W%02d",
}, //YYYY-Www
{
{4, -2, 2},
5,
"%04dW%02d",
}, //YYYYWww
{
{4, -1, -2, 2, -1, 1},
6,
"%04d-W%02d-%1d",
}, //YYYY-Www-D
{
{4, -2, 3},
6,
"%04dW%02d%1d",
}, //YYYYWwwD
};
for (size_t i = 0; i < input_rows_count; ++i) {
int year, month, day, week, day_of_year;
int weekday = 1; // YYYYWww YYYY-Www default D = 1
auto src_string = src_column_ptr->get_data_at(i).to_string_view();
int iso_string_format_value = 0;
std::vector<int> src_string_values;
src_string_values.reserve(10);
//The maximum length of the current iso8601 format is 10.
if (src_string.size() <= 10) {
// The calculation string corresponds to the iso8601 format.
// The integer represents the number of consecutive numbers.
// -1 represent char '-'.
// -2 represent char 'W'.
// The calculated vector `src_string_values` will be compared with `ISO_STRING_FORMAT[]` later.
for (int idx = 0; idx < src_string.size();) {
char current = src_string[idx];
if (current == '-') {
src_string_values.emplace_back(-1);
idx++;
continue;
} else if (current == 'W') {
src_string_values.emplace_back(-2);
idx++;
continue;
} else if (!isdigit(current)) {
iso_string_format_value = -1;
break;
}
int currLen = 0;
for (; idx < src_string.size() && isdigit(src_string[idx]); ++idx) {
++currLen;
}
src_string_values.emplace_back(currLen);
}
} else {
iso_string_format_value = -1;
}
std::string_view iso_format_string;
if (iso_string_format_value != -1) {
for (const auto& j : ISO_STRING_FORMAT) {
const auto& v = std::get<0>(j);
if (v == src_string_values) {
iso_string_format_value = std::get<1>(j);
iso_format_string = std::get<2>(j);
break;
}
}
}
auto& ts_value = *reinterpret_cast<DateV2Value<DateV2ValueType>*>(&result_data[i]);
if (iso_string_format_value == 1) {
if (sscanf(src_string.data(), iso_format_string.data(), &year, &month, &day) != 3)
[[unlikely]] {
null_map->get_data().data()[i] = true;
continue;
}
if (!(ts_value.template set_time_unit<YEAR>(year) &&
ts_value.template set_time_unit<MONTH>(month) &&
ts_value.template set_time_unit<DAY>(day))) [[unlikely]] {
null_map->get_data().data()[i] = true;
}
} else if (iso_string_format_value == 2) {
if (sscanf(src_string.data(), iso_format_string.data(), &year, &month) != 2)
[[unlikely]] {
null_map->get_data().data()[i] = true;
continue;
}
if (!(ts_value.template set_time_unit<YEAR>(year) &&
ts_value.template set_time_unit<MONTH>(month))) [[unlikely]] {
null_map->get_data().data()[i] = true;
}
ts_value.template unchecked_set_time_unit<DAY>(1);
} else if (iso_string_format_value == 3) {
if (sscanf(src_string.data(), iso_format_string.data(), &year) != 1) [[unlikely]] {
null_map->get_data().data()[i] = true;
continue;
}
if (!ts_value.template set_time_unit<YEAR>(year)) [[unlikely]] {
null_map->get_data().data()[i] = true;
}
ts_value.template unchecked_set_time_unit<MONTH>(1);
ts_value.template unchecked_set_time_unit<DAY>(1);
} else if (iso_string_format_value == 5 || iso_string_format_value == 6) {
if (iso_string_format_value == 5) {
if (sscanf(src_string.data(), iso_format_string.data(), &year, &week) != 2)
[[unlikely]] {
null_map->get_data().data()[i] = true;
continue;
}
} else {
if (sscanf(src_string.data(), iso_format_string.data(), &year, &week,
&weekday) != 3) [[unlikely]] {
null_map->get_data().data()[i] = true;
continue;
}
}
// weekday [1,7] week [1,53]
if (weekday < 1 || weekday > 7 || week < 1 || week > 53) [[unlikely]] {
null_map->get_data().data()[i] = true;
continue;
}
auto first_day_of_week = getFirstDayOfISOWeek(year);
ts_value.template unchecked_set_time_unit<YEAR>(
first_day_of_week.year().operator int());
ts_value.template unchecked_set_time_unit<MONTH>(
first_day_of_week.month().operator unsigned int());
ts_value.template unchecked_set_time_unit<DAY>(
first_day_of_week.day().operator unsigned int());
auto day_diff = (week - 1) * 7 + weekday - 1;
TimeInterval interval(DAY, day_diff, false);
ts_value.date_add_interval<DAY>(interval);
} else if (iso_string_format_value == 4) {
if (sscanf(src_string.data(), iso_format_string.data(), &year, &day_of_year) != 2)
[[unlikely]] {
null_map->get_data().data()[i] = true;
continue;
}
if (is_leap(year)) {
if (day_of_year < 0 || day_of_year > 366) [[unlikely]] {
null_map->get_data().data()[i] = true;
}
} else {
if (day_of_year < 0 || day_of_year > 365) [[unlikely]] {
null_map->get_data().data()[i] = true;
}
}
ts_value.template unchecked_set_time_unit<YEAR>(year);
ts_value.template unchecked_set_time_unit<MONTH>(1);
ts_value.template unchecked_set_time_unit<DAY>(1);
TimeInterval interval(DAY, day_of_year - 1, false);
ts_value.template date_add_interval<DAY>(interval);
} else {
null_map->get_data().data()[i] = true;
}
}
block.get_by_position(result).column =
ColumnNullable::create(std::move(res), std::move(null_map));
return Status::OK();
}
private:
//Get the date corresponding to Monday of the first week of the year according to the ISO8601 standard.
static std::chrono::year_month_day getFirstDayOfISOWeek(int year) {
using namespace std::chrono;
auto jan4 = year_month_day {std::chrono::year(year) / January / 4};
auto jan4_sys_days = sys_days {jan4};
auto weekday_of_jan4 = weekday {jan4_sys_days};
auto first_day_of_week = jan4_sys_days - days {(weekday_of_jan4.iso_encoding() - 1)};
return year_month_day {floor<days>(first_day_of_week)};
}
};
using FunctionStrToDate = FunctionOtherTypesToDateType<StrToDate<DataTypeDate>>;
using FunctionStrToDatetime = FunctionOtherTypesToDateType<StrToDate<DataTypeDateTime>>;
using FunctionStrToDateV2 = FunctionOtherTypesToDateType<StrToDate<DataTypeDateV2>>;
using FunctionStrToDatetimeV2 = FunctionOtherTypesToDateType<StrToDate<DataTypeDateTimeV2>>;
using FunctionMakeDate = FunctionOtherTypesToDateType<MakeDateImpl>;
using FunctionDateTruncDate = FunctionOtherTypesToDateType<DateTrunc<DataTypeDate, true>>;
using FunctionDateTruncDateV2 = FunctionOtherTypesToDateType<DateTrunc<DataTypeDateV2, true>>;
using FunctionDateTruncDatetime = FunctionOtherTypesToDateType<DateTrunc<DataTypeDateTime, true>>;
using FunctionDateTruncDatetimeV2 =
FunctionOtherTypesToDateType<DateTrunc<DataTypeDateTimeV2, true>>;
using FunctionDateTruncDateWithCommonOrder =
FunctionOtherTypesToDateType<DateTrunc<DataTypeDate, false>>;
using FunctionDateTruncDateV2WithCommonOrder =
FunctionOtherTypesToDateType<DateTrunc<DataTypeDateV2, false>>;
using FunctionDateTruncDatetimeWithCommonOrder =
FunctionOtherTypesToDateType<DateTrunc<DataTypeDateTime, false>>;
using FunctionDateTruncDatetimeV2WithCommonOrder =
FunctionOtherTypesToDateType<DateTrunc<DataTypeDateTimeV2, false>>;
using FunctionFromIso8601DateV2 = FunctionOtherTypesToDateType<FromIso8601DateV2>;
void register_function_timestamp(SimpleFunctionFactory& factory) {
factory.register_function<FunctionStrToDate>();
factory.register_function<FunctionStrToDatetime>();
factory.register_function<FunctionStrToDateV2>();
factory.register_function<FunctionStrToDatetimeV2>();
factory.register_function<FunctionMakeDate>();
factory.register_function<FromDays>();
factory.register_function<FunctionDateTruncDate>();
factory.register_function<FunctionDateTruncDateV2>();
factory.register_function<FunctionDateTruncDatetime>();
factory.register_function<FunctionDateTruncDatetimeV2>();
factory.register_function<FunctionDateTruncDateWithCommonOrder>();
factory.register_function<FunctionDateTruncDateV2WithCommonOrder>();
factory.register_function<FunctionDateTruncDatetimeWithCommonOrder>();
factory.register_function<FunctionDateTruncDatetimeV2WithCommonOrder>();
factory.register_function<FunctionFromIso8601DateV2>();
factory.register_function<FunctionUnixTimestamp<UnixTimeStampImpl>>();
factory.register_function<FunctionUnixTimestamp<UnixTimeStampDateImpl<DataTypeDate>>>();
factory.register_function<FunctionUnixTimestamp<UnixTimeStampDateImpl<DataTypeDateV2>>>();
factory.register_function<FunctionUnixTimestamp<UnixTimeStampDateImpl<DataTypeDateTime>>>();
factory.register_function<FunctionUnixTimestamp<UnixTimeStampDateImpl<DataTypeDateTimeV2>>>();
factory.register_function<FunctionUnixTimestamp<UnixTimeStampStrImpl>>();
factory.register_function<FunctionDateOrDateTimeToDate<LastDayImpl, DataTypeDateTime>>();
factory.register_function<FunctionDateOrDateTimeToDate<LastDayImpl, DataTypeDate>>();
factory.register_function<FunctionDateOrDateTimeToDate<LastDayImpl, DataTypeDateV2>>();
factory.register_function<FunctionDateOrDateTimeToDate<LastDayImpl, DataTypeDateTimeV2>>();
factory.register_function<FunctionDateOrDateTimeToDate<MondayImpl, DataTypeDateV2>>();
factory.register_function<FunctionDateOrDateTimeToDate<MondayImpl, DataTypeDateTimeV2>>();
factory.register_function<FunctionDateOrDateTimeToDate<MondayImpl, DataTypeDate>>();
factory.register_function<FunctionDateOrDateTimeToDate<MondayImpl, DataTypeDateTime>>();
factory.register_function<DateTimeToTimestamp<MicroSec>>();
factory.register_function<DateTimeToTimestamp<MilliSec>>();
factory.register_function<DateTimeToTimestamp<Sec>>();
}
} // namespace doris::vectorized