| // Licensed to the Apache Software Foundation (ASF) under one |
| // or more contributor license agreements. See the NOTICE file |
| // distributed with this work for additional information |
| // regarding copyright ownership. The ASF licenses this file |
| // to you under the Apache License, Version 2.0 (the |
| // "License"); you may not use this file except in compliance |
| // with the License. You may obtain a copy of the License at |
| // |
| // http://www.apache.org/licenses/LICENSE-2.0 |
| // |
| // Unless required by applicable law or agreed to in writing, |
| // software distributed under the License is distributed on an |
| // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
| // KIND, either express or implied. See the License for the |
| // specific language governing permissions and limitations |
| // under the License. |
| |
| #pragma once |
| |
| #include <sys/types.h> |
| |
| #include <type_traits> |
| |
| #include "common/status.h" |
| #include "core/data_type/data_type_decimal.h" // IWYU pragma: keep |
| #include "core/data_type/primitive_type.h" |
| #include "core/data_type_serde/data_type_serde.h" |
| #include "core/data_type_serde/datelike_serde_common.hpp" |
| #include "core/types.h" |
| #include "core/value/vdatetime_value.h" |
| #include "exprs/function/cast/cast_base.h" // IWYU pragma: keep |
| #include "util/asan_util.h" |
| #include "util/string_parser.hpp" |
| |
| namespace doris { |
| #include "common/compile_check_begin.h" |
| // NOLINTBEGIN(readability-function-size) |
| // NOLINTBEGIN(readability-function-cognitive-complexity) |
| |
/// Selects the flavour of value the shared string parsers of this file produce.
enum class DataTimeCastEnumType {
    /// Plain datetime: an explicit timezone in the input is converted to the
    /// caller-supplied local time zone.
    DATE_TIME,
    /// Timestamp with time zone: the parsed value is converted to UTC (using the
    /// explicit timezone of the input, or the local time zone when none is given).
    TIMESTAMP_TZ,
};
| |
| template <DatelikeParseMode ParseMode> |
| [[nodiscard]] inline static bool init_microsecond(int64_t frac_input, uint32_t frac_length, |
| DateV2Value<DateTimeV2ValueType>& val, |
| uint32_t target_scale, CastParameters& params) { |
| constexpr bool IsStrict = is_datelike_parse_strict(ParseMode); |
| if (frac_length > 0) { |
| // align to `target_scale` digits |
| auto in_scale_part = |
| (frac_length > target_scale) |
| ? (uint32_t)(frac_input / common::exp10_i64(frac_length - target_scale)) |
| : (uint32_t)(frac_input * common::exp10_i64(target_scale - frac_length)); |
| |
| if (frac_length > target_scale) { // _scale is up to 6 |
| // round off to at most `_scale` digits |
| auto digit_next = |
| (uint32_t)(frac_input / common::exp10_i64(frac_length - target_scale - 1)) % 10; |
| if (digit_next >= 5) { |
| in_scale_part++; |
| DCHECK(in_scale_part <= 1000000); |
| if (in_scale_part == common::exp10_i32(target_scale)) { |
| // overflow, round up to next second |
| SET_PARAMS_RET_FALSE_IFN(val.date_add_interval<TimeUnit::SECOND>( |
| TimeInterval {TimeUnit::SECOND, 1, false}), |
| "datetime overflow when rounding up to next second"); |
| in_scale_part = 0; |
| } |
| } |
| } |
| val.unchecked_set_time_unit<TimeUnit::MICROSECOND>( |
| in_scale_part * common::exp10_i32(6 - (int)target_scale)); |
| } |
| return true; |
| } |
| |
/**
 * Conventions shared by the cast functions below:
 *  - A function whose name contains `strict_mode` or `non_strict_mode` follows the parsing
 *    RULES of that mode.
 *  - The template parameter `ParseMode` (from which the local constant `IsStrict` is derived)
 *    tells whether the function is RUNNING IN strict mode or not.
 *  - Return value: whether the cast succeeded.
 *  - `params.status`: an error code is set ONLY when running in strict mode.
 */
| struct CastToTimestampTz; |
| |
| struct CastToDatetimeV2 { |
| friend struct CastToTimestampTz; |
| |
| // may be slow |
| template <typename T> |
| static inline bool from_integer(T int_val, DateV2Value<DateTimeV2ValueType>& val, |
| CastParameters& params) { |
| if (params.is_strict) { |
| return from_integer<DatelikeParseMode::STRICT>(int_val, val, params); |
| } else { |
| return from_integer<DatelikeParseMode::NON_STRICT>(int_val, val, params); |
| } |
| } |
| |
| // same behaviour in both strict and non-strict mode |
| template <DatelikeParseMode ParseMode, typename T> |
| static inline bool from_integer(T int_val, DateV2Value<DateTimeV2ValueType>& val, |
| CastParameters& params); |
| |
| // may be slow |
| template <typename T> |
| requires std::is_floating_point_v<T> |
| static inline bool from_float(T float_value, DateV2Value<DateTimeV2ValueType>& val, |
| uint32_t to_scale, CastParameters& params) { |
| if (params.is_strict) { |
| return from_float<DatelikeParseMode::STRICT>(float_value, val, to_scale, params); |
| } else { |
| return from_float<DatelikeParseMode::NON_STRICT>(float_value, val, to_scale, params); |
| } |
| } |
| |
| template <DatelikeParseMode ParseMode, typename T> |
| requires std::is_floating_point_v<T> |
| static inline bool from_float(T float_value, DateV2Value<DateTimeV2ValueType>& val, |
| uint32_t to_scale, CastParameters& params) { |
| constexpr bool IsStrict = is_datelike_parse_strict(ParseMode); |
| DCHECK(IsStrict == params.is_strict); |
| SET_PARAMS_RET_FALSE_IFN(float_value > 0 && !std::isnan(float_value) && |
| !std::isinf(float_value) && |
| float_value < (double)std::numeric_limits<int64_t>::max(), |
| "invalid float value for datetimev2: {}", float_value); |
| |
| auto int_part = static_cast<int64_t>(float_value); |
| if (!from_integer<ParseMode>(int_part, val, params)) { |
| // if IsStrict, error code has been set in from_integer |
| return false; |
| } |
| |
| int ms_part_7 = (float_value - (double)int_part) * common::exp10_i32(7); |
| if (!init_microsecond<ParseMode>(ms_part_7, 7, val, to_scale, params)) { |
| return false; // status set in init_microsecond |
| } |
| return true; |
| } |
| |
| // may be slow |
| template <typename T> |
| static inline bool from_decimal(const T& int_part, const T& frac_part, |
| const int64_t& decimal_scale, |
| DateV2Value<DateTimeV2ValueType>& res, uint32_t to_scale, |
| CastParameters& params) { |
| if (params.is_strict) { |
| return from_decimal<DatelikeParseMode::STRICT>(int_part, frac_part, decimal_scale, res, |
| to_scale, params); |
| } else { |
| return from_decimal<DatelikeParseMode::NON_STRICT>(int_part, frac_part, decimal_scale, |
| res, to_scale, params); |
| } |
| } |
| |
| template <DatelikeParseMode ParseMode, typename T> |
| static inline bool from_decimal(const T& int_part, const T& frac_part, |
| const int64_t& decimal_scale, |
| DateV2Value<DateTimeV2ValueType>& res, uint32_t to_scale, |
| CastParameters& params) { |
| constexpr bool IsStrict = is_datelike_parse_strict(ParseMode); |
| DCHECK(IsStrict == params.is_strict); |
| SET_PARAMS_RET_FALSE_IFN(int_part <= std::numeric_limits<int64_t>::max() && int_part >= 1, |
| "invalid decimal value for datetimev2: {}.{}", int_part, |
| frac_part); |
| |
| if (!from_integer<ParseMode>(int_part, res, params)) { |
| // if IsStrict, error code has been set in from_integer |
| return false; |
| } |
| |
| if (!init_microsecond<ParseMode>((int64_t)frac_part, (uint32_t)decimal_scale, res, to_scale, |
| params)) { |
| return false; // status set in init_microsecond |
| } |
| return true; |
| } |
| |
| // may be slow |
| static inline bool from_string(const StringRef& str, DateV2Value<DateTimeV2ValueType>& res, |
| const cctz::time_zone* local_time_zone, uint32_t to_scale, |
| CastParameters& params) { |
| if (params.is_strict) { |
| return from_string_strict_mode<DatelikeParseMode::STRICT>(str, res, local_time_zone, |
| to_scale, params); |
| } else { |
| return from_string_non_strict_mode(str, res, local_time_zone, to_scale, params); |
| } |
| } |
| |
| // this code follow rules of strict mode, but whether it RUNNING IN strict mode or not depends on the `IsStrict` |
| // parameter. if it's false, we dont set error code for performance and we dont need. |
| template <DatelikeParseMode ParseMode> |
| static inline bool from_string_strict_mode(const StringRef& str, |
| DateV2Value<DateTimeV2ValueType>& res, |
| const cctz::time_zone* local_time_zone, |
| uint32_t to_scale, CastParameters& params) { |
| return from_string_strict_mode_internal<ParseMode, DataTimeCastEnumType::DATE_TIME>( |
| str, res, local_time_zone, to_scale, params); |
| } |
| |
| static inline bool from_string_non_strict_mode(const StringRef& str, |
| DateV2Value<DateTimeV2ValueType>& res, |
| const cctz::time_zone* local_time_zone, |
| uint32_t to_scale, CastParameters& params) { |
| return CastToDatetimeV2::from_string_strict_mode<DatelikeParseMode::NON_STRICT>( |
| str, res, local_time_zone, to_scale, params) || |
| CastToDatetimeV2::from_string_non_strict_mode_internal< |
| DataTimeCastEnumType::DATE_TIME>(str, res, local_time_zone, to_scale, |
| params); |
| } |
| |
| private: |
| template <DatelikeParseMode ParseMode, DataTimeCastEnumType type> |
| static inline bool from_string_strict_mode_internal(const StringRef& str, |
| DateV2Value<DateTimeV2ValueType>& res, |
| const cctz::time_zone* local_time_zone, |
| uint32_t to_scale, CastParameters& params); |
| |
| template <DataTimeCastEnumType type> |
| static inline bool from_string_non_strict_mode_internal(const StringRef& str, |
| DateV2Value<DateTimeV2ValueType>& res, |
| const cctz::time_zone* local_time_zone, |
| uint32_t to_scale, |
| CastParameters& params); |
| }; |
| |
| template <DatelikeParseMode ParseMode, typename T> |
| inline bool CastToDatetimeV2::from_integer(T input, DateV2Value<DateTimeV2ValueType>& val, |
| CastParameters& params) { |
| constexpr bool IsStrict = is_datelike_parse_strict(ParseMode); |
| DCHECK(IsStrict == params.is_strict); |
| // T maybe int128 then bigger than int64_t. so we must check before cast |
| SET_PARAMS_RET_FALSE_IFN(input <= std::numeric_limits<int64_t>::max() && input > 0, |
| "invalid int value for datetimev2: {}", input); |
| auto int_val = static_cast<int64_t>(input); |
| int length = common::count_digits_fast(int_val); |
| |
| if (length == 3 || length == 4) { |
| val.unchecked_set_time_unit<TimeUnit::YEAR>(2000); |
| SET_PARAMS_RET_FALSE_IFN(val.set_time_unit<TimeUnit::MONTH>((uint32_t)int_val / 100), |
| "invalid month {}", int_val / 100); |
| SET_PARAMS_RET_FALSE_IFN(val.set_time_unit<TimeUnit::DAY>(int_val % 100), "invalid day {}", |
| int_val % 100); |
| } else if (length == 5) { |
| SET_PARAMS_RET_FALSE_IFN( |
| val.set_time_unit<TimeUnit::YEAR>(2000 + (uint32_t)int_val / 10000), |
| "invalid year {}", 2000 + int_val / 10000); |
| SET_PARAMS_RET_FALSE_IFN(val.set_time_unit<TimeUnit::MONTH>(int_val % 10000 / 100), |
| "invalid month {}", int_val % 10000 / 100); |
| SET_PARAMS_RET_FALSE_IFN(val.set_time_unit<TimeUnit::DAY>(int_val % 100), "invalid day {}", |
| int_val % 100); |
| } else if (length == 6) { |
| uint32_t year = (uint32_t)int_val / 10000; |
| if (year < 70) { |
| year += 2000; |
| } else { |
| year += 1900; |
| } |
| SET_PARAMS_RET_FALSE_IFN(val.set_time_unit<TimeUnit::YEAR>(year), "invalid year {}", year); |
| SET_PARAMS_RET_FALSE_IFN(val.set_time_unit<TimeUnit::MONTH>(int_val % 10000 / 100), |
| "invalid month {}", int_val % 10000 / 100); |
| SET_PARAMS_RET_FALSE_IFN(val.set_time_unit<TimeUnit::DAY>(int_val % 100), "invalid day {}", |
| int_val % 100); |
| } else if (length == 8) { |
| SET_PARAMS_RET_FALSE_IFN(val.set_time_unit<TimeUnit::YEAR>((uint32_t)int_val / 10000), |
| "invalid year {}", int_val / 10000); |
| SET_PARAMS_RET_FALSE_IFN(val.set_time_unit<TimeUnit::MONTH>(int_val % 10000 / 100), |
| "invalid month {}", int_val % 10000 / 100); |
| SET_PARAMS_RET_FALSE_IFN(val.set_time_unit<TimeUnit::DAY>(int_val % 100), "invalid day {}", |
| int_val % 100); |
| } else if (length == 14) { |
| SET_PARAMS_RET_FALSE_IFN(val.set_time_unit<TimeUnit::YEAR>(int_val / common::exp10_i64(10)), |
| "invalid year {}", int_val / common::exp10_i64(10)); |
| SET_PARAMS_RET_FALSE_IFN( |
| val.set_time_unit<TimeUnit::MONTH>((int_val / common::exp10_i32(8)) % 100), |
| "invalid month {}", (int_val / common::exp10_i32(8)) % 100); |
| SET_PARAMS_RET_FALSE_IFN( |
| val.set_time_unit<TimeUnit::DAY>((int_val / common::exp10_i32(6)) % 100), |
| "invalid day {}", (int_val / common::exp10_i32(6)) % 100); |
| SET_PARAMS_RET_FALSE_IFN( |
| val.set_time_unit<TimeUnit::HOUR>((int_val / common::exp10_i32(4)) % 100), |
| "invalid hour {}", (int_val / common::exp10_i32(4)) % 100); |
| SET_PARAMS_RET_FALSE_IFN( |
| val.set_time_unit<TimeUnit::MINUTE>((int_val / common::exp10_i32(2)) % 100), |
| "invalid minute {}", (int_val / common::exp10_i32(2)) % 100); |
| SET_PARAMS_RET_FALSE_IFN(val.set_time_unit<TimeUnit::SECOND>(int_val % 100), |
| "invalid second {}", int_val % 100); |
| } else [[unlikely]] { |
| if constexpr (IsStrict) { |
| params.status = Status::InvalidArgument("invalid digits for datetimev2: {}", int_val); |
| } |
| return false; |
| } |
| return true; |
| } |
| |
/**
<datetime> ::= <date> (("T" | " ") <time> <whitespace>* <offset>?)?

––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––

<date> ::= <year> ("-" | "/") <month1> ("-" | "/") <day1>
| <year> <month2> <day2>

<year> ::= <digit>{2} | <digit>{4} ; two-digit years pivot at 1970
<month1> ::= <digit>{1,2} ; 01–12
<day1> ::= <digit>{1,2} ; 01–28/29/30/31, depending on the month

<month2> ::= <digit>{2} ; 01–12
<day2> ::= <digit>{2} ; 01–28/29/30/31, depending on the month

––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––

<time> ::= <hour1> (":" <minute1> (":" <second1> <fraction>?)?)?
| <hour2> (<minute2> (<second2> <fraction>?)?)?

<hour1> ::= <digit>{1,2} ; 00–23
<minute1> ::= <digit>{1,2} ; 00–59
<second1> ::= <digit>{1,2} ; 00–59

<hour2> ::= <digit>{2} ; 00–23
<minute2> ::= <digit>{2} ; 00–59
<second2> ::= <digit>{2} ; 00–59

<fraction> ::= "." <digit>*

––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––

<offset> ::= ( "+" | "-" ) <hour-offset> [ ":"? <minute-offset> ]
| <tz-name>

<tz-name> ::= <short-tz> | <long-tz>

<short-tz> ::= "CST" | "UTC" | "GMT" | "ZULU" | "Z" ; case-insensitive
<long-tz> ::= <area> "/" <location> ; e.g. America/New_York

<hour-offset> ::= <digit>{1,2} ; 0–14
<minute-offset> ::= <digit>{2} ; 00/30/45

––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––

<digit> ::= "0" | "1" | "2" | "3" | "4" | "5" | "6" | "7" | "8" | "9"

<area> ::= <alpha>+
<location> ::= (<alpha> | "_")+
<alpha> ::= "A" | … | "Z" | "a" | … | "z"
<whitespace> ::= " " | "\t" | "\n" | "\r" | "\v" | "\f"
*/
| template <DatelikeParseMode ParseMode, DataTimeCastEnumType type> |
| inline bool CastToDatetimeV2::from_string_strict_mode_internal( |
| const StringRef& str, DateV2Value<DateTimeV2ValueType>& res, |
| const cctz::time_zone* local_time_zone, uint32_t to_scale, CastParameters& params) { |
| constexpr bool IsStrict = is_datelike_parse_strict(ParseMode); |
| const char* ptr = str.data; |
| const char* end = ptr + str.size; |
| AsanPoisonGuard defer(end, 1); |
| |
| uint32_t part[4]; |
| bool has_second = false; |
| |
| // special `date` and `time` part format: 14-length digits string. parse it as YYYYMMDDHHMMSS |
| if (ptr + 13 < end && is_digit_range(ptr, ptr + 14)) { |
| // if the string is all digits, treat it as a date in YYYYMMDD format. |
| SET_PARAMS_RET_FALSE_IFN((consume_digit<UInt32, 4>(ptr, end, part[0])), |
| "failed to consume 4 digits for year, got {}", |
| std::string {ptr, end}); |
| SET_PARAMS_RET_FALSE_IFN((consume_digit<UInt32, 2>(ptr, end, part[1])), |
| "failed to consume 2 digits for month, got {}", |
| std::string {ptr, end}); |
| SET_PARAMS_RET_FALSE_IFN((consume_digit<UInt32, 2>(ptr, end, part[2])), |
| "failed to consume 2 digits for day, got {}", |
| std::string {ptr, end}); |
| if (!try_convert_set_zero_date(res, part[0], part[1], part[2])) { |
| SET_PARAMS_RET_FALSE_IFN(res.set_time_unit<TimeUnit::YEAR>(part[0]), "invalid year {}", |
| part[0]); |
| SET_PARAMS_RET_FALSE_IFN(res.set_time_unit<TimeUnit::MONTH>(part[1]), |
| "invalid month {}", part[1]); |
| SET_PARAMS_RET_FALSE_IFN(res.set_time_unit<TimeUnit::DAY>(part[2]), "invalid day {}", |
| part[2]); |
| } |
| |
| SET_PARAMS_RET_FALSE_IFN((consume_digit<UInt32, 2>(ptr, end, part[0])), |
| "failed to consume 2 digits for hour, got {}", |
| std::string {ptr, end}); |
| SET_PARAMS_RET_FALSE_IFN((consume_digit<UInt32, 2>(ptr, end, part[1])), |
| "failed to consume 2 digits for minute, got {}", |
| std::string {ptr, end}); |
| SET_PARAMS_RET_FALSE_IFN((consume_digit<UInt32, 2>(ptr, end, part[2])), |
| "failed to consume 2 digits for second, got {}", |
| std::string {ptr, end}); |
| SET_PARAMS_RET_FALSE_IFN(res.set_time_unit<TimeUnit::HOUR>(part[0]), "invalid hour {}", |
| part[0]); |
| SET_PARAMS_RET_FALSE_IFN(res.set_time_unit<TimeUnit::MINUTE>(part[1]), "invalid minute {}", |
| part[1]); |
| SET_PARAMS_RET_FALSE_IFN(res.set_time_unit<TimeUnit::SECOND>(part[2]), "invalid second {}", |
| part[2]); |
| has_second = true; |
| if (ptr == end) { |
| // no fraction or timezone part, just return. |
| goto POST_PROCESS; |
| } |
| goto FRAC; |
| } |
| |
| // date part |
| SET_PARAMS_RET_FALSE_IFN(in_bound(ptr, end, 5), "too short date part, got '{}'", |
| std::string {ptr, end}); |
| if (is_digit_range(ptr, ptr + 5)) { |
| // no delimiter here. |
| SET_PARAMS_RET_FALSE_IFN((consume_digit<UInt32, 2>(ptr, end, part[0])), |
| "failed to consume 2 digits for year, got {}", |
| std::string {ptr, end}); |
| SET_PARAMS_RET_FALSE_IFN((consume_digit<UInt32, 2>(ptr, end, part[1])), |
| "failed to consume 2 digits for year/month, got {}", |
| std::string {ptr, end}); |
| SET_PARAMS_RET_FALSE_IFN((consume_digit<UInt32, 2>(ptr, end, part[2])), |
| "failed to consume 2 digits for month/day, got {}", |
| std::string {ptr, end}); |
| if (ptr < end && is_numeric_ascii(*ptr)) { |
| // 4 digits year |
| SET_PARAMS_RET_FALSE_IFN((consume_digit<UInt32, 2>(ptr, end, part[3])), |
| "failed to consume 2 digits for day, got {}", |
| std::string {ptr, end}); |
| if (!try_convert_set_zero_date(res, part[0] * 100 + part[1], part[2], part[3])) { |
| SET_PARAMS_RET_FALSE_IFN(res.set_time_unit<TimeUnit::YEAR>(part[0] * 100 + part[1]), |
| "invalid year {}", part[0] * 100 + part[1]); |
| SET_PARAMS_RET_FALSE_IFN(res.set_time_unit<TimeUnit::MONTH>(part[2]), |
| "invalid month {}", part[2]); |
| SET_PARAMS_RET_FALSE_IFN(res.set_time_unit<TimeUnit::DAY>(part[3]), |
| "invalid day {}", part[3]); |
| } |
| } else { |
| if (!try_convert_set_zero_date(res, complete_4digit_year(part[0]), part[1], part[2])) { |
| SET_PARAMS_RET_FALSE_IFN( |
| res.set_time_unit<TimeUnit::YEAR>(complete_4digit_year(part[0])), |
| "invalid year {}", part[0]); |
| SET_PARAMS_RET_FALSE_IFN(res.set_time_unit<TimeUnit::MONTH>(part[1]), |
| "invalid month {}", part[1]); |
| SET_PARAMS_RET_FALSE_IFN(res.set_time_unit<TimeUnit::DAY>(part[2]), |
| "invalid day {}", part[2]); |
| } |
| } |
| } else { |
| // has delimiter here. |
| SET_PARAMS_RET_FALSE_IFN((consume_digit<UInt32, 2>(ptr, end, part[0])), |
| "failed to consume 2 digits for year, got {}", |
| std::string {ptr, end}); |
| SET_PARAMS_RET_FALSE_IFN(in_bound(ptr, end, 0), "too short date part, got '{}'", |
| std::string {ptr, end}); |
| if (is_date_sep(*ptr)) { |
| // 2 digits year |
| ++ptr; // consume one bar |
| SET_PARAMS_RET_FALSE_IFN((consume_digit<UInt32, 1, 2>(ptr, end, part[1])), |
| "failed to consume 1 or 2 digits for month, got {}", |
| std::string {ptr, end}); |
| SET_PARAMS_RET_FALSE_IFN((consume_one_date_sep(ptr, end)), |
| "failed to consume one bar after month, got {}", |
| std::string {ptr, end}); |
| SET_PARAMS_RET_FALSE_IFN((consume_digit<UInt32, 1, 2>(ptr, end, part[2])), |
| "failed to consume 1 or 2 digits for day, got {}", |
| std::string {ptr, end}); |
| |
| if (!try_convert_set_zero_date(res, part[0], part[1], part[2])) { |
| SET_PARAMS_RET_FALSE_IFN( |
| res.set_time_unit<TimeUnit::YEAR>(complete_4digit_year(part[0])), |
| "invalid year {}", part[0]); |
| SET_PARAMS_RET_FALSE_IFN(res.set_time_unit<TimeUnit::MONTH>(part[1]), |
| "invalid month {}", part[1]); |
| SET_PARAMS_RET_FALSE_IFN(res.set_time_unit<TimeUnit::DAY>(part[2]), |
| "invalid day {}", part[2]); |
| } |
| } else { |
| // 4 digits year |
| SET_PARAMS_RET_FALSE_IFN((consume_digit<UInt32, 2>(ptr, end, part[1])), |
| "failed to consume 4 digits for year, got {}", |
| std::string {ptr, end}); |
| SET_PARAMS_RET_FALSE_IFN((consume_one_date_sep(ptr, end)), |
| "failed to consume one bar after year, got {}", |
| std::string {ptr, end}); |
| SET_PARAMS_RET_FALSE_IFN((consume_digit<UInt32, 1, 2>(ptr, end, part[2])), |
| "failed to consume 1 or 2 digits for month, got {}", |
| std::string {ptr, end}); |
| SET_PARAMS_RET_FALSE_IFN((consume_one_date_sep(ptr, end)), |
| "failed to consume one bar after month, got {}", |
| std::string {ptr, end}); |
| SET_PARAMS_RET_FALSE_IFN((consume_digit<UInt32, 1, 2>(ptr, end, part[3])), |
| "failed to consume 1 or 2 digits for day, got {}", |
| std::string {ptr, end}); |
| |
| if (!try_convert_set_zero_date(res, part[0] * 100 + part[1], part[2], part[3])) { |
| SET_PARAMS_RET_FALSE_IFN(res.set_time_unit<TimeUnit::YEAR>(part[0] * 100 + part[1]), |
| "invalid year {}", part[0] * 100 + part[1]); |
| SET_PARAMS_RET_FALSE_IFN(res.set_time_unit<TimeUnit::MONTH>(part[2]), |
| "invalid month {}", part[2]); |
| SET_PARAMS_RET_FALSE_IFN(res.set_time_unit<TimeUnit::DAY>(part[3]), |
| "invalid day {}", part[3]); |
| } |
| } |
| } |
| |
| if (ptr == end) { |
| // no time part, just return. |
| res.unchecked_set_time_unit<TimeUnit::HOUR>(0); |
| res.unchecked_set_time_unit<TimeUnit::MINUTE>(0); |
| res.unchecked_set_time_unit<TimeUnit::SECOND>(0); |
| res.unchecked_set_time_unit<TimeUnit::MICROSECOND>(0); |
| goto POST_PROCESS; |
| } |
| |
| SET_PARAMS_RET_FALSE_IFN(consume_one_delimiter(ptr, end), |
| "failed to consume one delimiter after date, got {}", |
| std::string {ptr, end}); |
| |
| // time part. |
| // hour |
| SET_PARAMS_RET_FALSE_IFN((consume_digit<UInt32, 1, 2>(ptr, end, part[0])), |
| "failed to consume 1 or 2 digits for hour, got {}", |
| std::string {ptr, end}); |
| SET_PARAMS_RET_FALSE_IFN(res.set_time_unit<TimeUnit::HOUR>(part[0]), "invalid hour {}", |
| part[0]); |
| if (ptr == end) { |
| // no minute part, just return. |
| goto POST_PROCESS; |
| } |
| if (*ptr == ':') { |
| // with hour:minute:second |
| if (consume_one_colon(ptr, end)) { // minute |
| SET_PARAMS_RET_FALSE_IFN((consume_digit<UInt32, 1, 2>(ptr, end, part[1])), |
| "failed to consume 1 or 2 digits for minute, got {}", |
| std::string {ptr, end}); |
| SET_PARAMS_RET_FALSE_IFN(res.set_time_unit<TimeUnit::MINUTE>(part[1]), |
| "invalid minute {}", part[1]); |
| if (consume_one_colon(ptr, end)) { // second |
| has_second = true; |
| SET_PARAMS_RET_FALSE_IFN((consume_digit<UInt32, 1, 2>(ptr, end, part[2])), |
| "failed to consume 1 or 2 digits for second, got {}", |
| std::string {ptr, end}); |
| SET_PARAMS_RET_FALSE_IFN(res.set_time_unit<TimeUnit::SECOND>(part[2]), |
| "invalid second {}", part[2]); |
| } else { |
| res.unchecked_set_time_unit<TimeUnit::SECOND>(0); |
| } |
| } else { |
| res.unchecked_set_time_unit<TimeUnit::MINUTE>(0); |
| res.unchecked_set_time_unit<TimeUnit::SECOND>(0); |
| } |
| } else { |
| // no ':' |
| if (in_bound(ptr, end, 1) && is_digit_range(ptr, ptr + 2)) { |
| part[1] = (ptr[0] - '0') * 10 + ptr[1] - '0'; |
| // has minute |
| SET_PARAMS_RET_FALSE_IFN(res.set_time_unit<TimeUnit::MINUTE>(part[1]), |
| "invalid minute {}", part[1]); |
| ptr += 2; |
| if (in_bound(ptr, end, 1) && is_digit_range(ptr, ptr + 2)) { |
| part[2] = (ptr[0] - '0') * 10 + ptr[1] - '0'; |
| // has second |
| has_second = true; |
| SET_PARAMS_RET_FALSE_IFN(res.set_time_unit<TimeUnit::SECOND>(part[2]), |
| "invalid second {}", part[2]); |
| ptr += 2; |
| } |
| } |
| } |
| |
| FRAC: |
| // fractional part |
| if (has_second && ptr < end && *ptr == '.') { |
| ++ptr; |
| |
| const auto* start = ptr; |
| static_cast<void>(skip_any_digit(ptr, end)); |
| auto length = ptr - start; |
| |
| if (length > 0) { |
| StringParser::ParseResult success; |
| auto frac_literal = StringParser::string_to_uint_greedy_no_overflow<uint32_t>( |
| start, std::min<int>((int)length, to_scale), &success); |
| SET_PARAMS_RET_FALSE_IFN(success == StringParser::PARSE_SUCCESS, |
| "invalid fractional part in datetime string '{}'", |
| std::string {start, ptr}); |
| |
| if (length > to_scale) { // to_scale is up to 6 |
| // round off to at most `to_scale` digits |
| if (*(start + to_scale) - '0' >= 5) { |
| frac_literal++; |
| DCHECK(frac_literal <= 1000000); |
| if (frac_literal == common::exp10_i32(to_scale)) { |
| // overflow, round up to next second |
| SET_PARAMS_RET_FALSE_IFN( |
| res.date_add_interval<TimeUnit::SECOND>( |
| TimeInterval {TimeUnit::SECOND, 1, false}), |
| "datetime overflow when rounding up to next second"); |
| frac_literal = 0; |
| } |
| } |
| res.unchecked_set_time_unit<TimeUnit::MICROSECOND>( |
| (int32_t)frac_literal * common::exp10_i32(6 - (int)to_scale)); |
| } else { // length <= to_scale |
| res.unchecked_set_time_unit<TimeUnit::MICROSECOND>( |
| (int32_t)frac_literal * common::exp10_i32(6 - (int)length)); |
| } |
| } |
| } else { |
| res.unchecked_set_time_unit<TimeUnit::MICROSECOND>(0); |
| } |
| static_cast<void>(skip_any_digit(ptr, end)); |
| |
| static_cast<void>(skip_any_whitespace(ptr, end)); |
| |
| // timezone part |
| if (ptr != end) { |
| cctz::time_zone parsed_tz {}; |
| if (*ptr == '+' || *ptr == '-') { |
| // offset |
| const char sign = *ptr; |
| ++ptr; |
| part[1] = 0; |
| |
| uint32_t length = count_digits(ptr, end); |
| // hour |
| if (length == 1 || length == 3) { |
| SET_PARAMS_RET_FALSE_IFN((consume_digit<UInt32, 1>(ptr, end, part[0])), |
| "invalid hour offset '{}'", std::string {ptr, end}); |
| } else { |
| SET_PARAMS_RET_FALSE_IFN((consume_digit<UInt32, 2>(ptr, end, part[0])), |
| "invalid hour offset '{}'", std::string {ptr, end}); |
| } |
| SET_PARAMS_RET_FALSE_IFN(part[0] <= 14, "invalid hour offset '{}'", part[0]); |
| if (ptr < end) { |
| if (*ptr == ':') { |
| ++ptr; |
| } |
| // minute |
| SET_PARAMS_RET_FALSE_IFN((consume_digit<UInt32, 2>(ptr, end, part[1])), |
| "invalid minute offset '{}'", std::string {ptr, end}); |
| SET_PARAMS_RET_FALSE_IFN((part[1] == 0 || part[1] == 30 || part[1] == 45), |
| "invalid minute offset '{}'", part[1]); |
| } |
| SET_PARAMS_RET_FALSE_IFN(part[0] != 14 || part[1] == 0, "invalid timezone offset '{}'", |
| combine_tz_offset(sign, part[0], part[1])); |
| |
| SET_PARAMS_RET_FALSE_IFN(TimezoneUtils::find_cctz_time_zone( |
| combine_tz_offset(sign, part[0], part[1]), parsed_tz), |
| "invalid timezone offset '{}'", |
| combine_tz_offset(sign, part[0], part[1])); |
| } else { |
| // timezone name |
| const auto* start = ptr; |
| // short tzname, or something legal for tzdata. depends on our TimezoneUtils. |
| SET_PARAMS_RET_FALSE_IFN(skip_tz_name_part(ptr, end), "invalid timezone name '{}'", |
| std::string {ptr, end}); |
| |
| SET_PARAMS_RET_FALSE_IFN( |
| TimezoneUtils::find_cctz_time_zone(std::string {start, ptr}, parsed_tz), |
| "invalid timezone name '{}'", std::string {start, ptr}); |
| } |
| // convert tz |
| cctz::civil_second cs {res.year(), res.month(), res.day(), |
| res.hour(), res.minute(), res.second()}; |
| |
| if constexpr (type == DataTimeCastEnumType::DATE_TIME) { |
| // if not timestamptz, the given time is in local_time_zone |
| SET_PARAMS_RET_FALSE_IFN( |
| local_time_zone != nullptr, |
| "local time zone required for datetime string without timezone"); |
| auto given = cctz::convert(cs, parsed_tz); |
| auto local = cctz::convert(given, *local_time_zone); |
| res.unchecked_set_time_unit<TimeUnit::YEAR>((uint32_t)local.year()); |
| res.unchecked_set_time_unit<TimeUnit::MONTH>((uint32_t)local.month()); |
| res.unchecked_set_time_unit<TimeUnit::DAY>((uint32_t)local.day()); |
| res.unchecked_set_time_unit<TimeUnit::HOUR>((uint32_t)local.hour()); |
| res.unchecked_set_time_unit<TimeUnit::MINUTE>((uint32_t)local.minute()); |
| res.unchecked_set_time_unit<TimeUnit::SECOND>((uint32_t)local.second()); |
| } else { |
| // if timestamptz, the given time is in UTC |
| auto given = cctz::convert(cs, parsed_tz); |
| auto utc = cctz::convert(given, cctz::utc_time_zone()); |
| res.unchecked_set_time_unit<TimeUnit::YEAR>((uint32_t)utc.year()); |
| res.unchecked_set_time_unit<TimeUnit::MONTH>((uint32_t)utc.month()); |
| res.unchecked_set_time_unit<TimeUnit::DAY>((uint32_t)utc.day()); |
| res.unchecked_set_time_unit<TimeUnit::HOUR>((uint32_t)utc.hour()); |
| res.unchecked_set_time_unit<TimeUnit::MINUTE>((uint32_t)utc.minute()); |
| res.unchecked_set_time_unit<TimeUnit::SECOND>((uint32_t)utc.second()); |
| } |
| SET_PARAMS_RET_FALSE_IFN(res.year() <= 9999, "datetime year {} out of range [0, 9999]", |
| res.year()); |
| static_cast<void>(skip_any_whitespace(ptr, end)); |
| SET_PARAMS_RET_FALSE_IFN(ptr == end, |
| "invalid datetime string '{}', extra characters after timezone", |
| std::string {ptr, end}); |
| |
| return true; |
| } |
| |
| POST_PROCESS: |
| if constexpr (type == DataTimeCastEnumType::TIMESTAMP_TZ) { |
| // use local time zone to convert to UTC |
| SET_PARAMS_RET_FALSE_IFN(local_time_zone != nullptr, |
| "local time zone required for datetime string without timezone"); |
| cctz::civil_second cs {res.year(), res.month(), res.day(), |
| res.hour(), res.minute(), res.second()}; |
| |
| auto local = cctz::convert(cs, *local_time_zone); |
| auto utc = cctz::convert(local, cctz::utc_time_zone()); |
| res.unchecked_set_time_unit<TimeUnit::YEAR>((uint32_t)utc.year()); |
| res.unchecked_set_time_unit<TimeUnit::MONTH>((uint32_t)utc.month()); |
| res.unchecked_set_time_unit<TimeUnit::DAY>((uint32_t)utc.day()); |
| res.unchecked_set_time_unit<TimeUnit::HOUR>((uint32_t)utc.hour()); |
| res.unchecked_set_time_unit<TimeUnit::MINUTE>((uint32_t)utc.minute()); |
| res.unchecked_set_time_unit<TimeUnit::SECOND>((uint32_t)utc.second()); |
| SET_PARAMS_RET_FALSE_IFN(res.year() <= 9999, "datetime year {} out of range [0, 9999]", |
| res.year()); |
| } |
| return true; |
| } |
| |
/**
<datetime> ::= <whitespace>* <date> (<delimiter> <time> <whitespace>* <offset>?)? <whitespace>*

<date> ::= <year> <separator> <month> <separator> <day>
<time> ::= <hour> <separator> <minute> <separator> <second> [<fraction>]

<year> ::= <digit>{4} | <digit>{2}
<month> ::= <digit>{1,2}
<day> ::= <digit>{1,2}
<hour> ::= <digit>{1,2}
<minute> ::= <digit>{1,2}
<second> ::= <digit>{1,2}

<separator> ::= ^(<digit> | <alpha>)
<delimiter> ::= " " | "T"

<fraction> ::= "." <digit>*

––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––

<offset> ::= ( "+" | "-" ) <hour-offset> [ ":"? <minute-offset> ]
| <tz-name>

<tz-name> ::= <short-tz> | <long-tz>

<short-tz> ::= "CST" | "UTC" | "GMT" | "ZULU" | "Z" ; case-insensitive
<long-tz> ::= <area> "/" <location> ; e.g. America/New_York

<hour-offset> ::= <digit>{1,2} ; 0–14
<minute-offset> ::= <digit>{2} ; 00/30/45

<area> ::= <alpha>+
<location> ::= (<alpha> | "_")+

––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––

<whitespace> ::= " " | "\t" | "\n" | "\r" | "\v" | "\f"

<digit> ::= "0" | "1" | "2" | "3" | "4" | "5" | "6" | "7" | "8" | "9"

<alpha> ::= "A" | … | "Z" | "a" | … | "z"
*/
| |
| template <DataTimeCastEnumType type> |
| inline bool CastToDatetimeV2::from_string_non_strict_mode_internal( |
| const StringRef& str, DateV2Value<DateTimeV2ValueType>& res, |
| const cctz::time_zone* local_time_zone, uint32_t to_scale, CastParameters& params) { |
| constexpr bool IsStrict = false; |
| const char* ptr = str.data; |
| const char* end = ptr + str.size; |
| AsanPoisonGuard defer(end, 1); |
| |
| // skip leading whitespace |
| static_cast<void>(skip_any_whitespace(ptr, end)); |
| SET_PARAMS_RET_FALSE_IFN(ptr != end, "empty datetime string"); |
| |
| // date part |
| uint32_t year, month, day; |
| |
| // read year |
| PROPAGATE_FALSE((consume_digit<UInt32, 2>(ptr, end, year))); |
| if (is_digit_range(ptr, ptr + 1)) { |
| // continue by digit, it must be a 4-digit year |
| uint32_t year2; |
| PROPAGATE_FALSE((consume_digit<UInt32, 2>(ptr, end, year2))); |
| year = year * 100 + year2; |
| } else { |
| // otherwise, it must be a 2-digit year |
| if (year < 100) { |
| // Convert 2-digit year based on 1970 boundary |
| year += (year >= 70) ? 1900 : 2000; |
| } |
| } |
| |
| // check for separator |
| PROPAGATE_FALSE(skip_one_non_alnum(ptr, end)); |
| |
| // read month |
| PROPAGATE_FALSE((consume_digit<UInt32, 1, 2>(ptr, end, month))); |
| |
| // check for separator |
| PROPAGATE_FALSE(skip_one_non_alnum(ptr, end)); |
| |
| // read day |
| PROPAGATE_FALSE((consume_digit<UInt32, 1, 2>(ptr, end, day))); |
| |
| if (!try_convert_set_zero_date(res, year, month, day)) { |
| SET_PARAMS_RET_FALSE_IFN(res.set_time_unit<TimeUnit::YEAR>(year), "invalid year {}", year); |
| SET_PARAMS_RET_FALSE_IFN(res.set_time_unit<TimeUnit::MONTH>(month), "invalid month {}", |
| month); |
| SET_PARAMS_RET_FALSE_IFN(res.set_time_unit<TimeUnit::DAY>(day), "invalid day {}", day); |
| } |
| |
| if (is_space_range(ptr, end)) { |
| // no time part, just return. |
| res.unchecked_set_time_unit<TimeUnit::HOUR>(0); |
| res.unchecked_set_time_unit<TimeUnit::MINUTE>(0); |
| res.unchecked_set_time_unit<TimeUnit::SECOND>(0); |
| res.unchecked_set_time_unit<TimeUnit::MICROSECOND>(0); |
| goto POST_PROCESS; |
| } |
| |
| PROPAGATE_FALSE(consume_one_delimiter(ptr, end)); |
| |
| // time part |
| uint32_t hour, minute, second; |
| |
| // hour |
| PROPAGATE_FALSE((consume_digit<UInt32, 1, 2>(ptr, end, hour))); |
| SET_PARAMS_RET_FALSE_IFN(res.set_time_unit<TimeUnit::HOUR>(hour), "invalid hour {}", hour); |
| |
| // check for separator |
| PROPAGATE_FALSE(skip_one_non_alnum(ptr, end)); |
| |
| // minute |
| PROPAGATE_FALSE((consume_digit<UInt32, 1, 2>(ptr, end, minute))); |
| SET_PARAMS_RET_FALSE_IFN(res.set_time_unit<TimeUnit::MINUTE>(minute), "invalid minute {}", |
| minute); |
| |
| // check for separator |
| PROPAGATE_FALSE(skip_one_non_alnum(ptr, end)); |
| |
| // second |
| PROPAGATE_FALSE((consume_digit<UInt32, 1, 2>(ptr, end, second))); |
| SET_PARAMS_RET_FALSE_IFN(res.set_time_unit<TimeUnit::SECOND>(second), "invalid second {}", |
| second); |
| |
| // fractional part |
| if (ptr < end && *ptr == '.') { |
| ++ptr; |
| |
| const auto* start = ptr; |
| static_cast<void>(skip_any_digit(ptr, end)); |
| auto length = ptr - start; |
| |
| if (length > 0) { |
| StringParser::ParseResult success; |
| auto frac_literal = StringParser::string_to_uint_greedy_no_overflow<uint32_t>( |
| start, std::min<int>((int)length, to_scale), &success); |
| SET_PARAMS_RET_FALSE_IFN(success == StringParser::PARSE_SUCCESS, |
| "invalid fractional part in datetime string '{}'", |
| std::string {start, ptr}); |
| |
| if (length > to_scale) { // to_scale is up to 6 |
| // round off to at most `to_scale` digits |
| if (*(start + to_scale) - '0' >= 5) { |
| frac_literal++; |
| DCHECK(frac_literal <= 1000000); |
| if (frac_literal == common::exp10_i32(to_scale)) { |
| // overflow, round up to next second |
| SET_PARAMS_RET_FALSE_IFN( |
| res.date_add_interval<TimeUnit::SECOND>( |
| TimeInterval {TimeUnit::SECOND, 1, false}), |
| "datetime overflow when rounding up to next second"); |
| frac_literal = 0; |
| } |
| } |
| res.unchecked_set_time_unit<TimeUnit::MICROSECOND>( |
| (int32_t)frac_literal * common::exp10_i32(6 - (int)to_scale)); |
| } else { // length <= to_scale |
| res.unchecked_set_time_unit<TimeUnit::MICROSECOND>( |
| (int32_t)frac_literal * common::exp10_i32(6 - (int)length)); |
| } |
| } else { |
| res.unchecked_set_time_unit<TimeUnit::MICROSECOND>(0); |
| } |
| } else { |
| res.unchecked_set_time_unit<TimeUnit::MICROSECOND>(0); |
| } |
| |
| // skip any whitespace after time |
| static_cast<void>(skip_any_whitespace(ptr, end)); |
| |
| // timezone part (if any) |
| if (ptr != end) { |
| cctz::time_zone parsed_tz {}; |
| if (*ptr == '+' || *ptr == '-') { |
| // offset |
| const char sign = *ptr; |
| ++ptr; |
| uint32_t hour_offset, minute_offset = 0; |
| |
| uint32_t length = count_digits(ptr, end); |
| // hour |
| if (length == 1 || length == 3) { |
| PROPAGATE_FALSE((consume_digit<UInt32, 1>(ptr, end, hour_offset))); |
| } else { |
| PROPAGATE_FALSE((consume_digit<UInt32, 2>(ptr, end, hour_offset))); |
| } |
| SET_PARAMS_RET_FALSE_IFN(hour_offset <= 14, "invalid hour offset '{}'", hour_offset); |
| if (ptr < end) { |
| if (*ptr == ':') { |
| ++ptr; |
| } |
| // minute |
| PROPAGATE_FALSE((consume_digit<UInt32, 2>(ptr, end, minute_offset))); |
| SET_PARAMS_RET_FALSE_IFN( |
| (minute_offset == 0 || minute_offset == 30 || minute_offset == 45), |
| "invalid minute offset {}", minute_offset); |
| } |
| SET_PARAMS_RET_FALSE_IFN(hour_offset != 14 || minute_offset == 0, |
| "invalid timezone offset '{}'", |
| combine_tz_offset(sign, hour_offset, minute_offset)); |
| |
| SET_PARAMS_RET_FALSE_IFN( |
| TimezoneUtils::find_cctz_time_zone( |
| combine_tz_offset(sign, hour_offset, minute_offset), parsed_tz), |
| "invalid timezone offset '{}'", |
| combine_tz_offset(sign, hour_offset, minute_offset)); |
| } else { |
| // timezone name |
| const auto* start = ptr; |
| // short tzname, or something legal for tzdata. depends on our TimezoneUtils. |
| PROPAGATE_FALSE(skip_tz_name_part(ptr, end)); |
| |
| SET_PARAMS_RET_FALSE_IFN( |
| TimezoneUtils::find_cctz_time_zone(std::string {start, ptr}, parsed_tz), |
| "invalid timezone name '{}'", std::string {start, ptr}); |
| } |
| |
| // convert tz |
| cctz::civil_second cs {res.year(), res.month(), res.day(), |
| res.hour(), res.minute(), res.second()}; |
| |
| if constexpr (type == DataTimeCastEnumType::DATE_TIME) { |
| // if not timestamptz, the given time is in local_time_zone |
| SET_PARAMS_RET_FALSE_IFN( |
| local_time_zone != nullptr, |
| "local time zone required for datetime string without timezone"); |
| auto given = cctz::convert(cs, parsed_tz); |
| auto local = cctz::convert(given, *local_time_zone); |
| res.unchecked_set_time_unit<TimeUnit::YEAR>((uint32_t)local.year()); |
| res.unchecked_set_time_unit<TimeUnit::MONTH>((uint32_t)local.month()); |
| res.unchecked_set_time_unit<TimeUnit::DAY>((uint32_t)local.day()); |
| res.unchecked_set_time_unit<TimeUnit::HOUR>((uint32_t)local.hour()); |
| res.unchecked_set_time_unit<TimeUnit::MINUTE>((uint32_t)local.minute()); |
| res.unchecked_set_time_unit<TimeUnit::SECOND>((uint32_t)local.second()); |
| } else { |
| // if timestamptz, the given time is in UTC |
| auto given = cctz::convert(cs, parsed_tz); |
| auto utc = cctz::convert(given, cctz::utc_time_zone()); |
| res.unchecked_set_time_unit<TimeUnit::YEAR>((uint32_t)utc.year()); |
| res.unchecked_set_time_unit<TimeUnit::MONTH>((uint32_t)utc.month()); |
| res.unchecked_set_time_unit<TimeUnit::DAY>((uint32_t)utc.day()); |
| res.unchecked_set_time_unit<TimeUnit::HOUR>((uint32_t)utc.hour()); |
| res.unchecked_set_time_unit<TimeUnit::MINUTE>((uint32_t)utc.minute()); |
| res.unchecked_set_time_unit<TimeUnit::SECOND>((uint32_t)utc.second()); |
| } |
| SET_PARAMS_RET_FALSE_IFN(res.year() <= 9999, "datetime year {} out of range [0, 9999]", |
| res.year()); |
| |
| static_cast<void>(skip_any_whitespace(ptr, end)); |
| SET_PARAMS_RET_FALSE_IFN(ptr == end, |
| "invalid datetime string '{}', extra characters after parsing", |
| std::string {ptr, end}); |
| |
| return true; |
| } |
| |
| // skip trailing whitespace |
| static_cast<void>(skip_any_whitespace(ptr, end)); |
| SET_PARAMS_RET_FALSE_IFN(ptr == end, |
| "invalid datetime string '{}', extra characters after parsing", |
| std::string {ptr, end}); |
| |
| POST_PROCESS: |
| if constexpr (type == DataTimeCastEnumType::TIMESTAMP_TZ) { |
| // use local time zone to convert to UTC |
| SET_PARAMS_RET_FALSE_IFN(local_time_zone != nullptr, |
| "local time zone required for datetime string without timezone"); |
| cctz::civil_second cs {res.year(), res.month(), res.day(), |
| res.hour(), res.minute(), res.second()}; |
| |
| auto local = cctz::convert(cs, *local_time_zone); |
| auto utc = cctz::convert(local, cctz::utc_time_zone()); |
| res.unchecked_set_time_unit<TimeUnit::YEAR>((uint32_t)utc.year()); |
| res.unchecked_set_time_unit<TimeUnit::MONTH>((uint32_t)utc.month()); |
| res.unchecked_set_time_unit<TimeUnit::DAY>((uint32_t)utc.day()); |
| res.unchecked_set_time_unit<TimeUnit::HOUR>((uint32_t)utc.hour()); |
| res.unchecked_set_time_unit<TimeUnit::MINUTE>((uint32_t)utc.minute()); |
| res.unchecked_set_time_unit<TimeUnit::SECOND>((uint32_t)utc.second()); |
| SET_PARAMS_RET_FALSE_IFN(res.year() <= 9999, "datetime year {} out of range [0, 9999]", |
| res.year()); |
| } |
| |
| return true; |
| } |
| |
| // NOLINTEND(readability-function-cognitive-complexity) |
| // NOLINTEND(readability-function-size) |
| |
| // return true if success, false if overflow |
| inline bool transform_date_scale(UInt32 to_scale, UInt32 from_scale, |
| PrimitiveTypeTraits<TYPE_DATETIMEV2>::CppType& to_value, |
| const PrimitiveTypeTraits<TYPE_DATETIMEV2>::CppType& from_value) { |
| if (to_scale >= from_scale) { |
| // nothing to do, just copy |
| to_value = from_value; |
| } else { |
| DateV2Value<DateTimeV2ValueType> dtmv2 = |
| DateV2Value<DateTimeV2ValueType>(from_value.to_date_int_val()); |
| // e.g. scale reduce to 4, means we need to round the last 2 digits |
| // 999956: 56 > 100/2, then round up to 1000000 |
| uint32_t microseconds = dtmv2.microsecond(); |
| DCHECK(to_scale <= 6) << "to_scale should be in range [0, 6], but got " << to_scale; |
| auto divisor = (uint32_t)common::exp10_i64(6 - to_scale); |
| uint32_t remainder = microseconds % divisor; |
| |
| if (remainder >= divisor / 2) { // need to round up |
| // do rounding up |
| uint32_t rounded_microseconds = ((microseconds / divisor) + 1) * divisor; |
| // need carry on |
| if (rounded_microseconds >= 1000000) { |
| DCHECK(rounded_microseconds == 1000000); |
| dtmv2.unchecked_set_time_unit<TimeUnit::MICROSECOND>(0); |
| |
| bool overflow = !dtmv2.date_add_interval<TimeUnit::SECOND>( |
| TimeInterval {TimeUnit::SECOND, 1, false}); |
| if (overflow) { |
| return false; |
| } |
| } else { |
| static_cast<void>(dtmv2.set_time_unit<TimeUnit::MICROSECOND>(rounded_microseconds)); |
| } |
| } else { |
| // Round down (truncate) as before |
| static_cast<void>( |
| dtmv2.set_time_unit<TimeUnit::MICROSECOND>((microseconds / divisor) * divisor)); |
| } |
| to_value = dtmv2; |
| } |
| return true; |
| } |
| |
| inline bool transform_date_scale(UInt32 to_scale, UInt32 from_scale, |
| PrimitiveTypeTraits<TYPE_TIMESTAMPTZ>::CppType& to_value, |
| const PrimitiveTypeTraits<TYPE_TIMESTAMPTZ>::CppType& from_value) { |
| if (to_scale >= from_scale) { |
| // nothing to do, just copy |
| to_value = from_value; |
| } else { |
| DateV2Value<DateTimeV2ValueType> dtmv2 = |
| DateV2Value<DateTimeV2ValueType>(from_value.to_date_int_val()); |
| // e.g. scale reduce to 4, means we need to round the last 2 digits |
| // 999956: 56 > 100/2, then round up to 1000000 |
| uint32_t microseconds = dtmv2.microsecond(); |
| DCHECK(to_scale <= 6) << "to_scale should be in range [0, 6], but got " << to_scale; |
| auto divisor = (uint32_t)common::exp10_i64(6 - to_scale); |
| uint32_t remainder = microseconds % divisor; |
| |
| if (remainder >= divisor / 2) { // need to round up |
| // do rounding up |
| uint32_t rounded_microseconds = ((microseconds / divisor) + 1) * divisor; |
| // need carry on |
| if (rounded_microseconds >= 1000000) { |
| DCHECK(rounded_microseconds == 1000000); |
| dtmv2.unchecked_set_time_unit<TimeUnit::MICROSECOND>(0); |
| |
| bool overflow = !dtmv2.date_add_interval<TimeUnit::SECOND>( |
| TimeInterval {TimeUnit::SECOND, 1, false}); |
| if (overflow) { |
| return false; |
| } |
| } else { |
| static_cast<void>(dtmv2.set_time_unit<TimeUnit::MICROSECOND>(rounded_microseconds)); |
| } |
| } else { |
| // Round down (truncate) as before |
| static_cast<void>( |
| dtmv2.set_time_unit<TimeUnit::MICROSECOND>((microseconds / divisor) * divisor)); |
| } |
| to_value = TimestampTzValue(dtmv2.to_date_int_val()); |
| } |
| return true; |
| } |
| |
| #include "common/compile_check_end.h" |
| } // namespace doris |