| // Licensed to the Apache Software Foundation (ASF) under one |
| // or more contributor license agreements. See the NOTICE file |
| // distributed with this work for additional information |
| // regarding copyright ownership. The ASF licenses this file |
| // to you under the Apache License, Version 2.0 (the |
| // "License"); you may not use this file except in compliance |
| // with the License. You may obtain a copy of the License at |
| // |
| // http://www.apache.org/licenses/LICENSE-2.0 |
| // |
| // Unless required by applicable law or agreed to in writing, |
| // software distributed under the License is distributed on an |
| // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
| // KIND, either express or implied. See the License for the |
| // specific language governing permissions and limitations |
| // under the License. |
| |
| #pragma once |
| |
| #include <sys/types.h> |
| |
| #include <type_traits> |
| |
| #include "common/status.h" |
| #include "core/data_type/data_type_decimal.h" // IWYU pragma: keep |
| #include "core/data_type/primitive_type.h" |
| #include "core/data_type_serde/data_type_serde.h" |
| #include "core/data_type_serde/datelike_serde_common.hpp" |
| #include "core/types.h" |
| #include "core/value/vdatetime_value.h" |
| #include "exprs/function/cast/cast_base.h" // IWYU pragma: keep |
| #include "util/asan_util.h" |
| #include "util/string_parser.hpp" |
| |
| namespace doris { |
| #include "common/compile_check_begin.h" |
| // NOLINTBEGIN(readability-function-size) |
| // NOLINTBEGIN(readability-function-cognitive-complexity) |
| /** |
| * For the functions: |
| * function name `strict_mode` or `non_strict_mode`: follow the RULES of which mode |
| * template parameter `IsStrict`: whether it is RUNNING IN strict mode or not. |
| * return value: whether the cast is successful or not. |
| * `params.status`: set error code ONLY IN STRICT MODE. |
| */ |
| struct CastToDateV2 { |
| // may be slow |
| template <typename T> |
| static inline bool from_integer(T int_val, DateV2Value<DateV2ValueType>& val, |
| CastParameters& params) { |
| if (params.is_strict) { |
| return from_integer<DatelikeParseMode::STRICT>(int_val, val, params); |
| } else { |
| return from_integer<DatelikeParseMode::NON_STRICT>(int_val, val, params); |
| } |
| } |
| |
| // same behaviour in both strict and non-strict mode |
| template <DatelikeParseMode ParseMode, typename T> |
| static inline bool from_integer(T int_val, DateV2Value<DateV2ValueType>& val, |
| CastParameters& params); |
| |
| // may be slow |
| template <typename T> |
| requires std::is_floating_point_v<T> |
| static inline bool from_float(T float_value, DateV2Value<DateV2ValueType>& val, |
| CastParameters& params) { |
| if (params.is_strict) { |
| return from_float<DatelikeParseMode::STRICT>(float_value, val, params); |
| } else { |
| return from_float<DatelikeParseMode::NON_STRICT>(float_value, val, params); |
| } |
| } |
| |
| template <DatelikeParseMode ParseMode, typename T> |
| requires std::is_floating_point_v<T> |
| static inline bool from_float(T float_value, DateV2Value<DateV2ValueType>& val, |
| CastParameters& params) { |
| constexpr bool IsStrict = is_datelike_parse_strict(ParseMode); |
| DCHECK(IsStrict == params.is_strict); |
| SET_PARAMS_RET_FALSE_IFN(float_value > 0 && !std::isnan(float_value) && |
| !std::isinf(float_value) && |
| float_value < (double)std::numeric_limits<int64_t>::max(), |
| "invalid float value for datev2: {}", float_value); |
| |
| auto int_part = static_cast<int64_t>(float_value); |
| if (!from_integer<ParseMode>(int_part, val, params)) { |
| // if IsStrict, error code has been set in from_integer |
| return false; |
| } |
| return true; |
| } |
| |
| // may be slow |
| template <typename T> |
| static inline bool from_decimal(const T& int_part, const int64_t& decimal_scale, |
| DateV2Value<DateV2ValueType>& res, CastParameters& params) { |
| if (params.is_strict) { |
| return from_decimal<DatelikeParseMode::STRICT>(int_part, decimal_scale, res, params); |
| } else { |
| return from_decimal<DatelikeParseMode::NON_STRICT>(int_part, decimal_scale, res, |
| params); |
| } |
| } |
| |
| template <DatelikeParseMode ParseMode, typename T> |
| static inline bool from_decimal(const T& int_part, const int64_t& decimal_scale, |
| DateV2Value<DateV2ValueType>& res, CastParameters& params) { |
| constexpr bool IsStrict = is_datelike_parse_strict(ParseMode); |
| DCHECK(IsStrict == params.is_strict); |
| SET_PARAMS_RET_FALSE_IFN(int_part <= std::numeric_limits<int64_t>::max() && int_part >= 1, |
| "invalid decimal value for datev2: {}.xxx", int_part); |
| |
| if (!from_integer<ParseMode>(int_part, res, params)) { |
| // if IsStrict, error code has been set in from_integer |
| return false; |
| } |
| return true; |
| } |
| |
| // may be slow |
| static inline bool from_string(const StringRef& str, DateV2Value<DateV2ValueType>& res, |
| const cctz::time_zone* local_time_zone, CastParameters& params) { |
| if (params.is_strict) { |
| return from_string_strict_mode<DatelikeParseMode::STRICT>(str, res, local_time_zone, |
| params); |
| } else { |
| return from_string_non_strict_mode(str, res, local_time_zone, params); |
| } |
| } |
| |
| // this code follow rules of strict mode, but whether it RUNNING IN strict mode or not depends on the `IsStrict` |
| // parameter. if it's false, we dont set error code for performance and we dont need. |
| template <DatelikeParseMode ParseMode> |
| static inline bool from_string_strict_mode(const StringRef& str, |
| DateV2Value<DateV2ValueType>& res, |
| const cctz::time_zone* local_time_zone, |
| CastParameters& params); |
| |
| static inline bool from_string_non_strict_mode(const StringRef& str, |
| DateV2Value<DateV2ValueType>& res, |
| const cctz::time_zone* local_time_zone, |
| CastParameters& params) { |
| return CastToDateV2::from_string_strict_mode<DatelikeParseMode::NON_STRICT>( |
| str, res, local_time_zone, params) || |
| CastToDateV2::from_string_non_strict_mode_impl(str, res, local_time_zone, params); |
| } |
| |
| static inline bool from_string_non_strict_mode_impl(const StringRef& str, |
| DateV2Value<DateV2ValueType>& res, |
| const cctz::time_zone* local_time_zone, |
| CastParameters& params); |
| }; |
| |
| template <DatelikeParseMode ParseMode, typename T> |
| inline bool CastToDateV2::from_integer(T input, DateV2Value<DateV2ValueType>& val, |
| CastParameters& params) { |
| constexpr bool IsStrict = is_datelike_parse_strict(ParseMode); |
| DCHECK(IsStrict == params.is_strict); |
| // T maybe int128 then bigger than int64_t. so we must check before cast |
| SET_PARAMS_RET_FALSE_IFN(input <= std::numeric_limits<int64_t>::max() && input > 0, |
| "invalid int value for datev2: {}", input); |
| auto int_val = static_cast<int64_t>(input); |
| int length = common::count_digits_fast(int_val); |
| |
| if (length == 3 || length == 4) { |
| val.unchecked_set_time_unit<TimeUnit::YEAR>(2000); |
| SET_PARAMS_RET_FALSE_IFN(val.set_time_unit<TimeUnit::MONTH>((uint32_t)int_val / 100), |
| "invalid month {}", int_val / 100); |
| SET_PARAMS_RET_FALSE_IFN(val.set_time_unit<TimeUnit::DAY>(int_val % 100), "invalid day {}", |
| int_val % 100); |
| } else if (length == 5) { |
| SET_PARAMS_RET_FALSE_IFN( |
| val.set_time_unit<TimeUnit::YEAR>(2000 + (uint32_t)int_val / 10000), |
| "invalid year {}", 2000 + int_val / 10000); |
| SET_PARAMS_RET_FALSE_IFN(val.set_time_unit<TimeUnit::MONTH>(int_val % 10000 / 100), |
| "invalid month {}", int_val % 10000 / 100); |
| SET_PARAMS_RET_FALSE_IFN(val.set_time_unit<TimeUnit::DAY>(int_val % 100), "invalid day {}", |
| int_val % 100); |
| } else if (length == 6) { |
| uint32_t year = (uint32_t)int_val / 10000; |
| if (year < 70) { |
| year += 2000; |
| } else { |
| year += 1900; |
| } |
| SET_PARAMS_RET_FALSE_IFN(val.set_time_unit<TimeUnit::YEAR>(year), "invalid year {}", year); |
| SET_PARAMS_RET_FALSE_IFN(val.set_time_unit<TimeUnit::MONTH>(int_val % 10000 / 100), |
| "invalid month {}", int_val % 10000 / 100); |
| SET_PARAMS_RET_FALSE_IFN(val.set_time_unit<TimeUnit::DAY>(int_val % 100), "invalid day {}", |
| int_val % 100); |
| } else if (length == 8) { |
| SET_PARAMS_RET_FALSE_IFN(val.set_time_unit<TimeUnit::YEAR>((uint32_t)int_val / 10000), |
| "invalid year {}", int_val / 10000); |
| SET_PARAMS_RET_FALSE_IFN(val.set_time_unit<TimeUnit::MONTH>(int_val % 10000 / 100), |
| "invalid month {}", int_val % 10000 / 100); |
| SET_PARAMS_RET_FALSE_IFN(val.set_time_unit<TimeUnit::DAY>(int_val % 100), "invalid day {}", |
| int_val % 100); |
| } else if (length == 14) { |
| SET_PARAMS_RET_FALSE_IFN(val.set_time_unit<TimeUnit::YEAR>(int_val / common::exp10_i64(10)), |
| "invalid year {}", int_val / common::exp10_i64(10)); |
| SET_PARAMS_RET_FALSE_IFN( |
| val.set_time_unit<TimeUnit::MONTH>((int_val / common::exp10_i32(8)) % 100), |
| "invalid month {}", (int_val / common::exp10_i32(8)) % 100); |
| SET_PARAMS_RET_FALSE_IFN( |
| val.set_time_unit<TimeUnit::DAY>((int_val / common::exp10_i32(6)) % 100), |
| "invalid day {}", (int_val / common::exp10_i32(6)) % 100); |
| } else [[unlikely]] { |
| if constexpr (IsStrict) { |
| params.status = Status::InvalidArgument("invalid digits for datev2: {}", int_val); |
| } |
| return false; |
| } |
| return true; |
| } |
| |
| /** |
| <datetime> ::= <date> (("T" | " ") <time> <whitespace>* <offset>?)? |
| |
| –––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––– |
| |
| <date> ::= <year> ("-" | "/") <month1> ("-" | "/") <day1> |
| | <year> <month2> <day2> |
| |
| <year> ::= <digit>{2} | <digit>{4} ; two-digit years pivot at 1970 |
| <month1> ::= <digit>{1,2} ; 01–12 |
| <day1> ::= <digit>{1,2} ; 01–28/29/30/31 depending on the month |
| |
| <month2> ::= <digit>{2} ; 01–12 |
| <day2> ::= <digit>{2} ; 01–28/29/30/31 depending on the month |
| |
| –––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––– |
| |
| <time> ::= <hour1> (":" <minute1> (":" <second1> <fraction>?)?)? |
| | <hour2> (<minute2> (<second2> <fraction>?)?)? |
| |
| <hour1> ::= <digit>{1,2} ; 00–23 |
| <minute1> ::= <digit>{1,2} ; 00–59 |
| <second1> ::= <digit>{1,2} ; 00–59 |
| |
| <hour2> ::= <digit>{2} ; 00–23 |
| <minute2> ::= <digit>{2} ; 00–59 |
| <second2> ::= <digit>{2} ; 00–59 |
| |
| <fraction> ::= "." <digit>* |
| |
| –––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––– |
| |
| <offset> ::= ( "+" | "-" ) <hour-offset> [ ":"? <minute-offset> ] |
| | <tz-name> |
| |
| <tz-name> ::= <short-tz> | <long-tz> |
| |
| <short-tz> ::= "CST" | "UTC" | "GMT" | "ZULU" | "Z" ; case-insensitive |
| <long-tz> ::= <area> "/" <location> ; e.g. America/New_York |
| |
| <hour-offset> ::= <digit>{1,2} ; 0–14 |
| <minute-offset> ::= <digit>{2} ; 00/30/45 |
| |
| –––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––– |
| |
| <digit> ::= "0" | "1" | "2" | "3" | "4" | "5" | "6" | "7" | "8" | "9" |
| |
| <area> ::= <alpha>+ |
| <location> ::= (<alpha> | "_")+ |
| <alpha> ::= "A" | … | "Z" | "a" | … | "z" |
| <whitespace> ::= " " | "\t" | "\n" | "\r" | "\v" | "\f" |
| */ |
| template <DatelikeParseMode ParseMode> |
| inline bool CastToDateV2::from_string_strict_mode(const StringRef& str, |
| DateV2Value<DateV2ValueType>& res, |
| const cctz::time_zone* local_time_zone, |
| CastParameters& params) { |
| constexpr bool IsStrict = is_datelike_parse_strict(ParseMode); |
| const char* ptr = str.data; |
| const char* end = ptr + str.size; |
| AsanPoisonGuard defer(end, 1); |
| |
| uint32_t part[4]; |
| bool has_second = false; |
| |
| // special `date` and `time` part format: 14-length digits string. parse it as YYYYMMDDHHMMSS |
| if (ptr + 13 < end && is_digit_range(ptr, ptr + 14)) { |
| // if the string is all digits, treat it as a date in YYYYMMDD format. |
| SET_PARAMS_RET_FALSE_IFN((consume_digit<UInt32, 4>(ptr, end, part[0])), |
| "failed to consume 4 digits for year, got {}", |
| std::string {ptr, end}); |
| SET_PARAMS_RET_FALSE_IFN((consume_digit<UInt32, 2>(ptr, end, part[1])), |
| "failed to consume 2 digits for month, got {}", |
| std::string {ptr, end}); |
| SET_PARAMS_RET_FALSE_IFN((consume_digit<UInt32, 2>(ptr, end, part[2])), |
| "failed to consume 2 digits for day, got {}", |
| std::string {ptr, end}); |
| if (!try_convert_set_zero_date(res, part[0], part[1], part[2])) { |
| SET_PARAMS_RET_FALSE_IFN(res.set_time_unit<TimeUnit::YEAR>(part[0]), "invalid year {}", |
| part[0]); |
| SET_PARAMS_RET_FALSE_IFN(res.set_time_unit<TimeUnit::MONTH>(part[1]), |
| "invalid month {}", part[1]); |
| SET_PARAMS_RET_FALSE_IFN(res.set_time_unit<TimeUnit::DAY>(part[2]), "invalid day {}", |
| part[2]); |
| } |
| |
| SET_PARAMS_RET_FALSE_IFN((consume_digit<UInt32, 2>(ptr, end, part[0])), |
| "failed to consume 2 digits for hour, got {}", |
| std::string {ptr, end}); |
| SET_PARAMS_RET_FALSE_IFN((consume_digit<UInt32, 2>(ptr, end, part[1])), |
| "failed to consume 2 digits for minute, got {}", |
| std::string {ptr, end}); |
| SET_PARAMS_RET_FALSE_IFN((consume_digit<UInt32, 2>(ptr, end, part[2])), |
| "failed to consume 2 digits for second, got {}", |
| std::string {ptr, end}); |
| SET_PARAMS_RET_FALSE_IFN(res.test_time_unit<TimeUnit::HOUR>(part[0]), "invalid hour {}", |
| part[0]); |
| SET_PARAMS_RET_FALSE_IFN(res.test_time_unit<TimeUnit::MINUTE>(part[1]), "invalid minute {}", |
| part[1]); |
| SET_PARAMS_RET_FALSE_IFN(res.test_time_unit<TimeUnit::SECOND>(part[2]), "invalid second {}", |
| part[2]); |
| has_second = true; |
| if (ptr == end) { |
| // no fraction or timezone part, just return. |
| return true; |
| } |
| goto FRAC; |
| } |
| |
| // date part |
| SET_PARAMS_RET_FALSE_IFN(in_bound(ptr, end, 5), "too short date part, got '{}'", |
| std::string {ptr, end}); |
| if (is_digit_range(ptr, ptr + 5)) { |
| // no delimiter here. |
| SET_PARAMS_RET_FALSE_IFN((consume_digit<UInt32, 2>(ptr, end, part[0])), |
| "failed to consume 2 digits for year, got {}", |
| std::string {ptr, end}); |
| SET_PARAMS_RET_FALSE_IFN((consume_digit<UInt32, 2>(ptr, end, part[1])), |
| "failed to consume 2 digits for year/month, got {}", |
| std::string {ptr, end}); |
| SET_PARAMS_RET_FALSE_IFN((consume_digit<UInt32, 2>(ptr, end, part[2])), |
| "failed to consume 2 digits for month/day, got {}", |
| std::string {ptr, end}); |
| if (ptr < end && is_numeric_ascii(*ptr)) { |
| // 4 digits year |
| SET_PARAMS_RET_FALSE_IFN((consume_digit<UInt32, 2>(ptr, end, part[3])), |
| "failed to consume 2 digits for day, got {}", |
| std::string {ptr, end}); |
| if (!try_convert_set_zero_date(res, part[0] * 100 + part[1], part[2], part[3])) { |
| SET_PARAMS_RET_FALSE_IFN(res.set_time_unit<TimeUnit::YEAR>(part[0] * 100 + part[1]), |
| "invalid year {}", part[0] * 100 + part[1]); |
| SET_PARAMS_RET_FALSE_IFN(res.set_time_unit<TimeUnit::MONTH>(part[2]), |
| "invalid month {}", part[2]); |
| SET_PARAMS_RET_FALSE_IFN(res.set_time_unit<TimeUnit::DAY>(part[3]), |
| "invalid day {}", part[3]); |
| } |
| } else { |
| if (!try_convert_set_zero_date(res, complete_4digit_year(part[0]), part[1], part[2])) { |
| SET_PARAMS_RET_FALSE_IFN( |
| res.set_time_unit<TimeUnit::YEAR>(complete_4digit_year(part[0])), |
| "invalid year {}", part[0]); |
| SET_PARAMS_RET_FALSE_IFN(res.set_time_unit<TimeUnit::MONTH>(part[1]), |
| "invalid month {}", part[1]); |
| SET_PARAMS_RET_FALSE_IFN(res.set_time_unit<TimeUnit::DAY>(part[2]), |
| "invalid day {}", part[2]); |
| } |
| } |
| } else { |
| // has delimiter here. |
| SET_PARAMS_RET_FALSE_IFN((consume_digit<UInt32, 2>(ptr, end, part[0])), |
| "failed to consume 2 digits for year, got {}", |
| std::string {ptr, end}); |
| SET_PARAMS_RET_FALSE_IFN(in_bound(ptr, end, 0), "too short date part, got '{}'", |
| std::string {ptr, end}); |
| if (is_date_sep(*ptr)) { |
| // 2 digits year |
| ++ptr; // consume one bar |
| SET_PARAMS_RET_FALSE_IFN((consume_digit<UInt32, 1, 2>(ptr, end, part[1])), |
| "failed to consume 1 or 2 digits for month, got {}", |
| std::string {ptr, end}); |
| SET_PARAMS_RET_FALSE_IFN((consume_one_date_sep(ptr, end)), |
| "failed to consume one bar after month, got {}", |
| std::string {ptr, end}); |
| SET_PARAMS_RET_FALSE_IFN((consume_digit<UInt32, 1, 2>(ptr, end, part[2])), |
| "failed to consume 1 or 2 digits for day, got {}", |
| std::string {ptr, end}); |
| |
| if (!try_convert_set_zero_date(res, part[0], part[1], part[2])) { |
| SET_PARAMS_RET_FALSE_IFN( |
| res.set_time_unit<TimeUnit::YEAR>(complete_4digit_year(part[0])), |
| "invalid year {}", part[0]); |
| SET_PARAMS_RET_FALSE_IFN(res.set_time_unit<TimeUnit::MONTH>(part[1]), |
| "invalid month {}", part[1]); |
| SET_PARAMS_RET_FALSE_IFN(res.set_time_unit<TimeUnit::DAY>(part[2]), |
| "invalid day {}", part[2]); |
| } |
| } else { |
| // 4 digits year |
| SET_PARAMS_RET_FALSE_IFN((consume_digit<UInt32, 2>(ptr, end, part[1])), |
| "failed to consume 4 digits for year, got {}", |
| std::string {ptr, end}); |
| SET_PARAMS_RET_FALSE_IFN((consume_one_date_sep(ptr, end)), |
| "failed to consume one bar after year, got {}", |
| std::string {ptr, end}); |
| SET_PARAMS_RET_FALSE_IFN((consume_digit<UInt32, 1, 2>(ptr, end, part[2])), |
| "failed to consume 1 or 2 digits for month, got {}", |
| std::string {ptr, end}); |
| SET_PARAMS_RET_FALSE_IFN((consume_one_date_sep(ptr, end)), |
| "failed to consume one bar after month, got {}", |
| std::string {ptr, end}); |
| SET_PARAMS_RET_FALSE_IFN((consume_digit<UInt32, 1, 2>(ptr, end, part[3])), |
| "failed to consume 1 or 2 digits for day, got {}", |
| std::string {ptr, end}); |
| |
| if (!try_convert_set_zero_date(res, part[0] * 100 + part[1], part[2], part[3])) { |
| SET_PARAMS_RET_FALSE_IFN(res.set_time_unit<TimeUnit::YEAR>(part[0] * 100 + part[1]), |
| "invalid year {}", part[0] * 100 + part[1]); |
| SET_PARAMS_RET_FALSE_IFN(res.set_time_unit<TimeUnit::MONTH>(part[2]), |
| "invalid month {}", part[2]); |
| SET_PARAMS_RET_FALSE_IFN(res.set_time_unit<TimeUnit::DAY>(part[3]), |
| "invalid day {}", part[3]); |
| } |
| } |
| } |
| |
| if (ptr == end) { |
| // no time part, just return. |
| return true; |
| } |
| |
| SET_PARAMS_RET_FALSE_IFN(consume_one_delimiter(ptr, end), |
| "failed to consume one delimiter after date, got {}", |
| std::string {ptr, end}); |
| |
| // time part. |
| // hour |
| SET_PARAMS_RET_FALSE_IFN((consume_digit<UInt32, 1, 2>(ptr, end, part[0])), |
| "failed to consume 1 or 2 digits for hour, got {}", |
| std::string {ptr, end}); |
| SET_PARAMS_RET_FALSE_IFN(res.test_time_unit<TimeUnit::HOUR>(part[0]), "invalid hour {}", |
| part[0]); |
| if (ptr == end) { |
| // no minute part, just return. |
| return true; |
| } |
| if (*ptr == ':') { |
| // with hour:minute:second |
| if (consume_one_colon(ptr, end)) { // minute |
| SET_PARAMS_RET_FALSE_IFN((consume_digit<UInt32, 1, 2>(ptr, end, part[1])), |
| "failed to consume 1 or 2 digits for minute, got {}", |
| std::string {ptr, end}); |
| SET_PARAMS_RET_FALSE_IFN(res.test_time_unit<TimeUnit::MINUTE>(part[1]), |
| "invalid minute {}", part[1]); |
| if (consume_one_colon(ptr, end)) { // second |
| has_second = true; |
| SET_PARAMS_RET_FALSE_IFN((consume_digit<UInt32, 1, 2>(ptr, end, part[2])), |
| "failed to consume 1 or 2 digits for second, got {}", |
| std::string {ptr, end}); |
| SET_PARAMS_RET_FALSE_IFN(res.test_time_unit<TimeUnit::SECOND>(part[2]), |
| "invalid second {}", part[2]); |
| } |
| } |
| } else { |
| // no ':' |
| if (in_bound(ptr, end, 1) && is_digit_range(ptr, ptr + 2)) { |
| part[1] = (ptr[0] - '0') * 10 + ptr[1] - '0'; |
| // has minute |
| SET_PARAMS_RET_FALSE_IFN(res.test_time_unit<TimeUnit::MINUTE>(part[1]), |
| "invalid minute {}", part[1]); |
| ptr += 2; |
| if (in_bound(ptr, end, 1) && is_digit_range(ptr, ptr + 2)) { |
| part[2] = (ptr[0] - '0') * 10 + ptr[1] - '0'; |
| // has second |
| has_second = true; |
| SET_PARAMS_RET_FALSE_IFN(res.test_time_unit<TimeUnit::SECOND>(part[2]), |
| "invalid second {}", part[2]); |
| ptr += 2; |
| } |
| } |
| } |
| |
| FRAC: |
| // fractional part |
| if (has_second && ptr < end && *ptr == '.') { |
| ++ptr; |
| static_cast<void>(skip_any_digit(ptr, end)); |
| } |
| static_cast<void>(skip_any_digit(ptr, end)); |
| |
| static_cast<void>(skip_any_whitespace(ptr, end)); |
| |
| // timezone part |
| if (ptr != end) { |
| cctz::time_zone parsed_tz {}; |
| if (*ptr == '+' || *ptr == '-') { |
| // offset |
| const char sign = *ptr; |
| ++ptr; |
| part[1] = 0; |
| |
| uint32_t length = count_digits(ptr, end); |
| // hour |
| if (length == 1 || length == 3) { |
| SET_PARAMS_RET_FALSE_IFN((consume_digit<UInt32, 1>(ptr, end, part[0])), |
| "invalid hour offset '{}'", std::string {ptr, end}); |
| } else { |
| SET_PARAMS_RET_FALSE_IFN((consume_digit<UInt32, 2>(ptr, end, part[0])), |
| "invalid hour offset '{}'", std::string {ptr, end}); |
| } |
| SET_PARAMS_RET_FALSE_IFN(part[0] <= 14, "invalid hour offset '{}'", part[0]); |
| if (ptr < end) { |
| if (*ptr == ':') { |
| ++ptr; |
| } |
| // minute |
| SET_PARAMS_RET_FALSE_IFN((consume_digit<UInt32, 2>(ptr, end, part[1])), |
| "invalid minute offset '{}'", std::string {ptr, end}); |
| SET_PARAMS_RET_FALSE_IFN((part[1] == 0 || part[1] == 30 || part[1] == 45), |
| "invalid minute offset '{}'", part[1]); |
| } |
| SET_PARAMS_RET_FALSE_IFN(part[0] != 14 || part[1] == 0, "invalid timezone offset '{}'", |
| combine_tz_offset(sign, part[0], part[1])); |
| |
| SET_PARAMS_RET_FALSE_IFN(TimezoneUtils::find_cctz_time_zone( |
| combine_tz_offset(sign, part[0], part[1]), parsed_tz), |
| "invalid timezone offset '{}'", |
| combine_tz_offset(sign, part[0], part[1])); |
| } else { |
| // timezone name |
| const auto* start = ptr; |
| // short tzname, or something legal for tzdata. depends on our TimezoneUtils. |
| SET_PARAMS_RET_FALSE_IFN(skip_tz_name_part(ptr, end), "invalid timezone name '{}'", |
| std::string {start, ptr}); |
| |
| SET_PARAMS_RET_FALSE_IFN( |
| TimezoneUtils::find_cctz_time_zone(std::string {start, ptr}, parsed_tz), |
| "invalid timezone name '{}'", std::string {start, ptr}); |
| } |
| |
| static_cast<void>(skip_any_whitespace(ptr, end)); |
| SET_PARAMS_RET_FALSE_IFN(ptr == end, |
| "invalid date string '{}', extra characters after timezone", |
| std::string {ptr, end}); |
| } |
| return true; |
| } |
| |
| /** |
| <datetime> ::= <whitespace>* <date> (<delimiter> <time> <whitespace>* <timezone>?)? <whitespace>* |
| |
| <date> ::= <year> <separator> <month> <separator> <day> |
| <time> ::= <hour> <separator> <minute> <separator> <second> [<fraction>] |
| |
| <year> ::= <digit>{4} | <digit>{2} |
| <month> ::= <digit>{1,2} |
| <day> ::= <digit>{1,2} |
| <hour> ::= <digit>{1,2} |
| <minute> ::= <digit>{1,2} |
| <second> ::= <digit>{1,2} |
| |
| <separator> ::= ^(<digit> | <alpha>) |
| <delimiter> ::= " " | "T" |
| |
| <fraction> ::= "." <digit>* |
| |
| –––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––– |
| |
| <offset> ::= ( "+" | "-" ) <hour-offset> [ ":"? <minute-offset> ] |
| | <tz-name> |
| |
| <tz-name> ::= <short-tz> | <long-tz> |
| |
| <short-tz> ::= "CST" | "UTC" | "GMT" | "ZULU" | "Z" ; case-insensitive |
| <long-tz> ::= <area> "/" <location> ; e.g. America/New_York |
| |
| <hour-offset> ::= <digit>{1,2} ; 0–14 |
| <minute-offset> ::= <digit>{2} ; 00/30/45 |
| |
| <area> ::= <alpha>+ |
| <location> ::= (<alpha> | "_")+ |
| |
| –––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––––– |
| |
| <whitespace> ::= " " | "\t" | "\n" | "\r" | "\v" | "\f" |
| |
| <digit> ::= "0" | "1" | "2" | "3" | "4" | "5" | "6" | "7" | "8" | "9" |
| |
| <alpha> ::= "A" | … | "Z" | "a" | … | "z" |
| */ |
| inline bool CastToDateV2::from_string_non_strict_mode_impl(const StringRef& str, |
| DateV2Value<DateV2ValueType>& res, |
| const cctz::time_zone* local_time_zone, |
| CastParameters& params) { |
| constexpr bool IsStrict = false; |
| const char* ptr = str.data; |
| const char* end = ptr + str.size; |
| AsanPoisonGuard defer(end, 1); |
| |
| // skip leading whitespace |
| static_cast<void>(skip_any_whitespace(ptr, end)); |
| SET_PARAMS_RET_FALSE_IFN(ptr != end, "empty date string"); |
| |
| // date part |
| uint32_t year, month, day; |
| |
| // read year |
| PROPAGATE_FALSE((consume_digit<UInt32, 2>(ptr, end, year))); |
| if (is_digit_range(ptr, ptr + 1)) { |
| // continue by digit, it must be a 4-digit year |
| uint32_t year2; |
| PROPAGATE_FALSE((consume_digit<UInt32, 2>(ptr, end, year2))); |
| year = year * 100 + year2; |
| } else { |
| // otherwise, it must be a 2-digit year |
| if (year < 100) { |
| // Convert 2-digit year based on 1970 boundary |
| year += (year >= 70) ? 1900 : 2000; |
| } |
| } |
| |
| // check for separator |
| PROPAGATE_FALSE(skip_one_non_alnum(ptr, end)); |
| |
| // read month |
| PROPAGATE_FALSE((consume_digit<UInt32, 1, 2>(ptr, end, month))); |
| |
| // check for separator |
| PROPAGATE_FALSE(skip_one_non_alnum(ptr, end)); |
| |
| // read day |
| PROPAGATE_FALSE((consume_digit<UInt32, 1, 2>(ptr, end, day))); |
| |
| if (!try_convert_set_zero_date(res, year, month, day)) { |
| SET_PARAMS_RET_FALSE_IFN(res.set_time_unit<TimeUnit::YEAR>(year), "invalid year {}", year); |
| SET_PARAMS_RET_FALSE_IFN(res.set_time_unit<TimeUnit::MONTH>(month), "invalid month {}", |
| month); |
| SET_PARAMS_RET_FALSE_IFN(res.set_time_unit<TimeUnit::DAY>(day), "invalid day {}", day); |
| } |
| |
| if (is_space_range(ptr, end)) { |
| // no time part, just return. |
| return true; |
| } |
| |
| PROPAGATE_FALSE(consume_one_delimiter(ptr, end)); |
| |
| // time part |
| uint32_t hour, minute, second; |
| |
| // hour |
| PROPAGATE_FALSE((consume_digit<UInt32, 1, 2>(ptr, end, hour))); |
| SET_PARAMS_RET_FALSE_IFN(res.test_time_unit<TimeUnit::HOUR>(hour), "invalid hour {}", hour); |
| |
| // check for separator |
| PROPAGATE_FALSE(skip_one_non_alnum(ptr, end)); |
| |
| // minute |
| PROPAGATE_FALSE((consume_digit<UInt32, 1, 2>(ptr, end, minute))); |
| SET_PARAMS_RET_FALSE_IFN(res.test_time_unit<TimeUnit::MINUTE>(minute), "invalid minute {}", |
| minute); |
| |
| // check for separator |
| PROPAGATE_FALSE(skip_one_non_alnum(ptr, end)); |
| |
| // second |
| PROPAGATE_FALSE((consume_digit<UInt32, 1, 2>(ptr, end, second))); |
| SET_PARAMS_RET_FALSE_IFN(res.test_time_unit<TimeUnit::SECOND>(second), "invalid second {}", |
| second); |
| |
| // fractional part |
| if (ptr < end && *ptr == '.') { |
| ++ptr; |
| static_cast<void>(skip_any_digit(ptr, end)); |
| } |
| |
| // skip any whitespace after time |
| static_cast<void>(skip_any_whitespace(ptr, end)); |
| |
| // timezone part (if any) |
| if (ptr != end) { |
| cctz::time_zone parsed_tz {}; |
| if (*ptr == '+' || *ptr == '-') { |
| // offset |
| const char sign = *ptr; |
| ++ptr; |
| uint32_t hour_offset, minute_offset = 0; |
| |
| uint32_t length = count_digits(ptr, end); |
| // hour |
| if (length == 1 || length == 3) { |
| PROPAGATE_FALSE((consume_digit<UInt32, 1>(ptr, end, hour_offset))); |
| } else { |
| PROPAGATE_FALSE((consume_digit<UInt32, 2>(ptr, end, hour_offset))); |
| } |
| SET_PARAMS_RET_FALSE_IFN(hour_offset <= 14, "invalid hour offset {}", hour_offset); |
| if (ptr < end) { |
| if (*ptr == ':') { |
| ++ptr; |
| } |
| // minute |
| PROPAGATE_FALSE((consume_digit<UInt32, 2>(ptr, end, minute_offset))); |
| SET_PARAMS_RET_FALSE_IFN( |
| (minute_offset == 0 || minute_offset == 30 || minute_offset == 45), |
| "invalid minute offset {}", minute_offset); |
| } |
| SET_PARAMS_RET_FALSE_IFN(hour_offset != 14 || minute_offset == 0, |
| "invalid timezone offset '{}'", |
| combine_tz_offset(sign, hour_offset, minute_offset)); |
| |
| SET_PARAMS_RET_FALSE_IFN( |
| TimezoneUtils::find_cctz_time_zone( |
| combine_tz_offset(sign, hour_offset, minute_offset), parsed_tz), |
| "invalid timezone offset '{}'", |
| combine_tz_offset(sign, hour_offset, minute_offset)); |
| } else { |
| // timezone name |
| const auto* start = ptr; |
| // short tzname, or something legal for tzdata. depends on our TimezoneUtils. |
| PROPAGATE_FALSE(skip_tz_name_part(ptr, end)); |
| |
| SET_PARAMS_RET_FALSE_IFN( |
| TimezoneUtils::find_cctz_time_zone(std::string {start, ptr}, parsed_tz), |
| "invalid timezone name '{}'", std::string {start, ptr}); |
| } |
| } |
| |
| // skip trailing whitespace |
| static_cast<void>(skip_any_whitespace(ptr, end)); |
| SET_PARAMS_RET_FALSE_IFN(ptr == end, "invalid date string '{}', extra characters after parsing", |
| std::string {ptr, end}); |
| |
| return true; |
| } |
| |
| // NOLINTEND(readability-function-cognitive-complexity) |
| // NOLINTEND(readability-function-size) |
| #include "common/compile_check_end.h" |
| } // namespace doris |