| // Licensed to the Apache Software Foundation (ASF) under one |
| // or more contributor license agreements. See the NOTICE file |
| // distributed with this work for additional information |
| // regarding copyright ownership. The ASF licenses this file |
| // to you under the Apache License, Version 2.0 (the |
| // "License"); you may not use this file except in compliance |
| // with the License. You may obtain a copy of the License at |
| // |
| // http://www.apache.org/licenses/LICENSE-2.0 |
| // |
| // Unless required by applicable law or agreed to in writing, |
| // software distributed under the License is distributed on an |
| // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
| // KIND, either express or implied. See the License for the |
| // specific language governing permissions and limitations |
| // under the License. |
| |
| #pragma once |
| |
| #include <glog/logging.h> |
| #include <re2/re2.h> |
| |
| #include <algorithm> |
| #include <cstddef> |
| #include <cstdint> |
| #include <cstdlib> |
| #include <cstring> |
| #include <iostream> |
| #include <iterator> |
| #include <string> |
| #include <string_view> |
| #include <tuple> |
| #include <type_traits> |
| #include <utility> |
| |
| #include "runtime/define_primitive_type.h" |
| #include "util/hash_util.hpp" |
| #include "util/time_lut.h" |
| #include "util/timezone_utils.h" |
| |
| namespace cctz { |
| class time_zone; |
| } // namespace cctz |
| |
| namespace doris::vectorized { |
| class DataTypeDateTime; |
| class DataTypeDateV2; |
| class DataTypeDateTimeV2; |
| } // namespace doris::vectorized |
| |
| namespace doris { |
| |
// Units accepted by the interval helpers (see TimeInterval and the date_add_interval<> family).
// The numeric values are spelled out so that the mapping stays visible; they are exactly the
// values the enumerators would receive implicitly.
enum TimeUnit {
    // Single-field units, from finest to coarsest.
    MICROSECOND = 0,
    MILLISECOND = 1,
    SECOND = 2,
    MINUTE = 3,
    HOUR = 4,
    DAY = 5,
    WEEK = 6,
    MONTH = 7,
    QUARTER = 8,
    YEAR = 9,
    // Compound units.
    SECOND_MICROSECOND = 10,
    MINUTE_MICROSECOND = 11,
    MINUTE_SECOND = 12,
    HOUR_MICROSECOND = 13,
    HOUR_SECOND = 14,
    HOUR_MINUTE = 15,
    DAY_MICROSECOND = 16,
    DAY_SECOND = 17,
    DAY_MINUTE = 18,
    DAY_HOUR = 19,
    YEAR_MONTH = 20
};
| |
| struct TimeInterval { |
| int64_t year; |
| int64_t month; |
| int64_t day; |
| int64_t hour; |
| int64_t minute; |
| int64_t second; |
| int64_t millisecond; |
| int64_t microsecond; |
| bool is_neg; |
| |
| TimeInterval() |
| : year(0), |
| month(0), |
| day(0), |
| hour(0), |
| minute(0), |
| second(0), |
| millisecond(0), |
| microsecond(0), |
| is_neg(false) {} |
| |
| TimeInterval(TimeUnit unit, int64_t count, bool is_neg_param) |
| : year(0), |
| month(0), |
| day(0), |
| hour(0), |
| minute(0), |
| second(0), |
| millisecond(0), |
| microsecond(0), |
| is_neg(is_neg_param) { |
| switch (unit) { |
| case YEAR: |
| year = count; |
| break; |
| case MONTH: |
| month = count; |
| break; |
| case WEEK: |
| day = 7 * count; |
| break; |
| case DAY: |
| day = count; |
| break; |
| case HOUR: |
| hour = count; |
| break; |
| case MINUTE: |
| minute = count; |
| break; |
| case SECOND: |
| second = count; |
| break; |
| case SECOND_MICROSECOND: |
| microsecond = count; |
| break; |
| case MILLISECOND: |
| millisecond = count; |
| break; |
| case MICROSECOND: |
| microsecond = count; |
| break; |
| default: |
| break; |
| } |
| } |
| }; |
| |
// Kind of value stored in a VecDateTimeValue (kept in its 3-bit `_type` field).
enum TimeType {
    TIME_TIME = 1,
    TIME_DATE = 2,
    TIME_DATETIME = 3,
};
| |
// Slack to add to the required output length when sizing the buffer passed to
// to_format_string_conservative().
constexpr int SAFE_FORMAT_STRING_MARGIN = 12;

// Bit flags of the week-mode argument used to compute week numbers.
constexpr int WEEK_MONDAY_FIRST = 1;
constexpr int WEEK_YEAR = 2;
constexpr int WEEK_FIRST_WEEKDAY = 4;

// "9999-99-99 99:99:99" is 19 characters, plus 1 for the trailing '\0'.
constexpr int MAX_DTVALUE_STR_LEN = 20;

constexpr int DATE_MAX_DAYNR = 3652424;
// Two-digit years below this value are read as 20xx; values at or above it as 19xx.
constexpr int YY_PART_YEAR = 70;

// Limits of a time value.
constexpr int TIME_MAX_HOUR = 256;
constexpr int TIME_MAX_MINUTE = 59;
constexpr int TIME_MAX_SECOND = 59;
// Largest time written as the decimal number hhmmss.
constexpr int TIME_MAX_VALUE = 10000 * TIME_MAX_HOUR + 100 * TIME_MAX_MINUTE + TIME_MAX_SECOND;
// The same limit expressed in seconds.
constexpr int TIME_MAX_VALUE_SECONDS =
        3600 * TIME_MAX_HOUR + 60 * TIME_MAX_MINUTE + TIME_MAX_SECOND;

constexpr int HOUR_PER_DAY = 24;
constexpr int64_t SECOND_PER_HOUR = 3600;
constexpr int64_t SECOND_PER_MINUTE = 60;
// NOTE: despite the "MS" prefix the value is 1,000,000.
constexpr int64_t MS_PER_SECOND = 1000 * 1000;

// Days per month of a non-leap year, indexed by month number 1..12; slot 0 is a placeholder.
inline constexpr int S_DAYS_IN_MONTH[13] = {0, 31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31};
| |
// Compile-time strlen: number of characters before the terminating '\0'.
// A null pointer is treated like an empty string and yields 0.
constexpr size_t const_length(const char* str) {
    if (str == nullptr) {
        return 0;
    }
    size_t length = 0;
    while (str[length] != '\0') {
        ++length;
    }
    return length;
}
| |
| constexpr size_t max_char_length(const char* const* name, size_t end) { |
| size_t res = 0; |
| for (int i = 0; i < end; ++i) { |
| res = std::max(const_length(name[i]), res); |
| } |
| return res; |
| } |
| |
| static constexpr const char* s_month_name[] = { |
| "", "January", "February", "March", "April", "May", "June", |
| "July", "August", "September", "October", "November", "December", nullptr}; |
| |
| static constexpr const char* s_day_name[] = {"Monday", "Tuesday", "Wednesday", "Thursday", |
| "Friday", "Saturday", "Sunday", nullptr}; |
| |
| static constexpr size_t MAX_DAY_NAME_LEN = max_char_length(s_day_name, std::size(s_day_name)); |
| static constexpr size_t MAX_MONTH_NAME_LEN = max_char_length(s_month_name, std::size(s_month_name)); |
| |
// Number of low bits a packed DateTimeV2 value spends on its time part:
// hour(5) + minute(6) + second(6) + microsecond(20).
static constexpr uint8_t TIME_PART_LENGTH = 37;

// Packed DateV2 layout is year << 9 | month << 5 | day.
// MAX_DATE_V2 encodes 9999-12-31, MIN_DATE_V2 encodes 0000-01-01.
static constexpr uint32_t MAX_DATE_V2 = (9999 << 9) | (12 << 5) | 31;
static constexpr uint32_t MIN_DATE_V2 = (1 << 5) | 1;

// Packed DateTimeV2 bounds: the date part sits above the time part;
// the maximum carries 23:59:59.999999, the minimum 00:00:00.000000.
static constexpr uint64_t MAX_DATETIME_V2 =
        (static_cast<uint64_t>(MAX_DATE_V2) << TIME_PART_LENGTH) |
        (static_cast<uint64_t>(23) << 32) | (static_cast<uint64_t>(59) << 26) |
        (static_cast<uint64_t>(59) << 20) | static_cast<uint64_t>(999999);
static constexpr uint64_t MIN_DATETIME_V2 = static_cast<uint64_t>(MIN_DATE_V2)
                                            << TIME_PART_LENGTH;

// Largest legal value of each individual field.
static constexpr uint32_t MAX_YEAR = 9999;
static constexpr uint32_t MAX_MONTH = 12;
static constexpr uint32_t MAX_HOUR = 23;
static constexpr uint32_t MAX_MINUTE = 59;
static constexpr uint32_t MAX_SECOND = 59;
static constexpr uint32_t MAX_MICROSECOND = 999999;

// Bit widths of selected fields in the packed V2 representations.
static constexpr uint32_t DATEV2_YEAR_WIDTH = 23;
static constexpr uint32_t DATETIMEV2_YEAR_WIDTH = 18;
static constexpr uint32_t DATETIMEV2_MONTH_WIDTH = 4;
| |
| static RE2 time_zone_offset_format_reg(R"(^[+-]{1}\d{2}\:\d{2}$)"); |
| |
| uint8_t mysql_week_mode(uint32_t mode); |
| |
| inline uint32_t calc_daynr(uint16_t year, uint8_t month, uint8_t day); |
| |
// Bit layout of a DATEV2 value: 32 bits in total, day in the lowest bits, year in the highest.
struct DateV2ValueType {
    uint32_t day_ : 5;
    uint32_t month_ : 4;
    uint32_t year_ : 23;

    // Takes the same argument list as DateTimeV2ValueType so that both can be constructed by the
    // same generic code; the time-of-day arguments are accepted and discarded.
    DateV2ValueType(uint16_t year, uint8_t month, uint8_t day, [[maybe_unused]] uint8_t hour,
                    [[maybe_unused]] uint8_t minute, [[maybe_unused]] uint8_t second,
                    [[maybe_unused]] uint32_t microsecond)
            : day_(day), month_(month), year_(year) {}
};
| |
// Bit layout of a DATETIMEV2 value: 64 bits in total, microsecond in the lowest bits, year in the
// highest. The four time fields together occupy 20 + 6 + 6 + 5 = 37 bits.
struct DateTimeV2ValueType {
    uint64_t microsecond_ : 20;
    uint64_t second_ : 6;
    uint64_t minute_ : 6;
    uint64_t hour_ : 5;
    uint64_t day_ : 5;
    uint64_t month_ : 4;
    uint64_t year_ : 18;

    // Stores every component as given; no range validation happens here.
    DateTimeV2ValueType(uint16_t year, uint8_t month, uint8_t day, uint8_t hour, uint8_t minute,
                        uint8_t second, uint32_t microsecond)
            : microsecond_(microsecond),
              second_(second),
              minute_(minute),
              hour_(hour),
              day_(day),
              month_(month),
              year_(year) {}
};
| |
| template <typename T> |
| class DateV2Value; |
| |
| class VecDateTimeValue { // Now this type is a temp solution with little changes, maybe large refactoring follow-up. |
| public: |
| // Constructor |
| VecDateTimeValue() |
| : _neg(0), |
| _type(TIME_DATETIME), |
| _second(0), |
| _minute(0), |
| _hour(0), |
| _day(0), // _microsecond(0): remove it to reduce memory, and Reorder the variables |
| _month(0), // so this is a difference between Vectorization mode and Rowbatch mode with DateTimeValue; |
| _year(0) {} // before int128 16 bytes ---> after int64 8 bytes |
| |
| const static VecDateTimeValue FIRST_DAY; |
| |
| // The data format of DATE/DATETIME is different in storage layer and execute layer. |
| // So we should use different creator to get data from value. |
| // We should use create_from_olap_xxx only at binary data scanned from storage engine and convert to typed data. |
| // At other case, we just use binary_cast<Int64, VecDateTimeValue>. |
| |
| // olap storage layer date data format: |
| // 64 bits binary data [year(remaining bits), month(4 bits), day(5 bits)] |
| // execute layer date/datetime and olap storage layer datetime data format: |
| // 8 bytes integer data [year(remaining digits), month(2 digits), day(2 digits), hour(2 digits), minute(2 digits) ,second(2 digits)] |
| |
| static VecDateTimeValue create_from_olap_date(uint64_t value) { |
| VecDateTimeValue date; |
| date.from_olap_date(value); |
| return date; |
| } |
| |
| static VecDateTimeValue create_from_olap_datetime(uint64_t value) { |
| VecDateTimeValue datetime; |
| datetime.from_olap_datetime(value); |
| return datetime; |
| } |
| |
| template <typename T> |
| void create_from_date_v2(DateV2Value<T>& value, TimeType type); |
| |
| template <typename T> |
| void create_from_date_v2(DateV2Value<T>&& value, TimeType type); |
| |
| // Converted from Olap Date or Datetime |
| bool from_olap_datetime(uint64_t datetime) { |
| _neg = 0; |
| _type = TIME_DATETIME; |
| uint64_t date = datetime / 1000000; |
| uint64_t time = datetime % 1000000; |
| |
| auto [year, month, day, hour, minute, second] = std::tuple {0, 0, 0, 0, 0, 0}; |
| year = date / 10000; |
| date %= 10000; |
| month = date / 100; |
| day = date % 100; |
| hour = time / 10000; |
| time %= 10000; |
| minute = time / 100; |
| second = time % 100; |
| |
| return check_range_and_set_time(year, month, day, hour, minute, second, _type); |
| } |
| |
| uint64_t to_olap_datetime() const { |
| uint64_t date_val = _year * 10000 + _month * 100 + _day; |
| uint64_t time_val = _hour * 10000 + _minute * 100 + _second; |
| return date_val * 1000000 + time_val; |
| } |
| |
| bool from_olap_date(uint64_t date) { |
| _neg = 0; |
| _type = TIME_DATE; |
| |
| auto [year, month, day, hour, minute, second] = std::tuple {0, 0, 0, 0, 0, 0}; |
| |
| day = date & 0x1f; |
| date >>= 5; |
| month = date & 0x0f; |
| date >>= 4; |
| year = date; |
| |
| return check_range_and_set_time(year, month, day, hour, minute, second, _type); |
| } |
| |
| //note(wb) not check in this method |
| void inline set_olap_date(uint64_t olap_date_val) { |
| _neg = 0; |
| _type = TIME_DATE; |
| |
| _day = olap_date_val & 0x1f; |
| _month = (olap_date_val >> 5) & 0x0f; |
| _year = olap_date_val >> 9; |
| _hour = 0; |
| _minute = 0; |
| _second = 0; |
| } |
| |
| uint64_t to_olap_date() const { |
| uint64_t val; |
| val = _year; |
| val <<= 4; |
| val |= _month; |
| val <<= 5; |
| val |= _day; |
| return val; |
| } |
| |
| bool from_date_format_str(const char* format, int format_len, const char* value, |
| int64_t value_len) { |
| memset(this, 0, sizeof(*this)); |
| return from_date_format_str(format, format_len, value, value_len, nullptr); |
| } |
| |
| operator int64_t() const { return to_int64(); } |
| |
| // Given days since 0000-01-01, construct the datetime value. |
| bool from_date_daynr(uint64_t); |
| |
| // Construct Date/Datetime type value from string. |
| // At least the following formats are recognised (based on number of digits) |
| // 'YYMMDD', 'YYYYMMDD', 'YYMMDDHHMMSS', 'YYYYMMDDHHMMSS' |
| // 'YY-MM-DD', 'YYYY-MM-DD', 'YY-MM-DD HH.MM.SS' |
| // 'YYYYMMDDTHHMMSS' |
| bool from_date_str(const char* str, size_t len); |
| bool from_date_str(const char* str, size_t len, const cctz::time_zone& local_time_zone); |
| |
| // Construct Date/Datetime type value from int64_t value. |
| // Return true if convert success. Otherwise return false. |
| bool from_date_int64(int64_t value); |
| |
| bool from_date(int64_t value) { return from_date_int64(value); } |
| |
| // Construct time type value from int64_t value. |
| // Return true if convert success. Otherwise return false. |
| bool from_time_int64(int64_t value); |
| |
| // Convert this value to string |
| // this will check type to decide which format to convert |
| // TIME: format 'hh:mm:ss.xxxxxx' |
| // DATE: format 'YYYY-MM-DD' |
| // DATETIME: format 'YYYY-MM-DD hh:mm:ss.xxxxxx' |
| int32_t to_buffer(char* buffer) const; |
| |
| char* to_string(char* to) const; |
| |
| // Convert this datetime value to string by the format string. |
| // for performance of checking, may return false when just APPROACH BUT NOT REACH max_valid_length. |
| // so need a little big buffer and its length as max_valid_length to make sure store valid data. |
| // to make sure of this. make the buffer size = <data_need_length> + SAFE_FORMAT_STRING_MARGIN. and pass this size as max_valid_length |
| bool to_format_string_conservative(const char* format, size_t len, char* to, |
| size_t max_valid_length) const; |
| |
| // compute the length of data format pattern |
| static int compute_format_len(const char* format, size_t len); |
| |
| // Return true if range or date is invalid |
| static bool check_range(uint32_t year, uint32_t month, uint32_t day, uint32_t hour, |
| uint32_t minute, uint32_t second, uint16_t type); |
| |
| static bool check_date(uint32_t year, uint32_t month, uint32_t day); |
| |
| // Convert this value to uint64_t |
| // Will check its type |
| int64_t to_int64() const; |
| |
| [[nodiscard]] bool check_range_and_set_time(uint32_t year, uint32_t month, uint32_t day, |
| uint32_t hour, uint32_t minute, uint32_t second, |
| uint16_t type) { |
| if (check_range(year, month, day, hour, minute, second, type)) { |
| return false; |
| } |
| unchecked_set_time(year, month, day, hour, minute, second); |
| return true; |
| } |
| |
| void unchecked_set_time(uint32_t year, uint32_t month, uint32_t day, uint32_t hour, |
| uint32_t minute, uint32_t second); |
| |
| uint32_t daynr() const { return calc_daynr(_year, _month, _day); } |
| |
| uint16_t year() const { return _year; } |
| uint8_t month() const { return _month; } |
| int quarter() const { return (_month - 1) / 3 + 1; } |
| int week() const { return week(mysql_week_mode(0)); } //00-53 |
| uint8_t day() const { return _day; } |
| uint8_t hour() const { return _hour; } |
| uint8_t minute() const { return _minute; } |
| uint16_t second() const { return _second; } |
| uint16_t neg() const { return _neg; } |
| |
| int64_t time_part_to_seconds() const { |
| return _hour * SECOND_PER_HOUR + _minute * SECOND_PER_MINUTE + _second; |
| } |
| |
| void reset_time_part() { |
| _hour = 0; |
| _minute = 0; |
| _second = 0; |
| } |
| |
| bool check_loss_accuracy_cast_to_date() { |
| auto loss_accuracy = _hour != 0 || _minute != 0 || _second != 0; |
| cast_to_date(); |
| return loss_accuracy; |
| } |
| |
| void cast_to_date() { |
| _hour = 0; |
| _minute = 0; |
| _second = 0; |
| _type = TIME_DATE; |
| } |
| |
| void cast_to_time() { |
| _year = 0; |
| _month = 0; |
| _day = 0; |
| _type = TIME_TIME; |
| } |
| |
| void to_datetime() { _type = TIME_DATETIME; } |
| |
| // Weekday, from 0(Mon) to 6(Sun) |
| uint8_t weekday() const { return calc_weekday(daynr(), false); } |
| auto day_of_week() const { return (weekday() + 1) % 7 + 1; } |
| |
| // The bits in week_format has the following meaning: |
| // WEEK_MONDAY_FIRST (0) |
| // If not set: |
| // Sunday is first day of week |
| // If set: |
| // Monday is first day of week |
| // |
| // WEEK_YEAR (1) |
| // If not set: |
| // Week is in range 0-53 |
| // Week 0 is returned for the last week of the previous year (for |
| // a date at start of january) In this case one can get 53 for the |
| // first week of next year. This flag ensures that the week is |
| // relevant for the given year. Note that this flag is only |
| // relevant if WEEK_JANUARY is not set. |
| // If set: |
| // Week is in range 1-53. |
| // In this case one may get week 53 for a date in January (when |
| // the week is that last week of previous year) and week 1 for a |
| // date in December. |
| // |
| // WEEK_FIRST_WEEKDAY (2) |
| // If not set |
| // Weeks are numbered according to ISO 8601:1988 |
| // If set |
| // The week that contains the first 'first-day-of-week' is week 1. |
| // |
| // ISO 8601:1988 means that |
| // if the week containing January 1 has |
| // four or more days in the new year, then it is week 1; |
| // Otherwise it is the last week of the previous year, and the |
| // next week is week 1. |
| uint8_t week(uint8_t) const; |
| |
| uint32_t year_week(uint8_t mode) const; |
| |
| // Add interval |
| template <TimeUnit unit, bool need_check = true> |
| bool date_add_interval(const TimeInterval& interval); |
| |
| // set interval |
| template <TimeUnit unit> |
| bool date_set_interval(const TimeInterval& interval); |
| |
| template <TimeUnit unit> |
| bool datetime_trunc(); //datetime trunc, like trunc minute = 0 |
| |
| //unix_timestamp is called with a timezone argument, |
| //it returns seconds of the value of date literal since '1970-01-01 00:00:00' UTC |
| bool unix_timestamp(int64_t* timestamp, const std::string& timezone) const; |
| bool unix_timestamp(int64_t* timestamp, const cctz::time_zone& ctz) const; |
| |
| //construct datetime_value from timestamp and timezone |
| //timestamp is an internal timestamp value representing seconds since '1970-01-01 00:00:00' UTC. negative avaliable. |
| //we don't do any check in it because it's hot path. any usage want ensure the time legality should check itself. |
| bool from_unixtime(int64_t, const std::string& timezone); |
| void from_unixtime(int64_t, const cctz::time_zone& ctz); |
| |
| bool operator==(const VecDateTimeValue& other) const { |
| // NOTE: This is not same with MySQL. |
| // MySQL convert both to int with left value type and then compare |
| // We think all fields equals. |
| int64_t v1 = to_int64_datetime_packed(); |
| int64_t v2 = other.to_int64_datetime_packed(); |
| return v1 == v2; |
| } |
| |
| bool operator!=(const VecDateTimeValue& other) const { return !(*this == other); } |
| |
| // Now, we don't support TIME_TIME type, |
| bool operator<=(const VecDateTimeValue& other) const { return !(*this > other); } |
| |
| bool operator>=(const VecDateTimeValue& other) const { return !(*this < other); } |
| |
| bool operator<(const VecDateTimeValue& other) const { |
| int64_t v1 = to_int64_datetime_packed(); |
| int64_t v2 = other.to_int64_datetime_packed(); |
| return v1 < v2; |
| } |
| |
| bool operator>(const VecDateTimeValue& other) const { |
| int64_t v1 = to_int64_datetime_packed(); |
| int64_t v2 = other.to_int64_datetime_packed(); |
| return v1 > v2; |
| } |
| |
| template <typename T> |
| bool operator==(const DateV2Value<T>& other) const; |
| |
| template <typename T> |
| bool operator!=(const DateV2Value<T>& other) const { |
| return !(*this == other); |
| } |
| |
| template <typename T> |
| bool operator<=(const DateV2Value<T>& other) const; |
| |
| template <typename T> |
| bool operator>=(const DateV2Value<T>& other) const; |
| |
| template <typename T> |
| bool operator<(const DateV2Value<T>& other) const; |
| |
| template <typename T> |
| bool operator>(const DateV2Value<T>& other) const; |
| |
| const char* month_name() const; |
| |
| const char* day_name() const; |
| |
| VecDateTimeValue& operator+=(int64_t count) { |
| bool is_neg = false; |
| if (count < 0) { |
| is_neg = true; |
| count = -count; |
| } |
| switch (_type) { |
| case TIME_DATE: { |
| TimeInterval interval(DAY, count, is_neg); |
| date_add_interval<DAY>(interval); |
| break; |
| } |
| case TIME_DATETIME: { |
| TimeInterval interval(SECOND, count, is_neg); |
| date_add_interval<SECOND>(interval); |
| break; |
| } |
| case TIME_TIME: { |
| TimeInterval interval(SECOND, count, is_neg); |
| date_add_interval<SECOND>(interval); |
| break; |
| } |
| } |
| return *this; |
| } |
| |
| VecDateTimeValue& operator-=(int64_t count) { return *this += -count; } |
| |
| VecDateTimeValue& operator++() { return *this += 1; } |
| |
| VecDateTimeValue& operator--() { return *this += -1; } |
| |
| uint32_t to_date_v2() const { |
| CHECK(_type == TIME_DATE); |
| return (year() << 9 | month() << 5 | day()); |
| } |
| |
| uint64_t to_datetime_v2() const { |
| CHECK(_type == TIME_DATETIME); |
| return (uint64_t)(((uint64_t)year() << 46) | ((uint64_t)month() << 42) | |
| ((uint64_t)day() << 37) | ((uint64_t)hour() << 32) | |
| ((uint64_t)minute() << 26) | ((uint64_t)second() << 20)); |
| } |
| |
| uint32_t hash(int seed) const { return HashUtil::hash(this, sizeof(*this), seed); } |
| |
| int day_of_year() const { return daynr() - calc_daynr(_year, 1, 1) + 1; } |
| |
| // TODO(zhaochun): local time ??? |
| static VecDateTimeValue local_time(); |
| |
| std::string debug_string() const { |
| char buf[64]; |
| char* end = to_string(buf); |
| return {buf, static_cast<size_t>(end - buf)}; |
| } |
| |
| static VecDateTimeValue datetime_min_value() { |
| static VecDateTimeValue _s_min_datetime_value(0, TIME_DATETIME, 0, 0, 0, 0, 1, 1); |
| return _s_min_datetime_value; |
| } |
| |
| static VecDateTimeValue datetime_max_value() { |
| static VecDateTimeValue _s_max_datetime_value(0, TIME_DATETIME, 23, 59, 59, 9999, 12, 31); |
| return _s_max_datetime_value; |
| } |
| |
| template <typename T> |
| int64_t time_part_diff(const T& rhs) const { |
| return time_part_to_seconds() - rhs.time_part_to_seconds(); |
| } |
| |
| template <typename T> |
| int64_t datetime_diff_in_seconds(const T& rhs) const { |
| return (daynr() - rhs.daynr()) * SECOND_PER_HOUR * HOUR_PER_DAY + time_part_diff(rhs); |
| } |
| |
| void set_type(int type); |
| |
| int type() const { return _type; } |
| |
| bool is_valid_date() const { |
| return !check_range(_year, _month, _day, _hour, _minute, _second, _type) && _month > 0 && |
| _day > 0; |
| } |
| |
| int64_t to_datetime_int64() const; |
| |
| // To compatible with MySQL |
| int64_t to_int64_datetime_packed() const { |
| int64_t ymd = ((_year * 13 + _month) << 5) | _day; |
| int64_t hms = (_hour << 12) | (_minute << 6) | _second; |
| int64_t tmp = make_packed_time(((ymd << 17) | hms), 0); |
| return _neg ? -tmp : tmp; |
| } |
| |
| void from_packed_time(int64_t packed_time) { |
| int64_t ymdhms = packed_time >> 24; |
| int64_t ymd = ymdhms >> 17; |
| int64_t hms = ymdhms % (1 << 17); |
| |
| _day = ymd % (1 << 5); |
| int64_t ym = ymd >> 5; |
| _month = ym % 13; |
| _year = ym / 13; |
| _year %= 10000; |
| _second = hms % (1 << 6); |
| _minute = (hms >> 6) % (1 << 6); |
| _hour = (hms >> 12); |
| _neg = 0; |
| _type = TIME_DATETIME; |
| } |
| |
| bool get_date_from_daynr(uint64_t); |
| |
| // reset 0 |
| void reset_zero_by_type(int type) { set_zero(type); } |
| |
| private: |
| // Used to make sure sizeof VecDateTimeValue |
| friend class UnusedClass; |
| |
| int64_t make_packed_time(int64_t time, int64_t second_part) const { |
| return (time << 24) + second_part; |
| } |
| |
| int64_t to_int64_date_packed() const { |
| int64_t ymd = ((_year * 13 + _month) << 5) | _day; |
| int64_t tmp = make_packed_time(ymd << 17, 0); |
| return _neg ? -tmp : tmp; |
| } |
| |
| // Used to construct from int value |
| int64_t standardize_timevalue(int64_t value); |
| |
| // Used to convert to a string. |
| char* append_date_buffer(char* to) const; |
| char* append_time_buffer(char* to) const; |
| char* to_datetime_buffer(char* to) const; |
| char* to_date_buffer(char* to) const; |
| char* to_time_buffer(char* to) const; |
| |
| bool from_date_str_base(const char* date_str, int len, const cctz::time_zone* local_time_zone); |
| |
| int64_t to_date_int64() const; |
| int64_t to_time_int64() const; |
| |
| static uint8_t calc_week(const VecDateTimeValue& value, uint8_t mode, uint32_t* year, |
| bool disable_lut = false); |
| |
| // Helper to set max, min, zero |
| void set_zero(int type); |
| void set_max_time(bool neg); |
| |
| bool from_date_format_str(const char* format, int format_len, const char* value, |
| int64_t value_len, const char** sub_val_end); |
| |
| // 1 bits for neg. 3 bits for type. 12bit for second |
| uint16_t _neg : 1; // Used for time value. |
| uint16_t _type : 3; // Which type of this value. |
| uint16_t _second : 12; |
| uint8_t _minute; |
| uint8_t _hour; |
| uint8_t _day; |
| uint8_t _month; |
| uint16_t _year; |
| |
| VecDateTimeValue(uint8_t neg, uint8_t type, uint8_t hour, uint8_t minute, uint8_t second, |
| uint16_t year, uint8_t month, uint8_t day) |
| : _neg(neg), |
| _type(type), |
| _second(second), |
| _minute(minute), |
| _hour(hour), |
| _day(day), |
| _month(month), |
| _year(year) {} |
| }; |
| |
| inline const VecDateTimeValue VecDateTimeValue::FIRST_DAY(false, TYPE_DATETIME, 0, 0, 0, 1, 1, 1); |
| |
| template <typename T> |
| class DateV2Value { |
| public: |
| static constexpr bool is_datetime = std::is_same_v<T, DateTimeV2ValueType>; |
| using underlying_value = std::conditional_t<is_datetime, uint64_t, uint32_t>; |
| |
| // Constructor |
| DateV2Value() : date_v2_value_(0, 0, 0, 0, 0, 0, 0) {} |
| |
| DateV2Value(underlying_value int_val) : int_val_(int_val) {} |
| template <typename U> |
| requires std::is_integral_v<U> |
| DateV2Value(U other) = delete; |
| |
| DateV2Value(DateV2Value<T>& other) = default; |
| |
| DateV2Value(const DateV2Value<T>& other) = default; |
| |
| const static DateV2Value<T> FIRST_DAY; |
| |
| static DateV2Value create_from_olap_date(uint64_t value) { |
| DateV2Value<T> date; |
| date.from_olap_date(value); |
| return date; |
| } |
| |
| static DateV2Value create_from_olap_datetime(uint64_t value) { |
| DateV2Value<T> datetime; |
| datetime.from_olap_datetime(value); |
| return datetime; |
| } |
| |
| void set_microsecond(uint64_t microsecond); |
| |
| bool from_olap_date(uint64_t date) { |
| auto [year, month, day] = std::tuple {0, 0, 0}; |
| |
| day = date & 0x1f; |
| date >>= 5; |
| month = date & 0x0f; |
| date >>= 4; |
| year = date; |
| |
| return check_range_and_set_time(year, month, day, 0, 0, 0, 0); |
| } |
| |
| bool from_olap_datetime(uint64_t datetime) { |
| uint64_t date = datetime / 1000000; |
| uint64_t time = datetime % 1000000; |
| |
| auto [year, month, day, hour, minute, second] = std::tuple {0, 0, 0, 0, 0, 0}; |
| year = date / 10000; |
| date %= 10000; |
| month = date / 100; |
| day = date % 100; |
| hour = time / 10000; |
| time %= 10000; |
| minute = time / 100; |
| second = time % 100; |
| |
| return check_range_and_set_time(year, month, day, hour, minute, second, 0); |
| } |
| |
| uint64_t to_olap_date() const { |
| uint64_t val; |
| val = date_v2_value_.year_; |
| val <<= 4; |
| val |= date_v2_value_.month_; |
| val <<= 5; |
| val |= date_v2_value_.day_; |
| return val; |
| } |
| |
| // Convert this datetime value to string by the format string. |
| // for performance of checking, may return false when just APPROACH BUT NOT REACH max_valid_length. |
| // so need a little big buffer and its length as max_valid_length to make sure store valid data. |
| // to make sure of this. make the buffer size = <data_need_length> + SAFE_FORMAT_STRING_MARGIN. and pass this size as max_valid_length |
| bool to_format_string_conservative(const char* format, size_t len, char* to, |
| size_t max_valid_length) const; |
| |
| bool from_date_format_str(const char* format, size_t format_len, const char* value, |
| size_t value_len) { |
| return from_date_format_str(format, format_len, value, value_len, nullptr); |
| } |
| |
| template <typename U> |
| void assign_from(DateV2Value<U> src) { |
| date_v2_value_.year_ = src.year(); |
| date_v2_value_.month_ = src.month(); |
| date_v2_value_.day_ = src.day(); |
| if constexpr (is_datetime && std::is_same_v<U, DateTimeV2ValueType>) { |
| date_v2_value_.hour_ = src.hour(); |
| date_v2_value_.minute_ = src.minute(); |
| date_v2_value_.second_ = src.second(); |
| date_v2_value_.microsecond_ = src.microsecond(); |
| } |
| } |
| |
| // Construct Date/Datetime type value from string. |
| // At least the following formats are recognised (based on number of digits) |
| // 'YYMMDD', 'YYYYMMDD', 'YYMMDDHHMMSS', 'YYYYMMDDHHMMSS' |
| // 'YY-MM-DD', 'YYYY-MM-DD', 'YY-MM-DD HH.MM.SS' |
| // 'YYYYMMDDTHHMMSS' |
| bool from_date_str(const char* str, int len, int scale = -1, bool convert_zero = false); |
| bool from_date_str(const char* str, int len, const cctz::time_zone& local_time_zone, |
| int scale = -1, bool convert_zero = false); |
| |
| // Convert this value to string |
| // this will check type to decide which format to convert |
| // TIME: format 'hh:mm:ss.xxxxxx' |
| // DATE: format 'YYYY-MM-DD' |
| // DATETIME: format 'YYYY-MM-DD hh:mm:ss.xxxxxx' |
| int32_t to_buffer(char* buffer, int scale = -1) const; |
| |
| char* to_string(char* to, int scale = -1) const; |
| |
| // Return true if range or date is invalid |
| static bool is_invalid(uint32_t year, uint32_t month, uint32_t day, uint8_t hour, |
| uint8_t minute, uint8_t second, uint32_t microsecond, |
| bool only_time_part = false) { |
| if constexpr (is_datetime) { |
| if (hour >= 24 || minute >= 60 || second >= 60 || microsecond > 999999) { |
| return true; |
| } |
| if (only_time_part) { |
| return false; |
| } |
| } |
| return year > MAX_YEAR || !day || !month || month > 12 || |
| (day > 28 && ((month != 2 && day > S_DAYS_IN_MONTH[month]) || |
| (month == 2 && day > 28 + doris::is_leap(year)))); |
| } |
| |
| [[nodiscard]] bool check_range_and_set_time(uint16_t year, uint8_t month, uint8_t day, |
| uint8_t hour, uint8_t minute, uint8_t second, |
| uint32_t microsecond, bool only_time_part = false) { |
| if (is_invalid(year, month, day, hour, minute, second, microsecond, only_time_part)) { |
| return false; |
| } |
| if (only_time_part) { |
| // not change date part |
| unchecked_set_time(hour, minute, second, microsecond); |
| } else { |
| unchecked_set_time(year, month, day, hour, minute, second, microsecond); |
| } |
| return true; |
| } |
| |
| void unchecked_set_time(uint16_t year, uint8_t month, uint8_t day, uint8_t hour, uint8_t minute, |
| uint16_t second, uint32_t microsecond = 0); |
| |
| void unchecked_set_time(uint8_t hour, uint8_t minute, uint16_t second, uint32_t microsecond); |
| |
| // we frequently use this to do arithmetic operation, so use signed int64_t to avoid overflow. |
| int64_t daynr() const { |
| return calc_daynr(date_v2_value_.year_, date_v2_value_.month_, date_v2_value_.day_); |
| } |
| |
| uint8_t hour() const { |
| if constexpr (is_datetime) { |
| return date_v2_value_.hour_; |
| } else { |
| return 0; |
| } |
| } |
| |
| uint8_t minute() const { |
| if constexpr (is_datetime) { |
| return date_v2_value_.minute_; |
| } else { |
| return 0; |
| } |
| } |
| |
| uint8_t second() const { |
| if constexpr (is_datetime) { |
| return date_v2_value_.second_; |
| } else { |
| return 0; |
| } |
| } |
| |
| uint32_t microsecond() const { |
| if constexpr (is_datetime) { |
| return date_v2_value_.microsecond_; |
| } else { |
| return 0; |
| } |
| } |
| |
| int64_t time_part_to_seconds() const { |
| return hour() * SECOND_PER_HOUR + minute() * SECOND_PER_MINUTE + second(); |
| } |
| |
| void reset_time_part() { |
| if constexpr (is_datetime) { |
| date_v2_value_.hour_ = 0; |
| date_v2_value_.minute_ = 0; |
| date_v2_value_.second_ = 0; |
| date_v2_value_.microsecond_ = 0; |
| } |
| } |
| |
| int64_t time_part_to_microsecond() const { |
| return time_part_to_seconds() * 1000 * 1000 + microsecond(); |
| } |
| |
| uint16_t year() const { return date_v2_value_.year_; } |
| uint16_t year_of_week() const; |
| uint8_t month() const { return date_v2_value_.month_; } |
| int quarter() const { return (date_v2_value_.month_ - 1) / 3 + 1; } |
| int week() const { return week(mysql_week_mode(0)); } //00-53 |
| uint8_t day() const { return date_v2_value_.day_; } |
| |
| // Weekday, from 0(Mon) to 6(Sun) |
| uint8_t weekday() const { return calc_weekday(daynr(), false); } |
| auto day_of_week() const { return (weekday() + 1) % 7 + 1; } |
| |
// The bits in the week mode argument have the following meaning:
//   WEEK_MONDAY_FIRST (0)
//     If not set:
//       Sunday is the first day of the week.
//     If set:
//       Monday is the first day of the week.
//
//   WEEK_YEAR (1)
//     If not set:
//       Week is in range 0-53.
//       Week 0 is returned for the last week of the previous year (for
//       a date at the start of January). In this case one can get 53 for the
//       first week of the next year. This flag ensures that the week is
//       relevant for the given year. Note that this flag is only
//       relevant if WEEK_JANUARY is not set.
//     If set:
//       Week is in range 1-53.
//       In this case one may get week 53 for a date in January (when
//       the week is the last week of the previous year) and week 1 for a
//       date in December.
//
//   WEEK_FIRST_WEEKDAY (2)
//     If not set:
//       Weeks are numbered according to ISO 8601:1988.
//     If set:
//       The week that contains the first 'first-day-of-week' is week 1.
//
// ISO 8601:1988 means that
//   if the week containing January 1 has
//   four or more days in the new year, then it is week 1;
//   otherwise it is the last week of the previous year, and the
//   next week is week 1.
uint8_t week(uint8_t) const;

// NOTE(review): presumably combines the week-owning year and the week number for the
// given mode into one integer; the exact encoding is in the definition - confirm there.
uint32_t year_week(uint8_t mode) const;
| |
// Add interval.
// Two-operand form: takes the interval plus a separate destination value, which may be of
// a different DateV2Value instantiation (TO).
// NOTE(review): presumably writes the shifted value into `to_value` and returns false when
// the result is not representable - confirm in the definition.
template <TimeUnit unit, typename TO>
bool date_add_interval(const TimeInterval& interval, DateV2Value<TO>& to_value);

// Single-operand form operating on this object.
// NOTE(review): `need_check` presumably toggles validation of the result - confirm.
template <TimeUnit unit, bool need_check = true>
bool date_add_interval(const TimeInterval& interval);

// NOTE(review): by its name this assigns from `interval` instead of adding to the
// current value - confirm in the definition.
template <TimeUnit unit>
bool date_set_interval(const TimeInterval& interval);

// Truncates this value at the granularity given by `unit`.
template <TimeUnit unit>
bool datetime_trunc(); //datetime trunc, like trunc minute = 0
| |
// unix_timestamp is called with a timezone argument;
// it yields the seconds of this date literal since '1970-01-01 00:00:00' UTC.
// The result is written through `timestamp`; the bool return reports success.
bool unix_timestamp(int64_t* timestamp, const std::string& timezone) const;
bool unix_timestamp(int64_t* timestamp, const cctz::time_zone& ctz) const;
// Pair overloads: per the original note, the first element is the fixed-point result.
// NOTE(review): the second element presumably carries the sub-second part - confirm.
bool unix_timestamp(std::pair<int64_t, int64_t>* timestamp, const std::string& timezone) const;
bool unix_timestamp(std::pair<int64_t, int64_t>* timestamp, const cctz::time_zone& ctz) const;

// Construct this value from a timestamp and a timezone.
// The timestamp is an internal value representing seconds since '1970-01-01 00:00:00' UTC;
// negative values are allowed.
// No validity check is done here because this is a hot path: any caller that needs the
// resulting time to be legal must check it itself.
// The overloads taking a timezone name return bool; those taking a cctz::time_zone return void.
bool from_unixtime(int64_t, const std::string& timezone);
void from_unixtime(int64_t, const cctz::time_zone& ctz);
bool from_unixtime(std::pair<int64_t, int64_t>, const std::string& timezone);
void from_unixtime(std::pair<int64_t, int64_t>, const cctz::time_zone& ctz);
bool from_unixtime(int64_t, int32_t, const std::string& timezone, int scale);
void from_unixtime(int64_t, int32_t, const cctz::time_zone& ctz, int scale);
| |
| bool operator==(const DateV2Value<T>& other) const { |
| // NOTE: This is not same with MySQL. |
| // MySQL convert both to int with left value type and then compare |
| // We think all fields equals. |
| return this->to_date_int_val() == other.to_date_int_val(); |
| } |
| |
| bool operator==(const VecDateTimeValue& other) const { |
| int64_t ts1; |
| int64_t ts2; |
| this->unix_timestamp(&ts1, TimezoneUtils::default_time_zone); |
| other.unix_timestamp(&ts2, TimezoneUtils::default_time_zone); |
| return ts1 == ts2; |
| } |
| |
| bool operator!=(const DateV2Value<T>& other) const { |
| return this->to_date_int_val() != other.to_date_int_val(); |
| } |
| |
| bool operator!=(const VecDateTimeValue& other) const { return !(*this == other); } |
| |
| bool operator<=(const DateV2Value<T>& other) const { return !(*this > other); } |
| |
| bool operator<=(const VecDateTimeValue& other) const { |
| int64_t ts1; |
| int64_t ts2; |
| this->unix_timestamp(&ts1, TimezoneUtils::default_time_zone); |
| other.unix_timestamp(&ts2, TimezoneUtils::default_time_zone); |
| return ts1 <= ts2; |
| } |
| |
| bool operator>=(const DateV2Value<T>& other) const { return !(*this < other); } |
| |
| bool operator>=(const VecDateTimeValue& other) const { |
| int64_t ts1; |
| int64_t ts2; |
| this->unix_timestamp(&ts1, TimezoneUtils::default_time_zone); |
| other.unix_timestamp(&ts2, TimezoneUtils::default_time_zone); |
| return ts1 >= ts2; |
| } |
| |
| bool operator<(const DateV2Value<T>& other) const { |
| return this->to_date_int_val() < other.to_date_int_val(); |
| } |
| |
| bool operator<(const VecDateTimeValue& other) const { |
| int64_t ts1; |
| int64_t ts2; |
| this->unix_timestamp(&ts1, TimezoneUtils::default_time_zone); |
| other.unix_timestamp(&ts2, TimezoneUtils::default_time_zone); |
| return ts1 < ts2; |
| } |
| |
| bool operator>(const DateV2Value<T>& other) const { |
| return this->to_date_int_val() > other.to_date_int_val(); |
| } |
| |
| bool operator>(const VecDateTimeValue& other) const { |
| int64_t ts1; |
| int64_t ts2; |
| this->unix_timestamp(&ts1, TimezoneUtils::default_time_zone); |
| other.unix_timestamp(&ts2, TimezoneUtils::default_time_zone); |
| return ts1 > ts2; |
| } |
| |
// Copy assignment uses the compiler-generated implementation.
DateV2Value<T>& operator=(const DateV2Value<T>& other) = default;

// Textual name of this value's month; defined outside this part of the file.
// NOTE(review): language and lifetime of the returned string are set by the definition - confirm.
const char* month_name() const;

// Textual name of this value's day of the week; defined outside this part of the file.
// NOTE(review): language and lifetime of the returned string are set by the definition - confirm.
const char* day_name() const;
| |
| DateV2Value<T>& operator+=(int64_t count) { |
| bool is_neg = false; |
| if (count < 0) { |
| is_neg = true; |
| count = -count; |
| } |
| if constexpr (is_datetime) { |
| TimeInterval interval(SECOND, count, is_neg); |
| date_add_interval<SECOND>(interval); |
| } else { |
| TimeInterval interval(DAY, count, is_neg); |
| date_add_interval<DAY>(interval); |
| } |
| return *this; |
| } |
| |
| DateV2Value<T>& operator-=(int64_t count) { return *this += -count; } |
| |
| DateV2Value<T>& operator++() { return *this += 1; } |
| |
| DateV2Value<T>& operator--() { return *this += -1; } |
| |
| uint32_t hash(int seed) const { return HashUtil::hash(this, sizeof(*this), seed); } |
| |
| int day_of_year() const { return daynr() - calc_daynr(this->year(), 1, 1) + 1; } |
| |
| std::string debug_string() const { |
| char buf[64]; |
| char* end = to_string(buf); |
| return {buf, static_cast<size_t>(end - buf)}; |
| } |
| |
| bool is_valid_date() const { |
| if constexpr (is_datetime) { |
| return !is_invalid(this->year(), this->month(), this->day(), this->hour(), |
| this->minute(), this->second(), this->microsecond()); |
| } else { |
| return !is_invalid(this->year(), this->month(), this->day(), 0, 0, 0, 0); |
| } |
| } |
| |
| //only calculate the diff of dd:mm:ss |
| template <typename RHS> |
| int64_t time_part_diff_without_ms(const RHS& rhs) const { |
| return time_part_to_seconds() - rhs.time_part_to_seconds(); |
| } |
| |
| //only calculate the diff of dd:mm:ss.SSSSSS |
| template <typename RHS> |
| int64_t time_part_diff_in_ms(const RHS& rhs) const { |
| return time_part_to_microsecond() - rhs.time_part_to_microsecond(); |
| } |
| |
| template <typename RHS> |
| int64_t datetime_diff_in_seconds(const RHS& rhs) const { |
| return (daynr() - rhs.daynr()) * SECOND_PER_HOUR * HOUR_PER_DAY + |
| time_part_diff_without_ms(rhs); |
| } |
| |
| template <typename RHS> |
| int32_t date_diff_in_days(const RHS& rhs) const { |
| return daynr() - rhs.daynr(); // arithmetic calculation will auto promote to signed int32 |
| } |
| |
| int32_t date_diff_in_days_round_to_zero_by_time(const auto& rhs) const { |
| int32_t day = this->date_diff_in_days(rhs); |
| int64_t ms_diff = this->time_part_diff_in_ms(rhs); |
| if (day > 0 && ms_diff < 0) { |
| day--; |
| } else if (day < 0 && ms_diff > 0) { |
| day++; |
| } |
| return day; |
| } |
| |
| // used by INT microseconds_diff(DATETIME enddate, DATETIME startdate) |
| // return value is int type, so shouldn't have any limit. |
| // when used by TIME TIMEDIFF(DATETIME expr1, DATETIME expr2), it's return time type, should have limited. |
| template <typename RHS> |
| int64_t datetime_diff_in_microseconds(const RHS& rhs) const { |
| int64_t diff_m = (daynr() - rhs.daynr()) * HOUR_PER_DAY * SECOND_PER_HOUR * MS_PER_SECOND + |
| time_part_diff_in_ms(rhs); |
| return diff_m; |
| } |
| |
| int64_t datetime_diff_in_seconds_round_to_zero_by_ms(const auto& rhs) const { |
| int64_t second = this->datetime_diff_in_seconds(rhs); |
| int32_t ms_diff = this->microsecond() - rhs.microsecond(); |
| if (second > 0 && ms_diff < 0) { |
| second--; |
| } else if (second < 0 && ms_diff > 0) { |
| second++; |
| } |
| return second; |
| } |
| |
| underlying_value to_date_int_val() const { return int_val_; } |
| |
// NOTE(review): presumably fills this value from a decimal-packed date/datetime integer
// and returns false when it is not a legal date - confirm in the definition.
bool from_date_int64(int64_t value);

// NOTE(review): presumably the inverse of daynr(), setting the date fields from a day
// number - confirm in the definition.
bool get_date_from_daynr(uint64_t);
| |
| template <TimeUnit unit> |
| [[nodiscard]] bool set_time_unit(uint32_t val) { |
| // is uint so need check upper bound only |
| if constexpr (unit == TimeUnit::YEAR) { |
| if (val > MAX_YEAR) [[unlikely]] { |
| return false; |
| } |
| date_v2_value_.year_ = val; |
| } else if constexpr (unit == TimeUnit::MONTH) { |
| if (val > MAX_MONTH) [[unlikely]] { |
| return false; |
| } |
| date_v2_value_.month_ = val; |
| } else if constexpr (unit == TimeUnit::DAY) { |
| DCHECK(date_v2_value_.month_ <= MAX_MONTH); |
| DCHECK(date_v2_value_.month_ != 0); |
| if (val > S_DAYS_IN_MONTH[date_v2_value_.month_] && |
| !(is_leap(date_v2_value_.year_) && date_v2_value_.month_ == 2 && val == 29)) { |
| return false; |
| } |
| date_v2_value_.day_ = val; |
| } else if constexpr (unit == TimeUnit::HOUR) { |
| if constexpr (is_datetime) { |
| if (val > MAX_HOUR) [[unlikely]] { |
| return false; |
| } |
| date_v2_value_.hour_ = val; |
| } else { |
| DCHECK(false) << "shouldn't set for date"; |
| } |
| } else if constexpr (unit == TimeUnit::MINUTE) { |
| if constexpr (is_datetime) { |
| if (val > MAX_MINUTE) [[unlikely]] { |
| return false; |
| } |
| date_v2_value_.minute_ = val; |
| } else { |
| DCHECK(false) << "shouldn't set for date"; |
| } |
| } else if constexpr (unit == TimeUnit::SECOND) { |
| if constexpr (is_datetime) { |
| if (val > MAX_SECOND) [[unlikely]] { |
| return false; |
| } |
| date_v2_value_.second_ = val; |
| } else { |
| DCHECK(false) << "shouldn't set for date"; |
| } |
| } else if constexpr (unit == TimeUnit::MICROSECOND) { |
| if constexpr (is_datetime) { |
| if (val > MAX_MICROSECOND) [[unlikely]] { |
| return false; |
| } |
| date_v2_value_.microsecond_ = val; |
| } else { |
| DCHECK(false) << "shouldn't set for date"; |
| } |
| } |
| return true; |
| } |
| |
| template <TimeUnit unit> |
| void unchecked_set_time_unit(uint32_t val) { |
| // is uint so need check upper bound only |
| if constexpr (unit == TimeUnit::YEAR) { |
| date_v2_value_.year_ = val; |
| } else if constexpr (unit == TimeUnit::MONTH) { |
| date_v2_value_.month_ = val; |
| } else if constexpr (unit == TimeUnit::DAY) { |
| date_v2_value_.day_ = val; |
| } else if constexpr (unit == TimeUnit::HOUR) { |
| if constexpr (is_datetime) { |
| date_v2_value_.hour_ = val; |
| } else { |
| DCHECK(false) << "shouldn't set for date"; |
| } |
| } else if constexpr (unit == TimeUnit::MINUTE) { |
| if constexpr (is_datetime) { |
| date_v2_value_.minute_ = val; |
| } else { |
| DCHECK(false) << "shouldn't set for date"; |
| } |
| } else if constexpr (unit == TimeUnit::SECOND) { |
| if constexpr (is_datetime) { |
| date_v2_value_.second_ = val; |
| } else { |
| DCHECK(false) << "shouldn't set for date"; |
| } |
| } else if constexpr (unit == TimeUnit::MICROSECOND) { |
| if constexpr (is_datetime) { |
| date_v2_value_.microsecond_ = val; |
| } else { |
| DCHECK(false) << "shouldn't set for date"; |
| } |
| } |
| } |
| |
| int64_t to_int64() const { |
| if constexpr (is_datetime) { |
| return (date_v2_value_.year_ * 10000L + date_v2_value_.month_ * 100 + |
| date_v2_value_.day_) * |
| 1000000L + |
| date_v2_value_.hour_ * 10000 + date_v2_value_.minute_ * 100 + |
| date_v2_value_.second_; |
| } else { |
| return date_v2_value_.year_ * 10000 + date_v2_value_.month_ * 100 + date_v2_value_.day_; |
| } |
| } |
| |
// Parses `value` (value_len bytes) according to `format` (format_len bytes).
// NOTE(review): `sub_val_end` is an out-parameter, presumably set to where parsing
// stopped inside `value` - confirm in the definition.
bool from_date_format_str(const char* format, int format_len, const char* value,
                          int64_t value_len, const char** sub_val_end);
// NOTE(review): presumably year, month, day, hour, minute, second, microsecond - confirm at use sites.
static constexpr int MAX_DATE_PARTS = 7;
// Upper bounds for hour, minute and second, in that order.
static constexpr uint32_t MAX_TIME_PART_VALUE[3] = {23, 59, 59};

// NOTE(review): presumably normalises the parsed parts in `date_v`, using `carry_bits`
// to flag parts that must carry into the next one - confirm in the definition.
void format_datetime(uint32_t* date_v, bool* carry_bits) const;
| |
| void set_int_val(uint64_t val) { this->int_val_ = val; } |
| |
| private: |
// Week-number computation shared by the week-related accessors.
// `mode` carries the week-mode bits; `to_year` is an out-parameter.
// NOTE(review): `to_year` presumably receives the year the week belongs to, and
// `disable_lut` presumably bypasses a lookup table - confirm in the definition.
static uint8_t calc_week(const uint32_t& day_nr, const uint16_t& year, const uint8_t& month,
                         const uint8_t& day, uint8_t mode, uint16_t* to_year,
                         bool disable_lut = false);

// Common string-parsing routine; defined outside this part of the file.
// NOTE(review): `scale`, `local_time_zone` and `convert_zero` semantics are set by the
// definition - confirm there.
bool from_date_str_base(const char* date_str, int len, int scale,
                        const cctz::time_zone* local_time_zone, bool convert_zero);

// Used to construct from int value
int64_t standardize_timevalue(int64_t value);

// Helper to set max, min, zero
void set_zero();
| |
// Storage: the field-wise view (T) and the packed integer view (underlying_value)
// share the same bytes.
union {
    T date_v2_value_;
    underlying_value int_val_;
};

// Private field-wise constructor: forwards every component to T's constructor
// without any validation in this class.
DateV2Value(uint16_t year, uint8_t month, uint8_t day, uint8_t hour, uint8_t minute,
            uint8_t second, uint32_t microsecond)
        : date_v2_value_(year, month, day, hour, minute, second, microsecond) {}
| }; |
| |
| template <typename T> |
| inline const DateV2Value<T> DateV2Value<T>::FIRST_DAY = DateV2Value<T>(0001, 1, 1, 0, 0, 0, 0); |
| |
// Subtraction between date values, in every combination of VecDateTimeValue and
// DateV2Value. Only DATE - DATE is supported (DATETIME - DATETIME is not).
// NOTE(review): the result type is the unsigned std::size_t, so a negative difference
// cannot be represented as such - confirm how the definitions handle v1 < v2.
std::size_t operator-(const VecDateTimeValue& v1, const VecDateTimeValue& v2);

template <typename T>
std::size_t operator-(const VecDateTimeValue& v1, const DateV2Value<T>& v2);

template <typename T>
std::size_t operator-(const DateV2Value<T>& v1, const VecDateTimeValue& v2);

// Stream insertion for VecDateTimeValue.
std::ostream& operator<<(std::ostream& os, const VecDateTimeValue& value);

// Hash hook for VecDateTimeValue; used by the std::hash specialisation at the end of this file.
std::size_t hash_value(VecDateTimeValue const& value);

template <typename T0, typename T1>
std::size_t operator-(const DateV2Value<T0>& v1, const DateV2Value<T1>& v2);

// Stream insertion for DateV2Value.
template <typename T>
std::ostream& operator<<(std::ostream& os, const DateV2Value<T>& value);

// Hash hook for DateV2Value; used by the std::hash specialisations at the end of this file.
template <typename T>
std::size_t hash_value(DateV2Value<T> const& value);
| |
| template <TimeUnit unit> |
| int64_t datetime_diff(const VecDateTimeValue& ts_value1, const VecDateTimeValue& ts_value2) { |
| switch (unit) { |
| case YEAR: { |
| int year = (ts_value2.year() - ts_value1.year()); |
| if (year > 0) { |
| year -= (ts_value2.to_datetime_int64() % 10000000000 - |
| ts_value1.to_datetime_int64() % 10000000000) < 0; |
| } else if (year < 0) { |
| year += (ts_value2.to_datetime_int64() % 10000000000 - |
| ts_value1.to_datetime_int64() % 10000000000) > 0; |
| } |
| return year; |
| } |
| case MONTH: { |
| int month = (ts_value2.year() - ts_value1.year()) * 12 + |
| (ts_value2.month() - ts_value1.month()); |
| if (month > 0) { |
| month -= (ts_value2.to_datetime_int64() % 100000000 - |
| ts_value1.to_datetime_int64() % 100000000) < 0; |
| } else if (month < 0) { |
| month += (ts_value2.to_datetime_int64() % 100000000 - |
| ts_value1.to_datetime_int64() % 100000000) > 0; |
| } |
| return month; |
| } |
| case WEEK: { |
| int day = ts_value2.daynr() - ts_value1.daynr(); |
| if (day > 0) { |
| day -= ts_value2.time_part_diff(ts_value1) < 0; |
| } else if (day < 0) { |
| day += ts_value2.time_part_diff(ts_value1) > 0; |
| } |
| return day / 7; |
| } |
| case DAY: { |
| int day = ts_value2.daynr() - ts_value1.daynr(); |
| if (day > 0) { |
| day -= ts_value2.time_part_diff(ts_value1) < 0; |
| } else if (day < 0) { |
| day += ts_value2.time_part_diff(ts_value1) > 0; |
| } |
| return day; |
| } |
| case HOUR: { |
| int64_t second = ts_value2.datetime_diff_in_seconds(ts_value1); |
| int64_t hour = second / 60 / 60; |
| return hour; |
| } |
| case MINUTE: { |
| int64_t second = ts_value2.datetime_diff_in_seconds(ts_value1); |
| int64_t minute = second / 60; |
| return minute; |
| } |
| case SECOND: { |
| int64_t second = ts_value2.datetime_diff_in_seconds(ts_value1); |
| return second; |
| } |
| } |
| // Rethink the default return value |
| return 0; |
| } |
| |
| // ROUND the result TO ZERO( not FLOOR). for datetime_diff<year>, everything less than year is the remainder. |
| // "ROUND TO ZERO" means `years_diff('2020-05-05', '2015-06-06')` gets 4 and |
| // `years_diff('2015-06-06', '2020-05-05')` gets -4. |
| template <TimeUnit UNIT, typename T0, typename T1> |
| int64_t datetime_diff(const DateV2Value<T0>& ts_value1, const DateV2Value<T1>& ts_value2) { |
| constexpr uint64_t uint64_minus_one = -1; |
| switch (UNIT) { |
| // for YEAR and MONTH: calculate the diff of year or month, and use bitmask to get the remainder of all other |
| // parts. then round to zero by the remainder. |
| case YEAR: { |
| int year = (ts_value2.year() - ts_value1.year()); |
| if constexpr (std::is_same_v<T0, T1>) { |
| int year_width = |
| DateV2Value<T0>::is_datetime ? DATETIMEV2_YEAR_WIDTH : DATEV2_YEAR_WIDTH; |
| decltype(ts_value2.to_date_int_val()) minus_one = -1; |
| if (year > 0) { |
| year -= ((ts_value2.to_date_int_val() & (minus_one >> year_width)) < |
| (ts_value1.to_date_int_val() & (minus_one >> year_width))); |
| } else if (year < 0) { |
| year += ((ts_value2.to_date_int_val() & (minus_one >> year_width)) > |
| (ts_value1.to_date_int_val() & (minus_one >> year_width))); |
| } |
| } else if constexpr (std::is_same_v<T0, DateV2ValueType>) { |
| auto ts1_int_value = ((uint64_t)ts_value1.to_date_int_val()) << TIME_PART_LENGTH; |
| if (year > 0) { |
| year -= ((ts_value2.to_date_int_val() & |
| (uint64_minus_one >> DATETIMEV2_YEAR_WIDTH)) < |
| (ts1_int_value & (uint64_minus_one >> DATETIMEV2_YEAR_WIDTH))); |
| } else if (year < 0) { |
| year += ((ts_value2.to_date_int_val() & |
| (uint64_minus_one >> DATETIMEV2_YEAR_WIDTH)) > |
| (ts1_int_value & (uint64_minus_one >> DATETIMEV2_YEAR_WIDTH))); |
| } |
| } else { |
| auto ts2_int_value = ((uint64_t)ts_value2.to_date_int_val()) << TIME_PART_LENGTH; |
| if (year > 0) { |
| year -= ((ts2_int_value & (uint64_minus_one >> DATETIMEV2_YEAR_WIDTH)) < |
| (ts_value1.to_date_int_val() & |
| (uint64_minus_one >> DATETIMEV2_YEAR_WIDTH))); |
| } else if (year < 0) { |
| year += ((ts2_int_value & (uint64_minus_one >> DATETIMEV2_YEAR_WIDTH)) > |
| (ts_value1.to_date_int_val() & |
| (uint64_minus_one >> DATETIMEV2_YEAR_WIDTH))); |
| } |
| } |
| |
| return year; |
| } |
| case MONTH: { |
| int month = (ts_value2.year() - ts_value1.year()) * 12 + |
| (ts_value2.month() - ts_value1.month()); |
| if constexpr (std::is_same_v<T0, T1>) { |
| int shift_bits = DateV2Value<T0>::is_datetime |
| ? DATETIMEV2_YEAR_WIDTH + DATETIMEV2_MONTH_WIDTH |
| : DATEV2_YEAR_WIDTH + DATETIMEV2_MONTH_WIDTH; |
| decltype(ts_value2.to_date_int_val()) minus_one = -1; |
| if (month > 0) { |
| month -= ((ts_value2.to_date_int_val() & (minus_one >> shift_bits)) < |
| (ts_value1.to_date_int_val() & (minus_one >> shift_bits))); |
| } else if (month < 0) { |
| month += ((ts_value2.to_date_int_val() & (minus_one >> shift_bits)) > |
| (ts_value1.to_date_int_val() & (minus_one >> shift_bits))); |
| } |
| } else if constexpr (std::is_same_v<T0, DateV2ValueType>) { |
| auto ts1_int_value = ((uint64_t)ts_value1.to_date_int_val()) << TIME_PART_LENGTH; |
| if (month > 0) { |
| month -= ((ts_value2.to_date_int_val() & |
| (uint64_minus_one >> (DATETIMEV2_YEAR_WIDTH + DATETIMEV2_MONTH_WIDTH))) < |
| (ts1_int_value & |
| (uint64_minus_one >> (DATETIMEV2_YEAR_WIDTH + DATETIMEV2_MONTH_WIDTH)))); |
| } else if (month < 0) { |
| month += ((ts_value2.to_date_int_val() & |
| (uint64_minus_one >> (DATETIMEV2_YEAR_WIDTH + DATETIMEV2_MONTH_WIDTH))) > |
| (ts1_int_value & |
| (uint64_minus_one >> (DATETIMEV2_YEAR_WIDTH + DATETIMEV2_MONTH_WIDTH)))); |
| } |
| } else { |
| auto ts2_int_value = ((uint64_t)ts_value2.to_date_int_val()) << TIME_PART_LENGTH; |
| if (month > 0) { |
| month -= ((ts2_int_value & |
| (uint64_minus_one >> (DATETIMEV2_YEAR_WIDTH + DATETIMEV2_MONTH_WIDTH))) < |
| (ts_value1.to_date_int_val() & |
| (uint64_minus_one >> (DATETIMEV2_YEAR_WIDTH + DATETIMEV2_MONTH_WIDTH)))); |
| } else if (month < 0) { |
| month += ((ts2_int_value & |
| (uint64_minus_one >> (DATETIMEV2_YEAR_WIDTH + DATETIMEV2_MONTH_WIDTH))) > |
| (ts_value1.to_date_int_val() & |
| (uint64_minus_one >> (DATETIMEV2_YEAR_WIDTH + DATETIMEV2_MONTH_WIDTH)))); |
| } |
| } |
| return month; |
| } |
| case WEEK: { |
| return ts_value2.date_diff_in_days_round_to_zero_by_time(ts_value1) / 7; |
| } |
| case DAY: { |
| return ts_value2.date_diff_in_days_round_to_zero_by_time(ts_value1); |
| } |
| case HOUR: { |
| return ts_value2.datetime_diff_in_seconds_round_to_zero_by_ms(ts_value1) / 60 / 60; |
| } |
| case MINUTE: { |
| return ts_value2.datetime_diff_in_seconds_round_to_zero_by_ms(ts_value1) / 60; |
| } |
| case SECOND: { |
| return ts_value2.datetime_diff_in_seconds_round_to_zero_by_ms(ts_value1); |
| } |
| case MILLISECOND: { |
| // C++ naturally rounds to zero |
| return ts_value2.datetime_diff_in_microseconds(ts_value1) / 1000; |
| } |
| case MICROSECOND: { |
| // no precision loss |
| return ts_value2.datetime_diff_in_microseconds(ts_value1); |
| } |
| } |
| // Rethink the default return value |
| return 0; |
| } |
| |
| /** |
| * Date dict table. date range is [1900-01-01, 2039-12-31]. |
| */ |
| class date_day_offset_dict { |
| private: |
| static constexpr int DAY_BEFORE_EPOCH = 25567; // 1900-01-01 |
| static constexpr int DAY_AFTER_EPOCH = 25566; // 2039-12-31 |
| static constexpr int DICT_DAYS = DAY_BEFORE_EPOCH + 1 + DAY_AFTER_EPOCH; // 1 means 1970-01-01 |
| |
| static constexpr int START_YEAR = 1900; // 1900-01-01 |
| static constexpr int END_YEAR = 2039; // 2039-10-24 |
| static constexpr int DAY_OFFSET_CAL_START_POINT_DAYNR = |
| 719528; // 1970-01-01 (start from 0000-01-01, 0000-01-01 is day 1, returns 1) |
| |
| static std::array<DateV2Value<DateV2ValueType>, DICT_DAYS> DATE_DAY_OFFSET_ITEMS; |
| static std::array<std::array<std::array<int, 31>, 12>, 140> DATE_DAY_OFFSET_DICT; |
| |
| static bool DATE_DAY_OFFSET_ITEMS_INIT; |
| |
| static date_day_offset_dict instance; |
| |
| date_day_offset_dict(); |
| ~date_day_offset_dict() = default; |
| date_day_offset_dict(const date_day_offset_dict&) = default; |
| date_day_offset_dict& operator=(const date_day_offset_dict&) = default; |
| |
| public: |
| static bool can_speed_up_calc_daynr(int year) { return year >= START_YEAR && year <= END_YEAR; } |
| |
| static int get_offset_by_daynr(int daynr) { return daynr - DAY_OFFSET_CAL_START_POINT_DAYNR; } |
| |
| static bool can_speed_up_daynr_to_date(int daynr) { |
| auto res = get_offset_by_daynr(daynr); |
| return res >= 0 ? res <= DAY_AFTER_EPOCH : -res <= DAY_BEFORE_EPOCH; |
| } |
| |
| static date_day_offset_dict& get() { return instance; } |
| |
| static bool get_dict_init() { return DATE_DAY_OFFSET_ITEMS_INIT; } |
| |
| inline DateV2Value<DateV2ValueType> operator[](int day) const { |
| int index = day + DAY_BEFORE_EPOCH; |
| if (LIKELY(index >= 0 && index < DICT_DAYS)) { |
| return DATE_DAY_OFFSET_ITEMS[index]; |
| } else { |
| DateV2Value<DateV2ValueType> d = DATE_DAY_OFFSET_ITEMS[0]; |
| return d += index; |
| } |
| } |
| |
| int daynr(int year, int month, int day) const { |
| return DATE_DAY_OFFSET_DICT[year - START_YEAR][month - 1][day - 1]; |
| } |
| }; |
| |
| inline uint32_t calc_daynr(uint16_t year, uint8_t month, uint8_t day) { |
| // date_day_offet_dict range from [1900-01-01, 2039-12-31] |
| if (date_day_offset_dict::can_speed_up_calc_daynr(year) && |
| LIKELY(date_day_offset_dict::get_dict_init())) { |
| return date_day_offset_dict::get().daynr(year, month, day); |
| } |
| |
| uint32_t delsum = 0; |
| int y = year; |
| |
| if (year == 0 && month == 0) { |
| return 0; |
| } |
| if (year == 0 && month == 1 && day == 1) { |
| return 1; |
| } |
| |
| /* Cast to int to be able to handle month == 0 */ |
| delsum = 365 * y + 31 * (month - 1) + day; |
| if (month <= 2) { |
| // No leap year |
| y--; |
| } else { |
| // This is great!!! |
| // 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12 |
| // 0, 0, 3, 3, 4, 4, 5, 5, 5, 6, 7, 8 |
| delsum -= (month * 4 + 23) / 10; |
| } |
| // Every 400 year has 97 leap year, 100, 200, 300 are not leap year. |
| return delsum + y / 4 - y / 100 + y / 400; |
| } |
| |
// Maps the integer type used to store a date/datetime value onto the matching value
// class (`T`) and vectorized data type (`DateType`). The primary template is empty;
// only the specialisations below provide the aliases.
template <typename T>
struct DateTraits {};

// int64_t storage -> VecDateTimeValue / DataTypeDateTime.
template <>
struct DateTraits<int64_t> {
    using T = VecDateTimeValue;
    using DateType = vectorized::DataTypeDateTime;
};

// uint32_t storage -> DateV2Value<DateV2ValueType> / DataTypeDateV2.
template <>
struct DateTraits<uint32_t> {
    using T = DateV2Value<DateV2ValueType>;
    using DateType = vectorized::DataTypeDateV2;
};

// uint64_t storage -> DateV2Value<DateTimeV2ValueType> / DataTypeDateTimeV2.
template <>
struct DateTraits<uint64_t> {
    using T = DateV2Value<DateTimeV2ValueType>;
    using DateType = vectorized::DataTypeDateTimeV2;
};
| |
| } // namespace doris |
| |
| template <> |
| struct std::hash<doris::VecDateTimeValue> { |
| size_t operator()(const doris::VecDateTimeValue& v) const { return doris::hash_value(v); } |
| }; |
| |
| template <> |
| struct std::hash<doris::DateV2Value<doris::DateV2ValueType>> { |
| size_t operator()(const doris::DateV2Value<doris::DateV2ValueType>& v) const { |
| return doris::hash_value(v); |
| } |
| }; |
| |
| template <> |
| struct std::hash<doris::DateV2Value<doris::DateTimeV2ValueType>> { |
| size_t operator()(const doris::DateV2Value<doris::DateTimeV2ValueType>& v) const { |
| return doris::hash_value(v); |
| } |
| }; |