blob: 2b2ce79991a344ed4231da44520695ea64b6a0a8 [file] [log] [blame]
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#ifndef IMPALA_RUNTIME_TIMESTAMP_VALUE_H
#define IMPALA_RUNTIME_TIMESTAMP_VALUE_H
#include <boost/date_time/compiler_config.hpp>
#include <boost/date_time/gregorian/gregorian.hpp>
#include <boost/date_time/local_time/local_time.hpp>
#include <gflags/gflags.h>
#include <string>
#include "common/global-types.h"
#include "gen-cpp/Data_types.h"
#include "udf/udf.h"
#include "util/hash-util.h"
/// Users who want a fix for IMPALA-97 (to be Hive compatible) can enable this flag.
/// The flag is disabled by default but should be flipped with the next release that
/// accepts breaking-changes.
DECLARE_bool(use_local_tz_for_unix_timestamp_conversions);
namespace impala {
namespace datetime_parse_util {
struct DateTimeFormatContext;
}
/// Represents either a (1) date and time, (2) a date with an undefined time, or (3)
/// a time with an undefined date. In all cases, times have up to nanosecond resolution
/// and the minimum and maximum dates are 1400-01-01 and 9999-12-31.
//
/// This type is similar to Postgresql TIMESTAMP WITHOUT TIME ZONE datatype and MySQL's
/// DATETIME datatype. Note that because TIMESTAMP does not contain time zone
/// information, the time zone must be inferred by the context when needed. For example,
/// suppose the current date and time is Jan 15 2015 5:37:56 PM PST:
/// SELECT NOW(); -- Returns '2015-01-15 17:37:56' - implicit time zone of NOW() return
/// value is PST
/// SELECT TO_UTC_TIMESTAMP(NOW(), "PST"); -- Returns '2015-01-16 01:54:21' - implicit
/// time zone is UTC, input time zone specified by second parameter.
//
/// Hive describes this data type as "Timestamps are interpreted to be timezoneless and
/// stored as an offset from the UNIX epoch." but storing a value as an offset from
/// Unix epoch, which is defined as being in UTC, is impossible unless the time zone for
/// the value is known. If all files stored values relative to the epoch, then there
/// would be no reason to interpret values as timezoneless.
//
/// TODO: Document what situation leads to #2 at the top. Cases #1 and 3 can be created
/// with literals. A literal "2000-01-01" results in a value with a "00:00:00"
/// time component. It may not be possible to actually create case #2 though
/// the code implies it is possible.
//
/// For collect timings, prefer the functions in util/time.h. If this class is used for
/// timings, the local time should never be used since it is affected by daylight savings.
/// Also keep in mind that the time component rolls over at midnight so the date should
/// always be checked when determining a duration.
class TimestampValue {
public:
TimestampValue() {}
TimestampValue(const boost::gregorian::date& d,
const boost::posix_time::time_duration& t)
: time_(t),
date_(d) {
Validate();
}
TimestampValue(const boost::posix_time::ptime& t)
: time_(t.time_of_day()),
date_(t.date()) {
Validate();
}
TimestampValue(const TimestampValue& tv) : time_(tv.time_), date_(tv.date_) {}
/// Constructors that parse from a date/time string. See TimestampParser for details
/// about the date-time format.
static TimestampValue ParseSimpleDateFormat(const std::string& str);
static TimestampValue ParseSimpleDateFormat(const char* str, int len);
static TimestampValue ParseSimpleDateFormat(const char* str, int len,
const datetime_parse_util::DateTimeFormatContext& dt_ctx);
static TimestampValue ParseIsoSqlFormat(const char* str, int len,
const datetime_parse_util::DateTimeFormatContext& dt_ctx);
/// 'days' represents the number of days since 1970-01-01.
/// The returned timestamp's date component is set so that it would correspond to
/// 'days', the time-of-day component is set to 00:00:00.
static TimestampValue FromDaysSinceUnixEpoch(int64_t days);
// Returns the number of days since 1970-01-01. Expects date_ to be valid.
int64_t DaysSinceUnixEpoch() const;
/// Unix time (seconds since 1970-01-01 UTC by definition) constructors.
/// Return the corresponding timestamp in the 'local_tz' time zone if
/// FLAGS_use_local_tz_for_unix_timestamp_conversions is true. Otherwise, return the
/// corresponding timestamp in UTC.
static TimestampValue FromUnixTime(time_t unix_time, const Timezone& local_tz);
/// Same as FromUnixTime() above, but adds the specified number of nanoseconds to the
/// resulting TimestampValue. Handles negative nanoseconds and the case where
/// abs(nanos) >= 1e9.
static TimestampValue FromUnixTimeNanos(time_t unix_time, int64_t nanos,
const Timezone& local_tz);
/// Same as FromUnixTime(), but expects the time in microseconds.
static TimestampValue FromUnixTimeMicros(int64_t unix_time_micros,
const Timezone& local_tz);
/// Return the corresponding timestamp in UTC for the Unix time specified in
/// nanoseconds. The range is smaller than in other conversion function, as the
/// full [1400 .. 10000) range cannot be represented with an int64.
/// Supported range: [1677-09-21 00:12:43.145224192 .. 2262-04-11 23:47:16.854775807]
static TimestampValue UtcFromUnixTimeLimitedRangeNanos(int64_t unix_time_nanos);
/// Return the corresponding timestamp in UTC for the Unix time specified in
/// microseconds.
static TimestampValue UtcFromUnixTimeMicros(int64_t unix_time_micros);
/// Return the corresponding timestamp in UTC for the Unix time specified in
/// milliseconds.
static TimestampValue UtcFromUnixTimeMillis(int64_t unix_time_millis);
/// Returns a TimestampValue in 'local_tz' time zone where the integer part of the
/// specified 'unix_time' specifies the number of seconds (see above), and the
/// fractional part is converted to nanoseconds and added to the resulting
/// TimestampValue.
static TimestampValue FromSubsecondUnixTime(double unix_time, const Timezone& local_tz);
/// Returns a TimestampValue converted from a TimestampVal. The caller must ensure
/// the TimestampVal does not represent a NULL.
static TimestampValue FromTimestampVal(const impala_udf::TimestampVal& udf_value) {
DCHECK(!udf_value.is_null);
TimestampValue value;
memcpy(&value.date_, &udf_value.date, sizeof(value.date_));
memcpy(&value.time_, &udf_value.time_of_day, sizeof(value.time_));
value.Validate();
return value;
}
/// Returns a TimestampVal representation in the output variable.
/// Returns null if the TimestampValue instance doesn't have a valid date or time.
void ToTimestampVal(impala_udf::TimestampVal* tv) const {
if (!HasDateOrTime()) {
tv->is_null = true;
return;
}
memcpy(&tv->date, &date_, sizeof(date_));
memcpy(&tv->time_of_day, &time_, sizeof(time_));
tv->is_null = false;
}
void ToPtime(boost::posix_time::ptime* ptp) const {
*ptp = boost::posix_time::ptime(date_, time_);
}
// Store the binary representation of this TimestampValue in 'tvalue'.
void ToTColumnValue(TColumnValue* tvalue) const {
const uint8_t* data = reinterpret_cast<const uint8_t*>(this);
tvalue->timestamp_val.assign(data, data + Size());
tvalue->__isset.timestamp_val = true;
}
// Returns a new TimestampValue created from the value in 'tvalue'.
static TimestampValue FromTColumnValue(const TColumnValue& tvalue) {
TimestampValue value;
memcpy(&value, tvalue.timestamp_val.c_str(), Size());
value.Validate();
return value;
}
bool HasDate() const { return !date_.is_special(); }
bool HasTime() const { return !time_.is_special(); }
bool HasDateOrTime() const { return HasDate() || HasTime(); }
bool HasDateAndTime() const { return HasDate() && HasTime(); }
std::string ToString() const;
/// Verifies that the date falls into a valid range (years 1400..9999).
static inline bool IsValidDate(const boost::gregorian::date& date) {
// Smallest valid day number.
const static int64_t MIN_DAY_NUMBER = static_cast<int64_t>(
boost::gregorian::date(boost::date_time::min_date_time).day_number());
// Largest valid day number.
const static int64_t MAX_DAY_NUMBER = static_cast<int64_t>(
boost::gregorian::date(boost::date_time::max_date_time).day_number());
return date.day_number() >= MIN_DAY_NUMBER
&& date.day_number() <= MAX_DAY_NUMBER;
}
/// Verifies that the time is not negative and is less than a whole day.
static inline bool IsValidTime(const boost::posix_time::time_duration& time) {
static const int64_t NANOS_PER_DAY = 1'000'000'000LL * SECONDS_PER_DAY;
return !time.is_negative()
&& time.total_nanoseconds() < NANOS_PER_DAY;
}
/// Formats the timestamp using the given date/time context and returns the result.
/// dt_ctx -- the date/time context containing the format to use
std::string Format(const datetime_parse_util::DateTimeFormatContext& dt_ctx) const;
/// Interpret 'this' as a timestamp in UTC and convert to unix time.
/// Returns false if the conversion failed ('unix_time' will be undefined), otherwise
/// true.
bool UtcToUnixTime(time_t* unix_time) const;
/// Interpret 'this' as a timestamp in UTC and convert to unix time in microseconds.
/// Nanoseconds are rounded to the nearest microsecond supported by Impala.
/// Returns false if the conversion failed ('unix_time_micros' will be undefined),
/// otherwise true.
/// TODO: Rounding towards nearest microsecond should be replaced with rounding
/// towards minus infinity. For more details, see IMPALA-8180
bool UtcToUnixTimeMicros(int64_t* unix_time_micros) const;
/// Interpret 'this' as a timestamp in UTC and convert to unix time in microseconds.
/// Nanoseconds are rounded towards minus infinity.
/// Returns false if the conversion failed ('unix_time_micros' will be undefined),
/// otherwise true.
bool FloorUtcToUnixTimeMicros(int64_t* unix_time_micros) const;
/// Interpret 'this' as a timestamp in UTC and convert to unix time in milliseconds.
/// Nanoseconds are rounded towards minus infinity.
/// Returns false if the conversion failed ('unix_time_millis' will be undefined),
/// otherwise true.
bool FloorUtcToUnixTimeMillis(int64_t* unix_time_millis) const;
/// Interpret 'this' as a timestamp in UTC and convert to unix time in nanoseconds.
/// The full [1400 .. 10000) range cannot be represented with an int64, so the
/// conversion will fail outside the supported range:
/// [1677-09-21 00:12:43.145224192 .. 2262-04-11 23:47:16.854775807]
/// Returns false if the conversion failed ('unix_time_millis' will be undefined),
/// otherwise true.
bool UtcToUnixTimeLimitedRangeNanos(int64_t* unix_time_nanos) const;
/// Converts to Unix time (seconds since the Unix epoch) representation. The time zone
/// interpretation of the TimestampValue instance is determined by
/// FLAGS_use_local_tz_for_unix_timestamp_conversions. If the flag is true, the instance
/// is interpreted as a value in the 'local_tz' time zone. If the flag is false, UTC is
/// assumed. Returns false if the conversion failed (unix_time will be undefined),
/// otherwise true.
bool ToUnixTime(const Timezone& local_tz, time_t* unix_time) const;
/// Converts to Unix time with fractional seconds. The time zone interpretation of the
/// TimestampValue instance is determined a above.
/// Returns false if the conversion failed (unix_time will be undefined), otherwise
/// true.
bool ToSubsecondUnixTime(const Timezone& local_tz, double* unix_time) const;
/// Converts from UTC to 'local_tz' time zone in-place. The caller must ensure the
/// TimestampValue this function is called upon has both a valid date and time.
///
/// If start/end_of_repeated_period is not nullptr and timestamp falls into an interval
/// where LocalToUtc() is ambiguous (e.g. Summer->Winter DST change on Northern
/// hemisphere), then these arguments are set to the start/end of this period in local
/// time. This is useful to get some ordering guarantees in the case when the order of
/// two timestamps is different in UTC and local time (e.g CET Autumn dst change
/// 00:30:00 -> 02:30:00 vs 01:15:00 -> 02:15:00) - any timestamp that is earlier than
/// 'this' in UTC is guaranteed to be earlier than 'end_of_repeated_period' in local
/// time, and any timestamp later than 'this' in UTC is guaranteed to be later than
/// 'start_of_repeated_period' in local time.
void UtcToLocal(const Timezone& local_tz,
TimestampValue* start_of_repeated_period = nullptr,
TimestampValue* end_of_repeated_period = nullptr);
/// Converts from 'local_tz' to UTC time zone in-place. The caller must ensure the
/// TimestampValue this function is called upon has both a valid date and time.
void LocalToUtc(const Timezone& local_tz);
void set_date(const boost::gregorian::date d) { date_ = d; Validate(); }
void set_time(const boost::posix_time::time_duration t) { time_ = t; Validate(); }
const boost::gregorian::date& date() const { return date_; }
const boost::posix_time::time_duration& time() const { return time_; }
TimestampValue& operator=(const boost::posix_time::ptime& ptime) {
date_ = ptime.date();
time_ = ptime.time_of_day();
Validate();
return *this;
}
bool operator==(const TimestampValue& other) const {
return date_ == other.date_ && time_ == other.time_;
}
bool operator!=(const TimestampValue& other) const { return !(*this == other); }
bool operator<(const TimestampValue& other) const {
return date_ < other.date_ || (date_ == other.date_ && time_ < other.time_);
}
bool operator<=(const TimestampValue& other) const {
return *this < other || *this == other; }
bool operator>(const TimestampValue& other) const {
return date_ > other.date_ || (date_ == other.date_ && time_ > other.time_);
}
bool operator>=(const TimestampValue& other) const {
return *this > other || *this == other;
}
static size_t Size() {
return sizeof(boost::posix_time::time_duration) + sizeof(boost::gregorian::date);
}
inline uint32_t Hash(int seed = 0) const {
uint32_t hash = HashUtil::Hash(&time_, sizeof(time_), seed);
return HashUtil::Hash(&date_, sizeof(date_), hash);
}
/// Divides 'ticks' with 'GRANULARITY' (truncated towards negative infinity) and
/// sets 'ticks' to the remainder.
template <int64_t GRANULARITY>
inline static int64_t SplitTime(int64_t* ticks) {
int64_t result = *ticks / GRANULARITY;
*ticks %= GRANULARITY;
if (*ticks < 0) {
result--;
*ticks += GRANULARITY;
}
return result;
}
static const char* LLVM_CLASS_NAME;
private:
friend class UnusedClass;
/// Used when converting a time with fractional seconds which are stored as in integer
/// to a Unix time stored as a double.
static const double ONE_BILLIONTH;
static const uint64_t SECONDS_PER_DAY = 24 * 60 * 60;
/// Boost ptime leaves a gap in the structure, so we swap the order to make it
/// 12 contiguous bytes. We then must convert to and from the boost ptime data type.
/// See IMP-87 for more information on why using ptime with the 4 byte gap is
/// problematic.
/// 8 bytes - stores the nanoseconds within the current day
boost::posix_time::time_duration time_;
/// 4 -bytes - stores the date as a day
boost::gregorian::date date_;
/// Sets both date and time to invalid value.
inline void SetToInvalidDateTime() {
time_ = boost::posix_time::time_duration(boost::posix_time::not_a_date_time);
date_ = boost::gregorian::date(boost::gregorian::not_a_date_time);
}
/// Sets both date and time to invalid if date is outside the valid range.
/// Time's validity is only checked in debug builds.
/// TODO: This could be also checked in release, but I am a bit afraid that it would
/// affect performance and probably break some scenarios that are
/// currently working more or less correctly.
inline void Validate() {
if (HasDate() && UNLIKELY(!IsValidDate(date_))) SetToInvalidDateTime();
else if (HasTime()) DCHECK(IsValidTime(time_));
}
/// Converts 'unix_time' (in UTC seconds) to TimestampValue in timezone 'local_tz'.
static TimestampValue UnixTimeToLocal(time_t unix_time, const Timezone& local_tz);
/// Converts 'unix_time_ticks'/TICKS_PER_SEC seconds to TimestampValue.
template <int32_t TICKS_PER_SEC>
static TimestampValue UtcFromUnixTimeTicks(int64_t unix_time_ticks);
};
/// This function must be called 'hash_value' to be picked up by boost.
inline std::size_t hash_value(const TimestampValue& v) {
return v.Hash();
}
std::ostream& operator<<(std::ostream& os, const TimestampValue& timestamp_value);
}
#endif