blob: 58829285f233659c906150f79907dd5a97863d24 [file] [log] [blame]
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#ifndef IMPALA_RUNTIME_TIMESTAMP_VALUE_H
#define IMPALA_RUNTIME_TIMESTAMP_VALUE_H
#include <boost/date_time/compiler_config.hpp>
#include <boost/date_time/gregorian/gregorian.hpp>
#include <boost/date_time/posix_time/conversion.hpp>
#include <boost/date_time/posix_time/posix_time_types.hpp>
#include <ctime>
#include <gflags/gflags.h>
#include <string>
#include "udf/udf.h"
#include "util/hash-util.h"
/// Users who want a fix for IMPALA-97 (to be Hive compatible) can enable this flag.
/// The flag is disabled by default but should be flipped with the next release that
/// accepts breaking-changes.
DECLARE_bool(use_local_tz_for_unix_timestamp_conversions);
namespace impala {
struct DateTimeFormatContext;
/// Represents either a (1) date and time, (2) a date with an undefined time, or (3)
/// a time with an undefined date. In all cases, times have up to nanosecond resolution
/// and the minimum and maximum dates are 1400-01-01 and 10000-12-31.
//
/// This type is similar to Postgresql TIMESTAMP WITHOUT TIME ZONE datatype and MySQL's
/// DATETIME datatype. Note that because TIMESTAMP does not contain time zone
/// information, the time zone must be inferred by the context when needed. For example,
/// suppose the current date and time is Jan 15 2015 5:37:56 PM PST:
/// SELECT NOW(); -- Returns '2015-01-15 17:37:56' - implicit time zone of NOW() return
/// value is PST
/// SELECT TO_UTC_TIMESTAMP(NOW(), "PST"); -- Returns '2015-01-16 01:54:21' - implicit
/// time zone is UTC, input time zone specified by second parameter.
//
/// Hive describes this data type as "Timestamps are interpreted to be timezoneless and
/// stored as an offset from the UNIX epoch." but storing a value as an offset from
/// Unix epoch, which is defined as being in UTC, is impossible unless the time zone for
/// the value is known. If all files stored values relative to the epoch, then there
/// would be no reason to interpret values as timezoneless.
//
/// TODO: Document what situation leads to #2 at the top. Cases #1 and 3 can be created
/// with literals. A literal "2000-01-01" results in a value with a "00:00:00"
/// time component. It may not be possible to actually create case #2 though
/// the code implies it is possible.
//
/// For collect timings, prefer the functions in util/time.h. If this class is used for
/// timings, the local time should never be used since it is affected by daylight savings.
/// Also keep in mind that the time component rolls over at midnight so the date should
/// always be checked when determining a duration.
class TimestampValue {
public:
TimestampValue() {}
TimestampValue(const boost::gregorian::date& d,
const boost::posix_time::time_duration& t)
: time_(t),
date_(d) {}
TimestampValue(const boost::posix_time::ptime& t)
: time_(t.time_of_day()),
date_(t.date()) {}
TimestampValue(const TimestampValue& tv) : time_(tv.time_), date_(tv.date_) {}
TimestampValue(const char* str, int len);
TimestampValue(const char* str, int len, const DateTimeFormatContext& dt_ctx);
/// Unix time (seconds since 1970-01-01 UTC by definition) constructors.
/// Conversion to local time will be done if
/// FLAGS_use_local_tz_for_unix_timestamp_conversions is true.
TimestampValue(int64_t unix_time, int64_t nanos) {
boost::posix_time::ptime temp = UnixTimeToPtime(unix_time);
temp += boost::posix_time::nanoseconds(nanos);
*this = temp;
}
explicit TimestampValue(double unix_time) {
const time_t unix_time_whole = unix_time;
boost::posix_time::ptime temp = UnixTimeToPtime(unix_time_whole);
temp += boost::posix_time::nanoseconds((unix_time - unix_time_whole) / ONE_BILLIONTH);
*this = temp;
}
/// Returns the current local time with microsecond accuracy. This should not be used
/// to time something because it is affected by adjustments to the system clock such
/// as a daylight savings or a manual correction by a system admin. For timings, use
/// functions in util/time.h.
static TimestampValue LocalTime() {
return TimestampValue(boost::posix_time::microsec_clock::local_time());
}
/// Returns a TimestampValue converted from a TimestampVal. The caller must ensure
/// the TimestampVal does not represent a NULL.
static TimestampValue FromTimestampVal(const impala_udf::TimestampVal& udf_value) {
DCHECK(!udf_value.is_null);
TimestampValue value;
memcpy(&value.date_, &udf_value.date, sizeof(value.date_));
memcpy(&value.time_, &udf_value.time_of_day, sizeof(value.time_));
return value;
}
/// Returns a TimestampVal representation in the output variable.
/// Returns null if the TimestampValue instance doesn't have a valid date or time.
void ToTimestampVal(impala_udf::TimestampVal* tv) const {
if (!HasDateOrTime()) {
tv->is_null = true;
return;
}
memcpy(&tv->date, &date_, sizeof(date_));
memcpy(&tv->time_of_day, &time_, sizeof(time_));
tv->is_null = false;
}
void ToPtime(boost::posix_time::ptime* ptp) const {
*ptp = boost::posix_time::ptime(date_, time_);
}
bool HasDate() const { return !date_.is_special(); }
bool HasTime() const { return !time_.is_special(); }
bool HasDateOrTime() const { return HasDate() || HasTime(); }
bool HasDateAndTime() const { return HasDate() && HasTime(); }
std::string DebugString() const;
/// Verifies that the timestamp date falls into a valid range (years 1400..9999).
inline bool IsValidDate() const {
// Smallest valid day number.
const static int64_t MIN_DAY_NUMBER = static_cast<int64_t>(
boost::gregorian::date(boost::date_time::min_date_time).day_number());
// Largest valid day number.
const static int64_t MAX_DAY_NUMBER = static_cast<int64_t>(
boost::gregorian::date(boost::date_time::max_date_time).day_number());
return date_.day_number() >= MIN_DAY_NUMBER
&& date_.day_number() <= MAX_DAY_NUMBER;
}
/// Formats the timestamp using the given date/time context and places the result in the
/// string buffer. The size of the buffer should be at least dt_ctx.fmt_out_len + 1. A
/// string terminator will be appended to the string.
/// dt_ctx -- the date/time context containing the format to use
/// len -- the length of the buffer
/// buff -- the buffer that will hold the result
/// Returns the number of characters copied in to the buffer (minus the terminator)
int Format(const DateTimeFormatContext& dt_ctx, int len, char* buff) const;
/// Converts to Unix time (seconds since the Unix epoch) representation. The time
/// zone interpretation of the TimestampValue instance is determined by
/// FLAGS_use_local_tz_for_unix_timestamp_conversions. If the flag is true, the
/// instance is interpreted as a local value. If the flag is false, UTC is assumed.
/// Returns false if the conversion failed (unix_time will be undefined), otherwise
/// true.
bool ToUnixTime(time_t* unix_time) const {
DCHECK(unix_time != NULL);
if (UNLIKELY(!HasDateAndTime())) return false;
const boost::posix_time::ptime temp(date_, time_);
tm temp_tm = boost::posix_time::to_tm(temp);
if (FLAGS_use_local_tz_for_unix_timestamp_conversions) {
*unix_time = mktime(&temp_tm);
} else {
*unix_time = timegm(&temp_tm);
}
return true;
}
/// Converts to Unix time with fractional seconds.
/// Returns false if the conversion failed (unix_time will be undefined), otherwise
/// true.
bool ToSubsecondUnixTime(double* unix_time) const {
DCHECK(unix_time != NULL);
time_t temp;
if (UNLIKELY(!ToUnixTime(&temp))) return false;
*unix_time = static_cast<double>(temp);
DCHECK(HasTime());
*unix_time += time_.fractional_seconds() * ONE_BILLIONTH;
return true;
}
/// Converts from UTC to local time in-place. The caller must ensure the TimestampValue
/// this function is called upon has both a valid date and time.
void UtcToLocal();
void set_date(const boost::gregorian::date d) { date_ = d; }
void set_time(const boost::posix_time::time_duration t) { time_ = t; }
const boost::gregorian::date& date() const { return date_; }
const boost::posix_time::time_duration& time() const { return time_; }
TimestampValue& operator=(const boost::posix_time::ptime& ptime) {
date_ = ptime.date();
time_ = ptime.time_of_day();
return *this;
}
bool operator==(const TimestampValue& other) const {
return date_ == other.date_ && time_ == other.time_;
}
bool operator!=(const TimestampValue& other) const { return !(*this == other); }
bool operator<(const TimestampValue& other) const {
return date_ < other.date_ || (date_ == other.date_ && time_ < other.time_);
}
bool operator<=(const TimestampValue& other) const {
return *this < other || *this == other; }
bool operator>(const TimestampValue& other) const {
return date_ > other.date_ || (date_ == other.date_ && time_ > other.time_);
}
bool operator>=(const TimestampValue& other) const {
return *this > other || *this == other;
}
static size_t Size() {
return sizeof(boost::posix_time::time_duration) + sizeof(boost::gregorian::date);
}
inline uint32_t Hash(int seed = 0) const {
uint32_t hash = HashUtil::Hash(&time_, sizeof(time_), seed);
return HashUtil::Hash(&date_, sizeof(date_), hash);
}
static const char* LLVM_CLASS_NAME;
private:
friend class UnusedClass;
/// Used when converting a time with fractional seconds which are stored as in integer
/// to a Unix time stored as a double.
static const double ONE_BILLIONTH;
/// Boost ptime leaves a gap in the structure, so we swap the order to make it
/// 12 contiguous bytes. We then must convert to and from the boost ptime data type.
/// See IMP-87 for more information on why using ptime with the 4 byte gap is
/// problematic.
/// 8 bytes - stores the nanoseconds within the current day
boost::posix_time::time_duration time_;
/// 4 -bytes - stores the date as a day
boost::gregorian::date date_;
/// Return a ptime representation of the given Unix time (seconds since the Unix epoch).
/// The time zone of the resulting ptime is determined by
/// FLAGS_use_local_tz_for_unix_timestamp_conversions. If the flag is true, the value
/// will be in the local time zone. If the flag is false, the value will be in UTC.
boost::posix_time::ptime UnixTimeToPtime(time_t unix_time) const;
};
/// This function must be called 'hash_value' to be picked up by boost.
inline std::size_t hash_value(const TimestampValue& v) {
return v.Hash();
}
std::ostream& operator<<(std::ostream& os, const TimestampValue& timestamp_value);
}
#endif