blob: 20d6cee9fbb8022f0cdbfeb7551a0d6c7bff7e98 [file] [log] [blame]
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#include "runtime/timestamp-value.h"
#include <boost/date_time/posix_time/posix_time.hpp>
#include "runtime/timestamp-parse-util.h"
#include "common/names.h"
using boost::date_time::not_a_date_time;
using boost::gregorian::date;
using boost::gregorian::date_duration;
using boost::posix_time::from_time_t;
using boost::posix_time::nanoseconds;
using boost::posix_time::ptime;
using boost::posix_time::ptime_from_tm;
using boost::posix_time::time_duration;
using boost::posix_time::to_tm;
DEFINE_bool(use_local_tz_for_unix_timestamp_conversions, false,
"When true, TIMESTAMPs are interpreted in the local time zone when converting to "
"and from Unix times. When false, TIMESTAMPs are interpreted in the UTC time zone. "
"Set to true for Hive compatibility.");
// Constants for use with Unix times. Leap-seconds do not apply.
const int32_t SECONDS_IN_MINUTE = 60;
const int32_t SECONDS_IN_HOUR = 60 * SECONDS_IN_MINUTE;
const int32_t SECONDS_IN_DAY = 24 * SECONDS_IN_HOUR;
// struct tm stores month/year data as an offset
const unsigned short TM_YEAR_OFFSET = 1900;
const unsigned short TM_MONTH_OFFSET = 1;
// Boost stores dates as an uint32_t. Since subtraction is needed, convert to signed.
const int64_t EPOCH_DAY_NUMBER =
static_cast<int64_t>(date(1970, boost::gregorian::Jan, 1).day_number());
namespace impala {
const char* TimestampValue::LLVM_CLASS_NAME = "class.impala::TimestampValue";
const double TimestampValue::ONE_BILLIONTH = 0.000000001;
TimestampValue TimestampValue::Parse(const char* str, int len) {
TimestampValue tv;
TimestampParser::Parse(str, len, &tv.date_, &tv.time_);
return tv;
}
TimestampValue TimestampValue::Parse(const string& str) {
return Parse(str.c_str(), str.size());
}
TimestampValue TimestampValue::Parse(const char* str, int len,
const DateTimeFormatContext& dt_ctx) {
TimestampValue tv;
TimestampParser::Parse(str, len, dt_ctx, &tv.date_, &tv.time_);
return tv;
}
int TimestampValue::Format(const DateTimeFormatContext& dt_ctx, int len, char* buff)
const {
return TimestampParser::Format(dt_ctx, date_, time_, len, buff);
}
void TimestampValue::UtcToLocal() {
DCHECK(HasDateAndTime());
// Previously, conversion was done using boost functions but it was found to be
// too slow. Doing the conversion without function calls (which also avoids some
// unnecessary validations) the conversion only take half as long. Original:
// http://www.boost.org/doc/libs/1_55_0/boost/date_time/c_local_time_adjustor.hpp
try {
time_t utc =
(static_cast<int64_t>(date_.day_number()) - EPOCH_DAY_NUMBER) * SECONDS_IN_DAY +
time_.hours() * SECONDS_IN_HOUR +
time_.minutes() * SECONDS_IN_MINUTE +
time_.seconds();
tm temp;
if (UNLIKELY(localtime_r(&utc, &temp) == nullptr)) {
*this = ptime(not_a_date_time);
return;
}
// Unlikely but a time zone conversion may push the value over the min/max
// boundary resulting in an exception.
date_ = boost::gregorian::date(
static_cast<unsigned short>(temp.tm_year + TM_YEAR_OFFSET),
static_cast<unsigned short>(temp.tm_mon + TM_MONTH_OFFSET),
static_cast<unsigned short>(temp.tm_mday));
time_ = time_duration(temp.tm_hour, temp.tm_min, temp.tm_sec,
time().fractional_seconds());
} catch (std::exception& /* from Boost */) {
*this = ptime(not_a_date_time);
}
}
ostream& operator<<(ostream& os, const TimestampValue& timestamp_value) {
return os << timestamp_value.ToString();
}
void TimestampValue::Validate() {
if (HasDate() && UNLIKELY(!IsValidDate(date_))) {
time_ = boost::posix_time::time_duration(boost::posix_time::not_a_date_time);
date_ = boost::gregorian::date(boost::gregorian::not_a_date_time);
}
}
/// Return a ptime representation of the given Unix time (seconds since the Unix epoch).
/// The time zone of the resulting ptime is local time. This is called by
/// UnixTimeToPtime.
ptime TimestampValue::UnixTimeToLocalPtime(time_t unix_time) {
tm temp_tm;
// TODO: avoid localtime*, which takes a global timezone db lock
if (UNLIKELY(localtime_r(&unix_time, &temp_tm) == nullptr)) {
return ptime(not_a_date_time);
}
try {
return ptime_from_tm(temp_tm);
} catch (std::exception&) {
return ptime(not_a_date_time);
}
}
/// Return a ptime representation of the given Unix time (seconds since the Unix epoch).
/// The time zone of the resulting ptime is UTC.
/// In order to avoid a serious performance degredation using libc (IMPALA-5357), this
/// function uses boost to convert the time_t to a ptime. Unfortunately, because the boost
/// conversion relies on time_duration to represent the time_t and internally
/// time_duration stores nanosecond precision ticks, the 'fast path' conversion using
/// boost can only handle a limited range of dates (appx years 1677-2622, while Impala
/// supports years 1600-9999). For dates outside this range, the conversion will instead
/// use the libc function gmtime_r which supports those dates but takes the global lock
/// for the timezone db (even though technically it is not needed for the conversion,
/// again see IMPALA-5357). This is called by UnixTimeToPtime.
ptime TimestampValue::UnixTimeToUtcPtime(time_t unix_time) {
// Minimum Unix time that can be converted with from_time_t: 1677-Sep-21 00:12:44
const int64_t MIN_BOOST_CONVERT_UNIX_TIME = -9223372036;
// Maximum Unix time that can be converted with from_time_t: 2262-Apr-11 23:47:16
const int64_t MAX_BOOST_CONVERT_UNIX_TIME = 9223372036;
if (LIKELY(unix_time >= MIN_BOOST_CONVERT_UNIX_TIME &&
unix_time <= MAX_BOOST_CONVERT_UNIX_TIME)) {
try {
return from_time_t(unix_time);
} catch (std::exception&) {
return ptime(not_a_date_time);
}
}
tm temp_tm;
if (UNLIKELY(gmtime_r(&unix_time, &temp_tm) == nullptr)) {
return ptime(not_a_date_time);
}
try {
return ptime_from_tm(temp_tm);
} catch (std::exception&) {
return ptime(not_a_date_time);
}
}
ptime TimestampValue::UnixTimeToPtime(time_t unix_time) {
if (FLAGS_use_local_tz_for_unix_timestamp_conversions) {
return UnixTimeToLocalPtime(unix_time);
} else {
return UnixTimeToUtcPtime(unix_time);
}
}
string TimestampValue::ToString() const {
stringstream ss;
if (HasDate()) {
ss << boost::gregorian::to_iso_extended_string(date_);
}
if (HasTime()) {
if (HasDate()) ss << " ";
ss << boost::posix_time::to_simple_string(time_);
}
return ss.str();
}
}