| // Licensed to the Apache Software Foundation (ASF) under one |
| // or more contributor license agreements. See the NOTICE file |
| // distributed with this work for additional information |
| // regarding copyright ownership. The ASF licenses this file |
| // to you under the Apache License, Version 2.0 (the |
| // "License"); you may not use this file except in compliance |
| // with the License. You may obtain a copy of the License at |
| // |
| // http://www.apache.org/licenses/LICENSE-2.0 |
| // |
| // Unless required by applicable law or agreed to in writing, |
| // software distributed under the License is distributed on an |
| // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
| // KIND, either express or implied. See the License for the |
| // specific language governing permissions and limitations |
| // under the License. |
| |
| #include "exprs/timestamp-functions.h" |
| |
| #include <boost/algorithm/string.hpp> |
| #include <boost/date_time/posix_time/posix_time.hpp> |
| #include <ctime> |
| |
| #include "exprs/anyval-util.h" |
| #include "exprs/timezone_db.h" |
| #include "gutil/strings/substitute.h" |
| #include "runtime/string-value.inline.h" |
| #include "runtime/timestamp-parse-util.h" |
| #include "runtime/timestamp-value.h" |
| #include "udf/udf-internal.h" |
| #include "udf/udf.h" |
| |
| #include "common/names.h" |
| |
| using boost::algorithm::iequals; |
| using boost::local_time::local_date_time; |
| using boost::local_time::time_zone_ptr; |
| using boost::posix_time::ptime; |
| using boost::posix_time::to_iso_extended_string; |
| |
| namespace impala { |
| |
| // This function is not cross-compiled to avoid including unnecessary boost library's |
| // header files which bring in a bunch of unused code and global variables and increase |
| // the codegen time. boost::posix_time::to_iso_extended_string() is large enough that |
| // it won't benefit much from inlining. |
| string TimestampFunctions::ToIsoExtendedString(const TimestampValue& ts_value) { |
| return to_iso_extended_string(ts_value.date()); |
| } |
| |
| namespace { |
| /// Uses Boost's internal checking to throw an exception if 'date' is out of the |
| /// supported range of boost::gregorian. |
| void ThrowIfDateOutOfRange(const boost::gregorian::date& date) { |
| // Boost checks the ranges when instantiating the year/month/day representations. |
| boost::gregorian::greg_year year = date.year(); |
| boost::gregorian::greg_month month = date.month(); |
| boost::gregorian::greg_day day = date.day(); |
| // Ensure Boost's validation is effective. |
| DCHECK_GE(year, boost::gregorian::greg_year::min()); |
| DCHECK_LE(year, boost::gregorian::greg_year::max()); |
| DCHECK_GE(month, boost::gregorian::greg_month::min()); |
| DCHECK_LE(month, boost::gregorian::greg_month::max()); |
| DCHECK_GE(day, boost::gregorian::greg_day::min()); |
| DCHECK_LE(day, boost::gregorian::greg_day::max()); |
| } |
| } |
| |
| // This function uses inline asm functions, which we believe to be from the boost library. |
| // Inline asm is not currently supported by JIT, so this function should always be run in |
| // the interpreted mode. This is handled in ScalarFnCall::GetUdf(). |
| TimestampVal TimestampFunctions::FromUtc(FunctionContext* context, |
| const TimestampVal& ts_val, const StringVal& tz_string_val) { |
| if (ts_val.is_null || tz_string_val.is_null) return TimestampVal::null(); |
| const TimestampValue& ts_value = TimestampValue::FromTimestampVal(ts_val); |
| if (!ts_value.HasDateOrTime()) return TimestampVal::null(); |
| |
| const StringValue& tz_string_value = StringValue::FromStringVal(tz_string_val); |
| time_zone_ptr timezone = TimezoneDatabase::FindTimezone( |
| string(tz_string_value.ptr, tz_string_value.len), ts_value); |
| if (timezone == NULL) { |
| // This should return null. Hive just ignores it. |
| stringstream ss; |
| ss << "Unknown timezone '" << tz_string_value << "'" << endl; |
| context->AddWarning(ss.str().c_str()); |
| return ts_val; |
| } |
| |
| try { |
| ptime temp; |
| ts_value.ToPtime(&temp); |
| local_date_time lt(temp, timezone); |
| ptime local_time = lt.local_time(); |
| ThrowIfDateOutOfRange(local_time.date()); |
| TimestampVal return_val; |
| TimestampValue(local_time).ToTimestampVal(&return_val); |
| return return_val; |
| } catch (boost::exception&) { |
| const string& msg = Substitute( |
| "Timestamp '$0' did not convert to a valid local time in timezone '$1'", |
| ts_value.DebugString(), tz_string_value.DebugString()); |
| context->AddWarning(msg.c_str()); |
| return TimestampVal::null(); |
| } |
| } |
| |
| // This function uses inline asm functions, which we believe to be from the boost library. |
| // Inline asm is not currently supported by JIT, so this function should always be run in |
| // the interpreted mode. This is handled in ScalarFnCall::GetUdf(). |
| TimestampVal TimestampFunctions::ToUtc(FunctionContext* context, |
| const TimestampVal& ts_val, const StringVal& tz_string_val) { |
| if (ts_val.is_null || tz_string_val.is_null) return TimestampVal::null(); |
| const TimestampValue& ts_value = TimestampValue::FromTimestampVal(ts_val); |
| if (!ts_value.HasDateOrTime()) return TimestampVal::null(); |
| |
| const StringValue& tz_string_value = StringValue::FromStringVal(tz_string_val); |
| time_zone_ptr timezone = TimezoneDatabase::FindTimezone( |
| string(tz_string_value.ptr, tz_string_value.len), ts_value); |
| // This should raise some sort of error or at least null. Hive Just ignores it. |
| if (timezone == NULL) { |
| stringstream ss; |
| ss << "Unknown timezone '" << tz_string_value << "'" << endl; |
| context->AddWarning(ss.str().c_str()); |
| return ts_val; |
| } |
| |
| try { |
| local_date_time lt(ts_value.date(), ts_value.time(), timezone, |
| local_date_time::NOT_DATE_TIME_ON_ERROR); |
| ptime utc_time = lt.utc_time(); |
| // The utc_time() conversion does not check ranges - need to explicitly check. |
| ThrowIfDateOutOfRange(utc_time.date()); |
| TimestampVal return_val; |
| TimestampValue(utc_time).ToTimestampVal(&return_val); |
| return return_val; |
| } catch (boost::exception&) { |
| const string& msg = |
| Substitute("Timestamp '$0' in timezone '$1' could not be converted to UTC", |
| ts_value.DebugString(), tz_string_value.DebugString()); |
| context->AddWarning(msg.c_str()); |
| return TimestampVal::null(); |
| } |
| } |
| |
| void TimestampFunctions::UnixAndFromUnixPrepare( |
| FunctionContext* context, FunctionContext::FunctionStateScope scope) { |
| if (scope != FunctionContext::THREAD_LOCAL) return; |
| DateTimeFormatContext* dt_ctx = NULL; |
| if (context->IsArgConstant(1)) { |
| StringVal fmt_val = *reinterpret_cast<StringVal*>(context->GetConstantArg(1)); |
| const StringValue& fmt_ref = StringValue::FromStringVal(fmt_val); |
| if (fmt_val.is_null || fmt_ref.len == 0) { |
| TimestampFunctions::ReportBadFormat(context, fmt_val, true); |
| return; |
| } |
| dt_ctx = new DateTimeFormatContext(fmt_ref.ptr, fmt_ref.len); |
| bool parse_result = TimestampParser::ParseFormatTokens(dt_ctx); |
| if (!parse_result) { |
| delete dt_ctx; |
| TimestampFunctions::ReportBadFormat(context, fmt_val, true); |
| return; |
| } |
| } else { |
| // If our format string is constant, then we benefit from it only being parsed once in |
| // the code above. If it's not constant, then we can reuse a context by resetting it. |
| // This is much cheaper vs alloc/dealloc'ing a context for each evaluation. |
| dt_ctx = new DateTimeFormatContext(); |
| } |
| context->SetFunctionState(scope, dt_ctx); |
| } |
| |
| void TimestampFunctions::UnixAndFromUnixClose(FunctionContext* context, |
| FunctionContext::FunctionStateScope scope) { |
| if (scope == FunctionContext::THREAD_LOCAL) { |
| DateTimeFormatContext* dt_ctx = |
| reinterpret_cast<DateTimeFormatContext*>(context->GetFunctionState(scope)); |
| delete dt_ctx; |
| } |
| } |
| |
| time_zone_ptr TimezoneDatabase::FindTimezone(const string& tz, const TimestampValue& tv) { |
| // The backing database does not capture some subtleties, there are special cases |
| if ((tv.date().year() < 2011 || (tv.date().year() == 2011 && tv.date().month() < 4)) && |
| (iequals("Europe/Moscow", tz) || iequals("Moscow", tz) || iequals("MSK", tz))) { |
| // We transition in pre April 2011 from using the tz_database_ to a custom rule |
| // Russia stopped using daylight savings in 2011, the tz_database_ is |
| // set up assuming Russia uses daylight saving every year. |
| // Sun, Mar 27, 2:00AM Moscow clocks moved forward +1 hour (a total of GMT +4) |
| // Specifically, |
| // UTC Time 26 Mar 2011 22:59:59 +0000 ===> Sun Mar 27 01:59:59 MSK 2011 |
| // UTC Time 26 Mar 2011 23:00:00 +0000 ===> Sun Mar 27 03:00:00 MSK 2011 |
| // This means in 2011, The database rule will apply DST starting March 26 2011. |
| // This will be a correct +4 offset, and the database rule can apply until |
| // Oct 31 when tz_database_ will incorrectly attempt to turn clocks backwards 1 hour. |
| return TIMEZONE_MSK_PRE_2011_DST; |
| } |
| |
| // See if they specified a zone id |
| time_zone_ptr tzp = tz_database_.time_zone_from_region(tz); |
| if (tzp != NULL) return tzp; |
| |
| for (vector<string>::const_iterator iter = tz_region_list_.begin(); |
| iter != tz_region_list_.end(); ++iter) { |
| time_zone_ptr tzp = tz_database_.time_zone_from_region(*iter); |
| DCHECK(tzp != NULL); |
| if (tzp->dst_zone_abbrev() == tz) return tzp; |
| if (tzp->std_zone_abbrev() == tz) return tzp; |
| if (tzp->dst_zone_name() == tz) return tzp; |
| if (tzp->std_zone_name() == tz) return tzp; |
| } |
| return time_zone_ptr(); |
| } |
| |
| } |