blob: ef3b041b867aa0b3ce129fe4ca0aadd19c54aa73 [file] [log] [blame]
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#include "runtime/date-parse-util.h"
#include <boost/date_time/gregorian/gregorian.hpp>
#include "cctz/civil_time.h"
#include "exprs/timestamp-functions.h"
#include "runtime/datetime-iso-sql-format-parser.h"
#include "runtime/datetime-simple-date-format-parser.h"
#include "runtime/string-value.inline.h"
#include "runtime/timestamp-value.h"
#include "util/string-parser.h"
#include "common/names.h"
using boost::gregorian::date;
using boost::posix_time::ptime;
using boost::posix_time::time_duration;
namespace impala {
using namespace datetime_parse_util;
bool DateParser::ParseSimpleDateFormat(const char* str, int len,
const DateTimeFormatContext& dt_ctx, DateValue* date) {
DCHECK(dt_ctx.toks.size() > 0);
// 'dt_ctx' must have date tokens. Time tokens are accepted but ignored.
DCHECK(dt_ctx.has_date_toks);
DCHECK(date != nullptr);
DateTimeParseResult dt_result;
if (UNLIKELY(str == nullptr || len <= 0
|| !SimpleDateFormatParser::ParseDateTime(str, len, dt_ctx, &dt_result))) {
*date = DateValue();
return false;
}
int year = dt_result.realign_year ?
RealignYear(dt_result, dt_ctx.century_break_ptime) :
dt_result.year;
*date = DateValue(year, dt_result.month, dt_result.day);
return date->IsValid();
}
int DateParser::RealignYear(const DateTimeParseResult& dt_result,
const ptime& century_break) {
DCHECK(dt_result.realign_year);
DCHECK(dt_result.year < 100);
DCHECK(!century_break.is_special());
const date& cb_date = century_break.date();
const time_duration& cb_time = century_break.time_of_day();
DCHECK(!cb_time.is_negative());
// Let the century start at AABB and the year parsed be YY, this gives us AAYY.
int year = dt_result.year + (cb_date.year() / 100) * 100;
cctz::civil_day aligned_day(year, dt_result.month, dt_result.day);
cctz::civil_day century_break_day(cb_date.year(), cb_date.month(), cb_date.day());
// Advance 100 years if parsed time is before the century break.
// For example if the century breaks at 1937 but year = 1936, the correct year would
// be 2036.
if (aligned_day < century_break_day
|| (aligned_day == century_break_day && (cb_time.ticks() > 0))) {
year += 100;
}
return year;
}
// Parses the 'len' bytes starting at 'str' as a date without an explicit format and
// writes the outcome to 'date'.
// Leading and trailing white space is ignored. A default format context is looked up
// for the trimmed input first; if none is available, a context is tokenized lazily from
// the input itself. 'accept_time_toks' is forwarded to the tokenizer.
// Returns the result of the context-based ParseSimpleDateFormat() overload, or the
// result of IndicateDateParseFailure() if 'str' is null, the trimmed input is empty or
// the lazy tokenization fails.
bool DateParser::ParseSimpleDateFormat(const char* str, int len, bool accept_time_toks,
    DateValue* date) {
  DCHECK(date != nullptr);
  if (UNLIKELY(str == nullptr)) return IndicateDateParseFailure(date);

  int trimmed_len = len;
  // Remove leading white space.
  // isspace() requires a value representable as unsigned char (or EOF); passing a plain
  // char that holds a negative value (any byte >= 0x80 where char is signed) is
  // undefined behavior, hence the cast.
  while (trimmed_len > 0 && isspace(static_cast<unsigned char>(*str))) {
    ++str;
    --trimmed_len;
  }
  // Strip the trailing blanks.
  while (trimmed_len > 0
      && isspace(static_cast<unsigned char>(str[trimmed_len - 1]))) {
    --trimmed_len;
  }
  if (UNLIKELY(trimmed_len <= 0)) return IndicateDateParseFailure(date);

  const DateTimeFormatContext* dt_ctx =
      SimpleDateFormatTokenizer::GetDefaultFormatContext(str, trimmed_len,
          accept_time_toks, false);
  if (dt_ctx != nullptr) return ParseSimpleDateFormat(str, trimmed_len, *dt_ctx, date);

  // Generating context lazily as a fall back if default formats fail.
  // TokenizeByStr() works on the input string itself, so no template format string is
  // needed here.
  DateTimeFormatContext lazy_ctx(str, trimmed_len);
  if (!SimpleDateFormatTokenizer::TokenizeByStr(&lazy_ctx, accept_time_toks, false)) {
    return IndicateDateParseFailure(date);
  }
  return ParseSimpleDateFormat(str, trimmed_len, lazy_ctx, date);
}
bool DateParser::ParseIsoSqlFormat(const char* str, int len,
const DateTimeFormatContext& dt_ctx, DateValue* date) {
DCHECK(dt_ctx.toks.size() > 0);
DCHECK(date != nullptr);
DCHECK(dt_ctx.has_date_toks);
if (UNLIKELY(str == nullptr || len <= 0)) return IndicateDateParseFailure(date);
DateTimeParseResult dt_result;
bool result = IsoSqlFormatParser::ParseDateTime(str, len, dt_ctx, &dt_result);
if (!result) return IndicateDateParseFailure(date);
*date = DateValue(dt_result.year, dt_result.month, dt_result.day);
return date->IsValid();
}
// Renders 'date' as a string by walking the tokens of 'dt_ctx' in order.
// 'dt_ctx' is expected to contain date tokens only. Returns an empty string if 'date'
// cannot be broken down into year, month and day.
// Numeric tokens are left-padded with '0' up to the token length unless the token has
// its 'fm_modifier' set; separator and text tokens are appended as they are.
string DateParser::Format(const DateTimeFormatContext& dt_ctx, const DateValue& date) {
  DCHECK(dt_ctx.toks.size() > 0);
  DCHECK(dt_ctx.has_date_toks && !dt_ctx.has_time_toks);
  int year, month, day;
  if (!date.ToYearMonthDay(&year, &month, &day)) return "";

  string result;
  result.reserve(dt_ctx.fmt_out_len);
  for (const DateTimeFormatToken& tok : dt_ctx.toks) {
    // Stays at -1 for tokens that append their text directly to 'result'.
    int32_t num_val = -1;
    switch (tok.type) {
      case YEAR:
      case ROUND_YEAR: {
        num_val = year;
        if (tok.len < 4) {
          // Keep only the last 'tok.len' digits of the year. The power of ten is
          // computed with integer arithmetic: a floating point pow() followed by a
          // truncating conversion to int may be off by one if pow() is inexact.
          int adjust_factor = 1;
          for (int i = 0; i < tok.len; ++i) adjust_factor *= 10;
          num_val %= adjust_factor;
        }
        break;
      }
      case MONTH_IN_YEAR: num_val = month; break;
      case MONTH_IN_YEAR_SLT: {
        // 'month' is 1-based while MONTH_ARRAY is indexed from 0.
        result.append(TimestampFunctions::MONTH_ARRAY[month - 1]);
        break;
      }
      case DAY_IN_MONTH: num_val = day; break;
      case DAY_IN_YEAR: {
        num_val = GetDayInYear(year, month, day);
        break;
      }
      case SEPARATOR: {
        result.append(tok.val, tok.len);
        break;
      }
      case TEXT: {
        result.append(FormatTextToken(tok));
        break;
      }
      default: DCHECK(false) << "Unknown date format token";
    }
    if (num_val > -1) {
      string tmp_str = std::to_string(num_val);
      // Zero-pad on the left up to the token length unless padding is suppressed.
      if (!tok.fm_modifier && tmp_str.length() < tok.len) {
        tmp_str.insert(0, tok.len - tmp_str.length(), '0');
      }
      result.append(tmp_str);
    }
  }
  return result;
}
// Resets 'date' to a default-constructed DateValue and returns false, so that a caller
// can report a parse failure with a single return statement.
bool DateParser::IndicateDateParseFailure(DateValue* date) {
  *date = DateValue{};
  return false;
}
}