| // Licensed to the Apache Software Foundation (ASF) under one |
| // or more contributor license agreements. See the NOTICE file |
| // distributed with this work for additional information |
| // regarding copyright ownership. The ASF licenses this file |
| // to you under the Apache License, Version 2.0 (the |
| // "License"); you may not use this file except in compliance |
| // with the License. You may obtain a copy of the License at |
| // |
| // http://www.apache.org/licenses/LICENSE-2.0 |
| // |
| // Unless required by applicable law or agreed to in writing, |
| // software distributed under the License is distributed on an |
| // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
| // KIND, either express or implied. See the License for the |
| // specific language governing permissions and limitations |
| // under the License. |
| |
| #include "datetime-parser-common.h" |
| |
| #include <boost/date_time/gregorian/gregorian.hpp> |
| |
| #include "runtime/string-value.h" |
| #include "util/string-parser.h" |
| |
| using std::string; |
| using std::unordered_set; |
| |
| namespace impala { |
| |
| namespace datetime_parse_util { |
| |
| void DateTimeFormatContext::SetCenturyBreakAndCurrentTime(const TimestampValue& now) { |
| current_time = &now; |
| const boost::gregorian::date& now_date = now.date(); |
| // If the century break is at an invalid 02/29, set it to 02/28 for consistency with |
| // Hive. |
| if (now_date.month() == 2 && now_date.day() == 29 && |
| !boost::gregorian::gregorian_calendar::is_leap_year(now_date.year() - 80)) { |
| century_break_ptime = boost::posix_time::ptime( |
| boost::gregorian::date(now_date.year() - 80, 2, 28), now.time()); |
| } else { |
| century_break_ptime = boost::posix_time::ptime( |
| boost::gregorian::date(now_date.year() - 80, now_date.month(), now_date.day()), |
| now.time()); |
| } |
| DCHECK(!century_break_ptime.is_special()); |
| } |
| |
| void DateTimeFormatContext::Reset(const char* fmt, int fmt_len) { |
| this->fmt = fmt; |
| this->fmt_len = fmt_len; |
| this->fmt_out_len = fmt_len; |
| this->has_date_toks = false; |
| this->has_time_toks = false; |
| this->fx_modifier = false; |
| this->toks.clear(); |
| this->century_break_ptime = boost::posix_time::not_a_date_time; |
| this->current_time = nullptr; |
| } |
| |
| void ReportBadFormat(FunctionContext* context, FormatTokenizationResult error_type, |
| const StringVal& format, bool is_error) { |
| DCHECK(context != nullptr); |
| std::stringstream ss; |
| if (format.is_null || format.len == 0) { |
| ss << "Bad date/time conversion format: format string is NULL or has 0 length"; |
| } else { |
| switch (error_type) { |
| case DUPLICATE_FORMAT: |
| ss << "PARSE ERROR: Invalid duplication of format element"; |
| break; |
| case YEAR_WITH_ROUNDED_YEAR_ERROR: |
| ss << "PARSE ERROR: Both year and round year are provided"; |
| break; |
| case CONFLICTING_YEAR_TOKENS_ERROR: |
| ss << "PARSE ERROR: Multiple year token provided"; |
| break; |
| case DAY_OF_YEAR_TOKEN_CONFLICT: |
| ss << "PARSE ERROR: Day of year provided with day or month token"; |
| break; |
| case CONFLICTING_HOUR_TOKENS_ERROR: |
| ss << "PARSE ERROR: Multiple hour tokens provided"; |
| break; |
| case CONFLICTING_MERIDIEM_TOKENS_ERROR: |
| ss << "PARSE ERROR: Multiple median indicator tokens provided"; |
| break; |
| case MERIDIEM_CONFLICTS_WITH_HOUR_ERROR: |
| ss << "PARSE ERROR: Conflict between median indicator and hour token"; |
| break; |
| case MISSING_HOUR_TOKEN_ERROR: |
| ss << "PARSE ERROR: Missing hour token"; |
| break; |
| case SECOND_IN_DAY_CONFLICT: |
| ss << "PARSE ERROR: Second of day token conflicts with other token(s)"; |
| break; |
| case TOO_LONG_FORMAT_ERROR: |
| ss << "PARSE ERROR: The input format is too long"; |
| break; |
| case TIMEZONE_OFFSET_NOT_ALLOWED_ERROR: |
| ss << "PARSE ERROR: Timezone offset not allowed in a datetime to string " |
| "conversion"; |
| break; |
| case MISSING_TZH_TOKEN_ERROR: |
| ss << "PARSE ERROR: TZH token is required for TZM"; |
| break; |
| case DATE_WITH_TIME_ERROR: |
| ss << "PARSE ERROR: Time tokens provided with date type."; |
| break; |
| case CONFLICTING_FRACTIONAL_SECOND_TOKENS_ERROR: |
| ss << "PARSE ERROR: Multiple fractional second token provided."; |
| break; |
| case TEXT_TOKEN_NOT_CLOSED: |
| ss << "PARSE ERROR: Missing closing quotation mark."; |
| break; |
| case NO_DATETIME_TOKENS_ERROR: |
| ss << "PARSE ERROR: No datetime tokens provided."; |
| break; |
| case MISPLACED_FX_MODIFIER_ERROR: |
| ss << "PARSE ERROR: FX modifier should be at the beginning of the format string."; |
| break; |
| default: |
| const StringValue& fmt = StringValue::FromStringVal(format); |
| ss << "Bad date/time conversion format: " << fmt.DebugString(); |
| } |
| } |
| if (is_error) { |
| context->SetError(ss.str().c_str()); |
| } else { |
| context->AddWarning(ss.str().c_str()); |
| } |
| } |
| |
| bool ParseAndValidate(const char* token, int token_len, int min, int max, |
| int* result) { |
| DCHECK(token != nullptr); |
| DCHECK(token_len > 0); |
| DCHECK(result != nullptr); |
| StringParser::ParseResult status; |
| *result = StringParser::StringToInt<int>(token, token_len, &status); |
| if (UNLIKELY(StringParser::PARSE_SUCCESS != status)) return false; |
| if (UNLIKELY(*result < min || *result > max)) return false; |
| return true; |
| } |
| |
| bool ParseFractionToken(const char* token, int token_len, |
| DateTimeParseResult* result) { |
| DCHECK(token != nullptr); |
| DCHECK(token_len > 0); |
| DCHECK(result != nullptr); |
| StringParser::ParseResult status; |
| result->fraction = |
| StringParser::StringToInt<int32_t>(token, token_len, &status); |
| if (UNLIKELY(StringParser::PARSE_SUCCESS != status)) return false; |
| // A user may specify a time of 04:30:22.1238, the parser will return 1238 for |
| // the fractional portion. This does not represent the intended value of |
| // 123800000, therefore the number must be scaled up. |
| if (token_len < FRACTIONAL_SECOND_MAX_LENGTH) { |
| result->fraction *= std::pow(10, FRACTIONAL_SECOND_MAX_LENGTH - token_len); |
| } |
| return true; |
| } |
| |
| int GetDayInYear(int year, int month, int day_in_month) { |
| DCHECK(month >= 1 && month <= 12); |
| const vector<int>& month_ranges = IsLeapYear(year) ? LEAP_YEAR_MONTH_RANGES : |
| MONTH_RANGES; |
| return day_in_month + month_ranges[month - 1]; |
| } |
| |
| bool GetMonthAndDayFromDaysSinceJan1(int year, int days_since_jan1, int* month, |
| int* day) { |
| DCHECK(days_since_jan1 >= 0 && days_since_jan1 < 366); |
| DCHECK(month != nullptr); |
| DCHECK(day != nullptr); |
| // Calculate month using month ranges and the average month length. |
| const vector<int>& month_ranges = IsLeapYear(year) ? LEAP_YEAR_MONTH_RANGES : |
| MONTH_RANGES; |
| int m = static_cast<int>(days_since_jan1 / 30.5); |
| DCHECK(month_ranges[m] <= days_since_jan1); |
| |
| *month = (month_ranges[m + 1] <= days_since_jan1) ? m + 2 : m + 1; |
| if (*month < 1 || *month > 12) return false; |
| |
| // Calculate day. |
| *day = days_since_jan1 - month_ranges[*month - 1] + 1; |
| return (*day >= 1 && *day <= 31); |
| } |
| |
| string FormatTextToken(const DateTimeFormatToken& tok) { |
| DCHECK(tok.type == TEXT); |
| string result; |
| result.reserve(tok.len); |
| for (const char* text_it = tok.val; text_it < tok.val + tok.len; ++text_it) { |
| if (*text_it != '\\') { |
| result.append(text_it, 1); |
| continue; |
| } |
| if (tok.is_double_escaped && strncmp(text_it, "\\\\\\\"", 4) == 0) { |
| result.append("\""); |
| text_it += 3; |
| } else if (!tok.is_double_escaped && strncmp(text_it, "\\\"", 2) == 0) { |
| result.append("\""); |
| ++text_it; |
| } else if (strncmp(text_it, "\\\\", 2) == 0) { |
| result.append("\\"); |
| ++text_it; |
| } else if (strncmp(text_it, "\\b", 2) == 0) { |
| result.append("\b"); |
| ++text_it; |
| } else if (strncmp(text_it, "\\n", 2) == 0) { |
| result.append("\n"); |
| ++text_it; |
| } else if (strncmp(text_it, "\\r", 2) == 0) { |
| result.append("\r"); |
| ++text_it; |
| } else if (strncmp(text_it, "\\t", 2) == 0) { |
| result.append("\t"); |
| ++text_it; |
| } |
| } |
| return result; |
| } |
| |
| |
| } |
| } |