// be/src/runtime/datetime-parser-common.h - impala - Git at Google

 // Licensed to the Apache Software Foundation (ASF) under one
 // or more contributor license agreements.  See the NOTICE file
 // distributed with this work for additional information
 // regarding copyright ownership.  The ASF licenses this file
 // to you under the Apache License, Version 2.0 (the
 // "License"); you may not use this file except in compliance
 // with the License.  You may obtain a copy of the License at
 //
 //   http://www.apache.org/licenses/LICENSE-2.0
 //
 // Unless required by applicable law or agreed to in writing,
 // software distributed under the License is distributed on an
 // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 // KIND, either express or implied.  See the License for the
 // specific language governing permissions and limitations
 // under the License.

 #pragma once

 #include <boost/date_time/posix_time/ptime.hpp>
 #include "gutil/macros.h"
 #include <unordered_map>
 #include <unordered_set>
 #include <vector>

 #include "exprs/timestamp-functions.h"
 #include "runtime/timestamp-value.h"
 #include "udf/udf.h"

 namespace impala {

 using impala_udf::FunctionContext;
 using impala_udf::StringVal;

 /// Impala provides multiple algorithms to parse datetime formats:
 ///   - SimpleDateFormat: This is the one that is traditionally used with functions such
 ///     as to_timestamp() and from_timestamp().
 ///   - ISO SQL:2016 compliant datetime pattern matching. CAST(..FORMAT..) comes with
 ///     support for this pattern only.
 /// This is a collection of the logic that is shared between the 2 types of pattern
 /// matching including result codes, error reporting, format token types etc.
 namespace datetime_parse_util {
 /// Upper bound on the length of a fractional second. NOTE(review): presumably the
 /// maximum number of fractional-second digits (9 digits = nanoseconds) -- confirm at
 /// the use sites.
 constexpr int FRACTIONAL_SECOND_MAX_LENGTH = 9;

 /// Month boundaries of a non-leap year, expressed as the number of days elapsed since
 /// January 1. Entry i is the offset at which month i + 1 starts; the last entry is the
 /// total number of days in the year.
 const std::vector<int> MONTH_RANGES = {
     0,   31,  59,   // Jan, Feb, Mar
     90,  120, 151,  // Apr, May, Jun
     181, 212, 243,  // Jul, Aug, Sep
     273, 304, 334,  // Oct, Nov, Dec
     365             // end of the year
 };

 /// Month boundaries of a leap year, expressed as the number of days elapsed since
 /// January 1. Entry i is the offset at which month i + 1 starts; the last entry is the
 /// total number of days in the year.
 const std::vector<int> LEAP_YEAR_MONTH_RANGES = {
     0,   31,  60,   // Jan, Feb, Mar
     91,  121, 152,  // Apr, May, Jun
     182, 213, 244,  // Jul, Aug, Sep
     274, 305, 335,  // Oct, Nov, Dec
     366             // end of the year
 };

 /// Lookup table keyed by the lowercase 3-letter prefix of a month name. Each value is a
 /// pair of:
 ///   - the remainder of the full month name after the prefix, which can be used to
 ///     check that a full month name was spelled correctly in the input of a string to
 ///     datetime conversion, and
 ///   - the ordinal number of the month, which can be used as the result of that
 ///     conversion.
 /// The prefix alone uniquely identifies the month.
 const std::unordered_map<std::string, std::pair<std::string, int>>
     MONTH_PREFIX_TO_SUFFIX = {
         {"jan", {"uary",   1}},   // january
         {"feb", {"ruary",  2}},   // february
         {"mar", {"ch",     3}},   // march
         {"apr", {"il",     4}},   // april
         {"may", {"",       5}},   // may (no suffix)
         {"jun", {"e",      6}},   // june
         {"jul", {"y",      7}},   // july
         {"aug", {"ust",    8}},   // august
         {"sep", {"tember", 9}},   // september
         {"oct", {"ober",   10}},  // october
         {"nov", {"ember",  11}},  // november
         {"dec", {"ember",  12}}   // december
 };

 /// Day-name counterpart of 'MONTH_PREFIX_TO_SUFFIX': keyed by the lowercase 3-letter
 /// prefix of a day name, each value holds the remainder of the full day name and the
 /// ordinal number of the day, where Monday is 1 and Sunday is 7.
 const std::unordered_map<std::string, std::pair<std::string, int>>
     DAY_PREFIX_TO_SUFFIX = {
         {"mon", {"day",    1}},  // monday
         {"tue", {"sday",   2}},  // tuesday
         {"wed", {"nesday", 3}},  // wednesday
         {"thu", {"rsday",  4}},  // thursday
         {"fri", {"day",    5}},  // friday
         {"sat", {"urday",  6}},  // saturday
         {"sun", {"day",    7}}   // sunday
 };

 /// Number of characters in an abbreviated month name ('JAN', 'FEB', ...).
 constexpr int SHORT_MONTH_NAME_LENGTH = 3;

 /// Number of characters in the longest full month name, 'SEPTEMBER'.
 constexpr int MAX_MONTH_NAME_LENGTH = 9;

 /// Number of characters in an abbreviated day name ('MON', 'TUE', ...).
 constexpr int SHORT_DAY_NAME_LENGTH = 3;

 /// Number of characters in the longest full day name, 'WEDNESDAY'.
 constexpr int MAX_DAY_NAME_LENGTH = 9;

 /// Every result code that parsing (tokenizing) a datetime format pattern can yield.
 /// The enumerators keep their implicit, consecutive values starting at 0; the section
 /// comments below only group them by what their names refer to.
 enum FormatTokenizationResult {
   // Generic outcomes.
   SUCCESS = 0,
   GENERAL_ERROR,
   DUPLICATE_FORMAT,

   // Year, month and day-of-year tokens.
   YEAR_WITH_ROUNDED_YEAR_ERROR,
   CONFLICTING_YEAR_TOKENS_ERROR,
   CONFLICTING_MONTH_TOKENS_ERROR,
   DAY_OF_YEAR_TOKEN_CONFLICT,

   // Hour, meridiem indicator and second-in-day tokens.
   CONFLICTING_HOUR_TOKENS_ERROR,
   CONFLICTING_MERIDIEM_TOKENS_ERROR,
   MERIDIEM_CONFLICTS_WITH_HOUR_ERROR,
   MISSING_HOUR_TOKEN_ERROR,
   SECOND_IN_DAY_CONFLICT,

   // Format length, timezone, date/time mix and fractional seconds.
   TOO_LONG_FORMAT_ERROR,
   TIMEZONE_OFFSET_NOT_ALLOWED_ERROR,
   MISSING_TZH_TOKEN_ERROR,
   DATE_WITH_TIME_ERROR,
   CONFLICTING_FRACTIONAL_SECOND_TOKENS_ERROR,

   // Text tokens, empty formats and the FX modifier.
   TEXT_TOKEN_NOT_CLOSED,
   NO_DATETIME_TOKENS_ERROR,
   MISPLACED_FX_MODIFIER_ERROR,

   // Tokens named as not allowed for parsing.
   QUARTER_NOT_ALLOWED_FOR_PARSING,
   DAY_OF_WEEK_NOT_ALLOWED_FOR_PARSING,
   DAY_NAME_NOT_ALLOWED_FOR_PARSING,
   WEEK_NUMBER_NOT_ALLOWED_FOR_PARSING,

   // Day-of-week, ISO 8601 week-based and date tokens.
   CONFLICTING_DAY_OF_WEEK_TOKENS_ERROR,
   MISSING_ISO8601_WEEK_BASED_TOKEN_ERROR,
   CONFLICTING_DATE_TOKENS_ERROR
 };

 /// The token types that datetime format patterns are built from. The enumerators keep
 /// their consecutive values starting at UNKNOWN = 0; the section comments below only
 /// group them by what their names refer to.
 enum DateTimeFormatTokenType {
   UNKNOWN = 0,
   SEPARATOR,

   // Date fields.
   YEAR,
   ROUND_YEAR,
   MONTH_IN_YEAR,
   DAY_IN_MONTH,
   DAY_IN_YEAR,

   // Time-of-day fields.
   HOUR_IN_DAY,
   HOUR_IN_HALF_DAY,
   MINUTE_IN_HOUR,
   SECOND_IN_DAY,
   SECOND_IN_MINUTE,
   FRACTION,

   // Timezone fields.
   TZ_OFFSET,
   TIMEZONE_HOUR,
   TIMEZONE_MIN,

   // Indicators, free text and modifiers.
   MERIDIEM_INDICATOR,
   ISO8601_TIME_INDICATOR,
   ISO8601_ZULU_INDICATOR,
   TEXT,
   FM_MODIFIER,
   FX_MODIFIER,

   // Month and day names.
   MONTH_NAME,
   MONTH_NAME_SHORT,
   DAY_NAME,
   DAY_NAME_SHORT,

   // Day-of-week, quarter and week numbers.
   DAY_OF_WEEK,
   QUARTER_OF_YEAR,
   WEEK_OF_YEAR,
   WEEK_OF_MONTH,

   // ISO 8601 week-based fields.
   ISO8601_WEEK_NUMBERING_YEAR,
   ISO8601_WEEK_OF_YEAR,
   ISO8601_DAY_OF_WEEK
 };

 /// Direction of a cast between string and datetime types.
 enum CastDirection {
   PARSE,  ///< 'string to datetime': a string type is cast to a datetime type.
   FORMAT  ///< 'datetime to string': a datetime type is cast to a string type.
 };

 /// Text of a meridiem indicator: 'first' is the upper-case spelling and 'second' the
 /// lower-case spelling.
 using MERIDIEM_INDICATOR_TEXT = std::pair<const char*, const char*>;

 // Ante meridiem, without and with dots.
 const MERIDIEM_INDICATOR_TEXT AM = {"AM", "am"};
 const MERIDIEM_INDICATOR_TEXT AM_LONG = {"A.M.", "a.m."};

 // Post meridiem, without and with dots.
 const MERIDIEM_INDICATOR_TEXT PM = {"PM", "pm"};
 const MERIDIEM_INDICATOR_TEXT PM_LONG = {"P.M.", "p.m."};

 /// Metadata describing a single token of a datetime format.
 struct DateTimeFormatToken {
   /// Type of this datetime format token.
   DateTimeFormatTokenType type;
   /// Position in the datetime string to be parsed at which this token is supposed to
   /// start.
   int pos;
   /// Length of the token.
   int len;
   /// Points at the first character of this token inside the format string.
   const char* val;
   /// Set when the FM modifier is active for this token; this takes precedence over an
   /// FX modifier that is active for the whole format.
   bool fm_modifier = false;
   /// Set for a text token that is surrounded by escaped double quotes, which makes the
   /// content of the token double-escaped.
   bool is_double_escaped = false;

   /// Creates a token from its type, position, length and format-string pointer. Both
   /// 'fm_modifier' and 'is_double_escaped' start out false.
   DateTimeFormatToken(DateTimeFormatTokenType type, int pos, int len, const char* val)
     : type(type),
       pos(pos),
       len(len),
       val(val) {}
 };

 /// Metadata about a datetime format. The members are filled in step by step while the
 /// format is being parsed; once parsing is done the object holds the tokens that were
 /// found together with auxiliary information such as whether the format contains date
 /// tokens, time tokens or both.
 struct DateTimeFormatContext {
   /// Start of the format string.
   const char* fmt;
   /// Length of the format string.
   int fmt_len;
   /// Expected output length of a 'datetime to string' cast. Usually this is the length
   /// of the input format string, with these exceptions:
   ///   - SimpleDateFormat parsing with '2019-11-10' as input and 'yyyy-d-m' as format
   ///     produces output that is longer than the format string.
   ///   - ISO SQL parsing has token types whose output length differs from the token
   ///     length, such as 'MONTH', 'DAY', 'HH12', 'HH24', FF1, FF2, FF4, etc.
   int fmt_out_len;
   /// The tokens found in the format string.
   std::vector<DateTimeFormatToken> toks;
   /// Whether the format contains date tokens.
   bool has_date_toks;
   /// Whether the format contains time tokens.
   bool has_time_toks;

   /// True if the format has an FX modifier that is effective for all the tokens.
   bool fx_modifier;

   /// Handles rounded year when casting with SimpleDateFormat. Call
   /// SetCenturyBreakAndCurrentTime() before using this member.
   boost::posix_time::ptime century_break_ptime;
   /// Used by ISO:SQL:2016 parsing for round year and for years with fewer than 4
   /// digits. Call SetCenturyBreakAndCurrentTime() before using this member. The pointee
   /// is not owned by this object.
   const TimestampValue* current_time;

   /// Creates a context without a format string.
   DateTimeFormatContext() { Reset(nullptr); }

   /// Creates a context for the null-terminated format string 'fmt' ('fmt' may be null).
   DateTimeFormatContext(const char* fmt) { Reset(fmt); }

   /// Creates a context for the format string 'fmt' of length 'fmt_len'.
   DateTimeFormatContext(const char* fmt, int fmt_len) { Reset(fmt, fmt_len); }

   /// Sets the century break used when parsing a 1 or 2-digit year. According to Hive
   /// such a year should fall into the interval [now - 80 years, now + 20 years). Also
   /// stores the current time that ISO:SQL:2016 parsing uses for round year calculation.
   void SetCenturyBreakAndCurrentTime(const TimestampValue& now);

   /// Initializes all the members of this object.
   void Reset(const char* fmt, int fmt_len);

   /// Same as the two-argument Reset() with the length derived from 'fmt': 0 for a null
   /// pointer, strlen(fmt) otherwise.
   void Reset(const char* fmt) {
     if (fmt == nullptr) {
       Reset(fmt, 0);
     } else {
       Reset(fmt, strlen(fmt));
     }
   }
 };

 /// Holds what was extracted from a date/time string by parsing. Every field carries a
 /// default so that a freshly constructed result is fully initialized.
 struct DateTimeParseResult {
   int year{-1};
   int month{0};
   int day{0};
   int hour{0};
   int minute{0};
   int second{0};
   int32_t fraction{0};
   /// Timezone offset; defaults to a zero-length duration.
   boost::posix_time::time_duration tz_offset{0, 0, 0, 0};
   bool realign_year{false};
 };

 /// Reports that tokenizing the input format failed for some reason. Constructs a
 /// message based on 'error_type' and pushes it to 'context'.
 ///   - 'context': the function context that receives the message.
 ///   - 'error_type': result code describing why the tokenization failed.
 ///   - 'format': the format involved. NOTE(review): presumably the format string that
 ///     failed to tokenize -- confirm against the definition.
 ///   - 'is_error': decides whether the message is an error or a warning.
 void ReportBadFormat(FunctionContext* context, FormatTokenizationResult error_type,
     const StringVal& format, bool is_error);

 /// NOTE(review): not documented in this header. Judging by the signature this
 /// presumably parses the 'token_len' characters starting at 'token' as a number, checks
 /// it against 'min' and 'max' and stores it in '*result' -- confirm against the
 /// definition. The return value is marked WARN_UNUSED_RESULT and should be checked.
 bool ParseAndValidate(const char* token, int token_len, int min, int max,
     int* result) WARN_UNUSED_RESULT;

 /// Given the month calculates the quarter of year.
 int GetQuarter(int month);

 /// NOTE(review): not documented in this header. Judging by the signature this
 /// presumably parses the fractional second token of 'token_len' characters at 'token'
 /// into 'result' -- confirm against the definition. The return value is marked
 /// WARN_UNUSED_RESULT and should be checked.
 bool ParseFractionToken(const char* token, int token_len,
     DateTimeParseResult* result) WARN_UNUSED_RESULT;

 /// Converts a month name token (either full or short name) to the ordinal number of
 /// the month, between 1 and 12.
 ///   - 'tok': the format token; 'tok.type' must be either MONTH_NAME or
 ///     MONTH_NAME_SHORT.
 ///   - 'token_start', 'token_end': delimit the month part of the input. If the month
 ///     part is not followed by a separator, its end is found using
 ///     MONTH_PREFIX_TO_SUFFIX: the 3 letter prefix identifies a particular month and
 ///     the rest of the month name is then checked against the expected suffix. On a
 ///     match '*token_end' is adjusted to point to the character right after the end of
 ///     the month part.
 ///   - 'fx_modifier': whether there is an active FX modifier on the whole format.
 ///   - 'month': receives the result.
 /// Returns false if the given month name is invalid.
 bool ParseMonthNameToken(const DateTimeFormatToken& tok, const char* token_start,
     const char** token_end, bool fx_modifier, int* month)
     WARN_UNUSED_RESULT;

 /// Converts a day name token (either full or short name) to the ordinal number of the
 /// day, between 1 and 7.
 ///   - 'tok': the format token; 'tok.type' must be either DAY_NAME or DAY_NAME_SHORT.
 ///   - 'token_start', 'token_end': delimit the day part of the input. If the day part
 ///     is not followed by a separator, its end is found using DAY_PREFIX_TO_SUFFIX: the
 ///     3 letter prefix identifies a particular day and the rest of the day name is then
 ///     checked against the expected suffix. On a match '*token_end' is adjusted to
 ///     point to the character right after the end of the day part.
 ///   - 'fx_modifier': whether there is an active FX modifier on the whole format.
 ///   - 'day': receives the result.
 /// Returns false if the given day name is invalid.
 bool ParseDayNameToken(const DateTimeFormatToken& tok, const char* token_start,
     const char** token_end, bool fx_modifier, int* day)
     WARN_UNUSED_RESULT;

 /// Returns true if 'year' is a leap year: it is divisible by 4 and, when it is also
 /// divisible by 100, it has to be divisible by 400 as well.
 inline bool IsLeapYear(int year) {
   if (year % 4 != 0) return false;
   if (year % 100 != 0) return true;
   return year % 400 == 0;
 }

 /// Given the year, month and the day in month calculates the day in year.
 /// NOTE(review): the numbering base of the result (whether January 1 maps to 1) is not
 /// stated here -- confirm against the definition.
 int GetDayInYear(int year, int month, int day_in_month);

 /// Calculates the month and the day of month from a year and the number of days passed
 /// since 1st of January of that year.
 ///   - 'year', 'days_since_jan1': the inputs.
 ///   - 'month', 'day': receive the results.
 /// Returns true on success and false if any of the input parameters are invalid, e.g.
 /// if 'year' is a non-leap year and 'days_since_jan1' is 365.
 bool GetMonthAndDayFromDaysSinceJan1(int year, int days_since_jan1, int* month, int* day)
     WARN_UNUSED_RESULT;

 /// Receives a text token and gives its string formatted representation. This is used in
 /// a string to datetime conversion path.
 /// NOTE(review): 'tok.type' is presumably expected to be TEXT -- confirm against the
 /// definition.
 std::string FormatTextToken(const DateTimeFormatToken& tok);

 /// Taking 'num_of_month' this function provides the name of the month. Based on the
 /// casing of the month format token in 'tok' this can format the results in 3 cases:
 /// Capitalized, full lowercase and full uppercase. E.g. "March", "march" and "MARCH".
 /// NOTE(review): a reference is returned; the lifetime of the referenced string is
 /// decided by the definition -- confirm before storing it.
 const std::string& FormatMonthName(int num_of_month, const DateTimeFormatToken& tok);

 /// Gets 'day' as a number between 1 and 7 that represents the day of week where Sunday
 /// is 1 and returns the name of the day. Based on the casing of the day format token in
 /// 'tok' this can format the results in 3 cases: Capitalized, full lowercase and full
 /// uppercase. E.g. "Monday", "monday" and "MONDAY".
 /// NOTE(review): the numbering documented here (Sunday is 1) differs from the one used
 /// by DAY_PREFIX_TO_SUFFIX (Monday is 1) -- confirm that this is intended.
 /// NOTE(review): a reference is returned; the lifetime of the referenced string is
 /// decided by the definition -- confirm before storing it.
 const std::string& FormatDayName(int day, const DateTimeFormatToken& tok);

 /// Returns how the output of a month or day token should be formatted. Make sure to
 /// call this when 'tok.type' is any of the month or day name tokens.
 TimestampFunctions::TextCase GetOutputCase(const DateTimeFormatToken& tok);

 /// Given the year, month and the day in month calculates the week in year where the
 /// first week of the year starts from 1st January.
 /// NOTE(review): presumably the first week is numbered 1 -- confirm against the
 /// definition.
 int GetWeekOfYear(int year, int month, int day);

 /// Given the day of month calculates the week in the month where the first week of the
 /// month starts from the first day of the month.
 /// NOTE(review): presumably the first week is numbered 1 -- confirm against the
 /// definition.
 int GetWeekOfMonth(int day);

 /// Returns the year adjusted to 'len' digits, i.e. only the last 'len' digits of 'year'
 /// are kept as the example shows.
 /// E.g. AdjustYearToLength(1789, 3) returns 789.
 int AdjustYearToLength(int year, int len);
 }

 }
	// Licensed to the Apache Software Foundation (ASF) under one
	// or more contributor license agreements. See the NOTICE file
	// distributed with this work for additional information
	// regarding copyright ownership. The ASF licenses this file
	// to you under the Apache License, Version 2.0 (the
	// "License"); you may not use this file except in compliance
	// with the License. You may obtain a copy of the License at
	//
	// http://www.apache.org/licenses/LICENSE-2.0
	//
	// Unless required by applicable law or agreed to in writing,
	// software distributed under the License is distributed on an
	// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
	// KIND, either express or implied. See the License for the
	// specific language governing permissions and limitations
	// under the License.

	#pragma once

	#include <boost/date_time/posix_time/ptime.hpp>
	#include "gutil/macros.h"
	#include <unordered_map>
	#include <unordered_set>
	#include <vector>

	#include "exprs/timestamp-functions.h"
	#include "runtime/timestamp-value.h"
	#include "udf/udf.h"

	namespace impala {

	using impala_udf::FunctionContext;
	using impala_udf::StringVal;

	/// Impala provides multiple algorithms to parse datetime formats:
	///   - SimpleDateFormat: This is the one that is traditionally used with functions such
	///     as to_timestamp() and from_timestamp().
	///   - ISO SQL:2016 compliant datetime pattern matching. CAST(..FORMAT..) comes with
	///     support for this pattern only.
	/// This is a collection of the logic that is shared between the 2 types of pattern
	/// matching including result codes, error reporting, format token types etc.
	namespace datetime_parse_util {
	/// Upper bound on the length of a fractional second. NOTE(review): presumably the
	/// maximum number of fractional-second digits (9 digits = nanoseconds) -- confirm at
	/// the use sites.
	constexpr int FRACTIONAL_SECOND_MAX_LENGTH = 9;

	/// Month boundaries of a non-leap year, expressed as the number of days elapsed since
	/// January 1. Entry i is the offset at which month i + 1 starts; the last entry is the
	/// total number of days in the year.
	const std::vector<int> MONTH_RANGES = {
	    0,   31,  59,   // Jan, Feb, Mar
	    90,  120, 151,  // Apr, May, Jun
	    181, 212, 243,  // Jul, Aug, Sep
	    273, 304, 334,  // Oct, Nov, Dec
	    365             // end of the year
	};

	/// Month boundaries of a leap year, expressed as the number of days elapsed since
	/// January 1. Entry i is the offset at which month i + 1 starts; the last entry is the
	/// total number of days in the year.
	const std::vector<int> LEAP_YEAR_MONTH_RANGES = {
	    0,   31,  60,   // Jan, Feb, Mar
	    91,  121, 152,  // Apr, May, Jun
	    182, 213, 244,  // Jul, Aug, Sep
	    274, 305, 335,  // Oct, Nov, Dec
	    366             // end of the year
	};

	/// Lookup table keyed by the lowercase 3-letter prefix of a month name. Each value is a
	/// pair of:
	///   - the remainder of the full month name after the prefix, which can be used to
	///     check that a full month name was spelled correctly in the input of a string to
	///     datetime conversion, and
	///   - the ordinal number of the month, which can be used as the result of that
	///     conversion.
	/// The prefix alone uniquely identifies the month.
	const std::unordered_map<std::string, std::pair<std::string, int>>
	    MONTH_PREFIX_TO_SUFFIX = {
	        {"jan", {"uary",   1}},   // january
	        {"feb", {"ruary",  2}},   // february
	        {"mar", {"ch",     3}},   // march
	        {"apr", {"il",     4}},   // april
	        {"may", {"",       5}},   // may (no suffix)
	        {"jun", {"e",      6}},   // june
	        {"jul", {"y",      7}},   // july
	        {"aug", {"ust",    8}},   // august
	        {"sep", {"tember", 9}},   // september
	        {"oct", {"ober",   10}},  // october
	        {"nov", {"ember",  11}},  // november
	        {"dec", {"ember",  12}}   // december
	};

	/// Day-name counterpart of 'MONTH_PREFIX_TO_SUFFIX': keyed by the lowercase 3-letter
	/// prefix of a day name, each value holds the remainder of the full day name and the
	/// ordinal number of the day, where Monday is 1 and Sunday is 7.
	const std::unordered_map<std::string, std::pair<std::string, int>>
	    DAY_PREFIX_TO_SUFFIX = {
	        {"mon", {"day",    1}},  // monday
	        {"tue", {"sday",   2}},  // tuesday
	        {"wed", {"nesday", 3}},  // wednesday
	        {"thu", {"rsday",  4}},  // thursday
	        {"fri", {"day",    5}},  // friday
	        {"sat", {"urday",  6}},  // saturday
	        {"sun", {"day",    7}}   // sunday
	};

	/// Number of characters in an abbreviated month name ('JAN', 'FEB', ...).
	constexpr int SHORT_MONTH_NAME_LENGTH = 3;

	/// Number of characters in the longest full month name, 'SEPTEMBER'.
	constexpr int MAX_MONTH_NAME_LENGTH = 9;

	/// Number of characters in an abbreviated day name ('MON', 'TUE', ...).
	constexpr int SHORT_DAY_NAME_LENGTH = 3;

	/// Number of characters in the longest full day name, 'WEDNESDAY'.
	constexpr int MAX_DAY_NAME_LENGTH = 9;

	/// Every result code that parsing (tokenizing) a datetime format pattern can yield.
	/// The enumerators keep their implicit, consecutive values starting at 0; the section
	/// comments below only group them by what their names refer to.
	enum FormatTokenizationResult {
	  // Generic outcomes.
	  SUCCESS = 0,
	  GENERAL_ERROR,
	  DUPLICATE_FORMAT,

	  // Year, month and day-of-year tokens.
	  YEAR_WITH_ROUNDED_YEAR_ERROR,
	  CONFLICTING_YEAR_TOKENS_ERROR,
	  CONFLICTING_MONTH_TOKENS_ERROR,
	  DAY_OF_YEAR_TOKEN_CONFLICT,

	  // Hour, meridiem indicator and second-in-day tokens.
	  CONFLICTING_HOUR_TOKENS_ERROR,
	  CONFLICTING_MERIDIEM_TOKENS_ERROR,
	  MERIDIEM_CONFLICTS_WITH_HOUR_ERROR,
	  MISSING_HOUR_TOKEN_ERROR,
	  SECOND_IN_DAY_CONFLICT,

	  // Format length, timezone, date/time mix and fractional seconds.
	  TOO_LONG_FORMAT_ERROR,
	  TIMEZONE_OFFSET_NOT_ALLOWED_ERROR,
	  MISSING_TZH_TOKEN_ERROR,
	  DATE_WITH_TIME_ERROR,
	  CONFLICTING_FRACTIONAL_SECOND_TOKENS_ERROR,

	  // Text tokens, empty formats and the FX modifier.
	  TEXT_TOKEN_NOT_CLOSED,
	  NO_DATETIME_TOKENS_ERROR,
	  MISPLACED_FX_MODIFIER_ERROR,

	  // Tokens named as not allowed for parsing.
	  QUARTER_NOT_ALLOWED_FOR_PARSING,
	  DAY_OF_WEEK_NOT_ALLOWED_FOR_PARSING,
	  DAY_NAME_NOT_ALLOWED_FOR_PARSING,
	  WEEK_NUMBER_NOT_ALLOWED_FOR_PARSING,

	  // Day-of-week, ISO 8601 week-based and date tokens.
	  CONFLICTING_DAY_OF_WEEK_TOKENS_ERROR,
	  MISSING_ISO8601_WEEK_BASED_TOKEN_ERROR,
	  CONFLICTING_DATE_TOKENS_ERROR
	};

	/// The token types that datetime format patterns are built from. The enumerators keep
	/// their consecutive values starting at UNKNOWN = 0; the section comments below only
	/// group them by what their names refer to.
	enum DateTimeFormatTokenType {
	  UNKNOWN = 0,
	  SEPARATOR,

	  // Date fields.
	  YEAR,
	  ROUND_YEAR,
	  MONTH_IN_YEAR,
	  DAY_IN_MONTH,
	  DAY_IN_YEAR,

	  // Time-of-day fields.
	  HOUR_IN_DAY,
	  HOUR_IN_HALF_DAY,
	  MINUTE_IN_HOUR,
	  SECOND_IN_DAY,
	  SECOND_IN_MINUTE,
	  FRACTION,

	  // Timezone fields.
	  TZ_OFFSET,
	  TIMEZONE_HOUR,
	  TIMEZONE_MIN,

	  // Indicators, free text and modifiers.
	  MERIDIEM_INDICATOR,
	  ISO8601_TIME_INDICATOR,
	  ISO8601_ZULU_INDICATOR,
	  TEXT,
	  FM_MODIFIER,
	  FX_MODIFIER,

	  // Month and day names.
	  MONTH_NAME,
	  MONTH_NAME_SHORT,
	  DAY_NAME,
	  DAY_NAME_SHORT,

	  // Day-of-week, quarter and week numbers.
	  DAY_OF_WEEK,
	  QUARTER_OF_YEAR,
	  WEEK_OF_YEAR,
	  WEEK_OF_MONTH,

	  // ISO 8601 week-based fields.
	  ISO8601_WEEK_NUMBERING_YEAR,
	  ISO8601_WEEK_OF_YEAR,
	  ISO8601_DAY_OF_WEEK
	};

	/// Direction of a cast between string and datetime types.
	enum CastDirection {
	  PARSE,  ///< 'string to datetime': a string type is cast to a datetime type.
	  FORMAT  ///< 'datetime to string': a datetime type is cast to a string type.
	};

	/// Text of a meridiem indicator: 'first' is the upper-case spelling and 'second' the
	/// lower-case spelling. Both members are pointers to string literals; a pair of plain
	/// 'const char' cannot be initialized from string literals and does not compile.
	typedef std::pair<const char*, const char*> MERIDIEM_INDICATOR_TEXT;
	const MERIDIEM_INDICATOR_TEXT AM = {"AM", "am"};
	const MERIDIEM_INDICATOR_TEXT AM_LONG = {"A.M.", "a.m."};
	const MERIDIEM_INDICATOR_TEXT PM = {"PM", "pm"};
	const MERIDIEM_INDICATOR_TEXT PM_LONG = {"P.M.", "p.m."};

	/// Metadata describing a single token of a datetime format.
	struct DateTimeFormatToken {
	  /// Type of this datetime format token.
	  DateTimeFormatTokenType type;
	  /// Position in the datetime string to be parsed at which this token is supposed to
	  /// start.
	  int pos;
	  /// Length of the token.
	  int len;
	  /// Points at the first character of this token inside the format string.
	  const char* val;
	  /// Set when the FM modifier is active for this token; this takes precedence over an
	  /// FX modifier that is active for the whole format.
	  bool fm_modifier = false;
	  /// Set for a text token that is surrounded by escaped double quotes, which makes the
	  /// content of the token double-escaped.
	  bool is_double_escaped = false;

	  /// Creates a token from its type, position, length and format-string pointer. Both
	  /// 'fm_modifier' and 'is_double_escaped' start out false.
	  DateTimeFormatToken(DateTimeFormatTokenType type, int pos, int len, const char* val)
	    : type(type),
	      pos(pos),
	      len(len),
	      val(val) {}
	};

	/// Metadata about a datetime format. The members are filled in step by step while the
	/// format is being parsed; once parsing is done the object holds the tokens that were
	/// found together with auxiliary information such as whether the format contains date
	/// tokens, time tokens or both.
	struct DateTimeFormatContext {
	  /// Start of the format string.
	  const char* fmt;
	  /// Length of the format string.
	  int fmt_len;
	  /// Expected output length of a 'datetime to string' cast. Usually this is the length
	  /// of the input format string, with these exceptions:
	  ///   - SimpleDateFormat parsing with '2019-11-10' as input and 'yyyy-d-m' as format
	  ///     produces output that is longer than the format string.
	  ///   - ISO SQL parsing has token types whose output length differs from the token
	  ///     length, such as 'MONTH', 'DAY', 'HH12', 'HH24', FF1, FF2, FF4, etc.
	  int fmt_out_len;
	  /// The tokens found in the format string.
	  std::vector<DateTimeFormatToken> toks;
	  /// Whether the format contains date tokens.
	  bool has_date_toks;
	  /// Whether the format contains time tokens.
	  bool has_time_toks;

	  /// True if the format has an FX modifier that is effective for all the tokens.
	  bool fx_modifier;

	  /// Handles rounded year when casting with SimpleDateFormat. Call
	  /// SetCenturyBreakAndCurrentTime() before using this member.
	  boost::posix_time::ptime century_break_ptime;
	  /// Used by ISO:SQL:2016 parsing for round year and for years with fewer than 4
	  /// digits. Call SetCenturyBreakAndCurrentTime() before using this member. The pointee
	  /// is not owned by this object.
	  const TimestampValue* current_time;

	  /// Creates a context without a format string.
	  DateTimeFormatContext() { Reset(nullptr); }

	  /// Creates a context for the null-terminated format string 'fmt' ('fmt' may be null).
	  DateTimeFormatContext(const char* fmt) { Reset(fmt); }

	  /// Creates a context for the format string 'fmt' of length 'fmt_len'.
	  DateTimeFormatContext(const char* fmt, int fmt_len) { Reset(fmt, fmt_len); }

	  /// Sets the century break used when parsing a 1 or 2-digit year. According to Hive
	  /// such a year should fall into the interval [now - 80 years, now + 20 years). Also
	  /// stores the current time that ISO:SQL:2016 parsing uses for round year calculation.
	  void SetCenturyBreakAndCurrentTime(const TimestampValue& now);

	  /// Initializes all the members of this object.
	  void Reset(const char* fmt, int fmt_len);

	  /// Same as the two-argument Reset() with the length derived from 'fmt': 0 for a null
	  /// pointer, strlen(fmt) otherwise.
	  void Reset(const char* fmt) {
	    if (fmt == nullptr) {
	      Reset(fmt, 0);
	    } else {
	      Reset(fmt, strlen(fmt));
	    }
	  }
	};

	/// Holds what was extracted from a date/time string by parsing. Every field carries a
	/// default so that a freshly constructed result is fully initialized.
	struct DateTimeParseResult {
	  int year{-1};
	  int month{0};
	  int day{0};
	  int hour{0};
	  int minute{0};
	  int second{0};
	  int32_t fraction{0};
	  /// Timezone offset; defaults to a zero-length duration.
	  boost::posix_time::time_duration tz_offset{0, 0, 0, 0};
	  bool realign_year{false};
	};

	/// Reports that tokenizing the input format failed for some reason. Constructs a
	/// message based on 'error_type' and pushes it to 'context'.
	///   - 'context': the function context that receives the message.
	///   - 'error_type': result code describing why the tokenization failed.
	///   - 'format': the format involved. NOTE(review): presumably the format string that
	///     failed to tokenize -- confirm against the definition.
	///   - 'is_error': decides whether the message is an error or a warning.
	void ReportBadFormat(FunctionContext* context, FormatTokenizationResult error_type,
	const StringVal& format, bool is_error);

	/// NOTE(review): not documented in this header. Judging by the signature this
	/// presumably parses the 'token_len' characters starting at 'token' as a number, checks
	/// it against 'min' and 'max' and stores it in '*result' -- confirm against the
	/// definition. The return value is marked WARN_UNUSED_RESULT and should be checked.
	bool ParseAndValidate(const char* token, int token_len, int min, int max,
	int* result) WARN_UNUSED_RESULT;

	/// Given the month calculates the quarter of year.
	int GetQuarter(int month);

	/// NOTE(review): not documented in this header. Judging by the signature this
	/// presumably parses the fractional second token of 'token_len' characters at 'token'
	/// into 'result' -- confirm against the definition. The return value is marked
	/// WARN_UNUSED_RESULT and should be checked.
	bool ParseFractionToken(const char* token, int token_len,
	DateTimeParseResult* result) WARN_UNUSED_RESULT;

	/// Converts a month name token (either full or short name) to the ordinal number of
	/// the month, between 1 and 12.
	///   - 'tok': the format token; 'tok.type' must be either MONTH_NAME or
	///     MONTH_NAME_SHORT.
	///   - 'token_start', 'token_end': delimit the month part of the input. If the month
	///     part is not followed by a separator, its end is found using
	///     MONTH_PREFIX_TO_SUFFIX: the 3 letter prefix identifies a particular month and
	///     the rest of the month name is then checked against the expected suffix. On a
	///     match '*token_end' is adjusted to point to the character right after the end of
	///     the month part.
	///   - 'fx_modifier': whether there is an active FX modifier on the whole format.
	///   - 'month': receives the result.
	/// Returns false if the given month name is invalid.
	bool ParseMonthNameToken(const DateTimeFormatToken& tok, const char* token_start,
	const char** token_end, bool fx_modifier, int* month)
	WARN_UNUSED_RESULT;

	/// Converts a day name token (either full or short name) to the ordinal number of the
	/// day, between 1 and 7.
	///   - 'tok': the format token; 'tok.type' must be either DAY_NAME or DAY_NAME_SHORT.
	///   - 'token_start', 'token_end': delimit the day part of the input. If the day part
	///     is not followed by a separator, its end is found using DAY_PREFIX_TO_SUFFIX: the
	///     3 letter prefix identifies a particular day and the rest of the day name is then
	///     checked against the expected suffix. On a match '*token_end' is adjusted to
	///     point to the character right after the end of the day part.
	///   - 'fx_modifier': whether there is an active FX modifier on the whole format.
	///   - 'day': receives the result.
	/// Returns false if the given day name is invalid.
	bool ParseDayNameToken(const DateTimeFormatToken& tok, const char* token_start,
	const char** token_end, bool fx_modifier, int* day)
	WARN_UNUSED_RESULT;

	/// Returns true if 'year' is a leap year: it is divisible by 4 and, when it is also
	/// divisible by 100, it has to be divisible by 400 as well.
	inline bool IsLeapYear(int year) {
	  // The logical OR has to be spelled '||'; a backslash-escaped form does not compile.
	  return year % 4 == 0 && (year % 100 != 0 || year % 400 == 0);
	}

	/// Given the year, month and the day in month calculates the day in year.
	/// NOTE(review): the numbering base of the result (whether January 1 maps to 1) is not
	/// stated here -- confirm against the definition.
	int GetDayInYear(int year, int month, int day_in_month);

	/// Calculates the month and the day of month from a year and the number of days passed
	/// since 1st of January of that year.
	///   - 'year', 'days_since_jan1': the inputs.
	///   - 'month', 'day': receive the results.
	/// Returns true on success and false if any of the input parameters are invalid, e.g.
	/// if 'year' is a non-leap year and 'days_since_jan1' is 365.
	bool GetMonthAndDayFromDaysSinceJan1(int year, int days_since_jan1, int* month, int* day)
	WARN_UNUSED_RESULT;

	/// Receives a text token and gives its string formatted representation. This is used in
	/// a string to datetime conversion path.
	/// NOTE(review): 'tok.type' is presumably expected to be TEXT -- confirm against the
	/// definition.
	std::string FormatTextToken(const DateTimeFormatToken& tok);

	/// Taking 'num_of_month' this function provides the name of the month. Based on the
	/// casing of the month format token in 'tok' this can format the results in 3 cases:
	/// Capitalized, full lowercase and full uppercase. E.g. "March", "march" and "MARCH".
	/// NOTE(review): a reference is returned; the lifetime of the referenced string is
	/// decided by the definition -- confirm before storing it.
	const std::string& FormatMonthName(int num_of_month, const DateTimeFormatToken& tok);

	/// Gets 'day' as a number between 1 and 7 that represents the day of week where Sunday
	/// is 1 and returns the name of the day. Based on the casing of the day format token in
	/// 'tok' this can format the results in 3 cases: Capitalized, full lowercase and full
	/// uppercase. E.g. "Monday", "monday" and "MONDAY".
	/// NOTE(review): the numbering documented here (Sunday is 1) differs from the one used
	/// by DAY_PREFIX_TO_SUFFIX (Monday is 1) -- confirm that this is intended.
	/// NOTE(review): a reference is returned; the lifetime of the referenced string is
	/// decided by the definition -- confirm before storing it.
	const std::string& FormatDayName(int day, const DateTimeFormatToken& tok);

	/// Returns how the output of a month or day token should be formatted. Make sure to
	/// call this when 'tok.type' is any of the month or day name tokens.
	TimestampFunctions::TextCase GetOutputCase(const DateTimeFormatToken& tok);

	/// Given the year, month and the day in month calculates the week in year where the
	/// first week of the year starts from 1st January.
	/// NOTE(review): presumably the first week is numbered 1 -- confirm against the
	/// definition.
	int GetWeekOfYear(int year, int month, int day);

	/// Given the day of month calculates the week in the month where the first week of the
	/// month starts from the first day of the month.
	/// NOTE(review): presumably the first week is numbered 1 -- confirm against the
	/// definition.
	int GetWeekOfMonth(int day);

	/// Returns the year adjusted to 'len' digits, i.e. only the last 'len' digits of 'year'
	/// are kept as the example shows.
	/// E.g. AdjustYearToLength(1789, 3) returns 789.
	int AdjustYearToLength(int year, int len);
	}

	}