blob: b033e6a71dbe4dd36e76838c65d17474b7cb4d53 [file] [log] [blame]
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
**/
#ifndef __STDC_FORMAT_MACROS
#define __STDC_FORMAT_MACROS
#endif
#include "types/IntervalParser.hpp"
#include <algorithm>
#include <cctype>
#include <cinttypes>
#include <cstddef>
#include <cstdint>
#include <cstdio>
#include <cstring>
#include <string>
#include "types/IntervalLit.hpp"
// TODO(chasseur): This file has some moderately complex invocations of
// sscanf() and some branchy conditional parsing. Parsing performance might be
// improved by using some pre-compiled regexes instead of just sscanf().
namespace quickstep {
bool IntervalParser::ParseDatetimeIntervalSimpleFormat(
const std::int64_t count,
const std::string &units_lowercase,
DatetimeIntervalLit *interval) {
if ((units_lowercase == "microsecond")
|| (units_lowercase == "microseconds")
|| (units_lowercase == "us")) {
interval->interval_ticks = count * (DatetimeIntervalLit::kTicksPerSecond / 1000000);
return true;
} else if ((units_lowercase == "millisecond")
|| (units_lowercase == "milliseconds")
|| (units_lowercase == "ms")) {
interval->interval_ticks = count * (DatetimeIntervalLit::kTicksPerSecond / 1000);
return true;
} else if ((units_lowercase == "second")
|| (units_lowercase == "seconds")
|| (units_lowercase == "s")) {
interval->interval_ticks = count * DatetimeIntervalLit::kTicksPerSecond;
return true;
} else if ((units_lowercase == "minute")
|| (units_lowercase == "minutes")) {
interval->interval_ticks = count * DatetimeIntervalLit::kTicksPerSecond * 60;
return true;
} else if ((units_lowercase == "hour")
|| (units_lowercase == "hours")
|| (units_lowercase == "h")) {
interval->interval_ticks = count * DatetimeIntervalLit::kTicksPerSecond * 60 * 60;
return true;
} else if ((units_lowercase == "day")
|| (units_lowercase == "days")
|| (units_lowercase == "d")) {
interval->interval_ticks = count * DatetimeIntervalLit::kTicksPerSecond * 60 * 60 * 24;
return true;
} else if ((units_lowercase == "week")
|| (units_lowercase == "weeks")
|| (units_lowercase == "w")) {
interval->interval_ticks = count * DatetimeIntervalLit::kTicksPerSecond * 60 * 60 * 24 * 7;
return true;
} else {
return false;
}
}
bool IntervalParser::ParseDatetimeIntervalComplexFormat(
const std::string &interval_string,
DatetimeIntervalLit *interval) {
// First, look for a leading minus (we don't just parse the first field as
// signed, because the first field could look like negative 0, e.g. in
// "-00:30:00", and we would mistakenly parse such an interval as being
// futureward rather than pastward).
int start_offset = 0;
int matched = std::sscanf(interval_string.c_str(),
"%*1[-]%n",
&start_offset);
// Now, 'start_offset' is 0 if there is no leading minus, or nonzero and
// pointing to the first character after the minus if there is one.
std::int64_t days, hours;
int minutes, seconds, subseconds;
char daysbuf[5]; // Buffer for unit string containing "days" or something similar.
int seconds_end = 0;
int parse_end = 0;
bool have_subseconds = false;
// First, try matching with "X day(s)" specified at the front of the string.
matched = std::sscanf(interval_string.c_str() + start_offset,
"%" SCNd64 " %4s %2" SCNd64 ":%2d:%2d%n.%6d%n",
&days,
daysbuf,
&hours,
&minutes,
&seconds,
&seconds_end,
&subseconds,
&parse_end);
if ((matched == 5) || (matched == 6)) {
// Make sure there is no garbage at the end of the string.
switch (matched) {
case 5:
if (static_cast<std::string::size_type>(start_offset + seconds_end)
!= interval_string.length()) {
return false;
}
break;
case 6:
if (static_cast<std::string::size_type>(start_offset + parse_end)
!= interval_string.length()) {
return false;
}
have_subseconds = true;
break;
default:
return false;
}
// Check that "day" or "days" (case insensitive) was specified where we
// expect.
const std::size_t daysbuf_length = std::strlen(daysbuf);
for (std::size_t i = 0; i < daysbuf_length; ++i) {
daysbuf[i] = ::tolower(daysbuf[i]);
}
if (!((std::strcmp("day", daysbuf) == 0)
|| (std::strcmp("days", daysbuf) == 0))) {
return false;
}
// Check that hours are in the range 0-23 when a day is specified.
if (hours > 23) {
return false;
}
} else {
// Try again without leading "days" portion.
days = 0;
seconds_end = parse_end = 0;
matched = std::sscanf(interval_string.c_str() + start_offset,
"%" SCNd64 ":%2d:%2d%n.%6d%n",
&hours,
&minutes,
&seconds,
&seconds_end,
&subseconds,
&parse_end);
// Make sure there is no garbage at the end of the string, and that at
// least hours, minutes, seconds have been specified.
switch (matched) {
case 3:
if (static_cast<std::string::size_type>(start_offset + seconds_end)
!= interval_string.length()) {
return false;
}
break;
case 4:
if (static_cast<std::string::size_type>(start_offset + parse_end)
!= interval_string.length()) {
return false;
}
have_subseconds = true;
break;
default:
return false;
}
}
// We already consumed the leading minus (if any) above, so make sure that
// none of the fields parsed as negative.
if ((days < 0)
|| (hours < 0)
|| (minutes < 0)
|| (seconds < 0)
|| (have_subseconds && (subseconds < 0))) {
return false;
}
// Check that minutes and seconds are in-range.
if ((minutes > 59) || (seconds > 59)) {
return false;
}
if (have_subseconds) {
// If less than 6 digits of subseconds were specified, multiply the
// the subseconds so that they are scaled properly.
const int adjustment_digits = 6 - (parse_end - seconds_end - 1);
for (int i = 0; i < adjustment_digits; ++i) {
subseconds *= 10;
}
} else {
subseconds = 0;
}
// Add up the total magnitude of the interval in ticks.
const std::int64_t total_ticks
= (((days * 24 + hours) * 60 + minutes) * 60 + seconds)
* DatetimeIntervalLit::kTicksPerSecond
+ subseconds;
// Negate if there was a preceding minus "-".
interval->interval_ticks = (start_offset == 0) ? total_ticks
: -total_ticks;
return true;
}
bool IntervalParser::ParseYearMonthIntervalSimpleFormat(
const std::int64_t count,
const std::string &units_lowercase,
YearMonthIntervalLit *interval) {
if ((units_lowercase == "month")
|| (units_lowercase == "months")
|| (units_lowercase == "mon")
|| (units_lowercase == "mons")) {
interval->months = count;
return true;
} else if ((units_lowercase == "year")
|| (units_lowercase == "years")
|| (units_lowercase == "y")) {
interval->months = count * 12;
return true;
} else if ((units_lowercase == "decade")
|| (units_lowercase == "decades")) {
interval->months = count * 12 * 10;
return true;
} else if ((units_lowercase == "century")
|| (units_lowercase == "centuries")) {
interval->months = count * 12 * 100;
return true;
} else if ((units_lowercase == "millennium")
|| (units_lowercase == "millennia")) {
interval->months = count * 12 * 1000;
return true;
} else {
return false;
}
}
bool IntervalParser::ParseYearMonthIntervalComplexFormat(
const std::string &interval_string,
YearMonthIntervalLit *interval) {
// First, look for a leading minus (we don't just parse the first field as
// signed, because the first field could look like negative 0, e.g. in
// "-0 years 2 mons", and we would mistakenly parse such an interval as being
// futureward rather than pastward).
int start_offset = 0;
int matched = std::sscanf(interval_string.c_str(),
"%*1[-]%n",
&start_offset);
// Now, 'start_offset' is 0 if there is no leading minus, or nonzero and
// pointing to the first character after the minus if there is one.
std::int64_t years;
int months;
char yearsbuf[6];
char monthsbuf[7];
int parse_end = 0;
matched = std::sscanf(interval_string.c_str() + start_offset,
"%" SCNd64 " %5s %d %6s%n",
&years,
yearsbuf,
&months,
monthsbuf,
&parse_end);
// Make sure we matched all the expected fields, that there is no trailing
// garbage at the end of the string, and that years and months are in range.
if ((matched != 4)
|| (static_cast<std::string::size_type>(start_offset + parse_end)
!= interval_string.length())
|| (years < 0) || (months < 0) || (months > 11)) {
return false;
}
// Case-insensitive check for some string indicating years.
const std::size_t yearsbuf_length = std::strlen(yearsbuf);
for (std::size_t i = 0; i < yearsbuf_length; ++i) {
yearsbuf[i] = ::tolower(yearsbuf[i]);
}
if (!((std::strcmp("year", yearsbuf) == 0)
|| (std::strcmp("years", yearsbuf) == 0)
|| (std::strcmp("y", yearsbuf) == 0))) {
return false;
}
// Case-insensitive check for some string indicating months.
const std::size_t monthsbuf_length = std::strlen(monthsbuf);
for (std::size_t i = 0; i < monthsbuf_length; ++i) {
monthsbuf[i] = ::tolower(monthsbuf[i]);
}
if (!((std::strcmp("month", monthsbuf) == 0)
|| (std::strcmp("months", monthsbuf) == 0)
|| (std::strcmp("mon", monthsbuf) == 0)
|| (std::strcmp("mons", monthsbuf) == 0))) {
return false;
}
// Add up total magnitude in months, negate if there was a preceding minus
// "-".
interval->months = (start_offset == 0) ? (years * 12 + months)
: - (years * 12 + months);
return true;
}
bool IntervalParser::ParseSimpleFormatFieldsFromCombinedString(
const std::string &interval_string,
std::int64_t *count,
std::string *units_lowercase) {
int units_start, units_end, scan_end;
// Both value and units single-quoted.
units_start = units_end = scan_end = 0;
int matched = std::sscanf(interval_string.c_str(),
" ' %" SCNd64 " %n%*[^' \t\n\v\f\r]%n ' %n",
count, &units_start, &units_end, &scan_end);
if ((matched == 1)
&& (static_cast<std::string::size_type>(scan_end) == interval_string.length())) {
units_lowercase->assign(interval_string, units_start, units_end - units_start);
std::transform(units_lowercase->begin(),
units_lowercase->end(),
units_lowercase->begin(),
::tolower);
return true;
}
// Value single-quoted, units not.
units_start = units_end = scan_end = 0;
matched = std::sscanf(interval_string.c_str(),
" ' %" SCNd64 " ' %n%*[^' \t\n\v\f\r]%n %n",
count, &units_start, &units_end, &scan_end);
if ((matched == 1)
&& (static_cast<std::string::size_type>(scan_end) == interval_string.length())) {
units_lowercase->assign(interval_string, units_start, units_end - units_start);
std::transform(units_lowercase->begin(),
units_lowercase->end(),
units_lowercase->begin(),
::tolower);
return true;
}
// No quotes.
units_start = units_end = scan_end = 0;
matched = std::sscanf(interval_string.c_str(),
" %" SCNd64 " %n%*[^' \t\n\v\f\r]%n %n",
count, &units_start, &units_end, &scan_end);
if ((matched == 1)
&& (static_cast<std::string::size_type>(scan_end) == interval_string.length())) {
units_lowercase->assign(interval_string, units_start, units_end - units_start);
std::transform(units_lowercase->begin(),
units_lowercase->end(),
units_lowercase->begin(),
::tolower);
return true;
}
// Unrecognized format.
return false;
}
bool IntervalParser::ParseSimpleFormatFieldsFromCombinedStringNoExtraWhitespace(
const std::string &interval_string,
std::int64_t *count,
std::string *units_lowercase) {
int units_start, units_end, scan_end;
// Both value and units single-quoted.
units_start = units_end = scan_end = 0;
int matched = std::sscanf(interval_string.c_str(),
"'%" SCNd64 " %n%*[^' \t\n\v\f\r]%n'%n",
count, &units_start, &units_end, &scan_end);
if ((matched == 1)
&& (static_cast<std::string::size_type>(scan_end) == interval_string.length())) {
units_lowercase->assign(interval_string, units_start, units_end - units_start);
std::transform(units_lowercase->begin(),
units_lowercase->end(),
units_lowercase->begin(),
::tolower);
return true;
}
// Value single-quoted, units not.
units_start = units_end = scan_end = 0;
matched = std::sscanf(interval_string.c_str(),
"'%" SCNd64 "' %n%*[^' \t\n\v\f\r]%n",
count, &units_start, &units_end);
if ((matched == 1)
&& (static_cast<std::string::size_type>(units_end) == interval_string.length())) {
units_lowercase->assign(interval_string, units_start, units_end - units_start);
std::transform(units_lowercase->begin(),
units_lowercase->end(),
units_lowercase->begin(),
::tolower);
return true;
}
// No quotes.
units_start = units_end = scan_end = 0;
matched = std::sscanf(interval_string.c_str(),
"%" SCNd64 " %n%*[^' \t\n\v\f\r]%n",
count, &units_start, &units_end);
if ((matched == 1)
&& (static_cast<std::string::size_type>(units_end) == interval_string.length())) {
units_lowercase->assign(interval_string, units_start, units_end - units_start);
std::transform(units_lowercase->begin(),
units_lowercase->end(),
units_lowercase->begin(),
::tolower);
return true;
}
// Unrecognized format.
return false;
}
} // namespace quickstep