blob: 73e75009d5d46f4af4f2f39918e44bc6afbaf2d1 [file] [log] [blame]
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#include "gandiva/interval_holder.h"
#include <algorithm>
#include "gandiva/node.h"
#include "gandiva/regex_util.h"
namespace gandiva {
// pre-compiled pattern for matching period that only have numbers
static const RE2 period_only_contains_numbers(R"(\d+)");
// pre-compiled pattern for matching periods in 8601 formats that not contains weeks.
static const RE2 iso8601_complete_period(
R"(P(-?[[:digit:]]+Y|-?[[:digit:]]+[,.][[:digit:]]+Y)?)"
R"((-?[[:digit:]]+M|-?[[:digit:]]+[,.][[:digit:]]+M)?)"
R"((-?[[:digit:]]+D|-?[[:digit:]]+[,.][[:digit:]]+D)?)"
R"(T(-?[[:digit:]]+H|-?[[:digit:]]+[,.][[:digit:]]+H)?)"
R"((-?[[:digit:]]+M|-?[[:digit:]]+[,.][[:digit:]]+M)?)"
R"((-?[[:digit:]]+S|-?[[:digit:]]+[,.][[:digit:]]+S)?)");
// pre-compiled pattern for matching periods in 8601 formats that not contain time
// (hours, minutes and seconds) information.
static const RE2 iso8601_period_without_time(
R"(P(-?[[:digit:]]+Y|-?[[:digit:]]+[,.][[:digit:]]+Y)?)"
R"((-?[[:digit:]]+M|-?[[:digit:]]+[,.][[:digit:]]+M)?)"
R"((-?[[:digit:]]+D|-?[[:digit:]]+[,.][[:digit:]]+D)?)");
// pre-compiled pattern for matching periods in 8601 formats that not contain time
// (hours, minutes and seconds) information.
static const std::regex period_not_contains_time(R"(^((?!T).)*$)");
// pre-compiled pattern for matching periods in 8601 formats that contains weeks inside
// them. The ISO8601 specification defines that if the string contains a week, it cannot
// have other time granularities information, like day, years and months.
static const RE2 iso8601_period_with_weeks(
R"(P(-?[[:digit:]]+W|-?[[:digit:]]+[,.][[:digit:]]+W){1})");
// It considers that a day has exactly 24 hours of duration
static const int64_t kMillisInDay = 86400000;
static const int64_t kMillisInAWeek = kMillisInDay * 7;
static const int64_t kMillisInAHour = 3600000;
static const int64_t kMillisInAMinute = 60000;
static const int64_t kMillisInASecond = 1000;
static void return_error_with_cause(ExecutionContext* context, std::string& data,
int32_t suppression_error) {
if (suppression_error != 0) {
return;
}
context->set_error_msg(data.c_str());
}
int64_t IntervalDaysHolder::GetIntervalDayFromMillis(ExecutionContext* context,
std::string& number_as_string,
int32_t suppress_errors,
bool* out_valid) {
int64_t period_in_millis = 0;
try {
period_in_millis = std::stol(number_as_string);
} catch (...) {
std::string cause("Error converting the number of millis");
return_error_with_cause(context, cause, suppress_errors);
*out_valid = false;
return 0;
}
int64_t qty_days = period_in_millis / kMillisInDay;
int64_t qty_millis = period_in_millis % kMillisInDay;
// The response is a 64-bit integer where the lower half of the bytes represents the
// number of the days and the other half represents the number of milliseconds.
int64_t out = (qty_days & 0x00000000FFFFFFFF);
out |= ((qty_millis << 32) & 0xFFFFFFFF00000000);
*out_valid = true;
return out;
}
int64_t IntervalDaysHolder::GetIntervalDayFromWeeks(ExecutionContext* context,
std::string& number_as_string,
int32_t suppress_errors,
bool* out_valid) {
double qty_weeks = 0;
try {
qty_weeks = std::stod(number_as_string);
} catch (...) {
// Error converting double value
std::string cause("Error converting the number of weeks");
return_error_with_cause(context, cause, suppress_errors);
*out_valid = false;
return 0;
}
auto millis_in_all_weeks = static_cast<int64_t>(qty_weeks * kMillisInAWeek);
int64_t qty_days = millis_in_all_weeks / kMillisInDay;
int64_t qty_millis = millis_in_all_weeks % kMillisInDay;
// The response is a 64-bit integer where the lower half of the bytes represents the
// number of the days and the other half represents the number of milliseconds.
int64_t out = (qty_days & 0x00000000FFFFFFFF);
out |= ((qty_millis << 32) & 0xFFFFFFFF00000000);
*out_valid = true;
return out;
}
int64_t IntervalDaysHolder::GetIntervalDayFromCompletePeriod(
ExecutionContext* context, std::string& days_in_period, std::string& hours_in_period,
std::string& minutes_in_period, std::string& seconds_in_period,
int32_t suppress_errors, bool* out_valid) {
double qty_days = 0;
double qty_hours = 0;
double qty_minutes = 0;
double qty_seconds = 0;
if (!days_in_period.empty()) {
try {
std::replace(days_in_period.begin(), days_in_period.end(), ',', '.');
qty_days = std::stod(days_in_period);
} catch (...) {
std::string cause("Invalid number of days");
return_error_with_cause(context, cause, suppress_errors);
*out_valid = false;
return 0;
}
}
if (!hours_in_period.empty()) {
try {
std::replace(hours_in_period.begin(), hours_in_period.end(), ',', '.');
qty_hours = std::stod(hours_in_period);
} catch (...) {
std::string cause("Invalid number of hours");
return_error_with_cause(context, cause, suppress_errors);
*out_valid = false;
return 0;
}
}
if (!minutes_in_period.empty()) {
try {
std::replace(minutes_in_period.begin(), minutes_in_period.end(), ',', '.');
qty_minutes = std::stod(minutes_in_period);
} catch (...) {
std::string cause("Invalid number of minutes");
return_error_with_cause(context, cause, suppress_errors);
*out_valid = false;
return 0;
}
}
if (!seconds_in_period.empty()) {
try {
std::replace(seconds_in_period.begin(), seconds_in_period.end(), ',', '.');
qty_seconds = std::stod(seconds_in_period);
} catch (...) {
std::string cause("Invalid number of seconds");
return_error_with_cause(context, cause, suppress_errors);
*out_valid = false;
return 0;
}
}
auto millis_in_the_period =
static_cast<int64_t>(qty_hours * kMillisInAHour + // millis in a hour
qty_minutes * kMillisInAMinute + // millis in a minute
qty_seconds * kMillisInASecond);
int64_t total_days_in_millis = millis_in_the_period / kMillisInDay;
auto total_days = static_cast<int64_t>(qty_days + total_days_in_millis);
int64_t remainder_millis = millis_in_the_period % kMillisInDay;
// The response is a 64-bit integer where the lower half of the bytes represents the
// number of the days and the other half represents the number of milliseconds.
int64_t out = (total_days & 0x00000000FFFFFFFF);
out |= ((remainder_millis << 32) & 0xFFFFFFFF00000000);
*out_valid = true;
return out;
}
// The operator will cast a generic string defined by the user into an interval of days.
// There are two formats of strings that are acceptable:
// - The period in millis: '238398430'
// - The period using a ISO8601 compatible format: 'P[n]Y[n]M[n]DT[n]H[n]M[n]S or P[n]W'
int64_t IntervalDaysHolder::operator()(ExecutionContext* ctx, const char* data,
int32_t data_len, bool in_valid, bool* out_valid) {
*out_valid = false;
if (!in_valid) {
return 0;
}
if (data_len <= 0) {
std::string empty;
return_error(ctx, empty);
return 0;
}
std::string data_as_str(data, data_len);
if (RE2::FullMatch(data_as_str, period_only_contains_numbers)) {
return GetIntervalDayFromMillis(ctx, data_as_str, suppress_errors_, out_valid);
}
std::string period_in_weeks;
if (RE2::FullMatch(data_as_str, iso8601_period_with_weeks, &period_in_weeks)) {
return GetIntervalDayFromWeeks(ctx, period_in_weeks, suppress_errors_, out_valid);
}
std::string days_in_period;
std::string hours_in_period;
std::string minutes_in_period;
std::string seconds_in_period;
std::string ignored_string; // string to store unnecessary captured groups
if (std::regex_match(data_as_str, period_not_contains_time)) {
if (RE2::FullMatch(data_as_str, iso8601_period_without_time, &ignored_string,
&ignored_string, &days_in_period)) {
return GetIntervalDayFromCompletePeriod(ctx, days_in_period, hours_in_period,
minutes_in_period, seconds_in_period,
suppress_errors_, out_valid);
}
return_error(ctx, data_as_str);
return 0;
}
if (RE2::FullMatch(data_as_str, iso8601_complete_period, &ignored_string,
&ignored_string, &days_in_period, &hours_in_period,
&minutes_in_period, &seconds_in_period)) {
return GetIntervalDayFromCompletePeriod(ctx, days_in_period, hours_in_period,
minutes_in_period, seconds_in_period,
suppress_errors_, out_valid);
}
return_error(ctx, data_as_str);
return 0;
}
Result<std::shared_ptr<IntervalDaysHolder>> IntervalDaysHolder::Make(
const FunctionNode& node) {
const std::string function_name("castINTERVALDAY");
return IntervalHolder<IntervalDaysHolder>::Make(node, function_name);
}
Result<std::shared_ptr<IntervalDaysHolder>> IntervalDaysHolder::Make(
int32_t suppress_errors) {
return IntervalHolder<IntervalDaysHolder>::Make(suppress_errors);
}
Result<std::shared_ptr<IntervalYearsHolder>> IntervalYearsHolder::Make(
const FunctionNode& node) {
const std::string function_name("castINTERVALYEAR");
return IntervalHolder<IntervalYearsHolder>::Make(node, function_name);
}
Result<std::shared_ptr<IntervalYearsHolder>> IntervalYearsHolder::Make(
int32_t suppress_errors) {
return IntervalHolder<IntervalYearsHolder>::Make(suppress_errors);
}
// The operator will cast a generic string defined by the user into an interval of months.
// There are two formats of strings that are acceptable:
// - The period in months: '238398430'
// - The period using a ISO8601 compatible format: 'P[n]Y[n]M[n]DT[n]H[n]M[n]S or P[n]W'
int32_t IntervalYearsHolder::operator()(ExecutionContext* ctx, const char* data,
int32_t data_len, bool in_valid,
bool* out_valid) {
*out_valid = false;
if (!in_valid) {
return 0;
}
if (data_len <= 0) {
std::string empty;
return_error(ctx, empty);
return 0;
}
std::string data_as_str(data, data_len);
if (RE2::FullMatch(data_as_str, period_only_contains_numbers)) {
return GetIntervalYearFromNumber(ctx, data_as_str, suppress_errors_, out_valid);
}
std::string period_in_weeks;
if (RE2::FullMatch(data_as_str, iso8601_period_with_weeks, &period_in_weeks)) {
*out_valid = true;
return 0;
}
std::string yrs_in_period;
std::string months_in_period;
std::string ignored_string; // string to store unnecessary captured variables
if (std::regex_match(data_as_str, period_not_contains_time)) {
if (RE2::FullMatch(data_as_str, iso8601_period_without_time, &yrs_in_period,
&months_in_period, &ignored_string)) {
return GetIntervalYearFromCompletePeriod(ctx, yrs_in_period, months_in_period,
suppress_errors_, out_valid);
}
return_error(ctx, data_as_str);
return 0;
}
if (RE2::FullMatch(data_as_str, iso8601_complete_period, &yrs_in_period,
&months_in_period, &ignored_string, &ignored_string, &ignored_string,
&ignored_string)) {
return GetIntervalYearFromCompletePeriod(ctx, yrs_in_period, months_in_period,
suppress_errors_, out_valid);
}
return_error(ctx, data_as_str);
return 0;
}
int32_t IntervalYearsHolder::GetIntervalYearFromNumber(ExecutionContext* context,
std::string& number_as_string,
int32_t suppress_errors,
bool* out_valid) {
int32_t number_of_months = 0;
try {
number_of_months = std::stoi(number_as_string);
} catch (...) {
// Error converting the value
std::string error_cause("Error converting number to month");
return_error_with_cause(context, error_cause, suppress_errors);
*out_valid = false;
return 0;
}
*out_valid = true;
return number_of_months;
}
int32_t IntervalYearsHolder::GetIntervalYearFromCompletePeriod(
ExecutionContext* context, std::string& yrs_in_period, std::string& months_in_period,
int32_t suppress_errors, bool* out_valid) {
float qty_yrs = 0;
float qty_months = 0;
if (!yrs_in_period.empty()) {
std::replace(yrs_in_period.begin(), yrs_in_period.end(), ',', '.');
try {
qty_yrs = std::stof(yrs_in_period);
} catch (...) {
// Error converting the float value
std::string cause("Error converting the number of months");
return_error_with_cause(context, cause, suppress_errors);
*out_valid = false;
return 0;
}
}
if (!months_in_period.empty()) {
std::replace(months_in_period.begin(), months_in_period.end(), ',', '.');
try {
qty_months = std::stof(months_in_period);
} catch (...) {
// Error converting the float value
*out_valid = false;
return 0;
}
}
auto total_months = static_cast<int32_t>(qty_yrs * 12 + // qty months in an year
qty_months);
*out_valid = true;
return total_months;
}
} // namespace gandiva