blob: 5b405895d16a3aab34aa80106a6878f5267fcf04 [file]
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
#include "paimon/common/utils/string_utils.h"
#include <algorithm>
#include <cctype>
#include <ctime>
#include <iomanip>
#include <iterator>
#include <utility>
#include "fmt/format.h"
#include "paimon/status.h"
namespace paimon {
/// Replaces occurrences of `search_string` in `text` with `replacement`, scanning left to right.
///
/// @param text Input string; it is copied, never modified.
/// @param search_string Substring to look for. When empty, `text` is returned unchanged.
/// @param replacement Text substituted for each match.
/// @param max Maximum number of replacements; -1 means no limit. 0, or a negative value other
///            than -1, performs no replacement.
/// @return A copy of `text` with the replacements applied.
std::string StringUtils::Replace(const std::string& text, const std::string& search_string,
                                 const std::string& replacement, int32_t max) {
    // An empty needle matches at every offset: with max == -1 the loop below would never
    // terminate (and would keep growing the string), so treat it as "nothing to replace".
    if (search_string.empty()) {
        return text;
    }
    std::string str = text;
    int32_t count = 0;
    size_t pos = str.find(search_string);
    while (pos != std::string::npos && (max == -1 || count < max)) {
        str.replace(pos, search_string.size(), replacement);
        // Resume after the inserted text so the replacement itself is never rescanned.
        pos = str.find(search_string, pos + replacement.size());
        ++count;
    }
    return str;
}
/// Returns a copy of `text` in which the last occurrence of `old_str` (as located by
/// `rfind`) is replaced by `new_str`. When there is no occurrence the copy equals `text`.
std::string StringUtils::ReplaceLast(const std::string& text, const std::string& old_str,
                                     const std::string& new_str) {
    const size_t last_match = text.rfind(old_str);
    if (last_match == std::string::npos) {
        return text;
    }
    std::string result(text);
    result.replace(last_match, old_str.size(), new_str);
    return result;
}
/// Tests whether `prefix` occurs in `str` beginning exactly at offset `start_pos`.
///
/// @param str String to inspect.
/// @param prefix Expected leading text.
/// @param start_pos Offset in `str` at which the comparison starts.
/// @return true when the characters of `str` from `start_pos` on begin with `prefix`;
///         false otherwise, including when `start_pos` lies beyond the end of `str`.
bool StringUtils::StartsWith(const std::string& str, const std::string& prefix, size_t start_pos) {
    // std::string::compare throws std::out_of_range for pos > size(); report "no match" instead.
    if (start_pos > str.size()) {
        return false;
    }
    // Only the part of `str` after start_pos can hold the prefix.
    const size_t remaining = str.size() - start_pos;
    return remaining >= prefix.size() && str.compare(start_pos, prefix.size(), prefix) == 0;
}
/// Tests whether `str` terminates with `suffix`. An empty `suffix` always matches.
bool StringUtils::EndsWith(const std::string& str, const std::string& suffix) {
    if (suffix.size() > str.size()) {
        return false;
    }
    // Walk both strings backwards; the guard above ensures `str` is long enough.
    return std::equal(suffix.rbegin(), suffix.rend(), str.rbegin());
}
/// Returns true when `str` is empty or every character is classified as whitespace by
/// std::isspace; returns false as soon as any other character is present.
bool StringUtils::IsNullOrWhitespaceOnly(const std::string& str) {
    // std::all_of is vacuously true for an empty range, which covers the empty-string case.
    // The lambda takes unsigned char so std::isspace never receives a negative value.
    return std::all_of(str.begin(), str.end(),
                       [](unsigned char ch) { return std::isspace(ch) != 0; });
}
/// Strips leading and trailing space characters (' ') from `*str` in place.
/// Only the space character is removed; tabs, newlines etc. are left untouched.
/// `str` is dereferenced unconditionally, so it must not be null.
void StringUtils::Trim(std::string* str) {
    const size_t last_kept = str->find_last_not_of(' ');
    if (last_kept == std::string::npos) {
        // Empty, or made up solely of spaces: nothing survives.
        str->clear();
        return;
    }
    // Cut the tail first so the leading offset computed next is still valid.
    str->erase(last_kept + 1);
    const size_t first_kept = str->find_first_not_of(' ');
    str->erase(0, first_kept);
}
/// Returns a copy of `str` with each char passed through std::tolower.
std::string StringUtils::ToLowerCase(const std::string& str) {
    std::string lowered(str);
    for (char& ch : lowered) {
        // Go through unsigned char: std::tolower must not be given a negative value.
        ch = static_cast<char>(std::tolower(static_cast<unsigned char>(ch)));
    }
    return lowered;
}
/// Returns a copy of `str` with each char passed through std::toupper.
std::string StringUtils::ToUpperCase(const std::string& str) {
    std::string raised(str);
    for (char& ch : raised) {
        // Go through unsigned char: std::toupper must not be given a negative value.
        ch = static_cast<char>(std::toupper(static_cast<unsigned char>(ch)));
    }
    return raised;
}
/// Splits `text` on every occurrence of `sep_str`.
///
/// @param text String to split.
/// @param sep_str Separator. When empty no splitting happens and the result is `{text}`.
/// @param ignore_empty When true, zero-length pieces (between adjacent separators, or at
///                     either end of `text`) are dropped; when false they are kept.
/// @return The pieces in their original order.
std::vector<std::string> StringUtils::Split(const std::string& text, const std::string& sep_str,
                                            bool ignore_empty) {
    std::vector<std::string> pieces;
    if (sep_str.empty()) {
        // invalid case, do not split.
        pieces.push_back(text);
        return pieces;
    }
    size_t begin = 0;  // start of the piece currently being collected
    for (size_t hit = text.find(sep_str); hit != std::string::npos;
         hit = text.find(sep_str, begin)) {
        const bool piece_is_empty = (hit == begin);
        if (!(ignore_empty && piece_is_empty)) {
            pieces.emplace_back(text, begin, hit - begin);
        }
        begin = hit + sep_str.size();
    }
    // Whatever follows the final separator (possibly nothing) is the last piece.
    if (begin < text.size() || !ignore_empty) {
        pieces.emplace_back(text, begin, std::string::npos);
    }
    return pieces;
}
/// Two-level split: `text` is first split on `delim1`, then each resulting part is split on
/// `delim2`. Both steps call the single-separator Split with its declared default for
/// `ignore_empty` (the default itself is not visible in this file).
///
/// @return One inner vector per first-level part, in order.
std::vector<std::vector<std::string>> StringUtils::Split(const std::string& text,
                                                         const std::string& delim1,
                                                         const std::string& delim2) {
    const std::vector<std::string> outer_parts = Split(text, delim1);
    std::vector<std::vector<std::string>> table;
    table.reserve(outer_parts.size());
    for (size_t i = 0; i < outer_parts.size(); ++i) {
        table.push_back(Split(outer_parts[i], delim2));
    }
    return table;
}
/// Converts a string to a day number relative to 1970-01-01, evaluated in UTC (timegm).
///
/// If StringToValue<int32_t> yields a value for `str`, that value is returned as-is
/// (presumably it already is a day number - confirm against callers). Otherwise the leading
/// part of `str` is parsed as "%Y-%m-%d"; characters after the date are not examined.
///
/// @param str Text to convert.
/// @return The day number, or Status::Invalid when the text cannot be parsed or the
///         month/day had to be normalised by timegm (e.g. a day the month does not have).
Result<int32_t> StringUtils::StringToDate(const std::string& str) {
    auto int_value = StringToValue<int32_t>(str);
    if (int_value) {
        return int_value.value();
    }
    // Value-initialise rather than listing fields positionally: the number and order of
    // std::tm members is implementation-specific. All fields start at zero either way.
    std::tm timeinfo{};
    std::istringstream ss(str);
    ss >> std::get_time(&timeinfo, "%Y-%m-%d");
    if (ss.fail()) {
        return Status::Invalid(fmt::format("failed to convert string '{}' to date", str));
    }
    // timegm normalises out-of-range fields in place; remember what was parsed so that a
    // silent roll-over into another day or month can be detected afterwards.
    const int32_t orig_mon = timeinfo.tm_mon;
    const int32_t orig_mday = timeinfo.tm_mday;
    const std::time_t time = timegm(&timeinfo);
    // The time-of-day fields are zero, so a successful result is a multiple of 86400 and
    // -1 can only be the error value here.
    if (time == -1 || timeinfo.tm_mon != orig_mon || timeinfo.tm_mday != orig_mday) {
        return Status::Invalid(fmt::format("failed to convert string '{}' to date", str));
    }
    static constexpr int64_t SECONDS_PER_DAY = 24 * 60 * 60;
    return time / SECONDS_PER_DAY;
}
/// Parses a timestamp string into unix milliseconds.
/// Supported formats: "yyyy-MM-dd", "yyyy-MM-dd HH:mm:ss", "yyyy-MM-dd HH:mm:ss.SSS".
/// The fraction may have one to three digits and is right-padded with zeros
/// ("1" -> 100 ms, "12" -> 120 ms). Anything left after the recognised part is an error.
/// Uses the default local time zone, consistent with Java Paimon behavior.
///
/// @param str Text to parse.
/// @return Milliseconds since the epoch, or Status::Invalid when the text matches none of the
///         supported formats, has trailing characters, required mktime to move the month or
///         day while normalising (e.g. a day the month does not have), or mktime failed.
Result<int64_t> StringUtils::StringToTimestampMillis(const std::string& str) {
    std::tm timeinfo{};
    timeinfo.tm_isdst = -1;  // DST status unknown: mktime determines it
    // Try "yyyy-MM-dd HH:mm:ss" first (also matches "yyyy-MM-dd HH:mm:ss.SSS")
    std::istringstream ss(str);
    ss >> std::get_time(&timeinfo, "%Y-%m-%d %H:%M:%S");
    int32_t millis_part = 0;
    if (!ss.fail()) {
        // Check for optional fractional seconds ".SSS"
        if (ss.peek() == '.') {
            ss.get();
            // Consume at most three digits, weighting them 100, 10, 1 so that a short
            // fraction is implicitly zero-padded: "1" -> 100, "12" -> 120, "123" -> 123.
            int32_t scale = 100;
            while (scale > 0) {
                const int next = ss.peek();
                if (next == std::char_traits<char>::eof() || !std::isdigit(next)) {
                    break;
                }
                ss.get();
                millis_part += (next - '0') * scale;
                scale /= 10;
            }
            if (scale == 100) {
                // No digit followed the '.'.
                return Status::Invalid(
                    fmt::format("failed to convert string '{}' to timestamp, "
                                "expected digits after '.'",
                                str));
            }
        }
    } else {
        // Fall back to "yyyy-MM-dd" (date only, time defaults to 00:00:00)
        ss.clear();
        ss.str(str);
        timeinfo = std::tm{};
        timeinfo.tm_isdst = -1;
        ss >> std::get_time(&timeinfo, "%Y-%m-%d");
        if (ss.fail()) {
            return Status::Invalid(
                fmt::format("failed to convert string '{}' to timestamp, "
                            "supported formats: yyyy-MM-dd, yyyy-MM-dd HH:mm:ss, "
                            "yyyy-MM-dd HH:mm:ss.SSS",
                            str));
        }
    }
    if (ss.peek() != std::char_traits<char>::eof()) {
        return Status::Invalid(
            fmt::format("failed to convert string '{}' to timestamp, "
                        "unexpected trailing characters",
                        str));
    }
    // mktime normalises out-of-range fields in place; remember what was parsed so that a
    // silent roll-over into another day or month can be detected afterwards.
    const int32_t orig_mon = timeinfo.tm_mon;
    const int32_t orig_mday = timeinfo.tm_mday;
    // (time_t)-1 is both mktime's error value and a valid instant (one second before the
    // epoch in local time). mktime fills in tm_wday on success, so an out-of-range sentinel
    // that is still present afterwards tells a real failure apart from that instant.
    constexpr int kWdayUnset = -1;
    timeinfo.tm_wday = kWdayUnset;
    const std::time_t time = mktime(&timeinfo);
    const bool conversion_failed =
        (time == static_cast<std::time_t>(-1)) && (timeinfo.tm_wday == kWdayUnset);
    if (conversion_failed || timeinfo.tm_mon != orig_mon || timeinfo.tm_mday != orig_mday) {
        return Status::Invalid(fmt::format("failed to convert string '{}' to timestamp", str));
    }
    return static_cast<int64_t>(time) * 1000 + millis_part;
}
} // namespace paimon