blob: 632feb849d8362e16474834f46fc0659f77facfc [file]
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
#include "paimon/core/utils/partition_path_utils.h"
#include <array>
#include <cstdint>
#include <optional>
#include "paimon/status.h"
namespace paimon {
// Returns the lazily built, process-wide table of 7-bit character codes that
// are flagged for escaping. Bit `i` is set when the character with code `i`
// is flagged. The table is built once, on the first call.
const std::bitset<128>& PartitionPathUtils::CharToEscape() {
    static const std::bitset<128> escape_table = [] {
        std::bitset<128> table;
        // Every character that sorts below the space character (NUL and the
        // other control characters) is flagged.
        for (unsigned char code = 0; code < ' '; ++code) {
            table.set(code);
        }
        // DEL plus the printable characters that are flagged explicitly.
        constexpr char kFlagged[] = {'"', '#', '%', '\'', '*', '/',    ':', '=',
                                     '?', '\\', '{', '}',  '[', ']',    '^', '\x7F'};
        for (const char flagged : kFlagged) {
            table.set(static_cast<unsigned char>(flagged));
        }
        return table;
    }();
    return escape_table;
}
// Renders a partition spec as `key=value` segments, each followed by
// PATH_SEPARATOR (so the result ends with a separator). Keys and values are
// passed through EscapePathName first; an escaping failure is returned to the
// caller via PAIMON_ASSIGN_OR_RAISE. An empty spec produces an empty string.
Result<std::string> PartitionPathUtils::GeneratePartitionPath(
    const std::vector<std::pair<std::string, std::string>>& partition_spec) {
    std::string joined;
    for (const auto& entry : partition_spec) {
        PAIMON_ASSIGN_OR_RAISE(std::string escaped_key, EscapePathName(entry.first));
        PAIMON_ASSIGN_OR_RAISE(std::string escaped_value, EscapePathName(entry.second));
        // A separator between entries plus one trailing separator is the same
        // as one separator after every entry.
        joined.append(escaped_key);
        joined.append("=");
        joined.append(escaped_value);
        joined.append(PATH_SEPARATOR);
    }
    return joined;
}
// Returns `path` with every character for which NeedsEscaping() is true
// replaced by the output of EscapeChar(). All other characters are copied
// unchanged. An empty `path` is rejected with Status::Invalid.
Result<std::string> PartitionPathUtils::EscapePathName(const std::string& path) {
    if (path.empty()) {
        return Status::Invalid("path should not be empty");
    }
    // The output buffer is only created once the first character that needs
    // escaping is found, so inputs without such characters are returned as-is.
    std::optional<std::stringstream> escaped;
    for (size_t pos = 0; pos < path.size(); ++pos) {
        const char current = path[pos];
        if (!NeedsEscaping(current)) {
            if (escaped.has_value()) {
                *escaped << current;
            }
            continue;
        }
        if (!escaped.has_value()) {
            escaped.emplace();
            // Carry over the untouched prefix that precedes the first escape.
            *escaped << path.substr(0, pos);
        }
        EscapeChar(current, &*escaped);
    }
    if (!escaped.has_value()) {
        return path;
    }
    return escaped->str();
}
// Appends the percent-encoded form of `c` to `*ss_ptr`: a '%' followed by
// exactly two uppercase hexadecimal digits of the byte value (e.g. '/' is
// written as "%2F", '\n' as "%0A").
//
// @param c       character to encode; it is treated as an unsigned byte.
// @param ss_ptr  destination stream; must not be null.
//
// The digits come from a fixed lookup table rather than a temporary
// std::stringstream, so no stream is constructed per escaped character and
// the output does not go through locale-dependent integer formatting. The
// format flags of `*ss_ptr` are left untouched.
void PartitionPathUtils::EscapeChar(char c, std::stringstream* ss_ptr) {
    static constexpr char kHexDigits[] = "0123456789ABCDEF";
    const auto byte = static_cast<unsigned char>(c);
    // High nibble first; values below 16 naturally get a leading '0'.
    *ss_ptr << '%' << kHexDigits[byte >> 4] << kHexDigits[byte & 0x0F];
}
// Produces one path per partition level, each extending the previous one:
// for entries (k1, v1), (k2, v2) the result is
//   { "k1=v1" + PATH_SEPARATOR,
//     "k1=v1" + PATH_SEPARATOR + "k2=v2" + PATH_SEPARATOR }.
// Keys and values are passed through EscapePathName; an escaping failure is
// returned to the caller via PAIMON_ASSIGN_OR_RAISE. An empty spec produces an
// empty vector.
Result<std::vector<std::string>> PartitionPathUtils::GenerateHierarchicalPartitionPaths(
    const std::vector<std::pair<std::string, std::string>>& partition_spec) {
    std::vector<std::string> prefixes;
    prefixes.reserve(partition_spec.size());
    // Accumulates the path built so far; a snapshot is stored after each level.
    std::string running_path;
    for (const auto& entry : partition_spec) {
        PAIMON_ASSIGN_OR_RAISE(std::string key_part, EscapePathName(entry.first));
        PAIMON_ASSIGN_OR_RAISE(std::string value_part, EscapePathName(entry.second));
        running_path += key_part;
        running_path += "=";
        running_path += value_part;
        running_path.append(PATH_SEPARATOR);
        prefixes.emplace_back(running_path);
    }
    return prefixes;
}
} // namespace paimon