blob: 263cb675c2ae1271e74ca35c4f33b246241e8183 [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* License); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
#include "common/device_id.h"
#include <algorithm>
#include <cctype>
#include <numeric>
#include <stdexcept>
#include <unordered_set>
#include "constant/tsfile_constant.h"
#ifdef ENABLE_ANTLR4
#include "parser/path_nodes_generator.h"
#endif
#include "utils/errno_define.h"
namespace storage {
// IDeviceID implementation
IDeviceID::IDeviceID() : empty_segments_() {}
int IDeviceID::serialize(common::ByteStream& write_stream) { return 0; }
int IDeviceID::deserialize(common::ByteStream& read_stream) { return 0; }
std::string IDeviceID::get_table_name() { return ""; }
int IDeviceID::segment_num() { return 0; }
const std::vector<std::string*>& IDeviceID::get_segments() const {
return empty_segments_;
}
std::string IDeviceID::get_device_name() const { return ""; }
bool IDeviceID::operator<(const IDeviceID& other) { return false; }
bool IDeviceID::operator==(const IDeviceID& other) { return false; }
bool IDeviceID::operator!=(const IDeviceID& other) { return false; }
// IDeviceIDComparator implementation
// Strict-weak-order functor over shared device ids: compares the pointed-to
// objects with IDeviceID::operator<. Both pointers are dereferenced
// unconditionally, so neither may be null.
bool IDeviceIDComparator::operator()(
    const std::shared_ptr<IDeviceID>& lhs,
    const std::shared_ptr<IDeviceID>& rhs) const {
    IDeviceID& left = *lhs;
    IDeviceID& right = *rhs;
    return left < right;
}
// StringArrayDeviceID implementation
// Builds a device id from already-split segments. Each input string is
// copied onto the heap by formalize(); the copies are released in the
// destructor.
StringArrayDeviceID::StringArrayDeviceID(
    const std::vector<std::string>& segments)
    : segments_(formalize(segments)) {
}
// Builds a device id from its textual form. The string is first split by
// split_device_id_string(); every resulting segment is then copied onto the
// heap (via formalize()) and owned by this object.
StringArrayDeviceID::StringArrayDeviceID(const std::string& device_id_string)
    : segments_(formalize(split_device_id_string(device_id_string))) {}
// Builds a device id by deep-copying a list of segment pointers. Null entries
// are kept as null; every non-null entry is duplicated so this object owns
// its own strings and never takes ownership of the caller's.
StringArrayDeviceID::StringArrayDeviceID(
    const std::vector<std::string*>& segments) {
    segments_.reserve(segments.size());
    for (const std::string* source : segments) {
        std::string* copy = nullptr;
        if (source != nullptr) {
            copy = new std::string(*source);
        }
        segments_.push_back(copy);
    }
}
// Builds a device id with no segments.
StringArrayDeviceID::StringArrayDeviceID()
    : segments_() {
}
// Releases every heap string held in segments_ and prefix_segments_.
// Null entries are harmless: deleting a null pointer is a no-op.
StringArrayDeviceID::~StringArrayDeviceID() {
    for (auto* owned : segments_) {
        delete owned;
    }
    for (auto* owned : prefix_segments_) {
        delete owned;
    }
}
// Returns the segments joined with '.'; a null segment is rendered as the
// literal text "null". An id without segments yields the empty string.
std::string StringArrayDeviceID::get_device_name() const {
    if (segments_.empty()) {
        return "";
    }
    // Text used for one segment, substituting "null" for a missing one.
    const auto text_of = [](const std::string* segment) {
        return segment ? *segment : std::string("null");
    };
    std::string name = text_of(segments_.front());
    for (size_t i = 1; i < segments_.size(); ++i) {
        name += ".";
        name += text_of(segments_[i]);
    }
    return name;
}
// Splits the first segment (the table name) into its path nodes and appends
// a heap copy of each node to prefix_segments_. The copies are released by
// the destructor.
//
// Does nothing when there is no first segment or when it is null; the
// previous code dereferenced segments_[0] unconditionally, which is undefined
// behaviour for an empty or null-leading id (both can be produced by the
// default constructor, the pointer-vector constructor and deserialize()).
//
// With ENABLE_ANTLR4 the split is delegated to PathNodesGenerator; otherwise
// split_string() is used, which may throw std::runtime_error on malformed
// input.
void StringArrayDeviceID::init_prefix_segments() {
    if (segments_.empty() || segments_[0] == nullptr) {
        return;
    }
#ifdef ENABLE_ANTLR4
    auto splits = storage::PathNodesGenerator::invokeParser(*segments_[0]);
#else
    auto splits = split_string(*segments_[0], '.');
#endif
    // Range-for avoids the signed/unsigned comparison of the old index loop.
    for (const auto& node : splits) {
        prefix_segments_.push_back(new std::string(node));
    }
}
// Writes this id to `write_stream`: first the segment count as a var-uint,
// then each segment pointer through write_var_char_ptr, in order.
// Returns the value left in `ret`: common::E_OK when every write succeeds,
// otherwise the code of the first failing write (RET_FAIL is expected to
// store the call's result in `ret` -- see its definition).
int StringArrayDeviceID::serialize(common::ByteStream& write_stream) {
    int ret = common::E_OK;
    if (RET_FAIL(common::SerializationUtil::write_var_uint(segment_num(),
                                                           write_stream))) {
        return ret;
    }
    for (std::string* const& segment : segments_) {
        if (RET_FAIL(common::SerializationUtil::write_var_char_ptr(
                segment, write_stream))) {
            // Stop at the first failure; `ret` already carries the error.
            break;
        }
    }
    return ret;
}
// Replaces the current segments with the ones read from `read_stream`.
//
// Layout read: a var-uint segment count followed by that many segments, each
// read through read_var_char_ptr. The existing segments are freed only after
// the count has been read successfully.
//
// Returns common::E_OK on success, otherwise the code of the first failing
// read; on a mid-stream failure the segments read so far are kept.
int StringArrayDeviceID::deserialize(common::ByteStream& read_stream) {
    int ret = common::E_OK;
    uint32_t num_segments = 0;
    if (RET_FAIL(common::SerializationUtil::read_var_uint(num_segments,
                                                          read_stream))) {
        return ret;
    }
    // delete on a null pointer is a no-op, so no null check is required.
    for (std::string* old_segment : segments_) {
        delete old_segment;
    }
    segments_.clear();
    // Deliberately no reserve(num_segments): the count comes from the stream
    // and a corrupt value must not trigger a huge up-front allocation.
    for (uint32_t i = 0; i < num_segments; ++i) {
        // Must start as nullptr: if the reader fails before assigning the
        // out-parameter, the cleanup below would otherwise delete an
        // indeterminate pointer (undefined behaviour).
        std::string* segment = nullptr;
        if (RET_FAIL(common::SerializationUtil::read_var_char_ptr(
                segment, read_stream))) {
            delete segment;
            return ret;
        }
        segments_.push_back(segment);
    }
    return ret;
}
// Returns a copy of the first segment, which this class treats as the table
// name. Returns the empty string when there are no segments or when the
// first segment is null (null segments can come from the pointer-vector
// constructor or deserialize(); dereferencing one would be undefined
// behaviour).
std::string StringArrayDeviceID::get_table_name() {
    if (segments_.empty() || segments_[0] == nullptr) {
        return "";
    }
    return *segments_[0];
}
// Number of segments held by this id (null entries included), narrowed to
// int.
int StringArrayDeviceID::segment_num() {
    const auto count = segments_.size();
    return static_cast<int>(count);
}
// Read-only view of the owned segment pointers. The reference and the
// pointed-to strings belong to this object; entries may be null.
const std::vector<std::string*>& StringArrayDeviceID::get_segments() const {
    const std::vector<std::string*>& view = segments_;
    return view;
}
// Lexicographic ordering over the segment lists of this id and `other`.
// Segments are compared by string value; a null segment sorts before any
// non-null segment and two null segments are equivalent. As with any
// lexicographic comparison, a proper prefix sorts before the longer list.
bool StringArrayDeviceID::operator<(const IDeviceID& other) {
    // Bind by reference: plain `auto` would copy the whole pointer vector on
    // every comparison.
    const std::vector<std::string*>& other_segments = other.get_segments();
    return std::lexicographical_compare(
        segments_.begin(), segments_.end(), other_segments.begin(),
        other_segments.end(), [](const std::string* a, const std::string* b) {
            if (a == nullptr && b == nullptr) return false;  // equal
            if (a == nullptr) return true;    // nullptr < any string
            if (b == nullptr) return false;   // any string > nullptr
            return *a < *b;
        });
}
// Two ids are equal when they have the same number of segments and every
// pair of corresponding segments matches: both null, or both non-null with
// equal string contents.
bool StringArrayDeviceID::operator==(const IDeviceID& other) {
    // Bind by reference: plain `auto` would copy the whole pointer vector on
    // every comparison.
    const std::vector<std::string*>& other_segments = other.get_segments();
    if (segments_.size() != other_segments.size()) {
        return false;
    }
    // Sizes match, so the three-iterator std::equal cannot over-read.
    return std::equal(segments_.begin(), segments_.end(),
                      other_segments.begin(),
                      [](const std::string* a, const std::string* b) {
                          if (a == nullptr || b == nullptr) {
                              return a == b;  // equal only if both are null
                          }
                          return *a == *b;
                      });
}
// Logical negation of operator== for the same operands.
bool StringArrayDeviceID::operator!=(const IDeviceID& other) {
    const bool same = (*this == other);
    return !same;
}
// Converts a list of strings into a list of heap-allocated copies, in the
// same order. Ownership of every returned pointer passes to the caller.
std::vector<std::string*> StringArrayDeviceID::formalize(
    const std::vector<std::string>& segments) {
    std::vector<std::string*> owned(segments.size(), nullptr);
    std::transform(
        segments.begin(), segments.end(), owned.begin(),
        [](const std::string& text) { return new std::string(text); });
    return owned;
}
// Splits a textual device id into segments.
//
// With ENABLE_ANTLR4: the path is parsed into nodes by PathNodesGenerator and
// the nodes are regrouped (table name + remaining segments) by the
// vector overload of split_device_id_string().
// Without it: the string is split on '.' by split_string() and the raw
// tokens are returned as-is; split_string() may throw std::runtime_error.
//
// NOTE(review): the non-ANTLR path does not apply the table-name regrouping
// that the ANTLR path does, so the two builds can return different segment
// lists for the same input -- confirm this is intended.
std::vector<std::string> StringArrayDeviceID::split_device_id_string(
    const std::string& device_id_string) {
#ifdef ENABLE_ANTLR4
    return split_device_id_string(
        storage::PathNodesGenerator::invokeParser(device_id_string));
#else
    const char kDelimiter = '.';
    return split_string(device_id_string, kDelimiter);
#endif
}
// Regroups already-split path nodes into device-id segments: the leading
// nodes are merged (joined with storage::PATH_SEPARATOR) into one table-name
// segment and the remaining nodes are kept as individual segments.
//
//   {}                        -> {}
//   "root"                    -> {"root"}
//   "root.a"                  -> {"root", "a"}
//   "root.a.b"                -> {"root.a", "b"}
//   "root.a.b.c"              -> {"root.a.b", "c"}
//   "root.a.b.c.d"            -> {"root.a.b", "c", "d"}
//
// The table name takes at most storage::DEFAULT_SEGMENT_NUM_FOR_TABLE_NAME
// nodes, but always leaves at least one node after it when there are two or
// more nodes.
std::vector<std::string> StringArrayDeviceID::split_device_id_string(
    const std::vector<std::string>& splits) {
    using NodeIter = std::vector<std::string>::const_iterator;
    // Joins [first, last) with the path separator. While the accumulated
    // text is still empty the next node simply replaces it, so leading empty
    // nodes do not produce a leading separator.
    const auto join_nodes = [](NodeIter first, NodeIter last) {
        std::string joined;
        for (; first != last; ++first) {
            if (joined.empty()) {
                joined = *first;
            } else {
                joined = joined + storage::PATH_SEPARATOR + *first;
            }
        }
        return joined;
    };

    std::vector<std::string> result;
    const size_t node_count = splits.size();
    if (node_count == 0) {
        return result;
    }
    if (node_count == 1) {
        result.push_back(splits[0]);
        return result;
    }

    const bool fits_in_table_name =
        node_count <
        static_cast<size_t>(storage::DEFAULT_SEGMENT_NUM_FOR_TABLE_NAME + 1);
    if (fits_in_table_name) {
        // Everything except the last node forms the table name.
        result.push_back(join_nodes(splits.begin(), splits.end() - 1));
        result.push_back(splits.back());
        return result;
    }

    // Fixed-size table-name prefix, then one segment per remaining node.
    const NodeIter table_end =
        splits.begin() + storage::DEFAULT_SEGMENT_NUM_FOR_TABLE_NAME;
    result.push_back(join_nodes(splits.begin(), table_end));
    result.insert(result.end(), table_end, splits.end());
    return result;
}
// Splits `str` on `delimiter`, treating text inside back quotes, double
// quotes or single quotes as opaque (delimiters inside a quoted section do
// not split). Quote characters are kept in the token; a quote character of
// one kind appearing inside a section opened by another kind is ordinary
// text.
//
// Each non-empty token is checked with validate_identifier() and then passed
// through unquote_identifier() before being stored. An empty token produced
// by a delimiter (leading or consecutive delimiters) is stored as an empty
// string, but an empty token at the very end of the input is dropped.
//
// Throws std::runtime_error when the input contains '\n' or '\r', when a
// quoted section is left open, or when validate_identifier() rejects a
// token.
std::vector<std::string> IDeviceID::split_string(const std::string& str,
                                                 char delimiter) {
    // Reject newlines in path explicitly (illegal path name).
    if (str.find_first_of("\n\r") != std::string::npos) {
        throw std::runtime_error("Path contains newline");
    }

    std::vector<std::string> tokens;
    std::string current;
    // The quote character that opened the section we are in, or '\0' when
    // outside any quoted section. At most one section can be open at a time.
    char open_quote = '\0';

    for (const char c : str) {
        const bool is_quote_char = (c == '`' || c == '"' || c == '\'');
        if (is_quote_char) {
            if (open_quote == '\0') {
                open_quote = c;  // opens a quoted section
            } else if (open_quote == c) {
                open_quote = '\0';  // closes the matching section
            }
            // Quote characters are always preserved in the token.
            current += c;
            continue;
        }

        const bool splits_here = (c == delimiter && open_quote == '\0');
        if (!splits_here) {
            // Preserve all characters verbatim (including backslashes).
            current += c;
            continue;
        }

        // Delimiter outside quotes -> finish the current token.
        if (current.empty()) {
            tokens.push_back(current);
        } else {
            validate_identifier(current);
            tokens.push_back(unquote_identifier(current));
        }
        current.clear();
    }

    // Unmatched quotes are errors.
    if (open_quote != '\0') {
        throw std::runtime_error("Unmatched quotes in path");
    }

    // The trailing token is only kept when it is non-empty.
    if (!current.empty()) {
        validate_identifier(current);
        tokens.push_back(unquote_identifier(current));
    }
    return tokens;
}
// Strips the surrounding back quotes from `identifier` when, and only when,
// the quoted text is one of a fixed set of keywords (compared
// case-insensitively), e.g. `select` -> select. The original casing of the
// inner text is returned.
//
// Every other input -- unquoted text, text in double or single quotes, and
// back-quoted non-keywords -- is returned unchanged, quotes included.
std::string IDeviceID::unquote_identifier(const std::string& identifier) {
    const bool back_quoted = identifier.length() >= 2 &&
                             identifier.front() == '`' &&
                             identifier.back() == '`';
    if (!back_quoted) {
        return identifier;
    }

    static const std::unordered_set<std::string> keywords = {
        "select", "device", "drop_trigger", "and",
        "or",     "not",    "null",         "contains"};

    const std::string inner = identifier.substr(1, identifier.length() - 2);
    // Lower-cased copy used only for the keyword lookup.
    std::string folded(inner.size(), '\0');
    std::transform(inner.begin(), inner.end(), folded.begin(),
                   [](unsigned char ch) {
                       return static_cast<char>(std::tolower(ch));
                   });

    if (keywords.count(folded) != 0) {
        return inner;
    }
    // Not a keyword: keep original quoting.
    return identifier;
}
// Validates one path token, throwing std::runtime_error when it is illegal.
//
// Accepted without further checks: the empty string, and any token of length
// >= 2 that starts and ends with the same quote character (`, " or ').
// For everything else the following rules apply, in this order:
//   1. a token made only of digits is rejected;
//   2. a token starting with a digit is rejected (e.g. 0e38, 00.12);
//   3. a token containing '%' is rejected;
//   4. a token containing '*' must consist of '*' characters only.
void IDeviceID::validate_identifier(const std::string& identifier) {
    if (identifier.empty()) {
        return;
    }

    const char head = identifier.front();
    const char tail = identifier.back();
    const bool head_is_quote = (head == '`' || head == '"' || head == '\'');
    if (identifier.size() >= 2 && head_is_quote && tail == head) {
        return;  // quoted identifiers are always accepted
    }

    const auto is_digit = [](char ch) {
        return std::isdigit(static_cast<unsigned char>(ch)) != 0;
    };

    // 1. Pure digits - unquoted numeric literals not allowed as identifiers
    if (std::all_of(identifier.begin(), identifier.end(), is_digit)) {
        throw std::runtime_error("Unquoted pure digits are illegal");
    }

    // 2. Numeric-like illegal patterns (leading digit)
    if (is_digit(head)) {
        throw std::runtime_error("Identifier cannot start with a digit");
    }

    // 3. Illegal wildcards in unquoted identifiers
    if (identifier.find('%') != std::string::npos) {
        throw std::runtime_error("Illegal wildcard in unquoted identifier");
    }

    // 4. Asterisk (*) validation: if present, nothing but asterisks may
    //    appear in the token.
    const bool has_asterisk = identifier.find('*') != std::string::npos;
    const bool has_other_char =
        identifier.find_first_not_of('*') != std::string::npos;
    if (has_asterisk && has_other_char) {
        throw std::runtime_error(
            "Asterisk wildcard must be the only character type");
    }
}
} // namespace storage