blob: 51782ab0b2de5dc7f901ae9940826982a861ba70 [file]
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#pragma once
#include <unicode/utf8.h>
#include <algorithm>
#include <boost/algorithm/string.hpp>
#include <boost/algorithm/string/split.hpp>
#include <boost/algorithm/string/trim.hpp>
#include <boost/regex.hpp>
#include <cctype>
#include <cstdint>
#include <string>
#include <unordered_map>
#include <unordered_set>
#include <utility>
#include <vector>
#include "common/exception.h"
namespace doris::segment_v2::inverted_index {
// String-keyed bag of configuration arguments with typed accessors.
//
// All values are stored as strings in an unordered map; the get_* helpers
// parse them on demand and fall back to a caller-supplied default when the
// key is absent.
class Settings {
public:
    Settings() = default;
    // Takes ownership of an already-built key/value map.
    Settings(std::unordered_map<std::string, std::string> args) : _args(std::move(args)) {}
    Settings(const Settings&) = default;
    Settings(Settings&&) = default;
    // Declared explicitly: with user-declared copy/move constructors the
    // compiler would otherwise delete copy assignment and omit move
    // assignment, leaving the class non-assignable.
    Settings& operator=(const Settings&) = default;
    Settings& operator=(Settings&&) = default;
    ~Settings() = default;

    // Inserts `key` with `value`, overwriting any existing value for `key`.
    void set(const std::string& key, const std::string& value) {
        _args.insert_or_assign(key, value);
    }

    // Returns true when no arguments are stored.
    bool empty() const { return _args.empty(); }

    // Returns the boolean stored under `key`.
    //
    // The value is compared case-insensitively: "true"/"1" yield true and
    // "false"/"0" yield false. A missing key or any other value yields
    // `default_value`.
    bool get_bool(const std::string& key, bool default_value) const {
        auto it = _args.find(key);
        if (it != _args.end()) {
            // Work on a lower-cased copy so the stored value is left untouched.
            std::string value = it->second;
            std::transform(value.begin(), value.end(), value.begin(),
                           [](unsigned char c) { return static_cast<char>(std::tolower(c)); });
            if (value == "true" || value == "1") {
                return true;
            } else if (value == "false" || value == "0") {
                return false;
            }
        }
        return default_value;
    }

    // Returns the integer stored under `key`.
    //
    // Returns `default_value` when the key is missing or when std::stoi
    // consumed only part of the value (trailing characters after the number).
    // Throws Exception(INVALID_ARGUMENT) when std::stoi itself fails, i.e. no
    // conversion could be performed or the number is out of range.
    int32_t get_int(const std::string& key, int32_t default_value) const {
        auto it = _args.find(key);
        if (it != _args.end()) {
            try {
                size_t pos;
                int32_t num = std::stoi(it->second, &pos);
                // Accept the number only if the whole string was consumed.
                if (pos == it->second.size()) {
                    return num;
                }
            } catch (...) {
                throw Exception(ErrorCode::INVALID_ARGUMENT,
                                "stoi failed (invalid argument or out of range): " + it->second);
            }
        }
        return default_value;
    }

    // Returns the raw string stored under `key`, or `default_value` when the
    // key is missing.
    std::string get_string(const std::string& key, const std::string& default_value = "") const {
        auto it = _args.find(key);
        if (it != _args.end()) {
            return it->second;
        }
        return default_value;
    }

    // Parses the value under `key` as a list of bracketed entries, e.g.
    // "[a], [b]", and returns the contents of each entry without the outer
    // brackets. Entries whose content is empty ("[]") are skipped.
    //
    // Returns an empty list when the key is missing or the trimmed value is
    // empty. Throws Exception(INVALID_ARGUMENT) when an entry is not enclosed
    // in [].
    std::vector<std::string> get_entry_list(const std::string& key) const {
        // Separator: a comma (optionally surrounded by whitespace) that sits
        // between a closing ']' and an opening '['. Commas inside an entry are
        // therefore not treated as separators.
        static const boost::regex sep(R"((?<=\])\s*,\s*(?=\[))");
        std::vector<std::string> lists;
        auto it = _args.find(key);
        if (it != _args.end()) {
            std::string trimmed_input = boost::algorithm::trim_copy(it->second);
            if (trimmed_input.empty()) {
                return lists;
            }

            // Checks that `item` starts with '[' and ends with ']', and that the
            // opening bracket is not closed before the final character (which
            // would mean several bracket groups were glued together).
            auto validate_single = [&](const std::string& item, const std::string& prefix) {
                if (item.size() < 2 || item.front() != '[' || item.back() != ']') {
                    throw Exception(ErrorCode::INVALID_ARGUMENT,
                                    prefix + key + " must be enclosed in []");
                }
                int depth = 0;
                // The last character is excluded: it is the expected closing ']'.
                for (size_t i = 0; i + 1 < item.size(); ++i) {
                    char c = item[i];
                    if (c == '[') {
                        ++depth;
                    } else if (c == ']') {
                        --depth;
                        if (depth == 0) {
                            throw Exception(ErrorCode::INVALID_ARGUMENT,
                                            prefix + key + " must be enclosed in []");
                        }
                    }
                }
            };

            if (boost::regex_search(trimmed_input, sep)) {
                // Submatch index -1 yields the text between separator matches.
                boost::sregex_token_iterator regex_it(trimmed_input.begin(), trimmed_input.end(),
                                                      sep, -1);
                boost::sregex_token_iterator end;
                for (; regex_it != end; ++regex_it) {
                    std::string item = boost::algorithm::trim_copy(regex_it->str());
                    validate_single(item, "Each item in ");
                    std::string content = item.substr(1, item.size() - 2);
                    if (!content.empty()) {
                        lists.emplace_back(content);
                    }
                }
            } else {
                // NOTE(review): the single-entry path only checks the outer
                // brackets; the nesting check in validate_single is applied to
                // comma-separated entries only. Confirm whether that is intended.
                if (trimmed_input.size() < 2 || trimmed_input.front() != '[' ||
                    trimmed_input.back() != ']') {
                    throw Exception(ErrorCode::INVALID_ARGUMENT,
                                    "Item in " + key + " must be enclosed in []");
                }
                std::string content = trimmed_input.substr(1, trimmed_input.size() - 2);
                if (!content.empty()) {
                    lists.emplace_back(content);
                }
            }
        }
        return lists;
    }

    // Splits the value under `key` on ',' and returns the set of trimmed,
    // non-empty tokens. Returns an empty set when the key is missing.
    std::unordered_set<std::string> get_word_set(const std::string& key) const {
        std::unordered_set<std::string> sets;
        auto it = _args.find(key);
        if (it != _args.end()) {
            std::vector<std::string> lists;
            boost::split(lists, it->second, boost::is_any_of(","));
            for (auto& str : lists) {
                boost::trim(str);
                if (!str.empty()) {
                    sets.insert(str);
                }
            }
        }
        return sets;
    }

    // Renders all arguments as "key=value" pairs joined by ", ".
    // The pair order follows the unordered map's iteration order.
    std::string to_string() const {
        std::string result;
        for (const auto& [key, value] : _args) {
            if (!result.empty()) {
                result += ", ";
            }
            // Append piecewise to avoid building temporary strings.
            result += key;
            result += '=';
            result += value;
        }
        return result;
    }

private:
    // Raw argument storage: option name -> unparsed string value.
    std::unordered_map<std::string, std::string> _args;
};
} // namespace doris::segment_v2::inverted_index