blob: 5ead1562bcb9751e962eda0f8b62c9d22e48783e [file] [log] [blame]
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include <limits>
#include <charconv>
#include "utils/Environment.h"
#include "utils/GeneralUtils.h"
#include "utils/StringUtils.h"
namespace org::apache::nifi::minifi::utils::string {
/**
 * Interprets a string as a boolean value.
 * The input is passed through trim() and then compared case-insensitively.
 * @param input the text to interpret
 * @return true for "true", false for "false", std::nullopt for any other text
 */
std::optional<bool> toBool(const std::string& input) {
  std::string normalized = trim(input);
  std::optional<bool> parsed;
  if (equalsIgnoreCase(normalized, "true")) {
    parsed = true;
  } else if (equalsIgnoreCase(normalized, "false")) {
    parsed = false;
  }
  return parsed;
}
/**
 * Separates a trailing line terminator from a line.
 * @param input_line the line to examine
 * @return a pair of (the line without its terminator, the terminator); the terminator is
 *         "\r\n", "\n", or the empty string when the line ends with neither
 */
std::pair<std::string, std::string> chomp(const std::string& input_line) {
  // "\r\n" has to be tested first, because such a line also ends with "\n"
  if (endsWith(input_line, "\r\n")) {
    return {input_line.substr(0, input_line.size() - 2), "\r\n"};
  }
  if (endsWith(input_line, "\n")) {
    return {input_line.substr(0, input_line.size() - 1), "\n"};
  }
  return {input_line, ""};
}
// Returns the result of applying trimLeft() to s and then trimRight() to that result.
std::string trim(const std::string& s) {
return trimRight(trimLeft(s));
}
/**
 * Strips leading and trailing whitespace (as classified by isspace) from a view.
 * No characters are copied: the result is a sub-view of the argument.
 * @param sv the view to trim
 * @return the part of sv between its first and last non-whitespace characters (inclusive);
 *         empty when sv is empty or consists of whitespace only
 */
std::string_view trim(std::string_view sv) {
  // taking unsigned char keeps isspace() well-defined for bytes above 127
  const auto is_whitespace = [](unsigned char ch) -> bool { return isspace(ch) != 0; };
  while (!sv.empty() && is_whitespace(sv.front())) {
    sv.remove_prefix(1);
  }
  while (!sv.empty() && is_whitespace(sv.back())) {
    sv.remove_suffix(1);
  }
  return sv;
}
std::string_view trim(const char* str) {
return trim(std::string_view(str));
}
/**
 * Splits a string at every occurrence of a delimiter and passes each piece through a
 * transformation before storing it.
 *
 * The input is scanned once through the view; the previous implementation copied the whole
 * input and then re-copied the unprocessed tail for every token, which was quadratic in the
 * input length.
 *
 * @tparam Fun callable that accepts a std::string and returns something convertible to std::string
 * @param str_view the text to split
 * @param delimiter the separator; when empty, the text is split into single characters
 * @param transformation applied to every piece, in order of appearance
 * @return the transformed pieces. With a non-empty delimiter there is always one more piece than
 *         there are delimiter occurrences (so an empty input yields one empty piece); with an
 *         empty delimiter there is one piece per input character.
 */
template<typename Fun>
std::vector<std::string> split_transformed(std::string_view str_view, std::string_view delimiter, Fun transformation) {
  std::vector<std::string> result;
  if (delimiter.empty()) {
    result.reserve(str_view.size());
    for (const char c : str_view) {
      result.push_back(transformation(std::string(1, c)));
    }
    return result;
  }
  size_t token_begin = 0;
  while (true) {
    const size_t token_end = str_view.find(delimiter, token_begin);
    if (token_end == std::string_view::npos) {
      // whatever follows the last delimiter (possibly nothing) is the final piece
      result.push_back(transformation(std::string{str_view.substr(token_begin)}));
      return result;
    }
    result.push_back(transformation(std::string{str_view.substr(token_begin, token_end - token_begin)}));
    token_begin = token_end + delimiter.size();
  }
}
std::vector<std::string> split(std::string_view str, std::string_view delimiter) {
return split_transformed(str, delimiter, identity{});
}
/**
 * Splits a string at every occurrence of a delimiter and drops the empty pieces.
 * @param str the text to split
 * @param delimiter the separator
 * @return the non-empty pieces of split(str, delimiter), in their original order
 */
std::vector<std::string> splitRemovingEmpty(std::string_view str, std::string_view delimiter) {
  auto pieces = split(str, delimiter);
  const auto is_empty = [](const std::string& piece) { return piece.empty(); };
  const auto first_removed = std::remove_if(pieces.begin(), pieces.end(), is_empty);
  pieces.erase(first_removed, pieces.end());
  return pieces;
}
/**
 * Splits a string at every occurrence of a delimiter and applies trim() to each piece.
 * @param str the text to split
 * @param delimiter the separator
 * @return the trimmed pieces; pieces that become empty are kept
 */
std::vector<std::string> splitAndTrim(std::string_view str, std::string_view delimiter) {
  // trim is overloaded, so the std::string flavour has to be selected explicitly
  using StringTrimmer = std::string (*)(const std::string&);
  const StringTrimmer trim_piece = trim;
  return split_transformed(str, delimiter, trim_piece);
}
/**
 * Splits a string at every occurrence of a delimiter, trims each piece and drops the pieces
 * that are empty after trimming.
 * @param str the text to split
 * @param delimiter the separator
 * @return the non-empty pieces of splitAndTrim(str, delimiter), in their original order
 */
std::vector<std::string> splitAndTrimRemovingEmpty(std::string_view str, std::string_view delimiter) {
  auto pieces = splitAndTrim(str, delimiter);
  const auto is_empty = [](const std::string& piece) { return piece.empty(); };
  const auto first_removed = std::remove_if(pieces.begin(), pieces.end(), is_empty);
  pieces.erase(first_removed, pieces.end());
  return pieces;
}
/**
 * Parses a float from a string using std::stof.
 *
 * @param input the text to parse
 * @param output receives the parsed value; left untouched when parsing fails
 * @param cp what to do when std::stof throws std::invalid_argument or std::out_of_range:
 *           RETURN / NOTHING - return false,
 *           EXIT             - terminate the process with exit(1),
 *           EXCEPT           - let the exception propagate to the caller
 * @return true when a value was parsed, false otherwise
 * @throws std::invalid_argument, std::out_of_range only with the EXCEPT policy
 */
bool StringToFloat(const std::string& input, float &output, FailurePolicy cp /*= RETURN*/) {
  try {
    output = std::stof(input);
    return true;
  } catch (const std::invalid_argument&) {
    switch (cp) {
      case RETURN:
      case NOTHING:
        return false;
      case EXIT:
        exit(1);
      case EXCEPT:
        throw;  // rethrow the original exception object instead of throwing a copy of it
    }
  } catch (const std::out_of_range&) {
    switch (cp) {
      case RETURN:
      case NOTHING:
        return false;
      case EXIT:
        exit(1);
      case EXCEPT:
        throw;  // rethrow the original exception object instead of throwing a copy of it
    }
  }
  // Only reached after a failed parse with a policy value outside the enumerators above:
  // the parse still failed, so it must not be reported as a success.
  return false;
}
// Expands "${NAME}" references in source_string with the value that
// utils::Environment::getEnvironmentVariable() reports for NAME, or with an empty string when it
// reports no value. A reference that is directly preceded by a backslash is skipped by the scan,
// and "${}" (empty name) is left as it is. After the expansion every "\$" is turned into "$".
// Returns the expanded string.
// NOTE(review): the substitution is done with replaceAll(), so once an unescaped "${NAME}" is
// found, backslash-escaped occurrences of the same "${NAME}" are rewritten as well - confirm
// that this is intended.
// NOTE(review): the scan restarts from the beginning after every substitution, so a variable
// whose value contains a reference to itself looks like it would never terminate - confirm.
std::string replaceEnvironmentVariables(std::string source_string) {
std::string::size_type beg_seq = 0;
std::string::size_type end_seq = 0;
do {
beg_seq = source_string.find("${", beg_seq);
if (beg_seq == std::string::npos) {
break;
}
// escaped reference: step over the "${" and keep scanning
// (the continue jumps to the loop condition at the bottom)
if (beg_seq > 0 && source_string.at(beg_seq - 1) == '\\') {
beg_seq += 2;
continue;
}
end_seq = source_string.find('}', beg_seq + 2);
if (end_seq == std::string::npos) {
// no closing brace anywhere after this point, nothing left to expand
break;
}
// "${}" has no variable name: step over it
if (end_seq <= beg_seq + 2) {
beg_seq += 2;
continue;
}
auto env_var_length = end_seq - (beg_seq + 2);
// env_var is the bare name, env_var_wrapped is the name including the "${" and "}" (3 extra characters)
const std::string env_var = source_string.substr(beg_seq + 2, env_var_length);
const std::string env_var_wrapped = source_string.substr(beg_seq, env_var_length + 3);
auto env_value = utils::Environment::getEnvironmentVariable(env_var.c_str()).value_or("");
// replaces every occurrence of this reference, not only the one that was just found
source_string = replaceAll(source_string, env_var_wrapped, env_value);
beg_seq = 0; // restart
} while (beg_seq < source_string.size());
// unescape the dollar signs that were protected from expansion
source_string = replaceAll(source_string, "\\$", "$");
return source_string;
}
/**
 * Replaces the first occurrence of a substring.
 * @param input the text to search
 * @param from the substring to look for
 * @param to the text that takes the place of the first occurrence of from
 * @return a copy of input with the first occurrence of from replaced by to,
 *         or an unchanged copy when from does not occur
 */
std::string replaceOne(const std::string &input, const std::string &from, const std::string &to) {
  std::string result = input;
  const std::size_t first_hit = result.find(from);
  if (first_hit != std::string::npos) {
    result.replace(first_hit, from.size(), to);
  }
  return result;
}
/**
 * Replaces, in place, every occurrence of a substring.
 * The search continues after the inserted text, so text produced by a replacement is never
 * replaced again. An empty from_string matches before every character and at the end, which
 * inserts to_string at each of those positions.
 * @param source_string the string to modify
 * @param from_string the substring to look for
 * @param to_string the text that takes the place of each occurrence
 * @return a reference to source_string
 */
std::string& replaceAll(std::string& source_string, const std::string &from_string, const std::string &to_string) {
  std::size_t search_from = 0;
  for (;;) {
    const std::size_t hit = source_string.find(from_string, search_from);
    if (hit == std::string::npos) {
      break;
    }
    source_string.replace(hit, from_string.size(), to_string);
    // an empty pattern matches without consuming anything, so step one extra character
    // forward to guarantee progress
    search_from = hit + to_string.size() + (from_string.empty() ? 1 : 0);
  }
  return source_string;
}
/**
 * Replaces every occurrence of every key of replace_map with the value mapped to that key.
 *
 * All matches are located in the original text first and then applied from the end of the
 * string towards its beginning, so text inserted by one replacement is never searched again and
 * the positions of the matches that are still pending stay valid.
 *
 * Empty keys are ignored: an empty key matches everywhere without consuming any input, which
 * previously made the search loop run forever.
 *
 * Matches of different keys that overlap are not resolved against each other; keys are expected
 * not to overlap in the text. Matches that start at the same position are applied in map (key)
 * order.
 *
 * @param source_string the text to process
 * @param replace_map maps each text to search for to the text that replaces it
 * @return the text with all replacements applied
 */
std::string replaceMap(std::string source_string, const std::map<std::string, std::string> &replace_map) {
  struct Match {
    size_t position;
    const std::pair<const std::string, std::string>* entry;  // points into replace_map, no copies needed
  };
  std::vector<Match> matches;
  for (const auto& entry : replace_map) {
    const std::string& key = entry.first;
    if (key.empty()) {
      continue;
    }
    for (size_t pos = source_string.find(key); pos != std::string::npos; pos = source_string.find(key, pos + key.length())) {
      matches.push_back(Match{pos, &entry});
    }
  }
  // rightmost match first; stable to keep the outcome deterministic for equal positions
  std::stable_sort(matches.begin(), matches.end(), [](const Match& a, const Match& b) {
    return a.position > b.position;
  });
  for (const auto& match : matches) {
    // replace in place instead of copying the whole string after every replacement
    source_string.replace(match.position, match.entry->first.length(), match.entry->second);
  }
  return source_string;
}
namespace {
/**
 * Converts a 4-bit value to its hexadecimal digit.
 * @param nibble the value to convert; values below 10 map to '0'..'9', values from 10 on
 *               continue from 'A' or 'a'
 * @param uppercase selects 'A'.. instead of 'a'.. for values of 10 and above
 * @return the digit character; the conversion to char goes through gsl::narrow
 */
char nibble_to_hex(uint8_t nibble, bool uppercase) {
  if (nibble >= 10) {
    const char first_letter = uppercase ? 'A' : 'a';
    return gsl::narrow<char>(first_letter + nibble - 10);
  }
  return gsl::narrow<char>('0' + nibble);
}
/**
 * Packs four 6-bit base64 digits into three bytes.
 * @param digits the four digit values, most significant first
 * @param bytes output buffer; exactly three bytes are written to bytes[0..2]
 */
void base64_digits_to_bytes(const std::array<uint8_t, 4> digits, std::byte* const bytes) {
  const auto [d0, d1, d2, d3] = digits;
  // byte 0: all six bits of d0 followed by the top two bits of d1
  bytes[0] = static_cast<std::byte>((d0 << 2) | (d1 >> 4));
  // byte 1: the low four bits of d1 followed by the top four bits of d2
  bytes[1] = static_cast<std::byte>(((d1 & 0x0f) << 4) | (d2 >> 2));
  // byte 2: the low two bits of d2 followed by all six bits of d3
  bytes[2] = static_cast<std::byte>(((d2 & 0x03) << 6) | d3);
}
// Sentinel values stored in the decoding tables below. None of them collides with a real digit
// value (hex digits are at most 0x0f, base64 digits at most 0x3f).
// SKIP: the character is not a digit and is skipped by the decoders
constexpr uint8_t SKIP = 0xff;
// ILGL: the character is illegal in base64 input
constexpr uint8_t ILGL = 0xfe;
// PDNG: the base64 padding character '='
constexpr uint8_t PDNG = 0xfd;
// Maps an ASCII code (0..127) to the value of the hexadecimal digit it represents.
// '0'..'9', 'A'..'F' and 'a'..'f' map to 0x00..0x0f, every other character maps to SKIP.
// Each row covers eight consecutive codes.
constexpr std::array<uint8_t, 128> hex_lut =
{SKIP, SKIP, SKIP, SKIP, SKIP, SKIP, SKIP, SKIP,
SKIP, SKIP, SKIP, SKIP, SKIP, SKIP, SKIP, SKIP,
SKIP, SKIP, SKIP, SKIP, SKIP, SKIP, SKIP, SKIP,
SKIP, SKIP, SKIP, SKIP, SKIP, SKIP, SKIP, SKIP,
SKIP, SKIP, SKIP, SKIP, SKIP, SKIP, SKIP, SKIP,
SKIP, SKIP, SKIP, SKIP, SKIP, SKIP, SKIP, SKIP,
0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
0x08, 0x09, SKIP, SKIP, SKIP, SKIP, SKIP, SKIP,
SKIP, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, SKIP,
SKIP, SKIP, SKIP, SKIP, SKIP, SKIP, SKIP, SKIP,
SKIP, SKIP, SKIP, SKIP, SKIP, SKIP, SKIP, SKIP,
SKIP, SKIP, SKIP, SKIP, SKIP, SKIP, SKIP, SKIP,
SKIP, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, SKIP,
SKIP, SKIP, SKIP, SKIP, SKIP, SKIP, SKIP, SKIP,
SKIP, SKIP, SKIP, SKIP, SKIP, SKIP, SKIP, SKIP,
SKIP, SKIP, SKIP, SKIP, SKIP, SKIP, SKIP, SKIP};
// Encoding alphabets, indexed by the 6-bit digit value (0..63). The two alphabets differ only in
// the last two characters: "+/" for the standard one, "-_" for the URL-safe one.
constexpr const char base64_enc_lut[] = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/"; // NOLINT(cppcoreguidelines-avoid-c-arrays)
constexpr const char base64_url_enc_lut[] = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_"; // NOLINT(cppcoreguidelines-avoid-c-arrays)
// Maps an ASCII code (0..127) to the value of the base64 digit it represents.
// Both alphabets are accepted: '+' and '-' map to 0x3e, '/' and '_' map to 0x3f.
// Line feed (10) and carriage return (13) map to SKIP, '=' maps to PDNG and every other
// non-digit character maps to ILGL. Each row covers eight consecutive codes.
constexpr std::array<uint8_t, 128> base64_dec_lut =
{ILGL, ILGL, ILGL, ILGL, ILGL, ILGL, ILGL, ILGL,
ILGL, ILGL, SKIP, ILGL, ILGL, SKIP, ILGL, ILGL,
ILGL, ILGL, ILGL, ILGL, ILGL, ILGL, ILGL, ILGL,
ILGL, ILGL, ILGL, ILGL, ILGL, ILGL, ILGL, ILGL,
ILGL, ILGL, ILGL, ILGL, ILGL, ILGL, ILGL, ILGL,
ILGL, ILGL, ILGL, 0x3e, ILGL, 0x3e, ILGL, 0x3f,
0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x3a, 0x3b,
0x3c, 0x3d, ILGL, ILGL, ILGL, PDNG, ILGL, ILGL,
ILGL, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06,
0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e,
0x0f, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16,
0x17, 0x18, 0x19, ILGL, ILGL, ILGL, ILGL, 0x3f,
ILGL, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x20,
0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28,
0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f, 0x30,
0x31, 0x32, 0x33, ILGL, ILGL, ILGL, ILGL, ILGL};
} // namespace
/**
 * Decodes a single hexadecimal digit.
 * @param ch the character code of the digit
 * @param output receives the digit value (0..15) on success. For a 7-bit character that is not
 *               a hexadecimal digit it receives the SKIP sentinel; for codes above 127 it is
 *               left untouched.
 * @return true when ch is a hexadecimal digit, false otherwise
 */
bool from_hex(uint8_t ch, uint8_t& output) {
  const bool is_ascii = ch <= 127;
  if (is_ascii) {
    output = hex_lut[ch];
  }
  return is_ascii && output != SKIP;
}
/**
 * Decodes a hexadecimal string into a caller-provided buffer.
 * Characters that are not hexadecimal digits are ignored.
 * @param data output buffer
 * @param data_length in: capacity of data in bytes; out: number of bytes written. It is left
 *                    untouched when the capacity check fails, and is set even when false is
 *                    returned because of a dangling digit.
 * @param hex the text to decode
 * @return false when the capacity is smaller than hex.size() / 2 or when the text contains an
 *         odd number of hexadecimal digits, true otherwise
 */
bool from_hex(std::byte* data, size_t* data_length, std::string_view hex) {
  if (hex.size() / 2 > *data_length) {
    return false;
  }
  *data_length = 0;
  bool have_high_nibble = false;
  uint8_t high_nibble = 0;
  for (const char ch : hex) {
    const auto code = static_cast<uint8_t>(ch);
    if (code > 127) {
      continue;  // outside the lookup table: treated like any other non-digit
    }
    const uint8_t nibble = hex_lut[code];
    if (nibble == SKIP) {
      continue;
    }
    if (!have_high_nibble) {
      high_nibble = nibble;
      have_high_nibble = true;
      continue;
    }
    data[(*data_length)++] = static_cast<std::byte>(high_nibble << 4 | nibble);
    have_high_nibble = false;
  }
  // a digit without its pair means the input was malformed
  return !have_high_nibble;
}
/**
 * Decodes a hexadecimal string into a newly allocated byte vector.
 * @param hex the text to decode
 * @return the decoded bytes
 * @throws std::invalid_argument when the buffer-based from_hex() rejects the text
 */
std::vector<std::byte> from_hex(std::string_view hex) {
  size_t decoded_length = hex.size() / 2;  // upper bound: two digits per byte
  std::vector<std::byte> decoded(decoded_length);
  const bool well_formed = from_hex(decoded.data(), &decoded_length, hex);
  if (!well_formed) {
    throw std::invalid_argument("Hexencoded string is malformed");
  }
  // skipped characters make the result shorter than the upper bound
  decoded.resize(decoded_length);
  return decoded;
}
/**
 * Hex-encodes bytes into a caller-provided character buffer.
 * @param hex output buffer; two characters are written per input byte and no terminator is added
 * @param data_to_be_transformed the bytes to encode
 * @param uppercase whether the digits above 9 are written as 'A'..'F' instead of 'a'..'f'
 * @return the number of characters written
 * @throws std::length_error when twice the input size does not fit into size_t
 */
size_t to_hex(char* hex, std::span<const std::byte> data_to_be_transformed, bool uppercase) {
  const size_t byte_count = data_to_be_transformed.size();
  if (byte_count > std::numeric_limits<size_t>::max() / 2) {
    throw std::length_error("Data is too large to be hexencoded");
  }
  char* out = hex;
  for (const std::byte raw : data_to_be_transformed) {
    const auto value = static_cast<uint8_t>(raw);
    *out++ = nibble_to_hex(value >> 4, uppercase);   // high nibble first
    *out++ = nibble_to_hex(value & 0xf, uppercase);
  }
  return byte_count * 2;
}
/**
 * Hex-encodes bytes into a string.
 * @param data_to_be_transformed the bytes to encode
 * @param uppercase whether the digits above 9 are written as 'A'..'F' instead of 'a'..'f'
 * @return the encoded text, two characters per input byte
 * @throws std::length_error when the input is too large for the encoded size to be computed
 */
std::string to_hex(std::span<const std::byte> data_to_be_transformed, bool uppercase /*= false*/) {
  const size_t byte_count = data_to_be_transformed.size();
  if (byte_count > (std::numeric_limits<size_t>::max() / 2 - 1)) {
    throw std::length_error("Data is too large to be hexencoded");
  }
  std::string encoded(byte_count * 2, '\0');
  const size_t written = to_hex(encoded.data(), data_to_be_transformed, uppercase);
  gsl_Assert(written == encoded.size());
  return encoded;
}
// Decodes base64 text into a caller-provided buffer.
// Characters are classified with base64_dec_lut: SKIP characters are ignored, an ILGL character
// or a character code above 127 makes the call fail, PDNG ('=') counts as padding, everything
// else is a digit. Padding is optional, but when present it must complete the last group of
// four, and no digit may follow it.
//   data        - output buffer
//   data_length - in: capacity of data, which must be at least (base64.size() / 4 + 1) * 3;
//                 out: number of decoded bytes. Only written when the call succeeds.
//   base64      - the text to decode
// Returns true on success and false on malformed input or insufficient capacity; on failure a
// part of data may already have been written.
bool from_base64(std::byte* const data, size_t* const data_length, const std::string_view base64) {
if (*data_length < (base64.size() / 4 + 1) * 3) {
return false;
}
// digits collects the current group of up to four 6-bit values
std::array<uint8_t, 4> digits{};
size_t digit_counter = 0U;
size_t decoded_size = 0U;
size_t padding_counter = 0U;
size_t i = 0;
for (i = 0U; i < base64.size(); i++) {
const auto byte = static_cast<uint8_t>(base64[i]);
// the lookup table only covers 7-bit codes
if (byte > 127) {
return false;
}
const uint8_t decoded = base64_dec_lut[byte];
switch (decoded) {
case SKIP:
continue;
case ILGL:
return false;
case PDNG:
padding_counter++;
continue;
default:
// a digit after padding is malformed input
if (padding_counter > 0U) {
return false;
}
digits[digit_counter++] = decoded;
// a complete group of four digits yields three bytes
if (digit_counter == 4U) {
base64_digits_to_bytes(digits, data + decoded_size);
decoded_size += 3U;
digit_counter = 0U;
}
}
}
// if there is any padding, it has to fill the last group up to exactly four characters
if (padding_counter > 0U && padding_counter != 4U - digit_counter) {
return false;
}
// handle the incomplete last group, if any
switch (digit_counter) {
case 0:
break;
case 1:
// a single digit carries only six bits, which is not enough for a byte
return false;
case 2:
digits[2] = 0x00;
[[fallthrough]];
case 3: {
digits[3] = 0x00;
// decode into a temporary and copy only the bytes that are backed by real digits:
// two digits give one byte, three digits give two bytes
std::array<std::byte, 3> bytes_temp{};
base64_digits_to_bytes(digits, bytes_temp.data());
const size_t num_bytes = digit_counter - 1;
memcpy(data + decoded_size, bytes_temp.data(), num_bytes);
decoded_size += num_bytes;
break;
}
default:
return false;
}
*data_length = decoded_size;
return true;
}
/**
 * Decodes base64 text into a newly allocated byte vector.
 * @param base64 the text to decode
 * @return the decoded bytes
 * @throws std::invalid_argument when the buffer-based from_base64() rejects the text
 */
std::vector<std::byte> from_base64(const std::string_view base64) {
  // capacity required by the buffer-based overload: three bytes per started group of four characters
  size_t decoded_length = (base64.size() / 4 + 1) * 3;
  std::vector<std::byte> decoded(decoded_length);
  const bool well_formed = from_base64(decoded.data(), &decoded_length, base64);
  if (!well_formed) {
    throw std::invalid_argument("Base64 encoded string is malformed");
  }
  decoded.resize(decoded_length);
  return decoded;
}
/**
 * Base64-encodes bytes into a caller-provided character buffer.
 *
 * @param base64 output buffer, must not be null. Four characters are produced per started group
 *               of three input bytes (fewer for the last group when padded is false); no
 *               terminator is added.
 * @param raw_data the bytes to encode
 * @param url use the URL-safe alphabet ("-_") instead of the standard one ("+/")
 * @param padded whether an incomplete last group is filled up with '=' characters
 * @return the number of characters written
 * @throws std::length_error when the input is so large that the encoded length would not fit
 *         into size_t
 */
size_t to_base64(char* base64, const std::span<const std::byte> raw_data, bool url, bool padded) {
  gsl_Expects(base64);
  // Divide before multiplying: `max() * 3` wraps around in unsigned arithmetic, so the former
  // `max() * 3 / 4 - 3` did not evaluate to three quarters of the maximum.
  if (raw_data.size() > std::numeric_limits<size_t>::max() / 4 * 3 - 3) {
    throw std::length_error("Data is too large to be base64 encoded");
  }
  constexpr auto null_byte = static_cast<std::byte>(0x00);
  const char* enc_lut = url ? base64_url_enc_lut : base64_enc_lut;
  size_t base64_length = 0U;
  std::array<std::byte, 3> bytes{};
  for (size_t i = 0U; i < raw_data.size(); i += 3U) {
    // the last group may consist of one or two bytes only; the missing ones count as zero
    const bool b1_present = i + 1 < raw_data.size();
    const bool b2_present = i + 2 < raw_data.size();
    bytes[0] = raw_data[i];
    bytes[1] = b1_present ? raw_data[i + 1] : null_byte;
    bytes[2] = b2_present ? raw_data[i + 2] : null_byte;
    // the first two digits are always backed by real data
    base64[base64_length++] = enc_lut[(static_cast<uint8_t>(bytes[0]) & 0xfc) >> 2];
    base64[base64_length++] = enc_lut[(static_cast<uint8_t>(bytes[0]) & 0x03) << 4 | (static_cast<uint8_t>(bytes[1]) & 0xf0) >> 4];
    // the third and fourth digit exist only if the second and third byte do
    if (b1_present) {
      base64[base64_length++] = enc_lut[(static_cast<uint8_t>(bytes[1]) & 0x0f) << 2 | (static_cast<uint8_t>(bytes[2]) & 0xc0) >> 6];
    } else if (padded) {
      base64[base64_length++] = '=';
    }
    if (b2_present) {
      base64[base64_length++] = enc_lut[static_cast<uint8_t>(bytes[2]) & 0x3f];
    } else if (padded) {
      base64[base64_length++] = '=';
    }
  }
  return base64_length;
}
/**
 * Base64-encodes bytes into a string.
 * @param raw_data the bytes to encode
 * @param url use the URL-safe alphabet instead of the standard one
 * @param padded whether an incomplete last group is filled up with '=' characters
 * @return the encoded text
 */
std::string to_base64(const std::span<const std::byte> raw_data, bool url /*= false*/, bool padded /*= true*/) {
  // room for four characters per started group of three bytes; trimmed to the real length below
  std::string encoded((raw_data.size() / 3 + 1) * 4, '\0');
  const size_t written = to_base64(encoded.data(), raw_data, url, padded);
  gsl_Assert(written <= encoded.size());
  encoded.resize(written);
  return encoded;
}
/**
 * Renders bytes as text, escaping everything that is not printable.
 * Line feed, tab, carriage return, vertical tab and form feed become \n, \t, \r, \v and \f;
 * bytes accepted by std::isprint are copied as they are; every other byte becomes \x followed
 * by two lowercase hexadecimal digits.
 * @param data the bytes to render
 * @return the escaped text
 */
std::string escapeUnprintableBytes(std::span<const std::byte> data) {
  constexpr const char* hex_digits = "0123456789abcdef";
  std::string escaped;
  for (const std::byte raw : data) {
    const auto as_char = static_cast<char>(raw);
    switch (as_char) {
      case '\n': escaped += "\\n"; continue;
      case '\t': escaped += "\\t"; continue;
      case '\r': escaped += "\\r"; continue;
      case '\v': escaped += "\\v"; continue;
      case '\f': escaped += "\\f"; continue;
      default: break;
    }
    if (std::isprint(static_cast<unsigned char>(raw))) {
      escaped += as_char;
      continue;
    }
    const int value = std::to_integer<int>(raw);
    escaped += "\\x";
    escaped += hex_digits[(value >> 4) & 0xf];
    escaped += hex_digits[value & 0xf];
  }
  return escaped;
}
/**
 * Checks whether the given patterns occur in a string in the given order without overlapping.
 * @param str the text to search
 * @param patterns the substrings to look for; each one is searched after the end of the match
 *                 of the previous one
 * @return true when every pattern was found (also when patterns is empty), false otherwise
 */
bool matchesSequence(std::string_view str, const std::vector<std::string>& patterns) {
  for (const auto& pattern : patterns) {
    const size_t hit = str.find(pattern);
    if (hit == std::string_view::npos) {
      return false;
    }
    // continue behind this match: drop everything up to and including it
    str.remove_prefix(hit + pattern.size());
  }
  return true;
}
/**
 * Splits text of the form "<integer><spaces><unit>" into its number and its unit.
 * The number is parsed with std::from_chars and has to start at the very first character.
 * @param input the text to split
 * @param value receives the parsed number on success
 * @param unit receives everything after the number and the spaces following it (may be empty);
 *             only assigned on success
 * @return true on success, false when the text does not start with a number that fits into int64_t
 */
bool splitToValueAndUnit(std::string_view input, int64_t& value, std::string& unit) {
  const char* const first = input.data();
  const char* const last = first + input.size();
  const auto parsed = std::from_chars(first, last, value);
  if (parsed.ec != std::errc() || parsed.ptr == first) {
    return false;
  }
  const char* unit_begin = parsed.ptr;
  for (; unit_begin != last && *unit_begin == ' '; ++unit_begin) {
    // step over the spaces between the number and the unit
  }
  unit.assign(unit_begin, last);
  return true;
}
/**
 * Parses the textual description of a single character.
 * @param input either empty, a single character, or one of the two-character escape sequences
 *              \0 \a \b \t \n \v \f \r \\
 * @return std::nullopt for empty input, the character itself for one-character input, the
 *         character denoted by the escape sequence for a supported escape sequence, and
 *         core::ParsingErrorCode::GeneralParsingError as the unexpected value for anything else
 */
nonstd::expected<std::optional<char>, std::error_code> parseCharacter(const std::string_view input) {
  switch (input.size()) {
    case 0:
      return std::nullopt;
    case 1:
      return input.front();
    case 2:
      if (input.front() != '\\') {
        break;
      }
      switch (input.back()) {
        case '0': return '\0';  // Null
        case 'a': return '\a';  // Bell
        case 'b': return '\b';  // Backspace
        case 't': return '\t';  // Horizontal Tab
        case 'n': return '\n';  // Line Feed
        case 'v': return '\v';  // Vertical Tab
        case 'f': return '\f';  // Form Feed
        case 'r': return '\r';  // Carriage Return
        case '\\': return '\\';
        default: break;
      }
      break;
    default:
      break;
  }
  return nonstd::make_unexpected(core::ParsingErrorCode::GeneralParsingError);
}
/**
 * Replaces the escape sequences \0 \a \b \t \n \v \f \r \\ with the characters they denote.
 * A backslash that is followed by any other character, or that is the last character of the
 * input, is copied unchanged.
 * @param input the text containing escape sequences
 * @return the text with the supported escape sequences replaced
 */
std::string replaceEscapedCharacters(std::string_view input) {
  std::string unescaped;
  unescaped.reserve(input.size());
  size_t idx = 0;
  while (idx < input.size()) {
    const char current = input[idx];
    const bool has_next = idx + 1 < input.size();
    if (current != '\\' || !has_next) {
      unescaped.push_back(current);
      ++idx;
      continue;
    }
    // current is a backslash with a character behind it
    char replacement = '\\';
    size_t consumed = 2;
    switch (input[idx + 1]) {
      case '0': replacement = '\0'; break;  // Null
      case 'a': replacement = '\a'; break;  // Bell
      case 'b': replacement = '\b'; break;  // Backspace
      case 't': replacement = '\t'; break;  // Horizontal Tab
      case 'n': replacement = '\n'; break;  // Line Feed
      case 'v': replacement = '\v'; break;  // Vertical Tab
      case 'f': replacement = '\f'; break;  // Form Feed
      case 'r': replacement = '\r'; break;  // Carriage Return
      case '\\': replacement = '\\'; break;
      default:
        // unknown escape: keep the backslash, the following character is handled on its own
        consumed = 1;
        break;
    }
    unescaped.push_back(replacement);
    idx += consumed;
  }
  return unescaped;
}
/**
 * Concatenates a number of copies of a string.
 * @param str the text to repeat
 * @param count how many copies to concatenate
 * @return str repeated count times; empty when count is 0 or str is empty
 */
std::string repeat(std::string_view str, size_t count) {
  std::string repeated;
  repeated.reserve(count * str.length());
  for (size_t remaining = count; remaining > 0; --remaining) {
    repeated += str;
  }
  return repeated;
}
/**
 * Returns the part of a string that follows the last occurrence of a delimiter character.
 * @param input the text to search
 * @param delimiter the character to look for
 * @return everything after the last delimiter (empty when the delimiter is the last character),
 *         or a copy of the whole input when it does not contain the delimiter
 */
std::string partAfterLastOccurrenceOf(std::string_view input, char delimiter) {
  const size_t delimiter_pos = input.rfind(delimiter);
  if (delimiter_pos != std::string_view::npos) {
    input.remove_prefix(delimiter_pos + 1);
  }
  return std::string{input};
}
} // namespace org::apache::nifi::minifi::utils::string