blob: d922311df1cb31912437f396159e993726f0d8d0 [file] [log] [blame]
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#include "arrow/util/string.h"
#include <algorithm>
#include <cctype>
#include <memory>
#include "arrow/status.h"
namespace arrow {
// Uppercase hexadecimal digits, indexed by nibble value (0-15).
// The characters are in ascending order, which ParseHexValue relies on when
// it binary-searches this table.
static const char* kAsciiTable = "0123456789ABCDEF";

// Returns the uppercase hexadecimal rendering of the `length` bytes starting
// at `data`: two characters per input byte, high nibble first.
std::string HexEncode(const uint8_t* data, size_t length) {
  std::string encoded;
  encoded.reserve(2 * length);
  for (const uint8_t* cursor = data; cursor != data + length; ++cursor) {
    const unsigned high_nibble = *cursor >> 4;
    const unsigned low_nibble = *cursor & 0x0F;
    encoded += kAsciiTable[high_nibble];
    encoded += kAsciiTable[low_nibble];
  }
  return encoded;
}
// Returns a copy of the `length` characters starting at `data` in which
// double quotes, backslashes, tabs, carriage returns and newlines are
// replaced by their two-character backslash escapes. Every other character
// is copied through unchanged.
std::string Escape(const char* data, size_t length) {
  std::string result;
  // Lower bound only: each escaped character adds one extra byte.
  result.reserve(length);
  const char* const stop = data + length;
  for (const char* cur = data; cur != stop; ++cur) {
    const char ch = *cur;
    if (ch == '"') {
      result.append(R"(\")");
    } else if (ch == '\\') {
      result.append(R"(\\)");
    } else if (ch == '\t') {
      result.append(R"(\t)");
    } else if (ch == '\r') {
      result.append(R"(\r)");
    } else if (ch == '\n') {
      result.append(R"(\n)");
    } else {
      result += ch;
    }
  }
  return result;
}
// Convenience overload: hex-encodes `length` chars starting at `data` by
// viewing them as raw bytes and delegating to the uint8_t overload.
std::string HexEncode(const char* data, size_t length) {
  const uint8_t* bytes = reinterpret_cast<const uint8_t*>(data);
  return HexEncode(bytes, length);
}
// Convenience overload: hex-encodes the characters viewed by `str`.
std::string HexEncode(util::string_view str) {
  const char* chars = str.data();
  const size_t num_chars = str.size();
  return HexEncode(chars, num_chars);
}
// Convenience overload: escapes the characters viewed by `str`.
std::string Escape(util::string_view str) {
  const char* chars = str.data();
  const size_t num_chars = str.size();
  return Escape(chars, num_chars);
}
// Parses the two hexadecimal digits at data[0] and data[1] into a single byte
// written to `*out` (data[0] is the high nibble).
//
// Only characters present in kAsciiTable are accepted; anything else yields
// Status::Invalid and leaves `*out` untouched. Both characters are always
// read, so `data` must point to at least two readable chars.
Status ParseHexValue(const char* data, uint8_t* out) {
  const char* const table_begin = kAsciiTable;
  const char* const table_end = kAsciiTable + 16;

  // Binary-searches the (sorted) digit table. Returns the digit's value, or
  // -1 when `c` is not one of the table's characters.
  auto digit_value = [&](char c) -> int {
    const char* hit = std::lower_bound(table_begin, table_end, c);
    if (hit == table_end || *hit != c) {
      return -1;
    }
    return static_cast<int>(hit - table_begin);
  };

  const int high = digit_value(data[0]);
  const int low = digit_value(data[1]);
  if (high < 0 || low < 0) {
    return Status::Invalid("Encountered non-hex digit");
  }

  *out = static_cast<uint8_t>((high << 4) | low);
  return Status::OK();
}
namespace internal {
// Splits `v` at every occurrence of `delimiter` and returns the pieces as
// views into `v`, in order. The delimiter itself is not included.
//
// The result always has at least one element: an input without any delimiter
// (including an empty input) yields a single piece, and adjacent delimiters
// produce empty pieces.
std::vector<util::string_view> SplitString(util::string_view v, char delimiter) {
  std::vector<util::string_view> pieces;
  size_t piece_begin = 0;
  size_t hit = v.find(delimiter, piece_begin);
  while (hit != std::string::npos) {
    pieces.push_back(v.substr(piece_begin, hit - piece_begin));
    piece_begin = hit + 1;
    hit = v.find(delimiter, piece_begin);
  }
  // Whatever follows the last delimiter (possibly nothing) is the final piece.
  pieces.push_back(v.substr(piece_begin));
  return pieces;
}
// Shared implementation of JoinStrings: concatenates the elements of
// `strings`, inserting `delimiter` between consecutive elements. An empty
// vector yields an empty string.
//
// StringLike must be explicitly convertible to std::string and expose
// begin()/end() over its characters.
template <typename StringLike>
static std::string JoinStringLikes(const std::vector<StringLike>& strings,
                                   util::string_view delimiter) {
  auto it = strings.begin();
  const auto last = strings.end();
  if (it == last) {
    return "";
  }
  // Seed with the first element so the delimiter only appears between items.
  std::string joined = std::string(*it);
  for (++it; it != last; ++it) {
    joined.append(delimiter.begin(), delimiter.end());
    joined.append(it->begin(), it->end());
  }
  return joined;
}
// Joins the given string views into one string, placing `delimiter` between
// consecutive elements.
std::string JoinStrings(const std::vector<util::string_view>& strings,
                        util::string_view delimiter) {
  return JoinStringLikes<util::string_view>(strings, delimiter);
}
std::string JoinStrings(const std::vector<std::string>& strings,
util::string_view delimiter) {
return JoinStringLikes(strings, delimiter);
}
// True for the two characters TrimString strips: space and horizontal tab.
// Newlines and other control characters are deliberately not matched here.
static constexpr bool IsWhitespace(char c) { return (c == '\t') || (c == ' '); }

// Returns `value` with leading and trailing spaces/tabs removed. Interior
// whitespace is preserved; a string made only of spaces/tabs becomes empty.
std::string TrimString(std::string value) {
  // Strip the leading run.
  const auto first_kept = std::find_if_not(value.begin(), value.end(), IsWhitespace);
  value.erase(value.begin(), first_kept);

  // Strip the trailing run: search backwards, then convert the reverse
  // iterator to the forward position just past the last kept character.
  const auto last_kept = std::find_if_not(value.rbegin(), value.rend(), IsWhitespace);
  value.erase(last_kept.base(), value.end());

  return value;
}
// Returns true when `left` and `right` have the same length and every pair of
// corresponding characters compares equal after std::tolower.
bool AsciiEqualsCaseInsensitive(util::string_view left, util::string_view right) {
  // TODO: ASCII validation
  if (left.size() != right.size()) {
    return false;
  }
  // Cast through unsigned char before calling std::tolower, as required for
  // characters with negative char values.
  const auto same_ignoring_case = [](char a, char b) {
    return std::tolower(static_cast<unsigned char>(a)) ==
           std::tolower(static_cast<unsigned char>(b));
  };
  return std::equal(left.begin(), left.end(), right.begin(), same_ignoring_case);
}
// Returns a copy of `value` with each character passed through std::tolower.
std::string AsciiToLower(util::string_view value) {
  // TODO: ASCII validation
  std::string lowered = std::string(value);
  for (char& ch : lowered) {
    // Go through unsigned char so negative char values are valid arguments.
    ch = static_cast<char>(std::tolower(static_cast<unsigned char>(ch)));
  }
  return lowered;
}
// Returns a copy of `value` with each character passed through std::toupper.
std::string AsciiToUpper(util::string_view value) {
  // TODO: ASCII validation
  std::string uppered = std::string(value);
  for (char& ch : uppered) {
    // Go through unsigned char so negative char values are valid arguments.
    ch = static_cast<char>(std::toupper(static_cast<unsigned char>(ch)));
  }
  return uppered;
}
// Replaces the first occurrence of `token` in `s` with `replacement`.
//
// Returns the resulting string, or util::nullopt when `token` does not occur
// in `s`. Only the first match is substituted; later occurrences are copied
// through unchanged.
util::optional<std::string> Replace(util::string_view s, util::string_view token,
                                    util::string_view replacement) {
  const size_t match_pos = s.find(token);
  if (match_pos == std::string::npos) {
    return util::nullopt;
  }
  const std::string before = s.substr(0, match_pos).to_string();
  const std::string after = s.substr(match_pos + token.size()).to_string();
  return before + replacement.to_string() + after;
}
} // namespace internal
} // namespace arrow