blob: 53476e5cbb4fe335fa8c20b6f2ab99c452756684 [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
#ifndef DBCOMMON_SRC_DBCOMMON_UTILS_STRING_UTIL_H_
#define DBCOMMON_SRC_DBCOMMON_UTILS_STRING_UTIL_H_
#include <iomanip>
#include <sstream>
#include <string>
#include <utility>
#include <vector>
namespace dbcommon {
// Collection of string helpers. Every member function is static, so the
// class is used purely as a namespace-like holder.
class StringUtil {
 public:
  StringUtil() {}
  ~StringUtil() {}

  // The helpers below are only declared here; they are defined out of line,
  // so consult their definitions for the exact semantics.
  static bool iequals(const std::string &str1, const std::string &str2);
  static void replace(std::string *subject, const std::string &search,
                      const std::string &replace);
  static std::string regexReplace(std::string *subject,
                                  const std::string &pattern,
                                  const std::string &replace);
  static void toLower(std::string *str);
  static std::string lower(const std::string &str);
  static std::string &trim(std::string &s);         // NOLINT
  static std::string &trimNewLine(std::string &s);  // NOLINT
  static std::vector<std::string> split(const std::string &s, char delimiter);
  static bool StartWith(const std::string &str, const std::string &strStart);

  // Formats `value` through an output stream in fixed notation with `n`
  // digits after the decimal point.
  template <typename T>
  static std::string toStringWithPrecision(const T value, const int n) {
    std::ostringstream out;
    out << std::setiosflags(std::ios::fixed) << std::setprecision(n) << value;
    return out.str();
  }

  // Counts the non-overlapping occurrences of `sub` in `s`, scanning from
  // left to right.
  //
  // @return number of occurrences; 0 when `sub` is empty
  static int countReplicates(const std::string &s, const std::string &sub) {
    // An empty needle is found at every position without ever advancing the
    // search cursor, which would spin forever; treat it as "no occurrence".
    if (sub.empty()) return 0;
    int res = 0;
    for (std::size_t pos = s.find(sub); pos != std::string::npos;
         pos = s.find(sub, pos + sub.size())) {
      ++res;
    }
    return res;
  }

  // Renders `srcLen` bytes starting at `srcBin` as printable text:
  //  - a backslash becomes two backslashes,
  //  - bytes below 0x20 or above 0x7e become a backslash followed by three
  //    octal digits,
  //  - every other byte is copied unchanged.
  static std::string toOct(const char *srcBin, uint64_t srcLen) {
    std::string output;
    // Every input byte yields at least one output byte.
    output.reserve(static_cast<std::string::size_type>(srcLen));
    // The index has the same type as the length so that long inputs cannot
    // overflow a narrower signed counter.
    for (uint64_t i = 0; i < srcLen; ++i) {
      const unsigned char byte = static_cast<unsigned char>(srcBin[i]);
      if (byte == '\\') {
        output.append("\\\\");
      } else if (byte < 0x20 || byte > 0x7e) {
        output.append(1, '\\');
        output.append(1, static_cast<char>('0' + byte / 64));
        output.append(1, static_cast<char>('0' + byte / 8 % 8));
        output.append(1, static_cast<char>('0' + byte % 8));
      } else {
        output.append(1, static_cast<char>(byte));
      }
    }
    return output;
  }

  // Convenience overload of toOct() for a whole std::string.
  static std::string toOct(const std::string &srcStr) {
    return toOct(srcStr.data(), srcStr.size());
  }

  // Splits "<ip>:<port>" at the LAST ':' and parses the port as a decimal
  // number.
  //
  // @param srcStr input text
  // @param ip     receives everything before the last ':' (only on success)
  // @param port   receives the parsed port (only on success)
  // @return true on success; false when there is no ':', the port is not a
  //         number, has trailing characters, carries a minus sign, or does
  //         not fit into 16 bits. The outputs are untouched on failure.
  static bool parseIpPortString(const std::string &srcStr, std::string *ip,
                                uint16_t *port) {
    const std::size_t sep = srcStr.find_last_of(':');
    if (sep == std::string::npos) {
      return false;
    }
    try {
      const std::string portText = srcStr.substr(sep + 1);
      // std::stoul silently negates "-N" into a huge (or even small)
      // unsigned value, so a minus sign has to be refused explicitly.
      if (portText.find('-') != std::string::npos) return false;
      std::size_t consumed = 0;
      const unsigned long parsed = std::stoul(portText, &consumed);  // NOLINT
      // Refuse trailing garbage ("80abc") and values that would be
      // truncated when narrowed to uint16_t ("70000").
      if (consumed != portText.size() || parsed > 0xFFFFul) return false;
      std::string host = srcStr.substr(0, sep);
      *ip = std::move(host);
      *port = static_cast<uint16_t>(parsed);
    } catch (...) {
      // std::stoul throws on non-numeric or overflowing input.
      return false;
    }
    return true;
  }

  // Returns true when none of the `len` bytes at `str` has its high bit set,
  // i.e. every byte is in the 7-bit ASCII range 0x00..0x7f. An empty input
  // is considered ASCII.
  static bool isAsciiEncoding(const char *str, uint64_t len) {
    for (uint64_t i = 0; i < len; ++i) {
      // 0x80 itself is already outside ASCII, hence ">=" rather than ">".
      if (static_cast<unsigned char>(str[i]) >= 0x80) return false;
    }
    return true;
  }

  // Check SQL LIKE matching (defined out of line).
  // @param t    byte stream to be matched (presumably the text, mirroring
  //             MatchText() later in this header)
  // @param tlen length of `t` in bytes
  // @param p    byte stream of the pattern (presumably; see note on `t`)
  // @param plen length of `p` in bytes
  static bool MatchUtf8Pattern(const char *t, int tlen, const char *p,
                               int plen);
  static bool MatchAsciiPattern(const char *t, int tlen, const char *p,
                                int plen);
};
// Returns the byte length of the UTF-8 sequence announced by the lead byte at
// `src`. Only that first byte is inspected. A byte that matches none of the
// recognised lead-byte patterns (e.g. a continuation byte) yields 1.
inline int utf8_mblen(const char *src) {
  const unsigned char lead = *reinterpret_cast<const unsigned char *>(src);
  if ((lead & 0x80) == 0) return 1;     // 0xxxxxxx: single byte
  if ((lead & 0xe0) == 0xc0) return 2;  // 110xxxxx
  if ((lead & 0xf0) == 0xe0) return 3;  // 1110xxxx
  if ((lead & 0xf8) == 0xf0) return 4;  // 11110xxx
#ifdef NOT_USED
  if ((lead & 0xfc) == 0xf8) return 5;  // 111110xx
  if ((lead & 0xfe) == 0xfc) return 6;  // 1111110x
#endif
  // Unrecognised lead byte: step over it as a single byte.
  return 1;
}
// Counts the characters in the `len` bytes starting at `str`, where the width
// of each character is whatever utf8_mblen() reports for its first byte.
// No validation is performed: the cursor simply hops from lead byte to lead
// byte until it reaches or passes the end of the range.
inline std::size_t strlen_utf8(const char *str, std::size_t len) {
  const char *const stop = str + len;
  std::size_t nChars = 0;
  for (const char *cur = str; cur < stop; cur += utf8_mblen(cur)) {
    ++nChars;
  }
  return nChars;
}
// Returns the length of `val` once trailing space characters (' ') are
// ignored. Only the first `len` bytes are considered; the data itself is not
// modified.
inline uint64_t bpCharTrueLen(const char *val, uint64_t len) {
  uint64_t trueLen = len;
  while (trueLen > 0) {
    if (val[trueLen - 1] != ' ') break;
    --trueLen;
  }
  return trueLen;
}
inline std::string newBlankPaddedChar(const char *val, uint64_t actualLen,
uint64_t expectedNChar) {
while (actualLen != 0 && val[actualLen - 1] == ' ') --actualLen;
uint64_t nChar = strlen_utf8(val, actualLen);
if (nChar >= expectedNChar) {
return std::move(std::string(val, actualLen));
} else {
std::string ret(val, actualLen);
ret.append(expectedNChar - nChar, ' ');
actualLen += expectedNChar - nChar;
return std::move(ret);
}
}
// TODO(chiyang): the following code is adapted from PostgreSQL; there is room
// for further optimization.
// Compares the character at `p1` with the character at `p2`, byte by byte.
// Character widths come from utf8_mblen().
//
// @return 1 when both characters have the same width and identical bytes,
//         0 otherwise
inline static int wchareq(const char *p1, const char *p2) {
  // Cheap rejection: differing first bytes can never be equal characters.
  if (*p1 != *p2) return 0;
  const int nBytes = utf8_mblen(p1);
  if (nBytes != utf8_mblen(p2)) return 0;
  // Same width: every byte of the sequence has to agree.
  for (int i = 0; i < nBytes; ++i) {
    if (p1[i] != p2[i]) return 0;
  }
  return 1;
}
// Character comparison used by MatchText(); forwards to wchareq().
#define CHAREQ(p1, p2) wchareq(p1, p2)
// Result values returned by MatchText().
#define LIKE_TRUE true
#define LIKE_FALSE false
// NOTE: LIKE_ABORT expands to the same value as LIKE_FALSE, so the two
// outcomes cannot be told apart by code that inspects the result.
#define LIKE_ABORT false
// Moves `p` forward by one character and shrinks `plen` by the number of
// bytes skipped. The character width is taken from utf8_mblen(p).
// Both arguments are updated in place.
inline void NextChar(const char *&p, int &plen) {  // NOLINT
  // Plain local name: identifiers containing a double underscore are
  // reserved for the implementation.
  const int charBytes = utf8_mblen(p);
  p += charBytes;
  plen -= charBytes;
}
// Matches the text `t` (tlen bytes) against the LIKE-style pattern `p`
// (plen bytes).
//
// Pattern syntax handled below:
//   '%'  matches any run of characters (a run of '%' behaves like one '%')
//   '_'  matches exactly one character
//   '\'  makes the following pattern character match literally
// Any other pattern character must equal the text character (CHAREQ).
// Both inputs are walked one character at a time with NextChar().
//
// @param t    text to test
// @param tlen remaining length of `t` in bytes
// @param p    pattern
// @param plen remaining length of `p` in bytes
// @return LIKE_TRUE on a match, otherwise LIKE_FALSE / LIKE_ABORT (which
//         expand to the same value)
inline static bool MatchText(const char *t, int tlen, const char *p, int plen) {
/* Fast path for match-everything pattern */
if ((plen == 1) && (*p == '%')) return LIKE_TRUE;
while ((tlen > 0) && (plen > 0)) {
if (*p == '\\') {
/* Next pattern char must match literally, whatever it is */
NextChar(p, plen);
// A backslash with nothing after it, or a mismatch, fails the match.
if ((plen <= 0) || !CHAREQ(t, p)) return LIKE_FALSE;
} else if (*p == '%') {
/* %% is the same as % according to the SQL standard */
/* Advance past all %'s */
while ((plen > 0) && (*p == '%')) NextChar(p, plen);
/* Trailing percent matches everything. */
if (plen <= 0) return LIKE_TRUE;
/*
* Otherwise, scan for a text position at which we can match the
* rest of the pattern.
*/
while (tlen > 0) {
/*
* Optimization to prevent most recursion: don't recurse
* unless first pattern char might match this text char.
*/
if (CHAREQ(t, p) || (*p == '\\') || (*p == '_')) {
// The recursive call returns bool, so `matched` holds 0 or 1 here.
int matched = MatchText(t, tlen, p, plen);
if (matched != LIKE_FALSE) return matched; /* TRUE or ABORT */
}
NextChar(t, tlen);
}
/*
* End of text with no match, so no point in trying later places
* to start matching this pattern.
*/
return LIKE_ABORT;
} else if ((*p != '_') && !CHAREQ(t, p)) {
/*
* Not the single-character wildcard and no explicit match? Then
* time to quit...
*/
return LIKE_FALSE;
}
// The current text and pattern characters agree: consume one of each.
NextChar(t, tlen);
NextChar(p, plen);
}
if (tlen > 0) return LIKE_FALSE; /* end of pattern, but not of text */
/* End of input string. Do we have matching pattern remaining? */
while ((plen > 0) && (*p == '%')) /* allow multiple %'s at end of //NOLINT
* pattern */
NextChar(p, plen);
if (plen <= 0) return LIKE_TRUE;
/*
* End of text with no match, so no point in trying later places to start
* matching this pattern.
*/
return LIKE_ABORT;
} /* MatchText() */
} // namespace dbcommon
#endif // DBCOMMON_SRC_DBCOMMON_UTILS_STRING_UTIL_H_