blob: 3e0a92213922a7b02cb91f11f00b276c5236d6d2 [file] [log] [blame]
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#include <algorithm>
#include "gutil/strings/substitute.h"
#include "util/bit-util.h"
#include "util/string-util.h"
#include "common/names.h"
namespace impala {
// Stores in '*result' the leading bytes of 'str', keeping at most 'max_length' of them.
// 'result' must not be null. Always returns Status::OK().
// Note: a negative 'max_length' becomes a huge unsigned count when handed to substr(),
// so the whole string is copied in that case.
Status TruncateDown(const string& str, int32_t max_length, string* result) {
  DCHECK(result != nullptr);
  const int32_t available = static_cast<int32_t>(str.length());
  const int32_t bytes_to_keep = std::min(available, max_length);
  *result = str.substr(0, bytes_to_keep);
  return Status::OK();
}
// Computes in '*result' a string of at most 'max_length' bytes that is greater than or
// equal to 'str' when bytes are compared as unsigned values.
//
// If 'str' already fits in 'max_length' bytes it is copied unchanged. Otherwise the
// first 'max_length' bytes are taken, trailing 0xFF bytes (which cannot be incremented)
// are dropped, and the last remaining byte is incremented by one.
//
// Returns an error if no such string exists, i.e. 'max_length' is 0 or every one of the
// first 'max_length' bytes is 0xFF; '*result' is not modified in that case.
// A negative 'max_length' is treated as "no limit" and 'str' is copied unchanged.
// 'result' must not be null.
Status TruncateUp(const string& str, int32_t max_length, string* result) {
  DCHECK(result != nullptr);
  if (max_length < 0 || str.length() <= static_cast<size_t>(max_length)) {
    *result = str;
    return Status::OK();
  }

  // Bytes are inspected as unsigned char so the 0xFF test does not depend on whether
  // plain 'char' is signed on the target platform.
  constexpr unsigned char kMaxByte = 0xFF;
  int i = max_length - 1;
  while (i >= 0 && static_cast<unsigned char>(str[i]) == kMaxByte) --i;
  if (i < 0) {
    // Nothing to increment: either no bytes may be kept or all kept bytes are 0xFF.
    return Status("TruncateUp() couldn't increase string.");
  }

  // Build the value in a local first so that 'result' may alias 'str'.
  string truncated = str.substr(0, i + 1);
  // The increment is done on an unsigned value; it cannot wrap because the byte is
  // known not to be 0xFF.
  unsigned char uch = static_cast<unsigned char>(truncated[i]);
  uch += 1;
  truncated[i] = static_cast<char>(uch);
  result->swap(truncated);
  return Status::OK();
}
// Returns true if 'item' is exactly equal to one of the elements of the comma-separated
// list 'cs_list'. Elements are compared verbatim (no trimming, case-sensitive).
// An empty list contains nothing, and the empty element after a trailing comma is never
// examined because the scan stops once the end of the list is reached.
bool CommaSeparatedContains(const std::string& cs_list, const std::string& item) {
  const size_t list_len = cs_list.size();
  size_t elem_begin = 0;
  while (elem_begin < list_len) {
    const size_t delim = cs_list.find(',', elem_begin);
    const bool is_last_elem = (delim == std::string::npos);
    // The last element runs to the end of the list, the others up to the next comma.
    const size_t elem_len = (is_last_elem ? list_len : delim) - elem_begin;
    if (cs_list.compare(elem_begin, elem_len, item) == 0) return true;
    if (is_last_elem) return false;
    elem_begin = delim + 1;
  }
  return false;
}
// Returns true if the final bytes of 'full_string' are exactly 'end'.
// An empty 'end' is a suffix of every string.
bool EndsWith(const std::string& full_string, const std::string& end) {
  const size_t suffix_len = end.size();
  const size_t total_len = full_string.size();
  // A suffix longer than the string itself can never match.
  if (total_len < suffix_len) return false;
  const size_t suffix_start = total_len - suffix_len;
  return full_string.compare(suffix_start, suffix_len, end) == 0;
}
// Scans the byte range ['start', 'end') for a leading run of identifier bytes, i.e.
// ASCII letters, ASCII digits or '_'.
// Returns a pointer to the first byte after that run ('end' if the run reaches the end
// of the range), or nullptr if the range is empty or its first byte is not an
// identifier byte.
const uint8_t* FindEndOfIdentifier(const uint8_t* start, const uint8_t* end) {
  const auto is_identifier_byte = [](uint8_t b) {
    if (b == '_') return true;
    if (b >= '0' && b <= '9') return true;
    if (b >= 'a' && b <= 'z') return true;
    return b >= 'A' && b <= 'Z';
  };

  if (start == end || !is_identifier_byte(*start)) return nullptr;

  const uint8_t* cursor = start + 1;
  while (cursor != end && is_identifier_byte(*cursor)) ++cursor;
  return cursor;
}
// Returns the byte offset, within the 'len' bytes at 'ptr', that is reached after
// skipping 'index' characters from the start of the buffer. 'index' must be >= 0.
// A byte for which BitUtil::IsUtf8StartByte() is false where a character should begin
// is treated as malformed and counted as one character on its own. Otherwise the
// position advances by BitUtil::NumBytesInUtf8Encoding() of the start byte (presumably
// the encoded length announced by that byte - confirm against BitUtil).
// Returns 'len' if the buffer holds fewer than 'index' characters.
int FindUtf8PosForward(const uint8_t* ptr, const int len, int index) {
  DCHECK_GE(index, 0);
  int pos = 0;
  while (index > 0 && pos < len) {
    // Counting malformed UTF8 characters.
    // The bounds checks must come before ptr[pos] is evaluated: 'pos' can reach 'len'
    // inside this loop and reading ptr[len] would be out of bounds.
    while (index > 0 && pos < len && !BitUtil::IsUtf8StartByte(ptr[pos])) {
      ++pos;
      --index;
    }
    if (index == 0 || pos == len) break;
    pos += BitUtil::NumBytesInUtf8Encoding(ptr[pos]);
    --index;
  }
  // A truncated trailing character can push 'pos' past the end; clamp to 'len'.
  if (pos >= len) return len;
  return pos;
}
// Returns the byte offset, within the 'len' bytes at 'ptr', of the character that is
// 'index' characters away from the end of the buffer ('index' is 0-based, so 0 refers
// to the last character). 'index' must be >= 0. Returns -1 if the buffer does not hold
// that many characters.
// Bytes that cannot be attributed to a character introduced by a start byte are each
// counted as one character.
int FindUtf8PosBackward(const uint8_t* ptr, const int len, int index) {
  DCHECK_GE(index, 0);
  // 'unread_end' is one past the last byte that has not been counted yet.
  int unread_end = len;
  int cur = len - 1;
  for (; cur >= 0; --cur) {
    // Walk left until 'cur' rests on the start byte of the rearmost character.
    while (cur >= 0 && !BitUtil::IsUtf8StartByte(ptr[cur])) --cur;
    if (cur < 0) {
      // No start byte is left: every byte before 'unread_end' counts as one character.
      return index < unread_end ? unread_end - index - 1 : -1;
    }
    const int encoded_len = BitUtil::NumBytesInUtf8Encoding(ptr[cur]);
    // Bytes that follow the character beyond its encoded length are stray bytes, each
    // counted as a character of its own. A character with fewer bytes than its encoded
    // length yields no stray bytes and is still counted as a single character.
    const int stray_bytes = std::max(unread_end - cur - encoded_len, 0);
    if (index < stray_bytes) {
      // The requested character is one of the stray bytes.
      return unread_end - index - 1;
    }
    // Consume the stray bytes plus the character at 'cur'. Since index >= stray_bytes
    // here, the new index is at least -1, and -1 means 'cur' is the answer.
    index -= stray_bytes + 1;
    if (index < 0) return cur;
    unread_end = cur;
  }
  DCHECK_EQ(cur, -1);
  return -1;
}
// Moves the put position one step back from where it currently is, so that the next
// write starts one character earlier. Does nothing unless the current put position is
// greater than zero.
void StringStreamPop::move_back() {
  if (!(tellp() > 0)) return;
  seekp(-1, std::ios_base::cur);
}
}