| /* |
| * Licensed to the Apache Software Foundation (ASF) under one |
| * or more contributor license agreements. See the NOTICE file |
| * distributed with this work for additional information |
| * regarding copyright ownership. The ASF licenses this file |
| * to you under the Apache License, Version 2.0 (the |
| * "License"); you may not use this file except in compliance |
| * with the License. You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, |
| * software distributed under the License is distributed on an |
| * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
| * KIND, either express or implied. See the License for the |
| * specific language governing permissions and limitations |
| * under the License. |
| */ |
| |
#include <algorithm>
#include <cstring>

#include "dbcommon/common/vector-transformer.h"
#include "dbcommon/common/vector/variable-length-vector.h"
#include "dbcommon/function/function.h"
#include "dbcommon/function/string-binary-function.h"
#include "dbcommon/utils/string-util.h"
| |
| namespace dbcommon { |
| |
| inline void bpcharTrim(Datum *params, uint64_t size) { |
| for (uint64_t i = 1; i < size; i++) { |
| Object *para = DatumGetValue<Object *>(params[i]); |
| if (dynamic_cast<Scalar *>(para)) { |
| Scalar *temp = params[i]; |
| if (temp->isnull) continue; |
| |
| char *str = temp->value; |
| int32_t lenth = temp->length; |
| while (--lenth >= 0) |
| if (str[lenth] != ' ') break; |
| temp->length = lenth + 1; |
| } |
| } |
| } |
| |
| template <bool expetedMatch> |
| Datum string_like_proto(Datum *params, uint64_t size) { |
| assert(size == 3); |
| |
| Scalar *scalarPattern = DatumGetValue<Scalar *>(params[2]); |
| auto patternSrc = DatumGetValue<char *>(scalarPattern->value); |
| auto patternLen = scalarPattern->length; |
| |
| Object *para = DatumGetValue<Object *>(params[1]); |
| Scalar *scalar = dynamic_cast<Scalar *>(para); |
| |
| bool noUnderScore = true; // intend for optimization |
| for (auto i = 0; i < patternLen; i++) |
| if (patternSrc[i] == '_') noUnderScore = false; |
| |
| if (StringUtil::isAsciiEncoding(patternSrc, patternLen) && noUnderScore) { |
| // ASCII pattern |
| if (scalar) { |
| Scalar *ret = DatumGetValue<Scalar *>(params[0]); |
| if (scalar->isnull) { |
| ret->isnull = true; |
| } else { |
| ret->isnull = false; |
| auto str = DatumGetValue<char *>(scalar->value); |
| bool matched = StringUtil::MatchAsciiPattern(str, scalar->length, |
| patternSrc, patternLen); |
| ret->value = CreateDatum(static_cast<bool>(matched == expetedMatch)); |
| } |
| return CreateDatum(ret); |
| } else { |
| SelectList *ret = DatumGetValue<SelectList *>(params[0]); |
| ret->resize(0); |
| |
| Vector *vec = dynamic_cast<Vector *>(para); |
| assert(vec->getTypeKind() == VARCHARID || vec->getTypeKind() == STRINGID); |
| |
| auto valPtrs = vec->getValPtrs(); |
| auto lens = vec->getLengths(); |
| if (vec->hasNullValue()) { |
| auto nulls = vec->getNullBuffer()->getBools(); |
| if (vec->getSelected()) { |
| auto sel = vec->getSelected(); |
| for (auto i = 0; i < vec->getNumOfRows(); i++) { |
| auto idx = (*sel)[i]; |
| if (nulls[idx]) continue; |
| |
| bool matched = StringUtil::MatchAsciiPattern( |
| valPtrs[idx], lens[idx], patternSrc, patternLen); |
| if (matched == expetedMatch) ret->push_back(idx); |
| } |
| } else { |
| for (auto i = 0; i < vec->getNumOfRowsPlain(); i++) { |
| auto idx = i; |
| if (nulls[idx]) continue; |
| |
| bool matched = StringUtil::MatchAsciiPattern( |
| valPtrs[idx], lens[idx], patternSrc, patternLen); |
| if (matched == expetedMatch) ret->push_back(idx); |
| } |
| } |
| } else { |
| if (vec->getSelected()) { |
| auto sel = vec->getSelected(); |
| for (auto i = 0; i < vec->getNumOfRows(); i++) { |
| auto idx = (*sel)[i]; |
| bool matched = StringUtil::MatchAsciiPattern( |
| valPtrs[idx], lens[idx], patternSrc, patternLen); |
| if (matched == expetedMatch) ret->push_back(idx); |
| } |
| } else { |
| for (auto i = 0; i < vec->getNumOfRowsPlain(); i++) { |
| auto idx = i; |
| bool matched = StringUtil::MatchAsciiPattern( |
| valPtrs[idx], lens[idx], patternSrc, patternLen); |
| if (matched == expetedMatch) ret->push_back(idx); |
| } |
| } |
| } |
| |
| return CreateDatum(ret); |
| } |
| } else { |
| // UTF8 pattern |
| if (scalar) { |
| Scalar *ret = DatumGetValue<Scalar *>(params[0]); |
| if (scalar->isnull) { |
| ret->isnull = true; |
| } else { |
| ret->isnull = false; |
| auto str = DatumGetValue<char *>(scalar->value); |
| bool matched = StringUtil::MatchUtf8Pattern(str, scalar->length, |
| patternSrc, patternLen); |
| ret->value = CreateDatum(static_cast<bool>(matched == expetedMatch)); |
| } |
| return CreateDatum(ret); |
| } else { |
| SelectList *ret = DatumGetValue<SelectList *>(params[0]); |
| ret->resize(0); |
| |
| Vector *vec = dynamic_cast<Vector *>(para); |
| assert(vec->getTypeKind() == VARCHARID || vec->getTypeKind() == STRINGID); |
| |
| auto valPtrs = vec->getValPtrs(); |
| auto lens = vec->getLengths(); |
| if (vec->hasNullValue()) { |
| auto nulls = vec->getNullBuffer()->getBools(); |
| if (vec->getSelected()) { |
| auto sel = vec->getSelected(); |
| for (auto i = 0; i < vec->getNumOfRows(); i++) { |
| auto idx = (*sel)[i]; |
| if (nulls[idx]) continue; |
| |
| bool matched = StringUtil::MatchUtf8Pattern(valPtrs[idx], lens[idx], |
| patternSrc, patternLen); |
| if (matched == expetedMatch) ret->push_back(idx); |
| } |
| } else { |
| for (auto i = 0; i < vec->getNumOfRowsPlain(); i++) { |
| auto idx = i; |
| if (nulls[idx]) continue; |
| |
| bool matched = StringUtil::MatchUtf8Pattern(valPtrs[idx], lens[idx], |
| patternSrc, patternLen); |
| if (matched == expetedMatch) ret->push_back(idx); |
| } |
| } |
| } else { |
| if (vec->getSelected()) { |
| auto sel = vec->getSelected(); |
| for (auto i = 0; i < vec->getNumOfRows(); i++) { |
| auto idx = (*sel)[i]; |
| bool matched = StringUtil::MatchUtf8Pattern(valPtrs[idx], lens[idx], |
| patternSrc, patternLen); |
| if (matched == expetedMatch) ret->push_back(idx); |
| } |
| } else { |
| for (auto i = 0; i < vec->getNumOfRowsPlain(); i++) { |
| auto idx = i; |
| bool matched = StringUtil::MatchUtf8Pattern(valPtrs[idx], lens[idx], |
| patternSrc, patternLen); |
| if (matched == expetedMatch) ret->push_back(idx); |
| } |
| } |
| } |
| |
| return CreateDatum(ret); |
| } |
| } |
| } |
| |
| template <bool expetedMatch> |
| Datum bpchar_like_proto(Datum *params, uint64_t size) { |
| assert(size == 3); |
| |
| Scalar *scalarPattern = DatumGetValue<Scalar *>(params[2]); |
| auto patternSrc = DatumGetValue<char *>(scalarPattern->value); |
| auto patternLen = scalarPattern->length; |
| |
| Object *para = DatumGetValue<Object *>(params[1]); |
| Scalar *scalar = dynamic_cast<Scalar *>(para); |
| |
| bool noUnderScore = true; // intend for optimization |
| for (auto i = 0; i < patternLen; i++) |
| if (patternSrc[i] == '_') noUnderScore = false; |
| |
| if (StringUtil::isAsciiEncoding(patternSrc, patternLen) && noUnderScore) { |
| // ASCII pattern |
| if (scalar) { |
| Scalar *ret = DatumGetValue<Scalar *>(params[0]); |
| if (scalar->isnull) { |
| ret->isnull = true; |
| } else { |
| ret->isnull = false; |
| auto str = DatumGetValue<char *>(scalar->value); |
| auto strLen = scalar->length; |
| bool matched = |
| StringUtil::MatchAsciiPattern(str, strLen, patternSrc, patternLen); |
| ret->value = CreateDatum(static_cast<bool>(matched == expetedMatch)); |
| } |
| return CreateDatum(ret); |
| } else { |
| SelectList *ret = DatumGetValue<SelectList *>(params[0]); |
| ret->resize(0); |
| |
| BlankPaddedCharVector *vec = dynamic_cast<BlankPaddedCharVector *>(para); |
| assert(vec); |
| |
| auto len = vec->getMaxLenModifier(); |
| auto valPtrs = vec->getValPtrs(); |
| if (vec->hasNullValue()) { |
| auto nulls = vec->getNullBuffer()->getBools(); |
| if (vec->getSelected()) { |
| auto sel = vec->getSelected(); |
| for (auto i = 0; i < vec->getNumOfRows(); i++) { |
| auto idx = (*sel)[i]; |
| if (nulls[idx]) continue; |
| bool matched = StringUtil::MatchAsciiPattern( |
| valPtrs[idx], len, patternSrc, patternLen); |
| if (matched == expetedMatch) ret->push_back(idx); |
| } |
| } else { |
| for (auto i = 0; i < vec->getNumOfRowsPlain(); i++) { |
| auto idx = i; |
| if (nulls[idx]) continue; |
| bool matched = StringUtil::MatchAsciiPattern( |
| valPtrs[idx], len, patternSrc, patternLen); |
| if (matched == expetedMatch) ret->push_back(idx); |
| } |
| } |
| } else { |
| if (vec->getSelected()) { |
| auto sel = vec->getSelected(); |
| for (auto i = 0; i < vec->getNumOfRows(); i++) { |
| auto idx = (*sel)[i]; |
| bool matched = StringUtil::MatchAsciiPattern( |
| valPtrs[idx], len, patternSrc, patternLen); |
| if (matched == expetedMatch) ret->push_back(idx); |
| } |
| } else { |
| for (auto i = 0; i < vec->getNumOfRowsPlain(); i++) { |
| auto idx = i; |
| bool matched = StringUtil::MatchAsciiPattern( |
| valPtrs[idx], len, patternSrc, patternLen); |
| if (matched == expetedMatch) ret->push_back(idx); |
| } |
| } |
| } |
| |
| return CreateDatum(ret); |
| } |
| } else { |
| // UTF8 pattern |
| if (scalar) { |
| Scalar *ret = DatumGetValue<Scalar *>(params[0]); |
| if (scalar->isnull) { |
| ret->isnull = true; |
| } else { |
| ret->isnull = false; |
| auto str = DatumGetValue<char *>(scalar->value); |
| auto strLen = scalar->length; |
| bool matched = |
| StringUtil::MatchUtf8Pattern(str, strLen, patternSrc, patternLen); |
| ret->value = CreateDatum(static_cast<bool>(matched == expetedMatch)); |
| } |
| return CreateDatum(ret); |
| } else { |
| SelectList *ret = DatumGetValue<SelectList *>(params[0]); |
| ret->resize(0); |
| |
| BlankPaddedCharVector *vec = dynamic_cast<BlankPaddedCharVector *>(para); |
| assert(vec); |
| |
| auto len = vec->getMaxLenModifier(); |
| auto valPtrs = vec->getValPtrs(); |
| if (vec->hasNullValue()) { |
| auto nulls = vec->getNullBuffer()->getBools(); |
| if (vec->getSelected()) { |
| auto sel = vec->getSelected(); |
| for (auto i = 0; i < vec->getNumOfRows(); i++) { |
| auto idx = (*sel)[i]; |
| if (nulls[idx]) continue; |
| bool matched = StringUtil::MatchUtf8Pattern(valPtrs[idx], len, |
| patternSrc, patternLen); |
| if (matched == expetedMatch) ret->push_back(idx); |
| } |
| } else { |
| for (auto i = 0; i < vec->getNumOfRowsPlain(); i++) { |
| auto idx = i; |
| if (nulls[idx]) continue; |
| bool matched = StringUtil::MatchUtf8Pattern(valPtrs[idx], len, |
| patternSrc, patternLen); |
| if (matched == expetedMatch) ret->push_back(idx); |
| } |
| } |
| } else { |
| if (vec->getSelected()) { |
| auto sel = vec->getSelected(); |
| for (auto i = 0; i < vec->getNumOfRows(); i++) { |
| auto idx = (*sel)[i]; |
| bool matched = StringUtil::MatchUtf8Pattern(valPtrs[idx], len, |
| patternSrc, patternLen); |
| if (matched == expetedMatch) ret->push_back(idx); |
| } |
| } else { |
| for (auto i = 0; i < vec->getNumOfRowsPlain(); i++) { |
| auto idx = i; |
| bool matched = StringUtil::MatchUtf8Pattern(valPtrs[idx], len, |
| patternSrc, patternLen); |
| if (matched == expetedMatch) ret->push_back(idx); |
| } |
| } |
| } |
| |
| return CreateDatum(ret); |
| } |
| } |
| } |
| |
| Datum string_like(Datum *params, uint64_t size) { |
| return string_like_proto<true>(params, size); |
| } |
| |
| Datum bpchar_like(Datum *params, uint64_t size) { |
| return bpchar_like_proto<true>(params, size); |
| } |
| |
| Datum string_not_like(Datum *params, uint64_t size) { |
| return string_like_proto<false>(params, size); |
| } |
| |
| Datum bpchar_not_like(Datum *params, uint64_t size) { |
| return bpchar_like_proto<false>(params, size); |
| } |
| |
| class utf8ptr { |
| public: |
| explicit utf8ptr(const char *p) : p_(p) {} |
| operator const char *() { return p_; } |
| |
| utf8ptr &operator++() { |
| p_ += utf8_mblen(p_); |
| return *this; |
| } |
| |
| utf8ptr &operator+=(const int32_t &len) { |
| int32_t times = len; |
| while (times--) p_ += utf8_mblen(p_); |
| return *this; |
| } |
| |
| utf8ptr &operator=(const char *p) { |
| if (p_ != p) p_ = p; |
| return *this; |
| } |
| |
| bool operator==(const utf8ptr &tmp) { |
| int32_t len = utf8_mblen(p_); |
| const char *tmp_ = p_; |
| const char *cmp_ = tmp.p_; |
| while (len && *tmp_++ == *cmp_++) len--; |
| if (len) return false; |
| return true; |
| } |
| |
| char *get() { return const_cast<char *>(p_); } |
| |
| int32_t characterLength(const char *p) { |
| int32_t len = 0; |
| const char *tmp = p_; |
| while (tmp != p) { |
| tmp += utf8_mblen(tmp); |
| len++; |
| } |
| return len; |
| } |
| |
| int32_t characterLength(const int32_t &len) { |
| int32_t ret = 0, lenth = len; |
| const char *tmp = p_; |
| while (lenth > 0) { |
| int32_t tLen = utf8_mblen(tmp); |
| lenth -= tLen; |
| tmp += tLen; |
| ret++; |
| } |
| return ret; |
| } |
| |
| int32_t byteLength(const int32_t &len) { |
| int32_t ret = 0; |
| int32_t times = len; |
| const char *tmp = p_; |
| while (times--) { |
| int32_t tLen = utf8_mblen(tmp); |
| tmp += tLen; |
| ret += tLen; |
| } |
| return ret; |
| } |
| |
| private: |
| const char *p_; |
| }; |
| |
| Datum string_char_length(Datum *params, uint64_t size) { |
| auto charLength = [](ByteBuffer &buf, text str) -> int32_t { |
| utf8ptr p(str.val); |
| uint64_t len = 0; |
| while (p < str.val + str.length) { |
| ++p; |
| ++len; |
| } |
| return len; |
| }; |
| return one_param_bind<int32_t, text>(params, size, charLength); |
| } |
| |
| Datum bpchar_char_length(Datum *params, uint64_t size) { |
| bpcharTrim(params, size); |
| return string_char_length(params, size); |
| } |
| |
| Datum string_lower(Datum *params, uint64_t size) { |
| auto lower = [](ByteBuffer &buf, text str) { |
| buf.resize(buf.size() + str.length); |
| std::transform(str.val, str.val + str.length, |
| const_cast<char *>(buf.tail()) - str.length, [](char c) { |
| return 'A' <= c && c <= 'Z' ? c - ('A' - 'a') : c; |
| }); |
| return text(nullptr, str.length); |
| }; |
| return one_param_bind<text, text>(params, size, lower); |
| } |
| |
| Datum string_upper(Datum *params, uint64_t size) { |
| auto upper = [](ByteBuffer &buf, text str) { |
| buf.resize(buf.size() + str.length); |
| std::transform(str.val, str.val + str.length, |
| const_cast<char *>(buf.tail()) - str.length, [](char c) { |
| return 'a' <= c && c <= 'z' ? c + ('A' - 'a') : c; |
| }); |
| return text(nullptr, str.length); |
| }; |
| return one_param_bind<text, text>(params, size, upper); |
| } |
| |
| Datum string_concat(Datum *params, uint64_t size) { |
| auto concat = [](ByteBuffer &buf, text str1, text str2) { |
| buf.resize(buf.size() + str1.length + str2.length); |
| char *ret = const_cast<char *>(buf.tail() - str1.length - str2.length); |
| std::transform(str1.val, str1.val + str1.length, ret, |
| [](char tmp) { return tmp; }); |
| std::transform(str2.val, str2.val + str2.length, ret + str1.length, |
| [](char tmp) { return tmp; }); |
| return text(nullptr, str1.length + str2.length); |
| }; |
| return two_params_bind<text, text, text>(params, size, concat); |
| } |
| |
| int32_t kmpPos(const char *str, const char *subStr, uint64_t len, |
| uint64_t subLen, dbcommon::ByteBuffer *kmpPosBuf) { |
| if (len < subLen) return 0; |
| kmpPosBuf->resize(subLen * sizeof(int32_t)); |
| |
| int32_t *__restrict__ next = reinterpret_cast<int32_t *>(kmpPosBuf->data()); |
| |
| next[0] = -1; |
| int32_t i = 0, j = -1; |
| while (i < subLen - 1) { |
| if (j == -1 || subStr[i] == subStr[j]) |
| next[++i] = ++j; |
| else |
| j = next[j]; |
| } |
| |
| i = 0; |
| j = 0; |
| int32_t lLen = len, sLen = subLen; |
| while (i < lLen && j < sLen) { |
| if (j == -1 || subStr[j] == str[i]) { |
| i++; |
| j++; |
| } else { |
| j = next[j]; |
| } |
| } |
| if (j == sLen) |
| return i - j + 1; |
| else |
| return 0; |
| } |
| |
// Brute-force search for `subStr` (subLen bytes) inside `str` (len bytes).
// Returns the 1-based byte position of the first occurrence, or 0 when there
// is none.  An empty `subStr` is found at position 1.
int32_t naivePos(const char *str, const char *subStr, uint64_t len,
                 uint64_t subLen) {
  if (subLen > len) return 0;

  const int32_t lastStart = len - subLen;
  for (int32_t start = 0; start <= lastStart; ++start) {
    const char *candidate = str + start;
    uint64_t matched = 0;
    while (matched < subLen && candidate[matched] == subStr[matched]) {
      ++matched;
    }
    if (matched == subLen) return start + 1;
  }
  return 0;
}
| |
| Datum string_position(Datum *params, uint64_t size) { |
| const uint32_t KMP_LIMIT = 30; |
| auto subpos = [](ByteBuffer &buf, text src, text sub) -> int32_t { |
| int32_t byteLen = 0; |
| if (sub.length < KMP_LIMIT) { |
| byteLen = naivePos(src.val, sub.val, src.length, sub.length); |
| } else { |
| dbcommon::ByteBuffer kmpPosBuf(true); |
| byteLen = kmpPos(src.val, sub.val, src.length, sub.length, &kmpPosBuf); |
| } |
| utf8ptr utfStrPtr(src.val); |
| return byteLen ? utfStrPtr.characterLength(byteLen - 1) + 1 : 0; |
| }; |
| return two_params_bind<int32_t, text, text>(params, size, subpos); |
| } |
| |
| Datum string_initcap(Datum *params, uint64_t size) { |
| auto initcap = [](ByteBuffer &buf, text str) { |
| buf.resize(buf.size() + str.length); |
| char *ret = const_cast<char *>(buf.tail() - str.length); |
| |
| char last = ' '; |
| int32_t times = str.length; |
| while (times--) { |
| if (((unsigned int)((last | 0x20) - 'a') >= 26u && |
| (unsigned int)(last - '0') >= 10u) && |
| !(last & ~0x7F)) { |
| auto low2up = [](char c) { |
| return 'a' <= c && c <= 'z' ? c + ('A' - 'a') : c; |
| }; |
| *ret++ = low2up(*str.val); |
| last = *str.val++; |
| } else { |
| auto up2low = [](char c) { |
| return 'A' <= c && c <= 'Z' ? c - ('A' - 'a') : c; |
| }; |
| *ret++ = up2low(*str.val); |
| last = *str.val++; |
| } |
| } |
| return text(nullptr, str.length); |
| }; |
| return one_param_bind<text, text>(params, size, initcap); |
| } |
| |
// Normalises a substring request against a string of `strlen` characters.
//
// On entry *subpos is a 1-based start position (it may be zero or negative)
// and *sublen the requested number of characters.  On return *subpos is a
// 0-based start in [0, strlen] and *sublen a count such that
// *subpos + *sublen <= strlen, i.e. the part of the request that actually
// overlaps the string.  Characters requested before position 1 are discarded
// and consume part of the requested length.
//
// The arithmetic is done in 64 bits so that a large length (for example
// INT32_MAX) cannot overflow, and a start beyond the end of the string is
// clamped to `strlen` so callers never advance past the last character.
inline void fixRange(int32_t strlen, int32_t *subpos, int32_t *sublen) {
  int64_t begin = static_cast<int64_t>(*subpos) - 1;  // 0-based, inclusive
  int64_t end = begin + *sublen;                      // 0-based, exclusive

  if (begin < 0) begin = 0;
  if (begin > strlen) begin = strlen;
  if (end > strlen) end = strlen;
  if (end < begin) end = begin;

  *subpos = static_cast<int32_t>(begin);
  *sublen = static_cast<int32_t>(end - begin);
}
| |
| // only support utf-8 now |
| Datum string_substring(Datum *params, uint64_t size) { |
| auto substr = [](ByteBuffer &buf, text src, int32_t pos, int32_t len) { |
| if (len < 0) { |
| LOG_ERROR(ERRCODE_SUBSTRING_ERROR, |
| "negative substring length not allowed"); |
| } |
| utf8ptr utfStrPtr(src.val); |
| int32_t utfStrLen = utfStrPtr.characterLength(src.val + src.length); |
| fixRange(utfStrLen, &pos, &len); |
| utfStrPtr += pos; |
| char *srcBegin = utfStrPtr.get(); |
| utfStrPtr += len; |
| int32_t retByteLen = utfStrPtr.get() - srcBegin; |
| buf.resize(buf.size() + retByteLen); |
| char *ret = const_cast<char *>(buf.tail() - retByteLen); |
| std::transform(srcBegin, srcBegin + retByteLen, ret, |
| [](char tmp) { return tmp; }); |
| return text(nullptr, retByteLen); |
| }; |
| return three_params_bind<text, text, int32_t, int32_t>(params, size, substr); |
| } |
| |
| Datum string_substring_nolen(Datum *params, uint64_t size) { |
| auto substrnolen = [](ByteBuffer &buf, text str, int32_t pos) { |
| if (--pos < 0) pos = 0; |
| utf8ptr utfStrPtr(str.val); |
| utfStrPtr += pos; |
| char *strBegin = utfStrPtr.get(); |
| int32_t len = str.val + str.length - strBegin; |
| if (len < 0) len = 0; |
| buf.resize(buf.size() + len); |
| char *ret = const_cast<char *>(buf.tail() - len); |
| std::transform(strBegin, strBegin + len, ret, [](char tmp) { return tmp; }); |
| return text(nullptr, len); |
| }; |
| return two_params_bind<text, text, int32_t>(params, size, substrnolen); |
| } |
| |
// Decodes the code point of the first character at `data`.
// A byte <= 0x7F is returned as is.  Otherwise the lead byte selects the
// number of following bytes (>= 0xF0: three, >= 0xE0: two, else one) and the
// low six bits of each following byte are appended to the lead byte's payload
// bits.  No length is passed in, so the following bytes are read without a
// bounds check.
inline int32_t myAscii(const unsigned char *data) {
  const unsigned char lead = *data;
  if (lead <= 0x7F) return static_cast<int32_t>(lead);

  int32_t codePoint = 0;
  int32_t trailing = 0;
  if (lead >= 0xF0) {
    codePoint = lead & 0x07;
    trailing = 3;
  } else if (lead >= 0xE0) {
    codePoint = lead & 0x0F;
    trailing = 2;
  } else {
    assert(lead > 0xC0);
    codePoint = lead & 0x1F;
    trailing = 1;
  }

  for (int32_t k = 1; k <= trailing; ++k) {
    codePoint = (codePoint << 6) + (data[k] & 0x3F);
  }
  return codePoint;
}
| |
| Datum string_ascii(Datum *params, uint64_t size) { |
| auto ascii = [](ByteBuffer &buf, text str) -> int32_t { |
| unsigned char *srcval = (unsigned char *)const_cast<char *>(str.val); |
| if (str.length > 0) |
| return myAscii(srcval); |
| else |
| return 0; |
| }; |
| return one_param_bind<int32_t, text>(params, size, ascii); |
| } |
| |
| /* |
| * Convert a VARCHAR type to the specified size. |
| * |
| * N.B. currently does not handle the toast string |
| */ |
| Datum string_varchar(Datum *params, uint64_t size) { |
| auto varchar = [](ByteBuffer &buf, text str, int32_t len, bool exp) { |
| len = TypeModifierUtil::getMaxLen(len); |
| utf8ptr utfStrPtr(str.val); |
| int32_t utfStrLen = utfStrPtr.characterLength(str.val + str.length); |
| if (utfStrLen > len) { |
| if (exp == false) { |
| auto p = str.length - 1; |
| int32_t retByteLen = utfStrPtr.byteLength(len); |
| while (p >= retByteLen) { |
| if (str.val[p--] != ' ') { |
| LOG_ERROR(ERRCODE_STRING_DATA_RIGHT_TRUNCATION, |
| "value too long for type character varying(%d)", len); |
| } |
| } |
| } |
| } else { |
| len = utfStrLen; |
| } |
| int32_t retByteLen = utfStrPtr.byteLength(len); |
| buf.resize(buf.size() + retByteLen); |
| char *ret = const_cast<char *>(buf.tail() - retByteLen); |
| std::transform(utfStrPtr.get(), utfStrPtr.get() + retByteLen, ret, |
| [](char c) { return c; }); |
| return text(nullptr, retByteLen); |
| }; |
| return three_params_bind<text, text, int32_t, bool>(params, size, varchar); |
| } |
| |
// Side(s) of a string a trim or pad operation applies to.
enum direction {
  left = 0,   // leading side only
  right = 1,  // trailing side only
  both = 2    // both sides
};
| template <direction dir> |
| Datum string_trim_blank(Datum *params, uint64_t size) { |
| auto trim = [](ByteBuffer &buf, text str) { |
| int32_t l = 0, r = str.length - 1; |
| if (dir == direction::left || dir == direction::both) { |
| while (l <= r && str.val[l] == ' ') l++; |
| } |
| if (dir == direction::right || dir == direction::both) { |
| while (l <= r && str.val[r] == ' ') r--; |
| } |
| int32_t len = r - l + 1; |
| if (len < 0) len = 0; |
| buf.resize(buf.size() + len); |
| char *ret = const_cast<char *>(buf.tail() - len); |
| std::transform(str.val + l, str.val + r + 1, ret, [](char c) { return c; }); |
| |
| return text(nullptr, len); |
| }; |
| return one_param_bind<text, text>(params, size, trim); |
| } |
| |
| template <direction dir> |
| Datum string_trim_chars(Datum *params, uint64_t size) { |
| auto trim = [](ByteBuffer &buf, text str, text chr) { |
| int32_t l = 0, r = str.length - 1; |
| if (dir == direction::left || dir == direction::both) { |
| std::string s(const_cast<char *>(chr.val), chr.length); |
| while (l <= r && s.find(str.val[l]) != std::string::npos) l++; |
| } |
| if (dir == direction::right || dir == direction::both) { |
| std::string s(const_cast<char *>(chr.val), chr.length); |
| while (l <= r && s.find(str.val[r]) != std::string::npos) r--; |
| } |
| int32_t len = r - l + 1; |
| if (len < 0) len = 0; |
| buf.resize(buf.size() + len); |
| char *ret = const_cast<char *>(buf.tail() - len); |
| std::transform(str.val + l, str.val + r + 1, ret, [](char c) { return c; }); |
| |
| return text(nullptr, len); |
| }; |
| return two_params_bind<text, text, text>(params, size, trim); |
| } |
| |
| // CASE 1: trim left blank |
| Datum string_ltrim_blank(Datum *params, uint64_t size) { |
| return string_trim_blank<direction::left>(params, size); |
| } |
| |
| // CASE 2: trim left character |
| Datum string_ltrim_chars(Datum *params, uint64_t size) { |
| return string_trim_chars<direction::left>(params, size); |
| } |
| |
| // CASE 3: trim right blank |
| Datum string_rtrim_blank(Datum *params, uint64_t size) { |
| return string_trim_blank<direction::right>(params, size); |
| } |
| |
| // CASE 4: trim right character |
| Datum string_rtrim_chars(Datum *params, uint64_t size) { |
| return string_trim_chars<direction::right>(params, size); |
| } |
| |
| // CASE 5: trim both blank |
| Datum string_btrim_blank(Datum *params, uint64_t size) { |
| return string_trim_blank<direction::both>(params, size); |
| } |
| |
| // CASE 6: trim both character |
| Datum string_btrim_chars(Datum *params, uint64_t size) { |
| return string_trim_chars<direction::both>(params, size); |
| } |
| |
| Datum string_repeat(Datum *params, uint64_t size) { |
| auto demo_func = [](ByteBuffer &buf, text str, int32_t dupLen) { |
| int32_t retLen = str.length * dupLen; |
| if (dupLen && (retLen / dupLen) != str.length) { |
| LOG_ERROR(ERRCODE_PROGRAM_LIMIT_EXCEEDED, "requested length too large"); |
| } |
| |
| buf.resize(buf.size() + retLen); |
| char *retBufPtr = const_cast<char *>(buf.tail() - retLen); |
| while (dupLen--) { |
| std::transform(str.val, str.val + str.length, retBufPtr, |
| [](char c) { return c; }); |
| retBufPtr += str.length; |
| } |
| return text(nullptr, retLen); |
| }; |
| |
| return two_params_bind<text, text, int32_t>(params, size, demo_func); |
| } |
| |
| Datum string_chr(Datum *params, uint64_t size) { |
| auto chr = [](ByteBuffer &buf, int32_t val) { |
| int32_t len = 0; |
| char wch[4]; |
| if (val > 0x7F) { |
| if (val > 0x001fffff) { |
| LOG_ERROR(ERRCODE_PROGRAM_LIMIT_EXCEEDED, |
| "requested character too large for encoding: %u", val); |
| } |
| |
| if (val > 0xffff) { |
| len = 4; |
| } else if (val > 0x07ff) { |
| len = 3; |
| } else { |
| len = 2; |
| } |
| |
| if (len == 2) { |
| wch[0] = 0xC0 | ((val >> 6) & 0x1F); |
| wch[1] = 0x80 | (val & 0x3F); |
| } else if (len == 3) { |
| wch[0] = 0xE0 | ((val >> 12) & 0x0F); |
| wch[1] = 0x80 | ((val >> 6) & 0x3F); |
| wch[2] = 0x80 | (val & 0x3F); |
| } else { |
| wch[0] = 0xF0 | ((val >> 18) & 0x07); |
| wch[1] = 0x80 | ((val >> 12) & 0x3F); |
| wch[2] = 0x80 | ((val >> 6) & 0x3F); |
| wch[3] = 0x80 | (val & 0x3F); |
| } |
| } else { |
| if (val == 0) { |
| LOG_ERROR(ERRCODE_PROGRAM_LIMIT_EXCEEDED, |
| "null character not permitted"); |
| } |
| |
| bool isMB = true; |
| if ((isMB && (val > 127)) || (!isMB && (val > 255))) { |
| // TODO(zdh): error for Multi-byte encoding except utf8 |
| // LOG_ERROR(ERRCODE_PROGRAM_LIMIT_EXCEEDED, |
| // "requested character too large for encoding: %d", val); |
| } |
| len = 1; |
| wch[0] = static_cast<char>(val); |
| } |
| buf.resize(buf.size() + len); |
| char *ret = const_cast<char *>(buf.tail() - len); |
| strncpy(ret, wch, len); |
| return text(nullptr, len); |
| }; |
| return one_param_bind<text, int32_t>(params, size, chr); |
| } |
| |
| Datum string_bpchar(Datum *params, uint64_t size) { |
| auto bpchar = [](ByteBuffer &buf, text str, int32_t len, bool exp) { |
| len = TypeModifierUtil::getMaxLen(len); |
| utf8ptr utfStrPtr(str.val); |
| int32_t characterLen = utfStrPtr.characterLength(str.val + str.length); |
| if (characterLen > len) { |
| if (exp == false) { |
| auto p = str.length - 1; |
| int32_t byteLen = utfStrPtr.byteLength(len); |
| while (p >= len) { |
| if (str.val[p--] != ' ') { |
| LOG_ERROR(ERRCODE_STRING_DATA_RIGHT_TRUNCATION, |
| "value too long for type character(%d)", len); |
| } |
| } |
| } |
| } |
| int32_t copyLen = |
| characterLen > len ? utfStrPtr.byteLength(len) : str.length; |
| int32_t retByteLen = characterLen > len ? utfStrPtr.byteLength(len) |
| : str.length + len - characterLen; |
| buf.resize(buf.size() + retByteLen); |
| char *ret = const_cast<char *>(buf.tail() - retByteLen); |
| std::transform(str.val, str.val + copyLen, ret, [](char c) { return c; }); |
| |
| ret += copyLen; |
| while (copyLen++ < retByteLen) *ret++ = ' '; |
| return text(nullptr, retByteLen); |
| }; |
| return three_params_bind<text, text, int32_t, bool>(params, size, bpchar); |
| } |
| |
| template <direction dir> |
| Datum string_pad_blank(Datum *params, uint64_t size) { |
| auto pad = [](ByteBuffer &buf, text str, int32_t len) { |
| utf8ptr utfStrPtr(str.val); |
| int32_t characterLen = utfStrPtr.characterLength(str.val + str.length); |
| int32_t retByteLen = str.length; |
| if (characterLen < len) |
| retByteLen += len - characterLen; |
| else |
| retByteLen = utfStrPtr.byteLength(len); |
| buf.resize(buf.size() + retByteLen); |
| char *ret = const_cast<char *>(buf.tail() - retByteLen); |
| |
| if (dir == direction::left) { |
| int32_t remainder = retByteLen - str.length; |
| remainder = remainder < 0 ? 0 : remainder; |
| std::memset(ret, ' ', remainder); |
| ret += remainder; |
| } |
| |
| int32_t writeLen = str.length < retByteLen ? str.length : retByteLen; |
| for (int32_t i = 0; i < writeLen; i++) *ret++ = str.val[i]; |
| |
| if (dir == direction::right) { |
| int32_t remainder = retByteLen - str.length; |
| remainder = remainder < 0 ? 0 : remainder; |
| std::memset(ret, ' ', remainder); |
| } |
| |
| return text(nullptr, retByteLen); |
| }; |
| return two_params_bind<text, text, int32_t>(params, size, pad); |
| } |
| |
| Datum string_lpad_nofill(Datum *params, uint64_t size) { |
| return string_pad_blank<direction::left>(params, size); |
| } |
| |
| Datum string_rpad_nofill(Datum *params, uint64_t size) { |
| return string_pad_blank<direction::right>(params, size); |
| } |
| |
| template <direction dir> |
| Datum string_pad_chars(Datum *params, uint64_t size) { |
| auto pad = [](ByteBuffer &buf, text str, int32_t len, text fil) { |
| utf8ptr utfStrPtr(str.val); |
| utf8ptr utfFilPtr(fil.val); |
| int32_t strCharLen = utfStrPtr.characterLength(str.val + str.length); |
| int32_t filCharLen = utfFilPtr.characterLength(fil.val + fil.length); |
| int32_t retByteLen = str.length; |
| if (strCharLen >= len) { |
| retByteLen = utfStrPtr.byteLength(len); |
| } else { |
| int32_t rem = len - strCharLen; |
| while (rem >= filCharLen) { |
| retByteLen += fil.length; |
| rem -= filCharLen; |
| } |
| retByteLen += utfFilPtr.byteLength(rem); |
| } |
| buf.resize(buf.size() + retByteLen); |
| char *ret = const_cast<char *>(buf.tail() - retByteLen); |
| |
| if (dir == direction::left) { |
| int32_t remainder = len - strCharLen; |
| if (fil.length == 1 && remainder > 0) { |
| std::memset(ret, *fil.val, remainder); |
| ret += remainder; |
| } else { |
| while (remainder > 0) { |
| if (remainder >= filCharLen) { |
| for (int32_t i = 0; i < fil.length; i++) *ret++ = fil.val[i]; |
| } else { |
| int32_t fillLen = utfFilPtr.byteLength(remainder); |
| for (int32_t i = 0; i < fil.length; i++) *ret++ = str.val[i]; |
| } |
| remainder -= filCharLen; |
| } |
| } |
| } |
| |
| int32_t writeLen = str.length < retByteLen ? str.length : retByteLen; |
| for (int32_t i = 0; i < writeLen; i++) *ret++ = str.val[i]; |
| |
| if (dir == direction::right) { |
| int32_t remainder = len - strCharLen; |
| if (fil.length == 1 && remainder > 0) { |
| std::memset(ret, *fil.val, remainder); |
| ret += remainder; |
| } else { |
| while (remainder > 0) { |
| if (remainder >= filCharLen) { |
| for (int32_t i = 0; i < fil.length; i++) *ret++ = fil.val[i]; |
| } else { |
| int32_t fillLen = utfFilPtr.byteLength(remainder); |
| for (int32_t i = 0; i < fillLen; i++) *ret++ = fil.val[i]; |
| } |
| remainder -= filCharLen; |
| } |
| } |
| } |
| |
| return text(nullptr, retByteLen); |
| }; |
| return three_params_bind<text, text, int32_t, text>(params, size, pad); |
| } |
| |
| Datum string_lpad(Datum *params, uint64_t size) { |
| return string_pad_chars<direction::left>(params, size); |
| } |
| |
| Datum string_rpad(Datum *params, uint64_t size) { |
| return string_pad_chars<direction::right>(params, size); |
| } |
| |
| Datum string_translate(Datum *params, uint64_t size) { |
| auto translate = [](ByteBuffer &buf, text str, text from, text to) { |
| utf8ptr utfStrPtr(str.val); |
| utf8ptr utfFromPtr(from.val); |
| utf8ptr utfToPtr(to.val); |
| int32_t strCharLen = utfStrPtr.characterLength(str.val + str.length); |
| int32_t fromCharLen = utfFromPtr.characterLength(from.val + from.length); |
| int32_t toCharLen = utfToPtr.characterLength(to.val + to.length); |
| int32_t retByteLen = 0; |
| int32_t worstLen = strCharLen * 4; |
| |
| // if (worstLen / 4 != strCharLen) { |
| // it won't appear one number which has int32_t length; |
| // LOG_ERROR(ERRCODE_PROGRAM_LIMIT_EXCEEDED, |
| // "requested length too large"); |
| // } |
| |
| buf.resize(buf.size() + worstLen); |
| char *ret = const_cast<char *>(buf.tail() - worstLen); |
| |
| auto writeByte = [&](utf8ptr src) { |
| char *tmp = src.get(); |
| int32_t len = utf8_mblen(tmp); |
| retByteLen += len; |
| for (int32_t k = 0; k < len; k++) *ret++ = *tmp++; |
| }; |
| |
| for (int32_t i = 0; i < strCharLen; i++) { |
| int32_t j = 0; |
| utfFromPtr = from.val; |
| utfToPtr = to.val; |
| for (; j < fromCharLen; j++) { |
| if (utfStrPtr == utfFromPtr) { |
| if (j < toCharLen) { |
| utfToPtr += j; |
| writeByte(utfToPtr); |
| } |
| break; |
| } |
| ++utfFromPtr; |
| } |
| if (j == fromCharLen) { |
| writeByte(utfStrPtr); |
| } |
| ++utfStrPtr; |
| } |
| buf.resize(buf.size() - (worstLen - retByteLen)); |
| return text(nullptr, retByteLen); |
| }; |
| return three_params_bind<text, text, text, text>(params, size, translate); |
| } |
| |
| } // namespace dbcommon |