blob: 4e0ea3b680c2b2661d2b9c582f8c75d80363433b [file] [log] [blame]
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
#ifndef CORE_BASE_STRING_UTIL_H
#define CORE_BASE_STRING_UTIL_H
#include <sstream>
#include <string.h>
#ifdef OS_ANDROID
#include <malloc.h>
#else
#include <stdlib.h>
#endif
#include "base/third_party/icu/icu_utf.h"
namespace weex {
namespace base {
constexpr int32_t kErrorCodePoint = 0xFFFD;
template <typename T>
std::string to_string(T value) {
std::ostringstream os;
os << value;
return os.str();
}
static bool IsValidCodepoint(uint32_t code_point) {
// Excludes the surrogate code points ([0xD800, 0xDFFF]) and
// codepoints larger than 0x10FFFF (the highest codepoint allowed).
// Non-characters and unassigned codepoints are allowed.
return code_point < 0xD800u ||
(code_point >= 0xE000u && code_point <= 0x10FFFFu);
}
static uint32_t convert_single_char(char16_t in) {
if (!CBU16_IS_SINGLE(in) || !IsValidCodepoint(in)) {
return kErrorCodePoint;
}
return in;
}
inline static std::string to_utf8(uint16_t* utf16, size_t length) {
char16_t *WC = reinterpret_cast<char16_t *>(utf16);
char* dest = static_cast<char*>(malloc(length * sizeof(char16_t) + sizeof(char16_t)));
memset(dest, 0, length * sizeof(char16_t) + sizeof(char16_t));
int32_t i = 0;
int32_t d_len = 0;
int32_t* dest_len = &d_len;
// Always have another symbol in order to avoid checking boundaries in the
// middle of the surrogate pair.
while (i < length - 1) {
int32_t code_point;
if (CBU16_IS_LEAD(WC[i]) && CBU16_IS_TRAIL(WC[i + 1])) {
code_point = CBU16_GET_SUPPLEMENTARY(WC[i], WC[i + 1]);
i += 2;
} else {
code_point = convert_single_char(WC[i]);
++i;
}
CBU8_APPEND_UNSAFE(dest, *dest_len, code_point);
}
if (i < length)
CBU8_APPEND_UNSAFE(dest, *dest_len, convert_single_char(WC[i]));
std::string output(dest);
free(dest);
return output;
}
#ifdef OS_ANDROID
static std::u16string to_utf16(char* utf8, size_t length) {
std::u16string dest_str;
dest_str.resize(length);
auto* dest = &dest_str[0];
int32_t dest_len = 0;
bool success = true;
for (int32_t i = 0; i < length;) {
int32_t code_point;
CBU8_NEXT(utf8, i, static_cast<int32_t>(length), code_point);
if (!IsValidCodepoint(code_point)) {
success = false;
code_point = kErrorCodePoint;
}
CBU16_APPEND_UNSAFE(dest, dest_len, code_point);
}
if (!success) {
return std::u16string();
}
dest_str.resize(dest_len);
dest_str.shrink_to_fit();
return dest_str;
}
#endif
} // namespace base
} // namespace weex
#endif // CORE_BASE_STRING_UTIL_H