| /** |
| * Licensed to the Apache Software Foundation (ASF) under one |
| * or more contributor license agreements. See the NOTICE file |
| * distributed with this work for additional information |
| * regarding copyright ownership. The ASF licenses this file |
| * to you under the Apache License, Version 2.0 (the |
| * "License"); you may not use this file except in compliance |
| * with the License. You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, |
| * software distributed under the License is distributed on an |
| * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
| * KIND, either express or implied. See the License for the |
| * specific language governing permissions and limitations |
| * under the License. |
| */ |
| // |
| // Created by furture on 2018/5/15. |
| // |
| |
| #include "wson_util.h" |
| #include <stdio.h> |
| |
| |
| namespace wson { |
| |
| /** |
| * see java jdk source to handle handle utf-16 in 4 byte |
| * */ |
| static const u_int16_t MIN_HIGH_SURROGATE = 0xD800; |
| |
| static const u_int16_t MAX_HIGH_SURROGATE = 0xDBFF; |
| |
| static const u_int16_t MIN_LOW_SURROGATE = 0xDC00; |
| |
| static const u_int16_t MAX_LOW_SURROGATE = 0xDFFF; |
| |
| static const u_int32_t MIN_SUPPLEMENTARY_CODE_POINT = 0x010000; |
| |
| inline bool isHighSurrogate(u_int16_t ch) { |
| return ch >= MIN_HIGH_SURROGATE && ch < (MAX_HIGH_SURROGATE + 1); |
| } |
| |
| inline bool isLowSurrogate(u_int16_t ch) { |
| return ch >= MIN_LOW_SURROGATE && ch < (MAX_LOW_SURROGATE + 1); |
| } |
| |
| inline u_int32_t toCodePoint(u_int16_t high, u_int16_t low) { |
| // Optimized form of: |
| // return ((high - MIN_HIGH_SURROGATE) << 10) |
| // + (low - MIN_LOW_SURROGATE) |
| // + MIN_SUPPLEMENTARY_CODE_POINT; |
| return ((high << 10) + low) + (MIN_SUPPLEMENTARY_CODE_POINT |
| - (MIN_HIGH_SURROGATE << 10) |
| - MIN_LOW_SURROGATE); |
| } |
| |
| |
| static inline int utf16_char_convert_to_utf8_cstr(u_int32_t codePoint, char* utf8){ |
| if (codePoint <= 0x7F) |
| { |
| // Plain single-byte ASCII. |
| utf8[0] = ((char)codePoint); |
| return 1; |
| } |
| else if (codePoint <= 0x7FF) |
| { |
| // Two bytes. |
| utf8[0] = (0xC0 | (codePoint >> 6)); |
| utf8[1] = (0x80 | ((codePoint) & 0x3F)); |
| return 2; |
| } |
| else if (codePoint <= 0xFFFF) |
| { |
| // Three bytes. |
| utf8[0] = (0xE0 | (codePoint >> 12)); |
| utf8[1] = ((0x80 | ((codePoint >> 6) & 0x3F))); |
| utf8[2] = ((0x80 | ((codePoint) & 0x3F))); |
| return 3; |
| } |
| else if (codePoint <= 0x1FFFFF) |
| { |
| // Four bytes. |
| utf8[0] = (0xF0 | (codePoint >> 18)); |
| utf8[1] = (0x80 | ((codePoint >> 12) & 0x3F)); |
| utf8[2] = (0x80 | ((codePoint >> 6) & 0x3F)); |
| utf8[3] = (0x80 | ((codePoint) & 0x3F)); |
| return 4; |
| } |
| else if (codePoint <= 0x3FFFFFF) |
| { |
| // Five bytes. |
| utf8[0] = (0xF8 | (codePoint >> 24)); |
| utf8[1] = (0x80 | ((codePoint >> 18) & 0x3F)); |
| utf8[2] = (0x80 | ((codePoint >> 12) & 0x3F)); |
| utf8[3] = (0x80 | ((codePoint >> 6) & 0x3F)); |
| utf8[4] = (0x80 | ((codePoint) & 0x3F)); |
| return 5; |
| } |
| else if (codePoint <= 0x7FFFFFFF) |
| { |
| // Six bytes. |
| utf8[0] = (0xFC | (codePoint >> 30)); |
| utf8[1] = (0x80 | ((codePoint >> 24) & 0x3F)); |
| utf8[2] = (0x80 | ((codePoint >> 18) & 0x3F)); |
| utf8[3] = (0x80 | ((codePoint >> 12) & 0x3F)); |
| utf8[4] = (0x80 | ((codePoint >> 6) & 0x3F)); |
| utf8[5] = (0x80 | ((codePoint) & 0x3F)); |
| return 6; |
| } |
| return 0; |
| } |
| |
| void utf16_convert_to_utf8_string(uint16_t * utf16, int length, std::string& utf8){ |
| char* dest = new char[length*4 + 4]; |
| utf16_convert_to_utf8_string(utf16, length, dest, utf8); |
| delete [] dest; |
| } |
| |
| void utf16_convert_to_utf8_quote_string(uint16_t *utf16, int length, std::string& utf8){ |
| char* dest = new char[length*4 + 4]; |
| utf16_convert_to_utf8_quote_string(utf16, length, dest, utf8); |
| delete [] dest; |
| } |
| |
| |
| void utf16_convert_to_utf8_string(uint16_t *utf16, int length, char* decodingBuffer, std::string& utf8){ |
| int count = utf16_convert_to_utf8_cstr(utf16, length, decodingBuffer); |
| utf8.append(decodingBuffer, count); |
| } |
| void utf16_convert_to_utf8_quote_string(uint16_t *utf16, int length, char* decodingBuffer, std::string& utf8){ |
| int count = utf16_convert_to_utf8_quote_cstr(utf16, length, decodingBuffer); |
| utf8.append(decodingBuffer, count); |
| } |
| |
| int utf16_convert_to_utf8_cstr(uint16_t * utf16, int length, char* buffer){ |
| char* src = buffer; |
| int count =0; |
| for(int i=0; i<length;){ |
| u_int16_t c1 = utf16[i++]; |
| if(isHighSurrogate(c1)){ |
| if(i < length){ |
| u_int16_t c2 = utf16[i++]; |
| if (isLowSurrogate(c2)) { |
| u_int32_t codePoint = toCodePoint(c1, c2); |
| count += utf16_char_convert_to_utf8_cstr(codePoint, src + count); |
| continue; |
| }else{ |
| i--; |
| } |
| } |
| } |
| count += utf16_char_convert_to_utf8_cstr(c1, src + count); |
| } |
| src[count] = '\0'; |
| return count; |
| } |
| |
| int utf16_convert_to_utf8_quote_cstr(uint16_t *utf16, int length, char* buffer){ |
| int count =0; |
| |
| char* src = buffer; |
| src[count++] = '"'; |
| for(int i=0; i<length;){ |
| u_int16_t c1 = utf16[i++]; |
| if(isHighSurrogate(c1)){ |
| if(i < length){ |
| u_int16_t c2 = utf16[i++]; |
| if (isLowSurrogate(c2)) { |
| u_int32_t codePoint = toCodePoint(c1, c2); |
| count += utf16_char_convert_to_utf8_cstr(codePoint, src + count); |
| continue; |
| }else{ |
| i--; |
| } |
| } |
| } |
| if(c1 < 0x5D){ // 0X5C is '\' |
| if(c1 == '"' || c1 == '\\'){ |
| src[count++] = '\\'; |
| }else{ |
| if(c1 <= 0x1F){ //max control latter |
| switch (c1){ |
| case '\t': |
| src[count++] = '\\'; |
| src[count++] = 't'; |
| continue; |
| case '\r': |
| src[count++] = '\\'; |
| src[count++] = 'r'; |
| continue; |
| case '\n': |
| src[count++] = '\\'; |
| src[count++] = 'n'; |
| continue; |
| case '\f': |
| src[count++] = '\\'; |
| src[count++] = 'f'; |
| continue; |
| case '\b': |
| src[count++] = '\\'; |
| src[count++] = 'b'; |
| continue; |
| } |
| } |
| } |
| } |
| count += utf16_char_convert_to_utf8_cstr(c1, src + count); |
| } |
| src[count++] = '"'; |
| src[count] = '\0'; |
| return count; |
| } |
| |
| |
| /** min size is 32 + 1 = 33 */ |
| inline void number_to_buffer(char* buffer, int32_t num){ |
| snprintf(buffer, 32,"%d", num); |
| } |
| |
| /** min size is 64 + 1 = 65 */ |
| inline void number_to_buffer(char* buffer, float num){ |
| snprintf(buffer, 64, "%f", num); |
| } |
| |
| /** min size is 64 + 1 = 65 */ |
| inline void number_to_buffer(char* buffer, double num){ |
| snprintf(buffer, 64, "%f", num); |
| } |
| |
| /** min size is 64 + 1 = 65 */ |
| inline void number_to_buffer(char* buffer, int64_t num){ |
| snprintf(buffer, 64, "%lld", num); |
| } |
| |
| |
| void str_append_number(std::string& str, double num){ |
| char src[64 + 2]; |
| char* buffer = src; |
| number_to_buffer(buffer, num); |
| str.append(src); |
| } |
| |
| void str_append_number(std::string& str, float num){ |
| char src[64 + 2]; |
| char* buffer = src; |
| number_to_buffer(buffer, num); |
| str.append(src); |
| } |
| |
| void str_append_number(std::string& str, int32_t num){ |
| char src[32 + 2]; |
| char* buffer = src; |
| number_to_buffer(buffer, num); |
| str.append(src); |
| } |
| |
| void str_append_number(std::string& str, int64_t num){ |
| char src[64 + 2]; |
| char* buffer = src; |
| number_to_buffer(buffer, num); |
| str.append(src); |
| } |
| |
| |
| } |