| /* |
| * Copyright (c) 2014, Facebook, Inc. |
| * All rights reserved. |
| * |
| * This source code is licensed under the BSD-style license found in the |
| * LICENSE file in the root directory of this source tree. An additional grant |
| * of patent rights can be found in the PATENTS file in the same directory. |
| * |
| */ |
| |
| /* |
| * This file defines JsonbParserT (template) and JsonbParser. |
| * |
| * JsonbParserT is a template class which implements a JSON parser. |
| * JsonbParserT parses JSON text, and serializes it to JSONB binary format |
| * by using JsonbWriterT object. By default, JsonbParserT creates a new |
| * JsonbWriterT object with an output stream object. However, you can also |
| * pass in your JsonbWriterT or any stream object that implements some basic |
| * interface of std::ostream (see JsonbStream.h). |
| * |
| * JsonbParser specializes JsonbParserT with JsonbOutStream type (see |
| * JsonbStream.h). So unless you want to provide your own output stream |
| * type, use a JsonbParser object. |
| * |
| * ** Parsing JSON ** |
| * JsonbParserT parses JSON string, and directly serializes into JSONB |
| * packed bytes. There are three ways to parse a JSON string: (1) using |
| * c-string, (2) using string with len, (3) using std::istream object. You can |
| * use a custom streambuf to redirect the input. JsonbInBuffer is the streambuf |
| * used internally if the input is a raw character buffer. |
| * |
| * You can reuse a JsonbParserT object to parse/serialize multiple JSON |
| * strings, and the previous JSONB will be overwritten. |
| * |
| * If parsing fails (returned false), the error code will be set to one of |
| * JsonbErrType, and can be retrieved by calling getErrorCode(). |
| * |
| * ** External dictionary ** |
| * During parsing a JSON string, you can pass a call-back function to map a key |
| * string to an id, and store the dictionary id in JSONB to save space. The |
| * purpose of using an external dictionary is more towards a collection of |
| * documents (which has common keys) rather than a single document, so that |
| * space saving will be significant. |
| * |
| * ** Endianness ** |
| * Note: JSONB serialization doesn't assume endianness of the server. However |
| * you will need to ensure that the endianness at the reader side is the same |
| * as that at the writer side (if they are on different machines). Otherwise, |
| * proper conversion is needed when a number value is returned to the |
| * caller/writer. |
| * |
| * @author Tian Xia <tianx@fb.com> |
| * |
| * this file is copied from |
| * https://github.com/facebook/mysql-5.6/blob/fb-mysql-5.6.35/fbson/FbsonJsonParser.h |
| * and modified by Doris |
| */ |
| |
| #ifndef JSONB_JSONBJSONPARSER_H |
| #define JSONB_JSONBJSONPARSER_H |
| |
| #include <cmath> |
| #include <limits> |
| |
| #include "jsonb_document.h" |
| #include "jsonb_error.h" |
| #include "jsonb_writer.h" |
| #include "string_parser.hpp" |
| |
| namespace doris { |
| |
// Characters that terminate a scalar token (number / hex literal) while scanning.
constexpr const char* kJsonDelim = " ,]}\t\r\n";
// Characters skipped between JSON tokens.
constexpr const char* kWhiteSpace = " \t\n\r";
| |
| /* |
| * Template JsonbParserT |
| */ |
| template <class OS_TYPE> |
| class JsonbParserT { |
| public: |
| JsonbParserT() : stream_pos_(0), err_(JsonbErrType::E_NONE) {} |
| |
| explicit JsonbParserT(OS_TYPE& os) : writer_(os), stream_pos_(0), err_(JsonbErrType::E_NONE) {} |
| |
| // parse a UTF-8 JSON string |
| bool parse(const std::string& str, hDictInsert handler = nullptr) { |
| return parse(str.c_str(), str.size(), handler); |
| } |
| |
| // parse a UTF-8 JSON c-style string (NULL terminated) |
| bool parse(const char* c_str, hDictInsert handler = nullptr) { |
| return parse(c_str, strlen(c_str), handler); |
| } |
| |
| // parse a UTF-8 JSON string with length |
| bool parse(const char* pch, size_t len, hDictInsert handler = nullptr) { |
| if (!pch || len == 0) { |
| err_ = JsonbErrType::E_EMPTY_DOCUMENT; |
| return false; |
| } |
| |
| JsonbInBuffer sb(pch, len); |
| std::istream in(&sb); |
| return parse(in, handler); |
| } |
| |
| // parse UTF-8 JSON text from an input stream |
| bool parse(std::istream& in, hDictInsert handler = nullptr) { |
| bool res = false; |
| err_ = JsonbErrType::E_NONE; |
| stream_pos_ = 0; |
| |
| // reset output stream |
| writer_.reset(); |
| |
| trim(in); |
| |
| // TODO(wzy): parsePrimitive should be implemented |
| if (in.peek() == '{') { |
| skipChar(in); |
| res = parseObject(in, handler); |
| } else if (in.peek() == '[') { |
| skipChar(in); |
| res = parseArray(in, handler); |
| } else { |
| res = parsePrimitive(in, handler); |
| if (!res) err_ = handle_parse_failure(in); |
| } |
| |
| trim(in); |
| if (res && !in.eof()) { |
| err_ = JsonbErrType::E_INVALID_DOCU; |
| return false; |
| } |
| |
| return res; |
| } |
| |
| JsonbWriterT<OS_TYPE>& getWriter() { return writer_; } |
| |
| JsonbErrType getErrorCode() { return err_; } |
| |
| JsonbErrInfo getErrorInfo() { |
| assert(err_ < JsonbErrType::E_NUM_ERRORS); |
| |
| JsonbErrInfo err_info; |
| |
| // stream_pos_ always points to the next char, so err_pos is 1-based |
| err_info.err_pos = stream_pos_; |
| err_info.err_msg = JsonbErrMsg::getErrMsg(err_); |
| |
| return err_info; |
| } |
| |
| // clear error code |
| void clearErr() { err_ = JsonbErrType::E_NONE; } |
| |
| private: |
| JsonbErrType handle_parse_value_failure(bool parse_res, std::istream& in) { |
| if (parse_res) { |
| trim(in); |
| if (!in.good()) { |
| return JsonbErrType::E_INVALID_DOCU_COMPAT; |
| } |
| } |
| return JsonbErrType::E_INVALID_DOCU; |
| ; |
| } |
| |
| // In case json is determined to be invalid at top level, |
| // try to parse literal values. |
| // We return a different error code E_INVALID_DOCU_COMPAT |
| // in case the input json contains these values. |
| // Returning a different error code will cause an |
| // auditing on the caller. |
| // This is mainly done because 8.0 JSON_VALID considers |
| // this as a valid input. |
| JsonbErrType handle_parse_failure(std::istream& in) { |
| JsonbErrType error = JsonbErrType::E_INVALID_DOCU; |
| if (!writer_.writeStartArray()) { |
| return error; |
| } |
| |
| switch (in.peek()) { |
| case 'n': |
| skipChar(in); |
| error = handle_parse_value_failure(parseNull(in), in); |
| break; |
| case 't': |
| skipChar(in); |
| error = handle_parse_value_failure(parseTrue(in), in); |
| break; |
| case 'f': |
| skipChar(in); |
| error = handle_parse_value_failure(parseFalse(in), in); |
| break; |
| case '"': |
| skipChar(in); |
| error = handle_parse_value_failure(parseString(in), in); |
| break; |
| default: |
| if (parseNumber(in)) { |
| trim(in); |
| if (in.eof()) { |
| error = JsonbErrType::E_INVALID_DOCU_COMPAT; |
| } |
| } |
| } |
| if (!writer_.writeEndArray()) { |
| return error; |
| } |
| |
| return error; |
| } |
| |
| // parse primitive |
| bool parsePrimitive(std::istream& in, hDictInsert handler) { |
| bool res = false; |
| switch (in.peek()) { |
| case 'n': |
| skipChar(in); |
| res = parseNull(in); |
| break; |
| case 't': |
| skipChar(in); |
| res = parseTrue(in); |
| break; |
| case 'f': |
| skipChar(in); |
| res = parseFalse(in); |
| break; |
| case '"': |
| skipChar(in); |
| res = parseString(in); |
| break; |
| default: |
| res = parseNumber(in); |
| } |
| |
| return res; |
| } |
| |
| // parse a JSON object (comma-separated list of key-value pairs) |
| bool parseObject(std::istream& in, hDictInsert handler) { |
| if (!writer_.writeStartObject()) { |
| err_ = JsonbErrType::E_OUTPUT_FAIL; |
| return false; |
| } |
| |
| trim(in); |
| |
| if (in.peek() == '}') { |
| skipChar(in); |
| // empty object |
| if (!writer_.writeEndObject()) { |
| err_ = JsonbErrType::E_OUTPUT_FAIL; |
| return false; |
| } |
| return true; |
| } |
| |
| while (in.good()) { |
| if (nextChar(in) != '"') { |
| err_ = JsonbErrType::E_INVALID_OBJ; |
| return false; |
| } |
| |
| if (!parseKVPair(in, handler)) { |
| return false; |
| } |
| |
| trim(in); |
| |
| char ch = nextChar(in); |
| if (ch == '}') { |
| // end of the object |
| if (!writer_.writeEndObject()) { |
| err_ = JsonbErrType::E_OUTPUT_FAIL; |
| return false; |
| } |
| return true; |
| } else if (ch != ',') { |
| err_ = JsonbErrType::E_INVALID_OBJ; |
| return false; |
| } |
| |
| trim(in); |
| } |
| |
| err_ = JsonbErrType::E_INVALID_OBJ; |
| return false; |
| } |
| |
| // parse a JSON array (comma-separated list of values) |
| bool parseArray(std::istream& in, hDictInsert handler) { |
| if (!writer_.writeStartArray()) { |
| err_ = JsonbErrType::E_OUTPUT_FAIL; |
| return false; |
| } |
| |
| trim(in); |
| |
| if (in.peek() == ']') { |
| skipChar(in); |
| // empty array |
| if (!writer_.writeEndArray()) { |
| err_ = JsonbErrType::E_OUTPUT_FAIL; |
| return false; |
| } |
| return true; |
| } |
| |
| while (in.good()) { |
| if (!parseValue(in, handler)) { |
| return false; |
| } |
| |
| trim(in); |
| |
| char ch = nextChar(in); |
| if (ch == ']') { |
| // end of the array |
| if (!writer_.writeEndArray()) { |
| err_ = JsonbErrType::E_OUTPUT_FAIL; |
| return false; |
| } |
| return true; |
| } else if (ch != ',') { |
| err_ = JsonbErrType::E_INVALID_ARR; |
| return false; |
| } |
| |
| trim(in); |
| } |
| |
| err_ = JsonbErrType::E_INVALID_ARR; |
| return false; |
| } |
| |
| // parse a key-value pair, separated by ":" |
| bool parseKVPair(std::istream& in, hDictInsert handler) { |
| if (parseKey(in, handler) && parseValue(in, handler)) { |
| return true; |
| } |
| |
| return false; |
| } |
| |
| // parse a key (must be string) |
| bool parseKey(std::istream& in, hDictInsert handler) { |
| char key[JsonbKeyValue::sMaxKeyLen]; |
| int key_len = 0; |
| while (in.good() && in.peek() != '"' && key_len < JsonbKeyValue::sMaxKeyLen) { |
| char ch = nextChar(in); |
| if (ch == '\\') { |
| char escape_buffer[5]; // buffer for escape |
| int len; |
| if (!parseEscape(in, escape_buffer, len)) { |
| err_ = JsonbErrType::E_INVALID_KEY_STRING; |
| return false; |
| } |
| if (key_len + len >= JsonbKeyValue::sMaxKeyLen) { |
| err_ = JsonbErrType::E_INVALID_KEY_LENGTH; |
| return false; |
| } |
| memcpy(key + key_len, escape_buffer, len); |
| key_len += len; |
| } else { |
| key[key_len++] = ch; |
| } |
| } |
| // The JSON key can be an empty string. |
| if (!in.good() || in.peek() != '"') { |
| if (key_len == JsonbKeyValue::sMaxKeyLen) |
| err_ = JsonbErrType::E_INVALID_KEY_LENGTH; |
| else |
| err_ = JsonbErrType::E_INVALID_KEY_STRING; |
| return false; |
| } |
| |
| skipChar(in); // discard '"' |
| |
| int key_id = -1; |
| if (handler) { |
| key_id = handler(key, key_len); |
| } |
| |
| if (key_id < 0) { |
| writer_.writeKey(key, key_len); |
| } else { |
| writer_.writeKey(key_id); |
| } |
| |
| trim(in); |
| |
| if (nextChar(in) != ':') { |
| err_ = JsonbErrType::E_INVALID_OBJ; |
| return false; |
| } |
| |
| trim(in); |
| if (!in.good()) { |
| err_ = JsonbErrType::E_INVALID_OBJ; |
| return false; |
| } |
| |
| return true; |
| } |
| |
| // parse a value |
| bool parseValue(std::istream& in, hDictInsert handler) { |
| bool res = false; |
| |
| switch (in.peek()) { |
| case 'N': |
| case 'n': { |
| skipChar(in); |
| res = parseNull(in); |
| break; |
| } |
| case 'T': |
| case 't': { |
| skipChar(in); |
| res = parseTrue(in); |
| break; |
| } |
| case 'F': |
| case 'f': { |
| skipChar(in); |
| res = parseFalse(in); |
| break; |
| } |
| case '"': { |
| skipChar(in); |
| res = parseString(in); |
| break; |
| } |
| case '{': { |
| skipChar(in); |
| ++nesting_lvl_; |
| if (nesting_lvl_ >= MaxNestingLevel) { |
| err_ = JsonbErrType::E_NESTING_LVL_OVERFLOW; |
| return false; |
| } |
| res = parseObject(in, handler); |
| if (res) { |
| --nesting_lvl_; |
| } |
| break; |
| } |
| case '[': { |
| skipChar(in); |
| ++nesting_lvl_; |
| if (nesting_lvl_ >= MaxNestingLevel) { |
| err_ = JsonbErrType::E_NESTING_LVL_OVERFLOW; |
| return false; |
| } |
| res = parseArray(in, handler); |
| if (res) { |
| --nesting_lvl_; |
| } |
| break; |
| } |
| default: { |
| res = parseNumber(in); |
| break; |
| } |
| } |
| |
| return res; |
| } |
| |
| // parse NULL value |
| bool parseNull(std::istream& in) { |
| if (tolower(nextChar(in)) == 'u' && tolower(nextChar(in)) == 'l' && |
| tolower(nextChar(in)) == 'l') { |
| writer_.writeNull(); |
| return true; |
| } |
| |
| err_ = JsonbErrType::E_INVALID_SCALAR_VALUE; |
| return false; |
| } |
| |
| // parse TRUE value |
| bool parseTrue(std::istream& in) { |
| if (tolower(nextChar(in)) == 'r' && tolower(nextChar(in)) == 'u' && |
| tolower(nextChar(in)) == 'e') { |
| writer_.writeBool(true); |
| return true; |
| } |
| |
| err_ = JsonbErrType::E_INVALID_SCALAR_VALUE; |
| return false; |
| } |
| |
| // parse FALSE value |
| bool parseFalse(std::istream& in) { |
| if (tolower(nextChar(in)) == 'a' && tolower(nextChar(in)) == 'l' && |
| tolower(nextChar(in)) == 's' && tolower(nextChar(in)) == 'e') { |
| writer_.writeBool(false); |
| return true; |
| } |
| |
| err_ = JsonbErrType::E_INVALID_SCALAR_VALUE; |
| return false; |
| } |
| |
| /* |
| This is a helper function to parse the hex value. hex_num means the |
| number of digits needed to be parsed. If less than zero, then it will |
| consider all the characters between current and any character in JsonDelim. |
| */ |
| unsigned parseHexHelper(std::istream& in, uint64_t& val, unsigned hex_num = 17) { |
| // We can't read more than 17 digits, so when read 17 digits, it's overflow |
| val = 0; |
| unsigned num_digits = 0; |
| char ch = tolower(in.peek()); |
| while (in.good() && !strchr(kJsonDelim, ch) && num_digits != hex_num) { |
| if (ch >= '0' && ch <= '9') { |
| val = (val << 4) + (ch - '0'); |
| } else if (ch >= 'a' && ch <= 'f') { |
| val = (val << 4) + (ch - 'a' + 10); |
| } else { |
| // unrecognized hex digit |
| return 0; |
| } |
| skipChar(in); |
| ch = tolower(in.peek()); |
| ++num_digits; |
| } |
| return num_digits; |
| } |
| |
| // parse HEX value |
| bool parseHex4(std::istream& in, unsigned& h) { |
| uint64_t val; |
| if (4 == parseHexHelper(in, val, 4)) { |
| h = (unsigned)val; |
| return true; |
| } |
| return false; |
| } |
| |
| /* |
| parse Escape char. |
| */ |
| bool parseEscape(std::istream& in, char* out, int& len) { |
| /* |
| This is extracted from cJSON implementation. |
| This is about the mask of the first byte in UTF-8. |
| The mask is defined in: |
| http://en.wikipedia.org/wiki/UTF-8#Description |
| */ |
| const unsigned char firstByteMark[6] = {0x00, 0xC0, 0xE0, 0xF0, 0xF8, 0xFC}; |
| if (!in.good()) { |
| return false; |
| } |
| char c = nextChar(in); |
| len = 1; |
| switch (c) { |
| // \" \\ \/ \b \f \n \r \t |
| case '"': |
| *out = '"'; |
| return true; |
| case '\\': |
| *out = '\\'; |
| return true; |
| case '/': |
| *out = '/'; |
| return true; |
| case 'b': |
| *out = '\b'; |
| return true; |
| case 'f': |
| *out = '\f'; |
| return true; |
| case 'n': |
| *out = '\n'; |
| return true; |
| case 'r': |
| *out = '\r'; |
| return true; |
| case 't': |
| *out = '\t'; |
| return true; |
| case 'u': { |
| unsigned uc; |
| if (!parseHex4(in, uc)) { |
| return false; |
| } |
| /* |
| For DC00 to DFFF, it should be low surrogates for UTF16. |
| So if it display in the high bits, it's invalid. |
| */ |
| if (uc >= 0xDC00 && uc <= 0xDFFF) { |
| return false; |
| } |
| |
| /* |
| For D800 to DBFF, it's the high surrogates for UTF16. |
| So it's utf-16, there must be another one between 0xDC00 |
| and 0xDFFF. |
| */ |
| if (uc >= 0xD800 && uc <= 0xDBFF) { |
| unsigned uc2; |
| |
| if (!in.good()) { |
| return false; |
| } |
| c = nextChar(in); |
| if (c != '\\') { |
| return false; |
| } |
| |
| if (!in.good()) { |
| return false; |
| } |
| c = nextChar(in); |
| if (c != 'u') { |
| return false; |
| } |
| |
| if (!parseHex4(in, uc2)) { |
| return false; |
| } |
| /* |
| Now we need the low surrogates for UTF16. It should be |
| within 0xDC00 and 0xDFFF. |
| */ |
| if (uc2 < 0xDC00 || uc2 > 0xDFFF) return false; |
| /* |
| For the character that not in the Basic Multilingual Plan, |
| it's represented as twelve-character, encoding the UTF-16 |
| surrogate pair. |
| UTF16 is between 0x10000 and 0x10FFFF. The high surrogate |
| present the high bits and the low surrogate present the |
| lower 10 bits. |
| For detailed explanation, please refer to: |
| http://www.ecma-international.org/publications/files/ECMA-ST/ECMA-404.pdf |
| Then it will be converted to UTF8. |
| */ |
| uc = 0x10000 + (((uc & 0x3FF) << 10) | (uc2 & 0x3FF)); |
| } |
| |
| /* |
| Get the length of the unicode. |
| Please refer to http://en.wikipedia.org/wiki/UTF-8#Description. |
| */ |
| if (uc < 0x80) |
| len = 1; |
| else if (uc < 0x800) |
| len = 2; |
| else if (uc < 0x10000) |
| len = 3; |
| else |
| len = 4; |
| out += len; |
| /* |
| Encode it. |
| Please refer to http://en.wikipedia.org/wiki/UTF-8#Description. |
| This part of code has a reference to cJSON. |
| */ |
| switch (len) { |
| case 4: |
| *--out = ((uc | 0x80) & 0xBF); |
| uc >>= 6; |
| [[fallthrough]]; |
| case 3: |
| *--out = ((uc | 0x80) & 0xBF); |
| uc >>= 6; |
| [[fallthrough]]; |
| case 2: |
| *--out = ((uc | 0x80) & 0xBF); |
| uc >>= 6; |
| [[fallthrough]]; |
| case 1: |
| // Mask the first byte according to the standard. |
| *--out = (uc | firstByteMark[len - 1]); |
| } |
| return true; |
| break; |
| } |
| default: |
| return false; |
| break; |
| } |
| } |
| |
| // parse a string |
| bool parseString(std::istream& in) { |
| const int BUFFER_LEN = 4096; |
| if (!writer_.writeStartString()) { |
| err_ = JsonbErrType::E_OUTPUT_FAIL; |
| return false; |
| } |
| |
| // write 4KB at a time |
| char buffer[BUFFER_LEN]; |
| int nread = 0; |
| while (in.good()) { |
| char ch = nextChar(in); |
| if (ch == '"') { |
| // write all remaining bytes in the buffer |
| if (nread > 0) { |
| if (!writer_.writeString(buffer, nread)) { |
| err_ = JsonbErrType::E_OUTPUT_FAIL; |
| return false; |
| } |
| } |
| // end writing string |
| if (!writer_.writeEndString()) { |
| err_ = JsonbErrType::E_OUTPUT_FAIL; |
| return false; |
| } |
| return true; |
| } else if (ch == '\\') { |
| // this is a escape char |
| char escape_buffer[5]; // buffer for escape |
| int len; |
| if (!parseEscape(in, escape_buffer, len)) { |
| err_ = JsonbErrType::E_INVALID_STR; |
| return false; |
| } |
| |
| // Write each char to the buffer |
| for (int i = 0; i != len; ++i) { |
| buffer[nread++] = escape_buffer[i]; |
| if (nread == BUFFER_LEN) { |
| if (!writer_.writeString(buffer, nread)) { |
| err_ = JsonbErrType::E_OUTPUT_FAIL; |
| return false; |
| } |
| nread = 0; |
| } |
| } |
| } else { |
| // just a char |
| buffer[nread++] = ch; |
| if (nread == BUFFER_LEN) { |
| // flush buffer |
| if (!writer_.writeString(buffer, nread)) { |
| err_ = JsonbErrType::E_OUTPUT_FAIL; |
| return false; |
| } |
| nread = 0; |
| } |
| } |
| } |
| |
| err_ = JsonbErrType::E_INVALID_STR; |
| return false; |
| } |
| |
| // parse a number |
| // Number format can be hex, octal, or decimal (including float). |
| // Only decimal can have (+/-) sign prefix. |
| bool parseNumber(std::istream& in) { |
| bool ret = false; |
| switch (in.peek()) { |
| case '0': { |
| skipChar(in); |
| |
| if (in.peek() == 'x' || in.peek() == 'X') { |
| skipChar(in); |
| ret = parseHex(in); |
| } else if (in.peek() == '.') { |
| skipChar(in); // remove '.' |
| num_buf_[0] = '.'; |
| ret = parseDouble(in, num_buf_ + 1); |
| } else { |
| ret = parseOctal(in); |
| } |
| |
| break; |
| } |
| case '-': { |
| skipChar(in); |
| ret = parseDecimal(in, true); |
| break; |
| } |
| case '+': |
| skipChar(in); |
| // fall through |
| default: |
| ret = parseDecimal(in); |
| break; |
| } |
| |
| return ret; |
| } |
| |
| // parse a number in hex format |
| bool parseHex(std::istream& in) { |
| uint64_t val = 0; |
| int num_digits; |
| if (0 == (num_digits = parseHexHelper(in, val))) { |
| err_ = JsonbErrType::E_INVALID_HEX; |
| return false; |
| } |
| |
| int size = 0; |
| if (num_digits <= 2) { |
| size = writer_.writeInt8((int8_t)val); |
| } else if (num_digits <= 4) { |
| size = writer_.writeInt16((int16_t)val); |
| } else if (num_digits <= 8) { |
| size = writer_.writeInt32((int32_t)val); |
| } else if (num_digits <= 16) { |
| size = writer_.writeInt64(val); |
| } else { |
| err_ = JsonbErrType::E_HEX_OVERFLOW; |
| return false; |
| } |
| |
| if (size == 0) { |
| err_ = JsonbErrType::E_OUTPUT_FAIL; |
| return false; |
| } |
| |
| return true; |
| } |
| |
| // parse a number in octal format |
| bool parseOctal(std::istream& in) { |
| int64_t val = 0; |
| char ch = in.peek(); |
| while (in.good() && !strchr(kJsonDelim, ch)) { |
| if (ch >= '0' && ch <= '7') { |
| val = val * 8 + (ch - '0'); |
| } else { |
| err_ = JsonbErrType::E_INVALID_OCTAL; |
| return false; |
| } |
| |
| // check if the number overflows |
| if (val < 0) { |
| err_ = JsonbErrType::E_OCTAL_OVERFLOW; |
| return false; |
| } |
| |
| skipChar(in); |
| ch = in.peek(); |
| } |
| |
| int size = 0; |
| if (val <= std::numeric_limits<int8_t>::max()) { |
| size = writer_.writeInt8((int8_t)val); |
| } else if (val <= std::numeric_limits<int16_t>::max()) { |
| size = writer_.writeInt16((int16_t)val); |
| } else if (val <= std::numeric_limits<int32_t>::max()) { |
| size = writer_.writeInt32((int32_t)val); |
| } else { // val <= INT64_MAX |
| size = writer_.writeInt64(val); |
| } |
| |
| if (size == 0) { |
| err_ = JsonbErrType::E_OUTPUT_FAIL; |
| return false; |
| } |
| |
| return true; |
| } |
| |
| // parse a number in decimal (including float) |
| bool parseDecimal(std::istream& in, bool neg = false) { |
| char ch = 0; |
| while (in.good() && (ch = in.peek()) == '0') skipChar(in); |
| |
| char* pbuf = num_buf_; |
| if (neg) *(pbuf++) = '-'; |
| |
| char* save_pos = pbuf; |
| while (in.good() && !strchr(kJsonDelim, ch)) { |
| *(pbuf++) = ch; |
| if (pbuf == end_buf_) { |
| err_ = JsonbErrType::E_DECIMAL_OVERFLOW; |
| return false; |
| } |
| |
| if (ch == '.') { |
| skipChar(in); // remove '.' |
| return parseDouble(in, pbuf); |
| } else if (ch == 'E' || ch == 'e') { |
| skipChar(in); // remove 'E' |
| return parseExponent(in, pbuf); |
| } else if (ch < '0' || ch > '9') { |
| err_ = JsonbErrType::E_INVALID_DECIMAL; |
| return false; |
| } |
| |
| skipChar(in); |
| ch = in.peek(); |
| } |
| if (save_pos == pbuf) { |
| err_ = JsonbErrType::E_INVALID_DECIMAL; // empty input |
| return false; |
| } |
| |
| *pbuf = 0; // set null-terminator |
| StringParser::ParseResult parse_result = StringParser::PARSE_SUCCESS; |
| int128_t val = |
| StringParser::string_to_int<int128_t>(num_buf_, pbuf - num_buf_, &parse_result); |
| if (parse_result != StringParser::PARSE_SUCCESS) { |
| VLOG_ROW << "debug string_to_int error for " << num_buf_ << " val=" << val |
| << " parse_result=" << parse_result; |
| err_ = JsonbErrType::E_DECIMAL_OVERFLOW; |
| return false; |
| } |
| |
| int size = 0; |
| if (val >= std::numeric_limits<int8_t>::min() && |
| val <= std::numeric_limits<int8_t>::max()) { |
| size = writer_.writeInt8((int8_t)val); |
| } else if (val >= std::numeric_limits<int16_t>::min() && |
| val <= std::numeric_limits<int16_t>::max()) { |
| size = writer_.writeInt16((int16_t)val); |
| } else if (val >= std::numeric_limits<int32_t>::min() && |
| val <= std::numeric_limits<int32_t>::max()) { |
| size = writer_.writeInt32((int32_t)val); |
| } else if (val >= std::numeric_limits<int64_t>::min() && |
| val <= std::numeric_limits<int64_t>::max()) { |
| size = writer_.writeInt64((int64_t)val); |
| } else { // INT128 |
| size = writer_.writeInt128(val); |
| } |
| |
| if (size == 0) { |
| err_ = JsonbErrType::E_OUTPUT_FAIL; |
| return false; |
| } |
| |
| return true; |
| } |
| |
| // parse IEEE745 double precision |
| bool parseDouble(std::istream& in, char* pbuf) { |
| char* save_pos = pbuf; |
| char ch = in.peek(); |
| while (in.good() && !strchr(kJsonDelim, ch)) { |
| *(pbuf++) = ch; |
| if (pbuf == end_buf_) { |
| err_ = JsonbErrType::E_DOUBLE_OVERFLOW; |
| return false; |
| } |
| |
| if (ch == 'e' || ch == 'E') { |
| skipChar(in); // remove 'E' |
| return parseExponent(in, pbuf); |
| } else if (ch < '0' || ch > '9') { |
| err_ = JsonbErrType::E_INVALID_DECIMAL; |
| return false; |
| } |
| |
| skipChar(in); |
| ch = in.peek(); |
| } |
| if (save_pos == pbuf) { |
| err_ = JsonbErrType::E_INVALID_DECIMAL; // empty input |
| return false; |
| } |
| |
| *pbuf = 0; // set null-terminator |
| return internConvertBufferToDouble(num_buf_, pbuf - num_buf_); |
| } |
| |
| // parse the exponent part of a double number |
| bool parseExponent(std::istream& in, char* pbuf) { |
| char ch = in.peek(); |
| if (in.good()) { |
| if (ch == '+' || ch == '-') { |
| *(pbuf++) = ch; |
| if (pbuf == end_buf_) { |
| err_ = JsonbErrType::E_DOUBLE_OVERFLOW; |
| return false; |
| } |
| skipChar(in); |
| ch = in.peek(); |
| } |
| } |
| |
| char* save_pos = pbuf; |
| while (in.good() && !strchr(kJsonDelim, ch)) { |
| *(pbuf++) = ch; |
| if (pbuf == end_buf_) { |
| err_ = JsonbErrType::E_DOUBLE_OVERFLOW; |
| return false; |
| } |
| |
| if (ch < '0' || ch > '9') { |
| err_ = JsonbErrType::E_INVALID_EXPONENT; |
| return false; |
| } |
| |
| skipChar(in); |
| ch = in.peek(); |
| } |
| if (save_pos == pbuf) { |
| err_ = JsonbErrType::E_INVALID_EXPONENT; // empty input |
| return false; |
| } |
| |
| *pbuf = 0; // set null-terminator |
| return internConvertBufferToDouble(num_buf_, pbuf - num_buf_); |
| } |
| |
| // call system function to parse double to string |
| bool internConvertBufferToDouble(char* num_buf_, int len) { |
| StringParser::ParseResult parse_result = StringParser::PARSE_SUCCESS; |
| double val = StringParser::string_to_float<double>(num_buf_, len, &parse_result); |
| if (parse_result != StringParser::PARSE_SUCCESS) { |
| VLOG_ROW << "debug string_to_float error for " << num_buf_ << " val=" << val |
| << " parse_result=" << parse_result; |
| err_ = JsonbErrType::E_DECIMAL_OVERFLOW; |
| return false; |
| } |
| |
| if (writer_.writeDouble(val) == 0) { |
| err_ = JsonbErrType::E_OUTPUT_FAIL; |
| return false; |
| } |
| |
| return true; |
| } |
| |
| void trim(std::istream& in) { |
| while (in.good() && strchr(kWhiteSpace, in.peek())) { |
| skipChar(in); |
| } |
| } |
| |
| /* |
| * Helper functions to keep track of characters read. |
| * Do not rely on std::istream's tellg() which may not be implemented. |
| */ |
| |
| char nextChar(std::istream& in) { |
| ++stream_pos_; |
| return in.get(); |
| } |
| |
| void skipChar(std::istream& in) { |
| ++stream_pos_; |
| in.ignore(); |
| } |
| |
| private: |
| JsonbWriterT<OS_TYPE> writer_; |
| uint32_t stream_pos_; |
| JsonbErrType err_; |
| char num_buf_[512]; // buffer to hold number string |
| const char* end_buf_ = num_buf_ + sizeof(num_buf_) - 1; |
| uint32_t nesting_lvl_ = 0; |
| }; |
| |
| typedef JsonbParserT<JsonbOutStream> JsonbParser; |
| |
| } // namespace doris |
| |
| #endif // JSONB_JSONBJSONPARSER_H |