| /* |
| * Copyright (c) 2014, Facebook, Inc. |
| * All rights reserved. |
| * |
| * This source code is licensed under the BSD-style license found in the |
| * LICENSE file in the root directory of this source tree. An additional grant |
| * of patent rights can be found in the PATENTS file in the same directory. |
| * |
| */ |
| |
| /* |
| * This header defines JsonbDocument, JsonbKeyValue, and various value classes |
| * which are derived from JsonbValue, and a forward iterator for container |
| * values - essentially everything that is related to JSONB binary data |
| * structures. |
| * |
| * Implementation notes: |
| * |
| * None of the classes in this header file can be instantiated directly (i.e. |
| * you cannot create a JsonbKeyValue or JsonbValue object - all constructors |
| * are declared non-public). We use the classes as wrappers on the packed JSONB |
| * bytes (serialized), and cast the classes (types) to the underlying packed |
| * byte array. |
| * |
| * For the same reason, we cannot define any JSONB value class to be virtual, |
| * since we never call constructors, and will not instantiate vtbl and vptrs. |
| * |
| * Therefore, the classes are defined as packed structures (i.e. no data |
| * alignment and padding), and the private member variables of the classes are |
| * defined precisely in the same order as the JSONB spec. This ensures we |
| * access the packed JSONB bytes correctly. |
| * |
| * The packed structures are highly optimized for in-place operations with low |
| * overhead. The reads (and in-place writes) are performed directly on packed |
| * bytes. There is no memory allocation at all at runtime. |
| * |
| * For updates/writes of values that will expand the original JSONB size, the |
| * write will fail, and the caller needs to handle buffer increase. |
| * |
| * ** Iterator ** |
| * Both ObjectVal class and ArrayVal class have iterator type that you can use |
| * to declare an iterator on a container object to go through the key-value |
| * pairs or value list. The iterator has both non-const and const types. |
| * |
| * Note: iterators are forward direction only. |
| * |
| * ** Query ** |
| * Querying into containers is through the member functions find (for key/value |
| * pairs) and get (for array elements), and is in streaming style. We don't |
| * need to read/scan the whole JSONB packed bytes in order to return results. |
| * Once the key/index is found, we will stop search. You can use text to query |
| * both objects and array (for array, text will be converted to integer index), |
| * and use index to retrieve from array. Array index is 0-based. |
| * |
| * ** External dictionary ** |
| * During query processing, you can also pass a call-back function, so the |
| * search will first try to check if the key string exists in the dictionary. |
| * If so, search will be based on the id instead of the key string. |
| * @author Tian Xia <tianx@fb.com> |
| * |
| * this file is copied from |
| * https://github.com/facebook/mysql-5.6/blob/fb-mysql-5.6.35/fbson/FbsonDocument.h |
| * and modified by Doris |
| */ |
| |
| #ifndef JSONB_JSONBDOCUMENT_H |
| #define JSONB_JSONBDOCUMENT_H |
| |
| #include <algorithm> |
| #include <cctype> |
| #include <charconv> |
| #include <cstddef> |
| #include <cstdint> |
| #include <string> |
| #include <string_view> |
| #include <type_traits> |
| |
| #include "common/compiler_util.h" // IWYU pragma: keep |
| #include "common/status.h" |
| #include "runtime/define_primitive_type.h" |
| #include "util/string_util.h" |
| #include "vec/common/string_ref.h" |
| #include "vec/core/types.h" |
| |
| // #include "util/string_parser.hpp" |
| |
| // Concept to check for supported decimal types |
| template <typename T> |
| concept JsonbDecimalType = std::same_as<T, doris::vectorized::Decimal256> || |
| std::same_as<T, doris::vectorized::Decimal64> || |
| std::same_as<T, doris::vectorized::Decimal128V3> || |
| std::same_as<T, doris::vectorized::Decimal32>; |
| |
| namespace doris { |
| |
| template <typename T> |
| constexpr bool is_pod_v = std::is_trivial_v<T> && std::is_standard_layout_v<T>; |
| |
| struct JsonbStringVal; |
| struct ObjectVal; |
| struct ArrayVal; |
| struct JsonbBinaryVal; |
| struct ContainerVal; |
| |
| template <JsonbDecimalType T> |
| struct JsonbDecimalVal; |
| |
| using JsonbDecimal256 = JsonbDecimalVal<vectorized::Decimal256>; |
| using JsonbDecimal128 = JsonbDecimalVal<vectorized::Decimal128V3>; |
| using JsonbDecimal64 = JsonbDecimalVal<vectorized::Decimal64>; |
| using JsonbDecimal32 = JsonbDecimalVal<vectorized::Decimal32>; |
| |
| template <typename T> |
| requires std::is_integral_v<T> || std::is_floating_point_v<T> |
| struct NumberValT; |
| |
| using JsonbInt8Val = NumberValT<int8_t>; |
| using JsonbInt16Val = NumberValT<int16_t>; |
| using JsonbInt32Val = NumberValT<int32_t>; |
| using JsonbInt64Val = NumberValT<int64_t>; |
| using JsonbInt128Val = NumberValT<int128_t>; |
| using JsonbDoubleVal = NumberValT<double>; |
| using JsonbFloatVal = NumberValT<float>; |
| |
| template <typename T> |
| concept JsonbPodType = (std::same_as<T, JsonbStringVal> || std::same_as<T, ObjectVal> || |
| std::same_as<T, ContainerVal> || std::same_as<T, ArrayVal> || |
| std::same_as<T, JsonbBinaryVal> || std::same_as<T, JsonbDecimal32> || |
| std::same_as<T, JsonbDecimal64> || std::same_as<T, JsonbDecimal128> || |
| std::same_as<T, JsonbDecimal256> || std::same_as<T, JsonbDecimal32> || |
| std::same_as<T, JsonbInt8Val> || std::same_as<T, JsonbInt16Val> || |
| std::same_as<T, JsonbInt32Val> || std::same_as<T, JsonbInt64Val> || |
| std::same_as<T, JsonbInt128Val> || std::same_as<T, JsonbFloatVal> || |
| std::same_as<T, JsonbFloatVal> || std::same_as<T, JsonbDoubleVal>); |
| |
| #define JSONB_VER 1 |
| |
| using int128_t = __int128; |
| |
| // forward declaration |
| struct JsonbValue; |
| |
| class JsonbOutStream; |
| |
| template <class OS_TYPE> |
| class JsonbWriterT; |
| |
| using JsonbWriter = JsonbWriterT<JsonbOutStream>; |
| |
| const int MaxNestingLevel = 100; |
| |
| /* |
| * JsonbType defines 10 primitive types and 2 container types, as described |
| * below. |
| * NOTE: Do NOT modify the existing values or their order in this enum. |
| * You may only append new entries at the end before `NUM_TYPES`. |
| * This enum will be used in serialized data and/or persisted data. |
| * Changing existing values may break backward compatibility |
| * with previously stored or transmitted data. |
| * |
| * primitive_value ::= |
| * 0x00 //null value (0 byte) |
| * | 0x01 //boolean true (0 byte) |
| * | 0x02 //boolean false (0 byte) |
| * | 0x03 int8 //char/int8 (1 byte) |
| * | 0x04 int16 //int16 (2 bytes) |
| * | 0x05 int32 //int32 (4 bytes) |
| * | 0x06 int64 //int64 (8 bytes) |
| * | 0x07 double //floating point (8 bytes) |
| * | 0x08 string //variable length string |
| * | 0x09 binary //variable length binary |
| * |
| * container ::= |
| * 0x0A int32 key_value_list //object, int32 is the total bytes of the object |
| * | 0x0B int32 value_list //array, int32 is the total bytes of the array |
| */ |
| enum class JsonbType : char { |
| T_Null = 0x00, |
| T_True = 0x01, |
| T_False = 0x02, |
| T_Int8 = 0x03, |
| T_Int16 = 0x04, |
| T_Int32 = 0x05, |
| T_Int64 = 0x06, |
| T_Double = 0x07, |
| T_String = 0x08, |
| T_Binary = 0x09, |
| T_Object = 0x0A, |
| T_Array = 0x0B, |
| T_Int128 = 0x0C, |
| T_Float = 0x0D, |
| T_Decimal32 = 0x0E, // DecimalV3 only |
| T_Decimal64 = 0x0F, // DecimalV3 only |
| T_Decimal128 = 0x10, // DecimalV3 only |
| T_Decimal256 = 0x11, // DecimalV3 only |
| NUM_TYPES, |
| }; |
| |
| inline PrimitiveType get_primitive_type_from_json_type(JsonbType json_type) { |
| switch (json_type) { |
| case JsonbType::T_Null: |
| return TYPE_NULL; |
| case JsonbType::T_True: |
| case JsonbType::T_False: |
| return TYPE_BOOLEAN; |
| case JsonbType::T_Int8: |
| return TYPE_TINYINT; |
| case JsonbType::T_Int16: |
| return TYPE_SMALLINT; |
| case JsonbType::T_Int32: |
| return TYPE_INT; |
| case JsonbType::T_Int64: |
| return TYPE_BIGINT; |
| case JsonbType::T_Double: |
| return TYPE_DOUBLE; |
| case JsonbType::T_String: |
| return TYPE_STRING; |
| case JsonbType::T_Binary: |
| return TYPE_BINARY; |
| case JsonbType::T_Object: |
| return TYPE_STRUCT; |
| case JsonbType::T_Array: |
| return TYPE_ARRAY; |
| case JsonbType::T_Int128: |
| return TYPE_LARGEINT; |
| case JsonbType::T_Float: |
| return TYPE_FLOAT; |
| case JsonbType::T_Decimal32: |
| return TYPE_DECIMAL32; |
| case JsonbType::T_Decimal64: |
| return TYPE_DECIMAL64; |
| case JsonbType::T_Decimal128: |
| return TYPE_DECIMAL128I; |
| case JsonbType::T_Decimal256: |
| return TYPE_DECIMAL256; |
| default: |
| throw Exception(ErrorCode::INTERNAL_ERROR, "Unsupported JsonbType: {}", |
| static_cast<int>(json_type)); |
| } |
| } |
| |
| //for parse json path |
| constexpr char SCOPE = '$'; |
| constexpr char BEGIN_MEMBER = '.'; |
| constexpr char BEGIN_ARRAY = '['; |
| constexpr char END_ARRAY = ']'; |
| constexpr char DOUBLE_QUOTE = '"'; |
| constexpr char WILDCARD = '*'; |
| constexpr char MINUS = '-'; |
| constexpr char LAST[] = "last"; |
| constexpr char ESCAPE = '\\'; |
| constexpr unsigned int MEMBER_CODE = 0; |
| constexpr unsigned int ARRAY_CODE = 1; |
| |
| /// A simple input stream class for the JSON path parser. |
| class Stream { |
| public: |
| /// Creates an input stream reading from a character string. |
| /// @param string the input string |
| /// @param length the length of the input string |
| Stream(const char* string, size_t length) : m_position(string), m_end(string + length) {} |
| |
| /// Returns a pointer to the current position in the stream. |
| const char* position() const { return m_position; } |
| |
| /// Returns a pointer to the position just after the end of the stream. |
| const char* end() const { return m_end; } |
| |
| /// Returns the number of bytes remaining in the stream. |
| size_t remaining() const { |
| assert(m_position <= m_end); |
| return m_end - m_position; |
| } |
| |
| /// Tells if the stream has been exhausted. |
| bool exhausted() const { return remaining() == 0; } |
| |
| /// Reads the next byte from the stream and moves the position forward. |
| char read() { |
| assert(!exhausted()); |
| return *m_position++; |
| } |
| |
| /// Reads the next byte from the stream without moving the position forward. |
| char peek() const { |
| assert(!exhausted()); |
| return *m_position; |
| } |
| |
| /// Moves the position to the next non-whitespace character. |
| void skip_whitespace() { |
| m_position = std::find_if_not(m_position, m_end, [](char c) { return std::isspace(c); }); |
| } |
| |
| /// Moves the position n bytes forward. |
| void skip(size_t n) { |
| assert(remaining() >= n); |
| m_position += n; |
| skip_whitespace(); |
| } |
| |
| void advance() { m_position++; } |
| |
| void clear_leg_ptr() { leg_ptr = nullptr; } |
| |
| void set_leg_ptr(char* ptr) { |
| clear_leg_ptr(); |
| leg_ptr = ptr; |
| } |
| |
| char* get_leg_ptr() { return leg_ptr; } |
| |
| void clear_leg_len() { leg_len = 0; } |
| |
| void add_leg_len() { leg_len++; } |
| |
| unsigned int get_leg_len() const { return leg_len; } |
| |
| void remove_escapes() { |
| int new_len = 0; |
| for (int i = 0; i < leg_len; i++) { |
| if (leg_ptr[i] != '\\') { |
| leg_ptr[new_len++] = leg_ptr[i]; |
| } |
| } |
| leg_ptr[new_len] = '\0'; |
| leg_len = new_len; |
| } |
| |
| void set_has_escapes(bool has) { has_escapes = has; } |
| |
| bool get_has_escapes() const { return has_escapes; } |
| |
| private: |
| /// The current position in the stream. |
| const char* m_position = nullptr; |
| |
| /// The end of the stream. |
| const char* const m_end; |
| |
| ///path leg ptr |
| char* leg_ptr = nullptr; |
| |
| ///path leg len |
| unsigned int leg_len; |
| |
| ///Whether to contain escape characters |
| bool has_escapes = false; |
| }; |
| |
| struct leg_info { |
| ///path leg ptr |
| char* leg_ptr = nullptr; |
| |
| ///path leg len |
| unsigned int leg_len; |
| |
| ///array_index |
| int array_index; |
| |
| ///type: 0 is member 1 is array |
| unsigned int type; |
| |
| bool to_string(std::string* str) const { |
| if (type == MEMBER_CODE) { |
| str->push_back(BEGIN_MEMBER); |
| bool contains_space = false; |
| std::string tmp; |
| for (auto* it = leg_ptr; it != (leg_ptr + leg_len); ++it) { |
| if (std::isspace(*it)) { |
| contains_space = true; |
| } else if (*it == '"' || *it == ESCAPE || *it == '\r' || *it == '\n' || |
| *it == '\b' || *it == '\t') { |
| tmp.push_back(ESCAPE); |
| } |
| tmp.push_back(*it); |
| } |
| if (contains_space) { |
| str->push_back(DOUBLE_QUOTE); |
| } |
| str->append(tmp); |
| if (contains_space) { |
| str->push_back(DOUBLE_QUOTE); |
| } |
| return true; |
| } else if (type == ARRAY_CODE) { |
| str->push_back(BEGIN_ARRAY); |
| std::string int_str = std::to_string(array_index); |
| str->append(int_str); |
| str->push_back(END_ARRAY); |
| return true; |
| } else { |
| return false; |
| } |
| } |
| }; |
| |
| class JsonbPath { |
| public: |
| // parse json path |
| static bool parsePath(Stream* stream, JsonbPath* path); |
| |
| static bool parse_array(Stream* stream, JsonbPath* path); |
| static bool parse_member(Stream* stream, JsonbPath* path); |
| |
| //return true if json path valid else return false |
| bool seek(const char* string, size_t length); |
| |
| void add_leg_to_leg_vector(std::unique_ptr<leg_info> leg) { |
| leg_vector.emplace_back(leg.release()); |
| } |
| |
| void pop_leg_from_leg_vector() { leg_vector.pop_back(); } |
| |
| bool to_string(std::string* res) const { |
| res->push_back(SCOPE); |
| for (const auto& leg : leg_vector) { |
| auto valid = leg->to_string(res); |
| if (!valid) { |
| return false; |
| } |
| } |
| return true; |
| } |
| |
| size_t get_leg_vector_size() const { return leg_vector.size(); } |
| |
| leg_info* get_leg_from_leg_vector(size_t i) const { return leg_vector[i].get(); } |
| |
| bool is_wildcard() const { return _is_wildcard; } |
| bool is_supper_wildcard() const { return _is_supper_wildcard; } |
| |
| void clean() { leg_vector.clear(); } |
| |
| private: |
| std::vector<std::unique_ptr<leg_info>> leg_vector; |
| bool _is_wildcard = false; // whether the path is a wildcard path |
| bool _is_supper_wildcard = false; // supper wildcard likes '$**.a' or '$**[1]' |
| }; |
| |
| /* |
| * JsonbFwdIteratorT implements JSONB's iterator template. |
| * |
| * Note: it is an FORWARD iterator only due to the design of JSONB format. |
| */ |
| template <class Iter_Type, class Cont_Type> |
| class JsonbFwdIteratorT { |
| public: |
| using iterator = Iter_Type; |
| using pointer = typename std::iterator_traits<Iter_Type>::pointer; |
| using reference = typename std::iterator_traits<Iter_Type>::reference; |
| |
| explicit JsonbFwdIteratorT() : current_(nullptr) {} |
| explicit JsonbFwdIteratorT(const iterator& i) : current_(i) {} |
| |
| // allow non-const to const iterator conversion (same container type) |
| template <class Iter_Ty> |
| JsonbFwdIteratorT(const JsonbFwdIteratorT<Iter_Ty, Cont_Type>& rhs) : current_(rhs.base()) {} |
| |
| bool operator==(const JsonbFwdIteratorT& rhs) const { return (current_ == rhs.current_); } |
| |
| bool operator!=(const JsonbFwdIteratorT& rhs) const { return !operator==(rhs); } |
| |
| bool operator<(const JsonbFwdIteratorT& rhs) const { return (current_ < rhs.current_); } |
| |
| bool operator>(const JsonbFwdIteratorT& rhs) const { return !operator<(rhs); } |
| |
| JsonbFwdIteratorT& operator++() { |
| current_ = (iterator)(((char*)current_) + current_->numPackedBytes()); |
| return *this; |
| } |
| |
| JsonbFwdIteratorT operator++(int) { |
| auto tmp = *this; |
| current_ = (iterator)(((char*)current_) + current_->numPackedBytes()); |
| return tmp; |
| } |
| |
| explicit operator pointer() { return current_; } |
| |
| reference operator*() const { return *current_; } |
| |
| pointer operator->() const { return current_; } |
| |
| iterator base() const { return current_; } |
| |
| private: |
| iterator current_; |
| }; |
| using JsonbTypeUnder = std::underlying_type_t<JsonbType>; |
| |
| #if defined(__clang__) |
| #pragma clang diagnostic push |
| #pragma clang diagnostic ignored "-Wzero-length-array" |
| #endif |
| #pragma pack(push, 1) |
| |
| /* |
| * JsonbDocument is the main object that accesses and queries JSONB packed |
| * bytes. NOTE: JsonbDocument only allows object container as the top level |
| * JSONB value. However, you can use the static method "createValue" to get any |
| * JsonbValue object from the packed bytes. |
| * |
| * JsonbDocument object also dereferences to an object container value |
| * (ObjectVal) once JSONB is loaded. |
| * |
| * ** Load ** |
| * JsonbDocument is usable after loading packed bytes (memory location) into |
| * the object. We only need the header and first few bytes of the payload after |
| * header to verify the JSONB. |
| * |
| * Note: creating an JsonbDocument (through createDocument) does not allocate |
| * any memory. The document object is an efficient wrapper on the packed bytes |
| * which is accessed directly. |
| * |
| * ** Query ** |
| * Query is through dereferencing into ObjectVal. |
| */ |
| class JsonbDocument { |
| public: |
| // create an JsonbDocument object from JSONB packed bytes |
| [[nodiscard]] static Status checkAndCreateDocument(const char* pb, size_t size, |
| JsonbDocument** doc); |
| |
| // create an JsonbValue from JSONB packed bytes |
| static JsonbValue* createValue(const char* pb, size_t size); |
| |
| uint8_t version() const { return header_.ver_; } |
| |
| JsonbValue* getValue() { return ((JsonbValue*)payload_); } |
| |
| void setValue(const JsonbValue* value); |
| |
| unsigned int numPackedBytes() const; |
| |
| // ObjectVal* operator->(); |
| |
| const ObjectVal* operator->() const; |
| |
| private: |
| /* |
| * JsonbHeader class defines JSONB header (internal to JsonbDocument). |
| * |
| * Currently it only contains version information (1-byte). We may expand the |
| * header to include checksum of the JSONB binary for more security. |
| */ |
| struct JsonbHeader { |
| uint8_t ver_; |
| } header_; |
| |
| char payload_[0]; |
| }; |
| |
| /* |
| * JsonbKeyValue class defines JSONB key type, as described below. |
| * |
| * key ::= |
| * 0x00 int8 //1-byte dictionary id |
| * | int8 (byte*) //int8 (>0) is the size of the key string |
| * |
| * value ::= primitive_value | container |
| * |
| * JsonbKeyValue can be either an id mapping to the key string in an external |
| * dictionary, or it is the original key string. Whether to read an id or a |
| * string is decided by the first byte (size). |
| * |
| * Note: a key object must be followed by a value object. Therefore, a key |
| * object implicitly refers to a key-value pair, and you can get the value |
| * object right after the key object. The function numPackedBytes hence |
| * indicates the total size of the key-value pair, so that we will be able go |
| * to next pair from the key. |
| * |
| * ** Dictionary size ** |
| * By default, the dictionary size is 255 (1-byte). Users can define |
| * "USE_LARGE_DICT" to increase the dictionary size to 655535 (2-byte). |
| */ |
| class JsonbKeyValue { |
| public: |
| // now we use sMaxKeyId to represent an empty key |
| static const int sMaxKeyId = 65535; |
| using keyid_type = uint16_t; |
| |
| static const uint8_t sMaxKeyLen = 64; |
| |
| // size of the key. 0 indicates it is stored as id |
| uint8_t klen() const { return size; } |
| |
| // get the key string. Note the string may not be null terminated. |
| const char* getKeyStr() const { return key.str_; } |
| |
| keyid_type getKeyId() const { return key.id_; } |
| |
| unsigned int keyPackedBytes() const { |
| return size ? (sizeof(size) + size) : (sizeof(size) + sizeof(keyid_type)); |
| } |
| |
| JsonbValue* value() const { return (JsonbValue*)(((char*)this) + keyPackedBytes()); } |
| |
| // size of the total packed bytes (key+value) |
| unsigned int numPackedBytes() const; |
| |
| uint8_t size; |
| |
| union key_ { |
| keyid_type id_; |
| char str_[1]; |
| } key; |
| }; |
| |
| struct JsonbFindResult { |
| const JsonbValue* value = nullptr; // found value |
| std::unique_ptr<JsonbWriter> writer; // writer to write the value |
| bool is_wildcard = false; // whether the path is a wildcard path |
| }; |
| |
| /* |
| * JsonbValue is the base class of all JSONB types. It contains only one member |
| * variable - type info, which can be retrieved by member functions is[Type]() |
| * or type(). |
| */ |
| struct JsonbValue { |
| static const uint32_t sMaxValueLen = 1 << 24; // 16M |
| |
| bool isNull() const { return (type == JsonbType::T_Null); } |
| bool isTrue() const { return (type == JsonbType::T_True); } |
| bool isFalse() const { return (type == JsonbType::T_False); } |
| bool isInt() const { return isInt8() || isInt16() || isInt32() || isInt64() || isInt128(); } |
| bool isInt8() const { return (type == JsonbType::T_Int8); } |
| bool isInt16() const { return (type == JsonbType::T_Int16); } |
| bool isInt32() const { return (type == JsonbType::T_Int32); } |
| bool isInt64() const { return (type == JsonbType::T_Int64); } |
| bool isDouble() const { return (type == JsonbType::T_Double); } |
| bool isFloat() const { return (type == JsonbType::T_Float); } |
| bool isString() const { return (type == JsonbType::T_String); } |
| bool isBinary() const { return (type == JsonbType::T_Binary); } |
| bool isObject() const { return (type == JsonbType::T_Object); } |
| bool isArray() const { return (type == JsonbType::T_Array); } |
| bool isInt128() const { return (type == JsonbType::T_Int128); } |
| bool isDecimal() const { |
| return (type == JsonbType::T_Decimal32 || type == JsonbType::T_Decimal64 || |
| type == JsonbType::T_Decimal128 || type == JsonbType::T_Decimal256); |
| } |
| bool isDecimal32() const { return (type == JsonbType::T_Decimal32); } |
| bool isDecimal64() const { return (type == JsonbType::T_Decimal64); } |
| bool isDecimal128() const { return (type == JsonbType::T_Decimal128); } |
| bool isDecimal256() const { return (type == JsonbType::T_Decimal256); } |
| |
| PrimitiveType get_primitive_type() const { return get_primitive_type_from_json_type(type); } |
| |
| const char* typeName() const { |
| switch (type) { |
| case JsonbType::T_Null: |
| return "null"; |
| case JsonbType::T_True: |
| case JsonbType::T_False: |
| return "bool"; |
| case JsonbType::T_Int8: |
| case JsonbType::T_Int16: |
| case JsonbType::T_Int32: |
| return "int"; |
| case JsonbType::T_Int64: |
| return "bigint"; |
| case JsonbType::T_Int128: |
| return "largeint"; |
| case JsonbType::T_Double: |
| return "double"; |
| case JsonbType::T_Float: |
| return "float"; |
| case JsonbType::T_String: |
| return "string"; |
| case JsonbType::T_Binary: |
| return "binary"; |
| case JsonbType::T_Object: |
| return "object"; |
| case JsonbType::T_Array: |
| return "array"; |
| case JsonbType::T_Decimal32: |
| return "Decimal32"; |
| case JsonbType::T_Decimal64: |
| return "Decimal64"; |
| case JsonbType::T_Decimal128: |
| return "Decimal128"; |
| case JsonbType::T_Decimal256: |
| return "Decimal256"; |
| default: |
| return "unknown"; |
| } |
| } |
| |
| // size of the total packed bytes |
| unsigned int numPackedBytes() const; |
| |
| // size of the value in bytes |
| unsigned int size() const; |
| |
| //Get the number of jsonbvalue elements |
| int numElements() const; |
| |
| //Whether to include the jsonbvalue rhs |
| bool contains(JsonbValue* rhs) const; |
| |
| // find the JSONB value by JsonbPath |
| JsonbFindResult findValue(JsonbPath& path) const; |
| friend class JsonbDocument; |
| |
| JsonbType type; // type info |
| |
| char payload[0]; // payload, which is the packed bytes of the value |
| |
| /** |
| * @brief Unpacks the underlying Jsonb binary content as a pointer to type `T`. |
| * |
| * @tparam T A POD (Plain Old Data) type that must satisfy the `JsonbPodType` concept. |
| * This ensures that `T` is trivially copyable, standard-layout, and safe to |
| * reinterpret from raw bytes without invoking undefined behavior. |
| * |
| * @return A pointer to a `const T` object, interpreted from the internal buffer. |
| * |
| * @note The caller must ensure that the current JsonbValue actually contains data |
| * compatible with type `T`, otherwise the result is undefined. |
| */ |
| template <JsonbPodType T> |
| const T* unpack() const { |
| static_assert(is_pod_v<T>, "T must be a POD type"); |
| return reinterpret_cast<const T*>(payload); |
| } |
| |
| // /** |
| // * @brief Unpacks the underlying Jsonb binary content as a pointer to type `T`. |
| // * |
| // * @tparam T A POD (Plain Old Data) type that must satisfy the `JsonbPodType` concept. |
| // * This ensures that `T` is trivially copyable, standard-layout, and safe to |
| // * reinterpret from raw bytes without invoking undefined behavior. |
| // * |
| // * @return A pointer to a `T` object, interpreted from the internal buffer. |
| // * |
| // * @note The caller must ensure that the current JsonbValue actually contains data |
| // * compatible with type `T`, otherwise the result is undefined. |
| // */ |
| // template <JsonbPodType T> |
| // T* unpack() { |
| // static_assert(is_pod_v<T>, "T must be a POD type"); |
| // return reinterpret_cast<T*>(payload); |
| // } |
| |
| int128_t int_val() const; |
| }; |
| |
| // inline ObjectVal* JsonbDocument::operator->() { |
| // return (((JsonbValue*)payload_)->unpack<ObjectVal>()); |
| // } |
| |
| inline const ObjectVal* JsonbDocument::operator->() const { |
| return (((JsonbValue*)payload_)->unpack<ObjectVal>()); |
| } |
| |
| /* |
| * NumerValT is the template class (derived from JsonbValue) of all number |
| * types (integers and double). |
| */ |
| template <typename T> |
| requires std::is_integral_v<T> || std::is_floating_point_v<T> |
| struct NumberValT { |
| public: |
| T val() const { return num; } |
| |
| static unsigned int numPackedBytes() { return sizeof(JsonbValue) + sizeof(T); } |
| |
| T num; |
| }; |
| |
| inline int128_t JsonbValue::int_val() const { |
| switch (type) { |
| case JsonbType::T_Int8: |
| return unpack<JsonbInt8Val>()->val(); |
| case JsonbType::T_Int16: |
| return unpack<JsonbInt16Val>()->val(); |
| case JsonbType::T_Int32: |
| return unpack<JsonbInt32Val>()->val(); |
| case JsonbType::T_Int64: |
| return unpack<JsonbInt64Val>()->val(); |
| case JsonbType::T_Int128: |
| return unpack<JsonbInt128Val>()->val(); |
| default: |
| throw Exception(ErrorCode::INTERNAL_ERROR, "Invalid JSONB value type: {}", |
| static_cast<int32_t>(type)); |
| } |
| } |
| |
| template <JsonbDecimalType T> |
| struct JsonbDecimalVal { |
| public: |
| using NativeType = typename T::NativeType; |
| |
| // get the decimal value |
| NativeType val() const { |
| // to avoid memory alignment issues, we use memcpy to copy the value |
| NativeType tmp; |
| memcpy(&tmp, &value, sizeof(NativeType)); |
| return tmp; |
| } |
| |
| static constexpr int numPackedBytes() { |
| return sizeof(JsonbValue) + sizeof(precision) + sizeof(scale) + sizeof(value); |
| } |
| |
| uint32_t precision; |
| uint32_t scale; |
| NativeType value; |
| }; |
| |
| /* |
| * BlobVal is the base class (derived from JsonbValue) for string and binary |
| * types. The size indicates the total bytes of the payload. |
| */ |
| struct JsonbBinaryVal { |
| public: |
| // size of the blob payload only |
| unsigned int getBlobLen() const { return size; } |
| |
| // return the blob as byte array |
| const char* getBlob() const { return payload; } |
| |
| // size of the total packed bytes |
| unsigned int numPackedBytes() const { return sizeof(JsonbValue) + sizeof(size) + size; } |
| friend class JsonbDocument; |
| |
| uint32_t size; |
| char payload[0]; |
| }; |
| |
| /* |
| * String type |
| * Note: JSONB string may not be a c-string (NULL-terminated) |
| */ |
| struct JsonbStringVal : public JsonbBinaryVal { |
| public: |
| /* |
| This function return the actual size of a string. Since for |
| a string, it can be null-terminated with null paddings or it |
| can take all the space in the payload without null in the end. |
| So we need to check it to get the true actual length of a string. |
| */ |
| size_t length() const { |
| // It's an empty string |
| if (0 == size) { |
| return size; |
| } |
| // The string stored takes all the spaces in payload |
| if (payload[size - 1] != 0) { |
| return size; |
| } |
| // It's shorter than the size of payload |
| return strnlen(payload, size); |
| } |
| }; |
| |
| /* |
| * ContainerVal is the base class (derived from JsonbValue) for object and |
| * array types. The size indicates the total bytes of the payload. |
| */ |
| struct ContainerVal { |
| // size of the container payload only |
| unsigned int getContainerSize() const { return size; } |
| |
| // return the container payload as byte array |
| const char* getPayload() const { return payload; } |
| |
| // size of the total packed bytes |
| unsigned int numPackedBytes() const { return sizeof(JsonbValue) + sizeof(size) + size; } |
| friend class JsonbDocument; |
| |
| uint32_t size; |
| char payload[0]; |
| }; |
| |
| /* |
| * Object type |
| */ |
| struct ObjectVal : public ContainerVal { |
| using value_type = JsonbKeyValue; |
| using pointer = value_type*; |
| using const_pointer = const value_type*; |
| using iterator = JsonbFwdIteratorT<pointer, ObjectVal>; |
| using const_iterator = JsonbFwdIteratorT<const_pointer, ObjectVal>; |
| |
| const_iterator search(const char* key) const { |
| return const_cast<ObjectVal*>(this)->search(key); |
| } |
| |
| const_iterator search(const char* key, unsigned int klen) const { |
| return const_cast<ObjectVal*>(this)->search(key, klen); |
| } |
| |
| iterator search(const char* key) { |
| if (!key) { |
| return end(); |
| } |
| return search(key, (unsigned int)strlen(key)); |
| } |
| |
| iterator search(const char* key, unsigned int klen) { |
| if (!key || !klen) { |
| return end(); |
| } |
| return internalSearch(key, klen); |
| } |
| |
| // Get number of elements in object |
| int numElem() const { |
| const char* pch = payload; |
| const char* fence = payload + size; |
| |
| unsigned int num = 0; |
| while (pch < fence) { |
| auto* pkey = (JsonbKeyValue*)(pch); |
| ++num; |
| pch += pkey->numPackedBytes(); |
| } |
| |
| assert(pch == fence); |
| |
| return num; |
| } |
| |
| JsonbValue* find(const char* key) const { return const_cast<ObjectVal*>(this)->find(key); } |
| |
| JsonbValue* find(const char* key, unsigned int klen) const { |
| return const_cast<ObjectVal*>(this)->find(key, klen); |
| } |
| |
| // find the JSONB value by a key string (null terminated) |
| JsonbValue* find(const char* key) { |
| if (!key) { |
| return nullptr; |
| } |
| return find(key, (unsigned int)strlen(key)); |
| } |
| |
| // find the JSONB value by a key string (with length) |
| JsonbValue* find(const char* key, unsigned int klen) { |
| iterator kv = search(key, klen); |
| if (end() == kv) { |
| return nullptr; |
| } |
| return kv->value(); |
| } |
| |
| iterator begin() { return iterator((pointer)payload); } |
| |
| const_iterator begin() const { return const_iterator((pointer)payload); } |
| |
| iterator end() { return iterator((pointer)(payload + size)); } |
| |
| const_iterator end() const { return const_iterator((pointer)(payload + size)); } |
| |
| std::vector<std::pair<StringRef, const JsonbValue*>> get_ordered_key_value_pairs() const; |
| |
| private: |
| iterator internalSearch(const char* key, unsigned int klen) { |
| const char* pch = payload; |
| const char* fence = payload + size; |
| |
| while (pch < fence) { |
| auto* pkey = (JsonbKeyValue*)(pch); |
| if (klen == pkey->klen() && strncmp(key, pkey->getKeyStr(), klen) == 0) { |
| return iterator(pkey); |
| } |
| pch += pkey->numPackedBytes(); |
| } |
| |
| assert(pch == fence); |
| |
| return end(); |
| } |
| }; |
| |
| /* |
| * Array type |
| */ |
| struct ArrayVal : public ContainerVal { |
| using value_type = JsonbValue; |
| using pointer = value_type*; |
| using const_pointer = const value_type*; |
| using iterator = JsonbFwdIteratorT<pointer, ArrayVal>; |
| using const_iterator = JsonbFwdIteratorT<const_pointer, ArrayVal>; |
| |
| // get the JSONB value at index |
| JsonbValue* get(int idx) const { |
| if (idx < 0) { |
| return nullptr; |
| } |
| |
| const char* pch = payload; |
| const char* fence = payload + size; |
| |
| while (pch < fence && idx-- > 0) { |
| pch += ((JsonbValue*)pch)->numPackedBytes(); |
| } |
| if (idx > 0 || pch == fence) { |
| return nullptr; |
| } |
| |
| return (JsonbValue*)pch; |
| } |
| |
| // Get number of elements in array |
| int numElem() const { |
| const char* pch = payload; |
| const char* fence = payload + size; |
| |
| unsigned int num = 0; |
| while (pch < fence) { |
| ++num; |
| pch += ((JsonbValue*)pch)->numPackedBytes(); |
| } |
| |
| assert(pch == fence); |
| |
| return num; |
| } |
| |
| iterator begin() { return iterator((pointer)payload); } |
| |
| const_iterator begin() const { return const_iterator((pointer)payload); } |
| |
| iterator end() { return iterator((pointer)(payload + size)); } |
| |
| const_iterator end() const { return const_iterator((pointer)(payload + size)); } |
| }; |
| |
| inline Status JsonbDocument::checkAndCreateDocument(const char* pb, size_t size, |
| JsonbDocument** doc) { |
| *doc = nullptr; |
| if (!pb || size < sizeof(JsonbHeader) + sizeof(JsonbValue)) { |
| return Status::InvalidArgument("Invalid JSONB document: too small size({}) or null pointer", |
| size); |
| } |
| |
| auto* doc_ptr = (JsonbDocument*)pb; |
| if (doc_ptr->header_.ver_ != JSONB_VER) { |
| return Status::InvalidArgument("Invalid JSONB document: invalid version({})", |
| doc_ptr->header_.ver_); |
| } |
| |
| auto* val = (JsonbValue*)doc_ptr->payload_; |
| if (val->type < JsonbType::T_Null || val->type >= JsonbType::NUM_TYPES || |
| size != sizeof(JsonbHeader) + val->numPackedBytes()) { |
| return Status::InvalidArgument("Invalid JSONB document: invalid type({}) or size({})", |
| static_cast<JsonbTypeUnder>(val->type), size); |
| } |
| |
| *doc = doc_ptr; |
| return Status::OK(); |
| } |
| inline void JsonbDocument::setValue(const JsonbValue* value) { |
| memcpy(payload_, value, value->numPackedBytes()); |
| } |
| |
| inline JsonbValue* JsonbDocument::createValue(const char* pb, size_t size) { |
| if (!pb || size < sizeof(JsonbHeader) + sizeof(JsonbValue)) { |
| return nullptr; |
| } |
| |
| auto* doc = (JsonbDocument*)pb; |
| if (doc->header_.ver_ != JSONB_VER) { |
| return nullptr; |
| } |
| |
| auto* val = (JsonbValue*)doc->payload_; |
| if (size != sizeof(JsonbHeader) + val->numPackedBytes()) { |
| return nullptr; |
| } |
| |
| return val; |
| } |
| |
| inline unsigned int JsonbDocument::numPackedBytes() const { |
| return ((const JsonbValue*)payload_)->numPackedBytes() + sizeof(header_); |
| } |
| |
| inline unsigned int JsonbKeyValue::numPackedBytes() const { |
| unsigned int ks = keyPackedBytes(); |
| auto* val = (JsonbValue*)(((char*)this) + ks); |
| return ks + val->numPackedBytes(); |
| } |
| |
| // Poor man's "virtual" function JsonbValue::numPackedBytes |
| inline unsigned int JsonbValue::numPackedBytes() const { |
| switch (type) { |
| case JsonbType::T_Null: |
| case JsonbType::T_True: |
| case JsonbType::T_False: { |
| return sizeof(type); |
| } |
| |
| case JsonbType::T_Int8: { |
| return sizeof(type) + sizeof(int8_t); |
| } |
| case JsonbType::T_Int16: { |
| return sizeof(type) + sizeof(int16_t); |
| } |
| case JsonbType::T_Int32: { |
| return sizeof(type) + sizeof(int32_t); |
| } |
| case JsonbType::T_Int64: { |
| return sizeof(type) + sizeof(int64_t); |
| } |
| case JsonbType::T_Double: { |
| return sizeof(type) + sizeof(double); |
| } |
| case JsonbType::T_Float: { |
| return sizeof(type) + sizeof(float); |
| } |
| case JsonbType::T_Int128: { |
| return sizeof(type) + sizeof(int128_t); |
| } |
| case JsonbType::T_String: |
| case JsonbType::T_Binary: { |
| return unpack<JsonbBinaryVal>()->numPackedBytes(); |
| } |
| |
| case JsonbType::T_Object: |
| case JsonbType::T_Array: { |
| return unpack<ContainerVal>()->numPackedBytes(); |
| } |
| case JsonbType::T_Decimal32: { |
| return JsonbDecimal32::numPackedBytes(); |
| } |
| case JsonbType::T_Decimal64: { |
| return JsonbDecimal64::numPackedBytes(); |
| } |
| case JsonbType::T_Decimal128: { |
| return JsonbDecimal128::numPackedBytes(); |
| } |
| case JsonbType::T_Decimal256: { |
| return JsonbDecimal256::numPackedBytes(); |
| } |
| case JsonbType::NUM_TYPES: |
| break; |
| } |
| |
| throw Exception(ErrorCode::INTERNAL_ERROR, "Invalid JSONB value type: {}", |
| static_cast<int32_t>(type)); |
| } |
| |
| inline int JsonbValue::numElements() const { |
| switch (type) { |
| case JsonbType::T_Int8: |
| case JsonbType::T_Int16: |
| case JsonbType::T_Int32: |
| case JsonbType::T_Int64: |
| case JsonbType::T_Double: |
| case JsonbType::T_Float: |
| case JsonbType::T_Int128: |
| case JsonbType::T_String: |
| case JsonbType::T_Binary: |
| case JsonbType::T_Null: |
| case JsonbType::T_True: |
| case JsonbType::T_False: |
| case JsonbType::T_Decimal32: |
| case JsonbType::T_Decimal64: |
| case JsonbType::T_Decimal128: |
| case JsonbType::T_Decimal256: { |
| return 1; |
| } |
| case JsonbType::T_Object: { |
| return unpack<ObjectVal>()->numElem(); |
| } |
| case JsonbType::T_Array: { |
| return unpack<ArrayVal>()->numElem(); |
| } |
| case JsonbType::NUM_TYPES: |
| break; |
| } |
| throw Exception(ErrorCode::INTERNAL_ERROR, "Invalid JSONB value type: {}", |
| static_cast<int32_t>(type)); |
| } |
| |
| inline bool JsonbValue::contains(JsonbValue* rhs) const { |
| switch (type) { |
| case JsonbType::T_Int8: |
| case JsonbType::T_Int16: |
| case JsonbType::T_Int32: |
| case JsonbType::T_Int64: |
| case JsonbType::T_Int128: { |
| return rhs->isInt() && this->int_val() == rhs->int_val(); |
| } |
| case JsonbType::T_Double: |
| case JsonbType::T_Float: { |
| if (!rhs->isDouble() && !rhs->isFloat()) { |
| return false; |
| } |
| double left = isDouble() ? unpack<JsonbDoubleVal>()->val() : unpack<JsonbFloatVal>()->val(); |
| double right = rhs->isDouble() ? rhs->unpack<JsonbDoubleVal>()->val() |
| : rhs->unpack<JsonbFloatVal>()->val(); |
| return left == right; |
| } |
| case JsonbType::T_String: |
| case JsonbType::T_Binary: { |
| if (rhs->isString() || rhs->isBinary()) { |
| const auto* str_value1 = unpack<JsonbStringVal>(); |
| const auto* str_value2 = rhs->unpack<JsonbStringVal>(); |
| return str_value1->length() == str_value2->length() && |
| std::memcmp(str_value1->getBlob(), str_value2->getBlob(), |
| str_value1->length()) == 0; |
| } |
| return false; |
| } |
| case JsonbType::T_Array: { |
| int lhs_num = unpack<ArrayVal>()->numElem(); |
| if (rhs->isArray()) { |
| int rhs_num = rhs->unpack<ArrayVal>()->numElem(); |
| if (rhs_num > lhs_num) { |
| return false; |
| } |
| int contains_num = 0; |
| for (int i = 0; i < lhs_num; ++i) { |
| for (int j = 0; j < rhs_num; ++j) { |
| if (unpack<ArrayVal>()->get(i)->contains(rhs->unpack<ArrayVal>()->get(j))) { |
| contains_num++; |
| break; |
| } |
| } |
| } |
| return contains_num == rhs_num; |
| } |
| for (int i = 0; i < lhs_num; ++i) { |
| if (unpack<ArrayVal>()->get(i)->contains(rhs)) { |
| return true; |
| } |
| } |
| return false; |
| } |
| case JsonbType::T_Object: { |
| if (rhs->isObject()) { |
| const auto* obj_value1 = unpack<ObjectVal>(); |
| const auto* obj_value2 = rhs->unpack<ObjectVal>(); |
| for (auto it = obj_value2->begin(); it != obj_value2->end(); ++it) { |
| JsonbValue* value = obj_value1->find(it->getKeyStr(), it->klen()); |
| if (value == nullptr || !value->contains(it->value())) { |
| return false; |
| } |
| } |
| return true; |
| } |
| return false; |
| } |
| case JsonbType::T_Null: { |
| return rhs->isNull(); |
| } |
| case JsonbType::T_True: { |
| return rhs->isTrue(); |
| } |
| case JsonbType::T_False: { |
| return rhs->isFalse(); |
| } |
| case JsonbType::T_Decimal32: { |
| if (rhs->isDecimal32()) { |
| return unpack<JsonbDecimal32>()->val() == rhs->unpack<JsonbDecimal32>()->val() && |
| unpack<JsonbDecimal32>()->precision == |
| rhs->unpack<JsonbDecimal32>()->precision && |
| unpack<JsonbDecimal32>()->scale == rhs->unpack<JsonbDecimal32>()->scale; |
| } |
| return false; |
| } |
| case JsonbType::T_Decimal64: { |
| if (rhs->isDecimal64()) { |
| return unpack<JsonbDecimal64>()->val() == rhs->unpack<JsonbDecimal64>()->val() && |
| unpack<JsonbDecimal64>()->precision == |
| rhs->unpack<JsonbDecimal64>()->precision && |
| unpack<JsonbDecimal64>()->scale == rhs->unpack<JsonbDecimal64>()->scale; |
| } |
| return false; |
| } |
| case JsonbType::T_Decimal128: { |
| if (rhs->isDecimal128()) { |
| return unpack<JsonbDecimal128>()->val() == rhs->unpack<JsonbDecimal128>()->val() && |
| unpack<JsonbDecimal128>()->precision == |
| rhs->unpack<JsonbDecimal128>()->precision && |
| unpack<JsonbDecimal128>()->scale == rhs->unpack<JsonbDecimal128>()->scale; |
| } |
| return false; |
| } |
| case JsonbType::T_Decimal256: { |
| if (rhs->isDecimal256()) { |
| return unpack<JsonbDecimal256>()->val() == rhs->unpack<JsonbDecimal256>()->val() && |
| unpack<JsonbDecimal256>()->precision == |
| rhs->unpack<JsonbDecimal256>()->precision && |
| unpack<JsonbDecimal256>()->scale == rhs->unpack<JsonbDecimal256>()->scale; |
| } |
| return false; |
| } |
| case JsonbType::NUM_TYPES: |
| break; |
| } |
| |
| throw Exception(ErrorCode::INTERNAL_ERROR, "Invalid JSONB value type: {}", |
| static_cast<int32_t>(type)); |
| } |
| |
| inline bool JsonbPath::seek(const char* key_path, size_t kp_len) { |
| while (kp_len > 0 && std::isspace(key_path[kp_len - 1])) { |
| --kp_len; |
| } |
| |
| //path invalid |
| if (!key_path || kp_len == 0) { |
| return false; |
| } |
| Stream stream(key_path, kp_len); |
| stream.skip_whitespace(); |
| if (stream.exhausted() || stream.read() != SCOPE) { |
| //path invalid |
| return false; |
| } |
| |
| while (!stream.exhausted()) { |
| stream.skip_whitespace(); |
| stream.clear_leg_ptr(); |
| stream.clear_leg_len(); |
| |
| if (!JsonbPath::parsePath(&stream, this)) { |
| //path invalid |
| return false; |
| } |
| } |
| return true; |
| } |
| |
| inline bool JsonbPath::parsePath(Stream* stream, JsonbPath* path) { |
| // $[0] |
| if (stream->peek() == BEGIN_ARRAY) { |
| return parse_array(stream, path); |
| } |
| // $.a or $.[0] |
| else if (stream->peek() == BEGIN_MEMBER) { |
| // advance past the . |
| stream->skip(1); |
| |
| if (stream->exhausted()) { |
| return false; |
| } |
| |
| // $.[0] |
| if (stream->peek() == BEGIN_ARRAY) { |
| return parse_array(stream, path); |
| } |
| // $.a |
| else { |
| return parse_member(stream, path); |
| } |
| } else if (stream->peek() == WILDCARD) { |
| stream->skip(1); |
| if (stream->exhausted()) { |
| return false; |
| } |
| |
| // $** |
| if (stream->peek() == WILDCARD) { |
| path->_is_supper_wildcard = true; |
| } |
| |
| stream->skip(1); |
| if (stream->exhausted()) { |
| return false; |
| } |
| |
| if (stream->peek() == BEGIN_ARRAY) { |
| return parse_array(stream, path); |
| } else if (stream->peek() == BEGIN_MEMBER) { |
| // advance past the . |
| stream->skip(1); |
| |
| if (stream->exhausted()) { |
| return false; |
| } |
| |
| // $.[0] |
| if (stream->peek() == BEGIN_ARRAY) { |
| return parse_array(stream, path); |
| } |
| // $.a |
| else { |
| return parse_member(stream, path); |
| } |
| } |
| return false; |
| } else { |
| return false; //invalid json path |
| } |
| } |
| |
| inline bool JsonbPath::parse_array(Stream* stream, JsonbPath* path) { |
| assert(stream->peek() == BEGIN_ARRAY); |
| stream->skip(1); |
| if (stream->exhausted()) { |
| return false; |
| } |
| |
| if (stream->peek() == WILDCARD) { |
| stream->set_leg_ptr(const_cast<char*>(stream->position())); |
| stream->add_leg_len(); |
| stream->skip(1); |
| if (stream->exhausted()) { |
| return false; |
| } |
| |
| if (stream->peek() == END_ARRAY) { |
| std::unique_ptr<leg_info> leg( |
| new leg_info(stream->get_leg_ptr(), stream->get_leg_len(), 0, ARRAY_CODE)); |
| path->add_leg_to_leg_vector(std::move(leg)); |
| stream->skip(1); |
| path->_is_wildcard = true; |
| return true; |
| } else { |
| return false; |
| } |
| } |
| |
| stream->set_leg_ptr(const_cast<char*>(stream->position())); |
| |
| for (; !stream->exhausted() && stream->peek() != END_ARRAY; stream->advance()) { |
| stream->add_leg_len(); |
| } |
| |
| if (stream->exhausted() || stream->peek() != END_ARRAY) { |
| return false; |
| } else { |
| stream->skip(1); |
| } |
| |
| //parse array index to int |
| |
| std::string_view idx_string(stream->get_leg_ptr(), stream->get_leg_len()); |
| int index = 0; |
| |
| if (stream->get_leg_len() >= 4 && |
| std::equal(LAST, LAST + 4, stream->get_leg_ptr(), |
| [](char c1, char c2) { return std::tolower(c1) == std::tolower(c2); })) { |
| auto pos = idx_string.find(MINUS); |
| |
| if (pos != std::string::npos) { |
| for (size_t i = 4; i < pos; ++i) { |
| if (std::isspace(idx_string[i])) { |
| continue; |
| } else { |
| // leading zeroes are not allowed |
| LOG(WARNING) << "Non-space char in idx_string: '" << idx_string << "'"; |
| return false; |
| } |
| } |
| idx_string = idx_string.substr(pos + 1); |
| idx_string = trim(idx_string); |
| |
| auto result = std::from_chars(idx_string.data(), idx_string.data() + idx_string.size(), |
| index); |
| if (result.ec != std::errc()) { |
| LOG(WARNING) << "Invalid index in JSON path: '" << idx_string << "'"; |
| return false; |
| } |
| |
| } else if (stream->get_leg_len() > 4) { |
| return false; |
| } |
| |
| std::unique_ptr<leg_info> leg(new leg_info(nullptr, 0, -index - 1, ARRAY_CODE)); |
| path->add_leg_to_leg_vector(std::move(leg)); |
| |
| return true; |
| } |
| |
| auto result = std::from_chars(idx_string.data(), idx_string.data() + idx_string.size(), index); |
| |
| if (result.ec != std::errc()) { |
| return false; |
| } |
| |
| std::unique_ptr<leg_info> leg(new leg_info(nullptr, 0, index, ARRAY_CODE)); |
| path->add_leg_to_leg_vector(std::move(leg)); |
| |
| return true; |
| } |
| |
| inline bool JsonbPath::parse_member(Stream* stream, JsonbPath* path) { |
| if (stream->exhausted()) { |
| return false; |
| } |
| |
| if (stream->peek() == WILDCARD) { |
| stream->set_leg_ptr(const_cast<char*>(stream->position())); |
| stream->add_leg_len(); |
| stream->skip(1); |
| std::unique_ptr<leg_info> leg( |
| new leg_info(stream->get_leg_ptr(), stream->get_leg_len(), 0, MEMBER_CODE)); |
| path->add_leg_to_leg_vector(std::move(leg)); |
| path->_is_wildcard = true; |
| return true; |
| } |
| |
| stream->set_leg_ptr(const_cast<char*>(stream->position())); |
| |
| const char* left_quotation_marks = nullptr; |
| const char* right_quotation_marks = nullptr; |
| |
| for (; !stream->exhausted(); stream->advance()) { |
| // Only accept space characters quoted by double quotes. |
| if (std::isspace(stream->peek()) && left_quotation_marks == nullptr) { |
| return false; |
| } else if (stream->peek() == ESCAPE) { |
| stream->add_leg_len(); |
| stream->skip(1); |
| stream->add_leg_len(); |
| stream->set_has_escapes(true); |
| if (stream->exhausted()) { |
| return false; |
| } |
| continue; |
| } else if (stream->peek() == DOUBLE_QUOTE) { |
| if (left_quotation_marks == nullptr) { |
| left_quotation_marks = stream->position(); |
| stream->set_leg_ptr(const_cast<char*>(++left_quotation_marks)); |
| continue; |
| } else { |
| right_quotation_marks = stream->position(); |
| stream->skip(1); |
| break; |
| } |
| } else if (stream->peek() == BEGIN_MEMBER || stream->peek() == BEGIN_ARRAY) { |
| if (left_quotation_marks == nullptr) { |
| break; |
| } |
| } |
| |
| stream->add_leg_len(); |
| } |
| |
| if ((left_quotation_marks != nullptr && right_quotation_marks == nullptr) || |
| stream->get_leg_ptr() == nullptr || stream->get_leg_len() == 0) { |
| return false; //invalid json path |
| } |
| |
| if (stream->get_has_escapes()) { |
| stream->remove_escapes(); |
| } |
| |
| std::unique_ptr<leg_info> leg( |
| new leg_info(stream->get_leg_ptr(), stream->get_leg_len(), 0, MEMBER_CODE)); |
| path->add_leg_to_leg_vector(std::move(leg)); |
| |
| return true; |
| } |
| |
| static_assert(is_pod_v<JsonbDocument>, "JsonbDocument must be standard layout and trivial"); |
| static_assert(is_pod_v<JsonbValue>, "JsonbValue must be standard layout and trivial"); |
| static_assert(is_pod_v<JsonbDecimal32>, "JsonbDecimal32 must be standard layout and trivial"); |
| static_assert(is_pod_v<JsonbDecimal64>, "JsonbDecimal64 must be standard layout and trivial"); |
| static_assert(is_pod_v<JsonbDecimal128>, "JsonbDecimal128 must be standard layout and trivial"); |
| static_assert(is_pod_v<JsonbDecimal256>, "JsonbDecimal256 must be standard layout and trivial"); |
| static_assert(is_pod_v<JsonbInt8Val>, "JsonbInt8Val must be standard layout and trivial"); |
| static_assert(is_pod_v<JsonbInt32Val>, "JsonbInt32Val must be standard layout and trivial"); |
| static_assert(is_pod_v<JsonbInt64Val>, "JsonbInt64Val must be standard layout and trivial"); |
| static_assert(is_pod_v<JsonbInt128Val>, "JsonbInt128Val must be standard layout and trivial"); |
| static_assert(is_pod_v<JsonbDoubleVal>, "JsonbDoubleVal must be standard layout and trivial"); |
| static_assert(is_pod_v<JsonbFloatVal>, "JsonbFloatVal must be standard layout and trivial"); |
| static_assert(is_pod_v<JsonbBinaryVal>, "JsonbBinaryVal must be standard layout and trivial"); |
| static_assert(is_pod_v<ContainerVal>, "ContainerVal must be standard layout and trivial"); |
| |
| #define ASSERT_DECIMAL_LAYOUT(type) \ |
| static_assert(offsetof(type, precision) == 0); \ |
| static_assert(offsetof(type, scale) == 4); \ |
| static_assert(offsetof(type, value) == 8); |
| |
| ASSERT_DECIMAL_LAYOUT(JsonbDecimal32) |
| ASSERT_DECIMAL_LAYOUT(JsonbDecimal64) |
| ASSERT_DECIMAL_LAYOUT(JsonbDecimal128) |
| ASSERT_DECIMAL_LAYOUT(JsonbDecimal256) |
| |
| #define ASSERT_NUMERIC_LAYOUT(type) static_assert(offsetof(type, num) == 0); |
| |
| ASSERT_NUMERIC_LAYOUT(JsonbInt8Val) |
| ASSERT_NUMERIC_LAYOUT(JsonbInt32Val) |
| ASSERT_NUMERIC_LAYOUT(JsonbInt64Val) |
| ASSERT_NUMERIC_LAYOUT(JsonbInt128Val) |
| ASSERT_NUMERIC_LAYOUT(JsonbDoubleVal) |
| |
| static_assert(offsetof(JsonbBinaryVal, size) == 0); |
| static_assert(offsetof(JsonbBinaryVal, payload) == 4); |
| |
| static_assert(offsetof(ContainerVal, size) == 0); |
| static_assert(offsetof(ContainerVal, payload) == 4); |
| |
| #pragma pack(pop) |
| #if defined(__clang__) |
| #pragma clang diagnostic pop |
| #endif |
| } // namespace doris |
| |
| #endif // JSONB_JSONBDOCUMENT_H |