blob: 46aa36af35bd536010e06fd67f8ac5ec862b4dc8 [file] [log] [blame]
/*
* Copyright (c) 2014, Facebook, Inc.
* All rights reserved.
*
* This source code is licensed under the BSD-style license found in the
* LICENSE file in the root directory of this source tree. An additional grant
* of patent rights can be found in the PATENTS file in the same directory.
*
*/
/*
* This file defines JsonbWriterT (template) and JsonbWriter.
*
* JsonbWriterT is a template class which implements a JSONB serializer.
* Users call the various write functions of a JsonbWriterT object to write
* values directly to JSONB packed bytes. All write functions for values and
* keys return true on success, or false if there is an error. To write an
* object, an array, a string, or a binary value, you must call writeStart[..]
* before writing values or keys, and call writeEnd[..] after finishing.
*
* By default, a JsonbWriterT object creates its own output stream buffer.
* Alternatively, you can pass an existing output stream object to a writer.
* The stream must implement some basic functions of std::ostream; at present a
* static_assert restricts it to JsonbOutStream (see jsonb_stream.h).
*
* JsonbWriter specializes JsonbWriterT with the JsonbOutStream type (see
* jsonb_stream.h). So unless you want to provide your own output stream
* type, use a JsonbWriter object.
*
* @author Tian Xia <tianx@fb.com>
* this file is copied from
* https://github.com/facebook/mysql-5.6/blob/fb-mysql-5.6.35/fbson/FbsonWriter.h
* and modified by Doris
*/
#ifndef JSONB_JSONBWRITER_H
#define JSONB_JSONBWRITER_H
#include <cstdint>
#include <limits>
#include <stack>
#include <string>
#include "common/exception.h"
#include "common/status.h"
#include "jsonb_document.h"
#include "jsonb_stream.h"
#include "runtime/define_primitive_type.h"
#include "runtime/primitive_type.h"
#include "vec/core/types.h"
namespace doris {
// Signed 128-bit integer alias built on the compiler-provided __int128 type;
// used as the parameter type of JsonbWriterT::writeInt128.
using int128_t = __int128;
template <class OS_TYPE>
class JsonbWriterT {
/// TODO: maybe we should not use a template class here
static_assert(std::is_same_v<OS_TYPE, JsonbOutStream>);
public:
JsonbWriterT() : alloc_(true), hasHdr_(false), kvState_(WS_Value), str_pos_(0) {
os_ = new OS_TYPE();
}
explicit JsonbWriterT(OS_TYPE& os)
: os_(&os), alloc_(false), hasHdr_(false), kvState_(WS_Value), str_pos_(0) {}
~JsonbWriterT() {
if (alloc_) {
delete os_;
}
}
JsonbWriterT<OS_TYPE>& operator=(JsonbWriterT<OS_TYPE>&& other) {
if (this != &other) {
if (alloc_) {
delete os_;
}
os_ = other.os_;
other.os_ = nullptr;
alloc_ = other.alloc_;
other.alloc_ = false;
hasHdr_ = other.hasHdr_;
kvState_ = other.kvState_;
str_pos_ = other.str_pos_;
first_ = other.first_;
stack_ = std::move(other.stack_);
}
return *this;
}
void reset() {
os_->clear();
os_->seekp(0);
hasHdr_ = false;
kvState_ = WS_Value;
first_ = true;
for (; !stack_.empty(); stack_.pop()) {
;
}
}
bool writeKey(const char* key) { return writeKey(key, strlen(key)); }
// write a key string (or key id if an external dict is provided)
bool writeKey(const char* key, uint8_t len) {
if (!stack_.empty() && verifyKeyState()) {
os_->put(len);
if (len == 0) {
// NOTE: we use sMaxKeyId to represent an empty key
JsonbKeyValue::keyid_type idx = JsonbKeyValue::sMaxKeyId;
os_->write((char*)&idx, sizeof(JsonbKeyValue::keyid_type));
} else {
os_->write(key, len);
}
kvState_ = WS_Key;
return true;
}
return false;
}
bool writeValue(const JsonbValue* value) {
if (!value) {
return writeNull();
}
if ((first_ && stack_.empty()) || (!stack_.empty() && verifyValueState())) {
if (!writeFirstHeader()) {
return false;
}
os_->write((char*)value, value->numPackedBytes());
kvState_ = WS_Value;
return true;
}
return false;
}
// write a key id
bool writeKey(JsonbKeyValue::keyid_type idx) {
if (!stack_.empty() && verifyKeyState()) {
os_->put(0);
os_->write((char*)&idx, sizeof(JsonbKeyValue::keyid_type));
kvState_ = WS_Key;
return true;
}
return false;
}
bool writeFirstHeader() {
if (first_ && stack_.empty()) {
first_ = false;
// if this is a new JSONB, write the header
if (!hasHdr_) {
writeHeader();
return true;
} else {
return false;
}
} else {
return true;
}
}
bool writeNull() {
if ((first_ && stack_.empty()) || (!stack_.empty() && verifyValueState())) {
if (!writeFirstHeader()) {
return false;
}
os_->put((JsonbTypeUnder)JsonbType::T_Null);
kvState_ = WS_Value;
return true;
}
return false;
}
bool writeBool(bool b) {
if ((first_ && stack_.empty()) || (!stack_.empty() && verifyValueState())) {
if (!writeFirstHeader()) {
return false;
}
if (b) {
os_->put((JsonbTypeUnder)JsonbType::T_True);
} else {
os_->put((JsonbTypeUnder)JsonbType::T_False);
}
kvState_ = WS_Value;
return true;
}
return false;
}
// This function is a helper. It will make use of smallest space to
// write an int
bool writeInt(int64_t val) {
if (val >= std::numeric_limits<int8_t>::min() &&
val <= std::numeric_limits<int8_t>::max()) {
return writeInt8((int8_t)val);
} else if (val >= std::numeric_limits<int16_t>::min() &&
val <= std::numeric_limits<int16_t>::max()) {
return writeInt16((int16_t)val);
} else if (val >= std::numeric_limits<int32_t>::min() &&
val <= std::numeric_limits<int32_t>::max()) {
return writeInt32((int32_t)val);
} else {
return writeInt64(val);
}
}
bool writeInt8(int8_t v) {
if ((first_ && stack_.empty()) || (!stack_.empty() && verifyValueState())) {
if (!writeFirstHeader()) {
return false;
}
os_->put((JsonbTypeUnder)JsonbType::T_Int8);
os_->put(v);
kvState_ = WS_Value;
return true;
}
return false;
}
bool writeInt16(int16_t v) {
if ((first_ && stack_.empty()) || (!stack_.empty() && verifyValueState())) {
if (!writeFirstHeader()) {
return false;
}
os_->put((JsonbTypeUnder)JsonbType::T_Int16);
os_->write((char*)&v, sizeof(int16_t));
kvState_ = WS_Value;
return true;
}
return false;
}
bool writeInt32(int32_t v) {
if ((first_ && stack_.empty()) || (!stack_.empty() && verifyValueState())) {
if (!writeFirstHeader()) {
return false;
}
os_->put((JsonbTypeUnder)JsonbType::T_Int32);
os_->write((char*)&v, sizeof(int32_t));
kvState_ = WS_Value;
return true;
}
return false;
}
bool writeInt64(int64_t v) {
if ((first_ && stack_.empty()) || (!stack_.empty() && verifyValueState())) {
if (!writeFirstHeader()) {
return false;
}
os_->put((JsonbTypeUnder)JsonbType::T_Int64);
os_->write((char*)&v, sizeof(int64_t));
kvState_ = WS_Value;
return true;
}
return false;
}
bool writeInt128(int128_t v) {
if ((first_ && stack_.empty()) || (!stack_.empty() && verifyValueState())) {
if (!writeFirstHeader()) {
return false;
}
os_->put((JsonbTypeUnder)JsonbType::T_Int128);
os_->write((char*)&v, sizeof(int128_t));
kvState_ = WS_Value;
return true;
}
return false;
}
bool writeDouble(double v) {
if ((first_ && stack_.empty()) || (!stack_.empty() && verifyValueState())) {
if (!writeFirstHeader()) {
return false;
}
os_->put((JsonbTypeUnder)JsonbType::T_Double);
os_->write((char*)&v, sizeof(double));
kvState_ = WS_Value;
return true;
}
return false;
}
bool writeFloat(float v) {
if ((first_ && stack_.empty()) || (!stack_.empty() && verifyValueState())) {
if (!writeFirstHeader()) {
return false;
}
os_->put((JsonbTypeUnder)JsonbType::T_Float);
os_->write((char*)&v, sizeof(float));
kvState_ = WS_Value;
return true;
}
return false;
}
template <JsonbDecimalType T>
bool writeDecimal(const T& v, const uint32_t precision, const uint32_t scale) {
if ((first_ && stack_.empty()) || (!stack_.empty() && verifyValueState())) {
if (!writeFirstHeader()) {
return false;
}
if constexpr (std::same_as<T, vectorized::Decimal256>) {
os_->put((JsonbTypeUnder)JsonbType::T_Decimal256);
} else if constexpr (std::same_as<T, vectorized::Decimal128V3>) {
os_->put((JsonbTypeUnder)JsonbType::T_Decimal128);
} else if constexpr (std::same_as<T, vectorized::Decimal64>) {
os_->put((JsonbTypeUnder)JsonbType::T_Decimal64);
} else {
os_->put((JsonbTypeUnder)JsonbType::T_Decimal32);
}
os_->write(reinterpret_cast<const char*>(&precision), sizeof(uint32_t));
os_->write(reinterpret_cast<const char*>(&scale), sizeof(uint32_t));
os_->write((char*)(&(v.value)), sizeof(v.value));
kvState_ = WS_Value;
return true;
}
return false;
}
// must call writeStartString before writing a string val
bool writeStartString() {
if ((first_ && stack_.empty()) || (!stack_.empty() && verifyValueState())) {
if (!writeFirstHeader()) return 0;
os_->put((JsonbTypeUnder)JsonbType::T_String);
str_pos_ = os_->tellp();
// fill the size bytes with 0 for now
uint32_t size = 0;
os_->write((char*)&size, sizeof(uint32_t));
kvState_ = WS_String;
return true;
}
return false;
}
// finish writing a string val
bool writeEndString() {
if (kvState_ == WS_String) {
std::streampos cur_pos = os_->tellp();
int32_t size = (int32_t)(cur_pos - str_pos_ - sizeof(uint32_t));
assert(size >= 0);
os_->seekp(str_pos_);
os_->write((char*)&size, sizeof(uint32_t));
os_->seekp(cur_pos);
kvState_ = WS_Value;
return true;
}
return false;
}
// TODO: here changed length to uint64_t, as some api also need changed, But the thirdparty api is uint_32t
// need consider a better way to handle case.
bool writeString(const char* str, uint64_t len) {
if (kvState_ == WS_String) {
os_->write(str, len);
return true;
}
return false;
}
bool writeString(const std::string& str) { return writeString(str.c_str(), str.size()); }
bool writeString(char ch) {
if (kvState_ == WS_String) {
os_->put(ch);
return true;
}
return false;
}
// must call writeStartBinary before writing a binary val
bool writeStartBinary() {
if ((first_ && stack_.empty()) || (!stack_.empty() && verifyValueState())) {
if (!writeFirstHeader()) return 0;
os_->put((JsonbTypeUnder)JsonbType::T_Binary);
str_pos_ = os_->tellp();
// fill the size bytes with 0 for now
uint32_t size = 0;
os_->write((char*)&size, sizeof(uint32_t));
kvState_ = WS_Binary;
return true;
}
return false;
}
// finish writing a binary val
bool writeEndBinary() {
if (kvState_ == WS_Binary) {
std::streampos cur_pos = os_->tellp();
int32_t size = (int32_t)(cur_pos - str_pos_ - sizeof(uint32_t));
assert(size >= 0);
os_->seekp(str_pos_);
os_->write((char*)&size, sizeof(uint32_t));
os_->seekp(cur_pos);
kvState_ = WS_Value;
return true;
}
return false;
}
bool writeBinary(const char* bin, uint64_t len) {
if (kvState_ == WS_Binary) {
os_->write(bin, len);
return true;
}
return false;
}
// must call writeStartObject before writing an object val
bool writeStartObject() {
if (stack_.empty() || verifyValueState()) {
if (stack_.empty()) {
// if this is a new JSONB, write the header
if (!hasHdr_) {
writeHeader();
} else
return false;
}
// check if the object exceeds the maximum nesting level
if (stack_.size() >= MaxNestingLevel) return false;
os_->put((JsonbTypeUnder)JsonbType::T_Object);
// save the size position
stack_.push(WriteInfo({WS_Object, os_->tellp()}));
// fill the size bytes with 0 for now
uint32_t size = 0;
os_->write((char*)&size, sizeof(uint32_t));
kvState_ = WS_Value;
return true;
}
return false;
}
// finish writing an object val
bool writeEndObject() {
if (!stack_.empty() && stack_.top().state == WS_Object && kvState_ == WS_Value) {
WriteInfo& ci = stack_.top();
std::streampos cur_pos = os_->tellp();
auto size = (int32_t)(cur_pos - ci.sz_pos - sizeof(uint32_t));
assert(size >= 0);
os_->seekp(ci.sz_pos);
os_->write((char*)&size, sizeof(uint32_t));
os_->seekp(cur_pos);
stack_.pop();
return true;
}
return false;
}
// must call writeStartArray before writing an array val
bool writeStartArray() {
if (stack_.empty() || verifyValueState()) {
if (stack_.empty()) {
// if this is a new JSONB, write the header
if (!hasHdr_) {
writeHeader();
} else {
return false;
}
}
// check if the array exceeds the maximum nesting level
if (stack_.size() >= MaxNestingLevel) {
return false;
}
os_->put((JsonbTypeUnder)JsonbType::T_Array);
// save the size position
stack_.push(WriteInfo({WS_Array, os_->tellp()}));
// fill the size bytes with 0 for now
uint32_t size = 0;
os_->write((char*)&size, sizeof(uint32_t));
kvState_ = WS_Value;
return true;
}
return false;
}
// finish writing an array val
bool writeEndArray() {
if (!stack_.empty() && stack_.top().state == WS_Array && kvState_ == WS_Value) {
WriteInfo& ci = stack_.top();
std::streampos cur_pos = os_->tellp();
auto size = (int32_t)(cur_pos - ci.sz_pos - sizeof(uint32_t));
assert(size >= 0);
os_->seekp(ci.sz_pos);
os_->write((char*)&size, sizeof(uint32_t));
os_->seekp(cur_pos);
stack_.pop();
return true;
}
return false;
}
OS_TYPE* getOutput() { return os_; }
JsonbDocument* getDocument() {
JsonbDocument* doc = nullptr;
THROW_IF_ERROR(JsonbDocument::checkAndCreateDocument(getOutput()->getBuffer(),
getOutput()->getSize(), &doc));
return doc;
}
JsonbValue* getValue() {
return JsonbDocument::createValue(getOutput()->getBuffer(), getOutput()->getSize());
}
bool writeEnd() {
while (!stack_.empty()) {
bool ok = false;
switch (stack_.top().state) {
case WS_Array:
ok = writeEndArray();
break;
case WS_Object:
ok = writeEndObject();
break;
case WS_String:
ok = writeEndString();
break;
case WS_Binary:
ok = writeEndBinary();
break;
default:
ok = false;
break;
}
if (!ok) {
return false;
}
}
return true;
}
private:
// verify we are in the right state before writing a value
bool verifyValueState() {
assert(!stack_.empty());
// The document can only be an Object or an Array which follows
// the standard.
return (stack_.top().state == WS_Object && kvState_ == WS_Key) ||
(stack_.top().state == WS_Array && kvState_ == WS_Value);
}
// verify we are in the right state before writing a key
bool verifyKeyState() {
assert(!stack_.empty());
return stack_.top().state == WS_Object && kvState_ == WS_Value;
}
void writeHeader() {
os_->put(JSONB_VER);
hasHdr_ = true;
}
enum WriteState {
WS_NONE,
WS_Array,
WS_Object,
WS_Key,
WS_Value,
WS_String,
WS_Binary,
};
struct WriteInfo {
WriteState state;
std::streampos sz_pos;
};
OS_TYPE* os_ = nullptr;
bool alloc_;
bool hasHdr_;
WriteState kvState_; // key or value state
std::streampos str_pos_;
std::stack<WriteInfo> stack_;
bool first_ = true;
};
// Convenience alias: the writer instantiated with JsonbOutStream, which is the
// only stream type accepted by the static_assert inside JsonbWriterT.
using JsonbWriter = JsonbWriterT<JsonbOutStream>;
} // namespace doris
#endif // JSONB_JSONBWRITER_H