blob: 1300eb1e90def587ae269d22bf458dac6918c20f [file] [log] [blame]
/*
* Copyright (c) 2015, Facebook, Inc.
* All rights reserved.
*
* This source code is licensed under the BSD-style license found in the
* LICENSE file in the root directory of this source tree. An additional grant
* of patent rights can be found in the PATENTS file in the same directory.
*
* this file is copied from
* https://github.com/facebook/mysql-5.6/blob/fb-mysql-5.6.35/fbson/FbsonUpdater.h
* and modified by Doris
*/
#ifndef JSONB_UPDATER_H
#define JSONB_UPDATER_H
#include <deque>
#include <iostream>
#include <limits>
#include <type_traits>
#include "jsonb_document.h"
#include "jsonb_error.h"
#include "jsonb_writer.h"
namespace doris {
struct NodeInfo {
/*
There are two types of value with different structure: one is the
basic data type and array, and the other one is the Object. We
need to differentiate it here, because the actual value is stored
differently. For example, for basic data type and array, the
stored value is JsonbValue, but for Object data type, the stored
value is JsonbKeyValue.
The pointer addr points to the beginning of the data value, which
means that for basic data type, it points ot JsonbValue and for
the elements in the Object, it points to JsonbKeyValue.
The pointer, jsonb_value, points to the actually value part, which
means that for basic data type, it's the same with addr and for
the JsonbKeyValue, it points to the payload of it.
*/
char* addr; // The offset of the address from the beginning
JsonbValue* jsonb_value; // The address of the value of current node
};
/*
* This class is used to update the given document. The given document
* is stored in a buffer. When created, the the updater points to the
* root value in the document, and pushPathKey and popPathKey can be
* used to go to different key paths in the document.
*/
class JsonbUpdater {
public:
// buffer_size is the maximum number of bytes can be packed in document
JsonbUpdater(JsonbDocument* doc, uint buffer_size, double shrink_ratio = 0.5)
: document_(doc), buffer_size_(buffer_size), str_shrink_ratio_(shrink_ratio) {
root_ = document_->getValue();
clearPath();
}
JsonbValue* getRoot() { return root_; }
JsonbDocument* getDocument() { return document_; }
JsonbValue* getCurrent() {
return 0 == path_node_.size() ? nullptr : path_node_.back().jsonb_value;
}
// Remove the current node. It can't be root
JsonbErrType remove() {
// Remove root is not allowed
if (path_node_.size() == 1) {
return JsonbErrType::E_INVALID_OPER;
}
// Get the current node and because we want to delete it, pop it from stack
NodeInfo curr_node = path_node_.back();
int pack_size = (int)((char*)curr_node.jsonb_value - curr_node.addr +
curr_node.jsonb_value->numPackedBytes());
// Move the data after the current node to new place
if (!moveTo(curr_node.addr + pack_size, curr_node.addr)) {
return JsonbErrType::E_OUTOFMEMORY;
}
path_node_.pop_back();
return JsonbErrType::E_NONE;
}
JsonbErrType popPathKey() {
/* It's root, it means that it's the first elements in the
* document, which means that it points to the payload of the
* document. It doesn't make sense if we pop the key.
*/
if (path_node_.size() <= 1) return JsonbErrType::E_INVALID_OPER;
path_node_.pop_back();
return JsonbErrType::E_NONE;
}
// For ArrayVal
JsonbErrType pushPathKey(int idx) {
// Check whether it's an array
if (nullptr == path_node_.back().jsonb_value || !path_node_.back().jsonb_value->isArray()) {
return JsonbErrType::E_NOTARRAY;
}
// Get the element in given idx
ArrayVal::iterator ite;
JsonbErrType ret = getElemInArray(idx, path_node_.back().jsonb_value, ite);
if (JsonbErrType::E_NONE != ret) return ret;
if (ite == ((ArrayVal*)path_node_.back().jsonb_value)->end())
return JsonbErrType::E_OUTOFBOUNDARY;
path_node_.push_back({(char*)(ArrayVal::pointer)ite, (ArrayVal::pointer)ite});
return JsonbErrType::E_NONE;
}
// For ObjectVal
JsonbErrType pushPathKey(const char* key, // Should be a
// null-terminated string
hDictFind handler = nullptr) {
if (nullptr == key) return JsonbErrType::E_INVALID_KEY_STRING;
return pushPathKey(key, (unsigned int)strlen(key), handler);
}
JsonbErrType pushPathKey(const char* key, unsigned int klen, hDictFind handler = nullptr) {
// Check node validity
if (nullptr == key) {
return JsonbErrType::E_INVALID_KEY_STRING;
}
// Check whether it's an object
if (nullptr == path_node_.back().jsonb_value ||
!path_node_.back().jsonb_value->isObject()) {
return JsonbErrType::E_NOTOBJ;
}
// Get current node and search the one we want
ObjectVal* current = static_cast<ObjectVal*>(path_node_.back().jsonb_value);
ObjectVal::iterator kv = current->search(key, klen, handler);
if (kv == current->end()) {
return JsonbErrType::E_KEYNOTEXIST;
} else {
JsonbKeyValue* fb_kv = static_cast<ObjectVal::iterator::pointer>(kv);
path_node_.push_back({(char*)fb_kv, fb_kv->value()});
}
return JsonbErrType::E_NONE;
}
// Update a field
JsonbErrType updateValue(const JsonbValue* value) { return updateValueInternal(value); }
// Append a value to an array
JsonbErrType appendValue(const JsonbValue* value) {
// Check whether it's an array
if (nullptr == path_node_.back().jsonb_value || !path_node_.back().jsonb_value->isArray()) {
return JsonbErrType::E_NOTARRAY;
}
return insertArrayInternal(((ArrayVal*)path_node_.back().jsonb_value)->end(),
(const char*)value,
(const char*)value + value->numPackedBytes());
}
// Insert a value to an array
JsonbErrType insertValue(int idx, const JsonbValue* value) {
ArrayVal::iterator ite;
JsonbErrType ret = getElemInArray(idx, path_node_.back().jsonb_value, ite);
if (JsonbErrType::E_NONE != ret) return ret;
return insertArrayInternal(ite, (const char*)value,
(const char*)value + value->numPackedBytes());
}
// Insert a range of value to an array
JsonbErrType insertValue(int idx, ArrayVal::const_iterator beg, ArrayVal::const_iterator end) {
ArrayVal::iterator ite;
JsonbErrType ret = getElemInArray(idx, path_node_.back().jsonb_value, ite);
if (JsonbErrType::E_NONE != ret) return ret;
const char* beg_addr = (const char*)ArrayVal::const_iterator::pointer(beg);
const char* end_addr = (const char*)ArrayVal::const_iterator::pointer(end);
return insertArrayInternal(ite, beg_addr, end_addr);
}
// For adding element to dict
JsonbErrType insertValue(JsonbKeyValue* kvalue) {
if (nullptr == kvalue) {
return JsonbErrType::E_INVALID_JSONB_OBJ;
}
// Check whether it's an object
if (nullptr == path_node_.back().jsonb_value ||
!path_node_.back().jsonb_value->isObject()) {
return JsonbErrType::E_NOTOBJ;
}
return insertValue(ObjectVal::const_iterator(kvalue), ++ObjectVal::const_iterator(kvalue));
}
// Insert a range of elements to a dictionary
JsonbErrType insertValue(ObjectVal::const_iterator beg, ObjectVal::const_iterator end) {
// Check whether it's an object
if (nullptr == path_node_.back().jsonb_value ||
!path_node_.back().jsonb_value->isObject()) {
return JsonbErrType::E_NOTOBJ;
}
ObjectVal::const_iterator obj_end = ((ObjectVal*)path_node_.back().jsonb_value)->end();
char* curr_addr = (char*)(ObjectVal::const_iterator::pointer)(obj_end);
// Calculate the bytes needed
int need_bytes = (int)((char*)(ObjectVal::const_iterator::pointer)end -
(char*)(ObjectVal::const_iterator::pointer)beg);
char* new_addr = curr_addr + need_bytes;
if (!moveTo(curr_addr, new_addr)) {
return JsonbErrType::E_OUTOFMEMORY;
}
memcpy(curr_addr, (char*)(ObjectVal::const_iterator::pointer)beg, need_bytes);
return JsonbErrType::E_NONE;
}
// Go back to the root node
void clearPath() {
path_node_.clear();
path_node_.push_back({(char*)root_, root_});
}
private:
JsonbErrType updateValueInternal(const JsonbValue* value) {
// If we want to update root, it must be an array or object
if (path_node_.size() == 1 && !(value->isObject() || value->isArray())) {
return JsonbErrType::E_INVALID_OPER;
}
// For the updating that we don't need to move data
JsonbValue* curr = path_node_.back().jsonb_value;
if (value->isInt() && curr->isInt()) {
// Both are ints and optimization can be done here
// setVal may fail because the new value can't fit into the current one.
if (((JsonbIntVal*)curr)->setVal(((const JsonbIntVal*)value)->val())) {
return JsonbErrType::E_NONE;
}
}
// If both are strings and the new string is shorter than the
// allocated space, then we can update in place.
if (value->isString() && curr->isString()) {
// When both are strings, optimization is possible
const char* str = value->getValuePtr();
int str_len = value->size();
if ('\0' == str[str_len - 1]) {
// There are tailing NULLs in the string
str_len = (int)strlen(str);
}
// If the new length of the string range in
// [old_len * (1 - str_shrink_ratio), old_len],
// then we update it in place. Which means that
// when the length of the new string is less than
// old_len * str_shrink_ratio, we will do shrink
if (str_len <= (int)curr->size() &&
str_len >= ((int)curr->size() * (1.0 - str_shrink_ratio_))) {
// We can update string in place
if (!((JsonbStringVal*)curr)->setVal(str, str_len)) {
return JsonbErrType::E_INVALID_OPER;
}
return JsonbErrType::E_NONE;
}
}
// We can't update in place, extand it.
if (curr->numPackedBytes() != value->numPackedBytes()) {
char* curr_addr = (char*)curr;
char* next_addr = (char*)curr_addr + curr->numPackedBytes();
char* new_next_addr = curr_addr + value->numPackedBytes();
// Expand it or shrink it.
if (!moveTo(next_addr, new_next_addr)) {
// If move failed, then we do restore
return JsonbErrType::E_OUTOFMEMORY;
}
}
// Copy the new data to replace the old one
memcpy(curr, value, value->numPackedBytes());
return JsonbErrType::E_NONE;
}
/* Insert a node. Insert can only be applied for array. The node
will be inserted at the given iterator. If the array is not long
enough, it will be expanded automatically. Null will be the value
for the expaneded pard. */
JsonbErrType insertArrayInternal(ArrayVal::iterator ite, const char* val_beg,
const char* val_end) {
int bytes_needed = (int)(val_end - val_beg);
char* idx_addr = (char*)ArrayVal::iterator::pointer(ite);
if (!moveTo(idx_addr, idx_addr + bytes_needed)) {
return JsonbErrType::E_OUTOFMEMORY;
}
// Copy the inserted value
memcpy(idx_addr, val_beg, bytes_needed);
return JsonbErrType::E_NONE;
}
private:
JsonbErrType getElemInArray(int idx, JsonbValue* array, ArrayVal::iterator& ite) {
if (nullptr == array || !array->isArray()) return JsonbErrType::E_NOTARRAY;
if (idx < 0) {
return JsonbErrType::E_OUTOFBOUNDARY;
}
// Now we want to get the node at the given index.
ArrayVal* arr_val = (ArrayVal*)path_node_.back().jsonb_value;
int count = 0;
ite = arr_val->begin();
while (ite != arr_val->end() && count != idx) {
++count;
++ite;
}
if (ite == arr_val->end() && count != idx) {
return JsonbErrType::E_OUTOFBOUNDARY;
}
return JsonbErrType::E_NONE;
}
/*
Whenever the size of a sub node in the stack is updated, the
parents should also be updated. inc_size can be positive
(expanding) or negative (shrinking)
*/
void updatePackageSize(int inc_size) {
if (inc_size == 0) return;
/*
There are two cases for the top elements in the stack:
when we do inserting, the first one must be a container,
and we need to expand the size of it. But when for assignment,
the first one can be basic type or container and we don't want
to update it. But now, if we see it's the basic type, we will
not update it and when it's the container, we still update it.
It's fine, because, it will be rewritten outside.
*/
for (auto ite = path_node_.begin(); ite != path_node_.end(); ++ite) {
JsonbValue* value = ite->jsonb_value;
if (value->type() >= JsonbType::T_Null && value->type() <= JsonbType::T_Double) {
// For the last elements in the stack, it can be a scalar and
// there is no size field stored in them. Just skip to update
// it.
assert(path_node_.end() == ite + 1);
} else {
uint32_t* size_pointer = (uint32_t*)((char*)value + sizeof(JsonbTypeUnder));
*size_pointer += inc_size;
}
}
}
// Move the data from "from" to the end of the document to the new
// address "to".
bool moveTo(char* from, char* to) {
size_t remaining = root_->numPackedBytes() - ((char*)from - (char*)root_);
// Check whether it exceed the buffer
if (to + remaining > (char*)document_ + buffer_size_) return false;
updatePackageSize((int)(to - from));
memmove(to, from, remaining);
return true;
}
// Helper function to get the beginning of an array
const char* getArrayBeg(const ArrayVal* arr) {
return (const char*)(ArrayVal::const_iterator::pointer)arr->begin();
}
// Helper function to get the end of an array
const char* getArrayEnd(const ArrayVal* arr) {
return (const char*)(ArrayVal::const_iterator::pointer)arr->end();
}
private:
JsonbDocument* document_ = nullptr;
JsonbValue* root_ = nullptr;
uint buffer_size_;
// This stack store all the key path in the document. It's deserve
// noticing that the root node is always in this stack.
std::deque<NodeInfo> path_node_;
JsonbWriter writer_;
double str_shrink_ratio_;
};
} // namespace doris
#endif