blob: 9216f545a3ac338e5db94b5d706f7952c54afc10 [file] [log] [blame]
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
// This file is copied from
// https://github.com/ClickHouse/ClickHouse/blob/master/src/Common/JSONParsers/SimdJSONParser.h
// and modified by Doris
#pragma once
#include <rapidjson/document.h>
#include <simdjson.h>
#include "vec/core/types.h"
#include "vec/json/path_in_data.h"
namespace doris::vectorized {
/// This class can be used as an argument for the template class FunctionJSON.
/// It provides ability to parse JSONs using simdjson library.
class SimdJSONParser {
public:
class Array;
class Object;
/// References an element in a JSON document, representing a JSON null, boolean, string, number,
/// array or object.
class Element {
public:
ALWAYS_INLINE Element() {} /// NOLINT
ALWAYS_INLINE Element(const simdjson::dom::element& element_)
: element(element_) {} /// NOLINT
ALWAYS_INLINE bool isInt64() const {
return element.type() == simdjson::dom::element_type::INT64;
}
ALWAYS_INLINE bool isUInt64() const {
return element.type() == simdjson::dom::element_type::UINT64;
}
ALWAYS_INLINE bool isDouble() const {
return element.type() == simdjson::dom::element_type::DOUBLE;
}
ALWAYS_INLINE bool isString() const {
return element.type() == simdjson::dom::element_type::STRING;
}
ALWAYS_INLINE bool isArray() const {
return element.type() == simdjson::dom::element_type::ARRAY;
}
ALWAYS_INLINE bool isObject() const {
return element.type() == simdjson::dom::element_type::OBJECT;
}
ALWAYS_INLINE bool isBool() const {
return element.type() == simdjson::dom::element_type::BOOLEAN;
}
ALWAYS_INLINE bool isNull() const {
return element.type() == simdjson::dom::element_type::NULL_VALUE;
}
ALWAYS_INLINE Int64 getInt64() const { return element.get_int64().value_unsafe(); }
ALWAYS_INLINE UInt64 getUInt64() const { return element.get_uint64().value_unsafe(); }
ALWAYS_INLINE double getDouble() const { return element.get_double().value_unsafe(); }
ALWAYS_INLINE bool getBool() const { return element.get_bool().value_unsafe(); }
ALWAYS_INLINE std::string_view getString() const {
return element.get_string().value_unsafe();
}
ALWAYS_INLINE Array getArray() const;
ALWAYS_INLINE Object getObject() const;
ALWAYS_INLINE simdjson::dom::element getElement() const { return element; }
private:
simdjson::dom::element element;
};
/// References an array in a JSON document.
class Array {
public:
class Iterator {
public:
ALWAYS_INLINE Iterator(const simdjson::dom::array::iterator& it_)
: it(it_) {} /// NOLINT
ALWAYS_INLINE Element operator*() const { return *it; }
ALWAYS_INLINE Iterator& operator++() {
++it;
return *this;
}
ALWAYS_INLINE Iterator operator++(int) {
auto res = *this;
++it;
return res;
} /// NOLINT
ALWAYS_INLINE friend bool operator!=(const Iterator& left, const Iterator& right) {
return left.it != right.it;
}
ALWAYS_INLINE friend bool operator==(const Iterator& left, const Iterator& right) {
return !(left != right);
}
private:
simdjson::dom::array::iterator it;
};
ALWAYS_INLINE Array(const simdjson::dom::array& array_) : array(array_) {} /// NOLINT
ALWAYS_INLINE Iterator begin() const { return array.begin(); }
ALWAYS_INLINE Iterator end() const { return array.end(); }
ALWAYS_INLINE size_t size() const { return array.size(); }
ALWAYS_INLINE Element operator[](size_t index) const {
assert(index < size());
return array.at(index).value_unsafe();
}
private:
simdjson::dom::array array;
};
using KeyValuePair = std::pair<std::string_view, Element>;
/// References an object in a JSON document.
class Object {
public:
class Iterator {
public:
ALWAYS_INLINE Iterator(const simdjson::dom::object::iterator& it_)
: it(it_) {} /// NOLINT
ALWAYS_INLINE KeyValuePair operator*() const {
const auto& res = *it;
return {res.key, res.value};
}
ALWAYS_INLINE Iterator& operator++() {
++it;
return *this;
}
ALWAYS_INLINE Iterator operator++(int) {
auto res = *this;
++it;
return res;
} /// NOLINT
ALWAYS_INLINE friend bool operator!=(const Iterator& left, const Iterator& right) {
return left.it != right.it;
}
ALWAYS_INLINE friend bool operator==(const Iterator& left, const Iterator& right) {
return !(left != right);
}
private:
simdjson::dom::object::iterator it;
};
ALWAYS_INLINE Object(const simdjson::dom::object& object_) : object(object_) {} /// NOLINT
ALWAYS_INLINE Iterator begin() const { return object.begin(); }
ALWAYS_INLINE Iterator end() const { return object.end(); }
ALWAYS_INLINE size_t size() const { return object.size(); }
bool find(const PathInData& path, Element& result) const {
size_t idx = 0;
auto obj = object;
simdjson::simdjson_result<simdjson::dom::element> x;
for (const auto& part : path.get_parts()) {
++idx;
x = obj.at_key(part.key);
if (x.error()) {
return false;
}
if (!x.is_object()) {
break;
}
obj = x.get_object();
}
if (idx != path.get_parts().size()) {
return false;
}
result = x.value_unsafe();
return true;
}
bool find(const std::string_view& key, Element& result) const {
auto x = object.at_key(key);
if (x.error()) {
return false;
}
result = x.value_unsafe();
return true;
}
/// Optional: Provides access to an object's element by index.
KeyValuePair operator[](size_t index) const {
assert(index < size());
auto it = object.begin();
while (index--) {
++it;
}
const auto& res = *it;
return {res.key, res.value};
}
private:
simdjson::dom::object object;
};
/// Parses a JSON document, returns the reference to its root element if succeeded.
bool parse(const std::string_view& json, Element& result) {
auto document = parser.parse(json.data(), json.size());
if (document.error()) {
return false;
}
result = document.value_unsafe();
return true;
}
/// Optional: Allocates memory to parse JSON documents faster.
void reserve(size_t max_size) {
if (parser.allocate(max_size) != simdjson::error_code::SUCCESS) {
LOG(FATAL) << "Couldn't allocate " + std::to_string(max_size) +
" bytes when parsing JSON";
}
}
private:
simdjson::dom::parser parser;
};
inline ALWAYS_INLINE SimdJSONParser::Array SimdJSONParser::Element::getArray() const {
return element.get_array().value_unsafe();
}
inline ALWAYS_INLINE SimdJSONParser::Object SimdJSONParser::Element::getObject() const {
return element.get_object().value_unsafe();
}
} // namespace doris::vectorized