blob: ae91b44499493cf56e5206ffe8d11c8f50b4976b [file] [log] [blame]
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
// This file is copied from
// https://github.com/ClickHouse/ClickHouse/blob/master/src/DataTypes/Serializations/PathInData.cpp
// and modified by Doris
#include "vec/json/path_in_data.h"
#include <assert.h>
#include "vec/common/sip_hash.h"
namespace doris::vectorized {
// Builds a path from a dotted string such as "a.b.c".
//
// The string is copied into the member `path` and then split on '.'. Every
// resulting part key is a view into the member `path` (not into the argument),
// is marked as not nested and gets an anonymous array level of 0.
// Empty segments are kept: an empty input yields a single part with an empty
// key, and "a..b" yields the keys "a", "" and "b".
PathInData::PathInData(std::string_view path_) : path(path_) {
    const char* begin = path.data();
    const char* const end = begin + path.size();
    for (const char* it = begin; it != end; ++it) {
        if (*it != '.') {
            continue;
        }
        parts.emplace_back(std::string_view {begin, static_cast<size_t>(it - begin)}, false, 0);
        // The next segment starts right after the separator.
        begin = it + 1;
    }
    // Trailing segment (or the whole string when it contains no '.').
    // The level is an integer; use an integer literal like the loop above.
    parts.emplace_back(std::string_view {begin, static_cast<size_t>(end - begin)}, false, 0);
}
// Builds a path from already-split parts.
// The dotted `path` string is assembled from the keys first; `parts` is then
// rebuilt so that its keys are views into that freshly built string.
PathInData::PathInData(const Parts& parts_) {
    const Parts& source_parts = parts_;
    build_path(source_parts);
    build_parts(source_parts);
}
// Copy constructor.
// The path string is copied, and the parts are rebuilt from `other`'s parts so
// that the new keys are views into this object's own `path`.
PathInData::PathInData(const PathInData& other) : path(other.path) {
    const Parts& source_parts = other.get_parts();
    build_parts(source_parts);
}
// Builds a path whose first key is `root`, followed by every entry of `paths`
// in order. All keys are appended through PathInDataBuilder with
// is_array == false; the builder skips empty keys.
PathInData::PathInData(const std::string& root, const std::vector<std::string>& paths) {
    PathInDataBuilder builder;
    builder.append(root, false);
    for (size_t i = 0; i < paths.size(); ++i) {
        builder.append(paths[i], false);
    }
    const auto& built_parts = builder.get_parts();
    build_path(built_parts);
    build_parts(built_parts);
}
// Builds a path from a list of keys, appended in order through
// PathInDataBuilder with is_array == false; the builder skips empty keys.
PathInData::PathInData(const std::vector<std::string>& paths) {
    PathInDataBuilder builder;
    for (const std::string& key : paths) {
        builder.append(key, false);
    }
    const auto& built_parts = builder.get_parts();
    build_path(built_parts);
    build_parts(built_parts);
}
// Copy assignment.
// Copies the path string and rebuilds the parts so that their keys are views
// into this object's own `path`. Self-assignment is a no-op.
PathInData& PathInData::operator=(const PathInData& other) {
    if (this == &other) {
        return *this;
    }
    path = other.path;
    // Reset the derived state explicitly so that nothing from the previous
    // value survives: no stale parts when `other` has none, and no leftover
    // has_nested flag. After this the object is in the same state as a
    // copy-constructed one.
    parts.clear();
    has_nested = false;
    build_parts(other.parts);
    return *this;
}
// Computes a SipHash over a list of parts.
// The hash input is the number of parts followed, for every part in order, by
// its key bytes, its is_nested flag and its anonymous_array_level.
// Returns the value produced by SipHash::get128.
UInt128 PathInData::get_parts_hash(const Parts& parts_) {
    SipHash hasher;
    hasher.update(parts_.size());
    for (auto it = parts_.begin(); it != parts_.end(); ++it) {
        hasher.update(it->key.data(), it->key.length());
        hasher.update(it->is_nested);
        hasher.update(it->anonymous_array_level);
    }
    UInt128 digest;
    hasher.get128(digest);
    return digest;
}
void PathInData::build_path(const Parts& other_parts) {
if (other_parts.empty()) {
return;
}
path.clear();
auto it = other_parts.begin();
path += it->key;
++it;
for (; it != other_parts.end(); ++it) {
path += ".";
path += it->key;
}
}
// Rebuilds `parts` (and `has_nested`) from `other_parts`.
//
// Only the key *lengths*, the is_nested flags and the anonymous array levels
// of `other_parts` are used. The new keys are views into the member `path`,
// laid out as consecutive keys separated by one character each, so `path`
// must already hold the keys of `other_parts` joined by '.' when this is
// called.
//
// The previous parts and the previous has_nested value are always discarded,
// including when `other_parts` is empty, so no state from an earlier value of
// this object can leak into the result.
void PathInData::build_parts(const Parts& other_parts) {
    parts.clear();
    has_nested = false;
    if (other_parts.empty()) {
        return;
    }
    parts.reserve(other_parts.size());
    const char* begin = path.data();
    for (const auto& part : other_parts) {
        has_nested |= part.is_nested;
        parts.emplace_back(std::string_view {begin, part.key.length()}, part.is_nested,
                           part.anonymous_array_level);
        // Skip the key and the separator that follows it.
        begin += part.key.length() + 1;
    }
}
// Restores this path from its protobuf representation.
//
// `path` is copied from the message, `has_nested` takes the *value* of the
// has_nested field, and one part is created per path_part_info, in order.
//
// Part keys are views into the member `path`, not into the protobuf message,
// so they do not depend on the lifetime of `pb`. This assumes the serialized
// path is the part keys joined by '.', which is the layout produced by
// build_path() and by the string constructor.
void PathInData::from_protobuf(const segment_v2::ColumnPathInfo& pb) {
    parts.clear();
    path = pb.path();
    // has_nested() is the field value; has_has_nested() would only tell
    // whether the field is present in the message.
    has_nested = pb.has_nested();
    parts.reserve(pb.path_part_infos().size());
    const char* begin = path.data();
    for (const segment_v2::ColumnPathPartInfo& part_info : pb.path_part_infos()) {
        const size_t key_length = part_info.key().length();
        // The key must lie completely inside `path`.
        assert(static_cast<size_t>(begin - path.data()) + key_length <= path.size());
        Part part;
        part.is_nested = part_info.is_nested();
        part.anonymous_array_level = part_info.anonymous_array_level();
        part.key = std::string_view {begin, key_length};
        parts.push_back(part);
        // Skip the key and the '.' separator that follows it.
        begin += key_length + 1;
    }
}
std::string PathInData::to_jsonpath() const {
std::string jsonpath = "$.";
if (parts.empty()) {
return jsonpath;
}
auto it = parts.begin();
jsonpath += it->key;
++it;
for (; it != parts.end(); ++it) {
jsonpath += ".";
jsonpath += it->key;
}
return jsonpath;
}
// Serializes this path into `pb`.
// Writes the full path string, the has_nested flag and the given parent column
// unique id, then adds one path_part_info per part (key, is_nested and
// anonymous_array_level) in order.
void PathInData::to_protobuf(segment_v2::ColumnPathInfo* pb, int32_t parent_col_unique_id) const {
    pb->set_path(path);
    pb->set_has_nested(has_nested);
    pb->set_parrent_column_unique_id(parent_col_unique_id);
    for (auto it = parts.begin(); it != parts.end(); ++it) {
        segment_v2::ColumnPathPartInfo* info = pb->add_path_part_infos();
        // The message gets its own copy of the key characters.
        info->set_key(std::string(it->key));
        info->set_is_nested(it->is_nested);
        info->set_anonymous_array_level(it->anonymous_array_level);
    }
}
// Hash functor: folds the 128-bit parts hash of `value` into a size_t by
// XOR-ing its low and high halves.
size_t PathInData::Hash::operator()(const PathInData& value) const {
    const UInt128 digest = get_parts_hash(value.parts);
    return digest.low ^ digest.high;
}
// Returns a copy of this path without its first part.
// Equivalent to copy_pop_nfront(1).
PathInData PathInData::copy_pop_front() const {
    const size_t drop_count = 1;
    return copy_pop_nfront(drop_count);
}
// Returns a copy of this path without its first `n` parts.
// When `n` is greater than or equal to the number of parts, a
// default-constructed PathInData is returned.
PathInData PathInData::copy_pop_nfront(size_t n) const {
    if (n >= parts.size()) {
        return {};
    }
    // From here on n < parts.size(), so at least one part remains.
    Parts remaining;
    remaining.insert(remaining.end(), parts.begin() + n, parts.end());

    PathInData result;
    result.build_path(remaining);
    result.build_parts(remaining);
    return result;
}
// Appends one key to the path being built and returns *this for chaining.
//
// While the builder holds no parts, `is_array` is added to the pending
// anonymous array level. An empty key adds no part. For a non-empty key:
//  - if a part already exists, its is_nested flag is set to `is_array`;
//  - the new part is added as not nested, carrying the pending anonymous
//    array level, which is then reset to 0.
PathInDataBuilder& PathInDataBuilder::append(std::string_view key, bool is_array) {
    const bool had_parts = !parts.empty();
    if (!had_parts) {
        current_anonymous_array_level += is_array;
    }
    if (key.empty()) {
        return *this;
    }
    if (had_parts) {
        parts.back().is_nested = is_array;
    }
    parts.emplace_back(key, false, current_anonymous_array_level);
    current_anonymous_array_level = 0;
    return *this;
}
// Appends a whole list of parts to the path being built and returns *this for
// chaining.
//
// While the builder holds no parts, `is_array` is added to the pending
// anonymous array level. An empty `path` adds nothing. Otherwise:
//  - if a part already exists, its is_nested flag is set to `is_array`;
//  - the parts of `path` are copied to the end, and the pending anonymous
//    array level is added to each of the copied parts;
//  - the pending level is then reset to 0.
PathInDataBuilder& PathInDataBuilder::append(const PathInData::Parts& path, bool is_array) {
    const bool had_parts = !parts.empty();
    if (!had_parts) {
        current_anonymous_array_level += is_array;
    }
    if (path.empty()) {
        return *this;
    }
    if (had_parts) {
        parts.back().is_nested = is_array;
    }
    // Remember where the copied parts start so only they get adjusted.
    const size_t first_new = parts.size();
    parts.insert(parts.end(), path.begin(), path.end());
    for (size_t i = first_new; i < parts.size(); ++i) {
        parts[i].anonymous_array_level += current_anonymous_array_level;
    }
    current_anonymous_array_level = 0;
    return *this;
}
// Removes the last part.
// Precondition: the builder holds at least one part. This is checked with
// assert, matching the check done by pop_back(size_t).
void PathInDataBuilder::pop_back() {
    assert(!parts.empty());
    parts.pop_back();
}
// Removes the last `n` parts.
// Precondition (checked with assert): `n` does not exceed the number of parts.
void PathInDataBuilder::pop_back(size_t n) {
    const size_t current_size = parts.size();
    assert(n <= current_size);
    parts.resize(current_size - n);
}
} // namespace doris::vectorized