blob: a05542e1898a8dbb433459daf704e8e8889cc1be [file]
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
// This file is copied from
// https://github.com/ClickHouse/ClickHouse/blob/master/src/DataTypes/Serializations/PathInData.cpp
// and modified by Doris
#include "vec/json/path_in_data.h"
#include <assert.h>
#include "vec/common/sip_hash.h"
namespace doris::vectorized {
// Builds a path from its dotted textual form. The text is copied into `path`, and every
// '.' closes one part and opens the next, so "a.b" produces the parts "a" and "b".
// An empty input produces a single part with an empty key; leading, trailing or
// consecutive dots produce empty keys as well.
// Every part is created with is_nested == false and anonymous_array_level == 0.
PathInData::PathInData(std::string_view path_) : path(path_) {
    // The keys are views into this object's own `path`, not into the caller's buffer.
    const char* begin = path.data();
    const char* const end = begin + path.size();
    for (const char* it = begin; it != end; ++it) {
        if (*it == '.') {
            const size_t size = static_cast<size_t>(it - begin);
            parts.emplace_back(std::string_view {begin, size}, false, 0);
            begin = it + 1; // the next key starts right after the separator
        }
    }
    // Whatever follows the last separator (possibly nothing) is the final part.
    // Uses the integer literal 0, matching the call inside the loop.
    const size_t size = static_cast<size_t>(end - begin);
    parts.emplace_back(std::string_view {begin, size}, false, 0);
}
// Builds a path from an already-split list of parts: the dotted `path` string is
// assembled first, then `parts` is recreated on top of it.
PathInData::PathInData(const Parts& parts_) {
    // Order matters: build_parts() lays its key views over `path`, so the string has
    // to be in place before the parts are created.
    this->build_path(parts_);
    this->build_parts(parts_);
}
// Copy constructor: duplicates the path string and recreates the parts so that their
// keys refer to this object's copy of the string.
PathInData::PathInData(const PathInData& other) : path(other.path) {
    const Parts& source_parts = other.get_parts();
    build_parts(source_parts);
}
// Copy assignment: takes over the other path's string and recreates the parts on top of
// this object's own copy of it. Self-assignment is a no-op.
PathInData& PathInData::operator=(const PathInData& other) {
    if (this == &other) {
        return *this;
    }
    path = other.path;
    // `other.parts` is only used as a template (key lengths and flags); the new keys are
    // views into this object's `path`.
    build_parts(other.parts);
    return *this;
}
// Computes a 128-bit SipHash over a list of parts. The number of parts is fed first,
// followed, for each part in order, by its key bytes, its is_nested flag and its
// anonymous_array_level.
UInt128 PathInData::get_parts_hash(const Parts& parts_) {
    SipHash hasher;
    hasher.update(parts_.size());
    for (auto it = parts_.begin(); it != parts_.end(); ++it) {
        hasher.update(it->key.data(), it->key.length());
        hasher.update(it->is_nested);
        hasher.update(it->anonymous_array_level);
    }
    UInt128 digest;
    hasher.get128(digest);
    return digest;
}
void PathInData::build_path(const Parts& other_parts) {
if (other_parts.empty()) {
return;
}
path.clear();
auto it = other_parts.begin();
path += it->key;
++it;
for (; it != other_parts.end(); ++it) {
path += ".";
path += it->key;
}
}
// Recreates `parts` (and `has_nested`) from `other_parts`, with each key being a view
// into this object's `path` rather than into whatever storage `other_parts` refers to.
//
// `path` must already hold the keys of `other_parts` in order, separated by exactly one
// character each: the cursor advances by key length + 1 per part.
//
// Previous state is always discarded, even when `other_parts` is empty. Otherwise
// assigning an empty path would keep old parts whose keys point into the overwritten
// string, and `has_nested` could stay true after assigning a path without nested parts.
void PathInData::build_parts(const Parts& other_parts) {
    parts.clear();
    has_nested = false;
    if (other_parts.empty()) {
        return;
    }
    parts.reserve(other_parts.size());
    const char* begin = path.data();
    for (const auto& part : other_parts) {
        has_nested |= part.is_nested;
        parts.emplace_back(std::string_view {begin, part.key.length()}, part.is_nested,
                           part.anonymous_array_level);
        // Skip this key plus the single separator character that follows it.
        begin += part.key.length() + 1;
    }
}
// Hash functor for PathInData: folds the 128-bit parts hash into a size_t by XOR-ing
// its low and high halves.
size_t PathInData::Hash::operator()(const PathInData& value) const {
    const UInt128 digest = get_parts_hash(value.parts);
    return digest.low ^ digest.high;
}
// Appends one key to the path being built and returns the builder for chaining.
//  - While no part exists yet, `is_array` bumps the pending anonymous-array counter.
//  - An empty key adds no part.
//  - Otherwise the previous part (if any) gets is_nested = is_array, the new part is
//    stored with the pending counter as its anonymous_array_level, and the counter is
//    reset to zero.
PathInDataBuilder& PathInDataBuilder::append(std::string_view key, bool is_array) {
    const bool no_parts_yet = parts.empty();
    if (no_parts_yet && is_array) {
        ++current_anonymous_array_level;
    }
    if (key.empty()) {
        return *this;
    }
    if (!no_parts_yet) {
        parts.back().is_nested = is_array;
    }
    parts.emplace_back(key, false, current_anonymous_array_level);
    current_anonymous_array_level = 0;
    return *this;
}
// Appends a whole sequence of parts to the path being built and returns the builder for
// chaining.
//  - While no part exists yet, `is_array` bumps the pending anonymous-array counter.
//  - An empty `path` adds nothing.
//  - Otherwise the previous part (if any) gets is_nested = is_array, the parts are
//    copied to the end, the pending counter is added to the anonymous_array_level of
//    every copied part, and the counter is reset to zero.
PathInDataBuilder& PathInDataBuilder::append(const PathInData::Parts& path, bool is_array) {
    const bool no_parts_yet = parts.empty();
    if (no_parts_yet && is_array) {
        ++current_anonymous_array_level;
    }
    if (path.empty()) {
        return *this;
    }
    if (!no_parts_yet) {
        parts.back().is_nested = is_array;
    }
    // Remember where the appended range starts so only the new parts are adjusted.
    const size_t first_appended = parts.size();
    parts.insert(parts.end(), path.begin(), path.end());
    for (size_t i = first_appended; i < parts.size(); ++i) {
        parts[i].anonymous_array_level += current_anonymous_array_level;
    }
    current_anonymous_array_level = 0;
    return *this;
}
// Removes the most recently appended part.
// Precondition: at least one part exists. Checked with assert, matching pop_back(size_t),
// because removing from an empty container is undefined behaviour.
void PathInDataBuilder::pop_back() {
    assert(!parts.empty());
    parts.pop_back();
}
// Removes the last `n` parts.
// Precondition (checked with assert): n does not exceed the number of parts.
void PathInDataBuilder::pop_back(size_t n) {
    assert(n <= parts.size());
    const size_t remaining = parts.size() - n;
    parts.resize(remaining);
}
} // namespace doris::vectorized