blob: 415f08701a539b2dc380351763b1674cd0b34b46 [file] [log] [blame]
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
// This file is copied from
// https://github.com/ClickHouse/ClickHouse/blob/master/src/Columns/ColumnStruct.cpp
// and modified by Doris
#include "vec/columns/column_struct.h"
#include <functional>
#include "pdqsort.h"
#include "runtime/primitive_type.h"
#include "vec/common/assert_cast.h"
#include "vec/common/typeid_cast.h"
#include "vec/core/sort_block.h"
class SipHash;
namespace doris {
namespace vectorized {
class Arena;
} // namespace vectorized
} // namespace doris
namespace doris::vectorized {
std::string ColumnStruct::get_name() const {
std::stringstream res;
res << "Struct(";
bool is_first = true;
for (const auto& column : columns) {
if (!is_first) {
res << ", ";
}
is_first = false;
res << column->get_name();
}
res << ")";
return res.str();
}
// Takes ownership of the given element columns, in order.
// Throws doris::Exception(INTERNAL_ERROR) as soon as an element is a ColumnConst;
// elements preceding the offending one have already been moved into `columns`.
ColumnStruct::ColumnStruct(MutableColumns&& mutable_columns) {
    columns.reserve(mutable_columns.size());
    for (auto& element : mutable_columns) {
        const bool element_is_const = is_column_const(*element);
        if (element_is_const) {
            throw doris::Exception(ErrorCode::INTERNAL_ERROR,
                                   "ColumnStruct cannot have ColumnConst as its element");
        }
        columns.push_back(std::move(element));
    }
}
// Creates a struct column whose elements are the given columns.
// All elements are validated first; a ColumnConst element raises
// doris::Exception(INTERNAL_ERROR) before anything is constructed.
ColumnStruct::MutablePtr ColumnStruct::create(const Columns& columns) {
    for (size_t idx = 0; idx < columns.size(); ++idx) {
        if (is_column_const(*columns[idx])) {
            throw doris::Exception(ErrorCode::INTERNAL_ERROR,
                                   "ColumnStruct cannot have ColumnConst as its element");
        }
    }
    // Start from an empty struct column, then fill in its element list.
    auto result = ColumnStruct::create(MutableColumns());
    result->columns.assign(columns.begin(), columns.end());
    return result;
}
// Creates a struct column whose element list is a copy of `tuple_columns`.
// All elements are validated first; a ColumnConst element raises
// doris::Exception(INTERNAL_ERROR) before anything is constructed.
ColumnStruct::MutablePtr ColumnStruct::create(const TupleColumns& tuple_columns) {
    for (size_t idx = 0; idx < tuple_columns.size(); ++idx) {
        if (is_column_const(*tuple_columns[idx])) {
            throw doris::Exception(ErrorCode::INTERNAL_ERROR,
                                   "ColumnStruct cannot have ColumnConst as its element");
        }
    }
    // Start from an empty struct column, then replace its element list.
    auto result = ColumnStruct::create(MutableColumns());
    result->columns = tuple_columns;
    return result;
}
// Returns a new struct column in which every element column is the result of
// calling clone_resized(new_size) on the corresponding element of this column.
MutableColumnPtr ColumnStruct::clone_resized(size_t new_size) const {
    MutableColumns resized;
    resized.reserve(columns.size());
    for (const auto& element : columns) {
        resized.push_back(element->clone_resized(new_size));
    }
    return ColumnStruct::create(std::move(resized));
}
// Returns row `n` as a Field; thin by-value wrapper around get(n, Field&).
Field ColumnStruct::operator[](size_t n) const {
    Field value;
    get(n, value);
    return value;
}
// Overwrites `res` with a TYPE_STRUCT field whose tuple holds, in element
// order, the value at row `n` of every element column.
void ColumnStruct::get(size_t n, Field& res) const {
    res = Field::create_field<TYPE_STRUCT>(Tuple());
    auto& elements = res.get<Tuple&>();
    elements.reserve(columns.size());
    for (const auto& element : columns) {
        elements.push_back((*element)[n]);
    }
}
// Appends one row taken from a struct-typed Field.
// The i-th tuple entry is inserted into the i-th element column.
// Throws doris::Exception(INTERNAL_ERROR) when the tuple arity differs from
// the number of element columns.
void ColumnStruct::insert(const Field& x) {
    DCHECK_EQ(x.get_type(), PrimitiveType::TYPE_STRUCT);
    const auto& tuple = x.get<const Tuple&>();
    const size_t expected_size = columns.size();
    if (tuple.size() != expected_size) {
        throw doris::Exception(ErrorCode::INTERNAL_ERROR,
                               "Cannot insert value of different size into tuple. field tuple size "
                               "{}, columns size {}",
                               tuple.size(), expected_size);
    }
    size_t idx = 0;
    for (auto& element : columns) {
        element->insert(tuple[idx]);
        ++idx;
    }
}
// Appends row `n` of `src_` (which must be a ColumnStruct) to this column by
// forwarding to insert_from on every pair of corresponding element columns.
// Throws doris::Exception(INTERNAL_ERROR) when the two struct columns do not
// have the same number of elements; the message reports both sizes, matching
// the diagnostics produced by insert(const Field&).
void ColumnStruct::insert_from(const IColumn& src_, size_t n) {
    const ColumnStruct& src = assert_cast<const ColumnStruct&>(src_);
    const size_t tuple_size = columns.size();
    if (src.columns.size() != tuple_size) {
        throw doris::Exception(ErrorCode::INTERNAL_ERROR,
                               "Cannot insert value of different size into tuple. src tuple size "
                               "{}, columns size {}",
                               src.columns.size(), tuple_size);
    }
    for (size_t i = 0; i < tuple_size; ++i) {
        columns[i]->insert_from(*src.columns[i], n);
    }
}
void ColumnStruct::insert_default() {
for (auto& column : columns) {
column->insert_default();
}
}
void ColumnStruct::pop_back(size_t n) {
for (auto& column : columns) {
column->pop_back(n);
}
}
// Serializes row `n` into memory obtained from `arena.alloc_continue` and
// returns a StringRef over the written bytes.
// The requested allocation size is serialize_size_at(n); the returned length
// is whatever serialize_impl reports as written.
StringRef ColumnStruct::serialize_value_into_arena(size_t n, Arena& arena,
                                                   char const*& begin) const {
    const size_t required_bytes = serialize_size_at(n);
    char* destination = arena.alloc_continue(required_bytes, begin);
    const size_t written_bytes = serialize_impl(destination, n);
    return {destination, written_bytes};
}
size_t ColumnStruct::serialize_size_at(size_t row) const {
size_t sz = 0;
for (const auto& column : columns) {
sz += column->serialize_size_at(row);
}
return sz;
}
size_t ColumnStruct::deserialize_impl(const char* pos) {
size_t sz = 0;
for (auto& column : columns) {
sz += column->deserialize_impl(pos + sz);
}
return sz;
}
size_t ColumnStruct::serialize_impl(char* pos, const size_t row) const {
size_t sz = 0;
for (const auto& column : columns) {
sz += column->serialize_impl(pos + sz, row);
}
DCHECK_EQ(sz, serialize_size_at(row));
return sz;
}
// Deserializes one row from `pos` via deserialize_impl and returns the
// position immediately after the consumed bytes.
const char* ColumnStruct::deserialize_and_insert_from_arena(const char* pos) {
    const size_t consumed = deserialize_impl(pos);
    return pos + consumed;
}
int ColumnStruct::compare_at(size_t n, size_t m, const IColumn& rhs_,
int nan_direction_hint) const {
const ColumnStruct& rhs = assert_cast<const ColumnStruct&, TypeCheckOnRelease::DISABLE>(rhs_);
const size_t lhs_tuple_size = columns.size();
const size_t rhs_tuple_size = rhs.tuple_size();
const size_t min_size = std::min(lhs_tuple_size, rhs_tuple_size);
for (size_t i = 0; i < min_size; ++i) {
if (int res = columns[i]->compare_at(n, m, *rhs.columns[i], nan_direction_hint); res) {
return res;
}
}
return lhs_tuple_size > rhs_tuple_size ? 1 : (lhs_tuple_size < rhs_tuple_size ? -1 : 0);
}
void ColumnStruct::update_hash_with_value(size_t n, SipHash& hash) const {
for (const auto& column : columns) {
column->update_hash_with_value(n, hash);
}
}
void ColumnStruct::update_xxHash_with_value(size_t start, size_t end, uint64_t& hash,
const uint8_t* __restrict null_data) const {
for (const auto& column : columns) {
column->update_xxHash_with_value(start, end, hash, nullptr);
}
}
void ColumnStruct::update_crc_with_value(size_t start, size_t end, uint32_t& hash,
const uint8_t* __restrict null_data) const {
for (const auto& column : columns) {
column->update_crc_with_value(start, end, hash, nullptr);
}
}
void ColumnStruct::update_hashes_with_value(uint64_t* __restrict hashes,
const uint8_t* __restrict null_data) const {
for (const auto& column : columns) {
column->update_hashes_with_value(hashes, null_data);
}
}
// Updates hash[i] with the CRC of row i for every row of this column.
// When `null_data` is provided, rows whose null_data[i] is non-zero are skipped.
// NOTE(review): `type`, `rows` and `offset` are not read by this body; the row
// count comes from size() — confirm against callers.
void ColumnStruct::update_crcs_with_value(uint32_t* __restrict hash, PrimitiveType type,
                                          uint32_t rows, uint32_t offset,
                                          const uint8_t* __restrict null_data) const {
    const auto row_count = size();
    for (size_t row = 0; row < row_count; ++row) {
        // A row is hashed when there is no null map or the map marks it as not null.
        const bool hash_this_row = (null_data == nullptr) || (null_data[row] == 0);
        if (hash_this_row) {
            update_crc_with_value(row, row + 1, hash[row], nullptr);
        }
    }
}
void ColumnStruct::update_crc32c_batch(uint32_t* __restrict hashes,
const uint8_t* __restrict null_map) const {
for (const auto& column : columns) {
column->update_crc32c_batch(hashes, nullptr);
}
}
void ColumnStruct::update_crc32c_single(size_t start, size_t end, uint32_t& hash,
const uint8_t* __restrict null_map) const {
for (const auto& column : columns) {
column->update_crc32c_single(start, end, hash, nullptr);
}
}
// Forwards insert_indices_from to every element column, pairing the i-th
// element of this column with the i-th element of `src` (cast to ColumnStruct).
void ColumnStruct::insert_indices_from(const IColumn& src, const uint32_t* indices_begin,
                                       const uint32_t* indices_end) {
    const auto& source_struct = assert_cast<const ColumnStruct&>(src);
    size_t idx = 0;
    while (idx < columns.size()) {
        const auto& source_element = source_struct.get_column(idx);
        columns[idx]->insert_indices_from(source_element, indices_begin, indices_end);
        ++idx;
    }
}
// Forwards insert_many_from(position, length) to every element column, pairing
// the i-th element of this column with the i-th element of `src` (cast to
// ColumnStruct).
void ColumnStruct::insert_many_from(const IColumn& src, size_t position, size_t length) {
    const auto& source_struct = assert_cast<const ColumnStruct&>(src);
    size_t idx = 0;
    while (idx < columns.size()) {
        const auto& source_element = source_struct.get_column(idx);
        columns[idx]->insert_many_from(source_element, position, length);
        ++idx;
    }
}
// Forwards insert_range_from(start, length) to every element column, pairing
// the i-th element of this column with the i-th element of `src` (cast to
// ColumnStruct).
void ColumnStruct::insert_range_from(const IColumn& src, size_t start, size_t length) {
    size_t idx = 0;
    for (auto& element : columns) {
        // The cast is performed per element, so it is never evaluated when
        // this column has no elements.
        const auto& source_struct =
                assert_cast<const ColumnStruct&, TypeCheckOnRelease::DISABLE>(src);
        element->insert_range_from(*source_struct.columns[idx], start, length);
        ++idx;
    }
}
// Forwards insert_range_from_ignore_overflow(start, length) to every element
// column, pairing the i-th element of this column with the i-th element of
// `src` (cast to ColumnStruct).
void ColumnStruct::insert_range_from_ignore_overflow(const IColumn& src, size_t start,
                                                     size_t length) {
    size_t idx = 0;
    for (auto& element : columns) {
        // The cast is performed per element, so it is never evaluated when
        // this column has no elements.
        const auto& source_struct =
                assert_cast<const ColumnStruct&, TypeCheckOnRelease::DISABLE>(src);
        element->insert_range_from_ignore_overflow(*source_struct.columns[idx], start, length);
        ++idx;
    }
}
// Returns a new struct column whose elements are the results of applying
// filter(filt, result_size_hint) to each element column of this one.
ColumnPtr ColumnStruct::filter(const Filter& filt, ssize_t result_size_hint) const {
    Columns filtered;
    filtered.reserve(columns.size());
    for (const auto& element : columns) {
        filtered.push_back(element->filter(filt, result_size_hint));
    }
    return ColumnStruct::create(filtered);
}
// Filters every element column in place with `filter` and returns the size
// reported by the element columns (0 when there are no elements).
// All element columns must report the same size; a mismatch fails the CHECK.
size_t ColumnStruct::filter(const Filter& filter) {
    const size_t tuple_size = columns.size();
    size_t result_size = 0;
    for (size_t i = 0; i < tuple_size; ++i) {
        const auto this_result_size = columns[i]->filter(filter);
        // Only the first element is exempt from the comparison. Using
        // "result_size == 0" as the "not set yet" marker would silently skip
        // the check whenever a previous element legitimately filtered to 0 rows.
        CHECK(i == 0 || result_size == this_result_size);
        result_size = this_result_size;
    }
    return result_size;
}
// Returns a new struct column whose elements are the results of applying
// permute(perm, limit) to each element column of this one.
MutableColumnPtr ColumnStruct::permute(const Permutation& perm, size_t limit) const {
    const size_t element_count = columns.size();
    Columns permuted(element_count);
    size_t idx = 0;
    while (idx != element_count) {
        permuted[idx] = columns[idx]->permute(perm, limit);
        ++idx;
    }
    return ColumnStruct::create(permuted);
}
void ColumnStruct::shrink_padding_chars() {
for (auto& column : columns) {
column->shrink_padding_chars();
}
}
// Forwards reserve(n) to every element column.
void ColumnStruct::reserve(size_t n) {
    for (size_t idx = 0, element_count = columns.size(); idx != element_count; ++idx) {
        get_column(idx).reserve(n);
    }
}
// Forwards resize(n) to every element column.
// Please check the size you really need in each data column when the struct
// mixes various data types, e.g. a string column together with an int column.
void ColumnStruct::resize(size_t n) {
    for (size_t idx = 0, element_count = columns.size(); idx != element_count; ++idx) {
        get_column(idx).resize(n);
    }
}
size_t ColumnStruct::byte_size() const {
size_t res = 0;
for (const auto& column : columns) {
res += column->byte_size();
}
return res;
}
size_t ColumnStruct::allocated_bytes() const {
size_t res = 0;
for (const auto& column : columns) {
res += column->allocated_bytes();
}
return res;
}
// Returns true only if every element column reports has_enough_capacity for
// the corresponding element column of `src` (cast to ColumnStruct).
// Stops at the first element that reports false.
bool ColumnStruct::has_enough_capacity(const IColumn& src) const {
    const auto& source_struct = assert_cast<const ColumnStruct&>(src);
    size_t idx = 0;
    for (const auto& element : columns) {
        const bool fits = element->has_enough_capacity(*source_struct.columns[idx]);
        if (!fits) {
            return false;
        }
        ++idx;
    }
    return true;
}
// Invokes `callback` once per element column, in order.
void ColumnStruct::for_each_subcolumn(ColumnCallback callback) {
    for (size_t idx = 0; idx < columns.size(); ++idx) {
        callback(columns[idx]);
    }
}
// Returns true when `rhs` is a ColumnStruct with the same number of element
// columns and every pair of corresponding elements reports structure_equals.
bool ColumnStruct::structure_equals(const IColumn& rhs) const {
    const auto* other = typeid_cast<const ColumnStruct*>(&rhs);
    if (other == nullptr) {
        return false;
    }
    const size_t element_count = columns.size();
    if (other->columns.size() != element_count) {
        return false;
    }
    for (size_t idx = 0; idx < element_count; ++idx) {
        const bool same_structure = columns[idx]->structure_equals(*other->columns[idx]);
        if (!same_structure) {
            return false;
        }
    }
    return true;
}
// Row-index comparator over a single ColumnStruct.
// Rows are compared element column by element column; the first non-zero
// comparison decides. With positive == true the functor answers "lhs sorts
// before rhs" (result < 0), otherwise "lhs sorts after rhs" (result > 0).
// Rows that compare equal in every element column yield false either way.
template <bool positive>
struct ColumnStruct::less {
    const ColumnStruct& parent;
    const int nan_direction_hint;
    explicit less(const ColumnStruct& parent_, int nan_direction_hint_)
            : parent(parent_), nan_direction_hint(nan_direction_hint_) {}
    bool operator()(size_t lhs, size_t rhs) const {
        for (auto& col : parent.get_columns()) {
            // Both rows live in the same element column, so it is compared with itself.
            const int cmp = col->compare_at(lhs, rhs, *col.get(), nan_direction_hint);
            if (cmp != 0) {
                return positive ? (cmp < 0) : (cmp > 0);
            }
        }
        return false;
    }
};
// Fills `res` with the row indices 0..size()-1 and sorts them with pdqsort
// using ColumnStruct::less: less<true> for the normal order, less<false> when
// `reverse` is set.
// NOTE(review): `limit` is not read by this body; the whole range is sorted.
void ColumnStruct::get_permutation(bool reverse, size_t limit, int nan_direction_hint,
                                   IColumn::Permutation& res) const {
    const size_t row_count = size();
    res.resize(row_count);
    for (size_t row = 0; row != row_count; ++row) {
        res[row] = row;
    }
    if (!reverse) {
        pdqsort(res.begin(), res.end(), ColumnStruct::less<true>(*this, nan_direction_hint));
    } else {
        pdqsort(res.begin(), res.end(), ColumnStruct::less<false>(*this, nan_direction_hint));
    }
}
void ColumnStruct::sort_column(const ColumnSorter* sorter, EqualFlags& flags,
IColumn::Permutation& perms, EqualRange& range,
bool last_column) const {
sorter->sort_column(static_cast<const ColumnStruct&>(*this), flags, perms, range, last_column);
}
// For each of the first `num_rows` rows, appends the serialized row to the
// bytes already referenced by keys[row] and grows keys[row].size by the number
// of bytes written.
void ColumnStruct::serialize(StringRef* keys, size_t num_rows) const {
    for (size_t row = 0; row < num_rows; ++row) {
        StringRef& key = keys[row];
        // Used in hash_map_context.h: this address is allocated via Arena but
        // passed through StringRef, so using const_cast is acceptable.
        char* destination = const_cast<char*>(key.data + key.size);
        key.size += serialize_impl(destination, row);
    }
}
// For each of the `num_rows` keys, deserializes one row from the start of the
// key and then advances the key past the consumed bytes (data moves forward,
// size shrinks by the same amount).
void ColumnStruct::deserialize(StringRef* keys, const size_t num_rows) {
    for (size_t row = 0; row < num_rows; ++row) {
        StringRef& key = keys[row];
        const auto consumed = deserialize_impl(key.data);
        key.data += consumed;
        key.size -= consumed;
    }
}
// Returns the sum of get_max_row_byte_size() over all element columns.
size_t ColumnStruct::get_max_row_byte_size() const {
    size_t total_bytes = 0;
    for (size_t idx = 0; idx < columns.size(); ++idx) {
        total_bytes += columns[idx]->get_max_row_byte_size();
    }
    return total_bytes;
}
// Forwards replace_float_special_values() to every element column.
void ColumnStruct::replace_float_special_values() {
    for (size_t idx = 0; idx < columns.size(); ++idx) {
        columns[idx]->replace_float_special_values();
    }
}
} // namespace doris::vectorized