blob: 172129fa21286cf71477ef0420a1c98765d9dc4e [file] [log] [blame]
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#pragma once
#include <glog/logging.h>
#include <cstddef>
#include "runtime/primitive_type.h"
#include "vec/columns/column.h"
#include "vec/columns/columns_common.h"
#include "vec/common/arena.h"
#include "vec/common/assert_cast.h"
#include "vec/common/memcmp_small.h"
#include "vec/common/pod_array.h"
#include "vec/common/sip_hash.h"
namespace doris::vectorized {
class ColumnFixedLengthObject final : public COWHelper<IColumn, ColumnFixedLengthObject> {
private:
using Self = ColumnFixedLengthObject;
friend class COWHelper<IColumn, ColumnFixedLengthObject>;
friend class OlapBlockDataConvertor;
public:
using Container = PaddedPODArray<uint8_t>;
ColumnFixedLengthObject() = delete;
private:
ColumnFixedLengthObject(const size_t _item_size_) : _item_size(_item_size_), _item_count(0) {}
ColumnFixedLengthObject(const ColumnFixedLengthObject& src)
: _item_size(src._item_size),
_item_count(src._item_count),
_data(src._data.begin(), src._data.end()) {}
public:
std::string get_name() const override { return "ColumnFixedLengthObject"; }
size_t size() const override { return _item_count; }
const Container& get_data() const { return _data; }
Container& get_data() { return _data; }
void resize(size_t n) override {
DCHECK_GT(_item_size, 0) << "_item_size should be greater than 0";
_data.resize(n * _item_size);
_item_count = n;
}
MutableColumnPtr clone_resized(size_t size) const override {
auto res = create(_item_size);
if (size > 0) {
auto& new_col = assert_cast<Self&>(*res);
new_col.resize(size);
auto* new_data = new_col._data.data();
size_t count = std::min(this->size(), size);
memcpy(new_data, _data.data(), count * _item_size);
if (size > count) {
memset(new_data + count * _item_size, 0, (size - count) * _item_size);
}
}
return res;
}
void insert_indices_from(const IColumn& src, const uint32_t* indices_begin,
const uint32_t* indices_end) override {
const Self& src_vec = assert_cast<const Self&>(src);
auto origin_size = size();
auto new_size = indices_end - indices_begin;
if (_item_size == 0) {
_item_size = src_vec._item_size;
}
DCHECK_EQ(_item_size, src_vec._item_size) << "dst and src should have the same _item_size";
resize(origin_size + new_size);
for (uint32_t i = 0; i < new_size; ++i) {
memcpy(&_data[(origin_size + i) * _item_size],
&src_vec._data[indices_begin[i] * _item_size], _item_size);
}
}
void clear() override {
_data.clear();
_item_count = 0;
}
Field operator[](size_t n) const override {
return Field::create_field<TYPE_STRING>(
String(reinterpret_cast<const char*>(_data.data() + n * _item_size), _item_size));
}
void get(size_t n, Field& res) const override {
res = Field::create_field<TYPE_STRING>(
String(reinterpret_cast<const char*>(_data.data() + n * _item_size), _item_size));
}
StringRef get_data_at(size_t n) const override {
return {reinterpret_cast<const char*>(&_data[n * _item_size]), _item_size};
}
void insert(const Field& x) override {
DCHECK_EQ(vectorized::get<const String&>(x).length(), _item_size);
insert_data(vectorized::get<const String&>(x).data(), _item_size);
}
void insert_range_from(const IColumn& src, size_t start, size_t length) override {
const auto& src_col = assert_cast<const ColumnFixedLengthObject&>(src);
CHECK_EQ(src_col._item_size, _item_size);
if (length == 0) {
return;
}
if (start + length > src_col._item_count) {
throw doris::Exception(
doris::ErrorCode::INTERNAL_ERROR,
"Parameters start = {}, length = {} are out of bound in "
"ColumnFixedLengthObject::insert_range_from method (data.size() = {})",
start, length, src_col._item_count);
}
size_t old_size = size();
resize(old_size + length);
memcpy(&_data[old_size * _item_size], &src_col._data[start * _item_size],
length * _item_size);
}
void insert_from(const IColumn& src, size_t n) override {
const auto& src_col = assert_cast<const ColumnFixedLengthObject&>(src);
DCHECK(_item_size == src_col._item_size) << "dst and src should have the same _item_size "
<< _item_size << " " << src_col._item_size;
insert_data((const char*)(&src_col._data[n * _item_size]), _item_size);
}
void insert_data(const char* pos, size_t length) override {
size_t old_size = size();
resize(old_size + 1);
memcpy(&_data[old_size * _item_size], pos, _item_size);
}
void insert_default() override {
size_t old_size = size();
resize(old_size + 1);
memset(&_data[old_size * _item_size], 0, _item_size);
}
void pop_back(size_t n) override {
DCHECK_GE(_item_count, n);
resize(_item_count - n);
}
StringRef serialize_value_into_arena(size_t n, Arena& arena,
char const*& begin) const override {
char* pos = arena.alloc_continue(_item_size, begin);
return {pos, serialize_impl(pos, n)};
}
const char* deserialize_and_insert_from_arena(const char* pos) override {
return pos + deserialize_impl(pos);
}
void update_hash_with_value(size_t n, SipHash& hash) const override {
hash.update(reinterpret_cast<const char*>(_data.data() + n * _item_size), _item_size);
}
ColumnPtr filter(const IColumn::Filter& filter, ssize_t result_size_hint) const override {
column_match_filter_size(size(), filter.size());
auto res = create(_item_size);
size_t column_size = size();
if (result_size_hint > 0) {
res->reserve(result_size_hint);
}
res->resize(column_size);
size_t pos = 0;
for (size_t i = 0; i < filter.size(); i++) {
if (filter[i]) {
memcpy(&res->_data[pos * _item_size], &_data[i * _item_size], _item_size);
pos++;
}
}
res->resize(pos);
return res;
}
size_t filter(const IColumn::Filter& filter) override {
size_t pos = 0;
for (size_t i = 0; i < filter.size(); i++) {
if (filter[i]) {
memcpy(&_data[pos * _item_size], &_data[i * _item_size], _item_size);
pos++;
}
}
resize(pos);
return pos;
}
MutableColumnPtr permute(const IColumn::Permutation& perm, size_t limit) const override {
if (limit == 0) {
limit = size();
} else {
limit = std::min(size(), limit);
}
auto res = ColumnFixedLengthObject::create(_item_size);
res->resize(limit);
for (size_t i = 0; i < limit; ++i) {
memcpy_small_allow_read_write_overflow15(res->_data.data() + i * _item_size,
_data.data() + perm[i] * _item_size,
_item_size);
}
return res;
}
size_t byte_size() const override { return _data.size(); }
size_t item_size() const { return _item_size; }
void set_item_size(size_t size) {
DCHECK(_item_count == 0 || size == _item_size)
<< "cannot reset _item_size of ColumnFixedLengthObject";
_item_size = size;
}
size_t allocated_bytes() const override { return _data.allocated_bytes(); }
bool has_enough_capacity(const IColumn& src) const override {
const auto& src_col = assert_cast<const ColumnFixedLengthObject&>(src);
return _data.capacity() - _data.size() > src_col.size();
}
//NOTICE: here is replace: this[self_row] = rhs[row]
//But column string is replaced all when self_row = 0
void replace_column_data(const IColumn& rhs, size_t row, size_t self_row = 0) override {
DCHECK(size() > self_row);
DCHECK(_item_size == assert_cast<const Self&>(rhs)._item_size)
<< _item_size << " " << assert_cast<const Self&>(rhs)._item_size;
auto obj = assert_cast<const Self&>(rhs).get_data_at(row);
memcpy(&_data[self_row * _item_size], obj.data, _item_size);
}
void insert_many_continuous_binary_data(const char* data, const uint32_t* offsets,
const size_t num) override {
if (UNLIKELY(num == 0)) {
return;
}
const auto old_size = size();
const auto begin_offset = offsets[0];
const size_t total_mem_size = offsets[num] - begin_offset;
resize(old_size + num);
memcpy(&_data[old_size * _item_size], data + begin_offset, total_mem_size);
}
void insert_many_strings(const StringRef* strings, size_t num) override {
if (UNLIKELY(num == 0)) {
return;
}
size_t old_count = _item_count;
resize(old_count + num);
auto* dst = _data.data() + old_count * _item_size;
for (size_t i = 0; i < num; i++) {
memcpy(dst, strings[i].data, strings[i].size);
dst += _item_size;
}
}
size_t deserialize_impl(const char* pos) override {
insert_data(pos, _item_size);
return _item_size;
}
size_t serialize_impl(char* pos, const size_t row) const override {
memcpy(pos, &_data[row * _item_size], _item_size);
return _item_size;
}
size_t serialize_size_at(size_t row) const override { return sizeof(_item_size); }
protected:
size_t _item_size;
size_t _item_count;
Container _data;
};
} // namespace doris::vectorized