blob: e352513804c25935808808fbaa6271069e43f291 [file] [log] [blame]
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#include "vec/columns/column_varbinary.h"
#include <glog/logging.h>
#include <cstddef>
#include <cstdint>
#include "runtime/primitive_type.h"
#include "vec/columns/column.h"
#include "vec/columns/column_string.h"
#include "vec/columns/columns_common.h"
#include "vec/common/arena.h"
#include "vec/common/assert_cast.h"
#include "vec/core/sort_block.h"
namespace doris::vectorized {
#include "common/compile_check_begin.h"
MutableColumnPtr ColumnVarbinary::clone_resized(size_t size) const {
auto res = create();
if (size > 0) {
auto& new_col = assert_cast<Self&>(*res);
size_t count = std::min(this->size(), size);
for (size_t i = 0; i < count; ++i) {
auto value = this->get_data_at(i);
new_col.insert_data(value.data, value.size);
}
if (size > count) {
new_col.insert_many_defaults(size - count);
}
}
return res;
}
void ColumnVarbinary::insert_range_from(const IColumn& src, size_t start, size_t length) {
if (length == 0) {
return;
}
const auto& src_col = assert_cast<const ColumnVarbinary&>(src);
if (start + length > src_col.size()) {
throw doris::Exception(doris::ErrorCode::INTERNAL_ERROR,
"Parameters start = {}, length = {} are out of bound in "
"ColumnVarbinary::insert_range_from method (data.size() = {})",
start, length, src_col.size());
}
for (size_t i = 0; i < length; ++i) {
auto value = src_col.get_data_at(start + i);
insert_data(value.data, value.size);
}
}
void ColumnVarbinary::insert_indices_from(const IColumn& src, const uint32_t* indices_begin,
const uint32_t* indices_end) {
const Self& src_vec = assert_cast<const Self&>(src);
auto new_size = indices_end - indices_begin;
for (uint32_t i = 0; i < new_size; ++i) {
auto offset = *(indices_begin + i);
auto value = src_vec.get_data_at(offset);
insert_data(value.data, value.size);
}
}
ColumnPtr ColumnVarbinary::filter(const IColumn::Filter& filt, ssize_t result_size_hint) const {
size_t size = this->size();
column_match_filter_size(size, filt.size());
if (_data.size() == 0) {
return doris::vectorized::ColumnVarbinary::create();
}
auto res = doris::vectorized::ColumnVarbinary::create();
Container& res_data = res->get_data();
if (result_size_hint) {
res_data.reserve(result_size_hint > 0 ? result_size_hint : size);
}
const UInt8* filt_pos = filt.data();
const UInt8* filt_end = filt_pos + size;
const auto* data_pos = _data.data();
while (filt_pos < filt_end) {
if (*filt_pos) {
res->insert_data(data_pos->data(), data_pos->size());
}
++filt_pos;
++data_pos;
}
return res;
};
size_t ColumnVarbinary::filter(const IColumn::Filter& filter) {
size_t pos = 0;
const Self& src_vec = assert_cast<const Self&>(*this);
for (size_t i = 0; i < filter.size(); i++) {
if (filter[i]) {
if (src_vec.get_data()[i].isInline()) {
_data[pos] = src_vec.get_data()[i];
} else {
auto val = src_vec.get_data()[i];
const auto* dst = _arena.insert(val.data(), val.size());
_data[pos] = doris::StringView(dst, val.size());
}
pos++;
}
}
resize(pos);
return pos;
};
MutableColumnPtr ColumnVarbinary::permute(const IColumn::Permutation& perm, size_t limit) const {
size_t size = _data.size();
limit = limit ? std::min(size, limit) : size;
if (perm.size() < limit) {
throw doris::Exception(ErrorCode::INTERNAL_ERROR,
"Size of permutation is less than required.");
__builtin_unreachable();
}
auto res = doris::vectorized::ColumnVarbinary::create(limit);
typename Self::Container& res_data = res->get_data();
for (size_t i = 0; i < limit; ++i) {
auto val = _data[perm[i]];
if (val.isInline()) {
res_data[i] = val;
continue;
}
const auto* dst = res->_arena.insert(val.data(), val.size());
res_data[i] = doris::StringView(dst, val.size());
}
return res;
};
void ColumnVarbinary::replace_column_data(const IColumn& rhs, size_t row, size_t self_row) {
DCHECK(this->size() > self_row);
const auto& rhs_col = assert_cast<const Self&, TypeCheckOnRelease::DISABLE>(rhs);
auto val = rhs_col.get_data()[row];
if (val.isInline()) {
_data[self_row] = val;
return;
}
const auto* dst = _arena.insert(val.data(), val.size());
_data[self_row] = doris::StringView(dst, val.size());
}
size_t ColumnVarbinary::get_max_row_byte_size() const {
uint32_t max_size = 0;
size_t num_rows = size();
for (size_t i = 0; i < num_rows; ++i) {
max_size = std::max(max_size, _data[i].size());
}
return max_size + sizeof(uint32_t);
}
void ColumnVarbinary::serialize(StringRef* keys, size_t num_rows) const {
for (size_t i = 0; i < num_rows; ++i) {
keys[i].size += serialize_impl(const_cast<char*>(keys[i].data + keys[i].size), i);
}
}
void ColumnVarbinary::deserialize(StringRef* keys, const size_t num_rows) {
for (size_t i = 0; i != num_rows; ++i) {
auto sz = deserialize_impl(keys[i].data);
keys[i].data += sz;
keys[i].size -= sz;
}
}
template <bool positive>
struct ColumnVarbinary::less {
const ColumnVarbinary& parent;
explicit less(const ColumnVarbinary& parent_) : parent(parent_) {}
bool operator()(size_t lhs, size_t rhs) const {
int res = parent._data[lhs].compare(parent._data[rhs]);
return positive ? (res < 0) : (res > 0);
}
};
void ColumnVarbinary::get_permutation(bool reverse, size_t limit, int /*nan_direction_hint*/,
IColumn::Permutation& res) const {
size_t s = _data.size();
res.resize(s);
for (size_t i = 0; i < s; ++i) {
res[i] = i;
}
if (reverse) {
pdqsort(res.begin(), res.end(), less<false>(*this));
} else {
pdqsort(res.begin(), res.end(), less<true>(*this));
}
}
void ColumnVarbinary::insert_many_strings(const StringRef* strings, size_t num) {
for (size_t i = 0; i < num; i++) {
insert_data(strings[i].data, strings[i].size);
}
}
void ColumnVarbinary::insert_many_strings_overflow(const StringRef* strings, size_t num,
size_t max_length) {
insert_many_strings(strings, num);
}
void ColumnVarbinary::sort_column(const ColumnSorter* sorter, EqualFlags& flags,
IColumn::Permutation& perms, EqualRange& range,
bool last_column) const {
sorter->sort_column(assert_cast<const ColumnVarbinary&>(*this), flags, perms, range,
last_column);
}
#include "common/compile_check_end.h"
} // namespace doris::vectorized