blob: d1e3954836a81bb937dfa4fe949c99e093e37a6b [file] [log] [blame]
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#include "format/table/equality_delete.h"
#include "exprs/create_predicate_function.h"
namespace doris {
#include "common/compile_check_begin.h"
// Factory that picks the equality-delete implementation from the width of the delete block.
//
// delete_block:   block whose column count selects the implementation.
// delete_col_ids: ids paired with the delete block's columns; DCHECK'd to have the same count
//                 as the block has columns.
// Returns a SimpleEqualityDelete when the block has exactly one column and a
// MultiEqualityDelete for any other column count.
std::unique_ptr<EqualityDeleteBase> EqualityDeleteBase::get_delete_impl(
        const Block* delete_block, const std::vector<int>& delete_col_ids) {
    const auto num_delete_columns = delete_block->columns();
    DCHECK_EQ(num_delete_columns, delete_col_ids.size());
    if (num_delete_columns != 1) {
        return std::make_unique<MultiEqualityDelete>(delete_block, delete_col_ids);
    }
    return std::make_unique<SimpleEqualityDelete>(delete_block, delete_col_ids);
}
// Builds `_hybrid_set` from the single column of the delete block.
//
// The delete block's row count is added to `num_delete_rows` first (even when the
// column-count check below fails). Returns InternalError when the delete block does not
// hold exactly one column, OK otherwise.
Status SimpleEqualityDelete::_build_set() {
    const auto delete_row_count = _delete_block->rows();
    COUNTER_UPDATE(num_delete_rows, delete_row_count);
    if (_delete_block->columns() != 1) [[unlikely]] {
        return Status::InternalError("Simple equality delete can be only applied with one column");
    }
    const auto& delete_entry = _delete_block->get_by_position(0);
    // The set is created for the column's primitive type with any Nullable wrapper removed.
    const auto primitive_type = remove_nullable(delete_entry.type)->get_primitive_type();
    _hybrid_set.reset(create_set(primitive_type, delete_row_count, false));
    // Insert the delete column's values starting from row 0.
    _hybrid_set->insert_fixed_len(delete_entry.column, 0);
    return Status::OK();
}
// Clears the `filter` entries of the rows in `data_block` whose key column value is found in
// the delete set (`_hybrid_set`).
//
// data_block:              block whose rows are tested.
// col_name_to_block_idx:   block column name -> column position in `data_block`.
// id_to_block_column_name: field id -> block column name.
// filter:                  in/out; for each of the first `data_block->rows()` entries,
//                          filter[i] is AND-ed with "row i did not match the delete set".
//
// Returns InternalError when the delete column cannot be resolved to a column of
// `data_block`, OK otherwise.
//
// NOTE(review): unlike MultiEqualityDelete::filter_data_block, the data column's type is not
// compared with the delete column's type here - confirm callers guarantee they match.
Status SimpleEqualityDelete::filter_data_block(
        Block* data_block, const std::unordered_map<std::string, uint32_t>* col_name_to_block_idx,
        const std::unordered_map<int, std::string>& id_to_block_column_name,
        IColumn::Filter& filter) {
    SCOPED_TIMER(equality_delete_time);
    DCHECK(_delete_col_ids.size() == 1);
    const auto column_field_id = _delete_col_ids[0];

    // Resolve field id -> column name -> block position. A missing mapping is reported as a
    // Status instead of letting unordered_map::at() throw std::out_of_range.
    const auto name_it = id_to_block_column_name.find(column_field_id);
    if (name_it == id_to_block_column_name.end()) [[unlikely]] {
        return Status::InternalError("Field id {} not found in block column name mapping",
                                     column_field_id);
    }
    const auto& block_column_name = name_it->second;
    const auto pos_it = col_name_to_block_idx->find(block_column_name);
    if (pos_it == col_name_to_block_idx->end()) [[unlikely]] {
        return Status::InternalError("Column '{}' not found in data block: {}",
                                     block_column_name, data_block->dump_structure());
    }
    // Bound by reference so the column entry is not copied.
    const auto& column_and_type = data_block->get_by_position(pos_it->second);

    const size_t rows = data_block->rows();
    // _single_filter: 1 => value is in _hybrid_set; 0 => value is not in _hybrid_set.
    if (_single_filter == nullptr) {
        _single_filter = std::make_unique<IColumn::Filter>(rows, 0);
    } else {
        // Reuse the existing buffer: resize it to `rows` entries and zero all of them.
        _single_filter->assign(rows, UInt8(0));
    }

    if (column_and_type.column->is_nullable()) {
        // static_cast is the correct cast for a downcast within a class hierarchy.
        const auto* nullable_column =
                static_cast<const ColumnNullable*>(column_and_type.column.get());
        const NullMap& null_map = nullable_column->get_null_map_data();
        _hybrid_set->find_batch_nullable(
                remove_nullable(column_and_type.column)->assume_mutable_ref(), rows, null_map,
                *_single_filter);
        if (_hybrid_set->contain_null()) {
            // The delete set contains NULL, so NULL data rows count as matched too.
            auto* match_flags = _single_filter->data();
            for (size_t i = 0; i < rows; ++i) {
                match_flags[i] = match_flags[i] || null_map[i];
            }
        }
    } else {
        _hybrid_set->find_batch(column_and_type.column->assume_mutable_ref(), rows,
                                *_single_filter);
    }

    // _single_filter marks the rows to delete, so it is inverted before being merged into
    // the caller's keep-filter.
    auto* filter_data = filter.data();
    const auto* matched = _single_filter->data();
    for (size_t i = 0; i < rows; ++i) {
        filter_data[i] &= !matched[i];
    }
    return Status::OK();
}
// Computes one combined hash per delete row over all delete columns and indexes the delete
// rows by that hash in `_delete_hash_map` (hash -> delete row index).
//
// Also adds the delete block's row count to `num_delete_rows` and sizes `_data_column_index`
// to one slot per delete column. Always returns OK.
Status MultiEqualityDelete::_build_set() {
    const size_t rows = _delete_block->rows();
    COUNTER_UPDATE(num_delete_rows, rows);

    _delete_hashes.clear();
    _delete_hashes.resize(rows, 0);
    // Each column updates the same per-row hash array in turn. Iterating by const reference
    // avoids copying a ColumnPtr per column.
    for (const auto& column : _delete_block->get_columns()) {
        column->update_hashes_with_value(_delete_hashes.data(), nullptr);
    }

    // Reset the index alongside the hash array so a repeated call cannot leave stale or
    // duplicated entries behind.
    _delete_hash_map.clear();
    for (size_t i = 0; i < rows; ++i) {
        _delete_hash_map.insert({_delete_hashes[i], i});
    }

    _data_column_index.resize(_delete_block->columns());
    return Status::OK();
}
// Clears the `filter` entries of the rows in `data_block` that equal some delete row on all
// delete columns.
//
// data_block:              block whose rows are tested.
// col_name_to_block_idx:   block column name -> column position in `data_block`.
// id_to_block_column_name: field id -> block column name.
// filter:                  in/out; entry i is set to 0 when row i matches a delete row.
//                          Entries that are already 0 are left untouched.
//
// Returns InternalError when a delete column cannot be resolved to a column of `data_block`
// or when the data column's type differs from the delete column's type; OK otherwise.
Status MultiEqualityDelete::filter_data_block(
        Block* data_block, const std::unordered_map<std::string, uint32_t>* col_name_to_block_idx,
        const std::unordered_map<int, std::string>& id_to_block_column_name,
        IColumn::Filter& filter) {
    SCOPED_TIMER(equality_delete_time);
    DCHECK_EQ(_delete_block->get_columns_with_type_and_name().size(), _delete_col_ids.size());

    // Step 1: map every delete column to its position in the data block.
    // `_data_column_index` is sized by _build_set(); presumably that has run before this call.
    const size_t num_delete_columns = _delete_block->columns();
    for (size_t idx = 0; idx < num_delete_columns; ++idx) {
        // Bound by reference so the column entry is not copied on every iteration.
        const auto& delete_col = _delete_block->get_by_position(idx);
        const auto delete_col_id = _delete_col_ids[idx];
        DCHECK(id_to_block_column_name.contains(delete_col_id));
        // In builds where the DCHECK is compiled out, report a Status instead of letting
        // unordered_map::at() throw std::out_of_range.
        const auto name_it = id_to_block_column_name.find(delete_col_id);
        if (name_it == id_to_block_column_name.end()) [[unlikely]] {
            return Status::InternalError("Field id {} not found in block column name mapping",
                                         delete_col_id);
        }
        const auto& block_column_name = name_it->second;
        // A single lookup serves both the existence check and the position.
        const auto pos_it = col_name_to_block_idx->find(block_column_name);
        if (pos_it == col_name_to_block_idx->end()) [[unlikely]] {
            return Status::InternalError("Column '{}' not found in data block: {}",
                                         block_column_name, data_block->dump_structure());
        }
        const auto block_pos = pos_it->second;
        const auto& column_and_type = data_block->safe_get_by_position(block_pos);
        if (!delete_col.type->equals(*column_and_type.type)) [[unlikely]] {
            return Status::InternalError(
                    "Not support type change in column '{}', src type: {}, target type: {}",
                    block_column_name, delete_col.type->get_name(),
                    column_and_type.type->get_name());
        }
        _data_column_index[idx] = block_pos;
    }

    // Step 2: compute one combined hash per data row over the mapped columns, in the same
    // column order used for the delete rows.
    const size_t rows = data_block->rows();
    _data_hashes.clear();
    _data_hashes.resize(rows, 0);
    for (size_t index : _data_column_index) {
        data_block->get_by_position(index).column->update_hashes_with_value(_data_hashes.data(),
                                                                            nullptr);
    }

    // Step 3: probe the delete index. Equal hashes are only candidates; _equal() confirms a
    // match by comparing the actual column values.
    auto* filter_data = filter.data();
    for (size_t i = 0; i < rows; ++i) {
        if (!filter_data[i]) {
            // Row is already filtered out; no need to probe the index.
            continue;
        }
        const auto [first, last] = _delete_hash_map.equal_range(_data_hashes[i]);
        for (auto it = first; it != last; ++it) {
            if (_equal(data_block, i, it->second)) {
                filter_data[i] = 0;
                break;
            }
        }
    }
    return Status::OK();
}
// Returns true when row `data_row_index` of `data_block` compares equal to row
// `delete_row_index` of the delete block on every delete column.
//
// Column i of the delete block is compared against the data block column at position
// `_data_column_index[i]`. The comparison stops at the first column that differs.
bool MultiEqualityDelete::_equal(Block* data_block, size_t data_row_index,
                                 size_t delete_row_index) {
    const size_t num_columns = _delete_block->columns();
    for (size_t i = 0; i < num_columns; ++i) {
        // Bound by reference: this runs once per candidate row, so copying the column
        // pointers here would be pure overhead.
        const auto& data_col = data_block->get_by_position(_data_column_index[i]).column;
        const auto& delete_col = _delete_block->get_by_position(i).column;
        // The trailing -1 is passed through to compare_at unchanged (presumably its NaN
        // ordering hint - TODO confirm); only equality (== 0) matters here.
        if (data_col->compare_at(data_row_index, delete_row_index, *delete_col, -1) != 0) {
            return false;
        }
    }
    return true;
}
#include "common/compile_check_end.h"
} // namespace doris