| // Licensed to the Apache Software Foundation (ASF) under one |
| // or more contributor license agreements. See the NOTICE file |
| // distributed with this work for additional information |
| // regarding copyright ownership. The ASF licenses this file |
| // to you under the Apache License, Version 2.0 (the |
| // "License"); you may not use this file except in compliance |
| // with the License. You may obtain a copy of the License at |
| // |
| // http://www.apache.org/licenses/LICENSE-2.0 |
| // |
| // Unless required by applicable law or agreed to in writing, |
| // software distributed under the License is distributed on an |
| // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
| // KIND, either express or implied. See the License for the |
| // specific language governing permissions and limitations |
| // under the License. |
| |
| #include "format/table/equality_delete.h" |
| |
| #include "exprs/create_predicate_function.h" |
| |
| namespace doris { |
| #include "common/compile_check_begin.h" |
| |
| std::unique_ptr<EqualityDeleteBase> EqualityDeleteBase::get_delete_impl( |
| const Block* delete_block, const std::vector<int>& delete_col_ids) { |
| DCHECK_EQ(delete_block->columns(), delete_col_ids.size()); |
| if (delete_block->columns() == 1) { |
| return std::make_unique<SimpleEqualityDelete>(delete_block, delete_col_ids); |
| } else { |
| return std::make_unique<MultiEqualityDelete>(delete_block, delete_col_ids); |
| } |
| } |
| |
| Status SimpleEqualityDelete::_build_set() { |
| COUNTER_UPDATE(num_delete_rows, _delete_block->rows()); |
| if (_delete_block->columns() != 1) [[unlikely]] { |
| return Status::InternalError("Simple equality delete can be only applied with one column"); |
| } |
| auto& column_and_type = _delete_block->get_by_position(0); |
| auto delete_column_type = remove_nullable(column_and_type.type)->get_primitive_type(); |
| _hybrid_set.reset(create_set(delete_column_type, _delete_block->rows(), false)); |
| _hybrid_set->insert_fixed_len(column_and_type.column, 0); |
| return Status::OK(); |
| } |
| |
| Status SimpleEqualityDelete::filter_data_block( |
| Block* data_block, const std::unordered_map<std::string, uint32_t>* col_name_to_block_idx, |
| const std::unordered_map<int, std::string>& id_to_block_column_name, |
| IColumn::Filter& filter) { |
| SCOPED_TIMER(equality_delete_time); |
| DCHECK(_delete_col_ids.size() == 1); |
| auto column_field_id = _delete_col_ids[0]; |
| |
| auto column_and_type = data_block->get_by_position( |
| col_name_to_block_idx->at(id_to_block_column_name.at(column_field_id))); |
| |
| size_t rows = data_block->rows(); |
| // _filter: 1 => in _hybrid_set; 0 => not in _hybrid_set |
| if (_single_filter == nullptr) { |
| _single_filter = std::make_unique<IColumn::Filter>(rows, 0); |
| } else { |
| // reset the array capacity and fill all elements using the 0 |
| _single_filter->assign(rows, UInt8(0)); |
| } |
| if (column_and_type.column->is_nullable()) { |
| const NullMap& null_map = |
| reinterpret_cast<const ColumnNullable*>(column_and_type.column.get()) |
| ->get_null_map_data(); |
| _hybrid_set->find_batch_nullable( |
| remove_nullable(column_and_type.column)->assume_mutable_ref(), rows, null_map, |
| *_single_filter); |
| if (_hybrid_set->contain_null()) { |
| auto* filter_data = _single_filter->data(); |
| for (size_t i = 0; i < rows; ++i) { |
| filter_data[i] = filter_data[i] || null_map[i]; |
| } |
| } |
| } else { |
| _hybrid_set->find_batch(column_and_type.column->assume_mutable_ref(), rows, |
| *_single_filter); |
| } |
| // should reverse _filter |
| auto* filter_data = filter.data(); |
| for (size_t i = 0; i < rows; ++i) { |
| filter_data[i] &= !_single_filter->data()[i]; |
| } |
| return Status::OK(); |
| } |
| |
| Status MultiEqualityDelete::_build_set() { |
| COUNTER_UPDATE(num_delete_rows, _delete_block->rows()); |
| size_t rows = _delete_block->rows(); |
| _delete_hashes.clear(); |
| _delete_hashes.resize(rows, 0); |
| for (ColumnPtr column : _delete_block->get_columns()) { |
| column->update_hashes_with_value(_delete_hashes.data(), nullptr); |
| } |
| for (size_t i = 0; i < rows; ++i) { |
| _delete_hash_map.insert({_delete_hashes[i], i}); |
| } |
| _data_column_index.resize(_delete_block->columns()); |
| return Status::OK(); |
| } |
| |
| Status MultiEqualityDelete::filter_data_block( |
| Block* data_block, const std::unordered_map<std::string, uint32_t>* col_name_to_block_idx, |
| const std::unordered_map<int, std::string>& id_to_block_column_name, |
| IColumn::Filter& filter) { |
| SCOPED_TIMER(equality_delete_time); |
| DCHECK_EQ(_delete_block->get_columns_with_type_and_name().size(), _delete_col_ids.size()); |
| size_t column_index = 0; |
| |
| for (size_t idx = 0; idx < _delete_block->get_columns_with_type_and_name().size(); ++idx) { |
| auto delete_col = _delete_block->get_columns_with_type_and_name()[idx]; |
| auto delete_col_id = _delete_col_ids[idx]; |
| |
| DCHECK(id_to_block_column_name.contains(delete_col_id)); |
| const auto& block_column_name = id_to_block_column_name.at(delete_col_id); |
| if (!col_name_to_block_idx->contains(block_column_name)) [[unlikely]] { |
| return Status::InternalError("Column '{}' not found in data block: {}", |
| block_column_name, data_block->dump_structure()); |
| } |
| auto column_and_type = |
| data_block->safe_get_by_position(col_name_to_block_idx->at(block_column_name)); |
| if (!delete_col.type->equals(*column_and_type.type)) [[unlikely]] { |
| return Status::InternalError( |
| "Not support type change in column '{}', src type: {}, target type: {}", |
| block_column_name, delete_col.type->get_name(), |
| column_and_type.type->get_name()); |
| } |
| _data_column_index[column_index++] = col_name_to_block_idx->at(block_column_name); |
| } |
| size_t rows = data_block->rows(); |
| _data_hashes.clear(); |
| _data_hashes.resize(rows, 0); |
| for (size_t index : _data_column_index) { |
| data_block->get_by_position(index).column->update_hashes_with_value(_data_hashes.data(), |
| nullptr); |
| } |
| auto* filter_data = filter.data(); |
| for (size_t i = 0; i < rows; ++i) { |
| for (auto beg = _delete_hash_map.lower_bound(_data_hashes[i]), |
| end = _delete_hash_map.upper_bound(_data_hashes[i]); |
| beg != end; ++beg) { |
| if (filter[i] && _equal(data_block, i, beg->second)) { |
| filter_data[i] = 0; |
| break; |
| } |
| } |
| } |
| |
| return Status::OK(); |
| } |
| |
| bool MultiEqualityDelete::_equal(Block* data_block, size_t data_row_index, |
| size_t delete_row_index) { |
| for (size_t i = 0; i < _delete_block->columns(); ++i) { |
| ColumnPtr data_col = data_block->get_by_position(_data_column_index[i]).column; |
| ColumnPtr delete_col = _delete_block->get_by_position(i).column; |
| if (data_col->compare_at(data_row_index, delete_row_index, *delete_col, -1) != 0) { |
| return false; |
| } |
| } |
| return true; |
| } |
| |
| #include "common/compile_check_end.h" |
| } // namespace doris |