blob: 100a11fd2c7ac62aa0f6df406a19b865af4ad2d7 [file] [log] [blame]
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#pragma once
#include <vector>
#include "vec/columns/column.h"
#include "vec/columns/column_vector.h"
#include "vec/common/arena.h"
#include "vec/common/custom_allocator.h"
namespace doris {
namespace vectorized {
class Block;
class MutableBlock;
struct HashJoinProbeContext;
} // namespace vectorized
namespace pipeline {
class HashJoinProbeLocalState;
class HashJoinProbeOperatorX;
using MutableColumnPtr = vectorized::IColumn::MutablePtr;
using MutableColumns = std::vector<vectorized::MutableColumnPtr>;
using NullMap = vectorized::ColumnUInt8::Container;
using ConstNullMapPtr = const NullMap*;
template <int JoinOpType>
struct ProcessHashTableProbe {
ProcessHashTableProbe(HashJoinProbeLocalState* parent, int batch_size);
~ProcessHashTableProbe() = default;
// output build side result column
void build_side_output_column(vectorized::MutableColumns& mcol, bool is_mark_join);
void probe_side_output_column(vectorized::MutableColumns& mcol);
// Only process the join with no other join conjunct, because of no other join conjunt
// the output block struct is same with mutable block. we can do more opt on it and simplify
// the logic of probe
template <typename HashTableType>
Status process(HashTableType& hash_table_ctx, const uint8_t* null_map,
vectorized::MutableBlock& mutable_block, vectorized::Block* output_block,
uint32_t probe_rows, bool is_mark_join);
// In the presence of other join conjunct, the process of join become more complicated.
// each matching join column need to be processed by other join conjunct. so the struct of mutable block
// and output block may be different
// The output result is determined by the other join conjunct result and same_to_prev struct
Status do_other_join_conjuncts(vectorized::Block* output_block, DorisVector<uint8_t>& visited);
Status do_mark_join_conjuncts(vectorized::Block* output_block, const uint8_t* null_map);
Status finalize_block_with_filter(vectorized::Block* output_block, size_t filter_column_id,
size_t column_to_keep);
template <typename HashTableType>
typename HashTableType::State _init_probe_side(HashTableType& hash_table_ctx,
uint32_t probe_rows, const uint8_t* null_map);
// Process full outer join/ right join / right semi/anti join to output the join result
// in hash table
template <typename HashTableType>
Status finish_probing(HashTableType& hash_table_ctx, vectorized::MutableBlock& mutable_block,
vectorized::Block* output_block, bool* eos, bool is_mark_join);
/// For null aware join with other conjuncts, if the probe key of one row on left side is null,
/// we should make this row match with all rows in build side.
uint32_t _process_probe_null_key(uint32_t probe_idx);
pipeline::HashJoinProbeLocalState* _parent = nullptr;
pipeline::HashJoinProbeOperatorX* _parent_operator = nullptr;
const int _batch_size;
const std::shared_ptr<vectorized::Block>& _build_block;
std::unique_ptr<vectorized::Arena> _arena;
vectorized::ColumnOffset32 _probe_indexs;
vectorized::ColumnOffset32 _output_row_indexs;
bool _probe_visited = false;
bool _picking_null_keys = false;
vectorized::ColumnOffset32 _build_indexs;
std::vector<uint8_t> _null_flags;
/// If the probe key of one row on left side is null,
/// we will make all rows in build side match with this row,
/// `_build_index_for_null_probe_key` is used to record the progress if the build block is too big.
uint32_t _build_index_for_null_probe_key {0};
std::vector<int> _build_blocks_locs;
std::vector<char> _probe_side_find_result;
const bool _have_other_join_conjunct;
const std::vector<bool>& _left_output_slot_flags;
const std::vector<bool>& _right_output_slot_flags;
// nullable column but not has null except first row
std::vector<bool> _build_column_has_null;
bool _need_calculate_all_match_one = false;
RuntimeProfile::Counter* _search_hashtable_timer = nullptr;
RuntimeProfile::Counter* _init_probe_side_timer = nullptr;
RuntimeProfile::Counter* _build_side_output_timer = nullptr;
RuntimeProfile::Counter* _probe_side_output_timer = nullptr;
RuntimeProfile::Counter* _finish_probe_phase_timer = nullptr;
// See `HashJoinProbeOperatorX::_right_col_idx`
const size_t _right_col_idx;
size_t _right_col_len;
// For right semi with mark join conjunct, we need to store the mark join flags
// in the hash table.
// -1 means null, 0 means false, 1 means true
DorisVector<int8_t> mark_join_flags;
};
} // namespace pipeline
} // namespace doris