// blob: ce5a8eb1dbc4fe0434c61c07f48e1b7d1df7ac4b [file] [log] [blame]
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#pragma once
#include <cstdint>
#include <iosfwd>
#include <memory>
#include <unordered_map>
#include <variant>
#include <vector>
#include "common/status.h"
#include "exec/exec_node.h"
#include "util/runtime_profile.h"
#include "vec/aggregate_functions/aggregate_function.h"
#include "vec/columns/column.h"
#include "vec/common/arena.h"
#include "vec/core/block.h"
#include "vec/core/types.h"
#include "vec/exec/join/process_hash_table_probe.h"
#include "vec/exec/join/vhash_join_node.h"
namespace doris {
// Forward declarations: within this header these types are only named as
// pointer or reference parameters, so their full definitions are not needed here.
class DescriptorTbl;
class ObjectPool;
class RuntimeState;
class TPlanNode;
namespace vectorized {
// Forward declarations for the vectorized namespace.
class VExprContext;
// Used below as the mapped value type of the serialized hash map and as the
// row handle passed to VSetOperationNode::add_result_columns().
struct RowRefListWithFlags;
// Closed set of hash-table contexts a set-operation node can hold.
// std::monostate is the first alternative, so a default-constructed variant
// holds no hash table at all.
// Reordering the alternatives changes the value std::variant::index() reports
// for each of them.
// NOTE(review): the alternatives look like one serialized-key table, one table
// per single primitive key width, and fixed-width packed-key tables whose bool
// parameter is presumably a "has nullable key" flag — confirm against the
// definitions of SetPrimaryTypeHashTableContext / SetFixedKeyHashTableContext.
using SetHashTableVariants = std::variant<
std::monostate, MethodSerialized<HashMap<StringRef, RowRefListWithFlags>>,
SetPrimaryTypeHashTableContext<UInt8>, SetPrimaryTypeHashTableContext<UInt16>,
SetPrimaryTypeHashTableContext<UInt32>, SetPrimaryTypeHashTableContext<UInt64>,
SetPrimaryTypeHashTableContext<UInt128>, SetPrimaryTypeHashTableContext<UInt256>,
SetFixedKeyHashTableContext<UInt64, true>, SetFixedKeyHashTableContext<UInt64, false>,
SetFixedKeyHashTableContext<UInt128, true>, SetFixedKeyHashTableContext<UInt128, false>,
SetFixedKeyHashTableContext<UInt256, true>, SetFixedKeyHashTableContext<UInt256, false>,
SetFixedKeyHashTableContext<UInt136, true>, SetFixedKeyHashTableContext<UInt136, false>>;
// Execution node for set operations. The `is_intersect` template flag selects
// which of the two flavours is instantiated (see the VIntersectNode and
// VExceptNode aliases that follow this class).
//
// Only declarations live in this header; the member functions are defined
// elsewhere.
template <bool is_intersect>
class VSetOperationNode final : public ExecNode {
public:
    VSetOperationNode(ObjectPool* pool, const TPlanNode& tnode, const DescriptorTbl& descs);

    // ---- ExecNode overrides -------------------------------------------------
    Status init(const TPlanNode& tnode, RuntimeState* state = nullptr) override;
    Status prepare(RuntimeState* state) override;
    Status open(RuntimeState* state) override;
    Status get_next(RuntimeState* state, Block* output_block, bool* eos) override;
    Status close(RuntimeState* state) override;
    void debug_string(int indentation_level, std::stringstream* out) const override;

    Status alloc_resource(RuntimeState* state) override;
    void release_resource(RuntimeState* state) override;

    Status sink(RuntimeState* state, Block* block, bool eos) override;
    Status pull(RuntimeState* state, Block* output_block, bool* eos) override;

    // ---- Set-operation specific entry points --------------------------------
    // NOTE(review): presumably consumes one block coming from child `child_id`
    // on the probe side; `eos` marks that child's last block — confirm in the
    // definition.
    Status sink_probe(RuntimeState* state, int child_id, Block* block, bool eos);

    // NOTE(review): presumably reports whether child `child_id` has been fully
    // consumed — confirm in the definition.
    bool is_child_finished(int child_id) const;

    // Returns the address of the element counter, so the caller can read and
    // modify it in place. The pointer refers to a member of this node.
    int64_t* valid_element_in_hash_tbl() { return &_valid_element_in_hash_tbl; }

private:
    void _finalize_probe(int child_id);

    // TODO: building the hash table is the same as in the join node; the shared
    // methods should be factored out and offered to both.
    void hash_table_init();
    Status hash_table_build(RuntimeState* state);
    Status process_build_block(Block& block, RuntimeState* state);
    Status extract_build_column(Block& block, ColumnRawPtrs& raw_ptrs);
    Status extract_probe_column(Block& block, ColumnRawPtrs& raw_ptrs, int child_id);
    void refresh_hash_table();

    template <typename HashTableContext>
    Status get_data_in_hashtable(HashTableContext& hash_table_ctx, Block* output_block,
                                 const int batch_size, bool* eos);

    void add_result_columns(RowRefListWithFlags& value, int& block_size);
    void create_mutable_cols(Block* output_block);
    void release_mem();

    std::unique_ptr<SetHashTableVariants> _hash_table_variants;

    std::vector<bool> _build_not_ignore_null;

    // Record element size in hashtable.
    // Initialized in-class (like the counter pointers below) so the pointer
    // returned by valid_element_in_hash_tbl() never refers to an indeterminate
    // value, whichever constructor path runs.
    int64_t _valid_element_in_hash_tbl = 0;

    // The i-th result expr list refers to the i-th child.
    std::vector<VExprContextSPtrs> _child_expr_lists;

    // Record build column type.
    DataTypes _left_table_data_types;

    // first: column_id, could point to origin column or cast column
    // second: idx mapped to column types
    std::unordered_map<int, int> _build_col_idx;

    // Record insert column id during probe.
    std::vector<uint16_t> _probe_column_inserted_id;

    Block _build_block;
    Block _probe_block;
    ColumnRawPtrs _probe_columns;
    std::vector<MutableColumnPtr> _mutable_cols;

    // Initialized in-class so the flag has a defined value from construction.
    bool _build_finished = false;
    std::vector<bool> _probe_finished_children_index;
    MutableBlock _mutable_block;

    RuntimeProfile::Counter* _build_timer = nullptr; // time to build hash table
    RuntimeProfile::Counter* _probe_timer = nullptr; // time to probe
    RuntimeProfile::Counter* _pull_timer = nullptr;  // time to pull data

    Arena _arena;

    // The build/probe helper templates are granted access to the private state
    // declared above.
    template <class HashTableContext, bool is_intersected>
    friend struct HashTableBuild;
    template <class HashTableContext, bool is_intersected>
    friend struct HashTableProbe;
};
// Named instantiations of VSetOperationNode: `is_intersect` is true for
// VIntersectNode and false for VExceptNode.
using VIntersectNode = VSetOperationNode<true>;
using VExceptNode = VSetOperationNode<false>;
} // namespace vectorized
} // namespace doris