| // Licensed to the Apache Software Foundation (ASF) under one |
| // or more contributor license agreements. See the NOTICE file |
| // distributed with this work for additional information |
| // regarding copyright ownership. The ASF licenses this file |
| // to you under the Apache License, Version 2.0 (the |
| // "License"); you may not use this file except in compliance |
| // with the License. You may obtain a copy of the License at |
| // |
| // http://www.apache.org/licenses/LICENSE-2.0 |
| // |
| // Unless required by applicable law or agreed to in writing, |
| // software distributed under the License is distributed on an |
| // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
| // KIND, either express or implied. See the License for the |
| // specific language governing permissions and limitations |
| // under the License. |
| |
| #include "vec/exec/vaggregation_node.h" |
| |
| #include <fmt/format.h> |
| #include <gen_cpp/Exprs_types.h> |
| #include <gen_cpp/Metrics_types.h> |
| #include <gen_cpp/PlanNodes_types.h> |
| |
| #include <array> |
| #include <atomic> |
| #include <memory> |
| #include <string> |
| |
| #include "common/status.h" |
| #include "exec/exec_node.h" |
| #include "runtime/block_spill_manager.h" |
| #include "runtime/define_primitive_type.h" |
| #include "runtime/descriptors.h" |
| #include "runtime/memory/mem_tracker.h" |
| #include "runtime/primitive_type.h" |
| #include "runtime/runtime_state.h" |
| #include "runtime/thread_context.h" |
| #include "vec/aggregate_functions/aggregate_function.h" |
| #include "vec/common/hash_table/hash.h" |
| #include "vec/common/hash_table/hash_map_context_creator.h" |
| #include "vec/common/hash_table/partitioned_hash_map.h" |
| #include "vec/common/hash_table/string_hash_table.h" |
| #include "vec/common/string_buffer.hpp" |
| #include "vec/core/block.h" |
| #include "vec/core/columns_with_type_and_name.h" |
| #include "vec/data_types/data_type.h" |
| #include "vec/data_types/data_type_nullable.h" |
| #include "vec/data_types/data_type_string.h" |
| #include "vec/exprs/vexpr.h" |
| #include "vec/exprs/vexpr_context.h" |
| #include "vec/utils/util.hpp" |
| |
| namespace doris { |
| class ObjectPool; |
| } // namespace doris |
| |
| namespace doris::vectorized { |
| /// The minimum reduction factor (input rows divided by output rows) to grow hash tables |
| /// in a streaming preaggregation, given that the hash tables are currently the given |
| /// size or above. The sizes roughly correspond to hash table sizes where the bucket |
| /// arrays will fit in a cache level. Intuitively, we don't want the working set of the |
| /// aggregation to expand to the next level of cache unless we're reducing the input |
| /// enough to outweigh the increased memory latency we'll incur for each hash table |
| /// lookup. |
| /// |
| /// Note that the current reduction achieved is not always a good estimate of the |
| /// final reduction. It may be biased either way depending on the ordering of the |
| /// input. If the input order is random, we will underestimate the final reduction |
| /// factor because the probability of a row having the same key as a previous row |
| /// increases as more input is processed. If the input order is correlated with the |
| /// key, skew may bias the estimate. If high cardinality keys appear first, we |
| /// may overestimate and if low cardinality keys appear first, we underestimate. |
| /// To estimate the eventual reduction achieved, we estimate the final reduction |
| /// using the planner's estimated input cardinality and the assumption that input |
| /// is in a random order. This means that we assume that the reduction factor will |
| /// increase over time. |
| struct StreamingHtMinReductionEntry { |
| // Use 'streaming_ht_min_reduction' if the total size of hash table bucket directories in |
| // bytes is greater than this threshold. |
| int min_ht_mem; |
| // The minimum reduction factor to expand the hash tables. |
| double streaming_ht_min_reduction; |
| }; |
| |
| // TODO: experimentally tune these values and also programmatically get the cache size |
| // of the machine that we're running on. |
| static constexpr StreamingHtMinReductionEntry STREAMING_HT_MIN_REDUCTION[] = { |
| // Expand up to L2 cache always. |
| {0, 0.0}, |
| // Expand into L3 cache if we look like we're getting some reduction. |
        // At present, the L2 cache is generally 1 MB or more.
| {1024 * 1024, 1.1}, |
| // Expand into main memory if we're getting a significant reduction. |
        // The L3 cache is generally 16 MB or more.
| {16 * 1024 * 1024, 2.0}, |
| }; |
| |
| static constexpr int STREAMING_HT_MIN_REDUCTION_SIZE = |
| sizeof(STREAMING_HT_MIN_REDUCTION) / sizeof(STREAMING_HT_MIN_REDUCTION[0]); |
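
// A worked example of how the table above is consulted (see
// _should_expand_preagg_hash_tables() below): with hash table buckets occupying
// 2 MB, the lookup settles on the {1 MB, 1.1} entry, so the table is expanded
// only if the observed reduction factor exceeds 1.1; at 20 MB it settles on the
// {16 MB, 2.0} entry and requires a reduction above 2.0.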
| |
| AggregationNode::AggregationNode(ObjectPool* pool, const TPlanNode& tnode, |
| const DescriptorTbl& descs) |
| : ExecNode(pool, tnode, descs), |
| _intermediate_tuple_id(tnode.agg_node.intermediate_tuple_id), |
| _output_tuple_id(tnode.agg_node.output_tuple_id), |
| _needs_finalize(tnode.agg_node.need_finalize), |
| _is_merge(false) { |
| if (tnode.agg_node.__isset.use_streaming_preaggregation) { |
| _is_streaming_preagg = tnode.agg_node.use_streaming_preaggregation; |
| if (_is_streaming_preagg) { |
| DCHECK(!tnode.agg_node.grouping_exprs.empty()) << "Streaming preaggs do grouping"; |
| DCHECK(_limit == -1) << "Preaggs have no limits"; |
| } |
| } else { |
| _is_streaming_preagg = false; |
| } |
| |
| _is_first_phase = tnode.agg_node.__isset.is_first_phase && tnode.agg_node.is_first_phase; |
| _agg_data = std::make_unique<AggregatedDataVariants>(); |
| _agg_arena_pool = std::make_unique<Arena>(); |
| } |
| |
| AggregationNode::~AggregationNode() = default; |
| |
| Status AggregationNode::init(const TPlanNode& tnode, RuntimeState* state) { |
| RETURN_IF_ERROR(ExecNode::init(tnode, state)); |
    // Ignore the return status for now; this is why we need to introduce ExecNode::init().
| RETURN_IF_ERROR(VExpr::create_expr_trees(tnode.agg_node.grouping_exprs, _probe_expr_ctxs)); |
| |
| // init aggregate functions |
| _aggregate_evaluators.reserve(tnode.agg_node.aggregate_functions.size()); |
    // For a query like `select * from (select GoodEvent from hits union select CounterID from hits) as h limit 10;`
    // (a union with only a limit), the pipeline exec engine can short-circuit the query.
| _can_short_circuit = |
| tnode.agg_node.aggregate_functions.empty() && state->enable_pipeline_exec(); |
| |
| TSortInfo dummy; |
| for (int i = 0; i < tnode.agg_node.aggregate_functions.size(); ++i) { |
| AggFnEvaluator* evaluator = nullptr; |
| RETURN_IF_ERROR(AggFnEvaluator::create( |
| _pool, tnode.agg_node.aggregate_functions[i], |
| tnode.agg_node.__isset.agg_sort_infos ? tnode.agg_node.agg_sort_infos[i] : dummy, |
| &evaluator)); |
| _aggregate_evaluators.push_back(evaluator); |
| } |
| |
| const auto& agg_functions = tnode.agg_node.aggregate_functions; |
| _external_agg_bytes_threshold = state->external_agg_bytes_threshold(); |
| |
| if (_external_agg_bytes_threshold > 0) { |
| size_t spill_partition_count_bits = 4; |
| if (state->query_options().__isset.external_agg_partition_bits) { |
| spill_partition_count_bits = state->query_options().external_agg_partition_bits; |
| } |
| |
| _spill_partition_helper = |
| std::make_unique<SpillPartitionHelper>(spill_partition_count_bits); |
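        // For example, with external_agg_partition_bits = 4 this presumably
        // yields 2^4 = 16 spill partitions (assuming SpillPartitionHelper
        // derives a power-of-two partition_count from the bit count, as
        // suggested by how partition_count is used in _spill_hash_table()).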
| } |
| |
| _is_merge = std::any_of(agg_functions.cbegin(), agg_functions.cend(), |
| [](const auto& e) { return e.nodes[0].agg_expr.is_merge_agg; }); |
| return Status::OK(); |
| } |
| |
| void AggregationNode::_init_hash_method(const VExprContextSPtrs& probe_exprs) { |
| DCHECK(probe_exprs.size() >= 1); |
| |
| using Type = AggregatedDataVariants::Type; |
| Type t(Type::serialized); |
| |
| if (probe_exprs.size() == 1) { |
| auto is_nullable = probe_exprs[0]->root()->is_nullable(); |
| PrimitiveType type = probe_exprs[0]->root()->result_type(); |
| switch (type) { |
| case TYPE_TINYINT: |
| case TYPE_BOOLEAN: |
| case TYPE_SMALLINT: |
| case TYPE_INT: |
| case TYPE_FLOAT: |
| case TYPE_DATEV2: |
| case TYPE_BIGINT: |
| case TYPE_DOUBLE: |
| case TYPE_DATE: |
| case TYPE_DATETIME: |
| case TYPE_DATETIMEV2: |
| case TYPE_LARGEINT: |
| case TYPE_DECIMALV2: |
| case TYPE_DECIMAL32: |
| case TYPE_DECIMAL64: |
| case TYPE_DECIMAL128I: { |
| size_t size = get_primitive_type_size(type); |
| if (size == 1) { |
| t = Type::int8_key; |
| } else if (size == 2) { |
| t = Type::int16_key; |
| } else if (size == 4) { |
| t = Type::int32_key; |
| } else if (size == 8) { |
| t = Type::int64_key; |
| } else if (size == 16) { |
| t = Type::int128_key; |
| } else { |
| throw Exception(ErrorCode::INTERNAL_ERROR, |
| "meet invalid type size, size={}, type={}", size, |
| type_to_string(type)); |
| } |
| break; |
| } |
| case TYPE_CHAR: |
| case TYPE_VARCHAR: |
| case TYPE_STRING: { |
| t = Type::string_key; |
| break; |
| } |
| default: |
| t = Type::serialized; |
| } |
| |
| _agg_data->init(get_hash_key_type_with_phase(t, !_is_first_phase), is_nullable); |
| } else { |
| if (!try_get_hash_map_context_fixed<PHNormalHashMap, HashCRC32, AggregateDataPtr>( |
| _agg_data->method_variant, probe_exprs)) { |
| _agg_data->init(Type::serialized); |
| } |
| } |
| } |
| |
| Status AggregationNode::prepare_profile(RuntimeState* state) { |
| _memory_usage_counter = ADD_LABEL_COUNTER(runtime_profile(), "MemoryUsage"); |
| _hash_table_memory_usage = |
| ADD_CHILD_COUNTER(runtime_profile(), "HashTable", TUnit::BYTES, "MemoryUsage"); |
| _serialize_key_arena_memory_usage = runtime_profile()->AddHighWaterMarkCounter( |
| "SerializeKeyArena", TUnit::BYTES, "MemoryUsage"); |
| |
| _build_table_convert_timer = ADD_TIMER(runtime_profile(), "BuildConvertToPartitionedTime"); |
| _serialize_key_timer = ADD_TIMER(runtime_profile(), "SerializeKeyTime"); |
| _merge_timer = ADD_TIMER(runtime_profile(), "MergeTime"); |
| _expr_timer = ADD_TIMER(runtime_profile(), "ExprTime"); |
| _get_results_timer = ADD_TIMER(runtime_profile(), "GetResultsTime"); |
| _serialize_data_timer = ADD_TIMER(runtime_profile(), "SerializeDataTime"); |
| _serialize_result_timer = ADD_TIMER(runtime_profile(), "SerializeResultTime"); |
| _deserialize_data_timer = ADD_TIMER(runtime_profile(), "DeserializeAndMergeTime"); |
| _hash_table_compute_timer = ADD_TIMER(runtime_profile(), "HashTableComputeTime"); |
| _hash_table_emplace_timer = ADD_TIMER(runtime_profile(), "HashTableEmplaceTime"); |
| _hash_table_iterate_timer = ADD_TIMER(runtime_profile(), "HashTableIterateTime"); |
| _insert_keys_to_column_timer = ADD_TIMER(runtime_profile(), "InsertKeysToColumnTime"); |
| _streaming_agg_timer = ADD_TIMER(runtime_profile(), "StreamingAggTime"); |
| _hash_table_size_counter = ADD_COUNTER(runtime_profile(), "HashTableSize", TUnit::UNIT); |
| _hash_table_input_counter = ADD_COUNTER(runtime_profile(), "HashTableInputCount", TUnit::UNIT); |
| _max_row_size_counter = ADD_COUNTER(runtime_profile(), "MaxRowSizeInBytes", TUnit::UNIT); |
| COUNTER_SET(_max_row_size_counter, (int64_t)0); |
| _intermediate_tuple_desc = state->desc_tbl().get_tuple_descriptor(_intermediate_tuple_id); |
| _output_tuple_desc = state->desc_tbl().get_tuple_descriptor(_output_tuple_id); |
| DCHECK_EQ(_intermediate_tuple_desc->slots().size(), _output_tuple_desc->slots().size()); |
| RETURN_IF_ERROR(VExpr::prepare(_probe_expr_ctxs, state, child(0)->row_desc())); |
| |
| _agg_profile_arena = std::make_unique<Arena>(); |
| |
| int j = _probe_expr_ctxs.size(); |
| for (int i = 0; i < j; ++i) { |
| auto nullable_output = _output_tuple_desc->slots()[i]->is_nullable(); |
| auto nullable_input = _probe_expr_ctxs[i]->root()->is_nullable(); |
| if (nullable_output != nullable_input) { |
| DCHECK(nullable_output); |
| _make_nullable_keys.emplace_back(i); |
| } |
| } |
| for (int i = 0; i < _aggregate_evaluators.size(); ++i, ++j) { |
| SlotDescriptor* intermediate_slot_desc = _intermediate_tuple_desc->slots()[j]; |
| SlotDescriptor* output_slot_desc = _output_tuple_desc->slots()[j]; |
| RETURN_IF_ERROR(_aggregate_evaluators[i]->prepare( |
| state, child(0)->row_desc(), intermediate_slot_desc, output_slot_desc)); |
| } |
| |
| // set profile timer to evaluators |
| for (auto& evaluator : _aggregate_evaluators) { |
| evaluator->set_timer(_merge_timer, _expr_timer); |
| } |
| |
| _offsets_of_aggregate_states.resize(_aggregate_evaluators.size()); |
| |
| for (size_t i = 0; i < _aggregate_evaluators.size(); ++i) { |
| _offsets_of_aggregate_states[i] = _total_size_of_aggregate_states; |
| |
| const auto& agg_function = _aggregate_evaluators[i]->function(); |
        // Aggregate states are aligned based on the maximum alignment requirement.
| _align_aggregate_states = std::max(_align_aggregate_states, agg_function->align_of_data()); |
| _total_size_of_aggregate_states += agg_function->size_of_data(); |
| |
        // If this is not the last aggregate_state, pad it so that the next aggregate_state is aligned.
| if (i + 1 < _aggregate_evaluators.size()) { |
| size_t alignment_of_next_state = |
| _aggregate_evaluators[i + 1]->function()->align_of_data(); |
| if ((alignment_of_next_state & (alignment_of_next_state - 1)) != 0) { |
| return Status::RuntimeError("Logical error: align_of_data is not 2^N"); |
| } |
| |
| /// Extend total_size to next alignment requirement |
| /// Add padding by rounding up 'total_size_of_aggregate_states' to be a multiplier of alignment_of_next_state. |
| _total_size_of_aggregate_states = |
| (_total_size_of_aggregate_states + alignment_of_next_state - 1) / |
| alignment_of_next_state * alignment_of_next_state; |
| } |
| } |
| |
| if (_probe_expr_ctxs.empty()) { |
| _agg_data->init(AggregatedDataVariants::Type::without_key); |
| |
| _agg_data->without_key = reinterpret_cast<AggregateDataPtr>( |
| _agg_profile_arena->alloc(_total_size_of_aggregate_states)); |
| |
| if (_is_merge) { |
| _executor.execute = std::bind<Status>(&AggregationNode::_merge_without_key, this, |
| std::placeholders::_1); |
| } else { |
| _executor.execute = std::bind<Status>(&AggregationNode::_execute_without_key, this, |
| std::placeholders::_1); |
| } |
| |
| if (_needs_finalize) { |
| _executor.get_result = std::bind<Status>(&AggregationNode::_get_without_key_result, |
| this, std::placeholders::_1, |
| std::placeholders::_2, std::placeholders::_3); |
| } else { |
| _executor.get_result = std::bind<Status>(&AggregationNode::_serialize_without_key, this, |
| std::placeholders::_1, std::placeholders::_2, |
| std::placeholders::_3); |
| } |
| |
| _executor.update_memusage = |
| std::bind<void>(&AggregationNode::_update_memusage_without_key, this); |
| _executor.close = std::bind<void>(&AggregationNode::_close_without_key, this); |
| } else { |
| _init_hash_method(_probe_expr_ctxs); |
| |
| std::visit( |
| [&](auto&& agg_method) { |
| using HashTableType = std::decay_t<decltype(agg_method)>; |
| using KeyType = typename HashTableType::Key; |
| |
| /// some aggregate functions (like AVG for decimal) have align issues. |
| _aggregate_data_container.reset(new AggregateDataContainer( |
| sizeof(KeyType), |
| ((_total_size_of_aggregate_states + _align_aggregate_states - 1) / |
| _align_aggregate_states) * |
| _align_aggregate_states)); |
| }, |
| _agg_data->method_variant); |
| if (_is_merge) { |
| _executor.execute = std::bind<Status>(&AggregationNode::_merge_with_serialized_key, |
| this, std::placeholders::_1); |
| } else { |
| _executor.execute = std::bind<Status>(&AggregationNode::_execute_with_serialized_key, |
| this, std::placeholders::_1); |
| } |
| |
| if (_is_streaming_preagg) { |
| _executor.pre_agg = |
| std::bind<Status>(&AggregationNode::_pre_agg_with_serialized_key, this, |
| std::placeholders::_1, std::placeholders::_2); |
| } |
| |
| if (_needs_finalize) { |
| _executor.get_result = std::bind<Status>( |
| &AggregationNode::_get_with_serialized_key_result, this, std::placeholders::_1, |
| std::placeholders::_2, std::placeholders::_3); |
| } else { |
| _executor.get_result = std::bind<Status>( |
| &AggregationNode::_serialize_with_serialized_key_result, this, |
| std::placeholders::_1, std::placeholders::_2, std::placeholders::_3); |
| } |
| _executor.update_memusage = |
| std::bind<void>(&AggregationNode::_update_memusage_with_serialized_key, this); |
| _executor.close = std::bind<void>(&AggregationNode::_close_with_serialized_key, this); |
| |
| _should_limit_output = _limit != -1 && // has limit |
| _conjuncts.empty() && // no having conjunct |
| _needs_finalize; // agg's finalize step |
| } |
| |
| fmt::memory_buffer msg; |
| fmt::format_to(msg, |
| "(_is_merge: {}, _needs_finalize: {}, Streaming Preaggregation: {}, agg size: " |
| "{}, limit: {})", |
| _is_merge ? "true" : "false", _needs_finalize ? "true" : "false", |
| _is_streaming_preagg ? "true" : "false", |
| std::to_string(_aggregate_evaluators.size()), std::to_string(_limit)); |
| runtime_profile()->add_info_string("AggInfos", fmt::to_string(msg)); |
| return Status::OK(); |
| } |
| |
| Status AggregationNode::prepare(RuntimeState* state) { |
| SCOPED_TIMER(_runtime_profile->total_time_counter()); |
| |
| RETURN_IF_ERROR(ExecNode::prepare(state)); |
| SCOPED_TIMER(_exec_timer); |
| RETURN_IF_ERROR(prepare_profile(state)); |
| return Status::OK(); |
| } |
| |
| Status AggregationNode::alloc_resource(doris::RuntimeState* state) { |
| SCOPED_TIMER(_exec_timer); |
| RETURN_IF_ERROR(ExecNode::alloc_resource(state)); |
| |
| RETURN_IF_ERROR(VExpr::open(_probe_expr_ctxs, state)); |
| |
| for (int i = 0; i < _aggregate_evaluators.size(); ++i) { |
| RETURN_IF_ERROR(_aggregate_evaluators[i]->open(state)); |
| _aggregate_evaluators[i]->set_version(state->be_exec_version()); |
| } |
| |
    // Move _create_agg_status from prepare to open: prepare and open may run on
    // different threads, and the prepare thread may be unable to get the JVM.
| if (_probe_expr_ctxs.empty()) { |
        // _create_agg_status may acquire a lot of memory; the allocation may fail when memory is scarce.
| RETURN_IF_CATCH_EXCEPTION(static_cast<void>(_create_agg_status(_agg_data->without_key))); |
| _agg_data_created_without_key = true; |
| } |
| |
| return Status::OK(); |
| } |
| |
| Status AggregationNode::open(RuntimeState* state) { |
| SCOPED_TIMER(_runtime_profile->total_time_counter()); |
| RETURN_IF_ERROR(ExecNode::open(state)); |
| RETURN_IF_ERROR(_children[0]->open(state)); |
| |
    // Streaming preaggregations do all processing in get_next().
| if (_is_streaming_preagg) { |
| return Status::OK(); |
| } |
| bool eos = false; |
| Block block; |
| while (!eos) { |
| RETURN_IF_CANCELLED(state); |
| release_block_memory(block); |
| RETURN_IF_ERROR(_children[0]->get_next_after_projects( |
| state, &block, &eos, |
| std::bind((Status(ExecNode::*)(RuntimeState*, vectorized::Block*, bool*)) & |
| ExecNode::get_next, |
| _children[0], std::placeholders::_1, std::placeholders::_2, |
| std::placeholders::_3))); |
| RETURN_IF_ERROR(sink(state, &block, eos)); |
| } |
| static_cast<void>(_children[0]->close(state)); |
| |
| return Status::OK(); |
| } |
| |
| Status AggregationNode::do_pre_agg(vectorized::Block* input_block, |
| vectorized::Block* output_block) { |
| SCOPED_TIMER(_exec_timer); |
| RETURN_IF_ERROR(_executor.pre_agg(input_block, output_block)); |
| |
    // Streaming preagg uses _num_rows_returned to decide whether to continue pre-aggregating.
| _num_rows_returned += output_block->rows(); |
| _make_nullable_output_key(output_block); |
| COUNTER_SET(_rows_returned_counter, _num_rows_returned); |
| _executor.update_memusage(); |
| return Status::OK(); |
| } |
| |
| Status AggregationNode::get_next(RuntimeState* state, Block* block, bool* eos) { |
| SCOPED_TIMER(_runtime_profile->total_time_counter()); |
| |
| if (_is_streaming_preagg) { |
| RETURN_IF_CANCELLED(state); |
| release_block_memory(_preagg_block); |
| while (_preagg_block.rows() == 0 && !_child_eos) { |
| RETURN_IF_ERROR(_children[0]->get_next_after_projects( |
| state, &_preagg_block, &_child_eos, |
| std::bind((Status(ExecNode::*)(RuntimeState*, vectorized::Block*, bool*)) & |
| ExecNode::get_next, |
| _children[0], std::placeholders::_1, std::placeholders::_2, |
| std::placeholders::_3))); |
| }; |
| { |
| if (_preagg_block.rows() != 0) { |
| RETURN_IF_ERROR(do_pre_agg(&_preagg_block, block)); |
| } else { |
| RETURN_IF_ERROR(pull(state, block, eos)); |
| } |
| } |
| } else { |
| RETURN_IF_ERROR(pull(state, block, eos)); |
| } |
| return Status::OK(); |
| } |
| |
| Status AggregationNode::pull(doris::RuntimeState* state, vectorized::Block* block, bool* eos) { |
| SCOPED_TIMER(_exec_timer); |
| RETURN_IF_ERROR(_executor.get_result(state, block, eos)); |
| _make_nullable_output_key(block); |
    // Evaluate the having clause; it must not be executed in the streaming preagg.
| RETURN_IF_ERROR(VExprContext::filter_block(_conjuncts, block, block->columns())); |
| reached_limit(block, eos); |
| |
| return Status::OK(); |
| } |
| |
| Status AggregationNode::sink(doris::RuntimeState* state, vectorized::Block* in_block, bool eos) { |
| SCOPED_TIMER(_exec_timer); |
| if (in_block->rows() > 0) { |
| RETURN_IF_ERROR(_executor.execute(in_block)); |
| RETURN_IF_ERROR(_try_spill_disk()); |
| _executor.update_memusage(); |
| } |
| if (eos) { |
| if (_spill_context.has_data) { |
| static_cast<void>(_try_spill_disk(true)); |
| RETURN_IF_ERROR(_spill_context.prepare_for_reading()); |
| } |
| _can_read = true; |
| } |
| return Status::OK(); |
| } |
| |
| void AggregationNode::release_resource(RuntimeState* state) { |
| if (_executor.close) { |
| _executor.close(); |
| } |
| |
| /// _hash_table_size_counter may be null if prepare failed. |
| if (_hash_table_size_counter) { |
| std::visit( |
| [&](auto&& agg_method) { |
| COUNTER_SET(_hash_table_size_counter, int64_t(agg_method.hash_table->size())); |
| }, |
| _agg_data->method_variant); |
| } |
| _release_mem(); |
| ExecNode::release_resource(state); |
| } |
| |
| Status AggregationNode::close(RuntimeState* state) { |
| if (is_closed()) { |
| return Status::OK(); |
| } |
| return ExecNode::close(state); |
| } |
| |
| Status AggregationNode::_create_agg_status(AggregateDataPtr data) { |
| for (int i = 0; i < _aggregate_evaluators.size(); ++i) { |
| try { |
| _aggregate_evaluators[i]->create(data + _offsets_of_aggregate_states[i]); |
| } catch (...) { |
| for (int j = 0; j < i; ++j) { |
| _aggregate_evaluators[j]->destroy(data + _offsets_of_aggregate_states[j]); |
| } |
| throw; |
| } |
| } |
| return Status::OK(); |
| } |
| |
| Status AggregationNode::_destroy_agg_status(AggregateDataPtr data) { |
| for (int i = 0; i < _aggregate_evaluators.size(); ++i) { |
| _aggregate_evaluators[i]->function()->destroy(data + _offsets_of_aggregate_states[i]); |
| } |
| return Status::OK(); |
| } |
| |
| Status AggregationNode::_get_without_key_result(RuntimeState* state, Block* block, bool* eos) { |
| DCHECK(_agg_data->without_key != nullptr); |
| block->clear(); |
| |
| *block = VectorizedUtils::create_empty_columnswithtypename(_row_descriptor); |
| int agg_size = _aggregate_evaluators.size(); |
| |
| MutableColumns columns(agg_size); |
| std::vector<DataTypePtr> data_types(agg_size); |
| for (int i = 0; i < _aggregate_evaluators.size(); ++i) { |
| data_types[i] = _aggregate_evaluators[i]->function()->get_return_type(); |
| columns[i] = data_types[i]->create_column(); |
| } |
| |
| for (int i = 0; i < _aggregate_evaluators.size(); ++i) { |
| auto* column = columns[i].get(); |
| _aggregate_evaluators[i]->insert_result_info( |
| _agg_data->without_key + _offsets_of_aggregate_states[i], column); |
| } |
| |
| const auto& block_schema = block->get_columns_with_type_and_name(); |
| DCHECK_EQ(block_schema.size(), columns.size()); |
| for (int i = 0; i < block_schema.size(); ++i) { |
| const auto column_type = block_schema[i].type; |
| if (!column_type->equals(*data_types[i])) { |
| if (!is_array(remove_nullable(column_type))) { |
| if (!column_type->is_nullable() || data_types[i]->is_nullable() || |
| !remove_nullable(column_type)->equals(*data_types[i])) { |
| return Status::InternalError( |
| "column_type not match data_types, column_type={}, data_types={}", |
| column_type->get_name(), data_types[i]->get_name()); |
| } |
| } |
| |
| if (column_type->is_nullable() && !data_types[i]->is_nullable()) { |
| ColumnPtr ptr = std::move(columns[i]); |
                // Except for `count`, aggregate functions over an empty set
                // should return null, so check the child's returned row count here.
| ptr = make_nullable(ptr, _children[0]->rows_returned() == 0); |
| columns[i] = ptr->assume_mutable(); |
| } |
| } |
| } |
| |
| block->set_columns(std::move(columns)); |
| *eos = true; |
| return Status::OK(); |
| } |
| |
| Status AggregationNode::_serialize_without_key(RuntimeState* state, Block* block, bool* eos) { |
    // 1. `child(0)->rows_returned() == 0` means no data was produced by the child.
    //    A second-level aggregation node should return a NULL result in that case,
    //    while a first-level aggregation node sets `eos = true` and returns directly.
| SCOPED_TIMER(_serialize_result_timer); |
| if (UNLIKELY(_children[0]->rows_returned() == 0)) { |
| *eos = true; |
| return Status::OK(); |
| } |
| block->clear(); |
| |
| DCHECK(_agg_data->without_key != nullptr); |
| int agg_size = _aggregate_evaluators.size(); |
| |
| MutableColumns value_columns(agg_size); |
| std::vector<DataTypePtr> data_types(agg_size); |
| // will serialize data to string column |
| for (int i = 0; i < _aggregate_evaluators.size(); ++i) { |
| data_types[i] = _aggregate_evaluators[i]->function()->get_serialized_type(); |
| value_columns[i] = _aggregate_evaluators[i]->function()->create_serialize_column(); |
| } |
| |
| for (int i = 0; i < _aggregate_evaluators.size(); ++i) { |
| _aggregate_evaluators[i]->function()->serialize_without_key_to_column( |
| _agg_data->without_key + _offsets_of_aggregate_states[i], *value_columns[i]); |
| } |
| |
| { |
| ColumnsWithTypeAndName data_with_schema; |
| for (int i = 0; i < _aggregate_evaluators.size(); ++i) { |
| ColumnWithTypeAndName column_with_schema = {nullptr, data_types[i], ""}; |
| data_with_schema.push_back(std::move(column_with_schema)); |
| } |
| *block = Block(data_with_schema); |
| } |
| |
| block->set_columns(std::move(value_columns)); |
| *eos = true; |
| return Status::OK(); |
| } |
| |
| Status AggregationNode::_execute_without_key(Block* block) { |
| DCHECK(_agg_data->without_key != nullptr); |
| for (int i = 0; i < _aggregate_evaluators.size(); ++i) { |
| RETURN_IF_ERROR(_aggregate_evaluators[i]->execute_single_add( |
| block, _agg_data->without_key + _offsets_of_aggregate_states[i], |
| _agg_arena_pool.get())); |
| } |
| return Status::OK(); |
| } |
| |
| Status AggregationNode::_merge_without_key(Block* block) { |
| SCOPED_TIMER(_merge_timer); |
| DCHECK(_agg_data->without_key != nullptr); |
| for (int i = 0; i < _aggregate_evaluators.size(); ++i) { |
| if (_aggregate_evaluators[i]->is_merge()) { |
| int col_id = _get_slot_column_id(_aggregate_evaluators[i]); |
| auto column = block->get_by_position(col_id).column; |
| if (column->is_nullable()) { |
| column = ((ColumnNullable*)column.get())->get_nested_column_ptr(); |
| } |
| |
| SCOPED_TIMER(_deserialize_data_timer); |
| _aggregate_evaluators[i]->function()->deserialize_and_merge_from_column( |
| _agg_data->without_key + _offsets_of_aggregate_states[i], *column, |
| _agg_arena_pool.get()); |
| } else { |
| RETURN_IF_ERROR(_aggregate_evaluators[i]->execute_single_add( |
| block, _agg_data->without_key + _offsets_of_aggregate_states[i], |
| _agg_arena_pool.get())); |
| } |
| } |
| return Status::OK(); |
| } |
| |
| void AggregationNode::_update_memusage_without_key() { |
| auto arena_memory_usage = _agg_arena_pool->size() - _mem_usage_record.used_in_arena; |
| mem_tracker()->consume(arena_memory_usage); |
| _serialize_key_arena_memory_usage->add(arena_memory_usage); |
| _mem_usage_record.used_in_arena = _agg_arena_pool->size(); |
| } |
| |
| void AggregationNode::_close_without_key() { |
    // prepare may have failed, so the agg data may never have been created,
    // but close still tries to destroy it. If the agg data holds an
    // uninitialized BitmapValue, destroying it would core dump.
| if (_agg_data_created_without_key) { |
| static_cast<void>(_destroy_agg_status(_agg_data->without_key)); |
| _agg_data_created_without_key = false; |
| } |
| release_tracker(); |
| } |
| |
| void AggregationNode::_make_nullable_output_key(Block* block) { |
| if (block->rows() != 0) { |
| for (auto cid : _make_nullable_keys) { |
| block->get_by_position(cid).column = make_nullable(block->get_by_position(cid).column); |
| block->get_by_position(cid).type = make_nullable(block->get_by_position(cid).type); |
| } |
| } |
| } |
| |
| bool AggregationNode::_should_expand_preagg_hash_tables() { |
| if (!_should_expand_hash_table) { |
| return false; |
| } |
| |
| return std::visit( |
| [&](auto&& agg_method) -> bool { |
| auto& hash_tbl = *agg_method.hash_table; |
| auto [ht_mem, ht_rows] = |
| std::pair {hash_tbl.get_buffer_size_in_bytes(), hash_tbl.size()}; |
| |
| // Need some rows in tables to have valid statistics. |
| if (ht_rows == 0) { |
| return true; |
| } |
| |
| // Find the appropriate reduction factor in our table for the current hash table sizes. |
| int cache_level = 0; |
| while (cache_level + 1 < STREAMING_HT_MIN_REDUCTION_SIZE && |
| ht_mem >= STREAMING_HT_MIN_REDUCTION[cache_level + 1].min_ht_mem) { |
| ++cache_level; |
| } |
| |
| // Compare the number of rows in the hash table with the number of input rows that |
| // were aggregated into it. Exclude passed through rows from this calculation since |
| // they were not in hash tables. |
| const int64_t input_rows = _children[0]->rows_returned(); |
| const int64_t aggregated_input_rows = input_rows - _num_rows_returned; |
| // TODO chenhao |
| // const int64_t expected_input_rows = estimated_input_cardinality_ - num_rows_returned_; |
| double current_reduction = static_cast<double>(aggregated_input_rows) / ht_rows; |
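            // For example, 1,000,000 aggregated input rows producing 100,000
            // distinct groups gives current_reduction = 10.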
| |
| // TODO: workaround for IMPALA-2490: subplan node rows_returned counter may be |
| // inaccurate, which could lead to a divide by zero below. |
| if (aggregated_input_rows <= 0) { |
| return true; |
| } |
| |
| // Extrapolate the current reduction factor (r) using the formula |
| // R = 1 + (N / n) * (r - 1), where R is the reduction factor over the full input data |
| // set, N is the number of input rows, excluding passed-through rows, and n is the |
| // number of rows inserted or merged into the hash tables. This is a very rough |
| // approximation but is good enough to be useful. |
| // TODO: consider collecting more statistics to better estimate reduction. |
| // double estimated_reduction = aggregated_input_rows >= expected_input_rows |
| // ? current_reduction |
| // : 1 + (expected_input_rows / aggregated_input_rows) * (current_reduction - 1); |
| double min_reduction = |
| STREAMING_HT_MIN_REDUCTION[cache_level].streaming_ht_min_reduction; |
| |
| // COUNTER_SET(preagg_estimated_reduction_, estimated_reduction); |
| // COUNTER_SET(preagg_streaming_ht_min_reduction_, min_reduction); |
| // return estimated_reduction > min_reduction; |
| _should_expand_hash_table = current_reduction > min_reduction; |
| return _should_expand_hash_table; |
| }, |
| _agg_data->method_variant); |
| } |
| |
| size_t AggregationNode::_memory_usage() const { |
| size_t usage = 0; |
| if (_agg_arena_pool) { |
| usage += _agg_arena_pool->size(); |
| } |
| |
| if (_aggregate_data_container) { |
| usage += _aggregate_data_container->memory_usage(); |
| } |
| |
| return usage; |
| } |
| |
| Status AggregationNode::_reset_hash_table() { |
| return std::visit( |
| [&](auto&& agg_method) { |
| auto& hash_table = *agg_method.hash_table; |
| using HashTableType = std::decay_t<decltype(hash_table)>; |
| |
| agg_method.reset(); |
| |
| hash_table.for_each_mapped([&](auto& mapped) { |
| if (mapped) { |
| static_cast<void>(_destroy_agg_status(mapped)); |
| mapped = nullptr; |
| } |
| }); |
| |
| _aggregate_data_container = std::make_unique<AggregateDataContainer>( |
| sizeof(typename HashTableType::key_type), |
| ((_total_size_of_aggregate_states + _align_aggregate_states - 1) / |
| _align_aggregate_states) * |
| _align_aggregate_states); |
| agg_method.hash_table.reset(new HashTableType()); |
| _agg_arena_pool = std::make_unique<Arena>(); |
| return Status::OK(); |
| }, |
| _agg_data->method_variant); |
| } |
| |
| size_t AggregationNode::_get_hash_table_size() { |
| return std::visit([&](auto&& agg_method) { return agg_method.hash_table->size(); }, |
| _agg_data->method_variant); |
| } |
| |
| void AggregationNode::_emplace_into_hash_table(AggregateDataPtr* places, ColumnRawPtrs& key_columns, |
| const size_t num_rows) { |
| std::visit( |
| [&](auto&& agg_method) -> void { |
| SCOPED_TIMER(_hash_table_compute_timer); |
| using HashMethodType = std::decay_t<decltype(agg_method)>; |
| using AggState = typename HashMethodType::State; |
| AggState state(key_columns); |
| agg_method.init_serialized_keys(key_columns, num_rows); |
| |
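                // 'creator' runs only for keys not yet present in the hash
                // table: it persists the key and its origin into the arena,
                // appends an aggregate-state slot to _aggregate_data_container,
                // initializes the state, and publishes it via ctor().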
| auto creator = [this](const auto& ctor, auto& key, auto& origin) { |
| try { |
| HashMethodType::try_presis_key_and_origin(key, origin, *_agg_arena_pool); |
| auto mapped = _aggregate_data_container->append_data(origin); |
| auto st = _create_agg_status(mapped); |
| if (!st) { |
| throw Exception(st.code(), st.to_string()); |
| } |
| ctor(key, mapped); |
| } catch (...) { |
                        // Exception safety: if memory allocation or agg status
                        // creation throws, destructors will not run, so publish
                        // nullptr for this key before rethrowing.
| ctor(key, nullptr); |
| throw; |
| } |
| }; |
| |
| auto creator_for_null_key = [this](auto& mapped) { |
| mapped = _agg_arena_pool->aligned_alloc(_total_size_of_aggregate_states, |
| _align_aggregate_states); |
| auto st = _create_agg_status(mapped); |
| if (!st) { |
| throw Exception(st.code(), st.to_string()); |
| } |
| }; |
| |
| SCOPED_TIMER(_hash_table_emplace_timer); |
| for (size_t i = 0; i < num_rows; ++i) { |
| places[i] = agg_method.lazy_emplace(state, i, creator, creator_for_null_key); |
| } |
| COUNTER_UPDATE(_hash_table_input_counter, num_rows); |
| }, |
| _agg_data->method_variant); |
| } |
| |
| void AggregationNode::_find_in_hash_table(AggregateDataPtr* places, ColumnRawPtrs& key_columns, |
| size_t num_rows) { |
| std::visit( |
| [&](auto&& agg_method) -> void { |
| using HashMethodType = std::decay_t<decltype(agg_method)>; |
| using AggState = typename HashMethodType::State; |
| AggState state(key_columns); |
| agg_method.init_serialized_keys(key_columns, num_rows); |
| |
| /// For all rows. |
| for (size_t i = 0; i < num_rows; ++i) { |
| auto find_result = agg_method.find(state, i); |
| if (find_result.is_found()) { |
| places[i] = find_result.get_mapped(); |
| } else { |
| places[i] = nullptr; |
| } |
| } |
| }, |
| _agg_data->method_variant); |
| } |
| |
| Status AggregationNode::_pre_agg_with_serialized_key(doris::vectorized::Block* in_block, |
| doris::vectorized::Block* out_block) { |
| DCHECK(!_probe_expr_ctxs.empty()); |
| |
| size_t key_size = _probe_expr_ctxs.size(); |
| ColumnRawPtrs key_columns(key_size); |
| { |
| SCOPED_TIMER(_expr_timer); |
| for (size_t i = 0; i < key_size; ++i) { |
| int result_column_id = -1; |
| RETURN_IF_ERROR(_probe_expr_ctxs[i]->execute(in_block, &result_column_id)); |
| in_block->get_by_position(result_column_id).column = |
| in_block->get_by_position(result_column_id) |
| .column->convert_to_full_column_if_const(); |
| key_columns[i] = in_block->get_by_position(result_column_id).column.get(); |
| } |
| } |
| |
| int rows = in_block->rows(); |
| if (_places.size() < rows) { |
| _places.resize(rows); |
| } |
| |
| // Stop expanding hash tables if we're not reducing the input sufficiently. As our |
| // hash tables expand out of each level of cache hierarchy, every hash table lookup |
| // will take longer. We also may not be able to expand hash tables because of memory |
| // pressure. In either case we should always use the remaining space in the hash table |
| // to avoid wasting memory. |
    // But a fixed hash map never needs to expand.
| bool ret_flag = false; |
| RETURN_IF_ERROR(std::visit( |
| [&](auto&& agg_method) -> Status { |
| if (auto& hash_tbl = *agg_method.hash_table; |
| hash_tbl.add_elem_size_overflow(rows)) { |
| /// If too much memory is used during the pre-aggregation stage, |
| /// it is better to output the data directly without performing further aggregation. |
| const bool used_too_much_memory = |
| (_external_agg_bytes_threshold > 0 && |
| _memory_usage() > _external_agg_bytes_threshold); |
                // Do not try to aggregate; just serialize the input and return
                // it directly via out_block.
| if (!_should_expand_preagg_hash_tables() || used_too_much_memory) { |
| SCOPED_TIMER(_streaming_agg_timer); |
| ret_flag = true; |
| |
                    // Value data will be serialized into string columns;
                    // non-nullable key columns (ids in `_make_nullable_keys`)
                    // will be converted to nullable.
| bool mem_reuse = _make_nullable_keys.empty() && out_block->mem_reuse(); |
| |
| std::vector<DataTypePtr> data_types; |
| MutableColumns value_columns; |
| for (int i = 0; i < _aggregate_evaluators.size(); ++i) { |
| auto data_type = |
| _aggregate_evaluators[i]->function()->get_serialized_type(); |
| if (mem_reuse) { |
| value_columns.emplace_back( |
| std::move(*out_block->get_by_position(i + key_size).column) |
| .mutate()); |
| } else { |
                            // The slot type of the value should always be string.
| value_columns.emplace_back(_aggregate_evaluators[i] |
| ->function() |
| ->create_serialize_column()); |
| } |
| data_types.emplace_back(data_type); |
| } |
| |
| for (int i = 0; i != _aggregate_evaluators.size(); ++i) { |
| SCOPED_TIMER(_serialize_data_timer); |
| RETURN_IF_ERROR( |
| _aggregate_evaluators[i]->streaming_agg_serialize_to_column( |
| in_block, value_columns[i], rows, |
| _agg_arena_pool.get())); |
| } |
| |
| if (!mem_reuse) { |
| ColumnsWithTypeAndName columns_with_schema; |
| for (int i = 0; i < key_size; ++i) { |
| columns_with_schema.emplace_back( |
| key_columns[i]->clone_resized(rows), |
| _probe_expr_ctxs[i]->root()->data_type(), |
| _probe_expr_ctxs[i]->root()->expr_name()); |
| } |
| for (int i = 0; i < value_columns.size(); ++i) { |
| columns_with_schema.emplace_back(std::move(value_columns[i]), |
| data_types[i], ""); |
| } |
| out_block->swap(Block(columns_with_schema)); |
| } else { |
| for (int i = 0; i < key_size; ++i) { |
| std::move(*out_block->get_by_position(i).column) |
| .mutate() |
| ->insert_range_from(*key_columns[i], 0, rows); |
| } |
| } |
| } |
| } |
| return Status::OK(); |
| }, |
| _agg_data->method_variant)); |
| |
| if (!ret_flag) { |
| RETURN_IF_CATCH_EXCEPTION(_emplace_into_hash_table(_places.data(), key_columns, rows)); |
| |
| for (int i = 0; i < _aggregate_evaluators.size(); ++i) { |
| RETURN_IF_ERROR(_aggregate_evaluators[i]->execute_batch_add( |
| in_block, _offsets_of_aggregate_states[i], _places.data(), |
| _agg_arena_pool.get(), _should_expand_hash_table)); |
| } |
| } |
| |
| return Status::OK(); |
| } |
| |
| template <typename HashTableCtxType, typename HashTableType, typename KeyType> |
| Status AggregationNode::_serialize_hash_table_to_block(HashTableCtxType& context, |
| HashTableType& hash_table, Block& block, |
| std::vector<KeyType>& keys_) { |
| int key_size = _probe_expr_ctxs.size(); |
| int agg_size = _aggregate_evaluators.size(); |
| |
| MutableColumns value_columns(agg_size); |
| DataTypes value_data_types(agg_size); |
| MutableColumns key_columns; |
| |
| for (int i = 0; i < key_size; ++i) { |
| key_columns.emplace_back(_probe_expr_ctxs[i]->root()->data_type()->create_column()); |
| } |
| |
| for (size_t i = 0; i < _aggregate_evaluators.size(); ++i) { |
| value_data_types[i] = _aggregate_evaluators[i]->function()->get_serialized_type(); |
| value_columns[i] = _aggregate_evaluators[i]->function()->create_serialize_column(); |
| } |
| |
| context.init_iterator(); |
| const auto size = hash_table.size(); |
| std::vector<KeyType> keys(size); |
    // Reserve one extra slot so the possible null key data appended below does
    // not write past the end (mirrors _serialize_with_serialized_key_result_non_spill).
    if (_values.size() < size + 1) {
        _values.resize(size + 1);
    }
| |
| size_t num_rows = 0; |
| _aggregate_data_container->init_once(); |
| auto& iter = _aggregate_data_container->iterator; |
| |
| { |
| while (iter != _aggregate_data_container->end()) { |
| keys[num_rows] = iter.get_key<KeyType>(); |
| _values[num_rows] = iter.get_aggregate_data(); |
| ++iter; |
| ++num_rows; |
| } |
| } |
| |
| { context.insert_keys_into_columns(keys, key_columns, num_rows); } |
| |
| if (hash_table.has_null_key_data()) { |
        // Only a single group-by key supports wrapping a null key,
        // so the null key/value needs additional processing here.
| CHECK(key_columns.size() == 1); |
| CHECK(key_columns[0]->is_nullable()); |
| key_columns[0]->insert_data(nullptr, 0); |
| |
        // There is no need to set `keys[num_rows]` here; keep it at its default value.
| _values[num_rows] = hash_table.template get_null_key_data<AggregateDataPtr>(); |
| ++num_rows; |
| } |
| |
| for (size_t i = 0; i < _aggregate_evaluators.size(); ++i) { |
| _aggregate_evaluators[i]->function()->serialize_to_column( |
| _values, _offsets_of_aggregate_states[i], value_columns[i], num_rows); |
| } |
| |
| ColumnsWithTypeAndName columns_with_schema; |
| for (int i = 0; i < key_size; ++i) { |
| columns_with_schema.emplace_back(std::move(key_columns[i]), |
| _probe_expr_ctxs[i]->root()->data_type(), |
| _probe_expr_ctxs[i]->root()->expr_name()); |
| } |
| for (int i = 0; i < agg_size; ++i) { |
| columns_with_schema.emplace_back(std::move(value_columns[i]), value_data_types[i], |
| _aggregate_evaluators[i]->function()->get_name()); |
| } |
| |
| block = columns_with_schema; |
| keys_.swap(keys); |
| return Status::OK(); |
| } |
| |
| template <typename HashTableCtxType, typename HashTableType> |
| Status AggregationNode::_spill_hash_table(HashTableCtxType& agg_method, HashTableType& hash_table) { |
| Block block; |
| std::vector<typename HashTableType::key_type> keys; |
| RETURN_IF_ERROR(_serialize_hash_table_to_block(agg_method, hash_table, block, keys)); |
| CHECK_EQ(block.rows(), hash_table.size()); |
| CHECK_EQ(keys.size(), block.rows()); |
| |
| if (!_spill_context.has_data) { |
| _spill_context.has_data = true; |
| _spill_context.runtime_profile = _runtime_profile->create_child("Spill", true, true); |
| } |
| |
| BlockSpillWriterUPtr writer; |
| RETURN_IF_ERROR(ExecEnv::GetInstance()->block_spill_mgr()->get_writer( |
| std::numeric_limits<int32_t>::max(), writer, _spill_context.runtime_profile)); |
| Defer defer {[&]() { |
| // redundant call is ok |
| static_cast<void>(writer->close()); |
| }}; |
| _spill_context.stream_ids.emplace_back(writer->get_id()); |
| |
| std::vector<size_t> partitioned_indices(block.rows()); |
| std::vector<size_t> blocks_rows(_spill_partition_helper->partition_count); |
| |
| // The last row may contain a null key. |
| const size_t rows = hash_table.has_null_key_data() ? block.rows() - 1 : block.rows(); |
| for (size_t i = 0; i < rows; ++i) { |
| const auto index = _spill_partition_helper->get_index(hash_table.hash(keys[i])); |
| partitioned_indices[i] = index; |
| blocks_rows[index]++; |
| } |
| |
| if (hash_table.has_null_key_data()) { |
        // Put the row with the null key in the last partition.
| const auto index = _spill_partition_helper->partition_count - 1; |
| partitioned_indices[rows] = index; |
| blocks_rows[index]++; |
| } |
| |
| for (size_t i = 0; i < _spill_partition_helper->partition_count; ++i) { |
| Block block_to_write = block.clone_empty(); |
| if (blocks_rows[i] == 0) { |
            /// Write one empty block so that the file still contains exactly
            /// partition_count blocks, one per partition.
| static_cast<void>(writer->write(block_to_write)); |
| continue; |
| } |
| |
| MutableBlock mutable_block(std::move(block_to_write)); |
| |
| for (auto& column : mutable_block.mutable_columns()) { |
| column->reserve(blocks_rows[i]); |
| } |
| |
| size_t begin = 0; |
| size_t length = 0; |
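        // Copy this partition's rows in contiguous runs so each add_rows() call
        // moves a whole range instead of a single row.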
| for (size_t j = 0; j < partitioned_indices.size(); ++j) { |
| if (partitioned_indices[j] != i) { |
| if (length > 0) { |
| mutable_block.add_rows(&block, begin, length); |
| } |
| length = 0; |
| continue; |
| } |
| |
| if (length == 0) { |
| begin = j; |
| } |
| length++; |
| } |
| |
| if (length > 0) { |
| mutable_block.add_rows(&block, begin, length); |
| } |
| |
| CHECK_EQ(mutable_block.rows(), blocks_rows[i]); |
| RETURN_IF_ERROR(writer->write(mutable_block.to_block())); |
| } |
| RETURN_IF_ERROR(writer->close()); |
| |
| return Status::OK(); |
| } |
| |
| Status AggregationNode::_try_spill_disk(bool eos) { |
| if (_external_agg_bytes_threshold == 0) { |
| return Status::OK(); |
| } |
| return std::visit( |
| [&](auto&& agg_method) -> Status { |
| auto& hash_table = *agg_method.hash_table; |
| if (!eos && _memory_usage() < _external_agg_bytes_threshold) { |
| return Status::OK(); |
| } |
| |
| if (_get_hash_table_size() == 0) { |
| return Status::OK(); |
| } |
| |
| RETURN_IF_ERROR(_spill_hash_table(agg_method, hash_table)); |
| return _reset_hash_table(); |
| }, |
| _agg_data->method_variant); |
| } |
| |
| Status AggregationNode::_execute_with_serialized_key(Block* block) { |
| if (_reach_limit) { |
| return _execute_with_serialized_key_helper<true>(block); |
| } else { |
| return _execute_with_serialized_key_helper<false>(block); |
| } |
| } |
| |
| Status AggregationNode::_merge_spilt_data() { |
| CHECK(!_spill_context.stream_ids.empty()); |
| |
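    // Each spilled stream holds one block per partition; read the block at the
    // current read_cursor from every stream and merge it into the freshly reset
    // hash table.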
| for (auto& reader : _spill_context.readers) { |
| CHECK_LT(_spill_context.read_cursor, reader->block_count()); |
| reader->seek(_spill_context.read_cursor); |
| Block block; |
        bool eos = false;
| RETURN_IF_ERROR(reader->read(&block, &eos)); |
| |
| if (!block.empty()) { |
| auto st = _merge_with_serialized_key_helper<false /* limit */, true /* for_spill */>( |
| &block); |
| RETURN_IF_ERROR(st); |
| } |
| } |
| _spill_context.read_cursor++; |
| return Status::OK(); |
| } |
| |
| Status AggregationNode::_get_result_with_spilt_data(RuntimeState* state, Block* block, bool* eos) { |
| CHECK(!_spill_context.stream_ids.empty()); |
| CHECK(_spill_partition_helper != nullptr) << "_spill_partition_helper should not be null"; |
| _aggregate_data_container->init_once(); |
| while (_aggregate_data_container->iterator == _aggregate_data_container->end()) { |
| if (_spill_context.read_cursor == _spill_partition_helper->partition_count) { |
| break; |
| } |
| RETURN_IF_ERROR(_reset_hash_table()); |
| RETURN_IF_ERROR(_merge_spilt_data()); |
| _aggregate_data_container->init_once(); |
| } |
| |
| RETURN_IF_ERROR(_get_result_with_serialized_key_non_spill(state, block, eos)); |
| if (*eos) { |
| *eos = _spill_context.read_cursor == _spill_partition_helper->partition_count; |
| } |
| CHECK(!block->empty() || *eos); |
| return Status::OK(); |
| } |
| |
| Status AggregationNode::_get_with_serialized_key_result(RuntimeState* state, Block* block, |
| bool* eos) { |
| if (_spill_context.has_data) { |
| return _get_result_with_spilt_data(state, block, eos); |
| } else { |
| return _get_result_with_serialized_key_non_spill(state, block, eos); |
| } |
| } |
| |
| Status AggregationNode::_get_result_with_serialized_key_non_spill(RuntimeState* state, Block* block, |
| bool* eos) { |
    // Non-nullable columns (ids in `_make_nullable_keys`) will be converted to nullable.
| bool mem_reuse = _make_nullable_keys.empty() && block->mem_reuse(); |
| |
| auto columns_with_schema = VectorizedUtils::create_columns_with_type_and_name(_row_descriptor); |
| int key_size = _probe_expr_ctxs.size(); |
| |
| MutableColumns key_columns; |
| for (int i = 0; i < key_size; ++i) { |
| if (!mem_reuse) { |
| key_columns.emplace_back(columns_with_schema[i].type->create_column()); |
| } else { |
| key_columns.emplace_back(std::move(*block->get_by_position(i).column).mutate()); |
| } |
| } |
| MutableColumns value_columns; |
| for (int i = key_size; i < columns_with_schema.size(); ++i) { |
| if (!mem_reuse) { |
| value_columns.emplace_back(columns_with_schema[i].type->create_column()); |
| } else { |
| value_columns.emplace_back(std::move(*block->get_by_position(i).column).mutate()); |
| } |
| } |
| |
| SCOPED_TIMER(_get_results_timer); |
| std::visit( |
| [&](auto&& agg_method) -> void { |
| auto& data = *agg_method.hash_table; |
| agg_method.init_iterator(); |
| const auto size = std::min(data.size(), size_t(state->batch_size())); |
| using KeyType = std::decay_t<decltype(agg_method.iterator->get_first())>; |
| std::vector<KeyType> keys(size); |
| if (_values.size() < size) { |
| _values.resize(size); |
| } |
| |
| size_t num_rows = 0; |
| _aggregate_data_container->init_once(); |
| auto& iter = _aggregate_data_container->iterator; |
| |
| { |
| SCOPED_TIMER(_hash_table_iterate_timer); |
| while (iter != _aggregate_data_container->end() && |
| num_rows < state->batch_size()) { |
| keys[num_rows] = iter.get_key<KeyType>(); |
| _values[num_rows] = iter.get_aggregate_data(); |
| ++iter; |
| ++num_rows; |
| } |
| } |
| |
| { |
| SCOPED_TIMER(_insert_keys_to_column_timer); |
| agg_method.insert_keys_into_columns(keys, key_columns, num_rows); |
| } |
| |
| for (size_t i = 0; i < _aggregate_evaluators.size(); ++i) { |
| _aggregate_evaluators[i]->insert_result_info_vec( |
| _values, _offsets_of_aggregate_states[i], value_columns[i].get(), |
| num_rows); |
| } |
| |
| if (iter == _aggregate_data_container->end()) { |
| if (agg_method.hash_table->has_null_key_data()) { |
                    // Only a single group-by key supports wrapping a null key,
                    // so the null key/value needs additional processing here.
| DCHECK(key_columns.size() == 1); |
| DCHECK(key_columns[0]->is_nullable()); |
| if (key_columns[0]->size() < state->batch_size()) { |
| key_columns[0]->insert_data(nullptr, 0); |
| auto mapped = agg_method.hash_table |
| ->template get_null_key_data<AggregateDataPtr>(); |
| for (size_t i = 0; i < _aggregate_evaluators.size(); ++i) { |
| _aggregate_evaluators[i]->insert_result_info( |
| mapped + _offsets_of_aggregate_states[i], |
| value_columns[i].get()); |
| } |
| *eos = true; |
| } |
| } else { |
| *eos = true; |
| } |
| } |
| }, |
| _agg_data->method_variant); |
| |
| if (!mem_reuse) { |
| *block = columns_with_schema; |
| MutableColumns columns(block->columns()); |
| for (int i = 0; i < block->columns(); ++i) { |
| if (i < key_size) { |
| columns[i] = std::move(key_columns[i]); |
| } else { |
| columns[i] = std::move(value_columns[i - key_size]); |
| } |
| } |
| block->set_columns(std::move(columns)); |
| } |
| |
| return Status::OK(); |
| } |
| |
| Status AggregationNode::_serialize_with_serialized_key_result(RuntimeState* state, Block* block, |
| bool* eos) { |
| if (_spill_context.has_data) { |
| return _serialize_with_serialized_key_result_with_spilt_data(state, block, eos); |
| } else { |
| return _serialize_with_serialized_key_result_non_spill(state, block, eos); |
| } |
| } |
| |
| Status AggregationNode::_serialize_with_serialized_key_result_with_spilt_data(RuntimeState* state, |
| Block* block, |
| bool* eos) { |
| CHECK(!_spill_context.stream_ids.empty()); |
| CHECK(_spill_partition_helper != nullptr) << "_spill_partition_helper should not be null"; |
| _aggregate_data_container->init_once(); |
| while (_aggregate_data_container->iterator == _aggregate_data_container->end()) { |
| if (_spill_context.read_cursor == _spill_partition_helper->partition_count) { |
| break; |
| } |
| RETURN_IF_ERROR(_reset_hash_table()); |
| RETURN_IF_ERROR(_merge_spilt_data()); |
| _aggregate_data_container->init_once(); |
| } |
| |
| RETURN_IF_ERROR(_serialize_with_serialized_key_result_non_spill(state, block, eos)); |
| if (*eos) { |
| *eos = _spill_context.read_cursor == _spill_partition_helper->partition_count; |
| } |
| CHECK(!block->empty() || *eos); |
| return Status::OK(); |
| } |

Status AggregationNode::_serialize_with_serialized_key_result_non_spill(RuntimeState* state,
| Block* block, bool* eos) { |
| SCOPED_TIMER(_serialize_result_timer); |
| int key_size = _probe_expr_ctxs.size(); |
| int agg_size = _aggregate_evaluators.size(); |
| MutableColumns value_columns(agg_size); |
| DataTypes value_data_types(agg_size); |
| |
    // Non-nullable columns (ids in `_make_nullable_keys`) will be converted to nullable.
| bool mem_reuse = _make_nullable_keys.empty() && block->mem_reuse(); |
| |
| MutableColumns key_columns; |
| for (int i = 0; i < key_size; ++i) { |
| if (mem_reuse) { |
| key_columns.emplace_back(std::move(*block->get_by_position(i).column).mutate()); |
| } else { |
| key_columns.emplace_back(_probe_expr_ctxs[i]->root()->data_type()->create_column()); |
| } |
| } |
| |
| SCOPED_TIMER(_get_results_timer); |
| std::visit( |
| [&](auto&& agg_method) -> void { |
| agg_method.init_iterator(); |
| auto& data = *agg_method.hash_table; |
| const auto size = std::min(data.size(), size_t(state->batch_size())); |
| using KeyType = std::decay_t<decltype(agg_method.iterator->get_first())>; |
| std::vector<KeyType> keys(size); |
| _values.resize(size + 1); |
| |
| size_t num_rows = 0; |
| _aggregate_data_container->init_once(); |
| auto& iter = _aggregate_data_container->iterator; |
| |
| { |
| SCOPED_TIMER(_hash_table_iterate_timer); |
| while (iter != _aggregate_data_container->end() && |
| num_rows < state->batch_size()) { |
| keys[num_rows] = iter.get_key<KeyType>(); |
| _values[num_rows] = iter.get_aggregate_data(); |
| ++iter; |
| ++num_rows; |
| } |
| } |
| |
| { |
| SCOPED_TIMER(_insert_keys_to_column_timer); |
| agg_method.insert_keys_into_columns(keys, key_columns, num_rows); |
| } |
| |
                if (iter == _aggregate_data_container->end()) {
                    if (agg_method.hash_table->has_null_key_data()) {
                        // Only a single group-by key supports wrapping a null key,
                        // so the null key/value needs additional processing here.
                        DCHECK(key_columns.size() == 1);
                        DCHECK(key_columns[0]->is_nullable());
                        key_columns[0]->insert_data(nullptr, 0);
                        _values[num_rows] =
                                agg_method.hash_table
                                        ->template get_null_key_data<AggregateDataPtr>();
                        ++num_rows;
                    }
                    *eos = true;
                }
| |
| { |
| SCOPED_TIMER(_serialize_data_timer); |
| for (size_t i = 0; i < _aggregate_evaluators.size(); ++i) { |
| value_data_types[i] = |
| _aggregate_evaluators[i]->function()->get_serialized_type(); |
| if (mem_reuse) { |
| value_columns[i] = |
| std::move(*block->get_by_position(i + key_size).column) |
| .mutate(); |
| } else { |
| value_columns[i] = |
| _aggregate_evaluators[i]->function()->create_serialize_column(); |
| } |
| _aggregate_evaluators[i]->function()->serialize_to_column( |
| _values, _offsets_of_aggregate_states[i], value_columns[i], |
| num_rows); |
| } |
| } |
| }, |
| _agg_data->method_variant); |
| |
| if (!mem_reuse) { |
| ColumnsWithTypeAndName columns_with_schema; |
| for (int i = 0; i < key_size; ++i) { |
| columns_with_schema.emplace_back(std::move(key_columns[i]), |
| _probe_expr_ctxs[i]->root()->data_type(), |
| _probe_expr_ctxs[i]->root()->expr_name()); |
| } |
| for (int i = 0; i < agg_size; ++i) { |
| columns_with_schema.emplace_back(std::move(value_columns[i]), value_data_types[i], ""); |
| } |
| *block = Block(columns_with_schema); |
| } |
| return Status::OK(); |
| } |
| |
| Status AggregationNode::_merge_with_serialized_key(Block* block) { |
| if (_reach_limit) { |
| return _merge_with_serialized_key_helper<true, false>(block); |
| } else { |
| return _merge_with_serialized_key_helper<false, false>(block); |
| } |
| } |
| |
| void AggregationNode::_update_memusage_with_serialized_key() { |
| std::visit( |
| [&](auto&& agg_method) -> void { |
| auto& data = *agg_method.hash_table; |
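                // Consume only the deltas since the last update so the tracker
                // always reflects the current arena and hash table usage.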
| auto arena_memory_usage = _agg_arena_pool->size() + |
| _aggregate_data_container->memory_usage() - |
| _mem_usage_record.used_in_arena; |
| mem_tracker()->consume(arena_memory_usage); |
| mem_tracker()->consume(data.get_buffer_size_in_bytes() - |
| _mem_usage_record.used_in_state); |
| _serialize_key_arena_memory_usage->add(arena_memory_usage); |
| COUNTER_UPDATE(_hash_table_memory_usage, |
| data.get_buffer_size_in_bytes() - _mem_usage_record.used_in_state); |
| _mem_usage_record.used_in_state = data.get_buffer_size_in_bytes(); |
| _mem_usage_record.used_in_arena = |
| _agg_arena_pool->size() + _aggregate_data_container->memory_usage(); |
| }, |
| _agg_data->method_variant); |
| } |
| |
| void AggregationNode::_close_with_serialized_key() { |
| std::visit( |
| [&](auto&& agg_method) -> void { |
| auto& data = *agg_method.hash_table; |
| data.for_each_mapped([&](auto& mapped) { |
| if (mapped) { |
| static_cast<void>(_destroy_agg_status(mapped)); |
| mapped = nullptr; |
| } |
| }); |
| if (data.has_null_key_data()) { |
| auto st = _destroy_agg_status( |
| data.template get_null_key_data<AggregateDataPtr>()); |
| if (!st) { |
| throw Exception(st.code(), st.to_string()); |
| } |
| } |
| }, |
| _agg_data->method_variant); |
| release_tracker(); |
| } |
| |
| void AggregationNode::release_tracker() { |
| mem_tracker()->release(_mem_usage_record.used_in_state + _mem_usage_record.used_in_arena); |
| } |
| |
| void AggregationNode::_release_mem() { |
| _agg_data = nullptr; |
| _aggregate_data_container = nullptr; |
| _agg_profile_arena = nullptr; |
| _agg_arena_pool = nullptr; |
| _preagg_block.clear(); |
| |
| PODArray<AggregateDataPtr> tmp_places; |
| _places.swap(tmp_places); |
| |
| std::vector<char> tmp_deserialize_buffer; |
| _deserialize_buffer.swap(tmp_deserialize_buffer); |
| |
| std::vector<AggregateDataPtr> tmp_values; |
| _values.swap(tmp_values); |
| } |
| |
| Status AggSpillContext::prepare_for_reading() { |
| if (readers_prepared) { |
| return Status::OK(); |
| } |
| readers_prepared = true; |
| |
| readers.resize(stream_ids.size()); |
| auto* manager = ExecEnv::GetInstance()->block_spill_mgr(); |
| for (size_t i = 0; i != stream_ids.size(); ++i) { |
| RETURN_IF_ERROR(manager->get_reader(stream_ids[i], readers[i], runtime_profile, true)); |
| } |
| return Status::OK(); |
| } |
| |
| } // namespace doris::vectorized |