blob: f83def18afbd98d1ed93e1311ac122b93072e9dc [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#pragma once
#include <cstddef>
#include <Core/Block.h>
#include <Interpreters/Aggregator.h>
#include <Interpreters/Context.h>
#include <Processors/Chunk.h>
#include <Processors/IProcessor.h>
#include <Processors/QueryPlan/IQueryPlanStep.h>
#include <Processors/QueryPlan/ITransformingStep.h>
#include <Processors/Transforms/AggregatingTransform.h>
#include <Poco/Logger.h>
#include <Common/AggregateUtil.h>
namespace local_engine
{
/// A memory efficient aggregating processor.
/// When the memory usage reaches the limit, it will evict current AggregatedDataVariants and generate
/// intermediate aggregated result blocks, and its downstream processor should be GraceMergingAggregatedTransform.
class StreamingAggregatingTransform : public DB::IProcessor
{
public:
using Status = DB::IProcessor::Status;
explicit StreamingAggregatingTransform(
DB::ContextPtr context_, const DB::SharedHeader & header_, DB::AggregatingTransformParamsPtr params_);
~StreamingAggregatingTransform() override;
String getName() const override { return "StreamingAggregatingTransform"; }
Status prepare() override;
void work() override;
private:
DB::ContextPtr context;
DB::ColumnRawPtrs key_columns;
DB::Aggregator::AggregateColumns aggregate_columns;
DB::AggregatingTransformParamsPtr params;
/// Followings are configurations defined in context config.
/// In extreme cases, other operators may occupy very large memory, and keep this processor evicting
/// empty or very small aggregated data variants. Add a size limit to avoid this situation.
size_t aggregated_keys_before_evict = 1024;
// Avoid this processor take the whole remained memory and make other processors OOM.
double max_allowed_memory_usage_ratio = 0.9;
// If the cardinality of the keys is larger than this threshold, we will evict data once the keys size in
// aggregate data variant is over aggregated_keys_before_evict, avoid the aggregated hash table becomes too large.
double high_cardinality_threshold = 0.8;
bool no_more_keys = false;
bool is_consume_finished = false;
bool is_clear_aggregator = false;
DB::AggregatedDataVariantsPtr data_variants = nullptr;
bool has_input = false;
bool has_output = false;
DB::Chunk input_chunk;
DB::Chunk output_chunk;
bool input_finished = false;
std::unique_ptr<AggregateDataBlockConverter> block_converter = nullptr;
Poco::Logger * logger = &Poco::Logger::get("StreamingAggregatingTransform");
double per_key_memory_usage = 0;
// metrics
size_t total_input_blocks = 0;
size_t total_input_rows = 0;
size_t total_output_blocks = 0;
size_t total_output_rows = 0;
size_t total_clear_data_variants_num = 0;
size_t total_aggregate_time = 0;
size_t total_convert_data_variants_time = 0;
bool needEvict();
};
class StreamingAggregatingStep : public DB::ITransformingStep
{
public:
explicit StreamingAggregatingStep(
const DB::ContextPtr & context_, const DB::SharedHeader & input_header, DB::Aggregator::Params params_);
~StreamingAggregatingStep() override = default;
String getName() const override { return "StreamingAggregating"; }
void transformPipeline(DB::QueryPipelineBuilder & pipeline, const DB::BuildQueryPipelineSettings &) override;
void describeActions(DB::JSONBuilder::JSONMap & map) const override;
void describeActions(DB::IQueryPlanStep::FormatSettings & settings) const override;
private:
DB::ContextPtr context;
DB::Aggregator::Params params;
protected:
void updateOutputHeader() override;
};
}