// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#pragma once
namespace doris {
/// The minimum reduction factor (input rows divided by output rows) to grow hash tables
/// in a streaming preaggregation, given that the hash tables are currently the given
/// size or above. The sizes roughly correspond to hash table sizes where the bucket
/// arrays will fit in a cache level. Intuitively, we don't want the working set of the
/// aggregation to expand to the next level of cache unless we're reducing the input
/// enough to outweigh the increased memory latency we'll incur for each hash table
/// lookup.
///
/// Note that the current reduction achieved is not always a good estimate of the
/// final reduction. It may be biased either way depending on the ordering of the
/// input. If the input order is random, we will underestimate the final reduction
/// factor because the probability of a row having the same key as a previous row
/// increases as more input is processed. If the input order is correlated with the
/// key, skew may bias the estimate: if heavily repeated keys appear first, we may
/// overestimate the final reduction, and if mostly distinct keys appear first, we
/// underestimate it. To estimate the eventual reduction achieved, we extrapolate from
/// the current reduction using the planner's estimated input cardinality and the
/// assumption that the input is in a random order. This means that we assume the
/// reduction factor will increase over time.
struct StreamingHtMinReductionEntry {
    // Use 'streaming_ht_min_reduction' if the total size of hash table bucket directories in
    // bytes is greater than this threshold.
    int min_ht_mem;
    // The minimum reduction factor to expand the hash tables.
    double streaming_ht_min_reduction;
};
// TODO: experimentally tune these values and also programmatically get the cache size
// of the machine that we're running on.
static constexpr StreamingHtMinReductionEntry STREAMING_HT_MIN_REDUCTION[] = {
        // Expand up to L2 cache always.
        {.min_ht_mem = 0, .streaming_ht_min_reduction = 0.0},
        // Expand into L3 cache if it looks like we're getting some reduction.
        // At present, L2 caches are generally 256KB or larger.
        {.min_ht_mem = 256 * 1024, .streaming_ht_min_reduction = 1.1},
        // Expand into main memory if we're getting a significant reduction.
        // L3 caches are generally 16MB or larger.
        {.min_ht_mem = 16 * 1024 * 1024, .streaming_ht_min_reduction = 2.0},
};
static constexpr StreamingHtMinReductionEntry SINGLE_BE_STREAMING_HT_MIN_REDUCTION[] = {
        // Expand up to L2 cache always.
        {.min_ht_mem = 0, .streaming_ht_min_reduction = 0.0},
        // Expand into L3 cache if it looks like we're getting some reduction.
        // At present, L2 caches are generally 256KB or larger.
        {.min_ht_mem = 256 * 1024, .streaming_ht_min_reduction = 4.0},
        // Expand into main memory if we're getting a significant reduction.
        // L3 caches are generally 16MB or larger.
        {.min_ht_mem = 16 * 1024 * 1024, .streaming_ht_min_reduction = 5.0},
};
static constexpr int STREAMING_HT_MIN_REDUCTION_SIZE =
        sizeof(STREAMING_HT_MIN_REDUCTION) / sizeof(STREAMING_HT_MIN_REDUCTION[0]);
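
// Illustrative sketch (not part of the original header): how a streaming preaggregation
// might consult the table above to decide whether to keep growing its hash tables. The
// function name and parameters ('ht_mem', 'ht_rows', 'aggregated_input_rows',
// 'estimated_input_cardinality') are assumptions made for this example only; the real
// operator derives the equivalent values from its hash tables and planner statistics,
// and may use SINGLE_BE_STREAMING_HT_MIN_REDUCTION instead when appropriate.
inline bool should_expand_preagg_hash_tables_example(long long ht_mem, long long ht_rows,
                                                     long long aggregated_input_rows,
                                                     long long estimated_input_cardinality) {
    // Without any aggregated rows there are no meaningful statistics yet; keep expanding.
    if (ht_rows <= 0 || aggregated_input_rows <= 0) return true;

    // Pick the entry whose memory threshold matches the current total size of the hash
    // table bucket directories.
    int cache_level = 0;
    while (cache_level + 1 < STREAMING_HT_MIN_REDUCTION_SIZE &&
           ht_mem >= STREAMING_HT_MIN_REDUCTION[cache_level + 1].min_ht_mem) {
        ++cache_level;
    }

    // Reduction achieved so far: rows aggregated into the hash tables per distinct key.
    double current_reduction =
            static_cast<double>(aggregated_input_rows) / static_cast<double>(ht_rows);

    // Extrapolate to the full input under the random-order assumption described above,
    // e.g. R = 1 + (N / n) * (r - 1), where N is the planner's estimated input row count
    // and n is the number of rows aggregated so far. This is a rough approximation that
    // simply encodes "the reduction factor increases over time".
    double estimated_reduction = current_reduction;
    if (estimated_input_cardinality > aggregated_input_rows) {
        double scale = static_cast<double>(estimated_input_cardinality) /
                       static_cast<double>(aggregated_input_rows);
        estimated_reduction = 1.0 + scale * (current_reduction - 1.0);
    }

    // Only grow into the next cache level (or main memory) if the estimated reduction
    // outweighs the extra memory latency per hash table lookup.
    return estimated_reduction >
           STREAMING_HT_MIN_REDUCTION[cache_level].streaming_ht_min_reduction;
}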
} // namespace doris