/* src/include/executor/nodeAgg.h - cloudberry - Git at Google */

 /*-------------------------------------------------------------------------
  *
  * nodeAgg.h
  *	  prototypes for nodeAgg.c
  *
  *
  * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
  * src/include/executor/nodeAgg.h
  *
  *-------------------------------------------------------------------------
  */
 #ifndef NODEAGG_H
 #define NODEAGG_H

 #include "access/parallel.h"
 #include "nodes/execnodes.h"


 /*
  * AggStatePerTransData - per aggregate state value information
  *
  * Working state for updating the aggregate's state value, by calling the
  * transition function with an input row. This struct does not store the
  * information needed to produce the final aggregate result from the transition
  * state, that's stored in AggStatePerAggData instead. This separation allows
  * multiple aggregate results to be produced from a single state value.
  */
 typedef struct AggStatePerTransData
 {
 	/*
 	 * The fields below, up to the working-state section further down, are
 	 * filled in during ExecInitAgg() and are not changed afterwards.
 	 */

 	/*
 	 * The Aggref expression this state value belongs to.
 	 *
 	 * One state value may serve several Aggrefs, as long as their inputs and
 	 * transition functions are identical and their final functions are not
 	 * read-write.  In that case this is the first of those Aggrefs.
 	 */
 	Aggref	   *aggref;

 	/* True when more than one Aggref is really sharing this state value. */
 	bool		aggshared;

 	/* Set for ORDER BY and DISTINCT Aggrefs that are not aggpresorted. */
 	bool		aggsortrequired;

 	/*
 	 * Count of aggregated input columns.  ORDER BY expressions are included,
 	 * for plain aggregates and ordered-set aggregates alike; the direct
 	 * arguments of an ordered-set aggregate are not.
 	 */
 	int			numInputs;

 	/*
 	 * Count of aggregated input columns handed to the transfn.  ORDER BY
 	 * columns are included for ordered-set aggregates but not for plain
 	 * ones.  The transition state value itself is not counted.
 	 */
 	int			numTransInputs;

 	/* Oid of the state transition function, or of the combine function. */
 	Oid			transfn_oid;

 	/* Oid of the serialization function; InvalidOid if there is none. */
 	Oid			serialfn_oid;

 	/* Oid of the deserialization function; InvalidOid if there is none. */
 	Oid			deserialfn_oid;

 	/* Oid of the datatype of the state value. */
 	Oid			aggtranstype;

 	/*
 	 * fmgr lookup data for the transition (or combine) function.  The
 	 * fn_strict flag, in particular, lives here.
 	 */
 	FmgrInfo	transfn;

 	/* fmgr lookup data for the serialization function. */
 	FmgrInfo	serialfn;

 	/* fmgr lookup data for the deserialization function. */
 	FmgrInfo	deserialfn;

 	/* Input collation derived for the aggregate. */
 	Oid			aggCollation;

 	/* How many sorting columns there are. */
 	int			numSortCols;

 	/*
 	 * How many of the sorting columns take part in DISTINCT comparisons.
 	 * This is always either zero or equal to numSortCols.
 	 */
 	int			numDistinctCols;

 	/* Deconstructed sort information; each array has numSortCols entries. */
 	AttrNumber *sortColIdx;
 	Oid		   *sortOperators;
 	Oid		   *sortCollations;
 	bool	   *sortNullsFirst;

 	/*
 	 * Input-column comparators, set and used only when the aggregate has the
 	 * DISTINCT flag.  equalfnOne handles the single-column case,
 	 * equalfnMulti the multi-column case.
 	 */
 	FmgrInfo	equalfnOne;
 	ExprState  *equalfnMulti;

 	/* Initial value taken from the pg_aggregate entry. */
 	Datum		initValue;
 	bool		initValueIsNull;

 	/*
 	 * Length and by-value information for the aggregate's input and
 	 * transition data types, needed to know how to copy/delete values.
 	 *
 	 * The input-type information is consulted only for DISTINCT aggregates
 	 * with exactly one argument, which is why a single input type suffices.
 	 */
 	int16		inputtypeLen;
 	int16		transtypeLen;
 	bool		inputtypeByVal;
 	bool		transtypeByVal;

 	/*
 	 * Slots that hold the evaluated input arguments.  They are created in
 	 * ExecInitAgg() and reused for every input row that needs FILTER or
 	 * ORDER BY/DISTINCT processing.
 	 */
 	TupleTableSlot *sortslot;	/* the current input tuple */
 	TupleTableSlot *uniqslot;	/* for multi-column DISTINCT */
 	TupleDesc	sortdesc;		/* describes the input tuples */
 	Datum		lastdatum;		/* for single-column DISTINCT */
 	bool		lastisnull;		/* for single-column DISTINCT */
 	bool		haslast;		/* a last value exists for the DISTINCT check */

 	/*
 	 * Working state: initialized when an input tuple group starts and
 	 * updated for every input tuple.
 	 *
 	 * A simple aggregate (neither DISTINCT nor ORDER BY) has its input
 	 * values fed directly to the transition function.  A DISTINCT aggregate,
 	 * or one requiring ORDER BY, has its input values pushed into a
 	 * Tuplesort object instead; once the input tuple group is complete the
 	 * sorted values are scanned, duplicates are removed if needed, and the
 	 * transition function is run on what remains.
 	 *
 	 * Each grouping set needs its own tuplesort.
 	 */

 	Tuplesortstate **sortstates;	/* sort objects, if DISTINCT or ORDER BY */

 	/*
 	 * Pre-initialized FunctionCallInfo struct for calling this aggregate's
 	 * transfn.  Not re-initializing the unchanging fields saves a few cycles
 	 * per row; a small gain, but judged worth the extra space.
 	 */
 	FunctionCallInfo transfn_fcinfo;

 	/* The same, for the serialization and deserialization functions. */
 	FunctionCallInfo serialfn_fcinfo;

 	FunctionCallInfo deserialfn_fcinfo;
 }			AggStatePerTransData;

 /*
  * AggStatePerAggData - per-aggregate information
  *
  * This contains the information needed to call the final function, to produce
  * a final aggregate result from the state value. If there are multiple
  * identical Aggrefs in the query, they can all share the same per-agg data.
  *
  * These values are set up during ExecInitAgg() and do not change thereafter.
  */
 typedef struct AggStatePerAggData
 {
 	/*
 	 * Per-aggregate data used to call the final function: which state value
 	 * to read, the final function's identity and lookup data, its argument
 	 * count and direct-argument expressions, and result-type copy info.
 	 */

 	/*
 	 * Link to an Aggref expr this state value is for.
 	 *
 	 * There can be multiple identical Aggref's sharing the same per-agg. This
 	 * points to the first one of them.
 	 */
 	Aggref	   *aggref;

 	/*
 	 * index to the state value which this agg should use
 	 *
 	 * NOTE(review): presumably an index into the array of
 	 * AggStatePerTransData entries -- confirm against nodeAgg.c.
 	 */
 	int			transno;

 	/* Optional Oid of final function (may be InvalidOid) */
 	Oid			finalfn_oid;

 	/*
 	 * fmgr lookup data for final function --- only valid when finalfn_oid is
 	 * not InvalidOid.
 	 */
 	FmgrInfo	finalfn;

 	/*
 	 * Number of arguments to pass to the finalfn.  This is always at least 1
 	 * (the transition state value) plus any ordered-set direct args. If the
 	 * finalfn wants extra args then we pass nulls corresponding to the
 	 * aggregated input columns.
 	 */
 	int			numFinalArgs;

 	/* ExprStates for any direct-argument expressions */
 	List	   *aggdirectargs;

 	/*
 	 * We need the len and byval info for the agg's result data type in order
 	 * to know how to copy/delete values.
 	 */
 	int16		resulttypeLen;
 	bool		resulttypeByVal;

 	/*
 	 * "shareable" is false if this agg cannot share state values with other
 	 * aggregates because the final function is read-write.
 	 */
 	bool		shareable;
 }			AggStatePerAggData;

 /*
  * AggStatePerGroupData - per-aggregate-per-group working state
  *
  * These values are working state that is initialized at the start of
  * an input tuple group and updated for each input tuple.
  *
  * In AGG_PLAIN and AGG_SORTED modes, we have a single array of these
  * structs (pointed to by aggstate->pergroup); we re-use the array for
  * each input group, if it's AGG_SORTED mode.  In AGG_HASHED mode, the
  * hash table contains an array of these structs for each tuple group.
  *
  * Logically, the sortstate field belongs in this struct, but we do not
  * keep it here for space reasons: we don't support DISTINCT aggregates
  * in AGG_HASHED mode, so there's no reason to use up a pointer field
  * in every entry of the hashtable.
  */
 typedef struct AggStatePerGroupData
 {
 	/*
 	 * Each FIELDNO_* macro below carries the zero-based position of the
 	 * field declared right after it, so the macros must be kept in step if
 	 * fields are ever added or reordered.
 	 * NOTE(review): presumably consumed by code that addresses these fields
 	 * by index rather than by name -- confirm where they are used.
 	 */
 #define FIELDNO_AGGSTATEPERGROUPDATA_TRANSVALUE 0
 	Datum		transValue;		/* current transition value */
 #define FIELDNO_AGGSTATEPERGROUPDATA_TRANSVALUEISNULL 1
 	/* NOTE(review): presumably true when transValue is NULL; see note below */
 	bool		transValueIsNull;

 #define FIELDNO_AGGSTATEPERGROUPDATA_NOTRANSVALUE 2
 	bool		noTransValue;	/* true if transValue not set yet */

 	/*
 	 * Note: noTransValue initially has the same value as transValueIsNull,
 	 * and if true both are cleared to false at the same time.  They are not
 	 * the same though: if transfn later returns a NULL, we want to keep that
 	 * NULL and not auto-replace it with a later input value. Only the first
 	 * non-NULL input will be auto-substituted.
 	 */
 }			AggStatePerGroupData;

 /*
  * AggStatePerPhaseData - per-grouping-set-phase state
  *
  * Grouping sets are divided into "phases", where a single phase can be
  * processed in one pass over the input. If there is more than one phase, then
  * at the end of input from the current phase, state is reset and another pass
  * taken over the data which has been re-sorted in the mean time.
  *
  * Accordingly, each phase specifies a list of grouping sets and group clause
  * information, plus each phase after the first also has a sort order.
  */
 typedef struct AggStatePerPhaseData
 {
 	/* Aggregation strategy used for this phase. */
 	AggStrategy aggstrategy;

 	/* How many grouping sets this phase has; may be 0. */
 	int			numsets;

 	/* Lengths of the grouping sets. */
 	int		   *gset_lengths;

 	/* Column groupings for rollup. */
 	Bitmapset **grouped_cols;

 	/*
 	 * Expressions returning equality; the array is indexed by the number of
 	 * columns to compare.
 	 */
 	ExprState **eqfunctions;

 	/* Agg node for this phase's data. */
 	Agg		   *aggnode;

 	/* Sort node giving the input ordering for this phase. */
 	Sort	   *sortnode;

 	/* Evaluation of the transition functions. */
 	ExprState  *evaltrans;

 	/*
 	 * NOTE(review): the original annotation on both fields reads only
 	 * "on per gset"; presumably one entry per grouping set -- confirm.
 	 */
 	int		   *group_id;
 	int		   *gset_id;

 	/*
 	 * Cached variants of the compiled expression.
 	 *
 	 * First index:  0 = outerops, 1 = TTSOpsMinimalTuple.
 	 * Second index: 0 = no NULL check, 1 = with NULL check.
 	 */
 	ExprState  *evaltrans_cache[2][2];
 }			AggStatePerPhaseData;

 /*
  * AggStatePerHashData - per-hashtable state
  *
  * When doing grouping sets with hashing, we have one of these for each
  * grouping set. (When doing hashing without grouping sets, we have just one of
  * them.)
  */
 typedef struct AggStatePerHashData
 {
 	/*
 	 * State for one hash table: the table itself, an iterator and a load
 	 * slot for it, the per-grouping-field hash and equality functions, and
 	 * the column-index mappings between the input slot and the hash table
 	 * tuples.
 	 *
 	 * NOTE(review): the lengths of the pointer-typed arrays below are not
 	 * stated here; presumably numCols or numhashGrpCols -- confirm in
 	 * nodeAgg.c before relying on either.
 	 */
 	TupleHashTable hashtable;	/* hash table with one entry per group */
 	TupleHashIterator hashiter; /* for iterating through hash table */
 	TupleTableSlot *hashslot;	/* slot for loading hash table */
 	FmgrInfo   *hashfunctions;	/* per-grouping-field hash fns */
 	Oid		   *eqfuncoids;		/* per-grouping-field equality fns */
 	int			numCols;		/* number of hash key columns */
 	int			numhashGrpCols; /* number of columns in hash table */
 	int			largestGrpColIdx;	/* largest col required for hashing */
 	AttrNumber *hashGrpColIdxInput; /* hash col indices in input slot */
 	AttrNumber *hashGrpColIdxHash;	/* indices in hash table tuples */
 	Agg		   *aggnode;		/* original Agg node, for numGroups etc. */
 }			AggStatePerHashData;


 /* Functions declared here are implemented in nodeAgg.c. */
 extern AggState *ExecInitAgg(Agg *node, EState *estate, int eflags);
 extern void ExecEndAgg(AggState *node);
 extern void ExecReScanAgg(AggState *node);

 extern Size hash_agg_entry_size(int numTrans,
 								Size tupleWidth,
 								Size transitionSpace);

 /*
  * NOTE(review): mem_limit, ngroups_limit and num_partitions are pointers,
  * presumably outputs filled in by the callee -- confirm in nodeAgg.c.
  */
 extern void hash_agg_set_limits(AggState *aggstate,
 								double hashentrysize,
 								double input_groups,
 								int used_bits,
 								Size *mem_limit,
 								uint64 *ngroups_limit,
 								int *num_partitions);

 extern Datum aggregate_dummy(PG_FUNCTION_ARGS);

 extern void ExecSquelchAgg(AggState *aggstate, bool force);
 extern bool ReuseHashTable(AggState *node);


 /* parallel instrumentation support */
 extern void ExecAggEstimate(AggState *node,
 							ParallelContext *pcxt);
 extern void ExecAggInitializeDSM(AggState *node,
 								 ParallelContext *pcxt);
 extern void ExecAggInitializeWorker(AggState *node,
 									ParallelWorkerContext *pwcxt);
 extern void ExecAggRetrieveInstrumentation(AggState *node);

 #endif							/* NODEAGG_H */
	/*-------------------------------------------------------------------------
	*
	* nodeAgg.h
	* prototypes for nodeAgg.c
	*
	*
	* Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group
	* Portions Copyright (c) 1994, Regents of the University of California
	*
	* src/include/executor/nodeAgg.h
	*
	*-------------------------------------------------------------------------
	*/
	#ifndef NODEAGG_H
	#define NODEAGG_H

	#include "access/parallel.h"
	#include "nodes/execnodes.h"


	/*
	* AggStatePerTransData - per aggregate state value information
	*
	* Working state for updating the aggregate's state value, by calling the
	* transition function with an input row. This struct does not store the
	* information needed to produce the final aggregate result from the transition
	* state, that's stored in AggStatePerAggData instead. This separation allows
	* multiple aggregate results to be produced from a single state value.
	*/
	typedef struct AggStatePerTransData
	{
	/*
	* These values are set up during ExecInitAgg() and do not change
	* thereafter:
	*/

	/*
	* Link to an Aggref expr this state value is for.
	*
	* There can be multiple Aggref's sharing the same state value, so long as
	* the inputs and transition functions are identical and the final
	* functions are not read-write. This points to the first one of them.
	*/
	Aggref *aggref;

	/*
	* Is this state value actually being shared by more than one Aggref?
	*/
	bool aggshared;

	/*
	* True for ORDER BY and DISTINCT Aggrefs that are not aggpresorted.
	*/
	bool aggsortrequired;

	/*
	* Number of aggregated input columns. This includes ORDER BY expressions
	* in both the plain-agg and ordered-set cases. Ordered-set direct args
	* are not counted, though.
	*/
	int numInputs;

	/*
	* Number of aggregated input columns to pass to the transfn. This
	* includes the ORDER BY columns for ordered-set aggs, but not for plain
	* aggs. (This doesn't count the transition state value!)
	*/
	int numTransInputs;

	/* Oid of the state transition or combine function */
	Oid transfn_oid;

	/* Oid of the serialization function or InvalidOid */
	Oid serialfn_oid;

	/* Oid of the deserialization function or InvalidOid */
	Oid deserialfn_oid;

	/* Oid of state value's datatype */
	Oid aggtranstype;

	/*
	* fmgr lookup data for transition function or combine function. Note in
	* particular that the fn_strict flag is kept here.
	*/
	FmgrInfo transfn;

	/* fmgr lookup data for serialization function */
	FmgrInfo serialfn;

	/* fmgr lookup data for deserialization function */
	FmgrInfo deserialfn;

	/* Input collation derived for aggregate */
	Oid aggCollation;

	/* number of sorting columns */
	int numSortCols;

	/* number of sorting columns to consider in DISTINCT comparisons */
	/* (this is either zero or the same as numSortCols) */
	int numDistinctCols;

	/* deconstructed sorting information (arrays of length numSortCols) */
	AttrNumber *sortColIdx;
	Oid *sortOperators;
	Oid *sortCollations;
	bool *sortNullsFirst;

	/*
	* Comparators for input columns --- only set/used when aggregate has
	* DISTINCT flag. equalfnOne version is used for single-column
	* comparisons, equalfnMulti for the case of multiple columns.
	*/
	FmgrInfo equalfnOne;
	ExprState *equalfnMulti;

	/*
	* initial value from pg_aggregate entry
	*/
	Datum initValue;
	bool initValueIsNull;

	/*
	* We need the len and byval info for the agg's input and transition data
	* types in order to know how to copy/delete values.
	*
	* Note that the info for the input type is used only when handling
	* DISTINCT aggs with just one argument, so there is only one input type.
	*/
	int16 inputtypeLen,
	transtypeLen;
	bool inputtypeByVal,
	transtypeByVal;

	/*
	* Slots for holding the evaluated input arguments. These are set up
	* during ExecInitAgg() and then used for each input row requiring either
	* FILTER or ORDER BY/DISTINCT processing.
	*/
	TupleTableSlot sortslot; / current input tuple */
	TupleTableSlot uniqslot; / used for multi-column DISTINCT */
	TupleDesc sortdesc; /* descriptor of input tuples */
	Datum lastdatum; /* used for single-column DISTINCT */
	bool lastisnull; /* used for single-column DISTINCT */
	bool haslast; /* got a last value for DISTINCT check */

	/*
	* These values are working state that is initialized at the start of an
	* input tuple group and updated for each input tuple.
	*
	* For a simple (non DISTINCT/ORDER BY) aggregate, we just feed the input
	* values straight to the transition function. If it's DISTINCT or
	* requires ORDER BY, we pass the input values into a Tuplesort object;
	* then at completion of the input tuple group, we scan the sorted values,
	* eliminate duplicates if needed, and run the transition function on the
	* rest.
	*
	* We need a separate tuplesort for each grouping set.
	*/

	Tuplesortstate *sortstates; / sort objects, if DISTINCT or ORDER BY */

	/*
	* This field is a pre-initialized FunctionCallInfo struct used for
	* calling this aggregate's transfn. We save a few cycles per row by not
	* re-initializing the unchanging fields; which isn't much, but it seems
	* worth the extra space consumption.
	*/
	FunctionCallInfo transfn_fcinfo;

	/* Likewise for serialization and deserialization functions */
	FunctionCallInfo serialfn_fcinfo;

	FunctionCallInfo deserialfn_fcinfo;
	} AggStatePerTransData;

	/*
	* AggStatePerAggData - per-aggregate information
	*
	* This contains the information needed to call the final function, to produce
	* a final aggregate result from the state value. If there are multiple
	* identical Aggrefs in the query, they can all share the same per-agg data.
	*
	* These values are set up during ExecInitAgg() and do not change thereafter.
	*/
	typedef struct AggStatePerAggData
	{
		/*
		 * The Aggref expression this entry belongs to.
		 *
		 * Several identical Aggrefs may share one per-agg entry; this is the
		 * first of them.
		 */
		Aggref	   *aggref;

		/* Index of the state value this aggregate should use. */
		int			transno;

		/* Oid of the final function; optional, so it may be InvalidOid. */
		Oid			finalfn_oid;

		/*
		 * fmgr lookup data for the final function.  Valid only when
		 * finalfn_oid is not InvalidOid.
		 */
		FmgrInfo	finalfn;

		/*
		 * How many arguments the finalfn is called with.  Always at least 1
		 * (the transition state value) plus any ordered-set direct args.
		 * When the finalfn wants extra args, nulls corresponding to the
		 * aggregated input columns are passed.
		 */
		int			numFinalArgs;

		/* ExprStates for the direct-argument expressions, if any. */
		List	   *aggdirectargs;

		/*
		 * Length and by-value information for the aggregate's result data
		 * type, needed to know how to copy/delete values.
		 */
		int16		resulttypeLen;
		bool		resulttypeByVal;

		/*
		 * False when this aggregate cannot share state values with other
		 * aggregates because its final function is read-write.
		 */
		bool		shareable;
	}			AggStatePerAggData;

	/*
	* AggStatePerGroupData - per-aggregate-per-group working state
	*
	* These values are working state that is initialized at the start of
	* an input tuple group and updated for each input tuple.
	*
	* In AGG_PLAIN and AGG_SORTED modes, we have a single array of these
	* structs (pointed to by aggstate->pergroup); we re-use the array for
	* each input group, if it's AGG_SORTED mode. In AGG_HASHED mode, the
	* hash table contains an array of these structs for each tuple group.
	*
	* Logically, the sortstate field belongs in this struct, but we do not
	* keep it here for space reasons: we don't support DISTINCT aggregates
	* in AGG_HASHED mode, so there's no reason to use up a pointer field
	* in every entry of the hashtable.
	*/
	typedef struct AggStatePerGroupData
	{
		/* The current transition value. */
	#define FIELDNO_AGGSTATEPERGROUPDATA_TRANSVALUE 0
		Datum		transValue;

	#define FIELDNO_AGGSTATEPERGROUPDATA_TRANSVALUEISNULL 1
		bool		transValueIsNull;

		/* True while transValue has not been set yet. */
	#define FIELDNO_AGGSTATEPERGROUPDATA_NOTRANSVALUE 2
		bool		noTransValue;

		/*
		 * noTransValue starts out equal to transValueIsNull, and when it is
		 * true both flags are cleared to false together.  The two are still
		 * distinct: should transfn return a NULL later on, that NULL must be
		 * kept rather than auto-replaced by a later input value.  Only the
		 * first non-NULL input is auto-substituted.
		 */
	}			AggStatePerGroupData;

	/*
	* AggStatePerPhaseData - per-grouping-set-phase state
	*
	* Grouping sets are divided into "phases", where a single phase can be
	* processed in one pass over the input. If there is more than one phase, then
	* at the end of input from the current phase, state is reset and another pass
	* taken over the data which has been re-sorted in the mean time.
	*
	* Accordingly, each phase specifies a list of grouping sets and group clause
	* information, plus each phase after the first also has a sort order.
	*/
	typedef struct AggStatePerPhaseData
	{
	AggStrategy aggstrategy; /* strategy for this phase */
	int numsets; /* number of grouping sets (or 0) */
	int gset_lengths; / lengths of grouping sets */
	Bitmapset *grouped_cols; / column groupings for rollup */
	ExprState *eqfunctions; / expression returning equality, indexed by
	* nr of cols to compare */
	Agg aggnode; / Agg node for phase data */
	Sort sortnode; / Sort node for input ordering for phase */

	ExprState evaltrans; / evaluation of transition functions */

	int group_id; / on per gset */
	int gset_id; / on per gset */

	/* cached variants of the compiled expression */
	ExprState *evaltrans_cache
	[2] /* 0: outerops; 1: TTSOpsMinimalTuple */
	[2]; /* 0: no NULL check; 1: with NULL check */
	} AggStatePerPhaseData;

	/*
	* AggStatePerHashData - per-hashtable state
	*
	* When doing grouping sets with hashing, we have one of these for each
	* grouping set. (When doing hashing without grouping sets, we have just one of
	* them.)
	*/
	typedef struct AggStatePerHashData
	{
	TupleHashTable hashtable; /* hash table with one entry per group */
	TupleHashIterator hashiter; /* for iterating through hash table */
	TupleTableSlot hashslot; / slot for loading hash table */
	FmgrInfo hashfunctions; / per-grouping-field hash fns */
	Oid eqfuncoids; / per-grouping-field equality fns */
	int numCols; /* number of hash key columns */
	int numhashGrpCols; /* number of columns in hash table */
	int largestGrpColIdx; /* largest col required for hashing */
	AttrNumber hashGrpColIdxInput; / hash col indices in input slot */
	AttrNumber hashGrpColIdxHash; / indices in hash table tuples */
	Agg aggnode; / original Agg node, for numGroups etc. */
	} AggStatePerHashData;


	/*
	 * Prototypes for nodeAgg.c.  Node, plan, executor-state and parallel
	 * context arguments are all passed by pointer, and ExecInitAgg() returns
	 * a pointer to the AggState.
	 */
	extern AggState *ExecInitAgg(Agg *node, EState *estate, int eflags);
	extern void ExecEndAgg(AggState *node);
	extern void ExecReScanAgg(AggState *node);

	extern Size hash_agg_entry_size(int numTrans, Size tupleWidth,
									Size transitionSpace);

	/*
	 * NOTE(review): mem_limit, ngroups_limit and num_partitions are pointers,
	 * presumably outputs filled in by the callee -- confirm in nodeAgg.c.
	 */
	extern void hash_agg_set_limits(AggState *aggstate, double hashentrysize, double input_groups,
									int used_bits, Size *mem_limit,
									uint64 *ngroups_limit, int *num_partitions);

	extern Datum aggregate_dummy(PG_FUNCTION_ARGS);

	extern void ExecSquelchAgg(AggState *aggstate, bool force);
	extern bool ReuseHashTable(AggState *node);


	/* parallel instrumentation support */
	extern void ExecAggEstimate(AggState *node, ParallelContext *pcxt);
	extern void ExecAggInitializeDSM(AggState *node, ParallelContext *pcxt);
	extern void ExecAggInitializeWorker(AggState *node, ParallelWorkerContext *pwcxt);
	extern void ExecAggRetrieveInstrumentation(AggState *node);

	#endif /* NODEAGG_H */