| /* |
| * Licensed to the Apache Software Foundation (ASF) under one |
| * or more contributor license agreements. See the NOTICE file |
| * distributed with this work for additional information |
| * regarding copyright ownership. The ASF licenses this file |
| * to you under the Apache License, Version 2.0 (the |
| * "License"); you may not use this file except in compliance |
| * with the License. You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
| |
| /*------------------------------------------------------------------------- |
| * |
| * execnodes.h |
| * definitions for executor state nodes |
| * |
| * |
| * Portions Copyright (c) 2005-2009, Greenplum inc |
| * Portions Copyright (c) 1996-2009, PostgreSQL Global Development Group |
| * Portions Copyright (c) 1994, Regents of the University of California |
| * |
| * $PostgreSQL: pgsql/src/include/nodes/execnodes.h,v 1.161.2.2 2007/04/26 23:24:57 tgl Exp $ |
| * |
| *------------------------------------------------------------------------- |
| */ |
| #ifndef EXECNODES_H |
| #define EXECNODES_H |
| |
| #ifdef HAVE_SYS_TIME_H |
| #include <sys/time.h> |
| #endif |
| |
| #include "nodes/params.h" |
| #include "nodes/plannodes.h" |
| #include "nodes/relation.h" |
| #include "utils/hsearch.h" |
| #include "access/tupdesc.h" |
| #include "utils/relcache.h" |
| #include "gpmon/gpmon.h" /* gpmon_packet_t */ |
| #include "utils/memaccounting.h" |
| |
| |
| /* |
| * Upper bound on the number of grouping attributes in a grouping extension |
| * query. Grouping is defined as a uint64 internally (see |
| * ExprContext.grouping), which limits the maximum to 64. |
| */ |
| #define MAX_GROUPING_ATTRS_IN_GROUPING_EXTENSION 64 |
| |
| /* |
| * Partition selector ids start from 1, so 0 never names a real selector and |
| * is sometimes used to initialize variables that do not hold an id yet. |
| */ |
| #define InvalidPartitionSelectorId 0 |
| |
| struct CdbDispatchResults; /* in cdbdispatchresult.h */ |
| struct CdbExplain_ShowStatCtx; /* private, in "cdb/cdbexplain.c"; pointed to by EState.showstatctx */ |
| struct ChunkTransportState; /* #include "cdb/cdbinterconnect.h"; pointed to by EState.interconnect_context */ |
| struct IndexInfo; /* #include "catalog/index.h"; pointed to by ResultRelInfo.ri_IndexRelationInfo */ |
| struct StringInfoData; /* #include "lib/stringinfo.h" */ |
| struct Tuplestorestate; /* #include "utils/tuplestore.h"; pointed to by FuncExprState.funcResultStore */ |
| struct TupleTableSlot; /* forward declaration; slot pointer fields throughout this header use it */ |
| struct TupleTableData; /* forward declaration; pointed to by EState.es_tupleTable */ |
| struct MemTupleBinding; /* forward declaration; pointed to by ResultRelInfo.mt_bind */ |
| struct SnapshotData; /* forward declaration; pointed to by EState.es_snapshot and es_crosscheck_snapshot */ |
| struct MemTupleData; /* forward declaration; pointed to by TupleHashEntryData.firstTuple */ |
| struct HeapScanDescData; |
| struct IndexScanDescData; |
| struct FileScanDescData; |
| struct TBMIterateResult; |
| struct TriggerDesc; /* forward declaration; pointed to by ResultRelInfo.ri_TrigDesc */ |
| struct SliceTable; /* forward declaration; pointed to by EState.es_sliceTable */ |
| |
| /* ---------------- |
| * IndexInfo information |
| * |
| * CDB: Moved declaration into "catalog/index.h" from "nodes/execnodes.h" |
| * ---------------- |
| */ |
| |
| /* ---------------- |
| * ExprContext_CB |
| * |
| * One node of the singly linked list of callbacks to be called at |
| * ExprContext shutdown. ExprContext.ecxt_callbacks points at the list. |
| * |
| * ExprContextCallbackFunction is the callback signature: it takes a |
| * single Datum argument and returns nothing. |
| * ---------------- |
| */ |
| typedef void (*ExprContextCallbackFunction) (Datum arg); |
| |
| typedef struct ExprContext_CB |
| { |
| struct ExprContext_CB *next; /* next entry in the callback list */ |
| ExprContextCallbackFunction function; /* routine to call back */ |
| Datum arg; /* stored with the callback; presumably the value passed to function -- confirm at the call site */ |
| } ExprContext_CB; |
| |
| /* ---------------- |
| * ExprContext |
| * |
| * This class holds the "current context" information |
| * needed to evaluate expressions for doing tuple qualifications |
| * and tuple projections. For example, if an expression refers |
| * to an attribute in the current inner tuple then we need to know |
| * what the current inner tuple is and so we look at the expression |
| * context. |
| * |
| * There are two memory contexts associated with an ExprContext: |
| * * ecxt_per_query_memory is a query-lifespan context, typically the same |
| * context the ExprContext node itself is allocated in. This context |
| * can be used for purposes such as storing function call cache info. |
| * * ecxt_per_tuple_memory is a short-term context for expression results. |
| * As the name suggests, it will typically be reset once per tuple, |
| * before we begin to evaluate expressions for that tuple. Each |
| * ExprContext normally has its very own per-tuple memory context. |
| * |
| * CurrentMemoryContext should be set to ecxt_per_tuple_memory before |
| * calling ExecEvalExpr() --- see ExecEvalExprSwitchContext(). |
| * ---------------- |
| */ |
| typedef struct ExprContext |
| { |
| NodeTag type; |
| |
| /* Tuples that Var nodes in expression may refer to */ |
| struct TupleTableSlot *ecxt_scantuple; |
| struct TupleTableSlot *ecxt_innertuple; |
| struct TupleTableSlot *ecxt_outertuple; |
| |
| /* Memory contexts for expression evaluation --- see notes above */ |
| MemoryContext ecxt_per_query_memory; /* query-lifespan context */ |
| MemoryContext ecxt_per_tuple_memory; /* short-term context, typically reset once per tuple */ |
| |
| /* Values to substitute for Param nodes in expression */ |
| ParamExecData *ecxt_param_exec_vals; /* for PARAM_EXEC params */ |
| ParamListInfo ecxt_param_list_info; /* for other param types */ |
| |
| /* |
| * Values to substitute for Aggref nodes in the expressions of an Agg |
| * node, or for WindowFunc nodes within a WindowAgg node. |
| */ |
| Datum *ecxt_aggvalues; /* precomputed values for aggs/windowfuncs */ |
| bool *ecxt_aggnulls; /* null flags for aggs/windowfuncs */ |
| |
| /* Value to substitute for CaseTestExpr nodes in expression */ |
| Datum caseValue_datum; |
| bool caseValue_isNull; |
| |
| /* Value to substitute for CoerceToDomainValue nodes in expression */ |
| Datum domainValue_datum; |
| bool domainValue_isNull; |
| |
| /* Link to containing EState (NULL if a standalone ExprContext) */ |
| struct EState *ecxt_estate; |
| |
| /* Functions to call back when ExprContext is shut down */ |
| ExprContext_CB *ecxt_callbacks; /* linked through ExprContext_CB.next */ |
| |
| /* |
| * The final grouping and group_id values for a tuple in a grouping |
| * extension query. grouping is a uint64; that width is the reason |
| * MAX_GROUPING_ATTRS_IN_GROUPING_EXTENSION is 64. |
| */ |
| uint64 grouping; |
| uint32 group_id; |
| } ExprContext; |
| |
| /* ---------------- |
| * Support for functions that might return sets (multiple rows) |
| * |
| * CDB: Moved these declarations into "fmgr.h" from "nodes/execnodes.h"... |
| * enum ExprDoneCond; |
| * enum SetFunctionReturnMode; |
| * struct ReturnSetInfo; |
| * ---------------- |
| */ |
| |
| /* ---------------- |
| * ProjectionInfo node information |
| * |
| * This is all the information needed to perform projections --- |
| * that is, form new tuples by evaluation of targetlist expressions. |
| * Nodes which need to do projections create one of these. |
| * |
| * ExecProject() evaluates the tlist, forms a tuple, and stores it |
| * in the given slot. Note that the result will be a "virtual" tuple |
| * unless ExecMaterializeSlot() is then called to force it to be |
| * converted to a physical tuple. The slot must have a tupledesc |
| * that matches the output of the tlist! |
| * |
| * The planner very often produces tlists that consist entirely of |
| * simple Var references (lower levels of a plan tree almost always |
| * look like that). So we have an optimization to handle that case |
| * with minimum overhead. |
| * |
| * targetlist target list for projection |
| * exprContext expression context in which to evaluate targetlist |
| * slot slot to place projection result in |
| * itemIsDone workspace for ExecProject |
| * isVarList TRUE if simple-Var-list optimization applies |
| * varSlotOffsets array indicating which slot each simple Var is from |
| * varNumbers array indicating attr numbers of simple Vars |
| * lastInnerVar highest attnum from inner tuple slot (0 if none) |
| * lastOuterVar highest attnum from outer tuple slot (0 if none) |
| * lastScanVar highest attnum from scan tuple slot (0 if none) |
| * ---------------- |
| */ |
| typedef struct ProjectionInfo |
| { |
| NodeTag type; |
| List *pi_targetlist; /* target list for projection */ |
| ExprContext *pi_exprContext; /* expression context in which to evaluate targetlist */ |
| struct TupleTableSlot *pi_slot; /* slot to place projection result in */ |
| ExprDoneCond *pi_itemIsDone; /* workspace for ExecProject */ |
| bool pi_isVarList; /* TRUE if simple-Var-list optimization applies */ |
| int *pi_varSlotOffsets; /* array indicating which slot each simple Var is from */ |
| int *pi_varNumbers; /* array indicating attr numbers of simple Vars */ |
| int pi_lastInnerVar; /* highest attnum from inner tuple slot (0 if none) */ |
| int pi_lastOuterVar; /* highest attnum from outer tuple slot (0 if none) */ |
| int pi_lastScanVar; /* highest attnum from scan tuple slot (0 if none) */ |
| } ProjectionInfo; |
| |
| /* ---------------- |
| * JunkFilter |
| * |
| * This class is used to store information regarding junk attributes. |
| * A junk attribute is an attribute in a tuple that is needed only for |
| * storing intermediate information in the executor, and does not belong |
| * in emitted tuples. For example, when we do an UPDATE query, |
| * the planner adds a "junk" entry to the targetlist so that the tuples |
| * returned to ExecutePlan() contain an extra attribute: the ctid of |
| * the tuple to be updated. This is needed to do the update, but we |
| * don't want the ctid to be part of the stored new tuple! So, we |
| * apply a "junk filter" to remove the junk attributes and form the |
| * real output tuple. |
| * |
| * targetList: the original target list (including junk attributes). |
| * cleanTupType: the tuple descriptor for the "clean" tuple (with |
| * junk attributes removed). |
| * cleanMap: A map with the correspondence between the non-junk |
| * attribute numbers of the "original" tuple and the |
| * attribute numbers of the "clean" tuple. |
| * resultSlot: tuple slot used to hold cleaned tuple. |
| * ---------------- |
| */ |
| typedef struct JunkFilter |
| { |
| NodeTag type; |
| List *jf_targetList; /* the original target list (including junk attributes) */ |
| TupleDesc jf_cleanTupType; /* tuple descriptor for the "clean" tuple (junk attributes removed) */ |
| AttrNumber *jf_cleanMap; /* maps non-junk attribute numbers of the original tuple to those of the clean tuple */ |
| struct TupleTableSlot *jf_resultSlot; /* tuple slot used to hold cleaned tuple */ |
| } JunkFilter; |
| |
| |
| |
| /* ---------------- |
| * ResultRelInfo information |
| * |
| * Whenever we update an existing relation, we have to |
| * update indices on the relation, and perhaps also fire triggers. |
| * The ResultRelInfo class is used to hold all the information needed |
| * about a result relation, including indices.. -cim 10/15/89 |
| * |
| * The names below omit the "ri_" prefix that most struct members carry |
| * (tupdesc_match and mt_bind are the two members without it). |
| * |
| * RangeTableIndex result relation's range table index |
| * RelationDesc relation descriptor for result relation |
| * NumIndices # of indices existing on result relation |
| * IndexRelationDescs array of relation descriptors for indices |
| * IndexRelationInfo array of key/attr info for indices |
| * TrigDesc triggers to be fired, if any |
| * TrigFunctions cached lookup info for trigger functions |
| * TrigInstrument optional runtime measurements for triggers |
| * ConstraintExprs array of constraint-checking expr states |
| * junkFilter for removing junk attributes from tuples |
| * projectReturning for computing a RETURNING list |
| * tupdesc_match (int) purpose not documented in this header -- TODO: document |
| * mt_bind (struct MemTupleBinding *) purpose not documented in this header -- TODO: document |
| * aoInsertDesc context for appendonly relation buffered INSERT |
| * extInsertDesc context for external table INSERT |
| * parquetInsertDesc context for parquet table INSERT |
| * parquetSendBack information to be sent back to dispatch after INSERT in a parquet table |
| * aosegnos List; presumably the AO segfile number(s) we inserted into -- TODO confirm |
| * aosegfileinfos List; presumably AO segment file info (compare EState.es_result_segfileinfos) -- TODO confirm |
| * aoprocessed tuples processed for AO |
| * partInsertMap map input attrno to target attrno |
| * partSlot TupleTableSlot for the target part relation |
| * resultSlot TupleTableSlot for the target relation |
| * ---------------- |
| */ |
| typedef struct ResultRelInfo |
| { |
| NodeTag type; |
| Index ri_RangeTableIndex; |
| Relation ri_RelationDesc; |
| int ri_NumIndices; |
| RelationPtr ri_IndexRelationDescs; |
| struct IndexInfo **ri_IndexRelationInfo; |
| struct TriggerDesc *ri_TrigDesc; |
| FmgrInfo *ri_TrigFunctions; |
| struct Instrumentation *ri_TrigInstrument; |
| List **ri_ConstraintExprs; |
| JunkFilter *ri_junkFilter; |
| ProjectionInfo *ri_projectReturning; |
| int tupdesc_match; |
| struct MemTupleBinding *mt_bind; |
| |
| struct AppendOnlyInsertDescData *ri_aoInsertDesc; |
| |
| struct ExternalInsertDescData *ri_extInsertDesc; |
| struct ParquetInsertDescData *ri_parquetInsertDesc; |
| struct QueryContextDispatchingSendBackData *ri_parquetSendBack; |
| |
| List *ri_aosegnos; |
| |
| List *ri_aosegfileinfos; |
| uint64 ri_aoprocessed; /* tuples processed for AO */ |
| struct AttrMap *ri_partInsertMap; |
| struct TupleTableSlot *ri_partSlot; |
| struct TupleTableSlot *ri_resultSlot; |
| |
| } ResultRelInfo; |
| |
| typedef struct ShareNodeEntry /* reference-counted pairing of a shared plan node with its state; presumably what EState.es_sharenode lists hold -- TODO confirm */ |
| { |
| NodeTag type; |
| |
| Node *sharePlan; /* NOTE(review): presumably the plan node being shared -- confirm */ |
| Node *shareState; /* NOTE(review): presumably the matching executor state node -- confirm */ |
| int refcount; /* reference count to guard from too-eager-free risk */ |
| } ShareNodeEntry; |
| |
| /* |
| * PartitionAccessMethods |
| * Defines the lookup access methods for partitions, one for each level. |
| * PartitionState and PartitionMetadata each hold a pointer to one. |
| */ |
| typedef struct PartitionAccessMethods |
| { |
| /* Number of partition levels */ |
| int partLevels; |
| |
| /* |
| * Access methods, one for each level. Each element is an opaque |
| * (void *) state pointer; its concrete type is not visible here. |
| */ |
| void **amstate; |
| |
| /* Memory context for access methods */ |
| MemoryContext part_cxt; |
| } PartitionAccessMethods; |
| |
| typedef struct PartitionState /* partitioning state info; EState.es_partition_state points to one */ |
| { |
| NodeTag type; |
| |
| AttrNumber max_partition_attr; /* NOTE(review): undocumented; presumably the highest attribute number used for partitioning -- confirm */ |
| int result_partition_array_size; /* max elements of result relation array */ |
| HTAB *result_partition_hash; /* dynahash table; NOTE(review): key and entry layout are not visible in this header */ |
| PartitionAccessMethods *accessMethods; /* lookup access methods, one for each partition level */ |
| } PartitionState; |
| |
| /* |
| * PartitionMetadata |
| * Defines the metadata for partitions: a PartitionNode together with |
| * the lookup access methods for it. DynamicTableScanInfo.partsMetadata |
| * is described as holding partitioning metadata; presumably a list of |
| * these -- TODO confirm. |
| */ |
| typedef struct PartitionMetadata |
| { |
| PartitionNode *partsAndRules; /* NOTE(review): presumably the partitions and their rules, going by the name -- confirm */ |
| PartitionAccessMethods *accessMethods; /* lookup access methods, one for each partition level */ |
| } PartitionMetadata; |
| |
| /* |
| * PartOidEntry |
| * Defines an entry in the shared partOid hash table: one leaf partition |
| * oid plus the partition selectors that produced it. |
| */ |
| typedef struct PartOidEntry |
| { |
| /* oid of an individual leaf partition */ |
| Oid partOid; |
| |
| /* list of partition selectors that produced the above part oid */ |
| List *selectorList; |
| } PartOidEntry; |
| |
| /* |
| * DynamicPartitionIterator |
| * Defines the iterator state to iterate over a set of partitions. |
| * DynamicTableScanInfo.iterators holds pointers to these. |
| */ |
| typedef struct DynamicPartitionIterator |
| { |
| /* An HTAB of partition oids to work on. */ |
| HTAB *partitionOids; |
| |
| /* The current HTAB iterator (a dynahash HASH_SEQ_STATUS) */ |
| HASH_SEQ_STATUS *partitionIterator; |
| |
| /* |
| * True while the HTAB has not been completely iterated, in which |
| * case we need to call hash_seq_term. |
| */ |
| bool shouldCallHashSeqTerm; |
| |
| /* Is this the first partition of the HTAB? */ |
| bool firstPartition; |
| |
| /* The current partition's relation */ |
| Relation currentRelation; |
| |
| /* |
| * The attribute mapping to use to convert varattno for an |
| * out-dated expression. Expressions become out-dated when dropped |
| * attributes differ between the partition at the last iterator |
| * position and the partition at the current iterator position. |
| */ |
| AttrNumber *attMap; |
| |
| /* The relation oid at current iterator position. */ |
| Oid attMapRelOid; |
| |
| /* |
| * The per-partition memory context, used to prevent memory leaks |
| * while processing multiple partitions. |
| */ |
| MemoryContext partitionMemoryContext; |
| } DynamicPartitionIterator; |
| |
| /* |
| * DynamicTableScanInfo |
| * Encapsulate the information that is needed to maintain the pid indexes |
| * for all dynamic table scans in a plan. EState.dynamicTableScanInfo |
| * and the global of the same name both point to one of these. |
| */ |
| typedef struct DynamicTableScanInfo |
| { |
| /* |
| * The total number of unique dynamic table scans in the plan. |
| */ |
| int numScans; |
| |
| /* |
| * List containing the number of partition selectors for every scan id. |
| * Element #i in the list corresponds to scan id i. |
| */ |
| List *numSelectorsPerScanId; |
| |
| /* |
| * An array of pid indexes (hash tables), one for each unique dynamic |
| * table scan. Each of these pid indexes maintains the unique pids that |
| * are involved in the scan. |
| */ |
| HTAB **pidIndexes; |
| |
| /* |
| * An array of *pointers* to DynamicPartitionIterator to record the |
| * current hash table iterator position. |
| */ |
| DynamicPartitionIterator **iterators; |
| |
| /* |
| * Partitioning metadata for all relevant partition tables. |
| */ |
| List *partsMetadata; |
| |
| /* |
| * The memory context in which pidIndexes are allocated. |
| */ |
| MemoryContext memoryContext; |
| } DynamicTableScanInfo; |
| |
| /* |
| * Number of pids used when initializing the pid-index hash table for each |
| * dynamic table scan (see DynamicTableScanInfo.pidIndexes). |
| */ |
| #define INITIAL_NUM_PIDS 1000 |
| |
| /* |
| * The initial estimated size of the dynamic table scan pid-index array, and |
| * also the default number of slots added when the array runs out of space. |
| */ |
| #define NUM_PID_INDEXES_ADDED 10 |
| |
| /* |
| * The global variable for the information relevant to dynamic table scans. |
| * During execution, this will point to the value initialized in EState |
| * (the EState field has the same name, dynamicTableScanInfo). |
| */ |
| extern DynamicTableScanInfo *dynamicTableScanInfo; |
| |
| /* ---------------- |
| * EState information |
| * |
| * Master working state for an Executor invocation. |
| * |
| * The EState lives in es_query_cxt, the per-query memory context. Each |
| * subplan has its own EState (see subplanLevel); when es_is_subquery is |
| * true, es_query_cxt does not belong to this EState. |
| * ---------------- |
| */ |
| typedef struct EState |
| { |
| NodeTag type; |
| |
| /* Basic state for all query types: */ |
| ScanDirection es_direction; /* current scan direction */ |
| struct SnapshotData *es_snapshot; /* time qual to use */ |
| struct SnapshotData *es_crosscheck_snapshot; /* crosscheck time qual for RI */ |
| List *es_range_table; /* List of RangeTblEntry */ |
| |
| /* Info about target table for insert/update/delete queries: */ |
| ResultRelInfo *es_result_relations; /* array of ResultRelInfos */ |
| int es_num_result_relations; /* length of array */ |
| ResultRelInfo *es_result_relation_info; /* currently active array elt */ |
| JunkFilter *es_junkFilter; /* currently active junk filter */ |
| |
| Oid es_last_parq_part; /* The Oid of the last parquet partition we opened for insertion */ |
| |
| /* partitioning info for target relation */ |
| PartitionNode *es_result_partitions; |
| |
| /* AO segment file number for target relation */ |
| List *es_result_aosegnos; |
| |
| /* AO segment file info for target relation */ |
| List *es_result_segfileinfos; |
| |
| struct TupleTableSlot *es_trig_tuple_slot; /* for trigger output tuples */ |
| |
| /* Stuff used for SELECT INTO: */ |
| Relation es_into_relation_descriptor; |
| bool es_into_relation_is_bulkload; /* always false in gpsql */ |
| |
| ItemPointerData es_into_relation_last_heap_tid; |
| |
| /* Parameter info: */ |
| ParamListInfo es_param_list_info; /* values of external params */ |
| ParamExecData *es_param_exec_vals; /* values of internal params */ |
| |
| /* Other working state: */ |
| MemoryContext es_query_cxt; /* per-query context in which EState lives */ |
| |
| struct TupleTableData *es_tupleTable; /* Array of TupleTableSlots */ |
| |
| uint64 es_processed; /* # of tuples processed */ |
| Oid es_lastoid; /* last oid processed (by INSERT) */ |
| List *es_rowMarks; /* list of ExecRowMark; not good place, but there is no other */ |
| |
| bool es_is_subquery; /* true if subquery (es_query_cxt not mine) */ |
| |
| bool es_instrument; /* true requests runtime instrumentation */ |
| bool es_select_into; /* true if doing SELECT INTO */ |
| bool es_into_oids; /* true to generate OIDs in SELECT INTO */ |
| |
| List *into_aosegnos; /* AO file 'seg' numbers for into relation to use */ |
| List *es_exprcontexts; /* List of ExprContexts within EState */ |
| |
| /* |
| * this ExprContext is for per-output-tuple operations, such as constraint |
| * checks and index-value computations. It will be reset for each output |
| * tuple. Note that it will be created only if needed. |
| */ |
| ExprContext *es_per_tuple_exprcontext; |
| |
| /* Below is to re-evaluate plan qual in READ COMMITTED mode */ |
| PlannedStmt *es_plannedstmt; /* link to top of plan tree */ |
| struct evalPlanQual *es_evalPlanQual; /* chain of PlanQual states */ |
| bool *es_evTupleNull; /* local array of EPQ status */ |
| HeapTuple *es_evTuple; /* shared array of EPQ substitute tuples */ |
| bool es_useEvalPlan; /* evaluating EPQ tuples? */ |
| |
| /* Additions for MPP plan slicing. */ |
| struct SliceTable *es_sliceTable; |
| |
| /* Data structure for node sharing */ |
| List **es_sharenode; |
| |
| int active_recv_id; |
| void *motionlayer_context; /* Motion Layer state */ |
| struct ChunkTransportState *interconnect_context; /* Interconnect state */ |
| |
| /* MPP used resources */ |
| bool es_interconnect_is_setup; /* is interconnect set-up? */ |
| |
| bool es_got_eos; /* was end-of-stream received? */ |
| |
| bool cancelUnfinished; /* when we're cleaning up, we need to make sure that we know it */ |
| |
| /* results from qExec processes */ |
| struct DispatchData *dispatch_data; |
| |
| /* CDB: EXPLAIN ANALYZE statistics */ |
| struct CdbExplain_ShowStatCtx *showstatctx; |
| |
| /* CDB: partitioning state info */ |
| PartitionState *es_partition_state; |
| |
| /* |
| * The slice number for the current node that is |
| * being processed. During the tree traversal, |
| * this value is set by Motion and InitPlan nodes. |
| * |
| * currentSliceIdInPlan and currentExecutingSliceId |
| * are basically the same, except for InitPlan nodes. |
| * For InitPlan nodes, the nodes in the top slice have |
| * an assigned slice id in the plan, while the executing |
| * slice id for these nodes is the root slice id. |
| */ |
| int currentSliceIdInPlan; |
| int currentExecutingSliceId; |
| |
| /* |
| * Each subplan has its own EState. This value indicates |
| * the level of the corresponding subplan for this EState |
| * with respect to the main plan tree. |
| * |
| * This is used to determine whether we could eager free |
| * the Material node on top of Broadcast inside a subplan |
| * (for supporting correlated subqueries). The Material |
| * node can be eager-free'ed only when this value is 0. |
| */ |
| int subplanLevel; |
| |
| /* |
| * The root slice id for this EState. |
| */ |
| int rootSliceId; |
| |
| struct PlanState *planstate; /* plan's state tree */ |
| |
| /* |
| * Information relevant to dynamic table scans. |
| */ |
| DynamicTableScanInfo *dynamicTableScanInfo; |
| |
| /* |
| * Information relevant to running context. |
| */ |
| struct ProcessIdentity *ctx; |
| /* MemoryAccount that records the executor memory usage information. */ |
| MemoryAccount *memoryAccount; |
| } EState; |
| |
| struct PlanState; /* forward declaration; only pointers to it are needed by the prototypes below */ |
| struct MotionState; /* forward declaration; only pointers to it are needed by the prototypes below */ |
| |
| extern struct MotionState *getMotionState(struct PlanState *ps, int sliceIndex); /* NOTE(review): prototype only; lookup semantics not visible in this header */ |
| extern int LocallyExecutingSliceIndex(EState *estate); /* NOTE(review): presumably the slice index being executed locally -- confirm at the definition */ |
| extern int RootSliceIndex(EState *estate); /* NOTE(review): presumably related to EState.rootSliceId -- confirm at the definition */ |
| #ifdef USE_ASSERT_CHECKING /* the prototype below is declared only when USE_ASSERT_CHECKING is defined */ |
| extern void SliceLeafMotionStateAreValid(struct MotionState *ms); |
| #endif |
| |
| /* EState.es_rowMarks is a list of these structs, each describing one opened and RowShareLock'd relation: */ |
| typedef struct ExecRowMark |
| { |
| Relation relation; /* opened and RowShareLock'd relation */ |
| Index rti; /* its range table index */ |
| bool forUpdate; /* true = FOR UPDATE, false = FOR SHARE */ |
| bool noWait; /* NOWAIT option */ |
| char resname[32]; /* name for its ctid junk attribute (fixed 32-byte buffer) */ |
| } ExecRowMark; |
| |
| |
| /* ---------------------------------------------------------------- |
| * Tuple Hash Tables |
| * |
| * All-in-memory tuple hash tables are used for a number of purposes. |
| * ---------------------------------------------------------------- |
| */ |
| typedef struct TupleHashEntryData *TupleHashEntry; /* pointer to one hash table entry */ |
| typedef struct TupleHashTableData *TupleHashTable; /* pointer to a tuple hash table */ |
| |
| typedef struct TupleHashEntryData |
| { |
| /* firstTuple must be the first field in this struct! */ |
| struct MemTupleData *firstTuple; /* copy of first tuple in this group */ |
| /* |
| * there may be additional data beyond the end of this struct; |
| * TupleHashTableData.entrysize records the actual size of each entry |
| */ |
| } TupleHashEntryData; /* VARIABLE LENGTH STRUCT */ |
| |
| typedef struct TupleHashTableData /* an all-in-memory tuple hash table layered on a dynahash HTAB */ |
| { |
| HTAB *hashtab; /* underlying dynahash table */ |
| int numCols; /* number of columns in lookup key */ |
| AttrNumber *keyColIdx; /* attr numbers of key columns */ |
| FmgrInfo *eqfunctions; /* lookup data for comparison functions */ |
| FmgrInfo *hashfunctions; /* lookup data for hash functions */ |
| MemoryContext tablecxt; /* memory context containing table */ |
| MemoryContext tempcxt; /* context for function evaluations */ |
| Size entrysize; /* actual size to make each hash entry (entries are variable length, see TupleHashEntryData) */ |
| struct TupleTableSlot *tableslot; /* slot for referencing table entries */ |
| struct TupleTableSlot *inputslot; /* current input tuple's slot */ |
| } TupleHashTableData; |
| |
| typedef HASH_SEQ_STATUS TupleHashIterator; /* iterator state is simply dynahash's sequential-scan status */ |
| |
| /* |
| * Iteration helpers; each is a thin wrapper over the dynahash scan API: |
| * InitTupleHashIterator - hash_seq_init on the table's hashtab |
| * TermTupleHashIterator - hash_seq_term |
| * ResetTupleHashIterator - hash_freeze on the hashtab, then hash_seq_init |
| * ScanTupleHashTable - hash_seq_search, result cast to TupleHashEntry |
| * In every macro, iter is passed straight through to the hash_seq_* call. |
| * |
| * Use InitTupleHashIterator/TermTupleHashIterator for a read/write scan. |
| * Use ResetTupleHashIterator if the table can be frozen (in this case no |
| * explicit scan termination is needed). |
| */ |
| #define InitTupleHashIterator(htable, iter) \ |
| hash_seq_init(iter, (htable)->hashtab) |
| #define TermTupleHashIterator(iter) \ |
| hash_seq_term(iter) |
| #define ResetTupleHashIterator(htable, iter) \ |
| do { \ |
| hash_freeze((htable)->hashtab); \ |
| hash_seq_init(iter, (htable)->hashtab); \ |
| } while (0) |
| #define ScanTupleHashTable(iter) \ |
| ((TupleHashEntry) hash_seq_search(iter)) |
| |
| /* Abstraction of different memory management calls: an opaque manager plus the alloc/free routines that operate on it. cxt_alloc and cxt_free in this header match these signatures. */ |
| typedef struct MemoryManagerContainer |
| { |
| void *manager; /* memory manager instance */ |
| void *(*alloc)(void *manager, Size len); /* allocation routine; takes the manager and a length */ |
| void (*free)(void *manager, void *pointer); /* release routine; takes the manager and the pointer */ |
| /* |
| * Growth factor: if the existing space is too small, the reallocated |
| * space is this many times the size of the existing one. |
| */ |
| int realloc_ratio; |
| } MemoryManagerContainer; |
| |
| /* |
| * cxt_alloc |
| * Allocator with the MemoryManagerContainer.alloc signature, backed by a |
| * MemoryContext: manager is treated as a MemoryContext and len bytes are |
| * requested from it through MemoryContextAlloc. |
| */ |
| static inline void *cxt_alloc(void *manager, Size len) |
| { |
| MemoryContext context = (MemoryContext) manager; |
| |
| return MemoryContextAlloc(context, len); |
| } |
| |
| /* |
| * cxt_free |
| * Release routine with the MemoryManagerContainer.free signature. The |
| * manager argument is not used; a non-NULL pointer is handed to pfree, |
| * and a NULL pointer is ignored. |
| */ |
| static inline void cxt_free(void *manager, void *pointer) |
| { |
| UnusedArg(manager); |
| |
| /* Nothing to release for a NULL pointer. */ |
| if (pointer == NULL) |
| return; |
| |
| pfree(pointer); |
| } |
| |
| /* ---------------------------------------------------------------- |
| * Expression State Trees |
| * |
| * Each executable expression tree has a parallel ExprState tree. |
| * |
| * Unlike PlanState, there is not an exact one-for-one correspondence between |
| * ExprState node types and Expr node types. Many Expr node types have no |
| * need for node-type-specific run-time state, and so they can use plain |
| * ExprState or GenericExprState as their associated ExprState node type. |
| * ---------------------------------------------------------------- |
| */ |
| |
| /* ---------------- |
| * ExprState node |
| * |
| * ExprState is the common superclass for all ExprState-type nodes. |
| * |
| * It can also be instantiated directly for leaf Expr nodes that need no |
| * local run-time state (such as Var, Const, or Param). |
| * |
| * To save on dispatch overhead, each ExprState node contains a function |
| * pointer to the routine to execute to evaluate the node. |
| * ---------------- |
| */ |
| |
| typedef struct ExprState ExprState; |
| /* |
| * ExprStateEvalFunc |
| * Signature of the routine stored in ExprState.evalfunc. It receives |
| * the ExprState node and the ExprContext, and returns a Datum. |
| * isNull and isDone are pointers; presumably they are out-parameters |
| * for the result's null flag and set-return status -- TODO confirm. |
| */ |
| typedef Datum (*ExprStateEvalFunc) (ExprState *expression, |
| ExprContext *econtext, |
| bool *isNull, |
| ExprDoneCond *isDone); |
| |
| struct ExprState |
| { |
| NodeTag type; |
| Expr *expr; /* associated Expr node */ |
| ExprStateEvalFunc evalfunc; /* routine to run to execute node */ |
| }; |
| |
| /* ---------------- |
| * GenericExprState node |
| * |
| * This is used for Expr node types that need no local run-time state, |
| * but have one child Expr node. It adds only the child's state (arg) |
| * to the common ExprState header. |
| * ---------------- |
| */ |
| typedef struct GenericExprState |
| { |
| ExprState xprstate; /* common ExprState header; first member */ |
| ExprState *arg; /* state of my child node */ |
| } GenericExprState; |
| |
| /* ---------------- |
| * AggrefExprState node |
| * |
| * ExprState subtype carrying the argument expression states, the |
| * combined target list and sort clauses, and the aggregate's ID number |
| * within its plan node. |
| * ---------------- |
| */ |
| typedef struct AggrefExprState |
| { |
| ExprState xprstate; /* common ExprState header; first member */ |
| List *args; /* states of argument expressions */ |
| List *inputTargets; /* combined TargetList */ |
| List *inputSortClauses; /* list of SortClause */ |
| int aggno; /* ID number for agg within its plan node */ |
| } AggrefExprState; |
| |
| /* |
| * ---------------- |
| * GroupingFuncExprState node |
| * |
| * ExprState subtype that adds an argument list and the number of |
| * unique grouping attributes. |
| * ---------------- |
| */ |
| typedef struct GroupingFuncExprState |
| { |
| ExprState xprstate; /* common ExprState header; first member */ |
| List *args; /* NOTE(review): undocumented; presumably states of argument expressions, as in the sibling *ExprState structs -- confirm */ |
| int ngrpcols; /* number of unique grouping attributes */ |
| } GroupingFuncExprState; |
| |
| /* ---------------- |
| * WindowRefExprState node |
| * |
| * ExprState subtype that links back to its parent WindowState and |
| * records per-argument type info (typbyval, typlen) plus its positions |
| * in the window state's wrxstates list and func_state array. |
| * ---------------- |
| */ |
| typedef struct WindowRefExprState |
| { |
| ExprState xprstate; /* common ExprState header; first member */ |
| struct WindowState *windowstate; /* reflect parent window state */ |
| List *args; /* states of argument expressions */ |
| bool *argtypbyval; /* pg_type.typbyval for each argument */ |
| int16 *argtyplen; /* pg_type.typlen of each argument */ |
| int refno; /* index in window state's wrxstates list */ |
| int funcno; /* index in window state's func_state array */ |
| // bool isAgg; /* aggregate-derived? */ |
| char winkind; /* pg_window.winkind */ |
| } WindowRefExprState; |
| |
| /* ---------------- |
| * ArrayRefExprState node |
| * |
| * Note: array types can be fixed-length (typlen > 0), but only when the |
| * element type is itself fixed-length. Otherwise they are varlena structures |
| * and have typlen = -1. In any case, an array type is never pass-by-value. |
| * |
| * The ref* type fields below cache the array type's typlen and the |
| * element type's typlen, pass-by-value flag and typalign. |
| * ---------------- |
| */ |
| typedef struct ArrayRefExprState |
| { |
| ExprState xprstate; /* common ExprState header; first member */ |
| List *refupperindexpr; /* states for child nodes */ |
| List *reflowerindexpr; /* likewise, a list of child node states */ |
| ExprState *refexpr; /* child node state */ |
| ExprState *refassgnexpr; /* child node state */ |
| int16 refattrlength; /* typlen of array type */ |
| int16 refelemlength; /* typlen of the array element type */ |
| bool refelembyval; /* is the element type pass-by-value? */ |
| char refelemalign; /* typalign of the element type */ |
| } ArrayRefExprState; |
| |
| /* ---------------- |
| * FuncExprState node |
| * |
| * Although named for FuncExpr, this is also used for OpExpr, DistinctExpr, |
| * and NullIf nodes; be careful to check what xprstate.expr is actually |
| * pointing at! |
| * |
| * ScalarArrayOpExprState embeds this struct as its first member. |
| * ---------------- |
| */ |
| typedef struct FuncExprState |
| { |
| ExprState xprstate; /* common ExprState header; first member */ |
| List *args; /* states of argument expressions */ |
| |
| /* |
| * Function manager's lookup info for the target function. If func.fn_oid |
| * is InvalidOid, we haven't initialized it yet (nor any of the following |
| * fields). |
| */ |
| FmgrInfo func; |
| |
| /* |
| * For a set-returning function (SRF) that returns a tuplestore, we |
| * keep the tuplestore here and dole out the result rows one at a time. |
| * The slot holds the row currently being returned. |
| */ |
| struct Tuplestorestate *funcResultStore; |
| struct TupleTableSlot *funcResultSlot; |
| |
| /* |
| * In some cases we need to compute a tuple descriptor for the function's |
| * output. If so, it's stored here. |
| */ |
| TupleDesc funcResultDesc; |
| bool funcReturnsTuple; /* valid when funcResultDesc isn't NULL */ |
| |
| /* |
| * We need to store argument values across calls when evaluating a SRF |
| * that uses value-per-call mode. |
| * |
| * setArgsValid is true when we are evaluating a set-valued function and |
| * we are in the middle of a call series; we want to pass the same |
| * argument values to the function again (and again, until it returns |
| * ExprEndResult). |
| */ |
| bool setArgsValid; |
| |
| /* |
| * Flag to remember whether we found a set-valued argument to the |
| * function. This causes the function result to be a set as well. Valid |
| * only when setArgsValid is true or funcResultStore isn't NULL. |
| */ |
| bool setHasSetArg; /* some argument returns a set */ |
| |
| /* |
| * Flag to remember whether we have registered a shutdown callback for |
| * this FuncExprState. We do so only if funcResultStore or setArgsValid |
| * has been set at least once (since all the callback is for is to release |
| * the tuplestore or clear setArgsValid). |
| */ |
| bool shutdown_reg; /* a shutdown callback is registered */ |
| |
| /* |
| * Current argument data for a set-valued function; contains valid data |
| * only if setArgsValid is true. |
| */ |
| FunctionCallInfoData setArgs; |
| |
| /* |
| * Fast Path: room for up to two argument states, with a datum and a |
| * null flag for each. NOTE(review): the conditions under which the |
| * fast path is used are not visible in this header. |
| */ |
| ExprState *fp_arg[2]; |
| Datum fp_datum[2]; |
| bool fp_null[2]; |
| } FuncExprState; |
| |
| /* ---------------- |
| * ScalarArrayOpExprState node |
| * |
| * This is a FuncExprState plus some additional data: cached type info |
| * for the array element type, and the fast-path arrays. |
| * ---------------- |
| */ |
| typedef struct ScalarArrayOpExprState |
| { |
| FuncExprState fxprstate; /* embedded FuncExprState; first member */ |
| /* Cached info about array element type */ |
| Oid element_type; |
| int16 typlen; |
| bool typbyval; |
| char typalign; |
| |
| /* |
| * Fast path x in ('A', 'B', 'C'). Presumably fp_n is the number of |
| * entries in the fp_len and fp_datum arrays -- TODO confirm. |
| */ |
| int fp_n; |
| int *fp_len; |
| Datum *fp_datum; |
| } ScalarArrayOpExprState; |
| |
| /* ---------------- |
| * BoolExprState node |
| * |
| * An ExprState header followed by the list of argument states. |
| * ---------------- |
| */ |
| typedef struct BoolExprState |
| { |
| ExprState xprstate; |
| List *args; /* states of argument expression(s) */ |
| } BoolExprState; |
| |
| /* ---------------- |
| * PartOidExprState node |
| * |
| * An ExprState header plus a doubly-indirect reference to the |
| * PartitionConstraints accepted for the current tuple. |
| * NOTE(review): who owns and updates the pointed-to slot is not |
| * visible in this header -- confirm against the users of this node. |
| * ---------------- |
| */ |
| typedef struct PartOidExprState |
| { |
| ExprState xprstate; |
| |
| /* accepted leaf PartitionConstraints for current tuple */ |
| struct PartitionConstraints **acceptedLeafPart; |
| } PartOidExprState; |
| |
| /* ---------------- |
| * PartDefaultExprState node |
| * |
| * An ExprState header plus a doubly-indirect reference to |
| * PartitionConstraints. PartBoundExprState, |
| * PartBoundInclusionExprState and PartBoundOpenExprState declare |
| * the same two members. |
| * ---------------- |
| */ |
| typedef struct PartDefaultExprState |
| { |
| ExprState xprstate; |
| |
| /* accepted partitions for all levels */ |
| struct PartitionConstraints **levelPartConstraints; |
| } PartDefaultExprState; |
| |
| /* ---------------- |
| * PartBoundExprState node |
| * |
| * An ExprState header plus a doubly-indirect reference to |
| * PartitionConstraints. PartDefaultExprState, |
| * PartBoundInclusionExprState and PartBoundOpenExprState declare |
| * the same two members. |
| * ---------------- |
| */ |
| typedef struct PartBoundExprState |
| { |
| ExprState xprstate; |
| |
| /* accepted partitions for all levels */ |
| struct PartitionConstraints **levelPartConstraints; |
| } PartBoundExprState; |
| |
| /* ---------------- |
| * PartBoundInclusionExprState node |
| * |
| * An ExprState header plus a doubly-indirect reference to |
| * PartitionConstraints. PartDefaultExprState, PartBoundExprState |
| * and PartBoundOpenExprState declare the same two members. |
| * ---------------- |
| */ |
| typedef struct PartBoundInclusionExprState |
| { |
| ExprState xprstate; |
| |
| /* accepted partitions for all levels */ |
| struct PartitionConstraints **levelPartConstraints; |
| } PartBoundInclusionExprState; |
| |
| /* ---------------- |
| * PartBoundOpenExprState node |
| * |
| * An ExprState header plus a doubly-indirect reference to |
| * PartitionConstraints. PartDefaultExprState, PartBoundExprState |
| * and PartBoundInclusionExprState declare the same two members. |
| * ---------------- |
| */ |
| typedef struct PartBoundOpenExprState |
| { |
| ExprState xprstate; |
| |
| /* accepted partitions for all levels */ |
| struct PartitionConstraints **levelPartConstraints; |
| } PartBoundOpenExprState; |
| |
| /* ---------------- |
| * SubPlanState node |
| * |
| * Holds the subselect's own EState and PlanState tree, the states of |
| * its test and argument expressions, and the hash tables, memory |
| * contexts and function lookup data used when hashing the subselect's |
| * output. |
| * ---------------- |
| */ |
| typedef struct SubPlanState |
| { |
| ExprState xprstate; |
| EState *sub_estate; /* subselect plan has its own EState */ |
| struct PlanState *planstate; /* subselect plan's state tree */ |
| ExprState *testexpr; /* state of combining expression */ |
| List *args; /* states of argument expression(s) */ |
| bool needShutdown; /* TRUE = need to shutdown subplan */ |
| |
| struct MemTupleData * curTuple; /* copy of most recent tuple from subplan */ |
| /* these are used when hashing the subselect's output: */ |
| ProjectionInfo *projLeft; /* for projecting lefthand exprs */ |
| ProjectionInfo *projRight; /* for projecting subselect output */ |
| TupleHashTable hashtable; /* hash table for no-nulls subselect rows */ |
| TupleHashTable hashnulls; /* hash table for rows with null(s) */ |
| bool havehashrows; /* TRUE if hashtable is not empty */ |
| bool havenullrows; /* TRUE if hashnulls is not empty */ |
| |
| MemoryContext hashtablecxt; /* memory context containing hash tables */ |
| MemoryContext hashtempcxt; /* temp memory context for hash tables */ |
| |
| ExprContext *innerecontext; /* working context for comparisons */ |
| AttrNumber *keyColIdx; /* control data for hash tables */ |
| FmgrInfo *eqfunctions; /* comparison functions for hash tables */ |
| FmgrInfo *hashfunctions; /* lookup data for hash functions */ |
| struct StringInfoData *cdbextratextbuf; /* to pass text to cdbexplain */ |
| } SubPlanState; |
| |
| /* ---------------- |
| * FieldSelectState node |
| * |
| * An ExprState header, the state of the input expression, and the |
| * tuple descriptor remembered from the most recent input. |
| * ---------------- |
| */ |
| typedef struct FieldSelectState |
| { |
| ExprState xprstate; |
| ExprState *arg; /* input expression */ |
| TupleDesc argdesc; /* tupdesc for most recent input */ |
| } FieldSelectState; |
| |
| /* ---------------- |
| * FieldStoreState node |
| * |
| * An ExprState header, the state of the input tuple expression, the |
| * list of new field values, and the tuple descriptor remembered from |
| * the most recent input. |
| * ---------------- |
| */ |
| typedef struct FieldStoreState |
| { |
| ExprState xprstate; |
| ExprState *arg; /* input tuple value */ |
| List *newvals; /* new value(s) for field(s) */ |
| TupleDesc argdesc; /* tupdesc for most recent input */ |
| } FieldStoreState; |
| |
| /* ---------------- |
| * ConvertRowtypeExprState node |
| * |
| * Carries the source and result tuple descriptors, the attribute map |
| * between them, and workspace arrays for taking the source row apart |
| * and building the result row. |
| * ---------------- |
| */ |
| typedef struct ConvertRowtypeExprState |
| { |
| ExprState xprstate; |
| ExprState *arg; /* input tuple value */ |
| TupleDesc indesc; /* tupdesc for source rowtype */ |
| TupleDesc outdesc; /* tupdesc for result rowtype */ |
| AttrNumber *attrMap; /* indexes of input fields, or 0 for null */ |
| Datum *invalues; /* workspace for deconstructing source */ |
| bool *inisnull; /* presumably the null flags paired with invalues -- TODO confirm */ |
| Datum *outvalues; /* workspace for constructing result */ |
| bool *outisnull; /* presumably the null flags paired with outvalues -- TODO confirm */ |
| } ConvertRowtypeExprState; |
| |
| /* ---------------- |
| * CaseExprState node |
| * |
| * An ExprState header, the optional comparison argument, the list of |
| * WHEN clauses, and the ELSE result. |
| * ---------------- |
| */ |
| typedef struct CaseExprState |
| { |
| ExprState xprstate; |
| ExprState *arg; /* implicit equality comparison argument */ |
| List *args; /* the arguments (list of WHEN clauses) */ |
| ExprState *defresult; /* the default result (ELSE clause) */ |
| } CaseExprState; |
| |
| /* ---------------- |
| * CaseWhenState node |
| * |
| * An ExprState header plus the states of one condition expression |
| * and its substitution result. |
| * ---------------- |
| */ |
| typedef struct CaseWhenState |
| { |
| ExprState xprstate; |
| ExprState *expr; /* condition expression */ |
| ExprState *result; /* substitution result */ |
| } CaseWhenState; |
| |
| /* ---------------- |
| * ArrayExprState node |
| * |
| * Note: ARRAY[] expressions always produce varlena arrays, never fixed-length |
| * arrays. |
| * |
| * Besides the child element states, the node caches three properties |
| * of the element type: typlen, pass-by-value flag and typalign. |
| * ---------------- |
| */ |
| typedef struct ArrayExprState |
| { |
| ExprState xprstate; |
| List *elements; /* states for child nodes */ |
| int16 elemlength; /* typlen of the array element type */ |
| bool elembyval; /* is the element type pass-by-value? */ |
| char elemalign; /* typalign of the element type */ |
| } ArrayExprState; |
| |
| /* ---------------- |
| * RowExprState node |
| * |
| * An ExprState header, the argument list, and the tuple descriptor |
| * for the result tuples. |
| * ---------------- |
| */ |
| typedef struct RowExprState |
| { |
| ExprState xprstate; |
| List *args; /* the arguments */ |
| TupleDesc tupdesc; /* descriptor for result tuples */ |
| } RowExprState; |
| |
| /* ---------------- |
| * RowCompareExprState node |
| * |
| * An ExprState header, the left-hand and right-hand argument lists, |
| * and an array of comparison function lookup info. |
| * ---------------- |
| */ |
| typedef struct RowCompareExprState |
| { |
| ExprState xprstate; |
| List *largs; /* the left-hand input arguments */ |
| List *rargs; /* the right-hand input arguments */ |
| FmgrInfo *funcs; /* array of comparison function info */ |
| } RowCompareExprState; |
| |
| /* ---------------- |
| * CoalesceExprState node |
| * |
| * An ExprState header followed by the argument list. |
| * ---------------- |
| */ |
| typedef struct CoalesceExprState |
| { |
| ExprState xprstate; |
| List *args; /* the arguments */ |
| } CoalesceExprState; |
| |
| /* ---------------- |
| * MinMaxExprState node |
| * |
| * An ExprState header, the argument list, and the lookup info for the |
| * comparison function, stored inline rather than by pointer. |
| * ---------------- |
| */ |
| typedef struct MinMaxExprState |
| { |
| ExprState xprstate; |
| List *args; /* the arguments */ |
| FmgrInfo cfunc; /* lookup info for comparison func */ |
| } MinMaxExprState; |
| |
| /* ---------------- |
| * NullTestState node |
| * |
| * An ExprState header, the input expression state, a flag telling |
| * whether the input is of a composite type, and (for that case only) |
| * the tuple descriptor remembered from the most recent input. |
| * ---------------- |
| */ |
| typedef struct NullTestState |
| { |
| ExprState xprstate; |
| ExprState *arg; /* input expression */ |
| bool argisrow; /* T if input is of a composite type */ |
| /* used only if argisrow: */ |
| TupleDesc argdesc; /* tupdesc for most recent input */ |
| } NullTestState; |
| |
| /* ---------------- |
| * CoerceToDomainState node |
| * |
| * An ExprState header, the input expression state, and a cached list |
| * of DomainConstraintState nodes to check. |
| * ---------------- |
| */ |
| typedef struct CoerceToDomainState |
| { |
| ExprState xprstate; |
| ExprState *arg; /* input expression */ |
| /* Cached list of constraints that need to be checked */ |
| List *constraints; /* list of DomainConstraintState nodes */ |
| } CoerceToDomainState; |
| |
| |
| /* ---------------- |
| * PercentileExprState node |
| * |
| * An ExprState header, the argument expression states, a combined |
| * target list, and the node's ID number within its plan node. |
| * ---------------- |
| */ |
| typedef struct PercentileExprState |
| { |
| ExprState xprstate; |
| List *args; /* states of argument expressions */ |
| List *tlist; /* combined TargetList */ |
| int aggno; /* ID number within its plan node */ |
| } PercentileExprState; |
| |
| /* |
| * DomainConstraintState - one item to check during CoerceToDomain |
| * |
| * Note: this is just a Node, and not an ExprState, because it has no |
| * corresponding Expr to link to. Nonetheless it is part of an ExprState |
| * tree, so we give it a name following the xxxState convention. |
| * |
| * DomainConstraintType, declared first, distinguishes the two kinds of |
| * constraint a DomainConstraintState can describe: NOT NULL and CHECK. |
| */ |
| typedef enum DomainConstraintType |
| { |
| DOM_CONSTRAINT_NOTNULL, |
| DOM_CONSTRAINT_CHECK |
| } DomainConstraintType; |
| |
| typedef struct DomainConstraintState |
| { |
| NodeTag type; /* node tag; this struct has no ExprState header */ |
| DomainConstraintType constrainttype; /* constraint type */ |
| char *name; /* name of constraint (for error msgs) */ |
| ExprState *check_expr; /* for CHECK, a boolean expression */ |
| } DomainConstraintState; |
| |
| |
| /* ---------------------------------------------------------------- |
| * Executor State Trees |
| * |
| * An executing query has a PlanState tree paralleling the Plan tree |
| * that describes the plan. |
| * ---------------------------------------------------------------- |
| */ |
| |
| /* ---------------- |
| * PlanState node |
| * |
| * We never actually instantiate any PlanState nodes; this is just the common |
| * abstract superclass for all PlanState-type nodes. |
| * |
| * Node-specific state structs embed a PlanState (directly or through |
| * ScanState/JoinState) as their first member, so a pointer to any of |
| * them can be cast to PlanState *; the innerPlanState/outerPlanState |
| * macros in this file rely on that cast. |
| * ---------------- |
| */ |
| |
| typedef struct PlanState |
| { |
| NodeTag type; |
| |
| Plan *plan; /* associated Plan node */ |
| |
| EState *state; /* at execution time, the states of individual |
| * nodes point to one EState for the whole |
| * top-level plan */ |
| |
| bool fHadSentGpmon; /* presumably set once a gpmon packet has been sent for this node -- TODO confirm */ |
| |
| /* |
| * Common structural data for all Plan types. These links to subsidiary |
| * state trees parallel links in the associated plan tree (except for the |
| * subPlan list, which does not exist in the plan tree). |
| */ |
| List *targetlist; /* target list to be computed at this node */ |
| List *qual; /* implicitly-ANDed qual conditions */ |
| struct PlanState *lefttree; /* input plan tree(s) */ |
| struct PlanState *righttree; |
| List *initPlan; /* Init SubPlanState nodes (un-correlated expr |
| * subselects) */ |
| List *subPlan; /* SubPlanState nodes in my expressions */ |
| |
| /* |
| * State for management of parameter-change-driven rescanning |
| */ |
| Bitmapset *chgParam; /* set of IDs of changed Params */ |
| |
| /* |
| * Indicates whether it is unsafe to eagerly free the memory used by this |
| * node once the node has output its last row. |
| * |
| * The unsafe cases are Mark/Restore, Rescan on Material/Sort on top of a Motion. |
| */ |
| bool delayEagerFree; |
| |
| /* |
| * Other run-time state needed by most if not all node types. |
| */ |
| struct TupleTableSlot *ps_OuterTupleSlot; /* slot for current "outer" tuple */ |
| struct TupleTableSlot *ps_ResultTupleSlot; /* slot for my result tuples */ |
| ExprContext *ps_ExprContext; /* node's expression-evaluation context */ |
| ProjectionInfo *ps_ProjInfo; /* info for doing tuple projection */ |
| |
| /* |
| * EXPLAIN ANALYZE statistics collection |
| */ |
| struct Instrumentation *instrument; /* runtime stats for this node */ |
| struct StringInfoData *cdbexplainbuf; /* EXPLAIN ANALYZE report buf */ |
| void (*cdbexplainfun)(struct PlanState *planstate, struct StringInfoData *buf); |
| /* callback before ExecutorEnd */ |
| |
| /* |
| * GpMon packet |
| */ |
| int gpmon_plan_tick; /* NOTE(review): meaning of the tick counter is not visible here -- confirm */ |
| gpmon_packet_t gpmon_pkt; /* packet stored inline; the Gpmon_M_* helpers in this file operate on a gpmon_packet_t */ |
| } PlanState; |
| |
| typedef struct Gpmon_NameUnit_MaxVal /* a name / unit / maximum-value triple */ |
| { |
| char *name; /* presumably the measure's display name -- TODO confirm */ |
| char *unit; /* presumably the unit label for the measure -- TODO confirm */ |
| int64 maxval; /* presumably the upper bound for the measure -- TODO confirm */ |
| } Gpmon_NameUnit_MaxVal; |
| |
| typedef struct Gpmon_NameVal_Text /* a name / text-value pair */ |
| { |
| char *name; /* presumably the item's name -- TODO confirm */ |
| char *value; /* presumably the item's value as text -- TODO confirm */ |
| } Gpmon_NameVal_Text; |
| |
| /* |
| * Gpperfmon helper functions defined in execGpmon.h |
| * NOTE(review): this header only declares them; the file that holds the |
| * definitions is not visible here -- confirm the file name above. |
| */ |
| extern char *GetScanRelNameGpmon(Oid relid, char schema_table_name[SCAN_REL_NAME_BUF_SIZE]); /* the array bound is documentation only: C passes this parameter as char * */ |
| extern void CheckSendPlanStateGpmonPkt(PlanState *ps); |
| extern void EndPlanStateGpmonPkt(PlanState *ps); |
| extern void InitPlanNodeGpmonPkt(Plan* plan, gpmon_packet_t *gpmon_pkt, EState *estate, |
| PerfmonNodeType type, int64 rowsout_est, |
| char* relname); |
| |
| |
| extern uint64 PlanStateOperatorMemKB(const PlanState *ps); /* NOTE(review): name suggests the operator's memory in KB -- confirm against the definition */ |
| |
| /* Add one to entry nth of the packet's qexec measures array (nth is not range-checked). */ |
| static inline void Gpmon_M_Incr(gpmon_packet_t *pkt, int nth) |
| { |
| pkt->u.qexec.measures[nth] += 1; |
| } |
| /* Add one to the packet's qexec rowsout counter. */ |
| static inline void Gpmon_M_Incr_Rows_Out(gpmon_packet_t *pkt) |
| { |
| pkt->u.qexec.rowsout += 1; |
| } |
| /* Add val to the packet's qexec rowsout counter. */ |
| static inline void Gpmon_M_Add_Rows_Out(gpmon_packet_t *pkt, int val) |
| { |
| pkt->u.qexec.rowsout = pkt->u.qexec.rowsout + val; |
| } |
| /* Add val to entry nth of the packet's qexec measures array (nth is not range-checked). */ |
| static inline void Gpmon_M_Add(gpmon_packet_t *pkt, int nth, int val) |
| { |
| pkt->u.qexec.measures[nth] = pkt->u.qexec.measures[nth] + val; |
| } |
| static inline void Gpmon_M_Set(gpmon_packet_t *pkt, int nth, int64 val) /* overwrite entry nth of the qexec measures array with val */ |
| { |
| pkt->u.qexec.measures[nth] = val; /* nth is used as given; no range check is done here */ |
| } |
| /* Return entry nth of the packet's qexec measures array as an int64 (nth is not range-checked). */ |
| static inline int64 Gpmon_M_Get(gpmon_packet_t *pkt, int nth) |
| { |
| const int64 measure = pkt->u.qexec.measures[nth]; |
| |
| return measure; |
| } |
| /* Zero entry nth of the packet's qexec measures array (nth is not range-checked). */ |
| static inline void Gpmon_M_Reset(gpmon_packet_t *pkt, int nth) |
| { |
| Gpmon_M_Set(pkt, nth, 0); |
| } |
| |
| /* ---------------- |
| * These macros are defined to avoid confusion between "left" |
| * and "right" versus "inner" and "outer". The convention is that |
| * the "left" plan is the "outer" plan and the "right" plan is |
| * the "inner" plan, but these make the code more readable. |
| * |
| * Each macro casts its argument to PlanState * and selects the |
| * righttree (inner) or lefttree (outer) member, so the argument |
| * must point to a struct that begins with a PlanState. |
| * ---------------- |
| */ |
| #define innerPlanState(node) (((PlanState *)(node))->righttree) |
| #define outerPlanState(node) (((PlanState *)(node))->lefttree) |
| |
| |
| /* ---------------- |
| * ResultState information |
| * |
| * A PlanState plus the state of the constant qual, completion and |
| * qual-check flags, and bookkeeping for set-valued functions in the |
| * target list. |
| * ---------------- |
| */ |
| typedef struct ResultState |
| { |
| PlanState ps; /* its first field is NodeTag */ |
| ExprState *resconstantqual; |
| bool inputFullyConsumed; /* are we done? */ |
| bool rs_checkqual; /* do we need to check the qual? */ |
| bool isSRF;/* state flag for processing set-valued |
| * functions in targetlist */ |
| ExprDoneCond lastSRFCond; /* Applicable only if isSRF is true. Represents the last done flag */ |
| } ResultState; |
| |
| /* ---------------- |
| * RepeatState information |
| * |
| * A PlanState plus the current tuple slot, the repeat count for that |
| * tuple, the state of the expression to evaluate, and a completion |
| * flag. |
| * ---------------- |
| */ |
| typedef struct RepeatState |
| { |
| PlanState ps; /* its first field is NodeTag */ |
| |
| bool repeat_done; /* are we done? */ |
| struct TupleTableSlot *slot; /* The current tuple */ |
| int repeat_count; /* The number of repeats for the current tuple */ |
| ExprState *expr_state; /* The state to evaluate the expression */ |
| } RepeatState; |
| |
| /* ---------------- |
| * AppendState information |
| * |
| * nplans how many plans are in the list |
| * whichplan which plan is being executed (0 .. n-1) |
| * firstplan first plan to execute (usually 0) |
| * lastplan last plan to execute (usually n-1) |
| * |
| * The struct members carry an as_ prefix (as_nplans, as_whichplan, |
| * as_firstplan, as_lastplan). |
| * ---------------- |
| */ |
| typedef struct AppendState |
| { |
| PlanState ps; /* its first field is NodeTag */ |
| PlanState **appendplans; /* array of PlanStates for my inputs */ |
| int eflags; /* used to initialize each subplan */ |
| int as_nplans; |
| int as_whichplan; |
| int as_firstplan; |
| int as_lastplan; |
| } AppendState; |
| |
| /* |
| * SequenceState |
| * A PlanState plus an array of subplan states, their count, and a |
| * flag recording whether any subplan has run yet. |
| */ |
| typedef struct SequenceState |
| { |
| PlanState ps; |
| PlanState **subplans; /* presumably numSubplans entries -- TODO confirm */ |
| int numSubplans; |
| |
| /* |
| * True if no subplan has been executed. |
| */ |
| bool initState; |
| } SequenceState; |
| |
| /* ---------------- |
| * BitmapAndState information |
| * |
| * A PlanState plus the array of input plan states, their count, and |
| * the output bitmap. BitmapOrState has the same members. |
| * ---------------- |
| */ |
| typedef struct BitmapAndState |
| { |
| PlanState ps; /* its first field is NodeTag */ |
| PlanState **bitmapplans; /* array of PlanStates for my inputs */ |
| int nplans; /* number of input plans */ |
| Node *bitmap; /* output stream bitmap */ |
| } BitmapAndState; |
| |
| /* ---------------- |
| * BitmapOrState information |
| * |
| * A PlanState plus the array of input plan states, their count, and |
| * the output bitmap. BitmapAndState has the same members. |
| * ---------------- |
| */ |
| typedef struct BitmapOrState |
| { |
| PlanState ps; /* its first field is NodeTag */ |
| PlanState **bitmapplans; /* array of PlanStates for my inputs */ |
| int nplans; /* number of input plans */ |
| Node *bitmap; /* output bitmap */ |
| } BitmapOrState; |
| |
| /* ---------------------------------------------------------------- |
| * Scan State Information |
| * ---------------------------------------------------------------- |
| */ |
| |
| /* The stage the scan node is currently in. |
| * |
| * SCAN_INIT: we are initializing the scan state |
| * SCAN_FIRST: part of the initialization is done and we are |
| * ready to scan the first relation of possibly multiple |
| * relations, if it is a dynamic scan. |
| * SCAN_SCAN: all initializations for reading tuples are done |
| * and we are either reading tuples, or ready to read tuples |
| * SCAN_MARKPOS: we have marked a position in the scan state |
| * SCAN_NEXT: we are done with the current relation and waiting |
| * for the next relation (if multi-partition) |
| * SCAN_DONE: we are done with all relations/partitions, but |
| * the scan state is still valid for a ReScan (i.e., we |
| * haven't destroyed our scan state yet) |
| * SCAN_RESCAN: not described by the original comment; presumably |
| * marks a rescan -- TODO confirm against the scan code |
| * SCAN_END: we are completely done. We cannot ReScan, without |
| * redoing the whole initialization phase again. |
| * |
| * Apart from SCAN_INIT (0), every value is a distinct power of two. |
| * NOTE(review): that suggests the values may be combined as bit flags |
| * in ScanState.scan_state -- confirm. |
| */ |
| enum { |
| SCAN_INIT = 0, |
| SCAN_FIRST = 1, |
| SCAN_SCAN = 2, |
| SCAN_MARKPOS = 4, |
| SCAN_NEXT = 8, |
| SCAN_DONE = 16, |
| SCAN_RESCAN = 32, |
| SCAN_END = 64, |
| }; |
| |
| /* |
| * TableType |
| * Enum for different types of tables: heap, append-only and parquet, |
| * plus TableTypeInvalid as the last enumerator. |
| */ |
| typedef enum |
| { |
| TableTypeHeap, |
| TableTypeAppendOnly, |
| TableTypeParquet, |
| TableTypeInvalid, |
| } TableType; |
| |
| /* ---------------- |
| * ScanState information |
| * |
| * ScanState extends PlanState for node types that represent |
| * scans of an underlying relation. It can also be used for nodes |
| * that scan the output of an underlying plan node --- in that case, |
| * only ScanTupleSlot is actually useful, and it refers to the tuple |
| * retrieved from the subplan. |
| * |
| * currentRelation relation being scanned (NULL if none) |
| * currentScanDesc heap scan descriptor pointer |
| * ScanTupleSlot pointer to slot in tuple table holding scan tuple |
| * splits a List; contents not documented here -- TODO confirm |
| * scan_state the stage of scanning (see the SCAN_* enum) |
| * tableType the table type of the target relation |
| * ---------------- |
| */ |
| typedef struct ScanState |
| { |
| PlanState ps; /* its first field is NodeTag */ |
| Relation ss_currentRelation; |
| struct HeapScanDescData * ss_currentScanDesc; |
| struct TupleTableSlot *ss_ScanTupleSlot; |
| List *splits; |
| int scan_state; |
| |
| /* The type of the table that is being scanned */ |
| TableType tableType; |
| |
| } ScanState; |
| |
| /* |
| * SeqScanOpaqueData |
| * Additional state data (in addition to ScanState) for scanning heap table. |
| * |
| * Note that ScanState also declares a member named ss_currentScanDesc; |
| * the one here is a separate member of this struct. |
| */ |
| typedef struct SeqScanOpaqueData |
| { |
| struct HeapScanDescData * ss_currentScanDesc; |
| |
| struct { |
| HeapTupleData item[512]; /* fixed array of 512 tuple headers, stored inline */ |
| int bot, top; /* presumably the bounds of the in-use part of item[] -- TODO confirm */ |
| HeapTuple last; /* NOTE(review): role not visible here -- confirm */ |
| int seen_EOS; /* presumably nonzero once end of scan was seen -- TODO confirm */ |
| } ss_heapTupleData; |
| |
| } SeqScanOpaqueData; |
| |
| /* |
| * SeqScanState |
| * State data for scanning heap table: a ScanState plus a pointer to |
| * the heap-specific SeqScanOpaqueData. |
| */ |
| typedef struct SeqScanState |
| { |
| ScanState ss; |
| SeqScanOpaqueData *opaque; |
| } SeqScanState; |
| |
| /* |
| * These structs store information about index quals that don't have simple |
| * constant right-hand sides. See comments for ExecIndexBuildScanKeys() |
| * for discussion. |
| * |
| * IndexRuntimeKeyInfo pairs a scan key with the expression whose value |
| * is to be placed into it. |
| */ |
| typedef struct |
| { |
| ScanKey scan_key; /* scankey to put value into */ |
| ExprState *key_expr; /* expr to evaluate to get value */ |
| } IndexRuntimeKeyInfo; |
| |
| typedef struct /* scan key fed from an array-valued expression, one element at a time */ |
| { |
| ScanKey scan_key; /* scankey to put value into */ |
| ExprState *array_expr; /* expr to evaluate to get array value */ |
| int next_elem; /* next array element to use */ |
| int num_elems; /* number of elems in current array value */ |
| Datum *elem_values; /* array of num_elems Datums */ |
| bool *elem_nulls; /* array of num_elems is-null flags */ |
| } IndexArrayKeyInfo; |
| |
| /* ---------------- |
| * IndexScanState information |
| * |
| * indexqualorig execution state for indexqualorig expressions |
| * ScanKeys Skey structures to scan index rel |
| * NumScanKeys number of Skey structs |
| * RuntimeKeys info about Skeys that must be evaluated at runtime |
| * NumRuntimeKeys number of RuntimeKeys structs |
| * ArrayKeys IndexArrayKeyInfo structs (array-valued Skeys) |
| * NumArrayKeys presumably the number of ArrayKeys structs -- TODO confirm |
| * RuntimeKeysReady true if runtime Skeys have been computed |
| * RuntimeContext expr context for evaling runtime Skeys |
| * RelationDesc index relation descriptor |
| * ScanDesc index scan descriptor |
| * tableOid see the comment on the member below |
| * ---------------- |
| */ |
| typedef struct IndexScanState |
| { |
| ScanState ss; /* its first field is NodeTag */ |
| List *indexqualorig; |
| ScanKey iss_ScanKeys; |
| int iss_NumScanKeys; |
| IndexRuntimeKeyInfo *iss_RuntimeKeys; |
| int iss_NumRuntimeKeys; |
| IndexArrayKeyInfo *iss_ArrayKeys; |
| int iss_NumArrayKeys; |
| bool iss_RuntimeKeysReady; |
| ExprContext *iss_RuntimeContext; |
| Relation iss_RelationDesc; |
| struct IndexScanDescData *iss_ScanDesc; |
| |
| /* |
| * tableOid is the oid of the partition or relation on which |
| * our current index relation is defined. |
| */ |
| Oid tableOid; |
| } IndexScanState; |
| |
| /* |
| * DynamicIndexScanState |
| * An IndexScanState plus the per-partition bookkeeping below: a hash |
| * table of partition ids, its sequential-scan status, a per-partition |
| * memory context, and the oid the current varnos are mapped for. |
| */ |
| typedef struct DynamicIndexScanState |
| { |
| IndexScanState indexScanState; |
| |
| /* |
| * Partition id index that maintains all unique partition ids for the |
| * DynamicIndexScan. |
| */ |
| HTAB *pidxIndex; |
| |
| /* |
| * Status of the part to retrieve (result of the sequential search in a hash table). |
| */ |
| HASH_SEQ_STATUS pidxStatus; |
| |
| /* Like DynamicTableScanState, this flag is required to handle error conditions. |
| * This flag prevents ExecEndDynamicIndexScan from calling hash_seq_term() on |
| * a NULL hash table. */ |
| bool shouldCallHashSeqTerm; |
| |
| /* |
| * We will create a new copy of logicalIndexInfo in this memory context for |
| * each partition. This memory context will be reset per-partition to free |
| * up previous partition's logicalIndexInfo memory |
| */ |
| MemoryContext partitionMemoryContext; |
| |
| /* The partition oid for which the current varnos are mapped */ |
| Oid columnLayoutOid; |
| } DynamicIndexScanState; |
| |
| |
| /* ---------------- |
| * BitmapIndexScanState information |
| * |
| * An IndexScanState (embedded as the first member) plus the output |
| * bitmap. |
| * ---------------- |
| */ |
| typedef struct BitmapIndexScanState |
| { |
| IndexScanState indexScanState; /* pseudo inheritance */ |
| Node *bitmap; /* output bitmap */ |
| } BitmapIndexScanState; |
| |
| /* ---------------- |
| * BitmapHeapScanState information |
| * |
| * bitmapqualorig execution state for bitmapqualorig expressions |
| * tbm bitmap obtained from child index scan(s) |
| * tbmres current-page data |
| * |
| * Note that ScanState also declares a member named ss_currentScanDesc; |
| * the one declared here is a separate member of this struct. |
| * NOTE(review): which of the two is actually used is not visible in |
| * this header -- confirm. |
| * ---------------- |
| */ |
| typedef struct BitmapHeapScanState |
| { |
| ScanState ss; /* its first field is NodeTag */ |
| struct HeapScanDescData * ss_currentScanDesc; |
| List *bitmapqualorig; |
| Node *tbm; |
| struct TBMIterateResult *tbmres; |
| } BitmapHeapScanState; |
| |
| /* ---------------- |
| * BitmapAppendOnlyScanState information |
| * |
| * bitmapqualorig execution state for bitmapqualorig expressions |
| * tbm bitmap obtained from child index scan(s) |
| * tbmres current-page data |
| * |
| * The struct members carry a baos_ prefix. |
| * ---------------- |
| */ |
| typedef struct BitmapAppendOnlyScanState |
| { |
| ScanState ss; /* its first field is NodeTag */ |
| |
| struct AppendOnlyFetchDescData *baos_currentAOFetchDesc; |
| List *baos_bitmapqualorig; |
| Node *baos_tbm; |
| struct TBMIterateResult *baos_tbmres; |
| bool baos_gotpage; /* presumably true while a bitmap page is loaded -- TODO confirm */ |
| int baos_cindex; /* presumably the position within the current page -- TODO confirm */ |
| bool baos_lossy; /* presumably true if the current page is lossy -- TODO confirm */ |
| int baos_ntuples; /* presumably the tuple count for the current page -- TODO confirm */ |
| bool isAORow; /* If this is for AO Row tables. */ |
| } BitmapAppendOnlyScanState; |
| |
| /* ---------------- |
| * BitmapTableScanState information |
| * |
| * scanDesc an opaque (scan method dependent) scan descriptor |
| * bitmapqualorig execution state for bitmapqualorig expressions |
| * tbm bitmap obtained from child index scan(s) |
| * tbmres current bitmap-page data |
| * isLossyBitmapPage is the current bitmap-page lossy? |
| * recheckTuples should the tuples be rechecked for eligibility because of visibility issues |
| * needNewBitmapPage are we done with current bitmap page and therefore need a new one? |
| * iterator an opaque iterator object to iterate a bitmap page and the corresponding table data |
| * |
| * scanDesc and iterator are declared as void *, so their concrete |
| * types are not visible from this header. |
| * ---------------- |
| */ |
| typedef struct BitmapTableScanState |
| { |
| ScanState ss; /* its first field is NodeTag */ |
| |
| void *scanDesc; |
| List *bitmapqualorig; |
| Node *tbm; |
| struct TBMIterateResult *tbmres; |
| bool isLossyBitmapPage; |
| bool recheckTuples; |
| bool needNewBitmapPage; |
| void *iterator; |
| } BitmapTableScanState; |
| |
| /* ---------------- |
| * TidScanState information |
| * |
| * NumTids number of tids in this scan |
| * TidPtr index of currently fetched tid |
| * MarkTidPtr presumably the TidPtr saved by a mark -- TODO confirm |
| * TidList evaluated item pointers (array of size NumTids) |
| * htup a HeapTupleData stored inline in the node |
| * ---------------- |
| */ |
| typedef struct TidScanState |
| { |
| ScanState ss; /* its first field is NodeTag */ |
| List *tss_tidquals; /* list of ExprState nodes */ |
| int tss_NumTids; |
| int tss_TidPtr; |
| int tss_MarkTidPtr; |
| ItemPointerData *tss_TidList; |
| HeapTupleData tss_htup; |
| } TidScanState; |
| |
| /* ---------------- |
| * SubqueryScanState information |
| * |
| * SubqueryScanState is used for scanning a sub-query in the range table. |
| * The sub-query will have its own EState, which we save here. |
| * ScanTupleSlot references the current output tuple of the sub-query. |
| * |
| * subplan state tree of the sub-query's plan |
| * SubEState exec state for sub-query |
| * ---------------- |
| */ |
| typedef struct SubqueryScanState |
| { |
| ScanState ss; /* its first field is NodeTag */ |
| PlanState *subplan; |
| EState *sss_SubEState; |
| bool cdb_want_ctid; /* true => ctid is referenced in targetlist */ |
| ItemPointerData cdb_fake_ctid; /* presumably a synthesized ctid used when cdb_want_ctid is set -- TODO confirm */ |
| } SubqueryScanState; |
| |
| /* ---------------- |
| * FunctionScanState information |
| * |
| * Function nodes are used to scan the results of a |
| * function appearing in FROM (typically a function returning set). |
| * |
| * tupdesc expected return tuple description |
| * tuplestorestate private state of tuplestore.c |
| * funcexpr state for function expression being evaluated |
| * cdb_want_ctid true => ctid is referenced in targetlist |
| * cdb_fake_ctid not documented here; presumably a synthesized |
| * ctid -- TODO confirm |
| * cdb_mark_ctid not documented here; presumably the fake ctid |
| * saved by a mark -- TODO confirm |
| * ---------------- |
| */ |
| typedef struct FunctionScanState |
| { |
| ScanState ss; /* its first field is NodeTag */ |
| TupleDesc tupdesc; |
| struct Tuplestorestate *tuplestorestate; |
| ExprState *funcexpr; |
| bool cdb_want_ctid; |
| ItemPointerData cdb_fake_ctid; |
| ItemPointerData cdb_mark_ctid; |
| } FunctionScanState; |
| |
| |
| /* ---------------- |
| * TableFunctionState information |
| * |
| * Table Function nodes are used to scan the results of a table function |
| * operating over a table as input. |
| * |
| * The function call context (fcinfo), result-set context (rsinfo) and |
| * returned tuple are stored inline in the node, not by pointer. |
| * ---------------- |
| */ |
| typedef struct TableFunctionState |
| { |
| ScanState ss; /* Table Function is a Scan */ |
| struct AnyTableData *inputscan; /* subquery scan data */ |
| TupleDesc resultdesc; /* Function Result descriptor */ |
| HeapTupleData tuple; /* Returned tuple */ |
| FuncExprState *fcache; /* Function Call Cache */ |
| FunctionCallInfoData fcinfo; /* Function Call Context */ |
| ReturnSetInfo rsinfo; /* Resultset Context */ |
| bool is_rowtype; /* Function returns records */ |
| bool is_firstcall; /* presumably true until the function has been called once -- TODO confirm */ |
| bytea *userdata; /* bytea given by describe func */ |
| } TableFunctionState; |
| |
| |
| /* ---------------- |
| * ValuesScanState information |
| * |
| * ValuesScan nodes are used to scan the results of a VALUES list |
| * |
| * rowcontext per-expression-list context |
| * exprlists array of expression lists being evaluated |
| * array_len size of array |
| * curr_idx current array index (0-based) |
| * marked_idx marked position (for mark/restore) |
| * cdb_want_ctid true => ctid is referenced in targetlist |
| * |
| * Note: ss.ps.ps_ExprContext is used to evaluate any qual or projection |
| * expressions attached to the node. We create a second ExprContext, |
| * rowcontext, in which to build the executor expression state for each |
| * Values sublist. Resetting this context lets us get rid of expression |
| * state for each row, avoiding major memory leakage over a long values list. |
| * ---------------- |
| */ |
| typedef struct ValuesScanState |
| { |
| ScanState ss; /* its first field is NodeTag */ |
| ExprContext *rowcontext; |
| List **exprlists; |
| int array_len; |
| int curr_idx; |
| int marked_idx; |
| bool cdb_want_ctid; /* true => ctid is referenced in targetlist */ |
| } ValuesScanState; |
| |
| /* ---------------- |
| * ExternalScanState information |
| * |
| * ExternalScan nodes are used to scan external tables |
| * |
| * ess_ScanDesc the state of the file data scan |
| * cdb_want_ctid presumably true if ctid is referenced in the |
| * targetlist, as in the other scan states -- TODO confirm |
| * cdb_fake_ctid presumably a synthesized ctid -- TODO confirm |
| * ---------------- |
| */ |
| typedef struct ExternalScanState |
| { |
| ScanState ss; |
| struct FileScanDescData *ess_ScanDesc; |
| bool cdb_want_ctid; |
| ItemPointerData cdb_fake_ctid; |
| } ExternalScanState; |
| |
| /* ---------------- |
| * AppendOnlyScanState information |
| * |
| * AppendOnlyScan nodes are used to scan append only tables |
| * |
| * aos_ScanDesc is the additional data that is needed for scanning |
| * AppendOnly table. It is held by pointer; the descriptor type is |
| * only forward-referenced in this header. |
| * ---------------- |
| */ |
| typedef struct AppendOnlyScanState |
| { |
| ScanState ss; |
| struct AppendOnlyScanDescData *aos_ScanDesc; |
| } AppendOnlyScanState; |
| |
| /* |
| * ParquetScanOpaqueData |
| * Additional data (in addition to ScanState) for scanning parquet |
| * table: a per-column projection flag array, a column count, and the |
| * parquet scan descriptor. |
| */ |
| typedef struct ParquetScanOpaqueData |
| { |
| /* |
| * The array to indicate columns that are involved in the scan. |
| */ |
| bool *proj; |
| int ncol; /* presumably the number of entries in proj -- TODO confirm */ |
| struct ParquetScanDescData *scandesc; |
| } ParquetScanOpaqueData; |
| |
| /* ----------------------------------------------- |
| * ParquetScanState: a ScanState plus a pointer to the parquet-specific |
| * ParquetScanOpaqueData. |
| * |
| * NOTE(review): the original comment read "need modify for parquet |
| * special", which suggests this struct was expected to be revised for |
| * parquet-specific needs -- confirm whether that is still pending. |
| * ----------------------------------------------- |
| */ |
| typedef struct ParquetScanState |
| { |
| ScanState ss; |
| ParquetScanOpaqueData *opaque; |
| } ParquetScanState; |
| |
| /* |
| * TableScanState |
| * Encapsulates the scan state for different table types. |
| * |
| * During execution, 'opaque' is mapped to a different XXXOpaqueData |
| * for each table type (SeqScanOpaqueData and ParquetScanOpaqueData are |
| * declared in this file). Because the member is a void *, the caller |
| * must know which type it currently points to; ScanState.tableType |
| * records the table type being scanned. |
| */ |
| typedef struct TableScanState |
| { |
| ScanState ss; |
| |
| /* |
| * Opaque data that is associated with different table type. |
| */ |
| void *opaque; |
| |
| } TableScanState; |
| |
| /* |
| * DynamicTableScanState |
| * A TableScanState plus the per-partition bookkeeping below. |
| */ |
| typedef struct DynamicTableScanState |
| { |
| TableScanState tableScanState; |
| |
| /* |
| * Pid index that maintains all unique partition pids for this dynamic |
| * table scan to scan. |
| */ |
| HTAB *pidIndex; |
| |
| /* |
| * The status of the sequential scan over the pid index. |
| */ |
| HASH_SEQ_STATUS pidStatus; |
| |
| /* |
| * Should we call hash_seq_term()? This is required |
| * to handle error conditions, where we are required to explicitly |
| * call hash_seq_term(). Also, if we don't have any partition, this |
| * flag should prevent ExecEndDynamicTableScan from calling |
| * hash_seq_term() on a NULL hash table. |
| */ |
| bool shouldCallHashSeqTerm; |
| |
| /* |
| * The first partition requires initialization of expression states, |
| * such as qual and targetlist, regardless of whether we need to re-map varattno |
| */ |
| bool firstPartition; |
| /* |
| * lastRelOid is the last relation that corresponds to the |
| * varattno mapping of qual and target list. Each time we open a new partition, we will |
| * compare the last relation with the current relation by using varattnos_map() |
| * and then convert the varattno to the new varattno |
| */ |
| Oid lastRelOid; |
| |
| /* |
| * scanrelid is the RTE index for this scan node. It will be used to select |
| * the varno whose varattno will be remapped, if necessary |
| */ |
| Index scanrelid; |
| |
| /* |
| * This memory context will be reset per-partition to free |
| * up the previous partition's memory |
| */ |
| MemoryContext partitionMemoryContext; |
| |
| |
| } DynamicTableScanState; |
| |
| /* ---------------------------------------------------------------- |
| * Join State Information |
| * ---------------------------------------------------------------- |
| */ |
| |
| /* ---------------- |
| * JoinState information |
| * |
| * Superclass for state nodes of join plans: a PlanState plus the join |
| * type and the list of join quals. |
| * ---------------- |
| */ |
| typedef struct JoinState |
| { |
| PlanState ps; |
| JoinType jointype; |
| List *joinqual; /* JOIN quals (in addition to ps.qual) */ |
| } JoinState; |
| |
| /* ---------------- |
| * NestLoopState information |
| * |
| * NeedNewOuter true if need new outer tuple on next call |
| * MatchedOuter true if found a join match for current outer tuple |
| * NullInnerTupleSlot prepared null tuple for left outer joins |
| * |
| * The remaining flags are marked CDB or CDB-OLAP below and are not |
| * described by this comment; the notes on them are assumptions to be |
| * confirmed against the nested-loop executor code. |
| * ---------------- |
| */ |
| typedef struct NestLoopState |
| { |
| JoinState js; /* its first field is NodeTag */ |
| bool nl_NeedNewOuter; |
| bool nl_MatchedOuter; |
| bool nl_innerSquelchNeeded; /*CDB*/ |
| bool nl_QuitIfEmptyInner; /*CDB*/ |
| bool shared_outer; /* NOTE(review): meaning not visible here -- confirm */ |
| bool prefetch_inner; /* presumably: read the inner side before the outer side -- TODO confirm */ |
| bool reset_inner; /*CDB-OLAP*/ |
| bool require_inner_reset; /*CDB-OLAP*/ |
| |
| struct TupleTableSlot *nl_NullInnerTupleSlot; |
| |
| List *nl_InnerJoinKeys; /* list of ExprState nodes */ |
| List *nl_OuterJoinKeys; /* list of ExprState nodes */ |
| bool nl_innerSideScanned; /* set to true once we've scanned all inner tuples the first time */ |
| bool nl_qualResultForNull; /* the value of the join condition when one of the sides contains a NULL */ |
| |
| } NestLoopState; |
| |
| /* ---------------- |
| * MergeJoinState information |
| * |
| * NumClauses number of mergejoinable join clauses |
| * Clauses info for each mergejoinable clause |
| * JoinState current "state" of join. see execdefs.h |
| * FillOuter true if should emit unjoined outer tuples anyway |
| * FillInner true if should emit unjoined inner tuples anyway |
| * MatchedOuter true if found a join match for current outer tuple |
| * MatchedInner true if found a join match for current inner tuple |
| * OuterTupleSlot slot in tuple table for cur outer tuple |
| * InnerTupleSlot slot in tuple table for cur inner tuple |
| * MarkedTupleSlot slot in tuple table for marked tuple |
| * NullOuterTupleSlot prepared null tuple for right outer joins |
| * NullInnerTupleSlot prepared null tuple for left outer joins |
| * OuterEContext workspace for computing outer tuple's join values |
| * InnerEContext workspace for computing inner tuple's join values |
| * |
| * The names above omit the mj_ prefix carried by the struct members. |
| * Note that "JoinState" in this list refers to the int member |
| * mj_JoinState, not to the embedded JoinState struct 'js'. |
| * prefetch_inner and mj_squelchInner (both tagged MPP-3300) are not |
| * described in this header. |
| * ---------------- |
| */ |
| /* private in nodeMergejoin.c; exposed here only as an opaque pointer type: */ |
| typedef struct MergeJoinClauseData *MergeJoinClause; |
| |
| typedef struct MergeJoinState |
| { |
| JoinState js; /* its first field is NodeTag */ |
| int mj_NumClauses; |
| MergeJoinClause mj_Clauses; /* array of length mj_NumClauses */ |
| int mj_JoinState; |
| bool mj_FillOuter; |
| bool mj_FillInner; |
| bool mj_MatchedOuter; |
| bool mj_MatchedInner; |
| struct TupleTableSlot *mj_OuterTupleSlot; |
| struct TupleTableSlot *mj_InnerTupleSlot; |
| struct TupleTableSlot *mj_MarkedTupleSlot; |
| struct TupleTableSlot *mj_NullOuterTupleSlot; |
| struct TupleTableSlot *mj_NullInnerTupleSlot; |
| ExprContext *mj_OuterEContext; |
| ExprContext *mj_InnerEContext; |
| bool prefetch_inner; /* MPP-3300 */ |
| bool mj_squelchInner; /* MPP-3300 */ |
| } MergeJoinState; |
| |
| /* ---------------- |
| * HashJoinState information |
| * |
| * hj_HashTable hash table for the hashjoin |
| * (NULL if table not built yet) |
| * hj_CurHashValue hash value for current outer tuple |
| * hj_CurBucketNo bucket# for current outer tuple |
| * hj_CurTuple last inner tuple matched to current outer |
| * tuple, or NULL if starting search |
| * (CurHashValue, CurBucketNo and CurTuple are |
| * undefined if OuterTupleSlot is empty!) |
| * hj_OuterHashKeys the outer hash keys in the hashjoin condition |
| * hj_InnerHashKeys the inner hash keys in the hashjoin condition |
| * hj_HashOperators the join operators in the hashjoin condition |
| * hj_OuterTupleSlot tuple slot for outer tuples |
| * hj_HashTupleSlot tuple slot for hashed tuples |
| * hj_NullInnerTupleSlot prepared null tuple for left outer joins |
| * hj_FirstOuterTupleSlot first tuple retrieved from outer plan |
| * hj_NeedNewOuter true if need new outer tuple on next call |
| * hj_MatchedOuter true if found a join match for current outer |
| * hj_OuterNotEmpty true if outer relation known not empty |
| * hj_nonequijoin true to force hash table to keep nulls |
| * hj_InnerEmpty true if inner side is empty |
| * nbatch_loaded_state number of batches when loaded from the state; |
| * -1 means not loaded yet |
| * |
| * hashclauses, hashqualclauses and the workfile-related flags |
| * (cached_workfiles_*, workfiles_created) are documented next to the |
| * members themselves. prefetch_inner is not described in this header. |
| * ---------------- |
| */ |
| |
| /* these structs are defined in executor/hashjoin.h: */ |
| typedef struct HashJoinTupleData *HashJoinTuple; |
| typedef struct HashJoinTableData *HashJoinTable; |
| |
| typedef struct HashJoinState |
| { |
| JoinState js; /* its first field is NodeTag */ |
| List *hashclauses; /* list of ExprState nodes (hash) */ |
| List *hashqualclauses; /* CDB: list of ExprState nodes (match) */ |
| HashJoinTable hj_HashTable; |
| uint32 hj_CurHashValue; |
| int hj_CurBucketNo; |
| HashJoinTuple hj_CurTuple; |
| List *hj_OuterHashKeys; /* list of ExprState nodes */ |
| List *hj_InnerHashKeys; /* list of ExprState nodes */ |
| List *hj_HashOperators; /* list of operator OIDs */ |
| struct TupleTableSlot *hj_OuterTupleSlot; |
| struct TupleTableSlot *hj_HashTupleSlot; |
| struct TupleTableSlot *hj_NullInnerTupleSlot; |
| struct TupleTableSlot *hj_FirstOuterTupleSlot; |
| bool hj_NeedNewOuter; |
| bool hj_MatchedOuter; |
| bool hj_OuterNotEmpty; |
| bool hj_InnerEmpty; /* set to true if inner side is empty */ |
| bool prefetch_inner; |
| bool hj_nonequijoin; |
| |
| /* true if found matching and usable cached workfiles */ |
| bool cached_workfiles_found; |
| /* set after loading nbatch and nbuckets from cached workfile */ |
| bool cached_workfiles_batches_buckets_loaded; |
| /* set after loading cached workfiles */ |
| bool cached_workfiles_loaded; |
| /* set if the operator created workfiles */ |
| bool workfiles_created; |
| /* number of batches when we loaded from the state. -1 means not loaded yet */ |
| int nbatch_loaded_state; |
| |
| } HashJoinState; |
| |
| |
| /* ---------------------------------------------------------------- |
| * Materialization State Information |
| * ---------------------------------------------------------------- |
| */ |
| |
| /* ---------------- |
| * Generic tuplestore structure |
| * used to communicate between ShareInputScan nodes, |
| * Materialize and Sort |
| * |
| * This is a union, so all three members share the same storage and |
| * only one of them is meaningful at a time. Which one applies depends |
| * on the node that owns the store; see the member comments. |
| * ---------------- |
| */ |
| typedef union GenericTupStore |
| { |
| struct NTupleStore *matstore; /* Used by Materialize */ |
| struct Tuplesortstate_mk *sortstore_mk; /* Used by Sort when gp_enable_mk_sort = true */ |
| struct Tuplesortstate *sortstore; /* Used by Sort when gp_enable_mk_sort = false */ |
| } GenericTupStore; |
| |
| /* ---------------- |
| * MaterialState information |
| * |
| * materialize nodes are used to materialize the results |
| * of a subplan into a temporary file. |
| * |
| * ss.ss_ScanTupleSlot refers to output of underlying plan. |
| * |
| * ts_state points to a GenericTupStore; per that union's member |
| * comments, Materialize uses its matstore member. |
| * ---------------- |
| */ |
| typedef struct MaterialState |
| { |
| ScanState ss; /* its first field is NodeTag */ |
| bool randomAccess; /* need random access to subplan output? */ |
| bool eof_underlying; /* reached end of underlying plan? */ |
| bool ts_destroyed; /* called destroy tuple store? */ |
| |
| GenericTupStore *ts_state; /* private state of tuplestore.c */ |
| void *ts_pos; /* opaque; NOTE(review): presumably the current read position in ts_state -- confirm */ |
| void *ts_markpos; /* opaque; NOTE(review): presumably the marked position in ts_state -- confirm */ |
| void *share_lk_ctxt; /* opaque; NOTE(review): presumably the context handed back by the shareinput_* helpers -- confirm */ |
| |
| bool cached_workfiles_found; /* true if found matching and usable cached workfiles */ |
| } MaterialState; |
| |
| /* ---------------- |
| * ShareInputScanState information |
| * |
| * State of each scanner of the ShareInput node |
| * |
| * ts_state, ts_pos, ts_markpos and share_lk_ctxt mirror the members |
| * of the same names in MaterialState. |
| * ---------------- |
| */ |
| typedef struct ShareInputScanState |
| { |
| ScanState ss; |
| /* |
| * Depending on share_type, we have either a tuplestore_state and |
| * tuplestore_pos, or a tuplesort_state and tuplesort_pos. |
| * (share_type is not a member of this struct.) |
| */ |
| GenericTupStore *ts_state; |
| void *ts_pos; |
| void *ts_markpos; |
| |
| void *share_lk_ctxt; /* opaque; NOTE(review): presumably the context handed back by the shareinput_* helpers -- confirm */ |
| bool freed; /* is this node already freed? */ |
| } ShareInputScanState; |
| |
| /* |
| * Prototypes for the ShareInput reader/writer helpers; the definitions |
| * are not in this header. |
| * |
| * NOTE(review): the void * returned by shareinput_reader_waitready() and |
| * shareinput_writer_notifyready() is presumably the context that is later |
| * passed as the first argument of shareinput_reader_notifydone() and |
| * shareinput_writer_waitdone() (cf. the share_lk_ctxt members) -- confirm. |
| * |
| * XXX Should move into buf file |
| */ |
| extern void *shareinput_reader_waitready(int share_id, PlanGenerator planGen); |
| extern void *shareinput_writer_notifyready(int share_id, int nsharer_xslice_notify_ready, PlanGenerator planGen); |
| extern void shareinput_reader_notifydone(void *, int share_id); |
| extern void shareinput_writer_waitdone(void *, int share_id, int nsharer_xslice_wait_done); |
| extern void shareinput_create_bufname_prefix(char* p, int size, int share_id); |
| |
| /* ---------------- |
| * SortState information |
| * |
| * tuplesortstate points to a GenericTupStore; per that union's member |
| * comments, Sort uses sortstore_mk when gp_enable_mk_sort is true and |
| * sortstore otherwise. |
| * |
| * limitOffset / limitCount / noduplicates are the CDB additions that |
| * carry limit state and duplicate elimination on the Sort node itself. |
| * ---------------- |
| */ |
| typedef struct SortState |
| { |
| ScanState ss; /* its first field is NodeTag */ |
| bool randomAccess; /* need random access to sort output? */ |
| bool sort_Done; /* sort completed yet? */ |
| GenericTupStore *tuplesortstate; /* private state of tuplesort.c */ |
| /* CDB */ /* limit state */ |
| ExprState *limitOffset; /* OFFSET parameter, or NULL if none */ |
| ExprState *limitCount; /* COUNT parameter, or NULL if none */ |
| bool noduplicates; /* true if discard duplicate rows */ |
| |
| void *share_lk_ctxt; /* opaque; NOTE(review): presumably the context handed back by the shareinput_* helpers -- confirm */ |
| |
| bool cached_workfiles_found; /* true if found matching and usable cached workfiles */ |
| bool cached_workfiles_loaded; /* set after loading cached workfiles */ |
| } SortState; |
| |
| /* --------------------- |
| * AggState information |
| * |
| * ss.ss_ScanTupleSlot refers to output of underlying plan. |
| * |
| * Note: ss.ps.ps_ExprContext contains ecxt_aggvalues and |
| * ecxt_aggnulls arrays, which hold the computed agg values for the current |
| * input group during evaluation of an Agg node's output tuple(s). We |
| * create a second ExprContext, tmpcontext, in which to evaluate input |
| * expressions and run the aggregate transition functions. |
| * ------------------------- |
| */ |
| /* these structs are private in nodeAgg.c; only opaque pointer typedefs are exposed here: */ |
| typedef struct AggStatePerAggData *AggStatePerAgg; |
| typedef struct AggStatePerGroupData *AggStatePerGroup; |
| |
| /* |
| * There are four different types of Agg nodes: |
| * (1) Scalar (Plain) Agg: Inputs are read in and aggregated into a single value. This Agg |
| * always returns a single value, even when there are no inputs at all. |
| * (2) Ordinary Grouping Agg node: Inputs come in as groups. Each group is aggregated. |
| * This Agg will handle the ordinary grouping and first stage of rollup Agg. |
| * (3) Intermediate Rollup Agg node: There are two different inputs: |
| * (a) Inputs that just need to be pass-through. These tuples are coming from |
| * 2+ level downstream of rollup Aggs, and do not need to be aggregated. |
| * (b) Inputs that need to be aggregated as groups. These tuples also need to |
| * be pass-through. |
| * (4) Final Rollup Agg node: This is similar to (3), except that the pass-through |
| * tuples need to be finalized. |
| * |
| * The enumerators below are listed in the same order as (1)-(4). |
| */ |
| typedef enum AggregateType |
| { |
| AggTypeScalar, /* (1) scalar (plain) Agg */ |
| AggTypeGroup, /* (2) ordinary grouping Agg */ |
| AggTypeIntermediateRollup, /* (3) intermediate rollup Agg */ |
| AggTypeFinalRollup /* (4) final rollup Agg */ |
| } AggregateType; |
| |
| |
| typedef struct AggState |
| { |
| ScanState ss; /* its first field is NodeTag */ |
| List *aggs; /* all Aggref nodes in targetlist & quals */ |
| int numaggs; /* length of list (could be zero!) */ |
| FmgrInfo *eqfunctions; /* per-grouping-field equality fns */ |
| FmgrInfo *hashfunctions; /* per-grouping-field hash fns */ |
| AggStatePerAgg peragg; /* per-Aggref information */ |
| MemoryContext aggcontext; /* memory context for long-lived data */ |
| ExprContext *tmpcontext; /* econtext for input expressions */ |
| bool agg_done; /* indicates completion of Agg scan */ |
| |
| /* these fields are used in AGG_PLAIN and AGG_SORTED modes: */ |
| AggStatePerGroup pergroup; /* per-Aggref-per-group working state */ |
| struct MemTupleData * grp_firstTuple; /* copy of first tuple of current group */ |
| /* these fields are used in AGG_HASHED mode: */ |
| TupleHashTable hashtable; /* hash table with one entry per group */ |
| struct TupleTableSlot *hashslot; /* slot for loading hash table */ |
| List *hash_needed; /* list of columns needed in hash table */ |
| TupleHashIterator hashiter; /* for iterating through hash table */ |
| |
| /* MPP */ |
| struct HashAggTable *hhashtable; /* NOTE(review): MPP hash-agg table, separate from 'hashtable' above; usage is not shown in this header -- confirm */ |
| MemoryManagerContainer mem_manager; /* NOTE(review): presumably the allocator wrapper used with hhashtable -- confirm */ |
| |
| AggregateType aggType; /* which of the four Agg node kinds this is (see AggregateType) */ |
| |
| /* ROLLUP */ |
| AggStatePerGroup perpassthru; /* per-Aggref-per-pass-through-tuple working state */ |
| |
| /* |
| * The following are used to define how to modify input tuples to |
| * satisfy the rollup level of this Agg node. |
| * |
| * NOTE(review): replValues / replIsnull / doReplace look like parallel |
| * arrays with one entry per grouping attribute (num_attrs) -- confirm. |
| */ |
| int num_attrs; /* number of grouping attributes for the Agg node */ |
| Datum *replValues; |
| bool *replIsnull; |
| bool *doReplace; |
| List *percs; /* all PercentileExpr nodes in targetlist & quals */ |
| |
| /* true if found matching and usable cached workfiles */ |
| bool cached_workfiles_found; |
| /* set after loading cached workfiles */ |
| bool cached_workfiles_loaded; |
| /* set if the operator created workfiles */ |
| bool workfiles_created; |
| |
| } AggState; |
| |
| |
| /* --------------------- |
| * WindowState information |
| * |
| * The WindowStatePerLevel, WindowStatePerFunction and WindowInputBuffer |
| * types below are declared here only as pointers to structs whose |
| * definitions are not part of this header. |
| * ------------------------- |
| */ |
| typedef struct WindowStatePerLevelData *WindowStatePerLevel; |
| typedef struct WindowStatePerFunctionData *WindowStatePerFunction; |
| typedef struct WindowInputBufferData *WindowInputBuffer; |
| |
| typedef struct WindowState |
| { |
| PlanState ps; /* its first field is NodeTag */ |
| List *wrxstates; /* all WindowRefExprState nodes in targetlist */ |
| FmgrInfo *eqfunctions; /* equality fns for partition key */ |
| struct TupleTableSlot *priorslot; /* place for prior tuple */ |
| struct TupleTableSlot *curslot; /* current tuple */ |
| struct TupleTableSlot *spare; /* spare slot; NOTE(review): the original comment here read "current tuple", apparently copied from curslot -- confirm actual use */ |
| struct TupleTableSlot *saveslot; /* convenient place holder */ |
| |
| /* meta data about the current slot */ |
| bool cur_slot_is_new; /* is this a slot from a buffer or outer plan */ |
| bool cur_slot_part_break; /* slot breaks the partition key */ |
| int cur_slot_key_break; /* break level of the key in the slot */ |
| |
| /* Array of working states per distinct window function */ |
| int numfuncs; |
| WindowStatePerFunction func_state; |
| |
| /* Per row state */ |
| int64 row_index; |
| |
| int numlevels; /* NOTE(review): presumably the length of level_state -- confirm */ |
| |
| WindowStatePerLevel level_state; |
| |
| /* memory context for transition value processing */ |
| /* XXX: we should probably have one context per level, so that we can |
| * reset it when there's a key change at that level |
| */ |
| MemoryContext transcontext; |
| MemoryManagerContainer mem_manager; |
| |
| /* |
| * Context for comparing datums immediately. |
| * We need to reset this context every time we run a comparison, |
| * since a window frame may contain an unlimited number of rows. |
| */ |
| MemoryContext cmpcontext; |
| |
| /* framed window functions need access to their frames */ |
| WindowStatePerFunction cur_funcstate; |
| |
| /* input buffer */ |
| WindowInputBuffer input_buffer; |
| |
| /* Indicates whether any function needs a peer count. */ |
| bool need_peercount; |
| |
| /* A char buffer to temporarily hold serialized data |
| * before writing them to the frame buffer. |
| * |
| * Use this pre-allocated buffer to avoid doing |
| * palloc/pfree many times. |
| * |
| * The size of this array is specified by 'max_size'. |
| */ |
| char *serial_array; |
| Size max_size; |
| } WindowState; |
| |
| /* ---------------- |
| * UniqueState information |
| * |
| * Unique nodes are used "on top of" sort nodes to discard |
| * duplicate tuples returned from the sort phase. Basically |
| * all it does is compare the current tuple from the subplan |
| * with the previously fetched tuple (stored in its result slot). |
| * If the two are identical in all interesting fields, then |
| * we just fetch another tuple from the sort and try again. |
| * |
| * eqfunctions per-field lookup data for the equality functions |
| * tempContext short-term memory context used for the comparisons |
| * ---------------- |
| */ |
| typedef struct UniqueState |
| { |
| PlanState ps; /* its first field is NodeTag */ |
| FmgrInfo *eqfunctions; /* per-field lookup data for equality fns */ |
| MemoryContext tempContext; /* short-term context for comparisons */ |
| } UniqueState; |
| |
| /* ---------------- |
| * HashState information |
| * |
| * hashtable and hashkeys relate this node to its parent hash join: |
| * hashtable has the same HashJoinTable type as |
| * HashJoinState.hj_HashTable, and hashkeys is the same as the |
| * parent's hj_InnerHashKeys. |
| * ---------------- |
| */ |
| typedef struct HashState |
| { |
| PlanState ps; /* its first field is NodeTag */ |
| HashJoinTable hashtable; /* hash table for the hashjoin */ |
| List *hashkeys; /* list of ExprState nodes */ |
| bool hs_keepnull; /* Keep nulls */ |
| bool hs_quit_if_hashkeys_null; /* quit building hash table if hashkeys are all null */ |
| bool hs_hashkeys_null; /* found an instance wherein hashkeys are all null */ |
| /* note: the hashkeys member above is same as parent's hj_InnerHashKeys */ |
| } HashState; |
| |
| /* ---------------- |
| * SetOpState information |
| * |
| * SetOp nodes are used "on top of" sort nodes to discard |
| * duplicate tuples returned from the sort phase. These are |
| * more complex than a simple Unique since we have to count |
| * how many duplicates to return. |
| * |
| * numLeft, numRight and numOutput are the per-group counters used for |
| * that counting; subplan_done records that the subplan returned EOF. |
| * ---------------- |
| */ |
| typedef struct SetOpState |
| { |
| PlanState ps; /* its first field is NodeTag */ |
| FmgrInfo *eqfunctions; /* per-field lookup data for equality fns */ |
| bool subplan_done; /* has subplan returned EOF? */ |
| long numLeft; /* number of left-input dups of cur group */ |
| long numRight; /* number of right-input dups of cur group */ |
| long numOutput; /* number of dups left to output */ |
| } SetOpState; |
| |
| /* ---------------- |
| * LimitState information |
| * |
| * Limit nodes are used to enforce LIMIT/OFFSET clauses. |
| * They just select the desired subrange of their subplan's output. |
| * |
| * offset is the number of initial tuples to skip (0 does nothing). |
| * count is the number of tuples to return after skipping the offset tuples. |
| * If no limit count was specified, count is undefined and noCount is true. |
| * When lstate == LIMIT_INITIAL, offset/count/noCount haven't been set yet. |
| * |
| * LimitStateCond enumerates the values of the lstate state machine; |
| * position is the 1-based index of the last tuple returned and subSlot |
| * holds the tuple last obtained from the subplan. |
| * ---------------- |
| */ |
| typedef enum |
| { |
| LIMIT_INITIAL, /* initial state for LIMIT node */ |
| LIMIT_EMPTY, /* there are no returnable rows */ |
| LIMIT_INWINDOW, /* have returned a row in the window */ |
| LIMIT_SUBPLANEOF, /* at EOF of subplan (within window) */ |
| LIMIT_WINDOWEND, /* stepped off end of window */ |
| LIMIT_WINDOWSTART /* stepped off beginning of window */ |
| } LimitStateCond; |
| |
| typedef struct LimitState |
| { |
| PlanState ps; /* its first field is NodeTag */ |
| ExprState *limitOffset; /* OFFSET parameter, or NULL if none */ |
| ExprState *limitCount; /* COUNT parameter, or NULL if none */ |
| int64 offset; /* current OFFSET value */ |
| int64 count; /* current COUNT, if any */ |
| bool noCount; /* if true, ignore count */ |
| LimitStateCond lstate; /* state machine status, as above */ |
| int64 position; /* 1-based index of last tuple returned */ |
| struct TupleTableSlot *subSlot; /* tuple last obtained from subplan */ |
| } LimitState; |
| |
| /* |
| * DML Operations |
| */ |
| |
| /* |
| * ExecNode for DML. |
| * This operator contains a Plannode in PlanState. |
| * The Plannode contains indexes to the resjunk columns |
| * needed for deciding the action (Insert/Delete), the table oid |
| * and the tuple ctid. |
| * |
| * The state itself only adds a junk filter and the slot that receives |
| * the filtered ("cleaned up") tuple. |
| */ |
| typedef struct DMLState |
| { |
| |
| PlanState ps; |
| JunkFilter *junkfilter; /* filter that removes junk and dropped attributes */ |
| struct TupleTableSlot *cleanedUpSlot; /* holds 'final' tuple which matches the target relation schema */ |
| |
| } DMLState; |
| |
| /* |
| * ExecNode for Split. |
| * This operator contains a Plannode in PlanState. |
| * The Plannode contains indexes to the ctid, insert, delete, resjunk columns |
| * needed for adding the action (Insert/Delete). |
| * A MemoryContext and TupleTableSlot are maintained to keep the INSERT |
| * tuple until requested. |
| * |
| * NOTE(review): this struct declares no MemoryContext member, only the |
| * two tuple slots and the processInsert flag; the sentence above may be |
| * stale -- confirm. |
| */ |
| typedef struct SplitUpdateState |
| { |
| |
| PlanState ps; |
| bool processInsert; /* flag that specifies the operator's next action. */ |
| struct TupleTableSlot *insertTuple; /* tuple to Insert */ |
| struct TupleTableSlot *deleteTuple; /* tuple to Delete */ |
| |
| } SplitUpdateState; |
| |
| /* |
| * ExecNode for AssertOp. |
| * This operator contains a Plannode that contains the expressions |
| * to execute. |
| * |
| * The state holds only the common PlanState header; it has no |
| * operator-specific members. |
| */ |
| typedef struct AssertOpState |
| { |
| PlanState ps; |
| |
| } AssertOpState; |
| |
| /* |
| * ExecNode for RowTrigger. |
| * This operator contains a Plannode that contains the triggers |
| * to execute. |
| * |
| * Three tuple slots are kept: one each for the new values, the old |
| * values and the values returned by the trigger. |
| */ |
| typedef struct RowTriggerState |
| { |
| PlanState ps; |
| struct TupleTableSlot *newTuple; /* stores new values */ |
| struct TupleTableSlot *oldTuple; /* stores old values */ |
| struct TupleTableSlot *triggerTuple; /* stores returned values by the trigger */ |
| |
| } RowTriggerState; |
| |
| |
| typedef enum MotionStateType |
| { |
| MOTIONSTATE_NONE, /* The motion state is not decided yet, or the motion is |
| * not active in a slice (neither sending nor receiving) |
| */ |
| MOTIONSTATE_SEND, /* The motion is the sender */ |
| MOTIONSTATE_RECV, /* The motion is the receiver */ |
| } MotionStateType; |
| |
| /* ---------------- |
| * MotionState information |
| * |
| * The members fall into four groups, marked by the comments inside |
| * the struct: common state (mstype, stopRequested), sender-only |
| * state, receiver-only state, and counters/timers kept for debugging |
| * and usage statistics. |
| * ---------------- |
| */ |
| typedef struct MotionState |
| { |
| PlanState ps; /* its first field is NodeTag */ |
| MotionStateType mstype; /* Motion state type */ |
| bool stopRequested; /* set when we want transfer to stop */ |
| |
| /* For motion send */ |
| bool sentEndOfStream; /* set when end-of-stream has successfully been sent */ |
| List *hashExpr; /* state struct used for evaluating the hash expressions */ |
| struct CdbHash *cdbhash; /* hash api object */ |
| |
| /* For Motion recv */ |
| void *tupleheap; /* data structure for match merge in sorted motion node */ |
| int routeIdNext; /* for a sorted motion node, the routeId to get next (same as |
| * the routeId last returned ) */ |
| bool tupleheapReady; /* for a sorted motion node, false until we have a tuple from |
| * each source segindex */ |
| |
| /* The following can be used for debugging, usage stats, etc. */ |
| int numTuplesFromChild; /* Number of tuples received from child */ |
| int numTuplesToAMS; /* Number of tuples from child that were sent to AMS */ |
| int numTuplesFromAMS; /* Number of tuples received from AMS */ |
| int numTuplesToParent; /* Number of tuples either from child or AMS that were sent to parent */ |
| int *numTuplesByHashSegIdx; /* Distribution of number of tuples from child by hash seg index */ |
| |
| struct timeval otherTime; /* time accumulator used in sending motion node to keep track of time |
| * spent getting the next tuple (not sending). this could mean time spent |
| * in another motion node receiving. */ |
| |
| struct timeval motionTime; /* time accumulator for time spent in motion node. For sending motion node |
| * it is just the amount of time actually sending the tuple thru the |
| * interconnect. For receiving motion node, it is the time spent waiting |
| * and processing of the next incoming tuple. |
| */ |
| |
| Oid *outputFunArray; /* output functions for each column (debug only) */ |
| |
| int numInputSegs; /* the number of segments on the sending slice */ |
| } MotionState; |
| |
| /* |
| * ExecNode for PartitionSelector. |
| * This operator contains a Plannode in PlanState. |
| * |
| * Besides the PlanState header, the state carries the root table's |
| * PartitionNode and access methods, per-level and leaf partition |
| * constraints, and the ExprStates for the per-level, residual and |
| * propagation expressions (see member comments). |
| */ |
| typedef struct PartitionSelectorState |
| { |
| PlanState ps; /* its first field is NodeTag */ |
| PartitionNode *rootPartitionNode; /* PartitionNode for root table */ |
| PartitionAccessMethods *accessMethods; /* Access method for partition */ |
| struct PartitionConstraints **levelPartConstraints; /* accepted partitions for all levels */ |
| struct PartitionConstraints **acceptedLeafPart; /* accepted leaf PartitionConstraints for current tuple */ |
| List *levelEqExprStates; /* ExprState for equality expressions for all levels */ |
| List *levelExprStates; /* ExprState for general expressions for all levels */ |
| ExprState *residualPredicateExprState; /* ExprState for evaluating residual predicate */ |
| ExprState *propagationExprState; /* ExprState for evaluating propagation expression */ |
| |
| } PartitionSelectorState; |
| |
| extern void sendInitGpmonPkts(Plan *node, EState *estate); /* declared here, defined elsewhere; NOTE(review): presumably sends the initial gpmon packets for the plan rooted at 'node' -- confirm */ |
| extern void initGpmonPktForResult(Plan *planNode, gpmon_packet_t *gpmon_pkt, EState *estate); /* this and every initGpmonPktFor* prototype below share the (Plan *, gpmon_packet_t *, EState *) signature, which is also the element type of initGpmonPktFuncs */ |
| extern void initGpmonPktForAppend(Plan *planNode, gpmon_packet_t *gpmon_pkt, EState *estate); |
| extern void initGpmonPktForSequence(Plan *planNode, gpmon_packet_t *gpmon_pkt, EState *estate); |
| extern void initGpmonPktForBitmapAnd(Plan *planNode, gpmon_packet_t *gpmon_pkt, EState *estate); |
| extern void initGpmonPktForBitmapOr(Plan *planNode, gpmon_packet_t *gpmon_pkt, EState *estate); |
| extern void initGpmonPktForTableScan(Plan *planNode, gpmon_packet_t *gpmon_pkt, EState *estate); |
| extern void initGpmonPktForDynamicTableScan(Plan *planNode, gpmon_packet_t *gpmon_pkt, EState *estate); |
| extern void initGpmonPktForExternalScan(Plan *planNode, gpmon_packet_t *gpmon_pkt, EState *estate); |
| extern void initGpmonPktForIndexScan(Plan *planNode, gpmon_packet_t *gpmon_pkt, EState *estate); |
| extern void initGpmonPktForDynamicIndexScan(Plan *planNode, gpmon_packet_t *gpmon_pkt, EState *estate); |
| extern void initGpmonPktForBitmapIndexScan(Plan *planNode, gpmon_packet_t *gpmon_pkt, EState *estate); |
| extern void initGpmonPktForBitmapHeapScan(Plan *planNode, gpmon_packet_t *gpmon_pkt, EState *estate); |
| extern void initGpmonPktForBitmapAppendOnlyScan(Plan *planNode, gpmon_packet_t *gpmon_pkt, EState *estate); |
| extern void initGpmonPktForTidScan(Plan *planNode, gpmon_packet_t *gpmon_pkt, EState *estate); |
| extern void initGpmonPktForSubqueryScan(Plan *planNode, gpmon_packet_t *gpmon_pkt, EState *estate); |
| extern void initGpmonPktForFunctionScan(Plan *planNode, gpmon_packet_t *gpmon_pkt, EState *estate); |
| extern void initGpmonPktForValuesScan(Plan *planNode, gpmon_packet_t *gpmon_pkt, EState *estate); |
| extern void initGpmonPktForNestLoop(Plan *planNode, gpmon_packet_t *gpmon_pkt, EState *estate); |
| extern void initGpmonPktForMergeJoin(Plan *planNode, gpmon_packet_t *gpmon_pkt, EState *estate); |
| extern void initGpmonPktForHashJoin(Plan *planNode, gpmon_packet_t *gpmon_pkt, EState *estate); |
| extern void initGpmonPktForMaterial(Plan *planNode, gpmon_packet_t *gpmon_pkt, EState *estate); |
| extern void initGpmonPktForSort(Plan *planNode, gpmon_packet_t *gpmon_pkt, EState *estate); |
| extern void initGpmonPktForGroup(Plan *planNode, gpmon_packet_t *gpmon_pkt, EState *estate); |
| extern void initGpmonPktForAgg(Plan *planNode, gpmon_packet_t *gpmon_pkt, EState *estate); |
| extern void initGpmonPktForUnique(Plan *planNode, gpmon_packet_t *gpmon_pkt, EState *estate); |
| extern void initGpmonPktForHash(Plan *planNode, gpmon_packet_t *gpmon_pkt, EState *estate); |
| extern void initGpmonPktForSetOp(Plan *planNode, gpmon_packet_t *gpmon_pkt, EState *estate); |
| extern void initGpmonPktForLimit(Plan *planNode, gpmon_packet_t *gpmon_pkt, EState *estate); |
| extern void initGpmonPktForMotion(Plan *planNode, gpmon_packet_t *gpmon_pkt, EState *estate); |
| extern void initGpmonPktForShareInputScan(Plan *planNode, gpmon_packet_t *gpmon_pkt, EState *estate); |
| extern void initGpmonPktForWindow(Plan *planNode, gpmon_packet_t *gpmon_pkt, EState *estate); |
| extern void initGpmonPktForRepeat(Plan *planNode, gpmon_packet_t *gpmon_pkt, EState *estate); |
| extern void initGpmonPktForDefunctOperators(Plan *planNode, gpmon_packet_t *gpmon_pkt, EState *estate); |
| extern void initGpmonPktForDML(Plan *planNode, gpmon_packet_t *gpmon_pkt, EState *estate); |
| extern void initGpmonPktForPartitionSelector(Plan *planNode, gpmon_packet_t *gpmon_pkt, EState *estate); |
| /* |
| * The function pointers to init the gpmon packet for each plan node. |
| * The order of the function pointers is the same as the one defined in |
| * NodeTag (nodes.h). |
| * |
| * The array has (T_Plan_End - T_Plan_Start) entries and is only declared |
| * here; its definition lives elsewhere. |
| * NOTE(review): presumably indexed by (node tag - T_Plan_Start) -- confirm |
| * against the callers. |
| */ |
| extern void (*initGpmonPktFuncs[T_Plan_End - T_Plan_Start])(Plan *planNode, gpmon_packet_t *gpmon_pkt, EState *estate); |
| |
| #endif /* EXECNODES_H */ |