| /*------------------------------------------------------------------------- |
| * |
| * execMain.c |
| * top level executor interface routines |
| * |
| * INTERFACE ROUTINES |
| * ExecutorStart() |
| * ExecutorRun() |
| * ExecutorFinish() |
| * ExecutorEnd() |
| * |
| * These four procedures are the external interface to the executor. |
| * In each case, the query descriptor is required as an argument. |
| * |
| * ExecutorStart must be called at the beginning of execution of any |
| * query plan and ExecutorEnd must always be called at the end of |
| * execution of a plan (unless it is aborted due to error). |
| * |
| * ExecutorRun accepts direction and count arguments that specify whether |
| * the plan is to be executed forwards or backwards, and for how many tuples. |
| * In some cases ExecutorRun may be called multiple times to process all |
| * the tuples for a plan. It is also acceptable to stop short of executing |
| * the whole plan (but only if it is a SELECT). |
| * |
| * ExecutorFinish must be called after the final ExecutorRun call and |
| * before ExecutorEnd. This can be omitted only in case of EXPLAIN, |
| * which should also omit ExecutorRun. |
| * |
| * Portions Copyright (c) 2005-2010, Greenplum inc |
| * Portions Copyright (c) 2012-Present VMware, Inc. or its affiliates. |
| * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group |
| * Portions Copyright (c) 1994, Regents of the University of California |
| * |
| * |
| * IDENTIFICATION |
| * src/backend/executor/execMain.c |
| * |
| *------------------------------------------------------------------------- |
| */ |
| #include "postgres.h" |
| #include "pgstat.h" |
| |
| #include "access/heapam.h" |
| #include "access/htup_details.h" |
| #include "access/sysattr.h" |
| #include "access/tableam.h" |
| #include "access/transam.h" |
| #include "access/xact.h" |
| #include "catalog/namespace.h" |
| #include "catalog/pg_publication.h" |
| #include "catalog/storage_directory_table.h" |
| #include "commands/matview.h" |
| #include "commands/tablespace.h" |
| #include "commands/trigger.h" |
| #include "executor/execdebug.h" |
| #include "executor/nodeModifyTable.h" |
| #include "executor/nodeSubplan.h" |
| #include "foreign/fdwapi.h" |
| #include "jit/jit.h" |
| #include "mb/pg_wchar.h" |
| #include "miscadmin.h" |
| #include "nodes/plannodes.h" |
| #include "parser/parsetree.h" |
| #include "storage/bufmgr.h" |
| #include "storage/lmgr.h" |
| #include "tcop/utility.h" |
| #include "utils/acl.h" |
| #include "utils/backend_status.h" |
| #include "utils/lsyscache.h" |
| #include "utils/memutils.h" |
| #include "utils/partcache.h" |
| #include "utils/rls.h" |
| #include "utils/ruleutils.h" |
| #include "utils/snapmgr.h" |
| #include "utils/metrics_utils.h" |
| #include "utils/queryenvironment.h" |
| |
| #include "utils/ps_status.h" |
| #include "utils/typcache.h" |
| #include "utils/workfile_mgr.h" |
| #include "utils/faultinjector.h" |
| #include "utils/resource_manager.h" |
| #include "utils/cgroup.h" |
| |
| #include "catalog/pg_statistic.h" |
| #include "catalog/pg_class.h" |
| |
| #include "tcop/tcopprot.h" |
| |
| #include "catalog/pg_tablespace.h" |
| #include "catalog/catalog.h" |
| #include "catalog/gp_matview_aux.h" |
| #include "catalog/oid_dispatch.h" |
| #include "catalog/pg_directory_table.h" |
| #include "catalog/pg_type.h" |
| #include "commands/copy.h" |
| #include "commands/createas.h" |
| #include "executor/execUtils.h" |
| #include "executor/instrument.h" |
| #include "executor/nodeSubplan.h" |
| #include "foreign/fdwapi.h" |
| #include "libpq/pqformat.h" |
| #include "cdb/cdbdisp_query.h" |
| #include "cdb/cdbdispatchresult.h" |
| #include "cdb/cdbexplain.h" /* cdbexplain_sendExecStats() */ |
| #include "cdb/cdbplan.h" |
| #include "cdb/cdbsubplan.h" |
| #include "cdb/cdbvars.h" |
| #include "cdb/ml_ipc.h" |
| #include "cdb/cdbmotion.h" |
| #include "cdb/cdbtm.h" |
| #include "cdb/cdboidsync.h" |
| #include "cdb/cdbllize.h" |
| #include "cdb/memquota.h" |
| #include "cdb/cdbtargeteddispatch.h" |
| #include "cdb/cdbutil.h" |
| #include "cdb/cdbendpoint.h" |
| |
| #define IS_PARALLEL_RETRIEVE_CURSOR(queryDesc) (queryDesc->ddesc && \ |
| queryDesc->ddesc->parallelCursorName && \ |
| strlen(queryDesc->ddesc->parallelCursorName) > 0) |
| |
| /* Hooks for plugins to get control in ExecutorStart/Run/Finish/End */ |
| ExecutorStart_hook_type ExecutorStart_hook = NULL; |
| ExecutorRun_hook_type ExecutorRun_hook = NULL; |
| ExecutorFinish_hook_type ExecutorFinish_hook = NULL; |
| ExecutorEnd_hook_type ExecutorEnd_hook = NULL; |
| |
| /* Hook for plugin to get control in ExecCheckRTPerms() */ |
| ExecutorCheckPerms_hook_type ExecutorCheckPerms_hook = NULL; |
| |
| |
| /* |
| * Greenplum specific code: |
| * Greenplum has shipped auto_stats for a long time; please refer to |
| * https://groups.google.com/a/greenplum.org/g/gpdb-dev/c/bAyw2KBP6yE/m/hmoWikrPAgAJ |
| * for the details of and decisions behind auto_stats. |
| * |
| * auto_stats() is now invoked at the following 7 places: |
| * 1. ProcessQuery() |
| * 2. _SPI_pquery() |
| * 3. postquel_end() |
| * 4. ATExecExpandTableCTAS() |
| * 5. ATExecSetDistributedBy() |
| * 6. DoCopy() |
| * 7. ExecCreateTableAs() |
| * |
| * Previously, places 2 and 3 were hard-coded as being inside a function, |
| * and places 1 and 4~7 as not being inside a function. |
| * Places 4~7 did not cover the case where COPY or CTAS |
| * is called from a procedural language. |
| * |
| * Since auto_stats will be removed in the future, for now we |
| * apply a simple fix instead of a big refactor. |
| * |
| * To correctly pass the inFunction parameter to auto_stats() |
| * at places 4~7 we introduce executor_run_nesting_level to mark |
| * whether the program is already under ExecutorRun(). Places 4~7 are |
| * treated directly as utility statements and do not go through |
| * ExecutorRun() unless they are invoked from a procedural language. |
| * This technique is the same one used by the auto_explain extension. |
| * |
| * For places 1~3 the context is clear, so we do not need to check |
| * executor_run_nesting_level. |
| */ |
| static int executor_run_nesting_level = 0; |
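| |
| /* |
| * Illustrative only (the real call sites live in the commands listed in the |
| * comment above, and the auto_stats() argument list shown here is assumed, |
| * not authoritative): a utility-level caller derives the inFunction argument |
| * from this counter roughly as |
| * |
| *   bool inFunction = (executor_run_nesting_level > 0); |
| * |
| *   auto_stats(cmdType, relationOid, ntuples, inFunction); |
| */ |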
| |
| /* decls for local routines only used within this module */ |
| static void InitPlan(QueryDesc *queryDesc, int eflags); |
| static void CheckValidRowMarkRel(Relation rel, RowMarkType markType); |
| static void ExecPostprocessPlan(EState *estate); |
| static void ExecEndPlan(PlanState *planstate, EState *estate); |
| static void ExecutePlan(EState *estate, PlanState *planstate, |
| bool use_parallel_mode, |
| CmdType operation, |
| bool sendTuples, |
| uint64 numberTuples, |
| ScanDirection direction, |
| DestReceiver *dest, |
| bool execute_once); |
| static bool ExecCheckRTEPerms(RangeTblEntry *rte); |
| static bool ExecCheckRTEPermsModified(Oid relOid, Oid userid, |
| Bitmapset *modifiedCols, |
| AclMode requiredPerms); |
| static void ExecCheckXactReadOnly(PlannedStmt *plannedstmt); |
| static char *ExecBuildSlotValueDescription(Oid reloid, |
| TupleTableSlot *slot, |
| TupleDesc tupdesc, |
| Bitmapset *modifiedCols, |
| int maxfieldlen); |
| static void EvalPlanQualStart(EPQState *epqstate, Plan *planTree); |
| |
| static void AdjustReplicatedTableCounts(EState *estate); |
| |
| static void |
| MaintainMaterializedViewStatus(QueryDesc *queryDesc, CmdType operation); |
| |
| /* end of local decls */ |
| |
| |
| /* ---------------------------------------------------------------- |
| * ExecutorStart |
| * |
| * This routine must be called at the beginning of any execution of any |
| * query plan |
| * |
| * Takes a QueryDesc previously created by CreateQueryDesc (which is separate |
| * only because some places use QueryDescs for utility commands). The tupDesc |
| * field of the QueryDesc is filled in to describe the tuples that will be |
| * returned, and the internal fields (estate and planstate) are set up. |
| * |
| * eflags contains flag bits as described in executor.h. |
| * |
| * NB: the CurrentMemoryContext when this is called will become the parent |
| * of the per-query context used for this Executor invocation. |
| * |
| * We provide a function hook variable that lets loadable plugins |
| * get control when ExecutorStart is called. Such a plugin would |
| * normally call standard_ExecutorStart(). |
| * |
| * MPP: In here we take care of setting up all the necessary items that |
| * will be needed to service the query, such as setting up the interconnect |
| * and dispatching the query. Any other such items needed in the future |
| * must be added here. |
| * |
| * ---------------------------------------------------------------- |
| */ |
| void |
| ExecutorStart(QueryDesc *queryDesc, int eflags) |
| { |
| /* |
| * In some cases (e.g. an EXECUTE statement) a query execution will skip |
| * parse analysis, which means that the query_id won't be reported. Note |
| * that it's harmless to report the query_id multiple times, as the call |
| * will be ignored if the top level query_id has already been reported. |
| */ |
| pgstat_report_query_id(queryDesc->plannedstmt->queryId, false); |
| |
| if (ExecutorStart_hook) |
| (*ExecutorStart_hook) (queryDesc, eflags); |
| else |
| standard_ExecutorStart(queryDesc, eflags); |
| } |
| |
| void |
| standard_ExecutorStart(QueryDesc *queryDesc, int eflags) |
| { |
| EState *estate; |
| MemoryContext oldcontext; |
| GpExecIdentity exec_identity; |
| bool shouldDispatch; |
| bool needDtx; |
| List *volatile toplevelOidCache = NIL; |
| |
| /* sanity checks: queryDesc must not be started already */ |
| Assert(queryDesc != NULL); |
| Assert(queryDesc->estate == NULL); |
| Assert(queryDesc->plannedstmt != NULL); |
| |
| Assert(queryDesc->plannedstmt->intoPolicy == NULL || |
| GpPolicyIsPartitioned(queryDesc->plannedstmt->intoPolicy) || |
| GpPolicyIsReplicated(queryDesc->plannedstmt->intoPolicy)); |
| |
| /* GPDB hook for collecting query info */ |
| if (query_info_collect_hook) |
| (*query_info_collect_hook)(METRICS_QUERY_START, queryDesc); |
| |
| if (Gp_role == GP_ROLE_DISPATCH) |
| { |
| if (!IsResManagerMemoryPolicyNone() && |
| LogResManagerMemory()) |
| { |
| elog(GP_RESMANAGER_MEMORY_LOG_LEVEL, "query requested %.0fKB of memory", |
| (double) queryDesc->plannedstmt->query_mem / 1024.0); |
| } |
| |
| if (queryDesc->plannedstmt->query_mem > 0) |
| { |
| PG_TRY(); |
| { |
| switch (*gp_resmanager_memory_policy) |
| { |
| case RESMANAGER_MEMORY_POLICY_AUTO: |
| PolicyAutoAssignOperatorMemoryKB(queryDesc->plannedstmt, |
| queryDesc->plannedstmt->query_mem); |
| break; |
| case RESMANAGER_MEMORY_POLICY_EAGER_FREE: |
| PolicyEagerFreeAssignOperatorMemoryKB(queryDesc->plannedstmt, |
| queryDesc->plannedstmt->query_mem); |
| break; |
| default: |
| Assert(IsResManagerMemoryPolicyNone()); |
| break; |
| } |
| } |
| PG_CATCH(); |
| { |
| /* GPDB hook for collecting query info */ |
| if (query_info_collect_hook) |
| (*query_info_collect_hook)(QueryCancelCleanup ? METRICS_QUERY_CANCELED : METRICS_QUERY_ERROR, queryDesc); |
| |
| PG_RE_THROW(); |
| } |
| PG_END_TRY(); |
| } |
| } |
| |
| /* |
| * If the transaction is read-only, we need to check if any writes are |
| * planned to non-temporary tables. EXPLAIN is considered read-only. |
| * |
| * Don't allow writes in parallel mode. Supporting UPDATE and DELETE |
| * would require (a) storing the combo CID hash in shared memory, rather |
| * than synchronizing it just once at the start of parallelism, and (b) an |
| * alternative to heap_update()'s reliance on xmax for mutual exclusion. |
| * INSERT may have no such troubles, but we forbid it to simplify the |
| * checks. |
| * |
| * We have lower-level defenses in CommandCounterIncrement and elsewhere |
| * against performing unsafe operations in parallel mode, but this gives a |
| * more user-friendly error message. |
| * |
| * In GPDB, we must call ExecCheckXactReadOnly() in the QD even if the |
| * transaction is not read-only, because ExecCheckXactReadOnly() also |
| * determines if two-phase commit is needed. |
| */ |
| if ((XactReadOnly || IsInParallelMode() || Gp_role == GP_ROLE_DISPATCH) && |
| !(eflags & EXEC_FLAG_EXPLAIN_ONLY)) |
| { |
| PG_TRY(); |
| { |
| ExecCheckXactReadOnly(queryDesc->plannedstmt); |
| } |
| PG_CATCH(); |
| { |
| /* GPDB hook for collecting query info */ |
| if (query_info_collect_hook) |
| (*query_info_collect_hook)(QueryCancelCleanup ? METRICS_QUERY_CANCELED : METRICS_QUERY_ERROR, queryDesc); |
| PG_RE_THROW(); |
| } |
| PG_END_TRY(); |
| } |
| |
| /* |
| * Build EState, switch into per-query memory context for startup. |
| */ |
| estate = CreateExecutorState(); |
| queryDesc->estate = estate; |
| |
| oldcontext = MemoryContextSwitchTo(estate->es_query_cxt); |
| |
| /* |
| * Attach the plannedstmt from the queryDesc. |
| */ |
| estate->es_plannedstmt = queryDesc->plannedstmt; |
| |
| /* |
| * Fill in external parameters, if any, from queryDesc; and allocate |
| * workspace for internal parameters |
| */ |
| estate->es_param_list_info = queryDesc->params; |
| |
| if (queryDesc->plannedstmt->paramExecTypes != NIL) |
| { |
| int nParamExec; |
| |
| nParamExec = list_length(queryDesc->plannedstmt->paramExecTypes); |
| estate->es_param_exec_vals = (ParamExecData *) |
| palloc0(nParamExec * sizeof(ParamExecData)); |
| } |
| |
| /* We now require all callers to provide sourceText */ |
| Assert(queryDesc->sourceText != NULL); |
| estate->es_sourceText = queryDesc->sourceText; |
| |
| /* |
| * Fill in the query environment, if any, from queryDesc. |
| */ |
| if (queryDesc->ddesc && queryDesc->ddesc->namedRelList) |
| { |
| if (queryDesc->queryEnv == NULL) |
| queryDesc->queryEnv = create_queryEnv(); |
| /* Update environment */ |
| AddPreassignedENR(queryDesc->queryEnv, queryDesc->ddesc->namedRelList); |
| } |
| estate->es_queryEnv = queryDesc->queryEnv; |
| |
| /* |
| * If non-read-only query, set the command ID to mark output tuples with |
| */ |
| switch (queryDesc->operation) |
| { |
| case CMD_SELECT: |
| |
| /* |
| * SELECT FOR [KEY] UPDATE/SHARE and modifying CTEs need to mark |
| * tuples |
| */ |
| if (queryDesc->plannedstmt->rowMarks != NIL || |
| queryDesc->plannedstmt->hasModifyingCTE) |
| { |
| estate->es_output_cid = GetCurrentCommandId(true); |
| } |
| |
| /* |
| * A SELECT without modifying CTEs can't possibly queue triggers, |
| * so force skip-triggers mode. This is just a marginal efficiency |
| * hack, since AfterTriggerBeginQuery/AfterTriggerEndQuery aren't |
| * all that expensive, but we might as well do it. |
| */ |
| if (!queryDesc->plannedstmt->hasModifyingCTE) |
| eflags |= EXEC_FLAG_SKIP_TRIGGERS; |
| break; |
| |
| case CMD_INSERT: |
| case CMD_DELETE: |
| case CMD_UPDATE: |
| estate->es_output_cid = GetCurrentCommandId(true); |
| break; |
| |
| default: |
| elog(ERROR, "unrecognized operation code: %d", |
| (int) queryDesc->operation); |
| break; |
| } |
| |
| /* |
| * Copy other important information into the EState |
| */ |
| estate->es_snapshot = RegisterSnapshot(queryDesc->snapshot); |
| estate->es_crosscheck_snapshot = RegisterSnapshot(queryDesc->crosscheck_snapshot); |
| estate->es_top_eflags = eflags; |
| estate->es_instrument = queryDesc->instrument_options; |
| estate->es_jit_flags = queryDesc->plannedstmt->jitFlags; |
| estate->showstatctx = queryDesc->showstatctx; |
| |
| /* |
| * Shared input info is needed when we are in GP_ROLE_EXECUTE or running a sequential plan. |
| */ |
| estate->es_sharenode = NIL; |
| |
| /* |
| * Handling of the Slice table depends on context. |
| */ |
| if (Gp_role == GP_ROLE_DISPATCH) |
| { |
| /* Set up the slice table. */ |
| SliceTable *sliceTable; |
| |
| sliceTable = InitSliceTable(estate, queryDesc->plannedstmt); |
| estate->es_sliceTable = sliceTable; |
| |
| if (sliceTable->slices[0].gangType != GANGTYPE_UNALLOCATED || |
| sliceTable->hasMotions) |
| { |
| if (queryDesc->ddesc == NULL) |
| { |
| queryDesc->ddesc = makeNode(QueryDispatchDesc); |
| queryDesc->ddesc->useChangedAOOpts = true; |
| } |
| |
| /* Pass EXPLAIN ANALYZE flag to qExecs. */ |
| estate->es_sliceTable->instrument_options = queryDesc->instrument_options; |
| |
| /* set our global sliceid variable for elog. */ |
| currentSliceId = LocallyExecutingSliceIndex(estate); |
| |
| /* InitPlan() will acquire locks by walking the entire plan |
| * tree -- we'd like to avoid acquiring the locks until |
| * *after* we've set up the interconnect */ |
| if (estate->es_sliceTable->hasMotions) |
| estate->motionlayer_context = createMotionLayerState(queryDesc->plannedstmt->numSlices - 1); |
| |
| shouldDispatch = !(eflags & EXEC_FLAG_EXPLAIN_ONLY); |
| } |
| else |
| { |
| /* QD-only query, no dispatching required */ |
| shouldDispatch = false; |
| } |
| |
| /* |
| * If this is CREATE TABLE AS ... WITH NO DATA, there's no need to |
| * actually execute the plan. |
| */ |
| if (queryDesc->plannedstmt->intoClause && |
| queryDesc->plannedstmt->intoClause->skipData) |
| shouldDispatch = false; |
| } |
| else if (Gp_role == GP_ROLE_EXECUTE) |
| { |
| QueryDispatchDesc *ddesc = queryDesc->ddesc; |
| |
| shouldDispatch = false; |
| |
| /* qDisp should have sent us a slice table via MPPEXEC */ |
| if (ddesc && ddesc->sliceTable != NULL) |
| { |
| SliceTable *sliceTable; |
| ExecSlice *slice; |
| |
| sliceTable = ddesc->sliceTable; |
| Assert(IsA(sliceTable, SliceTable)); |
| slice = &sliceTable->slices[sliceTable->localSlice]; |
| |
| estate->es_sliceTable = sliceTable; |
| estate->es_cursorPositions = ddesc->cursorPositions; |
| |
| estate->currentSliceId = slice->rootIndex; |
| |
| /* set our global sliceid variable for elog. */ |
| currentSliceId = LocallyExecutingSliceIndex(estate); |
| |
| /* Should we collect statistics for EXPLAIN ANALYZE? */ |
| estate->es_instrument = sliceTable->instrument_options; |
| queryDesc->instrument_options = sliceTable->instrument_options; |
| |
| /* InitPlan() will acquire locks by walking the entire plan |
| * tree -- we'd like to avoid acquiring the locks until |
| * *after* we've set up the interconnect */ |
| if (estate->es_sliceTable->hasMotions) |
| { |
| estate->motionlayer_context = createMotionLayerState(queryDesc->plannedstmt->numSlices - 1); |
| |
| PG_TRY(); |
| { |
| /* |
| * Initialize the motion layer for this query. |
| */ |
| Assert(!estate->interconnect_context); |
| CurrentMotionIPCLayer->SetupInterconnect(estate); |
| Assert(estate->interconnect_context); |
| UpdateMotionExpectedReceivers(estate->motionlayer_context, estate->es_sliceTable); |
| |
| SIMPLE_FAULT_INJECTOR("qe_got_snapshot_and_interconnect"); |
| } |
| PG_CATCH(); |
| { |
| mppExecutorCleanup(queryDesc); |
| PG_RE_THROW(); |
| } |
| PG_END_TRY(); |
| } |
| } |
| else |
| { |
| /* local query in QE. */ |
| } |
| } |
| else |
| shouldDispatch = false; |
| |
| /* |
| * We don't eliminate aliens if we don't have an MPP plan |
| * or if we are executing on the master. |
| * |
| * TODO: eliminate aliens even on master, if not EXPLAIN ANALYZE |
| */ |
| estate->eliminateAliens = execute_pruned_plan && estate->es_sliceTable && estate->es_sliceTable->hasMotions && (Gp_role == GP_ROLE_EXECUTE); |
| |
| /* |
| * Set up an AFTER-trigger statement context, unless told not to, or |
| * unless it's EXPLAIN-only mode (when ExecutorFinish won't be called). |
| */ |
| if (!(eflags & (EXEC_FLAG_SKIP_TRIGGERS | EXEC_FLAG_EXPLAIN_ONLY))) |
| AfterTriggerBeginQuery(); |
| |
| /* |
| * Initialize the plan state tree |
| * |
| * If the interconnect has been set up, we need to catch any |
| * errors to shut it down -- so we have to wrap InitPlan in a PG_TRY() block. |
| */ |
| PG_TRY(); |
| { |
| /* |
| * Initialize the plan state tree |
| */ |
| Assert(CurrentMemoryContext == estate->es_query_cxt); |
| InitPlan(queryDesc, eflags); |
| |
| Assert(queryDesc->planstate); |
| |
| #ifdef USE_ASSERT_CHECKING |
| AssertSliceTableIsValid(estate->es_sliceTable); |
| #endif |
| |
| if (Debug_print_slice_table && Gp_role == GP_ROLE_DISPATCH) |
| elog_node_display(DEBUG3, "slice table", estate->es_sliceTable, true); |
| |
| /* |
| * If we're running as a QE and there's a slice table in our queryDesc, |
| * then we need to finish the EState setup we prepared for back in |
| * CdbExecQuery. |
| */ |
| if (Gp_role == GP_ROLE_EXECUTE && estate->es_sliceTable != NULL) |
| { |
| MotionState *motionstate = NULL; |
| |
| /* |
| * Note that, at this point on a QE, the estate is set up (based on the |
| * slice table transmitted from the QD via MPPEXEC) so that fields |
| * es_sliceTable, cur_root_idx and es_cur_slice_idx are correct for |
| * the QE. |
| * |
| * If responsible for a non-root slice, arrange to enter the plan at the |
| * slice's sending Motion node rather than at the top. |
| */ |
| if (LocallyExecutingSliceIndex(estate) != RootSliceIndex(estate)) |
| { |
| motionstate = getMotionState(queryDesc->planstate, LocallyExecutingSliceIndex(estate)); |
| Assert(motionstate != NULL && IsA(motionstate, MotionState)); |
| } |
| |
| if (Debug_print_slice_table) |
| elog_node_display(DEBUG3, "slice table", estate->es_sliceTable, true); |
| |
| if (gp_log_interconnect >= GPVARS_VERBOSITY_DEBUG) |
| elog(DEBUG1, "seg%d executing slice%d under root slice%d", |
| GpIdentity.segindex, |
| LocallyExecutingSliceIndex(estate), |
| RootSliceIndex(estate)); |
| } |
| |
| /* |
| * if in dispatch mode, time to serialize plan and query |
| * trees, and fire off cdb_exec command to each of the qexecs |
| */ |
| if (shouldDispatch) |
| { |
| /* |
| * MPP-2869: preprocess_initplans() may |
| * dispatch. (This interacted with MPP-2859, which caused an |
| * initPlan to do a write that should have happened in the |
| * main body of the query.) We need to call |
| * ExecutorSaysTransactionDoesWrites() before any dispatch |
| * work for this query. |
| */ |
| needDtx = ExecutorSaysTransactionDoesWrites(); |
| if (needDtx) |
| setupDtxTransaction(); |
| |
| /* |
| * Avoid dispatching OIDs for InitPlans. |
| * |
| * CTAS will first define the relation in the QD and generate the OIDs, |
| * and then dispatch with these OIDs to the QEs. |
| * The QEs store these OIDs in a static variable and delete the one |
| * used to create the table. |
| * |
| * If the CTAS query contains an initplan, then when we invoke |
| * preprocess_initplans() to dispatch the initplans while |
| * queryDesc->ddesc->oidAssignments is set, these OIDs are |
| * also dispatched to the QEs. |
| * |
| * For details please see github issue https://github.com/greenplum-db/gpdb/issues/10760 |
| */ |
| if (queryDesc->ddesc != NULL) |
| { |
| queryDesc->ddesc->sliceTable = estate->es_sliceTable; |
| FillQueryDispatchDesc(estate->es_queryEnv, queryDesc->ddesc); |
| /* |
| * For CTAS queries that contain an initplan, we need to copy a new OID |
| * dispatch list, since preprocess_initplans() starts a subtransaction; if it |
| * is rolled back, the memory context 'Oid dispatch context' is reset, which |
| * would leave an invalid list reference when serializing dispatch_oids while |
| * dispatching the plan. |
| */ |
| toplevelOidCache = copyObject(GetAssignedOidsForDispatch()); |
| } |
| |
| /* |
| * First, pre-execute any initPlan subplans. |
| */ |
| if (list_length(queryDesc->plannedstmt->paramExecTypes) > 0) |
| preprocess_initplans(queryDesc); |
| |
| if (toplevelOidCache != NIL) |
| { |
| queryDesc->ddesc->oidAssignments = toplevelOidCache; |
| } |
| |
| /* |
| * This call returns after launching the threads that send the |
| * plan to the appropriate segdbs. It does not wait for them to |
| * finish unless an error is detected before all slices have been |
| * dispatched. |
| * |
| * The main plan runs on QEs (the top slice is allocated to a gang |
| * or has child slices), so send the plan to them. |
| */ |
| if (estate->es_sliceTable->slices[0].gangType != GANGTYPE_UNALLOCATED || |
| estate->es_sliceTable->slices[0].children) |
| { |
| CdbDispatchPlan(queryDesc, |
| estate->es_param_exec_vals, |
| needDtx, true); |
| } |
| |
| if (toplevelOidCache != NIL) |
| { |
| list_free(toplevelOidCache); |
| toplevelOidCache = NIL; |
| } |
| } |
| |
| /* |
| * Get the executor identity (whom the executor serves). We can assume |
| * forward scan direction for now, just for retrieving the identity. |
| */ |
| if (!(eflags & EXEC_FLAG_EXPLAIN_ONLY)) |
| exec_identity = getGpExecIdentity(queryDesc, ForwardScanDirection, estate); |
| else |
| exec_identity = GP_IGNORE; |
| |
| /* |
| * If we have no slice to execute in this process, mark currentSliceId as |
| * invalid. |
| */ |
| if (exec_identity == GP_IGNORE) |
| { |
| estate->currentSliceId = -1; |
| currentSliceId = -1; |
| } |
| |
| #ifdef USE_ASSERT_CHECKING |
| /* non-root on QE */ |
| if (exec_identity == GP_NON_ROOT_ON_QE) |
| { |
| MotionState *motionState = getMotionState(queryDesc->planstate, LocallyExecutingSliceIndex(estate)); |
| |
| Assert(motionState); |
| |
| Assert(IsA(motionState->ps.plan, Motion)); |
| } |
| else |
| #endif |
| if (exec_identity == GP_ROOT_SLICE) |
| { |
| /* Run a root slice. */ |
| if (queryDesc->planstate != NULL && |
| estate->es_sliceTable && |
| estate->es_sliceTable->slices[0].gangType == GANGTYPE_UNALLOCATED && |
| estate->es_sliceTable->slices[0].children && |
| !estate->es_interconnect_is_setup) |
| { |
| Assert(!estate->interconnect_context); |
| CurrentMotionIPCLayer->SetupInterconnect(estate); |
| Assert(estate->interconnect_context); |
| UpdateMotionExpectedReceivers(estate->motionlayer_context, estate->es_sliceTable); |
| } |
| } |
| else if (exec_identity != GP_IGNORE) |
| { |
| /* should never happen */ |
| Assert(!"unsupported parallel execution strategy"); |
| } |
| |
| if (estate->es_interconnect_is_setup) |
| Assert(estate->interconnect_context != NULL); |
| |
| } |
| PG_CATCH(); |
| { |
| if (toplevelOidCache != NIL) |
| { |
| list_free(toplevelOidCache); |
| toplevelOidCache = NIL; |
| } |
| mppExecutorCleanup(queryDesc); |
| PG_RE_THROW(); |
| } |
| PG_END_TRY(); |
| |
| if (DEBUG1 >= log_min_messages) |
| { |
| char msec_str[32]; |
| switch (check_log_duration(msec_str, false)) |
| { |
| case 1: |
| case 2: |
| ereport(LOG, (errmsg("duration to ExecutorStart end: %s ms", msec_str))); |
| break; |
| } |
| } |
| |
| MemoryContextSwitchTo(oldcontext); |
| } |
| |
| /* ---------------------------------------------------------------- |
| * ExecutorRun |
| * |
| * This is the main routine of the executor module. It accepts |
| * the query descriptor from the traffic cop and executes the |
| * query plan. |
| * |
| * ExecutorStart must have been called already. |
| * |
| * If direction is NoMovementScanDirection then nothing is done |
| * except to start up/shut down the destination. Otherwise, |
| * we retrieve up to 'count' tuples in the specified direction. |
| * |
| * Note: count = 0 is interpreted as no portal limit, i.e., run to |
| * completion. Also note that the count limit is only applied to |
| * retrieved tuples, not for instance to those inserted/updated/deleted |
| * by a ModifyTable plan node. |
| * |
| * There is no return value, but output tuples (if any) are sent to |
| * the destination receiver specified in the QueryDesc; and the number |
| * of tuples processed at the top level can be found in |
| * estate->es_processed. |
| * |
| * We provide a function hook variable that lets loadable plugins |
| * get control when ExecutorRun is called. Such a plugin would |
| * normally call standard_ExecutorRun(). |
| * |
| * MPP: In here we must ensure to only run the plan and not call |
| * any setup/teardown items (unless in a CATCH block). |
| * |
| * ---------------------------------------------------------------- |
| */ |
| void |
| ExecutorRun(QueryDesc *queryDesc, |
| ScanDirection direction, uint64 count, |
| bool execute_once) |
| { |
| /* |
| * Greenplum specific code: |
| * auto_stats() needs to know if it is inside procedure call so |
| * we maintain executor_run_nesting_level here. See detailed comments |
| * at the definition of the static variable executor_run_nesting_level. |
| */ |
| executor_run_nesting_level++; |
| PG_TRY(); |
| { |
| if (ExecutorRun_hook) |
| (*ExecutorRun_hook) (queryDesc, direction, count, execute_once); |
| else |
| standard_ExecutorRun(queryDesc, direction, count, execute_once); |
| executor_run_nesting_level--; |
| } |
| PG_CATCH(); |
| { |
| executor_run_nesting_level--; |
| PG_RE_THROW(); |
| } |
| PG_END_TRY(); |
| } |
| |
| void |
| standard_ExecutorRun(QueryDesc *queryDesc, |
| ScanDirection direction, uint64 count, bool execute_once) |
| { |
| EState *estate; |
| CmdType operation; |
| DestReceiver *dest; |
| bool sendTuples; |
| MemoryContext oldcontext; |
| bool endpointCreated = false; |
| uint64 es_processed = 0; |
| /* |
| * NOTE: Any local vars that are set in the PG_TRY block and examined in the |
| * PG_CATCH block should be declared 'volatile'. (setjmp shenanigans) |
| */ |
| ExecSlice *currentSlice; |
| GpExecIdentity exec_identity; |
| bool amIParallel = false; |
| |
| /* sanity checks */ |
| Assert(queryDesc != NULL); |
| |
| estate = queryDesc->estate; |
| |
| Assert(estate != NULL); |
| Assert(!(estate->es_top_eflags & EXEC_FLAG_EXPLAIN_ONLY)); |
| |
| /* |
| * Switch into per-query memory context |
| */ |
| oldcontext = MemoryContextSwitchTo(estate->es_query_cxt); |
| |
| /* Allow instrumentation of Executor overall runtime */ |
| if (queryDesc->totaltime) |
| InstrStartNode(queryDesc->totaltime); |
| |
| /* |
| * CDB: Update global slice id for log messages. |
| */ |
| currentSlice = getCurrentSlice(estate, LocallyExecutingSliceIndex(estate)); |
| if (currentSlice) |
| { |
| if (Gp_role == GP_ROLE_EXECUTE || |
| sliceRunsOnQD(currentSlice)) |
| { |
| currentSliceId = currentSlice->sliceIndex; |
| amIParallel = currentSlice->useMppParallelMode; |
| } |
| } |
| |
| /* |
| * extract information from the query descriptor and the query feature. |
| */ |
| operation = queryDesc->operation; |
| dest = queryDesc->dest; |
| |
| /* |
| * startup tuple receiver, if we will be emitting tuples |
| */ |
| estate->es_processed = 0; |
| |
| sendTuples = (queryDesc->tupDesc != NULL && |
| (operation == CMD_SELECT || |
| queryDesc->plannedstmt->hasReturning)); |
| |
| if (sendTuples) |
| dest->rStartup(dest, operation, queryDesc->tupDesc); |
| |
| /* |
| * run plan |
| */ |
| if (!ScanDirectionIsNoMovement(direction)) |
| { |
| if (execute_once && queryDesc->already_executed) |
| elog(ERROR, "can't re-execute query flagged for single execution"); |
| queryDesc->already_executed = true; |
| } |
| |
| /* |
| * Need a try/catch block here so that if an ereport is called from |
| * within ExecutePlan, we can clean up by calling CdbCheckDispatchResult. |
| * This cleans up the asynchronous commands running through the threads launched from |
| * CdbDispatchCommand. |
| */ |
| PG_TRY(); |
| { |
| /* |
| * Run the plan locally. There are three ways: |
| * |
| * 1. Do nothing |
| * 2. Run a root slice |
| * 3. Run a non-root slice on a QE. |
| * |
| * Here we decide what our identity is -- root slice, non-root |
| * on QE or other (in which case we do nothing), and then run |
| * the plan if required. For more information see |
| * getGpExecIdentity() in execUtils. |
| */ |
| exec_identity = getGpExecIdentity(queryDesc, direction, estate); |
| |
| if (exec_identity == GP_IGNORE) |
| { |
| /* do nothing */ |
| estate->es_got_eos = true; |
| } |
| else if (exec_identity == GP_NON_ROOT_ON_QE) |
| { |
| /* |
| * Run a non-root slice on a QE. |
| * |
| * Since the top Plan node is a (Sending) Motion, run the plan |
| * forward to completion. The plan won't return tuples locally |
| * (tuples go out over the interconnect), so the destination is |
| * uninteresting. The command type should be SELECT, however, to |
| * avoid other sorts of DML processing. |
| * |
| * This is the center of slice plan activity -- here we arrange to |
| * blunder into the middle of the plan rather than entering at the |
| * root. |
| */ |
| |
| MotionState *motionState = getMotionState(queryDesc->planstate, LocallyExecutingSliceIndex(estate)); |
| |
| Assert(motionState); |
| |
| ExecutePlan(estate, |
| (PlanState *) motionState, |
| amIParallel, |
| CMD_SELECT, |
| sendTuples, |
| 0, |
| ForwardScanDirection, |
| dest, |
| execute_once); |
| } |
| else if (exec_identity == GP_ROOT_SLICE) |
| { |
| DestReceiver *endpointDest; |
| |
| /* |
| * When we run a root slice for a PARALLEL RETRIEVE CURSOR, the QD |
| * becomes the endpoint for the connection. This is true, for |
| * instance, for SELECT * FROM foo LIMIT 10, where the result should |
| * go out from the QD. |
| * |
| * For the endpoint-on-QE scenario, the query plan is changed so that |
| * the root slice also exists on the QE. |
| */ |
| if (IS_PARALLEL_RETRIEVE_CURSOR(queryDesc)) |
| { |
| SetupEndpointExecState(queryDesc->tupDesc, |
| queryDesc->ddesc->parallelCursorName, |
| operation, |
| &endpointDest); |
| endpointCreated = true; |
| |
| /* |
| * Once the endpoint has been created in shared memory, send acknowledge |
| * message to QD so DECLARE PARALLEL RETRIEVE CURSOR statement can finish. |
| */ |
| EndpointNotifyQD(ENDPOINT_READY_ACK_MSG); |
| |
| ExecutePlan(estate, |
| queryDesc->planstate, |
| amIParallel, |
| operation, |
| true, |
| count, |
| direction, |
| endpointDest, |
| execute_once); |
| } |
| else |
| { |
| /* |
| * Run a root slice. |
| * This corresponds to the "normal" path through the executor, |
| * in that we enter the plan at the top and count on the |
| * motion nodes at the fringe of the top slice to return |
| * without ever calling nodes below them. |
| */ |
| ExecutePlan(estate, |
| queryDesc->planstate, |
| amIParallel, |
| operation, |
| sendTuples, |
| count, |
| direction, |
| dest, |
| execute_once); |
| } |
| } |
| else |
| { |
| /* should never happen */ |
| Assert(!"undefined parallel execution strategy"); |
| } |
| if ((exec_identity == GP_IGNORE || exec_identity == GP_ROOT_SLICE) && |
| (operation != CMD_SELECT || (queryDesc->plannedstmt->hasModifyingCTE))) |
| es_processed = mppExecutorWait(queryDesc); |
| |
| /* |
| * Update materialized view info if possible. |
| * Use the operation and result relation to identify whether |
| * a table's data has changed. |
| * This is a proper place for tables of all access methods. |
| * We handle INSERT/UPDATE/DELETE here; other operations should |
| * be handled in utility commands. |
| * |
| * Use es_processed to identify the rows we actually modified, |
| * to cover the case where a writable query succeeds without |
| * changing any data, e.g.: |
| * insert into t1 select * from t2; |
| * When t2 has zero rows, there is no need to update the view status. |
| * |
| * NB: This cannot be handled well in utility mode; the user should |
| * REFRESH the view afterwards. |
| */ |
| if (IS_QD_OR_SINGLENODE() && |
| (operation == CMD_INSERT || operation == CMD_UPDATE || operation == CMD_DELETE || |
| queryDesc->plannedstmt->hasModifyingCTE) && |
| ((es_processed > 0 || estate->es_processed > 0) || !queryDesc->plannedstmt->canSetTag)) |
| { |
| MaintainMaterializedViewStatus(queryDesc, operation); |
| } |
| } |
| PG_CATCH(); |
| { |
| /* Close down interconnect etc. */ |
| mppExecutorCleanup(queryDesc); |
| if (list_length(queryDesc->plannedstmt->paramExecTypes) > 0) |
| { |
| postprocess_initplans(queryDesc); |
| } |
| PG_RE_THROW(); |
| } |
| PG_END_TRY(); |
| |
| |
| #ifdef FAULT_INJECTOR |
| /* |
| * Allow testing of very high number of processed rows, without spending |
| * hours actually processing that many rows. |
| * |
| * Somewhat arbitrarily, only trigger this if more than 10000 rows were truly |
| * processed. This screens out some internal queries that the system might |
| * issue during planning. |
| */ |
| if (estate->es_processed >= 10000 && estate->es_processed <= 1000000) |
| { |
| if (FaultInjector_InjectFaultIfSet("executor_run_high_processed", |
| DDLNotSpecified, |
| "" /* databaseName */, |
| "" /* tableName */) == FaultInjectorTypeSkip) |
| { |
| /* |
| * For testing purposes, pretend that we have already processed |
| * almost 2^32 rows. |
| */ |
| estate->es_processed = UINT_MAX - 10; |
| } |
| } |
| #endif /* FAULT_INJECTOR */ |
| |
| /* |
| * shutdown tuple receiver, if we started it |
| */ |
| if (endpointCreated) |
| DestroyEndpointExecState(); |
| |
| if (sendTuples) |
| dest->rShutdown(dest); |
| if (es_processed) |
| estate->es_processed = es_processed; |
| if (queryDesc->totaltime) |
| InstrStopNode(queryDesc->totaltime, estate->es_processed); |
| |
| MemoryContextSwitchTo(oldcontext); |
| } |
| |
| /* ---------------------------------------------------------------- |
| * ExecutorFinish |
| * |
| * This routine must be called after the last ExecutorRun call. |
| * It performs cleanup such as firing AFTER triggers. It is |
| * separate from ExecutorEnd because EXPLAIN ANALYZE needs to |
| * include these actions in the total runtime. |
| * |
| * We provide a function hook variable that lets loadable plugins |
| * get control when ExecutorFinish is called. Such a plugin would |
| * normally call standard_ExecutorFinish(). |
| * |
| * ---------------------------------------------------------------- |
| */ |
| void |
| ExecutorFinish(QueryDesc *queryDesc) |
| { |
| if (ExecutorFinish_hook) |
| (*ExecutorFinish_hook) (queryDesc); |
| else |
| standard_ExecutorFinish(queryDesc); |
| } |
| |
| void |
| standard_ExecutorFinish(QueryDesc *queryDesc) |
| { |
| EState *estate; |
| PlanState *planstate = NULL; |
| GpExecIdentity exec_identity; |
| MemoryContext oldcontext; |
| |
| /* sanity checks */ |
| Assert(queryDesc != NULL); |
| |
| estate = queryDesc->estate; |
| |
| Assert(estate != NULL); |
| Assert(!(estate->es_top_eflags & EXEC_FLAG_EXPLAIN_ONLY)); |
| |
| /* This should be run once and only once per Executor instance */ |
| Assert(!estate->es_finished); |
| |
| /* Switch into per-query memory context */ |
| oldcontext = MemoryContextSwitchTo(estate->es_query_cxt); |
| |
| /* Allow instrumentation of Executor overall runtime */ |
| if (queryDesc->totaltime) |
| InstrStartNode(queryDesc->totaltime); |
| |
| /* Run ModifyTable nodes to completion */ |
| ExecPostprocessPlan(estate); |
| |
| /* Execute queued AFTER triggers, unless told not to */ |
| if (!(estate->es_top_eflags & EXEC_FLAG_SKIP_TRIGGERS)) |
| AfterTriggerEndQuery(estate); |
| |
| if (queryDesc->totaltime) |
| InstrStopNode(queryDesc->totaltime, 0); |
| |
| /* |
| * Squelch the whole plan now that the query is finished. |
| * |
| * For a Material node, when 'delayEagerFree' is true, it |
| * will not be squelched even if 'ExecSquelchNode' is |
| * called on it. But if it is not squelched, it will |
| * not send the stop message to its child motion node, and |
| * the sender motion will hang there until the connection |
| * is reset or closed. This can lead to issues. |
| * For example, sometimes we need to collect the query state |
| * from all the backends through |
| * 'cdbexplain_recvExecStats', which waits until the QE |
| * sends the query state to it. But the QE only sends |
| * that in 'standard_ExecutorEnd', and if it is still |
| * blocked sending out tuples, then the whole query |
| * will hang. |
| */ |
| exec_identity = getGpExecIdentity(queryDesc, ForwardScanDirection, estate); |
| if (exec_identity == GP_NON_ROOT_ON_QE) |
| planstate = (PlanState *)getMotionState(queryDesc->planstate, LocallyExecutingSliceIndex(estate)); |
| else if (exec_identity == GP_ROOT_SLICE) |
| planstate = queryDesc->planstate; |
| |
| if (exec_identity != GP_IGNORE && planstate != NULL) |
| ExecSquelchNode(planstate, true); |
| |
| MemoryContextSwitchTo(oldcontext); |
| |
| estate->es_finished = true; |
| } |
| |
| /* ---------------------------------------------------------------- |
| * ExecutorEnd |
| * |
| * This routine must be called at the end of execution of any |
| * query plan |
| * |
| * We provide a function hook variable that lets loadable plugins |
| * get control when ExecutorEnd is called. Such a plugin would |
| * normally call standard_ExecutorEnd(). |
| * |
| * ---------------------------------------------------------------- |
| */ |
| void |
| ExecutorEnd(QueryDesc *queryDesc) |
| { |
| if (ExecutorEnd_hook) |
| (*ExecutorEnd_hook) (queryDesc); |
| else |
| standard_ExecutorEnd(queryDesc); |
| } |
| |
| void |
| standard_ExecutorEnd(QueryDesc *queryDesc) |
| { |
| EState *estate; |
| MemoryContext oldcontext; |
| |
| /* GPDB: whether this is an inner query for extension usage */ |
| bool isInnerQuery; |
| |
| /* sanity checks */ |
| Assert(queryDesc != NULL); |
| |
| estate = queryDesc->estate; |
| |
| Assert(estate != NULL); |
| |
| /* GPDB: Save the SPI flag first in case the memory context of plannedstmt is cleaned up */ |
| isInnerQuery = estate->es_plannedstmt->metricsQueryType > TOP_LEVEL_QUERY; |
| |
| if (DEBUG1 >= log_min_messages) |
| { |
| char msec_str[32]; |
| switch (check_log_duration(msec_str, false)) |
| { |
| case 1: |
| case 2: |
| ereport(LOG, (errmsg("duration to ExecutorEnd starting: %s ms", msec_str))); |
| break; |
| } |
| } |
| |
| /* |
| * Check that ExecutorFinish was called, unless in EXPLAIN-only mode. This |
| * Assert is needed because ExecutorFinish is new as of 9.1, and callers |
| * might forget to call it. |
| */ |
| Assert(estate->es_finished || |
| (estate->es_top_eflags & EXEC_FLAG_EXPLAIN_ONLY)); |
| |
| /* |
| * Switch into per-query memory context to run ExecEndPlan |
| */ |
| oldcontext = MemoryContextSwitchTo(estate->es_query_cxt); |
| |
| /* |
| * If EXPLAIN ANALYZE, qExec returns stats to qDisp now. |
| */ |
| if (estate->es_sliceTable && |
| estate->es_sliceTable->instrument_options && |
| (estate->es_sliceTable->instrument_options & INSTRUMENT_CDB) && |
| Gp_role == GP_ROLE_EXECUTE) |
| cdbexplain_sendExecStats(queryDesc); |
| |
| /* |
| * if needed, collect mpp dispatch results and tear down |
| * all mpp specific resources (e.g. interconnect). |
| */ |
| PG_TRY(); |
| { |
| mppExecutorFinishup(queryDesc); |
| } |
| PG_CATCH(); |
| { |
| /* |
| * we got an error. do all the necessary cleanup. |
| */ |
| mppExecutorCleanup(queryDesc); |
| |
| /* |
| * Remove our own query's motion layer. |
| */ |
| RemoveMotionLayer(estate->motionlayer_context); |
| |
| /* |
| * GPDB specific |
| * Clean the special resources created by INITPLAN. |
| * The resources have a long life cycle and are used by the main plan. |
| * It's too early to clean them in preprocess_initplans. |
| */ |
| if (list_length(queryDesc->plannedstmt->paramExecTypes) > 0) |
| { |
| postprocess_initplans(queryDesc); |
| } |
| |
| /* |
| * Release EState and per-query memory context. |
| */ |
| FreeExecutorState(estate); |
| |
| PG_RE_THROW(); |
| } |
| PG_END_TRY(); |
| |
| /* |
| * GPDB specific |
| * Clean the special resources created by INITPLAN. |
| * The resources have a long life cycle and are used by the main plan. |
| * It's too early to clean them in preprocess_initplans. |
| */ |
| if (list_length(queryDesc->plannedstmt->paramExecTypes) > 0) |
| { |
| postprocess_initplans(queryDesc); |
| } |
| |
| /* |
| * If normal termination, let each operator clean itself up. |
| * Otherwise don't risk it... an error might have left some |
| * structures in an inconsistent state. |
| */ |
| ExecEndPlan(queryDesc->planstate, estate); |
| |
| /* |
| * Remove our own query's motion layer. |
| */ |
| RemoveMotionLayer(estate->motionlayer_context); |
| |
| /* do away with our snapshots */ |
| UnregisterSnapshot(estate->es_snapshot); |
| UnregisterSnapshot(estate->es_crosscheck_snapshot); |
| |
| /* |
| * Must switch out of context before destroying it |
| */ |
| MemoryContextSwitchTo(oldcontext); |
| |
| queryDesc->es_processed = estate->es_processed; |
| |
| /* |
| * Release EState and per-query memory context. This should release |
| * everything the executor has allocated. |
| */ |
| FreeExecutorState(estate); |
| |
| /* GPDB hook for collecting query info */ |
| if (query_info_collect_hook) |
| (*query_info_collect_hook)(isInnerQuery ? METRICS_INNER_QUERY_DONE : METRICS_QUERY_DONE, queryDesc); |
| |
| /* Reset queryDesc fields that no longer point to anything */ |
| queryDesc->tupDesc = NULL; |
| queryDesc->estate = NULL; |
| queryDesc->planstate = NULL; |
| queryDesc->totaltime = NULL; |
| |
| if (DEBUG1 >= log_min_messages) |
| { |
| char msec_str[32]; |
| switch (check_log_duration(msec_str, false)) |
| { |
| case 1: |
| case 2: |
| ereport(LOG, (errmsg("duration to ExecutorEnd end: %s ms", msec_str))); |
| break; |
| } |
| } |
| |
| ReportOOMConsumption(); |
| } |
| |
| /* ---------------------------------------------------------------- |
| * ExecutorRewind |
| * |
| * This routine may be called on an open queryDesc to rewind it |
| * to the start. |
| * ---------------------------------------------------------------- |
| */ |
| void |
| ExecutorRewind(QueryDesc *queryDesc) |
| { |
| EState *estate; |
| MemoryContext oldcontext; |
| |
| /* sanity checks */ |
| Assert(queryDesc != NULL); |
| |
| estate = queryDesc->estate; |
| |
| Assert(estate != NULL); |
| |
| /* It's probably not sensible to rescan updating queries */ |
| Assert(queryDesc->operation == CMD_SELECT); |
| |
| /* |
| * Switch into per-query memory context |
| */ |
| oldcontext = MemoryContextSwitchTo(estate->es_query_cxt); |
| |
| /* |
| * rescan plan |
| */ |
| ExecReScan(queryDesc->planstate); |
| |
| MemoryContextSwitchTo(oldcontext); |
| } |
| |
| |
| /* |
| * ExecCheckRTPerms |
| * Check access permissions for all relations listed in a range table. |
| * |
| * Returns true if permissions are adequate. Otherwise, throws an appropriate |
| * error if ereport_on_violation is true, or simply returns false otherwise. |
| * |
| * Note that this does NOT address row-level security policies (aka: RLS). If |
| * rows will be returned to the user as a result of this permission check |
| * passing, then RLS also needs to be consulted (and check_enable_rls()). |
| * |
| * See rewrite/rowsecurity.c. |
| */ |
| bool |
| ExecCheckRTPerms(List *rangeTable, bool ereport_on_violation) |
| { |
| ListCell *l; |
| bool result = true; |
| |
| foreach(l, rangeTable) |
| { |
| RangeTblEntry *rte = (RangeTblEntry *) lfirst(l); |
| |
| result = ExecCheckRTEPerms(rte); |
| if (!result) |
| { |
| Assert(rte->rtekind == RTE_RELATION); |
| if (ereport_on_violation) |
| aclcheck_error(ACLCHECK_NO_PRIV, get_relkind_objtype(get_rel_relkind(rte->relid)), |
| get_rel_name(rte->relid)); |
| return false; |
| } |
| } |
| |
| if (ExecutorCheckPerms_hook) |
| result = (*ExecutorCheckPerms_hook) (rangeTable, |
| ereport_on_violation); |
| return result; |
| } |
| |
| /* |
| * ExecCheckRTEPerms |
| * Check access permissions for a single RTE. |
| */ |
| bool |
| ExecCheckRTEPerms(RangeTblEntry *rte) |
| { |
| AclMode requiredPerms; |
| AclMode relPerms; |
| AclMode remainingPerms; |
| Oid relOid; |
| Oid userid; |
| |
| /* |
| * Only plain-relation RTEs need to be checked here. Function RTEs are |
| * checked when the function is prepared for execution. Join, subquery, |
| * and special RTEs need no checks. |
| */ |
| if (rte->rtekind != RTE_RELATION) |
| return true; |
| |
| /* |
| * No work if requiredPerms is empty. |
| */ |
| requiredPerms = rte->requiredPerms; |
| if (requiredPerms == 0) |
| return true; |
| |
| relOid = rte->relid; |
| |
| /* |
| * userid to check as: current user unless we have a setuid indication. |
| * |
| * Note: GetUserId() is presently fast enough that there's no harm in |
| * calling it separately for each RTE. If that stops being true, we could |
| * call it once in ExecCheckRTPerms and pass the userid down from there. |
| * But for now, no need for the extra clutter. |
| */ |
| userid = rte->checkAsUser ? rte->checkAsUser : GetUserId(); |
| |
| /* |
| * We must have *all* the requiredPerms bits, but some of the bits can be |
| * satisfied from column-level rather than relation-level permissions. |
| * First, remove any bits that are satisfied by relation permissions. |
| */ |
| relPerms = pg_class_aclmask(relOid, userid, requiredPerms, ACLMASK_ALL); |
| remainingPerms = requiredPerms & ~relPerms; |
| if (remainingPerms != 0) |
| { |
| int col = -1; |
| |
| /* |
| * If we lack any permissions that exist only as relation permissions, |
| * we can fail straight away. |
| */ |
| if (remainingPerms & ~(ACL_SELECT | ACL_INSERT | ACL_UPDATE)) |
| return false; |
| |
| /* |
| * Check to see if we have the needed privileges at column level. |
| * |
| * Note: failures just report a table-level error; it would be nicer |
| * to report a column-level error if we have some but not all of the |
| * column privileges. |
| */ |
| if (remainingPerms & ACL_SELECT) |
| { |
| /* |
| * When the query doesn't explicitly reference any columns (for |
| * example, SELECT COUNT(*) FROM table), allow the query if we |
| * have SELECT on any column of the rel, as per SQL spec. |
| */ |
| if (bms_is_empty(rte->selectedCols)) |
| { |
| if (pg_attribute_aclcheck_all(relOid, userid, ACL_SELECT, |
| ACLMASK_ANY) != ACLCHECK_OK) |
| return false; |
| } |
| |
| while ((col = bms_next_member(rte->selectedCols, col)) >= 0) |
| { |
| /* bit #s are offset by FirstLowInvalidHeapAttributeNumber */ |
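| /* (e.g. user column attno N is stored as bit N - FirstLowInvalidHeapAttributeNumber) */ |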
| AttrNumber attno = col + FirstLowInvalidHeapAttributeNumber; |
| |
| if (attno == InvalidAttrNumber) |
| { |
| /* Whole-row reference, must have priv on all cols */ |
| if (pg_attribute_aclcheck_all(relOid, userid, ACL_SELECT, |
| ACLMASK_ALL) != ACLCHECK_OK) |
| return false; |
| } |
| else |
| { |
| if (pg_attribute_aclcheck(relOid, attno, userid, |
| ACL_SELECT) != ACLCHECK_OK) |
| return false; |
| } |
| } |
| } |
| |
| /* |
| * Basically the same for the mod columns, for both INSERT and UPDATE |
| * privilege as specified by remainingPerms. |
| */ |
| if (remainingPerms & ACL_INSERT && !ExecCheckRTEPermsModified(relOid, |
| userid, |
| rte->insertedCols, |
| ACL_INSERT)) |
| return false; |
| |
| if (remainingPerms & ACL_UPDATE && !ExecCheckRTEPermsModified(relOid, |
| userid, |
| rte->updatedCols, |
| ACL_UPDATE)) |
| return false; |
| } |
| return true; |
| } |
| |
| /* |
| * ExecCheckRTEPermsModified |
| * Check INSERT or UPDATE access permissions for a single RTE (these |
| * are processed uniformly). |
| */ |
| static bool |
| ExecCheckRTEPermsModified(Oid relOid, Oid userid, Bitmapset *modifiedCols, |
| AclMode requiredPerms) |
| { |
| int col = -1; |
| |
| /* |
| * When the query doesn't explicitly update any columns, allow the query |
| * if we have permission on any column of the rel. This is to handle |
| * SELECT FOR UPDATE as well as possible corner cases in UPDATE. |
| */ |
| if (bms_is_empty(modifiedCols)) |
| { |
| if (pg_attribute_aclcheck_all(relOid, userid, requiredPerms, |
| ACLMASK_ANY) != ACLCHECK_OK) |
| return false; |
| } |
| |
| while ((col = bms_next_member(modifiedCols, col)) >= 0) |
| { |
| /* bit #s are offset by FirstLowInvalidHeapAttributeNumber */ |
| AttrNumber attno = col + FirstLowInvalidHeapAttributeNumber; |
| |
| if (attno == InvalidAttrNumber) |
| { |
| /* whole-row reference can't happen here */ |
| elog(ERROR, "whole-row update is not implemented"); |
| } |
| else |
| { |
| if (pg_attribute_aclcheck(relOid, attno, userid, |
| requiredPerms) != ACLCHECK_OK) |
| return false; |
| } |
| } |
| return true; |
| } |
| |
| /* |
| * Check that the query does not imply any writes to non-temp tables; |
| * unless we're in parallel mode, in which case don't even allow writes |
| * to temp tables. |
| * |
| * This function is used to check if the current statement will perform any writes. |
| * It is used to: |
| * (1) enforce read-only mode (both FTS read-only and a read-only |
| * transaction isolation level), and |
| * (2) keep track of when a distributed transaction becomes |
| * "dirty" and will require two-phase commit (2PC). |
| * |
| * Note: in a Hot Standby this would need to reject writes to temp |
| * tables just as we do in parallel mode; but an HS standby can't have created |
| * any temp tables in the first place, so no need to check that. |
| * |
| * In GPDB, an important side-effect of this is to call |
| * ExecutorMarkTransactionDoesWrites(), if the query is not read-only. That |
| * ensures that we use two-phase commit for this transaction. |
| */ |
| static void |
| ExecCheckXactReadOnly(PlannedStmt *plannedstmt) |
| { |
| ListCell *l; |
| int rti; |
| |
| /* |
| * CREATE TABLE AS or SELECT INTO? |
| * |
| * XXX should we allow this if the destination is temp? Considering that |
| * it would still require catalog changes, probably not. |
| */ |
| if (plannedstmt->intoClause != NULL) |
| { |
| if ((plannedstmt->intoClause->rel && plannedstmt->intoClause->rel->relpersistence == RELPERSISTENCE_TEMP) |
| || (plannedstmt->intoClause->rel == NULL && plannedstmt->intoClause->ivm)) |
| ExecutorMarkTransactionDoesWrites(); |
| else |
| PreventCommandIfReadOnly(CreateCommandName((Node *) plannedstmt)); |
| } |
| |
| /* |
| * Refresh matview will write xlog. |
| */ |
| if (plannedstmt->refreshClause != NULL) |
| { |
| PreventCommandIfReadOnly(CreateCommandName((Node *) plannedstmt)); |
| } |
| |
| rti = 0; |
| /* |
| * Fail if write permissions are requested in parallel mode for table |
| * (temp or non-temp), otherwise fail for any non-temp table. |
| */ |
| foreach(l, plannedstmt->rtable) |
| { |
| RangeTblEntry *rte = (RangeTblEntry *) lfirst(l); |
| |
| rti++; |
| |
| if (rte->rtekind != RTE_RELATION) |
| continue; |
| |
| if ((rte->requiredPerms & (~ACL_SELECT)) == 0) |
| continue; |
| |
| /* |
| * External and foreign tables don't need two-phase commit, which is for |
| * local MPP tables. |
| */ |
| if (get_rel_relkind(rte->relid) == RELKIND_FOREIGN_TABLE) |
| continue; |
| |
| if (isTempNamespace(get_rel_namespace(rte->relid))) |
| { |
| ExecutorMarkTransactionDoesWrites(); |
| continue; |
| } |
| |
| /* CDB: Allow SELECT FOR SHARE/UPDATE */ |
| if ((rte->requiredPerms & ~(ACL_SELECT | ACL_SELECT_FOR_UPDATE)) == 0) |
| { |
| ListCell *cell; |
| bool foundRTI = false; |
| |
| foreach(cell, plannedstmt->rowMarks) |
| { |
| RowMarkClause *rmc = lfirst(cell); |
| if (rmc->rti == rti) |
| { |
| foundRTI = true; |
| break; |
| } |
| } |
| |
| if (foundRTI) |
| continue; |
| } |
| |
| PreventCommandIfReadOnly(CreateCommandName((Node *) plannedstmt)); |
| } |
| |
| if (plannedstmt->commandType != CMD_SELECT || plannedstmt->hasModifyingCTE) |
| PreventCommandIfParallelMode(CreateCommandName((Node *) plannedstmt)); |
| } |
| |
| |
| /* ---------------------------------------------------------------- |
| * InitPlan |
| * |
| * Initializes the query plan: open files, allocate storage |
| * and start up the rule manager |
| * ---------------------------------------------------------------- |
| */ |
| static void |
| InitPlan(QueryDesc *queryDesc, int eflags) |
| { |
| CmdType operation = queryDesc->operation; |
| PlannedStmt *plannedstmt = queryDesc->plannedstmt; |
| Plan *plan = plannedstmt->planTree; |
| List *rangeTable = plannedstmt->rtable; |
| EState *estate = queryDesc->estate; |
| PlanState *planstate; |
| TupleDesc tupType; |
| ListCell *l; |
| |
| Assert(plannedstmt->intoPolicy == NULL || |
| GpPolicyIsPartitioned(plannedstmt->intoPolicy) || |
| GpPolicyIsReplicated(plannedstmt->intoPolicy)); |
| |
| if (DEBUG1 >= log_min_messages) |
| { |
| char msec_str[32]; |
| switch (check_log_duration(msec_str, false)) |
| { |
| case 1: |
| case 2: |
| ereport(LOG, (errmsg("duration to InitPlan start: %s ms", msec_str))); |
| break; |
| default: |
| /* do nothing */ |
| break; |
| } |
| } |
| |
| /* |
| * Do permissions checks |
| */ |
| if (operation != CMD_SELECT || Gp_role != GP_ROLE_EXECUTE) |
| { |
| ExecCheckRTPerms(rangeTable, true); |
| } |
| |
| /* |
| * initialize the node's execution state |
| */ |
| ExecInitRangeTable(estate, rangeTable); |
| |
| estate->es_plannedstmt = plannedstmt; |
| |
| /* |
| * Next, build the ExecRowMark array from the PlanRowMark(s), if any. |
| */ |
| if (plannedstmt->rowMarks) |
| { |
| estate->es_rowmarks = (ExecRowMark **) |
| palloc0(estate->es_range_table_size * sizeof(ExecRowMark *)); |
| foreach(l, plannedstmt->rowMarks) |
| { |
| PlanRowMark *rc = (PlanRowMark *) lfirst(l); |
| Oid relid; |
| Relation relation; |
| ExecRowMark *erm; |
| |
| /* ignore "parent" rowmarks; they are irrelevant at runtime */ |
| if (rc->isParent) |
| continue; |
| |
| /* get relation's OID (will produce InvalidOid if subquery) */ |
| relid = exec_rt_fetch(rc->rti, estate)->relid; |
| |
| /* open relation, if we need to access it for this mark type */ |
| switch (rc->markType) |
| { |
| /* |
| * Cloudberry specific behavior: |
| * The implementation of select statement with locking clause |
| * (for update | no key update | share | key share) in postgres |
| * is to hold RowShareLock on tables during parsing stage, and |
| * generate a LockRows plan node for executor to lock the tuples. |
| * It is not easy to lock tuples in Apache Cloudberry, since |
| * tuples may be fetched through motion nodes. |
| * |
| * But when Global Deadlock Detector is enabled, and the select |
| * statement with locking clause contains only one table, we are |
| * sure that there are no motions. For such simple cases, we could |
| * make the behavior just the same as Postgres. |
| */ |
| case ROW_MARK_EXCLUSIVE: |
| case ROW_MARK_NOKEYEXCLUSIVE: |
| case ROW_MARK_SHARE: |
| case ROW_MARK_KEYSHARE: |
| case ROW_MARK_REFERENCE: |
| relation = ExecGetRangeTableRelation(estate, rc->rti); |
| break; |
| case ROW_MARK_COPY: |
| /* no physical table access is required */ |
| relation = NULL; |
| break; |
| default: |
| elog(ERROR, "unrecognized markType: %d", rc->markType); |
| relation = NULL; /* keep compiler quiet */ |
| break; |
| } |
| |
| /* Check that relation is a legal target for marking */ |
| if (relation) |
| CheckValidRowMarkRel(relation, rc->markType); |
| |
| erm = (ExecRowMark *) palloc(sizeof(ExecRowMark)); |
| erm->relation = relation; |
| erm->relid = relid; |
| erm->rti = rc->rti; |
| erm->prti = rc->prti; |
| erm->rowmarkId = rc->rowmarkId; |
| erm->markType = rc->markType; |
| erm->strength = rc->strength; |
| erm->waitPolicy = rc->waitPolicy; |
| erm->ermActive = false; |
| ItemPointerSetInvalid(&(erm->curCtid)); |
| erm->ermExtra = NULL; |
| |
| Assert(erm->rti > 0 && erm->rti <= estate->es_range_table_size && |
| estate->es_rowmarks[erm->rti - 1] == NULL); |
| |
| estate->es_rowmarks[erm->rti - 1] = erm; |
| } |
| } |
| |
| /* |
| * Initialize the executor's tuple table to empty. |
| */ |
| estate->es_tupleTable = NIL; |
| |
| /* signal that this EState is not used for EPQ */ |
| estate->es_epq_active = NULL; |
| |
| /* |
| * Initialize private state information for each SubPlan. We must do this |
| * before running ExecInitNode on the main query tree, since |
| * ExecInitSubPlan expects to be able to find these entries. |
| */ |
| Assert(estate->es_subplanstates == NIL); |
| Plan *start_plan_node = plannedstmt->planTree; |
| |
| estate->currentSliceId = 0; |
| |
| /* |
| * If eliminateAliens is true then we extract the local Motion node |
| * and subplans for our current slice. This enables us to call ExecInitNode |
| * for only a subset of the plan tree. |
| */ |
| if (estate->eliminateAliens) |
| { |
| Motion *m = findSenderMotion(plannedstmt, LocallyExecutingSliceIndex(estate)); |
| |
| /* |
| * We may not have any motion in the current slice, e.g., in insert query |
| * the root may not have any motion. |
| */ |
| if (NULL != m) |
| { |
| start_plan_node = (Plan *) m; |
| ExecSlice *sendSlice = &estate->es_sliceTable->slices[m->motionID]; |
| estate->currentSliceId = sendSlice->parentIndex; |
| estate->useMppParallelMode = sendSlice->useMppParallelMode; |
| /* |
| * CBDB_PARALLEL |
| * Remember: parallel_workers is set to no less than = 1 when gang is filled |
| * for convenience in Motion execution. |
| */ |
| TotalParallelWorkerNumberOfSlice = sendSlice->parallel_workers > 1 ? sendSlice->parallel_workers : 0; |
| } |
| /* Compute SubPlans' root plan nodes for SubPlans reachable from this plan root */ |
| estate->locallyExecutableSubplans = getLocallyExecutableSubplans(plannedstmt, start_plan_node); |
| } |
| else |
| estate->locallyExecutableSubplans = NULL; |
| |
| int subplan_id = 1; |
| foreach(l, plannedstmt->subplans) |
| { |
| PlanState *subplanstate = NULL; |
| int sp_eflags = 0; |
| |
| /* |
| * Initialize only the subplans that are reachable from our local slice. |
| * If alien elimination is not turned on, then all subplans are considered |
| * reachable. |
| */ |
| if (!estate->eliminateAliens || |
| bms_is_member(subplan_id, estate->locallyExecutableSubplans)) |
| { |
| /* |
| * A subplan will never need to do BACKWARD scan nor MARK/RESTORE. |
| * |
| * GPDB: We always set the REWIND flag, to delay eagerfree. |
| */ |
| sp_eflags = eflags |
| & (EXEC_FLAG_EXPLAIN_ONLY | EXEC_FLAG_WITH_NO_DATA); |
| sp_eflags |= EXEC_FLAG_REWIND; |
| |
| /* set our global sliceid variable for elog. */ |
| int save_currentSliceId = estate->currentSliceId; |
| /* CBDB_PARALLEL_FIXME: Is it necessary to save and recover this? */ |
| bool save_useMppParallelMode = estate->useMppParallelMode; |
| |
| estate->currentSliceId = estate->es_plannedstmt->subplan_sliceIds[subplan_id - 1]; |
| /* CBDB_PARALLEL_FIXME: test whether mpp parallel style exists for subplan case */ |
| estate->useMppParallelMode = false; |
| |
| /* CBDB_PARALLEL_FIXME: update TotalParallelWorkerNumberOfSlice for subplan, could it be possible? */ |
| Plan *subplan = (Plan *) lfirst(l); |
| subplanstate = ExecInitNode(subplan, estate, sp_eflags); |
| |
| estate->currentSliceId = save_currentSliceId; |
| estate->useMppParallelMode = save_useMppParallelMode; |
| } |
| |
| estate->es_subplanstates = lappend(estate->es_subplanstates, subplanstate); |
| |
| ++subplan_id; |
| } |
| |
| /* |
| * If this is a query that was dispatched from the QE, install precomputed |
| * parameter values from all init plans into our EState. |
| */ |
| if (Gp_role == GP_ROLE_EXECUTE && queryDesc->ddesc) |
| InstallDispatchedExecParams(queryDesc->ddesc, estate); |
| |
| /* |
| * Initialize the private state information for all the nodes in the query |
| * tree. This opens files, allocates storage and leaves us ready to start |
| * processing tuples. |
| */ |
| planstate = ExecInitNode(start_plan_node, estate, eflags); |
| |
| queryDesc->planstate = planstate; |
| |
| Assert(queryDesc->planstate); |
| |
| /* GPDB hook for collecting query info */ |
| if (query_info_collect_hook) |
| (*query_info_collect_hook)(METRICS_PLAN_NODE_INITIALIZE, queryDesc); |
| |
| if (RootSliceIndex(estate) != LocallyExecutingSliceIndex(estate)) |
| return; |
| |
| /* |
| * Get the tuple descriptor describing the type of tuples to return. |
| */ |
| tupType = ExecGetResultType(planstate); |
| |
| /* |
| * Initialize the junk filter if needed. SELECT queries need a filter if |
| * there are any junk attrs in the top-level tlist. |
| */ |
| if (operation == CMD_SELECT) |
| { |
| bool junk_filter_needed = false; |
| ListCell *tlist; |
| |
| foreach(tlist, plan->targetlist) |
| { |
| TargetEntry *tle = (TargetEntry *) lfirst(tlist); |
| |
| if (tle->resjunk) |
| { |
| junk_filter_needed = true; |
| break; |
| } |
| } |
| |
| if (junk_filter_needed) |
| { |
| JunkFilter *j; |
| TupleTableSlot *slot; |
| |
| slot = ExecInitExtraTupleSlot(estate, NULL, &TTSOpsVirtual); |
| j = ExecInitJunkFilter(planstate->plan->targetlist, |
| slot, |
| NULL); |
| estate->es_junkFilter = j; |
| |
| /* Want to return the cleaned tuple type */ |
| tupType = j->jf_cleanTupType; |
| } |
| } |
| |
| queryDesc->tupDesc = tupType; |
| |
| /* |
| * GPDB: Hack for CTAS/MatView: |
| * Need to switch to IntoRelDest for CTAS. |
| * Also need to create tables in advance. |
| */ |
| if (queryDesc->plannedstmt->intoClause != NULL) |
| { |
| if (queryDesc->plannedstmt->intoClause->rel) |
| intorel_initplan(queryDesc, eflags); |
| if (queryDesc->plannedstmt->intoClause->rel == NULL && |
| queryDesc->plannedstmt->intoClause->ivm && Gp_role == GP_ROLE_EXECUTE) |
| { |
| transientenr_init(queryDesc); |
| } |
| } |
	else if (queryDesc->plannedstmt->copyIntoClause != NULL)
	{
		queryDesc->dest = CreateCopyDestReceiver();
		((DR_copy *) queryDesc->dest)->queryDesc = queryDesc;
| } |
| else if (queryDesc->plannedstmt->refreshClause != NULL && Gp_role == GP_ROLE_EXECUTE) |
| transientrel_init(queryDesc); |
| if (DEBUG1 >= log_min_messages) |
| { |
| char msec_str[32]; |
| switch (check_log_duration(msec_str, false)) |
| { |
| case 1: |
| case 2: |
| ereport(LOG, (errmsg("duration to InitPlan end: %s ms", msec_str))); |
| break; |
| } |
| } |
| |
| SIMPLE_FAULT_INJECTOR("func_init_plan_end"); |
| } |
| |
| /* |
| * Check that a proposed result relation is a legal target for the operation |
| * |
| * Generally the parser and/or planner should have noticed any such mistake |
| * already, but let's make sure. |
| * |
| * Note: when changing this function, you probably also need to look at |
| * CheckValidRowMarkRel. |
| */ |
| void |
| CheckValidResultRel(ResultRelInfo *resultRelInfo, CmdType operation, ModifyTableState *mtstate) |
| { |
| Relation resultRel = resultRelInfo->ri_RelationDesc; |
| TriggerDesc *trigDesc = resultRel->trigdesc; |
| FdwRoutine *fdwroutine; |
| ModifyTable *node; |
| int whichrel; |
| List *updateColnos; |
| ListCell *lc; |
| |
| switch (resultRel->rd_rel->relkind) |
| { |
| case RELKIND_RELATION: |
| case RELKIND_PARTITIONED_TABLE: |
| CheckCmdReplicaIdentity(resultRel, operation); |
| break; |
| case RELKIND_SEQUENCE: |
| ereport(ERROR, |
| (errcode(ERRCODE_WRONG_OBJECT_TYPE), |
| errmsg("cannot change sequence \"%s\"", |
| RelationGetRelationName(resultRel)))); |
| break; |
| case RELKIND_TOASTVALUE: |
| ereport(ERROR, |
| (errcode(ERRCODE_WRONG_OBJECT_TYPE), |
| errmsg("cannot change TOAST relation \"%s\"", |
| RelationGetRelationName(resultRel)))); |
| break; |
| case RELKIND_VIEW: |
| |
| /* |
| * Okay only if there's a suitable INSTEAD OF trigger. Messages |
| * here should match rewriteHandler.c's rewriteTargetView and |
| * RewriteQuery, except that we omit errdetail because we haven't |
| * got the information handy (and given that we really shouldn't |
| * get here anyway, it's not worth great exertion to get). |
| */ |
| /* |
| * GPDB_91_MERGE_FIXME: In Cloudberry, views are treated as non |
| * partitioned relations, gp_distribution_policy contains no entry |
| * for views. Consequently, flow of a ModifyTable node for a view |
| * is determined such that it is not dispatched to segments. |
| * Things get confused if the DML statement has a where clause that |
| * results in a direct dispatch to one segment. Underlying scan |
| * nodes have direct dispatch set but when it's time to commit, the |
| * direct dispatch information is not passed on to the DTM and it |
| * sends PREPARE to all segments, causing "Distributed transaction |
| * ... not found" error. Until this is fixed, INSTEAD OF triggers |
| * and DML on views need to be disabled. |
| */ |
| ereport(ERROR, |
| (errcode(ERRCODE_GP_FEATURE_NOT_YET), |
| errmsg("cannot change view \"%s\"", |
| RelationGetRelationName(resultRel)), |
| errhint("changing views is not supported in Cloudberry"))); |
| |
| switch (operation) |
| { |
| case CMD_INSERT: |
| if (!trigDesc || !trigDesc->trig_insert_instead_row) |
| ereport(ERROR, |
| (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE), |
| errmsg("cannot insert into view \"%s\"", |
| RelationGetRelationName(resultRel)), |
| errhint("To enable inserting into the view, provide an INSTEAD OF INSERT trigger or an unconditional ON INSERT DO INSTEAD rule."))); |
| break; |
| case CMD_UPDATE: |
| if (!trigDesc || !trigDesc->trig_update_instead_row) |
| ereport(ERROR, |
| (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE), |
| errmsg("cannot update view \"%s\"", |
| RelationGetRelationName(resultRel)), |
| errhint("To enable updating the view, provide an INSTEAD OF UPDATE trigger or an unconditional ON UPDATE DO INSTEAD rule."))); |
| break; |
| case CMD_DELETE: |
| if (!trigDesc || !trigDesc->trig_delete_instead_row) |
| ereport(ERROR, |
| (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE), |
| errmsg("cannot delete from view \"%s\"", |
| RelationGetRelationName(resultRel)), |
| errhint("To enable deleting from the view, provide an INSTEAD OF DELETE trigger or an unconditional ON DELETE DO INSTEAD rule."))); |
| break; |
| default: |
| elog(ERROR, "unrecognized CmdType: %d", (int) operation); |
| break; |
| } |
| break; |
| case RELKIND_MATVIEW: |
| if (!MatViewIncrementalMaintenanceIsEnabled()) |
| ereport(ERROR, |
| (errcode(ERRCODE_WRONG_OBJECT_TYPE), |
| errmsg("cannot change materialized view \"%s\"", |
| RelationGetRelationName(resultRel)))); |
| break; |
| case RELKIND_FOREIGN_TABLE: |
| /* Okay only if the FDW supports it */ |
| fdwroutine = resultRelInfo->ri_FdwRoutine; |
| switch (operation) |
| { |
| case CMD_INSERT: |
| if (fdwroutine->ExecForeignInsert == NULL) |
| ereport(ERROR, |
| (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), |
| errmsg("cannot insert into foreign table \"%s\"", |
| RelationGetRelationName(resultRel)))); |
| if (fdwroutine->IsForeignRelUpdatable != NULL && |
| (fdwroutine->IsForeignRelUpdatable(resultRel) & (1 << CMD_INSERT)) == 0) |
| ereport(ERROR, |
| (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE), |
| errmsg("foreign table \"%s\" does not allow inserts", |
| RelationGetRelationName(resultRel)))); |
| break; |
| case CMD_UPDATE: |
| if (fdwroutine->ExecForeignUpdate == NULL) |
| ereport(ERROR, |
| (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), |
| errmsg("cannot update foreign table \"%s\"", |
| RelationGetRelationName(resultRel)))); |
| if (fdwroutine->IsForeignRelUpdatable != NULL && |
| (fdwroutine->IsForeignRelUpdatable(resultRel) & (1 << CMD_UPDATE)) == 0) |
| ereport(ERROR, |
| (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE), |
| errmsg("foreign table \"%s\" does not allow updates", |
| RelationGetRelationName(resultRel)))); |
| break; |
| case CMD_DELETE: |
| if (fdwroutine->ExecForeignDelete == NULL) |
| ereport(ERROR, |
| (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), |
| errmsg("cannot delete from foreign table \"%s\"", |
| RelationGetRelationName(resultRel)))); |
| if (fdwroutine->IsForeignRelUpdatable != NULL && |
| (fdwroutine->IsForeignRelUpdatable(resultRel) & (1 << CMD_DELETE)) == 0) |
| ereport(ERROR, |
| (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE), |
| errmsg("foreign table \"%s\" does not allow deletes", |
| RelationGetRelationName(resultRel)))); |
| break; |
| default: |
| elog(ERROR, "unrecognized CmdType: %d", (int) operation); |
| break; |
| } |
| break; |
| |
| /* GPDB additions */ |
| case RELKIND_AOSEGMENTS: |
| if (!allowSystemTableMods) |
| ereport(ERROR, |
| (errcode(ERRCODE_WRONG_OBJECT_TYPE), |
| errmsg("cannot change AO segment listing relation \"%s\"", |
| RelationGetRelationName(resultRel)))); |
| break; |
| case RELKIND_AOBLOCKDIR: |
| if (!allowSystemTableMods) |
| ereport(ERROR, |
| (errcode(ERRCODE_WRONG_OBJECT_TYPE), |
| errmsg("cannot change AO block directory relation \"%s\"", |
| RelationGetRelationName(resultRel)))); |
| break; |
| case RELKIND_AOVISIMAP: |
| if (!allowSystemTableMods) |
| ereport(ERROR, |
| (errcode(ERRCODE_WRONG_OBJECT_TYPE), |
| errmsg("cannot change AO visibility map relation \"%s\"", |
| RelationGetRelationName(resultRel)))); |
| break; |
| case RELKIND_DIRECTORY_TABLE: |
			switch (operation)
| { |
| case CMD_INSERT: |
| case CMD_DELETE: |
| if (!allow_dml_directory_table) |
| ereport(ERROR, |
| (errcode(ERRCODE_WRONG_OBJECT_TYPE), |
| errmsg("cannot change directory table \"%s\"", |
| RelationGetRelationName(resultRel)))); |
| break; |
| case CMD_UPDATE: |
| if (mtstate) |
| { |
| node = (ModifyTable *) mtstate->ps.plan; |
| whichrel = mtstate->mt_lastResultIndex; |
| |
| updateColnos = (List *) list_nth(node->updateColnosLists, whichrel); |
| |
| foreach(lc, updateColnos) |
| { |
| AttrNumber targetattnum = lfirst_int(lc); |
| |
							if (targetattnum != DIRECTORY_TABLE_TAG_COLUMN_ATTNUM && !allow_dml_directory_table)
								ereport(ERROR,
										(errcode(ERRCODE_WRONG_OBJECT_TYPE),
										 errmsg("only the \"tag\" column of a directory table can be updated")));
| } |
| } |
| break; |
| default: |
| elog(ERROR, "unrecognized CmdType: %d", (int) operation); |
| break; |
| } |
| break; |
| |
| default: |
| ereport(ERROR, |
| (errcode(ERRCODE_WRONG_OBJECT_TYPE), |
| errmsg("cannot change relation \"%s\"", |
| RelationGetRelationName(resultRel)))); |
| break; |
| } |
| } |
| |
| /* |
| * Check that a proposed rowmark target relation is a legal target |
| * |
| * In most cases parser and/or planner should have noticed this already, but |
| * they don't cover all cases. |
| */ |
| static void |
| CheckValidRowMarkRel(Relation rel, RowMarkType markType) |
| { |
| FdwRoutine *fdwroutine; |
| |
| switch (rel->rd_rel->relkind) |
| { |
| case RELKIND_RELATION: |
| case RELKIND_PARTITIONED_TABLE: |
| /* OK */ |
| break; |
| case RELKIND_SEQUENCE: |
| /* Must disallow this because we don't vacuum sequences */ |
| ereport(ERROR, |
| (errcode(ERRCODE_WRONG_OBJECT_TYPE), |
| errmsg("cannot lock rows in sequence \"%s\"", |
| RelationGetRelationName(rel)))); |
| break; |
| case RELKIND_TOASTVALUE: |
| /* We could allow this, but there seems no good reason to */ |
| ereport(ERROR, |
| (errcode(ERRCODE_WRONG_OBJECT_TYPE), |
| errmsg("cannot lock rows in TOAST relation \"%s\"", |
| RelationGetRelationName(rel)))); |
| break; |
| case RELKIND_VIEW: |
| /* Should not get here; planner should have expanded the view */ |
| ereport(ERROR, |
| (errcode(ERRCODE_WRONG_OBJECT_TYPE), |
| errmsg("cannot lock rows in view \"%s\"", |
| RelationGetRelationName(rel)))); |
| break; |
| case RELKIND_MATVIEW: |
| /* Allow referencing a matview, but not actual locking clauses */ |
| if (markType != ROW_MARK_REFERENCE) |
| ereport(ERROR, |
| (errcode(ERRCODE_WRONG_OBJECT_TYPE), |
| errmsg("cannot lock rows in materialized view \"%s\"", |
| RelationGetRelationName(rel)))); |
| break; |
| case RELKIND_FOREIGN_TABLE: |
| /* Okay only if the FDW supports it */ |
| fdwroutine = GetFdwRoutineForRelation(rel, false); |
| if (fdwroutine->RefetchForeignRow == NULL) |
| ereport(ERROR, |
| (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), |
| errmsg("cannot lock rows in foreign table \"%s\"", |
| RelationGetRelationName(rel)))); |
| break; |
| case RELKIND_DIRECTORY_TABLE: |
| /* Allow referencing a directory table, but not actual locking clauses */ |
| if (markType != ROW_MARK_REFERENCE) |
| ereport(ERROR, |
| (errcode(ERRCODE_WRONG_OBJECT_TYPE), |
| errmsg("cannot lock rows in directory table \"%s\"", |
| RelationGetRelationName(rel)))); |
| break; |
| default: |
| ereport(ERROR, |
| (errcode(ERRCODE_WRONG_OBJECT_TYPE), |
| errmsg("cannot lock rows in relation \"%s\"", |
| RelationGetRelationName(rel)))); |
| break; |
| } |
| } |
| |
| /* |
| * Initialize ResultRelInfo data for one result relation |
| * |
| * Caution: before Postgres 9.1, this function included the relkind checking |
| * that's now in CheckValidResultRel, and it also did ExecOpenIndices if |
| * appropriate. Be sure callers cover those needs. |
| */ |
| void |
| InitResultRelInfo(ResultRelInfo *resultRelInfo, |
| Relation resultRelationDesc, |
| Index resultRelationIndex, |
| ResultRelInfo *partition_root_rri, |
| int instrument_options) |
| { |
| MemSet(resultRelInfo, 0, sizeof(ResultRelInfo)); |
| resultRelInfo->type = T_ResultRelInfo; |
| resultRelInfo->ri_RangeTableIndex = resultRelationIndex; |
| resultRelInfo->ri_RelationDesc = resultRelationDesc; |
| resultRelInfo->ri_NumIndices = 0; |
| resultRelInfo->ri_IndexRelationDescs = NULL; |
| resultRelInfo->ri_IndexRelationInfo = NULL; |
| /* make a copy so as not to depend on relcache info not changing... */ |
| resultRelInfo->ri_TrigDesc = CopyTriggerDesc(resultRelationDesc->trigdesc); |
| if (resultRelInfo->ri_TrigDesc) |
| { |
| int n = resultRelInfo->ri_TrigDesc->numtriggers; |
| |
| resultRelInfo->ri_TrigFunctions = (FmgrInfo *) |
| palloc0(n * sizeof(FmgrInfo)); |
| resultRelInfo->ri_TrigWhenExprs = (ExprState **) |
| palloc0(n * sizeof(ExprState *)); |
| if (instrument_options) |
| resultRelInfo->ri_TrigInstrument = InstrAlloc(n, instrument_options, false); |
| } |
| else |
| { |
| resultRelInfo->ri_TrigFunctions = NULL; |
| resultRelInfo->ri_TrigWhenExprs = NULL; |
| resultRelInfo->ri_TrigInstrument = NULL; |
| } |
| if (resultRelationDesc->rd_rel->relkind == RELKIND_FOREIGN_TABLE) |
| resultRelInfo->ri_FdwRoutine = GetFdwRoutineForRelation(resultRelationDesc, true); |
| else |
| resultRelInfo->ri_FdwRoutine = NULL; |
| |
| /* The following fields are set later if needed */ |
| resultRelInfo->ri_RowIdAttNo = 0; |
| resultRelInfo->ri_projectNew = NULL; |
| resultRelInfo->ri_newTupleSlot = NULL; |
| resultRelInfo->ri_oldTupleSlot = NULL; |
| resultRelInfo->ri_projectNewInfoValid = false; |
| resultRelInfo->ri_FdwState = NULL; |
| resultRelInfo->ri_usesFdwDirectModify = false; |
| resultRelInfo->ri_ConstraintExprs = NULL; |
| resultRelInfo->ri_GeneratedExprs = NULL; |
| resultRelInfo->ri_projectReturning = NULL; |
| resultRelInfo->ri_onConflictArbiterIndexes = NIL; |
| resultRelInfo->ri_onConflict = NULL; |
| resultRelInfo->ri_ReturningSlot = NULL; |
| resultRelInfo->ri_TrigOldSlot = NULL; |
| resultRelInfo->ri_TrigNewSlot = NULL; |
| |
| /* |
| * Only ExecInitPartitionInfo() and ExecInitPartitionDispatchInfo() pass |
| * non-NULL partition_root_rri. For child relations that are part of the |
| * initial query rather than being dynamically added by tuple routing, |
| * this field is filled in ExecInitModifyTable(). |
| */ |
| resultRelInfo->ri_RootResultRelInfo = partition_root_rri; |
| resultRelInfo->ri_RootToPartitionMap = NULL; /* set by |
| * ExecInitRoutingInfo */ |
| resultRelInfo->ri_PartitionTupleSlot = NULL; /* ditto */ |
| resultRelInfo->ri_ChildToRootMap = NULL; |
| resultRelInfo->ri_ChildToRootMapValid = false; |
| resultRelInfo->ri_CopyMultiInsertBuffer = NULL; |
| } |
| |
| /* |
| * ExecGetTriggerResultRel |
| * Get a ResultRelInfo for a trigger target relation. |
| * |
| * Most of the time, triggers are fired on one of the result relations of the |
| * query, and so we can just return a member of the es_result_relations array, |
| * or the es_tuple_routing_result_relations list (if any). (Note: in self-join |
| * situations there might be multiple members with the same OID; if so it |
| * doesn't matter which one we pick.) |
| * |
| * However, it is sometimes necessary to fire triggers on other relations; |
| * this happens mainly when an RI update trigger queues additional triggers |
| * on other relations, which will be processed in the context of the outer |
| * query. For efficiency's sake, we want to have a ResultRelInfo for those |
| * triggers too; that can avoid repeated re-opening of the relation. (It |
| * also provides a way for EXPLAIN ANALYZE to report the runtimes of such |
| * triggers.) So we make additional ResultRelInfo's as needed, and save them |
| * in es_trig_target_relations. |
| */ |
| ResultRelInfo * |
| ExecGetTriggerResultRel(EState *estate, Oid relid) |
| { |
| ResultRelInfo *rInfo; |
| ListCell *l; |
| Relation rel; |
| MemoryContext oldcontext; |
| |
| /* Search through the query result relations */ |
| foreach(l, estate->es_opened_result_relations) |
| { |
| rInfo = lfirst(l); |
| if (RelationGetRelid(rInfo->ri_RelationDesc) == relid) |
| return rInfo; |
| } |
| |
| /* |
| * Search through the result relations that were created during tuple |
| * routing, if any. |
| */ |
| foreach(l, estate->es_tuple_routing_result_relations) |
| { |
| rInfo = (ResultRelInfo *) lfirst(l); |
| if (RelationGetRelid(rInfo->ri_RelationDesc) == relid) |
| return rInfo; |
| } |
| |
| /* Nope, but maybe we already made an extra ResultRelInfo for it */ |
| foreach(l, estate->es_trig_target_relations) |
| { |
| rInfo = (ResultRelInfo *) lfirst(l); |
| if (RelationGetRelid(rInfo->ri_RelationDesc) == relid) |
| return rInfo; |
| } |
| /* Nope, so we need a new one */ |
| |
| /* |
| * Open the target relation's relcache entry. We assume that an |
| * appropriate lock is still held by the backend from whenever the trigger |
| * event got queued, so we need take no new lock here. Also, we need not |
| * recheck the relkind, so no need for CheckValidResultRel. |
| */ |
| rel = table_open(relid, NoLock); |
| |
| /* |
| * Make the new entry in the right context. |
| */ |
| oldcontext = MemoryContextSwitchTo(estate->es_query_cxt); |
| rInfo = makeNode(ResultRelInfo); |
| InitResultRelInfo(rInfo, |
| rel, |
| 0, /* dummy rangetable index */ |
| NULL, |
| estate->es_instrument); |
| estate->es_trig_target_relations = |
| lappend(estate->es_trig_target_relations, rInfo); |
| MemoryContextSwitchTo(oldcontext); |
| |
| /* |
| * Currently, we don't need any index information in ResultRelInfos used |
| * only for triggers, so no need to call ExecOpenIndices. |
| */ |
| |
| return rInfo; |
| } |
| |
| /* ---------------------------------------------------------------- |
| * ExecPostprocessPlan |
| * |
| * Give plan nodes a final chance to execute before shutdown |
| * ---------------------------------------------------------------- |
| */ |
| static void |
| ExecPostprocessPlan(EState *estate) |
| { |
| ListCell *lc; |
| |
| /* |
| * Make sure nodes run forward. |
| */ |
| estate->es_direction = ForwardScanDirection; |
| |
| if (Gp_role == GP_ROLE_DISPATCH) |
| { |
		/* Fire AFTER STATEMENT triggers. */
| foreach(lc, estate->es_auxmodifytables) |
| { |
| PlanState *ps = (PlanState *) lfirst(lc); |
| ModifyTableState *node = castNode(ModifyTableState, ps); |
| |
| fireASTriggers(node); |
| } |
| return; |
| } |
| /* |
| * Run any secondary ModifyTable nodes to completion, in case the main |
| * query did not fetch all rows from them. (We do this to ensure that |
| * such nodes have predictable results.) |
| */ |
| foreach(lc, estate->es_auxmodifytables) |
| { |
| PlanState *ps = (PlanState *) lfirst(lc); |
| |
| for (;;) |
| { |
| TupleTableSlot *slot; |
| |
| /* Reset the per-output-tuple exprcontext each time */ |
| ResetPerTupleExprContext(estate); |
| |
| slot = ExecProcNode(ps); |
| |
| if (TupIsNull(slot)) |
| break; |
| } |
| } |
| } |
| |
| /* ---------------------------------------------------------------- |
| * ExecEndPlan |
| * |
| * Cleans up the query plan -- closes files and frees up storage |
| * |
| * NOTE: we are no longer very worried about freeing storage per se |
| * in this code; FreeExecutorState should be guaranteed to release all |
| * memory that needs to be released. What we are worried about doing |
| * is closing relations and dropping buffer pins. Thus, for example, |
| * tuple tables must be cleared or dropped to ensure pins are released. |
| * ---------------------------------------------------------------- |
| */ |
| void |
| ExecEndPlan(PlanState *planstate, EState *estate) |
| { |
| ListCell *l; |
| |
| /* |
| * shut down the node-type-specific query processing |
| */ |
| ExecEndNode(planstate); |
| |
| /* |
| * for subplans too |
| */ |
| foreach(l, estate->es_subplanstates) |
| { |
| PlanState *subplanstate = (PlanState *) lfirst(l); |
| |
| ExecEndNode(subplanstate); |
| } |
| |
| /* |
| * destroy the executor's tuple table. Actually we only care about |
| * releasing buffer pins and tupdesc refcounts; there's no need to pfree |
| * the TupleTableSlots, since the containing memory context is about to go |
| * away anyway. |
| */ |
| ExecResetTupleTable(estate->es_tupleTable, false); |
| |
| /* Adjust INSERT/UPDATE/DELETE count for replicated table ON QD */ |
| AdjustReplicatedTableCounts(estate); |
| |
| /* |
| * Close any Relations that have been opened for range table entries or |
| * result relations. |
| */ |
| ExecCloseResultRelations(estate); |
| ExecCloseRangeTableRelations(estate); |
| } |
| |
| /* |
| * Close any relations that have been opened for ResultRelInfos. |
| */ |
| void |
| ExecCloseResultRelations(EState *estate) |
| { |
| ListCell *l; |
| |
| /* |
| * close indexes of result relation(s) if any. (Rels themselves are |
| * closed in ExecCloseRangeTableRelations()) |
| */ |
| foreach(l, estate->es_opened_result_relations) |
| { |
| ResultRelInfo *resultRelInfo = lfirst(l); |
| |
| ExecCloseIndices(resultRelInfo); |
| } |
| |
| /* Close any relations that have been opened by ExecGetTriggerResultRel(). */ |
| foreach(l, estate->es_trig_target_relations) |
| { |
| ResultRelInfo *resultRelInfo = (ResultRelInfo *) lfirst(l); |
| |
| /* |
| * Assert this is a "dummy" ResultRelInfo, see above. Otherwise we |
| * might be issuing a duplicate close against a Relation opened by |
| * ExecGetRangeTableRelation. |
| */ |
| Assert(resultRelInfo->ri_RangeTableIndex == 0); |
| |
| /* |
| * Since ExecGetTriggerResultRel doesn't call ExecOpenIndices for |
| * these rels, we needn't call ExecCloseIndices either. |
| */ |
| Assert(resultRelInfo->ri_NumIndices == 0); |
| |
| table_close(resultRelInfo->ri_RelationDesc, NoLock); |
| } |
| } |
| |
| /* |
| * Close all relations opened by ExecGetRangeTableRelation(). |
| * |
| * We do not release any locks we might hold on those rels. |
| */ |
| void |
| ExecCloseRangeTableRelations(EState *estate) |
| { |
| int i; |
| |
| for (i = 0; i < estate->es_range_table_size; i++) |
| { |
| if (estate->es_relations[i]) |
| table_close(estate->es_relations[i], NoLock); |
| } |
| } |
| |
| /* ---------------------------------------------------------------- |
| * ExecutePlan |
| * |
| * Processes the query plan until we have retrieved 'numberTuples' tuples, |
| * moving in the specified direction. |
| * |
| * Runs to completion if numberTuples is 0 |
| * |
| * Note: the ctid attribute is a 'junk' attribute that is removed before the |
| * user can see it |
| * ---------------------------------------------------------------- |
| */ |
| static void |
| ExecutePlan(EState *estate, |
| PlanState *planstate, |
| bool use_parallel_mode, |
| CmdType operation, |
| bool sendTuples, |
| uint64 numberTuples, |
| ScanDirection direction, |
| DestReceiver *dest, |
| bool execute_once) |
| { |
| TupleTableSlot *slot; |
| uint64 current_tuple_count; |
| |
| /* |
| * For holdable cursor, the plan is executed without rewinding on gpdb. We |
| * need to quit if the executor has already emitted all tuples. |
| */ |
| if (estate->es_got_eos) |
| return; |
| |
| /* |
| * initialize local variables |
| */ |
| current_tuple_count = 0; |
| |
| /* |
| * Set the direction. |
| */ |
| estate->es_direction = direction; |
| |
| /* |
| * If the plan might potentially be executed multiple times, we must force |
| * it to run without parallelism, because we might exit early. |
| */ |
| if (!execute_once || GP_ROLE_DISPATCH == Gp_role) |
| use_parallel_mode = false; |
| |
| estate->es_use_parallel_mode = use_parallel_mode; |
| if (use_parallel_mode) |
| EnterParallelMode(); |
| |
| /* |
| * CBDB style parallelism won't interfere PG style parallel mechanism. |
| * So that we will pass if use_parallel_mode is true which means there exists |
| * Gather/GatherMerge node. |
| */ |
| if (estate->useMppParallelMode) |
| GpInsertParallelDSMHash(planstate); |
| |
| #ifdef FAULT_INJECTOR |
| /* Inject a fault before tuple processing started */ |
| SIMPLE_FAULT_INJECTOR("executor_pre_tuple_processed"); |
| #endif /* FAULT_INJECTOR */ |
| |
| /* |
| * Loop until we've processed the proper number of tuples from the plan. |
| */ |
| for (;;) |
| { |
| /* Reset the per-output-tuple exprcontext */ |
| ResetPerTupleExprContext(estate); |
| |
| /* |
| * Execute the plan and obtain a tuple |
| */ |
| slot = ExecProcNode(planstate); |
| |
| /* |
| * if the tuple is null, then we assume there is nothing more to |
| * process so we just end the loop... |
| */ |
| if (TupIsNull(slot)) |
| { |
| /* |
| * We got end-of-stream. We need to mark it since with a cursor |
| * end-of-stream will only be received with the fetch that |
| * returns the last tuple. ExecutorEnd needs to know if EOS was |
| * received in order to do the right cleanup. |
| */ |
| estate->es_got_eos = true; |
| /* Allow nodes to release or shut down resources. */ |
| (void) ExecShutdownNode(planstate); |
| break; |
| } |
| |
| /* |
| * If we have a junk filter, then project a new tuple with the junk |
| * removed. |
| * |
| * Store this new "clean" tuple in the junkfilter's resultSlot. |
| * (Formerly, we stored it back over the "dirty" tuple, which is WRONG |
| * because that tuple slot has the wrong descriptor.) |
| */ |
| if (estate->es_junkFilter != NULL) |
| slot = ExecFilterJunk(estate->es_junkFilter, slot); |
| |
| if (operation != CMD_SELECT && Gp_role == GP_ROLE_EXECUTE && !Gp_is_writer) |
| { |
| elog(ERROR, "INSERT/UPDATE/DELETE must be executed by a writer segworker group"); |
| } |
| |
| /* |
| * If we are supposed to send the tuple somewhere, do so. (In |
| * practice, this is probably always the case at this point.) |
| */ |
| if (sendTuples) |
| { |
| /* |
| * If we are not able to send the tuple, we assume the destination |
| * has closed and no more tuples can be sent. If that's the case, |
| * end the loop. |
| */ |
| if (!dest->receiveSlot(slot, dest)) |
| break; |
| } |
| |
| /* |
| * Count tuples processed, if this is a SELECT. (For other operation |
| * types, the ModifyTable plan node must count the appropriate |
| * events.) |
| */ |
| if (operation == CMD_SELECT) |
| { |
| (estate->es_processed)++; |
| |
| #ifdef FAULT_INJECTOR |
| /* |
| * bump es_processed using the fault injector, but only if the number rows is in a certain range |
| * this avoids bumping the counter every time after we bumped it once |
| */ |
| if (estate->es_processed >= 10000 && estate->es_processed <= 1000000) |
| { |
| if (FaultInjector_InjectFaultIfSet("executor_run_high_processed", |
| DDLNotSpecified, |
| "" /* databaseName */, |
| "" /* tableName */) == FaultInjectorTypeSkip) |
| { |
| /* |
| * For testing purposes, pretend that we have already processed |
| * almost 2^32 rows. |
| */ |
| estate->es_processed = UINT_MAX - 10; |
| } |
| } |
| #endif /* FAULT_INJECTOR */ |
| } |
| |
| /* |
| * check our tuple count.. if we've processed the proper number then |
| * quit, else loop again and process more tuples. Zero numberTuples |
| * means no limit. |
| */ |
| current_tuple_count++; |
| if (numberTuples && numberTuples == current_tuple_count) |
| break; |
| } |
| |
| /* |
| * If we know we won't need to back up, we can release resources at this |
| * point. |
| */ |
| if (!(estate->es_top_eflags & EXEC_FLAG_BACKWARD)) |
| (void) ExecShutdownNode(planstate); |
| |
| if (estate->useMppParallelMode) |
| GpDestroyParallelDSMEntry(); |
| |
| if (use_parallel_mode) |
| ExitParallelMode(); |
| } |
| |
| |
| /* |
| * ExecRelCheck --- check that tuple meets constraints for result relation |
| * |
| * Returns NULL if OK, else name of failed check constraint |
| */ |
| static const char * |
| ExecRelCheck(ResultRelInfo *resultRelInfo, |
| TupleTableSlot *slot, EState *estate) |
| { |
| Relation rel = resultRelInfo->ri_RelationDesc; |
| int ncheck = rel->rd_att->constr->num_check; |
| ConstrCheck *check = rel->rd_att->constr->check; |
| ExprContext *econtext; |
| MemoryContext oldContext; |
| int i; |
| |
| /* |
| * CheckConstraintFetch let this pass with only a warning, but now we |
| * should fail rather than possibly failing to enforce an important |
| * constraint. |
| */ |
| if (ncheck != rel->rd_rel->relchecks) |
| elog(ERROR, "%d pg_constraint record(s) missing for relation \"%s\"", |
| rel->rd_rel->relchecks - ncheck, RelationGetRelationName(rel)); |
| |
| /* |
| * If first time through for this result relation, build expression |
| * nodetrees for rel's constraint expressions. Keep them in the per-query |
| * memory context so they'll survive throughout the query. |
| */ |
| if (resultRelInfo->ri_ConstraintExprs == NULL) |
| { |
| oldContext = MemoryContextSwitchTo(estate->es_query_cxt); |
| resultRelInfo->ri_ConstraintExprs = |
| (ExprState **) palloc(ncheck * sizeof(ExprState *)); |
| for (i = 0; i < ncheck; i++) |
| { |
| Expr *checkconstr; |
| |
| checkconstr = stringToNode(check[i].ccbin); |
| resultRelInfo->ri_ConstraintExprs[i] = |
| ExecPrepareExpr(checkconstr, estate); |
| } |
| MemoryContextSwitchTo(oldContext); |
| } |
| |
| /* |
| * We will use the EState's per-tuple context for evaluating constraint |
| * expressions (creating it if it's not already there). |
| */ |
| econtext = GetPerTupleExprContext(estate); |
| |
| /* Arrange for econtext's scan tuple to be the tuple under test */ |
| econtext->ecxt_scantuple = slot; |
| |
| /* And evaluate the constraints */ |
| for (i = 0; i < ncheck; i++) |
| { |
| ExprState *checkconstr = resultRelInfo->ri_ConstraintExprs[i]; |
| |
| /* |
| * NOTE: SQL specifies that a NULL result from a constraint expression |
| * is not to be treated as a failure. Therefore, use ExecCheck not |
| * ExecQual. |
| */ |
| if (!ExecCheck(checkconstr, econtext)) |
| return check[i].ccname; |
| } |
| |
| /* NULL result means no error */ |
| return NULL; |
| } |
| |
| /* |
| * ExecPartitionCheck --- check that tuple meets the partition constraint. |
| * |
| * Returns true if it meets the partition constraint. If the constraint |
| * fails and we're asked to emit an error, do so and don't return; otherwise |
| * return false. |
| */ |
| bool |
| ExecPartitionCheck(ResultRelInfo *resultRelInfo, TupleTableSlot *slot, |
| EState *estate, bool emitError) |
| { |
| ExprContext *econtext; |
| bool success; |
| |
| /* |
| * If first time through, build expression state tree for the partition |
| * check expression. (In the corner case where the partition check |
| * expression is empty, ie there's a default partition and nothing else, |
| * we'll be fooled into executing this code each time through. But it's |
| * pretty darn cheap in that case, so we don't worry about it.) |
| */ |
| if (resultRelInfo->ri_PartitionCheckExpr == NULL) |
| { |
| /* |
| * Ensure that the qual tree and prepared expression are in the |
| * query-lifespan context. |
| */ |
| MemoryContext oldcxt = MemoryContextSwitchTo(estate->es_query_cxt); |
| List *qual = RelationGetPartitionQual(resultRelInfo->ri_RelationDesc); |
| |
| resultRelInfo->ri_PartitionCheckExpr = ExecPrepareCheck(qual, estate); |
| MemoryContextSwitchTo(oldcxt); |
| } |
| |
| /* |
| * We will use the EState's per-tuple context for evaluating constraint |
| * expressions (creating it if it's not already there). |
| */ |
| econtext = GetPerTupleExprContext(estate); |
| |
| /* Arrange for econtext's scan tuple to be the tuple under test */ |
| econtext->ecxt_scantuple = slot; |
| |
| /* |
| * As in case of the catalogued constraints, we treat a NULL result as |
| * success here, not a failure. |
| */ |
| success = ExecCheck(resultRelInfo->ri_PartitionCheckExpr, econtext); |
| |
| /* if asked to emit error, don't actually return on failure */ |
| if (!success && emitError) |
| ExecPartitionCheckEmitError(resultRelInfo, slot, estate); |
| |
| return success; |
| } |
| |
| /* |
| * ExecPartitionCheckEmitError - Form and emit an error message after a failed |
| * partition constraint check. |
| */ |
| void |
| ExecPartitionCheckEmitError(ResultRelInfo *resultRelInfo, |
| TupleTableSlot *slot, |
| EState *estate) |
| { |
| Oid root_relid; |
| TupleDesc tupdesc; |
| char *val_desc; |
| Bitmapset *modifiedCols; |
| |
| /* |
| * If the tuple has been routed, it's been converted to the partition's |
| * rowtype, which might differ from the root table's. We must convert it |
| * back to the root table's rowtype so that val_desc in the error message |
| * matches the input tuple. |
| */ |
| if (resultRelInfo->ri_RootResultRelInfo) |
| { |
| ResultRelInfo *rootrel = resultRelInfo->ri_RootResultRelInfo; |
| TupleDesc old_tupdesc; |
| AttrMap *map; |
| |
| root_relid = RelationGetRelid(rootrel->ri_RelationDesc); |
| tupdesc = RelationGetDescr(rootrel->ri_RelationDesc); |
| |
| old_tupdesc = RelationGetDescr(resultRelInfo->ri_RelationDesc); |
| /* a reverse map */ |
| map = build_attrmap_by_name_if_req(old_tupdesc, tupdesc); |
| |
| /* |
| * Partition-specific slot's tupdesc can't be changed, so allocate a |
| * new one. |
| */ |
| if (map != NULL) |
| slot = execute_attr_map_slot(map, slot, |
| MakeTupleTableSlot(tupdesc, &TTSOpsVirtual)); |
| modifiedCols = bms_union(ExecGetInsertedCols(rootrel, estate), |
| ExecGetUpdatedCols(rootrel, estate)); |
| } |
| else |
| { |
| root_relid = RelationGetRelid(resultRelInfo->ri_RelationDesc); |
| tupdesc = RelationGetDescr(resultRelInfo->ri_RelationDesc); |
| modifiedCols = bms_union(ExecGetInsertedCols(resultRelInfo, estate), |
| ExecGetUpdatedCols(resultRelInfo, estate)); |
| } |
| |
| val_desc = ExecBuildSlotValueDescription(root_relid, |
| slot, |
| tupdesc, |
| modifiedCols, |
| 64); |
| ereport(ERROR, |
| (errcode(ERRCODE_CHECK_VIOLATION), |
| errmsg("new row for relation \"%s\" violates partition constraint", |
| RelationGetRelationName(resultRelInfo->ri_RelationDesc)), |
| val_desc ? errdetail("Failing row contains %s.", val_desc) : 0, |
| errtable(resultRelInfo->ri_RelationDesc))); |
| } |
| |
| /* |
| * ExecConstraints - check constraints of the tuple in 'slot' |
| * |
| * This checks the traditional NOT NULL and check constraints. |
| * |
| * The partition constraint is *NOT* checked. |
| * |
| * Note: 'slot' contains the tuple to check the constraints of, which may |
| * have been converted from the original input tuple after tuple routing. |
| * 'resultRelInfo' is the final result relation, after tuple routing. |
| */ |
| void |
| ExecConstraints(ResultRelInfo *resultRelInfo, |
| TupleTableSlot *slot, EState *estate) |
| { |
| Relation rel = resultRelInfo->ri_RelationDesc; |
| TupleDesc tupdesc = RelationGetDescr(rel); |
| TupleConstr *constr = tupdesc->constr; |
| Bitmapset *modifiedCols; |
| |
| Assert(constr); /* we should not be called otherwise */ |
| |
| if (constr->has_not_null) |
| { |
| int natts = tupdesc->natts; |
| int attrChk; |
| |
| for (attrChk = 1; attrChk <= natts; attrChk++) |
| { |
| Form_pg_attribute att = TupleDescAttr(tupdesc, attrChk - 1); |
| |
| if (att->attnotnull && slot_attisnull(slot, attrChk)) |
| { |
| char *val_desc; |
| Relation orig_rel = rel; |
| TupleDesc orig_tupdesc = RelationGetDescr(rel); |
| |
| /* |
| * If the tuple has been routed, it's been converted to the |
| * partition's rowtype, which might differ from the root |
| * table's. We must convert it back to the root table's |
| * rowtype so that val_desc shown error message matches the |
| * input tuple. |
| */ |
| if (resultRelInfo->ri_RootResultRelInfo) |
| { |
| ResultRelInfo *rootrel = resultRelInfo->ri_RootResultRelInfo; |
| AttrMap *map; |
| |
| tupdesc = RelationGetDescr(rootrel->ri_RelationDesc); |
| /* a reverse map */ |
| map = build_attrmap_by_name_if_req(orig_tupdesc, |
| tupdesc); |
| |
| /* |
| * Partition-specific slot's tupdesc can't be changed, so |
| * allocate a new one. |
| */ |
| if (map != NULL) |
| slot = execute_attr_map_slot(map, slot, |
| MakeTupleTableSlot(tupdesc, &TTSOpsVirtual)); |
| modifiedCols = bms_union(ExecGetInsertedCols(rootrel, estate), |
| ExecGetUpdatedCols(rootrel, estate)); |
| rel = rootrel->ri_RelationDesc; |
| } |
| else |
| modifiedCols = bms_union(ExecGetInsertedCols(resultRelInfo, estate), |
| ExecGetUpdatedCols(resultRelInfo, estate)); |
| val_desc = ExecBuildSlotValueDescription(RelationGetRelid(rel), |
| slot, |
| tupdesc, |
| modifiedCols, |
| 64); |
| |
| ereport(ERROR, |
| (errcode(ERRCODE_NOT_NULL_VIOLATION), |
| errmsg("null value in column \"%s\" of relation \"%s\" violates not-null constraint", |
| NameStr(att->attname), |
| RelationGetRelationName(orig_rel)), |
| val_desc ? errdetail("Failing row contains %s.", val_desc) : 0, |
| errtablecol(orig_rel, attrChk))); |
| } |
| } |
| } |
| |
| if (rel->rd_rel->relchecks > 0) |
| { |
| const char *failed; |
| |
| if ((failed = ExecRelCheck(resultRelInfo, slot, estate)) != NULL) |
| { |
| char *val_desc; |
| Relation orig_rel = rel; |
| |
| /* See the comment above. */ |
| if (resultRelInfo->ri_RootResultRelInfo) |
| { |
| ResultRelInfo *rootrel = resultRelInfo->ri_RootResultRelInfo; |
| TupleDesc old_tupdesc = RelationGetDescr(rel); |
| AttrMap *map; |
| |
| tupdesc = RelationGetDescr(rootrel->ri_RelationDesc); |
| /* a reverse map */ |
| map = build_attrmap_by_name_if_req(old_tupdesc, |
| tupdesc); |
| |
| /* |
| * Partition-specific slot's tupdesc can't be changed, so |
| * allocate a new one. |
| */ |
| if (map != NULL) |
| slot = execute_attr_map_slot(map, slot, |
| MakeTupleTableSlot(tupdesc, &TTSOpsVirtual)); |
| modifiedCols = bms_union(ExecGetInsertedCols(rootrel, estate), |
| ExecGetUpdatedCols(rootrel, estate)); |
| rel = rootrel->ri_RelationDesc; |
| } |
| else |
| modifiedCols = bms_union(ExecGetInsertedCols(resultRelInfo, estate), |
| ExecGetUpdatedCols(resultRelInfo, estate)); |
| val_desc = ExecBuildSlotValueDescription(RelationGetRelid(rel), |
| slot, |
| tupdesc, |
| modifiedCols, |
| 64); |
| ereport(ERROR, |
| (errcode(ERRCODE_CHECK_VIOLATION), |
| errmsg("new row for relation \"%s\" violates check constraint \"%s\"", |
| RelationGetRelationName(orig_rel), failed), |
| val_desc ? errdetail("Failing row contains %s.", val_desc) : 0, |
| errtableconstraint(orig_rel, failed))); |
| } |
| } |
| } |
| |
| /* |
| * ExecWithCheckOptions -- check that tuple satisfies any WITH CHECK OPTIONs |
| * of the specified kind. |
| * |
| * Note that this needs to be called multiple times to ensure that all kinds of |
| * WITH CHECK OPTIONs are handled (both those from views which have the WITH |
| * CHECK OPTION set and from row-level security policies). See ExecInsert() |
| * and ExecUpdate(). |
| */ |
| void |
| ExecWithCheckOptions(WCOKind kind, ResultRelInfo *resultRelInfo, |
| TupleTableSlot *slot, EState *estate) |
| { |
| Relation rel = resultRelInfo->ri_RelationDesc; |
| TupleDesc tupdesc = RelationGetDescr(rel); |
| ExprContext *econtext; |
| ListCell *l1, |
| *l2; |
| |
| /* |
| * We will use the EState's per-tuple context for evaluating constraint |
| * expressions (creating it if it's not already there). |
| */ |
| econtext = GetPerTupleExprContext(estate); |
| |
| /* Arrange for econtext's scan tuple to be the tuple under test */ |
| econtext->ecxt_scantuple = slot; |
| |
| /* Check each of the constraints */ |
| forboth(l1, resultRelInfo->ri_WithCheckOptions, |
| l2, resultRelInfo->ri_WithCheckOptionExprs) |
| { |
| WithCheckOption *wco = (WithCheckOption *) lfirst(l1); |
| ExprState *wcoExpr = (ExprState *) lfirst(l2); |
| |
| /* |
| * Skip any WCOs which are not the kind we are looking for at this |
| * time. |
| */ |
| if (wco->kind != kind) |
| continue; |
| |
| /* |
| * WITH CHECK OPTION checks are intended to ensure that the new tuple |
| * is visible (in the case of a view) or that it passes the |
| * 'with-check' policy (in the case of row security). If the qual |
| * evaluates to NULL or FALSE, then the new tuple won't be included in |
| * the view or doesn't pass the 'with-check' policy for the table. |
| */ |
| if (!ExecQual(wcoExpr, econtext)) |
| { |
| char *val_desc; |
| Bitmapset *modifiedCols; |
| |
| switch (wco->kind) |
| { |
| /* |
| * For WITH CHECK OPTIONs coming from views, we might be |
| * able to provide the details on the row, depending on |
| * the permissions on the relation (that is, if the user |
| * could view it directly anyway). For RLS violations, we |
| * don't include the data since we don't know if the user |
| * should be able to view the tuple as that depends on the |
| * USING policy. |
| */ |
| case WCO_VIEW_CHECK: |
| /* See the comment in ExecConstraints(). */ |
| if (resultRelInfo->ri_RootResultRelInfo) |
| { |
| ResultRelInfo *rootrel = resultRelInfo->ri_RootResultRelInfo; |
| TupleDesc old_tupdesc = RelationGetDescr(rel); |
| AttrMap *map; |
| |
| tupdesc = RelationGetDescr(rootrel->ri_RelationDesc); |
| /* a reverse map */ |
| map = build_attrmap_by_name_if_req(old_tupdesc, |
| tupdesc); |
| |
| /* |
| * Partition-specific slot's tupdesc can't be changed, |
| * so allocate a new one. |
| */ |
| if (map != NULL) |
| slot = execute_attr_map_slot(map, slot, |
| MakeTupleTableSlot(tupdesc, &TTSOpsVirtual)); |
| |
| modifiedCols = bms_union(ExecGetInsertedCols(rootrel, estate), |
| ExecGetUpdatedCols(rootrel, estate)); |
| rel = rootrel->ri_RelationDesc; |
| } |
| else |
| modifiedCols = bms_union(ExecGetInsertedCols(resultRelInfo, estate), |
| ExecGetUpdatedCols(resultRelInfo, estate)); |
| val_desc = ExecBuildSlotValueDescription(RelationGetRelid(rel), |
| slot, |
| tupdesc, |
| modifiedCols, |
| 64); |
| |
| ereport(ERROR, |
| (errcode(ERRCODE_WITH_CHECK_OPTION_VIOLATION), |
| errmsg("new row violates check option for view \"%s\"", |
| wco->relname), |
| val_desc ? errdetail("Failing row contains %s.", |
| val_desc) : 0)); |
| break; |
| case WCO_RLS_INSERT_CHECK: |
| case WCO_RLS_UPDATE_CHECK: |
| if (wco->polname != NULL) |
| ereport(ERROR, |
| (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE), |
| errmsg("new row violates row-level security policy \"%s\" for table \"%s\"", |
| wco->polname, wco->relname))); |
| else |
| ereport(ERROR, |
| (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE), |
| errmsg("new row violates row-level security policy for table \"%s\"", |
| wco->relname))); |
| break; |
| case WCO_RLS_CONFLICT_CHECK: |
| if (wco->polname != NULL) |
| ereport(ERROR, |
| (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE), |
| errmsg("new row violates row-level security policy \"%s\" (USING expression) for table \"%s\"", |
| wco->polname, wco->relname))); |
| else |
| ereport(ERROR, |
| (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE), |
| errmsg("new row violates row-level security policy (USING expression) for table \"%s\"", |
| wco->relname))); |
| break; |
| default: |
| elog(ERROR, "unrecognized WCO kind: %u", wco->kind); |
| break; |
| } |
| } |
| } |
| } |
| |
| /* |
| * ExecBuildSlotValueDescription -- construct a string representing a tuple |
| * |
| * This is intentionally very similar to BuildIndexValueDescription, but |
| * unlike that function, we truncate long field values (to at most maxfieldlen |
| * bytes). That seems necessary here since heap field values could be very |
| * long, whereas index entries typically aren't so wide. |
| * |
| * Also, unlike the case with index entries, we need to be prepared to ignore |
| * dropped columns. We used to use the slot's tuple descriptor to decode the |
| * data, but the slot's descriptor doesn't identify dropped columns, so we |
| * now need to be passed the relation's descriptor. |
| * |
| * Note that, like BuildIndexValueDescription, if the user does not have |
| * permission to view any of the columns involved, a NULL is returned. Unlike |
| * BuildIndexValueDescription, if the user has access to view a subset of the |
 * columns involved, that subset will be returned with a key identifying which
| * columns they are. |
| */ |
| static char * |
| ExecBuildSlotValueDescription(Oid reloid, |
| TupleTableSlot *slot, |
| TupleDesc tupdesc, |
| Bitmapset *modifiedCols, |
| int maxfieldlen) |
| { |
| StringInfoData buf; |
| StringInfoData collist; |
| bool write_comma = false; |
| bool write_comma_collist = false; |
| int i; |
| AclResult aclresult; |
| bool table_perm = false; |
| bool any_perm = false; |
| |
| /* |
| * Check if RLS is enabled and should be active for the relation; if so, |
| * then don't return anything. Otherwise, go through normal permission |
| * checks. |
| */ |
| if (check_enable_rls(reloid, InvalidOid, true) == RLS_ENABLED) |
| return NULL; |
| |
| initStringInfo(&buf); |
| |
| appendStringInfoChar(&buf, '('); |
| |
| /* |
| * Check if the user has permissions to see the row. Table-level SELECT |
| * allows access to all columns. If the user does not have table-level |
| * SELECT then we check each column and include those the user has SELECT |
| * rights on. Additionally, we always include columns the user provided |
| * data for. |
| */ |
| aclresult = pg_class_aclcheck(reloid, GetUserId(), ACL_SELECT); |
| if (aclresult != ACLCHECK_OK) |
| { |
| /* Set up the buffer for the column list */ |
| initStringInfo(&collist); |
| appendStringInfoChar(&collist, '('); |
| } |
| else |
| table_perm = any_perm = true; |
| |
| /* Make sure the tuple is fully deconstructed */ |
| slot_getallattrs(slot); |
| |
| for (i = 0; i < tupdesc->natts; i++) |
| { |
| bool column_perm = false; |
| char *val; |
| int vallen; |
| Form_pg_attribute att = TupleDescAttr(tupdesc, i); |
| |
| /* ignore dropped columns */ |
| if (att->attisdropped) |
| continue; |
| |
| if (!table_perm) |
| { |
| /* |
| * No table-level SELECT, so need to make sure they either have |
| * SELECT rights on the column or that they have provided the data |
| * for the column. If not, omit this column from the error |
| * message. |
| */ |
| aclresult = pg_attribute_aclcheck(reloid, att->attnum, |
| GetUserId(), ACL_SELECT); |
| if (bms_is_member(att->attnum - FirstLowInvalidHeapAttributeNumber, |
| modifiedCols) || aclresult == ACLCHECK_OK) |
| { |
| column_perm = any_perm = true; |
| |
| if (write_comma_collist) |
| appendStringInfoString(&collist, ", "); |
| else |
| write_comma_collist = true; |
| |
| appendStringInfoString(&collist, NameStr(att->attname)); |
| } |
| } |
| |
| if (table_perm || column_perm) |
| { |
| if (slot->tts_isnull[i]) |
| val = "null"; |
| else |
| { |
| Oid foutoid; |
| bool typisvarlena; |
| |
| getTypeOutputInfo(att->atttypid, |
| &foutoid, &typisvarlena); |
| val = OidOutputFunctionCall(foutoid, slot->tts_values[i]); |
| } |
| |
| if (write_comma) |
| appendStringInfoString(&buf, ", "); |
| else |
| write_comma = true; |
| |
| /* truncate if needed */ |
| vallen = strlen(val); |
| if (vallen <= maxfieldlen) |
| appendBinaryStringInfo(&buf, val, vallen); |
| else |
| { |
| vallen = pg_mbcliplen(val, vallen, maxfieldlen); |
| appendBinaryStringInfo(&buf, val, vallen); |
| appendStringInfoString(&buf, "..."); |
| } |
| } |
| } |
| |
| /* If we end up with zero columns being returned, then return NULL. */ |
| if (!any_perm) |
| return NULL; |
| |
| appendStringInfoChar(&buf, ')'); |
| |
| if (!table_perm) |
| { |
| appendStringInfoString(&collist, ") = "); |
| appendBinaryStringInfo(&collist, buf.data, buf.len); |
| |
| return collist.data; |
| } |
| |
| return buf.data; |
| } |
| |
| |
| /* |
| * ExecUpdateLockMode -- find the appropriate UPDATE tuple lock mode for a |
| * given ResultRelInfo |
| */ |
| LockTupleMode |
| ExecUpdateLockMode(EState *estate, ResultRelInfo *relinfo) |
| { |
| Bitmapset *keyCols; |
| Bitmapset *updatedCols; |
| |
| /* |
| * Compute lock mode to use. If columns that are part of the key have not |
| * been modified, then we can use a weaker lock, allowing for better |
| * concurrency. |
| */ |
| updatedCols = ExecGetAllUpdatedCols(relinfo, estate); |
| keyCols = RelationGetIndexAttrBitmap(relinfo->ri_RelationDesc, |
| INDEX_ATTR_BITMAP_KEY); |
| |
| if (bms_overlap(keyCols, updatedCols)) |
| return LockTupleExclusive; |
| |
| return LockTupleNoKeyExclusive; |
| } |
| |
| /* |
| * ExecFindRowMark -- find the ExecRowMark struct for given rangetable index |
| * |
| * If no such struct, either return NULL or throw error depending on missing_ok |
| */ |
| ExecRowMark * |
| ExecFindRowMark(EState *estate, Index rti, bool missing_ok) |
| { |
| if (rti > 0 && rti <= estate->es_range_table_size && |
| estate->es_rowmarks != NULL) |
| { |
| ExecRowMark *erm = estate->es_rowmarks[rti - 1]; |
| |
| if (erm) |
| return erm; |
| } |
| if (!missing_ok) |
| elog(ERROR, "failed to find ExecRowMark for rangetable index %u", rti); |
| return NULL; |
| } |
| |
| /* |
| * ExecBuildAuxRowMark -- create an ExecAuxRowMark struct |
| * |
| * Inputs are the underlying ExecRowMark struct and the targetlist of the |
| * input plan node (not planstate node!). We need the latter to find out |
| * the column numbers of the resjunk columns. |
| */ |
| ExecAuxRowMark * |
| ExecBuildAuxRowMark(ExecRowMark *erm, List *targetlist) |
| { |
| ExecAuxRowMark *aerm = (ExecAuxRowMark *) palloc0(sizeof(ExecAuxRowMark)); |
| char resname[32]; |
| |
| aerm->rowmark = erm; |
| |
| /* Look up the resjunk columns associated with this rowmark */ |
| if (erm->markType != ROW_MARK_COPY) |
| { |
| /* need ctid for all methods other than COPY */ |
| snprintf(resname, sizeof(resname), "ctid%u", erm->rowmarkId); |
| aerm->ctidAttNo = ExecFindJunkAttributeInTlist(targetlist, |
| resname); |
| if (!AttributeNumberIsValid(aerm->ctidAttNo)) |
| elog(ERROR, "could not find junk %s column", resname); |
| } |
| else |
| { |
| /* need wholerow if COPY */ |
| snprintf(resname, sizeof(resname), "wholerow%u", erm->rowmarkId); |
| aerm->wholeAttNo = ExecFindJunkAttributeInTlist(targetlist, |
| resname); |
| if (!AttributeNumberIsValid(aerm->wholeAttNo)) |
| elog(ERROR, "could not find junk %s column", resname); |
| } |
| |
| /* if child rel, need tableoid */ |
| if (erm->rti != erm->prti) |
| { |
| snprintf(resname, sizeof(resname), "tableoid%u", erm->rowmarkId); |
| aerm->toidAttNo = ExecFindJunkAttributeInTlist(targetlist, |
| resname); |
| if (!AttributeNumberIsValid(aerm->toidAttNo)) |
| elog(ERROR, "could not find junk %s column", resname); |
| } |
| |
| return aerm; |
| } |
| |
| |
| /* |
| * EvalPlanQual logic --- recheck modified tuple(s) to see if we want to |
| * process the updated version under READ COMMITTED rules. |
| * |
| * See backend/executor/README for some info about how this works. |
| */ |
| |
| |
| /* |
| * Check the updated version of a tuple to see if we want to process it under |
| * READ COMMITTED rules. |
| * |
| * epqstate - state for EvalPlanQual rechecking |
| * relation - table containing tuple |
| * rti - rangetable index of table containing tuple |
| * inputslot - tuple for processing - this can be the slot from |
| * EvalPlanQualSlot(), for the increased efficiency. |
| * |
| * This tests whether the tuple in inputslot still matches the relevant |
| * quals. For that result to be useful, typically the input tuple has to be |
| * the last row version (otherwise the result isn't particularly useful) and |
| * locked (otherwise the result might be out of date). That's typically |
| * achieved by using table_tuple_lock() with the |
| * TUPLE_LOCK_FLAG_FIND_LAST_VERSION flag. |
| * |
| * Returns a slot containing the new candidate update/delete tuple, or |
| * NULL if we determine we shouldn't process the row. |
| */ |
| TupleTableSlot * |
| EvalPlanQual(EPQState *epqstate, Relation relation, |
| Index rti, TupleTableSlot *inputslot) |
| { |
| TupleTableSlot *slot; |
| TupleTableSlot *testslot; |
| |
| Assert(rti > 0); |
| |
| /* |
| * Need to run a recheck subquery. Initialize or reinitialize EPQ state. |
| */ |
| EvalPlanQualBegin(epqstate); |
| |
| /* |
| * Callers will often use the EvalPlanQualSlot to store the tuple to avoid |
| * an unnecessary copy. |
| */ |
| testslot = EvalPlanQualSlot(epqstate, relation, rti); |
| if (testslot != inputslot) |
| ExecCopySlot(testslot, inputslot); |
| |
| /* |
| * Run the EPQ query. We assume it will return at most one tuple. |
| */ |
| slot = EvalPlanQualNext(epqstate); |
| |
| /* |
| * If we got a tuple, force the slot to materialize the tuple so that it |
| * is not dependent on any local state in the EPQ query (in particular, |
| * it's highly likely that the slot contains references to any pass-by-ref |
| * datums that may be present in copyTuple). As with the next step, this |
| * is to guard against early re-use of the EPQ query. |
| */ |
| if (!TupIsNull(slot)) |
| ExecMaterializeSlot(slot); |
| |
| /* |
| * Clear out the test tuple. This is needed in case the EPQ query is |
| * re-used to test a tuple for a different relation. (Not clear that can |
| * really happen, but let's be safe.) |
| */ |
| ExecClearTuple(testslot); |
| |
| return slot; |
| } |
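| |
| /* |
| * Illustrative sketch (hypothetical; condensed from the pattern used by the |
| * UPDATE/DELETE paths in nodeModifyTable.c): lock the latest row version |
| * directly into the EPQ slot, then recheck it. Error handling is omitted. |
| * |
| *     TupleTableSlot *inputslot; |
| *     TupleTableSlot *epqslot; |
| * |
| *     inputslot = EvalPlanQualSlot(epqstate, relation, rti); |
| *     test = table_tuple_lock(relation, tupleid, estate->es_snapshot, |
| *                             inputslot, estate->es_output_cid, |
| *                             lockmode, LockWaitBlock, |
| *                             TUPLE_LOCK_FLAG_FIND_LAST_VERSION, &tmfd); |
| *     if (test == TM_Ok) |
| *     { |
| *         epqslot = EvalPlanQual(epqstate, relation, rti, inputslot); |
| *         if (TupIsNull(epqslot)) |
| *             ... row no longer passes the quals; skip it ... |
| *         else |
| *             ... redo the update/delete using epqslot ... |
| *     } |
| */ |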
| |
| /* |
| * EvalPlanQualInit -- initialize during creation of a plan state node |
| * that might need to invoke EPQ processing. |
| * |
| * Note: subplan/auxrowmarks can be NULL/NIL if they will be set later |
| * with EvalPlanQualSetPlan. |
| */ |
| void |
| EvalPlanQualInit(EPQState *epqstate, EState *parentestate, |
| Plan *subplan, List *auxrowmarks, int epqParam) |
| { |
| Index rtsize = parentestate->es_range_table_size; |
| |
| /* initialize data not changing over EPQState's lifetime */ |
| epqstate->parentestate = parentestate; |
| epqstate->epqParam = epqParam; |
| |
| /* |
| * Allocate space to reference a slot for each potential rti - do so now |
| * rather than in EvalPlanQualBegin(), as done for other dynamically |
| * allocated resources, so EvalPlanQualSlot() can be used to hold tuples |
| * that *may* need EPQ later, without forcing the overhead of |
| * EvalPlanQualBegin(). |
| */ |
| epqstate->tuple_table = NIL; |
| epqstate->relsubs_slot = (TupleTableSlot **) |
| palloc0(rtsize * sizeof(TupleTableSlot *)); |
| |
| /* ... and remember data that EvalPlanQualBegin will need */ |
| epqstate->plan = subplan; |
| epqstate->arowMarks = auxrowmarks; |
| |
| /* ... and mark the EPQ state inactive */ |
| epqstate->origslot = NULL; |
| epqstate->recheckestate = NULL; |
| epqstate->recheckplanstate = NULL; |
| epqstate->relsubs_rowmark = NULL; |
| epqstate->relsubs_done = NULL; |
| } |
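| |
| /* |
| * Illustrative sketch (hypothetical) of the EPQState lifecycle as driven by |
| * a ModifyTable-like node; the subplan/rowmarks may instead be supplied |
| * later via EvalPlanQualSetPlan(): |
| * |
| *     EvalPlanQualInit(&epqstate, estate, subplan, auxrowmarks, epqParam); |
| *     ... |
| *     slot = EvalPlanQualSlot(&epqstate, relation, rti); |
| *     ... lock the row version into slot, then call EvalPlanQual() ... |
| *     ... |
| *     EvalPlanQualEnd(&epqstate); |
| */ |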
| |
| /* |
| * EvalPlanQualSetPlan -- set or change subplan of an EPQState. |
| * |
| * We used to need this so that ModifyTable could deal with multiple subplans. |
| * It could now be refactored out of existence. |
| */ |
| void |
| EvalPlanQualSetPlan(EPQState *epqstate, Plan *subplan, List *auxrowmarks) |
| { |
| /* If we have a live EPQ query, shut it down */ |
| EvalPlanQualEnd(epqstate); |
| /* And set/change the plan pointer */ |
| epqstate->plan = subplan; |
| /* The rowmarks depend on the plan, too */ |
| epqstate->arowMarks = auxrowmarks; |
| } |
| |
| /* |
| * Return, and create if necessary, a slot for an EPQ test tuple. |
| * |
| * Note this only requires EvalPlanQualInit() to have been called, |
| * EvalPlanQualBegin() is not necessary. |
| */ |
| TupleTableSlot * |
| EvalPlanQualSlot(EPQState *epqstate, |
| Relation relation, Index rti) |
| { |
| TupleTableSlot **slot; |
| |
| Assert(relation); |
| Assert(rti > 0 && rti <= epqstate->parentestate->es_range_table_size); |
| slot = &epqstate->relsubs_slot[rti - 1]; |
| |
| if (*slot == NULL) |
| { |
| MemoryContext oldcontext; |
| |
| oldcontext = MemoryContextSwitchTo(epqstate->parentestate->es_query_cxt); |
| *slot = table_slot_create(relation, &epqstate->tuple_table); |
| MemoryContextSwitchTo(oldcontext); |
| } |
| |
| return *slot; |
| } |
| |
| /* |
| * Fetch the current row value for a non-locked relation, identified by rti, |
| * that needs to be scanned by an EvalPlanQual operation. origslot must have |
| * been set to contain the current result row (top-level row) that we need to |
| * recheck. Returns true if a substitution tuple was found, false if not. |
| */ |
| bool |
| EvalPlanQualFetchRowMark(EPQState *epqstate, Index rti, TupleTableSlot *slot) |
| { |
| ExecAuxRowMark *earm = epqstate->relsubs_rowmark[rti - 1]; |
| ExecRowMark *erm; |
| Datum datum; |
| bool isNull; |
| |
| /* we should have a rowmark for this rel, and a current result row */ |
| Assert(earm != NULL); |
| Assert(epqstate->origslot != NULL); |
| |
| erm = earm->rowmark; |
| |
| if (RowMarkRequiresRowShareLock(erm->markType)) |
| elog(ERROR, "EvalPlanQual doesn't support locking rowmarks"); |
| |
| /* if child rel, must check whether it produced this row */ |
| if (erm->rti != erm->prti) |
| { |
| Oid tableoid; |
| |
| datum = ExecGetJunkAttribute(epqstate->origslot, |
| earm->toidAttNo, |
| &isNull); |
| /* non-locked rels could be on the inside of outer joins */ |
| if (isNull) |
| return false; |
| |
| tableoid = DatumGetObjectId(datum); |
| |
| Assert(OidIsValid(erm->relid)); |
| if (tableoid != erm->relid) |
| { |
| /* this child is inactive right now */ |
| return false; |
| } |
| } |
| |
| if (erm->markType == ROW_MARK_REFERENCE) |
| { |
| Assert(erm->relation != NULL); |
| |
| /* fetch the tuple's ctid */ |
| datum = ExecGetJunkAttribute(epqstate->origslot, |
| earm->ctidAttNo, |
| &isNull); |
| /* non-locked rels could be on the inside of outer joins */ |
| if (isNull) |
| return false; |
| |
| /* fetch requests on foreign tables must be passed to their FDW */ |
| if (erm->relation->rd_rel->relkind == RELKIND_FOREIGN_TABLE) |
| { |
| FdwRoutine *fdwroutine; |
| bool updated = false; |
| |
| fdwroutine = GetFdwRoutineForRelation(erm->relation, false); |
| /* this should have been checked already, but let's be safe */ |
| if (fdwroutine->RefetchForeignRow == NULL) |
| ereport(ERROR, |
| (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), |
| errmsg("cannot lock rows in foreign table \"%s\"", |
| RelationGetRelationName(erm->relation)))); |
| |
| fdwroutine->RefetchForeignRow(epqstate->recheckestate, |
| erm, |
| datum, |
| slot, |
| &updated); |
| if (TupIsNull(slot)) |
| elog(ERROR, "failed to fetch tuple for EvalPlanQual recheck"); |
| |
| /* |
| * Ideally we'd insist on updated == false here, but that assumes |
| * that FDWs can track that exactly, which they might not be able |
| * to. So just ignore the flag. |
| */ |
| return true; |
| } |
| else |
| { |
| /* ordinary table, fetch the tuple */ |
| if (!table_tuple_fetch_row_version(erm->relation, |
| (ItemPointer) DatumGetPointer(datum), |
| SnapshotAny, slot)) |
| elog(ERROR, "failed to fetch tuple for EvalPlanQual recheck"); |
| return true; |
| } |
| } |
| else |
| { |
| Assert(erm->markType == ROW_MARK_COPY); |
| |
| /* fetch the whole-row Var for the relation */ |
| datum = ExecGetJunkAttribute(epqstate->origslot, |
| earm->wholeAttNo, |
| &isNull); |
| /* non-locked rels could be on the inside of outer joins */ |
| if (isNull) |
| return false; |
| |
| ExecStoreHeapTupleDatum(datum, slot); |
| return true; |
| } |
| } |
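| |
| /* |
| * Illustrative sketch (hypothetical, condensed from ExecScanFetch in |
| * execScan.c): during EPQ, a scan node over a non-locked relation returns |
| * the substitute row at most once per recheck cycle: |
| * |
| *     if (epqstate->relsubs_rowmark[scanrelid - 1] != NULL) |
| *     { |
| *         TupleTableSlot *slot = node->ss_ScanTupleSlot; |
| * |
| *         epqstate->relsubs_done[scanrelid - 1] = true; |
| *         if (!EvalPlanQualFetchRowMark(epqstate, scanrelid, slot)) |
| *             return ExecClearTuple(slot); |
| *         ... apply the node's recheck method and return slot ... |
| *     } |
| */ |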
| |
| /* |
| * Fetch the next row (if any) from EvalPlanQual testing |
| * |
| * (In practice, there should never be more than one row...) |
| */ |
| TupleTableSlot * |
| EvalPlanQualNext(EPQState *epqstate) |
| { |
| MemoryContext oldcontext; |
| TupleTableSlot *slot; |
| |
| oldcontext = MemoryContextSwitchTo(epqstate->recheckestate->es_query_cxt); |
| slot = ExecProcNode(epqstate->recheckplanstate); |
| MemoryContextSwitchTo(oldcontext); |
| |
| return slot; |
| } |
| |
| /* |
| * Initialize or reset an EvalPlanQual state tree |
| */ |
| void |
| EvalPlanQualBegin(EPQState *epqstate) |
| { |
| EState *parentestate = epqstate->parentestate; |
| EState *recheckestate = epqstate->recheckestate; |
| |
| if (recheckestate == NULL) |
| { |
| /* First time through, so create a child EState */ |
| EvalPlanQualStart(epqstate, epqstate->plan); |
| } |
| else |
| { |
| /* |
| * We already have a suitable child EPQ tree, so just reset it. |
| */ |
| Index rtsize = parentestate->es_range_table_size; |
| PlanState *rcplanstate = epqstate->recheckplanstate; |
| |
| MemSet(epqstate->relsubs_done, 0, rtsize * sizeof(bool)); |
| |
| /* Recopy current values of parent parameters */ |
| if (parentestate->es_plannedstmt->paramExecTypes != NIL) |
| { |
| int i; |
| |
| /* |
| * Force evaluation of any InitPlan outputs that could be needed |
| * by the subplan, just in case they got reset since |
| * EvalPlanQualStart (see comments therein). |
| */ |
| ExecSetParamPlanMulti(rcplanstate->plan->extParam, |
| GetPerTupleExprContext(parentestate), NULL); |
| |
| i = list_length(parentestate->es_plannedstmt->paramExecTypes); |
| |
| while (--i >= 0) |
| { |
| /* copy value if any, but not execPlan link */ |
| recheckestate->es_param_exec_vals[i].value = |
| parentestate->es_param_exec_vals[i].value; |
| recheckestate->es_param_exec_vals[i].isnull = |
| parentestate->es_param_exec_vals[i].isnull; |
| } |
| } |
| |
| /* |
| * Mark child plan tree as needing rescan at all scan nodes. The |
| * first ExecProcNode will take care of actually doing the rescan. |
| */ |
| rcplanstate->chgParam = bms_add_member(rcplanstate->chgParam, |
| epqstate->epqParam); |
| } |
| } |
| |
| /* |
| * Start execution of an EvalPlanQual plan tree. |
| * |
| * This is a cut-down version of ExecutorStart(): we copy some state from |
| * the top-level estate rather than initializing it fresh. |
| */ |
| static void |
| EvalPlanQualStart(EPQState *epqstate, Plan *planTree) |
| { |
| EState *parentestate = epqstate->parentestate; |
| Index rtsize = parentestate->es_range_table_size; |
| EState *rcestate; |
| MemoryContext oldcontext; |
| ListCell *l; |
| |
| epqstate->recheckestate = rcestate = CreateExecutorState(); |
| |
| oldcontext = MemoryContextSwitchTo(rcestate->es_query_cxt); |
| |
| /* signal that this is an EState for executing EPQ */ |
| rcestate->es_epq_active = epqstate; |
| |
| /* |
| * Child EPQ EStates share the parent's copy of unchanging state such as |
| * the snapshot, rangetable, and external Param info. They need their own |
| * copies of local state, including a tuple table, es_param_exec_vals, |
| * result-rel info, etc. |
| */ |
| rcestate->es_direction = ForwardScanDirection; |
| rcestate->es_snapshot = parentestate->es_snapshot; |
| rcestate->es_crosscheck_snapshot = parentestate->es_crosscheck_snapshot; |
| rcestate->es_range_table = parentestate->es_range_table; |
| rcestate->es_range_table_size = parentestate->es_range_table_size; |
| rcestate->es_relations = parentestate->es_relations; |
| rcestate->es_queryEnv = parentestate->es_queryEnv; |
| rcestate->es_rowmarks = parentestate->es_rowmarks; |
| rcestate->es_plannedstmt = parentestate->es_plannedstmt; |
| rcestate->es_junkFilter = parentestate->es_junkFilter; |
| rcestate->es_output_cid = parentestate->es_output_cid; |
| |
| /* |
| * ResultRelInfos needed by subplans are initialized from scratch when the |
| * subplans themselves are initialized. |
| */ |
| rcestate->es_result_relations = NULL; |
| /* es_trig_target_relations must NOT be copied */ |
| rcestate->es_top_eflags = parentestate->es_top_eflags; |
| rcestate->es_instrument = parentestate->es_instrument; |
| /* es_auxmodifytables must NOT be copied */ |
| |
| /* |
| * The external param list is simply shared from parent. The internal |
| * param workspace has to be local state, but we copy the initial values |
| * from the parent, so as to have access to any param values that were |
| * already set from other parts of the parent's plan tree. |
| */ |
| rcestate->es_param_list_info = parentestate->es_param_list_info; |
| if (parentestate->es_plannedstmt->paramExecTypes != NIL) |
| { |
| int i; |
| |
| /* |
| * Force evaluation of any InitPlan outputs that could be needed by |
| * the subplan. (With more complexity, maybe we could postpone this |
| * till the subplan actually demands them, but it doesn't seem worth |
| * the trouble; this is a corner case already, since usually the |
| * InitPlans would have been evaluated before reaching EvalPlanQual.) |
| * |
| * This will not touch output params of InitPlans that occur somewhere |
| * within the subplan tree, only those that are attached to the |
| * ModifyTable node or above it and are referenced within the subplan. |
| * That's OK though, because the planner would only attach such |
| * InitPlans to a lower-level SubqueryScan node, and EPQ execution |
| * will not descend into a SubqueryScan. |
| * |
| * The EState's per-output-tuple econtext is sufficiently short-lived |
| * for this, since it should get reset before there is any chance of |
| * doing EvalPlanQual again. |
| */ |
| ExecSetParamPlanMulti(planTree->extParam, |
| GetPerTupleExprContext(parentestate), NULL); |
| |
| /* now make the internal param workspace ... */ |
| i = list_length(parentestate->es_plannedstmt->paramExecTypes); |
| rcestate->es_param_exec_vals = (ParamExecData *) |
| palloc0(i * sizeof(ParamExecData)); |
| /* ... and copy down all values, whether really needed or not */ |
| while (--i >= 0) |
| { |
| /* copy value if any, but not execPlan link */ |
| rcestate->es_param_exec_vals[i].value = |
| parentestate->es_param_exec_vals[i].value; |
| rcestate->es_param_exec_vals[i].isnull = |
| parentestate->es_param_exec_vals[i].isnull; |
| } |
| } |
| |
| /* |
| * Initialize private state information for each SubPlan. We must do this |
| * before running ExecInitNode on the main query tree, since |
| * ExecInitSubPlan expects to be able to find these entries. Some of the |
| * SubPlans might not be used in the part of the plan tree we intend to |
| * run, but since it's not easy to tell which, we just initialize them |
| * all. |
| */ |
| Assert(rcestate->es_subplanstates == NIL); |
| foreach(l, parentestate->es_plannedstmt->subplans) |
| { |
| Plan *subplan = (Plan *) lfirst(l); |
| PlanState *subplanstate; |
| |
| subplanstate = ExecInitNode(subplan, rcestate, 0); |
| rcestate->es_subplanstates = lappend(rcestate->es_subplanstates, |
| subplanstate); |
| } |
| |
| /* |
| * Build an RTI indexed array of rowmarks, so that |
| * EvalPlanQualFetchRowMark() can efficiently access the to-be-fetched |
| * rowmark. |
| */ |
| epqstate->relsubs_rowmark = (ExecAuxRowMark **) |
| palloc0(rtsize * sizeof(ExecAuxRowMark *)); |
| foreach(l, epqstate->arowMarks) |
| { |
| ExecAuxRowMark *earm = (ExecAuxRowMark *) lfirst(l); |
| |
| epqstate->relsubs_rowmark[earm->rowmark->rti - 1] = earm; |
| } |
| |
| /* |
| * Initialize per-relation EPQ tuple states to not-fetched. |
| */ |
| epqstate->relsubs_done = (bool *) |
| palloc0(rtsize * sizeof(bool)); |
| |
| /* |
| * Initialize the private state information for all the nodes in the part |
| * of the plan tree we need to run. This opens files, allocates storage |
| * and leaves us ready to start processing tuples. |
| */ |
| epqstate->recheckplanstate = ExecInitNode(planTree, rcestate, 0); |
| |
| MemoryContextSwitchTo(oldcontext); |
| } |
| |
| /* |
| * EvalPlanQualEnd -- shut down at termination of parent plan state node, |
| * or if we are done with the current EPQ child. |
| * |
| * This is a cut-down version of ExecutorEnd(); basically we want to do most |
| * of the normal cleanup, but *not* close the result relations that are |
| * simply shared from the outer query. We do, however, have to close any |
| * result and trigger target relations that this EState opened itself, since |
| * those are not shared. (There probably shouldn't be any of the latter, but |
| * just in case...) |
| */ |
| void |
| EvalPlanQualEnd(EPQState *epqstate) |
| { |
| EState *estate = epqstate->recheckestate; |
| Index rtsize; |
| MemoryContext oldcontext; |
| ListCell *l; |
| |
| rtsize = epqstate->parentestate->es_range_table_size; |
| |
| /* |
| * We may have a tuple table, even if EPQ wasn't started, because we allow |
| * use of EvalPlanQualSlot() without calling EvalPlanQualBegin(). |
| */ |
| if (epqstate->tuple_table != NIL) |
| { |
| memset(epqstate->relsubs_slot, 0, |
| rtsize * sizeof(TupleTableSlot *)); |
| ExecResetTupleTable(epqstate->tuple_table, true); |
| epqstate->tuple_table = NIL; |
| } |
| |
| /* EPQ wasn't started, nothing further to do */ |
| if (estate == NULL) |
| return; |
| |
| oldcontext = MemoryContextSwitchTo(estate->es_query_cxt); |
| |
| ExecEndNode(epqstate->recheckplanstate); |
| |
| foreach(l, estate->es_subplanstates) |
| { |
| PlanState *subplanstate = (PlanState *) lfirst(l); |
| |
| ExecEndNode(subplanstate); |
| } |
| |
| /* throw away the per-estate tuple table, some node may have used it */ |
| ExecResetTupleTable(estate->es_tupleTable, false); |
| |
| /* Close any result and trigger target relations attached to this EState */ |
| ExecCloseResultRelations(estate); |
| |
| MemoryContextSwitchTo(oldcontext); |
| |
| FreeExecutorState(estate); |
| |
| /* Mark EPQState idle */ |
| epqstate->origslot = NULL; |
| epqstate->recheckestate = NULL; |
| epqstate->recheckplanstate = NULL; |
| epqstate->relsubs_rowmark = NULL; |
| epqstate->relsubs_done = NULL; |
| } |
| |
| /* Use the given attribute map to convert an attribute number in the |
| * base relation to an attribute number in the other relation. Forgive |
| * out-of-range attributes by mapping them to zero. Treat a NULL map as |
| * the identity map. |
| */ |
| AttrNumber |
| attrMap(TupleConversionMap *map, AttrNumber anum) |
| { |
| if (map == NULL) |
| return anum; |
| |
| if (0 < anum && anum <= map->indesc->natts) |
| { |
| for (int i = 0; i < map->outdesc->natts; i++) |
| { |
| if (map->attrMap->attnums[i] == anum) |
| return i + 1; |
| } |
| } |
| return 0; |
| } |
| |
| /* For attrMapExpr below. |
| * |
| * Mutate Var nodes in an expression using the given attribute map. |
| * Insist the Var nodes have varno == 1 and the that the mapping |
| * yields a live attribute number (non-zero). |
| */ |
| static Node * |
| apply_attrmap_mutator(Node *node, TupleConversionMap *map) |
| { |
| if (node == NULL) |
| return NULL; |
| |
| if (IsA(node, Var)) |
| { |
| Var *var = (Var *) node; |
| AttrNumber anum; |
| |
| Assert(var->varno == 1); /* in CHECK constraints */ |
| anum = attrMap(map, var->varattno); |
| |
| if (anum == 0) |
| { |
| /* Should never happen, but best caught early. */ |
| elog(ERROR, "attribute map discrepancy"); |
| } |
| else if (anum != var->varattno) |
| { |
| var = copyObject(var); |
| var->varattno = anum; |
| } |
| return (Node *) var; |
| } |
| return expression_tree_mutator(node, apply_attrmap_mutator, (void *) map); |
| } |
| |
| /* Apply attrMap over the Var nodes in an expression. */ |
| Node * |
| attrMapExpr(TupleConversionMap *map, Node *expr) |
| { |
| return apply_attrmap_mutator(expr, map); |
| } |
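| |
| /* |
| * Illustrative sketch (hypothetical): given a conversion map from a base |
| * relation's tuple descriptor to a child's (e.g. one built with |
| * convert_tuples_by_name()), a CHECK expression written against the base |
| * relation can be rewritten to reference the child's attribute numbers: |
| * |
| *     TupleConversionMap *map = convert_tuples_by_name(baseDesc, childDesc); |
| *     Node *childCheckExpr = attrMapExpr(map, baseCheckExpr); |
| * |
| * With a NULL map, both attrMap() and attrMapExpr() act as the identity. |
| */ |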
| |
| /* |
| * Adjust the INSERT/UPDATE/DELETE row count for replicated tables on the QD |
| */ |
| static void |
| AdjustReplicatedTableCounts(EState *estate) |
| { |
| ResultRelInfo *resultRelInfo; |
| ListCell *l; |
| bool containReplicatedTable = false; |
| int numsegments = 1; |
| |
| if (Gp_role != GP_ROLE_DISPATCH) |
| return; |
| |
| /* check whether the opened result relations contain a replicated table */ |
| foreach(l, estate->es_opened_result_relations) |
| { |
| resultRelInfo = lfirst(l); |
| if (!resultRelInfo->ri_RelationDesc->rd_cdbpolicy) |
| continue; |
| |
| if (GpPolicyIsReplicated(resultRelInfo->ri_RelationDesc->rd_cdbpolicy)) |
| { |
| containReplicatedTable = true; |
| numsegments = resultRelInfo->ri_RelationDesc->rd_cdbpolicy->numsegments; |
| } |
| else if (containReplicatedTable) |
| { |
| /* |
| * If one result relation is a replicated table, error out if any |
| * other result relation is not replicated. |
| */ |
| elog(ERROR, "mix of replicated and non-replicated tables in result_relation is not supported"); |
| } |
| } |
| |
| if (containReplicatedTable) |
| estate->es_processed = estate->es_processed / numsegments; |
| } |
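| |
| /* |
| * Worked example (illustrative): an INSERT of N rows into a replicated |
| * table whose policy spans 3 segments writes the rows on every segment, so |
| * the QD accumulates es_processed = 3 * N. Dividing by numsegments (3) |
| * restores the user-visible row count N. |
| */ |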
| |
| /* |
| * Greenplum specific code: |
| * For details, see the comments at the definition of the static variable executor_run_nesting_level. |
| */ |
| bool |
| already_under_executor_run(void) |
| { |
| return executor_run_nesting_level > 0; |
| } |
| |
| /* |
| * Maintain the status of Materialized Views in response to write operations on the underlying relations. |
| * |
| * For partitioned tables, changes are tracked using the relations of their leaf partitions rather than |
| * the parent tables themselves. This minimizes the impact on Materialized Views that depend on the |
| * partition tree, ensuring only relevant partitions are affected. |
| * |
| * In the case of cross-partition updates, an UPDATE operation on a parent table is decomposed into |
| * an INSERT on one leaf partition and a DELETE on another. As a result, the status transition follows |
| * an UP direction for both INSERT and DELETE operations, rather than an UP_AND_DOWN direction on the |
| * parent table. This approach optimizes performance and reduces unnecessary status changes, avoiding |
| * invalidation of unrelated materialized views. |
| * |
| * For non-partitioned tables, the status transition is determined directly by the operation's command type. |
| */ |
| static void |
| MaintainMaterializedViewStatus(QueryDesc *queryDesc, CmdType operation) |
| { |
| List *inserted = NIL; |
| List *updated = NIL; |
| List *deleted = NIL; |
| List *unique_result_relations = NIL; |
| List *rtable = queryDesc->plannedstmt->rtable; |
| int length = list_length(rtable); |
| ListCell *lc; |
| Oid relid; |
| |
| /* |
| * Process the extend protocol data (EPD) first to get the affected relations. |
| */ |
| ConsumeAndProcessExtendProtocolData_IUD(&inserted, &updated, &deleted); |
| |
| foreach (lc, inserted) |
| { |
| relid = lfirst_oid(lc); |
| /* Only need to transfer to UP direction. */ |
| SetRelativeMatviewAuxStatus(relid, MV_DATA_STATUS_EXPIRED_INSERT_ONLY, |
| MV_DATA_STATUS_TRANSFER_DIRECTION_UP); |
| } |
| |
| foreach (lc, updated) |
| { |
| relid = lfirst_oid(lc); |
| SetRelativeMatviewAuxStatus(relid, MV_DATA_STATUS_EXPIRED, |
| MV_DATA_STATUS_TRANSFER_DIRECTION_UP); |
| |
| } |
| |
| foreach (lc, deleted) |
| { |
| relid = lfirst_oid(lc); |
| SetRelativeMatviewAuxStatus(relid, MV_DATA_STATUS_EXPIRED, |
| MV_DATA_STATUS_TRANSFER_DIRECTION_UP); |
| |
| } |
| |
| unique_result_relations = list_concat_unique_int(NIL, queryDesc->plannedstmt->resultRelations); |
| foreach(lc, unique_result_relations) |
| { |
| int varno = lfirst_int(lc); |
| RangeTblEntry *rte; |
| |
| /* Avoid a crash in case varno does not point at a valid rte. */ |
| if (varno <= 0 || varno > length) |
| { |
| ereport(WARNING, (errmsg("could not find rte of varno: %d", varno))); |
| continue; |
| } |
| rte = rt_fetch(varno, rtable); |
| |
| if (RELKIND_PARTITIONED_TABLE == rte->relkind) |
| { |
| /* |
| * There should be leaf partitions if we modified a partitioned table. |
| * Do a second check and fall back to the partitioned table |
| * in case we failed to find one. |
| */ |
| if ((list_length(inserted) == 0) && |
| (list_length(updated) == 0) && |
| (list_length(deleted) == 0)) |
| { |
| ereport(WARNING, |
| (errmsg("failed to find leaf partitions of partitioned table: %s", get_rel_name(rte->relid)))); |
| } |
| /* Should already be processed, just bypass. */ |
| continue; |
| } |
| else |
| { |
| /* Do a normal update. */ |
| switch (operation) |
| { |
| case CMD_INSERT: |
| SetRelativeMatviewAuxStatus(rte->relid, |
| MV_DATA_STATUS_EXPIRED_INSERT_ONLY, |
| MV_DATA_STATUS_TRANSFER_DIRECTION_ALL); |
| break; |
| case CMD_UPDATE: |
| case CMD_DELETE: |
| SetRelativeMatviewAuxStatus(rte->relid, |
| MV_DATA_STATUS_EXPIRED, |
| MV_DATA_STATUS_TRANSFER_DIRECTION_ALL); |
| break; |
| default: |
| /* If there is a writable CTE, just mark the relation as expired. */ |
| if (queryDesc->plannedstmt->hasModifyingCTE) |
| SetRelativeMatviewAuxStatus(rte->relid, MV_DATA_STATUS_EXPIRED, |
| MV_DATA_STATUS_TRANSFER_DIRECTION_ALL); |
| break; |
| } |
| } |
| } |
| } |