| /* |
| * Licensed to the Apache Software Foundation (ASF) under one |
| * or more contributor license agreements. See the NOTICE file |
| * distributed with this work for additional information |
| * regarding copyright ownership. The ASF licenses this file |
| * to you under the Apache License, Version 2.0 (the |
| * "License"); you may not use this file except in compliance |
| * with the License. You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, |
| * software distributed under the License is distributed on an |
| * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
| * KIND, either express or implied. See the License for the |
| * specific language governing permissions and limitations |
| * under the License. |
| */ |
| |
| /*------------------------------------------------------------------------- |
| * |
| * cdbexplain.c |
| * Functions supporting the Greenplum EXPLAIN ANALYZE command |
| * |
| *------------------------------------------------------------------------- |
| */ |
| |
| #include <unistd.h> |
| |
| #include "postgres.h" |
| #include "portability/instr_time.h" |
| |
| #include "cdb/cdbconn.h" /* SegmentDatabaseDescriptor */ |
| #include "cdb/cdbdispatchresult.h" /* CdbDispatchResults */ |
| #include "cdb/cdbexplain.h" /* me */ |
| #include "cdb/cdbpartition.h" |
| #include "cdb/cdbvars.h" /* Gp_segment */ |
| #include "executor/executor.h" /* ExecStateTreeWalker */ |
| #include "executor/instrument.h" /* Instrumentation */ |
| #include "lib/stringinfo.h" /* StringInfo */ |
| #include "gp-libpq-fe.h" /* PGresult; prereq for libpq-int.h */ |
| #include "gp-libpq-int.h" /* pg_result */ |
| #include "libpq/pqformat.h" /* pq_beginmessage() etc. */ |
| #include "utils/memutils.h" /* MemoryContextGetPeakSpace() */ |
| #include "cdb/memquota.h" |
| #include "cdb/cdbgang.h" |
| #include "inttypes.h" |
| #include "parser/parsetree.h" |
| #include "utils/vmem_tracker.h" |
| |
| /* EXPLAIN ANALYZE statistics for one plan node of a slice */ |
| typedef struct CdbExplain_StatInst |
| { |
| NodeTag pstype; /* PlanState node type */ |
| bool running; /* True if we've completed first tuple */ |
| instr_time starttime; /* Start time of current iteration of node */ |
| instr_time counter; /* Accumulated runtime for this node */ |
| double firsttuple; /* Time for first tuple of this cycle */ |
| double startup; /* Total startup time (in seconds) */ |
| double total; /* Total total time (in seconds) */ |
| double ntuples; /* Total tuples produced */ |
| double nloops; /* # of run cycles for this node */ |
| double execmemused; /* executor memory used (bytes) */ |
| double workmemused; /* work_mem actually used (bytes) */ |
| double workmemwanted; /* work_mem to avoid workfile i/o (bytes) */ |
| bool workfileReused; /* workfile reused in this node */ |
| bool workfileCreated;/* workfile created in this node */ |
| instr_time firststart; /* Start time of first iteration of node */ |
| double peakMemBalance; /* Max mem account balance */ |
| int numPartScanned; /* Number of part tables scanned */ |
| int bnotes; /* Offset to beginning of node's extra text */ |
| int enotes; /* Offset to end of node's extra text */ |
| } CdbExplain_StatInst; |
| |
| |
| /* EXPLAIN ANALYZE statistics for one process working on one slice */ |
| typedef struct CdbExplain_SliceWorker |
| { |
| double peakmemused; /* bytes alloc in per-query mem context tree */ |
| double vmem_reserved; /* vmem reserved by a QE */ |
| double memory_accounting_global_peak; /* peak memory observed during memory accounting */ |
| } CdbExplain_SliceWorker; |
| |
| |
| /* Header of EXPLAIN ANALYZE statistics message sent from qExec to qDisp */ |
| typedef struct CdbExplain_StatHdr |
| { |
| NodeTag type; /* T_CdbExplain_StatHdr */ |
| int segindex; /* segment id */ |
| char hostname[SEGMENT_IDENTITY_NAME_LENGTH]; /* segment hostname */ |
| int nInst; /* num of StatInst entries following StatHdr */ |
| int bnotes; /* offset to extra text area */ |
| int enotes; /* offset to end of extra text area */ |
| |
| int memAccountTreeNodeCount; /* How many mem account we serialized */ |
| int memAccountTreeStartOffset; /* Where in the header our mem account tree is serialized */ |
| |
| CdbExplain_SliceWorker worker; /* qExec's overall stats for slice */ |
| |
| /* |
| * During serialization, we use this as a temporary StatInst and save "one-at-a-time" |
| * StatInst into this variable. We then write this variable into buffer (serialize it) |
| * and then "recycle" the same inst for next plan node's StatInst. |
| * During deserialization, an Array [0..nInst-1] of StatInst entries is appended starting here. |
| */ |
| CdbExplain_StatInst inst[1]; |
| |
| /* extra text is appended after that */ |
| } CdbExplain_StatHdr; |
| |
| |
| /* Dispatch status summarized over workers in a slice */ |
| typedef struct CdbExplain_DispatchSummary |
| { |
| int nResult; |
| int nOk; |
| int nError; |
| int nCanceled; |
| int nNotDispatched; |
| int nIgnorableError; |
| } CdbExplain_DispatchSummary; |
| |
| |
| /* One node's EXPLAIN ANALYZE statistics for all the workers of its segworker group */ |
| typedef struct CdbExplain_NodeSummary |
| { |
| /* Summary over all the node's workers */ |
| CdbExplain_Agg ntuples; |
| CdbExplain_Agg execmemused; |
| CdbExplain_Agg workmemused; |
| CdbExplain_Agg workmemwanted; |
| CdbExplain_Agg totalWorkfileReused; |
| CdbExplain_Agg totalWorkfileCreated; |
| CdbExplain_Agg peakMemBalance; |
| /* Used for DynamicTableScan, DynamicIndexScan and DynamicBitmapTableScan */ |
| CdbExplain_Agg totalPartTableScanned; |
| |
| /* insts array info */ |
| int segindex0; /* segment id of insts[0] */ |
| int ninst; /* num of StatInst entries in inst array */ |
| |
| /* Array [0..ninst-1] of StatInst entries is appended starting here */ |
| CdbExplain_StatInst insts[1]; /* variable size - must be last */ |
| } CdbExplain_NodeSummary; |
| |
| |
| /* One slice's statistics for all the workers of its segworker group */ |
| typedef struct CdbExplain_SliceSummary |
| { |
| Slice *slice; |
| |
| /* worker array */ |
| int nworker; /* num of SliceWorker slots in worker array */ |
| int segindex0; /* segment id of workers[0] */ |
| CdbExplain_SliceWorker *workers; /* -> array [0..nworker-1] of SliceWorker */ |
| |
| SerializedMemoryAccount **memoryTreeRoots; /* Array of pointers to pseudo roots [0...nworker-1] */ |
| |
| CdbExplain_Agg peakmemused; /* Summary of SliceWorker stats over all of the slice's workers */ |
| |
| CdbExplain_Agg vmem_reserved; /* vmem reserved by QEs */ |
| |
| CdbExplain_Agg memory_accounting_global_peak; /* Peak memory accounting balance by QEs */ |
| |
| /* Rollup of per-node stats over all of the slice's workers and nodes */ |
| double workmemused_max; |
| double workmemwanted_max; |
| |
| /* How many workers were dispatched and returned results? (0 if local) */ |
| CdbExplain_DispatchSummary dispatchSummary; |
| } CdbExplain_SliceSummary; |
| |
| |
| /* State for cdbexplain_showExecStats() */ |
| typedef struct CdbExplain_ShowStatCtx |
| { |
| MemoryContext explaincxt; /* alloc all our buffers from this */ |
| StringInfoData extratextbuf; |
| instr_time querystarttime; |
| |
| /* Rollup of per-node stats over the entire query plan */ |
| double workmemused_max; |
| double workmemwanted_max; |
| |
| /* Per-slice statistics are deposited in this SliceSummary array */ |
| int nslice; /* num of slots in slices array */ |
| CdbExplain_SliceSummary *slices; /* -> array[0..nslice-1] of SliceSummary */ |
| } CdbExplain_ShowStatCtx; |
| |
| |
| /* State for cdbexplain_sendStatWalker() and cdbexplain_collectStatsFromNode() */ |
| typedef struct CdbExplain_SendStatCtx |
| { |
| StringInfoData *notebuf; |
| StringInfoData buf; |
| CdbExplain_StatHdr hdr; |
| } CdbExplain_SendStatCtx; |
| |
| |
| /* State for cdbexplain_recvStatWalker() and cdbexplain_depositStatsToNode() */ |
| typedef struct CdbExplain_RecvStatCtx |
| { |
| /* |
| * iStatInst is the current StatInst serial during the depositing process for a slice. |
| * We walk the plan tree, and for each node we deposit stat from all the QEs |
| * of the segworker group for current slice. After we finish one node, we increase |
| * iStatInst, which means we are done with one plan node's stat across |
| * all segments and now moving forward to the next one. Once we are done |
| * processing all the plan node of a PARTICULAR slice, then we switch to the |
| * next slice, read the messages from all the QEs of the next slice (another |
| * segworker group) store them in the msgptrs, reset the iStatInst and then start |
| * parsing these messages and depositing them in the nodes of the new slice. |
| */ |
| int iStatInst; |
| /* |
| * nStatInst is the total number of StatInst for current slice. Typically this is |
| * the number of plan nodes in the current slice. |
| */ |
| int nStatInst; |
| /* segIndexMin is the min of segment index from which we collected message (i.e., saved msgptrs) */ |
| int segindexMin; |
| /* segIndexMax is the max of segment index from which we collected message (i.e., saved msgptrs) */ |
| int segindexMax; |
| /* We deposit stat for one slice at a time. sliceIndex saves the current slice */ |
| int sliceIndex; |
| int segmentNum; |
| /* |
| * The number of msgptrs that we have saved for current slice. This is |
| * typically the number of QE processes |
| */ |
| int nmsgptr; |
| /* The actual messages. Contains an array of StatInst too */ |
| CdbExplain_StatHdr **msgptrs; |
| CdbDispatchResults *dispatchResults; |
| StringInfoData *extratextbuf; |
| CdbExplain_ShowStatCtx *showstatctx; |
| |
| /* Rollup of per-node stats over all of the slice's workers and nodes */ |
| double workmemused_max; |
| double workmemwanted_max; |
| } CdbExplain_RecvStatCtx; |
| |
| |
| /* State for cdbexplain_localStatWalker() */ |
| typedef struct CdbExplain_LocalStatCtx |
| { |
| CdbExplain_SendStatCtx send; |
| CdbExplain_RecvStatCtx recv; |
| CdbExplain_StatHdr *msgptrs[1]; |
| } CdbExplain_LocalStatCtx; |
| |
| |
| static CdbVisitOpt |
| cdbexplain_localStatWalker(PlanState *planstate, void *context); |
| static CdbVisitOpt |
| cdbexplain_sendStatWalker(PlanState *planstate, void *context); |
| static CdbVisitOpt |
| cdbexplain_recvStatWalker(PlanState *planstate, void *context); |
| |
| static void |
| cdbexplain_collectSliceStats(PlanState *planstate, |
| CdbExplain_SliceWorker *out_worker); |
| static void |
| cdbexplain_depositSliceStats(CdbExplain_StatHdr *hdr, |
| CdbExplain_RecvStatCtx *recvstatctx); |
| static void |
| cdbexplain_collectStatsFromNode(PlanState *planstate, CdbExplain_SendStatCtx *ctx); |
| static void |
| cdbexplain_depositStatsToNode(PlanState *planstate, CdbExplain_RecvStatCtx *ctx); |
| static int |
| cdbexplain_collectExtraText(PlanState *planstate, StringInfo notebuf); |
| static int |
| cdbexplain_countLeafPartTables(PlanState *planstate); |
| |
| |
| /* |
| * cdbexplain_localExecStats |
| * Called by qDisp to build NodeSummary and SliceSummary blocks |
| * containing EXPLAIN ANALYZE statistics for a root slice that |
| * has been executed locally in the qDisp process. Attaches these |
| * structures to the PlanState nodes' Instrumentation objects for |
| * later use by cdbexplain_showExecStats(). |
| * |
| * 'planstate' is the top PlanState node of the slice. |
| * 'showstatctx' is a CdbExplain_ShowStatCtx object which was created by |
| * calling cdbexplain_showExecStatsBegin(). |
| */ |
| void |
| cdbexplain_localExecStats(struct PlanState *planstate, |
| struct CdbExplain_ShowStatCtx *showstatctx) |
| { |
| CdbExplain_LocalStatCtx ctx; |
| MemoryContext oldcxt; |
| |
| Assert(Gp_role != GP_ROLE_EXECUTE); |
| |
| Insist(planstate && planstate->instrument && showstatctx); |
| |
| /* Switch to the EXPLAIN memory context. */ |
| oldcxt = MemoryContextSwitchTo(showstatctx->explaincxt); |
| |
| memset(&ctx, 0, sizeof(ctx)); |
| |
| /* Set up send context area. */ |
| ctx.send.notebuf = &showstatctx->extratextbuf; |
| |
| /* Set up a temporary StatHdr for both collecting and depositing stats. */ |
| gethostname(ctx.send.hdr.hostname,SEGMENT_IDENTITY_NAME_LENGTH-1); |
| ctx.msgptrs[0] = &ctx.send.hdr; |
| ctx.send.hdr.segindex = GetQEIndex(); |
| ctx.send.hdr.nInst = 1; |
| |
| /* Set up receive context area referencing our temp StatHdr. */ |
| ctx.recv.nStatInst = ctx.send.hdr.nInst; |
| ctx.recv.segindexMin = ctx.recv.segindexMax = ctx.send.hdr.segindex; |
| |
| ctx.recv.sliceIndex = LocallyExecutingSliceIndex(planstate->state); |
| ctx.recv.msgptrs = ctx.msgptrs; |
| ctx.recv.nmsgptr = 1; |
| ctx.recv.dispatchResults = NULL; |
| ctx.recv.extratextbuf = NULL; |
| ctx.recv.showstatctx = showstatctx; |
| |
| /* |
| * Collect and redeposit statistics from each PlanState node in this slice. |
| * Any extra message text will be appended directly to extratextbuf. |
| */ |
| planstate_walk_node(planstate, cdbexplain_localStatWalker, &ctx); |
| |
| /* Obtain per-slice stats and put them in SliceSummary. */ |
| cdbexplain_collectSliceStats(planstate, &ctx.send.hdr.worker); |
| cdbexplain_depositSliceStats(&ctx.send.hdr, &ctx.recv); |
| |
| /* Restore caller's memory context. */ |
| MemoryContextSwitchTo(oldcxt); |
| } /* cdbexplain_localExecStats */ |
| |
| |
| /* |
| * cdbexplain_localStatWalker |
| */ |
| CdbVisitOpt |
| cdbexplain_localStatWalker(PlanState *planstate, void *context) |
| { |
| CdbExplain_LocalStatCtx *ctx = (CdbExplain_LocalStatCtx *)context; |
| |
| /* Collect stats into our temporary StatInst and caller's extratextbuf. */ |
| cdbexplain_collectStatsFromNode(planstate, &ctx->send); |
| |
| /* Redeposit stats back into Instrumentation, and attach a NodeSummary. */ |
| cdbexplain_depositStatsToNode(planstate, &ctx->recv); |
| |
| /* Don't descend across a slice boundary. */ |
| if (IsA(planstate, MotionState)) |
| return CdbVisit_Skip; |
| |
| return CdbVisit_Walk; |
| } /* cdbexplain_localStatWalker */ |
| |
| |
| /* |
| * cdbexplain_sendExecStats |
| * Called by qExec process to send EXPLAIN ANALYZE statistics to qDisp. |
| * On the qDisp, libpq will recognize our special message type ('Y') and |
| * attach the message to the current command's PGresult object. |
| */ |
| void |
| cdbexplain_sendExecStats(QueryDesc *queryDesc) |
| { |
| EState *estate; |
| PlanState *planstate; |
| CdbExplain_SendStatCtx ctx; |
| StringInfoData notebuf; |
| StringInfoData memoryAccountTreeBuffer; |
| |
| /* Header offset (where header begins in the message buffer) */ |
| int hoff; |
| |
| Assert(Gp_role == GP_ROLE_EXECUTE); |
| |
| if (!queryDesc || |
| !queryDesc->estate) |
| return; |
| |
| /* If executing a root slice (UPD/DEL/INS), start at top of plan tree. */ |
| estate = queryDesc->estate; |
| if (LocallyExecutingSliceIndex(estate) == RootSliceIndex(estate)) |
| planstate = queryDesc->planstate; |
| |
| /* Non-root slice: Start at child of our sending Motion node. */ |
| else |
| { |
| planstate = &(getMotionState(queryDesc->planstate, LocallyExecutingSliceIndex(estate))->ps); |
| Assert(planstate && |
| IsA(planstate, MotionState) && |
| planstate->lefttree); |
| planstate = planstate->lefttree; |
| } |
| |
| if (planstate == NULL) |
| return; |
| |
| /* Start building the message header in our context area. */ |
| memset(&ctx, 0, sizeof(ctx)); |
| ctx.hdr.type = T_CdbExplain_StatHdr; |
| ctx.hdr.segindex = GetQEIndex(); |
| //ctx.hdr.hostname = |
| ctx.hdr.nInst = 0; |
| gethostname(ctx.hdr.hostname,SEGMENT_IDENTITY_NAME_LENGTH); |
| // strncpy(ctx.hdr.hostname, estate->ctx->name,SEGMENT_IDENTITY_NAME_LENGTH-1); |
| |
| /* Allocate a separate buffer where nodes can append extra message text. */ |
| initStringInfo(¬ebuf); |
| ctx.notebuf = ¬ebuf; |
| |
| /* Reserve buffer space for the message header (excluding 'inst' array). */ |
| pq_beginmessage(&ctx.buf, 'Y'); |
| |
| /* Where the actual StatHdr begins*/ |
| hoff = ctx.buf.len; |
| |
| /* Write everything until inst member including "CdbExplain_SliceWorker worker" */ |
| appendBinaryStringInfo(&ctx.buf, (char *)&ctx.hdr, sizeof(ctx.hdr) - sizeof(ctx.hdr.inst)); |
| |
| /* Append statistics from each PlanState node in this slice. */ |
| planstate_walk_node(planstate, cdbexplain_sendStatWalker, &ctx); |
| |
| /* Obtain per-slice stats and put them in StatHdr. */ |
| cdbexplain_collectSliceStats(planstate, &ctx.hdr.worker); |
| |
| /* Append MemoryAccount Tree */ |
| ctx.hdr.memAccountTreeStartOffset = ctx.buf.len - hoff; |
| initStringInfo(&memoryAccountTreeBuffer); |
| uint totalSerialized = MemoryAccounting_Serialize(&memoryAccountTreeBuffer); |
| |
| ctx.hdr.memAccountTreeNodeCount = totalSerialized; |
| appendBinaryStringInfo(&ctx.buf, memoryAccountTreeBuffer.data, memoryAccountTreeBuffer.len); |
| pfree(memoryAccountTreeBuffer.data); |
| |
| /* Append the extra message text. */ |
| ctx.hdr.bnotes = ctx.buf.len - hoff; |
| appendBinaryStringInfo(&ctx.buf, notebuf.data, notebuf.len); |
| ctx.hdr.enotes = ctx.buf.len - hoff; |
| pfree(notebuf.data); |
| |
| /* |
| * Move the message header into the buffer. Rewrite the updated header (with bnotes, enotes, nInst etc.) |
| * Note: this is the second time we are writing the header. The first write merely reserves space for |
| * the header |
| */ |
| memcpy(ctx.buf.data+hoff, (char *)&ctx.hdr, sizeof(ctx.hdr) - sizeof(ctx.hdr.inst)); |
| |
| /* Send message to qDisp process. */ |
| pq_endmessage(&ctx.buf); |
| } /* cdbexplain_sendExecStats */ |
| |
| |
| /* |
| * cdbexplain_sendStatWalker |
| */ |
| CdbVisitOpt |
| cdbexplain_sendStatWalker(PlanState *planstate, void *context) |
| { |
| CdbExplain_SendStatCtx *ctx = (CdbExplain_SendStatCtx *)context; |
| CdbExplain_StatInst *si = &ctx->hdr.inst[0]; |
| |
| /* Stuff stats into our temporary StatInst. Add extra text to notebuf. */ |
| cdbexplain_collectStatsFromNode(planstate, ctx); |
| |
| /* Append StatInst instance to message. */ |
| appendBinaryStringInfo(&ctx->buf, (char *)si, sizeof(*si)); |
| ctx->hdr.nInst++; |
| |
| /* Don't descend across a slice boundary. */ |
| if (IsA(planstate, MotionState)) |
| return CdbVisit_Skip; |
| |
| return CdbVisit_Walk; |
| } /* cdbexplain_sendStatWalker */ |
| |
| |
| /* |
| * cdbexplain_recvExecStats |
| * Called by qDisp to transfer a slice's EXPLAIN ANALYZE statistics |
| * from the CdbDispatchResults structures to the PlanState tree. |
| * Recursively does the same for slices that are descendants of the |
| * one specified. |
| * |
| * 'showstatctx' is a CdbExplain_ShowStatCtx object which was created by |
| * calling cdbexplain_showExecStatsBegin(). |
| */ |
| void |
| cdbexplain_recvExecStats(struct PlanState *planstate, |
| struct CdbDispatchResults *dispatchResults, |
| int sliceIndex, |
| struct CdbExplain_ShowStatCtx *showstatctx, |
| int segmentNum) |
| { |
| CdbDispatchResult *dispatchResultBeg; |
| CdbDispatchResult *dispatchResultEnd; |
| CdbExplain_RecvStatCtx ctx; |
| CdbExplain_DispatchSummary ds; |
| MemoryContext oldcxt; |
| int iDispatch; |
| int nDispatch; |
| int imsgptr; |
| bool isFirstValidateStat = true; |
| |
| if (!planstate || |
| !planstate->instrument || |
| !showstatctx) |
| return; |
| |
| /* |
| * Note that the caller may free the CdbDispatchResults upon return, maybe |
| * before EXPLAIN ANALYZE examines the PlanState tree. Consequently we |
| * must not return ptrs into the dispatch result buffers, but must copy any |
| * needed information into a sufficiently long-lived memory context. |
| */ |
| oldcxt = MemoryContextSwitchTo(showstatctx->explaincxt); |
| |
| /* Initialize treewalk context. */ |
| memset(&ctx, 0, sizeof(ctx)); |
| ctx.dispatchResults = dispatchResults; |
| ctx.extratextbuf = &showstatctx->extratextbuf; |
| ctx.showstatctx = showstatctx; |
| ctx.sliceIndex = sliceIndex; |
| ctx.segmentNum = segmentNum; |
| |
| /* Find the slice's CdbDispatchResult objects. */ |
| dispatchResultBeg = cdbdisp_resultBegin(dispatchResults, sliceIndex); |
| dispatchResultEnd = cdbdisp_resultEnd(dispatchResults, sliceIndex); |
| nDispatch = dispatchResultEnd - dispatchResultBeg; |
| |
| /* Initialize worker counts. */ |
| memset(&ds, 0, sizeof(ds)); |
| ds.nResult = nDispatch; |
| |
| /* Find and validate the statistics returned from each qExec. */ |
| if (nDispatch > 0) |
| ctx.msgptrs = (CdbExplain_StatHdr **)palloc0(nDispatch * sizeof(ctx.msgptrs[0])); |
| for (iDispatch = 0; iDispatch < nDispatch; iDispatch++) |
| { |
| CdbDispatchResult *dispatchResult = &dispatchResultBeg[iDispatch]; |
| PGresult *pgresult; |
| CdbExplain_StatHdr *hdr; |
| pgCdbStatCell *statcell; |
| |
| /* Update worker counts. */ |
| if (!dispatchResult->hasDispatched) |
| ds.nNotDispatched++; |
| else if (dispatchResult->wasCanceled) |
| ds.nCanceled++; |
| else if (dispatchResult->errcode) |
| ds.nError++; |
| else if (dispatchResult->okindex >= 0) |
| ds.nOk++; /* qExec returned successful completion */ |
| else |
| ds.nIgnorableError++; /* qExec returned an error that's likely a |
| * side-effect of another qExec's failure, |
| * e.g. an interconnect error |
| */ |
| |
| /* Find this qExec's last PGresult. If none, skip to next qExec. */ |
| pgresult = cdbdisp_getPGresult(dispatchResult, -1); |
| if (!pgresult) |
| continue; |
| |
| /* Find our statistics in list of response messages. If none, skip. */ |
| for (statcell = pgresult->cdbstats; statcell; statcell = statcell->next) |
| { |
| if (IsA((Node *)statcell->data, CdbExplain_StatHdr)) |
| break; |
| } |
| if (!statcell) |
| continue; |
| |
| /* Validate the message header. */ |
| hdr = (CdbExplain_StatHdr *)statcell->data; |
| if ((size_t)statcell->len < sizeof(*hdr) || |
| (size_t)statcell->len != (sizeof(*hdr) - sizeof(hdr->inst) + |
| hdr->nInst * sizeof(hdr->inst) + |
| hdr->memAccountTreeNodeCount * sizeof(SerializedMemoryAccount) + |
| hdr->enotes - hdr->bnotes) || |
| statcell->len != hdr->enotes || |
| hdr->segindex < -1 || |
| hdr->segindex >= segmentNum) |
| { |
| ereport(ERROR, (errcode(ERRCODE_GP_INTERCONNECTION_ERROR), |
| errmsg_internal("Invalid execution statistics " |
| "response returned from seg%d. " |
| "length=%d", |
| hdr->segindex, |
| statcell->len), |
| errhint("Please verify that all instances are using " |
| "the correct %s software version.", |
| PACKAGE_NAME) |
| )); |
| } |
| |
| /* Slice should have same number of plan nodes on every qExec. */ |
| if (isFirstValidateStat) |
| ctx.nStatInst = hdr->nInst; |
| else |
| { |
| /* MPP-2140: what causes this ? */ |
| if (ctx.nStatInst != hdr->nInst) |
| ereport(ERROR, (errcode(ERRCODE_GP_INTERCONNECTION_ERROR), |
| errmsg("Invalid execution statistics " |
| "received stats node-count mismatch: cdbexplain_recvExecStats() ctx.nStatInst %d hdr->nInst %d", ctx.nStatInst, hdr->nInst), |
| errhint("Please verify that all instances are using " |
| "the correct %s software version.", |
| PACKAGE_NAME))); |
| |
| Insist(ctx.nStatInst == hdr->nInst); |
| } |
| |
| /* Save lowest and highest segment id for which we have stats. */ |
| if (isFirstValidateStat) |
| ctx.segindexMin = ctx.segindexMax = hdr->segindex; |
| else if (ctx.segindexMax < hdr->segindex) |
| ctx.segindexMax = hdr->segindex; |
| else if (ctx.segindexMin > hdr->segindex) |
| ctx.segindexMin = hdr->segindex; |
| |
| if (isFirstValidateStat) isFirstValidateStat = false; |
| |
| /* Save message ptr for easy reference. */ |
| ctx.msgptrs[ctx.nmsgptr] = hdr; |
| ctx.nmsgptr++; |
| } |
| |
| /* Attach NodeSummary to each PlanState node's Instrumentation node. */ |
| planstate_walk_node(planstate, cdbexplain_recvStatWalker, &ctx); |
| |
| /* Make sure we visited the right number of PlanState nodes. */ |
| Insist(ctx.iStatInst == ctx.nStatInst); |
| |
| /* Transfer per-slice stats from message headers to the SliceSummary. */ |
| for (imsgptr = 0; imsgptr < ctx.nmsgptr; imsgptr++) |
| cdbexplain_depositSliceStats(ctx.msgptrs[imsgptr], &ctx); |
| |
| /* Transfer worker counts to SliceSummary. */ |
| showstatctx->slices[sliceIndex].dispatchSummary = ds; |
| |
| /* Restore caller's memory context. */ |
| MemoryContextSwitchTo(oldcxt); |
| |
| /* Clean up. */ |
| if (ctx.msgptrs) |
| pfree(ctx.msgptrs); |
| } /* cdbexplain_recvExecStats */ |
| |
| |
| /* |
| * cdbexplain_recvStatWalker |
| * Update the given PlanState node's Instrument node with statistics |
| * received from qExecs. Attach a CdbExplain_NodeSummary block to |
| * the Instrument node. At a MotionState node, descend to child slice. |
| */ |
| CdbVisitOpt |
| cdbexplain_recvStatWalker(PlanState *planstate, void *context) |
| { |
| CdbExplain_RecvStatCtx *ctx = (CdbExplain_RecvStatCtx *)context; |
| |
| /* If slice was dispatched to qExecs, and stats came back, grab 'em. */ |
| if (ctx->nmsgptr > 0) |
| { |
| /* Transfer received stats to Instrumentation, NodeSummary, etc. */ |
| cdbexplain_depositStatsToNode(planstate, ctx); |
| |
| /* Advance to next node's entry in all of the StatInst arrays. */ |
| ctx->iStatInst++; |
| } |
| |
| /* Motion operator? Descend to next slice. */ |
| if (IsA(planstate, MotionState)) |
| { |
| cdbexplain_recvExecStats(planstate->lefttree, |
| ctx->dispatchResults, |
| ((Motion *)planstate->plan)->motionID, |
| ctx->showstatctx, |
| ctx->segmentNum); |
| return CdbVisit_Skip; |
| } |
| |
| return CdbVisit_Walk; |
| } /* cdbexplain_recvStatWalker */ |
| |
| |
| /* |
| * cdbexplain_collectSliceStats |
| * Obtain per-slice statistical observations from the current slice |
| * (which has just completed execution in the current process) and |
| * store the information in the given SliceWorker struct. |
| * |
| * 'planstate' is the top PlanState node of the current slice. |
| */ |
| static void |
| cdbexplain_collectSliceStats(PlanState *planstate, |
| CdbExplain_SliceWorker *out_worker) |
| { |
| EState *estate = planstate->state; |
| |
| /* Max bytes malloc'ed under executor's per-query memory context. */ |
| out_worker->peakmemused = |
| (double)MemoryContextGetPeakSpace(estate->es_query_cxt); |
| |
| out_worker->vmem_reserved = (double) VmemTracker_GetMaxReservedVmemBytes(); |
| |
| out_worker->memory_accounting_global_peak = (double) MemoryAccountingPeakBalance; |
| |
| } /* cdbexplain_collectSliceStats */ |
| |
| |
| /* |
| * cdbexplain_depositSliceStats |
| * Transfer a worker's per-slice stats contribution from StatHdr into the |
| * SliceSummary array in the ShowStatCtx. Transfer the rollup of per-node |
| * stats from the RecvStatCtx into the SliceSummary. |
| * |
| * Kludge: In a non-parallel plan, slice numbers haven't been assigned, so we |
| * may be called more than once with sliceIndex == 0: once for the outermost |
| * query and once for each InitPlan subquery. In this case we dynamically |
| * expand the SliceSummary array. CDB TODO: Always assign proper root slice |
| * ids (in qDispSliceId field of SubPlan node); then remove this kludge. |
| */ |
| static void |
| cdbexplain_depositSliceStats(CdbExplain_StatHdr *hdr, |
| CdbExplain_RecvStatCtx *recvstatctx) |
| { |
| int sliceIndex = recvstatctx->sliceIndex; |
| CdbExplain_ShowStatCtx *showstatctx = recvstatctx->showstatctx; |
| CdbExplain_SliceSummary *ss = &showstatctx->slices[sliceIndex]; |
| CdbExplain_SliceWorker *ssw; |
| int iworker; |
| |
| Insist(sliceIndex >= 0 && |
| sliceIndex < showstatctx->nslice); |
| |
| /* Kludge: QD can have more than one 'Slice 0' if plan is non-parallel. */ |
| if (sliceIndex == 0 && |
| recvstatctx->dispatchResults == NULL && |
| ss->workers) |
| { |
| Assert(ss->nworker == 1 && |
| recvstatctx->segindexMin == hdr->segindex && |
| recvstatctx->segindexMax == hdr->segindex); |
| |
| /* Expand the SliceSummary array to make room for InitPlan subquery. */ |
| sliceIndex = showstatctx->nslice++; |
| showstatctx->slices = (CdbExplain_SliceSummary *) |
| repalloc(showstatctx->slices, showstatctx->nslice * sizeof(showstatctx->slices[0])); |
| ss = &showstatctx->slices[sliceIndex]; |
| memset(ss, 0, sizeof(*ss)); |
| } |
| |
| /* Slice's first worker? */ |
| if (!ss->workers) |
| { |
| /* Caller already switched to EXPLAIN context. */ |
| Assert(CurrentMemoryContext == showstatctx->explaincxt); |
| |
| /* Allocate SliceWorker array and attach it to the SliceSummary. */ |
| ss->segindex0 = recvstatctx->segindexMin; |
| ss->nworker = recvstatctx->segindexMax + 1 - ss->segindex0; |
| ss->workers = (CdbExplain_SliceWorker *)palloc0(ss->nworker * sizeof(ss->workers[0])); |
| ss->memoryTreeRoots = (SerializedMemoryAccount **)palloc0(ss->nworker * sizeof(ss->memoryTreeRoots[0])); |
| } |
| |
| /* Save a copy of this SliceWorker instance in the worker array. */ |
| iworker = hdr->segindex - ss->segindex0; |
| ssw = &ss->workers[iworker]; |
| Insist(iworker >= 0 && iworker < ss->nworker); |
| Insist(ssw->peakmemused == 0); /* each worker should be seen just once */ |
| *ssw = hdr->worker; |
| |
| const char *originalSerializedMemoryAccountingStartAddress = ((const char*) hdr) + |
| hdr->memAccountTreeStartOffset; |
| |
| size_t bitCount = sizeof(SerializedMemoryAccount) * hdr->memAccountTreeNodeCount; |
| /* |
| * We need to copy of the serialized bits. These bits have shorter lifespan |
| * and can get out of scope before we finish explain analyze. |
| */ |
| void *copiedSerializedMemoryAccountingStartAddress = palloc(bitCount); |
| memcpy(copiedSerializedMemoryAccountingStartAddress, originalSerializedMemoryAccountingStartAddress, bitCount); |
| |
| ss->memoryTreeRoots[iworker] = MemoryAccounting_Deserialize(copiedSerializedMemoryAccountingStartAddress, |
| hdr->memAccountTreeNodeCount); |
| |
| /* Rollup of per-worker stats into SliceSummary */ |
| cdbexplain_agg_upd(&ss->peakmemused, hdr->worker.peakmemused, hdr->segindex, hdr->hostname); |
| cdbexplain_agg_upd(&ss->vmem_reserved, hdr->worker.vmem_reserved, hdr->segindex, hdr->hostname); |
| cdbexplain_agg_upd(&ss->memory_accounting_global_peak, hdr->worker.memory_accounting_global_peak, hdr->segindex, hdr->hostname); |
| |
| /* Rollup of per-node stats over all nodes of the slice into SliceSummary */ |
| ss->workmemused_max = recvstatctx->workmemused_max; |
| ss->workmemwanted_max = recvstatctx->workmemwanted_max; |
| |
| /* Rollup of per-node stats over the whole query into ShowStatCtx. */ |
| showstatctx->workmemused_max = Max(showstatctx->workmemused_max, recvstatctx->workmemused_max); |
| showstatctx->workmemwanted_max = Max(showstatctx->workmemwanted_max, recvstatctx->workmemwanted_max); |
| } /* cdbexplain_depositSliceStats */ |
| |
| |
| /* |
| * cdbexplain_collectStatsFromNode |
| * |
| * Called by sendStatWalker and localStatWalker to obtain a node's statistics |
| * and transfer them into the temporary StatHdr and StatInst in the SendStatCtx. |
| * Also obtains the node's extra message text, which it appends to the caller's |
| * cxt->nodebuf. |
| */ |
| static void |
| cdbexplain_collectStatsFromNode(PlanState *planstate, CdbExplain_SendStatCtx *ctx) |
| { |
| CdbExplain_StatInst *si = &ctx->hdr.inst[0]; |
| Instrumentation *instr = planstate->instrument; |
| |
| Insist(instr); |
| |
| /* Save the state whether this node is completed the first tuple */ |
| bool running = instr->running; |
| |
| /* We have to finalize statistics, since ExecutorEnd hasn't been called. */ |
| InstrEndLoop(instr); |
| |
| if (Debug_print_execution_detail) { |
| instr_time time; |
| INSTR_TIME_SET_CURRENT(time); |
| elog(DEBUG1,"The time after processing node %d : %.3f ms", |
| planstate->type, 1000.0 * INSTR_TIME_GET_DOUBLE(time)); |
| } |
| |
| /* Initialize the StatInst slot in the temporary StatHdr. */ |
| memset(si, 0, sizeof(*si)); |
| si->pstype = planstate->type; |
| |
| /* Add this node's extra message text to notebuf. Store final stats. */ |
| si->bnotes = cdbexplain_collectExtraText(planstate, ctx->notebuf); |
| si->enotes = ctx->notebuf->len; |
| |
| /* Make sure there is a '\0' between this node's message and the next. */ |
| if (si->bnotes < si->enotes) |
| appendStringInfoChar(ctx->notebuf, '\0'); |
| |
| /* Transfer this node's statistics from Instrumentation into StatInst. */ |
| si->running = running; |
| si->starttime = instr->starttime; |
| si->counter = instr->counter; |
| si->firsttuple = instr->firsttuple; |
| si->startup = instr->startup; |
| si->total = instr->total; |
| si->ntuples = instr->ntuples; |
| si->nloops = instr->nloops; |
| si->execmemused = instr->execmemused; |
| si->workmemused = instr->workmemused; |
| si->workmemwanted = instr->workmemwanted; |
| si->workfileReused = instr->workfileReused; |
| si->workfileCreated = instr->workfileCreated; |
| si->peakMemBalance = MemoryAccounting_GetPeak(planstate->plan->memoryAccount); |
| si->firststart = instr->firststart; |
| si->numPartScanned = instr->numPartScanned; |
| } /* cdbexplain_collectStatsFromNode */ |
| |
| |
| /* |
| * CdbExplain_DepStatAcc |
| * Segment statistic accumulator used by cdbexplain_depositStatsToNode(). |
| */ |
| typedef struct CdbExplain_DepStatAcc |
| { |
| /* vmax, vsum, vcnt, segmax */ |
| CdbExplain_Agg agg; |
| |
| /* max's received StatHdr */ |
| CdbExplain_StatHdr *rshmax; |
| /* max's received inst in StatHdr */ |
| CdbExplain_StatInst *rsimax; |
| /* max's inst in NodeSummary */ |
| CdbExplain_StatInst *nsimax; |
| |
| /* max's received StatHdr */ |
| CdbExplain_StatHdr *rshLast; |
| /* max's received inst in StatHdr */ |
| CdbExplain_StatInst *rsiLast; |
| /* max's inst in NodeSummary */ |
| CdbExplain_StatInst *nsiLast; |
| |
| /* max run-time of all the segments */ |
| double max_total; |
| /* start time of the first iteration for node with maximum runtime */ |
| instr_time firststart_of_max_total; |
| } CdbExplain_DepStatAcc; |
| |
| static void |
| cdbexplain_depStatAcc_init0(CdbExplain_DepStatAcc *acc) |
| { |
| cdbexplain_agg_init0(&acc->agg); |
| acc->rshmax = NULL; |
| acc->rsimax = NULL; |
| acc->nsimax = NULL; |
| acc->rshLast = NULL; |
| acc->rsiLast = NULL; |
| acc->nsiLast = NULL; |
| acc->max_total = 0; |
| INSTR_TIME_SET_ZERO(acc->firststart_of_max_total); |
| } /* cdbexplain_depStatAcc_init0 */ |
| |
| static inline void |
| cdbexplain_depStatAcc_upd(CdbExplain_DepStatAcc *acc, |
| double v, |
| CdbExplain_StatHdr *rsh, |
| CdbExplain_StatInst *rsi, |
| CdbExplain_StatInst *nsi) |
| { |
| if (cdbexplain_agg_upd(&acc->agg, v, rsh->segindex,rsh->hostname)) |
| { |
| acc->rshmax = rsh; |
| acc->rsimax = rsi; |
| acc->nsimax = nsi; |
| } |
| |
| if (acc->max_total <= nsi->total) |
| { |
| acc->rshLast = rsh; |
| acc->rsiLast = rsi; |
| acc->nsiLast = nsi; |
| acc->agg.ilast = rsh->segindex; |
| strncpy(acc->agg.hostnamelast, rsh->hostname,SEGMENT_IDENTITY_NAME_LENGTH-1); |
| acc->agg.vlast = v; |
| acc->max_total = nsi->total; |
| INSTR_TIME_ASSIGN(acc->firststart_of_max_total, nsi->firststart); |
| } |
| } /* cdbexplain_depStatAcc_upd */ |
| |
| static void |
| cdbexplain_depStatAcc_saveText(CdbExplain_DepStatAcc *acc, |
| StringInfoData *extratextbuf, |
| bool *saved_inout) |
| { |
| CdbExplain_StatHdr *rsh = acc->rshmax; |
| CdbExplain_StatInst *rsi = acc->rsimax; |
| CdbExplain_StatInst *nsi = acc->nsimax; |
| |
| if (acc->agg.vcnt > 0 && |
| nsi->bnotes == nsi->enotes && |
| rsi->bnotes < rsi->enotes) |
| { |
| /* Locate extra message text in dispatch result buffer. */ |
| int notelen = rsi->enotes - rsi->bnotes; |
| const char *notes = (const char *)rsh + rsh->bnotes + rsi->bnotes; |
| |
| Insist(rsh->bnotes + rsi->enotes < rsh->enotes && |
| notes[notelen] == '\0'); |
| |
| /* Append to extratextbuf. */ |
| nsi->bnotes = extratextbuf->len; |
| appendBinaryStringInfo(extratextbuf, notes, notelen); |
| nsi->enotes = extratextbuf->len; |
| |
| /* Tell caller that some extra text has been saved. */ |
| if (saved_inout) |
| *saved_inout = true; |
| } |
| } /* cdbexplain_depStatAcc_saveText */ |
| |
| |
| /* |
| * cdbexplain_depositStatsToNode |
| * |
| * Called by recvStatWalker and localStatWalker to update the given |
| * PlanState node's Instrument node with statistics received from |
| * workers or collected locally. Attaches a CdbExplain_NodeSummary |
| * block to the Instrument node. If top node of slice, per-slice |
| * statistics are transferred from the StatHdr to the SliceSummary. |
| */ |
| static void |
| cdbexplain_depositStatsToNode(PlanState *planstate, CdbExplain_RecvStatCtx *ctx) |
| { |
| Instrumentation *instr = planstate->instrument; |
| CdbExplain_StatHdr *rsh; /* The header (which includes StatInst) */ |
| CdbExplain_StatInst *rsi; /* The current StatInst */ |
| |
| /* |
| * Points to the insts array of node summary (CdbExplain_NodeSummary). Used |
| * for saving every rsi in the node summary (in addition to saving the max/avg). |
| */ |
| CdbExplain_StatInst *nsi; |
| |
| /* |
| * ns is the node summary across all QEs of the segworker group. It also contains detailed "unsummarized" |
| * raw stat for a node across all QEs in current segworker group (in the insts array) |
| */ |
| CdbExplain_NodeSummary *ns; |
| CdbExplain_DepStatAcc ntuples; |
| CdbExplain_DepStatAcc execmemused; |
| CdbExplain_DepStatAcc workmemused; |
| CdbExplain_DepStatAcc workmemwanted; |
| CdbExplain_DepStatAcc totalWorkfileReused; |
| CdbExplain_DepStatAcc totalWorkfileCreated; |
| CdbExplain_DepStatAcc peakmemused; |
| CdbExplain_DepStatAcc vmem_reserved; |
| CdbExplain_DepStatAcc memory_accounting_global_peak; |
| CdbExplain_DepStatAcc peakMemBalance; |
| CdbExplain_DepStatAcc totalPartTableScanned; |
| int imsgptr; |
| int nInst; |
| |
| Insist(instr && |
| ctx->iStatInst < ctx->nStatInst); |
| |
| /* Caller already switched to EXPLAIN context. */ |
| Assert(CurrentMemoryContext == ctx->showstatctx->explaincxt); |
| |
| /* Allocate NodeSummary block. */ |
| nInst = ctx->segindexMax + 1 - ctx->segindexMin; |
| ns = (CdbExplain_NodeSummary *)palloc0(sizeof(*ns) - sizeof(ns->insts) + |
| nInst*sizeof(ns->insts[0])); |
| ns->segindex0 = ctx->segindexMin; |
| ns->ninst = nInst; |
| |
| /* Attach our new NodeSummary to the Instrumentation node. */ |
| instr->cdbNodeSummary = ns; |
| |
| /* Initialize per-node accumulators. */ |
| cdbexplain_depStatAcc_init0(&ntuples); |
| cdbexplain_depStatAcc_init0(&execmemused); |
| cdbexplain_depStatAcc_init0(&workmemused); |
| cdbexplain_depStatAcc_init0(&workmemwanted); |
| cdbexplain_depStatAcc_init0(&totalWorkfileReused); |
| cdbexplain_depStatAcc_init0(&totalWorkfileCreated); |
| cdbexplain_depStatAcc_init0(&peakMemBalance); |
| cdbexplain_depStatAcc_init0(&totalPartTableScanned); |
| |
| /* Initialize per-slice accumulators. */ |
| cdbexplain_depStatAcc_init0(&peakmemused); |
| cdbexplain_depStatAcc_init0(&vmem_reserved); |
| cdbexplain_depStatAcc_init0(&memory_accounting_global_peak); |
| |
| bool isRunning = false; |
| |
| /* Examine the statistics from each qExec. */ |
| for (imsgptr = 0; imsgptr < ctx->nmsgptr; imsgptr++) |
| { |
| /* Locate PlanState node's StatInst received from this qExec. */ |
| rsh = ctx->msgptrs[imsgptr]; |
| // char* hostname = ctx->dispatchResults->resultArray[imsgptr].segdbDesc->segment->hostname; |
| rsi = &rsh->inst[ctx->iStatInst]; |
| |
| Insist(rsi->pstype == planstate->type && |
| ns->segindex0 <= rsh->segindex && |
| rsh->segindex < ns->segindex0 + ns->ninst); |
| |
| /* Locate this qExec's StatInst slot in node's NodeSummary block. */ |
| nsi = &ns->insts[rsh->segindex - ns->segindex0]; |
| |
| /* Copy the StatInst to NodeSummary from dispatch result buffer. */ |
| *nsi = *rsi; |
| |
| |
| if (Debug_print_execution_detail) { |
| elog(DEBUG1,"The time information for node %d on executor %d: " |
| "starttime: %.3f ms, counter: %.3f ms, firsttuple: %.3f ms, " |
| "startup: %.3f ms, total: %.3f ms, ntuples: %.3f, " |
| "nloops: %.3f", |
| planstate->type, rsh->segindex, |
| 1000.0 * INSTR_TIME_GET_DOUBLE(nsi->starttime), |
| 1000.0 * INSTR_TIME_GET_DOUBLE(nsi->counter), |
| 1000.0 * nsi->firsttuple, 1000.0 * nsi->startup, 1000.0 * nsi->total, |
| nsi->ntuples, nsi->nloops |
| ); |
| } |
| |
| /* |
| * Drop qExec's extra text. We rescue it below if qExec is a winner. |
| * For local qDisp slice, ctx->extratextbuf is NULL, which tells us to |
| * leave the extra text undisturbed in its existing buffer. |
| */ |
| if (ctx->extratextbuf) |
| nsi->bnotes = nsi->enotes = 0; |
| |
| if (nsi->running) |
| { |
| isRunning = true; |
| } |
| |
| /* Update per-node accumulators. */ |
| cdbexplain_depStatAcc_upd(&ntuples, rsi->ntuples, rsh, rsi, nsi); |
| cdbexplain_depStatAcc_upd(&execmemused, rsi->execmemused, rsh, rsi, nsi); |
| cdbexplain_depStatAcc_upd(&workmemused, rsi->workmemused, rsh, rsi, nsi); |
| cdbexplain_depStatAcc_upd(&workmemwanted, rsi->workmemwanted, rsh, rsi, nsi); |
| cdbexplain_depStatAcc_upd(&totalWorkfileReused, (rsi->workfileReused ? 1 : 0), rsh, rsi, nsi); |
| cdbexplain_depStatAcc_upd(&totalWorkfileCreated, (rsi->workfileCreated ? 1 : 0), rsh, rsi, nsi); |
| cdbexplain_depStatAcc_upd(&peakMemBalance, rsi->peakMemBalance, rsh, rsi, nsi); |
| cdbexplain_depStatAcc_upd(&totalPartTableScanned, rsi->numPartScanned, rsh, rsi, nsi); |
| |
| /* Update per-slice accumulators. */ |
| cdbexplain_depStatAcc_upd(&peakmemused, rsh->worker.peakmemused, rsh, rsi, nsi); |
| cdbexplain_depStatAcc_upd(&vmem_reserved, rsh->worker.vmem_reserved, rsh, rsi, nsi); |
| cdbexplain_depStatAcc_upd(&memory_accounting_global_peak, rsh->worker.memory_accounting_global_peak, rsh, rsi, nsi); |
| } |
| |
| if (Debug_print_execution_detail) { |
| if (ntuples.nsimax) |
| { |
| elog(DEBUG1,"The time information for node %d on executor: RSIMAX" |
| "starttime: %.3f ms, counter: %.3f ms, firsttuple: %.3f ms, " |
| "startup: %.3f ms, total: %.3f ms, ntuples: %.3f, " |
| "nloops: %.3f", |
| planstate->type, 1000.0 * INSTR_TIME_GET_DOUBLE(ntuples.nsimax->starttime), |
| 1000.0 * INSTR_TIME_GET_DOUBLE(ntuples.nsimax->counter), |
| 1000.0 * ntuples.nsimax->firsttuple, 1000.0 * ntuples.nsimax->startup, |
| 1000.0 * ntuples.nsimax->total, |
| ntuples.nsimax->ntuples, ntuples.nsimax->nloops |
| ); |
| } |
| else |
| { |
| elog(DEBUG1, "The time information for node %d on executor: RSIMAX", |
| planstate->type); |
| } |
| } |
| |
| /* Save per-node accumulated stats in NodeSummary. */ |
| ns->ntuples = ntuples.agg; |
| ns->execmemused = execmemused.agg; |
| ns->workmemused = workmemused.agg; |
| ns->workmemwanted = workmemwanted.agg; |
| ns->totalWorkfileReused = totalWorkfileReused.agg; |
| ns->totalWorkfileCreated = totalWorkfileCreated.agg; |
| ns->peakMemBalance = peakMemBalance.agg; |
| ns->totalPartTableScanned = totalPartTableScanned.agg; |
| |
| /* Roll up summary over all nodes of slice into RecvStatCtx. */ |
| ctx->workmemused_max = Max(ctx->workmemused_max, workmemused.agg.vmax); |
| ctx->workmemwanted_max = Max(ctx->workmemwanted_max, workmemwanted.agg.vmax); |
| |
| instr->running = isRunning; |
| instr->total = ntuples.max_total; |
| instr->totalLast = ntuples.max_total; |
| INSTR_TIME_ASSIGN(instr->firststart, ntuples.firststart_of_max_total); |
| |
| /* Put winner's stats into qDisp PlanState's Instrument node. */ |
| if (ntuples.agg.vcnt > 0) |
| { |
| instr->running = ntuples.nsimax->running; |
| instr->starttime = ntuples.nsimax->starttime; |
| instr->counter = ntuples.nsimax->counter; |
| instr->firsttuple = ntuples.nsimax->firsttuple; |
| instr->startup = ntuples.nsimax->startup; |
| instr->total = ntuples.nsimax->total; |
| instr->startupLast = ntuples.nsiLast ? ntuples.nsiLast->startup : ntuples.nsimax->startup; |
| instr->totalLast = ntuples.nsiLast ? ntuples.nsiLast->total : ntuples.nsimax->total; |
| instr->ntuples = ntuples.nsimax->ntuples; |
| instr->nloops = ntuples.nsimax->nloops; |
| instr->execmemused = ntuples.nsimax->execmemused; |
| instr->workmemused = ntuples.nsimax->workmemused; |
| instr->workmemwanted = ntuples.nsimax->workmemwanted; |
| instr->workfileReused = ntuples.nsimax->workfileReused; |
| instr->workfileCreated = ntuples.nsimax->workfileCreated; |
| instr->firststart = ntuples.nsimax->firststart; |
| instr->firststartLast = ntuples.nsiLast ? ntuples.nsiLast->firststart : ntuples.nsimax->firststart; |
| } |
| |
| /* Save extra message text for the most interesting winning qExecs. */ |
| if (ctx->extratextbuf) |
| { |
| bool saved = false; |
| |
| /* One worker which used or wanted the most work_mem */ |
| if (workmemwanted.agg.vmax >= workmemused.agg.vmax) |
| cdbexplain_depStatAcc_saveText(&workmemwanted, ctx->extratextbuf, &saved); |
| else if (workmemused.agg.vmax > 1.05 * cdbexplain_agg_avg(&workmemused.agg)) |
| cdbexplain_depStatAcc_saveText(&workmemused, ctx->extratextbuf, &saved); |
| |
| /* Worker which used the most executor memory (this node's usage) */ |
| if (execmemused.agg.vmax > 1.05 * cdbexplain_agg_avg(&execmemused.agg)) |
| cdbexplain_depStatAcc_saveText(&execmemused, ctx->extratextbuf, &saved); |
| |
| /* |
| * For the worker which had the highest peak executor memory usage |
| * overall across the whole slice, we'll report the extra message |
| * text from all of the nodes in the slice. But only if that worker |
| * stands out more than 5% above the average. |
| */ |
| if (peakmemused.agg.vmax > 1.05 * cdbexplain_agg_avg(&peakmemused.agg)) |
| cdbexplain_depStatAcc_saveText(&peakmemused, ctx->extratextbuf, &saved); |
| |
| /* |
| * One worker that used cached workfiles. |
| */ |
| if (totalWorkfileReused.agg.vcnt > 0) |
| { |
| cdbexplain_depStatAcc_saveText(&totalWorkfileReused, ctx->extratextbuf, &saved); |
| } |
| |
| /* |
| * One worker which produced the greatest number of output rows. |
| * (Always give at least one node a chance to have its extra message |
| * text seen. In case no node stood out above the others, make a |
| * repeatable choice based on the number of output rows.) |
| */ |
| if (!saved || |
| ntuples.agg.vmax > 1.05 * cdbexplain_agg_avg(&ntuples.agg)) |
| cdbexplain_depStatAcc_saveText(&ntuples, ctx->extratextbuf, &saved); |
| } |
| } /* cdbexplain_depositStatsToNode */ |
| |
| |
| /* |
| * cdbexplain_collectExtraText |
| * Allow a node to supply additional text for its EXPLAIN ANALYZE report. |
| * |
| * Returns the starting offset of the extra message text from notebuf->data. |
| * The caller can compute the length as notebuf->len minus the starting offset. |
| * If the node did not provide any extra message text, the length will be 0. |
| */ |
| int |
| cdbexplain_collectExtraText(PlanState *planstate, StringInfo notebuf) |
| { |
| int bnotes = notebuf->len; |
| |
| /* |
| * Invoke node's callback. It may append to our notebuf and/or its own |
| * cdbexplainbuf; and store final statistics in its Instrumentation node. |
| */ |
| if (planstate->cdbexplainfun) |
| planstate->cdbexplainfun(planstate, notebuf); |
| |
| /* |
| * Append contents of node's extra message buffer. This allows nodes to |
| * contribute EXPLAIN ANALYZE info without having to set up a callback. |
| */ |
| if (planstate->cdbexplainbuf && |
| planstate->cdbexplainbuf->len > 0) |
| { |
| /* If callback added to notebuf, make sure text ends with a newline. */ |
| if (bnotes < notebuf->len && |
| notebuf->data[notebuf->len-1] != '\n') |
| appendStringInfoChar(notebuf, '\n'); |
| |
| appendBinaryStringInfo(notebuf, planstate->cdbexplainbuf->data, |
| planstate->cdbexplainbuf->len); |
| |
| truncateStringInfo(planstate->cdbexplainbuf, 0); |
| } |
| |
| return bnotes; |
| } /* cdbexplain_collectExtraText */ |
| |
| |
| /* |
| * cdbexplain_formatExtraText |
| * Format extra message text into the EXPLAIN output buffer. |
| */ |
| static void |
| cdbexplain_formatExtraText(StringInfo str, |
| int indent, |
| int segindex, |
| bool workfileReuse, |
| const char *notes, |
| int notelen) |
| { |
| const char *cp = notes; |
| const char *ep = notes + notelen; |
| const char *reuse = ""; |
| if (workfileReuse) |
| { |
| reuse = " reuse"; |
| } |
| |
| /* Could be more than one line... */ |
| while (cp < ep) |
| { |
| const char *nlp = strchr(cp, '\n'); |
| const char *dp = nlp ? nlp : ep; |
| |
| /* Strip trailing whitespace. */ |
| while (cp < dp && |
| isspace(dp[-1])) |
| dp--; |
| |
| /* Add to output buffer. */ |
| if (cp < dp) |
| { |
| appendStringInfoFill(str, 2*indent, ' '); |
| if (segindex >= 0) |
| { |
| appendStringInfo(str, "(seg%d%s) ", segindex, reuse); |
| if (segindex < 10) |
| appendStringInfoChar(str, ' '); |
| if (segindex < 100) |
| appendStringInfoChar(str, ' '); |
| } |
| appendBinaryStringInfo(str, cp, dp-cp); |
| appendStringInfoChar(str, '\n'); |
| } |
| |
| if (!nlp) |
| break; |
| cp = nlp+1; |
| } |
| } /* cdbexplain_formatExtraText */ |
| |
| |
| |
| /* |
| * cdbexplain_formatMemory |
| * Convert memory size to string from (double) bytes. |
| * |
| * outbuf: [output] pointer to a char buffer to be filled |
| * bufsize: [input] maximum number of characters to write to outbuf (must be set by the caller) |
| * bytes: [input] a value representing memory size in bytes to be written to outbuf |
| */ |
| static void |
| cdbexplain_formatMemory(char *outbuf, int bufsize, double bytes) |
| { |
| Assert(outbuf != NULL && "CDBEXPLAIN: char buffer is null"); |
| Assert(bufsize > 0 && "CDBEXPLAIN: size of char buffer is zero"); |
| /*check if truncation occurs */ |
| #ifdef USE_ASSERT_CHECKING |
| int nchars_written = |
| #endif /* USE_ASSERT_CHECKING */ |
| snprintf(outbuf, bufsize, "%.0fK bytes", floor((bytes + 1023.0)/1024.0)); |
| Assert(nchars_written < bufsize && |
| "CDBEXPLAIN: size of char buffer is smaller than the required number of chars"); |
| } /* cdbexplain_formatMemory */ |
| |
| |
| |
| /* |
| * cdbexplain_formatSeconds |
| * Convert time in seconds to readable string |
| * |
| * outbuf: [output] pointer to a char buffer to be filled |
| * bufsize: [input] maximum number of characters to write to outbuf (must be set by the caller) |
| * seconds: [input] a value representing no. of seconds to be written to outbuf |
| */ |
| static void |
| cdbexplain_formatSeconds(char* outbuf, int bufsize, double seconds) |
| { |
| Assert(outbuf != NULL && "CDBEXPLAIN: char buffer is null"); |
| Assert(bufsize > 0 && "CDBEXPLAIN: size of char buffer is zero"); |
| double ms = seconds * 1000.0; |
| /*check if truncation occurs */ |
| #ifdef USE_ASSERT_CHECKING |
| int nchars_written = |
| #endif /* USE_ASSERT_CHECKING */ |
| snprintf(outbuf, bufsize, "%.*f ms", |
| (ms < 10.0 && ms != 0.0 && ms > -10.0) ? 3 : 0, |
| ms); |
| Assert(nchars_written < bufsize && |
| "CDBEXPLAIN: size of char buffer is smaller than the required number of chars"); |
| } /* cdbexplain_formatSeconds */ |
| |
| /* |
| * cdbexplain_formatSeconds |
| * Convert time in seconds to readable string |
| * |
| * outbuf: [output] pointer to a char buffer to be filled |
| * bufsize: [input] maximum number of characters to write to outbuf (must be set by the caller) |
| * seconds: [input] a value representing no. of seconds to be written to outbuf |
| */ |
| static void |
| cdbexplain_formatPairSeconds(char* outbuf, int bufsize, double seconds,double secondsLast) |
| { |
| Assert(outbuf != NULL && "CDBEXPLAIN: char buffer is null"); |
| Assert(bufsize > 0 && "CDBEXPLAIN: size of char buffer is zero"); |
| double ms = seconds * 1000.0; |
| double mslast = secondsLast * 1000.0; |
| /*check if truncation occurs */ |
| #ifdef USE_ASSERT_CHECKING |
| int nchars_written = |
| #endif /* USE_ASSERT_CHECKING */ |
| snprintf(outbuf, bufsize, "%.*f/%.*f ms", |
| (ms < 10.0 && ms != 0.0 && ms > -10.0) ? 3 : 0, |
| ms,(mslast < 10.0 && mslast != 0.0 && mslast > -10.0) ? 3 : 0, mslast); |
| Assert(nchars_written < bufsize && |
| "CDBEXPLAIN: size of char buffer is smaller than the required number of chars"); |
| } |
| |
| /* |
| * cdbexplain_formatSeg |
| * Convert segment id to string. |
| * |
| * outbuf: [output] pointer to a char buffer to be filled |
| * bufsize: [input] maximum number of characters to write to outbuf (must be set by the caller) |
| * segindex:[input] a value representing segment index to be written to outbuf |
| * nInst: [input] no. of stat instances |
| */ |
| static void |
| cdbexplain_formatSeg(char *outbuf, int bufsize, int segindex, int nInst,char* hostname) |
| { |
| Assert(outbuf != NULL && "CDBEXPLAIN: char buffer is null"); |
| Assert(bufsize > 0 && "CDBEXPLAIN: size of char buffer is zero"); |
| |
| if ( nInst > 1 && segindex >= 0){ |
| /*check if truncation occurs */ |
| #ifdef USE_ASSERT_CHECKING |
| int nchars_written = |
| #endif /* USE_ASSERT_CHECKING */ |
| snprintf(outbuf, bufsize, " (seg%d:%s)", segindex,hostname); |
| Assert(nchars_written < bufsize && |
| "CDBEXPLAIN: size of char buffer is smaller than the required number of chars"); |
| } |
| else{ |
| outbuf[0] = '\0'; |
| } |
| } /* cdbexplain_formatSeg */ |
| |
| /* |
| * cdbexplain_formatSeg |
| * Convert segment id to string. |
| * |
| * outbuf: [output] pointer to a char buffer to be filled |
| * bufsize: [input] maximum number of characters to write to outbuf (must be set by the caller) |
| * segindex:[input] a value representing segment index to be written to outbuf |
| * nInst: [input] no. of stat instances |
| */ |
| static void |
| cdbexplain_formatSegNoParenthesis(char *outbuf, int bufsize, int segindex, int nInst,char* hostname) |
| { |
| Assert(outbuf != NULL && "CDBEXPLAIN: char buffer is null"); |
| Assert(bufsize > 0 && "CDBEXPLAIN: size of char buffer is zero"); |
| |
| if ( nInst >= 0 && segindex >= -1){ |
| /*check if truncation occurs */ |
| #ifdef USE_ASSERT_CHECKING |
| int nchars_written = |
| #endif /* USE_ASSERT_CHECKING */ |
| snprintf(outbuf, bufsize, "seg%d:%s", segindex,hostname); |
| Assert(nchars_written < bufsize && |
| "CDBEXPLAIN: size of char buffer is smaller than the required number of chars"); |
| } |
| else{ |
| outbuf[0] = '\0'; |
| } |
| } /* cdbexplain_formatSeg */ |
| |
| |
| /* |
| * cdbexplain_showExecStatsBegin |
| * Called by qDisp process to create a CdbExplain_ShowStatCtx structure |
| * in which to accumulate overall statistics for a query. |
| * |
| * 'querystarttime' is the timestamp of the start of the query, in a |
| * platform-dependent format. |
| * 'explaincxt' is a MemoryContext from which to allocate the ShowStatCtx as |
| * well as any needed buffers and the like. The explaincxt ptr is saved |
| * in the ShowStatCtx. The caller is expected to reset or destroy the |
| * explaincxt not too long after calling cdbexplain_showExecStatsEnd(); so |
| * we don't bother to pfree() memory that we allocate from this context. |
| * |
| * Note this function is called before ExecutorStart(), so there is no EState |
| * or SliceTable yet. |
| */ |
| struct CdbExplain_ShowStatCtx * |
| cdbexplain_showExecStatsBegin(struct QueryDesc *queryDesc, |
| MemoryContext explaincxt, |
| instr_time querystarttime) |
| { |
| MemoryContext oldcontext; |
| CdbExplain_ShowStatCtx *ctx; |
| int nslice; |
| |
| Assert(Gp_role != GP_ROLE_EXECUTE); |
| |
| /* Switch to EXPLAIN memory context. */ |
| oldcontext = MemoryContextSwitchTo(explaincxt); |
| |
| /* Allocate and zero the ShowStatCtx */ |
| ctx = (CdbExplain_ShowStatCtx *)palloc0(sizeof(*ctx)); |
| |
| ctx->explaincxt = explaincxt; |
| ctx->querystarttime = querystarttime; |
| |
| /* Determine number of slices. (SliceTable hasn't been built yet.) */ |
| nslice = 1 + queryDesc->plannedstmt->planTree->nMotionNodes + queryDesc->plannedstmt->planTree->nInitPlans; |
| |
| /* Allocate and zero the SliceSummary array. */ |
| ctx->nslice = nslice; |
| ctx->slices = (CdbExplain_SliceSummary *)palloc0(nslice * sizeof(ctx->slices[0])); |
| |
| /* Allocate a buffer in which we can collect any extra message text. */ |
| initStringInfoOfSize(&ctx->extratextbuf, 4000); |
| |
| /* Restore caller's MemoryContext. */ |
| MemoryContextSwitchTo(oldcontext); |
| return ctx; |
| } /* cdbexplain_showExecStatsBegin */ |
| |
| /* |
| * nodeSupportWorkfileCaching |
| * Return true if a given node supports workfile caching. |
| */ |
| static bool |
| nodeSupportWorkfileCaching(PlanState *planstate) |
| { |
| return (IsA(planstate, SortState) || |
| IsA(planstate, HashJoinState) || |
| (IsA(planstate, AggState) && ((Agg*)planstate->plan)->aggstrategy == AGG_HASHED) || |
| IsA(planstate, MaterialState)); |
| } |
| |
| /* |
| * cdbexplain_showExecStats |
| * Called by qDisp process to format a node's EXPLAIN ANALYZE statistics. |
| * |
| * 'planstate' is the node whose statistics are to be displayed. |
| * 'str' is the output buffer. |
| * 'indent' is the root indentation for all the text generated for explain output |
| * 'ctx' is a CdbExplain_ShowStatCtx object which was created by a call to |
| * cdbexplain_showExecStatsBegin(). |
| */ |
| void |
| cdbexplain_showExecStats(struct PlanState *planstate, |
| struct StringInfoData *str, |
| int indent, |
| struct CdbExplain_ShowStatCtx *ctx) |
| { |
| Instrumentation *instr = planstate->instrument; |
| CdbExplain_NodeSummary *ns = instr->cdbNodeSummary; |
| instr_time timediff; |
| instr_time timediffLast; |
| double ntuples_avg; |
| int i; |
| |
| const char *s_row = " row"; |
| const char *s_rows = " rows"; |
| char firstbuf[50]; |
| char totalbuf[50]; |
| char avgbuf[50]; |
| char maxbuf[50]; |
| char segbufWithParenthese[50]; |
| char segbuf[50]; |
| char segbufLast[50]; |
| char startbuf[50]; |
| |
| /* Might not have received stats from qExecs if they hit errors. */ |
| if (!ns) |
| return; |
| |
| Assert(instr != NULL); |
| const char *noRowRequested = ""; |
| if (!instr->running) |
| { |
| noRowRequested = "(No row requested) "; |
| } |
| |
| /* |
| * Row counts. Also, timings from the worker with the most output rows. |
| */ |
| appendStringInfoFill(str, 2 * indent, ' '); |
| cdbexplain_formatSeg(segbufWithParenthese, sizeof(segbufWithParenthese), ns->ntuples.imax, ns->ninst,ns->ntuples.hostnamemax); |
| cdbexplain_formatSegNoParenthesis(segbuf, sizeof(segbuf), ns->ntuples.imax, ns->ninst,ns->ntuples.hostnamemax); |
| cdbexplain_formatSegNoParenthesis(segbufLast, sizeof(segbufLast), ns->ntuples.ilast, ns->ninst,ns->ntuples.hostnamelast); |
| ntuples_avg = cdbexplain_agg_avg(&ns->ntuples); |
| bool containMaxRowAndLast=false; |
| switch (planstate->type) |
| { |
| case T_BitmapAndState: |
| case T_BitmapOrState: |
| case T_BitmapIndexScanState: |
| s_row = ""; |
| s_rows = ""; |
| if (ns->ntuples.vcnt >= 0){ |
| appendStringInfo(str, |
| "Bitmaps out: Avg %.1f x %d workers." |
| " Max/Last(%s/%s) %.0f/%.0f rows", |
| ntuples_avg, |
| ns->ntuples.vcnt, |
| segbuf,segbufLast, |
| ns->ntuples.vmax,ns->ntuples.vlast); |
| containMaxRowAndLast = true; |
| } |
| break; |
| case T_HashState: |
| if (ns->ntuples.vcnt >= 0){ |
| appendStringInfo(str, |
| "Rows in: Avg %.1f rows x %d workers." |
| " Max/Last(%s/%s) %.0f/%.0f rows", |
| ntuples_avg, |
| ns->ntuples.vcnt, |
| segbuf,segbufLast, |
| ns->ntuples.vmax,ns->ntuples.vlast); |
| containMaxRowAndLast = true; |
| } |
| break; |
| case T_MotionState: |
| if (ns->ntuples.vcnt >= 0){ |
| appendStringInfo(str, |
| "Rows out: Avg %.1f rows x %d workers" |
| " at destination. Max/Last(%s/%s) %.0f/%.0f rows", |
| ntuples_avg, |
| ns->ntuples.vcnt, |
| segbuf,segbufLast, |
| ns->ntuples.vmax,ns->ntuples.vlast); |
| containMaxRowAndLast = true; |
| } |
| break; |
| default: |
| if (ns->ntuples.vcnt >= 0){ |
| appendStringInfo(str, |
| "Rows out: Avg %.1f rows x %d workers." |
| " Max/Last(%s/%s) %.0f/%.0f rows", |
| ntuples_avg, |
| ns->ntuples.vcnt, |
| segbuf,segbufLast, |
| ns->ntuples.vmax,ns->ntuples.vlast); |
| containMaxRowAndLast = true; |
| } |
| } |
| |
| if( containMaxRowAndLast ){ |
| /* Time from this worker's first InstrStartNode() to its first result row */ |
| cdbexplain_formatPairSeconds(firstbuf, sizeof(firstbuf), instr->startup, instr->startupLast); |
| |
| /* Time from this worker's first InstrStartNode() to end of its results */ |
| cdbexplain_formatPairSeconds(totalbuf, sizeof(totalbuf), instr->total, instr->totalLast); |
| } |
| else |
| { |
| /* Time from this worker's first InstrStartNode() to its first result row */ |
| cdbexplain_formatSeconds(firstbuf, sizeof(firstbuf), instr->startup); |
| |
| /* Time from this worker's first InstrStartNode() to end of its results */ |
| cdbexplain_formatSeconds(totalbuf, sizeof(totalbuf), instr->total); |
| } |
| |
| /* |
| * Show elapsed time just once if they are the same or if we don't have |
| * any valid elapsed time for first tuple. |
| */ |
| if ((instr->ntuples > 0) && (strcmp(firstbuf, totalbuf) != 0)) |
| appendStringInfo(str, |
| " with %s to first%s, %s to end", |
| firstbuf, |
| s_row, |
| totalbuf); |
| else |
| appendStringInfo(str, |
| " with %s to end", |
| totalbuf); |
| |
| /* Number of rescans */ |
| if (instr->nloops > 1) |
| appendStringInfo(str, " of %.0f scans", instr->nloops); |
| |
| /* Time from start of query on qDisp to this worker's first result row */ |
| if (!(INSTR_TIME_IS_ZERO(instr->firststart)) && !(INSTR_TIME_IS_ZERO(instr->firststartLast))) |
| { |
| INSTR_TIME_SET_ZERO(timediff); |
| INSTR_TIME_ACCUM_DIFF(timediff, instr->firststart, ctx->querystarttime); |
| INSTR_TIME_SET_ZERO(timediffLast); |
| INSTR_TIME_ACCUM_DIFF(timediffLast, instr->firststartLast, ctx->querystarttime); |
| cdbexplain_formatPairSeconds(startbuf, sizeof(startbuf), INSTR_TIME_GET_DOUBLE(timediff), INSTR_TIME_GET_DOUBLE(timediffLast)); |
| appendStringInfo(str, ", start offset by %s", startbuf); |
| } |
| |
| appendStringInfoString(str, ".\n"); |
| |
| if ((EXPLAIN_MEMORY_VERBOSITY_DETAIL <= explain_memory_verbosity) |
| && planstate->type == T_MotionState) |
| { |
| Motion *pMotion = (Motion *) planstate->plan; |
| int curSliceId = pMotion->motionID; |
| |
| for (int iWorker = 0; iWorker < ctx->slices[curSliceId].nworker; iWorker++) |
| { |
| appendStringInfoFill(str, 2*indent, ' '); |
| appendStringInfo(str, "slice %d, seg %d\n", curSliceId, iWorker); |
| |
| MemoryAccounting_ToString(&(ctx->slices[curSliceId].memoryTreeRoots[iWorker]->memoryAccount), str, indent + 1); |
| } |
| } |
| |
| |
| /* |
| * Executor memory used by this individual node, if it allocates from a |
| * memory context of its own instead of sharing the per-query context. |
| */ |
| if (ns->execmemused.vcnt > 0) |
| { |
| appendStringInfoFill(str, 2*indent, ' '); |
| cdbexplain_formatMemory(maxbuf, sizeof(maxbuf), ns->execmemused.vmax); |
| if (ns->execmemused.vcnt == 1) |
| appendStringInfo(str, |
| "Executor memory: %s.\n", |
| maxbuf); |
| else |
| { |
| cdbexplain_formatSeg(segbuf, sizeof(segbuf), ns->execmemused.imax, ns->ninst,ns->ntuples.hostnamemax); |
| cdbexplain_formatMemory(avgbuf, sizeof(avgbuf), cdbexplain_agg_avg(&ns->execmemused)); |
| appendStringInfo(str, |
| "Executor memory: %s avg, %s max%s.\n", |
| avgbuf, |
| maxbuf, |
| segbuf); |
| } |
| } |
| |
| /* |
| * Actual work_mem used. |
| */ |
| if (ns->workmemused.vcnt > 0) |
| { |
| appendStringInfoFill(str, 2*indent, ' '); |
| cdbexplain_formatMemory(maxbuf, sizeof(maxbuf), ns->workmemused.vmax); |
| if (ns->workmemused.vcnt == 1) |
| appendStringInfo(str, |
| "Work_mem used: %s.", |
| maxbuf); |
| else |
| { |
| cdbexplain_formatSeg(segbuf, sizeof(segbuf), ns->workmemused.imax, ns->ninst,ns->ntuples.hostnamemax); |
| cdbexplain_formatMemory(avgbuf, sizeof(avgbuf), cdbexplain_agg_avg(&ns->workmemused)); |
| appendStringInfo(str, |
| "Work_mem used: %s avg, %s max%s.", |
| avgbuf, |
| maxbuf, |
| segbuf); |
| } |
| /* |
| * Total number of segments in which this node reuses cached or creates workfiles. |
| */ |
| if (nodeSupportWorkfileCaching(planstate)) |
| { |
| appendStringInfo(str, |
| " Workfile: (%d spilling, %d reused)", |
| ns->totalWorkfileCreated.vcnt, |
| ns->totalWorkfileReused.vcnt); |
| } |
| |
| appendStringInfo(str,"\n"); |
| |
| } |
| |
| if (EXPLAIN_MEMORY_VERBOSITY_SUPPRESS < explain_memory_verbosity) |
| { |
| /* |
| * Memory account balance without overhead |
| */ |
| appendStringInfoFill(str, 2*indent, ' '); |
| cdbexplain_formatMemory(maxbuf, sizeof(maxbuf), ns->peakMemBalance.vmax); |
| if (ns->peakMemBalance.vcnt == 1) |
| { |
| appendStringInfo(str, |
| "Memory: %s.\n", |
| maxbuf); |
| } |
| else |
| { |
| cdbexplain_formatSeg(segbuf, sizeof(segbuf), ns->peakMemBalance.imax, ns->ninst,ns->ntuples.hostnamemax); |
| cdbexplain_formatMemory(avgbuf, sizeof(avgbuf), cdbexplain_agg_avg(&ns->peakMemBalance)); |
| appendStringInfo(str, |
| "Memory: %s avg, %s max%s.\n", |
| avgbuf, |
| maxbuf, |
| segbuf); |
| } |
| } |
| |
| /* |
| * What value of work_mem would suffice to eliminate workfile I/O? |
| */ |
| if (ns->workmemwanted.vcnt > 0) |
| { |
| appendStringInfoFill(str, 2*indent, ' '); |
| cdbexplain_formatMemory(maxbuf, sizeof(maxbuf), ns->workmemwanted.vmax); |
| if (ns->ninst == 1) |
| { |
| appendStringInfo(str, |
| "Work_mem wanted: %s to lessen workfile I/O.\n", |
| maxbuf); |
| } |
| else |
| { |
| cdbexplain_formatMemory(avgbuf, sizeof(avgbuf), cdbexplain_agg_avg(&ns->workmemwanted)); |
| cdbexplain_formatSeg(segbuf, sizeof(segbuf), ns->workmemwanted.imax, ns->ninst,ns->ntuples.hostnamemax); |
| appendStringInfo(str, |
| "Work_mem wanted: %s avg, %s max%s" |
| " to lessen workfile I/O affecting %d workers.\n", |
| avgbuf, |
| maxbuf, |
| segbuf, |
| ns->workmemwanted.vcnt); |
| } |
| } |
| |
| /* |
| * Print number of partitioned tables scanned for dynamic scans. |
| */ |
| if (0 <= ns->totalPartTableScanned.vcnt && (T_BitmapTableScanState == planstate->type |
| || T_DynamicTableScanState == planstate->type |
| || T_DynamicIndexScanState == planstate->type)) |
| { |
| double nPartTableScanned_avg = cdbexplain_agg_avg(&ns->totalPartTableScanned); |
| if (0 == nPartTableScanned_avg) |
| { |
| bool displayPartitionScanned = true; |
| if (T_BitmapTableScanState == planstate->type) |
| { |
| ScanState *scanState = (ScanState *) planstate; |
| if (!isDynamicScan((Scan *)scanState->ps.plan)) |
| { |
| displayPartitionScanned = false; |
| } |
| } |
| |
| if (displayPartitionScanned) |
| { |
| int numTotalLeafParts = cdbexplain_countLeafPartTables(planstate); |
| appendStringInfoFill(str, 2 * indent, ' '); |
| appendStringInfo(str, |
| "Partitions scanned: 0 (out of %d).\n", |
| numTotalLeafParts); |
| } |
| } |
| else |
| { |
| cdbexplain_formatSeg(segbuf, sizeof(segbuf), ns->totalPartTableScanned.imax, ns->ninst,ns->ntuples.hostnamemax); |
| int numTotalLeafParts = cdbexplain_countLeafPartTables(planstate); |
| appendStringInfoFill(str, 2 * indent, ' '); |
| |
| /* only 1 segment scans partitions */ |
| if (1 == ns->totalPartTableScanned.vcnt) |
| { |
| /* rescan */ |
| if (1 < instr->nloops) |
| { |
| double totalPartTableScannedPerRescan = ns->totalPartTableScanned.vmax / instr->nloops; |
| appendStringInfo(str, |
| "Partitions scanned: %.0f (out of %d) %s of %.0f scans.\n", |
| totalPartTableScannedPerRescan, |
| numTotalLeafParts, |
| segbuf, |
| instr->nloops); |
| } |
| else |
| { |
| appendStringInfo(str, |
| "Partitions scanned: %.0f (out of %d) %s.\n", |
| ns->totalPartTableScanned.vmax, |
| numTotalLeafParts, |
| segbuf); |
| } |
| } |
| else |
| { |
| /* rescan */ |
| if (1 < instr->nloops) |
| { |
| double totalPartTableScannedPerRescan = nPartTableScanned_avg / instr->nloops; |
| double maxPartTableScannedPerRescan = ns->totalPartTableScanned.vmax / instr->nloops; |
| appendStringInfo(str, |
| "Partitions scanned: Avg %.1f (out of %d) x %d workers of %.0f scans." |
| " Max %.0f parts%s.\n", |
| totalPartTableScannedPerRescan, |
| numTotalLeafParts, |
| ns->totalPartTableScanned.vcnt, |
| instr->nloops, |
| maxPartTableScannedPerRescan, |
| segbuf |
| ); |
| } |
| else |
| { |
| appendStringInfo(str, |
| "Partitions scanned: Avg %.1f (out of %d) x %d workers." |
| " Max %.0f parts%s.\n", |
| nPartTableScanned_avg, |
| numTotalLeafParts, |
| ns->totalPartTableScanned.vcnt, |
| ns->totalPartTableScanned.vmax, |
| segbuf); |
| } |
| } |
| } |
| } |
| |
| /* |
| * Extra message text. |
| */ |
| for (i = 0; i < ns->ninst; i++) |
| { |
| CdbExplain_StatInst *nsi = &ns->insts[i]; |
| |
| if (nsi->bnotes < nsi->enotes) |
| { |
| cdbexplain_formatExtraText(str, |
| indent, |
| (ns->ninst == 1) ? -1 |
| : ns->segindex0 + i, |
| nsi->workfileReused, |
| ctx->extratextbuf.data + nsi->bnotes, |
| nsi->enotes - nsi->bnotes); |
| } |
| } |
| |
| /* |
| * Dump stats for all workers. |
| */ |
| if (gp_enable_explain_allstat && |
| ns->segindex0 >= 0 && |
| ns->ninst > 0) |
| { |
| /* create a header for all stats: separate each individual |
| * stat by an underscore, separate the grouped stats for each |
| * node by a slash |
| */ |
| appendStringInfoFill(str, 2*indent, ' '); |
| appendStringInfoString(str, |
| "allstat: " |
| /* "seg_starttime_firststart_counter_firsttuple_startup_total_ntuples_nloops" */ |
| "seg_firststart_total_ntuples"); |
| |
| for (i = 0; i < ns->ninst; i++) |
| { |
| CdbExplain_StatInst *nsi = &ns->insts[i]; |
| |
| if (INSTR_TIME_IS_ZERO(nsi->firststart)) |
| { |
| continue; |
| } |
| |
| /* Time from start of query on qDisp to worker's first result row */ |
| INSTR_TIME_SET_ZERO(timediff); |
| INSTR_TIME_ACCUM_DIFF(timediff, nsi->firststart, ctx->querystarttime); |
| cdbexplain_formatSeconds(startbuf, sizeof(startbuf), INSTR_TIME_GET_DOUBLE(timediff)); |
| cdbexplain_formatSeconds(totalbuf, sizeof(totalbuf), nsi->total); |
| |
| appendStringInfo(str, |
| "/seg%d_%s_%s_%.0f", |
| ns->segindex0 + i, |
| startbuf, |
| totalbuf, |
| nsi->ntuples); |
| } |
| appendStringInfoString(str, "//end\n"); |
| } |
| } /* cdbexplain_showExecStats */ |
| |
| |
| /* |
| * cdbexplain_showExecStatsEnd |
| * Called by qDisp process to format the overall statistics for a query |
| * into the caller's buffer. |
| * |
| * 'ctx' is the CdbExplain_ShowStatCtx object which was created by a call to |
| * cdbexplain_showExecStatsBegin() and contains statistics which have |
| * been accumulated over a series of calls to cdbexplain_showExecStats(). |
| * Invalid on return (it is freed). |
| * 'str' is the output buffer. |
| * |
| * This doesn't free the CdbExplain_ShowStatCtx object or buffers, because |
| * shortly afterwards the caller is expected to destroy the 'explaincxt' |
| * MemoryContext which was passed to cdbexplain_showExecStatsBegin(), thus |
| * freeing all at once. |
| */ |
| void |
| cdbexplain_showExecStatsEnd(struct PlannedStmt *stmt, |
| struct CdbExplain_ShowStatCtx *showstatctx, |
| struct StringInfoData *str, |
| struct EState *estate) |
| { |
| Slice *slice; |
| int sliceIndex; |
| int startlen; |
| int tab; |
| int flag; |
| |
| char avgbuf[50]; |
| char maxbuf[50]; |
| char segbuf[50]; |
| |
| /* |
| * Summary by slice |
| */ |
| if (showstatctx->nslice > 0) |
| appendStringInfoString(str, "Slice statistics:\n"); |
| |
| for (sliceIndex = 0; sliceIndex < showstatctx->nslice; sliceIndex++) |
| { |
| CdbExplain_SliceSummary *ss = &showstatctx->slices[sliceIndex]; |
| CdbExplain_DispatchSummary *ds = &ss->dispatchSummary; |
| |
| startlen = str->len; |
| appendStringInfo(str, " (slice%d) ", sliceIndex); |
| if (sliceIndex < 10) |
| appendStringInfoChar(str, ' '); |
| |
| flag = str->len; |
| appendStringInfoString(str, " "); |
| tab = str->len - startlen; |
| |
| /* Worker counts */ |
| slice = getCurrentSlice(estate, sliceIndex); |
| if (slice && |
| slice->numGangMembersToBeActive > 0 && |
| slice->numGangMembersToBeActive != ss->dispatchSummary.nOk) |
| { |
| int nNotDispatched = slice->numGangMembersToBeActive - ds->nResult + ds->nNotDispatched; |
| |
| str->data[flag] = (ss->dispatchSummary.nError > 0) ? 'X' : '_'; |
| |
| appendStringInfoString(str, "Workers:"); |
| if (ds->nError == 1) |
| { |
| appendStringInfo(str, |
| " %d error;", |
| ds->nError); |
| } |
| else if (ds->nError > 1) |
| { |
| appendStringInfo(str, |
| " %d errors;", |
| ds->nError); |
| } |
| if (ds->nCanceled > 0) |
| { |
| appendStringInfo(str, |
| " %d canceled;", |
| ds->nCanceled); |
| } |
| if (nNotDispatched > 0) |
| { |
| appendStringInfo(str, |
| " %d not dispatched;", |
| nNotDispatched); |
| } |
| if (ds->nIgnorableError > 0) |
| { |
| appendStringInfo(str, |
| " %d aborted;", |
| ds->nIgnorableError); |
| } |
| if (ds->nOk > 0) |
| { |
| appendStringInfo(str, |
| " %d ok;", |
| ds->nOk); |
| } |
| str->len--; |
| appendStringInfoString(str, ". "); |
| } |
| |
| /* Executor memory high-water mark */ |
| cdbexplain_formatMemory(maxbuf, sizeof(maxbuf), ss->peakmemused.vmax); |
| if (ss->peakmemused.vcnt == 1) |
| { |
| const char *seg = segbuf; |
| |
| if (ss->peakmemused.imax >= 0) |
| { |
| cdbexplain_formatSeg(segbuf, sizeof(segbuf), ss->peakmemused.imax, 999,ss->peakmemused.hostnamemax); |
| } |
| else if (slice && |
| slice->gangSize > 0) |
| { |
| seg = " (entry db)"; |
| } |
| else |
| { |
| seg = ""; |
| } |
| appendStringInfo(str, |
| "Executor memory: %s%s.", |
| maxbuf, |
| seg); |
| } |
| else if (ss->peakmemused.vcnt > 1) |
| { |
| cdbexplain_formatMemory(avgbuf, sizeof(avgbuf), cdbexplain_agg_avg(&ss->peakmemused)); |
| cdbexplain_formatSeg(segbuf, sizeof(segbuf), ss->peakmemused.imax, ss->nworker,ss->peakmemused.hostnamemax); |
| appendStringInfo(str, |
| "Executor memory: %s avg x %d workers, %s max%s.", |
| avgbuf, |
| ss->peakmemused.vcnt, |
| maxbuf, |
| segbuf); |
| } |
| |
| if (EXPLAIN_MEMORY_VERBOSITY_SUPPRESS < explain_memory_verbosity) |
| { |
| /* Memory accounting global peak memory usage */ |
| cdbexplain_formatMemory(maxbuf, sizeof(maxbuf), ss->memory_accounting_global_peak.vmax); |
| if (ss->memory_accounting_global_peak.vcnt == 1) |
| { |
| const char *seg = segbuf; |
| |
| if (ss->memory_accounting_global_peak.imax >= 0) |
| { |
| cdbexplain_formatSeg(segbuf, sizeof(segbuf), ss->memory_accounting_global_peak.imax, 999,ss->memory_accounting_global_peak.hostnamemax); |
| } |
| else if (slice && |
| slice->gangSize > 0) |
| { |
| seg = " (entry db)"; |
| } |
| else |
| { |
| seg = ""; |
| } |
| appendStringInfo(str, |
| " Peak memory: %s%s.", |
| maxbuf, |
| seg); |
| } |
| else if (ss->memory_accounting_global_peak.vcnt > 1) |
| { |
| cdbexplain_formatMemory(avgbuf, sizeof(avgbuf), cdbexplain_agg_avg(&ss->memory_accounting_global_peak)); |
| cdbexplain_formatSeg(segbuf, sizeof(segbuf), ss->memory_accounting_global_peak.imax, ss->nworker,ss->memory_accounting_global_peak.hostnamemax); |
| appendStringInfo(str, |
| " Peak memory: %s avg x %d workers, %s max%s.", |
| avgbuf, |
| ss->memory_accounting_global_peak.vcnt, |
| maxbuf, |
| segbuf); |
| } |
| |
| /* Vmem reserved by QEs */ |
| cdbexplain_formatMemory(maxbuf, sizeof(maxbuf), ss->vmem_reserved.vmax); |
| if (ss->vmem_reserved.vcnt == 1) |
| { |
| const char *seg = segbuf; |
| |
| if (ss->vmem_reserved.imax >= 0) |
| { |
| cdbexplain_formatSeg(segbuf, sizeof(segbuf), ss->vmem_reserved.imax, 999,ss->vmem_reserved.hostnamemax); |
| } |
| else if (slice && |
| slice->gangSize > 0) |
| { |
| seg = " (entry db)"; |
| } |
| else |
| { |
| seg = ""; |
| } |
| appendStringInfo(str, |
| " Vmem reserved: %s%s.", |
| maxbuf, |
| seg); |
| } |
| else if (ss->vmem_reserved.vcnt > 1) |
| { |
| cdbexplain_formatMemory(avgbuf, sizeof(avgbuf), cdbexplain_agg_avg(&ss->vmem_reserved)); |
| cdbexplain_formatSeg(segbuf, sizeof(segbuf), ss->vmem_reserved.imax, ss->nworker,ss->vmem_reserved.hostnamemax); |
| appendStringInfo(str, |
| " Vmem reserved: %s avg x %d workers, %s max%s.", |
| avgbuf, |
| ss->vmem_reserved.vcnt, |
| maxbuf, |
| segbuf); |
| } |
| } |
| |
| /* Work_mem used/wanted (max over all nodes and workers of slice) */ |
| if (ss->workmemused_max + ss->workmemwanted_max > 0) |
| { |
| cdbexplain_formatMemory(maxbuf, sizeof(maxbuf), ss->workmemused_max); |
| appendStringInfo(str, " Work_mem: %s max", maxbuf); |
| if (ss->workmemwanted_max > 0) |
| { |
| str->data[flag] = '*'; /* draw attention to this slice */ |
| cdbexplain_formatMemory(maxbuf, sizeof(maxbuf), ss->workmemwanted_max); |
| appendStringInfo(str, ", %s wanted", maxbuf); |
| } |
| appendStringInfoChar(str, '.'); |
| } |
| |
| appendStringInfoChar(str, '\n'); |
| } |
| |
| appendStringInfoString(str, "Statement statistics:\n"); |
| appendStringInfo(str, " Memory used: %.0fK bytes", ceil((double) stmt->query_mem / 1024.0)); |
| |
| if (showstatctx->workmemwanted_max > 0) |
| { |
| appendStringInfo(str, "\n Memory wanted: %.0fK bytes", |
| (double) StatementMemForNoSpillKB(stmt, (uint64) showstatctx->workmemwanted_max / 1024L)); |
| } |
| |
| appendStringInfoChar(str, '\n'); |
| |
| } /* cdbexplain_showExecStatsEnd */ |
| |
| static int |
| cdbexplain_countLeafPartTables(PlanState *planstate) |
| { |
| Assert (IsA(planstate, DynamicTableScanState) || IsA(planstate, DynamicIndexScanState) |
| || IsA(planstate, BitmapTableScanState)); |
| Scan *scan = (Scan *) planstate->plan; |
| |
| Oid root_oid = getrelid(scan->scanrelid, planstate->state->es_range_table); |
| return countLeafPartTables(root_oid); |
| } |