blob: bd64e01649190ff7abf7907f7401fc15bccf507b [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*-------------------------------------------------------------------------
*
* nodeMergejoin.c
* routines supporting merge joins
*
* Portions Copyright (c) 2005-2008, Greenplum inc
* Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/executor/nodeMergejoin.c,v 1.82.2.1 2007/02/02 00:07:28 tgl Exp $
*
*-------------------------------------------------------------------------
*/
/*
* INTERFACE ROUTINES
* ExecMergeJoin mergejoin outer and inner relations.
* ExecInitMergeJoin creates and initializes run time states
* ExecEndMergeJoin cleans up the node.
*
* NOTES
*
* Merge-join is done by joining the inner and outer tuples satisfying
* join clauses of the form ((= outerKey innerKey) ...).
* The join clause list is provided by the query planner and may contain
* more than one (= outerKey innerKey) clause (for composite sort key).
*
* However, the query executor needs to know whether an outer
* tuple is "greater/smaller" than an inner tuple so that it can
* "synchronize" the two relations. For example, consider the following
* relations:
*
* outer: (0 ^1 1 2 5 5 5 6 6 7) current tuple: 1
* inner: (1 ^3 5 5 5 5 6) current tuple: 3
*
* To continue the merge-join, the executor needs to scan both inner
* and outer relations till the matching tuples 5. It needs to know
* that currently inner tuple 3 is "greater" than outer tuple 1 and
* therefore it should scan the outer relation first to find a
* matching tuple and so on.
*
* Therefore, when initializing the merge-join node, we look up the
* associated sort operators. We assume the planner has seen to it
* that the inputs are correctly sorted by these operators. Rather
* than directly executing the merge join clauses, we evaluate the
* left and right key expressions separately and then compare the
* columns one at a time (see MJCompare).
*
*
* Consider the above relations and suppose that the executor has
* just joined the first outer "5" with the last inner "5". The
* next step is of course to join the second outer "5" with all
* the inner "5's". This requires repositioning the inner "cursor"
* to point at the first inner "5". This is done by "marking" the
* first inner 5 so we can restore the "cursor" to it before joining
* with the second outer 5. The access method interface provides
* routines to mark and restore to a tuple.
*
*
* Essential operation of the merge join algorithm is as follows:
*
* Join {
* get initial outer and inner tuples INITIALIZE
* do forever {
* while (outer != inner) { SKIP_TEST
* if (outer < inner)
* advance outer SKIPOUTER_ADVANCE
* else
* advance inner SKIPINNER_ADVANCE
* }
* mark inner position SKIP_TEST
* do forever {
* while (outer == inner) {
* join tuples JOINTUPLES
* advance inner position NEXTINNER
* }
* advance outer position NEXTOUTER
* if (outer == mark) TESTOUTER
* restore inner position to mark TESTOUTER
* else
* break // return to top of outer loop
* }
* }
* }
*
* The merge join operation is coded in the fashion
* of a state machine. At each state, we do something and then
* proceed to another state. This state is stored in the node's
* execution state information and is preserved across calls to
* ExecMergeJoin. -cim 10/31/89
*/
#include "postgres.h"
#include "access/nbtree.h"
#include "catalog/catquery.h"
#include "catalog/pg_amop.h"
#include "cdb/cdbvars.h"
#include "executor/execdebug.h"
#include "executor/execdefs.h"
#include "executor/nodeMergejoin.h"
#include "miscadmin.h"
#include "utils/acl.h"
#include "utils/lsyscache.h"
#include "utils/memutils.h"
#include "utils/syscache.h"
/*
* Comparison strategies supported by MJCompare
*
* XXX eventually should extend these to support descending-order sorts.
* There are some tricky issues however about being sure we are on the same
* page as the underlying sort or index as to which end NULLs sort to.
*/
typedef enum
{
MERGEFUNC_LT, /* raw "<" operator */
MERGEFUNC_CMP /* -1 / 0 / 1 three-way comparator */
} MergeFunctionKind;
/* Runtime data for each mergejoin clause */
typedef struct MergeJoinClauseData
{
/* Executable expression trees */
ExprState *lexpr; /* left-hand (outer) input expression */
ExprState *rexpr; /* right-hand (inner) input expression */
/*
* If we have a current left or right input tuple, the values of the
* expressions are loaded into these fields:
*/
Datum ldatum; /* current left-hand value */
Datum rdatum; /* current right-hand value */
bool lisnull; /* and their isnull flags */
bool risnull;
/*
* Remember whether mergejoin operator is strict (usually it will be).
* NOTE: if it's not strict, we still assume it cannot return true for one
* null and one non-null input.
*/
bool mergestrict;
/*
* CDB: Remember whether the mergejoin operation was actually an "is
* not distinct from" predicate.
*/
bool notdistinct;
/*
* The comparison strategy in use, and the lookup info to let us call the
* needed comparison routines. eqfinfo is the "=" operator itself.
* cmpfinfo is either the btree comparator or the "<" operator.
*/
MergeFunctionKind cmpstrategy;
FmgrInfo eqfinfo;
FmgrInfo cmpfinfo;
} MergeJoinClauseData;
#define MarkInnerTuple(innerTupleSlot, mergestate) \
ExecCopySlot((mergestate)->mj_MarkedTupleSlot, (innerTupleSlot))
/*
* MJExamineQuals
*
* This deconstructs the list of mergejoinable expressions, which is given
* to us by the planner in the form of a list of "leftexpr = rightexpr"
* expression trees in the order matching the sort columns of the inputs.
* We build an array of MergeJoinClause structs containing the information
* we will need at runtime. Each struct essentially tells us how to compare
* the two expressions from the original clause.
*
* The best, most efficient way to compare two expressions is to use a btree
* comparison support routine, since that requires only one function call
* per comparison. Hence we try to find a btree opclass that matches the
* mergejoinable operator. If we cannot find one, we'll have to call both
* the "=" and (often) the "<" operator for each comparison.
*
* CDB: We also recognize the "is not distinct from" predicate which is
* interesting for sequential window plans. The pseudo-Lisp for this
* predicate is (BoolExpr_NOT (DistinctExpr_= leftexpr rightexpr)).
*/
static MergeJoinClause
MJExamineQuals(List *qualList, PlanState *parent)
{
MergeJoinClause clauses;
int nClauses = list_length(qualList);
int iClause;
ListCell *l;
clauses = (MergeJoinClause) palloc0(nClauses * sizeof(MergeJoinClauseData));
iClause = 0;
foreach(l, qualList)
{
OpExpr *qual = (OpExpr *) lfirst(l);
MergeJoinClause clause = &clauses[iClause];
Oid ltop;
Oid gtop;
RegProcedure ltproc;
RegProcedure gtproc;
AclResult aclresult;
CatCList *catlist;
int i;
if (!IsA(qual, OpExpr))
{
BoolExpr *bx = (BoolExpr*)qual;
bool ok = false;
if ( IsA(bx, BoolExpr) && bx->boolop == NOT_EXPR && list_length(bx->args) == 1 )
{
DistinctExpr *dx = (DistinctExpr*)linitial(bx->args);
if ( IsA(dx, DistinctExpr) )
{
clause->notdistinct = true;
qual = (OpExpr *)dx;
ok = true;
}
}
insist_log(ok, "mergejoin clause is not an OpExpr");
}
/*
* Prepare the input expressions for execution.
*/
clause->lexpr = ExecInitExpr((Expr *) linitial(qual->args), parent);
clause->rexpr = ExecInitExpr((Expr *) lsecond(qual->args), parent);
/*
* Check permission to call the mergejoinable operator. For
* predictability, we check this even if we end up not using it.
*/
aclresult = pg_proc_aclcheck(qual->opfuncid, GetUserId(), ACL_EXECUTE);
if (aclresult != ACLCHECK_OK)
aclcheck_error(aclresult, ACL_KIND_PROC,
get_func_name(qual->opfuncid));
/* Set up the fmgr lookup information */
fmgr_info(qual->opfuncid, &(clause->eqfinfo));
/* And remember strictness */
clause->mergestrict = clause->eqfinfo.fn_strict;
/*
* Lookup the comparison operators that go with the mergejoinable
* top-level operator. (This will elog if the operator isn't
* mergejoinable, which would be the planner's mistake.)
*/
op_mergejoin_crossops(qual->opno,
&ltop,
&gtop,
&ltproc,
&gtproc);
clause->cmpstrategy = MERGEFUNC_LT;
/*
* Look for a btree opclass including all three operators. This is
* much like SelectSortFunction except we insist on matching all the
* operators provided, and it can be a cross-type opclass.
*
* XXX for now, insist on forward sort so that NULLs can be counted on
* to be high.
*/
catlist = caql_begin_CacheList(
NULL,
cql("SELECT * FROM pg_amop "
" WHERE amopopr = :1 "
" ORDER BY amopopr, "
" amopclaid ",
ObjectIdGetDatum(qual->opno)));
for (i = 0; i < catlist->n_members; i++)
{
HeapTuple tuple = &catlist->members[i]->tuple;
Form_pg_amop aform = (Form_pg_amop) GETSTRUCT(tuple);
Oid opcid = aform->amopclaid;
if (aform->amopstrategy != BTEqualStrategyNumber)
continue;
if (!opclass_is_btree(opcid))
continue;
if (get_op_opclass_strategy(ltop, opcid) == BTLessStrategyNumber &&
get_op_opclass_strategy(gtop, opcid) == BTGreaterStrategyNumber)
{
clause->cmpstrategy = MERGEFUNC_CMP;
ltproc = get_opclass_proc(opcid, aform->amopsubtype,
BTORDER_PROC);
Assert(RegProcedureIsValid(ltproc));
break; /* done looking */
}
}
caql_end_CacheList(catlist);
/* Check permission to call "<" operator or cmp function */
aclresult = pg_proc_aclcheck(ltproc, GetUserId(), ACL_EXECUTE);
if (aclresult != ACLCHECK_OK)
aclcheck_error(aclresult, ACL_KIND_PROC,
get_func_name(ltproc));
/* Set up the fmgr lookup information */
fmgr_info(ltproc, &(clause->cmpfinfo));
iClause++;
}
return clauses;
}
/*
* MJEvalOuterValues
*
* Compute the values of the mergejoined expressions for the current
* outer tuple. We also detect whether it's impossible for the current
* outer tuple to match anything --- this is true if it yields a NULL
* input for any strict mergejoin operator.
*
* We evaluate the values in OuterEContext, which can be reset each
* time we move to a new tuple.
*/
static bool
MJEvalOuterValues(MergeJoinState *mergestate)
{
ExprContext *econtext = mergestate->mj_OuterEContext;
bool canmatch = true;
int i;
MemoryContext oldContext;
ResetExprContext(econtext);
oldContext = MemoryContextSwitchTo(econtext->ecxt_per_tuple_memory);
econtext->ecxt_outertuple = mergestate->mj_OuterTupleSlot;
for (i = 0; i < mergestate->mj_NumClauses; i++)
{
MergeJoinClause clause = &mergestate->mj_Clauses[i];
clause->ldatum = ExecEvalExpr(clause->lexpr, econtext,
&clause->lisnull, NULL);
if (clause->lisnull && clause->mergestrict)
canmatch = clause->notdistinct;
}
MemoryContextSwitchTo(oldContext);
return canmatch;
}
/*
* MJEvalInnerValues
*
* Same as above, but for the inner tuple. Here, we have to be prepared
* to load data from either the true current inner, or the marked inner,
* so caller must tell us which slot to load from.
*/
static bool
MJEvalInnerValues(MergeJoinState *mergestate, TupleTableSlot *innerslot)
{
ExprContext *econtext = mergestate->mj_InnerEContext;
bool canmatch = true;
int i;
MemoryContext oldContext;
ResetExprContext(econtext);
oldContext = MemoryContextSwitchTo(econtext->ecxt_per_tuple_memory);
econtext->ecxt_innertuple = innerslot;
for (i = 0; i < mergestate->mj_NumClauses; i++)
{
MergeJoinClause clause = &mergestate->mj_Clauses[i];
clause->rdatum = ExecEvalExpr(clause->rexpr, econtext,
&clause->risnull, NULL);
if (clause->risnull && clause->mergestrict)
canmatch = clause->notdistinct;
}
MemoryContextSwitchTo(oldContext);
return canmatch;
}
/*
* MJCompare
*
* Compare the mergejoinable values of the current two input tuples
* and return 0 if they are equal (ie, the mergejoin equalities all
* succeed), +1 if outer > inner, -1 if outer < inner.
*
* MJEvalOuterValues and MJEvalInnerValues must already have been called
* for the current outer and inner tuples, respectively.
*/
static int
MJCompare(MergeJoinState *mergestate)
{
int result = 0;
bool nulleqnull = false;
ExprContext *econtext = mergestate->js.ps.ps_ExprContext;
int i;
MemoryContext oldContext;
FunctionCallInfoData fcinfo;
/*
* Call the comparison functions in short-lived context, in case they leak
* memory.
*/
ResetExprContext(econtext);
oldContext = MemoryContextSwitchTo(econtext->ecxt_per_tuple_memory);
for (i = 0; i < mergestate->mj_NumClauses; i++)
{
MergeJoinClause clause = &mergestate->mj_Clauses[i];
Datum fresult;
/*
* Deal with null inputs. We treat NULL as sorting after non-NULL.
*
* If both inputs are NULL, and the comparison function isn't strict,
* then we call it and check for a true result (this allows operators
* that behave like IS NOT DISTINCT to be mergejoinable). If the
* function is strict or returns false, we temporarily pretend NULL ==
* NULL and contine checking remaining columns.
*/
if (clause->lisnull)
{
if (clause->risnull)
{
if (!clause->eqfinfo.fn_strict)
{
InitFunctionCallInfoData(fcinfo, &(clause->eqfinfo), 2,
NULL, NULL);
fcinfo.arg[0] = clause->ldatum;
fcinfo.arg[1] = clause->rdatum;
fcinfo.argnull[0] = true;
fcinfo.argnull[1] = true;
fresult = FunctionCallInvoke(&fcinfo);
if (!fcinfo.isnull && DatumGetBool(fresult))
{
/* treat nulls as really equal */
continue;
}
}
nulleqnull = !clause->notdistinct;
continue;
}
/* NULL > non-NULL */
result = 1;
break;
}
if (clause->risnull)
{
/* non-NULL < NULL */
result = -1;
break;
}
if (clause->cmpstrategy == MERGEFUNC_LT)
{
InitFunctionCallInfoData(fcinfo, &(clause->eqfinfo), 2,
NULL, NULL);
fcinfo.arg[0] = clause->ldatum;
fcinfo.arg[1] = clause->rdatum;
fcinfo.argnull[0] = false;
fcinfo.argnull[1] = false;
fresult = FunctionCallInvoke(&fcinfo);
if (fcinfo.isnull)
{
nulleqnull = !clause->notdistinct;
continue;
}
else if (DatumGetBool(fresult))
{
/* equal */
continue;
}
InitFunctionCallInfoData(fcinfo, &(clause->cmpfinfo), 2,
NULL, NULL);
fcinfo.arg[0] = clause->ldatum;
fcinfo.arg[1] = clause->rdatum;
fcinfo.argnull[0] = false;
fcinfo.argnull[1] = false;
fresult = FunctionCallInvoke(&fcinfo);
if (fcinfo.isnull)
{
nulleqnull = !clause->notdistinct;
continue;
}
else if (DatumGetBool(fresult))
{
/* less than */
result = -1;
break;
}
else
{
/* greater than */
result = 1;
break;
}
}
else
/* must be MERGEFUNC_CMP */
{
InitFunctionCallInfoData(fcinfo, &(clause->cmpfinfo), 2,
NULL, NULL);
fcinfo.arg[0] = clause->ldatum;
fcinfo.arg[1] = clause->rdatum;
fcinfo.argnull[0] = false;
fcinfo.argnull[1] = false;
fresult = FunctionCallInvoke(&fcinfo);
if (fcinfo.isnull)
{
nulleqnull = !clause->notdistinct;
continue;
}
else if (DatumGetInt32(fresult) == 0)
{
/* equal */
continue;
}
else if (DatumGetInt32(fresult) < 0)
{
/* less than */
result = -1;
break;
}
else
{
/* greater than */
result = 1;
break;
}
}
}
/*
* If we had any null comparison results or NULL-vs-NULL inputs, we do not
* want to report that the tuples are equal. Instead, if result is still
* 0, change it to +1. This will result in advancing the inner side of
* the join.
*/
if (nulleqnull && result == 0)
result = 1;
MemoryContextSwitchTo(oldContext);
return result;
}
/*
* Generate a fake join tuple with nulls for the inner tuple,
* and return it if it passes the non-join quals.
*/
static TupleTableSlot *
MJFillOuter(MergeJoinState *node)
{
ExprContext *econtext = node->js.ps.ps_ExprContext;
List *otherqual = node->js.ps.qual;
ResetExprContext(econtext);
econtext->ecxt_outertuple = node->mj_OuterTupleSlot;
econtext->ecxt_innertuple = node->mj_NullInnerTupleSlot;
if (TupIsNull(node->mj_OuterTupleSlot))
return NULL;
if (ExecQual(otherqual, econtext, false))
{
/*
* qualification succeeded. now form the desired projection tuple and
* return the slot containing it.
*/
MJ_printf("ExecMergeJoin: returning outer fill tuple\n");
return ExecProject(node->js.ps.ps_ProjInfo, NULL);
}
return NULL;
}
/*
* Generate a fake join tuple with nulls for the outer tuple,
* and return it if it passes the non-join quals.
*/
static TupleTableSlot *
MJFillInner(MergeJoinState *node)
{
ExprContext *econtext = node->js.ps.ps_ExprContext;
List *otherqual = node->js.ps.qual;
ResetExprContext(econtext);
/* If we don't have an inner, return NULL */
if(TupIsNull(node->mj_InnerTupleSlot))
return NULL;
econtext->ecxt_outertuple = node->mj_NullOuterTupleSlot;
econtext->ecxt_innertuple = node->mj_InnerTupleSlot;
if (ExecQual(otherqual, econtext, false))
{
/*
* qualification succeeded. now form the desired projection tuple and
* return the slot containing it.
*/
MJ_printf("ExecMergeJoin: returning inner fill tuple\n");
return ExecProject(node->js.ps.ps_ProjInfo, NULL);
}
return NULL;
}
/* ----------------------------------------------------------------
* ExecMergeTupleDump
*
* This function is called through the MJ_dump() macro
* when EXEC_MERGEJOINDEBUG is defined
* ----------------------------------------------------------------
*/
#ifdef EXEC_MERGEJOINDEBUG
static void
ExecMergeTupleDumpOuter(MergeJoinState *mergestate)
{
TupleTableSlot *outerSlot = mergestate->mj_OuterTupleSlot;
printf("==== outer tuple ====\n");
if (TupIsNull(outerSlot))
printf("(nil)\n");
else
MJ_debugtup(outerSlot);
}
static void
ExecMergeTupleDumpInner(MergeJoinState *mergestate)
{
TupleTableSlot *innerSlot = mergestate->mj_InnerTupleSlot;
printf("==== inner tuple ====\n");
if (TupIsNull(innerSlot))
printf("(nil)\n");
else
MJ_debugtup(innerSlot);
}
static void
ExecMergeTupleDumpMarked(MergeJoinState *mergestate)
{
TupleTableSlot *markedSlot = mergestate->mj_MarkedTupleSlot;
printf("==== marked tuple ====\n");
if (TupIsNull(markedSlot))
printf("(nil)\n");
else
MJ_debugtup(markedSlot);
}
static void
ExecMergeTupleDump(MergeJoinState *mergestate)
{
printf("******** ExecMergeTupleDump ********\n");
ExecMergeTupleDumpOuter(mergestate);
ExecMergeTupleDumpInner(mergestate);
ExecMergeTupleDumpMarked(mergestate);
printf("******** \n");
}
#endif
/* ----------------------------------------------------------------
* ExecMergeJoin
* ----------------------------------------------------------------
*/
TupleTableSlot *
ExecMergeJoin(MergeJoinState *node)
{
EState *estate;
List *joinqual;
List *otherqual;
bool qualResult;
int compareResult;
PlanState *innerPlan;
TupleTableSlot *innerTupleSlot;
PlanState *outerPlan;
TupleTableSlot *outerTupleSlot;
ExprContext *econtext;
bool doFillOuter;
bool doFillInner;
/*
* get information from node
*/
estate = node->js.ps.state;
innerPlan = innerPlanState(node);
outerPlan = outerPlanState(node);
econtext = node->js.ps.ps_ExprContext;
joinqual = node->js.joinqual;
otherqual = node->js.ps.qual;
doFillOuter = node->mj_FillOuter;
doFillInner = node->mj_FillInner;
/*
* Reset per-tuple memory context to free any expression evaluation
* storage allocated in the previous tuple cycle. Note this can't happen
* until we're done projecting out tuples from a join tuple.
*/
ResetExprContext(econtext);
/*
* MPP-4165: My fix for MPP-3300 was correct in that we avoided
* the *deadlock* but had very unexpected (and painful)
* performance characteristics: we basically de-pipeline and
* de-parallelize execution of any query which has motion below
* us.
*
* So now prefetch_inner is set (see createplan.c) if we have *any* motion
* below us. If we don't have any motion, it doesn't matter.
*/
if (node->prefetch_inner)
{
innerTupleSlot = ExecProcNode(innerPlan);
if (!TupIsNull(innerTupleSlot))
{
Gpmon_M_Incr(GpmonPktFromMergeJoinState(node), GPMON_MERGEJOIN_INNERTUPLE);
Gpmon_M_Incr(GpmonPktFromMergeJoinState(node), GPMON_QEXEC_M_ROWSIN);
}
node->mj_InnerTupleSlot = innerTupleSlot;
ExecReScan(innerPlan, econtext);
ResetExprContext(econtext);
node->mj_squelchInner = false; /* we will never need to Squelch the inner, we've fetched it all */
node->prefetch_inner = false;
}
/*
* ok, everything is setup.. let's go to work
*/
for (;;)
{
MJ_dump(node);
CheckSendPlanStateGpmonPkt(&node->js.ps);
/*
* get the current state of the join and do things accordingly.
*/
switch (node->mj_JoinState)
{
/*
* EXEC_MJ_INITIALIZE_OUTER means that this is the first time
* ExecMergeJoin() has been called and so we have to fetch the
* first matchable tuple for both outer and inner subplans. We
* do the outer side in INITIALIZE_OUTER state, then advance
* to INITIALIZE_INNER state for the inner subplan.
*/
case EXEC_MJ_INITIALIZE_OUTER:
MJ_printf("ExecMergeJoin: EXEC_MJ_INITIALIZE_OUTER\n");
outerTupleSlot = ExecProcNode(outerPlan);
node->mj_OuterTupleSlot = outerTupleSlot;
if (TupIsNull(outerTupleSlot))
{
MJ_printf("ExecMergeJoin: nothing in outer subplan\n");
if (doFillInner)
{
/*
* Need to emit right-join tuples for remaining inner
* tuples. We set MatchedInner = true to force the
* ENDOUTER state to advance inner.
*/
node->mj_JoinState = EXEC_MJ_ENDOUTER;
node->mj_MatchedInner = true;
break;
}
/*
* CDB: We'll read no more from inner subtree. To keep our
* sibling QEs from being starved, tell source QEs not to
* clog up the pipeline with our never-to-be-consumed
* data.
*/
if (node->mj_squelchInner)
ExecSquelchNode(innerPlan);
/*
* The memory used by child nodes might not be freed because
* they are not eager free safe. However, when the merge join
* is done, we can free the memory used by the child nodes.
*/
ExecEagerFreeMergeJoin(node);
/* Otherwise we're done. */
return NULL;
}
else
{
Gpmon_M_Incr(GpmonPktFromMergeJoinState(node), GPMON_MERGEJOIN_OUTERTUPLE);
Gpmon_M_Incr(GpmonPktFromMergeJoinState(node), GPMON_QEXEC_M_ROWSIN);
}
/* Compute join values and check for unmatchability */
if (MJEvalOuterValues(node))
{
/* OK to go get the first inner tuple */
node->mj_JoinState = EXEC_MJ_INITIALIZE_INNER;
}
else
{
/* Stay in same state to fetch next outer tuple */
if (doFillOuter)
{
/*
* Generate a fake join tuple with nulls for the inner
* tuple, and return it if it passes the non-join
* quals.
*/
TupleTableSlot *result;
result = MJFillOuter(node);
if (result)
{
Gpmon_M_Incr_Rows_Out(GpmonPktFromMergeJoinState(node));
CheckSendPlanStateGpmonPkt(&node->js.ps);
return result;
}
}
}
break;
case EXEC_MJ_INITIALIZE_INNER:
MJ_printf("ExecMergeJoin: EXEC_MJ_INITIALIZE_INNER\n");
innerTupleSlot = ExecProcNode(innerPlan);
node->mj_InnerTupleSlot = innerTupleSlot;
if (TupIsNull(innerTupleSlot))
{
MJ_printf("ExecMergeJoin: nothing in inner subplan\n");
if (doFillOuter)
{
/*
* Need to emit left-join tuples for all outer tuples,
* including the one we just fetched. We set
* MatchedOuter = false to force the ENDINNER state to
* emit first tuple before advancing outer.
*/
node->mj_JoinState = EXEC_MJ_ENDINNER;
node->mj_MatchedOuter = false;
break;
}
/*
* CDB: We'll read no more from outer subtree. To keep our
* sibling QEs from being starved, tell source QEs not to
* clog up the pipeline with our never-to-be-consumed
* data.
*/
ExecSquelchNode(outerPlan);
ExecEagerFreeMergeJoin(node);
/* Otherwise we're done. */
return NULL;
}
else
{
Gpmon_M_Incr(GpmonPktFromMergeJoinState(node), GPMON_MERGEJOIN_INNERTUPLE);
Gpmon_M_Incr(GpmonPktFromMergeJoinState(node), GPMON_QEXEC_M_ROWSIN);
}
/* Compute join values and check for unmatchability */
if (MJEvalInnerValues(node, innerTupleSlot))
{
/*
* OK, we have the initial tuples. Begin by skipping
* non-matching tuples.
*/
node->mj_JoinState = EXEC_MJ_SKIP_TEST;
}
else
{
/* Stay in same state to fetch next inner tuple */
if (doFillInner)
{
/*
* Generate a fake join tuple with nulls for the outer
* tuple, and return it if it passes the non-join
* quals.
*/
TupleTableSlot *result;
result = MJFillInner(node);
if (result)
{
Gpmon_M_Incr_Rows_Out(GpmonPktFromMergeJoinState(node));
CheckSendPlanStateGpmonPkt(&node->js.ps);
return result;
}
}
}
break;
/*
* EXEC_MJ_JOINTUPLES means we have two tuples which satisfied
* the merge clause so we join them and then proceed to get
* the next inner tuple (EXEC_MJ_NEXTINNER).
*/
case EXEC_MJ_JOINTUPLES:
MJ_printf("ExecMergeJoin: EXEC_MJ_JOINTUPLES\n");
/*
* Set the next state machine state. The right things will
* happen whether we return this join tuple or just fall
* through to continue the state machine execution.
*/
node->mj_JoinState = EXEC_MJ_NEXTINNER;
/*
* Check the extra qual conditions to see if we actually want
* to return this join tuple. If not, can proceed with merge.
* We must distinguish the additional joinquals (which must
* pass to consider the tuples "matched" for outer-join logic)
* from the otherquals (which must pass before we actually
* return the tuple).
*
* We don't bother with a ResetExprContext here, on the
* assumption that we just did one while checking the merge
* qual. One per tuple should be sufficient. We do have to
* set up the econtext links to the tuples for ExecQual to
* use.
*/
outerTupleSlot = node->mj_OuterTupleSlot;
econtext->ecxt_outertuple = outerTupleSlot;
innerTupleSlot = node->mj_InnerTupleSlot;
econtext->ecxt_innertuple = innerTupleSlot;
if (node->js.jointype == JOIN_IN &&
node->mj_MatchedOuter)
qualResult = false;
else
{
qualResult = (joinqual == NIL ||
ExecQual(joinqual, econtext, false));
MJ_DEBUG_QUAL(joinqual, qualResult);
}
if (qualResult)
{
node->mj_MatchedOuter = true;
node->mj_MatchedInner = true;
/* In an antijoin, we never return a matched tuple */
if (node->js.jointype == JOIN_LASJ)
{
node->mj_JoinState = EXEC_MJ_NEXTOUTER;
break;
}
qualResult = (otherqual == NIL ||
ExecQual(otherqual, econtext, false));
MJ_DEBUG_QUAL(otherqual, qualResult);
if (qualResult)
{
/*
* qualification succeeded. now form the desired
* projection tuple and return the slot containing it.
*/
MJ_printf("ExecMergeJoin: returning tuple\n");
Gpmon_M_Incr_Rows_Out(GpmonPktFromMergeJoinState(node));
CheckSendPlanStateGpmonPkt(&node->js.ps);
return ExecProject(node->js.ps.ps_ProjInfo, NULL);
}
}
break;
/*
* EXEC_MJ_NEXTINNER means advance the inner scan to the next
* tuple. If the tuple is not nil, we then proceed to test it
* against the join qualification.
*
* Before advancing, we check to see if we must emit an
* outer-join fill tuple for this inner tuple.
*/
case EXEC_MJ_NEXTINNER:
MJ_printf("ExecMergeJoin: EXEC_MJ_NEXTINNER\n");
if (doFillInner && !node->mj_MatchedInner)
{
/*
* Generate a fake join tuple with nulls for the outer
* tuple, and return it if it passes the non-join quals.
*/
TupleTableSlot *result;
node->mj_MatchedInner = true; /* do it only once */
result = MJFillInner(node);
if (result)
{
Gpmon_M_Incr_Rows_Out(GpmonPktFromMergeJoinState(node));
CheckSendPlanStateGpmonPkt(&node->js.ps);
return result;
}
}
/*
* now we get the next inner tuple, if any. If there's none,
* advance to next outer tuple (which may be able to join to
* previously marked tuples).
*/
innerTupleSlot = ExecProcNode(innerPlan);
node->mj_InnerTupleSlot = innerTupleSlot;
MJ_DEBUG_PROC_NODE(innerTupleSlot);
node->mj_MatchedInner = false;
if (TupIsNull(innerTupleSlot))
{
node->mj_JoinState = EXEC_MJ_NEXTOUTER;
if (((MergeJoin*)node->js.ps.plan)->unique_outer)
{
ExecEagerFreeMergeJoin(node);
/* we are done */
return NULL;
}
break;
}
else
{
Gpmon_M_Incr(GpmonPktFromMergeJoinState(node), GPMON_MERGEJOIN_INNERTUPLE);
Gpmon_M_Incr(GpmonPktFromMergeJoinState(node), GPMON_QEXEC_M_ROWSIN);
}
/*
* Load up the new inner tuple's comparison values. If we see
* that it contains a NULL and hence can't match any outer
* tuple, we can skip the comparison and assume the new tuple
* is greater than current outer.
*/
if (!MJEvalInnerValues(node, innerTupleSlot))
{
node->mj_JoinState = EXEC_MJ_NEXTOUTER;
break;
}
/*
* Test the new inner tuple to see if it matches outer.
*
* If they do match, then we join them and move on to the next
* inner tuple (EXEC_MJ_JOINTUPLES).
*
* If they do not match then advance to next outer tuple.
*/
compareResult = MJCompare(node);
MJ_DEBUG_COMPARE(compareResult);
if (compareResult == 0)
node->mj_JoinState = EXEC_MJ_JOINTUPLES;
else
{
Assert(compareResult < 0 || ((MergeJoin*)node->js.ps.plan)->unique_outer);
node->mj_JoinState = EXEC_MJ_NEXTOUTER;
}
break;
/*-------------------------------------------
* EXEC_MJ_NEXTOUTER means
*
* outer inner
* outer tuple - 5 5 - marked tuple
* 5 5
* 6 6 - inner tuple
* 7 7
*
* we know we just bumped into the
* first inner tuple > current outer tuple (or possibly
* the end of the inner stream)
* so get a new outer tuple and then
* proceed to test it against the marked tuple
* (EXEC_MJ_TESTOUTER)
*
* Before advancing, we check to see if we must emit an
* outer-join fill tuple for this outer tuple.
*------------------------------------------------
*/
case EXEC_MJ_NEXTOUTER:
MJ_printf("ExecMergeJoin: EXEC_MJ_NEXTOUTER\n");
if (doFillOuter && !node->mj_MatchedOuter)
{
/*
* Generate a fake join tuple with nulls for the inner
* tuple, and return it if it passes the non-join quals.
*/
TupleTableSlot *result;
node->mj_MatchedOuter = true; /* do it only once */
result = MJFillOuter(node);
if (result)
return result;
}
/*
* now we get the next outer tuple, if any
*/
outerTupleSlot = ExecProcNode(outerPlan);
node->mj_OuterTupleSlot = outerTupleSlot;
MJ_DEBUG_PROC_NODE(outerTupleSlot);
node->mj_MatchedOuter = false;
/*
* if the outer tuple is null then we are done with the join,
* unless we have inner tuples we need to null-fill.
*/
if (TupIsNull(outerTupleSlot))
{
MJ_printf("ExecMergeJoin: end of outer subplan\n");
innerTupleSlot = node->mj_InnerTupleSlot;
if (doFillInner && !TupIsNull(innerTupleSlot))
{
/*
* Need to emit right-join tuples for remaining inner
* tuples.
*/
node->mj_JoinState = EXEC_MJ_ENDOUTER;
break;
}
/*
* CDB: We'll read no more from inner subtree. To keep our
* sibling QEs from being starved, tell source QEs not to
* clog up the pipeline with our never-to-be-consumed
* data.
*/
if (!TupIsNull(innerTupleSlot) && node->mj_squelchInner)
ExecSquelchNode(innerPlan);
ExecEagerFreeMergeJoin(node);
/* Otherwise we're done. */
return NULL;
}
else
{
Gpmon_M_Incr(GpmonPktFromMergeJoinState(node), GPMON_MERGEJOIN_OUTERTUPLE);
Gpmon_M_Incr(GpmonPktFromMergeJoinState(node), GPMON_QEXEC_M_ROWSIN);
}
/* Compute join values and check for unmatchability */
if (MJEvalOuterValues(node))
{
if (((MergeJoin*)node->js.ps.plan)->unique_outer)
/* The current innerTuple will match with this outerTuple.*/
node->mj_JoinState = EXEC_MJ_JOINTUPLES;
else
/* Go test the new tuple against the marked tuple */
node->mj_JoinState = EXEC_MJ_TESTOUTER;
}
else
{
/* Can't match, so fetch next outer tuple */
node->mj_JoinState = EXEC_MJ_NEXTOUTER;
}
break;
/*--------------------------------------------------------
* EXEC_MJ_TESTOUTER If the new outer tuple and the marked
* tuple satisfy the merge clause then we know we have
* duplicates in the outer scan so we have to restore the
* inner scan to the marked tuple and proceed to join the
* new outer tuple with the inner tuples.
*
* This is the case when
* outer inner
* 4 5 - marked tuple
* outer tuple - 5 5
* new outer tuple - 5 5
* 6 8 - inner tuple
* 7 12
*
* new outer tuple == marked tuple
*
* If the outer tuple fails the test, then we are done
* with the marked tuples, and we have to look for a
* match to the current inner tuple. So we will
* proceed to skip outer tuples until outer >= inner
* (EXEC_MJ_SKIP_TEST).
*
* This is the case when
*
* outer inner
* 5 5 - marked tuple
* outer tuple - 5 5
* new outer tuple - 6 8 - inner tuple
* 7 12
*
* new outer tuple > marked tuple
*
*---------------------------------------------------------
*/
case EXEC_MJ_TESTOUTER:
MJ_printf("ExecMergeJoin: EXEC_MJ_TESTOUTER\n");
/*
* Here we must compare the outer tuple with the marked inner
* tuple. (We can ignore the result of MJEvalInnerValues,
* since the marked inner tuple is certainly matchable.)
*/
innerTupleSlot = node->mj_MarkedTupleSlot;
(void) MJEvalInnerValues(node, innerTupleSlot);
compareResult = MJCompare(node);
MJ_DEBUG_COMPARE(compareResult);
if (compareResult == 0)
{
/*
* the merge clause matched so now we restore the inner
* scan position to the first mark, and go join that tuple
* (and any following ones) to the new outer.
*
* NOTE: we do not need to worry about the MatchedInner
* state for the rescanned inner tuples. We know all of
* them will match this new outer tuple and therefore
* won't be emitted as fill tuples. This works *only*
* because we require the extra joinquals to be nil when
* doing a right or full join --- otherwise some of the
* rescanned tuples might fail the extra joinquals.
*/
ExecRestrPos(innerPlan);
/*
* ExecRestrPos probably should give us back a new Slot,
* but since it doesn't, use the marked slot. (The
* previously returned mj_InnerTupleSlot cannot be assumed
* to hold the required tuple.)
*/
node->mj_InnerTupleSlot = innerTupleSlot;
/* we need not do MJEvalInnerValues again */
node->mj_JoinState = EXEC_MJ_JOINTUPLES;
}
else
{
/* ----------------
* if the new outer tuple didn't match the marked inner
* tuple then we have a case like:
*
* outer inner
* 4 4 - marked tuple
* new outer - 5 4
* 6 5 - inner tuple
* 7
*
* which means that all subsequent outer tuples will be
* larger than our marked inner tuples. So we need not
* revisit any of the marked tuples but can proceed to
* look for a match to the current inner. If there's
* no more inners, we are done.
* ----------------
*/
if (compareResult <= 0 && !((MergeJoin*)node->js.ps.plan)->unique_outer)
insist_log(false, "Mergejoin: compareResult > 0, bad plan ?");
innerTupleSlot = node->mj_InnerTupleSlot;
if (TupIsNull(innerTupleSlot))
{
if (doFillOuter)
{
/*
* Need to emit left-join tuples for remaining
* outer tuples.
*/
node->mj_JoinState = EXEC_MJ_ENDINNER;
break;
}
/*
* CDB: We'll read no more from outer subtree. To keep
* our sibling QEs from being starved, tell source QEs
* not to clog up the pipeline with our
* never-to-be-consumed data.
*/
ExecSquelchNode(outerPlan);
ExecEagerFreeMergeJoin(node);
/* Otherwise we're done. */
return NULL;
}
/* reload comparison data for current inner */
if (MJEvalInnerValues(node, innerTupleSlot))
{
/* proceed to compare it to the current outer */
node->mj_JoinState = EXEC_MJ_SKIP_TEST;
}
else
{
/*
* current inner can't possibly match any outer;
* better to advance the inner scan than the outer.
*/
node->mj_JoinState = EXEC_MJ_SKIPINNER_ADVANCE;
}
}
break;
/*----------------------------------------------------------
* EXEC_MJ_SKIP means compare tuples and if they do not
* match, skip whichever is lesser.
*
* For example:
*
* outer inner
* 5 5
* 5 5
* outer tuple - 6 8 - inner tuple
* 7 12
* 8 14
*
* we have to advance the outer scan
* until we find the outer 8.
*
* On the other hand:
*
* outer inner
* 5 5
* 5 5
* outer tuple - 12 8 - inner tuple
* 14 10
* 17 12
*
* we have to advance the inner scan
* until we find the inner 12.
*----------------------------------------------------------
*/
case EXEC_MJ_SKIP_TEST:
MJ_printf("ExecMergeJoin: EXEC_MJ_SKIP_TEST\n");
/*
* before we advance, make sure the current tuples do not
* satisfy the mergeclauses. If they do, then we update the
* marked tuple position and go join them.
*/
compareResult = MJCompare(node);
MJ_DEBUG_COMPARE(compareResult);
if (compareResult == 0)
{
if (!((MergeJoin*)node->js.ps.plan)->unique_outer)
{
ExecMarkPos(innerPlan);
MarkInnerTuple(node->mj_InnerTupleSlot, node);
}
node->mj_JoinState = EXEC_MJ_JOINTUPLES;
}
else if (compareResult < 0)
node->mj_JoinState = EXEC_MJ_SKIPOUTER_ADVANCE;
else
/* compareResult > 0 */
node->mj_JoinState = EXEC_MJ_SKIPINNER_ADVANCE;
break;
/*
* Before advancing, we check to see if we must emit an
* outer-join fill tuple for this outer tuple.
*/
case EXEC_MJ_SKIPOUTER_ADVANCE:
MJ_printf("ExecMergeJoin: EXEC_MJ_SKIPOUTER_ADVANCE\n");
if (doFillOuter && !node->mj_MatchedOuter)
{
/*
* Generate a fake join tuple with nulls for the inner
* tuple, and return it if it passes the non-join quals.
*/
TupleTableSlot *result;
node->mj_MatchedOuter = true; /* do it only once */
result = MJFillOuter(node);
if (result)
{
Gpmon_M_Incr_Rows_Out(GpmonPktFromMergeJoinState(node));
CheckSendPlanStateGpmonPkt(&node->js.ps);
return result;
}
}
/*
* now we get the next outer tuple, if any
*/
outerTupleSlot = ExecProcNode(outerPlan);
node->mj_OuterTupleSlot = outerTupleSlot;
MJ_DEBUG_PROC_NODE(outerTupleSlot);
node->mj_MatchedOuter = false;
/*
* if the outer tuple is null then we are done with the join,
* unless we have inner tuples we need to null-fill.
*/
if (TupIsNull(outerTupleSlot))
{
MJ_printf("ExecMergeJoin: end of outer subplan\n");
innerTupleSlot = node->mj_InnerTupleSlot;
if (doFillInner && !TupIsNull(innerTupleSlot))
{
/*
* Need to emit right-join tuples for remaining inner
* tuples.
*/
node->mj_JoinState = EXEC_MJ_ENDOUTER;
break;
}
/*
* CDB: We'll read no more from inner subtree. To keep our
* sibling QEs from being starved, tell source QEs not to
* clog up the pipeline with our never-to-be-consumed
* data.
*/
if (!TupIsNull(innerTupleSlot) && node->mj_squelchInner)
ExecSquelchNode(innerPlan);
ExecEagerFreeMergeJoin(node);
/* Otherwise we're done. */
return NULL;
}
else
{
Gpmon_M_Incr(GpmonPktFromMergeJoinState(node), GPMON_MERGEJOIN_OUTERTUPLE);
Gpmon_M_Incr(GpmonPktFromMergeJoinState(node), GPMON_QEXEC_M_ROWSIN);
}
/* Compute join values and check for unmatchability */
if (MJEvalOuterValues(node))
{
/* Go test the new tuple against the current inner */
node->mj_JoinState = EXEC_MJ_SKIP_TEST;
}
else
{
/* Can't match, so fetch next outer tuple */
node->mj_JoinState = EXEC_MJ_SKIPOUTER_ADVANCE;
}
break;
/*
* Before advancing, we check to see if we must emit an
* outer-join fill tuple for this inner tuple.
*/
case EXEC_MJ_SKIPINNER_ADVANCE:
MJ_printf("ExecMergeJoin: EXEC_MJ_SKIPINNER_ADVANCE\n");
if (doFillInner && !node->mj_MatchedInner)
{
/*
* Generate a fake join tuple with nulls for the outer
* tuple, and return it if it passes the non-join quals.
*/
TupleTableSlot *result;
node->mj_MatchedInner = true; /* do it only once */
result = MJFillInner(node);
if (result)
{
Gpmon_M_Incr_Rows_Out(GpmonPktFromMergeJoinState(node));
CheckSendPlanStateGpmonPkt(&node->js.ps);
return result;
}
}
/*
* now we get the next inner tuple, if any
*/
innerTupleSlot = ExecProcNode(innerPlan);
node->mj_InnerTupleSlot = innerTupleSlot;
MJ_DEBUG_PROC_NODE(innerTupleSlot);
node->mj_MatchedInner = false;
/*
* if the inner tuple is null then we are done with the join,
* unless we have outer tuples we need to null-fill.
*/
if (TupIsNull(innerTupleSlot))
{
MJ_printf("ExecMergeJoin: end of inner subplan\n");
outerTupleSlot = node->mj_OuterTupleSlot;
if (doFillOuter && !TupIsNull(outerTupleSlot))
{
/*
* Need to emit left-join tuples for remaining outer
* tuples.
*/
node->mj_JoinState = EXEC_MJ_ENDINNER;
break;
}
/*
* CDB: We'll read no more from outer subtree. To keep our
* sibling QEs from being starved, tell source QEs not to
* clog up the pipeline with our never-to-be-consumed
* data.
*/
if (!TupIsNull(outerTupleSlot))
ExecSquelchNode(outerPlan);
ExecEagerFreeMergeJoin(node);
/* Otherwise we're done. */
return NULL;
}
else
{
Gpmon_M_Incr(GpmonPktFromMergeJoinState(node), GPMON_MERGEJOIN_INNERTUPLE);
Gpmon_M_Incr(GpmonPktFromMergeJoinState(node), GPMON_QEXEC_M_ROWSIN);
}
/* Compute join values and check for unmatchability */
if (MJEvalInnerValues(node, innerTupleSlot))
{
/* proceed to compare it to the current outer */
node->mj_JoinState = EXEC_MJ_SKIP_TEST;
}
else
{
/*
* current inner can't possibly match any outer; better to
* advance the inner scan than the outer.
*/
node->mj_JoinState = EXEC_MJ_SKIPINNER_ADVANCE;
}
break;
/*
* EXEC_MJ_ENDOUTER means we have run out of outer tuples, but
* are doing a right/full join and therefore must null-fill
* any remaing unmatched inner tuples.
*/
case EXEC_MJ_ENDOUTER:
MJ_printf("ExecMergeJoin: EXEC_MJ_ENDOUTER\n");
Assert(doFillInner);
if (!node->mj_MatchedInner)
{
/*
* Generate a fake join tuple with nulls for the outer
* tuple, and return it if it passes the non-join quals.
*/
TupleTableSlot *result;
node->mj_MatchedInner = true; /* do it only once */
result = MJFillInner(node);
if (result)
{
Gpmon_M_Incr_Rows_Out(GpmonPktFromMergeJoinState(node));
CheckSendPlanStateGpmonPkt(&node->js.ps);
return result;
}
}
/*
* now we get the next inner tuple, if any
*/
innerTupleSlot = ExecProcNode(innerPlan);
node->mj_InnerTupleSlot = innerTupleSlot;
MJ_DEBUG_PROC_NODE(innerTupleSlot);
node->mj_MatchedInner = false;
if (TupIsNull(innerTupleSlot))
{
MJ_printf("ExecMergeJoin: end of inner subplan\n");
ExecEagerFreeMergeJoin(node);
return NULL;
}
else
{
Gpmon_M_Incr(GpmonPktFromMergeJoinState(node), GPMON_MERGEJOIN_INNERTUPLE);
Gpmon_M_Incr(GpmonPktFromMergeJoinState(node), GPMON_QEXEC_M_ROWSIN);
}
/* Else remain in ENDOUTER state and process next tuple. */
break;
/*
* EXEC_MJ_ENDINNER means we have run out of inner tuples, but
* are doing a left/full join and therefore must null- fill
* any remaing unmatched outer tuples.
*/
case EXEC_MJ_ENDINNER:
MJ_printf("ExecMergeJoin: EXEC_MJ_ENDINNER\n");
Assert(doFillOuter);
if (!node->mj_MatchedOuter)
{
/*
* Generate a fake join tuple with nulls for the inner
* tuple, and return it if it passes the non-join quals.
*/
TupleTableSlot *result;
node->mj_MatchedOuter = true; /* do it only once */
result = MJFillOuter(node);
if (result)
{
Gpmon_M_Incr_Rows_Out(GpmonPktFromMergeJoinState(node));
CheckSendPlanStateGpmonPkt(&node->js.ps);
return result;
}
}
/*
* now we get the next outer tuple, if any
*/
outerTupleSlot = ExecProcNode(outerPlan);
node->mj_OuterTupleSlot = outerTupleSlot;
MJ_DEBUG_PROC_NODE(outerTupleSlot);
node->mj_MatchedOuter = false;
if (TupIsNull(outerTupleSlot))
{
MJ_printf("ExecMergeJoin: end of outer subplan\n");
ExecEagerFreeMergeJoin(node);
return NULL;
}
else
{
Gpmon_M_Incr(GpmonPktFromMergeJoinState(node), GPMON_MERGEJOIN_OUTERTUPLE);
Gpmon_M_Incr(GpmonPktFromMergeJoinState(node), GPMON_QEXEC_M_ROWSIN);
}
/* Else remain in ENDINNER state and process next tuple. */
break;
/*
* broken state value?
*/
default:
insist_log(false, "unrecognized mergejoin state: %d",
(int) node->mj_JoinState);
}
}
}
/* ----------------------------------------------------------------
* ExecInitMergeJoin
* ----------------------------------------------------------------
*/
MergeJoinState *
ExecInitMergeJoin(MergeJoin *node, EState *estate, int eflags)
{
MergeJoinState *mergestate;
int markflag;
/* check for unsupported flags */
Assert(!(eflags & (EXEC_FLAG_BACKWARD | EXEC_FLAG_MARK)));
MJ1_printf("ExecInitMergeJoin: %s\n",
"initializing node");
/*
* create state structure
*/
mergestate = makeNode(MergeJoinState);
mergestate->js.ps.plan = (Plan *) node;
mergestate->js.ps.state = estate;
/*
* Miscellaneous initialization
*
* create expression context for node
*/
ExecAssignExprContext(estate, &mergestate->js.ps);
/*
* we need two additional econtexts in which we can compute the join
* expressions from the left and right input tuples. The node's regular
* econtext won't do because it gets reset too often.
*/
mergestate->mj_OuterEContext = CreateExprContext(estate);
mergestate->mj_InnerEContext = CreateExprContext(estate);
/*
* initialize child expressions
*/
mergestate->js.ps.targetlist = (List *)
ExecInitExpr((Expr *) node->join.plan.targetlist,
(PlanState *) mergestate);
mergestate->js.ps.qual = (List *)
ExecInitExpr((Expr *) node->join.plan.qual,
(PlanState *) mergestate);
mergestate->js.jointype = node->join.jointype;
mergestate->js.joinqual = (List *)
ExecInitExpr((Expr *) node->join.joinqual,
(PlanState *) mergestate);
mergestate->prefetch_inner = node->join.prefetch_inner;
mergestate->mj_squelchInner = true;
/* mergeclauses are handled below */
/*
* initialize child nodes
*
* inner child must support MARK/RESTORE unless the outer plan is
* known to have no duplicate merge keys.
*/
markflag = node->unique_outer ? 0 : EXEC_FLAG_MARK;
outerPlanState(mergestate) = ExecInitNode(outerPlan(node), estate, eflags);
innerPlanState(mergestate) = ExecInitNode(innerPlan(node), estate,
eflags | markflag);
#define MERGEJOIN_NSLOTS 4
/*
* tuple table initialization
*/
ExecInitResultTupleSlot(estate, &mergestate->js.ps);
mergestate->mj_MarkedTupleSlot = ExecInitExtraTupleSlot(estate);
ExecSetSlotDescriptor(mergestate->mj_MarkedTupleSlot,
ExecGetResultType(innerPlanState(mergestate)));
switch (node->join.jointype)
{
case JOIN_INNER:
case JOIN_IN:
mergestate->mj_FillOuter = false;
mergestate->mj_FillInner = false;
break;
case JOIN_LEFT:
case JOIN_LASJ:
mergestate->mj_FillOuter = true;
mergestate->mj_FillInner = false;
mergestate->mj_NullInnerTupleSlot =
ExecInitNullTupleSlot(estate,
ExecGetResultType(innerPlanState(mergestate)));
break;
case JOIN_RIGHT:
mergestate->mj_FillOuter = false;
mergestate->mj_FillInner = true;
mergestate->mj_NullOuterTupleSlot =
ExecInitNullTupleSlot(estate,
ExecGetResultType(outerPlanState(mergestate)));
/*
* Can't handle right or full join with non-nil extra joinclauses.
* This should have been caught by planner.
*/
if (node->join.joinqual != NIL)
ereport(ERROR,
(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
errmsg("RIGHT JOIN is only supported with merge-joinable join conditions")));
break;
case JOIN_FULL:
mergestate->mj_FillOuter = true;
mergestate->mj_FillInner = true;
mergestate->mj_NullOuterTupleSlot =
ExecInitNullTupleSlot(estate,
ExecGetResultType(outerPlanState(mergestate)));
mergestate->mj_NullInnerTupleSlot =
ExecInitNullTupleSlot(estate,
ExecGetResultType(innerPlanState(mergestate)));
/*
* Can't handle right or full join with non-nil extra joinclauses.
*/
if (node->join.joinqual != NIL)
ereport(ERROR,
(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
errmsg("FULL JOIN is only supported with merge-joinable join conditions")));
break;
case JOIN_LASJ_NOTIN:
insist_log(false, "join type not supported");
break;
default:
insist_log(false, "unrecognized join type: %d",
(int) node->join.jointype);
}
/*
* initialize tuple type and projection info
*/
ExecAssignResultTypeFromTL(&mergestate->js.ps);
ExecAssignProjectionInfo(&mergestate->js.ps, NULL);
/*
* preprocess the merge clauses
*/
mergestate->mj_NumClauses = list_length(node->mergeclauses);
mergestate->mj_Clauses = MJExamineQuals(node->mergeclauses,
(PlanState *) mergestate);
/*
* initialize join state
*/
mergestate->mj_JoinState = EXEC_MJ_INITIALIZE_OUTER;
mergestate->mj_MatchedOuter = false;
mergestate->mj_MatchedInner = false;
mergestate->mj_OuterTupleSlot = NULL;
mergestate->mj_InnerTupleSlot = NULL;
/*
* initialization successful
*/
MJ1_printf("ExecInitMergeJoin: %s\n",
"node initialized");
initGpmonPktForMergeJoin((Plan *)node, &mergestate->js.ps.gpmon_pkt, estate);
return mergestate;
}
int
ExecCountSlotsMergeJoin(MergeJoin *node)
{
return ExecCountSlotsNode(outerPlan((Plan *) node)) +
ExecCountSlotsNode(innerPlan((Plan *) node)) +
MERGEJOIN_NSLOTS;
}
/* ----------------------------------------------------------------
* ExecEndMergeJoin
*
* old comments
* frees storage allocated through C routines.
* ----------------------------------------------------------------
*/
void
ExecEndMergeJoin(MergeJoinState *node)
{
MJ1_printf("ExecEndMergeJoin: %s\n",
"ending node processing");
/*
* Free the exprcontext
*/
ExecFreeExprContext(&node->js.ps);
/*
* clean out the tuple table
*/
ExecClearTuple(node->js.ps.ps_ResultTupleSlot);
ExecClearTuple(node->mj_MarkedTupleSlot);
/*
* shut down the subplans
*/
ExecEndNode(innerPlanState(node));
ExecEndNode(outerPlanState(node));
MJ1_printf("ExecEndMergeJoin: %s\n",
"node processing ended");
EndPlanStateGpmonPkt(&node->js.ps);
}
void
ExecReScanMergeJoin(MergeJoinState *node, ExprContext *exprCtxt)
{
ExecClearTuple(node->mj_MarkedTupleSlot);
node->mj_JoinState = EXEC_MJ_INITIALIZE_OUTER;
node->mj_MatchedOuter = false;
node->mj_MatchedInner = false;
node->mj_OuterTupleSlot = NULL;
node->mj_InnerTupleSlot = NULL;
/*
* if chgParam of subnodes is not null then plans will be re-scanned by
* first ExecProcNode.
*/
if (((PlanState *) node)->lefttree->chgParam == NULL)
ExecReScan(((PlanState *) node)->lefttree, exprCtxt);
if (((PlanState *) node)->righttree->chgParam == NULL)
ExecReScan(((PlanState *) node)->righttree, exprCtxt);
}
void
initGpmonPktForMergeJoin(Plan *planNode, gpmon_packet_t *gpmon_pkt, EState *estate)
{
Assert(planNode != NULL && gpmon_pkt != NULL && IsA(planNode, MergeJoin));
{
PerfmonNodeType type = PMNT_Invalid;
switch(((MergeJoin *)planNode)->join.jointype)
{
case JOIN_INNER:
type = PMNT_MergeJoin;
break;
case JOIN_LEFT:
type = PMNT_MergeLeftJoin;
break;
case JOIN_LASJ:
type = PMNT_MergeLeftAntiSemiJoin;
break;
case JOIN_FULL:
type = PMNT_MergeFullJoin;
break;
case JOIN_RIGHT:
type = PMNT_MergeRightJoin;
break;
case JOIN_IN:
type = PMNT_MergeExistsJoin;
break;
case JOIN_REVERSE_IN:
type = PMNT_MergeReverseInJoin;
break;
case JOIN_UNIQUE_OUTER:
type = PMNT_MergeUniqueOuterJoin;
break;
case JOIN_UNIQUE_INNER:
type = PMNT_MergeUniqueInnerJoin;
break;
case JOIN_LASJ_NOTIN:
insist_log(false, "Join type not supported");
break;
}
Assert(type != PMNT_Invalid);
Assert(GPMON_MERGEJOIN_TOTAL <= (int) GPMON_QEXEC_M_COUNT);
InitPlanNodeGpmonPkt(planNode, gpmon_pkt, estate, type,
(int64)planNode->plan_rows,
NULL);
}
}
void
ExecEagerFreeMergeJoin(MergeJoinState *node)
{
/*
* Since MergeJoin might call Mark/restore on its child nodes, its child nodes
* are not eager free safe. We will free their memory here.
*/
ExecEagerFreeChildNodes((PlanState *)node, false);
}