| /* |
| * Licensed to the Apache Software Foundation (ASF) under one |
| * or more contributor license agreements. See the NOTICE file |
| * distributed with this work for additional information |
| * regarding copyright ownership. The ASF licenses this file |
| * to you under the Apache License, Version 2.0 (the |
| * "License"); you may not use this file except in compliance |
| * with the License. You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
| |
| /*------------------------------------------------------------------------- |
| * |
| * nodeMergejoin.c |
| * routines supporting merge joins |
| * |
| * Portions Copyright (c) 2005-2008, Greenplum inc |
| * Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group |
| * Portions Copyright (c) 1994, Regents of the University of California |
| * |
| * |
| * IDENTIFICATION |
| * $PostgreSQL: pgsql/src/backend/executor/nodeMergejoin.c,v 1.82.2.1 2007/02/02 00:07:28 tgl Exp $ |
| * |
| *------------------------------------------------------------------------- |
| */ |
| /* |
| * INTERFACE ROUTINES |
| * ExecMergeJoin mergejoin outer and inner relations. |
| * ExecInitMergeJoin creates and initializes run time states |
| * ExecEndMergeJoin cleans up the node. |
| * |
| * NOTES |
| * |
| * Merge-join is done by joining the inner and outer tuples satisfying |
| * join clauses of the form ((= outerKey innerKey) ...). |
| * The join clause list is provided by the query planner and may contain |
| * more than one (= outerKey innerKey) clause (for composite sort key). |
| * |
| * However, the query executor needs to know whether an outer |
| * tuple is "greater/smaller" than an inner tuple so that it can |
| * "synchronize" the two relations. For example, consider the following |
| * relations: |
| * |
| * outer: (0 ^1 1 2 5 5 5 6 6 7) current tuple: 1 |
| * inner: (1 ^3 5 5 5 5 6) current tuple: 3 |
| * |
| * To continue the merge-join, the executor needs to scan both inner |
| * and outer relations till the matching tuples 5. It needs to know |
| * that currently inner tuple 3 is "greater" than outer tuple 1 and |
| * therefore it should scan the outer relation first to find a |
| * matching tuple and so on. |
| * |
| * Therefore, when initializing the merge-join node, we look up the |
| * associated sort operators. We assume the planner has seen to it |
| * that the inputs are correctly sorted by these operators. Rather |
| * than directly executing the merge join clauses, we evaluate the |
| * left and right key expressions separately and then compare the |
| * columns one at a time (see MJCompare). |
| * |
| * |
| * Consider the above relations and suppose that the executor has |
| * just joined the first outer "5" with the last inner "5". The |
| * next step is of course to join the second outer "5" with all |
| * the inner "5's". This requires repositioning the inner "cursor" |
| * to point at the first inner "5". This is done by "marking" the |
| * first inner 5 so we can restore the "cursor" to it before joining |
| * with the second outer 5. The access method interface provides |
| * routines to mark and restore to a tuple. |
| * |
| * |
| * Essential operation of the merge join algorithm is as follows: |
| * |
| * Join { |
| * get initial outer and inner tuples INITIALIZE |
| * do forever { |
| * while (outer != inner) { SKIP_TEST |
| * if (outer < inner) |
| * advance outer SKIPOUTER_ADVANCE |
| * else |
| * advance inner SKIPINNER_ADVANCE |
| * } |
| * mark inner position SKIP_TEST |
| * do forever { |
| * while (outer == inner) { |
| * join tuples JOINTUPLES |
| * advance inner position NEXTINNER |
| * } |
| * advance outer position NEXTOUTER |
| * if (outer == mark) TESTOUTER |
| * restore inner position to mark TESTOUTER |
| * else |
| * break // return to top of outer loop |
| * } |
| * } |
| * } |
| * |
| * The merge join operation is coded in the fashion |
| * of a state machine. At each state, we do something and then |
| * proceed to another state. This state is stored in the node's |
| * execution state information and is preserved across calls to |
| * ExecMergeJoin. -cim 10/31/89 |
| */ |
| #include "postgres.h" |
| |
| #include "access/nbtree.h" |
| #include "catalog/catquery.h" |
| #include "catalog/pg_amop.h" |
| #include "cdb/cdbvars.h" |
| #include "executor/execdebug.h" |
| #include "executor/execdefs.h" |
| #include "executor/nodeMergejoin.h" |
| #include "miscadmin.h" |
| #include "utils/acl.h" |
| #include "utils/lsyscache.h" |
| #include "utils/memutils.h" |
| #include "utils/syscache.h" |
| |
| |
| /* |
| * Comparison strategies supported by MJCompare |
| * |
| * XXX eventually should extend these to support descending-order sorts. |
| * There are some tricky issues however about being sure we are on the same |
| * page as the underlying sort or index as to which end NULLs sort to. |
| */ |
| typedef enum |
| { |
| MERGEFUNC_LT, /* raw "<" operator */ |
| MERGEFUNC_CMP /* -1 / 0 / 1 three-way comparator */ |
| } MergeFunctionKind; |
| |
| /* Runtime data for each mergejoin clause */ |
| typedef struct MergeJoinClauseData |
| { |
| /* Executable expression trees */ |
| ExprState *lexpr; /* left-hand (outer) input expression */ |
| ExprState *rexpr; /* right-hand (inner) input expression */ |
| |
| /* |
| * If we have a current left or right input tuple, the values of the |
| * expressions are loaded into these fields: |
| */ |
| Datum ldatum; /* current left-hand value */ |
| Datum rdatum; /* current right-hand value */ |
| bool lisnull; /* and their isnull flags */ |
| bool risnull; |
| |
| /* |
| * Remember whether mergejoin operator is strict (usually it will be). |
| * NOTE: if it's not strict, we still assume it cannot return true for one |
| * null and one non-null input. |
| */ |
| bool mergestrict; |
| |
| /* |
| * CDB: Remember whether the mergejoin operation was actually an "is |
| * not distinct from" predicate. |
| */ |
| bool notdistinct; |
| |
| /* |
| * The comparison strategy in use, and the lookup info to let us call the |
| * needed comparison routines. eqfinfo is the "=" operator itself. |
| * cmpfinfo is either the btree comparator or the "<" operator. |
| */ |
| MergeFunctionKind cmpstrategy; |
| FmgrInfo eqfinfo; |
| FmgrInfo cmpfinfo; |
| } MergeJoinClauseData; |
| |
| |
| #define MarkInnerTuple(innerTupleSlot, mergestate) \ |
| ExecCopySlot((mergestate)->mj_MarkedTupleSlot, (innerTupleSlot)) |
| |
| |
| /* |
| * MJExamineQuals |
| * |
| * This deconstructs the list of mergejoinable expressions, which is given |
| * to us by the planner in the form of a list of "leftexpr = rightexpr" |
| * expression trees in the order matching the sort columns of the inputs. |
| * We build an array of MergeJoinClause structs containing the information |
| * we will need at runtime. Each struct essentially tells us how to compare |
| * the two expressions from the original clause. |
| * |
| * The best, most efficient way to compare two expressions is to use a btree |
| * comparison support routine, since that requires only one function call |
| * per comparison. Hence we try to find a btree opclass that matches the |
| * mergejoinable operator. If we cannot find one, we'll have to call both |
| * the "=" and (often) the "<" operator for each comparison. |
| * |
| * CDB: We also recognize the "is not distinct from" predicate which is |
| * interesting for sequential window plans. The pseudo-Lisp for this |
| * predicate is (BoolExpr_NOT (DistinctExpr_= leftexpr rightexpr)). |
| */ |
| static MergeJoinClause |
| MJExamineQuals(List *qualList, PlanState *parent) |
| { |
| MergeJoinClause clauses; |
| int nClauses = list_length(qualList); |
| int iClause; |
| ListCell *l; |
| |
| clauses = (MergeJoinClause) palloc0(nClauses * sizeof(MergeJoinClauseData)); |
| |
| iClause = 0; |
| foreach(l, qualList) |
| { |
| OpExpr *qual = (OpExpr *) lfirst(l); |
| MergeJoinClause clause = &clauses[iClause]; |
| Oid ltop; |
| Oid gtop; |
| RegProcedure ltproc; |
| RegProcedure gtproc; |
| AclResult aclresult; |
| CatCList *catlist; |
| int i; |
| |
| if (!IsA(qual, OpExpr)) |
| { |
| BoolExpr *bx = (BoolExpr*)qual; |
| bool ok = false; |
| |
| if ( IsA(bx, BoolExpr) && bx->boolop == NOT_EXPR && list_length(bx->args) == 1 ) |
| { |
| DistinctExpr *dx = (DistinctExpr*)linitial(bx->args); |
| |
| if ( IsA(dx, DistinctExpr) ) |
| { |
| clause->notdistinct = true; |
| qual = (OpExpr *)dx; |
| ok = true; |
| } |
| } |
| insist_log(ok, "mergejoin clause is not an OpExpr"); |
| } |
| |
| /* |
| * Prepare the input expressions for execution. |
| */ |
| clause->lexpr = ExecInitExpr((Expr *) linitial(qual->args), parent); |
| clause->rexpr = ExecInitExpr((Expr *) lsecond(qual->args), parent); |
| |
| /* |
| * Check permission to call the mergejoinable operator. For |
| * predictability, we check this even if we end up not using it. |
| */ |
| aclresult = pg_proc_aclcheck(qual->opfuncid, GetUserId(), ACL_EXECUTE); |
| if (aclresult != ACLCHECK_OK) |
| aclcheck_error(aclresult, ACL_KIND_PROC, |
| get_func_name(qual->opfuncid)); |
| |
| /* Set up the fmgr lookup information */ |
| fmgr_info(qual->opfuncid, &(clause->eqfinfo)); |
| |
| /* And remember strictness */ |
| clause->mergestrict = clause->eqfinfo.fn_strict; |
| |
| /* |
| * Lookup the comparison operators that go with the mergejoinable |
| * top-level operator. (This will elog if the operator isn't |
| * mergejoinable, which would be the planner's mistake.) |
| */ |
| op_mergejoin_crossops(qual->opno, |
| <op, |
| >op, |
| <proc, |
| >proc); |
| |
| clause->cmpstrategy = MERGEFUNC_LT; |
| |
| /* |
| * Look for a btree opclass including all three operators. This is |
| * much like SelectSortFunction except we insist on matching all the |
| * operators provided, and it can be a cross-type opclass. |
| * |
| * XXX for now, insist on forward sort so that NULLs can be counted on |
| * to be high. |
| */ |
| catlist = caql_begin_CacheList( |
| NULL, |
| cql("SELECT * FROM pg_amop " |
| " WHERE amopopr = :1 " |
| " ORDER BY amopopr, " |
| " amopclaid ", |
| ObjectIdGetDatum(qual->opno))); |
| |
| for (i = 0; i < catlist->n_members; i++) |
| { |
| HeapTuple tuple = &catlist->members[i]->tuple; |
| Form_pg_amop aform = (Form_pg_amop) GETSTRUCT(tuple); |
| Oid opcid = aform->amopclaid; |
| |
| if (aform->amopstrategy != BTEqualStrategyNumber) |
| continue; |
| if (!opclass_is_btree(opcid)) |
| continue; |
| if (get_op_opclass_strategy(ltop, opcid) == BTLessStrategyNumber && |
| get_op_opclass_strategy(gtop, opcid) == BTGreaterStrategyNumber) |
| { |
| clause->cmpstrategy = MERGEFUNC_CMP; |
| ltproc = get_opclass_proc(opcid, aform->amopsubtype, |
| BTORDER_PROC); |
| Assert(RegProcedureIsValid(ltproc)); |
| break; /* done looking */ |
| } |
| } |
| |
| caql_end_CacheList(catlist); |
| |
| /* Check permission to call "<" operator or cmp function */ |
| aclresult = pg_proc_aclcheck(ltproc, GetUserId(), ACL_EXECUTE); |
| if (aclresult != ACLCHECK_OK) |
| aclcheck_error(aclresult, ACL_KIND_PROC, |
| get_func_name(ltproc)); |
| |
| /* Set up the fmgr lookup information */ |
| fmgr_info(ltproc, &(clause->cmpfinfo)); |
| |
| iClause++; |
| } |
| |
| return clauses; |
| } |
| |
| /* |
| * MJEvalOuterValues |
| * |
| * Compute the values of the mergejoined expressions for the current |
| * outer tuple. We also detect whether it's impossible for the current |
| * outer tuple to match anything --- this is true if it yields a NULL |
| * input for any strict mergejoin operator. |
| * |
| * We evaluate the values in OuterEContext, which can be reset each |
| * time we move to a new tuple. |
| */ |
| static bool |
| MJEvalOuterValues(MergeJoinState *mergestate) |
| { |
| ExprContext *econtext = mergestate->mj_OuterEContext; |
| bool canmatch = true; |
| int i; |
| MemoryContext oldContext; |
| |
| ResetExprContext(econtext); |
| |
| oldContext = MemoryContextSwitchTo(econtext->ecxt_per_tuple_memory); |
| |
| econtext->ecxt_outertuple = mergestate->mj_OuterTupleSlot; |
| |
| for (i = 0; i < mergestate->mj_NumClauses; i++) |
| { |
| MergeJoinClause clause = &mergestate->mj_Clauses[i]; |
| |
| clause->ldatum = ExecEvalExpr(clause->lexpr, econtext, |
| &clause->lisnull, NULL); |
| if (clause->lisnull && clause->mergestrict) |
| canmatch = clause->notdistinct; |
| } |
| |
| MemoryContextSwitchTo(oldContext); |
| |
| return canmatch; |
| } |
| |
| /* |
| * MJEvalInnerValues |
| * |
| * Same as above, but for the inner tuple. Here, we have to be prepared |
| * to load data from either the true current inner, or the marked inner, |
| * so caller must tell us which slot to load from. |
| */ |
| static bool |
| MJEvalInnerValues(MergeJoinState *mergestate, TupleTableSlot *innerslot) |
| { |
| ExprContext *econtext = mergestate->mj_InnerEContext; |
| bool canmatch = true; |
| int i; |
| MemoryContext oldContext; |
| |
| ResetExprContext(econtext); |
| |
| oldContext = MemoryContextSwitchTo(econtext->ecxt_per_tuple_memory); |
| |
| econtext->ecxt_innertuple = innerslot; |
| |
| for (i = 0; i < mergestate->mj_NumClauses; i++) |
| { |
| MergeJoinClause clause = &mergestate->mj_Clauses[i]; |
| |
| clause->rdatum = ExecEvalExpr(clause->rexpr, econtext, |
| &clause->risnull, NULL); |
| if (clause->risnull && clause->mergestrict) |
| canmatch = clause->notdistinct; |
| } |
| |
| MemoryContextSwitchTo(oldContext); |
| |
| return canmatch; |
| } |
| |
| /* |
| * MJCompare |
| * |
| * Compare the mergejoinable values of the current two input tuples |
| * and return 0 if they are equal (ie, the mergejoin equalities all |
| * succeed), +1 if outer > inner, -1 if outer < inner. |
| * |
| * MJEvalOuterValues and MJEvalInnerValues must already have been called |
| * for the current outer and inner tuples, respectively. |
| */ |
| static int |
| MJCompare(MergeJoinState *mergestate) |
| { |
| int result = 0; |
| bool nulleqnull = false; |
| ExprContext *econtext = mergestate->js.ps.ps_ExprContext; |
| int i; |
| MemoryContext oldContext; |
| FunctionCallInfoData fcinfo; |
| |
| /* |
| * Call the comparison functions in short-lived context, in case they leak |
| * memory. |
| */ |
| ResetExprContext(econtext); |
| |
| oldContext = MemoryContextSwitchTo(econtext->ecxt_per_tuple_memory); |
| |
| for (i = 0; i < mergestate->mj_NumClauses; i++) |
| { |
| MergeJoinClause clause = &mergestate->mj_Clauses[i]; |
| Datum fresult; |
| |
| /* |
| * Deal with null inputs. We treat NULL as sorting after non-NULL. |
| * |
| * If both inputs are NULL, and the comparison function isn't strict, |
| * then we call it and check for a true result (this allows operators |
| * that behave like IS NOT DISTINCT to be mergejoinable). If the |
| * function is strict or returns false, we temporarily pretend NULL == |
| * NULL and contine checking remaining columns. |
| */ |
| if (clause->lisnull) |
| { |
| if (clause->risnull) |
| { |
| if (!clause->eqfinfo.fn_strict) |
| { |
| InitFunctionCallInfoData(fcinfo, &(clause->eqfinfo), 2, |
| NULL, NULL); |
| fcinfo.arg[0] = clause->ldatum; |
| fcinfo.arg[1] = clause->rdatum; |
| fcinfo.argnull[0] = true; |
| fcinfo.argnull[1] = true; |
| fresult = FunctionCallInvoke(&fcinfo); |
| if (!fcinfo.isnull && DatumGetBool(fresult)) |
| { |
| /* treat nulls as really equal */ |
| continue; |
| } |
| } |
| nulleqnull = !clause->notdistinct; |
| continue; |
| } |
| /* NULL > non-NULL */ |
| result = 1; |
| break; |
| } |
| if (clause->risnull) |
| { |
| /* non-NULL < NULL */ |
| result = -1; |
| break; |
| } |
| |
| if (clause->cmpstrategy == MERGEFUNC_LT) |
| { |
| InitFunctionCallInfoData(fcinfo, &(clause->eqfinfo), 2, |
| NULL, NULL); |
| fcinfo.arg[0] = clause->ldatum; |
| fcinfo.arg[1] = clause->rdatum; |
| fcinfo.argnull[0] = false; |
| fcinfo.argnull[1] = false; |
| fresult = FunctionCallInvoke(&fcinfo); |
| if (fcinfo.isnull) |
| { |
| nulleqnull = !clause->notdistinct; |
| continue; |
| } |
| else if (DatumGetBool(fresult)) |
| { |
| /* equal */ |
| continue; |
| } |
| InitFunctionCallInfoData(fcinfo, &(clause->cmpfinfo), 2, |
| NULL, NULL); |
| fcinfo.arg[0] = clause->ldatum; |
| fcinfo.arg[1] = clause->rdatum; |
| fcinfo.argnull[0] = false; |
| fcinfo.argnull[1] = false; |
| fresult = FunctionCallInvoke(&fcinfo); |
| if (fcinfo.isnull) |
| { |
| nulleqnull = !clause->notdistinct; |
| continue; |
| } |
| else if (DatumGetBool(fresult)) |
| { |
| /* less than */ |
| result = -1; |
| break; |
| } |
| else |
| { |
| /* greater than */ |
| result = 1; |
| break; |
| } |
| } |
| else |
| /* must be MERGEFUNC_CMP */ |
| { |
| InitFunctionCallInfoData(fcinfo, &(clause->cmpfinfo), 2, |
| NULL, NULL); |
| fcinfo.arg[0] = clause->ldatum; |
| fcinfo.arg[1] = clause->rdatum; |
| fcinfo.argnull[0] = false; |
| fcinfo.argnull[1] = false; |
| fresult = FunctionCallInvoke(&fcinfo); |
| if (fcinfo.isnull) |
| { |
| nulleqnull = !clause->notdistinct; |
| continue; |
| } |
| else if (DatumGetInt32(fresult) == 0) |
| { |
| /* equal */ |
| continue; |
| } |
| else if (DatumGetInt32(fresult) < 0) |
| { |
| /* less than */ |
| result = -1; |
| break; |
| } |
| else |
| { |
| /* greater than */ |
| result = 1; |
| break; |
| } |
| } |
| } |
| |
| /* |
| * If we had any null comparison results or NULL-vs-NULL inputs, we do not |
| * want to report that the tuples are equal. Instead, if result is still |
| * 0, change it to +1. This will result in advancing the inner side of |
| * the join. |
| */ |
| if (nulleqnull && result == 0) |
| result = 1; |
| |
| MemoryContextSwitchTo(oldContext); |
| |
| return result; |
| } |
| |
| |
| /* |
| * Generate a fake join tuple with nulls for the inner tuple, |
| * and return it if it passes the non-join quals. |
| */ |
| static TupleTableSlot * |
| MJFillOuter(MergeJoinState *node) |
| { |
| ExprContext *econtext = node->js.ps.ps_ExprContext; |
| List *otherqual = node->js.ps.qual; |
| |
| ResetExprContext(econtext); |
| |
| econtext->ecxt_outertuple = node->mj_OuterTupleSlot; |
| econtext->ecxt_innertuple = node->mj_NullInnerTupleSlot; |
| |
| if (TupIsNull(node->mj_OuterTupleSlot)) |
| return NULL; |
| |
| if (ExecQual(otherqual, econtext, false)) |
| { |
| /* |
| * qualification succeeded. now form the desired projection tuple and |
| * return the slot containing it. |
| */ |
| MJ_printf("ExecMergeJoin: returning outer fill tuple\n"); |
| |
| return ExecProject(node->js.ps.ps_ProjInfo, NULL); |
| } |
| |
| return NULL; |
| } |
| |
| /* |
| * Generate a fake join tuple with nulls for the outer tuple, |
| * and return it if it passes the non-join quals. |
| */ |
| static TupleTableSlot * |
| MJFillInner(MergeJoinState *node) |
| { |
| ExprContext *econtext = node->js.ps.ps_ExprContext; |
| List *otherqual = node->js.ps.qual; |
| |
| ResetExprContext(econtext); |
| |
| /* If we don't have an inner, return NULL */ |
| if(TupIsNull(node->mj_InnerTupleSlot)) |
| return NULL; |
| |
| econtext->ecxt_outertuple = node->mj_NullOuterTupleSlot; |
| econtext->ecxt_innertuple = node->mj_InnerTupleSlot; |
| |
| if (ExecQual(otherqual, econtext, false)) |
| { |
| /* |
| * qualification succeeded. now form the desired projection tuple and |
| * return the slot containing it. |
| */ |
| MJ_printf("ExecMergeJoin: returning inner fill tuple\n"); |
| |
| return ExecProject(node->js.ps.ps_ProjInfo, NULL); |
| } |
| |
| return NULL; |
| } |
| |
| |
| /* ---------------------------------------------------------------- |
| * ExecMergeTupleDump |
| * |
| * This function is called through the MJ_dump() macro |
| * when EXEC_MERGEJOINDEBUG is defined |
| * ---------------------------------------------------------------- |
| */ |
| #ifdef EXEC_MERGEJOINDEBUG |
| |
| static void |
| ExecMergeTupleDumpOuter(MergeJoinState *mergestate) |
| { |
| TupleTableSlot *outerSlot = mergestate->mj_OuterTupleSlot; |
| |
| printf("==== outer tuple ====\n"); |
| if (TupIsNull(outerSlot)) |
| printf("(nil)\n"); |
| else |
| MJ_debugtup(outerSlot); |
| } |
| |
| static void |
| ExecMergeTupleDumpInner(MergeJoinState *mergestate) |
| { |
| TupleTableSlot *innerSlot = mergestate->mj_InnerTupleSlot; |
| |
| printf("==== inner tuple ====\n"); |
| if (TupIsNull(innerSlot)) |
| printf("(nil)\n"); |
| else |
| MJ_debugtup(innerSlot); |
| } |
| |
| static void |
| ExecMergeTupleDumpMarked(MergeJoinState *mergestate) |
| { |
| TupleTableSlot *markedSlot = mergestate->mj_MarkedTupleSlot; |
| |
| printf("==== marked tuple ====\n"); |
| if (TupIsNull(markedSlot)) |
| printf("(nil)\n"); |
| else |
| MJ_debugtup(markedSlot); |
| } |
| |
| static void |
| ExecMergeTupleDump(MergeJoinState *mergestate) |
| { |
| printf("******** ExecMergeTupleDump ********\n"); |
| |
| ExecMergeTupleDumpOuter(mergestate); |
| ExecMergeTupleDumpInner(mergestate); |
| ExecMergeTupleDumpMarked(mergestate); |
| |
| printf("******** \n"); |
| } |
| #endif |
| |
| /* ---------------------------------------------------------------- |
| * ExecMergeJoin |
| * ---------------------------------------------------------------- |
| */ |
| TupleTableSlot * |
| ExecMergeJoin(MergeJoinState *node) |
| { |
| EState *estate; |
| List *joinqual; |
| List *otherqual; |
| bool qualResult; |
| int compareResult; |
| PlanState *innerPlan; |
| TupleTableSlot *innerTupleSlot; |
| PlanState *outerPlan; |
| TupleTableSlot *outerTupleSlot; |
| ExprContext *econtext; |
| bool doFillOuter; |
| bool doFillInner; |
| |
| /* |
| * get information from node |
| */ |
| estate = node->js.ps.state; |
| innerPlan = innerPlanState(node); |
| outerPlan = outerPlanState(node); |
| econtext = node->js.ps.ps_ExprContext; |
| joinqual = node->js.joinqual; |
| otherqual = node->js.ps.qual; |
| doFillOuter = node->mj_FillOuter; |
| doFillInner = node->mj_FillInner; |
| |
| /* |
| * Reset per-tuple memory context to free any expression evaluation |
| * storage allocated in the previous tuple cycle. Note this can't happen |
| * until we're done projecting out tuples from a join tuple. |
| */ |
| ResetExprContext(econtext); |
| |
| /* |
| * MPP-4165: My fix for MPP-3300 was correct in that we avoided |
| * the *deadlock* but had very unexpected (and painful) |
| * performance characteristics: we basically de-pipeline and |
| * de-parallelize execution of any query which has motion below |
| * us. |
| * |
| * So now prefetch_inner is set (see createplan.c) if we have *any* motion |
| * below us. If we don't have any motion, it doesn't matter. |
| */ |
| if (node->prefetch_inner) |
| { |
| innerTupleSlot = ExecProcNode(innerPlan); |
| if (!TupIsNull(innerTupleSlot)) |
| { |
| Gpmon_M_Incr(GpmonPktFromMergeJoinState(node), GPMON_MERGEJOIN_INNERTUPLE); |
| Gpmon_M_Incr(GpmonPktFromMergeJoinState(node), GPMON_QEXEC_M_ROWSIN); |
| } |
| node->mj_InnerTupleSlot = innerTupleSlot; |
| |
| ExecReScan(innerPlan, econtext); |
| ResetExprContext(econtext); |
| |
| node->mj_squelchInner = false; /* we will never need to Squelch the inner, we've fetched it all */ |
| node->prefetch_inner = false; |
| } |
| |
| /* |
| * ok, everything is setup.. let's go to work |
| */ |
| for (;;) |
| { |
| MJ_dump(node); |
| CheckSendPlanStateGpmonPkt(&node->js.ps); |
| |
| /* |
| * get the current state of the join and do things accordingly. |
| */ |
| switch (node->mj_JoinState) |
| { |
| /* |
| * EXEC_MJ_INITIALIZE_OUTER means that this is the first time |
| * ExecMergeJoin() has been called and so we have to fetch the |
| * first matchable tuple for both outer and inner subplans. We |
| * do the outer side in INITIALIZE_OUTER state, then advance |
| * to INITIALIZE_INNER state for the inner subplan. |
| */ |
| case EXEC_MJ_INITIALIZE_OUTER: |
| MJ_printf("ExecMergeJoin: EXEC_MJ_INITIALIZE_OUTER\n"); |
| |
| outerTupleSlot = ExecProcNode(outerPlan); |
| node->mj_OuterTupleSlot = outerTupleSlot; |
| if (TupIsNull(outerTupleSlot)) |
| { |
| MJ_printf("ExecMergeJoin: nothing in outer subplan\n"); |
| if (doFillInner) |
| { |
| /* |
| * Need to emit right-join tuples for remaining inner |
| * tuples. We set MatchedInner = true to force the |
| * ENDOUTER state to advance inner. |
| */ |
| node->mj_JoinState = EXEC_MJ_ENDOUTER; |
| node->mj_MatchedInner = true; |
| break; |
| } |
| |
| /* |
| * CDB: We'll read no more from inner subtree. To keep our |
| * sibling QEs from being starved, tell source QEs not to |
| * clog up the pipeline with our never-to-be-consumed |
| * data. |
| */ |
| if (node->mj_squelchInner) |
| ExecSquelchNode(innerPlan); |
| |
| /* |
| * The memory used by child nodes might not be freed because |
| * they are not eager free safe. However, when the merge join |
| * is done, we can free the memory used by the child nodes. |
| */ |
| ExecEagerFreeMergeJoin(node); |
| |
| /* Otherwise we're done. */ |
| return NULL; |
| } |
| else |
| { |
| Gpmon_M_Incr(GpmonPktFromMergeJoinState(node), GPMON_MERGEJOIN_OUTERTUPLE); |
| Gpmon_M_Incr(GpmonPktFromMergeJoinState(node), GPMON_QEXEC_M_ROWSIN); |
| } |
| |
| /* Compute join values and check for unmatchability */ |
| if (MJEvalOuterValues(node)) |
| { |
| /* OK to go get the first inner tuple */ |
| node->mj_JoinState = EXEC_MJ_INITIALIZE_INNER; |
| } |
| else |
| { |
| /* Stay in same state to fetch next outer tuple */ |
| if (doFillOuter) |
| { |
| /* |
| * Generate a fake join tuple with nulls for the inner |
| * tuple, and return it if it passes the non-join |
| * quals. |
| */ |
| TupleTableSlot *result; |
| |
| result = MJFillOuter(node); |
| if (result) |
| { |
| Gpmon_M_Incr_Rows_Out(GpmonPktFromMergeJoinState(node)); |
| CheckSendPlanStateGpmonPkt(&node->js.ps); |
| return result; |
| } |
| } |
| } |
| break; |
| |
| case EXEC_MJ_INITIALIZE_INNER: |
| MJ_printf("ExecMergeJoin: EXEC_MJ_INITIALIZE_INNER\n"); |
| |
| innerTupleSlot = ExecProcNode(innerPlan); |
| node->mj_InnerTupleSlot = innerTupleSlot; |
| if (TupIsNull(innerTupleSlot)) |
| { |
| MJ_printf("ExecMergeJoin: nothing in inner subplan\n"); |
| if (doFillOuter) |
| { |
| /* |
| * Need to emit left-join tuples for all outer tuples, |
| * including the one we just fetched. We set |
| * MatchedOuter = false to force the ENDINNER state to |
| * emit first tuple before advancing outer. |
| */ |
| node->mj_JoinState = EXEC_MJ_ENDINNER; |
| node->mj_MatchedOuter = false; |
| break; |
| } |
| |
| /* |
| * CDB: We'll read no more from outer subtree. To keep our |
| * sibling QEs from being starved, tell source QEs not to |
| * clog up the pipeline with our never-to-be-consumed |
| * data. |
| */ |
| ExecSquelchNode(outerPlan); |
| |
| ExecEagerFreeMergeJoin(node); |
| |
| /* Otherwise we're done. */ |
| return NULL; |
| } |
| else |
| { |
| Gpmon_M_Incr(GpmonPktFromMergeJoinState(node), GPMON_MERGEJOIN_INNERTUPLE); |
| Gpmon_M_Incr(GpmonPktFromMergeJoinState(node), GPMON_QEXEC_M_ROWSIN); |
| } |
| |
| /* Compute join values and check for unmatchability */ |
| if (MJEvalInnerValues(node, innerTupleSlot)) |
| { |
| /* |
| * OK, we have the initial tuples. Begin by skipping |
| * non-matching tuples. |
| */ |
| node->mj_JoinState = EXEC_MJ_SKIP_TEST; |
| } |
| else |
| { |
| /* Stay in same state to fetch next inner tuple */ |
| if (doFillInner) |
| { |
| /* |
| * Generate a fake join tuple with nulls for the outer |
| * tuple, and return it if it passes the non-join |
| * quals. |
| */ |
| TupleTableSlot *result; |
| |
| result = MJFillInner(node); |
| if (result) |
| { |
| Gpmon_M_Incr_Rows_Out(GpmonPktFromMergeJoinState(node)); |
| CheckSendPlanStateGpmonPkt(&node->js.ps); |
| return result; |
| } |
| } |
| } |
| break; |
| |
| /* |
| * EXEC_MJ_JOINTUPLES means we have two tuples which satisfied |
| * the merge clause so we join them and then proceed to get |
| * the next inner tuple (EXEC_MJ_NEXTINNER). |
| */ |
| case EXEC_MJ_JOINTUPLES: |
| MJ_printf("ExecMergeJoin: EXEC_MJ_JOINTUPLES\n"); |
| |
| /* |
| * Set the next state machine state. The right things will |
| * happen whether we return this join tuple or just fall |
| * through to continue the state machine execution. |
| */ |
| node->mj_JoinState = EXEC_MJ_NEXTINNER; |
| |
| /* |
| * Check the extra qual conditions to see if we actually want |
| * to return this join tuple. If not, can proceed with merge. |
| * We must distinguish the additional joinquals (which must |
| * pass to consider the tuples "matched" for outer-join logic) |
| * from the otherquals (which must pass before we actually |
| * return the tuple). |
| * |
| * We don't bother with a ResetExprContext here, on the |
| * assumption that we just did one while checking the merge |
| * qual. One per tuple should be sufficient. We do have to |
| * set up the econtext links to the tuples for ExecQual to |
| * use. |
| */ |
| outerTupleSlot = node->mj_OuterTupleSlot; |
| econtext->ecxt_outertuple = outerTupleSlot; |
| innerTupleSlot = node->mj_InnerTupleSlot; |
| econtext->ecxt_innertuple = innerTupleSlot; |
| |
| if (node->js.jointype == JOIN_IN && |
| node->mj_MatchedOuter) |
| qualResult = false; |
| else |
| { |
| qualResult = (joinqual == NIL || |
| ExecQual(joinqual, econtext, false)); |
| MJ_DEBUG_QUAL(joinqual, qualResult); |
| } |
| |
| if (qualResult) |
| { |
| node->mj_MatchedOuter = true; |
| node->mj_MatchedInner = true; |
| |
| /* In an antijoin, we never return a matched tuple */ |
| if (node->js.jointype == JOIN_LASJ) |
| { |
| node->mj_JoinState = EXEC_MJ_NEXTOUTER; |
| break; |
| } |
| qualResult = (otherqual == NIL || |
| ExecQual(otherqual, econtext, false)); |
| MJ_DEBUG_QUAL(otherqual, qualResult); |
| |
| if (qualResult) |
| { |
| /* |
| * qualification succeeded. now form the desired |
| * projection tuple and return the slot containing it. |
| */ |
| MJ_printf("ExecMergeJoin: returning tuple\n"); |
| |
| Gpmon_M_Incr_Rows_Out(GpmonPktFromMergeJoinState(node)); |
| CheckSendPlanStateGpmonPkt(&node->js.ps); |
| return ExecProject(node->js.ps.ps_ProjInfo, NULL); |
| } |
| } |
| break; |
| |
| /* |
| * EXEC_MJ_NEXTINNER means advance the inner scan to the next |
| * tuple. If the tuple is not nil, we then proceed to test it |
| * against the join qualification. |
| * |
| * Before advancing, we check to see if we must emit an |
| * outer-join fill tuple for this inner tuple. |
| */ |
| case EXEC_MJ_NEXTINNER: |
| MJ_printf("ExecMergeJoin: EXEC_MJ_NEXTINNER\n"); |
| |
| if (doFillInner && !node->mj_MatchedInner) |
| { |
| /* |
| * Generate a fake join tuple with nulls for the outer |
| * tuple, and return it if it passes the non-join quals. |
| */ |
| TupleTableSlot *result; |
| |
| node->mj_MatchedInner = true; /* do it only once */ |
| |
| result = MJFillInner(node); |
| if (result) |
| { |
| Gpmon_M_Incr_Rows_Out(GpmonPktFromMergeJoinState(node)); |
| CheckSendPlanStateGpmonPkt(&node->js.ps); |
| return result; |
| } |
| } |
| |
| /* |
| * now we get the next inner tuple, if any. If there's none, |
| * advance to next outer tuple (which may be able to join to |
| * previously marked tuples). |
| */ |
| innerTupleSlot = ExecProcNode(innerPlan); |
| node->mj_InnerTupleSlot = innerTupleSlot; |
| MJ_DEBUG_PROC_NODE(innerTupleSlot); |
| node->mj_MatchedInner = false; |
| |
| if (TupIsNull(innerTupleSlot)) |
| { |
| node->mj_JoinState = EXEC_MJ_NEXTOUTER; |
| |
| if (((MergeJoin*)node->js.ps.plan)->unique_outer) |
| { |
| ExecEagerFreeMergeJoin(node); |
| |
| /* we are done */ |
| return NULL; |
| } |
| |
| break; |
| } |
| else |
| { |
| Gpmon_M_Incr(GpmonPktFromMergeJoinState(node), GPMON_MERGEJOIN_INNERTUPLE); |
| Gpmon_M_Incr(GpmonPktFromMergeJoinState(node), GPMON_QEXEC_M_ROWSIN); |
| } |
| |
| /* |
| * Load up the new inner tuple's comparison values. If we see |
| * that it contains a NULL and hence can't match any outer |
| * tuple, we can skip the comparison and assume the new tuple |
| * is greater than current outer. |
| */ |
| if (!MJEvalInnerValues(node, innerTupleSlot)) |
| { |
| node->mj_JoinState = EXEC_MJ_NEXTOUTER; |
| break; |
| } |
| |
| /* |
| * Test the new inner tuple to see if it matches outer. |
| * |
| * If they do match, then we join them and move on to the next |
| * inner tuple (EXEC_MJ_JOINTUPLES). |
| * |
| * If they do not match then advance to next outer tuple. |
| */ |
| compareResult = MJCompare(node); |
| MJ_DEBUG_COMPARE(compareResult); |
| |
| if (compareResult == 0) |
| node->mj_JoinState = EXEC_MJ_JOINTUPLES; |
| else |
| { |
| Assert(compareResult < 0 || ((MergeJoin*)node->js.ps.plan)->unique_outer); |
| node->mj_JoinState = EXEC_MJ_NEXTOUTER; |
| } |
| break; |
| |
| /*------------------------------------------- |
| * EXEC_MJ_NEXTOUTER means |
| * |
| * outer inner |
| * outer tuple - 5 5 - marked tuple |
| * 5 5 |
| * 6 6 - inner tuple |
| * 7 7 |
| * |
| * we know we just bumped into the |
| * first inner tuple > current outer tuple (or possibly |
| * the end of the inner stream) |
| * so get a new outer tuple and then |
| * proceed to test it against the marked tuple |
| * (EXEC_MJ_TESTOUTER) |
| * |
| * Before advancing, we check to see if we must emit an |
| * outer-join fill tuple for this outer tuple. |
| *------------------------------------------------ |
| */ |
| case EXEC_MJ_NEXTOUTER: |
| MJ_printf("ExecMergeJoin: EXEC_MJ_NEXTOUTER\n"); |
| |
| if (doFillOuter && !node->mj_MatchedOuter) |
| { |
| /* |
| * Generate a fake join tuple with nulls for the inner |
| * tuple, and return it if it passes the non-join quals. |
| */ |
| TupleTableSlot *result; |
| |
| node->mj_MatchedOuter = true; /* do it only once */ |
| |
| result = MJFillOuter(node); |
| if (result) |
| return result; |
| } |
| |
| /* |
| * now we get the next outer tuple, if any |
| */ |
| outerTupleSlot = ExecProcNode(outerPlan); |
| node->mj_OuterTupleSlot = outerTupleSlot; |
| MJ_DEBUG_PROC_NODE(outerTupleSlot); |
| node->mj_MatchedOuter = false; |
| |
| /* |
| * if the outer tuple is null then we are done with the join, |
| * unless we have inner tuples we need to null-fill. |
| */ |
| if (TupIsNull(outerTupleSlot)) |
| { |
| MJ_printf("ExecMergeJoin: end of outer subplan\n"); |
| innerTupleSlot = node->mj_InnerTupleSlot; |
| if (doFillInner && !TupIsNull(innerTupleSlot)) |
| { |
| /* |
| * Need to emit right-join tuples for remaining inner |
| * tuples. |
| */ |
| node->mj_JoinState = EXEC_MJ_ENDOUTER; |
| break; |
| } |
| |
| /* |
| * CDB: We'll read no more from inner subtree. To keep our |
| * sibling QEs from being starved, tell source QEs not to |
| * clog up the pipeline with our never-to-be-consumed |
| * data. |
| */ |
| if (!TupIsNull(innerTupleSlot) && node->mj_squelchInner) |
| ExecSquelchNode(innerPlan); |
| |
| ExecEagerFreeMergeJoin(node); |
| |
| /* Otherwise we're done. */ |
| return NULL; |
| } |
| else |
| { |
| Gpmon_M_Incr(GpmonPktFromMergeJoinState(node), GPMON_MERGEJOIN_OUTERTUPLE); |
| Gpmon_M_Incr(GpmonPktFromMergeJoinState(node), GPMON_QEXEC_M_ROWSIN); |
| } |
| |
| /* Compute join values and check for unmatchability */ |
| if (MJEvalOuterValues(node)) |
| { |
| if (((MergeJoin*)node->js.ps.plan)->unique_outer) |
| /* The current innerTuple will match with this outerTuple.*/ |
| node->mj_JoinState = EXEC_MJ_JOINTUPLES; |
| else |
| /* Go test the new tuple against the marked tuple */ |
| node->mj_JoinState = EXEC_MJ_TESTOUTER; |
| } |
| else |
| { |
| /* Can't match, so fetch next outer tuple */ |
| node->mj_JoinState = EXEC_MJ_NEXTOUTER; |
| } |
| break; |
| |
| /*-------------------------------------------------------- |
| * EXEC_MJ_TESTOUTER If the new outer tuple and the marked |
| * tuple satisfy the merge clause then we know we have |
| * duplicates in the outer scan so we have to restore the |
| * inner scan to the marked tuple and proceed to join the |
| * new outer tuple with the inner tuples. |
| * |
| * This is the case when |
| * outer inner |
| * 4 5 - marked tuple |
| * outer tuple - 5 5 |
| * new outer tuple - 5 5 |
| * 6 8 - inner tuple |
| * 7 12 |
| * |
| * new outer tuple == marked tuple |
| * |
| * If the outer tuple fails the test, then we are done |
| * with the marked tuples, and we have to look for a |
| * match to the current inner tuple. So we will |
| * proceed to skip outer tuples until outer >= inner |
| * (EXEC_MJ_SKIP_TEST). |
| * |
| * This is the case when |
| * |
| * outer inner |
| * 5 5 - marked tuple |
| * outer tuple - 5 5 |
| * new outer tuple - 6 8 - inner tuple |
| * 7 12 |
| * |
| * new outer tuple > marked tuple |
| * |
| *--------------------------------------------------------- |
| */ |
| case EXEC_MJ_TESTOUTER: |
| MJ_printf("ExecMergeJoin: EXEC_MJ_TESTOUTER\n"); |
| |
| /* |
| * Here we must compare the outer tuple with the marked inner |
| * tuple. (We can ignore the result of MJEvalInnerValues, |
| * since the marked inner tuple is certainly matchable.) |
| */ |
| innerTupleSlot = node->mj_MarkedTupleSlot; |
| (void) MJEvalInnerValues(node, innerTupleSlot); |
| |
| compareResult = MJCompare(node); |
| MJ_DEBUG_COMPARE(compareResult); |
| |
| if (compareResult == 0) |
| { |
| /* |
| * the merge clause matched so now we restore the inner |
| * scan position to the first mark, and go join that tuple |
| * (and any following ones) to the new outer. |
| * |
| * NOTE: we do not need to worry about the MatchedInner |
| * state for the rescanned inner tuples. We know all of |
| * them will match this new outer tuple and therefore |
| * won't be emitted as fill tuples. This works *only* |
| * because we require the extra joinquals to be nil when |
| * doing a right or full join --- otherwise some of the |
| * rescanned tuples might fail the extra joinquals. |
| */ |
| ExecRestrPos(innerPlan); |
| |
| /* |
| * ExecRestrPos probably should give us back a new Slot, |
| * but since it doesn't, use the marked slot. (The |
| * previously returned mj_InnerTupleSlot cannot be assumed |
| * to hold the required tuple.) |
| */ |
| node->mj_InnerTupleSlot = innerTupleSlot; |
| /* we need not do MJEvalInnerValues again */ |
| |
| node->mj_JoinState = EXEC_MJ_JOINTUPLES; |
| } |
| else |
| { |
| /* ---------------- |
| * if the new outer tuple didn't match the marked inner |
| * tuple then we have a case like: |
| * |
| * outer inner |
| * 4 4 - marked tuple |
| * new outer - 5 4 |
| * 6 5 - inner tuple |
| * 7 |
| * |
| * which means that all subsequent outer tuples will be |
| * larger than our marked inner tuples. So we need not |
| * revisit any of the marked tuples but can proceed to |
| * look for a match to the current inner. If there's |
| * no more inners, we are done. |
| * ---------------- |
| */ |
| if (compareResult <= 0 && !((MergeJoin*)node->js.ps.plan)->unique_outer) |
| insist_log(false, "Mergejoin: compareResult > 0, bad plan ?"); |
| innerTupleSlot = node->mj_InnerTupleSlot; |
| if (TupIsNull(innerTupleSlot)) |
| { |
| if (doFillOuter) |
| { |
| /* |
| * Need to emit left-join tuples for remaining |
| * outer tuples. |
| */ |
| node->mj_JoinState = EXEC_MJ_ENDINNER; |
| break; |
| } |
| |
| /* |
| * CDB: We'll read no more from outer subtree. To keep |
| * our sibling QEs from being starved, tell source QEs |
| * not to clog up the pipeline with our |
| * never-to-be-consumed data. |
| */ |
| ExecSquelchNode(outerPlan); |
| |
| ExecEagerFreeMergeJoin(node); |
| |
| /* Otherwise we're done. */ |
| return NULL; |
| } |
| |
| /* reload comparison data for current inner */ |
| if (MJEvalInnerValues(node, innerTupleSlot)) |
| { |
| /* proceed to compare it to the current outer */ |
| node->mj_JoinState = EXEC_MJ_SKIP_TEST; |
| } |
| else |
| { |
| /* |
| * current inner can't possibly match any outer; |
| * better to advance the inner scan than the outer. |
| */ |
| node->mj_JoinState = EXEC_MJ_SKIPINNER_ADVANCE; |
| } |
| } |
| break; |
| |
| /*---------------------------------------------------------- |
| * EXEC_MJ_SKIP means compare tuples and if they do not |
| * match, skip whichever is lesser. |
| * |
| * For example: |
| * |
| * outer inner |
| * 5 5 |
| * 5 5 |
| * outer tuple - 6 8 - inner tuple |
| * 7 12 |
| * 8 14 |
| * |
| * we have to advance the outer scan |
| * until we find the outer 8. |
| * |
| * On the other hand: |
| * |
| * outer inner |
| * 5 5 |
| * 5 5 |
| * outer tuple - 12 8 - inner tuple |
| * 14 10 |
| * 17 12 |
| * |
| * we have to advance the inner scan |
| * until we find the inner 12. |
| *---------------------------------------------------------- |
| */ |
| case EXEC_MJ_SKIP_TEST: |
| MJ_printf("ExecMergeJoin: EXEC_MJ_SKIP_TEST\n"); |
| |
| /* |
| * before we advance, make sure the current tuples do not |
| * satisfy the mergeclauses. If they do, then we update the |
| * marked tuple position and go join them. |
| */ |
| compareResult = MJCompare(node); |
| MJ_DEBUG_COMPARE(compareResult); |
| |
| if (compareResult == 0) |
| { |
| if (!((MergeJoin*)node->js.ps.plan)->unique_outer) |
| { |
| ExecMarkPos(innerPlan); |
| |
| MarkInnerTuple(node->mj_InnerTupleSlot, node); |
| } |
| |
| node->mj_JoinState = EXEC_MJ_JOINTUPLES; |
| } |
| else if (compareResult < 0) |
| node->mj_JoinState = EXEC_MJ_SKIPOUTER_ADVANCE; |
| else |
| /* compareResult > 0 */ |
| node->mj_JoinState = EXEC_MJ_SKIPINNER_ADVANCE; |
| break; |
| |
| /* |
| * Before advancing, we check to see if we must emit an |
| * outer-join fill tuple for this outer tuple. |
| */ |
| case EXEC_MJ_SKIPOUTER_ADVANCE: |
| MJ_printf("ExecMergeJoin: EXEC_MJ_SKIPOUTER_ADVANCE\n"); |
| |
| if (doFillOuter && !node->mj_MatchedOuter) |
| { |
| /* |
| * Generate a fake join tuple with nulls for the inner |
| * tuple, and return it if it passes the non-join quals. |
| */ |
| TupleTableSlot *result; |
| |
| node->mj_MatchedOuter = true; /* do it only once */ |
| |
| result = MJFillOuter(node); |
| if (result) |
| { |
| Gpmon_M_Incr_Rows_Out(GpmonPktFromMergeJoinState(node)); |
| CheckSendPlanStateGpmonPkt(&node->js.ps); |
| return result; |
| } |
| } |
| |
| /* |
| * now we get the next outer tuple, if any |
| */ |
| outerTupleSlot = ExecProcNode(outerPlan); |
| node->mj_OuterTupleSlot = outerTupleSlot; |
| MJ_DEBUG_PROC_NODE(outerTupleSlot); |
| node->mj_MatchedOuter = false; |
| |
| /* |
| * if the outer tuple is null then we are done with the join, |
| * unless we have inner tuples we need to null-fill. |
| */ |
| if (TupIsNull(outerTupleSlot)) |
| { |
| MJ_printf("ExecMergeJoin: end of outer subplan\n"); |
| innerTupleSlot = node->mj_InnerTupleSlot; |
| if (doFillInner && !TupIsNull(innerTupleSlot)) |
| { |
| /* |
| * Need to emit right-join tuples for remaining inner |
| * tuples. |
| */ |
| node->mj_JoinState = EXEC_MJ_ENDOUTER; |
| break; |
| } |
| |
| /* |
| * CDB: We'll read no more from inner subtree. To keep our |
| * sibling QEs from being starved, tell source QEs not to |
| * clog up the pipeline with our never-to-be-consumed |
| * data. |
| */ |
| if (!TupIsNull(innerTupleSlot) && node->mj_squelchInner) |
| ExecSquelchNode(innerPlan); |
| |
| ExecEagerFreeMergeJoin(node); |
| |
| /* Otherwise we're done. */ |
| return NULL; |
| } |
| else |
| { |
| Gpmon_M_Incr(GpmonPktFromMergeJoinState(node), GPMON_MERGEJOIN_OUTERTUPLE); |
| Gpmon_M_Incr(GpmonPktFromMergeJoinState(node), GPMON_QEXEC_M_ROWSIN); |
| } |
| |
| /* Compute join values and check for unmatchability */ |
| if (MJEvalOuterValues(node)) |
| { |
| /* Go test the new tuple against the current inner */ |
| node->mj_JoinState = EXEC_MJ_SKIP_TEST; |
| } |
| else |
| { |
| /* Can't match, so fetch next outer tuple */ |
| node->mj_JoinState = EXEC_MJ_SKIPOUTER_ADVANCE; |
| } |
| break; |
| |
| /* |
| * Before advancing, we check to see if we must emit an |
| * outer-join fill tuple for this inner tuple. |
| */ |
| case EXEC_MJ_SKIPINNER_ADVANCE: |
| MJ_printf("ExecMergeJoin: EXEC_MJ_SKIPINNER_ADVANCE\n"); |
| |
| if (doFillInner && !node->mj_MatchedInner) |
| { |
| /* |
| * Generate a fake join tuple with nulls for the outer |
| * tuple, and return it if it passes the non-join quals. |
| */ |
| TupleTableSlot *result; |
| |
| node->mj_MatchedInner = true; /* do it only once */ |
| |
| result = MJFillInner(node); |
| if (result) |
| { |
| Gpmon_M_Incr_Rows_Out(GpmonPktFromMergeJoinState(node)); |
| CheckSendPlanStateGpmonPkt(&node->js.ps); |
| return result; |
| } |
| } |
| |
| /* |
| * now we get the next inner tuple, if any |
| */ |
| innerTupleSlot = ExecProcNode(innerPlan); |
| node->mj_InnerTupleSlot = innerTupleSlot; |
| MJ_DEBUG_PROC_NODE(innerTupleSlot); |
| node->mj_MatchedInner = false; |
| |
| /* |
| * if the inner tuple is null then we are done with the join, |
| * unless we have outer tuples we need to null-fill. |
| */ |
| if (TupIsNull(innerTupleSlot)) |
| { |
| MJ_printf("ExecMergeJoin: end of inner subplan\n"); |
| outerTupleSlot = node->mj_OuterTupleSlot; |
| if (doFillOuter && !TupIsNull(outerTupleSlot)) |
| { |
| /* |
| * Need to emit left-join tuples for remaining outer |
| * tuples. |
| */ |
| node->mj_JoinState = EXEC_MJ_ENDINNER; |
| break; |
| } |
| |
| /* |
| * CDB: We'll read no more from outer subtree. To keep our |
| * sibling QEs from being starved, tell source QEs not to |
| * clog up the pipeline with our never-to-be-consumed |
| * data. |
| */ |
| if (!TupIsNull(outerTupleSlot)) |
| ExecSquelchNode(outerPlan); |
| |
| ExecEagerFreeMergeJoin(node); |
| |
| /* Otherwise we're done. */ |
| return NULL; |
| } |
| else |
| { |
| Gpmon_M_Incr(GpmonPktFromMergeJoinState(node), GPMON_MERGEJOIN_INNERTUPLE); |
| Gpmon_M_Incr(GpmonPktFromMergeJoinState(node), GPMON_QEXEC_M_ROWSIN); |
| } |
| |
| /* Compute join values and check for unmatchability */ |
| if (MJEvalInnerValues(node, innerTupleSlot)) |
| { |
| /* proceed to compare it to the current outer */ |
| node->mj_JoinState = EXEC_MJ_SKIP_TEST; |
| } |
| else |
| { |
| /* |
| * current inner can't possibly match any outer; better to |
| * advance the inner scan than the outer. |
| */ |
| node->mj_JoinState = EXEC_MJ_SKIPINNER_ADVANCE; |
| } |
| break; |
| |
| /* |
| * EXEC_MJ_ENDOUTER means we have run out of outer tuples, but |
| * are doing a right/full join and therefore must null-fill |
| * any remaing unmatched inner tuples. |
| */ |
| case EXEC_MJ_ENDOUTER: |
| MJ_printf("ExecMergeJoin: EXEC_MJ_ENDOUTER\n"); |
| |
| Assert(doFillInner); |
| |
| if (!node->mj_MatchedInner) |
| { |
| /* |
| * Generate a fake join tuple with nulls for the outer |
| * tuple, and return it if it passes the non-join quals. |
| */ |
| TupleTableSlot *result; |
| |
| node->mj_MatchedInner = true; /* do it only once */ |
| |
| result = MJFillInner(node); |
| if (result) |
| { |
| Gpmon_M_Incr_Rows_Out(GpmonPktFromMergeJoinState(node)); |
| CheckSendPlanStateGpmonPkt(&node->js.ps); |
| return result; |
| } |
| } |
| |
| /* |
| * now we get the next inner tuple, if any |
| */ |
| innerTupleSlot = ExecProcNode(innerPlan); |
| node->mj_InnerTupleSlot = innerTupleSlot; |
| MJ_DEBUG_PROC_NODE(innerTupleSlot); |
| node->mj_MatchedInner = false; |
| |
| if (TupIsNull(innerTupleSlot)) |
| { |
| MJ_printf("ExecMergeJoin: end of inner subplan\n"); |
| ExecEagerFreeMergeJoin(node); |
| |
| return NULL; |
| } |
| else |
| { |
| Gpmon_M_Incr(GpmonPktFromMergeJoinState(node), GPMON_MERGEJOIN_INNERTUPLE); |
| Gpmon_M_Incr(GpmonPktFromMergeJoinState(node), GPMON_QEXEC_M_ROWSIN); |
| } |
| |
| /* Else remain in ENDOUTER state and process next tuple. */ |
| break; |
| |
| /* |
| * EXEC_MJ_ENDINNER means we have run out of inner tuples, but |
| * are doing a left/full join and therefore must null- fill |
| * any remaing unmatched outer tuples. |
| */ |
| case EXEC_MJ_ENDINNER: |
| MJ_printf("ExecMergeJoin: EXEC_MJ_ENDINNER\n"); |
| |
| Assert(doFillOuter); |
| |
| if (!node->mj_MatchedOuter) |
| { |
| /* |
| * Generate a fake join tuple with nulls for the inner |
| * tuple, and return it if it passes the non-join quals. |
| */ |
| TupleTableSlot *result; |
| |
| node->mj_MatchedOuter = true; /* do it only once */ |
| |
| result = MJFillOuter(node); |
| if (result) |
| { |
| Gpmon_M_Incr_Rows_Out(GpmonPktFromMergeJoinState(node)); |
| CheckSendPlanStateGpmonPkt(&node->js.ps); |
| return result; |
| } |
| } |
| |
| /* |
| * now we get the next outer tuple, if any |
| */ |
| outerTupleSlot = ExecProcNode(outerPlan); |
| node->mj_OuterTupleSlot = outerTupleSlot; |
| MJ_DEBUG_PROC_NODE(outerTupleSlot); |
| node->mj_MatchedOuter = false; |
| |
| if (TupIsNull(outerTupleSlot)) |
| { |
| MJ_printf("ExecMergeJoin: end of outer subplan\n"); |
| |
| ExecEagerFreeMergeJoin(node); |
| |
| return NULL; |
| } |
| else |
| { |
| Gpmon_M_Incr(GpmonPktFromMergeJoinState(node), GPMON_MERGEJOIN_OUTERTUPLE); |
| Gpmon_M_Incr(GpmonPktFromMergeJoinState(node), GPMON_QEXEC_M_ROWSIN); |
| } |
| |
| /* Else remain in ENDINNER state and process next tuple. */ |
| break; |
| |
| /* |
| * broken state value? |
| */ |
| default: |
| insist_log(false, "unrecognized mergejoin state: %d", |
| (int) node->mj_JoinState); |
| } |
| } |
| } |
| |
| /* ---------------------------------------------------------------- |
| * ExecInitMergeJoin |
| * ---------------------------------------------------------------- |
| */ |
| MergeJoinState * |
| ExecInitMergeJoin(MergeJoin *node, EState *estate, int eflags) |
| { |
| MergeJoinState *mergestate; |
| int markflag; |
| |
| /* check for unsupported flags */ |
| Assert(!(eflags & (EXEC_FLAG_BACKWARD | EXEC_FLAG_MARK))); |
| |
| MJ1_printf("ExecInitMergeJoin: %s\n", |
| "initializing node"); |
| |
| /* |
| * create state structure |
| */ |
| mergestate = makeNode(MergeJoinState); |
| mergestate->js.ps.plan = (Plan *) node; |
| mergestate->js.ps.state = estate; |
| |
| /* |
| * Miscellaneous initialization |
| * |
| * create expression context for node |
| */ |
| ExecAssignExprContext(estate, &mergestate->js.ps); |
| |
| /* |
| * we need two additional econtexts in which we can compute the join |
| * expressions from the left and right input tuples. The node's regular |
| * econtext won't do because it gets reset too often. |
| */ |
| mergestate->mj_OuterEContext = CreateExprContext(estate); |
| mergestate->mj_InnerEContext = CreateExprContext(estate); |
| |
| /* |
| * initialize child expressions |
| */ |
| mergestate->js.ps.targetlist = (List *) |
| ExecInitExpr((Expr *) node->join.plan.targetlist, |
| (PlanState *) mergestate); |
| mergestate->js.ps.qual = (List *) |
| ExecInitExpr((Expr *) node->join.plan.qual, |
| (PlanState *) mergestate); |
| mergestate->js.jointype = node->join.jointype; |
| mergestate->js.joinqual = (List *) |
| ExecInitExpr((Expr *) node->join.joinqual, |
| (PlanState *) mergestate); |
| |
| mergestate->prefetch_inner = node->join.prefetch_inner; |
| mergestate->mj_squelchInner = true; |
| |
| /* mergeclauses are handled below */ |
| |
| /* |
| * initialize child nodes |
| * |
| * inner child must support MARK/RESTORE unless the outer plan is |
| * known to have no duplicate merge keys. |
| */ |
| markflag = node->unique_outer ? 0 : EXEC_FLAG_MARK; |
| outerPlanState(mergestate) = ExecInitNode(outerPlan(node), estate, eflags); |
| innerPlanState(mergestate) = ExecInitNode(innerPlan(node), estate, |
| eflags | markflag); |
| |
| #define MERGEJOIN_NSLOTS 4 |
| |
| /* |
| * tuple table initialization |
| */ |
| ExecInitResultTupleSlot(estate, &mergestate->js.ps); |
| |
| mergestate->mj_MarkedTupleSlot = ExecInitExtraTupleSlot(estate); |
| ExecSetSlotDescriptor(mergestate->mj_MarkedTupleSlot, |
| ExecGetResultType(innerPlanState(mergestate))); |
| |
| switch (node->join.jointype) |
| { |
| case JOIN_INNER: |
| case JOIN_IN: |
| mergestate->mj_FillOuter = false; |
| mergestate->mj_FillInner = false; |
| break; |
| case JOIN_LEFT: |
| case JOIN_LASJ: |
| mergestate->mj_FillOuter = true; |
| mergestate->mj_FillInner = false; |
| mergestate->mj_NullInnerTupleSlot = |
| ExecInitNullTupleSlot(estate, |
| ExecGetResultType(innerPlanState(mergestate))); |
| break; |
| case JOIN_RIGHT: |
| mergestate->mj_FillOuter = false; |
| mergestate->mj_FillInner = true; |
| mergestate->mj_NullOuterTupleSlot = |
| ExecInitNullTupleSlot(estate, |
| ExecGetResultType(outerPlanState(mergestate))); |
| |
| /* |
| * Can't handle right or full join with non-nil extra joinclauses. |
| * This should have been caught by planner. |
| */ |
| if (node->join.joinqual != NIL) |
| ereport(ERROR, |
| (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), |
| errmsg("RIGHT JOIN is only supported with merge-joinable join conditions"))); |
| break; |
| case JOIN_FULL: |
| mergestate->mj_FillOuter = true; |
| mergestate->mj_FillInner = true; |
| mergestate->mj_NullOuterTupleSlot = |
| ExecInitNullTupleSlot(estate, |
| ExecGetResultType(outerPlanState(mergestate))); |
| mergestate->mj_NullInnerTupleSlot = |
| ExecInitNullTupleSlot(estate, |
| ExecGetResultType(innerPlanState(mergestate))); |
| |
| /* |
| * Can't handle right or full join with non-nil extra joinclauses. |
| */ |
| if (node->join.joinqual != NIL) |
| ereport(ERROR, |
| (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), |
| errmsg("FULL JOIN is only supported with merge-joinable join conditions"))); |
| break; |
| case JOIN_LASJ_NOTIN: |
| insist_log(false, "join type not supported"); |
| break; |
| default: |
| insist_log(false, "unrecognized join type: %d", |
| (int) node->join.jointype); |
| } |
| |
| /* |
| * initialize tuple type and projection info |
| */ |
| ExecAssignResultTypeFromTL(&mergestate->js.ps); |
| ExecAssignProjectionInfo(&mergestate->js.ps, NULL); |
| |
| /* |
| * preprocess the merge clauses |
| */ |
| mergestate->mj_NumClauses = list_length(node->mergeclauses); |
| mergestate->mj_Clauses = MJExamineQuals(node->mergeclauses, |
| (PlanState *) mergestate); |
| |
| /* |
| * initialize join state |
| */ |
| mergestate->mj_JoinState = EXEC_MJ_INITIALIZE_OUTER; |
| mergestate->mj_MatchedOuter = false; |
| mergestate->mj_MatchedInner = false; |
| mergestate->mj_OuterTupleSlot = NULL; |
| mergestate->mj_InnerTupleSlot = NULL; |
| |
| /* |
| * initialization successful |
| */ |
| MJ1_printf("ExecInitMergeJoin: %s\n", |
| "node initialized"); |
| |
| initGpmonPktForMergeJoin((Plan *)node, &mergestate->js.ps.gpmon_pkt, estate); |
| |
| return mergestate; |
| } |
| |
| int |
| ExecCountSlotsMergeJoin(MergeJoin *node) |
| { |
| return ExecCountSlotsNode(outerPlan((Plan *) node)) + |
| ExecCountSlotsNode(innerPlan((Plan *) node)) + |
| MERGEJOIN_NSLOTS; |
| } |
| |
| /* ---------------------------------------------------------------- |
| * ExecEndMergeJoin |
| * |
| * old comments |
| * frees storage allocated through C routines. |
| * ---------------------------------------------------------------- |
| */ |
| void |
| ExecEndMergeJoin(MergeJoinState *node) |
| { |
| MJ1_printf("ExecEndMergeJoin: %s\n", |
| "ending node processing"); |
| |
| /* |
| * Free the exprcontext |
| */ |
| ExecFreeExprContext(&node->js.ps); |
| |
| /* |
| * clean out the tuple table |
| */ |
| ExecClearTuple(node->js.ps.ps_ResultTupleSlot); |
| ExecClearTuple(node->mj_MarkedTupleSlot); |
| |
| /* |
| * shut down the subplans |
| */ |
| ExecEndNode(innerPlanState(node)); |
| ExecEndNode(outerPlanState(node)); |
| |
| MJ1_printf("ExecEndMergeJoin: %s\n", |
| "node processing ended"); |
| |
| EndPlanStateGpmonPkt(&node->js.ps); |
| } |
| |
| void |
| ExecReScanMergeJoin(MergeJoinState *node, ExprContext *exprCtxt) |
| { |
| ExecClearTuple(node->mj_MarkedTupleSlot); |
| |
| node->mj_JoinState = EXEC_MJ_INITIALIZE_OUTER; |
| node->mj_MatchedOuter = false; |
| node->mj_MatchedInner = false; |
| node->mj_OuterTupleSlot = NULL; |
| node->mj_InnerTupleSlot = NULL; |
| |
| /* |
| * if chgParam of subnodes is not null then plans will be re-scanned by |
| * first ExecProcNode. |
| */ |
| if (((PlanState *) node)->lefttree->chgParam == NULL) |
| ExecReScan(((PlanState *) node)->lefttree, exprCtxt); |
| if (((PlanState *) node)->righttree->chgParam == NULL) |
| ExecReScan(((PlanState *) node)->righttree, exprCtxt); |
| |
| } |
| void |
| initGpmonPktForMergeJoin(Plan *planNode, gpmon_packet_t *gpmon_pkt, EState *estate) |
| { |
| Assert(planNode != NULL && gpmon_pkt != NULL && IsA(planNode, MergeJoin)); |
| |
| { |
| PerfmonNodeType type = PMNT_Invalid; |
| |
| switch(((MergeJoin *)planNode)->join.jointype) |
| { |
| case JOIN_INNER: |
| type = PMNT_MergeJoin; |
| break; |
| case JOIN_LEFT: |
| type = PMNT_MergeLeftJoin; |
| break; |
| case JOIN_LASJ: |
| type = PMNT_MergeLeftAntiSemiJoin; |
| break; |
| case JOIN_FULL: |
| type = PMNT_MergeFullJoin; |
| break; |
| case JOIN_RIGHT: |
| type = PMNT_MergeRightJoin; |
| break; |
| case JOIN_IN: |
| type = PMNT_MergeExistsJoin; |
| break; |
| case JOIN_REVERSE_IN: |
| type = PMNT_MergeReverseInJoin; |
| break; |
| case JOIN_UNIQUE_OUTER: |
| type = PMNT_MergeUniqueOuterJoin; |
| break; |
| case JOIN_UNIQUE_INNER: |
| type = PMNT_MergeUniqueInnerJoin; |
| break; |
| case JOIN_LASJ_NOTIN: |
| insist_log(false, "Join type not supported"); |
| break; |
| } |
| |
| Assert(type != PMNT_Invalid); |
| Assert(GPMON_MERGEJOIN_TOTAL <= (int) GPMON_QEXEC_M_COUNT); |
| InitPlanNodeGpmonPkt(planNode, gpmon_pkt, estate, type, |
| (int64)planNode->plan_rows, |
| NULL); |
| } |
| } |
| |
| void |
| ExecEagerFreeMergeJoin(MergeJoinState *node) |
| { |
| /* |
| * Since MergeJoin might call Mark/restore on its child nodes, its child nodes |
| * are not eager free safe. We will free their memory here. |
| */ |
| ExecEagerFreeChildNodes((PlanState *)node, false); |
| } |