blob: 29200bf39e30e3d317008ddaad72406862b810e4 [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*-------------------------------------------------------------------------
*
* pathnode.c
* Routines to manipulate pathlists and create path nodes
*
* Portions Copyright (c) 2005-2008, Greenplum inc
* Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/optimizer/util/pathnode.c,v 1.133 2006/10/04 00:29:55 momjian Exp $
*
*-------------------------------------------------------------------------
*/
#include "postgres.h"
#include <math.h>
#include "catalog/pg_operator.h"
#include "catalog/catquery.h"
#include "executor/executor.h"
#include "miscadmin.h"
#include "optimizer/clauses.h" /* contain_mutable_functions() */
#include "optimizer/cost.h"
#include "optimizer/pathnode.h"
#include "optimizer/paths.h"
#include "optimizer/tlist.h"
#include "parser/parse_expr.h"
#include "parser/parse_oper.h"
#include "parser/parsetree.h"
#include "utils/memutils.h"
#include "utils/selfuncs.h"
#include "utils/syscache.h"
#include "utils/hawq_type_mapping.h"
#include "cdb/cdbpath.h" /* cdb_join_motion() etc */
static void cdb_set_cheapest_dedup(PlannerInfo *root, RelOptInfo *rel);
static bool cdb_is_path_deduped(Path *path);
static bool cdb_subpath_tried_postjoin_dedup(Path *path, Relids subqrelids);
static UniquePath *create_limit1_path(PlannerInfo *root, Path *subpath);
static UniquePath *make_limit1_path(Path *subpath);
static UniquePath *make_unique_path(Path *subpath);
static List *translate_sub_tlist(List *tlist, int relid);
static bool query_is_distinct_for(Query *query, List *colnos);
static bool hash_safe_tlist(List *tlist);
#define FLATCOPY(newnode, node, nodetype) \
( (newnode) = makeNode(nodetype), \
memcpy((newnode), (node), sizeof(nodetype)) )
/*
* pathnode_copy_node
* Returns a flat copy of the given Path node.
*/
Path *
pathnode_copy_node(const Path *s)
{
MemoryContext oldcontext;
void *t;
if (!s)
return NULL;
/* Allocate in same context as parent rel for GEQO safety. */
oldcontext = MemoryContextSwitchTo(GetMemoryChunkContext(s->parent));
switch (s->type)
{
case T_Path:
FLATCOPY(t, s, Path);
break;
case T_IndexPath:
FLATCOPY(t, s, IndexPath);
break;
case T_BitmapHeapPath:
FLATCOPY(t, s, BitmapHeapPath);
break;
case T_BitmapAppendOnlyPath:
FLATCOPY(t, s, BitmapAppendOnlyPath);
break;
case T_BitmapAndPath:
FLATCOPY(t, s, BitmapAndPath);
break;
case T_BitmapOrPath:
FLATCOPY(t, s, BitmapOrPath);
break;
case T_TidPath:
FLATCOPY(t, s, TidPath);
break;
case T_AppendPath:
FLATCOPY(t, s, AppendPath);
break;
case T_ResultPath:
FLATCOPY(t, s, ResultPath);
break;
case T_MaterialPath:
FLATCOPY(t, s, MaterialPath);
break;
case T_UniquePath:
FLATCOPY(t, s, UniquePath);
break;
case T_NestPath:
FLATCOPY(t, s, NestPath);
break;
case T_MergePath:
FLATCOPY(t, s, MergePath);
break;
case T_HashPath:
FLATCOPY(t, s, HashPath);
break;
case T_CdbMotionPath:
FLATCOPY(t, s, CdbMotionPath);
break;
default:
t = NULL; /* keep compiler quiet */
elog(ERROR, "unrecognized pathnode: %d", (int)s->type);
}
MemoryContextSwitchTo(oldcontext);
return (Path *)t;
} /* pathnode_copy_node */
/*
* pathnode_walk_node
* Calls a 'walker' function for the given Path node; or returns
* CdbVisit_Walk if 'path' is NULL.
*
* If 'walker' returns CdbVisit_Walk, then this function calls
* pathnode_walk_kids() to visit the node's children, and returns
* the result.
*
* If 'walker' returns CdbVisit_Skip, then this function immediately
* returns CdbVisit_Walk and does not visit the node's children.
*
* If 'walker' returns CdbVisit_Stop or another value, then this function
* immediately returns that value and does not visit the node's children.
*
* pathnode_walk_list
* Calls pathnode_walk_node() for each Path node in the given List.
*
* Quits if the result of pathnode_walk_node() is CdbVisit_Stop or another
* value other than CdbVisit_Walk, and returns that result without visiting
* any more nodes.
*
* Returns CdbVisit_Walk if all of the subtrees return CdbVisit_Walk, or
* if the list is empty.
*
* Note that this function never returns CdbVisit_Skip to its caller.
* Only the 'walker' can return CdbVisit_Skip.
*
* pathnode_walk_kids
* Calls pathnode_walk_node() for each child of the given Path node.
*
* Quits if the result of pathnode_walk_node() is CdbVisit_Stop or another
* value other than CdbVisit_Walk, and returns that result without visiting
* any more nodes.
*
* Returns CdbVisit_Walk if all of the children return CdbVisit_Walk, or
* if there are no children.
*
* Note that this function never returns CdbVisit_Skip to its caller.
* Only the 'walker' can return CdbVisit_Skip.
*
* NB: All CdbVisitOpt values other than CdbVisit_Walk or CdbVisit_Skip are
* treated as equivalent to CdbVisit_Stop. Thus the walker can break out
* of a traversal and at the same time return a smidgen of information to the
* caller, perhaps to indicate the reason for termination. For convenience,
* a couple of alternative stopping codes are predefined for walkers to use at
* their discretion: CdbVisit_Failure and CdbVisit_Success.
*/
// inline
CdbVisitOpt
pathnode_walk_node(Path *path,
CdbVisitOpt (*walker)(Path *path, void *context),
void *context)
{
CdbVisitOpt whatnext;
if (path == NULL)
whatnext = CdbVisit_Walk;
else
{
whatnext = walker(path, context);
if (whatnext == CdbVisit_Walk)
whatnext = pathnode_walk_kids(path, walker, context);
else if (whatnext == CdbVisit_Skip)
whatnext = CdbVisit_Walk;
}
Assert(whatnext != CdbVisit_Skip);
return whatnext;
} /* pathnode_walk_node */
CdbVisitOpt
pathnode_walk_list(List *pathlist,
CdbVisitOpt (*walker)(Path *path, void *context),
void *context)
{
ListCell *cell;
Path *path;
CdbVisitOpt v = CdbVisit_Walk;
foreach(cell, pathlist)
{
path = (Path *)lfirst(cell);
v = pathnode_walk_node(path, walker, context);
if (v != CdbVisit_Walk) /* stop */
break;
}
return v;
} /* pathnode_walk_list */
CdbVisitOpt
pathnode_walk_kids(Path *path,
CdbVisitOpt (*walker)(Path *path, void *context),
void *context)
{
CdbVisitOpt v;
Assert(path != NULL);
switch (path->pathtype)
{
case T_SeqScan:
case T_ExternalScan:
case T_MagmaIndexScan:
case T_MagmaIndexOnlyScan:
case T_AppendOnlyScan:
case T_ParquetScan:
case T_IndexScan:
case T_OrcIndexScan:
case T_OrcIndexOnlyScan:
case T_TidScan:
case T_SubqueryScan:
case T_FunctionScan:
case T_ValuesScan:
case T_CteScan:
case T_TableFunctionScan:
return CdbVisit_Walk;
case T_BitmapHeapScan:
v = pathnode_walk_node(((BitmapHeapPath *)path)->bitmapqual, walker, context);
break;
case T_BitmapAnd:
v = pathnode_walk_list(((BitmapAndPath *)path)->bitmapquals, walker, context);
break;
case T_BitmapOr:
v = pathnode_walk_list(((BitmapOrPath *)path)->bitmapquals, walker, context);
break;
case T_HashJoin:
case T_MergeJoin:
v = pathnode_walk_node(((JoinPath *)path)->outerjoinpath, walker, context);
if (v != CdbVisit_Walk) /* stop */
break;
v = pathnode_walk_node(((JoinPath *)path)->innerjoinpath, walker, context);
break;
case T_NestLoop:
{
NestPath *nestpath = (NestPath *)path;
v = pathnode_walk_node(nestpath->jpath.outerjoinpath, walker, context);
if (v != CdbVisit_Walk) /* stop */
break;
v = pathnode_walk_node(nestpath->jpath.innerjoinpath, walker, context);
if (v != CdbVisit_Walk) /* stop */
break;
}
break;
case T_Append:
v = pathnode_walk_list(((AppendPath *)path)->subpaths, walker, context);
break;
case T_Result:
v = pathnode_walk_node(((ResultPath *)path)->subpath, walker, context);
break;
case T_Material:
v = pathnode_walk_node(((MaterialPath *)path)->subpath, walker, context);
break;
case T_Unique:
v = pathnode_walk_node(((UniquePath *)path)->subpath, walker, context);
break;
case T_Motion:
v = pathnode_walk_node(((CdbMotionPath *)path)->subpath, walker, context);
break;
default:
v = CdbVisit_Walk; /* keep compiler quiet */
elog(ERROR, "unrecognized path type: %d", (int)path->pathtype);
}
return v;
} /* pathnode_walk_kids */
/*****************************************************************************
* MISC. PATH UTILITIES
*****************************************************************************/
/*
* compare_path_costs
* Return -1, 0, or +1 according as path1 is cheaper, the same cost,
* or more expensive than path2 for the specified criterion.
*/
int
compare_path_costs(Path *path1, Path *path2, CostSelector criterion)
{
if (criterion == STARTUP_COST)
{
if (path1->startup_cost < path2->startup_cost)
return -1;
if (path1->startup_cost > path2->startup_cost)
return +1;
/*
* If paths have the same startup cost (not at all unlikely), order
* them by total cost.
*/
if (path1->total_cost < path2->total_cost)
return -1;
if (path1->total_cost > path2->total_cost)
return +1;
}
else
{
if (path1->total_cost < path2->total_cost)
return -1;
if (path1->total_cost > path2->total_cost)
return +1;
/*
* If paths have the same total cost, order them by startup cost.
*/
if (path1->startup_cost < path2->startup_cost)
return -1;
if (path1->startup_cost > path2->startup_cost)
return +1;
}
return 0;
}
/*
* compare_fuzzy_path_costs
* Return -1, 0, or +1 according as path1 is cheaper, the same cost,
* or more expensive than path2 for the specified criterion.
*
* This differs from compare_path_costs in that we consider the costs the
* same if they agree to within a "fuzz factor". This is used by add_path
* to avoid keeping both of a pair of paths that really have insignificantly
* different cost.
*/
static int
compare_fuzzy_path_costs(Path *path1, Path *path2, CostSelector criterion)
{
/*
* We use a fuzz factor of 1% of the smaller cost.
*
* XXX does this percentage need to be user-configurable?
*/
if (criterion == STARTUP_COST)
{
if (path1->startup_cost > path2->startup_cost * 1.01)
return +1;
if (path2->startup_cost > path1->startup_cost * 1.01)
return -1;
/*
* If paths have the same startup cost (not at all unlikely), order
* them by total cost.
*/
if (path1->total_cost > path2->total_cost * 1.01)
return +1;
if (path2->total_cost > path1->total_cost * 1.01)
return -1;
}
else
{
if (path1->total_cost > path2->total_cost * 1.01)
return +1;
if (path2->total_cost > path1->total_cost * 1.01)
return -1;
/*
* If paths have the same total cost, order them by startup cost.
*/
if (path1->startup_cost > path2->startup_cost * 1.01)
return +1;
if (path2->startup_cost > path1->startup_cost * 1.01)
return -1;
}
return 0;
}
/*
* compare_path_fractional_costs
* Return -1, 0, or +1 according as path1 is cheaper, the same cost,
* or more expensive than path2 for fetching the specified fraction
* of the total tuples.
*
* If fraction is <= 0 or > 1, we interpret it as 1, ie, we select the
* path with the cheaper total_cost.
*/
int
compare_fractional_path_costs(Path *path1, Path *path2,
double fraction)
{
Cost cost1,
cost2;
if (fraction <= 0.0 || fraction >= 1.0)
return compare_path_costs(path1, path2, TOTAL_COST);
cost1 = path1->startup_cost +
fraction * (path1->total_cost - path1->startup_cost);
cost2 = path2->startup_cost +
fraction * (path2->total_cost - path2->startup_cost);
if (cost1 < cost2)
return -1;
if (cost1 > cost2)
return +1;
return 0;
}
/*
* set_cheapest
* Find the minimum-cost paths from among a relation's paths,
* and save them in the rel's cheapest-path fields.
*
* This is normally called only after we've finished constructing the path
* list for the rel node.
*
* If we find two paths of identical costs, try to keep the better-sorted one.
* The paths might have unrelated sort orderings, in which case we can only
* guess which might be better to keep, but if one is superior then we
* definitely should keep it.
*/
void
set_cheapest(PlannerInfo *root, RelOptInfo *parent_rel)
{
ListCell *p;
Path *cheapest_startup_path;
Path *cheapest_total_path;
CdbRelDedupInfo *dedup_info = parent_rel->dedup_info;
Assert(IsA(parent_rel, RelOptInfo));
/* CDB: Add paths incorporating subquery duplicate suppression. */
if (dedup_info &&
dedup_info->later_dedup_pathlist)
{
cdb_set_cheapest_dedup(root, parent_rel);
/* If main pathlist is empty, put the later_dedup paths there. */
if (!parent_rel->pathlist &&
dedup_info->later_dedup_pathlist)
{
parent_rel->pathlist = dedup_info->later_dedup_pathlist;
parent_rel->cheapest_startup_path = dedup_info->cheapest_startup_path;
parent_rel->cheapest_total_path = dedup_info->cheapest_total_path;
dedup_info->later_dedup_pathlist = NIL;
dedup_info->cheapest_startup_path = NULL;
dedup_info->cheapest_total_path = NULL;
Assert(parent_rel->cheapest_total_path);
return;
}
}
/* CDB: Empty pathlist is possible if user set some enable_xxx = off. */
if (!parent_rel->pathlist)
{
parent_rel->cheapest_startup_path = parent_rel->cheapest_total_path = NULL;
return;
}
cheapest_startup_path = cheapest_total_path = (Path *) linitial(parent_rel->pathlist);
for_each_cell(p, lnext(list_head(parent_rel->pathlist)))
{
Path *path = (Path *) lfirst(p);
int cmp;
cmp = compare_path_costs(cheapest_startup_path, path, STARTUP_COST);
if (cmp > 0 ||
(cmp == 0 &&
compare_pathkeys(cheapest_startup_path->pathkeys,
path->pathkeys) == PATHKEYS_BETTER2))
cheapest_startup_path = path;
cmp = compare_path_costs(cheapest_total_path, path, TOTAL_COST);
if (cmp > 0 ||
(cmp == 0 &&
compare_pathkeys(cheapest_total_path->pathkeys,
path->pathkeys) == PATHKEYS_BETTER2))
cheapest_total_path = path;
}
parent_rel->cheapest_startup_path = cheapest_startup_path;
parent_rel->cheapest_total_path = cheapest_total_path;
} /* set_cheapest */
/*
* cdb_set_cheapest_dedup
*
* Called by set_cheapest() to augment a RelOptInfo with paths that
* provide for subquery duplicate suppression.
*/
static void
cdb_set_cheapest_dedup(PlannerInfo *root, RelOptInfo *rel)
{
CdbRelDedupInfo *dedup = rel->dedup_info;
ListCell *cell;
List *save_pathlist;
Path *save_cheapest_startup;
Path *save_cheapest_total;
Path *subpath;
UniquePath *upath = NULL;
/*
* If rel has only 1 row, joining with it cannot produce duplicates.
*
* Some paths could have been added to the later_dedup_pathlist before
* the discovery that at most one row can satisfy the predicates.
* Transfer any such to the main pathlist.
*/
if (rel->onerow)
{
/* Seize the later_dedup_pathlist. */
save_pathlist = dedup->later_dedup_pathlist;
dedup->later_dedup_pathlist = NIL;
/* Add its paths to the main pathlist. */
foreach(cell, save_pathlist)
add_path(root, rel, (Path *)lfirst(cell));
/* Verify that the paths weren't added to the wrong list. */
Insist(!dedup->later_dedup_pathlist);
return;
}
/*
* Pick out the lowest-cost paths in later_dedup_pathlist.
*
* The choice is made by a recursive call to set_cheapest(), for which we
* momentarily hijack the main pathlist fields in RelOptInfo.
*/
save_pathlist = rel->pathlist;
save_cheapest_startup = rel->cheapest_startup_path;
save_cheapest_total = rel->cheapest_total_path;
rel->pathlist = dedup->later_dedup_pathlist;
rel->cheapest_startup_path = NULL;
rel->cheapest_total_path = NULL;
dedup->later_dedup_pathlist = NIL; /* to stop the recursion */
set_cheapest(root, rel);
dedup->later_dedup_pathlist = rel->pathlist;
dedup->cheapest_startup_path = rel->cheapest_startup_path;
dedup->cheapest_total_path = rel->cheapest_total_path;
rel->pathlist = save_pathlist;
rel->cheapest_startup_path = save_cheapest_startup;
rel->cheapest_total_path = save_cheapest_total;
/*
* If rel is the final result of a pulled-up uncorrelated "= ANY" subquery,
* consider a path that yields the distinct rows of the subquery.
*
* For an uncorrelated "= ANY" subquery, we'll consider plans in which
* we remove duplicates from the rel containing the subquery result,
* before that is joined with any other rels. An "=" predicate (which
* was created by convert_IN_to_join()) ensures that at most one row of
* the duplicate-free subquery result can join with any one row of the
* subquery's parent query.
*
* (Formerly in PostgreSQL this pre-join duplicate suppression was invoked
* via special jointype codes: JOIN_UNIQUE_OUTER and JOIN_UNIQUE_INNER.)
*
* CDB TODO: We wouldn't have to do anything special if we knew the
* subquery result rows are distinct because of a UNIQUE constraint.
*
* CDB TODO: Correlated subquery could use JOIN_UNIQUE if all outer
* refs can be pulled up above the UniquePath operator, or if umethod
* is UNIQUE_PATH_NOOP.
*
* CDB TODO: Avoid sorting when the subpath is ordered on the
* sub_targetlist columns.
*/
if (dedup->join_unique_ininfo)
{
Assert(dedup->join_unique_ininfo->sub_targetlist);
/* Top off the subpath with DISTINCT ON the result columns. */
upath = create_unique_exprlist_path(root,
dedup->cheapest_total_path,
dedup->join_unique_ininfo->sub_targetlist);
/* Add to rel's main pathlist. */
add_path(root, rel, (Path *)upath);
}
/*
* Consider post-join duplicate removal.
*
* Post-join duplicate removal is done by inserting a Unique operator that
* is DISTINCT ON the unique row identifiers of those relids that do *not*
* derive from flattened subqueries.
*
* If there is at least one subquery whose required inputs are all present
* in this rel, and no subquery having some inputs present and some inputs
* absent, then cdb_make_rel_dedup_info() has set the try_postjoin_dedup
* flag and filled in the prejoin_dedup_subqrelids field giving the relids
* of those subqueries' own tables. Those subqueries are candidates for
* late duplicate removal.
*
* NB: "Required inputs" means the sublink's lefthand relids, the
* subquery's own tables (the sublink's righthand relids), and the
* relids of outer references.
*/
else if (dedup->try_postjoin_dedup)
{
Relids distinct_relids = NULL;
Assert(dedup->prejoin_dedup_subqrelids);
/* hide later_dedup_pathlist while we walk it, so add_path can't upd. */
save_pathlist = dedup->later_dedup_pathlist;
dedup->later_dedup_pathlist = NIL;
foreach(cell, save_pathlist)
{
subpath = (Path *)lfirst(cell);
Assert(!subpath->subq_complete);
/*
* When one subplan of a join is the source of all of the relids in
* postjoin_dedup_allrelids, we can assume post-join duplicate
* removal has already been considered upstream. We could keep on
* considering it after every subsequent join, to insert the Unique
* op at the point where it is cheapest. But we do not attempt
* that, because at present the join result size estimates are
* untrustworthy.
*
* CDB TODO: Could remove this after fixing the join selectivity.
*/
if (cdb_subpath_tried_postjoin_dedup(subpath,
dedup->prejoin_dedup_subqrelids))
continue;
/*
* Build set of tables whose row ids will be the DISTINCT ON keys.
* The bitmapset object will be shared; don't free it!
*/
if (!distinct_relids)
distinct_relids =
bms_difference(rel->relids, dedup->prejoin_dedup_subqrelids);
/*
* Top off the subpath with DISTINCT ON the non-subquery row ids.
* Add to rel's main pathlist.
*/
upath = create_unique_rowid_path(root, subpath, distinct_relids);
add_path(root, rel, (Path *)upath);
}
/* Verify that our new paths haven't gone into the wrong pathlist. */
Insist(!dedup->later_dedup_pathlist);
dedup->later_dedup_pathlist = save_pathlist;
}
/*
* Don't consider plans that further delay duplicate suppression
* after all subqueries have been fully evaluated. (If a later join
* happens to reduce the amount of data, duplicate removal could be
* cheaper afterward and it would be better to wait. However, we
* don't currently trust the join result size estimates that much.)
*
* CDB TODO: If current query level has GROUP BY with only MIN/MAX aggs,
* DISTINCT or LIMIT 1, retain these paths... but move them into the rel's
* main pathlist since no extra step will be needed downstream for dedup.
*/
if (dedup->no_more_subqueries)
dedup->later_dedup_pathlist = NIL;
} /* cdb_set_cheapest_dedup */
/*
* cdb_is_path_deduped
*
* Returns true if there is no flattened subquery whose tables are all present,
* or if the given path takes care of all the duplicate suppression for such
* subqueries, so that they require no further action downstream.
*
* Returns false if some flattened subquery has all tables present but still
* needs duplicate suppression to be done downstream.
*
* Not affected by subqueries which have some but not all tables present.
*/
typedef struct CdbIsPathDedupedCtx
{
Relids all_subqrelids;
Relids deduped_relids;
bool deduped_unshared;
} CdbIsPathDedupedCtx;
static CdbVisitOpt
cdb_is_path_deduped_walker(Path *path, void* context)
{
CdbIsPathDedupedCtx *ctx = (CdbIsPathDedupedCtx *)context;
RelOptInfo *rel = path->parent;
Relids deduped;
/* Skip path unless rel includes all relids of some flattened subquery. */
if (!rel->dedup_info ||
!rel->dedup_info->prejoin_dedup_subqrelids)
return CdbVisit_Skip; /* no relids to add; don't visit children */
/* Dedup usually wraps up all subqueries whose tables are all present. */
deduped = rel->dedup_info->prejoin_dedup_subqrelids;
/* Already satisfied? */
if (path->subq_complete)
goto put_deduped_result;
/* If rel has only 1 row, joining with it cannot produce duplicates. */
if (rel->onerow)
goto put_deduped_result;
switch (path->pathtype)
{
/*
* Unique op is used only for subquery duplicate suppression, at present.
* Subqueries whose relids are covered by a Unique op don't require
* further duplicate suppression downstream.
*/
case T_Unique:
goto put_deduped_result;
/*
* Join with jointype JOIN_IN will suppress duplicates for all
* subqueries whose relids are covered by the join's inner rel.
*/
case T_HashJoin:
case T_MergeJoin:
case T_NestLoop:
{
JoinPath *joinpath = (JoinPath *)path;
RelOptInfo *inner_rel = joinpath->innerjoinpath->parent;
CdbVisitOpt status;
/* Visit the outer subpath. */
status = pathnode_walk_node(joinpath->outerjoinpath,
cdb_is_path_deduped_walker,
ctx);
if (status != CdbVisit_Walk)
return status;
/* Subqueries on inner side of JOIN_IN can't cause duplicates. */
if (joinpath->jointype == JOIN_IN)
{
if (!inner_rel->dedup_info ||
!inner_rel->dedup_info->prejoin_dedup_subqrelids)
return CdbVisit_Walk;
deduped = inner_rel->dedup_info->prejoin_dedup_subqrelids;
goto put_deduped_result;
}
/* Visit the inner subpath. */
return pathnode_walk_node(joinpath->innerjoinpath,
cdb_is_path_deduped_walker,
ctx);
}
default:
break;
}
return CdbVisit_Walk; /* onward to visit children */
/*
* Add this Path node's deduped relids to set.
* To reduce allocations, try to return an existing bitmapset.
*/
put_deduped_result:
if (!ctx->deduped_relids)
{
ctx->deduped_relids = deduped;
ctx->deduped_unshared = false;
}
else if (ctx->deduped_unshared)
ctx->deduped_relids = bms_add_members(ctx->deduped_relids, deduped);
else
{
ctx->deduped_relids = bms_union(ctx->deduped_relids, deduped);
ctx->deduped_unshared = true;
}
/*
* All found? Then stop.
*/
if (bms_equal(ctx->deduped_relids, ctx->all_subqrelids))
return CdbVisit_Success;
/* Done with this branch; don't visit children, but proceed to next. */
return CdbVisit_Skip;
} /* cdb_is_path_deduped_walker */
static bool
cdb_is_path_deduped(Path *path)
{
CdbIsPathDedupedCtx context;
CdbVisitOpt status;
if (!path->parent->dedup_info ||
!path->parent->dedup_info->prejoin_dedup_subqrelids)
return true;
context.all_subqrelids = path->parent->dedup_info->prejoin_dedup_subqrelids;
context.deduped_relids = NULL;
context.deduped_unshared = false;
status = pathnode_walk_node(path, cdb_is_path_deduped_walker, &context);
if (context.deduped_unshared)
bms_free(context.deduped_relids);
return status == CdbVisit_Success;
} /* cdb_is_path_deduped */
/*
* cdb_subpath_tried_postjoin_dedup
*
* Returns true if the given Path has a subpath in which post-join duplicate
* suppression can be assumed to have been considered already for exactly the
* given set of subquery relids.
*/
typedef struct CdbSubpathTriedPostjoinDedupCtx
{
RelOptInfo *given_rel;
Relids subqrelids;
} CdbSubpathTriedPostjoinDedupCtx;
static CdbVisitOpt
cdb_subpath_tried_postjoin_dedup_walker(Path *path, void* context)
{
CdbSubpathTriedPostjoinDedupCtx *ctx = (CdbSubpathTriedPostjoinDedupCtx *)context;
RelOptInfo *rel = path->parent;
/* Descend thru parent_rel's Path nodes to top Path node of an input rel. */
if (rel == ctx->given_rel)
return CdbVisit_Walk;
/* If none of the given relids come from this input rel, advance to next. */
if (!bms_overlap(ctx->subqrelids, rel->relids))
return CdbVisit_Skip;
/* Succeed if rel is marked for post-join dedup of given subqueries. */
if (rel->dedup_info &&
rel->dedup_info->try_postjoin_dedup &&
bms_equal(rel->dedup_info->prejoin_dedup_subqrelids, ctx->subqrelids))
return CdbVisit_Success;
return CdbVisit_Failure;
} /* cdb_subpath_tried_postjoin_dedup_walker */
static bool
cdb_subpath_tried_postjoin_dedup(Path *path, Relids subqrelids)
{
CdbSubpathTriedPostjoinDedupCtx context;
CdbVisitOpt status;
context.given_rel = path->parent;
context.subqrelids = subqrelids;
status = pathnode_walk_kids(path, cdb_subpath_tried_postjoin_dedup_walker, &context);
return (status == CdbVisit_Success);
} /* cdb_subpath_tried_postjoin_dedup */
/*
* add_path
* Consider a potential implementation path for the specified parent rel,
* and add it to the rel's pathlist if it is worthy of consideration.
* A path is worthy if it has either a better sort order (better pathkeys)
* or cheaper cost (on either dimension) than any of the existing old paths.
*
* We also remove from the rel's pathlist any old paths that are dominated
* by new_path --- that is, new_path is both cheaper and at least as well
* ordered.
*
* The pathlist is kept sorted by TOTAL_COST metric, with cheaper paths
* at the front. No code depends on that for correctness; it's simply
* a speed hack within this routine. Doing it that way makes it more
* likely that we will reject an inferior path after a few comparisons,
* rather than many comparisons.
*
* NOTE: discarded Path objects are immediately pfree'd to reduce planner
* memory consumption. We dare not try to free the substructure of a Path,
* since much of it may be shared with other Paths or the query tree itself;
* but just recycling discarded Path nodes is a very useful savings in
* a large join tree. We can recycle the List nodes of pathlist, too.
*
* NB: The Path that is passed to add_path() must be considered invalid
* upon return, and not touched again by the caller, because we free it
* if we already know of a better path. Likewise, a Path that is passed
* to add_path() must not be shared as a subpath of any other Path of the
* same join level. Use pathnode_copy_node() to make a copy of the top
* Path node before calling add_path(); then it'll be ok to share the copy.
*
* BUT: we do not pfree IndexPath objects, since they may be referenced as
* children of BitmapHeapPaths as well as being paths in their own right.
*
* 'parent_rel' is the relation entry to which the path corresponds.
* 'new_path' is a potential path for parent_rel.
*
* Returns nothing, but modifies parent_rel->pathlist.
*/
void add_path(PlannerInfo *root, RelOptInfo *parent_rel, Path *new_path)
{
bool accept_new = true; /* unless we find a superior old path */
ListCell *insert_after = NULL; /* where to insert new item */
ListCell *p1_prev = NULL;
ListCell *p1;
List **which_pathlist;
List *pathlist;
if (!new_path)
return;
Assert(cdbpathlocus_is_valid(new_path->locus));
/*
* CDB: Can we limit the rel to 1 row without affecting the query result?
*
* If rel's targetlist is empty, then only the rel's row count affects
* downstream processing. And if the rel consists only of subquery tables
* which have not been joined with tables of the current query level, then
* all that matters is whether the number of result rows is zero or nonzero.
* We'll insert LIMIT 1 on top of every path.
*
* This situation won't come up often. Flattened uncorrelated EXISTS
* subqueries would be a typical example, if not for the fact that they
* are pulled out for separate optimization as InitPlans.
*/
if (!parent_rel->reltargetlist &&
!parent_rel->onerow &&
!bms_overlap(parent_rel->relids, root->currlevel_relids))
new_path = (Path *)create_limit1_path(root, new_path);
/*
* CDB: Note whether duplicate suppression has been completed for all
* flattened subqueries whose tables are all present.
*
* Paths that await later addition of duplicate suppression are kept in a
* separate pathlist.
*
* NB: We set subq_complete false if all tables of a not-deduped subquery
* are present, even if the not-deduped subquery is joined with a deduped
* one. The earlier deduping can't help the later one, except perhaps by
* reducing the number of rows; so don't bother to keep flags about it.
*/
Assert(!new_path->subq_complete);
if (!parent_rel->dedup_info ||
cdb_is_path_deduped(new_path))
{
new_path->subq_complete = true;
which_pathlist = &parent_rel->pathlist;
pathlist = *which_pathlist;
}
else
{
new_path->subq_complete = false;
which_pathlist = &parent_rel->dedup_info->later_dedup_pathlist;
pathlist = *which_pathlist;
}
/*
* Loop to check proposed new path against old paths. Note it is possible
* for more than one old path to be tossed out because new_path dominates
* it.
*/
p1 = list_head(pathlist); /* cannot use foreach here */
while (p1 != NULL)
{
Path *old_path = (Path *) lfirst(p1);
bool remove_old = false; /* unless new proves superior */
int costcmp;
/*
* As of Postgres 8.0, we use fuzzy cost comparison to avoid wasting
* cycles keeping paths that are really not significantly different in
* cost.
*/
costcmp = compare_fuzzy_path_costs(new_path, old_path, TOTAL_COST);
/*
* If the two paths compare differently for startup and total cost,
* then we want to keep both, and we can skip the (much slower)
* comparison of pathkeys. If they compare the same, proceed with the
* pathkeys comparison. Note: this test relies on the fact that
* compare_fuzzy_path_costs will only return 0 if both costs are
* effectively equal (and, therefore, there's no need to call it twice
* in that case).
*/
if (costcmp == 0 ||
costcmp == compare_fuzzy_path_costs(new_path, old_path,
STARTUP_COST))
{
/* Still a tie? See which path has better pathkeys. */
switch (compare_pathkeys(new_path->pathkeys, old_path->pathkeys))
{
case PATHKEYS_EQUAL:
if (costcmp < 0)
remove_old = true; /* new dominates old */
else if (costcmp > 0)
accept_new = false; /* old dominates new */
else
{
/*
* Same pathkeys, and fuzzily the same cost, so keep
* just one --- but we'll do an exact cost comparison
* to decide which.
*/
if (compare_path_costs(new_path, old_path,
TOTAL_COST) < 0)
remove_old = true; /* new dominates old */
else
accept_new = false; /* old equals or dominates new */
}
break;
case PATHKEYS_BETTER1:
if (costcmp <= 0)
remove_old = true; /* new dominates old */
break;
case PATHKEYS_BETTER2:
if (costcmp >= 0)
accept_new = false; /* old dominates new */
break;
case PATHKEYS_DIFFERENT:
/* keep both paths, since they have different ordering */
break;
}
}
/*
* Remove current element from pathlist if dominated by new.
*/
if (remove_old)
{
pathlist = list_delete_cell(pathlist, p1, p1_prev);
/*
* Delete the data pointed-to by the deleted cell, if possible
*/
if (!IsA(old_path, IndexPath))
pfree(old_path);
/* Advance list pointer */
if (p1_prev)
p1 = lnext(p1_prev);
else
p1 = list_head(pathlist);
}
else
{
/* new belongs after this old path if it has cost >= old's */
if (costcmp >= 0)
insert_after = p1;
/* Advance list pointers */
p1_prev = p1;
p1 = lnext(p1);
}
/*
* If we found an old path that dominates new_path, we can quit
* scanning the pathlist; we will not add new_path, and we assume
* new_path cannot dominate any other elements of the pathlist.
*/
if (!accept_new)
break;
}
if (accept_new)
{
/* Accept the new path: insert it at proper place in pathlist */
if (insert_after)
lappend_cell(pathlist, insert_after, new_path);
else
pathlist = lcons(new_path, pathlist);
}
else
{
/* Reject and recycle the new path */
if (!IsA(new_path, IndexPath))
pfree(new_path);
}
/* Put back the updated pathlist ptr. */
*which_pathlist = pathlist;
} /* add_path */
/*****************************************************************************
* PATH NODE CREATION ROUTINES
*****************************************************************************/
/*
* create_seqscan_path
* Creates a path corresponding to a sequential scan, returning the
* pathnode.
*/
Path *
create_seqscan_path(PlannerInfo *root, RelOptInfo *rel)
{
Path *pathnode = makeNode(Path);
pathnode->pathtype = T_SeqScan;
pathnode->parent = rel;
pathnode->pathkeys = NIL; /* seqscan has unordered result */
pathnode->locus = cdbpathlocus_from_baserel(root, rel);
pathnode->motionHazard = false;
pathnode->rescannable = true;
cost_seqscan(pathnode, root, rel);
return pathnode;
}
/*
* Create a path for scanning an append-only table
*/
AppendOnlyPath *
create_appendonly_path(PlannerInfo *root, RelOptInfo *rel)
{
AppendOnlyPath *pathnode = makeNode(AppendOnlyPath);
pathnode->path.pathtype = T_AppendOnlyScan;
pathnode->path.parent = rel;
pathnode->path.pathkeys = NIL; /* seqscan has unordered result */
pathnode->path.locus = cdbpathlocus_from_baserel(root, rel);
pathnode->path.motionHazard = false;
pathnode->path.rescannable = true;
cost_appendonlyscan(pathnode, root, rel);
return pathnode;
}
/*
* Create a path for scanning a parquet table
*/
ParquetPath *
create_parquet_path(PlannerInfo *root, RelOptInfo *rel)
{
ParquetPath *pathnode = makeNode(ParquetPath);
pathnode->path.pathtype = T_ParquetScan;
pathnode->path.parent = rel;
pathnode->path.pathkeys = NIL; /* seqscan has unordered result */
pathnode->path.locus = cdbpathlocus_from_baserel(root, rel);
pathnode->path.motionHazard = false;
pathnode->path.rescannable = true;
cost_parquetscan(pathnode, root, rel);
return pathnode;
}
/*
* Create a path for scanning an external table
*/
ExternalPath *
create_external_path(PlannerInfo *root, RelOptInfo *rel)
{
ExternalPath *pathnode = makeNode(ExternalPath);
pathnode->path.pathtype = T_ExternalScan;
pathnode->path.parent = rel;
pathnode->path.pathkeys = NIL; /* external scan has unordered result */
pathnode->path.locus = cdbpathlocus_from_baserel(root, rel);
pathnode->path.motionHazard = false;
pathnode->path.rescannable = rel->isrescannable;
cost_externalscan(pathnode, root, rel);
return pathnode;
}
/*
* create_index_path
* Creates a path node for an index scan.
*
* 'index' is a usable index.
* 'clause_groups' is a list of lists of RestrictInfo nodes
* to be used as index qual conditions in the scan.
* 'pathkeys' describes the ordering of the path.
* 'indexscandir' is ForwardScanDirection or BackwardScanDirection
* for an ordered index, or NoMovementScanDirection for
* an unordered index.
* 'outer_rel' is the outer relation if this is a join inner indexscan path.
* (pathkeys and indexscandir are ignored if so.) NULL if not.
*
* Returns the new path node.
*/
IndexPath *
create_index_path(PlannerInfo *root,
IndexOptInfo *index,
List *clause_groups,
List *pathkeys,
ScanDirection indexscandir,
RelOptInfo *outer_rel)
{
IndexPath *pathnode = makeNode(IndexPath);
RelOptInfo *rel = index->rel;
List *indexquals,
*allclauses;
/*
* For a join inner scan, there's no point in marking the path with any
* pathkeys, since it will only ever be used as the inner path of a
* nestloop, and so its ordering does not matter. For the same reason we
* don't really care what order it's scanned in. (We could expect the
* caller to supply the correct values, but it's easier to force it here.)
*/
if (outer_rel != NULL)
{
pathkeys = NIL;
indexscandir = NoMovementScanDirection;
}
if (index->rel->ext == RELSTORAGE_EXTERNAL)
pathnode->path.pathtype = index->indexonly ?
T_MagmaIndexOnlyScan : T_MagmaIndexScan;
else if (index->rel->ext == RELSTORAGE_ORC)
{
pathnode->path.pathtype = index->indexonly ?
T_OrcIndexOnlyScan : T_OrcIndexScan;
}
else
pathnode->path.pathtype = T_IndexScan;
pathnode->path.parent = rel;
pathnode->path.pathkeys = pathkeys;
/* Convert clauses to indexquals the executor can handle */
indexquals = expand_indexqual_conditions(index, clause_groups);
/* Flatten the clause-groups list to produce indexclauses list */
allclauses = flatten_clausegroups_list(clause_groups);
/* Fill in the pathnode */
pathnode->indexinfo = index;
pathnode->indexclauses = allclauses;
pathnode->indexquals = indexquals;
pathnode->isjoininner = (outer_rel != NULL);
pathnode->indexscandir = indexscandir;
if (outer_rel != NULL)
{
/*
* We must compute the estimated number of output rows for the
* indexscan. This is less than rel->rows because of the additional
* selectivity of the join clauses. Since clause_groups may contain
* both restriction and join clauses, we have to do a set union to get
* the full set of clauses that must be considered to compute the
* correct selectivity. (Without the union operation, we might have
* some restriction clauses appearing twice, which'd mislead
* clauselist_selectivity into double-counting their selectivity.
* However, since RestrictInfo nodes aren't copied when linking them
* into different lists, it should be sufficient to use pointer
* comparison to remove duplicates.)
*
* Always assume the join type is JOIN_INNER; even if some of the join
* clauses come from other contexts, that's not our problem.
*/
allclauses = list_union_ptr(rel->baserestrictinfo, allclauses);
pathnode->rows = rel->tuples *
clauselist_selectivity(root,
allclauses,
rel->relid, /* do not use 0! */
JOIN_INNER,
false /* use_damping */);
/* Like costsize.c, force estimate to be at least one row */
pathnode->rows = clamp_row_est(pathnode->rows);
}
else
{
/*
* The number of rows is the same as the parent rel's estimate, since
* this isn't a join inner indexscan.
*/
pathnode->rows = rel->rows;
}
/* Distribution is same as the base table. */
pathnode->path.locus = cdbpathlocus_from_baserel(root, rel);
pathnode->path.motionHazard = false;
pathnode->path.rescannable = true;
cost_index(pathnode, root, index, indexquals, outer_rel);
return pathnode;
}
/*
* create_bitmap_heap_path
* Creates a path node for a bitmap scan.
*
* 'bitmapqual' is a tree of IndexPath, BitmapAndPath, and BitmapOrPath nodes.
*
* If this is a join inner indexscan path, 'outer_rel' is the outer relation,
* and all the component IndexPaths should have been costed accordingly.
*/
BitmapHeapPath *
create_bitmap_heap_path(PlannerInfo *root,
RelOptInfo *rel,
Path *bitmapqual,
RelOptInfo *outer_rel)
{
BitmapHeapPath *pathnode = makeNode(BitmapHeapPath);
if (rel->ext == RELSTORAGE_EXTERNAL)
pathnode->path.pathtype = T_MagmaBitmapScan;
else
pathnode->path.pathtype = T_BitmapHeapScan;
pathnode->path.parent = rel;
pathnode->path.pathkeys = NIL; /* always unordered */
/* Distribution is same as the base table. */
pathnode->path.locus = cdbpathlocus_from_baserel(root, rel);
pathnode->path.motionHazard = false;
pathnode->path.rescannable = true;
pathnode->bitmapqual = bitmapqual;
pathnode->isjoininner = (outer_rel != NULL);
if (pathnode->isjoininner)
{
/*
* We must compute the estimated number of output rows for the
* indexscan. This is less than rel->rows because of the additional
* selectivity of the join clauses. We make use of the selectivity
* estimated for the bitmap to do this; this isn't really quite right
* since there may be restriction conditions not included in the
* bitmap ...
*/
Cost indexTotalCost;
Selectivity indexSelectivity;
cost_bitmap_tree_node(bitmapqual, &indexTotalCost, &indexSelectivity);
pathnode->rows = rel->tuples * indexSelectivity;
if (pathnode->rows > rel->rows)
pathnode->rows = rel->rows;
/* Like costsize.c, force estimate to be at least one row */
pathnode->rows = clamp_row_est(pathnode->rows);
}
else
{
/*
* The number of rows is the same as the parent rel's estimate, since
* this isn't a join inner indexscan.
*/
pathnode->rows = rel->rows;
}
cost_bitmap_heap_scan(&pathnode->path, root, rel, bitmapqual, outer_rel);
return pathnode;
}
/*
* create_bitmap_and_path
* Creates a path node representing a BitmapAnd.
*/
BitmapAndPath *
create_bitmap_and_path(PlannerInfo *root,
RelOptInfo *rel,
List *bitmapquals)
{
BitmapAndPath *pathnode = makeNode(BitmapAndPath);
pathnode->path.pathtype = T_BitmapAnd;
pathnode->path.parent = rel;
pathnode->path.pathkeys = NIL; /* always unordered */
pathnode->bitmapquals = bitmapquals;
/* this sets bitmapselectivity as well as the regular cost fields: */
cost_bitmap_and_node(pathnode, root);
return pathnode;
}
/*
* create_bitmap_or_path
* Creates a path node representing a BitmapOr.
*/
BitmapOrPath *
create_bitmap_or_path(PlannerInfo *root,
RelOptInfo *rel,
List *bitmapquals)
{
BitmapOrPath *pathnode = makeNode(BitmapOrPath);
pathnode->path.pathtype = T_BitmapOr;
pathnode->path.parent = rel;
pathnode->path.pathkeys = NIL; /* always unordered */
pathnode->bitmapquals = bitmapquals;
/* this sets bitmapselectivity as well as the regular cost fields: */
cost_bitmap_or_node(pathnode, root);
return pathnode;
}
/*
* create_tidscan_path
* Creates a path corresponding to a scan by TID, returning the pathnode.
*/
TidPath *
create_tidscan_path(PlannerInfo *root, RelOptInfo *rel, List *tidquals)
{
TidPath *pathnode = makeNode(TidPath);
pathnode->path.pathtype = T_TidScan;
pathnode->path.parent = rel;
pathnode->path.pathkeys = NIL;
pathnode->tidquals = tidquals;
/* Distribution is same as the base table. */
pathnode->path.locus = cdbpathlocus_from_baserel(root, rel);
pathnode->path.motionHazard = false;
pathnode->path.rescannable = true;
cost_tidscan(&pathnode->path, root, rel, tidquals);
return pathnode;
}
/*
* create_append_path
* Creates a path corresponding to an Append plan, returning the
* pathnode.
*/
AppendPath *
create_append_path(PlannerInfo *root, RelOptInfo *rel, List *subpaths)
{
AppendPath *pathnode = makeNode(AppendPath);
ListCell *l;
pathnode->path.pathtype = T_Append;
pathnode->path.parent = rel;
pathnode->path.pathkeys = NIL; /* result is always considered
* unsorted */
pathnode->subpaths = NIL;
pathnode->path.motionHazard = false;
pathnode->path.rescannable = true;
pathnode->path.startup_cost = 0;
pathnode->path.total_cost = 0;
/* If no subpath, any worker can execute this Append. Result has 0 rows. */
if (!subpaths)
CdbPathLocus_MakeGeneral(&pathnode->path.locus);
else
{
bool fIsNotPartitioned = false;
/*
* Do a first pass over the children to determine if
* there's any child which is not partitioned, i.e. a bottleneck or replicated
*/
foreach(l, subpaths)
{
Path *subpath = (Path *) lfirst(l);
if (CdbPathLocus_IsBottleneck(subpath->locus) ||
CdbPathLocus_IsReplicated(subpath->locus))
{
fIsNotPartitioned = true;
break;
}
}
foreach(l, subpaths)
{
Path *subpath = (Path *) lfirst(l);
CdbPathLocus projectedlocus;
/*
* In case any of the children is not partitioned convert all children
* to have singleQE locus
*/
if (fIsNotPartitioned &&
!CdbPathLocus_IsSingleQE(subpath->locus))
{
CdbPathLocus singleQE;
CdbPathLocus_MakeSingleQE(&singleQE);
subpath = cdbpath_create_motion_path(root, subpath, NIL, false, singleQE);
}
/* Transform subpath locus into the appendrel's space for comparison. */
if (subpath->parent == rel ||
subpath->parent->reloptkind != RELOPT_OTHER_MEMBER_REL)
projectedlocus = subpath->locus;
else
projectedlocus =
cdbpathlocus_pull_above_projection(root,
subpath->locus,
subpath->parent->relids,
subpath->parent->reltargetlist,
rel->reltargetlist,
rel->relid);
if (l == list_head(subpaths)) /* first node? */
pathnode->path.startup_cost = subpath->startup_cost;
pathnode->path.total_cost += subpath->total_cost;
/*
* CDB: If all the scans are distributed alike, set
* the result locus to match. Otherwise, if all are partitioned,
* set it to strewn. A mixture of partitioned and non-partitioned
* scans should not occur after above correction;
*
* CDB TODO: When the scans are not all partitioned alike, and the
* result is joined with another rel, consider pushing the join
* below the Append so that child tables that are properly
* distributed can be joined in place.
*/
if (l == list_head(subpaths))
pathnode->path.locus = projectedlocus;
else if (cdbpathlocus_compare(CdbPathLocus_Comparison_Equal,
pathnode->path.locus, projectedlocus))
{}
else if (CdbPathLocus_IsPartitioned(pathnode->path.locus) &&
CdbPathLocus_IsPartitioned(projectedlocus))
CdbPathLocus_MakeStrewn(&pathnode->path.locus);
else
ereport(ERROR, (errcode(ERRCODE_GP_FEATURE_NOT_SUPPORTED),
errmsg_internal("Cannot append paths with "
"incompatible distribution")));
if (subpath->motionHazard)
pathnode->path.motionHazard = true;
if ( !subpath->rescannable )
pathnode->path.rescannable = false;
pathnode->subpaths = lappend(pathnode->subpaths, subpath);
}
/*
* CDB: If there is exactly one subpath, its ordering is preserved.
* Child rel's pathkey exprs are already expressed in terms of the
* columns of the parent appendrel. See find_usable_indexes().
*/
if (list_length(subpaths) == 1)
pathnode->path.pathkeys = ((Path *)linitial(subpaths))->pathkeys;
}
return pathnode;
}
/*
* create_result_path
* Creates a path representing a Result-and-nothing-else plan.
* This is only used for the case of a query with an empty jointree.
*/
ResultPath *
create_result_path(RelOptInfo *rel, Path *subpath, List *quals)
{
ResultPath *pathnode = makeNode(ResultPath);
pathnode->path.pathtype = T_Result;
pathnode->path.parent = NULL;
pathnode->path.pathkeys = NIL;
pathnode->quals = quals;
/* Ideally should define cost_result(), but I'm too lazy */
pathnode->path.startup_cost = 0;
pathnode->path.total_cost = cpu_tuple_cost;
pathnode->subpath = subpath;
pathnode->quals = quals;
/* Ideally should define cost_result(), but I'm too lazy */
if (subpath)
{
pathnode->path.startup_cost = subpath->startup_cost;
pathnode->path.total_cost = subpath->total_cost;
pathnode->path.locus = subpath->locus;
pathnode->path.motionHazard = subpath->motionHazard;
pathnode->path.rescannable = subpath->rescannable;
}
else
{
pathnode->path.startup_cost = 0;
pathnode->path.total_cost = cpu_tuple_cost;
CdbPathLocus_MakeGeneral(&pathnode->path.locus);
pathnode->path.motionHazard = false;
pathnode->path.rescannable = true;
}
/*
* In theory we should include the qual eval cost as well, but at present
* that doesn't accomplish much except duplicate work that will be done
* again in make_result; since this is only used for degenerate cases,
* nothing interesting will be done with the path cost values...
*/
return pathnode;
}
/*
* create_material_path
* Creates a path corresponding to a Material plan, returning the
* pathnode.
*/
MaterialPath *
create_material_path(PlannerInfo *root, RelOptInfo *rel, Path *subpath)
{
MaterialPath *pathnode = makeNode(MaterialPath);
pathnode->path.pathtype = T_Material;
pathnode->path.parent = rel;
pathnode->path.pathkeys = subpath->pathkeys;
pathnode->path.locus = subpath->locus;
pathnode->path.motionHazard = subpath->motionHazard;
pathnode->cdb_strict = false;
pathnode->path.rescannable = true; /* Independent of sub-path */
pathnode->subpath = subpath;
cost_material(&pathnode->path,
root,
subpath->total_cost,
cdbpath_rows(root, subpath),
rel->width);
return pathnode;
}
/*
* create_unique_path
* Creates a path representing elimination of distinct rows from the
* input data.
*/
static UniquePath *
create_unique_path(PlannerInfo *root,
Path *subpath,
List *distinct_on_exprs,
Relids distinct_on_rowid_relids)
{
UniquePath *pathnode;
Path sort_path; /* dummy for result of cost_sort */
Path agg_path; /* dummy for result of cost_agg */
RelOptInfo *rel = subpath->parent;
int numCols;
double subpath_rows;
bool hashable = false;
subpath_rows = cdbpath_rows(root, subpath);
/* Allocate and partially initialize a UniquePath node. */
pathnode = make_unique_path(subpath);
/* Share caller's expr list and relids. */
pathnode->distinct_on_exprs = distinct_on_exprs;
pathnode->distinct_on_rowid_relids = distinct_on_rowid_relids;
/*
* Treat the output as always unsorted, since we don't necessarily have
* pathkeys to represent it.
*/
pathnode->path.pathkeys = NIL;
/*
* If we know the targetlist, try to estimate number of result rows;
* otherwise punt.
*/
if (distinct_on_exprs)
{
pathnode->rows = estimate_num_groups(root, distinct_on_exprs, subpath_rows);
numCols = list_length(distinct_on_exprs);
}
else
{
pathnode->rows = subpath_rows;
numCols = 1;
}
/*
* Estimate cost for sort+unique implementation
*/
cost_sort(&sort_path, root, NIL,
subpath->total_cost,
subpath_rows,
rel->width);
/*
* Charge one cpu_operator_cost per comparison per input tuple. We assume
* all columns get compared at most of the tuples. (XXX probably this is
* an overestimate.) This should agree with make_unique.
*/
sort_path.total_cost += cpu_operator_cost * subpath_rows * numCols;
/*
* Is it safe to use a hashed implementation? If so, estimate and compare
* costs. We only try this if we know the targetlist for sure (else we
* can't be sure about the datatypes involved).
*
* CDB: For dedup, the distinct_on_exprs list isn't built until later,
* but we know the data types will be hashable.
*/
pathnode->umethod = UNIQUE_PATH_SORT;
pathnode->path.startup_cost = sort_path.startup_cost;
pathnode->path.total_cost = sort_path.total_cost;
if (distinct_on_exprs && hash_safe_tlist(distinct_on_exprs))
hashable = true;
if (distinct_on_rowid_relids)
hashable = true;
if (hashable
&& (root->config->enable_hashagg
|| root->config->mpp_trying_fallback_plan
)
)
{
/*
* Estimate the overhead per hashtable entry at 64 bytes (same as in
* planner.c).
*/
int hashentrysize = rel->width + 64;
/*
* CDB TODO: Hybrid hashed aggregation is not limited by work_mem.
*/
if (hashentrysize * pathnode->rows <= global_work_mem(root))
{
cost_agg(&agg_path, root,
AGG_HASHED, 0,
numCols, pathnode->rows,
subpath->startup_cost,
subpath->total_cost,
rel->rows, 0.0, 0.0, hashentrysize, false);
if (agg_path.total_cost < sort_path.total_cost ||
(!root->config->enable_sort && !root->config->mpp_trying_fallback_plan))
{
pathnode->umethod = UNIQUE_PATH_HASH;
pathnode->path.startup_cost = agg_path.startup_cost;
pathnode->path.total_cost = agg_path.total_cost;
}
}
}
/* see MPP-1140 */
if (pathnode->umethod == UNIQUE_PATH_HASH)
{
/* hybrid hash agg is not rescannable, and may present a motion hazard */
pathnode->path.motionHazard = subpath->motionHazard;
pathnode->path.rescannable = false;
}
else
{
/* sort or plain implies materialization and breaks deadlock cycle.
* (NB: Must not reset motionHazard when sort is eliminated due to
* existing ordering; but Unique sort is never optimized away at present.)
*/
pathnode->path.motionHazard = subpath->motionHazard;
/* Same reasoning applies to rescanablilty. If no actual sort is placed
* in the plan, then rescannable is set correctly to the subpath value.
* If sort intervenes, it should be set to true. We depend
* on the above claim that sort will always intervene.
*/
pathnode->path.rescannable = true;
}
return pathnode;
} /* create_unique_path */
/*
* create_unique_exprlist_path
* Creates a path representing elimination of distinct rows from the
* input data.
*
* Returns a UniquePath node representing a "DISTINCT ON e1,...,en" operator,
* where e1,...,en are the expressions in the 'distinct_on_exprs' list.
*
* NB: The returned node shares the 'distinct_on_exprs' list given by the
* caller; the list and its members must not be changed or freed during the
* node's lifetime.
*
* If a row's duplicates might occur in more than one partition, a Motion
* operator will be needed to bring them together. Since this path might
* not be chosen, we won't take the time to create a CdbMotionPath node here.
* Just estimate what the cost would be, and assign a dummy locus; leave
* the real work for create_plan().
*/
UniquePath *
create_unique_exprlist_path(PlannerInfo *root,
Path *subpath,
List *distinct_on_exprs)
{
UniquePath *uniquepath;
RelOptInfo *rel = subpath->parent;
Assert(distinct_on_exprs);
/*
* If the input is a subquery whose output must be unique already, then we
* don't need to do anything. The test for uniqueness has to consider
* exactly which columns we are extracting; for example "SELECT DISTINCT
* x,y" doesn't guarantee that x alone is distinct. So we cannot check for
* this optimization unless we found our own targetlist above, and it
* consists only of simple Vars referencing subquery outputs. (Possibly
* we could do something with expressions in the subquery outputs, too,
* but for now keep it simple.)
*/
if (rel->rtekind == RTE_SUBQUERY)
{
RangeTblEntry *rte = rt_fetch(rel->relid, root->parse->rtable);
List *sub_tlist_colnos;
sub_tlist_colnos = translate_sub_tlist(distinct_on_exprs, rel->relid);
if (sub_tlist_colnos &&
query_is_distinct_for(rte->subquery, sub_tlist_colnos))
{
uniquepath = make_unique_path(subpath);
uniquepath->umethod = UNIQUE_PATH_NOOP;
uniquepath->rows = cdbpath_rows(root, subpath);
uniquepath->distinct_on_exprs = distinct_on_exprs; /* share list */
return uniquepath;
}
}
/* Repartition first if duplicates might be on different QEs. */
if (!CdbPathLocus_IsBottleneck(subpath->locus) &&
!cdbpathlocus_is_hashed_on_exprs(subpath->locus, distinct_on_exprs))
{
CdbPathLocus locus;
locus = cdbpathlocus_from_exprs(root, distinct_on_exprs);
subpath = cdbpath_create_motion_path(root, subpath, NIL, false, locus);
Insist(subpath);
}
/* Create the UniquePath node. (Might return NULL if enable_sort = off.) */
uniquepath = create_unique_path(root, subpath, distinct_on_exprs, NULL);
return uniquepath;
} /* create_unique_exprlist_path */
/*
* create_unique_rowid_path
*
* CDB: Used when a subquery predicate (such as "x IN (SELECT ...)") has
* been flattened into a join with the tables of the current query. A row
* of the cross-product of the current query's tables might join with more
* than one row from the subquery and thus be duplicated. Removing such
* duplicates after the join is called deduping.
*
* Returns a UniquePath node representing a "DISTINCT ON r1,...,rn" operator,
* where (r1,...,rn) represents a unique identifier for each row of the cross
* product of the tables specified by the 'distinct_relids' parameter.
*
* NB: The returned node shares the given 'distinct_relids' bitmapset object;
* so the caller must not free or modify it during the node's lifetime.
*
* If a row's duplicates might occur in more than one partition, a Motion
* operator will be needed to bring them together. Since this path might
* not be chosen, we won't take the time to create a CdbMotionPath node here.
* Just estimate what the cost would be, and assign a dummy locus; leave
* the real work for create_plan().
*/
UniquePath *
create_unique_rowid_path(PlannerInfo *root,
Path *subpath,
Relids distinct_relids)
{
UniquePath *uniquepath;
Assert(!bms_is_empty(distinct_relids));
/* Create the UniquePath node. (Might return NULL if enable_sort = off.) */
uniquepath = create_unique_path(root, subpath, NIL, distinct_relids);
if (!uniquepath)
return NULL;
/* Add repartitioning cost if duplicates might be on different QEs. */
if (!CdbPathLocus_IsBottleneck(subpath->locus) &&
!cdbpathlocus_is_hashed_on_relids(subpath->locus, distinct_relids))
{
CdbMotionPath motionpath; /* dummy for cost estimate */
Cost repartition_cost;
/* Tell create_unique_plan() to insert Motion operator atop subpath. */
uniquepath->must_repartition = true;
/* Set a fake locus. Repartitioning key won't be built until later. */
CdbPathLocus_MakeStrewn(&uniquepath->path.locus);
/* Estimate repartitioning cost. */
memset(&motionpath, 0, sizeof(motionpath));
motionpath.path.type = T_CdbMotionPath;
motionpath.path.parent = subpath->parent;
motionpath.path.locus = uniquepath->path.locus;
motionpath.subpath = subpath;
cdbpath_cost_motion(root, &motionpath);
/* Add MotionPath cost to UniquePath cost. */
repartition_cost = motionpath.path.total_cost - subpath->total_cost;
uniquepath->path.total_cost += repartition_cost;
}
return uniquepath;
} /* create_unique_rowid_path */
/*
* create_limit1_path
*
* CDB: Add operators on top of the path to ensure uniqueness by
* discarding all but the first row of the given subpath's result.
*
* If a row's duplicates might occur in more than one partition, a Motion
* operator will be inserted to bring them together.
*/
UniquePath *
create_limit1_path(PlannerInfo *root, Path *subpath)
{
UniquePath *uniquepath;
CdbPathLocus singleQE;
/* Add LIMIT 1 operator. */
uniquepath = make_limit1_path(subpath);
/* Add Motion operator to gather to a single qExec, then LIMIT 1 again. */
if (CdbPathLocus_IsPartitioned(subpath->locus))
{
CdbPathLocus_MakeSingleQE(&singleQE);
subpath = cdbpath_create_motion_path(root, (Path *)uniquepath, NIL,
false, singleQE);
/* Add another LIMIT 1 on top. */
uniquepath = make_limit1_path(subpath);
uniquepath->path.motionHazard = subpath->motionHazard;
}
return uniquepath;
} /* create_limit1_path */
/*
* make_limit1_path
*
* CDB: Returns a UniquePath which ensures uniqueness by producing at most
* one row of the given subpath's result.
*/
UniquePath *
make_limit1_path(Path *subpath)
{
UniquePath *uniquepath = make_unique_path(subpath);
uniquepath->umethod = UNIQUE_PATH_LIMIT1;
uniquepath->rows = 1;
/* Cost to get a single row is same as startup cost (from subpath). */
uniquepath->path.total_cost = uniquepath->path.startup_cost;
return uniquepath;
} /* make_limit1_path */
/*
* make_unique_path
* Allocates and partially initializes a UniquePath node to be completed
* by the caller.
*/
UniquePath *
make_unique_path(Path *subpath)
{
UniquePath *pathnode;
MemoryContext oldcontext;
/* Allocate in same context as parent rel in case GEQO is ever used. */
oldcontext = MemoryContextSwitchTo(GetMemoryChunkContext(subpath->parent));
/* Allocate the UniquePath node. */
pathnode = makeNode(UniquePath);
/* Copy Path header from subpath. */
pathnode->path = *subpath;
/* Fix up the Path header. */
pathnode->path.type = T_UniquePath;
pathnode->path.pathtype = T_Unique;
/* Initialize added UniquePath fields. */
pathnode->subpath = subpath;
pathnode->distinct_on_exprs = NIL;
pathnode->distinct_on_rowid_relids = NULL;
pathnode->must_repartition = false;
/* Restore caller's allocation context. */
MemoryContextSwitchTo(oldcontext);
return pathnode;
} /* make_unique_path */
/*
* translate_sub_tlist - get subquery column numbers represented by tlist
*
* The given targetlist should contain only Vars referencing the given relid.
* Extract their varattnos (ie, the column numbers of the subquery) and return
* as an integer List.
*
* If any of the tlist items is not a simple Var, we cannot determine whether
* the subquery's uniqueness condition (if any) matches ours, so punt and
* return NIL.
*/
static List *
translate_sub_tlist(List *tlist, int relid)
{
List *result = NIL;
ListCell *l;
foreach(l, tlist)
{
Var *var = (Var *) lfirst(l);
if (!var || !IsA(var, Var) ||
(int)var->varno != relid)
return NIL; /* punt */
result = lappend_int(result, var->varattno);
}
return result;
}
/*
* query_is_distinct_for - does query never return duplicates of the
* specified columns?
*
* colnos is an integer list of output column numbers (resno's). We are
* interested in whether rows consisting of just these columns are certain
* to be distinct.
*/
static bool
query_is_distinct_for(Query *query, List *colnos)
{
ListCell *l;
/*
* DISTINCT (including DISTINCT ON) guarantees uniqueness if all the
* columns in the DISTINCT clause appear in colnos.
*/
if (query->distinctClause)
{
foreach(l, query->distinctClause)
{
SortClause *scl = (SortClause *) lfirst(l);
TargetEntry *tle = get_sortgroupclause_tle(scl,
query->targetList);
if (!list_member_int(colnos, tle->resno))
break; /* exit early if no match */
}
if (l == NULL) /* had matches for all? */
return true;
}
/*
* Similarly, GROUP BY guarantees uniqueness if all the grouped columns
* appear in colnos.
*/
if (query->groupClause)
{
List *grouptles =
get_sortgroupclauses_tles(query->groupClause, query->targetList);
foreach(l, grouptles)
{
TargetEntry *tle = (TargetEntry *)lfirst(l);
if (!list_member_int(colnos, tle->resno))
break; /* exit early if no match */
}
if (l == NULL) /* had matches for all? */
return true;
}
else
{
/*
* If we have no GROUP BY, but do have aggregates or HAVING, then the
* result is at most one row so it's surely unique.
*/
if (query->hasAggs || query->havingQual)
return true;
}
/*
* UNION, INTERSECT, EXCEPT guarantee uniqueness of the whole output row,
* except with ALL
*/
if (query->setOperations)
{
SetOperationStmt *topop = (SetOperationStmt *) query->setOperations;
Assert(IsA(topop, SetOperationStmt));
Assert(topop->op != SETOP_NONE);
if (!topop->all)
{
/* We're good if all the nonjunk output columns are in colnos */
foreach(l, query->targetList)
{
TargetEntry *tle = (TargetEntry *) lfirst(l);
if (!tle->resjunk &&
!list_member_int(colnos, tle->resno))
break; /* exit early if no match */
}
if (l == NULL) /* had matches for all? */
return true;
}
}
/*
* XXX Are there any other cases in which we can easily see the result
* must be distinct?
*/
return false;
}
/*
* hash_safe_tlist - can datatypes of given tlist be hashed?
*
* We assume hashed aggregation will work if the datatype's equality operator
* is marked hashjoinable.
*
* XXX this probably should be somewhere else. See also hash_safe_grouping
* in plan/planner.c.
*/
static bool
hash_safe_tlist(List *tlist)
{
ListCell *tl;
foreach(tl, tlist)
{
Node *expr = (Node *) lfirst(tl);
Operator optup;
bool oprcanhash;
optup = equality_oper(exprType(expr), true);
if (!optup)
return false;
oprcanhash = ((Form_pg_operator) GETSTRUCT(optup))->oprcanhash;
ReleaseOperator(optup);
if (!oprcanhash)
return false;
}
return true;
}
/*
* create_subqueryscan_path
* Creates a path corresponding to a sequential scan of a subquery,
* returning the pathnode.
*/
Path *
create_subqueryscan_path(PlannerInfo *root, RelOptInfo *rel, List *pathkeys)
{
Path *pathnode = makeNode(Path);
pathnode->pathtype = T_SubqueryScan;
pathnode->parent = rel;
pathnode->pathkeys = pathkeys;
pathnode->locus = cdbpathlocus_from_subquery(root, rel->subplan, rel->relid);
pathnode->motionHazard = true; /* better safe than sorry */
pathnode->rescannable = false;
cost_subqueryscan(pathnode, rel);
return pathnode;
}
/*
* create_functionscan_path
* Creates a path corresponding to a sequential scan of a function,
* returning the pathnode.
*/
Path *
create_functionscan_path(PlannerInfo *root, RelOptInfo *rel, RangeTblEntry *rte)
{
Path *pathnode = makeNode(Path);
pathnode->pathtype = T_FunctionScan;
pathnode->parent = rel;
pathnode->pathkeys = NIL; /* for now, assume unordered result */
/*
* CDB: If expression contains mutable functions, evaluate it on entry db.
* Otherwise let it be evaluated in the same slice as its parent operator.
*/
Assert(rte->rtekind == RTE_FUNCTION);
if (contain_mutable_functions(rte->funcexpr))
CdbPathLocus_MakeEntry(&pathnode->locus);
else
CdbPathLocus_MakeGeneral(&pathnode->locus);
pathnode->motionHazard = false;
/* For now, be conservative. */
pathnode->rescannable = false;
cost_functionscan(pathnode, root, rel);
return pathnode;
}
/*
* create_tablefunction_path
* Creates a path corresponding to a sequential scan of a table function,
* returning the pathnode.
*/
Path *
create_tablefunction_path(PlannerInfo *root, RelOptInfo *rel, RangeTblEntry *rte)
{
Path *pathnode = makeNode(Path);
Assert(rte->rtekind == RTE_TABLEFUNCTION);
/* Setup the basics of the TableFunction path */
pathnode->pathtype = T_TableFunctionScan;
pathnode->parent = rel;
pathnode->pathkeys = NIL; /* no way to specify output ordering */
pathnode->motionHazard = true; /* better safe than sorry */
pathnode->rescannable = false; /* better safe than sorry */
/*
* Inherit the locus of the input subquery. This is necessary to handle the
* case of a General locus, e.g. if all the data has been concentrated to a
* single segment then the output will all be on that segment, otherwise the
* output must be declared as randomly distributed because we do not know
* what relationship, if any, there is between the input data and the output
* data.
*/
pathnode->locus = cdbpathlocus_from_subquery(root, rel->subplan, rel->relid);
/* Mark the output as random if the input is partitioned */
if (CdbPathLocus_IsPartitioned(pathnode->locus))
CdbPathLocus_MakeStrewn(&pathnode->locus);
cost_tablefunction(pathnode, root, rel);
return pathnode;
}
/*
* create_valuesscan_path
* Creates a path corresponding to a scan of a VALUES list,
* returning the pathnode.
*/
Path *
create_valuesscan_path(PlannerInfo *root, RelOptInfo *rel, RangeTblEntry *rte)
{
Path *pathnode = makeNode(Path);
pathnode->pathtype = T_ValuesScan;
pathnode->parent = rel;
pathnode->pathkeys = NIL; /* result is always unordered */
/*
* CDB: If VALUES list contains mutable functions, evaluate it on entry db.
* Otherwise let it be evaluated in the same slice as its parent operator.
*/
Assert(rte->rtekind == RTE_VALUES);
if (contain_mutable_functions((Node *)rte->values_lists))
CdbPathLocus_MakeEntry(&pathnode->locus);
else
CdbPathLocus_MakeGeneral(&pathnode->locus);
pathnode->motionHazard = false;
pathnode->rescannable = true;
cost_valuesscan(pathnode, root, rel);
return pathnode;
}
/*
* create_ctescan_path
* Creates a path corresponding to a scan of a CTE,
* returning the pathnode.
*/
Path *
create_ctescan_path(PlannerInfo *root, RelOptInfo *rel, List *pathkeys)
{
Path *pathnode = makeNode(Path);
pathnode->pathtype = T_CteScan;
pathnode->parent = rel;
pathnode->pathkeys = pathkeys;
pathnode->locus = cdbpathlocus_from_subquery(root, rel->subplan, rel->relid);
/*
* We can't extract these two values from the subplan, so we simple set
* them to their worst case here.
*/
pathnode->motionHazard = true;
pathnode->rescannable = false;
cost_ctescan(pathnode, root, rel);
return pathnode;
}
/*
* cdb_jointype_to_join_in
* Returns JOIN_IN if the jointype should be changed from JOIN_INNER to
* JOIN_IN so as to produce at most one matching inner row per outer row.
* Else returns the given jointype.
*
* CDB TODO: Allow outer joins to use the JOIN_IN technique.
* CDB TODO: Occasionally, symmetric JOIN_IN might be useful (aka 'match join').
*/
static inline JoinType
cdb_jointype_to_join_in(RelOptInfo *joinrel, JoinType jointype, Path *inner_path)
{
CdbRelDedupInfo *dedup = joinrel->dedup_info;
if (dedup &&
dedup->spent_subqrelids &&
bms_is_subset(inner_path->parent->relids, dedup->spent_subqrelids) &&
!IsA(inner_path, UniquePath) &&
jointype == JOIN_INNER)
{
jointype = JOIN_IN;
}
return jointype;
} /* cdb_jointype_to_join_in */
static bool
path_contains_inner_index(Path *path)
{
if (IsA(path, IndexPath) &&
((IndexPath *)path)->isjoininner)
return true;
else if (IsA(path, BitmapHeapPath) &&
((BitmapHeapPath *)path)->isjoininner)
return true;
else if (IsA(path, BitmapAppendOnlyPath) &&
((BitmapAppendOnlyPath *)path)->isjoininner)
return true;
else if (IsA(path, AppendPath))
{
/* MPP-2377: Append paths may conceal inner-index scans, if
* any of the subpaths are indexpaths or bitmapheap-paths we
* have to do more checking */
ListCell *l;
/* scan the subpaths of the Append */
foreach(l, ((AppendPath *)path)->subpaths)
{
Path *subpath = (Path *)lfirst(l);
if (path_contains_inner_index(subpath))
return true;
}
}
return false;
}
/*
* create_nestloop_path
* Creates a pathnode corresponding to a nestloop join between two
* relations.
*
* 'joinrel' is the join relation.
* 'jointype' is the type of join required
* 'outer_path' is the outer path
* 'inner_path' is the inner path
* 'restrict_clauses' are the RestrictInfo nodes to apply at the join
* 'pathkeys' are the path keys of the new join path
*
* Returns the resulting path node.
*/
NestPath *
create_nestloop_path(PlannerInfo *root,
RelOptInfo *joinrel,
JoinType jointype,
Path *outer_path,
Path *inner_path,
List *restrict_clauses,
List *mergeclause_list, /*CDB*/
List *pathkeys)
{
NestPath *pathnode;
CdbPathLocus join_locus;
bool inner_must_be_local = false;
/* CDB: Change jointype to JOIN_IN from JOIN_INNER (if eligible). */
if (joinrel->dedup_info)
jointype = cdb_jointype_to_join_in(joinrel, jointype, inner_path);
/* CDB: Inner indexpath must execute in the same backend as the
* nested join to receive input values from the outer rel.
*/
inner_must_be_local = path_contains_inner_index(inner_path);
/* Add motion nodes above subpaths and decide where to join. */
join_locus = cdbpath_motion_for_join(root,
jointype,
&outer_path, /* INOUT */
&inner_path, /* INOUT */
mergeclause_list,
pathkeys,
NIL,
false,
inner_must_be_local);
if (CdbPathLocus_IsNull(join_locus))
return NULL;
/* Outer might not be ordered anymore after motion. */
if (!outer_path->pathkeys)
pathkeys = NIL;
/*
* If outer has at most one row, NJ will make at most one pass over inner.
* Else materialize inner rel after motion so NJ can loop over results.
* Skip if an intervening Unique operator will materialize.
*/
if (!outer_path->parent->onerow)
{
if (!inner_path->rescannable)
{
/* NLs potentially rescan the inner; if our inner path
* isn't rescannable we have to add a materialize node */
inner_path = (Path *)create_material_path(root, inner_path->parent, inner_path);
/* If we have motion on the outer, to avoid a deadlock; we
* need to set cdb_strict. In order for materialize to
* fully fetch the underlying (required to avoid our
* deadlock hazard) we must set cdb_strict! */
if (inner_path->motionHazard && outer_path->motionHazard)
{
((MaterialPath *)inner_path)->cdb_strict = true;
inner_path->motionHazard = false;
}
}
}
pathnode = makeNode(NestPath);
pathnode->jpath.path.pathtype = T_NestLoop;
pathnode->jpath.path.parent = joinrel;
pathnode->jpath.jointype = jointype;
pathnode->jpath.outerjoinpath = outer_path;
pathnode->jpath.innerjoinpath = inner_path;
pathnode->jpath.joinrestrictinfo = restrict_clauses;
pathnode->jpath.path.pathkeys = pathkeys;
pathnode->jpath.path.locus = join_locus;
pathnode->jpath.path.motionHazard = outer_path->motionHazard || inner_path->motionHazard;
/* we're only as rescannable as our child plans */
pathnode->jpath.path.rescannable = outer_path->rescannable && inner_path->rescannable;
cost_nestloop(pathnode, root);
return pathnode;
}
/*
* create_mergejoin_path
* Creates a pathnode corresponding to a mergejoin join between
* two relations
*
* 'joinrel' is the join relation
* 'jointype' is the type of join required
* 'outer_path' is the outer path
* 'inner_path' is the inner path
* 'restrict_clauses' are the RestrictInfo nodes to apply at the join
* 'pathkeys' are the path keys of the new join path
* 'mergeclauses' are the RestrictInfo nodes to use as merge clauses
* (this should be a subset of the restrict_clauses list)
* 'allmergeclauses' are the RestrictInfo nodes that are of the form
* required of merge clauses (equijoin between outer and inner rel).
* Consists of the ones to be used for merging ('mergeclauses') plus
* any others in 'restrict_clauses' that are to be applied after the
* merge. We use them for motion planning. (CDB)
* 'outersortkeys' are the sort varkeys for the outer relation
* or NIL to use existing ordering
* 'innersortkeys' are the sort varkeys for the inner relation
* or NIL to use existing ordering
*/
MergePath *
create_mergejoin_path(PlannerInfo *root,
RelOptInfo *joinrel,
JoinType jointype,
Path *outer_path,
Path *inner_path,
List *restrict_clauses,
List *pathkeys,
List *mergeclauses,
List *allmergeclauses, /*CDB*/
List *outersortkeys,
List *innersortkeys)
{
MergePath *pathnode;
CdbPathLocus join_locus;
List *outermotionkeys;
List *innermotionkeys;
/* CDB: Change jointype to JOIN_IN from JOIN_INNER (if eligible). */
if (joinrel->dedup_info)
jointype = cdb_jointype_to_join_in(joinrel, jointype, inner_path);
/*
* Do subpaths have useful ordering?
*/
if (outersortkeys == NIL) /* must preserve existing ordering */
outermotionkeys = outer_path->pathkeys;
else if (pathkeys_contained_in(outersortkeys, outer_path->pathkeys))
outermotionkeys = outersortkeys;/* lucky coincidence, already ordered */
else /* existing order useless; must sort */
outermotionkeys = NIL;
if (innersortkeys == NIL)
innermotionkeys = inner_path->pathkeys;
else if (pathkeys_contained_in(innersortkeys, inner_path->pathkeys))
innermotionkeys = innersortkeys;
else
innermotionkeys = NIL;
/*
* Add motion nodes above subpaths and decide where to join.
*/
join_locus = cdbpath_motion_for_join(root,
jointype,
&outer_path, /* INOUT */
&inner_path, /* INOUT */
allmergeclauses,
outermotionkeys,
innermotionkeys,
outersortkeys == NIL,
innersortkeys == NIL);
if (CdbPathLocus_IsNull(join_locus))
return NULL;
/*
* Sort is not needed if subpath is already well enough ordered and a
* disordering motion node (with pathkeys == NIL) hasn't been added.
*/
if (outermotionkeys &&
outer_path->pathkeys)
outersortkeys = NIL;
if (innermotionkeys &&
inner_path->pathkeys)
innersortkeys = NIL;
/* If user doesn't want sort, but this MJ requires a sort, fail. */
if (!root->config->enable_sort &&
!root->config->mpp_trying_fallback_plan)
{
if (outersortkeys || innersortkeys)
return NULL;
}
pathnode = makeNode(MergePath);
/*
* If we are not sorting the inner path, we may need a materialize node to
* ensure it can be marked/restored. (Sort does support mark/restore, so
* no materialize is needed in that case.)
*
* Since the inner side must be ordered, and only Sorts and IndexScans can
* create order to begin with, you might think there's no problem --- but
* you'd be wrong. Nestloop and merge joins can *preserve* the order of
* their inputs, so they can be selected as the input of a mergejoin, and
* they don't support mark/restore at present.
*/
if (!ExecSupportsMarkRestore(inner_path->pathtype))
{
/*
* The inner side does not support mark/restore capability.
* Check whether a sort node will be inserted later, and if that is not the case,
* add a materialize node.
* */
bool need_sort = false;
if (innersortkeys != NIL)
{
/* Check whether all sort keys are constants. If that is the case,
* no sort node is needed.
* The check is essentially the same as the one performed later in the
* make_sort_from_pathkeys() function (optimizer/plan/createplan.c),
* which decides whether a sort node is to be added.
*/
ListCell *sortkeycell;
foreach(sortkeycell, innersortkeys)
{
List *keysublist = (List *) lfirst(sortkeycell);
if (!CdbPathkeyEqualsConstant(keysublist))
{
/* sort key is not a constant - sort will be added later */
need_sort = true;
break;
}
}
}
// else: innersortkeys == NIL -> no sort node will be added
if (!need_sort)
{
/* no sort node will be added - add a materialize node */
inner_path = (Path *)
create_material_path(root, inner_path->parent, inner_path);
}
}
pathnode->jpath.path.pathtype = T_MergeJoin;
pathnode->jpath.path.parent = joinrel;
pathnode->jpath.jointype = jointype;
pathnode->jpath.outerjoinpath = outer_path;
pathnode->jpath.innerjoinpath = inner_path;
pathnode->jpath.joinrestrictinfo = restrict_clauses;
pathnode->jpath.path.pathkeys = pathkeys;
pathnode->jpath.path.locus = join_locus;
pathnode->jpath.path.motionHazard = outer_path->motionHazard || inner_path->motionHazard;
pathnode->jpath.path.rescannable = outer_path->rescannable && inner_path->rescannable;
pathnode->path_mergeclauses = mergeclauses;
pathnode->outersortkeys = outersortkeys;
pathnode->innersortkeys = innersortkeys;
cost_mergejoin(pathnode, root);
return pathnode;
}
/*
* create_hashjoin_path
* Creates a pathnode corresponding to a hash join between two relations.
*
* 'joinrel' is the join relation
* 'jointype' is the type of join required
* 'outer_path' is the cheapest outer path
* 'inner_path' is the cheapest inner path
* 'restrict_clauses' are the RestrictInfo nodes to apply at the join
* 'hashclauses' are the RestrictInfo nodes to use as hash clauses
* (this should be a subset of the restrict_clauses list)
* 'freeze_outer_path' is true if the outer_path should be used exactly as
* given, without adding other operators such as Motion.
*/
HashPath *
create_hashjoin_path(PlannerInfo *root,
RelOptInfo *joinrel,
JoinType jointype,
Path *outer_path,
Path *inner_path,
List *restrict_clauses,
List *mergeclause_list, /*CDB*/
List *hashclauses,
bool freeze_outer_path)
{
HashPath *pathnode;
CdbPathLocus join_locus;
/* CDB: Change jointype to JOIN_IN from JOIN_INNER (if eligible). */
if (joinrel->dedup_info)
jointype = cdb_jointype_to_join_in(joinrel, jointype, inner_path);
/* Add motion nodes above subpaths and decide where to join. */
join_locus = cdbpath_motion_for_join(root,
jointype,
&outer_path, /* INOUT */
&inner_path, /* INOUT */
mergeclause_list,
NIL, /* don't care about ordering */
NIL,
freeze_outer_path,
false);
if (CdbPathLocus_IsNull(join_locus))
return NULL;
pathnode = makeNode(HashPath);
pathnode->jpath.path.pathtype = T_HashJoin;
pathnode->jpath.path.parent = joinrel;
pathnode->jpath.jointype = jointype;
pathnode->jpath.outerjoinpath = outer_path;
pathnode->jpath.innerjoinpath = inner_path;
pathnode->jpath.joinrestrictinfo = restrict_clauses;
/* A hashjoin never has pathkeys, since its ordering is unpredictable */
pathnode->jpath.path.pathkeys = NIL;
pathnode->jpath.path.locus = join_locus;
pathnode->path_hashclauses = hashclauses;
/*
* If hash table overflows to disk, and an ancestor node requests rescan
* (e.g. because the HJ is in the inner subtree of a NJ), then the HJ has
* to be redone, including rescanning the inner rel in order to rebuild
* the hash table.
*/
pathnode->jpath.path.rescannable = outer_path->rescannable && inner_path->rescannable;
/* see the comment above; we may have a motion hazard on our inner ?! */
if (pathnode->jpath.path.rescannable)
pathnode->jpath.path.motionHazard = outer_path->motionHazard;
else
pathnode->jpath.path.motionHazard = outer_path->motionHazard || inner_path->motionHazard;
cost_hashjoin(pathnode, root);
return pathnode;
}