src/backend/optimizer/path/joinpath.c - hawq - Git at Google

 /*
  * Licensed to the Apache Software Foundation (ASF) under one
  * or more contributor license agreements.  See the NOTICE file
  * distributed with this work for additional information
  * regarding copyright ownership.  The ASF licenses this file
  * to you under the Apache License, Version 2.0 (the
  * "License"); you may not use this file except in compliance
  * with the License.  You may obtain a copy of the License at
  *
  *   http://www.apache.org/licenses/LICENSE-2.0
  *
  * Unless required by applicable law or agreed to in writing, software
  * distributed under the License is distributed on an "AS IS" BASIS,
  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */

 /*-------------------------------------------------------------------------
  *
  * joinpath.c
  *	  Routines to find all possible paths for processing a set of joins
  *
  * Portions Copyright (c) 2005-2008, Greenplum inc
  * Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
  *
  * IDENTIFICATION
  *	  $PostgreSQL: pgsql/src/backend/optimizer/path/joinpath.c,v 1.107.2.1 2007/05/22 01:40:42 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
 #include "postgres.h"

 #include <math.h>

 #include "executor/nodeHash.h"                  /* ExecHashRowSize() */
 #include "optimizer/cost.h"
 #include "optimizer/pathnode.h"
 #include "optimizer/paths.h"

 #include "cdb/cdbpath.h"                        /* cdbpath_rows() */


 static void sort_inner_and_outer(PlannerInfo *root, RelOptInfo *joinrel,
 					 RelOptInfo *outerrel, RelOptInfo *innerrel,
 					 List *restrictlist, List *mergeclause_list,
 					 JoinType jointype);
 static void match_unsorted_outer(PlannerInfo *root, RelOptInfo *joinrel,
 					 RelOptInfo *outerrel, RelOptInfo *innerrel,
 					 List *restrictlist, List *mergeclause_list,
 					 JoinType jointype);
 static void
 hash_inner_and_outer(PlannerInfo *root,
                      RelOptInfo *joinrel,
                      Path *outerpath,
                      Path *innerpath,
                      List *restrictlist,
                      List *mergeclause_list,    /*CDB*/
                      List *hashclause_list,     /*CDB*/
                      JoinType jointype);
 static Path *best_appendrel_indexscan(PlannerInfo *root, RelOptInfo *rel,
 						 RelOptInfo *outer_rel, JoinType jointype);


 /*
  * add_paths_to_joinrel
  *	  Given a join relation and two component rels from which it can be made,
  *	  consider all possible paths that use the two component rels as outer
  *	  and inner rel respectively.  Add these paths to the join rel's pathlist
  *	  if they survive comparison with other paths (and remove any existing
  *	  paths that are dominated by these paths).
  *
  * Modifies the pathlist field of the joinrel node to contain the best
  * paths found so far.
  */
 void
 add_paths_to_joinrel(PlannerInfo *root,
 					 RelOptInfo *joinrel,
 					 RelOptInfo *outerrel,
 					 RelOptInfo *innerrel,
 					 JoinType jointype,
 					 List *restrictlist)
 {
 	List	   *mergeclause_list = NIL;
     List       *hashclause_list;

     Assert(outerrel->pathlist &&
            outerrel->cheapest_startup_path &&
            outerrel->cheapest_total_path);
     Assert(innerrel->pathlist &&
            innerrel->cheapest_startup_path &&
            innerrel->cheapest_total_path);

     /*
 	 * Find potential mergejoin clauses.  We can skip this if we are not
 	 * interested in doing a mergejoin.  However, mergejoin is currently our
 	 * only way of implementing full outer joins, so override mergejoin
 	 * disable if it's a full join.
      *
      * CDB: Always build mergeclause_list.  We need it for motion planning.
 	 */
 	mergeclause_list = select_mergejoin_clauses(outerrel,
 												innerrel,
 												restrictlist,
 												jointype);

 	/*
 	 * 1. Consider mergejoin paths where both relations must be explicitly
 	 * sorted.
 	 */
 	if (root->config->enable_mergejoin ||
         root->config->mpp_trying_fallback_plan ||
         jointype == JOIN_FULL)
 	    sort_inner_and_outer(root, joinrel, outerrel, innerrel,
 						     restrictlist, mergeclause_list, jointype);

 	/*
 	 * 2. Consider paths where the outer relation need not be explicitly
 	 * sorted. This includes both nestloops and mergejoins where the outer
 	 * path is already ordered.
 	 */
 	match_unsorted_outer(root, joinrel, outerrel, innerrel,
 						 restrictlist, mergeclause_list, jointype);

 #ifdef NOT_USED

 	/*
 	 * 3. Consider paths where the inner relation need not be explicitly
 	 * sorted.	This includes mergejoins only (nestloops were already built in
 	 * match_unsorted_outer).
 	 *
 	 * Diked out as redundant 2/13/2000 -- tgl.  There isn't any really
 	 * significant difference between the inner and outer side of a mergejoin,
 	 * so match_unsorted_inner creates no paths that aren't equivalent to
 	 * those made by match_unsorted_outer when add_paths_to_joinrel() is
 	 * invoked with the two rels given in the other order.
 	 */
 	match_unsorted_inner(root, joinrel, outerrel, innerrel,
 						 restrictlist, mergeclause_list, jointype);
 #endif

 	/*
 	 * 4. Consider paths where both outer and inner relations must be hashed
 	 * before being joined.
      *
 	 * We consider both the cheapest-total-cost and cheapest-startup-cost
 	 * outer paths.  There's no need to consider any but the
 	 * cheapest-total-cost inner path, however.
 	 */
 	if (root->config->enable_hashjoin
 			|| root->config->mpp_trying_fallback_plan)
     {
 	    hashclause_list = hashclauses_for_join(restrictlist,
                                                outerrel,
                                                innerrel,
                                                jointype);
 		if (hashclause_list)
         {
             hash_inner_and_outer(root,
                                  joinrel,
                                  outerrel->cheapest_total_path,
                                  innerrel->cheapest_total_path,
 							     restrictlist,
                                  mergeclause_list,
                                  hashclause_list,
                                  jointype);
             if (outerrel->cheapest_startup_path != outerrel->cheapest_total_path)
                 hash_inner_and_outer(root,
                                      joinrel,
                                      outerrel->cheapest_startup_path,
                                      innerrel->cheapest_total_path,
 							         restrictlist,
                                      mergeclause_list,
                                      hashclause_list,
                                      jointype);
         }
     }
 }

 /*
  * sort_inner_and_outer
  *	  Create mergejoin join paths by explicitly sorting both the outer and
  *	  inner join relations on each available merge ordering.
  *
  * 'joinrel' is the join relation
  * 'outerrel' is the outer join relation
  * 'innerrel' is the inner join relation
  * 'restrictlist' contains all of the RestrictInfo nodes for restriction
  *		clauses that apply to this join
  * 'mergeclause_list' is a list of RestrictInfo nodes for available
  *		mergejoin clauses in this join
  * 'jointype' is the type of join to do
  */
 static void
 sort_inner_and_outer(PlannerInfo *root,
 					 RelOptInfo *joinrel,
 					 RelOptInfo *outerrel,
 					 RelOptInfo *innerrel,
 					 List *restrictlist,
 					 List *mergeclause_list,
 					 JoinType jointype)
 {
 	bool		useallclauses;
 	Path	   *outer_path;
 	Path	   *inner_path;
 	List	   *all_pathkeys;
 	ListCell   *l;

 	/*
 	 * If we are doing a right or full join, we must use *all* the
 	 * mergeclauses as join clauses, else we will not have a valid plan.
 	 */
 	switch (jointype)
 	{
 		case JOIN_INNER:
 		case JOIN_LEFT:
 		case JOIN_LASJ:
 		case JOIN_LASJ_NOTIN:
 			useallclauses = false;
 			break;
 		case JOIN_RIGHT:
 		case JOIN_FULL:
 			useallclauses = true;
 			break;
 		default:
 			elog(ERROR, "unrecognized join type: %d",
 				 (int) jointype);
 			useallclauses = false;		/* keep compiler quiet */
 			break;
 	}

 	/*
 	 * We only consider the cheapest-total-cost input paths, since we are
 	 * assuming here that a sort is required.  We will consider
 	 * cheapest-startup-cost input paths later, and only if they don't need a
 	 * sort.
 	 */
 	outer_path = outerrel->cheapest_total_path;
 	inner_path = innerrel->cheapest_total_path;

 	/*
 	 * Each possible ordering of the available mergejoin clauses will generate
 	 * a differently-sorted result path at essentially the same cost.  We have
 	 * no basis for choosing one over another at this level of joining, but
 	 * some sort orders may be more useful than others for higher-level
 	 * mergejoins, so it's worth considering multiple orderings.
 	 *
 	 * Actually, it's not quite true that every mergeclause ordering will
 	 * generate a different path order, because some of the clauses may be
 	 * redundant.  Therefore, what we do is convert the mergeclause list to a
 	 * list of canonical pathkeys, and then consider different orderings of
 	 * the pathkeys.
 	 *
 	 * Generating a path for *every* permutation of the pathkeys doesn't seem
 	 * like a winning strategy; the cost in planning time is too high. For
 	 * now, we generate one path for each pathkey, listing that pathkey first
 	 * and the rest in random order.  This should allow at least a one-clause
 	 * mergejoin without re-sorting against any other possible mergejoin
 	 * partner path.  But if we've not guessed the right ordering of secondary
 	 * keys, we may end up evaluating clauses as qpquals when they could have
 	 * been done as mergeclauses. We need to figure out a better way.  (Two
 	 * possible approaches: look at all the relevant index relations to
 	 * suggest plausible sort orders, or make just one output path and somehow
 	 * mark it as having a sort-order that can be rearranged freely.)
 	 */
 	all_pathkeys = make_pathkeys_for_mergeclauses(root,
 												  mergeclause_list,
 												  outerrel);

 	foreach(l, all_pathkeys)
 	{
 		List	   *front_pathkey = (List *) lfirst(l);
 		List	   *cur_pathkeys;
 		List	   *cur_mergeclauses;
 		List	   *outerkeys;
 		List	   *innerkeys;
 		List	   *merge_pathkeys;

 		/* Make a pathkey list with this guy first. */
 		if (l != list_head(all_pathkeys))
 			cur_pathkeys = lcons(front_pathkey,
 								 list_delete_ptr(list_copy(all_pathkeys),
 												 front_pathkey));
 		else
 			cur_pathkeys = all_pathkeys;		/* no work at first one... */

 		/*
 		 * Select mergeclause(s) that match this sort ordering.  If we had
 		 * redundant merge clauses then we will get a subset of the original
 		 * clause list.  There had better be some match, however...
 		 */
 		cur_mergeclauses = find_mergeclauses_for_pathkeys(root,
 														  cur_pathkeys,
 														  mergeclause_list);
 		Assert(cur_mergeclauses != NIL);

 		/* Forget it if can't use all the clauses in right/full join */
 		if (useallclauses &&
 			list_length(cur_mergeclauses) != list_length(mergeclause_list))
 			continue;

 		/*
 		 * Build sort pathkeys for both sides.
 		 *
 		 * Note: it's possible that the cheapest paths will already be sorted
 		 * properly.  create_mergejoin_path will detect that case and suppress
 		 * an explicit sort step, so we needn't do so here.
 		 */
 		outerkeys = make_pathkeys_for_mergeclauses(root,
 												   cur_mergeclauses,
 												   outerrel);
 		innerkeys = make_pathkeys_for_mergeclauses(root,
 												   cur_mergeclauses,
 												   innerrel);
 		/* Build pathkeys representing output sort order. */
 		merge_pathkeys = build_join_pathkeys(root, joinrel, jointype,
 											 outerkeys);

 		/*
 		 * And now we can make the path.
 		 */
 		add_path(root, joinrel, (Path *)
 				 create_mergejoin_path(root,
 									   joinrel,
 									   jointype,
 									   outer_path,
 									   inner_path,
 									   restrictlist,
 									   merge_pathkeys,
 									   cur_mergeclauses,
                                        mergeclause_list,
 									   outerkeys,
 									   innerkeys));
 	}
 }


 /*
  * match_unsorted_outer
  *	  Creates possible join paths for processing a single join relation
  *	  'joinrel' by employing either iterative substitution or
  *	  mergejoining on each of its possible outer paths (considering
  *	  only outer paths that are already ordered well enough for merging).
  *
  * We always generate a nestloop path for each available outer path.
  * In fact we may generate as many as five: one on the cheapest-total-cost
  * inner path, one on the same with materialization, one on the
  * cheapest-startup-cost inner path (if different), one on the
  * cheapest-total inner-indexscan path (if any), and one on the
  * cheapest-startup inner-indexscan path (if different).
  *
  * We also consider mergejoins if mergejoin clauses are available.	We have
  * two ways to generate the inner path for a mergejoin: sort the cheapest
  * inner path, or use an inner path that is already suitably ordered for the
  * merge.  If we have several mergeclauses, it could be that there is no inner
  * path (or only a very expensive one) for the full list of mergeclauses, but
  * better paths exist if we truncate the mergeclause list (thereby discarding
  * some sort key requirements).  So, we consider truncations of the
  * mergeclause list as well as the full list.  (Ideally we'd consider all
  * subsets of the mergeclause list, but that seems way too expensive.)
  *
  * 'joinrel' is the join relation
  * 'outerrel' is the outer join relation
  * 'innerrel' is the inner join relation
  * 'restrictlist' contains all of the RestrictInfo nodes for restriction
  *		clauses that apply to this join
  * 'mergeclause_list' is a list of RestrictInfo nodes for available
  *		mergejoin clauses in this join
  * 'jointype' is the type of join to do
  */
 static void
 match_unsorted_outer(PlannerInfo *root,
 					 RelOptInfo *joinrel,
 					 RelOptInfo *outerrel,
 					 RelOptInfo *innerrel,
 					 List *restrictlist,
 					 List *mergeclause_list,
 					 JoinType jointype)
 {
 	bool		nestjoinOK;
 	bool		useallclauses;
 	Path	   *inner_cheapest_startup = innerrel->cheapest_startup_path;
 	Path	   *inner_cheapest_total = innerrel->cheapest_total_path;
 	Path	   *matpath = NULL;
 	Path	   *index_cheapest_startup = NULL;
 	Path	   *index_cheapest_total = NULL;
 	ListCell   *l;

 	/*
 	 * Nestloop only supports inner and left joins.  Also, if we are
 	 * doing a right or full join, we must use *all* the mergeclauses as join
 	 * clauses, else we will not have a valid plan.  (Although these two flags
 	 * are currently inverses, keep them separate for clarity and possible
 	 * future changes.)
 	 */
 	switch (jointype)
 	{
 		case JOIN_INNER:
 		case JOIN_LEFT:
 		case JOIN_LASJ:
 		case JOIN_LASJ_NOTIN:
 			nestjoinOK = true;
 			useallclauses = false;
 			break;
 		case JOIN_RIGHT:
 		case JOIN_FULL:
 			nestjoinOK = false;
 			useallclauses = true;
 			break;
 		default:
 			elog(ERROR, "unrecognized join type: %d",
 				 (int) jointype);
 			nestjoinOK = false; /* keep compiler quiet */
 			useallclauses = false;
 			break;
 	}

     if (!root->config->enable_nestloop
     		&& !root->config->mpp_trying_fallback_plan)
         nestjoinOK = false;

     /*
      * For NJ there may be some special inner paths we should try.
      */
     if (nestjoinOK)
 	{
         bool    materialize_inner = true;

         /*
          * Consider materializing the cheapest inner path unless it is
          * cheaply rescannable.
          */
 		if (IsA(inner_cheapest_total, UniquePath))
         {
             UniquePath *unique_path = (UniquePath *)inner_cheapest_total;

             if (unique_path->umethod == UNIQUE_PATH_SORT ||
                 unique_path->umethod == UNIQUE_PATH_HASH)
                 materialize_inner = false;
         }

         if (materialize_inner)
 			matpath = (Path *)create_material_path(root, innerrel, inner_cheapest_total);

 		/*
 		 * Get the best innerjoin indexpaths (if any) for this outer rel.
 		 * They're the same for all outer paths.
 		 */
 		if (innerrel->reloptkind != RELOPT_JOINREL)
 		{
 			if (IsA(inner_cheapest_total, AppendPath))
 				index_cheapest_total = best_appendrel_indexscan(root,
 																innerrel,
 																outerrel,
 																jointype);
 			else if (innerrel->rtekind == RTE_RELATION)
 				best_inner_indexscan(root, innerrel, outerrel, jointype,
 									 &index_cheapest_startup,
 									 &index_cheapest_total);
 		}
 	}

 	foreach(l, outerrel->pathlist)
 	{
 		Path	   *outerpath = (Path *) lfirst(l);
 		List	   *merge_pathkeys;
 		List	   *mergeclauses;
 		List	   *innersortkeys;
 		List	   *trialsortkeys;
 		Path	   *cheapest_startup_inner;
 		Path	   *cheapest_total_inner;
 		int			num_sortkeys;
 		int			sortkeycnt;

 		/*
 		 * The result will have this sort order (even if it is implemented as
 		 * a nestloop, and even if some of the mergeclauses are implemented by
 		 * qpquals rather than as true mergeclauses):
 		 */
 		merge_pathkeys = build_join_pathkeys(root, joinrel, jointype,
 											 outerpath->pathkeys);

         /*
          * Consider nested joins.
          */
 		if (nestjoinOK)
 		{
 			/*
 			 * Always consider a nestloop join with this outer and
 			 * cheapest-total-cost inner.  When appropriate, also consider
 			 * using the materialized form of the cheapest inner, the
 			 * cheapest-startup-cost inner path, and the cheapest innerjoin
 			 * indexpaths.
 			 */
 	        add_path(root, joinrel, (Path *)
 					 create_nestloop_path(root,
 										  joinrel,
 										  jointype,
 										  outerpath,
 										  inner_cheapest_total,
 										  restrictlist,
                                           mergeclause_list,         /*CDB*/
 										  merge_pathkeys));
 			if (matpath != NULL)
 		        add_path(root, joinrel, (Path *)
 						 create_nestloop_path(root,
 											  joinrel,
 											  jointype,
 											  outerpath,
 											  matpath,
 											  restrictlist,
                                               mergeclause_list,     /*CDB*/
 											  merge_pathkeys));
 			if (inner_cheapest_startup != inner_cheapest_total)
 		        add_path(root, joinrel, (Path *)
 						 create_nestloop_path(root,
 											  joinrel,
 											  jointype,
 											  outerpath,
 											  inner_cheapest_startup,
 											  restrictlist,
                                               mergeclause_list,     /*CDB*/
 											  merge_pathkeys));
 			if (index_cheapest_total != NULL)
 		        add_path(root, joinrel, (Path *)
 						 create_nestloop_path(root,
 											  joinrel,
 											  jointype,
 											  outerpath,
 											  index_cheapest_total,
 											  restrictlist,
                                               mergeclause_list,     /*CDB*/
 											  merge_pathkeys));
 			if (index_cheapest_startup != NULL &&
 				index_cheapest_startup != index_cheapest_total)
 				add_path(root, joinrel, (Path *)
 						 create_nestloop_path(root,
 											  joinrel,
 											  jointype,
 											  outerpath,
 											  index_cheapest_startup,
 											  restrictlist,
                                               mergeclause_list,
 											  merge_pathkeys));
 		}

 		/* Can't do anything else if outer path needs to be unique'd */
 		if (IsA(outerpath, UniquePath))     /* CDB - is this still needed? */
 			continue;

 		/* Look for useful mergeclauses (if any) */
 		mergeclauses = find_mergeclauses_for_pathkeys(root,
 													  outerpath->pathkeys,
 													  mergeclause_list);

 		/*
 		 * Done with this outer path if no chance for a mergejoin.
 		 *
 		 * Special corner case: for "x FULL JOIN y ON true", there will be no
 		 * join clauses at all.  Ordinarily we'd generate a clauseless
 		 * nestloop path, but since mergejoin is our only join type that
 		 * supports FULL JOIN, it's necessary to generate a clauseless
 		 * mergejoin path instead.
 		 */
 		if (mergeclauses == NIL
 				|| (!root->config->enable_mergejoin
 						&& !root->config->mpp_trying_fallback_plan)
 			)
 		{
 			if (jointype == JOIN_FULL)
 				 /* okay to try for mergejoin */ ;
 			else
 				continue;
 		}
 		if (useallclauses && list_length(mergeclauses) != list_length(mergeclause_list))
 			continue;

 		/* Compute the required ordering of the inner path */
 		innersortkeys = make_pathkeys_for_mergeclauses(root,
 													   mergeclauses,
 													   innerrel);

 		/*
 		 * Generate a mergejoin on the basis of sorting the cheapest inner.
 		 * Since a sort will be needed, only cheapest total cost matters. (But
 		 * create_mergejoin_path will do the right thing if
 		 * inner_cheapest_total is already correctly sorted.)
 		 */
 		add_path(root, joinrel, (Path *)
 				 create_mergejoin_path(root,
 									   joinrel,
 									   jointype,
 									   outerpath,
 									   inner_cheapest_total,
 									   restrictlist,
 									   merge_pathkeys,
 									   mergeclauses,
                                        mergeclause_list,    /*CDB*/
 									   NIL,
 									   innersortkeys));

 		/* Can't do anything else if inner path needs to be unique'd */
 		if (IsA(inner_cheapest_total, UniquePath))  /* CDB - still need this? */
 			continue;

 		/*
 		 * Look for presorted inner paths that satisfy the innersortkey list
 		 * --- or any truncation thereof, if we are allowed to build a
 		 * mergejoin using a subset of the merge clauses.  Here, we consider
 		 * both cheap startup cost and cheap total cost.  We can ignore
 		 * inner_cheapest_total on the first iteration, since we already made
 		 * a path with it --- but not on later iterations with shorter sort
 		 * keys, because then we are considering a different situation, viz
 		 * using a simpler mergejoin to avoid a sort of the inner rel.
 		 */
 		num_sortkeys = list_length(innersortkeys);
 		if (num_sortkeys > 1 && !useallclauses)
 			trialsortkeys = list_copy(innersortkeys);	/* need modifiable copy */
 		else
 			trialsortkeys = innersortkeys;		/* won't really truncate */
 		cheapest_startup_inner = NULL;
 		cheapest_total_inner = NULL;

 		for (sortkeycnt = num_sortkeys; sortkeycnt > 0; sortkeycnt--)
 		{
 			Path	   *innerpath;
 			List	   *newclauses = NIL;

 			/*
 			 * Look for an inner path ordered well enough for the first
 			 * 'sortkeycnt' innersortkeys.	NB: trialsortkeys list is modified
 			 * destructively, which is why we made a copy...
 			 */
 			trialsortkeys = list_truncate(trialsortkeys, sortkeycnt);
 			innerpath = get_cheapest_path_for_pathkeys(innerrel->pathlist,
 													   trialsortkeys,
 													   TOTAL_COST);
 			if (innerpath != NULL &&
 				(innerpath != inner_cheapest_total ||
 				 sortkeycnt < num_sortkeys) &&
 				(cheapest_total_inner == NULL ||
 				 compare_path_costs(innerpath, cheapest_total_inner,
 									TOTAL_COST) < 0))
 			{
 				/* Found a cheap (or even-cheaper) sorted path */
 				/* Select the right mergeclauses, if we didn't already */
 				if (sortkeycnt < num_sortkeys)
 				{
 					newclauses =
 						find_mergeclauses_for_pathkeys(root,
 													   trialsortkeys,
 													   mergeclauses);
 					Assert(newclauses != NIL);
 				}
 				else
 					newclauses = mergeclauses;
 				add_path(root, joinrel, (Path *)
 						 create_mergejoin_path(root,
 											   joinrel,
 											   jointype,
 											   outerpath,
 											   innerpath,
 											   restrictlist,
 											   merge_pathkeys,
 											   newclauses,
                                                mergeclause_list,    /*CDB*/
 											   NIL,
 											   NIL));
 				cheapest_total_inner = innerpath;
 			}
 			/* Same on the basis of cheapest startup cost ... */
 			innerpath = get_cheapest_path_for_pathkeys(innerrel->pathlist,
 													   trialsortkeys,
 													   STARTUP_COST);
 			if (innerpath != NULL &&
 				(innerpath != inner_cheapest_total ||
 				 sortkeycnt < num_sortkeys) &&
 				(cheapest_startup_inner == NULL ||
 				 compare_path_costs(innerpath, cheapest_startup_inner,
 									STARTUP_COST) < 0))
 			{
 				/* Found a cheap (or even-cheaper) sorted path */
 				if (innerpath != cheapest_total_inner)
 				{
 					/*
 					 * Avoid rebuilding clause list if we already made one;
 					 * saves memory in big join trees...
 					 */
 					if (newclauses == NIL)
 					{
 						if (sortkeycnt < num_sortkeys)
 						{
 							newclauses =
 								find_mergeclauses_for_pathkeys(root,
 															   trialsortkeys,
 															   mergeclauses);
 							Assert(newclauses != NIL);
 						}
 						else
 							newclauses = mergeclauses;
 					}
 					add_path(root, joinrel, (Path *)
 							 create_mergejoin_path(root,
 												   joinrel,
 												   jointype,
 												   outerpath,
 												   innerpath,
 												   restrictlist,
 												   merge_pathkeys,
 												   newclauses,
                                                    mergeclause_list,    /*CDB*/
 												   NIL,
 												   NIL));
 				}
 				cheapest_startup_inner = innerpath;
 			}

 			/*
 			 * Don't consider truncated sortkeys if we need all clauses.
 			 */
 			if (useallclauses)
 				break;
 		}
 	}
 }

 List *
 hashclauses_for_join(List *restrictlist,
 					 RelOptInfo *outerrel,
 					 RelOptInfo *innerrel,
 					 JoinType jointype)
 {
 	bool		isouterjoin;
 	ListCell	*l;
 	List		*clauses = NIL;

 	/*
 	 * Hash only supports inner and left joins.
 	 */
 	switch (jointype)
 	{
 		case JOIN_INNER:
 			isouterjoin = false;
 			break;
 		case JOIN_LEFT:
 		case JOIN_LASJ:
 		case JOIN_LASJ_NOTIN:
 			isouterjoin = true;
 			break;
 		default:
 			return NULL;
 	}

 	/*
 	 * We need to build only one hashpath for any given pair of outer and
 	 * inner relations; all of the hashable clauses will be used as keys.
 	 *
 	 * Scan the join's restrictinfo list to find hashjoinable clauses that are
 	 * usable with this pair of sub-relations.
 	 */
 	foreach(l, restrictlist)
 	{
 		RestrictInfo *restrictinfo = (RestrictInfo *) lfirst(l);

 		if (!restrictinfo->can_join ||
 			restrictinfo->hashjoinoperator == InvalidOid)
 			continue;			/* not hashjoinable */

 		/*
 		 * If processing an outer join, only use its own join clauses for
 		 * hashing.  For inner joins we need not be so picky.
 		 */
 		if (isouterjoin && restrictinfo->is_pushed_down)
 			continue;

 		/*
 		 * Check if clause is usable with these input rels.
 		 */
 		if (bms_is_subset(restrictinfo->left_relids, outerrel->relids) &&
 			bms_is_subset(restrictinfo->right_relids, innerrel->relids))
 		{
 			/* righthand side is inner */
 		}
 		else if (bms_is_subset(restrictinfo->left_relids, innerrel->relids) &&
 				 bms_is_subset(restrictinfo->right_relids, outerrel->relids))
 		{
 			/* lefthand side is inner */
 		}
 		else
 			continue;			/* no good for these input relations */

 		clauses = lappend(clauses, restrictinfo);
 	}

 	return clauses;
 }

 /*
  * hash_inner_and_outer
  *	  Create hashjoin join paths by explicitly hashing both the outer and
  *	  inner keys of each available hash clause.
  *
  * 'joinrel' is the join relation
  * 'restrictlist' contains all of the RestrictInfo nodes for restriction
  *		clauses that apply to this join
  * 'jointype' is the type of join to do
  */
 static void
 hash_inner_and_outer(PlannerInfo *root,
 					 RelOptInfo *joinrel,
                      Path *outerpath,
                      Path *innerpath,
 					 List *restrictlist,
                      List *mergeclause_list,    /*CDB*/
                      List *hashclause_list,     /*CDB*/
 					 JoinType jointype)
 {
     HashPath   *hjpath;

 	/*
 	 * Hashjoin only supports inner, left  and anti joins.
 	 */
     Assert(jointype == JOIN_INNER || jointype == JOIN_LEFT || jointype == JOIN_LASJ || jointype == JOIN_LASJ_NOTIN);
     Assert(hashclause_list);

     /*
      * Consider hash join between the given outer path and inner path.
      */
     hjpath = create_hashjoin_path(root,
 								  joinrel,
 								  jointype,
 								  outerpath,
 								  innerpath,
 								  restrictlist,
                                   mergeclause_list,
 								  hashclause_list,
                                   false);
     if (!hjpath)
         return;

     /*
      * CDB: If gp_enable_hashjoin_size_heuristic is set, disallow inner
      * joins where the inner rel is the larger of the two inputs.
      *
      * Note create_hashjoin_path() has to precede this so we can get
      * the right jointype (in case of subquery dedup) and row count
      * (in case Broadcast Motion is inserted above an input path).
      */
     if (hjpath->jpath.jointype == JOIN_INNER &&
     		root->config->gp_enable_hashjoin_size_heuristic &&
         !root->config->mpp_trying_fallback_plan)
     {
         double  outersize = ExecHashRowSize(outerpath->parent->width) *
                                 cdbpath_rows(root, hjpath->jpath.outerjoinpath);
         double  innersize = ExecHashRowSize(innerpath->parent->width) *
                                 cdbpath_rows(root, hjpath->jpath.innerjoinpath);

         if (outersize >= innersize)
             add_path(root, joinrel, (Path *)hjpath);
     }
     else
         add_path(root, joinrel, (Path *)hjpath);
 }                               /* hash_inner_and_outer */


 /*
  * best_appendrel_indexscan
  *	  Finds the best available set of inner indexscans for a nestloop join
  *	  with the given append relation on the inside and the given outer_rel
  *	  outside.	Returns an AppendPath comprising the best inner scans, or
  *	  NULL if there are no possible inner indexscans.
  *
  * Note that we currently consider only cheapest-total-cost.  It's not
  * very clear what cheapest-startup-cost might mean for an AppendPath.
  */
 static Path *
 best_appendrel_indexscan(PlannerInfo *root, RelOptInfo *rel,
 						 RelOptInfo *outer_rel, JoinType jointype)
 {
 	Index       parentRTindex = rel->relid;
 	List	   *append_paths = NIL;
 	bool		found_indexscan = false;
 	ListCell   *l;

 	foreach(l, root->append_rel_list)
 	{
 		AppendRelInfo *appinfo = (AppendRelInfo *) lfirst(l);
 		Index       childRTindex;
 		RelOptInfo *childrel;
 		Path	   *index_cheapest_startup;
 		Path	   *index_cheapest_total;

 		/* append_rel_list contains all append rels; ignore others */
 		if (appinfo->parent_relid != parentRTindex)
 			continue;

 		childRTindex = appinfo->child_relid;
 		childrel = find_base_rel(root, childRTindex);
 		Assert(childrel->reloptkind == RELOPT_OTHER_MEMBER_REL);

 		/*
 		 * Check to see if child was rejected by constraint exclusion. If so,
 		 * it will have a cheapest_total_path that's an Append path with no
 		 * members (see set_plain_rel_pathlist).
 		 */
 		if (IsA(childrel->cheapest_total_path, AppendPath) &&
 			((AppendPath *) childrel->cheapest_total_path)->subpaths == NIL)
 			continue;			/* OK, we can ignore it */

 		/*
 		 * Get the best innerjoin indexpaths (if any) for this child rel.
 		 */
 		best_inner_indexscan(root, childrel, outer_rel, jointype,
 							 &index_cheapest_startup, &index_cheapest_total);

 		/*
 		 * If no luck on an indexpath for this rel, we'll still consider an
 		 * Append substituting the cheapest-total inner path.  However we must
 		 * find at least one indexpath, else there's not going to be any
 		 * improvement over the base path for the appendrel.
 		 */
 		if (index_cheapest_total)
 			found_indexscan = true;
 		else
 			index_cheapest_total = childrel->cheapest_total_path;

 		append_paths = lappend(append_paths, index_cheapest_total);
 	}

 	if (!found_indexscan)
 		return NULL;

 	/* Form and return the completed Append path. */
 	return (Path *) create_append_path(root, rel, append_paths);
 }

 /*
  * select_mergejoin_clauses
  *	  Select mergejoin clauses that are usable for a particular join.
  *	  Returns a list of RestrictInfo nodes for those clauses.
  *
  * We examine each restrictinfo clause known for the join to see
  * if it is mergejoinable and involves vars from the two sub-relations
  * currently of interest.
  */
 List *
 select_mergejoin_clauses(RelOptInfo *outerrel,
 						 RelOptInfo *innerrel,
 						 List *restrictlist,
 						 JoinType jointype)
 {
 	List	   *result_list = NIL;
 	bool		isouterjoin = IS_OUTER_JOIN(jointype);
 	bool		have_nonmergeable_joinclause = false;
 	ListCell   *l;

 	foreach(l, restrictlist)
 	{
 		RestrictInfo *restrictinfo = (RestrictInfo *) lfirst(l);

 		/*
 		 * If processing an outer join, only use its own join clauses in the
 		 * merge.  For inner joins we can use pushed-down clauses too. (Note:
 		 * we don't set have_nonmergeable_joinclause here because pushed-down
 		 * clauses will become otherquals not joinquals.)
 		 */
 		if (isouterjoin && restrictinfo->is_pushed_down)
 			continue;

 		if (!restrictinfo->can_join ||
 			restrictinfo->mergejoinoperator == InvalidOid)
 		{
 			have_nonmergeable_joinclause = true;
 			continue;			/* not mergejoinable */
 		}

 		/*
 		 * Check if clause is usable with these input rels.  All the vars
 		 * needed on each side of the clause must be available from one or the
 		 * other of the input rels.
 		 */
 		if (bms_is_subset(restrictinfo->left_relids, outerrel->relids) &&
 			bms_is_subset(restrictinfo->right_relids, innerrel->relids))
 		{
 			/* righthand side is inner */
 		}
 		else if (bms_is_subset(restrictinfo->left_relids, innerrel->relids) &&
 				 bms_is_subset(restrictinfo->right_relids, outerrel->relids))
 		{
 			/* lefthand side is inner */
 		}
 		else
 		{
 			have_nonmergeable_joinclause = true;
 			continue;			/* no good for these input relations */
 		}

 		result_list = lcons(restrictinfo, result_list);
 	}

 	/*
 	 * If it is a right/full join then *all* the explicit join clauses must be
 	 * mergejoinable, else the executor will fail. If we are asked for a right
 	 * join then just return NIL to indicate no mergejoin is possible (we can
 	 * handle it as a left join instead). If we are asked for a full join then
 	 * emit an error, because there is no fallback.
 	 */
 	if (have_nonmergeable_joinclause)
 	{
 		switch (jointype)
 		{
 			case JOIN_RIGHT:
 				return NIL;		/* not mergejoinable */
 			case JOIN_FULL:
 				ereport(ERROR,
 						(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
 						 errmsg("FULL JOIN is only supported with merge-joinable join conditions")));
 				break;
 			default:
 				/* otherwise, it's OK to have nonmergeable join quals */
 				break;
 		}
 	}

 	return result_list;
 }