src/backend/optimizer/plan/planagg.c - hawq - Git at Google

 /*
  * Licensed to the Apache Software Foundation (ASF) under one
  * or more contributor license agreements.  See the NOTICE file
  * distributed with this work for additional information
  * regarding copyright ownership.  The ASF licenses this file
  * to you under the Apache License, Version 2.0 (the
  * "License"); you may not use this file except in compliance
  * with the License.  You may obtain a copy of the License at
  *
  *   http://www.apache.org/licenses/LICENSE-2.0
  *
  * Unless required by applicable law or agreed to in writing, software
  * distributed under the License is distributed on an "AS IS" BASIS,
  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */

 /*-------------------------------------------------------------------------
  *
  * planagg.c
  *	  Special planning for aggregate queries.
  *
  * Portions Copyright (c) 2006-2008, Greenplum inc
  * Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
  *
  * IDENTIFICATION
  *	  $PostgreSQL: pgsql/src/backend/optimizer/plan/planagg.c,v 1.22.2.1 2007/02/06 06:50:33 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
 #include "postgres.h"

 #include "catalog/pg_aggregate.h"
 #include "catalog/pg_type.h"
 #include "nodes/makefuncs.h"
 #include "optimizer/clauses.h"
 #include "optimizer/cost.h"
 #include "optimizer/pathnode.h"
 #include "optimizer/paths.h"
 #include "optimizer/planmain.h"
 #include "optimizer/subselect.h"
 #include "parser/parse_clause.h"
 #include "parser/parse_expr.h"
 #include "parser/parsetree.h"
 #include "utils/lsyscache.h"
 #include "utils/syscache.h"

 #include "cdb/cdbllize.h"                   /* pull_up_Flow() */


 typedef struct
 {
 	Oid			aggfnoid;		/* pg_proc Oid of the aggregate */
 	Oid			aggsortop;		/* Oid of its sort operator */
 	Expr	   *target;			/* expression we are aggregating on */
 	IndexPath  *path;			/* access path for index scan */
 	Cost		pathcost;		/* estimated cost to fetch first row */
 	Param	   *param;			/* param for subplan's output */
 } MinMaxAggInfo;

 static bool find_minmax_aggs_walker(Node *node, List **context);
 static bool build_minmax_path(PlannerInfo *root, RelOptInfo *rel,
 				  MinMaxAggInfo *info);
 static ScanDirection match_agg_to_index_col(MinMaxAggInfo *info,
 					   IndexOptInfo *index, int indexcol);
 static void make_agg_subplan(PlannerInfo *root, MinMaxAggInfo *info);
 static Node *replace_aggs_with_params_mutator(Node *node, List **context);
 static Oid	fetch_agg_sort_op(Oid aggfnoid);


 /*
  * optimize_minmax_aggregates - check for optimizing MIN/MAX via indexes
  *
  * This checks to see if we can replace MIN/MAX aggregate functions by
  * subqueries of the form
  *		(SELECT col FROM tab WHERE ... ORDER BY col ASC/DESC LIMIT 1)
  * Given a suitable index on tab.col, this can be much faster than the
  * generic scan-all-the-rows plan.
  *
  * We are passed the preprocessed tlist, and the best path
  * devised for computing the input of a standard Agg node.	If we are able
  * to optimize all the aggregates, and the result is estimated to be cheaper
  * than the generic aggregate method, then generate and return a Plan that
  * does it that way.  Otherwise, return NULL.
  */
 Plan *
 optimize_minmax_aggregates(PlannerInfo *root, List *tlist, Path *best_path)
 {
 	Query	   *parse = root->parse;
 	FromExpr   *jtnode;
 	RangeTblRef *rtr;
 	RangeTblEntry *rte;
 	RelOptInfo *rel;
 	List	   *aggs_list;
 	ListCell   *l;
 	Cost		total_cost;
 	Path		agg_p;
 	Plan	   *plan;
 	Node	   *hqual;
 	QualCost	tlist_cost;

 	/* Nothing to do if query has no aggregates */
 	if (!parse->hasAggs)
 		return NULL;

 	Assert(!parse->setOperations);		/* shouldn't get here if a setop */
 	Assert(parse->rowMarks == NIL);		/* nor if FOR UPDATE */

 	/*
 	 * Reject unoptimizable cases.
 	 *
 	 * We don't handle GROUP BY, because our current implementations of
 	 * grouping require looking at all the rows anyway, and so there's not
 	 * much point in optimizing MIN/MAX.
 	 */
 	if (parse->groupClause)
 		return NULL;

 	/*
 	 * We also restrict the query to reference exactly one table, since join
 	 * conditions can't be handled reasonably.  (We could perhaps handle a
 	 * query containing cartesian-product joins, but it hardly seems worth the
 	 * trouble.)  However, the single real table could be buried in several
 	 * levels of FromExpr.
 	 */
 	jtnode = parse->jointree;
 	while (IsA(jtnode, FromExpr))
 	{
 		if (list_length(jtnode->fromlist) != 1)
 			return NULL;
 		jtnode = linitial(jtnode->fromlist);
 	}
 	if (!IsA(jtnode, RangeTblRef))
 		return NULL;
 	rtr = (RangeTblRef *) jtnode;
 	rte = rt_fetch(rtr->rtindex, parse->rtable);
 	if (rte->rtekind != RTE_RELATION || rte->inh)
 		return NULL;
 	rel = find_base_rel(root, rtr->rtindex);

 	/*
 	 * Since this optimization is not applicable all that often, we want to
 	 * fall out before doing very much work if possible.  Therefore we do the
 	 * work in several passes.	The first pass scans the tlist and HAVING qual
 	 * to find all the aggregates and verify that each of them is a MIN/MAX
 	 * aggregate.  If that succeeds, the second pass looks at each aggregate
 	 * to see if it is optimizable; if so we make an IndexPath describing how
 	 * we would scan it.  (We do not try to optimize if only some aggs are
 	 * optimizable, since that means we'll have to scan all the rows anyway.)
 	 * If that succeeds, we have enough info to compare costs against the
 	 * generic implementation. Only if that test passes do we build a Plan.
 	 */

 	/* Pass 1: find all the aggregates */
 	aggs_list = NIL;
 	if (find_minmax_aggs_walker((Node *) tlist, &aggs_list))
 		return NULL;
 	if (find_minmax_aggs_walker(parse->havingQual, &aggs_list))
 		return NULL;

 	/* Pass 2: see if each one is optimizable */
 	total_cost = 0;
 	foreach(l, aggs_list)
 	{
 		MinMaxAggInfo *info = (MinMaxAggInfo *) lfirst(l);

 		if (!build_minmax_path(root, rel, info))
 			return NULL;
 		total_cost += info->pathcost;
 	}

 	/*
 	 * Make the cost comparison.
 	 *
 	 * Note that we don't include evaluation cost of the tlist here; this is
 	 * OK since it isn't included in best_path's cost either, and should be
 	 * the same in either case.
 	 */
 	cost_agg(&agg_p, root, AGG_PLAIN, list_length(aggs_list),
 			 0, 0,
 			 best_path->startup_cost, best_path->total_cost,
 			 best_path->parent->rows, 0.0, 0.0, 0.0, false);

 	if (total_cost > agg_p.total_cost)
 		return NULL;			/* too expensive */

 	/*
 	 * OK, we are going to generate an optimized plan.
 	 */

 	/* Pass 3: generate subplans and output Param nodes */
 	foreach(l, aggs_list)
 	{
 		make_agg_subplan(root, (MinMaxAggInfo *) lfirst(l));
 	}

 	/*
 	 * Modify the targetlist and HAVING qual to reference subquery outputs
 	 */
 	tlist = (List *) replace_aggs_with_params_mutator((Node *) tlist,
 													  &aggs_list);
 	hqual = replace_aggs_with_params_mutator(parse->havingQual,
 											 &aggs_list);

 	/*
 	 * Generate the output plan --- basically just a Result
 	 */
 	plan = (Plan *) make_result(tlist, hqual, NULL);

 	/* Account for evaluation cost of the tlist (make_result did the rest) */
 	cost_qual_eval(&tlist_cost, tlist, root);
 	plan->startup_cost += tlist_cost.startup;
 	plan->total_cost += tlist_cost.startup + tlist_cost.per_tuple;

 	return plan;
 }

 /*
  * find_minmax_aggs_walker
  *		Recursively scan the Aggref nodes in an expression tree, and check
  *		that each one is a MIN/MAX aggregate.  If so, build a list of the
  *		distinct aggregate calls in the tree.
  *
  * Returns TRUE if a non-MIN/MAX aggregate is found, FALSE otherwise.
  * (This seemingly-backward definition is used because expression_tree_walker
  * aborts the scan on TRUE return, which is what we want.)
  *
  * Found aggregates are added to the list at *context; it's up to the caller
  * to initialize the list to NIL.
  *
  * This does not descend into subqueries, and so should be used only after
  * reduction of sublinks to subplans.  There mustn't be outer-aggregate
  * references either.
  */
 static bool
 find_minmax_aggs_walker(Node *node, List **context)
 {
 	if (node == NULL)
 		return false;
 	if (IsA(node, Aggref))
 	{
 		Aggref	   *aggref = (Aggref *) node;
 		Oid			aggsortop;
 		Expr	   *curTarget;
 		MinMaxAggInfo *info;
 		ListCell   *l;

 		Assert(aggref->agglevelsup == 0);
 		if (list_length(aggref->args) != 1)
 			return true;		/* it couldn't be MIN/MAX */
 		/* note: we do not care if DISTINCT is mentioned ... */

 		aggsortop = fetch_agg_sort_op(aggref->aggfnoid);
 		if (!OidIsValid(aggsortop))
 			return true;		/* not a MIN/MAX aggregate */

 		/*
 		 * Check whether it's already in the list, and add it if not.
 		 */
 		curTarget = linitial(aggref->args);
 		foreach(l, *context)
 		{
 			info = (MinMaxAggInfo *) lfirst(l);
 			if (info->aggfnoid == aggref->aggfnoid &&
 				equal(info->target, curTarget))
 				return false;
 		}

 		info = (MinMaxAggInfo *) palloc0(sizeof(MinMaxAggInfo));
 		info->aggfnoid = aggref->aggfnoid;
 		info->aggsortop = aggsortop;
 		info->target = curTarget;

 		*context = lappend(*context, info);

 		/*
 		 * We need not recurse into the argument, since it can't contain any
 		 * aggregates.
 		 */
 		return false;
 	}
 	Assert(!IsA(node, SubLink));
 	return expression_tree_walker(node, find_minmax_aggs_walker,
 								  (void *) context);
 }

 /*
  * build_minmax_path
  *		Given a MIN/MAX aggregate, try to find an index it can be optimized
  *		with.  Build a Path describing the best such index path.
  *
  * Returns TRUE if successful, FALSE if not.  In the TRUE case, info->path
  * is filled in.
  *
  * XXX look at sharing more code with indxpath.c.
  *
  * Note: check_partial_indexes() must have been run previously.
  */
 static bool
 build_minmax_path(PlannerInfo *root, RelOptInfo *rel, MinMaxAggInfo *info)
 {
 	IndexPath  *best_path = NULL;
 	Cost		best_cost = 0;
 	ListCell   *l;

 	foreach(l, rel->indexlist)
 	{
 		IndexOptInfo *index = (IndexOptInfo *) lfirst(l);
 		ScanDirection indexscandir = NoMovementScanDirection;
 		int			indexcol;
 		int			prevcol;
 		List	   *restrictclauses;
 		IndexPath  *new_path;
 		Cost		new_cost;
 		bool		found_clause;

 		/* Ignore non-btree indexes */
 		if (index->relam != BTREE_AM_OID)
 			continue;

 		/* Ignore partial indexes that do not match the query */
 		if (index->indpred != NIL && !index->predOK)
 			continue;

 		/*
 		 * Look for a match to one of the index columns.  (In a stupidly
 		 * designed index, there could be multiple matches, but we only care
 		 * about the first one.)
 		 */
 		for (indexcol = 0; indexcol < index->ncolumns; indexcol++)
 		{
 			indexscandir = match_agg_to_index_col(info, index, indexcol);
 			if (!ScanDirectionIsNoMovement(indexscandir))
 				break;
 		}
 		if (ScanDirectionIsNoMovement(indexscandir))
 			continue;

 		/*
 		 * If the match is not at the first index column, we have to verify
 		 * that there are "x = something" restrictions on all the earlier
 		 * index columns.  Since we'll need the restrictclauses list anyway to
 		 * build the path, it's convenient to extract that first and then look
 		 * through it for the equality restrictions.
 		 */
 		restrictclauses = group_clauses_by_indexkey(index,
 												index->rel->baserestrictinfo,
 													NIL,
 													NULL,
 													SAOP_FORBID,
 													&found_clause);

 		if (list_length(restrictclauses) < indexcol)
 			continue;			/* definitely haven't got enough */
 		for (prevcol = 0; prevcol < indexcol; prevcol++)
 		{
 			List	   *rinfos = (List *) list_nth(restrictclauses, prevcol);
 			ListCell   *ll;

 			foreach(ll, rinfos)
 			{
 				RestrictInfo *rinfo = (RestrictInfo *) lfirst(ll);
 				int			strategy;

 				Assert(is_opclause(rinfo->clause));
 				strategy =
 					get_op_opclass_strategy(((OpExpr *) rinfo->clause)->opno,
 											index->classlist[prevcol]);
 				if (strategy == BTEqualStrategyNumber)
 					break;
 			}
 			if (ll == NULL)
 				break;			/* none are Equal for this index col */
 		}
 		if (prevcol < indexcol)
 			continue;			/* didn't find all Equal clauses */

 		/*
 		 * Build the access path.  We don't bother marking it with pathkeys.
 		 */
 		new_path = create_index_path(root, index,
 									 restrictclauses,
 									 NIL,
 									 indexscandir,
 									 NULL);

 		/*
 		 * Estimate actual cost of fetching just one row.
 		 */
 		if (new_path->rows > 1.0)
 			new_cost = new_path->path.startup_cost +
 				(new_path->path.total_cost - new_path->path.startup_cost)
 				* 1.0 / new_path->rows;
 		else
 			new_cost = new_path->path.total_cost;

 		/*
 		 * Keep if first or if cheaper than previous best.
 		 */
 		if (best_path == NULL || new_cost < best_cost)
 		{
 			best_path = new_path;
 			best_cost = new_cost;
 		}
 	}

 	info->path = best_path;
 	info->pathcost = best_cost;
 	return (best_path != NULL);
 }

 /*
  * match_agg_to_index_col
  *		Does an aggregate match an index column?
  *
  * It matches if its argument is equal to the index column's data and its
  * sortop is either the LessThan or GreaterThan member of the column's opclass.
  *
  * We return ForwardScanDirection if match the LessThan member,
  * BackwardScanDirection if match the GreaterThan member,
  * and NoMovementScanDirection if there's no match.
  */
 static ScanDirection
 match_agg_to_index_col(MinMaxAggInfo *info, IndexOptInfo *index, int indexcol)
 {
 	int			strategy;

 	/* Check for data match */
 	if (!match_index_to_operand((Node *) info->target, indexcol, index))
 		return NoMovementScanDirection;

 	/* Look up the operator in the opclass */
 	strategy = get_op_opclass_strategy(info->aggsortop,
 									   index->classlist[indexcol]);
 	if (strategy == BTLessStrategyNumber)
 		return ForwardScanDirection;
 	if (strategy == BTGreaterStrategyNumber)
 		return BackwardScanDirection;
 	return NoMovementScanDirection;
 }

 /*
  * Construct a suitable plan for a converted aggregate query
  */
 static void
 make_agg_subplan(PlannerInfo *root, MinMaxAggInfo *info)
 {
 	PlannerInfo subroot;
 	Query	   *subparse;
 	Plan	   *plan;
 	Plan	   *iplan;
 	TargetEntry *tle;
 	SortClause *sortcl;
 	NullTest   *ntest;

 	/*
 	 * Generate a suitably modified query.	Much of the work here is probably
 	 * unnecessary in the normal case, but we want to make it look good if
 	 * someone tries to EXPLAIN the result.
 	 */
 	memcpy(&subroot, root, sizeof(PlannerInfo));
 	subroot.parse = subparse = (Query *) copyObject(root->parse);
 	subparse->commandType = CMD_SELECT;
 	subparse->resultRelation = 0;
 	subparse->resultRelations = NIL;
 	subparse->returningLists = NIL;
 	subparse->intoClause = NULL;
 	subparse->hasAggs = false;
 	subparse->groupClause = NIL;
 	subparse->havingQual = NULL;
 	subparse->distinctClause = NIL;
 	subroot.hasHavingQual = false;

 	/* TODO Should we also generate a "temporary" root as in,
 	 *       e.g., inheritance planning?
 	 *
 	 * Generate modified query with this rel as target.  We have to be
 	 * prepared to translate varnos in in_info_list as well as in the
 	 * Query proper.
 	 *
 	memcpy(&subroot, root, sizeof(PlannerInfo));
 	subroot.parse = (Query *)
 	adjust_appendrel_attrs((Node *) parse,
 						   appinfo);
 	subroot.in_info_list = (List *)
 	adjust_appendrel_attrs((Node *) root->in_info_list,
 						   appinfo);
 	// There shouldn't be any OJ info to translate, as yet
 	Assert(subroot.oj_info_list == NIL);

 	subroot->resultRelations = NIL;
 	subroot->returningLists = NIL;
 	 */

 	/* single tlist entry that is the aggregate target */
 	tle = makeTargetEntry(copyObject(info->target),
 						  1,
 						  pstrdup("agg_target"),
 						  false);
 	subparse->targetList = list_make1(tle);

 	/* set up the appropriate ORDER BY entry */
 	sortcl = makeNode(SortClause);
 	sortcl->tleSortGroupRef = assignSortGroupRef(tle, subparse->targetList);
 	sortcl->sortop = info->aggsortop;
 	subparse->sortClause = list_make1(sortcl);

 	/* set up LIMIT 1 */
 	subparse->limitOffset = NULL;
 	subparse->limitCount = (Node *) makeConst(INT8OID, -1, sizeof(int64),
 											  Int64GetDatum(1),
 											  false, true /* not by val */ );

 	/*
 	 * Generate the plan for the subquery.	We already have a Path for the
 	 * basic indexscan, but we have to convert it to a Plan and attach a LIMIT
 	 * node above it.
 	 *
 	 * Also we must add a "WHERE foo IS NOT NULL" restriction to the
 	 * indexscan, to be sure we don't return a NULL, which'd be contrary to
 	 * the standard behavior of MIN/MAX.  XXX ideally this should be done
 	 * earlier, so that the selectivity of the restriction could be included
 	 * in our cost estimates.  But that looks painful, and in most cases the
 	 * fraction of NULLs isn't high enough to change the decision.
 	 *
 	 * The NOT NULL qual has to go on the actual indexscan; create_plan might
 	 * have stuck a gating Result atop that, if there were any pseudoconstant
 	 * quals.
 	 */
 	plan = create_plan(&subroot, (Path *) info->path);

     /* Replace the plan's tlist with a copy of the one we built above. */
     plan = plan_pushdown_tlist(plan, copyObject(subparse->targetList));

 	if (IsA(plan, Result))
 		iplan = plan->lefttree;
 	else
 		iplan = plan;
 	Assert(IsA(iplan, IndexScan));

 	ntest = makeNode(NullTest);
 	ntest->nulltesttype = IS_NOT_NULL;
 	ntest->arg = copyObject(info->target);

 	iplan->qual = lcons(ntest, iplan->qual);

 	plan = (Plan *) make_limit(plan,
 							   subparse->limitOffset,
 							   subparse->limitCount,
 							   0, 1);

     /* Decorate the Limit node with a Flow node. */
     plan->flow = pull_up_Flow(plan, plan->lefttree, false);

 	/*
 	 * Convert the plan into an InitPlan, and make a Param for its result.
 	 */
 	info->param = SS_make_initplan_from_plan(&subroot, plan,
 											 exprType((Node *) tle->expr),
 											 -1);
 }

 /*
  * Replace original aggregate calls with subplan output Params
  */
 static Node *
 replace_aggs_with_params_mutator(Node *node, List **context)
 {
 	if (node == NULL)
 		return NULL;
 	if (IsA(node, Aggref))
 	{
 		Aggref	   *aggref = (Aggref *) node;
 		ListCell   *l;
 		Expr	   *curTarget = linitial(aggref->args);

 		foreach(l, *context)
 		{
 			MinMaxAggInfo *info = (MinMaxAggInfo *) lfirst(l);

 			if (info->aggfnoid == aggref->aggfnoid &&
 				equal(info->target, curTarget))
 				return (Node *) info->param;
 		}
 		elog(ERROR, "failed to re-find aggregate info record");
 	}
 	Assert(!IsA(node, SubLink));
 	return expression_tree_mutator(node, replace_aggs_with_params_mutator,
 								   (void *) context);
 }

 /*
  * Get the OID of the sort operator, if any, associated with an aggregate.
  * Returns InvalidOid if there is no such operator.
  */
 static Oid
 fetch_agg_sort_op(Oid aggfnoid)
 {
 	HeapTuple	aggTuple;
 	Form_pg_aggregate aggform;
 	Oid			aggsortop;

 	/* fetch aggregate entry from pg_aggregate */
 	aggTuple = SearchSysCache(AGGFNOID,
 							  ObjectIdGetDatum(aggfnoid),
 							  0, 0, 0);
 	if (!HeapTupleIsValid(aggTuple))
 		return InvalidOid;
 	aggform = (Form_pg_aggregate) GETSTRUCT(aggTuple);
 	aggsortop = aggform->aggsortop;
 	ReleaseSysCache(aggTuple);

 	return aggsortop;
 }
	/*
	* Licensed to the Apache Software Foundation (ASF) under one
	* or more contributor license agreements. See the NOTICE file
	* distributed with this work for additional information
	* regarding copyright ownership. The ASF licenses this file
	* to you under the Apache License, Version 2.0 (the
	* "License"); you may not use this file except in compliance
	* with the License. You may obtain a copy of the License at
	*
	* http://www.apache.org/licenses/LICENSE-2.0
	*
	* Unless required by applicable law or agreed to in writing, software
	* distributed under the License is distributed on an "AS IS" BASIS,
	* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
	* See the License for the specific language governing permissions and
	* limitations under the License.
	*/

	/*-------------------------------------------------------------------------
	*
	* planagg.c
	* Special planning for aggregate queries.
	*
	* Portions Copyright (c) 2006-2008, Greenplum inc
	* Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group
	* Portions Copyright (c) 1994, Regents of the University of California
	*
	*
	* IDENTIFICATION
	* $PostgreSQL: pgsql/src/backend/optimizer/plan/planagg.c,v 1.22.2.1 2007/02/06 06:50:33 tgl Exp $
	*
	*-------------------------------------------------------------------------
	*/
	#include "postgres.h"

	#include "catalog/pg_aggregate.h"
	#include "catalog/pg_type.h"
	#include "nodes/makefuncs.h"
	#include "optimizer/clauses.h"
	#include "optimizer/cost.h"
	#include "optimizer/pathnode.h"
	#include "optimizer/paths.h"
	#include "optimizer/planmain.h"
	#include "optimizer/subselect.h"
	#include "parser/parse_clause.h"
	#include "parser/parse_expr.h"
	#include "parser/parsetree.h"
	#include "utils/lsyscache.h"
	#include "utils/syscache.h"

	#include "cdb/cdbllize.h" /* pull_up_Flow() */


	typedef struct
	{
	Oid aggfnoid; /* pg_proc Oid of the aggregate */
	Oid aggsortop; /* Oid of its sort operator */
	Expr target; / expression we are aggregating on */
	IndexPath path; / access path for index scan */
	Cost pathcost; /* estimated cost to fetch first row */
	Param param; / param for subplan's output */
	} MinMaxAggInfo;

	static bool find_minmax_aggs_walker(Node node, List *context);
	static bool build_minmax_path(PlannerInfo root, RelOptInfo rel,
	MinMaxAggInfo *info);
	static ScanDirection match_agg_to_index_col(MinMaxAggInfo *info,
	IndexOptInfo *index, int indexcol);
	static void make_agg_subplan(PlannerInfo root, MinMaxAggInfo info);
	static Node replace_aggs_with_params_mutator(Node node, List **context);
	static Oid fetch_agg_sort_op(Oid aggfnoid);


	/*
	* optimize_minmax_aggregates - check for optimizing MIN/MAX via indexes
	*
	* This checks to see if we can replace MIN/MAX aggregate functions by
	* subqueries of the form
	* (SELECT col FROM tab WHERE ... ORDER BY col ASC/DESC LIMIT 1)
	* Given a suitable index on tab.col, this can be much faster than the
	* generic scan-all-the-rows plan.
	*
	* We are passed the preprocessed tlist, and the best path
	* devised for computing the input of a standard Agg node. If we are able
	* to optimize all the aggregates, and the result is estimated to be cheaper
	* than the generic aggregate method, then generate and return a Plan that
	* does it that way. Otherwise, return NULL.
	*/
	Plan *
	optimize_minmax_aggregates(PlannerInfo root, List tlist, Path *best_path)
	{
	Query *parse = root->parse;
	FromExpr *jtnode;
	RangeTblRef *rtr;
	RangeTblEntry *rte;
	RelOptInfo *rel;
	List *aggs_list;
	ListCell *l;
	Cost total_cost;
	Path agg_p;
	Plan *plan;
	Node *hqual;
	QualCost tlist_cost;

	/* Nothing to do if query has no aggregates */
	if (!parse->hasAggs)
	return NULL;

	Assert(!parse->setOperations); /* shouldn't get here if a setop */
	Assert(parse->rowMarks == NIL); /* nor if FOR UPDATE */

	/*
	* Reject unoptimizable cases.
	*
	* We don't handle GROUP BY, because our current implementations of
	* grouping require looking at all the rows anyway, and so there's not
	* much point in optimizing MIN/MAX.
	*/
	if (parse->groupClause)
	return NULL;

	/*
	* We also restrict the query to reference exactly one table, since join
	* conditions can't be handled reasonably. (We could perhaps handle a
	* query containing cartesian-product joins, but it hardly seems worth the
	* trouble.) However, the single real table could be buried in several
	* levels of FromExpr.
	*/
	jtnode = parse->jointree;
	while (IsA(jtnode, FromExpr))
	{
	if (list_length(jtnode->fromlist) != 1)
	return NULL;
	jtnode = linitial(jtnode->fromlist);
	}
	if (!IsA(jtnode, RangeTblRef))
	return NULL;
	rtr = (RangeTblRef *) jtnode;
	rte = rt_fetch(rtr->rtindex, parse->rtable);
	if (rte->rtekind != RTE_RELATION \|\| rte->inh)
	return NULL;
	rel = find_base_rel(root, rtr->rtindex);

	/*
	* Since this optimization is not applicable all that often, we want to
	* fall out before doing very much work if possible. Therefore we do the
	* work in several passes. The first pass scans the tlist and HAVING qual
	* to find all the aggregates and verify that each of them is a MIN/MAX
	* aggregate. If that succeeds, the second pass looks at each aggregate
	* to see if it is optimizable; if so we make an IndexPath describing how
	* we would scan it. (We do not try to optimize if only some aggs are
	* optimizable, since that means we'll have to scan all the rows anyway.)
	* If that succeeds, we have enough info to compare costs against the
	* generic implementation. Only if that test passes do we build a Plan.
	*/

	/* Pass 1: find all the aggregates */
	aggs_list = NIL;
	if (find_minmax_aggs_walker((Node *) tlist, &aggs_list))
	return NULL;
	if (find_minmax_aggs_walker(parse->havingQual, &aggs_list))
	return NULL;

	/* Pass 2: see if each one is optimizable */
	total_cost = 0;
	foreach(l, aggs_list)
	{
	MinMaxAggInfo info = (MinMaxAggInfo ) lfirst(l);

	if (!build_minmax_path(root, rel, info))
	return NULL;
	total_cost += info->pathcost;
	}

	/*
	* Make the cost comparison.
	*
	* Note that we don't include evaluation cost of the tlist here; this is
	* OK since it isn't included in best_path's cost either, and should be
	* the same in either case.
	*/
	cost_agg(&agg_p, root, AGG_PLAIN, list_length(aggs_list),
	0, 0,
	best_path->startup_cost, best_path->total_cost,
	best_path->parent->rows, 0.0, 0.0, 0.0, false);

	if (total_cost > agg_p.total_cost)
	return NULL; /* too expensive */

	/*
	* OK, we are going to generate an optimized plan.
	*/

	/* Pass 3: generate subplans and output Param nodes */
	foreach(l, aggs_list)
	{
	make_agg_subplan(root, (MinMaxAggInfo *) lfirst(l));
	}

	/*
	* Modify the targetlist and HAVING qual to reference subquery outputs
	*/
	tlist = (List ) replace_aggs_with_params_mutator((Node ) tlist,
	&aggs_list);
	hqual = replace_aggs_with_params_mutator(parse->havingQual,
	&aggs_list);

	/*
	* Generate the output plan --- basically just a Result
	*/
	plan = (Plan *) make_result(tlist, hqual, NULL);

	/* Account for evaluation cost of the tlist (make_result did the rest) */
	cost_qual_eval(&tlist_cost, tlist, root);
	plan->startup_cost += tlist_cost.startup;
	plan->total_cost += tlist_cost.startup + tlist_cost.per_tuple;

	return plan;
	}

	/*
	* find_minmax_aggs_walker
	* Recursively scan the Aggref nodes in an expression tree, and check
	* that each one is a MIN/MAX aggregate. If so, build a list of the
	* distinct aggregate calls in the tree.
	*
	* Returns TRUE if a non-MIN/MAX aggregate is found, FALSE otherwise.
	* (This seemingly-backward definition is used because expression_tree_walker
	* aborts the scan on TRUE return, which is what we want.)
	*
	* Found aggregates are added to the list at *context; it's up to the caller
	* to initialize the list to NIL.
	*
	* This does not descend into subqueries, and so should be used only after
	* reduction of sublinks to subplans. There mustn't be outer-aggregate
	* references either.
	*/
	static bool
	find_minmax_aggs_walker(Node node, List *context)
	{
	if (node == NULL)
	return false;
	if (IsA(node, Aggref))
	{
	Aggref aggref = (Aggref ) node;
	Oid aggsortop;
	Expr *curTarget;
	MinMaxAggInfo *info;
	ListCell *l;

	Assert(aggref->agglevelsup == 0);
	if (list_length(aggref->args) != 1)
	return true; /* it couldn't be MIN/MAX */
	/* note: we do not care if DISTINCT is mentioned ... */

	aggsortop = fetch_agg_sort_op(aggref->aggfnoid);
	if (!OidIsValid(aggsortop))
	return true; /* not a MIN/MAX aggregate */

	/*
	* Check whether it's already in the list, and add it if not.
	*/
	curTarget = linitial(aggref->args);
	foreach(l, *context)
	{
	info = (MinMaxAggInfo *) lfirst(l);
	if (info->aggfnoid == aggref->aggfnoid &&
	equal(info->target, curTarget))
	return false;
	}

	info = (MinMaxAggInfo *) palloc0(sizeof(MinMaxAggInfo));
	info->aggfnoid = aggref->aggfnoid;
	info->aggsortop = aggsortop;
	info->target = curTarget;

	context = lappend(context, info);

	/*
	* We need not recurse into the argument, since it can't contain any
	* aggregates.
	*/
	return false;
	}
	Assert(!IsA(node, SubLink));
	return expression_tree_walker(node, find_minmax_aggs_walker,
	(void *) context);
	}

	/*
	* build_minmax_path
	* Given a MIN/MAX aggregate, try to find an index it can be optimized
	* with. Build a Path describing the best such index path.
	*
	* Returns TRUE if successful, FALSE if not. In the TRUE case, info->path
	* is filled in.
	*
	* XXX look at sharing more code with indxpath.c.
	*
	* Note: check_partial_indexes() must have been run previously.
	*/
	static bool
	build_minmax_path(PlannerInfo root, RelOptInfo rel, MinMaxAggInfo *info)
	{
	IndexPath *best_path = NULL;
	Cost best_cost = 0;
	ListCell *l;

	foreach(l, rel->indexlist)
	{
	IndexOptInfo index = (IndexOptInfo ) lfirst(l);
	ScanDirection indexscandir = NoMovementScanDirection;
	int indexcol;
	int prevcol;
	List *restrictclauses;
	IndexPath *new_path;
	Cost new_cost;
	bool found_clause;

	/* Ignore non-btree indexes */
	if (index->relam != BTREE_AM_OID)
	continue;

	/* Ignore partial indexes that do not match the query */
	if (index->indpred != NIL && !index->predOK)
	continue;

	/*
	* Look for a match to one of the index columns. (In a stupidly
	* designed index, there could be multiple matches, but we only care
	* about the first one.)
	*/
	for (indexcol = 0; indexcol < index->ncolumns; indexcol++)
	{
	indexscandir = match_agg_to_index_col(info, index, indexcol);
	if (!ScanDirectionIsNoMovement(indexscandir))
	break;
	}
	if (ScanDirectionIsNoMovement(indexscandir))
	continue;

	/*
	* If the match is not at the first index column, we have to verify
	* that there are "x = something" restrictions on all the earlier
	* index columns. Since we'll need the restrictclauses list anyway to
	* build the path, it's convenient to extract that first and then look
	* through it for the equality restrictions.
	*/
	restrictclauses = group_clauses_by_indexkey(index,
	index->rel->baserestrictinfo,
	NIL,
	NULL,
	SAOP_FORBID,
	&found_clause);

	if (list_length(restrictclauses) < indexcol)
	continue; /* definitely haven't got enough */
	for (prevcol = 0; prevcol < indexcol; prevcol++)
	{
	List rinfos = (List ) list_nth(restrictclauses, prevcol);
	ListCell *ll;

	foreach(ll, rinfos)
	{
	RestrictInfo rinfo = (RestrictInfo ) lfirst(ll);
	int strategy;

	Assert(is_opclause(rinfo->clause));
	strategy =
	get_op_opclass_strategy(((OpExpr *) rinfo->clause)->opno,
	index->classlist[prevcol]);
	if (strategy == BTEqualStrategyNumber)
	break;
	}
	if (ll == NULL)
	break; /* none are Equal for this index col */
	}
	if (prevcol < indexcol)
	continue; /* didn't find all Equal clauses */

	/*
	* Build the access path. We don't bother marking it with pathkeys.
	*/
	new_path = create_index_path(root, index,
	restrictclauses,
	NIL,
	indexscandir,
	NULL);

	/*
	* Estimate actual cost of fetching just one row.
	*/
	if (new_path->rows > 1.0)
	new_cost = new_path->path.startup_cost +
	(new_path->path.total_cost - new_path->path.startup_cost)
	* 1.0 / new_path->rows;
	else
	new_cost = new_path->path.total_cost;

	/*
	* Keep if first or if cheaper than previous best.
	*/
	if (best_path == NULL \|\| new_cost < best_cost)
	{
	best_path = new_path;
	best_cost = new_cost;
	}
	}

	info->path = best_path;
	info->pathcost = best_cost;
	return (best_path != NULL);
	}

	/*
	* match_agg_to_index_col
	* Does an aggregate match an index column?
	*
	* It matches if its argument is equal to the index column's data and its
	* sortop is either the LessThan or GreaterThan member of the column's opclass.
	*
	* We return ForwardScanDirection if match the LessThan member,
	* BackwardScanDirection if match the GreaterThan member,
	* and NoMovementScanDirection if there's no match.
	*/
	static ScanDirection
	match_agg_to_index_col(MinMaxAggInfo info, IndexOptInfo index, int indexcol)
	{
	int strategy;

	/* Check for data match */
	if (!match_index_to_operand((Node *) info->target, indexcol, index))
	return NoMovementScanDirection;

	/* Look up the operator in the opclass */
	strategy = get_op_opclass_strategy(info->aggsortop,
	index->classlist[indexcol]);
	if (strategy == BTLessStrategyNumber)
	return ForwardScanDirection;
	if (strategy == BTGreaterStrategyNumber)
	return BackwardScanDirection;
	return NoMovementScanDirection;
	}

	/*
	* Construct a suitable plan for a converted aggregate query
	*/
	static void
	make_agg_subplan(PlannerInfo root, MinMaxAggInfo info)
	{
	PlannerInfo subroot;
	Query *subparse;
	Plan *plan;
	Plan *iplan;
	TargetEntry *tle;
	SortClause *sortcl;
	NullTest *ntest;

	/*
	* Generate a suitably modified query. Much of the work here is probably
	* unnecessary in the normal case, but we want to make it look good if
	* someone tries to EXPLAIN the result.
	*/
	memcpy(&subroot, root, sizeof(PlannerInfo));
	subroot.parse = subparse = (Query *) copyObject(root->parse);
	subparse->commandType = CMD_SELECT;
	subparse->resultRelation = 0;
	subparse->resultRelations = NIL;
	subparse->returningLists = NIL;
	subparse->intoClause = NULL;
	subparse->hasAggs = false;
	subparse->groupClause = NIL;
	subparse->havingQual = NULL;
	subparse->distinctClause = NIL;
	subroot.hasHavingQual = false;

	/* TODO Should we also generate a "temporary" root as in,
	* e.g., inheritance planning?
	*
	* Generate modified query with this rel as target. We have to be
	* prepared to translate varnos in in_info_list as well as in the
	* Query proper.
	*
	memcpy(&subroot, root, sizeof(PlannerInfo));
	subroot.parse = (Query *)
	adjust_appendrel_attrs((Node *) parse,
	appinfo);
	subroot.in_info_list = (List *)
	adjust_appendrel_attrs((Node *) root->in_info_list,
	appinfo);
	// There shouldn't be any OJ info to translate, as yet
	Assert(subroot.oj_info_list == NIL);

	subroot->resultRelations = NIL;
	subroot->returningLists = NIL;
	*/

	/* single tlist entry that is the aggregate target */
	tle = makeTargetEntry(copyObject(info->target),
	1,
	pstrdup("agg_target"),
	false);
	subparse->targetList = list_make1(tle);

	/* set up the appropriate ORDER BY entry */
	sortcl = makeNode(SortClause);
	sortcl->tleSortGroupRef = assignSortGroupRef(tle, subparse->targetList);
	sortcl->sortop = info->aggsortop;
	subparse->sortClause = list_make1(sortcl);

	/* set up LIMIT 1 */
	subparse->limitOffset = NULL;
	subparse->limitCount = (Node *) makeConst(INT8OID, -1, sizeof(int64),
	Int64GetDatum(1),
	false, true /* not by val */ );

	/*
	* Generate the plan for the subquery. We already have a Path for the
	* basic indexscan, but we have to convert it to a Plan and attach a LIMIT
	* node above it.
	*
	* Also we must add a "WHERE foo IS NOT NULL" restriction to the
	* indexscan, to be sure we don't return a NULL, which'd be contrary to
	* the standard behavior of MIN/MAX. XXX ideally this should be done
	* earlier, so that the selectivity of the restriction could be included
	* in our cost estimates. But that looks painful, and in most cases the
	* fraction of NULLs isn't high enough to change the decision.
	*
	* The NOT NULL qual has to go on the actual indexscan; create_plan might
	* have stuck a gating Result atop that, if there were any pseudoconstant
	* quals.
	*/
	plan = create_plan(&subroot, (Path *) info->path);

	/* Replace the plan's tlist with a copy of the one we built above. */
	plan = plan_pushdown_tlist(plan, copyObject(subparse->targetList));

	if (IsA(plan, Result))
	iplan = plan->lefttree;
	else
	iplan = plan;
	Assert(IsA(iplan, IndexScan));

	ntest = makeNode(NullTest);
	ntest->nulltesttype = IS_NOT_NULL;
	ntest->arg = copyObject(info->target);

	iplan->qual = lcons(ntest, iplan->qual);

	plan = (Plan *) make_limit(plan,
	subparse->limitOffset,
	subparse->limitCount,
	0, 1);

	/* Decorate the Limit node with a Flow node. */
	plan->flow = pull_up_Flow(plan, plan->lefttree, false);

	/*
	* Convert the plan into an InitPlan, and make a Param for its result.
	*/
	info->param = SS_make_initplan_from_plan(&subroot, plan,
	exprType((Node *) tle->expr),
	-1);
	}

	/*
	* Replace original aggregate calls with subplan output Params
	*/
	static Node *
	replace_aggs_with_params_mutator(Node node, List *context)
	{
	if (node == NULL)
	return NULL;
	if (IsA(node, Aggref))
	{
	Aggref aggref = (Aggref ) node;
	ListCell *l;
	Expr *curTarget = linitial(aggref->args);

	foreach(l, *context)
	{
	MinMaxAggInfo info = (MinMaxAggInfo ) lfirst(l);

	if (info->aggfnoid == aggref->aggfnoid &&
	equal(info->target, curTarget))
	return (Node *) info->param;
	}
	elog(ERROR, "failed to re-find aggregate info record");
	}
	Assert(!IsA(node, SubLink));
	return expression_tree_mutator(node, replace_aggs_with_params_mutator,
	(void *) context);
	}

	/*
	* Get the OID of the sort operator, if any, associated with an aggregate.
	* Returns InvalidOid if there is no such operator.
	*/
	static Oid
	fetch_agg_sort_op(Oid aggfnoid)
	{
	HeapTuple aggTuple;
	Form_pg_aggregate aggform;
	Oid aggsortop;

	/* fetch aggregate entry from pg_aggregate */
	aggTuple = SearchSysCache(AGGFNOID,
	ObjectIdGetDatum(aggfnoid),
	0, 0, 0);
	if (!HeapTupleIsValid(aggTuple))
	return InvalidOid;
	aggform = (Form_pg_aggregate) GETSTRUCT(aggTuple);
	aggsortop = aggform->aggsortop;
	ReleaseSysCache(aggTuple);

	return aggsortop;
	}