blob: 47075ef7be26caba2858a6c9bd53fbefa6be9c9c [file] [log] [blame]
/*
* For PostgreSQL Database Management System:
* (formerly known as Postgres, then as Postgres95)
*
* Portions Copyright (c) 1996-2010, The PostgreSQL Global Development Group
*
* Portions Copyright (c) 1994, The Regents of the University of California
*
* Permission to use, copy, modify, and distribute this software and its documentation for any purpose,
* without fee, and without a written agreement is hereby granted, provided that the above copyright notice
* and this paragraph and the following two paragraphs appear in all copies.
*
* IN NO EVENT SHALL THE UNIVERSITY OF CALIFORNIA BE LIABLE TO ANY PARTY FOR DIRECT,
* INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, INCLUDING LOST PROFITS,
* ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN IF THE UNIVERSITY
* OF CALIFORNIA HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
* THE UNIVERSITY OF CALIFORNIA SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
* BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
*
* THE SOFTWARE PROVIDED HEREUNDER IS ON AN "AS IS" BASIS, AND THE UNIVERSITY OF CALIFORNIA
* HAS NO OBLIGATIONS TO PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
*/
#include "postgres.h"
#include "catalog/pg_constraint.h"
#include "nodes/nodeFuncs.h"
#include "optimizer/tlist.h"
#include "optimizer/optimizer.h"
#include "parser/cypher_parse_agg.h"
#include "parser/parsetree.h"
#include "rewrite/rewriteManip.h"
typedef struct
{
ParseState *pstate;
int min_varlevel;
int min_agglevel;
int sublevels_up;
} check_agg_arguments_context;
typedef struct
{
ParseState *pstate;
Query *qry;
PlannerInfo *root;
List *groupClauses;
List *groupClauseCommonVars;
bool have_non_var_grouping;
List **func_grouped_rels;
int sublevels_up;
bool in_agg_direct_args;
} check_ungrouped_columns_context;
static void check_ungrouped_columns(Node *node, ParseState *pstate, Query *qry,
List *groupClauses, List *groupClauseVars,
bool have_non_var_grouping,
List **func_grouped_rels);
static bool check_ungrouped_columns_walker(Node *node,
check_ungrouped_columns_context *context);
static void finalize_grouping_exprs(Node *node, ParseState *pstate, Query *qry,
List *groupClauses, PlannerInfo *root,
bool have_non_var_grouping);
static bool finalize_grouping_exprs_walker(Node *node,
check_ungrouped_columns_context *context);
static List *expand_groupingset_node(GroupingSet *gs);
static List * expand_grouping_sets(List *groupingSets, int limit);
/*
* From PG's parseCheckAggregates
*
* Check for aggregates where they shouldn't be and improper grouping.
* This function should be called after the target list and qualifications
* are finalized.
*
* Misplaced aggregates are now mostly detected in transformAggregateCall,
* but it seems more robust to check for aggregates in recursive queries
* only after everything is finalized. In any case it's hard to detect
* improper grouping on-the-fly, so we have to make another pass over the
* query for that.
*/
void parse_check_aggregates(ParseState *pstate, Query *qry)
{
List *gset_common = NIL;
List *groupClauses = NIL;
List *groupClauseCommonVars = NIL;
bool have_non_var_grouping;
List *func_grouped_rels = NIL;
ListCell *l;
bool hasJoinRTEs;
bool hasSelfRefRTEs;
PlannerInfo *root = NULL;
Node *clause;
/* This should only be called if we found aggregates or grouping */
Assert(pstate->p_hasAggs || qry->groupClause || qry->havingQual || qry->groupingSets);
/*
* If we have grouping sets, expand them and find the intersection of all
* sets.
*/
if (qry->groupingSets)
{
/*
* The limit of 4096 is arbitrary and exists simply to avoid resource
* issues from pathological constructs.
*/
List *gsets = expand_grouping_sets(qry->groupingSets, 4096);
if (!gsets)
ereport(ERROR,
(errcode(ERRCODE_STATEMENT_TOO_COMPLEX),
errmsg("too many grouping sets present (maximum 4096)"),
parser_errposition(pstate,
qry->groupClause ?
exprLocation((Node *) qry->groupClause) :
exprLocation((Node *) qry->groupingSets))));
/*
* The intersection will often be empty, so help things along by
* seeding the intersect with the smallest set.
*/
gset_common = linitial(gsets);
if (gset_common)
{
for_each_cell(l, gsets, lnext(gsets, list_head(gsets)))
{
gset_common = list_intersection_int(gset_common, lfirst(l));
if (!gset_common)
break;
}
}
/*
* If there was only one grouping set in the expansion, AND if the
* groupClause is non-empty (meaning that the grouping set is not
* empty either), then we can ditch the grouping set and pretend we
* just had a normal GROUP BY.
*/
if (list_length(gsets) == 1 && qry->groupClause)
qry->groupingSets = NIL;
}
/*
* Scan the range table to see if there are JOIN or self-reference CTE
* entries. We'll need this info below.
*/
hasJoinRTEs = hasSelfRefRTEs = false;
foreach(l, pstate->p_rtable)
{
RangeTblEntry *rte = (RangeTblEntry *) lfirst(l);
if (rte->rtekind == RTE_JOIN)
hasJoinRTEs = true;
else if (rte->rtekind == RTE_CTE && rte->self_reference)
hasSelfRefRTEs = true;
}
/*
* Build a list of the acceptable GROUP BY expressions for use by
* check_ungrouped_columns().
*
* We get the TLE, not just the expr, because GROUPING wants to know the
* sortgroupref.
*/
foreach(l, qry->groupClause)
{
SortGroupClause *grpcl = (SortGroupClause *) lfirst(l);
TargetEntry *expr;
expr = get_sortgroupclause_tle(grpcl, qry->targetList);
if (expr == NULL)
continue; /* probably cannot happen */
groupClauses = lcons(expr, groupClauses);
}
/*
* If there are join alias vars involved, we have to flatten them to the
* underlying vars, so that aliased and unaliased vars will be correctly
* taken as equal. We can skip the expense of doing this if no rangetable
* entries are RTE_JOIN kind. We use the planner's flatten_join_alias_vars
* routine to do the flattening; it wants a PlannerInfo root node, which
* fortunately can be mostly dummy.
*/
if (hasJoinRTEs)
{
root = makeNode(PlannerInfo);
root->parse = qry;
root->planner_cxt = CurrentMemoryContext;
root->hasJoinRTEs = true;
groupClauses = (List *) flatten_join_alias_vars((Query*)root,
(Node *) groupClauses);
}
/*
* Detect whether any of the grouping expressions aren't simple Vars; if
* they're all Vars then we don't have to work so hard in the recursive
* scans. (Note we have to flatten aliases before this.)
*
* Track Vars that are included in all grouping sets separately in
* groupClauseCommonVars, since these are the only ones we can use to
* check for functional dependencies.
*/
have_non_var_grouping = false;
foreach(l, groupClauses)
{
TargetEntry *tle = lfirst(l);
if (!IsA(tle->expr, Var))
{
have_non_var_grouping = true;
}
else if (!qry->groupingSets ||
list_member_int(gset_common, tle->ressortgroupref))
{
groupClauseCommonVars = lappend(groupClauseCommonVars, tle->expr);
}
}
/*
* Check the targetlist and HAVING clause for ungrouped variables.
*
* Note: because we check resjunk tlist elements as well as regular ones,
* this will also find ungrouped variables that came from ORDER BY and
* WINDOW clauses. For that matter, it's also going to examine the
* grouping expressions themselves --- but they'll all pass the test ...
*
* We also finalize GROUPING expressions, but for that we need to traverse
* the original (unflattened) clause in order to modify nodes.
*/
clause = (Node *) qry->targetList;
finalize_grouping_exprs(clause, pstate, qry, groupClauses, root,
have_non_var_grouping);
if (hasJoinRTEs)
clause = flatten_join_alias_vars((Query*)root, clause);
check_ungrouped_columns(clause, pstate, qry, groupClauses,
groupClauseCommonVars, have_non_var_grouping,
&func_grouped_rels);
clause = (Node *) qry->havingQual;
finalize_grouping_exprs(clause, pstate, qry, groupClauses, root,
have_non_var_grouping);
if (hasJoinRTEs)
clause = flatten_join_alias_vars((Query*)root, clause);
check_ungrouped_columns(clause, pstate, qry, groupClauses,
groupClauseCommonVars, have_non_var_grouping,
&func_grouped_rels);
/*
* Per spec, aggregates can't appear in a recursive term.
*/
if (pstate->p_hasAggs && hasSelfRefRTEs)
ereport(ERROR,
(errcode(ERRCODE_INVALID_RECURSION),
errmsg("aggregate functions are not allowed in a recursive query's recursive term"),
parser_errposition(pstate, locate_agg_of_level((Node *) qry, 0))));
}
/*
* check_ungrouped_columns -
*
* Scan the given expression tree for ungrouped variables (variables
* that are not listed in the groupClauses list and are not within
* the arguments of aggregate functions). Emit a suitable error message
* if any are found.
*
* NOTE: we assume that the given clause has been transformed suitably for
* parser output. This means we can use expression_tree_walker.
*
* NOTE: we recognize grouping expressions in the main query, but only
* grouping Vars in subqueries. For example, this will be rejected,
* although it could be allowed:
* SELECT (SELECT x FROM bar where y = (foo.a + foo.b))
* FROM foo
* GROUP BY a + b;
* The difficulty is the need to account for different sublevels_up.
* This appears to require a whole custom version of equal(), which is
* way more pain than the feature seems worth.
*/
static void check_ungrouped_columns(Node *node, ParseState *pstate, Query *qry,
List *groupClauses,
List *groupClauseCommonVars,
bool have_non_var_grouping,
List **func_grouped_rels)
{
check_ungrouped_columns_context context;
context.pstate = pstate;
context.qry = qry;
context.root = NULL;
context.groupClauses = groupClauses;
context.groupClauseCommonVars = groupClauseCommonVars;
context.have_non_var_grouping = have_non_var_grouping;
context.func_grouped_rels = func_grouped_rels;
context.sublevels_up = 0;
context.in_agg_direct_args = false;
check_ungrouped_columns_walker(node, &context);
}
static bool check_ungrouped_columns_walker(Node *node, check_ungrouped_columns_context *context)
{
ListCell *gl;
if (node == NULL)
return false;
if (IsA(node, Const) || IsA(node, Param))
return false; /* constants are always acceptable */
if (IsA(node, Aggref))
{
Aggref *agg = (Aggref *) node;
if ((int) agg->agglevelsup == context->sublevels_up)
{
/*
* If we find an aggregate call of the original level, do not
* recurse into its normal arguments, ORDER BY arguments, or
* filter; ungrouped vars there are not an error. But we should
* check direct arguments as though they weren't in an aggregate.
* We set a special flag in the context to help produce a useful
* error message for ungrouped vars in direct arguments.
*/
bool result;
Assert(!context->in_agg_direct_args);
context->in_agg_direct_args = true;
result = check_ungrouped_columns_walker((Node *) agg->aggdirectargs,
context);
context->in_agg_direct_args = false;
return result;
}
/*
* We can skip recursing into aggregates of higher levels altogether,
* since they could not possibly contain Vars of concern to us (see
* transformAggregateCall). We do need to look at aggregates of lower
* levels, however.
*/
if ((int) agg->agglevelsup > context->sublevels_up)
return false;
}
if (IsA(node, GroupingFunc))
{
GroupingFunc *grp = (GroupingFunc *) node;
/* handled GroupingFunc separately, no need to recheck at this level */
if ((int) grp->agglevelsup >= context->sublevels_up)
return false;
}
/*
* If we have any GROUP BY items that are not simple Vars, check to see if
* subexpression as a whole matches any GROUP BY item. We need to do this
* at every recursion level so that we recognize GROUPed-BY expressions
* before reaching variables within them. But this only works at the outer
* query level, as noted above.
*/
if (context->have_non_var_grouping && context->sublevels_up == 0)
{
foreach(gl, context->groupClauses)
{
TargetEntry *tle = lfirst(gl);
if (equal(node, tle->expr))
return false; /* acceptable, do not descend more */
}
}
/*
* If we have an ungrouped Var of the original query level, we have a
* failure. Vars below the original query level are not a problem, and
* neither are Vars from above it. (If such Vars are ungrouped as far as
* their own query level is concerned, that's someone else's problem...)
*/
if (IsA(node, Var))
{
Var *var = (Var *) node;
RangeTblEntry *rte;
char *attname;
if (var->varlevelsup != context->sublevels_up)
return false; /* it's not local to my query, ignore */
/*
* Check for a match, if we didn't do it above.
*/
if (!context->have_non_var_grouping || context->sublevels_up != 0)
{
foreach(gl, context->groupClauses)
{
Var *gvar = (Var *) ((TargetEntry *) lfirst(gl))->expr;
if (IsA(gvar, Var) &&
gvar->varno == var->varno &&
gvar->varattno == var->varattno &&
gvar->varlevelsup == 0)
return false; /* acceptable, we're okay */
}
}
/*
* Check whether the Var is known functionally dependent on the GROUP
* BY columns. If so, we can allow the Var to be used, because the
* grouping is really a no-op for this table. However, this deduction
* depends on one or more constraints of the table, so we have to add
* those constraints to the query's constraintDeps list, because it's
* not semantically valid anymore if the constraint(s) get dropped.
* (Therefore, this check must be the last-ditch effort before raising
* error: we don't want to add dependencies unnecessarily.)
*
* Because this is a pretty expensive check, and will have the same
* outcome for all columns of a table, we remember which RTEs we've
* already proven functional dependency for in the func_grouped_rels
* list. This test also prevents us from adding duplicate entries to
* the constraintDeps list.
*/
if (list_member_int(*context->func_grouped_rels, var->varno))
return false; /* previously proven acceptable */
Assert(var->varno > 0 &&
(int) var->varno <= list_length(context->pstate->p_rtable));
rte = rt_fetch(var->varno, context->pstate->p_rtable);
if (rte->rtekind == RTE_RELATION)
{
if (check_functional_grouping(rte->relid, var->varno, 0,
context->groupClauseCommonVars,
&context->qry->constraintDeps))
{
*context->func_grouped_rels = lappend_int(*context->func_grouped_rels,
var->varno);
return false; /* acceptable */
}
}
/* Found an ungrouped local variable; generate error message */
attname = get_rte_attribute_name(rte, var->varattno);
if (context->sublevels_up == 0)
ereport(ERROR, (errcode(ERRCODE_GROUPING_ERROR),
errmsg("\"%s\" must be either part of an explicitly listed key or used inside an aggregate function",
attname), context->in_agg_direct_args ?
errdetail("Direct arguments of an ordered-set aggregate must use only grouped columns.") :
0, parser_errposition(context->pstate, var->location)));
else
ereport(ERROR, (errcode(ERRCODE_GROUPING_ERROR),
errmsg("subquery uses ungrouped column \"%s.%s\" from outer query",
rte->eref->aliasname, attname),
parser_errposition(context->pstate, var->location)));
}
if (IsA(node, Query))
{
/* Recurse into subselects */
bool result;
context->sublevels_up++;
result = query_tree_walker((Query *) node,
check_ungrouped_columns_walker,
(void *) context, 0);
context->sublevels_up--;
return result;
}
return expression_tree_walker(node, check_ungrouped_columns_walker,
(void *) context);
}
/*
* finalize_grouping_exprs -
* Scan the given expression tree for GROUPING() and related calls,
* and validate and process their arguments.
*
* This is split out from check_ungrouped_columns above because it needs
* to modify the nodes (which it does in-place, not via a mutator) while
* check_ungrouped_columns may see only a copy of the original thanks to
* flattening of join alias vars. So here, we flatten each individual
* GROUPING argument as we see it before comparing it.
*/
static void finalize_grouping_exprs(Node *node, ParseState *pstate, Query *qry,
List *groupClauses, PlannerInfo *root,
bool have_non_var_grouping)
{
check_ungrouped_columns_context context;
context.pstate = pstate;
context.qry = qry;
context.root = root;
context.groupClauses = groupClauses;
context.groupClauseCommonVars = NIL;
context.have_non_var_grouping = have_non_var_grouping;
context.func_grouped_rels = NULL;
context.sublevels_up = 0;
context.in_agg_direct_args = false;
finalize_grouping_exprs_walker(node, &context);
}
static bool finalize_grouping_exprs_walker(Node *node,
check_ungrouped_columns_context *context)
{
ListCell *gl;
if (node == NULL)
return false;
if (IsA(node, Const) || IsA(node, Param))
return false; /* constants are always acceptable */
if (IsA(node, Aggref))
{
Aggref *agg = (Aggref *) node;
if ((int) agg->agglevelsup == context->sublevels_up)
{
/*
* If we find an aggregate call of the original level, do not
* recurse into its normal arguments, ORDER BY arguments, or
* filter; GROUPING exprs of this level are not allowed there. But
* check direct arguments as though they weren't in an aggregate.
*/
bool result;
Assert(!context->in_agg_direct_args);
context->in_agg_direct_args = true;
result = finalize_grouping_exprs_walker((Node *) agg->aggdirectargs,
context);
context->in_agg_direct_args = false;
return result;
}
/*
* We can skip recursing into aggregates of higher levels altogether,
* since they could not possibly contain exprs of concern to us (see
* transformAggregateCall). We do need to look at aggregates of lower
* levels, however.
*/
if ((int) agg->agglevelsup > context->sublevels_up)
return false;
}
if (IsA(node, GroupingFunc))
{
GroupingFunc *grp = (GroupingFunc *) node;
/*
* We only need to check GroupingFunc nodes at the exact level to
* which they belong, since they cannot mix levels in arguments.
*/
if ((int) grp->agglevelsup == context->sublevels_up)
{
ListCell *lc;
List *ref_list = NIL;
foreach(lc, grp->args)
{
Node *expr = lfirst(lc);
Index ref = 0;
if (context->root)
expr = flatten_join_alias_vars((Query*)context->root, expr);
/*
* Each expression must match a grouping entry at the current
* query level. Unlike the general expression case, we don't
* allow functional dependencies or outer references.
*/
if (IsA(expr, Var))
{
Var *var = (Var *) expr;
if (var->varlevelsup == context->sublevels_up)
{
foreach(gl, context->groupClauses)
{
TargetEntry *tle = lfirst(gl);
Var *gvar = (Var *) tle->expr;
if (IsA(gvar, Var) &&
gvar->varno == var->varno &&
gvar->varattno == var->varattno &&
gvar->varlevelsup == 0)
{
ref = tle->ressortgroupref;
break;
}
}
}
}
else if (context->have_non_var_grouping &&
context->sublevels_up == 0)
{
foreach(gl, context->groupClauses)
{
TargetEntry *tle = lfirst(gl);
if (equal(expr, tle->expr))
{
ref = tle->ressortgroupref;
break;
}
}
}
if (ref == 0)
ereport(ERROR,
(errcode(ERRCODE_GROUPING_ERROR),
errmsg("arguments to GROUPING must be grouping expressions of the associated query level"),
parser_errposition(context->pstate,
exprLocation(expr))));
ref_list = lappend_int(ref_list, ref);
}
grp->refs = ref_list;
}
if ((int) grp->agglevelsup > context->sublevels_up)
return false;
}
if (IsA(node, Query))
{
/* Recurse into subselects */
bool result;
context->sublevels_up++;
result = query_tree_walker((Query *) node,
finalize_grouping_exprs_walker,
(void *) context, 0);
context->sublevels_up--;
return result;
}
return expression_tree_walker(node, finalize_grouping_exprs_walker,
(void *) context);
}
/*
* Given a GroupingSet node, expand it and return a list of lists.
*
* For EMPTY nodes, return a list of one empty list.
*
* For SIMPLE nodes, return a list of one list, which is the node content.
*
* For CUBE and ROLLUP nodes, return a list of the expansions.
*
* For SET nodes, recursively expand contained CUBE and ROLLUP.
*/
static List * expand_groupingset_node(GroupingSet *gs)
{
List *result = NIL;
switch (gs->kind)
{
case GROUPING_SET_EMPTY:
result = list_make1(NIL);
break;
case GROUPING_SET_SIMPLE:
result = list_make1(gs->content);
break;
case GROUPING_SET_ROLLUP:
{
List *rollup_val = gs->content;
ListCell *lc;
int curgroup_size = list_length(gs->content);
while (curgroup_size > 0)
{
List *current_result = NIL;
int i = curgroup_size;
foreach(lc, rollup_val)
{
GroupingSet *gs_current = (GroupingSet *) lfirst(lc);
Assert(gs_current->kind == GROUPING_SET_SIMPLE);
current_result = list_concat(current_result,
list_copy(gs_current->content));
/* If we are done with making the current group, break */
if (--i == 0)
break;
}
result = lappend(result, current_result);
--curgroup_size;
}
result = lappend(result, NIL);
}
break;
case GROUPING_SET_CUBE:
{
List *cube_list = gs->content;
int number_bits = list_length(cube_list);
uint32 num_sets;
uint32 i;
/* parser should cap this much lower */
Assert(number_bits < 31);
num_sets = (1U << number_bits);
for (i = 0; i < num_sets; i++)
{
List *current_result = NIL;
ListCell *lc;
uint32 mask = 1U;
foreach(lc, cube_list)
{
GroupingSet *gs_current = (GroupingSet *) lfirst(lc);
Assert(gs_current->kind == GROUPING_SET_SIMPLE);
if (mask & i)
{
current_result = list_concat(current_result,
list_copy(gs_current->content));
}
mask <<= 1;
}
result = lappend(result, current_result);
}
}
break;
case GROUPING_SET_SETS:
{
ListCell *lc;
foreach(lc, gs->content)
{
List *current_result = expand_groupingset_node(lfirst(lc));
result = list_concat(result, current_result);
}
}
break;
}
return result;
}
static int cmp_list_len_asc(const void *a, const void *b)
{
int la = list_length(*(List *const *) a);
int lb = list_length(*(List *const *) b);
return (la > lb) ? 1 : (la == lb) ? 0 : -1;
}
/*
* Expand a groupingSets clause to a flat list of grouping sets.
* The returned list is sorted by length, shortest sets first.
*
* This is mainly for the planner, but we use it here too to do
* some consistency checks.
*/
static List * expand_grouping_sets(List *groupingSets, int limit)
{
List *expanded_groups = NIL;
List *result = NIL;
double numsets = 1;
ListCell *lc;
if (groupingSets == NIL)
return NIL;
foreach(lc, groupingSets)
{
List *current_result = NIL;
GroupingSet *gs = lfirst(lc);
current_result = expand_groupingset_node(gs);
Assert(current_result != NIL);
numsets *= list_length(current_result);
if (limit >= 0 && numsets > limit)
return NIL;
expanded_groups = lappend(expanded_groups, current_result);
}
/*
* Do cartesian product between sublists of expanded_groups. While at it,
* remove any duplicate elements from individual grouping sets (we must
* NOT change the number of sets though)
*/
foreach(lc, (List *) linitial(expanded_groups))
{
result = lappend(result, list_union_int(NIL, (List *) lfirst(lc)));
}
for_each_cell(lc, expanded_groups,
lnext(expanded_groups, list_head(expanded_groups)))
{
List *p = lfirst(lc);
List *new_result = NIL;
ListCell *lc2;
foreach(lc2, result)
{
List *q = lfirst(lc2);
ListCell *lc3;
foreach(lc3, p)
{
new_result = lappend(new_result,
list_union_int(q,(List *) lfirst(lc3)));
}
}
result = new_result;
}
if (list_length(result) > 1)
{
int result_len = list_length(result);
List **buf = palloc(sizeof(List *) * result_len);
List **ptr = buf;
foreach(lc, result)
{
*ptr++ = lfirst(lc);
}
qsort(buf, result_len, sizeof(List *), cmp_list_len_asc);
result = NIL;
ptr = buf;
while (result_len-- > 0)
result = lappend(result, *ptr++);
pfree(buf);
}
return result;
}