| /* |
| * Licensed to the Apache Software Foundation (ASF) under one |
| * or more contributor license agreements. See the NOTICE file |
| * distributed with this work for additional information |
| * regarding copyright ownership. The ASF licenses this file |
| * to you under the Apache License, Version 2.0 (the |
| * "License"); you may not use this file except in compliance |
| * with the License. You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, |
| * software distributed under the License is distributed on an |
| * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
| * KIND, either express or implied. See the License for the |
| * specific language governing permissions and limitations |
| * under the License. |
| */ |
| |
| /*------------------------------------------------------------------------- |
| * |
| * cdbsubselect.c |
| * Flattens subqueries, transforms them to joins. |
| * |
| *------------------------------------------------------------------------- |
| */ |
| #include "postgres.h" |
| |
| #include "catalog/catquery.h" |
| #include "catalog/pg_type.h" /* INT8OID */ |
| #include "nodes/makefuncs.h" |
| #include "optimizer/clauses.h" |
| #include "optimizer/subselect.h" /* convert_testexpr() */ |
| #include "optimizer/prep.h" /* canonicalize_qual() */ |
| #include "optimizer/var.h" /* contain_vars_of_level_or_above() */ |
| #include "parser/parse_oper.h" /* make_op() */ |
| #include "parser/parse_expr.h" |
| #include "parser/parse_relation.h" /* addRangeTableEntryForSubquery() */ |
| #include "parser/parsetree.h" /* rt_fetch() */ |
| #include "rewrite/rewriteManip.h" |
| #include "utils/lsyscache.h" /* get_op_btree_interpretation() */ |
| #include "utils/syscache.h" |
| #include "cdb/cdbsubselect.h" /* me */ |
| #include "lib/stringinfo.h" |
| #include "cdb/cdbpullup.h" |
| |
| extern bool sort_op_can_sort(Oid opid, bool mergejoin); |
| extern List *add_to_flat_tlist(List *tlist, List *exprs, bool resjunk); |
| |
| static Node * |
| pull_up_IN_clauses(PlannerInfo *root, List** rtrlist_inout, Node *clause); |
| |
| static Node * |
| convert_IN_to_antijoin(PlannerInfo * root, List** rtrlist_inout, SubLink * sublink); |
| |
| static int |
| add_expr_subquery_rte(Query * parse, Query * subselect); |
| |
| static JoinExpr * |
| make_join_expr(Node *larg, int r_rtindex, int join_type); |
| |
| static Node * |
| make_lasj_quals(PlannerInfo *root, SubLink * sublink, int subquery_indx); |
| |
| static Node * |
| add_null_match_clause(Node *clause); |
| |
| static Node * |
| not_null_inner_vars(Node *clause); |
| |
| typedef struct NonNullableVarsContext |
| { |
| Query *query; /* Query in question. */ |
| List *nonNullableVars; /* Known non-nullable vars */ |
| } NonNullableVarsContext; |
| |
| |
| /** |
| * Walker that performs the following tasks: |
| * - It checks if a given expr is "safe" to be pulled up to be a join |
| * - Extracts out the vars from the outer query in the qual in order |
| * - Extracts out the vars from the inner query in the qual in order |
| */ |
| typedef struct ConvertSubqueryToJoinContext |
| { |
| bool safeToConvert; /* Can correlated expression subquery be pulled up? */ |
| bool considerNonEqualQual; /* Should we consider correlated expr of the form o.a OP i.b ?*/ |
| Node *joinQual; /* Qual to employ to join subquery */ |
| Node *innerQual; /* Qual to leave behind in subquery */ |
| List *targetList; /* targetlist for subquery */ |
| bool extractGrouping; /* extract grouping information based on join conditions */ |
| List *groupClause; /* grouping clause for subquery */ |
| } ConvertSubqueryToJoinContext; |
| |
| static void ProcessSubqueryToJoin(Query *subselect, ConvertSubqueryToJoinContext *context); |
| static void SubqueryToJoinWalker(Node *node, ConvertSubqueryToJoinContext *context); |
| static void RemoveInnerJoinQuals(Query *subselect); |
| |
| static bool find_nonnullable_vars_walker(Node *node, NonNullableVarsContext *context); |
| static bool is_attribute_nonnullable(Oid relationOid, AttrNumber attrNumber); |
| static bool is_targetlist_nullable(Query *subq); |
| static Node *make_outer_nonnull_clause(SubLink *sublink); |
| |
| static bool has_correlation_in_funcexpr_rte(List *rtable); |
| |
| #define DUMMY_COLUMN_NAME "zero" |
| |
| /* |
| * cdbsubselect_flatten_sublinks |
| */ |
| void |
| cdbsubselect_flatten_sublinks(struct PlannerInfo *root, struct Node *jtnode) |
| { |
| List *rtrlist = NIL; |
| ListCell *cell; |
| |
| if (jtnode == NULL) |
| return; |
| |
| switch (jtnode->type) |
| { |
| case T_Query: |
| { |
| Query *query = (Query *)jtnode; |
| |
| /* Flatten sublinks in JOIN...ON and WHERE search conditions. */ |
| cdbsubselect_flatten_sublinks(root, (Node *)query->jointree); |
| break; |
| } |
| |
| case T_FromExpr: |
| { |
| FromExpr *fromexpr = (FromExpr *)jtnode; |
| |
| /* Flatten sublinks in JOIN...ON search conditions. */ |
| foreach(cell, fromexpr->fromlist) |
| cdbsubselect_flatten_sublinks(root, (Node *)lfirst(cell)); |
| |
| /* Flatten sublinks in WHERE search condition. */ |
| fromexpr->quals = pull_up_IN_clauses(root, &rtrlist, fromexpr->quals); |
| |
| /* Append any new RangeTblRef nodes to the FROM clause. */ |
| if (rtrlist) |
| fromexpr->fromlist = list_concat(fromexpr->fromlist, rtrlist); |
| break; |
| } |
| |
| case T_JoinExpr: |
| { |
| JoinExpr *joinexpr = (JoinExpr *)jtnode; |
| |
| /* We support flattening of sublinks in JOIN...ON only for inner joins */ |
| if (joinexpr->jointype != JOIN_INNER) |
| break; |
| |
| /* Process left and right inputs of JOIN. */ |
| cdbsubselect_flatten_sublinks(root, joinexpr->larg); |
| cdbsubselect_flatten_sublinks(root, joinexpr->rarg); |
| |
| /* Flatten sublinks in JOIN...ON search condition. */ |
| joinexpr->quals = pull_up_IN_clauses(root, &rtrlist, joinexpr->quals); |
| |
| /* Add any new RangeTblRef nodes to the join. */ |
| joinexpr->subqfromlist = rtrlist; |
| break; |
| } |
| |
| case T_RangeTblRef: |
| break; |
| |
| default: |
| Assert(0); |
| } |
| } /* cdbsubselect_flatten_sublinks */ |
| |
| |
| /* |
| * cdbsubselect_drop_distinct |
| */ |
| static void |
| cdbsubselect_drop_distinct(Query *subselect) |
| { |
| if (subselect->limitCount == NULL && |
| subselect->limitOffset == NULL) |
| { |
| /* Delete DISTINCT. */ |
| subselect->distinctClause = NIL; |
| |
| /* Delete GROUP BY if subquery has no aggregates and no HAVING. */ |
| if (!subselect->hasAggs && |
| subselect->havingQual == NULL) |
| subselect->groupClause = NIL; |
| } |
| } /* cdbsubselect_drop_distinct */ |
| |
| |
| /* |
| * cdbsubselect_drop_orderby |
| */ |
| static void |
| cdbsubselect_drop_orderby(Query *subselect) |
| { |
| if (subselect->limitCount == NULL && |
| subselect->limitOffset == NULL) |
| { |
| /* Delete ORDER BY. */ |
| subselect->sortClause = NIL; |
| } |
| } /* cdbsubselect_drop_orderby */ |
| |
| |
| /* |
| * cdbsubselect_add_rte_and_ininfo |
| */ |
| static InClauseInfo * |
| cdbsubselect_add_rte_and_ininfo(PlannerInfo *root, |
| List **rtrlist_inout, |
| Query *subselect, |
| const char *aliasname) |
| { |
| InClauseInfo *ininfo; |
| RangeTblEntry *rte; |
| RangeTblRef *rtr; |
| int rtindex; |
| |
| /* Build an InClauseInfo struct. */ |
| ininfo = makeNode(InClauseInfo); |
| ininfo->sub_targetlist = NULL; |
| |
| /* Determine the index of the subquery RTE that we'll create below. */ |
| rtindex = list_length(root->parse->rtable) + 1; |
| ininfo->righthand = bms_make_singleton(rtindex); |
| |
| /* Tell join planner to quell duplication of outer query result rows. */ |
| root->in_info_list = lappend(root->in_info_list, ininfo); |
| |
| /* Make a subquery RTE in the current query level. */ |
| rte = addRangeTableEntryForSubquery(NULL, |
| subselect, |
| makeAlias(aliasname, NIL), |
| false); |
| root->parse->rtable = lappend(root->parse->rtable, rte); |
| |
| /* Tell caller to augment the jointree with a reference to the new RTE. */ |
| rtr = makeNode(RangeTblRef); |
| rtr->rtindex = rtindex; |
| *rtrlist_inout = lappend(*rtrlist_inout, rtr); |
| |
| return ininfo; |
| } /* cdbsubselect_add_rte_and_ininfo */ |
| |
| /** |
| * safe_to_convert_NOT_EXISTS |
| */ |
| static bool |
| safe_to_convert_NOT_EXISTS(SubLink *sublink, ConvertSubqueryToJoinContext *ctx1) |
| { |
| Query *subselect = (Query *)sublink->subselect; |
| |
| if (subselect->jointree->fromlist == NULL) |
| return false; |
| |
| if (expression_returns_set((Node *)subselect->targetList)) |
| return false; |
| |
| /* No set operations in the subquery */ |
| if (subselect->setOperations) |
| return false; |
| |
| /** |
| * If there are no correlations, then don't bother. |
| */ |
| if (!IsSubqueryCorrelated(subselect)) |
| return false; |
| |
| /** |
| * If deeply correlated, don't bother. |
| */ |
| if (IsSubqueryMultiLevelCorrelated(subselect)) |
| return false; |
| |
| /** |
| * If there are correlations in a func expr in the from clause, then don't bother. |
| */ |
| if (has_correlation_in_funcexpr_rte(subselect->rtable)) |
| { |
| return false; |
| } |
| |
| /** |
| * If there is a having clause, then don't bother. |
| */ |
| if (subselect->havingQual) |
| { |
| return false; |
| } |
| |
| /** |
| * If there is a limit offset, then don't bother. |
| */ |
| if (subselect->limitOffset) |
| return false; |
| |
| ProcessSubqueryToJoin(subselect, ctx1); |
| |
| return ctx1->safeToConvert; |
| } |
| |
| /** |
| * Initialize context. |
| */ |
| static void InitConvertSubqueryToJoinContext(ConvertSubqueryToJoinContext *ctx) |
| { |
| Assert(ctx); |
| ctx->safeToConvert = true; |
| ctx->joinQual = NULL; |
| ctx->innerQual = NULL; |
| ctx->groupClause = NIL; |
| ctx->targetList = NIL; |
| ctx->extractGrouping = false; |
| ctx->considerNonEqualQual = false; |
| } |
| |
| /** |
| * Process correlated opexpr of the form foo(outer.var) OP bar(inner.var). Extracts |
| * bar(inner.var) as innerExpr. |
| * Returns true, if this is not a compatible correlated opexpr. |
| */ |
| static bool IsCorrelatedOpExpr(OpExpr *opexp, Expr **innerExpr) |
| { |
| Assert(opexp); |
| Assert(list_length(opexp->args) > 1); |
| Assert(innerExpr); |
| |
| if (list_length(opexp->args) != 2) |
| { |
| return false; |
| } |
| |
| Expr *e1 = (Expr *) list_nth(opexp->args, 0); |
| Expr *e2 = (Expr *) list_nth(opexp->args, 1); |
| |
| /** |
| * One of the vars must be outer, and other must be inner. |
| */ |
| |
| Expr *tOuterExpr = NULL; |
| Expr *tInnerExpr = NULL; |
| |
| if (contain_vars_of_level((Node *) e1, 1)) |
| { |
| tOuterExpr = (Expr *) copyObject(e1); |
| tInnerExpr = (Expr *) copyObject(e2); |
| } |
| else if ((contain_vars_of_level((Node *) e2, 1))) |
| { |
| tOuterExpr = (Expr *) copyObject(e2); |
| tInnerExpr = (Expr *) copyObject(e1); |
| } |
| |
| /** |
| * It is correlated only if we found an outer var and inner expr |
| */ |
| |
| if (tOuterExpr) |
| { |
| *innerExpr = tInnerExpr; |
| return true; |
| } |
| |
| return false; |
| |
| } |
| |
| /** |
| * Checks if an opexpression is of the form (foo(outervar) = bar(innervar)) |
| * Input: |
| * opexp - op expression |
| * Output: |
| * returns true if correlated equality condition |
| * *innerExpr - points to the inner expr i.e. bar(innervar) in the condition |
| * *sortOp - postgres special, sort operator to implement the condition as a mergejoin. |
| */ |
| static bool IsCorrelatedEqualityOpExpr( OpExpr *opexp, Expr **innerExpr, Oid *sortOp ) |
| { |
| Assert(opexp); |
| Assert(list_length(opexp->args) > 1); |
| Assert(innerExpr); |
| Assert(sortOp); |
| |
| Oid lsortOp = InvalidOid; |
| Oid rsortOp = InvalidOid; |
| |
| /** |
| * If this is an expression of the form a = b, then we want to know about the vars involved. |
| */ |
| |
| bool isEqualityOp = op_mergejoinable(opexp->opno, &lsortOp, &rsortOp); |
| |
| if (!isEqualityOp) |
| { |
| return false; |
| } |
| |
| Assert(lsortOp == rsortOp); |
| Assert(lsortOp != InvalidOid); |
| |
| *sortOp = lsortOp; |
| |
| if (!IsCorrelatedOpExpr(opexp, innerExpr)) |
| { |
| return false; |
| } |
| |
| return true; |
| } |
| |
| /** |
| * Process subquery to extract useful information to be able to convert it to a join |
| */ |
| static void ProcessSubqueryToJoin(Query *subselect, ConvertSubqueryToJoinContext *context) |
| { |
| Assert(context); |
| Assert(context->safeToConvert); |
| Assert(subselect); |
| Assert(list_length(subselect->jointree->fromlist) == 1); |
| Node *joinQual = NULL; |
| |
| /** |
| * If subselect's join tree is not a plain relation or an inner join, we refuse to convert. |
| */ |
| Node *jtree = (Node *) list_nth(subselect->jointree->fromlist, 0); |
| if (IsA(jtree, JoinExpr)) |
| { |
| JoinExpr *je = (JoinExpr *) jtree; |
| if (je->jointype != JOIN_INNER) |
| { |
| context->safeToConvert = false; |
| return; |
| } |
| joinQual = je->quals; |
| } |
| |
| SubqueryToJoinWalker(subselect->jointree->quals, context); |
| |
| if (context->safeToConvert) |
| { |
| SubqueryToJoinWalker(joinQual, context); |
| } |
| |
| /** |
| * If we haven't been able to extract a proper joinQual, then it is not safe to convert |
| */ |
| if (!context->joinQual) |
| { |
| context->safeToConvert = false; |
| } |
| |
| return; |
| } |
| |
| /** |
| * Wipe out join quals i.e. top-level where clause and any quals in the top-level inner join. |
| */ |
| static void RemoveInnerJoinQuals(Query *subselect) |
| { |
| Assert(subselect); |
| Assert(list_length(subselect->jointree->fromlist) == 1); |
| |
| subselect->jointree->quals = NULL; |
| |
| Node *jtree = (Node *) list_nth(subselect->jointree->fromlist, 0); |
| if (IsA(jtree, JoinExpr)) |
| { |
| JoinExpr *je = (JoinExpr *) jtree; |
| Assert(je->jointype == JOIN_INNER); |
| je->quals = NULL; |
| } |
| return; |
| } |
| |
| /** |
| * This method recursively walks down the quals of an expression subquery to see if it can be pulled up to a join |
| * and constructs the pieces necessary to perform the pullup. |
| * E.g. SELECT * FROM outer o WHERE o.a < (SELECT avg(i.x) FROM inner i WHERE o.b = i.y) |
| * This extracts interesting pieces of the subquery so as to create SELECT i.y, avg(i.x) from inner i GROUP by i.y |
| */ |
| static void |
| SubqueryToJoinWalker(Node *node, ConvertSubqueryToJoinContext *context) |
| { |
| Assert(context); |
| Assert(context->safeToConvert); |
| |
| if (node == NULL) |
| { |
| return; |
| } |
| |
| if (IsA(node, BoolExpr)) |
| { |
| |
| /** |
| * Be extremely conservative. If there are any outer vars under an or or a not expression, then give up. |
| */ |
| if (not_clause(node) |
| || or_clause(node)) |
| { |
| if (contain_vars_of_level_or_above(node, 1)) |
| { |
| context->safeToConvert = false; |
| return; |
| } |
| context->innerQual = make_and_qual(context->innerQual, node); |
| return; |
| } |
| |
| Assert(and_clause(node)); |
| |
| BoolExpr *bexp = (BoolExpr *) node; |
| ListCell *lc = NULL; |
| foreach (lc, bexp->args) |
| { |
| Node *arg = (Node *) lfirst(lc); |
| |
| /** |
| * If there is an outer var anywhere in the boolean expression, walk recursively. |
| */ |
| if (contain_vars_of_level_or_above(arg, 1)) |
| { |
| SubqueryToJoinWalker(arg, context); |
| |
| if (!context->safeToConvert) |
| { |
| return; |
| } |
| } |
| else |
| { |
| /** |
| * This qual should be part of the subquery's inner qual. |
| */ |
| context->innerQual = make_and_qual(context->innerQual, arg); |
| } |
| } |
| return; |
| } |
| |
| /** |
| * If this is a correlated opexpression, we'd need to look inside. |
| */ |
| if (contain_vars_of_level_or_above(node, 1) && IsA(node, OpExpr)) |
| { |
| OpExpr *opexp = (OpExpr *) node; |
| |
| /** |
| * If this is an expression of the form foo(outervar) = bar(innervar), then we want to know about the inner expression. |
| */ |
| Oid sortOp = InvalidOid; |
| Expr *innerExpr = NULL; |
| bool considerOpExpr = false; |
| |
| if (context->considerNonEqualQual) |
| { |
| considerOpExpr = IsCorrelatedOpExpr(opexp, &innerExpr); |
| } |
| else |
| { |
| considerOpExpr = IsCorrelatedEqualityOpExpr(opexp, &innerExpr, &sortOp); |
| } |
| |
| if (considerOpExpr) |
| { |
| |
| TargetEntry *tle = makeTargetEntry(copyObject(innerExpr), |
| list_length(context->targetList) + 1, |
| NULL, |
| false); |
| |
| if (context->extractGrouping) |
| { |
| GroupClause *gc = makeNode(GroupClause); |
| gc->sortop = sortOp; |
| gc->tleSortGroupRef = list_length(context->groupClause) + 1; |
| context->groupClause = lappend(context->groupClause, gc); |
| tle->ressortgroupref = list_length(context->targetList) + 1; |
| } |
| |
| context->targetList = lappend(context->targetList, tle); |
| context->joinQual = make_and_qual(context->joinQual, (Node *) opexp); |
| |
| AssertImply(context->extractGrouping, list_length(context->groupClause) == list_length(context->targetList)); |
| |
| return; |
| } |
| |
| /** |
| * Correlated join expression contains incompatible operators. Not safe to convert. |
| */ |
| context->safeToConvert = false; |
| } |
| |
| return; |
| } |
| |
| |
| |
| /** |
| * Safe to convert expr sublink to a join |
| */ |
| static bool |
| safe_to_convert_EXPR(SubLink *sublink, ConvertSubqueryToJoinContext * ctx1) |
| { |
| Assert(ctx1); |
| |
| Query *subselect = (Query *)sublink->subselect; |
| |
| if (subselect->jointree->fromlist == NULL) |
| return false; |
| |
| if (expression_returns_set((Node *)subselect->targetList)) |
| return false; |
| |
| /* No set operations in the subquery */ |
| if (subselect->setOperations) |
| return false; |
| |
| /** |
| * If there are no correlations in the WHERE clause, then don't bother. |
| */ |
| if (!IsSubqueryCorrelated(subselect)) |
| return false; |
| |
| /** |
| * If deeply correlated, don't bother. |
| */ |
| if (IsSubqueryMultiLevelCorrelated(subselect)) |
| return false; |
| |
| /** |
| * If there are correlations in a func expr in the from clause, then don't bother. |
| */ |
| if (has_correlation_in_funcexpr_rte(subselect->rtable)) |
| { |
| return false; |
| } |
| |
| /** |
| * If there is a having qual, then don't bother. |
| */ |
| if (subselect->havingQual != NULL) |
| return false; |
| |
| /** |
| * If it does not have aggs, then don't bother. This could result in a run-time error. |
| */ |
| if (!subselect->hasAggs) |
| return false; |
| |
| /** |
| * Cannot support grouping clause in subselect. |
| */ |
| if (subselect->groupClause) |
| return false; |
| |
| /** |
| * If targetlist of the subquery does not contain exactly one element, don't bother. |
| */ |
| if (list_length(subselect->targetList) != 1) |
| return false; |
| |
| |
| /** |
| * Walk the quals of the subquery to do a more fine grained check as to whether this subquery |
| * may be pulled up. Identify useful fragments to construct join condition if possible to pullup. |
| */ |
| ProcessSubqueryToJoin(subselect, ctx1); |
| |
| /** |
| * There should be no outer vars in innerQual |
| */ |
| Assert(!contain_vars_of_level_or_above(ctx1->innerQual, 1)); |
| |
| return ctx1->safeToConvert; |
| } |
| |
| |
| /** |
| * convert_EXPR_to_join |
| * |
| * Method attempts to convert an EXPR_SUBLINK of the form select * from T where a > (select 10*avg(x) from R where T.b=R.y) |
| */ |
| static Node * |
| convert_EXPR_to_join(PlannerInfo *root, List** rtrlist_inout, OpExpr *opexp) |
| { |
| Assert(root); |
| Assert(list_length(opexp->args) == 2); |
| Node *rarg = list_nth(opexp->args, 1); |
| |
| Assert(IsA(rarg, SubLink)); |
| SubLink *sublink = (SubLink *) rarg; |
| |
| ConvertSubqueryToJoinContext ctx1; |
| InitConvertSubqueryToJoinContext(&ctx1); |
| ctx1.extractGrouping = true; |
| |
| if (safe_to_convert_EXPR(sublink, &ctx1)) |
| { |
| Query *subselect = (Query *) copyObject(sublink->subselect); |
| Assert(IsA(subselect, Query)); |
| |
| /** |
| * Don't care about order by clause |
| */ |
| cdbsubselect_drop_orderby(subselect); |
| |
| /** |
| * Original subselect must have a single output column (e.g. 10*avg(x) ) |
| */ |
| Assert(list_length(subselect->targetList) == 1); |
| |
| /** |
| * To pull up the subquery, we need to construct a new "Query" object that has grouping |
| * columns extracted from the correlated join predicate and the extra column from the subquery's |
| * targetlist. |
| */ |
| TargetEntry *origSubqueryTLE = (TargetEntry *) list_nth(subselect->targetList,0); |
| |
| List *subselectTargetList = (List *) copyObject(ctx1.targetList); |
| subselectTargetList = add_to_flat_tlist(subselectTargetList, list_make1(origSubqueryTLE->expr), false); |
| subselect->targetList = subselectTargetList; |
| subselect->groupClause = ctx1.groupClause; |
| |
| RemoveInnerJoinQuals(subselect); |
| |
| subselect->jointree->quals = ctx1.innerQual; |
| |
| /** |
| * Construct a new range table entry for the new pulled up subquery. |
| */ |
| int rteIndex = add_expr_subquery_rte(root->parse, subselect); |
| Assert(rteIndex > 0); |
| |
| /** |
| * Construct the join expression involving the new pulled up subselect. |
| */ |
| Assert(list_length(root->parse->jointree->fromlist) == 1); |
| Node *larg = lfirst(list_head(root->parse->jointree->fromlist)); /* represents the top-level join tree entry */ |
| Assert(larg != NULL); |
| |
| JoinExpr *join_expr = make_join_expr(larg, rteIndex, JOIN_INNER); |
| |
| Node *joinQual = ctx1.joinQual; |
| |
| /** |
| * Make outer ones regular and regular ones correspond to rteIndex |
| */ |
| joinQual = (Node *) cdbpullup_expr( (Expr *) joinQual, subselect->targetList, NULL, rteIndex); |
| IncrementVarSublevelsUp(joinQual, -1, 1); |
| |
| join_expr->quals = joinQual; |
| |
| root->parse->jointree->fromlist = list_make1(join_expr); /* Replace the join-tree with the new one */ |
| |
| TargetEntry *subselectAggTLE = (TargetEntry *) list_nth(subselect->targetList, list_length(subselect->targetList) - 1); |
| |
| /** |
| * modify the op expr to involve the column that has the computed aggregate that needs to compared. |
| */ |
| Var *aggVar = (Var *) makeVar(rteIndex, |
| subselectAggTLE->resno, |
| exprType((Node *) subselectAggTLE->expr), |
| exprTypmod((Node *) subselectAggTLE->expr), |
| 0); |
| |
| list_nth_replace(opexp->args, 1, aggVar); |
| |
| } |
| |
| return (Node *) opexp; |
| } |
| |
| |
| /** |
| * convert_NOT_EXISTS_to_antijoin |
| */ |
| static Node * |
| convert_NOT_EXISTS_to_antijoin(PlannerInfo *root, List** rtrlist_inout, SubLink *sublink) |
| { |
| Assert(root); |
| Assert(sublink); |
| |
| Query *subselect = (Query *)copyObject(sublink->subselect); |
| Assert(IsA(subselect, Query)); |
| |
| ConvertSubqueryToJoinContext ctx1; |
| InitConvertSubqueryToJoinContext(&ctx1); |
| ctx1.considerNonEqualQual = true; |
| |
| if (safe_to_convert_NOT_EXISTS(sublink, &ctx1)) |
| { |
| |
| /* Delete ORDER BY and DISTINCT. */ |
| cdbsubselect_drop_orderby(subselect); |
| cdbsubselect_drop_distinct(subselect); |
| |
| /* |
| * Trivial NOT EXISTS subquery can be eliminated altogether. |
| */ |
| if (subselect->hasAggs |
| && subselect->havingQual == NULL) |
| { |
| Assert(!subselect->limitOffset); |
| |
| return makeBoolConst(false, false); |
| } |
| |
| /* HAVING is the only place that could still contain aggregates. |
| * We can delete targetlist if there is no havingQual. |
| */ |
| if (subselect->havingQual == NULL) |
| { |
| subselect->targetList = NULL; |
| subselect->hasAggs = false; |
| } |
| |
| /* If HAVING has no aggregates, demote it to WHERE. */ |
| else if (!checkExprHasAggs(subselect->havingQual)) |
| { |
| subselect->jointree->quals = make_and_qual(subselect->jointree->quals, |
| subselect->havingQual); |
| subselect->havingQual = NULL; |
| subselect->hasAggs = false; |
| } |
| |
| /* Delete GROUP BY if no aggregates. */ |
| if (!subselect->hasAggs) |
| subselect->groupClause = NIL; |
| |
| /** |
| * Create a new LASJ between the outer query's join expr and inner subselect's join expr |
| */ |
| |
| Assert(list_length(root->parse->jointree->fromlist) == 1); |
| Assert(list_length(subselect->jointree->fromlist) == 1); |
| List *subselectTargetList = (List *) copyObject(ctx1.targetList); |
| subselect->targetList = subselectTargetList; |
| |
| RemoveInnerJoinQuals(subselect); |
| |
| subselect->jointree->quals = ctx1.innerQual; |
| |
| /** |
| * Construct a new range table entry for the new pulled up subquery. |
| */ |
| int rteIndex = add_expr_subquery_rte(root->parse, subselect); |
| Assert(rteIndex > 0); |
| |
| /** |
| * Construct the join expression involving the new pulled up subselect. |
| */ |
| Assert(list_length(root->parse->jointree->fromlist) == 1); |
| Node *larg = lfirst(list_head(root->parse->jointree->fromlist)); /* represents the top-level join tree entry */ |
| Assert(larg != NULL); |
| |
| JoinExpr *join_expr = make_join_expr(larg, rteIndex, JOIN_LASJ); |
| |
| Node *joinQual = ctx1.joinQual; |
| |
| /** |
| * Make outer ones regular and regular ones correspond to rteIndex |
| */ |
| joinQual = (Node *) cdbpullup_expr( (Expr *) joinQual, subselect->targetList, NULL, rteIndex); |
| |
| Node *innerCorrelatedVarsNotNull = not_null_inner_vars(joinQual); |
| |
| IncrementVarSublevelsUp(joinQual, -1, 1); |
| IncrementVarSublevelsUp(innerCorrelatedVarsNotNull, -1, 1); |
| |
| join_expr->quals = make_and_qual(joinQual, innerCorrelatedVarsNotNull); |
| |
| root->parse->jointree->fromlist = list_make1(join_expr); /* Replace the join-tree with the new one */ |
| return NULL; |
| } |
| |
| /** |
| * convert back to EXISTS SUBLINK; |
| */ |
| sublink->subLinkType = EXISTS_SUBLINK; |
| return (Node *) make_notclause((Expr *) sublink); |
| } |
| |
| /* |
| * convert_EXISTS_to_join |
| */ |
| static Node * |
| convert_EXISTS_to_join(PlannerInfo *root, List** rtrlist_inout, SubLink *sublink) |
| { |
| Query *subselect = (Query *)sublink->subselect; |
| Node *limitqual = NULL; |
| Node *lnode; |
| Node *rnode; |
| Node *node; |
| |
| Assert(IsA(subselect, Query)); |
| |
| if (subselect->jointree->fromlist == NULL) |
| { |
| return (Node *) sublink; |
| } |
| |
| if (has_correlation_in_funcexpr_rte(subselect->rtable)) |
| { |
| return (Node *) sublink; |
| } |
| |
| /** |
| * If deeply correlated, don't bother. |
| */ |
| if (IsSubqueryMultiLevelCorrelated(subselect)) |
| { |
| return (Node *) sublink; |
| } |
| |
| /* |
| * 'LIMIT n' makes EXISTS false when n <= 0, and doesn't affect the outcome |
| * when n > 0. Delete subquery's LIMIT and build (0 < n) expr to be ANDed |
| * into the parent qual. |
| */ |
| if (subselect->limitCount) |
| { |
| rnode = copyObject(subselect->limitCount); |
| IncrementVarSublevelsUp(rnode, -1, 1); |
| lnode = (Node *)makeConst(INT8OID, -1, sizeof(int64), Int64GetDatum(0), |
| false, true); |
| limitqual = (Node *)make_op(NULL, list_make1(makeString("<")), |
| lnode, rnode, -1); |
| subselect->limitCount = NULL; |
| } |
| |
| /* CDB TODO: Set-returning function in tlist could return empty set. */ |
| if (expression_returns_set((Node *)subselect->targetList)) |
| ereport(ERROR, (errcode(ERRCODE_GP_FEATURE_NOT_YET), |
| errmsg("Set-returning function in EXISTS subquery: not yet implemented") |
| )); |
| |
| /* |
| * Trivial EXISTS subquery can be eliminated altogether. If subquery has |
| * aggregates without GROUP BY or HAVING, its result is exactly one row |
| * (assuming no errors), unless that row is discarded by LIMIT/OFFSET. |
| */ |
| if (subselect->hasAggs && |
| subselect->groupClause == NIL && |
| subselect->havingQual == NULL) |
| { |
| /* |
| * 'OFFSET m' falsifies EXISTS for m >= 1, and doesn't affect the |
| * outcome for m < 1, given that the subquery yields at most one row. |
| * Delete subquery's OFFSET and build (m < 1) expr to be anded with |
| * the current query's WHERE clause. |
| */ |
| if (subselect->limitOffset) |
| { |
| lnode = copyObject(subselect->limitOffset); |
| IncrementVarSublevelsUp(lnode, -1, 1); |
| rnode = (Node *)makeConst(INT8OID, -1, sizeof(int64), Int64GetDatum(1), |
| false, true); |
| node = (Node *)make_op(NULL, list_make1(makeString("<")), |
| lnode, rnode, -1); |
| limitqual = make_and_qual(limitqual, node); |
| } |
| |
| /* Replace trivial EXISTS(...) with TRUE if no LIMIT/OFFSET. */ |
| if (limitqual == NULL) |
| limitqual = makeBoolConst(true, false); |
| |
| return limitqual; |
| } |
| |
| /* Delete ORDER BY and DISTINCT. */ |
| subselect->sortClause = NIL; |
| subselect->distinctClause = NIL; |
| |
| /* HAVING is the only place that could still contain aggregates. |
| * We can delete targetlist if there is no havingQual. |
| */ |
| if (subselect->havingQual == NULL) |
| { |
| subselect->targetList = NULL; |
| subselect->hasAggs = false; |
| } |
| |
| /* If HAVING has no aggregates, demote it to WHERE. */ |
| else if (!checkExprHasAggs(subselect->havingQual)) |
| { |
| subselect->jointree->quals = make_and_qual(subselect->jointree->quals, |
| subselect->havingQual); |
| subselect->havingQual = NULL; |
| subselect->hasAggs = false; |
| } |
| |
| /* Delete GROUP BY if no aggregates. */ |
| if (!subselect->hasAggs) |
| subselect->groupClause = NIL; |
| |
| /* |
| * If uncorrelated, the subquery will be executed only once. Add LIMIT 1 |
| * and let the SubLink remain unflattened. It will become an InitPlan. |
| * (CDB TODO: Would it be better to go ahead and convert these to joins?) |
| */ |
| if (!contain_vars_of_level_or_above(sublink->subselect, 1)) |
| { |
| subselect->limitCount = (Node *)makeConst(INT8OID, -1, sizeof(int64), Int64GetDatum(1), |
| false, true); |
| return make_and_qual(limitqual, (Node *)sublink); |
| } |
| |
| /* |
| * Build subquery RTE, InClauseInfo, etc. |
| */ |
| cdbsubselect_add_rte_and_ininfo(root, rtrlist_inout, subselect, "EXISTS_subquery"); |
| |
| return limitqual; |
| } /* convert_EXISTS_to_join */ |
| |
| /* |
| * convert_IN_to_join: can we convert an IN SubLink to join style? |
| * |
| * This function was formerly in subselect.c. |
| */ |
| static Node * |
| convert_IN_to_join(PlannerInfo *root, List** rtrlist_inout, SubLink *sublink) |
| { |
| Query *subselect = (Query *) sublink->subselect; |
| int rtindex; |
| InClauseInfo *ininfo; |
| bool correlated; |
| |
| Assert(IsA(subselect, Query)); |
| |
| cdbsubselect_drop_orderby(subselect); |
| cdbsubselect_drop_distinct(subselect); |
| |
| /* |
| * The combining operators and left-hand expressions mustn't be volatile. |
| */ |
| if (contain_volatile_functions(sublink->testexpr)) |
| return (Node *)sublink; |
| |
| /** |
| * If subquery returns a set-returning function (SRF) in the targetlist, we |
| * do not attempt to convert the IN to a join. |
| */ |
| |
| if (expression_returns_set((Node *) subselect->targetList)) |
| return (Node *) sublink; |
| |
| /** |
| * If deeply correlated, then don't pull it up |
| */ |
| if (IsSubqueryMultiLevelCorrelated(subselect)) |
| return (Node *) sublink; |
| |
| /** |
| * If there are CTEs, then the transformation does not work. Don't attempt to pullup. |
| */ |
| if (root->parse->cteList) |
| return (Node *) sublink; |
| |
| /* |
| * If uncorrelated, and no Var nodes on lhs, the subquery will be executed |
| * only once. It should become an InitPlan, but make_subplan() doesn't |
| * handle that case, so just flatten it for now. |
| * CDB TODO: Let it become an InitPlan, so its QEs can be recycled. |
| */ |
| correlated = contain_vars_of_level_or_above(sublink->subselect, 1); |
| |
| if (correlated) |
| { |
| /** |
| * Under certain conditions, we do cannot pull up the subquery as a join. |
| */ |
| |
| if (subselect->hasAggs |
| || (subselect->jointree->fromlist == NULL) |
| || subselect->havingQual |
| || subselect->groupClause |
| || subselect->hasWindFuncs |
| || subselect->distinctClause |
| || subselect->setOperations) |
| return (Node *) sublink; |
| |
| /* do not pull subqueries with correlation in a func expr in the |
| from clause of the subselect */ |
| if (has_correlation_in_funcexpr_rte(subselect->rtable)) |
| { |
| return (Node *) sublink; |
| } |
| |
| if (contain_subplans(subselect->jointree->quals)) |
| { |
| return (Node *) sublink; |
| } |
| } |
| |
| /* |
| * Okay, pull up the sub-select into top range table and jointree. |
| * |
| * We rely here on the assumption that the outer query has no references |
| * to the inner (necessarily true, other than the Vars that we build |
| * below). Therefore this is a lot easier than what pull_up_subqueries has |
| * to go through. |
| */ |
| ininfo = cdbsubselect_add_rte_and_ininfo(root, rtrlist_inout, subselect, "IN_subquery"); |
| |
| /* Get the index of the subquery RTE that was just created. */ |
| rtindex = list_length(root->parse->rtable); |
| Assert(rt_fetch(rtindex, root->parse->rtable)->subquery == subselect); |
| |
| /* |
| * Uncorrelated "=ANY" subqueries can use JOIN_UNIQUE dedup technique. We |
| * expect that the test expression will be either a single OpExpr, or an |
| * AND-clause containing OpExprs. (If it's anything else then the parser |
| * must have determined that the operators have non-equality-like |
| * semantics. In the OpExpr case we can't be sure what the operator's |
| * semantics are like, and must check for ourselves.) |
| */ |
| ininfo->try_join_unique = false; |
| if (!correlated && |
| sublink->testexpr) |
| { |
| if (IsA(sublink->testexpr, OpExpr)) |
| { |
| List *opclasses; |
| List *opstrats; |
| |
| get_op_btree_interpretation(((OpExpr *) sublink->testexpr)->opno, |
| &opclasses, &opstrats); |
| if (list_member_int(opstrats, ROWCOMPARE_EQ)) |
| ininfo->try_join_unique = true; |
| } |
| else if (and_clause(sublink->testexpr)) |
| ininfo->try_join_unique = true; |
| } |
| |
| /* |
| * Build the result qual expression. As a side effect, |
| * ininfo->sub_targetlist is filled with a list of Vars representing the |
| * subselect outputs. |
| */ |
| return convert_testexpr(root, |
| sublink->testexpr, |
| rtindex, |
| &ininfo->sub_targetlist, |
| subselect->targetList); |
| } /* convert_IN_to_join */ |
| |
| |
| /* |
| * convert_sublink_to_join: can we convert a SubLink to join style? |
| * |
| * The caller has found a SubLink at the top level of WHERE, but has not |
| * checked the properties of the SubLink at all. Decide whether it is |
| * appropriate to process this SubLink in join style. If so, build the |
| * qual clause(s) to replace the SubLink, and return them. Else return |
| * the original sublink. In either case the subquery is modified in |
| * place to remove unnecessary clauses. |
| * |
| * Side effects of a successful conversion include adding the SubLink's |
| * subselect to the query's rangetable and adding an InClauseInfo node to |
| * its in_info_list. |
| * |
| * Upon creating an RTE for a flattened subquery, a corresponding RangeTblRef |
| * node is appended to the caller's List referenced by *rtrlist_inout, so the |
| * caller can add it to the jointree. |
| */ |
| static Node * |
| convert_sublink_to_join(PlannerInfo *root, List** rtrlist_inout, SubLink *sublink) |
| { |
| Node *result = (Node *)sublink; |
| |
| Assert(IsA(sublink, SubLink)); |
| Assert(IsA(((SubLink *) sublink)->subselect, Query)); |
| |
| switch (sublink->subLinkType) |
| { |
| case EXISTS_SUBLINK: |
| result = convert_EXISTS_to_join(root, rtrlist_inout, sublink); |
| break; |
| |
| case NOT_EXISTS_SUBLINK: |
| result = convert_NOT_EXISTS_to_antijoin(root, rtrlist_inout, sublink); |
| break; |
| |
| case ALL_SUBLINK: |
| result = convert_IN_to_antijoin(root, rtrlist_inout, sublink); |
| break; |
| |
| case ANY_SUBLINK: |
| result = convert_IN_to_join(root, rtrlist_inout, sublink); |
| break; |
| |
| case ROWCOMPARE_SUBLINK: |
| case EXPR_SUBLINK: |
| break; |
| |
| case ARRAY_SUBLINK: |
| break; |
| |
| default: |
| Insist(0); |
| } |
| return result; |
| } /* convert_sublink_to_join */ |
| |
| /* NOTIN subquery transformation -start */ |
| |
| /* check if NOT IN conversion to antijoin is possible */ |
| static bool |
| safe_to_convert_NOTIN(SubLink * sublink) |
| { |
| Query *subselect = (Query *) sublink->subselect; |
| Relids left_varnos; |
| |
| /* cases we don't currently handle are listed below. */ |
| |
| /* ARRAY sublinks have empty test expressions */ |
| if (sublink->testexpr == NULL) |
| return false; |
| |
| /* No volatile functions in the subquery */ |
| if (contain_volatile_functions(sublink->testexpr)) |
| return false; |
| |
| /** |
| * If there are correlations in a func expr in the from clause, then don't bother. |
| */ |
| if (has_correlation_in_funcexpr_rte(subselect->rtable)) |
| { |
| return false; |
| } |
| |
| /* Left-hand expressions must contain some Vars of the current */ |
| left_varnos = pull_varnos(sublink->testexpr); |
| if (bms_is_empty(left_varnos)) |
| return false; |
| |
| /* Correlation - subquery referencing Vars of parent not handled */ |
| if (contain_vars_of_level((Node *) subselect, 1)) |
| return false; |
| |
| /* No set operations in the subquery */ |
| if (subselect->setOperations) |
| return false; |
| |
| return true; |
| } |
| |
| /* |
| * Find if the supplied targetlist has any resjunk |
| * entries. We only have to check the tail since |
| * resjunks (if any) can only appear in the end. |
| */ |
| inline static bool |
| has_resjunk(List *tlist) |
| { |
| bool resjunk = false; |
| Node *tlnode = (Node *) (lfirst(tlist->tail)); |
| |
| if (IsA(tlnode, TargetEntry)) |
| { |
| TargetEntry *te = (TargetEntry *) tlnode; |
| |
| if (te->resjunk) |
| resjunk = true; |
| } |
| return resjunk; |
| } |
| |
| /* add a dummy constant var to the end of the supplied list */ |
| static List * |
| add_dummy_const(List *tlist) |
| { |
| TargetEntry *dummy; |
| Const *zconst; |
| int resno; |
| |
| zconst = makeConst(INT4OID, -1, sizeof(int32), (Datum) 0, |
| false, true); /* isnull, byval */ |
| resno = list_length(tlist) + 1; |
| dummy = makeTargetEntry((Expr *) zconst, |
| resno, |
| DUMMY_COLUMN_NAME, |
| false /* resjunk */ ); |
| |
| if (tlist == NIL) |
| tlist = list_make1(dummy); |
| else |
| tlist = lappend(tlist, dummy); |
| |
| return tlist; |
| } |
| |
| /* Add a dummy variable to the supplied target list. The |
| * variable is added to end of targetlist but before all |
| * resjunk vars (if any). The caller should make use of |
| * the returned targetlist since this code might modify |
| * the list in-place. |
| */ |
| static List * |
| mutate_targetlist(List *tlist) |
| { |
| List *new_list = NIL; |
| |
| if (has_resjunk(tlist)) |
| { |
| ListCell *prev = NULL; |
| ListCell *curr = NULL; |
| bool junk = false; |
| |
| foreach(curr, tlist) |
| { |
| TargetEntry *tle = (TargetEntry *) lfirst(curr); |
| |
| if (tle->resjunk) |
| { |
| tle->resno = tle->resno + 1; |
| if (!junk) |
| { |
| junk = true; |
| new_list = add_dummy_const(new_list); |
| } |
| } |
| new_list = lappend(new_list, tle); |
| prev = curr; |
| } |
| } |
| else |
| { |
| new_list = tlist; |
| new_list = add_dummy_const(new_list); |
| } |
| return new_list; |
| } |
| |
| /* Pulls up the subquery into the top-level range table. |
| * Before that add a dummy column zero to the target list |
| * of the subquery. |
| */ |
| static int |
| add_notin_subquery_rte(Query * parse, Query * subselect) |
| { |
| RangeTblEntry *subq_rte; |
| int subq_indx; |
| |
| subselect->targetList = mutate_targetlist(subselect->targetList); |
| subq_rte = addRangeTableEntryForSubquery(NULL, /* pstate */ |
| subselect, |
| makeAlias("NotIn_SUBQUERY", NIL), |
| false /* inFromClause */ ); |
| parse->rtable = lappend(parse->rtable, subq_rte); |
| |
| /* assume new rte is at end */ |
| subq_indx = list_length(parse->rtable); |
| Assert(subq_rte = rt_fetch(subq_indx, parse->rtable)); |
| |
| return subq_indx; |
| } |
| |
| /* |
| * Pulls up the expr sublink subquery into the top-level range table. |
| */ |
| static int |
| add_expr_subquery_rte(Query * parse, Query * subselect) |
| { |
| RangeTblEntry *subq_rte; |
| int subq_indx; |
| |
| /** |
| * Generate column names. |
| * TODO: improve this to keep old names around |
| */ |
| ListCell *lc = NULL; |
| int teNum = 0; |
| foreach (lc, subselect->targetList) |
| { |
| StringInfoData si; |
| initStringInfo(&si); |
| appendStringInfo(&si, "csq_c%d", teNum); |
| |
| TargetEntry *te = (TargetEntry *) lfirst(lc); |
| te->resname = si.data; |
| teNum++; |
| } |
| |
| subq_rte = addRangeTableEntryForSubquery(NULL, /* pstate */ |
| subselect, |
| makeAlias("Expr_SUBQUERY", NIL), |
| false /* inFromClause */ ); |
| parse->rtable = lappend(parse->rtable, subq_rte); |
| |
| /* assume new rte is at end */ |
| subq_indx = list_length(parse->rtable); |
| Assert(subq_rte = rt_fetch(subq_indx, parse->rtable)); |
| |
| return subq_indx; |
| } |
| |
| |
| /* Create a join expression linking the supplied larg node |
| * with the pulled up NOT IN subquery located at r_rtindex |
| * in the range table. The appropriate JOIN_RTE has already |
| * been created by the caller and can be located at j_rtindex |
| */ |
| static JoinExpr * |
| make_join_expr(Node *larg, int r_rtindex, int join_type) |
| { |
| JoinExpr *jexpr; |
| RangeTblRef *rhs; |
| |
| rhs = makeNode(RangeTblRef); |
| rhs->rtindex = r_rtindex; |
| |
| jexpr = makeNode(JoinExpr); |
| jexpr->jointype = join_type; |
| jexpr->isNatural = false; |
| jexpr->larg = larg; |
| jexpr->rarg = (Node *) rhs; |
| jexpr->rtindex = 0; |
| |
| return jexpr; |
| } |
| |
| /* |
| * Convert subquery's test expr to a suitable predicate. |
| * If we wanted to add correlated subquery support, this would be the place to do it. |
| */ |
| static Node * |
| make_lasj_quals(PlannerInfo *root, SubLink * sublink, int subquery_indx) |
| { |
| Expr *join_pred; |
| Query *subselect = (Query *) sublink->subselect; |
| List *subtlist = NIL; |
| |
| Assert(sublink->subLinkType == ALL_SUBLINK); |
| |
| join_pred = (Expr *) convert_testexpr(root, |
| sublink->testexpr, |
| subquery_indx, |
| &subtlist, |
| subselect->targetList); |
| |
| join_pred = canonicalize_qual(make_notclause(join_pred)); |
| |
| Assert(join_pred != NULL); |
| return (Node *) join_pred; |
| } |
| |
| /* add IS NOT FALSE clause on top of the clause */ |
| Node * |
| add_null_match_clause(Node *clause) |
| { |
| BooleanTest *btest; |
| |
| Assert(clause != NULL); |
| btest = makeNode(BooleanTest); |
| btest->arg = (Expr *) clause; |
| btest->booltesttype = IS_NOT_FALSE; |
| return (Node *) btest; |
| } |
| |
| |
| /** |
| * Given an expression tree, extract all inner vars and construct a qual that eliminates NULLs. |
| * E.g. For input (i1 = o1) and (i2 = o1 + 2), the function produces NOT NULL (i1) and NOT NULL (i2) |
| */ |
| static Node* not_null_inner_vars(Node *clause) |
| { |
| List *allVars = extract_nodes(NULL /* PlannerGlobal */, clause, T_Var); |
| List *notNullClauses = NULL; |
| ListCell *lc = NULL; |
| foreach (lc, allVars) |
| { |
| Assert(IsA(lfirst(lc), Var)); |
| Var *v = (Var *) lfirst(lc); |
| if (v->varlevelsup == 0) |
| { |
| NullTest *nullTest = makeNode(NullTest); |
| nullTest->arg = copyObject(v); |
| nullTest->nulltesttype = IS_NOT_NULL; |
| notNullClauses = lappend(notNullClauses, nullTest); |
| } |
| } |
| |
| if (notNullClauses) |
| { |
| return (Node *) make_andclause(notNullClauses); |
| } |
| else |
| { |
| return NULL; |
| } |
| } |
| |
| |
| /** |
| * Is the attribute of a base relation non-nullable? |
| * Input: |
| * relationOid |
| * attributeNumber |
| * Output: |
| * true if the attribute is non-nullable |
| */ |
| static bool is_attribute_nonnullable(Oid relationOid, AttrNumber attrNumber) |
| { |
| HeapTuple attributeTuple = NULL; |
| Form_pg_attribute attribute = NULL; |
| bool result = true; |
| cqContext *pcqCtx; |
| |
| pcqCtx = caql_beginscan( |
| NULL, |
| cql("SELECT * FROM pg_attribute " |
| " WHERE attrelid = :1 " |
| " AND attnum = :2 ", |
| ObjectIdGetDatum(relationOid), |
| Int16GetDatum(attrNumber))); |
| |
| attributeTuple = caql_getnext(pcqCtx); |
| |
| if (!HeapTupleIsValid(attributeTuple)) |
| { |
| caql_endscan(pcqCtx); |
| return false; |
| } |
| |
| attribute = (Form_pg_attribute) GETSTRUCT(attributeTuple); |
| |
| if (attribute->attisdropped) |
| result = false; |
| |
| if (!attribute->attnotnull) |
| result = false; |
| |
| caql_endscan(pcqCtx); |
| |
| return result; |
| } |
| |
| |
| /** |
| * This walker goes through a query's join-tree to determine the set of non-nullable |
| * vars. E.g. |
| * select x from t1, t2 where x=y .. the walker determines that x and y are involved in an inner join |
| * and therefore are non-nullable. |
| * select x from t1 where x > 20 .. the walker determines that the quals ensures that x is non-nullable |
| * |
| */ |
| static bool find_nonnullable_vars_walker(Node *node, NonNullableVarsContext *context) |
| { |
| Assert(context); |
| Assert(context->query); |
| |
| if (node == NULL) |
| return false; |
| |
| switch(nodeTag(node)) |
| { |
| case T_Var: |
| { |
| Var *var = (Var *) node; |
| |
| if (var->varlevelsup == 0) |
| { |
| context->nonNullableVars = list_append_unique(context->nonNullableVars, var); |
| } |
| return false; |
| } |
| case T_FuncExpr: |
| { |
| FuncExpr *expr = (FuncExpr *) node; |
| |
| if (!func_strict(expr->funcid)) |
| { |
| /* |
| * If a function is not strict, it can return non-null values |
| * for null inputs. Thus, input vars can be null and sneak through. |
| * Therefore, ignore all vars underneath. |
| */ |
| return false; |
| } |
| break; |
| } |
| case T_OpExpr: |
| { |
| OpExpr *expr = (OpExpr *) node; |
| |
| if (!op_strict(expr->opno)) |
| { |
| /* |
| * If an op is not strict, it can return non-null values for null inputs. |
| * Ignore all vars underneath. |
| */ |
| return false; |
| } |
| |
| break; |
| } |
| case T_BoolExpr: |
| { |
| BoolExpr *expr = (BoolExpr *) node; |
| |
| if (expr->boolop == NOT_EXPR) |
| { |
| /** |
| * Not negates all conditions underneath. We choose to not handle |
| * this situation. |
| */ |
| return false; |
| } else if (expr->boolop == OR_EXPR) |
| { |
| /** |
| * We add the intersection of variables determined to be |
| * non-nullable by each arg to the OR expression. |
| */ |
| NonNullableVarsContext c1; |
| c1.query = context->query; |
| c1.nonNullableVars = NIL; |
| ListCell *lc = NULL; |
| int orArgNum = 0; |
| foreach (lc, expr->args) |
| { |
| Node *orArg = lfirst(lc); |
| |
| NonNullableVarsContext c2; |
| c2.query = context->query; |
| c2.nonNullableVars = NIL; |
| expression_tree_walker(orArg, find_nonnullable_vars_walker, &c2); |
| |
| if (orArgNum == 0) |
| { |
| Assert(c1.nonNullableVars == NIL); |
| c1.nonNullableVars = c2.nonNullableVars; |
| } |
| else |
| { |
| c1.nonNullableVars = list_intersection(c1.nonNullableVars, c2.nonNullableVars); |
| } |
| orArgNum++; |
| } |
| |
| context->nonNullableVars = list_concat_unique(context->nonNullableVars, c1.nonNullableVars); |
| return false; |
| } |
| |
| Assert(expr->boolop == AND_EXPR); |
| |
| /** |
| * AND_EXPR is automatically handled by the walking algorithm. |
| */ |
| break; |
| } |
| case T_NullTest: |
| { |
| NullTest *expr = (NullTest *) node; |
| |
| if (expr->nulltesttype != IS_NOT_NULL) |
| { |
| return false; |
| } |
| |
| break; |
| } |
| case T_BooleanTest: |
| { |
| BooleanTest *expr = (BooleanTest *) node; |
| if (!(expr->booltesttype == IS_NOT_UNKNOWN |
| || expr->booltesttype == IS_TRUE |
| || expr->booltesttype == IS_FALSE)) |
| { |
| /* Other tests may allow a null value to pass through. */ |
| return false; |
| } |
| break; |
| } |
| case T_JoinExpr: |
| { |
| JoinExpr *expr = (JoinExpr *) node; |
| if (expr->jointype != JOIN_INNER) |
| { |
| /* Do not descend below any other join type */ |
| return false; |
| } |
| break; |
| } |
| case T_FromExpr: |
| case T_List: |
| { |
| /* Top-level where clause is fine -- equivalent to an inner join */ |
| break; |
| } |
| case T_RangeTblRef: |
| { |
| /* |
| * If we've gotten this far, then we can look for non-null constraints |
| * on the vars in the query's targetlist. |
| */ |
| RangeTblRef *rtf = (RangeTblRef *) node; |
| RangeTblEntry *rte = rt_fetch(rtf->rtindex, context->query->rtable); |
| |
| if (rte->rtekind == RTE_RELATION) |
| { |
| /* |
| * Find all vars in the query's targetlist that are from this relation and |
| * check if the attribute is non-nullable by base table constraint. |
| */ |
| |
| ListCell *lc = NULL; |
| foreach (lc, context->query->targetList) |
| { |
| TargetEntry *tle = (TargetEntry *) lfirst(lc); |
| Assert(tle->expr); |
| |
| if (nodeTag(tle->expr) == T_Var) |
| { |
| Var *var = (Var *) tle->expr; |
| if (var->varno == rtf->rtindex) |
| { |
| int attNum = var->varattno; |
| int relOid = rte->relid; |
| |
| Assert(relOid != InvalidOid); |
| |
| if (is_attribute_nonnullable(relOid, attNum)) |
| { |
| /* Base table constraint on the var. Add it to the list! */ |
| context->nonNullableVars = list_append_unique(context->nonNullableVars, var); |
| } |
| |
| } |
| } |
| } |
| } |
| else if (rte->rtekind == RTE_VALUES) |
| { |
| /* |
| * TODO: make this work for values scan someday. |
| */ |
| } |
| return false; |
| } |
| default: |
| { |
| /* Do not descend beyond any other node */ |
| return false; |
| } |
| } |
| return expression_tree_walker(node, find_nonnullable_vars_walker, context); |
| } |
| |
| |
| /** |
| * This method determines if the targetlist of a query is nullable. |
| * Consider a query of the form: select t1.x, t2.y from t1, t2 where t1.x > 5 |
| * This method simply determines if the targetlist i.e. (t1.x, t2.y) is nullable. |
| * A targetlist is "nullable" if all entries in the targetlist |
| * cannot be proven to be non-nullable. |
| */ |
| bool is_targetlist_nullable(Query *subq) |
| { |
| Assert(subq); |
| |
| if (subq->setOperations) |
| { |
| /* TODO: support setops in subselect someday */ |
| return false; |
| } |
| |
| /** |
| * Find all non-nullable vars in the query i.e. the set of vars, |
| * if part of the targetlist, that would be non-nullable. E.g. |
| * select x from t1 where x is not null |
| * would produce {x}. |
| */ |
| NonNullableVarsContext context; |
| context.query = subq; |
| context.nonNullableVars = NIL; |
| expression_tree_walker((Node *) subq->jointree, find_nonnullable_vars_walker, &context); |
| |
| bool result = false; |
| /** |
| * Now cross-check with the actual targetlist of the query. |
| */ |
| ListCell *lc = NULL; |
| foreach (lc, subq->targetList) |
| { |
| TargetEntry *tle = (TargetEntry *) lfirst(lc); |
| |
| if (tle->resjunk) |
| { |
| /** |
| * Be conservative. |
| */ |
| result = true; |
| } |
| |
| if (nodeTag(tle->expr) == T_Const) |
| { |
| /** |
| * Is the constant entry in the targetlist null? |
| */ |
| Const *constant = (Const *) tle->expr; |
| if (strcmp(tle->resname, DUMMY_COLUMN_NAME) != 0 |
| && constant->constisnull == true) |
| { |
| result = true; |
| } |
| } |
| else if (nodeTag(tle->expr) == T_Var) |
| { |
| Var *var = (Var *) tle->expr; |
| |
| /* Was this var determined to be non-nullable? */ |
| if (!list_member(context.nonNullableVars, var)) |
| { |
| result = true; |
| } |
| } |
| else |
| { |
| /** |
| * Be conservative. |
| */ |
| result = true; |
| } |
| } |
| |
| return result; |
| } |
| |
| /** |
| * This method looks at a sublink expression and generates a not-null |
| * clause for the outer-side expression. |
| * For e.g. (foo(a),bar(b)) not in (...) |
| * results in foo(a) is not null OR bar(b) is not null |
| */ |
| static Node *make_outer_nonnull_clause(SubLink *sublink) |
| { |
| Node *outerNonNullClause = NULL; |
| |
| Assert(nodeTag(sublink->testexpr) == T_BoolExpr || nodeTag(sublink->testexpr) == T_OpExpr); |
| |
| switch(nodeTag(sublink->testexpr)) |
| { |
| case T_BoolExpr: |
| { |
| /* |
| * This is the case when the testexpr involves multiple columns |
| * such as (foo(a), bar(b)). This is represent as an OR expression. |
| */ |
| List *exprList = NIL; |
| BoolExpr *boolExpr = (BoolExpr *) sublink->testexpr; |
| Assert(boolExpr->boolop == OR_EXPR); |
| ListCell *lc = NULL; |
| foreach (lc, boolExpr->args) |
| { |
| Node *singleColumnExpr = (Node *) lfirst(lc); |
| Assert(nodeTag(singleColumnExpr) == T_OpExpr); |
| OpExpr *opExpr = (OpExpr *) singleColumnExpr; /* opExpr represents foo(a) <> param */ |
| Assert(list_length(opExpr->args) == 2); /* i.e. foo(a), param */ |
| Expr *outerExpr = (Expr *) lfirst(list_head(opExpr->args)); /* outerExpr represents foo(a) */ |
| |
| NullTest *nullTest = makeNode(NullTest); |
| nullTest->arg = outerExpr; |
| nullTest->nulltesttype = IS_NOT_NULL; /* nullTest represents foo(a) is not null */ |
| |
| exprList = lappend(exprList, nullTest); |
| } |
| outerNonNullClause = (Node *) make_orclause(exprList); /* represents foo(a) is not null OR bar(b) is not null */ |
| break; |
| } |
| case T_OpExpr: |
| { |
| /* |
| * This represents the single column testexpr. Does not contain the top |
| * OR clause. |
| */ |
| OpExpr *opExpr = (OpExpr *) sublink->testexpr; /* opExpr represents foo(a) <> param */ |
| Assert(list_length(opExpr->args) == 2); /* i.e. foo(a), param */ |
| Expr *outerExpr = (Expr *) lfirst(list_head(opExpr->args)); /* outerExpr represents foo(a) */ |
| |
| NullTest *nullTest = makeNode(NullTest); |
| nullTest->arg = outerExpr; |
| nullTest->nulltesttype = IS_NOT_NULL; /* nullTest represents foo(a) is not null */ |
| |
| outerNonNullClause = (Node *) nullTest; /* represents foo(a) is not null OR bar(b) is not null */ |
| break; |
| } |
| default: |
| Assert(false && "Unsupported sublink expression"); |
| |
| } |
| |
| Assert(outerNonNullClause); |
| |
| return outerNonNullClause; |
| } |
| |
| /* |
| * convert_IN_to_antijoin: can we convert an ALL SubLink to join style? |
| * If not appropriate to process this SubLink, return it as it is. |
| * Side effects of a successful conversion include adding the SubLink's |
| * subselect to the top-level rangetable, adding a JOIN RTE linking the outer |
| * query with the subselect and setting up the qualifiers correctly. |
| * |
| * The transformation is to rewrite a query of the form: |
| * select c1 from t1 where c1 NOT IN (select c2 from t2); |
| * (to) |
| * select c1 from t1 left anti semi join (select 0 as zero, c2 from t2) foo |
| * ON (c1 = c2) IS NOT FALSE where zero is NULL; |
| * |
| * The pseudoconstant column zero is needed to correctly pipe in the NULLs |
| * from the subselect upstream. |
| * |
| * The current implementation assumes that the sublink expression occurs |
| * in a top-level where clause (or through a series of inner joins). |
| */ |
| static Node * |
| convert_IN_to_antijoin(PlannerInfo * root, List **rtrlist_inout __attribute__((unused)), SubLink * sublink) |
| { |
| Query *parse = root->parse; |
| Query *subselect = (Query *) sublink->subselect; |
| |
| if (safe_to_convert_NOTIN(sublink)) |
| { |
| Assert(list_length(parse->jointree->fromlist) == 1); |
| |
| Node *larg = lfirst(list_head(parse->jointree->fromlist)); /* represents the top-level join tree entry */ |
| |
| Assert(larg != NULL); |
| |
| int subq_indx = add_notin_subquery_rte(parse, subselect); |
| bool inner_nullable = is_targetlist_nullable(subselect); |
| JoinExpr *join_expr = make_join_expr(larg, subq_indx, JOIN_LASJ_NOTIN); |
| |
| join_expr->quals = make_lasj_quals(root, sublink, subq_indx); |
| |
| if (inner_nullable) |
| { |
| join_expr->quals = add_null_match_clause(join_expr->quals); |
| } |
| |
| /* |
| * What clause needs to be added to ensure that the outer side does not contain |
| * nulls. This is because nulls in the outer side are filtered out in a NOT IN |
| * clause. |
| */ |
| Node *notNullConstraints = make_outer_nonnull_clause(sublink); |
| |
| parse->jointree->fromlist = list_make1(join_expr); /* Replace the join-tree with the new one */ |
| |
| /** Add not null constraints for the outer side to the top-level clause*/ |
| return notNullConstraints; |
| } |
| else |
| { |
| /* Not safe to perform transformation. */ |
| return (Node *) sublink; |
| } |
| } |
| |
| /* |
| * pull_up_IN_clauses |
| * Attempt to pull up top-level IN clauses to be treated like joins. |
| * |
| * A clause "foo IN (sub-SELECT)" appearing at the top level of WHERE can |
| * be processed by pulling the sub-SELECT up to become a rangetable entry |
| * and handling the implied equality comparisons as join operators (with |
| * special join rules). |
| * This optimization *only* works at the top level of WHERE, because |
| * it cannot distinguish whether the IN ought to return FALSE or NULL in |
| * cases involving NULL inputs. This routine searches for such clauses |
| * and does the necessary parsetree transformations if any are found. |
| * |
| * This routine has to run before preprocess_expression(), so the WHERE |
| * clause is not yet reduced to implicit-AND format. That means we need |
| * to recursively search through explicit AND clauses, which are |
| * probably only binary ANDs. We stop as soon as we hit a non-AND item. |
| * |
| * Returns the possibly-modified version of the given qual-tree node. |
| * |
| * This function was formerly in prepjointree.c. |
| */ |
| static Node * |
| pull_up_IN_clauses(PlannerInfo * root, List **rtrlist_inout, Node *clause) |
| { |
| if (clause == NULL) |
| return NULL; |
| if (IsA(clause, SubLink)) |
| { |
| SubLink *sublink = (SubLink *) clause; |
| Node *subst; |
| |
| /* Is it a convertible IN clause? If not, return it as-is */ |
| subst = convert_sublink_to_join(root, rtrlist_inout, sublink); |
| return subst; |
| } |
| if (and_clause(clause)) |
| { |
| List *newclauses = NIL; |
| ListCell *l; |
| |
| foreach(l, ((BoolExpr *) clause)->args) |
| { |
| Node *oldclause = (Node *) lfirst(l); |
| Node *newclause = pull_up_IN_clauses(root, rtrlist_inout, oldclause); |
| |
| if (newclause) |
| newclauses = lappend(newclauses, newclause); |
| } |
| return (Node *) make_ands_explicit(newclauses); |
| } |
| if (not_clause(clause)) |
| { |
| Node *arg = (Node *) get_notclausearg((Expr *) clause); |
| |
| /* |
| * We normalize NOT subqueries using the following axioms: |
| * |
| * val NOT IN (subq) => val <> ALL (subq) |
| * NOT val op ANY (subq) => val op' ALL (subq) |
| * NOT val op ALL (subq) => val op' ANY (subq) |
| */ |
| |
| if (IsA(arg, SubLink)) |
| { |
| SubLink *sublink = (SubLink *) arg; |
| |
| if (sublink->subLinkType == ANY_SUBLINK) |
| { |
| sublink->subLinkType = ALL_SUBLINK; |
| sublink->testexpr = (Node *) canonicalize_qual( |
| make_notclause((Expr *) sublink->testexpr)); |
| } |
| else if (sublink->subLinkType == ALL_SUBLINK) |
| { |
| sublink->subLinkType = ANY_SUBLINK; |
| sublink->testexpr = (Node *) canonicalize_qual( |
| make_notclause((Expr *) sublink->testexpr)); |
| } |
| else if (sublink->subLinkType == EXISTS_SUBLINK) |
| { |
| sublink->subLinkType = NOT_EXISTS_SUBLINK; |
| } |
| else |
| { |
| return clause; /* do nothing for other sublinks */ |
| } |
| |
| return (Node *) pull_up_IN_clauses(root, rtrlist_inout, (Node *) sublink); |
| } |
| else if (not_clause(arg)) |
| { |
| /* NOT NOT (expr) => (expr) */ |
| return (Node *) pull_up_IN_clauses(root, rtrlist_inout, |
| (Node *) get_notclausearg((Expr *) arg)); |
| } |
| else if (or_clause(arg)) |
| { |
| /* NOT OR (expr1) (expr2) => (expr1) AND (expr2) */ |
| return (Node *) pull_up_IN_clauses(root, rtrlist_inout, |
| (Node *) canonicalize_qual((Expr *) clause)); |
| |
| } |
| } |
| |
| /** |
| * (expr) op SUBLINK |
| */ |
| if (IsA(clause, OpExpr)) |
| { |
| OpExpr *opexp = (OpExpr *) clause; |
| |
| if (list_length(opexp->args) == 2) |
| { |
| /** |
| * Check if second arg is sublink |
| */ |
| Node *rarg = list_nth(opexp->args, 1); |
| |
| if (IsA(rarg, SubLink)) |
| { |
| return (Node *) convert_EXPR_to_join(root, rtrlist_inout, opexp); |
| } |
| } |
| } |
| /* Stop if not an AND */ |
| return clause; |
| } |
| |
| /* |
| * Check if there is a range table entry of type func expr whose arguments |
| * are correlated |
| */ |
| bool has_correlation_in_funcexpr_rte(List *rtable) |
| { |
| /* check if correlation occurs in a func expr in the from clause of the |
| subselect */ |
| ListCell *lc_rte = NULL; |
| foreach(lc_rte, rtable) |
| { |
| RangeTblEntry *rte = (RangeTblEntry *) lfirst(lc_rte); |
| if (NULL != rte->funcexpr && contain_vars_of_level_or_above(rte->funcexpr, 1)) |
| { |
| return true; |
| } |
| } |
| return false; |
| } |