blob: 850b62950abe61a52e325fdf906a3b0c7eb122f5 [file] [log] [blame]
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
package org.apache.impala.analysis;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import org.apache.impala.analysis.AnalysisContext.AnalysisResult;
import org.apache.impala.analysis.UnionStmt.UnionOperand;
import org.apache.impala.common.AnalysisException;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.google.common.base.Preconditions;
import com.google.common.base.Predicates;
import com.google.common.collect.Iterables;
import com.google.common.collect.Lists;
import static org.apache.impala.analysis.ToSqlOptions.DEFAULT;
import static org.apache.impala.analysis.ToSqlOptions.REWRITTEN;
/**
* Class representing a statement rewriter. The base class traverses the stmt tree and
* the specific rewrite rules are implemented in the subclasses and are called by the
* hooks in the base class.
* TODO: Now that we have a nested-loop join supporting all join modes we could
* allow more rewrites, although it is not clear we would always want to.
*/
public class StmtRewriter {
private final static Logger LOG = LoggerFactory.getLogger(StmtRewriter.class);
/**
* Rewrite the statement of an analysis result in-place. Assumes that BetweenPredicates
* have already been rewritten.
*/
public void rewrite(AnalysisResult analysisResult) throws AnalysisException {
// Analyzed stmt that contains a query statement with subqueries to be rewritten.
StatementBase stmt = analysisResult.getStmt();
Preconditions.checkState(stmt.isAnalyzed());
// Analyzed query statement to be rewritten.
QueryStmt queryStmt;
if (stmt instanceof QueryStmt) {
queryStmt = (QueryStmt) analysisResult.getStmt();
} else if (stmt instanceof InsertStmt) {
queryStmt = ((InsertStmt) analysisResult.getStmt()).getQueryStmt();
} else if (stmt instanceof CreateTableAsSelectStmt) {
queryStmt = ((CreateTableAsSelectStmt) analysisResult.getStmt()).getQueryStmt();
} else if (analysisResult.isUpdateStmt()) {
queryStmt = ((UpdateStmt) analysisResult.getStmt()).getQueryStmt();
} else if (analysisResult.isDeleteStmt()) {
queryStmt = ((DeleteStmt) analysisResult.getStmt()).getQueryStmt();
} else if (analysisResult.isTestCaseStmt()) {
queryStmt = ((CopyTestCaseStmt) analysisResult.getStmt()).getQueryStmt();
} else {
throw new AnalysisException("Unsupported statement: " + stmt.toSql());
}
rewriteQueryStatement(queryStmt, queryStmt.getAnalyzer());
}
/**
* Calls the appropriate rewrite method based on the specific type of query stmt. See
* rewriteSelectStatement() and rewriteUnionStatement() documentation.
*/
protected void rewriteQueryStatement(QueryStmt stmt, Analyzer analyzer)
throws AnalysisException {
Preconditions.checkNotNull(stmt);
Preconditions.checkState(stmt.isAnalyzed());
if (stmt instanceof SelectStmt) {
rewriteSelectStatement((SelectStmt) stmt, analyzer);
} else if (stmt instanceof UnionStmt) {
rewriteUnionStatement((UnionStmt) stmt);
} else {
throw new AnalysisException(
"Subqueries not supported for " + stmt.getClass().getSimpleName() +
" statements");
}
}
protected void rewriteSelectStatement(SelectStmt stmt, Analyzer analyzer)
throws AnalysisException {
for (TableRef tblRef : stmt.fromClause_) {
if (!(tblRef instanceof InlineViewRef)) continue;
InlineViewRef inlineViewRef = (InlineViewRef) tblRef;
rewriteQueryStatement(inlineViewRef.getViewStmt(), inlineViewRef.getAnalyzer());
}
// Currently only SubqueryRewriter touches the where clause. Recurse into the where
// clause when the need arises.
rewriteSelectStmtHook(stmt, analyzer);
if (LOG.isTraceEnabled()) LOG.trace("Rewritten SQL: " + stmt.toSql(REWRITTEN));
}
/**
* Rewrite all operands in a UNION. The conditions that apply to SelectStmt rewriting
* also apply here.
*/
private void rewriteUnionStatement(UnionStmt stmt) throws AnalysisException {
for (UnionOperand operand : stmt.getOperands()) {
Preconditions.checkState(operand.getQueryStmt() instanceof SelectStmt);
rewriteSelectStatement((SelectStmt) operand.getQueryStmt(), operand.getAnalyzer());
}
}
protected void rewriteSelectStmtHook(SelectStmt stmt, Analyzer analyzer)
throws AnalysisException {}
static class SubqueryRewriter extends StmtRewriter {
/**
* Returns true if the Expr tree rooted at 'expr' has at least one subquery
* that participates in a disjunction.
*/
private static boolean hasSubqueryInDisjunction(Expr expr) {
if (!(expr instanceof CompoundPredicate)) return false;
if (Expr.IS_OR_PREDICATE.apply(expr)) {
return expr.contains(Subquery.class);
}
for (Expr child : expr.getChildren()) {
if (hasSubqueryInDisjunction(child)) return true;
}
return false;
}
/**
* Replace an ExistsPredicate that contains a subquery with a BoolLiteral if we
* can determine its result without evaluating it. Return null if the result of the
* ExistsPredicate can only be determined at run-time.
*/
private static BoolLiteral replaceExistsPredicate(ExistsPredicate predicate) {
Subquery subquery = predicate.getSubquery();
Preconditions.checkNotNull(subquery);
SelectStmt subqueryStmt = (SelectStmt) subquery.getStatement();
BoolLiteral boolLiteral = null;
if (subqueryStmt.getAnalyzer().hasEmptyResultSet()) {
boolLiteral = new BoolLiteral(predicate.isNotExists());
} else if (subqueryStmt.hasMultiAggInfo()
&& subqueryStmt.getMultiAggInfo().hasAggregateExprs()
&& !subqueryStmt.hasAnalyticInfo() && subqueryStmt.getHavingPred() == null) {
boolLiteral = new BoolLiteral(!predicate.isNotExists());
}
return boolLiteral;
}
/**
* Rewrites [NOT] IN predicate when the LHS is a constant and RHS is a subquery.
* If 'inPred' is not rewritten, null is returned. If 'inPred' is rewritten, the
* resulting expression is not analyzed (caller must analyze). 'outerBlock' is the
* parent block of 'inPred'.
*
* Example: SELECT * FROM t WHERE 1 IN (SELECT id FROM s)
*
* The rewrite transforms 'inPred' using the following cases. C refers to the LHS
* constant and RHS is the subquery. All cases apply to both correlated and
* uncorrelated subqueries.
*
* 1) Predicate is IN: No rewrite since it can be evaluated using the existing
* NestedLoop based Left Semijoin.
*
* 2) Predicate is NOT IN and RHS returns a single row.
*
* Example: 10 NOT IN (SELECT 1)
* Example: 10 NOT IN (SELECT MAX(b) FROM t)
* Example: 10 NOT IN (SELECT x FROM t LIMIT 1)
*
* REWRITE: C NOT IN RHS: => C != (RHS)
*
* 3) Predicate is NOT IN and RHS returns multiple rows.
*
* Example: SELECT * FROM t WHERE 1 NOT IN (SELECT id FROM s)
*
* Assume RHS is of the form SELECT expr FROM T WHERE ...
*
* REWRITE:
* C NOT IN (RHS)
* Rewrites to:
* NOT EXISTS (SELECT x FROM (SELECT x FROM RHS) tmp
* WHERE C IS NULL OR tmp.x IS NULL OR tmp.x = C)
*
* Example:
* ... 10 NOT IN (SELECT x FROM t WHERE t.y > 3)
* Rewrites to:
* ... NOT EXISTS (SELECT x (SELECT x FROM t WHERE t.y > 3) tmp
* WHERE 10 IS NULL OR tmp.x IS NULL OR tmp.x = 10)
*
* The rewrite wraps the RHS subquery in an inline view and filters it with a
* condition using the LHS constant. The inline view ensures that the filter is
* logically evaluated over the RHS result. Alternatively, injecting the filter
* into the RHS is generally incorrect so requires push-down analysis to preserve
* correctness (consider cases such as limit, aggregation, and analytic functions).
* Such special cases are avoided here by using the inline view.
* TODO: Correlated NOT IN subqueries require that column resolution be extended to
* handle references to an outer block that is more than one nesting level away.
*
* The filter constructed from the LHS constant is subtle, so warrants further
* explanation. Consider the cases where the LHS is NULL vs. NOT NULL and the RHS
* is empty vs. not-empty. When RHS subquery evaluates to the empty result set, the
* NOT EXISTS passes for all LHS values. When the RHS subquery is not-empty, it is
* useful to think of C NOT IN (RHS) as the boolean expansion:
* C != x_1 & C != x_2 & C != x_3 & ... where each x_i is bound to a result
* from the RHS subquery.
*
* So, if C is equal to any x_i, the expression is false. Similarly, if any
* x_i is null or if C is null, then the overall expression also is false.
*/
private static Expr rewriteInConstant(SelectStmt outerBlock, InPredicate inPred) {
Expr lhs = inPred.getChild(0);
Preconditions.checkArgument(lhs.isConstant());
Expr rhs = inPred.getChild(1);
QueryStmt subquery = inPred.getSubquery().getStatement();
Preconditions.checkState(subquery instanceof SelectStmt);
SelectStmt rhsQuery = (SelectStmt) subquery;
// CASE 1, IN:
if (!inPred.isNotIn()) return null;
// CASE 2, NOT IN and RHS returns a single row:
if (rhsQuery.returnsSingleRow()) {
return new BinaryPredicate(BinaryPredicate.Operator.NE, lhs, rhs);
}
// CASE 3, NOT IN, RHS returns multiple rows.
Preconditions.checkState(rhsQuery.getResultExprs().size() == 1);
// Do not rewrite NOT IN when the RHS is correlated.
if (isCorrelated(rhsQuery)) return null;
// Wrap RHS in an inline view: (select wrapperColumnAlias from RHS) wrapperTableAlias.
// Use outerBlock (parent block of subquery) to generate aliases. Doing so guarantees
// that the wrapper view does not produce the same alias if further rewritten.
String wrapperTableAlias = outerBlock.getTableAliasGenerator().getNextAlias();
String wrapperColumnAlias = outerBlock.getColumnAliasGenerator().getNextAlias();
InlineViewRef wrapperView = new InlineViewRef(wrapperTableAlias, rhsQuery,
Lists.newArrayList(wrapperColumnAlias));
SlotRef wrapperResult =
new SlotRef(Lists.newArrayList(wrapperTableAlias, wrapperColumnAlias));
// Build: lhs IS NULL OR rhsResultExpr IS NULL OR lhs = rhs
Expr rewritePredicate = new CompoundPredicate(CompoundPredicate.Operator.OR,
new IsNullPredicate(lhs, false),
new CompoundPredicate(CompoundPredicate.Operator.OR,
new IsNullPredicate(wrapperResult, false),
new BinaryPredicate(BinaryPredicate.Operator.EQ, wrapperResult, lhs)));
List<TableRef> fromList = new ArrayList<>();
fromList.add(wrapperView);
SelectStmt rewriteQuery = new SelectStmt(
new SelectList(Lists.newArrayList(new SelectListItem(wrapperResult, null))),
new FromClause(fromList), rewritePredicate, null, null, null, null);
Subquery newSubquery = new Subquery(rewriteQuery);
rhsQuery.reset();
// Build: NOT EXISTS(newSubquery)
return new ExistsPredicate(newSubquery, true);
}
/**
* Tests if a subquery is correlated to its outer block.
*/
private static boolean isCorrelated(SelectStmt subqueryStmt) {
if (!subqueryStmt.hasWhereClause()) return false;
return containsCorrelatedPredicate(subqueryStmt.getWhereClause(),
subqueryStmt.getTableRefIds());
}
/**
* Merge an expr containing a subquery with a SelectStmt 'stmt' by
* converting the subquery stmt of the former into an inline view and
* creating a join between the new inline view and the right-most table
* from 'stmt'. Return true if the rewrite introduced a new visible tuple
* due to a CROSS JOIN or a LEFT OUTER JOIN.
* <p>
* This process works as follows:
* 1. Create a new inline view with the subquery as the view's stmt. Changes
* made to the subquery's stmt will affect the inline view.
* 2. Extract all correlated predicates from the subquery's WHERE
* clause; the subquery's select list may be extended with new items and a
* GROUP BY clause may be added.
* 3. Add the inline view to stmt's tableRefs and create a
* join (left semi join, anti-join, left outer join for agg functions
* that return a non-NULL value for an empty input, or cross-join) with
* stmt's right-most table.
* 4. Initialize the ON clause of the new join from the original subquery
* predicate and the new inline view.
* 5. Apply expr substitutions such that the extracted correlated predicates
* refer to columns of the new inline view.
* 6. Add all extracted correlated predicates to the ON clause.
*/
private static boolean mergeExpr(SelectStmt stmt, Expr expr, Analyzer analyzer)
throws AnalysisException {
Preconditions.checkNotNull(expr);
Preconditions.checkNotNull(analyzer);
boolean updateSelectList = false;
SelectStmt subqueryStmt = (SelectStmt) expr.getSubquery().getStatement();
boolean isScalarSubquery = expr.getSubquery().isScalarSubquery();
boolean isRuntimeScalar = subqueryStmt.isRuntimeScalar();
// Create a new inline view from the subquery stmt. The inline view will be added
// to the stmt's table refs later. Explicitly set the inline view's column labels
// to eliminate any chance that column aliases from the parent query could reference
// select items from the inline view after the rewrite.
List<String> colLabels = new ArrayList<>();
for (int i = 0; i < subqueryStmt.getColLabels().size(); ++i) {
colLabels.add(subqueryStmt.getColumnAliasGenerator().getNextAlias());
}
InlineViewRef inlineView =
new InlineViewRef(stmt.getTableAliasGenerator().getNextAlias(), subqueryStmt,
colLabels);
// Extract all correlated predicates from the subquery.
List<Expr> onClauseConjuncts = extractCorrelatedPredicates(subqueryStmt);
if (!onClauseConjuncts.isEmpty()) {
validateCorrelatedSubqueryStmt(expr);
// For correlated subqueries, a LIMIT clause has no effect on the results, so we can
// safely remove it.
subqueryStmt.limitElement_ = new LimitElement(null, null);
}
// If runtime scalar, we need to prevent the propagation of predicates into the
// inline view by setting a limit on the statement.
if (isRuntimeScalar) subqueryStmt.setLimit(2);
// Update the subquery's select list and/or its GROUP BY clause by adding
// exprs from the extracted correlated predicates.
boolean updateGroupBy = isScalarSubquery
|| (expr instanceof ExistsPredicate
&& !subqueryStmt.getSelectList().isDistinct()
&& subqueryStmt.hasMultiAggInfo());
List<Expr> lhsExprs = new ArrayList<>();
List<Expr> rhsExprs = new ArrayList<>();
for (Expr conjunct : onClauseConjuncts) {
updateInlineView(inlineView, conjunct, stmt.getTableRefIds(), lhsExprs, rhsExprs,
updateGroupBy);
}
// Analyzing the inline view triggers reanalysis of the subquery's select statement.
// However the statement is already analyzed and since statement analysis is not
// idempotent, the analysis needs to be reset.
inlineView.reset();
inlineView.analyze(analyzer);
inlineView.setLeftTblRef(stmt.fromClause_.get(stmt.fromClause_.size() - 1));
stmt.fromClause_.add(inlineView);
JoinOperator joinOp = JoinOperator.LEFT_SEMI_JOIN;
// Create a join conjunct from the expr that contains a subquery.
Expr joinConjunct =
createJoinConjunct(expr, inlineView, analyzer, !onClauseConjuncts.isEmpty());
if (joinConjunct != null) {
SelectListItem firstItem =
((SelectStmt) inlineView.getViewStmt()).getSelectList().getItems().get(0);
if (!onClauseConjuncts.isEmpty() && firstItem.getExpr() != null &&
firstItem.getExpr().contains(Expr.NON_NULL_EMPTY_AGG)) {
// Correlated subqueries with an aggregate function that returns non-null on
// an empty input are rewritten using a LEFT OUTER JOIN because we
// need to ensure that there is one agg value for every tuple of 'stmt'
// (parent select block), even for those tuples of 'stmt' that get rejected
// by the subquery due to some predicate. The new join conjunct is added to
// stmt's WHERE clause because it needs to be applied to the result of the
// LEFT OUTER JOIN (both matched and unmatched tuples).
//
// TODO Handle other aggregate functions and UDAs that return a non-NULL value
// on an empty set.
// TODO Handle count aggregate functions in an expression in subqueries
// select list.
stmt.whereClause_ =
CompoundPredicate.createConjunction(joinConjunct, stmt.whereClause_);
joinConjunct = null;
joinOp = JoinOperator.LEFT_OUTER_JOIN;
updateSelectList = true;
}
if (joinConjunct != null) onClauseConjuncts.add(joinConjunct);
}
// Ensure that all the extracted correlated predicates can be added to the ON-clause
// of the generated join.
if (!onClauseConjuncts.isEmpty()) {
validateCorrelatedPredicates(expr, inlineView, onClauseConjuncts);
}
// Create the ON clause from the extracted correlated predicates.
Expr onClausePredicate =
CompoundPredicate.createConjunctivePredicate(onClauseConjuncts);
if (onClausePredicate == null) {
Preconditions.checkState(expr instanceof ExistsPredicate);
ExistsPredicate existsPred = (ExistsPredicate) expr;
// TODO This is very expensive if uncorrelated. Remove it when we implement
// independent subquery evaluation.
if (existsPred.isNotExists()) {
inlineView.setJoinOp(JoinOperator.LEFT_ANTI_JOIN);
} else {
inlineView.setJoinOp(JoinOperator.LEFT_SEMI_JOIN);
}
// Note that the concept of a 'correlated inline view' is similar but not the same
// as a 'correlated subquery', i.e., a subquery with a correlated predicate.
if (!inlineView.isCorrelated()) {
// For uncorrelated subqueries, we limit the number of rows returned by the
// subquery.
subqueryStmt.setLimit(1);
inlineView.setOnClause(new BoolLiteral(true));
}
return false;
}
// Create an smap from the original select-list exprs of the select list to
// the corresponding inline-view columns.
ExprSubstitutionMap smap = new ExprSubstitutionMap();
Preconditions.checkState(lhsExprs.size() == rhsExprs.size());
for (int i = 0; i < lhsExprs.size(); ++i) {
Expr lhsExpr = lhsExprs.get(i);
Expr rhsExpr = rhsExprs.get(i);
rhsExpr.analyze(analyzer);
smap.put(lhsExpr, rhsExpr);
}
onClausePredicate = onClausePredicate.substitute(smap, analyzer, false);
// Check for references to ancestor query blocks (cycles in the dependency
// graph of query blocks are not supported).
if (!onClausePredicate.isBoundByTupleIds(stmt.getTableRefIds())) {
throw new AnalysisException(
"Unsupported correlated subquery: " + subqueryStmt.toSql());
}
// Check if we have a valid ON clause for an equi-join.
boolean hasEqJoinPred = false;
for (Expr conjunct : onClausePredicate.getConjuncts()) {
if (!(conjunct instanceof BinaryPredicate)) continue;
BinaryPredicate.Operator operator = ((BinaryPredicate) conjunct).getOp();
if (!operator.isEquivalence()) continue;
List<TupleId> lhsTupleIds = new ArrayList<>();
conjunct.getChild(0).getIds(lhsTupleIds, null);
// Allows for constants to be a join predicate.
if (lhsTupleIds.isEmpty() && !conjunct.getChild(0).isConstant()) continue;
List<TupleId> rhsTupleIds = new ArrayList<>();
conjunct.getChild(1).getIds(rhsTupleIds, null);
if (rhsTupleIds.isEmpty()) continue;
// Check if columns from the outer query block (stmt) appear in both sides
// of the binary predicate.
if ((lhsTupleIds.contains(inlineView.getDesc().getId()) &&
lhsTupleIds.size() > 1) ||
(rhsTupleIds.contains(inlineView.getDesc().getId()) &&
rhsTupleIds.size() > 1)) {
continue;
}
hasEqJoinPred = true;
break;
}
if (!hasEqJoinPred && !inlineView.isCorrelated()) {
// TODO: Remove this when independent subquery evaluation is implemented.
// TODO: Requires support for non-equi joins.
boolean hasGroupBy = ((SelectStmt) inlineView.getViewStmt()).hasGroupByClause();
if ((!isScalarSubquery && !isRuntimeScalar) ||
(hasGroupBy && !stmt.selectList_.isDistinct())) {
throw new AnalysisException(
"Unsupported predicate with subquery: " + expr.toSql());
}
// TODO: Requires support for null-aware anti-join mode in nested-loop joins
if (isScalarSubquery && expr instanceof InPredicate &&
((InPredicate) expr).isNotIn()) {
throw new AnalysisException(
"Unsupported NOT IN predicate with subquery: " + expr.toSql());
}
// We can rewrite the aggregate subquery using a cross join. All conjuncts
// that were extracted from the subquery are added to stmt's WHERE clause.
stmt.whereClause_ =
CompoundPredicate.createConjunction(onClausePredicate, stmt.whereClause_);
inlineView.setJoinOp(JoinOperator.CROSS_JOIN);
// Indicate that the CROSS JOIN may add a new visible tuple to stmt's
// select list (if the latter contains an unqualified star item '*')
return true;
}
// We have a valid equi-join conjunct or the inline view is correlated.
if (expr instanceof InPredicate && ((InPredicate) expr).isNotIn() ||
expr instanceof ExistsPredicate && ((ExistsPredicate) expr).isNotExists()) {
// For the case of a NOT IN with an eq join conjunct, replace the join
// conjunct with a conjunct that uses the null-matching eq operator.
if (expr instanceof InPredicate) {
joinOp = JoinOperator.NULL_AWARE_LEFT_ANTI_JOIN;
List<TupleId> tIds = new ArrayList<>();
joinConjunct.getIds(tIds, null);
if (tIds.size() <= 1 || !tIds.contains(inlineView.getDesc().getId())) {
throw new AnalysisException(
"Unsupported NOT IN predicate with subquery: " + expr.toSql());
}
// Replace the EQ operator in the generated join conjunct with a
// null-matching EQ operator.
for (Expr conjunct : onClausePredicate.getConjuncts()) {
if (conjunct.equals(joinConjunct)) {
Preconditions.checkState(conjunct instanceof BinaryPredicate);
BinaryPredicate binaryPredicate = (BinaryPredicate) conjunct;
Preconditions.checkState(binaryPredicate.getOp().isEquivalence());
binaryPredicate.setOp(BinaryPredicate.Operator.NULL_MATCHING_EQ);
break;
}
}
} else {
joinOp = JoinOperator.LEFT_ANTI_JOIN;
}
}
inlineView.setJoinOp(joinOp);
inlineView.setOnClause(onClausePredicate);
return updateSelectList;
}
/**
* Replace all unqualified star exprs ('*') from stmt's select list with qualified
* ones, i.e. tbl_1.*,...,tbl_n.*, where tbl_1,...,tbl_n are the visible tablerefs
* in stmt. 'tableIdx' indicates the maximum tableRef ordinal to consider when
* replacing an unqualified star item.
*/
private static void replaceUnqualifiedStarItems(SelectStmt stmt, int tableIdx) {
Preconditions.checkState(tableIdx < stmt.fromClause_.size());
List<SelectListItem> newItems = new ArrayList<>();
for (int i = 0; i < stmt.selectList_.getItems().size(); ++i) {
SelectListItem item = stmt.selectList_.getItems().get(i);
if (!item.isStar() || item.getRawPath() != null) {
newItems.add(item);
continue;
}
// '*' needs to be replaced by tbl1.*,...,tbln.*, where
// tbl1,...,tbln are the visible tableRefs in stmt.
for (int j = 0; j < tableIdx; ++j) {
TableRef tableRef = stmt.fromClause_.get(j);
if (tableRef.getJoinOp() == JoinOperator.LEFT_SEMI_JOIN ||
tableRef.getJoinOp() == JoinOperator.LEFT_ANTI_JOIN) {
continue;
}
newItems.add(SelectListItem
.createStarItem(Lists.newArrayList(tableRef.getUniqueAlias())));
}
}
Preconditions.checkState(!newItems.isEmpty());
boolean isDistinct = stmt.selectList_.isDistinct();
stmt.selectList_ =
new SelectList(newItems, isDistinct, stmt.selectList_.getPlanHints());
}
/**
* Return true if the Expr tree rooted at 'expr' can be safely
* eliminated, i.e. it only consists of conjunctions of true BoolLiterals.
*/
private static boolean canEliminate(Expr expr) {
for (Expr conjunct : expr.getConjuncts()) {
if (!Expr.IS_TRUE_LITERAL.apply(conjunct)) return false;
}
return true;
}
/**
* Extract all correlated predicates of a subquery.
* <p>
* TODO Handle correlated predicates in a HAVING clause.
*/
private static List<Expr> extractCorrelatedPredicates(SelectStmt subqueryStmt)
throws AnalysisException {
List<TupleId> subqueryTupleIds = subqueryStmt.getTableRefIds();
List<Expr> correlatedPredicates = new ArrayList<>();
if (subqueryStmt.hasWhereClause()) {
if (!canExtractCorrelatedPredicates(subqueryStmt.getWhereClause(),
subqueryTupleIds)) {
throw new AnalysisException(
"Disjunctions with correlated predicates " + "are not supported: " +
subqueryStmt.getWhereClause().toSql());
}
// Extract the correlated predicates from the subquery's WHERE clause and
// replace them with true BoolLiterals.
Expr newWhereClause =
extractCorrelatedPredicates(subqueryStmt.getWhereClause(), subqueryTupleIds,
correlatedPredicates);
if (canEliminate(newWhereClause)) newWhereClause = null;
subqueryStmt.setWhereClause(newWhereClause);
}
// Process all correlated predicates from subquery's ON clauses.
for (TableRef tableRef : subqueryStmt.getTableRefs()) {
if (tableRef.getOnClause() == null) continue;
List<Expr> onClauseCorrelatedPreds = new ArrayList<>();
Expr newOnClause =
extractCorrelatedPredicates(tableRef.getOnClause(), subqueryTupleIds,
onClauseCorrelatedPreds);
if (onClauseCorrelatedPreds.isEmpty()) continue;
correlatedPredicates.addAll(onClauseCorrelatedPreds);
if (canEliminate(newOnClause)) {
// After the extraction of correlated predicates from an ON clause,
// the latter may only contain conjunctions of True BoolLiterals. In
// this case, we can eliminate the ON clause and set the join type to
// CROSS JOIN.
tableRef.setJoinOp(JoinOperator.CROSS_JOIN);
tableRef.setOnClause(null);
} else {
tableRef.setOnClause(newOnClause);
}
}
return correlatedPredicates;
}
/**
* Extract all correlated predicates from the expr tree rooted at 'root' and
* replace them with true BoolLiterals. The modified expr tree is returned
* and the extracted correlated predicates are added to 'matches'.
*/
private static Expr extractCorrelatedPredicates(Expr root, List<TupleId> tupleIds,
List<Expr> matches) {
if (isCorrelatedPredicate(root, tupleIds)) {
matches.add(root);
return new BoolLiteral(true);
}
for (int i = 0; i < root.getChildren().size(); ++i) {
root.getChildren()
.set(i, extractCorrelatedPredicates(root.getChild(i), tupleIds, matches));
}
return root;
}
/**
* Checks if an expr containing a correlated subquery is eligible for rewrite by
* transforming into a join. Throws an AnalysisException if 'expr' is not eligible for
* rewrite.
* TODO: Merge all the rewrite eligibility tests into a single function.
*/
private static void validateCorrelatedSubqueryStmt(Expr expr)
throws AnalysisException {
Preconditions.checkNotNull(expr);
Preconditions.checkState(expr.contains(Subquery.class));
SelectStmt stmt = (SelectStmt) expr.getSubquery().getStatement();
Preconditions.checkNotNull(stmt);
// Grouping and/or aggregation is not allowed on correlated scalar and IN subqueries
if ((expr instanceof BinaryPredicate
&& (stmt.hasGroupByClause() || stmt.hasAnalyticInfo()))
|| (expr instanceof InPredicate
&& (stmt.hasMultiAggInfo() || stmt.hasAnalyticInfo()))) {
throw new AnalysisException(
"Unsupported correlated subquery with grouping " + "and/or aggregation: " +
stmt.toSql());
}
// TODO: instead of this check, implement IMPALA-6315
if (!expr.getSubquery().isScalarSubquery() &&
!(expr instanceof InPredicate || expr instanceof ExistsPredicate)) {
throw new AnalysisException(
"Unsupported correlated subquery with runtime scalar check: " + stmt.toSql());
}
// The following correlated subqueries with a limit clause are supported:
// 1. EXISTS subqueries
// 2. Scalar subqueries with aggregation
if (stmt.hasLimit()
&& (!(expr instanceof BinaryPredicate) || !stmt.hasMultiAggInfo()
|| stmt.selectList_.isDistinct())
&& !(expr instanceof ExistsPredicate)) {
throw new AnalysisException(
"Unsupported correlated subquery with a " + "LIMIT clause: " + stmt.toSql());
}
}
/**
* Checks if all the 'correlatedPredicates' extracted from the subquery of 'expr' can be
* added to the ON-clause of the join that results from the subquery rewrite. It throws
* an AnalysisException if this is not the case. 'inlineView' is the generated inline
* view that will replace the subquery in the rewritten statement.
*/
private static void validateCorrelatedPredicates(Expr expr, InlineViewRef inlineView,
List<Expr> correlatedPredicates) throws AnalysisException {
Preconditions.checkNotNull(expr);
Preconditions.checkNotNull(correlatedPredicates);
Preconditions.checkState(inlineView.isAnalyzed());
SelectStmt stmt = (SelectStmt) expr.getSubquery().getStatement();
final com.google.common.base.Predicate<Expr> isSingleSlotRef =
new com.google.common.base.Predicate<Expr>() {
@Override
public boolean apply(Expr arg) { return arg.unwrapSlotRef(false) != null; }
};
// A HAVING clause is only allowed on correlated EXISTS subqueries with
// correlated binary predicates of the form Slot = Slot (see IMPALA-2734)
// TODO Handle binary predicates with IS NOT DISTINCT op
if (expr instanceof ExistsPredicate && stmt.hasHavingClause()
&& !correlatedPredicates.isEmpty()
&& (!stmt.hasMultiAggInfo()
|| !Iterables.all(correlatedPredicates,
Predicates.or(Expr.IS_EQ_BINARY_PREDICATE, isSingleSlotRef)))) {
throw new AnalysisException(
"Unsupported correlated EXISTS subquery with a " + "HAVING clause: " +
stmt.toSql());
}
// We only support equality correlated predicates in aggregate subqueries
// (see IMPALA-5531). This check needs to be performed after the inline view
// has been analyzed to make sure we don't incorrectly reject non-equality
// correlated predicates from nested collections.
if (expr instanceof BinaryPredicate && !inlineView.isCorrelated() &&
!correlatedPredicates.isEmpty()) {
final List<TupleId> subqueryTblIds = stmt.getTableRefIds();
final com.google.common.base.Predicate<Expr> isBoundBySubqueryTids =
new com.google.common.base.Predicate<Expr>() {
@Override
public boolean apply(Expr arg) {
List<TupleId> tids = new ArrayList<>();
arg.getIds(tids, null);
return !Collections.disjoint(tids, subqueryTblIds);
}
};
List<Expr> unsupportedPredicates = Lists.newArrayList(Iterables
.filter(correlatedPredicates,
Predicates.and(Expr.IS_NOT_EQ_BINARY_PREDICATE, isBoundBySubqueryTids)));
if (!unsupportedPredicates.isEmpty()) {
throw new AnalysisException("Unsupported aggregate subquery with "
+ "non-equality correlated predicates: "
+ Expr.listToSql(unsupportedPredicates, DEFAULT));
}
}
}
/**
* Update the subquery within an inline view by expanding its select list with exprs
* from a correlated predicate 'expr' that will be 'moved' to an ON clause in the
* subquery's parent query block. We need to make sure that every expr extracted from
* the subquery references an item in the subquery's select list. If 'updateGroupBy'
* is true, the exprs extracted from 'expr' are also added in stmt's GROUP BY clause.
* Throws an AnalysisException if we need to update the GROUP BY clause but
* both the lhs and rhs of 'expr' reference a tuple of the subquery stmt.
*/
private static void updateInlineView(InlineViewRef inlineView, Expr expr,
List<TupleId> parentQueryTids, List<Expr> lhsExprs, List<Expr> rhsExprs,
boolean updateGroupBy) throws AnalysisException {
SelectStmt stmt = (SelectStmt) inlineView.getViewStmt();
List<TupleId> subqueryTblIds = stmt.getTableRefIds();
List<Expr> groupByExprs = null;
if (updateGroupBy) groupByExprs = new ArrayList<>();
List<SelectListItem> items = stmt.selectList_.getItems();
// Collect all the SlotRefs from 'expr' and identify those that are bound by
// subquery tuple ids.
List<Expr> slotRefs = new ArrayList<>();
expr.collectAll(Predicates.instanceOf(SlotRef.class), slotRefs);
List<Expr> exprsBoundBySubqueryTids = new ArrayList<>();
for (Expr slotRef : slotRefs) {
if (slotRef.isBoundByTupleIds(subqueryTblIds)) {
exprsBoundBySubqueryTids.add(slotRef);
}
}
// The correlated predicate only references slots from a parent block,
// no need to update the subquery's select or group by list.
if (exprsBoundBySubqueryTids.isEmpty()) return;
if (updateGroupBy) {
Preconditions.checkState(expr instanceof BinaryPredicate);
Expr exprBoundBySubqueryTids;
if (exprsBoundBySubqueryTids.size() > 1) {
// If the predicate contains multiple SlotRefs bound by subquery tuple
// ids, they must all be on the same side of that predicate.
if (expr.getChild(0).isBoundByTupleIds(subqueryTblIds) &&
expr.getChild(1).isBoundByTupleIds(parentQueryTids)) {
exprBoundBySubqueryTids = expr.getChild(0);
} else if (expr.getChild(0).isBoundByTupleIds(parentQueryTids) &&
expr.getChild(1).isBoundByTupleIds(subqueryTblIds)) {
exprBoundBySubqueryTids = expr.getChild(1);
} else {
throw new AnalysisException("All subquery columns " +
"that participate in a predicate must be on the same side of " +
"that predicate: " + expr.toSql());
}
} else {
Preconditions.checkState(exprsBoundBySubqueryTids.size() == 1);
exprBoundBySubqueryTids = exprsBoundBySubqueryTids.get(0);
}
exprsBoundBySubqueryTids.clear();
exprsBoundBySubqueryTids.add(exprBoundBySubqueryTids);
}
// Add the exprs bound by subquery tuple ids to the select list and
// register it for substitution. We use a temporary substitution map
// because we cannot at this point analyze the new select list expr. Once
// the new inline view is analyzed, the entries from this map will be
// added to an ExprSubstitutionMap.
for (Expr boundExpr : exprsBoundBySubqueryTids) {
String colAlias = stmt.getColumnAliasGenerator().getNextAlias();
items.add(new SelectListItem(boundExpr, null));
inlineView.getExplicitColLabels().add(colAlias);
lhsExprs.add(boundExpr);
rhsExprs
.add(new SlotRef(Lists.newArrayList(inlineView.getUniqueAlias(), colAlias)));
if (groupByExprs != null) groupByExprs.add(boundExpr);
}
// Update the subquery's select list.
boolean isDistinct = stmt.selectList_.isDistinct();
stmt.selectList_ =
new SelectList(items, isDistinct, stmt.selectList_.getPlanHints());
// Update subquery's GROUP BY clause
if (groupByExprs != null && !groupByExprs.isEmpty()) {
if (stmt.hasGroupByClause()) {
stmt.groupingExprs_.addAll(groupByExprs);
} else {
stmt.groupingExprs_ = groupByExprs;
}
}
}
/**
* Returns true if we can extract the correlated predicates from 'expr'. A
* correlated predicate cannot be extracted if it is part of a disjunction.
*/
private static boolean canExtractCorrelatedPredicates(Expr expr,
List<TupleId> subqueryTupleIds) {
if (!(expr instanceof CompoundPredicate)) return true;
if (Expr.IS_OR_PREDICATE.apply(expr)) {
return !containsCorrelatedPredicate(expr, subqueryTupleIds);
}
for (Expr child : expr.getChildren()) {
if (!canExtractCorrelatedPredicates(child, subqueryTupleIds)) {
return false;
}
}
return true;
}
/**
* Return true if the expr tree rooted at 'root' contains a correlated
* predicate.
*/
private static boolean containsCorrelatedPredicate(Expr root,
List<TupleId> tupleIds) {
if (isCorrelatedPredicate(root, tupleIds)) return true;
for (Expr child : root.getChildren()) {
if (containsCorrelatedPredicate(child, tupleIds)) return true;
}
return false;
}
/**
* Returns true if 'expr' is a correlated predicate. A predicate is
* correlated if at least one of its SlotRefs belongs to an ancestor
* query block (i.e. is not bound by the given 'tupleIds').
*/
private static boolean isCorrelatedPredicate(Expr expr, List<TupleId> tupleIds) {
return (expr instanceof BinaryPredicate || expr instanceof SlotRef) &&
!expr.isBoundByTupleIds(tupleIds);
}
/**
* Converts an expr containing a subquery into an analyzed conjunct to be
* used in a join. The conversion is performed in place by replacing the
* subquery with the first expr from the select list of 'inlineView'.
* If 'isCorrelated' is true and the first expr from the inline view contains
* an aggregate function that returns non-null on an empty input,
* the aggregate function is wrapped into a 'zeroifnull' function.
*/
private static Expr createJoinConjunct(Expr exprWithSubquery,
InlineViewRef inlineView, Analyzer analyzer, boolean isCorrelated)
throws AnalysisException {
Preconditions.checkNotNull(exprWithSubquery);
Preconditions.checkNotNull(inlineView);
Preconditions.checkState(exprWithSubquery.contains(Subquery.class));
if (exprWithSubquery instanceof ExistsPredicate) return null;
// Create a SlotRef from the first item of inlineView's select list
SlotRef slotRef = new SlotRef(Lists
.newArrayList(inlineView.getUniqueAlias(), inlineView.getColLabels().get(0)));
slotRef.analyze(analyzer);
Expr subquerySubstitute = slotRef;
if (exprWithSubquery instanceof InPredicate) {
BinaryPredicate pred =
new BinaryPredicate(BinaryPredicate.Operator.EQ, exprWithSubquery.getChild(0),
slotRef);
pred.analyze(analyzer);
return pred;
}
Subquery subquery = exprWithSubquery.getSubquery();
Preconditions.checkState(subquery.getType().isScalarType());
ExprSubstitutionMap smap = new ExprSubstitutionMap();
SelectListItem item =
((SelectStmt) inlineView.getViewStmt()).getSelectList().getItems().get(0);
if (isCorrelated && item.getExpr().contains(Expr.IS_UDA_FN)) {
throw new AnalysisException(
"UDAs are not supported in the select list of " + "correlated subqueries: " +
subquery.toSql());
}
if (isCorrelated && item.getExpr().contains(Expr.NON_NULL_EMPTY_AGG)) {
// TODO: Add support for multiple agg functions that return non-null on an
// empty input, by wrapping them with zeroifnull functions before the inline
// view is analyzed.
if (!Expr.NON_NULL_EMPTY_AGG.apply(item.getExpr()) &&
(!(item.getExpr() instanceof CastExpr) ||
!Expr.NON_NULL_EMPTY_AGG.apply(item.getExpr().getChild(0)))) {
throw new AnalysisException("Aggregate function that returns non-null on " +
"an empty input cannot be used in an expression in a " +
"correlated subquery's select list: " + subquery.toSql());
}
List<Expr> aggFns = new ArrayList<>();
item.getExpr().collectAll(Expr.NON_NULL_EMPTY_AGG, aggFns);
// TODO Generalize this by making the aggregate functions aware of the
// literal expr that they return on empty input, e.g. max returns a
// NullLiteral whereas count returns a NumericLiteral.
if (((FunctionCallExpr) aggFns.get(0)).getReturnType().isNumericType()) {
FunctionCallExpr zeroIfNull =
new FunctionCallExpr("zeroifnull", Lists.newArrayList((Expr) slotRef));
zeroIfNull.analyze(analyzer);
subquerySubstitute = zeroIfNull;
} else if (((FunctionCallExpr) aggFns.get(0)).getReturnType().isStringType()) {
List<Expr> params = new ArrayList<>();
params.add(slotRef);
params.add(new StringLiteral(""));
FunctionCallExpr ifnull = new FunctionCallExpr("ifnull", params);
ifnull.analyze(analyzer);
subquerySubstitute = ifnull;
} else {
throw new AnalysisException("Unsupported aggregate function used in " +
"a correlated subquery's select list: " + subquery.toSql());
}
}
smap.put(subquery, subquerySubstitute);
return exprWithSubquery.substitute(smap, analyzer, false);
}
/**
* Rewrite all the subqueries of a SelectStmt in place. Subqueries are currently
* supported in FROM and WHERE clauses. The rewrite is performed in place and not in a
* clone of SelectStmt because it requires the stmt to be analyzed.
*/
@Override
protected void rewriteSelectStmtHook(SelectStmt stmt, Analyzer analyzer)
throws AnalysisException {
// Rewrite all the subqueries in the WHERE clause.
if (stmt.hasWhereClause()) {
// Push negation to leaf operands.
stmt.whereClause_ = Expr.pushNegationToOperands(stmt.whereClause_);
// Check if we can rewrite the subqueries in the WHERE clause. OR predicates with
// subqueries are not supported.
if (hasSubqueryInDisjunction(stmt.whereClause_)) {
throw new AnalysisException("Subqueries in OR predicates are not supported: " +
stmt.whereClause_.toSql());
}
rewriteWhereClauseSubqueries(stmt, analyzer);
}
}
/**
* Rewrite all subqueries of a stmt's WHERE clause. Initially, all the
* conjuncts containing subqueries are extracted from the WHERE clause and are
* replaced with true BoolLiterals. Subsequently, each extracted conjunct is
* merged into its parent select block by converting it into a join.
* Conjuncts with subqueries that themselves contain conjuncts with subqueries are
* recursively rewritten in a bottom up fashion.
*
* The following example illustrates the bottom up rewriting of nested queries.
* Suppose we have the following three level nested query Q0:
*
* SELECT *
* FROM T1 : Q0
* WHERE T1.a IN (SELECT a
* FROM T2 WHERE T2.b IN (SELECT b
* FROM T3))
* AND T1.c < 10;
*
* This query will be rewritten as follows. Initially, the IN predicate
* T1.a IN (SELECT a FROM T2 WHERE T2.b IN (SELECT b FROM T3)) is extracted
* from the top level block (Q0) since it contains a subquery and is
* replaced by a true BoolLiteral, resulting in the following query Q1:
*
* SELECT * FROM T1 WHERE TRUE : Q1
*
* Since the stmt in the extracted predicate contains a conjunct with a subquery,
* it is also rewritten. As before, rewriting stmt SELECT a FROM T2
* WHERE T2.b IN (SELECT b FROM T3) works by first extracting the conjunct that
* contains the subquery (T2.b IN (SELECT b FROM T3)) and substituting it with
* a true BoolLiteral, producing the following stmt Q2:
*
* SELECT a FROM T2 WHERE TRUE : Q2
*
* The predicate T2.b IN (SELECT b FROM T3) is then merged with Q2,
* producing the following unnested query Q3:
*
* SELECT a FROM T2 LEFT SEMI JOIN (SELECT b FROM T3) $a$1 ON T2.b = $a$1.b : Q3
*
* The extracted IN predicate becomes:
*
* T1.a IN (SELECT a FROM T2 LEFT SEMI JOIN (SELECT b FROM T3) $a$1 ON T2.b = $a$1.b)
*
* Finally, the rewritten IN predicate is merged with query block Q1,
* producing the following unnested query (WHERE clauses that contain only
* conjunctions of true BoolLiterals are eliminated):
*
* SELECT *
* FROM T1 LEFT SEMI JOIN (SELECT a
* FROM T2 LEFT SEMI JOIN (SELECT b FROM T3) $a$1
* ON T2.b = $a$1.b) $a$1
* ON $a$1.a = T1.a
* WHERE T1.c < 10;
*
*/
private void rewriteWhereClauseSubqueries(SelectStmt stmt, Analyzer analyzer)
throws AnalysisException {
int numTableRefs = stmt.fromClause_.size();
List<Expr> exprsWithSubqueries = new ArrayList<>();
ExprSubstitutionMap smap = new ExprSubstitutionMap();
// Check if all the conjuncts in the WHERE clause that contain subqueries
// can currently be rewritten as a join.
for (Expr conjunct : stmt.whereClause_.getConjuncts()) {
List<Subquery> subqueries = new ArrayList<>();
conjunct.collectAll(Predicates.instanceOf(Subquery.class), subqueries);
if (subqueries.size() == 0) continue;
if (subqueries.size() > 1) {
throw new AnalysisException(
"Multiple subqueries are not supported in " + "expression: " +
conjunct.toSql());
}
if (!(conjunct instanceof InPredicate) &&
!(conjunct instanceof ExistsPredicate) &&
!(conjunct instanceof BinaryPredicate) &&
!conjunct.getSubquery().getType().isScalarType()) {
throw new AnalysisException(
"Non-scalar subquery is not supported in " + "expression: " +
conjunct.toSql());
}
Expr rewrittenConjunct = conjunct;
if (conjunct instanceof InPredicate && conjunct.getChild(0).isConstant()) {
Expr newConjunct = rewriteInConstant(stmt, (InPredicate) conjunct);
if (newConjunct != null) {
newConjunct.analyze(analyzer);
rewrittenConjunct = newConjunct;
}
}
if (rewrittenConjunct instanceof ExistsPredicate) {
// Check if we can determine the result of an ExistsPredicate during analysis.
// If so, replace the predicate with a BoolLiteral predicate and remove it from
// the list of predicates to be rewritten.
BoolLiteral boolLiteral =
replaceExistsPredicate((ExistsPredicate) rewrittenConjunct);
if (boolLiteral != null) {
boolLiteral.analyze(analyzer);
smap.put(conjunct, boolLiteral);
continue;
}
}
// Replace all the supported exprs with subqueries with true BoolLiterals
// using an smap.
BoolLiteral boolLiteral = new BoolLiteral(true);
boolLiteral.analyze(analyzer);
smap.put(conjunct, boolLiteral);
exprsWithSubqueries.add(rewrittenConjunct);
}
stmt.whereClause_ = stmt.whereClause_.substitute(smap, analyzer, false);
boolean hasNewVisibleTuple = false;
// Recursively rewrite all the exprs that contain subqueries and merge them
// with 'stmt'.
for (Expr expr : exprsWithSubqueries) {
if (mergeExpr(stmt, rewriteExpr(expr, analyzer), analyzer)) {
hasNewVisibleTuple = true;
}
}
if (canEliminate(stmt.whereClause_)) stmt.whereClause_ = null;
if (hasNewVisibleTuple) replaceUnqualifiedStarItems(stmt, numTableRefs);
}
/**
* Modifies in place an expr that contains a subquery by rewriting its
* subquery stmt. The modified analyzed expr is returned.
*/
private Expr rewriteExpr(Expr expr, Analyzer analyzer) throws AnalysisException {
// Extract the subquery and rewrite it.
Subquery subquery = expr.getSubquery();
Preconditions.checkNotNull(subquery);
rewriteSelectStatement((SelectStmt) subquery.getStatement(),
subquery.getAnalyzer());
// Create a new Subquery with the rewritten stmt and use a substitution map
// to replace the original subquery from the expr.
QueryStmt rewrittenStmt = subquery.getStatement().clone();
rewrittenStmt.reset();
Subquery newSubquery = new Subquery(rewrittenStmt);
newSubquery.analyze(analyzer);
ExprSubstitutionMap smap = new ExprSubstitutionMap();
smap.put(subquery, newSubquery);
return expr.substitute(smap, analyzer, false);
}
}
}