|  | /** | 
|  | * Licensed to the Apache Software Foundation (ASF) under one | 
|  | * or more contributor license agreements.  See the NOTICE file | 
|  | * distributed with this work for additional information | 
|  | * regarding copyright ownership.  The ASF licenses this file | 
|  | * to you under the Apache License, Version 2.0 (the | 
|  | * "License"); you may not use this file except in compliance | 
|  | * with the License.  You may obtain a copy of the License at | 
|  | * | 
|  | *   http://www.apache.org/licenses/LICENSE-2.0 | 
|  | * | 
|  | * Unless required by applicable law or agreed to in writing, | 
|  | * software distributed under the License is distributed on an | 
|  | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY | 
|  | * KIND, either express or implied.  See the License for the | 
|  | * specific language governing permissions and limitations | 
|  | * under the License. | 
|  | **/ | 
|  |  | 
|  | #include "query_optimizer/rules/UnnestSubqueries.hpp" | 
|  |  | 
|  | #include <algorithm> | 
|  | #include <functional> | 
|  | #include <memory> | 
|  | #include <set> | 
|  | #include <utility> | 
|  | #include <vector> | 
|  |  | 
|  | #include "query_optimizer/OptimizerContext.hpp" | 
|  | #include "query_optimizer/expressions/AttributeReference.hpp" | 
|  | #include "query_optimizer/expressions/ComparisonExpression.hpp" | 
|  | #include "query_optimizer/expressions/Exists.hpp" | 
|  | #include "query_optimizer/expressions/ExprId.hpp" | 
|  | #include "query_optimizer/expressions/ExpressionType.hpp" | 
|  | #include "query_optimizer/expressions/ExpressionUtil.hpp" | 
|  | #include "query_optimizer/expressions/InTableQuery.hpp" | 
|  | #include "query_optimizer/expressions/LogicalAnd.hpp" | 
|  | #include "query_optimizer/expressions/LogicalNot.hpp" | 
|  | #include "query_optimizer/expressions/LogicalOr.hpp" | 
|  | #include "query_optimizer/expressions/NamedExpression.hpp" | 
|  | #include "query_optimizer/expressions/PatternMatcher.hpp" | 
|  | #include "query_optimizer/expressions/SubqueryExpression.hpp" | 
|  | #include "query_optimizer/logical/Aggregate.hpp" | 
|  | #include "query_optimizer/logical/Filter.hpp" | 
|  | #include "query_optimizer/logical/HashJoin.hpp" | 
|  | #include "query_optimizer/logical/Logical.hpp" | 
|  | #include "query_optimizer/logical/LogicalType.hpp" | 
|  | #include "query_optimizer/logical/MultiwayCartesianJoin.hpp" | 
|  | #include "query_optimizer/logical/PatternMatcher.hpp" | 
|  | #include "query_optimizer/logical/Project.hpp" | 
|  | #include "query_optimizer/logical/TopLevelPlan.hpp" | 
|  | #include "query_optimizer/rules/Rule.hpp" | 
|  | #include "types/operations/comparisons/Comparison.hpp" | 
|  | #include "types/operations/comparisons/ComparisonFactory.hpp" | 
|  | #include "types/operations/comparisons/ComparisonID.hpp" | 
|  | #include "utility/SqlError.hpp" | 
|  |  | 
|  | #include "glog/logging.h" | 
|  |  | 
|  | namespace quickstep { | 
|  | namespace optimizer { | 
|  |  | 
|  | namespace E = ::quickstep::optimizer::expressions; | 
|  | namespace L = ::quickstep::optimizer::logical; | 
|  |  | 
|  | struct CorrelatedQueryInfo { | 
|  | enum class JoinType { | 
|  | kInnerJoin = 0, | 
|  | kLeftSemiJoin, | 
|  | kLeftAntiJoin, | 
|  | kCartesianJoin | 
|  | }; | 
|  |  | 
|  | CorrelatedQueryInfo(const JoinType join_type_in, | 
|  | const L::LogicalPtr &correlated_query_in, | 
|  | std::vector<E::AttributeReferencePtr> &&probe_join_attributes_in, | 
|  | std::vector<E::AttributeReferencePtr> &&build_join_attributes_in, | 
|  | std::vector<E::PredicatePtr> &&non_hash_join_predicates_in) | 
|  | : join_type(join_type_in), | 
|  | correlated_query(correlated_query_in), | 
|  | probe_join_attributes(std::move(probe_join_attributes_in)), | 
|  | build_join_attributes(std::move(build_join_attributes_in)), | 
|  | non_hash_join_predicates(std::move(non_hash_join_predicates_in)) {} | 
|  |  | 
|  | JoinType join_type; | 
|  | L::LogicalPtr correlated_query; | 
|  | std::vector<E::AttributeReferencePtr> probe_join_attributes; | 
|  | std::vector<E::AttributeReferencePtr> build_join_attributes; | 
|  | std::vector<E::PredicatePtr> non_hash_join_predicates; | 
|  | }; | 
|  |  | 
|  | L::LogicalPtr UnnestSubqueries::apply(const L::LogicalPtr &input) { | 
|  | DCHECK(L::SomeTopLevelPlan::Matches(input)) | 
|  | << "The input logical node must be of TopLevelPlan"; | 
|  |  | 
|  | const L::TopLevelPlan &top_level_plan = static_cast<const L::TopLevelPlan&>(*input); | 
|  |  | 
|  | bool has_changes = false; | 
|  |  | 
|  | std::vector<E::AttributeReferencePtr> empty_probe_join_predicates; | 
|  | std::vector<E::AttributeReferencePtr> empty_build_join_predicates; | 
|  | std::vector<E::PredicatePtr> empty_correlated_non_hash_join_predicates; | 
|  | std::unordered_map<E::ExprId, L::LogicalPtr> uncorrelated_subqueries; | 
|  | expressions::UnorderedNamedExpressionSet empty_attributes; | 
|  | UnnestSubqueriesForNonRootLogical unnest_rule(false /* scalar query */, | 
|  | empty_attributes, | 
|  | context_, | 
|  | &uncorrelated_subqueries, | 
|  | &empty_probe_join_predicates, | 
|  | &empty_build_join_predicates, | 
|  | &empty_correlated_non_hash_join_predicates); | 
|  | DCHECK(empty_probe_join_predicates.empty()); | 
|  | DCHECK(empty_build_join_predicates.empty()); | 
|  | DCHECK(empty_correlated_non_hash_join_predicates.empty()); | 
|  |  | 
|  | // Unnest subqueries in each subplan and the main plan. | 
|  | std::vector<L::LogicalPtr> new_shared_subplans; | 
|  | for (const L::LogicalPtr &shared_subplan : top_level_plan.shared_subplans()) { | 
|  | const L::LogicalPtr new_shared_subplan = unnest_rule.apply(shared_subplan); | 
|  | if (new_shared_subplan != shared_subplan && !has_changes) { | 
|  | has_changes = true; | 
|  | } | 
|  | new_shared_subplans.emplace_back(new_shared_subplan); | 
|  | } | 
|  |  | 
|  | const L::LogicalPtr new_main_plan = unnest_rule.apply(top_level_plan.plan()); | 
|  | if (new_main_plan != top_level_plan.plan() && !has_changes) { | 
|  | has_changes = true; | 
|  | } | 
|  |  | 
|  | std::unordered_map<E::ExprId, int> uncorrelated_map; | 
|  | std::vector<L::LogicalPtr>::size_type subplan_id = new_shared_subplans.size(); | 
|  | for (const std::pair<const E::ExprId, L::LogicalPtr> &uncorrelated_query_info : uncorrelated_subqueries) { | 
|  | uncorrelated_map.emplace(uncorrelated_query_info.first, | 
|  | subplan_id); | 
|  | new_shared_subplans.emplace_back(uncorrelated_query_info.second); | 
|  | ++subplan_id; | 
|  | } | 
|  |  | 
|  | if (!uncorrelated_subqueries.empty() || has_changes) { | 
|  | const L::LogicalPtr new_top_level_plan = | 
|  | L::TopLevelPlan::Create(new_main_plan, | 
|  | new_shared_subplans, | 
|  | uncorrelated_map); | 
|  | LOG_APPLYING_RULE(input, new_top_level_plan); | 
|  | return new_top_level_plan; | 
|  | } | 
|  |  | 
|  | LOG_IGNORING_RULE(input); | 
|  | return input; | 
|  | } | 
|  |  | 
|  | L::LogicalPtr UnnestSubqueriesForNonRootLogical::applyInternal( | 
|  | const L::LogicalPtr &input, | 
|  | E::UnorderedAttributeSet *inner_attributes, | 
|  | std::vector<E::AttributeReferencePtr> *probe_join_attributes, | 
|  | std::vector<E::AttributeReferencePtr> *build_join_attributes, | 
|  | std::vector<E::PredicatePtr> *non_hash_join_predicates) { | 
|  | DCHECK(inner_attributes->empty()); | 
|  | DCHECK(probe_join_attributes->empty()); | 
|  | DCHECK(build_join_attributes->empty()); | 
|  | DCHECK(non_hash_join_predicates->empty()); | 
|  |  | 
|  | // First, handle subquery expressions in the children. | 
|  | bool has_change = false; | 
|  | std::vector<L::LogicalPtr> new_children; | 
|  | for (const L::LogicalPtr &child : input->children()) { | 
|  | E::UnorderedAttributeSet inner_attributes_in_child; | 
|  | std::vector<E::AttributeReferencePtr> probe_join_predicates_in_child; | 
|  | std::vector<E::AttributeReferencePtr> build_join_predicates_in_child; | 
|  | std::vector<E::PredicatePtr> non_hash_join_predicates_in_child; | 
|  | const L::LogicalPtr new_child = | 
|  | applyInternal(child, | 
|  | &inner_attributes_in_child, | 
|  | &probe_join_predicates_in_child, | 
|  | &build_join_predicates_in_child, | 
|  | &non_hash_join_predicates_in_child); | 
|  | inner_attributes->insert(inner_attributes_in_child.begin(), | 
|  | inner_attributes_in_child.end()); | 
|  | probe_join_attributes->insert(probe_join_attributes->end(), | 
|  | probe_join_predicates_in_child.begin(), | 
|  | probe_join_predicates_in_child.end()); | 
|  | build_join_attributes->insert(build_join_attributes->end(), | 
|  | build_join_predicates_in_child.begin(), | 
|  | build_join_predicates_in_child.end()); | 
|  | non_hash_join_predicates->insert(non_hash_join_predicates->end(), | 
|  | non_hash_join_predicates_in_child.begin(), | 
|  | non_hash_join_predicates_in_child.end()); | 
|  | if (new_child != child && !has_change) { | 
|  | has_change = true; | 
|  | } | 
|  | new_children.emplace_back(new_child); | 
|  | } | 
|  |  | 
|  | L::LogicalPtr input_with_new_children = input; | 
|  | if (has_change) { | 
|  | input_with_new_children = input->copyWithNewChildren(new_children); | 
|  | } | 
|  |  | 
|  | // Next, process subquery expressions in the current node. | 
|  | const L::LogicalPtr output = applyToNode(input_with_new_children, | 
|  | inner_attributes, | 
|  | probe_join_attributes, | 
|  | build_join_attributes, | 
|  | non_hash_join_predicates); | 
|  | return output; | 
|  | } | 
|  |  | 
|  | L::LogicalPtr UnnestSubqueriesForNonRootLogical::applyToNode( | 
|  | const L::LogicalPtr &input, | 
|  | E::UnorderedAttributeSet *inner_attributes, | 
|  | std::vector<E::AttributeReferencePtr> *probe_join_attributes, | 
|  | std::vector<E::AttributeReferencePtr> *build_join_attributes, | 
|  | std::vector<E::PredicatePtr> *non_hash_join_predicates) { | 
|  | // First eliminate subqueries expressions in this node (that is, this node is the outer query). | 
|  | L::LogicalPtr input_with_no_subqueries = eliminateNestedScalarQueries(input); | 
|  | DCHECK(input_with_no_subqueries != nullptr); | 
|  |  | 
|  | // Next determine whether this node has any outer reference (that is, this node is the inner query). | 
|  | const std::vector<E::ExpressionPtr> &input_expressions = input_with_no_subqueries->input_expressions(); | 
|  | std::vector<E::ExpressionPtr> new_input_expressions; | 
|  | bool has_change = false; | 
|  | for (const E::ExpressionPtr &input_expression : input_expressions) { | 
|  | const E::ExpressionPtr new_input_expression = | 
|  | eliminateOuterAttributeReference(input_expression, | 
|  | inner_attributes, | 
|  | probe_join_attributes, | 
|  | build_join_attributes, | 
|  | non_hash_join_predicates); | 
|  | if (new_input_expression != nullptr) { | 
|  | new_input_expressions.emplace_back(new_input_expression); | 
|  | } | 
|  | if (new_input_expression != input_expression && !has_change) { | 
|  | has_change = true; | 
|  | } | 
|  | } | 
|  |  | 
|  | switch (input_with_no_subqueries->getLogicalType()) { | 
|  | case L::LogicalType::kAggregate: { | 
|  | if (!non_hash_join_predicates->empty()) { | 
|  | THROW_SQL_ERROR() | 
|  | << "Non-equality join predicate with an outer query must be above any aggregate"; | 
|  | } | 
|  |  | 
|  | const L::Aggregate &aggregate = static_cast<const L::Aggregate&>(*input_with_no_subqueries); | 
|  | DCHECK(!has_change); | 
|  | if (!inner_attributes->empty()) { | 
|  | std::vector<E::NamedExpressionPtr> group_expressions = aggregate.grouping_expressions(); | 
|  | group_expressions.insert(group_expressions.end(), | 
|  | inner_attributes->begin(), | 
|  | inner_attributes->end()); | 
|  | return L::Aggregate::Create(aggregate.input(), | 
|  | group_expressions, | 
|  | aggregate.aggregate_expressions()); | 
|  | } | 
|  | return input_with_no_subqueries; | 
|  | } | 
|  | case L::LogicalType::kFilter: { | 
|  | if (new_input_expressions.empty()) { | 
|  | const L::Filter &filter = static_cast<const L::Filter&>(*input_with_no_subqueries); | 
|  | return filter.input(); | 
|  | } | 
|  | if (has_change) { | 
|  | return input_with_no_subqueries->copyWithNewInputExpressions(new_input_expressions); | 
|  | } | 
|  | return input_with_no_subqueries; | 
|  | } | 
|  | case L::LogicalType::kProject: { | 
|  | if (has_change) { | 
|  | input_with_no_subqueries = | 
|  | input_with_no_subqueries->copyWithNewInputExpressions(new_input_expressions); | 
|  | } | 
|  | if (!inner_attributes->empty()) { | 
|  | const L::Project &project = static_cast<const L::Project&>(*input_with_no_subqueries); | 
|  | // Insert those inner attributes into the front of the project expression | 
|  | // list rather than the back to help reduce unnecessary selection for | 
|  | // reordering output attributes, since those attributes are usually also | 
|  | // grouping attributes that proceed others in projection. | 
|  | std::vector<E::NamedExpressionPtr> new_project_expressions; | 
|  | const E::UnorderedNamedExpressionSet project_expression_set(project.project_expressions().begin(), | 
|  | project.project_expressions().end()); | 
|  | for (const E::AttributeReferencePtr &inner_attribute : *inner_attributes) { | 
|  | if (project_expression_set.find(inner_attribute) == project_expression_set.end()) { | 
|  | new_project_expressions.emplace_back(inner_attribute); | 
|  | } | 
|  | } | 
|  | new_project_expressions.insert(new_project_expressions.end(), | 
|  | project_expression_set.begin(), | 
|  | project_expression_set.end()); | 
|  | if (new_project_expressions.size() != project.project_expressions().size()) { | 
|  | input_with_no_subqueries = L::Project::Create(project.input(), | 
|  | new_project_expressions); | 
|  | } | 
|  | } | 
|  | return input_with_no_subqueries; | 
|  | } | 
|  | default: | 
|  | if (has_change) { | 
|  | return input_with_no_subqueries->copyWithNewInputExpressions(new_input_expressions); | 
|  | } | 
|  | return input_with_no_subqueries; | 
|  | } | 
|  | } | 
|  |  | 
|  | E::ExpressionPtr UnnestSubqueriesForNonRootLogical::eliminateOuterAttributeReference( | 
|  | const E::ExpressionPtr &expression, | 
|  | E::UnorderedAttributeSet *inner_attributes, | 
|  | std::vector<E::AttributeReferencePtr> *probe_join_attributes, | 
|  | std::vector<E::AttributeReferencePtr> *build_join_attributes, | 
|  | std::vector<E::PredicatePtr> *non_hash_join_predicates) { | 
|  | DCHECK(expression->getExpressionType() != E::ExpressionType::kInTableQuery); | 
|  | DCHECK(expression->getExpressionType() != E::ExpressionType::kExists); | 
|  | DCHECK(expression->getExpressionType() != E::ExpressionType::kSubqueryExpression); | 
|  |  | 
|  | switch (expression->getExpressionType()) { | 
|  | case E::ExpressionType::kPredicateLiteral:  // Fall through | 
|  | case E::ExpressionType::kScalarLiteral: | 
|  | return expression; | 
|  | case E::ExpressionType::kLogicalAnd: { | 
|  | const E::LogicalAnd &logical_and = static_cast<const E::LogicalAnd&>(*expression); | 
|  | bool has_change = false; | 
|  | std::vector<E::PredicatePtr> new_children; | 
|  | for (const E::PredicatePtr &child : logical_and.operands()) { | 
|  | const E::ExpressionPtr new_child = | 
|  | eliminateOuterAttributeReference(child, | 
|  | inner_attributes, | 
|  | probe_join_attributes, | 
|  | build_join_attributes, | 
|  | non_hash_join_predicates); | 
|  | if (new_child != nullptr) { | 
|  | new_children.emplace_back( | 
|  | std::static_pointer_cast<const E::Predicate>(new_child)); | 
|  | } | 
|  | if (new_child != child && !has_change) { | 
|  | has_change = true; | 
|  | } | 
|  | } | 
|  | if (has_change) { | 
|  | if (new_children.empty()) { | 
|  | return E::ExpressionPtr(); | 
|  | } | 
|  | if (new_children.size() == 1u) { | 
|  | return new_children[0]; | 
|  | } | 
|  | return E::LogicalAnd::Create(new_children); | 
|  | } | 
|  | return expression; | 
|  | } | 
|  | case E::ExpressionType::kComparisonExpression: { | 
|  | const E::ComparisonExpressionPtr comparison_expression = | 
|  | std::static_pointer_cast<const E::ComparisonExpression>(expression); | 
|  | E::AttributeReferencePtr outer_attribute; | 
|  | E::AttributeReferencePtr inner_attribute; | 
|  | // Hash-join predicate. | 
|  | if (E::SomeAttributeReference::MatchesWithConditionalCast( | 
|  | comparison_expression->left(), &outer_attribute) && | 
|  | E::SomeAttributeReference::MatchesWithConditionalCast( | 
|  | comparison_expression->right(), &inner_attribute) && | 
|  | comparison_expression->comparison().getComparisonID() == ComparisonID::kEqual) { | 
|  | if (!isCorrelatedOuterAttribute(outer_attribute)) { | 
|  | if (!isCorrelatedOuterAttribute(inner_attribute)) { | 
|  | return expression; | 
|  | } | 
|  | std::swap(outer_attribute, inner_attribute); | 
|  | } else if (isCorrelatedOuterAttribute(inner_attribute)) { | 
|  | THROW_SQL_ERROR() << "Comparison of two outer attribute references is not allowed"; | 
|  | } | 
|  | if (visiable_attrs_from_outer_query_.find(outer_attribute) == visiable_attrs_from_outer_query_.end()) { | 
|  | THROW_SQL_ERROR() | 
|  | << "Nested queries can only reference attributes in the outer query one level above"; | 
|  | } | 
|  | outer_attribute = | 
|  | E::AttributeReference::Create(outer_attribute->id(), | 
|  | outer_attribute->attribute_name(), | 
|  | outer_attribute->attribute_alias(), | 
|  | outer_attribute->relation_name(), | 
|  | outer_attribute->getValueType(), | 
|  | E::AttributeReferenceScope::kLocal); | 
|  |  | 
|  | inner_attributes->emplace(inner_attribute); | 
|  | probe_join_attributes->emplace_back(outer_attribute); | 
|  | build_join_attributes->emplace_back(inner_attribute); | 
|  | return E::ExpressionPtr(); | 
|  | } else { | 
|  | DeOuterAttributeReference de_outer_reference_rule(!scalar_query_, | 
|  | *uncorrelated_subqueries_, | 
|  | visiable_attrs_from_outer_query_); | 
|  | const E::ExpressionPtr new_expression = | 
|  | de_outer_reference_rule.apply(expression); | 
|  | if (new_expression != expression) { | 
|  | // All the inner attributes should be insert into the inner_attributes, | 
|  | // so that they will be visible from the outer query. | 
|  | // Note that we use expression instead of new_expression, since | 
|  | // the outer attribute reference in new_expression is de-outer referenced. | 
|  | const std::vector<E::AttributeReferencePtr> &inner_attrs_vec = | 
|  | E::GetAttributeReferencesWithinScope(expression->getReferencedAttributes(), | 
|  | E::AttributeReferenceScope::kLocal); | 
|  | for (const E::AttributeReferencePtr &attr : inner_attrs_vec) { | 
|  | inner_attributes->emplace(attr); | 
|  | } | 
|  |  | 
|  | non_hash_join_predicates->emplace_back( | 
|  | std::static_pointer_cast<const E::Predicate>(new_expression)); | 
|  | return E::ExpressionPtr(); | 
|  | } | 
|  | } | 
|  | return expression; | 
|  | } | 
|  | default: { | 
|  | validateNonOuterAttributeReference(expression); | 
|  | return expression; | 
|  | } | 
|  | } | 
|  | } | 
|  |  | 
|  | void UnnestSubqueriesForNonRootLogical::validateNonOuterAttributeReference( | 
|  | const E::ExpressionPtr &expression) { | 
|  | const std::vector<E::AttributeReferencePtr> referenced_attributes = | 
|  | expression->getReferencedAttributes(); | 
|  | for (const E::AttributeReferencePtr &referenced_attribute : referenced_attributes) { | 
|  | if (isCorrelatedOuterAttribute(referenced_attribute)) { | 
|  | THROW_SQL_ERROR() | 
|  | << "Outer attribute reference can only appear in a single-attribute equality predicate"; | 
|  | } | 
|  | } | 
|  | } | 
|  |  | 
|  | bool UnnestSubqueriesForNonRootLogical::isCorrelatedOuterAttribute( | 
|  | const E::AttributeReferencePtr &attribute) const { | 
|  | return (attribute->scope() == E::AttributeReferenceScope::kOuter && | 
|  | uncorrelated_subqueries_->find(attribute->id()) == uncorrelated_subqueries_->end()); | 
|  | } | 
|  |  | 
|  | L::LogicalPtr UnnestSubqueriesForNonRootLogical::eliminateNestedScalarQueries(const L::LogicalPtr &node) { | 
|  | const std::vector<E::ExpressionPtr> &input_expressions = node->input_expressions(); | 
|  | const std::vector<E::AttributeReferencePtr> input_attributes = | 
|  | node->getInputAttributes(); | 
|  | E::UnorderedNamedExpressionSet input_attribute_set(input_attributes.begin(), | 
|  | input_attributes.end()); | 
|  |  | 
|  | // Transform subquery experssions into non-subquery expressions. | 
|  | std::vector<CorrelatedQueryInfo> correlated_query_info_vec; | 
|  | std::vector<E::ExpressionPtr> new_input_expressions; | 
|  | bool has_changed_expression = false; | 
|  | for (const E::ExpressionPtr &input_expression : input_expressions) { | 
|  | std::vector<E::PredicatePtr> correlated_predicates; | 
|  | UnnestSubqueriesForExpession unnest_rule_for_expr( | 
|  | input_attribute_set, | 
|  | context_, | 
|  | uncorrelated_subqueries_, | 
|  | &correlated_query_info_vec); | 
|  | const E::ExpressionPtr new_input_expression = | 
|  | unnest_rule_for_expr.apply(input_expression); | 
|  | if (new_input_expression != input_expression && !has_changed_expression) { | 
|  | has_changed_expression = true; | 
|  | } | 
|  | new_input_expressions.emplace_back(new_input_expression); | 
|  | } | 
|  |  | 
|  | if (has_changed_expression) { | 
|  | // If there are correlated subquery expressions, add logical joins. | 
|  | if (!correlated_query_info_vec.empty()) { | 
|  | L::LogicalPtr new_child; | 
|  |  | 
|  | if (new_input_expressions[0] == nullptr && | 
|  | node->getLogicalType() == L::LogicalType::kNestedLoopsJoin) { | 
|  | new_child = node; | 
|  | } else { | 
|  | DCHECK_EQ(1u, node->children().size()); | 
|  | new_child = node->children()[0]; | 
|  | } | 
|  |  | 
|  | // Join uncorrelated subqueries early. | 
|  | L::LogicalPtr uncorrelated_query_child; | 
|  | for (const CorrelatedQueryInfo &correlated_query_info : correlated_query_info_vec) { | 
|  | if (correlated_query_info.join_type == CorrelatedQueryInfo::JoinType::kCartesianJoin) { | 
|  | // The only case for this nested loop join is that it is an uncorrelated | 
|  | // subquery which returns a scalar (single column and single row) result. | 
|  | DCHECK(correlated_query_info.probe_join_attributes.empty()); | 
|  | DCHECK_EQ(0u, correlated_query_info.non_hash_join_predicates.size()); | 
|  | if (uncorrelated_query_child == nullptr) { | 
|  | uncorrelated_query_child = correlated_query_info.correlated_query; | 
|  | } else { | 
|  | uncorrelated_query_child = L::MultiwayCartesianJoin::Create( | 
|  | { uncorrelated_query_child, correlated_query_info.correlated_query }); | 
|  | } | 
|  | } | 
|  | } | 
|  | if (uncorrelated_query_child != nullptr) { | 
|  | new_child = L::MultiwayCartesianJoin::Create({ new_child, uncorrelated_query_child }); | 
|  | } | 
|  |  | 
|  | for (const CorrelatedQueryInfo &correlated_query_info : correlated_query_info_vec) { | 
|  | if (correlated_query_info.join_type == CorrelatedQueryInfo::JoinType::kInnerJoin) { | 
|  | DCHECK(!correlated_query_info.probe_join_attributes.empty()); | 
|  | DCHECK(correlated_query_info.non_hash_join_predicates.empty()) | 
|  | << correlated_query_info.non_hash_join_predicates[0]->toString(); | 
|  | new_child = L::HashJoin::Create(new_child, | 
|  | correlated_query_info.correlated_query, | 
|  | correlated_query_info.probe_join_attributes, | 
|  | correlated_query_info.build_join_attributes, | 
|  | nullptr, /* residual_predicate */ | 
|  | L::HashJoin::JoinType::kInnerJoin); | 
|  | } else if (correlated_query_info.join_type == CorrelatedQueryInfo::JoinType::kLeftSemiJoin || | 
|  | correlated_query_info.join_type == CorrelatedQueryInfo::JoinType::kLeftAntiJoin) { | 
|  | DCHECK(!correlated_query_info.probe_join_attributes.empty()); | 
|  | E::PredicatePtr filter_predicate; | 
|  | if (correlated_query_info.non_hash_join_predicates.size() > 1u) { | 
|  | filter_predicate = E::LogicalAnd::Create(correlated_query_info.non_hash_join_predicates); | 
|  | } else if (!correlated_query_info.non_hash_join_predicates.empty()) { | 
|  | filter_predicate = correlated_query_info.non_hash_join_predicates[0]; | 
|  | } | 
|  |  | 
|  | L::HashJoin::JoinType join_type = | 
|  | (correlated_query_info.join_type == CorrelatedQueryInfo::JoinType::kLeftSemiJoin) | 
|  | ? L::HashJoin::JoinType::kLeftSemiJoin | 
|  | : L::HashJoin::JoinType::kLeftAntiJoin; | 
|  | new_child = L::HashJoin::Create(new_child, | 
|  | correlated_query_info.correlated_query, | 
|  | correlated_query_info.probe_join_attributes, | 
|  | correlated_query_info.build_join_attributes, | 
|  | filter_predicate, | 
|  | join_type); | 
|  | } | 
|  | } | 
|  |  | 
|  | // Special case for filter and nested loops join which may contain EXISTS and IN. | 
|  | if (new_input_expressions[0] == nullptr) { | 
|  | DCHECK_EQ(1u, new_input_expressions.size()); | 
|  | DCHECK(node->getLogicalType() == L::LogicalType::kFilter || | 
|  | node->getLogicalType() == L::LogicalType::kNestedLoopsJoin) << node->toString(); | 
|  | LOG_APPLYING_RULE(node, new_child); | 
|  | return new_child; | 
|  | } | 
|  |  | 
|  | L::LogicalPtr new_logical = node->copyWithNewInputExpressions(new_input_expressions); | 
|  | new_logical = new_logical->copyWithNewChildren({new_child}); | 
|  | LOG_APPLYING_RULE(node, new_logical); | 
|  | return new_logical; | 
|  | } | 
|  |  | 
|  | L::LogicalPtr new_logical = node->copyWithNewInputExpressions(new_input_expressions); | 
|  | LOG_APPLYING_RULE(node, new_logical); | 
|  | return new_logical; | 
|  | } | 
|  |  | 
|  | LOG_IGNORING_RULE(node); | 
|  | return node; | 
|  | } | 
|  |  | 
|  | E::ExpressionPtr UnnestSubqueriesForExpession::applyInternal( | 
|  | const bool allow_exists_or_in, | 
|  | const E::ExpressionPtr &node) { | 
|  | switch (node->getExpressionType()) { | 
|  | case E::ExpressionType::kSubqueryExpression: { | 
|  | const E::SubqueryExpression &subquery_expression = | 
|  | static_cast<const E::SubqueryExpression&>(*node); | 
|  |  | 
|  | std::vector<E::AttributeReferencePtr> probe_join_attributes; | 
|  | std::vector<E::AttributeReferencePtr> build_join_attributes; | 
|  | std::vector<E::PredicatePtr> non_hash_join_predicates; | 
|  | UnnestSubqueriesForNonRootLogical unnest_logical_rule(true,  // scalar_query | 
|  | visible_attributes_from_outer_query_, | 
|  | context_, | 
|  | uncorrelated_subqueries_, | 
|  | &probe_join_attributes, | 
|  | &build_join_attributes, | 
|  | &non_hash_join_predicates); | 
|  | const L::LogicalPtr subquery = subquery_expression.subquery(); | 
|  | const L::LogicalPtr new_subquery = unnest_logical_rule.apply(subquery); | 
|  | const E::AttributeReferencePtr output_attribute = subquery->getOutputAttributes()[0]; | 
|  | DCHECK(!new_subquery->getOutputAttributes().empty()); | 
|  | if (probe_join_attributes.empty()) { | 
|  | DCHECK(non_hash_join_predicates.empty()); | 
|  | DCHECK_EQ(1u, new_subquery->getOutputAttributes().size()) << node->toString(); | 
|  | correlated_query_info_vec_->emplace_back(CorrelatedQueryInfo::JoinType::kCartesianJoin, | 
|  | new_subquery, | 
|  | std::move(probe_join_attributes), | 
|  | std::move(build_join_attributes), | 
|  | std::move(non_hash_join_predicates)); | 
|  | } else { | 
|  | correlated_query_info_vec_->emplace_back(CorrelatedQueryInfo::JoinType::kInnerJoin, | 
|  | new_subquery, | 
|  | std::move(probe_join_attributes), | 
|  | std::move(build_join_attributes), | 
|  | std::move(non_hash_join_predicates)); | 
|  | } | 
|  |  | 
|  | return output_attribute; | 
|  | } | 
|  | case E::ExpressionType::kExists: { | 
|  | if (!allow_exists_or_in) { | 
|  | THROW_SQL_ERROR() << "EXISTS can only appear in (un-nested) NOT, AND or by itself"; | 
|  | } | 
|  | transformExists(static_cast<const E::Exists&>(*node)); | 
|  | return E::ExpressionPtr(); | 
|  | } | 
|  | case E::ExpressionType::kInTableQuery: { | 
|  | if (!allow_exists_or_in) { | 
|  | THROW_SQL_ERROR() << "IN(table query) can only appear in (un-nested) NOT, AND or by itself"; | 
|  | } | 
|  | transformInTableQuery(static_cast<const E::InTableQuery&>(*node)); | 
|  | return E::ExpressionPtr(); | 
|  | } | 
|  | case E::ExpressionType::kLogicalNot: { | 
|  | const E::LogicalNot &logical_not = static_cast<const E::LogicalNot&>(*node); | 
|  | const E::PredicatePtr &operand = logical_not.operand(); | 
|  | if (operand->getExpressionType() == E::ExpressionType::kExists) { | 
|  | if (!allow_exists_or_in) { | 
|  | THROW_SQL_ERROR() << "EXISTS can only appear in (un-nested) NOT, AND or by itself"; | 
|  | } | 
|  | transformExists(static_cast<const E::Exists&>(*operand)); | 
|  | correlated_query_info_vec_->back().join_type = CorrelatedQueryInfo::JoinType::kLeftAntiJoin; | 
|  | return E::PredicatePtr(); | 
|  | } else if (operand->getExpressionType() == E::ExpressionType::kInTableQuery) { | 
|  | if (!allow_exists_or_in) { | 
|  | THROW_SQL_ERROR() << "IN(table query) can only appear in (un-nested) NOT, AND or by itself"; | 
|  | } | 
|  | transformInTableQuery(static_cast<const E::InTableQuery&>(*operand)); | 
|  | correlated_query_info_vec_->back().join_type = CorrelatedQueryInfo::JoinType::kLeftAntiJoin; | 
|  | return E::PredicatePtr(); | 
|  | } | 
|  | const E::ExpressionPtr new_operand = | 
|  | applyInternal(false /* allow_exists_or_in */, | 
|  | operand); | 
|  | if (new_operand != operand) { | 
|  | return E::LogicalNot::Create(std::static_pointer_cast<const E::Predicate>(new_operand)); | 
|  | } else { | 
|  | return node; | 
|  | } | 
|  | } | 
|  | case E::ExpressionType::kLogicalAnd: { | 
|  | const E::LogicalAnd &logical_and = static_cast<const E::LogicalAnd&>(*node); | 
|  |  | 
|  | std::vector<E::PredicatePtr> new_operands; | 
|  | bool has_change = false; | 
|  | for (const E::PredicatePtr &operand : logical_and.operands()) { | 
|  | const E::ExpressionPtr new_operand = | 
|  | applyInternal(allow_exists_or_in, | 
|  | operand); | 
|  | if (new_operand != operand && !has_change) { | 
|  | has_change = true; | 
|  | } | 
|  | if (new_operand != nullptr) { | 
|  | new_operands.emplace_back(std::static_pointer_cast<const E::Predicate>(new_operand)); | 
|  | } | 
|  | } | 
|  |  | 
|  | if (new_operands.empty()) { | 
|  | return E::PredicatePtr(); | 
|  | } else if (new_operands.size() == 1u) { | 
|  | return new_operands[0]; | 
|  | } | 
|  |  | 
|  | if (has_change) { | 
|  | return E::LogicalAnd::Create(new_operands); | 
|  | } | 
|  | return node; | 
|  | } | 
|  | case E::ExpressionType::kLogicalOr: { | 
|  | const E::LogicalOr &logical_or = static_cast<const E::LogicalOr&>(*node); | 
|  |  | 
|  | std::vector<E::PredicatePtr> new_operands; | 
|  | bool has_change = false; | 
|  | for (const E::PredicatePtr &operand : logical_or.operands()) { | 
|  | const E::ExpressionPtr new_operand = | 
|  | applyInternal(false, | 
|  | operand); | 
|  | if (new_operand != operand && !has_change) { | 
|  | has_change = true; | 
|  | } | 
|  | DCHECK(new_operand != nullptr); | 
|  | new_operands.emplace_back(std::static_pointer_cast<const E::Predicate>(new_operand)); | 
|  | } | 
|  |  | 
|  | if (has_change) { | 
|  | return E::LogicalOr::Create(new_operands); | 
|  | } | 
|  | return node; | 
|  | } | 
|  | default: | 
|  | std::vector<E::ExpressionPtr> new_children; | 
|  | bool has_change = false; | 
|  | for (const E::ExpressionPtr &child : node->children()) { | 
|  | const E::ExpressionPtr new_child = | 
|  | applyInternal(allow_exists_or_in, child); | 
|  | if (new_child != child && !has_change) { | 
|  | has_change = true; | 
|  | } | 
|  | DCHECK(child != nullptr); | 
|  | new_children.emplace_back(new_child); | 
|  | } | 
|  |  | 
|  | if (has_change) { | 
|  | return node->copyWithNewChildren(new_children); | 
|  | } | 
|  | return node; | 
|  | } | 
|  | } | 
|  |  | 
|  | void UnnestSubqueriesForExpession::transformExists( | 
|  | const E::Exists &exists_predicate) { | 
|  | std::vector<E::AttributeReferencePtr> probe_join_attributes; | 
|  | std::vector<E::AttributeReferencePtr> build_join_attributes; | 
|  | std::vector<E::PredicatePtr> non_hash_join_predicates; | 
|  | UnnestSubqueriesForNonRootLogical unnest_logical_rule(false,  // scalar_query | 
|  | visible_attributes_from_outer_query_, | 
|  | context_, | 
|  | uncorrelated_subqueries_, | 
|  | &probe_join_attributes, | 
|  | &build_join_attributes, | 
|  | &non_hash_join_predicates); | 
|  | DCHECK_EQ(probe_join_attributes.size(), build_join_attributes.size()); | 
|  | const L::LogicalPtr new_subquery = | 
|  | unnest_logical_rule.apply(exists_predicate.exists_subquery()->subquery()); | 
|  |  | 
|  | if (probe_join_attributes.empty()) { | 
|  | if (!non_hash_join_predicates.empty()) { | 
|  | THROW_SQL_ERROR() << "Correlated queries must have an equality join predicate with the outer query"; | 
|  | } | 
|  | THROW_SQL_ERROR() << "EXISTS subquery cannot be un-correlated with the outer query"; | 
|  | } | 
|  |  | 
|  | correlated_query_info_vec_->emplace_back(CorrelatedQueryInfo::JoinType::kLeftSemiJoin, | 
|  | new_subquery, | 
|  | std::move(probe_join_attributes), | 
|  | std::move(build_join_attributes), | 
|  | std::move(non_hash_join_predicates)); | 
|  | } | 
|  |  | 
|  | void UnnestSubqueriesForExpession::transformInTableQuery( | 
|  | const E::InTableQuery &in_table_query) { | 
|  | std::vector<E::AttributeReferencePtr> probe_join_attributes; | 
|  | std::vector<E::AttributeReferencePtr> build_join_attributes; | 
|  | std::vector<E::PredicatePtr> non_hash_join_predicates; | 
|  | UnnestSubqueriesForNonRootLogical unnest_logical_rule(false,  // scalar_query | 
|  | visible_attributes_from_outer_query_, | 
|  | context_, | 
|  | uncorrelated_subqueries_, | 
|  | &probe_join_attributes, | 
|  | &build_join_attributes, | 
|  | &non_hash_join_predicates); | 
|  | const L::LogicalPtr subquery = in_table_query.table_query()->subquery(); | 
|  | const L::LogicalPtr new_subquery = unnest_logical_rule.apply(subquery); | 
|  |  | 
|  | DCHECK(!new_subquery->getOutputAttributes().empty()); | 
|  | const E::AttributeReferencePtr join_attr_in_subquery = subquery->getOutputAttributes()[0]; | 
|  |  | 
|  | E::AttributeReferencePtr test_attribute; | 
|  | if (E::SomeAttributeReference::MatchesWithConditionalCast(in_table_query.test_expression(), | 
|  | &test_attribute)) { | 
|  | probe_join_attributes.emplace_back(test_attribute); | 
|  | build_join_attributes.emplace_back(join_attr_in_subquery); | 
|  | } else { | 
|  | if (!probe_join_attributes.empty()) { | 
|  | non_hash_join_predicates.emplace_back( | 
|  | E::ComparisonExpression::Create(ComparisonFactory::GetComparison(ComparisonID::kEqual), | 
|  | in_table_query.test_expression(), | 
|  | join_attr_in_subquery)); | 
|  | } else { | 
|  | // TODO(qzeng): We can actually add a project to precompute the test expression. | 
|  | THROW_SQL_ERROR() << "Cannot find an equality join predicate for IN"; | 
|  | } | 
|  | } | 
|  |  | 
|  | correlated_query_info_vec_->emplace_back(CorrelatedQueryInfo::JoinType::kLeftSemiJoin, | 
|  | new_subquery, | 
|  | std::move(probe_join_attributes), | 
|  | std::move(build_join_attributes), | 
|  | std::move(non_hash_join_predicates)); | 
|  | } | 
|  |  | 
|  | E::ExpressionPtr DeOuterAttributeReference::applyToNode(const E::ExpressionPtr &input) { | 
|  | E::AttributeReferencePtr attr; | 
|  | if (E::SomeAttributeReference::MatchesWithConditionalCast(input, &attr) && | 
|  | attr->scope() == E::AttributeReferenceScope::kOuter && | 
|  | uncorrelated_subqueries_.find(attr->id()) == uncorrelated_subqueries_.end()) { | 
|  | if (!allow_outer_reference_) { | 
|  | THROW_SQL_ERROR() << "Non-equality join predicate is not allowed in scalar subqueries"; | 
|  | } | 
|  | if (visiable_attrs_from_outer_query_.find(attr) == visiable_attrs_from_outer_query_.end()) { | 
|  | THROW_SQL_ERROR() | 
|  | << "Nested queries can only reference attributes in the outer query one level above"; | 
|  | } | 
|  | return E::AttributeReference::Create(attr->id(), | 
|  | attr->attribute_name(), | 
|  | attr->attribute_alias(), | 
|  | attr->relation_name(), | 
|  | attr->getValueType(), | 
|  | E::AttributeReferenceScope::kLocal); | 
|  | } | 
|  | return input; | 
|  | } | 
|  |  | 
|  | }  // namespace optimizer | 
|  | }  // namespace quickstep |