| /** |
| * Licensed to the Apache Software Foundation (ASF) under one |
| * or more contributor license agreements. See the NOTICE file |
| * distributed with this work for additional information |
| * regarding copyright ownership. The ASF licenses this file |
| * to you under the Apache License, Version 2.0 (the |
| * "License"); you may not use this file except in compliance |
| * with the License. You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, |
| * software distributed under the License is distributed on an |
| * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
| * KIND, either express or implied. See the License for the |
| * specific language governing permissions and limitations |
| * under the License. |
| **/ |
| |
| #include "query_optimizer/rules/EliminateEmptyNode.hpp" |
| |
| #include <cstddef> |
| #include <memory> |
| #include <sstream> |
| #include <string> |
| #include <unordered_set> |
| #include <utility> |
| #include <vector> |
| |
| #include "catalog/CatalogAttribute.hpp" |
| #include "catalog/CatalogDatabase.hpp" |
| #include "catalog/CatalogRelation.hpp" |
| #include "catalog/CatalogRelationStatistics.hpp" |
| #include "query_optimizer/OptimizerContext.hpp" |
| #include "query_optimizer/expressions/Alias.hpp" |
| #include "query_optimizer/expressions/AttributeReference.hpp" |
| #include "query_optimizer/expressions/ExprId.hpp" |
| #include "query_optimizer/expressions/ExpressionUtil.hpp" |
| #include "query_optimizer/expressions/NamedExpression.hpp" |
| #include "query_optimizer/expressions/PatternMatcher.hpp" |
| #include "query_optimizer/physical/Aggregate.hpp" |
| #include "query_optimizer/physical/HashJoin.hpp" |
| #include "query_optimizer/physical/PatternMatcher.hpp" |
| #include "query_optimizer/physical/Physical.hpp" |
| #include "query_optimizer/physical/PhysicalType.hpp" |
| #include "query_optimizer/physical/Selection.hpp" |
| #include "query_optimizer/physical/TableReference.hpp" |
| #include "query_optimizer/physical/TopLevelPlan.hpp" |
| #include "query_optimizer/physical/UnionAll.hpp" |
| |
| #include "glog/logging.h" |
| |
| namespace quickstep { |
| namespace optimizer { |
| |
| namespace E = expressions; |
| namespace P = physical; |
| |
| namespace { |
| |
| std::string GetNewRelationName(const std::size_t id) { |
| std::ostringstream out; |
| out << OptimizerContext::kInternalTemporaryRelationNamePrefix << id; |
| return out.str(); |
| } |
| |
| bool IsTableReferenceOnEmptyRelation(const P::PhysicalPtr &node) { |
| P::TableReferencePtr table_reference; |
| if (!P::SomeTableReference::MatchesWithConditionalCast(node, &table_reference)) { |
| return false; |
| } |
| |
| const CatalogRelationStatistics &stat = table_reference->relation()->getStatistics(); |
| return stat.isExact() && (stat.getNumTuples() == 0u); |
| } |
| |
| P::PhysicalPtr ApplyToHashJoin(const P::HashJoinPtr &hash_join) { |
| const P::PhysicalPtr &left = hash_join->left(); |
| if (IsTableReferenceOnEmptyRelation(left)) { |
| return nullptr; |
| } |
| |
| const P::PhysicalPtr &right = hash_join->right(); |
| switch (hash_join->join_type()) { |
| case P::HashJoin::JoinType::kInnerJoin: |
| case P::HashJoin::JoinType::kLeftSemiJoin: |
| if (IsTableReferenceOnEmptyRelation(right)) { |
| return nullptr; |
| } |
| break; |
| case P::HashJoin::JoinType::kLeftAntiJoin: { |
| const auto &project_expressions = hash_join->project_expressions(); |
| #ifdef QUICKSTEP_DEBUG |
| const auto left_output_attributes = left->getOutputAttributes(); |
| if (!E::SubsetOfExpressions(project_expressions, left_output_attributes)) { |
| std::vector<E::NamedExpressionPtr> non_alias_expressions, alias_expressions; |
| for (const auto &project_expression : project_expressions) { |
| if (E::SomeAlias::Matches(project_expression)) { |
| for (const auto referenced_attribute : project_expression->getReferencedAttributes()) { |
| alias_expressions.push_back(referenced_attribute); |
| } |
| } else { |
| non_alias_expressions.push_back(project_expression); |
| } |
| } |
| |
| CHECK(E::SubsetOfExpressions(non_alias_expressions, left_output_attributes)); |
| CHECK(E::SubsetOfExpressions(alias_expressions, left_output_attributes)); |
| } |
| #endif |
| |
| if (IsTableReferenceOnEmptyRelation(right)) { |
| if (hash_join->residual_predicate()) { |
| return nullptr; |
| } |
| return P::Selection::Create(left, project_expressions, nullptr); |
| } |
| break; |
| } |
| case P::HashJoin::JoinType::kLeftOuterJoin: |
| if (IsTableReferenceOnEmptyRelation(right)) { |
| if (hash_join->residual_predicate()) { |
| return nullptr; |
| } |
| |
| const auto &project_expressions = hash_join->project_expressions(); |
| if (E::SubsetOfExpressions(project_expressions, left->getOutputAttributes())) { |
| return P::Selection::Create(left, project_expressions, nullptr); |
| } |
| } |
| break; |
| } |
| |
| return hash_join; |
| } |
| |
| P::PhysicalPtr ApplyToNode(const P::PhysicalPtr &node) { |
| switch (node->getPhysicalType()) { |
| case P::PhysicalType::kHashJoin: |
| return ApplyToHashJoin(std::static_pointer_cast<const P::HashJoin>(node)); |
| case P::PhysicalType::kTableReference: |
| if (IsTableReferenceOnEmptyRelation(node)) { |
| return nullptr; |
| } |
| break; |
| default: |
| break; |
| } |
| |
| return node; |
| } |
| |
| |
| P::PhysicalPtr CopyWithNewProjectExpressions(const P::UnionAllPtr &union_all, |
| const P::PhysicalPtr &child) { |
| const std::vector<E::NamedExpressionPtr> *project_expressions = nullptr; |
| |
| switch (child->getPhysicalType()) { |
| case P::PhysicalType::kHashJoin: |
| case P::PhysicalType::kNestedLoopsJoin: { |
| const auto &join = std::static_pointer_cast<const P::Join>(child); |
| project_expressions = &(join->project_expressions()); |
| break; |
| } |
| case P::PhysicalType::kSelection: { |
| const auto &selection = std::static_pointer_cast<const P::Selection>(child); |
| project_expressions = &(selection->project_expressions()); |
| break; |
| } |
| default: |
| LOG(FATAL) << "Unexpected PhysicalType."; |
| } |
| DCHECK(project_expressions); |
| |
| std::vector<E::NamedExpressionPtr> alias_expressions; |
| alias_expressions.reserve(project_expressions->size()); |
| |
| int aid = 0; |
| for (const auto &project_attribute : union_all->project_attributes()) { |
| alias_expressions.emplace_back(E::Alias::Create( |
| project_attribute->id(), |
| (*project_expressions)[aid], |
| project_attribute->attribute_name(), |
| project_attribute->attribute_alias(), |
| project_attribute->relation_name())); |
| ++aid; |
| } |
| |
| return child->copyWithNewProjectExpressions(alias_expressions); |
| } |
| |
| // TopDown approach. |
| P::PhysicalPtr Apply(const P::PhysicalPtr &node) { |
| DCHECK(node); |
| |
| const auto new_node = ApplyToNode(node); |
| if (new_node == nullptr) { |
| return nullptr; |
| } |
| |
| std::vector<P::PhysicalPtr> new_children; |
| bool has_changed_children = false; |
| for (const auto &child : new_node->children()) { |
| const auto new_child = Apply(child); |
| if (new_child == nullptr) { |
| switch (node->getPhysicalType()) { |
| case P::PhysicalType::kUnionAll: |
| has_changed_children = true; |
| break; |
| case P::PhysicalType::kInsertSelection: |
| if (!new_children.empty()) { |
| // The actual input is empty. |
| return nullptr; |
| } |
| new_children.push_back(child); |
| break; |
| default: |
| return nullptr; |
| } |
| continue; |
| } else if (child != new_child && !has_changed_children) { |
| has_changed_children = true; |
| } |
| new_children.push_back(new_child); |
| } |
| |
| if (has_changed_children) { |
| if (P::SomeUnionAll::Matches(node)) { |
| switch (new_children.size()) { |
| case 0u: |
| return nullptr; |
| case 1u: |
| return CopyWithNewProjectExpressions( |
| std::static_pointer_cast<const P::UnionAll>(node), new_children.front()); |
| default: |
| break; |
| } |
| } |
| |
| DCHECK(!new_children.empty()); |
| return new_node->copyWithNewChildren(new_children); |
| } else { |
| return new_node; |
| } |
| } |
| |
| } // namespace |
| |
| P::PhysicalPtr EliminateEmptyNode::apply(const P::PhysicalPtr &input) { |
| DCHECK(P::SomeTopLevelPlan::Matches(input)); |
| const P::TopLevelPlanPtr &top_level_plan = |
| std::static_pointer_cast<const P::TopLevelPlan>(input); |
| |
| // TODO(quickstep-team): handle subquery. |
| if (!top_level_plan->shared_subplans().empty()) { |
| return input; |
| } |
| |
| const auto &plan = top_level_plan->plan(); |
| |
| P::SelectionPtr selection; |
| if (P::SomeSelection::MatchesWithConditionalCast(plan, &selection) && |
| P::SomeTableReference::Matches(selection->input())) { |
| return input; |
| } |
| |
| const P::PhysicalType plan_type = plan->getPhysicalType(); |
| std::vector<E::AttributeReferencePtr> project_expressions; |
| switch (plan_type) { |
| case P::PhysicalType::kAggregate: |
| case P::PhysicalType::kFilterJoin: |
| case P::PhysicalType::kHashJoin: |
| case P::PhysicalType::kNestedLoopsJoin: |
| case P::PhysicalType::kSample: |
| case P::PhysicalType::kSort: |
| case P::PhysicalType::kUnionAll: |
| case P::PhysicalType::kWindowAggregate: |
| project_expressions = plan->getOutputAttributes(); |
| break; |
| case P::PhysicalType::kCopyFrom: |
| case P::PhysicalType::kCreateIndex: |
| case P::PhysicalType::kCreateTable: |
| case P::PhysicalType::kDeleteTuples: |
| case P::PhysicalType::kDropTable: |
| case P::PhysicalType::kInsertTuple: |
| case P::PhysicalType::kSharedSubplanReference: |
| case P::PhysicalType::kTableGenerator: |
| case P::PhysicalType::kUpdateTable: |
| // TODO(quickstep-team): revisit the cases above. |
| return input; |
| case P::PhysicalType::kCopyTo: |
| case P::PhysicalType::kInsertSelection: |
| project_expressions = plan->getReferencedAttributes(); |
| break; |
| case P::PhysicalType::kCrossReferenceCoalesceAggregate: |
| case P::PhysicalType::kTableReference: |
| case P::PhysicalType::kTopLevelPlan: |
| LOG(FATAL) << "Unexpected PhysicalType."; |
| case P::PhysicalType::kSelection: { |
| const auto &selection_input = selection->input(); |
| switch (selection_input->getPhysicalType()) { |
| case P::PhysicalType::kAggregate: |
| case P::PhysicalType::kWindowAggregate: { |
| DCHECK(project_expressions.empty()); |
| const auto referenced_attributes = selection_input->getReferencedAttributes(); |
| std::unordered_set<E::ExprId> unique_expr_ids; |
| for (auto &referenced_attribute : referenced_attributes) { |
| const E::ExprId expr_id = referenced_attribute->id(); |
| if (unique_expr_ids.find(expr_id) == unique_expr_ids.end()) { |
| project_expressions.emplace_back(std::move(referenced_attribute)); |
| unique_expr_ids.insert(expr_id); |
| } |
| } |
| break; |
| } |
| case P::PhysicalType::kCrossReferenceCoalesceAggregate: |
| LOG(FATAL) << "Unexpected PhysicalType."; |
| default: |
| project_expressions = plan->getOutputAttributes(); |
| break; |
| } |
| break; |
| } |
| } |
| |
| auto output = Apply(plan); |
| if (output == plan) { |
| return input; |
| } |
| |
| if (output) { |
| return input->copyWithNewChildren({output}); |
| } |
| |
| #ifdef QUICKSTEP_DEBUG |
| { |
| CHECK(!project_expressions.empty()); |
| |
| std::unordered_set<E::ExprId> unique_expr_ids; |
| unique_expr_ids.reserve(project_expressions.size()); |
| for (const auto &project_expression : project_expressions) { |
| unique_expr_ids.insert(project_expression->id()); |
| } |
| CHECK_EQ(project_expressions.size(), unique_expr_ids.size()); |
| } |
| #endif |
| |
| auto catalog_relation = std::make_unique<CatalogRelation>( |
| catalog_database_, GetNewRelationName(catalog_database_->size()), -1, true); |
| |
| attribute_id aid = 0; |
| std::unordered_set<std::string> relation_name_alias; |
| for (const auto &project_expression : project_expressions) { |
| // The attribute name is simply set to the attribute id to make it distinct. |
| auto catalog_attribute = std::make_unique<CatalogAttribute>( |
| catalog_relation.get(), project_expression->attribute_name(), |
| project_expression->getValueType(), aid, |
| project_expression->attribute_alias()); |
| catalog_relation->addAttribute(catalog_attribute.release()); |
| ++aid; |
| |
| relation_name_alias.insert(project_expression->relation_name()); |
| } |
| |
| // TODO(quickstep-team): use Nop physical node. |
| const auto temp_table = |
| P::TableReference::Create(catalog_relation.get(), |
| relation_name_alias.size() == 1u ? *relation_name_alias.begin() : "", |
| project_expressions); |
| catalog_database_->addRelation(catalog_relation.release()); |
| |
| switch (plan_type) { |
| case P::PhysicalType::kAggregate: |
| case P::PhysicalType::kCopyTo: |
| case P::PhysicalType::kWindowAggregate: |
| output = plan->copyWithNewChildren({ temp_table }); |
| break; |
| case P::PhysicalType::kFilterJoin: |
| case P::PhysicalType::kHashJoin: |
| case P::PhysicalType::kNestedLoopsJoin: |
| case P::PhysicalType::kSample: |
| case P::PhysicalType::kSort: |
| case P::PhysicalType::kUnionAll: |
| output = P::Selection::Create(temp_table, E::ToNamedExpressions(project_expressions), nullptr); |
| break; |
| case P::PhysicalType::kInsertSelection: |
| output = plan->copyWithNewChildren({ plan->children().front(), temp_table }); |
| break; |
| case P::PhysicalType::kSelection: { |
| const P::PhysicalPtr &selection_input = selection->input(); |
| switch (selection_input->getPhysicalType()) { |
| case P::PhysicalType::kAggregate: |
| case P::PhysicalType::kWindowAggregate: |
| output = selection_input->copyWithNewChildren({ temp_table }); |
| break; |
| case P::PhysicalType::kCrossReferenceCoalesceAggregate: |
| LOG(FATAL) << "Unexpected PhysicalType."; |
| default: |
| output = temp_table; |
| break; |
| } |
| output = selection->copyWithNewChildren({ output }); |
| break; |
| } |
| default: |
| LOG(FATAL) << "Unexpected PhysicalType."; |
| } |
| |
| DCHECK(output); |
| return input->copyWithNewChildren({output}); |
| } |
| |
| } // namespace optimizer |
| } // namespace quickstep |