| /** |
| * Licensed to the Apache Software Foundation (ASF) under one |
| * or more contributor license agreements. See the NOTICE file |
| * distributed with this work for additional information |
| * regarding copyright ownership. The ASF licenses this file |
| * to you under the Apache License, Version 2.0 (the |
| * "License"); you may not use this file except in compliance |
| * with the License. You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, |
| * software distributed under the License is distributed on an |
| * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
| * KIND, either express or implied. See the License for the |
| * specific language governing permissions and limitations |
| * under the License. |
| **/ |
| |
| #include "query_optimizer/cost_model/StarSchemaSimpleCostModel.hpp" |
| |
| #include <algorithm> |
| #include <memory> |
| #include <unordered_map> |
| #include <vector> |
| |
| #include "catalog/CatalogRelation.hpp" |
| #include "query_optimizer/expressions/AttributeReference.hpp" |
| #include "query_optimizer/expressions/ComparisonExpression.hpp" |
| #include "query_optimizer/expressions/ExprId.hpp" |
| #include "query_optimizer/expressions/ExpressionType.hpp" |
| #include "query_optimizer/expressions/ExpressionUtil.hpp" |
| #include "query_optimizer/expressions/LogicalAnd.hpp" |
| #include "query_optimizer/expressions/LogicalOr.hpp" |
| #include "query_optimizer/expressions/Predicate.hpp" |
| #include "query_optimizer/expressions/PatternMatcher.hpp" |
| #include "query_optimizer/physical/Aggregate.hpp" |
| #include "query_optimizer/physical/NestedLoopsJoin.hpp" |
| #include "query_optimizer/physical/HashJoin.hpp" |
| #include "query_optimizer/physical/PatternMatcher.hpp" |
| #include "query_optimizer/physical/Physical.hpp" |
| #include "query_optimizer/physical/PhysicalType.hpp" |
| #include "query_optimizer/physical/Selection.hpp" |
| #include "query_optimizer/physical/SharedSubplanReference.hpp" |
| #include "query_optimizer/physical/Sort.hpp" |
| #include "query_optimizer/physical/TableGenerator.hpp" |
| #include "query_optimizer/physical/TableReference.hpp" |
| #include "query_optimizer/physical/TopLevelPlan.hpp" |
| |
| #include "glog/logging.h" |
| |
| namespace quickstep { |
| namespace optimizer { |
| namespace cost { |
| |
| namespace E = ::quickstep::optimizer::expressions; |
| namespace P = ::quickstep::optimizer::physical; |
| |
| std::size_t StarSchemaSimpleCostModel::estimateCardinality( |
| const P::PhysicalPtr &physical_plan) { |
| switch (physical_plan->getPhysicalType()) { |
| case P::PhysicalType::kTopLevelPlan: |
| return estimateCardinalityForTopLevelPlan( |
| std::static_pointer_cast<const P::TopLevelPlan>(physical_plan)); |
| case P::PhysicalType::kTableReference: |
| return estimateCardinalityForTableReference( |
| std::static_pointer_cast<const P::TableReference>(physical_plan)); |
| case P::PhysicalType::kSelection: |
| return estimateCardinalityForSelection( |
| std::static_pointer_cast<const P::Selection>(physical_plan)); |
| case P::PhysicalType::kTableGenerator: |
| return estimateCardinalityForTableGenerator( |
| std::static_pointer_cast<const P::TableGenerator>(physical_plan)); |
| case P::PhysicalType::kHashJoin: |
| return estimateCardinalityForHashJoin( |
| std::static_pointer_cast<const P::HashJoin>(physical_plan)); |
| case P::PhysicalType::kNestedLoopsJoin: |
| return estimateCardinalityForNestedLoopsJoin( |
| std::static_pointer_cast<const P::NestedLoopsJoin>(physical_plan)); |
| case P::PhysicalType::kAggregate: |
| return estimateCardinalityForAggregate( |
| std::static_pointer_cast<const P::Aggregate>(physical_plan)); |
| case P::PhysicalType::kSharedSubplanReference: { |
| const P::SharedSubplanReferencePtr shared_subplan_reference = |
| std::static_pointer_cast<const P::SharedSubplanReference>(physical_plan); |
| return estimateCardinality( |
| shared_subplans_[shared_subplan_reference->subplan_id()]); |
| } |
| case P::PhysicalType::kSort: |
| return estimateCardinalityForSort( |
| std::static_pointer_cast<const P::Sort>(physical_plan)); |
| case P::PhysicalType::kWindowAggregate: |
| return estimateCardinalityForWindowAggregate( |
| std::static_pointer_cast<const P::WindowAggregate>(physical_plan)); |
| default: |
| LOG(FATAL) << "Unsupported physical plan:" << physical_plan->toString(); |
| } |
| } |
| |
| std::size_t StarSchemaSimpleCostModel::estimateCardinalityForTopLevelPlan( |
| const P::TopLevelPlanPtr &physical_plan) { |
| return estimateCardinality(physical_plan->plan()); |
| } |
| |
| std::size_t StarSchemaSimpleCostModel::estimateCardinalityForTableReference( |
| const P::TableReferencePtr &physical_plan) { |
| std::size_t num_tuples = physical_plan->relation()->getStatistics().getNumTuples(); |
| if (num_tuples == 0) { |
| num_tuples = physical_plan->relation()->estimateTupleCardinality(); |
| } |
| return num_tuples; |
| } |
| |
| std::size_t StarSchemaSimpleCostModel::estimateCardinalityForSelection( |
| const P::SelectionPtr &physical_plan) { |
| double selectivity = |
| estimateSelectivityForPredicate(physical_plan->filter_predicate(), physical_plan); |
| return static_cast<std::size_t>( |
| estimateCardinality(physical_plan->input()) * selectivity); |
| } |
| |
| std::size_t StarSchemaSimpleCostModel::estimateCardinalityForSort( |
| const physical::SortPtr &physical_plan) { |
| std::size_t cardinality = estimateCardinality( |
| std::static_pointer_cast<const P::Sort>(physical_plan)->input()); |
| return std::min(cardinality, static_cast<std::size_t>(physical_plan->limit())); |
| } |
| |
| std::size_t StarSchemaSimpleCostModel::estimateCardinalityForTableGenerator( |
| const P::TableGeneratorPtr &physical_plan) { |
| return physical_plan->generator_function_handle()->getEstimatedCardinality(); |
| } |
| |
| std::size_t StarSchemaSimpleCostModel::estimateCardinalityForHashJoin( |
| const P::HashJoinPtr &physical_plan) { |
| std::size_t left_cardinality = estimateCardinality(physical_plan->left()); |
| std::size_t right_cardinality = estimateCardinality(physical_plan->right()); |
| double left_selectivity = estimateSelectivity(physical_plan->left()); |
| double right_selectivity = estimateSelectivity(physical_plan->right()); |
| return std::max(static_cast<std::size_t>(left_cardinality * right_selectivity + 0.5), |
| static_cast<std::size_t>(right_cardinality * left_selectivity + 0.5)); |
| } |
| |
| std::size_t StarSchemaSimpleCostModel::estimateCardinalityForNestedLoopsJoin( |
| const P::NestedLoopsJoinPtr &physical_plan) { |
| return std::max(estimateCardinality(physical_plan->left()), |
| estimateCardinality(physical_plan->right())); |
| } |
| |
| std::size_t StarSchemaSimpleCostModel::estimateCardinalityForAggregate( |
| const P::AggregatePtr &physical_plan) { |
| double filter_selectivity = |
| estimateSelectivityForPredicate(physical_plan->filter_predicate(), physical_plan); |
| return static_cast<std::size_t>( |
| estimateNumGroupsForAggregate(physical_plan) * filter_selectivity); |
| } |
| |
| std::size_t StarSchemaSimpleCostModel::estimateCardinalityForWindowAggregate( |
| const P::WindowAggregatePtr &physical_plan) { |
| return estimateCardinality(physical_plan->input()); |
| } |
| |
| |
| std::size_t StarSchemaSimpleCostModel::estimateNumGroupsForAggregate( |
| const physical::AggregatePtr &aggregate) { |
| if (aggregate->grouping_expressions().empty()) { |
| return 1uL; |
| } |
| |
| std::size_t estimated_child_cardinality = estimateCardinality(aggregate->input()); |
| std::size_t estimated_num_groups = 1; |
| std::size_t max_attr_num_distinct_values = 0; |
| for (const auto &expr : aggregate->grouping_expressions()) { |
| E::AttributeReferencePtr attr; |
| if (E::SomeAttributeReference::MatchesWithConditionalCast(expr, &attr)) { |
| std::size_t attr_num_distinct_values = |
| estimateNumDistinctValues(attr->id(), aggregate->input()); |
| estimated_num_groups *= std::max(1uL, attr_num_distinct_values); |
| max_attr_num_distinct_values = |
| std::max(max_attr_num_distinct_values, attr_num_distinct_values); |
| } else { |
| // TODO(jianqiao): implement estimateNumDistinctValues() for expressions. |
| estimated_num_groups *= 64uL; |
| } |
| } |
| estimated_num_groups = std::max( |
| std::min(estimated_num_groups, estimated_child_cardinality / 10), |
| max_attr_num_distinct_values); |
| return estimated_num_groups; |
| } |
| |
| |
| std::size_t StarSchemaSimpleCostModel::estimateNumDistinctValues( |
| const expressions::ExprId attribute_id, |
| const physical::PhysicalPtr &physical_plan) { |
| DCHECK(E::ContainsExprId(physical_plan->getOutputAttributes(), attribute_id)); |
| |
| P::TableReferencePtr table_reference; |
| if (P::SomeTableReference::MatchesWithConditionalCast(physical_plan, &table_reference)) { |
| return getNumDistinctValues(attribute_id, table_reference); |
| } |
| |
| double filter_selectivity = estimateSelectivityForFilterPredicate(physical_plan); |
| switch (physical_plan->getPhysicalType()) { |
| case P::PhysicalType::kSelection: // Fall through |
| case P::PhysicalType::kAggregate: { |
| const P::PhysicalPtr &child = physical_plan->children()[0]; |
| if (E::ContainsExprId(child->getOutputAttributes(), attribute_id)) { |
| std::size_t child_num_distinct_values = |
| estimateNumDistinctValues(attribute_id, child); |
| return static_cast<std::size_t>( |
| child_num_distinct_values * filter_selectivity + 0.5); |
| } |
| break; |
| } |
| case P::PhysicalType::kHashJoin: { |
| const P::HashJoinPtr &hash_join = |
| std::static_pointer_cast<const P::HashJoin>(physical_plan); |
| if (E::ContainsExprId(hash_join->left()->getOutputAttributes(), attribute_id)) { |
| std::size_t left_child_num_distinct_values = |
| estimateNumDistinctValues(attribute_id, hash_join->left()); |
| double right_child_selectivity = |
| estimateSelectivity(hash_join->right()); |
| return static_cast<std::size_t>( |
| left_child_num_distinct_values * right_child_selectivity * filter_selectivity + 0.5); |
| } |
| if (E::ContainsExprId(hash_join->right()->getOutputAttributes(), attribute_id)) { |
| std::size_t right_child_num_distinct_values = |
| estimateNumDistinctValues(attribute_id, hash_join->right()); |
| double left_child_selectivity = |
| estimateSelectivity(hash_join->left()); |
| return static_cast<std::size_t>( |
| right_child_num_distinct_values * left_child_selectivity * filter_selectivity + 0.5); |
| } |
| } |
| default: |
| break; |
| } |
| |
| return 16uL; |
| } |
| |
| double StarSchemaSimpleCostModel::estimateSelectivity( |
| const physical::PhysicalPtr &physical_plan) { |
| switch (physical_plan->getPhysicalType()) { |
| case P::PhysicalType::kSelection: { |
| const P::SelectionPtr &selection = |
| std::static_pointer_cast<const P::Selection>(physical_plan); |
| double filter_selectivity = |
| estimateSelectivityForPredicate(selection->filter_predicate(), selection); |
| double child_selectivity = estimateSelectivity(selection->input()); |
| return filter_selectivity * child_selectivity; |
| } |
| case P::PhysicalType::kHashJoin: { |
| const P::HashJoinPtr &hash_join = |
| std::static_pointer_cast<const P::HashJoin>(physical_plan); |
| double filter_selectivity = |
| estimateSelectivityForPredicate(hash_join->residual_predicate(), hash_join); |
| double child_selectivity = |
| estimateSelectivity(hash_join->left()) * estimateSelectivity(hash_join->right()); |
| return filter_selectivity * child_selectivity; |
| } |
| case P::PhysicalType::kNestedLoopsJoin: { |
| const P::NestedLoopsJoinPtr &nested_loop_join = |
| std::static_pointer_cast<const P::NestedLoopsJoin>(physical_plan); |
| double filter_selectivity = |
| estimateSelectivityForPredicate(nested_loop_join->join_predicate(), nested_loop_join); |
| double child_selectivity = std::min( |
| estimateSelectivity(nested_loop_join->left()), |
| estimateSelectivity(nested_loop_join->right())); |
| return filter_selectivity * child_selectivity; |
| } |
| case P::PhysicalType::kSharedSubplanReference: { |
| const P::SharedSubplanReferencePtr shared_subplan_reference = |
| std::static_pointer_cast<const P::SharedSubplanReference>(physical_plan); |
| return estimateSelectivity( |
| shared_subplans_[shared_subplan_reference->subplan_id()]); |
| } |
| default: |
| break; |
| } |
| |
| if (physical_plan->getNumChildren() == 1) { |
| return estimateSelectivity(physical_plan->children()[0]); |
| } |
| |
| return 1.0; |
| } |
| |
| double StarSchemaSimpleCostModel::estimateSelectivityForFilterPredicate( |
| const physical::PhysicalPtr &physical_plan) { |
| E::PredicatePtr filter_predicate = nullptr; |
| switch (physical_plan->getPhysicalType()) { |
| case P::PhysicalType::kSelection: |
| filter_predicate = |
| std::static_pointer_cast<const P::Selection>(physical_plan)->filter_predicate(); |
| break; |
| case P::PhysicalType::kAggregate: |
| filter_predicate = |
| std::static_pointer_cast<const P::Aggregate>(physical_plan)->filter_predicate(); |
| break; |
| case P::PhysicalType::kHashJoin: |
| filter_predicate = |
| std::static_pointer_cast<const P::HashJoin>(physical_plan)->residual_predicate(); |
| break; |
| case P::PhysicalType::kNestedLoopsJoin: |
| filter_predicate = |
| std::static_pointer_cast<const P::NestedLoopsJoin>(physical_plan)->join_predicate(); |
| break; |
| default: |
| break; |
| } |
| |
| if (filter_predicate == nullptr) { |
| return 1.0; |
| } else { |
| return estimateSelectivityForPredicate(filter_predicate, physical_plan); |
| } |
| } |
| |
| |
| double StarSchemaSimpleCostModel::estimateSelectivityForPredicate( |
| const expressions::PredicatePtr &filter_predicate, |
| const P::PhysicalPtr &physical_plan) { |
| if (filter_predicate == nullptr) { |
| return 1.0; |
| } |
| |
| switch (filter_predicate->getExpressionType()) { |
| case E::ExpressionType::kComparisonExpression: { |
| // Case 1 - Number of distinct values statistics available |
| // Case 1.1 - Equality comparison: 1.0 / num_distinct_values |
| // Case 1.2 - Otherwise: 0.1 |
| // Case 2 - Otherwise: 0.5 |
| const E::ComparisonExpressionPtr &comparison_expression = |
| std::static_pointer_cast<const E::ComparisonExpression>(filter_predicate); |
| E::AttributeReferencePtr attr; |
| if ((E::SomeAttributeReference::MatchesWithConditionalCast(comparison_expression->left(), &attr) && |
| E::SomeScalarLiteral::Matches(comparison_expression->right())) || |
| (E::SomeAttributeReference::MatchesWithConditionalCast(comparison_expression->right(), &attr) && |
| E::SomeScalarLiteral::Matches(comparison_expression->left()))) { |
| for (const auto &child : physical_plan->children()) { |
| if (E::ContainsExprId(child->getOutputAttributes(), attr->id())) { |
| const std::size_t child_num_distinct_values = estimateNumDistinctValues(attr->id(), child); |
| if (comparison_expression->isEqualityComparisonPredicate()) { |
| return 1.0 / child_num_distinct_values; |
| } else { |
| return 1.0 / std::max(std::min(child_num_distinct_values / 100.0, 10.0), 2.0); |
| } |
| } |
| } |
| return 0.1; |
| } |
| return 0.5; |
| } |
| case E::ExpressionType::kLogicalAnd: { |
| const E::LogicalAndPtr &logical_and = |
| std::static_pointer_cast<const E::LogicalAnd>(filter_predicate); |
| double selectivity = 1.0; |
| for (const auto &predicate : logical_and->operands()) { |
| selectivity = std::min(selectivity, estimateSelectivityForPredicate(predicate, physical_plan)); |
| } |
| return selectivity; |
| } |
| case E::ExpressionType::kLogicalOr: { |
| const E::LogicalOrPtr &logical_or = |
| std::static_pointer_cast<const E::LogicalOr>(filter_predicate); |
| double selectivity = 0; |
| for (const auto &predicate : logical_or->operands()) { |
| selectivity += estimateSelectivityForPredicate(predicate, physical_plan); |
| } |
| return std::min(selectivity, 1.0); |
| } |
| default: |
| break; |
| } |
| return 1.0; |
| } |
| |
| std::size_t StarSchemaSimpleCostModel::getNumDistinctValues( |
| const E::ExprId attribute_id, |
| const P::TableReferencePtr &table_reference) { |
| const CatalogRelation &relation = *table_reference->relation(); |
| const std::vector<E::AttributeReferencePtr> &attributes = table_reference->attribute_list(); |
| for (std::size_t i = 0; i < attributes.size(); ++i) { |
| if (attributes[i]->id() == attribute_id) { |
| std::size_t num_distinct_values = relation.getStatistics().getNumDistinctValues(i); |
| if (num_distinct_values > 0) { |
| return num_distinct_values; |
| } |
| break; |
| } |
| } |
| return estimateCardinalityForTableReference(table_reference); |
| } |
| |
| } // namespace cost |
| } // namespace optimizer |
| } // namespace quickstep |