| /** |
| * Licensed to the Apache Software Foundation (ASF) under one |
| * or more contributor license agreements. See the NOTICE file |
| * distributed with this work for additional information |
| * regarding copyright ownership. The ASF licenses this file |
| * to you under the Apache License, Version 2.0 (the |
| * "License"); you may not use this file except in compliance |
| * with the License. You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, |
| * software distributed under the License is distributed on an |
| * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
| * KIND, either express or implied. See the License for the |
| * specific language governing permissions and limitations |
| * under the License. |
| **/ |
| |
| #ifndef QUICKSTEP_QUERY_OPTIMIZER_RULES_REDUCE_GROUP_BY_ATTRIBUTES_HPP_ |
| #define QUICKSTEP_QUERY_OPTIMIZER_RULES_REDUCE_GROUP_BY_ATTRIBUTES_HPP_ |
| |
| #include <cstddef> |
| #include <memory> |
| #include <string> |
| #include <unordered_map> |
| #include <utility> |
| |
| #include "query_optimizer/cost_model/StarSchemaSimpleCostModel.hpp" |
| #include "query_optimizer/expressions/AttributeReference.hpp" |
| #include "query_optimizer/expressions/ExprId.hpp" |
| #include "query_optimizer/physical/Physical.hpp" |
| #include "query_optimizer/physical/TableReference.hpp" |
| #include "query_optimizer/rules/Rule.hpp" |
| #include "utility/Macros.hpp" |
| |
| namespace quickstep { |
| namespace optimizer { |
| |
| class OptimizerContext; |
| |
| /** |
| * @brief Rule that applies to a physical plan to reduce the number of group-by |
| * attributes for Aggregate nodes (to improve performance) by pulling |
| * joins up the aggregations. |
| * |
| * For example, let R be a relation with PRIMARY KEY x and attributes y, z. Let |
| * S be a relation with FOREIGN KEY u refering to R(x) and attribute v. Then the |
| * optimization rule will transform the physical plan: |
| * Aggregate( |
| * [input relation]: HashJoin( |
| * [probe relation]: S |
| * [build relation]: R |
| * [join expression]: S.u = R.x |
| * [project attributes]: v, x, y, z |
| * ) |
| * [aggregate expression]: SUM(v) AS sum_v |
| * [group-by attributes]: x, y, z |
| * ) |
| * |
| * into: |
| * HashJoin( |
| * [probe relation]: Aggregate( |
| * [input relation]: S |
| * [aggregate expression]: SUM(v) AS sum_v |
| * [group-by attribute]: u |
| * ) AS T |
| * [build relation]: R |
| * [join expression]: T.u = R.x |
| * [project attributes]: sum_v, x, y, z |
| * ) |
| */ |
| class ReduceGroupByAttributes : public Rule<physical::Physical> { |
| public: |
| /** |
| * @brief Constructor. |
| * |
| * @param optimizer_context The optimizer context. |
| */ |
| explicit ReduceGroupByAttributes(OptimizerContext *optimizer_context) |
| : optimizer_context_(optimizer_context) {} |
| |
| ~ReduceGroupByAttributes() override {} |
| |
| std::string getName() const override { |
| return "ReduceGroupByAttributes"; |
| } |
| |
| physical::PhysicalPtr apply(const physical::PhysicalPtr &input) override; |
| |
| private: |
| struct AttributeInfo { |
| AttributeInfo(const expressions::AttributeReferencePtr &attribute_in, |
| const bool is_unique_in, |
| const bool is_fixed_length_in, |
| const std::size_t maximum_size_in) |
| : attribute(attribute_in), |
| is_unique(is_unique_in), |
| is_fixed_length(is_fixed_length_in), |
| maximum_size(maximum_size_in) {} |
| |
| // In the situation that there are multiple attributes that can serve as the |
| // group-by key, we define an ordering based on aggregation performance (e.g. |
| // it is faster to do aggregation with a fix-length attribute as the group-by |
| // key than with a variable-length attribute). |
| inline static bool IsBetterThan(const AttributeInfo *lhs, |
| const AttributeInfo *rhs) { |
| if (lhs->is_unique != rhs->is_unique) { |
| return lhs->is_unique; |
| } |
| if (lhs->is_fixed_length != rhs->is_fixed_length) { |
| return lhs->is_fixed_length; |
| } |
| if (lhs->maximum_size != rhs->maximum_size) { |
| return lhs->maximum_size < rhs->maximum_size; |
| } |
| return lhs->attribute->id() < rhs->attribute->id(); |
| } |
| |
| const expressions::AttributeReferencePtr attribute; |
| const bool is_unique; |
| const bool is_fixed_length; |
| const std::size_t maximum_size; |
| }; |
| |
| physical::PhysicalPtr applyInternal(const physical::PhysicalPtr &input); |
| physical::PhysicalPtr applyToNode(const physical::PhysicalPtr &input); |
| |
| OptimizerContext *optimizer_context_; |
| std::unique_ptr<cost::StarSchemaSimpleCostModel> cost_model_; |
| |
| // Maps an attribute's id to the TableReference that generates the attribute. |
| std::unordered_map<expressions::ExprId, |
| std::pair<physical::TableReferencePtr, |
| expressions::AttributeReferencePtr>> source_; |
| |
| DISALLOW_COPY_AND_ASSIGN(ReduceGroupByAttributes); |
| }; |
| |
| } // namespace optimizer |
| } // namespace quickstep |
| |
| #endif // QUICKSTEP_QUERY_OPTIMIZER_RULES_REDUCE_GROUP_BY_ATTRIBUTES_HPP_ |