blob: 5a1f295f5586b5141cf496df3ac0b2788de3140c [file] [log] [blame]
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
#include <cstddef>
#include <memory>
#include <string>
#include <unordered_map>
#include <utility>
#include "query_optimizer/cost_model/StarSchemaSimpleCostModel.hpp"
#include "query_optimizer/expressions/AttributeReference.hpp"
#include "query_optimizer/expressions/ExprId.hpp"
#include "query_optimizer/physical/Physical.hpp"
#include "query_optimizer/physical/TableReference.hpp"
#include "query_optimizer/rules/Rule.hpp"
#include "utility/Macros.hpp"
namespace quickstep {
namespace optimizer {
class OptimizerContext;
* @brief Rule that applies to a physical plan to reduce the number of group-by
* attributes for Aggregate nodes (to improve performance) by pulling
* joins up the aggregations.
* For example, let R be a relation with PRIMARY KEY x and attributes y, z. Let
* S be a relation with FOREIGN KEY u refering to R(x) and attribute v. Then the
* optimization rule will transform the physical plan:
* Aggregate(
* [input relation]: HashJoin(
* [probe relation]: S
* [build relation]: R
* [join expression]: S.u = R.x
* [project attributes]: v, x, y, z
* )
* [aggregate expression]: SUM(v) AS sum_v
* [group-by attributes]: x, y, z
* )
* into:
* HashJoin(
* [probe relation]: Aggregate(
* [input relation]: S
* [aggregate expression]: SUM(v) AS sum_v
* [group-by attribute]: u
* ) AS T
* [build relation]: R
* [join expression]: T.u = R.x
* [project attributes]: sum_v, x, y, z
* )
class ReduceGroupByAttributes : public Rule<physical::Physical> {
* @brief Constructor.
* @param optimizer_context The optimizer context.
explicit ReduceGroupByAttributes(OptimizerContext *optimizer_context)
: optimizer_context_(optimizer_context) {}
~ReduceGroupByAttributes() override {}
std::string getName() const override {
return "ReduceGroupByAttributes";
physical::PhysicalPtr apply(const physical::PhysicalPtr &input) override;
struct AttributeInfo {
AttributeInfo(const expressions::AttributeReferencePtr &attribute_in,
const bool is_unique_in,
const bool is_fixed_length_in,
const std::size_t maximum_size_in)
: attribute(attribute_in),
maximum_size(maximum_size_in) {}
// In the situation that there are multiple attributes that can serve as the
// group-by key, we define an ordering based on aggregation performance (e.g.
// it is faster to do aggregation with a fix-length attribute as the group-by
// key than with a variable-length attribute).
inline static bool IsBetterThan(const AttributeInfo *lhs,
const AttributeInfo *rhs) {
if (lhs->is_unique != rhs->is_unique) {
return lhs->is_unique;
if (lhs->is_fixed_length != rhs->is_fixed_length) {
return lhs->is_fixed_length;
if (lhs->maximum_size != rhs->maximum_size) {
return lhs->maximum_size < rhs->maximum_size;
return lhs->attribute->id() < rhs->attribute->id();
const expressions::AttributeReferencePtr attribute;
const bool is_unique;
const bool is_fixed_length;
const std::size_t maximum_size;
physical::PhysicalPtr applyInternal(const physical::PhysicalPtr &input);
physical::PhysicalPtr applyToNode(const physical::PhysicalPtr &input);
OptimizerContext *optimizer_context_;
std::unique_ptr<cost::StarSchemaSimpleCostModel> cost_model_;
// Maps an attribute's id to the TableReference that generates the attribute.
expressions::AttributeReferencePtr>> source_;
} // namespace optimizer
} // namespace quickstep