Hash-Join-Fuse: Feature added and tests modified.
diff --git a/query_optimizer/CMakeLists.txt b/query_optimizer/CMakeLists.txt
index 5e0db44..1e4e346 100644
--- a/query_optimizer/CMakeLists.txt
+++ b/query_optimizer/CMakeLists.txt
@@ -226,6 +226,7 @@
quickstep_queryoptimizer_rules_CollapseSelection
quickstep_queryoptimizer_rules_ExtractCommonSubexpression
quickstep_queryoptimizer_rules_FuseAggregateJoin
+ quickstep_queryoptimizer_rules_FuseHashSelect
quickstep_queryoptimizer_rules_InjectJoinFilters
quickstep_queryoptimizer_rules_Partition
quickstep_queryoptimizer_rules_PruneColumns
diff --git a/query_optimizer/ExecutionGenerator.cpp b/query_optimizer/ExecutionGenerator.cpp
index b0d3c48..5ef58a9 100644
--- a/query_optimizer/ExecutionGenerator.cpp
+++ b/query_optimizer/ExecutionGenerator.cpp
@@ -939,6 +939,15 @@
query_context_proto_->add_predicates()->CopyFrom(residual_predicate->getProto());
}
+ // Convert the build predicate proto.
+ QueryContext::predicate_id build_predicate_index = QueryContext::kInvalidPredicateId;
+ if (physical_plan->build_predicate()) {
+ build_predicate_index = query_context_proto_->predicates_size();
+
+ unique_ptr<const Predicate> build_predicate(convertPredicate(physical_plan->build_predicate()));
+ query_context_proto_->add_predicates()->MergeFrom(build_predicate->getProto());
+ }
+
// Convert the project expressions proto.
const QueryContext::scalar_group_id project_expressions_group_index =
query_context_proto_->scalar_groups_size();
@@ -966,7 +975,9 @@
const CatalogRelation *probe_relation = probe_relation_info->relation;
// FIXME(quickstep-team): Add support for self-join.
- if (build_relation == probe_relation) {
+ // We check to see if the build_predicate is null as certain queries that
+ // support hash-select fuse will result in the first check being true.
+ if (build_relation == probe_relation && physical_plan->build_predicate() == nullptr) {
THROW_SQL_ERROR() << "Self-join is not supported";
}
@@ -1006,7 +1017,8 @@
build_attribute_ids,
any_build_attributes_nullable,
probe_num_partitions,
- join_hash_table_index));
+ join_hash_table_index,
+ build_predicate_index));
// Create InsertDestination proto.
const CatalogRelation *output_relation = nullptr;
diff --git a/query_optimizer/PhysicalGenerator.cpp b/query_optimizer/PhysicalGenerator.cpp
index 865cd11..0d15a66 100644
--- a/query_optimizer/PhysicalGenerator.cpp
+++ b/query_optimizer/PhysicalGenerator.cpp
@@ -30,6 +30,7 @@
#include "query_optimizer/rules/CollapseSelection.hpp"
#include "query_optimizer/rules/ExtractCommonSubexpression.hpp"
#include "query_optimizer/rules/FuseAggregateJoin.hpp"
+#include "query_optimizer/rules/FuseHashSelect.hpp"
#include "query_optimizer/rules/InjectJoinFilters.hpp"
#include "query_optimizer/rules/Partition.hpp"
#include "query_optimizer/rules/PruneColumns.hpp"
@@ -67,6 +68,10 @@
"If true, apply an optimization to support partitioned inputs. The "
"optimization may add additional Selection for repartitioning.");
+DEFINE_bool(use_fuse_hash_select, true,
+ "If true, apply an optimization that moves build-side Selection nodes"
+ "into the hash join operator instead.");
+
DEFINE_bool(use_filter_joins, true,
"If true, apply an optimization that strength-reduces HashJoins to "
"FilterJoins (implemented as LIPFilters attached to some anchoring "
@@ -176,6 +181,10 @@
rules.push_back(std::make_unique<PruneColumns>());
}
+ if (FLAGS_use_fuse_hash_select) {
+ rules.emplace_back(new FuseHashSelect());
+ }
+
// NOTE(jianqiao): Adding rules after InjectJoinFilters (or AttachLIPFilters)
// requires extra handling of LIPFilterConfiguration for transformed nodes.
// So currently it is suggested that all the new rules be placed before this
diff --git a/query_optimizer/cost_model/StarSchemaSimpleCostModel.cpp b/query_optimizer/cost_model/StarSchemaSimpleCostModel.cpp
index e0e3dff..5aec4b4 100644
--- a/query_optimizer/cost_model/StarSchemaSimpleCostModel.cpp
+++ b/query_optimizer/cost_model/StarSchemaSimpleCostModel.cpp
@@ -179,8 +179,9 @@
std::size_t right_cardinality = estimateCardinality(physical_plan->right());
double left_selectivity = estimateSelectivity(physical_plan->left());
double right_selectivity = estimateSelectivity(physical_plan->right());
- return std::max(static_cast<std::size_t>(left_cardinality * right_selectivity + 0.5),
- static_cast<std::size_t>(right_cardinality * left_selectivity + 0.5));
+ double build_selectivity = estimateSelectivityForPredicate(physical_plan->build_predicate(), physical_plan);
+ return std::max(static_cast<std::size_t>(left_cardinality * right_selectivity * build_selectivity + 0.5),
+ static_cast<std::size_t>(right_cardinality * left_selectivity * build_selectivity + 0.5));
}
std::size_t StarSchemaSimpleCostModel::estimateCardinalityForNestedLoopsJoin(
@@ -295,16 +296,20 @@
estimateNumDistinctValues(attribute_id, hash_join->left());
double right_child_selectivity =
estimateSelectivity(hash_join->right());
+ double build_selectivity =
+ estimateSelectivityForPredicate(hash_join->build_predicate(), hash_join);
return static_cast<std::size_t>(
- left_child_num_distinct_values * right_child_selectivity * filter_selectivity + 0.5);
+ left_child_num_distinct_values * right_child_selectivity * filter_selectivity * build_selectivity + 0.5);
}
if (E::ContainsExprId(hash_join->right()->getOutputAttributes(), attribute_id)) {
std::size_t right_child_num_distinct_values =
estimateNumDistinctValues(attribute_id, hash_join->right());
double left_child_selectivity =
estimateSelectivity(hash_join->left());
+ double build_selectivity =
+ estimateSelectivityForPredicate(hash_join->build_predicate(), hash_join);
return static_cast<std::size_t>(
- right_child_num_distinct_values * left_child_selectivity * filter_selectivity + 0.5);
+ right_child_num_distinct_values * left_child_selectivity * filter_selectivity * build_selectivity + 0.5);
}
}
default:
@@ -351,9 +356,11 @@
std::static_pointer_cast<const P::HashJoin>(physical_plan);
double filter_selectivity =
estimateSelectivityForPredicate(hash_join->residual_predicate(), hash_join);
+ double build_selectivity =
+ estimateSelectivityForPredicate(hash_join->build_predicate(), hash_join);
double child_selectivity =
estimateSelectivity(hash_join->left()) * estimateSelectivity(hash_join->right());
- return filter_selectivity * child_selectivity;
+ return filter_selectivity * child_selectivity * build_selectivity;
}
case P::PhysicalType::kNestedLoopsJoin: {
const P::NestedLoopsJoinPtr &nested_loop_join =
diff --git a/query_optimizer/physical/HashJoin.cpp b/query_optimizer/physical/HashJoin.cpp
index a49bc8e..ca6bf9f 100644
--- a/query_optimizer/physical/HashJoin.cpp
+++ b/query_optimizer/physical/HashJoin.cpp
@@ -57,6 +57,13 @@
referenced_attributes_in_residual.begin(),
referenced_attributes_in_residual.end());
}
+ if (build_predicate_ != nullptr) {
+ const std::vector<expressions::AttributeReferencePtr> referenced_attributes_in_build =
+ build_predicate_->getReferencedAttributes();
+ referenced_attributes.insert(referenced_attributes.end(),
+ referenced_attributes_in_build.begin(),
+ referenced_attributes_in_build.end());
+ }
return referenced_attributes;
}
@@ -79,6 +86,7 @@
left_join_attributes_,
right_join_attributes_,
residual_predicate_,
+ build_predicate_,
new_project_expressions,
join_type_,
has_repartition_,
@@ -105,6 +113,10 @@
non_container_child_field_names->push_back("residual_predicate");
non_container_child_fields->push_back(residual_predicate_);
}
+ if (build_predicate_ != nullptr) {
+ non_container_child_field_names->push_back("build_predicate");
+ non_container_child_fields->push_back(build_predicate_);
+ }
container_child_field_names->push_back("left_join_attributes");
container_child_fields->push_back(CastSharedPtrVector<OptimizerTreeBase>(left_join_attributes_));
container_child_field_names->push_back("right_join_attributes");
diff --git a/query_optimizer/physical/HashJoin.hpp b/query_optimizer/physical/HashJoin.hpp
index 2ed83af..1c341df 100644
--- a/query_optimizer/physical/HashJoin.hpp
+++ b/query_optimizer/physical/HashJoin.hpp
@@ -103,6 +103,13 @@
}
/**
+ * @ brief The select predicate for a hash join fuse.
+ */
+ const expressions::PredicatePtr& build_predicate() const {
+ return build_predicate_;
+ }
+
+ /**
* @return Join type of this hash join.
*/
JoinType join_type() const {
@@ -117,6 +124,7 @@
left_join_attributes_,
right_join_attributes_,
residual_predicate_,
+ build_predicate_,
project_expressions(),
join_type_,
has_repartition_,
@@ -129,7 +137,7 @@
PartitionSchemeHeader *partition_scheme_header,
const bool has_repartition = true) const override {
return Create(left(), right(), left_join_attributes_, right_join_attributes_,
- residual_predicate_, project_expressions(), join_type_,
+ residual_predicate_, build_predicate_, project_expressions(), join_type_,
has_repartition, partition_scheme_header);
}
@@ -146,6 +154,7 @@
* @param left_join_attributes The join attributes in the 'left'.
* @param right_join_attributes The join attributes in the 'right'.
* @param residual_predicate Optional filtering predicate evaluated after join.
+ * @param build_predicate Optional select predicate for a hash join fuse.
* @param project_expressions The project expressions.
* @param Join type of this hash join.
* @param has_repartition Whether this node has repartition.
@@ -159,6 +168,7 @@
const std::vector<expressions::AttributeReferencePtr> &left_join_attributes,
const std::vector<expressions::AttributeReferencePtr> &right_join_attributes,
const expressions::PredicatePtr &residual_predicate,
+ const expressions::PredicatePtr &build_predicate,
const std::vector<expressions::NamedExpressionPtr> &project_expressions,
const JoinType join_type,
const bool has_repartition = false,
@@ -169,6 +179,7 @@
left_join_attributes,
right_join_attributes,
residual_predicate,
+ build_predicate,
project_expressions,
join_type,
has_repartition,
@@ -191,6 +202,7 @@
const std::vector<expressions::AttributeReferencePtr> &left_join_attributes,
const std::vector<expressions::AttributeReferencePtr> &right_join_attributes,
const expressions::PredicatePtr &residual_predicate,
+ const expressions::PredicatePtr &build_predicate,
const std::vector<expressions::NamedExpressionPtr> &project_expressions,
const JoinType join_type,
const bool has_repartition,
@@ -199,12 +211,14 @@
left_join_attributes_(left_join_attributes),
right_join_attributes_(right_join_attributes),
residual_predicate_(residual_predicate),
+ build_predicate_(build_predicate),
join_type_(join_type) {
}
std::vector<expressions::AttributeReferencePtr> left_join_attributes_;
std::vector<expressions::AttributeReferencePtr> right_join_attributes_;
expressions::PredicatePtr residual_predicate_;
+ expressions::PredicatePtr build_predicate_;
JoinType join_type_;
DISALLOW_COPY_AND_ASSIGN(HashJoin);
diff --git a/query_optimizer/rules/CMakeLists.txt b/query_optimizer/rules/CMakeLists.txt
index 73a80d2..d6c043f 100644
--- a/query_optimizer/rules/CMakeLists.txt
+++ b/query_optimizer/rules/CMakeLists.txt
@@ -26,6 +26,7 @@
ExtractCommonSubexpression.cpp
ExtractCommonSubexpression.hpp)
add_library(quickstep_queryoptimizer_rules_FuseAggregateJoin FuseAggregateJoin.cpp FuseAggregateJoin.hpp)
+add_library(quickstep_queryoptimizer_rules_FuseHashSelect FuseHashSelect.cpp FuseHashSelect.hpp)
add_library(quickstep_queryoptimizer_rules_GenerateJoins GenerateJoins.cpp GenerateJoins.hpp)
add_library(quickstep_queryoptimizer_rules_InjectJoinFilters InjectJoinFilters.cpp InjectJoinFilters.hpp)
add_library(quickstep_queryoptimizer_rules_Partition Partition.cpp Partition.hpp)
@@ -138,6 +139,14 @@
quickstep_queryoptimizer_physical_TopLevelPlan
quickstep_queryoptimizer_rules_BottomUpRule
quickstep_utility_Macros)
+target_link_libraries(quickstep_queryoptimizer_rules_FuseHashSelect
+ quickstep_queryoptimizer_expressions_Predicate
+ quickstep_queryoptimizer_physical_HashJoin
+ quickstep_queryoptimizer_physical_PatternMatcher
+ quickstep_queryoptimizer_physical_Physical
+ quickstep_queryoptimizer_physical_PhysicalType
+ quickstep_queryoptimizer_physical_Selection
+ quickstep_queryoptimizer_rules_TopDownRule)
target_link_libraries(quickstep_queryoptimizer_rules_GenerateJoins
glog
quickstep_queryoptimizer_expressions_AttributeReference
@@ -415,6 +424,7 @@
quickstep_queryoptimizer_rules_CollapseSelection
quickstep_queryoptimizer_rules_ExtractCommonSubexpression
quickstep_queryoptimizer_rules_FuseAggregateJoin
+ quickstep_queryoptimizer_rules_FuseHashSelect
quickstep_queryoptimizer_rules_GenerateJoins
quickstep_queryoptimizer_rules_InjectJoinFilters
quickstep_queryoptimizer_rules_Partition
diff --git a/query_optimizer/rules/ExtractCommonSubexpression.cpp b/query_optimizer/rules/ExtractCommonSubexpression.cpp
index 4c4d33e..eb4adcf 100644
--- a/query_optimizer/rules/ExtractCommonSubexpression.cpp
+++ b/query_optimizer/rules/ExtractCommonSubexpression.cpp
@@ -171,6 +171,7 @@
hash_join->left_join_attributes(),
hash_join->right_join_attributes(),
hash_join->residual_predicate(),
+ hash_join->build_predicate(),
new_expressions,
hash_join->join_type());
}
diff --git a/query_optimizer/rules/FuseHashSelect.cpp b/query_optimizer/rules/FuseHashSelect.cpp
new file mode 100644
index 0000000..b260ee9
--- /dev/null
+++ b/query_optimizer/rules/FuseHashSelect.cpp
@@ -0,0 +1,88 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ **/
+
+#include "query_optimizer/rules/FuseHashSelect.hpp"
+
+#include <algorithm>
+#include <cstddef>
+#include <unordered_set>
+#include <vector>
+
+#include "query_optimizer/expressions/Predicate.hpp"
+#include "query_optimizer/physical/HashJoin.hpp"
+#include "query_optimizer/physical/PatternMatcher.hpp"
+#include "query_optimizer/physical/Physical.hpp"
+#include "query_optimizer/physical/PhysicalType.hpp"
+#include "query_optimizer/physical/Selection.hpp"
+
+#include "glog/logging.h"
+
+namespace quickstep {
+namespace optimizer {
+
+namespace E = ::quickstep::optimizer::expressions;
+namespace P = ::quickstep::optimizer::physical;
+
+P::PhysicalPtr FuseHashSelect::applyToNode(
+ const P::PhysicalPtr &node) {
+ P::HashJoinPtr hash_join;
+
+ // Check to see if the join is a hash join, if the hash join is an inner
+ // join.. We also check that there are no partitions.
+ // TODO(dbacon): Add support for other join types.
+
+ if ((!P::SomeHashJoin::MatchesWithConditionalCast(node, &hash_join)) ||
+ hash_join->getOutputPartitionSchemeHeader() != nullptr) {
+ return node;
+ }
+
+ // Get the join attributes from the build side.
+
+ P::PhysicalPtr right_child = hash_join->right();
+ const std::vector<E::AttributeReferencePtr> &right_join_attributes =
+ hash_join->right_join_attributes();
+ E::PredicatePtr build_predicate = nullptr;
+
+ // Check that the build side is a Selection and that the join attributes match up.
+ // If so, set the new hash join build side to the Selection input and the build predicate
+ // to the selection's filter.
+
+ P::SelectionPtr selection;
+ if (P::SomeSelection::MatchesWithConditionalCast(right_child, &selection)) {
+ if (E::SubsetOfExpressions(right_join_attributes,
+ selection->input()->getOutputAttributes())) {
+ right_child = selection->input();
+ build_predicate = selection->filter_predicate();
+ }
+ }
+
+ return P::HashJoin::Create(hash_join->left(),
+ right_child,
+ hash_join->left_join_attributes(),
+ right_join_attributes,
+ hash_join->residual_predicate(),
+ build_predicate,
+ hash_join->project_expressions(),
+ hash_join->join_type(),
+ hash_join->hasRepartition(),
+ hash_join->cloneOutputPartitionSchemeHeader());
+}
+
+} // namespace optimizer
+} // namespace quickstep
diff --git a/query_optimizer/rules/FuseHashSelect.hpp b/query_optimizer/rules/FuseHashSelect.hpp
new file mode 100644
index 0000000..e88530a
--- /dev/null
+++ b/query_optimizer/rules/FuseHashSelect.hpp
@@ -0,0 +1,64 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ **/
+
+#ifndef QUICKSTEP_QUERY_OPTIMIZER_RULES_FUSE_HASH_SELECT_HPP_
+#define QUICKSTEP_QUERY_OPTIMIZER_RULES_FUSE_HASH_SELECT_HPP_
+
+#include <cstdint>
+#include <memory>
+#include <string>
+
+#include "query_optimizer/physical/Physical.hpp"
+#include "query_optimizer/rules/TopDownRule.hpp"
+
+namespace quickstep {
+namespace optimizer {
+
+/** \addtogroup OptimizerRules
+ * @{
+ */
+
+/**
+ * @brief Rule that applies to a physical plan to fuse a select node in the
+ * build predicate of a hash join to the join.
+ */
+class FuseHashSelect : public TopDownRule<physical::Physical> {
+ public:
+ /**
+ * @brief Constructor.
+ */
+ FuseHashSelect() {}
+
+ ~FuseHashSelect() override {}
+
+ std::string getName() const override {
+ return "FuseHashSelect";
+ }
+
+ protected:
+ physical::PhysicalPtr applyToNode(const physical::PhysicalPtr &node) override;
+
+ private:
+ DISALLOW_COPY_AND_ASSIGN(FuseHashSelect);
+};
+
+} // namespace optimizer
+} // namespace quickstep
+
+#endif // QUICKSTEP_QUERY_OPTIMIZER_RULES_FUSE_HASH_SELECT_HPP_
diff --git a/query_optimizer/rules/InjectJoinFilters.cpp b/query_optimizer/rules/InjectJoinFilters.cpp
index 0aa2f5b..34d0f83 100644
--- a/query_optimizer/rules/InjectJoinFilters.cpp
+++ b/query_optimizer/rules/InjectJoinFilters.cpp
@@ -165,13 +165,17 @@
P::PhysicalPtr build_child = new_children[1];
E::PredicatePtr build_side_filter_predicate = nullptr;
P::SelectionPtr selection;
- if (P::SomeSelection::MatchesWithConditionalCast(build_child, &selection) &&
- E::SubsetOfExpressions(hash_join->right_join_attributes(),
- selection->input()->getOutputAttributes())) {
- build_child = selection->input();
- build_side_filter_predicate = selection->filter_predicate();
+ if (hash_join->build_predicate() == nullptr) {
+ if (P::SomeSelection::MatchesWithConditionalCast(build_child, &selection) &&
+ E::SubsetOfExpressions(hash_join->right_join_attributes(),
+ selection->input()->getOutputAttributes())) {
+ build_child = selection->input();
+ build_side_filter_predicate = selection->filter_predicate();
+ }
+ } else {
+ build_child = hash_join->right();
+ build_side_filter_predicate = hash_join->build_predicate();
}
-
return P::FilterJoin::Create(new_children[0],
build_child,
hash_join->left_join_attributes(),
diff --git a/query_optimizer/rules/Partition.cpp b/query_optimizer/rules/Partition.cpp
index 5f68cd3..fdb5581 100644
--- a/query_optimizer/rules/Partition.cpp
+++ b/query_optimizer/rules/Partition.cpp
@@ -497,6 +497,7 @@
if (left_needs_repartition || right_needs_repartition) {
return P::HashJoin::Create(left, right, left_join_attributes, right_join_attributes,
hash_join->residual_predicate(),
+ hash_join->build_predicate(),
hash_join->project_expressions(),
hash_join->join_type(),
false /* has_repartition */,
diff --git a/query_optimizer/rules/ReduceGroupByAttributes.cpp b/query_optimizer/rules/ReduceGroupByAttributes.cpp
index dcdd27a..ff9180c 100644
--- a/query_optimizer/rules/ReduceGroupByAttributes.cpp
+++ b/query_optimizer/rules/ReduceGroupByAttributes.cpp
@@ -206,6 +206,7 @@
{probe_attribute},
{build_attribute},
nullptr,
+ nullptr,
project_expressions,
P::HashJoin::JoinType::kInnerJoin);
}
diff --git a/query_optimizer/rules/ReorderColumns.cpp b/query_optimizer/rules/ReorderColumns.cpp
index 4783a8d..5f52938 100644
--- a/query_optimizer/rules/ReorderColumns.cpp
+++ b/query_optimizer/rules/ReorderColumns.cpp
@@ -180,6 +180,7 @@
old_node->left_join_attributes(),
old_node->right_join_attributes(),
old_node->residual_predicate(),
+ old_node->build_predicate(),
project_expressions,
old_node->join_type());
break;
diff --git a/query_optimizer/rules/StarSchemaHashJoinOrderOptimization.cpp b/query_optimizer/rules/StarSchemaHashJoinOrderOptimization.cpp
index 5906b98..340875d 100644
--- a/query_optimizer/rules/StarSchemaHashJoinOrderOptimization.cpp
+++ b/query_optimizer/rules/StarSchemaHashJoinOrderOptimization.cpp
@@ -315,6 +315,7 @@
probe_attributes,
build_attributes,
nullptr,
+ nullptr,
output_attributes,
P::HashJoin::JoinType::kInnerJoin);
@@ -347,6 +348,7 @@
probe_attributes,
build_attributes,
residual_predicate,
+ nullptr,
project_expressions,
P::HashJoin::JoinType::kInnerJoin);
}
diff --git a/query_optimizer/rules/SwapProbeBuild.cpp b/query_optimizer/rules/SwapProbeBuild.cpp
index d707e01..12dd27d 100644
--- a/query_optimizer/rules/SwapProbeBuild.cpp
+++ b/query_optimizer/rules/SwapProbeBuild.cpp
@@ -58,6 +58,7 @@
right_join_attributes,
left_join_attributes,
hash_join->residual_predicate(),
+ hash_join->build_predicate(),
hash_join->project_expressions(),
hash_join->join_type());
LOG_APPLYING_RULE(input, output);
diff --git a/query_optimizer/rules/tests/PruneColumns_unittest.cpp b/query_optimizer/rules/tests/PruneColumns_unittest.cpp
index 8781750..02b727d 100644
--- a/query_optimizer/rules/tests/PruneColumns_unittest.cpp
+++ b/query_optimizer/rules/tests/PruneColumns_unittest.cpp
@@ -95,6 +95,7 @@
{relation_attribute_reference_0_0_},
{relation_attribute_reference_1_0_},
E::PredicatePtr(),
+ E::PredicatePtr(),
project_expressions_with_redundancy,
P::HashJoin::JoinType::kInnerJoin);
@@ -123,6 +124,7 @@
{relation_attribute_reference_0_0_},
{relation_attribute_reference_1_0_},
E::PredicatePtr(),
+ E::PredicatePtr(),
pruned_project_expressions_for_join,
P::HashJoin::JoinType::kInnerJoin);
const P::SelectionPtr new_selection = std::static_pointer_cast<const P::Selection>(
diff --git a/query_optimizer/strategy/Join.cpp b/query_optimizer/strategy/Join.cpp
index f7d6d24..f3e6b8d 100644
--- a/query_optimizer/strategy/Join.cpp
+++ b/query_optimizer/strategy/Join.cpp
@@ -371,6 +371,7 @@
left_join_attributes,
right_join_attributes,
residual_predicate,
+ E::PredicatePtr() /* build_predicate */,
project_expressions,
join_type);
}
diff --git a/query_optimizer/strategy/tests/Join_unittest.cpp b/query_optimizer/strategy/tests/Join_unittest.cpp
index 87a8a97..ad34bec 100644
--- a/query_optimizer/strategy/tests/Join_unittest.cpp
+++ b/query_optimizer/strategy/tests/Join_unittest.cpp
@@ -96,6 +96,7 @@
{relation_attribute_reference_0_0_},
{relation_attribute_reference_1_0_},
E::PredicatePtr(),
+ E::PredicatePtr(),
project_expressions,
P::HashJoin::JoinType::kInnerJoin);
}
@@ -142,6 +143,7 @@
{relation_attribute_reference_0_0_},
{relation_attribute_reference_1_0_},
E::PredicatePtr(),
+ E::PredicatePtr(),
logical_project_0_->project_expressions(),
P::HashJoin::JoinType::kInnerJoin);
EXPECT_CORRECT_PHYSICAL();
@@ -183,6 +185,7 @@
{relation_attribute_reference_1_0_,
relation_attribute_reference_1_1_},
filter_predicate_0_,
+ E::PredicatePtr(),
logical_project_1_->project_expressions(),
P::HashJoin::JoinType::kInnerJoin);
EXPECT_CORRECT_PHYSICAL();
@@ -212,6 +215,7 @@
{relation_attribute_reference_0_0_},
{relation_attribute_reference_1_0_},
filter_predicate_0_,
+ E::PredicatePtr(),
physical_nested_loops_join_->project_expressions(),
P::HashJoin::JoinType::kInnerJoin);
EXPECT_CORRECT_PHYSICAL();
@@ -248,6 +252,7 @@
{relation_attribute_reference_0_0_},
{relation_attribute_reference_1_0_},
E::PredicatePtr(),
+ E::PredicatePtr(),
{alias_on_alias_reference_after_pullup} /* project_expressions */,
P::HashJoin::JoinType::kInnerJoin);
EXPECT_CORRECT_PHYSICAL();
@@ -286,6 +291,7 @@
{E::ToRef(alias_add_literal_0_)},
{relation_attribute_reference_1_0_},
E::PredicatePtr(),
+ E::PredicatePtr(),
project_expressions,
P::HashJoin::JoinType::kInnerJoin);
EXPECT_CORRECT_PHYSICAL();
diff --git a/query_optimizer/tests/physical_generator/Join.test b/query_optimizer/tests/physical_generator/Join.test
index 45ea0ed..d0b9e4b 100644
--- a/query_optimizer/tests/physical_generator/Join.test
+++ b/query_optimizer/tests/physical_generator/Join.test
@@ -331,16 +331,12 @@
| | | | +-AttributeReference[id=4,name=w,relation=b,type=Int]
| | | +-right_join_attributes=
| | | +-AttributeReference[id=0,name=w,relation=a,type=Int]
-| | +-right=Selection[has_repartition=false]
-| | | +-input=TableReference[relation=c]
-| | | | +-AttributeReference[id=6,name=x,relation=c,type=Int]
-| | | | +-AttributeReference[id=7,name=y,relation=c,type=Int]
-| | | +-filter_predicate=Greater
-| | | | +-AttributeReference[id=7,name=y,relation=c,type=Int]
-| | | | +-Literal[value=20,type=Int]
-| | | +-project_expressions=
-| | | +-AttributeReference[id=6,name=x,relation=c,type=Int]
-| | | +-AttributeReference[id=7,name=y,relation=c,type=Int]
+| | +-right=TableReference[relation=c]
+| | | +-AttributeReference[id=6,name=x,relation=c,type=Int]
+| | | +-AttributeReference[id=7,name=y,relation=c,type=Int]
+| | +-build_predicate=Greater
+| | | +-AttributeReference[id=7,name=y,relation=c,type=Int]
+| | | +-Literal[value=20,type=Int]
| | +-project_expressions=
| | | +-AttributeReference[id=5,name=x,relation=b,type=Int]
| | | +-AttributeReference[id=0,name=w,relation=a,type=Int NULL]
@@ -377,16 +373,12 @@
| +-left=TableReference[relation=b]
| | +-AttributeReference[id=0,name=w,relation=b,type=Int]
| | +-AttributeReference[id=1,name=x,relation=b,type=Int]
-| +-right=Selection[has_repartition=false]
-| | +-input=TableReference[relation=c]
-| | | +-AttributeReference[id=2,name=x,relation=c,type=Int]
-| | | +-AttributeReference[id=3,name=y,relation=c,type=Int]
-| | +-filter_predicate=Greater
-| | | +-AttributeReference[id=2,name=x,relation=c,type=Int]
-| | | +-Literal[value=10,type=Int]
-| | +-project_expressions=
-| | +-AttributeReference[id=2,name=x,relation=c,type=Int]
-| | +-AttributeReference[id=3,name=y,relation=c,type=Int]
+| +-right=TableReference[relation=c]
+| | +-AttributeReference[id=2,name=x,relation=c,type=Int]
+| | +-AttributeReference[id=3,name=y,relation=c,type=Int]
+| +-build_predicate=Greater
+| | +-AttributeReference[id=2,name=x,relation=c,type=Int]
+| | +-Literal[value=10,type=Int]
| +-project_expressions=
| | +-AttributeReference[id=0,name=w,relation=b,type=Int]
| | +-AttributeReference[id=1,name=x,relation=b,type=Int]
diff --git a/query_optimizer/tests/physical_generator/Select.test b/query_optimizer/tests/physical_generator/Select.test
index 8bb6599..720d248 100644
--- a/query_optimizer/tests/physical_generator/Select.test
+++ b/query_optimizer/tests/physical_generator/Select.test
@@ -2113,23 +2113,20 @@
| | +-AttributeReference[id=3,name=double_col,relation=test,type=Double NULL]
| | +-AttributeReference[id=4,name=char_col,relation=test,type=Char(20)]
| | +-AttributeReference[id=5,name=vchar_col,relation=test,type=VarChar(20) NULL]
-| +-right=Selection[has_repartition=false]
-| | +-input=TableReference[relation=Test,alias=test]
-| | | +-AttributeReference[id=6,name=int_col,relation=test,type=Int NULL]
-| | | +-AttributeReference[id=7,name=long_col,relation=test,type=Long]
-| | | +-AttributeReference[id=8,name=float_col,relation=test,type=Float]
-| | | +-AttributeReference[id=9,name=double_col,relation=test,type=Double NULL]
-| | | +-AttributeReference[id=10,name=char_col,relation=test,type=Char(20)]
-| | | +-AttributeReference[id=11,name=vchar_col,relation=test,
-| | | type=VarChar(20) NULL]
-| | +-filter_predicate=InValueList
-| | | +-test_expression=AttributeReference[id=7,name=long_col,relation=test,
-| | | | type=Long]
-| | | +-match_expressions=
-| | | +-Literal[value=1,type=Long]
-| | | +-Literal[value=2,type=Long]
-| | +-project_expressions=
-| | +-AttributeReference[id=7,name=long_col,relation=test,type=Long]
+| +-right=TableReference[relation=Test,alias=test]
+| | +-AttributeReference[id=6,name=int_col,relation=test,type=Int NULL]
+| | +-AttributeReference[id=7,name=long_col,relation=test,type=Long]
+| | +-AttributeReference[id=8,name=float_col,relation=test,type=Float]
+| | +-AttributeReference[id=9,name=double_col,relation=test,type=Double NULL]
+| | +-AttributeReference[id=10,name=char_col,relation=test,type=Char(20)]
+| | +-AttributeReference[id=11,name=vchar_col,relation=test,
+| | type=VarChar(20) NULL]
+| +-build_predicate=InValueList
+| | +-test_expression=AttributeReference[id=7,name=long_col,relation=test,
+| | | type=Long]
+| | +-match_expressions=
+| | +-Literal[value=1,type=Long]
+| | +-Literal[value=2,type=Long]
| +-project_expressions=
| | +-AttributeReference[id=4,name=char_col,relation=test,type=Char(20)]
| +-left_join_attributes=
diff --git a/relational_operators/BuildHashOperator.cpp b/relational_operators/BuildHashOperator.cpp
index 768c141..3a67993 100644
--- a/relational_operators/BuildHashOperator.cpp
+++ b/relational_operators/BuildHashOperator.cpp
@@ -70,6 +70,10 @@
tmb::MessageBus *bus) {
DCHECK(query_context != nullptr);
+ // Get the build predicate from the query context if it exists.
+ const Predicate *predicate =
+ query_context->getPredicate(build_predicate_index_);
+
if (input_relation_is_stored_) {
if (started_) {
return true;
@@ -80,7 +84,7 @@
for (const block_id block : input_relation_block_ids_[part_id]) {
container->addNormalWorkOrder(
new BuildHashWorkOrder(query_id_, input_relation_, join_key_attributes_, any_join_key_attributes_nullable_,
- part_id, block, hash_table, storage_manager,
+ part_id, block, predicate, hash_table, storage_manager,
CreateLIPFilterBuilderHelper(lip_deployment_index_, query_context)),
op_index_);
}
@@ -95,7 +99,7 @@
container->addNormalWorkOrder(
new BuildHashWorkOrder(query_id_, input_relation_, join_key_attributes_, any_join_key_attributes_nullable_,
part_id, input_relation_block_ids_[part_id][num_workorders_generated_[part_id]],
- hash_table, storage_manager,
+ predicate, hash_table, storage_manager,
CreateLIPFilterBuilderHelper(lip_deployment_index_, query_context)),
op_index_);
++num_workorders_generated_[part_id];
@@ -143,6 +147,7 @@
proto->SetExtension(serialization::BuildHashWorkOrder::any_join_key_attributes_nullable,
any_join_key_attributes_nullable_);
proto->SetExtension(serialization::BuildHashWorkOrder::join_hash_table_index, hash_table_index_);
+ proto->SetExtension(serialization::BuildHashWorkOrder::build_predicate_index, build_predicate_index_);
proto->SetExtension(serialization::BuildHashWorkOrder::partition_id, part_id);
proto->SetExtension(serialization::BuildHashWorkOrder::block_id, block);
proto->SetExtension(serialization::BuildHashWorkOrder::lip_deployment_index, lip_deployment_index_);
@@ -158,8 +163,25 @@
BlockReference block(
storage_manager_->getBlock(build_block_id_, input_relation_));
+ // Create the ValueAccessor to be initialized later.
+ std::unique_ptr<ValueAccessor> accessor;
+
+ // If there is a build predicate, find the tuples that match it in the block.
+ std::unique_ptr<TupleIdSequence> predicate_matches;
+ if (predicate_ != nullptr) {
+ predicate_matches.reset(
+ block->getMatchesForPredicate(predicate_));
+ }
+
+ // Use the tuples from the build predicate to fill a ValueAccessor, else
+ // initialize it to a default state for normal use.
TupleReferenceGenerator generator(build_block_id_);
- std::unique_ptr<ValueAccessor> accessor(block->getTupleStorageSubBlock().createValueAccessor());
+ if (predicate_ != nullptr) {
+ accessor.reset(
+ block->getTupleStorageSubBlock().createValueAccessor(predicate_matches.get()));
+ } else {
+ accessor.reset(block->getTupleStorageSubBlock().createValueAccessor());
+ }
// Build LIPFilters if enabled.
if (lip_filter_builder_ != nullptr) {
diff --git a/relational_operators/BuildHashOperator.hpp b/relational_operators/BuildHashOperator.hpp
index dfb6dfe..e4a2783 100644
--- a/relational_operators/BuildHashOperator.hpp
+++ b/relational_operators/BuildHashOperator.hpp
@@ -81,6 +81,7 @@
* @param hash_table_index The index of the JoinHashTable in QueryContext.
* The HashTable's key Type(s) should be the Type(s) of the
* join_key_attributes in input_relation.
+ * @param build_predicate_index The index of the build_predicate in QueryContext.
**/
BuildHashOperator(const std::size_t query_id,
const CatalogRelation &input_relation,
@@ -88,7 +89,8 @@
const std::vector<attribute_id> &join_key_attributes,
const bool any_join_key_attributes_nullable,
const std::size_t num_partitions,
- const QueryContext::join_hash_table_id hash_table_index)
+ const QueryContext::join_hash_table_id hash_table_index,
+ const QueryContext::predicate_id build_predicate_index)
: RelationalOperator(query_id, num_partitions),
input_relation_(input_relation),
input_relation_is_stored_(input_relation_is_stored),
@@ -96,6 +98,7 @@
any_join_key_attributes_nullable_(any_join_key_attributes_nullable),
is_broadcast_join_(num_partitions > 1u && !input_relation.hasPartitionScheme()),
hash_table_index_(hash_table_index),
+ build_predicate_index_(build_predicate_index),
input_relation_block_ids_(num_partitions),
num_workorders_generated_(num_partitions),
started_(false) {
@@ -164,6 +167,7 @@
const bool any_join_key_attributes_nullable_;
const bool is_broadcast_join_;
const QueryContext::join_hash_table_id hash_table_index_;
+ const QueryContext::predicate_id build_predicate_index_;
// The index is the partition id.
std::vector<BlocksInPartition> input_relation_block_ids_;
@@ -189,6 +193,7 @@
* @param any_join_key_attributes_nullable If any attribute is nullable.
* @param part_id The partition id of 'input_relation'.
* @param build_block_id The block id.
+ * @param predicate The Predicate to use.
* @param hash_table The JoinHashTable to use.
* @param storage_manager The StorageManager to use.
* @param lip_filter_builder The attached LIP filter builer.
@@ -199,6 +204,7 @@
const bool any_join_key_attributes_nullable,
const partition_id part_id,
const block_id build_block_id,
+ const Predicate *predicate,
JoinHashTable *hash_table,
StorageManager *storage_manager,
LIPFilterBuilder *lip_filter_builder)
@@ -207,6 +213,7 @@
join_key_attributes_(join_key_attributes),
any_join_key_attributes_nullable_(any_join_key_attributes_nullable),
build_block_id_(build_block_id),
+ predicate_(predicate),
hash_table_(DCHECK_NOTNULL(hash_table)),
storage_manager_(DCHECK_NOTNULL(storage_manager)),
lip_filter_builder_(lip_filter_builder) {}
@@ -221,6 +228,7 @@
* @param any_join_key_attributes_nullable If any attribute is nullable.
* @param part_id The partition id of 'input_relation'.
* @param build_block_id The block id.
+ * @param predicate The Predicate to use.
* @param hash_table The JoinHashTable to use.
* @param storage_manager The StorageManager to use.
* @param lip_filter_builder The attached LIP filter builer.
@@ -231,6 +239,7 @@
const bool any_join_key_attributes_nullable,
const partition_id part_id,
const block_id build_block_id,
+ const Predicate *predicate,
JoinHashTable *hash_table,
StorageManager *storage_manager,
LIPFilterBuilder *lip_filter_builder)
@@ -239,6 +248,7 @@
join_key_attributes_(std::move(join_key_attributes)),
any_join_key_attributes_nullable_(any_join_key_attributes_nullable),
build_block_id_(build_block_id),
+ predicate_(predicate),
hash_table_(DCHECK_NOTNULL(hash_table)),
storage_manager_(DCHECK_NOTNULL(storage_manager)),
lip_filter_builder_(lip_filter_builder) {}
@@ -256,6 +266,7 @@
const std::vector<attribute_id> join_key_attributes_;
const bool any_join_key_attributes_nullable_;
const block_id build_block_id_;
+ const Predicate *predicate_;
JoinHashTable *hash_table_;
StorageManager *storage_manager_;
diff --git a/relational_operators/WorkOrder.proto b/relational_operators/WorkOrder.proto
index b84e758..15fde92 100644
--- a/relational_operators/WorkOrder.proto
+++ b/relational_operators/WorkOrder.proto
@@ -82,7 +82,7 @@
}
}
-// Next tag: 40.
+// Next tag: 41.
message BuildHashWorkOrder {
extend WorkOrder {
// All required.
@@ -94,6 +94,7 @@
optional fixed64 block_id = 36;
optional int32 lip_deployment_index = 37;
repeated uint32 lip_filter_indexes = 39;
+ optional uint32 build_predicate_index = 40;
}
}
diff --git a/relational_operators/WorkOrderFactory.cpp b/relational_operators/WorkOrderFactory.cpp
index 7f11e3e..3c52f67 100644
--- a/relational_operators/WorkOrderFactory.cpp
+++ b/relational_operators/WorkOrderFactory.cpp
@@ -163,6 +163,8 @@
proto.GetExtension(serialization::BuildHashWorkOrder::any_join_key_attributes_nullable),
part_id,
proto.GetExtension(serialization::BuildHashWorkOrder::block_id),
+ query_context->getPredicate(
+ proto.GetExtension(serialization::BuildHashWorkOrder::build_predicate_index)),
query_context->getJoinHashTable(
proto.GetExtension(serialization::BuildHashWorkOrder::join_hash_table_index), part_id),
storage_manager,
diff --git a/relational_operators/tests/HashJoinOperator_unittest.cpp b/relational_operators/tests/HashJoinOperator_unittest.cpp
index cfd4314..eeeca4b 100644
--- a/relational_operators/tests/HashJoinOperator_unittest.cpp
+++ b/relational_operators/tests/HashJoinOperator_unittest.cpp
@@ -427,7 +427,8 @@
std::vector<attribute_id>(1, dim_col_long.getID()),
dim_col_long.getType().isNullable(),
kSinglePartition,
- join_hash_table_index));
+ join_hash_table_index,
+ QueryContext::kInvalidPredicateId));
// Create the prober operator with one selection attribute.
const QueryContext::scalar_group_id selection_index = query_context_proto.scalar_groups_size();
@@ -573,7 +574,8 @@
std::vector<attribute_id>(1, dim_col_int.getID()),
dim_col_int.getType().isNullable(),
kSinglePartition,
- join_hash_table_index));
+ join_hash_table_index,
+ QueryContext::kInvalidPredicateId));
// Create the prober operator with two selection attributes.
const QueryContext::scalar_group_id selection_index = query_context_proto.scalar_groups_size();
@@ -738,7 +740,8 @@
std::vector<attribute_id>(1, dim_col_char.getID()),
dim_col_char.getType().isNullable(),
kSinglePartition,
- join_hash_table_index));
+ join_hash_table_index,
+ QueryContext::kInvalidPredicateId));
// Create prober operator with one selection attribute.
const QueryContext::scalar_group_id selection_index = query_context_proto.scalar_groups_size();
@@ -877,7 +880,8 @@
std::vector<attribute_id>(1, dim_col_varchar.getID()),
dim_col_varchar.getType().isNullable(),
kSinglePartition,
- join_hash_table_index));
+ join_hash_table_index,
+ QueryContext::kInvalidPredicateId));
// Create prober operator with two selection attributes.
const QueryContext::scalar_group_id selection_index = query_context_proto.scalar_groups_size();
@@ -1052,7 +1056,8 @@
dim_key_attrs,
dim_col_long.getType().isNullable() || dim_col_varchar.getType().isNullable(),
kSinglePartition,
- join_hash_table_index));
+ join_hash_table_index,
+ QueryContext::kInvalidPredicateId));
// Create the prober operator with two selection attributes.
const QueryContext::scalar_group_id selection_index = query_context_proto.scalar_groups_size();
@@ -1232,7 +1237,8 @@
dim_key_attrs,
dim_col_long.getType().isNullable() || dim_col_varchar.getType().isNullable(),
kSinglePartition,
- join_hash_table_index));
+ join_hash_table_index,
+ QueryContext::kInvalidPredicateId));
// Create prober operator with two selection attributes.
const QueryContext::scalar_group_id selection_index = query_context_proto.scalar_groups_size();
@@ -1425,7 +1431,8 @@
{ dim_col_long.getID() },
dim_col_long.getType().isNullable(),
kMultiplePartitions,
- join_hash_table_index));
+ join_hash_table_index,
+ QueryContext::kInvalidPredicateId));
// Create the prober operator with one selection attribute.
const QueryContext::scalar_group_id selection_index = query_context_proto.scalar_groups_size();
@@ -1566,7 +1573,8 @@
{ dim_col_long.getID(), dim_col_varchar.getID() },
dim_col_long.getType().isNullable() || dim_col_varchar.getType().isNullable(),
kMultiplePartitions,
- join_hash_table_index));
+ join_hash_table_index,
+ QueryContext::kInvalidPredicateId));
// Create the prober operator with two selection attributes.
const QueryContext::scalar_group_id selection_index = query_context_proto.scalar_groups_size();
@@ -1737,7 +1745,8 @@
{ dim_col_long.getID(), dim_col_varchar.getID() },
dim_col_long.getType().isNullable() || dim_col_varchar.getType().isNullable(),
kMultiplePartitions,
- join_hash_table_index));
+ join_hash_table_index,
+ QueryContext::kInvalidPredicateId));
// Create prober operator with two selection attributes.
const QueryContext::scalar_group_id selection_index = query_context_proto.scalar_groups_size();