QUICKSTEP-125: Fixed the non-determinism in JoinReordering due to pointer cmp.
diff --git a/query_optimizer/rules/StarSchemaHashJoinOrderOptimization.cpp b/query_optimizer/rules/StarSchemaHashJoinOrderOptimization.cpp
index 1007dd5..78c0caf 100644
--- a/query_optimizer/rules/StarSchemaHashJoinOrderOptimization.cpp
+++ b/query_optimizer/rules/StarSchemaHashJoinOrderOptimization.cpp
@@ -30,6 +30,7 @@
#include "query_optimizer/cost_model/StarSchemaSimpleCostModel.hpp"
#include "query_optimizer/expressions/AttributeReference.hpp"
+#include "query_optimizer/expressions/ExprId.hpp"
#include "query_optimizer/expressions/NamedExpression.hpp"
#include "query_optimizer/expressions/PatternMatcher.hpp"
#include "query_optimizer/physical/Aggregate.hpp"
@@ -68,7 +69,7 @@
bool is_valid_cascading_hash_join = false;
if (hash_join->residual_predicate() == nullptr) {
is_valid_cascading_hash_join = true;
- for (const E::NamedExpressionPtr expr : hash_join->project_expressions()) {
+ for (const E::NamedExpressionPtr &expr : hash_join->project_expressions()) {
if (!E::SomeAttributeReference::Matches(expr)) {
is_valid_cascading_hash_join = false;
break;
@@ -95,8 +96,8 @@
// Gather join attribute pairs.
for (std::size_t i = 0; i < hash_join->left_join_attributes().size(); ++i) {
- const std::size_t left_attr_id = hash_join->left_join_attributes()[i]->id();
- const std::size_t right_attr_id = hash_join->right_join_attributes()[i]->id();
+ const E::ExprId left_attr_id = hash_join->left_join_attributes()[i]->id();
+ const E::ExprId right_attr_id = hash_join->right_join_attributes()[i]->id();
join_group->join_attribute_pairs.emplace_back(left_attr_id, right_attr_id);
}
@@ -150,7 +151,7 @@
std::vector<TableInfo> table_info_storage;
const std::vector<P::PhysicalPtr> &tables = join_group.tables;
- for (std::size_t i = 0; i < join_group.tables.size(); ++i) {
+ for (std::size_t i = 0; i < tables.size(); ++i) {
table_info_storage.emplace_back(
i,
tables[i],
@@ -279,16 +280,8 @@
remaining_tables.erase(selected_probe_table_info);
remaining_tables.erase(selected_build_table_info);
- // Figure out the output attributes.
const P::PhysicalPtr &probe_child = selected_probe_table_info->table;
const P::PhysicalPtr &build_child = selected_build_table_info->table;
- std::vector<E::NamedExpressionPtr> output_attributes;
- for (const E::AttributeReferencePtr &probe_attr : probe_child->getOutputAttributes()) {
- output_attributes.emplace_back(probe_attr);
- }
- for (const E::AttributeReferencePtr &build_attr : build_child->getOutputAttributes()) {
- output_attributes.emplace_back(build_attr);
- }
// Figure out the join attributes.
std::vector<E::AttributeReferencePtr> probe_attributes;
@@ -311,6 +304,19 @@
// the table pool. Return the last table in the table pool if there is only
// one table left.
if (remaining_tables.size() > 0) {
+ const auto probe_output_attributes = probe_child->getOutputAttributes();
+ const auto build_output_attributes = build_child->getOutputAttributes();
+
+ // Figure out the output attributes.
+ std::vector<E::NamedExpressionPtr> output_attributes;
+ output_attributes.reserve(probe_output_attributes.size() + build_output_attributes.size());
+ for (const E::AttributeReferencePtr &probe_attr : probe_output_attributes) {
+ output_attributes.emplace_back(probe_attr);
+ }
+ for (const E::AttributeReferencePtr &build_attr : build_output_attributes) {
+ output_attributes.emplace_back(build_attr);
+ }
+
P::PhysicalPtr output =
P::HashJoin::Create(probe_child,
build_child,
diff --git a/query_optimizer/rules/StarSchemaHashJoinOrderOptimization.hpp b/query_optimizer/rules/StarSchemaHashJoinOrderOptimization.hpp
index 64e2478..4dc9397 100644
--- a/query_optimizer/rules/StarSchemaHashJoinOrderOptimization.hpp
+++ b/query_optimizer/rules/StarSchemaHashJoinOrderOptimization.hpp
@@ -109,15 +109,20 @@
inline bool isBetterThan(const JoinPair &rhs) const {
const auto &lhs = *this;
+ const TableInfo &lhs_probe = *(lhs.probe);
+ const TableInfo &lhs_build = *(lhs.build);
+
+ const TableInfo &rhs_probe = *(rhs.probe);
+ const TableInfo &rhs_build = *(rhs.build);
// Avoid carrying too many output attributes all the way through a long
// chain of hash joins.
const bool lhs_has_large_output =
- lhs.build->estimated_num_output_attributes
- + lhs.probe->estimated_num_output_attributes > 5;
+ lhs_build.estimated_num_output_attributes
+ + lhs_probe.estimated_num_output_attributes > 5;
const bool rhs_has_large_output =
- rhs.build->estimated_num_output_attributes
- + rhs.probe->estimated_num_output_attributes > 5;
+ rhs_build.estimated_num_output_attributes
+ + rhs_probe.estimated_num_output_attributes > 5;
if (lhs_has_large_output != rhs_has_large_output) {
return rhs_has_large_output;
}
@@ -127,33 +132,43 @@
return lhs.build_side_unique;
}
+ // Prefer hash joins where the build side table has no output attributes.
+ const bool lhs_build_has_no_output = lhs_build.estimated_num_output_attributes == 0u;
+ const bool rhs_build_has_no_output = rhs_build.estimated_num_output_attributes == 0u;
+ if (lhs_build_has_no_output != rhs_build_has_no_output) {
+ return lhs_build_has_no_output;
+ }
+
// Prefer hash joins where the build side table is small.
- const bool lhs_has_small_build = lhs.build->estimated_cardinality < 0x100;
- const bool rhs_has_small_build = rhs.build->estimated_cardinality < 0x100;
+ const bool lhs_has_small_build = lhs_build.estimated_cardinality < 0x100;
+ const bool rhs_has_small_build = rhs_build.estimated_cardinality < 0x100;
if (lhs_has_small_build != rhs_has_small_build) {
return lhs_has_small_build;
}
// Prefer hash joins where the probe side table is small. This is effective
// for TPCH style (snowflake schema) queries, with the help of LIPFilters.
- if (lhs.probe->estimated_cardinality != rhs.probe->estimated_cardinality) {
- return lhs.probe->estimated_cardinality < rhs.probe->estimated_cardinality;
+ if (lhs_probe.estimated_cardinality != rhs_probe.estimated_cardinality) {
+ return lhs_probe.estimated_cardinality < rhs_probe.estimated_cardinality;
}
// Prefer build side tables with better selectivity. This is effective
// for SSB style queries.
- if (lhs.build->estimated_selectivity != rhs.build->estimated_selectivity) {
- return lhs.build->estimated_selectivity < rhs.build->estimated_selectivity;
+ if (lhs_build.estimated_selectivity != rhs_build.estimated_selectivity) {
+ return lhs_build.estimated_selectivity < rhs_build.estimated_selectivity;
}
// Residual rules that help provide a total order.
- if (lhs.build->estimated_cardinality != rhs.build->estimated_cardinality) {
- return lhs.build->estimated_cardinality < rhs.build->estimated_cardinality;
+ if (lhs_build.estimated_cardinality != rhs_build.estimated_cardinality) {
+ return lhs_build.estimated_cardinality < rhs_build.estimated_cardinality;
}
- if (lhs.probe->table != rhs.probe->table) {
- return lhs.probe->table < rhs.probe->table;
+
+ const std::size_t lhs_probe_table_info_id = lhs_probe.table_info_id;
+ const std::size_t rhs_probe_table_info_id = rhs_probe.table_info_id;
+ if (lhs_probe_table_info_id != rhs_probe_table_info_id) {
+ return lhs_probe_table_info_id < rhs_probe_table_info_id;
} else {
- return lhs.build->table < rhs.build->table;
+ return lhs_build.table_info_id < rhs_build.table_info_id;
}
}