Implemented HashJoinOperator::getAllWorkOrders
- Renamed HashJoinWorkOrder to HashInnerJoinWorkOrder.
- More clarifying comments in the HashJoinOperator cpp file.
- Renamed hasMatch to recordMatch in a join collector class.
diff --git a/relational_operators/HashJoinOperator.cpp b/relational_operators/HashJoinOperator.cpp
index b58f6b0..404006f 100644
--- a/relational_operators/HashJoinOperator.cpp
+++ b/relational_operators/HashJoinOperator.cpp
@@ -86,7 +86,7 @@
// Consolidation is a no-op for this version, but we provide this trivial
// call so that MapBasedJoinedTupleCollector and
// VectorBasedJoinedTupleCollector have the same interface and can both be
- // used in the templated HashJoinWorkOrder::executeWithCollectorType() method.
+ // used in the templated HashInnerJoinWorkOrder::executeWithCollectorType() method.
inline void consolidate() const {
}
@@ -216,7 +216,7 @@
}
template <typename ValueAccessorT>
- inline void hasMatch(const ValueAccessorT &accessor) {
+ inline void recordMatch(const ValueAccessorT &accessor) {
filter_->set(accessor.getCurrentPosition(), false);
}
@@ -243,31 +243,57 @@
StorageManager *storage_manager,
const tmb::client_id foreman_client_id,
tmb::MessageBus *bus) {
+ switch (join_type_) {
+ case JoinType::kInnerJoin:
+ return getAllNonOuterJoinWorkOrders<HashInnerJoinWorkOrder>(
+ container, query_context, storage_manager);
+ case JoinType::kLeftOuterJoin:
+ return getAllOuterJoinWorkOrders(container, query_context,
+ storage_manager);
+ case JoinType::kLeftSemiJoin:
+ return getAllNonOuterJoinWorkOrders<HashSemiJoinWorkOrder>(
+ container, query_context, storage_manager);
+ case JoinType::kLeftAntiJoin:
+ return getAllNonOuterJoinWorkOrders<HashAntiJoinWorkOrder>(
+ container, query_context, storage_manager);
+ default:
+ LOG(FATAL) << "Unknown join type in HashJoinOperator::getAllWorkOrders()";
+ }
+}
+
+template <class JoinWorkOrderClass>
+bool HashJoinOperator::getAllNonOuterJoinWorkOrders(
+ WorkOrdersContainer *container,
+ QueryContext *query_context,
+ StorageManager *storage_manager) {
// We wait until the building of global hash table is complete.
if (blocking_dependencies_met_) {
DCHECK(query_context != nullptr);
- const Predicate *residual_predicate = query_context->getPredicate(residual_predicate_index_);
+ const Predicate *residual_predicate =
+ query_context->getPredicate(residual_predicate_index_);
const vector<unique_ptr<const Scalar>> &selection =
- query_context->getScalarGroup(selection_index_);
+ query_context->getScalarGroup(selection_on_probe_index_);
InsertDestination *output_destination =
query_context->getInsertDestination(output_destination_index_);
- JoinHashTable *hash_table = query_context->getJoinHashTable(hash_table_index_);
+ const JoinHashTable &hash_table =
+ *(query_context->getJoinHashTable(hash_table_index_));
if (probe_relation_is_stored_) {
if (!started_) {
for (const block_id probe_block_id : probe_relation_block_ids_) {
container->addNormalWorkOrder(
- new HashJoinWorkOrder(build_relation_,
- probe_relation_,
- join_key_attributes_,
- any_join_key_attributes_nullable_,
- probe_block_id,
- residual_predicate,
- selection,
- output_destination,
- hash_table,
- storage_manager),
+ new JoinWorkOrderClass(
+ build_relation_,
+ probe_relation_,
+ join_key_attributes_,
+ any_join_key_attributes_nullable_,
+ probe_block_id,
+ selection,
+ hash_table,
+ residual_predicate,
+ output_destination,
+ storage_manager),
op_index_);
}
started_ = true;
@@ -276,27 +302,99 @@
} else {
while (num_workorders_generated_ < probe_relation_block_ids_.size()) {
container->addNormalWorkOrder(
- new HashJoinWorkOrder(
+ new JoinWorkOrderClass(
build_relation_,
probe_relation_,
join_key_attributes_,
any_join_key_attributes_nullable_,
probe_relation_block_ids_[num_workorders_generated_],
- residual_predicate,
selection,
- output_destination,
hash_table,
+ residual_predicate,
+ output_destination,
storage_manager),
op_index_);
++num_workorders_generated_;
- } // end while
+ }
return done_feeding_input_relation_;
- } // end else (input_relation_is_stored is false)
- } // end if (blocking_dependencies_met)
+ } // end else (probe_relation_is_stored_)
+ } // end if (blocking_dependencies_met_)
return false;
}
-void HashJoinWorkOrder::execute() {
+bool HashJoinOperator::getAllOuterJoinWorkOrders(
+ WorkOrdersContainer *container,
+ QueryContext *query_context,
+ StorageManager *storage_manager) {
+ // We wait until the building of global hash table is complete.
+ if (blocking_dependencies_met_) {
+ DCHECK(query_context != nullptr);
+
+ const vector<unique_ptr<const Scalar>> &selection_on_probe =
+ query_context->getScalarGroup(selection_on_probe_index_);
+ const vector<unique_ptr<const Scalar>> &selection_on_build =
+ query_context->getScalarGroup(selection_on_build_index_);
+ InsertDestination *output_destination =
+ query_context->getInsertDestination(output_destination_index_);
+ const JoinHashTable &hash_table =
+ *(query_context->getJoinHashTable(hash_table_index_));
+
+ // TODO(harshad, jianqiao) Construct the vector below in ExecutionGenerator
+ // and pass it as an argument to the HashJoinOperator.
+ std::vector<const Type*> selection_on_build_types;
+ for (auto selection_on_build_it = selection_on_build.begin();
+ selection_on_build_it != selection_on_build.end();
+ ++selection_on_build_it) {
+ selection_on_build_types.emplace_back(
+ (&(*selection_on_build_it)->getType().getNullableVersion()));
+ }
+
+ if (probe_relation_is_stored_) {
+ if (!started_) {
+ for (const block_id probe_block_id : probe_relation_block_ids_) {
+ container->addNormalWorkOrder(
+ new HashOuterJoinWorkOrder(
+ build_relation_,
+ probe_relation_,
+ join_key_attributes_,
+ any_join_key_attributes_nullable_,
+ hash_table,
+ selection_on_probe,
+ selection_on_build,
+ selection_on_build_types,
+ probe_block_id,
+ output_destination,
+ storage_manager),
+ op_index_);
+ }
+ started_ = true;
+ }
+ return started_;
+ } else {
+ while (num_workorders_generated_ < probe_relation_block_ids_.size()) {
+ container->addNormalWorkOrder(
+ new HashOuterJoinWorkOrder(
+ build_relation_,
+ probe_relation_,
+ join_key_attributes_,
+ any_join_key_attributes_nullable_,
+ hash_table,
+ selection_on_probe,
+ selection_on_build,
+ selection_on_build_types,
+ probe_relation_block_ids_[num_workorders_generated_],
+ output_destination,
+ storage_manager),
+ op_index_);
+ ++num_workorders_generated_;
+ }
+ return done_feeding_input_relation_;
+ } // end else (probe_relation_is_stored_)
+ } // end if (blocking_dependencies_met_)
+ return false;
+}
+
+void HashInnerJoinWorkOrder::execute() {
if (FLAGS_vector_based_joined_tuple_collector) {
executeWithCollectorType<VectorBasedJoinedTupleCollector>();
} else {
@@ -305,7 +403,7 @@
}
template <typename CollectorT>
-void HashJoinWorkOrder::executeWithCollectorType() {
+void HashInnerJoinWorkOrder::executeWithCollectorType() {
BlockReference probe_block(
storage_manager_->getBlock(block_id_, probe_relation_));
const TupleStorageSubBlock &probe_store = probe_block->getTupleStorageSubBlock();
@@ -313,13 +411,13 @@
std::unique_ptr<ValueAccessor> probe_accessor(probe_store.createValueAccessor());
CollectorT collector;
if (join_key_attributes_.size() == 1) {
- hash_table_->getAllFromValueAccessor(
+ hash_table_.getAllFromValueAccessor(
probe_accessor.get(),
join_key_attributes_.front(),
any_join_key_attributes_nullable_,
&collector);
} else {
- hash_table_->getAllFromValueAccessorCompositeKey(
+ hash_table_.getAllFromValueAccessorCompositeKey(
probe_accessor.get(),
join_key_attributes_,
any_join_key_attributes_nullable_,
@@ -419,6 +517,7 @@
std::unique_ptr<ValueAccessor> probe_accessor(probe_store.createValueAccessor());
// TODO(harshad) - Make this function work with both types of collectors.
+
// We collect all the matching probe relation tuples, as there's a residual
- // preidcate that needs to be applied after collecting these matches.
+ // predicate that needs to be applied after collecting these matches.
MapBasedJoinedTupleCollector collector;
@@ -481,10 +580,10 @@
std::unique_ptr<ValueAccessor> probe_accessor_with_filter(
probe_store.createValueAccessor(&filter));
ColumnVectorsValueAccessor temp_result;
- for (PtrList<Scalar>::const_iterator selection_it = selection_.begin();
+ for (vector<unique_ptr<const Scalar>>::const_iterator selection_it = selection_.begin();
selection_it != selection_.end();
++selection_it) {
- temp_result.addColumn(selection_it->getAllValues(
+ temp_result.addColumn((*selection_it)->getAllValues(
probe_accessor_with_filter.get(), &sub_blocks_ref));
}
@@ -503,7 +602,7 @@
// We collect all the probe relation tuples which have at least one matching
// tuple in the build relation. As a performance optimization, the hash table
// just looks for the existence of the probing key in the hash table and sets
- // the bit for the probing key in the collector. The optimization is correct
+ // the bit for the probing key in the collector. The optimization works
// because there is no residual predicate in this case, unlike
// executeWithResidualPredicate().
if (join_key_attributes_.size() == 1u) {
@@ -529,9 +628,9 @@
std::unique_ptr<ValueAccessor> probe_accessor_with_filter(
probe_store.createValueAccessor(collector.filter()));
ColumnVectorsValueAccessor temp_result;
- for (PtrList<Scalar>::const_iterator selection_it = selection_.begin();
+ for (vector<unique_ptr<const Scalar>>::const_iterator selection_it = selection_.begin();
selection_it != selection_.end(); ++selection_it) {
- temp_result.addColumn(selection_it->getAllValues(
+ temp_result.addColumn((*selection_it)->getAllValues(
probe_accessor_with_filter.get(), &sub_blocks_ref));
}
@@ -572,9 +671,9 @@
std::unique_ptr<ValueAccessor> probe_accessor_with_filter(
probe_store.createValueAccessor(collector.filter()));
ColumnVectorsValueAccessor temp_result;
- for (PtrList<Scalar>::const_iterator selection_it = selection_.begin();
+ for (vector<unique_ptr<const Scalar>>::const_iterator selection_it = selection_.begin();
selection_it != selection_.end(); ++selection_it) {
- temp_result.addColumn(selection_it->getAllValues(
+ temp_result.addColumn((*selection_it)->getAllValues(
probe_accessor_with_filter.get(), &sub_blocks_ref));
}
@@ -592,7 +691,10 @@
std::unique_ptr<ValueAccessor> probe_accessor(probe_store.createValueAccessor());
// TODO(harshad) - Make the following code work with both types of collectors.
MapBasedJoinedTupleCollector collector;
- // We probe the hash table and get all the matches.
+ // We probe the hash table and get all the matches. Unlike
+ // executeWithoutResidualPredicate(), we have to collect all the matching
+ // tuples, because after this step we still have to evaluate the residual
+ // predicate.
if (join_key_attributes_.size() == 1) {
hash_table_.getAllFromValueAccessor(
probe_accessor.get(),
@@ -645,10 +747,10 @@
std::unique_ptr<ValueAccessor> probe_accessor_with_filter(
probe_store.createValueAccessor(filter.get()));
ColumnVectorsValueAccessor temp_result;
- for (PtrList<Scalar>::const_iterator selection_it = selection_.begin();
+ for (vector<unique_ptr<const Scalar>>::const_iterator selection_it = selection_.begin();
selection_it != selection_.end();
++selection_it) {
- temp_result.addColumn(selection_it->getAllValues(probe_accessor_with_filter.get(),
+ temp_result.addColumn((*selection_it)->getAllValues(probe_accessor_with_filter.get(),
&sub_blocks_ref));
}
@@ -681,29 +783,35 @@
for (const std::pair<const block_id, std::vector<std::pair<tuple_id, tuple_id>>>
&build_block_entry : *collector.getJoinedTupleMap()) {
- BlockReference build_block = storage_manager_->getBlock(build_block_entry.first,
- build_relation_);
- const TupleStorageSubBlock &build_store = build_block->getTupleStorageSubBlock();
+ BlockReference build_block =
+ storage_manager_->getBlock(build_block_entry.first, build_relation_);
+ const TupleStorageSubBlock &build_store =
+ build_block->getTupleStorageSubBlock();
- std::unique_ptr<ValueAccessor> build_accessor(build_store.createValueAccessor());
+ std::unique_ptr<ValueAccessor> build_accessor(
+ build_store.createValueAccessor());
ColumnVectorsValueAccessor temp_result;
- for (PtrList<Scalar>::const_iterator selection_it = selection_on_probe_.begin();
+ for (auto selection_it = selection_on_probe_.begin();
selection_it != selection_on_probe_.end();
++selection_it) {
- temp_result.addColumn(selection_it->getAllValuesForJoin(build_relation_id,
- build_accessor.get(),
- probe_relation_id,
- probe_accessor.get(),
- build_block_entry.second));
+ temp_result.addColumn(
+ (*selection_it)->getAllValuesForJoin(
+ build_relation_id,
+ build_accessor.get(),
+ probe_relation_id,
+ probe_accessor.get(),
+ build_block_entry.second));
}
- for (PtrList<Scalar>::const_iterator selection_it = selection_on_build_.begin();
+ for (auto selection_it = selection_on_build_.begin();
selection_it != selection_on_build_.end();
++selection_it) {
- temp_result.addColumn(selection_it->getAllValuesForJoin(build_relation_id,
- build_accessor.get(),
- probe_relation_id,
- probe_accessor.get(),
- build_block_entry.second));
+ temp_result.addColumn(
+ (*selection_it)->getAllValuesForJoin(
+ build_relation_id,
+ build_accessor.get(),
+ probe_relation_id,
+ probe_accessor.get(),
+ build_block_entry.second));
}
output_destination_->bulkInsertTuples(&temp_result);
@@ -713,28 +821,30 @@
probe_block->getIndices(),
probe_block->getIndicesConsistent());
+ // Populate the output tuples for non-matches.
const TupleIdSequence *filter = collector.filter();
const TupleIdSequence::size_type num_tuples_without_matches = filter->size();
if (num_tuples_without_matches > 0) {
std::unique_ptr<ValueAccessor> probe_accessor_with_filter(
probe_store.createValueAccessor(filter));
ColumnVectorsValueAccessor temp_result;
- for (PtrList<Scalar>::const_iterator selection_it = selection_on_probe_.begin();
+ for (auto selection_it = selection_on_probe_.begin();
selection_it != selection_on_probe_.end();
++selection_it) {
- temp_result.addColumn(selection_it->getAllValues(probe_accessor_with_filter.get(),
- &sub_blocks_ref));
+ temp_result.addColumn(
+ (*selection_it)->getAllValues(probe_accessor_with_filter.get(),
+ &sub_blocks_ref));
}
for (const Type *selection_on_build_type : selection_on_build_types_) {
if (NativeColumnVector::UsableForType(*selection_on_build_type)) {
- NativeColumnVector *result = new NativeColumnVector(*selection_on_build_type,
- num_tuples_without_matches);
+ NativeColumnVector *result = new NativeColumnVector(
+ *selection_on_build_type, num_tuples_without_matches);
result->fillWithNulls();
temp_result.addColumn(result);
} else {
- IndirectColumnVector *result = new IndirectColumnVector(*selection_on_build_type,
- num_tuples_without_matches);
+ IndirectColumnVector *result = new IndirectColumnVector(
+ *selection_on_build_type, num_tuples_without_matches);
result->fillWithValue(TypedValue(selection_on_build_type->getTypeID()));
temp_result.addColumn(result);
}
diff --git a/relational_operators/HashJoinOperator.hpp b/relational_operators/HashJoinOperator.hpp
index 3ccf439..4141e14 100644
--- a/relational_operators/HashJoinOperator.hpp
+++ b/relational_operators/HashJoinOperator.hpp
@@ -58,14 +58,14 @@
class HashJoinOperator : public RelationalOperator {
public:
enum class JoinType {
- kInnerJoin,
+ kInnerJoin = 0,
kLeftOuterJoin,
kLeftSemiJoin,
kLeftAntiJoin
};
/**
- * @brief Constructor for a non anti-join.
+ * @brief Constructor.
*
* @note This operator can be constructed with an optional parameter
* residual_predicate_index that applies an additional filter besides
@@ -101,37 +101,48 @@
* additional filter to pairs of tuples that match the hash-join (i.e.
* key equality) predicate. Effectively, this makes the join predicate
* the conjunction of the key-equality predicate and residual predicate.
- * @param selection_index The group index of Scalars in QueryContext,
- * corresponding to the attributes of the relation referred by
+ * Note that this field is not relevant for anti-join.
+ * @param selection_on_probe_index The group index of Scalars in QueryContext,
+ * corresponding to the attributes of the probe relation referred by
* output_relation_id. Each Scalar is evaluated for the joined tuples,
* and the resulting value is inserted into the join result.
+ * @param selection_on_build_index The group index of Scalars in QueryContext,
+ * corresponding to the attributes of the build relation referred by
+ * output_relation_id. Each Scalar is evaluated for the joined tuples,
+ * and the resulting value is inserted into the join result.
+ * @param join_type The type of join corresponding to this operator.
**/
- HashJoinOperator(const CatalogRelation &build_relation,
- const CatalogRelation &probe_relation,
- const bool probe_relation_is_stored,
- const std::vector<attribute_id> &join_key_attributes,
- const bool any_join_key_attributes_nullable,
- const CatalogRelation &output_relation,
- const QueryContext::insert_destination_id output_destination_index,
- const QueryContext::join_hash_table_id hash_table_index,
- const QueryContext::predicate_id residual_predicate_index,
- const QueryContext::scalar_group_id selection_index,
- const JoinType join_type = JoinType::kInnerJoin)
- : build_relation_(build_relation),
- probe_relation_(probe_relation),
- probe_relation_is_stored_(probe_relation_is_stored),
- join_key_attributes_(join_key_attributes),
- any_join_key_attributes_nullable_(any_join_key_attributes_nullable),
- output_relation_(output_relation),
- output_destination_index_(output_destination_index),
- hash_table_index_(hash_table_index),
- residual_predicate_index_(residual_predicate_index),
- selection_index_(selection_index),
- // join_type_(join_type),
- probe_relation_block_ids_(probe_relation_is_stored ? probe_relation.getBlocksSnapshot()
- : std::vector<block_id>()),
- num_workorders_generated_(0),
- started_(false) {}
+ HashJoinOperator(
+ const CatalogRelation &build_relation,
+ const CatalogRelation &probe_relation,
+ const bool probe_relation_is_stored,
+ const std::vector<attribute_id> &join_key_attributes,
+ const bool any_join_key_attributes_nullable,
+ const CatalogRelation &output_relation,
+ const QueryContext::insert_destination_id output_destination_index,
+ const QueryContext::join_hash_table_id hash_table_index,
+ const QueryContext::predicate_id residual_predicate_index,
+ const QueryContext::scalar_group_id selection_on_probe_index,
+ const QueryContext::scalar_group_id selection_on_build_index =
+ QueryContext::kInvalidScalarGroupId,
+ const JoinType join_type = JoinType::kInnerJoin)
+ : build_relation_(build_relation),
+ probe_relation_(probe_relation),
+ probe_relation_is_stored_(probe_relation_is_stored),
+ join_key_attributes_(join_key_attributes),
+ any_join_key_attributes_nullable_(any_join_key_attributes_nullable),
+ output_relation_(output_relation),
+ output_destination_index_(output_destination_index),
+ hash_table_index_(hash_table_index),
+ residual_predicate_index_(residual_predicate_index),
+ selection_on_probe_index_(selection_on_probe_index),
+ selection_on_build_index_(selection_on_build_index),
+ join_type_(join_type),
+ probe_relation_block_ids_(probe_relation_is_stored
+ ? probe_relation.getBlocksSnapshot()
+ : std::vector<block_id>()),
+ num_workorders_generated_(0),
+ started_(false) {}
~HashJoinOperator() override {}
@@ -173,14 +184,14 @@
}
private:
- template<class JoinWorkOrderClass>
- bool getAllNonOuterJoinWorkOrders(WorkOrdersContainer *container) {
- return false;
- }
+ template <class JoinWorkOrderClass>
+ bool getAllNonOuterJoinWorkOrders(WorkOrdersContainer *container,
+ QueryContext *query_context,
+ StorageManager *storage_manager);
- bool getAllOuterJoinWorkOrders(WorkOrdersContainer *container) {
- return false;
- }
+ bool getAllOuterJoinWorkOrders(WorkOrdersContainer *container,
+ QueryContext *query_context,
+ StorageManager *storage_manager);
const CatalogRelation &build_relation_;
const CatalogRelation &probe_relation_;
@@ -191,8 +202,9 @@
const QueryContext::insert_destination_id output_destination_index_;
const QueryContext::join_hash_table_id hash_table_index_;
const QueryContext::predicate_id residual_predicate_index_;
- const QueryContext::scalar_group_id selection_index_;
- // const JoinType join_type_;
+ const QueryContext::scalar_group_id selection_on_probe_index_;
+ const QueryContext::scalar_group_id selection_on_build_index_;
+ const JoinType join_type_;
std::vector<block_id> probe_relation_block_ids_;
std::size_t num_workorders_generated_;
@@ -204,10 +216,8 @@
/**
* @brief An inner join WorkOrder produced by HashJoinOperator.
- *
- * TODO(harshad) Rename this class to HashInnerJoinWorkOrder.
**/
-class HashJoinWorkOrder : public WorkOrder {
+class HashInnerJoinWorkOrder : public WorkOrder {
public:
/**
* @brief Constructor.
@@ -220,26 +230,26 @@
* probe_relation.
* @param any_join_key_attributes_nullable If any attribute is nullable.
* @param lookup_block_id The block id of the probe_relation.
+ * @param selection A list of Scalars corresponding to the relation attributes
+ * in \c output_destination. Each Scalar is evaluated for the joined
+ * tuples, and the resulting value is inserted into the join result.
+ * @param hash_table The JoinHashTable to use.
* @param residual_predicate If non-null, apply as an additional filter to
* pairs of tuples that match the hash-join (i.e. key equality)
* predicate. Effectively, this makes the join predicate the
* conjunction of the key-equality predicate and residual_predicate.
- * @param selection A list of Scalars corresponding to the relation attributes
- * in \c output_destination. Each Scalar is evaluated for the joined
- * tuples, and the resulting value is inserted into the join result.
* @param output_destination The InsertDestination to insert the join results.
- * @param hash_table The JoinHashTable to use.
* @param storage_manager The StorageManager to use.
**/
- HashJoinWorkOrder(const CatalogRelationSchema &build_relation,
+ HashInnerJoinWorkOrder(const CatalogRelationSchema &build_relation,
const CatalogRelationSchema &probe_relation,
const std::vector<attribute_id> &join_key_attributes,
const bool any_join_key_attributes_nullable,
const block_id lookup_block_id,
- const Predicate *residual_predicate,
const std::vector<std::unique_ptr<const Scalar>> &selection,
+ const JoinHashTable &hash_table,
+ const Predicate *residual_predicate,
InsertDestination *output_destination,
- JoinHashTable *hash_table,
StorageManager *storage_manager)
: build_relation_(build_relation),
probe_relation_(probe_relation),
@@ -248,11 +258,11 @@
block_id_(lookup_block_id),
residual_predicate_(residual_predicate),
selection_(selection),
+ hash_table_(hash_table),
output_destination_(DCHECK_NOTNULL(output_destination)),
- hash_table_(DCHECK_NOTNULL(hash_table)),
storage_manager_(DCHECK_NOTNULL(storage_manager)) {}
- ~HashJoinWorkOrder() override {}
+ ~HashInnerJoinWorkOrder() override {}
/**
* @exception TupleTooLargeForBlock A tuple produced by this join was too
@@ -275,51 +285,49 @@
const Predicate *residual_predicate_;
const std::vector<std::unique_ptr<const Scalar>> &selection_;
+ const JoinHashTable &hash_table_;
InsertDestination *output_destination_;
- JoinHashTable *hash_table_;
StorageManager *storage_manager_;
- DISALLOW_COPY_AND_ASSIGN(HashJoinWorkOrder);
+ DISALLOW_COPY_AND_ASSIGN(HashInnerJoinWorkOrder);
};
/**
- * @brief A semi-join WorkOrder produced by the HashJoinOperator.
+ * @brief A left semi-join WorkOrder produced by the HashJoinOperator to execute
+ * EXISTS() clause.
**/
class HashSemiJoinWorkOrder : public WorkOrder {
public:
/**
* @brief Constructor.
- * TODO(harshad) - Sync the doxygen.
*
- * @param build_relation_id The id of relation that the hash table was
- * originally built on (i.e. the inner relation in the join).
- * @param probe_relation_id The id of relation to probe the hash table with
- * (i.e. the outer relation in the join).
- * @param join_key_attributes The IDs of equijoin attributes in
+ * @param build_relation The relation that the hash table was originally built
+ * on (i.e. the inner relation in the join).
+ * @param probe_relation The relation to probe the hash table with (i.e. the
+ * outer relation in the join).
+ * @param join_key_attributes The IDs of equijoin attributes in \c
* probe_relation.
* @param any_join_key_attributes_nullable If any attribute is nullable.
- * @param output_destination_index The index of the InsertDestination in the
- * QueryContext to insert the join results.
- * @param hash_table_index The index of the JoinHashTable in QueryContext.
+ * @param lookup_block_id The block id of the probe_relation.
+ * @param selection A list of Scalars corresponding to the relation attributes
+ * in \c output_destination. Each Scalar is evaluated for the joined
+ * tuples, and the resulting value is inserted into the join result.
+ * @param hash_table The JoinHashTable to use.
* @param residual_predicate If non-null, apply as an additional filter to
* pairs of tuples that match the hash-join (i.e. key equality)
* predicate. Effectively, this makes the join predicate the
* conjunction of the key-equality predicate and residual_predicate.
- * @param selection_index The group index of Scalars in QueryContext,
- * corresponding to the relation attributes in InsertDestination
- * referred by output_destination_index in QueryContext. Each Scalar is
- * evaluated for the joined tuples, and the resulting value is inserted
- * into the join result.
- * @param lookup_block_id The block id of the probe_relation.
+ * @param output_destination The InsertDestination to insert the join results.
+ * @param storage_manager The StorageManager to use.
**/
HashSemiJoinWorkOrder(const CatalogRelationSchema &build_relation,
const CatalogRelationSchema &probe_relation,
const std::vector<attribute_id> &join_key_attributes,
const bool any_join_key_attributes_nullable,
- const JoinHashTable &hash_table,
- const PtrList<Scalar> &selection,
- const Predicate *residual_predicate,
const block_id lookup_block_id,
+ const std::vector<std::unique_ptr<const Scalar>> &selection,
+ const JoinHashTable &hash_table,
+ const Predicate *residual_predicate,
InsertDestination *output_destination,
StorageManager *storage_manager)
: build_relation_(build_relation),
@@ -347,7 +355,7 @@
const std::vector<attribute_id> join_key_attributes_;
const bool any_join_key_attributes_nullable_;
const JoinHashTable &hash_table_;
- const PtrList<Scalar> &selection_;
+ const std::vector<std::unique_ptr<const Scalar>> &selection_;
const Predicate *residual_predicate_;
const block_id block_id_;
@@ -358,7 +366,8 @@
};
/**
- * @brief An anti-join WorkOrder produced by the HashJoinOperator.
+ * @brief A left anti-join WorkOrder produced by the HashJoinOperator to execute
+ * NOT EXISTS() clause.
**/
class HashAntiJoinWorkOrder : public WorkOrder {
public:
@@ -366,45 +375,43 @@
* @brief Constructor.
* TODO(harshad) - Sync the doxygen.
*
- * @param build_relation_id The id of relation that the hash table was
- * originally built on (i.e. the inner relation in the join).
- * @param probe_relation_id The id of relation to probe the hash table with
- * (i.e. the outer relation in the join).
- * @param join_key_attributes The IDs of equijoin attributes in
+ * @param build_relation The relation that the hash table was originally built
+ * on (i.e. the inner relation in the join).
+ * @param probe_relation The relation to probe the hash table with (i.e. the
+ * outer relation in the join).
+ * @param join_key_attributes The IDs of equijoin attributes in \c
* probe_relation.
* @param any_join_key_attributes_nullable If any attribute is nullable.
- * @param output_destination_index The index of the InsertDestination in the
- * QueryContext to insert the join results.
- * @param hash_table_index The index of the JoinHashTable in QueryContext.
+ * @param lookup_block_id The block id of the probe_relation.
+ * @param selection A list of Scalars corresponding to the relation attributes
+ * in \c output_destination. Each Scalar is evaluated for the joined
+ * tuples, and the resulting value is inserted into the join result.
+ * @param hash_table The JoinHashTable to use.
* @param residual_predicate If non-null, apply as an additional filter to
* pairs of tuples that match the hash-join (i.e. key equality)
* predicate. Effectively, this makes the join predicate the
* conjunction of the key-equality predicate and residual_predicate.
- * @param selection_index The group index of Scalars in QueryContext,
- * corresponding to the relation attributes in InsertDestination
- * referred by output_destination_index in QueryContext. Each Scalar is
- * evaluated for the joined tuples, and the resulting value is inserted
- * into the join result.
- * @param lookup_block_id The block id of the probe_relation.
+ * @param output_destination The InsertDestination to insert the join results.
+ * @param storage_manager The StorageManager to use.
**/
HashAntiJoinWorkOrder(const CatalogRelationSchema &build_relation,
const CatalogRelationSchema &probe_relation,
const std::vector<attribute_id> &join_key_attributes,
const bool any_join_key_attributes_nullable,
- const JoinHashTable &hash_table,
- const PtrList<Scalar> &selection,
- const Predicate *residual_predicate,
const block_id lookup_block_id,
+ const std::vector<std::unique_ptr<const Scalar>> &selection,
+ const JoinHashTable &hash_table,
+ const Predicate *residual_predicate,
InsertDestination *output_destination,
StorageManager *storage_manager)
: build_relation_(build_relation),
probe_relation_(probe_relation),
join_key_attributes_(join_key_attributes),
any_join_key_attributes_nullable_(any_join_key_attributes_nullable),
- hash_table_(hash_table),
selection_(selection),
residual_predicate_(residual_predicate),
block_id_(lookup_block_id),
+ hash_table_(hash_table),
output_destination_(output_destination),
storage_manager_(storage_manager) {}
@@ -427,10 +434,10 @@
const CatalogRelationSchema &probe_relation_;
const std::vector<attribute_id> join_key_attributes_;
const bool any_join_key_attributes_nullable_;
- const JoinHashTable &hash_table_;
- const PtrList<Scalar> &selection_;
+ const std::vector<std::unique_ptr<const Scalar>> &selection_;
const Predicate *residual_predicate_;
const block_id block_id_;
+ const JoinHashTable &hash_table_;
InsertDestination *output_destination_;
StorageManager *storage_manager_;
@@ -439,47 +446,36 @@
};
/**
- * @brief An outer join WorkOrder produced by the HashJoinOperator.
+ * @brief A left outer join WorkOrder produced by the HashJoinOperator.
**/
class HashOuterJoinWorkOrder : public WorkOrder {
public:
/**
* @brief Constructor.
- * TODO(harshad) - Sync the doxygen.
*
- * @param build_relation_id The id of relation that the hash table was
- * originally built on (i.e. the inner relation in the join).
- * @param probe_relation_id The id of relation to probe the hash table with
- * (i.e. the outer relation in the join).
- * @param join_key_attributes The IDs of equijoin attributes in
+ * @param build_relation The relation that the hash table was originally built
+ * on (i.e. the inner relation in the join).
+ * @param probe_relation The relation to probe the hash table with (i.e. the
+ * outer relation in the join).
+ * @param join_key_attributes The IDs of equijoin attributes in \c
* probe_relation.
* @param any_join_key_attributes_nullable If any attribute is nullable.
- * @param output_destination_index The index of the InsertDestination in the
- * QueryContext to insert the join results.
- * @param hash_table_index The index of the JoinHashTable in QueryContext.
- * @param residual_predicate If non-null, apply as an additional filter to
- * pairs of tuples that match the hash-join (i.e. key equality)
- * predicate. Effectively, this makes the join predicate the
- * conjunction of the key-equality predicate and residual_predicate.
- * @param selection_on_probe_index The group index of Scalars from probe
- * relation in QueryContext, corresponding to the relation attributes
- * in InsertDestination referred by output_destination_index in
- * QueryContext.
- * @param selection_on_build_index The group index of Scalars from build
- * relation in QueryContext, corresponding to the relation attributes
- * in InsertDestination referred by output_destination_index in
- * QueryContext.
- * @param selection_on_build_types The Types of the selected attributes from
- * the build relation.
+ * @param hash_table The JoinHashTable to use.
+ * @param selection_on_probe A list of Scalars from probe relation,
+ * corresponding to the relation attributes in \c output_destination.
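+ * @param selection_on_build A list of Scalars from build relation,
+ * corresponding to the relation attributes in \c output_destination.
+ * @param selection_on_build_types The Types of the selected attributes from
+ * the build relation.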
* @param lookup_block_id The block id of the probe_relation.
+ * @param output_destination The InsertDestination to insert the join results.
+ * @param storage_manager The StorageManager to use.
**/
HashOuterJoinWorkOrder(const CatalogRelationSchema &build_relation,
const CatalogRelationSchema &probe_relation,
const std::vector<attribute_id> &join_key_attributes,
const bool any_join_key_attributes_nullable,
const JoinHashTable &hash_table,
- const PtrList<Scalar> &selection_on_probe,
- const PtrList<Scalar> &selection_on_build,
+ const std::vector<std::unique_ptr<const Scalar>> &selection_on_probe,
+ const std::vector<std::unique_ptr<const Scalar>> &selection_on_build,
const std::vector<const Type*> &selection_on_build_types,
const block_id lookup_block_id,
InsertDestination *output_destination,
@@ -506,8 +502,8 @@
const std::vector<attribute_id> join_key_attributes_;
const bool any_join_key_attributes_nullable_;
const JoinHashTable &hash_table_;
- const PtrList<Scalar> &selection_on_probe_;
- const PtrList<Scalar> &selection_on_build_;
+ const std::vector<std::unique_ptr<const Scalar>> &selection_on_probe_;
+ const std::vector<std::unique_ptr<const Scalar>> &selection_on_build_;
- const std::vector<const Type*> &selection_on_build_types_;
+ const std::vector<const Type*> selection_on_build_types_;  // Held by value: the caller's vector is a short-lived local.
const block_id block_id_;
@@ -517,6 +513,7 @@
DISALLOW_COPY_AND_ASSIGN(HashOuterJoinWorkOrder);
};
+
/** @} */
} // namespace quickstep
diff --git a/storage/HashTable.hpp b/storage/HashTable.hpp
index 284a1a4..0178970 100644
--- a/storage/HashTable.hpp
+++ b/storage/HashTable.hpp
@@ -751,7 +751,8 @@
/**
* @brief Lookup (multiple) keys from a ValueAccessor, apply a functor to
- * the matching values and additionally call a hasMatch() function of
+ * the matching values and additionally call a recordMatch() function of
- * the functor when the first match for a key is found. Composite key version.
+ * the functor when the first match for a key is found. Composite key
+ * version.
* @warning This method assumes that no concurrent calls to put(),
* putCompositeKey(), putValueAccessor(),
* putValueAccessorCompositeKey(), upsert(), upsertCompositeKey(),
@@ -761,7 +762,7 @@
* dereferenced). Concurrent calls to getSingle(),
* getSingleCompositeKey(), getAll(), getAllCompositeKey(),
* getAllFromValueAccessor(), getAllFromValueAccessorCompositeKey(),
- * forEach(), and forEachCompositeKey() are safe..
+ * forEach(), and forEachCompositeKey() are safe.
*
* @param accessor A ValueAccessor which will be used to access keys.
* beginIteration() should be called on accessor before calling this
@@ -1909,11 +1910,11 @@
std::size_t entry_num = 0;
const ValueT *value;
if (getNextEntryForKey(key, hash_code, &value, &entry_num)) {
- functor->hasMatch(*accessor);
+ functor->recordMatch(*accessor);
(*functor)(*accessor, *value);
if (!allow_duplicate_keys) {
continue;
- }
+ }
while (getNextEntryForKey(key, hash_code, &value, &entry_num)) {
(*functor)(*accessor, *value);
}
@@ -1960,7 +1961,7 @@
std::size_t entry_num = 0;
const ValueT *value;
if (getNextEntryForCompositeKey(key_vector, hash_code, &value, &entry_num)) {
- functor->hasMatch(*accessor);
+ functor->recordMatch(*accessor);
(*functor)(*accessor, *value);
if (!allow_duplicate_keys) {
continue;