blob: 3ef399d9699830b7d3aa20c3327c84f8bdb97cf2 [file] [log] [blame]
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
**/
#ifndef QUICKSTEP_STORAGE_PARTITIONED_HASH_TABLE_POOL_HPP_
#define QUICKSTEP_STORAGE_PARTITIONED_HASH_TABLE_POOL_HPP_
#include <algorithm>
#include <cstddef>
#include <memory>
#include <vector>
#include "storage/HashTableBase.hpp"
#include "storage/HashTableFactory.hpp"
#include "utility/Macros.hpp"
#include "glog/logging.h"
namespace quickstep {
class AggregationHandle;
class StorageManager;
class Type;
/** \addtogroup Storage
* @{
*/
/**
* @brief A pool of HashTables used for a single aggregation handle. Each
* HashTable represents values from a given partition, which is
* determined by the keys in the group by clause.
**/
class PartitionedHashTablePool {
public:
/**
* @brief Constructor.
*
* @note This constructor is relevant for the HashTable specialized for
* aggregation.
*
* @param estimated_num_entries The maximum number of entries in a hash table.
* @param num_partitions The number of partitions (i.e. number of HashTables)
* @param hash_table_impl_type The type of hash table implementation.
* @param group_by_types A vector of pointer of types which form the group by
* key.
* @param handles The aggregation handles.
* @param storage_manager A pointer to the storage manager.
**/
PartitionedHashTablePool(const std::size_t estimated_num_entries,
const std::size_t num_partitions,
const HashTableImplType hash_table_impl_type,
const std::vector<const Type *> &group_by_types,
const std::vector<AggregationHandle *> &handles,
StorageManager *storage_manager)
: estimated_num_entries_(
setHashTableSize(estimated_num_entries, num_partitions)),
num_partitions_(num_partitions),
hash_table_impl_type_(hash_table_impl_type),
group_by_types_(group_by_types),
handles_(handles),
storage_manager_(DCHECK_NOTNULL(storage_manager)) {
initializeAllHashTables();
}
/**
* @brief Check out a hash table for insertion.
*
* @param partition_id The ID of the partitioned HashTable.
*
* @return A hash table pointer for the given HashTable.
**/
AggregationStateHashTableBase* getHashTable(const std::size_t partition_id) {
DCHECK_LT(partition_id, num_partitions_);
DCHECK_LT(partition_id, hash_tables_.size());
return hash_tables_[partition_id].get();
}
/**
* @brief Get all the hash tables from the pool.
*
* @warning The caller should ensure that this call is made when no hash table
* is being checked in or checked out from the pool. In other words
* the hash table pool is in read-only state.
*
* @param All the hash tables in the pool.
*
**/
std::vector<std::unique_ptr<AggregationStateHashTableBase>>*
getAllHashTables() {
return &hash_tables_;
}
/**
* @brief Get the number of partitions used for the aggregation.
**/
inline std::size_t getNumPartitions() const {
return num_partitions_;
}
/**
* @brief Get the total memory consumed by the hash tables in this pool.
**/
std::size_t getMemoryConsumptionPoolBytes() const {
std::size_t memory = 0;
for (std::size_t ht_id = 0; ht_id < hash_tables_.size(); ++ht_id) {
if (hash_tables_[ht_id] != nullptr) {
memory += hash_tables_[ht_id]->getMemoryConsumptionBytes();
}
}
return memory;
}
private:
void initializeAllHashTables() {
for (std::size_t part_num = 0; part_num < num_partitions_; ++part_num) {
AggregationStateHashTableBase *part_hash_table = createNewHashTable();
hash_tables_.push_back(
std::unique_ptr<AggregationStateHashTableBase>(part_hash_table));
}
}
AggregationStateHashTableBase* createNewHashTable() {
return AggregationStateHashTableFactory::CreateResizable(
hash_table_impl_type_,
group_by_types_,
estimated_num_entries_,
handles_,
storage_manager_);
}
inline std::size_t setHashTableSize(const std::size_t overall_estimate,
const std::size_t num_partitions) const {
CHECK_NE(num_partitions, 0Lu);
// The minimum size of the hash table is set to 100.
return std::max(static_cast<std::size_t>(overall_estimate / num_partitions),
100Lu);
}
std::vector<std::unique_ptr<AggregationStateHashTableBase>> hash_tables_;
const std::size_t estimated_num_entries_;
const std::size_t num_partitions_;
const HashTableImplType hash_table_impl_type_;
const std::vector<const Type *> group_by_types_;
const std::vector<AggregationHandle *> handles_;
StorageManager *storage_manager_;
DISALLOW_COPY_AND_ASSIGN(PartitionedHashTablePool);
};
/** @} */
} // namespace quickstep
#endif // QUICKSTEP_STORAGE_PARTITIONED_HASH_TABLE_POOL_HPP_