blob: 7b9fccedfc6a478ef493b76812ef21699a56b174 [file] [log] [blame]
/**
* Copyright 2011-2015 Quickstep Technologies LLC.
* Copyright 2015-2016 Pivotal Software, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
**/
#ifndef QUICKSTEP_CATALOG_PARTITION_SCHEME_HEADER_HPP_
#define QUICKSTEP_CATALOG_PARTITION_SCHEME_HEADER_HPP_
#include <cstddef>
#include <memory>
#include <utility>
#include <vector>
#include "catalog/Catalog.pb.h"
#include "catalog/CatalogTypedefs.hpp"
#include "types/TypedValue.hpp"
#include "types/operations/comparisons/Comparison.hpp"
#include "types/operations/comparisons/LessComparison.hpp"
#include "utility/Macros.hpp"
#include "glog/logging.h"
namespace quickstep {
class Type;
/** \addtogroup Catalog
* @{
*/
/**
* @brief The base class which stores the partitioning information for a
* particular relation.
**/
class PartitionSchemeHeader {
public:
enum PartitionType {
kHash = 0,
kRange
};
/**
* @brief Virtual destructor.
**/
virtual ~PartitionSchemeHeader() {
}
/**
* @brief Reconstruct a PartitionSchemeHeader from its serialized
* Protocol Buffer form.
*
* @param proto The Protocol Buffer serialization of a PartitionSchemeHeader,
* previously produced by getProto().
* @param attr_type The attribute type of the partitioning attribute.
* @return The reconstructed PartitionSchemeHeader object.
**/
static PartitionSchemeHeader* ReconstructFromProto(
const serialization::PartitionSchemeHeader &proto,
const Type &attr_type);
/**
* @brief Check whether a serialization::PartitionSchemeHeader is fully-formed
* and all parts are valid.
*
* @param proto A serialized Protocol Buffer representation of a
* PartitionSchemeHeader, originally generated by getProto().
* @return Whether proto is fully-formed and valid.
**/
static bool ProtoIsValid(const serialization::PartitionSchemeHeader &proto);
/**
* @brief Calculate the partition id into which the attribute value should
* be inserted.
*
* @param value_of_attribute The attribute value for which the
* partition id is to be determined.
* @return The partition id of the partition for the attribute value.
**/
// TODO(gerald): Make this method more efficient since currently this is
// done for each and every tuple. We can go through the entire set of tuples
// once using a value accessor and create bitmaps for each partition with
// tuples that correspond to those partitions.
virtual partition_id getPartitionId(
const TypedValue &value_of_attribute) const = 0;
/**
* @brief Serialize the Partition Scheme as Protocol Buffer.
*
* @return The Protocol Buffer representation of Partition Scheme.
**/
virtual serialization::PartitionSchemeHeader getProto() const;
/**
* @brief Get the partition type of the relation.
*
* @return The partition type used to partition the relation.
**/
inline PartitionType getPartitionType() const {
return partition_type_;
}
/**
* @brief Get the number of partitions for the relation.
*
* @return The number of partitions the relation is partitioned into.
**/
inline std::size_t getNumPartitions() const {
return num_partitions_;
}
/**
* @brief Get the partitioning attribute for the relation.
*
* @return The partitioning attribute with which the relation
* is partitioned into.
**/
inline attribute_id getPartitionAttributeId() const {
return partition_attribute_id_;
}
protected:
/**
* @brief Constructor.
*
* @param type The type of partitioning to be used to partition the
* relation.
* @param num_partitions The number of partitions to be created.
* @param attr_id The attribute on which the partitioning happens.
**/
PartitionSchemeHeader(const PartitionType type,
const std::size_t num_partitions,
const attribute_id attr_id);
// The type of partitioning: Hash or Range.
const PartitionType partition_type_;
// The number of partitions.
const std::size_t num_partitions_;
// The attribute of partioning.
const attribute_id partition_attribute_id_;
private:
DISALLOW_COPY_AND_ASSIGN(PartitionSchemeHeader);
};
/**
* @brief Implementation of PartitionSchemeHeader that partitions the tuples in
* a relation based on a hash function on the partitioning attribute.
**/
class HashPartitionSchemeHeader : public PartitionSchemeHeader {
public:
/**
* @brief Constructor.
*
* @param num_partitions The number of partitions to be created.
* @param attribute The attribute on which the partitioning happens.
**/
HashPartitionSchemeHeader(const std::size_t num_partitions, const attribute_id attribute)
: PartitionSchemeHeader(PartitionType::kHash, num_partitions, attribute) {
}
/**
* @brief Destructor.
**/
~HashPartitionSchemeHeader() override {
}
/**
* @brief Calulate the partition id into which the attribute value
* should be inserted.
*
* @param value_of_attribute The attribute value for which the
* partition id is to be determined.
* @return The partition id of the partition for the attribute value.
**/
partition_id getPartitionId(
const TypedValue &value_of_attribute) const override {
// TODO(gerald): Optimize for the case where the number of partitions is a
// power of 2. We can just mask out the lower-order hash bits rather than
// doing a division operation.
return value_of_attribute.getHash() % num_partitions_;
}
private:
DISALLOW_COPY_AND_ASSIGN(HashPartitionSchemeHeader);
};
/**
* @brief Implementation of PartitionSchemeHeader that partitions the tuples in
* a relation based on a given value range on the partitioning attribute.
**/
class RangePartitionSchemeHeader : public PartitionSchemeHeader {
public:
/**
* @brief Constructor.
*
* @param partition_attribute_type The type of CatalogAttribute that is used
* for partitioning.
* @param num_partitions The number of partitions to be created.
* @param attribute The attribute_id on which the partitioning happens.
* @param partition_range The mapping between the partition ids and the upper
* bound of the range boundaries. If two ranges R1 and
* R2 are separated by a boundary value V, then V
* would fall into range R2. For creating a range
* partition scheme with n partitions, you need to
* specify n-1 boundary values. The first partition
* will have all the values less than the first
* boundary and the last partition would have all
* values greater than or equal to the last boundary
* value.
**/
RangePartitionSchemeHeader(const Type &partition_attribute_type,
const std::size_t num_partitions,
const attribute_id attribute,
std::vector<TypedValue> &&partition_range)
: PartitionSchemeHeader(PartitionType::kRange, num_partitions, attribute),
partition_range_boundaries_(std::move(partition_range)) {
DCHECK_EQ(num_partitions - 1, partition_range_boundaries_.size());
const Comparison &less_comparison_op(LessComparison::Instance());
less_unchecked_comparator_.reset(
less_comparison_op.makeUncheckedComparatorForTypes(
partition_attribute_type, partition_attribute_type));
#ifdef QUICKSTEP_DEBUG
checkPartitionRangeBoundaries();
#endif
}
/**
* @brief Destructor.
**/
~RangePartitionSchemeHeader() override {
}
/**
* @brief Calulate the partition id into which the attribute value
* should be inserted.
*
* @param value_of_attribute The attribute value for which the
* partition id is to be determined.
* @return The partition id of the partition for the attribute value.
**/
partition_id getPartitionId(
const TypedValue &value_of_attribute) const override {
partition_id start = 0, end = partition_range_boundaries_.size() - 1;
if (!less_unchecked_comparator_->compareTypedValues(value_of_attribute, partition_range_boundaries_[end])) {
return num_partitions_ - 1;
}
while (start < end) {
const partition_id mid = start + ((end - start) >> 1);
if (less_unchecked_comparator_->compareTypedValues(value_of_attribute, partition_range_boundaries_[mid])) {
end = mid;
} else {
start = mid + 1;
}
}
return start;
}
serialization::PartitionSchemeHeader getProto() const override;
/**
* @brief Get the range boundaries for partitions.
*
* @return The vector of range boundaries for partitions.
**/
inline const std::vector<TypedValue>& getPartitionRangeBoundaries() const {
return partition_range_boundaries_;
}
private:
/**
* @brief Check if the partition range boundaries are in ascending order.
**/
void checkPartitionRangeBoundaries() {
for (partition_id part_id = 1; part_id < partition_range_boundaries_.size(); ++part_id) {
if (less_unchecked_comparator_->compareTypedValues(
partition_range_boundaries_[part_id],
partition_range_boundaries_[part_id - 1])) {
LOG(FATAL) << "Partition boundaries are not in ascending order.";
}
}
}
// The boundaries for each range in the RangePartitionSchemeHeader.
// The upper bound of the range is stored here.
const std::vector<TypedValue> partition_range_boundaries_;
std::unique_ptr<UncheckedComparator> less_unchecked_comparator_;
DISALLOW_COPY_AND_ASSIGN(RangePartitionSchemeHeader);
};
/** @} */
} // namespace quickstep
#endif // QUICKSTEP_CATALOG_PARTITION_SCHEME_HEADER_HPP_