| /** |
| * Licensed to the Apache Software Foundation (ASF) under one |
| * or more contributor license agreements. See the NOTICE file |
| * distributed with this work for additional information |
| * regarding copyright ownership. The ASF licenses this file |
| * to you under the Apache License, Version 2.0 (the |
| * "License"); you may not use this file except in compliance |
| * with the License. You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, |
| * software distributed under the License is distributed on an |
| * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
| * KIND, either express or implied. See the License for the |
| * specific language governing permissions and limitations |
| * under the License. |
| **/ |
| |
| #ifndef QUICKSTEP_CATALOG_PARTITION_SCHEME_HEADER_HPP_ |
| #define QUICKSTEP_CATALOG_PARTITION_SCHEME_HEADER_HPP_ |
| |
| #include <cstddef> |
| #include <memory> |
| #include <utility> |
| #include <vector> |
| |
| #include "catalog/Catalog.pb.h" |
| #include "catalog/CatalogTypedefs.hpp" |
| #include "types/TypedValue.hpp" |
| #include "types/operations/comparisons/Comparison.hpp" |
| #include "types/operations/comparisons/LessComparison.hpp" |
| #include "utility/Macros.hpp" |
| |
| #include "glog/logging.h" |
| |
| namespace quickstep { |
| |
| class Type; |
| |
| /** \addtogroup Catalog |
| * @{ |
| */ |
| |
| /** |
| * @brief The base class which stores the partitioning information for a |
| * particular relation. |
| **/ |
| class PartitionSchemeHeader { |
| public: |
| enum PartitionType { |
| kHash = 0, |
| kRange |
| }; |
| |
| /** |
| * @brief Virtual destructor. |
| **/ |
| virtual ~PartitionSchemeHeader() { |
| } |
| |
| /** |
| * @brief Reconstruct a PartitionSchemeHeader from its serialized |
| * Protocol Buffer form. |
| * |
| * @param proto The Protocol Buffer serialization of a PartitionSchemeHeader, |
| * previously produced by getProto(). |
| * @return The reconstructed PartitionSchemeHeader object. |
| **/ |
| static PartitionSchemeHeader* ReconstructFromProto( |
| const serialization::PartitionSchemeHeader &proto); |
| |
| /** |
| * @brief Check whether a serialization::PartitionSchemeHeader is fully-formed |
| * and all parts are valid. |
| * |
| * @param proto A serialized Protocol Buffer representation of a |
| * PartitionSchemeHeader, originally generated by getProto(). |
| * @return Whether proto is fully-formed and valid. |
| **/ |
| static bool ProtoIsValid(const serialization::PartitionSchemeHeader &proto); |
| |
| /** |
| * @brief Calculate the partition id into which the attribute value should |
| * be inserted. |
| * |
| * @param value_of_attribute The attribute value for which the |
| * partition id is to be determined. |
| * @return The partition id of the partition for the attribute value. |
| **/ |
| // TODO(gerald): Make this method more efficient since currently this is |
| // done for each and every tuple. We can go through the entire set of tuples |
| // once using a value accessor and create bitmaps for each partition with |
| // tuples that correspond to those partitions. |
| virtual partition_id getPartitionId( |
| const TypedValue &value_of_attribute) const = 0; |
| |
| /** |
| * @brief Serialize the Partition Scheme as Protocol Buffer. |
| * |
| * @return The Protocol Buffer representation of Partition Scheme. |
| **/ |
| virtual serialization::PartitionSchemeHeader getProto() const; |
| |
| /** |
| * @brief Get the partition type of the relation. |
| * |
| * @return The partition type used to partition the relation. |
| **/ |
| inline PartitionType getPartitionType() const { |
| return partition_type_; |
| } |
| |
| /** |
| * @brief Get the number of partitions for the relation. |
| * |
| * @return The number of partitions the relation is partitioned into. |
| **/ |
| inline std::size_t getNumPartitions() const { |
| return num_partitions_; |
| } |
| |
| /** |
| * @brief Get the partitioning attribute for the relation. |
| * |
| * @return The partitioning attribute with which the relation |
| * is partitioned into. |
| **/ |
| inline attribute_id getPartitionAttributeId() const { |
| return partition_attribute_id_; |
| } |
| |
| protected: |
| /** |
| * @brief Constructor. |
| * |
| * @param type The type of partitioning to be used to partition the |
| * relation. |
| * @param num_partitions The number of partitions to be created. |
| * @param attr_id The attribute on which the partitioning happens. |
| **/ |
| PartitionSchemeHeader(const PartitionType type, |
| const std::size_t num_partitions, |
| const attribute_id attr_id); |
| |
| // The type of partitioning: Hash or Range. |
| const PartitionType partition_type_; |
| // The number of partitions. |
| const std::size_t num_partitions_; |
| // The attribute of partioning. |
| const attribute_id partition_attribute_id_; |
| |
| private: |
| DISALLOW_COPY_AND_ASSIGN(PartitionSchemeHeader); |
| }; |
| |
| /** |
| * @brief Implementation of PartitionSchemeHeader that partitions the tuples in |
| * a relation based on a hash function on the partitioning attribute. |
| **/ |
| class HashPartitionSchemeHeader : public PartitionSchemeHeader { |
| public: |
| /** |
| * @brief Constructor. |
| * |
| * @param num_partitions The number of partitions to be created. |
| * @param attribute The attribute on which the partitioning happens. |
| **/ |
| HashPartitionSchemeHeader(const std::size_t num_partitions, const attribute_id attribute) |
| : PartitionSchemeHeader(PartitionType::kHash, num_partitions, attribute) { |
| } |
| |
| /** |
| * @brief Destructor. |
| **/ |
| ~HashPartitionSchemeHeader() override { |
| } |
| |
| /** |
| * @brief Calulate the partition id into which the attribute value |
| * should be inserted. |
| * |
| * @param value_of_attribute The attribute value for which the |
| * partition id is to be determined. |
| * @return The partition id of the partition for the attribute value. |
| **/ |
| partition_id getPartitionId( |
| const TypedValue &value_of_attribute) const override { |
| // TODO(gerald): Optimize for the case where the number of partitions is a |
| // power of 2. We can just mask out the lower-order hash bits rather than |
| // doing a division operation. |
| return value_of_attribute.getHash() % num_partitions_; |
| } |
| |
| private: |
| DISALLOW_COPY_AND_ASSIGN(HashPartitionSchemeHeader); |
| }; |
| |
| /** |
| * @brief Implementation of PartitionSchemeHeader that partitions the tuples in |
| * a relation based on a given value range on the partitioning attribute. |
| **/ |
| class RangePartitionSchemeHeader : public PartitionSchemeHeader { |
| public: |
| /** |
| * @brief Constructor. |
| * |
| * @param partition_attribute_type The type of CatalogAttribute that is used |
| * for partitioning. |
| * @param num_partitions The number of partitions to be created. |
| * @param attribute The attribute_id on which the partitioning happens. |
| * @param partition_range The mapping between the partition ids and the upper |
| * bound of the range boundaries. If two ranges R1 and |
| * R2 are separated by a boundary value V, then V |
| * would fall into range R2. For creating a range |
| * partition scheme with n partitions, you need to |
| * specify n-1 boundary values. The first partition |
| * will have all the values less than the first |
| * boundary and the last partition would have all |
| * values greater than or equal to the last boundary |
| * value. |
| **/ |
| RangePartitionSchemeHeader(const Type &partition_attribute_type, |
| const std::size_t num_partitions, |
| const attribute_id attribute, |
| std::vector<TypedValue> &&partition_range) |
| : PartitionSchemeHeader(PartitionType::kRange, num_partitions, attribute), |
| partition_attr_type_(&partition_attribute_type), |
| partition_range_boundaries_(std::move(partition_range)) { |
| DCHECK_EQ(num_partitions - 1, partition_range_boundaries_.size()); |
| |
| const Comparison &less_comparison_op(LessComparison::Instance()); |
| less_unchecked_comparator_.reset( |
| less_comparison_op.makeUncheckedComparatorForTypes( |
| partition_attribute_type, partition_attribute_type)); |
| |
| #ifdef QUICKSTEP_DEBUG |
| checkPartitionRangeBoundaries(); |
| #endif |
| } |
| |
| /** |
| * @brief Destructor. |
| **/ |
| ~RangePartitionSchemeHeader() override { |
| } |
| |
| /** |
| * @brief Calulate the partition id into which the attribute value |
| * should be inserted. |
| * |
| * @param value_of_attribute The attribute value for which the |
| * partition id is to be determined. |
| * @return The partition id of the partition for the attribute value. |
| **/ |
| partition_id getPartitionId( |
| const TypedValue &value_of_attribute) const override { |
| partition_id start = 0, end = partition_range_boundaries_.size() - 1; |
| if (!less_unchecked_comparator_->compareTypedValues(value_of_attribute, partition_range_boundaries_[end])) { |
| return num_partitions_ - 1; |
| } |
| |
| while (start < end) { |
| const partition_id mid = start + ((end - start) >> 1); |
| if (less_unchecked_comparator_->compareTypedValues(value_of_attribute, partition_range_boundaries_[mid])) { |
| end = mid; |
| } else { |
| start = mid + 1; |
| } |
| } |
| |
| return start; |
| } |
| |
| serialization::PartitionSchemeHeader getProto() const override; |
| |
| /** |
| * @brief Get the range boundaries for partitions. |
| * |
| * @return The vector of range boundaries for partitions. |
| **/ |
| inline const std::vector<TypedValue>& getPartitionRangeBoundaries() const { |
| return partition_range_boundaries_; |
| } |
| |
| private: |
| /** |
| * @brief Check if the partition range boundaries are in ascending order. |
| **/ |
| void checkPartitionRangeBoundaries() { |
| for (partition_id part_id = 1; part_id < partition_range_boundaries_.size(); ++part_id) { |
| if (less_unchecked_comparator_->compareTypedValues( |
| partition_range_boundaries_[part_id], |
| partition_range_boundaries_[part_id - 1])) { |
| LOG(FATAL) << "Partition boundaries are not in ascending order."; |
| } |
| } |
| } |
| |
| const Type* partition_attr_type_; |
| |
| // The boundaries for each range in the RangePartitionSchemeHeader. |
| // The upper bound of the range is stored here. |
| const std::vector<TypedValue> partition_range_boundaries_; |
| |
| std::unique_ptr<UncheckedComparator> less_unchecked_comparator_; |
| |
| DISALLOW_COPY_AND_ASSIGN(RangePartitionSchemeHeader); |
| }; |
| |
| /** @} */ |
| |
| } // namespace quickstep |
| |
| #endif // QUICKSTEP_CATALOG_PARTITION_SCHEME_HEADER_HPP_ |