/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied. See the License for the
 * specific language governing permissions and limitations
 * under the License.
 **/
#ifndef QUICKSTEP_STORAGE_SPLIT_ROW_STORE_TUPLE_STORAGE_SUB_BLOCK_HPP_
#define QUICKSTEP_STORAGE_SPLIT_ROW_STORE_TUPLE_STORAGE_SUB_BLOCK_HPP_

#include <cstddef>
#include <cstdint>
#include <memory>
#include <unordered_map>
#include <utility>
#include <vector>

#include "catalog/CatalogTypedefs.hpp"
#include "expressions/predicate/PredicateCost.hpp"
#include "storage/StorageBlockInfo.hpp"
#include "storage/SubBlockTypeRegistryMacros.hpp"
#include "storage/TupleIdSequence.hpp"
#include "storage/TupleStorageSubBlock.hpp"
#include "types/TypedValue.hpp"
#include "utility/BitVector.hpp"
#include "utility/Macros.hpp"

#include "gtest/gtest_prod.h"

namespace quickstep {

class CatalogRelationSchema;
class ComparisonPredicate;
class Tuple;
class TupleStorageSubBlockDescription;
class ValueAccessor;

QUICKSTEP_DECLARE_SUB_BLOCK_TYPE_REGISTERED(SplitRowStoreTupleStorageSubBlock);

namespace splitrow_internal {
// A CopyGroup contains information about one run of attributes in the source
// ValueAccessor that can be copied into the output block. The
// getCopyGroupsForAttributeMap function below takes an attribute map for a source
// and converts it into a sequence of runs. The goal is to minimize the number
// of memcpy calls and address calculations that occur during bulk insertion.
// Contiguous attributes from a rowstore source can be merged into a single copy group.
//
// A single ContiguousAttrs CopyGroup consists of contiguous attributes, nullable
// or not. "Contiguous" here means that their attribute IDs are successive in both
// the source and destination relations.
//
// A NullAttr refers to exactly one nullable attribute. Nullable columns are
// represented using fixed length inline data as well as a null bitmap.
// In a particular tuple, if the attribute has a null value, the inline data
// has no meaning, so it is safe either to copy it or to skip it. We use this
// fact to merge runs together aggressively, i.e., a ContiguousAttrs group may
// include a nullable attribute. However, in that case we also create a
// NullableAttr so that the null bitmap is still checked.
//
// A gap is a run of destination (output) attributes that don't come from a
// particular source. This occurs during bulkInsertPartialTuples. Gaps must be
// skipped during the insert (not copied over); they are indicated by
// kInvalidCatalogId in the attribute map. For efficiency, the gap size is
// merged into the bytes_to_advance of the ContiguousAttrs copy group that
// follows the gap. For a gap at the start of the attribute map, we just
// create a ContiguousAttrs copy group with 0 bytes to copy and a dummy (0)
// source attribute ID.
//
// e.g., for 4B integer attrs from a row store source,
// if the input attribute_map is {-1,0,5,6,7,-1,2,4,9,10,-1}
// with input/output attributes 4 and 7 being nullable,
// we will create the following ContiguousAttrs copy groups:
//
// ---------------------------------------------
// | src_id | bytes_to_advance | bytes_to_copy |
// |--------|------------------|---------------|
// |      0 |                4 |             4 |
// |      5 |                4 |            12 |
// |      2 |               16 |             4 |
// |      4 |                4 |             4 |
// |      9 |                4 |             8 |
// ---------------------------------------------
// and two NullableAttrs with src_attr_id set to 4 and 7.
//
// In this example, we do 6 memcpy calls and 6 address calculations
// as well as 2 bitvector lookups for each tuple. A naive copy algorithm
// would do 11 memcpy calls and address calculations, along with the
// bitvector lookups, not to mention the schema lookups,
// all interspersed in a complex loop with lots of branches.
//
// If the source were a column store, we couldn't merge contiguous
// attributes (or gaps), so we would have 11 ContiguousAttrs copy groups,
// three of them having bytes_to_copy = 0 (corresponding to the gaps) and
// the rest having bytes_to_copy = 4.
//
// SplitRowStore supports variable-length attributes. Since the tuple layout
// is [null bitmap][fixed-length attributes][variable-length offsets],
// we do all the variable-length copies after the fixed-length copies.
//
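// For illustration only, the five copy groups in the table above written out
// against the structs below (constructor argument order is src_attr_id,
// bytes_to_copy, bytes_to_advance; values are taken straight from the table):
//
//   std::vector<ContiguousAttrs> groups;
//   groups.emplace_back(0, 4, 4);    // advance over the leading gap, copy attr 0
//   groups.emplace_back(5, 12, 4);   // attrs 5, 6, 7 copied with one memcpy
//   groups.emplace_back(2, 4, 16);   // advance over the gap, then copy attr 2
//   groups.emplace_back(4, 4, 4);    // nullable attr 4 (paired with a NullableAttr)
//   groups.emplace_back(9, 8, 4);    // attrs 9, 10 copied with one memcpy
//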
struct CopyGroup {
  explicit CopyGroup(const attribute_id source_attr_id)
      : src_attr_id(source_attr_id) {}

  // The ID of the starting input attribute for this run.
  const attribute_id src_attr_id;
};
struct ContiguousAttrs : public CopyGroup {
  ContiguousAttrs(const attribute_id source_attr_id,
                  const std::size_t bytes_to_copy_in,
                  const std::size_t bytes_to_advance_in)
      : CopyGroup(source_attr_id),
        bytes_to_advance(bytes_to_advance_in),
        bytes_to_copy(bytes_to_copy_in) {}

  // Number of bytes to advance the destination pointer to reach the location
  // where THIS attribute is copied.
  std::size_t bytes_to_advance;

  // Number of bytes to copy from the source.
  std::size_t bytes_to_copy;
};
struct VarLenAttr : public CopyGroup {
  VarLenAttr(const attribute_id source_attr_id,
             const attribute_id destination_attr_id,
             const std::size_t bytes_to_advance_in)
      : CopyGroup(source_attr_id),
        dst_attr_id(destination_attr_id),
        bytes_to_advance(bytes_to_advance_in) {}

  // The ID of the destination attribute.
  const attribute_id dst_attr_id;

  // Number of bytes to advance the destination pointer for this attribute.
  std::size_t bytes_to_advance;
};
struct NullableAttr : public CopyGroup {
  NullableAttr(const attribute_id source_attr_id,
               const std::size_t nullable_attr_idx_in)
      : CopyGroup(source_attr_id),
        nullable_attr_idx(nullable_attr_idx_in) {}

  // Index into the null bitmap.
  const std::size_t nullable_attr_idx;
};
struct CopyGroupList {
  CopyGroupList() {}

  /**
   * @brief Merge runs of attributes that are exactly sequential into a
   *        single copy.
   */
  void mergeContiguous() {
    if (contiguous_attrs.size() < 2) {
      return;
    }

    std::size_t add_to_advance = 0;
    std::vector<ContiguousAttrs> merged_attrs;
    merged_attrs.emplace_back(contiguous_attrs.front());
    for (std::size_t idx = 1; idx < contiguous_attrs.size(); ++idx) {
      const ContiguousAttrs &current_attr = contiguous_attrs[idx];
      const ContiguousAttrs &previous_attr = contiguous_attrs[idx - 1];
      // Merge if the source attributes are adjacent and the previous copy
      // ends exactly where this one begins.
      if (previous_attr.src_attr_id + 1 == current_attr.src_attr_id &&
          previous_attr.bytes_to_copy == current_attr.bytes_to_advance) {
        merged_attrs.back().bytes_to_copy += current_attr.bytes_to_copy;
        add_to_advance += current_attr.bytes_to_advance;
      } else {
        merged_attrs.emplace_back(current_attr.src_attr_id,
                                  current_attr.bytes_to_copy,
                                  current_attr.bytes_to_advance + add_to_advance);
        add_to_advance = 0;
      }
    }
    contiguous_attrs = std::move(merged_attrs);

    // Any advance folded out of trailing merged groups must still be applied
    // before the first variable-length copy.
    if (varlen_attrs.size() > 0) {
      varlen_attrs[0].bytes_to_advance += add_to_advance;
    }
  }

  std::vector<ContiguousAttrs> contiguous_attrs;
  std::vector<NullableAttr> nullable_attrs;
  std::vector<VarLenAttr> varlen_attrs;
};
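
// A minimal usage sketch of CopyGroupList::mergeContiguous() with illustrative
// values (not from the example above): two 4-byte attributes with successive
// source IDs whose copies abut collapse into a single 8-byte copy.
//
//   CopyGroupList list;
//   list.contiguous_attrs.emplace_back(0, 4, 4);  // src 0: advance 4, copy 4
//   list.contiguous_attrs.emplace_back(1, 4, 4);  // src 1: abuts the copy above
//   list.mergeContiguous();
//   // list.contiguous_attrs now holds the single group {src 0, copy 8, advance 4};
//   // any leftover advance would be folded into the first VarLenAttr, if present.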
} // namespace splitrow_internal

/** \addtogroup Storage
 *  @{
 */

/**
 * @brief An implementation of TupleStorageSubBlock as a slotted row-store that
 *        splits storage between an array of slots for fixed-length attributes
 *        at the front of the block and a region of storage for variable-length
 *        attributes at the back of the block, with both regions growing toward
 *        each other.
 * @note Deletions and updates can cause both the slot array and the
 *       variable-length storage region to become fragmented. Storage that
 *       would otherwise be wasted can be reclaimed by calling rebuild().
 **/
class SplitRowStoreTupleStorageSubBlock : public TupleStorageSubBlock {
  static const std::size_t kVarLenSlotSize;

 public:
  SplitRowStoreTupleStorageSubBlock(const CatalogRelationSchema &relation,
                                    const TupleStorageSubBlockDescription &description,
                                    const bool new_block,
                                    void *sub_block_memory,
                                    const std::size_t sub_block_memory_size);

  ~SplitRowStoreTupleStorageSubBlock() override {
  }

  /**
   * @brief Determine whether a TupleStorageSubBlockDescription is valid for
   *        this type of TupleStorageSubBlock.
   *
   * @param relation The relation a tuple store described by description would
   *        belong to.
   * @param description A description of the parameters for this type of
   *        TupleStorageSubBlock, which will be checked for validity.
   * @return Whether description is well-formed and valid for this type of
   *         TupleStorageSubBlock belonging to relation (i.e. whether a
   *         TupleStorageSubBlock of this type, belonging to relation, can be
   *         constructed according to description).
   **/
  static bool DescriptionIsValid(const CatalogRelationSchema &relation,
                                 const TupleStorageSubBlockDescription &description);

  /**
   * @brief Estimate the average number of bytes (including any applicable
   *        overhead) used to store a single tuple in this type of
   *        TupleStorageSubBlock. Used by StorageBlockLayout::finalize() to
   *        divide block memory amongst sub-blocks.
   * @warning description must be valid. DescriptionIsValid() should be called
   *          first if necessary.
   *
   * @param relation The relation tuples belong to.
   * @param description A description of the parameters for this type of
   *        TupleStorageSubBlock.
   * @return The average (amortized) number of bytes used to store a single
   *         tuple of relation in a TupleStorageSubBlock of this type described
   *         by description.
   **/
  static std::size_t EstimateBytesPerTuple(const CatalogRelationSchema &relation,
                                           const TupleStorageSubBlockDescription &description);

  bool supportsUntypedGetAttributeValue(const attribute_id attr) const override {
    return true;
  }

  bool supportsAdHocInsert() const override {
    return true;
  }

  bool adHocInsertIsEfficient() const override {
    return true;
  }

  TupleStorageSubBlockType getTupleStorageSubBlockType() const override {
    return kSplitRowStore;
  }

  bool isEmpty() const override {
    return header_->num_tuples == 0;
  }

  bool isPacked() const override {
    return header_->num_tuples == header_->max_tid + 1;
  }

  tuple_id getMaxTupleID() const override {
    return header_->max_tid;
  }

  tuple_id numTuples() const override {
    return header_->num_tuples;
  }

  bool hasTupleWithID(const tuple_id tuple) const override {
    return (tuple >= 0)
           && (static_cast<std::size_t>(tuple) < occupancy_bitmap_->size())
           && occupancy_bitmap_->getBit(tuple);
  }

  InsertResult insertTuple(const Tuple &tuple) override;

  inline bool insertTupleInBatch(const Tuple &tuple) override {
    const InsertResult result = insertTuple(tuple);
    return (result.inserted_id >= 0);
  }

  tuple_id bulkInsertTuples(ValueAccessor *accessor) override;

  tuple_id bulkInsertTuplesWithRemappedAttributes(
      const std::vector<attribute_id> &attribute_map,
      ValueAccessor *accessor) override;

  tuple_id bulkInsertPartialTuples(
      const std::vector<attribute_id> &attribute_map,
      ValueAccessor *accessor,
      const tuple_id max_num_tuples_to_insert) override;

  void bulkInsertPartialTuplesFinalize(const tuple_id num_tuples_inserted) override;

  const void* getAttributeValue(const tuple_id tuple,
                                const attribute_id attr) const override;

  TypedValue getAttributeValueTyped(const tuple_id tuple,
                                    const attribute_id attr) const override;

  ValueAccessor* createValueAccessor(
      const TupleIdSequence *sequence = nullptr) const override;

  bool canSetAttributeValuesInPlaceTyped(
      const tuple_id tuple,
      const std::unordered_map<attribute_id, TypedValue> &new_values) const override;

  void setAttributeValueInPlaceTyped(const tuple_id tuple,
                                     const attribute_id attr,
                                     const TypedValue &value) override;

  bool deleteTuple(const tuple_id tuple) override;

  bool bulkDeleteTuples(TupleIdSequence *tuples) override;

  TupleIdSequence* getExistenceMap() const override {
    return new TupleIdSequence(header_->max_tid + 1, *occupancy_bitmap_);
  }

  OrderedTupleIdSequence* getExistenceList() const override;

  predicate_cost_t estimatePredicateEvaluationCost(
      const ComparisonPredicate &predicate) const override {
    return predicate_cost::kRowScan;
  }

  void rebuild() override;

  bool isInsertOrderPreserving() const override {
    return true;
  }

 private:
  struct Header {
    tuple_id num_tuples;
    tuple_id max_tid;
    std::uint32_t variable_length_bytes_allocated;
    bool variable_length_storage_compact;
  };
  inline bool spaceToInsert(const tuple_id position,
                            const std::size_t variable_length_bytes) const {
    return (header_->max_tid + 1) * tuple_slot_bytes_                // Already-allocated slots.
           + (position == header_->max_tid + 1) * tuple_slot_bytes_  // An extra slot, if needed.
           + header_->variable_length_bytes_allocated                // Already-allocated variable storage.
           + variable_length_bytes                                   // Extra variable storage needed.
           <= tuple_storage_bytes_;
  }

  template <bool nullable_attrs, bool variable_length_attrs>
  InsertResult insertTupleImpl(const Tuple &tuple);

  template <bool copy_nulls, bool copy_varlen, bool fill_to_capacity>
  tuple_id bulkInsertPartialTuplesImpl(
      const splitrow_internal::CopyGroupList &copy_groups,
      ValueAccessor *accessor,
      std::size_t max_num_tuples_to_insert);

  tuple_id bulkInsertDispatcher(
      const std::vector<attribute_id> &attribute_map,
      ValueAccessor *accessor,
      tuple_id max_num_tuples_to_insert,
      bool finalize);

  // Converts 'attribute_map' into the run-based copy groups described in the
  // splitrow_internal namespace above.
  void getCopyGroupsForAttributeMap(
      const std::vector<attribute_id> &attribute_map,
      splitrow_internal::CopyGroupList *copy_groups);

  std::size_t getInsertLowerBound() const;

  // When varlen attributes are bulk inserted, the difference between the
  // maximum possible size and the actual size of the tuples will cause an
  // underestimate of the number of tuples we can insert. This threshold puts
  // a limit on the number of tuples to attempt to insert per round. A smaller
  // number gives more rounds of insertion and a more tightly packed block,
  // but at the cost of insertion speed.
  std::size_t getInsertLowerBoundThreshold() const {
    return 10;
  }

  Header *header_;

  std::unique_ptr<BitVector<false>> occupancy_bitmap_;
  std::size_t occupancy_bitmap_bytes_;

  void *tuple_storage_;
  std::size_t tuple_storage_bytes_;
  std::size_t tuple_slot_bytes_;
  std::vector<std::size_t> fixed_len_attr_sizes_;

  std::size_t num_null_attrs_;
  std::size_t num_fixed_attrs_;
  std::size_t num_var_attrs_;

  std::size_t per_tuple_null_bitmap_bytes_;

  friend class SplitRowStoreTupleStorageSubBlockTest;
  friend class SplitRowStoreValueAccessor;

  FRIEND_TEST(SplitRowStoreTupleStorageSubBlockTest, InitializeTest);
  FRIEND_TEST(SplitRowStoreTupleStorageSubBlockTest, GetCopyGroupsForAttributeMapTest);

  DISALLOW_COPY_AND_ASSIGN(SplitRowStoreTupleStorageSubBlock);
};

/** @} */

}  // namespace quickstep

#endif  // QUICKSTEP_STORAGE_SPLIT_ROW_STORE_TUPLE_STORAGE_SUB_BLOCK_HPP_