/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
**/
#include "storage/SplitRowStoreTupleStorageSubBlock.hpp"
#include <cstddef>
#include <cstdint>
#include <cstring>
#include <utility>
#include <vector>
#include "catalog/CatalogRelationSchema.hpp"
#include "storage/SplitRowStoreValueAccessor.hpp"
#include "storage/StorageBlockLayout.pb.h"
#include "storage/StorageErrors.hpp"
#include "storage/SubBlockTypeRegistry.hpp"
#include "storage/ValueAccessor.hpp"
#include "storage/ValueAccessorUtil.hpp"
#include "types/TypedValue.hpp"
#include "utility/BitVector.hpp"
#include "utility/Macros.hpp"
#include "utility/ScopedBuffer.hpp"
#include "glog/logging.h"
namespace quickstep {
QUICKSTEP_REGISTER_TUPLE_STORE(SplitRowStoreTupleStorageSubBlock, SPLIT_ROW_STORE);
using splitrow_internal::CopyGroupList;
using splitrow_internal::ContiguousAttrs;
using splitrow_internal::NullableAttr;
using splitrow_internal::VarLenAttr;
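// Each variable-length attribute value is referenced from its tuple slot by a
// pair of 32-bit integers: the offset of the value within the tuple-storage
// region and its size in bytes. Hence each reference occupies two uint32_ts.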
const std::size_t SplitRowStoreTupleStorageSubBlock::kVarLenSlotSize = sizeof(std::uint32_t) * 2;
namespace {
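// Computes the number of bytes of variable-length storage needed by the
// current row of 'accessor': the sum of the data sizes of its non-null
// variable-length attribute values.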
template<typename ValueAccessorT, bool nullable_attrs>
inline std::size_t CalculateVariableSize(
const CatalogRelationSchema &relation,
const ValueAccessorT &accessor) {
std::size_t total_size = 0;
attribute_id accessor_attr_id = 0;
for (CatalogRelationSchema::const_iterator attr_it = relation.begin();
attr_it != relation.end();
++attr_it, ++accessor_attr_id) {
if (!attr_it->getType().isVariableLength()) {
continue;
}
TypedValue value(accessor.getTypedValue(accessor_attr_id));
if (nullable_attrs && value.isNull()) {
continue;
}
total_size += value.getDataSize();
}
return total_size;
}
template<typename ValueAccessorT, bool nullable_attrs>
inline std::size_t CalculateVariableSizeWithRemappedAttributes(
const CatalogRelationSchema &relation,
const ValueAccessorT &accessor,
const std::vector<attribute_id> &attribute_map) {
std::size_t total_size = 0;
std::vector<attribute_id>::const_iterator attr_map_it = attribute_map.begin();
for (CatalogRelationSchema::const_iterator attr_it = relation.begin();
attr_it != relation.end();
++attr_it, ++attr_map_it) {
if (!attr_it->getType().isVariableLength()) {
continue;
}
TypedValue value(accessor.getTypedValue(*attr_map_it));
if (nullable_attrs && value.isNull()) {
continue;
}
total_size += value.getDataSize();
}
return total_size;
}
} // anonymous namespace
SplitRowStoreTupleStorageSubBlock::SplitRowStoreTupleStorageSubBlock(
const CatalogRelationSchema &relation,
const TupleStorageSubBlockDescription &description,
const bool new_block,
void *sub_block_memory,
const std::size_t sub_block_memory_size)
: TupleStorageSubBlock(relation,
description,
new_block,
sub_block_memory,
sub_block_memory_size),
header_(static_cast<Header*>(sub_block_memory)),
num_null_attrs_(0),
num_fixed_attrs_(0),
num_var_attrs_(0) {
if (!DescriptionIsValid(relation_, description_)) {
FATAL_ERROR("Attempted to construct a SplitRowStoreTupleStorageSubBlock from an invalid description.");
}
if (sub_block_memory_size < sizeof(Header)) {
throw BlockMemoryTooSmall("SplitRowStoreTupleStorageSubBlock", sub_block_memory_size);
}
// Slot format is a bitmap of null attributes, followed by fixed-length
// attribute storage, followed by references to variable-length attribute
// values (each is a pair of uint32_ts indicating the offset in the
// tuple-storage region and the size of the value).
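// Schematically, one tuple slot looks like:
//   [ null bitmap | fixed-length attribute values | (offset, size) pairs ]
// occupying per_tuple_null_bitmap_bytes_ + relation.getFixedByteLength()
// + relation.numVariableLengthAttributes() * kVarLenSlotSize bytes in total.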
per_tuple_null_bitmap_bytes_ = BitVector<true>::BytesNeeded(relation.numNullableAttributes());
tuple_slot_bytes_ = per_tuple_null_bitmap_bytes_
+ relation.getFixedByteLength()
+ relation.numVariableLengthAttributes() * (sizeof(std::uint32_t) * 2);
if (tuple_slot_bytes_ == 0) {
LOG(WARNING)
<< "Estimated zero bytes per tuple for relation \"" << relation.getName()
<< "\" (relation_id: " << relation.getID()
<< "). Adjusting to 1 byte.";
tuple_slot_bytes_ = 1;
}
// Size the occupancy bitmap by calculating the maximum number of tuples that
// can fit assuming the bare-minimum per-tuple storage is used (no
// variable-length storage used for nullable variable-length attributes,
// otherwise the minimum possible).
//
// TODO(chasseur): This will slightly over-size the occupancy bitmap, since
// it doesn't take into account bytes used to store the bitmap itself. Adding
// some generic size-estimation functions to BitVector might be a good idea.
std::size_t max_num_tuples
= (sub_block_memory_size_ - sizeof(Header))
/ (tuple_slot_bytes_ + relation_.getMinimumVariableByteLengthExcludingNullable());
occupancy_bitmap_bytes_ = BitVector<false>::BytesNeeded(max_num_tuples);
if (sub_block_memory_size < occupancy_bitmap_bytes_ + sizeof(Header)) {
throw BlockMemoryTooSmall("SplitRowStoreTupleStorageSubBlock", sub_block_memory_size);
}
occupancy_bitmap_.reset(
new BitVector<false>(static_cast<char*>(sub_block_memory_) + sizeof(Header),
max_num_tuples));
tuple_storage_ = static_cast<char*>(sub_block_memory_)
+ sizeof(Header) + occupancy_bitmap_bytes_;
tuple_storage_bytes_ = sub_block_memory_size_ - (sizeof(Header) + occupancy_bitmap_bytes_);
// Some accounting information for bulk inserts.
for (attribute_id attr_id = 0;
attr_id < static_cast<attribute_id>(relation.size());
++attr_id) {
const Type& attr_type = relation.getAttributeById(attr_id)->getType();
if (attr_type.isVariableLength()) {
fixed_len_attr_sizes_.push_back(kInvalidAttributeID);
num_var_attrs_++;
} else {
fixed_len_attr_sizes_.push_back(attr_type.maximumByteLength());
num_fixed_attrs_++;
}
num_null_attrs_ += attr_type.isNullable();
}
if (new_block) {
// Only these fields need to be initialized; the rest of the block will be
// zeroed out by the StorageManager.
header_->max_tid = -1;
header_->variable_length_storage_compact = true;
}
}
bool SplitRowStoreTupleStorageSubBlock::DescriptionIsValid(
const CatalogRelationSchema &relation,
const TupleStorageSubBlockDescription &description) {
// Make sure description is initialized and specifies SplitRowStore.
if (!description.IsInitialized()) {
return false;
}
if (description.sub_block_type() != TupleStorageSubBlockDescription::SPLIT_ROW_STORE) {
return false;
}
return true;
}
std::size_t SplitRowStoreTupleStorageSubBlock::EstimateBytesPerTuple(
const CatalogRelationSchema &relation,
const TupleStorageSubBlockDescription &description) {
DEBUG_ASSERT(DescriptionIsValid(relation, description));
return relation.getFixedByteLength() // Fixed-length attrs
+ BitVector<true>::BytesNeeded(relation.numNullableAttributes()) // Null bitmap
+ relation.numVariableLengthAttributes() * (sizeof(std::uint32_t) * 2) // Variable-length ref
+ relation.getEstimatedVariableByteLength(); // Variable-length attrs
}
TupleStorageSubBlock::InsertResult SplitRowStoreTupleStorageSubBlock::insertTuple(
const Tuple &tuple) {
if (relation_.hasNullableAttributes()) {
if (relation_.isVariableLength()) {
return insertTupleImpl<true, true>(tuple);
} else {
return insertTupleImpl<true, false>(tuple);
}
} else {
if (relation_.isVariableLength()) {
return insertTupleImpl<false, true>(tuple);
} else {
return insertTupleImpl<false, false>(tuple);
}
}
}
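// Bulk-insert from 'accessor' using an identity attribute map. Passing the
// kCatalogMaxID sentinel as the tuple limit tells the dispatcher to fill the
// block to capacity, and finalize=true makes the inserted rows visible
// immediately.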
tuple_id SplitRowStoreTupleStorageSubBlock::bulkInsertTuples(ValueAccessor *accessor) {
std::vector<attribute_id> simple_remap;
for (attribute_id attr_id = 0;
attr_id < static_cast<attribute_id>(relation_.size());
++attr_id) {
simple_remap.push_back(attr_id);
}
return bulkInsertDispatcher(simple_remap, accessor, kCatalogMaxID, true);
}
tuple_id SplitRowStoreTupleStorageSubBlock::bulkInsertPartialTuples(
const std::vector<attribute_id> &attribute_map,
ValueAccessor *accessor,
const tuple_id max_num_tuples_to_insert) {
return bulkInsertDispatcher(attribute_map, accessor, max_num_tuples_to_insert, false);
}
tuple_id SplitRowStoreTupleStorageSubBlock::bulkInsertDispatcher(
const std::vector<attribute_id> &attribute_map,
ValueAccessor *accessor,
tuple_id max_num_tuples_to_insert,
bool finalize) {
const bool fill_to_capacity = max_num_tuples_to_insert == kCatalogMaxID;
CopyGroupList copy_groups;
getCopyGroupsForAttributeMap(attribute_map, &copy_groups);
if (accessor->getImplementationType() == ValueAccessor::Implementation::kSplitRowStore) {
copy_groups.mergeContiguous();
}
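// When the source accessor is also a SplitRowStore, runs of attributes share
// the same physical layout on both sides, so copy groups covering adjacent
// attributes can be merged into fewer, larger memcpy calls.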
const bool copy_nulls = copy_groups.nullable_attrs.size() > 0;
const bool copy_varlen = copy_groups.varlen_attrs.size() > 0;
if (fill_to_capacity) {
if (relation_.hasNullableAttributes()) {
// TODO(marc) This is an annoying gotcha: the insertion loop assumes the null
// bitmaps are zeroed for a fresh insert. We could clear the bitmap on each
// tuple iteration, but that would be costlier.
std::int64_t remaining_bytes = tuple_storage_bytes_ -
(header_->variable_length_bytes_allocated +
(header_->num_tuples * tuple_slot_bytes_));
memset(static_cast<char *>(tuple_storage_) + header_->num_tuples * tuple_slot_bytes_, 0x0, remaining_bytes);
}
}
tuple_id num_inserted = 0;
if (max_num_tuples_to_insert == kCatalogMaxID) {
max_num_tuples_to_insert = getInsertLowerBound();
}
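// When filling to capacity, start from the conservative bound above; the
// insert loop re-estimates and raises this limit once the actual
// variable-length consumption of the inserted rows is known.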
if (copy_varlen) {
if (copy_nulls) {
if (fill_to_capacity) {
num_inserted = bulkInsertPartialTuplesImpl<true, true, true>(copy_groups, accessor,
max_num_tuples_to_insert);
} else {
num_inserted = bulkInsertPartialTuplesImpl<true, true, false>(copy_groups, accessor,
max_num_tuples_to_insert);
}
} else {
if (fill_to_capacity) {
num_inserted = bulkInsertPartialTuplesImpl<false, true, true>(copy_groups, accessor,
max_num_tuples_to_insert);
} else {
num_inserted = bulkInsertPartialTuplesImpl<false, true, false>(copy_groups, accessor,
max_num_tuples_to_insert);
}
}
} else {
if (copy_nulls) {
num_inserted = bulkInsertPartialTuplesImpl<true, false, false>(copy_groups, accessor, max_num_tuples_to_insert);
} else {
num_inserted = bulkInsertPartialTuplesImpl<false, false, false>(copy_groups, accessor, max_num_tuples_to_insert);
}
}
if (finalize) {
bulkInsertPartialTuplesFinalize(num_inserted);
}
return num_inserted;
}
// copy_nulls is true if the incoming attributes include at least one nullable attribute.
// copy_varlen is true if the incoming attributes include at least one variable-length attribute.
// fill_to_capacity is true if the insert should keep draining the accessor until the block
// is full (i.e., the caller passed the kCatalogMaxID sentinel as the tuple limit).
template<bool copy_nulls, bool copy_varlen, bool fill_to_capacity>
tuple_id SplitRowStoreTupleStorageSubBlock::bulkInsertPartialTuplesImpl(
const CopyGroupList &copy_groups,
ValueAccessor *accessor,
std::size_t max_num_tuples_to_insert) {
std::size_t num_tuples_inserted = 0;
// We only append to the end of the block to cut down on complexity.
char *tuple_slot = static_cast<char *>(tuple_storage_) + header_->num_tuples * tuple_slot_bytes_;
std::uint32_t varlen_heap_offset = tuple_storage_bytes_ - header_->variable_length_bytes_allocated;
std::uint32_t varlen_heap_offset_orig = varlen_heap_offset;
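// Variable-length values are written into a heap that grows downward from the
// end of the tuple-storage region; 'varlen_heap_offset' tracks the current top
// of that heap.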
std::size_t storage_available = tuple_storage_bytes_ -
(header_->variable_length_bytes_allocated +
header_->num_tuples * tuple_slot_bytes_);
const std::vector<ContiguousAttrs> &contiguous_attrs = copy_groups.contiguous_attrs;
const std::vector<VarLenAttr> &varlen_attrs = copy_groups.varlen_attrs;
const std::vector<NullableAttr> &nullable_attrs = copy_groups.nullable_attrs;
// The number of bytes that must be reserved per tuple inserted due to gaps.
std::size_t varlen_reserve = relation_.getMaximumVariableByteLength();
if (fill_to_capacity) {
for (std::size_t vattr_idx = 0; vattr_idx < varlen_attrs.size(); vattr_idx++) {
varlen_reserve -= relation_.getAttributeById(
varlen_attrs[vattr_idx].dst_attr_id)->getType().maximumByteLength();
}
DCHECK_GE(relation_.getMaximumVariableByteLength(), varlen_reserve);
}
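// 'varlen_reserve' is the maximum number of variable-length bytes per tuple
// belonging to attributes that this partial insert does not fill in. That much
// heap space must stay reserved for every inserted tuple so that later passes
// over the remaining columns cannot run out of room.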
InvokeOnAnyValueAccessor(
accessor,
[&](auto *accessor) -> void { // NOLINT(build/c++11)
BitVector<true> tuple_null_bitmap(tuple_slot, num_null_attrs_);
const std::size_t nullmap_size = BitVector<true>::BytesNeeded(num_null_attrs_);
const std::size_t num_c_attr = contiguous_attrs.size();
const std::size_t num_n_attr = nullable_attrs.size();
const std::size_t num_v_attr = varlen_attrs.size();
do {
while (num_tuples_inserted < max_num_tuples_to_insert && accessor->next()) {
char *attr_cursor = tuple_slot + nullmap_size;
for (std::size_t cattr_idx = 0; cattr_idx < num_c_attr; cattr_idx++) {
const ContiguousAttrs &cattr = contiguous_attrs[cattr_idx];
attr_cursor += cattr.bytes_to_advance;
const void *attr_value =
accessor->template getUntypedValue<false>(cattr.src_attr_id);
std::memcpy(attr_cursor, attr_value, cattr.bytes_to_copy);
}
if (copy_nulls) {
tuple_null_bitmap.setMemory(tuple_slot);
for (std::size_t nattr_idx = 0; nattr_idx < num_n_attr; nattr_idx++) {
const NullableAttr &nattr = nullable_attrs[nattr_idx];
const void *attr_value =
accessor->template getUntypedValue<true>(nattr.src_attr_id);
if (attr_value == nullptr) {
tuple_null_bitmap.setBit(nattr.nullable_attr_idx, true);
}
}
}
if (copy_varlen) {
for (std::size_t vattr_idx = 0; vattr_idx < num_v_attr; vattr_idx++) {
const VarLenAttr &vattr = varlen_attrs[vattr_idx];
attr_cursor += vattr.bytes_to_advance;
// Typed value is necessary as we need the length.
const TypedValue &attr_value =
accessor->template getTypedValue(vattr.src_attr_id);
if (attr_value.isNull()) {
continue;
}
const std::size_t attr_size = attr_value.getDataSize();
varlen_heap_offset -= attr_size;
std::memcpy(static_cast<char *>(tuple_storage_) + varlen_heap_offset,
attr_value.getDataPtr(),
attr_size);
reinterpret_cast<std::uint32_t *>(attr_cursor)[0] = varlen_heap_offset;
reinterpret_cast<std::uint32_t *>(attr_cursor)[1] = static_cast<std::uint32_t>(attr_size);
}
}
tuple_slot += tuple_slot_bytes_;
num_tuples_inserted++;
}
if (fill_to_capacity) {
const std::int64_t remaining_storage_after_inserts =
storage_available -
(num_tuples_inserted * (tuple_slot_bytes_ + varlen_reserve) +
(varlen_heap_offset_orig - varlen_heap_offset));
DCHECK_LE(0, remaining_storage_after_inserts);
const std::size_t additional_tuples_insert =
remaining_storage_after_inserts / (tuple_slot_bytes_ + this->relation_.getMaximumByteLength());
// We want to avoid a situation where we have several short insert iterations
// near the end of an insertion cycle.
if (additional_tuples_insert > this->getInsertLowerBoundThreshold()) {
max_num_tuples_to_insert += additional_tuples_insert;
}
}
} while (fill_to_capacity && !accessor->iterationFinished() &&
num_tuples_inserted < max_num_tuples_to_insert);
});
if (copy_varlen) {
header_->variable_length_bytes_allocated += (varlen_heap_offset_orig - varlen_heap_offset);
}
return num_tuples_inserted;
}
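// Mark the freshly written slots as occupied and update the tuple counts. This
// step is deferred so that several bulkInsertPartialTuples() calls can fill
// different column subsets of the same rows before those rows become visible.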
void SplitRowStoreTupleStorageSubBlock::bulkInsertPartialTuplesFinalize(
const tuple_id num_tuples_inserted) {
occupancy_bitmap_->setBitRange(header_->max_tid + 1, num_tuples_inserted, true);
header_->num_tuples += num_tuples_inserted;
header_->max_tid += num_tuples_inserted;
}
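// Conservative lower bound on how many additional tuples are guaranteed to
// fit: assume every remaining tuple needs a full slot plus maximum-size
// variable-length storage.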
std::size_t SplitRowStoreTupleStorageSubBlock::getInsertLowerBound() const {
const std::size_t remaining_storage_bytes = tuple_storage_bytes_ -
(header_->variable_length_bytes_allocated +
((header_->max_tid + 1) * tuple_slot_bytes_));
const std::size_t tuple_max_size = tuple_slot_bytes_ + relation_.getMaximumVariableByteLength();
return remaining_storage_bytes / tuple_max_size;
}
tuple_id SplitRowStoreTupleStorageSubBlock::bulkInsertTuplesWithRemappedAttributes(
const std::vector<attribute_id> &attribute_map,
ValueAccessor *accessor) {
DCHECK_EQ(relation_.size(), attribute_map.size());
return bulkInsertDispatcher(attribute_map, accessor, kCatalogMaxID, true);
}
const void* SplitRowStoreTupleStorageSubBlock::getAttributeValue(
const tuple_id tuple,
const attribute_id attr) const {
DEBUG_ASSERT(occupancy_bitmap_->getBit(tuple));
DEBUG_ASSERT(relation_.hasAttributeWithId(attr));
const char *tuple_slot = static_cast<const char*>(tuple_storage_)
+ tuple_slot_bytes_ * tuple;
const int nullable_idx = relation_.getNullableAttributeIndex(attr);
if (nullable_idx != -1) {
// const_cast is safe here. We will only be using read-only methods of
// BitVector.
BitVector<true> tuple_null_bitmap(const_cast<void*>(static_cast<const void*>(tuple_slot)),
relation_.numNullableAttributes());
if (tuple_null_bitmap.getBit(nullable_idx)) {
return nullptr;
}
}
const int variable_length_idx = relation_.getVariableLengthAttributeIndex(attr);
if (variable_length_idx == -1) {
// Fixed-length, stored in-line in slot.
return tuple_slot
+ per_tuple_null_bitmap_bytes_
+ relation_.getFixedLengthAttributeOffset(attr);
} else {
// Variable-length, stored at back of block.
const std::uint32_t pos = *reinterpret_cast<const std::uint32_t*>(
tuple_slot + per_tuple_null_bitmap_bytes_
+ relation_.getFixedByteLength()
+ variable_length_idx * 2 * sizeof(std::uint32_t));
return static_cast<const char*>(tuple_storage_) + pos;
}
}
TypedValue SplitRowStoreTupleStorageSubBlock::getAttributeValueTyped(
const tuple_id tuple,
const attribute_id attr) const {
DEBUG_ASSERT(occupancy_bitmap_->getBit(tuple));
DEBUG_ASSERT(relation_.hasAttributeWithId(attr));
const Type &attr_type = relation_.getAttributeById(attr)->getType();
const char *tuple_slot = static_cast<const char*>(tuple_storage_)
+ tuple_slot_bytes_ * tuple;
const int nullable_idx = relation_.getNullableAttributeIndex(attr);
if (nullable_idx != -1) {
// const_cast is safe here. We will only be using read-only methods of
// BitVector.
BitVector<true> tuple_null_bitmap(const_cast<void*>(static_cast<const void*>(tuple_slot)),
relation_.numNullableAttributes());
if (tuple_null_bitmap.getBit(nullable_idx)) {
return attr_type.makeNullValue();
}
}
const int variable_length_idx = relation_.getVariableLengthAttributeIndex(attr);
if (variable_length_idx == -1) {
// Fixed-length, stored in-line in slot.
return attr_type.makeValue(
tuple_slot + per_tuple_null_bitmap_bytes_ + relation_.getFixedLengthAttributeOffset(attr),
attr_type.maximumByteLength());
} else {
// Variable-length, stored at back of block.
const std::uint32_t *pos_ptr = reinterpret_cast<const std::uint32_t*>(
tuple_slot + per_tuple_null_bitmap_bytes_
+ relation_.getFixedByteLength()
+ variable_length_idx * 2 * sizeof(std::uint32_t));
return attr_type.makeValue(static_cast<const char*>(tuple_storage_) + pos_ptr[0],
pos_ptr[1]);
}
}
ValueAccessor* SplitRowStoreTupleStorageSubBlock::createValueAccessor(
const TupleIdSequence *sequence) const {
SplitRowStoreValueAccessor *base_accessor
= new SplitRowStoreValueAccessor(relation_,
header_->num_tuples,
header_->max_tid,
*occupancy_bitmap_,
tuple_storage_,
tuple_slot_bytes_,
per_tuple_null_bitmap_bytes_);
if (sequence == nullptr) {
return base_accessor;
} else {
return new TupleIdSequenceAdapterValueAccessor<SplitRowStoreValueAccessor>(
base_accessor,
*sequence);
}
}
bool SplitRowStoreTupleStorageSubBlock::canSetAttributeValuesInPlaceTyped(
const tuple_id tuple,
const std::unordered_map<attribute_id, TypedValue> &new_values) const {
DEBUG_ASSERT(hasTupleWithID(tuple));
if (!relation_.isVariableLength()) {
return true;
}
const void *tuple_slot = static_cast<const char*>(tuple_storage_) + tuple * tuple_slot_bytes_;
// const_cast is safe. Only read-only methods of BitVector are used here.
BitVector<true> tuple_null_bitmap(const_cast<void*>(tuple_slot),
relation_.numNullableAttributes());
const std::uint32_t *variable_length_info_array = reinterpret_cast<const std::uint32_t*>(
static_cast<const char*>(tuple_slot)
+ per_tuple_null_bitmap_bytes_
+ relation_.getFixedByteLength());
std::size_t extra_variable_bytes = 0;
for (const std::pair<const attribute_id, TypedValue> &update_pair : new_values) {
int nullable_idx = relation_.getNullableAttributeIndex(update_pair.first);
bool original_null = false;
if (nullable_idx != -1) {
if (update_pair.second.isNull()) {
// If new attribute value is NULL, no extra storage is needed.
continue;
}
original_null = tuple_null_bitmap.getBit(nullable_idx);
}
int variable_idx = relation_.getVariableLengthAttributeIndex(update_pair.first);
if (variable_idx == -1) {
// If attribute is fixed-length, it can be overwritten inline in the slot.
continue;
}
const std::size_t update_size = update_pair.second.getDataSize();
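// A previously NULL value has no meaningful stored size, so replacing it with
// a non-null value always requires fresh variable-length storage.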
if (original_null
|| (update_size > variable_length_info_array[(variable_idx << 1) + 1])) {
// If value's size is less than or equal to the existing value, we can
// overwrite it in place. We only need to allocate more storage if it is
// larger.
extra_variable_bytes += update_size;
}
}
if ((header_->max_tid + 1) * tuple_slot_bytes_
+ header_->variable_length_bytes_allocated
+ extra_variable_bytes
> tuple_storage_bytes_) {
return false;
}
return true;
}
void SplitRowStoreTupleStorageSubBlock::setAttributeValueInPlaceTyped(
const tuple_id tuple,
const attribute_id attr,
const TypedValue &value) {
DEBUG_ASSERT(hasTupleWithID(tuple));
DEBUG_ASSERT(relation_.hasAttributeWithId(attr));
void *tuple_slot = static_cast<char*>(tuple_storage_) + tuple * tuple_slot_bytes_;
const int nullable_idx = relation_.getNullableAttributeIndex(attr);
bool original_null = false;
if (nullable_idx != -1) {
// Set bit in null bitmap.
BitVector<true> tuple_null_bitmap(tuple_slot, relation_.numNullableAttributes());
original_null = tuple_null_bitmap.getBit(nullable_idx);
if (value.isNull()) {
tuple_null_bitmap.setBit(nullable_idx, true);
return;
} else {
tuple_null_bitmap.setBit(nullable_idx, false);
}
}
DEBUG_ASSERT(!value.isNull());
const int variable_length_idx = relation_.getVariableLengthAttributeIndex(attr);
if (variable_length_idx == -1) {
// Copy fixed-length value into inline position in slot.
value.copyInto(static_cast<char*>(tuple_slot)
+ per_tuple_null_bitmap_bytes_
+ relation_.getFixedLengthAttributeOffset(attr));
} else {
const std::size_t value_size = value.getDataSize();
std::uint32_t *variable_length_info_array = reinterpret_cast<std::uint32_t*>(
static_cast<char*>(tuple_slot)
+ per_tuple_null_bitmap_bytes_
+ relation_.getFixedByteLength());
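// A value that was previously NULL has no usable (offset, size) entry, so it
// is handled like a value that has grown: fresh space is allocated at the
// current top of the downward-growing variable-length heap.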
if (original_null
|| (value_size > variable_length_info_array[(variable_length_idx << 1) + 1])) {
// Allocate more space.
DEBUG_ASSERT((header_->max_tid + 1) * tuple_slot_bytes_
+ header_->variable_length_bytes_allocated
+ value_size
<= tuple_storage_bytes_);
header_->variable_length_bytes_allocated += value_size;
// Point to the newly-allocated location.
variable_length_info_array[variable_length_idx << 1]
= tuple_storage_bytes_ - header_->variable_length_bytes_allocated;
header_->variable_length_storage_compact = false;
} else if (value_size < variable_length_info_array[(variable_length_idx << 1) + 1]) {
// If we are overwriting in place with fewer bytes than the original value
// occupied, storage is no longer compact.
}
// Actually copy the value into variable-length storage.
value.copyInto(static_cast<char*>(tuple_storage_)
+ variable_length_info_array[variable_length_idx << 1]);
// Update the size slot.
variable_length_info_array[(variable_length_idx << 1) + 1] = value_size;
}
}
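// Deleting a tuple only clears its occupancy bit and updates the header
// bookkeeping; neither the slot array nor the variable-length heap is
// compacted until rebuild() runs.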
bool SplitRowStoreTupleStorageSubBlock::deleteTuple(const tuple_id tuple) {
DEBUG_ASSERT(hasTupleWithID(tuple));
occupancy_bitmap_->setBit(tuple, false);
--(header_->num_tuples);
if (header_->num_tuples) {
if (tuple == header_->max_tid) {
header_->max_tid = occupancy_bitmap_->lastOne();
// Since 'header_->num_tuples' is nonzero, there should be at least one
// bit still set in '*occupancy_bitmap_'.
DEBUG_ASSERT(static_cast<std::size_t>(header_->max_tid) != occupancy_bitmap_->size());
}
header_->variable_length_storage_compact = false;
} else {
header_->max_tid = -1;
header_->variable_length_bytes_allocated = 0;
header_->variable_length_storage_compact = true;
}
return false;
}
bool SplitRowStoreTupleStorageSubBlock::bulkDeleteTuples(TupleIdSequence *tuples) {
occupancy_bitmap_->unsetFrom(tuples->getInternalBitVector());
header_->num_tuples = occupancy_bitmap_->onesCount();
if (header_->num_tuples) {
header_->max_tid = occupancy_bitmap_->lastOne();
header_->variable_length_storage_compact = false;
} else {
header_->max_tid = -1;
header_->variable_length_bytes_allocated = 0;
header_->variable_length_storage_compact = true;
}
return false;
}
OrderedTupleIdSequence* SplitRowStoreTupleStorageSubBlock::getExistenceList() const {
if (isPacked()) {
return TupleStorageSubBlock::getExistenceList();
}
OrderedTupleIdSequence *existence_list = new OrderedTupleIdSequence();
existence_list->reserve(header_->num_tuples);
for (std::size_t pos = occupancy_bitmap_->firstOne();
pos < occupancy_bitmap_->size();
pos = occupancy_bitmap_->firstOne(pos + 1)) {
existence_list->push_back(pos);
}
return existence_list;
}
void SplitRowStoreTupleStorageSubBlock::rebuild() {
// First, pack all tuple slots in the front of the block.
if (!isPacked()) {
// The packing algorithm scans for holes starting at the beginning of the
// slot array and fills them with tuples taken from the back. This
// minimizes the total number of moves.
std::size_t dest = occupancy_bitmap_->firstZero();
std::size_t src = occupancy_bitmap_->lastOne(header_->max_tid + 1);
while (dest < static_cast<std::size_t>(header_->num_tuples)) {
std::memcpy(static_cast<char*>(tuple_storage_) + dest * tuple_slot_bytes_,
static_cast<char*>(tuple_storage_) + src * tuple_slot_bytes_,
tuple_slot_bytes_);
dest = occupancy_bitmap_->firstZero(dest + 1);
src = occupancy_bitmap_->lastOne(src);
}
// Update header and reset occupancy bitmap.
header_->max_tid = header_->num_tuples - 1;
occupancy_bitmap_->clear();
occupancy_bitmap_->setBitRange(0, header_->num_tuples, true);
}
if (relation_.isVariableLength()
&& !header_->variable_length_storage_compact
&& (header_->variable_length_bytes_allocated != 0)) {
// Some variable-length storage is used, but it has holes.
// Allocate a temporary buffer to store packed variable-length values.
ScopedBuffer packed_buffer(header_->variable_length_bytes_allocated);
std::size_t buffer_pos = 0;
for (tuple_id tid = 0; tid < header_->num_tuples; ++tid) {
void *tuple_slot = static_cast<char*>(tuple_storage_) + tid * tuple_slot_bytes_;
BitVector<true> tuple_null_bitmap(tuple_slot, relation_.numNullableAttributes());
std::uint32_t *variable_length_info_array = reinterpret_cast<std::uint32_t*>(
static_cast<char*>(tuple_slot)
+ per_tuple_null_bitmap_bytes_
+ relation_.getFixedByteLength());
for (CatalogRelationSchema::const_iterator attr_it = relation_.begin();
attr_it != relation_.end();
++attr_it) {
const int nullable_idx = relation_.getNullableAttributeIndex(attr_it->getID());
if ((nullable_idx != -1) && tuple_null_bitmap.getBit(nullable_idx)) {
// Nothing to copy for null values.
continue;
}
const int variable_idx = relation_.getVariableLengthAttributeIndex(attr_it->getID());
if (variable_idx != -1) {
// Copy into the temporary buffer.
std::memcpy(static_cast<char*>(packed_buffer.get()) + buffer_pos,
static_cast<char*>(tuple_storage_)
+ variable_length_info_array[variable_idx << 1],
variable_length_info_array[(variable_idx << 1) + 1]);
// Temporarily reset the offset entry in the slot to the offset in
// the temp buffer (will be corrected in a second pass below).
variable_length_info_array[variable_idx << 1] = buffer_pos;
buffer_pos += variable_length_info_array[(variable_idx << 1) + 1];
}
}
}
if (buffer_pos != 0) {
// Copy temporary packing buffer back into block.
const std::uint32_t variable_start = tuple_storage_bytes_ - buffer_pos;
std::memcpy(static_cast<char*>(tuple_storage_) + variable_start,
packed_buffer.get(),
buffer_pos);
// Correct offsets of variable-length attribute storage to point to the
// actual location in the block.
for (tuple_id tid = 0; tid < header_->num_tuples; ++tid) {
void *tuple_slot = static_cast<char*>(tuple_storage_) + tid * tuple_slot_bytes_;
std::uint32_t *variable_length_info_array = reinterpret_cast<std::uint32_t*>(
static_cast<char*>(tuple_slot)
+ per_tuple_null_bitmap_bytes_
+ relation_.getFixedByteLength());
// Note that we skip null checks, as doing arithmetic on meaningless
// offsets for null values is harmless and avoids a branch.
for (std::size_t variable_idx = 0;
variable_idx < relation_.numVariableLengthAttributes();
++variable_idx) {
variable_length_info_array[variable_idx << 1] += variable_start;
}
}
}
header_->variable_length_bytes_allocated = buffer_pos;
}
header_->variable_length_storage_compact = true;
}
template <bool nullable_attrs, bool variable_length_attrs>
TupleStorageSubBlock::InsertResult SplitRowStoreTupleStorageSubBlock::insertTupleImpl(
const Tuple &tuple) {
tuple_id pos = isPacked() ? header_->num_tuples
: occupancy_bitmap_->firstZero();
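// If the block is packed, append after the last tuple; otherwise reuse the
// lowest unoccupied slot left behind by a deleted tuple.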
if ((pos + 1) * tuple_slot_bytes_ + header_->variable_length_bytes_allocated > tuple_storage_bytes_) {
// Early check: if this tuple would cause us to run out of space even without
// counting variable-length storage, fail immediately.
return InsertResult(-1, false);
}
std::size_t variable_length_value_pos = 0;
// Calculate the required variable-length storage.
if (variable_length_attrs) {
std::size_t total_variable_length_bytes = 0;
Tuple::const_iterator value_it = tuple.begin();
CatalogRelationSchema::const_iterator attr_it = relation_.begin();
for (; value_it != tuple.end(); ++value_it, ++attr_it) {
if (nullable_attrs && value_it->isNull()) {
continue;
} else if (attr_it->getType().isVariableLength()) {
total_variable_length_bytes += value_it->getDataSize();
}
}
if (!spaceToInsert(pos, total_variable_length_bytes)) {
return InsertResult(-1, false);
}
// Allocate variable-length storage.
header_->variable_length_bytes_allocated += total_variable_length_bytes;
variable_length_value_pos = tuple_storage_bytes_ - header_->variable_length_bytes_allocated;
}
// Locate the slot and its sub-structures.
void *tuple_slot = static_cast<char*>(tuple_storage_) + pos * tuple_slot_bytes_;
BitVector<true> tuple_null_bitmap(tuple_slot,
relation_.numNullableAttributes());
tuple_null_bitmap.clear();
char *fixed_length_attr_storage = static_cast<char*>(tuple_slot) + per_tuple_null_bitmap_bytes_;
std::uint32_t *variable_length_info_array
= variable_length_attrs ? reinterpret_cast<std::uint32_t*>(
fixed_length_attr_storage + relation_.getFixedByteLength())
: nullptr;
Tuple::const_iterator value_it = tuple.begin();
CatalogRelationSchema::const_iterator attr_it = relation_.begin();
for (; value_it != tuple.end(); ++value_it, ++attr_it) {
const attribute_id attr_id = attr_it->getID();
if (nullable_attrs) {
// Set null bit if needed.
const int nullable_idx = relation_.getNullableAttributeIndex(attr_id);
if ((nullable_idx != -1) && value_it->isNull()) {
tuple_null_bitmap.setBit(nullable_idx, true);
continue;
}
}
if (variable_length_attrs) {
const int variable_length_idx = relation_.getVariableLengthAttributeIndex(attr_id);
if (variable_length_idx != -1) {
// If attribute is variable-length, point to it and record its size
// in the slot, then copy it into the variable-length storage region.
const std::size_t data_size = value_it->getDataSize();
variable_length_info_array[variable_length_idx << 1] = variable_length_value_pos;
variable_length_info_array[(variable_length_idx << 1) + 1] = data_size;
value_it->copyInto(static_cast<char*>(tuple_storage_) + variable_length_value_pos);
variable_length_value_pos += data_size;
continue;
}
}
// Not null or variable-length, so copy directly into slot.
value_it->copyInto(fixed_length_attr_storage + relation_.getFixedLengthAttributeOffset(attr_id));
}
++(header_->num_tuples);
if (pos > header_->max_tid) {
header_->max_tid = pos;
}
occupancy_bitmap_->setBit(pos, true);
return InsertResult(pos, false);
}
// Copy groups are used by the insert algorithms to efficiently copy attributes from a
// variety of source schemas that share some attributes with this (destination) store.
// SplitRow has 3 distinct zones which define a physical tuple:
//    [null_bitmap] [fixed_length_zone] [var_len_pairs]
// The insert algorithm first copies over the fixed-length attributes. Since the source
// schema may contain gaps and reorderings relative to the destination, we need to know:
//    * where to copy each source attribute to (i.e., the offset from the start of the
//      fixed_length_zone)
//    * how many bytes to copy
//    * which source attribute we are copying
// For each fixed-length attribute we advance the copy cursor to the computed offset in
// the destination tuple and memcpy that many bytes from the source attribute.
//
// Copying variable-length attributes is similar, except that the actual data lives in a
// heap at the end of the SplitRow block and the tuple slot only stores pairs of
// (heap offset, length); having to copy the value into the heap is the main difference
// from the fixed-length case.
void SplitRowStoreTupleStorageSubBlock::getCopyGroupsForAttributeMap(
const std::vector<attribute_id> &attribute_map,
CopyGroupList *copy_groups) {
DCHECK_EQ(attribute_map.size(), relation_.size());
std::size_t contig_adv = 0;
std::size_t varlen_adv = 0;
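// 'contig_adv' accumulates how far the destination cursor must advance before
// the next fixed-length copy: the size of the previously copied attribute plus
// the sizes of any intervening attributes that are absent from the source.
// 'varlen_adv' plays the same role for the (offset, length) pairs in the
// variable-length reference zone.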
for (std::size_t attr_id = 0; attr_id < attribute_map.size(); ++attr_id) {
const attribute_id src_attr = attribute_map[attr_id];
// Attribute doesn't exist in src.
if (src_attr == kInvalidCatalogId) {
// create a placeholder for now
if (relation_.getVariableLengthAttributeIndex(attr_id) == -1) {
// fixed len
contig_adv += fixed_len_attr_sizes_[attr_id];
} else {
// var len
varlen_adv += kVarLenSlotSize;
}
continue;
}
// Attribute exists in src.
if (relation_.getVariableLengthAttributeIndex(attr_id) == -1) {
// fixed len
copy_groups->contiguous_attrs.emplace_back(
src_attr, fixed_len_attr_sizes_[attr_id], contig_adv);
contig_adv = fixed_len_attr_sizes_[attr_id];
} else {
// var len
copy_groups->varlen_attrs.emplace_back(src_attr, attr_id, varlen_adv);
varlen_adv = SplitRowStoreTupleStorageSubBlock::kVarLenSlotSize;
}
if (relation_.getNullableAttributeIndex(attr_id) != -1) {
copy_groups->nullable_attrs.emplace_back(
src_attr, relation_.getNullableAttributeIndex(attr_id));
}
}
// Fold the leftover fixed-length advance into the first var-len entry so the
// copy cursor skips the rest of the fixed-length zone and lands on that
// entry's slot in the var-len reference zone.
if (copy_groups->varlen_attrs.size() > 0) {
copy_groups->varlen_attrs[0].bytes_to_advance += contig_adv;
}
}
} // namespace quickstep