/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
**/
#include "storage/BasicColumnStoreTupleStorageSubBlock.hpp"
#include <cstddef>
#include <cstring>
#include <unordered_map>
#include <utility>
#include <vector>
#include "catalog/CatalogAttribute.hpp"
#include "catalog/CatalogRelationSchema.hpp"
#include "catalog/CatalogTypedefs.hpp"
#include "expressions/predicate/ComparisonPredicate.hpp"
#include "expressions/predicate/Predicate.hpp"
#include "expressions/predicate/PredicateCost.hpp"
#include "expressions/scalar/Scalar.hpp"
#include "storage/BasicColumnStoreValueAccessor.hpp"
#include "storage/ColumnStoreUtil.hpp"
#include "storage/StorageBlockInfo.hpp"
#include "storage/StorageBlockLayout.pb.h"
#include "storage/StorageErrors.hpp"
#include "storage/SubBlockTypeRegistry.hpp"
#include "storage/TupleIdSequence.hpp"
#include "storage/ValueAccessor.hpp"
#include "storage/ValueAccessorUtil.hpp"
#include "types/Type.hpp"
#include "types/TypedValue.hpp"
#include "types/containers/Tuple.hpp"
#include "types/operations/comparisons/Comparison.hpp"
#include "types/operations/comparisons/ComparisonFactory.hpp"
#include "types/operations/comparisons/ComparisonID.hpp"
#include "types/operations/comparisons/ComparisonUtil.hpp"
#include "utility/BitVector.hpp"
#include "utility/Macros.hpp"
#include "utility/PtrMap.hpp"
#include "utility/PtrVector.hpp"
#include "utility/ScopedBuffer.hpp"
#include "glog/logging.h"
using std::memcpy;
using std::memmove;
using std::size_t;
using std::vector;
using quickstep::column_store_util::ColumnStripeIterator;
using quickstep::column_store_util::SortColumnPredicateEvaluator;
namespace quickstep {
QUICKSTEP_REGISTER_TUPLE_STORE(BasicColumnStoreTupleStorageSubBlock, BASIC_COLUMN_STORE);
// Hide this helper in an anonymous namespace:
namespace {
class SortColumnValueReference {
public:
SortColumnValueReference(const void *value, const tuple_id tuple)
: value_(value), tuple_(tuple) {
}
explicit SortColumnValueReference(const void *value)
: value_(value), tuple_(-1) {
}
inline const void* getDataPtr() const {
return value_;
}
inline tuple_id getTupleID() const {
return tuple_;
}
private:
const void *value_;
tuple_id tuple_;
};
} // anonymous namespace
BasicColumnStoreTupleStorageSubBlock::BasicColumnStoreTupleStorageSubBlock(
const CatalogRelationSchema &relation,
const TupleStorageSubBlockDescription &description,
const bool new_block,
void *sub_block_memory,
const std::size_t sub_block_memory_size)
: TupleStorageSubBlock(relation,
description,
new_block,
sub_block_memory,
sub_block_memory_size),
sort_specified_(description_.HasExtension(
BasicColumnStoreTupleStorageSubBlockDescription::sort_attribute_id)),
sorted_(true),
header_(static_cast<BasicColumnStoreHeader*>(sub_block_memory)) {
if (!DescriptionIsValid(relation_, description_)) {
FATAL_ERROR("Attempted to construct a BasicColumnStoreTupleStorageSubBlock from an invalid description.");
}
if (sort_specified_) {
sort_column_id_ = description_.GetExtension(
BasicColumnStoreTupleStorageSubBlockDescription::sort_attribute_id);
sort_column_type_ = &(relation_.getAttributeById(sort_column_id_)->getType());
}
if (sub_block_memory_size < sizeof(BasicColumnStoreHeader)) {
throw BlockMemoryTooSmall("BasicColumnStoreTupleStorageSubBlock", sub_block_memory_size_);
}
  // Determine the number of tuples this sub-block can hold. Compute at bit
  // granularity to account for null bitmap storage.
max_tuples_ = ((sub_block_memory_size_ - sizeof(BasicColumnStoreHeader)) << 3)
/ ((relation_.getFixedByteLength() << 3) + relation_.numNullableAttributes());
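  // Purely illustrative numbers (the 1 MB block and 16-byte header are
  // assumptions, not values from this code): with 20 bytes of fixed-length
  // attribute storage per tuple and 2 nullable attributes, this first
  // estimate is (1048576 - 16) * 8 / (20 * 8 + 2) = 51780 tuples, before the
  // BitVector rounding correction applied below.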
if (max_tuples_ == 0) {
throw BlockMemoryTooSmall("BasicColumnStoreTupleStorageSubBlock", sub_block_memory_size_);
}
  // BitVector's storage requirement rounds up to a multiple of sizeof(size_t),
  // so redo the calculation using the actual bitmap size.
std::size_t bitmap_required_bytes = BitVector<false>::BytesNeeded(max_tuples_);
max_tuples_ = (sub_block_memory_size_
- sizeof(BasicColumnStoreHeader)
- (relation_.numNullableAttributes() * bitmap_required_bytes))
/ relation_.getFixedByteLength();
if (max_tuples_ == 0) {
throw BlockMemoryTooSmall("BasicColumnStoreTupleStorageSubBlock", sub_block_memory_size_);
}
bitmap_required_bytes = BitVector<false>::BytesNeeded(max_tuples_);
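  // max_tuples_ may have shrunk in the second pass, so recompute the final
  // bitmap size from it; the per-column bitmaps allocated below use this
  // exact size.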
// Allocate memory for this sub-block's structures, starting with the header.
char *memory_location = static_cast<char*>(sub_block_memory_) + sizeof(BasicColumnStoreHeader);
// Per-column NULL bitmaps.
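  // column_null_bitmaps_ is indexed directly by attribute_id: non-nullable
  // attributes and gaps in the id space get null entries, so lookups by
  // attribute_id need no extra translation.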
for (attribute_id attr_id = 0;
attr_id <= relation_.getMaxAttributeId();
++attr_id) {
if (relation_.hasAttributeWithId(attr_id)
&& relation_.getAttributeById(attr_id)->getType().isNullable()) {
column_null_bitmaps_.push_back(
new BitVector<false>(memory_location, max_tuples_));
if (new_block) {
column_null_bitmaps_.back().clear();
}
memory_location += bitmap_required_bytes;
} else {
column_null_bitmaps_.push_back(nullptr);
}
}
// Column stripes.
column_stripes_.resize(relation_.getMaxAttributeId() + 1, nullptr);
for (const CatalogAttribute &attr : relation_) {
column_stripes_[attr.getID()] = memory_location;
memory_location += max_tuples_ * attr.getType().maximumByteLength();
}
DEBUG_ASSERT(memory_location
<= static_cast<const char*>(sub_block_memory_) + sub_block_memory_size_);
if (new_block) {
header_->num_tuples = 0;
header_->nulls_in_sort_column = 0;
}
}
bool BasicColumnStoreTupleStorageSubBlock::DescriptionIsValid(
const CatalogRelationSchema &relation,
const TupleStorageSubBlockDescription &description) {
// Make sure description is initialized and specifies BasicColumnStore.
if (!description.IsInitialized()) {
return false;
}
if (description.sub_block_type() != TupleStorageSubBlockDescription::BASIC_COLUMN_STORE) {
return false;
}
// Make sure relation is not variable-length.
if (relation.isVariableLength()) {
return false;
}
// If a sort attribute is specified, check that it exists and can be ordered
// by LessComparison.
if (description.HasExtension(
BasicColumnStoreTupleStorageSubBlockDescription::sort_attribute_id)) {
const attribute_id sort_attribute_id = description.GetExtension(
BasicColumnStoreTupleStorageSubBlockDescription::sort_attribute_id);
if (!relation.hasAttributeWithId(sort_attribute_id)) {
return false;
}
const Type &sort_attribute_type =
relation.getAttributeById(sort_attribute_id)->getType();
if (!ComparisonFactory::GetComparison(ComparisonID::kLess).canCompareTypes(
sort_attribute_type, sort_attribute_type)) {
return false;
}
}
return true;
}
std::size_t BasicColumnStoreTupleStorageSubBlock::EstimateBytesPerTuple(
const CatalogRelationSchema &relation,
const TupleStorageSubBlockDescription &description) {
DEBUG_ASSERT(DescriptionIsValid(relation, description));
  // NOTE(chasseur): We round up the number of bytes needed for the NULL
  // bitmaps to avoid estimating 0 bytes for a relation with fewer than 8
  // attributes that are all NullType.
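  // For example, a relation with 24 bytes of fixed-length attribute storage
  // and 3 nullable attributes is estimated at 24 + ((3 + 7) >> 3) = 25 bytes
  // per tuple.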
return relation.getFixedByteLength()
+ ((relation.numNullableAttributes() + 7) >> 3);
}
TupleStorageSubBlock::InsertResult BasicColumnStoreTupleStorageSubBlock::insertTuple(
const Tuple &tuple) {
#ifdef QUICKSTEP_DEBUG
paranoidInsertTypeCheck(tuple);
#endif
if (!hasSpaceToInsert(1)) {
return InsertResult(-1, false);
}
tuple_id insert_position = header_->num_tuples;
// If this column store is unsorted, or the value of the sort column is NULL,
// skip the search and put the new tuple at the end of everything else.
if (sort_specified_ && !tuple.getAttributeValue(sort_column_id_).isNull()) {
// Binary search for the appropriate insert location.
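    // The upper bound is used so that a tuple whose sort-column value equals
    // existing values is placed after them; only tuples with strictly greater
    // values need to be shifted. NULLs in the sort column are excluded from
    // the search range (they are kept at the end of the stripe).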
ColumnStripeIterator begin_it(column_stripes_[sort_column_id_],
relation_.getAttributeById(sort_column_id_)->getType().maximumByteLength(),
0);
ColumnStripeIterator end_it(column_stripes_[sort_column_id_],
relation_.getAttributeById(sort_column_id_)->getType().maximumByteLength(),
header_->num_tuples - header_->nulls_in_sort_column);
insert_position = GetBoundForUntypedValue<ColumnStripeIterator, UpperBoundFunctor>(
*sort_column_type_,
begin_it,
end_it,
tuple.getAttributeValue(sort_column_id_).getDataPtr()).getTuplePosition();
}
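  // The second field of InsertResult records whether existing tuple IDs were
  // displaced: any insertion that is not at the very end shifts every
  // subsequent tuple back by one position.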
InsertResult retval(insert_position, insert_position != header_->num_tuples);
insertTupleAtPosition(tuple, insert_position);
return retval;
}
bool BasicColumnStoreTupleStorageSubBlock::insertTupleInBatch(const Tuple &tuple) {
#ifdef QUICKSTEP_DEBUG
paranoidInsertTypeCheck(tuple);
#endif
if (!hasSpaceToInsert(1)) {
return false;
}
insertTupleAtPosition(tuple, header_->num_tuples);
sorted_ = false;
return true;
}
tuple_id BasicColumnStoreTupleStorageSubBlock::bulkInsertTuples(ValueAccessor *accessor) {
const tuple_id original_num_tuples = header_->num_tuples;
InvokeOnAnyValueAccessor(
accessor,
[&](auto *accessor) -> void { // NOLINT(build/c++11)
if (relation_.hasNullableAttributes()) {
while (this->hasSpaceToInsert(1) && accessor->next()) {
attribute_id accessor_attr_id = 0;
// TODO(chasseur): Column-wise copy is probably preferable to
// tuple-at-a-time, but will require some API changes.
for (CatalogRelationSchema::const_iterator attr_it = relation_.begin();
attr_it != relation_.end();
++attr_it) {
const attribute_id attr_id = attr_it->getID();
const std::size_t attr_size = attr_it->getType().maximumByteLength();
if (attr_it->getType().isNullable()) {
const void *attr_value
= accessor->template getUntypedValue<true>(accessor_attr_id);
if (attr_value == nullptr) {
column_null_bitmaps_[attr_id].setBit(header_->num_tuples, true);
} else {
memcpy(static_cast<char*>(column_stripes_[attr_id])
+ header_->num_tuples * attr_size,
attr_value,
attr_size);
}
} else {
memcpy(static_cast<char*>(column_stripes_[attr_id])
+ header_->num_tuples * attr_size,
accessor->template getUntypedValue<false>(accessor_attr_id),
attr_size);
}
++accessor_attr_id;
}
++(header_->num_tuples);
}
} else {
while (this->hasSpaceToInsert(1) && accessor->next()) {
attribute_id accessor_attr_id = 0;
for (CatalogRelationSchema::const_iterator attr_it = relation_.begin();
attr_it != relation_.end();
++attr_it) {
const std::size_t attr_size = attr_it->getType().maximumByteLength();
memcpy(static_cast<char*>(column_stripes_[attr_it->getID()])
+ header_->num_tuples * attr_size,
accessor->template getUntypedValue<false>(accessor_attr_id),
attr_size);
++accessor_attr_id;
}
++(header_->num_tuples);
}
}
});
const tuple_id num_inserted = header_->num_tuples - original_num_tuples;
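  // Bulk-inserted tuples are simply appended in accessor order, so the block
  // can only remain sorted if nothing was actually inserted; a later call to
  // rebuild() restores the sort order.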
sorted_ = sorted_ && (num_inserted == 0);
return num_inserted;
}
tuple_id BasicColumnStoreTupleStorageSubBlock::bulkInsertTuplesWithRemappedAttributes(
const std::vector<attribute_id> &attribute_map,
ValueAccessor *accessor) {
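  // attribute_map is indexed in the order of this relation's attributes; each
  // entry gives the ID of the corresponding attribute in the accessor's
  // relation.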
DEBUG_ASSERT(attribute_map.size() == relation_.size());
const tuple_id original_num_tuples = header_->num_tuples;
InvokeOnAnyValueAccessor(
accessor,
[&](auto *accessor) -> void { // NOLINT(build/c++11)
if (relation_.hasNullableAttributes()) {
while (this->hasSpaceToInsert(1) && accessor->next()) {
std::vector<attribute_id>::const_iterator attribute_map_it = attribute_map.begin();
// TODO(chasseur): Column-wise copy is probably preferable to
// tuple-at-a-time, but will require some API changes.
for (CatalogRelationSchema::const_iterator attr_it = relation_.begin();
attr_it != relation_.end();
++attr_it) {
const attribute_id attr_id = attr_it->getID();
const std::size_t attr_size = attr_it->getType().maximumByteLength();
if (attr_it->getType().isNullable()) {
const void *attr_value
= accessor->template getUntypedValue<true>(*attribute_map_it);
if (attr_value == nullptr) {
column_null_bitmaps_[attr_id].setBit(header_->num_tuples, true);
} else {
memcpy(static_cast<char*>(column_stripes_[attr_id])
+ header_->num_tuples * attr_size,
attr_value,
attr_size);
}
} else {
memcpy(static_cast<char*>(column_stripes_[attr_id])
+ header_->num_tuples * attr_size,
accessor->template getUntypedValue<false>(*attribute_map_it),
attr_size);
}
++attribute_map_it;
}
++(header_->num_tuples);
}
} else {
while (this->hasSpaceToInsert(1) && accessor->next()) {
std::vector<attribute_id>::const_iterator attribute_map_it = attribute_map.begin();
for (CatalogRelationSchema::const_iterator attr_it = relation_.begin();
attr_it != relation_.end();
++attr_it) {
const std::size_t attr_size = attr_it->getType().maximumByteLength();
memcpy(static_cast<char*>(column_stripes_[attr_it->getID()])
+ header_->num_tuples * attr_size,
accessor->template getUntypedValue<false>(*attribute_map_it),
attr_size);
++attribute_map_it;
}
++(header_->num_tuples);
}
}
});
const tuple_id num_inserted = header_->num_tuples - original_num_tuples;
sorted_ = sorted_ && (num_inserted == 0);
return num_inserted;
}
const void* BasicColumnStoreTupleStorageSubBlock::getAttributeValue(
const tuple_id tuple,
const attribute_id attr) const {
DEBUG_ASSERT(hasTupleWithID(tuple));
DEBUG_ASSERT(relation_.hasAttributeWithId(attr));
if ((!column_null_bitmaps_.elementIsNull(attr))
&& column_null_bitmaps_[attr].getBit(tuple)) {
return nullptr;
}
return static_cast<const char*>(column_stripes_[attr])
+ (tuple * relation_.getAttributeById(attr)->getType().maximumByteLength());
}
TypedValue BasicColumnStoreTupleStorageSubBlock::getAttributeValueTyped(
const tuple_id tuple,
const attribute_id attr) const {
const Type &attr_type = relation_.getAttributeById(attr)->getType();
const void *untyped_value = getAttributeValue(tuple, attr);
return (untyped_value == nullptr)
? attr_type.makeNullValue()
: attr_type.makeValue(untyped_value, attr_type.maximumByteLength());
}
ValueAccessor* BasicColumnStoreTupleStorageSubBlock::createValueAccessor(
const TupleIdSequence *sequence) const {
BasicColumnStoreValueAccessor *base_accessor
= new BasicColumnStoreValueAccessor(relation_,
relation_,
header_->num_tuples,
column_stripes_,
column_null_bitmaps_);
if (sequence == nullptr) {
return base_accessor;
} else {
return new TupleIdSequenceAdapterValueAccessor<BasicColumnStoreValueAccessor>(
base_accessor,
*sequence);
}
}
bool BasicColumnStoreTupleStorageSubBlock::canSetAttributeValuesInPlaceTyped(
const tuple_id tuple,
const std::unordered_map<attribute_id, TypedValue> &new_values) const {
DEBUG_ASSERT(hasTupleWithID(tuple));
if (!sort_specified_) {
return true;
}
for (std::unordered_map<attribute_id, TypedValue>::const_iterator it
= new_values.begin();
it != new_values.end();
++it) {
DEBUG_ASSERT(relation_.hasAttributeWithId(it->first));
// TODO(chasseur): Could check if the new value for sort column would
// remain in the same position in the stripe.
if (it->first == sort_column_id_) {
return false;
}
}
return true;
}
void BasicColumnStoreTupleStorageSubBlock::setAttributeValueInPlaceTyped(
const tuple_id tuple,
const attribute_id attr,
const TypedValue &value) {
DCHECK(!sort_specified_ || (attr != sort_column_id_));
const Type &attr_type = relation_.getAttributeById(attr)->getType();
void *value_position = static_cast<char*>(column_stripes_[attr])
+ tuple * attr_type.maximumByteLength();
if (!column_null_bitmaps_.elementIsNull(attr)) {
if (value.isNull()) {
column_null_bitmaps_[attr].setBit(tuple, true);
return;
} else {
column_null_bitmaps_[attr].setBit(tuple, false);
}
}
value.copyInto(value_position);
}
bool BasicColumnStoreTupleStorageSubBlock::deleteTuple(const tuple_id tuple) {
DEBUG_ASSERT(hasTupleWithID(tuple));
if (sort_specified_
&& !column_null_bitmaps_.elementIsNull(sort_column_id_)
&& column_null_bitmaps_[sort_column_id_].getBit(tuple)) {
--(header_->nulls_in_sort_column);
}
if (tuple == header_->num_tuples - 1) {
// If deleting the last tuple, simply truncate.
--(header_->num_tuples);
// Clear any null bits for the tuple.
for (PtrVector<BitVector<false>, true>::iterator it = column_null_bitmaps_.begin();
it != column_null_bitmaps_.end();
++it) {
if (!it.isNull()) {
it->setBit(header_->num_tuples, false);
}
}
return false;
} else {
// Pack each column stripe.
shiftTuples(tuple, tuple + 1, header_->num_tuples - tuple - 1);
shiftNullBitmaps(tuple, -1);
--(header_->num_tuples);
return true;
}
}
bool BasicColumnStoreTupleStorageSubBlock::bulkDeleteTuples(TupleIdSequence *tuples) {
if (tuples->empty()) {
// Nothing to do.
return false;
}
const tuple_id front = tuples->front();
const tuple_id back = tuples->back();
const tuple_id num_tuples = tuples->numTuples();
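  // If the deleted tuples form one contiguous run that extends through the
  // last tuple in the block, no repacking is needed: simply truncate.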
if ((back == header_->num_tuples - 1)
&& (back - front == num_tuples - 1)) {
// Just truncate the back.
header_->num_tuples = front;
// Clear any null bits.
for (PtrVector<BitVector<false>, true>::iterator it = column_null_bitmaps_.begin();
it != column_null_bitmaps_.end();
++it) {
if (!it.isNull()) {
it->setBitRange(header_->num_tuples, num_tuples, false);
}
}
return false;
}
// Pack the non-deleted tuples.
tuple_id dest_position = front;
tuple_id src_tuple = dest_position;
TupleIdSequence::const_iterator it = tuples->begin();
tuple_id tail_shifted_distance = 0;
for (tuple_id current_id = tuples->front();
current_id < header_->num_tuples;
++current_id, ++src_tuple) {
if (current_id == *it) {
// Don't copy a deleted tuple.
shiftNullBitmaps(*it - tail_shifted_distance, -1);
++tail_shifted_distance;
++it;
if (it == tuples->end()) {
// No more to delete, so copy all the remaining tuples in one go.
shiftTuples(dest_position,
src_tuple + 1,
header_->num_tuples - back - 1);
break;
}
} else {
// Shift the next tuple forward.
shiftTuples(dest_position, src_tuple, 1);
++dest_position;
}
}
header_->num_tuples -= static_cast<tuple_id>(num_tuples);
return true;
}
predicate_cost_t BasicColumnStoreTupleStorageSubBlock::estimatePredicateEvaluationCost(
const ComparisonPredicate &predicate) const {
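  // A literal comparison against the sort column can be answered by binary
  // search over the sorted stripe (see getMatchesForPredicate()); any other
  // comparison requires scanning an entire column stripe.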
if (sort_specified_ && predicate.isAttributeLiteralComparisonPredicate()) {
std::pair<bool, attribute_id> comparison_attr = predicate.getAttributeFromAttributeLiteralComparison();
if (comparison_attr.second == sort_column_id_) {
return predicate_cost::kBinarySearch;
}
}
return predicate_cost::kColumnScan;
}
TupleIdSequence* BasicColumnStoreTupleStorageSubBlock::getMatchesForPredicate(
const ComparisonPredicate &predicate,
const TupleIdSequence *filter) const {
DCHECK(sort_specified_) <<
"Called BasicColumnStoreTupleStorageSubBlock::getMatchesForPredicate() "
"for an unsorted column store (predicate should have been evaluated "
"with a scan instead).";
TupleIdSequence *matches = SortColumnPredicateEvaluator::EvaluatePredicateForUncompressedSortColumn(
predicate,
relation_,
sort_column_id_,
column_stripes_[sort_column_id_],
header_->num_tuples - header_->nulls_in_sort_column);
if (matches == nullptr) {
FATAL_ERROR("Called BasicColumnStoreTupleStorageSubBlock::getMatchesForPredicate() "
"with a predicate that can only be evaluated with a scan.");
} else {
if (filter != nullptr) {
matches->intersectWith(*filter);
}
return matches;
}
}
void BasicColumnStoreTupleStorageSubBlock::insertTupleAtPosition(
const Tuple &tuple,
const tuple_id position) {
DEBUG_ASSERT(hasSpaceToInsert(1));
DEBUG_ASSERT(position >= 0);
DEBUG_ASSERT(position < max_tuples_);
if (position != header_->num_tuples) {
// If not inserting in the last position, shift subsequent tuples back.
shiftTuples(position + 1, position, header_->num_tuples - position);
shiftNullBitmaps(position, 1);
}
// Copy attribute values into place in the column stripes.
Tuple::const_iterator value_it = tuple.begin();
CatalogRelationSchema::const_iterator attr_it = relation_.begin();
while (value_it != tuple.end()) {
const attribute_id attr_id = attr_it->getID();
const Type &attr_type = attr_it->getType();
if (value_it->isNull()) {
column_null_bitmaps_[attr_id].setBit(position, true);
if (attr_id == sort_column_id_) {
++(header_->nulls_in_sort_column);
// Copy in a special value that always compares last.
GetLastValueForType(attr_type).copyInto(
static_cast<char*>(column_stripes_[attr_id])
+ position * attr_type.maximumByteLength());
}
} else {
value_it->copyInto(static_cast<char*>(column_stripes_[attr_id])
+ position * attr_type.maximumByteLength());
}
++value_it;
++attr_it;
}
++(header_->num_tuples);
}
void BasicColumnStoreTupleStorageSubBlock::shiftTuples(
const tuple_id dest_position,
const tuple_id src_tuple,
const tuple_id num_tuples) {
for (CatalogRelationSchema::const_iterator attr_it = relation_.begin();
attr_it != relation_.end();
++attr_it) {
size_t attr_length = attr_it->getType().maximumByteLength();
memmove(static_cast<char*>(column_stripes_[attr_it->getID()]) + dest_position * attr_length,
static_cast<const char*>(column_stripes_[attr_it->getID()]) + src_tuple * attr_length,
attr_length * num_tuples);
}
}
void BasicColumnStoreTupleStorageSubBlock::shiftNullBitmaps(
const tuple_id from_position,
const tuple_id distance) {
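  // A negative distance compacts the bitmaps after a deletion (the tail moves
  // toward the front), while a positive distance opens a hole for an
  // insertion (the tail moves toward the back).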
if (relation_.hasNullableAttributes()) {
for (PtrVector<BitVector<false>, true>::size_type idx = 0;
idx < column_null_bitmaps_.size();
++idx) {
if (!column_null_bitmaps_.elementIsNull(idx)) {
if (distance < 0) {
column_null_bitmaps_[idx].shiftTailForward(from_position, -distance);
} else {
column_null_bitmaps_[idx].shiftTailBackward(from_position, distance);
}
}
}
}
}
// TODO(chasseur): This implementation uses out-of-band memory up to the
// total size of tuples contained in this sub-block. It could be done with
// less memory, although the implementation would be more complex.
bool BasicColumnStoreTupleStorageSubBlock::rebuildInternal() {
DCHECK(sort_specified_);
const tuple_id num_tuples = header_->num_tuples;
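  // Overview of the rebuild: (1) sort lightweight (value pointer, tuple_id)
  // references to the sort column out of band; (2) move genuine NULLs behind
  // any real values that tie with the NULL substitute; (3) skip any prefix
  // that is already in order; (4) copy the unsorted tail of every column
  // stripe (and null bitmap) into temporary buffers in sorted order, then
  // write the buffers back over the stripes.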
// Immediately return if 1 or 0 tuples.
if (num_tuples <= 1) {
sorted_ = true;
return false;
}
// Determine the properly-sorted order of tuples.
vector<SortColumnValueReference> sort_column_values;
const char *sort_value_position
= static_cast<const char*>(column_stripes_[sort_column_id_]);
const std::size_t sort_value_size = sort_column_type_->maximumByteLength();
for (tuple_id tid = 0; tid < num_tuples; ++tid) {
// NOTE(chasseur): The insert methods put a special last-comparing
// substitute value for NULLs into the sorted column stripe.
sort_column_values.emplace_back(sort_value_position, tid);
sort_value_position += sort_value_size;
}
SortWrappedValues<SortColumnValueReference, vector<SortColumnValueReference>::iterator>(
*sort_column_type_,
sort_column_values.begin(),
sort_column_values.end());
if (header_->nulls_in_sort_column > 0) {
// If any NULL values exist in the sort column, make sure they are ordered
// after all the "real" values which compare as the same.
vector<SortColumnValueReference>::iterator tail_it =
GetBoundForWrappedValues<SortColumnValueReference,
vector<SortColumnValueReference>::iterator,
LowerBoundFunctor>(
*sort_column_type_,
sort_column_values.begin(),
sort_column_values.end(),
GetLastValueForType(*sort_column_type_).getDataPtr());
if (sort_column_values.end() - tail_it > header_->nulls_in_sort_column) {
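      // The tail found above can contain genuine values that happen to equal
      // the NULL substitute produced by GetLastValueForType(). Partition the
      // tail so those real values come before the true NULLs, which are
      // identified via the sort column's null bitmap.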
vector<SortColumnValueReference> non_nulls;
vector<SortColumnValueReference> true_nulls;
for (vector<SortColumnValueReference>::const_iterator tail_sort_it = tail_it;
tail_sort_it != sort_column_values.end();
++tail_sort_it) {
if (column_null_bitmaps_[sort_column_id_].getBit(tail_sort_it->getTupleID())) {
true_nulls.push_back(*tail_sort_it);
} else {
non_nulls.push_back(*tail_sort_it);
}
}
sort_column_values.erase(tail_it, sort_column_values.end());
sort_column_values.insert(sort_column_values.end(),
non_nulls.begin(),
non_nulls.end());
sort_column_values.insert(sort_column_values.end(),
true_nulls.begin(),
true_nulls.end());
}
}
// If a prefix of the total order of tuples is already sorted, don't bother
// copying it around.
tuple_id ordered_prefix_tuples = 0;
for (vector<SortColumnValueReference>::const_iterator it = sort_column_values.begin();
it != sort_column_values.end();
++it) {
if (it->getTupleID() != ordered_prefix_tuples) {
break;
}
++ordered_prefix_tuples;
}
if (ordered_prefix_tuples == num_tuples) {
// Already sorted.
sorted_ = true;
return false;
}
// Allocate buffers for each resorted column stripe which can hold exactly as
// many values as needed.
PtrVector<ScopedBuffer, true> column_stripe_buffers;
PtrVector<ScopedBuffer, true> null_bitmap_buffers;
PtrVector<BitVector<false>, true> new_null_bitmaps;
const std::size_t bitmap_required_bytes = BitVector<false>::BytesNeeded(max_tuples_);
char *bitmap_location = static_cast<char*>(sub_block_memory_)
+ sizeof(BasicColumnStoreHeader);
for (attribute_id stripe_id = 0; stripe_id <= relation_.getMaxAttributeId(); ++stripe_id) {
if (relation_.hasAttributeWithId(stripe_id)) {
column_stripe_buffers.push_back(
new ScopedBuffer((num_tuples - ordered_prefix_tuples)
* relation_.getAttributeById(stripe_id)->getType().maximumByteLength()));
if (column_null_bitmaps_.elementIsNull(stripe_id)) {
null_bitmap_buffers.push_back(nullptr);
new_null_bitmaps.push_back(nullptr);
} else {
// Make a copy of the null bitmap for this column.
null_bitmap_buffers.push_back(new ScopedBuffer(bitmap_required_bytes));
new_null_bitmaps.push_back(new BitVector<false>(null_bitmap_buffers.back().get(), max_tuples_));
        if (ordered_prefix_tuples > 0) {
          // Copy this column's existing bitmap before bitmap_location is
          // advanced to the next column's bitmap.
          memcpy(null_bitmap_buffers.back().get(), bitmap_location, bitmap_required_bytes);
          // Clear out the tail, which will be reorganized.
          new_null_bitmaps.back().setBitRange(ordered_prefix_tuples,
                                              num_tuples - ordered_prefix_tuples,
                                              false);
        } else {
          new_null_bitmaps.back().clear();
        }
        bitmap_location += bitmap_required_bytes;
      }
} else {
column_stripe_buffers.push_back(nullptr);
null_bitmap_buffers.push_back(nullptr);
new_null_bitmaps.push_back(nullptr);
}
}
// Copy attribute values into the column stripe buffers in properly-sorted
// order.
for (CatalogRelationSchema::const_iterator attr_it = relation_.begin();
attr_it != relation_.end();
++attr_it) {
const size_t attr_length = attr_it->getType().maximumByteLength();
const attribute_id attr_id = attr_it->getID();
for (tuple_id unordered_tuple_num = 0;
unordered_tuple_num < num_tuples - ordered_prefix_tuples;
++unordered_tuple_num) {
memcpy(static_cast<char*>(column_stripe_buffers[attr_id].get())
+ unordered_tuple_num * attr_length,
static_cast<char*>(column_stripes_[attr_id]) +
sort_column_values[ordered_prefix_tuples + unordered_tuple_num].getTupleID() * attr_length,
attr_length);
}
if (!new_null_bitmaps.elementIsNull(attr_id)) {
for (tuple_id tuple_num = ordered_prefix_tuples;
tuple_num < num_tuples;
++tuple_num) {
new_null_bitmaps[attr_id].setBit(
tuple_num,
column_null_bitmaps_[attr_id].getBit(sort_column_values[tuple_num].getTupleID()));
}
}
}
// Overwrite the unsorted tails of the column stripes in this block with the
// sorted values from the buffers. Also copy back any null bitmaps.
bitmap_location = static_cast<char*>(sub_block_memory_)
+ sizeof(BasicColumnStoreHeader);
for (CatalogRelationSchema::const_iterator attr_it = relation_.begin();
attr_it != relation_.end();
++attr_it) {
size_t attr_length = attr_it->getType().maximumByteLength();
memcpy(static_cast<char*>(column_stripes_[attr_it->getID()]) + ordered_prefix_tuples * attr_length,
column_stripe_buffers[attr_it->getID()].get(),
(num_tuples - ordered_prefix_tuples) * attr_length);
if (!null_bitmap_buffers.elementIsNull(attr_it->getID())) {
std::memcpy(bitmap_location,
null_bitmap_buffers[attr_it->getID()].get(),
bitmap_required_bytes);
bitmap_location += bitmap_required_bytes;
}
}
sorted_ = true;
return true;
}
} // namespace quickstep