/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
**/
#include "storage/CompressedColumnStoreTupleStorageSubBlock.hpp"
#include <algorithm>
#include <cstddef>
#include <cstdint>
#include <cstring>
#include <functional>
#include <limits>
#include <unordered_set>
#include <utility>
#include <vector>
#include "catalog/CatalogAttribute.hpp"
#include "catalog/CatalogRelationSchema.hpp"
#include "compression/CompressionDictionaryLite.hpp"
#include "expressions/predicate/ComparisonPredicate.hpp"
#include "expressions/predicate/PredicateCost.hpp"
#include "storage/ColumnStoreUtil.hpp"
#include "storage/CompressedColumnStoreValueAccessor.hpp"
#include "storage/StorageBlockInfo.hpp"
#include "storage/StorageBlockLayout.pb.h"
#include "storage/SubBlockTypeRegistry.hpp"
#include "storage/TupleIdSequence.hpp"
#include "types/Type.hpp"
#include "types/TypedValue.hpp"
#include "types/operations/comparisons/Comparison.hpp"
#include "types/operations/comparisons/ComparisonFactory.hpp"
#include "types/operations/comparisons/ComparisonID.hpp"
#include "utility/BitVector.hpp"
#include "utility/Macros.hpp"
#include "utility/PtrVector.hpp"
using std::equal_to;
using std::greater;
using std::less_equal;
using std::lower_bound;
using std::memmove;
using std::not_equal_to;
using std::numeric_limits;
using std::pair;
using std::size_t;
using std::uint8_t;
using std::uint16_t;
using std::uint32_t;
using quickstep::column_store_util::SortColumnPredicateEvaluator;
namespace quickstep {
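// Registers this implementation with the SubBlockTypeRegistry so that blocks
// whose descriptions specify COMPRESSED_COLUMN_STORE are constructed using
// this class.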
QUICKSTEP_REGISTER_TUPLE_STORE(CompressedColumnStoreTupleStorageSubBlock, COMPRESSED_COLUMN_STORE);
CompressedColumnStoreTupleStorageSubBlock::CompressedColumnStoreTupleStorageSubBlock(
const CatalogRelationSchema &relation,
const TupleStorageSubBlockDescription &description,
const bool new_block,
void *sub_block_memory,
const std::size_t sub_block_memory_size)
: CompressedTupleStorageSubBlock(relation,
description,
new_block,
sub_block_memory,
sub_block_memory_size),
uncompressed_nulls_in_sort_column_(0) {
if (!DescriptionIsValid(relation_, description_)) {
FATAL_ERROR("Attempted to construct a CompressedColumnStoreTupleStorageSubBlock "
"from an invalid description.");
}
sort_column_id_ = description_.GetExtension(
CompressedColumnStoreTupleStorageSubBlockDescription::sort_attribute_id);
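  // The leading tuple_id-sized field of the sub-block stores the tuple count,
  // so a nonzero count on a pre-existing block means there is data to map in.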
if ((!new_block) && (*static_cast<tuple_id*>(sub_block_memory_) != 0)) {
initialize();
}
}
bool CompressedColumnStoreTupleStorageSubBlock::DescriptionIsValid(
const CatalogRelationSchema &relation,
const TupleStorageSubBlockDescription &description) {
// Make sure description is initialized and specifies CompressedColumnStore.
if (!description.IsInitialized()) {
return false;
}
if (description.sub_block_type() != TupleStorageSubBlockDescription::COMPRESSED_COLUMN_STORE) {
return false;
}
const Comparison &less_comparison = ComparisonFactory::GetComparison(ComparisonID::kLess);
// Make sure the specified sort attribute exists and can be ordered by
// LessComparison.
if (!description.HasExtension(CompressedColumnStoreTupleStorageSubBlockDescription::sort_attribute_id)) {
return false;
}
attribute_id sort_attribute_id = description.GetExtension(
CompressedColumnStoreTupleStorageSubBlockDescription::sort_attribute_id);
if (!relation.hasAttributeWithId(sort_attribute_id)) {
return false;
}
const Type &sort_attr_type = relation.getAttributeById(sort_attribute_id)->getType();
if (!less_comparison.canCompareTypes(sort_attr_type, sort_attr_type)) {
return false;
}
// Make sure all the specified compressed attributes exist and can be ordered
// by LessComparison.
std::unordered_set<attribute_id> compressed_variable_length_attributes;
for (int compressed_attribute_num = 0;
compressed_attribute_num < description.ExtensionSize(
CompressedColumnStoreTupleStorageSubBlockDescription::compressed_attribute_id);
++compressed_attribute_num) {
attribute_id compressed_attribute_id = description.GetExtension(
CompressedColumnStoreTupleStorageSubBlockDescription::compressed_attribute_id,
compressed_attribute_num);
if (!relation.hasAttributeWithId(compressed_attribute_id)) {
return false;
}
const Type &attr_type = relation.getAttributeById(compressed_attribute_id)->getType();
if (!less_comparison.canCompareTypes(attr_type, attr_type)) {
return false;
}
if (attr_type.isVariableLength()) {
compressed_variable_length_attributes.insert(compressed_attribute_id);
}
}
// If the relation has variable-length attributes, make sure they are all
// compressed.
if (relation.isVariableLength()) {
for (CatalogRelationSchema::const_iterator attr_it = relation.begin();
attr_it != relation.end();
++attr_it) {
if (attr_it->getType().isVariableLength()) {
if (compressed_variable_length_attributes.find(attr_it->getID())
== compressed_variable_length_attributes.end()) {
return false;
}
}
}
}
return true;
}
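// A minimal sketch of how a caller might build a description that passes the
// checks above, assuming the standard proto2 extension setters; the concrete
// attribute IDs (0 as the sort column, 1 as a compressed column) are purely
// hypothetical:
//
//   TupleStorageSubBlockDescription description;
//   description.set_sub_block_type(
//       TupleStorageSubBlockDescription::COMPRESSED_COLUMN_STORE);
//   description.SetExtension(
//       CompressedColumnStoreTupleStorageSubBlockDescription::sort_attribute_id, 0);
//   description.AddExtension(
//       CompressedColumnStoreTupleStorageSubBlockDescription::compressed_attribute_id, 1);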
// TODO(chasseur): Make this heuristic better.
std::size_t CompressedColumnStoreTupleStorageSubBlock::EstimateBytesPerTuple(
const CatalogRelationSchema &relation,
const TupleStorageSubBlockDescription &description) {
DEBUG_ASSERT(DescriptionIsValid(relation, description));
std::unordered_set<attribute_id> compressed_attributes;
for (int compressed_attribute_num = 0;
compressed_attribute_num < description.ExtensionSize(
CompressedColumnStoreTupleStorageSubBlockDescription::compressed_attribute_id);
++compressed_attribute_num) {
compressed_attributes.insert(description.GetExtension(
CompressedColumnStoreTupleStorageSubBlockDescription::compressed_attribute_id,
compressed_attribute_num));
}
size_t total_size = 0;
for (CatalogRelationSchema::const_iterator attr_it = relation.begin();
attr_it != relation.end();
++attr_it) {
if (compressed_attributes.find(attr_it->getID()) == compressed_attributes.end()) {
total_size += attr_it->getType().estimateAverageByteLength();
} else {
      // For compressed attributes, estimate that compression shrinks values
      // to roughly 1/3 of their average uncompressed byte length.
total_size += attr_it->getType().estimateAverageByteLength() / 3;
}
}
  // NOTE(chasseur): We round up the number of bytes needed in the NULL
  // bitmaps to avoid estimating 0 bytes for a relation with fewer than 8
  // attributes which are all NullType. We estimate the bytes needed in the
// NULL bitmaps based on the number of nullable attributes in the base
// relation (not only those that are not specified as compressed in
// 'description') because if compression does not actually save any space,
// then such attributes will be stored uncompressed and still require space
// in the NULL bitmaps.
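  // For example, a relation with 3 nullable attributes adds
  // (3 + 7) >> 3 = 1 byte of NULL-bitmap space to the per-tuple estimate.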
return total_size + ((relation.numNullableAttributes() + 7) >> 3);
}
const void* CompressedColumnStoreTupleStorageSubBlock::getAttributeValue(
const tuple_id tuple,
const attribute_id attr) const {
DEBUG_ASSERT(hasTupleWithID(tuple));
DEBUG_ASSERT(supportsUntypedGetAttributeValue(attr));
if (dictionary_coded_attributes_[attr]) {
return dictionaries_.atUnchecked(attr).getUntypedValueForCode<true>(
compressedGetCode(tuple, attr));
} else {
return getAttributePtr<true>(tuple, attr);
}
}
TypedValue CompressedColumnStoreTupleStorageSubBlock::getAttributeValueTyped(
const tuple_id tuple,
const attribute_id attr) const {
DEBUG_ASSERT(hasTupleWithID(tuple));
if (dictionary_coded_attributes_[attr]) {
return dictionaries_.atUnchecked(attr).getTypedValueForCode(
compressedGetCode(tuple, attr));
} else if (truncated_attributes_[attr]) {
if (truncated_attribute_is_int_[attr]) {
return TypedValue(static_cast<int>(compressedGetCode(tuple, attr)));
} else {
return TypedValue(static_cast<std::int64_t>(compressedGetCode(tuple, attr)));
}
} else {
const Type &attr_type = relation_.getAttributeById(attr)->getType();
const void *untyped_value = getAttributePtr(tuple, attr);
return (untyped_value == nullptr)
? attr_type.makeNullValue()
: attr_type.makeValue(untyped_value, attr_type.maximumByteLength());
}
}
ValueAccessor* CompressedColumnStoreTupleStorageSubBlock::createValueAccessor(
const TupleIdSequence *sequence) const {
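  // The accessor takes logical and physical relation schemas as separate
  // arguments; for this block type they coincide, which is why relation_ is
  // passed twice below (an inference from the call, not documented here).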
CompressedColumnStoreValueAccessor *base_accessor
= new CompressedColumnStoreValueAccessor(relation_,
relation_,
*static_cast<const tuple_id*>(sub_block_memory_),
compression_info_,
dictionary_coded_attributes_,
truncated_attributes_,
truncated_attribute_is_int_,
dictionaries_,
column_stripes_,
uncompressed_column_null_bitmaps_);
if (sequence == nullptr) {
return base_accessor;
} else {
return new TupleIdSequenceAdapterValueAccessor<CompressedColumnStoreValueAccessor>(
base_accessor,
*sequence);
}
}
bool CompressedColumnStoreTupleStorageSubBlock::deleteTuple(const tuple_id tuple) {
DEBUG_ASSERT(hasTupleWithID(tuple));
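  // Following the TupleStorageSubBlock convention, the return value reports
  // whether any surviving tuples had their IDs changed: truncating the last
  // tuple returns false, while deleting from the middle shifts later tuples
  // forward and returns true.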
if (compression_info_.uncompressed_attribute_has_nulls(sort_column_id_)
&& uncompressed_column_null_bitmaps_[sort_column_id_].getBit(tuple)) {
--uncompressed_nulls_in_sort_column_;
}
if (tuple == *static_cast<const tuple_id*>(sub_block_memory_) - 1) {
--(*static_cast<tuple_id*>(sub_block_memory_));
// Clear any null bits for the tuple.
if (compression_info_.null_bitmap_bits() > 0) {
for (PtrVector<BitVector<false>, true>::iterator it
= uncompressed_column_null_bitmaps_.begin();
it != uncompressed_column_null_bitmaps_.end();
++it) {
if (!it.isNull()) {
it->setBit(*static_cast<const tuple_id*>(sub_block_memory_), false);
}
}
}
return false;
} else {
// Shift subsequent tuples forward.
shiftTuples(tuple, tuple + 1, *static_cast<const tuple_id*>(sub_block_memory_) - tuple - 1);
shiftUncompressedNullBitmaps(tuple, -1);
--(*static_cast<tuple_id*>(sub_block_memory_));
return true;
}
}
bool CompressedColumnStoreTupleStorageSubBlock::bulkDeleteTuples(TupleIdSequence *tuples) {
if (tuples->empty()) {
// Nothing to do.
return false;
}
const tuple_id front = tuples->front();
const tuple_id back = tuples->back();
const tuple_id num_tuples = tuples->numTuples();
if ((back == *static_cast<const tuple_id*>(sub_block_memory_) - 1)
&& (back - front == num_tuples - 1)) {
// Just truncate the back.
*static_cast<tuple_id*>(sub_block_memory_) = front;
// Clear any null bits.
if (compression_info_.null_bitmap_bits() > 0) {
for (PtrVector<BitVector<false>, true>::iterator it
= uncompressed_column_null_bitmaps_.begin();
it != uncompressed_column_null_bitmaps_.end();
++it) {
if (!it.isNull()) {
it->setBitRange(*static_cast<const tuple_id*>(sub_block_memory_),
num_tuples,
false);
}
}
}
return false;
}
// Pack the non-deleted tuples.
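  // For example, deleting {2, 4} from a block of 6 tuples shifts tuple 3 into
  // position 2 and tuple 5 into position 3, leaving 4 tuples.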
tuple_id dest_position = front;
tuple_id src_tuple = dest_position;
TupleIdSequence::const_iterator it = tuples->begin();
tuple_id tail_shifted_distance = 0;
for (tuple_id current_id = front;
current_id < *static_cast<const tuple_id*>(sub_block_memory_);
++current_id, ++src_tuple) {
if (current_id == *it) {
// Don't copy a deleted tuple.
shiftUncompressedNullBitmaps(*it - tail_shifted_distance, -1);
++tail_shifted_distance;
++it;
if (it == tuples->end()) {
// No more to delete, so copy all the remaining tuples in one go.
shiftTuples(dest_position,
src_tuple + 1,
*static_cast<const tuple_id*>(sub_block_memory_) - back - 1);
break;
}
} else {
// Shift the next tuple forward.
shiftTuples(dest_position, src_tuple, 1);
++dest_position;
}
}
*static_cast<tuple_id*>(sub_block_memory_) -= num_tuples;
return true;
}
predicate_cost_t CompressedColumnStoreTupleStorageSubBlock::estimatePredicateEvaluationCost(
const ComparisonPredicate &predicate) const {
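  // The estimates below are ordered cheapest-first: a binary search on the
  // sort column beats a scan over fixed-width compressed codes, which in turn
  // beats a full uncompressed column scan.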
if (predicate.isAttributeLiteralComparisonPredicate()) {
std::pair<bool, attribute_id> comparison_attr = predicate.getAttributeFromAttributeLiteralComparison();
if (comparison_attr.second == sort_column_id_) {
return predicate_cost::kBinarySearch;
} else if (dictionary_coded_attributes_[comparison_attr.second]
|| truncated_attributes_[comparison_attr.second]) {
return predicate_cost::kCompressedColumnScan;
}
}
return predicate_cost::kColumnScan;
}
TupleIdSequence* CompressedColumnStoreTupleStorageSubBlock::getMatchesForPredicate(
const ComparisonPredicate &predicate,
const TupleIdSequence *filter) const {
if (dictionary_coded_attributes_[sort_column_id_] || truncated_attributes_[sort_column_id_]) {
// NOTE(chasseur): The version from CompressedTupleStorageSubBlock will in
// turn call getEqualCodes(), getNotEqualCodes(), or getCodesInRange() as
// necessary for this block, which will use a fast binary search if
// evaluating a predicate on the sort column.
return CompressedTupleStorageSubBlock::getMatchesForPredicate(predicate, filter);
} else {
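    // The sort column keeps its NULLs at the tail of the stripe, so only the
    // leading non-null prefix participates in the binary search below.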
TupleIdSequence *matches = SortColumnPredicateEvaluator::EvaluatePredicateForUncompressedSortColumn(
predicate,
relation_,
sort_column_id_,
column_stripes_[sort_column_id_],
*static_cast<const tuple_id*>(sub_block_memory_) - uncompressed_nulls_in_sort_column_);
if (matches == nullptr) {
// TODO(chasseur): There is considerable duplication of effort in
// SortColumnPredicateEvaluator::EvaluatePredicateForUncompressedSortColumn()
// and CompressedTupleStorageSubBlock::getMatchesForPredicate() which we
// should try to eliminate.
return CompressedTupleStorageSubBlock::getMatchesForPredicate(predicate, filter);
} else {
if (filter != nullptr) {
matches->intersectWith(*filter);
}
return matches;
}
}
}
void CompressedColumnStoreTupleStorageSubBlock::rebuild() {
if (builder_.get() != nullptr) {
builder_->buildCompressedColumnStoreTupleStorageSubBlock(sub_block_memory_);
builder_.reset();
initialize();
}
}
std::uint32_t CompressedColumnStoreTupleStorageSubBlock::compressedGetCode(
const tuple_id tid,
const attribute_id attr_id) const {
DEBUG_ASSERT(hasTupleWithID(tid));
DEBUG_ASSERT((dictionary_coded_attributes_[attr_id]) || (truncated_attributes_[attr_id]));
const void *code_location = getAttributePtr<false>(tid, attr_id);
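  // Compressed codes are packed as native-endian unsigned integers of exactly
  // 1, 2, or 4 bytes; read at the stored width, then widen to uint32_t.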
switch (compression_info_.attribute_size(attr_id)) {
case 1:
return *static_cast<const std::uint8_t*>(code_location);
case 2:
return *static_cast<const std::uint16_t*>(code_location);
case 4:
return *static_cast<const std::uint32_t*>(code_location);
default:
FATAL_ERROR("Unexpected byte-length (not 1, 2, or 4) for compressed "
"attribute ID " << attr_id
<< " in CompressedColumnStoreTupleStorageSubBlock::compressedGetCode()");
}
}
TupleIdSequence* CompressedColumnStoreTupleStorageSubBlock::getEqualCodes(
const attribute_id attr_id,
const std::uint32_t code,
const TupleIdSequence *filter) const {
if (attr_id == sort_column_id_) {
// Special (fast) case: do a binary search of the sort column.
pair<uint32_t, uint32_t> code_range(code, code + 1);
// Adjust the upper limit if doing so can avoid an extra binary search.
if (dictionary_coded_attributes_[attr_id]) {
if (code_range.second == compressedGetDictionary(attr_id).numberOfCodes()) {
code_range.second = numeric_limits<uint32_t>::max();
}
} else if (code_range.first == GetMaxTruncatedValue(compression_info_.attribute_size(attr_id))) {
code_range.second = numeric_limits<uint32_t>::max();
}
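    // For example, with a 16-entry dictionary and code == 15, widening the
    // range to [15, uint32_t max) lets getCompressedSortColumnRange() take
    // its end-of-block shortcut instead of running a second lower_bound().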
pair<tuple_id, tuple_id> tuple_range = getCompressedSortColumnRange(code_range);
TupleIdSequence *matches = new TupleIdSequence(*static_cast<const tuple_id*>(sub_block_memory_));
matches->setRange(tuple_range.first, tuple_range.second - tuple_range.first, true);
if (filter != nullptr) {
matches->intersectWith(*filter);
}
return matches;
} else {
return getCodesSatisfyingComparison<equal_to>(attr_id, code, filter);
}
}
TupleIdSequence* CompressedColumnStoreTupleStorageSubBlock::getNotEqualCodes(
const attribute_id attr_id,
const std::uint32_t code,
const TupleIdSequence *filter) const {
if (attr_id == sort_column_id_) {
// Special (fast) case: do a binary search of the sort column.
pair<uint32_t, uint32_t> code_range(code, code + 1);
// Adjust the upper limit if doing so can avoid an extra binary search.
if (dictionary_coded_attributes_[attr_id]) {
if (code_range.second == compressedGetDictionary(attr_id).numberOfCodes()) {
code_range.second = numeric_limits<uint32_t>::max();
}
} else if (code_range.first == GetMaxTruncatedValue(compression_info_.attribute_size(attr_id))) {
code_range.second = numeric_limits<uint32_t>::max();
}
pair<tuple_id, tuple_id> tuple_range = getCompressedSortColumnRange(code_range);
// We searched for the range of equal codes, so return its complement.
TupleIdSequence *matches = new TupleIdSequence(*static_cast<const tuple_id*>(sub_block_memory_));
matches->setRange(0, tuple_range.first, true);
matches->setRange(tuple_range.second,
*static_cast<const tuple_id*>(sub_block_memory_) - tuple_range.second,
true);
if (filter != nullptr) {
matches->intersectWith(*filter);
}
return matches;
} else {
return getCodesSatisfyingComparison<not_equal_to>(attr_id, code, filter);
}
}
TupleIdSequence* CompressedColumnStoreTupleStorageSubBlock::getNotEqualCodesExcludingNull(
const attribute_id attr_id,
const std::uint32_t code,
const std::uint32_t null_code,
const TupleIdSequence *filter) const {
#ifdef QUICKSTEP_ENABLE_VECTOR_PREDICATE_SHORT_CIRCUIT
static constexpr bool short_circuit = true;
#else
static constexpr bool short_circuit = false;
#endif
if (attr_id == sort_column_id_) {
// Special (fast) case: do a binary search of the sort column.
pair<uint32_t, uint32_t> code_range(code, code + 1);
// Adjust the upper limit if doing so can avoid an extra binary search.
if (dictionary_coded_attributes_[attr_id]) {
if (code_range.second == compressedGetDictionary(attr_id).numberOfCodes()) {
code_range.second = numeric_limits<uint32_t>::max();
}
} else if (code_range.first == GetMaxTruncatedValue(compression_info_.attribute_size(attr_id))) {
code_range.second = numeric_limits<uint32_t>::max();
}
pair<tuple_id, tuple_id> tuple_range = getCompressedSortColumnRange(code_range);
// Search for the beginning of nulls in the sort column.
pair<uint32_t, uint32_t> null_range(null_code, numeric_limits<uint32_t>::max());
pair<tuple_id, tuple_id> limit_range = getCompressedSortColumnRange(null_range);
// We searched for the range of equal codes, so return its complement.
TupleIdSequence *matches = new TupleIdSequence(*static_cast<const tuple_id*>(sub_block_memory_));
matches->setRange(0, tuple_range.first, true);
matches->setRange(tuple_range.second, limit_range.first - tuple_range.second, true);
if (filter != nullptr) {
matches->intersectWith(*filter);
}
return matches;
} else {
TupleIdSequence *matches = new TupleIdSequence(*static_cast<const tuple_id*>(sub_block_memory_));
const void *attr_stripe = column_stripes_[attr_id];
if (!short_circuit || (filter == nullptr)) {
switch (compression_info_.attribute_size(attr_id)) {
case 1:
for (tuple_id tid = 0;
tid < *static_cast<const tuple_id*>(sub_block_memory_);
++tid) {
if ((code != static_cast<const uint8_t*>(attr_stripe)[tid])
&& (null_code != static_cast<const uint8_t*>(attr_stripe)[tid])) {
matches->set(tid, true);
}
}
break;
case 2:
for (tuple_id tid = 0;
tid < *static_cast<const tuple_id*>(sub_block_memory_);
++tid) {
if ((code != static_cast<const uint16_t*>(attr_stripe)[tid])
&& (null_code != static_cast<const uint16_t*>(attr_stripe)[tid])) {
matches->set(tid, true);
}
}
break;
case 4:
for (tuple_id tid = 0;
             tid < *static_cast<const tuple_id*>(sub_block_memory_);
++tid) {
          if ((code != static_cast<const uint32_t*>(attr_stripe)[tid])
              && (null_code != static_cast<const uint32_t*>(attr_stripe)[tid])) {
matches->set(tid, true);
}
}
break;
default:
FATAL_ERROR("Unexpected byte-length (not 1, 2, or 4) for compressed "
"attribute ID " << attr_id
<< " in CompressedColumnStoreTupleStorageSubBlock::getNotEqualCodesExcludingNull()");
}
if (filter != nullptr) {
matches->intersectWith(*filter);
}
} else {
switch (compression_info_.attribute_size(attr_id)) {
case 1:
for (TupleIdSequence::const_iterator filter_it = filter->begin();
filter_it != filter->end();
++filter_it) {
if ((code != static_cast<const uint8_t*>(attr_stripe)[*filter_it])
&& (null_code != static_cast<const uint8_t*>(attr_stripe)[*filter_it])) {
matches->set(*filter_it, true);
}
}
break;
case 2:
for (TupleIdSequence::const_iterator filter_it = filter->begin();
filter_it != filter->end();
++filter_it) {
if ((code != static_cast<const uint16_t*>(attr_stripe)[*filter_it])
&& (null_code != static_cast<const uint16_t*>(attr_stripe)[*filter_it])) {
matches->set(*filter_it, true);
}
}
break;
case 4:
for (TupleIdSequence::const_iterator filter_it = filter->begin();
filter_it != filter->end();
++filter_it) {
          if ((code != static_cast<const uint32_t*>(attr_stripe)[*filter_it])
              && (null_code != static_cast<const uint32_t*>(attr_stripe)[*filter_it])) {
matches->set(*filter_it, true);
}
}
break;
default:
FATAL_ERROR("Unexpected byte-length (not 1, 2, or 4) for compressed "
"attribute ID " << attr_id
<< " in CompressedColumnStoreTupleStorageSubBlock::getNotEqualCodesExcludingNull()");
}
}
return matches;
}
}
TupleIdSequence* CompressedColumnStoreTupleStorageSubBlock::getLessCodes(
const attribute_id attr_id,
const std::uint32_t code,
const TupleIdSequence *filter) const {
if (attr_id == sort_column_id_) {
// Special (fast) case: do a binary search of the sort column.
TupleIdSequence *matches = new TupleIdSequence(*static_cast<const tuple_id*>(sub_block_memory_));
pair<tuple_id, tuple_id> tuple_range
= getCompressedSortColumnRange(pair<uint32_t, uint32_t>(0, code));
matches->setRange(tuple_range.first, tuple_range.second - tuple_range.first, true);
if (filter != nullptr) {
matches->intersectWith(*filter);
}
return matches;
} else {
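    // Note the inverted functor: getCodesSatisfyingComparison() applies
    // comp(code, stored_code), so "stored < code" is expressed as greater
    // (and getGreaterOrEqualCodes() below likewise uses less_equal).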
return getCodesSatisfyingComparison<greater>(attr_id, code, filter);
}
}
TupleIdSequence* CompressedColumnStoreTupleStorageSubBlock::getGreaterOrEqualCodes(
const attribute_id attr_id,
const std::uint32_t code,
const TupleIdSequence *filter) const {
if (attr_id == sort_column_id_) {
// Special (fast) case: do a binary search of the sort column.
TupleIdSequence *matches = new TupleIdSequence(*static_cast<const tuple_id*>(sub_block_memory_));
pair<tuple_id, tuple_id> tuple_range
= getCompressedSortColumnRange(pair<uint32_t, uint32_t>(code, numeric_limits<uint32_t>::max()));
matches->setRange(tuple_range.first, tuple_range.second - tuple_range.first, true);
if (filter != nullptr) {
matches->intersectWith(*filter);
}
return matches;
} else {
return getCodesSatisfyingComparison<less_equal>(attr_id, code, filter);
}
}
TupleIdSequence* CompressedColumnStoreTupleStorageSubBlock::getCodesInRange(
const attribute_id attr_id,
const std::pair<std::uint32_t, std::uint32_t> range,
const TupleIdSequence *filter) const {
#ifdef QUICKSTEP_ENABLE_VECTOR_PREDICATE_SHORT_CIRCUIT
static constexpr bool short_circuit = true;
#else
static constexpr bool short_circuit = false;
#endif
TupleIdSequence *matches = new TupleIdSequence(*static_cast<const tuple_id*>(sub_block_memory_));
if (attr_id == sort_column_id_) {
// Special (fast) case: do a binary search of the sort column.
pair<tuple_id, tuple_id> tuple_range = getCompressedSortColumnRange(range);
matches->setRange(tuple_range.first, tuple_range.second - tuple_range.first, true);
if (filter != nullptr) {
matches->intersectWith(*filter);
}
} else {
const void *attr_stripe = column_stripes_[attr_id];
if (!short_circuit || (filter == nullptr)) {
switch (compression_info_.attribute_size(attr_id)) {
case 1:
for (tuple_id tid = 0;
tid < *static_cast<const tuple_id*>(sub_block_memory_);
++tid) {
if (range.first <= (static_cast<const uint8_t*>(attr_stripe)[tid])
&& (static_cast<const uint8_t*>(attr_stripe)[tid] < range.second)) {
matches->set(tid, true);
}
}
break;
case 2:
for (tuple_id tid = 0;
tid < *static_cast<const tuple_id*>(sub_block_memory_);
++tid) {
if (range.first <= (static_cast<const uint16_t*>(attr_stripe)[tid])
&& (static_cast<const uint16_t*>(attr_stripe)[tid] < range.second)) {
matches->set(tid, true);
}
}
break;
case 4:
for (tuple_id tid = 0;
tid < *static_cast<const tuple_id*>(sub_block_memory_);
++tid) {
if (range.first <= (static_cast<const uint32_t*>(attr_stripe)[tid])
&& (static_cast<const uint32_t*>(attr_stripe)[tid] < range.second)) {
matches->set(tid, true);
}
}
break;
default:
FATAL_ERROR("Unexpected byte-length (not 1, 2, or 4) for compressed "
"attribute ID " << attr_id
<< " in CompressedColumnStoreTupleStorageSubBlock::getCodesInRange()");
}
if (filter != nullptr) {
matches->intersectWith(*filter);
}
} else {
switch (compression_info_.attribute_size(attr_id)) {
case 1:
for (TupleIdSequence::const_iterator filter_it = filter->begin();
filter_it != filter->end();
++filter_it) {
if (range.first <= (static_cast<const uint8_t*>(attr_stripe)[*filter_it])
&& (static_cast<const uint8_t*>(attr_stripe)[*filter_it] < range.second)) {
matches->set(*filter_it, true);
}
}
break;
case 2:
for (TupleIdSequence::const_iterator filter_it = filter->begin();
filter_it != filter->end();
++filter_it) {
if (range.first <= (static_cast<const uint16_t*>(attr_stripe)[*filter_it])
&& (static_cast<const uint16_t*>(attr_stripe)[*filter_it] < range.second)) {
matches->set(*filter_it, true);
}
}
break;
case 4:
for (TupleIdSequence::const_iterator filter_it = filter->begin();
filter_it != filter->end();
++filter_it) {
if (range.first <= (static_cast<const uint32_t*>(attr_stripe)[*filter_it])
&& (static_cast<const uint32_t*>(attr_stripe)[*filter_it] < range.second)) {
matches->set(*filter_it, true);
}
}
break;
default:
FATAL_ERROR("Unexpected byte-length (not 1, 2, or 4) for compressed "
"attribute ID " << attr_id
<< " in CompressedColumnStoreTupleStorageSubBlock::getCodesInRange()");
}
}
}
return matches;
}
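// A sketch of the sub-block layout that initialize() maps, reconstructed from
// the code below (initializeCommon() consumes the leading tuple count and
// compression metadata):
//
//   [tuple count + compression info][one NULL bitmap per nullable
//   uncompressed column][one column stripe per attribute, each sized for
//   max_num_tuples]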
void CompressedColumnStoreTupleStorageSubBlock::initialize() {
void *memory_position = initializeCommon();
if (compression_info_.null_bitmap_bits() > 0) {
for (attribute_id attr_id = 0;
attr_id < compression_info_.uncompressed_attribute_has_nulls_size();
++attr_id) {
if (compression_info_.uncompressed_attribute_has_nulls(attr_id)) {
uncompressed_column_null_bitmaps_.push_back(
new BitVector<false>(memory_position,
compression_info_.null_bitmap_bits()));
memory_position = static_cast<char*>(memory_position)
+ BitVector<false>::BytesNeeded(compression_info_.null_bitmap_bits());
if (attr_id == sort_column_id_) {
uncompressed_nulls_in_sort_column_
= uncompressed_column_null_bitmaps_.back().onesCount();
}
} else {
uncompressed_column_null_bitmaps_.push_back(nullptr);
}
}
}
size_t tuple_length = 0;
for (CatalogRelationSchema::const_iterator attr_it = relation_.begin();
attr_it != relation_.end();
++attr_it) {
tuple_length += compression_info_.attribute_size(attr_it->getID());
}
size_t max_num_tuples = (static_cast<const char*>(sub_block_memory_)
+ sub_block_memory_size_ - static_cast<const char*>(memory_position))
/ tuple_length;
column_stripes_.resize(relation_.getMaxAttributeId() + 1, nullptr);
for (CatalogRelationSchema::const_iterator attr_it = relation_.begin();
attr_it != relation_.end();
++attr_it) {
column_stripes_[attr_it->getID()] = memory_position;
memory_position = static_cast<char*>(memory_position)
+ max_num_tuples * compression_info_.attribute_size(attr_it->getID());
}
}
void CompressedColumnStoreTupleStorageSubBlock::shiftTuples(
const tuple_id dest_position,
const tuple_id src_tuple,
const tuple_id num_tuples) {
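  // memmove() (rather than memcpy()) is required here: when tuples are
  // compacted after a deletion, the source and destination ranges within a
  // stripe overlap.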
for (attribute_id attr_id = 0;
attr_id < compression_info_.attribute_size_size();
++attr_id) {
size_t attr_length = compression_info_.attribute_size(attr_id);
if (attr_length > 0) {
memmove(static_cast<char*>(column_stripes_[attr_id]) + dest_position * attr_length,
static_cast<const char*>(column_stripes_[attr_id]) + src_tuple * attr_length,
attr_length * num_tuples);
}
}
}
void CompressedColumnStoreTupleStorageSubBlock::shiftUncompressedNullBitmaps(
const tuple_id from_position,
const tuple_id distance) {
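  // A negative distance compacts each bitmap after a deletion (the tail moves
  // toward the front); a positive distance opens a gap for newly inserted
  // tuples.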
if (compression_info_.null_bitmap_bits() > 0) {
for (PtrVector<BitVector<false>, true>::size_type idx = 0;
idx < uncompressed_column_null_bitmaps_.size();
++idx) {
if (!uncompressed_column_null_bitmaps_.elementIsNull(idx)) {
if (distance < 0) {
uncompressed_column_null_bitmaps_[idx].shiftTailForward(from_position, -distance);
} else {
uncompressed_column_null_bitmaps_[idx].shiftTailBackward(from_position, distance);
}
}
}
}
}
std::pair<tuple_id, tuple_id> CompressedColumnStoreTupleStorageSubBlock::getCompressedSortColumnRange(
const std::pair<std::uint32_t, std::uint32_t> code_range) const {
DEBUG_ASSERT(dictionary_coded_attributes_[sort_column_id_] || truncated_attributes_[sort_column_id_]);
const void *attr_stripe = column_stripes_[sort_column_id_];
pair<tuple_id, tuple_id> tuple_range;
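  // The sort column's stripe is ordered by code, so lower_bound() finds the
  // first tuple whose code is >= a given bound; the result is the half-open
  // tuple range [first, second) whose codes fall in
  // [code_range.first, code_range.second).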
if (code_range.first == 0) {
tuple_range.first = 0;
} else {
switch (compression_info_.attribute_size(sort_column_id_)) {
case 1:
tuple_range.first = lower_bound(static_cast<const uint8_t*>(attr_stripe),
static_cast<const uint8_t*>(attr_stripe)
+ *static_cast<const tuple_id*>(sub_block_memory_),
code_range.first)
- static_cast<const uint8_t*>(attr_stripe);
break;
case 2:
tuple_range.first = lower_bound(static_cast<const uint16_t*>(attr_stripe),
static_cast<const uint16_t*>(attr_stripe)
+ *static_cast<const tuple_id*>(sub_block_memory_),
code_range.first)
- static_cast<const uint16_t*>(attr_stripe);
break;
case 4:
tuple_range.first = lower_bound(static_cast<const uint32_t*>(attr_stripe),
static_cast<const uint32_t*>(attr_stripe)
+ *static_cast<const tuple_id*>(sub_block_memory_),
code_range.first)
- static_cast<const uint32_t*>(attr_stripe);
break;
default:
FATAL_ERROR("Unexpected byte-length (not 1, 2, or 4) for compressed "
"attribute ID " << sort_column_id_
<< " in CompressedColumnStoreTupleStorageSubBlock::getCompressedSortColumnRange()");
}
}
if (code_range.second == numeric_limits<uint32_t>::max()) {
tuple_range.second = *static_cast<const tuple_id*>(sub_block_memory_);
} else {
switch (compression_info_.attribute_size(sort_column_id_)) {
case 1:
tuple_range.second = lower_bound(static_cast<const uint8_t*>(attr_stripe),
static_cast<const uint8_t*>(attr_stripe)
+ *static_cast<const tuple_id*>(sub_block_memory_),
code_range.second)
- static_cast<const uint8_t*>(attr_stripe);
break;
case 2:
tuple_range.second = lower_bound(static_cast<const uint16_t*>(attr_stripe),
static_cast<const uint16_t*>(attr_stripe)
+ *static_cast<const tuple_id*>(sub_block_memory_),
code_range.second)
- static_cast<const uint16_t*>(attr_stripe);
break;
case 4:
tuple_range.second = lower_bound(static_cast<const uint32_t*>(attr_stripe),
static_cast<const uint32_t*>(attr_stripe)
+ *static_cast<const tuple_id*>(sub_block_memory_),
code_range.second)
- static_cast<const uint32_t*>(attr_stripe);
break;
default:
FATAL_ERROR("Unexpected byte-length (not 1, 2, or 4) for compressed "
"attribute ID " << sort_column_id_
<< " in CompressedColumnStoreTupleStorageSubBlock::getCompressedSortColumnRange()");
}
}
return tuple_range;
}
template <template <typename T> class comparison_functor>
TupleIdSequence* CompressedColumnStoreTupleStorageSubBlock::getCodesSatisfyingComparison(
const attribute_id attr_id,
const std::uint32_t code,
const TupleIdSequence *filter) const {
#ifdef QUICKSTEP_ENABLE_VECTOR_PREDICATE_SHORT_CIRCUIT
static constexpr bool short_circuit = true;
#else
static constexpr bool short_circuit = false;
#endif
comparison_functor<uint32_t> comp;
TupleIdSequence *matches = new TupleIdSequence(*static_cast<const tuple_id*>(sub_block_memory_));
const void *attr_stripe = column_stripes_[attr_id];
if (!short_circuit || (filter == nullptr)) {
switch (compression_info_.attribute_size(attr_id)) {
case 1:
for (tuple_id tid = 0;
tid < *static_cast<const tuple_id*>(sub_block_memory_);
++tid) {
if (comp(code, static_cast<const uint8_t*>(attr_stripe)[tid])) {
matches->set(tid, true);
}
}
break;
case 2:
for (tuple_id tid = 0;
tid < *static_cast<const tuple_id*>(sub_block_memory_);
++tid) {
if (comp(code, static_cast<const uint16_t*>(attr_stripe)[tid])) {
matches->set(tid, true);
}
}
break;
case 4:
for (tuple_id tid = 0;
tid < *static_cast<const tuple_id*>(sub_block_memory_);
++tid) {
if (comp(code, static_cast<const uint32_t*>(attr_stripe)[tid])) {
matches->set(tid, true);
}
}
break;
default:
FATAL_ERROR("Unexpected byte-length (not 1, 2, or 4) for compressed "
"attribute ID " << attr_id
<< " in CompressedColumnStoreTupleStorageSubBlock::getCodesSatisfyingComparison()");
}
if (filter != nullptr) {
matches->intersectWith(*filter);
}
} else {
switch (compression_info_.attribute_size(attr_id)) {
case 1:
for (TupleIdSequence::const_iterator filter_it = filter->begin();
filter_it != filter->end();
++filter_it) {
if (comp(code, static_cast<const uint8_t*>(attr_stripe)[*filter_it])) {
matches->set(*filter_it, true);
}
}
break;
case 2:
for (TupleIdSequence::const_iterator filter_it = filter->begin();
filter_it != filter->end();
++filter_it) {
if (comp(code, static_cast<const uint16_t*>(attr_stripe)[*filter_it])) {
matches->set(*filter_it, true);
}
}
break;
case 4:
for (TupleIdSequence::const_iterator filter_it = filter->begin();
filter_it != filter->end();
++filter_it) {
if (comp(code, static_cast<const uint32_t*>(attr_stripe)[*filter_it])) {
matches->set(*filter_it, true);
}
}
break;
default:
FATAL_ERROR("Unexpected byte-length (not 1, 2, or 4) for compressed "
"attribute ID " << attr_id
<< " in CompressedColumnStoreTupleStorageSubBlock::getCodesSatisfyingComparison()");
}
}
return matches;
}
} // namespace quickstep