blob: c8fb3b3c6bf67f8261ef33d98fa0158a2bc3cad3 [file] [log] [blame]
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
**/
#ifndef QUICKSTEP_STORAGE_SMA_INDEX_SUB_BLOCK_HPP_
#define QUICKSTEP_STORAGE_SMA_INDEX_SUB_BLOCK_HPP_
#include <cstddef>
#include <cstdint>
#include <iostream>
#include <memory>
#include "catalog/CatalogTypedefs.hpp"
#include "expressions/predicate/PredicateCost.hpp"
#include "storage/IndexSubBlock.hpp"
#include "storage/StorageBlockInfo.hpp"
#include "storage/SubBlockTypeRegistryMacros.hpp"
#include "types/TypeID.hpp"
#include "types/TypedValue.hpp"
#include "types/operations/comparisons/ComparisonID.hpp"
#include "utility/Macros.hpp"
#include "utility/PtrVector.hpp"
#include "glog/logging.h"
namespace quickstep {
class CatalogRelationSchema;
class ComparisonPredicate;
class IndexSubBlockDescription;
class TupleIdSequence;
class TupleStorageSubBlock;
class Type;
class UncheckedBinaryOperator;
class UncheckedComparator;
/**
* @brief 'Small Materialized Aggregates' Index. This class holds count, min,
* max, and sum aggregates for attributes in a block.
*/
QUICKSTEP_DECLARE_SUB_BLOCK_TYPE_REGISTERED(SMAIndexSubBlock);
/**
* Namespace contains helper functions of the SMA block. This includes functions
* to find the selectivity of predicates over a storage subblock. Also includes
* structs that make up the index.
*/
namespace sma_internal {
/**
* @brief Roughly describes how many tuples of a subblock will be selected by
* a predicate.
* @details kAll, kNone indicate that the SMA has determined that all, or none
* of the tuples will be selected. kSome means that some tuples may be
* selected, but a scan must be performed. kUnknown indicates that the
* SMA tried to answer the predicate but did not have enough information.
* kUnsolved indicates that the predicate has been created but not
* analyzed by the SMA.
*/
enum class Selectivity {
kAll, // Select all tuples in storage subblock.
kSome, // Select some of the tuples.
kNone, // Select none of the tuples.
kUnknown, // The predicate cannot be determined.
kUnsolved // We have not tried to determine the selectivity.
};
/**
* @brief Helper method. Uses the stored values from the SMA Index to determine
* selectivity of a predicate.
*
* @param literal The literal value to compare against.
* @param comparison The id of the predicate comparison function.
* @param min The minimum value associated with that attribute.
* @param max The maximum value associated with that attribute.
* @param less_comparator The less comparator for the attribute type.
* @param equals_comparator The equals comparator for the attribute type.
*
* @return Selectivity of this predicate.
*/
Selectivity getSelectivity(const TypedValue &literal,
const ComparisonID comparison,
const TypedValue &min,
const TypedValue &max,
const UncheckedComparator *less_comparator,
const UncheckedComparator *equals_comparator);
/**
* @brief A simple holding struct for a comparison predicate. Selectivity enum
* indicates if the SMA has been used to solve the predicate and if so,
* what is the selectivity over the block.
*/
struct SMAPredicate {
/**
* @brief Extracts a comparison predicate into an SMAPredicate.
*
* @param predicate A comparison of the form {attribute} {comparison} {literal}
* or {literal} {comparison} {attribute}.
* @return An SMAPredicate pointer which the caller must manage.
*/
static SMAPredicate* ExtractSMAPredicate(const ComparisonPredicate& predicate);
const attribute_id attribute;
const ComparisonID comparison;
const TypedValue literal;
// How much of the TupleStore this predicate will select. kUnsolved indicates
// that the predicate had not been used previously.
Selectivity selectivity;
SMAPredicate(const attribute_id attr,
const ComparisonID comp,
const TypedValue lit)
: attribute(attr),
comparison(comp),
literal(lit),
selectivity(Selectivity::kUnsolved) { }
};
// A 64-bit header.
struct SMAHeader {
// Count refers the SQL aggregate COUNT.
std::uint32_t count_aggregate;
union {
// Consistent refers to the index being in a consistent state.
bool index_consistent;
// Padding is so we keep this data structure to 64 bits in length.
std::uint32_t padding;
};
};
// Reference to an attribute value in a tuple. This struct is used to keep a
// of the min and max value for an attribute.
struct EntryReference {
tuple_id entry_ref_tuple;
// There are certain cases when the entry reference can be invalid such as
// when the index is being rebuilt. value should not be used in this case.
bool valid;
// A copy of the typed value referenced by tuple.
TypedValue value;
};
// Index entry for an attribute in the SMA.
struct SMAEntry {
attribute_id attribute;
// The type of the attribute contained in this entry.
TypeID type_id;
// A reference to the minimum value of the attribute.
EntryReference min_entry_ref;
// A reference to the maximum value of the attribute.
EntryReference max_entry_ref;
// The sum of the attribute. This is only used in the case of numeric types.
TypedValue sum_aggregate;
};
} // namespace sma_internal
/**
* @brief Small Materialized Aggregate SubBlock.
* @details Keeps account of several types of aggregate functions per Block.
* Currently supports min, max, sum, and count (for numeric types).
* Physical organization of the block is as follows:
*
* [SMAHeader][SMAEntry x number of indexed attributes]
*/
class SMAIndexSubBlock : public IndexSubBlock {
public:
/**
* @brief Constructor.
*
* @param tuple_store The TupleStorageSubBlock whose contents are indexed by
* this IndexSubBlock.
* @param description A description containing any parameters needed to
* construct this SubBlock (e.g. what attributes to index on).
* Implementation-specific parameters are defined as extensions in
* StorageBlockLayout.proto.
* @param new_block Whether this is a newly-created block.
* @param sub_block_memory The memory slot to use for the block's contents.
* @param sub_block_memory_size The size of the memory slot in bytes.
*/
SMAIndexSubBlock(const TupleStorageSubBlock &tuple_store,
const IndexSubBlockDescription &description,
const bool new_block,
void *sub_block_memory,
const std::size_t sub_block_memory_size);
/**
* @brief Frees data associated with variable length attributes.
*
* Several of the data structures in this index are on the heap, therefore it is
* important that the destructor be called when evicted. The variables which
* are held out of line (on the heap_ are the variable length TypedValues and
* the comparators.
*/
~SMAIndexSubBlock();
/**
* @brief Determine whether an IndexSubBlockDescription is valid for this
* type of IndexSubBlock.
*
* @param relation The relation an index described by description would
* belong to.
* @param description A description of the parameters for this type of
* IndexSubBlock, which will be checked for validity.
* @return Whether description is well-formed and valid for this type of
* IndexSubBlock belonging to relation (i.e. whether an IndexSubBlock
* of this type, belonging to relation, can be constructed according
* to description).
**/
static bool DescriptionIsValid(const CatalogRelationSchema &relation,
const IndexSubBlockDescription &description);
/**
* @brief Estimate the average number of bytes (including any applicable
* overhead) used to index a single tuple in this type of
* IndexSubBlock. Used by StorageBlockLayout::finalize() to divide
* block memory amongst sub-blocks.
* @warning description must be valid. DescriptionIsValid() should be called
* first if necessary.
*
* @param relation The relation tuples belong to.
* @param description A description of the parameters for this type of
* IndexSubBlock.
*
* @return The average/amortized number of bytes used to index a single
* tuple of relation in an IndexSubBlock of this type described by
* description.
**/
static std::size_t EstimateBytesPerTuple(const CatalogRelationSchema &relation,
const IndexSubBlockDescription &description);
/**
* @brief Estimate the total number of bytes (including any applicable
* overhead) occupied by this IndexSubBlock within the StorageBlock.
* This function is to be used by those indicies whose occupied size
* in the block does not depend on the number of tuples being indexed.
* @warning description must be valid. DescriptionIsValid() should be called
* first if necessary.
* @note This function will be invoked by StorageBlockLayout::finalize()
* if and only when EstimateBytesPerTuple() returns a zero size.
*
* @param relation The relation tuples belong to.
* @param description A description of the parameters for this type of
* IndexSubBlock.
* @return The total number of bytes occupied by this IndexSubBlock within
* the StorageBlock.
**/
static std::size_t EstimateBytesPerBlock(const CatalogRelationSchema &relation,
const IndexSubBlockDescription &description);
/**
* @return The sub block type.
*/
IndexSubBlockType getIndexSubBlockType() const override {
return kSMA;
}
/**
* @return \c true if the SMA block is initialized.
*/
bool supportsAdHocAdd() const override {
return initialized_;
}
/**
* @return \c true if the SMA block is initialized.
*/
bool supportsAdHocRemove() const override {
return initialized_;
}
/**
* @param tuple The id of the new tuple.
* @return \c true always. There's no reason we should ever run out of space once
* this index has been successfully created.
*/
bool addEntry(const tuple_id tuple) override;
/**
* @brief Updates the index to reflect the removal of a single tuple.
*
* @param tuple The id of the tuple which is going to be removed.
*/
void removeEntry(const tuple_id tuple) override;
/**
* @brief Updates the index to reflect the addition of several tuples.
*
* @param tuples The ids of the tuple which have been added.
*
* @return \c true if successful, false otherwise.
*/
bool bulkAddEntries(const TupleIdSequence &tuples) override;
/**
* @brief Updates the index to reflect the removal of several tuples.
*
* @param tuples The ids of the tuple which is going to be removed (i.e. they
* still exist within the storage block.
*/
void bulkRemoveEntries(const TupleIdSequence &tuples) override;
/**
* @brief Gives an estimate of how long it will take to evaluate the predicate
* on this StorageBlock.
*
* The SMA index will detect if one of the following cases is true:
* 1) Complete match: all the tuples in this subblock will match the predicate
* 2) Empty match: none of the tuples will match
* 3) Partial match: some of the tuples may match
* If there is a partial match, the SMA index is of no use. However, in a
* Complete or empty match, the SMA index can speed up the selection process
* and should be used.
*
* @param predicate A simple predicate too be evaluated.
*
* @return The cost associated with the type of match. Empty matches are constant,
* partial matches require a scan, and complete matches require something
* less than a regular scan (because we don't need to do the comparison
* operation for each tuple) but is still linear with the number of tuples.
*/
predicate_cost_t estimatePredicateEvaluationCost(
const ComparisonPredicate &predicate) const override;
/**
* @warning Calling this method on the SMA index implies that we are not going
* to do a scan for some tuple matches. As in, the SMA index will
* either return an empty set of tuple ids, or a set of tuple ids
* which is the entire set of all tuple ids in the storage subblock.
* @note Currently this version only supports simple comparisons of a literal
* value with a non-composite key.
**/
TupleIdSequence* getMatchesForPredicate(const ComparisonPredicate &predicate,
const TupleIdSequence *filter) const override;
/**
* @brief Update the index to reflect the current state of the storage block.
*
* @return \c true if successful. False otherwise.
*/
bool rebuild() override;
/**
* @brief Given an attribute, quickly check to see if the SMA index contains an
* entry for it.
*
* @param attribute The ID of the attribute to check.
*
* @return \c true if this index contains an entry.
*/
bool hasEntryForAttribute(attribute_id attribute) const;
/**
* @brief Returns the number of tuples of the storage sub block.
*
* @return Number of tuples in the sub block.
*/
std::uint32_t getCount() const {
return reinterpret_cast<sma_internal::SMAHeader*>(sub_block_memory_)->count_aggregate;
}
private:
bool requiresRebuild() const;
sma_internal::Selectivity getSelectivityForPredicate(const ComparisonPredicate &predicate) const;
// Retrieves an entry, first checking if the given attribute is indexed.
inline const sma_internal::SMAEntry* getEntryChecked(attribute_id attribute) const {
DCHECK(attribute > -1) << "Attribute Id must be >= 0";
if (total_attributes_ > static_cast<std::size_t>(attribute)) {
int index = attribute_to_entry_.get()[attribute];
if (index != -1) {
return entries_ + index;
}
}
return nullptr;
}
// Retrieves an entry, not checking if the given attribute is indexed.
// Warning: This should not be used unless the attribute is indexed.
inline const sma_internal::SMAEntry* getEntryUnchecked(attribute_id attribute) const {
return entries_ + attribute_to_entry_.get()[attribute];
}
// Resets a single entry to a zero and invalid state.
// This should be called before rebuilding, or while initializing.
void resetEntry(sma_internal::SMAEntry *entry,
const attribute_id attribute,
const Type &attribute_type);
// Sets all entries to a zero'd and invalid state.
// This is called prior to a rebuild.
void resetAllEntries();
void addTuple(tuple_id tuple);
// Frees any TypedValue data which is held in the SMA entries.
void freeOutOfLineData();
// Handy pointer to the header.
sma_internal::SMAHeader *header_;
// Handy pointer to the beginning of the entries.
sma_internal::SMAEntry *entries_;
// Total number of attributes in the relation.
std::size_t total_attributes_;
// Used to lookup the index of the SMA entry given the attribute_id.
// For example SMAEntry &entry =
// entries_[attribute_to_entry_[someAttribute->getID()]]
// A value of -1 indicates that that attribute is not indexed.
std::unique_ptr<int> attribute_to_entry_;
// Number of indexed attributes.
const std::size_t num_indexed_attributes_;
// True if the index has gone through the initialization process.
// The key aspect of initialization is the creation of the comparators and
// binary operators which are used to update the SMAEntries on insertion
// and deletion.
bool initialized_;
// An array of pointers to necessary add operations for updating SUM.
// Essentially, it maps between TypeID and the pointer to the correct
// operator for that type.
// For example: add_operations_[AttributeTypeID]->applyWithTypedValues(
// TypedValue of the attributes type,
// TypedValue of the sum type)
PtrVector<UncheckedBinaryOperator, true> add_operations_;
// An array of pointers to necessary comparison operations for updating MIN/MAX.
// Essentially, it maps between TypeID and the pointer to the correct
// operator for that type.
// For example: less_comparisons_[AttributeTypeID]->compareTypedValues(
// TypedValue of the attributes type,
// TypedValue of the attributes type)
PtrVector<UncheckedComparator, true> less_comparisons_;
PtrVector<UncheckedComparator, true> equal_comparisons_;
friend class SMAIndexSubBlockTest;
DISALLOW_COPY_AND_ASSIGN(SMAIndexSubBlock);
};
} // namespace quickstep
#endif // QUICKSTEP_STORAGE_SMA_INDEX_SUB_BLOCK_HPP_