| /** |
| * Licensed to the Apache Software Foundation (ASF) under one |
| * or more contributor license agreements. See the NOTICE file |
| * distributed with this work for additional information |
| * regarding copyright ownership. The ASF licenses this file |
| * to you under the Apache License, Version 2.0 (the |
| * "License"); you may not use this file except in compliance |
| * with the License. You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, |
| * software distributed under the License is distributed on an |
| * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
| * KIND, either express or implied. See the License for the |
| * specific language governing permissions and limitations |
| * under the License. |
| **/ |
| |
| #ifndef QUICKSTEP_STORAGE_SMA_INDEX_SUB_BLOCK_HPP_ |
| #define QUICKSTEP_STORAGE_SMA_INDEX_SUB_BLOCK_HPP_ |
| |
| #include <cstddef> |
| #include <cstdint> |
| #include <iostream> |
| #include <memory> |
| |
| #include "catalog/CatalogTypedefs.hpp" |
| #include "expressions/predicate/PredicateCost.hpp" |
| #include "storage/IndexSubBlock.hpp" |
| #include "storage/StorageBlockInfo.hpp" |
| #include "storage/SubBlockTypeRegistryMacros.hpp" |
| #include "types/TypeID.hpp" |
| #include "types/TypedValue.hpp" |
| #include "types/operations/comparisons/ComparisonID.hpp" |
| #include "utility/Macros.hpp" |
| #include "utility/PtrVector.hpp" |
| |
| #include "glog/logging.h" |
| |
| namespace quickstep { |
| |
| class CatalogRelationSchema; |
| class ComparisonPredicate; |
| class IndexSubBlockDescription; |
| class TupleIdSequence; |
| class TupleStorageSubBlock; |
| class Type; |
| class UncheckedBinaryOperator; |
| class UncheckedComparator; |
| |
| /** |
| * @brief 'Small Materialized Aggregates' Index. This class holds count, min, |
| * max, and sum aggregates for attributes in a block. |
| */ |
| QUICKSTEP_DECLARE_SUB_BLOCK_TYPE_REGISTERED(SMAIndexSubBlock); |
| |
| /** |
| * Namespace contains helper functions of the SMA block. This includes functions |
| * to find the selectivity of predicates over a storage subblock. Also includes |
| * structs that make up the index. |
| */ |
| namespace sma_internal { |
| /** |
| * @brief Roughly describes how many tuples of a subblock will be selected by |
| * a predicate. |
| * @details kAll, kNone indicate that the SMA has determined that all, or none |
| * of the tuples will be selected. kSome means that some tuples may be |
| * selected, but a scan must be performed. kUnknown indicates that the |
| * SMA tried to answer the predicate but did not have enough information. |
| * kUnsolved indicates that the predicate has been created but not |
| * analyzed by the SMA. |
| */ |
| enum class Selectivity { |
| kAll, // Select all tuples in storage subblock. |
| kSome, // Select some of the tuples. |
| kNone, // Select none of the tuples. |
| kUnknown, // The predicate cannot be determined. |
| kUnsolved // We have not tried to determine the selectivity. |
| }; |
| |
| /** |
| * @brief Helper method. Uses the stored values from the SMA Index to determine |
| * selectivity of a predicate. |
| * |
| * @param literal The literal value to compare against. |
| * @param comparison The id of the predicate comparison function. |
| * @param min The minimum value associated with that attribute. |
| * @param max The maximum value associated with that attribute. |
| * @param less_comparator The less comparator for the attribute type. |
| * @param equals_comparator The equals comparator for the attribute type. |
| * |
| * @return Selectivity of this predicate. |
| */ |
| Selectivity getSelectivity(const TypedValue &literal, |
| const ComparisonID comparison, |
| const TypedValue &min, |
| const TypedValue &max, |
| const UncheckedComparator *less_comparator, |
| const UncheckedComparator *equals_comparator); |
| |
| /** |
| * @brief A simple holding struct for a comparison predicate. Selectivity enum |
| * indicates if the SMA has been used to solve the predicate and if so, |
| * what is the selectivity over the block. |
| */ |
| struct SMAPredicate { |
| /** |
| * @brief Extracts a comparison predicate into an SMAPredicate. |
| * |
| * @param predicate A comparison of the form {attribute} {comparison} {literal} |
| * or {literal} {comparison} {attribute}. |
| * @return An SMAPredicate pointer which the caller must manage. |
| */ |
| static SMAPredicate* ExtractSMAPredicate(const ComparisonPredicate& predicate); |
| |
| const attribute_id attribute; |
| const ComparisonID comparison; |
| const TypedValue literal; |
| // How much of the TupleStore this predicate will select. kUnsolved indicates |
| // that the predicate had not been used previously. |
| Selectivity selectivity; |
| |
| SMAPredicate(const attribute_id attr, |
| const ComparisonID comp, |
| const TypedValue lit) |
| : attribute(attr), |
| comparison(comp), |
| literal(lit), |
| selectivity(Selectivity::kUnsolved) { } |
| }; |
| |
| // A 64-bit header. |
| struct SMAHeader { |
| // Count refers the SQL aggregate COUNT. |
| std::uint32_t count_aggregate; |
| |
| union { |
| // Consistent refers to the index being in a consistent state. |
| bool index_consistent; |
| // Padding is so we keep this data structure to 64 bits in length. |
| std::uint32_t padding; |
| }; |
| }; |
| |
| // Reference to an attribute value in a tuple. This struct is used to keep a |
| // of the min and max value for an attribute. |
| struct EntryReference { |
| tuple_id entry_ref_tuple; |
| // There are certain cases when the entry reference can be invalid such as |
| // when the index is being rebuilt. value should not be used in this case. |
| bool valid; |
| // A copy of the typed value referenced by tuple. |
| TypedValue value; |
| }; |
| |
| // Index entry for an attribute in the SMA. |
| struct SMAEntry { |
| attribute_id attribute; |
| // The type of the attribute contained in this entry. |
| TypeID type_id; |
| // A reference to the minimum value of the attribute. |
| EntryReference min_entry_ref; |
| // A reference to the maximum value of the attribute. |
| EntryReference max_entry_ref; |
| // The sum of the attribute. This is only used in the case of numeric types. |
| TypedValue sum_aggregate; |
| }; |
| |
| } // namespace sma_internal |
| |
| |
| /** |
| * @brief Small Materialized Aggregate SubBlock. |
| * @details Keeps account of several types of aggregate functions per Block. |
| * Currently supports min, max, sum, and count (for numeric types). |
| * Physical organization of the block is as follows: |
| * |
| * [SMAHeader][SMAEntry x number of indexed attributes] |
| */ |
| class SMAIndexSubBlock : public IndexSubBlock { |
| public: |
| /** |
| * @brief Constructor. |
| * |
| * @param tuple_store The TupleStorageSubBlock whose contents are indexed by |
| * this IndexSubBlock. |
| * @param description A description containing any parameters needed to |
| * construct this SubBlock (e.g. what attributes to index on). |
| * Implementation-specific parameters are defined as extensions in |
| * StorageBlockLayout.proto. |
| * @param new_block Whether this is a newly-created block. |
| * @param sub_block_memory The memory slot to use for the block's contents. |
| * @param sub_block_memory_size The size of the memory slot in bytes. |
| */ |
| SMAIndexSubBlock(const TupleStorageSubBlock &tuple_store, |
| const IndexSubBlockDescription &description, |
| const bool new_block, |
| void *sub_block_memory, |
| const std::size_t sub_block_memory_size); |
| |
| /** |
| * @brief Frees data associated with variable length attributes. |
| * |
| * Several of the data structures in this index are on the heap, therefore it is |
| * important that the destructor be called when evicted. The variables which |
| * are held out of line (on the heap_ are the variable length TypedValues and |
| * the comparators. |
| */ |
| ~SMAIndexSubBlock(); |
| |
| /** |
| * @brief Determine whether an IndexSubBlockDescription is valid for this |
| * type of IndexSubBlock. |
| * |
| * @param relation The relation an index described by description would |
| * belong to. |
| * @param description A description of the parameters for this type of |
| * IndexSubBlock, which will be checked for validity. |
| * @return Whether description is well-formed and valid for this type of |
| * IndexSubBlock belonging to relation (i.e. whether an IndexSubBlock |
| * of this type, belonging to relation, can be constructed according |
| * to description). |
| **/ |
| static bool DescriptionIsValid(const CatalogRelationSchema &relation, |
| const IndexSubBlockDescription &description); |
| |
| /** |
| * @brief Estimate the average number of bytes (including any applicable |
| * overhead) used to index a single tuple in this type of |
| * IndexSubBlock. Used by StorageBlockLayout::finalize() to divide |
| * block memory amongst sub-blocks. |
| * @warning description must be valid. DescriptionIsValid() should be called |
| * first if necessary. |
| * |
| * @param relation The relation tuples belong to. |
| * @param description A description of the parameters for this type of |
| * IndexSubBlock. |
| * |
| * @return The average/amortized number of bytes used to index a single |
| * tuple of relation in an IndexSubBlock of this type described by |
| * description. |
| **/ |
| static std::size_t EstimateBytesPerTuple(const CatalogRelationSchema &relation, |
| const IndexSubBlockDescription &description); |
| /** |
| * @brief Estimate the total number of bytes (including any applicable |
| * overhead) occupied by this IndexSubBlock within the StorageBlock. |
| * This function is to be used by those indicies whose occupied size |
| * in the block does not depend on the number of tuples being indexed. |
| * @warning description must be valid. DescriptionIsValid() should be called |
| * first if necessary. |
| * @note This function will be invoked by StorageBlockLayout::finalize() |
| * if and only when EstimateBytesPerTuple() returns a zero size. |
| * |
| * @param relation The relation tuples belong to. |
| * @param description A description of the parameters for this type of |
| * IndexSubBlock. |
| * @return The total number of bytes occupied by this IndexSubBlock within |
| * the StorageBlock. |
| **/ |
| static std::size_t EstimateBytesPerBlock(const CatalogRelationSchema &relation, |
| const IndexSubBlockDescription &description); |
| |
| /** |
| * @return The sub block type. |
| */ |
| IndexSubBlockType getIndexSubBlockType() const override { |
| return kSMA; |
| } |
| |
| /** |
| * @return \c true if the SMA block is initialized. |
| */ |
| bool supportsAdHocAdd() const override { |
| return initialized_; |
| } |
| |
| /** |
| * @return \c true if the SMA block is initialized. |
| */ |
| bool supportsAdHocRemove() const override { |
| return initialized_; |
| } |
| |
| /** |
| * @param tuple The id of the new tuple. |
| * @return \c true always. There's no reason we should ever run out of space once |
| * this index has been successfully created. |
| */ |
| bool addEntry(const tuple_id tuple) override; |
| |
| /** |
| * @brief Updates the index to reflect the removal of a single tuple. |
| * |
| * @param tuple The id of the tuple which is going to be removed. |
| */ |
| void removeEntry(const tuple_id tuple) override; |
| |
| /** |
| * @brief Updates the index to reflect the addition of several tuples. |
| * |
| * @param tuples The ids of the tuple which have been added. |
| * |
| * @return \c true if successful, false otherwise. |
| */ |
| bool bulkAddEntries(const TupleIdSequence &tuples) override; |
| |
| /** |
| * @brief Updates the index to reflect the removal of several tuples. |
| * |
| * @param tuples The ids of the tuple which is going to be removed (i.e. they |
| * still exist within the storage block. |
| */ |
| void bulkRemoveEntries(const TupleIdSequence &tuples) override; |
| |
| /** |
| * @brief Gives an estimate of how long it will take to evaluate the predicate |
| * on this StorageBlock. |
| * |
| * The SMA index will detect if one of the following cases is true: |
| * 1) Complete match: all the tuples in this subblock will match the predicate |
| * 2) Empty match: none of the tuples will match |
| * 3) Partial match: some of the tuples may match |
| * If there is a partial match, the SMA index is of no use. However, in a |
| * Complete or empty match, the SMA index can speed up the selection process |
| * and should be used. |
| * |
| * @param predicate A simple predicate too be evaluated. |
| * |
| * @return The cost associated with the type of match. Empty matches are constant, |
| * partial matches require a scan, and complete matches require something |
| * less than a regular scan (because we don't need to do the comparison |
| * operation for each tuple) but is still linear with the number of tuples. |
| */ |
| predicate_cost_t estimatePredicateEvaluationCost( |
| const ComparisonPredicate &predicate) const override; |
| |
| /** |
| * @warning Calling this method on the SMA index implies that we are not going |
| * to do a scan for some tuple matches. As in, the SMA index will |
| * either return an empty set of tuple ids, or a set of tuple ids |
| * which is the entire set of all tuple ids in the storage subblock. |
| * @note Currently this version only supports simple comparisons of a literal |
| * value with a non-composite key. |
| **/ |
| TupleIdSequence* getMatchesForPredicate(const ComparisonPredicate &predicate, |
| const TupleIdSequence *filter) const override; |
| |
| /** |
| * @brief Update the index to reflect the current state of the storage block. |
| * |
| * @return \c true if successful. False otherwise. |
| */ |
| bool rebuild() override; |
| |
| /** |
| * @brief Given an attribute, quickly check to see if the SMA index contains an |
| * entry for it. |
| * |
| * @param attribute The ID of the attribute to check. |
| * |
| * @return \c true if this index contains an entry. |
| */ |
| bool hasEntryForAttribute(attribute_id attribute) const; |
| |
| /** |
| * @brief Returns the number of tuples of the storage sub block. |
| * |
| * @return Number of tuples in the sub block. |
| */ |
| std::uint32_t getCount() const { |
| return reinterpret_cast<sma_internal::SMAHeader*>(sub_block_memory_)->count_aggregate; |
| } |
| |
| private: |
| bool requiresRebuild() const; |
| |
| sma_internal::Selectivity getSelectivityForPredicate(const ComparisonPredicate &predicate) const; |
| |
| // Retrieves an entry, first checking if the given attribute is indexed. |
| inline const sma_internal::SMAEntry* getEntryChecked(attribute_id attribute) const { |
| DCHECK(attribute > -1) << "Attribute Id must be >= 0"; |
| |
| if (total_attributes_ > static_cast<std::size_t>(attribute)) { |
| int index = attribute_to_entry_.get()[attribute]; |
| if (index != -1) { |
| return entries_ + index; |
| } |
| } |
| return nullptr; |
| } |
| |
| // Retrieves an entry, not checking if the given attribute is indexed. |
| // Warning: This should not be used unless the attribute is indexed. |
| inline const sma_internal::SMAEntry* getEntryUnchecked(attribute_id attribute) const { |
| return entries_ + attribute_to_entry_.get()[attribute]; |
| } |
| |
| // Resets a single entry to a zero and invalid state. |
| // This should be called before rebuilding, or while initializing. |
| void resetEntry(sma_internal::SMAEntry *entry, |
| const attribute_id attribute, |
| const Type &attribute_type); |
| |
| // Sets all entries to a zero'd and invalid state. |
| // This is called prior to a rebuild. |
| void resetAllEntries(); |
| |
| void addTuple(tuple_id tuple); |
| |
| // Frees any TypedValue data which is held in the SMA entries. |
| void freeOutOfLineData(); |
| |
| // Handy pointer to the header. |
| sma_internal::SMAHeader *header_; |
| // Handy pointer to the beginning of the entries. |
| sma_internal::SMAEntry *entries_; |
| // Total number of attributes in the relation. |
| std::size_t total_attributes_; |
| // Used to lookup the index of the SMA entry given the attribute_id. |
| // For example SMAEntry &entry = |
| // entries_[attribute_to_entry_[someAttribute->getID()]] |
| // A value of -1 indicates that that attribute is not indexed. |
| std::unique_ptr<int> attribute_to_entry_; |
| // Number of indexed attributes. |
| const std::size_t num_indexed_attributes_; |
| // True if the index has gone through the initialization process. |
| // The key aspect of initialization is the creation of the comparators and |
| // binary operators which are used to update the SMAEntries on insertion |
| // and deletion. |
| bool initialized_; |
| |
| // An array of pointers to necessary add operations for updating SUM. |
| // Essentially, it maps between TypeID and the pointer to the correct |
| // operator for that type. |
| // For example: add_operations_[AttributeTypeID]->applyWithTypedValues( |
| // TypedValue of the attributes type, |
| // TypedValue of the sum type) |
| PtrVector<UncheckedBinaryOperator, true> add_operations_; |
| |
| // An array of pointers to necessary comparison operations for updating MIN/MAX. |
| // Essentially, it maps between TypeID and the pointer to the correct |
| // operator for that type. |
| // For example: less_comparisons_[AttributeTypeID]->compareTypedValues( |
| // TypedValue of the attributes type, |
| // TypedValue of the attributes type) |
| PtrVector<UncheckedComparator, true> less_comparisons_; |
| PtrVector<UncheckedComparator, true> equal_comparisons_; |
| |
| friend class SMAIndexSubBlockTest; |
| |
| DISALLOW_COPY_AND_ASSIGN(SMAIndexSubBlock); |
| }; |
| |
| } // namespace quickstep |
| |
| #endif // QUICKSTEP_STORAGE_SMA_INDEX_SUB_BLOCK_HPP_ |