blob: 26e80273f335f9d445a78c6557dd15cb7d3064ca [file] [log] [blame]
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
**/
#ifndef QUICKSTEP_STORAGE_TUPLE_STORAGE_SUB_BLOCK_HPP_
#define QUICKSTEP_STORAGE_TUPLE_STORAGE_SUB_BLOCK_HPP_
#include <unordered_map>
#include <vector>
#include "catalog/CatalogTypedefs.hpp"
#include "expressions/predicate/PredicateCost.hpp"
#include "storage/StorageBlockInfo.hpp"
#include "storage/TupleIdSequence.hpp"
#include "types/TypedValue.hpp"
#include "utility/Macros.hpp"
namespace quickstep {
class CatalogRelationSchema;
class ComparisonPredicate;
class Tuple;
class TupleStorageSubBlockDescription;
class ValueAccessor;
/** \addtogroup Storage
* @{
*/
// TODO(chasseur): Abstract more common safety checks into base class.
/**
* @brief SubBlock which stores complete tuples.
**/
class TupleStorageSubBlock {
public:
/**
* @brief Structure describing the result of an insertion of a single tuple.
**/
struct InsertResult {
InsertResult(const tuple_id inserted_id_arg, const bool ids_mutated_arg)
: inserted_id(inserted_id_arg),
ids_mutated(ids_mutated_arg) {
}
/**
* @brief The ID of the inserted tuple, or -1 if unable to insert.
**/
tuple_id inserted_id;
/**
* @brief True if other tuples in the TupleStorageSubBlock had their ids
* mutated (requiring that indexes be rebuilt), false otherwise.
**/
bool ids_mutated;
};
/**
* @brief Constructor.
*
* @param relation The CatalogRelationSchema which this SubBlock belongs to.
* @param description A description containing any parameters needed to
* construct this SubBlock. Implementation-specific parameters are
* defined as extensions in StorageBlockLayout.proto.
* @param new_block Whether this is a newly-created block.
* @param sub_block_memory The memory slot to use for the block's contents.
* @param sub_block_memory_size The size of the memory slot in bytes.
* @exception BlockMemoryTooSmall This TupleStorageSubBlock hasn't been
* provided enough memory to store metadata.
**/
TupleStorageSubBlock(const CatalogRelationSchema &relation,
const TupleStorageSubBlockDescription &description,
const bool new_block,
void *sub_block_memory,
const std::size_t sub_block_memory_size)
: relation_(relation),
description_(description),
sub_block_memory_(sub_block_memory),
sub_block_memory_size_(sub_block_memory_size) {
}
/**
* @brief Virtual destructor.
**/
virtual ~TupleStorageSubBlock() {
}
/**
* @brief Get the CatalogRelationSchema this TupleStorageSubBlock belongs to.
*
* @return The relation this TupleStorageSubBlock belongs to.
**/
inline const CatalogRelationSchema& getRelation() const {
return relation_;
}
/**
* @brief Identify the type of this TupleStorageSubBlock.
*
* @return This TupleStorageSubBlock's type.
**/
virtual TupleStorageSubBlockType getTupleStorageSubBlockType() const = 0;
/**
* @brief Determine whether this SubBlock supports the getAttributeValue()
* method to get an untyped pointer to a value for a particular
* attribute.
*
* @param attr The ID of the attribute which getAttributeValue() would be
* used with.
* @return Whether the getAttributeValue() method can be used on this
* SubBlock with the specified attr.
**/
virtual bool supportsUntypedGetAttributeValue(const attribute_id attr) const = 0;
/**
* @brief Determine whether this SubBlock supports ad-hoc insertion of
* individual tuples via the insertTuple() method.
* @note If this method returns false, then tuples can only be inserted via
* insertTupleInBatch(), which should be followed by a call to
* rebuild() when a batch is fully inserted.
* @note Even if this method returns false, it is still legal to call
* insertTuple(), although it will always fail to actually insert.
*
* @return Whether the insertTuple() can be used on this SubBlock.
**/
virtual bool supportsAdHocInsert() const = 0;
/**
* @brief Determine whether inserting tuples one-at-a-time via the
* insertTuple() method is efficient (i.e. has constant time and space
* costs and does not require expensive reorganization of other tuples
* in this SubBlock).
*
* @return Whether insertTuple() is efficient for this SubBlock.
**/
virtual bool adHocInsertIsEfficient() const = 0;
/**
* @brief Determine whether this block has any tuples in it.
*
* @return True if this SubBlock is empty, false otherwise.
**/
virtual bool isEmpty() const = 0;
/**
* @brief Determine whether this block is packed, i.e. there are no holes in
* the tuple-id sequence.
*
* @return True if this SubBlock is packed, false otherwise.
**/
virtual bool isPacked() const = 0;
/**
* @brief Get the highest tuple-id of a valid tuple in this SubBlock.
*
* @return The highest tuple-id of a tuple stored in this SubBlock (-1 if
* this SubBlock is empty).
**/
virtual tuple_id getMaxTupleID() const = 0;
/**
* @brief Get the number of tuples contained in this SubBlock.
* @note This method returns tuple_id for ease of comparison with other
* tuple_id values referring to tuples in this block. The return value
* will always be non-negative, though.
* @note The default implementation is O(1) for packed TupleStorageSubBlocks,
* but TupleStorageSubBlock implementations which may be non-packed
* should override this where possible, as the default version of this
* method is O(N) when the SubBlock is non-packed.
*
* @return The number of tuples contained in this TupleStorageSubBlock.
**/
virtual tuple_id numTuples() const;
/**
* @brief Determine whether a tuple with the given id exists in this
* SubBlock.
*
* @param tuple The ID to check.
* @return True if tuple exists, false if it does not.
**/
virtual bool hasTupleWithID(const tuple_id tuple) const = 0;
/**
* @brief Insert a single tuple into this TupleStorageSubBlock.
*
* @param tuple The tuple to insert, whose values must be in the correct
* order.
* @return The result of the insertion.
**/
virtual InsertResult insertTuple(const Tuple &tuple) = 0;
/**
* @brief Insert a single tuple as part of a batch.
* @note This method is intended to allow a large number of tuples to be
* loaded without incurring the full cost of maintaining a "clean"
* internal block structure. Once a batch of tuples have been inserted,
* rebuild() should be called to put this TupleStorageSubBlock into a
* consistent state.
* @warning The inserted tuple may be placed in an "incorrect" or sub-optimal
* location in this TupleStorageSubBlock. The only methods which are
* safe to call between bulkInsertTuples() and rebuild() are
* insertTupleInBatch(), bulkInsertTuples(), and
* bulkInsertTuplesWithRemappedAttributes().
*
* @param tuple The tuple to insert, whose values must be in the correct
* order.
* @return True if the insertion was successful, false if out of space.
**/
virtual bool insertTupleInBatch(const Tuple &tuple) = 0;
/**
* @brief Insert as many tuples as possible from a ValueAccessor (all of the
* tuples accessible or as many as will fit in this
* TupleStorageSubBlock) as a single batch.
*
* @warning The inserted tuples may be placed in an "incorrect" or
* sub-optimal locations in this TupleStorageSubBlock. The only
* methods which are safe to call between bulkInsertTuples() and
* rebuild() are insertTupleInBatch(), bulkInsertTuples(), and
* bulkInsertTuplesWithRemappedAttributes().
*
* @param accessor A ValueAccessor to insert tuples from. This method assumes
* that the attributes in the ValueAccessor are in exactly the same
* order (and the same type as) the attributes of this
* TupleStorageStorageBlock's relation. The accessor's iteration will
* be advanced to the first non-inserted tuple or, if all accessible
* tuples were inserted in this sub-block, to the end position.
* @return The number of tuples inserted from accessor.
**/
virtual tuple_id bulkInsertTuples(ValueAccessor *accessor) = 0;
/**
* @brief Insert as many tuples as possible from a ValueAccessor (all of the
* tuples accessible or as many as will fit in this
* TupleStorageSubBlock) as a single batch. This version remaps
* attribute IDs from the ValueAccessor to the correct order of
* attributes for this StorageBlock's relation.
*
* @warning The inserted tuples may be placed in an "incorrect" or
* sub-optimal locations in this TupleStorageSubBlock. The only
* methods which are safe to call between bulkInsertTuples() and
* rebuild() are insertTupleInBatch(), bulkInsertTuples(), and
* bulkInsertTuplesWithRemappedAttributes().
*
* @param attribute_map A vector which maps the attributes of this
* TupleStorageSubBlock's relation (in order with no gaps) to the
* corresponding attributes which should be read from accessor.
* @param accessor A ValueAccessor to insert tuples from. The accessor's
* iteration will be advanced to the first non-inserted tuple or, if
* all accessible tuples were inserted in this sub-block, to the end
* position.
* @return The number of tuples inserted from accessor.
**/
virtual tuple_id bulkInsertTuplesWithRemappedAttributes(
const std::vector<attribute_id> &attribute_map,
ValueAccessor *accessor) = 0;
/**
* @brief Insert up to max_num_tuples_to_insert tuples from a ValueAccessor
* as a single batch, using the attribute_map to project and reorder
* columns from the input ValueAccessor. Does not update header.
*
* @note Typical usage is where you want to bulk-insert columns from two
* or more value accessors. Instead of writing out the columns into
* one or more column vector value accessors, you can simply use this
* function with the appropriate attribute_map for each value
* accessor (InsertDestination::bulkInsertTuplesFromValueAccessors
* handles all the details) to insert tuples without an extra temp copy.
*
* @warning Must call bulkInsertPartialTuplesFinalize() to update the header,
* until which point, the insertion is not visible to others.
* @warning The inserted tuples may be placed in a suboptimal position in the
* block.
*
* @param attribute_map A vector which maps the attributes of this
* TupleStorageSubBlock's relation (gaps indicated with kInvalidCatalogId)
* to the corresponding attributes which should be read from accessor.
* @param accessor A ValueAccessor to insert tuples from. The accessor's
* iteration will be advanced to the first non-inserted tuple or, if
* all accessible tuples were inserted in this sub-block, to the end
* position.
* @return The number of tuples inserted from accessor.
**/
virtual tuple_id bulkInsertPartialTuples(
const std::vector<attribute_id> &attribute_map,
ValueAccessor *accessor,
const tuple_id max_num_tuples_to_insert) {
LOG(FATAL) << "Partial bulk insert is not supported for this TupleStorageBlock type ("
<< getTupleStorageSubBlockType() << ").";
}
/**
* @brief Update header after a bulkInsertPartialTuples.
*
* @warning Only call this after a bulkInsertPartialTuples, passing in the
* number of tuples that were inserted (return value of that function).
*
* @param num_tuples_inserted Number of tuples inserted (i.e., how much to
* advance the header.num_tuples by). Should be equal to the return
* value of bulkInsertPartialTuples.
**/
virtual void bulkInsertPartialTuplesFinalize(
const tuple_id num_tuples_inserted) {
LOG(FATAL) << "Partial bulk insert is not supported for this TupleStorageBlock type ("
<< getTupleStorageSubBlockType() << ").";
}
/**
* @brief Get the (untyped) value of an attribute in a tuple in this buffer.
* @warning This method may not be supported for all implementations of
* TupleStorageSubBlock. supportsUntypedGetAttributeValue() MUST be
* called first to determine if this method is usable.
* @warning For debug builds, an assertion checks whether the specified tuple
* actually exists. In general, this method should only be called
* for tuples which are known to exist (from hasTupleWithID(),
* isPacked() and getMaxTupleID(), or presence in an index).
*
* @param tuple The desired tuple in this SubBlock.
* @param attr The attribute id of the desired attribute.
* @return An untyped pointer to the value of the specified attribute in the
* specified tuple.
**/
virtual const void* getAttributeValue(const tuple_id tuple, const attribute_id attr) const = 0;
/**
* @brief Get the value of the specified attribute of the specified tuple as
* a TypedValue.
* @warning For debug builds, an assertion checks whether the specified tuple
* actually exists. In general, this method should only be called
* for tuples which are known to exist (from hasTupleWithID(),
* isPacked() and getMaxTupleID(), or presence in an index).
*
* @param tuple The desired tuple in this SubBlock.
* @param attr The attribute id of the desired attribute.
* @return The data as a TypedValue.
**/
virtual TypedValue getAttributeValueTyped(const tuple_id tuple, const attribute_id attr) const = 0;
/**
* @brief Create a ValueAccessor object that can read attribute values from
* within this TupleStorageSubBlock.
*
* @param sequence If non-null, the ValueAccessor returned with only iterate
* over the tuples in sequence. Note that the returned ValueAccessor
* will only be valid so long as sequence exists.
* @return A ValueAccessor object which can iterate over and read values from
* this TupleStorageSubBlock.
**/
virtual ValueAccessor* createValueAccessor(const TupleIdSequence *sequence = nullptr) const = 0;
/**
* @brief Determine whether it is possible to update some attribute values
* in-place without relocating the tuple.
*
* @param tuple The desired tuple in this SubBlock.
* @param new_values A map of attribute IDs to corresponding new (typed)
* values.
* @return True if an in-place update for all values in new_values is
* possible, false otherwise.
**/
virtual bool canSetAttributeValuesInPlaceTyped(
const tuple_id tuple,
const std::unordered_map<attribute_id, TypedValue> &new_values) const = 0;
/**
* @brief Set (update) the value of an attribute in an existing tuple.
* @warning For debug builds, an assertion checks whether the specified tuple
* actually exists. In general, this method should only be called
* for tuples which are known to exist (from hasTupleWithID(),
* isPacked() and getMaxTupleID(), or presence in an index).
* @warning Sometimes it is not possible to update an attribute value
* in-place, and the tuple must be relocated. ALWAYS call
* canSetAttributeValuesInPlaceTyped() first to check whether this
* method is safe to use.
*
* @param tuple The desired tuple in this SubBlock.
* @param attr The attribute id of the desired attribute.
* @param value A TypedValue containing the new value for the attribute.
**/
virtual void setAttributeValueInPlaceTyped(const tuple_id tuple,
const attribute_id attr,
const TypedValue &value) = 0;
/**
* @brief Delete a single tuple from this TupleStorageSubBlock.
* @warning For debug builds, an assertion checks whether the specified tuple
* actually exists. In general, this method should only be called
* for tuples which are known to exist (from hasTupleWithID(),
* isPacked() and getMaxTupleID(), or presence in an index).
* @warning Always check the return value of this call to see whether indexes
* must be totally rebuilt.
*
* @param tuple The tuple to delete.
* @return True if other tuples have had their ids mutated (requiring indexes
* to be rebuilt), false if other tuple IDs are stable.
**/
virtual bool deleteTuple(const tuple_id tuple) = 0;
/**
* @brief Delete multiple tuples from this TupleStorageSubBlock.
* @warning For debug builds, an assertion checks whether the specified
* tuples actually exist. In general, this method should only be
* called for tuples which are known to exist (from
* hasTupleWithID(), isPacked() and getMaxTupleID(), or presence
* in an index).
* @warning Always check the return value of this call to see whether indexes
* must be totally rebuilt.
*
* @param tuples A sequence of tuple IDs which indicate the tuples to be
* deleted.
* @return True if other tuples have had their ids mutated (requiring
* indexes to be rebuilt), false if other tuple IDs are stable.
**/
virtual bool bulkDeleteTuples(TupleIdSequence *tuples) = 0;
/**
* @brief Estimate the cost of evaluating a ComparisonPredicate on this
* TupleStorageStorageBlock.
*
* @param predicate The predicate to be evaluated.
* @return A cost estimate for evaluating the predicate using this
* TupleStorageSubBlock's getMatchesForPredicate() method. Lower
* values indicate a cheaper/faster estimated cost.
**/
virtual predicate_cost_t estimatePredicateEvaluationCost(
const ComparisonPredicate &predicate) const = 0;
/**
* @brief Get the IDs of tuples in this SubBlock which match a given
* ComparisonPredicate using a "fast-path" technique specific to this
* TupleStorageStorageBlock.
* @warning This should only be called if estimatePredicateEvaluationCost()
* returns a value OTHER than kInfinite, kRowScan, or kColumnScan
* for the predicate.
*
* @param predicate The predicate to match.
* @param filter If non-NULL, then only tuple IDs which are set in the
* filter will be checked (all others will be assumed to be false).
* @return The IDs of tuples matching the specified predicate.
**/
virtual TupleIdSequence* getMatchesForPredicate(const ComparisonPredicate &predicate,
const TupleIdSequence *filter) const {
FATAL_ERROR("Called TupleStorageSubBlock::getMatchesForPredicate() on a "
"TupleStorageStorageBlock that does not provide any non-scan "
"method for evaluating predicates.");
}
/**
* @brief Get the IDs of all tuples which exist in this SubBlock.
* @note As with numTuples(), the default implementation of this method is
* O(1) for packed blocks, but O(n) for non-packed blocks. New
* TupleStorageSubBlock implementations which are not packed should
* override this with a faster version if possible.
*
* @return The IDs of all tuples which exist in this SubBlock.
**/
virtual TupleIdSequence* getExistenceMap() const;
/**
* @brief Get the IDs of all tuples which exist in this SubBlock.
* @note The default implementation of this method is O(n) for both packed and
* non-packed blocks. However, with packed with packed blocks there is
* not branching in the tight loop.
*
* @return The list of IDs of all tuples which exist in this SubBlock.
**/
virtual OrderedTupleIdSequence* getExistenceList() const;
/**
* @brief Rebuild this TupleStorageSubBlock, compacting storage and
* reordering tuples where applicable.
* @note This method may use an unbounded amount of out-of-band memory.
**/
virtual void rebuild() = 0;
/**
* @brief Check if this TupleStorageSubBlock is compressed, i.e. whether it
* can safely be cast to CompressedTupleStorageSubBlock.
*
* @return true if this is a CompressedTupleStorageSubBlock, false otherwise.
**/
virtual bool isCompressed() const {
return false;
}
/**
* @brief Check if the instance of tuple storage sub-block order preserving.
* This is useful in merging of sorted runs, where insertion order
* should be preserved.
*
* @return \c true if this tuple storage sub-block is order preserving.
**/
virtual bool isInsertOrderPreserving() const = 0;
protected:
/**
* @brief In debug builds, performs assertions to make sure that the values
* in tuple can be inserted into this TupleStorageSubBlock.
*
* @param tuple The tuple to check.
**/
void paranoidInsertTypeCheck(const Tuple &tuple);
const CatalogRelationSchema &relation_;
const TupleStorageSubBlockDescription &description_;
void *sub_block_memory_;
const std::size_t sub_block_memory_size_;
private:
DISALLOW_COPY_AND_ASSIGN(TupleStorageSubBlock);
};
/** @} */
} // namespace quickstep
#endif // QUICKSTEP_STORAGE_TUPLE_STORAGE_SUB_BLOCK_HPP_