| /** |
| * Copyright 2011-2015 Quickstep Technologies LLC. |
| * Copyright 2015-2016 Pivotal Software, Inc. |
| * |
| * Licensed under the Apache License, Version 2.0 (the "License"); |
| * you may not use this file except in compliance with the License. |
| * You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| **/ |
| |
| #ifndef QUICKSTEP_RELATIONAL_OPERATORS_AGGREGATION_OPERATION_HPP_ |
| #define QUICKSTEP_RELATIONAL_OPERATORS_AGGREGATION_OPERATION_HPP_ |
| |
| #include <cstddef> |
| #include <memory> |
| #include <vector> |
| |
| #include "catalog/CatalogTypedefs.hpp" |
| #include "expressions/aggregation/AggregationHandle.hpp" |
| #include "expressions/aggregation/AggregationID.hpp" |
| #include "expressions/predicate/Predicate.hpp" |
| #include "expressions/scalar/Scalar.hpp" |
| #include "storage/AggregationOperationState.pb.h" |
| #include "storage/HashTableBase.hpp" |
| #include "storage/StorageBlockInfo.hpp" |
| #include "utility/Macros.hpp" |
| |
| namespace quickstep { |
| |
| class AggregateFunction; |
| class CatalogDatabaseLite; |
| class CatalogRelationSchema; |
| class InsertDestination; |
| class StorageManager; |
| |
| /** \addtogroup Storage |
| * @{ |
| */ |
| |
| /** |
| * @brief Helper class for maintaining state during aggregation operation. |
| * If a GROUP BY list was provided, this class maintains a hash table |
| * for each aggregate computed where the key is the GROUP BY expression |
| * values and payload is each group's corresponding running aggregation |
| * state. Without GROUP BY, this class maintains a single aggregation |
| * state for each aggregate computed. |
| * @note See also AggregationHandle, which encapsulates logic for actually |
| * computing aggregates, and AggregateFunction, which represents a |
| * particular SQL aggregate in the abstract sense. |
| * |
| * This class represents the common state for an instance of |
| * AggregationOperator, and also encapsulates the high-level logic for |
| * aggregating over blocks and generating final results. |
| * AggregationWorkOrder::execute() is mainly just a call to aggregateBlock(), |
| * while FinalizeAggregationWorkOrder::execute() is mainly just a call to |
| * finalizeAggregate(). |
| **/ |
| class AggregationOperationState { |
| public: |
| /** |
| * @brief Constructor for aggregation operation state. |
| * @note The order of some of the parameters to this constructor (or the |
| * corresponding fields when reconstructing from a protobuf) determines |
| * the schema of tuples written out by finalizeAggregate(). If group_by |
| * is nonempty, the first attribute(s) will be the group-by values, in |
| * order. Following that will be the values for each aggregate |
| * specified by aggregate_functions (with arguments specified by |
| * attributes), in order. |
| * |
| * @param input_relation Input relation on which the aggregates are computed. |
| * @param aggregate_functions A list of the aggregate functions to be |
| * computed. |
| * @param arguments For each entry in aggregate_functions, a corresponding |
| * list of argument expressions to that aggregate. This is moved-from, |
| * with AggregationOperationState taking ownership. |
| * @param is_distinct For each entry in aggregate_functions, whether DISTINCT |
| * should be applied to the entry's arguments. |
| * @param group_by A list of expressions to compute the GROUP BY values. If |
| * empty, no grouping is used. This is moved-from, with |
| * AggregationOperationState taking ownership. |
| * @param predicate The predicate to be applied prior to aggregation. nullptr |
| * indicates no predicate to be applied. This object takes ownership |
| * of predicate. |
| * @param estimated_num_entries Estimated of number of entries in the hash |
| * table. A good estimate would be a fraction of total number of tuples |
| * in the input relation. |
| * @param hash_table_impl_type The HashTable implementation to use for |
| * GROUP BY. Ignored if group_by is empty. |
| * @param storage_manager The StorageManager to use for allocating hash |
| * tables. Single aggregation state (when GROUP BY list is not |
| * specified) is not allocated using memory from storage manager. |
| */ |
| AggregationOperationState(const CatalogRelationSchema &input_relation, |
| const std::vector<const AggregateFunction*> &aggregate_functions, |
| std::vector<std::vector<std::unique_ptr<const Scalar>>> &&arguments, |
| std::vector<bool> &&is_distinct, |
| std::vector<std::unique_ptr<const Scalar>> &&group_by, |
| const Predicate *predicate, |
| const std::size_t estimated_num_entries, |
| const HashTableImplType hash_table_impl_type, |
| StorageManager *storage_manager); |
| |
| ~AggregationOperationState() {} |
| |
| /** |
| * @brief Generate the aggregation operation state from the serialized |
| * Protocol Buffer representation. |
| * @note The order of some repeated fields in the proto representation |
| * determines the schema of tuples written out by finalizeAggregate(). |
| * See the note for the constructor for details. |
| * |
| * @param proto A serialized Protocol Buffer representation of an |
| * AggregationOperationState, originally generated by the optimizer. |
| * @param database The Database to resolve relation and attribute references |
| * in. |
| * @param storage_manager The StorageManager to use. |
| **/ |
| static AggregationOperationState* ReconstructFromProto( |
| const serialization::AggregationOperationState &proto, |
| const CatalogDatabaseLite &database, |
| StorageManager *storage_manager); |
| |
| /** |
| * @brief Check whether a serialization::AggregationOperationState is |
| * fully-formed and all parts are valid. |
| * |
| * @param proto A serialized Protocol Buffer representation of an |
| * AggregationOperationState, originally generated by the optimizer. |
| * @param database The Database to resolve relation and attribute references |
| * in. |
| * @return Whether proto is fully-formed and valid. |
| **/ |
| static bool ProtoIsValid(const serialization::AggregationOperationState &proto, |
| const CatalogDatabaseLite &database); |
| |
| /** |
| * @brief Compute aggregates on the tuples of the given storage block, |
| * updating the running state maintained by this |
| * AggregationOperationState. |
| * |
| * @param input_block The block ID of the storage block where the aggreates |
| * are going to be computed. |
| **/ |
| void aggregateBlock(const block_id input_block); |
| |
| /** |
| * @brief Generate the final results for the aggregates managed by this |
| * AggregationOperationState and write them out to StorageBlock(s). |
| * |
| * @param output_destination An InsertDestination where the finalized output |
| * tuple(s) from this aggregate are to be written. |
| **/ |
| void finalizeAggregate(InsertDestination *output_destination); |
| |
| private: |
| // Merge locally (per storage block) aggregated states with global aggregation |
| // states. |
| void mergeSingleState(const std::vector<std::unique_ptr<AggregationState>> &local_state); |
| |
| // Aggregate on input block. |
| void aggregateBlockSingleState(const block_id input_block); |
| void aggregateBlockHashTable(const block_id input_block); |
| |
| void finalizeSingleState(InsertDestination *output_destination); |
| void finalizeHashTable(InsertDestination *output_destination); |
| |
| // Common state for all aggregates in this operation: the input relation, the |
| // filter predicate (if any), and the list of GROUP BY expressions (if any). |
| const CatalogRelationSchema &input_relation_; |
| std::unique_ptr<const Predicate> predicate_; |
| std::vector<std::unique_ptr<const Scalar>> group_by_list_; |
| |
| // Each individual aggregate in this operation has an AggregationHandle and |
| // some number of Scalar arguments. |
| std::vector<std::unique_ptr<AggregationHandle>> handles_; |
| std::vector<std::vector<std::unique_ptr<const Scalar>>> arguments_; |
| |
| // For each aggregate, whether DISTINCT should be applied to the aggregate's |
| // arguments. |
| std::vector<bool> is_distinct_; |
| |
| // Hash table for obtaining distinct (i.e. unique) arguments. |
| std::vector<std::unique_ptr<AggregationStateHashTableBase>> distinctify_hashtables_; |
| |
| #ifdef QUICKSTEP_ENABLE_VECTOR_COPY_ELISION_SELECTION |
| // If all an aggregate's argument expressions are simply attributes in |
| // 'input_relation_', then this caches the attribute IDs of those arguments. |
| std::vector<std::vector<attribute_id>> arguments_as_attributes_; |
| #endif |
| |
| // Per-aggregate global states for aggregation without GROUP BY. |
| std::vector<std::unique_ptr<AggregationState>> single_states_; |
| |
| // Per-aggregate HashTables for aggregation with GROUP BY. |
| // |
| // TODO(shoban): We should ideally store the aggregation state together in one |
| // hash table to prevent multiple lookups. |
| std::vector<std::unique_ptr<AggregationStateHashTableBase>> group_by_hashtables_; |
| |
| StorageManager *storage_manager_; |
| |
| DISALLOW_COPY_AND_ASSIGN(AggregationOperationState); |
| }; |
| |
| /** @} */ |
| |
| } // namespace quickstep |
| |
| #endif // QUICKSTEP_RELATIONAL_OPERATORS_AGGREGATION_OPERATION_HPP_ |