expressions/aggregation/AggregationHandle.hpp - incubator-retired-quickstep - Git at Google

 /**
  * Licensed to the Apache Software Foundation (ASF) under one
  * or more contributor license agreements.  See the NOTICE file
  * distributed with this work for additional information
  * regarding copyright ownership.  The ASF licenses this file
  * to you under the Apache License, Version 2.0 (the
  * "License"); you may not use this file except in compliance
  * with the License.  You may obtain a copy of the License at
  *
  *   http://www.apache.org/licenses/LICENSE-2.0
  *
  * Unless required by applicable law or agreed to in writing,
  * software distributed under the License is distributed on an
  * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
  * KIND, either express or implied.  See the License for the
  * specific language governing permissions and limitations
  * under the License.
  **/

 #ifndef QUICKSTEP_EXPRESSIONS_AGGREGATION_AGGREGATION_HANDLE_HPP_
 #define QUICKSTEP_EXPRESSIONS_AGGREGATION_AGGREGATION_HANDLE_HPP_

 #include <cstddef>
 #include <memory>
 #include <vector>

 #include "catalog/CatalogTypedefs.hpp"
 #include "storage/HashTableBase.hpp"
 #include "types/TypedValue.hpp"
 #include "utility/Macros.hpp"

 namespace quickstep {

 class ColumnVector;
 class StorageManager;
 class Type;
 class ValueAccessor;

 /** \addtogroup Expressions
  *  @{
  */

 /**
  * @brief Abstract base class for aggregation state.
  **/
 class AggregationState {
  public:
   /**
    * @brief Default constructor.
    **/
   AggregationState() {}

   /**
    * @brief Pure virtual destructor.
    **/
   virtual ~AggregationState() = 0;
 };

 // Destructor should be defined. This will be called when derived class
 // destructor is called.
 inline AggregationState::~AggregationState() {}

 /**
  * @brief AggregationHandle encapsulates logic for actually computing
  *        aggregates with particular argument(s).
  * @note See also AggregateFunction, which represents a SQL aggregate function
  *       in the abstract sense.
  *
  * An AggregationHandle is created by calling
  * AggregateFunction::createHandle(). The AggregationHandle object provides
  * methods that are used to actually compute the aggregate, storing
  * intermediate results in AggregationState objects.
  *
  * I. The work-flow for computing an aggregate without GROUP BY is as follows:
  *     1. Create a global state for the aggregate with createInitialState().
  *     2. For each block in a relation (parallelizable):
  *        a. Call StorageBlock::aggregate() to accumulate results from the
  *           block (under the covers, this calls either
  *           accumulateColumnVectors() or accumulateValueAccessor() to do the
  *           actual per-block aggregation in a vectorized fashion).
  *        b. Merge the per-block results back with the global state by calling
  *           mergeStates() (this is threadsafe).
  *     3. Generate the final result by calling finalize() on the global state.
  *
  * II. The work-flow for computing an aggregate with GROUP BY is as follows:
  *     1. Create a HashTable to hold per-group states by calling
  *        createGroupByHashTable().
  *     2. For each block in a relation (parallelizable):
  *        a. Call StorageBlock::aggregateGroupBy() to update the states in the
  *           HashTable according to the values in the block (under the covers,
  *           this calls aggregateValueAccessorIntoHashTable() to aggregate over
  *           all the values/groups in a block in one shot; this is threadsafe).
  *     3. Generate the final set of groups and their corresponding results by
  *        calling finalizeHashTable().
  *
  * See also AggregationOperationState, which holds 1 or more global states or
  * HashTables for an aggregate query, and has some logic to re-use common
  * information across multiple aggregates being computed on the same block
  * (e.g. the set of matches for a predicate, or the values of computed GROUP BY
  * expressions). AggregationOperationState also has a method to write out
  * finalized aggregate values to an InsertDestination.
  **/
 class AggregationHandle {
  public:
   /**
    * @brief Virtual destructor.
    *
    **/
   virtual ~AggregationHandle() {}

   /**
    * @brief Create an initial "blank" state for this aggregation.
    *
    * @return An initial "blank" state for this particular aggregation.
    **/
   virtual AggregationState* createInitialState() const = 0;

   /**
    * @brief Create a new HashTable for aggregation with GROUP BY.
    *
    * @param hash_table_impl The choice of which concrete HashTable
    *        implementation to use.
    * @param group_by_types The types of the GROUP BY columns/expressions. These
    *        correspond to the (composite) key type for the HashTable.
    * @param estimated_num_groups The estimated number of distinct groups for
    *        the GROUP BY aggregation. This is used to size the initial
    *        HashTable. This is an estimate only, and the HashTable will be
    *        resized if it becomes over-full.
    * @param storage_manager The StorageManager to use to create the HashTable.
    *        A StorageBlob will be allocated to serve as the HashTable's
    *        in-memory storage.
    * @return A new HashTable instance with the appropriate state type for this
    *         aggregate.
    **/
   virtual AggregationStateHashTableBase* createGroupByHashTable(
       const HashTableImplType hash_table_impl,
       const std::vector<const Type *> &group_by_types,
       const std::size_t estimated_num_groups,
       StorageManager *storage_manager) const = 0;

   /**
    * @brief Accumulate over tuples for a nullary aggregate function (one that
    *        has zero arguments, i.e. COUNT(*)).
    *
    * @param num_tuples The number of tuples to "accumulate". No actual tuple
    *        data is accessed, the only thing that a nullary aggeregate can know
    *        about input is its cardinality.
    * @return A new AggregationState which contains the accumulated results from
    *         applying the (nullary) aggregate to the specified number of
    *         tuples.
    **/
   virtual AggregationState* accumulateNullary(
       const std::size_t num_tuples) const = 0;

   /**
    * @brief Accumulate (iterate over) all values in one or more ColumnVectors
    *        and return a new AggregationState which can be merged with other
    *        states or finalized.
    *
    * @param column_vectors One or more ColumnVectors that the aggregate will be
    *        applied to. These correspond to the aggregate function's arguments,
    *        in order.
    * @return A new AggregationState which contains the accumulated results from
    *         applying the aggregate to column_vectors. Caller is responsible
    *         for deleting the returned AggregationState.
    **/
   virtual AggregationState* accumulateColumnVectors(
       const std::vector<std::unique_ptr<ColumnVector>> &column_vectors) const = 0;

 #ifdef QUICKSTEP_ENABLE_VECTOR_COPY_ELISION_SELECTION
   /**
    * @brief Accumulate (iterate over) all values in columns accessible through
    *        a ValueAccessor and return a new AggregationState which can be
    *        merged with other states or finalized.
    *
    * @param accessor A ValueAccessor that the columns to be aggregated can be
    *        accessed through.
    * @param accessor_ids The attribute_ids that correspond to the columns in
    *        accessor to aggeregate. These correspond to the aggregate
    *        function's arguments, in order.
    * @return A new AggregationState which contains the accumulated results from
    *         applying the aggregate to the specified columns in accessor.
    *         Caller is responsible for deleting the returned AggregationState.
    **/
   virtual AggregationState* accumulateValueAccessor(
       ValueAccessor *accessor,
       const std::vector<attribute_id> &accessor_ids) const = 0;
 #endif

   /**
    * @brief Perform an aggregation with GROUP BY over all the tuples accessible
    *        through a ValueAccessor, upserting states in a HashTable.
    *
    * @note Implementations of this method are threadsafe with respect to
    *       hash_table, and can be called concurrently from multiple threads
    *       with the same HashTable object.
    *
    * @param accessor The ValueAccessor that will be iterated over to read
    *        tuples.
    * @param argument_ids The attribute_ids of the arguments to this aggregate
    *        in accessor, in order.
    * @param group_by_key_ids The attribute_ids of the group-by
    *        columns/expressions in accessor.
    * @param hash_table The HashTable to upsert AggregationStates in. This
    *        should have been created by calling createGroupByHashTable() on
    *        this same AggregationHandle.
    **/
   virtual void aggregateValueAccessorIntoHashTable(
       ValueAccessor *accessor,
       const std::vector<attribute_id> &argument_ids,
       const std::vector<attribute_id> &group_by_key_ids,
       AggregationStateHashTableBase *hash_table) const = 0;

   /**
    * @brief Merge two AggregationStates, updating one in-place. This computes a
    *        new intermediate result and stores it in the destination
    *        AggregationState.
    *
    * @note The order of arguments (i.e. which AggregationState is source and
    *       which is destination) is not important for the result of the merging
    *       process, but it does determine which of the two states is
    *       overwritten (destination) and which remains unchanged (source).
    * @note Implementations of this method are threadsafe with respect to the
    *       destination state, and can be called concurrently from multiple
    *       threads with the same state object.
    *
    * @param source The AggregationState to merge "from".
    * @param destination The AggregationState to merge "to". The internal state
    *        will be overwritten with the merged result.
    **/
   virtual void mergeStates(const AggregationState &source,
                            AggregationState *destination) const = 0;

   /**
    * @brief Computes and returns the resulting aggregate by using intermediate
    *        result saved in this handle.
    *
    * @note Except for count, SQL89 aggeregates return NULL when no rows are
    *       selected.
    * @warning It is dangerous to assume that calling mergeStates() or some
    *          iterate method after previous finalize call will work correctly
    *          when the aggregate function is not one of SQL89 aggregates (SUM,
    *          COUNT, MIN, MAX, AVG).
    *
    * @return The result of this aggregation.
    **/
   virtual TypedValue finalize(const AggregationState &state) const = 0;

   /**
    * @brief Compute and return finalized aggregates for all groups in a
    *        HashTable.
    *
    * @param hash_table The HashTable to finalize states from. This should have
    *        have been created by calling createGroupByHashTable() on this same
    *        AggregationHandle.
    * @param group_by_keys A pointer to a vector of vectors of GROUP BY keys. If
    *        this is initially empty, it will be filled in with the GROUP BY
    *        keys visited by this method in the same order as the finalized
    *        values returned in the ColumnVector. If this is already filled in,
    *        then this method will visit the GROUP BY keys in the exact order
    *        specified.
    * @param index The index of the AggregationHandle to be finalized.
    *
    * @return A ColumnVector containing each group's finalized aggregate value.
    **/
   virtual ColumnVector* finalizeHashTable(
       const AggregationStateHashTableBase &hash_table,
       std::vector<std::vector<TypedValue>> *group_by_keys,
       int index) const = 0;

   /**
    * @brief Create a new HashTable for the distinctify step for DISTINCT
    * aggregation.
    *
    * Distinctify is the first step for DISTINCT aggregation. This step inserts
    * the GROUP BY expression values and aggregation arguments together as keys
    * into the distinctify hash table, so that arguments are distinctified within
    * each GROUP BY group. Later, a second-round aggregation on the distinctify
    * hash table will be performed to actually compute the aggregated result for
    * each GROUP BY group.
    *
    * In the case of single aggregation where there is no GROUP BY expressions,
    * we simply treat it as a special GROUP BY case that the GROUP BY expression
    * vector is empty.
    *
    * @param hash_table_impl The choice of which concrete HashTable
    *        implementation to use.
    * @param key_types The types of the GROUP BY expressions together with the
    *        types of the aggregation arguments.
    * @param estimated_num_distinct_keys The estimated number of distinct keys
    *        (i.e. GROUP BY expressions together with aggregation arguments) for
    *        the distinctify step. This is used to size the initial HashTable.
    *        This is an estimate only, and the HashTable will be resized if it
    *        becomes over-full.
    * @param storage_manager The StorageManager to use to create the HashTable.
    *        A StorageBlob will be allocated to serve as the HashTable's
    *        in-memory storage.
    *
    * @return A new HashTable instance with the appropriate state type for this
    *         aggregate.
    */
   virtual AggregationStateHashTableBase* createDistinctifyHashTable(
       const HashTableImplType hash_table_impl,
       const std::vector<const Type *> &key_types,
       const std::size_t estimated_num_distinct_keys,
       StorageManager *storage_manager) const = 0;

   /**
    * @brief Inserts the GROUP BY expressions and aggregation arguments together
    * as keys into the distinctify hash table.
    *
    * @param accessor The ValueAccessor that will be iterated over to read
    *        tuples.
    * @param key_ids The attribute_ids of the GROUP BY expressions in accessor
    *        together with the attribute_ids of the arguments to this aggregate
    *        in accessor, in order.
    * @param distinctify_hash_table The HashTable to store the GROUP BY
    *        expressions and the aggregation arguments together as hash table
    *        keys and a bool constant \c true as hash table value (So the hash
    *        table actually serves as a hash set). This should have been created
    *        by calling createDistinctifyHashTable();
    */
   virtual void insertValueAccessorIntoDistinctifyHashTable(
       ValueAccessor *accessor,
       const std::vector<attribute_id> &key_ids,
       AggregationStateHashTableBase *distinctify_hash_table) const = 0;

   /**
    * @brief Perform single (i.e. without GROUP BY) aggregation on the keys from
    * the distinctify hash table to actually compute the aggregated results.
    *
    * @param distinctify_hash_table Hash table which stores the distinctified
    *        aggregation arguments as hash table keys. This should have been
    *        created by calling createDistinctifyHashTable();
    * @return A new AggregationState which contains the aggregated results from
    *         applying the aggregate to the distinctify hash table.
    *         Caller is responsible for deleting the returned AggregationState.
    */
   virtual AggregationState* aggregateOnDistinctifyHashTableForSingle(
       const AggregationStateHashTableBase &distinctify_hash_table) const = 0;

   /**
    * @brief Perform GROUP BY aggregation on the keys from the distinctify hash
    * table and upserts states into the aggregation hash table.
    *
    * @param distinctify_hash_table Hash table which stores the GROUP BY
    *        expression values and aggregation arguments together as hash table
    *        keys.
    * @param aggregation_hash_table The HashTable to upsert AggregationStates in.
    *        This should have been created by calling createGroupByHashTable() on
    *        this same AggregationHandle.
    * @param index The index of the distinctify hash table for which we perform
    *        the DISTINCT aggregation.
    */
   virtual void aggregateOnDistinctifyHashTableForGroupBy(
       const AggregationStateHashTableBase &distinctify_hash_table,
       AggregationStateHashTableBase *aggregation_hash_table,
       std::size_t index) const = 0;

   /**
    * @brief Get the number of bytes needed to store the aggregation handle's
    *        state.
    **/
   virtual std::size_t getPayloadSize() const { return 1; }

   /**
    * @brief Update the aggregation state for nullary aggregation function e.g.
    *        COUNT(*).
    *
    * @note This function should be overloaded by those aggregation function
    *       which can perform nullary operations, e.g. COUNT.
    *
    * @param byte_ptr The pointer where the aggregation state is stored.
    **/
   virtual void updateStateNullary(std::uint8_t *byte_ptr) const {}

   /**
    * @brief Update the aggregation state for unary aggregation function e.g.
    *        SUM(a).
    *
    * @param argument The argument which will be used to update the state of the
    *        aggregation function.
    * @param byte_ptr The pointer where the aggregation state is stored.
    **/
   virtual void updateStateUnary(const TypedValue &argument,
                                 std::uint8_t *byte_ptr) const {}

   /**
    * @brief Merge two aggregation states for this aggregation handle.
    *
    * @note This function should be used with the hash table specifically meant
    *       for aggregations only.
    *
    * @param src A pointer to the source aggregation state.
    * @param dst A pointer to the destination aggregation state.
    **/
   virtual void mergeStatesFast(const std::uint8_t *src,
                                std::uint8_t *dst) const {}

   /**
    * @brief Initialize the payload (in the aggregation hash table) for the given
    *        aggregation handle.
    *
    * @param byte_ptr The pointer to the aggregation state in the hash table.
    **/
   virtual void initPayload(std::uint8_t *byte_ptr) const {}

   /**
    * @brief Destroy the payload (in the aggregation hash table) for the given
    *        aggregation handle.
    *
    * @param byte_ptr The pointer to the aggregation state in the hash table.
    **/
   virtual void destroyPayload(std::uint8_t *byte_ptr) const {}

   /**
    * @brief Inform the aggregation handle to block (prohibit) updates on the
    *        aggregation state.
    **/
   virtual void blockUpdate() {}

   /**
    * @brief Inform the aggregation handle to allow updates on the
    *        aggregation state.
    **/
   virtual void allowUpdate() {}

  protected:
   AggregationHandle() {}

  private:
   DISALLOW_COPY_AND_ASSIGN(AggregationHandle);
 };

 }  // namespace quickstep

 #endif  // QUICKSTEP_EXPRESSIONS_AGGREGATION_AGGREGATION_HANDLE_HPP_
	/**
	* Licensed to the Apache Software Foundation (ASF) under one
	* or more contributor license agreements. See the NOTICE file
	* distributed with this work for additional information
	* regarding copyright ownership. The ASF licenses this file
	* to you under the Apache License, Version 2.0 (the
	* "License"); you may not use this file except in compliance
	* with the License. You may obtain a copy of the License at
	*
	* http://www.apache.org/licenses/LICENSE-2.0
	*
	* Unless required by applicable law or agreed to in writing,
	* software distributed under the License is distributed on an
	* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
	* KIND, either express or implied. See the License for the
	* specific language governing permissions and limitations
	* under the License.
	**/

	#ifndef QUICKSTEP_EXPRESSIONS_AGGREGATION_AGGREGATION_HANDLE_HPP_
	#define QUICKSTEP_EXPRESSIONS_AGGREGATION_AGGREGATION_HANDLE_HPP_

	#include <cstddef>
	#include <memory>
	#include <vector>

	#include "catalog/CatalogTypedefs.hpp"
	#include "storage/HashTableBase.hpp"
	#include "types/TypedValue.hpp"
	#include "utility/Macros.hpp"

	namespace quickstep {

	class ColumnVector;
	class StorageManager;
	class Type;
	class ValueAccessor;

	/** \addtogroup Expressions
	* @{
	*/

	/**
	* @brief Abstract base class for aggregation state.
	**/
	class AggregationState {
	public:
	/**
	* @brief Default constructor.
	**/
	AggregationState() {}

	/**
	* @brief Pure virtual destructor.
	**/
	virtual ~AggregationState() = 0;
	};

	// Destructor should be defined. This will be called when derived class
	// destructor is called.
	inline AggregationState::~AggregationState() {}

	/**
	* @brief AggregationHandle encapsulates logic for actually computing
	* aggregates with particular argument(s).
	* @note See also AggregateFunction, which represents a SQL aggregate function
	* in the abstract sense.
	*
	* An AggregationHandle is created by calling
	* AggregateFunction::createHandle(). The AggregationHandle object provides
	* methods that are used to actually compute the aggregate, storing
	* intermediate results in AggregationState objects.
	*
	* I. The work-flow for computing an aggregate without GROUP BY is as follows:
	* 1. Create a global state for the aggregate with createInitialState().
	* 2. For each block in a relation (parallelizable):
	* a. Call StorageBlock::aggregate() to accumulate results from the
	* block (under the covers, this calls either
	* accumulateColumnVectors() or accumulateValueAccessor() to do the
	* actual per-block aggregation in a vectorized fashion).
	* b. Merge the per-block results back with the global state by calling
	* mergeStates() (this is threadsafe).
	* 3. Generate the final result by calling finalize() on the global state.
	*
	* II. The work-flow for computing an aggregate with GROUP BY is as follows:
	* 1. Create a HashTable to hold per-group states by calling
	* createGroupByHashTable().
	* 2. For each block in a relation (parallelizable):
	* a. Call StorageBlock::aggregateGroupBy() to update the states in the
	* HashTable according to the values in the block (under the covers,
	* this calls aggregateValueAccessorIntoHashTable() to aggregate over
	* all the values/groups in a block in one shot; this is threadsafe).
	* 3. Generate the final set of groups and their corresponding results by
	* calling finalizeHashTable().
	*
	* See also AggregationOperationState, which holds 1 or more global states or
	* HashTables for an aggregate query, and has some logic to re-use common
	* information across multiple aggregates being computed on the same block
	* (e.g. the set of matches for a predicate, or the values of computed GROUP BY
	* expressions). AggregationOperationState also has a method to write out
	* finalized aggregate values to an InsertDestination.
	**/
	class AggregationHandle {
	public:
	/**
	* @brief Virtual destructor.
	*
	**/
	virtual ~AggregationHandle() {}

	/**
	* @brief Create an initial "blank" state for this aggregation.
	*
	* @return An initial "blank" state for this particular aggregation.
	**/
	virtual AggregationState* createInitialState() const = 0;

	/**
	* @brief Create a new HashTable for aggregation with GROUP BY.
	*
	* @param hash_table_impl The choice of which concrete HashTable
	* implementation to use.
	* @param group_by_types The types of the GROUP BY columns/expressions. These
	* correspond to the (composite) key type for the HashTable.
	* @param estimated_num_groups The estimated number of distinct groups for
	* the GROUP BY aggregation. This is used to size the initial
	* HashTable. This is an estimate only, and the HashTable will be
	* resized if it becomes over-full.
	* @param storage_manager The StorageManager to use to create the HashTable.
	* A StorageBlob will be allocated to serve as the HashTable's
	* in-memory storage.
	* @return A new HashTable instance with the appropriate state type for this
	* aggregate.
	**/
	virtual AggregationStateHashTableBase* createGroupByHashTable(
	const HashTableImplType hash_table_impl,
	const std::vector<const Type *> &group_by_types,
	const std::size_t estimated_num_groups,
	StorageManager *storage_manager) const = 0;

	/**
	* @brief Accumulate over tuples for a nullary aggregate function (one that
	* has zero arguments, i.e. COUNT(*)).
	*
	* @param num_tuples The number of tuples to "accumulate". No actual tuple
	* data is accessed, the only thing that a nullary aggeregate can know
	* about input is its cardinality.
	* @return A new AggregationState which contains the accumulated results from
	* applying the (nullary) aggregate to the specified number of
	* tuples.
	**/
	virtual AggregationState* accumulateNullary(
	const std::size_t num_tuples) const = 0;

	/**
	* @brief Accumulate (iterate over) all values in one or more ColumnVectors
	* and return a new AggregationState which can be merged with other
	* states or finalized.
	*
	* @param column_vectors One or more ColumnVectors that the aggregate will be
	* applied to. These correspond to the aggregate function's arguments,
	* in order.
	* @return A new AggregationState which contains the accumulated results from
	* applying the aggregate to column_vectors. Caller is responsible
	* for deleting the returned AggregationState.
	**/
	virtual AggregationState* accumulateColumnVectors(
	const std::vector<std::unique_ptr<ColumnVector>> &column_vectors) const = 0;

	#ifdef QUICKSTEP_ENABLE_VECTOR_COPY_ELISION_SELECTION
	/**
	* @brief Accumulate (iterate over) all values in columns accessible through
	* a ValueAccessor and return a new AggregationState which can be
	* merged with other states or finalized.
	*
	* @param accessor A ValueAccessor that the columns to be aggregated can be
	* accessed through.
	* @param accessor_ids The attribute_ids that correspond to the columns in
	* accessor to aggeregate. These correspond to the aggregate
	* function's arguments, in order.
	* @return A new AggregationState which contains the accumulated results from
	* applying the aggregate to the specified columns in accessor.
	* Caller is responsible for deleting the returned AggregationState.
	**/
	virtual AggregationState* accumulateValueAccessor(
	ValueAccessor *accessor,
	const std::vector<attribute_id> &accessor_ids) const = 0;
	#endif

	/**
	* @brief Perform an aggregation with GROUP BY over all the tuples accessible
	* through a ValueAccessor, upserting states in a HashTable.
	*
	* @note Implementations of this method are threadsafe with respect to
	* hash_table, and can be called concurrently from multiple threads
	* with the same HashTable object.
	*
	* @param accessor The ValueAccessor that will be iterated over to read
	* tuples.
	* @param argument_ids The attribute_ids of the arguments to this aggregate
	* in accessor, in order.
	* @param group_by_key_ids The attribute_ids of the group-by
	* columns/expressions in accessor.
	* @param hash_table The HashTable to upsert AggregationStates in. This
	* should have been created by calling createGroupByHashTable() on
	* this same AggregationHandle.
	**/
	virtual void aggregateValueAccessorIntoHashTable(
	ValueAccessor *accessor,
	const std::vector<attribute_id> &argument_ids,
	const std::vector<attribute_id> &group_by_key_ids,
	AggregationStateHashTableBase *hash_table) const = 0;

	/**
	* @brief Merge two AggregationStates, updating one in-place. This computes a
	* new intermediate result and stores it in the destination
	* AggregationState.
	*
	* @note The order of arguments (i.e. which AggregationState is source and
	* which is destination) is not important for the result of the merging
	* process, but it does determine which of the two states is
	* overwritten (destination) and which remains unchanged (source).
	* @note Implementations of this method are threadsafe with respect to the
	* destination state, and can be called concurrently from multiple
	* threads with the same state object.
	*
	* @param source The AggregationState to merge "from".
	* @param destination The AggregationState to merge "to". The internal state
	* will be overwritten with the merged result.
	**/
	virtual void mergeStates(const AggregationState &source,
	AggregationState *destination) const = 0;

	/**
	* @brief Computes and returns the resulting aggregate by using intermediate
	* result saved in this handle.
	*
	* @note Except for count, SQL89 aggeregates return NULL when no rows are
	* selected.
	* @warning It is dangerous to assume that calling mergeStates() or some
	* iterate method after previous finalize call will work correctly
	* when the aggregate function is not one of SQL89 aggregates (SUM,
	* COUNT, MIN, MAX, AVG).
	*
	* @return The result of this aggregation.
	**/
	virtual TypedValue finalize(const AggregationState &state) const = 0;

	/**
	* @brief Compute and return finalized aggregates for all groups in a
	* HashTable.
	*
	* @param hash_table The HashTable to finalize states from. This should have
	* have been created by calling createGroupByHashTable() on this same
	* AggregationHandle.
	* @param group_by_keys A pointer to a vector of vectors of GROUP BY keys. If
	* this is initially empty, it will be filled in with the GROUP BY
	* keys visited by this method in the same order as the finalized
	* values returned in the ColumnVector. If this is already filled in,
	* then this method will visit the GROUP BY keys in the exact order
	* specified.
	* @param index The index of the AggregationHandle to be finalized.
	*
	* @return A ColumnVector containing each group's finalized aggregate value.
	**/
	virtual ColumnVector* finalizeHashTable(
	const AggregationStateHashTableBase &hash_table,
	std::vector<std::vector<TypedValue>> *group_by_keys,
	int index) const = 0;

	/**
	* @brief Create a new HashTable for the distinctify step for DISTINCT
	* aggregation.
	*
	* Distinctify is the first step for DISTINCT aggregation. This step inserts
	* the GROUP BY expression values and aggregation arguments together as keys
	* into the distinctify hash table, so that arguments are distinctified within
	* each GROUP BY group. Later, a second-round aggregation on the distinctify
	* hash table will be performed to actually compute the aggregated result for
	* each GROUP BY group.
	*
	* In the case of single aggregation where there is no GROUP BY expressions,
	* we simply treat it as a special GROUP BY case that the GROUP BY expression
	* vector is empty.
	*
	* @param hash_table_impl The choice of which concrete HashTable
	* implementation to use.
	* @param key_types The types of the GROUP BY expressions together with the
	* types of the aggregation arguments.
	* @param estimated_num_distinct_keys The estimated number of distinct keys
	* (i.e. GROUP BY expressions together with aggregation arguments) for
	* the distinctify step. This is used to size the initial HashTable.
	* This is an estimate only, and the HashTable will be resized if it
	* becomes over-full.
	* @param storage_manager The StorageManager to use to create the HashTable.
	* A StorageBlob will be allocated to serve as the HashTable's
	* in-memory storage.
	*
	* @return A new HashTable instance with the appropriate state type for this
	* aggregate.
	*/
	virtual AggregationStateHashTableBase* createDistinctifyHashTable(
	const HashTableImplType hash_table_impl,
	const std::vector<const Type *> &key_types,
	const std::size_t estimated_num_distinct_keys,
	StorageManager *storage_manager) const = 0;

	/**
	* @brief Inserts the GROUP BY expressions and aggregation arguments together
	* as keys into the distinctify hash table.
	*
	* @param accessor The ValueAccessor that will be iterated over to read
	* tuples.
	* @param key_ids The attribute_ids of the GROUP BY expressions in accessor
	* together with the attribute_ids of the arguments to this aggregate
	* in accessor, in order.
	* @param distinctify_hash_table The HashTable to store the GROUP BY
	* expressions and the aggregation arguments together as hash table
	* keys and a bool constant \c true as hash table value (So the hash
	* table actually serves as a hash set). This should have been created
	* by calling createDistinctifyHashTable();
	*/
	virtual void insertValueAccessorIntoDistinctifyHashTable(
	ValueAccessor *accessor,
	const std::vector<attribute_id> &key_ids,
	AggregationStateHashTableBase *distinctify_hash_table) const = 0;

	/**
	* @brief Perform single (i.e. without GROUP BY) aggregation on the keys from
	* the distinctify hash table to actually compute the aggregated results.
	*
	* @param distinctify_hash_table Hash table which stores the distinctified
	* aggregation arguments as hash table keys. This should have been
	* created by calling createDistinctifyHashTable();
	* @return A new AggregationState which contains the aggregated results from
	* applying the aggregate to the distinctify hash table.
	* Caller is responsible for deleting the returned AggregationState.
	*/
	virtual AggregationState* aggregateOnDistinctifyHashTableForSingle(
	const AggregationStateHashTableBase &distinctify_hash_table) const = 0;

	/**
	* @brief Perform GROUP BY aggregation on the keys from the distinctify hash
	* table and upserts states into the aggregation hash table.
	*
	* @param distinctify_hash_table Hash table which stores the GROUP BY
	* expression values and aggregation arguments together as hash table
	* keys.
	* @param aggregation_hash_table The HashTable to upsert AggregationStates in.
	* This should have been created by calling createGroupByHashTable() on
	* this same AggregationHandle.
	* @param index The index of the distinctify hash table for which we perform
	* the DISTINCT aggregation.
	*/
	virtual void aggregateOnDistinctifyHashTableForGroupBy(
	const AggregationStateHashTableBase &distinctify_hash_table,
	AggregationStateHashTableBase *aggregation_hash_table,
	std::size_t index) const = 0;

	/**
	* @brief Get the number of bytes needed to store the aggregation handle's
	* state.
	**/
	virtual std::size_t getPayloadSize() const { return 1; }

	/**
	* @brief Update the aggregation state for nullary aggregation function e.g.
	* COUNT(*).
	*
	* @note This function should be overloaded by those aggregation function
	* which can perform nullary operations, e.g. COUNT.
	*
	* @param byte_ptr The pointer where the aggregation state is stored.
	**/
	virtual void updateStateNullary(std::uint8_t *byte_ptr) const {}

	/**
	* @brief Update the aggregation state for unary aggregation function e.g.
	* SUM(a).
	*
	* @param argument The argument which will be used to update the state of the
	* aggregation function.
	* @param byte_ptr The pointer where the aggregation state is stored.
	**/
	virtual void updateStateUnary(const TypedValue &argument,
	std::uint8_t *byte_ptr) const {}

	/**
	* @brief Merge two aggregation states for this aggregation handle.
	*
	* @note This function should be used with the hash table specifically meant
	* for aggregations only.
	*
	* @param src A pointer to the source aggregation state.
	* @param dst A pointer to the destination aggregation state.
	**/
	virtual void mergeStatesFast(const std::uint8_t *src,
	std::uint8_t *dst) const {}

	/**
	* @brief Initialize the payload (in the aggregation hash table) for the given
	* aggregation handle.
	*
	* @param byte_ptr The pointer to the aggregation state in the hash table.
	**/
	virtual void initPayload(std::uint8_t *byte_ptr) const {}

	/**
	* @brief Destroy the payload (in the aggregation hash table) for the given
	* aggregation handle.
	*
	* @param byte_ptr The pointer to the aggregation state in the hash table.
	**/
	virtual void destroyPayload(std::uint8_t *byte_ptr) const {}

	/**
	* @brief Inform the aggregation handle to block (prohibit) updates on the
	* aggregation state.
	**/
	virtual void blockUpdate() {}

	/**
	* @brief Inform the aggregation handle to allow updates on the
	* aggregation state.
	**/
	virtual void allowUpdate() {}

	protected:
	AggregationHandle() {}

	private:
	DISALLOW_COPY_AND_ASSIGN(AggregationHandle);
	};

	} // namespace quickstep

	#endif // QUICKSTEP_EXPRESSIONS_AGGREGATION_AGGREGATION_HANDLE_HPP_