| // Licensed to the Apache Software Foundation (ASF) under one |
| // or more contributor license agreements. See the NOTICE file |
| // distributed with this work for additional information |
| // regarding copyright ownership. The ASF licenses this file |
| // to you under the Apache License, Version 2.0 (the |
| // "License"); you may not use this file except in compliance |
| // with the License. You may obtain a copy of the License at |
| // |
| // http://www.apache.org/licenses/LICENSE-2.0 |
| // |
| // Unless required by applicable law or agreed to in writing, |
| // software distributed under the License is distributed on an |
| // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
| // KIND, either express or implied. See the License for the |
| // specific language governing permissions and limitations |
| // under the License. |
| |
| #ifndef IMPALA_EXEC_HASH_TABLE_H |
| #define IMPALA_EXEC_HASH_TABLE_H |
| |
| #include <memory> |
| #include <vector> |
| #include <boost/cstdint.hpp> |
| #include <boost/scoped_array.hpp> |
| #include <boost/scoped_ptr.hpp> |
| |
| #include "codegen/impala-ir.h" |
| #include "common/compiler-util.h" |
| #include "common/logging.h" |
| #include "runtime/buffered-tuple-stream.h" |
| #include "runtime/buffered-tuple-stream.inline.h" |
| #include "runtime/bufferpool/buffer-pool.h" |
| #include "runtime/bufferpool/suballocator.h" |
| #include "runtime/tuple-row.h" |
| #include "util/bitmap.h" |
| #include "util/hash-util.h" |
| #include "util/runtime-profile.h" |
| |
| namespace llvm { |
| class Function; |
| } |
| |
| namespace impala { |
| |
| class LlvmCodeGen; |
| class MemTracker; |
| class RowDescriptor; |
| class RuntimeState; |
| class ScalarExpr; |
| class ScalarExprEvaluator; |
| class Tuple; |
| class TupleRow; |
| class HashTable; |
| |
| /// Linear or quadratic probing hash table implementation tailored to the usage pattern |
| /// for partitioned hash aggregation and hash joins. The hash table stores TupleRows and |
| /// allows for different exprs for insertions and finds. This is the pattern we use for |
| /// joins and aggregation where the input/build tuple row descriptor is different from the |
| /// find/probe descriptor. The implementation is designed to allow codegen for some paths. |
| // |
| /// In addition to the hash table there is also an accompanying hash table context that is |
| /// used for insertions and probes. For example, the hash table context stores evaluated |
| /// expr results for the current row being processed when possible into a contiguous |
| /// memory buffer. This allows for efficient hash computation. |
| // |
| /// The hash table does not support removes. The hash table is not thread safe. |
| /// The table is optimized for the partition hash aggregation and hash joins and is not |
| /// intended to be a generic hash table implementation. The API loosely mimics the |
| /// std::hashset API. |
| // |
| /// The data (rows) are stored in a BufferedTupleStream. The basic data structure of this |
| /// hash table is a vector of buckets. The buckets (indexed by the mod of the hash) |
| /// contain a pointer to either the slot in the tuple-stream or in case of duplicate |
| /// values, to the head of a linked list of nodes that in turn contain a pointer to |
| /// tuple-stream slots. When inserting an entry we start at the bucket at position |
| /// (hash % size) and search for either a bucket with the same hash or for an empty |
| /// bucket. If a bucket with the same hash is found, we then compare for row equality and |
| /// either insert a duplicate node if the equality is true, or continue the search if the |
| /// row equality is false. Similarly, when probing we start from the bucket at position |
| /// (hash % size) and search for an entry with the same hash or for an empty bucket. |
| /// In the former case, we then check for row equality and continue the search if the row |
| /// equality is false. In the latter case, the probe is not successful. When growing the |
| /// hash table, the number of buckets is doubled. We trigger a resize when the fill |
| /// factor is approx 75%. Due to the doubling nature of the buckets, we require that the |
| /// number of buckets is a power of 2. This allows us to perform a modulo of the hash |
| /// using a bitmask. |
| /// |
| /// We choose to use linear or quadratic probing because they exhibit good (predictable) |
| /// cache behavior. |
| /// |
| /// The first NUM_SMALL_BLOCKS of nodes_ are made of blocks less than the IO size (of 8MB) |
| /// to reduce the memory footprint of small queries. |
| /// |
| /// TODO: Compare linear and quadratic probing and remove the loser. |
| /// TODO: We currently use 32-bit hashes. There is room in the bucket structure for at |
| /// least 48-bits. We should exploit this space. |
| /// TODO: Consider capping the probes with a threshold value. If an insert reaches |
| /// that threshold it is inserted to another linked list of overflow entries. |
| /// TODO: Smarter resizes, and perhaps avoid using powers of 2 as the hash table size. |
| /// TODO: this is not a fancy hash table in terms of memory access patterns |
| /// (cuckoo-hashing or something that spills to disk). We will likely want to invest |
| /// more time into this. |
| /// TODO: hash-join and aggregation have very different access patterns. Joins insert all |
| /// the rows and then calls scan to find them. Aggregation interleaves FindProbeRow() and |
| /// Inserts(). We may want to optimize joins more heavily for Inserts() (in particular |
| /// growing). |
| /// TODO: Batched interface for inserts and finds. |
| /// TODO: as an optimization, compute variable-length data size for the agg node. |
| |
| /// Control block for a hash table. This class contains the logic as well as the variables |
| /// needed by a thread to operate on a hash table. |
| class HashTableCtx { |
| public: |
| /// Create a hash table context with the specified parameters, invoke Init() to |
| /// initialize the new hash table context and return it in 'ht_ctx'. Expression |
| /// evaluators for the build and probe expressions will also be allocated. |
| /// Please see the comments of HashTableCtx constructor and Init() for details |
| /// of other parameters. |
| static Status Create(ObjectPool* pool, RuntimeState* state, |
| const std::vector<ScalarExpr*>& build_exprs, |
| const std::vector<ScalarExpr*>& probe_exprs, bool stores_nulls, |
| const std::vector<bool>& finds_nulls, int32_t initial_seed, int max_levels, |
| int num_build_tuples, MemPool* expr_perm_pool, MemPool* build_expr_results_pool, |
| MemPool* probe_expr_results_pool, boost::scoped_ptr<HashTableCtx>* ht_ctx); |
| |
| /// Initialize the build and probe expression evaluators. |
| Status Open(RuntimeState* state); |
| |
| /// Call to cleanup any resources allocated by the expression evaluators. |
| void Close(RuntimeState* state); |
| |
| void set_level(int level); |
| |
| int ALWAYS_INLINE level() const { return level_; } |
| |
| uint32_t ALWAYS_INLINE seed(int level) { return seeds_.at(level); } |
| |
| TupleRow* ALWAYS_INLINE scratch_row() const { return scratch_row_; } |
| |
| /// Returns the results of the expression at 'expr_idx' evaluated at the current row. |
| /// This value is invalid if the expr evaluated to NULL. |
| /// TODO: this is an awkward abstraction but aggregation node can take advantage of |
| /// it and save some expr evaluation calls. |
| void* ALWAYS_INLINE ExprValue(int expr_idx) const { |
| return expr_values_cache_.ExprValuePtr( |
| expr_values_cache_.cur_expr_values(), expr_idx); |
| } |
| |
| /// Returns if the expression at 'expr_idx' is evaluated to NULL for the current row. |
| bool ALWAYS_INLINE ExprValueNull(int expr_idx) const { |
| return static_cast<bool>(*(expr_values_cache_.cur_expr_values_null() + expr_idx)); |
| } |
| |
| /// Evaluate and hash the build/probe row, saving the evaluation to the current row of |
| /// the ExprValuesCache in this hash table context: the results are saved in |
| /// 'cur_expr_values_', the nullness of expressions values in 'cur_expr_values_null_', |
| /// and the hashed expression values in 'cur_expr_values_hash_'. Returns false if this |
| /// row should be rejected (doesn't need to be processed further) because it contains |
| /// NULL. These need to be inlined in the IR module so we can find and replace the |
| /// calls to EvalBuildRow()/EvalProbeRow(). |
| bool IR_ALWAYS_INLINE EvalAndHashBuild(const TupleRow* row); |
| bool IR_ALWAYS_INLINE EvalAndHashProbe(const TupleRow* row); |
| |
| /// Codegen for evaluating a tuple row. Codegen'd function matches the signature |
| /// for EvalBuildRow and EvalTupleRow. |
| /// If build_row is true, the codegen uses the build_exprs, otherwise the probe_exprs. |
| Status CodegenEvalRow(LlvmCodeGen* codegen, bool build_row, llvm::Function** fn); |
| |
| /// Codegen for evaluating a TupleRow and comparing equality. Function signature |
| /// matches HashTable::Equals(). 'inclusive_equality' is true if the generated |
| /// equality function should treat all NULLs as equal and all NaNs as equal. |
| /// See the template parameter to HashTable::Equals(). |
| Status CodegenEquals(LlvmCodeGen* codegen, bool inclusive_equality, |
| llvm::Function** fn); |
| |
| /// Codegen for hashing expr values. Function prototype matches HashRow identically. |
| /// Unlike HashRow(), the returned function only uses a single hash function, rather |
| /// than switching based on level_. If 'use_murmur' is true, murmur hash is used, |
| /// otherwise CRC is used if the hardware supports it (see hash-util.h). |
| Status CodegenHashRow(LlvmCodeGen* codegen, bool use_murmur, llvm::Function** fn); |
| |
| /// Struct that returns the number of constants replaced by ReplaceConstants(). |
| struct HashTableReplacedConstants { |
| int stores_nulls; |
| int finds_some_nulls; |
| int stores_tuples; |
| int stores_duplicates; |
| int quadratic_probing; |
| }; |
| |
| /// Replace hash table parameters with constants in 'fn'. Updates 'replacement_counts' |
| /// with the number of replacements made. 'num_build_tuples' and 'stores_duplicates' |
| /// correspond to HashTable parameters with the same name. |
| Status ReplaceHashTableConstants(LlvmCodeGen* codegen, bool stores_duplicates, |
| int num_build_tuples, llvm::Function* fn, |
| HashTableReplacedConstants* replacement_counts); |
| |
| static const char* LLVM_CLASS_NAME; |
| |
| /// To enable prefetching, the hash table building and probing are pipelined by the |
| /// exec nodes. A set of rows in a row batch will be evaluated and hashed first and |
| /// the corresponding hash table buckets are prefetched before they are probed against |
| /// the hash table. ExprValuesCache is a container for caching the results of |
| /// expressions evaluations for the rows in a prefetch set to avoid re-evaluating the |
| /// rows again during probing. Expressions evaluation can be very expensive. |
| /// |
| /// The expression evaluation results are cached in the following data structures: |
| /// |
| /// - 'expr_values_array_' is an array caching the results of the rows |
| /// evaluated against either the build or probe expressions. 'cur_expr_values_' |
| /// is a pointer into this array. |
| /// - 'expr_values_null_array_' is an array caching the nullness of each evaluated |
| /// expression in each row. 'cur_expr_values_null_' is a pointer into this array. |
| /// - 'expr_values_hash_array_' is an array of cached hash values of the rows. |
| /// 'cur_expr_values_hash_' is a pointer into this array. |
| /// - 'null_bitmap_' is a bitmap which indicates rows evaluated to NULL. |
| /// |
| /// ExprValuesCache provides an iterator like interface for performing a write pass |
| /// followed by a read pass. We refrain from providing an interface for random accesses |
| /// as there isn't a use case for it now and we want to avoid expensive multiplication |
| /// as the buffer size of each row is not necessarily power of two: |
| /// - Reset(), ResetForRead(): reset the iterators before writing / reading cached values. |
| /// - NextRow(): moves the iterators to point to the next row of cached values. |
| /// - AtEnd(): returns true if all cached rows have been read. Valid in read mode only. |
| /// |
| /// Various metadata information such as layout of results buffer is also stored in |
| /// this class. Note that the result buffer doesn't store variable length data. It only |
| /// contains pointers to the variable length data (e.g. if an expression value is a |
| /// StringValue). |
| /// |
| class ExprValuesCache { |
| public: |
| ExprValuesCache(); |
| |
| /// Allocates memory and initializes various data structures. Return error status |
| /// if memory allocation leads to the memory limits of the exec node to be exceeded. |
| /// 'tracker' is the memory tracker of the exec node which owns this HashTableCtx. |
| Status Init(RuntimeState* state, MemTracker* tracker, |
| const std::vector<ScalarExpr*>& build_exprs); |
| |
| /// Frees up various resources and updates memory tracker with proper accounting. |
| /// 'tracker' should be the same memory tracker which was passed in for Init(). |
| void Close(MemTracker* tracker); |
| |
| /// Resets the cache states (iterators, end pointers etc) before writing. |
| void Reset() noexcept; |
| |
| /// Resets the iterators to the start before reading. Will record the current position |
| /// of the iterators in end pointer before resetting so AtEnd() can determine if all |
| /// cached values have been read. |
| void ResetForRead(); |
| |
| /// Advances the iterators to the next row by moving to the next entries in the |
| /// arrays of cached values. |
| void ALWAYS_INLINE NextRow(); |
| |
| /// Compute the total memory usage of this ExprValuesCache. |
| static int MemUsage(int capacity, int results_buffer_size, int num_build_exprs); |
| |
| /// Returns the maximum number rows of expression values states which can be cached. |
| int ALWAYS_INLINE capacity() const { return capacity_; } |
| |
| /// Returns the total size in bytes of a row of evaluated expressions' values. |
| int ALWAYS_INLINE expr_values_bytes_per_row() const { |
| return expr_values_bytes_per_row_; |
| } |
| |
| /// Returns the offset into the result buffer of the first variable length |
| /// data results. |
| int ALWAYS_INLINE var_result_offset() const { return var_result_offset_; } |
| |
| /// Returns true if the current read pass is complete, meaning all cached values |
| /// have been read. |
| bool ALWAYS_INLINE AtEnd() const { |
| return cur_expr_values_hash_ == cur_expr_values_hash_end_; |
| } |
| |
| /// Returns true if the current row is null but nulls are not considered in the current |
| /// phase (build or probe). |
| bool ALWAYS_INLINE IsRowNull() const { return null_bitmap_.Get(CurIdx()); } |
| |
| /// Record in a bitmap that the current row is null but nulls are not considered in |
| /// the current phase (build or probe). |
| void ALWAYS_INLINE SetRowNull() { null_bitmap_.Set(CurIdx(), true); } |
| |
| /// Returns the hash values of the current row. |
| uint32_t ALWAYS_INLINE CurExprValuesHash() const { return *cur_expr_values_hash_; } |
| |
| /// Sets the hash values for the current row. |
| void ALWAYS_INLINE SetCurExprValuesHash(uint32_t hash) { |
| *cur_expr_values_hash_ = hash; |
| } |
| |
| /// Returns a pointer to the expression value at 'expr_idx' in 'expr_values'. |
| uint8_t* ExprValuePtr(uint8_t* expr_values, int expr_idx) const; |
| const uint8_t* ExprValuePtr(const uint8_t* expr_values, int expr_idx) const; |
| |
| /// Returns the current row's expression buffer. The expression values in the buffer |
| /// are accessed using ExprValuePtr(). |
| uint8_t* ALWAYS_INLINE cur_expr_values() const { return cur_expr_values_; } |
| |
| /// Returns null indicator bytes for the current row, one per expression. Non-zero |
| /// bytes mean NULL, zero bytes mean non-NULL. Indexed by the expression index. |
| /// These are uint8_t instead of bool to simplify codegen with IRBuilder. |
| /// TODO: is there actually a valid reason why this is necessary for codegen? |
| uint8_t* ALWAYS_INLINE cur_expr_values_null() const { return cur_expr_values_null_; } |
| |
| /// Returns the offset into the results buffer of the expression value at 'expr_idx'. |
| int ALWAYS_INLINE expr_values_offsets(int expr_idx) const { |
| return expr_values_offsets_[expr_idx]; |
| } |
| |
| private: |
| friend class HashTableCtx; |
| |
| /// Resets the iterators to the beginning of the cache values' arrays. |
| void ResetIterators(); |
| |
| /// Returns the offset in number of rows into the cached values' buffer. |
| int ALWAYS_INLINE CurIdx() const { |
| return cur_expr_values_hash_ - expr_values_hash_array_.get(); |
| } |
| |
| /// Max amount of memory in bytes for caching evaluated expression values. |
| static const int MAX_EXPR_VALUES_ARRAY_SIZE = 256 << 10; |
| |
| /// Maximum number of rows of expressions evaluation states which this |
| /// ExprValuesCache can cache. |
| int capacity_; |
| |
| /// Byte size of a row of evaluated expression values. Never changes once set, |
| /// can be used for constant substitution during codegen. |
| int expr_values_bytes_per_row_; |
| |
| /// Number of build/probe expressions. |
| int num_exprs_; |
| |
| /// Pointer into 'expr_values_array_' for the current row's expression values. |
| uint8_t* cur_expr_values_; |
| |
| /// Pointer into 'expr_values_null_array_' for the current row's nullness of each |
| /// expression value. |
| uint8_t* cur_expr_values_null_; |
| |
| /// Pointer into 'expr_hash_value_array_' for the hash value of current row's |
| /// expression values. |
| uint32_t* cur_expr_values_hash_; |
| |
| /// Pointer to the buffer one beyond the end of the last entry of cached expressions' |
| /// hash values. |
| uint32_t* cur_expr_values_hash_end_; |
| |
| /// Array for caching up to 'capacity_' number of rows worth of evaluated expression |
| /// values. Each row consumes 'expr_values_bytes_per_row_' number of bytes. |
| boost::scoped_array<uint8_t> expr_values_array_; |
| |
| /// Array for caching up to 'capacity_' number of rows worth of null booleans. |
| /// Each row contains 'num_exprs_' booleans to indicate nullness of expression values. |
| /// Used when the hash table supports NULL. Use 'uint8_t' to guarantee each entry is 1 |
| /// byte as sizeof(bool) is implementation dependent. The IR depends on this |
| /// assumption. |
| boost::scoped_array<uint8_t> expr_values_null_array_; |
| |
| /// Array for caching up to 'capacity_' number of rows worth of hashed values. |
| boost::scoped_array<uint32_t> expr_values_hash_array_; |
| |
| /// One bit for each row. A bit is set if that row is not hashed as it's evaluated |
| /// to NULL but the hash table doesn't support NULL. Such rows may still be included |
| /// in outputs for certain join types (e.g. left anti joins). |
| Bitmap null_bitmap_; |
| |
| /// Maps from expression index to the byte offset into a row of expression values. |
| /// One entry per build/probe expression. |
| std::vector<int> expr_values_offsets_; |
| |
| /// Byte offset into 'cur_expr_values_' that begins the variable length results for |
| /// a row. If -1, there are no variable length slots. Never changes once set, can be |
| /// constant substituted with codegen. |
| int var_result_offset_; |
| }; |
| |
| ExprValuesCache* ALWAYS_INLINE expr_values_cache() { return &expr_values_cache_; } |
| |
| private: |
| friend class HashTable; |
| friend class HashTableTest_HashEmpty_Test; |
| |
| /// Construct a hash table context. |
| /// - build_exprs are the exprs that should be used to evaluate rows during Insert(). |
| /// - probe_exprs are used during FindProbeRow() |
| /// - stores_nulls: if false, TupleRows with nulls are ignored during Insert |
| /// - finds_nulls: if finds_nulls[i] is false, FindProbeRow() returns End() for |
| /// TupleRows with nulls in position i even if stores_nulls is true. |
| /// - initial_seed: initial seed value to use when computing hashes for rows with |
| /// level 0. Other levels have their seeds derived from this seed. |
| /// - max_levels: the max lhashevels we will hash with. |
| /// - expr_perm_pool: the MemPool from which the expression evaluators make permanent |
| /// allocations that live until Close(). Owned by the exec node which owns this |
| /// hash table context. Memory usage of the expression value cache is charged |
| /// against this MemPool's tracker. |
| /// - build_expr_results_pool: the MemPool from which the expression evaluators make |
| /// allocations to hold expression results. Cached build expression values may |
| /// reference memory in this pool. Owned by the exec node which owns this hash |
| /// table context. |
| /// - probe_expr_results_pool: the MemPool from which the expression evaluators make |
| /// allocations to hold expression results. Cached probe expression values may |
| /// reference memory in this pool. Owned by the exec node which owns this hash |
| /// table context. |
| /// |
| /// TODO: stores_nulls is too coarse: for a hash table in which some columns are joined |
| /// with '<=>' and others with '=', stores_nulls could distinguish between columns |
| /// in which nulls are stored and columns in which they are not, which could save |
| /// space by not storing some rows we know will never match. |
| HashTableCtx(const std::vector<ScalarExpr*>& build_exprs, |
| const std::vector<ScalarExpr*>& probe_exprs, bool stores_nulls, |
| const std::vector<bool>& finds_nulls, int32_t initial_seed, |
| int max_levels, MemPool* expr_perm_pool, MemPool* build_expr_results_pool, |
| MemPool* probe_expr_results_pool); |
| |
| /// Allocate various buffers for storing expression evaluation results, hash values, |
| /// null bits etc. Also allocate evaluators for the build and probe expressions and |
| /// store them in 'pool'. Returns error if allocation causes query memory limit to |
| /// be exceeded or the evaluators fail to initialize. 'num_build_tuples' is the number |
| /// of tuples of a row in the build side, used for computing the size of a scratch row. |
| Status Init(ObjectPool* pool, RuntimeState* state, int num_build_tuples); |
| |
| /// Compute the hash of the values in 'expr_values' with nullness 'expr_values_null'. |
| /// This will be replaced by codegen. We don't want this inlined for replacing |
| /// with codegen'd functions so the function name does not change. |
| uint32_t IR_NO_INLINE HashRow( |
| const uint8_t* expr_values, const uint8_t* expr_values_null) const noexcept; |
| |
| /// Wrapper function for calling correct HashUtil function in non-codegen'd case. |
| uint32_t Hash(const void* input, int len, uint32_t hash) const; |
| |
| /// Evaluate 'row' over build exprs, storing values into 'expr_values' and nullness |
| /// into 'expr_values_null'. This will be replaced by codegen. We do not want this |
| /// function inlined when cross compiled because we need to be able to differentiate |
| /// between EvalBuildRow and EvalProbeRow by name and the build/probe exprs are baked |
| /// into the codegen'd function. |
| bool IR_NO_INLINE EvalBuildRow( |
| const TupleRow* row, uint8_t* expr_values, uint8_t* expr_values_null) noexcept { |
| return EvalRow(row, build_expr_evals_, expr_values, expr_values_null); |
| } |
| |
| /// Evaluate 'row' over probe exprs, storing the values into 'expr_values' and nullness |
| /// into 'expr_values_null'. This will be replaced by codegen. |
| bool IR_NO_INLINE EvalProbeRow( |
| const TupleRow* row, uint8_t* expr_values, uint8_t* expr_values_null) noexcept { |
| return EvalRow(row, probe_expr_evals_, expr_values, expr_values_null); |
| } |
| |
| /// Compute the hash of the values in 'expr_values' with nullness 'expr_values_null' |
| /// for a row with variable length fields (e.g. strings). |
| uint32_t HashVariableLenRow( |
| const uint8_t* expr_values, const uint8_t* expr_values_null) const; |
| |
| /// Evaluate the exprs over row, storing the values into 'expr_values' and nullness into |
| /// 'expr_values_null'. Returns whether any expr evaluated to NULL. This will be |
| /// replaced by codegen. |
| bool EvalRow(const TupleRow* row, const std::vector<ScalarExprEvaluator*>& evaluators, |
| uint8_t* expr_values, uint8_t* expr_values_null) noexcept; |
| |
| /// Returns true if the values of build_exprs evaluated over 'build_row' equal the |
| /// values in 'expr_values' with nullness 'expr_values_null'. INCLUSIVE_EQUALITY |
| /// means "NULL==NULL" and "NaN==NaN". This will be replaced by codegen. |
| template <bool INCLUSIVE_EQUALITY> |
| bool IR_NO_INLINE Equals(const TupleRow* build_row, const uint8_t* expr_values, |
| const uint8_t* expr_values_null) const noexcept; |
| |
| /// Helper function that calls Equals() with the current row. Always inlined so that |
| /// it does not appear in cross-compiled IR. |
| template <bool INCLUSIVE_EQUALITY> |
| bool ALWAYS_INLINE Equals(const TupleRow* build_row) const { |
| return Equals<INCLUSIVE_EQUALITY>(build_row, expr_values_cache_.cur_expr_values(), |
| expr_values_cache_.cur_expr_values_null()); |
| } |
| |
| /// Cross-compiled function to access member variables used in CodegenHashRow(). |
| uint32_t IR_ALWAYS_INLINE GetHashSeed() const; |
| |
| /// Functions to be replaced by codegen to specialize the hash table. |
| bool IR_NO_INLINE stores_nulls() const { return stores_nulls_; } |
| bool IR_NO_INLINE finds_some_nulls() const { return finds_some_nulls_; } |
| |
| /// Cross-compiled function to access the build/probe expression evaluators. |
| ScalarExprEvaluator* const* IR_ALWAYS_INLINE build_expr_evals() const; |
| ScalarExprEvaluator* const* IR_ALWAYS_INLINE probe_expr_evals() const; |
| |
| /// The exprs used to evaluate rows for inserting rows into hash table. |
| /// Also used when matching hash table entries against probe rows. |
| const std::vector<ScalarExpr*>& build_exprs_; |
| std::vector<ScalarExprEvaluator*> build_expr_evals_; |
| |
| /// The exprs used to evaluate rows for look-up in the hash table. |
| const std::vector<ScalarExpr*>& probe_exprs_; |
| std::vector<ScalarExprEvaluator*> probe_expr_evals_; |
| |
| /// Constants on how the hash table should behave. Joins and aggs have slightly |
| /// different behavior. |
| const bool stores_nulls_; |
| const std::vector<bool> finds_nulls_; |
| |
| /// finds_some_nulls_ is just the logical OR of finds_nulls_. |
| const bool finds_some_nulls_; |
| |
| /// The current level this context is working on. Each level needs to use a |
| /// different seed. |
| int level_; |
| |
| /// The seeds to use for hashing. Indexed by the level. |
| std::vector<uint32_t> seeds_; |
| |
| /// The ExprValuesCache for caching expression evaluation results, null bytes and hash |
| /// values for rows. Used to store results of batch evaluations of rows. |
| ExprValuesCache expr_values_cache_; |
| |
| /// Scratch buffer to generate rows on the fly. |
| TupleRow* scratch_row_; |
| |
| /// MemPool for 'build_expr_evals_' and 'probe_expr_evals_' to allocate expr-managed |
| /// memory from. Not owned. |
| MemPool* expr_perm_pool_; |
| |
| /// MemPools for allocations by 'build_expr_evals_' and 'probe_expr_evals_' that hold |
| /// results of expr evaluation. Not owned. The owner of these pools is responsible for |
| /// clearing them when results from the respective expr evaluators are no longer needed. |
| MemPool* build_expr_results_pool_; |
| MemPool* probe_expr_results_pool_; |
| }; |
| |
| /// HashTableStatsProfile encapsulates hash tables stats. It tracks the stats of all the |
| /// hash tables created by a node. It should be created, stored by the node, and be |
| /// released when the node is released. |
| struct HashTableStatsProfile { |
| /// Profile object for HashTable Stats |
| RuntimeProfile* hashtable_profile = nullptr; |
| |
| /// Number of hash collisions - unequal rows that have identical hash values |
| RuntimeProfile::Counter* num_hash_collisions_ = nullptr; |
| |
| /// Number of hash table probes. |
| RuntimeProfile::Counter* num_hash_probes_ = nullptr; |
| |
| /// Total distance traveled for each hash table probe. |
| RuntimeProfile::Counter* num_hash_travels_ = nullptr; |
| |
| /// Number of hash table resized |
| RuntimeProfile::Counter* num_hash_resizes_ = nullptr; |
| |
| /// Total number of hash buckets across all partitions. |
| RuntimeProfile::Counter* num_hash_buckets_ = nullptr; |
| |
| }; |
| |
| /// The hash table consists of a contiguous array of buckets that contain a pointer to the |
| /// data, the hash value and three flags: whether this bucket is filled, whether this |
| /// entry has been matched (used in right and full joins) and whether this entry has |
| /// duplicates. If there are duplicates, then the data is pointing to the head of a |
| /// linked list of duplicate nodes that point to the actual data. Note that the duplicate |
| /// nodes do not contain the hash value, because all the linked nodes have the same hash |
| /// value, the one in the bucket. The data is either a tuple stream index or a Tuple*. |
| /// This array of buckets is sparse, we are shooting for up to 3/4 fill factor (75%). The |
| /// data allocated by the hash table comes from the BufferPool. |
| class HashTable { |
| private: |
| /// Rows are represented as pointers into the BufferedTupleStream data with one |
| /// of two formats, depending on the number of tuples in the row. |
| union HtData { |
| // For rows with multiple tuples per row, a pointer to the flattened TupleRow. |
| BufferedTupleStream::FlatRowPtr flat_row; |
| // For rows with one tuple per row, a pointer to the Tuple itself. |
| Tuple* tuple; |
| }; |
| |
| /// Linked list of entries used for duplicates. |
| struct DuplicateNode { |
| /// Used for full outer and right {outer, anti, semi} joins. Indicates whether the |
| /// row in the DuplicateNode has been matched. |
| /// From an abstraction point of view, this is an awkward place to store this |
| /// information. |
| /// TODO: Fold this flag in the next pointer below. |
| bool matched; |
| |
| /// Chain to next duplicate node, NULL when end of list. |
| DuplicateNode* next; |
| HtData htdata; |
| }; |
| |
| struct Bucket { |
| /// Whether this bucket contains a vaild entry, or it is empty. |
| bool filled; |
| |
| /// Used for full outer and right {outer, anti, semi} joins. Indicates whether the |
| /// row in the bucket has been matched. |
| /// From an abstraction point of view, this is an awkward place to store this |
| /// information but it is efficient. This space is otherwise unused. |
| bool matched; |
| |
| /// Used in case of duplicates. If true, then the bucketData union should be used as |
| /// 'duplicates'. |
| bool hasDuplicates; |
| |
| /// Cache of the hash for data. |
| /// TODO: Do we even have to cache the hash value? |
| uint32_t hash; |
| |
| /// Either the data for this bucket or the linked list of duplicates. |
| union { |
| HtData htdata; |
| DuplicateNode* duplicates; |
| } bucketData; |
| }; |
| |
| static_assert(BitUtil::IsPowerOf2(sizeof(Bucket)), |
| "We assume that Hash-table bucket directories are a power-of-two sizes because " |
| "allocating only bucket directories with power-of-two byte sizes avoids internal " |
| "fragmentation in the simple buddy allocator."); |
| |
| public: |
| class Iterator; |
| |
| /// Returns a newly allocated HashTable. The probing algorithm is set by the |
| /// FLAG_enable_quadratic_probing. |
| /// - allocator: allocator to allocate bucket directory and data pages from. |
| /// - stores_duplicates: true if rows with duplicate keys may be inserted into the |
| /// hash table. |
| /// - num_build_tuples: number of Tuples in the build tuple row. |
| /// - tuple_stream: the tuple stream which contains the tuple rows index by the |
| /// hash table. Can be NULL if the rows contain only a single tuple, in which |
| /// case the 'tuple_stream' is unused. |
| /// - max_num_buckets: the maximum number of buckets that can be stored. If we |
| /// try to grow the number of buckets to a larger number, the inserts will fail. |
| /// -1, if it unlimited. |
| /// - initial_num_buckets: number of buckets that the hash table should be initialized |
| /// with. |
| static HashTable* Create(Suballocator* allocator, bool stores_duplicates, |
| int num_build_tuples, BufferedTupleStream* tuple_stream, int64_t max_num_buckets, |
| int64_t initial_num_buckets); |
| |
| /// Allocates the initial bucket structure. Returns a non-OK status if an error is |
| /// encountered. If an OK status is returned , 'got_memory' is set to indicate whether |
| /// enough memory for the initial buckets was allocated from the Suballocator. |
| Status Init(bool* got_memory) WARN_UNUSED_RESULT; |
| |
| /// Create the counters for HashTable stats and put them into the child profile |
| /// "Hash Table". |
| /// Returns a HashTableStatsProfile object. |
| static std::unique_ptr<HashTableStatsProfile> AddHashTableCounters( |
| RuntimeProfile* parent_profile); |
| |
| /// Call to cleanup any resources. Must be called once. |
| void Close(); |
| |
| /// Add operations stats of this hash table to the counters in profile. |
| /// This method should only be called once for each HashTable and be called during |
| /// closing the owner object of the HashTable. Not all the counters are added with the |
| /// method, only counters for Probes, travels, collisions and resizes are affected. |
| void StatsCountersAdd(HashTableStatsProfile* profile); |
| |
| /// Inserts the row to the hash table. The caller is responsible for ensuring that the |
| /// table has free buckets. Returns true if the insertion was successful. Always |
| /// returns true if the table has free buckets and the key is not a duplicate. If the |
| /// key was a duplicate and memory could not be allocated for the new duplicate node, |
| /// returns false. If an error is encountered while creating a duplicate node, returns |
| /// false and sets 'status' to the error. |
| /// |
| /// 'flat_row' is a pointer to the flattened row in 'tuple_stream_' If the row contains |
| /// only one tuple, a pointer to that tuple is stored. Otherwise the 'flat_row' pointer |
| /// is stored. The 'row' is not copied by the hash table and the caller must guarantee |
| /// it stays in memory. This will not grow the hash table. |
| bool IR_ALWAYS_INLINE Insert(HashTableCtx* ht_ctx, |
| BufferedTupleStream::FlatRowPtr flat_row, TupleRow* row, |
| Status* status) WARN_UNUSED_RESULT; |
| |
| /// Prefetch the hash table bucket which the given hash value 'hash' maps to. |
| template <const bool READ> |
| void IR_ALWAYS_INLINE PrefetchBucket(uint32_t hash); |
| |
| /// Returns an iterator to the bucket that matches the probe expression results that |
| /// are cached at the current position of the ExprValuesCache in 'ht_ctx'. Assumes that |
| /// the ExprValuesCache was filled using EvalAndHashProbe(). Returns HashTable::End() |
| /// if no match is found. The iterator can be iterated until HashTable::End() to find |
| /// all the matching rows. Advancing the returned iterator will go to the next matching |
| /// row. The matching rows do not need to be evaluated since all the nodes of a bucket |
| /// are duplicates. One scan can be in progress for each 'ht_ctx'. Used in the probe |
| /// phase of hash joins. |
| Iterator IR_ALWAYS_INLINE FindProbeRow(HashTableCtx* ht_ctx); |
| |
| /// If a match is found in the table, return an iterator as in FindProbeRow(). If a |
| /// match was not present, return an iterator pointing to the empty bucket where the key |
| /// should be inserted. Returns End() if the table is full. The caller can set the data |
| /// in the bucket using a Set*() method on the iterator. |
| Iterator IR_ALWAYS_INLINE FindBuildRowBucket(HashTableCtx* ht_ctx, bool* found); |
| |
| /// Returns number of elements inserted in the hash table |
| int64_t size() const { |
| return num_filled_buckets_ - num_buckets_with_duplicates_ + num_duplicate_nodes_; |
| } |
| |
| /// Returns the number of empty buckets. |
| int64_t EmptyBuckets() const { return num_buckets_ - num_filled_buckets_; } |
| |
| /// Returns the number of buckets |
| int64_t num_buckets() const { return num_buckets_; } |
| |
| /// Returns the load factor (the number of non-empty buckets) |
| double load_factor() const { |
| return static_cast<double>(num_filled_buckets_) / num_buckets_; |
| } |
| |
| /// Return an estimate of the number of bytes needed to build the hash table |
| /// structure for 'num_rows'. To do that, it estimates the number of buckets, |
| /// rounded up to a power of two, and also assumes that there are no duplicates. |
| static int64_t EstimateNumBuckets(int64_t num_rows) { |
| /// Assume max 66% fill factor and no duplicates. |
| return BitUtil::RoundUpToPowerOfTwo(3 * num_rows / 2); |
| } |
| static int64_t EstimateSize(int64_t num_rows) { |
| int64_t num_buckets = EstimateNumBuckets(num_rows); |
| return num_buckets * sizeof(Bucket); |
| } |
| |
| /// Return the size of a hash table bucket in bytes. |
| static int64_t BucketSize() { return sizeof(Bucket); } |
| |
| /// Returns the memory occupied by the hash table, takes into account the number of |
| /// duplicates. |
| int64_t CurrentMemSize() const; |
| |
| /// Returns the number of inserts that can be performed before resizing the table. |
| int64_t NumInsertsBeforeResize() const; |
| |
| /// Calculates the fill factor if 'buckets_to_fill' additional buckets were to be |
| /// filled and resizes the hash table so that the projected fill factor is below the |
| /// max fill factor. |
| /// If 'got_memory' is true, then it is guaranteed at least 'rows_to_add' rows can be |
| /// inserted without need to resize. If there is not enough memory available to |
| /// resize the hash table, Status::OK() is returned and 'got_memory' is false. If a |
| /// another error occurs, an error status may be returned. |
| Status CheckAndResize(uint64_t buckets_to_fill, const HashTableCtx* ht_ctx, |
| bool* got_memory) WARN_UNUSED_RESULT; |
| |
| /// Returns the number of bytes allocated to the hash table from the block manager. |
| int64_t ByteSize() const { |
| return num_buckets_ * sizeof(Bucket) + total_data_page_size_; |
| } |
| |
| /// Returns an iterator at the beginning of the hash table. Advancing this iterator |
| /// will traverse all elements. |
| Iterator Begin(const HashTableCtx* ht_ctx); |
| |
| /// Return an iterator pointing to the first element (Bucket or DuplicateNode, if the |
| /// bucket has duplicates) in the hash table that does not have its matched flag set. |
| /// Used in right joins and full-outer joins. |
| Iterator FirstUnmatched(HashTableCtx* ctx); |
| |
| /// Return true if there was a least one match. |
| bool HasMatches() const { return has_matches_; } |
| |
| /// Return end marker. |
| Iterator End() { return Iterator(); } |
| |
| /// Dump out the entire hash table to string. If 'skip_empty', empty buckets are |
| /// skipped. If 'show_match', it also prints the matched flag of each node. If |
| /// 'build_desc' is non-null, the build rows will be printed. Otherwise, only the |
| /// the addresses of the build rows will be printed. |
| std::string DebugString(bool skip_empty, bool show_match, |
| const RowDescriptor* build_desc); |
| |
| /// Print the content of a bucket or node. |
| void DebugStringTuple(std::stringstream& ss, HtData& htdata, const RowDescriptor* desc); |
| |
| /// Update and print some statistics that can be used for performance debugging. |
| std::string PrintStats() const; |
| |
| /// stl-like iterator interface. |
| class Iterator { |
| private: |
| /// Bucket index value when probe is not successful. |
| static const int64_t BUCKET_NOT_FOUND = -1; |
| |
| public: |
| IR_ALWAYS_INLINE Iterator() : |
| table_(NULL), |
| scratch_row_(NULL), |
| bucket_idx_(BUCKET_NOT_FOUND), |
| node_(NULL) { } |
| |
| /// Iterates to the next element. It should be called only if !AtEnd(). |
| void IR_ALWAYS_INLINE Next(); |
| |
| /// Iterates to the next duplicate node. If the bucket does not have duplicates or |
| /// when it reaches the last duplicate node, then it moves the Iterator to AtEnd(). |
| /// Used when we want to iterate over all the duplicate nodes bypassing the Next() |
| /// interface (e.g. in semi/outer joins without other_join_conjuncts, in order to |
| /// iterate over all nodes of an unmatched bucket). |
| void IR_ALWAYS_INLINE NextDuplicate(); |
| |
| /// Iterates to the next element that does not have its matched flag set. Used in |
| /// right-outer and full-outer joins. |
| void IR_ALWAYS_INLINE NextUnmatched(); |
| |
| /// Return the current row or tuple. Callers must check the iterator is not AtEnd() |
| /// before calling them. The returned row is owned by the iterator and valid until |
| /// the next call to GetRow(). It is safe to advance the iterator. |
| TupleRow* IR_ALWAYS_INLINE GetRow() const; |
| Tuple* IR_ALWAYS_INLINE GetTuple() const; |
| |
| /// Set the current tuple for an empty bucket. Designed to be used with the iterator |
| /// returned from FindBuildRowBucket() in the case when the value is not found. It is |
| /// not valid to call this function if the bucket already has an entry. |
| void SetTuple(Tuple* tuple, uint32_t hash); |
| |
| /// Sets as matched the Bucket or DuplicateNode currently pointed by the iterator, |
| /// depending on whether the bucket has duplicates or not. The iterator cannot be |
| /// AtEnd(). |
| void SetMatched(); |
| |
| /// Returns the 'matched' flag of the current Bucket or DuplicateNode, depending on |
| /// whether the bucket has duplicates or not. It should be called only if !AtEnd(). |
| bool IsMatched() const; |
| |
| /// Resets everything but the pointer to the hash table. |
| void SetAtEnd(); |
| |
| /// Returns true if this iterator is at the end, i.e. GetRow() cannot be called. |
| bool ALWAYS_INLINE AtEnd() const { return bucket_idx_ == BUCKET_NOT_FOUND; } |
| |
| /// Prefetch the hash table bucket which the iterator is pointing to now. |
| template<const bool READ> |
| void IR_ALWAYS_INLINE PrefetchBucket(); |
| |
| private: |
| friend class HashTable; |
| |
| ALWAYS_INLINE |
| Iterator(HashTable* table, TupleRow* row, int bucket_idx, DuplicateNode* node) |
| : table_(table), |
| scratch_row_(row), |
| bucket_idx_(bucket_idx), |
| node_(node) { |
| } |
| |
| HashTable* table_; |
| |
| /// Scratch buffer to hold generated rows. Not owned. |
| TupleRow* scratch_row_; |
| |
| /// Current bucket idx. |
| int64_t bucket_idx_; |
| |
| /// Pointer to the current duplicate node. |
| DuplicateNode* node_; |
| }; |
| |
| private: |
| friend class Iterator; |
| friend class HashTableTest; |
| |
| /// Hash table constructor. Private because Create() should be used, instead |
| /// of calling this constructor directly. |
| /// - quadratic_probing: set to true when the probing algorithm is quadratic, as |
| /// opposed to linear. |
| HashTable(bool quadratic_probing, Suballocator* allocator, bool stores_duplicates, |
| int num_build_tuples, BufferedTupleStream* tuple_stream, int64_t max_num_buckets, |
| int64_t initial_num_buckets); |
| |
| /// Performs the probing operation according to the probing algorithm (linear or |
| /// quadratic. Returns one of the following: |
| /// (a) the index of the bucket that contains the entry that matches with the last row |
| /// evaluated in 'ht_ctx'. If 'ht_ctx' is NULL then it does not check for row |
| /// equality and returns the index of the first empty bucket. |
| /// (b) the index of the first empty bucket according to the probing algorithm (linear |
| /// or quadratic), if the entry is not in the hash table or 'ht_ctx' is NULL. |
| /// (c) Iterator::BUCKET_NOT_FOUND if the probe was not successful, i.e. the maximum |
| /// distance was traveled without finding either an empty or a matching bucket. |
| /// Using the returned index value, the caller can create an iterator that can be |
| /// iterated until End() to find all the matching rows. |
| /// |
| /// EvalAndHashBuild() or EvalAndHashProbe() must have been called before calling |
| /// this function. The values of the expression values cache in 'ht_ctx' will be |
| /// used to probe the hash table. |
| /// |
| /// 'INCLUSIVE_EQUALITY' is true if NULLs and NaNs should always be |
| /// considered equal when comparing two rows. |
| /// |
| /// 'hash' is the hash computed by EvalAndHashBuild() or EvalAndHashProbe(). |
| /// 'found' indicates that a bucket that contains an equal row is found. |
| /// |
| /// There are wrappers of this function that perform the Find and Insert logic. |
| template <bool INCLUSIVE_EQUALITY> |
| int64_t IR_ALWAYS_INLINE Probe(Bucket* buckets, int64_t num_buckets, |
| HashTableCtx* ht_ctx, uint32_t hash, bool* found); |
| |
| /// Performs the insert logic. Returns the HtData* of the bucket or duplicate node |
| /// where the data should be inserted. Returns NULL if the insert was not successful |
| /// and either sets 'status' to OK if it failed because not enough reservation was |
| /// available or the error if an error was encountered. |
| HtData* IR_ALWAYS_INLINE InsertInternal(HashTableCtx* ht_ctx, Status* status); |
| |
| /// Updates 'bucket_idx' to the index of the next non-empty bucket. If the bucket has |
| /// duplicates, 'node' will be pointing to the head of the linked list of duplicates. |
| /// Otherwise, 'node' should not be used. If there are no more buckets, sets |
| /// 'bucket_idx' to BUCKET_NOT_FOUND. |
| void NextFilledBucket(int64_t* bucket_idx, DuplicateNode** node); |
| |
| /// Resize the hash table to 'num_buckets'. 'got_memory' is false on OOM. |
| Status ResizeBuckets(int64_t num_buckets, const HashTableCtx* ht_ctx, bool* got_memory); |
| |
| /// Appends the DuplicateNode pointed by next_node_ to 'bucket' and moves the next_node_ |
| /// pointer to the next DuplicateNode in the page, updating the remaining node counter. |
| DuplicateNode* IR_ALWAYS_INLINE AppendNextNode(Bucket* bucket); |
| |
| /// Creates a new DuplicateNode for a entry and chains it to the bucket with index |
| /// 'bucket_idx'. The duplicate nodes of a bucket are chained as a linked list. |
| /// This places the new duplicate node at the beginning of the list. If this is the |
| /// first duplicate entry inserted in this bucket, then the entry already contained by |
| /// the bucket is converted to a DuplicateNode. That is, the contents of 'data' of the |
| /// bucket are copied to a DuplicateNode and 'data' is updated to pointing to a |
| /// DuplicateNode. |
| /// Returns NULL and sets 'status' to OK if the node array could not grow, i.e. there |
| /// was not enough memory to allocate a new DuplicateNode. Returns NULL and sets |
| /// 'status' to an error if another error was encountered. |
| DuplicateNode* IR_ALWAYS_INLINE InsertDuplicateNode(int64_t bucket_idx, Status* status); |
| |
| /// Resets the contents of the empty bucket with index 'bucket_idx', in preparation for |
| /// an insert. Sets all the fields of the bucket other than 'data'. |
| void IR_ALWAYS_INLINE PrepareBucketForInsert(int64_t bucket_idx, uint32_t hash); |
| |
| /// Return the TupleRow pointed by 'htdata'. |
| TupleRow* GetRow(HtData& htdata, TupleRow* row) const; |
| |
| /// Returns the TupleRow of the pointed 'bucket'. In case of duplicates, it |
| /// returns the content of the first chained duplicate node of the bucket. |
| TupleRow* GetRow(Bucket* bucket, TupleRow* row) const; |
| |
| /// Grow the node array. Returns true and sets 'status' to OK on success. Returns false |
| /// and set 'status' to OK if we can't get sufficient reservation to allocate the next |
| /// data page. Returns false and sets 'status' if another error is encountered. |
| bool GrowNodeArray(Status* status); |
| |
| /// Functions to be replaced by codegen to specialize the hash table. |
| bool IR_NO_INLINE stores_tuples() const { return stores_tuples_; } |
| bool IR_NO_INLINE stores_duplicates() const { return stores_duplicates_; } |
| bool IR_NO_INLINE quadratic_probing() const { return quadratic_probing_; } |
| |
| /// Load factor that will trigger growing the hash table on insert. This is |
| /// defined as the number of non-empty buckets / total_buckets |
| static constexpr double MAX_FILL_FACTOR = 0.75; |
| |
| /// The size in bytes of each page of duplicate nodes. Should be large enough to fit |
| /// enough DuplicateNodes to amortise the overhead of allocating each page and low |
| /// enough to not waste excessive memory to internal fragmentation. |
| static constexpr int64_t DATA_PAGE_SIZE = 64L * 1024; |
| |
| RuntimeState* state_; |
| |
| /// Suballocator to allocate data pages and hash table buckets with. |
| Suballocator* const allocator_; |
| |
| /// Stream contains the rows referenced by the hash table. Can be NULL if the |
| /// row only contains a single tuple, in which case the TupleRow indirection |
| /// is removed by the hash table. |
| BufferedTupleStream* const tuple_stream_; |
| |
| /// Constants on how the hash table should behave. |
| |
| /// True if the HtData uses the Tuple* representation, or false if it uses FlatRowPtr. |
| const bool stores_tuples_; |
| |
| /// True if duplicates may be inserted into hash table. |
| const bool stores_duplicates_; |
| |
| /// Quadratic probing enabled (as opposed to linear). |
| const bool quadratic_probing_; |
| |
| /// Data pages for all nodes. Allocated from suballocator to reduce memory |
| /// consumption of small tables. |
| std::vector<std::unique_ptr<Suballocation>> data_pages_; |
| |
| /// Byte size of all buffers in data_pages_. |
| int64_t total_data_page_size_ = 0; |
| |
| /// Next duplicate node to insert. Vaild when node_remaining_current_page_ > 0. |
| DuplicateNode* next_node_ = nullptr; |
| |
| /// Number of nodes left in the current page. |
| int node_remaining_current_page_ = 0; |
| |
| /// Number of duplicate nodes. |
| int64_t num_duplicate_nodes_ = 0; |
| |
| const int64_t max_num_buckets_ = 0; |
| |
| /// Allocation containing all buckets. |
| std::unique_ptr<Suballocation> bucket_allocation_; |
| |
| /// Pointer to the 'buckets_' array from 'bucket_allocation_'. |
| Bucket* buckets_ = nullptr; |
| |
| /// Total number of buckets (filled and empty). |
| int64_t num_buckets_; |
| |
| /// Number of non-empty buckets. Used to determine when to resize. |
| int64_t num_filled_buckets_ = 0; |
| |
| /// Number of (non-empty) buckets with duplicates. These buckets do not point to slots |
| /// in the tuple stream, rather than to a linked list of Nodes. |
| int64_t num_buckets_with_duplicates_ = 0; |
| |
| /// Number of build tuples, used for constructing temp row* for probes. |
| const int num_build_tuples_; |
| |
| /// Flag used to check that we don't lose stored matches when spilling hash tables |
| /// (IMPALA-1488). |
| bool has_matches_ = false; |
| |
| /// The stats below can be used for debugging perf. |
| /// Number of Probe() calls that probe the hash table. |
| int64_t num_probes_ = 0; |
| |
| /// Total distance traveled for each probe. That is the sum of the diff between the end |
| /// position of a probe (find/insert) and its start position |
| /// (hash & (num_buckets_ - 1)). |
| int64_t travel_length_ = 0; |
| |
| /// The number of cases where we had to compare buckets with the same hash value, but |
| /// the row equality failed. |
| int64_t num_hash_collisions_ = 0; |
| |
| /// How many times this table has resized so far. |
| int64_t num_resizes_ = 0; |
| }; |
| |
| } |
| |
| #endif |