be/src/exec/hash-table.cc - impala - Git at Google

 // Licensed to the Apache Software Foundation (ASF) under one
 // or more contributor license agreements.  See the NOTICE file
 // distributed with this work for additional information
 // regarding copyright ownership.  The ASF licenses this file
 // to you under the Apache License, Version 2.0 (the
 // "License"); you may not use this file except in compliance
 // with the License.  You may obtain a copy of the License at
 //
 //   http://www.apache.org/licenses/LICENSE-2.0
 //
 // Unless required by applicable law or agreed to in writing,
 // software distributed under the License is distributed on an
 // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 // KIND, either express or implied.  See the License for the
 // specific language governing permissions and limitations
 // under the License.

 #include "exec/hash-table.inline.h"

 #include <functional>
 #include <numeric>
 #include <gutil/strings/substitute.h>

 #include "codegen/codegen-anyval.h"
 #include "codegen/llvm-codegen.h"
 #include "exprs/slot-ref.h"
 #include "exprs/scalar-expr.h"
 #include "exprs/scalar-expr-evaluator.h"
 #include "runtime/bufferpool/reservation-tracker.h"
 #include "runtime/mem-tracker.h"
 #include "runtime/raw-value.inline.h"
 #include "runtime/runtime-state.h"
 #include "runtime/string-value.inline.h"
 #include "util/debug-util.h"
 #include "util/impalad-metrics.h"

 #include "common/names.h"

 using namespace impala;
 using strings::Substitute;

 DEFINE_bool(enable_quadratic_probing, true, "Enable quadratic probing hash table");

 const char* HashTableCtx::LLVM_CLASS_NAME = "class.impala::HashTableCtx";

 // Random primes to multiply the seed with.
 static uint32_t SEED_PRIMES[] = {
   1, // First seed must be 1, level 0 is used by other operators in the fragment.
   1431655781,
   1183186591,
   622729787,
   472882027,
   338294347,
   275604541,
   41161739,
   29999999,
   27475109,
   611603,
   16313357,
   11380003,
   21261403,
   33393119,
   101,
   71043403
 };

 // Put a non-zero constant in the result location for NULL.
 // We don't want(NULL, 1) to hash to the same as (0, 1).
 // This needs to be as big as the biggest primitive type since the bytes
 // get copied directly.
 // TODO find a better approach, since primitives like CHAR(N) can be up
 // to 255 bytes
 static int64_t NULL_VALUE[] = {
   HashUtil::FNV_SEED, HashUtil::FNV_SEED, HashUtil::FNV_SEED, HashUtil::FNV_SEED,
   HashUtil::FNV_SEED, HashUtil::FNV_SEED, HashUtil::FNV_SEED, HashUtil::FNV_SEED,
   HashUtil::FNV_SEED, HashUtil::FNV_SEED, HashUtil::FNV_SEED, HashUtil::FNV_SEED,
   HashUtil::FNV_SEED, HashUtil::FNV_SEED, HashUtil::FNV_SEED, HashUtil::FNV_SEED,
   HashUtil::FNV_SEED, HashUtil::FNV_SEED, HashUtil::FNV_SEED, HashUtil::FNV_SEED,
   HashUtil::FNV_SEED, HashUtil::FNV_SEED, HashUtil::FNV_SEED, HashUtil::FNV_SEED,
   HashUtil::FNV_SEED, HashUtil::FNV_SEED, HashUtil::FNV_SEED, HashUtil::FNV_SEED,
   HashUtil::FNV_SEED, HashUtil::FNV_SEED, HashUtil::FNV_SEED, HashUtil::FNV_SEED
 };

 static_assert(sizeof(NULL_VALUE) >= ColumnType::MAX_CHAR_LENGTH,
     "NULL_VALUE must be at least as large as the largest possible slot");

 HashTableCtx::HashTableCtx(const std::vector<ScalarExpr*>& build_exprs,
     const std::vector<ScalarExpr*>& probe_exprs, bool stores_nulls,
     const std::vector<bool>& finds_nulls, int32_t initial_seed,
     int max_levels, MemPool* expr_perm_pool, MemPool* build_expr_results_pool,
     MemPool* probe_expr_results_pool)
     : build_exprs_(build_exprs),
       probe_exprs_(probe_exprs),
       stores_nulls_(stores_nulls),
       finds_nulls_(finds_nulls),
       finds_some_nulls_(std::accumulate(
           finds_nulls_.begin(), finds_nulls_.end(), false, std::logical_or<bool>())),
       level_(0),
       scratch_row_(NULL),
       expr_perm_pool_(expr_perm_pool),
       build_expr_results_pool_(build_expr_results_pool),
       probe_expr_results_pool_(probe_expr_results_pool) {
   DCHECK(!finds_some_nulls_ || stores_nulls_);
   // Compute the layout and buffer size to store the evaluated expr results
   DCHECK_EQ(build_exprs_.size(), probe_exprs_.size());
   DCHECK_EQ(build_exprs_.size(), finds_nulls_.size());
   DCHECK(!build_exprs_.empty());

   // Populate the seeds to use for all the levels. TODO: revisit how we generate these.
   DCHECK_GE(max_levels, 0);
   DCHECK_LT(max_levels, sizeof(SEED_PRIMES) / sizeof(SEED_PRIMES[0]));
   DCHECK_NE(initial_seed, 0);
   seeds_.resize(max_levels + 1);
   seeds_[0] = initial_seed;
   for (int i = 1; i <= max_levels; ++i) {
     seeds_[i] = seeds_[i - 1] * SEED_PRIMES[i];
   }
 }

 Status HashTableCtx::Init(ObjectPool* pool, RuntimeState* state, int num_build_tuples) {
   int scratch_row_size = sizeof(Tuple*) * num_build_tuples;
   scratch_row_ = reinterpret_cast<TupleRow*>(malloc(scratch_row_size));
   if (UNLIKELY(scratch_row_ == NULL)) {
     return Status(Substitute("Failed to allocate $0 bytes for scratch row of "
         "HashTableCtx.", scratch_row_size));
   }
   RETURN_IF_ERROR(ScalarExprEvaluator::Create(build_exprs_, state, pool, expr_perm_pool_,
       build_expr_results_pool_, &build_expr_evals_));
   DCHECK_EQ(build_exprs_.size(), build_expr_evals_.size());
   RETURN_IF_ERROR(ScalarExprEvaluator::Create(probe_exprs_, state, pool, expr_perm_pool_,
       probe_expr_results_pool_, &probe_expr_evals_));
   DCHECK_EQ(probe_exprs_.size(), probe_expr_evals_.size());
   return expr_values_cache_.Init(state, expr_perm_pool_->mem_tracker(), build_exprs_);
 }

 Status HashTableCtx::Create(ObjectPool* pool, RuntimeState* state,
     const std::vector<ScalarExpr*>& build_exprs,
     const std::vector<ScalarExpr*>& probe_exprs, bool stores_nulls,
     const std::vector<bool>& finds_nulls, int32_t initial_seed, int max_levels,
     int num_build_tuples, MemPool* expr_perm_pool, MemPool* build_expr_results_pool,
     MemPool* probe_expr_results_pool, scoped_ptr<HashTableCtx>* ht_ctx) {
   ht_ctx->reset(new HashTableCtx(build_exprs, probe_exprs, stores_nulls,
       finds_nulls, initial_seed, max_levels, expr_perm_pool,
       build_expr_results_pool, probe_expr_results_pool));
   return (*ht_ctx)->Init(pool, state, num_build_tuples);
 }

 Status HashTableCtx::Open(RuntimeState* state) {
   RETURN_IF_ERROR(ScalarExprEvaluator::Open(build_expr_evals_, state));
   RETURN_IF_ERROR(ScalarExprEvaluator::Open(probe_expr_evals_, state));
   return Status::OK();
 }

 void HashTableCtx::Close(RuntimeState* state) {
   free(scratch_row_);
   scratch_row_ = NULL;
   expr_values_cache_.Close(expr_perm_pool_->mem_tracker());
   ScalarExprEvaluator::Close(build_expr_evals_, state);
   ScalarExprEvaluator::Close(probe_expr_evals_, state);
 }

 uint32_t HashTableCtx::Hash(const void* input, int len, uint32_t hash) const {
   /// Use CRC hash at first level for better performance. Switch to murmur hash at
   /// subsequent levels since CRC doesn't randomize well with different seed inputs.
   if (level_ == 0) return HashUtil::Hash(input, len, hash);
   return HashUtil::MurmurHash2_64(input, len, hash);
 }

 uint32_t HashTableCtx::HashRow(
     const uint8_t* expr_values, const uint8_t* expr_values_null) const noexcept {
   DCHECK_LT(level_, seeds_.size());
   if (expr_values_cache_.var_result_offset() == -1) {
     /// This handles NULLs implicitly since a constant seed value was put
     /// into results buffer for nulls.
     return Hash(
         expr_values, expr_values_cache_.expr_values_bytes_per_row(), seeds_[level_]);
   } else {
     return HashTableCtx::HashVariableLenRow(expr_values, expr_values_null);
   }
 }

 bool HashTableCtx::EvalRow(const TupleRow* row,
     const vector<ScalarExprEvaluator*>& evals,
     uint8_t* expr_values, uint8_t* expr_values_null) noexcept {
   bool has_null = false;
   for (int i = 0; i < evals.size(); ++i) {
     void* loc = expr_values_cache_.ExprValuePtr(expr_values, i);
     const void* val = evals[i]->GetValue(row);
     if (val == NULL) {
       // If the table doesn't store nulls, no reason to keep evaluating
       if (!stores_nulls_) return true;
       expr_values_null[i] = true;
       val = reinterpret_cast<void*>(&NULL_VALUE);
       has_null = true;
     } else {
       expr_values_null[i] = false;
     }
     const ColumnType& expr_type = build_exprs_[i]->type();
     DCHECK_LE(expr_type.GetSlotSize(), sizeof(NULL_VALUE));
     if (RawValue::IsNaN(val, expr_type)) {
       val = RawValue::CanonicalNaNValue(expr_type);
     }
     RawValue::Write(val, loc, expr_type, NULL);
   }
   return has_null;
 }

 uint32_t HashTableCtx::HashVariableLenRow(const uint8_t* expr_values,
     const uint8_t* expr_values_null) const {
   uint32_t hash = seeds_[level_];
   int var_result_offset = expr_values_cache_.var_result_offset();
   // Hash the non-var length portions (if there are any)
   if (var_result_offset != 0) {
     hash = Hash(expr_values, var_result_offset, hash);
   }

   for (int i = 0; i < build_exprs_.size(); ++i) {
     // non-string and null slots are already part of 'expr_values'.
     if (build_exprs_[i]->type().type != TYPE_STRING &&
         build_exprs_[i]->type().type != TYPE_VARCHAR) {
       continue;
     }
     const void* loc = expr_values_cache_.ExprValuePtr(expr_values, i);
     if (expr_values_null[i]) {
       // Hash the null random seed values at 'loc'
       hash = Hash(loc, sizeof(StringValue), hash);
     } else {
       // Hash the string
       // TODO: when using CRC hash on empty string, this only swaps bytes.
       const StringValue* str = reinterpret_cast<const StringValue*>(loc);
       hash = Hash(str->ptr, str->len, hash);
     }
   }
   return hash;
 }

 template <bool INCLUSIVE_EQUALITY>
 bool HashTableCtx::Equals(const TupleRow* build_row, const uint8_t* expr_values,
     const uint8_t* expr_values_null) const noexcept {
   for (int i = 0; i < build_expr_evals_.size(); ++i) {
     void* val = build_expr_evals_[i]->GetValue(build_row);
     if (val == NULL) {
       if (!(INCLUSIVE_EQUALITY || finds_nulls_[i])) return false;
       if (!expr_values_null[i]) return false;
       continue;
     } else {
       if (expr_values_null[i]) return false;
     }

     const void* loc = expr_values_cache_.ExprValuePtr(expr_values, i);
     DCHECK(build_exprs_[i] == &build_expr_evals_[i]->root());

     const ColumnType& expr_type = build_exprs_[i]->type();
     if (!RawValue::Eq(loc, val, expr_type)) {
       if (INCLUSIVE_EQUALITY) {
         bool val_is_nan = RawValue::IsNaN(val, expr_type);
         bool local_is_nan = RawValue::IsNaN(loc, expr_type);
         if (val_is_nan && local_is_nan) continue;
       }
       return false;
     }
   }
   return true;
 }
 template bool HashTableCtx::Equals<true>(const TupleRow* build_row,
     const uint8_t* expr_values, const uint8_t* expr_values_null) const;
 template bool HashTableCtx::Equals<false>(const TupleRow* build_row,
     const uint8_t* expr_values, const uint8_t* expr_values_null) const;

 HashTableCtx::ExprValuesCache::ExprValuesCache()
   : capacity_(0),
     cur_expr_values_(NULL),
     cur_expr_values_null_(NULL),
     cur_expr_values_hash_(NULL),
     cur_expr_values_hash_end_(NULL),
     expr_values_array_(NULL),
     expr_values_null_array_(NULL),
     expr_values_hash_array_(NULL),
     null_bitmap_(0) {}

 Status HashTableCtx::ExprValuesCache::Init(RuntimeState* state,
     MemTracker* tracker, const std::vector<ScalarExpr*>& build_exprs) {
   // Initialize the number of expressions.
   num_exprs_ = build_exprs.size();
   // Compute the layout of evaluated values of a row.
   expr_values_bytes_per_row_ = ScalarExpr::ComputeResultsLayout(build_exprs,
       &expr_values_offsets_, &var_result_offset_);
   if (expr_values_bytes_per_row_ == 0) {
     DCHECK_EQ(num_exprs_, 0);
     return Status::OK();
   }
   DCHECK_GT(expr_values_bytes_per_row_, 0);
   // Compute the maximum number of cached rows which can fit in the memory budget.
   // TODO: Find the optimal prefetch batch size. This may be something
   // processor dependent so we may need calibration at Impala startup time.
   capacity_ = std::max(1, std::min(state->batch_size(),
       MAX_EXPR_VALUES_ARRAY_SIZE / expr_values_bytes_per_row_));

   int mem_usage = MemUsage(capacity_, expr_values_bytes_per_row_, num_exprs_);
   if (UNLIKELY(!tracker->TryConsume(mem_usage))) {
     capacity_ = 0;
     string details = Substitute("HashTableCtx::ExprValuesCache failed to allocate $0 bytes.",
         mem_usage);
     return tracker->MemLimitExceeded(state, details, mem_usage);
   }

   int expr_values_size = expr_values_bytes_per_row_ * capacity_;
   expr_values_array_.reset(new uint8_t[expr_values_size]);
   cur_expr_values_ = expr_values_array_.get();
   memset(cur_expr_values_, 0, expr_values_size);

   int expr_values_null_size = num_exprs_ * capacity_;
   expr_values_null_array_.reset(new uint8_t[expr_values_null_size]);
   cur_expr_values_null_ = expr_values_null_array_.get();
   memset(cur_expr_values_null_, 0, expr_values_null_size);

   expr_values_hash_array_.reset(new uint32_t[capacity_]);
   cur_expr_values_hash_ = expr_values_hash_array_.get();
   cur_expr_values_hash_end_ = cur_expr_values_hash_;
   memset(cur_expr_values_hash_, 0, sizeof(uint32) * capacity_);

   null_bitmap_.Reset(capacity_);
   return Status::OK();
 }

 void HashTableCtx::ExprValuesCache::Close(MemTracker* tracker) {
   if (capacity_ == 0) return;
   cur_expr_values_ = NULL;
   cur_expr_values_null_ = NULL;
   cur_expr_values_hash_ = NULL;
   cur_expr_values_hash_end_ = NULL;
   expr_values_array_.reset();
   expr_values_null_array_.reset();
   expr_values_hash_array_.reset();
   null_bitmap_.Reset(0);
   int mem_usage = MemUsage(capacity_, expr_values_bytes_per_row_, num_exprs_);
   tracker->Release(mem_usage);
 }

 int HashTableCtx::ExprValuesCache::MemUsage(int capacity,
     int expr_values_bytes_per_row, int num_exprs) {
   return expr_values_bytes_per_row * capacity + // expr_values_array_
       num_exprs * capacity +                    // expr_values_null_array_
       sizeof(uint32) * capacity +               // expr_values_hash_array_
       Bitmap::MemUsage(capacity);               // null_bitmap_
 }

 uint8_t* HashTableCtx::ExprValuesCache::ExprValuePtr(
     uint8_t* expr_values, int expr_idx) const {
   return expr_values + expr_values_offsets_[expr_idx];
 }

 const uint8_t* HashTableCtx::ExprValuesCache::ExprValuePtr(
     const uint8_t* expr_values, int expr_idx) const {
   return expr_values + expr_values_offsets_[expr_idx];
 }

 void HashTableCtx::ExprValuesCache::ResetIterators() {
   cur_expr_values_ = expr_values_array_.get();
   cur_expr_values_null_ = expr_values_null_array_.get();
   cur_expr_values_hash_ = expr_values_hash_array_.get();
 }

 void HashTableCtx::ExprValuesCache::Reset() noexcept {
   ResetIterators();
   // Set the end pointer after resetting the other pointers so they point to
   // the same location.
   cur_expr_values_hash_end_ = cur_expr_values_hash_;
   null_bitmap_.SetAllBits(false);
 }

 void HashTableCtx::ExprValuesCache::ResetForRead() {
   // Record the end of hash values iterator to be used in AtEnd().
   // Do it before resetting the pointers.
   cur_expr_values_hash_end_ = cur_expr_values_hash_;
   ResetIterators();
 }

 constexpr double HashTable::MAX_FILL_FACTOR;
 constexpr int64_t HashTable::DATA_PAGE_SIZE;

 HashTable* HashTable::Create(Suballocator* allocator, bool stores_duplicates,
     int num_build_tuples, BufferedTupleStream* tuple_stream, int64_t max_num_buckets,
     int64_t initial_num_buckets) {
   return new HashTable(FLAGS_enable_quadratic_probing, allocator, stores_duplicates,
       num_build_tuples, tuple_stream, max_num_buckets, initial_num_buckets);
 }

 HashTable::HashTable(bool quadratic_probing, Suballocator* allocator,
     bool stores_duplicates, int num_build_tuples, BufferedTupleStream* stream,
     int64_t max_num_buckets, int64_t num_buckets)
   : allocator_(allocator),
     tuple_stream_(stream),
     stores_tuples_(num_build_tuples == 1),
     stores_duplicates_(stores_duplicates),
     quadratic_probing_(quadratic_probing),
     max_num_buckets_(max_num_buckets),
     num_buckets_(num_buckets),
     num_build_tuples_(num_build_tuples) {
   DCHECK_EQ((num_buckets & (num_buckets - 1)), 0) << "num_buckets must be a power of 2";
   DCHECK_GT(num_buckets, 0) << "num_buckets must be larger than 0";
   DCHECK(stores_tuples_ || stream != NULL);
 }

 Status HashTable::Init(bool* got_memory) {
   int64_t buckets_byte_size = num_buckets_ * sizeof(Bucket);
   RETURN_IF_ERROR(allocator_->Allocate(buckets_byte_size, &bucket_allocation_));
   if (bucket_allocation_ == nullptr) {
     num_buckets_ = 0;
     *got_memory = false;
     return Status::OK();
   }
   buckets_ = reinterpret_cast<Bucket*>(bucket_allocation_->data());
   memset(buckets_, 0, buckets_byte_size);
   *got_memory = true;
   return Status::OK();
 }

 unique_ptr<HashTableStatsProfile> HashTable::AddHashTableCounters(
     RuntimeProfile* parent_profile) {
   unique_ptr<HashTableStatsProfile> stats_profile(new HashTableStatsProfile());
   RuntimeProfile *hashtable_profile = stats_profile->hashtable_profile =
       parent_profile->CreateChild("Hash Table", true, true);
   stats_profile->num_hash_probes_ =
       ADD_COUNTER(hashtable_profile, "Probes", TUnit::UNIT);
   stats_profile->num_hash_travels_ =
       ADD_COUNTER(hashtable_profile, "Travel", TUnit::UNIT);
   stats_profile->num_hash_collisions_ =
       ADD_COUNTER(hashtable_profile, "HashCollisions", TUnit::UNIT);
   stats_profile->num_hash_buckets_ =
       ADD_COUNTER(hashtable_profile, "HashBuckets", TUnit::UNIT);
   stats_profile->num_hash_resizes_ =
       ADD_COUNTER(hashtable_profile, "Resizes", TUnit::UNIT);
   return stats_profile;
 }

 void HashTable::Close() {
   // Print statistics only for the large or heavily used hash tables.
   // TODO: Tweak these numbers/conditions, or print them always?
   const int64_t LARGE_HT = 128 * 1024;
   const int64_t HEAVILY_USED = 1024 * 1024;
   // TODO: These statistics should go to the runtime profile as well.
   if ((num_buckets_ > LARGE_HT) || (num_probes_ > HEAVILY_USED)) VLOG(2) << PrintStats();
   for (auto& data_page : data_pages_) allocator_->Free(move(data_page));
   data_pages_.clear();
   if (bucket_allocation_ != nullptr) allocator_->Free(move(bucket_allocation_));
 }

 void HashTable::StatsCountersAdd(HashTableStatsProfile* profile) {
   COUNTER_ADD(profile->num_hash_collisions_, num_hash_collisions_);
   COUNTER_ADD(profile->num_hash_probes_, num_probes_);
   COUNTER_ADD(profile->num_hash_travels_, travel_length_);
   COUNTER_ADD(profile->num_hash_resizes_, this->num_resizes_);
 }

 Status HashTable::CheckAndResize(
     uint64_t buckets_to_fill, const HashTableCtx* ht_ctx, bool* got_memory) {
   uint64_t shift = 0;
   while (num_filled_buckets_ + buckets_to_fill >
          (num_buckets_ << shift) * MAX_FILL_FACTOR) {
     ++shift;
   }
   if (shift > 0) return ResizeBuckets(num_buckets_ << shift, ht_ctx, got_memory);
   *got_memory = true;
   return Status::OK();
 }

 Status HashTable::ResizeBuckets(
     int64_t num_buckets, const HashTableCtx* ht_ctx, bool* got_memory) {
   DCHECK_EQ((num_buckets & (num_buckets - 1)), 0)
       << "num_buckets=" << num_buckets << " must be a power of 2";
   DCHECK_GT(num_buckets, num_filled_buckets_)
     << "Cannot shrink the hash table to smaller number of buckets than the number of "
     << "filled buckets.";
   VLOG(2) << "Resizing hash table from " << num_buckets_ << " to " << num_buckets
           << " buckets.";
   if (max_num_buckets_ != -1 && num_buckets > max_num_buckets_) {
     *got_memory = false;
     return Status::OK();
   }
   ++num_resizes_;

   // All memory that can grow proportional to the input should come from the block mgrs
   // mem tracker.
   // Note that while we copying over the contents of the old hash table, we need to have
   // allocated both the old and the new hash table. Once we finish, we return the memory
   // of the old hash table.
   // int64_t old_size = num_buckets_ * sizeof(Bucket);
   int64_t new_size = num_buckets * sizeof(Bucket);

   unique_ptr<Suballocation> new_allocation;
   RETURN_IF_ERROR(allocator_->Allocate(new_size, &new_allocation));
   if (new_allocation == NULL) {
     *got_memory = false;
     return Status::OK();
   }
   Bucket* new_buckets = reinterpret_cast<Bucket*>(new_allocation->data());
   memset(new_buckets, 0, new_size);

   // Walk the old table and copy all the filled buckets to the new (resized) table.
   // We do not have to do anything with the duplicate nodes. This operation is expected
   // to succeed.
   for (HashTable::Iterator iter = Begin(ht_ctx); !iter.AtEnd();
        NextFilledBucket(&iter.bucket_idx_, &iter.node_)) {
     Bucket* bucket_to_copy = &buckets_[iter.bucket_idx_];
     bool found = false;
     int64_t bucket_idx =
         Probe<true>(new_buckets, num_buckets, NULL, bucket_to_copy->hash, &found);
     DCHECK(!found);
     DCHECK_NE(bucket_idx, Iterator::BUCKET_NOT_FOUND) << " Probe failed even though "
         " there are free buckets. " << num_buckets << " " << num_filled_buckets_;
     Bucket* dst_bucket = &new_buckets[bucket_idx];
     *dst_bucket = *bucket_to_copy;
   }

   num_buckets_ = num_buckets;
   allocator_->Free(move(bucket_allocation_));
   bucket_allocation_ = move(new_allocation);
   buckets_ = reinterpret_cast<Bucket*>(bucket_allocation_->data());
   *got_memory = true;
   return Status::OK();
 }

 bool HashTable::GrowNodeArray(Status* status) {
   unique_ptr<Suballocation> allocation;
   *status = allocator_->Allocate(DATA_PAGE_SIZE, &allocation);
   if (!status->ok() || allocation == nullptr) return false;
   next_node_ = reinterpret_cast<DuplicateNode*>(allocation->data());
   data_pages_.push_back(move(allocation));
   node_remaining_current_page_ = DATA_PAGE_SIZE / sizeof(DuplicateNode);
   total_data_page_size_ += DATA_PAGE_SIZE;
   return true;
 }

 void HashTable::DebugStringTuple(stringstream& ss, HtData& htdata,
     const RowDescriptor* desc) {
   if (stores_tuples_) {
     ss << "(" << htdata.tuple << ")";
   } else {
     ss << "(" << htdata.flat_row << ")";
   }
   if (desc != NULL) {
     Tuple* row[num_build_tuples_];
     ss << " " << PrintRow(GetRow(htdata, reinterpret_cast<TupleRow*>(row)), *desc);
   }
 }

 string HashTable::DebugString(bool skip_empty, bool show_match,
     const RowDescriptor* desc) {
   stringstream ss;
   ss << endl;
   for (int i = 0; i < num_buckets_; ++i) {
     if (skip_empty && !buckets_[i].filled) continue;
     ss << i << ": ";
     if (show_match) {
       if (buckets_[i].matched) {
         ss << " [M]";
       } else {
         ss << " [U]";
       }
     }
     if (buckets_[i].hasDuplicates) {
       DuplicateNode* node = buckets_[i].bucketData.duplicates;
       bool first = true;
       ss << " [D] ";
       while (node != NULL) {
         if (!first) ss << ",";
         DebugStringTuple(ss, node->htdata, desc);
         node = node->next;
         first = false;
       }
     } else {
       ss << " [B] ";
       if (buckets_[i].filled) {
         DebugStringTuple(ss, buckets_[i].bucketData.htdata, desc);
       } else {
         ss << " - ";
       }
     }
     ss << endl;
   }
   return ss.str();
 }

 string HashTable::PrintStats() const {
   double curr_fill_factor = (double)num_filled_buckets_/(double)num_buckets_;
   double avg_travel = (double)travel_length_/(double)num_probes_;
   double avg_collisions = (double)num_hash_collisions_/(double)num_filled_buckets_;
   stringstream ss;
   ss << "Buckets: " << num_buckets_ << " " << num_filled_buckets_ << " "
      << curr_fill_factor << endl;
   ss << "Duplicates: " << num_buckets_with_duplicates_ << " buckets "
      << num_duplicate_nodes_ << " nodes" << endl;
   ss << "Probes: " << num_probes_ << endl;
   ss << "Travel: " << travel_length_ << " " << avg_travel << endl;
   ss << "HashCollisions: " << num_hash_collisions_ << " " << avg_collisions << endl;
   ss << "Resizes: " << num_resizes_ << endl;
   return ss.str();
 }

 // Helper function to store a value into the results buffer if the expr
 // evaluated to NULL.  We don't want (NULL, 1) to hash to the same as (0,1) so
 // we'll pick a more random value.
 static void CodegenAssignNullValue(LlvmCodeGen* codegen, LlvmBuilder* builder,
     llvm::Value* dst, const ColumnType& type) {
   uint64_t fnv_seed = HashUtil::FNV_SEED;

   if (type.type == TYPE_STRING || type.type == TYPE_VARCHAR) {
     llvm::Value* dst_ptr = builder->CreateStructGEP(NULL, dst, 0, "string_ptr");
     llvm::Value* dst_len = builder->CreateStructGEP(NULL, dst, 1, "string_len");
     llvm::Value* null_len = codegen->GetI32Constant(fnv_seed);
     llvm::Value* null_ptr = builder->CreateIntToPtr(null_len, codegen->ptr_type());
     builder->CreateStore(null_ptr, dst_ptr);
     builder->CreateStore(null_len, dst_len);
   } else {
     llvm::Value* null_value = NULL;
     int byte_size = type.GetByteSize();
     // Get a type specific representation of fnv_seed
     switch (type.type) {
       case TYPE_BOOLEAN:
         // In results, booleans are stored as 1 byte
         dst = builder->CreateBitCast(dst, codegen->ptr_type());
         null_value = codegen->GetI8Constant(fnv_seed);
         break;
       case TYPE_TIMESTAMP: {
         // Cast 'dst' to 'i128*'
         DCHECK_EQ(byte_size, 16);
         llvm::PointerType* fnv_seed_ptr_type =
             codegen->GetPtrType(llvm::Type::getIntNTy(codegen->context(), byte_size * 8));
         dst = builder->CreateBitCast(dst, fnv_seed_ptr_type);
         null_value = codegen->GetIntConstant(byte_size, fnv_seed, fnv_seed);
         break;
       }
       case TYPE_TINYINT:
       case TYPE_SMALLINT:
       case TYPE_INT:
       case TYPE_BIGINT:
       case TYPE_DECIMAL:
       case TYPE_DATE:
         null_value = codegen->GetIntConstant(byte_size, fnv_seed, fnv_seed);
         break;
       case TYPE_FLOAT: {
         // Don't care about the value, just the bit pattern
         float fnv_seed_float = *reinterpret_cast<float*>(&fnv_seed);
         null_value =
             llvm::ConstantFP::get(codegen->context(), llvm::APFloat(fnv_seed_float));
         break;
       }
       case TYPE_DOUBLE: {
         // Don't care about the value, just the bit pattern
         double fnv_seed_double = *reinterpret_cast<double*>(&fnv_seed);
         null_value =
             llvm::ConstantFP::get(codegen->context(), llvm::APFloat(fnv_seed_double));
         break;
       }
       default:
         DCHECK(false);
     }
     builder->CreateStore(null_value, dst);
   }
 }

 // Codegen for evaluating a tuple row over either build_expr_evals_ or
 // probe_expr_evals_. For a group by with (big int, string) the IR looks like:
 //
 // define i1 @EvalProbeRow(%"class.impala::HashTableCtx"* %this_ptr,
 //    %"class.impala::TupleRow"* %row, i8* %expr_values, i8* %expr_values_null) #34 {
 // entry:
 //   %loc_addr = getelementptr i8, i8* %expr_values, i32 0
 //   %loc = bitcast i8* %loc_addr to i64*
 //   %result = call { i8, i64 } @GetSlotRef.2(%"class.impala::ExprContext"*
 //        inttoptr (i64 197737664 to %"class.impala::ExprContext"*),
 //        %"class.impala::TupleRow"* %row)
 //   %0 = extractvalue { i8, i64 } %result, 0
 //   %is_null = trunc i8 %0 to i1
 //   %1 = zext i1 %is_null to i8
 //   %null_byte_loc = getelementptr i8, i8* %expr_values_null, i32 0
 //   store i8 %1, i8* %null_byte_loc
 //   br i1 %is_null, label %null, label %not_null
 //
 // null:                                             ; preds = %entry
 //   store i64 2166136261, i64* %loc
 //   br label %continue
 //
 // not_null:                                         ; preds = %entry
 //   %val = extractvalue { i8, i64 } %result, 1
 //   store i64 %val, i64* %loc
 //   br label %continue
 //
 // continue:                                         ; preds = %not_null, %null
 //   %is_null_phi = phi i1 [ true, %null ], [ false, %not_null ]
 //   %has_null = or i1 false, %is_null_phi
 //   %loc_addr1 = getelementptr i8, i8* %expr_values, i32 8
 //   %loc2 = bitcast i8* %loc_addr1 to %"struct.impala::StringValue"*
 //   %result6 = call { i64, i8* } @GetSlotRef.3(%"class.impala::ExprContext"*
 //      inttoptr (i64 197738048 to %"class.impala::ExprContext"*),
 //      %"class.impala::TupleRow"* %row)
 //   %2 = extractvalue { i64, i8* } %result6, 0
 //   %is_null7 = trunc i64 %2 to i1
 //   %3 = zext i1 %is_null7 to i8
 //   %null_byte_loc8 = getelementptr i8, i8* %expr_values_null, i32 1
 //   store i8 %3, i8* %null_byte_loc8
 //   br i1 %is_null7, label %null3, label %not_null4
 //
 // null3:                                            ; preds = %continue
 //   %string_ptr = getelementptr inbounds %"struct.impala::StringValue",
 //        %"struct.impala::StringValue"* %loc2, i32 0, i32 0
 //   %string_len = getelementptr inbounds %"struct.impala::StringValue",
 //        %"struct.impala::StringValue"* %loc2, i32 0, i32 1
 //   store i8* inttoptr (i32 -2128831035 to i8*), i8** %string_ptr
 //   store i32 -2128831035, i32* %string_len
 //   br label %continue5
 //
 // not_null4:                                        ; preds = %continue
 //   %4 = extractvalue { i64, i8* } %result6, 0
 //   %5 = ashr i64 %4, 32
 //   %6 = trunc i64 %5 to i32
 //   %7 = insertvalue %"struct.impala::StringValue" zeroinitializer, i32 %6, 1
 //   %result9 = extractvalue { i64, i8* } %result6, 1
 //   %8 = insertvalue %"struct.impala::StringValue" %7, i8* %result9, 0
 //   store %"struct.impala::StringValue" %8, %"struct.impala::StringValue"* %loc2
 //   br label %continue5
 //
 // continue5:                                        ; preds = %not_null4, %null3
 //   %is_null_phi10 = phi i1 [ true, %null3 ], [ false, %not_null4 ]
 //   %has_null11 = or i1 %has_null, %is_null_phi10
 //   ret i1 %has_null11
 // }
 //
 // For each expr, we create 3 code blocks.  The null, not null and continue blocks.
 // Both the null and not null branch into the continue block.  The continue block
 // becomes the start of the next block for codegen (either the next expr or just the
 // end of the function).
 Status HashTableCtx::CodegenEvalRow(
     LlvmCodeGen* codegen, bool build, llvm::Function** fn) {
   const vector<ScalarExpr*>& exprs = build ? build_exprs_ : probe_exprs_;
   for (int i = 0; i < exprs.size(); ++i) {
     // Disable codegen for CHAR
     if (exprs[i]->type().type == TYPE_CHAR) {
       return Status("HashTableCtx::CodegenEvalRow(): CHAR NYI");
     }
   }

   // Get types to generate function prototype
   llvm::PointerType* this_ptr_type = codegen->GetStructPtrType<HashTableCtx>();
   llvm::PointerType* tuple_row_ptr_type = codegen->GetStructPtrType<TupleRow>();
   LlvmCodeGen::FnPrototype prototype(codegen, build ? "EvalBuildRow" : "EvalProbeRow",
       codegen->bool_type());
   prototype.AddArgument(LlvmCodeGen::NamedVariable("this_ptr", this_ptr_type));
   prototype.AddArgument(LlvmCodeGen::NamedVariable("row", tuple_row_ptr_type));
   prototype.AddArgument(LlvmCodeGen::NamedVariable("expr_values", codegen->ptr_type()));
   prototype.AddArgument(
       LlvmCodeGen::NamedVariable("expr_values_null", codegen->ptr_type()));

   llvm::LLVMContext& context = codegen->context();
   LlvmBuilder builder(context);
   llvm::Value* args[4];
   *fn = prototype.GeneratePrototype(&builder, args);
   llvm::Value* this_ptr = args[0];
   llvm::Value* row = args[1];
   llvm::Value* expr_values = args[2];
   llvm::Value* expr_values_null = args[3];
   llvm::Value* has_null = codegen->false_value();

   // evaluator_vector = build_expr_evals_.data() / probe_expr_evals_.data()
   llvm::Value* eval_vector = codegen->CodegenCallFunction(&builder,
       build ? IRFunction::HASH_TABLE_GET_BUILD_EXPR_EVALUATORS :
               IRFunction::HASH_TABLE_GET_PROBE_EXPR_EVALUATORS,
       this_ptr, "eval_vector");

   for (int i = 0; i < exprs.size(); ++i) {
     // TODO: refactor this to somewhere else?  This is not hash table specific except for
     // the null handling bit and would be used for anyone that needs to materialize a
     // vector of exprs
     // Convert result buffer to llvm ptr type
     int offset = expr_values_cache_.expr_values_offsets(i);
     llvm::Value* loc = builder.CreateInBoundsGEP(
         NULL, expr_values, codegen->GetI32Constant(offset), "loc_addr");
     llvm::Value* llvm_loc = builder.CreatePointerCast(loc,
         codegen->GetSlotPtrType(exprs[i]->type()), "loc");

     llvm::BasicBlock* null_block = llvm::BasicBlock::Create(context, "null", *fn);
     llvm::BasicBlock* not_null_block = llvm::BasicBlock::Create(context, "not_null", *fn);
     llvm::BasicBlock* continue_block = llvm::BasicBlock::Create(context, "continue", *fn);

     // Call expr
     llvm::Function* expr_fn;
     Status status = exprs[i]->GetCodegendComputeFn(codegen, false, &expr_fn);
     if (!status.ok()) {
       *fn = NULL;
       return Status(Substitute(
           "Problem with HashTableCtx::CodegenEvalRow(): $0", status.GetDetail()));
     }

     // Avoid bloating function by inlining too many exprs into it.
     if (i >= LlvmCodeGen::CODEGEN_INLINE_EXPRS_THRESHOLD) {
       codegen->SetNoInline(expr_fn);
     }

     llvm::Value* eval_arg = codegen->CodegenArrayAt(&builder, eval_vector, i, "eval");
     CodegenAnyVal result = CodegenAnyVal::CreateCallWrapped(
         codegen, &builder, exprs[i]->type(), expr_fn, {eval_arg, row}, "result");
     llvm::Value* is_null = result.GetIsNull();

     // Set null-byte result
     llvm::Value* null_byte = builder.CreateZExt(is_null, codegen->i8_type());
     llvm::Value* llvm_null_byte_loc = builder.CreateInBoundsGEP(
         NULL, expr_values_null, codegen->GetI32Constant(i), "null_byte_loc");
     builder.CreateStore(null_byte, llvm_null_byte_loc);
     builder.CreateCondBr(is_null, null_block, not_null_block);

     // Null block
     builder.SetInsertPoint(null_block);
     if (!stores_nulls_) {
       // hash table doesn't store nulls, no reason to keep evaluating exprs
       builder.CreateRet(codegen->true_value());
     } else {
       CodegenAssignNullValue(codegen, &builder, llvm_loc, exprs[i]->type());
       builder.CreateBr(continue_block);
     }

     // Not null block
     builder.SetInsertPoint(not_null_block);

     result.ConvertToCanonicalForm();

     result.StoreToNativePtr(llvm_loc);
     builder.CreateBr(continue_block);

     // Continue block
     builder.SetInsertPoint(continue_block);
     if (stores_nulls_) {
       // Update has_null
       llvm::PHINode* is_null_phi =
           builder.CreatePHI(codegen->bool_type(), 2, "is_null_phi");
       is_null_phi->addIncoming(codegen->true_value(), null_block);
       is_null_phi->addIncoming(codegen->false_value(), not_null_block);
       has_null = builder.CreateOr(has_null, is_null_phi, "has_null");
     }
   }
   builder.CreateRet(has_null);

   // Avoid inlining a large EvalRow() function into caller.
   if (exprs.size() > LlvmCodeGen::CODEGEN_INLINE_EXPR_BATCH_THRESHOLD) {
     codegen->SetNoInline(*fn);
   }

   *fn = codegen->FinalizeFunction(*fn);
   if (*fn == NULL) {
     return Status("Codegen'd HashTableCtx::EvalRow() function failed verification, "
                   "see log");
   }
   return Status::OK();
 }

 // Codegen for hashing the current row.  In the case with both string and non-string data
 // (group by int_col, string_col), the IR looks like:
 //
 // define i32 @HashRow(%"class.impala::HashTableCtx"* %this_ptr, i8* %expr_values,
 //    i8* %expr_values_null) #34 {
 // entry:
 //   %seed = call i32 @_ZNK6impala12HashTableCtx11GetHashSeedEv(
 //        %"class.impala::HashTableCtx"* %this_ptr)
 //   %hash = call i32 @CrcHash8(i8* %expr_values, i32 8, i32 %seed)
 //   %loc_addr = getelementptr i8, i8* %expr_values, i32 8
 //   %null_byte_loc = getelementptr i8, i8* %expr_values_null, i32 1
 //   %null_byte = load i8, i8* %null_byte_loc
 //   %is_null = icmp ne i8 %null_byte, 0
 //   br i1 %is_null, label %null, label %not_null
 //
 // null:                                             ; preds = %entry
 //   %str_null = call i32 @CrcHash16(i8* %loc_addr, i32 16, i32 %hash)
 //   br label %continue
 //
 // not_null:                                         ; preds = %entry
 //   %str_val = bitcast i8* %loc_addr to %"struct.impala::StringValue"*
 //   %0 = getelementptr inbounds %"struct.impala::StringValue",
 //        %"struct.impala::StringValue"* %str_val, i32 0, i32 0
 //   %1 = getelementptr inbounds %"struct.impala::StringValue",
 //        %"struct.impala::StringValue"* %str_val, i32 0, i32 1
 //   %ptr = load i8*, i8** %0
 //   %len = load i32, i32* %1
 //   %string_hash = call i32 @IrCrcHash(i8* %ptr, i32 %len, i32 %hash)
 //   br label %continue
 //
 // continue:                                         ; preds = %not_null, %null
 //   %hash_phi = phi i32 [ %string_hash, %not_null ], [ %str_null, %null ]
 //   ret i32 %hash_phi
 // }
 Status HashTableCtx::CodegenHashRow(
     LlvmCodeGen* codegen, bool use_murmur, llvm::Function** fn) {
   for (int i = 0; i < build_exprs_.size(); ++i) {
     // Disable codegen for CHAR
     if (build_exprs_[i]->type().type == TYPE_CHAR) {
       return Status("HashTableCtx::CodegenHashRow(): CHAR NYI");
     }
   }

   // Get types to generate function prototype
   llvm::PointerType* this_ptr_type = codegen->GetStructPtrType<HashTableCtx>();

   LlvmCodeGen::FnPrototype prototype(
       codegen, (use_murmur ? "MurmurHashRow" : "HashRow"), codegen->i32_type());
   prototype.AddArgument(LlvmCodeGen::NamedVariable("this_ptr", this_ptr_type));
   prototype.AddArgument(LlvmCodeGen::NamedVariable("expr_values", codegen->ptr_type()));
   prototype.AddArgument(
       LlvmCodeGen::NamedVariable("expr_values_null", codegen->ptr_type()));

   llvm::LLVMContext& context = codegen->context();
   LlvmBuilder builder(context);
   llvm::Value* args[3];
   *fn = prototype.GeneratePrototype(&builder, args);
   llvm::Value* this_arg = args[0];
   llvm::Value* expr_values = args[1];
   llvm::Value* expr_values_null = args[2];

   // Call GetHashSeed() to get seeds_[level_]
   llvm::Value* seed = codegen->CodegenCallFunction(
       &builder, IRFunction::HASH_TABLE_GET_HASH_SEED, this_arg, "seed");

   llvm::Value* hash_result = seed;
   const int var_result_offset = expr_values_cache_.var_result_offset();
   const int expr_values_bytes_per_row = expr_values_cache_.expr_values_bytes_per_row();
   if (var_result_offset == -1) {
     // No variable length slots, just hash what is in 'expr_expr_values_cache_'
     if (expr_values_bytes_per_row > 0) {
       llvm::Function* hash_fn = use_murmur ?
           codegen->GetMurmurHashFunction(expr_values_bytes_per_row) :
           codegen->GetHashFunction(expr_values_bytes_per_row);
       llvm::Value* len = codegen->GetI32Constant(expr_values_bytes_per_row);
       hash_result = builder.CreateCall(
           hash_fn, llvm::ArrayRef<llvm::Value*>({expr_values, len, hash_result}), "hash");
     }
   } else {
     if (var_result_offset > 0) {
       llvm::Function* hash_fn = use_murmur ?
           codegen->GetMurmurHashFunction(var_result_offset) :
           codegen->GetHashFunction(var_result_offset);
       llvm::Value* len = codegen->GetI32Constant(var_result_offset);
       hash_result = builder.CreateCall(
           hash_fn, llvm::ArrayRef<llvm::Value*>({expr_values, len, hash_result}), "hash");
     }

     // Hash string slots
     for (int i = 0; i < build_exprs_.size(); ++i) {
       if (build_exprs_[i]->type().type != TYPE_STRING &&
           build_exprs_[i]->type().type != TYPE_VARCHAR) {
         continue;
       }

       llvm::BasicBlock* null_block = NULL;
       llvm::BasicBlock* not_null_block = NULL;
       llvm::BasicBlock* continue_block = NULL;
       llvm::Value* str_null_result = NULL;

       int offset = expr_values_cache_.expr_values_offsets(i);
       llvm::Value* llvm_loc = builder.CreateInBoundsGEP(
           NULL, expr_values, codegen->GetI32Constant(offset), "loc_addr");

       // If the hash table stores nulls, we need to check if the stringval
       // evaluated to NULL
       if (stores_nulls_) {
         null_block = llvm::BasicBlock::Create(context, "null", *fn);
         not_null_block = llvm::BasicBlock::Create(context, "not_null", *fn);
         continue_block = llvm::BasicBlock::Create(context, "continue", *fn);

         llvm::Value* llvm_null_byte_loc = builder.CreateInBoundsGEP(NULL,
             expr_values_null, codegen->GetI32Constant(i), "null_byte_loc");
         llvm::Value* null_byte = builder.CreateLoad(llvm_null_byte_loc, "null_byte");
         llvm::Value* is_null = builder.CreateICmpNE(
             null_byte, codegen->GetI8Constant(0), "is_null");
         builder.CreateCondBr(is_null, null_block, not_null_block);

         // For null, we just want to call the hash function on the portion of
         // the data
         builder.SetInsertPoint(null_block);
         llvm::Function* null_hash_fn = use_murmur ?
             codegen->GetMurmurHashFunction(sizeof(StringValue)) :
             codegen->GetHashFunction(sizeof(StringValue));
         llvm::Value* len = codegen->GetI32Constant(sizeof(StringValue));
         str_null_result = builder.CreateCall(null_hash_fn,
             llvm::ArrayRef<llvm::Value*>({llvm_loc, len, hash_result}), "str_null");
         builder.CreateBr(continue_block);

         builder.SetInsertPoint(not_null_block);
       }

       // Convert expr_values_buffer_ loc to llvm value
       llvm::Value* str_val = builder.CreatePointerCast(
           llvm_loc, codegen->GetSlotPtrType(TYPE_STRING), "str_val");

       llvm::Value* ptr = builder.CreateStructGEP(NULL, str_val, 0);
       llvm::Value* len = builder.CreateStructGEP(NULL, str_val, 1);
       ptr = builder.CreateLoad(ptr, "ptr");
       len = builder.CreateLoad(len, "len");

       // Call hash(ptr, len, hash_result);
       llvm::Function* general_hash_fn =
           use_murmur ? codegen->GetMurmurHashFunction() : codegen->GetHashFunction();
       llvm::Value* string_hash_result = builder.CreateCall(general_hash_fn,
           llvm::ArrayRef<llvm::Value*>({ptr, len, hash_result}), "string_hash");

       if (stores_nulls_) {
         builder.CreateBr(continue_block);
         builder.SetInsertPoint(continue_block);
         // Use phi node to reconcile that we could have come from the string-null
         // path and string not null paths.
         llvm::PHINode* phi_node =
             builder.CreatePHI(codegen->i32_type(), 2, "hash_phi");
         phi_node->addIncoming(string_hash_result, not_null_block);
         phi_node->addIncoming(str_null_result, null_block);
         hash_result = phi_node;
       } else {
         hash_result = string_hash_result;
       }
     }
   }

   builder.CreateRet(hash_result);

   // Avoid inlining into caller if there are many exprs.
   if (build_exprs_.size() > LlvmCodeGen::CODEGEN_INLINE_EXPR_BATCH_THRESHOLD) {
     codegen->SetNoInline(*fn);
   }
   *fn = codegen->FinalizeFunction(*fn);
   if (*fn == NULL) {
     return Status(
         "Codegen'd HashTableCtx::HashRow() function failed verification, see log");
   }
   return Status::OK();
 }

 // Codegen for HashTableCtx::Equals.  For a group by with (bigint, string),
 // the IR looks like:
 //
 // define i1 @Equals(%"class.impala::HashTableCtx"* %this_ptr, %"class.impala::TupleRow"*
 // %row,
 //      i8* %expr_values, i8* %expr_values_null) #34 {
 // entry:
 //   %0 = alloca { i64, i8* }
 //   %result = call { i8, i64 } @GetSlotRef.2(%"class.impala::ExprContext"*
 //        inttoptr (i64 139107136 to %"class.impala::ExprContext"*),
 //        %"class.impala::TupleRow"* %row)
 //   %1 = extractvalue { i8, i64 } %result, 0
 //   %is_null = trunc i8 %1 to i1
 //   %null_byte_loc = getelementptr i8, i8* %expr_values_null, i32 0
 //   %2 = load i8, i8* %null_byte_loc
 //   %3 = icmp ne i8 %2, 0
 //   %loc = getelementptr i8, i8* %expr_values, i32 0
 //   %row_val = bitcast i8* %loc to i64*
 //   br i1 %is_null, label %null, label %not_null
 //
 // false_block:                                      ; preds = %cmp9, %not_null2, %null1,
 //                                                             %cmp, %not_null, %null
 //   ret i1 false
 //
 // null:                                             ; preds = %entry
 //   br i1 %3, label %continue, label %false_block
 //
 // not_null:                                         ; preds = %entry
 //   br i1 %3, label %false_block, label %cmp
 //
 // continue:                                         ; preds = %cmp, %null
 //   %result4 = call { i64, i8* } @GetSlotRef.3(%"class.impala::ExprContext"*
 //        inttoptr (i64 139107328 to %"class.impala::ExprContext"*),
 //        %"class.impala::TupleRow"* %row)
 //   %4 = extractvalue { i64, i8* } %result4, 0
 //   %is_null5 = trunc i64 %4 to i1
 //   %null_byte_loc6 = getelementptr i8, i8* %expr_values_null, i32 1
 //   %5 = load i8, i8* %null_byte_loc6
 //   %6 = icmp ne i8 %5, 0
 //   %loc7 = getelementptr i8, i8* %expr_values, i32 8
 //   %row_val8 = bitcast i8* %loc7 to %"struct.impala::StringValue"*
 //   br i1 %is_null5, label %null1, label %not_null2
 //
 // cmp:                                              ; preds = %not_null
 //   %7 = load i64, i64* %row_val
 //   %val = extractvalue { i8, i64 } %result, 1
 //   %cmp_raw = icmp eq i64 %val, %7
 //   br i1 %cmp_raw, label %continue, label %false_block
 //
 // null1:                                            ; preds = %continue
 //   br i1 %6, label %continue3, label %false_block
 //
 // not_null2:                                        ; preds = %continue
 //   br i1 %6, label %false_block, label %cmp9
 //
 // continue3:                                        ; preds = %cmp9, %null1
 //   ret i1 true
 //
 // cmp9:                                             ; preds = %not_null2
 //   store { i64, i8* } %result4, { i64, i8* }* %0
 //   %8 = bitcast { i64, i8* }* %0 to %"struct.impala_udf::StringVal"*
 //   %cmp_raw10 = call i1
 //        @_Z13StringValueEqRKN10impala_udf9StringValERKN6impala11StringValueE(
 //        %"struct.impala_udf::StringVal"* %8, %"struct.impala::StringValue"* %row_val8)
 //   br i1 %cmp_raw10, label %continue3, label %false_block
 // }
 Status HashTableCtx::CodegenEquals(
     LlvmCodeGen* codegen, bool inclusive_equality, llvm::Function** fn) {
   for (int i = 0; i < build_exprs_.size(); ++i) {
     // Disable codegen for CHAR
     if (build_exprs_[i]->type().type == TYPE_CHAR) {
       return Status("HashTableCtx::CodegenEquals(): CHAR NYI");
     }
   }

   // Get types to generate function prototype
   llvm::PointerType* this_ptr_type = codegen->GetStructPtrType<HashTableCtx>();
   llvm::PointerType* tuple_row_ptr_type = codegen->GetStructPtrType<TupleRow>();

   LlvmCodeGen::FnPrototype prototype(codegen, "Equals", codegen->bool_type());
   prototype.AddArgument(LlvmCodeGen::NamedVariable("this_ptr", this_ptr_type));
   prototype.AddArgument(LlvmCodeGen::NamedVariable("row", tuple_row_ptr_type));
   prototype.AddArgument(LlvmCodeGen::NamedVariable("expr_values", codegen->ptr_type()));
   prototype.AddArgument(
       LlvmCodeGen::NamedVariable("expr_values_null", codegen->ptr_type()));

   llvm::LLVMContext& context = codegen->context();
   LlvmBuilder builder(context);
   llvm::Value* args[4];
   *fn = prototype.GeneratePrototype(&builder, args);
   llvm::Value* this_ptr = args[0];
   llvm::Value* row = args[1];
   llvm::Value* expr_values = args[2];
   llvm::Value* expr_values_null = args[3];

   // eval_vector = build_expr_evals_.data()
   llvm::Value* eval_vector = codegen->CodegenCallFunction(&builder,
       IRFunction::HASH_TABLE_GET_BUILD_EXPR_EVALUATORS, this_ptr, "eval_vector");

   llvm::BasicBlock* false_block = llvm::BasicBlock::Create(context, "false_block", *fn);
   for (int i = 0; i < build_exprs_.size(); ++i) {
     llvm::BasicBlock* null_block = llvm::BasicBlock::Create(context, "null", *fn);
     llvm::BasicBlock* not_null_block = llvm::BasicBlock::Create(context, "not_null", *fn);
     llvm::BasicBlock* continue_block = llvm::BasicBlock::Create(context, "continue", *fn);

     // call GetValue on build_exprs[i]
     llvm::Function* expr_fn;
     Status status = build_exprs_[i]->GetCodegendComputeFn(codegen, false, &expr_fn);
     if (!status.ok()) {
       *fn = NULL;
       return Status(
           Substitute("Problem with HashTableCtx::CodegenEquals: $0", status.GetDetail()));
     }
     if (build_exprs_.size() > LlvmCodeGen::CODEGEN_INLINE_EXPRS_THRESHOLD) {
       // Avoid bloating function by inlining too many exprs into it.
       codegen->SetNoInline(expr_fn);
     }

     // Load ScalarExprEvaluator*: eval = eval_vector[i];
     llvm::Value* eval_arg = codegen->CodegenArrayAt(&builder, eval_vector, i, "eval");
     // Evaluate the expression.
     CodegenAnyVal result = CodegenAnyVal::CreateCallWrapped(codegen, &builder,
         build_exprs_[i]->type(), expr_fn, {eval_arg, row}, "result");
     llvm::Value* is_null = result.GetIsNull();

     // Determine if row is null (i.e. expr_values_null[i] == true). In
     // the case where the hash table does not store nulls, this is always false.
     llvm::Value* row_is_null = codegen->false_value();

     // We consider null values equal if we are comparing build rows or if the join
     // predicate is <=>
     if (inclusive_equality || finds_nulls_[i]) {
       llvm::Value* llvm_null_byte_loc = builder.CreateInBoundsGEP(
           NULL, expr_values_null, codegen->GetI32Constant(i), "null_byte_loc");
       llvm::Value* null_byte = builder.CreateLoad(llvm_null_byte_loc);
       row_is_null =
           builder.CreateICmpNE(null_byte, codegen->GetI8Constant(0));
     }
     if (inclusive_equality) result.ConvertToCanonicalForm();

     // Get llvm value for row_val from 'expr_values'
     int offset = expr_values_cache_.expr_values_offsets(i);
     llvm::Value* loc = builder.CreateInBoundsGEP(
         NULL, expr_values, codegen->GetI32Constant(offset), "loc");
     llvm::Value* row_val = builder.CreatePointerCast(
         loc, codegen->GetSlotPtrType(build_exprs_[i]->type()), "row_val");

     // Branch for GetValue() returning NULL
     builder.CreateCondBr(is_null, null_block, not_null_block);

     // Null block
     builder.SetInsertPoint(null_block);
     builder.CreateCondBr(row_is_null, continue_block, false_block);

     // Not-null block
     builder.SetInsertPoint(not_null_block);
     if (stores_nulls_) {
       llvm::BasicBlock* cmp_block = llvm::BasicBlock::Create(context, "cmp", *fn);
       // First need to compare that row expr[i] is not null
       builder.CreateCondBr(row_is_null, false_block, cmp_block);
       builder.SetInsertPoint(cmp_block);
     }
     // Check result == row_val
     llvm::Value* is_equal = result.EqToNativePtr(row_val, inclusive_equality);
     builder.CreateCondBr(is_equal, continue_block, false_block);

     builder.SetInsertPoint(continue_block);
   }
   builder.CreateRet(codegen->true_value());

   builder.SetInsertPoint(false_block);
   builder.CreateRet(codegen->false_value());

   // Avoid inlining into caller if it is large.
   if (build_exprs_.size() > LlvmCodeGen::CODEGEN_INLINE_EXPR_BATCH_THRESHOLD) {
     codegen->SetNoInline(*fn);
   }
   *fn = codegen->FinalizeFunction(*fn);
   if (*fn == NULL) {
     return Status("Codegen'd HashTableCtx::Equals() function failed verification, "
                   "see log");
   }
   return Status::OK();
 }

 Status HashTableCtx::ReplaceHashTableConstants(LlvmCodeGen* codegen,
     bool stores_duplicates, int num_build_tuples, llvm::Function* fn,
     HashTableReplacedConstants* replacement_counts) {
   replacement_counts->stores_nulls = codegen->ReplaceCallSitesWithBoolConst(
       fn, stores_nulls(), "stores_nulls");
   replacement_counts->finds_some_nulls = codegen->ReplaceCallSitesWithBoolConst(
       fn, finds_some_nulls(), "finds_some_nulls");
   replacement_counts->stores_tuples = codegen->ReplaceCallSitesWithBoolConst(
       fn, num_build_tuples == 1, "stores_tuples");
   replacement_counts->stores_duplicates = codegen->ReplaceCallSitesWithBoolConst(
       fn, stores_duplicates, "stores_duplicates");
   replacement_counts->quadratic_probing = codegen->ReplaceCallSitesWithBoolConst(
       fn, FLAGS_enable_quadratic_probing, "quadratic_probing");
   return Status::OK();
 }