core/sql/executor/hash_table.cpp - trafodion - Git at Google

 // -*-C++-*-
 // *********************************************************************
 //
 // File:         hash_table.cpp
 // Description:
 //
 //
 // Created:      7/10/95
 // Language:     C++
 //
 //
 // @@@ START COPYRIGHT @@@
 //
 // Licensed to the Apache Software Foundation (ASF) under one
 // or more contributor license agreements.  See the NOTICE file
 // distributed with this work for additional information
 // regarding copyright ownership.  The ASF licenses this file
 // to you under the Apache License, Version 2.0 (the
 // "License"); you may not use this file except in compliance
 // with the License.  You may obtain a copy of the License at
 //
 //   http://www.apache.org/licenses/LICENSE-2.0
 //
 // Unless required by applicable law or agreed to in writing,
 // software distributed under the License is distributed on an
 // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 // KIND, either express or implied.  See the License for the
 // specific language governing permissions and limitations
 // under the License.
 //
 // @@@ END COPYRIGHT @@@
 // *********************************************************************

 #include "hash_table.h"

 void HashRow::print(ULng32 rowlength) {
   printf("\tHashValue: %6d\n\tData: ", hashValue_);
   unsigned char * data = (unsigned char *)this;
   ULng32 i = 0;
   for (; i < rowlength; i++)
     printf("%02x", data[i]);
   printf("\n\t    : ");
   for (i = 0; i < rowlength; i++)
     printf("%2c", ((data[i] >= ' ' && data[i] < 127) ? data[i] : '?'));
   printf("\n\t      ");
   for (i = 0; i < rowlength; i++)
     if (i % 10)
       printf ("  ");
     else
       printf ("%1d0", i/10);
   printf ("\n");
 }

 HashTableCursor::HashTableCursor() {
   init();
 }
 HashTableCursor::~HashTableCursor() {
 }

 HashTableHeader::HashTableHeader() {
   init();
 }

 void HashTableHeader::init() {
   // rowCount_ = 0;
   row_ = NULL;
 }


 HashTable::HashTable(ULng32 headerCount,
 		     NABoolean evenFactor,
 		     ULng32 primeFactor,
 		     NABoolean noHVDups,
 		     NABoolean doResize
 		     )
   : headerCount_(headerCount > MAX_HEADER_COUNT ?
                    MAX_HEADER_COUNT : headerCount < MIN_HEADER_COUNT ?
                      MIN_HEADER_COUNT : headerCount),
     rowCount_(0),
     header_(NULL),
     noHashValueDups_(noHVDups),
     resizeThreshold_(UINT_MAX), // default - don't resize
     originalSize_(TRUE), // this HT was not resized yet
     hashTableOverflow_(FALSE),
     singleChainLastRow_(NULL) {

 #ifdef _DEBUG
   if ( getenv("ALLOW_HV_DUPS" ) ) noHashValueDups_ = FALSE; // for testing
 #endif

   headerCount_ *= 2; // the first iteration below would divide it back

   // allocate the hash table headers. If we can't allocate it,
   // try a smaller table. If we can't even allocate a table with
   // MIN_HEADER_COUNT or somewhat more, we die!
   while (!header_) {

     headerCount_ /= 2;
     if ( headerCount_ < MIN_HEADER_COUNT ) return; // allocation failed

     // Ensure that headerCount_ has no common prime factors with #buckets
     if ( evenFactor && 0 == headerCount_ % 2 )
       headerCount_++ ; // the count is even no more
     if ( 0 == headerCount_ % primeFactor )
       headerCount_ += 2 ; // break the common factor, since primeFactor > 2

     header_ = (HashTableHeader *)collHeap()->allocateMemory((UInt32)
              (headerCount_ * sizeof(HashTableHeader)), FALSE);
   }

   if ( doResize )
     resizeThreshold_ = (headerCount_ / 100) * HT_LOAD_FACTOR_PERCENT ;

   init(); // set all entries in the hash table to NULL
 }

 HashTable::~HashTable() {
   if (header_) {
     collHeap()->deallocateMemory((void*)header_);
     header_ = NULL;
   }
 }

 void HashTable::init() {
   // memset is about six times faster than this loop
   //     for (unsigned long i = 0; i < headerCount_; i++)
   //        header_[i].init();
   memset( header_ , 0, headerCount_ * sizeof(HashTableHeader) );
 }

 //////////////////////////////////////////////////////////////////////
 // Chains a row into the appropriate hash table chain. It is
 // inserted after the first matching row or at the beginning
 // of the hash chain, if a matching row is not found.
 //////////////////////////////////////////////////////////////////////
 void HashTable::insert(atp_struct * workAtp,
 		       HashRow * newRow,
                        tupp &workAtpTupp1,
                        tupp &workAtpTupp2,
 		       ex_expr * searchExpr) {

   ex_assert( searchExpr,
 	     "This insert() should only be used for searchable entries. ");

   // Calculate the simple hash value for the new row
   SimpleHashValue newRowHV = newRow->hashValue();

   // Determine the hash chain and get the first entry in the chain.
   ULng32 chainIdx = newRowHV % headerCount_;
   ULng32 origChainIdx = chainIdx;
   HashRow *row = header_[chainIdx].row_;

   // Boolean used to control when the data pointer for tupp1 should be set
   NABoolean isTupp1Set = FALSE;


   while (row != NULL) {
     if (newRowHV == row->hashValue()) {

       // The expression does not expect the HashRow to be part of the row.
       // Adjust the datapointer in the work atp to point beyond the HashRow.
       workAtpTupp2.setDataPointer(row->getData());

       // Set the data in the workAtp for the first index if not set already
       if (!isTupp1Set) {

         // The expression does not expect the HashRow to be part of the row.
         // Adjust the datapointer in the work atp to point beyond the HashRow.
         workAtpTupp1.setDataPointer(newRow->getData());
         isTupp1Set = TRUE;
       }

       if ( searchExpr->eval(workAtp, workAtp) == ex_expr::EXPR_TRUE )
          break; // Break from while loop.
       else if ( noHashValueDups_ ) {
 	// Different key-value, but with the same HV, try the next HT entry
 	chainIdx = ++chainIdx % headerCount_; // next HT entry

 	if ( (chainIdx + 1) % headerCount_ == origChainIdx ) {
 	  // Oops - an overflow -- more diff key-value sub-chains (with same HV)
 	  // than the size of the hash table
 	  // (extremely rare - practically impossible -- requires a very
 	  // small HT (i.e. memory pressure) and a very bad hash function.)
 	  //   After an overflow - revert to the other method -- allow dups
 	  // (The "+ 1" above ensures at least one HT entry without this HV
 	  //  to prevent an infinite loop in position() )
 	  noHashValueDups_ = FALSE; // stop disallowing dups
 	  chainIdx = origChainIdx; // return to the original chain
 	  hashTableOverflow_ = TRUE; // remember that the HT still contains
 	        // "re-placed" sub-chains from before the overflow; they should
 	        // be searched for in the position() method
 	}

 	row = header_[chainIdx].row_; // first row in next chain
 	continue; // loop again
       }
     }
     row = row->next();
   }

   // If the row was not found, insert at the beginning of the chain.
   // Otherwise, insert after the first match.
   if (row == NULL) {
     newRow->setNext(header_[chainIdx].row_);
     header_[chainIdx].row_ = newRow;
   }
   else {
     newRow->setNext(row->next());
     row->setNext(newRow);
   }

   // one more row in the chain
   // header_[chainIdx].rowCount_++;
   // one more row in the hash table
   rowCount_++;
 }

 ///////////////////////////////////////////////////////////////////
 // chain a row unconditionally into the hash table. Don't look
 // for matches.
 ///////////////////////////////////////////////////////////////////
 NABoolean HashTable::insert(HashRow * newRow) {
   // determine the hash chain
   ULng32 chainIdx = newRow->hashValue() % headerCount_;

   // Insert at the beginning of the chain
   newRow->setNext(header_[chainIdx].row_);
   header_[chainIdx].row_ = newRow;

   // one more row in the hash table
   return ++rowCount_ > resizeThreshold_ ;
 }

 ////////////////////////////////////////////////////////////////////////
 // Resize the hash-table (HGB only; no duplicate entries; don't care order)
 // (if we can't allocate a new hash-table then we stop resizing; reason is
 //  that we'd have to overflow anyway soon, and likely flush this cluster.)
 ///////////////////////////////////////////////////////////////////////
 ULng32 HashTable::resize(NABoolean enoughMemory) {
   if ( ! enoughMemory ) {
     resizeThreshold_ = UINT_MAX ; // stop resizing
     return 0;
   }

   ULng32 newSize = resizeTo() ; // the size for the new HT

   HashTableHeader * newHeader =
     (HashTableHeader *)
     collHeap()->allocateMemory((UInt32)
 			       (newSize * sizeof(HashTableHeader)), FALSE);

   if ( ! newHeader ) {
     resizeThreshold_ = UINT_MAX ; // stop resizing
     return 0;
   }

   // initialize new HT
   memset( newHeader , 0, newSize * sizeof(HashTableHeader) );

   // insert entries from the old HT into the new HT
   for ( ULng32 oldind = 0; oldind < headerCount_; oldind++ ) {
     HashRow *thisRow = header_[oldind].row_ ;
     // traverse the old chain; reassign each entry
     while ( thisRow ) {
       HashRow * nextRow = thisRow->next_ ;

       // determine the new hash chain
       ULng32 chainIdx = thisRow->hashValue() % newSize;

       // Insert at the beginning of the chain in the new HT
       thisRow->setNext(newHeader[chainIdx].row_);
       newHeader[chainIdx].row_ = thisRow;

       thisRow = nextRow ;
     }
   }

   ULng32 memAdded =
     ( newSize - headerCount_ ) * sizeof(HashTableHeader) ;

   // remove old one and replace old fields
   collHeap()->deallocateMemory((void*)header_);
   header_ = newHeader ;
   headerCount_ = newSize ;
   resizeThreshold_ = HT_LOAD_FACTOR_PERCENT * (headerCount_ / 100) ;
   originalSize_ = FALSE; // this HT was resized

   return memAdded ;
 }

 // HashTable::insertUniq()
 // An insert method used by UniqueHashJoin.
 // Assumes input is unique and so does not need to check for
 // duplicates.
 // Entries are inserted in HashValue order.
 //
 void HashTable::insertUniq(HashRow *newRow)
 {
   SimpleHashValue newRowHashValue = newRow->hashValueRaw();

   // Get the first entry for this chain
   //
   ULng32 chainIdx = newRowHashValue % headerCount_;
   HashRow *row = header_[chainIdx].row_;

   // Entries are inserted in HashValue order
   // If this entry belongs at the beginning of the chain
   // insert it there.
   //
   if(!row || newRowHashValue <= row->hashValueRaw()) {
     newRow->setNext(row);
     header_[chainIdx].row_ = newRow;
     return;
   }

   // Cursor so we can insert the new row in the proper position.
   //
   HashRow *prevRow = row;
   row = row->next();

   // Find the position in the chain where this hash value belongs
   //
   while (row && newRowHashValue > row->hashValueRaw()) {
     prevRow = row;
     row = row->next();
   }

   // Insert this entry between this row and the previous row
   //
   newRow->setNext(row);
   prevRow->setNext(newRow);

   return;

 }

 ///////////////////////////////////////////////////////////////////
 // Add a row to the end of the first hash header.  The purpose
 // of this function is to speed up cross product joins that don't
 // have a search predicate and to keep the rows in order by
 // placing new rows at the tail. This function is only used by
 // cross-product.
 ///////////////////////////////////////////////////////////////////
 void HashTable::insertSingleChain(HashRow * newRow) {

   if (header_[0].row_ == NULL)
     header_[0].row_ = newRow;
   else
     singleChainLastRow_->next_ = newRow;

   singleChainLastRow_ = newRow;
   newRow->setNext(NULL);

   // one more row in the hash table
   rowCount_++;

 }

 ///////////////////////////////////////////////////////////////
 // convert the rowCounts in the HashTableHeader into offsets
 // to allow addressing of the bitmap for right outer joins.
 // This feature is disabled for now
 ///////////////////////////////////////////////////////////////
 void HashTable::convertToOffsets() {
   /*
   unsigned long offset = 0;
   unsigned long rowCount = 0;
   for (unsigned long i = 0; i < headerCount_; i++) {
     rowCount += header_[headerCount_].rowCount_;
     header_[headerCount_].rowCount_ = offset;
     offset = rowCount;
   }
   */
 }

 ///////////////////////////////////////////////////////////////
 // RETURNS: the next row for the cursor, if it exists.
 //          NULL(0), if all rows have been returned.
 ///////////////////////////////////////////////////////////////
 HashRow * HashTable::getNext(HashTableCursor * cursor) {

   HashRow * row = cursor->currRow_;

   if (!row)
     // we reached the end of this cursor
     return (HashRow *) NULL;

   if (cursor->currRow_ != cursor->endRow_) {
     cursor->currRow_ = row->next();
     if (cursor->currRow_ == NULL) {
       // end of chain, if there is another chain, switch to it
       cursor->currHeader_++;
       while ((cursor->currHeader_ < headerCount_) &&
 	     //skip all empty chains
 	     (!(header_[cursor->currHeader_].row_)))
 	cursor->currHeader_++;
       if (cursor->currHeader_ < headerCount_)
 	cursor->currRow_ = header_[cursor->currHeader_].row_;
     }
   }
   else
     // end of cursor, set currRow_ NULL, such that with the next call
     // we return NULL
     cursor->currRow_ = NULL;

   return row;
 }

 /////////////////////////////////////////////////////////////////////
 // finds the first matching entry and remembers it.
 // finds the last matching entry and remembers it.
 // They will be used later to return them to the caller by
 // calling getNext().
 /////////////////////////////////////////////////////////////////////
 void HashTable::position(HashTableCursor * cursor,
 			 atp_struct * rowAtp,
 			 atp_struct * workAtp,
 			 short hashTableRowAtpIndex,
 			 ex_expr * searchExpr,
 			 SimpleHashValue hashValue,
                          NABoolean noDupChaining,
 			 NABoolean returnOrdered) {

   ex_assert( searchExpr,
 	     "This position() should only be used for searchable entries. ");

   // Get the tuple used for the search expression and get the chain index.
   tupp &workAtpTupp = workAtp->getTupp(hashTableRowAtpIndex);
   cursor->currHeader_ = hashValue % headerCount_;

   // When no HV dups, and when we need to skip to the next Hash Table entry,
   // the dummyRow is logically positioned one behind the first row in that
   HashRow dummyRow; // next HT entry to fix the for-loop's "row = row->next_"

   // This loop searches for a first matching row.  If one is found,
   // the last matching row is found within the "if" condition in
   // this loop.
   for (HashRow *row = header_[cursor->currHeader_].row_;
        row != NULL;
        row = row->next()) {

     // If the simple hash doesn't match, then skip to the next row.
     if (hashValue != row->hashValue())
       continue;

     // Set the data pointer in the tuple to point to this row.

     // The expression does not expect the HashRow to be part of the row.
     // Adjust the datapointer in the work atp to point beyond the HashRow.
     workAtpTupp.setDataPointer(row->getData());

     // If this row matches, then set the cursor to the current row,
     // search for the last matching row and return.
     if ( searchExpr->eval(rowAtp, workAtp) == ex_expr::EXPR_TRUE ) {
       ULng32 matchCount = 1; // num matched so far

       // Set the row pointers in the cursor to the first matching row
       cursor->beginRow_ = cursor->endRow_ = cursor->currRow_ = row;

       // Sometimes (e.g. for some (anti)semi-joins) we don't need more than
       // one match and if there are many dups, finding the end of the
       // sub-chain has a severe performance penalty -- see solution
       // 10-080320-1596.
       if (noDupChaining)
         return;

       // Start searching on the next row for the last matching row
       for (row = row->next(); row != NULL; row = row->next()) {

         if (row->hashValue() != hashValue)
           break; // different HV, search no more (key-value must be different)

 	if ( ! noHashValueDups_ ) {
 	  // the next HV is the same - check if the key-value is different

           // The expression does not expect the HashRow to be part of the row.
           // Adjust the datapointer in the work atp to point beyond the HashRow.
 	  workAtpTupp.setDataPointer(row->getData());
 	  if ( searchExpr->eval(rowAtp, workAtp) != ex_expr::EXPR_TRUE )
 	    break;  // different key-value - end of this sub-chain
 	}

 	// Found one more row for this sub-chain
         // Set the end row to this matching row.
         cursor->endRow_ = row;
 	matchCount++;
       }

       // At this point the cursor is pointing to the sub-chain of matched rows

       // Ordering request would be made at most once per sub-chain (since this is
       // only for Ordered HJ with unique left rows); the subchain is ordered now
       //     (first) --> (last) --> (before-last) --> ..... (second)
       // so we reorder the subchain before returning, in a way similar to insert(),
       // by reinserting all the rows following the first one.
       if ( returnOrdered && matchCount > 2 ) {
 	HashRow *first = cursor->beginRow_;
 	// HashRow *second = cursor->endRow_ ;
 	HashRow *beyondLast = cursor->endRow_->next() ;
 	HashRow *last = cursor->beginRow_->next();
 	HashRow *tmpPtr = last->next(); // start from before-last
 	last->setNext( beyondLast );
 	cursor->endRow_ = last ; // current 2nd is the last
 	while ( tmpPtr != beyondLast ) {
 	  HashRow *nextOne = tmpPtr->next();
 	  tmpPtr->setNext(first->next());
 	  first->setNext(tmpPtr);
 	  tmpPtr = nextOne;
 	}
       }

       return;
     }
     else if ( noHashValueDups_ || hashTableOverflow_ ) {
       // if HT overflow -- keep trying on this chain
       if ( hashTableOverflow_ ) {
 	if ( row->next() && row->next()->hashValue() == hashValue )
 	  continue; // stay on this chain; maybe it is the next row
       }
       // Same HV in this HT entry, but of different key-value, (or after a HT
       // overflow, but at the end of the chain) then try next HT entry
       cursor->currHeader_ = ++cursor->currHeader_ % headerCount_;
       // set the dummyRow logically behind the first row on the next HT entry
       dummyRow.next_ = header_[cursor->currHeader_].row_ ;
       row = &dummyRow ; // so after "row = row->next_" it'll be the 1-st row
       continue; // back to loop to handle the first row of the next HT entry
     }
   }

   // No matching rows were found. Reset the cursor before returning.
   cursor->init();
 }

 // HashTable::positionUniq()
 // A position method used by UniqueHashJoin.
 // Assumes entries are unique and so does not need to check for
 // duplicates.
 // Returns first match found.
 //
 ex_expr::exp_return_type HashTable::positionUniq(HashRow **currRow,
                                                  atp_struct * leftRowAtp,
                                                  atp_struct * workAtp,
                                                  short rightRowAtpIndex,
                                                  ex_expr * searchExpr,
                                                  SimpleHashValue hashValue)
 {

   // Get the first entry for this chain
   //
   ULng32 chainIdx = hashValue % headerCount_;
   HashRow *row = header_[chainIdx].row_;

   // Skip over entries that have a lower hash value.
   //
   while (row && hashValue > row->hashValueRaw()) {
     row = row->next();
   }

   // Look for a match among those entries with the same hash value.
   //
   while (row && hashValue == row->hashValueRaw())  {

     // Set the data pointer in the tuple to point to this row.

     // The expression does not expect the HashRow to be part of the row.
     // Adjust the datapointer in the work atp to point beyond the HashRow.
     workAtp->getTupp(rightRowAtpIndex).setDataPointer(row->getData());

     // If this row matches, then return this row
     // No need to search further since we know the entries are unique.
     //
     ex_expr::exp_return_type retCode = searchExpr->eval(leftRowAtp, workAtp);
     if(retCode != ex_expr::EXPR_FALSE) {

       // Set the row pointer to the matching row.
       *currRow = row;

       // Found (or error condition).
       return retCode;
     }

     // Try the next row.
     row = row->next();
   }

   // If got to the end of the chain or an entry with a greater hash value
   // then no match was found.

   // Not found
   return ex_expr::EXPR_FALSE;
 }

 /////////////////////////////////////////////////////////
 // positions to the first entry in the hash table.
 // Remember the last entry in the table, if table has
 // entries.
 // Used to retrieve all rows from the hash table by
 // calling getNext().
 /////////////////////////////////////////////////////////
 void HashTable::position(HashTableCursor * cursor) {
   // find the first non empty chain
   ULng32  i = 0;
   while ((i < headerCount_) && (!(header_[i].row_)))
     i++;

   if (i == headerCount_)
     // empty table
     cursor->init();
   else {
     cursor->beginRow_ = header_[i].row_;
     cursor->currHeader_ = i;

     // now find the last entry in the hash table
     i = headerCount_ - 1;
     // can never reach i < 0. Atleast one non-null chain must exist
     while (!(header_[i].row_))
       i--;

     cursor->endRow_ = header_[i].row_;
     while (cursor->endRow_->next_)
       cursor->endRow_ = cursor->endRow_->next_;
   }
   cursor->currRow_ = cursor->beginRow_;
 }

 /////////////////////////////////////////////////////////
 // positionSingleChain() is used by cross product to
 // return a single list of all rows in the inner table
 /////////////////////////////////////////////////////////
 void HashTable::positionSingleChain(HashTableCursor * cursor) {
   cursor->currHeader_ = 0;
   cursor->currRow_ = cursor->beginRow_ = header_[0].row_;
   cursor->endRow_ = singleChainLastRow_;
 }

 /////////////////////////////////////////////////////////
 // determine the length of hash chain chainIdx
 /////////////////////////////////////////////////////////
 ULng32 HashTable::getChainSize(ULng32 chainIdx) {
   // sanity check
   ex_assert(chainIdx < headerCount_, "out of bound hash table access");
   ULng32 count = 0;
   HashRow * row = header_[chainIdx].row_;
   while(row) {
     count++;
     row = row->next();
   }
   return count;
 }
	// --C++--
	// *********************************************************************
	//
	// File: hash_table.cpp
	// Description:
	//
	//
	// Created: 7/10/95
	// Language: C++
	//
	//
	// @@@ START COPYRIGHT @@@
	//
	// Licensed to the Apache Software Foundation (ASF) under one
	// or more contributor license agreements. See the NOTICE file
	// distributed with this work for additional information
	// regarding copyright ownership. The ASF licenses this file
	// to you under the Apache License, Version 2.0 (the
	// "License"); you may not use this file except in compliance
	// with the License. You may obtain a copy of the License at
	//
	// http://www.apache.org/licenses/LICENSE-2.0
	//
	// Unless required by applicable law or agreed to in writing,
	// software distributed under the License is distributed on an
	// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
	// KIND, either express or implied. See the License for the
	// specific language governing permissions and limitations
	// under the License.
	//
	// @@@ END COPYRIGHT @@@
	// *********************************************************************

	#include "hash_table.h"

	void HashRow::print(ULng32 rowlength) {
	printf("\tHashValue: %6d\n\tData: ", hashValue_);
	unsigned char * data = (unsigned char *)this;
	ULng32 i = 0;
	for (; i < rowlength; i++)
	printf("%02x", data[i]);
	printf("\n\t : ");
	for (i = 0; i < rowlength; i++)
	printf("%2c", ((data[i] >= ' ' && data[i] < 127) ? data[i] : '?'));
	printf("\n\t ");
	for (i = 0; i < rowlength; i++)
	if (i % 10)
	printf (" ");
	else
	printf ("%1d0", i/10);
	printf ("\n");
	}

	HashTableCursor::HashTableCursor() {
	init();
	}
	HashTableCursor::~HashTableCursor() {
	}

	HashTableHeader::HashTableHeader() {
	init();
	}

	void HashTableHeader::init() {
	// rowCount_ = 0;
	row_ = NULL;
	}


	HashTable::HashTable(ULng32 headerCount,
	NABoolean evenFactor,
	ULng32 primeFactor,
	NABoolean noHVDups,
	NABoolean doResize
	)
	: headerCount_(headerCount > MAX_HEADER_COUNT ?
	MAX_HEADER_COUNT : headerCount < MIN_HEADER_COUNT ?
	MIN_HEADER_COUNT : headerCount),
	rowCount_(0),
	header_(NULL),
	noHashValueDups_(noHVDups),
	resizeThreshold_(UINT_MAX), // default - don't resize
	originalSize_(TRUE), // this HT was not resized yet
	hashTableOverflow_(FALSE),
	singleChainLastRow_(NULL) {

	#ifdef _DEBUG
	if ( getenv("ALLOW_HV_DUPS" ) ) noHashValueDups_ = FALSE; // for testing
	#endif

	headerCount_ *= 2; // the first iteration below would divide it back

	// allocate the hash table headers. If we can't allocate it,
	// try a smaller table. If we can't even allocate a table with
	// MIN_HEADER_COUNT or somewhat more, we die!
	while (!header_) {

	headerCount_ /= 2;
	if ( headerCount_ < MIN_HEADER_COUNT ) return; // allocation failed

	// Ensure that headerCount_ has no common prime factors with #buckets
	if ( evenFactor && 0 == headerCount_ % 2 )
	headerCount_++ ; // the count is even no more
	if ( 0 == headerCount_ % primeFactor )
	headerCount_ += 2 ; // break the common factor, since primeFactor > 2

	header_ = (HashTableHeader *)collHeap()->allocateMemory((UInt32)
	(headerCount_ * sizeof(HashTableHeader)), FALSE);
	}

	if ( doResize )
	resizeThreshold_ = (headerCount_ / 100) * HT_LOAD_FACTOR_PERCENT ;

	init(); // set all entries in the hash table to NULL
	}

	HashTable::~HashTable() {
	if (header_) {
	collHeap()->deallocateMemory((void*)header_);
	header_ = NULL;
	}
	}

	void HashTable::init() {
	// memset is about six times faster than this loop
	// for (unsigned long i = 0; i < headerCount_; i++)
	// header_[i].init();
	memset( header_ , 0, headerCount_ * sizeof(HashTableHeader) );
	}

	//////////////////////////////////////////////////////////////////////
	// Chains a row into the appropriate hash table chain. It is
	// inserted after the first matching row or at the beginning
	// of the hash chain, if a matching row is not found.
	//////////////////////////////////////////////////////////////////////
	void HashTable::insert(atp_struct * workAtp,
	HashRow * newRow,
	tupp &workAtpTupp1,
	tupp &workAtpTupp2,
	ex_expr * searchExpr) {

	ex_assert( searchExpr,
	"This insert() should only be used for searchable entries. ");

	// Calculate the simple hash value for the new row
	SimpleHashValue newRowHV = newRow->hashValue();

	// Determine the hash chain and get the first entry in the chain.
	ULng32 chainIdx = newRowHV % headerCount_;
	ULng32 origChainIdx = chainIdx;
	HashRow *row = header_[chainIdx].row_;

	// Boolean used to control when the data pointer for tupp1 should be set
	NABoolean isTupp1Set = FALSE;


	while (row != NULL) {
	if (newRowHV == row->hashValue()) {

	// The expression does not expect the HashRow to be part of the row.
	// Adjust the datapointer in the work atp to point beyond the HashRow.
	workAtpTupp2.setDataPointer(row->getData());

	// Set the data in the workAtp for the first index if not set already
	if (!isTupp1Set) {

	// The expression does not expect the HashRow to be part of the row.
	// Adjust the datapointer in the work atp to point beyond the HashRow.
	workAtpTupp1.setDataPointer(newRow->getData());
	isTupp1Set = TRUE;
	}

	if ( searchExpr->eval(workAtp, workAtp) == ex_expr::EXPR_TRUE )
	break; // Break from while loop.
	else if ( noHashValueDups_ ) {
	// Different key-value, but with the same HV, try the next HT entry
	chainIdx = ++chainIdx % headerCount_; // next HT entry

	if ( (chainIdx + 1) % headerCount_ == origChainIdx ) {
	// Oops - an overflow -- more diff key-value sub-chains (with same HV)
	// than the size of the hash table
	// (extremely rare - practically impossible -- requires a very
	// small HT (i.e. memory pressure) and a very bad hash function.)
	// After an overflow - revert to the other method -- allow dups
	// (The "+ 1" above ensures at least one HT entry without this HV
	// to prevent an infinite loop in position() )
	noHashValueDups_ = FALSE; // stop disallowing dups
	chainIdx = origChainIdx; // return to the original chain
	hashTableOverflow_ = TRUE; // remember that the HT still contains
	// "re-placed" sub-chains from before the overflow; they should
	// be searched for in the position() method
	}

	row = header_[chainIdx].row_; // first row in next chain
	continue; // loop again
	}
	}
	row = row->next();
	}

	// If the row was not found, insert at the beginning of the chain.
	// Otherwise, insert after the first match.
	if (row == NULL) {
	newRow->setNext(header_[chainIdx].row_);
	header_[chainIdx].row_ = newRow;
	}
	else {
	newRow->setNext(row->next());
	row->setNext(newRow);
	}

	// one more row in the chain
	// header_[chainIdx].rowCount_++;
	// one more row in the hash table
	rowCount_++;
	}

	///////////////////////////////////////////////////////////////////
	// chain a row unconditionally into the hash table. Don't look
	// for matches.
	///////////////////////////////////////////////////////////////////
	NABoolean HashTable::insert(HashRow * newRow) {
	// determine the hash chain
	ULng32 chainIdx = newRow->hashValue() % headerCount_;

	// Insert at the beginning of the chain
	newRow->setNext(header_[chainIdx].row_);
	header_[chainIdx].row_ = newRow;

	// one more row in the hash table
	return ++rowCount_ > resizeThreshold_ ;
	}

	////////////////////////////////////////////////////////////////////////
	// Resize the hash-table (HGB only; no duplicate entries; don't care order)
	// (if we can't allocate a new hash-table then we stop resizing; reason is
	// that we'd have to overflow anyway soon, and likely flush this cluster.)
	///////////////////////////////////////////////////////////////////////
	ULng32 HashTable::resize(NABoolean enoughMemory) {
	if ( ! enoughMemory ) {
	resizeThreshold_ = UINT_MAX ; // stop resizing
	return 0;
	}

	ULng32 newSize = resizeTo() ; // the size for the new HT

	HashTableHeader * newHeader =
	(HashTableHeader *)
	collHeap()->allocateMemory((UInt32)
	(newSize * sizeof(HashTableHeader)), FALSE);

	if ( ! newHeader ) {
	resizeThreshold_ = UINT_MAX ; // stop resizing
	return 0;
	}

	// initialize new HT
	memset( newHeader , 0, newSize * sizeof(HashTableHeader) );

	// insert entries from the old HT into the new HT
	for ( ULng32 oldind = 0; oldind < headerCount_; oldind++ ) {
	HashRow *thisRow = header_[oldind].row_ ;
	// traverse the old chain; reassign each entry
	while ( thisRow ) {
	HashRow * nextRow = thisRow->next_ ;

	// determine the new hash chain
	ULng32 chainIdx = thisRow->hashValue() % newSize;

	// Insert at the beginning of the chain in the new HT
	thisRow->setNext(newHeader[chainIdx].row_);
	newHeader[chainIdx].row_ = thisRow;

	thisRow = nextRow ;
	}
	}

	ULng32 memAdded =
	( newSize - headerCount_ ) * sizeof(HashTableHeader) ;

	// remove old one and replace old fields
	collHeap()->deallocateMemory((void*)header_);
	header_ = newHeader ;
	headerCount_ = newSize ;
	resizeThreshold_ = HT_LOAD_FACTOR_PERCENT * (headerCount_ / 100) ;
	originalSize_ = FALSE; // this HT was resized

	return memAdded ;
	}

	// HashTable::insertUniq()
	// An insert method used by UniqueHashJoin.
	// Assumes input is unique and so does not need to check for
	// duplicates.
	// Entries are inserted in HashValue order.
	//
	void HashTable::insertUniq(HashRow *newRow)
	{
	SimpleHashValue newRowHashValue = newRow->hashValueRaw();

	// Get the first entry for this chain
	//
	ULng32 chainIdx = newRowHashValue % headerCount_;
	HashRow *row = header_[chainIdx].row_;

	// Entries are inserted in HashValue order
	// If this entry belongs at the beginning of the chain
	// insert it there.
	//
	if(!row \|\| newRowHashValue <= row->hashValueRaw()) {
	newRow->setNext(row);
	header_[chainIdx].row_ = newRow;
	return;
	}

	// Cursor so we can insert the new row in the proper position.
	//
	HashRow *prevRow = row;
	row = row->next();

	// Find the position in the chain where this hash value belongs
	//
	while (row && newRowHashValue > row->hashValueRaw()) {
	prevRow = row;
	row = row->next();
	}

	// Insert this entry between this row and the previous row
	//
	newRow->setNext(row);
	prevRow->setNext(newRow);

	return;

	}

	///////////////////////////////////////////////////////////////////
	// Add a row to the end of the first hash header. The purpose
	// of this function is to speed up cross product joins that don't
	// have a search predicate and to keep the rows in order by
	// placing new rows at the tail. This function is only used by
	// cross-product.
	///////////////////////////////////////////////////////////////////
	void HashTable::insertSingleChain(HashRow * newRow) {

	if (header_[0].row_ == NULL)
	header_[0].row_ = newRow;
	else
	singleChainLastRow_->next_ = newRow;

	singleChainLastRow_ = newRow;
	newRow->setNext(NULL);

	// one more row in the hash table
	rowCount_++;

	}

	///////////////////////////////////////////////////////////////
	// convert the rowCounts in the HashTableHeader into offsets
	// to allow addressing of the bitmap for right outer joins.
	// This feature is disabled for now
	///////////////////////////////////////////////////////////////
	void HashTable::convertToOffsets() {
	/*
	unsigned long offset = 0;
	unsigned long rowCount = 0;
	for (unsigned long i = 0; i < headerCount_; i++) {
	rowCount += header_[headerCount_].rowCount_;
	header_[headerCount_].rowCount_ = offset;
	offset = rowCount;
	}
	*/
	}

	///////////////////////////////////////////////////////////////
	// RETURNS: the next row for the cursor, if it exists.
	// NULL(0), if all rows have been returned.
	///////////////////////////////////////////////////////////////
	HashRow * HashTable::getNext(HashTableCursor * cursor) {

	HashRow * row = cursor->currRow_;

	if (!row)
	// we reached the end of this cursor
	return (HashRow *) NULL;

	if (cursor->currRow_ != cursor->endRow_) {
	cursor->currRow_ = row->next();
	if (cursor->currRow_ == NULL) {
	// end of chain, if there is another chain, switch to it
	cursor->currHeader_++;
	while ((cursor->currHeader_ < headerCount_) &&
	//skip all empty chains
	(!(header_[cursor->currHeader_].row_)))
	cursor->currHeader_++;
	if (cursor->currHeader_ < headerCount_)
	cursor->currRow_ = header_[cursor->currHeader_].row_;
	}
	}
	else
	// end of cursor, set currRow_ NULL, such that with the next call
	// we return NULL
	cursor->currRow_ = NULL;

	return row;
	}

	/////////////////////////////////////////////////////////////////////
	// finds the first matching entry and remembers it.
	// finds the last matching entry and remembers it.
	// They will be used later to return them to the caller by
	// calling getNext().
	/////////////////////////////////////////////////////////////////////
	void HashTable::position(HashTableCursor * cursor,
	atp_struct * rowAtp,
	atp_struct * workAtp,
	short hashTableRowAtpIndex,
	ex_expr * searchExpr,
	SimpleHashValue hashValue,
	NABoolean noDupChaining,
	NABoolean returnOrdered) {

	ex_assert( searchExpr,
	"This position() should only be used for searchable entries. ");

	// Get the tuple used for the search expression and get the chain index.
	tupp &workAtpTupp = workAtp->getTupp(hashTableRowAtpIndex);
	cursor->currHeader_ = hashValue % headerCount_;

	// When no HV dups, and when we need to skip to the next Hash Table entry,
	// the dummyRow is logically positioned one behind the first row in that
	HashRow dummyRow; // next HT entry to fix the for-loop's "row = row->next_"

	// This loop searches for a first matching row. If one is found,
	// the last matching row is found within the "if" condition in
	// this loop.
	for (HashRow *row = header_[cursor->currHeader_].row_;
	row != NULL;
	row = row->next()) {

	// If the simple hash doesn't match, then skip to the next row.
	if (hashValue != row->hashValue())
	continue;

	// Set the data pointer in the tuple to point to this row.

	// The expression does not expect the HashRow to be part of the row.
	// Adjust the datapointer in the work atp to point beyond the HashRow.
	workAtpTupp.setDataPointer(row->getData());

	// If this row matches, then set the cursor to the current row,
	// search for the last matching row and return.
	if ( searchExpr->eval(rowAtp, workAtp) == ex_expr::EXPR_TRUE ) {
	ULng32 matchCount = 1; // num matched so far

	// Set the row pointers in the cursor to the first matching row
	cursor->beginRow_ = cursor->endRow_ = cursor->currRow_ = row;

	// Sometimes (e.g. for some (anti)semi-joins) we don't need more than
	// one match and if there are many dups, finding the end of the
	// sub-chain has a severe performance penalty -- see solution
	// 10-080320-1596.
	if (noDupChaining)
	return;

	// Start searching on the next row for the last matching row
	for (row = row->next(); row != NULL; row = row->next()) {

	if (row->hashValue() != hashValue)
	break; // different HV, search no more (key-value must be different)

	if ( ! noHashValueDups_ ) {
	// the next HV is the same - check if the key-value is different

	// The expression does not expect the HashRow to be part of the row.
	// Adjust the datapointer in the work atp to point beyond the HashRow.
	workAtpTupp.setDataPointer(row->getData());
	if ( searchExpr->eval(rowAtp, workAtp) != ex_expr::EXPR_TRUE )
	break; // different key-value - end of this sub-chain
	}

	// Found one more row for this sub-chain
	// Set the end row to this matching row.
	cursor->endRow_ = row;
	matchCount++;
	}

	// At this point the cursor is pointing to the sub-chain of matched rows

	// Ordering request would be made at most once per sub-chain (since this is
	// only for Ordered HJ with unique left rows); the subchain is ordered now
	// (first) --> (last) --> (before-last) --> ..... (second)
	// so we reorder the subchain before returning, in a way similar to insert(),
	// by reinserting all the rows following the first one.
	if ( returnOrdered && matchCount > 2 ) {
	HashRow *first = cursor->beginRow_;
	// HashRow *second = cursor->endRow_ ;
	HashRow *beyondLast = cursor->endRow_->next() ;
	HashRow *last = cursor->beginRow_->next();
	HashRow *tmpPtr = last->next(); // start from before-last
	last->setNext( beyondLast );
	cursor->endRow_ = last ; // current 2nd is the last
	while ( tmpPtr != beyondLast ) {
	HashRow *nextOne = tmpPtr->next();
	tmpPtr->setNext(first->next());
	first->setNext(tmpPtr);
	tmpPtr = nextOne;
	}
	}

	return;
	}
	else if ( noHashValueDups_ \|\| hashTableOverflow_ ) {
	// if HT overflow -- keep trying on this chain
	if ( hashTableOverflow_ ) {
	if ( row->next() && row->next()->hashValue() == hashValue )
	continue; // stay on this chain; maybe it is the next row
	}
	// Same HV in this HT entry, but of different key-value, (or after a HT
	// overflow, but at the end of the chain) then try next HT entry
	cursor->currHeader_ = ++cursor->currHeader_ % headerCount_;
	// set the dummyRow logically behind the first row on the next HT entry
	dummyRow.next_ = header_[cursor->currHeader_].row_ ;
	row = &dummyRow ; // so after "row = row->next_" it'll be the 1-st row
	continue; // back to loop to handle the first row of the next HT entry
	}
	}

	// No matching rows were found. Reset the cursor before returning.
	cursor->init();
	}

	// HashTable::positionUniq()
	// A position method used by UniqueHashJoin.
	// Assumes entries are unique and so does not need to check for
	// duplicates.
	// Returns first match found.
	//
	ex_expr::exp_return_type HashTable::positionUniq(HashRow **currRow,
	atp_struct * leftRowAtp,
	atp_struct * workAtp,
	short rightRowAtpIndex,
	ex_expr * searchExpr,
	SimpleHashValue hashValue)
	{

	// Get the first entry for this chain
	//
	ULng32 chainIdx = hashValue % headerCount_;
	HashRow *row = header_[chainIdx].row_;

	// Skip over entries that have a lower hash value.
	//
	while (row && hashValue > row->hashValueRaw()) {
	row = row->next();
	}

	// Look for a match among those entries with the same hash value.
	//
	while (row && hashValue == row->hashValueRaw()) {

	// Set the data pointer in the tuple to point to this row.

	// The expression does not expect the HashRow to be part of the row.
	// Adjust the datapointer in the work atp to point beyond the HashRow.
	workAtp->getTupp(rightRowAtpIndex).setDataPointer(row->getData());

	// If this row matches, then return this row
	// No need to search further since we know the entries are unique.
	//
	ex_expr::exp_return_type retCode = searchExpr->eval(leftRowAtp, workAtp);
	if(retCode != ex_expr::EXPR_FALSE) {

	// Set the row pointer to the matching row.
	*currRow = row;

	// Found (or error condition).
	return retCode;
	}

	// Try the next row.
	row = row->next();
	}

	// If got to the end of the chain or an entry with a greater hash value
	// then no match was found.

	// Not found
	return ex_expr::EXPR_FALSE;
	}

	/////////////////////////////////////////////////////////
	// positions to the first entry in the hash table.
	// Remember the last entry in the table, if table has
	// entries.
	// Used to retrieve all rows from the hash table by
	// calling getNext().
	/////////////////////////////////////////////////////////
	void HashTable::position(HashTableCursor * cursor) {
	// find the first non empty chain
	ULng32 i = 0;
	while ((i < headerCount_) && (!(header_[i].row_)))
	i++;

	if (i == headerCount_)
	// empty table
	cursor->init();
	else {
	cursor->beginRow_ = header_[i].row_;
	cursor->currHeader_ = i;

	// now find the last entry in the hash table
	i = headerCount_ - 1;
	// can never reach i < 0. Atleast one non-null chain must exist
	while (!(header_[i].row_))
	i--;

	cursor->endRow_ = header_[i].row_;
	while (cursor->endRow_->next_)
	cursor->endRow_ = cursor->endRow_->next_;
	}
	cursor->currRow_ = cursor->beginRow_;
	}

	/////////////////////////////////////////////////////////
	// positionSingleChain() is used by cross product to
	// return a single list of all rows in the inner table
	/////////////////////////////////////////////////////////
	void HashTable::positionSingleChain(HashTableCursor * cursor) {
	cursor->currHeader_ = 0;
	cursor->currRow_ = cursor->beginRow_ = header_[0].row_;
	cursor->endRow_ = singleChainLastRow_;
	}

	/////////////////////////////////////////////////////////
	// determine the length of hash chain chainIdx
	/////////////////////////////////////////////////////////
	ULng32 HashTable::getChainSize(ULng32 chainIdx) {
	// sanity check
	ex_assert(chainIdx < headerCount_, "out of bound hash table access");
	ULng32 count = 0;
	HashRow * row = header_[chainIdx].row_;
	while(row) {
	count++;
	row = row->next();
	}
	return count;
	}